"""
KYC & Employee Onboarding GUI — v6 (Manual Crop) — **Stamp fix**

Fix: PyMuPDF stamp footer was calling `insert_text` with `x, y, text` (3 positional args)
     instead of `(x, y), text`. Updated to `page.insert_text((rect.x0 + 20, rect.y1 - 20), footer, ...)`.

This file is identical to the previous manual‑crop version except for that one line.
"""

import io, os, re, smtplib, ssl, tempfile, base64
from email.message import EmailMessage
from datetime import datetime

import fitz  # PyMuPDF
import pytesseract
import cv2
import numpy as np
import yaml
from PIL import Image, ImageTk
from reportlab.lib.pagesizes import A4
from reportlab.pdfgen import canvas
from reportlab.lib.units import mm
from reportlab.lib.utils import ImageReader
from PyPDF2 import PdfMerger
import tkinter as tk
from tkinter import ttk, filedialog, messagebox

# Gmail API
try:
    from google.oauth2.credentials import Credentials
    from google_auth_oauthlib.flow import InstalledAppFlow
    from googleapiclient.discovery import build
except Exception:  # libs may not be installed yet
    Credentials = None
    InstalledAppFlow = None
    build = None

# ---------------- Config (YAML) ---------------- #
# Path of the YAML settings file; a relative path, so it resolves against the
# current working directory.
CONFIG_FILE = "kyc_config.yaml"
# Baseline settings. load_config() uses these to fill in any key that is
# missing from the YAML file, and writes them out when no file exists yet.
DEFAULT_CONFIG = {
    "org_name": "Your Company Pvt Ltd",
    "logo_path": "",
    "policy_folder": "",
    "footer_org": "Your Company Pvt Ltd",
    "email": {
        # Selects which of the two sub-sections below is used for sending.
        "provider": "gmail_api",  # or "smtp"
        "smtp": {
            "server": "smtp.example.com",
            "port": 587,
            "username": "user@example.com",
            # Placeholder value; load_config() persists this section to the
            # YAML file as plain text.
            "password": "your_app_password",
            "use_starttls": True
        },
        "gmail_api": {
            "credentials_file": "credentials.json",
            "token_file": "token.json"
        }
    }
}

def load_config(path=CONFIG_FILE):
    """Load settings from the YAML file at *path*, filling gaps from DEFAULT_CONFIG.

    Values present in the file win; any key missing from the file (at any
    nesting level) is taken from DEFAULT_CONFIG. When the file does not exist
    it is created with the merged result (best effort).

    An unreadable or malformed file, a file whose top level is not a mapping,
    and a section that should be a mapping but is not (e.g. ``email:`` left
    empty) all fall back to the defaults for the affected part instead of
    raising.

    Args:
        path: location of the YAML settings file.

    Returns:
        dict: the merged configuration.
    """
    import yaml

    cfg = {}
    if os.path.exists(path):
        try:
            with open(path, 'r', encoding='utf-8') as f:
                cfg = yaml.safe_load(f) or {}
        except Exception:
            # Best effort: a broken file must not prevent the app from starting.
            cfg = {}
    if not isinstance(cfg, dict):
        # e.g. the file contains a bare list or scalar
        cfg = {}

    def merge(a, b):
        """Fill keys missing from *a* with the values from *b* (recursively)."""
        for k, v in b.items():
            if isinstance(v, dict):
                current = a.get(k)
                # A non-mapping where a section is expected is replaced by the defaults.
                a[k] = merge(current if isinstance(current, dict) else {}, v)
            else:
                a.setdefault(k, v)
        return a

    # merge() only reads from its second argument, so DEFAULT_CONFIG is never mutated.
    cfg = merge(cfg, DEFAULT_CONFIG)
    if not os.path.exists(path):
        try:
            with open(path, 'w', encoding='utf-8') as f:
                yaml.safe_dump(cfg, f, sort_keys=False)
        except Exception:
            # Writing the starter file is a convenience only (e.g. read-only folder).
            pass
    return cfg

# ---------------- OCR helpers ---------------- #
def load_image_from_pdf_first_page(pdf_path, dpi=300):
    """Render the first page of a PDF to an RGB PIL image.

    Args:
        pdf_path: path of the PDF file to open.
        dpi: render resolution; the page is scaled by ``dpi / 72``.

    Returns:
        PIL.Image.Image, or None when the document has no pages.
    """
    doc = fitz.open(pdf_path)
    try:
        if doc.page_count == 0:
            return None
        page = doc.load_page(0)
        zoom = dpi / 72
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
        # The image is built before the document is closed, as the samples
        # buffer belongs to the pixmap.
        return Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    finally:
        # Close the document even if rendering raises.
        doc.close()


def extract_text_from_image(pil_image):
    """Return the text Tesseract recognises in a grayscale copy of *pil_image*."""
    grayscale = pil_image.convert("L")
    recognised = pytesseract.image_to_string(grayscale)
    return recognised

# ---------------- Regex & validation ---------------- #
# PAN: five capital letters, four digits, one capital letter, as a standalone token.
PAN_REGEX = re.compile(r"\b([A-Z]{5}[0-9]{4}[A-Z])\b")
# Aadhaar: twelve digits, either contiguous or as three groups of four
# separated by a single whitespace character.
AADHAAR_REGEX = re.compile(r"\b(\d{4}\s\d{4}\s\d{4}|\d{12})\b")
# Dates with "-" or "/" separators, as 2-2-4 or 4-2-2 digit groups.
# Only the digit counts are checked, not the value ranges.
DATE_REGEX = re.compile(r"\b(\d{2}[\-/]\d{2}[\-/]\d{4}|\d{4}[\-/]\d{2}[\-/]\d{2})\b")
# Standalone six-digit number; used as the PIN-code anchor when parsing addresses.
PINCODE_REGEX = re.compile(r"\b\d{6}\b")
# Words that mark a line as card boilerplate rather than a person's name.
# NOTE(review): the alternatives are unanchored, so they also match inside
# longer words (e.g. "CARD" inside "RICARDO") — confirm this is acceptable.
HEADER_STOPWORDS = re.compile(r"INCOME|TAX|DEPARTMENT|GOVERNMENT|REPUBLIC|INDIA|UNION|MINISTRY|AADHAAR|UNIQUE|IDENTITY|CARD|PERMANENT|ACCOUNT|NUMBER", re.IGNORECASE)
# Runs of anything other than A-Z, "." and space; _clean_person() replaces each run with a space.
_name_token_re = re.compile(r"[^A-Z. ]+")

# helpers

def _norm(s: str) -> str:
    return s.upper().replace("’", "'").replace("‘", "'").replace("´", "'").replace("`", "'")

def _clean_person(s: str) -> str:
    """Return *s* as a title-cased person name, or "" when it does not look like one.

    The text is upper-cased, every character outside A-Z, "." and space is
    turned into a space, whitespace is collapsed and leading/trailing spaces
    and dots are removed. The result is accepted only when it has two to four
    words and contains none of the HEADER_STOPWORDS.
    """
    upper = _norm(s)
    letters_only = _name_token_re.sub(" ", upper)
    collapsed = re.sub(r"\s+", " ", letters_only).strip(" .")
    words = [word for word in collapsed.split(" ") if word]
    if len(words) < 2 or len(words) > 4:
        return ""
    if HEADER_STOPWORDS.search(collapsed):
        return ""
    return " ".join(word.title() for word in words)

# ---------------- Address cleanup ---------------- #
# Leading field label (optionally followed by ":" and spaces) that is stripped
# from the start of an address line; includes the misspelling "adress".
LABEL_RE = re.compile(r"^(aadhaar|postal\s*address|address|adress|to|from)\b\s*:?\s*", re.IGNORECASE)
# Whole words that mark a line as a non-address field; parse_address_from_text()
# skips any line containing one of them.
STOP_RE = re.compile(r"\b(DOB|Date|Year|Gender|Male|Female|Aadhaar|VID)\b", re.IGNORECASE)

def parse_address_from_text(text: str) -> str:
    """Extract a single-line postal address from OCR text.

    Steps:
      1. Strip each line and remove a leading field label (LABEL_RE); drop
         empty lines.
      2. If a line still mentions "address", start from the line after it.
      3. Collect up to ten lines, skipping lines that contain a STOP_RE word,
         and stop after the first line that contains a six-digit PIN code.
      4. If nothing was collected, fall back to the first PIN-code line plus
         up to four lines before it.
      5. Join with ", ", repair known OCR word splits, drop stray one- or
         two-letter tokens and tidy whitespace.

    Args:
        text: raw OCR output.

    Returns:
        The cleaned address, or "" when nothing usable was found.
    """
    raw_lines = [LABEL_RE.sub('', ln.strip(' :,-')) for ln in text.splitlines()]
    raw_lines = [ln for ln in raw_lines if ln]

    # Find label; start from next line.
    # NOTE(review): leading labels were already removed above, so this only
    # fires when "address" appears somewhere other than the start of a line.
    start = None
    for i, ln in enumerate(raw_lines):
        if re.search(r"postal\s*address|adress|address", ln, re.IGNORECASE):
            start = i + 1
            break

    lines = raw_lines[start:] if start is not None else raw_lines

    clean = []
    for ln in lines[:10]:
        if STOP_RE.search(ln):
            continue
        ln2 = LABEL_RE.sub('', ln).strip(' ,:-')
        if not ln2:
            continue
        clean.append(ln2)
        if PINCODE_REGEX.search(ln2):
            # The PIN code is treated as the last line of the address.
            break

    # PIN-anchored fallback
    if not clean:
        for i, ln in enumerate(lines):
            if PINCODE_REGEX.search(ln):
                window = [LABEL_RE.sub('', x).strip(' ,:-') for x in lines[max(0, i-4):i+1]]
                clean = [x for x in window if x and not STOP_RE.search(x)]
                break

    # Join & scrub small tokens
    address = ", ".join(clean)
    address = re.sub(r"\b(AADHAAR|Aadhaar|Postal\s*Address|Adress|Address)\b\s*:?\s*", '', address, flags=re.IGNORECASE)
    # Repair split words BEFORE the short-token scrub: "Ro", "ad" and "Ap" are
    # themselves 1-2 letter tokens and would otherwise be deleted first.
    address = address.replace("Ro ad", "Road").replace("Ap art", "Apart")
    address = re.sub(r"\b([A-Za-z]{1,2})\b[, ]*", '', address)  # drop stray 1–2 letter OCR tokens
    address = re.sub(r"\s{2,}", ' ', address)
    return address.strip(' ,')

# ---------------- Public validators ---------------- #

def validate_pan(pan: str) -> bool:
    """Return True when *pan*, trimmed and upper-cased, fully matches PAN_REGEX."""
    candidate = pan.strip().upper()
    return PAN_REGEX.fullmatch(candidate) is not None

def normalize_aadhaar(a: str) -> str:
    """Strip non-digits from *a*; format exactly twelve digits as "XXXX XXXX XXXX".

    Any other digit count is returned as the bare digit string.
    """
    digits = re.sub(r"\D", "", a)
    if len(digits) != 12:
        return digits
    return " ".join((digits[:4], digits[4:8], digits[8:]))

# ---------------- Parse PAN / Aadhaar ---------------- #

def parse_pan_text(text):
    """Pull PAN number, holder name, father's name and date of birth from OCR text.

    Father's name is looked up in three stages, stopping at the first hit:
    an inline "FATHER'S NAME: ..." label, a stand-alone label followed by the
    name on one of the next five lines, and finally an "S/O", "D/O" or "W/O"
    prefix. The holder name comes from a "NAME ..." line, or failing that from
    the first line that passes _clean_person().

    Returns:
        dict with keys "pan_number", "name", "father_name" and "dob"; a field
        that could not be found is "".
    """
    result = {"pan_number": "", "name": "", "father_name": "", "dob": ""}

    pan_match = PAN_REGEX.search(text)
    if pan_match:
        result["pan_number"] = pan_match.group(1)
    date_match = DATE_REGEX.search(text)
    if date_match:
        result["dob"] = date_match.group(1)

    lines = [ln.strip() for ln in _norm(text).splitlines() if ln.strip()]

    def first_labelled(pattern):
        """Return the first usable name captured by *pattern* on any line, else ""."""
        for ln in lines:
            hit = re.search(pattern, ln)
            if hit:
                person = _clean_person(hit.group(1))
                if person:
                    return person
        return ""

    def father_below_label():
        """Return the first usable name within five lines after a bare father label."""
        for idx, ln in enumerate(lines):
            if not re.fullmatch(r"FATHER\s*'?S?\s*NAME[:\-]?", ln):
                continue
            for follower in lines[idx + 1:idx + 6]:
                person = _clean_person(follower)
                if person:
                    return person
        return ""

    # Father label patterns, tried in order of reliability.
    result["father_name"] = (
        first_labelled(r"FATHER\s*'?S?\s*NAME\s*[:\-]?\s*([A-Z .]{3,})")
        or father_below_label()
        or first_labelled(r"[SDW]\s*/\s*O\s*[:\-]?\s*([A-Z .]{3,})")
    )

    # Holder name (avoid headers)
    result["name"] = first_labelled(r"^NAME\s*[:\-]?\s*([A-Z .]{3,})$")
    if not result["name"]:
        uppercased = (_clean_person(ln).upper() for ln in lines)
        fallback = next(
            (up for up in uppercased if up and not HEADER_STOPWORDS.search(up)),
            None,
        )
        if fallback:
            result["name"] = fallback.title()

    return result


def parse_aadhaar_text(text):
    """Pull Aadhaar number, name, date (or year) of birth and gender from OCR text.

    Returns:
        dict with keys "aadhaar_number" (digits only), "name", "dob",
        "gender" ("Female", "Male", "Other" or "") and "address". The address
        is never filled in here and is always "".
    """
    result = {"aadhaar_number": "", "name": "", "dob": "", "gender": "", "address": ""}
    lines = [ln.strip() for ln in text.splitlines() if ln.strip()]

    m = AADHAAR_REGEX.search(text)
    if m:
        # The pattern allows any whitespace between the groups (tab, newline),
        # so strip all of it rather than just spaces.
        result["aadhaar_number"] = re.sub(r"\s", "", m.group(1))

    m = DATE_REGEX.search(text)
    if m:
        result["dob"] = m.group(1)
    else:
        yob = re.search(r"Year\s*of\s*Birth\s*:?\s*(\d{4})", text, re.IGNORECASE)
        if yob:
            result["dob"] = yob.group(1)

    if re.search(r"\bFEMALE\b", text, re.IGNORECASE):
        result["gender"] = "Female"
    elif re.search(r"\bMALE\b", text, re.IGNORECASE):
        result["gender"] = "Male"
    # Group the alternatives so both word boundaries apply to both words;
    # ungrouped, the pattern also matched words such as "OTHERWISE".
    elif re.search(r"\b(?:OTHER|TRANSGENDER)\b", text, re.IGNORECASE):
        result["gender"] = "Other"

    # The first line that passes the person-name heuristic is taken as the name.
    for ln in lines:
        person = _clean_person(ln)
        if person:
            result["name"] = person
            break

    return result

# ---------------- Photo helpers (auto) ---------------- #

def extract_images_from_pdf(pdf_path):
    """Return every embedded image of a PDF as an RGB PIL image.

    Images are collected page by page in the order the page lists them.
    Images that cannot be decoded are skipped.

    Args:
        pdf_path: path of the PDF file to open.

    Returns:
        list of PIL.Image.Image (possibly empty).
    """
    images = []
    doc = fitz.open(pdf_path)
    try:
        for page_index in range(len(doc)):
            page = doc.load_page(page_index)
            for img in page.get_images(full=True):
                base_image = doc.extract_image(img[0])
                try:
                    pil_img = Image.open(io.BytesIO(base_image["image"]))
                    images.append(pil_img.convert("RGB"))
                except Exception:
                    # Best effort: an undecodable image must not abort the scan.
                    continue
    finally:
        # Close the document even if a page or image lookup raises.
        doc.close()
    return images


def detect_face_bbox(pil_image):
    """Return the largest frontal face found in *pil_image* as (x, y, w, h), or None.

    Detection uses OpenCV's bundled "haarcascade_frontalface_default.xml"
    cascade on a grayscale version of the image; faces smaller than
    60x60 pixels are ignored.
    """
    classifier = cv2.CascadeClassifier(
        cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
    )
    bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    grayscale = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)
    detections = classifier.detectMultiScale(
        grayscale, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60)
    )
    if len(detections) == 0:
        return None
    # Several hits: keep the one covering the largest area.
    largest = max(detections, key=lambda box: box[2] * box[3])
    x, y, w, h = largest
    return (x, y, w, h)


def find_photo_rectangle(pil_image, face_bbox=None):
    """Locate the printed photo frame in *pil_image*.

    Edges are detected (blur, Canny, dilate), contours are approximated to
    polygons, and a four-cornered contour is accepted when its bounding box

      * is at least 40 px wide and 40 px high,
      * covers between 0.4 % and 35 % of the image area,
      * has a height/width ratio between 0.65 and 1.05,
      * is at least 75 % filled by the contour itself, and
      * contains the centre of *face_bbox* (only when a face box is given).

    Args:
        pil_image: RGB PIL image to search.
        face_bbox: optional (x, y, w, h) of a detected face.

    Returns:
        (x, y, w, h) of the smallest accepted bounding box (the first one
        found on a tie), or None when no contour qualifies.
    """
    rgb = np.array(pil_image)
    gray = cv2.GaussianBlur(cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY), (5, 5), 0)
    edge_map = cv2.dilate(cv2.Canny(gray, 60, 180), np.ones((3, 3), np.uint8), iterations=1)
    contours, _ = cv2.findContours(edge_map, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    img_h, img_w = gray.shape
    page_area = img_w * img_h

    face_center = None
    if face_bbox is not None:
        fx, fy, fw, fh = face_bbox
        face_center = (fx + fw/2, fy + fh/2)

    def candidate_boxes():
        """Yield the bounding box of every contour that passes all filters."""
        for contour in contours:
            perimeter = cv2.arcLength(contour, True)
            quad = cv2.approxPolyDP(contour, 0.02*perimeter, True)
            if len(quad) != 4:
                continue
            bx, by, bw, bh = cv2.boundingRect(quad)
            if bw < 40 or bh < 40:
                continue
            box_area = bw*bh
            if box_area < 0.004*page_area or box_area > 0.35*page_area:
                continue
            aspect = bh / float(bw)
            if aspect < 0.65 or aspect > 1.05:
                continue
            # Reject skewed quadrilaterals that fill little of their bounding box.
            if cv2.contourArea(quad) / float(box_area) < 0.75:
                continue
            if face_center and not (bx < face_center[0] < bx+bw and by < face_center[1] < by+bh):
                continue
            yield (bx, by, bw, bh)

    # we want the *smallest* rectangle that still matches criteria
    return min(candidate_boxes(), key=lambda box: box[2]*box[3], default=None)


def crop_to_photo_border(pil_image):
    """Crop *pil_image* to the portrait photo it contains.

    Preference order: the detected photo frame (shrunk by 2 px on each side),
    then the detected face padded by 45 % of its width horizontally and 65 %
    of its height vertically, and finally the unmodified image.
    """
    face = detect_face_bbox(pil_image)
    frame = find_photo_rectangle(pil_image, face_bbox=face)
    img_w, img_h = pil_image.width, pil_image.height

    if frame is not None:
        x, y, w, h = frame
        inset = 2
        box = (
            max(0, x + inset),
            max(0, y + inset),
            min(img_w, x + w - inset),
            min(img_h, y + h - inset),
        )
        return pil_image.crop(box)

    if face is None:
        return pil_image

    fx, fy, fw, fh = face
    pad_x = int(0.45 * fw)
    pad_y = int(0.65 * fh)
    box = (
        max(0, fx - pad_x),
        max(0, fy - pad_y),
        min(img_w, fx + fw + pad_x),
        min(img_h, fy + fh + pad_y),
    )
    return pil_image.crop(box)


def choose_best_aadhaar_photo(source):
    """Return the cropped portrait photo for an Aadhaar document.

    Args:
        source: a path ending in ".pdf", any other image path, or a PIL image.

    Returns:
        For a PDF: the crop of the largest embedded image that contains a
        face; otherwise the crop of the rendered first page, or None when the
        PDF has no pages. For anything else: the crop of the image itself.
    """
    is_pdf = isinstance(source, str) and source.lower().endswith('.pdf')
    if not is_pdf:
        if isinstance(source, Image.Image):
            picture = source
        else:
            picture = Image.open(source).convert('RGB')
        return crop_to_photo_border(picture)

    with_faces = [im for im in extract_images_from_pdf(source) if detect_face_bbox(im) is not None]
    if with_faces:
        # max() keeps the first image on a tie.
        largest = max(with_faces, key=lambda im: im.size[0]*im.size[1])
        return crop_to_photo_border(largest)

    page_image = load_image_from_pdf_first_page(source)
    if page_image is None:
        return None
    return crop_to_photo_border(page_image)

# ---------------- Policy stamping ---------------- #
# Display title -> keywords that identify the policy from its file name.
# Checked in insertion order; the first title with a matching keyword wins.
POLICY_KEYWORDS = {
    "HR Policy": ["hr", "human resources"],
    "IS Policy": ["is", "information security"],
    "Social Media Policy": ["social media"],
    "NDA Policy": ["nda", "non disclosure", "non-disclosure"],
    "Reimbursement Policy": ["reimbursement", "expense"],
}

def list_pdfs(folder):
    """Return the paths of the ".pdf" files directly inside *folder*, sorted by file name."""
    return [os.path.join(folder, f) for f in sorted(os.listdir(folder)) if f.lower().endswith('.pdf')]

def classify_policy_title(path):
    """Map a policy file path to a display title.

    The file name (without extension, lower-cased, underscores read as
    spaces) is checked against POLICY_KEYWORDS. A keyword only counts when it
    is not embedded in a longer run of letters, so "is" no longer matches
    "disclosure" and "hr" no longer matches "three".

    Args:
        path: path or file name of the policy PDF.

    Returns:
        The matching display title, or the bare file name when no keyword matches.
    """
    base = os.path.splitext(os.path.basename(path))[0]
    low = base.lower().replace('_',' ')
    for disp, kws in POLICY_KEYWORDS.items():
        for kw in kws:
            # Letters on either side would make the keyword part of another word;
            # digits and punctuation ("hr2024", "is-policy") are fine.
            if re.search(r"(?<![a-z])" + re.escape(kw) + r"(?![a-z])", low):
                return disp
    return base


def stamp_pdf(src_path, dest_path, employee_name, policy_title, org_name, timestamp_str):
    """Write a copy of *src_path* to *dest_path* with an acceptance stamp on every page.

    Each page gets two header lines (policy title; who accepted it and when)
    and a footer line with the organisation name and "Page i of n".

    Args:
        src_path: PDF to read.
        dest_path: where the stamped PDF is saved.
        employee_name: name shown in the "Accepted by" line.
        policy_title: title shown in the "Policy" line.
        org_name: organisation name shown in the footer.
        timestamp_str: pre-formatted acceptance time.
    """
    doc = fitz.open(src_path)
    try:
        total = doc.page_count
        for i, page in enumerate(doc):
            rect = page.rect
            # Header
            page.insert_text((rect.x0 + 20, rect.y0 + 16), f"Policy: {policy_title}", fontsize=9, fontname="helv")
            page.insert_text((rect.x0 + 20, rect.y0 + 28), f"Accepted by: {employee_name} on {timestamp_str}", fontsize=9, fontname="helv")
            # Footer; insert_text takes the insertion point as a single (x, y) tuple.
            # NOTE(review): the em dash may not render with the built-in "helv" font — confirm in the output.
            footer = f"{org_name} — Page {i+1} of {total}"
            page.insert_text((rect.x0 + 20, rect.y1 - 20), footer, fontsize=9, fontname="helv")
        doc.save(dest_path)
    finally:
        # Close the source document even if stamping or saving raises.
        doc.close()


def merge_policies_with_stamp(output_path, selected_paths, employee_name, org_name):
    """Stamp each selected policy PDF and merge the results into *output_path*.

    Every file is stamped into its own temporary file (title from
    classify_policy_title(), timestamp taken once at the start), the stamped
    files are concatenated in the given order, and all temporary files are
    removed afterwards, whether or not the operation succeeded.

    Args:
        output_path: path of the merged PDF to write.
        selected_paths: policy PDF paths, in merge order.
        employee_name: name written into each stamp.
        org_name: organisation name written into each footer.
    """
    ts = datetime.now().strftime("%Y-%m-%d %H:%M")
    tmp_files = []
    merger = None
    try:
        for p in selected_paths:
            title = classify_policy_title(p)
            fd, tmp_path = tempfile.mkstemp(suffix="_stamped.pdf")
            os.close(fd)
            # Register the file before stamping so it is cleaned up even when stamping fails.
            tmp_files.append(tmp_path)
            stamp_pdf(p, tmp_path, employee_name, title, org_name, ts)
        merger = PdfMerger()
        for fp in tmp_files:
            merger.append(fp)
        with open(output_path, 'wb') as f:
            merger.write(f)
    finally:
        try:
            # Release the merger's handles first; open files cannot be deleted on some platforms.
            if merger is not None:
                merger.close()
        finally:
            for t in tmp_files:
                try:
                    os.remove(t)
                except OSError:
                    # Best effort: a leftover temp file must not mask the real outcome.
                    pass
# ---------------- Onboarding PDF ---------------- #

def draw_wrapped_text(c, text, x, y, max_width):
    """Draw *text* word-wrapped to *max_width*, starting at (x, y) and moving down.

    Widths are measured for Helvetica 10 pt; each line advances by 12 units.
    Runs of whitespace (including newlines) are treated as a single space.
    A single word wider than *max_width* is drawn on a line of its own and is
    not split.

    Args:
        c: canvas offering ``drawString(x, y, text)``.
        text: text to draw; empty or None draws nothing.
        x: left edge of every line.
        y: baseline of the first line.
        max_width: maximum line width.

    Returns:
        The y position below the last drawn line.
    """
    from reportlab.pdfbase.pdfmetrics import stringWidth
    if not text:
        return y
    line = ""
    line_h = 12
    for w in text.split():
        if not line:
            # Nothing pending: start the line with this word, however wide it
            # is. Flushing here would only emit an empty line.
            line = w
            continue
        test = line + " " + w
        if stringWidth(test, "Helvetica", 10) <= max_width:
            line = test
        else:
            c.drawString(x, y, line)
            y -= line_h
            line = w
    if line:
        c.drawString(x, y, line)
        y -= line_h
    return y


def create_onboarding_pdf(output_path, data, photo_pil=None, org_name="", logo_path="", footer_org=""):
    """Write the one-page A4 employee onboarding form to *output_path*.

    Layout, top to bottom: title line, optional logo, optional photo in a
    framed 35 mm x 45 mm box at the top right, the labelled KYC fields, the
    word-wrapped address and a footer line.

    Args:
        output_path: path of the PDF to create.
        data: dict read with the keys "name", "father_name", "dob", "gender",
            "pan_number", "aadhaar_number" and "address"; missing keys render
            as empty.
        photo_pil: optional PIL image for the photo box.
        org_name: organisation name for the title ("Organization" when empty).
        logo_path: optional image file drawn under the title; a logo that
            fails to draw is ignored.
        footer_org: footer text; falls back to *org_name*, then "Organization".
    """
    pdf = canvas.Canvas(output_path, pagesize=A4)
    page_w, page_h = A4
    left = 20*mm

    # Branding
    pdf.setFont("Helvetica-Bold", 14)
    pdf.drawString(left, page_h - 25*mm, f"{org_name or 'Organization'} — Employee Onboarding Form")

    # Logo (optional)
    if logo_path and os.path.exists(logo_path):
        try:
            pdf.drawImage(logo_path, left, page_h - 40*mm, width=25*mm, height=15*mm, preserveAspectRatio=True, mask='auto')
        except Exception:
            pass

    pdf.setFont("Helvetica", 10)
    cursor_y = page_h - 50*mm

    # Photo block
    if photo_pil:
        box_x, box_y = page_w - 40*mm, page_h - 80*mm
        thumb = photo_pil.copy()
        thumb.thumbnail((int(35*mm), int(45*mm)))
        pdf.drawImage(ImageReader(thumb), box_x, box_y, width=35*mm, height=45*mm, preserveAspectRatio=True, mask='auto')
        pdf.rect(box_x, box_y, 35*mm, 45*mm)

    # Labelled single-line fields, in display order.
    labelled_keys = (
        ("Full Name", "name"),
        ("Father's Name", "father_name"),
        ("Date of Birth", "dob"),
        ("Gender", "gender"),
        ("PAN", "pan_number"),
        ("Aadhaar", "aadhaar_number"),
    )
    for label, key in labelled_keys:
        value = data.get(key, "")
        if key == "aadhaar_number":
            value = normalize_aadhaar(value)
        pdf.setFont("Helvetica-Bold", 10)
        pdf.drawString(left, cursor_y, f"{label}:")
        pdf.setFont("Helvetica", 10)
        pdf.drawString(60*mm, cursor_y, value or "")
        cursor_y -= 12

    pdf.setFont("Helvetica-Bold", 10)
    pdf.drawString(left, cursor_y, "Address:")
    cursor_y -= 12
    pdf.setFont("Helvetica", 10)
    cursor_y = draw_wrapped_text(pdf, data.get("address", ""), left, cursor_y, max_width=160*mm)

    # Footer
    pdf.setFont("Helvetica", 9)
    footer_text = footer_org or org_name or "Organization"
    pdf.drawString(left, 10*mm, f"{footer_text} — Page 1 of 1")

    pdf.showPage()
    pdf.save()

# ---------------- Email ---------------- #
# OAuth scope requested from Google: permission to send mail only.
SCOPES = ["https://www.googleapis.com/auth/gmail.send"]

def get_gmail_service(credentials_file: str, token_file: str):
    """Return an authorised Gmail API client, running the OAuth flow when needed.

    Stored credentials from *token_file* are reused when valid and refreshed
    when expired. In every other case (no token file, malformed token file,
    failed refresh, or invalid credentials that cannot be refreshed) the
    interactive browser consent flow is run. New or refreshed credentials are
    written back to *token_file*.

    Args:
        credentials_file: OAuth client-secrets JSON file.
        token_file: path where the user's token is cached.

    Returns:
        The object returned by ``build('gmail', 'v1', ...)``.

    Raises:
        RuntimeError: the Google API libraries are not installed.
    """
    if Credentials is None or InstalledAppFlow is None or build is None:
        raise RuntimeError("Google API libraries not installed. Run: pip install google-api-python-client google-auth-httplib2 google-auth-oauthlib")
    creds = None
    if os.path.exists(token_file):
        try:
            creds = Credentials.from_authorized_user_file(token_file, SCOPES)
        except ValueError:
            # Corrupt or incomplete token file: ask for consent again.
            creds = None
    if not creds or not creds.valid:
        refreshed = False
        if creds and creds.expired and creds.refresh_token:
            try:
                from google.auth.transport.requests import Request
                creds.refresh(Request())
                refreshed = True
            except Exception:
                refreshed = False
        if not refreshed:
            # Covers missing credentials AND invalid ones that could not be
            # refreshed; previously the latter were used as-is.
            flow = InstalledAppFlow.from_client_secrets_file(credentials_file, SCOPES)
            creds = flow.run_local_server(port=0)
        with open(token_file, 'w', encoding='utf-8') as f:
            f.write(creds.to_json())
    return build('gmail', 'v1', credentials=creds)


def send_email_gmail_api(cfg: dict, to_addrs, subject: str, body: str, filepath: str):
    """Send *filepath* as a PDF attachment through the Gmail API.

    Args:
        cfg: the "gmail_api" config section; "credentials_file" and
            "token_file" default to "credentials.json" and "token.json".
        to_addrs: list of addresses, or one comma-separated string.
        subject: message subject.
        body: plain-text message body.
        filepath: file to attach (read before the Gmail service is created).
    """
    recipients = to_addrs
    if isinstance(recipients, str):
        recipients = [part.strip() for part in recipients.split(',') if part.strip()]

    message = EmailMessage()
    message['To'] = ', '.join(recipients)
    message['Subject'] = subject
    message.set_content(body)
    with open(filepath, 'rb') as fh:
        payload = fh.read()
    message.add_attachment(
        payload,
        maintype='application',
        subtype='pdf',
        filename=os.path.basename(filepath),
    )

    # The API expects the whole RFC 822 message, URL-safe base64 encoded.
    encoded = base64.urlsafe_b64encode(message.as_bytes()).decode()
    service = get_gmail_service(
        cfg.get('credentials_file','credentials.json'),
        cfg.get('token_file','token.json'),
    )
    service.users().messages().send(userId='me', body={'raw': encoded}).execute()


def send_email_smtp(cfg: dict, to_addrs, subject: str, body: str, filepath: str):
    """Send *filepath* as a PDF attachment over SMTP.

    With ``use_starttls`` true (the default) a plain connection is upgraded
    via STARTTLS (default port 587); otherwise an implicit-TLS connection is
    used (default port 465). Login happens only when a username is configured.

    Args:
        cfg: the "smtp" config section ("server" is required; "port",
            "username", "password" and "use_starttls" are optional).
        to_addrs: list of addresses, or one comma-separated string.
        subject: message subject.
        body: plain-text message body.
        filepath: file to attach.

    Raises:
        FileNotFoundError: *filepath* does not exist.
    """
    if not os.path.exists(filepath):
        raise FileNotFoundError(filepath)

    recipients = to_addrs
    if isinstance(recipients, str):
        recipients = [part.strip() for part in recipients.split(',') if part.strip()]

    msg = EmailMessage()
    msg['From'] = cfg.get('username')
    msg['To'] = ", ".join(recipients)
    msg['Subject'] = subject
    msg.set_content(body)
    with open(filepath, 'rb') as fh:
        payload = fh.read()
    msg.add_attachment(payload, maintype='application', subtype='pdf', filename=os.path.basename(filepath))

    context = ssl.create_default_context()
    starttls = cfg.get('use_starttls', True)
    if starttls:
        connection = smtplib.SMTP(cfg['server'], cfg.get('port', 587))
    else:
        connection = smtplib.SMTP_SSL(cfg['server'], cfg.get('port', 465), context=context)

    with connection as s:
        if starttls:
            s.starttls(context=context)
        if cfg.get('username'):
            s.login(cfg['username'], cfg.get('password',''))
        s.send_message(msg)


def send_email_via_config(config: dict, to_addrs, subject: str, body: str, filepath: str):
    """Send the attachment with the provider named in ``config["email"]["provider"]``.

    "smtp" (case-insensitive) selects send_email_smtp(); any other value,
    including a missing or empty one, selects send_email_gmail_api(). The
    matching config sub-section is passed on to the chosen sender.
    """
    email_cfg = config.get('email', {})
    provider = (email_cfg.get('provider') or 'gmail_api').lower()
    if provider == 'smtp':
        return send_email_smtp(config.get('email', {}).get('smtp', {}), to_addrs, subject, body, filepath)
    return send_email_gmail_api(config.get('email', {}).get('gmail_api', {}), to_addrs, subject, body, filepath)

# ---------------- GUI ---------------- #
class App:
    def __init__(self, root):
        """Load the config, create the form state variables and build the window.

        Args:
            root: the Tk window that hosts the application.
        """
        self.root = root
        self.cfg = load_config()

        self.root.title("KYC & Onboarding Assistant")
        self.root.geometry("1120x760")

        # paths (bound to the entry widgets in the "Files" frame)
        self.pan_path = tk.StringVar()
        self.aadhaar_front_path = tk.StringVar()
        self.aadhaar_back_path = tk.StringVar()
        self.policy_folder = tk.StringVar(value=self.cfg.get('policy_folder',''))

        # fields (bound to the entry widgets in the review form)
        self.name_var = tk.StringVar()
        self.father_var = tk.StringVar()
        self.dob_var = tk.StringVar()
        self.gender_var = tk.StringVar(value="Male")
        self.pan_var = tk.StringVar()
        self.aadhaar_var = tk.StringVar()
        # Replaced by the address tk.Text widget in build().
        self.address_text = None

        # Current photo as a PIL image; set through set_photo().
        self.photo_pil = None
        self.last_policy_pdf = None

        self.build()

    # ---------- UI ---------- #
    def build(self):
        """Create and lay out all widgets of the main window.

        Three stacked areas: the "Files" frame (document paths, policy folder
        and the extract/photo buttons), the review form (KYC fields, address
        box and photo preview) and a bottom row of output actions.
        """
        # --- Files frame: one row per input document, laid out with grid ---
        top = ttk.LabelFrame(self.root, text="Files")
        top.pack(fill="x", padx=10, pady=10)

        ttk.Label(top, text="PAN (image/PDF):").grid(row=0,column=0,sticky='w')
        ttk.Entry(top, textvariable=self.pan_path, width=85).grid(row=0,column=1,padx=5)
        ttk.Button(top, text="Browse", command=self.browse_pan).grid(row=0,column=2)

        ttk.Label(top, text="Aadhaar FRONT:").grid(row=1,column=0,sticky='w')
        ttk.Entry(top, textvariable=self.aadhaar_front_path, width=85).grid(row=1,column=1,padx=5)
        ttk.Button(top, text="Browse", command=self.browse_aadhaar_front).grid(row=1,column=2)

        ttk.Label(top, text="Aadhaar BACK:").grid(row=2,column=0,sticky='w')
        ttk.Entry(top, textvariable=self.aadhaar_back_path, width=85).grid(row=2,column=1,padx=5)
        ttk.Button(top, text="Browse", command=self.browse_aadhaar_back).grid(row=2,column=2)

        ttk.Label(top, text="Policy folder:").grid(row=3,column=0,sticky='w')
        ttk.Entry(top, textvariable=self.policy_folder, width=85).grid(row=3,column=1,padx=5)
        ttk.Button(top, text="Choose", command=self.choose_policy_folder).grid(row=3,column=2)

        # Action buttons share row 4 of the Files frame.
        ttk.Button(top, text="Extract & Populate", command=self.extract).grid(row=4,column=1, sticky='w', pady=5)
        ttk.Button(top, text="Load Photo Manually", command=self.load_photo_manually).grid(row=4,column=2, sticky='w')
        ttk.Button(top, text="Crop Photo Manually", command=self.manual_crop_dialog).grid(row=4,column=3, sticky='w')

        # --- Review form: labels in column 0, inputs in column 1 ---
        form = ttk.LabelFrame(self.root, text="KYC Details (Review & Edit)")
        form.pack(fill="both", expand=True, padx=10, pady=10)

        ttk.Label(form, text="Full Name:").grid(row=0,column=0,sticky='e',padx=5,pady=4)
        ttk.Entry(form, textvariable=self.name_var, width=45).grid(row=0,column=1,sticky='w')
        ttk.Label(form, text="Father's Name:").grid(row=1,column=0,sticky='e',padx=5,pady=4)
        ttk.Entry(form, textvariable=self.father_var, width=45).grid(row=1,column=1,sticky='w')
        ttk.Label(form, text="Date of Birth:").grid(row=2,column=0,sticky='e',padx=5,pady=4)
        ttk.Entry(form, textvariable=self.dob_var, width=45).grid(row=2,column=1,sticky='w')
        ttk.Label(form, text="Gender:").grid(row=3,column=0,sticky='e',padx=5,pady=4)
        ttk.Combobox(form, textvariable=self.gender_var, values=["Male","Female","Other"], state="readonly", width=30).grid(row=3,column=1,sticky='w')
        ttk.Label(form, text="PAN:").grid(row=4,column=0,sticky='e',padx=5,pady=4)
        ttk.Entry(form, textvariable=self.pan_var, width=45).grid(row=4,column=1,sticky='w')
        ttk.Label(form, text="Aadhaar:").grid(row=5,column=0,sticky='e',padx=5,pady=4)
        ttk.Entry(form, textvariable=self.aadhaar_var, width=45).grid(row=5,column=1,sticky='w')
        ttk.Label(form, text="Address:").grid(row=6,column=0,sticky='ne',padx=5,pady=4)
        # The address is multi-line, so it uses a Text widget instead of a StringVar.
        self.address_text = tk.Text(form, width=45, height=6)
        self.address_text.grid(row=6,column=1,sticky='w')

        # Photo preview spans all form rows in column 2; set_photo() updates the label.
        photo_fr = ttk.LabelFrame(form, text="Photo (auto from Aadhaar if possible)")
        photo_fr.grid(row=0, column=2, rowspan=7, padx=10, pady=10)
        self.photo_label = ttk.Label(photo_fr, text="No photo")
        self.photo_label.pack(padx=10, pady=10)

        # --- Bottom row: output actions ---
        bottom = ttk.Frame(self.root); bottom.pack(fill='x', padx=10, pady=10)
        ttk.Button(bottom, text="Save Onboarding PDF", command=self.save_pdf).pack(side='left', padx=5)
        ttk.Button(bottom, text="Select & Combine Policies", command=self.select_policies_dialog).pack(side='left', padx=5)
        ttk.Button(bottom, text="Email Stamped Policies", command=self.email_policies_dialog).pack(side='left', padx=5)

    # ---------- File pickers ---------- #
    def _pick_file(self, title):
        """Show an open-file dialog for images and PDFs.

        Args:
            title: dialog window title.

        Returns:
            The selected path, or an empty value when the dialog is cancelled.
        """
        # Patterns are passed as tuples: a single "a;b;c" string is one glob
        # pattern to Tk and only works as a list on Windows.
        image_patterns = ("*.png", "*.jpg", "*.jpeg")
        return filedialog.askopenfilename(
            title=title,
            filetypes=[
                ("All supported", image_patterns + ("*.pdf",)),
                ("Images", image_patterns),
                ("PDF", "*.pdf"),
            ],
        )
    def browse_pan(self):
        """Ask for the PAN document and store the chosen path; a cancel changes nothing."""
        chosen = self._pick_file("Select PAN image/PDF")
        if not chosen:
            return
        self.pan_path.set(chosen)
    def browse_aadhaar_front(self):
        """Ask for the Aadhaar front side and store the chosen path; a cancel changes nothing."""
        chosen = self._pick_file("Select Aadhaar FRONT")
        if not chosen:
            return
        self.aadhaar_front_path.set(chosen)
    def browse_aadhaar_back(self):
        """Ask for the Aadhaar back side and store the chosen path; a cancel changes nothing."""
        chosen = self._pick_file("Select Aadhaar BACK")
        if not chosen:
            return
        self.aadhaar_back_path.set(chosen)
    def choose_policy_folder(self):
        """Ask for the policy folder and store the chosen path; a cancel changes nothing."""
        chosen = filedialog.askdirectory(title="Choose policy folder")
        if not chosen:
            return
        self.policy_folder.set(chosen)

    # ---------- Extraction ---------- #
    def extract(self):
        """OCR the selected documents and fill the review form.

        * PAN: sets PAN number and father's name; sets date of birth and name
          only when those fields are still empty.
        * Aadhaar front: sets the photo, Aadhaar number, gender and name; sets
          date of birth only when it is still empty.
        * Aadhaar back: replaces the address when one could be parsed.

        Each document is processed independently; a failure shows a warning
        and the remaining documents are still processed. Finally the name
        fields are normalised, keeping the current text whenever the
        normaliser rejects it (e.g. a single-word name).
        """
        # PAN
        pan_path = self.pan_path.get().strip()
        if pan_path:
            try:
                pan_img = load_image_from_pdf_first_page(pan_path) if pan_path.lower().endswith('.pdf') else Image.open(pan_path)
                text = extract_text_from_image(pan_img) if pan_img is not None else ''
                pd = parse_pan_text(text)
                if pd.get('pan_number'): self.pan_var.set(pd['pan_number'])
                if pd.get('father_name'): self.father_var.set(pd['father_name'])
                if pd.get('dob') and not self.dob_var.get(): self.dob_var.set(pd['dob'])
                if pd.get('name') and not self.name_var.get(): self.name_var.set(pd['name'])
            except Exception as e:
                messagebox.showwarning("PAN OCR", f"Failed to OCR PAN file: {e}")

        # Aadhaar front
        af = self.aadhaar_front_path.get().strip()
        if af:
            try:
                if af.lower().endswith('.pdf'):
                    photo = choose_best_aadhaar_photo(af)
                    if photo is not None: self.set_photo(photo)
                    page_img = load_image_from_pdf_first_page(af)
                    a_text = extract_text_from_image(page_img) if page_img is not None else ''
                else:
                    img = Image.open(af).convert('RGB')
                    photo = choose_best_aadhaar_photo(img)
                    if photo is not None: self.set_photo(photo)
                    a_text = extract_text_from_image(img)
                ad = parse_aadhaar_text(a_text)
                if ad.get('aadhaar_number'): self.aadhaar_var.set(ad['aadhaar_number'])
                if ad.get('dob') and not self.dob_var.get(): self.dob_var.set(ad['dob'])
                if ad.get('gender'): self.gender_var.set(ad['gender'])
                if ad.get('name'): self.name_var.set(ad['name'])
            except Exception as e:
                messagebox.showwarning("Aadhaar FRONT OCR", f"Failed to OCR Aadhaar front: {e}")

        # Aadhaar back for address
        ab = self.aadhaar_back_path.get().strip()
        if ab:
            try:
                img = load_image_from_pdf_first_page(ab) if ab.lower().endswith('.pdf') else Image.open(ab)
                text = extract_text_from_image(img) if img is not None else ''
                addr = parse_address_from_text(text)
                if addr:
                    self.address_text.delete('1.0','end'); self.address_text.insert('1.0', addr)
            except Exception as e:
                messagebox.showwarning("Aadhaar BACK OCR", f"Failed to OCR Aadhaar back: {e}")

        # Title-case names. _clean_person() returns "" for text it does not
        # accept as a name; in that case keep what is in the field rather
        # than wiping it.
        for var in (self.name_var, self.father_var):
            current = var.get()
            if current:
                cleaned = _clean_person(current)
                if cleaned:
                    var.set(cleaned)

        messagebox.showinfo("Done", "Extraction complete. Review & edit details. You can also use 'Crop Photo Manually'.")

    # ---------- Photo helpers ---------- #
    def set_photo(self, pil_image):
        """Store *pil_image* as the current photo and show a preview of at most 240x280."""
        self.photo_pil = pil_image

        thumb = pil_image.copy()
        thumb.thumbnail((240, 280))
        tk_image = ImageTk.PhotoImage(thumb)

        label = self.photo_label
        label.configure(image=tk_image)
        # Keep a reference on the widget so the preview image stays alive.
        label.image = tk_image

    def load_photo_manually(self):
        """Let the user pick a PNG/JPEG file and use it as the photo.

        A cancelled dialog changes nothing; a file that cannot be loaded is
        reported in an error box.
        """
        # Patterns are passed as a tuple: a single "a;b;c" string is one glob
        # pattern to Tk and only works as a list on Windows.
        path = filedialog.askopenfilename(
            title="Select Photo (JPEG/PNG)",
            filetypes=[("Images", ("*.png", "*.jpg", "*.jpeg"))],
        )
        if not path:
            return
        try:
            img = Image.open(path).convert('RGB')
            self.set_photo(img)
        except Exception as e:
            messagebox.showerror("Photo", f"Failed to load photo: {e}")

    # ---------- Manual Crop Dialog ---------- #
    def manual_crop_dialog(self):
        """Open a modal dialog in which the user drags a rectangle to crop the photo.

        The photo is shown scaled down to fit 760x560 (never enlarged). While
        "Lock Aadhaar ratio" is ticked the selection keeps a height/width
        ratio of 45/35, including when the drag reaches the top or bottom
        edge (the width is then reduced to match the available height).
        "Apply" maps the selection back to full-resolution coordinates, stores
        the crop through set_photo() and closes the dialog; "Reset" removes
        the selection.
        """
        if self.photo_pil is None:
            messagebox.showinfo("Crop", "No photo available. Load or extract a photo first.")
            return

        dlg = tk.Toplevel(self.root)
        dlg.title("Manual Photo Crop — drag to select, then Apply")
        max_w, max_h = 760, 560
        src = self.photo_pil
        scale = min(max_w / src.width, max_h / src.height, 1.0)
        # At least one pixel per side, so extremely thin images can still be resized.
        disp_w, disp_h = max(1, int(src.width * scale)), max(1, int(src.height * scale))
        disp = src.resize((disp_w, disp_h), Image.LANCZOS)
        # Held in a local for the lifetime of the dialog (wait_window blocks below).
        disp_tk = ImageTk.PhotoImage(disp)

        crop_canvas = tk.Canvas(dlg, width=disp_w, height=disp_h, cursor="cross")
        crop_canvas.grid(row=0, column=0, columnspan=4, padx=8, pady=8)
        crop_canvas.create_image(0, 0, anchor='nw', image=disp_tk)

        lock_ratio = tk.BooleanVar(value=True)
        ttk.Checkbutton(dlg, text="Lock Aadhaar ratio (45×35)", variable=lock_ratio).grid(row=1, column=0, sticky='w', padx=8)

        rect_id = None
        start = [0, 0]  # anchor corner of the selection, in display pixels
        target_ratio = 45/35  # h/w

        def clamp(v, lo, hi):
            return max(lo, min(hi, v))

        def on_press(e):
            nonlocal rect_id
            start[0], start[1] = clamp(e.x,0,disp_w-1), clamp(e.y,0,disp_h-1)
            if rect_id is None:
                rect_id = crop_canvas.create_rectangle(start[0], start[1], start[0], start[1], outline='red', width=2)
            else:
                crop_canvas.coords(rect_id, start[0], start[1], start[0], start[1])

        def on_drag(e):
            if rect_id is None:
                # Motion/release without a preceding press on the canvas.
                return
            x = clamp(e.x, 0, disp_w-1)
            y = clamp(e.y, 0, disp_h-1)
            if lock_ratio.get():
                w = abs(x - start[0])
                h = int(w * target_ratio)
                going_up = y < start[1]
                room = start[1] if going_up else (disp_h - 1 - start[1])
                if h > room:
                    # Not enough vertical space: cap the height and shrink the
                    # width with it so the ratio is preserved.
                    h = room
                    w = int(h / target_ratio)
                    x = start[0] - w if x < start[0] else start[0] + w
                y = start[1] - h if going_up else start[1] + h
            crop_canvas.coords(rect_id, start[0], start[1], x, y)

        def on_release(e):
            on_drag(e)

        crop_canvas.bind('<ButtonPress-1>', on_press)
        crop_canvas.bind('<B1-Motion>', on_drag)
        crop_canvas.bind('<ButtonRelease-1>', on_release)

        def apply_crop():
            if rect_id is None:
                messagebox.showinfo("Crop", "Please drag to select an area to crop.")
                return
            x1, y1, x2, y2 = crop_canvas.coords(rect_id)
            # The drag may have gone in any direction; normalise to top-left / bottom-right.
            x1, y1, x2, y2 = int(min(x1,x2)), int(min(y1,y2)), int(max(x1,x2)), int(max(y1,y2))
            if x2 - x1 < 10 or y2 - y1 < 10:
                messagebox.showinfo("Crop", "Selection too small.")
                return
            # Map display coordinates back to the full-resolution image.
            ox1, oy1 = int(x1/scale), int(y1/scale)
            ox2, oy2 = int(x2/scale), int(y2/scale)
            ox1, oy1 = clamp(ox1, 0, src.width-1), clamp(oy1, 0, src.height-1)
            ox2, oy2 = clamp(ox2, 0, src.width), clamp(oy2, 0, src.height)
            cropped = src.crop((ox1, oy1, ox2, oy2))
            self.set_photo(cropped)
            dlg.destroy()

        def reset_crop():
            nonlocal rect_id
            if rect_id is not None:
                crop_canvas.delete(rect_id)
                rect_id = None

        ttk.Button(dlg, text="Reset", command=reset_crop).grid(row=1, column=2, sticky='e', padx=8, pady=8)
        ttk.Button(dlg, text="Apply", command=apply_crop).grid(row=1, column=3, sticky='e', padx=8, pady=8)
        # Make the dialog modal relative to the main window.
        dlg.transient(self.root)
        dlg.grab_set()
        dlg.wait_window()

    # ---------- Save onboarding PDF ---------- #
    def save_pdf(self):
        """Collect the form fields and hand them to ``create_onboarding_pdf``.

        The PAN (upper-cased) is checked with ``validate_pan`` and the
        Aadhaar value is checked for exactly 12 digits once non-digits are
        stripped. Empty values skip their check. If either check fails the
        user is asked whether to proceed anyway; declining aborts. The user
        then picks the output path via a save dialog; cancelling aborts.
        Any exception raised while creating the PDF (or while showing the
        success message) is reported in an error dialog rather than
        propagated.
        """
        pan_number = self.pan_var.get().strip().upper()
        aadhaar_number = self.aadhaar_var.get().strip()

        # Soft validation: problems are collected and the user may override.
        problems = []
        if pan_number and not validate_pan(pan_number):
            problems.append("PAN format looks invalid.")
        if aadhaar_number:
            digit_count = len(re.sub(r"\D", "", aadhaar_number))
            if digit_count != 12:
                problems.append("Aadhaar format looks invalid.")

        if problems:
            prompt = "\n".join(problems) + "\n\nProceed anyway?"
            if not messagebox.askyesno("Validate", prompt):
                return

        target_path = filedialog.asksaveasfilename(
            defaultextension=".pdf",
            filetypes=[("PDF", "*.pdf")],
        )
        if not target_path:
            return

        fields = {
            "name": self.name_var.get().strip(),
            "father_name": self.father_var.get().strip(),
            "dob": self.dob_var.get().strip(),
            "gender": self.gender_var.get().strip(),
            "pan_number": pan_number,
            "aadhaar_number": aadhaar_number,
            "address": self.address_text.get('1.0', 'end').strip(),
        }
        try:
            create_onboarding_pdf(
                target_path,
                fields,
                photo_pil=self.photo_pil,
                org_name=self.cfg.get('org_name', ''),
                logo_path=self.cfg.get('logo_path', ''),
                footer_org=self.cfg.get('footer_org', ''),
            )
            messagebox.showinfo("Saved", f"Onboarding PDF saved to\n{target_path}")
        except Exception as exc:
            messagebox.showerror("Save PDF", f"Failed to save PDF: {exc}")

    # ---------- Policy selection / stamping ---------- #
    def select_policies_dialog(self):
        """Open a checklist of policy PDFs and combine the ticked ones.

        The folder comes from the ``policy_folder`` variable, falling back
        to the ``policy_folder`` config entry. A warning is shown (and
        nothing else happens) when the folder is missing/invalid or
        ``list_pdfs`` returns nothing. Files whose title, as returned by
        ``classify_policy_title``, is in ``POLICY_KEYWORDS`` are listed
        first and pre-ticked. On "Combine, Stamp & Save" the selected files
        are passed to ``merge_policies_with_stamp`` and, on success, the
        output path is remembered in ``self.last_policy_pdf``.
        """
        folder = self.policy_folder.get().strip() or self.cfg.get('policy_folder', '')
        if not folder or not os.path.isdir(folder):
            messagebox.showwarning("Policies", "Please choose a valid policy folder first.")
            return

        pdfs = list_pdfs(folder)
        if not pdfs:
            messagebox.showwarning("Policies", "No PDFs found in selected folder.")
            return

        dlg = tk.Toplevel(self.root)
        dlg.title("Select Policies to Combine & Save (stamped)")
        dlg.geometry("560x420")

        # Scrollable area: a frame embedded in a canvas driven by a scrollbar.
        canvas_frm = tk.Canvas(dlg)
        scroll = ttk.Scrollbar(dlg, orient='vertical', command=canvas_frm.yview)
        list_frm = ttk.Frame(canvas_frm)

        def refresh_scrollregion(_event):
            # Keep the scrollable region in sync with the frame's contents.
            canvas_frm.configure(scrollregion=canvas_frm.bbox('all'))

        list_frm.bind('<Configure>', refresh_scrollregion)
        canvas_frm.create_window((0, 0), window=list_frm, anchor='nw')
        canvas_frm.configure(yscrollcommand=scroll.set)
        canvas_frm.grid(row=0, column=0, sticky='nsew')
        scroll.grid(row=0, column=1, sticky='ns')
        dlg.grid_rowconfigure(0, weight=1)
        dlg.grid_columnconfigure(0, weight=1)

        def sort_key(path):
            # Recognised policies (rank 0) first, then the rest, each by title.
            title = classify_policy_title(path)
            rank = 0 if title in POLICY_KEYWORDS else 1
            return (rank, title)

        # Maps each PDF path to (its checkbox variable, its display title).
        vars_map = {}
        for row, path in enumerate(sorted(pdfs, key=sort_key)):
            title = classify_policy_title(path)
            ticked = tk.BooleanVar(value=(title in POLICY_KEYWORDS))
            vars_map[path] = (ticked, title)
            label = f"{title} — {os.path.basename(path)}"
            ttk.Checkbutton(list_frm, text=label, variable=ticked).grid(
                row=row, column=0, sticky='w', padx=8, pady=4
            )

        def on_combine():
            selected = []
            for path, (ticked, _title) in vars_map.items():
                if ticked.get():
                    selected.append(path)
            if not selected:
                messagebox.showwarning("Policies", "Please select at least one PDF.")
                return

            out = filedialog.asksaveasfilename(
                title="Save Combined Policies PDF",
                defaultextension=".pdf",
                filetypes=[("PDF", "*.pdf")],
            )
            if not out:
                return

            try:
                emp_name = self.name_var.get().strip() or "Employee"
                merge_policies_with_stamp(out, selected, emp_name, self.cfg.get('org_name', ''))
                self.last_policy_pdf = out
                messagebox.showinfo("Policies", f"Combined, stamped policies saved to\n{out}")
                dlg.destroy()
            except Exception as exc:
                messagebox.showerror("Policies", f"Failed to merge PDFs: {exc}")

        combine_btn = ttk.Button(dlg, text="Combine, Stamp & Save", command=on_combine)
        combine_btn.grid(row=1, column=0, pady=10, padx=8, sticky='w')

    # ---------- Email stamped policies ---------- #
    def email_policies_dialog(self):
        """Open a small compose dialog and send the last stamped policies PDF.

        Requires ``self.last_policy_pdf`` to be set and to exist on disk;
        otherwise a warning is shown and no dialog opens. The subject is
        pre-filled from the employee name field (or "Employee"). On send,
        an empty recipient field produces a warning; an empty message or
        subject falls back to a default. Sending is delegated to
        ``send_email_via_config``; any exception it raises is shown in an
        error dialog and the compose dialog stays open.
        """
        attachment = self.last_policy_pdf
        if not attachment or not os.path.exists(attachment):
            messagebox.showwarning("Email", "Please create the stamped policies PDF first.")
            return

        dlg = tk.Toplevel(self.root)
        dlg.title("Email Stamped Policies")
        dlg.geometry("520x220")

        to_var = tk.StringVar()
        subj_var = tk.StringVar(value=f"Policies — {self.name_var.get().strip() or 'Employee'}")
        body_txt = tk.Text(dlg, width=60, height=6)

        # Row 0: recipients
        ttk.Label(dlg, text="To (comma separated):").grid(row=0, column=0, sticky='e', padx=5, pady=4)
        ttk.Entry(dlg, textvariable=to_var, width=45).grid(row=0, column=1, sticky='w')
        # Row 1: subject
        ttk.Label(dlg, text="Subject:").grid(row=1, column=0, sticky='e', padx=5, pady=4)
        ttk.Entry(dlg, textvariable=subj_var, width=45).grid(row=1, column=1, sticky='w')
        # Row 2: message body
        ttk.Label(dlg, text="Message:").grid(row=2, column=0, sticky='ne', padx=5, pady=4)
        body_txt.grid(row=2, column=1, sticky='w')

        def on_send():
            try:
                recipients = to_var.get().strip()
                if not recipients:
                    messagebox.showwarning("Email", "Please enter at least one recipient.")
                    return

                message = body_txt.get('1.0', 'end').strip()
                if not message:
                    message = f"Please find attached the stamped policies for {self.name_var.get().strip() or 'employee'}."
                subject = subj_var.get().strip() or 'Policies'

                send_email_via_config(self.cfg, recipients, subject, message, self.last_policy_pdf)
                messagebox.showinfo("Email", "Email sent successfully.")
                dlg.destroy()
            except Exception as exc:
                messagebox.showerror("Email", f"Failed to send email: {exc}")

        send_btn = ttk.Button(dlg, text="Send Email", command=on_send)
        send_btn.grid(row=3, column=1, pady=10, sticky='e')

if __name__ == "__main__":
    # Script entry point: build the root window, attach the App, run the loop.
    root = tk.Tk()
    try:
        # The "clam" theme is cosmetic only; if it cannot be applied the
        # default theme is kept and start-up continues.
        ttk.Style().theme_use("clam")
    except Exception:
        pass
    App(root)
    root.mainloop()
