#!/usr/bin/env python3
import sys, json

# Path of the input PDF; read by both acroform_fields() and dump_layout().
PDF = "/tmp/practice-data-profile/blank.pdf"

def _resolve(value):
    """Follow an indirect reference; direct objects and None pass through."""
    getter = getattr(value, "get_object", None)
    return getter() if callable(getter) else value


def _widget_names(widget, max_depth=32):
    """Return ``(qualified, partial)`` field names for a widget annotation.

    The partial name is the nearest ``/T`` found on the widget or one of its
    ``/Parent`` ancestors; the qualified name joins every ``/T`` on that
    chain with ``"."`` from the root down.  Returns ``(None, None)`` when no
    ``/T`` exists anywhere on the chain.  ``max_depth`` bounds the walk so a
    malformed, cyclic ``/Parent`` chain cannot loop forever.
    """
    parts = []
    node = widget
    for _ in range(max_depth):
        if not isinstance(node, dict):
            break
        title = _resolve(node.get("/T"))
        if title is not None:
            parts.append(str(title))
        node = _resolve(node.get("/Parent"))
    if not parts:
        return None, None
    return ".".join(reversed(parts)), parts[0]


def _lookup_field(flds, qualified, partial):
    """Find the ``get_fields()`` entry for a widget; return ``(key, field)``.

    Tries the qualified name, then the partial name, then any key whose last
    dotted component equals the partial name.  When nothing matches, returns
    ``(qualified, None)`` so the widget is still reported.
    """
    for candidate in (qualified, partial):
        field = flds.get(candidate)
        if field is not None:
            return candidate, field
    for key, field in flds.items():
        if key.split(".")[-1] == partial:
            return key, field
    return qualified, None


def _appearance_states(widget):
    """Return the state names found under the widget's ``/AP`` ``/N`` entry.

    Only a plain dictionary under ``/N`` is treated as a state table; an
    object exposing ``get_data`` is taken to be a single appearance stream
    (no named states) and yields an empty set.
    """
    ap = _resolve(widget.get("/AP"))
    if not isinstance(ap, dict):
        return set()
    normal = _resolve(ap.get("/N"))
    if not isinstance(normal, dict) or hasattr(normal, "get_data"):
        return set()
    return {str(state) for state in normal}


def _field_record(page_no, name, field, rect):
    """Build one output record; ``field`` may be None when lookup failed."""
    ft = (field.get("/FT") if field else None) or ""
    opts = field.get("/Opt") if field else None
    value = field.get("/V") if field else None
    return {
        "page": page_no,
        "name": name,
        "ft": str(ft),
        "rect": rect,
        "options": [str(o) for o in opts] if opts else None,
        "value": str(value) if value is not None else None,
        "states": None,
    }


def acroform_fields():
    """List the AcroForm fields of ``PDF``, ordered by widget position.

    Widgets are visited page by page in ``/Annots`` order; each field is
    reported once, at the page and rectangle of its first widget.  Fields
    returned by ``get_fields()`` that have no widget on any page are appended
    afterwards with ``page`` and ``rect`` set to None.

    Returns:
        None when the document has no form fields; otherwise a list of dicts
        with keys ``page`` (1-based), ``name``, ``ft``, ``rect``,
        ``options``, ``value`` and ``states``.  ``states`` is the sorted list
        of ``/AP`` ``/N`` state names gathered from every widget of a
        ``/Btn`` field, or None when there are none.
    """
    try:
        from pypdf import PdfReader
    except ImportError:
        from PyPDF2 import PdfReader
    r = PdfReader(PDF)
    flds = r.get_fields()
    if not flds:
        return None

    records = {}     # resolved field key -> record, in first-seen page order
    btn_states = {}  # resolved field key -> set of state names (/Btn only)
    for pno, page in enumerate(r.pages):
        annots = _resolve(page.get("/Annots"))
        if not annots:
            continue
        for a in annots:
            try:
                obj = a.get_object()
            except Exception:
                # Best effort: an unreadable annotation must not abort the scan.
                continue
            if not isinstance(obj, dict):
                continue
            if _resolve(obj.get("/Subtype")) != "/Widget":
                continue
            qualified, partial = _widget_names(obj)
            if partial is None:
                # No /T on the widget or its ancestors: not addressable by name.
                continue
            key, field = _lookup_field(flds, qualified, partial)
            if key not in records:
                rect = _resolve(obj.get("/Rect"))
                coords = [float(_resolve(x)) for x in rect] if rect else None
                records[key] = _field_record(pno + 1, key, field, coords)
            if records[key]["ft"] == "/Btn":
                # Radio groups spread their states over several widgets, so
                # keep accumulating even after the record has been created.
                btn_states.setdefault(key, set()).update(_appearance_states(obj))

    for key, found in btn_states.items():
        records[key]["states"] = sorted(found) or None

    out = list(records.values())
    # Keyed on the resolved name, so a field matched through the fallback
    # lookup is not reported a second time here.
    out.extend(
        _field_record(None, k, v, None) for k, v in flds.items() if k not in records
    )
    return out

def dump_layout():
    """Return the layout-preserving text of every page in ``PDF``.

    Each list entry is a dict with the 1-based ``page`` number, the page
    width ``w`` and height ``h`` as reported by pdfplumber, and ``text``
    (an empty string when extraction yields nothing).
    """
    import pdfplumber

    layout = []
    with pdfplumber.open(PDF) as doc:
        for number, pg in enumerate(doc.pages, start=1):
            text = pg.extract_text(layout=True) or ""
            layout.append(dict(page=number, w=pg.width, h=pg.height, text=text))
    return layout

# Run the two extractors independently: a failure in one is recorded under
# its "*_error" key and does not stop the other from contributing.
result = {"acroform": None, "layout": None}
fields = None
try:
    fields = acroform_fields()
except Exception as exc:
    result["acroform_error"] = repr(exc)
else:
    result["acroform"] = fields

try:
    layout_pages = dump_layout()
except Exception as exc:
    result["layout_error"] = repr(exc)
else:
    result["layout"] = layout_pages

# Persist the full report, then print a short summary to stdout.
with open("/tmp/practice-data-profile/extract.json", "w") as report:
    json.dump(result, report, indent=2)

print("ACROFORM_FIELDS:", len(fields) if fields else 0)
for entry in fields or ():
    print(entry["page"], "|", entry["ft"], "|", entry["name"], "|opts=", entry["options"])
layout = result.get("layout")
print("LAYOUT_PAGES:", len(layout) if layout else 0)
