import json, glob, os, pickle
from datetime import date

# Report "as of" date: each output row's age in days is measured back from this day.
TODAY = date(year=2026, month=6, day=16)

# Appointment-status codes that count as a kept visit.
# NOTE(review): the meaning of each individual code is defined by the upstream
# scheduling data, not by this script — confirm against that system.
KEPT = {"3", "5", "8", "9"}

# A follow-up = any later visit that is scheduled OR kept (not cancelled/no-show/bumped).
# Every kept code qualifies, plus the codes listed here.
SCHED_OR_KEPT = KEPT | {"0", "1", "2", "7"}

def iso(ds):
    """Convert a ``YYYY-MM-DD`` string into a ``datetime.date``.

    The string is split on ``-`` into exactly three parts, each converted with
    ``int``; a wrong number of parts or a non-numeric part raises ``ValueError``.
    """
    year_s, month_s, day_s = ds.split("-")
    return date(year=int(year_s), month=int(month_s), day=int(day_s))

# 1) Load full visit universe from day-files (4/1 onward, incl future to 8/15)
#    Build: per-pid list of (date, status, columnheading, patient_name)
#    The date of every visit is taken from the file name (<date>.json), not from the row.
visits_by_pid = {}
day_dates = []
for fp in sorted(glob.glob("/tmp/days/*.json")):
    ds = os.path.basename(fp)[:-5]  # drop the ".json" suffix
    day_dates.append(ds)
    try:
        # Context manager so the handle is closed even when parsing fails.
        with open(fp) as fh:
            rows = json.load(fh)
    except (OSError, ValueError) as exc:
        # Best effort: an unreadable or malformed day contributes no visits,
        # but say so instead of silently shrinking the universe.
        # (json.JSONDecodeError and UnicodeDecodeError are both ValueError.)
        print(f"WARN: skipping day-file {fp}: {exc}")
        rows = []
    if not isinstance(rows, list):
        # e.g. a file holding `null` or an error object instead of a row list
        print(f"WARN: day-file {fp} does not contain a list; ignoring it")
        rows = []
    for v in rows:
        pid = v.get("patient_id")
        if not pid:
            continue
        visits_by_pid.setdefault(pid, []).append(
            (ds, v.get("apptstatus"), v.get("columnheading", ""), v.get("patient_name", ""))
        )

if day_dates:
    print("day-files loaded:", len(day_dates), "range", day_dates[0], "..", day_dates[-1])
else:
    # Indexing day_dates[0] would raise IndexError when nothing matched the glob.
    print("day-files loaded: 0 (no files matched /tmp/days/*.json)")
print("distinct pids in universe:", len(visits_by_pid))

# 2) Latest kept visit per pid, restricted to the 4/1..6/16 window
kept = {}  # pid -> {"date", "provider", "name"} taken from that pid's most recent kept visit
for pid, history in visits_by_pid.items():
    # Each history entry is (date string, status, provider column, patient name).
    in_window = [
        visit for visit in history
        if visit[1] in KEPT and "2026-04-01" <= visit[0] <= "2026-06-16"
    ]
    if not in_window:
        continue
    # Dates are ISO strings, so string order is chronological order.
    # max() returns the first maximal item, so a tie on date keeps the earliest-listed visit.
    latest_ds, _status, latest_prov, latest_name = max(in_window, key=lambda visit: visit[0])
    kept[pid] = {"date": latest_ds, "provider": latest_prov, "name": latest_name}
print("kept-in-window pids:", len(kept))

# 3) Keep only pids with no follow-up: drop anyone who has a scheduled-or-kept
#    visit dated strictly AFTER their last kept date
rows_out = []
for pid, info in kept.items():
    last_kept = info["date"]
    already_rebooked = any(
        ds > last_kept and st in SCHED_OR_KEPT
        for ds, st, _prov, _name in visits_by_pid.get(pid, [])
    )
    if not already_rebooked:
        # "days" = how long ago the last kept visit was, relative to TODAY
        age_in_days = (TODAY - iso(last_kept)).days
        rows_out.append({
            "pid": pid,
            "name": info["name"],
            "lastdate": last_kept,
            "provider": info["provider"],
            "days": age_in_days,
        })

print("Active No-Followup pids:", len(rows_out))

# 4) Build contact map. PRIMARY: get_updated_patients(2026-04-01). FALLBACK: 6/15 report.
# bypid is the fallback source; later code looks it up by pid, takes element [0]
# of the result and reads its "phone" / "email" / "name" entries.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file —
# only run this against a pickle this pipeline produced itself.
with open("/tmp/existing_bypid.pkl", "rb") as fh:  # closed deterministically after loading
    bypid = pickle.load(fh)

# Primary contact map from get_updated_patients
def pick(rec, *names):
    """Return the first usable value found in ``rec`` under any of ``names``.

    Candidates are tried in the order given. A value is usable when it is
    truthy and still non-empty after ``str(...).strip()``; a whitespace-only
    value is skipped so that a blank higher-priority field does not hide a
    populated lower-priority one. Returns ``""`` when nothing is usable.
    """
    for n in names:
        v = rec.get(n)
        if not v:
            continue
        text = str(v).strip()
        if text:
            return text
    return ""


upmap = {}  # str(pid) -> {"phone", "email", "name"}
try:
    # Only the file read is guarded; a missing file is the one expected failure.
    with open("/tmp/updated_patients.json") as fh:
        up = json.load(fh)
except FileNotFoundError:
    print("WARN: updated_patients.json missing; using 6/15 report only")
    up = []

# discover field names from first row
if up:
    sample_keys = set(up[0].keys())
    print("updated_patients keys:", sorted(sample_keys))

for rec in up:
    # Keys are always coerced to str, whatever type the export uses for ids.
    pid = str(rec.get("id") or rec.get("patient_id") or rec.get("patientid") or "")
    if not pid:
        continue
    # phone priority cell > home > work
    phone = pick(rec, "cellphone", "cell", "mobile", "mobilephone", "cellphonenumber",
                      "homephone", "home", "homephonenumber",
                      "workphone", "work", "workphonenumber", "phone")
    email = pick(rec, "email", "emailaddress", "email1", "patientemail")
    name = pick(rec, "name", "patient_name", "fullname")
    upmap[pid] = {"phone": phone, "email": email, "name": name}
# Attach phone / email / name to every output row and count how many rows end
# up with a phone and with an email.
phone_n = email_n = 0
for r in rows_out:
    pid = r["pid"]
    # upmap is keyed by str(pid), while r["pid"] keeps whatever type the
    # day-file JSON used; look up with the str form so numeric ids still match.
    pid_key = str(pid)
    phone = ""
    email = ""
    name = r["name"]
    u = upmap.get(pid_key)
    if u:
        phone = u["phone"]
        email = u["email"]
        name = u["name"] or name
    # fallback to 6/15 report for any missing field
    # NOTE(review): bypid's key type is not visible here, so try the raw pid
    # first and the str form second — confirm how the pickle was keyed.
    er = bypid.get(pid) or bypid.get(pid_key)
    if er:
        e0 = er[0]  # only the first fallback record is consulted
        if not phone:
            phone = e0.get("phone", "") or ""
        if not email:
            email = e0.get("email", "") or ""
        if not name:
            name = e0.get("name") or name
    r["phone"] = phone
    r["email"] = email
    r["name"] = name
    if phone:
        phone_n += 1
    if email:
        email_n += 1

# Longest-waiting patients first; the sort is stable, so rows with equal
# "days" keep their existing relative order.
rows_out.sort(key=lambda r: r["days"], reverse=True)
# Context manager guarantees the JSON is flushed and the handle closed before
# the summary line is printed.
with open("/tmp/sheet2_rows_final.json", "w") as fh:
    json.dump(rows_out, fh)
print(f"FINAL Sheet2 rows: {len(rows_out)} | phone {phone_n}/{len(rows_out)} | email {email_n}/{len(rows_out)}")
