"""Phase 7 evaluation runner — connects to server via paramiko and runs evaluations."""
import json
import os
import sys
import time
import warnings

import paramiko

# Silence DeprecationWarning messages raised after this point.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Connection settings. Each value can be overridden through an environment
# variable so the script can target another server without editing the file;
# the defaults reproduce the original hard-coded values.
HOST = os.environ.get("CGRL_SSH_HOST", "10.82.3.180")
PORT = int(os.environ.get("CGRL_SSH_PORT", "20083"))
USER = os.environ.get("CGRL_SSH_USER", "root")
# NOTE(review): a plaintext password is still committed here as the fallback.
# Prefer setting CGRL_SSH_PASS (or key-based auth) and rotating this secret.
PASS = os.environ.get("CGRL_SSH_PASS", "m2dGcwyrhI")
# Project directory on the remote host; remote commands `cd` into it first.
PROJ = os.environ.get(
    "CGRL_PROJECT_DIR",
    "/root/siton-data-2849d4ce327c4ccfb233ce33868fe7fe/zsy/CompanionGuard-RL",
)


def ssh_run(client, cmd, timeout=600, print_live=False):
    """Run a command over SSH and return (stdout, stderr, exit_code).

    Args:
        client: Connected SSH client exposing ``get_transport()``; a new
            session channel is opened on that transport for this command.
        cmd: Shell command line to execute remotely.
        timeout: Maximum number of seconds to wait for the command to finish.
            ``None`` waits indefinitely.
        print_live: When true, echo stdout to the local console as it arrives.

    Returns:
        Tuple ``(stdout_text, stderr_text, exit_code)``. Both streams are
        decoded as UTF-8 with undecodable bytes replaced.

    Raises:
        TimeoutError: If the command has not exited within ``timeout`` seconds.
            The channel is closed before the exception propagates.
    """
    import codecs  # local import so this function does not depend on file-level imports

    # Incremental decoders keep multi-byte UTF-8 sequences intact when a
    # character happens to be split across two 4096-byte reads.
    make_decoder = codecs.getincrementaldecoder("utf-8")
    out_decoder = make_decoder(errors="replace")
    err_decoder = make_decoder(errors="replace")
    out_parts = []
    err_parts = []

    def take_stdout(data, final=False):
        text = out_decoder.decode(data, final)
        if text:
            out_parts.append(text)
            if print_live:
                print(text, end="", flush=True)

    def take_stderr(data, final=False):
        text = err_decoder.decode(data, final)
        if text:
            err_parts.append(text)

    deadline = None if timeout is None else time.monotonic() + timeout
    chan = client.get_transport().open_session()
    try:
        chan.exec_command(cmd)
        while True:
            got_data = False
            if chan.recv_ready():
                take_stdout(chan.recv(4096))
                got_data = True
            if chan.recv_stderr_ready():
                take_stderr(chan.recv_stderr(4096))
                got_data = True
            if chan.exit_status_ready():
                # Command finished: drain whatever is still buffered.
                while chan.recv_ready():
                    take_stdout(chan.recv(4096))
                while chan.recv_stderr_ready():
                    take_stderr(chan.recv_stderr(4096))
                break
            if deadline is not None and time.monotonic() >= deadline:
                raise TimeoutError(
                    f"remote command did not finish within {timeout} seconds"
                )
            if not got_data:
                # Only back off when idle so busy streams are read promptly.
                time.sleep(0.1)

        # Flush any incomplete trailing byte sequence held by the decoders.
        take_stdout(b"", final=True)
        take_stderr(b"", final=True)
        exit_code = chan.recv_exit_status()
    finally:
        chan.close()

    return "".join(out_parts), "".join(err_parts), exit_code


def connect():
    """Open and return a paramiko SSH client connected to HOST:PORT.

    Authenticates as USER with PASS and uses a 30-second connect timeout.
    Unknown host keys are accepted automatically via ``AutoAddPolicy``.
    NOTE(review): auto-accepting host keys skips server identity checks;
    confirm this is acceptable for the target network.
    """
    ssh = paramiko.SSHClient()
    accept_unknown_hosts = paramiko.AutoAddPolicy()
    ssh.set_missing_host_key_policy(accept_unknown_hosts)
    ssh.connect(
        hostname=HOST,
        port=PORT,
        username=USER,
        password=PASS,
        timeout=30,
    )
    return ssh


def _build_eval_command(source_filter, output_name):
    """Return the remote shell command that runs evaluate.py for one source filter."""
    return (
        f"cd {PROJ} && "
        f"python3 scripts/evaluate.py "
        f"--detector-ckpt checkpoints/detector/best.pt "
        f"--config configs/detector_config_server.yaml "
        f"--test-data data/processed/CompanionRisk-Bench/test.jsonl "
        f"--source-filter {source_filter} "
        f"--output experiments/{output_name} "
        f"2>&1"
    )


def _format_metric(value):
    """Format a metric with four decimals, falling back to str() for non-numbers."""
    try:
        return f"{value:.4f}"
    except (TypeError, ValueError):
        # e.g. JSON null or a string: show it instead of crashing the summary.
        return str(value)


def main():
    """Run the Phase 7 evaluation on the remote server and print a summary.

    Steps:
      1. Connect over SSH.
      2. Phase 7-C: print the ``source`` field distribution of the test set.
      3. Phase 7-A / 7-B: run ``scripts/evaluate.py`` with ``--source-filter``
         ``all`` and ``human``, streaming the output live.
      4. Download both result JSON files into the current directory.
      5. Print a metrics table for every result that was fetched.

    The SSH client and the SFTP session are closed even when a step raises.
    """
    print("=" * 60)
    print("Phase 7: CompanionGuard-RL Evaluation")
    print("=" * 60)

    client = connect()
    try:
        print("SSH connection established.")

        # ── Phase 7-C: check source field distribution ──────────────
        print("\n--- Phase 7-C: source field check ---")
        check_script = r"""python3 << 'PYEOF'
import json
from collections import Counter
path = 'data/processed/CompanionRisk-Bench/test.jsonl'
samples = [json.loads(l) for l in open(path) if l.strip()]
src_counter = Counter(s.get('source', '(no source field)') for s in samples)
print("source field distribution:")
for k, v in sorted(src_counter.items(), key=lambda x: -x[1]):
    print(f"  {k}: {v}")
id_pfx = Counter(s.get('id','?')[:12] for s in samples if not s.get('source'))
if id_pfx:
    print("id prefix distribution (for samples without source field):")
    for k, v in sorted(id_pfx.items(), key=lambda x: -x[1])[:15]:
        print(f"  {k}: {v}")
risky = sum(int(s.get('y_risk', 0)) for s in samples)
print(f"Total: {len(samples)}, Risky: {risky}, Safe: {len(samples)-risky}")
PYEOF"""

        out, err, code = ssh_run(client, f"cd {PROJ} && {check_script}", timeout=60)
        print(out)
        if err.strip():
            print("STDERR:", err[:500])
        if code != 0:
            print(f"[WARN] source field check exited with code {code}")

        # ── Phase 7-A: full test set ─────────────────────────────────
        print("\n--- Phase 7-A: running eval --source-filter all ---")
        cmd_all = _build_eval_command("all", "eval_all.json")
        print("Command:", cmd_all[:120], "...")
        _, err_all, code_all = ssh_run(client, cmd_all, timeout=600, print_live=True)
        print(f"\n[exit code: {code_all}]")
        if code_all != 0 and err_all.strip():
            print("STDERR:", err_all[-1000:])

        # ── Phase 7-B: human-annotated subset ───────────────────────
        print("\n--- Phase 7-B: running eval --source-filter human ---")
        cmd_human = _build_eval_command("human", "eval_human_only.json")
        _, err_human, code_human = ssh_run(client, cmd_human, timeout=600, print_live=True)
        print(f"\n[exit code: {code_human}]")
        if code_human != 0 and err_human.strip():
            print("STDERR:", err_human[-1000:])

        # ── Fetch result JSONs ───────────────────────────────────────
        print("\n--- Fetching result JSON files ---")
        results = {}
        sftp = client.open_sftp()
        try:
            for tag, remote_path, local_path in [
                ("all",   f"{PROJ}/experiments/eval_all.json",        "eval_all.json"),
                ("human", f"{PROJ}/experiments/eval_human_only.json", "eval_human_only.json"),
            ]:
                # Best effort: a missing or malformed file only drops that
                # entry from the summary instead of aborting the run.
                try:
                    sftp.get(remote_path, local_path)
                    with open(local_path, encoding="utf-8") as f:
                        results[tag] = json.load(f)
                    print(f"  Fetched {tag}: {local_path}")
                except Exception as e:
                    print(f"  [WARN] Could not fetch {tag} results: {e}")
        finally:
            sftp.close()

        # ── Print summary table ──────────────────────────────────────
        print("\n" + "=" * 60)
        print("RESULTS SUMMARY")
        print("=" * 60)
        nan = float("nan")
        for tag, data in results.items():
            # `or {}` also covers keys that are present but null in the JSON.
            ours = data.get("ours_detection") or {}
            meta = data.get("meta") or {}
            n_filt = meta.get("n_filtered", "?")
            print(f"\n  source_filter={tag}  (n={n_filt})")
            print(f"    binary_f1        = {_format_metric(ours.get('binary_f1', nan))}")
            print(f"    level_macro_f1   = {_format_metric(ours.get('level_macro_f1', nan))}")
            print(f"    fine_macro_f1    = {_format_metric(ours.get('fine_macro_f1', nan))}")
            print(f"    high_risk_recall = {_format_metric(ours.get('high_risk_recall', nan))}")
            print(f"    false_neg_rate   = {_format_metric(ours.get('false_negative_rate', nan))}")
    finally:
        client.close()

    print("\n=== Phase 7 done ===")


# Run the evaluation only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()