from __future__ import annotations

import csv
import json
import re
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path
from typing import Any


# Workspace root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Catalog JSON files read by collect_catalog_parts().
ASSEMBLY_CATALOG = ROOT.joinpath("preview_exports", "assembly", "roof_mount_platform_engine_assembly.json")
ALL_PARTS_CATALOG = ROOT.joinpath("preview_exports", "all_parts", "parts-catalog.json")

# Folders scanned for STEP files by collect_step_files().
SOURCE_STEP_DIR = ROOT.joinpath("STEP")
GENERATED_STEP_DIR = ROOT.joinpath("preview_exports", "step_library")

# Output locations written by build_manifest().
EXPORT_DIR = ROOT.joinpath("preview_exports", "exports")
MANIFEST_PATH = EXPORT_DIR.joinpath("step_library_manifest.json")
CSV_PATH = EXPORT_DIR.joinpath("step_library_manifest.csv")


def read_json(path: Path) -> Any:
    """Parse the JSON document stored at *path*.

    The file is decoded as ``utf-8-sig``, so a leading UTF-8 byte-order
    mark is skipped if present.
    """
    with path.open(encoding="utf-8-sig") as handle:
        return json.load(handle)


def write_json(path: Path, value: Any) -> None:
    """Serialize *value* as 2-space-indented JSON and write it to *path*.

    Missing parent directories are created first. The file is written as
    UTF-8, replacing any existing content.
    """
    folder = path.parent
    folder.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(value, indent=2)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(serialized)


def safe_id(rel_path: str) -> str:
    """Turn a relative path into an identifier usable as a file name.

    The substitutions are applied in order:

    1. drop a trailing ``.ext`` file extension,
    2. replace each run of ``/`` or ``\\`` with a double underscore,
    3. replace each run of characters outside ``A-Za-z0-9_.-`` with a
       single underscore.

    Leading and trailing underscores are stripped from the result.
    """
    substitutions = (
        (r"\.[^.\\/]+$", ""),
        (r"[\\/]+", "__"),
        (r"[^A-Za-z0-9_.-]+", "_"),
    )
    identifier = rel_path
    for pattern, replacement in substitutions:
        identifier = re.sub(pattern, replacement, identifier)
    return identifier.strip("_")


def normalized_stem(value: str) -> str:
    """Reduce a file name or path to a comparable, upper-case stem.

    Steps, in order:

    1. take the final path component without its last extension,
       upper-cased;
    2. remove ``_YYYYMMDD_n`` style suffixes, ``_`` followed by six or more
       digits, and the words ``_COPY``, ``COPY`` and ``BACKUP`` wherever
       they occur;
    3. collapse every run of non-alphanumeric characters into ``_`` and
       trim underscores from both ends;
    4. drop a trailing ``_IPT``, ``_IAM``, ``_STP`` or ``_STEP``.

    Both ``/`` and ``\\`` are treated as directory separators regardless of
    the host platform, so backslash-style source paths produce the same
    stem on POSIX as on Windows.
    """
    # Path() only splits on backslashes on Windows; normalise the separator
    # first so the directory part is always discarded.
    value = Path(value.replace("\\", "/")).stem.upper()
    value = re.sub(r"(_\d{8}_\d+|_\d{6,}|_COPY|COPY|BACKUP)", "", value)
    value = re.sub(r"[^A-Z0-9]+", "_", value).strip("_")
    value = re.sub(r"_(IPT|IAM|STP|STEP)$", "", value)
    return value


def stem_tokens(value: str) -> set[str]:
    """Return the distinct underscore-separated tokens of the normalized stem.

    Empty tokens and the generic words ``PARTS`` / ``PART`` are excluded.
    """
    pieces = normalized_stem(value).split("_")
    return set(pieces) - {"", "PARTS", "PART"}


def collect_catalog_parts() -> dict[str, dict[str, Any]]:
    """Merge the ``.ipt`` parts listed in the assembly and all-parts catalogs.

    Each catalog that exists on disk is read and its ``parts`` collection
    (either a list or a mapping whose values are the part entries) is
    scanned. Entries whose ``sourcePath`` (or, failing that, ``path``) does
    not end in ``.ipt`` are ignored.

    Returns:
        A dict keyed by the source path with ``/`` replaced by ``\\``. Each
        value holds ``sourcePath``, ``partName``, ``partId``, ``scopes`` (a
        set of ``"activeAssembly"`` / ``"allParts"``), ``occurrenceCount``
        (sum of the entries' ``count`` values) and ``meshJson`` (first
        non-empty value seen).
    """
    sources: dict[str, dict[str, Any]] = {}
    catalogs = ((ASSEMBLY_CATALOG, "activeAssembly"), (ALL_PARTS_CATALOG, "allParts"))
    for catalog_path, scope in catalogs:
        if not catalog_path.exists():
            continue
        # `or {}` tolerates a catalog that stores "parts": null.
        parts = read_json(catalog_path).get("parts") or {}
        entries = parts if isinstance(parts, list) else parts.values()
        for part in entries:
            rel_path = part.get("sourcePath") or part.get("path")
            if not rel_path or not str(rel_path).lower().endswith(".ipt"):
                continue
            rel_text = str(rel_path)
            key = rel_text.replace("/", "\\")
            item = sources.get(key)
            if item is None:
                item = sources[key] = {
                    "sourcePath": key,
                    # Path() only splits on "\" on Windows; use forward
                    # slashes so the bare file name is extracted everywhere.
                    "partName": Path(rel_text.replace("\\", "/")).name,
                    "partId": part.get("id") or safe_id(key),
                    "scopes": set(),
                    "occurrenceCount": 0,
                    "meshJson": part.get("meshJson", ""),
                }
            item["scopes"].add(scope)
            item["occurrenceCount"] += int(part.get("count", 0) or 0)
            # Keep the first non-empty mesh reference across catalogs.
            if not item.get("meshJson") and part.get("meshJson"):
                item["meshJson"] = part["meshJson"]
    return sources


def collect_step_files() -> list[Path]:
    """Return every STEP file under the source and generated STEP folders.

    Both folders are searched recursively; a folder that does not exist is
    skipped. Files are selected by suffix (``.stp`` or ``.step``) compared
    case-insensitively, so upper-case extensions such as ``.STP`` are found
    on case-sensitive file systems too. The result is de-duplicated and
    sorted.
    """
    step_suffixes = {".stp", ".step"}
    found: set[Path] = set()
    for folder in (SOURCE_STEP_DIR, GENERATED_STEP_DIR):
        if not folder.exists():
            continue
        found.update(
            path for path in folder.rglob("*") if path.suffix.lower() in step_suffixes
        )
    return sorted(found)


def choose_step(source_path: str, step_files: list[Path]) -> tuple[Path | None, str, float]:
    """Pick the STEP file that best matches a source part path.

    Args:
        source_path: Path of the source part; ``/`` and ``\\`` are both
            accepted as separators.
        step_files: Candidate STEP files, scanned in the given order.

    Returns:
        ``(path, "exact", 1.0)`` when a candidate has the same normalized
        stem as the source (the last such candidate in ``step_files``
        wins). Otherwise ``(path, "fuzzy", score)`` for the best-scoring
        candidate, with the reported score capped at 0.99, provided the
        score reaches 0.34. ``(None, "", 0.0)`` when nothing qualifies.

    The fuzzy score is the token overlap divided by the larger token count,
    plus 0.35 when one normalized stem contains the other. Ties keep the
    earlier candidate.
    """
    # Normalise separators so the directory part is dropped on every platform.
    source_stem = Path(source_path.replace("\\", "/")).stem
    exact = normalized_stem(source_stem)
    normalized = [(path, normalized_stem(path.stem)) for path in step_files]

    # An empty normalized stem carries no information; never treat it as an
    # exact match against another file that also normalizes to nothing.
    if exact:
        by_exact = {name: path for path, name in normalized}
        if exact in by_exact:
            return by_exact[exact], "exact", 1.0

    source_tokens = stem_tokens(source_stem)
    if not source_tokens:
        return None, "", 0.0

    best_path: Path | None = None
    best_score = 0.0
    for path, name in normalized:
        tokens = stem_tokens(path.stem)
        if not tokens:
            continue
        overlap = len(source_tokens & tokens)
        score = overlap / max(len(source_tokens), len(tokens))
        if exact in name or name in exact:
            score += 0.35
        # Compare uncapped scores: capping before the comparison would let a
        # later candidate scoring 1.0 displace an earlier one scoring 1.35.
        if score > best_score:
            best_path, best_score = path, score

    if best_path is not None and best_score >= 0.34:
        return best_path, "fuzzy", min(best_score, 0.99)
    return None, "", 0.0


def _root_relative(path: Path) -> str:
    """Return *path* relative to ROOT as a forward-slash string."""
    return str(path.relative_to(ROOT)).replace("\\", "/")


def _write_manifest_csv(rows: list[dict[str, Any]]) -> None:
    """Write the manifest rows to CSV_PATH, joining each row's scopes with ';'."""
    fieldnames = [
        "sourcePath",
        "sourceExists",
        "partName",
        "partId",
        "scopes",
        "occurrenceCount",
        "stepPath",
        "matchKind",
        "matchScore",
        "status",
        "targetStepPath",
    ]
    with CSV_PATH.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows({**row, "scopes": ";".join(row["scopes"])} for row in rows)


def build_manifest() -> dict[str, Any]:
    """Build the STEP library manifest and write it as JSON and CSV.

    Parts come from the catalogs (``collect_catalog_parts``) plus every
    ``*.ipt`` file found under ROOT that is not inside an ``OldVersions``
    folder. Each part becomes one row, with status:

    * ``"generated"`` when ``<safe_id(sourcePath)>.stp`` already exists in
      GENERATED_STEP_DIR (this takes precedence over any other match),
    * ``"matched"`` when ``choose_step`` finds a STEP file,
    * ``"missing"`` otherwise.

    ``sourceStepFileCount`` / ``generatedStepFileCount`` count the files
    returned by ``collect_step_files`` that lie under the respective
    folder, so they cover the same files that matching considered.

    Returns:
        The manifest dict that was written to MANIFEST_PATH (including the
        ``rows`` list). The same rows are written to CSV_PATH.
    """
    EXPORT_DIR.mkdir(parents=True, exist_ok=True)
    parts = collect_catalog_parts()
    step_files = collect_step_files()
    # Test only the workspace-relative components so that a directory named
    # "OldVersions" above ROOT cannot exclude the whole workspace.
    all_ipt_paths = sorted(
        path
        for path in ROOT.rglob("*.ipt")
        if "OldVersions" not in path.relative_to(ROOT).parts
    )
    # Snapshot before workspace-only parts are merged in below.
    known_source_paths = set(parts)
    for ipt in all_ipt_paths:
        rel = str(ipt.relative_to(ROOT)).replace("/", "\\")
        parts.setdefault(
            rel,
            {
                "sourcePath": rel,
                "partName": ipt.name,
                "partId": safe_id(rel),
                "scopes": {"workspaceIpt"},
                "occurrenceCount": 0,
                "meshJson": "",
            },
        )

    rows = []
    for source_path, part in sorted(parts.items()):
        generated_target = GENERATED_STEP_DIR / (safe_id(source_path) + ".stp")
        candidate: Path | None
        if generated_target.exists():
            # A generated file always wins, so the fuzzy search is skipped.
            candidate, match_kind, score = generated_target, "generated", 1.0
            status = "generated"
        else:
            candidate, match_kind, score = choose_step(source_path, step_files)
            status = "matched" if candidate else "missing"
        # Keys use "\" separators; convert so the join resolves on POSIX too.
        source_abs = ROOT / source_path.replace("\\", "/")
        rows.append(
            {
                "sourcePath": source_path,
                "sourceExists": source_abs.exists(),
                "partName": part["partName"],
                "partId": part["partId"],
                "scopes": sorted(part["scopes"]),
                "occurrenceCount": part["occurrenceCount"],
                "stepPath": _root_relative(candidate) if candidate else "",
                "matchKind": match_kind,
                "matchScore": round(score, 3),
                "status": status,
                "targetStepPath": _root_relative(generated_target),
            }
        )

    counts = Counter(row["status"] for row in rows)
    match_counts = Counter(row["matchKind"] or "none" for row in rows)
    manifest = {
        "ok": True,
        "generatedAt": datetime.now(timezone.utc).isoformat(),
        "sourceStepFolder": _root_relative(SOURCE_STEP_DIR),
        "generatedStepFolder": _root_relative(GENERATED_STEP_DIR),
        "sourceStepFileCount": sum(1 for path in step_files if SOURCE_STEP_DIR in path.parents),
        "generatedStepFileCount": sum(1 for path in step_files if GENERATED_STEP_DIR in path.parents),
        "catalogPartCount": len(known_source_paths),
        "workspaceIptCount": len(all_ipt_paths),
        "rowCount": len(rows),
        "statusCounts": dict(counts),
        "matchKindCounts": dict(match_counts),
        "csv": _root_relative(CSV_PATH),
        "rows": rows,
    }
    write_json(MANIFEST_PATH, manifest)
    _write_manifest_csv(rows)
    return manifest


if __name__ == "__main__":
    # Build the manifest, then print everything except the bulky row list.
    result = build_manifest()
    summary = {key: value for key, value in result.items() if key != "rows"}
    print(json.dumps(summary, indent=2))