from __future__ import annotations

import json
from datetime import datetime, timezone
from pathlib import Path

# Root directory of the markdown knowledge base. Every file scan starts here
# and every relative path in the generated graph is computed against it.
# NOTE(review): hard-coded absolute Windows path — the script presumably only
# runs on a machine with this exact layout; confirm before reusing elsewhere.
ROOT = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\wishfulfilled-wiki")
# Second output directory: main() writes a copy of knowledge-graph.json and
# meta.json here (created if missing).
DASHBOARD_PUBLIC = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\Understand-Anything-main\understand-anything-plugin\packages\dashboard\public")
# Primary output directory inside the knowledge base. Its name is also listed
# in EXCLUDE_DIRS, so files under it are never scanned as documents.
UA_DIR = ROOT / ".understand-anything"

# Ordered definition of the knowledge-base layers. Each entry has:
#   id          - layer identifier; also used to build the "flow:<id>" node id
#   name        - display name of the layer
#   description - text used as the layer / flow-node / tour description
#   paths       - ROOT-relative file names or directory prefixes; a markdown
#                 file belongs to the layer when layer_for() matches one of them
# Order matters: layer_for() returns the first matching section, and main()
# numbers and chains the flow nodes in this order.
SECTIONS = [
    {
        "id": "layer-overview",
        "name": "知识库入口",
        "description": "知识库使用说明、首页、知识地图和问答入口。先从这里理解知识库结构与检索方式。",
        "paths": ["知识库使用说明.md", "欢迎.md", "00_首页"],
    },
    {
        "id": "layer-requirements",
        "name": "需求文档",
        "description": "所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。",
        "paths": ["05_需求文档"],
    },
    {
        "id": "layer-milestones",
        "name": "里程碑",
        "description": "项目阶段计划、里程碑节点、评审记录、准入准出和交付物节奏。",
        "paths": ["06_里程碑", "02_项目管理流程"],
    },
    {
        "id": "layer-technical",
        "name": "技术文档",
        "description": "系统架构、数据模型、接口说明、技术方案和技术决策。",
        "paths": ["07_技术文档"],
    },
    {
        "id": "layer-testing",
        "name": "测试相关",
        "description": "测试计划、测试用例、缺陷记录、验收记录和上线检查。",
        "paths": ["08_测试相关"],
    },
    {
        "id": "layer-agent",
        "name": "Agent检索",
        "description": "检索说明、关键词、同义词、来源索引和持续更新验证流程。",
        "paths": ["04_Agent检索"],
    },
]

# Path component names that disqualify a markdown file: iter_markdown() skips
# any file whose ROOT-relative path contains one of these as a component.
EXCLUDE_DIRS = {".git", ".obsidian", ".understand-anything", "raw", "99_归档"}


def rel(path: Path) -> str:
    """Return *path* relative to ROOT, rendered with forward slashes.

    Raises:
        ValueError: if *path* is not located under ROOT (raised by
            ``Path.relative_to``).
    """
    relative = path.relative_to(ROOT)
    return relative.as_posix()


def iter_markdown() -> list[Path]:
    """Collect every ``*.md`` path under ROOT that is not in an excluded folder.

    A path is dropped when any component of its ROOT-relative path appears in
    EXCLUDE_DIRS. The result is sorted by the ROOT-relative POSIX path.
    """
    kept = [
        candidate
        for candidate in ROOT.rglob("*.md")
        if EXCLUDE_DIRS.isdisjoint(candidate.relative_to(ROOT).parts)
    ]
    kept.sort(key=rel)
    return kept


def read_text(path: Path) -> str:
    """Read *path* as UTF-8 text, tolerating a BOM and undecodable bytes.

    ``utf-8-sig`` decodes exactly like ``utf-8`` but drops a leading byte-order
    mark. Without that, a BOM-prefixed file starts with ``"\\ufeff"``, which
    ``str.strip()`` does not remove, so a first-line ``# heading`` or ``---``
    front-matter marker would not be recognised by later parsing.

    Bytes that are not valid UTF-8 are silently dropped (best effort), as
    before.
    """
    return path.read_text(encoding="utf-8-sig", errors="ignore")


def title_from_content(path: Path, content: str) -> str:
    """Return the document title: the first level-1 markdown heading.

    Lines inside a leading YAML front-matter block and inside fenced code
    blocks are ignored, so a YAML comment or a shell/Python comment such as
    ``# install`` is not mistaken for the title.

    Args:
        path: Path of the document; its stem is the fallback title.
        content: Full text of the document.

    Returns:
        The text of the first ``# `` heading outside front matter and code
        fences, or ``path.stem`` when there is none.
    """
    # A leading BOM would otherwise hide a first-line "---" or "# " marker.
    lines = [raw.strip() for raw in content.lstrip("\ufeff").splitlines()]

    # Front matter only counts when it opens on the very first line and is
    # closed later; an unterminated block is treated as ordinary body text.
    start = 0
    if lines and lines[0] == "---":
        try:
            start = lines.index("---", 1) + 1
        except ValueError:
            start = 0

    fence = None  # marker ("```" or "~~~") of the code fence we are inside
    for line in lines[start:]:
        marker = line[:3]
        if marker in ("```", "~~~"):
            if fence is None:
                fence = marker
            elif fence == marker:
                fence = None
            continue
        if fence is not None:
            continue
        if line.startswith("# "):
            return line[2:].strip()
    return path.stem


def summary_from_content(content: str) -> str:
    """Return a short summary: the first plain-text line of the document.

    Skipped while searching:
      * a YAML front-matter block, but only when it opens on the very first
        line (a ``---`` horizontal rule further down is just a rule and no
        longer swallows the text that follows it);
      * everything inside fenced code blocks, not only the fence lines;
      * blank lines, headings/tags (``#``), rules (``---``) and table rows
        (``|``).

    Args:
        content: Full text of the document.

    Returns:
        The first qualifying line truncated to 180 characters, or the default
        placeholder text when no line qualifies.
    """
    # A leading BOM would otherwise hide a first-line "---" marker.
    lines = [raw.strip() for raw in content.lstrip("\ufeff").splitlines()]

    # Locate the end of a properly closed front-matter block, if any.
    start = 0
    if lines and lines[0] == "---":
        try:
            start = lines.index("---", 1) + 1
        except ValueError:
            start = 0

    fence = None  # marker ("```" or "~~~") of the code fence we are inside
    for line in lines[start:]:
        marker = line[:3]
        if marker in ("```", "~~~"):
            if fence is None:
                fence = marker
            elif fence == marker:
                fence = None
            continue
        if fence is not None:
            continue
        if not line or line.startswith(("#", "---", "|")):
            continue
        return line[:180]
    return "知识库文档。"


def tags_for(path: Path) -> list[str]:
    """Derive the tag list for a document from its location and file name.

    The first ROOT-relative path component (if any) is always the first tag.
    Each rule below then adds its tag when the keyword occurs in the file stem
    or the folder name is one of the path components. Duplicates are removed
    while keeping first-seen order.
    """
    segments = path.relative_to(ROOT).parts
    stem = path.stem

    # (keyword looked for in the stem, folder component, tag to add)
    rules = (
        ("需求", "05_需求文档", "需求文档"),
        ("测试", "08_测试相关", "测试相关"),
        ("技术", "07_技术文档", "技术文档"),
        ("里程碑", "06_里程碑", "里程碑"),
        ("Agent", "04_Agent检索", "Agent检索"),
    )

    collected = list(segments[:1])
    collected.extend(
        tag for keyword, folder, tag in rules if keyword in stem or folder in segments
    )

    unique: list[str] = []
    seen = set()
    for tag in collected:
        if tag not in seen:
            seen.add(tag)
            unique.append(tag)
    return unique


def layer_for(path: Path) -> str | None:
    """Return the id of the first section in SECTIONS that owns *path*.

    A section owns the file when the file's ROOT-relative POSIX path equals
    one of the section's ``paths`` entries, or lies underneath one of them
    (entry followed by ``/``). Backslashes in an entry are treated as ``/``.
    Returns ``None`` when no section matches.
    """
    relative = rel(path)
    for section in SECTIONS:
        candidates = (raw.replace("\\", "/") for raw in section["paths"])
        if any(
            relative == candidate or relative.startswith(candidate.rstrip("/") + "/")
            for candidate in candidates
        ):
            return section["id"]
    return None


def edge(source: str, target: str, type_: str = "documents", weight: float = 0.8, description: str = "") -> dict:
    """Build one forward edge record for the graph.

    Args:
        source: Id of the node the edge starts at.
        target: Id of the node the edge points to.
        type_: Edge type; anything other than ``documents``, ``related`` or
            ``depends_on`` is recorded as ``related``.
        weight: Edge weight stored as given.
        description: Edge description; when empty, the resolved edge type is
            used instead.

    Returns:
        A dict with the keys ``source``, ``target``, ``type``, ``direction``,
        ``description`` and ``weight``, in that order.
    """
    known_types = {"documents", "related", "depends_on"}
    kind = type_ if type_ in known_types else "related"
    label = description if description else kind
    record = {"source": source, "target": target}
    record["type"] = kind
    record["direction"] = "forward"
    record["description"] = label
    record["weight"] = weight
    return record


def main() -> None:
    """Scan the knowledge base, assemble the graph, and write the JSON outputs.

    Steps, in order:
      1. Create one ``doc:`` node for every markdown file that ``layer_for``
         assigns to a section; files without a section are skipped.
      2. Create one ``flow:`` node per section and place it first in that
         section's node list.
      3. Add edges: consecutive flow nodes, flow node -> each document of its
         section, and referenced document -> referencing document whenever a
         document's text contains another document's file name.
      4. Write ``knowledge-graph.json`` and ``meta.json`` into both UA_DIR and
         DASHBOARD_PUBLIC, then print a one-line count summary.
    """
    graph_nodes = []
    members: dict[str, list[str]] = {section["id"]: [] for section in SECTIONS}
    id_by_path: dict[str, str] = {}

    # Step 1: document nodes.
    for md_path in iter_markdown():
        layer_id = layer_for(md_path)
        if not layer_id:
            continue
        text = read_text(md_path)
        rel_path = rel(md_path)
        doc_id = "doc:" + rel_path[:-3]  # drop the ".md" suffix
        id_by_path[rel_path] = doc_id
        graph_nodes.append(
            {
                "id": doc_id,
                "type": "document",
                "name": title_from_content(md_path, text),
                "filePath": rel_path,
                "summary": summary_from_content(text),
                "tags": tags_for(md_path),
                "complexity": "moderate" if len(text) > 4000 else "simple",
                "knowledgeMeta": {
                    "content": text,
                    "wikilinks": [],
                    "category": layer_id,
                },
            }
        )
        members[layer_id].append(doc_id)

    # Step 2: one virtual entry node per section, so the overview reads as a
    # clear flow even when a section holds many documents.
    flow_ids = []
    for position, section in enumerate(SECTIONS, start=1):
        section_id = section["id"]
        flow_id = f"flow:{section_id}"
        flow_ids.append(flow_id)
        doc_count = len(members[section_id])
        graph_nodes.append(
            {
                "id": flow_id,
                "type": "document",
                "name": f"{position}. {section['name']}",
                "summary": section["description"],
                "tags": ["流程入口", section["name"]],
                "complexity": "simple",
                "knowledgeMeta": {
                    "content": f"# {section['name']}\n\n{section['description']}\n\n本层包含 {doc_count} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。",
                    "wikilinks": [],
                    "category": section_id,
                },
            }
        )
        # The flow node goes first; step 3 relies on that when slicing [1:].
        members[section_id].insert(0, flow_id)

    # Step 3a: chain the flow nodes in section order.
    graph_edges = [
        edge(current, following, "documents", 1.0, "知识库主流程")
        for current, following in zip(flow_ids, flow_ids[1:])
    ]

    # Step 3b: connect each flow node to the documents of its section.
    for section in SECTIONS:
        hub_id = f"flow:{section['id']}"
        graph_edges.extend(
            edge(hub_id, member_id, "documents", 0.65, "本层文档")
            for member_id in members[section["id"]][1:]
        )

    # Step 3c: a document that mentions another document's file name gets an
    # edge from the mentioned document to itself. Flow nodes have no filePath
    # and are skipped.
    for entry in graph_nodes:
        own_path = entry.get("filePath")
        if not own_path:
            continue
        body = entry.get("knowledgeMeta", {}).get("content", "")
        for other_path, other_id in id_by_path.items():
            if other_path == own_path:
                continue
            if Path(other_path).name in body:
                graph_edges.append(edge(other_id, entry["id"], "depends_on", 0.7, "文档引用关系"))

    layer_records = []
    for section in SECTIONS:
        layer_records.append(
            {
                "id": section["id"],
                "name": section["name"],
                "description": section["description"],
                "nodeIds": members[section["id"]],
            }
        )

    tour_steps = []
    for position, section in enumerate(SECTIONS, start=1):
        tour_steps.append(
            {
                "order": position,
                "title": section["name"],
                "description": section["description"],
                "nodeIds": [f"flow:{section['id']}"],
            }
        )

    analyzed_at = datetime.now(timezone.utc).isoformat()
    graph = {
        "version": "1.0.0",
        "kind": "codebase",
        "project": {
            "name": "如愿知识库",
            "languages": ["markdown"],
            "frameworks": ["Understand-Anything", "Obsidian"],
            "description": "按需求文档、里程碑、技术文档、测试相关、Agent检索组织的流程式知识库。",
            "analyzedAt": analyzed_at,
            "gitCommitHash": "",
        },
        "nodes": graph_nodes,
        "edges": graph_edges,
        "layers": layer_records,
        "tour": tour_steps,
    }

    # Step 4: write both payloads to both output folders.
    output_dirs = (UA_DIR, DASHBOARD_PUBLIC)
    for folder in output_dirs:
        folder.mkdir(parents=True, exist_ok=True)

    graph_json = json.dumps(graph, ensure_ascii=False, indent=2)
    for folder in output_dirs:
        (folder / "knowledge-graph.json").write_text(graph_json, encoding="utf-8")

    meta = {
        "lastAnalyzedAt": analyzed_at,
        "gitCommitHash": "",
        "version": "1.0.0",
        "analyzedFiles": sum(1 for entry in graph_nodes if entry.get("filePath")),
        "theme": {"presetId": "dark", "accentId": "cyan"},
    }
    meta_json = json.dumps(meta, ensure_ascii=False, indent=2)
    for folder in output_dirs:
        (folder / "meta.json").write_text(meta_json, encoding="utf-8")

    print(f"Generated {len(graph_nodes)} nodes, {len(graph_edges)} edges, {len(layer_records)} layers")


if __name__ == "__main__":
    main()