from __future__ import annotations import json from datetime import datetime, timezone from pathlib import Path ROOT = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\wishfulfilled-wiki") DASHBOARD_PUBLIC = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\Understand-Anything-main\understand-anything-plugin\packages\dashboard\public") UA_DIR = ROOT / ".understand-anything" SECTIONS = [ { "id": "layer-overview", "name": "知识库入口", "description": "知识库使用说明、首页、知识地图和问答入口。先从这里理解知识库结构与检索方式。", "paths": ["知识库使用说明.md", "欢迎.md", "00_首页"], }, { "id": "layer-requirements", "name": "需求文档", "description": "所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。", "paths": ["05_需求文档"], }, { "id": "layer-milestones", "name": "里程碑", "description": "项目阶段计划、里程碑节点、评审记录、准入准出和交付物节奏。", "paths": ["06_里程碑", "02_项目管理流程"], }, { "id": "layer-technical", "name": "技术文档", "description": "系统架构、数据模型、接口说明、技术方案和技术决策。", "paths": ["07_技术文档"], }, { "id": "layer-testing", "name": "测试相关", "description": "测试计划、测试用例、缺陷记录、验收记录和上线检查。", "paths": ["08_测试相关"], }, { "id": "layer-agent", "name": "Agent检索", "description": "检索说明、关键词、同义词、来源索引和持续更新验证流程。", "paths": ["04_Agent检索"], }, ] EXCLUDE_DIRS = {".git", ".obsidian", ".understand-anything", "raw", "99_归档"} def rel(path: Path) -> str: return path.relative_to(ROOT).as_posix() def iter_markdown() -> list[Path]: files: list[Path] = [] for p in ROOT.rglob("*.md"): parts = set(p.relative_to(ROOT).parts) if parts & EXCLUDE_DIRS: continue files.append(p) return sorted(files, key=lambda x: rel(x)) def read_text(path: Path) -> str: return path.read_text(encoding="utf-8", errors="ignore") def title_from_content(path: Path, content: str) -> str: for line in content.splitlines(): line = line.strip() if line.startswith("# "): return line[2:].strip() return path.stem def summary_from_content(content: str) -> str: in_frontmatter = False started = False for raw in content.splitlines(): line = raw.strip() if line == "---" and not started: in_frontmatter = True started = True continue if line == "---" and in_frontmatter: in_frontmatter = False continue if in_frontmatter or not line or line.startswith("#") or line.startswith("---"): continue if line.startswith("|") or line.startswith("```"): continue return line[:180] return "知识库文档。" def tags_for(path: Path) -> list[str]: parts = path.relative_to(ROOT).parts tags = [parts[0]] if parts else [] name = path.stem if "需求" in name or "05_需求文档" in parts: tags.append("需求文档") if "测试" in name or "08_测试相关" in parts: tags.append("测试相关") if "技术" in name or "07_技术文档" in parts: tags.append("技术文档") if "里程碑" in name or "06_里程碑" in parts: tags.append("里程碑") if "Agent" in name or "04_Agent检索" in parts: tags.append("Agent检索") return list(dict.fromkeys(tags)) def layer_for(path: Path) -> str | None: rp = rel(path) for section in SECTIONS: for prefix in section["paths"]: prefix_norm = prefix.replace("\\", "/") if rp == prefix_norm or rp.startswith(prefix_norm.rstrip("/") + "/"): return section["id"] return None def edge(source: str, target: str, type_: str = "documents", weight: float = 0.8, description: str = "") -> dict: allowed = { "documents": "documents", "related": "related", "depends_on": "depends_on", } mapped = allowed.get(type_, "related") return { "source": source, "target": target, "type": mapped, "direction": "forward", "description": description or mapped, "weight": weight, } def main() -> None: nodes = [] layer_node_ids: dict[str, list[str]] = {s["id"]: [] for s in SECTIONS} path_to_id: dict[str, str] = {} for path in iter_markdown(): lid = layer_for(path) if not lid: continue content = read_text(path) rp = rel(path) node_id = "doc:" + rp[:-3] path_to_id[rp] = node_id node = { "id": node_id, "type": "document", "name": title_from_content(path, content), "filePath": rp, "summary": summary_from_content(content), "tags": tags_for(path), "complexity": "moderate" if len(content) > 4000 else "simple", "knowledgeMeta": { "content": content, "wikilinks": [], "category": lid, }, } nodes.append(node) layer_node_ids[lid].append(node_id) # Add one virtual process node per layer so the overview forms a clear flow even when a layer has many docs. flow_nodes = [] for order, section in enumerate(SECTIONS, start=1): node_id = f"flow:{section['id']}" flow_nodes.append(node_id) docs = layer_node_ids[section["id"]] node = { "id": node_id, "type": "document", "name": f"{order}. {section['name']}", "summary": section["description"], "tags": ["流程入口", section["name"]], "complexity": "simple", "knowledgeMeta": { "content": f"# {section['name']}\n\n{section['description']}\n\n本层包含 {len(docs)} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。", "wikilinks": [], "category": section["id"], }, } nodes.append(node) layer_node_ids[section["id"]].insert(0, node_id) edges = [] for a, b in zip(flow_nodes, flow_nodes[1:]): edges.append(edge(a, b, "documents", 1.0, "知识库主流程")) for section in SECTIONS: root_id = f"flow:{section['id']}" for doc_id in layer_node_ids[section["id"]][1:]: edges.append(edge(root_id, doc_id, "documents", 0.65, "本层文档")) # Important requirement docs should build on their upstream links when those linked files exist in this knowledge base. for node in nodes: if not node.get("filePath"): continue content = node.get("knowledgeMeta", {}).get("content", "") for rp, target_id in path_to_id.items(): if rp != node["filePath"] and Path(rp).name in content: edges.append(edge(target_id, node["id"], "depends_on", 0.7, "文档引用关系")) layers = [ { "id": section["id"], "name": section["name"], "description": section["description"], "nodeIds": layer_node_ids[section["id"]], } for section in SECTIONS ] graph = { "version": "1.0.0", "kind": "codebase", "project": { "name": "如愿知识库", "languages": ["markdown"], "frameworks": ["Understand-Anything", "Obsidian"], "description": "按需求文档、里程碑、技术文档、测试相关、Agent检索组织的流程式知识库。", "analyzedAt": datetime.now(timezone.utc).isoformat(), "gitCommitHash": "", }, "nodes": nodes, "edges": edges, "layers": layers, "tour": [ { "order": i, "title": section["name"], "description": section["description"], "nodeIds": [f"flow:{section['id']}"], } for i, section in enumerate(SECTIONS, start=1) ], } UA_DIR.mkdir(parents=True, exist_ok=True) DASHBOARD_PUBLIC.mkdir(parents=True, exist_ok=True) for target in [UA_DIR / "knowledge-graph.json", DASHBOARD_PUBLIC / "knowledge-graph.json"]: target.write_text(json.dumps(graph, ensure_ascii=False, indent=2), encoding="utf-8") meta = { "lastAnalyzedAt": graph["project"]["analyzedAt"], "gitCommitHash": "", "version": "1.0.0", "analyzedFiles": len([n for n in nodes if n.get("filePath")]), "theme": {"presetId": "dark", "accentId": "cyan"}, } for target in [UA_DIR / "meta.json", DASHBOARD_PUBLIC / "meta.json"]: target.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8") print(f"Generated {len(nodes)} nodes, {len(edges)} edges, {len(layers)} layers") if __name__ == "__main__": main()