# Files
# Fulfilled-Knowledge/Understand-Anything-main/understand-anything-plugin/packages/dashboard/scripts/generate-wishfulfilled-flow-graph.py
# 2026-05-27 15:40:32 +08:00
#
# 268 lines
# 9.2 KiB
# Python

from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
# Directory that is scanned recursively for Markdown documents.
ROOT = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\wishfulfilled-wiki")

# Dashboard "public" directory; main() writes a copy of the generated JSON here.
DASHBOARD_PUBLIC = Path(
    r"D:\AIcoding\WishFulfilled\知识库\under-anything\Understand-Anything-main\understand-anything-plugin\packages\dashboard\public"
)

# Output directory inside ROOT; also listed in EXCLUDE_DIRS so it is never scanned.
UA_DIR = ROOT.joinpath(".understand-anything")
# Ordered list of knowledge-base layers. The list order is the flow order:
# main() numbers the layers with enumerate(..., start=1) and chains them
# with edges in this sequence.
#
# Keys of each entry:
#   id          -- layer identifier, also used to build the "flow:<id>" node id
#   name        -- display name of the layer
#   description -- text shown as the layer / flow-node summary
#   paths       -- ROOT-relative files or directory prefixes that belong to
#                  the layer (matched by layer_for)
SECTIONS = [
    {
        "id": "layer-overview",
        "name": "知识库入口",
        "description": "知识库使用说明、首页、知识地图和问答入口。先从这里理解知识库结构与检索方式。",
        "paths": ["知识库使用说明.md", "欢迎.md", "00_首页"],
    },
    {
        "id": "layer-requirements",
        "name": "需求文档",
        "description": "所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。",
        "paths": ["05_需求文档"],
    },
    {
        "id": "layer-milestones",
        "name": "里程碑",
        "description": "项目阶段计划、里程碑节点、评审记录、准入准出和交付物节奏。",
        "paths": ["06_里程碑", "02_项目管理流程"],
    },
    {
        "id": "layer-technical",
        "name": "技术文档",
        "description": "系统架构、数据模型、接口说明、技术方案和技术决策。",
        "paths": ["07_技术文档"],
    },
    {
        "id": "layer-testing",
        "name": "测试相关",
        "description": "测试计划、测试用例、缺陷记录、验收记录和上线检查。",
        "paths": ["08_测试相关"],
    },
    {
        "id": "layer-agent",
        "name": "Agent检索",
        "description": "检索说明、关键词、同义词、来源索引和持续更新验证流程。",
        "paths": ["04_Agent检索"],
    },
]
# Path components that disqualify a Markdown file from being scanned
# (iter_markdown skips a file when any part of its ROOT-relative path is
# listed here).
EXCLUDE_DIRS = {
    ".git",
    ".obsidian",
    ".understand-anything",
    "raw",
    "99_归档",
}
def rel(path: Path) -> str:
    """Return *path* relative to ``ROOT`` as a forward-slash separated string.

    Raises:
        ValueError: If *path* is not located under ``ROOT`` (propagated from
            ``Path.relative_to``).
    """
    relative = path.relative_to(ROOT)
    return relative.as_posix()
def iter_markdown() -> list[Path]:
    """Collect the Markdown files under ``ROOT`` that should be scanned.

    A path is skipped when any component of its ROOT-relative path (the file
    name included) is listed in ``EXCLUDE_DIRS``.

    Returns:
        The matching files, sorted by their ROOT-relative POSIX path.
    """
    files = [
        p
        for p in ROOT.rglob("*.md")
        if EXCLUDE_DIRS.isdisjoint(p.relative_to(ROOT).parts)
        # rglob matches on the name only, so a *directory* called "x.md"
        # would be yielded too and later fail when it is read as text.
        and p.is_file()
    ]
    return sorted(files, key=rel)
def read_text(path: Path) -> str:
    """Read *path* as UTF-8 text, dropping a leading byte-order mark if present.

    The ``utf-8-sig`` codec decodes exactly like UTF-8 but strips a leading
    BOM. With plain ``utf-8`` the BOM stays in the text as ``"\\ufeff"``,
    which ``str.strip()`` does not remove, so a first-line ``---`` or ``# ``
    would no longer be recognised by the front-matter and title parsing.

    Bytes that cannot be decoded are silently dropped (``errors="ignore"``).
    """
    return path.read_text(encoding="utf-8-sig", errors="ignore")
def title_from_content(path: Path, content: str) -> str:
    """Return the document title: the first level-1 heading, else the file stem.

    A heading is a line that, after stripping, starts with ``"# "``. Lines
    inside a YAML front-matter block or inside a fenced code block
    (delimited by lines starting with three backticks) are ignored, so a YAML
    comment or a shell comment in a code sample is not mistaken for the title.

    Args:
        path: Path of the document; only ``path.stem`` is used, as fallback.
        content: Full text of the document.

    Returns:
        The heading text without the leading ``"# "``, or ``path.stem`` when
        no heading is found.
    """
    # A BOM is not whitespace for str.strip(), so remove it explicitly.
    lines = [raw.strip() for raw in content.lstrip("\ufeff").splitlines()]

    # Front matter exists only when the first non-blank line is "---" AND a
    # closing "---" follows; an unterminated "---" is just a horizontal rule.
    start = 0
    first = next((i for i, line in enumerate(lines) if line), None)
    if first is not None and lines[first] == "---":
        try:
            start = lines.index("---", first + 1) + 1
        except ValueError:
            start = 0

    in_fence = False
    for line in lines[start:]:
        if line.startswith("```"):
            in_fence = not in_fence
            continue
        if not in_fence and line.startswith("# "):
            return line[2:].strip()
    return path.stem
def summary_from_content(content: str) -> str:
    """Return the first prose line of a Markdown document as a short summary.

    Skipped while searching: a YAML front-matter block at the top of the
    document, blank lines, lines starting with ``#`` (headings), ``---``
    (rules), ``|`` (table rows), and everything inside a fenced code block
    (delimited by lines starting with three backticks).

    Front matter is recognised only when ``---`` is the first non-blank line
    and a closing ``---`` exists. A ``---`` horizontal rule that appears
    after other content therefore no longer swallows the rest of the file.

    Args:
        content: Full text of the document.

    Returns:
        The first qualifying line, stripped and truncated to 180 characters,
        or the default ``"知识库文档。"`` when no such line exists.
    """
    # A BOM is not whitespace for str.strip(), so remove it explicitly.
    lines = [raw.strip() for raw in content.lstrip("\ufeff").splitlines()]

    start = 0
    first = next((i for i, line in enumerate(lines) if line), None)
    if first is not None and lines[first] == "---":
        try:
            start = lines.index("---", first + 1) + 1
        except ValueError:
            # Unterminated: treat the "---" as an ordinary horizontal rule.
            start = 0

    in_fence = False
    for line in lines[start:]:
        if line.startswith("```"):
            in_fence = not in_fence
            continue
        if in_fence or not line:
            continue
        if line.startswith(("#", "---", "|")):
            continue
        return line[:180]
    return "知识库文档。"
def tags_for(path: Path) -> list[str]:
    """Derive the tag list for a document from its location and file name.

    The first tag is the first component of the ROOT-relative path. A
    category tag is then appended for every rule whose keyword occurs in the
    file stem or whose folder name is one of the path components. Duplicates
    are removed while keeping first-seen order.

    Raises:
        ValueError: If *path* is not located under ``ROOT``.
    """
    # (keyword looked up in the file stem, folder looked up in the path parts, tag)
    rules = (
        ("需求", "05_需求文档", "需求文档"),
        ("测试", "08_测试相关", "测试相关"),
        ("技术", "07_技术文档", "技术文档"),
        ("里程碑", "06_里程碑", "里程碑"),
        ("Agent", "04_Agent检索", "Agent检索"),
    )
    components = path.relative_to(ROOT).parts
    stem = path.stem

    # NOTE(review): for a file directly under ROOT the first component is the
    # file name itself (e.g. "欢迎.md"), so it becomes a tag -- confirm intended.
    collected = list(components[:1])
    for keyword, folder, tag in rules:
        if keyword in stem or folder in components:
            collected.append(tag)

    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(collected))
def layer_for(path: Path) -> str | None:
    """Return the id of the first section in ``SECTIONS`` that owns *path*.

    A section owns the path when one of its ``paths`` entries (backslashes
    normalised to ``/``) equals the ROOT-relative path exactly, or is a
    directory prefix of it. Returns ``None`` when no section matches.
    """
    relative = rel(path)

    def _covers(entry: str) -> bool:
        normalized = entry.replace("\\", "/")
        if relative == normalized:
            return True
        # Append "/" so that "05_x" does not match a sibling such as "05_xy".
        return relative.startswith(normalized.rstrip("/") + "/")

    for section in SECTIONS:
        if any(_covers(entry) for entry in section["paths"]):
            return section["id"]
    return None
def edge(source: str, target: str, type_: str = "documents", weight: float = 0.8, description: str = "") -> dict:
    """Build one graph edge record.

    Args:
        source: Id of the node the edge starts at.
        target: Id of the node the edge points to.
        type_: Edge type; anything other than ``"documents"``, ``"related"``
            or ``"depends_on"`` is replaced by ``"related"``.
        weight: Edge weight, stored unchanged.
        description: Edge description; when empty, the resolved edge type is
            used instead.

    Returns:
        A dict with the keys ``source``, ``target``, ``type``, ``direction``
        (always ``"forward"``), ``description`` and ``weight``.
    """
    known_types = {"documents", "related", "depends_on"}
    kind = type_ if type_ in known_types else "related"
    label = description if description else kind

    record = {"source": source, "target": target}
    record["type"] = kind
    record["direction"] = "forward"
    record["description"] = label
    record["weight"] = weight
    return record
def _flow_id(section_id: str) -> str:
    """Return the id of the virtual flow node that represents a layer."""
    return f"flow:{section_id}"


def _build_document_nodes() -> list[dict]:
    """Create one graph node per Markdown file that belongs to a section."""
    doc_nodes: list[dict] = []
    for path in iter_markdown():
        lid = layer_for(path)
        if not lid:
            # Files outside every section are left out of the graph.
            continue
        content = read_text(path)
        rp = rel(path)
        doc_nodes.append(
            {
                # Drop the 3-character ".md" suffix matched by iter_markdown.
                "id": "doc:" + rp[:-3],
                "type": "document",
                "name": title_from_content(path, content),
                "filePath": rp,
                "summary": summary_from_content(content),
                "tags": tags_for(path),
                "complexity": "moderate" if len(content) > 4000 else "simple",
                "knowledgeMeta": {
                    "content": content,
                    "wikilinks": [],
                    "category": lid,
                },
            }
        )
    return doc_nodes


def _build_flow_nodes(doc_ids_by_layer: dict[str, list[str]]) -> list[dict]:
    """Create one virtual, numbered node per layer, in ``SECTIONS`` order.

    These nodes have no ``filePath``; they exist so the overview forms a
    clear flow even when a layer has many docs.
    """
    flow_nodes: list[dict] = []
    for order, section in enumerate(SECTIONS, start=1):
        doc_count = len(doc_ids_by_layer[section["id"]])
        flow_nodes.append(
            {
                "id": _flow_id(section["id"]),
                "type": "document",
                "name": f"{order}. {section['name']}",
                "summary": section["description"],
                "tags": ["流程入口", section["name"]],
                "complexity": "simple",
                "knowledgeMeta": {
                    "content": f"# {section['name']}\n\n{section['description']}\n\n本层包含 {doc_count} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。",
                    "wikilinks": [],
                    "category": section["id"],
                },
            }
        )
    return flow_nodes


def _build_edges(doc_nodes: list[dict], doc_ids_by_layer: dict[str, list[str]]) -> list[dict]:
    """Create flow-chain, layer-membership and document-reference edges."""
    flow_ids = [_flow_id(section["id"]) for section in SECTIONS]

    # 1. Chain consecutive layers into the main flow.
    edges = [edge(a, b, "documents", 1.0, "知识库主流程") for a, b in zip(flow_ids, flow_ids[1:])]

    # 2. Connect every layer's flow node to the documents of that layer.
    for section in SECTIONS:
        root_id = _flow_id(section["id"])
        edges.extend(
            edge(root_id, doc_id, "documents", 0.65, "本层文档")
            for doc_id in doc_ids_by_layer[section["id"]]
        )

    # 3. A document that mentions another document's file name (plain
    #    substring match on the text) gets an edge FROM the mentioned
    #    document TO itself. File names are computed once, outside the
    #    nested loop.
    file_names = [(n["filePath"], Path(n["filePath"]).name, n["id"]) for n in doc_nodes]
    for node in doc_nodes:
        content = node["knowledgeMeta"]["content"]
        for rp, file_name, target_id in file_names:
            if rp != node["filePath"] and file_name in content:
                edges.append(edge(target_id, node["id"], "depends_on", 0.7, "文档引用关系"))
    return edges


def _write_json(payload: dict, targets: list[Path]) -> None:
    """Serialise *payload* once and write the same text to every target."""
    text = json.dumps(payload, ensure_ascii=False, indent=2)
    for target in targets:
        target.write_text(text, encoding="utf-8")


def main() -> None:
    """Generate ``knowledge-graph.json`` and ``meta.json`` for the wiki.

    Scans ``ROOT`` for Markdown files, groups them into the layers defined
    by ``SECTIONS``, builds nodes, edges, layers and a tour, and writes the
    result to both ``UA_DIR`` and ``DASHBOARD_PUBLIC`` (created when
    missing). Prints a one-line summary when done.
    """
    doc_nodes = _build_document_nodes()

    # Document ids per layer, in scan order.
    doc_ids_by_layer: dict[str, list[str]] = {s["id"]: [] for s in SECTIONS}
    for node in doc_nodes:
        doc_ids_by_layer[node["knowledgeMeta"]["category"]].append(node["id"])

    # Document nodes first, then the virtual flow nodes.
    nodes = doc_nodes + _build_flow_nodes(doc_ids_by_layer)
    edges = _build_edges(doc_nodes, doc_ids_by_layer)

    layers = [
        {
            "id": section["id"],
            "name": section["name"],
            "description": section["description"],
            # The flow node leads the list, followed by the layer's documents.
            "nodeIds": [_flow_id(section["id"]), *doc_ids_by_layer[section["id"]]],
        }
        for section in SECTIONS
    ]

    graph = {
        "version": "1.0.0",
        "kind": "codebase",
        "project": {
            "name": "如愿知识库",
            "languages": ["markdown"],
            "frameworks": ["Understand-Anything", "Obsidian"],
            "description": "按需求文档、里程碑、技术文档、测试相关、Agent检索组织的流程式知识库。",
            "analyzedAt": datetime.now(timezone.utc).isoformat(),
            "gitCommitHash": "",
        },
        "nodes": nodes,
        "edges": edges,
        "layers": layers,
        "tour": [
            {
                "order": i,
                "title": section["name"],
                "description": section["description"],
                "nodeIds": [_flow_id(section["id"])],
            }
            for i, section in enumerate(SECTIONS, start=1)
        ],
    }

    meta = {
        "lastAnalyzedAt": graph["project"]["analyzedAt"],
        "gitCommitHash": "",
        "version": "1.0.0",
        # Only nodes backed by a file count as analysed files.
        "analyzedFiles": sum(1 for n in nodes if n.get("filePath")),
        "theme": {"presetId": "dark", "accentId": "cyan"},
    }

    UA_DIR.mkdir(parents=True, exist_ok=True)
    DASHBOARD_PUBLIC.mkdir(parents=True, exist_ok=True)
    _write_json(graph, [UA_DIR / "knowledge-graph.json", DASHBOARD_PUBLIC / "knowledge-graph.json"])
    _write_json(meta, [UA_DIR / "meta.json", DASHBOARD_PUBLIC / "meta.json"])

    print(f"Generated {len(nodes)} nodes, {len(edges)} edges, {len(layers)} layers")


if __name__ == "__main__":
    main()