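"""Generate the Understand-Anything knowledge graph files for the wiki.

Scans the Markdown files under ROOT, groups them into the layers declared in
SECTIONS, and writes knowledge-graph.json and meta.json.
"""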
from __future__ import annotations
|
|
|
|
import json
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
# Root folder of the wiki; every Markdown file below it is a candidate document.
ROOT = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\wishfulfilled-wiki")

# Dashboard folder that receives a second copy of the generated JSON files.
DASHBOARD_PUBLIC = Path(
    r"D:\AIcoding\WishFulfilled\知识库\under-anything\Understand-Anything-main\understand-anything-plugin\packages\dashboard\public"
)

# Output folder inside the wiki itself.
UA_DIR = ROOT.joinpath(".understand-anything")
|
|
|
|
# Layers of the knowledge base, in display order. Each row is
# (id, name, description, paths); "paths" holds file or folder paths relative
# to ROOT, and the first section with a matching path claims a document.
SECTIONS = [
    {"id": section_id, "name": name, "description": description, "paths": paths}
    for section_id, name, description, paths in (
        (
            "layer-overview",
            "知识库入口",
            "知识库使用说明、首页、知识地图和问答入口。先从这里理解知识库结构与检索方式。",
            ["知识库使用说明.md", "欢迎.md", "00_首页"],
        ),
        (
            "layer-requirements",
            "需求文档",
            "所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。",
            ["05_需求文档"],
        ),
        (
            "layer-milestones",
            "里程碑",
            "项目阶段计划、里程碑节点、评审记录、准入准出和交付物节奏。",
            ["06_里程碑", "02_项目管理流程"],
        ),
        (
            "layer-technical",
            "技术文档",
            "系统架构、数据模型、接口说明、技术方案和技术决策。",
            ["07_技术文档"],
        ),
        (
            "layer-testing",
            "测试相关",
            "测试计划、测试用例、缺陷记录、验收记录和上线检查。",
            ["08_测试相关"],
        ),
        (
            "layer-agent",
            "Agent检索",
            "检索说明、关键词、同义词、来源索引和持续更新验证流程。",
            ["04_Agent检索"],
        ),
    )
]
|
|
|
|
# Path components that exclude a Markdown file from the scan.
EXCLUDE_DIRS = {
    ".git",
    ".obsidian",
    ".understand-anything",
    "raw",
    "99_归档",
}
|
|
|
|
|
|
def rel(path: Path) -> str:
    """Return *path* relative to ``ROOT``, using forward slashes.

    Raises:
        ValueError: If *path* is not located under ``ROOT``.
    """
    relative = path.relative_to(ROOT)
    return relative.as_posix()
|
|
|
|
|
|
def iter_markdown() -> list[Path]:
    """Collect the Markdown files under ``ROOT``, sorted by relative path.

    A file is dropped when any component of its path relative to ``ROOT``
    appears in ``EXCLUDE_DIRS``.
    """
    kept = [
        candidate
        for candidate in ROOT.rglob("*.md")
        if EXCLUDE_DIRS.isdisjoint(candidate.relative_to(ROOT).parts)
    ]
    kept.sort(key=rel)
    return kept
|
|
|
|
|
|
def read_text(path: Path) -> str:
    """Read *path* as UTF-8 text and return its content.

    The ``utf-8-sig`` codec is used so that an optional leading byte-order
    mark is removed; files without one decode exactly as plain UTF-8. A BOM
    left in the text would stay glued to the first line and hide a leading
    ``# Title`` or ``---`` marker, because ``str.strip()`` does not remove it.

    Bytes that cannot be decoded are dropped (``errors="ignore"``) so that one
    badly encoded file does not abort the run.
    """
    return path.read_text(encoding="utf-8-sig", errors="ignore")
|
|
|
|
|
|
def title_from_content(path: Path, content: str) -> str:
    """Return the document title: its first level-1 heading, else the file stem.

    Lines inside a leading YAML front matter block or inside a fenced code
    block are ignored, so a ``# comment`` in either place is not mistaken for
    a heading.

    Args:
        path: File the content came from; only its stem is used, as fallback.
        content: Full Markdown text of the document.

    Returns:
        The text after ``# `` of the first level-1 heading, stripped, or
        ``path.stem`` when no such heading exists.
    """
    lines = content.splitlines()

    # Front matter only counts when "---" is the first non-blank line AND a
    # closing "---" exists; otherwise the text is scanned from the top.
    start = 0
    first = next((i for i, raw in enumerate(lines) if raw.strip()), None)
    if first is not None and lines[first].strip() == "---":
        closing = next(
            (i for i in range(first + 1, len(lines)) if lines[i].strip() == "---"),
            None,
        )
        if closing is not None:
            start = closing + 1

    fence = None  # marker ("```" or "~~~") of the code block being skipped
    for raw in lines[start:]:
        line = raw.strip()
        if fence is not None:
            if line.startswith(fence):
                fence = None
            continue
        if line.startswith(("```", "~~~")):
            fence = line[:3]
            continue
        if line.startswith("# "):
            return line[2:].strip()
    return path.stem
|
|
|
|
|
|
def summary_from_content(content: str) -> str:
    """Return a one-line summary: the first plain text line of the document.

    Skipped while searching: a leading YAML front matter block, fenced code
    blocks (fence lines and their content), blank lines, headings (``#``),
    horizontal rules (``---``) and table rows (``|``).

    Front matter is recognised only when ``---`` is the first non-blank line
    and a closing ``---`` follows. A ``---`` horizontal rule further down is
    therefore just skipped and no longer hides the text after it.

    Args:
        content: Full Markdown text of the document.

    Returns:
        The first qualifying line, stripped and cut to 180 characters, or the
        fallback text "知识库文档。" when the document has none.
    """
    lines = content.splitlines()

    start = 0
    first = next((i for i, raw in enumerate(lines) if raw.strip()), None)
    if first is not None and lines[first].strip() == "---":
        closing = next(
            (i for i in range(first + 1, len(lines)) if lines[i].strip() == "---"),
            None,
        )
        if closing is not None:
            start = closing + 1

    fence = None  # marker ("```" or "~~~") of the code block being skipped
    for raw in lines[start:]:
        line = raw.strip()
        if fence is not None:
            if line.startswith(fence):
                fence = None
            continue
        if line.startswith(("```", "~~~")):
            fence = line[:3]
            continue
        if not line or line.startswith(("#", "---", "|")):
            continue
        return line[:180]
    return "知识库文档。"
|
|
|
|
|
|
def tags_for(path: Path) -> list[str]:
    """Build the de-duplicated tag list for the document at *path*.

    The first tag is the first component of the path relative to ``ROOT``.
    A category tag is then added for every rule whose keyword occurs in the
    file stem or whose folder name is one of the path components.
    """
    segments = path.relative_to(ROOT).parts
    stem = path.stem

    # (keyword looked for in the stem, folder looked for in the path, tag)
    rules = (
        ("需求", "05_需求文档", "需求文档"),
        ("测试", "08_测试相关", "测试相关"),
        ("技术", "07_技术文档", "技术文档"),
        ("里程碑", "06_里程碑", "里程碑"),
        ("Agent", "04_Agent检索", "Agent检索"),
    )

    collected = list(segments[:1])
    collected.extend(
        tag for keyword, folder, tag in rules if keyword in stem or folder in segments
    )
    # dict.fromkeys removes duplicates while keeping first-seen order.
    return list(dict.fromkeys(collected))
|
|
|
|
|
|
def layer_for(path: Path) -> str | None:
    """Return the id of the first section that claims *path*, or ``None``.

    A section claims the file when one of its ``paths`` entries equals the
    file's path relative to ``ROOT``, or names a folder that contains it.
    """
    relative = rel(path)
    for section in SECTIONS:
        for raw_prefix in section["paths"]:
            prefix = raw_prefix.replace("\\", "/")
            if relative == prefix:
                return section["id"]
            folder = prefix.rstrip("/") + "/"
            if relative.startswith(folder):
                return section["id"]
    return None
|
|
|
|
|
|
def edge(source: str, target: str, type_: str = "documents", weight: float = 0.8, description: str = "") -> dict:
    """Build one forward edge record for the graph.

    Args:
        source: Id of the node the edge starts at.
        target: Id of the node the edge points to.
        type_: Edge type; anything other than "documents", "related" or
            "depends_on" is stored as "related".
        weight: Edge weight, stored unchanged.
        description: Edge label; the stored type is used when this is empty.
    """
    known_types = {"documents", "related", "depends_on"}
    kind = type_ if type_ in known_types else "related"
    label = description if description else kind
    return {
        "source": source,
        "target": target,
        "type": kind,
        "direction": "forward",
        "description": label,
        "weight": weight,
    }
|
|
|
|
|
|
def _build_document_nodes(layer_node_ids: dict[str, list[str]]) -> tuple[list[dict], dict[str, str]]:
    """Create one node per Markdown file that belongs to a layer.

    Appends each node id to its layer's list in *layer_node_ids* and returns
    the nodes together with a relative-path -> node-id map.
    """
    nodes: list[dict] = []
    path_to_id: dict[str, str] = {}
    for path in iter_markdown():
        layer_id = layer_for(path)
        if not layer_id:
            continue  # the file is not covered by any section
        content = read_text(path)
        relative = rel(path)
        node_id = "doc:" + relative[:-3]  # drop the ".md" suffix
        path_to_id[relative] = node_id
        nodes.append(
            {
                "id": node_id,
                "type": "document",
                "name": title_from_content(path, content),
                "filePath": relative,
                "summary": summary_from_content(content),
                "tags": tags_for(path),
                "complexity": "moderate" if len(content) > 4000 else "simple",
                "knowledgeMeta": {
                    "content": content,
                    "wikilinks": [],
                    "category": layer_id,
                },
            }
        )
        layer_node_ids[layer_id].append(node_id)
    return nodes, path_to_id


def _build_flow_nodes(layer_node_ids: dict[str, list[str]]) -> list[dict]:
    """Create one virtual entry node per section, in section order.

    Each entry node id is inserted at the front of its layer's list in
    *layer_node_ids*; the document count in the node text is taken before
    that insertion.
    """
    flow_nodes: list[dict] = []
    for order, section in enumerate(SECTIONS, start=1):
        layer_id = section["id"]
        node_id = f"flow:{layer_id}"
        docs = layer_node_ids[layer_id]
        flow_nodes.append(
            {
                "id": node_id,
                "type": "document",
                "name": f"{order}. {section['name']}",
                "summary": section["description"],
                "tags": ["流程入口", section["name"]],
                "complexity": "simple",
                "knowledgeMeta": {
                    "content": f"# {section['name']}\n\n{section['description']}\n\n本层包含 {len(docs)} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。",
                    "wikilinks": [],
                    "category": layer_id,
                },
            }
        )
        docs.insert(0, node_id)
    return flow_nodes


def _build_edges(
    doc_nodes: list[dict],
    layer_node_ids: dict[str, list[str]],
    path_to_id: dict[str, str],
) -> list[dict]:
    """Create the flow chain, layer membership and document reference edges.

    Expects *layer_node_ids* to already start with each layer's entry node.
    """
    flow_ids = [f"flow:{section['id']}" for section in SECTIONS]

    # 1. Chain the layer entry nodes in section order.
    edges = [
        edge(current, following, "documents", 1.0, "知识库主流程")
        for current, following in zip(flow_ids, flow_ids[1:])
    ]

    # 2. Link every layer entry node to the documents of its layer.
    for section in SECTIONS:
        root_id = f"flow:{section['id']}"
        for doc_id in layer_node_ids[section["id"]][1:]:
            edges.append(edge(root_id, doc_id, "documents", 0.65, "本层文档"))

    # 3. Link a document to every other document whose file name appears in
    #    its text. This is a plain substring test, so a short file name can
    #    also match inside a longer one. File names are computed once here
    #    instead of once per document pair.
    candidates = [
        (relative, Path(relative).name, target_id)
        for relative, target_id in path_to_id.items()
    ]
    for node in doc_nodes:
        content = node["knowledgeMeta"]["content"]
        for relative, file_name, target_id in candidates:
            if relative != node["filePath"] and file_name in content:
                edges.append(edge(target_id, node["id"], "depends_on", 0.7, "文档引用关系"))
    return edges


def _write_json(file_name: str, payload: dict) -> None:
    """Serialise *payload* once and write it to both output folders."""
    text = json.dumps(payload, ensure_ascii=False, indent=2)
    for directory in (UA_DIR, DASHBOARD_PUBLIC):
        (directory / file_name).write_text(text, encoding="utf-8")


def main() -> None:
    """Build the knowledge graph and write it next to its metadata.

    Writes ``knowledge-graph.json`` and ``meta.json`` into both ``UA_DIR`` and
    ``DASHBOARD_PUBLIC`` (creating the folders when missing) and prints the
    node, edge and layer counts.
    """
    layer_node_ids: dict[str, list[str]] = {section["id"]: [] for section in SECTIONS}

    doc_nodes, path_to_id = _build_document_nodes(layer_node_ids)
    # Entry nodes go after the documents in "nodes" but first in each layer.
    nodes = doc_nodes + _build_flow_nodes(layer_node_ids)
    edges = _build_edges(doc_nodes, layer_node_ids, path_to_id)

    layers = [
        {
            "id": section["id"],
            "name": section["name"],
            "description": section["description"],
            "nodeIds": layer_node_ids[section["id"]],
        }
        for section in SECTIONS
    ]

    graph = {
        "version": "1.0.0",
        "kind": "codebase",
        "project": {
            "name": "如愿知识库",
            "languages": ["markdown"],
            "frameworks": ["Understand-Anything", "Obsidian"],
            "description": "按需求文档、里程碑、技术文档、测试相关、Agent检索组织的流程式知识库。",
            "analyzedAt": datetime.now(timezone.utc).isoformat(),
            "gitCommitHash": "",
        },
        "nodes": nodes,
        "edges": edges,
        "layers": layers,
        "tour": [
            {
                "order": order,
                "title": section["name"],
                "description": section["description"],
                "nodeIds": [f"flow:{section['id']}"],
            }
            for order, section in enumerate(SECTIONS, start=1)
        ],
    }

    UA_DIR.mkdir(parents=True, exist_ok=True)
    DASHBOARD_PUBLIC.mkdir(parents=True, exist_ok=True)
    _write_json("knowledge-graph.json", graph)

    meta = {
        "lastAnalyzedAt": graph["project"]["analyzedAt"],
        "gitCommitHash": "",
        "version": "1.0.0",
        # Only document nodes carry a filePath, so this is the file count.
        "analyzedFiles": len(doc_nodes),
        "theme": {"presetId": "dark", "accentId": "cyan"},
    }
    _write_json("meta.json", meta)

    print(f"Generated {len(nodes)} nodes, {len(edges)} edges, {len(layers)} layers")
|
|
|
|
|
|
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()
|