Add under-anything knowledge dashboard

This commit is contained in:
qiaoxinjiu
2026-05-27 15:40:32 +08:00
commit e31a75d2bb
565 changed files with 143063 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
// Per-layer aggregation perf benchmark.
//
// Mirrors the BEFORE shape (graph.nodes.filter(n => layer.nodeIds.includes(n.id))
// per layer) and the AFTER shape (single nodesById Map + iterate layer.nodeIds)
// from `useOverviewGraph` in `src/components/GraphView.tsx`. Issue #102 reported
// a 4.8 MB knowledge graph that froze the dashboard on overview render — the
// quadratic Array.includes pass was the dominant synchronous cost.
//
// We can't import the dashboard helper directly (Vite-bundled, no
// per-module dist), so the new shape is reproduced here in lockstep with
// `src/utils/layerStats.ts::computeLayerStats`.
//
// Usage:
// node understand-anything-plugin/packages/dashboard/scripts/benchmark-aggregations.mjs
import { performance } from "node:perf_hooks";
/**
 * Build a synthetic graph for the benchmark.
 *
 * Produces `layerCount` layers that each own `nodesPerLayer` nodes. Node ids
 * look like `n-<layer>-<index>`, layer ids like `L<layer>`, and complexity
 * cycles simple → moderate → complex by `(layer + index) % 3`.
 *
 * @param {number} layerCount - Number of layers to generate.
 * @param {number} nodesPerLayer - Number of nodes owned by each layer.
 * @returns {{nodes: Array<{id: string, complexity: string}>, layers: Array<{id: string, nodeIds: string[]}>}}
 */
function makeGraph(layerCount, nodesPerLayer) {
  const COMPLEXITY_CYCLE = ["simple", "moderate", "complex"];
  const allNodes = [];
  const allLayers = [];
  let layerIdx = 0;
  while (layerIdx < layerCount) {
    const memberIds = [];
    let nodeIdx = 0;
    while (nodeIdx < nodesPerLayer) {
      const nodeId = `n-${layerIdx}-${nodeIdx}`;
      allNodes.push({
        id: nodeId,
        complexity: COMPLEXITY_CYCLE[(layerIdx + nodeIdx) % 3],
      });
      memberIds.push(nodeId);
      nodeIdx += 1;
    }
    allLayers.push({ id: `L${layerIdx}`, nodeIds: memberIds });
    layerIdx += 1;
  }
  return { nodes: allNodes, layers: allLayers };
}
// --- BEFORE: O(N × K × L) per overview render ----------------------------
/**
 * Reference implementation of the old aggregation shape.
 *
 * For every layer the full node list is filtered with `Array.includes`
 * against the layer's id list. That quadratic pass is what the benchmark
 * measures, so it must stay exactly this shape.
 *
 * @param {{nodes: Array<{id: string, complexity: string}>, layers: Array<{id: string, nodeIds: string[]}>}} graph
 * @returns {Array<{id: string, aggregateComplexity: string}>} One entry per layer, in layer order.
 */
function aggregateBefore(graph) {
  return graph.layers.map((layer) => {
    const memberNodes = graph.nodes.filter((n) => layer.nodeIds.includes(n.id));
    const tally = { simple: 0, moderate: 0, complex: 0 };
    memberNodes.forEach((member) => {
      tally[member.complexity]++;
    });
    // A bucket "wins" when it holds more than 30% of the layer's members;
    // complex is checked before moderate.
    const cutoff = memberNodes.length * 0.3;
    let aggregateComplexity = "simple";
    if (tally.complex > cutoff) {
      aggregateComplexity = "complex";
    } else if (tally.moderate > cutoff) {
      aggregateComplexity = "moderate";
    }
    return { id: layer.id, aggregateComplexity };
  });
}
// --- AFTER: O(N + Σ K_i) per overview render ----------------------------
/**
 * New aggregation shape: walk each layer's id list and resolve nodes through
 * a prebuilt id → node Map. Ids missing from the Map are ignored and do not
 * count toward the layer size.
 *
 * @param {{layers: Array<{id: string, nodeIds: string[]}>}} graph
 * @param {Map<string, {id: string, complexity: string}>} nodesById
 * @returns {Array<{id: string, aggregateComplexity: string}>} One entry per layer, in layer order.
 */
function aggregateAfter(graph, nodesById) {
  return graph.layers.map((layer) => {
    const tally = { simple: 0, moderate: 0, complex: 0 };
    let resolvedCount = 0;
    layer.nodeIds.forEach((nodeId) => {
      const found = nodesById.get(nodeId);
      if (found) {
        resolvedCount += 1;
        tally[found.complexity]++;
      }
    });
    // Same 30% rule as the BEFORE shape, measured against resolved members.
    const cutoff = resolvedCount * 0.3;
    let aggregateComplexity = "simple";
    if (tally.complex > cutoff) {
      aggregateComplexity = "complex";
    } else if (tally.moderate > cutoff) {
      aggregateComplexity = "moderate";
    }
    return { id: layer.id, aggregateComplexity };
  });
}
/**
 * Time both aggregation shapes on one synthetic graph and print one summary
 * line (sizes, BEFORE ms, AFTER ms, speedup, parity).
 *
 * The id → node Map is built inside the AFTER timing window: building that
 * index is the O(N) part of the AFTER cost, so leaving it out would overstate
 * the speedup.
 *
 * @param {string} label - Name printed at the start of the summary line.
 * @param {number} layerCount - Number of layers in the synthetic graph.
 * @param {number} nodesPerLayer - Nodes per layer in the synthetic graph.
 * @returns {void}
 */
function bench(label, layerCount, nodesPerLayer) {
  const graph = makeGraph(layerCount, nodesPerLayer);

  const t0 = performance.now();
  const before = aggregateBefore(graph);
  const t1 = performance.now();
  const nodesById = new Map(graph.nodes.map((n) => [n.id, n]));
  const after = aggregateAfter(graph, nodesById);
  const t2 = performance.now();

  const beforeMs = t1 - t0;
  const afterMs = t2 - t1;
  const speedup = afterMs > 0 ? beforeMs / afterMs : Infinity;
  const parity = JSON.stringify(before) === JSON.stringify(after);
  // If the two shapes disagree the timings compare different computations,
  // so make the script exit non-zero instead of only printing "parity false".
  if (!parity) process.exitCode = 1;

  console.log(
    `${label} (${layerCount} layers × ${nodesPerLayer} nodes = ${graph.nodes.length} total): ` +
      `BEFORE ${beforeMs.toFixed(1)}ms | AFTER ${afterMs.toFixed(1)}ms | ` +
      `${speedup.toFixed(1)}× faster | parity ${parity}`,
  );
}
// Run the comparison at increasing graph sizes. The last case is the one
// labelled after issue #102 (100 layers × 200 nodes = 20,000 nodes).
bench("small", 10, 50);
bench("medium", 30, 100);
bench("large", 50, 200);
bench("issue#102 shape", 100, 200);

View File

@@ -0,0 +1,80 @@
// Stage 1 ELK layout perf benchmark.
//
// Mirrors `applyElkLayout` from `src/utils/elk-layout.ts` using `elkjs`
// directly. The dashboard build is a Vite bundle (hashed chunks), so it has
// no per-module `dist/utils/elk-layout.js` we can import. The Stage 1 hot
// path is `elk.layout()` on a sized input, which we reproduce faithfully
// here — same default node dimensions, same dim-defaulting behavior.
//
// Targets (spec §8.3):
// - Stage 1 < 200ms at 500 nodes
// - Stage 1 < 500ms at 3000 nodes
//
// Usage:
// node understand-anything-plugin/packages/dashboard/scripts/benchmark-layout.mjs
import { performance } from "node:perf_hooks";
import ELK from "elkjs/lib/elk.bundled.js";
// Keep in lockstep with NODE_WIDTH / NODE_HEIGHT in src/utils/layout.ts.
// These are the fallback dimensions applied to nodes that arrive unsized.
const DEFAULT_NODE_WIDTH = 280;
const DEFAULT_NODE_HEIGHT = 120;
// One ELK instance is shared by every benchmark run in this script.
const elk = new ELK();
/**
 * Return a copy of `children` in which every node has a width and a height.
 *
 * Nodes whose `width` / `height` is null or undefined receive the default
 * dimensions; nested `children` arrays are processed recursively. Input
 * nodes are shallow-copied, never mutated. (Mirrors repairElkInput's
 * ensureNodeDimensions step, which is part of the measured path.)
 *
 * @param {Array<object>} children - ELK child nodes, possibly nested.
 * @returns {Array<object>} New array of sized node copies.
 */
function fillDims(children) {
  return children.map((original) => {
    const sized = { ...original };
    sized.width ??= DEFAULT_NODE_WIDTH;
    sized.height ??= DEFAULT_NODE_HEIGHT;
    if (sized.children) {
      sized.children = fillDims(sized.children);
    }
    return sized;
  });
}
/**
 * Size every child of `input` and hand the repaired graph to ELK.
 *
 * @param {{children: Array<object>}} input - ELK root graph.
 * @returns {Promise<object>} Whatever `elk.layout` resolves to.
 */
async function applyElkLayout(input) {
  const sizedChildren = fillDims(input.children);
  const repairedGraph = { ...input, children: sizedChildren };
  return elk.layout(repairedGraph);
}
/**
 * Synthetic Stage 1 graph: top-level container nodes with a sparse edge mesh.
 * Stage 1 only lays out containers (lazy children — see plan §3), so the
 * "node count" parameter is interpreted as total leaves and only influences
 * the default container count, `min(20, ceil(nodeCount / 25))`. That default
 * is capped at 20, so every nodeCount ≥ 500 yields the same 20 containers.
 *
 * Each ordered pair (i < j) of containers gets an edge when `random()`
 * returns a value below 0.3. Pass a deterministic `random` to make the mesh
 * reproducible between runs.
 *
 * @param {number} nodeCount - Nominal leaf count; only feeds the default container count.
 * @param {number} [containerCount] - Number of 400×300 containers to create.
 * @param {() => number} [random=Math.random] - Source of values in [0, 1).
 * @returns {{id: string, children: Array<object>, edges: Array<object>}} ELK root graph.
 */
function makeGraph(
  nodeCount,
  containerCount = Math.min(20, Math.ceil(nodeCount / 25)),
  random = Math.random,
) {
  const containers = Array.from({ length: containerCount }, (_, i) => ({
    id: `c${i}`,
    width: 400,
    height: 300,
  }));
  const edges = [];
  for (let i = 0; i < containerCount; i++) {
    for (let j = i + 1; j < containerCount; j++) {
      if (random() < 0.3) {
        edges.push({ id: `e-${i}-${j}`, sources: [`c${i}`], targets: [`c${j}`] });
      }
    }
  }
  return { id: "root", children: containers, edges };
}
/**
 * Lay out one synthetic graph and report how long `applyElkLayout` took.
 *
 * @param {string} label - Prefix for the printed line.
 * @param {number} n - Nominal node count passed to `makeGraph`.
 * @returns {Promise<number>} Elapsed wall-clock milliseconds.
 */
async function bench(label, n) {
  const graph = makeGraph(n);
  const startedAt = performance.now();
  await applyElkLayout(graph);
  const finishedAt = performance.now();
  const elapsedMs = finishedAt - startedAt;
  console.log(`${label} (${n} nodes): ${elapsedMs.toFixed(1)}ms`);
  return elapsedMs;
}
// Runs are awaited one after another so their timings do not overlap.
// NOTE: with makeGraph's default container count (capped at 20), all three
// sizes below produce a 20-container input.
await bench("Stage1", 500);
await bench("Stage1", 1000);
await bench("Stage1", 3000);

View File

@@ -0,0 +1,267 @@
from __future__ import annotations
import json
from datetime import datetime, timezone
from pathlib import Path
# Absolute, machine-specific locations. ROOT is the wiki that gets scanned;
# DASHBOARD_PUBLIC and UA_DIR are the two directories the generated JSON is
# written to. NOTE(review): these paths are hard-coded for one Windows
# workstation — the script will not run elsewhere without editing them.
ROOT = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\wishfulfilled-wiki")
DASHBOARD_PUBLIC = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\Understand-Anything-main\understand-anything-plugin\packages\dashboard\public")
UA_DIR = ROOT / ".understand-anything"
# Ordered layer definitions. Each entry becomes one graph layer; "paths" lists
# the ROOT-relative files or directory prefixes that belong to the layer. The
# list order also drives the numbered flow nodes and the tour.
SECTIONS = [
    {
        "id": "layer-overview",
        "name": "知识库入口",
        "description": "知识库使用说明、首页、知识地图和问答入口。先从这里理解知识库结构与检索方式。",
        "paths": ["知识库使用说明.md", "欢迎.md", "00_首页"],
    },
    {
        "id": "layer-requirements",
        "name": "需求文档",
        "description": "所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。",
        "paths": ["05_需求文档"],
    },
    {
        "id": "layer-milestones",
        "name": "里程碑",
        "description": "项目阶段计划、里程碑节点、评审记录、准入准出和交付物节奏。",
        "paths": ["06_里程碑", "02_项目管理流程"],
    },
    {
        "id": "layer-technical",
        "name": "技术文档",
        "description": "系统架构、数据模型、接口说明、技术方案和技术决策。",
        "paths": ["07_技术文档"],
    },
    {
        "id": "layer-testing",
        "name": "测试相关",
        "description": "测试计划、测试用例、缺陷记录、验收记录和上线检查。",
        "paths": ["08_测试相关"],
    },
    {
        "id": "layer-agent",
        "name": "Agent检索",
        "description": "检索说明、关键词、同义词、来源索引和持续更新验证流程。",
        "paths": ["04_Agent检索"],
    },
]
# Any markdown file with one of these names among its ROOT-relative path
# components is skipped by iter_markdown().
EXCLUDE_DIRS = {".git", ".obsidian", ".understand-anything", "raw", "99_归档"}
def rel(path: Path) -> str:
    """Return ``path`` relative to ROOT, using forward slashes."""
    relative = path.relative_to(ROOT)
    return relative.as_posix()
def iter_markdown() -> list[Path]:
    """Collect every ``*.md`` file under ROOT, sorted by ROOT-relative path.

    A file is dropped when any component of its ROOT-relative path is listed
    in EXCLUDE_DIRS.
    """
    kept = [
        candidate
        for candidate in ROOT.rglob("*.md")
        if EXCLUDE_DIRS.isdisjoint(candidate.relative_to(ROOT).parts)
    ]
    kept.sort(key=rel)
    return kept
def read_text(path: Path) -> str:
    """Read a document as UTF-8 text, dropping undecodable bytes.

    ``utf-8-sig`` is used so that a leading byte-order mark (common in files
    saved by Windows editors) is stripped instead of ending up as ``\\ufeff``
    at the start of the content, where it would defeat checks such as
    ``line == "---"`` or ``line.startswith("# ")`` on the first line.
    """
    return path.read_text(encoding="utf-8-sig", errors="ignore")
def title_from_content(path: Path, content: str) -> str:
    """Return the text of the first level-1 heading, else the file stem.

    A heading is a line that, once stripped, starts with ``"# "``.
    """
    stripped_lines = (raw.strip() for raw in content.splitlines())
    heading = next((ln for ln in stripped_lines if ln.startswith("# ")), None)
    if heading is None:
        return path.stem
    return heading[2:].strip()
def summary_from_content(content: str) -> str:
    """Return the first prose line of a markdown document (max 180 chars).

    Skipped while searching: a YAML front-matter block, blank lines, headings
    (``#``), horizontal rules / separators (``---``), table rows (``|``) and
    code-fence markers (three backticks). Falls back to a fixed placeholder
    when nothing qualifies.

    Front matter is only recognised when ``---`` is the first non-blank line
    of the document. A ``---`` appearing later (for example a horizontal rule
    right under the title) is just a separator and must not swallow the body
    that follows it.
    """
    in_frontmatter = False
    seen_content = False  # True once any non-blank line has been read
    # A stray BOM would make the opening '---' unrecognisable.
    for raw in content.lstrip("\ufeff").splitlines():
        line = raw.strip()
        if not line:
            continue
        if in_frontmatter:
            if line == "---":
                in_frontmatter = False
            continue
        if line == "---" and not seen_content:
            in_frontmatter = True
            seen_content = True
            continue
        seen_content = True
        if line.startswith(("#", "---", "|", "```")):
            continue
        return line[:180]
    return "知识库文档。"
def tags_for(path: Path) -> list[str]:
    """Derive de-duplicated tags for a document.

    The first ROOT-relative path component is always the first tag. A
    category tag is added when its keyword occurs in the file stem or its
    folder name is one of the path components.
    """
    segments = path.relative_to(ROOT).parts
    stem = path.stem
    # (keyword looked up in the stem, folder looked up in the path, tag)
    rules = (
        ("需求", "05_需求文档", "需求文档"),
        ("测试", "08_测试相关", "测试相关"),
        ("技术", "07_技术文档", "技术文档"),
        ("里程碑", "06_里程碑", "里程碑"),
        ("Agent", "04_Agent检索", "Agent检索"),
    )
    collected = list(segments[:1])
    collected.extend(
        tag for keyword, folder, tag in rules if keyword in stem or folder in segments
    )
    # dict.fromkeys drops duplicates while keeping first-seen order.
    return list(dict.fromkeys(collected))
def layer_for(path: Path) -> str | None:
    """Return the id of the first section whose paths cover ``path``.

    A section path matches when it equals the ROOT-relative path exactly or
    is a directory prefix of it. Returns None when no section matches.
    """
    relative = rel(path)
    for section in SECTIONS:
        for raw_prefix in section["paths"]:
            normalized = raw_prefix.replace("\\", "/")
            if relative == normalized:
                return section["id"]
            directory_prefix = normalized.rstrip("/") + "/"
            if relative.startswith(directory_prefix):
                return section["id"]
    return None
def edge(source: str, target: str, type_: str = "documents", weight: float = 0.8, description: str = "") -> dict:
    """Build one forward graph edge.

    ``type_`` is kept when it is ``documents``, ``related`` or ``depends_on``;
    anything else becomes ``related``. An empty description defaults to the
    resolved edge type.
    """
    known_types = frozenset({"documents", "related", "depends_on"})
    resolved = type_ if type_ in known_types else "related"
    label = description if description else resolved
    return {
        "source": source,
        "target": target,
        "type": resolved,
        "direction": "forward",
        "description": label,
        "weight": weight,
    }
def main() -> None:
    """Scan the wiki, build the knowledge graph, and write it to disk.

    Steps:
      1. One ``doc:`` node per markdown file that belongs to a section.
      2. One virtual ``flow:`` node per section, placed first in its layer.
      3. Edges: flow → next flow, flow → each document of its layer, and
         ``depends_on`` edges whenever a document's text contains another
         document's file name.
      4. ``knowledge-graph.json`` and ``meta.json`` are written to both UA_DIR
         and DASHBOARD_PUBLIC.
    """
    section_ids = [section["id"] for section in SECTIONS]
    layer_node_ids: dict[str, list[str]] = {sid: [] for sid in section_ids}
    path_to_id: dict[str, str] = {}
    nodes: list[dict] = []

    # --- 1. document nodes -------------------------------------------------
    for md_path in iter_markdown():
        layer_id = layer_for(md_path)
        if not layer_id:
            continue
        text = read_text(md_path)
        rel_path = rel(md_path)
        doc_id = "doc:" + rel_path[:-3]  # drop the ".md" suffix
        path_to_id[rel_path] = doc_id
        nodes.append(
            {
                "id": doc_id,
                "type": "document",
                "name": title_from_content(md_path, text),
                "filePath": rel_path,
                "summary": summary_from_content(text),
                "tags": tags_for(md_path),
                "complexity": "moderate" if len(text) > 4000 else "simple",
                "knowledgeMeta": {
                    "content": text,
                    "wikilinks": [],
                    "category": layer_id,
                },
            }
        )
        layer_node_ids[layer_id].append(doc_id)

    # --- 2. one virtual process node per layer ------------------------------
    # Gives the overview a clear flow even when a layer has many docs.
    flow_ids = [f"flow:{sid}" for sid in section_ids]
    for position, (section, flow_id) in enumerate(zip(SECTIONS, flow_ids), start=1):
        members = layer_node_ids[section["id"]]
        doc_count = len(members)  # counted before the flow node is inserted
        nodes.append(
            {
                "id": flow_id,
                "type": "document",
                "name": f"{position}. {section['name']}",
                "summary": section["description"],
                "tags": ["流程入口", section["name"]],
                "complexity": "simple",
                "knowledgeMeta": {
                    "content": f"# {section['name']}\n\n{section['description']}\n\n本层包含 {doc_count} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。",
                    "wikilinks": [],
                    "category": section["id"],
                },
            }
        )
        members.insert(0, flow_id)

    # --- 3. edges -----------------------------------------------------------
    edges = [
        edge(previous, following, "documents", 1.0, "知识库主流程")
        for previous, following in zip(flow_ids, flow_ids[1:])
    ]
    for sid, flow_id in zip(section_ids, flow_ids):
        # Index 0 is the flow node itself; everything after it is a document.
        edges.extend(
            edge(flow_id, doc_id, "documents", 0.65, "本层文档")
            for doc_id in layer_node_ids[sid][1:]
        )
    # A document that mentions another document's file name depends on it.
    for node in nodes:
        own_path = node.get("filePath")
        if not own_path:
            continue
        body = node.get("knowledgeMeta", {}).get("content", "")
        edges.extend(
            edge(target_id, node["id"], "depends_on", 0.7, "文档引用关系")
            for other_path, target_id in path_to_id.items()
            if other_path != own_path and Path(other_path).name in body
        )

    # --- 4. assemble and write ----------------------------------------------
    layers = []
    for section in SECTIONS:
        layers.append(
            {
                "id": section["id"],
                "name": section["name"],
                "description": section["description"],
                "nodeIds": layer_node_ids[section["id"]],
            }
        )
    tour = []
    for position, section in enumerate(SECTIONS, start=1):
        tour.append(
            {
                "order": position,
                "title": section["name"],
                "description": section["description"],
                "nodeIds": [f"flow:{section['id']}"],
            }
        )
    graph = {
        "version": "1.0.0",
        "kind": "codebase",
        "project": {
            "name": "如愿知识库",
            "languages": ["markdown"],
            "frameworks": ["Understand-Anything", "Obsidian"],
            "description": "按需求文档、里程碑、技术文档、测试相关、Agent检索组织的流程式知识库。",
            "analyzedAt": datetime.now(timezone.utc).isoformat(),
            "gitCommitHash": "",
        },
        "nodes": nodes,
        "edges": edges,
        "layers": layers,
        "tour": tour,
    }

    output_dirs = (UA_DIR, DASHBOARD_PUBLIC)
    for directory in output_dirs:
        directory.mkdir(parents=True, exist_ok=True)

    graph_json = json.dumps(graph, ensure_ascii=False, indent=2)
    for directory in output_dirs:
        (directory / "knowledge-graph.json").write_text(graph_json, encoding="utf-8")

    meta = {
        "lastAnalyzedAt": graph["project"]["analyzedAt"],
        "gitCommitHash": "",
        "version": "1.0.0",
        "analyzedFiles": len([n for n in nodes if n.get("filePath")]),
        "theme": {"presetId": "dark", "accentId": "cyan"},
    }
    meta_json = json.dumps(meta, ensure_ascii=False, indent=2)
    for directory in output_dirs:
        (directory / "meta.json").write_text(meta_json, encoding="utf-8")

    print(f"Generated {len(nodes)} nodes, {len(edges)} edges, {len(layers)} layers")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,563 @@
import http from "node:http";
import fs from "node:fs";
import path from "node:path";
import { fileURLToPath } from "node:url";
// ESM has no built-in __dirname; derive it from this module's URL.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Default static root: a "wishfulfilled-dashboard" directory five levels above this script.
const defaultStaticRoot = path.resolve(__dirname, "..", "..", "..", "..", "..", "wishfulfilled-dashboard");
// Environment overrides: RAG_STATIC_ROOT, RAG_GRAPH_PATH, PORT / RAG_PORT, HOST.
const staticRoot = process.env.RAG_STATIC_ROOT || defaultStaticRoot;
const graphPath = process.env.RAG_GRAPH_PATH || path.join(staticRoot, "knowledge-graph.json");
const port = Number(process.env.PORT || process.env.RAG_PORT || 8080);
// NOTE(review): the default host is "0.0.0.0" (all interfaces) — confirm this
// is intended wherever the server is deployed.
const host = process.env.HOST || "0.0.0.0";
// File extension → Content-Type. Presumably consumed by the static file
// handler, which is outside this part of the file — verify.
const MIME = {
  ".html": "text/html; charset=utf-8",
  ".js": "text/javascript; charset=utf-8",
  ".css": "text/css; charset=utf-8",
  ".json": "application/json; charset=utf-8",
  ".svg": "image/svg+xml",
  ".ico": "image/x-icon",
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".woff": "font/woff",
  ".woff2": "font/woff2",
};
// Query-expansion dictionary: when a key is detected in a query, the key and
// all of its listed phrases are added to the query's expanded terms.
const SYNONYMS = {
  回评: ["评价提交", "评价展示", "客服邀评", "评价结果追踪", "review submission", "review display", "review follow-up", "邀评", "留评"],
  黑名单: ["blacklist", "blacklist_entities", "风险", "风控", "风险案件", "risk_cases", "risk_signals", "反欺诈", "拦截"],
  测评: ["评价计划", "review plan", "评价提交", "额度统计", "测评单"],
  免评: ["KOC", "KOL", "内容发布", "引流", "带货", "exemption"],
  真实人: ["person", "person_profiles", "跨账号归并", "身份归并", "真实人归并"],
  额度: ["4/4/12", "person_quota_ledgers", "额度台账", "预占", "额度与频控"],
  对外API: ["对外 API 契约", "接口契约", "API 草案", "接口说明", "对外接口"],
  工单: ["客服工单", "support_tickets", "客服跟进", "support_followups"],
  风险: ["风险与反欺诈", "risk_signals", "risk_cases", "黑名单", "反欺诈"],
};
// In-memory cache maintained by loadGraph(): the parsed graph, the file mtime
// it was read at, and the search chunks derived from its nodes.
let graphCache = null;
let graphMtime = 0;
let chunksCache = [];
/**
 * Write `data` as a pretty-printed JSON response with no-store caching and
 * permissive CORS headers.
 *
 * @param {{writeHead: Function, end: Function}} res - HTTP response object.
 * @param {number} status - HTTP status code.
 * @param {*} data - Value to serialise.
 * @returns {void}
 */
function sendJson(res, status, data) {
  // Serialise first so a stringify failure happens before any header is sent.
  const payload = JSON.stringify(data, null, 2);
  const headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Cache-Control": "no-store",
    "Access-Control-Allow-Origin": "*",
    "Access-Control-Allow-Headers": "Content-Type, Authorization",
    "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
  };
  res.writeHead(status, headers);
  res.end(payload);
}
/**
 * Coerce a value to text: strings pass through, arrays are joined with a
 * single space, everything else becomes the empty string.
 *
 * @param {*} value
 * @returns {string}
 */
function normalizeText(value) {
  if (typeof value === "string") return value;
  return Array.isArray(value) ? value.join(" ") : "";
}
/**
 * Remove HTML markup from `text` and decode a handful of common entities.
 *
 * `<script>` / `<style>` elements (including their content) and all other
 * tags are each replaced by one space. `&nbsp;`, `&lt;`, `&gt;` and `&amp;`
 * are then decoded.
 *
 * `&amp;` is decoded LAST. Decoding it first would turn an escaped entity
 * such as `&amp;lt;` into `&lt;` and then into `<`, i.e. decode it twice.
 *
 * @param {string} text
 * @returns {string}
 */
function stripHtml(text) {
  return text
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ")
    .replace(/&nbsp;/g, " ")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&amp;/g, "&");
}
/**
 * Remove `<digits>|` prefixes from lines when enough lines carry one.
 *
 * A line qualifies when it is optional whitespace, digits, a pipe, and at
 * least one more character. The cleaned text is returned only if the number
 * of qualifying lines reaches `max(3, floor(lineCount / 4))`; otherwise the
 * input is returned untouched.
 *
 * @param {string} text
 * @returns {string}
 */
function stripInjectedLineNumbers(text) {
  const numberedLine = /^\s*\d+\|(.+)$/;
  const lines = text.split("\n");
  const cleaned = [];
  let strippedCount = 0;
  for (const line of lines) {
    const found = numberedLine.exec(line);
    if (found) {
      strippedCount += 1;
      cleaned.push(found[1]);
    } else {
      cleaned.push(line);
    }
  }
  const threshold = Math.max(3, Math.floor(lines.length / 4));
  if (strippedCount < threshold) return text;
  return cleaned.join("\n");
}
/**
 * Reduce a value to a punctuation-free, lower-case key for fuzzy comparison.
 *
 * Whitespace, markdown markers and both ASCII and CJK / full-width
 * punctuation are removed. The full-width forms are written as `\uFFxx`
 * escapes so they cannot be silently folded into their ASCII look-alikes by
 * an editor or normaliser (the previous class listed `()` and `:` twice,
 * which left full-width parentheses, colons, commas etc. in the key).
 *
 * @param {*} value - Anything; falsy values yield "".
 * @returns {string}
 */
function compactKey(value) {
  // \uFF08 \uFF09 = full-width parentheses, \uFF0C comma, \uFF1A colon,
  // \uFF1B semicolon, \uFF01 exclamation mark, \uFF1F question mark.
  const punctuation = /[\s`*_>#|\-\[\]()\uFF08\uFF09【】《》“”"',\uFF0C。、:\uFF1A;\uFF1B.\/\\!\uFF01?\uFF1F]+/g;
  return String(value || "")
    .toLowerCase()
    .replace(punctuation, "");
}
/**
 * Normalise text for substring search: strip injected line numbers and HTML,
 * lower-case, turn markdown markers and CJK brackets/quotes into spaces, and
 * collapse whitespace.
 *
 * The second replace previously used an empty character class (`/[]/g`),
 * which can never match and therefore did nothing.
 * NOTE(review): the intended contents of that class are not recoverable from
 * the source; zero-width / BOM characters are assumed here and written as
 * explicit escapes so they stay visible. Confirm against the author's intent.
 *
 * @param {*} value - Anything; falsy values yield "".
 * @returns {string}
 */
function normalizeForSearch(value) {
  return stripHtml(stripInjectedLineNumbers(String(value || "")))
    .toLowerCase()
    .replace(/[\t\r]+/g, " ")
    .replace(/[\u200B-\u200D\u2060\uFEFF]/g, " ")
    .replace(/[【】《》“”]/g, " ")
    .replace(/[_*`>#|\[\]]/g, " ")
    .replace(/\s+/g, " ")
    .trim();
}
/**
 * Stringify and trim every value, drop empties, and de-duplicate while
 * keeping first-seen order. Falsy inputs (including 0) become "" and are
 * dropped.
 *
 * @param {Array<*>} values
 * @returns {string[]}
 */
function unique(values) {
  const seen = new Set();
  for (const value of values) {
    const text = String(value || "").trim();
    if (text) seen.add(text);
  }
  return [...seen];
}
/**
 * Tokenise text for matching.
 *
 * Produces, in this order and de-duplicated: latin/number runs of 2+ chars,
 * CJK runs of 2+ chars, CJK bigrams, CJK trigrams, and single CJK characters
 * that are not in the stop-character list. The n-grams are built over the
 * sequence of all CJK characters in the text, so they may join characters
 * that were separated by non-CJK text.
 *
 * @param {*} text
 * @returns {string[]}
 */
function tokenizeText(text) {
  const normalized = normalizeForSearch(text);
  const latinTokens = normalized.match(/[a-z0-9_.\/+-]{2,}/g) ?? [];
  const cjkRuns = normalized.match(/[\u4e00-\u9fa5]{2,}/g) ?? [];
  const cjkChars = normalized.match(/[\u4e00-\u9fa5]/g) ?? [];

  const bigrams = [];
  const trigrams = [];
  cjkChars.forEach((ch, i) => {
    if (i + 1 < cjkChars.length) bigrams.push(`${ch}${cjkChars[i + 1]}`);
    if (i + 2 < cjkChars.length) trigrams.push(`${ch}${cjkChars[i + 1]}${cjkChars[i + 2]}`);
  });

  const stopChar = /[是什么吗呢的了和与及或在中为]/;
  const singles = cjkChars.filter((ch) => !stopChar.test(ch));

  return unique([...latinTokens, ...cjkRuns, ...bigrams, ...trigrams, ...singles]);
}
/**
 * Classify a question by keyword. Rules are tried top to bottom and the
 * first matching pattern decides; "general" is the fallback.
 *
 * @param {*} query - Raw question text.
 * @returns {string} One of api, data_model, process, rule, responsibility,
 *   test, milestone, definition, general.
 */
function detectQuestionType(query) {
  const text = normalizeForSearch(query);
  const rules = [
    [/api|接口|契约|endpoint|对外/, "api"],
    [/数据|字段|表|对象|模型|schema|建表/, "data_model"],
    [/流程|怎么走|如何|步骤|流转|处理/, "process"],
    [/规则|限制|口径|额度|频控|条件/, "rule"],
    [/谁|负责|职责|owner|角色/, "responsibility"],
    [/测试|验收|用例|缺陷|上线/, "test"],
    [/里程碑|阶段|计划|节点/, "milestone"],
    [/是什么|定义|含义|意思|解释/, "definition"],
  ];
  const matched = rules.find(([pattern]) => pattern.test(text));
  return matched ? matched[1] : "general";
}
/**
 * Expand a query with synonym groups.
 *
 * A SYNONYMS entry is selected when its (compacted) key occurs in the
 * compacted raw query, or when it contains / is contained in any compacted
 * token. Selected keys and all their synonyms are returned de-duplicated.
 *
 * Tokens that compact to the empty string (e.g. "--") are ignored: every
 * string "includes" the empty string, so such a token used to select every
 * synonym group. Token keys are also compacted once up front instead of once
 * per dictionary entry.
 *
 * @param {string[]} tokens - Tokens from tokenizeText.
 * @param {*} raw - Raw query text.
 * @returns {string[]}
 */
function expandTerms(tokens, raw) {
  const compactQuery = compactKey(raw);
  const compactTokens = tokens.map((token) => compactKey(token)).filter(Boolean);
  const expanded = [];
  for (const [term, values] of Object.entries(SYNONYMS)) {
    const termCompact = compactKey(term);
    if (!termCompact) continue;
    const selected =
      compactQuery.includes(termCompact) ||
      compactTokens.some((tokenCompact) => tokenCompact.includes(termCompact) || termCompact.includes(tokenCompact));
    if (selected) expanded.push(term, ...values);
  }
  return unique(expanded);
}
/**
 * Turn raw query text into the structure used by the scoring functions.
 *
 * @param {*} raw - Raw query text.
 * @returns {{raw: *, normalized: string, compact: string, tokens: string[],
 *   coreTerms: string[], expandedTerms: string[], questionType: string}}
 */
function processQuery(raw) {
  const tokens = tokenizeText(raw);
  const normalized = normalizeForSearch(raw);
  const compact = compactKey(raw);
  const trimmedRaw = String(raw || "").trim();
  const multiCharTokens = tokens.filter((token) => token.length >= 2);
  // Core terms: the query itself in its three forms, then every 2+ char token.
  const coreTerms = unique([trimmedRaw, normalized, compact, ...multiCharTokens]);
  const expandedTerms = expandTerms(tokens, raw);
  const questionType = detectQuestionType(raw);
  return { raw, normalized, compact, tokens, coreTerms, expandedTerms, questionType };
}
/**
 * Return the parsed knowledge graph, re-reading the file only when its
 * modification time changed since the last successful load.
 *
 * The chunk index is built BEFORE any cache variable is updated. Previously
 * the graph and mtime were cached first; if chunk building then threw, later
 * calls saw a matching mtime and served the new graph with stale chunks.
 *
 * @returns {object} The parsed graph.
 * @throws If the file cannot be stat'ed/read, is not valid JSON, or chunk
 *   building fails.
 */
function loadGraph() {
  const stat = fs.statSync(graphPath);
  if (graphCache && stat.mtimeMs === graphMtime) return graphCache;
  const graph = JSON.parse(fs.readFileSync(graphPath, "utf8"));
  const chunks = buildChunks(graph.nodes || []);
  // Commit all three cache variables together, only after everything succeeded.
  graphCache = graph;
  graphMtime = stat.mtimeMs;
  chunksCache = chunks;
  return graph;
}
/**
 * Plain-text content of a node: `knowledgeMeta.content` coerced to text,
 * with injected line numbers removed and HTML stripped.
 *
 * @param {object} [node]
 * @returns {string}
 */
function nodeContent(node) {
  const rawContent = normalizeText(node?.knowledgeMeta?.content);
  const withoutLineNumbers = stripInjectedLineNumbers(rawContent);
  return stripHtml(withoutLineNumbers);
}
/**
 * Layer label for a node: its `knowledgeMeta.category` when present,
 * otherwise the first tag mentioning 需求 / 技术 / 测试 / 里程碑, otherwise "".
 *
 * @param {object} [node]
 * @returns {string}
 */
function nodeLayer(node) {
  const category = normalizeText(node?.knowledgeMeta?.category);
  if (category) return category;
  const tags = node?.tags || [];
  const layerTag = tags.find((tag) => /需求|技术|测试|里程碑/.test(tag));
  return layerTag || "";
}
/**
 * Classify a section by keywords found in its heading or body. Rules are
 * tried top to bottom on the lower-cased text; "general" is the fallback.
 *
 * @param {string} heading
 * @param {string} content
 * @returns {string} api_contract | data_model | business_rule | process | faq | general
 */
function detectChunkType(heading, content) {
  const haystack = `${heading}\n${content}`.toLowerCase();
  const rules = [
    [/api|接口|契约|endpoint|对外/, "api_contract"],
    [/数据模型|字段|表结构|对象|schema|建表/, "data_model"],
    [/业务规则|规则|口径|额度|频控|限制/, "business_rule"],
    [/流程|流转|步骤|时序|处理/, "process"],
    [/faq|常见问题|是什么|问答/, "faq"],
  ];
  for (const [pattern, type] of rules) {
    if (pattern.test(haystack)) return type;
  }
  return "general";
}
/**
 * Split markdown text into sections at ATX headings (`#` … `######`).
 *
 * Text before the first heading goes into a section titled "全文". Each
 * heading line starts a new section and is kept as the first line of that
 * section's text. Empty / whitespace-only input yields [].
 *
 * Lines are split on `\r?\n`: with a plain `"\n"` split, CRLF content keeps
 * a trailing `\r` on every line, and since `.` does not match `\r` the
 * heading pattern never matched — the whole document became one section.
 *
 * @param {string} text
 * @returns {Array<{heading: string, text: string}>}
 */
function splitSections(text) {
  const source = text.trim();
  if (!source) return [];
  const sections = [];
  let currentHeading = "全文";
  let buffer = [];
  for (const line of source.split(/\r?\n/)) {
    const heading = line.match(/^\s{0,3}#{1,6}\s+(.+)$/)?.[1]?.trim();
    if (heading) {
      if (buffer.length > 0) sections.push({ heading: currentHeading, text: buffer.join("\n") });
      currentHeading = heading;
      buffer = [line];
    } else {
      buffer.push(line);
    }
  }
  if (buffer.length > 0) sections.push({ heading: currentHeading, text: buffer.join("\n") });
  return sections.length ? sections : [{ heading: "全文", text: source }];
}
/**
 * Turn graph nodes into searchable chunks, one per markdown section.
 *
 * When a node has no content, its title block (name, summary, file path and
 * tags joined by newlines) is sectioned instead; the title block is also the
 * fallback text for a section that is blank.
 *
 * @param {Array<object>} nodes
 * @returns {Array<object>} Chunks in node order, then section order.
 */
function buildChunks(nodes) {
  const chunks = [];
  for (const node of nodes) {
    const body = nodeContent(node);
    const titleBlock = [node.name, node.summary, node.filePath, (node.tags || []).join(" ")].join("\n");
    const sections = splitSections(body || titleBlock);
    sections.forEach((section, index) => {
      const sectionText = section.text.trim() || titleBlock;
      chunks.push({
        chunkId: `${node.id}#${index}`,
        nodeId: node.id,
        docTitle: node.name || node.id,
        docPath: node.filePath || node.id,
        sectionTitle: section.heading,
        content: sectionText,
        normalizedContent: normalizeForSearch(sectionText),
        compactContent: compactKey(sectionText),
        tags: node.tags || [],
        summary: node.summary || "",
        chunkType: detectChunkType(section.heading, sectionText),
        layer: nodeLayer(node),
      });
    });
  }
  return chunks;
}
/**
 * Count non-overlapping occurrences of `needle` in `haystack`, scanning left
 * to right. An empty needle counts as zero.
 *
 * @param {string} haystack
 * @param {string} needle
 * @returns {number}
 */
function countOccurrences(haystack, needle) {
  if (!needle) return 0;
  let count = 0;
  let position = haystack.indexOf(needle);
  while (position !== -1) {
    count += 1;
    position = haystack.indexOf(needle, position + needle.length);
  }
  return count;
}
/**
 * Score how well one text field matches a processed query.
 *
 * Three contributions are summed, each scaled by `multiplier`:
 *  - exact:   +70 for each of (normalized query, lower-cased raw query) that
 *             is at least 2 chars long and occurs in the normalized field;
 *  - compact: +80 when the punctuation-free query occurs in the
 *             punctuation-free field;
 *  - token:   for every core term that occurs, min(occurrences, 4) ×
 *             min(term length, 10).
 *
 * @param {*} field - Field text.
 * @param {object} query - Result of processQuery.
 * @param {string} fieldName - Label used in the reason messages.
 * @param {number} [multiplier=1] - Field weight.
 * @returns {{score: number, reasons: Array<object>}}
 */
function fieldScore(field, query, fieldName, multiplier = 1) {
  const normalized = normalizeForSearch(field);
  const compact = compactKey(field);
  const reasons = [];
  let score = 0;
  // Needles shorter than 2 characters are too noisy for whole-query matching.
  const rawNeedles = unique([query.normalized, String(query.raw || "").toLowerCase()].filter((v) => v.length >= 2));
  for (const needle of rawNeedles) {
    if (normalized.includes(needle)) {
      const add = 70 * multiplier;
      score += add;
      reasons.push({ type: "exact", field: fieldName, message: `${fieldName} 原文命中:${needle}`, score: add });
    }
  }
  if (query.compact && compact.includes(query.compact)) {
    const add = 80 * multiplier;
    score += add;
    reasons.push({ type: "compact", field: fieldName, message: `${fieldName} 去标点命中`, score: add });
  }
  let tokenHits = 0;
  for (const term of query.coreTerms) {
    const normalizedTerm = normalizeForSearch(term);
    const compactTerm = compactKey(term);
    if (!normalizedTerm && !compactTerm) continue;
    // Take the better of the two views so punctuation differences do not hide a hit.
    const occurrences = Math.max(countOccurrences(normalized, normalizedTerm), countOccurrences(compact, compactTerm));
    if (occurrences > 0) {
      tokenHits += 1;
      // Both the repeat count (4) and the term length (10) are capped.
      score += Math.min(occurrences, 4) * Math.min(Math.max(compactTerm.length, normalizedTerm.length), 10) * multiplier;
    }
  }
  // The "token" reason reports hit count × multiplier, not the points actually added above.
  if (tokenHits > 0) reasons.push({ type: "token", field: fieldName, message: `${fieldName} 关键词覆盖 ${tokenHits}`, score: tokenHits * multiplier });
  return { score, reasons };
}
/**
 * Score term overlap between a chunk and the query plus its synonym
 * expansion. The score is (matched terms / all terms) × 90.
 *
 * A term matches when it is one of the chunk's tokens, or when its compact
 * form occurs in the chunk's compact content or compact title+section.
 * Terms whose compact form is empty (e.g. "--") are not allowed to match via
 * the compact route: every string "includes" the empty string, so such
 * terms used to count as overlap for every chunk.
 *
 * @param {object} chunk - Chunk produced by buildChunks.
 * @param {object} query - Result of processQuery.
 * @returns {{score: number, reasons: Array<object>}}
 */
function semanticScore(chunk, query) {
  const reasons = [];
  const terms = unique([...query.tokens, ...query.expandedTerms.flatMap(tokenizeText)]).filter((term) => term.length >= 2);
  if (!terms.length) return { score: 0, reasons };
  const targetTokens = new Set(tokenizeText(`${chunk.docTitle} ${chunk.sectionTitle} ${chunk.normalizedContent}`));
  // Loop-invariant: compute the compact heading once, not once per term.
  const compactHeading = compactKey(`${chunk.docTitle}${chunk.sectionTitle}`);
  let overlap = 0;
  for (const term of terms) {
    if (targetTokens.has(term)) {
      overlap += 1;
      continue;
    }
    const compact = compactKey(term);
    if (compact && (chunk.compactContent.includes(compact) || compactHeading.includes(compact))) overlap += 1;
  }
  const ratio = overlap / Math.max(terms.length, 1);
  const score = ratio * 90;
  if (score > 0) reasons.push({ type: "semantic", message: `语义/同义词覆盖 ${(ratio * 100).toFixed(0)}%`, score });
  return { score, reasons };
}
/**
 * Score bonus derived from the directory a chunk's document lives in.
 *
 * The base value comes from the first directory name (in the order below)
 * found in the path. Certain question types add a further bonus when the
 * document sits in the matching directory.
 *
 * @param {{docPath?: string}} chunk
 * @param {{questionType: string}} query
 * @returns {number}
 */
function directoryBoost(chunk, query) {
  const docPath = chunk.docPath || "";
  const directoryWeights = [
    ["05_需求文档", 15],
    ["01_业务流程", 12],
    ["07_技术文档", 10],
    ["08_测试相关", 8],
    ["06_里程碑", 6],
    ["02_项目管理流程", 5],
    ["04_Agent检索", 4],
    ["03_规范与模板", 1],
  ];
  const matched = directoryWeights.find(([directory]) => docPath.includes(directory));
  const base = matched ? matched[1] : 0;

  const { questionType } = query;
  if (questionType === "api" && docPath.includes("07_技术文档")) return base + 8;
  const wantsRequirements = questionType === "definition" || questionType === "rule";
  if (wantsRequirements && docPath.includes("05_需求文档")) return base + 5;
  if (questionType === "test" && docPath.includes("08_测试相关")) return base + 12;
  return base;
}
/**
 * Score bonus when a chunk's type suits the question type.
 *
 * For "definition" questions a section whose title mentions 概述 / 目标 / 说明
 * also qualifies, in addition to FAQ chunks.
 *
 * @param {{chunkType: string, sectionTitle: string}} chunk
 * @param {{questionType: string}} query
 * @returns {number} 18, 14, 10 or 0.
 */
function chunkTypeBoost(chunk, query) {
  const { chunkType } = chunk;
  switch (query.questionType) {
    case "api":
      return chunkType === "api_contract" ? 18 : 0;
    case "data_model":
      return chunkType === "data_model" ? 18 : 0;
    case "process":
      return chunkType === "process" ? 14 : 0;
    case "rule":
      return chunkType === "business_rule" ? 14 : 0;
    case "definition":
      return chunkType === "faq" || /概述|目标|说明/.test(chunk.sectionTitle) ? 10 : 0;
    default:
      return 0;
  }
}
/**
 * Cut a window of up to 420 characters out of `text`, positioned around the
 * earliest place any query term occurs.
 *
 * Whitespace is collapsed first. The window starts 110 characters before the
 * earliest hit (or at 0 when nothing hits) and an ellipsis is added on each
 * side that was truncated. Returns "" for blank text.
 *
 * @param {*} text - Chunk content.
 * @param {object} query - Result of processQuery.
 * @returns {string}
 */
function makeSnippet(text, query) {
  const compact = String(text || "").replace(/\s+/g, " ").trim();
  if (!compact) return "";
  const lower = compact.toLowerCase();
  const compactLower = compactKey(compact);
  const terms = unique([query.normalized, query.raw, query.compact, ...query.coreTerms, ...query.expandedTerms]);
  const hit = terms.map((term) => {
    const plainIndex = lower.indexOf(normalizeForSearch(term));
    if (plainIndex >= 0) return plainIndex;
    // Fallback: the index found here refers to the punctuation-free string,
    // so it is only an approximate position in `compact`; it is clamped to
    // stay inside the text.
    const compactIndex = compactLower.indexOf(compactKey(term));
    return compactIndex >= 0 ? Math.min(compactIndex, compact.length - 1) : -1;
  }).filter((i) => i >= 0).sort((a, b) => a - b)[0]; // smallest index across all terms
  const start = Math.max(0, (hit ?? 0) - 110);
  const snippet = compact.slice(start, start + 420);
  return `${start > 0 ? "…" : ""}${snippet}${start + 420 < compact.length ? "…" : ""}`;
}
/**
 * Rank all cached chunks against a query and return the best `topK`.
 *
 * Each chunk's final score is a weighted sum of exact, fuzzy and semantic
 * scores (weights depend on the question type) plus flat boosts for
 * title / section hits, directory and chunk type. Chunks scoring ≤ 10 or
 * yielding an empty snippet are dropped; of several chunks sharing the same
 * node id and section title only the best-scoring one is kept.
 *
 * Compared with the earlier shape this (a) builds the snippet and the reason
 * list only for chunks that pass the score threshold, and (b) de-duplicates
 * with a Set instead of scanning the accumulated results for every hit,
 * which was quadratic in the number of hits. Results are unchanged.
 *
 * @param {*} queryText - Raw query.
 * @param {number} [topK=16] - Maximum number of hits.
 * @returns {{query: object, hits: Array<object>}}
 */
function searchChunks(queryText, topK = 16) {
  loadGraph();
  const query = processQuery(queryText);
  if (!query.normalized && !query.compact) return { query, hits: [] };
  const weights = query.questionType === "api"
    ? { exact: 0.45, fuzzy: 0.25, semantic: 0.1 }
    : query.questionType === "definition" || query.questionType === "process"
      ? { exact: 0.25, fuzzy: 0.2, semantic: 0.3 }
      : { exact: 0.3, fuzzy: 0.2, semantic: 0.25 };

  const candidates = [];
  for (const chunk of chunksCache) {
    const title = fieldScore(`${chunk.docTitle}\n${chunk.docPath}`, query, "docTitle", 4);
    const section = fieldScore(chunk.sectionTitle, query, "sectionTitle", 6);
    const content = fieldScore(chunk.normalizedContent, query, "content", 1);
    const tags = fieldScore((chunk.tags || []).join(" "), query, "tags", 3);
    const semantic = semanticScore(chunk, query);
    const exact = title.score + section.score + tags.score + content.score;
    const fuzzy = (chunk.compactContent.includes(query.compact) ? 80 : 0) + content.score * 0.2;
    const titleBoost = title.score > 0 ? 12 : 0;
    const sectionBoost = section.score > 0 ? 18 : 0;
    const dirBoost = directoryBoost(chunk, query);
    const typeBoost = chunkTypeBoost(chunk, query);
    const score = exact * weights.exact + fuzzy * weights.fuzzy + semantic.score * weights.semantic + titleBoost + sectionBoost + dirBoost + typeBoost;

    // Cheap rejection first; snippet building is the expensive part.
    if (!(score > 10)) continue;
    const snippet = makeSnippet(chunk.content, query);
    if (!snippet) continue;

    const reasons = [...title.reasons, ...section.reasons, ...tags.reasons, ...content.reasons.slice(0, 3), ...semantic.reasons];
    if (dirBoost) reasons.push({ type: "directory", message: `目录权重:${(chunk.docPath || "").split("/")[0] || ""}`, score: dirBoost });
    if (typeBoost) reasons.push({ type: "chunkType", message: `章节类型匹配:${chunk.chunkType}`, score: typeBoost });
    candidates.push({
      chunkId: chunk.chunkId,
      nodeId: chunk.nodeId,
      docTitle: chunk.docTitle,
      docPath: chunk.docPath,
      sectionTitle: chunk.sectionTitle,
      chunkType: chunk.chunkType,
      layer: chunk.layer,
      score,
      scores: { exact, fuzzy, semantic: semantic.score, titleBoost, sectionBoost, directoryBoost: dirBoost, chunkTypeBoost: typeBoost },
      snippet,
      evidenceContent: chunk.content.slice(0, 2600),
      reasons,
    });
  }

  // Array.prototype.sort is stable, so ties keep chunk order.
  candidates.sort((a, b) => b.score - a.score);

  const seenKeys = new Set();
  const deduped = [];
  for (const hit of candidates) {
    const key = `${hit.nodeId}:${hit.sectionTitle}`;
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    deduped.push(hit);
  }
  return { query, hits: deduped.slice(0, topK) };
}
/**
 * Decide whether the retrieved hits are strong enough to answer from.
 *
 * - blank question or no hits → not allowed, confidence "none";
 * - top score ≥ 75 → allowed, "high";
 * - top score ≥ 60 and second score ≥ 50 → allowed, "medium";
 * - otherwise → not allowed, "low".
 *
 * @param {*} question
 * @param {Array<{score: number}>} hits - Sorted best-first.
 * @returns {{allowed: boolean, confidence: string, reason: string}}
 */
function evidenceDecision(question, hits) {
  const verdict = (allowed, confidence, reason) => ({ allowed, confidence, reason });

  const hasQuestion = String(question || "").trim() !== "";
  if (!hasQuestion) return verdict(false, "none", "请输入问题。");
  if (hits.length === 0) return verdict(false, "none", "没有检索到相关证据。");

  const topScore = hits[0].score;
  const runnerUpScore = hits[1]?.score || 0;
  if (topScore >= 75) return verdict(true, "high", "Top1 证据分数达到阈值。");
  if (topScore >= 60 && runnerUpScore >= 50) return verdict(true, "medium", "多个证据片段达到中等相关。");
  return verdict(false, "low", "证据分数不足。");
}
/**
 * Build an answer without calling an LLM: a conclusion line followed by up
 * to five numbered evidence entries (title / section, snippet, source path).
 * Returns the fixed "no description" reply when the evidence is rejected.
 *
 * @param {*} question
 * @param {Array<object>} hits - Search hits, best first.
 * @returns {string}
 */
function localAnswer(question, hits) {
  const decision = evidenceDecision(question, hits);
  if (!decision.allowed) return "暂无该需求描述。";

  const lines = [`结论:已基于本地知识库检索到可用证据。${decision.reason}`, "", "相关依据:"];
  hits.slice(0, 5).forEach((hit, index) => {
    const sectionPart = hit.sectionTitle ? ` / ${hit.sectionTitle}` : "";
    lines.push(`${index + 1}. ${hit.docTitle}${sectionPart}\n ${hit.snippet}\n 来源:${hit.docPath}`);
  });
  return lines.join("\n");
}
/**
 * Pick the LLM wire protocol.
 *
 * An explicit provider other than "auto" wins. Otherwise "routin-plan" is
 * chosen when the API key starts with "plan-" or the endpoint contains
 * "/plan/v1" (case-insensitive); the fallback is "openai".
 *
 * @param {{endpoint?: string, apiKey?: string, provider?: string}} options
 * @returns {string}
 */
function detectProvider({ endpoint, apiKey, provider }) {
  const isExplicit = provider && provider !== "auto";
  if (isExplicit) return provider;

  const key = String(apiKey || "");
  const url = String(endpoint || "").toLowerCase();
  const looksLikePlan = key.startsWith("plan-") || url.includes("/plan/v1");
  return looksLikePlan ? "routin-plan" : "openai";
}
/**
 * Ensure an OpenAI-style endpoint ends in `/chat/completions`.
 *
 * Blank input yields "". An endpoint that already ends in
 * `/chat/completions` (optionally with a trailing slash) is returned as is;
 * otherwise trailing slashes are removed and the path is appended.
 *
 * @param {*} endpoint
 * @returns {string}
 */
function normalizeOpenAiEndpoint(endpoint) {
  const trimmed = String(endpoint || "").trim();
  if (trimmed === "") return "";
  const alreadyComplete = /\/chat\/completions\/?$/.test(trimmed);
  if (alreadyComplete) return trimmed;
  const withoutTrailingSlashes = trimmed.replace(/\/+$/, "");
  return `${withoutTrailingSlashes}/chat/completions`;
}
/**
 * Build the RoutIn plan `/messages` URL from a user-supplied endpoint.
 *
 * Blank input falls back to `https://api.routin.ai/plan/v1`. A trailing
 * `/chat/completions`, `/messages` and trailing slashes are removed, a base
 * ending in `/v1` without `/plan/v1` is rewritten to `/plan/v1`, and
 * `/messages` is appended.
 *
 * @param {*} endpoint
 * @returns {string}
 */
function normalizePlanEndpoint(endpoint) {
  const provided = String(endpoint || "").trim();
  const trailingParts = [/\/chat\/completions\/?$/, /\/messages\/?$/, /\/+$/];
  let base = trailingParts.reduce(
    (current, pattern) => current.replace(pattern, ""),
    provided || "https://api.routin.ai/plan/v1",
  );
  if (!base.includes("/plan/v1")) {
    base = base.replace(/\/v1$/, "/plan/v1");
  }
  return `${base}/messages`;
}
/**
 * Pull the answer text out of a response whose `content` is either a string
 * or an array of parts (strings or objects with a `text` field).
 *
 * Array parts that are not non-blank strings are skipped and the rest are
 * concatenated without a separator. Anything else yields "".
 *
 * @param {*} data - Parsed response body.
 * @returns {string}
 */
function extractPlanMessageText(data) {
  if (!data || typeof data !== "object") return "";
  const { content } = data;
  if (Array.isArray(content)) {
    const pieces = [];
    for (const part of content) {
      const candidate = typeof part === "string" ? part : part?.text;
      if (typeof candidate === "string" && candidate.trim()) pieces.push(candidate);
    }
    return pieces.join("");
  }
  return typeof content === "string" ? content : "";
}
/**
 * Find the answer text in an LLM response, trying several known shapes.
 *
 * For the "routin-plan" protocol the content-parts shape is tried first.
 * After that, in order: OpenAI `choices[0].message.content`,
 * `message.content`, `response`, `data.answer`, `answer`, and finally the
 * content-parts shape again as a last resort.
 *
 * @param {*} data - Parsed response body.
 * @param {string} protocol - Protocol name from detectProvider.
 * @returns {*} The first truthy candidate, or "" when none is found.
 */
function extractAnswer(data, protocol) {
  if (protocol === "routin-plan") {
    const planText = extractPlanMessageText(data);
    if (planText) return planText;
  }
  const candidates = [
    data?.choices?.[0]?.message?.content,
    data?.message?.content,
    data?.response,
    data?.data?.answer,
    data?.answer,
  ];
  for (const candidate of candidates) {
    if (candidate) return candidate;
  }
  return extractPlanMessageText(data);
}
/**
 * Send the question plus retrieved evidence to an LLM endpoint and return
 * its answer.
 *
 * Two request shapes are supported: "routin-plan" (single user message that
 * embeds the system prompt, `max_tokens` 4096, temperature 0.7) and the
 * OpenAI chat-completions shape (system + user messages, temperature 0.1,
 * non-streaming). At most the first 8 hits are sent as evidence.
 *
 * @param {object} options
 * @param {string} options.endpoint - Base or full endpoint URL.
 * @param {string} options.model - Model name forwarded to the API.
 * @param {string} [options.apiKey] - Sent as a Bearer token when present.
 * @param {string} [options.provider="auto"] - Protocol override.
 * @param {string} options.question - User question.
 * @param {Array<object>} options.hits - Search hits used as evidence.
 * @param {number} [options.timeoutMs=60000] - Abort the request after this long.
 * @returns {Promise<{answer: string, endpoint: string, elapsedMs: number, rawShape: string[], protocol: string}>}
 * @throws {Error} On a non-2xx response or an unparseable answer; such errors
 *   carry `endpoint`, `elapsedMs` and `protocol` (plus `status` for HTTP
 *   errors). Errors thrown by `fetch` itself (including aborts) propagate
 *   without those extra fields.
 */
async function callLlm({ endpoint, model, apiKey, provider = "auto", question, hits, timeoutMs = 60000 }) {
  const protocol = detectProvider({ endpoint, apiKey, provider });
  const requestEndpoint = protocol === "routin-plan" ? normalizePlanEndpoint(endpoint) : normalizeOpenAiEndpoint(endpoint);
  const evidence = hits.slice(0, 8).map((hit, index) => `[${index + 1}]\n来源:${hit.docPath}\n章节:${hit.sectionTitle}\n内容:${hit.snippet}`).join("\n\n");
  const systemPrompt = "你是如愿知识库问答助手。你只能基于提供的知识库片段回答,不得使用外部知识,不得编造文档中没有的内容。如果证据不足,请只回答:暂无该需求描述。回答末尾必须列出来源文件。";
  const userPrompt = `用户问题:\n${question}\n\n检索证据:\n${evidence}\n\n请用中文结构化回答。`;
  // The plan protocol gets no separate system role; the system prompt is
  // prepended to the user message instead.
  const messages = protocol === "routin-plan"
    ? [{ role: "user", content: `${systemPrompt}\n\n${userPrompt}` }]
    : [{ role: "system", content: systemPrompt }, { role: "user", content: userPrompt }];
  const body = protocol === "routin-plan"
    ? { model, messages, max_tokens: 4096, temperature: 0.7 }
    : { model, messages, temperature: 0.1, stream: false };
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const startedAt = Date.now();
  try {
    const response = await fetch(requestEndpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        ...(apiKey ? { Authorization: `Bearer ${apiKey}` } : {}),
      },
      body: JSON.stringify(body),
      signal: controller.signal,
    });
    const text = await response.text();
    let data = null;
    // Best effort: a non-JSON body leaves `data` null and is reported via the
    // raw text in the error paths below.
    try { data = text ? JSON.parse(text) : null; } catch {}
    if (!response.ok) {
      const remoteMessage = data?.error?.message || data?.message || text.slice(0, 500);
      const err = new Error(`模型接口返回 ${response.status} ${response.statusText}${remoteMessage ? `${remoteMessage}` : ""}`);
      err.status = response.status;
      err.endpoint = requestEndpoint;
      err.elapsedMs = Date.now() - startedAt;
      err.protocol = protocol;
      throw err;
    }
    const answer = extractAnswer(data, protocol);
    if (typeof answer !== "string" || !answer.trim()) {
      const err = new Error("模型返回成功,但响应结构里没有可解析的回答内容。支持 OpenAI choices[0].message.content、RoutIn content[]/content、message.content / response / answer。原始返回" + text.slice(0, 500));
      err.endpoint = requestEndpoint;
      err.elapsedMs = Date.now() - startedAt;
      err.protocol = protocol;
      throw err;
    }
    // rawShape lists only the top-level keys of the response, for diagnostics.
    return { answer: answer.trim(), endpoint: requestEndpoint, elapsedMs: Date.now() - startedAt, rawShape: Object.keys(data || {}), protocol };
  } finally {
    // Always cancel the abort timer, whether the call succeeded or threw.
    clearTimeout(timer);
  }
}
/**
 * Read a request body and parse it as JSON.
 *
 * The body is buffered in memory, so its size is capped: once more than
 * `maxBytes` have been received the read stops and an error with
 * `status = 413` is thrown. Without a cap any client could make the server
 * buffer an arbitrarily large payload.
 *
 * @param {AsyncIterable<Buffer|Uint8Array|string>} req - Incoming request stream.
 * @param {number} [maxBytes=1048576] - Maximum accepted body size in bytes.
 * @returns {Promise<*>} Parsed JSON, or `{}` for an empty body.
 * @throws {Error} With `status` 413 when the body exceeds `maxBytes`.
 * @throws {SyntaxError} When the body is not valid JSON.
 */
async function readBody(req, maxBytes = 1024 * 1024) {
  const chunks = [];
  let received = 0;
  for await (const chunk of req) {
    const piece = typeof chunk === "string" ? Buffer.from(chunk, "utf8") : chunk;
    received += piece.length;
    if (received > maxBytes) {
      const err = new Error(`Request body exceeds ${maxBytes} bytes`);
      err.status = 413;
      throw err;
    }
    chunks.push(piece);
  }
  const raw = Buffer.concat(chunks).toString("utf8");
  return raw ? JSON.parse(raw) : {};
}
/**
 * Dispatches `/api/*` requests.
 *
 * Routes handled here:
 *   - any path, OPTIONS      -> 200 `{ ok: true }`
 *   - /api/rag/health        -> server and graph statistics
 *   - POST /api/rag/search   -> retrieval hits plus a locally composed answer
 *   - POST /api/rag/answer   -> retrieval, then an LLM answer when `body.llm.enabled` is set
 *   - POST /api/llm/test     -> connectivity probe against the configured LLM endpoint
 *   - anything else          -> 404
 *
 * @param {import("node:http").IncomingMessage} req - Incoming request.
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @param {URL} url - Parsed request URL.
 * @returns {Promise<*>} Whatever `sendJson` returns for the selected route.
 */
async function handleApi(req, res, url) {
  if (req.method === "OPTIONS") {
    return sendJson(res, 200, { ok: true });
  }

  const route = url.pathname;
  const isPost = req.method === "POST";

  // Serialisable view of a thrown value for the error payloads below.
  const describeError = (error) => ({
    message: error instanceof Error ? error.message : String(error),
    name: error?.name,
    status: error?.status,
  });

  // Used when the thrown error does not carry the endpoint that was called:
  // derive it from the caller-supplied settings instead.
  const guessEndpoint = (endpoint, apiKey, provider) =>
    detectProvider({ endpoint, apiKey, provider }) === "routin-plan"
      ? normalizePlanEndpoint(endpoint)
      : normalizeOpenAiEndpoint(endpoint);

  if (route === "/api/rag/health") {
    const graph = loadGraph();
    const documentCount = (graph.nodes || []).length;
    return sendJson(res, 200, {
      ok: true,
      graphPath,
      staticRoot,
      documentCount,
      chunkCount: chunksCache.length,
      llmProxy: true,
    });
  }

  if (route === "/api/rag/search" && isPost) {
    const body = await readBody(req);
    const question = body.query || "";
    const { query, hits } = searchChunks(question, body.topK || 16);
    const decision = evidenceDecision(question, hits);
    return sendJson(res, 200, {
      ok: true,
      query,
      decision,
      answer: localAnswer(question, hits),
      hits,
    });
  }

  if (route === "/api/rag/answer" && isPost) {
    const body = await readBody(req);
    const requestStatus = { stage: "search", startedAt: new Date().toISOString() };
    const question = body.query || "";
    const { query, hits } = searchChunks(question, body.topK || 16);
    const decision = evidenceDecision(question, hits);

    // The evidence check did not allow an answer: reply with the fixed "no such requirement" text.
    if (!decision.allowed) {
      return sendJson(res, 200, {
        ok: true,
        mode: "llm",
        requestStatus: { ...requestStatus, stage: "evidence_rejected" },
        query,
        decision,
        answer: "暂无该需求描述。",
        hits,
      });
    }

    // LLM not enabled by the caller: answer from the retrieved hits only.
    if (!body.llm?.enabled) {
      return sendJson(res, 200, {
        ok: true,
        mode: "local",
        requestStatus: { ...requestStatus, stage: "local_answer" },
        query,
        decision,
        answer: localAnswer(question, hits),
        hits,
      });
    }

    try {
      const { endpoint, model, apiKey } = body.llm;
      const result = await callLlm({ endpoint, model, apiKey, question, hits });
      return sendJson(res, 200, {
        ok: true,
        mode: "llm",
        requestStatus: {
          ...requestStatus,
          stage: "llm_done",
          endpoint: result.endpoint,
          elapsedMs: result.elapsedMs,
          rawShape: result.rawShape,
        },
        query,
        decision,
        answer: result.answer,
        hits,
      });
    } catch (error) {
      // The LLM call failed: report 502 but still include the locally composed answer.
      const failedStatus = {
        ...requestStatus,
        stage: "llm_failed",
        endpoint: error.endpoint || guessEndpoint(body.llm?.endpoint, body.llm?.apiKey, body.llm?.provider),
        elapsedMs: error.elapsedMs,
        protocol: error.protocol,
      };
      return sendJson(res, 502, {
        ok: false,
        mode: "llm",
        requestStatus: failedStatus,
        query,
        decision,
        answer: localAnswer(question, hits),
        hits,
        error: describeError(error),
      });
    }
  }

  if (route === "/api/llm/test" && isPost) {
    const body = await readBody(req);
    try {
      // Single synthetic hit that instructs the model to reply with a fixed phrase.
      const probeHits = [{ docPath: "测试", sectionTitle: "连接测试", snippet: "请只回答:连接成功。", score: 100 }];
      const result = await callLlm({
        endpoint: body.endpoint,
        model: body.model,
        apiKey: body.apiKey,
        question: "连接测试",
        hits: probeHits,
      });
      return sendJson(res, 200, {
        ok: true,
        endpoint: result.endpoint,
        elapsedMs: result.elapsedMs,
        answer: result.answer,
      });
    } catch (error) {
      return sendJson(res, 502, {
        ok: false,
        endpoint: error.endpoint || guessEndpoint(body.endpoint, body.apiKey, body.provider),
        protocol: error.protocol,
        elapsedMs: error.elapsedMs,
        error: describeError(error),
      });
    }
  }

  return sendJson(res, 404, { ok: false, error: "API not found" });
}
/**
 * Serves a file from `staticRoot`, falling back to `index.html` when the
 * requested path is not an existing regular file.
 *
 * Responses:
 *   - 400 when the URL path contains a malformed percent-escape
 *   - 403 when the decoded path resolves outside `staticRoot`
 *   - 200 with the file contents otherwise (`.html` is sent with `no-store`,
 *     everything else with `no-cache`)
 *   - 404/500 when the chosen file cannot be opened
 *
 * @param {import("node:http").IncomingMessage} req - Incoming request (unused).
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @param {URL} url - Parsed request URL; only `pathname` is read.
 * @returns {void}
 */
function serveStatic(req, res, url) {
  let pathname;
  try {
    pathname = decodeURIComponent(url.pathname);
  } catch {
    // Malformed percent-escape (e.g. "%E0%A4%A") is a client error, not a server fault.
    res.writeHead(400);
    res.end("Bad Request");
    return;
  }
  if (pathname === "/") pathname = "/index.html";

  const root = path.resolve(staticRoot);
  const requested = path.normalize(path.join(root, pathname));

  // A plain string-prefix test would accept sibling directories that merely
  // share the prefix (root "/srv/dist" vs. "/srv/dist-secret/..."), reachable
  // through an encoded separator such as "/..%2fdist-secret/x". Compare on
  // path-segment boundaries instead.
  const relative = path.relative(root, requested);
  const escapesRoot =
    relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRoot) {
    res.writeHead(403);
    res.end("Forbidden");
    return;
  }

  const isExistingFile = fs.existsSync(requested) && fs.statSync(requested).isFile();
  const filePath = isExistingFile ? requested : path.join(root, "index.html");
  const ext = path.extname(filePath).toLowerCase();
  const headers = {
    "Content-Type": MIME[ext] || "application/octet-stream",
    "Cache-Control": ext === ".html" ? "no-store" : "no-cache",
  };

  // Only commit to a 200 once the file has actually been opened; an unhandled
  // stream "error" event would otherwise take the whole process down.
  const stream = fs.createReadStream(filePath);
  stream.once("open", () => {
    res.writeHead(200, headers);
    stream.pipe(res);
  });
  stream.on("error", (err) => {
    if (res.headersSent) {
      // Failure mid-transfer: the status line is already out, so abort the socket.
      res.destroy(err);
      return;
    }
    res.writeHead(err?.code === "ENOENT" ? 404 : 500);
    res.end(err?.code === "ENOENT" ? "Not Found" : "Internal Server Error");
  });
}
// HTTP entry point: `/api/*` goes to handleApi, everything else is served as a static asset.
const server = http.createServer(async (req, res) => {
  // Parse the URL inside a guard: `new URL` throws on request targets such as
  // "//" or on a malformed Host header, and an exception escaping this async
  // handler would surface as an unhandled rejection instead of a response.
  let url;
  try {
    url = new URL(req.url || "/", `http://${req.headers.host || "localhost"}`);
  } catch {
    return sendJson(res, 400, { ok: false, error: "Invalid request URL" });
  }

  try {
    if (url.pathname.startsWith("/api/")) return await handleApi(req, res, url);
    return serveStatic(req, res, url);
  } catch (error) {
    if (res.headersSent) {
      // A response is already in flight; a second status line cannot be written.
      res.destroy();
      return undefined;
    }
    return sendJson(res, 500, { ok: false, error: error instanceof Error ? error.message : String(error) });
  }
});
// Start accepting connections. Once bound, load the graph and print where the
// server is listening and which paths it is using.
server.listen(port, host, function onListening() {
  loadGraph();
  const banner = [
    `RAG dashboard server listening on http://${host}:${port}`,
    `Static root: ${staticRoot}`,
    `Graph path: ${graphPath}`,
  ];
  for (const line of banner) {
    console.log(line);
  }
});

View File

@@ -0,0 +1,174 @@
import fs from 'node:fs';
import path from 'node:path';
// Fixed locations of the knowledge base this script maintains.
const root = 'D:/AIcoding/WishFulfilled/知识库/under-anything';
const wiki = `${root}/wishfulfilled-wiki`;
const reqDir = `${wiki}/05_需求文档`;

// Both directories hold a knowledge-graph.json / meta.json pair that is updated below.
const outputDirs = [`${wiki}/.understand-anything`, `${root}/wishfulfilled-dashboard`];
const graphPaths = outputDirs.map((dir) => `${dir}/knowledge-graph.json`);
const metaPaths = outputDirs.map((dir) => `${dir}/meta.json`);

// Requirement documents (.md / .htm / .html) in Simplified-Chinese collation order.
const isRequirementDoc = (name) => /\.(md|html?)$/i.test(name);
const zhCollator = new Intl.Collator('zh-Hans-CN');
const files = fs.readdirSync(reqDir).filter(isRequirementDoc).sort(zhCollator.compare);
/**
 * Reads a file synchronously as UTF-8 text.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} The decoded file contents.
 */
function readText(filePath) {
  const contents = fs.readFileSync(filePath, { encoding: 'utf8' });
  return contents;
}
/**
 * Removes `N|` line-number prefixes (optionally preceded by whitespace) from
 * text that looks like a numbered dump.
 *
 * The text is only rewritten when enough lines carry a prefix: at least 3, and
 * at least a quarter of all lines. Otherwise the input is returned untouched,
 * so ordinary content that happens to contain a few `digits|...` lines is not
 * altered. When the text is rewritten, lines are re-joined with `\n`.
 *
 * @param {string} text - Raw document text.
 * @returns {string} The text without line-number prefixes, or the original text.
 */
function cleanLineNumbers(text) {
  const lines = text.split(/\r?\n/);
  const cleaned = [];
  let changed = 0;
  for (const line of lines) {
    // `(.*)` rather than `(.+)`: a numbered blank line ("12|") is still a
    // numbered line and must be stripped and counted, not left as residue.
    const match = /^\s*\d+\|(.*)$/.exec(line);
    if (match) {
      changed += 1;
      cleaned.push(match[1]);
    } else {
      cleaned.push(line);
    }
  }
  const threshold = Math.max(3, Math.floor(lines.length / 4));
  return changed >= threshold ? cleaned.join('\n') : text;
}
/**
 * Reduces an HTML fragment to plain text.
 *
 * `<script>` and `<style>` elements are dropped together with their contents,
 * remaining tags are removed, `&nbsp;` and `&amp;` are decoded, and whitespace
 * runs are collapsed to single spaces.
 *
 * @param {string} text - HTML markup.
 * @returns {string} Trimmed plain text.
 */
function stripHtml(text) {
  const withoutBlocks = text.replace(/<script[\s\S]*?<\/script>|<style[\s\S]*?<\/style>/gi, ' ');
  const withoutTags = withoutBlocks.replace(/<[^>]+>/g, ' ');
  // Decode &nbsp; before &amp; so that "&amp;nbsp;" ends up as the literal text "&nbsp;".
  const decoded = withoutTags.replace(/&nbsp;/g, ' ').replace(/&amp;/g, '&');
  const collapsed = decoded.replace(/\s+/g, ' ');
  return collapsed.trim();
}
/**
 * Picks a display title for a document.
 *
 * Order of preference:
 *   1. for `.htm`/`.html` files, the non-empty text of the `<title>` element;
 *   2. the first line that starts with `#`, with the leading `#`s removed;
 *   3. the file name without extension, minus a leading `digits-`/`digits_`
 *      prefix, with underscores turned into spaces.
 *
 * @param {string} fileName - File name, used for the extension check and the fallback.
 * @param {string} text - Document contents.
 * @returns {string} The chosen title.
 */
function titleFor(fileName, text) {
  const isHtml = /\.html?$/i.test(fileName);
  if (isHtml) {
    const found = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(text);
    const htmlTitle = found ? stripHtml(found[1]) : '';
    if (htmlTitle) return htmlTitle;
  }

  const heading = text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .find((line) => line.startsWith('#'));
  if (heading !== undefined) {
    return heading.replace(/^#+\s*/, '').trim();
  }

  const stem = path.basename(fileName, path.extname(fileName));
  return stem.replace(/^\d+[-_]/, '').replace(/_/g, ' ');
}
/**
 * Builds a short plain-text summary (at most 180 UTF-16 code units).
 *
 * HTML files are passed through `stripHtml`; for other files a fixed set of
 * Markdown punctuation characters is replaced by spaces and whitespace is
 * collapsed. An empty result yields a fixed placeholder string.
 *
 * @param {string} fileName - File name, used only for the extension check.
 * @param {string} text - Document contents.
 * @returns {string} The summary text.
 */
function summaryFor(fileName, text) {
  let plain;
  if (/\.html?$/i.test(fileName)) {
    plain = stripHtml(text);
  } else {
    plain = text
      .replace(/[`*_>#|\-\[\]()]/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
  }
  if (!plain) return '需求文档。';
  return plain.slice(0, 180);
}
/**
 * Collects the tags for a requirement document.
 *
 * Two fixed tags always come first. If the text has a line of the form
 * `tags: [a, b, ...]`, its comma-separated entries are appended in order,
 * with one surrounding quote stripped from each end, skipping empty entries
 * and duplicates.
 *
 * @param {string} text - Document contents.
 * @returns {string[]} Ordered list of unique tags.
 */
function tagsFor(text) {
  const collected = new Set(['05_需求文档', '需求文档']);
  const declared = /^tags:\s*\[(.*?)\]/m.exec(text);
  if (declared) {
    declared[1]
      .split(/[,]/)
      .map((raw) => raw.trim().replace(/^['"]|['"]$/g, ''))
      .filter((tag) => Boolean(tag))
      .forEach((tag) => collected.add(tag));
  }
  return [...collected];
}
/**
 * Classifies a document by the length of its text.
 *
 * @param {string} text - Document contents.
 * @returns {'simple'|'moderate'|'complex'} `complex` above 20000 characters,
 *   `moderate` above 5000, otherwise `simple`.
 */
function complexityFor(text) {
  const size = text.length;
  return size > 20000 ? 'complex' : size > 5000 ? 'moderate' : 'simple';
}
/**
 * Synchronises every requirement document listed in `files` into one
 * knowledge-graph JSON file and writes the file back in place.
 *
 * For each document this creates or overwrites a `doc:` node, makes sure the
 * node is a member of the `layer-requirements` layer, and makes sure a
 * `documents` edge exists from `flow:layer-requirements` to it. The layer is
 * created when missing. If a `flow:layer-requirements` node exists, its
 * summary and content are refreshed with the current document count.
 *
 * @param {string} graphPath - Path of the knowledge-graph JSON file to update.
 * @returns {{graphPath: string, added: number, updated: number, requirements: number, nodes: number}}
 *   Counters describing what changed.
 * @throws When the graph file or a requirement document cannot be read or
 *   parsed, or when the graph file cannot be written.
 */
function updateGraph(graphPath) {
  const graph = JSON.parse(fs.readFileSync(graphPath, 'utf8'));
  graph.nodes ??= [];
  graph.edges ??= [];
  graph.layers ??= [];

  const byId = new Map(graph.nodes.map((node) => [node.id, node]));
  const edgeKeys = new Set(graph.edges.map((edge) => `${edge.source}|${edge.target}|${edge.type}`));

  let layer = graph.layers.find((item) => item.id === 'layer-requirements');
  if (!layer) {
    layer = {
      id: 'layer-requirements',
      name: '需求文档',
      description: '所有正式需求、业务规则、需求变更和需求索引。',
      nodeIds: ['flow:layer-requirements'],
    };
    graph.layers.push(layer);
  }
  layer.nodeIds ??= [];
  // Membership index kept in step with layer.nodeIds so the per-file check is
  // O(1) instead of an Array.includes scan on every iteration.
  const layerMembers = new Set(layer.nodeIds);

  let added = 0;
  let updated = 0;
  for (const fileName of files) {
    const absolutePath = `${reqDir}/${fileName}`;
    const relPath = `05_需求文档/${fileName}`;
    // Node id is the relative path without its file extension.
    const nodeId = `doc:${relPath.replace(/\.[^.]+$/, '')}`;
    const text = cleanLineNumbers(readText(absolutePath));
    const node = {
      id: nodeId,
      type: 'document',
      name: titleFor(fileName, text),
      filePath: relPath,
      summary: summaryFor(fileName, text),
      tags: tagsFor(text),
      complexity: complexityFor(text),
      knowledgeMeta: {
        content: text,
        wikilinks: [...text.matchAll(/\[\[([^\]]+)\]\]/g)].map((match) => match[1]),
        category: 'layer-requirements',
      },
    };

    if (byId.has(nodeId)) {
      // Shallow overwrite: fields not listed above are kept, but knowledgeMeta
      // is replaced as a whole.
      Object.assign(byId.get(nodeId), node);
      updated += 1;
    } else {
      graph.nodes.push(node);
      byId.set(nodeId, node);
      added += 1;
    }

    if (!layerMembers.has(nodeId)) {
      layerMembers.add(nodeId);
      layer.nodeIds.push(nodeId);
    }

    const edgeKey = `flow:layer-requirements|${nodeId}|documents`;
    if (!edgeKeys.has(edgeKey)) {
      graph.edges.push({
        source: 'flow:layer-requirements',
        target: nodeId,
        type: 'documents',
        direction: 'forward',
        description: '本层文档',
        weight: 0.65,
      });
      edgeKeys.add(edgeKey);
    }
  }

  // The flow node itself is a layer member but not a document.
  const count = layer.nodeIds.filter((id) => id !== 'flow:layer-requirements').length;
  const flow = byId.get('flow:layer-requirements');
  if (flow) {
    flow.summary = '所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。';
    flow.knowledgeMeta ??= {};
    flow.knowledgeMeta.content = `# 需求文档\n\n所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。\n\n本层包含 ${count} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。`;
    flow.knowledgeMeta.category = 'layer-requirements';
  }

  graph.project ??= {};
  graph.project.analyzedAt = new Date().toISOString();
  fs.writeFileSync(graphPath, `${JSON.stringify(graph, null, 2)}\n`, 'utf8');
  return { graphPath, added, updated, requirements: count, nodes: graph.nodes.length };
}
// Update every graph copy first, then refresh each meta.json that exists from
// the graph file sitting in the same directory.
const results = graphPaths.map((graphPath) => updateGraph(graphPath));

for (const metaPath of metaPaths) {
  if (fs.existsSync(metaPath)) {
    const siblingGraphPath = `${path.dirname(metaPath)}/knowledge-graph.json`;
    const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
    const { nodes } = JSON.parse(fs.readFileSync(siblingGraphPath, 'utf8'));
    const documentNodes = nodes.filter((node) => String(node.id).startsWith('doc:'));
    meta.lastAnalyzedAt = new Date().toISOString();
    meta.analyzedFiles = documentNodes.length;
    fs.writeFileSync(metaPath, `${JSON.stringify(meta, null, 2)}\n`, 'utf8');
  }
}

// Print the per-graph counters as the script's report.
console.log(JSON.stringify(results, null, 2));