Add under-anything knowledge dashboard
This commit is contained in:
@@ -0,0 +1,99 @@
|
||||
// Per-layer aggregation perf benchmark.
|
||||
//
|
||||
// Mirrors the BEFORE shape (graph.nodes.filter(n => layer.nodeIds.includes(n.id))
|
||||
// per layer) and the AFTER shape (single nodesById Map + iterate layer.nodeIds)
|
||||
// from `useOverviewGraph` in `src/components/GraphView.tsx`. Issue #102 reported
|
||||
// a 4.8 MB knowledge graph that froze the dashboard on overview render — the
|
||||
// quadratic Array.includes pass was the dominant synchronous cost.
|
||||
//
|
||||
// We can't import the dashboard helper directly (Vite-bundled, no
|
||||
// per-module dist), so the new shape is reproduced here in lockstep with
|
||||
// `src/utils/layerStats.ts::computeLayerStats`.
|
||||
//
|
||||
// Usage:
|
||||
// node understand-anything-plugin/packages/dashboard/scripts/benchmark-aggregations.mjs
|
||||
|
||||
import { performance } from "node:perf_hooks";
|
||||
|
||||
/**
 * Build a synthetic graph for the aggregation benchmark.
 *
 * Every layer owns `nodesPerLayer` nodes with ids of the form `n-<layer>-<node>`;
 * complexity cycles through simple/moderate/complex based on (layer + node) % 3.
 *
 * @param {number} layerCount - Number of layers to generate.
 * @param {number} nodesPerLayer - Number of nodes generated for each layer.
 * @returns {{nodes: Array<{id: string, complexity: string}>, layers: Array<{id: string, nodeIds: string[]}>}}
 */
function makeGraph(layerCount, nodesPerLayer) {
  const complexityCycle = ["simple", "moderate", "complex"];
  const graph = { nodes: [], layers: [] };

  let layerIndex = 0;
  while (layerIndex < layerCount) {
    const nodeIds = [];
    let nodeIndex = 0;
    while (nodeIndex < nodesPerLayer) {
      const id = `n-${layerIndex}-${nodeIndex}`;
      graph.nodes.push({ id, complexity: complexityCycle[(layerIndex + nodeIndex) % 3] });
      nodeIds.push(id);
      nodeIndex += 1;
    }
    graph.layers.push({ id: `L${layerIndex}`, nodeIds });
    layerIndex += 1;
  }

  return graph;
}
|
||||
|
||||
// --- BEFORE: O(N × K × L) per overview render ----------------------------
/**
 * Baseline aggregation: for every layer, scan all graph nodes and test
 * membership with Array.includes. The quadratic scan is intentional — it is
 * the shape being measured, so do not "optimize" it.
 *
 * @param {{nodes: Array<{id: string, complexity: string}>, layers: Array<{id: string, nodeIds: string[]}>}} graph
 * @returns {Array<{id: string, aggregateComplexity: string}>} One entry per layer, in layer order.
 */
function aggregateBefore(graph) {
  return graph.layers.map((layer) => {
    const memberNodes = graph.nodes.filter((n) => layer.nodeIds.includes(n.id));

    const counts = { simple: 0, moderate: 0, complex: 0 };
    memberNodes.forEach((n) => {
      counts[n.complexity] += 1;
    });

    // A level "wins" when it makes up strictly more than 30% of the members;
    // complex takes precedence over moderate.
    const threshold = memberNodes.length * 0.3;
    let aggregateComplexity = "simple";
    if (counts.complex > threshold) {
      aggregateComplexity = "complex";
    } else if (counts.moderate > threshold) {
      aggregateComplexity = "moderate";
    }

    return { id: layer.id, aggregateComplexity };
  });
}
|
||||
|
||||
// --- AFTER: O(N + Σ K_i) per overview render ----------------------------
/**
 * Optimized aggregation: resolve each layer's node ids through a prebuilt
 * id → node Map instead of scanning the whole node list per layer.
 *
 * @param {{layers: Array<{id: string, nodeIds: string[]}>}} graph
 * @param {Map<string, {id: string, complexity: string}>} nodesById - Lookup built once by the caller.
 * @returns {Array<{id: string, aggregateComplexity: string}>} One entry per layer, in layer order.
 */
function aggregateAfter(graph, nodesById) {
  const results = [];

  for (const { id, nodeIds } of graph.layers) {
    const tally = { simple: 0, moderate: 0, complex: 0 };
    let resolvedCount = 0;

    for (const nodeId of nodeIds) {
      const node = nodesById.get(nodeId);
      // Ids with no matching node are ignored and do not count toward the total.
      if (node) {
        resolvedCount += 1;
        tally[node.complexity] += 1;
      }
    }

    // Same 30% rule as the baseline, applied to the resolved members only.
    const threshold = resolvedCount * 0.3;
    let aggregateComplexity;
    if (tally.complex > threshold) {
      aggregateComplexity = "complex";
    } else if (tally.moderate > threshold) {
      aggregateComplexity = "moderate";
    } else {
      aggregateComplexity = "simple";
    }

    results.push({ id, aggregateComplexity });
  }

  return results;
}
|
||||
|
||||
/**
 * Time both aggregation strategies once on a freshly generated graph and
 * print a single summary line (timings, speed-up ratio, output parity).
 *
 * The id → node Map is built before timing starts, so its construction cost
 * is not part of the AFTER measurement. Each strategy is run exactly once,
 * with no warm-up.
 *
 * @param {string} label - Name shown at the start of the output line.
 * @param {number} layerCount - Number of layers in the synthetic graph.
 * @param {number} nodesPerLayer - Nodes per layer in the synthetic graph.
 * @returns {void}
 */
function bench(label, layerCount, nodesPerLayer) {
  const graph = makeGraph(layerCount, nodesPerLayer);
  const nodesById = new Map();
  for (const node of graph.nodes) {
    nodesById.set(node.id, node);
  }

  const startedAt = performance.now();
  const before = aggregateBefore(graph);
  const switchedAt = performance.now();
  const after = aggregateAfter(graph, nodesById);
  const finishedAt = performance.now();

  const beforeMs = switchedAt - startedAt;
  const afterMs = finishedAt - switchedAt;
  // Guard against a zero-length AFTER interval on coarse timers.
  const speedup = afterMs > 0 ? beforeMs / afterMs : Infinity;
  const parity = JSON.stringify(before) === JSON.stringify(after);

  const shape = `${label} (${layerCount} layers × ${nodesPerLayer} nodes = ${graph.nodes.length} total): `;
  const timings = `BEFORE ${beforeMs.toFixed(1)}ms | AFTER ${afterMs.toFixed(1)}ms | `;
  const verdict = `${speedup.toFixed(1)}× faster | parity ${parity}`;
  console.log(shape + timings + verdict);
}
|
||||
|
||||
// Benchmark matrix: (label, layer count, nodes per layer), run in increasing size.
bench("small", 10, 50);
bench("medium", 30, 100);
bench("large", 50, 200);
// 100 × 200 = 20,000 nodes; labelled after the graph size reported in issue #102.
bench("issue#102 shape", 100, 200);
|
||||
@@ -0,0 +1,80 @@
|
||||
// Stage 1 ELK layout perf benchmark.
|
||||
//
|
||||
// Mirrors `applyElkLayout` from `src/utils/elk-layout.ts` using `elkjs`
|
||||
// directly. The dashboard build is a Vite bundle (hashed chunks), so it has
|
||||
// no per-module `dist/utils/elk-layout.js` we can import. The Stage 1 hot
|
||||
// path is `elk.layout()` on a sized input, which we reproduce faithfully
|
||||
// here — same default node dimensions, same dim-defaulting behavior.
|
||||
//
|
||||
// Targets (spec §8.3):
|
||||
// - Stage 1 < 200ms at 500 nodes
|
||||
// - Stage 1 < 500ms at 3000 nodes
|
||||
//
|
||||
// Usage:
|
||||
// node understand-anything-plugin/packages/dashboard/scripts/benchmark-layout.mjs
|
||||
|
||||
import { performance } from "node:perf_hooks";
|
||||
import ELK from "elkjs/lib/elk.bundled.js";
|
||||
|
||||
// Keep in lockstep with NODE_WIDTH / NODE_HEIGHT in src/utils/layout.ts.
|
||||
const DEFAULT_NODE_WIDTH = 280;
|
||||
const DEFAULT_NODE_HEIGHT = 120;
|
||||
|
||||
const elk = new ELK();
|
||||
|
||||
/**
 * Return a copy of `children` in which every node has a width and a height.
 *
 * Missing (null/undefined) dimensions fall back to DEFAULT_NODE_WIDTH /
 * DEFAULT_NODE_HEIGHT; nested `children` arrays are processed recursively.
 * Input nodes are shallow-copied, never mutated. Mirrors repairElkInput's
 * ensureNodeDimensions step so the repair pass is part of the measured path.
 *
 * @param {Array<object>} children - ELK child nodes.
 * @returns {Array<object>} New array of sized node copies.
 */
function fillDims(children) {
  return children.map((child) => {
    const sized = Object.assign({}, child);
    sized.width ??= DEFAULT_NODE_WIDTH;
    sized.height ??= DEFAULT_NODE_HEIGHT;
    if (sized.children) {
      sized.children = fillDims(sized.children);
    }
    return sized;
  });
}
|
||||
|
||||
/**
 * Run ELK on `input` after defaulting missing node dimensions.
 *
 * The input object is not mutated; a shallow copy with repaired children is
 * what gets handed to `elk.layout`.
 *
 * @param {{children: Array<object>}} input - ELK graph description.
 * @returns {Promise<object>} Whatever `elk.layout` resolves to.
 */
async function applyElkLayout(input) {
  const children = fillDims(input.children);
  const repaired = { ...input, children };
  return elk.layout(repaired);
}
|
||||
|
||||
/**
 * Synthetic Stage 1 graph: fixed-size (400×300) top-level containers joined by
 * a random sparse edge mesh (each ordered pair i < j gets an edge with
 * probability 0.3, so the output differs between runs).
 *
 * `nodeCount` only influences the default container count, which is
 * ceil(nodeCount / 25) capped at 20.
 * NOTE(review): because of the cap, every nodeCount >= 500 yields the same 20
 * containers — confirm that this is the intended Stage 1 shape.
 *
 * @param {number} nodeCount - Nominal leaf count used to derive the container count.
 * @param {number} [containerCount] - Number of containers to generate.
 * @returns {{id: string, children: Array<object>, edges: Array<object>}} ELK input graph.
 */
function makeGraph(nodeCount, containerCount = Math.min(20, Math.ceil(nodeCount / 25))) {
  const children = Array.from({ length: containerCount }, (_, index) => {
    return { id: `c${index}`, width: 400, height: 300 };
  });

  const edges = [];
  for (let from = 0; from < containerCount; from++) {
    for (let to = from + 1; to < containerCount; to++) {
      if (Math.random() >= 0.3) continue;
      edges.push({
        id: `e-${from}-${to}`,
        sources: [`c${from}`],
        targets: [`c${to}`],
      });
    }
  }

  return { id: "root", children, edges };
}
|
||||
|
||||
/**
 * Time a single ELK layout pass over a synthetic graph and log the result.
 *
 * Graph construction happens before the timer starts; only
 * `applyElkLayout` is measured. One run, no warm-up.
 *
 * @param {string} label - Name shown at the start of the output line.
 * @param {number} n - Nominal node count passed to makeGraph.
 * @returns {Promise<number>} Elapsed layout time in milliseconds.
 */
async function bench(label, n) {
  const input = makeGraph(n);

  const startedAt = performance.now();
  await applyElkLayout(input);
  const elapsedMs = performance.now() - startedAt;

  console.log(`${label} (${n} nodes): ${elapsedMs.toFixed(1)}ms`);
  return elapsedMs;
}
|
||||
|
||||
// Run sequentially (top-level await) so the measurements do not overlap.
await bench("Stage1", 500);
await bench("Stage1", 1000);
await bench("Stage1", 3000);
|
||||
@@ -0,0 +1,267 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# Root of the markdown knowledge base that is scanned for documents.
# NOTE(review): absolute, machine-specific Windows path — the script only runs
# unchanged on a machine with this exact directory layout.
ROOT = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\wishfulfilled-wiki")
# Dashboard `public` directory that receives a copy of the generated JSON files.
DASHBOARD_PUBLIC = Path(r"D:\AIcoding\WishFulfilled\知识库\under-anything\Understand-Anything-main\understand-anything-plugin\packages\dashboard\public")
# Output directory inside the knowledge base itself.
UA_DIR = ROOT / ".understand-anything"
|
||||
|
||||
# Layer definitions, in display/flow order. Each entry has:
#   id          - layer id, also used as the node category
#   name        - display name
#   description - layer summary text
#   paths       - ROOT-relative files or directories belonging to the layer
SECTIONS = [
    {
        "id": "layer-overview",
        "name": "知识库入口",
        "description": "知识库使用说明、首页、知识地图和问答入口。先从这里理解知识库结构与检索方式。",
        "paths": ["知识库使用说明.md", "欢迎.md", "00_首页"],
    },
    {
        "id": "layer-requirements",
        "name": "需求文档",
        "description": "所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。",
        "paths": ["05_需求文档"],
    },
    {
        "id": "layer-milestones",
        "name": "里程碑",
        "description": "项目阶段计划、里程碑节点、评审记录、准入准出和交付物节奏。",
        "paths": ["06_里程碑", "02_项目管理流程"],
    },
    {
        "id": "layer-technical",
        "name": "技术文档",
        "description": "系统架构、数据模型、接口说明、技术方案和技术决策。",
        "paths": ["07_技术文档"],
    },
    {
        "id": "layer-testing",
        "name": "测试相关",
        "description": "测试计划、测试用例、缺陷记录、验收记录和上线检查。",
        "paths": ["08_测试相关"],
    },
    {
        "id": "layer-agent",
        "name": "Agent检索",
        "description": "检索说明、关键词、同义词、来源索引和持续更新验证流程。",
        "paths": ["04_Agent检索"],
    },
]

# A markdown file is skipped when any component of its ROOT-relative path
# matches one of these names.
EXCLUDE_DIRS = {".git", ".obsidian", ".understand-anything", "raw", "99_归档"}
|
||||
|
||||
|
||||
def rel(path: Path) -> str:
    """Return ``path`` relative to ROOT, using forward slashes.

    Raises ValueError (from ``Path.relative_to``) when ``path`` is not under ROOT.
    """
    relative = path.relative_to(ROOT)
    return relative.as_posix()
|
||||
|
||||
|
||||
def iter_markdown() -> list[Path]:
    """Collect every ``*.md`` file under ROOT, sorted by ROOT-relative path.

    Files are dropped when any component of their relative path (directory or
    file name) appears in EXCLUDE_DIRS.
    """

    def is_excluded(candidate: Path) -> bool:
        components = candidate.relative_to(ROOT).parts
        return not EXCLUDE_DIRS.isdisjoint(components)

    kept = [candidate for candidate in ROOT.rglob("*.md") if not is_excluded(candidate)]
    kept.sort(key=rel)
    return kept
|
||||
|
||||
|
||||
def read_text(path: Path) -> str:
    """Read ``path`` as UTF-8 text, silently dropping undecodable bytes."""
    with path.open("r", encoding="utf-8", errors="ignore") as handle:
        return handle.read()
|
||||
|
||||
|
||||
def title_from_content(path: Path, content: str) -> str:
    """Return the text of the first level-1 markdown heading in ``content``.

    A heading is a line that, once stripped, starts with ``"# "``. When no such
    line exists the file stem of ``path`` is used instead.
    """
    stripped_lines = (raw.strip() for raw in content.splitlines())
    first_heading = next((ln for ln in stripped_lines if ln.startswith("# ")), None)
    if first_heading is None:
        return path.stem
    return first_heading[2:].strip()
|
||||
|
||||
|
||||
def summary_from_content(content: str) -> str:
    """Return the first prose line of a markdown document, capped at 180 chars.

    Skipped while searching: a leading YAML front-matter block, blank lines,
    headings (``#``), horizontal rules / ``---`` lines, table rows (``|``) and
    code-fence markers. Falls back to a fixed placeholder when nothing is left.

    Front matter is only recognised when ``---`` is the first non-blank line of
    the document. A ``---`` that appears later is a horizontal rule and must
    not hide the text that follows it.
    """
    in_frontmatter = False
    seen_first_line = False
    for raw in content.splitlines():
        line = raw.strip()
        if not line:
            continue
        if not seen_first_line:
            seen_first_line = True
            # Tolerate a UTF-8 BOM glued to the opening delimiter.
            if line.lstrip("\ufeff") == "---":
                in_frontmatter = True
                continue
        if in_frontmatter:
            if line == "---":
                in_frontmatter = False
            continue
        if line.startswith("#") or line.startswith("---"):
            continue
        if line.startswith("|") or line.startswith("```"):
            continue
        return line[:180]
    return "知识库文档。"
|
||||
|
||||
|
||||
def tags_for(path: Path) -> list[str]:
    """Derive the tag list for a document from its location and file name.

    The first ROOT-relative path component always comes first. A category tag
    is then added when its keyword occurs in the file stem or its directory
    name is one of the path components. Duplicates are removed, keeping the
    first occurrence.
    """
    components = path.relative_to(ROOT).parts
    stem = path.stem
    # (keyword looked up in the stem, directory component, resulting tag)
    rules = (
        ("需求", "05_需求文档", "需求文档"),
        ("测试", "08_测试相关", "测试相关"),
        ("技术", "07_技术文档", "技术文档"),
        ("里程碑", "06_里程碑", "里程碑"),
        ("Agent", "04_Agent检索", "Agent检索"),
    )

    candidates = list(components[:1])
    candidates.extend(tag for keyword, folder, tag in rules if keyword in stem or folder in components)

    ordered: list[str] = []
    for tag in candidates:
        if tag not in ordered:
            ordered.append(tag)
    return ordered
|
||||
|
||||
|
||||
def layer_for(path: Path) -> str | None:
    """Return the id of the first SECTIONS entry that owns ``path``, else None.

    A section owns a file when the file's ROOT-relative path equals one of the
    section's ``paths`` entries or lies underneath it as a directory.
    """
    relative = rel(path)

    def owns(prefix: str) -> bool:
        normalized = prefix.replace("\\", "/")
        return relative == normalized or relative.startswith(normalized.rstrip("/") + "/")

    for section in SECTIONS:
        if any(owns(prefix) for prefix in section["paths"]):
            return section["id"]
    return None
|
||||
|
||||
|
||||
def edge(source: str, target: str, type_: str = "documents", weight: float = 0.8, description: str = "") -> dict:
    """Build one forward-directed graph edge record.

    ``type_`` must be ``documents``, ``related`` or ``depends_on``; any other
    value is coerced to ``related``. An empty ``description`` defaults to the
    resulting edge type.
    """
    known_types = frozenset(("documents", "related", "depends_on"))
    edge_type = type_ if type_ in known_types else "related"

    record: dict = {}
    record["source"] = source
    record["target"] = target
    record["type"] = edge_type
    record["direction"] = "forward"
    record["description"] = description if description else edge_type
    record["weight"] = weight
    return record
|
||||
|
||||
|
||||
def main() -> None:
    """Generate ``knowledge-graph.json`` and ``meta.json`` from the knowledge base.

    Steps:
      1. Turn every markdown file that belongs to a SECTIONS layer into a
         document node (files outside every layer are skipped).
      2. Add one virtual ``flow:<layer id>`` node per layer and chain those
         nodes in SECTIONS order.
      3. Link each flow node to the documents of its layer, and link documents
         whose content mentions another document's file name.
      4. Write the graph and a small metadata file to both UA_DIR and
         DASHBOARD_PUBLIC, then print a one-line summary.
    """
    nodes = []
    layer_node_ids: dict[str, list[str]] = {s["id"]: [] for s in SECTIONS}
    # ROOT-relative file path -> node id, used for reference detection below.
    path_to_id: dict[str, str] = {}

    for path in iter_markdown():
        lid = layer_for(path)
        if not lid:
            continue
        content = read_text(path)
        rp = rel(path)
        # Drop the trailing ".md" (iter_markdown only yields *.md files).
        node_id = "doc:" + rp[:-3]
        path_to_id[rp] = node_id
        node = {
            "id": node_id,
            "type": "document",
            "name": title_from_content(path, content),
            "filePath": rp,
            "summary": summary_from_content(content),
            "tags": tags_for(path),
            # Complexity is a pure length heuristic (characters of content).
            "complexity": "moderate" if len(content) > 4000 else "simple",
            "knowledgeMeta": {
                "content": content,
                "wikilinks": [],
                "category": lid,
            },
        }
        nodes.append(node)
        layer_node_ids[lid].append(node_id)

    # Add one virtual process node per layer so the overview forms a clear flow even when a layer has many docs.
    flow_nodes = []
    for order, section in enumerate(SECTIONS, start=1):
        node_id = f"flow:{section['id']}"
        flow_nodes.append(node_id)
        docs = layer_node_ids[section["id"]]
        # Flow nodes have no "filePath"; later steps use that to tell them
        # apart from real documents.
        node = {
            "id": node_id,
            "type": "document",
            "name": f"{order}. {section['name']}",
            "summary": section["description"],
            "tags": ["流程入口", section["name"]],
            "complexity": "simple",
            "knowledgeMeta": {
                "content": f"# {section['name']}\n\n{section['description']}\n\n本层包含 {len(docs)} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。",
                "wikilinks": [],
                "category": section["id"],
            },
        }
        nodes.append(node)
        # Keep the flow node first in its layer; the [1:] slice below relies on it.
        layer_node_ids[section["id"]].insert(0, node_id)

    edges = []
    # Chain consecutive flow nodes to form the main flow.
    for a, b in zip(flow_nodes, flow_nodes[1:]):
        edges.append(edge(a, b, "documents", 1.0, "知识库主流程"))

    # Connect each layer's flow node to every document in that layer.
    for section in SECTIONS:
        root_id = f"flow:{section['id']}"
        for doc_id in layer_node_ids[section["id"]][1:]:
            edges.append(edge(root_id, doc_id, "documents", 0.65, "本层文档"))

    # Important requirement docs should build on their upstream links when those linked files exist in this knowledge base.
    # Detection is a plain substring test for the other file's name (including
    # ".md") anywhere in the content; every document pair is compared.
    # NOTE(review): links written without the ".md" extension are not matched
    # by this test — confirm whether that is acceptable for this wiki.
    for node in nodes:
        if not node.get("filePath"):
            continue
        content = node.get("knowledgeMeta", {}).get("content", "")
        for rp, target_id in path_to_id.items():
            if rp != node["filePath"] and Path(rp).name in content:
                # Edge runs from the referenced document to the referencing one.
                edges.append(edge(target_id, node["id"], "depends_on", 0.7, "文档引用关系"))

    layers = [
        {
            "id": section["id"],
            "name": section["name"],
            "description": section["description"],
            "nodeIds": layer_node_ids[section["id"]],
        }
        for section in SECTIONS
    ]

    graph = {
        "version": "1.0.0",
        "kind": "codebase",
        "project": {
            "name": "如愿知识库",
            "languages": ["markdown"],
            "frameworks": ["Understand-Anything", "Obsidian"],
            "description": "按需求文档、里程碑、技术文档、测试相关、Agent检索组织的流程式知识库。",
            "analyzedAt": datetime.now(timezone.utc).isoformat(),
            "gitCommitHash": "",
        },
        "nodes": nodes,
        "edges": edges,
        "layers": layers,
        # One tour stop per layer, pointing at that layer's flow node.
        "tour": [
            {
                "order": i,
                "title": section["name"],
                "description": section["description"],
                "nodeIds": [f"flow:{section['id']}"],
            }
            for i, section in enumerate(SECTIONS, start=1)
        ],
    }

    UA_DIR.mkdir(parents=True, exist_ok=True)
    DASHBOARD_PUBLIC.mkdir(parents=True, exist_ok=True)
    # The same JSON is written to both the knowledge base and the dashboard.
    for target in [UA_DIR / "knowledge-graph.json", DASHBOARD_PUBLIC / "knowledge-graph.json"]:
        target.write_text(json.dumps(graph, ensure_ascii=False, indent=2), encoding="utf-8")

    meta = {
        "lastAnalyzedAt": graph["project"]["analyzedAt"],
        "gitCommitHash": "",
        "version": "1.0.0",
        # Counts real documents only (flow nodes carry no filePath).
        "analyzedFiles": len([n for n in nodes if n.get("filePath")]),
        "theme": {"presetId": "dark", "accentId": "cyan"},
    }
    for target in [UA_DIR / "meta.json", DASHBOARD_PUBLIC / "meta.json"]:
        target.write_text(json.dumps(meta, ensure_ascii=False, indent=2), encoding="utf-8")

    print(f"Generated {len(nodes)} nodes, {len(edges)} edges, {len(layers)} layers")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,563 @@
|
||||
import http from "node:http";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
|
||||
// Directory of this module (ES modules do not provide __dirname).
const __dirname = path.dirname(fileURLToPath(import.meta.url));
// Default static root: a "wishfulfilled-dashboard" directory five levels above this file.
const defaultStaticRoot = path.resolve(__dirname, "..", "..", "..", "..", "..", "wishfulfilled-dashboard");
// RAG_STATIC_ROOT overrides the static root.
const staticRoot = process.env.RAG_STATIC_ROOT || defaultStaticRoot;
// RAG_GRAPH_PATH overrides the graph file; default is knowledge-graph.json inside the static root.
const graphPath = process.env.RAG_GRAPH_PATH || path.join(staticRoot, "knowledge-graph.json");
// Port precedence: PORT, then RAG_PORT, then 8080. A non-numeric value yields NaN.
const port = Number(process.env.PORT || process.env.RAG_PORT || 8080);
// Defaults to 0.0.0.0 (all interfaces) unless HOST is set.
const host = process.env.HOST || "0.0.0.0";
|
||||
|
||||
// File extension (lowercase, with leading dot) → Content-Type header value.
const MIME = {
  ".html": "text/html; charset=utf-8",
  ".js": "text/javascript; charset=utf-8",
  ".css": "text/css; charset=utf-8",
  ".json": "application/json; charset=utf-8",
  ".svg": "image/svg+xml",
  ".ico": "image/x-icon",
  ".png": "image/png",
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".woff": "font/woff",
  ".woff2": "font/woff2",
};
|
||||
|
||||
// Query-expansion table: domain term → related phrases, table and field names.
// expandTerms() adds a key and all of its values to the query when the key
// matches the raw query or one of its tokens.
const SYNONYMS = {
  回评: ["评价提交", "评价展示", "客服邀评", "评价结果追踪", "review submission", "review display", "review follow-up", "邀评", "留评"],
  黑名单: ["blacklist", "blacklist_entities", "风险", "风控", "风险案件", "risk_cases", "risk_signals", "反欺诈", "拦截"],
  测评: ["评价计划", "review plan", "评价提交", "额度统计", "测评单"],
  免评: ["KOC", "KOL", "内容发布", "引流", "带货", "exemption"],
  真实人: ["person", "person_profiles", "跨账号归并", "身份归并", "真实人归并"],
  额度: ["4/4/12", "person_quota_ledgers", "额度台账", "预占", "额度与频控"],
  对外API: ["对外 API 契约", "接口契约", "API 草案", "接口说明", "对外接口"],
  工单: ["客服工单", "support_tickets", "客服跟进", "support_followups"],
  风险: ["风险与反欺诈", "risk_signals", "risk_cases", "黑名单", "反欺诈"],
};
|
||||
|
||||
// Module-level cache maintained by loadGraph().
// Last parsed graph, or null before the first load.
let graphCache = null;
// mtimeMs of the graph file at the time graphCache was filled.
let graphMtime = 0;
// Search chunks derived from graphCache's nodes.
let chunksCache = [];
|
||||
|
||||
/**
 * Send `data` as a pretty-printed JSON response.
 *
 * The response is marked non-cacheable and carries permissive CORS headers
 * (any origin; GET, POST and OPTIONS).
 *
 * @param {import("node:http").ServerResponse} res - Response to write to.
 * @param {number} status - HTTP status code.
 * @param {*} data - JSON-serializable payload.
 * @returns {void}
 */
function sendJson(res, status, data) {
  // Serialize first so a failure here happens before any header is written.
  const payload = JSON.stringify(data, null, 2);
  const headers = {
    "Content-Type": "application/json; charset=utf-8",
    "Cache-Control": "no-store",
    "Access-Control-Allow-Origin": "*",
    "Access-Control-Allow-Headers": "Content-Type, Authorization",
    "Access-Control-Allow-Methods": "GET, POST, OPTIONS",
  };
  res.writeHead(status, headers);
  res.end(payload);
}
|
||||
|
||||
/**
 * Coerce a loosely-typed text field to a string.
 *
 * @param {*} value - A string, an array of parts, or anything else.
 * @returns {string} The string itself, array items joined by single spaces,
 *   or "" for every other type.
 */
function normalizeText(value) {
  if (typeof value === "string") return value;
  return Array.isArray(value) ? value.join(" ") : "";
}
|
||||
|
||||
/**
 * Remove HTML markup from `text` and decode the basic character entities.
 *
 * `<script>` and `<style>` elements are dropped together with their content,
 * every remaining tag is replaced by a space, and `&nbsp;`, `&lt;`, `&gt;`
 * and `&amp;` are decoded.
 *
 * @param {string} text - Source text; must be a string.
 * @returns {string} Text without tags, with entities decoded.
 */
function stripHtml(text) {
  return text
    .replace(/<script[\s\S]*?<\/script>/gi, " ")
    .replace(/<style[\s\S]*?<\/style>/gi, " ")
    .replace(/<[^>]+>/g, " ")
    .replace(/&nbsp;/g, " ")
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    // Decode &amp; last so that "&amp;lt;" becomes the literal "&lt;", not "<".
    .replace(/&amp;/g, "&");
}
|
||||
|
||||
/**
 * Remove `<number>|` prefixes that were injected in front of each line.
 *
 * A line qualifies when it is optional whitespace, digits, a pipe and at
 * least one more character. The text is only rewritten when enough lines
 * qualify — at least 3 and at least a quarter of all lines — otherwise it is
 * returned unchanged so ordinary content is not damaged.
 *
 * @param {string} text - Possibly line-numbered text.
 * @returns {string} Text with prefixes removed, or the original text.
 */
function stripInjectedLineNumbers(text) {
  const numberedLine = /^\s*\d+\|(.+)$/;
  const lines = text.split("\n");
  const rewritten = [];
  let strippedCount = 0;

  for (const line of lines) {
    const found = numberedLine.exec(line);
    if (found) {
      strippedCount += 1;
      rewritten.push(found[1]);
    } else {
      rewritten.push(line);
    }
  }

  const required = Math.max(3, Math.floor(lines.length / 4));
  if (strippedCount < required) return text;
  return rewritten.join("\n");
}
|
||||
|
||||
/**
 * Build a punctuation-insensitive comparison key: lowercase the value and
 * remove whitespace, markdown markers and both ASCII and CJK punctuation.
 *
 * Non-ASCII punctuation is written as escapes so the pattern survives
 * re-encoding: full-width parentheses (FF08/FF09), corner and angle brackets
 * (3010/3011, 300A/300B), curly double quotes (201C/201D), full-width comma
 * (FF0C), ideographic full stop and comma (3002/3001), full-width colon,
 * semicolon, exclamation and question mark (FF1A/FF1B/FF01/FF1F).
 *
 * @param {*} value - Any value; null/undefined/"" yield "".
 * @returns {string} Lowercased text without separators or punctuation.
 */
function compactKey(value) {
  return String(value || "")
    .toLowerCase()
    .replace(
      /[\s`*_>#|\-\[\]()\uFF08\uFF09\u3010\u3011\u300A\u300B\u201C\u201D"',\uFF0C\u3002\u3001:\uFF1A;\uFF1B.\/\\!?\uFF01\uFF1F]+/g,
      "",
    );
}
|
||||
|
||||
/**
 * Normalize text for substring search: strip injected line numbers and HTML,
 * lowercase, turn brackets and markdown markers into spaces, and collapse
 * every whitespace run into a single space.
 *
 * @param {*} value - Any value; null/undefined/"" yield "".
 * @returns {string} Trimmed, lowercased, single-spaced text.
 */
function normalizeForSearch(value) {
  const cleaned = stripHtml(stripInjectedLineNumbers(String(value || "")));
  // Applied in order; each pattern is replaced by one space.
  const toSpace = [
    /[\t\r]+/g,
    /[()]/g,
    /[【】《》“”]/g,
    /[_*`>#|\[\]]/g,
    /\s+/g,
  ];
  let result = cleaned.toLowerCase();
  for (const pattern of toSpace) {
    result = result.replace(pattern, " ");
  }
  return result.trim();
}
|
||||
|
||||
/**
 * Stringify and trim every value, drop the empty ones and remove duplicates
 * while keeping first-seen order. Falsy inputs (null, undefined, 0, "")
 * become "" and are therefore dropped.
 *
 * @param {Array<*>} values - Values to normalize.
 * @returns {string[]} Distinct non-empty strings.
 */
function unique(values) {
  const seen = new Set();
  values.forEach((value) => {
    const text = String(value || "").trim();
    if (text) seen.add(text);
  });
  return [...seen];
}
|
||||
|
||||
/**
 * Split text into search tokens.
 *
 * Produces, in this order and de-duplicated: latin/numeric words of two or
 * more characters, runs of two or more CJK characters, CJK bigrams, CJK
 * trigrams, and single CJK characters that are not on the stop list.
 * N-grams are built from the sequence of all CJK characters in the text, so
 * they can span non-CJK gaps.
 *
 * @param {*} text - Text to tokenize.
 * @returns {string[]} Distinct tokens.
 */
function tokenizeText(text) {
  const stopChars = /[是什么吗呢的了和与及或在中为]/;
  const normalized = normalizeForSearch(text);
  const collect = (pattern) => normalized.match(pattern) || [];

  const latinWords = collect(/[a-z0-9_.\/+-]{2,}/g);
  const cjkRuns = collect(/[\u4e00-\u9fa5]{2,}/g);
  const cjkChars = collect(/[\u4e00-\u9fa5]/g);

  const bigrams = cjkChars.slice(0, -1).map((ch, i) => `${ch}${cjkChars[i + 1]}`);
  const trigrams = cjkChars.slice(0, -2).map((ch, i) => `${ch}${cjkChars[i + 1]}${cjkChars[i + 2]}`);
  const meaningfulChars = cjkChars.filter((ch) => !stopChars.test(ch));

  return unique([...latinWords, ...cjkRuns, ...bigrams, ...trigrams, ...meaningfulChars]);
}
|
||||
|
||||
/**
 * Classify a query by keyword. Rules are checked top to bottom and the first
 * match wins, so earlier categories take precedence.
 *
 * @param {*} query - Raw query text.
 * @returns {string} One of "api", "data_model", "process", "rule",
 *   "responsibility", "test", "milestone", "definition" or "general".
 */
function detectQuestionType(query) {
  const rules = [
    [/api|接口|契约|endpoint|对外/, "api"],
    [/数据|字段|表|对象|模型|schema|建表/, "data_model"],
    [/流程|怎么走|如何|步骤|流转|处理/, "process"],
    [/规则|限制|口径|额度|频控|条件/, "rule"],
    [/谁|负责|职责|owner|角色/, "responsibility"],
    [/测试|验收|用例|缺陷|上线/, "test"],
    [/里程碑|阶段|计划|节点/, "milestone"],
    [/是什么|定义|含义|意思|解释/, "definition"],
  ];
  const text = normalizeForSearch(query);
  const matched = rules.find(([pattern]) => pattern.test(text));
  return matched ? matched[1] : "general";
}
|
||||
|
||||
/**
 * Expand a query with entries from SYNONYMS.
 *
 * An entry is selected when its key (compared as a compact key) occurs in the
 * raw query, or when a token contains the key or the key contains a token.
 * For every selected entry the key and all of its values are returned.
 *
 * Tokens whose compact key is empty (punctuation-only, e.g. "...") are
 * ignored: `String.prototype.includes("")` is always true, so such a token
 * would otherwise select every entry in the table.
 *
 * @param {string[]} tokens - Tokens of the query.
 * @param {*} raw - The raw query text.
 * @returns {string[]} Distinct expansion terms, in table order.
 */
function expandTerms(tokens, raw) {
  const rawKey = compactKey(raw);
  // Computed once instead of once per synonym entry.
  const tokenKeys = tokens.map((token) => compactKey(token)).filter((key) => key.length > 0);

  const expanded = [];
  for (const [term, values] of Object.entries(SYNONYMS)) {
    const termKey = compactKey(term);
    const matched =
      rawKey.includes(termKey) ||
      tokenKeys.some((tokenKey) => tokenKey.includes(termKey) || termKey.includes(tokenKey));
    if (matched) {
      expanded.push(term, ...values);
    }
  }
  return unique(expanded);
}
|
||||
|
||||
/**
 * Analyse a raw query into the structure used by the scoring functions.
 *
 * @param {*} raw - Raw query text.
 * @returns {{raw: *, normalized: string, compact: string, tokens: string[],
 *   coreTerms: string[], expandedTerms: string[], questionType: string}}
 *   `coreTerms` holds the trimmed raw text, the normalized and compact forms
 *   and every token of two or more characters, de-duplicated.
 */
function processQuery(raw) {
  const tokens = tokenizeText(raw);
  const normalized = normalizeForSearch(raw);
  const compact = compactKey(raw);

  const multiCharTokens = tokens.filter((token) => token.length >= 2);
  const coreTerms = unique([String(raw || "").trim(), normalized, compact, ...multiCharTokens]);

  return {
    raw,
    normalized,
    compact,
    tokens,
    coreTerms,
    expandedTerms: expandTerms(tokens, raw),
    questionType: detectQuestionType(raw),
  };
}
|
||||
|
||||
/**
 * Return the knowledge graph, re-reading it when the file's mtime changed.
 *
 * A reload also rebuilds `chunksCache`. The three cache variables are only
 * updated after parsing and chunk building have both succeeded; if either
 * throws, the previous cache stays intact and the next call retries instead
 * of serving a new graph with stale chunks.
 *
 * @returns {object} The parsed graph.
 * @throws If the file cannot be stat'ed or read, or is not valid JSON.
 */
function loadGraph() {
  const { mtimeMs } = fs.statSync(graphPath);
  if (graphCache && mtimeMs === graphMtime) return graphCache;

  const graph = JSON.parse(fs.readFileSync(graphPath, "utf8"));
  const chunks = buildChunks(graph.nodes || []);

  // Commit everything together once nothing can fail any more.
  graphCache = graph;
  graphMtime = mtimeMs;
  chunksCache = chunks;
  return graph;
}
|
||||
|
||||
/**
 * Extract a node's document body as plain text: the `knowledgeMeta.content`
 * field with injected line numbers and HTML removed.
 *
 * @param {object|null|undefined} node - Graph node.
 * @returns {string} Cleaned content, "" when the node has none.
 */
function nodeContent(node) {
  const rawContent = normalizeText(node?.knowledgeMeta?.content);
  const withoutLineNumbers = stripInjectedLineNumbers(rawContent);
  return stripHtml(withoutLineNumbers);
}
|
||||
|
||||
/**
 * Determine the layer a node belongs to.
 *
 * @param {object|null|undefined} node - Graph node.
 * @returns {string} `knowledgeMeta.category` when present, otherwise the first
 *   tag that names a known layer, otherwise "".
 */
function nodeLayer(node) {
  const category = normalizeText(node?.knowledgeMeta?.category);
  if (category) return category;

  const tags = node?.tags || [];
  const layerTag = tags.find((tag) => /需求|技术|测试|里程碑/.test(tag));
  return layerTag || "";
}
|
||||
|
||||
/**
 * Classify a section by keywords found in its heading or body
 * (case-insensitive). Rules are checked in order; the first match wins.
 *
 * @param {string} heading - Section heading.
 * @param {string} content - Section body.
 * @returns {string} "api_contract", "data_model", "business_rule", "process",
 *   "faq" or "general".
 */
function detectChunkType(heading, content) {
  const rules = [
    [/api|接口|契约|endpoint|对外/, "api_contract"],
    [/数据模型|字段|表结构|对象|schema|建表/, "data_model"],
    [/业务规则|规则|口径|额度|频控|限制/, "business_rule"],
    [/流程|流转|步骤|时序|处理/, "process"],
    [/faq|常见问题|是什么|问答/, "faq"],
  ];
  const haystack = `${heading}\n${content}`.toLowerCase();
  for (const [pattern, type] of rules) {
    if (pattern.test(haystack)) return type;
  }
  return "general";
}
|
||||
|
||||
/**
 * Split markdown text into sections at ATX headings (`#` … `######`).
 *
 * Each section's text starts with its own heading line. Text that precedes
 * the first heading is returned under the heading "全文". Lines may end in
 * LF or CRLF; section text is always joined with LF. Heading-looking lines
 * inside fenced code blocks (``` or ~~~) are treated as ordinary content, so
 * shell or Python comments do not create bogus sections.
 *
 * @param {string} text - Markdown source.
 * @returns {Array<{heading: string, text: string}>} Sections in document
 *   order; an empty array for blank input.
 */
function splitSections(text) {
  const source = text.trim();
  if (!source) return [];

  const sections = [];
  let currentHeading = "全文";
  let buffer = [];
  // Fence character ("`" or "~") of the open code block, or null outside one.
  let openFence = null;

  const flush = () => {
    if (buffer.length > 0) sections.push({ heading: currentHeading, text: buffer.join("\n") });
  };

  for (const line of source.split(/\r?\n/)) {
    const fence = line.match(/^\s{0,3}(`{3,}|~{3,})/)?.[1];
    if (fence) {
      if (openFence === null) {
        openFence = fence[0];
      } else if (fence[0] === openFence) {
        openFence = null;
      }
      buffer.push(line);
      continue;
    }

    const heading = openFence === null ? line.match(/^\s{0,3}#{1,6}\s+(.+)$/)?.[1]?.trim() : undefined;
    if (heading) {
      flush();
      currentHeading = heading;
      buffer = [line];
    } else {
      buffer.push(line);
    }
  }

  flush();
  return sections;
}
|
||||
|
||||
/**
 * Turn graph nodes into searchable chunks, one chunk per markdown section.
 *
 * A node without content is represented by its "title block" (name, summary,
 * file path and tags); the same block stands in for any section whose text
 * is blank. Chunk ids have the form `<node id>#<section index>`.
 *
 * @param {Array<object>} nodes - Graph nodes.
 * @returns {Array<object>} Chunks carrying raw, normalized and compact text
 *   plus document metadata.
 */
function buildChunks(nodes) {
  const chunks = [];

  for (const node of nodes) {
    const body = nodeContent(node);
    const titleBlock = [node.name, node.summary, node.filePath, (node.tags || []).join(" ")].join("\n");

    splitSections(body || titleBlock).forEach((section, index) => {
      const content = section.text.trim() || titleBlock;
      chunks.push({
        chunkId: `${node.id}#${index}`,
        nodeId: node.id,
        docTitle: node.name || node.id,
        docPath: node.filePath || node.id,
        sectionTitle: section.heading,
        content,
        normalizedContent: normalizeForSearch(content),
        compactContent: compactKey(content),
        tags: node.tags || [],
        summary: node.summary || "",
        chunkType: detectChunkType(section.heading, content),
        layer: nodeLayer(node),
      });
    });
  }

  return chunks;
}
|
||||
|
||||
/**
 * Count non-overlapping occurrences of `needle` in `haystack`, scanning left
 * to right.
 *
 * @param {string} haystack - Text to search.
 * @param {string} needle - Substring to count; an empty needle counts as 0.
 * @returns {number} Number of occurrences.
 */
function countOccurrences(haystack, needle) {
  if (!needle) return 0;

  let total = 0;
  let position = haystack.indexOf(needle);
  while (position !== -1) {
    total += 1;
    position = haystack.indexOf(needle, position + needle.length);
  }
  return total;
}
|
||||
|
||||
/**
 * Score how well one text field matches the query.
 *
 * Three signals are added up, each scaled by `multiplier`:
 *   - 70 for every whole-query phrase (normalized or lowercased raw form, two
 *     or more characters) found in the normalized field;
 *   - 80 when the compact query occurs in the compact field;
 *   - for every core term found: min(occurrences, 4) × min(term length, 10).
 *
 * @param {*} field - Field text.
 * @param {object} query - Result of processQuery.
 * @param {string} fieldName - Label used in the explanation messages.
 * @param {number} [multiplier=1] - Weight of this field.
 * @returns {{score: number, reasons: Array<object>}} Score plus explanations.
 */
function fieldScore(field, query, fieldName, multiplier = 1) {
  const haystack = normalizeForSearch(field);
  const haystackKey = compactKey(field);
  const reasons = [];
  let score = 0;

  const award = (points, reason) => {
    score += points;
    reasons.push({ ...reason, score: points });
  };

  // Whole-query phrase hits.
  const phraseCandidates = [query.normalized, String(query.raw || "").toLowerCase()];
  const phrases = unique(phraseCandidates.filter((candidate) => candidate.length >= 2));
  phrases.forEach((needle) => {
    if (!haystack.includes(needle)) return;
    award(70 * multiplier, { type: "exact", field: fieldName, message: `${fieldName} 原文命中:${needle}` });
  });

  // Punctuation-insensitive hit.
  if (query.compact && haystackKey.includes(query.compact)) {
    award(80 * multiplier, { type: "compact", field: fieldName, message: `${fieldName} 去标点命中` });
  }

  // Per-term coverage; contributes to the score but yields one summary reason.
  let tokenHits = 0;
  for (const term of query.coreTerms) {
    const plainTerm = normalizeForSearch(term);
    const keyTerm = compactKey(term);
    if (plainTerm || keyTerm) {
      const occurrences = Math.max(countOccurrences(haystack, plainTerm), countOccurrences(haystackKey, keyTerm));
      if (occurrences > 0) {
        tokenHits += 1;
        const termLength = Math.min(Math.max(keyTerm.length, plainTerm.length), 10);
        score += Math.min(occurrences, 4) * termLength * multiplier;
      }
    }
  }
  if (tokenHits > 0) {
    reasons.push({
      type: "token",
      field: fieldName,
      message: `${fieldName} 关键词覆盖 ${tokenHits} 项`,
      score: tokenHits * multiplier,
    });
  }

  return { score, reasons };
}
|
||||
|
||||
/**
 * Score a chunk by the share of query terms (tokens plus tokenized synonym
 * expansions, two or more characters each) that it covers.
 *
 * A term is covered when it is one of the chunk's tokens, or when its compact
 * form occurs in the chunk's compact content or compact title.
 *
 * @param {object} chunk - Chunk produced by buildChunks.
 * @param {object} query - Result of processQuery.
 * @returns {{score: number, reasons: Array<object>}} Coverage ratio × 90, with
 *   one explanation when the score is positive.
 */
function semanticScore(chunk, query) {
  const reasons = [];

  const expansionTokens = query.expandedTerms.flatMap((term) => tokenizeText(term));
  const terms = unique([...query.tokens, ...expansionTokens]).filter((term) => term.length >= 2);
  if (terms.length === 0) return { score: 0, reasons };

  const chunkTokens = new Set(tokenizeText(`${chunk.docTitle} ${chunk.sectionTitle} ${chunk.normalizedContent}`));
  const titleKey = compactKey(`${chunk.docTitle}${chunk.sectionTitle}`);

  const isCovered = (term) => {
    if (chunkTokens.has(term)) return true;
    const termKey = compactKey(term);
    return chunk.compactContent.includes(termKey) || titleKey.includes(termKey);
  };
  const overlap = terms.filter(isCovered).length;

  const ratio = overlap / Math.max(terms.length, 1);
  const score = ratio * 90;
  if (score > 0) {
    reasons.push({ type: "semantic", message: `语义/同义词覆盖 ${(ratio * 100).toFixed(0)}%`, score });
  }
  return { score, reasons };
}
|
||||
|
||||
/**
 * Score bonus derived from the directory a document lives in.
 *
 * The base value comes from the first directory name (in priority order)
 * found in the document path. A question-type bonus is added when the
 * question type fits the directory: api + technical docs (+8),
 * definition/rule + requirement docs (+5), test + testing docs (+12).
 *
 * @param {{docPath?: string}} chunk - Chunk being scored.
 * @param {{questionType: string}} query - Analysed query.
 * @returns {number} Boost to add to the chunk score.
 */
function directoryBoost(chunk, query) {
  const docPath = chunk.docPath || "";
  const baseByDirectory = [
    ["05_需求文档", 15],
    ["01_业务流程", 12],
    ["07_技术文档", 10],
    ["08_测试相关", 8],
    ["06_里程碑", 6],
    ["02_项目管理流程", 5],
    ["04_Agent检索", 4],
    ["03_规范与模板", 1],
  ];
  const base = baseByDirectory.find(([directory]) => docPath.includes(directory))?.[1] ?? 0;

  const { questionType } = query;
  let bonus = 0;
  if (questionType === "api" && docPath.includes("07_技术文档")) {
    bonus = 8;
  } else if ((questionType === "definition" || questionType === "rule") && docPath.includes("05_需求文档")) {
    bonus = 5;
  } else if (questionType === "test" && docPath.includes("08_测试相关")) {
    bonus = 12;
  }
  return base + bonus;
}
|
||||
|
||||
/**
 * Score bonus for chunks whose section type fits the question type.
 *
 * @param {{chunkType: string, sectionTitle?: string}} chunk - Chunk being scored.
 * @param {{questionType: string}} query - Analysed query.
 * @returns {number} 18 for api/data-model matches, 14 for process/rule
 *   matches, 10 for definition questions hitting a FAQ or overview-style
 *   section, otherwise 0.
 */
function chunkTypeBoost(chunk, query) {
  const { chunkType } = chunk;
  switch (query.questionType) {
    case "api":
      return chunkType === "api_contract" ? 18 : 0;
    case "data_model":
      return chunkType === "data_model" ? 18 : 0;
    case "process":
      return chunkType === "process" ? 14 : 0;
    case "rule":
      return chunkType === "business_rule" ? 14 : 0;
    case "definition":
      return chunkType === "faq" || /概述|目标|说明/.test(chunk.sectionTitle) ? 10 : 0;
    default:
      return 0;
  }
}
|
||||
|
||||
/**
 * Cut a snippet of up to 420 characters around the earliest query hit.
 *
 * Whitespace is collapsed first. Every query term is looked up in the
 * lowercased text and, failing that, in the compact (punctuation-free) text.
 * The snippet starts 110 characters before the earliest hit, or at the start
 * of the text when nothing matches. An ellipsis marks each truncated end.
 *
 * Terms that normalize to an empty needle are skipped: `indexOf("")` is 0, so
 * a punctuation-only term would otherwise pin every snippet to the start.
 *
 * A compact-text hit yields an index into the compact string, which is only
 * an approximation of the position in the displayed text.
 *
 * @param {*} text - Chunk text.
 * @param {object} query - Result of processQuery.
 * @returns {string} Snippet, or "" for blank text.
 */
function makeSnippet(text, query) {
  const flat = String(text || "").replace(/\s+/g, " ").trim();
  if (!flat) return "";

  const lower = flat.toLowerCase();
  const compactLower = compactKey(flat);
  const terms = unique([query.normalized, query.raw, query.compact, ...query.coreTerms, ...query.expandedTerms]);

  let earliest;
  for (const term of terms) {
    const plainNeedle = normalizeForSearch(term);
    let index = plainNeedle ? lower.indexOf(plainNeedle) : -1;
    if (index < 0) {
      const compactNeedle = compactKey(term);
      const compactIndex = compactNeedle ? compactLower.indexOf(compactNeedle) : -1;
      index = compactIndex >= 0 ? Math.min(compactIndex, flat.length - 1) : -1;
    }
    if (index >= 0 && (earliest === undefined || index < earliest)) {
      earliest = index;
    }
  }

  const start = Math.max(0, (earliest ?? 0) - 110);
  const snippet = flat.slice(start, start + 420);
  return `${start > 0 ? "…" : ""}${snippet}${start + 420 < flat.length ? "…" : ""}`;
}
|
||||
|
||||
/**
 * Rank all cached chunks against a query and return the best `topK` hits.
 *
 * Per chunk, weighted exact, fuzzy and semantic scores are combined with
 * title, section, directory and chunk-type boosts; the weights depend on the
 * detected question type. Chunks scoring 10 or less and chunks with blank
 * content are dropped, and only the best-scoring chunk per
 * (node, section title) pair is kept.
 *
 * Implementation notes:
 *   - The fuzzy bonus requires a non-empty compact query. `includes("")` is
 *     always true, so a punctuation-only query would otherwise give every
 *     chunk the bonus and return the whole corpus.
 *   - Duplicates are detected with a Set (linear) rather than by rescanning
 *     the accepted hits.
 *   - Snippets are built for the returned hits only. The blank-content check
 *     stands in for the "snippet is non-empty" filter: makeSnippet returns ""
 *     exactly when the content has no non-whitespace character.
 *
 * @param {*} queryText - Raw user query.
 * @param {number} [topK=16] - Maximum number of hits to return.
 * @returns {{query: object, hits: Array<object>}} Analysed query and ranked hits.
 */
function searchChunks(queryText, topK = 16) {
  loadGraph();
  const query = processQuery(queryText);
  if (!query.normalized && !query.compact) return { query, hits: [] };

  let weights = { exact: 0.3, fuzzy: 0.2, semantic: 0.25 };
  if (query.questionType === "api") {
    weights = { exact: 0.45, fuzzy: 0.25, semantic: 0.1 };
  } else if (query.questionType === "definition" || query.questionType === "process") {
    weights = { exact: 0.25, fuzzy: 0.2, semantic: 0.3 };
  }

  const scored = [];
  for (const chunk of chunksCache) {
    if (!/\S/.test(String(chunk.content || ""))) continue;

    const title = fieldScore(`${chunk.docTitle}\n${chunk.docPath}`, query, "docTitle", 4);
    const section = fieldScore(chunk.sectionTitle, query, "sectionTitle", 6);
    const content = fieldScore(chunk.normalizedContent, query, "content", 1);
    const tags = fieldScore((chunk.tags || []).join(" "), query, "tags", 3);
    const semantic = semanticScore(chunk, query);

    const exact = title.score + section.score + tags.score + content.score;
    const compactHit = Boolean(query.compact) && chunk.compactContent.includes(query.compact);
    const fuzzy = (compactHit ? 80 : 0) + content.score * 0.2;
    const titleBoost = title.score > 0 ? 12 : 0;
    const sectionBoost = section.score > 0 ? 18 : 0;
    const dirBoost = directoryBoost(chunk, query);
    const typeBoost = chunkTypeBoost(chunk, query);
    const score = exact * weights.exact + fuzzy * weights.fuzzy + semantic.score * weights.semantic + titleBoost + sectionBoost + dirBoost + typeBoost;
    if (!(score > 10)) continue;

    const reasons = [...title.reasons, ...section.reasons, ...tags.reasons, ...content.reasons.slice(0, 3), ...semantic.reasons];
    if (dirBoost) reasons.push({ type: "directory", message: `目录权重:${(chunk.docPath || "").split("/")[0] || ""}`, score: dirBoost });
    if (typeBoost) reasons.push({ type: "chunkType", message: `章节类型匹配:${chunk.chunkType}`, score: typeBoost });

    scored.push({
      chunk,
      score,
      scores: { exact, fuzzy, semantic: semantic.score, titleBoost, sectionBoost, directoryBoost: dirBoost, chunkTypeBoost: typeBoost },
      reasons,
    });
  }

  // Array.prototype.sort is stable, so equal scores keep chunk order.
  scored.sort((a, b) => b.score - a.score);

  const seenKeys = new Set();
  const distinct = [];
  for (const entry of scored) {
    const key = `${entry.chunk.nodeId}:${entry.chunk.sectionTitle}`;
    if (seenKeys.has(key)) continue;
    seenKeys.add(key);
    distinct.push(entry);
  }

  const hits = distinct.slice(0, topK).map(({ chunk, score, scores, reasons }) => ({
    chunkId: chunk.chunkId,
    nodeId: chunk.nodeId,
    docTitle: chunk.docTitle,
    docPath: chunk.docPath,
    sectionTitle: chunk.sectionTitle,
    chunkType: chunk.chunkType,
    layer: chunk.layer,
    score,
    scores,
    snippet: makeSnippet(chunk.content, query),
    evidenceContent: chunk.content.slice(0, 2600),
    reasons,
  }));

  return { query, hits };
}
|
||||
|
||||
/**
 * Decide whether the retrieved hits are strong enough to answer a question.
 *
 * Rules, checked in order:
 *   1. blank question                      -> rejected, confidence "none"
 *   2. no hits                             -> rejected, confidence "none"
 *   3. best hit scores >= 75               -> allowed,  confidence "high"
 *   4. best hit >= 60 and second hit >= 50 -> allowed,  confidence "medium"
 *   5. anything else                       -> rejected, confidence "low"
 *
 * @param {*} question - Raw user question; coerced to a string and trimmed.
 * @param {Array<{score: number}>} hits - Hits; the code reads only the first two entries.
 * @returns {{allowed: boolean, confidence: string, reason: string}}
 */
function evidenceDecision(question, hits) {
  const verdict = (allowed, confidence, reason) => ({ allowed, confidence, reason });

  const normalizedQuestion = String(question || "").trim();
  if (!normalizedQuestion) {
    return verdict(false, "none", "请输入问题。");
  }
  if (!hits.length) {
    return verdict(false, "none", "没有检索到相关证据。");
  }

  const bestScore = hits[0].score;
  if (bestScore >= 75) {
    return verdict(true, "high", "Top1 证据分数达到阈值。");
  }

  // A missing second hit counts as score 0.
  const secondScore = hits[1]?.score || 0;
  if (bestScore >= 60 && secondScore >= 50) {
    return verdict(true, "medium", "多个证据片段达到中等相关。");
  }

  return verdict(false, "low", "证据分数不足。");
}
|
||||
|
||||
/**
 * Build a plain-text answer from retrieved hits without calling an LLM.
 *
 * When `evidenceDecision` rejects the evidence, a fixed "no description"
 * sentence is returned. Otherwise the answer is a conclusion line followed by
 * up to five numbered hits, each with title, optional section, snippet and
 * source path.
 *
 * @param {*} question - User question, forwarded to `evidenceDecision`.
 * @param {Array<object>} hits - Hits; reads docTitle, sectionTitle, snippet, docPath.
 * @returns {string} Newline-joined answer text.
 */
function localAnswer(question, hits) {
  const { allowed, reason } = evidenceDecision(question, hits);
  if (!allowed) {
    return "暂无该需求描述。";
  }

  const lines = [`结论:已基于本地知识库检索到可用证据。${reason}`, "", "相关依据:"];
  hits.slice(0, 5).forEach((hit, position) => {
    // The section title is appended only when it is non-empty.
    const sectionSuffix = hit.sectionTitle ? ` / ${hit.sectionTitle}` : "";
    lines.push(`${position + 1}. ${hit.docTitle}${sectionSuffix}\n ${hit.snippet}\n 来源:${hit.docPath}`);
  });
  return lines.join("\n");
}
|
||||
|
||||
/**
 * Work out which wire protocol to use for an LLM endpoint.
 *
 * An explicit provider (any truthy value other than "auto") is returned as-is.
 * Otherwise the result is "routin-plan" when the API key starts with "plan-"
 * or the lower-cased endpoint contains "/plan/v1", and "openai" in every
 * other case.
 *
 * @param {{endpoint?: *, apiKey?: *, provider?: *}} options
 * @returns {*} The explicit provider, or "routin-plan" / "openai".
 */
function detectProvider({ endpoint, apiKey, provider }) {
  const hasExplicitProvider = Boolean(provider) && provider !== "auto";
  if (hasExplicitProvider) {
    return provider;
  }

  const keyLooksLikePlan = String(apiKey || "").startsWith("plan-");
  const pathLooksLikePlan = String(endpoint || "").toLowerCase().includes("/plan/v1");
  return keyLooksLikePlan || pathLooksLikePlan ? "routin-plan" : "openai";
}
|
||||
|
||||
/**
 * Turn a user-supplied base URL into an OpenAI-style chat-completions URL.
 *
 * Blank input yields "". A value that already ends in `/chat/completions`
 * (with or without one trailing slash) is returned trimmed but otherwise
 * untouched; anything else has trailing slashes removed and
 * `/chat/completions` appended.
 *
 * @param {*} endpoint - Base URL or full URL; coerced to string and trimmed.
 * @returns {string}
 */
function normalizeOpenAiEndpoint(endpoint) {
  const trimmed = String(endpoint || "").trim();
  if (trimmed === "") {
    return "";
  }

  const alreadyComplete = /\/chat\/completions\/?$/.test(trimmed);
  if (alreadyComplete) {
    return trimmed;
  }

  const withoutTrailingSlashes = trimmed.replace(/\/+$/, "");
  return withoutTrailingSlashes + "/chat/completions";
}
|
||||
|
||||
/**
 * Turn a user-supplied URL into a RoutIn "plan" messages URL.
 *
 * Blank input falls back to `https://api.routin.ai/plan/v1`. A trailing
 * `/chat/completions`, `/messages` and any trailing slashes are stripped in
 * that order. If the result does not contain `/plan/v1`, a trailing `/v1` is
 * rewritten to `/plan/v1`. Finally `/messages` is appended.
 *
 * @param {*} endpoint - Base or full URL; coerced to string and trimmed.
 * @returns {string}
 */
function normalizePlanEndpoint(endpoint) {
  const supplied = String(endpoint || "").trim();
  const start = supplied || "https://api.routin.ai/plan/v1";

  // Order matters: operation suffixes are removed before trailing slashes.
  const suffixPatterns = [/\/chat\/completions\/?$/, /\/messages\/?$/, /\/+$/];
  let base = suffixPatterns.reduce((current, pattern) => current.replace(pattern, ""), start);

  if (!base.includes("/plan/v1")) {
    base = base.replace(/\/v1$/, "/plan/v1");
  }
  return base + "/messages";
}
|
||||
|
||||
/**
 * Pull the answer text out of a response whose payload lives in `content`.
 *
 * `content` may be a string (returned unchanged) or an array whose entries are
 * strings or objects with a `text` field; the non-blank strings are
 * concatenated without a separator. Any other shape yields "".
 *
 * @param {*} data - Parsed response body.
 * @returns {string}
 */
function extractPlanMessageText(data) {
  if (!data || typeof data !== "object") {
    return "";
  }

  const { content } = data;
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  const pieces = [];
  for (const part of content) {
    const candidate = typeof part === "string" ? part : part?.text;
    // Whitespace-only fragments are dropped; kept fragments are not trimmed.
    if (typeof candidate === "string" && candidate.trim()) {
      pieces.push(candidate);
    }
  }
  return pieces.length ? pieces.join("") : "";
}
|
||||
|
||||
/**
 * Extract the answer text from a model response, tolerating several shapes.
 *
 * For the "routin-plan" protocol the `content`-based shape is tried first.
 * After that the first truthy value among these is returned:
 * `choices[0].message.content`, `message.content`, `response`, `data.answer`,
 * `answer`. If none is truthy, the result of `extractPlanMessageText(data)`
 * is returned.
 *
 * @param {*} data - Parsed response body (may be null).
 * @param {string} protocol - Protocol name as produced by `detectProvider`.
 * @returns {*} The first matching value; not validated to be a string here.
 */
function extractAnswer(data, protocol) {
  if (protocol === "routin-plan") {
    const planText = extractPlanMessageText(data);
    if (planText) {
      return planText;
    }
  }

  // Lazy readers so each shape is only probed when the earlier ones were empty.
  const readers = [
    () => data?.choices?.[0]?.message?.content,
    () => data?.message?.content,
    () => data?.response,
    () => data?.data?.answer,
    () => data?.answer,
  ];
  for (const read of readers) {
    const value = read();
    if (value) {
      return value;
    }
  }
  return extractPlanMessageText(data);
}
|
||||
|
||||
/**
 * Send the question and the retrieved evidence to a remote chat model.
 *
 * The protocol comes from `detectProvider`. "routin-plan" requests go to the
 * plan messages URL with a single user message; everything else goes to an
 * OpenAI-style chat-completions URL with separate system and user messages.
 * The request is aborted after `timeoutMs` milliseconds.
 *
 * @param {object} options
 * @param {*} options.endpoint - User-configured endpoint.
 * @param {*} options.model - Model name forwarded in the request body.
 * @param {*} options.apiKey - Optional key, sent as a Bearer token when truthy.
 * @param {string} [options.provider="auto"] - Explicit provider or "auto".
 * @param {*} options.question - User question placed in the prompt.
 * @param {Array<object>} options.hits - Hits; the first eight become evidence.
 * @param {number} [options.timeoutMs=60000] - Abort timeout in milliseconds.
 * @returns {Promise<{answer: string, endpoint: string, elapsedMs: number, rawShape: string[], protocol: *}>}
 * @throws {Error} On a non-2xx response (with `status`) or when no answer text
 *   can be extracted; both carry `endpoint`, `elapsedMs` and `protocol`.
 *   Errors thrown by `fetch` itself propagate unchanged.
 */
async function callLlm({ endpoint, model, apiKey, provider = "auto", question, hits, timeoutMs = 60000 }) {
  const protocol = detectProvider({ endpoint, apiKey, provider });
  const usesPlanApi = protocol === "routin-plan";
  const requestEndpoint = usesPlanApi ? normalizePlanEndpoint(endpoint) : normalizeOpenAiEndpoint(endpoint);

  const evidenceBlocks = hits
    .slice(0, 8)
    .map((hit, index) => `[${index + 1}]\n来源:${hit.docPath}\n章节:${hit.sectionTitle}\n内容:${hit.snippet}`);
  const evidence = evidenceBlocks.join("\n\n");
  const systemPrompt = "你是如愿知识库问答助手。你只能基于提供的知识库片段回答,不得使用外部知识,不得编造文档中没有的内容。如果证据不足,请只回答:暂无该需求描述。回答末尾必须列出来源文件。";
  const userPrompt = `用户问题:\n${question}\n\n检索证据:\n${evidence}\n\n请用中文结构化回答。`;

  let body;
  if (usesPlanApi) {
    // The plan protocol gets the system prompt folded into the single user message.
    const messages = [{ role: "user", content: `${systemPrompt}\n\n${userPrompt}` }];
    body = { model, messages, max_tokens: 4096, temperature: 0.7 };
  } else {
    const messages = [{ role: "system", content: systemPrompt }, { role: "user", content: userPrompt }];
    body = { model, messages, temperature: 0.1, stream: false };
  }

  const headers = { "Content-Type": "application/json" };
  if (apiKey) {
    headers.Authorization = `Bearer ${apiKey}`;
  }

  const abortController = new AbortController();
  const abortTimer = setTimeout(() => abortController.abort(), timeoutMs);
  const startedAt = Date.now();

  // Attach request diagnostics so the HTTP handler can report them to the client.
  const withDiagnostics = (err) => {
    err.endpoint = requestEndpoint;
    err.elapsedMs = Date.now() - startedAt;
    err.protocol = protocol;
    return err;
  };

  try {
    const response = await fetch(requestEndpoint, {
      method: "POST",
      headers,
      body: JSON.stringify(body),
      signal: abortController.signal,
    });
    const text = await response.text();

    let data = null;
    if (text) {
      try {
        data = JSON.parse(text);
      } catch {
        // Non-JSON bodies are tolerated: `data` stays null and the raw text is
        // quoted in the error messages below.
      }
    }

    if (!response.ok) {
      const remoteMessage = data?.error?.message || data?.message || text.slice(0, 500);
      const httpError = new Error(`模型接口返回 ${response.status} ${response.statusText}${remoteMessage ? `:${remoteMessage}` : ""}`);
      httpError.status = response.status;
      throw withDiagnostics(httpError);
    }

    const answer = extractAnswer(data, protocol);
    const hasAnswer = typeof answer === "string" && Boolean(answer.trim());
    if (!hasAnswer) {
      throw withDiagnostics(
        new Error("模型返回成功,但响应结构里没有可解析的回答内容。支持 OpenAI choices[0].message.content、RoutIn content[]/content、message.content / response / answer。原始返回:" + text.slice(0, 500)),
      );
    }

    return {
      answer: answer.trim(),
      endpoint: requestEndpoint,
      elapsedMs: Date.now() - startedAt,
      rawShape: Object.keys(data || {}),
      protocol,
    };
  } finally {
    clearTimeout(abortTimer);
  }
}
|
||||
|
||||
/**
 * Read an entire request stream and parse it as JSON.
 *
 * @param {AsyncIterable<Buffer>|Iterable<Buffer>} req - Source of body chunks.
 * @returns {Promise<*>} Parsed JSON value, or `{}` when the body is empty.
 * @throws {SyntaxError} When the body is non-empty but not valid JSON.
 */
async function readBody(req) {
  const parts = [];
  for await (const part of req) {
    parts.push(part);
  }

  const payload = Buffer.concat(parts).toString("utf8");
  if (!payload) {
    return {};
  }
  return JSON.parse(payload);
}
|
||||
|
||||
/**
 * Route a request under `/api/` to the matching JSON handler.
 *
 * Routes:
 *   - OPTIONS (any path)    -> 200 `{ ok: true }`
 *   - /api/rag/health       -> graph and chunk statistics
 *   - POST /api/rag/search  -> search hits plus a locally built answer
 *   - POST /api/rag/answer  -> evidence-gated answer, via the LLM when
 *                              `body.llm.enabled` is truthy
 *   - POST /api/llm/test    -> connectivity test against the configured LLM
 *   - anything else         -> 404
 *
 * The client-supplied `provider` is forwarded to `callLlm`, so the protocol
 * used for the request is the same one used to compute the fallback endpoint
 * reported on failure. Previously it was only read on the error path.
 *
 * @param {import("node:http").IncomingMessage} req
 * @param {import("node:http").ServerResponse} res
 * @param {URL} url - Parsed request URL; only `pathname` is read.
 * @returns {Promise<*>} Whatever `sendJson` returns.
 */
async function handleApi(req, res, url) {
  // Endpoint to report when callLlm failed before attaching one to the error.
  const fallbackEndpoint = (cfg) =>
    detectProvider({ endpoint: cfg?.endpoint, apiKey: cfg?.apiKey, provider: cfg?.provider }) === "routin-plan"
      ? normalizePlanEndpoint(cfg?.endpoint)
      : normalizeOpenAiEndpoint(cfg?.endpoint);

  const describeError = (error) => ({
    message: error instanceof Error ? error.message : String(error),
    name: error?.name,
    status: error?.status,
  });

  if (req.method === "OPTIONS") return sendJson(res, 200, { ok: true });

  if (url.pathname === "/api/rag/health") {
    const graph = loadGraph();
    return sendJson(res, 200, {
      ok: true,
      graphPath,
      staticRoot,
      documentCount: (graph.nodes || []).length,
      chunkCount: chunksCache.length,
      llmProxy: true,
    });
  }

  if (url.pathname === "/api/rag/search" && req.method === "POST") {
    const body = await readBody(req);
    const { query, hits } = searchChunks(body.query || "", body.topK || 16);
    const decision = evidenceDecision(body.query || "", hits);
    return sendJson(res, 200, { ok: true, query, decision, answer: localAnswer(body.query || "", hits), hits });
  }

  if (url.pathname === "/api/rag/answer" && req.method === "POST") {
    const body = await readBody(req);
    const requestStatus = { stage: "search", startedAt: new Date().toISOString() };
    const { query, hits } = searchChunks(body.query || "", body.topK || 16);
    const decision = evidenceDecision(body.query || "", hits);

    if (!decision.allowed) {
      return sendJson(res, 200, {
        ok: true,
        mode: "llm",
        requestStatus: { ...requestStatus, stage: "evidence_rejected" },
        query,
        decision,
        answer: "暂无该需求描述。",
        hits,
      });
    }
    if (!body.llm?.enabled) {
      return sendJson(res, 200, {
        ok: true,
        mode: "local",
        requestStatus: { ...requestStatus, stage: "local_answer" },
        query,
        decision,
        answer: localAnswer(body.query || "", hits),
        hits,
      });
    }

    try {
      const result = await callLlm({
        endpoint: body.llm.endpoint,
        model: body.llm.model,
        apiKey: body.llm.apiKey,
        // Honour an explicit provider choice instead of always auto-detecting.
        provider: body.llm.provider,
        question: body.query || "",
        hits,
      });
      return sendJson(res, 200, {
        ok: true,
        mode: "llm",
        requestStatus: { ...requestStatus, stage: "llm_done", endpoint: result.endpoint, elapsedMs: result.elapsedMs, rawShape: result.rawShape },
        query,
        decision,
        answer: result.answer,
        hits,
      });
    } catch (error) {
      // The model failed: return 502 but still include a locally built answer.
      return sendJson(res, 502, {
        ok: false,
        mode: "llm",
        requestStatus: {
          ...requestStatus,
          stage: "llm_failed",
          endpoint: error.endpoint || fallbackEndpoint(body.llm),
          elapsedMs: error.elapsedMs,
          protocol: error.protocol,
        },
        query,
        decision,
        answer: localAnswer(body.query || "", hits),
        hits,
        error: describeError(error),
      });
    }
  }

  if (url.pathname === "/api/llm/test" && req.method === "POST") {
    const body = await readBody(req);
    try {
      const result = await callLlm({
        endpoint: body.endpoint,
        model: body.model,
        apiKey: body.apiKey,
        provider: body.provider,
        question: "连接测试",
        hits: [{ docPath: "测试", sectionTitle: "连接测试", snippet: "请只回答:连接成功。", score: 100 }],
      });
      return sendJson(res, 200, { ok: true, endpoint: result.endpoint, elapsedMs: result.elapsedMs, answer: result.answer });
    } catch (error) {
      return sendJson(res, 502, {
        ok: false,
        endpoint: error.endpoint || fallbackEndpoint(body),
        protocol: error.protocol,
        elapsedMs: error.elapsedMs,
        error: describeError(error),
      });
    }
  }

  return sendJson(res, 404, { ok: false, error: "API not found" });
}
|
||||
|
||||
/**
 * Serve a file from `staticRoot`, falling back to `index.html` for any path
 * that is not an existing regular file.
 *
 * Hardening compared with a plain `startsWith(staticRoot)` check:
 *   - Containment is tested against the root plus a path separator, so an
 *     encoded traversal such as `/..%2f<root>-backup/file` can no longer reach
 *     a sibling directory that merely shares the root's name prefix.
 *   - A malformed percent-escape in the path yields 400 instead of throwing.
 *   - A read-stream failure destroys the response instead of surfacing as an
 *     unhandled stream `error` event.
 *
 * @param {import("node:http").IncomingMessage} req - Unused.
 * @param {import("node:http").ServerResponse} res
 * @param {URL} url - Parsed request URL; only `pathname` is read.
 * @returns {void}
 */
function serveStatic(req, res, url) {
  let pathname;
  try {
    pathname = decodeURIComponent(url.pathname);
  } catch {
    // decodeURIComponent throws URIError on input such as "%E0%A4%A".
    res.writeHead(400);
    res.end("Bad Request");
    return;
  }
  if (pathname === "/") pathname = "/index.html";

  const root = path.resolve(staticRoot);
  const requested = path.resolve(path.join(root, pathname));
  // Compare against "<root><sep>" so "/srv/app-secret" is not mistaken for a
  // child of "/srv/app".
  const rootPrefix = root.endsWith(path.sep) ? root : root + path.sep;
  if (requested !== root && !requested.startsWith(rootPrefix)) {
    res.writeHead(403);
    res.end("Forbidden");
    return;
  }

  const isExistingFile = fs.existsSync(requested) && fs.statSync(requested).isFile();
  const filePath = isExistingFile ? requested : path.join(root, "index.html");
  const ext = path.extname(filePath).toLowerCase();
  res.writeHead(200, {
    "Content-Type": MIME[ext] || "application/octet-stream",
    "Cache-Control": ext === ".html" ? "no-store" : "no-cache",
  });

  const stream = fs.createReadStream(filePath);
  // Headers are already set at this point, so the only safe reaction to a read
  // failure (e.g. missing index.html) is to drop the connection.
  stream.on("error", () => res.destroy());
  stream.pipe(res);
}
|
||||
|
||||
// HTTP entry point: `/api/*` goes to handleApi, everything else is served as a
// static file. Any error raised while handling becomes a JSON 500.
//
// URL parsing is guarded as well. `new URL()` throws on a malformed Host
// header (e.g. "[") or request target (e.g. "//"); unguarded, that rejected
// the async listener without ever sending a response. Such requests now get a
// JSON 400.
const server = http.createServer(async (req, res) => {
  let url;
  try {
    url = new URL(req.url || "/", `http://${req.headers.host || "localhost"}`);
  } catch {
    return sendJson(res, 400, { ok: false, error: "Invalid request URL" });
  }

  try {
    if (url.pathname.startsWith("/api/")) return await handleApi(req, res, url);
    return serveStatic(req, res, url);
  } catch (error) {
    return sendJson(res, 500, { ok: false, error: error instanceof Error ? error.message : String(error) });
  }
});
|
||||
|
||||
// Start listening; once bound, load the graph and print the startup banner.
server.listen(port, host, () => {
  loadGraph();

  const banner = [
    `RAG dashboard server listening on http://${host}:${port}`,
    `Static root: ${staticRoot}`,
    `Graph path: ${graphPath}`,
  ];
  for (const line of banner) {
    console.log(line);
  }
});
|
||||
@@ -0,0 +1,174 @@
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
|
||||
// Hard-coded locations of the knowledge base this script maintains.
const root = 'D:/AIcoding/WishFulfilled/知识库/under-anything';
const wiki = root + '/wishfulfilled-wiki';
// Directory holding the requirement documents to import.
const reqDir = wiki + '/05_需求文档';

// Both copies of the knowledge graph (wiki-internal and dashboard) are updated.
const graphPaths = [
  wiki + '/.understand-anything/knowledge-graph.json',
  root + '/wishfulfilled-dashboard/knowledge-graph.json',
];
// Matching meta files, one next to each graph.
const metaPaths = [
  wiki + '/.understand-anything/meta.json',
  root + '/wishfulfilled-dashboard/meta.json',
];

// Markdown and HTML documents in the requirements directory, sorted with the
// zh-Hans-CN locale.
const files = fs
  .readdirSync(reqDir)
  .filter((name) => /\.(md|html?)$/i.test(name))
  .sort((left, right) => left.localeCompare(right, 'zh-Hans-CN'));
|
||||
|
||||
/**
 * Read a file synchronously as UTF-8 text.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} File contents.
 */
function readText(filePath) {
  const contents = fs.readFileSync(filePath, { encoding: 'utf8' });
  return contents;
}
|
||||
|
||||
/**
 * Strip `N|` line-number prefixes from text that was saved with them.
 *
 * A line counts as numbered when it is optional whitespace, digits and a pipe,
 * followed by any (possibly empty) content. Allowing empty content means
 * numbered blank lines such as `12|` are cleaned too; previously they kept
 * their prefix.
 *
 * The cleaned text is used only when at least `max(3, floor(lines / 4))` lines
 * were numbered; otherwise the input is returned untouched, so documents that
 * merely contain a few `1|x`-looking lines are not mangled.
 *
 * @param {string} text - Raw file contents.
 * @returns {string} Cleaned text joined with "\n", or the original `text`.
 */
function cleanLineNumbers(text) {
  const lines = text.split(/\r?\n/);
  const cleaned = [];
  let changed = 0;

  for (const line of lines) {
    const match = /^\s*\d+\|(.*)$/.exec(line);
    if (match) {
      changed += 1;
      cleaned.push(match[1]);
    } else {
      cleaned.push(line);
    }
  }

  const threshold = Math.max(3, Math.floor(lines.length / 4));
  return changed >= threshold ? cleaned.join('\n') : text;
}
|
||||
|
||||
/**
 * Reduce an HTML fragment to plain text.
 *
 * Steps: drop `<script>` and `<style>` elements with their content, replace
 * every remaining tag with a space, decode a small set of character entities,
 * collapse whitespace runs to single spaces and trim.
 *
 * The entity step previously consisted of two replacements that mapped a
 * character to itself, so `&nbsp;` and `&amp;` leaked into the output. The
 * entities are now decoded in a single pass, which also keeps `&amp;lt;`
 * from being double-decoded.
 *
 * @param {string} text - HTML source.
 * @returns {string} Plain text.
 */
function stripHtml(text) {
  const entities = {
    nbsp: ' ',
    amp: '&',
    lt: '<',
    gt: '>',
    quot: '"',
    '#39': "'",
  };

  return text
    .replace(/<script[\s\S]*?<\/script>|<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    .replace(/&(nbsp|amp|lt|gt|quot|#39);/g, (whole, name) => entities[name])
    .replace(/\s+/g, ' ')
    .trim();
}
|
||||
|
||||
/**
 * Pick a display title for a document.
 *
 * Order of preference:
 *   1. for `.htm`/`.html` files, the non-empty text of the `<title>` element;
 *   2. the first line that starts with `#` after trimming, with the leading
 *      `#` characters removed;
 *   3. the file name without extension, minus a leading `<digits>-` or
 *      `<digits>_` prefix, with underscores turned into spaces.
 *
 * @param {string} fileName - File name, used for the extension and fallback.
 * @param {string} text - Document contents.
 * @returns {string}
 */
function titleFor(fileName, text) {
  const isHtml = /\.html?$/i.test(fileName);
  if (isHtml) {
    const titleTag = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(text);
    const htmlTitle = titleTag ? stripHtml(titleTag[1]) : '';
    if (htmlTitle) {
      return htmlTitle;
    }
  }

  const headingLine = text
    .split(/\r?\n/)
    .map((line) => line.trim())
    .find((line) => line.startsWith('#'));
  if (headingLine !== undefined) {
    // A bare "#" line still wins and produces an empty title.
    return headingLine.replace(/^#+\s*/, '').trim();
  }

  const stem = path.basename(fileName, path.extname(fileName));
  return stem.replace(/^\d+[-_]/, '').replace(/_/g, ' ');
}
|
||||
|
||||
/**
 * Produce a summary of at most 180 UTF-16 code units for a document.
 *
 * HTML files are reduced with `stripHtml`. Other files have common Markdown
 * punctuation replaced by spaces and whitespace collapsed. A fixed placeholder
 * is returned when nothing is left.
 *
 * @param {string} fileName - Used only to detect `.htm`/`.html`.
 * @param {string} text - Document contents.
 * @returns {string}
 */
function summaryFor(fileName, text) {
  let plain;
  if (/\.html?$/i.test(fileName)) {
    plain = stripHtml(text);
  } else {
    const withoutMarkup = text.replace(/[`*_>#|\-\[\]()]/g, ' ');
    plain = withoutMarkup.replace(/\s+/g, ' ').trim();
  }

  if (!plain) {
    return '需求文档。';
  }
  return plain.slice(0, 180);
}
|
||||
|
||||
/**
 * Collect tags for a requirements document.
 *
 * Two fixed tags always come first. If the text has a line of the form
 * `tags: [a, b, ...]`, its items (split on ASCII or full-width commas, trimmed,
 * with one surrounding quote removed on each side) are appended, skipping
 * blanks and duplicates.
 *
 * @param {string} text - Document contents.
 * @returns {string[]}
 */
function tagsFor(text) {
  const collected = ['05_需求文档', '需求文档'];

  const declaration = /^tags:\s*\[(.*?)\]/m.exec(text);
  if (declaration === null) {
    return collected;
  }

  declaration[1]
    .split(/[,,]/)
    .map((raw) => raw.trim().replace(/^['"]|['"]$/g, ''))
    .filter((tag) => Boolean(tag))
    .forEach((tag) => {
      if (!collected.includes(tag)) {
        collected.push(tag);
      }
    });
  return collected;
}
|
||||
|
||||
/**
 * Classify a document by length: more than 20000 characters is "complex",
 * more than 5000 is "moderate", anything shorter is "simple".
 *
 * @param {string} text - Document contents; only `length` is read.
 * @returns {"complex"|"moderate"|"simple"}
 */
function complexityFor(text) {
  const size = text.length;
  if (size > 20000) {
    return 'complex';
  }
  return size > 5000 ? 'moderate' : 'simple';
}
|
||||
|
||||
/**
 * Merge every requirements document listed in `files` into one knowledge-graph
 * JSON file and write the file back.
 *
 * For each document a `document` node is created, or its fields are assigned
 * onto the existing node with the same id. The node id is added to the
 * `layer-requirements` layer (created if missing) and a `documents` edge from
 * the `flow:layer-requirements` node is added unless an edge with the same
 * source, target and type already exists. If the flow node is present, its
 * summary and content are refreshed with the document count. Finally
 * `project.analyzedAt` is stamped and the graph is written with 2-space
 * indentation and a trailing newline.
 *
 * @param {string} graphPath - Path of the knowledge-graph JSON file.
 * @returns {{graphPath: string, added: number, updated: number, requirements: number, nodes: number}}
 */
function updateGraph(graphPath) {
  const FLOW_NODE_ID = 'flow:layer-requirements';
  const LAYER_ID = 'layer-requirements';

  const graph = JSON.parse(fs.readFileSync(graphPath, 'utf8'));
  for (const key of ['nodes', 'edges', 'layers']) {
    graph[key] ??= [];
  }

  const byId = new Map();
  for (const existingNode of graph.nodes) {
    byId.set(existingNode.id, existingNode);
  }
  const edgeKeyOf = (source, target, type) => `${source}|${target}|${type}`;
  const edgeKeys = new Set(graph.edges.map((edge) => edgeKeyOf(edge.source, edge.target, edge.type)));

  let layer = graph.layers.find((item) => item.id === LAYER_ID);
  if (layer === undefined) {
    layer = {
      id: LAYER_ID,
      name: '需求文档',
      description: '所有正式需求、业务规则、需求变更和需求索引。',
      nodeIds: [FLOW_NODE_ID],
    };
    graph.layers.push(layer);
  }
  layer.nodeIds ??= [];

  const counters = { added: 0, updated: 0 };
  for (const fileName of files) {
    const relPath = `05_需求文档/${fileName}`;
    // The id drops the extension, so "a.md" and "a.html" map to the same node.
    const nodeId = `doc:${relPath.replace(/\.[^.]+$/, '')}`;
    const text = cleanLineNumbers(readText(`${reqDir}/${fileName}`));

    const node = {
      id: nodeId,
      type: 'document',
      name: titleFor(fileName, text),
      filePath: relPath,
      summary: summaryFor(fileName, text),
      tags: tagsFor(text),
      complexity: complexityFor(text),
      knowledgeMeta: {
        content: text,
        wikilinks: Array.from(text.matchAll(/\[\[([^\]]+)\]\]/g), (match) => match[1]),
        category: LAYER_ID,
      },
    };

    const existing = byId.get(nodeId);
    if (existing !== undefined) {
      // Overwrites the listed fields in place; other fields on the node survive.
      Object.assign(existing, node);
      counters.updated += 1;
    } else {
      graph.nodes.push(node);
      byId.set(nodeId, node);
      counters.added += 1;
    }

    if (!layer.nodeIds.includes(nodeId)) {
      layer.nodeIds.push(nodeId);
    }

    const edgeKey = edgeKeyOf(FLOW_NODE_ID, nodeId, 'documents');
    if (!edgeKeys.has(edgeKey)) {
      graph.edges.push({
        source: FLOW_NODE_ID,
        target: nodeId,
        type: 'documents',
        direction: 'forward',
        description: '本层文档',
        weight: 0.65,
      });
      edgeKeys.add(edgeKey);
    }
  }

  // The flow node itself is a layer member but not a document.
  const count = layer.nodeIds.filter((id) => id !== FLOW_NODE_ID).length;
  const flow = byId.get(FLOW_NODE_ID);
  if (flow) {
    flow.summary = '所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。';
    flow.knowledgeMeta ??= {};
    flow.knowledgeMeta.content = `# 需求文档\n\n所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。\n\n本层包含 ${count} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。`;
    flow.knowledgeMeta.category = LAYER_ID;
  }

  graph.project ??= {};
  graph.project.analyzedAt = new Date().toISOString();
  fs.writeFileSync(graphPath, `${JSON.stringify(graph, null, 2)}\n`, 'utf8');

  return {
    graphPath,
    added: counters.added,
    updated: counters.updated,
    requirements: count,
    nodes: graph.nodes.length,
  };
}
|
||||
|
||||
// Update every graph copy, then refresh each meta file that exists next to one.
const results = graphPaths.map((graphPath) => updateGraph(graphPath));

for (const metaPath of metaPaths) {
  if (fs.existsSync(metaPath)) {
    const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
    const siblingGraphPath = `${path.dirname(metaPath)}/knowledge-graph.json`;
    const graph = JSON.parse(fs.readFileSync(siblingGraphPath, 'utf8'));

    // analyzedFiles counts nodes whose id starts with "doc:".
    let documentCount = 0;
    for (const node of graph.nodes) {
      if (String(node.id).startsWith('doc:')) {
        documentCount += 1;
      }
    }

    meta.lastAnalyzedAt = new Date().toISOString();
    meta.analyzedFiles = documentCount;
    fs.writeFileSync(metaPath, `${JSON.stringify(meta, null, 2)}\n`, 'utf8');
  }
}

console.log(JSON.stringify(results, null, 2));
|
||||
Reference in New Issue
Block a user