// JavaScript source, 252 lines, 8.4 KiB.
import fs from 'node:fs';
import path from 'node:path';
import { execFileSync } from 'node:child_process';

// Absolute root of the knowledge-base workspace; every path below is derived from it.
const root = 'D:/AIcoding/WishFulfilled/知识库/under-anything';

// Wiki directory whose numbered sub-folders hold the source documents.
const wiki = `${root}/wishfulfilled-wiki`;

// Knowledge-graph JSON files that are read, updated and written back in place.
const graphPaths = [
  `${wiki}/.understand-anything/knowledge-graph.json`,
  `${root}/wishfulfilled-dashboard/knowledge-graph.json`,
];

// meta.json files located next to each knowledge graph; missing ones are skipped by the driver.
const metaPaths = [
  `${wiki}/.understand-anything/meta.json`,
  `${root}/wishfulfilled-dashboard/meta.json`,
];
// One entry per wiki sub-folder that is imported into the graph.
//   dir / relDir     - absolute folder to scan and the prefix stored in node.filePath
//   layerId / flowId - ids of the graph layer and of its flow node
//   layerName / layerDescription - written onto the layer and the flow node
//   defaultTags      - tags every document of the folder starts with
//   category         - stored in knowledgeMeta.category
//   fallbackSummary  - summary used when a document yields no plain text
const directoryConfigs = [
  {
    dir: `${wiki}/05_需求文档`,
    relDir: '05_需求文档',
    layerId: 'layer-requirements',
    flowId: 'flow:layer-requirements',
    layerName: '需求文档',
    layerDescription: '所有正式需求、业务规则、需求变更和需求索引。点击本层可查看全部需求文档并检索。',
    defaultTags: ['05_需求文档', '需求文档'],
    category: 'layer-requirements',
    fallbackSummary: '需求文档。',
  },
  {
    dir: `${wiki}/07_技术文档`,
    relDir: '07_技术文档',
    layerId: 'layer-technical',
    flowId: 'flow:layer-technical',
    layerName: '技术文档',
    layerDescription: '系统架构、数据模型、接口说明、技术方案和技术决策。点击本层可查看全部技术文档并检索。',
    defaultTags: ['07_技术文档', '技术文档'],
    category: 'layer-technical',
    fallbackSummary: '技术文档。',
  },
  {
    dir: `${wiki}/08_测试相关`,
    relDir: '08_测试相关',
    layerId: 'layer-testing',
    flowId: 'flow:layer-testing',
    layerName: '测试相关',
    layerDescription: '测试计划、测试用例、缺陷记录、验收记录、上线检查和测试资产。点击本层可查看全部测试相关文档并检索。',
    defaultTags: ['08_测试相关', '测试相关'],
    category: 'layer-testing',
    fallbackSummary: '测试相关文档。',
  },
];
/**
 * List the importable document names directly inside a directory.
 *
 * @param {string} dir - Directory to read (not recursive).
 * @returns {string[]} Entry names ending in .md, .htm, .html or .xlsx
 *   (case-insensitive), sorted with the zh-Hans-CN collation. Returns an
 *   empty array when the directory does not exist.
 */
function listFiles(dir) {
  if (!fs.existsSync(dir)) return [];
  return fs
    .readdirSync(dir)
    .filter((name) => /\.(md|html?|xlsx)$/i.test(name))
    .sort((a, b) => a.localeCompare(b, 'zh-Hans-CN'));
}
/**
 * Read a document as text.
 *
 * @param {string} filePath - Path of the file to read.
 * @returns {string} For paths ending in .xlsx (case-insensitive) the text
 *   produced by readXlsxText; otherwise the file content decoded as UTF-8.
 */
function readText(filePath) {
  if (/\.xlsx$/i.test(filePath)) return readXlsxText(filePath);
  return fs.readFileSync(filePath, 'utf8');
}
/**
 * Extract the cell text of an .xlsx workbook by running an inline Python
 * script (requires a `python` executable with the `openpyxl` package).
 *
 * Each worksheet contributes a "# Sheet: <title>" line followed by one line
 * per non-empty row, with the non-empty cell values joined by " | ".
 *
 * @param {string} filePath - Path of the workbook, passed to Python as argv[1].
 * @returns {string} The extracted text, lines separated by "\n".
 * @throws {Error} When the Python process fails (raised by execFileSync) or
 *   its output is not valid JSON.
 */
function readXlsxText(filePath) {
  // The nested Python statements must keep their relative indentation,
  // otherwise the interpreter rejects the script.
  const script = [
    'import json, sys',
    'from openpyxl import load_workbook',
    'path = sys.argv[1]',
    'wb = load_workbook(path, data_only=True, read_only=True)',
    'out = []',
    'for ws in wb.worksheets:',
    '    out.append(f"# Sheet: {ws.title}")',
    '    for row in ws.iter_rows(values_only=True):',
    '        vals = [str(v).strip() for v in row if v is not None and str(v).strip()]',
    '        if vals:',
    '            out.append(" | ".join(vals))',
    'wb.close()',
    // json.dumps escapes non-ASCII by default, so the bytes on stdout are pure
    // ASCII and decode correctly whatever Python's console encoding is.
    'print(json.dumps("\\n".join(out)))',
  ].join('\n');
  const stdout = execFileSync('python', ['-c', script, filePath], {
    encoding: 'utf8',
    maxBuffer: 50 * 1024 * 1024,
  });
  return JSON.parse(stdout);
}
/**
 * Remove "<number>|" line prefixes from text that looks like a numbered dump.
 *
 * The prefixes are stripped only when at least max(3, floor(lineCount / 4))
 * lines carry one; otherwise the text is returned untouched.
 *
 * @param {string} text - Raw document text.
 * @returns {string} The cleaned text joined with "\n", or the original text
 *   when too few lines are numbered.
 */
function cleanLineNumbers(text) {
  const lines = text.split(/\r?\n/);
  let changed = 0;
  const cleaned = lines.map((line) => {
    // `(.*)` rather than `(.+)`: a numbered blank line ("12|") must lose its
    // prefix too, otherwise the prefix stays in the cleaned output.
    const match = line.match(/^\s*\d+\|(.*)$/);
    if (!match) return line;
    changed += 1;
    return match[1];
  });
  const threshold = Math.max(3, Math.floor(lines.length / 4));
  return changed >= threshold ? cleaned.join('\n') : text;
}
/**
 * Reduce an HTML fragment to plain text.
 *
 * Drops <script> and <style> elements with their content, replaces every
 * remaining tag with a space, decodes the &nbsp; and &amp; entities,
 * collapses whitespace runs to a single space and trims the result.
 *
 * @param {string} text - HTML source.
 * @returns {string} Plain text on a single line.
 */
function stripHtml(text) {
  return text
    .replace(/<script[\s\S]*?<\/script>|<style[\s\S]*?<\/style>/gi, ' ')
    .replace(/<[^>]+>/g, ' ')
    // The entity patterns are spelled out explicitly: replacing ' ' with ' '
    // or '&' with '&' does nothing and leaves the entities in the output.
    .replace(/&nbsp;/g, ' ')
    // &amp; is decoded last so "&amp;nbsp;" yields the literal "&nbsp;".
    .replace(/&amp;/g, '&')
    .replace(/\s+/g, ' ')
    .trim();
}
/**
 * Derive a display title for a document.
 *
 * Order of preference:
 *   1. for .htm/.html files, the non-empty text of the <title> element;
 *   2. the first line that starts with "#", without the leading "#" run;
 *   3. the file name without extension, with a leading "<digits>-" or
 *      "<digits>_" prefix removed and underscores turned into spaces.
 *
 * @param {string} fileName - File name; its extension selects the HTML path.
 * @param {string} text - Document content.
 * @returns {string} The title.
 */
function titleFor(fileName, text) {
  if (/\.html?$/i.test(fileName)) {
    const match = text.match(/<title[^>]*>([\s\S]*?)<\/title>/i);
    const title = match ? stripHtml(match[1]) : '';
    if (title) return title;
  }
  for (const line of text.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (trimmed.startsWith('#')) return trimmed.replace(/^#+\s*/, '').trim();
  }
  return path
    .basename(fileName, path.extname(fileName))
    .replace(/^\d+[-_]/, '')
    .replace(/_/g, ' ');
}
/**
 * Build a short plain-text summary of a document.
 *
 * HTML files are flattened with stripHtml; any other file has the characters
 * ` * _ > # | - [ ] ( ) replaced by spaces and whitespace collapsed.
 *
 * @param {string} fileName - File name; its extension selects the HTML path.
 * @param {string} text - Document content.
 * @param {string} fallbackSummary - Returned when no plain text remains.
 * @returns {string} The first 180 UTF-16 code units of the plain text, or
 *   `fallbackSummary` when the plain text is empty.
 */
function summaryFor(fileName, text, fallbackSummary) {
  const plain = /\.html?$/i.test(fileName)
    ? stripHtml(text)
    : text.replace(/[`*_>#|\-\[\]()]/g, ' ').replace(/\s+/g, ' ').trim();
  return plain ? plain.slice(0, 180) : fallbackSummary;
}
/**
 * Collect the tags of a document.
 *
 * Starts from a copy of `defaultTags` and appends the entries of the first
 * line of the form `tags: [a, b, ...]` found in the text, skipping empty
 * entries and duplicates. One surrounding quote character is removed from
 * each side of an entry.
 *
 * @param {string} text - Document content.
 * @param {string[]} defaultTags - Tags every document receives; not mutated.
 * @returns {string[]} A new array with the default tags first.
 */
function tagsFor(text, defaultTags) {
  const tags = [...defaultTags];
  const match = text.match(/^tags:\s*\[(.*?)\]/m);
  if (!match) return tags;
  // Accept both the ASCII comma and the full-width comma (U+FF0C).
  for (const item of match[1].split(/[,\uFF0C]/)) {
    const tag = item.trim().replace(/^['"]|['"]$/g, '');
    if (tag && !tags.includes(tag)) tags.push(tag);
  }
  return tags;
}
/**
 * Classify a document by the length of its text.
 *
 * @param {string} text - Document content.
 * @returns {'complex' | 'moderate' | 'simple'} 'complex' above 20000
 *   characters, 'moderate' above 5000, 'simple' otherwise.
 */
function complexityFor(text) {
  if (text.length > 20000) return 'complex';
  if (text.length > 5000) return 'moderate';
  return 'simple';
}
/**
 * Find the layer described by `config` in the graph, creating it if needed,
 * and bring its name, description and flow-node membership up to date.
 *
 * @param {{ layers: object[] }} graph - Graph whose `layers` array is searched
 *   and possibly appended to.
 * @param {object} config - Uses `layerId`, `layerName`, `layerDescription`
 *   and `flowId`.
 * @returns {object} The layer object stored in `graph.layers`; its `nodeIds`
 *   is always an array containing `config.flowId`.
 */
function ensureLayer(graph, config) {
  let layer = graph.layers.find((item) => item.id === config.layerId);
  if (!layer) {
    layer = {
      id: config.layerId,
      name: config.layerName,
      description: config.layerDescription,
      nodeIds: [config.flowId],
    };
    graph.layers.push(layer);
  }
  // An existing layer is refreshed from the configuration as well.
  layer.name = config.layerName;
  layer.description = config.layerDescription;
  layer.nodeIds ??= [];
  // A missing flow id is inserted at the front of the list.
  if (!layer.nodeIds.includes(config.flowId)) layer.nodeIds.unshift(config.flowId);
  return layer;
}
/**
 * Refresh the flow node of a layer and report how many documents the layer holds.
 *
 * When a node with id `config.flowId` exists in `byId`, its `summary`,
 * `knowledgeMeta.content` and `knowledgeMeta.category` are overwritten;
 * otherwise nothing is modified.
 *
 * @param {Map<string, object>} byId - Graph nodes keyed by id.
 * @param {{ nodeIds: string[] }} layer - Layer whose members are counted.
 * @param {object} config - Uses `flowId`, `layerName`, `layerDescription`
 *   and `category`.
 * @returns {number} Number of ids in `layer.nodeIds` other than the flow id.
 */
function updateFlowNode(byId, layer, config) {
  const count = layer.nodeIds.filter((id) => id !== config.flowId).length;
  const flow = byId.get(config.flowId);
  if (flow) {
    flow.summary = config.layerDescription;
    flow.knowledgeMeta ??= {};
    flow.knowledgeMeta.content = `# ${config.layerName}\n\n${config.layerDescription}\n\n本层包含 ${count} 个文档。点击右侧 Files 或在本层详情中选择具体文档查看内容。`;
    flow.knowledgeMeta.category = config.category;
  }
  return count;
}
/**
 * Import every document of the configured directories into one knowledge
 * graph file and write the file back.
 *
 * For each entry of `directoryConfigs` the layer is ensured, every listed
 * file becomes (or refreshes) a `document` node, the node is added to the
 * layer, and a `documents` edge from the layer's flow node is added when no
 * edge with the same source, target and type exists yet. Finally
 * `graph.project.analyzedAt` is set to the current time.
 *
 * @param {string} graphPath - Path of the knowledge-graph JSON file; it must
 *   exist and contain valid JSON.
 * @returns {{ graphPath: string, stats: object[], nodes: number }} Per-layer
 *   added/updated/count figures and the total node count after the update.
 */
function updateGraph(graphPath) {
  const graph = JSON.parse(fs.readFileSync(graphPath, 'utf8'));
  graph.nodes ??= [];
  graph.edges ??= [];
  graph.layers ??= [];

  const byId = new Map(graph.nodes.map((node) => [node.id, node]));
  const edgeKeys = new Set(graph.edges.map((edge) => `${edge.source}|${edge.target}|${edge.type}`));
  const stats = [];

  for (const config of directoryConfigs) {
    const files = listFiles(config.dir);
    const layer = ensureLayer(graph, config);
    let added = 0;
    let updated = 0;

    for (const fileName of files) {
      const absolutePath = `${config.dir}/${fileName}`;
      const relPath = `${config.relDir}/${fileName}`;
      // The id drops the extension, so files that differ only by extension
      // map to the same node.
      const nodeId = `doc:${relPath.replace(/\.[^.]+$/, '')}`;
      const text = cleanLineNumbers(readText(absolutePath));
      const node = {
        id: nodeId,
        type: 'document',
        name: titleFor(fileName, text),
        filePath: relPath,
        summary: summaryFor(fileName, text, config.fallbackSummary),
        tags: tagsFor(text, config.defaultTags),
        complexity: complexityFor(text),
        knowledgeMeta: {
          content: text,
          wikilinks: [...text.matchAll(/\[\[([^\]]+)\]\]/g)].map((match) => match[1]),
          category: config.category,
        },
      };

      if (byId.has(nodeId)) {
        // Overwrite the generated fields in place; other fields of the
        // existing node are kept.
        Object.assign(byId.get(nodeId), node);
        updated += 1;
      } else {
        graph.nodes.push(node);
        byId.set(nodeId, node);
        added += 1;
      }

      if (!layer.nodeIds.includes(nodeId)) layer.nodeIds.push(nodeId);
      const edgeKey = `${config.flowId}|${nodeId}|documents`;
      if (!edgeKeys.has(edgeKey)) {
        graph.edges.push({
          source: config.flowId,
          target: nodeId,
          type: 'documents',
          direction: 'forward',
          description: '本层文档',
          weight: 0.65,
        });
        edgeKeys.add(edgeKey);
      }
    }

    stats.push({ layer: config.layerName, added, updated, count: updateFlowNode(byId, layer, config) });
  }

  graph.project ??= {};
  graph.project.analyzedAt = new Date().toISOString();
  fs.writeFileSync(graphPath, `${JSON.stringify(graph, null, 2)}\n`, 'utf8');
  return { graphPath, stats, nodes: graph.nodes.length };
}
// Update every configured graph first; the meta files below are then
// recomputed from the graph files as written.
const results = graphPaths.map(updateGraph);

for (const metaPath of metaPaths) {
  if (!fs.existsSync(metaPath)) continue;
  const meta = JSON.parse(fs.readFileSync(metaPath, 'utf8'));
  // The graph is read from the knowledge-graph.json next to the meta file.
  const graph = JSON.parse(fs.readFileSync(`${path.dirname(metaPath)}/knowledge-graph.json`, 'utf8'));
  meta.lastAnalyzedAt = new Date().toISOString();
  // Only nodes whose id starts with "doc:" count as analyzed files.
  meta.analyzedFiles = graph.nodes.filter((node) => String(node.id).startsWith('doc:')).length;
  fs.writeFileSync(metaPath, `${JSON.stringify(meta, null, 2)}\n`, 'utf8');
}

console.log(JSON.stringify(results, null, 2));