import { describe, it, expect, beforeEach, afterEach } from 'vitest'; import { mkdtempSync, mkdirSync, writeFileSync, readFileSync, rmSync } from 'node:fs'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; import { spawnSync } from 'node:child_process'; import { fileURLToPath } from 'node:url'; import { dirname, resolve } from 'node:path'; const __dirname = dirname(fileURLToPath(import.meta.url)); const SCRIPT = resolve(__dirname, '../../../understand-anything-plugin/skills/understand/compute-batches.mjs'); const FIXTURES = resolve(__dirname, 'fixtures'); function runScript(projectRoot, extraArgs = []) { return spawnSync('node', [SCRIPT, projectRoot, ...extraArgs], { encoding: 'utf-8', }); } function setupProject(fixtureName) { const root = mkdtempSync(join(tmpdir(), 'ua-cb-test-')); mkdirSync(join(root, '.understand-anything', 'intermediate'), { recursive: true }); const fixturePath = join(FIXTURES, fixtureName); const dest = join(root, '.understand-anything', 'intermediate', 'scan-result.json'); writeFileSync(dest, readFileSync(fixturePath, 'utf-8')); return root; } function readBatches(projectRoot) { const p = join(projectRoot, '.understand-anything', 'intermediate', 'batches.json'); return JSON.parse(readFileSync(p, 'utf-8')); } describe('compute-batches.mjs — Louvain basic', () => { let projectRoot; beforeEach(() => { projectRoot = setupProject('scan-result-3-cliques.json'); }); afterEach(() => { if (projectRoot) rmSync(projectRoot, { recursive: true, force: true }); }); it('produces 3 batches for 3 disjoint cliques', () => { const result = runScript(projectRoot); expect(result.status).toBe(0); const batches = readBatches(projectRoot); expect(batches.algorithm).toBe('louvain'); expect(batches.totalFiles).toBe(9); expect(batches.batches.length).toBe(3); expect(batches.schemaVersion).toBe(1); expect(batches.totalBatches).toBe(3); expect(batches.batches.map(b => b.batchIndex)).toEqual([1, 2, 3]); // Each batch should contain exactly one clique (3 files) for (const b of batches.batches) { expect(b.files.length).toBe(3); const dirs = new Set(b.files.map(f => f.path.split('/')[1])); expect(dirs.size).toBe(1); // all files in the batch share src// } }); it('produces deterministic output across runs', () => { const r1 = runScript(projectRoot); expect(r1.status).toBe(0); const json1 = readFileSync( join(projectRoot, '.understand-anything', 'intermediate', 'batches.json'), 'utf-8', ); const r2 = runScript(projectRoot); expect(r2.status).toBe(0); const json2 = readFileSync( join(projectRoot, '.understand-anything', 'intermediate', 'batches.json'), 'utf-8', ); expect(json1).toBe(json2); }); }); describe('compute-batches.mjs — size enforcement', () => { let projectRoot; beforeEach(() => { projectRoot = setupProject('scan-result-large-community.json'); }); afterEach(() => { if (projectRoot) rmSync(projectRoot, { recursive: true, force: true }); }); it('splits a 40-node clique into batches ≤ 35', () => { const result = runScript(projectRoot); expect(result.status).toBe(0); const batches = readBatches(projectRoot); expect(batches.algorithm).toBe('louvain'); // confirm fallback didn't fire expect(batches.totalFiles).toBe(40); expect(batches.batches.length).toBe(2); expect(batches.batches.map(b => b.files.length).sort()).toEqual([20, 20]); // Sum of all batch file counts equals total files const sum = batches.batches.reduce((acc, b) => acc + b.files.length, 0); expect(sum).toBe(40); // Warning was emitted to stderr expect(result.stderr).toMatch(/Warning: compute-batches: community size 40 > max 35/); }); }); describe('compute-batches.mjs — exports extraction', () => { let root; afterEach(() => { if (root) rmSync(root, { recursive: true, force: true }); }); it('populates exports for code files via tree-sitter', () => { root = mkdtempSync(join(tmpdir(), 'ua-cb-exp-')); mkdirSync(join(root, '.understand-anything', 'intermediate'), { recursive: true }); mkdirSync(join(root, 'src'), { recursive: true }); writeFileSync(join(root, 'src', 'a.ts'), 'export function greet(name: string) { return "hi " + name; }\n' + 'export class Greeter { greet(n: string) { return "hi " + n; } }\n'); writeFileSync(join(root, 'src', 'b.ts'), 'import { greet } from "./a";\nexport const helper = () => greet("world");\n'); const scan = { name: 'exports-test', description: '', languages: ['typescript'], frameworks: [], files: [ { path: 'src/a.ts', language: 'typescript', sizeLines: 2, fileCategory: 'code' }, { path: 'src/b.ts', language: 'typescript', sizeLines: 2, fileCategory: 'code' }, ], totalFiles: 2, filteredByIgnore: 0, estimatedComplexity: 'small', importMap: { 'src/a.ts': [], 'src/b.ts': ['src/a.ts'] }, }; writeFileSync( join(root, '.understand-anything', 'intermediate', 'scan-result.json'), JSON.stringify(scan)); const result = runScript(root); expect(result.status).toBe(0); const batches = readBatches(root); expect(batches.exportsByPath).toBeDefined(); expect(batches.exportsByPath['src/a.ts']).toEqual( expect.arrayContaining(['greet', 'Greeter'])); expect(batches.exportsByPath['src/b.ts']).toEqual( expect.arrayContaining(['helper'])); }); it('emits warning when file is missing from disk (read error path)', () => { root = mkdtempSync(join(tmpdir(), 'ua-cb-exp-err-')); mkdirSync(join(root, '.understand-anything', 'intermediate'), { recursive: true }); // Note: NOT creating the file on disk — scan-result.json references it, // but the file doesn't exist, so the read branch fires. const scan = { name: 'missing-file-test', description: '', languages: ['typescript'], frameworks: [], files: [ { path: 'src/missing.ts', language: 'typescript', sizeLines: 1, fileCategory: 'code' }, ], totalFiles: 1, filteredByIgnore: 0, estimatedComplexity: 'small', importMap: { 'src/missing.ts': [] }, }; writeFileSync( join(root, '.understand-anything', 'intermediate', 'scan-result.json'), JSON.stringify(scan)); const result = runScript(root); expect(result.status).toBe(0); // script must still succeed expect(result.stderr).toMatch( /Warning: compute-batches: exports extraction failed for src\/missing\.ts \(read error:/); const batches = readBatches(root); expect(batches.exportsByPath['src/missing.ts']).toEqual([]); }); }); describe('compute-batches.mjs — non-code grouping', () => { let root; let batches; beforeEach(() => { root = setupProject('scan-result-non-code.json'); const result = runScript(root); expect(result.status).toBe(0); batches = readBatches(root); }); afterEach(() => { if (root) rmSync(root, { recursive: true, force: true }); }); it('Group A: bundles Dockerfile cluster per directory', () => { // Root-level cluster: Dockerfile + docker-compose.yml + .dockerignore → one batch const rootDockerBatch = batches.batches.find(b => b.files.some(f => f.path === 'Dockerfile')); expect(rootDockerBatch).toBeDefined(); const paths = rootDockerBatch.files.map(f => f.path).sort(); expect(paths).toEqual(['.dockerignore', 'Dockerfile', 'docker-compose.yml']); // services/api cluster is a separate batch const apiDockerBatch = batches.batches.find(b => b.files.some(f => f.path === 'services/api/Dockerfile')); expect(apiDockerBatch).toBeDefined(); expect(apiDockerBatch).not.toBe(rootDockerBatch); expect(apiDockerBatch.files.map(f => f.path).sort()).toEqual([ 'services/api/Dockerfile', 'services/api/docker-compose.yml', ]); }); it('Group B: .github/workflows/* all in one batch', () => { const wfBatch = batches.batches.find(b => b.files.some(f => f.path.startsWith('.github/workflows/'))); expect(wfBatch).toBeDefined(); const wfPaths = wfBatch.files.map(f => f.path).filter(p => p.startsWith('.github/workflows/')); expect(wfPaths.sort()).toEqual([ '.github/workflows/ci.yml', '.github/workflows/deploy.yml', ]); }); it('Group C: .gitlab-ci.yml + .circleci/* in one batch', () => { const ciBatch = batches.batches.find(b => b.files.some(f => f.path === '.gitlab-ci.yml')); expect(ciBatch).toBeDefined(); const ciPaths = ciBatch.files.map(f => f.path).sort(); expect(ciPaths).toEqual(['.circleci/config.yml', '.gitlab-ci.yml']); }); it('Group D: SQL migrations under migrations/ in one batch', () => { const migBatch = batches.batches.find(b => b.files.some(f => f.path.startsWith('migrations/'))); expect(migBatch).toBeDefined(); const migPaths = migBatch.files.map(f => f.path).filter(p => p.startsWith('migrations/')); expect(migPaths.sort()).toEqual([ 'migrations/001_init.sql', 'migrations/002_users.sql', ]); }); it('non-code batch indices follow code batches', () => { const codeBatches = batches.batches.filter(b => b.files.every(f => f.fileCategory === 'code')); const nonCodeBatches = batches.batches.filter(b => b.files.some(f => f.fileCategory !== 'code')); expect(codeBatches.length).toBeGreaterThan(0); expect(nonCodeBatches.length).toBeGreaterThan(0); const maxCodeIdx = Math.max(...codeBatches.map(b => b.batchIndex)); const minNonCodeIdx = Math.min(...nonCodeBatches.map(b => b.batchIndex)); expect(minNonCodeIdx).toBeGreaterThan(maxCodeIdx); }); }); describe('compute-batches.mjs — Group E MAX_E split', () => { let root; afterEach(() => { if (root) rmSync(root, { recursive: true, force: true }); }); it('splits 25 .md files under docs/ into [20, 5]', () => { root = mkdtempSync(join(tmpdir(), 'ua-cb-maxe-')); mkdirSync(join(root, '.understand-anything', 'intermediate'), { recursive: true }); const files = []; const importMap = {}; for (let i = 0; i < 25; i++) { const p = `docs/page${String(i).padStart(2, '0')}.md`; files.push({ path: p, language: 'markdown', sizeLines: 10, fileCategory: 'docs' }); importMap[p] = []; } const scan = { name: 'maxe-test', description: '', languages: ['markdown'], frameworks: [], files, totalFiles: 25, filteredByIgnore: 0, estimatedComplexity: 'small', importMap, }; writeFileSync( join(root, '.understand-anything', 'intermediate', 'scan-result.json'), JSON.stringify(scan)); const result = runScript(root); expect(result.status).toBe(0); const batches = readBatches(root); // All 25 docs/ files go through Group E with MAX_E = 20, split into [20, 5]. const docsBatches = batches.batches.filter(b => b.files.every(f => f.path.startsWith('docs/'))); expect(docsBatches.length).toBe(2); const sizes = docsBatches.map(b => b.files.length).sort((a, b) => b - a); expect(sizes).toEqual([20, 5]); }); }); describe('compute-batches.mjs — neighborMap + batchImportData', () => { let batches; let batchOf; // path → batchIndex let projectRoot; beforeEach(() => { projectRoot = setupProject('scan-result-3-cliques.json'); const result = runScript(projectRoot); expect(result.status).toBe(0); batches = readBatches(projectRoot); batchOf = new Map(); for (const b of batches.batches) { for (const f of b.files) batchOf.set(f.path, b.batchIndex); } }); afterEach(() => { if (projectRoot) rmSync(projectRoot, { recursive: true, force: true }); }); it('batchImportData mirrors scan importMap per batch', () => { for (const b of batches.batches) { for (const f of b.files) { expect(b.batchImportData[f.path]).toBeDefined(); expect(Array.isArray(b.batchImportData[f.path])).toBe(true); } } // src/auth/login.ts imports src/auth/session.ts and src/auth/tokens.ts const loginBatch = batches.batches.find(b => b.files.some(f => f.path === 'src/auth/login.ts')); expect(loginBatch.batchImportData['src/auth/login.ts'].sort()).toEqual([ 'src/auth/session.ts', 'src/auth/tokens.ts', ]); }); it('neighborMap excludes same-batch files', () => { // The fixture's three cliques each go into one batch — all imports are // intra-batch, so no neighbor map should reference any same-batch file. for (const b of batches.batches) { const sameBatchPaths = new Set(b.files.map(f => f.path)); for (const [, neighbors] of Object.entries(b.neighborMap)) { for (const n of neighbors) { expect(sameBatchPaths.has(n.path)).toBe(false); } } } }); it('neighborMap entries carry symbols when target has exports', () => { const root = mkdtempSync(join(tmpdir(), 'ua-cb-nbr-')); mkdirSync(join(root, '.understand-anything', 'intermediate'), { recursive: true }); mkdirSync(join(root, 'src', 'a'), { recursive: true }); mkdirSync(join(root, 'src', 'b'), { recursive: true }); // Cluster A: 3 tightly-imported files. a/core.ts exports symbols. writeFileSync(join(root, 'src', 'a', 'core.ts'), 'export function findUser(id: string) { return null; }\nexport class User {}\n'); writeFileSync(join(root, 'src', 'a', 'helper1.ts'), 'import { findUser } from "./core";\nexport const h1 = () => findUser("x");\n'); writeFileSync(join(root, 'src', 'a', 'helper2.ts'), 'import { User } from "./core";\nimport { h1 } from "./helper1";\nexport const h2 = () => h1();\n'); // Cluster B: 3 tightly-imported files. b/entry.ts has ONE cross-cluster import to a/core.ts. writeFileSync(join(root, 'src', 'b', 'entry.ts'), 'import { findUser } from "../a/core";\nexport const entry = () => findUser("y");\n'); writeFileSync(join(root, 'src', 'b', 'middle.ts'), 'import { entry } from "./entry";\nexport const middle = () => entry();\n'); writeFileSync(join(root, 'src', 'b', 'leaf.ts'), 'import { middle } from "./middle";\nexport const leaf = () => middle();\n'); const files = [ { path: 'src/a/core.ts', language: 'typescript', sizeLines: 2, fileCategory: 'code' }, { path: 'src/a/helper1.ts', language: 'typescript', sizeLines: 2, fileCategory: 'code' }, { path: 'src/a/helper2.ts', language: 'typescript', sizeLines: 3, fileCategory: 'code' }, { path: 'src/b/entry.ts', language: 'typescript', sizeLines: 2, fileCategory: 'code' }, { path: 'src/b/middle.ts', language: 'typescript', sizeLines: 2, fileCategory: 'code' }, { path: 'src/b/leaf.ts', language: 'typescript', sizeLines: 2, fileCategory: 'code' }, ]; const scan = { name: 't', description: '', languages: ['typescript'], frameworks: [], files, totalFiles: 6, filteredByIgnore: 0, estimatedComplexity: 'small', importMap: { 'src/a/core.ts': [], 'src/a/helper1.ts': ['src/a/core.ts'], 'src/a/helper2.ts': ['src/a/core.ts', 'src/a/helper1.ts'], 'src/b/entry.ts': ['src/a/core.ts'], // CROSS-CLUSTER 'src/b/middle.ts': ['src/b/entry.ts'], 'src/b/leaf.ts': ['src/b/middle.ts'], }, }; writeFileSync( join(root, '.understand-anything', 'intermediate', 'scan-result.json'), JSON.stringify(scan)); const result = runScript(root); expect(result.status).toBe(0); const out = readBatches(root); // Expect 2 communities (cluster A and cluster B). Verify that some batch's // neighborMap entry references src/a/core.ts with its symbols. let sawSymbols = false; for (const batch of out.batches) { for (const [, neighbors] of Object.entries(batch.neighborMap)) { for (const n of neighbors) { if (n.path === 'src/a/core.ts') { expect(n.symbols).toEqual(expect.arrayContaining(['findUser', 'User'])); sawSymbols = true; } } } } expect(sawSymbols).toBe(true); rmSync(root, { recursive: true, force: true }); }); }); describe('compute-batches.mjs — neighborMap truncation', () => { let root; afterEach(() => { if (root) rmSync(root, { recursive: true, force: true }); }); it('truncates and warns when neighbors > 50', () => { root = mkdtempSync(join(tmpdir(), 'ua-cb-trunc-')); mkdirSync(join(root, '.understand-anything', 'intermediate'), { recursive: true }); // hub.ts imported by 60 other files const files = [{ path: 'src/hub.ts', language: 'typescript', sizeLines: 1, fileCategory: 'code' }]; const importMap = { 'src/hub.ts': [] }; for (let i = 0; i < 60; i++) { const p = `src/leaf${i}.ts`; files.push({ path: p, language: 'typescript', sizeLines: 1, fileCategory: 'code' }); importMap[p] = ['src/hub.ts']; } const scan = { name: 't', description: '', languages: ['typescript'], frameworks: [], files, totalFiles: files.length, filteredByIgnore: 0, estimatedComplexity: 'moderate', importMap, }; writeFileSync( join(root, '.understand-anything', 'intermediate', 'scan-result.json'), JSON.stringify(scan)); const result = runScript(root); expect(result.status).toBe(0); expect(result.stderr).toMatch( /neighborMap for src\/hub\.ts has high 1-hop degree 60 — exceeds soft cap of 50/); const out = readBatches(root); // Find hub.ts and confirm its neighbor list capped at 50 (in whichever batch it landed) for (const b of out.batches) { const nbrs = b.neighborMap['src/hub.ts']; if (nbrs) expect(nbrs.length).toBeLessThanOrEqual(50); } }); }); describe('compute-batches.mjs — fallback', () => { let root; afterEach(() => { if (root) rmSync(root, { recursive: true, force: true }); }); it('falls back to count-based when Louvain throws (env-injected mock)', () => { // We can't easily monkey-patch louvain mid-script in Vitest because the // script runs in a subprocess. Instead, set an env var the script honors: // UA_COMPUTE_BATCHES_FORCE_LOUVAIN_THROW=1 → script throws inside its // Louvain branch, exercising the fallback path. root = setupProject('scan-result-3-cliques.json'); const result = spawnSync('node', [SCRIPT, root], { encoding: 'utf-8', env: { ...process.env, UA_COMPUTE_BATCHES_FORCE_LOUVAIN_THROW: '1' } }, ); expect(result.status).toBe(0); expect(result.stderr).toMatch( /Warning: compute-batches: Louvain failed.*falling back to count-based grouping/); const out = readBatches(root); expect(out.algorithm).toBe('count-fallback'); expect(out.totalFiles).toBe(9); // Count-based: 12 files per batch → all 9 fit in one batch const codeBatchFileCount = out.batches .filter(b => b.files.every(f => f.fileCategory === 'code')) .reduce((sum, b) => sum + b.files.length, 0); expect(codeBatchFileCount).toBe(9); }); }); describe('compute-batches.mjs — merge-small', () => { let projectRoot; beforeEach(() => { projectRoot = setupProject('scan-result-singletons.json'); }); afterEach(() => { if (projectRoot) rmSync(projectRoot, { recursive: true, force: true }); }); it('merges 100 isolated singletons into a small number of misc batches', () => { const result = runScript(projectRoot); expect(result.status).toBe(0); const batches = readBatches(projectRoot); expect(batches.totalFiles).toBe(100); // Without merge: 100 singletons → 100 batches. // With merge-small (MAX_MERGE_TARGET=25): ceil(100 / 25) = exactly 4 misc // batches. Pin the exact count — a loose >=4 && <=8 would mask off-by-one // regressions in the slice math (e.g., a stride miscalculation that // splintered the pool into 5-7 underfull buckets). expect(batches.batches.length).toBe(4); // All files accounted for const totalAssigned = batches.batches.reduce((sum, b) => sum + b.files.length, 0); expect(totalAssigned).toBe(100); // Bucket-fullness check: 100 singletons evenly divisible by // MAX_MERGE_TARGET=25, so every bucket must be exactly 25 — not just // ≤ 25. Drift toward [25, 25, 25, 24, 1] etc. would slip past a // ≤25 bound while indicating a stride bug. for (const b of batches.batches) { expect(b.files.length).toBe(25); } // Info: (not Warning:) — merge-small is a routine optimization, not a // fallback path. See compute-batches.mjs mergeSmallBatches WHY comment. expect(result.stderr).toMatch( /Info: compute-batches: merged \d+ small batches \(\d+ files\) into \d+ misc batches/); expect(result.stderr).not.toMatch(/Warning: compute-batches: merged \d+ small batches/); }); it('preserves non-mergeable batches: Dockerfile cluster not pooled into misc', () => { // Dedicated fixture: 30 isolated TS singletons + 1 Dockerfile-only cluster. // Group A marks the Dockerfile batch mergeable=false; even though its size // (1) is below MIN_BATCH_SIZE=3, mergeSmallBatches must leave it intact. const altRoot = setupProject('scan-result-merge-respects-non-mergeable.json'); try { const result = runScript(altRoot); expect(result.status).toBe(0); const out = readBatches(altRoot); expect(out.totalFiles).toBe(31); const dockerBatch = out.batches.find(b => b.files.some(f => f.path === 'services/api/Dockerfile')); expect(dockerBatch).toBeDefined(); // Standalone: exactly the Dockerfile, nothing pooled in alongside it. expect(dockerBatch.files.length).toBe(1); expect(dockerBatch.files[0].path).toBe('services/api/Dockerfile'); // The TS singletons must still merge into at least one misc batch — // and that misc batch must NOT contain the Dockerfile. const miscBatches = out.batches.filter(b => b.files.some(f => f.path.startsWith('src/leaf'))); expect(miscBatches.length).toBeGreaterThanOrEqual(1); for (const m of miscBatches) { for (const f of m.files) { expect(f.path).not.toBe('services/api/Dockerfile'); } } // Every TS singleton accounted for across the misc bucket(s). const tsInMisc = miscBatches.flatMap(b => b.files.map(f => f.path)) .filter(p => p.startsWith('src/leaf')); expect(tsInMisc.length).toBe(30); } finally { rmSync(altRoot, { recursive: true, force: true }); } }); }); describe('compute-batches.mjs — --changed-files', () => { let root; afterEach(() => { if (root) rmSync(root, { recursive: true, force: true }); }); it('emits only batches containing changed files', () => { root = setupProject('scan-result-3-cliques.json'); const changedPath = join(root, 'changed.txt'); // Only the auth clique is changed writeFileSync(changedPath, ['src/auth/login.ts', 'src/auth/tokens.ts'].join('\n')); const result = runScript(root, [`--changed-files=${changedPath}`]); expect(result.status).toBe(0); const out = readBatches(root); // Auth files are in batches; other cliques' batches must be omitted const allPaths = out.batches.flatMap(b => b.files.map(f => f.path)); expect(allPaths).toContain('src/auth/login.ts'); expect(allPaths).toContain('src/auth/tokens.ts'); expect(allPaths).not.toContain('src/api/handlers.ts'); expect(allPaths).not.toContain('src/db/users.ts'); // neighborMap may still reference unchanged files (with their full-graph batchIndex) const loginBatch = out.batches.find(b => b.files.some(f => f.path === 'src/auth/login.ts')); expect(loginBatch).toBeDefined(); }); });