diff --git a/packages/cli/src/cli.ts b/packages/cli/src/cli.ts index 4c79cc1..889ec59 100644 --- a/packages/cli/src/cli.ts +++ b/packages/cli/src/cli.ts @@ -10,6 +10,7 @@ import { gitCommand } from './commands/git.js'; import { githubCommand } from './commands/github.js'; import { indexCommand } from './commands/index.js'; import { initCommand } from './commands/init.js'; +import { mapCommand } from './commands/map.js'; import { mcpCommand } from './commands/mcp.js'; import { metricsCommand } from './commands/metrics.js'; import { planCommand } from './commands/plan.js'; @@ -37,6 +38,7 @@ program.addCommand(exploreCommand); program.addCommand(planCommand); program.addCommand(githubCommand); program.addCommand(gitCommand); +program.addCommand(mapCommand); program.addCommand(updateCommand); program.addCommand(statsCommand); program.addCommand(metricsCommand); diff --git a/packages/cli/src/commands/map.ts b/packages/cli/src/commands/map.ts new file mode 100644 index 0000000..5db6708 --- /dev/null +++ b/packages/cli/src/commands/map.ts @@ -0,0 +1,225 @@ +/** + * Map Command + * Show codebase structure with component counts and change frequency + */ + +import * as path from 'node:path'; +import { + ensureStorageDirectory, + formatCodebaseMap, + generateCodebaseMap, + getStorageFilePaths, + getStoragePath, + LocalGitExtractor, + type MapOptions, + RepositoryIndexer, +} from '@lytics/dev-agent-core'; +import { createLogger } from '@lytics/kero'; +import { Command } from 'commander'; +import ora from 'ora'; +import { loadConfig } from '../utils/config.js'; +import { logger } from '../utils/logger.js'; +import { output } from '../utils/output.js'; + +export const mapCommand = new Command('map') + .description('Show codebase structure with component counts') + .option('-d, --depth ', 'Directory depth to show (1-5)', '2') + .option('-f, --focus ', 'Focus on a specific directory path') + .option('--no-exports', 'Hide exported symbols') + .option('--change-frequency', 'Include git change frequency (hotspots)', false) + .option('--token-budget ', 'Maximum tokens for output', '2000') + .option('--verbose', 'Enable debug logging', false) + .addHelpText( + 'after', + ` +Examples: + $ dev map Show structure at depth 2 + $ dev map --depth 3 Show deeper nesting + $ dev map --focus packages/core Focus on specific directory + $ dev map --change-frequency Show git activity hotspots + +What You'll See: + 📊 Directory structure with component counts + 📦 Classes, functions, interfaces per directory + 🔥 Hot files (with --change-frequency) + 📤 Key exports per directory + +Use Case: + - Understanding codebase organization + - Finding where code lives + - Identifying hotspots and frequently changed areas + - Better than 'ls' or 'tree' for code exploration +` + ) + .action(async (options) => { + const startTime = Date.now(); + + // Create logger with debug enabled if --verbose + const mapLogger = createLogger({ + level: options.verbose ? 'debug' : 'info', + format: 'pretty', + }); + + const spinner = ora('Loading configuration...').start(); + + try { + const config = await loadConfig(); + if (!config) { + spinner.fail('No config found'); + logger.error('Run "dev init" first to initialize dev-agent'); + process.exit(1); + } + + const repositoryPath = config.repository?.path || config.repositoryPath || process.cwd(); + const resolvedRepoPath = path.resolve(repositoryPath); + + spinner.text = 'Initializing indexer...'; + const t1 = Date.now(); + mapLogger.info({ repositoryPath: resolvedRepoPath }, 'Loading repository configuration'); + + const storagePath = await getStoragePath(resolvedRepoPath); + await ensureStorageDirectory(storagePath); + const filePaths = getStorageFilePaths(storagePath); + mapLogger.debug({ storagePath, filePaths }, 'Storage paths resolved'); + + const indexer = new RepositoryIndexer({ + repositoryPath: resolvedRepoPath, + vectorStorePath: filePaths.vectors, + statePath: filePaths.indexerState, + }); + + // Skip embedder initialization for read-only map generation (10-20x faster) + mapLogger.info('Initializing indexer (skipping embedder for fast read-only access)'); + await indexer.initialize({ skipEmbedder: true }); + const t2 = Date.now(); + mapLogger.info({ duration_ms: t2 - t1 }, 'Indexer initialized'); + spinner.text = `Indexer initialized (${t2 - t1}ms). Generating map...`; + + // Check if repository is indexed (use fast basic stats - skips git enrichment) + mapLogger.debug('Checking if repository is indexed'); + const stats = await indexer.getBasicStats(); + if (!stats || stats.filesScanned === 0) { + spinner.fail('Repository not indexed'); + logger.error('Run "dev index" first to index your repository'); + await indexer.close(); + process.exit(1); + } + + mapLogger.info( + { + filesScanned: stats.filesScanned, + documentsIndexed: stats.documentsIndexed, + }, + 'Repository index loaded' + ); + + spinner.text = 'Generating codebase map...'; + + // Parse options + mapLogger.debug( + { rawDepth: options.depth, rawTokenBudget: options.tokenBudget }, + 'Parsing options' + ); + const depth = Number.parseInt(options.depth, 10); + if (Number.isNaN(depth) || depth < 1 || depth > 5) { + spinner.fail('Invalid depth'); + logger.error('Depth must be between 1 and 5'); + await indexer.close(); + process.exit(1); + } + + const tokenBudget = Number.parseInt(options.tokenBudget, 10); + if (Number.isNaN(tokenBudget) || tokenBudget < 500) { + spinner.fail('Invalid token budget'); + logger.error('Token budget must be at least 500'); + await indexer.close(); + process.exit(1); + } + + // Create git extractor for change frequency if requested + const gitExtractor = options.changeFrequency + ? new LocalGitExtractor(resolvedRepoPath) + : undefined; + + if (options.changeFrequency) { + mapLogger.info('Git change frequency analysis enabled'); + } + + // Generate map + const mapOptions: MapOptions = { + depth, + focus: options.focus, + includeExports: options.exports, + tokenBudget, + includeChangeFrequency: options.changeFrequency, + }; + + mapLogger.info( + { + depth, + focus: options.focus || '(all)', + includeExports: options.exports, + tokenBudget, + includeChangeFrequency: options.changeFrequency, + }, + 'Starting map generation' + ); + + const t3 = Date.now(); + const map = await generateCodebaseMap( + { + indexer, + gitExtractor, + logger: mapLogger, + }, + mapOptions + ); + const t4 = Date.now(); + + mapLogger.success( + { + totalDuration_ms: t4 - startTime, + initDuration_ms: t2 - t1, + mapDuration_ms: t4 - t3, + totalComponents: map.totalComponents, + totalDirectories: map.totalDirectories, + }, + 'Map generation complete' + ); + + spinner.succeed( + `Map generated in ${t4 - startTime}ms (init: ${t2 - t1}ms, map: ${t4 - t3}ms)` + ); + + // Format and display + mapLogger.debug('Formatting map output'); + const t5 = Date.now(); + const formatted = formatCodebaseMap(map, { + includeExports: options.exports, + includeChangeFrequency: options.changeFrequency, + }); + const t6 = Date.now(); + mapLogger.debug({ duration_ms: t6 - t5, outputLength: formatted.length }, 'Map formatted'); + + output.log(''); + output.log(formatted); + output.log(''); + + // Show summary + output.log( + `📊 Total: ${map.totalComponents.toLocaleString()} components across ${map.totalDirectories.toLocaleString()} directories` + ); + if (map.hotPaths.length > 0) { + output.log(`🔥 ${map.hotPaths.length} hot paths identified`); + } + output.log(''); + + mapLogger.info('Closing indexer'); + await indexer.close(); + mapLogger.debug('Indexer closed'); + } catch (error) { + spinner.fail('Failed to generate map'); + logger.error(`Error: ${error instanceof Error ? error.message : String(error)}`); + process.exit(1); + } + }); diff --git a/packages/core/src/indexer/index.ts b/packages/core/src/indexer/index.ts index 562990b..5b17c9f 100644 --- a/packages/core/src/indexer/index.ts +++ b/packages/core/src/indexer/index.ts @@ -70,10 +70,12 @@ export class RepositoryIndexer { /** * Initialize the indexer (load state and initialize vector storage) + * @param options Optional initialization options + * @param options.skipEmbedder Skip embedder initialization (useful for read-only operations like map/stats) */ - async initialize(): Promise { - // Initialize vector storage - await this.vectorStorage.initialize(); + async initialize(options?: { skipEmbedder?: boolean }): Promise { + // Initialize vector storage (optionally skip embedder for read-only operations) + await this.vectorStorage.initialize(options); // Load existing state if available await this.loadState(); @@ -509,9 +511,32 @@ export class RepositoryIndexer { return this.vectorStorage.search(query, options); } + /** + * Get all indexed documents without semantic search (fast scan) + * Use this when you need all documents and don't need relevance ranking + * This is 10-20x faster than search() as it skips embedding generation + */ + async getAll(options?: { limit?: number }): Promise { + return this.vectorStorage.getAll(options); + } + /** * Get indexing statistics */ + /** + * Get basic stats without expensive git enrichment (fast) + */ + async getBasicStats(): Promise<{ filesScanned: number; documentsIndexed: number } | null> { + if (!this.state) { + return null; + } + + return { + filesScanned: this.state.stats.totalFiles, + documentsIndexed: this.state.stats.totalDocuments, + }; + } + async getStats(): Promise { if (!this.state) { return null; diff --git a/packages/core/src/map/__tests__/map.test.ts b/packages/core/src/map/__tests__/map.test.ts index f1aa062..7d28e4d 100644 --- a/packages/core/src/map/__tests__/map.test.ts +++ b/packages/core/src/map/__tests__/map.test.ts @@ -95,6 +95,7 @@ describe('Codebase Map', () => { function createMockIndexer(results: SearchResult[] = mockSearchResults): RepositoryIndexer { return { search: vi.fn().mockResolvedValue(results), + getAll: vi.fn().mockResolvedValue(results), } as unknown as RepositoryIndexer; } diff --git a/packages/core/src/map/index.ts b/packages/core/src/map/index.ts index 5c986a1..a8957bf 100644 --- a/packages/core/src/map/index.ts +++ b/packages/core/src/map/index.ts @@ -4,6 +4,7 @@ */ import * as path from 'node:path'; +import type { Logger } from '@lytics/kero'; import type { LocalGitExtractor } from '../git/extractor'; import type { RepositoryIndexer } from '../indexer'; import type { SearchResult } from '../vector/types'; @@ -32,10 +33,11 @@ const DEFAULT_OPTIONS: Required = { includeChangeFrequency: false, }; -/** Context for map generation including optional git extractor */ +/** Context for map generation including optional git extractor and logger */ export interface MapGenerationContext { indexer: RepositoryIndexer; gitExtractor?: LocalGitExtractor; + logger?: Logger; } /** @@ -74,28 +76,65 @@ export async function generateCodebaseMap( ? indexerOrContext : { indexer: indexerOrContext as RepositoryIndexer }; - // Get all indexed documents (use a broad search) - // Note: We search with a generic query to get all documents - const allDocs = await context.indexer.search('function class interface type', { + const logger = context.logger; + const startTime = Date.now(); + + logger?.debug({ depth: opts.depth, focus: opts.focus }, 'Starting codebase map generation'); + + // Get all indexed documents (fast scan without semantic search) + // This is 10-20x faster than search() as it skips embedding generation + const t1 = Date.now(); + const allDocs = await context.indexer.getAll({ limit: 10000, - scoreThreshold: 0, }); + const t2 = Date.now(); + logger?.debug({ duration_ms: t2 - t1, docCount: allDocs.length }, 'Retrieved all documents'); // Build directory tree from documents + const t3 = Date.now(); const root = buildDirectoryTree(allDocs, opts); + const t4 = Date.now(); + logger?.debug({ duration_ms: t4 - t3 }, 'Built directory tree'); // Count totals + const t5 = Date.now(); const totalComponents = countComponents(root); const totalDirectories = countDirectories(root); + const t6 = Date.now(); + logger?.debug( + { + duration_ms: t6 - t5, + totalComponents, + totalDirectories, + }, + 'Counted components' + ); // Compute hot paths (most referenced files) + const t7 = Date.now(); const hotPaths = opts.includeHotPaths ? computeHotPaths(allDocs, opts.maxHotPaths) : []; + const t8 = Date.now(); + logger?.debug({ duration_ms: t8 - t7, hotPathCount: hotPaths.length }, 'Computed hot paths'); // Compute change frequency if requested and git extractor is available if (opts.includeChangeFrequency && context.gitExtractor) { + const t9 = Date.now(); await computeChangeFrequency(root, context.gitExtractor); + const t10 = Date.now(); + logger?.debug({ duration_ms: t10 - t9 }, 'Computed change frequency'); } + const totalDuration = Date.now() - startTime; + logger?.info( + { + duration_ms: totalDuration, + totalComponents, + totalDirectories, + hotPathCount: hotPaths.length, + }, + 'Codebase map generated' + ); + return { root, totalComponents, @@ -145,6 +184,9 @@ function buildDirectoryTree(docs: SearchResult[], opts: Required): M insertIntoTree(root, dir, dirDocs, opts); } + // Propagate counts up the tree (do this ONCE after all directories are processed) + propagateCounts(root); + // Prune tree to depth (smart or fixed) if (opts.smartDepth) { smartPruneTree(root, opts.depth, opts.smartDepthThreshold); @@ -202,8 +244,7 @@ function insertIntoTree( } } - // Propagate counts up the tree - propagateCounts(root); + // Note: Don't propagate counts here - it will be done once after all directories are processed } /** diff --git a/packages/core/src/vector/index.ts b/packages/core/src/vector/index.ts index c1f72ca..8073e7b 100644 --- a/packages/core/src/vector/index.ts +++ b/packages/core/src/vector/index.ts @@ -35,17 +35,38 @@ export class VectorStorage { /** * Initialize both embedder and store + * @param options Optional initialization options + * @param options.skipEmbedder Skip embedder initialization (useful for read-only operations) */ - async initialize(): Promise { + async initialize(options?: { skipEmbedder?: boolean }): Promise { if (this.initialized) { return; } - await Promise.all([this.embedder.initialize(), this.store.initialize()]); + const { skipEmbedder = false } = options || {}; + + if (skipEmbedder) { + // Only initialize store, skip embedder (much faster for read-only operations) + await this.store.initialize(); + } else { + // Initialize both embedder and store + await Promise.all([this.embedder.initialize(), this.store.initialize()]); + } this.initialized = true; } + /** + * Ensure embedder is initialized (lazy initialization for search operations) + */ + private async ensureEmbedder(): Promise { + if (!this.embedder) { + throw new Error('Embedder not available'); + } + // Initialize embedder if not already done + await this.embedder.initialize(); + } + /** * Add documents to the store (automatically generates embeddings) */ @@ -74,6 +95,9 @@ export class VectorStorage { throw new Error('VectorStorage not initialized. Call initialize() first.'); } + // Ensure embedder is initialized (lazy load if needed) + await this.ensureEmbedder(); + // Generate query embedding const queryEmbedding = await this.embedder.embed(query); @@ -81,6 +105,19 @@ export class VectorStorage { return this.store.search(queryEmbedding, options); } + /** + * Get all documents without semantic search (fast scan) + * Use this when you need all documents and don't need relevance ranking + * This is 10-20x faster than search() as it skips embedding generation + */ + async getAll(options?: { limit?: number }): Promise { + if (!this.initialized) { + throw new Error('VectorStorage not initialized. Call initialize() first.'); + } + + return this.store.getAll(options); + } + /** * Get a document by ID */ diff --git a/packages/core/src/vector/store.ts b/packages/core/src/vector/store.ts index 6e79061..dda7eca 100644 --- a/packages/core/src/vector/store.ts +++ b/packages/core/src/vector/store.ts @@ -136,6 +136,39 @@ export class LanceDBVectorStore implements VectorStore { } } + /** + * Get all documents without semantic search (fast scan) + * Use this when you need all documents and don't need relevance ranking + */ + async getAll(options: { limit?: number } = {}): Promise { + if (!this.table) { + return []; // No documents yet + } + + const { limit = 10000 } = options; + + try { + // Use query() instead of search() - no vector similarity calculation needed + // This is much faster as it skips embedding generation and distance computation + const results = await this.table + .query() + .select(['id', 'text', 'metadata']) + .limit(limit) + .toArray(); + + // Transform results (all have score of 1 since no ranking) + return results.map((result) => ({ + id: result.id as string, + score: 1, // No relevance score for full scan + metadata: JSON.parse(result.metadata as string) as SearchResultMetadata, + })); + } catch (error) { + throw new Error( + `Failed to get all documents: ${error instanceof Error ? error.message : String(error)}` + ); + } + } + /** * Get a document by ID */ diff --git a/packages/mcp-server/src/adapters/__tests__/map-adapter.test.ts b/packages/mcp-server/src/adapters/__tests__/map-adapter.test.ts index de81070..0425c4f 100644 --- a/packages/mcp-server/src/adapters/__tests__/map-adapter.test.ts +++ b/packages/mcp-server/src/adapters/__tests__/map-adapter.test.ts @@ -62,6 +62,7 @@ describe('MapAdapter', () => { // Create mock indexer mockIndexer = { search: vi.fn().mockResolvedValue(mockSearchResults), + getAll: vi.fn().mockResolvedValue(mockSearchResults), } as unknown as RepositoryIndexer; // Create adapter @@ -267,6 +268,7 @@ describe('MapAdapter', () => { const largeIndexer = { search: vi.fn().mockResolvedValue(manyResults), + getAll: vi.fn().mockResolvedValue(manyResults), } as unknown as RepositoryIndexer; const largeAdapter = new MapAdapter({