diff --git a/packages/core/src/context.ts b/packages/core/src/context.ts index eef0b1f..aae2ab5 100644 --- a/packages/core/src/context.ts +++ b/packages/core/src/context.ts @@ -77,6 +77,9 @@ const DEFAULT_IGNORE_PATTERNS = [ '*.polyfills.js', '*.runtime.js', '*.map', // source map files + 'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out', + 'target', '.vscode', '.idea', '__pycache__', '.pytest_cache', + 'coverage', '.nyc_output', 'logs', 'tmp', 'temp' ]; export interface CodeContextConfig { @@ -135,12 +138,15 @@ export class CodeContext { ): Promise<{ indexedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> { console.log(`🚀 Starting to index codebase: ${codebasePath}`); - // 1. Check and prepare vector collection + // 1. Load .gitignore patterns if not already loaded + await this.loadGitignorePatterns(codebasePath); + + // 2. Check and prepare vector collection progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 }); console.log(`Debug2: Preparing vector collection for codebase`); await this.prepareCollection(codebasePath); - // 2. Recursively traverse codebase to get all supported files + // 3. Recursively traverse codebase to get all supported files progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 }); const codeFiles = await this.getCodeFiles(codebasePath); console.log(`📁 Found ${codeFiles.length} code files`); @@ -429,10 +435,7 @@ export class CodeContext { } if (entry.isDirectory()) { - // Skip common ignored directories - if (!this.shouldIgnoreDirectory(entry.name)) { - await traverseDirectory(fullPath); - } + await traverseDirectory(fullPath); } else if (entry.isFile()) { const ext = path.extname(entry.name); if (this.supportedExtensions.includes(ext)) { @@ -446,18 +449,6 @@ export class CodeContext { return files; } - /** - * Determine whether directory should be ignored - */ - private shouldIgnoreDirectory(dirName: string): boolean { - const ignoredDirs = [ - 'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out', - 'target', '.vscode', '.idea', '__pycache__', '.pytest_cache', - 'coverage', '.nyc_output', 'logs', 'tmp', 'temp' - ]; - return ignoredDirs.includes(dirName) || dirName.startsWith('.'); - } - /** * Process a list of files with streaming chunk processing * @param filePaths Array of file paths to process @@ -670,6 +661,40 @@ export class CodeContext { } } + /** + * Load .gitignore patterns from the codebase root directory + * @param codebasePath Path to the codebase + */ + private async loadGitignorePatterns(codebasePath: string): Promise { + try { + const gitignorePath = path.join(codebasePath, '.gitignore'); + + // Check if .gitignore exists + try { + await fs.promises.access(gitignorePath); + console.log(`📄 Found .gitignore file at: ${gitignorePath}`); + + // Use the static method from CodeContext to read ignore patterns + const ignorePatterns = await CodeContext.getIgnorePatternsFromFile(gitignorePath); + + if (ignorePatterns.length > 0) { + // Update the CodeContext instance with new patterns + this.updateIgnorePatterns(ignorePatterns); + console.log(`🚫 Loaded ${ignorePatterns.length} ignore patterns from .gitignore`); + } else { + console.log('📄 .gitignore file found but no valid patterns detected'); + } + } catch (error) { + console.log('📄 No .gitignore file found, using default ignore patterns only'); + // No need to update patterns - CodeContext will use defaults + } + } catch (error) { + console.warn(`⚠️ Failed to load .gitignore patterns: ${error}`); + // Continue with default patterns on error + this.updateIgnorePatterns([]); + } + } + /** * Check if a path matches any ignore pattern * @param filePath Path to check diff --git a/packages/core/src/sync/synchronizer.ts b/packages/core/src/sync/synchronizer.ts index 2bbcd7b..828ff77 100644 --- a/packages/core/src/sync/synchronizer.ts +++ b/packages/core/src/sync/synchronizer.ts @@ -96,6 +96,12 @@ export class FileSynchronizer { } private shouldIgnore(relativePath: string, isDirectory: boolean = false): boolean { + // Always ignore hidden files and directories (starting with .) + const pathParts = relativePath.split(path.sep); + if (pathParts.some(part => part.startsWith('.'))) { + return true; + } + if (this.ignorePatterns.length === 0) { return false; } @@ -115,15 +121,15 @@ export class FileSynchronizer { } // Check if any parent directory is ignored - const pathParts = normalizedPath.split('/'); - for (let i = 0; i < pathParts.length; i++) { - const partialPath = pathParts.slice(0, i + 1).join('/'); + const normalizedPathParts = normalizedPath.split('/'); + for (let i = 0; i < normalizedPathParts.length; i++) { + const partialPath = normalizedPathParts.slice(0, i + 1).join('/'); for (const pattern of this.ignorePatterns) { // Check directory patterns if (pattern.endsWith('/')) { const dirPattern = pattern.slice(0, -1); if (this.simpleGlobMatch(partialPath, dirPattern) || - this.simpleGlobMatch(pathParts[i], dirPattern)) { + this.simpleGlobMatch(normalizedPathParts[i], dirPattern)) { return true; } } @@ -135,7 +141,7 @@ export class FileSynchronizer { } // Check filename patterns against any path component else { - if (this.simpleGlobMatch(pathParts[i], pattern)) { + if (this.simpleGlobMatch(normalizedPathParts[i], pattern)) { return true; } } @@ -190,7 +196,7 @@ export class FileSynchronizer { const dag = new MerkleDAG(); const keys = Array.from(fileHashes.keys()); const sortedPaths = keys.slice().sort(); // Create a sorted copy - + // Create a root node for the entire directory let valuesString = ""; keys.forEach(key => { @@ -198,13 +204,13 @@ export class FileSynchronizer { }); const rootNodeData = "root:" + valuesString; const rootNodeId = dag.addNode(rootNodeData); - + // Add each file as a child of the root for (const path of sortedPaths) { const fileData = path + ":" + fileHashes.get(path); dag.addNode(fileData, rootNodeId); } - + return dag; } @@ -228,11 +234,11 @@ export class FileSynchronizer { if (changes.added.length > 0 || changes.removed.length > 0 || changes.modified.length > 0) { console.log('Merkle DAG has changed. Comparing file states...'); const fileChanges = this.compareStates(this.fileHashes, newFileHashes); - + this.fileHashes = newFileHashes; this.merkleDAG = newMerkleDAG; await this.saveSnapshot(); - + console.log(`Found changes: ${fileChanges.added.length} added, ${fileChanges.removed.length} removed, ${fileChanges.modified.length} modified.`); return fileChanges; } @@ -274,14 +280,14 @@ export class FileSynchronizer { private async saveSnapshot(): Promise { const merkleDir = path.dirname(this.snapshotPath); await fs.mkdir(merkleDir, { recursive: true }); - + // Convert Map to array without using iterator const fileHashesArray: [string, string][] = []; const keys = Array.from(this.fileHashes.keys()); keys.forEach(key => { fileHashesArray.push([key, this.fileHashes.get(key)!]); }); - + const data = JSON.stringify({ fileHashes: fileHashesArray, merkleDAG: this.merkleDAG.serialize() @@ -294,13 +300,13 @@ export class FileSynchronizer { try { const data = await fs.readFile(this.snapshotPath, 'utf-8'); const obj = JSON.parse(data); - + // Reconstruct Map without using constructor with iterator this.fileHashes = new Map(); for (const [key, value] of obj.fileHashes) { this.fileHashes.set(key, value); } - + if (obj.merkleDAG) { this.merkleDAG = MerkleDAG.deserialize(obj.merkleDAG); } @@ -339,4 +345,4 @@ export class FileSynchronizer { } } } -} \ No newline at end of file +} \ No newline at end of file diff --git a/packages/mcp/src/index.ts b/packages/mcp/src/index.ts index e6b2221..f5159f2 100644 --- a/packages/mcp/src/index.ts +++ b/packages/mcp/src/index.ts @@ -539,6 +539,9 @@ Example response when indexing is in progress: this.indexingCodebases.push(absolutePath); this.saveCodebaseSnapshot(); + // Track the codebase path for syncing + this.trackCodebasePath(absolutePath); + // Start background indexing this.startBackgroundIndexing(absolutePath, forceReindex, splitterType); @@ -779,6 +782,9 @@ Example response when indexing is in progress: }; } + // Track the codebase path for syncing (even if not indexed yet) + this.trackCodebasePath(absolutePath); + // Check if this codebase is indexed or being indexed const isIndexed = this.indexedCodebases.includes(absolutePath); const isIndexing = this.indexingCodebases.includes(absolutePath); @@ -1080,6 +1086,15 @@ Example response when indexing is in progress: return resolved; } + + private trackCodebasePath(codebasePath: string) { + const absolutePath = this.ensureAbsolutePath(codebasePath); + if (!this.indexedCodebases.includes(absolutePath)) { + this.indexedCodebases.push(absolutePath); + this.saveCodebaseSnapshot(); + console.log(`[TRACKING] Added codebase path to indexedCodebases: ${absolutePath}`); + } + } } // Main execution diff --git a/packages/vscode-extension/src/commands/indexCommand.ts b/packages/vscode-extension/src/commands/indexCommand.ts index 1b08624..00cfcd1 100644 --- a/packages/vscode-extension/src/commands/indexCommand.ts +++ b/packages/vscode-extension/src/commands/indexCommand.ts @@ -1,7 +1,6 @@ import * as vscode from 'vscode'; import { CodeContext } from '@zilliz/code-context-core'; import * as path from 'path'; -import * as fs from 'fs'; import * as crypto from 'crypto'; export class IndexCommand { @@ -18,44 +17,6 @@ export class IndexCommand { this.codeContext = codeContext; } - /** - * Load .gitignore patterns from the codebase root directory - * @param codebasePath Path to the codebase - */ - private async loadGitignorePatterns(codebasePath: string): Promise { - try { - const gitignorePath = path.join(codebasePath, '.gitignore'); - - // Check if .gitignore exists - if (fs.existsSync(gitignorePath)) { - console.log(`📄 Found .gitignore file at: ${gitignorePath}`); - - // Use the static method from CodeContext to read ignore patterns - const ignorePatterns = await CodeContext.getIgnorePatternsFromFile(gitignorePath); - - if (ignorePatterns.length > 0) { - // Update the CodeContext instance with new patterns - this.codeContext.updateIgnorePatterns(ignorePatterns); - console.log(`🚫 Loaded ${ignorePatterns.length} ignore patterns from .gitignore`); - - vscode.window.showInformationMessage( - `📄 Loaded ${ignorePatterns.length} ignore patterns from .gitignore` - ); - } else { - console.log('📄 .gitignore file found but no valid patterns detected'); - } - } else { - console.log('📄 No .gitignore file found, using default ignore patterns only'); - // No need to update patterns - CodeContext will use defaults - } - } catch (error) { - console.warn(`⚠️ Failed to load .gitignore patterns: ${error}`); - vscode.window.showWarningMessage(`⚠️ Failed to load .gitignore: ${error}`); - // Continue with default patterns on error - this.codeContext.updateIgnorePatterns([]); - } - } - async execute(): Promise { const workspaceFolders = vscode.workspace.workspaceFolders; if (!workspaceFolders || workspaceFolders.length === 0) { @@ -103,10 +64,6 @@ export class IndexCommand { }, async (progress) => { let lastPercentage = 0; - // Load .gitignore patterns before indexing - progress.report({ increment: 0, message: 'Loading .gitignore patterns...' }); - await this.loadGitignorePatterns(selectedFolder.uri.fsPath); - // Clear existing index first await this.codeContext.clearIndex( selectedFolder.uri.fsPath, @@ -159,12 +116,12 @@ export class IndexCommand { } catch (error: any) { console.error('Indexing failed:', error); const errorString = typeof error === 'string' ? error : (error.message || error.toString() || ''); - + // Check for collection limit message from the core library if (errorString.includes('collection limit') || errorString.includes('zilliz.com/pricing')) { const message = 'Your Zilliz Cloud account has hit its collection limit. To continue creating collections, you\'ll need to expand your capacity. We recommend visiting https://zilliz.com/pricing to explore options for dedicated or serverless clusters.'; const openButton = 'Explore Pricing Options'; - + vscode.window.showErrorMessage(message, { modal: true }, openButton).then(selection => { if (selection === openButton) { vscode.env.openExternal(vscode.Uri.parse('https://zilliz.com/pricing'));