Skip to content

[Refactor]: Refactor the file synchronizer to load .gitignore & patia… #68

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 24, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 43 additions & 18 deletions packages/core/src/context.ts
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ const DEFAULT_IGNORE_PATTERNS = [
'*.polyfills.js',
'*.runtime.js',
'*.map', // source map files
'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out',
'target', '.vscode', '.idea', '__pycache__', '.pytest_cache',
'coverage', '.nyc_output', 'logs', 'tmp', 'temp'
];

export interface CodeContextConfig {
Expand Down Expand Up @@ -135,12 +138,15 @@ export class CodeContext {
): Promise<{ indexedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
console.log(`🚀 Starting to index codebase: ${codebasePath}`);

// 1. Check and prepare vector collection
// 1. Load .gitignore patterns if not already loaded
await this.loadGitignorePatterns(codebasePath);

// 2. Check and prepare vector collection
progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
console.log(`Debug2: Preparing vector collection for codebase`);
await this.prepareCollection(codebasePath);

// 2. Recursively traverse codebase to get all supported files
// 3. Recursively traverse codebase to get all supported files
progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
const codeFiles = await this.getCodeFiles(codebasePath);
console.log(`📁 Found ${codeFiles.length} code files`);
Expand Down Expand Up @@ -429,10 +435,7 @@ export class CodeContext {
}

if (entry.isDirectory()) {
// Skip common ignored directories
if (!this.shouldIgnoreDirectory(entry.name)) {
await traverseDirectory(fullPath);
}
await traverseDirectory(fullPath);
} else if (entry.isFile()) {
const ext = path.extname(entry.name);
if (this.supportedExtensions.includes(ext)) {
Expand All @@ -446,18 +449,6 @@ export class CodeContext {
return files;
}

/**
 * Decide whether a directory should be skipped during traversal.
 *
 * A directory is skipped when its name starts with a dot, or when it is one
 * of the well-known dependency / build / VCS / cache folder names listed in
 * the body.
 *
 * @param dirName Directory name to test (the visible caller passes `entry.name`)
 * @returns `true` when the directory should be ignored, otherwise `false`
 */
private shouldIgnoreDirectory(dirName: string): boolean {
    // Dot-prefixed names are always skipped, so entries such as '.git' or
    // '.vscode' never need the lookup below.
    if (dirName.startsWith('.')) {
        return true;
    }

    const wellKnownSkippedDirs = new Set<string>([
        'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out',
        'target', '.vscode', '.idea', '__pycache__', '.pytest_cache',
        'coverage', '.nyc_output', 'logs', 'tmp', 'temp'
    ]);
    return wellKnownSkippedDirs.has(dirName);
}

/**
* Process a list of files with streaming chunk processing
* @param filePaths Array of file paths to process
Expand Down Expand Up @@ -670,6 +661,40 @@ export class CodeContext {
}
}

/**
 * Load ignore patterns from the `.gitignore` file in the codebase root and
 * hand them to `updateIgnorePatterns`.
 *
 * Outcomes:
 * - `.gitignore` is not accessible: a message is logged and the current
 *   patterns are left untouched.
 * - `.gitignore` is accessible and yields patterns: they are passed to
 *   `updateIgnorePatterns`.
 * - `.gitignore` is accessible but yields no patterns: a message is logged
 *   and nothing is updated.
 * - Reading or applying the patterns throws: a warning is logged and
 *   `updateIgnorePatterns([])` is called. This method never rejects for
 *   those failures.
 *
 * @param codebasePath Path to the codebase
 */
private async loadGitignorePatterns(codebasePath: string): Promise<void> {
    try {
        const gitignorePath = path.join(codebasePath, '.gitignore');

        // Probe for the file on its own. Only a failed access() may be
        // reported as "no .gitignore"; if this try also wrapped the read
        // below, genuine read/parse failures would be mislabelled as a
        // missing file and the warning path would never run.
        try {
            await fs.promises.access(gitignorePath);
        } catch {
            console.log('📄 No .gitignore file found, using default ignore patterns only');
            // No need to update patterns - CodeContext will use defaults
            return;
        }
        console.log(`📄 Found .gitignore file at: ${gitignorePath}`);

        // Use the static method from CodeContext to read ignore patterns
        const ignorePatterns = await CodeContext.getIgnorePatternsFromFile(gitignorePath);

        if (ignorePatterns.length > 0) {
            // Update the CodeContext instance with new patterns
            this.updateIgnorePatterns(ignorePatterns);
            console.log(`🚫 Loaded ${ignorePatterns.length} ignore patterns from .gitignore`);
        } else {
            console.log('📄 .gitignore file found but no valid patterns detected');
        }
    } catch (error) {
        console.warn(`⚠️ Failed to load .gitignore patterns: ${error}`);
        // Continue with default patterns on error
        // (presumably an empty list resets to defaults; confirm against updateIgnorePatterns)
        this.updateIgnorePatterns([]);
    }
}

/**
* Check if a path matches any ignore pattern
* @param filePath Path to check
Expand Down
36 changes: 21 additions & 15 deletions packages/core/src/sync/synchronizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,12 @@ export class FileSynchronizer {
}

private shouldIgnore(relativePath: string, isDirectory: boolean = false): boolean {
// Always ignore hidden files and directories (starting with .)
const pathParts = relativePath.split(path.sep);
if (pathParts.some(part => part.startsWith('.'))) {
return true;
}

if (this.ignorePatterns.length === 0) {
return false;
}
Expand All @@ -115,15 +121,15 @@ export class FileSynchronizer {
}

// Check if any parent directory is ignored
const pathParts = normalizedPath.split('/');
for (let i = 0; i < pathParts.length; i++) {
const partialPath = pathParts.slice(0, i + 1).join('/');
const normalizedPathParts = normalizedPath.split('/');
for (let i = 0; i < normalizedPathParts.length; i++) {
const partialPath = normalizedPathParts.slice(0, i + 1).join('/');
for (const pattern of this.ignorePatterns) {
// Check directory patterns
if (pattern.endsWith('/')) {
const dirPattern = pattern.slice(0, -1);
if (this.simpleGlobMatch(partialPath, dirPattern) ||
this.simpleGlobMatch(pathParts[i], dirPattern)) {
this.simpleGlobMatch(normalizedPathParts[i], dirPattern)) {
return true;
}
}
Expand All @@ -135,7 +141,7 @@ export class FileSynchronizer {
}
// Check filename patterns against any path component
else {
if (this.simpleGlobMatch(pathParts[i], pattern)) {
if (this.simpleGlobMatch(normalizedPathParts[i], pattern)) {
return true;
}
}
Expand Down Expand Up @@ -190,21 +196,21 @@ export class FileSynchronizer {
const dag = new MerkleDAG();
const keys = Array.from(fileHashes.keys());
const sortedPaths = keys.slice().sort(); // Create a sorted copy

// Create a root node for the entire directory
let valuesString = "";
keys.forEach(key => {
valuesString += fileHashes.get(key);
});
const rootNodeData = "root:" + valuesString;
const rootNodeId = dag.addNode(rootNodeData);

// Add each file as a child of the root
for (const path of sortedPaths) {
const fileData = path + ":" + fileHashes.get(path);
dag.addNode(fileData, rootNodeId);
}

return dag;
}

Expand All @@ -228,11 +234,11 @@ export class FileSynchronizer {
if (changes.added.length > 0 || changes.removed.length > 0 || changes.modified.length > 0) {
console.log('Merkle DAG has changed. Comparing file states...');
const fileChanges = this.compareStates(this.fileHashes, newFileHashes);

this.fileHashes = newFileHashes;
this.merkleDAG = newMerkleDAG;
await this.saveSnapshot();

console.log(`Found changes: ${fileChanges.added.length} added, ${fileChanges.removed.length} removed, ${fileChanges.modified.length} modified.`);
return fileChanges;
}
Expand Down Expand Up @@ -274,14 +280,14 @@ export class FileSynchronizer {
private async saveSnapshot(): Promise<void> {
const merkleDir = path.dirname(this.snapshotPath);
await fs.mkdir(merkleDir, { recursive: true });

// Convert Map to array without using iterator
const fileHashesArray: [string, string][] = [];
const keys = Array.from(this.fileHashes.keys());
keys.forEach(key => {
fileHashesArray.push([key, this.fileHashes.get(key)!]);
});

const data = JSON.stringify({
fileHashes: fileHashesArray,
merkleDAG: this.merkleDAG.serialize()
Expand All @@ -294,13 +300,13 @@ export class FileSynchronizer {
try {
const data = await fs.readFile(this.snapshotPath, 'utf-8');
const obj = JSON.parse(data);

// Reconstruct Map without using constructor with iterator
this.fileHashes = new Map();
for (const [key, value] of obj.fileHashes) {
this.fileHashes.set(key, value);
}

if (obj.merkleDAG) {
this.merkleDAG = MerkleDAG.deserialize(obj.merkleDAG);
}
Expand Down Expand Up @@ -339,4 +345,4 @@ export class FileSynchronizer {
}
}
}
}
}
15 changes: 15 additions & 0 deletions packages/mcp/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,9 @@ Example response when indexing is in progress:
this.indexingCodebases.push(absolutePath);
this.saveCodebaseSnapshot();

// Track the codebase path for syncing
this.trackCodebasePath(absolutePath);

// Start background indexing
this.startBackgroundIndexing(absolutePath, forceReindex, splitterType);

Expand Down Expand Up @@ -779,6 +782,9 @@ Example response when indexing is in progress:
};
}

// Track the codebase path for syncing (even if not indexed yet)
this.trackCodebasePath(absolutePath);

// Check if this codebase is indexed or being indexed
const isIndexed = this.indexedCodebases.includes(absolutePath);
const isIndexing = this.indexingCodebases.includes(absolutePath);
Expand Down Expand Up @@ -1080,6 +1086,15 @@ Example response when indexing is in progress:

return resolved;
}

/**
 * Record a codebase path in `indexedCodebases` if it is not already there.
 *
 * The path is first passed through `ensureAbsolutePath`. When a new entry
 * is added, the snapshot is saved and a tracking message is logged;
 * otherwise the call does nothing.
 *
 * NOTE(review): `indexedCodebases` is also the list that the visible caller
 * checks with `includes` to decide whether a codebase is indexed. Confirm
 * that adding a path here before indexing has completed is intended.
 *
 * @param codebasePath Path to the codebase to track
 */
private trackCodebasePath(codebasePath: string) {
    const resolvedPath = this.ensureAbsolutePath(codebasePath);

    // Already recorded: nothing to add, nothing to persist.
    const alreadyTracked = this.indexedCodebases.includes(resolvedPath);
    if (alreadyTracked) {
        return;
    }

    this.indexedCodebases.push(resolvedPath);
    this.saveCodebaseSnapshot();
    console.log(`[TRACKING] Added codebase path to indexedCodebases: ${resolvedPath}`);
}
}

// Main execution
Expand Down
47 changes: 2 additions & 45 deletions packages/vscode-extension/src/commands/indexCommand.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import * as vscode from 'vscode';
import { CodeContext } from '@zilliz/code-context-core';
import * as path from 'path';
import * as fs from 'fs';
import * as crypto from 'crypto';

export class IndexCommand {
Expand All @@ -18,44 +17,6 @@ export class IndexCommand {
this.codeContext = codeContext;
}

/**
 * Read the `.gitignore` in the codebase root and pass its patterns to the
 * wrapped `CodeContext`, reporting the outcome to the user.
 *
 * When patterns are loaded, an information message is shown. When anything
 * in the process throws, a warning is logged and shown, and
 * `updateIgnorePatterns([])` is called on the context.
 *
 * @param codebasePath Path to the codebase
 */
private async loadGitignorePatterns(codebasePath: string): Promise<void> {
    try {
        const gitignoreFile = path.join(codebasePath, '.gitignore');

        // Nothing to load when the file is absent.
        if (!fs.existsSync(gitignoreFile)) {
            console.log('📄 No .gitignore file found, using default ignore patterns only');
            // No need to update patterns - CodeContext will use defaults
            return;
        }
        console.log(`📄 Found .gitignore file at: ${gitignoreFile}`);

        // Parsing is delegated to the static helper on CodeContext.
        const patterns = await CodeContext.getIgnorePatternsFromFile(gitignoreFile);
        const hasPatterns = patterns.length > 0;
        if (!hasPatterns) {
            console.log('📄 .gitignore file found but no valid patterns detected');
            return;
        }

        // Hand the patterns to the CodeContext instance, then tell the user.
        this.codeContext.updateIgnorePatterns(patterns);
        console.log(`🚫 Loaded ${patterns.length} ignore patterns from .gitignore`);

        vscode.window.showInformationMessage(
            `📄 Loaded ${patterns.length} ignore patterns from .gitignore`
        );
    } catch (error) {
        console.warn(`⚠️ Failed to load .gitignore patterns: ${error}`);
        vscode.window.showWarningMessage(`⚠️ Failed to load .gitignore: ${error}`);
        // Continue with default patterns on error
        this.codeContext.updateIgnorePatterns([]);
    }
}

async execute(): Promise<void> {
const workspaceFolders = vscode.workspace.workspaceFolders;
if (!workspaceFolders || workspaceFolders.length === 0) {
Expand Down Expand Up @@ -103,10 +64,6 @@ export class IndexCommand {
}, async (progress) => {
let lastPercentage = 0;

// Load .gitignore patterns before indexing
progress.report({ increment: 0, message: 'Loading .gitignore patterns...' });
await this.loadGitignorePatterns(selectedFolder.uri.fsPath);

// Clear existing index first
await this.codeContext.clearIndex(
selectedFolder.uri.fsPath,
Expand Down Expand Up @@ -159,12 +116,12 @@ export class IndexCommand {
} catch (error: any) {
console.error('Indexing failed:', error);
const errorString = typeof error === 'string' ? error : (error.message || error.toString() || '');

// Check for collection limit message from the core library
if (errorString.includes('collection limit') || errorString.includes('zilliz.com/pricing')) {
const message = 'Your Zilliz Cloud account has hit its collection limit. To continue creating collections, you\'ll need to expand your capacity. We recommend visiting https://zilliz.com/pricing to explore options for dedicated or serverless clusters.';
const openButton = 'Explore Pricing Options';

vscode.window.showErrorMessage(message, { modal: true }, openButton).then(selection => {
if (selection === openButton) {
vscode.env.openExternal(vscode.Uri.parse('https://zilliz.com/pricing'));
Expand Down