Skip to content

Commit 858bb26

Browse files
Shawnzheng011019 authored and zc277584121 committed
[Refactor]: Refactor the file synchronizer to load .gitignore & partial sync
Signed-off-by: ShawnZheng <shawn.zheng@zilliz.com>
1 parent 7d0d9da commit 858bb26

File tree

4 files changed

+81
-78
lines changed

4 files changed

+81
-78
lines changed

packages/core/src/context.ts

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ const DEFAULT_IGNORE_PATTERNS = [
7777
'*.polyfills.js',
7878
'*.runtime.js',
7979
'*.map', // source map files
80+
'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out',
81+
'target', '.vscode', '.idea', '__pycache__', '.pytest_cache',
82+
'coverage', '.nyc_output', 'logs', 'tmp', 'temp'
8083
];
8184

8285
export interface CodeContextConfig {
@@ -135,12 +138,15 @@ export class CodeContext {
135138
): Promise<{ indexedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
136139
console.log(`🚀 Starting to index codebase: ${codebasePath}`);
137140

138-
// 1. Check and prepare vector collection
141+
// 1. Load .gitignore patterns if not already loaded
142+
await this.loadGitignorePatterns(codebasePath);
143+
144+
// 2. Check and prepare vector collection
139145
progressCallback?.({ phase: 'Preparing collection...', current: 0, total: 100, percentage: 0 });
140146
console.log(`Debug2: Preparing vector collection for codebase`);
141147
await this.prepareCollection(codebasePath);
142148

143-
// 2. Recursively traverse codebase to get all supported files
149+
// 3. Recursively traverse codebase to get all supported files
144150
progressCallback?.({ phase: 'Scanning files...', current: 5, total: 100, percentage: 5 });
145151
const codeFiles = await this.getCodeFiles(codebasePath);
146152
console.log(`📁 Found ${codeFiles.length} code files`);
@@ -429,10 +435,7 @@ export class CodeContext {
429435
}
430436

431437
if (entry.isDirectory()) {
432-
// Skip common ignored directories
433-
if (!this.shouldIgnoreDirectory(entry.name)) {
434-
await traverseDirectory(fullPath);
435-
}
438+
await traverseDirectory(fullPath);
436439
} else if (entry.isFile()) {
437440
const ext = path.extname(entry.name);
438441
if (this.supportedExtensions.includes(ext)) {
@@ -446,18 +449,6 @@ export class CodeContext {
446449
return files;
447450
}
448451

449-
/**
450-
* Determine whether directory should be ignored
451-
*/
452-
private shouldIgnoreDirectory(dirName: string): boolean {
453-
const ignoredDirs = [
454-
'node_modules', '.git', '.svn', '.hg', 'build', 'dist', 'out',
455-
'target', '.vscode', '.idea', '__pycache__', '.pytest_cache',
456-
'coverage', '.nyc_output', 'logs', 'tmp', 'temp'
457-
];
458-
return ignoredDirs.includes(dirName) || dirName.startsWith('.');
459-
}
460-
461452
/**
462453
* Process a list of files with streaming chunk processing
463454
* @param filePaths Array of file paths to process
@@ -670,6 +661,40 @@ export class CodeContext {
670661
}
671662
}
672663

664+
/**
665+
* Load .gitignore patterns from the codebase root directory
666+
* @param codebasePath Path to the codebase
667+
*/
668+
private async loadGitignorePatterns(codebasePath: string): Promise<void> {
669+
try {
670+
const gitignorePath = path.join(codebasePath, '.gitignore');
671+
672+
// Check if .gitignore exists
673+
try {
674+
await fs.promises.access(gitignorePath);
675+
console.log(`📄 Found .gitignore file at: ${gitignorePath}`);
676+
677+
// Use the static method from CodeContext to read ignore patterns
678+
const ignorePatterns = await CodeContext.getIgnorePatternsFromFile(gitignorePath);
679+
680+
if (ignorePatterns.length > 0) {
681+
// Update the CodeContext instance with new patterns
682+
this.updateIgnorePatterns(ignorePatterns);
683+
console.log(`🚫 Loaded ${ignorePatterns.length} ignore patterns from .gitignore`);
684+
} else {
685+
console.log('📄 .gitignore file found but no valid patterns detected');
686+
}
687+
} catch (error) {
688+
console.log('📄 No .gitignore file found, using default ignore patterns only');
689+
// No need to update patterns - CodeContext will use defaults
690+
}
691+
} catch (error) {
692+
console.warn(`⚠️ Failed to load .gitignore patterns: ${error}`);
693+
// Continue with default patterns on error
694+
this.updateIgnorePatterns([]);
695+
}
696+
}
697+
673698
/**
674699
* Check if a path matches any ignore pattern
675700
* @param filePath Path to check

packages/core/src/sync/synchronizer.ts

Lines changed: 21 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,12 @@ export class FileSynchronizer {
9696
}
9797

9898
private shouldIgnore(relativePath: string, isDirectory: boolean = false): boolean {
99+
// Always ignore hidden files and directories (starting with .)
100+
const pathParts = relativePath.split(path.sep);
101+
if (pathParts.some(part => part.startsWith('.'))) {
102+
return true;
103+
}
104+
99105
if (this.ignorePatterns.length === 0) {
100106
return false;
101107
}
@@ -115,15 +121,15 @@ export class FileSynchronizer {
115121
}
116122

117123
// Check if any parent directory is ignored
118-
const pathParts = normalizedPath.split('/');
119-
for (let i = 0; i < pathParts.length; i++) {
120-
const partialPath = pathParts.slice(0, i + 1).join('/');
124+
const normalizedPathParts = normalizedPath.split('/');
125+
for (let i = 0; i < normalizedPathParts.length; i++) {
126+
const partialPath = normalizedPathParts.slice(0, i + 1).join('/');
121127
for (const pattern of this.ignorePatterns) {
122128
// Check directory patterns
123129
if (pattern.endsWith('/')) {
124130
const dirPattern = pattern.slice(0, -1);
125131
if (this.simpleGlobMatch(partialPath, dirPattern) ||
126-
this.simpleGlobMatch(pathParts[i], dirPattern)) {
132+
this.simpleGlobMatch(normalizedPathParts[i], dirPattern)) {
127133
return true;
128134
}
129135
}
@@ -135,7 +141,7 @@ export class FileSynchronizer {
135141
}
136142
// Check filename patterns against any path component
137143
else {
138-
if (this.simpleGlobMatch(pathParts[i], pattern)) {
144+
if (this.simpleGlobMatch(normalizedPathParts[i], pattern)) {
139145
return true;
140146
}
141147
}
@@ -190,21 +196,21 @@ export class FileSynchronizer {
190196
const dag = new MerkleDAG();
191197
const keys = Array.from(fileHashes.keys());
192198
const sortedPaths = keys.slice().sort(); // Create a sorted copy
193-
199+
194200
// Create a root node for the entire directory
195201
let valuesString = "";
196202
keys.forEach(key => {
197203
valuesString += fileHashes.get(key);
198204
});
199205
const rootNodeData = "root:" + valuesString;
200206
const rootNodeId = dag.addNode(rootNodeData);
201-
207+
202208
// Add each file as a child of the root
203209
for (const path of sortedPaths) {
204210
const fileData = path + ":" + fileHashes.get(path);
205211
dag.addNode(fileData, rootNodeId);
206212
}
207-
213+
208214
return dag;
209215
}
210216

@@ -228,11 +234,11 @@ export class FileSynchronizer {
228234
if (changes.added.length > 0 || changes.removed.length > 0 || changes.modified.length > 0) {
229235
console.log('Merkle DAG has changed. Comparing file states...');
230236
const fileChanges = this.compareStates(this.fileHashes, newFileHashes);
231-
237+
232238
this.fileHashes = newFileHashes;
233239
this.merkleDAG = newMerkleDAG;
234240
await this.saveSnapshot();
235-
241+
236242
console.log(`Found changes: ${fileChanges.added.length} added, ${fileChanges.removed.length} removed, ${fileChanges.modified.length} modified.`);
237243
return fileChanges;
238244
}
@@ -274,14 +280,14 @@ export class FileSynchronizer {
274280
private async saveSnapshot(): Promise<void> {
275281
const merkleDir = path.dirname(this.snapshotPath);
276282
await fs.mkdir(merkleDir, { recursive: true });
277-
283+
278284
// Convert Map to array without using iterator
279285
const fileHashesArray: [string, string][] = [];
280286
const keys = Array.from(this.fileHashes.keys());
281287
keys.forEach(key => {
282288
fileHashesArray.push([key, this.fileHashes.get(key)!]);
283289
});
284-
290+
285291
const data = JSON.stringify({
286292
fileHashes: fileHashesArray,
287293
merkleDAG: this.merkleDAG.serialize()
@@ -294,13 +300,13 @@ export class FileSynchronizer {
294300
try {
295301
const data = await fs.readFile(this.snapshotPath, 'utf-8');
296302
const obj = JSON.parse(data);
297-
303+
298304
// Reconstruct Map without using constructor with iterator
299305
this.fileHashes = new Map();
300306
for (const [key, value] of obj.fileHashes) {
301307
this.fileHashes.set(key, value);
302308
}
303-
309+
304310
if (obj.merkleDAG) {
305311
this.merkleDAG = MerkleDAG.deserialize(obj.merkleDAG);
306312
}
@@ -339,4 +345,4 @@ export class FileSynchronizer {
339345
}
340346
}
341347
}
342-
}
348+
}

packages/mcp/src/index.ts

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -539,6 +539,9 @@ Example response when indexing is in progress:
539539
this.indexingCodebases.push(absolutePath);
540540
this.saveCodebaseSnapshot();
541541

542+
// Track the codebase path for syncing
543+
this.trackCodebasePath(absolutePath);
544+
542545
// Start background indexing
543546
this.startBackgroundIndexing(absolutePath, forceReindex, splitterType);
544547

@@ -779,6 +782,9 @@ Example response when indexing is in progress:
779782
};
780783
}
781784

785+
// Track the codebase path for syncing (even if not indexed yet)
786+
this.trackCodebasePath(absolutePath);
787+
782788
// Check if this codebase is indexed or being indexed
783789
const isIndexed = this.indexedCodebases.includes(absolutePath);
784790
const isIndexing = this.indexingCodebases.includes(absolutePath);
@@ -1080,6 +1086,15 @@ Example response when indexing is in progress:
10801086

10811087
return resolved;
10821088
}
1089+
1090+
private trackCodebasePath(codebasePath: string) {
1091+
const absolutePath = this.ensureAbsolutePath(codebasePath);
1092+
if (!this.indexedCodebases.includes(absolutePath)) {
1093+
this.indexedCodebases.push(absolutePath);
1094+
this.saveCodebaseSnapshot();
1095+
console.log(`[TRACKING] Added codebase path to indexedCodebases: ${absolutePath}`);
1096+
}
1097+
}
10831098
}
10841099

10851100
// Main execution

packages/vscode-extension/src/commands/indexCommand.ts

Lines changed: 2 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import * as vscode from 'vscode';
22
import { CodeContext } from '@zilliz/code-context-core';
33
import * as path from 'path';
4-
import * as fs from 'fs';
54
import * as crypto from 'crypto';
65

76
export class IndexCommand {
@@ -18,44 +17,6 @@ export class IndexCommand {
1817
this.codeContext = codeContext;
1918
}
2019

21-
/**
22-
* Load .gitignore patterns from the codebase root directory
23-
* @param codebasePath Path to the codebase
24-
*/
25-
private async loadGitignorePatterns(codebasePath: string): Promise<void> {
26-
try {
27-
const gitignorePath = path.join(codebasePath, '.gitignore');
28-
29-
// Check if .gitignore exists
30-
if (fs.existsSync(gitignorePath)) {
31-
console.log(`📄 Found .gitignore file at: ${gitignorePath}`);
32-
33-
// Use the static method from CodeContext to read ignore patterns
34-
const ignorePatterns = await CodeContext.getIgnorePatternsFromFile(gitignorePath);
35-
36-
if (ignorePatterns.length > 0) {
37-
// Update the CodeContext instance with new patterns
38-
this.codeContext.updateIgnorePatterns(ignorePatterns);
39-
console.log(`🚫 Loaded ${ignorePatterns.length} ignore patterns from .gitignore`);
40-
41-
vscode.window.showInformationMessage(
42-
`📄 Loaded ${ignorePatterns.length} ignore patterns from .gitignore`
43-
);
44-
} else {
45-
console.log('📄 .gitignore file found but no valid patterns detected');
46-
}
47-
} else {
48-
console.log('📄 No .gitignore file found, using default ignore patterns only');
49-
// No need to update patterns - CodeContext will use defaults
50-
}
51-
} catch (error) {
52-
console.warn(`⚠️ Failed to load .gitignore patterns: ${error}`);
53-
vscode.window.showWarningMessage(`⚠️ Failed to load .gitignore: ${error}`);
54-
// Continue with default patterns on error
55-
this.codeContext.updateIgnorePatterns([]);
56-
}
57-
}
58-
5920
async execute(): Promise<void> {
6021
const workspaceFolders = vscode.workspace.workspaceFolders;
6122
if (!workspaceFolders || workspaceFolders.length === 0) {
@@ -103,10 +64,6 @@ export class IndexCommand {
10364
}, async (progress) => {
10465
let lastPercentage = 0;
10566

106-
// Load .gitignore patterns before indexing
107-
progress.report({ increment: 0, message: 'Loading .gitignore patterns...' });
108-
await this.loadGitignorePatterns(selectedFolder.uri.fsPath);
109-
11067
// Clear existing index first
11168
await this.codeContext.clearIndex(
11269
selectedFolder.uri.fsPath,
@@ -159,12 +116,12 @@ export class IndexCommand {
159116
} catch (error: any) {
160117
console.error('Indexing failed:', error);
161118
const errorString = typeof error === 'string' ? error : (error.message || error.toString() || '');
162-
119+
163120
// Check for collection limit message from the core library
164121
if (errorString.includes('collection limit') || errorString.includes('zilliz.com/pricing')) {
165122
const message = 'Your Zilliz Cloud account has hit its collection limit. To continue creating collections, you\'ll need to expand your capacity. We recommend visiting https://zilliz.com/pricing to explore options for dedicated or serverless clusters.';
166123
const openButton = 'Explore Pricing Options';
167-
124+
168125
vscode.window.showErrorMessage(message, { modal: true }, openButton).then(selection => {
169126
if (selection === openButton) {
170127
vscode.env.openExternal(vscode.Uri.parse('https://zilliz.com/pricing'));

0 commit comments

Comments
 (0)