Skip to content

Commit ed3130c

Browse files
committed
Refactor vectordb init to support zilliz auto create cluster and env variable management
1 parent 1b178bd commit ed3130c

File tree

14 files changed

+839
-179
lines changed

14 files changed

+839
-179
lines changed

.env.example

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
# CodeContext Environment Variables Example
2+
#
3+
# Copy this file to ~/.codecontext/.env and modify the values as needed
4+
#
5+
# Usage: mkdir -p ~/.codecontext && cp .env.example ~/.codecontext/.env
6+
7+
# =============================================================================
8+
# Embedding Provider Configuration
9+
# =============================================================================
10+
11+
# Embedding provider: OpenAI, VoyageAI, Gemini, Ollama
12+
EMBEDDING_PROVIDER=OpenAI
13+
14+
# Embedding model (provider-specific)
15+
EMBEDDING_MODEL=text-embedding-3-small
16+
17+
# Embedding batch size for processing (default: 100)
18+
EMBEDDING_BATCH_SIZE=100
19+
20+
# =============================================================================
21+
# OpenAI Configuration
22+
# =============================================================================
23+
24+
# OpenAI API key
25+
OPENAI_API_KEY=your-openai-api-key-here
26+
27+
# OpenAI base URL (optional, for custom endpoints)
28+
# OPENAI_BASE_URL=https://api.openai.com/v1
29+
30+
# =============================================================================
31+
# VoyageAI Configuration
32+
# =============================================================================
33+
34+
# VoyageAI API key
35+
# VOYAGEAI_API_KEY=your-voyageai-api-key-here
36+
37+
# =============================================================================
38+
# Gemini Configuration
39+
# =============================================================================
40+
41+
# Google Gemini API key
42+
# GEMINI_API_KEY=your-gemini-api-key-here
43+
44+
# =============================================================================
45+
# Ollama Configuration
46+
# =============================================================================
47+
48+
# Ollama model name
49+
# OLLAMA_MODEL=
50+
51+
# Ollama host (default: http://localhost:11434)
52+
# OLLAMA_HOST=http://localhost:11434
53+
54+
# =============================================================================
55+
# Vector Database Configuration (Milvus/Zilliz)
56+
# =============================================================================
57+
58+
# Milvus server address
59+
MILVUS_ADDRESS=your-zilliz-cloud-public-endpoint
60+
61+
# Milvus authentication token
62+
# MILVUS_TOKEN=your-zilliz-cloud-api-key
63+
64+
# Zilliz Cloud base URL (optional, default: https://api.cloud.zilliz.com)
65+
# ZILLIZ_BASE_URL=https://api.cloud.zilliz.com
66+
67+
# =============================================================================
68+
# Code Splitter Configuration
69+
# =============================================================================
70+
71+
# Code splitter type: ast, langchain
72+
SPLITTER_TYPE=ast

examples/basic-usage/index.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { CodeContext, MilvusVectorDatabase, MilvusRestfulVectorDatabase, AstCodeSplitter, LangChainCodeSplitter } from '@zilliz/code-context-core';
2+
import { envManager } from '@zilliz/code-context-core';
23
import * as path from 'path';
34

45
// Try to load .env file
@@ -17,9 +18,9 @@ async function main() {
1718
// Set to true to use RESTful API (for environments without gRPC support)
1819
// Set to false to use gRPC (default, more efficient)
1920
const useRestfulApi = false;
20-
const milvusAddress = process.env.MILVUS_ADDRESS || 'localhost:19530';
21-
const milvusToken = process.env.MILVUS_TOKEN;
22-
const splitterType = process.env.SPLITTER_TYPE?.toLowerCase() || 'ast';
21+
const milvusAddress = envManager.get('MILVUS_ADDRESS') || 'localhost:19530';
22+
const milvusToken = envManager.get('MILVUS_TOKEN');
23+
const splitterType = envManager.get('SPLITTER_TYPE')?.toLowerCase() || 'ast';
2324

2425
console.log(`🔧 Using ${useRestfulApi ? 'RESTful API' : 'gRPC'} implementation`);
2526
console.log(`🔌 Connecting to Milvus at: ${milvusAddress}`);

packages/core/src/context.ts

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
VectorSearchResult
1515
} from './vectordb';
1616
import { SemanticSearchResult } from './types';
17+
import { envManager } from './utils/env-manager';
1718
import * as fs from 'fs';
1819
import * as path from 'path';
1920
import * as crypto from 'crypto';
@@ -101,9 +102,9 @@ export class CodeContext {
101102
constructor(config: CodeContextConfig = {}) {
102103
// Initialize services
103104
this.embedding = config.embedding || new OpenAIEmbedding({
104-
apiKey: process.env.OPENAI_API_KEY || 'your-openai-api-key',
105+
apiKey: envManager.get('OPENAI_API_KEY') || 'your-openai-api-key',
105106
model: 'text-embedding-3-small',
106-
...(process.env.OPENAI_BASE_URL && { baseURL: process.env.OPENAI_BASE_URL })
107+
...(envManager.get('OPENAI_BASE_URL') && { baseURL: envManager.get('OPENAI_BASE_URL') })
107108
});
108109

109110
if (!config.vectorDatabase) {
@@ -461,7 +462,7 @@ export class CodeContext {
461462
codebasePath: string,
462463
onFileProcessed?: (filePath: string, fileIndex: number, totalFiles: number) => void
463464
): Promise<{ processedFiles: number; totalChunks: number; status: 'completed' | 'limit_reached' }> {
464-
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(process.env.EMBEDDING_BATCH_SIZE || '100', 10));
465+
const EMBEDDING_BATCH_SIZE = Math.max(1, parseInt(envManager.get('EMBEDDING_BATCH_SIZE') || '100', 10));
465466
const CHUNK_LIMIT = 450000;
466467
console.log(`🔧 Using EMBEDDING_BATCH_SIZE: ${EMBEDDING_BATCH_SIZE}`);
467468

packages/core/src/index.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ export * from './embedding';
33
export * from './vectordb';
44
export * from './types';
55
export * from './context';
6-
export * from './sync/synchronizer';
6+
export * from './sync/synchronizer';
7+
export * from './utils';
Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
import * as fs from 'fs';
2+
import * as path from 'path';
3+
import * as os from 'os';
4+
5+
/**
 * Reads and writes CodeContext settings.
 *
 * Values come from the process environment first and fall back to a
 * dotenv-style file located at `<home>/.codecontext/.env`.
 */
export class EnvManager {
    private envFilePath: string;

    constructor() {
        const homeDir = os.homedir();
        this.envFilePath = path.join(homeDir, '.codecontext', '.env');
    }

    /**
     * Parse a single line of the .env file.
     *
     * Blank lines, `#` comment lines and lines without a key are skipped.
     * Whitespace around the key and the value is ignored, and one pair of
     * matching surrounding quotes (single or double) is removed from the value
     * so that `KEY="abc"` and `KEY=abc` yield the same result.
     *
     * @param line Raw line, possibly ending in `\r`.
     * @returns The key/value pair, or `undefined` when the line is not an assignment.
     */
    private static parseLine(line: string): { key: string; value: string } | undefined {
        const trimmed = line.trim();
        if (trimmed === '' || trimmed.startsWith('#')) {
            return undefined;
        }

        const separator = trimmed.indexOf('=');
        if (separator <= 0) {
            return undefined;
        }

        const key = trimmed.slice(0, separator).trim();
        let value = trimmed.slice(separator + 1).trim();

        if (value.length >= 2) {
            const first = value.charAt(0);
            const last = value.charAt(value.length - 1);
            if ((first === '"' || first === "'") && first === last) {
                value = value.slice(1, -1);
            }
        }

        return { key, value };
    }

    /**
     * Get environment variable by name.
     * Priority: process.env > .env file > undefined
     *
     * An empty string in `process.env` counts as "not set", so the .env file
     * is still consulted in that case.
     *
     * @param name Variable name to look up.
     * @returns The value, or `undefined` when it is defined in neither place
     *          or the .env file cannot be read.
     */
    get(name: string): string | undefined {
        // First try to get from process environment variables
        const fromProcess = process.env[name];
        if (fromProcess) {
            return fromProcess;
        }

        // If not found in process env, try to read from .env file
        try {
            if (fs.existsSync(this.envFilePath)) {
                const content = fs.readFileSync(this.envFilePath, 'utf-8');

                // The first assignment of `name` wins.
                for (const line of content.split('\n')) {
                    const entry = EnvManager.parseLine(line);
                    if (entry !== undefined && entry.key === name) {
                        return entry.value;
                    }
                }
            }
        } catch (error) {
            // Best-effort lookup: an unreadable file is treated as "variable not set".
        }

        return undefined;
    }

    /**
     * Set environment variable in the .env file.
     *
     * Updates the first existing assignment of `name` (matched with the same
     * parsing rules as `get`) or appends a new line when there is none. The
     * containing directory is created if needed. `process.env` is not modified.
     *
     * @param name Variable name.
     * @param value Value to store; written as `name=value`.
     * @throws Re-throws any file system error after logging it.
     */
    set(name: string, value: string): void {
        try {
            // Ensure directory exists (no-op when it already does)
            fs.mkdirSync(path.dirname(this.envFilePath), { recursive: true });

            let content = '';
            let found = false;

            // Read existing content if file exists
            if (fs.existsSync(this.envFilePath)) {
                const lines = fs.readFileSync(this.envFilePath, 'utf-8').split('\n');

                // Update existing variable
                const index = lines.findIndex((line) => EnvManager.parseLine(line)?.key === name);
                if (index !== -1) {
                    lines[index] = `${name}=${value}`;
                    found = true;
                    console.log(`✅ Updated ${name} in ${this.envFilePath}`);
                }
                content = lines.join('\n');
            }

            // If variable not found, append it
            if (!found) {
                if (content && !content.endsWith('\n')) {
                    content += '\n';
                }
                content += `${name}=${value}\n`;
                console.log(`✅ Added ${name} to ${this.envFilePath}`);
            }

            // The file holds API keys and tokens: create it readable by the owner only.
            // `mode` applies only when the file is created; an existing file keeps its permissions.
            fs.writeFileSync(this.envFilePath, content, { encoding: 'utf-8', mode: 0o600 });
        } catch (error) {
            console.error(`Failed to write env file: ${error}`);
            throw error;
        }
    }

    /**
     * Get the path to the .env file
     */
    getEnvFilePath(): string {
        return this.envFilePath;
    }
}
99+
100+
// Export a default instance for convenience
101+
export const envManager = new EnvManager();

packages/core/src/utils/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export { EnvManager, envManager } from './env-manager';
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { VectorDatabase, VectorDocument, SearchOptions, VectorSearchResult } from './types';
2+
3+
// Common configuration interface for Milvus implementations
4+
export interface BaseMilvusConfig {
    /** Server address. When omitted, it is resolved from `token` during initialization. */
    address?: string;
    /** Token; used to look up the address when `address` is not provided. */
    token?: string;
    // NOTE(review): username/password are not read by the base class below —
    // presumably consumed by the concrete client implementations; confirm.
    username?: string;
    password?: string;
}

/**
 * Abstract base class for Milvus vector database implementations
 * Provides common initialization logic for address resolution and environment management
 *
 * Initialization starts in the constructor and runs asynchronously; subclasses
 * should call `ensureInitialized()` before touching their client.
 */
export abstract class AbstractMilvusVectorDatabase implements VectorDatabase {
    protected config: BaseMilvusConfig;
    protected initializationPromise: Promise<void>;

    /**
     * @param config Connection settings; at least one of `address` or `token`
     *               is needed for initialization to succeed.
     */
    constructor(config: BaseMilvusConfig) {
        this.config = config;

        // Start initialization asynchronously without waiting
        this.initializationPromise = this.initialize();

        // Attach a no-op handler on a side branch so that a failed initialization
        // is not reported as an unhandled rejection (which terminates the Node
        // process by default) before any caller reaches ensureInitialized().
        // The stored promise itself still rejects, so the error surfaces there.
        this.initializationPromise.catch(() => undefined);
    }

    /**
     * Resolve the address, then hand it to the subclass-specific client setup.
     * `initializeClient` is only reached after an `await`, i.e. after the
     * synchronous part of the subclass constructor has finished.
     */
    private async initialize(): Promise<void> {
        const resolvedAddress = await this.resolveAddress();
        await this.initializeClient(resolvedAddress);
    }

    /**
     * Resolve address from config or token
     * Common logic for both gRPC and REST implementations
     *
     * @returns The configured address, or the one looked up from the token.
     * @throws Error when no address is configured and none could be obtained from the token.
     */
    protected async resolveAddress(): Promise<string> {
        const { address, token } = this.config;

        if (address) {
            return address;
        }

        // If address is not provided, get it using token
        if (token) {
            // Loaded lazily so the module is only required on this path.
            const { ClusterManager } = await import('./zilliz-utils');
            const resolved = await ClusterManager.getAddressFromToken(token);
            if (resolved) {
                return resolved;
            }
        }

        throw new Error('Address is required and could not be resolved from token');
    }

    /**
     * Initialize the specific client implementation
     * Must be implemented by subclasses
     *
     * @param address Address produced by `resolveAddress()`.
     */
    protected abstract initializeClient(address: string): Promise<void>;

    /**
     * Ensure initialization is complete before method execution
     *
     * @throws Whatever error made initialization fail.
     */
    protected async ensureInitialized(): Promise<void> {
        await this.initializationPromise;
    }

    // Abstract methods that must be implemented by subclasses
    abstract createCollection(collectionName: string, dimension: number, description?: string): Promise<void>;
    abstract dropCollection(collectionName: string): Promise<void>;
    abstract hasCollection(collectionName: string): Promise<boolean>;
    abstract insert(collectionName: string, documents: VectorDocument[]): Promise<void>;
    abstract search(collectionName: string, queryVector: number[], options?: SearchOptions): Promise<VectorSearchResult[]>;
    abstract delete(collectionName: string, ids: string[]): Promise<void>;
    abstract query(collectionName: string, filter: string, outputFields: string[]): Promise<Record<string, any>[]>;
}

0 commit comments

Comments
 (0)