diff --git a/.changeset/bright-clouds-drop.md b/.changeset/bright-clouds-drop.md new file mode 100644 index 00000000..ac90afaf --- /dev/null +++ b/.changeset/bright-clouds-drop.md @@ -0,0 +1,5 @@ +--- +"hai-build-code-generator": patch +--- + +Resolved an issue where buildContextOptions was undefined, which caused useIndex to default to false. As a result, the find_relevant_tools custom tool prompt was not being rendered. This fix ensures buildContextOptions is properly initialized before being accessed, restoring expected behavior in the tool selection flow. diff --git a/.changeset/kind-panthers-own.md b/.changeset/kind-panthers-own.md new file mode 100644 index 00000000..e18d3aeb --- /dev/null +++ b/.changeset/kind-panthers-own.md @@ -0,0 +1,5 @@ +--- +"hai-build-code-generator": minor +--- + +Apply guardrails to the API conversation history to sanitize or block the message when an anomaly is detected diff --git a/.changeset/rotten-mugs-itch.md b/.changeset/rotten-mugs-itch.md new file mode 100644 index 00000000..d3f27cd1 --- /dev/null +++ b/.changeset/rotten-mugs-itch.md @@ -0,0 +1,5 @@ +--- +"hai-build-code-generator": minor +--- + +Added runtime checks for prompt leakage, prompt injection, secrets, and PII to improve response safety and protect sensitive information. 
diff --git a/.changeset/six-cameras-begin.md b/.changeset/six-cameras-begin.md new file mode 100644 index 00000000..57556349 --- /dev/null +++ b/.changeset/six-cameras-begin.md @@ -0,0 +1,5 @@ +--- +"hai-build-code-generator": minor +--- + +Add profile support for Bedrock embeddings configuration diff --git a/.changeset/tall-rockets-greet.md b/.changeset/tall-rockets-greet.md new file mode 100644 index 00000000..a7b6b236 --- /dev/null +++ b/.changeset/tall-rockets-greet.md @@ -0,0 +1,7 @@ +--- +"hai-build-code-generator": minor +--- + +Add optional CorMatrix integration for AI code origin tracking + +Introduces privacy-first code retention analysis that tracks AI-generated code patterns through cryptographic hashes. Includes comprehensive documentation, workspace configuration support, and zero-impact background processing. diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index e61847aa..ae171a52 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -88,7 +88,7 @@ jobs: POST_HOG_HOST: ${{ secrets.POST_HOG_API_URL }} run: | # Required to generate the .vsix - vsce package --out "hai-build-code-generator-${{ steps.get_version.outputs.version }}.vsix" + vsce package --out "hai-build-code-generator-${{ steps.get_version.outputs.version }}.vsix" --allow-package-secrets sendgrid if [ "${{ github.event.inputs.release-type }}" = "pre-release" ]; then npm run publish:marketplace:prerelease diff --git a/.github/workflows/release-dev.yml b/.github/workflows/release-dev.yml index 9c47cfec..4fc9aa20 100644 --- a/.github/workflows/release-dev.yml +++ b/.github/workflows/release-dev.yml @@ -25,7 +25,7 @@ jobs: - name: Setup Node.js environment uses: actions/setup-node@v4 with: - node-version: 20.15.1 + node-version: 22 # Cache root dependencies - only reuse if package-lock.json exactly matches - name: Cache root dependencies @@ -63,7 +63,7 @@ jobs: # Build the VSIX package echo "Output Package Name: 
$PACKAGE_NAME-$BUILD_VERSION.vsix" - npx @vscode/vsce package --out "$PACKAGE_NAME-$BUILD_VERSION.vsix" + npx @vscode/vsce package --out "$PACKAGE_NAME-$BUILD_VERSION.vsix" --allow-package-secrets sendgrid env: LANGFUSE_API_URL: ${{ secrets.LANGFUSE_API_URL }} LANGFUSE_API_KEY: ${{ secrets.LANGFUSE_API_KEY }} diff --git a/.gitignore b/.gitignore index 7c7c3798..244881aa 100644 --- a/.gitignore +++ b/.gitignore @@ -32,3 +32,4 @@ src/core/controller/*/index.ts src/core/controller/grpc-service-config.ts webview-ui/src/services/grpc-client.ts src/standalone/server-setup.ts +.hai.config diff --git a/README.md b/README.md index ae318e2b..db3a422c 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,7 @@ - [📝 HAI Tasks](#-hai-tasks) : Integrate AI-generated user stories and tasks seamlessly into your workflow - [🔍 File Identification](#-file-identification) : Discover and contextualize code files with intelligent indexing - [⚙️ Settings Interface](#-settings-interface) : Easily configure LLMs and embedding models for tailored performance +- [📊 CorMatrix Integration](#-cormatrix-integration) : Track AI code retention patterns and analyze code origin over time
@@ -170,6 +171,18 @@ Customize and seamlessly integrate advanced language and embedding models into y --- +### 📊 CorMatrix Integration +Track AI code retention patterns and analyze how much AI-generated code remains in your codebase over time. + +- **Code Origin Tracking**: Monitor AI-generated code longevity and evolution patterns +- **Privacy-First**: Only cryptographic hashes are transmitted, your code stays local +- **Optional Integration**: Activate through workspace configuration when needed +- **Zero Performance Impact**: Background processing with graceful degradation + +For detailed setup and configuration, see our [CorMatrix Integration Guide](docs/features/cormatrix-integration.md). + +--- + ### 📊 Telemetry Configure external telemetry settings to monitor and analyze your AI-powered development workflows with environment-specific customization capabilities. diff --git a/docs/features/cormatrix-integration.md b/docs/features/cormatrix-integration.md new file mode 100644 index 00000000..7bd3f69a --- /dev/null +++ b/docs/features/cormatrix-integration.md @@ -0,0 +1,115 @@ +# CorMatrix Integration + +The HAI Code Generator includes built-in integration with CorMatrix, a Code Origin Ratio tracking system that helps you understand how much AI-generated code is retained over time. + +## What is CorMatrix? + +CorMatrix is a Node.js SDK and CLI that analyzes AI code retention patterns by tracking how much AI-generated code remains in your codebase versus how much gets modified or removed over time. The HAI Code Generator automatically tracks file operations performed by the AI assistant and sends this data to CorMatrix for analysis. 
+ +This provides valuable insights into: + +- **AI Code Longevity**: Whether AI-generated code tends to be temporary scaffolding or permanent solutions +- **Code Evolution**: How developers iterate on AI-generated code +- **Retention Rates**: What percentage of AI-generated code survives in the final codebase +- **Usage Patterns**: Understanding the real-world effectiveness of AI coding assistance + +For detailed information about CorMatrix SDK and CLI, see the [official documentation](https://www.npmjs.com/package/@presidio-dev/cor-matrix). + +## How It Works + +The HAI Code Generator conditionally tracks file operations through the `CorMatrixService` only when **all** conditions are met: + +1. The AI assistant performs a file modification or creation +2. The operation contains valid file content with line-level changes +3. Required CorMatrix configuration is present in your workspace +4. The CorMatrix service is available and properly configured + +> **Important Note**: Your actual source code **never leaves your system**. Only cryptographic hash signatures are generated locally and sent to CorMatrix for analysis. All data is encrypted in transit and at rest. Tracking runs in the background with batch processing, ensuring **zero impact** on AI assistant performance. + +## Privacy & Security + +CorMatrix integration is designed with privacy and security in mind: + +- **Your Code Stays Local**: Your actual source code **never leaves your development environment** +- **Hash-Only Transmission**: Only cryptographic hash signatures are generated locally and sent to CorMatrix +- **Encryption**: All transmitted data is encrypted in transit and at rest +- **Selective Tracking**: Only AI-generated code additions are monitored (deletions are ignored) +- **Background Processing**: Tracking uses batching and background processing for zero performance impact + +## Configuration + +CorMatrix integration is **completely optional** and activates only when configured. 
+ +### Workspace Configuration + +Create a `.hai.config` file in your workspace root with the following CorMatrix settings: + +``` +# CorMatrix Configuration +cormatrix.baseURL=https://your-cormatrix-instance.com +cormatrix.token=your-api-token +cormatrix.workspaceId=your-workspace-id +``` + +### Configuration Parameters + +- **`baseURL`**: Your CorMatrix server endpoint +- **`token`**: Authentication token for CorMatrix API +- **`workspaceId`**: Unique identifier for your workspace + +The integration itself is optional, but it only activates when all three parameters above are provided; if any are missing, CorMatrix tracking remains disabled. + +### Configuration File Security + +> **Important**: Although `.hai.config` is listed in this repository's `.gitignore`, ensure sensitive tokens are never committed unintentionally to your repository. + +## Optional Integration + +CorMatrix integration provides graceful operation: + +- **Default Behavior**: HAI Code Generator operates normally without CorMatrix configuration +- **Silent Activation**: Integration only activates when required configuration is present +- **Graceful Degradation**: If CorMatrix service is unavailable, the AI assistant continues working unaffected +- **Zero Performance Impact**: All tracking happens in the background without affecting your development workflow + +## How Tracking Works + +The integration automatically: + +1. **Monitors File Operations**: Tracks when the AI assistant modifies or creates files +2. **Captures Line Diffs**: Records line-by-line changes made by the AI +3. **Processes Added Code**: Only tracks newly added code (deletions are ignored) +4. **Generates Hashes**: Creates cryptographic signatures of the added code locally +5. **Transmits Safely**: Sends only hash signatures and metadata to CorMatrix +6. **Associates with Files**: Links generated code signatures to specific file paths + +## Troubleshooting + +### Integration Not Working + +If CorMatrix integration isn't tracking changes: + +1. 
**Check Configuration**: Ensure all required parameters are set in `.hai.config` +2. **Verify Connectivity**: Test connection to your CorMatrix instance +3. **Review Logs**: Check HAI Code Generator logs for CorMatrix-related errors +4. **Validate Credentials**: Confirm your token and workspace ID are correct + +### Performance Concerns + +CorMatrix integration is designed for zero performance impact: + +- All processing happens in background threads +- Batch processing minimizes network requests +- Local hash generation is computationally lightweight +- Graceful degradation prevents blocking operations + +### Privacy Questions + +**Q: What data is sent to CorMatrix?** +A: Only cryptographic hash signatures of added code and associated file paths. Your actual source code never leaves your system. + +**Q: Can CorMatrix reconstruct my code from hashes?** +A: No. Cryptographic hashes are one-way functions that cannot be reversed to reveal the original code. + +**Q: Is tracking mandatory?** +A: No. CorMatrix integration is completely optional and only activates when explicitly configured. 
\ No newline at end of file diff --git a/package-lock.json b/package-lock.json index 4beca31d..09e957ec 100644 --- a/package-lock.json +++ b/package-lock.json @@ -32,6 +32,8 @@ "@opentelemetry/sdk-node": "^0.39.1", "@opentelemetry/sdk-trace-node": "^1.30.1", "@opentelemetry/semantic-conventions": "^1.30.0", + "@presidio-dev/cor-matrix": "^0.0.2", + "@presidio-dev/hai-guardrails": "^1.10.1", "@sentry/browser": "^9.12.0", "@vscode/codicons": "^0.0.36", "archiver": "^7.0.1", @@ -12907,6 +12909,146 @@ "node": ">=18" } }, + "node_modules/@presidio-dev/cor-matrix": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/@presidio-dev/cor-matrix/-/cor-matrix-0.0.2.tgz", + "integrity": "sha512-3kJpyeMoFtT1eQ0FEBNsFB3yzVNT+Q/xtd3TrdqTHB3ApPcWzEFU9mMEqpLpoS7kex1yhLisJkTYPuXjahmajg==", + "dependencies": { + "@elysiajs/eden": "^1.3.2", + "chalk": "^5.4.1", + "dotenv": "^16.5.0", + "ignore-walk": "^7.0.0", + "isbinaryfile": "^5.0.4", + "yargs": "17.7.2" + }, + "bin": { + "cor-matrix": "dist/cli/cli.js" + }, + "engines": { + "bun": ">=1.0.0", + "node": ">=16.0.0" + }, + "peerDependencies": { + "typescript": "^5" + } + }, + "node_modules/@presidio-dev/cor-matrix/node_modules/@elysiajs/eden": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/@elysiajs/eden/-/eden-1.3.2.tgz", + "integrity": "sha512-0bCU5DO7J7hQfS2y3O3399GtoxMWRDMgQNMTHOnf70/F2nF8SwGHvzwh3+wO62Ko5FMF7EYqTN9Csw/g/Q7qwg==", + "license": "MIT", + "peerDependencies": { + "elysia": ">= 1.3.0" + } + }, + "node_modules/@presidio-dev/cor-matrix/node_modules/cookie": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-1.0.2.tgz", + "integrity": "sha512-9Kr/j4O16ISv8zBBhJoi4bXOYNTkFLOqSL3UDB0njXxCXNezjeyVrJyGOWtgfs/q2km1gwBcfH8q1yEGoMYunA==", + "license": "MIT", + "peer": true, + "engines": { + "node": ">=18" + } + }, + "node_modules/@presidio-dev/cor-matrix/node_modules/elysia": { + "version": "1.3.4", + "resolved": 
"https://registry.npmjs.org/elysia/-/elysia-1.3.4.tgz", + "integrity": "sha512-kAfM3Zwovy3z255IZgTKVxBw91HbgKhYl3TqrGRdZqqr+Fd+4eKOfvxgaKij22+MZLczPzIHtscAmvfpI3+q/A==", + "license": "MIT", + "peer": true, + "dependencies": { + "cookie": "^1.0.2", + "exact-mirror": "0.1.2", + "fast-decode-uri-component": "^1.0.1" + }, + "optionalDependencies": { + "@sinclair/typebox": "^0.34.33", + "openapi-types": "^12.1.3" + }, + "peerDependencies": { + "@sinclair/typebox": ">= 0.34.0", + "exact-mirror": ">= 0.0.9", + "file-type": ">= 20.0.0", + "openapi-types": ">= 12.0.0", + "typescript": ">= 5.0.0" + } + }, + "node_modules/@presidio-dev/cor-matrix/node_modules/file-type": { + "version": "21.0.0", + "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.0.0.tgz", + "integrity": "sha512-ek5xNX2YBYlXhiUXui3D/BXa3LdqPmoLJ7rqEx2bKJ7EAUEfmXgW0Das7Dc6Nr9MvqaOnIqiPV0mZk/r/UpNAg==", + "license": "MIT", + "peer": true, + "dependencies": { + "@tokenizer/inflate": "^0.2.7", + "strtok3": "^10.2.2", + "token-types": "^6.0.0", + "uint8array-extras": "^1.4.0" + }, + "engines": { + "node": ">=20" + }, + "funding": { + "url": "https://github.com/sindresorhus/file-type?sponsor=1" + } + }, + "node_modules/@presidio-dev/cor-matrix/node_modules/strtok3": { + "version": "10.3.1", + "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.1.tgz", + "integrity": "sha512-3JWEZM6mfix/GCJBBUrkA8p2Id2pBkyTkVCJKto55w080QBKZ+8R171fGrbiSp+yMO/u6F8/yUh7K4V9K+YCnw==", + "license": "MIT", + "peer": true, + "dependencies": { + "@tokenizer/token": "^0.3.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/@presidio-dev/cor-matrix/node_modules/token-types": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/token-types/-/token-types-6.0.0.tgz", + "integrity": "sha512-lbDrTLVsHhOMljPscd0yitpozq7Ga2M5Cvez5AjGg8GASBjtt6iERCAJ93yommPmz62fb45oFIXHEZ3u9bfJEA==", + "license": "MIT", + "peer": 
true, + "dependencies": { + "@tokenizer/token": "^0.3.0", + "ieee754": "^1.2.1" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, + "node_modules/@presidio-dev/hai-guardrails": { + "version": "1.10.1", + "resolved": "https://registry.npmjs.org/@presidio-dev/hai-guardrails/-/hai-guardrails-1.10.1.tgz", + "integrity": "sha512-fhzd6/xzyu6+r0YGuB5S4Pt4fttFlgMB7mm3SVqrnLfrW5FwWcMFv+PntGHTAT4PCgxYiaqmcBTrUPlq9M7gBg==", + "license": "MIT", + "dependencies": { + "jsonrepair": "^3.12.0", + "pino": "^9.7.0", + "string-similarity": "^4.0.4", + "ts-pattern": "^5.7.1", + "zod": "^3.25.0-beta.20250519T094321" + }, + "engines": { + "bun": ">=1.0.0", + "node": ">=16.0.0" + }, + "peerDependencies": { + "@langchain/core": "^0.3.49", + "typescript": "^5" + } + }, "node_modules/@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", @@ -13064,6 +13206,14 @@ "node": ">=18" } }, + "node_modules/@sinclair/typebox": { + "version": "0.34.35", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.35.tgz", + "integrity": "sha512-C6ypdODf2VZkgRT6sFM8E1F8vR+HcffniX0Kp8MsU8PIfrlXbNCBz0jzj17GjdmjTx1OtZzdH8+iALL21UjF5A==", + "license": "MIT", + "optional": true, + "peer": true + }, "node_modules/@sindresorhus/is": { "version": "5.6.0", "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.6.0.tgz", @@ -15383,6 +15533,43 @@ "node": ">=14.16" } }, + "node_modules/@tokenizer/inflate": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/@tokenizer/inflate/-/inflate-0.2.7.tgz", + "integrity": "sha512-MADQgmZT1eKjp06jpI2yozxaU9uVs4GzzgSL+uEq7bVcJ9V1ZXQkeGNql1fsSI0gMy1vhvNTNbUqrx+pZfJVmg==", + "license": "MIT", + "peer": true, + "dependencies": { + "debug": "^4.4.0", + "fflate": "^0.8.2", + "token-types": "^6.0.0" + }, + "engines": { + "node": ">=18" + }, + "funding": { + "type": "github", + "url": 
"https://github.com/sponsors/Borewit" + } + }, + "node_modules/@tokenizer/inflate/node_modules/token-types": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/token-types/-/token-types-6.0.0.tgz", + "integrity": "sha512-lbDrTLVsHhOMljPscd0yitpozq7Ga2M5Cvez5AjGg8GASBjtt6iERCAJ93yommPmz62fb45oFIXHEZ3u9bfJEA==", + "license": "MIT", + "peer": true, + "dependencies": { + "@tokenizer/token": "^0.3.0", + "ieee754": "^1.2.1" + }, + "engines": { + "node": ">=14.16" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/Borewit" + } + }, "node_modules/@tokenizer/token": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz", @@ -16651,6 +16838,14 @@ "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==", "license": "MIT" }, + "node_modules/atomic-sleep": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz", + "integrity": "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==", + "engines": { + "node": ">=8.0.0" + } + }, "node_modules/available-typed-arrays": { "version": "1.0.7", "resolved": "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", @@ -17277,7 +17472,6 @@ "version": "5.4.1", "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.4.1.tgz", "integrity": "sha512-zgVZuo2WcZgfUEmsn6eO3kINexW8RAE4maiQ8QNs8CtpPCSyMiYsULR3HQYkm3w8FIA3SberyMJMSldGsW+U3w==", - "dev": true, "license": "MIT", "engines": { "node": "^12.17.0 || ^14.13 || >=16.0.0" @@ -18723,7 +18917,6 @@ "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz", "integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==", "license": "BSD-2-Clause", - "peer": true, "engines": { "node": ">=12" }, @@ -19668,6 +19861,21 @@ "node": ">=18.0.0" } }, + "node_modules/exact-mirror": { + 
"version": "0.1.2", + "resolved": "https://registry.npmjs.org/exact-mirror/-/exact-mirror-0.1.2.tgz", + "integrity": "sha512-wFCPCDLmHbKGUb8TOi/IS7jLsgR8WVDGtDK3CzcB4Guf/weq7G+I+DkXiRSZfbemBFOxOINKpraM6ml78vo8Zw==", + "license": "MIT", + "peer": true, + "peerDependencies": { + "@sinclair/typebox": "^0.34.15" + }, + "peerDependenciesMeta": { + "@sinclair/typebox": { + "optional": true + } + } + }, "node_modules/execa": { "version": "9.5.2", "resolved": "https://registry.npmjs.org/execa/-/execa-9.5.2.tgz", @@ -19998,6 +20206,13 @@ "node": ">= 14.0.0" } }, + "node_modules/fast-decode-uri-component": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/fast-decode-uri-component/-/fast-decode-uri-component-1.0.1.tgz", + "integrity": "sha512-WKgKWg5eUxvRZGwW8FvfbaH7AXSh2cL+3j5fMGzUMCxWBJ3dV3a7Wz8y2f/uQ0e3B6WmodD3oS54jTQ9HVTIIg==", + "license": "MIT", + "peer": true + }, "node_modules/fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ -20046,6 +20261,14 @@ "integrity": "sha512-Ue0LwpDYErFbmNnZSF0UH6eImUwDmogUO1jyE+JbN2gsQz/jICm1Ve7t9QT0rNSsfJt+Hs4/S3GnsDVjL4HVrw==", "dev": true }, + "node_modules/fast-redact": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/fast-redact/-/fast-redact-3.5.0.tgz", + "integrity": "sha512-dwsoQlS7h9hMeYUq1W++23NDcBLV4KqONnITDV9DjfS3q1SgDGVrBdvvTLUotWtPSD7asWDV9/CmsZPy8Hf70A==", + "engines": { + "node": ">=6" + } + }, "node_modules/fast-uri": { "version": "3.0.6", "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.0.6.tgz", @@ -20139,6 +20362,13 @@ "pend": "~1.2.0" } }, + "node_modules/fflate": { + "version": "0.8.2", + "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.8.2.tgz", + "integrity": "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==", + "license": "MIT", + "peer": true + }, "node_modules/figures": { "version": "6.1.0", "resolved": 
"https://registry.npmjs.org/figures/-/figures-6.1.0.tgz", @@ -22739,9 +22969,9 @@ "license": "MIT" }, "node_modules/isbinaryfile": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.2.tgz", - "integrity": "sha512-GvcjojwonMjWbTkfMpnVHVqXW/wKMYDfEpY94/8zy8HFMOqb/VL6oeONq9v87q4ttVlaTLnGXnJD4B5B1OTGIg==", + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.4.tgz", + "integrity": "sha512-YKBKVkKhty7s8rxddb40oOkuP0NbaeXrQvLin6QMHL7Ypiy2RW9LwOVrVgZRyOrhQlayMd9t+D8yDy8MKFTSDQ==", "license": "MIT", "engines": { "node": ">= 18.0.0" @@ -23032,6 +23262,14 @@ "node": ">=0.10.0" } }, + "node_modules/jsonrepair": { + "version": "3.12.0", + "resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.12.0.tgz", + "integrity": "sha512-SWfjz8SuQ0wZjwsxtSJ3Zy8vvLg6aO/kxcp9TWNPGwJKgTZVfhNEQBMk/vPOpYCDFWRxD6QWuI6IHR1t615f0w==", + "bin": { + "jsonrepair": "bin/cli.js" + } + }, "node_modules/JSONStream": { "version": "1.3.5", "resolved": "https://registry.npmjs.org/JSONStream/-/JSONStream-1.3.5.tgz", @@ -26151,6 +26389,14 @@ "whatwg-fetch": "^3.6.20" } }, + "node_modules/on-exit-leak-free": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/on-exit-leak-free/-/on-exit-leak-free-2.1.2.tgz", + "integrity": "sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/on-finished": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", @@ -26974,6 +27220,48 @@ "node": ">=4" } }, + "node_modules/pino": { + "version": "9.7.0", + "resolved": "https://registry.npmjs.org/pino/-/pino-9.7.0.tgz", + "integrity": "sha512-vnMCM6xZTb1WDmLvtG2lE/2p+t9hDEIvTWJsu6FejkE62vB7gDhvzrpFR4Cw2to+9JNQxVnkAKVPA1KPB98vWg==", + "dependencies": { + "atomic-sleep": "^1.0.0", + "fast-redact": "^3.1.1", + "on-exit-leak-free": "^2.1.0", + "pino-abstract-transport": 
"^2.0.0", + "pino-std-serializers": "^7.0.0", + "process-warning": "^5.0.0", + "quick-format-unescaped": "^4.0.3", + "real-require": "^0.2.0", + "safe-stable-stringify": "^2.3.1", + "sonic-boom": "^4.0.1", + "thread-stream": "^3.0.0" + }, + "bin": { + "pino": "bin.js" + } + }, + "node_modules/pino-abstract-transport": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pino-abstract-transport/-/pino-abstract-transport-2.0.0.tgz", + "integrity": "sha512-F63x5tizV6WCh4R6RHyi2Ml+M70DNRXt/+HANowMflpgGFMAym/VKm6G7ZOQRjqN7XbGxK1Lg9t6ZrtzOaivMw==", + "dependencies": { + "split2": "^4.0.0" + } + }, + "node_modules/pino-std-serializers": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/pino-std-serializers/-/pino-std-serializers-7.0.0.tgz", + "integrity": "sha512-e906FRY0+tV27iq4juKzSYPbUj2do2X2JX4EzSca1631EB2QJQUqGbDuERal7LCtOpxl6x3+nvo9NPZcmjkiFA==" + }, + "node_modules/pino/node_modules/safe-stable-stringify": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz", + "integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==", + "engines": { + "node": ">=10" + } + }, "node_modules/pkce-challenge": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.0.tgz", @@ -27233,6 +27521,21 @@ "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", "license": "MIT" }, + "node_modules/process-warning": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/process-warning/-/process-warning-5.0.0.tgz", + "integrity": "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/fastify" + }, + { + "type": "opencollective", + "url": "https://opencollective.com/fastify" + } + ] + }, "node_modules/progress": { 
"version": "2.0.3", "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", @@ -27615,6 +27918,11 @@ ], "license": "MIT" }, + "node_modules/quick-format-unescaped": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz", + "integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==" + }, "node_modules/quick-lru": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", @@ -27927,6 +28235,14 @@ "url": "https://paulmillr.com/funding/" } }, + "node_modules/real-require": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/real-require/-/real-require-0.2.0.tgz", + "integrity": "sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==", + "engines": { + "node": ">= 12.13.0" + } + }, "node_modules/recma-build-jsx": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/recma-build-jsx/-/recma-build-jsx-1.0.0.tgz", @@ -29445,6 +29761,14 @@ "node": ">= 14" } }, + "node_modules/sonic-boom": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.0.tgz", + "integrity": "sha512-INb7TM37/mAcsGmc9hyyI6+QR3rR1zVRu36B0NeGXKnOOLiZOfER5SA+N7X7k3yUYRzLWafduTDvJAfDswwEww==", + "dependencies": { + "atomic-sleep": "^1.0.0" + } + }, "node_modules/source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", @@ -29516,7 +29840,6 @@ "version": "4.2.0", "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", - "dev": true, "license": "ISC", "engines": { "node": ">= 10.x" @@ -29575,6 +29898,13 @@ "safe-buffer": "~5.1.0" } }, + "node_modules/string-similarity": { + "version": "4.0.4", + "resolved": 
"https://registry.npmjs.org/string-similarity/-/string-similarity-4.0.4.tgz", + "integrity": "sha512-/q/8Q4Bl4ZKAPjj8WerIBJWALKkaPRfrvhfF8k/B23i4nzrlRj2/go1m90In7nG/3XDSbOo0+pu6RvCTM9RGMQ==", + "deprecated": "Package no longer supported. Contact Support at https://www.npmjs.com/support for more info.", + "license": "ISC" + }, "node_modules/string-template": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/string-template/-/string-template-0.2.1.tgz", @@ -30025,6 +30355,14 @@ "dev": true, "license": "MIT" }, + "node_modules/thread-stream": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-3.1.0.tgz", + "integrity": "sha512-OqyPZ9u96VohAyMfJykzmivOrY2wfMSf3C5TtFJVgN+Hm6aj+voFhlK+kZEIv2FBh1X6Xp3DlnCOfEQ3B2J86A==", + "dependencies": { + "real-require": "^0.2.0" + } + }, "node_modules/thriftrw": { "version": "3.11.4", "resolved": "https://registry.npmjs.org/thriftrw/-/thriftrw-3.11.4.tgz", @@ -30274,6 +30612,11 @@ "node": ">=0.3.1" } }, + "node_modules/ts-pattern": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/ts-pattern/-/ts-pattern-5.7.1.tgz", + "integrity": "sha512-EGs8PguQqAAUIcQfK4E9xdXxB6s2GK4sJfT/vcc9V1ELIvC4LH/zXu2t/5fajtv6oiRCxdv7BgtVK3vWgROxag==" + }, "node_modules/ts-poet": { "version": "6.11.0", "resolved": "https://registry.npmjs.org/ts-poet/-/ts-poet-6.11.0.tgz", @@ -30508,7 +30851,6 @@ "version": "5.5.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.3.tgz", "integrity": "sha512-/hreyEujaB0w76zKo6717l3L0o/qEUtRgdvUBvlkhoWeOVMjMuHNHk0BRBzikzuGDqNmPQbg5ifMEqsHLiIUcQ==", - "dev": true, "license": "Apache-2.0", "bin": { "tsc": "bin/tsc", @@ -30518,6 +30860,19 @@ "node": ">=14.17" } }, + "node_modules/uint8array-extras": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.4.0.tgz", + "integrity": "sha512-ZPtzy0hu4cZjv3z5NW9gfKnNLjoz4y6uv4HlelAjDK7sY/xOkKZv9xK/WQpcsBB3jEybChz9DPC2U/+cusjJVQ==", + "license": 
"MIT", + "peer": true, + "engines": { + "node": ">=18" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/unbox-primitive": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/unbox-primitive/-/unbox-primitive-1.0.2.tgz", @@ -31733,10 +32088,9 @@ } }, "node_modules/zod": { - "version": "3.24.2", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz", - "integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==", - "license": "MIT", + "version": "3.25.28", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.28.tgz", + "integrity": "sha512-/nt/67WYKnr5by3YS7LroZJbtcCBurDKKPBPWWzaxvVCGuG/NOsiKkrjoOhI8mJ+SQUXEbUzeB3S+6XDUEEj7Q==", "funding": { "url": "https://github.com/sponsors/colinhacks" } @@ -41013,6 +41367,89 @@ "playwright": "1.52.0" } }, + "@presidio-dev/cor-matrix": { + "version": "0.0.2", + "resolved": "https://registry.npmjs.org/@presidio-dev/cor-matrix/-/cor-matrix-0.0.2.tgz", + "integrity": "sha512-3kJpyeMoFtT1eQ0FEBNsFB3yzVNT+Q/xtd3TrdqTHB3ApPcWzEFU9mMEqpLpoS7kex1yhLisJkTYPuXjahmajg==", + "requires": { + "@elysiajs/eden": "^1.3.2", + "chalk": "^5.4.1", + "dotenv": "^16.5.0", + "ignore-walk": "^7.0.0", + "isbinaryfile": "^5.0.4", + "yargs": "17.7.2" + }, + "dependencies": { + "@elysiajs/eden": { + "version": "1.3.2", + "resolved": "https://registry.npmjs.org/@elysiajs/eden/-/eden-1.3.2.tgz", + "integrity": "sha512-0bCU5DO7J7hQfS2y3O3399GtoxMWRDMgQNMTHOnf70/F2nF8SwGHvzwh3+wO62Ko5FMF7EYqTN9Csw/g/Q7qwg==", + "requires": {} + }, + "cookie": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-1.0.2.tgz", + "integrity": "sha512-9Kr/j4O16ISv8zBBhJoi4bXOYNTkFLOqSL3UDB0njXxCXNezjeyVrJyGOWtgfs/q2km1gwBcfH8q1yEGoMYunA==", + "peer": true + }, + "elysia": { + "version": "1.3.4", + "resolved": "https://registry.npmjs.org/elysia/-/elysia-1.3.4.tgz", + "integrity": 
"sha512-kAfM3Zwovy3z255IZgTKVxBw91HbgKhYl3TqrGRdZqqr+Fd+4eKOfvxgaKij22+MZLczPzIHtscAmvfpI3+q/A==", + "peer": true, + "requires": { + "@sinclair/typebox": "^0.34.33", + "cookie": "^1.0.2", + "exact-mirror": "0.1.2", + "fast-decode-uri-component": "^1.0.1", + "openapi-types": "^12.1.3" + } + }, + "file-type": { + "version": "21.0.0", + "resolved": "https://registry.npmjs.org/file-type/-/file-type-21.0.0.tgz", + "integrity": "sha512-ek5xNX2YBYlXhiUXui3D/BXa3LdqPmoLJ7rqEx2bKJ7EAUEfmXgW0Das7Dc6Nr9MvqaOnIqiPV0mZk/r/UpNAg==", + "peer": true, + "requires": { + "@tokenizer/inflate": "^0.2.7", + "strtok3": "^10.2.2", + "token-types": "^6.0.0", + "uint8array-extras": "^1.4.0" + } + }, + "strtok3": { + "version": "10.3.1", + "resolved": "https://registry.npmjs.org/strtok3/-/strtok3-10.3.1.tgz", + "integrity": "sha512-3JWEZM6mfix/GCJBBUrkA8p2Id2pBkyTkVCJKto55w080QBKZ+8R171fGrbiSp+yMO/u6F8/yUh7K4V9K+YCnw==", + "peer": true, + "requires": { + "@tokenizer/token": "^0.3.0" + } + }, + "token-types": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/token-types/-/token-types-6.0.0.tgz", + "integrity": "sha512-lbDrTLVsHhOMljPscd0yitpozq7Ga2M5Cvez5AjGg8GASBjtt6iERCAJ93yommPmz62fb45oFIXHEZ3u9bfJEA==", + "peer": true, + "requires": { + "@tokenizer/token": "^0.3.0", + "ieee754": "^1.2.1" + } + } + } + }, + "@presidio-dev/hai-guardrails": { + "version": "1.10.1", + "resolved": "https://registry.npmjs.org/@presidio-dev/hai-guardrails/-/hai-guardrails-1.10.1.tgz", + "integrity": "sha512-fhzd6/xzyu6+r0YGuB5S4Pt4fttFlgMB7mm3SVqrnLfrW5FwWcMFv+PntGHTAT4PCgxYiaqmcBTrUPlq9M7gBg==", + "requires": { + "jsonrepair": "^3.12.0", + "pino": "^9.7.0", + "string-similarity": "^4.0.4", + "ts-pattern": "^5.7.1", + "zod": "^3.25.0-beta.20250519T094321" + } + }, "@protobufjs/aspromise": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/@protobufjs/aspromise/-/aspromise-1.1.2.tgz", @@ -41138,6 +41575,13 @@ "resolved": "https://registry.npmjs.org/@sentry/core/-/core-9.12.0.tgz", 
"integrity": "sha512-jOqQK/90uzHmsBvkPTj/DAEFvA5poX4ZRyC7LE1zjg4F5jdOp3+M4W3qCy0CkSTu88Zu5VWBoppCU2Bs34XEqg==" }, + "@sinclair/typebox": { + "version": "0.34.35", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.35.tgz", + "integrity": "sha512-C6ypdODf2VZkgRT6sFM8E1F8vR+HcffniX0Kp8MsU8PIfrlXbNCBz0jzj17GjdmjTx1OtZzdH8+iALL21UjF5A==", + "optional": true, + "peer": true + }, "@sindresorhus/is": { "version": "5.6.0", "resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-5.6.0.tgz", @@ -43068,6 +43512,29 @@ "defer-to-connect": "^2.0.1" } }, + "@tokenizer/inflate": { + "version": "0.2.7", + "resolved": "https://registry.npmjs.org/@tokenizer/inflate/-/inflate-0.2.7.tgz", + "integrity": "sha512-MADQgmZT1eKjp06jpI2yozxaU9uVs4GzzgSL+uEq7bVcJ9V1ZXQkeGNql1fsSI0gMy1vhvNTNbUqrx+pZfJVmg==", + "peer": true, + "requires": { + "debug": "^4.4.0", + "fflate": "^0.8.2", + "token-types": "^6.0.0" + }, + "dependencies": { + "token-types": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/token-types/-/token-types-6.0.0.tgz", + "integrity": "sha512-lbDrTLVsHhOMljPscd0yitpozq7Ga2M5Cvez5AjGg8GASBjtt6iERCAJ93yommPmz62fb45oFIXHEZ3u9bfJEA==", + "peer": true, + "requires": { + "@tokenizer/token": "^0.3.0", + "ieee754": "^1.2.1" + } + } + } + }, "@tokenizer/token": { "version": "0.3.0", "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz", @@ -43985,6 +44452,11 @@ "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" }, + "atomic-sleep": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/atomic-sleep/-/atomic-sleep-1.0.0.tgz", + "integrity": "sha512-kNOjDqAh7px0XWNI+4QbzoiR/nTkHAWNud2uvnJquD1/x5a7EQZMJT0AczqK0Qn67oY/TTQ1LbUKajZpp3I9tQ==" + }, "available-typed-arrays": { "version": "1.0.7", "resolved": 
"https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.7.tgz", @@ -44396,8 +44868,7 @@ "chalk": { "version": "5.4.1", "resolved": "https://registry.npmjs.org/chalk/-/chalk-5.4.1.tgz", - "integrity": "sha512-zgVZuo2WcZgfUEmsn6eO3kINexW8RAE4maiQ8QNs8CtpPCSyMiYsULR3HQYkm3w8FIA3SberyMJMSldGsW+U3w==", - "dev": true + "integrity": "sha512-zgVZuo2WcZgfUEmsn6eO3kINexW8RAE4maiQ8QNs8CtpPCSyMiYsULR3HQYkm3w8FIA3SberyMJMSldGsW+U3w==" }, "character-entities": { "version": "2.0.2", @@ -45319,8 +45790,7 @@ "dotenv": { "version": "16.5.0", "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz", - "integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==", - "peer": true + "integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==" }, "dprint-node": { "version": "1.0.8", @@ -45987,6 +46457,13 @@ "resolved": "https://registry.npmjs.org/eventsource-parser/-/eventsource-parser-3.0.0.tgz", "integrity": "sha512-T1C0XCUimhxVQzW4zFipdx0SficT651NnkR0ZSH3yQwh+mFMdLfgjABVi4YtMTtaL4s168593DaoaRLMqryavA==" }, + "exact-mirror": { + "version": "0.1.2", + "resolved": "https://registry.npmjs.org/exact-mirror/-/exact-mirror-0.1.2.tgz", + "integrity": "sha512-wFCPCDLmHbKGUb8TOi/IS7jLsgR8WVDGtDK3CzcB4Guf/weq7G+I+DkXiRSZfbemBFOxOINKpraM6ml78vo8Zw==", + "peer": true, + "requires": {} + }, "execa": { "version": "9.5.2", "resolved": "https://registry.npmjs.org/execa/-/execa-9.5.2.tgz", @@ -46197,6 +46674,12 @@ "prebuild-install": "^7.1.1" } }, + "fast-decode-uri-component": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/fast-decode-uri-component/-/fast-decode-uri-component-1.0.1.tgz", + "integrity": "sha512-WKgKWg5eUxvRZGwW8FvfbaH7AXSh2cL+3j5fMGzUMCxWBJ3dV3a7Wz8y2f/uQ0e3B6WmodD3oS54jTQ9HVTIIg==", + "peer": true + }, "fast-deep-equal": { "version": "3.1.3", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", @@ 
-46237,6 +46720,11 @@ "integrity": "sha512-Ue0LwpDYErFbmNnZSF0UH6eImUwDmogUO1jyE+JbN2gsQz/jICm1Ve7t9QT0rNSsfJt+Hs4/S3GnsDVjL4HVrw==", "dev": true }, + "fast-redact": { + "version": "3.5.0", + "resolved": "https://registry.npmjs.org/fast-redact/-/fast-redact-3.5.0.tgz", + "integrity": "sha512-dwsoQlS7h9hMeYUq1W++23NDcBLV4KqONnITDV9DjfS3q1SgDGVrBdvvTLUotWtPSD7asWDV9/CmsZPy8Hf70A==" + }, "fast-uri": { "version": "3.0.6", "resolved": "https://registry.npmjs.org/fast-uri/-/fast-uri-3.0.6.tgz", @@ -46295,6 +46783,12 @@ "pend": "~1.2.0" } }, + "fflate": { + "version": "0.8.2", + "resolved": "https://registry.npmjs.org/fflate/-/fflate-0.8.2.tgz", + "integrity": "sha512-cPJU47OaAoCbg0pBvzsgpTPhmhqI5eJjh/JIu8tPj5q+T7iLvW/JAYUqmE7KOB4R1ZyEhzBaIQpQpardBF5z8A==", + "peer": true + }, "figures": { "version": "6.1.0", "resolved": "https://registry.npmjs.org/figures/-/figures-6.1.0.tgz", @@ -48029,9 +48523,9 @@ "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==" }, "isbinaryfile": { - "version": "5.0.2", - "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.2.tgz", - "integrity": "sha512-GvcjojwonMjWbTkfMpnVHVqXW/wKMYDfEpY94/8zy8HFMOqb/VL6oeONq9v87q4ttVlaTLnGXnJD4B5B1OTGIg==" + "version": "5.0.4", + "resolved": "https://registry.npmjs.org/isbinaryfile/-/isbinaryfile-5.0.4.tgz", + "integrity": "sha512-YKBKVkKhty7s8rxddb40oOkuP0NbaeXrQvLin6QMHL7Ypiy2RW9LwOVrVgZRyOrhQlayMd9t+D8yDy8MKFTSDQ==" }, "isexe": { "version": "2.0.0", @@ -48239,6 +48733,11 @@ "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", "integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==" }, + "jsonrepair": { + "version": "3.12.0", + "resolved": "https://registry.npmjs.org/jsonrepair/-/jsonrepair-3.12.0.tgz", + "integrity": "sha512-SWfjz8SuQ0wZjwsxtSJ3Zy8vvLg6aO/kxcp9TWNPGwJKgTZVfhNEQBMk/vPOpYCDFWRxD6QWuI6IHR1t615f0w==" + }, "JSONStream": { "version": "1.3.5", 
"resolved": "https://registry.npmjs.org/JSONStream/-/JSONStream-1.3.5.tgz", @@ -50336,6 +50835,11 @@ "whatwg-fetch": "^3.6.20" } }, + "on-exit-leak-free": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/on-exit-leak-free/-/on-exit-leak-free-2.1.2.tgz", + "integrity": "sha512-0eJJY6hXLGf1udHwfNftBqH+g73EU4B504nZeKpz1sYRKafAghwxEJunB2O7rDZkL4PGfsMVnTXZ2EjibbqcsA==" + }, "on-finished": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/on-finished/-/on-finished-2.4.1.tgz", @@ -50878,6 +51382,44 @@ "integrity": "sha512-C3FsVNH1udSEX48gGX1xfvwTWfsYWj5U+8/uK15BGzIGrKoUpghX8hWZwa/OFnakBiiVNmBvemTJR5mcy7iPcg==", "dev": true }, + "pino": { + "version": "9.7.0", + "resolved": "https://registry.npmjs.org/pino/-/pino-9.7.0.tgz", + "integrity": "sha512-vnMCM6xZTb1WDmLvtG2lE/2p+t9hDEIvTWJsu6FejkE62vB7gDhvzrpFR4Cw2to+9JNQxVnkAKVPA1KPB98vWg==", + "requires": { + "atomic-sleep": "^1.0.0", + "fast-redact": "^3.1.1", + "on-exit-leak-free": "^2.1.0", + "pino-abstract-transport": "^2.0.0", + "pino-std-serializers": "^7.0.0", + "process-warning": "^5.0.0", + "quick-format-unescaped": "^4.0.3", + "real-require": "^0.2.0", + "safe-stable-stringify": "^2.3.1", + "sonic-boom": "^4.0.1", + "thread-stream": "^3.0.0" + }, + "dependencies": { + "safe-stable-stringify": { + "version": "2.5.0", + "resolved": "https://registry.npmjs.org/safe-stable-stringify/-/safe-stable-stringify-2.5.0.tgz", + "integrity": "sha512-b3rppTKm9T+PsVCBEOUR46GWI7fdOs00VKZ1+9c1EWDaDMvjQc6tUwuFyIprgGgTcWoVHSKrU8H31ZHA2e0RHA==" + } + } + }, + "pino-abstract-transport": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/pino-abstract-transport/-/pino-abstract-transport-2.0.0.tgz", + "integrity": "sha512-F63x5tizV6WCh4R6RHyi2Ml+M70DNRXt/+HANowMflpgGFMAym/VKm6G7ZOQRjqN7XbGxK1Lg9t6ZrtzOaivMw==", + "requires": { + "split2": "^4.0.0" + } + }, + "pino-std-serializers": { + "version": "7.0.0", + "resolved": 
"https://registry.npmjs.org/pino-std-serializers/-/pino-std-serializers-7.0.0.tgz", + "integrity": "sha512-e906FRY0+tV27iq4juKzSYPbUj2do2X2JX4EzSca1631EB2QJQUqGbDuERal7LCtOpxl6x3+nvo9NPZcmjkiFA==" + }, "pkce-challenge": { "version": "5.0.0", "resolved": "https://registry.npmjs.org/pkce-challenge/-/pkce-challenge-5.0.0.tgz", @@ -51041,6 +51583,11 @@ "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==" }, + "process-warning": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/process-warning/-/process-warning-5.0.0.tgz", + "integrity": "sha512-a39t9ApHNx2L4+HBnQKqxxHNs1r7KF+Intd8Q/g1bUh6q0WIp9voPXJ/x0j+ZL45KF1pJd9+q2jLIRMfvEshkA==" + }, "progress": { "version": "2.0.3", "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", @@ -51313,6 +51860,11 @@ "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==" }, + "quick-format-unescaped": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/quick-format-unescaped/-/quick-format-unescaped-4.0.4.tgz", + "integrity": "sha512-tYC1Q1hgyRuHgloV/YXs2w15unPVh8qfu/qCTfhTYamaw7fyhumKa2yGpdSo87vY32rIclj+4fWYQXUMs9EHvg==" + }, "quick-lru": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/quick-lru/-/quick-lru-5.1.1.tgz", @@ -51537,6 +52089,11 @@ "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.0.2.tgz", "integrity": "sha512-yDMz9g+VaZkqBYS/ozoBJwaBhTbZo3UNYQHNRw1D3UFQB8oHB4uS/tAODO+ZLjGWmUbKnIlOWO+aaIiAxrUWHA==" }, + "real-require": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/real-require/-/real-require-0.2.0.tgz", + "integrity": "sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==" + }, "recma-build-jsx": { 
"version": "1.0.0", "resolved": "https://registry.npmjs.org/recma-build-jsx/-/recma-build-jsx-1.0.0.tgz", @@ -52615,6 +53172,14 @@ "socks": "^2.8.3" } }, + "sonic-boom": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/sonic-boom/-/sonic-boom-4.2.0.tgz", + "integrity": "sha512-INb7TM37/mAcsGmc9hyyI6+QR3rR1zVRu36B0NeGXKnOOLiZOfER5SA+N7X7k3yUYRzLWafduTDvJAfDswwEww==", + "requires": { + "atomic-sleep": "^1.0.0" + } + }, "source-map": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", @@ -52672,8 +53237,7 @@ "split2": { "version": "4.2.0", "resolved": "https://registry.npmjs.org/split2/-/split2-4.2.0.tgz", - "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==", - "dev": true + "integrity": "sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==" }, "sprintf-js": { "version": "1.0.3", @@ -52712,6 +53276,11 @@ "safe-buffer": "~5.1.0" } }, + "string-similarity": { + "version": "4.0.4", + "resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-4.0.4.tgz", + "integrity": "sha512-/q/8Q4Bl4ZKAPjj8WerIBJWALKkaPRfrvhfF8k/B23i4nzrlRj2/go1m90In7nG/3XDSbOo0+pu6RvCTM9RGMQ==" + }, "string-template": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/string-template/-/string-template-0.2.1.tgz", @@ -53015,6 +53584,14 @@ "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==", "dev": true }, + "thread-stream": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/thread-stream/-/thread-stream-3.1.0.tgz", + "integrity": "sha512-OqyPZ9u96VohAyMfJykzmivOrY2wfMSf3C5TtFJVgN+Hm6aj+voFhlK+kZEIv2FBh1X6Xp3DlnCOfEQ3B2J86A==", + "requires": { + "real-require": "^0.2.0" + } + }, "thriftrw": { "version": "3.11.4", "resolved": "https://registry.npmjs.org/thriftrw/-/thriftrw-3.11.4.tgz", @@ -53181,6 +53758,11 @@ } } }, + "ts-pattern": { + 
"version": "5.7.1", + "resolved": "https://registry.npmjs.org/ts-pattern/-/ts-pattern-5.7.1.tgz", + "integrity": "sha512-EGs8PguQqAAUIcQfK4E9xdXxB6s2GK4sJfT/vcc9V1ELIvC4LH/zXu2t/5fajtv6oiRCxdv7BgtVK3vWgROxag==" + }, "ts-poet": { "version": "6.11.0", "resolved": "https://registry.npmjs.org/ts-poet/-/ts-poet-6.11.0.tgz", @@ -53349,8 +53931,13 @@ "typescript": { "version": "5.5.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.3.tgz", - "integrity": "sha512-/hreyEujaB0w76zKo6717l3L0o/qEUtRgdvUBvlkhoWeOVMjMuHNHk0BRBzikzuGDqNmPQbg5ifMEqsHLiIUcQ==", - "dev": true + "integrity": "sha512-/hreyEujaB0w76zKo6717l3L0o/qEUtRgdvUBvlkhoWeOVMjMuHNHk0BRBzikzuGDqNmPQbg5ifMEqsHLiIUcQ==" + }, + "uint8array-extras": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/uint8array-extras/-/uint8array-extras-1.4.0.tgz", + "integrity": "sha512-ZPtzy0hu4cZjv3z5NW9gfKnNLjoz4y6uv4HlelAjDK7sY/xOkKZv9xK/WQpcsBB3jEybChz9DPC2U/+cusjJVQ==", + "peer": true }, "unbox-primitive": { "version": "1.0.2", @@ -54170,9 +54757,9 @@ } }, "zod": { - "version": "3.24.2", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz", - "integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==" + "version": "3.25.28", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.25.28.tgz", + "integrity": "sha512-/nt/67WYKnr5by3YS7LroZJbtcCBurDKKPBPWWzaxvVCGuG/NOsiKkrjoOhI8mJ+SQUXEbUzeB3S+6XDUEEj7Q==" }, "zod-to-json-schema": { "version": "3.24.4", diff --git a/package.json b/package.json index d3d23d96..6f9fc398 100644 --- a/package.json +++ b/package.json @@ -171,7 +171,7 @@ { "command": "hai.expertsButtonClicked", "title": "Experts", - "icon": "$(gist)", + "icon": "$(robot)", "category": "HAI Build" } ], @@ -366,8 +366,10 @@ "chalk": "^5.3.0", "esbuild": "^0.25.0", "eslint": "^8.57.0", + "got": "^14.4.5", "grpc-tools": "^1.13.0", "husky": "^9.1.7", + "keep-a-changelog": "^2.5.3", "mintlify": "^4.0.515", "npm-run-all": "^4.1.5", 
"prettier": "^3.3.3", @@ -378,8 +380,6 @@ "ts-node": "^10.9.2", "ts-proto": "^2.6.1", "tsconfig-paths": "^4.2.0", - "got": "^14.4.5", - "keep-a-changelog": "^2.5.3", "typescript": "^5.4.5" }, "dependencies": { @@ -387,17 +387,17 @@ "@anthropic-ai/sdk": "^0.37.0", "@anthropic-ai/vertex-sdk": "^0.6.4", "@aws-sdk/client-bedrock-runtime": "^3.758.0", + "@bufbuild/protobuf": "^2.2.5", + "@google-cloud/vertexai": "^1.9.3", + "@google/genai": "^0.13.0", + "@grpc/grpc-js": "^1.9.15", + "@grpc/reflection": "^1.0.4", "@langchain/aws": "^0.1.1", "@langchain/community": "^0.3.11", "@langchain/core": "^0.3.17", "@langchain/ollama": "^0.1.5", "@langchain/openai": "^0.3.12", "@langchain/textsplitters": "^0.1.0", - "@bufbuild/protobuf": "^2.2.5", - "@google-cloud/vertexai": "^1.9.3", - "@google/genai": "^0.13.0", - "@grpc/grpc-js": "^1.9.15", - "@grpc/reflection": "^1.0.4", "@mistralai/mistralai": "^1.5.0", "@modelcontextprotocol/sdk": "^1.11.1", "@opentelemetry/api": "^1.4.1", @@ -406,6 +406,8 @@ "@opentelemetry/sdk-node": "^0.39.1", "@opentelemetry/sdk-trace-node": "^1.30.1", "@opentelemetry/semantic-conventions": "^1.30.0", + "@presidio-dev/cor-matrix": "^0.0.2", + "@presidio-dev/hai-guardrails": "^1.10.1", "@sentry/browser": "^9.12.0", "@vscode/codicons": "^0.0.36", "archiver": "^7.0.1", @@ -417,6 +419,7 @@ "default-shell": "^2.2.0", "diff": "^5.2.0", "execa": "^9.5.2", + "faiss-node": "^0.5.1", "fast-deep-equal": "^3.1.3", "firebase": "^11.2.0", "fzf": "^0.5.2", @@ -429,7 +432,6 @@ "image-size": "^2.0.2", "isbinaryfile": "^5.0.2", "jschardet": "^3.1.4", - "faiss-node": "^0.5.1", "langfuse": "^3.37.1", "mammoth": "^1.8.0", "monaco-vscode-textmate-theme-converter": "^0.1.7", @@ -447,12 +449,12 @@ "serialize-error": "^11.0.3", "simple-git": "^3.27.0", "strip-ansi": "^7.1.0", + "tar": "^7.4.3", "tree-sitter-wasms": "^0.1.11", - "uuid": "^11.1.0", - "watcher": "^2.3.1", "ts-morph": "^25.0.1", "turndown": "^7.2.0", - "tar": "^7.4.3", + "uuid": "^11.1.0", + "watcher": "^2.3.1", 
"web-tree-sitter": "^0.22.6", "zod": "^3.24.2" } diff --git a/src/core/assistant-message/custom.ts b/src/core/assistant-message/custom.ts index 8dc7f0e9..da983305 100644 --- a/src/core/assistant-message/custom.ts +++ b/src/core/assistant-message/custom.ts @@ -1,3 +1,3 @@ -export const customToolUseNames = ["find_relevant_files", "code_security_scan"] +export const customToolUseNames = ["find_relevant_files", "code_security_scan", "custom_expert_context"] -export const customToolParamNames = ["task"] +export const customToolParamNames = ["task", "query", "expertName"] diff --git a/src/core/controller/index.ts b/src/core/controller/index.ts index 238ccb21..47c33926 100644 --- a/src/core/controller/index.ts +++ b/src/core/controller/index.ts @@ -75,7 +75,9 @@ import { deleteFromContextDirectory } from "@utils/delete-helper" import { isLocalMcp, getLocalMcpDetails, getLocalMcp, getAllLocalMcps } from "@utils/local-mcp-registry" import { getStarCount } from "../../services/github/github" import { openFile } from "@integrations/misc/open-file" +import { Guardrails, GuardrailsConfig } from "@integrations/guardrails" import { posthogClientProvider } from "@/services/posthog/PostHogClientProvider" +import { ExpertFileManager } from "../experts/ExpertFileManager" /* https://github.com/microsoft/vscode-webview-ui-toolkit-samples/blob/main/default/weather-webview/src/providers/WeatherViewProvider.ts @@ -104,8 +106,9 @@ export class Controller { private vsCodeWorkSpaceFolderFsPath!: string private codeIndexAbortController: AbortController private isSideBar: boolean - private expertManager: ExpertManager + private expertManager: ExpertManager | undefined private isCodeIndexInProgress: boolean = false + private guardrails: Guardrails constructor( readonly context: vscode.ExtensionContext, @@ -130,6 +133,7 @@ export class Controller { return apiConfiguration?.clineApiKey }, ) + this.guardrails = new Guardrails(this.context) // Clean up legacy checkpoints 
cleanupLegacyCheckpoints(this.context.globalStorageUri.fsPath, this.outputChannel).catch((error) => { @@ -139,7 +143,6 @@ export class Controller { // TAG:HAI this.codeIndexAbortController = new AbortController() this.workspaceId = getWorkspaceID() || "" - this.expertManager = new ExpertManager(this.context, this.workspaceId) this.isSideBar = isSideBar this.vsCodeWorkSpaceFolderFsPath = (getWorkspacePath() || "").trim() if (this.vsCodeWorkSpaceFolderFsPath) { @@ -155,6 +158,15 @@ export class Controller { this.disposables.push(registration) } + // TAG:HAI + private async getExpertManager(): Promise { + if (!this.expertManager) { + const { embeddingConfiguration } = await getAllExtensionState(this.context, this.workspaceId) + this.expertManager = new ExpertManager(this.context, this.workspaceId, embeddingConfiguration) + } + return this.expertManager + } + /* VSCode extensions use the disposable pattern to clean up resources when the sidebar/editor tab is closed by the user or system. This applies to event listening, commands, interacting with the UI, etc. - https://vscode-docs.readthedocs.io/en/stable/extensions/patterns-and-principles/ @@ -209,6 +221,8 @@ export class Controller { taskHistory, embeddingConfiguration, expertPrompt, + expertName, + isDeepCrawlEnabled, buildContextOptions, } = await getAllExtensionState(this.context, this.workspaceId) @@ -245,6 +259,9 @@ export class Controller { enableCheckpointsSetting ?? true, customInstructions, expertPrompt, + expertName, + isDeepCrawlEnabled, + buildContextOptions, task, images, historyItem, @@ -1707,7 +1724,7 @@ Commit message:` if (buildContextOptions.useIndex) { if (!isManualTrigger && (!buildIndexProgress || !buildIndexProgress.progress)) { const userConfirmation = await vscode.window.showWarningMessage( - "hAI performs best with a code index. Would you like to navigate to Settings to start indexing for this workspace?", + "HAI performs best with a code index. 
Would you like to navigate to Settings to start indexing for this workspace?", "Open Settings", "No", ) @@ -1992,6 +2009,7 @@ Commit message:` } async customWebViewMessageHandlers(message: WebviewMessage) { + const expertManager = await this.getExpertManager() switch (message.type) { case "requestOllamaEmbeddingModels": const ollamaEmbeddingModels = await this.getOllamaEmbeddingModels(message.text) @@ -2013,49 +2031,65 @@ Commit message:` break } break - case "expertPrompt": + case "selectExpert": const expertName = message.text || "" - if (message.category === "viewExpert") { - if (message.isDefault && message.prompt) { - try { - // Create a unique URI for this expert prompt - const encodedContent = Buffer.from(message.prompt).toString("base64") - const uri = vscode.Uri.parse(`${EXPERT_PROMPT_URI_SCHEME}:${expertName}.md?${encodedContent}`) + const expertPrompt = message.prompt || "" + const isDeepCrawlEnabled = !!message.isDeepCrawlEnabled + await customUpdateState(this.context, "expertPrompt", expertPrompt || undefined) + await customUpdateState(this.context, "expertName", expertName || undefined) + await customUpdateState(this.context, "isDeepCrawlEnabled", isDeepCrawlEnabled) + if (!isDeepCrawlEnabled) { + await this.updateExpertPrompt(message.prompt, expertName) + } + break + case "viewExpertPrompt": + const viewExpertName = message.text || "" - // Open the document - const document = await vscode.workspace.openTextDocument(uri) - await vscode.window.showTextDocument(document, { preview: false }) - } catch (error) { - console.error("Error creating or opening the virtual document:", error) - } - } else { - // For custom experts, use the existing path - const promptPath = await this.expertManager.getExpertPromptPath( - this.vsCodeWorkSpaceFolderFsPath, - expertName, - ) - if (promptPath) { - openFile(promptPath) - } else { - vscode.window.showErrorMessage(`Could not find prompt file for expert: ${expertName}`) - } + if (message.isDefault && message.prompt) { + 
try { + const encodedContent = Buffer.from(message.prompt).toString("base64") + const uri = vscode.Uri.parse(`${EXPERT_PROMPT_URI_SCHEME}:${viewExpertName}.md?${encodedContent}`) + const document = await vscode.workspace.openTextDocument(uri) + await vscode.window.showTextDocument(document, { preview: false }) + } catch (error) { + console.error("Error creating or opening the virtual document:", error) } } else { - await this.updateExpertPrompt(message.prompt, expertName) + const expertManager = await this.getExpertManager() + const promptPath = await expertManager.getExpertPromptPath(this.vsCodeWorkSpaceFolderFsPath, viewExpertName) + if (promptPath) { + openFile(promptPath) + } else { + vscode.window.showErrorMessage(`Could not find prompt file for expert: ${viewExpertName}`) + } } break case "saveExpert": if (message.text) { const expert = JSON.parse(message.text) as ExpertData - await this.expertManager.saveExpert(this.vsCodeWorkSpaceFolderFsPath, expert) + await expertManager.saveExpert(this.vsCodeWorkSpaceFolderFsPath, expert) await this.loadExperts() } break case "deleteExpert": if (message.text) { - const expertName = message.text - await this.expertManager.deleteExpert(this.vsCodeWorkSpaceFolderFsPath, expertName) + const expertToDelete = message.text + const { expertName } = await getAllExtensionState(this.context, this.workspaceId) + + // Delete the expert + const expertManager = await this.getExpertManager() + await expertManager.deleteExpert(this.vsCodeWorkSpaceFolderFsPath, expertToDelete) + + // Clear selected expert state if the deleted expert was selected + if (expertName === expertToDelete) { + await customUpdateState(this.context, "expertName", undefined) + await customUpdateState(this.context, "expertPrompt", undefined) + await customUpdateState(this.context, "isDeepCrawlEnabled", false) + } + + // Reload experts to update the UI await this.loadExperts() + await this.loadDefaultExperts() } break case "loadExperts": @@ -2066,18 +2100,14 @@ Commit 
message:` break case "refreshDocumentLink": if (message.text && message.expert) { - await this.expertManager.refreshDocumentLink(this.vsCodeWorkSpaceFolderFsPath, message.expert, message.text) + await expertManager.refreshDocumentLink(this.vsCodeWorkSpaceFolderFsPath, message.expert, message.text) } await this.loadExperts() break case "deleteDocumentLink": if (message.text && message.expert) { try { - await this.expertManager.deleteDocumentLink( - this.vsCodeWorkSpaceFolderFsPath, - message.expert, - message.text, - ) + await expertManager.deleteDocumentLink(this.vsCodeWorkSpaceFolderFsPath, message.expert, message.text) await this.loadExperts() } catch (error) { console.error(`Failed to delete document link for expert ${message.expert}:`, error) @@ -2088,7 +2118,7 @@ Commit message:` case "addDocumentLink": if (message.text && message.expert) { try { - await this.expertManager.addDocumentLink(this.vsCodeWorkSpaceFolderFsPath, message.expert, message.text) + await expertManager.addDocumentLink(this.vsCodeWorkSpaceFolderFsPath, message.expert, message.text) await this.loadExperts() } catch (error) { console.error(`Failed to add document link for expert ${message.expert}:`, error) @@ -2239,6 +2269,22 @@ Commit message:` } await this.postStateToWebview() break + case "loadGuards": + await this.loadGuards() + break + case "updateGuards": + if (message.guards && Array.isArray(message.guards)) { + const guardUpdates = message.guards.map((guard) => ({ + guardKey: guard.key as keyof GuardrailsConfig, + updates: { + ...(guard.threshold !== undefined && { threshold: guard.threshold }), + ...(guard.mode !== undefined && { mode: guard.mode }), + }, + })) + await this.guardrails.updateGuard(guardUpdates) + } + await this.loadGuards() + break } } @@ -2279,30 +2325,43 @@ Commit message:` } async loadExperts() { - const experts = await this.expertManager.readExperts(this.vsCodeWorkSpaceFolderFsPath) + const expertManager = await this.getExpertManager() + const { experts, 
selectedExpert } = await expertManager.readExperts(this.vsCodeWorkSpaceFolderFsPath) await this.postMessageToWebview({ type: "expertsUpdated", experts, + selectedExpert, }) } async loadDefaultExperts() { - const experts = await this.expertManager.loadDefaultExperts() + const expertManager = await this.getExpertManager() + const { experts, selectedExpert } = await expertManager.loadDefaultExperts() await this.postMessageToWebview({ type: "defaultExpertsLoaded", experts, + selectedExpert, + }) + } + + async loadGuards() { + const guards = this.guardrails.activeGuards + await this.postMessageToWebview({ + type: "defaultGuards", + guards, }) } private async getExpertDocumentsContent(expertName: string): Promise { - const expertPath = await this.expertManager.getExpertPromptPath(this.vsCodeWorkSpaceFolderFsPath, expertName) + const expertManager = await this.getExpertManager() + const expertPath = await expertManager.getExpertPromptPath(this.vsCodeWorkSpaceFolderFsPath, expertName) if (!expertPath) { return "" } - const docsDir = path.join(path.dirname(expertPath), ExpertManager.DOCS_DIR) - const statusFilePath = path.join(docsDir, ExpertManager.STATUS_FILE) + const docsDir = path.join(path.dirname(expertPath), ExpertFileManager.DOCS_DIR) + const statusFilePath = path.join(docsDir, ExpertFileManager.STATUS_FILE) if (!(await fileExistsAtPath(statusFilePath))) { return "" diff --git a/src/core/experts/DocumentProcessor.ts b/src/core/experts/DocumentProcessor.ts new file mode 100644 index 00000000..12292455 --- /dev/null +++ b/src/core/experts/DocumentProcessor.ts @@ -0,0 +1,307 @@ +import * as vscode from "vscode" +import { v4 as uuidv4 } from "uuid" +import fs from "fs" +import { DocumentLink, DocumentStatus } from "../../shared/experts" +import { CrawlResult, UrlContentFetcher } from "../../services/browser/UrlContentFetcher" +import { getAllExtensionState } from "../storage/state" +import { buildApiHandler } from "../../api" +import { HaiBuildDefaults } from 
"../../shared/haiDefaults" +import { ExpertFileManager } from "./ExpertFileManager" +import path from "path" + +/** + * Handles document processing operations + */ +export class DocumentProcessor { + private extensionContext: vscode.ExtensionContext + private workspaceId: string + private systemPrompt: string + private fileManager: ExpertFileManager + private urlContentFetcher: UrlContentFetcher + onCrawlComplete: + | (( + markdown: string, + expertName: string, + workspacePath: string, + url: string, + suburl: string, + title?: string, + ) => Promise) + | undefined = undefined + + /** + * Create a new DocumentProcessor + */ + constructor(extensionContext: vscode.ExtensionContext, workspaceId: string) { + this.extensionContext = extensionContext + this.workspaceId = workspaceId + this.systemPrompt = HaiBuildDefaults.defaultMarkDownSummarizer + this.fileManager = new ExpertFileManager() + this.urlContentFetcher = new UrlContentFetcher(this.extensionContext) + } + + /** + * Process document links for an expert + */ + public async processDocumentLinks( + expertName: string, + expertDir: string, + documentLinks: DocumentLink[], + workspacePath: string, + deepCrawl: boolean = false, + maxDepth: number = 10, + maxPages: number = 20, + crawlTimeout: number = 10_0000, + ): Promise { + // Simple dispatcher based on type + if (deepCrawl) { + await this.processDeepCrawlLinks(documentLinks, expertName, workspacePath, maxDepth, maxPages, crawlTimeout) + } else { + await this.processRegularLinks(expertName, expertDir, documentLinks, workspacePath) + } + } + + /** + * Process regular (non-deepcrawl) document links + */ + private async processRegularLinks( + expertName: string, + expertDir: string, + documentLinks: DocumentLink[], + workspacePath: string, + ): Promise { + if (!this.extensionContext) { + console.error("Extension context not available") + return + } + + const { docsDir, statusFilePath } = this.fileManager.getExpertPaths(workspacePath, expertName) + + try { + await 
this.urlContentFetcher.launchBrowser() + + // Read existing status data + let existingStatusData = await this.fileManager.readStatusFile(statusFilePath) + + for (const link of documentLinks) { + // Find or create entry in status data + let linkIndex = existingStatusData.findIndex((l) => l.url === link.url) + + const processingLink = { + ...link, + status: DocumentStatus.PROCESSING, + processedAt: new Date().toISOString(), + } + + if (linkIndex !== -1) { + existingStatusData[linkIndex] = processingLink + } else { + linkIndex = existingStatusData.push(processingLink) - 1 + } + + await this.fileManager.writeStatusFile(statusFilePath, existingStatusData) + + const updatedLink = await this.processSingleDocumentLink( + expertName, + docsDir, + processingLink, + this.urlContentFetcher, + ) + + existingStatusData[linkIndex] = updatedLink + + await this.fileManager.writeStatusFile(statusFilePath, existingStatusData) + } + } catch (error) { + console.error(`Error processing document links for expert ${expertName}:`, error) + } finally { + await this.urlContentFetcher.closeBrowser() + } + } + + /** + * Process deep crawl document links + */ + private async processDeepCrawlLinks( + documentLinks: DocumentLink[], + expertName: string, + workspacePath: string, + maxDepth: number, + maxPages: number, + crawlTimeout: number, + ): Promise { + // For deep crawl, we just need to crawl each URL + for (const link of documentLinks) { + await this.crawlAndConvertToMarkdown(link.url, expertName, workspacePath, maxDepth, maxPages, crawlTimeout) + } + } + + /** + * Process a single document link + */ + public async processSingleDocumentLink( + expertName: string, + docsDir: string, + link: DocumentLink, + urlContentFetcher: UrlContentFetcher, + ): Promise { + if (!link.filename) { + link.filename = `doc-${uuidv4()}.md` + } + + // Create a copy to avoid direct modification + const updatedLink = { + ...link, + status: DocumentStatus.PROCESSING, + processedAt: new Date().toISOString(), + } + + 
try { + // Fetch and convert document + const markdown = await this.fetchAndConvertDocument(link.url, urlContentFetcher) + + // Make sure docs directory exists + await this.fileManager.createExpertDocsDirectory(docsDir) + + // Save to file - ensure filename is always a string + const docFilePath = this.fileManager.getDocumentFilePath(docsDir, updatedLink.filename || `doc-${uuidv4()}.md`) + await this.fileManager.writeExpertPrompt(docFilePath, markdown) + + // Update status + updatedLink.status = DocumentStatus.COMPLETED + updatedLink.processedAt = new Date().toISOString() + updatedLink.error = null + + console.log(`Successfully processed document link for expert ${expertName}: ${link.url}`) + } catch (error) { + // Update with error status + updatedLink.status = DocumentStatus.FAILED + updatedLink.processedAt = new Date().toISOString() + updatedLink.error = error instanceof Error ? error.message : String(error) + + console.error(`Failed to process document link for expert ${expertName}:`, error) + } + + return updatedLink + } + + /** + * Fetch and convert a document to summarized markdown + */ + private async fetchAndConvertDocument(url: string, urlContentFetcher: UrlContentFetcher): Promise { + const markdown = await urlContentFetcher.urlToMarkdown(url) + return this.summarizeMarkdownContent(markdown) + } + + /** + * Summarize markdown content + */ + public async summarizeMarkdownContent(markdownContent: string): Promise { + let content = "" + const { apiConfiguration } = await getAllExtensionState(this.extensionContext, this.workspaceId) + const llmApi = buildApiHandler(apiConfiguration) + + const apiStream = llmApi.createMessage(this.systemPrompt, [ + { + role: "user", + content: markdownContent, + }, + ]) + + const iterator = apiStream[Symbol.asyncIterator]() + for await (const chunk of iterator) { + if (chunk && chunk.type === "text" && chunk.text) { + content += chunk.text + } + } + + return content + } + + /** + * Crawl a website and convert to markdown + */ 
+ /** + * Crawl a website and convert to markdown + */ + public async crawlAndConvertToMarkdown( + url: string, + expertName: string, + workspacePath: string, + maxDepth: number = 10, + maxPages: number = 20, + crawlTimeout: number = 10_0000, + ): Promise { + const { faissStatusFilePath } = this.fileManager.getExpertPaths(workspacePath, expertName) + + // Initialize or update status file in .faiss directory + let faissStatusData = await this.fileManager.readStatusFile(faissStatusFilePath) + + // Find or create entry for this URL + let linkIndex = faissStatusData.findIndex((link) => link.url === url) + if (linkIndex !== -1) { + faissStatusData[linkIndex].status = DocumentStatus.PROCESSING + faissStatusData[linkIndex].processedAt = new Date().toISOString() + } else { + linkIndex = + faissStatusData.push({ + url, + status: DocumentStatus.PROCESSING, + processedAt: new Date().toISOString(), + error: null, + }) - 1 + } + + // Update status file before crawling + await this.fileManager.updateFaissStatusFile(faissStatusFilePath, faissStatusData) + + // Store reference to this instance for use in the crawler + const self = this + + try { + await this.urlContentFetcher.launchBrowser() + + await this.urlContentFetcher.deepCrawl(url, { + maxDepth, + maxPages, + timeout: crawlTimeout, + onPageCrawlComplete: async (data: CrawlResult) => { + // Forward to vector store manager using the instance reference + if (self.onCrawlComplete) { + await self.onCrawlComplete(data.content, expertName, workspacePath, data.url, data.parentUrl || "") + } + }, + }) + + // Update status to COMPLETED after successful crawl + faissStatusData[linkIndex].status = DocumentStatus.COMPLETED + faissStatusData[linkIndex].processedAt = new Date().toISOString() + faissStatusData[linkIndex].error = null + + await this.fileManager.updateFaissStatusFile(faissStatusFilePath, faissStatusData) + } catch (error) { + // Update status to FAILED + console.error(`Error in crawling ${url} for expert ${expertName}:`, 
error) + + faissStatusData[linkIndex].status = DocumentStatus.FAILED + faissStatusData[linkIndex].processedAt = new Date().toISOString() + faissStatusData[linkIndex].error = error instanceof Error ? error.message : String(error) + + await this.fileManager.updateFaissStatusFile(faissStatusFilePath, faissStatusData) + } finally { + await this.urlContentFetcher.closeBrowser() + } + } + /** + * Create a new document link + */ + public createDocumentLink(url: string): DocumentLink { + return { + url, + status: DocumentStatus.PENDING, + filename: `doc-${uuidv4()}.md`, + processedAt: new Date().toISOString(), + error: null, + } + } +} diff --git a/src/core/experts/ExpertFileManager.ts b/src/core/experts/ExpertFileManager.ts new file mode 100644 index 00000000..7ffa9b19 --- /dev/null +++ b/src/core/experts/ExpertFileManager.ts @@ -0,0 +1,230 @@ +import fs from "fs/promises" +import * as path from "path" +import { existsSync } from "fs" +import { fileExistsAtPath, createDirectoriesForFile } from "../../utils/fs" +import { DocumentLink, DocumentStatus, ExpertData, ExpertDataSchema } from "../../shared/experts" +import { GlobalFileNames } from "../../global-constants" +import { ExpertPaths, ExpertMetadata } from "./types" + +/** + * Manages file operations for experts + */ +export class ExpertFileManager { + // File and directory constants + public static readonly METADATA_FILE = "metadata.json" + public static readonly PROMPT_FILE = "prompt.md" + public static readonly ICON = "icon.svg" + public static readonly DOCS_DIR = "docs" + public static readonly STATUS_FILE = "status.json" + public static readonly PLACEHOLDER_FILE = "placeholder.txt" + public static readonly FAISS = ".faiss" + public static readonly CRAWLEE_STORAGE = ".crawlee" + + /** + * Format expert names to be file system friendly + */ + public formatExpertName(name: string): string { + return name.replace(/[^a-zA-Z0-9_-]/g, "_").toLowerCase() + } + + /** + * Get expert directory paths + */ + public 
getExpertPaths(workspacePath: string, expertName: string): ExpertPaths { + const sanitizedName = this.formatExpertName(expertName) + const expertDir = path.join(workspacePath, GlobalFileNames.experts, sanitizedName) + const docsDir = path.join(expertDir, ExpertFileManager.DOCS_DIR) + const statusFilePath = path.join(docsDir, ExpertFileManager.STATUS_FILE) + const metadataFilePath = path.join(expertDir, ExpertFileManager.METADATA_FILE) + const faissFilePath = path.join(expertDir, ExpertFileManager.FAISS) + const faissStatusFilePath = path.join(faissFilePath, ExpertFileManager.STATUS_FILE) + const crawlStorage = path.join(expertDir, ExpertFileManager.CRAWLEE_STORAGE) + + return { + sanitizedName, + expertDir, + docsDir, + statusFilePath, + metadataFilePath, + faissFilePath, + faissStatusFilePath, + crawlStorage, + } + } + + /** + * Read status file + */ + public async readStatusFile(filePath: string): Promise { + try { + // First check if the file exists + if (await fileExistsAtPath(filePath)) { + try { + // Read and parse the content + const content = await fs.readFile(filePath, "utf-8") + const parsed = JSON.parse(content) + + // Validate that it's an array + if (Array.isArray(parsed)) { + // Ensure each item has a valid status value + return parsed.map((item) => { + // Ensure status is one of the valid enum values + if ( + item.status && + item.status !== DocumentStatus.PENDING && + item.status !== DocumentStatus.PROCESSING && + item.status !== DocumentStatus.COMPLETED && + item.status !== DocumentStatus.FAILED + ) { + item.status = DocumentStatus.PENDING + } + return item + }) + } + + // If not an array, return empty + console.error(`Status file content is not an array: ${filePath}`) + return [] + } catch (parseError) { + console.error(`Failed to parse status file at ${filePath}:`, parseError) + return [] + } + } + // If the file doesn't exist, return an empty array + return [] + } catch (error) { + console.error(`Failed to check or read status file at 
${filePath}:`, error) + return [] + } + } + + /** + * Write status file + */ + public async writeStatusFile(filePath: string, data: DocumentLink[]): Promise { + try { + await createDirectoriesForFile(filePath) + await fs.writeFile(filePath, JSON.stringify(data, null, 2)) + } catch (error) { + console.error(`Failed to write status file at ${filePath}:`, error) + } + } + + /** + * Update faiss status file + */ + public async updateFaissStatusFile(faissStatusFilePath: string, links: DocumentLink[]): Promise { + try { + await createDirectoriesForFile(faissStatusFilePath) + await fs.writeFile(faissStatusFilePath, JSON.stringify(links, null, 2)) + } catch (error) { + console.error(`Failed to update faiss status file at ${faissStatusFilePath}:`, error) + } + } + + /** + * Read expert metadata + */ + public async readExpertMetadata(metadataPath: string): Promise { + try { + if (await fileExistsAtPath(metadataPath)) { + const content = await fs.readFile(metadataPath, "utf-8") + return JSON.parse(content) + } + return null + } catch (error) { + console.error(`Failed to read metadata at ${metadataPath}:`, error) + return null + } + } + + /** + * Write expert metadata + */ + public async writeExpertMetadata(metadataPath: string, metadata: ExpertMetadata): Promise { + try { + await createDirectoriesForFile(metadataPath) + await fs.writeFile(metadataPath, JSON.stringify(metadata, null, 2)) + } catch (error) { + console.error(`Failed to write metadata at ${metadataPath}:`, error) + } + } + + /** + * Read expert prompt + */ + public async readExpertPrompt(promptPath: string): Promise { + try { + if (await fileExistsAtPath(promptPath)) { + return await fs.readFile(promptPath, "utf-8") + } + return null + } catch (error) { + console.error(`Failed to read prompt at ${promptPath}:`, error) + return null + } + } + + /** + * Write expert prompt + */ + public async writeExpertPrompt(promptPath: string, prompt: string): Promise { + try { + await createDirectoriesForFile(promptPath) + 
await fs.writeFile(promptPath, prompt) + } catch (error) { + console.error(`Failed to write prompt at ${promptPath}:`, error) + } + } + + /** + * Check if expert exists + */ + public async expertExists(workspacePath: string, expertName: string): Promise { + const { expertDir } = this.getExpertPaths(workspacePath, expertName) + return await fileExistsAtPath(expertDir) + } + + /** + * Get document file path + */ + public getDocumentFilePath(docsDir: string, filename: string): string { + return path.join(docsDir, filename) + } + + /** + * Create expert directory structure + */ + public async createExpertDirectoryStructure(expertDir: string): Promise { + await createDirectoriesForFile(path.join(expertDir, ExpertFileManager.PLACEHOLDER_FILE)) + } + + /** + * Create expert docs directory + */ + public async createExpertDocsDirectory(docsDir: string): Promise { + await createDirectoriesForFile(path.join(docsDir, ExpertFileManager.PLACEHOLDER_FILE)) + } + + /** + * Delete file if exists + */ + public async deleteFileIfExists(filePath: string): Promise { + if (await fileExistsAtPath(filePath)) { + await fs.unlink(filePath) + } + } + + /** + * Delete expert directory + */ + public async deleteExpertDirectory(expertDir: string): Promise { + try { + if (await fileExistsAtPath(expertDir)) { + await fs.rm(expertDir, { recursive: true, force: true }) + } + } catch (error) { + console.error(`Failed to delete expert directory ${expertDir}:`, error) + } + } +} diff --git a/src/core/experts/ExpertManager.ts b/src/core/experts/ExpertManager.ts index eb8526fe..06964aa2 100644 --- a/src/core/experts/ExpertManager.ts +++ b/src/core/experts/ExpertManager.ts @@ -1,50 +1,46 @@ -import fs from "fs/promises" -import * as path from "path" import * as vscode from "vscode" +import * as path from "path" +import fs from "fs/promises" import { v4 as uuidv4 } from "uuid" import { DocumentLink, ExpertData, ExpertDataSchema, DocumentStatus } from "../../shared/experts" -import { fileExistsAtPath, 
createDirectoriesForFile } from "../../utils/fs" import { GlobalFileNames } from "../../global-constants" +import { EmbeddingConfiguration } from "@/shared/embeddings" +import { fileExistsAtPath } from "../../utils/fs" import { UrlContentFetcher } from "../../services/browser/UrlContentFetcher" +import { ExpertFileManager } from "./ExpertFileManager" +import { DocumentProcessor } from "./DocumentProcessor" +import { VectorStoreManager } from "./VectorStoreManager" import { getAllExtensionState } from "../storage/state" -import { buildApiHandler } from "../../api" -import { HaiBuildDefaults } from "../../shared/haiDefaults" +/** + * Manages experts, coordinating between file operations, document processing, + * and vector store management + */ export class ExpertManager { private extensionContext: vscode.ExtensionContext private workspaceId: string - private systemPrompt: string - - public static readonly METADATA_FILE = "metadata.json" - public static readonly PROMPT_FILE = "prompt.md" - public static readonly ICON = "icon.svg" - public static readonly DOCS_DIR = "docs" - public static readonly STATUS_FILE = "status.json" - public static readonly PLACEHOLDER_FILE = "placeholder.txt" - - constructor(extensionContext: vscode.ExtensionContext, workspaceId: string) { - this.extensionContext = extensionContext - this.workspaceId = workspaceId - this.systemPrompt = HaiBuildDefaults.defaultMarkDownSummarizer - } - - /** - * Utility function to format expert names - */ - private formatExpertName(name: string): string { - return name.replace(/[^a-zA-Z0-9_-]/g, "_").toLowerCase() - } + private fileManager: ExpertFileManager + private documentProcessor: DocumentProcessor + private vectorStoreManager: VectorStoreManager /** - * Helper to get expert directory paths + * Create a new ExpertManager */ - private getExpertPaths(workspacePath: string, expertName: string) { - const sanitizedName = this.formatExpertName(expertName) - const expertDir = path.join(workspacePath, 
GlobalFileNames.experts, sanitizedName) - const docsDir = path.join(expertDir, ExpertManager.DOCS_DIR) - const statusFilePath = path.join(docsDir, ExpertManager.STATUS_FILE) - const metadataFilePath = path.join(expertDir, ExpertManager.METADATA_FILE) - return { sanitizedName, expertDir, docsDir, statusFilePath, metadataFilePath } + constructor(extensionContext: vscode.ExtensionContext, workspaceId: string, embeddingConfig: EmbeddingConfiguration) { + this.extensionContext = extensionContext + this.workspaceId = workspaceId + this.fileManager = new ExpertFileManager() + this.documentProcessor = new DocumentProcessor(extensionContext, workspaceId) + + // Initialize embedding client and vector store manager + const embeddings = VectorStoreManager.initializeEmbeddings(embeddingConfig) + this.vectorStoreManager = new VectorStoreManager({ + embeddings, + embeddingConfig, + workspaceId, + }) + + this.connectProcessorToVectorStore() } /** @@ -55,135 +51,139 @@ export class ExpertManager { throw new Error("No workspace path provided") } + // Validate expert data const validationResult = ExpertDataSchema.safeParse(expert) if (!validationResult.success) { throw new Error(`Invalid expert data: ${validationResult.error.message}`) } const parsedExpert = validationResult.data + const isDeepCrawl = parsedExpert.deepCrawl || false + + const { expertDir, docsDir, statusFilePath, faissStatusFilePath, metadataFilePath } = this.fileManager.getExpertPaths( + workspacePath, + parsedExpert.name, + ) - const { expertDir, docsDir, statusFilePath, metadataFilePath } = this.getExpertPaths(workspacePath, parsedExpert.name) - await createDirectoriesForFile(path.join(expertDir, ExpertManager.PLACEHOLDER_FILE)) + // Create expert directory structure + await this.fileManager.createExpertDirectoryStructure(expertDir) + // Prepare and write metadata const metadata = { name: parsedExpert.name, isDefault: parsedExpert.isDefault, createdAt: parsedExpert.createdAt || new Date().toISOString(), 
documentLinks: parsedExpert.documentLinks || [], + deepCrawl: isDeepCrawl, + maxDepth: parsedExpert.maxDepth || 10, + maxPages: parsedExpert.maxPages || 20, + crawlTimeout: parsedExpert.crawlTimeout || 10_0000, } - await fs.writeFile(metadataFilePath, JSON.stringify(metadata, null, 2)) + await this.fileManager.writeExpertMetadata(metadataFilePath, metadata) - const promptFilePath = path.join(expertDir, ExpertManager.PROMPT_FILE) - await fs.writeFile(promptFilePath, parsedExpert.prompt) + // Write prompt file + const promptFilePath = `${expertDir}/${ExpertFileManager.PROMPT_FILE}` + await this.fileManager.writeExpertPrompt(promptFilePath, parsedExpert.prompt) + // Process document links if any if (parsedExpert.documentLinks && parsedExpert.documentLinks.length > 0) { - const docsDir = path.join(expertDir, ExpertManager.DOCS_DIR) - await createDirectoriesForFile(path.join(docsDir, ExpertManager.PLACEHOLDER_FILE)) - // Set initial status to "pending" const statusData = parsedExpert.documentLinks.map((link) => ({ ...link, - filename: `doc-${uuidv4()}.md`, + ...(isDeepCrawl ? 
{} : { filename: `doc-${uuidv4()}.md` }), status: DocumentStatus.PENDING, processedAt: new Date().toISOString(), error: null, })) - const statusFilePath = path.join(docsDir, ExpertManager.STATUS_FILE) - await fs.writeFile(statusFilePath, JSON.stringify(statusData, null, 2)) + if (isDeepCrawl) { + // For deepcrawl, store status in the .faiss directory + await this.fileManager.updateFaissStatusFile(faissStatusFilePath, statusData) + } else { + // For regular docs, store status in the docs directory + await this.fileManager.createExpertDocsDirectory(docsDir) + await this.fileManager.writeStatusFile(statusFilePath, statusData) + } - // Process document links (pass workspacePath) - this.processDocumentLinks(parsedExpert.name, expertDir, statusData, workspacePath) + // Process document links + await this.documentProcessor.processDocumentLinks( + parsedExpert.name, + expertDir, + statusData, + workspacePath, + isDeepCrawl, + parsedExpert.maxDepth, + parsedExpert.maxPages, + parsedExpert.crawlTimeout, + ) } } /** - * Process document links for an expert + * Connect document processor to vector store + * This is needed to handle crawled content */ - private async processDocumentLinks( - expertName: string, - expertDir: string, - documentLinks: DocumentLink[], - workspacePath: string, - ): Promise { - const { docsDir, statusFilePath } = this.getExpertPaths(workspacePath, expertName) - - if (!this.extensionContext) { - console.error("Extension context not available") - return - } - - const urlContentFetcher = new UrlContentFetcher(this.extensionContext) - try { - await urlContentFetcher.launchBrowser() - - // Read the existing status data - let existingStatusData: DocumentLink[] = [] - if (await fileExistsAtPath(statusFilePath)) { - try { - const fileContent = await fs.readFile(statusFilePath, "utf-8") - existingStatusData = JSON.parse(fileContent) - } catch (error) { - console.error("Failed to read existing status data:", error) - } - } - - for (const link of documentLinks) 
{ - // Find the existing link in the status data - const existingLinkIndex = existingStatusData.findIndex((l) => l.url === link.url) - - // Update to processing before extraction - link.status = DocumentStatus.PROCESSING - link.processedAt = new Date().toISOString() - - if (existingLinkIndex !== -1) { - // Update the existing link - existingStatusData[existingLinkIndex] = link - } else { - // Add the new link - existingStatusData.push(link) - } - - await fs.writeFile(statusFilePath, JSON.stringify(existingStatusData, null, 2)) - - try { - const markdown = await urlContentFetcher.urlToMarkdown(link.url) - const summarizedMarkDown = await this.summarizeMarDownContent(markdown) - const docFilePath = path.join(docsDir, link.filename || "") - await fs.writeFile(docFilePath, summarizedMarkDown) - - link.status = DocumentStatus.COMPLETED - link.processedAt = new Date().toISOString() - link.error = null - } catch (error) { - link.status = DocumentStatus.FAILED - link.processedAt = new Date().toISOString() - link.error = error instanceof Error ? 
error.message : String(error) - console.error(`Failed to process document link for expert ${expertName}:`, error) - } - - // Update the status file after processing - if (existingLinkIndex !== -1) { - existingStatusData[existingLinkIndex] = link - } else { - existingStatusData.push(link) - } - await fs.writeFile(statusFilePath, JSON.stringify(existingStatusData, null, 2)) - } - } catch (error) { - console.error(`Error processing document links for expert ${expertName}:`, error) - } finally { - await urlContentFetcher.closeBrowser() + async connectProcessorToVectorStore(): Promise { + // Add the onCrawlComplete handler to pass data from DocumentProcessor to VectorStoreManager + // This injects the handleCrawledContent method from the DocumentProcessor class + this.documentProcessor.onCrawlComplete = async ( + markdown: string, + expertName: string, + workspacePath: string, + url: string, + suburl: string, + title?: string, + ): Promise => { + await this.vectorStoreManager.chunkAndStore({ + markdown, + expertName, + workspacePath, + url, + suburl, + title, + }) } } /** - * Refresh (or edit) a single document link for an expert. 
+ * Refresh a document link */ async refreshDocumentLink(workspacePath: string, expertName: string, linkUrl: string): Promise { - const { expertDir, docsDir, statusFilePath } = this.getExpertPaths(workspacePath, expertName) + // Read metadata to determine if this is a deepcrawl expert + const { docsDir, statusFilePath, faissStatusFilePath, metadataFilePath } = this.fileManager.getExpertPaths( + workspacePath, + expertName, + ) + + // Get deepcrawl setting from metadata + const metadata = await this.fileManager.readExpertMetadata(metadataFilePath) + if (!metadata) { + throw new Error(`Expert metadata not found for ${expertName}`) + } + + const isDeepCrawl = metadata.deepCrawl || false + + if (isDeepCrawl) { + // For deepcrawl experts, delete chunks and re-crawl + await this.vectorStoreManager.deleteChunk(linkUrl, expertName, workspacePath) + await this.documentProcessor.crawlAndConvertToMarkdown( + linkUrl, + expertName, + workspacePath, + metadata.maxDepth || 10, + metadata.maxPages || 20, + metadata.crawlTimeout || 10_0000, + ) + return + } + + // For regular experts, read the status file + const statusData = await this.fileManager.readStatusFile(statusFilePath) + if (statusData.length === 0) { + console.error("Status file not found or empty for expert:", expertName) + return + } - let statusData: DocumentLink[] = JSON.parse(await fs.readFile(statusFilePath, "utf-8")) const index = statusData.findIndex((link) => link.url === linkUrl) if (index === -1) { return @@ -192,208 +192,269 @@ export class ExpertManager { // Update status to processing statusData[index].status = DocumentStatus.PROCESSING statusData[index].processedAt = new Date().toISOString() - await fs.writeFile(statusFilePath, JSON.stringify(statusData, null, 2)) + await this.fileManager.writeStatusFile(statusFilePath, statusData) if (!this.extensionContext) { console.error("Extension context not available") return } + + // Create URL content fetcher and process the link const urlContentFetcher = new 
UrlContentFetcher(this.extensionContext) - await urlContentFetcher.launchBrowser() try { - const markdown = await urlContentFetcher.urlToMarkdown(linkUrl) - const summarizedMarkDown = await this.summarizeMarDownContent(markdown) - const docFilePath = path.join(docsDir, statusData[index].filename || "") - await fs.writeFile(docFilePath, summarizedMarkDown) + await urlContentFetcher.launchBrowser() - statusData[index].status = DocumentStatus.COMPLETED - statusData[index].processedAt = new Date().toISOString() - statusData[index].error = null - await fs.writeFile(statusFilePath, JSON.stringify(statusData, null, 2)) + // Process the document link + const updatedLink = await this.documentProcessor.processSingleDocumentLink( + expertName, + docsDir, + statusData[index], + urlContentFetcher, + ) + + // Update the status file with the result + statusData[index] = updatedLink + await this.fileManager.writeStatusFile(statusFilePath, statusData) } catch (error) { + // Handle any errors during processing statusData[index].status = DocumentStatus.FAILED statusData[index].processedAt = new Date().toISOString() statusData[index].error = error instanceof Error ? 
error.message : String(error) - await fs.writeFile(statusFilePath, JSON.stringify(statusData, null, 2)) - console.error(`Failed to refresh document link for expert ${expertName}:`, error) + await this.fileManager.writeStatusFile(statusFilePath, statusData) } finally { await urlContentFetcher.closeBrowser() } } /** - * Add document link + * Add a document link to an expert */ - async addDocumentLink(workspacePath: string, expertName: string, linkUrl: string): Promise { - const { expertDir, statusFilePath, metadataFilePath } = this.getExpertPaths(workspacePath, expertName) - - // Ensure the docs directory exists - await createDirectoriesForFile(statusFilePath) - - // Read or initialize the status file - let statusData: DocumentLink[] = [] - if (await fileExistsAtPath(statusFilePath)) { - statusData = JSON.parse(await fs.readFile(statusFilePath, "utf-8")) + const { expertDir, statusFilePath, faissStatusFilePath, metadataFilePath } = this.fileManager.getExpertPaths( + workspacePath, + expertName, + ) + + // Read metadata to determine if this is a deepcrawl expert + const metadata = await this.fileManager.readExpertMetadata(metadataFilePath) + if (!metadata) { + throw new Error(`Expert metadata not found for ${expertName}`) } - // Check if the maximum number of document links is reached - if (statusData.length >= 3) { - vscode.window.showWarningMessage("Maximum of 3 document links allowed. 
Additional links cannot be added.") - return - } + const isDeepCrawl = metadata.deepCrawl || false + const newLink = this.documentProcessor.createDocumentLink(linkUrl) - // Add the new document link - const newLink: DocumentLink = { - url: linkUrl, - status: DocumentStatus.PENDING, - filename: `doc-${uuidv4()}.md`, - processedAt: new Date().toISOString(), - error: null, - } + if (isDeepCrawl) { + // For deepcrawl experts, use faiss status file + let faissStatusData = await this.fileManager.readStatusFile(faissStatusFilePath) - statusData.push(newLink) - await fs.writeFile(statusFilePath, JSON.stringify(statusData, null, 2)) + // Check max links + if (faissStatusData.length >= 3) { + vscode.window.showWarningMessage("Maximum of 3 document links allowed. Additional links cannot be added.") + return + } - // Update metadata.json with the new document link - const metadata = JSON.parse(await fs.readFile(metadataFilePath, "utf-8")) - metadata.documentLinks = statusData.map((link) => ({ url: link.url })) - await fs.writeFile(metadataFilePath, JSON.stringify(metadata, null, 2)) + // Add to faiss status.json + faissStatusData.push(newLink) + await this.fileManager.updateFaissStatusFile(faissStatusFilePath, faissStatusData) + + // Update metadata.json + metadata.documentLinks = faissStatusData.map((link) => ({ url: link.url })) + await this.fileManager.writeExpertMetadata(metadataFilePath, metadata) + + // Process the document with deep crawling + await this.documentProcessor.crawlAndConvertToMarkdown( + linkUrl, + expertName, + workspacePath, + metadata.maxDepth || 10, + metadata.maxPages || 20, + metadata.crawlTimeout || 10_0000, + ) + } else { + // For regular experts, use docs status file + let statusData = await this.fileManager.readStatusFile(statusFilePath) + + // Check if the maximum number of document links is reached + if (statusData.length >= 3) { + vscode.window.showWarningMessage("Maximum of 3 document links allowed. 
Additional links cannot be added.") + return + } - // Process the newly added document link - await this.processDocumentLinks(expertName, expertDir, [newLink], workspacePath) + // Add to status.json + statusData.push(newLink) + await this.fileManager.writeStatusFile(statusFilePath, statusData) + + // Update metadata.json with the new document link + metadata.documentLinks = statusData.map((link) => ({ url: link.url })) + await this.fileManager.writeExpertMetadata(metadataFilePath, metadata) + + // Process the newly added document link + await this.documentProcessor.processDocumentLinks( + expertName, + expertDir, + [newLink], + workspacePath, + isDeepCrawl, + metadata.maxDepth, + metadata.maxPages, + metadata.crawlTimeout, + ) + } } /** - * Delete a document link for a custom expert + * Delete a document link from an expert */ async deleteDocumentLink(workspacePath: string, expertName: string, linkUrl: string): Promise { - const { docsDir, statusFilePath, metadataFilePath } = this.getExpertPaths(workspacePath, expertName) - - if (!(await fileExistsAtPath(statusFilePath))) { - throw new Error("Status file not found") + const { docsDir, statusFilePath, faissStatusFilePath, metadataFilePath } = this.fileManager.getExpertPaths( + workspacePath, + expertName, + ) + + // Read metadata to determine if this was a deepcrawl link + const metadata = await this.fileManager.readExpertMetadata(metadataFilePath) + if (!metadata) { + throw new Error(`Expert metadata not found for ${expertName}`) } - const statusData: DocumentLink[] = JSON.parse(await fs.readFile(statusFilePath, "utf-8")) - const updatedStatusData = statusData.filter((link) => link.url !== linkUrl) + const isDeepCrawl = metadata.deepCrawl || false - await fs.writeFile(statusFilePath, JSON.stringify(updatedStatusData, null, 2)) + // Delete from vector DB regardless (since the URL might have been crawled before) + await this.vectorStoreManager.deleteChunk(linkUrl, expertName, workspacePath) - // Update metadata.json 
after deleting the document link - const metadata = JSON.parse(await fs.readFile(metadataFilePath, "utf-8")) - metadata.documentLinks = updatedStatusData.map((link) => ({ url: link.url })) - await fs.writeFile(metadataFilePath, JSON.stringify(metadata, null, 2)) - - // Optionally delete the associated file if it exists - const linkToDelete = statusData.find((link) => link.url === linkUrl) - if (linkToDelete?.filename) { - const filePath = path.join(docsDir, linkToDelete.filename) - if (await fileExistsAtPath(filePath)) { - await fs.unlink(filePath) + // Update regular status.json if it exists + if (await this.fileManager.readStatusFile(statusFilePath)) { + const statusData = await this.fileManager.readStatusFile(statusFilePath) + const updatedStatusData = statusData.filter((link) => link.url !== linkUrl) + await this.fileManager.writeStatusFile(statusFilePath, updatedStatusData) + + // Delete the associated file if it exists + const linkToDelete = statusData.find((link) => link.url === linkUrl) + if (linkToDelete?.filename) { + const filePath = this.fileManager.getDocumentFilePath(docsDir, linkToDelete.filename) + await this.fileManager.deleteFileIfExists(filePath) } } + + // Update faiss status.json if it exists for deepcrawl experts + if (isDeepCrawl) { + const faissStatusData = await this.fileManager.readStatusFile(faissStatusFilePath) + const updatedFaissStatusData = faissStatusData.filter((link) => link.url !== linkUrl) + await this.fileManager.updateFaissStatusFile(faissStatusFilePath, updatedFaissStatusData) + } + + // Update metadata.json after deleting the document link + metadata.documentLinks = metadata.documentLinks.filter((link: any) => link.url !== linkUrl) + await this.fileManager.writeExpertMetadata(metadataFilePath, metadata) } /** * Read all experts from the .hai-experts directory */ - async readExperts(workspacePath: string): Promise { + async readExperts(workspacePath: string): Promise<{ experts: ExpertData[]; selectedExpert: ExpertData | null 
}> { if (!workspacePath) { - return [] + return { experts: [], selectedExpert: null } } - const expertsDir = path.join(workspacePath, GlobalFileNames.experts) - if (!(await fileExistsAtPath(expertsDir))) { - return [] - } + const { expertName } = await getAllExtensionState(this.extensionContext, this.workspaceId) + const expertsDir = `${workspacePath}/${GlobalFileNames.experts}` try { const expertFolders = await fs.readdir(expertsDir) const experts: ExpertData[] = [] + let selectedExpert: ExpertData | null = null for (const folder of expertFolders) { - const expertDir = path.join(expertsDir, folder) + const expertDir = `${expertsDir}/${folder}` const stats = await fs.stat(expertDir) if (!stats.isDirectory()) { continue } try { - const metadataPath = path.join(expertDir, ExpertManager.METADATA_FILE) - const promptPath = path.join(expertDir, ExpertManager.PROMPT_FILE) - const docsDir = path.join(expertDir, ExpertManager.DOCS_DIR) - const statusFilePath = path.join(docsDir, ExpertManager.STATUS_FILE) + const { docsDir, statusFilePath, faissStatusFilePath, metadataFilePath } = this.fileManager.getExpertPaths( + workspacePath, + folder, + ) + + const promptPath = `${expertDir}/${ExpertFileManager.PROMPT_FILE}` - if (!(await fileExistsAtPath(metadataPath)) || !(await fileExistsAtPath(promptPath))) { + // Skip if metadata or prompt is missing + if (!(await fileExistsAtPath(metadataFilePath)) || !(await fileExistsAtPath(promptPath))) { continue } - const metadataContent = await fs.readFile(metadataPath, "utf-8") - const metadata = JSON.parse(metadataContent) - const promptContent = await fs.readFile(promptPath, "utf-8") - - let documentLinks: DocumentLink[] = metadata.documentLinks || [] - - // Keep metadata.json as source of truth - if (await fileExistsAtPath(statusFilePath)) { - try { - const statusContent = await fs.readFile(statusFilePath, "utf-8") - const allStatusLinks: DocumentLink[] = JSON.parse(statusContent) - - const metadataUrls = new 
Set(documentLinks.map((link) => link.url)) - const statusUrls = new Set(allStatusLinks.map((link) => link.url)) - - // Add missing links from metadata.json - for (const link of documentLinks) { - if (!statusUrls.has(link.url)) { - await this.addDocumentLink(workspacePath, metadata.name, link.url) - } - } - - // Remove links from status.json that are not in metadata.json - for (const link of allStatusLinks) { - if (!metadataUrls.has(link.url)) { - await this.deleteDocumentLink(workspacePath, metadata.name, link.url) - } - } + // Read metadata and prompt + const metadata = await this.fileManager.readExpertMetadata(metadataFilePath) + const promptContent = await this.fileManager.readExpertPrompt(promptPath) - // Filtered status.json entries based on metadata.json - const seenUrls = new Set() - documentLinks = [] + if (!metadata || !promptContent) { + continue + } - for (const link of allStatusLinks) { - if (metadataUrls.has(link.url) && !seenUrls.has(link.url)) { - documentLinks.push(link) - seenUrls.add(link.url) - } - } - } catch (error) { - console.error(`Failed to sync status.json for ${folder}:`, error) - } + // Initialize document links from metadata + let documentLinks: DocumentLink[] = metadata.documentLinks.map((link) => ({ + url: link.url, + status: DocumentStatus.PENDING, + processedAt: new Date().toISOString(), + error: null, + })) + + const isDeepCrawl = metadata.deepCrawl || false + + // Synchronize document links based on expert type + if (isDeepCrawl) { + documentLinks = await this.syncDeepCrawlLinks( + faissStatusFilePath, + documentLinks, + metadata.name, + workspacePath, + metadata.maxDepth || 10, + metadata.maxPages || 20, + metadata.crawlTimeout || 10_0000, + ) } else { - // status.json missing, process links from metadata - for (const link of documentLinks) { - await this.addDocumentLink(workspacePath, metadata.name, link.url) - } + documentLinks = await this.syncRegularLinks( + statusFilePath, + documentLinks, + metadata.name, + workspacePath, + 
expertDir, + ) + } - if (await fileExistsAtPath(statusFilePath)) { - const refreshed = JSON.parse(await fs.readFile(statusFilePath, "utf-8")) - const metadataUrls = new Set(documentLinks.map((l) => l.url)) - documentLinks = refreshed.filter((link: DocumentLink) => metadataUrls.has(link.url)) + // Determine expert status based on document link statuses + let expertStatus = DocumentStatus.COMPLETED + if (documentLinks.length > 0) { + const hasNonCompletedLinks = documentLinks.some((link) => link.status !== DocumentStatus.COMPLETED) + if (hasNonCompletedLinks) { + expertStatus = DocumentStatus.PROCESSING } } + // Build expert data const expertData = { name: metadata.name, isDefault: metadata.isDefault, prompt: promptContent, createdAt: metadata.createdAt, documentLinks, + deepCrawl: isDeepCrawl, + maxDepth: metadata.maxDepth, + maxPages: metadata.maxPages, + crawlTimeout: metadata.crawlTimeout, + status: expertStatus, } + // Validate and add to list const validationResult = ExpertDataSchema.safeParse(expertData) if (validationResult.success) { - experts.push(validationResult.data) + const validExpert = validationResult.data + experts.push(validExpert) + if (expertName && validExpert.name === expertName) { + selectedExpert = validExpert + } } else { vscode.window.showWarningMessage( `Invalid expert data for ${folder}: ${validationResult.error.issues.map((i) => i.message).join(", ")}`, @@ -404,10 +465,172 @@ export class ExpertManager { } } - return experts + return { experts, selectedExpert } } catch (error) { console.error("Failed to read experts directory:", error) - return [] + return { experts: [], selectedExpert: null } + } + } + + /** + * Synchronize deep crawl document links + */ + private async syncDeepCrawlLinks( + faissStatusFilePath: string, + documentLinks: DocumentLink[], + expertName: string, + workspacePath: string, + maxDepth: number, + maxPages: number, + crawlTimeout: number, + ): Promise { + try { + // Read faiss status or initialize it + let 
faissStatusLinks = await this.fileManager.readStatusFile(faissStatusFilePath) + + if (faissStatusLinks.length === 0) { + // Initialize from document links + faissStatusLinks = documentLinks.map((link) => ({ + url: link.url, + status: DocumentStatus.PENDING, + filename: `doc-${uuidv4()}.md`, // Ensure filename is set + processedAt: new Date().toISOString(), + error: null, + })) + + // Save initial faiss status + await this.fileManager.updateFaissStatusFile(faissStatusFilePath, faissStatusLinks) + + // Process links for crawling + for (const link of documentLinks) { + await this.documentProcessor.crawlAndConvertToMarkdown( + link.url, + expertName, + workspacePath, + maxDepth, + maxPages, + crawlTimeout, + ) + } + + // Read updated status + return await this.fileManager.readStatusFile(faissStatusFilePath) + } else { + // Synchronize metadata links with faiss status links + const metadataUrls = new Set(documentLinks.map((link) => link.url)) + const faissStatusUrls = new Set(faissStatusLinks.map((link) => link.url)) + + // Add missing links from metadata to faiss status + for (const link of documentLinks) { + if (!faissStatusUrls.has(link.url)) { + try { + await this.addDocumentLink(workspacePath, expertName, link.url) + } catch (error) { + console.error(`Failed to add document link ${link.url}:`, error) + } + } + } + + // Remove links from faiss status that are not in metadata + for (const link of faissStatusLinks) { + if (!metadataUrls.has(link.url)) { + try { + await this.deleteDocumentLink(workspacePath, expertName, link.url) + } catch (error) { + console.error(`Failed to delete document link ${link.url}:`, error) + } + } + } + + // Return updated faiss status links with proper error handling + try { + const updatedLinks = await this.fileManager.readStatusFile(faissStatusFilePath) + return updatedLinks.filter((link) => metadataUrls.has(link.url)) + } catch (error) { + console.error(`Failed to read status file ${faissStatusFilePath}:`, error) + // Return the links 
we have in memory if file read fails + return faissStatusLinks.filter((link) => metadataUrls.has(link.url)) + } + } + } catch (error) { + console.error(`Error in syncDeepCrawlLinks for expert ${expertName}:`, error) + // Return the original links if something went wrong + return documentLinks + } + } + + /** + * Synchronize regular document links + */ + private async syncRegularLinks( + statusFilePath: string, + documentLinks: DocumentLink[], + expertName: string, + workspacePath: string, + expertDir: string, + ): Promise { + try { + // Read status or initialize it + let statusLinks = await this.fileManager.readStatusFile(statusFilePath) + + if (statusLinks.length === 0) { + // Process links from metadata, ensure they have filenames + for (const link of documentLinks) { + try { + await this.addDocumentLink(workspacePath, expertName, link.url) + } catch (error) { + console.error(`Failed to add document link ${link.url}:`, error) + } + } + + // Read the updated status + try { + return await this.fileManager.readStatusFile(statusFilePath) + } catch (error) { + console.error(`Failed to read status file ${statusFilePath}:`, error) + return documentLinks + } + } else { + // Synchronize metadata links with status links + const metadataUrls = new Set(documentLinks.map((link) => link.url)) + const statusUrls = new Set(statusLinks.map((link) => link.url)) + + // Add missing links from metadata to status + for (const link of documentLinks) { + if (!statusUrls.has(link.url)) { + try { + await this.addDocumentLink(workspacePath, expertName, link.url) + } catch (error) { + console.error(`Failed to add document link ${link.url}:`, error) + } + } + } + + // Remove links from status that are not in metadata + for (const link of statusLinks) { + if (!metadataUrls.has(link.url)) { + try { + await this.deleteDocumentLink(workspacePath, expertName, link.url) + } catch (error) { + console.error(`Failed to delete document link ${link.url}:`, error) + } + } + } + + // Return updated 
status links + try { + const updatedLinks = await this.fileManager.readStatusFile(statusFilePath) + return updatedLinks.filter((link) => metadataUrls.has(link.url)) + } catch (error) { + console.error(`Failed to read status file ${statusFilePath}:`, error) + // Return what we have in memory if file read fails + return statusLinks.filter((link) => metadataUrls.has(link.url)) + } + } + } catch (error) { + console.error(`Error in syncRegularLinks for expert ${expertName}:`, error) + // Return original links if something went wrong + return documentLinks } } @@ -418,25 +641,30 @@ export class ExpertManager { if (!workspacePath) { throw new Error("No workspace path provided") } + if (!expertName || typeof expertName !== "string") { throw new Error("Expert name must be a non-empty string") } - const expertsDir = path.join(workspacePath, GlobalFileNames.experts) + + const expertsDir = `${workspacePath}/${GlobalFileNames.experts}` if (!(await fileExistsAtPath(expertsDir))) { return } + const expertFolders = await fs.readdir(expertsDir) for (const folder of expertFolders) { - const expertDir = path.join(expertsDir, folder) + const expertDir = `${expertsDir}/${folder}` const stats = await fs.stat(expertDir) + if (stats.isDirectory()) { - const metadataPath = path.join(expertDir, ExpertManager.METADATA_FILE) + const metadataPath = `${expertDir}/${ExpertFileManager.METADATA_FILE}` + if (await fileExistsAtPath(metadataPath)) { try { - const metadataContent = await fs.readFile(metadataPath, "utf-8") - const metadata = JSON.parse(metadataContent) - if (metadata.name === expertName) { - await fs.rm(expertDir, { recursive: true, force: true }) + const metadata = await this.fileManager.readExpertMetadata(metadataPath) + + if (metadata && metadata.name === expertName) { + await this.fileManager.deleteExpertDirectory(expertDir) return } } catch (error) { @@ -454,25 +682,31 @@ export class ExpertManager { if (!workspacePath) { throw new Error("No workspace path provided") } + if 
(!expertName || typeof expertName !== "string") { throw new Error("Expert name must be a non-empty string") } - const expertsDir = path.join(workspacePath, GlobalFileNames.experts) + + const expertsDir = `${workspacePath}/${GlobalFileNames.experts}` if (!(await fileExistsAtPath(expertsDir))) { return null } + const expertFolders = await fs.readdir(expertsDir) for (const folder of expertFolders) { - const expertDir = path.join(expertsDir, folder) + const expertDir = `${expertsDir}/${folder}` const stats = await fs.stat(expertDir) + if (stats.isDirectory()) { - const metadataPath = path.join(expertDir, ExpertManager.METADATA_FILE) + const metadataPath = `${expertDir}/${ExpertFileManager.METADATA_FILE}` + if (await fileExistsAtPath(metadataPath)) { try { - const metadataContent = await fs.readFile(metadataPath, "utf-8") - const metadata = JSON.parse(metadataContent) - if (metadata.name === expertName) { - const promptPath = path.join(expertDir, ExpertManager.PROMPT_FILE) + const metadata = await this.fileManager.readExpertMetadata(metadataPath) + + if (metadata && metadata.name === expertName) { + const promptPath = `${expertDir}/${ExpertFileManager.PROMPT_FILE}` + if (await fileExistsAtPath(promptPath)) { return promptPath } @@ -484,39 +718,18 @@ export class ExpertManager { } } } - return null - } - /** - * Summarize the content - */ - private async summarizeMarDownContent(markDownContent: string) { - let content = "" - const { apiConfiguration } = await getAllExtensionState(this.extensionContext, this.workspaceId) - const llmApi = buildApiHandler(apiConfiguration) - const apiStream = llmApi.createMessage(this.systemPrompt, [ - { - role: "user", - content: markDownContent, - }, - ]) - const iterator = apiStream[Symbol.asyncIterator]() - for await (const chunk of iterator) { - if (chunk && chunk.type === "text" && chunk.text) { - content += chunk.text - } - } - return content + return null } /** * Load the default Experts */ - - async loadDefaultExperts(): Promise { 
+ async loadDefaultExperts(): Promise<{ experts: ExpertData[]; selectedExpert: ExpertData | null }> { const expertsDir = path.join(this.extensionContext.extensionPath, GlobalFileNames.defaultExperts) - let experts: ExpertData[] = [] + let selectedExpert: ExpertData | null = null + const { expertName } = await getAllExtensionState(this.extensionContext, this.workspaceId) try { const directoryEntries = await fs.readdir(expertsDir, { withFileTypes: true }) @@ -528,21 +741,17 @@ export class ExpertManager { const folderName = directoryEntry.name const folderPath = path.join(expertsDir, folderName) - const promptPath = path.join(folderPath, ExpertManager.PROMPT_FILE) - const iconPath = path.join(folderPath, ExpertManager.ICON) + const promptPath = path.join(folderPath, ExpertFileManager.PROMPT_FILE) + const iconPath = path.join(folderPath, ExpertFileManager.ICON) - let prompt = "" - try { - prompt = await fs.readFile(promptPath, "utf8") - if (!prompt.trim()) { - console.warn(`Empty prompt for ${folderName}, skipping...`) - continue - } - } catch (error) { - console.error(`Error reading prompt for ${folderName}:`, error) + // Read prompt + let prompt = await this.fileManager.readExpertPrompt(promptPath) + if (!prompt || !prompt.trim()) { + console.warn(`Empty prompt for ${folderName}, skipping...`) continue } + // Read icon if available let iconBase64 = "" try { const svgContent = await fs.readFile(iconPath) @@ -551,17 +760,29 @@ export class ExpertManager { console.warn(`Icon not found for ${folderName}`) } - experts.push({ + const expert: ExpertData = { name: folderName, prompt, isDefault: true, iconComponent: iconBase64, - }) + } + experts.push(expert) + + if (expertName && expert.name === expertName) { + selectedExpert = expert + } } } catch (error) { console.error("Error reading experts directory:", error) } - return experts + return { experts, selectedExpert } + } + + /** + * Search for a query in the expert's vector store + */ + async search(query: string, 
expertName: string, workspacePath: string, k?: number): Promise { + return this.vectorStoreManager.search(query, expertName, workspacePath, k) } } diff --git a/src/core/experts/VectorStoreManager.ts b/src/core/experts/VectorStoreManager.ts new file mode 100644 index 00000000..122d6e41 --- /dev/null +++ b/src/core/experts/VectorStoreManager.ts @@ -0,0 +1,200 @@ +import { RecursiveCharacterTextSplitter } from "@langchain/textsplitters" +import { FaissStore } from "@langchain/community/vectorstores/faiss" +import { Document } from "@langchain/core/documents" +import { existsSync } from "fs" +import { join } from "path" +import { fileExists } from "@/utils/runtime-downloader" +import { ensureFaissPlatformDeps } from "@/utils/faiss" +import { OpenAIEmbeddings } from "@langchain/openai" +import { BedrockEmbeddings } from "@langchain/aws" +import { OllamaEmbeddings } from "@langchain/ollama" +import { EmbeddingConfiguration } from "@/shared/embeddings" +import { buildEmbeddingHandler } from "@/embedding" +import { VectorStoreConfig, DocumentChunkData } from "./types" +import { ExpertFileManager } from "./ExpertFileManager" + +/** + * Manages vector store operations for experts + */ +export class VectorStoreManager { + private embeddings: OpenAIEmbeddings | BedrockEmbeddings | OllamaEmbeddings + private vectorStore: FaissStore + private fileManager: ExpertFileManager + private workspaceId: string + + /** + * Create a new VectorStoreManager + */ + constructor(config: VectorStoreConfig) { + this.embeddings = config.embeddings + this.vectorStore = new FaissStore(this.embeddings, {}) + this.fileManager = new ExpertFileManager() + this.workspaceId = config.workspaceId + } + + /** + * Initialize vector store embeddings + */ + public static initializeEmbeddings( + embeddingConfig: EmbeddingConfiguration, + ): OpenAIEmbeddings | BedrockEmbeddings | OllamaEmbeddings { + const embeddingHandler = buildEmbeddingHandler(embeddingConfig) + return embeddingHandler.getClient() + } + + /** 
+ * Ensure vector store is loaded + */ + public async ensureVectorStoreLoaded(expertName: string, workspacePath: string): Promise { + try { + const { faissFilePath } = this.fileManager.getExpertPaths(workspacePath, expertName) + + await ensureFaissPlatformDeps() + + if (existsSync(faissFilePath)) { + const faissIndexPath = join(faissFilePath, "faiss.index") + + if (fileExists(faissIndexPath)) { + this.vectorStore = await FaissStore.load(faissFilePath, this.embeddings) + return true + } + } + + // If we get here, there's no vector store to load + this.vectorStore = new FaissStore(this.embeddings, {}) + return false + } catch (error) { + console.error(`Failed to load vector store for expert ${expertName}:`, error) + this.vectorStore = new FaissStore(this.embeddings, {}) + return false + } + } + + /** + * Chunk and store document in vector database + */ + public async chunkAndStore(data: DocumentChunkData): Promise { + console.log(`Storing content for expert ${data.expertName} from ${data.suburl}`) + + try { + await ensureFaissPlatformDeps() + + // Load existing vector store if available + await this.ensureVectorStoreLoaded(data.expertName, data.workspacePath) + + // Split text into chunks + const mdSplitter = RecursiveCharacterTextSplitter.fromLanguage("markdown", { + chunkSize: 8192, + chunkOverlap: 0, + }) + + const texts = await mdSplitter.splitText(data.markdown) + + // Create documents from text chunks with title and URL metadata + const docs: Document[] = texts.map((text) => ({ + pageContent: text, + id: data.url.trim(), + metadata: { + source: data.suburl.trim(), + title: data.title || "Untitled", + expertName: data.expertName, + }, + })) + + // Add documents to vector store + await this.vectorStore.addDocuments(docs) + + // Save vector store + const { faissFilePath } = this.fileManager.getExpertPaths(data.workspacePath, data.expertName) + await this.vectorStore.save(faissFilePath) + + console.log(`Successfully stored ${docs.length} chunks for expert 
${data.expertName} from ${data.suburl}`) + } catch (error) { + console.error(`Failed to store chunks in vector database for expert ${data.expertName}:`, error) + throw error + } + } + + /** + * Delete chunks for a URL + */ + public async deleteChunk(url: string, expertName: string, workspacePath: string): Promise { + console.log(`Deleting chunk for URL: ${url} in expert: ${expertName}`) + + try { + // Ensure vector store is loaded + const loaded = await this.ensureVectorStoreLoaded(expertName, workspacePath) + + if (!loaded) { + console.log(`No vector store found for ${expertName}`) + return + } + + const ids = await this.getDocumentIds(url) + + if (ids.length > 0) { + await this.vectorStore.delete({ ids }) + + const { faissFilePath } = this.fileManager.getExpertPaths(workspacePath, expertName) + await this.vectorStore.save(faissFilePath) + + console.log(`Removed ${ids.length} vectors for ${url}`) + } else { + console.log(`No vectors found for ${url}`) + } + } catch (error) { + console.error(`Failed to delete chunk for ${url}:`, error) + throw error + } + } + + /** + * Search vector store + */ + public async search(query: string, expertName: string, workspacePath: string, k?: number): Promise { + console.log(`Searching for query: ${query} in expert: ${expertName}`) + + try { + // Ensure vector store is loaded + const loaded = await this.ensureVectorStoreLoaded(expertName, workspacePath) + + if (!loaded) { + throw new Error(`No vector store found for ${expertName}`) + } + + const results = await this.vectorStore.similaritySearchWithScore(query, k) + + const formattedResults = results.map(([doc, score]) => ({ + id: doc.id, + content: doc.pageContent, + metadata: doc.metadata, + score: score, + })) + + return JSON.stringify(formattedResults, null, 2) + } catch (error) { + console.error(`Error searching vector store for expert ${expertName}:`, error) + throw error + } + } + + /** + * Get document IDs for a given URL + */ + private async getDocumentIds(url: string): 
Promise { + console.log(`Retrieving document IDs for URL: ${url}`) + + const docStore = this.vectorStore.getDocstore()._docs + const ids: string[] = [] + + // Iterate through the Map entries + docStore.forEach((doc, id) => { + if (doc?.id === url) { + ids.push(id) + } + }) + + console.log(`Found ${ids.length} document IDs for file ${url}`) + return ids + } +} diff --git a/src/core/experts/types.ts b/src/core/experts/types.ts new file mode 100644 index 00000000..3c480fd1 --- /dev/null +++ b/src/core/experts/types.ts @@ -0,0 +1,80 @@ +import { EmbeddingConfiguration } from "@/shared/embeddings" +import * as vscode from "vscode" +import { OpenAIEmbeddings } from "@langchain/openai" +import { BedrockEmbeddings } from "@langchain/aws" +import { OllamaEmbeddings } from "@langchain/ollama" + +/** + * Expert file paths structure + */ +export interface ExpertPaths { + sanitizedName: string + expertDir: string + docsDir: string + statusFilePath: string + metadataFilePath: string + faissFilePath: string + faissStatusFilePath: string + crawlStorage: string +} + +/** + * Expert metadata structure + */ +export interface ExpertMetadata { + name: string + isDefault?: boolean + createdAt: string + documentLinks: { url: string }[] + deepCrawl: boolean + maxDepth: number + maxPages?: number + crawlTimeout?: number +} + +/** + * Document processing options + */ +export interface DocumentProcessOptions { + expertName: string + workspacePath: string + docsDir: string + extensionContext: vscode.ExtensionContext + deepCrawl?: boolean + maxDepth?: number + maxPages?: number + crawlTimeout?: number +} + +/** + * Crawler configuration options + */ +export interface CrawlerOptions { + url: string + expertName: string + workspacePath: string + maxDepth: number + maxPages?: number + crawlTimeout?: number +} + +/** + * Vector store configuration + */ +export interface VectorStoreConfig { + embeddings: OpenAIEmbeddings | BedrockEmbeddings | OllamaEmbeddings + embeddingConfig: 
EmbeddingConfiguration + workspaceId: string +} + +/** + * Document chunk data + */ +export interface DocumentChunkData { + markdown: string + expertName: string + workspacePath: string + url: string + suburl: string + title?: string +} diff --git a/src/core/prompts/custom.ts b/src/core/prompts/custom.ts index ea6d628e..e9e6a483 100644 --- a/src/core/prompts/custom.ts +++ b/src/core/prompts/custom.ts @@ -1,7 +1,8 @@ -export const customToolsPrompt = (enabled: boolean) => - !enabled - ? "" - : ` +export const customToolsPrompt = (isCodeIndexEnabled: boolean, isDeepCrawlEnabled?: boolean, expertName?: string) => { + let prompt = "" + + if (isCodeIndexEnabled) { + prompt += ` ## find_relevant_files Description: Request to find relevant files on the system for the given task. Use this when you need to find relevant files for the given task. You must provide a path that is relative to your current working directory, and the task user gives to implement. The tool will return a list of paths to all relevant files in the user's current working directory that's related to that task. This tool is specialized in finding relevant files for a given task, so use it wisely! Prefer this tool over \`list_files\`, \`search_files\`, or \`list_code_definition_names\`. Parameters: @@ -18,7 +19,33 @@ Description: Request to perform a security scan on the generated code for OWASP Usage: -` +\n + ` + } + + if (isDeepCrawlEnabled && expertName) { + prompt += ` +## custom_expert_context +Description: **INTELLIGENT EXPERT CONSULTATION**: Request to find relevant context from a custom expert's knowledge base when needed to provide accurate and comprehensive answers. 
Use this tool when: + * The user's query requires specialized knowledge from the expert's domain + * You need additional context to provide a complete answer + * The user asks for more detailed information + * You determine that expert knowledge would significantly improve your response +This tool will search the vector database and return the most relevant content. Use your judgment to determine when expert consultation is necessary rather than calling it for every query. +Parameters: +- query: (required) The search query to find relevant information in the expert's knowledge base. +- expertName: (required) The name of the custom expert whose knowledge base should be searched. +Usage: + +Your search query here +${expertName} + +\n + ` + } + + return prompt +} export const customToolUseGuidelinePrompt = (enabled: boolean) => !enabled @@ -37,10 +64,11 @@ export const customCapabilitiesPrompt = (enabled: boolean) => - Always ensure that the code you generate adheres to best practices and security standards to minimize risks and vulnerabilities. ` -export const customRulesPrompt = (enabled: boolean) => - !enabled - ? "" - : ` +export const customRulesPrompt = (isCodeIndexEnabled: boolean, isDeepCrawlEnabled?: boolean) => { + let prompt = "" + + if (isCodeIndexEnabled) { + prompt += ` - For code-related tasks that involve modifying, creating, or understanding files in the codebase, use \`find_relevant_files\` first to identify the most relevant context. Do NOT use this tool for general questions, explanations, or non-code-related queries. - When determining if a query requires the \`find_relevant_files\` tool, consider: * Does the query require knowledge about specific files in the codebase? @@ -49,8 +77,25 @@ export const customRulesPrompt = (enabled: boolean) => * Is file context necessary to provide an appropriate response? If the answer is no to these questions, skip using the tool and respond directly. 
- You should always use the \`code_security_scan\` tool before attempting to complete any code-related tasks to ensure security and compliance with best practices. Failure to perform a security scan may expose the code to vulnerabilities and security risks. Always prioritize security and compliance with best practices. If any issue or vulnerabilities are found, address them before proceeding with the task. -- While fixing the vulnerabilities do not remove any of the working code only replace the implementation that is affected with the vulnerabilities, if you require anymore information or course correction, consult with the user before proceeding to apply any of the change. Always show the plan before fixing the security vulnerabilities. Before fixing the vulnerabilities, you should always get the user's approval before proceeding with the fixing process. -` +- While fixing the vulnerabilities do not remove any of the working code only replace the implementation that is affected with the vulnerabilities, if you require anymore information or course correction, consult with the user before proceeding to apply any of the change. Always show the plan before fixing the security vulnerabilities. Before fixing the vulnerabilities, you should always get the user's approval before proceeding with the fixing process.\n + ` + } + + if (isDeepCrawlEnabled) { + prompt += ` +- **INTELLIGENT EXPERT CONSULTATION**: When deep crawl is enabled, use the \`custom_expert_context\` tool intelligently based on the nature of the user's query: + * Use it when the query requires specialized knowledge from the expert's domain + * Use it when you need additional context to provide a comprehensive answer + * Use it when the user explicitly asks for more detailed information + * Use it when expert knowledge would significantly enhance your response +- Evaluate each query to determine if expert consultation is necessary. Not every query requires expert knowledge - use your judgment. 
+- For simple greetings, basic clarifications, or queries you can answer adequately without expert context, you may respond directly. +- When you do use the tool, analyze the results and determine if additional queries to the expert knowledge base are needed for a complete response.\n + ` + } + + return prompt +} export const customObjectivePrompt = (enabled: boolean) => !enabled diff --git a/src/core/prompts/system.hai.ts b/src/core/prompts/system.hai.ts index 14541912..992fbeb7 100644 --- a/src/core/prompts/system.hai.ts +++ b/src/core/prompts/system.hai.ts @@ -13,15 +13,53 @@ export const haiSystemPrompt = ( browserSettings: BrowserSettings, version?: string, expertPrompt?: string, + isDeepCrawlEnabled?: boolean, + expertName?: string, ) => { switch (version) { case "v1": - return haiSystemPromptV1(cwd, supportsBrowserUse, supportsCodeIndex, mcpHub, browserSettings, expertPrompt) + return haiSystemPromptV1( + cwd, + supportsBrowserUse, + mcpHub, + browserSettings, + supportsCodeIndex, + expertPrompt, + isDeepCrawlEnabled, + expertName, + ) case "v2": - return haiSystemPromptV2(cwd, supportsBrowserUse, supportsCodeIndex, mcpHub, browserSettings, expertPrompt) + return haiSystemPromptV2( + cwd, + supportsBrowserUse, + mcpHub, + browserSettings, + supportsCodeIndex, + expertPrompt, + isDeepCrawlEnabled, + expertName, + ) case "v3": - return haiSystemPromptV3(cwd, supportsBrowserUse, supportsCodeIndex, mcpHub, browserSettings, expertPrompt) + return haiSystemPromptV3( + cwd, + supportsBrowserUse, + mcpHub, + browserSettings, + supportsCodeIndex, + expertPrompt, + isDeepCrawlEnabled, + expertName, + ) default: - return haiDefaultSystemPrompt(cwd, supportsBrowserUse, mcpHub, browserSettings, supportsCodeIndex, expertPrompt) + return haiDefaultSystemPrompt( + cwd, + supportsBrowserUse, + mcpHub, + browserSettings, + supportsCodeIndex, + expertPrompt, + isDeepCrawlEnabled, + expertName, + ) } } diff --git a/src/core/prompts/system.hai.v1.ts 
b/src/core/prompts/system.hai.v1.ts index 21cfb301..3ec1a2b2 100644 --- a/src/core/prompts/system.hai.v1.ts +++ b/src/core/prompts/system.hai.v1.ts @@ -21,10 +21,14 @@ import { BrowserSettings } from "../../shared/BrowserSettings" export const SYSTEM_PROMPT = async ( cwd: string, supportsBrowserUse: boolean, - supportsCodeIndex: boolean, mcpHub: McpHub, browserSettings: BrowserSettings, + + // TAG:HAI + supportsCodeIndex: boolean, expertPrompt?: string, + isDeepCrawlEnabled?: boolean, + expertName?: string, ) => `${expertPrompt || "You are HAI, a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices."} ==== @@ -50,7 +54,7 @@ Always adhere to this format for the tool use to ensure proper parsing and execu # Tools -${customToolsPrompt(supportsCodeIndex)} +${customToolsPrompt(supportsCodeIndex, isDeepCrawlEnabled, expertName)} ## execute_command Description: Request to executes CLI command. Use for system operations/tasks. Tailor command to user system. Explain command. Use shell syntax for command chaining. Prefer CLI commands over scripts (more flexible). Runs in: ${cwd.toPosix()} @@ -532,7 +536,7 @@ RULES ? " Then if you want to test your work, you might use browser_action to launch the site, wait for the user's response confirming the site was launched along with a screenshot, then perhaps e.g., click a button to test functionality if needed, wait for the user's response confirming the button was clicked along with a screenshot of the new state, before finally closing the browser." : "" } -${customRulesPrompt(supportsCodeIndex)} +${customRulesPrompt(supportsCodeIndex, isDeepCrawlEnabled)} - MCP operations should be used one at a time, similar to other tool usage. Wait for confirmation of success before proceeding with additional operations. 
diff --git a/src/core/prompts/system.hai.v2.ts b/src/core/prompts/system.hai.v2.ts index 32ba7c90..93f1cb67 100644 --- a/src/core/prompts/system.hai.v2.ts +++ b/src/core/prompts/system.hai.v2.ts @@ -21,10 +21,14 @@ import { BrowserSettings } from "../../shared/BrowserSettings" export const SYSTEM_PROMPT = async ( cwd: string, supportsBrowserUse: boolean, - supportsCodeIndex: boolean, mcpHub: McpHub, browserSettings: BrowserSettings, + + // TAG:HAI + supportsCodeIndex: boolean, expertPrompt?: string, + isDeepCrawlEnabled?: boolean, + expertName?: string, ) => `${expertPrompt || "You are HAI, a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices."} ==== @@ -53,7 +57,7 @@ Always adhere to this format for the tool use to ensure proper parsing and execu # Tools -${customToolsPrompt(supportsCodeIndex)} +${customToolsPrompt(supportsCodeIndex, isDeepCrawlEnabled, expertName)} ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Commands will be executed in the current working directory: ${cwd.toPosix()} @@ -598,7 +602,7 @@ RULES - **Images & Environment Details:** - Analyze images for meaningful insights when provided. - \`environment_details\` provides context but does not replace user requests. Use it for guidance, not assumptions. -${customRulesPrompt(supportsCodeIndex)} +${customRulesPrompt(supportsCodeIndex, isDeepCrawlEnabled)} - MCP operations should be used one at a time, similar to other tool usage. 
Wait for confirmation of success before proceeding with additional operations. ==== diff --git a/src/core/prompts/system.hai.v3.ts b/src/core/prompts/system.hai.v3.ts index fed91584..8c70cbb7 100644 --- a/src/core/prompts/system.hai.v3.ts +++ b/src/core/prompts/system.hai.v3.ts @@ -21,11 +21,15 @@ import { BrowserSettings } from "../../shared/BrowserSettings" export const SYSTEM_PROMPT = async ( cwd: string, - supportsComputerUse: boolean, - supportsCodeIndex: boolean, + supportsBrowserUse: boolean, mcpHub: McpHub, browserSettings: BrowserSettings, + + // TAG:HAI + supportsCodeIndex: boolean, expertPrompt?: string, + isDeepCrawlEnabled?: boolean, + expertName?: string, ) => `${expertPrompt || "You are HAI, a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices."} ==== @@ -51,7 +55,7 @@ Always adhere to this format for the tool use to ensure proper parsing and execu # Tools -${customToolsPrompt(supportsCodeIndex)} +${customToolsPrompt(supportsCodeIndex, isDeepCrawlEnabled, expertName)} ## execute_command Description: Request to executes CLI command. Use for system operations/tasks. Tailor command to user system. Explain command. Use shell syntax for command chaining. Prefer CLI commands over scripts (more flexible). Runs in: ${cwd.toPosix()} @@ -143,7 +147,7 @@ Usage: Directory path here ${ - supportsComputerUse + supportsBrowserUse ? ` ## browser_action @@ -480,7 +484,7 @@ PLAN MODE CAPABILITIES -- You can execute CLI commands, list files, view source code definitions, perform regex searches ${supportsComputerUse ? ", use the browser" : ""}, read and edit files, and ask follow-up questions. These tools help with coding, file modifications, project understanding, system operations, and more. +- You can execute CLI commands, list files, view source code definitions, perform regex searches ${supportsBrowserUse ? 
", use the browser" : ""}, read and edit files, and ask follow-up questions. These tools help with coding, file modifications, project understanding, system operations, and more. - When given a task, \`environment_details\` includes a recursive list of all file paths in \`'${cwd.toPosix()}'\`, providing an overview of the project structure. - To explore beyond this directory, use \`list_files\`. - \`recursive: true\` lists all files within subdirectories. @@ -498,7 +502,7 @@ CAPABILITIES - Long-running commands are supported via the user's VS Code terminal. ${ - supportsComputerUse + supportsBrowserUse ? `- Use \`browser_action\` to interact with websites or locally running servers via a Puppeteer-controlled browser. - Useful for web development tasks: feature testing, troubleshooting, or verifying changes. - Example: @@ -534,9 +538,9 @@ RULES - **Finalizing Tasks:** - Use \`attempt_completion\` to present results. Do not end with open-ended questions—responses must be **final**. - **Prohibited Phrases:** Never start messages with "Great," "Certainly," "Okay," or "Sure." Be **direct and technical** (e.g., "CSS updated," not "Great, I've updated the CSS"). -- **Browser & Terminal Considerations (${supportsComputerUse ? "if applicable" : ""}):** +- **Browser & Terminal Considerations (${supportsBrowserUse ? "if applicable" : ""}):** ${ - supportsComputerUse + supportsBrowserUse ? ` - Use \`browser_action\` for web interactions when beneficial. - If an MCP server tool is available, prefer it over browser actions. @@ -547,7 +551,7 @@ RULES - **Images & Environment Details:** - Analyze images for meaningful insights when provided. - \`environment_details\` provides context but does not replace user requests. Use it for guidance, not assumptions. -${customRulesPrompt(supportsCodeIndex)} +${customRulesPrompt(supportsCodeIndex, isDeepCrawlEnabled)} - MCP operations should be used one at a time, similar to other tool usage. 
Wait for confirmation of success before proceeding with additional operations. ==== diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index 4be3e364..5cef1b18 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -22,6 +22,8 @@ export const SYSTEM_PROMPT = async ( // TAG:HAI supportsCodeIndex: boolean, expertPrompt?: string, + isDeepCrawlEnabled?: boolean, + expertName?: string, ) => `${expertPrompt || "You are HAI, a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices."} ==== @@ -50,7 +52,7 @@ Always adhere to this format for the tool use to ensure proper parsing and execu # Tools -${customToolsPrompt(supportsCodeIndex)} +${customToolsPrompt(supportsCodeIndex, isDeepCrawlEnabled, expertName)} ## execute_command Description: Request to execute a CLI command on the system. Use this when you need to perform system operations or run specific commands to accomplish any step in the user's task. You must tailor your command to the user's system and provide a clear explanation of what the command does. For command chaining, use the appropriate chaining syntax for the user's shell. Prefer to execute complex CLI commands over creating executable scripts, as they are more flexible and easier to run. Commands will be executed in the current working directory: ${cwd.toPosix()} @@ -628,7 +630,7 @@ RULES ? " Then if you want to test your work, you might use browser_action to launch the site, wait for the user's response confirming the site was launched along with a screenshot, then perhaps e.g., click a button to test functionality if needed, wait for the user's response confirming the button was clicked along with a screenshot of the new state, before finally closing the browser." 
: "" } -${customRulesPrompt(supportsCodeIndex)} +${customRulesPrompt(supportsCodeIndex, isDeepCrawlEnabled)} - MCP operations should be used one at a time, similar to other tool usage. Wait for confirmation of success before proceeding with additional operations. ==== diff --git a/src/core/storage/state-keys.ts b/src/core/storage/state-keys.ts index 037bd889..96d1b517 100644 --- a/src/core/storage/state-keys.ts +++ b/src/core/storage/state-keys.ts @@ -102,11 +102,16 @@ export type GlobalStateKey = // TAG:HAI | "expertPrompt" + | "expertName" + | "isDeepCrawlEnabled" // Embedding specific keys | "embeddingProvider" | "embeddingModelId" | "embeddingAwsRegion" | "embeddingOpenAiBaseUrl" | "embeddingOpenAiModelId" + | "guardrailsConfig" + | "embeddingAwsUseProfile" + | "embeddingAwsProfile" export type LocalStateKey = "localClineRulesToggles" diff --git a/src/core/storage/state.ts b/src/core/storage/state.ts index 2fddf0f9..a2abc85b 100644 --- a/src/core/storage/state.ts +++ b/src/core/storage/state.ts @@ -15,6 +15,7 @@ import { ClineRulesToggles } from "@shared/cline-rules" // TAG:HAI import { EmbeddingConfiguration, EmbeddingProvider } from "@shared/embeddings" import { HaiBuildContextOptions, HaiBuildIndexProgress } from "@shared/customApi" +import { Default_GuardsConfig, GuardrailsConfig } from "@/integrations/guardrails" /* Storage @@ -257,6 +258,8 @@ export async function getAllExtensionState(context: vscode.ExtensionContext, wor // TAG:HAI expertPrompt, + expertName, + isDeepCrawlEnabled, buildContextOptions, buildIndexProgress, isApiConfigurationValid, @@ -279,6 +282,10 @@ export async function getAllExtensionState(context: vscode.ExtensionContext, wor isEmbeddingConfigurationValid, embeddingOllamaBaseUrl, embeddingOllamaModelId, + //Guardrails + guardrailsConfig, + embeddingAwsUseProfile, + embeddingAwsProfile, ] = await Promise.all([ customGetState(context, "isNewUser") as Promise, customGetState(context, "apiProvider") as Promise, @@ -371,6 +378,8 @@ export 
async function getAllExtensionState(context: vscode.ExtensionContext, wor // TAG:HAI customGetState(context, "expertPrompt") as Promise, + customGetState(context, "expertName") as Promise, + customGetState(context, "isDeepCrawlEnabled") as Promise, customGetState(context, "buildContextOptions") as Promise, customGetState(context, "buildIndexProgress") as Promise, customGetState(context, "isApiConfigurationValid") as Promise, @@ -393,6 +402,9 @@ export async function getAllExtensionState(context: vscode.ExtensionContext, wor customGetState(context, "isEmbeddingConfigurationValid") as Promise, customGetState(context, "embeddingOllamaBaseUrl") as Promise, customGetState(context, "embeddingOllamaModelId") as Promise, + customGetState(context, "guardrailsConfig") as Promise, + customGetState(context, "embeddingAwsUseProfile") as Promise, + customGetState(context, "embeddingAwsProfile") as Promise, fetch, ]) @@ -532,8 +544,13 @@ export async function getAllExtensionState(context: vscode.ExtensionContext, wor isEmbeddingConfigurationValid, ollamaBaseUrl: embeddingOllamaBaseUrl, ollamaModelId: embeddingOllamaModelId, + awsUseProfile: embeddingAwsUseProfile, + awsProfile: embeddingAwsProfile, }, expertPrompt, + expertName, + isDeepCrawlEnabled, + guardrailsConfig: guardrailsConfig ?? Default_GuardsConfig, buildContextOptions: buildContextOptions ? 
{ ...buildContextOptions, @@ -726,6 +743,8 @@ export async function updateEmbeddingConfiguration( azureOpenAIApiVersion, ollamaBaseUrl, ollamaModelId, + awsProfile, + awsUseProfile, } = embeddingConfiguration // Update Global State @@ -739,6 +758,8 @@ export async function updateEmbeddingConfiguration( await customUpdateState(context, "embeddingAzureOpenAIApiEmbeddingsDeploymentName", azureOpenAIApiEmbeddingsDeploymentName) await customUpdateState(context, "embeddingOllamaBaseUrl", ollamaBaseUrl) await customUpdateState(context, "embeddingOllamaModelId", ollamaModelId) + await customUpdateState(context, "embeddingAwsProfile", awsProfile) + await customUpdateState(context, "embeddingAwsUseProfile", awsUseProfile) // Update Secrets await customStoreSecret(context, "embeddingAwsAccessKey", workspaceId, awsAccessKey, true) await customStoreSecret(context, "embeddingAwsSecretKey", workspaceId, awsSecretKey, true) diff --git a/src/core/task/index.ts b/src/core/task/index.ts index 1f93d277..d9c66077 100644 --- a/src/core/task/index.ts +++ b/src/core/task/index.ts @@ -25,7 +25,7 @@ import { BrowserSession } from "@services/browser/BrowserSession" import { UrlContentFetcher } from "@services/browser/UrlContentFetcher" import { listFiles } from "@services/glob/list-files" import { regexSearchFiles } from "@services/ripgrep" -import { telemetryService } from "@/services/posthog/telemetry/TelemetryService" +import { telemetryService } from "@services/posthog/telemetry/TelemetryService" import { parseSourceCodeForDefinitionsTopLevel } from "@services/tree-sitter" import { ApiConfiguration } from "@shared/api" import { findLast, findLastIndex, parsePartialArrayString } from "@shared/array" @@ -111,6 +111,11 @@ import { isCommandIncludedInSecretScanning, isSecretFile } from "../../integrati import { FindFilesToEditAgent } from "../../integrations/code-prep/FindFilesToEditAgent" import { buildTreeString } from "@utils/customFs" import { CodeScanner } from 
"../../integrations/security/code-scan" +import { ExpertManager } from "@core/experts/ExpertManager" +import { GuardResult, LLMMessage } from "@presidio-dev/hai-guardrails" +import { Guardrails } from "../../integrations/guardrails" +import type { Change as DiffChange } from "diff" +import { CorMatrixService } from "@/integrations/cor-matrix" export const cwd = vscode.workspace.workspaceFolders?.map((folder) => folder.uri.fsPath).at(0) ?? path.join(os.homedir(), "Desktop") // may or may not exist but fs checking existence would immediately ask for permission which would be bad UX, need to come up with a better solution @@ -182,11 +187,15 @@ export class Task { // HAI variables expertPrompt?: string + expertName?: string + isDeepCrawlEnabled?: boolean buildContextOptions?: HaiBuildContextOptions private task?: string private apiConfiguration: ApiConfiguration private embeddingConfiguration: EmbeddingConfiguration private filesEditedByAI: Set = new Set([]) + private guardrails: Guardrails + private failedGuards: { guardId: string; guardName: string; messages: Omit[] }[] = [] constructor( context: vscode.ExtensionContext, @@ -206,6 +215,10 @@ export class Task { enableCheckpointsSetting: boolean, customInstructions?: string, expertPrompt?: string, + expertName?: string, + isDeepCrawlEnabled?: boolean, + buildContextOptions?: HaiBuildContextOptions, + task?: string, images?: string[], historyItem?: HistoryItem, @@ -235,8 +248,13 @@ export class Task { // HAI variable initialization this.task = task this.expertPrompt = expertPrompt + this.expertName = expertName + this.isDeepCrawlEnabled = isDeepCrawlEnabled + this.buildContextOptions = buildContextOptions this.apiConfiguration = apiConfiguration this.embeddingConfiguration = embeddingConfiguration + this.guardrails = new Guardrails(this.context) + this.failedGuards = [] // Initialize taskId first if (historyItem) { @@ -1137,6 +1155,9 @@ export class Task { let nextUserContent = userContent let includeFileDetails = true 
while (!this.abort) { + if (this.failedGuards.length > 0) { + break // Exit the loop + } const didEndLoop = await this.recursivelyMakeClineRequests(nextUserContent, includeFileDetails) includeFileDetails = false // we only need file details the first time @@ -1589,6 +1610,8 @@ export class Task { this.browserSettings, this.buildContextOptions?.systemPromptVersion, this.expertPrompt, + this.isDeepCrawlEnabled, + this.expertName, ) let settingsCustomInstructions = this.customInstructions?.trim() await this.migratePreferredLanguageToolSetting() @@ -1640,6 +1663,64 @@ export class Task { ) systemPrompt += userInstructions } + + const messages = await this.formatClineMessagesForGuardrails() + const result = await this.guardrails.run(messages) + result.messagesWithGuardResult + .filter((guard) => ["secret", "pii"].includes(guard.guardId)) + .flatMap((guard) => guard.messages) + .filter((message) => !message.passed && message.modifiedMessage) + .forEach((msg) => { + const isJson = msg.message.content.startsWith("REQUEST:::") + let msgContent = msg.message.content.toString() + if (isJson) { + msgContent = JSON.stringify({ + request: `${msg.message.content}`.replace(/^(REQUEST:::)+/, ""), + }) + } + const match = this.clineMessages.find((m) => m.text?.toString() === msgContent) + if (match) { + match.text = isJson ? 
JSON.stringify({ request: msg.modifiedMessage?.content }) : msg.modifiedMessage?.content + } + }) + const guardsToApply = ["leakage", "injection"] + if (Guardrails.DEFAULT_GUARDS_CONFIG.secret?.mode === "block") { + guardsToApply.push("secret") + } + if (Guardrails.DEFAULT_GUARDS_CONFIG.pii?.mode === "block") { + guardsToApply.push("pii") + } + this.failedGuards = result.messagesWithGuardResult.filter( + (guard) => guardsToApply.includes(guard.guardId) && guard.messages.some((msg) => msg.passed === false), + ) + if (this.failedGuards.length > 0) { + this.didRejectTool = false + this.didAlreadyUseTool = false + this.didCompleteReadingStream = false + this.isWaitingForFirstChunk = false + this.didAutomaticallyRetryFailedApiRequest = false + await this.saveClineMessagesAndUpdateHistory() + yield { + type: "text", + text: `${Guardrails.MESSAGE} (${this.failedGuards.map((guard) => guard.guardName).join(", ")})`, + } + return + } + + this.apiConversationHistory + .filter((message) => message.role === "user") + .map(async (m) => { + if (Array.isArray(m.content)) { + m.content.map(async (c) => { + if (c.type === "text") { + c.text = await this.guardrails.applyPiiAndSecretGuards(c.text) + } + }) + } else if (typeof m.content === "string") { + m.content = await this.guardrails.applyPiiAndSecretGuards(m.content) + } + }) + const contextManagementMetadata = await this.contextManager.getNewContextMessagesAndMetadata( this.apiConversationHistory, this.clineMessages, @@ -2049,6 +2130,13 @@ export class Task { this.diffViewProvider.editType = fileExists ? 
"modify" : "create" } + let fileDiff: + | { + diff: DiffChange[] | undefined + path: string + } + | undefined = undefined + try { // Construct newContent from diff let newContent: string @@ -2254,9 +2342,13 @@ export class Task { // Mark the file as edited by Cline to prevent false "recently modified" warnings this.fileContextTracker.markFileAsEditedByCline(relPath) - const { newProblemsMessage, userEdits, autoFormattingEdits, finalContent } = + const { newProblemsMessage, userEdits, autoFormattingEdits, finalContent, lineDiffs } = await this.diffViewProvider.saveChanges() this.didEditFile = true // used to determine if we should wait for busy terminal to update before sending api request + fileDiff = { + diff: lineDiffs, + path: relPath, + } // Track file edit operation await this.fileContextTracker.trackFileContext(relPath, "cline_edited") @@ -2308,7 +2400,12 @@ export class Task { await this.diffViewProvider.revertChanges() await this.diffViewProvider.reset() await this.saveCheckpoint() + fileDiff = undefined break + } finally { + if (fileDiff) { + CorMatrixService.track(fileDiff) + } } } case "read_file": { @@ -3774,6 +3871,15 @@ export class Task { } catch {} } + if (this.failedGuards && this.failedGuards.length > 0) { + await this.ask("guardrails_filter", "Guardrail triggered: stopping task execution.") + this.failedGuards = [] + await this.saveClineMessagesAndUpdateHistory() + await this.postStateToWebview() + this.consecutiveMistakeCount = 0 + return false + } + if (this.consecutiveMistakeCount >= 3) { if (this.autoApprovalSettings.enabled && this.autoApprovalSettings.enableNotifications) { showSystemNotification({ @@ -4476,6 +4582,8 @@ export class Task { return `[${block.name} for '${block.params.path}']` case "code_security_scan": return `[${block.name}']` + case "custom_expert_context": + return `[${block.name} for '${block.params.query}' in expert '${block.params.expertName}']` default: return "" } @@ -4580,6 +4688,101 @@ export class Task { break } } 
+ case "custom_expert_context": { + const query: string | undefined = block.params.query + const expertName: string | undefined = block.params.expertName + + // Format the path and content to be compatible with ClineSayTool + const sharedMessageProps: ClineSayTool = { + tool: "customExpertContext", + path: expertName ? expertName : "(unknown expert)", + content: "", + } + + try { + if (block.partial) { + // For partial tool content, we just display the initial message + await this.ask("tool", JSON.stringify(sharedMessageProps), block.partial).catch(() => {}) + break + } else { + // Validate required parameters + if (!query) { + this.consecutiveMistakeCount++ + pushToolResult(await this.sayAndCreateMissingParamError("custom_expert_context", "query")) + await this.saveCheckpoint() + break + } + if (!expertName) { + this.consecutiveMistakeCount++ + pushToolResult(await this.sayAndCreateMissingParamError("custom_expert_context", "expertName")) + await this.saveCheckpoint() + break + } + + this.consecutiveMistakeCount = 0 + + // Create ExpertManager instance and search for the query + const expertManager = new ExpertManager(this.context, this.taskId, this.embeddingConfiguration) + const searchResults = await expertManager.search(query, expertName, cwd) + + // Format the complete message with search results + const completeMessage = JSON.stringify({ + tool: "customExpertContext", + path: expertName, + content: searchResults, + operationIsLocatedInWorkspace: true, + } satisfies ClineSayTool) + + // Display results in UI + await this.say("tool", completeMessage, undefined, false) + + // Return results to the model + pushToolResult(searchResults) + await this.saveCheckpoint() + break + } + } catch (error) { + await handleError("searching expert context", error) + await this.saveCheckpoint() + break + } + } } } + + /** + * Formats the conversation history messages for Guardrails. 
+ * This function filters the messages to include only those that are relevant for Guardrails, + * specifically messages of type "say" and "ask". + * It then maps the filtered messages to the format expected by Guardrails. + * The resulting array of messages is returned. + * @function formatClineMessagesForGuardrails + * @returns {LLMMessage[]} - An array of LLMMessage objects formatted for Guardrails. + */ + async formatClineMessagesForGuardrails(): Promise { + return this.clineMessages + .filter((message) => { + const isValidSay = message.type === "say" + const isAsk = message.type === "ask" + if (!isValidSay && !isAsk) { + // Unknown message type, skipping leakage guard + } + return (isValidSay && !!message.text) || isAsk + }) + .map((message) => { + const role = message.type === "say" ? "user" : "system" + const shouldParse = ["api_req_started"].includes(message.say!) + try { + return { + role, + content: shouldParse ? `REQUEST:::${JSON.parse(message.text!).request}` : message.text!, + } + } catch (error) { + return { + role, + content: message.text!, + } + } + }) + } } diff --git a/src/core/webview/customClientProvider.ts b/src/core/webview/customClientProvider.ts index d6687419..8e65700e 100644 --- a/src/core/webview/customClientProvider.ts +++ b/src/core/webview/customClientProvider.ts @@ -1,6 +1,6 @@ export const InvalidWorkspaceErrorMessage = "No workspace open for Code Index" -export const CodeContextStartMessage = "(hAI) Adding Code Context" +export const CodeContextStartMessage = "(HAI) Adding Code Context" export const CodeContextCompletionMessage = "Code Context Addition Done, Moving Ahead with Code Indexing..." -export const CodeIndexStartMessage = "(hAI) Indexing Code" +export const CodeIndexStartMessage = "(HAI) Indexing Code" export const CodeIndexCompletionMessage = "Indexing Code Completed" export const CodeContextErrorMessage = "Error occurred while adding Code Context. Please restoring the current workspace." 
diff --git a/src/embedding/providers/bedrock.ts b/src/embedding/providers/bedrock.ts index 4a723282..39a34796 100644 --- a/src/embedding/providers/bedrock.ts +++ b/src/embedding/providers/bedrock.ts @@ -1,33 +1,49 @@ import { EmbeddingHandler } from "../" import { BedrockEmbeddings } from "@langchain/aws" import { EmbeddingHandlerOptions } from "../../shared/embeddings" +import { BedrockRuntimeClient } from "@aws-sdk/client-bedrock-runtime" export class AwsBedrockEmbeddingHandler implements EmbeddingHandler { private options: EmbeddingHandlerOptions - private client: BedrockEmbeddings + private client: BedrockEmbeddings | null = null constructor(options: EmbeddingHandlerOptions) { this.options = options + let bedrockRuntimeClient: BedrockRuntimeClient + + if (this.options.awsUseProfile) { + bedrockRuntimeClient = new BedrockRuntimeClient({ + region: this.options.awsRegion, + profile: this.options.awsProfile, + }) + } else { + bedrockRuntimeClient = new BedrockRuntimeClient({ + region: this.options.awsRegion, + credentials: { + accessKeyId: this.options.awsAccessKey!, + secretAccessKey: this.options.awsSecretKey!, + ...(this.options.awsSessionToken ? { sessionToken: this.options.awsSessionToken } : {}), + }, + }) + } + this.client = new BedrockEmbeddings({ - model: this.options.modelId, - region: this.options.awsRegion, - credentials: { - accessKeyId: this.options.awsAccessKey!, - secretAccessKey: this.options.awsSecretKey!, - ...(this.options.awsSessionToken ? { sessionToken: this.options.awsSessionToken } : {}), + client: bedrockRuntimeClient, + onFailedAttempt: (error) => { + console.error("Failed attempt in Bedrock Embeddings:", error) + throw error }, - maxRetries: this.options.maxRetries, }) } getClient() { - return this.client + return this.client! 
} async validateAPIKey(): Promise { try { - await this.client.embedQuery("Test") + await this.client!.embedQuery("Test") return true } catch (error) { console.error("Error validating Bedrock embedding credentials: ", error) diff --git a/src/integrations/cor-matrix/index.ts b/src/integrations/cor-matrix/index.ts new file mode 100644 index 00000000..22a88356 --- /dev/null +++ b/src/integrations/cor-matrix/index.ts @@ -0,0 +1,43 @@ +import type { Change as DiffChange } from "diff" +import CorMatrix from "@presidio-dev/cor-matrix" +import { HaiConfig } from "@/shared/hai-config" + +type ToolMetrics = { + diff: DiffChange[] | undefined + path: string +} + +export class CorMatrixService { + private static _instance: CorMatrix + static APP_NAME: string = "hai-code-generator" + + private constructor() {} + + private static getInstance(): CorMatrix { + if (!this._instance || (this._instance && !this._instance.isEnabled())) { + const config = HaiConfig.getCorMatrixConfig() + this._instance = new CorMatrix({ + appName: this.APP_NAME, + baseURL: config?.baseURL, + token: config?.token, + workspaceId: config?.workspaceId, + logLevel: "debug", + }) + } + return this._instance + } + + static track(metrics: ToolMetrics): void { + const instance = this.getInstance() + if (metrics.diff && metrics.path) { + for (const change of metrics.diff) { + if (change.added) { + instance.addCodeOriginRecord({ + code: change.value, + path: metrics.path, + }) + } + } + } + } +} diff --git a/src/integrations/editor/DiffViewProvider.ts b/src/integrations/editor/DiffViewProvider.ts index 2e7de525..11c8353c 100644 --- a/src/integrations/editor/DiffViewProvider.ts +++ b/src/integrations/editor/DiffViewProvider.ts @@ -203,6 +203,7 @@ export class DiffViewProvider { userEdits: string | undefined autoFormattingEdits: string | undefined finalContent: string | undefined + lineDiffs: diff.Change[] | undefined }> { if (!this.relPath || !this.newContent || !this.activeDiffEditor) { return { @@ -210,6 +211,7 @@ 
export class DiffViewProvider { userEdits: undefined, autoFormattingEdits: undefined, finalContent: undefined, + lineDiffs: undefined, } } const absolutePath = path.resolve(this.cwd, this.relPath) @@ -291,6 +293,12 @@ export class DiffViewProvider { userEdits, autoFormattingEdits, finalContent: normalizedPostSaveContent, + lineDiffs: diff.diffLines( + (this.originalContent || "") + .replace(/\r\n|\n/g, newContentEOL) // align EOLs + .trimEnd() + newContentEOL, + normalizedPostSaveContent, + ), } } diff --git a/src/integrations/guardrails/index.ts b/src/integrations/guardrails/index.ts new file mode 100644 index 00000000..a661f7c0 --- /dev/null +++ b/src/integrations/guardrails/index.ts @@ -0,0 +1,239 @@ +import { + GuardrailsEngine, + injectionGuard, + leakageGuard, + MessageType, + Pattern, + piiGuard, + secretGuard, + SelectionType, +} from "@presidio-dev/hai-guardrails" +import * as vscode from "vscode" +import { customGetState, customUpdateState } from "../../core/storage/state" + +// Define a type for our guardrails configuration +export interface GuardrailsConfig { + injection: { + name: string + threshold: number + mode: string + } + pii: { + name: string + selection: SelectionType + mode: string + } + secret: { + name: string + selection: SelectionType + mode: string + } + leakage: { + name: string + roles: MessageType[] + threshold: number + mode: string + } +} + +export const Default_GuardsConfig = { + injection: { + name: "Prompt Injection", + threshold: 1, + mode: "heuristic", + }, + pii: { + name: "PII", + selection: SelectionType.All, + mode: "redact", + }, + secret: { + name: "Secrets", + selection: SelectionType.All, + mode: "block", + }, + leakage: { + name: "Prompt Leakage", + mode: "heuristic", + roles: ["user"], + threshold: 1, + }, +} + +export class Guardrails extends GuardrailsEngine { + public static MESSAGE = "Message blocked by HAI Guardrails filter." 
+ private context: vscode.ExtensionContext + + // Default configuration that will be used if no config is found in state + public static DEFAULT_GUARDS_CONFIG: GuardrailsConfig = { + injection: { + name: "Prompt Injection", + threshold: 1, + mode: "heuristic", + }, + pii: { + name: "PII", + selection: SelectionType.All, + mode: "redact", + }, + secret: { + name: "Secrets", + selection: SelectionType.All, + mode: "block", + }, + leakage: { + name: "Prompt Leakage", + mode: "heuristic", + roles: ["user"], + threshold: 1, + }, + } + + constructor(context: vscode.ExtensionContext) { + // Initially create with default guards, will be updated after loading config + const guards = Guardrails.createGuards(Guardrails.DEFAULT_GUARDS_CONFIG) + super({ guards }) + + this.context = context + + // Load configuration asynchronously + this.loadGuardsConfig().then(() => { + // Update guards after loading config + this.updateGuards() + }) + } + + private async loadGuardsConfig(): Promise { + const storedConfig = (await customGetState(this.context, "guardrailsConfig")) as GuardrailsConfig | undefined + if (storedConfig) { + Guardrails.DEFAULT_GUARDS_CONFIG = storedConfig + } else { + // If no config exists in storage, save the default one + await this.saveGuardsConfig() + } + } + + private async saveGuardsConfig(): Promise { + await customUpdateState(this.context, "guardrailsConfig", Guardrails.DEFAULT_GUARDS_CONFIG) + } + + private updateGuards(): void { + Guardrails.createGuards(Guardrails.DEFAULT_GUARDS_CONFIG) + } + + private static createGuards(config: GuardrailsConfig) { + return [ + injectionGuard({ roles: ["user"] }, { mode: "heuristic", threshold: config.injection.threshold }), + piiGuard({ + selection: config.pii.selection, + mode: config.pii.mode as "redact" | "block", + patterns: [ + { + id: "Email-pattern", + name: "Email pattern", + description: "Redact email in all combinations of special characters", + regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, + 
replacement: "####", + }, + ], + }), + secretGuard({ + selection: config.secret.selection, + mode: config.secret.mode as "redact" | "block", + }), + leakageGuard({ roles: config.leakage.roles }, { mode: "heuristic", threshold: config.leakage.threshold }), + ] + } + + get activeGuards() { + const guards = Object.keys(Guardrails.DEFAULT_GUARDS_CONFIG).map((key) => { + const config = Guardrails.DEFAULT_GUARDS_CONFIG[key as keyof GuardrailsConfig] + const hasThreshold = "threshold" in config + const hasMode = "mode" in config + return { + key: key, + name: config.name, + hasThreshold: hasThreshold, + threshold: hasThreshold ? (config as any).threshold : undefined, + mode: hasMode ? (config as any).mode : undefined, + } + }) + + guards.sort((a, b) => { + return a.hasThreshold === b.hasThreshold ? 0 : a.hasThreshold ? -1 : 1 + }) + + return guards + } + + public async updateGuard( + guardUpdates: Array<{ + guardKey: keyof GuardrailsConfig + updates: { threshold?: number; mode?: string } + }>, + ): Promise { + let hasAnyUpdates = 0 + const allChanges: string[] = [] + + for (const { guardKey, updates } of guardUpdates) { + const guard = Guardrails.DEFAULT_GUARDS_CONFIG[guardKey] + if (!guard) { + console.error(`Guard ${guardKey} not found.`) + continue + } + + const changes: string[] = [] + + // Update threshold if provided and guard supports it + if (updates.threshold !== undefined && "threshold" in guard) { + guard.threshold = updates.threshold + console.log(`Updating ${guardKey} threshold to ${guard.threshold}`) + hasAnyUpdates++ + changes.push(`threshold to ${updates.threshold}`) + } + + // Update mode if provided and guard supports it + if (updates.mode !== undefined && "mode" in guard) { + guard.mode = updates.mode + hasAnyUpdates++ + changes.push(`mode to ${updates.mode}`) + } + + allChanges.push(`${guardKey}: ${changes.join(", ")}`) + } + + if (hasAnyUpdates > 0) { + await this.saveGuardsConfig() + this.updateGuards() + console.log(`Updated guards: 
${allChanges.join(" | ")}`) + } else { + console.warn(`No valid updates provided for any guards`) + } + } + + public async applyPiiAndSecretGuards(content: string): Promise { + let engine = new GuardrailsEngine({ + guards: [ + secretGuard({ + selection: Guardrails.DEFAULT_GUARDS_CONFIG.secret.selection, + mode: Guardrails.DEFAULT_GUARDS_CONFIG.secret.mode as "redact" | "block", + }), + piiGuard({ + selection: Guardrails.DEFAULT_GUARDS_CONFIG.pii.selection, + mode: Guardrails.DEFAULT_GUARDS_CONFIG.pii.mode as "redact" | "block", + patterns: [ + { + id: "Email-pattern", + name: "Email pattern", + description: "Redact email in all combinations of special characters", + regex: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g, + replacement: "####", + }, + ], + }), + ], + }) + let results = await engine.run([{ role: "user", content }]) + return results.messages[0].content + } +} diff --git a/src/integrations/inline-editing/index.ts b/src/integrations/inline-editing/index.ts index 003926d8..38d5c080 100644 --- a/src/integrations/inline-editing/index.ts +++ b/src/integrations/inline-editing/index.ts @@ -44,7 +44,7 @@ export class InlineEditingProvider { provideCodeActions(document, range) { const editor = vscode.window.activeTextEditor if (editor && editor.document === document && !range.isEmpty) { - const action = new vscode.CodeAction("Edit with hAI", vscode.CodeActionKind.RefactorRewrite) + const action = new vscode.CodeAction("Edit with HAI", vscode.CodeActionKind.RefactorRewrite) action.command = { command: "hai.editSelectedWithAI", title: "Edit with HAI", @@ -160,7 +160,7 @@ export class InlineEditingProvider { this.activeCodeLensProvider?.dispose() this.isEditing = true - const systemPrompt = `You are hAI, an AI coding assistant. You are an AI programming assistant who is an expert in adding new code by following instructions. + const systemPrompt = `You are HAI, an AI coding assistant. 
You are an AI programming assistant who is an expert in adding new code by following instructions. - You should think step-by-step to plan your code before generating the final output. - You should ensure your code matches the indentation and whitespace of the preceding code in the users' file @@ -354,7 +354,7 @@ export class InlineEditingProvider { ) { return [ new vscode.CodeLens(editor.selection, { - title: "⌥⇧K Edit with hAI", + title: "⌥⇧K Edit with HAI", command: "hai.editSelectedWithAI", tooltip: "Edit selected code with HAI", }), diff --git a/src/integrations/workspace/HaiFileSystemWatcher.ts b/src/integrations/workspace/HaiFileSystemWatcher.ts index d30da781..47d2081b 100644 --- a/src/integrations/workspace/HaiFileSystemWatcher.ts +++ b/src/integrations/workspace/HaiFileSystemWatcher.ts @@ -28,7 +28,13 @@ class HaiFileSystemWatcher { this.ig.add( content .split("\n") - .filter((line) => line.trim() && !line.startsWith("#") && !line.includes(GlobalFileNames.haiConfig)), + .filter( + (line) => + line.trim() && + !line.startsWith("#") && + !line.includes(GlobalFileNames.haiConfig) && + !line.includes(GlobalFileNames.experts), + ), ) } catch (error) { console.log("HaiFileSystemWatcher No .gitignore found, using default exclusions.") diff --git a/src/services/browser/UrlContentFetcher.ts b/src/services/browser/UrlContentFetcher.ts index ef23a04f..1e2bc341 100644 --- a/src/services/browser/UrlContentFetcher.ts +++ b/src/services/browser/UrlContentFetcher.ts @@ -8,6 +8,22 @@ import TurndownService from "turndown" import PCR from "puppeteer-chromium-resolver" import { fileExistsAtPath } from "@utils/fs" +export interface CrawlOptions { + maxDepth: number + maxPages?: number + timeout?: number + urlFilter?: (url: string) => boolean + onPageCrawlComplete?: (data: CrawlResult) => Promise +} + +export interface CrawlResult { + url: string + content: string + depth: number + links: string[] + parentUrl?: string +} + interface PCRStats { puppeteer: { launch: typeof 
launch } executablePath: string @@ -61,8 +77,85 @@ export class UrlContentFetcher { this.page = undefined } + private async extractLinks(page: Page): Promise { + return await page.evaluate(() => { + const links = Array.from(document.querySelectorAll("a[href]")) + return links.map((link) => link.getAttribute("href")).filter((href): href is string => href !== null) + }) + } + + private normalizeUrl(url: string, baseUrl: string): string { + try { + return new URL(url, baseUrl).href + } catch { + return "" + } + } + + async deepCrawl(startUrl: string, options: CrawlOptions): Promise { + if (!this.browser || !this.page) { + throw new Error("Browser not initialized") + } + + const results: CrawlResult[] = [] + const visited = new Set() + const queue: Array<{ url: string; depth: number; parentUrl?: string }> = [{ url: startUrl, depth: 0 }] + const maxPages = options.maxPages || 20 + + while (queue.length > 0 && results.length < maxPages) { + const current = queue.shift() + if (!current) { + continue + } + + const { url, depth, parentUrl } = current + + // Skip if URL already visited or exceeds max depth + if (visited.has(url) || depth > options.maxDepth) { + continue + } + + try { + // Extract markdown and sub-links + const markdown = await this.urlToMarkdown(url, options.timeout || 10_0000) + const links = await this.extractLinks(this.page) + const result = { + url, + content: markdown, + depth, + links, + parentUrl, + } + + results.push(result) + visited.add(url) + + if (options.onPageCrawlComplete) { + await options.onPageCrawlComplete(result) + } + + // Add new URLs to queue if not at max depth + if (depth < options.maxDepth) { + const newUrls = result.links + .map((link) => this.normalizeUrl(link, url)) + .filter((link) => link && !visited.has(link)) + .filter((link) => !options.urlFilter || options.urlFilter(link)) + + for (const newUrl of newUrls) { + queue.push({ url: newUrl, depth: depth + 1, parentUrl: url }) + } + } + } catch (error) { + console.error(`Error 
processing ${url}:`, error) + continue + } + } + + return results + } + // must make sure to call launchBrowser before and closeBrowser after using this - async urlToMarkdown(url: string): Promise { + async urlToMarkdown(url: string, timeout: number = 10_000): Promise { if (!this.browser || !this.page) { throw new Error("Browser not initialized") } @@ -72,7 +165,7 @@ export class UrlContentFetcher { this should be sufficient for most doc sites */ await this.page.goto(url, { - timeout: 10_000, + timeout, waitUntil: ["domcontentloaded", "networkidle2"], }) const content = await this.page.content() diff --git a/src/shared/ExtensionMessage.ts b/src/shared/ExtensionMessage.ts index 7b67d273..a341b29e 100644 --- a/src/shared/ExtensionMessage.ts +++ b/src/shared/ExtensionMessage.ts @@ -14,6 +14,7 @@ import { ClineRulesToggles } from "./cline-rules" import { HaiBuildContextOptions, HaiBuildIndexProgress } from "./customApi" import { IHaiStory } from "./hai-task" import { EmbeddingConfiguration } from "./embeddings" +import { ExpertData } from "./experts" // webview will hold state export interface ExtensionMessage { @@ -58,6 +59,7 @@ export interface ExtensionMessage { | "defaultExpertsLoaded" | "expertPrompt" | "writeTaskStatus" + | "defaultGuards" text?: string action?: | "chatButtonClicked" @@ -131,6 +133,7 @@ export interface ExtensionMessage { haiTaskData?: { folder: string; tasks: IHaiStory[]; ts: string } haiConfig?: {} experts?: any[] // Expert data array + selectedExpert?: ExpertData | null // Selected expert documentLinks?: any[] // Document links with status expertName?: string // Expert name for document links status writeTaskStatusResult?: { @@ -138,6 +141,7 @@ export interface ExtensionMessage { message: string status: string } + guards?: Guard[] } export type Invoke = "sendMessage" | "primaryButtonClick" | "secondaryButtonClick" @@ -187,6 +191,7 @@ export interface ExtensionState { expertPrompt?: string vscodeWorkspacePath?: string enableInlineEdit?: boolean 
+ guards?: Guard[] } export interface ClineMessage { @@ -222,6 +227,8 @@ export type ClineAsk = | "new_task" | "condense" | "report_bug" + | "guardrails" + | "guardrails_filter" export type ClineSay = | "task" @@ -251,6 +258,9 @@ export type ClineSay = | "load_mcp_documentation" | "info" // Added for general informational messages like retry status + // TAG:HAI + | "guardrails_filter" + export interface ClineSayTool { tool: | "editedExistingFile" @@ -264,6 +274,7 @@ export interface ClineSayTool { // TAG:HAI | "findRelevantFiles" | "codeSecurityScan" + | "customExpertContext" path?: string diff?: string content?: string @@ -330,6 +341,13 @@ export interface ClineApiReqInfo { } } -export type ClineApiReqCancelReason = "streaming_failed" | "user_cancelled" | "retries_exhausted" +export type ClineApiReqCancelReason = "streaming_failed" | "user_cancelled" | "guardrails_interrupted" | "retries_exhausted" export const COMPLETION_RESULT_CHANGES_FLAG = "HAS_CHANGES" +export interface Guard { + key: string + name: string + hasThreshold: boolean + threshold?: number + mode?: string +} diff --git a/src/shared/WebviewMessage.ts b/src/shared/WebviewMessage.ts index 991627f8..09a433f1 100644 --- a/src/shared/WebviewMessage.ts +++ b/src/shared/WebviewMessage.ts @@ -5,6 +5,7 @@ import { UserInfo } from "./UserInfo" import { ChatContent } from "./ChatContent" import { TelemetrySetting } from "./TelemetrySetting" import { McpViewTab } from "./mcp" +import { Guard } from "./ExtensionMessage" // TAG:HAI import { HaiBuildContextOptions } from "./customApi" @@ -41,11 +42,16 @@ export interface WebviewMessage { | "toggleWorkflow" // TAG:HAI - | "expertPrompt" + | "selectExpert" + | "viewExpertPrompt" | "saveExpert" | "deleteExpert" | "loadExperts" | "loadDefaultExperts" + | "loadGuards" + | "updateGuardThreshold" + | "updateGuardMode" + | "updateGuards" | "refreshDocumentLink" | "deleteDocumentLink" | "addDocumentLink" @@ -62,6 +68,7 @@ export interface WebviewMessage { | "startIndex" | 
"resetIndex" | "writeTaskStatus" + | "defaultGuards" text?: string disabled?: boolean @@ -127,6 +134,9 @@ export interface WebviewMessage { buildContextOptions?: HaiBuildContextOptions embeddingConfiguration?: EmbeddingConfiguration toast?: { message: string; toastType: "error" | "warning" | "info" } + isDeepCrawlEnabled?: boolean + guard?: Guard + guards?: Guard[] } export type ClineAskResponse = "yesButtonClicked" | "noButtonClicked" | "messageResponse" diff --git a/src/shared/embeddings.ts b/src/shared/embeddings.ts index c7f9653c..673e8321 100644 --- a/src/shared/embeddings.ts +++ b/src/shared/embeddings.ts @@ -19,6 +19,8 @@ export interface EmbeddingHandlerOptions { maxRetries?: number ollamaBaseUrl?: string ollamaModelId?: string + awsProfile?: string + awsUseProfile?: boolean } export type EmbeddingConfiguration = EmbeddingHandlerOptions & { diff --git a/src/shared/experts.ts b/src/shared/experts.ts index 5347e476..ab8a4576 100644 --- a/src/shared/experts.ts +++ b/src/shared/experts.ts @@ -30,6 +30,11 @@ export const ExpertDataSchema = z.object({ iconPath: z.string().trim().optional(), iconComponent: z.unknown().optional(), documentLinks: z.array(DocumentLinkSchema).optional(), + deepCrawl: z.boolean().optional(), + maxDepth: z.number().min(1).optional(), + maxPages: z.number().min(1).optional(), + crawlTimeout: z.number().min(1).optional(), + status: z.nativeEnum(DocumentStatus).optional(), }) export type ExpertDataType = z.infer @@ -44,4 +49,9 @@ export interface ExpertData { iconPath?: string iconComponent?: any documentLinks?: DocumentLink[] + deepCrawl?: boolean + maxDepth?: number + maxPages?: number + crawlTimeout?: number + status?: DocumentStatus } diff --git a/src/shared/hai-config.ts b/src/shared/hai-config.ts index 1a22bdc2..9c601943 100644 --- a/src/shared/hai-config.ts +++ b/src/shared/hai-config.ts @@ -19,6 +19,13 @@ export const haiConfigSchema = z.object({ apiKey: z.string().trim().optional(), }) .optional(), + cormatrix: z + .object({ + 
baseURL: z.string().trim().optional(), + token: z.string().trim().optional(), + workspaceId: z.string().trim().optional(), + }) + .optional(), }) export class HaiConfig { @@ -83,4 +90,9 @@ export class HaiConfig { const config = HaiConfig.getConfig(workspacePath) return config?.langfuse } + + static getCorMatrixConfig(workspacePath?: string) { + const config = HaiConfig.getConfig(workspacePath) + return config?.cormatrix + } } diff --git a/src/shared/validate.ts b/src/shared/validate.ts index 8f44f4e7..dd42d9f3 100644 --- a/src/shared/validate.ts +++ b/src/shared/validate.ts @@ -128,8 +128,14 @@ export function validateEmbeddingConfiguration(config?: EmbeddingConfiguration): } break case "bedrock": - if (!config.awsRegion || !config.awsAccessKey || !config.awsSecretKey) { - return "You must provide a valid Access Key, Secret Key and Region to use AWS Bedrock." + if (config.awsUseProfile) { + if (!config.awsRegion) { + return "You must provide a valid Region to use AWS Bedrock with profile." + } + } else { + if (!config.awsRegion || !config.awsAccessKey || !config.awsSecretKey) { + return "You must provide a valid Access Key, Secret Key and Region to use AWS Bedrock." + } } break case "openai": diff --git a/webview-ui/src/components/chat/ChatRowExtra.tsx b/webview-ui/src/components/chat/ChatRowExtra.tsx index d6098a62..ce447ac9 100644 --- a/webview-ui/src/components/chat/ChatRowExtra.tsx +++ b/webview-ui/src/components/chat/ChatRowExtra.tsx @@ -70,6 +70,28 @@ const ChatRowExtra: React.FC = ({ headerStyle, toolIcon, mess /> ) + case "customExpertContext": + return ( + <> +
+ {toolIcon("book")} + + {message.type === "ask" ? ( + <>HAI is querying the "{tool.path}" expert: + ) : ( + <>Expert context from "{tool.path}": + )} + +
+ + + ) default: return null } diff --git a/webview-ui/src/components/chat/ChatTextArea.tsx b/webview-ui/src/components/chat/ChatTextArea.tsx index bed0c8eb..a3e1734a 100644 --- a/webview-ui/src/components/chat/ChatTextArea.tsx +++ b/webview-ui/src/components/chat/ChatTextArea.tsx @@ -1,7 +1,7 @@ import { VSCodeButton } from "@vscode/webview-ui-toolkit/react" import React, { forwardRef, useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from "react" import DynamicTextArea from "react-textarea-autosize" -import { useClickAway, useEvent, useWindowSize } from "react-use" +import { useClickAway, useDeepCompareEffect, useEvent, useWindowSize } from "react-use" import styled from "styled-components" import { mentionRegex, mentionRegexGlobal } from "@shared/context-mentions" import { ExtensionMessage } from "@shared/ExtensionMessage" @@ -42,7 +42,7 @@ import ClineRulesToggleModal from "../cline-rules/ClineRulesToggleModal" import { PlanActMode } from "@shared/proto/state" // TAG:HAI -import { ExpertData } from "@shared/experts.ts" +import { DocumentStatus, ExpertData } from "@shared/experts.ts" const getImageDimensions = (dataUrl: string): Promise<{ width: number; height: number }> => { return new Promise((resolve, reject) => { @@ -350,19 +350,25 @@ const ExpertsList = styled.div` gap: 8px; ` -const ExpertItem = styled.div<{ isSelected?: boolean }>` +const ExpertItem = styled.div<{ isSelected?: boolean; isDisabled?: boolean }>` padding: 6px 8px; font-size: 12px; - cursor: pointer; + cursor: ${(props) => (props.isDisabled ? "not-allowed" : "pointer")}; border-radius: 3px; background-color: ${(props) => (props.isSelected ? "var(--vscode-quickInputList-focusBackground)" : "transparent")}; - color: ${(props) => (props.isSelected ? "var(--vscode-quickInputList-focusForeground)" : "var(--vscode-foreground)")}; + color: ${(props) => + props.isDisabled + ? "var(--vscode-disabledForeground)" + : props.isSelected + ? 
"var(--vscode-quickInputList-focusForeground)" + : "var(--vscode-foreground)"}; display: flex; align-items: center; - justify-content: space-between; // Add this to push items to edges + justify-content: space-between; + opacity: ${(props) => (props.isDisabled ? 0.6 : 1)}; &:hover { - background-color: var(--vscode-list-hoverBackground); + background-color: ${(props) => (props.isDisabled ? "transparent" : "var(--vscode-list-hoverBackground)")}; } ` @@ -479,35 +485,73 @@ const ChatTextArea = forwardRef( } }, [selectedType, searchQuery]) - const handleMessage = useCallback((event: MessageEvent) => { - const message: ExtensionMessage = event.data - switch (message.type) { - case "fileSearchResults": { - // Only update results if they match the current query or if there's no mentionsRequestId - better UX - if (!message.mentionsRequestId || message.mentionsRequestId === currentSearchQueryRef.current) { - setFileSearchResults(message.results || []) - setSearchLoading(false) + useDeepCompareEffect(() => { + const messageHandler = (event: MessageEvent) => { + const message: ExtensionMessage = event.data + + switch (message.type) { + case "fileSearchResults": { + // Only update results if they match the current query or if there's no mentionsRequestId - better UX + if (!message.mentionsRequestId || message.mentionsRequestId === currentSearchQueryRef.current) { + setFileSearchResults(message.results || []) + setSearchLoading(false) + } + break } - break - } - // TAG:HAI - case "expertsUpdated": { - if (message.experts) { - setCustomExperts(message.experts) + // TAG:HAI + case "expertsUpdated": { + if (message.experts) { + setCustomExperts(message.experts) + + // Check if currently selected expert still exists + if (selectedExpert && !message.experts.find((expert) => expert.name === selectedExpert.name)) { + setSelectedExpert(null) + } + + // Set selected expert if provided + if (message.selectedExpert) { + setSelectedExpert(message.selectedExpert) + } + } + break } - break - 
} - case "defaultExpertsLoaded": { - if (message.experts) { - setDefaultExperts(message.experts) + case "defaultExpertsLoaded": { + if (message.experts) { + setDefaultExperts(message.experts) + + // Check if currently selected expert still exists in default experts + if ( + selectedExpert && + selectedExpert.isDefault && + !message.experts.find((expert) => expert.name === selectedExpert.name) + ) { + setSelectedExpert(null) + } + + // Set selected expert if provided + if (message.selectedExpert) { + setSelectedExpert(message.selectedExpert) + } + } + break } - break + + default: + console.warn(`Unhandled message type: ${message.type}`) } } - }, []) - useEvent("message", handleMessage) + window.addEventListener("message", messageHandler) + + // Load experts data on component mount + vscode.postMessage({ type: "loadExperts" }) + vscode.postMessage({ type: "loadDefaultExperts" }) + + return () => { + window.removeEventListener("message", messageHandler) + } + }, [customExperts, defaultExperts]) const queryItems = useMemo(() => { return [ @@ -1225,17 +1269,14 @@ const ChatTextArea = forwardRef( // Send the selected expert's prompt to the extension vscode.postMessage({ - type: "expertPrompt", + type: "selectExpert", text: expert?.name || "", prompt: expert?.prompt || "", - category: "selectExpert", + isDeepCrawlEnabled: expert?.deepCrawl || false, }) }, []) const handleExpertsButtonClick = useCallback(() => { - // Request custom experts from the extension - vscode.postMessage({ type: "loadExperts" }) - vscode.postMessage({ type: "loadDefaultExperts" }) setShowExpertsSelector(!showExpertsSelector) }, [showExpertsSelector]) @@ -1881,7 +1922,7 @@ const ChatTextArea = forwardRef( ) : ( )} @@ -1916,14 +1957,27 @@ const ChatTextArea = forwardRef( ))} {/* Custom experts without icons */} - {customExperts.map((expert) => ( - handleExpertSelect(expert)}> - {expert.name} - - ))} + {customExperts.map((expert) => { + const isProcessing = expert.status === DocumentStatus.PROCESSING 
+ return ( + handleExpertSelect(expert)}> + {expert.name} + {isProcessing && ( + + Processing + + )} + + ) + })} )} diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index 93fcad9f..5a629e65 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -396,6 +396,13 @@ const ChatView = ({ setPrimaryButtonText("Report GitHub issue") setSecondaryButtonText(undefined) break + case "guardrails_filter": + setSendingDisabled(false) + setClineAsk("followup") + setEnableButtons(false) + setPrimaryButtonText(undefined) + setSecondaryButtonText(undefined) + break } break case "say": @@ -411,6 +418,13 @@ const ChatView = ({ setEnableButtons(false) } break + case "guardrails_filter": + setSendingDisabled(false) + setClineAsk("followup") + setEnableButtons(false) + setPrimaryButtonText(undefined) + setSecondaryButtonText(undefined) + break case "task": case "error": case "api_req_finished": @@ -492,7 +506,6 @@ const ChatView = ({ } if (hasContent) { - console.log("[ChatView] handleSendMessage - Sending message:", messageToSend) if (messages.length === 0) { await TaskServiceClient.newTask({ text: messageToSend, images }) } else if (clineAsk) { @@ -799,6 +812,8 @@ const ChatView = ({ break case "mcp_server_request_started": return false + case "guardrails_filter": + return false } return true }) diff --git a/webview-ui/src/components/chat/TaskTimeline.tsx b/webview-ui/src/components/chat/TaskTimeline.tsx index aa046af1..5638f5a3 100644 --- a/webview-ui/src/components/chat/TaskTimeline.tsx +++ b/webview-ui/src/components/chat/TaskTimeline.tsx @@ -53,6 +53,8 @@ const getBlockColor = (message: ClineMessage): string => { return COLOR_PURPLE // Purple for browser actions case "completion_result": return COLOR_GREEN // Green for task success + case "guardrails_filter": + return COLOR_RED // Red for guardrails filter default: return COLOR_DARK_GRAY // Dark gray for unknown } @@ -87,6 
+89,8 @@ const getBlockColor = (message: ClineMessage): string => { return COLOR_PURPLE // Red for command approvals (same as terminal commands) case "browser_action_launch": return COLOR_PURPLE // Purple for browser launch approvals (same as browser actions) + case "guardrails_filter": + return COLOR_RED // Red for guardrails filter default: return COLOR_DARK_GRAY // Dark gray for unknown } diff --git a/webview-ui/src/components/experts/ExpertsView.tsx b/webview-ui/src/components/experts/ExpertsView.tsx index 40469cf1..4c83138b 100644 --- a/webview-ui/src/components/experts/ExpertsView.tsx +++ b/webview-ui/src/components/experts/ExpertsView.tsx @@ -1,6 +1,12 @@ import React, { useState, useEffect, memo, useMemo } from "react" import styled from "styled-components" -import { VSCodeButton, VSCodeTextField, VSCodeTextArea, VSCodeProgressRing } from "@vscode/webview-ui-toolkit/react" +import { + VSCodeButton, + VSCodeTextField, + VSCodeTextArea, + VSCodeProgressRing, + VSCodeCheckbox, +} from "@vscode/webview-ui-toolkit/react" import { vscode } from "../../utils/vscode" import { DocumentLink, DocumentStatus, ExpertData } from "../../../../src/shared/experts" import { useExtensionState } from "../../context/ExtensionStateContext" @@ -34,8 +40,12 @@ const ExpertsView: React.FC = ({ onDone }) => { expertName: string linkUrl: string } | null>(null) - - const { vscodeWorkspacePath } = useExtensionState() + const [deepCrawl, setDeepCrawl] = useState(false) + const [maxDepth, setMaxDepth] = useState(10) + const [maxPages, setMaxPages] = useState(20) + const [crawlTimeout, setCrawlTimeout] = useState(10_0000) + const [isEmbeddingValid, setIsEmbeddingValid] = useState(null) + const { vscodeWorkspacePath, embeddingConfiguration } = useExtensionState() const fileInputRef = React.useRef(null) const allExperts = useMemo(() => [...defaultExperts, ...customExperts], [defaultExperts, customExperts]) @@ -56,6 +66,9 @@ const ExpertsView: React.FC = ({ onDone }) => { 
setCustomExperts(message.experts) } break + case "embeddingConfigValidation": + setIsEmbeddingValid(!!message.bool) + break default: console.warn(`Unhandled message type: ${message.type}`) @@ -64,6 +77,7 @@ const ExpertsView: React.FC = ({ onDone }) => { window.addEventListener("message", messageHandler) vscode.postMessage({ type: "loadDefaultExperts" }) vscode.postMessage({ type: "loadExperts" }) + vscode.postMessage({ type: "validateEmbeddingConfig", embeddingConfiguration }) return () => { window.removeEventListener("message", messageHandler) } @@ -101,6 +115,10 @@ const ExpertsView: React.FC = ({ onDone }) => { setSelectedExpert(null) setIsFormReadOnly(false) setNameError(null) + setDeepCrawl(false) + setMaxDepth(10) + setMaxPages(20) + setCrawlTimeout(10_0000) } const handleSelectExpert = (expert: ExpertData) => { @@ -128,6 +146,15 @@ const ExpertsView: React.FC = ({ onDone }) => { }) return } + + if (deepCrawl && documentLinks.length === 0) { + vscode.postMessage({ + type: "showToast", + toast: { message: "At least one document link is required when DeepCrawl is enabled", toastType: "error" }, + }) + return + } + const expertExists = allExperts.some((expert) => expert.name.toLowerCase() === newExpertName.toLowerCase()) if (expertExists) { setNameError("An expert with this name already exists") @@ -143,6 +170,10 @@ const ExpertsView: React.FC = ({ onDone }) => { isDefault: false, createdAt: new Date().toISOString(), documentLinks: documentLinks.length > 0 ? documentLinks : undefined, + deepCrawl: deepCrawl, + maxDepth: deepCrawl ? maxDepth : undefined, + maxPages: deepCrawl ? maxPages : undefined, + crawlTimeout: deepCrawl ? 
crawlTimeout : undefined, } vscode.postMessage({ type: "saveExpert", @@ -189,11 +220,11 @@ const ExpertsView: React.FC = ({ onDone }) => { const expertToOpen = allExperts.find((expert) => expert.name === expertName) if (expertToOpen) { vscode.postMessage({ - type: "expertPrompt", + type: "viewExpertPrompt", text: expertName.trim(), - category: "viewExpert", isDefault: expertToOpen.isDefault, prompt: expertToOpen.isDefault ? expertToOpen.prompt : undefined, + isDeepCrawlEnabled: expertToOpen.deepCrawl, }) } } @@ -313,11 +344,14 @@ const ExpertsView: React.FC = ({ onDone }) => { - {link.status.toLowerCase() === "completed" ? ( + {link.status.toLowerCase() === + DocumentStatus.COMPLETED ? ( - ) : link.status.toLowerCase() === "failed" ? ( + ) : link.status.toLowerCase() === + DocumentStatus.FAILED ? ( - ) : link.status.toLowerCase() === "processing" ? ( + ) : link.status.toLowerCase() === + DocumentStatus.PROCESSING ? (
= ({ onDone }) => {
)} + + setDeepCrawl((e.target as HTMLInputElement).checked)} + disabled={isFormReadOnly || !isEmbeddingValid}> + DeepCrawl + +

+ Enabling deep crawl allows the crawler to explore websites beyond a single page by following internal links.

+
+ {deepCrawl && ( + <> + + + setMaxDepth(parseInt((e.target as HTMLInputElement).value) || 10)} + placeholder="10" + disabled={isFormReadOnly || !isEmbeddingValid} + style={{ width: "100%" }} + /> +

Sets the maximum link depth for the crawl.

+
+ + + setMaxPages(parseInt((e.target as HTMLInputElement).value) || 20)} + placeholder="10" + disabled={isFormReadOnly || !isEmbeddingValid} + style={{ width: "100%" }} + /> +

Sets the maximum number of unique pages to crawl.

+
+ + + + setCrawlTimeout(parseInt((e.target as HTMLInputElement).value) || 10_0000) + } + placeholder="10" + disabled={isFormReadOnly || !isEmbeddingValid} + style={{ width: "100%" }} + /> +

Sets the crawl timeout for each page.

+
+ + )} + {!isEmbeddingValid && ( +
+ + Valid embedding configuration required for deep crawling +
+ )} {!isFormReadOnly && ( diff --git a/webview-ui/src/components/settings/EmbeddingOptions.tsx b/webview-ui/src/components/settings/EmbeddingOptions.tsx index bb481458..a51ae296 100644 --- a/webview-ui/src/components/settings/EmbeddingOptions.tsx +++ b/webview-ui/src/components/settings/EmbeddingOptions.tsx @@ -1,4 +1,12 @@ -import { VSCodeCheckbox, VSCodeDropdown, VSCodeLink, VSCodeOption, VSCodeTextField } from "@vscode/webview-ui-toolkit/react" +import { + VSCodeCheckbox, + VSCodeDropdown, + VSCodeLink, + VSCodeOption, + VSCodeRadioGroup, + VSCodeTextField, + VSCodeRadio, +} from "@vscode/webview-ui-toolkit/react" import { memo, useEffect, useMemo, useState, useCallback } from "react" import { EmbeddingConfiguration, @@ -52,6 +60,8 @@ const EmbeddingOptions = ({ showModelOptions, showModelError = true, onValid }: awsSecretKey: apiConfiguration.awsSecretKey, awsSessionToken: apiConfiguration.awsSessionToken, awsRegion: apiConfiguration.awsRegion, + awsProfile: apiConfiguration.awsProfile, + awsUseProfile: apiConfiguration.awsUseProfile, }) } else if (apiConfiguration.apiProvider === "openai") { setEmbeddingConfiguration({ @@ -72,6 +82,10 @@ const EmbeddingOptions = ({ showModelOptions, showModelError = true, onValid }: } const newEmbeddingConfiguration = { ...embeddingConfiguration, [field]: event.target.value } + if (field === "awsProfile") { + // Make sure to preserve the awsUseProfile setting + newEmbeddingConfiguration.awsUseProfile = embeddingConfiguration?.awsUseProfile || false + } setEmbeddingConfiguration(newEmbeddingConfiguration) } @@ -209,34 +223,72 @@ const EmbeddingOptions = ({ showModelOptions, showModelError = true, onValid }: {selectedProvider === "bedrock" && (
- - - AWS Access Key * - - - - - AWS Secret Key * - - - - AWS Session Token - + { + const value = (e.target as HTMLInputElement)?.value + const useProfile = value === "profile" + setEmbeddingConfiguration({ + ...embeddingConfiguration, + awsUseProfile: useProfile, + }) + }}> + AWS Credentials + AWS Profile + + {embeddingConfiguration?.awsUseProfile ? ( + <> + + AWS Profile Name + + + ) : ( + <> + + + AWS Access Key * + + + + + AWS Secret Key * + + + + AWS Session Token + +

+ Authenticate by either providing the keys above or using the default AWS credential providers, i.e. + ~/.aws/credentials or environment variables. These credentials are only used locally to make API + requests from this extension.

+ + )}
-

- Authenticate by either providing the keys above or use the default AWS credential providers, i.e. - ~/.aws/credentials or environment variables. These credentials are only used locally to make API requests - from this extension. -

)} diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 8835a9b8..55c7b14d 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -50,6 +50,7 @@ import SettingsViewExtra from "./SettingsViewExtra" import EmbeddingOptions from "./EmbeddingOptions" import { CREATE_HAI_RULES_PROMPT, HAI_RULES_PATH } from "@utils/constants" import { validateEmbeddingConfiguration } from "@shared/validate" +import Guardrails from "./guardrails/Guardrails" // Styles for the tab system const settingsTabsContainer = "flex flex-1 overflow-hidden [&.narrow_.tab-label]:hidden" @@ -166,6 +167,7 @@ const SettingsView = ({ onDone, targetSection }: SettingsViewProps) => { embeddingConfiguration, vscodeWorkspacePath, enableInlineEdit, + guards, } = useExtensionState() // Store the original state to detect changes @@ -180,6 +182,7 @@ const SettingsView = ({ onDone, targetSection }: SettingsViewProps) => { embeddingConfiguration, buildContextOptions, enableInlineEdit, + guards, }) const [apiErrorMessage, setApiErrorMessage] = useState(undefined) const [modelIdErrorMessage, setModelIdErrorMessage] = useState(undefined) @@ -243,6 +246,11 @@ const SettingsView = ({ onDone, targetSection }: SettingsViewProps) => { enableInlineEdit, }) + vscode.postMessage({ + type: "updateGuards", + guards, + }) + if (!withoutDone) { onDone() } @@ -261,7 +269,8 @@ const SettingsView = ({ onDone, targetSection }: SettingsViewProps) => { // TAG:HAI JSON.stringify(embeddingConfiguration) !== JSON.stringify(originalState.current.embeddingConfiguration) || JSON.stringify(buildContextOptions) !== JSON.stringify(originalState.current.buildContextOptions) || - enableInlineEdit !== originalState.current.enableInlineEdit + enableInlineEdit !== originalState.current.enableInlineEdit || + JSON.stringify(guards) !== JSON.stringify(originalState.current.guards) setHasUnsavedChanges(hasChanges) 
}, [ @@ -275,6 +284,7 @@ const SettingsView = ({ onDone, targetSection }: SettingsViewProps) => { embeddingConfiguration, buildContextOptions, enableInlineEdit, + guards, ]) // Handle cancel button click @@ -638,6 +648,9 @@ const SettingsView = ({ onDone, targetSection }: SettingsViewProps) => { vscodeWorkspacePath={vscodeWorkspacePath} buildIndexProgress={buildIndexProgress} /> + + {/* Guardrails */} + )} diff --git a/webview-ui/src/components/settings/guardrails/Guardrails.css b/webview-ui/src/components/settings/guardrails/Guardrails.css new file mode 100644 index 00000000..cff42eea --- /dev/null +++ b/webview-ui/src/components/settings/guardrails/Guardrails.css @@ -0,0 +1,158 @@ +.guardrails-container { + display: flex; + flex-direction: column; + gap: 16px; + padding: 16px 0; + border-top: 1px solid var(--vscode-panel-border); +} + +.guardrails-header { + margin-bottom: 8px; +} + +.guardrails-header h3 { + margin: 0 0 4px 0; + font-size: 14px; + font-weight: 600; + color: var(--vscode-foreground); +} + +.guardrails-description { + margin: 0; + font-size: 12px; + color: var(--vscode-descriptionForeground); +} + +.guards-list { + display: flex; + flex-direction: column; + gap: 12px; +} + +.guard-item { + padding: 12px; + border: 1px solid var(--vscode-widget-border); + border-radius: 4px; + background-color: var(--vscode-editor-background); +} + +.guard-item:not(:has(.guard-threshold-section)) { + padding: 12px 8px 6px; +} + +.guard-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 8px; + flex-wrap: wrap; +} + +.guard-info { + display: flex; + align-items: center; + gap: 8px; +} + +.guard-status-icon { + color: var(--vscode-testing-iconPassed); + cursor: default; +} + +.guard-name { + font-size: 13px; + font-weight: 500; + color: var(--vscode-foreground); +} + +.guard-mode-selector { + min-width: 80px; +} + +.guard-threshold-section { + margin-top: 8px; + padding-top: 8px; + border-top: 1px solid 
var(--vscode-widget-border); +} + +.threshold-header { + display: flex; + justify-content: space-between; + align-items: center; + margin-bottom: 8px; +} + +.threshold-label { + font-size: 12px; + color: var(--vscode-descriptionForeground); + font-weight: 500; +} + +.threshold-value { + display: flex; + align-items: center; + gap: 4px; +} + +.threshold-indicator { + font-size: 12px; +} + +.threshold-level { + font-size: 12px; + color: var(--vscode-foreground); + font-weight: 500; +} + +.threshold-controls { + display: flex; + justify-content: space-between; + align-items: center; + gap: 8px; +} + +.threshold-buttons { + display: flex; + gap: 4px; + flex: 1; + flex-wrap: wrap; +} + +.threshold-button { + flex: 1; + font-size: 11px; + min-height: 24px; +} + +.guardrails-footer { + margin-top: 8px; + padding-top: 8px; + border-top: 1px solid var(--vscode-widget-border); +} + +.recommendation { + display: flex; + align-items: center; + gap: 6px; + font-size: 11px; + color: var(--vscode-descriptionForeground); +} + +.recommendation .codicon { + color: var(--vscode-notificationsInfoIcon-foreground); +} + +/* Dark theme adjustments */ +.vscode-dark .guard-item { + background-color: var(--vscode-sideBar-background); +} + +/* Light theme adjustments */ +.vscode-light .guard-item { + background-color: var(--vscode-editor-background); +} + +/* High contrast theme adjustments */ +.vscode-high-contrast .guard-item { + border: 2px solid var(--vscode-contrastBorder); +} diff --git a/webview-ui/src/components/settings/guardrails/Guardrails.tsx b/webview-ui/src/components/settings/guardrails/Guardrails.tsx new file mode 100644 index 00000000..b7410149 --- /dev/null +++ b/webview-ui/src/components/settings/guardrails/Guardrails.tsx @@ -0,0 +1,161 @@ +import { memo, useEffect } from "react" +import { vscode } from "../../../utils/vscode" +import "./Guardrails.css" +import { VSCodeButton, VSCodeDropdown, VSCodeOption } from "@vscode/webview-ui-toolkit/react" +import { 
useExtensionState } from "@/context/ExtensionStateContext" + +interface Guard { + key: string + name: string + hasThreshold: boolean + threshold?: number + mode?: string +} + +const Guardrails = () => { + const { guards, setGuards } = useExtensionState() + + useEffect(() => { + const messageHandler = (event: MessageEvent) => { + const message = event.data + + switch (message.type) { + case "defaultGuards": + if (message.guards) { + setGuards(message.guards) + } + break + + default: + console.warn(`Unhandled message type: ${message.type}`) + } + } + window.addEventListener("message", messageHandler) + vscode.postMessage({ type: "loadGuards" }) + return () => { + window.removeEventListener("message", messageHandler) + } + }, []) + + const handleGuardThresholdChange = (guard: Guard, newThreshold: number) => { + if (guards) { + const updatedGuards = guards.map((g) => (g.key === guard.key ? { ...g, threshold: newThreshold } : g)) + setGuards(updatedGuards) + } + } + + const handleGuardModeChange = (guard: Guard, mode: string) => { + if (guards) { + const updatedGuards = guards.map((g) => (g.key === guard.key ? { ...g, mode: mode } : g)) + setGuards(updatedGuards) + } + } + + const getThresholdColor = (threshold: number) => { + if (threshold <= 0.25) return "var(--vscode-testing-iconPassed)" + if (threshold <= 0.5) return "var(--vscode-testing-iconQueued)" + return "var(--vscode-testing-iconFailed)" + } + + const getThresholdLevel = (threshold: number) => { + if (threshold <= 0.25) return "High" + if (threshold <= 0.5) return "Medium" + return "Low" + } + + const toggleLabel = (value: number) => { + if (value == null) return <>No Threshold + switch (value) { + case 0: + return "Max" + case 0.25: + return "High" + case 0.5: + return "Medium" + case 0.75: + return "Low" + case 1: + return "Off" + default: + return "Invalid Config" + } + } + + return ( +
+
+

Active Guards

+

Configure security guards to protect your code generation

+
+ +
+ {guards?.map((guard, index) => ( +
+
+
+ + + + {guard.name} +
+ + {["secret", "pii"].includes(guard.key) && ( +
+ handleGuardModeChange(guard, (e.target as HTMLSelectElement).value)}> + Block + Redact + +
+ )} +
+ {guard.hasThreshold && guard.threshold != null && ( +
+
+ Sensitivity +
+ + ● + + {getThresholdLevel(guard.threshold)} +
+
+ +
+
+ {[1.0, 0.75, 0.5, 0.25, 0].map((value) => ( + handleGuardThresholdChange(guard, value)} + className="threshold-button"> + {toggleLabel(value)} + {/* {value === 0.25 ? "Low" : value === 0.5 ? "Med" : value === 0.75 ? "High" : "Max"} */} + + ))} +
+
+
+ )} +
+ ))} +
+
+ + + Recommended threshold: 0.75 for optimal accuracy + +
+
+ To proceed after disabling the guardrail, please create a new task. +
+ +
+
+ ) +} + +export default memo(Guardrails) diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index 3debfc72..89407493 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -3,7 +3,7 @@ import { useEvent } from "react-use" import { StateServiceClient } from "@/services/grpc-client" import { EmptyRequest } from "@shared/proto/common" import { DEFAULT_AUTO_APPROVAL_SETTINGS } from "@shared/AutoApprovalSettings" -import { ExtensionMessage, ExtensionState, DEFAULT_PLATFORM } from "@shared/ExtensionMessage" +import { ExtensionMessage, ExtensionState, DEFAULT_PLATFORM, Guard } from "@shared/ExtensionMessage" import { ApiConfiguration, ModelInfo, @@ -97,6 +97,7 @@ interface ExtensionStateContextType extends ExtensionState { setBuildContextOptions: (value: HaiBuildContextOptions) => void setHaiConfig: (value: { [key in string]: any }) => void setEmbeddingConfiguration: (config: EmbeddingConfiguration) => void + setGuards: (value: Guard[]) => void navigateToExperts: () => void hideExperts: () => void } @@ -754,6 +755,11 @@ export const ExtensionStateContextProvider: React.FC<{ ...prevState, embeddingConfiguration: value, })), + setGuards: (value) => + setState((prevState) => ({ + ...prevState, + guards: value, + })), navigateToExperts, hideExperts, }