From 63cb80e633ac613f0ca94f220cfc5acdba991bf7 Mon Sep 17 00:00:00 2001
From: betterclever
Date: Sun, 4 Jan 2026 23:20:08 +0530
Subject: [PATCH 1/8] docs: rewrite component development guide

- Add file location guide and ID naming conventions
- Document ExecutionContext services (logger, secrets, artifacts)
- Add error handling section with SDK error types
- Add testing section (unit, integration, E2E patterns)
- Remove content duplicated in isolated-volumes.mdx
- Fix metadata section syntax error
- Keep important Docker/PTY patterns and decision tree

Signed-off-by: betterclever
---
 docs/development/component-development.mdx | 625 +++++++++++++++------
 1 file changed, 462 insertions(+), 163 deletions(-)

diff --git a/docs/development/component-development.mdx b/docs/development/component-development.mdx
index 248823e9..9749ea7f 100644
--- a/docs/development/component-development.mdx
+++ b/docs/development/component-development.mdx
@@ -3,26 +3,96 @@ title: "Component Development"
description: "Build custom security components for ShipSec Studio"
---

-This guide provides best practices and required patterns for developing ShipSec Studio components.
+This guide covers everything you need to build components for ShipSec Studio.

---

-## Component Basics
+## Getting Started

-### Quick Start
+### File Location
+
+Components live in `worker/src/components/<category>/`:
+
+```
+worker/src/components/
+├── security/        # Security tools (dnsx, subfinder, nuclei)
+├── core/            # Core utilities (http-request, file-loader)
+├── ai/              # AI components (llm, agents)
+├── notification/    # Notifications (slack, email)
+├── manual-action/   # Human-in-the-loop (approvals, forms)
+└── it-automation/   # IT ops (okta, google workspace)
+```
+
+### ID Naming Convention
+
+```
+<namespace>.<tool>.<action>
+
+Examples:
+  shipsec.dnsx.run       # Security tool
+  core.http.request      # Core utility
+  ai.llm.generate        # AI component
+  notification.slack     # Notification
+```
+
+---
+
+## Runner Types
+
+| Type | Use Case | Example |
+|------|----------|---------|
+| `inline` | Pure TypeScript (HTTP calls, transforms, logic) | FileLoader, WebhookPost, HTTP Request |
+| `docker` | CLI tools in containers | Subfinder, DNSX, Nuclei |
+| `remote` | External executors (future) | K8s jobs, ECS tasks |
+
+### Inline Component Example

```typescript
import { z } from 'zod';
import { componentRegistry, ComponentDefinition } from '@shipsec/component-sdk';

const inputSchema = z.object({
-  target: z.string()
+  url: z.string().url(),
+  method: z.enum(['GET', 'POST']).default('GET'),
});

const outputSchema = z.object({
-  result: z.string()
+  status: z.number(),
+  body: z.string(),
});

+type Input = z.infer<typeof inputSchema>;
+type Output = z.infer<typeof outputSchema>;
+
+const definition: ComponentDefinition<Input, Output> = {
+  id: 'core.http.request',
+  label: 'HTTP Request',
+  category: 'transform',
+  runner: { kind: 'inline' },
+  inputSchema,
+  outputSchema,
+  async execute(input, context) {
+    context.logger.info(`Fetching ${input.url}`);
+    const response = await fetch(input.url, { method: input.method });
+    return { status: response.status, body: await response.text() };
+  }
+};
+
+componentRegistry.register(definition);
+export default definition;
+```
+
+### Docker Component Example
+
+```typescript
+import { z } from 'zod';
+import {
+  componentRegistry,
+  ComponentDefinition,
+  DockerRunnerConfig,
+  runComponentWithRunner
+} from '@shipsec/component-sdk';
+
const definition: ComponentDefinition = {
  id: 'shipsec.tool.scan',
  label: 'Tool Scanner',
@@ -30,67 +100,251 @@ const definition: ComponentDefinition = {
  runner: {
    kind: 'docker', 
image: 'tool:latest', - command: [/* build args */], + entrypoint: 'sh', + command: ['-c', 'tool "$@"', '--'], // Shell wrapper for PTY network: 'bridge' }, inputSchema, outputSchema, async execute(input, context) { - // Implementation + const args = ['-json', '-target', input.target]; + + const runnerConfig: DockerRunnerConfig = { + ...this.runner, + command: [...(this.runner.command ?? []), ...args], + }; + + const rawOutput = await runComponentWithRunner( + runnerConfig, + async () => ({} as Output), + input, + context + ); + + return parseOutput(rawOutput); } }; componentRegistry.register(definition); +export default definition; ``` --- -## Docker Component Requirements +## ExecutionContext - - All Docker-based components run with PTY (pseudo-terminal) enabled by default in workflows. Your component MUST be designed for PTY mode. - +The `context` passed to `execute()` provides services and utilities: -### Shell Wrapper Pattern (Required) +```typescript +async execute(input, context) { + // Logging (shows in UI timeline) + context.logger.info('Starting scan...'); + context.logger.warn('Rate limit approaching'); + context.logger.error('Failed to connect'); + + // Progress events (shows in UI) + context.emitProgress('Processing 50 targets...'); + + // Secrets (encrypted, from secret manager) + const apiKey = await context.secrets?.get('API_KEY'); + + // File downloads (from MinIO) + const file = await context.storage?.downloadFile(input.fileId); + + // Artifact uploads (saved to MinIO, shown in UI) + await context.artifacts?.upload({ + name: 'report.json', + content: Buffer.from(JSON.stringify(results)), + mimeType: 'application/json', + }); + + // Run metadata + const { runId, componentRef } = context; +} +``` -All Docker-based components MUST use a shell wrapper for PTY compatibility: +--- + +## Component Metadata + +Define UI metadata for the workflow builder: ```typescript -// ✅ CORRECT - Shell wrapper pattern +import { port } from '@shipsec/component-sdk'; + const definition: ComponentDefinition = { - id: 'shipsec.tool.scan', - runner: { - kind: 'docker', - image: 'tool:latest', - entrypoint: 'sh', // Shell wrapper - command: ['-c', 'tool "$@"', '--'], // Wraps CLI execution - network: 'bridge', + id: 'shipsec.dnsx.scan', + label: 'DNSX Scanner', + category: 'security', + runner: { ... }, + inputSchema, + outputSchema, + + metadata: { + slug: 'dnsx', + version: '1.0.0', + type: 'scan', + category: 'security', + description: 'DNS resolution and enumeration', + icon: 'dns', + author: { name: 'ShipSec', type: 'shipsecai' }, + inputs: [ + { id: 'domains', label: 'Domains', dataType: port.list(port.text()), required: true }, + ], + outputs: [ + { id: 'results', label: 'Results', dataType: port.json() }, + ], + parameters: [ + { id: 'threads', label: 'Threads', type: 'number', default: 10 }, + { id: 'recordTypes', label: 'Record Types', type: 'multi-select', options: [ + { label: 'A', value: 'A' }, + { label: 'AAAA', value: 'AAAA' }, + { label: 'CNAME', value: 'CNAME' }, + ]}, + ], }, - async execute(input, context) { - const args = ['-json', '-output', '/data/results.json']; +}; +``` - const config: DockerRunnerConfig = { - ...this.runner, - command: [...(this.runner.command ?? 
[]), ...args],
-    };
+
+### Port Utilities
+
+```typescript
+import { port } from '@shipsec/component-sdk';
+
+port.text()               // String
+port.number()             // Number
+port.boolean()            // Boolean
+port.secret()             // Secret value
+port.json()               // JSON object
+port.any()                // Any type
+port.list(port.text())    // Array<string>
+port.map(port.text())     // Record<string, string>
+port.credential('github') // OAuth credential contract
+```
+
+---

-    return runComponentWithRunner(config, input, context);
+## Dynamic Ports (resolvePorts)
+
+Components can dynamically generate input/output ports based on parameter values:
+
+```typescript
+import { port } from '@shipsec/component-sdk';
+
+const definition: ComponentDefinition = {
+  id: 'core.workflow.call',
+  // ... other fields ...
+
+  resolvePorts(params) {
+    const dynamicInputs = (params.childRuntimeInputs || []).map(input => ({
+      id: input.id,
+      label: input.label || input.id,
+      dataType: port.text(),
+      required: input.required ?? true,
+    }));
+
+    return {
+      inputs: dynamicInputs,
+      outputs: this.metadata?.outputs ?? [],
+    };
  }
};
```

+**Use cases:** Workflow calls, Slack templates, manual actions with dynamic options.
+
+---
+
+## Retry Policy
+
+Components can specify custom retry behavior (maps to Temporal activity retry):
+
```typescript
-// ❌ WRONG - Direct binary execution
const definition: ComponentDefinition = {
-  runner: {
-    kind: 'docker',
-    image: 'tool:latest',
-    entrypoint: 'tool',  // No shell wrapper - will hang
-    command: ['-read-stdin', '-output'],
-  }
+  id: 'shipsec.api.call',
+  // ... other fields ...
+
+  retryPolicy: {
+    maxAttempts: 5,              // Max retries (0 = unlimited, 1 = no retry)
+    initialIntervalSeconds: 2,   // Initial delay
+    maximumIntervalSeconds: 120, // Max delay
+    backoffCoefficient: 2.0,     // Exponential backoff
+    nonRetryableErrorTypes: [    // Errors that should NOT retry
+      'AuthenticationError',
+      'ValidationError',
+    ],
+  },
};
```

+**Default policy:** 3 attempts, 1s initial, 60s max, 2x backoff.
+
+---
+
+## Error Handling
+
+Use SDK error types for proper retry behavior:
+
+```typescript
+import {
+  NetworkError,        // Retryable - network issues
+  RateLimitError,      // Retryable - with delay
+  ServiceError,        // Retryable - 5xx errors
+  AuthenticationError, // Non-retryable - bad credentials
+  ValidationError,     // Non-retryable - bad input
+  NotFoundError,       // Non-retryable - resource missing
+  fromHttpResponse,    // Convert HTTP response to error
+  wrapError,           // Wrap unknown errors
+} from '@shipsec/component-sdk';
+
+async execute(input, context) {
+  try {
+    const response = await fetch(url);
+
+    if (!response.ok) {
+      throw fromHttpResponse(response, await response.text());
+    }
+
+    return await response.json();
+  } catch (error) {
+    throw wrapError(error, 'Failed to call API');
+  }
+}
+```
+
+---
+
+## Docker Component Requirements
+
+<Warning>
+  All Docker-based components run with PTY (pseudo-terminal) enabled by default in workflows. Your component MUST be designed for PTY mode.
+</Warning>
+
+### Shell Wrapper Pattern (Required)
+
+All Docker-based components MUST use a shell wrapper for PTY compatibility:
+
+```typescript
+// ✅ CORRECT - Shell wrapper pattern
+runner: {
+  kind: 'docker',
+  image: 'tool:latest',
+  entrypoint: 'sh',                    // Shell wrapper
+  command: ['-c', 'tool "$@"', '--'],  // Wraps CLI execution
+  network: 'bridge',
+}
+```
+
+```typescript
+// ❌ WRONG - Direct binary execution
+runner: {
+  kind: 'docker',
+  image: 'tool:latest',
+  entrypoint: 'tool',                  // No shell wrapper - will hang
+  command: ['-read-stdin', '-output'],
+}
+```
+
### Why Shell Wrappers? 
| Benefit | Description | @@ -112,7 +366,7 @@ Does your Docker image have a shell (/bin/sh)? │ entrypoint: 'tool', command: ['-stream', ...] │ └─ NO → Rely on SDK stdin handling - Note: May have buffering issues + Note: May have buffering issues ``` --- @@ -123,113 +377,40 @@ Does your Docker image have a shell (/bin/sh)? All components that require file-based input/output MUST use the `IsolatedContainerVolume` utility for Docker-in-Docker compatibility and multi-tenant security. -### Standard File Access Pattern +For detailed patterns and security guarantees, see [Isolated Volumes](/development/isolated-volumes). + +### Quick Example ```typescript import { IsolatedContainerVolume } from '../../utils/isolated-volume'; -import type { DockerRunnerConfig } from '@shipsec/component-sdk'; -async execute(input: Input, context: ExecutionContext): Promise { - // 1. Get tenant ID +async execute(input, context) { const tenantId = (context as any).tenantId ?? 'default-tenant'; - - // 2. Create volume const volume = new IsolatedContainerVolume(tenantId, context.runId); try { - // 3. Prepare files - const files: Record = { + await volume.initialize({ 'targets.txt': input.targets.join('\n') - }; + }); - // 4. Initialize volume - await volume.initialize(files); - context.logger.info(`Created volume: ${volume.getVolumeName()}`); - - // 5. Build command args - const args = buildCommandArgs(input); - - // 6. Configure runner const runnerConfig: DockerRunnerConfig = { - kind: 'docker', - image: 'tool:latest', - command: args, - network: 'bridge', - volumes: [ - volume.getVolumeConfig('/inputs', true) // read-only - ] + ...this.runner, + command: [...(this.runner.command ?? []), '-l', '/inputs/targets.txt'], + volumes: [volume.getVolumeConfig('/inputs', true)] // read-only }; - // 7. Execute - const rawOutput = await runComponentWithRunner( - runnerConfig, - async () => ({} as Output), - input, - context - ); - - // 8. Parse and return - return parseOutput(rawOutput); - + return await runComponentWithRunner(runnerConfig, parseOutput, input, context); } finally { - // 9. ALWAYS cleanup - await volume.cleanup(); - context.logger.info('Cleaned up volume'); + await volume.cleanup(); // ALWAYS cleanup } } ``` -### Input + Output Files - -```typescript -const volume = new IsolatedContainerVolume(tenantId, context.runId); - -try { - // Write inputs - await volume.initialize({ 'config.json': JSON.stringify(cfg) }); - - // Tool writes to same volume - const config = { - command: [ - '--config', '/data/config.json', - '--output', '/data/results.json' - ], - volumes: [volume.getVolumeConfig('/data', false)] // read-write - }; - - await runComponentWithRunner(config, ...); - - // Read outputs - const outputs = await volume.readFiles(['results.json', 'errors.log']); - return JSON.parse(outputs['results.json']); -} finally { - await volume.cleanup(); -} -``` - --- -## Output Buffering Solutions - -Even with PTY enabled, some CLI tools buffer their output. Use the shell wrapper pattern for PTY compatibility: - -```typescript -runner: { - kind: 'docker', - image: 'projectdiscovery/nuclei:latest', - // Use shell wrapper for PTY compatibility - // Running CLI tools directly as entrypoint can cause them to hang with PTY - // The shell wrapper ensures proper TTY signal handling and clean exit - entrypoint: 'sh', - // Shell wrapper pattern: sh -c 'nuclei "$@"' -- [args...] 
- // This allows dynamic args to be appended and properly passed to the tool
-  command: ['-c', 'nuclei "$@"', '--'],
-}
-```
-
## UI-Only Components

-Components that are purely for UI purposes (documentation, notes) should be marked:
+Components that are purely for UI purposes (documentation, notes):

```typescript
const definition: ComponentDefinition = {
@@ -250,41 +431,54 @@ const definition: ComponentDefinition = {

---

-## Security Requirements
+## Testing

-### Tenant Isolation
-
-Every execution gets a unique volume:
-```
-tenant-{tenantId}-run-{runId}-{timestamp}
-```
+### Unit Tests

-### Automatic Cleanup
+Located alongside component: `worker/src/components/<category>/__tests__/<name>.test.ts`

```typescript
-try {
-  await volume.initialize(...);
-  // ... use volume ...
-} finally {
-  await volume.cleanup(); // MUST be in finally
-}
+import { describe, it, expect, vi } from 'bun:test';
+import * as sdk from '@shipsec/component-sdk';
+import { componentRegistry } from '../../index';
+
+describe('my-component', () => {
+  it('should process input correctly', async () => {
+    const component = componentRegistry.get('my.component.id');
+
+    const context = sdk.createExecutionContext({
+      runId: 'test-run',
+      componentRef: 'test-node',
+    });
+
+    // Mock the runner for Docker components
+    vi.spyOn(sdk, 'runComponentWithRunner').mockResolvedValue('mock output');
+
+    const result = await component!.execute({ target: 'example.com' }, context);
+
+    expect(result.success).toBe(true);
+  });
+});
```

-### Read-Only Mounts
+**Run:** `bun --cwd worker test`
+
+### Integration Tests (Docker)
+
+Same folder with `<name>-integration.test.ts`. Uses real Docker containers.

```typescript
-// Input files should be read-only
-volume.getVolumeConfig('/inputs', true) // ✅ read-only
+const enableDocker = process.env.ENABLE_DOCKER_TESTS === 'true';
+const dockerDescribe = enableDocker ? describe : describe.skip;

-// Only make writable if tool needs to write
-volume.getVolumeConfig('/outputs', false) // ⚠️ read-write
+dockerDescribe('Component Integration', () => {
+  // Tests that run real Docker containers
+});
```

----
-
-## Testing Checklist
+**Run:** `ENABLE_DOCKER_TESTS=true bun --cwd worker test`

-### Before Deployment
+### Testing Checklist

- [ ] Used `entrypoint: 'sh'` with `command: ['-c', 'tool "$@"', '--']`
- [ ] Tested with `docker run --rm -t` (PTY mode)
- [ ] Tool arguments appended after `'--'` in command array
- [ ] Workflow run completes successfully

-### Volume Testing
-
-```bash
-# Before execution
-docker volume ls --filter "label=studio.managed=true"
-
-# After execution (should be same or empty)
-docker volume ls --filter "label=studio.managed=true"
-```
-
### PTY Testing

```bash
docker run --rm -t your-image:latest sh -c 'tool "$@"' -- -flag value

timeout 5 docker run --rm -t your-image:latest sh -c 'tool "$@"' -- --help
```

---
+### E2E Tests (Full Stack)
+
+E2E tests validate your component works with the entire platform: Backend API, Worker, Temporal, and infrastructure.
+
+Located in `e2e-tests/`. These tests create real workflows via the API and execute them. 
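+
+At a high level, each test creates a workflow via `POST /workflows`, starts it with `POST /workflows/{workflowId}/run`, and polls `GET /workflows/runs/{runId}/status` until the run reaches a terminal state (`COMPLETED`, `FAILED`, or `CANCELLED`). While writing a test, you can hit the same status endpoint by hand; a minimal sketch, assuming the local defaults used below (replace `<runId>` with a real run ID):
+
+```bash
+# Check a run's status against the local backend
+curl http://localhost:3211/api/v1/workflows/runs/<runId>/status \
+  -H "x-internal-token: local-internal-token"
+```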
+ +**Prerequisites:** +```bash +# Start full local environment +just dev + +# Verify services are running +curl http://localhost:3211/api/v1/health -H "x-internal-token: local-internal-token" +``` + +**Run E2E tests:** +```bash +RUN_E2E=true bun --cwd e2e-tests test +``` + +**Example E2E test pattern:** + +```typescript +import { describe, test, expect } from 'bun:test'; + +const API_BASE = 'http://localhost:3211/api/v1'; +const HEADERS = { + 'Content-Type': 'application/json', + 'x-internal-token': 'local-internal-token', +}; + +// Only run when RUN_E2E=true and services are available +const runE2E = process.env.RUN_E2E === 'true'; +const e2eDescribe = runE2E ? describe : describe.skip; + +// Helper to poll workflow status until completion +async function pollRunStatus(runId: string, timeoutMs = 180000) { + const startTime = Date.now(); + while (Date.now() - startTime < timeoutMs) { + const res = await fetch(`${API_BASE}/workflows/runs/${runId}/status`, { headers: HEADERS }); + const status = await res.json(); + if (['COMPLETED', 'FAILED', 'CANCELLED'].includes(status.status)) { + return status; + } + await new Promise(r => setTimeout(r, 1000)); + } + throw new Error(`Timeout waiting for workflow ${runId}`); +} + +e2eDescribe('My Component E2E', () => { + test('should execute in a real workflow', async () => { + // 1. Create workflow with your component + const workflow = { + name: 'Test: My Component', + nodes: [ + { + id: 'start', + type: 'core.workflow.entrypoint', + position: { x: 0, y: 0 }, + data: { label: 'Start', config: { runtimeInputs: [] } }, + }, + { + id: 'my-node', + type: 'my.component.id', // Your component ID + position: { x: 200, y: 0 }, + data: { + label: 'My Component', + config: { target: 'example.com' }, + }, + }, + ], + edges: [{ id: 'e1', source: 'start', target: 'my-node' }], + }; + + // 2. Create workflow via API + const createRes = await fetch(`${API_BASE}/workflows`, { + method: 'POST', + headers: HEADERS, + body: JSON.stringify(workflow), + }); + const { id: workflowId } = await createRes.json(); + + // 3. Execute workflow + const runRes = await fetch(`${API_BASE}/workflows/${workflowId}/run`, { + method: 'POST', + headers: HEADERS, + body: JSON.stringify({ inputs: {} }), + }); + const { runId } = await runRes.json(); + + // 4. Poll until completion + const result = await pollRunStatus(runId); + + // 5. Assert results + expect(result.status).toBe('COMPLETED'); + }, 180000); // 3 minute timeout for workflow execution +}); +``` + + + E2E tests are **not** run in CI yet. They require the full local environment (`just dev`) and are intended for manual validation during development. + ## Complete Example @@ -323,7 +608,8 @@ import { componentRegistry, ComponentDefinition, DockerRunnerConfig, - runComponentWithRunner + runComponentWithRunner, + port, } from '@shipsec/component-sdk'; import { IsolatedContainerVolume } from '../../utils/isolated-volume'; @@ -357,32 +643,44 @@ const definition: ComponentDefinition = { inputSchema, outputSchema, + metadata: { + slug: 'dnsx', + version: '1.0.0', + type: 'scan', + category: 'security', + description: 'DNS resolution and enumeration', + inputs: [ + { id: 'domains', label: 'Domains', dataType: port.list(port.text()), required: true }, + ], + outputs: [ + { id: 'results', label: 'Results', dataType: port.json() }, + ], + }, + async execute(input, context) { const tenantId = (context as any).tenantId ?? 
'default-tenant';
    const volume = new IsolatedContainerVolume(tenantId, context.runId);

    try {
-      // Prepare input files
+      context.emitProgress('Preparing input files...');
      await volume.initialize({
        'domains.txt': input.domains.join('\n')
      });

-      // Build command args
      const args = [
        '-l', '/inputs/domains.txt',
        '-json',
        '-t', String(input.threads),
-        '-stream', // Prevent output buffering
+        '-stream',
      ];

-      // Configure runner
      const runnerConfig: DockerRunnerConfig = {
        ...this.runner,
        command: [...(this.runner.command ?? []), ...args],
        volumes: [volume.getVolumeConfig('/inputs', true)],
      };

-      // Execute
+      context.emitProgress('Running DNSX...');
      const rawOutput = await runComponentWithRunner(
        runnerConfig,
        async (stdout) => {
@@ -400,6 +698,7 @@ const definition: ComponentDefinition = {
        context
      );

+      context.logger.info(`Found ${rawOutput.results.length} results`);
      return rawOutput;

    } finally {
@@ -416,7 +715,7 @@ export default definition;

## Questions?

-- Component development: Check this guide
- File access patterns: See [Isolated Volumes](/development/isolated-volumes)
-- Security questions: Email support@shipsec.ai
+- SDK source: `packages/component-sdk/src/`
+- Example components: `worker/src/components/security/`
- Bug reports: GitHub Issues

From 4b2fe2615f0b4b889cd816e09921959aef281435 Mon Sep 17 00:00:00 2001
From: betterclever
Date: Sun, 4 Jan 2026 23:20:19 +0530
Subject: [PATCH 2/8] docs: add agent skills system with openskills format

- Refactor AGENTS.md to be concise with essential commands
- Add architecture overview with data flow diagram
- Create .claude/skills/ directory for modular skills
- Add component-development skill with agent-focused instructions
- Remove .claude/ from .gitignore to track skills
- Use openskills XML format for skill discovery

Signed-off-by: betterclever
---
 .claude/skills/component-development/SKILL.md | 195 +++++++++
 .gitignore                                    |   1 -
 AGENTS.md                                     | 384 +++++----------
 3 files changed, 261 insertions(+), 319 deletions(-)
 create mode 100644 .claude/skills/component-development/SKILL.md

diff --git a/.claude/skills/component-development/SKILL.md b/.claude/skills/component-development/SKILL.md
new file mode 100644
index 00000000..b8e50aa9
--- /dev/null
+++ b/.claude/skills/component-development/SKILL.md
@@ -0,0 +1,195 @@
+---
+name: component-development
+description: Creating components (inline/docker). Dynamic ports, retry policies, PTY patterns, IsolatedContainerVolume.
+---

# Component Development

**Full guide:** `docs/development/component-development.mdx`

---

## Quick Reference

### File Location
```
worker/src/components/<category>/<name>.ts
```
Categories: `security/`, `core/`, `ai/`, `notification/`, `manual-action/`, `it-automation/`

### ID Pattern
```
<namespace>.<tool>.<action>
```
Examples: `shipsec.dnsx.run`, `core.http.request`, `ai.llm.generate`

### Minimal Component
```typescript
import { z } from 'zod';
import { componentRegistry, ComponentDefinition } from '@shipsec/component-sdk';

const definition: ComponentDefinition = {
  id: 'category.tool.action',
  label: 'My Component',
  category: 'security', // or: core, ai, notification, manual_action, it_ops
  runner: { kind: 'inline' }, // or: docker
  inputSchema: z.object({ ... }),
  outputSchema: z.object({ ... }),
  async execute(input, context) { ... }
};

componentRegistry.register(definition);
export default definition;
```

---

## Agent Instructions

### When Creating a New Component

1. 
**Check existing components** in same category for patterns
   ```bash
   ls worker/src/components/<category>/
   ```

2. **Copy structure from similar component** — don't start from scratch

3. **Always include:**
   - `inputSchema` + `outputSchema` (Zod)
   - `metadata` block with inputs/outputs/parameters
   - Unit test in `__tests__/<name>.test.ts`

4. **For Docker components:**
   - MUST use shell wrapper: `entrypoint: 'sh', command: ['-c', 'tool "$@"', '--']`
   - MUST use `IsolatedContainerVolume` for file I/O
   - Reference: `worker/src/components/security/dnsx.ts`

### Quick Component Checklist

```
□ ID follows pattern: namespace.tool.action
□ File in correct category folder
□ inputSchema/outputSchema defined with Zod
□ metadata.inputs/outputs match schema
□ Docker: shell wrapper pattern used
□ Docker with files: IsolatedContainerVolume used
□ Unit test created
□ Registered with componentRegistry.register()
□ Exported as default
```

---

## Key Patterns (Quick Look)

### Inline Component
```typescript
runner: { kind: 'inline' }
// Just write TypeScript in execute()
```

### Docker Component
```typescript
runner: {
  kind: 'docker',
  image: 'tool:latest',
  entrypoint: 'sh',
  command: ['-c', 'tool "$@"', '--'],
  network: 'bridge',
}
// ⚠️ Shell wrapper required for PTY
```
→ See: `docs/development/component-development.mdx#docker-component-requirements`

### File I/O (Docker)
```typescript
import { IsolatedContainerVolume } from '../../utils/isolated-volume';
const volume = new IsolatedContainerVolume(tenantId, context.runId);
try {
  await volume.initialize({ 'input.txt': data });
  // volumes: [volume.getVolumeConfig('/path', true)]
} finally {
  await volume.cleanup();
}
```
→ See: `docs/development/isolated-volumes.mdx`

### Dynamic Ports
```typescript
resolvePorts(params) {
  return { inputs: [...], outputs: [...] };
}
```
→ See: `docs/development/component-development.mdx#dynamic-ports-resolveports`

---

## Context Services

```typescript
async execute(input, context) {
  context.logger.info('...');              // Logs to UI timeline
  context.emitProgress('...');             // Progress events
  await context.secrets?.get('KEY');       // Encrypted secrets
  await context.storage?.downloadFile(id); // MinIO files
  await context.artifacts?.upload({...});  // Save artifacts
}
```

---

## Error Handling

```typescript
import { ValidationError, AuthenticationError, ServiceError } from '@shipsec/component-sdk';

// Non-retryable (immediate fail)
throw new ValidationError('Bad input', { fieldErrors: {...} });
throw new AuthenticationError('Invalid API key');

// Retryable (Temporal will retry)
throw new ServiceError('API down', { statusCode: 503 });
```
→ See: `docs/development/component-development.mdx#error-handling`

---

## Testing Commands

```bash
# Unit tests (mocked, fast)
bun --cwd worker test

# Integration tests (real Docker)
ENABLE_DOCKER_TESTS=true bun --cwd worker test

# E2E tests (full stack - requires `just dev`)
RUN_E2E=true bun --cwd e2e-tests test
```

---

## Common Mistakes to Avoid

| Mistake | Fix |
|---------|-----|
| Docker without shell wrapper | Use `entrypoint: 'sh', command: ['-c', 'tool "$@"', '--']` |
| Direct file mounts in Docker | Use `IsolatedContainerVolume` |
| Missing `finally` for volume cleanup | Always `await volume.cleanup()` in finally |
| No metadata block | Add `metadata: { inputs: [...], outputs: [...] 
}` | +| Throwing plain Error | Use SDK errors: `ValidationError`, `ServiceError`, etc. | +| Forgetting to register | Add `componentRegistry.register(definition)` | + +--- + +## Reference Files + +| What | Where | +|------|-------| +| Full docs | `docs/development/component-development.mdx` | +| Isolated volumes | `docs/development/isolated-volumes.mdx` | +| SDK source | `packages/component-sdk/src/` | +| Good example (Docker) | `worker/src/components/security/dnsx.ts` | +| Good example (inline) | `worker/src/components/core/http-request.ts` | +| E2E tests | `e2e-tests/` | diff --git a/.gitignore b/.gitignore index c76b9460..b54a2cc8 100644 --- a/.gitignore +++ b/.gitignore @@ -30,7 +30,6 @@ lerna-debug.log* .vscode/* !.vscode/extensions.json !.vscode/settings.json -.claude/ CLAUDE.md .idea/ .DS_Store diff --git a/AGENTS.md b/AGENTS.md index 105df164..b2218ce5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,342 +1,90 @@ -# Repository Guidelines +# ShipSec Studio -## Project Structure & Module Organization -- Bun workspace monorepo: `frontend/` (React + Vite UI), `backend/` (NestJS API + Temporal client), `worker/` (Temporal activities), and `packages/` for shared SDKs. -- Frontend features live in `frontend/src/features`; shared UI sits in `frontend/src/components`; state lives in `frontend/src/store`. Tests stay alongside code under `__tests__`. -- Backend modules follow NestJS (`backend/src/modules/*`), with integration suites under `backend/src/**/__tests__` and schema definitions in `backend/src/db`. -- Worker logic stays in `worker/src/temporal`; reusable component helpers live in `worker/src/components`. -- Specs and runbooks live in `docs/` (notably `docs/execution-contract.md`) and `.ai/` for observability work—update both when contracts change. +Security workflow orchestration platform. Visual builder + Temporal for reliability. -## Build, Test, and Development Commands -- `bun install` — install workspace dependencies. -- `docker compose -p shipsec up -d` — bring up Temporal, Postgres, MinIO, and Loki (fixed project name for consistent up/down across directories). -- Always timebox commands; pick a realistic limit (e.g., `timeout 30s ` for logs, `timeout 2m ` for tests) so shells never hang indefinitely. On macOS install GNU coreutils (`brew install coreutils`) and use `gtimeout`, or wrap commands manually with `sleep`/`kill` if `timeout` is unavailable. -- `pm2 start pm2.config.cjs` — run backend API and worker (inspect with `timeout 5s pm2 logs backend --nostream --lines 200` so the command exits on its own). -- `bun --cwd frontend dev` and `bun --cwd backend run dev` — start frontend and API locally. -- `bun run test`, `bun run lint`, `bun run typecheck` — monorepo test, lint, and type gates; target runs via `bun --cwd backend run migration:smoke` when narrowing failures. -- **OpenAPI + backend client workflow:** whenever backend routes change, regenerate the spec with `bun --cwd backend run generate:openapi`, then rebuild the typed client via `bun --cwd packages/backend-client run generate`. Do not hand-edit the client—always go spec → generator → import. +## Stack +- `frontend/` — React + Vite +- `backend/` — NestJS API +- `worker/` — Temporal activities + components +- `packages/` — Shared code (component-sdk, backend-client) -## Coding Style & Naming Conventions -- TypeScript everywhere with ESM modules and two-space indentation; keep import order stable and skip extra semicolons. -- Reuse `@shipsec/shared` Zod schemas for contracts. 
Workflow/run identifiers keep the `shipsec-run-*` shape. -- React files use PascalCase components and colocated hooks. Backend services/controllers follow NestJS naming (`*.service.ts`, `*.controller.ts`). -- Run `bun run lint` (ESLint + Prettier) before submitting; frontend also enforces Tailwind utility ordering via `tailwind-merge`. +## Development -## Testing Guidelines -- Unit tests belong near the code (`__tests__` folders, `*.spec.ts`/`*.test.ts`). Mirror existing patterns when expanding coverage. -- Run `bun run test` before opening a PR. For backend integration suites, export Temporal services then use `RUN_BACKEND_INTEGRATION=true bun --cwd backend test`. -- When adding workflows or trace handling, confirm schemas against `docs/execution-contract.md` and add fixtures covering new fields. -- Document manual validation steps (e.g., workflow run IDs, Loki log checks) in the PR description. - -## Commit & Pull Request Guidelines -- Follow Conventional Commit-style subjects (`feat:`, `fix:`, `docs:`) in imperative voice; include context on the scope touched. -- Sign off every commit for DCO compliance (`git commit -s` or `--signoff` on amendments). -- Reference issues or milestones in the PR body, summarise behaviour changes, and attach screenshots or trace snippets for UI/observability updates. -- Confirm linters and tests pass (`bun run test`, targeted smoke checks) and call out any gaps or blockers explicitly. - -## ShipSec Studio Agent Playbook (Reference) - -### 1. Mission -- Deliver reliable, observable workflow automation for security reconnaissance in line with `.ai/claude-spec.md`. -- Move the repository toward the observability roadmap in `.ai/implementation-plan.md` while preserving existing behaviour and data integrity. -- Keep humans in the loop: surface assumptions, blockers, and validation gaps promptly. - -### 0. Capability Check -- Identify whether your execution environment exposes CORE Memory (Codex CLI `memory` tool suite). -- **If CORE is available:** immediately switch to the “CORE Memory Protocol” section below and follow it verbatim. -- **If CORE is unavailable:** skip the CORE-specific block and rely on `.ai` docs, recent git history, and open issues for context; keep a written trail in repo docs or the PR description so teammates inherit your findings. - -### CORE Memory Protocol (CORE-enabled agents only) -`trigger: always_on` — apply this section only when the CLI exposes CORE Memory. Agents that start without CORE should continue with the standard instructions; do **not** assume memory state exists. - -⚠️ **CRITICAL: READ THIS FIRST – MANDATORY MEMORY PROTOCOL** ⚠️ -CORE Memory preserves project context, so every CORE-enabled session must follow the exact startup and shutdown sequences below. - -#### Mandatory startup sequence (run before any response) -1. **Step 1 – `memory_search` (required first action):** - - Always search before replying to the user to pull prior discussions, decisions, and preferences related to the current topic. - - Extra triggers: the user references prior work (“previously”, “before”, etc.), you are working in the CORE project, or the task likely has history. - - Ask yourself which context is missing and craft a full semantic query (complete sentences, not keyword fragments). - -2. **Query patterns (pick the one that fits best):** - - **Entity-centric:** `[Person/Project] + [relationship/attribute] + [context]` (e.g., “Manoj's preferences for API design and error handling”). 
- - **Multi-entity relationship:** `[Entity1] + [relationship] + [Entity2] + [context]` (e.g., “Manoj and Harshith discussions about BFS search implementation”). - - **Semantic question:** fully phrased questions about requirements or causes (e.g., “What causes BFS search to return empty results?”). - - **Concept exploration:** `[concept] + related/connected + [domain/context]` (e.g., “concepts related to semantic relevance in knowledge graph search”). - - **Temporal:** `[temporal marker] + [topic] + [context]` (e.g., “recent changes to search implementation and reranking logic”). - -#### Mandatory shutdown sequence (run after you finish helping) -1. **Final step – `memory_ingest` (required last action):** - - Capture the conversation summary before ending the session. - - Include `spaceId` from your initial `memory_get_space` call so the note lands in the right project bucket. - -2. **What to store (conceptual, no raw code/logs):** - - **From the user:** request, context, goals, constraints, and any blockers mentioned. - - **From the assistant:** solution details, reasoning, trade-offs, alternative approaches, and methodologies used. - - Emphasize technical explanations, decisions, and insights that will help in future sessions. - - Exclude code snippets, file dumps, CLI commands, or raw logs. - -3. **Quality check before storing:** - - Would someone understand the project context and decisions from this memory alone? - - Does it capture key reasoning and outcomes? - -#### Protocol summary -1. **First action:** `memory_search` with a well-formed semantic query relevant to the user’s request. -2. **Respond:** perform the requested work. -3. **Final action:** `memory_ingest` with the session summary (and `spaceId`). - -### 2. Core Operating Loop -1. **Gather context** - - With CORE Memory: run a search before answering any user message to pull history, decisions, and preferences. - - Without CORE Memory: review relevant `.ai` docs, recent commits, and any prior notes in the repository before proceeding. -2. **Study context**: skim `.ai` documents and recent code touching the task. Default references: - - `.ai/claude-spec.md` for system architecture and APIs. - - `.ai/visual-execution-notes.md` and `.ai/implementation-plan.md` for observability status and runbook details. - - Other `.ai/*.md` files as needed (component SDK, worker architecture, marketplace notes). -3. **Plan** the work when it is more than a trivial change. Plans must have multiple steps, at most one active step, and be updated as progress is made. Skip planning only for straightforward tasks. -4. **Execute carefully**: - - Prefer `rg` for search; use `["bash","-lc", ""]` with `workdir` set on all shell calls. - - Use `apply_patch` for handcrafted edits to single files; avoid it for large auto-generated diffs. - - Keep edits ASCII unless the file already uses other characters. - - Never undo or overwrite unrelated user changes; avoid destructive commands (`git reset --hard`, `rm -rf`, etc.). -5. **Validate** results via targeted tests or reasoning. Default test commands: - - `bun run test` - - `bun run --filter backend test` - - `bun run lint`, `bun run typecheck` - - Backend integration suites may need services started with Docker + PM2 (see §5). -6. **Document & Store** - - CORE-enabled agents: ingest a summary into CORE Memory capturing the interaction and any follow-ups. 
- - Non-CORE agents: append findings to an appropriate repo log (e.g., `.ai/visual-execution-notes.md`, issue tracker, or PR summary) so the next contributor has continuity. - -### 3. Coding Standards -- Align with repository architecture: Temporal workflows orchestrate component activities; components are registered via the ShipSec SDK (`.ai/component-sdk.md`). -- Uphold observability contract schemas from `@shipsec/shared` and `docs/execution-contract.md`. -- Keep commits (when requested) small and reviewable. Do not create commits unless the user asks. -- Add concise comments only when logic is non-obvious. -- Maintain consistent import/order/style with existing code; prefer existing utilities over new dependencies. - -### 4. Communication & Delivery -- Final responses must be concise, reference touched files (`path:line`), and highlight risks or follow-ups. -- Present findings before summaries when performing reviews. -- Offer practical next actions (tests, docs, PR steps) when they naturally follow from the work. -- State clearly when something could not be validated or requires human attention. - -### 5. Environment & Tooling Notes -- Infrastructure stack: Temporal, Postgres, MinIO, Loki (Phase 5+) started via `docker compose up -d`. -- Runtime processes: use `pm2` (`pm2 start`, `pm2 status`, `timeout 5s pm2 logs ...`) with orphan Bun processes cleaned before restarting. -- Worker separation and task-queue strategy described in `.ai/temporal-worker-architecture.md` and `.ai/worker-implementation-example.md`. -- Live execution experience depends on trace streaming (Phase 6) and UI updates described in `.ai/visual-execution-notes.md`. - -### 6. Observability Roadmap Snapshot -- Phase 0–7 complete; Phase 8 (metrics + regression suite) not started. -- Trace + Loki pipeline (Phase 5) and live streaming (Phase 6) are active; ensure new work preserves these contracts. -- Backlog items for the execution timeline remain open (see `.ai/implementation-plan.md` Phase 8 checklist). - -### 7. Document Map -- `.ai/bounty-hunter-painpoints.md`: user persona & pain points. -- `.ai/component-sdk.md`: component interfaces, runner configuration, execution context. -- `.ai/file-storage-implementation.md`: artifact storage strategy (S3/MinIO expectations). -- `.ai/implementation-plan.md`: observability phases, required tests, environment runbook. -- `.ai/sample-workflow-dsl.md`: DSL structure for workflow definitions. -- `.ai/shipsec-differentiators.md`: positioning vs competitors. -- `.ai/temporal-worker-architecture.md`: task queue strategy and worker specialization. -- `.ai/tracecat-temporal-overview.md`: external comparison insights. -- `.ai/visual-execution-notes.md`: audit findings, infrastructure status, UX expectations. -- `.ai/worker-implementation-example.md`: end-to-end worker example with logging & progress emission. - -### 8. Component Development (CRITICAL: Read Before Creating Components) - -**⚠️ ALWAYS READ EXISTING DOCUMENTATION FIRST ⚠️** - -Before creating ANY Docker component: -1. **READ `docs/component-development.md` COMPLETELY** — Contains critical PTY compatibility requirements and patterns -2. **CHECK existing components** — Look at similar components (dnsx, nuclei, httpx) for reference patterns -3. **NEVER assume patterns** — Docker/PTY behavior is counterintuitive; documented patterns prevent hours of debugging - -**Common mistake:** Skipping documentation and implementing direct patterns that hang or buffer in PTY mode. 
This wastes significant time debugging issues that are already solved and documented. - -**When creating or modifying Docker-based components, you MUST follow the file system access patterns and PTY compatibility guidelines documented below.** - -#### Required Reading (in order) -1. **`docs/component-development.md`** — **START HERE!** PTY compatibility patterns (shell wrapper vs direct binary), Docker entrypoint requirements, and decision tree -2. **`.ai/component-sdk.md`** — Authoritative component interface, runner config, and **File System Access Pattern** section -3. **`worker/src/components/security/dnsx.ts:615-662`** — Reference implementation: shell wrapper pattern + isolated volumes -4. **`worker/src/components/security/nuclei.ts`** — Reference implementation: direct binary + `-stream` flag (distroless pattern) - -#### MANDATORY Pattern: IsolatedContainerVolume - -**ALL Docker components requiring file input/output MUST use `IsolatedContainerVolume`:** - -```typescript -import { IsolatedContainerVolume } from '../../utils/isolated-volume'; - -async execute(input, context) { - const tenantId = (context as any).tenantId ?? 'default-tenant'; - const volume = new IsolatedContainerVolume(tenantId, context.runId); - - try { - // Write input files - await volume.initialize({ - 'targets.txt': targets.join('\n'), - 'config.json': JSON.stringify(config) - }); - - // Configure runner with volume - const runnerConfig: DockerRunnerConfig = { - kind: 'docker', - image: 'tool:latest', - command: buildCommandArgs(input), - volumes: [volume.getVolumeConfig('/inputs', true)] // read-only - }; - - const result = await runComponentWithRunner(runnerConfig, ...); - return result; - - } finally { - await volume.cleanup(); // MANDATORY - always cleanup - } -} +```bash +just init # First time setup +just dev # Start everything +just dev stop # Stop +just dev logs # View logs +just help # All commands ``` -#### Why This Pattern is REQUIRED - -❌ **NEVER use direct file mounts** (broken in DinD, security risk): -```typescript -// WRONG - DO NOT DO THIS -const tempDir = await mkdtemp(path.join(tmpdir(), 'input-')); -await writeFile(path.join(tempDir, 'file.txt'), data); -volumes: [{ source: tempDir, target: '/inputs' }] // FAILS in Docker-in-Docker -``` - -✅ **ALWAYS use IsolatedContainerVolume** (DinD compatible, tenant isolated): -- Works in Docker-in-Docker environments (named volumes vs file mounts) -- Enforces multi-tenant isolation (`tenant-{tenantId}-run-{runId}-{timestamp}`) -- Automatic cleanup prevents data leakage -- Audit trail via volume labels -- Path validation prevents security exploits - -#### Component Creation Checklist +**URLs**: Frontend http://localhost:5173 | Backend http://localhost:3211 | Temporal http://localhost:8081 -When creating a new Docker component: - -- [ ] Read `.ai/component-sdk.md` File System Access Pattern section -- [ ] Import `IsolatedContainerVolume` from `../../utils/isolated-volume` -- [ ] Create volume with tenant ID and run ID -- [ ] Use `volume.initialize()` to write input files -- [ ] Mount volume with `volume.getVolumeConfig('/path', readOnly)` -- [ ] Put cleanup in `finally` block (MANDATORY) -- [ ] Add logging for volume creation and cleanup -- [ ] Test volume creation, usage, and cleanup -- [ ] Verify no orphaned volumes after execution - -#### Pattern Variations - -**Input files only** (most common): -```typescript -const volume = new IsolatedContainerVolume(tenantId, context.runId); -try { - await volume.initialize({ 'domains.txt': domains.join('\n') }); - 
volumes: [volume.getVolumeConfig('/inputs', true)] - // ... run component ... -} finally { - await volume.cleanup(); -} +### After Backend Route Changes +```bash +bun --cwd backend run generate:openapi +bun --cwd packages/backend-client run generate ``` -**Input + output files**: -```typescript -const volume = new IsolatedContainerVolume(tenantId, context.runId); -try { - await volume.initialize({ 'config.yaml': yamlConfig }); - volumes: [volume.getVolumeConfig('/data', false)] // read-write - // ... run component ... - const outputs = await volume.readFiles(['results.json']); - return JSON.parse(outputs['results.json']); -} finally { - await volume.cleanup(); -} +### Testing +```bash +bun run test # All tests +bun run typecheck # Type check +bun run lint # Lint ``` -**Multiple volumes** (separate input/output): -```typescript -const inputVol = new IsolatedContainerVolume(tenantId, `${runId}-in`); -const outputVol = new IsolatedContainerVolume(tenantId, `${runId}-out`); -try { - await inputVol.initialize({ 'data.csv': csvData }); - await outputVol.initialize({}); - volumes: [ - inputVol.getVolumeConfig('/inputs', true), - outputVol.getVolumeConfig('/outputs', false) - ] - // ... run component ... -} finally { - await Promise.all([inputVol.cleanup(), outputVol.cleanup()]); -} +### Database +```bash +just db-reset # Reset database +bun --cwd backend run migration:push # Push schema +bun --cwd backend run db:studio # View data ``` -#### Reference Documentation +## Rules +- TypeScript, 2-space indent +- Conventional commits with DCO: `git commit -s -m "feat: ..."` +- Tests alongside code in `__tests__/` folders -- **Component SDK**: `.ai/component-sdk.md` — Interface and file system pattern -- **Development Guide**: `docs/component-development.md` — Full patterns and security -- **API Reference**: `worker/src/utils/README.md` — IsolatedContainerVolume API -- **Architecture**: `docs/ISOLATED_VOLUMES.md` — How it works, security model -- **Migration Tracking**: `worker/src/utils/COMPONENTS_TO_MIGRATE.md` — Components needing updates -- **Working Example**: `worker/src/components/security/dnsx.ts:615-662` +--- -#### Security Guarantees +## Architecture -Using IsolatedContainerVolume ensures: -- **Tenant Isolation**: Volume names include tenant ID to prevent cross-tenant access -- **No Collisions**: Timestamp in name prevents concurrent execution conflicts -- **Path Safety**: Filenames validated (blocks `..` and `/` prefixes) -- **Automatic Cleanup**: Finally blocks guarantee volume removal -- **Audit Trail**: Volumes labeled with `studio.managed=true` for tracking -- **DinD Compatible**: Named volumes work in nested Docker scenarios +Full details: **`docs/architecture.mdx`** -#### Common Mistakes to Avoid +``` +Frontend ←→ Backend ←→ Temporal ←→ Worker + ↓ + Component Execution + ↓ + Terminal(Redis) | Events(Kafka) | Logs(Loki) + ↓ + Frontend (SSE/WebSocket) +``` -1. ❌ Using `mkdtemp` + `writeFile` + file mounts (broken in DinD) -2. ❌ Forgetting `finally` block for cleanup (causes volume leaks) -3. ❌ Using read-write mounts when read-only is sufficient (security risk) -4. ❌ Hardcoding tenant ID instead of getting from context -5. ❌ Not logging volume creation/cleanup (makes debugging harder) -6. 
❌ Skipping validation that volumes are cleaned up (check `docker volume ls`) +### Component Runners +- **inline** — TypeScript code (HTTP calls, transforms, file ops) +- **docker** — Containers (security tools: Subfinder, DNSX, Nuclei) +- **remote** — External executors (future: K8s, ECS) -#### Testing Requirements +### Real-time Streaming +- Terminal: Redis Streams → SSE → xterm.js +- Events: Kafka → WebSocket +- Logs: Loki + PostgreSQL -After implementing file-based component: -- Component executes successfully -- Volume is created with correct naming pattern -- Files are written to volume and accessible to container -- Volume is cleaned up after successful execution -- Volume is cleaned up on error/exception -- No orphaned volumes remain (`docker volume ls --filter "label=studio.managed=true"`) -- Logs show volume creation and cleanup messages +--- -### 9. When Blocked -- Capture the issue, attempted approaches, and uncertainties in the response. -- Suggest concrete follow-ups (information needed, commands to rerun, potential fixes). -- CORE-enabled agents: store the blocker in CORE Memory. - Non-CORE agents: record the blocker in a shared doc or ticket so the next teammate can resume quickly. + -## MDFiler + +When tasks match a skill, load it: `cat .claude/skills//SKILL.md` + -### DNSX Resolver Component (`worker/src/components/security/dnsx.ts`) -- Container: docker runner locks to `projectdiscovery/dnsx:latest` with `sh -c` entrypoint, `bridge` networking, and an explicit `$HOME` to keep dnsx happy in ephemeral containers. -- Input marshaling: the shell stub reads the JSON payload from stdin, extracts `domains`, `recordTypes`, `resolvers`, `retryCount`, and `rateLimit` via `sed`, and materialises them into temp files. Record types default to `A`, resolver lines are written to a file only when provided, and every temp file is cleaned with `trap`. -- Runtime flags: record type switches are mapped manually (`A` → `-a`, `AAAA` → `-aaaa`, etc.), retry and rate limit parameters are appended when they are ≥1, and dnsx is always invoked with `-json -resp -silent` so we get NDJSON back for parsing. -- Error surfacing: non-zero dnsx exits funnel stderr into a JSON object with `__error__` flag so the TypeScript layer can bubble the message without crashing the workflow. -- Raw output handling: `execute` always awaits `runComponentWithRunner`; if the runner hands back an object (the docker helper occasionally serialises JSON), we stringify it before parsing and coerce `undefined/null` to an empty string. -- Parsing + normalisation: NDJSON lines are validated with `dnsxLineSchema`. We derive a canonical `answers` map per record, coerce TTLs that arrive as strings, and dedupe record types/resolvers by combining requested values with what dnsx actually returned. -- Fallback path: when the output is not valid JSON, we emit synthetic result rows keyed by the raw line, attach the raw output, and report a friendly parse error so downstream steps can still show “something” instead of silently failing. -- Runner contract: `workflow-runner.ts` must call `component.execute` (not `runComponentWithRunner` directly) so this normalisation logic always runs; calling the runner directly bypasses the parsing guardrails and breaks downstream consumers. -- Telemetry: we log the domain counts up front, emit progress events (`Running dnsx for … domains`), and propagate any parse errors through the `errors` array for Loki/search indexing. 
-- Validation: unit tests mock the runner to cover structured JSON, raw fallback, and runner metadata; the integration test executes dnsx in Docker with a 180s timeout, so keep the daemon available when running locally. + + + component-development + Creating components (inline/docker). Dynamic ports, retry policies, PTY patterns, IsolatedContainerVolume. + project + + -## Design Iteration Protocol -- When the user explicitly says **"Design Iteration Mode"**, follow this workflow: - 1. Audit the current UI (code + screenshot) and list concrete visual/UX issues before changing anything. - 2. Implement three alternative layouts/variants of the component in question (A/B/C). - 3. Expose a temporary debug selector (dropdown/tabs, etc.) near the component so the user can live-switch between the variants. - 4. Leave the selector in place until the user picks a winner, then remove the extras and clean up. -- Only engage this process when the user uses the exact phrase above; otherwise, do normal single-path design tweaks. + From 60c07fe6614d5409c7af32be89c4fe4c702bc914 Mon Sep 17 00:00:00 2001 From: betterclever Date: Sun, 4 Jan 2026 23:23:24 +0530 Subject: [PATCH 3/8] docs: remove obsolete .ai/component-sdk.md Content now covered in: - docs/development/component-development.mdx - docs/development/isolated-volumes.mdx - .claude/skills/component-development/SKILL.md Signed-off-by: betterclever --- .ai/component-sdk.md | 294 ------------------------------------------- current_body.md | 40 ------ 2 files changed, 334 deletions(-) delete mode 100644 .ai/component-sdk.md delete mode 100644 current_body.md diff --git a/.ai/component-sdk.md b/.ai/component-sdk.md deleted file mode 100644 index 3f23a6fa..00000000 --- a/.ai/component-sdk.md +++ /dev/null @@ -1,294 +0,0 @@ -# ShipSec Component SDK & Runner Architecture - -## Component Definition Interface -```ts -interface ComponentDefinition { - id: string; - label: string; - category: 'trigger' | 'input' | 'discovery' | 'transform' | 'output'; - runner: RunnerConfig; - inputSchema: z.ZodType; - outputSchema: z.ZodType; - defaults?: Partial; - docs?: string; - execute(params: I, context: ExecutionContext): Promise; -} - -interface RunnerConfig { - kind: 'inline' | 'docker' | 'remote'; - docker?: { - image: string; - // Build the full CLI invocation in TypeScript; pass flags/args here - command: string[]; - // Optional container overrides - entrypoint?: string; - env?: Record; - network?: 'none' | 'bridge' | 'host'; - timeoutSeconds?: number; - // When true, the runner writes the JSON-encoded params to stdin - // Use only for tools that read from stdin; prefer args otherwise - stdinJson?: boolean; - }; - inline?: { concurrency?: number }; - remote?: { endpoint: string; authSecretName?: string }; -} - -interface ExecutionContext { - runId: string; - componentRef: string; - logger: Logger; - secrets: SecretAccessor; - artifacts: ArtifactStore; - workspace: WorkspaceMetadata; - emitProgress(event: ProgressEvent): void; -} -``` - -## ShipSec SDK Responsibilities -1. Component registration (`registerComponent(def)` → stored in registry). -2. Shared utilities for schema validation, template evaluation. -3. Runner abstraction: map RunnerConfig → execution strategy (inline, Docker, remote executor). -4. Temporal integration: auto-register one activity per component ID. -5. Lifecycle hooks: logging, progress events, artifact management. - -## Temporal Orchestration -- Workflow stores the DSL and schedules activities by component ID. 
-- `ShipSecWorkflow.run()` topologically sorts actions, resolves params, and calls `workflow.executeActivity(component.id, …)`. -- Activities delegate to SDK’s `invoke()` which: - - Validates params via `inputSchema`. - - Runs the component (calls inline code, spawns Docker, or hits remote executor). - - Streams logs, emits progress, stores artifacts. - - Validates outputs with `outputSchema` before returning to the workflow. - -## Runner Layer -- Initial runners: inline (TypeScript) and Docker (with configurable resources). -- Future runners: Kubernetes jobs, ECS tasks, Firecracker, serverless functions. -- ExecutionContext provides consistent access to secrets/artifacts irrespective of runner. - -### Docker Component Pattern (TS-first) -- Build the entire CLI command in TypeScript and pass it via `runner.docker.command`. - - Prefer direct flags/args over shell wrappers. Only use a minimal shell when absolutely necessary (e.g., creating a temp file for tools that require `-L file`). - - Set `stdinJson: true` only for tools that read JSON from stdin; otherwise keep it unset so no stdin is written. -- Perform all parsing/normalisation in the component's TypeScript `execute` function: - - Parse NDJSON or text output, validate each record with Zod, and normalise into the shared output schema. - - Derive metadata (counts, record types, resolver lists) in TypeScript so helpers and unit tests are reusable. -- Error handling: catch runner errors (non-zero exit, stderr) in TypeScript, wrap them in a friendly message, and propagate through the component's `errors` array or thrown error as appropriate. -- Example shape (illustrative): -```ts -const args = [ - '-json', '-resp', '-silent', - ...mapRecordTypesToFlags(input.recordTypes), - '-rl', input.rateLimit?.toString() ?? '0', - ...input.resolvers.flatMap(r => ['-r', r]), - ...input.domains.flatMap(d => ['-d', d]), -]; - -const runner = { - kind: 'docker' as const, - image: 'projectdiscovery/dnsx:latest', - command: args, - network: 'bridge' as const, -}; -``` -Note: if a tool strictly requires a file input (e.g., `-list file.txt`), use the IsolatedContainerVolume pattern (see below) for secure, multi-tenant file handling in Docker-in-Docker environments. - -## File System Access Pattern (Docker Components) - -**IMPORTANT:** All Docker components that require file-based input/output **MUST** use the `IsolatedContainerVolume` utility for secure multi-tenant isolation in Docker-in-Docker (DinD) environments. - -### Why IsolatedContainerVolume? - -❌ **DO NOT use direct file mounts:** -```ts -// WRONG - Breaks in DinD, no tenant isolation -const tempDir = await mkdtemp(path.join(tmpdir(), 'input-')); -await writeFile(path.join(tempDir, 'file.txt'), data); -volumes: [{ source: tempDir, target: '/inputs' }] -``` - -✅ **DO use IsolatedContainerVolume:** -```ts -// CORRECT - DinD compatible, tenant isolated -import { IsolatedContainerVolume } from '../../utils/isolated-volume'; - -const tenantId = context.tenantId ?? 'default-tenant'; -const volume = new IsolatedContainerVolume(tenantId, context.runId); - -try { - await volume.initialize({ 'file.txt': data }); - volumes: [volume.getVolumeConfig('/inputs', true)] -} finally { - await volume.cleanup(); -} -``` - -### Standard Pattern (REQUIRED for all file-based components) - -```typescript -import { IsolatedContainerVolume } from '../../utils/isolated-volume'; -import type { DockerRunnerConfig } from '@shipsec/component-sdk'; - -async execute(input, context) { - // 1. 
Get tenant ID (context will have this once ExecutionContext is updated) - const tenantId = (context as any).tenantId ?? 'default-tenant'; - - // 2. Create isolated volume instance - const volume = new IsolatedContainerVolume(tenantId, context.runId); - - try { - // 3. Prepare input files - const inputFiles: Record = { - 'targets.txt': targets.join('\n'), - 'config.json': JSON.stringify(config), - // Binary files work too - 'wordlist.bin': binaryBuffer - }; - - // 4. Initialize volume with files - await volume.initialize(inputFiles); - context.logger.info(`Created isolated volume: ${volume.getVolumeName()}`); - - // 5. Configure runner with volume mount - const runnerConfig: DockerRunnerConfig = { - kind: 'docker', - image: 'tool:latest', - command: buildCommandArgs(input), - volumes: [ - // Input files (read-only for security) - volume.getVolumeConfig('/inputs', true), - // Output files (read-write if tool writes outputs) - volume.getVolumeConfig('/outputs', false) - ] - }; - - // 6. Run the component - const result = await runComponentWithRunner(runnerConfig, async () => ({} as Output), input, context); - - // 7. Read output files if tool writes them - const outputs = await volume.readFiles(['results.json', 'summary.txt']); - const parsedResults = JSON.parse(outputs['results.json'] || '{}'); - - return { ...result, additionalData: parsedResults }; - - } finally { - // 8. ALWAYS cleanup volume (even on error) - await volume.cleanup(); - context.logger.info('Cleaned up isolated volume'); - } -} -``` - -### Pattern Variations - -#### Input Files Only -```typescript -const volume = new IsolatedContainerVolume(tenantId, context.runId); -try { - await volume.initialize({ 'domains.txt': domains.join('\n') }); - - const config = { - command: ['-l', '/inputs/domains.txt', ...otherFlags], - volumes: [volume.getVolumeConfig('/inputs', true)] - }; - - return await runComponentWithRunner(config, ...); -} finally { - await volume.cleanup(); -} -``` - -#### Input + Output Files -```typescript -const volume = new IsolatedContainerVolume(tenantId, context.runId); -try { - // Write inputs - await volume.initialize({ 'config.yaml': yamlConfig }); - - const config = { - command: [ - '--input', '/data/config.yaml', - '--output', '/data/results.json' - ], - volumes: [volume.getVolumeConfig('/data', false)] // Read-write - }; - - await runComponentWithRunner(config, ...); - - // Read outputs - const outputs = await volume.readFiles(['results.json']); - return JSON.parse(outputs['results.json']); -} finally { - await volume.cleanup(); -} -``` - -#### Multiple Volumes (Separate Input/Output) -```typescript -const inputVol = new IsolatedContainerVolume(tenantId, `${context.runId}-in`); -const outputVol = new IsolatedContainerVolume(tenantId, `${context.runId}-out`); - -try { - await inputVol.initialize({ 'data.csv': csvData }); - await outputVol.initialize({}); // Empty volume for outputs - - const config = { - volumes: [ - inputVol.getVolumeConfig('/inputs', true), - outputVol.getVolumeConfig('/outputs', false) - ] - }; - - await runComponentWithRunner(config, ...); - - const results = await outputVol.readFiles(['output.json']); - return JSON.parse(results['output.json']); -} finally { - await Promise.all([inputVol.cleanup(), outputVol.cleanup()]); -} -``` - -### Security Guarantees - -Using `IsolatedContainerVolume` ensures: -- ✅ **Tenant Isolation** - Volume name includes tenant ID: `tenant-{tenantId}-run-{runId}-{timestamp}` -- ✅ **No Collisions** - Timestamp prevents concurrent execution conflicts -- ✅ **Path 
Safety** - Filenames validated (no `..` or `/` prefix) -- ✅ **Automatic Cleanup** - Guaranteed cleanup via finally blocks -- ✅ **Audit Trail** - Volumes labeled with `studio.managed=true` -- ✅ **DinD Compatible** - Named volumes work where file mounts fail - -### When to Use - -| Scenario | Use IsolatedVolume? | -|----------|---------------------| -| Tool requires file input (e.g., `-l file.txt`) | ✅ Yes | -| Tool writes output files | ✅ Yes | -| Tool reads binary files (wordlists, images) | ✅ Yes | -| Tool reads config files (.yaml, .json) | ✅ Yes | -| Tool only uses CLI args/flags | ❌ No | -| Tool reads from stdin only | ❌ No | - -### Reference Documentation - -- **API Reference**: `worker/src/utils/README.md` -- **Architecture Guide**: `docs/ISOLATED_VOLUMES.md` -- **Migration Tracking**: `worker/src/utils/COMPONENTS_TO_MIGRATE.md` - -### Examples in Codebase - -- **dnsx**: `worker/src/components/security/dnsx.ts:615-662` - Input files only -- **More examples coming** as other components migrate - -## Sample Flow: File Loader → Subfinder → Webhook -1. **FileLoader** (`core.file.loader`) - - Runner: inline. - - Reads file by path / upload ID, returns `{ fileName, mimeType, content }`. -2. **SubfinderRunner** (`shipsec.subfinder.run`) - - Runner: Docker image `shipsec/subfinder`. - - Inputs: domain, optional wordlist from FileLoader’s output. - - Outputs: `{ subdomains: string[], rawOutput: string, stats: … }`. -3. **WebhookUploader** (`core.webhook.post`) - - Runner: inline (HTTP POST). - - Sends subfinder results to a target URL, returns status. - -The workflow DSL references these by component ID; Temporal executes them sequentially with retries, progress tracking, and trace events. diff --git a/current_body.md b/current_body.md deleted file mode 100644 index c6177611..00000000 --- a/current_body.md +++ /dev/null @@ -1,40 +0,0 @@ -This PR implements the foundational **Human-in-the-Loop (HITL)** system for ShipSec AI. It enables workflows to pause execution and wait for human intervention—whether for simple approvals, data collection via forms, or making specific selections. - -This is a comprehensive implementation spanning the backend (Temporal, Drizzle, NestJS) and the frontend (Action Center, Workflow Designer). - -### Key Features - -#### 1. Centralized Action Center -* A new **Action Center** (`/actions`) that serves as a command center for all manual tasks. -* Filter tasks by status (Pending, Resolved, Expired). -* Search and sort by Workflow Run ID, Node Name, or Title. -* Direct response actions from the table view for quick approvals. - -#### 2. Manual Action Components (HITL Nodes) -Implemented a set of specialized nodes for the workflow designer: -* **Manual Approval**: A binary gate (Approve/Reject) to control workflow flow. -* **Manual Form**: Generates dynamic UI forms based on configurable JSON Schema. Supports strings, numbers, enums, and booleans. -* **Manual Selection**: Allows humans to choose from a list of predefined options (single or multiple choice). -* **Manual Acknowledgment**: A "Mark as Read" style node to ensure human awareness before proceeding. - -#### 3. Dynamic Context & Templating -* **Variable Injection**: Task titles and descriptions can now use dynamic variables (e.g., `{{steps.scan.output.vulnerabilities}}`) to provide humans with the necessary context to make decisions. -* **Markdown Support**: Full Markdown rendering in task descriptions for rich context display. - -#### 4. 
Robust Backend Architecture -* **Temporal Integration**: Built using Temporal activities that handle suspension and resumption of workflow execution. -* **Persistence**: Detailed tracking of requests in Drizzle ORM, including `respondedBy`, `respondedAt`, and full `responseData` payloads. -* **Timeout Handling**: Support for configurable timeouts, allowing workflows to handle cases where humans don't respond in time. - -#### 5. Unified Resolution Framework -* Created `HumanInputResolutionView`, a "smart" component that handles the entire resolution lifecycle. -* Seamlessly manages different input types (form, selection, approval) within a consistent, premium UI. -* Shared across the Action Center and the Workflow Execution Inspector for a unified user experience. - -### Technical Implementation Details -* **Database**: Added `human_input_requests` table with relational support. -* **API**: RESTful endpoints for internal system and frontend consumption. -* **Schema**: Leveraging Zod for rigorous DTO validation and OpenAPI generation. -* **State Management**: Optimized hooks for real-time status updates and interaction handling. - -This PR establishes the core capability of "Human-in-the-Loop" which is essential for secure and reliable AI-driven security workflows. From 4a63433f97ba176b310f7d6a22043221c2079ba1 Mon Sep 17 00:00:00 2001 From: betterclever Date: Sun, 4 Jan 2026 23:41:03 +0530 Subject: [PATCH 4/8] docs: revamp architecture and remove it-automation categories - Complete rewrite of architecture.mdx with accurate components and diagrams - Simplify component registry diagram to 4 major categories - Document correct service adapters and infrastructure stack - Remove it-automation/it_ops categories from all docs and skills Signed-off-by: betterclever --- .claude/skills/component-development/SKILL.md | 4 +- docs/architecture.mdx | 476 +++++++++--------- docs/development/component-development.mdx | 3 +- 3 files changed, 248 insertions(+), 235 deletions(-) diff --git a/.claude/skills/component-development/SKILL.md b/.claude/skills/component-development/SKILL.md index b8e50aa9..e939fb5a 100644 --- a/.claude/skills/component-development/SKILL.md +++ b/.claude/skills/component-development/SKILL.md @@ -15,7 +15,7 @@ description: Creating components (inline/docker). Dynamic ports, retry policies, ``` worker/src/components//.ts ``` -Categories: `security/`, `core/`, `ai/`, `notification/`, `manual-action/`, `it-automation/` +Categories: `security/`, `core/`, `ai/`, `notification/`, `manual-action/` ### ID Pattern ``` @@ -31,7 +31,7 @@ import { componentRegistry, ComponentDefinition } from '@shipsec/component-sdk'; const definition: ComponentDefinition = { id: 'category.tool.action', label: 'My Component', - category: 'security', // or: core, ai, notification, manual_action, it_ops + category: 'security', // or: core, ai, notification, manual_action runner: { kind: 'inline' }, // or: docker inputSchema: z.object({ ... }), outputSchema: z.object({ ... }), diff --git a/docs/architecture.mdx b/docs/architecture.mdx index c1ef2a03..0aca0e0f 100644 --- a/docs/architecture.mdx +++ b/docs/architecture.mdx @@ -3,29 +3,75 @@ title: "Architecture" description: "System design and module responsibilities for ShipSec Studio" --- -## Overview +## What is ShipSec Studio? -ShipSec Studio is an open source, no-code security workflow orchestration platform designed for security teams to build, execute, and monitor security automation workflows. Focus on security, not infrastructure. 
+ShipSec Studio is an open-source, no-code security workflow orchestration platform. Build, execute, and monitor security automation workflows through a visual interface — focus on security, not infrastructure. -The system is composed of four main layers: +--- + +## System Overview ``` -Frontend (React 19) ←→ Backend (NestJS) ←→ Temporal ←→ Worker (Node.js) - ↓ ↓ ↓ ↓ - Visual Builder REST API Workflow Component - & Timeline & Auth Orchestration Execution +┌─────────────────────────────────────────────────────────────────────────┐ +│ FRONTEND │ +│ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ +│ │ Visual │ │ Terminal │ │ Timeline │ │ Config │ │ +│ │ Builder │ │ Viewer │ │ Replay │ │ Panel │ │ +│ │ (ReactFlow) │ │ (xterm.js) │ │ (Zustand) │ │ (Forms) │ │ +│ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ +└───────────────────────────────────┬─────────────────────────────────────┘ + │ REST API + SSE + WebSocket +┌───────────────────────────────────▼─────────────────────────────────────┐ +│ BACKEND │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │Workflows │ │ Secrets │ │ Storage │ │ Trace │ │ Auth │ │ +│ │ + DSL │ │(AES-256) │ │ (MinIO) │ │ Events │ │ (Clerk) │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Webhooks │ │Schedules │ │ Agents │ │Human │ │Integr- │ │ +│ │ │ │ (CRON) │ │ │ │Inputs │ │ations │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +└───────────────────────────────────┬─────────────────────────────────────┘ + │ Temporal Client +┌───────────────────────────────────▼─────────────────────────────────────┐ +│ TEMPORAL │ +│ Workflow Orchestration • Retry Logic • Durability │ +└───────────────────────────────────┬─────────────────────────────────────┘ + │ Activity Execution +┌───────────────────────────────────▼─────────────────────────────────────┐ +│ WORKER │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ COMPONENT REGISTRY │ │ +│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │ +│ │ │ Tools │ │ AI │ │ Human │ │ Core │ │ │ +│ │ │(Security)│ │ Agents │ │ in Loop │ │ Utils │ │ │ +│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ SERVICE ADAPTERS │ │ +│ │ Secrets │ Storage │ Artifacts │ Trace │ Terminal │ Logs │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────────┘ + │ +┌───────────────────────────────────▼─────────────────────────────────────┐ +│ INFRASTRUCTURE │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │PostgreSQL│ │ MinIO │ │ Redis │ │Redpanda │ │ Loki │ │ +│ │ (Data) │ │ (Files) │ │(Terminal)│ │ (Kafka) │ │ (Logs) │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +└─────────────────────────────────────────────────────────────────────────┘ ``` --- ## Technology Stack -| Layer | Technologies | -|-------|-------------| -| **Frontend** | React 19, TypeScript, Vite, Tailwind CSS, Radix UI, ReactFlow, xterm.js | -| **Backend** | NestJS, TypeScript, Bun runtime, PostgreSQL, Drizzle ORM, Clerk Auth | -| **Worker** | Node.js, TypeScript, Temporal.io, Docker containers | -| **Infrastructure** | PostgreSQL, Temporal, MinIO, Redis, Loki, Redpanda (Kafka) 
| +| Layer | Stack | +|-------|-------| +| **Frontend** | React 19, TypeScript, Vite, TailwindCSS, Radix UI, ReactFlow, xterm.js, Zustand | +| **Backend** | NestJS, TypeScript, Bun, Drizzle ORM, Clerk Auth | +| **Worker** | Node.js, TypeScript, Temporal SDK, Docker | +| **Infrastructure** | PostgreSQL 16, Temporal, MinIO, Redis, Redpanda (Kafka), Loki | --- @@ -33,229 +79,163 @@ Frontend (React 19) ←→ Backend (NestJS) ←→ Temporal ←→ Worker (Node. ``` shipsec-studio/ -├── packages/ -│ ├── component-sdk/ # Framework-agnostic component SDK -│ ├── backend-client/ # Generated TypeScript API client -│ └── shared/ # Shared types and schemas +├── frontend/ # React workflow builder +│ └── src/ +│ ├── components/ # UI components +│ ├── pages/ # Route pages +│ ├── store/ # Zustand stores +│ └── hooks/ # Custom hooks │ -├── worker/ # Component execution engine +├── backend/ # NestJS API │ └── src/ -│ ├── components/ # Security component implementations -│ ├── adapters/ # Service interface implementations -│ └── temporal/ # Workflow orchestration +│ ├── workflows/ # Workflow CRUD + DSL compiler +│ ├── secrets/ # AES-256 encrypted secrets +│ ├── storage/ # MinIO file management +│ ├── trace/ # Event tracking +│ ├── auth/ # Clerk authentication +│ ├── webhooks/ # Webhook triggers +│ ├── schedules/ # CRON scheduling +│ ├── agents/ # AI agent management +│ ├── human-inputs/ # Manual approvals/forms +│ └── integrations/ # OAuth connections │ -├── backend/ # REST API and orchestration +├── worker/ # Component execution │ └── src/ -│ ├── workflows/ # Workflow CRUD + compilation -│ ├── storage/ # File upload/download API -│ ├── secrets/ # Encrypted secrets management -│ └── temporal/ # Temporal client wrapper +│ ├── components/ # Component implementations +│ │ ├── security/ # Subfinder, DNSX, Nuclei, etc. +│ │ ├── ai/ # LLM, Agents, MCP +│ │ ├── core/ # HTTP, FileLoader, Logic +│ │ ├── notification/ # Slack, Email +│ │ ├── manual-action/# Approvals, Forms +│ │ └── github/ # GitHub integrations +│ ├── adapters/ # Service implementations +│ └── temporal/ # Workflow/activity definitions │ -└── frontend/ # React workflow builder - └── src/ - ├── components/ - │ ├── workflow-builder/ # ReactFlow visual editor - │ ├── terminal/ # Real-time terminal display - │ └── timeline/ # Execution timeline - ├── store/ # Zustand state management - └── hooks/ # API and real-time hooks +├── packages/ +│ ├── component-sdk/ # Component definition framework +│ ├── backend-client/ # Generated API client +│ └── shared/ # Shared types and utilities +│ +└── docker/ # Infrastructure configs + ├── docker-compose.infra.yml + └── docker-compose.full.yml ``` --- -## Core System Components - -### Component SDK +## Component Categories -Framework-agnostic component definition system with zero runtime dependencies (except Zod). 
+Components are the building blocks of workflows: -```typescript -interface ComponentDefinition { - id: string; - label: string; - category: 'triggers' | 'discovery' | 'transform' | 'output'; - runner: DockerRunnerConfig | InlineRunnerConfig; - inputSchema: ZodSchema; - outputSchema: ZodSchema; - execute: (input: Input, context: ExecutionContext) => Promise; -} -``` - -**Component Categories:** -- **Triggers**: Manual, schedule, webhook, file monitor -- **Discovery**: Subfinder, DNSx, Nmap, HTTPx, Katana -- **Transform**: JSON/CSV/text processing and data enrichment -- **Output**: Email, Slack, file export, database storage - -### Service Interfaces - -```typescript -interface IFileStorageService { - upload(buffer: Buffer, mimeType: string): Promise; - download(key: string): Promise; - delete(key: string): Promise; -} - -interface ISecretsService { - getSecret(secretId: string): Promise; - rotateSecret(secretId: string, newValue: string): Promise; -} - -interface ITraceService { - record(event: TraceEvent): Promise; - setRunMetadata(runId: RunMetadata): void; - finalizeRun(runId: string): void; -} -``` +| Category | Description | Examples | +|----------|-------------|----------| +| **security** | Security scanning and enumeration tools | Subfinder, DNSX, Nuclei, Naabu, HTTPx, TruffleHog | +| **ai** | AI/ML and agent components | LLM Generate, AI Agent, MCP Providers | +| **core** | Utility and data processing | HTTP Request, File Loader, Logic Script, JSON Transform | +| **notification** | Alerts and messaging | Slack, Email | +| **manual-action** | Human-in-the-loop | Approvals, Forms, Selection | +| **github** | GitHub integrations | Remove Org Membership | --- -## Logging Infrastructure - -The system implements a three-pipeline logging architecture: +## Component Runners -### Terminal Streaming Pipeline +Components can execute in different environments: -Real-time terminal output capture and delivery: -- **Capture**: Docker container output captured as base64-encoded chunks -- **Transport**: Redis Streams with pattern `terminal:{runId}:{nodeRef}:{stream}` -- **Frontend**: xterm.js renders real-time terminal output with timeline synchronization +| Runner | Use Case | Example | +|--------|----------|---------| +| **inline** | Pure TypeScript — HTTP calls, transforms, logic | HTTP Request, File Loader | +| **docker** | CLI tools running in containers | Subfinder, DNSX, Nuclei | +| **remote** | External executors (future) | K8s jobs, ECS tasks | -### Log Streaming Architecture - -Structured log transport and persistence: -- **Sources**: Component stdout/stderr and console logs -- **Multi-transport**: Kafka for streaming, Loki for aggregation, PostgreSQL for metadata -- **Query Interface**: Frontend queries logs by run ID, node, time range, and level +--- -### Event Streaming Pipeline +## Real-time Streaming -Workflow lifecycle event tracking: -- **Event Types**: NODE_STARTED, NODE_COMPLETED, NODE_FAILED, NODE_PROGRESS -- **Transport**: Kafka-based with per-run sequence numbering -- **Timeline Generation**: Events processed to create visual execution timeline +Three separate pipelines for different data types: ---- +### Terminal Streaming +``` +Docker Container → PTY Output → Redis Streams → SSE → xterm.js +``` +- Pattern: `terminal:{runId}:{nodeRef}:{stream}` +- Low-latency (<50ms) for real-time terminal display -## Worker Architecture - -Executes components in isolated environments with real service implementations. 
- -```typescript -async function runComponentActivity( - componentId: string, - input: unknown, - context: ActivityContext -): Promise { - const component = componentRegistry.getComponent(componentId); - const executionContext = createExecutionContext({ - storage: globalStorage, - secrets: allowSecrets ? globalSecrets : undefined, - artifacts: scopedArtifacts, - trace: globalTrace, - logCollector: globalLogs, - terminalCollector: globalTerminal, - }); - - return await component.execute(input, executionContext); -} +### Event Streaming ``` +Component → Trace Events → Kafka → Backend → WebSocket → Timeline +``` +- Events: `NODE_STARTED`, `NODE_COMPLETED`, `NODE_FAILED`, `NODE_PROGRESS`, `AWAITING_INPUT` +- Powers the execution timeline and status updates -**Service Adapters:** -- **File Storage**: MinIO integration with PostgreSQL metadata -- **Secrets**: HashiCorp Vault with AES-256 encryption -- **Tracing**: Redis/pubsub for real-time events -- **Logging**: Kafka, Loki, and database persistence -- **Terminal**: Redis streams for real-time output +### Log Streaming +``` +Component → Logs → Kafka → Loki → Backend API → Frontend +``` +- Structured logs with nanosecond precision +- Queryable by run ID, node, time range, log level --- -## Backend Services - -#### Core Modules - -- **WorkflowsModule**: Workflow CRUD, compilation, Temporal integration -- **AuthModule**: Clerk-based authentication and multi-tenancy -- **SecretsModule**: Encrypted secrets management with versioning -- **IntegrationsModule**: OAuth orchestration and token vault -- **TraceModule**: Event management and timeline generation -- **LoggingModule**: Log ingestion and processing - - -#### Key API Endpoints - -| Endpoint | Description | -|----------|-------------| -| `POST /api/v1/workflows` | Create and compile workflows | -| `POST /api/v1/workflows/{id}/runs` | Execute workflows | -| `GET /api/v1/runs/{runId}/terminal` | Get terminal chunks | -| `GET /api/v1/runs/{runId}/logs` | Get execution logs | -| `GET /api/v1/runs/{runId}/events` | Get trace events | -| `GET /api/v1/runs/{runId}/stream` | SSE streaming endpoint | - - -## Frontend Architecture - -#### Real-time Features - -- **Visual Builder**: ReactFlow-based workflow editor with drag-and-drop -- **Terminal Display**: xterm.js integration for real-time terminal output -- **Execution Timeline**: Zustand-based timeline state with event synchronization -- **Live Updates**: WebSocket/SSE streaming for real-time status updates - -#### State Management - -- **Timeline Store**: Zustand for execution timeline state -- **API State**: React Query for server state management -- **Component State**: Local React state with hooks - - ## Workflow Execution Flow ``` -1. Frontend creates workflow graph (ReactFlow) - └─> POST /api/v1/workflows with nodes & edges - -2. Backend validates and compiles - └─> Validates nodes against componentRegistry - └─> Compiles graph → DSL (topological sort + join strategies) - └─> Stores in PostgreSQL - └─> Calls TemporalService.startWorkflow() - -3. Temporal orchestrates execution - └─> Schedules workflow on "shipsec-workflows" queue - └─> Worker picks up and executes components via activities - -4. Component execution in Worker - └─> runComponentActivity() looks up component in registry - └─> Creates ExecutionContext with injected services - └─> Executes in Docker container with isolation - └─> Streams logs, events, and terminal output in real-time - -5. 
Real-time monitoring - └─> Events → Kafka → Backend → WebSocket to Frontend - └─> Terminal → Redis Streams → SSE to Frontend - └─> Logs → Kafka → Loki → Backend API queries +┌─────────────────────────────────────────────────────────────────────┐ +│ 1. DESIGN │ +│ Frontend: User creates workflow in ReactFlow visual builder │ +│ └─→ POST /api/v1/workflows with nodes & edges │ +└───────────────────────────────────┬─────────────────────────────────┘ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ 2. COMPILE │ +│ Backend: Validates graph and compiles to DSL │ +│ └─→ Topological sort → Join strategies → Store in PostgreSQL │ +└───────────────────────────────────┬─────────────────────────────────┘ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ 3. EXECUTE │ +│ POST /api/v1/workflows/{id}/run │ +│ └─→ Backend calls Temporal.startWorkflow() │ +└───────────────────────────────────┬─────────────────────────────────┘ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ 4. ORCHESTRATE │ +│ Temporal schedules workflow on "shipsec-workflows" queue │ +│ └─→ Worker picks up and executes components as activities │ +└───────────────────────────────────┬─────────────────────────────────┘ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ 5. COMPONENT EXECUTION │ +│ Worker: runComponentActivity() │ +│ ├─→ Look up component in registry │ +│ ├─→ Create ExecutionContext with service adapters │ +│ ├─→ Execute (inline TypeScript OR Docker container) │ +│ └─→ Stream terminal, events, logs in real-time │ +└───────────────────────────────────┬─────────────────────────────────┘ + ▼ +┌─────────────────────────────────────────────────────────────────────┐ +│ 6. MONITOR │ +│ Frontend receives real-time updates: │ +│ ├─→ Terminal: Redis Streams → SSE │ +│ ├─→ Events: Kafka → WebSocket │ +│ └─→ Status: Polling /runs/{runId}/status │ +└─────────────────────────────────────────────────────────────────────┘ ``` --- -## Workflow Replay +## Service Adapters -#### Data Sources for Replay +Worker injects these services into component execution context: -- **Terminal Cast Files**: Asciinema-compatible `.cast` files stored in MinIO -- **Structured Logs**: Loki with nanosecond precision -- **Trace Events**: PostgreSQL with sequence numbers -- **Artifacts**: MinIO with component outputs - -#### Timeline Features - -- Playback controls (play, pause, seek) -- Node state visualization -- Data flow display -- Terminal seeking -- Speed control +| Adapter | Purpose | Backend | +|---------|---------|---------| +| **SecretsAdapter** | Encrypted secret retrieval | PostgreSQL + AES-256-GCM | +| **FileStorageAdapter** | File download/upload | MinIO | +| **ArtifactAdapter** | Component output storage | MinIO + PostgreSQL | +| **TraceAdapter** | Event recording | Kafka + PostgreSQL | +| **TerminalStreamAdapter** | Real-time terminal output | Redis Streams | +| **LokiLogAdapter** | Structured log aggregation | Loki | --- @@ -266,57 +246,91 @@ async function runComponentActivity( workflows ( id UUID PRIMARY KEY, name TEXT NOT NULL, - graph JSONB NOT NULL, - compiled_definition JSONB, - organization_id VARCHAR + description TEXT, + graph JSONB NOT NULL, -- ReactFlow nodes/edges + compiled_dsl JSONB, -- Compiled execution plan + organization_id VARCHAR, + created_at TIMESTAMP, + updated_at TIMESTAMP ); --- Workflow execution instances +-- Workflow execution instances workflow_runs ( run_id TEXT PRIMARY KEY, - workflow_id UUID NOT 
NULL, + workflow_id UUID REFERENCES workflows(id), temporal_run_id TEXT, inputs JSONB NOT NULL, - status VARCHAR, + status VARCHAR, -- PENDING, RUNNING, COMPLETED, FAILED started_at TIMESTAMP, completed_at TIMESTAMP ); --- Component execution results -workflow_nodes ( - id UUID PRIMARY KEY, - run_id TEXT NOT NULL, - node_ref TEXT NOT NULL, - component_id TEXT NOT NULL, - inputs JSONB, - outputs JSONB, - status VARCHAR, - error_message TEXT -); - --- Secure storage +-- Secrets (encrypted at rest) secrets ( id UUID PRIMARY KEY, name TEXT NOT NULL UNIQUE, - current_version INTEGER DEFAULT 1, - versions JSONB NOT NULL, organization_id VARCHAR ); + +secret_versions ( + id UUID PRIMARY KEY, + secret_id UUID REFERENCES secrets(id), + version INTEGER NOT NULL, + encrypted_value BYTEA NOT NULL, + iv BYTEA NOT NULL, + auth_tag BYTEA NOT NULL, + encryption_key_id TEXT, + is_active BOOLEAN DEFAULT true +); + +-- Trace events +trace_events ( + id UUID PRIMARY KEY, + run_id TEXT NOT NULL, + node_ref TEXT NOT NULL, + type VARCHAR NOT NULL, -- NODE_STARTED, NODE_COMPLETED, etc. + timestamp TIMESTAMP NOT NULL, + sequence INTEGER, + data JSONB +); ``` --- ## Security Architecture -### Multi-tenant Authentication +### Authentication & Multi-tenancy +- **Clerk Integration** — Production-ready authentication +- **Organization Isolation** — All data scoped by `organization_id` +- **Role-Based Access** — Admin, Member, Viewer roles + +### Secrets Management +- **AES-256-GCM** encryption at rest (NOT HashiCorp Vault) +- **Versioned secrets** with active/inactive tracking +- **Master key** via `SECRET_STORE_MASTER_KEY` environment variable -- **Clerk Integration**: Production-ready authentication -- **Organization Isolation**: Tenant-based data separation -- **Role-Based Access**: Admin, User, Viewer roles +### Container Isolation +- **IsolatedContainerVolume** — Per-tenant, per-run Docker volumes +- **Network isolation** — Components run with `network: none` or `bridge` +- **Automatic cleanup** — Volumes destroyed after execution + +--- + +## Development URLs + +| Service | URL | +|---------|-----| +| Frontend | http://localhost:5173 | +| Backend API | http://localhost:3211 | +| Temporal UI | http://localhost:8081 | +| MinIO Console | http://localhost:9001 | +| Redpanda Console | http://localhost:8082 | +| Loki | http://localhost:3100 | + +--- -### Data Security +## Learn More -- **Secrets Encryption**: AES-256-GCM encryption at rest -- **Container Isolation**: Docker isolation for component execution -- **Network Security**: TLS encryption, proper CORS configuration -- **Access Control**: Fine-grained permissions and audit logging +- **Component Development**: `/development/component-development` +- **Isolated Volumes**: `/development/isolated-volumes` +- **Getting Started**: `/getting-started` diff --git a/docs/development/component-development.mdx b/docs/development/component-development.mdx index 9749ea7f..f6c952fb 100644 --- a/docs/development/component-development.mdx +++ b/docs/development/component-development.mdx @@ -19,8 +19,7 @@ worker/src/components/ ├── core/ # Core utilities (http-request, file-loader) ├── ai/ # AI components (llm, agents) ├── notification/ # Notifications (slack, email) -├── manual-action/ # Human-in-the-loop (approvals, forms) -└── it-automation/ # IT ops (okta, google workspace) +└── manual-action/ # Human-in-the-loop (approvals, forms) ``` ### ID Naming Convention From 9b4acfb08c69f4be9998a71082a74a079a878362 Mon Sep 17 00:00:00 2001 From: betterclever Date: Sun, 4 Jan 2026 
23:44:30 +0530 Subject: [PATCH 5/8] docs: add HITL architecture and local auth details - Create dedicated docs/architecture/human-in-the-loop.mdx - Explain block/resume scenario via Temporal signals - Add sequence diagram for manual resolution - Update architecture.mdx with Local Auth details and HITL link Signed-off-by: betterclever --- docs/architecture.mdx | 11 +++-- docs/architecture/human-in-the-loop.mdx | 58 +++++++++++++++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) create mode 100644 docs/architecture/human-in-the-loop.mdx diff --git a/docs/architecture.mdx b/docs/architecture.mdx index 0aca0e0f..bba868a2 100644 --- a/docs/architecture.mdx +++ b/docs/architecture.mdx @@ -48,6 +48,10 @@ ShipSec Studio is an open-source, no-code security workflow orchestration platfo │ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │ │ └─────────────────────────────────────────────────────────────────┘ │ │ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ [Learn more about HITL Architecture] │ │ +│ │ (/architecture/human-in-the-loop) │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ │ │ SERVICE ADAPTERS │ │ │ │ Secrets │ Storage │ Artifacts │ Trace │ Terminal │ Logs │ │ │ └─────────────────────────────────────────────────────────────────┘ │ @@ -300,9 +304,10 @@ trace_events ( ## Security Architecture ### Authentication & Multi-tenancy -- **Clerk Integration** — Production-ready authentication -- **Organization Isolation** — All data scoped by `organization_id` -- **Role-Based Access** — Admin, Member, Viewer roles +- **Clerk Integration** — Production-ready authentication for hosted environments. +- **Local Auth** — Default for local setup using `ADMIN_USERNAME` / `ADMIN_PASSWORD`. +- **Organization Isolation** — All data scoped by `organization_id`. +- **Role-Based Access** — Admin, Member, Viewer roles. ### Secrets Management - **AES-256-GCM** encryption at rest (NOT HashiCorp Vault) diff --git a/docs/architecture/human-in-the-loop.mdx b/docs/architecture/human-in-the-loop.mdx new file mode 100644 index 00000000..bfb02f87 --- /dev/null +++ b/docs/architecture/human-in-the-loop.mdx @@ -0,0 +1,58 @@ +--- +title: "Human-in-the-Loop (HITL)" +description: "How ShipSec Studio handles manual approvals, forms, and workflow pauses" +--- + +# Human-in-the-Loop (HITL) + +Human-in-the-Loop (HITL) allows workflows to pause and wait for human interaction (approvals, data entry, selection) before proceeding. + +## Architecture Overview + +HITL is implemented using **Temporal Signals** and a dedicated **Human Input Service** in the backend. + +### 1. The Block Scenario +When a workflow hits a manual action node (e.g., `manual-action.approval`): +- The Worker executes the component, which creates a **Human Input Request** in the database. +- The component then enters a **Pending State** within the Temporal workflow. +- Technically, the Temporal workflow task waits for a specific signal (`resolveHumanInput`). + +### 2. Notification & Access +- The system generates a unique, cryptographically secure **Resolve Token**. +- A notification (Slack, Email) is sent with a link containing this token. +- Alternatively, the request appears in the "Pending Actions" section of the ShipSec Studio UI. + +### 3. The Resume Scenario (Signal) +When a human interacts with the request (approves/rejects/submits form): +- The Frontend (or public link) calls the Backend API. 
+- The **Human Input Service**: + 1. Validates the input/token. + 2. Updates the request status in PostgreSQL to `resolved`. + 3. Calls `temporalClient.signalWorkflow()` with the `resolveHumanInput` signal. +- The blocked Temporal workflow receives the signal, resumes execution, and the manual action node returns the human's input to the next node in the graph. + +## Sequence Diagram + +``` +┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────────┐ +│ Temporal │ │ Worker │ │ Backend │ │ Human │ +└────┬─────┘ └────┬─────┘ └────┬─────┘ └──────┬───────┘ + │ Execute Node │ │ │ + ├────────────────>│ │ │ + │ │ Create Request │ │ + │ ├────────────────>│ │ + │ Signal Wait │ │ Notify Human │ + │<────────────────┤ ├──────────────────>│ + │ (PAUSED) │ │ │ + │ │ │ Interact │ + │ │ │<──────────────────┤ + │ Signal Workflow│ │ │ + │<──────────────────────────────────┤ │ + │ (RESUME) │ │ │ + │ │ │ │ +``` + +## Security +- **Token Isolation**: Each request has a unique `resolveToken`. +- **Expirations**: Requests can be configured with timeouts in the Temporal workflow logic. +- **Audit Log**: Every resolution is tracked with `respondedBy` and `respondedAt` metadata. From dab5efaa098b6ca25d8b1a57161b5be9b8dd3892 Mon Sep 17 00:00:00 2001 From: betterclever Date: Sun, 4 Jan 2026 23:55:44 +0530 Subject: [PATCH 6/8] docs: overhaul architecture documentation with modular deep-dives and accurate streaming logic Signed-off-by: betterclever --- .ai/temporal-worker-architecture.md | 424 ------------------- docs/architecture.mdx | 290 ++----------- docs/architecture/streaming-pipelines.mdx | 56 +++ docs/architecture/temporal-orchestration.mdx | 58 +++ docs/architecture/workflow-compilation.mdx | 47 ++ 5 files changed, 205 insertions(+), 670 deletions(-) delete mode 100644 .ai/temporal-worker-architecture.md create mode 100644 docs/architecture/streaming-pipelines.mdx create mode 100644 docs/architecture/temporal-orchestration.mdx create mode 100644 docs/architecture/workflow-compilation.mdx diff --git a/.ai/temporal-worker-architecture.md b/.ai/temporal-worker-architecture.md deleted file mode 100644 index 5ae19e58..00000000 --- a/.ai/temporal-worker-architecture.md +++ /dev/null @@ -1,424 +0,0 @@ -# Temporal Worker Architecture Explained - -## 🏗️ How Temporal Workers Work - -### Basic Concepts - -**Workflow vs Activity Execution:** -- **Workflows** = Orchestration logic (deterministic, event-sourced) -- **Activities** = Actual work (non-deterministic, can fail/retry) - -### Single Workflow Execution - -``` -┌─────────────────────────────────────────────────┐ -│ Temporal Server │ -│ ┌───────────────┐ ┌──────────────────┐ │ -│ │ Workflow Task │───────▶│ Activity Tasks │ │ -│ │ Queue │ │ Queue │ │ -│ └───────────────┘ └──────────────────┘ │ -└─────────────────────────────────────────────────┘ - │ │ - │ │ - ▼ ▼ - ┌──────────────┐ ┌──────────────┐ - │ Worker 1 │ │ Worker 2 │ - │ │ │ │ - │ • Workflows │ │ • Activities │ - │ • Activities │ │ (heavy) │ - └──────────────┘ └──────────────┘ -``` - ---- - -## 🎯 Current ShipSec Studio Setup - -### Single Worker (Current State) - -```typescript -// backend/src/temporal/workers/dev.worker.ts -const worker = await Worker.create({ - connection, - namespace: 'shipsec-dev', - taskQueue: 'shipsec-default', // ← Single task queue - workflowsPath, - activities: { - runWorkflow: runWorkflowActivity, // ← All activities here - }, -}); -``` - -**What happens:** -1. Worker polls `shipsec-default` task queue -2. Executes **both** workflows AND activities -3. 
Everything runs on the same worker process - ---- - -## 🔀 Specialized Workers Architecture - -### Multi-Worker Setup - -``` - Temporal Server - │ - ┌────────────────┼────────────────┐ - │ │ │ - ▼ ▼ ▼ - ┌──────────┐ ┌──────────┐ ┌──────────┐ - │ Worker 1 │ │ Worker 2 │ │ Worker 3 │ - │ │ │ │ │ │ - │ Queue: │ │ Queue: │ │ Queue: │ - │ default │ │ security │ │ file-ops │ - │ │ │ │ │ │ - │ • Wrkflw │ │ • Nmap │ │ • S3 │ - │ • Basic │ │ • Subfin │ │ • MinIO │ - └──────────┘ └──────────┘ └──────────┘ -``` - -### Why Multiple Workers? - -1. **Resource Isolation**: Heavy security scans don't block file operations -2. **Scaling**: Scale security workers independently -3. **Specialization**: Different machines for different tasks -4. **Failure Isolation**: One worker crash doesn't affect others - ---- - -## 💡 How It Works in Practice - -### Example: Security Scan Workflow - -```typescript -// Workflow code (runs on default queue) -export async function securityScanWorkflow(input: ScanInput) { - log.info('Starting security scan workflow'); - - // This activity runs on 'security-tools' queue - const subdomains = await securityActivities.subfinder({ - domain: input.domain, - }); - - // This activity runs on 'file-ops' queue - await fileActivities.saveResults({ - data: subdomains, - path: 's3://bucket/results.json', - }); - - return { subdomains }; -} -``` - -### Activity Registration with Task Queues - -```typescript -// Activities with task queue routing -const securityActivities = proxyActivities({ - taskQueue: 'security-tools', // ← Specific queue - startToCloseTimeout: '30 minutes', -}); - -const fileActivities = proxyActivities({ - taskQueue: 'file-ops', // ← Different queue - startToCloseTimeout: '5 minutes', -}); -``` - ---- - -## 🛠️ Implementing Specialized Workers in ShipSec - -### Step 1: Create Specialized Worker - -```typescript -// backend/src/temporal/workers/security.worker.ts -import { Worker, NativeConnection } from '@temporalio/worker'; -import { nmapScanActivity } from '../activities/nmap-scan.activity'; -import { subfinderActivity } from '../activities/subfinder.activity'; - -async function main() { - const connection = await NativeConnection.connect({ - address: 'localhost:7233', - }); - - const worker = await Worker.create({ - connection, - namespace: 'shipsec-dev', - taskQueue: 'security-tools', // ← Specialized queue - activities: { - // Only security-related activities - nmapScan: nmapScanActivity, - subfinder: subfinderActivity, - }, - // No workflowsPath - only runs activities - }); - - console.log('🔒 Security worker running...'); - await worker.run(); -} - -main().catch(console.error); -``` - -### Step 2: Update Workflow to Use Specialized Queue - -```typescript -// backend/src/temporal/workflows/security-scan.workflow.ts -import { proxyActivities } from '@temporalio/workflow'; - -// Activities on specialized queue -const securityActivities = proxyActivities<{ - subfinder(input: SubfinderInput): Promise; - nmapScan(input: NmapInput): Promise; -}>({ - taskQueue: 'security-tools', // ← Routes to specialized worker - startToCloseTimeout: '30 minutes', -}); - -export async function securityScanWorkflow(input: ScanInput) { - // This will be executed by the security-tools worker - const subdomains = await securityActivities.subfinder({ - domain: input.domain, - }); - - const portScans = await Promise.all( - subdomains.map(subdomain => - securityActivities.nmapScan({ target: subdomain }) - ) - ); - - return { subdomains, portScans }; -} -``` - -### Step 3: Run Multiple Workers - 
-```javascript -// pm2.config.cjs -module.exports = { - apps: [ - { - name: 'shipsec-backend', - cwd: './backend', - script: 'bun', - args: 'run src/main.ts', - }, - { - name: 'shipsec-worker-default', - cwd: './backend', - script: 'npm', - args: 'run worker:dev', - }, - { - name: 'shipsec-worker-security', // ← New specialized worker - cwd: './backend', - script: 'tsx', - args: 'src/temporal/workers/security.worker.ts', - }, - { - name: 'shipsec-worker-file-ops', // ← Another specialized worker - cwd: './backend', - script: 'tsx', - args: 'src/temporal/workers/file-ops.worker.ts', - }, - ], -}; -``` - ---- - -## 🎭 Task Queue Routing - -### How Activities Find Workers - -``` -Workflow (on default queue): - └─> Calls activity with taskQueue='security-tools' - └─> Temporal Server queues task on 'security-tools' - └─> Security Worker polls 'security-tools' - └─> Executes activity - └─> Returns result to workflow -``` - -### Multiple Workers, Same Queue - -``` - Temporal Server - │ - ┌───────┴───────┐ - ▼ ▼ - Worker A Worker B - (security) (security) - │ │ - └───────┬───────┘ - ▼ - Load Balanced! -``` - -**Benefits:** -- Horizontal scaling -- Automatic load balancing -- No configuration needed - ---- - -## 🚀 Real-World ShipSec Example - -### Scenario: Subdomain Enumeration Pipeline - -```typescript -// Workflow orchestration (lightweight) -export async function subdomainPipelineWorkflow(input: PipelineInput) { - // 1. Run on security-tools worker (heavy) - const subdomains = await securityActivities.subfinder({ - domain: input.domain, - options: input.subfinderOptions, - }); - - // 2. Run on file-ops worker (I/O bound) - const stored = await fileActivities.storeResults({ - data: subdomains, - bucket: input.outputBucket, - }); - - // 3. Run on notification worker (external API) - await notificationActivities.sendAlert({ - message: `Found ${subdomains.length} subdomains`, - webhook: input.webhookUrl, - }); - - return { count: subdomains.length, stored }; -} -``` - -### Worker Distribution - -| Worker | Task Queue | Activities | Resources | -|--------|------------|------------|-----------| -| **Default** | `shipsec-default` | Workflow execution | CPU: Low, RAM: Low | -| **Security** | `security-tools` | Subfinder, Nmap | CPU: High, RAM: High | -| **File Ops** | `file-ops` | S3, MinIO | CPU: Low, I/O: High | -| **Notifications** | `notifications` | Webhooks, Email | CPU: Low, Network: High | - ---- - -## 🎯 Best Practices - -### 1. Default Worker for Workflows -```typescript -// Always have a worker for workflows -const defaultWorker = await Worker.create({ - taskQueue: 'shipsec-default', - workflowsPath: './workflows', - activities: { - // Light activities only - validateInput, - logEvent, - }, -}); -``` - -### 2. Specialized Workers for Heavy Activities -```typescript -// Security tools worker -const securityWorker = await Worker.create({ - taskQueue: 'security-tools', - activities: { - subfinder, - nmap, - nuclei, - }, - maxConcurrentActivityTaskExecutions: 5, // Limit concurrency -}); -``` - -### 3. 
Route Activities by Task Queue -```typescript -// In workflow -const heavyActivities = proxyActivities({ - taskQueue: 'security-tools', // ← Explicit routing - startToCloseTimeout: '1 hour', -}); - -const lightActivities = proxyActivities({ - // No taskQueue = uses workflow's queue - startToCloseTimeout: '1 minute', -}); -``` - ---- - -## 🔍 Monitoring Task Queues - -### Check Queue Status - -```bash -# Temporal CLI -temporal task-queue describe \ - --namespace shipsec-dev \ - --task-queue security-tools -``` - -### Via Temporal UI -``` -http://localhost:8081/namespaces/shipsec-dev/task-queues -``` - -You can see: -- Active workers per queue -- Pending tasks -- Task backlog -- Worker health - ---- - -## 📊 Performance Implications - -### Single Worker (Current) -``` -✅ Simple setup -✅ Easy debugging -❌ Single point of failure -❌ No resource isolation -❌ Limited scaling -``` - -### Multi-Worker (Recommended) -``` -✅ Resource isolation -✅ Horizontal scaling -✅ Failure isolation -✅ Optimized for workload -⚠️ More complex setup -⚠️ More processes to manage -``` - ---- - -## 🎓 Summary - -### Key Concepts - -1. **Workflows run on ANY worker** that polls their task queue -2. **Activities can be routed** to specific workers via `taskQueue` parameter -3. **Multiple workers on same queue** = automatic load balancing -4. **Different queues** = workload isolation and specialization - -### Current ShipSec Setup -- ✅ Single worker on `shipsec-default` queue -- ✅ Executes both workflows and activities -- ✅ Good for development and small scale - -### Recommended Production Setup -- 🎯 Default worker: Workflows + light activities -- 🎯 Security worker: Heavy security scanning tools -- 🎯 File ops worker: S3/MinIO operations -- 🎯 Notification worker: Webhooks, emails, alerts - -### Next Steps -1. Identify heavy activities in your workflows -2. Create specialized workers for resource-intensive tasks -3. Update workflows to route activities via `taskQueue` -4. 
Monitor queue health in Temporal UI - - diff --git a/docs/architecture.mdx b/docs/architecture.mdx index bba868a2..d00d2dd9 100644 --- a/docs/architecture.mdx +++ b/docs/architecture.mdx @@ -12,57 +12,53 @@ ShipSec Studio is an open-source, no-code security workflow orchestration platfo ## System Overview ``` -┌─────────────────────────────────────────────────────────────────────────┐ -│ FRONTEND │ +┌────────────────────────────────────────────────────────────────────────┐ +│ FRONTEND │ │ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ ┌─────────────┐ │ │ │ Visual │ │ Terminal │ │ Timeline │ │ Config │ │ │ │ Builder │ │ Viewer │ │ Replay │ │ Panel │ │ │ │ (ReactFlow) │ │ (xterm.js) │ │ (Zustand) │ │ (Forms) │ │ │ └─────────────┘ └─────────────┘ └─────────────┘ └─────────────┘ │ -└───────────────────────────────────┬─────────────────────────────────────┘ - │ REST API + SSE + WebSocket +└───────────────────────────────────┬────────────────────────────────────┘ + │ REST API + Unified SSE ┌───────────────────────────────────▼─────────────────────────────────────┐ -│ BACKEND │ -│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ -│ │Workflows │ │ Secrets │ │ Storage │ │ Trace │ │ Auth │ │ -│ │ + DSL │ │(AES-256) │ │ (MinIO) │ │ Events │ │ (Clerk) │ │ -│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ -│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ -│ │ Webhooks │ │Schedules │ │ Agents │ │Human │ │Integr- │ │ -│ │ │ │ (CRON) │ │ │ │Inputs │ │ations │ │ -│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +│ BACKEND │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │Workflows │ │ Secrets │ │ Storage │ │ Trace │ │ Auth │ │ +│ │ + DSL │ │(AES-256) │ │ (MinIO) │ │ Events │ │ (Clerk) │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │ Webhooks │ │Schedules │ │ Agents │ │Human │ │Integr- │ │ +│ │ │ │ (CRON) │ │ │ │Inputs │ │ations │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ └───────────────────────────────────┬─────────────────────────────────────┘ │ Temporal Client ┌───────────────────────────────────▼─────────────────────────────────────┐ -│ TEMPORAL │ +│ TEMPORAL │ │ Workflow Orchestration • Retry Logic • Durability │ └───────────────────────────────────┬─────────────────────────────────────┘ │ Activity Execution ┌───────────────────────────────────▼─────────────────────────────────────┐ -│ WORKER │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ COMPONENT REGISTRY │ │ -│ │ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │ -│ │ │ Tools │ │ AI │ │ Human │ │ Core │ │ │ -│ │ │(Security)│ │ Agents │ │ in Loop │ │ Utils │ │ │ -│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │ -│ └─────────────────────────────────────────────────────────────────┘ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ [Learn more about HITL Architecture] │ │ -│ │ (/architecture/human-in-the-loop) │ │ -│ └─────────────────────────────────────────────────────────────────┘ │ -│ ┌─────────────────────────────────────────────────────────────────┐ │ -│ │ SERVICE ADAPTERS │ │ -│ │ Secrets │ Storage │ Artifacts │ Trace │ Terminal │ Logs │ │ -│ └─────────────────────────────────────────────────────────────────┘ │ +│ WORKER │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ COMPONENT REGISTRY │ │ +│ │ 
┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ │ +│ │ │ Tools │ │ AI │ │ Human │ │ Core │ │ │ +│ │ │(Security)│ │ Agents │ │ in Loop │ │ Utils │ │ │ +│ │ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ +│ ┌─────────────────────────────────────────────────────────────────┐ │ +│ │ SERVICE ADAPTERS │ │ +│ │ Secrets │ Storage │ Artifacts │ Trace │ Terminal │ Logs │ │ +│ └─────────────────────────────────────────────────────────────────┘ │ └─────────────────────────────────────────────────────────────────────────┘ │ ┌───────────────────────────────────▼─────────────────────────────────────┐ -│ INFRASTRUCTURE │ -│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ -│ │PostgreSQL│ │ MinIO │ │ Redis │ │Redpanda │ │ Loki │ │ -│ │ (Data) │ │ (Files) │ │(Terminal)│ │ (Kafka) │ │ (Logs) │ │ -│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ +│ INFRASTRUCTURE │ +│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │ +│ │PostgreSQL│ │ MinIO │ │ Redis │ │Redpanda │ │ Loki │ │ +│ │ (Data) │ │ (Files) │ │(Terminal)│ │ (Kafka) │ │ (Logs) │ │ +│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │ └─────────────────────────────────────────────────────────────────────────┘ ``` @@ -79,51 +75,14 @@ ShipSec Studio is an open-source, no-code security workflow orchestration platfo --- -## Monorepo Structure +## Core Deep-Dives -``` -shipsec-studio/ -├── frontend/ # React workflow builder -│ └── src/ -│ ├── components/ # UI components -│ ├── pages/ # Route pages -│ ├── store/ # Zustand stores -│ └── hooks/ # Custom hooks -│ -├── backend/ # NestJS API -│ └── src/ -│ ├── workflows/ # Workflow CRUD + DSL compiler -│ ├── secrets/ # AES-256 encrypted secrets -│ ├── storage/ # MinIO file management -│ ├── trace/ # Event tracking -│ ├── auth/ # Clerk authentication -│ ├── webhooks/ # Webhook triggers -│ ├── schedules/ # CRON scheduling -│ ├── agents/ # AI agent management -│ ├── human-inputs/ # Manual approvals/forms -│ └── integrations/ # OAuth connections -│ -├── worker/ # Component execution -│ └── src/ -│ ├── components/ # Component implementations -│ │ ├── security/ # Subfinder, DNSX, Nuclei, etc. -│ │ ├── ai/ # LLM, Agents, MCP -│ │ ├── core/ # HTTP, FileLoader, Logic -│ │ ├── notification/ # Slack, Email -│ │ ├── manual-action/# Approvals, Forms -│ │ └── github/ # GitHub integrations -│ ├── adapters/ # Service implementations -│ └── temporal/ # Workflow/activity definitions -│ -├── packages/ -│ ├── component-sdk/ # Component definition framework -│ ├── backend-client/ # Generated API client -│ └── shared/ # Shared types and utilities -│ -└── docker/ # Infrastructure configs - ├── docker-compose.infra.yml - └── docker-compose.full.yml -``` +To keep this guide concise, complicated subsystems are documented in their own dedicated files: + +- **[Workflow Compilation (DSL)](/architecture/workflow-compilation)**: How visual graphs are transformed into executable instructions. +- **[Temporal Orchestration](/architecture/temporal-orchestration)**: How we use Temporal for durability and worker scaling. +- **[Streaming Pipelines](/architecture/streaming-pipelines)**: How terminal, logs, and events are delivered in real-time. +- **[Human-in-the-Loop](/architecture/human-in-the-loop)**: How we pause workflows for manual approvals and forms. 
--- @@ -142,182 +101,22 @@ Components are the building blocks of workflows: --- -## Component Runners - -Components can execute in different environments: - -| Runner | Use Case | Example | -|--------|----------|---------| -| **inline** | Pure TypeScript — HTTP calls, transforms, logic | HTTP Request, File Loader | -| **docker** | CLI tools running in containers | Subfinder, DNSX, Nuclei | -| **remote** | External executors (future) | K8s jobs, ECS tasks | - ---- - -## Real-time Streaming - -Three separate pipelines for different data types: - -### Terminal Streaming -``` -Docker Container → PTY Output → Redis Streams → SSE → xterm.js -``` -- Pattern: `terminal:{runId}:{nodeRef}:{stream}` -- Low-latency (<50ms) for real-time terminal display - -### Event Streaming -``` -Component → Trace Events → Kafka → Backend → WebSocket → Timeline -``` -- Events: `NODE_STARTED`, `NODE_COMPLETED`, `NODE_FAILED`, `NODE_PROGRESS`, `AWAITING_INPUT` -- Powers the execution timeline and status updates - -### Log Streaming -``` -Component → Logs → Kafka → Loki → Backend API → Frontend -``` -- Structured logs with nanosecond precision -- Queryable by run ID, node, time range, log level - ---- - -## Workflow Execution Flow - -``` -┌─────────────────────────────────────────────────────────────────────┐ -│ 1. DESIGN │ -│ Frontend: User creates workflow in ReactFlow visual builder │ -│ └─→ POST /api/v1/workflows with nodes & edges │ -└───────────────────────────────────┬─────────────────────────────────┘ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 2. COMPILE │ -│ Backend: Validates graph and compiles to DSL │ -│ └─→ Topological sort → Join strategies → Store in PostgreSQL │ -└───────────────────────────────────┬─────────────────────────────────┘ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 3. EXECUTE │ -│ POST /api/v1/workflows/{id}/run │ -│ └─→ Backend calls Temporal.startWorkflow() │ -└───────────────────────────────────┬─────────────────────────────────┘ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 4. ORCHESTRATE │ -│ Temporal schedules workflow on "shipsec-workflows" queue │ -│ └─→ Worker picks up and executes components as activities │ -└───────────────────────────────────┬─────────────────────────────────┘ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 5. COMPONENT EXECUTION │ -│ Worker: runComponentActivity() │ -│ ├─→ Look up component in registry │ -│ ├─→ Create ExecutionContext with service adapters │ -│ ├─→ Execute (inline TypeScript OR Docker container) │ -│ └─→ Stream terminal, events, logs in real-time │ -└───────────────────────────────────┬─────────────────────────────────┘ - ▼ -┌─────────────────────────────────────────────────────────────────────┐ -│ 6. 
MONITOR │ -│ Frontend receives real-time updates: │ -│ ├─→ Terminal: Redis Streams → SSE │ -│ ├─→ Events: Kafka → WebSocket │ -│ └─→ Status: Polling /runs/{runId}/status │ -└─────────────────────────────────────────────────────────────────────┘ -``` - ---- - -## Service Adapters - -Worker injects these services into component execution context: - -| Adapter | Purpose | Backend | -|---------|---------|---------| -| **SecretsAdapter** | Encrypted secret retrieval | PostgreSQL + AES-256-GCM | -| **FileStorageAdapter** | File download/upload | MinIO | -| **ArtifactAdapter** | Component output storage | MinIO + PostgreSQL | -| **TraceAdapter** | Event recording | Kafka + PostgreSQL | -| **TerminalStreamAdapter** | Real-time terminal output | Redis Streams | -| **LokiLogAdapter** | Structured log aggregation | Loki | - ---- - -## Database Schema - -```sql --- Workflow definitions -workflows ( - id UUID PRIMARY KEY, - name TEXT NOT NULL, - description TEXT, - graph JSONB NOT NULL, -- ReactFlow nodes/edges - compiled_dsl JSONB, -- Compiled execution plan - organization_id VARCHAR, - created_at TIMESTAMP, - updated_at TIMESTAMP -); - --- Workflow execution instances -workflow_runs ( - run_id TEXT PRIMARY KEY, - workflow_id UUID REFERENCES workflows(id), - temporal_run_id TEXT, - inputs JSONB NOT NULL, - status VARCHAR, -- PENDING, RUNNING, COMPLETED, FAILED - started_at TIMESTAMP, - completed_at TIMESTAMP -); - --- Secrets (encrypted at rest) -secrets ( - id UUID PRIMARY KEY, - name TEXT NOT NULL UNIQUE, - organization_id VARCHAR -); - -secret_versions ( - id UUID PRIMARY KEY, - secret_id UUID REFERENCES secrets(id), - version INTEGER NOT NULL, - encrypted_value BYTEA NOT NULL, - iv BYTEA NOT NULL, - auth_tag BYTEA NOT NULL, - encryption_key_id TEXT, - is_active BOOLEAN DEFAULT true -); - --- Trace events -trace_events ( - id UUID PRIMARY KEY, - run_id TEXT NOT NULL, - node_ref TEXT NOT NULL, - type VARCHAR NOT NULL, -- NODE_STARTED, NODE_COMPLETED, etc. - timestamp TIMESTAMP NOT NULL, - sequence INTEGER, - data JSONB -); -``` - ---- - ## Security Architecture ### Authentication & Multi-tenancy - **Clerk Integration** — Production-ready authentication for hosted environments. - **Local Auth** — Default for local setup using `ADMIN_USERNAME` / `ADMIN_PASSWORD`. - **Organization Isolation** — All data scoped by `organization_id`. -- **Role-Based Access** — Admin, Member, Viewer roles. ### Secrets Management -- **AES-256-GCM** encryption at rest (NOT HashiCorp Vault) -- **Versioned secrets** with active/inactive tracking -- **Master key** via `SECRET_STORE_MASTER_KEY` environment variable +- **AES-256-GCM** encryption at rest. +- **Versioned secrets** with active/inactive tracking. +- **Master key** via `SECRET_STORE_MASTER_KEY` environment variable. ### Container Isolation -- **IsolatedContainerVolume** — Per-tenant, per-run Docker volumes -- **Network isolation** — Components run with `network: none` or `bridge` -- **Automatic cleanup** — Volumes destroyed after execution +- **IsolatedContainerVolume** — Per-tenant, per-run Docker volumes. See **[Isolated Volumes](/development/isolated-volumes)**. +- **Network isolation** — Components run with `network: none` or `bridge`. +- **Automatic cleanup** — Volumes destroyed after execution. 
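+
+In component code this pattern looks roughly as follows: a minimal sketch using the `IsolatedContainerVolume` API described in the Isolated Volumes guide, with runner wiring and error handling elided.
+
+```typescript
+import { IsolatedContainerVolume } from '../../utils/isolated-volume';
+
+// Illustrative sketch only; see the Isolated Volumes guide for the full pattern.
+async function runToolWithInputs(tenantId: string, runId: string, targets: string[]) {
+  // Volume name embeds tenant and run IDs, preventing cross-tenant collisions.
+  const volume = new IsolatedContainerVolume(tenantId, runId);
+  try {
+    await volume.initialize({ 'targets.txt': targets.join('\n') });
+    const volumes = [volume.getVolumeConfig('/inputs', true)]; // read-only mount
+    // ... pass `volumes` to the Docker runner config and execute the component ...
+  } finally {
+    await volume.cleanup(); // always runs, even if execution throws
+  }
+}
+```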
---
@@ -337,5 +136,4 @@ trace_events (
 ## Learn More
 
 - **Component Development**: `/development/component-development`
-- **Isolated Volumes**: `/development/isolated-volumes`
 - **Getting Started**: `/getting-started`

diff --git a/docs/architecture/streaming-pipelines.mdx b/docs/architecture/streaming-pipelines.mdx
new file mode 100644
index 00000000..268eb657
--- /dev/null
+++ b/docs/architecture/streaming-pipelines.mdx
@@ -0,0 +1,56 @@
+---
+title: "Streaming Pipelines"
+description: "Real-time delivery of terminal output, logs, and trace events"
+---
+
+# Streaming Pipelines
+
+ShipSec Studio handles three distinct types of real-time data, each optimized for its specific use case.
+
+## 1. Terminal Streaming (Redis Streams)
+**Purpose:** Delivers raw PTY (pseudo-terminal) output from Docker containers to the frontend with sub-50ms latency.
+
+- **Storage:** Redis Streams (`XADD`, `XRANGE`).
+- **Key Pattern:** `terminal:{runId}:{nodeRef}:{stream}` (where stream is `stdout` or `stderr`).
+- **Transport:** Server-Sent Events (SSE).
+- **Frontend:** [xterm.js](https://xtermjs.org/) renders the ANSI escape sequences in real time.
+- **Why Redis?** Extremely low latency, plus built-in cursor support (stream IDs) that lets the frontend catch up after a disconnect.
+
+## 2. Event Streaming (Postgres LISTEN/NOTIFY + SSE)
+**Purpose:** Updates the visual workflow timeline and node statuses (Pending → Running → Completed).
+
+- **Storage:** PostgreSQL.
+- **Trigger:** `LISTEN/NOTIFY` (via Postgres triggers on `trace_events`).
+- **Events:** `STARTED`, `PROGRESS`, `COMPLETED`, `FAILED`, `AWAITING_INPUT`.
+- **Transport:** Server-Sent Events (SSE).
+- **Why Postgres?** Simplifies the stack by using the primary database for both persistence and real-time signaling of essential workflow events.
+
+## 3. Log Streaming (Kafka + Loki + SSE)
+**Purpose:** Provides searchable, structured logs from both the platform components and the tools.
+
+- **Ingestion:** Kafka (as a reliable buffer).
+- **Aggregation:** [Grafana Loki](https://grafana.com/oss/loki/).
+- **Transport:** Server-Sent Events (SSE) — the backend SSE loop periodically polls Loki for new lines.
+- **Why Loki?** Optimized for multi-tenant log storage with high compression and nanosecond precision.
+
+---
+
+## Technical Flow
+
+```mermaid
+graph TD
+    subgraph "Worker (Producer)"
+        C[Component] -->|Raw Output| R[Redis Streams]
+        C -->|Trace Event| K[Kafka Topic]
+        C -->|Log Line| L[Kafka Topic]
+    end
+
+    subgraph "Backend (Consumer/Proxy)"
+        K -->|Ingest| DB[(PostgreSQL)]
+        L -->|Ingest| LO[Loki]
+        DB -->|LISTEN/NOTIFY| S[SSE Controller]
+        R -->|Poll| S
+        LO -->|Poll| S
+        S -->|Unified SSE Stream| F[Frontend]
+    end
+```
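+
+---
+
+## Example: Consuming a Terminal Stream
+
+A minimal sketch of the cursor-based catch-up read described above — not the platform's actual adapter code. It assumes the `ioredis` client; the key pattern matches the one shown earlier, and the blocking timeout is illustrative:
+
+```typescript
+import Redis from 'ioredis';
+
+const redis = new Redis(process.env.REDIS_URL ?? 'redis://localhost:6379');
+
+// Tail one node's stdout, resuming from the last seen stream ID so a
+// reconnecting SSE client never misses output.
+async function* tailTerminal(runId: string, nodeRef: string, lastId = '0-0') {
+  const key = `terminal:${runId}:${nodeRef}:stdout`;
+  while (true) {
+    // Block for up to 5s waiting for entries newer than our cursor.
+    const batch = await redis.xread('BLOCK', 5000, 'STREAMS', key, lastId);
+    if (!batch) continue; // timed out — poll again
+    for (const [, entries] of batch) {
+      for (const [id, fields] of entries) {
+        lastId = id; // advance the cursor
+        yield fields[1]; // fields is a flat [name, value, ...] array; value holds the PTY chunk
+      }
+    }
+  }
+}
+```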
diff --git a/docs/architecture/temporal-orchestration.mdx b/docs/architecture/temporal-orchestration.mdx
new file mode 100644
index 00000000..cc759905
--- /dev/null
+++ b/docs/architecture/temporal-orchestration.mdx
@@ -0,0 +1,58 @@
+---
+title: "Temporal Orchestration"
+description: "How ShipSec Studio uses Temporal for workflow and activity management"
+---
+
+# Temporal Orchestration
+
+ShipSec Studio uses [Temporal.io](https://temporal.io) as its core orchestration engine. Temporal ensures that workflows are durable, retriable, and scalable.
+
+## Workflows vs. Activities
+
+- **Workflows**: Contain the orchestration logic. They must be **deterministic** because Temporal may "replay" them to reconstruct state.
+- **Activities**: Perform the actual work (e.g., running a Docker container, making an HTTP request). They can be **non-deterministic** and are automatically retried by Temporal on failure.
+
+## Worker Architecture
+
+ShipSec uses a specialized worker architecture to handle different types of workloads.
+
+### Default Worker
+- **Task Queue**: `shipsec-default`
+- **Responsibilities**:
+  - Executing workflow logic (orchestration).
+  - Running lightweight "inline" activities (logic, simple transforms).
+
+### Specialized Workers (Scaling)
+For high-volume or heavy workloads, we can deploy specialized workers:
+
+| Worker Type | Task Queue | Activities | Resource Profile |
+|-------------|------------|------------|------------------|
+| **Security** | `security-tools` | Nmap, Subfinder, Nuclei | High CPU, High RAM |
+| **I/O Bound** | `file-ops` | MinIO/S3 uploads, ZIP ops | High Network/Disk I/O |
+| **Long-Running** | `heavy-scans` | Massive port scans | High Timeout, Reserved Capacity |
+
+## Execution Flow
+
+1. **Backend** starts a workflow via the Temporal Client.
+2. **Temporal Server** places a "Workflow Task" on the `shipsec-default` queue.
+3. **Default Worker** picks up the task and starts the workflow.
+4. When the workflow reaches a node, it schedules an **Activity Task**.
+5. The **Activity worker** (which may be the same worker or a specialized one) executes the activity and returns the result.
+6. Temporal persists every step in its **Event History**, allowing it to recover from worker crashes.
+
+## Routing Logic
+
+In the workflow code, we route activities to specific queues using `proxyActivities`:
+
+```typescript
+import { proxyActivities } from '@temporalio/workflow';
+// The activities module path is illustrative.
+import type * as activities from '../activities';
+
+// Inside a workflow
+const heavyActivities = proxyActivities<typeof activities>({
+  taskQueue: 'security-tools', // Routes to specialized security worker
+  startToCloseTimeout: '1 hour',
+});
+
+const results = await heavyActivities.runScanner({ target: 'example.com' });
+```
+
+## Scaling
+ShipSec workers are horizontally scalable. Multiple workers can poll the same task queue, and Temporal will automatically load-balance tasks between them. Workers can be deployed as separate Docker containers or Kubernetes pods.

diff --git a/docs/architecture/workflow-compilation.mdx b/docs/architecture/workflow-compilation.mdx
new file mode 100644
index 00000000..1570d20e
--- /dev/null
+++ b/docs/architecture/workflow-compilation.mdx
@@ -0,0 +1,47 @@
+---
+title: "Workflow Compilation (DSL)"
+description: "How visual graphs are transformed into executable Domain Specific Language"
+---
+
+# Workflow Compilation
+
+Before a workflow is executed by Temporal, it must be compiled from a UI-friendly graph (nodes and edges) into an execution-efficient **DSL (Domain Specific Language)**.
+
+## The Compiler Logic
+The compiler lives in `backend/src/dsl/compiler.ts`.
+
+### 1. Filtering & Validation
+- Filters out **UI-only nodes** (e.g., text blocks, documentation).
+- Validates that every node references a registered component.
+- Ensures the workflow has exactly one **Entry Point**.
+
+### 2. Topological Sorting
+The compiler uses a standard topological sort algorithm to determine the execution order and detect cycles.
+- **Cycles**: If node A depends on B and B depends on A, compilation fails.
+- **Order**: Nodes with zero dependencies are placed first.
+
+### 3. Dependency Mapping
+For every node, the compiler calculates:
+- `dependsOn`: A list of node IDs that must complete before this node can start.
+- `dependencyCounts`: A pre-calculated map used by the executor to track readiness.
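+
+Conceptually, steps 2 and 3 reduce to one pass of Kahn's algorithm over the graph. The sketch below is illustrative only — the types and names are assumptions, not the actual `compiler.ts` implementation:
+
+```typescript
+type Edge = { source: string; target: string };
+
+function compileOrder(nodeIds: string[], edges: Edge[]) {
+  // dependencyCounts: how many parents each node waits on (the executor's readiness map).
+  const dependencyCounts = new Map<string, number>();
+  const dependents = new Map<string, string[]>();
+  for (const id of nodeIds) {
+    dependencyCounts.set(id, 0);
+    dependents.set(id, []);
+  }
+  for (const { source, target } of edges) {
+    dependencyCounts.set(target, (dependencyCounts.get(target) ?? 0) + 1);
+    dependents.get(source)?.push(target);
+  }
+
+  // Kahn's algorithm: emit nodes whose remaining dependency count reaches zero.
+  const remaining = new Map(dependencyCounts);
+  const queue = nodeIds.filter((id) => remaining.get(id) === 0);
+  const order: string[] = [];
+  while (queue.length > 0) {
+    const id = queue.shift()!;
+    order.push(id);
+    for (const child of dependents.get(id) ?? []) {
+      const left = (remaining.get(child) ?? 0) - 1;
+      remaining.set(child, left);
+      if (left === 0) queue.push(child);
+    }
+  }
+
+  if (order.length !== nodeIds.length) throw new Error('Cycle detected in workflow graph');
+  return { order, dependencyCounts };
+}
+```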
+### 4. Port & Variable Mapping
+This is the most critical phase. The compiler maps **Edges** to **Input Port** mappings:
+- Maps `sourceHandle` (e.g., `results`) to `targetHandle` (e.g., `domainList`).
+- Resolves conflicts between edge connections and manual configuration via `valuePriority` — a connected port usually overrides the manually configured value.
+
+## Join Strategies
+When a node has multiple incoming edges, it must decide when to execute:
+
+| Strategy | Behavior | Use Case |
+|----------|----------|----------|
+| **all** (default) | Waits for ALL parent nodes to complete successfully. | Standard data aggregation. |
+| **any** | Executes as soon as ANY parent node completes. | Racing providers or fallback paths. |
+| **first** | Executes for the first parent that finishes, ignores others. | Redundant security scanners. |
+
+## DSL Schema
+The resulting JSON contains enough metadata for the Worker to rebuild the environment:
+- **`actions`**: Flat list of execution steps.
+- **`params`**: Manual configuration values.
+- **`inputMappings`**: Dynamic data-flow instructions (see the sketch below).
+- **`config`**: Global workflow settings (timeouts, environments).
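+
+To illustrate how `params` and `inputMappings` interact at runtime, here is a minimal sketch with assumed shapes — not the Worker's actual resolver:
+
+```typescript
+type InputMapping = { targetHandle: string; sourceNode: string; sourceHandle: string };
+
+function resolveInputs(
+  mappings: InputMapping[],
+  params: Record<string, unknown>,               // manual configuration values
+  outputs: Map<string, Record<string, unknown>>, // completed parents' outputs by node ID
+) {
+  // Start from manual values, then let connected ports override them
+  // (the default valuePriority behavior described in step 4).
+  const inputs: Record<string, unknown> = { ...params };
+  for (const m of mappings) {
+    inputs[m.targetHandle] = outputs.get(m.sourceNode)?.[m.sourceHandle];
+  }
+  return inputs;
+}
+```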
From 5d90789aef45760a39ac43e910e0d0f8a8bf56e1 Mon Sep 17 00:00:00 2001
From: betterclever
Date: Mon, 5 Jan 2026 00:21:51 +0530
Subject: [PATCH 7/8] docs: overhaul README with professional technical pillars
 and status

Signed-off-by: betterclever
---
 README.md | 189 +++++++++++++++++-------------------------
 1 file changed, 59 insertions(+), 130 deletions(-)

diff --git a/README.md b/README.md
index 2f898a7a..6919b634 100644
--- a/README.md
+++ b/README.md
@@ -1,181 +1,110 @@
-    ShipSec AI
+    ShipSec AI

- Status - License - Website - Security Automation - Live Observability - Component Catalog + Version + License + Discord

-

- Discord - Discussions - Twitter -

- # ShipSec Studio +**Open-Source Security Workflow Orchestration Platform.** -The no-code security automation studio for security teams. Design reliable and reusable security workflows. +> [!IMPORTANT] +> ShipSec is currently in active development. We are optimizing the platform for stable production use and high-performance security operations. -ShipSec Studio is a security workflow orchestration platform that combines the power of visual programming with enterprise-grade reliability. Unlike traditional automation tools that require complex scripting, ShipSec Studio lets you build security workflows through an intuitive canvas while maintaining the robustness your team needs. +ShipSec Studio provides a visual DSL and runtime for building, executing, and monitoring automated security workflows. It decouples security logic from infrastructure management, providing a durable and isolated environment for running security tooling at scale. -## Demo +## 📺 Demo
ShipSec Studio Demo -

Click to watch the demo (hosted on Hacking Simplified YouTube)

+

Watch the platform in action on YouTube.

-## Why ShipSec Studio? - -🎨 **Visual Workflow Builder** : Design security automations with drag-and-drop, no coding required - -⚡ **Real-Time Execution** : Watch workflows run live with streaming logs and progress indicators - -🧩 **Pre-Built Security Components** : Subfinder, DNSX, HTTPx, Nuclei, and more ready to use - -🔒 **Enterprise Reliability** : Built on Temporal for durable, resumable workflow executions - -🛡️ **Secure by Default** : Encrypted secrets, role-based access, and audit trails - -💻 **Run Anywhere** : Cloud hosted or self-hosted on your own infrastructure - -📅 **Scheduled Workflows** : Schedule your scans to run at specific times or intervals +--- -🔗 **Codify Your Workflows** : Trigger workflows via a simple POST request, through cURL, python etc. +### 🏗️ Core Pillars -## Quick Start +* **Durable, resumable workflows** powered by Temporal.io for stateful execution across failures. +* **Isolated security runtimes** using ephemeral containers with per-run volume management. +* **Unified telemetry streams** delivering terminal output, events, and logs via a low-latency SSE pipeline. +* **Visual no-code builder** that compiles complex security graphs into an executable DSL. -Get started with ShipSec Studio in minutes: +--- -### Option 1: Use the Hosted Platform +## 🚀 Deployment Options -1. **Sign up** at [studio.shipsec.ai](https://studio.shipsec.ai) -2. **Create your first workflow** using the visual builder -3. **Run a scan** with pre-built components like Subfinder, Nuclei, or HTTPx -4. **View results** in real-time as the workflow executes +### 1. ShipSec Cloud (Preview) +The fastest way to test ShipSec Studio without managing infrastructure. +- **Try it out:** [studio.shipsec.ai](https://studio.shipsec.ai) +- **Note:** ShipSec Studio is under active development. The cloud environment is a technical preview for evaluation and sandbox testing. -### Option 2: Self-Host with Docker (Recommended) +### 2. Self-Host (Docker) +For teams requiring data residency and air-gapped security orchestrations. This setup runs the full stack (Frontend, Backend, Worker, and Infrastructure). -The easiest way to run ShipSec Studio on your own infrastructure: - -#### Prerequisites - -- **[docker](https://www.docker.com/)** - For running the application and security components -- **[just](https://github.com/casey/just)** - Command runner for simplified workflows -- **curl** and **jq** - For fetching release information - -#### Quick Start +**Prerequisites:** Docker, `just` command runner. ```bash -# Clone the repository +# Clone and start the latest stable release git clone https://github.com/ShipSecAI/studio.git cd studio - -# Download the latest release and start just prod start-latest - -# Visit http://localhost:8090 to access ShipSec Studio -``` - -This command automatically: -- Fetches the latest release version from GitHub -- Pulls pre-built Docker images from GHCR -- Starts the full stack (frontend, backend, worker, and infrastructure) - -#### Other Commands - -```bash -just prod stop # Stop the environment -just prod logs # View logs -just prod status # Check status -just prod clean # Remove all data ``` +Access the studio at `http://localhost:8090`. 
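+
+With the stack running, workflows can also be triggered headlessly via the REST API (see *API First* below). A minimal sketch — the run endpoint follows the backend's `POST /api/v1/workflows/{id}/run` pattern, while the workflow ID, auth header, and request-body shape are placeholders to adapt for your deployment:
+
+```typescript
+// Trigger a workflow run on a local self-hosted instance (illustrative values).
+const res = await fetch('http://localhost:8090/api/v1/workflows/<workflow-id>/run', {
+  method: 'POST',
+  headers: { 'Content-Type': 'application/json' }, // add Authorization per your setup
+  body: JSON.stringify({ inputs: { target: 'example.com' } }),
+});
+console.log(res.status, await res.json());
+```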
-### Option 3: Development Setup - -For contributors who want to modify the source code: - -#### Prerequisites +--- -- **[bun.sh](https://bun.sh)** - Fast JavaScript runtime and package manager -- **[docker](https://www.docker.com/)** - For running security components in isolated containers -- **[just](https://github.com/casey/just)** - Command runner for simplified development workflows +## 🛠️ Capabilities -#### Setup - -```bash -# Clone the repository -git clone https://github.com/ShipSecAI/studio.git -cd studio +### Integrated Tooling +Native support for industry-standard security tools including: +- **Discovery**: `Subfinder`, `DNSX`, `Naabu`, `HTTPx` +- **Vulnerability**: `Nuclei`, `TruffleHog` +- **Utility**: `JSON Transform`, `Logic Scripts`, `HTTP Requests` -# Initialize (installs dependencies and creates environment files) -just init +### Advanced Orchestration +- **Human-in-the-Loop**: Pause workflows for approvals, form inputs, or manual validation before continuing. +- **AI-Driven Analysis**: Leverage LLM nodes and MCP providers for intelligent results interpretation. +- **Native Scheduling**: Integrated CRON support for recurring security posture and compliance monitoring. +- **API First**: Trigger and monitor any workflow execution via a comprehensive REST API. -# Start development environment with hot-reload -just dev +--- -# Visit http://localhost:5173 to access ShipSec Studio -``` +## 🏛️ Architecture Overview -### Your First Workflow +ShipSec Studio is designed for enterprise-grade durability and horizontal scalability. -1. **Open the Workflow Builder** from the dashboard -2. **Add a Manual Trigger node** for manual execution -3. **Add a Subfinder node** for subdomain discovery -4. **Run the workflow** and watch real-time execution +- **Management Plane (Backend)**: NestJS service handling DSL compilation, secret management (AES-256-GCM), and identity. +- **Orchestration Plane (Temporal)**: Manages workflow state, concurrency, and persistent wait states. +- **Execution Plane (Worker)**: Stateless agents that pull tasks from Temporal and execute tool-bound activities in isolated runtimes. +- **Monitoring (SSE/Loki)**: Real-time telemetry pipeline for deterministic execution visibility. -🎉 **Congratulations!** You've just run your first security workflow in ShipSec Studio. +--- -## 🔎 System Architecture +## 🤝 Community & Support -
- System Architecture -
+- 💬 **[Discord](https://discord.gg/fmMA4BtNXC)** — Real-time support and community discussion. +- 🗣️ **[GitHub Discussions](https://github.com/ShipSecAI/studio/discussions)** — Technical RFCs and feature requests. +- 📚 **[Documentation](https://docs.shipsec.ai)** — Full guides on component development and deployment. -## 🔥 Latest Updates +--- -- Dec 11, 2025 - **Execution Canvas Improvements** - Enhanced drag-and-drop experience -- Dec 10, 2025 - **Modernized Documentation** - Updated terminology and cleaner structure -- Dec 9, 2025 - **Backend Version Check** - Automatic compatibility verification on startup -- Dec 8, 2025 - **Workflow Scheduling** - Schedule workflows to run at specific times or intervals +## ✍️ Contributing +We welcome contributions to the management plane, worker logic, or new security components. +See [CONTRIBUTING.md](CONTRIBUTING.md) for architectural guidelines and setup instructions. -## Documentation - -📚 **Complete documentation** is available at **[docs.shipsec.ai](https://docs.shipsec.ai)** - -- Getting Started Guides -- Component Development -- API Reference -- Architecture Overview -- And much more... - -## Community - -Join the ShipSec community to get help, share ideas, and stay updated: - -- 💬 **[Discord](https://discord.gg/fmMA4BtNXC)** — Chat with the team and community -- 🗣️ **[Discussions](https://github.com/ShipSecAI/studio/discussions)** — Ask questions and share ideas -- 🐛 **[Issues](https://github.com/ShipSecAI/studio/issues)** — Report bugs or request features -- 🐦 **[Twitter](https://twitter.com/shipsecai)** — Follow for updates and announcements - -## Contributing - -We're excited that you're interested in ShipSec Studio! Whether you're fixing bugs, adding features, improving docs, or sharing ideas — every contribution helps make security automation more accessible. - -See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines. +--- ## License -ShipSec Studio is licensed under the [Apache License 2.0](LICENSE). +ShipSec Studio is licensed under the **Apache License 2.0**.
-

Built with ❤️ by the ShipSec AI team

+

Engineered for security teams by the ShipSec AI team.

+ From bb1154dfeb107f0cb79c9fd2d5049852f52ecd01 Mon Sep 17 00:00:00 2001 From: betterclever Date: Mon, 5 Jan 2026 00:25:20 +0530 Subject: [PATCH 8/8] docs: add architecture deep-dive link to README Signed-off-by: betterclever --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6919b634..b9a42d36 100644 --- a/README.md +++ b/README.md @@ -11,12 +11,10 @@ # ShipSec Studio **Open-Source Security Workflow Orchestration Platform.** -> [!IMPORTANT] > ShipSec is currently in active development. We are optimizing the platform for stable production use and high-performance security operations. ShipSec Studio provides a visual DSL and runtime for building, executing, and monitoring automated security workflows. It decouples security logic from infrastructure management, providing a durable and isolated environment for running security tooling at scale. -## 📺 Demo