From 96c55cc67b71fe9cb431152280f3801e21bef016 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 17 Dec 2025 07:58:19 +0000 Subject: [PATCH 01/15] feat(engine): run debounce system --- .cursor/rules/migrations.mdc | 6 + ai/references/migrations.md | 121 ++ apps/webapp/app/env.server.ts | 6 + .../app/presenters/v3/SpanPresenter.server.ts | 3 + .../route.tsx | 13 + .../runEngine/concerns/traceEvents.server.ts | 68 + .../runEngine/services/triggerTask.server.ts | 45 +- apps/webapp/app/runEngine/types.ts | 17 + apps/webapp/app/v3/runEngine.server.ts | 4 + apps/webapp/package.json | 4 +- apps/webapp/test/engine/triggerTask.test.ts | 27 + docs/triggering.mdx | 61 + .../migration.sql | 5 + .../database/prisma/schema.prisma | 6 + .../run-engine/src/engine/index.ts | 109 +- .../src/engine/systems/debounceSystem.ts | 542 ++++++ .../src/engine/systems/delayedRunSystem.ts | 141 +- .../src/engine/systems/waitpointSystem.ts | 12 + .../src/engine/tests/debounce.test.ts | 1489 +++++++++++++++++ .../run-engine/src/engine/types.ts | 21 +- packages/core/src/v3/schemas/api.ts | 6 + packages/core/src/v3/schemas/runEngine.ts | 1 + packages/core/src/v3/types/tasks.ts | 34 + packages/trigger-sdk/package.json | 2 +- packages/trigger-sdk/src/v3/shared.ts | 2 + .../hello-world/src/trigger/debounce.ts | 572 +++++++ 26 files changed, 3246 insertions(+), 71 deletions(-) create mode 100644 .cursor/rules/migrations.mdc create mode 100644 ai/references/migrations.md create mode 100644 internal-packages/database/prisma/migrations/20251216225303_add_debounce_and_delayed_status/migration.sql create mode 100644 internal-packages/run-engine/src/engine/systems/debounceSystem.ts create mode 100644 internal-packages/run-engine/src/engine/tests/debounce.test.ts create mode 100644 references/hello-world/src/trigger/debounce.ts diff --git a/.cursor/rules/migrations.mdc b/.cursor/rules/migrations.mdc new file mode 100644 index 0000000000..370c87c051 --- /dev/null +++ b/.cursor/rules/migrations.mdc @@ -0,0 
+1,6 @@ +--- +description: how to create and apply database migrations +alwaysApply: false +--- + +Follow our [migrations.md](mdc:ai/references/migrations.md) guide for how to create and apply database migrations. diff --git a/ai/references/migrations.md b/ai/references/migrations.md new file mode 100644 index 0000000000..c6fbf79e9d --- /dev/null +++ b/ai/references/migrations.md @@ -0,0 +1,121 @@ +## Creating and applying migrations + +We use Prisma migrations to manage the database schema. Follow these steps when editing the `internal-packages/database/prisma/schema.prisma` file: + +Edit the `schema.prisma` file to add or modify the schema. + +Create a new migration file but don't apply it yet: + +```bash +cd internal-packages/database +pnpm run db:migrate:dev:create --name "add_new_column_to_table" +``` + +The migration file will be created in the `prisma/migrations` directory, but it will also contain unrelated schema changes that must be removed before the migration can be applied. 
Here's an example of what the migration file might look like: + +```sql +-- AlterEnum +ALTER TYPE "public"."TaskRunExecutionStatus" ADD VALUE 'DELAYED'; + +-- AlterTable +ALTER TABLE "public"."TaskRun" ADD COLUMN "debounce" JSONB; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToBackgroundWorkerFile" ADD CONSTRAINT "_BackgroundWorkerToBackgroundWorkerFile_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToBackgroundWorkerFile_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToTaskQueue" ADD CONSTRAINT "_BackgroundWorkerToTaskQueue_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToTaskQueue_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_TaskRunToTaskRunTag" ADD CONSTRAINT "_TaskRunToTaskRunTag_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_TaskRunToTaskRunTag_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_WaitpointRunConnections" ADD CONSTRAINT "_WaitpointRunConnections_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_WaitpointRunConnections_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_completedWaitpoints" ADD CONSTRAINT "_completedWaitpoints_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_completedWaitpoints_AB_unique"; + +-- CreateIndex +CREATE INDEX "SecretStore_key_idx" ON "public"."SecretStore"("key" text_pattern_ops); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_id_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "id" DESC); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_createdAt_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "createdAt" DESC); +``` + +All the following lines should be removed: + +```sql +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToBackgroundWorkerFile" ADD CONSTRAINT "_BackgroundWorkerToBackgroundWorkerFile_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX 
"public"."_BackgroundWorkerToBackgroundWorkerFile_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_BackgroundWorkerToTaskQueue" ADD CONSTRAINT "_BackgroundWorkerToTaskQueue_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_BackgroundWorkerToTaskQueue_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_TaskRunToTaskRunTag" ADD CONSTRAINT "_TaskRunToTaskRunTag_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_TaskRunToTaskRunTag_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_WaitpointRunConnections" ADD CONSTRAINT "_WaitpointRunConnections_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_WaitpointRunConnections_AB_unique"; + +-- AlterTable +ALTER TABLE "public"."_completedWaitpoints" ADD CONSTRAINT "_completedWaitpoints_AB_pkey" PRIMARY KEY ("A", "B"); + +-- DropIndex +DROP INDEX "public"."_completedWaitpoints_AB_unique"; + +-- CreateIndex +CREATE INDEX "SecretStore_key_idx" ON "public"."SecretStore"("key" text_pattern_ops); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_id_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "id" DESC); + +-- CreateIndex +CREATE INDEX "TaskRun_runtimeEnvironmentId_createdAt_idx" ON "public"."TaskRun"("runtimeEnvironmentId", "createdAt" DESC); +``` + +Leaving only this: + +```sql +-- AlterEnum +ALTER TYPE "public"."TaskRunExecutionStatus" ADD VALUE 'DELAYED'; + +-- AlterTable +ALTER TABLE "public"."TaskRun" ADD COLUMN "debounce" JSONB; +``` + +After editing the migration file, apply the migration: + +```bash +cd internal-packages/database +pnpm run db:migrate:deploy && pnpm run generate +``` diff --git a/apps/webapp/app/env.server.ts b/apps/webapp/app/env.server.ts index c5dcbe0520..1cc0db0bf0 100644 --- a/apps/webapp/app/env.server.ts +++ b/apps/webapp/app/env.server.ts @@ -611,6 +611,12 @@ const EnvironmentSchema = z .default(60_000), RUN_ENGINE_SUSPENDED_HEARTBEAT_RETRIES_FACTOR: z.coerce.number().default(2), + /** Maximum duration in 
milliseconds that a run can be debounced. Default: 1 hour (3,600,000ms) */ + RUN_ENGINE_MAXIMUM_DEBOUNCE_DURATION_MS: z.coerce + .number() + .int() + .default(60_000 * 60), // 1 hour + RUN_ENGINE_WORKER_REDIS_HOST: z .string() .optional() diff --git a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts index bf43f40525..4c0e3405cf 100644 --- a/apps/webapp/app/presenters/v3/SpanPresenter.server.ts +++ b/apps/webapp/app/presenters/v3/SpanPresenter.server.ts @@ -234,6 +234,7 @@ export class SpanPresenter extends BasePresenter { environmentId: run.runtimeEnvironment.id, idempotencyKey: run.idempotencyKey, idempotencyKeyExpiresAt: run.idempotencyKeyExpiresAt, + debounce: run.debounce as { key: string; delay: string; createdAt: Date } | null, schedule: await this.resolveSchedule(run.scheduleId ?? undefined), queue: { name: run.queue, @@ -357,6 +358,8 @@ export class SpanPresenter extends BasePresenter { //idempotency idempotencyKey: true, idempotencyKeyExpiresAt: true, + //debounce + debounce: true, //delayed delayUntil: true, //ttl diff --git a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx index c957653fd8..5e5d2ca3f6 100644 --- a/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx +++ b/apps/webapp/app/routes/resources.orgs.$organizationSlug.projects.$projectParam.env.$envParam.runs.$runParam.spans.$spanParam/route.tsx @@ -556,6 +556,19 @@ function RunBody({ )} + + Debounce + + {run.debounce ? ( +
+
Key: {run.debounce.key}
+
Delay: {run.debounce.delay}
+
+ ) : ( + "–" + )} +
+
Version diff --git a/apps/webapp/app/runEngine/concerns/traceEvents.server.ts b/apps/webapp/app/runEngine/concerns/traceEvents.server.ts index 634df34e4a..cb2eaa30a5 100644 --- a/apps/webapp/app/runEngine/concerns/traceEvents.server.ts +++ b/apps/webapp/app/runEngine/concerns/traceEvents.server.ts @@ -51,6 +51,7 @@ export class DefaultTraceEventsConcern implements TraceEventConcern { traceparent, setAttribute: (key, value) => event.setAttribute(key as any, value), failWithError: event.failWithError.bind(event), + stop: event.stop.bind(event), }, store ); @@ -116,6 +117,73 @@ export class DefaultTraceEventsConcern implements TraceEventConcern { traceparent, setAttribute: (key, value) => event.setAttribute(key as any, value), failWithError: event.failWithError.bind(event), + stop: event.stop.bind(event), + }, + store + ); + } + ); + } + + async traceDebouncedRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + const { existingRun, debounceKey, incomplete, isError } = options; + const { repository, store } = await this.#getEventRepository(request, parentStore); + + return await repository.traceEvent( + `${request.taskId} (debounced)`, + { + context: request.options?.traceContext, + spanParentAsLink: request.options?.spanParentAsLink, + kind: "SERVER", + environment: request.environment, + taskSlug: request.taskId, + attributes: { + properties: { + [SemanticInternalAttributes.ORIGINAL_RUN_ID]: existingRun.friendlyId, + }, + style: { + icon: "task-cached", + }, + runId: existingRun.friendlyId, + }, + incomplete, + isError, + immediate: true, + }, + async (event, traceContext, traceparent) => { + // Log a message about the debounced trigger + await repository.recordEvent( + `Debounced: using existing run with key "${debounceKey}"`, + { + taskSlug: request.taskId, + 
environment: request.environment, + attributes: { + runId: existingRun.friendlyId, + }, + context: request.options?.traceContext, + parentId: event.spanId, + } + ); + + return await callback( + { + traceId: event.traceId, + spanId: event.spanId, + traceContext, + traceparent, + setAttribute: (key, value) => event.setAttribute(key as any, value), + failWithError: event.failWithError.bind(event), + stop: event.stop.bind(event), }, store ); diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index f2e9ed5502..9016d4bc25 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -160,10 +160,19 @@ export class RunEngineTriggerTaskService { } } - const [parseDelayError, delayUntil] = await tryCatch(parseDelay(body.options?.delay)); + // Parse delay from either explicit delay option or debounce.delay + const delaySource = body.options?.delay ?? body.options?.debounce?.delay; + const [parseDelayError, delayUntil] = await tryCatch(parseDelay(delaySource)); if (parseDelayError) { - throw new ServiceValidationError(`Invalid delay ${body.options?.delay}`); + throw new ServiceValidationError(`Invalid delay ${delaySource}`); + } + + // Validate debounce options + if (body.options?.debounce && !delayUntil) { + throw new ServiceValidationError( + `Debounce requires a valid delay duration. Provided: ${body.options.debounce.delay}` + ); } const ttl = @@ -340,10 +349,42 @@ export class RunEngineTriggerTaskService { bulkActionId: body.options?.bulkActionId, planType, realtimeStreamsVersion: options.realtimeStreamsVersion, + debounce: body.options?.debounce, + // When debouncing with triggerAndWait, create a span for the debounced trigger + onDebounced: + body.options?.debounce && body.options?.resumeParentOnCompletion + ? 
async ({ existingRun, waitpoint, debounceKey }) => { + return await this.traceEventConcern.traceDebouncedRun( + triggerRequest, + parentRun?.taskEventStore, + { + existingRun, + debounceKey, + incomplete: waitpoint.status === "PENDING", + isError: waitpoint.outputIsError, + }, + async (spanEvent) => { + const spanId = + options?.parentAsLinkType === "replay" + ? spanEvent.spanId + : spanEvent.traceparent?.spanId + ? `${spanEvent.traceparent.spanId}:${spanEvent.spanId}` + : spanEvent.spanId; + return spanId; + } + ); + } + : undefined, }, this.prisma ); + // If the returned run has a different friendlyId, it was debounced + // Stop the outer span to prevent a duplicate - the debounced span was created via onDebounced + if (taskRun.friendlyId !== runFriendlyId) { + event.stop(); + } + const error = taskRun.error ? TaskRunError.parse(taskRun.error) : undefined; if (error) { diff --git a/apps/webapp/app/runEngine/types.ts b/apps/webapp/app/runEngine/types.ts index 03fa7a322f..7186d81ff9 100644 --- a/apps/webapp/app/runEngine/types.ts +++ b/apps/webapp/app/runEngine/types.ts @@ -131,6 +131,12 @@ export type TracedEventSpan = { }; setAttribute: (key: string, value: string) => void; failWithError: (error: TaskRunError) => void; + /** + * Stop the span without writing any event. + * Used when a debounced run is returned - the span for the debounced + * trigger is created separately via traceDebouncedRun. 
+ */ + stop: () => void; }; export interface TraceEventConcern { @@ -150,6 +156,17 @@ export interface TraceEventConcern { }, callback: (span: TracedEventSpan, store: string) => Promise ): Promise; + traceDebouncedRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise; } export type TriggerRacepoints = "idempotencyKey"; diff --git a/apps/webapp/app/v3/runEngine.server.ts b/apps/webapp/app/v3/runEngine.server.ts index db1760755c..5f88d5f6a4 100644 --- a/apps/webapp/app/v3/runEngine.server.ts +++ b/apps/webapp/app/v3/runEngine.server.ts @@ -182,6 +182,10 @@ function createRunEngine() { ? createBatchGlobalRateLimiter(env.BATCH_QUEUE_GLOBAL_RATE_LIMIT) : undefined, }, + // Debounce configuration + debounce: { + maxDebounceDurationMs: env.RUN_ENGINE_MAXIMUM_DEBOUNCE_DURATION_MS, + }, }); return engine; diff --git a/apps/webapp/package.json b/apps/webapp/package.json index 9d732eb17f..45de003c8d 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ -14,7 +14,7 @@ "lint": "eslint --cache --cache-location ./node_modules/.cache/eslint .", "start": "cross-env NODE_ENV=production node --max-old-space-size=8192 ./build/server.js", "start:local": "cross-env node --max-old-space-size=8192 ./build/server.js", - "typecheck": "tsc --noEmit -p ./tsconfig.check.json", + "typecheck": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" tsc --noEmit -p ./tsconfig.check.json", "db:seed": "tsx seed.mts", "upload:sourcemaps": "bash ./upload-sourcemaps.sh", "test": "vitest --no-file-parallelism", @@ -288,4 +288,4 @@ "engines": { "node": ">=18.19.0 || >=20.6.0" } -} +} \ No newline at end of file diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index 463e6e71df..91fa035806 100644 --- 
a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -80,6 +80,7 @@ class MockTraceEventConcern implements TraceEventConcern { traceparent: undefined, setAttribute: () => {}, failWithError: () => {}, + stop: () => {}, }, "test" ); @@ -104,6 +105,32 @@ class MockTraceEventConcern implements TraceEventConcern { traceparent: undefined, setAttribute: () => {}, failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceDebouncedRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, }, "test" ); diff --git a/docs/triggering.mdx b/docs/triggering.mdx index 599fe67e99..741da53f15 100644 --- a/docs/triggering.mdx +++ b/docs/triggering.mdx @@ -831,6 +831,67 @@ export const myTask = task({ For more information, see our [Idempotency](/idempotency) documentation. +### `debounce` + +You can debounce task triggers to consolidate multiple trigger calls into a single delayed run. When a run with the same debounce key already exists in the delayed state, subsequent triggers "push" the existing run's execution time later rather than creating new runs. 
+ +This is useful for scenarios like: + +- Real-time document indexing where you want to wait for the user to finish typing +- Aggregating webhook events from the same source +- Rate limiting expensive operations while still processing the final request + +```ts +// First trigger creates a new run, delayed by 5 seconds +await myTask.trigger({ some: "data" }, { debounce: { key: "user-123", delay: "5s" } }); + +// If triggered again within 5 seconds, the existing run is pushed later +await myTask.trigger({ updated: "data" }, { debounce: { key: "user-123", delay: "5s" } }); + +// The run only executes after 5 seconds of no new triggers +// Note: The first payload is used (first trigger wins) +``` + + + Debounce keys are scoped to the task identifier, so different tasks can use the same key without + conflicts. + + +The `debounce` option accepts: + +- `key` - A unique string to identify the debounce group (scoped to the task) +- `delay` - Duration string specifying how long to delay (e.g., "5s", "1m", "30s") + +**How it works:** + +1. First trigger with a debounce key creates a new delayed run +2. Subsequent triggers with the same key (while the run is still delayed) push the execution time later +3. Once no new triggers occur within the delay duration, the run executes with the **first** payload +4. After the run starts executing, a new trigger with the same key will create a new run + +**With `triggerAndWait`:** + +When using `triggerAndWait` with debounce, the parent run blocks on the existing debounced run if one exists: + +```ts +export const parentTask = task({ + id: "parent-task", + run: async (payload: string) => { + // Every parent run that triggers with the same debounce key waits on the same child run + const result = await childTask.triggerAndWait( + { data: payload }, + { debounce: { key: "shared-key", delay: "3s" } } + ); + return result; + }, +}); +``` + + + Idempotency keys take precedence over debounce keys. If both are provided and an idempotency match + is found, it wins. 
+ + ### `queue` When you trigger a task you can override the concurrency limit. This is really useful if you sometimes have high priority runs. diff --git a/internal-packages/database/prisma/migrations/20251216225303_add_debounce_and_delayed_status/migration.sql b/internal-packages/database/prisma/migrations/20251216225303_add_debounce_and_delayed_status/migration.sql new file mode 100644 index 0000000000..407c3c856d --- /dev/null +++ b/internal-packages/database/prisma/migrations/20251216225303_add_debounce_and_delayed_status/migration.sql @@ -0,0 +1,5 @@ +-- AlterEnum +ALTER TYPE "public"."TaskRunExecutionStatus" ADD VALUE 'DELAYED'; + +-- AlterTable +ALTER TABLE "public"."TaskRun" ADD COLUMN "debounce" JSONB; \ No newline at end of file diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 5207ada480..28c8332966 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -581,6 +581,10 @@ model TaskRun { idempotencyKey String? idempotencyKeyExpiresAt DateTime? + + /// Debounce options: { key: string, delay: string, createdAt: Date } + debounce Json? + taskIdentifier String isTest Boolean @default(false) @@ -947,6 +951,8 @@ model TaskRunExecutionSnapshot { enum TaskRunExecutionStatus { /// Run has been created RUN_CREATED + /// Run is delayed, waiting to be enqueued + DELAYED /// Run is in the RunQueue QUEUED /// Run is in the RunQueue, and is also executing. 
This happens when a run is continued cannot reacquire concurrency diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts index 9bd495f327..69851cc047 100644 --- a/internal-packages/run-engine/src/engine/index.ts +++ b/internal-packages/run-engine/src/engine/index.ts @@ -47,6 +47,7 @@ import { RunLocker } from "./locking.js"; import { getFinalRunStatuses } from "./statuses.js"; import { BatchSystem } from "./systems/batchSystem.js"; import { CheckpointSystem } from "./systems/checkpointSystem.js"; +import { DebounceSystem } from "./systems/debounceSystem.js"; import { DelayedRunSystem } from "./systems/delayedRunSystem.js"; import { DequeueSystem } from "./systems/dequeueSystem.js"; import { EnqueueSystem } from "./systems/enqueueSystem.js"; @@ -95,6 +96,7 @@ export class RunEngine { enqueueSystem: EnqueueSystem; checkpointSystem: CheckpointSystem; delayedRunSystem: DelayedRunSystem; + debounceSystem: DebounceSystem; ttlSystem: TtlSystem; pendingVersionSystem: PendingVersionSystem; raceSimulationSystem: RaceSimulationSystem = new RaceSimulationSystem(); @@ -297,6 +299,14 @@ export class RunEngine { enqueueSystem: this.enqueueSystem, }); + this.debounceSystem = new DebounceSystem({ + resources, + redis: options.debounce?.redis ?? options.runLock.redis, + executionSnapshotSystem: this.executionSnapshotSystem, + delayedRunSystem: this.delayedRunSystem, + maxDebounceDurationMs: options.debounce?.maxDebounceDurationMs ?? 
60 * 60 * 1000, // Default 1 hour + }); + this.pendingVersionSystem = new PendingVersionSystem({ resources, enqueueSystem: this.enqueueSystem, @@ -428,6 +438,8 @@ export class RunEngine { bulkActionId, planType, realtimeStreamsVersion, + debounce, + onDebounced, }: TriggerParams, tx?: PrismaClientOrTransaction ): Promise { @@ -437,6 +449,62 @@ export class RunEngine { this.tracer, "trigger", async (span) => { + // Handle debounce before creating a new run + // Store claimId if we successfully claimed the debounce key + let debounceClaimId: string | undefined; + + if (debounce) { + const debounceResult = await this.debounceSystem.handleDebounce({ + environmentId: environment.id, + taskIdentifier, + debounce, + tx: prisma, + }); + + if (debounceResult.status === "existing") { + span.setAttribute("debounced", true); + span.setAttribute("existingRunId", debounceResult.run.id); + + // For triggerAndWait, block the parent run with the existing run's waitpoint + if (resumeParentOnCompletion && parentTaskRunId && debounceResult.waitpoint) { + // Call the onDebounced callback to create a span and get spanIdToComplete + let spanIdToComplete: string | undefined; + if (onDebounced) { + spanIdToComplete = await onDebounced({ + existingRun: debounceResult.run, + waitpoint: debounceResult.waitpoint, + debounceKey: debounce.key, + }); + } + + await this.waitpointSystem.blockRunWithWaitpoint({ + runId: parentTaskRunId, + waitpoints: debounceResult.waitpoint.id, + spanIdToComplete, + projectId: environment.project.id, + organizationId: environment.organization.id, + batch, + workerId, + runnerId, + tx: prisma, + }); + } + + return debounceResult.run; + } + + // If max_duration_exceeded, we continue to create a new run without debouncing + if (debounceResult.status === "max_duration_exceeded") { + span.setAttribute("debounceMaxDurationExceeded", true); + } + + // Store the claimId for later registration + if (debounceResult.status === "new" && debounceResult.claimId) { + 
debounceClaimId = debounceResult.claimId; + span.setAttribute("debounceClaimId", debounceClaimId); + } + } + const status = delayUntil ? "DELAYED" : "PENDING"; //create run @@ -508,11 +576,18 @@ export class RunEngine { bulkActionGroupIds: bulkActionId ? [bulkActionId] : undefined, planType, realtimeStreamsVersion, + debounce: debounce + ? { + key: debounce.key, + delay: debounce.delay, + createdAt: new Date(), + } + : undefined, executionSnapshots: { create: { engine: "V2", - executionStatus: "RUN_CREATED", - description: "Run was created", + executionStatus: delayUntil ? "DELAYED" : "RUN_CREATED", + description: delayUntil ? "Run is delayed" : "Run was created", runStatus: status, environmentId: environment.id, environmentType: environment.type, @@ -582,6 +657,27 @@ export class RunEngine { runId: taskRun.id, delayUntil: taskRun.delayUntil, }); + + // Register debounced run in Redis for future lookups + if (debounce) { + const registered = await this.debounceSystem.registerDebouncedRun({ + runId: taskRun.id, + environmentId: environment.id, + taskIdentifier, + debounceKey: debounce.key, + delayUntil: taskRun.delayUntil, + claimId: debounceClaimId, + }); + + if (!registered) { + // We lost the claim - this shouldn't normally happen, but log it + this.logger.warn("trigger: lost debounce claim after creating run", { + runId: taskRun.id, + debounceKey: debounce.key, + claimId: debounceClaimId, + }); + } + } } else { if (taskRun.ttl) { await this.ttlSystem.scheduleExpireRun({ runId: taskRun.id, ttl: taskRun.ttl }); @@ -1307,6 +1403,9 @@ export class RunEngine { // Close the batch queue and its Redis connections await this.batchQueue.close(); + + // Close the debounce system Redis connection + await this.debounceSystem.quit(); } catch (error) { // And should always throw } @@ -1780,6 +1879,9 @@ export class RunEngine { case "FINISHED": { throw new NotImplementedError("There shouldn't be a heartbeat for FINISHED"); } + case "DELAYED": { + throw new 
NotImplementedError("There shouldn't be a heartbeat for DELAYED"); + } default: { assertNever(latestSnapshot.executionStatus); } @@ -1820,7 +1922,8 @@ export class RunEngine { case "PENDING_CANCEL": case "PENDING_EXECUTING": case "QUEUED_EXECUTING": - case "RUN_CREATED": { + case "RUN_CREATED": + case "DELAYED": { // Do nothing; return; } diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts new file mode 100644 index 0000000000..73a1a6f192 --- /dev/null +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -0,0 +1,542 @@ +import { createRedisClient, Redis, RedisOptions } from "@internal/redis"; +import { startSpan } from "@internal/tracing"; +import { parseNaturalLanguageDuration } from "@trigger.dev/core/v3/isomorphic"; +import { PrismaClientOrTransaction, TaskRun, Waitpoint } from "@trigger.dev/database"; +import { nanoid } from "nanoid"; +import { SystemResources } from "./systems.js"; +import { ExecutionSnapshotSystem, getLatestExecutionSnapshot } from "./executionSnapshotSystem.js"; +import { DelayedRunSystem } from "./delayedRunSystem.js"; + +export type DebounceOptions = { + key: string; + delay: string; +}; + +export type DebounceSystemOptions = { + resources: SystemResources; + redis: RedisOptions; + executionSnapshotSystem: ExecutionSnapshotSystem; + delayedRunSystem: DelayedRunSystem; + maxDebounceDurationMs: number; +}; + +export type DebounceResult = + | { + status: "new"; + claimId?: string; // Present when we claimed the key atomically + } + | { + status: "existing"; + run: TaskRun; + waitpoint: Waitpoint | null; + } + | { + status: "max_duration_exceeded"; + }; + +// TTL for the pending claim state (30 seconds) +const CLAIM_TTL_MS = 30_000; +// Max retries when waiting for another server to complete its claim +const MAX_CLAIM_RETRIES = 10; +// Delay between retries when waiting for pending claim +const CLAIM_RETRY_DELAY_MS = 50; + 
+export type DebounceData = { + key: string; + delay: string; + createdAt: Date; +}; + +/** + * DebounceSystem handles debouncing of task triggers. + * + * When a run is triggered with a debounce key, if an existing run with the same key + * is still in the DELAYED execution status, the new trigger "pushes" the existing + * run's execution time later rather than creating a new run. + * + * The debounce key mapping is stored in Redis for fast lookups (to avoid database indexes). + */ +export class DebounceSystem { + private readonly $: SystemResources; + private readonly redis: Redis; + private readonly executionSnapshotSystem: ExecutionSnapshotSystem; + private readonly delayedRunSystem: DelayedRunSystem; + private readonly maxDebounceDurationMs: number; + + constructor(options: DebounceSystemOptions) { + this.$ = options.resources; + this.redis = createRedisClient( + { + ...options.redis, + keyPrefix: `${options.redis.keyPrefix ?? ""}debounce:`, + }, + { + onError: (error) => { + this.$.logger.error("DebounceSystem redis client error:", { error }); + }, + } + ); + this.executionSnapshotSystem = options.executionSnapshotSystem; + this.delayedRunSystem = options.delayedRunSystem; + this.maxDebounceDurationMs = options.maxDebounceDurationMs; + } + + /** + * Gets the Redis key for a debounce lookup. + * Key pattern: {envId}:{taskId}:{debounceKey} + */ + private getDebounceRedisKey(envId: string, taskId: string, debounceKey: string): string { + return `${envId}:${taskId}:${debounceKey}`; + } + + /** + * Atomically claims a debounce key using SET NX. + * This prevents the race condition where two servers both check for an existing + * run, find none, and both create new runs. 
+ * + * Returns: + * - { claimed: true } if we successfully claimed the key + * - { claimed: false, existingRunId: string } if key exists with a run ID + * - { claimed: false, existingRunId: null } if key exists but is pending (another server is creating) + */ + private async claimDebounceKey({ + environmentId, + taskIdentifier, + debounceKey, + claimId, + ttlMs, + }: { + environmentId: string; + taskIdentifier: string; + debounceKey: string; + claimId: string; + ttlMs: number; + }): Promise<{ claimed: true } | { claimed: false; existingRunId: string | null }> { + const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounceKey); + + // Try to claim with SET NX (only succeeds if key doesn't exist) + const result = await this.redis.set(redisKey, `pending:${claimId}`, "PX", ttlMs, "NX"); + + if (result === "OK") { + this.$.logger.debug("claimDebounceKey: claimed key", { + redisKey, + claimId, + debounceKey, + }); + return { claimed: true }; + } + + // Claim failed - get existing value + const existingValue = await this.redis.get(redisKey); + + if (!existingValue) { + // Key expired between SET and GET - rare race, return null to trigger retry + this.$.logger.debug("claimDebounceKey: key expired between SET and GET", { + redisKey, + debounceKey, + }); + return { claimed: false, existingRunId: null }; + } + + if (existingValue.startsWith("pending:")) { + // Another server is creating the run - return null to trigger wait/retry + this.$.logger.debug("claimDebounceKey: key is pending (another server is creating)", { + redisKey, + debounceKey, + existingValue, + }); + return { claimed: false, existingRunId: null }; + } + + // It's a run ID + this.$.logger.debug("claimDebounceKey: found existing run", { + redisKey, + debounceKey, + existingRunId: existingValue, + }); + return { claimed: false, existingRunId: existingValue }; + } + + /** + * Waits for another server to complete its claim and register a run ID. 
+   * Used when we detect a "pending" state, meaning another server has claimed
+   * the key but hasn't yet created the run.
+   *
+   * Polls the key up to MAX_CLAIM_RETRIES times, sleeping CLAIM_RETRY_DELAY_MS before
+   * each read:
+   * - once a run ID shows up, the call is handed over to handleExistingRun
+   * - if the key disappears, or is still pending after the last attempt, we try to take
+   *   the claim ourselves via claimDebounceKey instead of returning an unclaimed "new".
+   *   Only one of several concurrent waiters can win that claim; the others see the
+   *   winner's pending marker or run ID.
+   *
+   * On timeout the key is removed only if it still holds a pending marker, so a run ID
+   * registered between the last poll and the cleanup is never deleted.
+   */
+  private async waitForExistingRun({
+    environmentId,
+    taskIdentifier,
+    debounce,
+    tx,
+  }: {
+    environmentId: string;
+    taskIdentifier: string;
+    debounce: DebounceOptions;
+    tx?: PrismaClientOrTransaction;
+  }): Promise {
+    const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounce.key);
+    let keyDisappeared = false;
+
+    for (let i = 0; i < MAX_CLAIM_RETRIES; i++) {
+      await new Promise((resolve) => setTimeout(resolve, CLAIM_RETRY_DELAY_MS));
+
+      const value = await this.redis.get(redisKey);
+
+      if (!value) {
+        // Key expired or was deleted - stop polling and try to claim it below
+        this.$.logger.debug("waitForExistingRun: key expired/deleted, returning new", {
+          redisKey,
+          debounceKey: debounce.key,
+          attempt: i + 1,
+        });
+        keyDisappeared = true;
+        break;
+      }
+
+      if (!value.startsWith("pending:")) {
+        // It's a run ID now - proceed with reschedule logic
+        this.$.logger.debug("waitForExistingRun: found run ID, handling existing run", {
+          redisKey,
+          debounceKey: debounce.key,
+          existingRunId: value,
+          attempt: i + 1,
+        });
+        return await this.handleExistingRun({
+          existingRunId: value,
+          redisKey,
+          debounce,
+          tx,
+        });
+      }
+
+      this.$.logger.debug("waitForExistingRun: still pending, retrying", {
+        redisKey,
+        debounceKey: debounce.key,
+        attempt: i + 1,
+        value,
+      });
+    }
+
+    if (!keyDisappeared) {
+      // Timed out waiting - the other server may have failed
+      this.$.logger.warn(
+        "waitForExistingRun: timed out waiting for pending claim, deleting stale key",
+        {
+          redisKey,
+          debounceKey: debounce.key,
+        }
+      );
+
+      // Delete atomically and only while the value is still a pending marker. A plain DEL
+      // here could wipe a run ID that the other server registered after our last GET.
+      const deleteIfStillPending = [
+        'local current = redis.call("GET", KEYS[1])',
+        'if current and string.sub(current, 1, 8) == "pending:" then',
+        '  return redis.call("DEL", KEYS[1])',
+        "end",
+        "return 0",
+      ].join("\n");
+      await this.redis.eval(deleteIfStillPending, 1, redisKey);
+    }
+
+    // Take the claim ourselves so that concurrent waiters cannot all create a run.
+    const claimId = nanoid(16);
+    const claimResult = await this.claimDebounceKey({
+      environmentId,
+      taskIdentifier,
+      debounceKey: debounce.key,
+      claimId,
+      ttlMs: CLAIM_TTL_MS,
+    });
+
+    if (claimResult.claimed) {
+      return { status: "new", claimId };
+    }
+
+    if (claimResult.existingRunId) {
+      // A run was registered in the meantime - debounce onto it
+      return await this.handleExistingRun({
+        existingRunId: claimResult.existingRunId,
+        redisKey,
+        debounce,
+        tx,
+      });
+    }
+
+    // Someone else holds a fresh pending claim. Rather than waiting a second full cycle,
+    // fall back to the previous behaviour and let the caller create an unclaimed run.
+    return { status: "new" };
+  }
+
+  /**
+   * Handles an existing debounced run by locking it and rescheduling.
+   * Extracted to be reusable by both handleDebounce and waitForExistingRun.
+   *
+   * Everything below runs while holding the run lock for `existingRunId`. Resolves to:
+   * - { status: "new" } when the snapshot or run can no longer be loaded, when the run has
+   *   left the DELAYED / RUN_CREATED states, or when `debounce.delay` cannot be parsed.
+   *   In the first two cases the Redis key is deleted as well.
+   * - { status: "max_duration_exceeded" } when the new deadline would fall after
+   *   run.createdAt + maxDebounceDurationMs (the Redis key is deleted)
+   * - { status: "existing", run, waitpoint } after the run has been rescheduled and the
+   *   key's TTL extended. `run` is the row as read before rescheduling.
+   */
+  private async handleExistingRun({
+    existingRunId,
+    redisKey,
+    debounce,
+    tx,
+  }: {
+    existingRunId: string;
+    redisKey: string;
+    debounce: DebounceOptions;
+    tx?: PrismaClientOrTransaction;
+  }): Promise {
+    return await this.$.runLock.lock("handleDebounce", [existingRunId], async () => {
+      const prisma = tx ?? this.$.prisma;
+
+      // Get the latest execution snapshot
+      let snapshot;
+      try {
+        snapshot = await getLatestExecutionSnapshot(prisma, existingRunId);
+      } catch (error) {
+        // Run no longer exists or has no snapshot
+        this.$.logger.debug("handleExistingRun: existing run not found or has no snapshot", {
+          existingRunId,
+          debounceKey: debounce.key,
+          error,
+        });
+        // Clean up stale Redis key
+        await this.redis.del(redisKey);
+        return { status: "new" };
+      }
+
+      // Check if run is still in DELAYED status (or legacy RUN_CREATED for older runs)
+      if (snapshot.executionStatus !== "DELAYED" && snapshot.executionStatus !== "RUN_CREATED") {
+        this.$.logger.debug("handleExistingRun: existing run is no longer delayed", {
+          existingRunId,
+          executionStatus: snapshot.executionStatus,
+          debounceKey: debounce.key,
+        });
+        // Clean up Redis key since run is no longer debounceable
+        await this.redis.del(redisKey);
+        return { status: "new" };
+      }
+
+      // Get the run to check debounce metadata and createdAt
+      const existingRun = await prisma.taskRun.findFirst({
+        where: { id: existingRunId },
+        include: {
+          associatedWaitpoint: true,
+        },
+      });
+
+      if (!existingRun) {
+        this.$.logger.debug("handleExistingRun: existing run not found in database", {
+          existingRunId,
+          debounceKey: debounce.key,
+        });
+        await this.redis.del(redisKey);
+        return { status: "new" };
+      }
+
+      // parseNaturalLanguageDuration already yields the absolute point in time
+      // ("now + delay") as a Date, so it is the new deadline as-is. No need to convert it
+      // to a millisecond offset and back.
+      const newDelayUntil = parseNaturalLanguageDuration(debounce.delay);
+      if (!newDelayUntil) {
+        this.$.logger.error("handleExistingRun: invalid delay duration", {
+          delay: debounce.delay,
+        });
+        return { status: "new" };
+      }
+
+      // Check if max debounce duration would be exceeded
+      const runCreatedAt = existingRun.createdAt;
+      const maxDelayUntil = new Date(runCreatedAt.getTime() + this.maxDebounceDurationMs);
+
+      if (newDelayUntil > maxDelayUntil) {
+        this.$.logger.debug("handleExistingRun: max debounce duration would be exceeded", {
+          existingRunId,
+          debounceKey: debounce.key,
+          runCreatedAt,
+          newDelayUntil,
+          maxDelayUntil,
+          maxDebounceDurationMs: this.maxDebounceDurationMs,
+        });
+        // Clean up Redis key since this debounce window is closed
+        await this.redis.del(redisKey);
+        return { status: "max_duration_exceeded" };
+      }
+
+      // Reschedule the delayed run
+      await this.delayedRunSystem.rescheduleDelayedRun({
+        runId: existingRunId,
+        delayUntil: newDelayUntil,
+        tx: prisma,
+      });
+
+      // Keep the mapping alive until the new deadline plus a 1 minute buffer
+      // (and never for less than 1 minute)
+      const ttlMs = Math.max(newDelayUntil.getTime() - Date.now() + 60_000, 60_000);
+      await this.redis.pexpire(redisKey, ttlMs);
+
+      this.$.logger.debug("handleExistingRun: rescheduled existing debounced run", {
+        existingRunId,
+        debounceKey: debounce.key,
+        newDelayUntil,
+      });
+
+      return {
+        status: "existing",
+        run: existingRun,
+        waitpoint: existingRun.associatedWaitpoint,
+      };
+    });
+  }
+
+  /**
+   * Called during trigger to check for an existing debounced run.
+   * If found and still in DELAYED status, reschedules it and returns the existing run.
+   *
+   * Uses atomic SET NX to prevent the distributed race condition where two servers
+   * both check for an existing run, find none, and both create new runs.
+   *
+   * Note: This method does NOT handle blocking parent runs for triggerAndWait.
+   * The caller (RunEngine.trigger) is responsible for blocking using waitpointSystem.blockRunWithWaitpoint().
+   *
+   * Outcomes:
+   * - key claimed: { status: "new", claimId } - the caller creates the run and then
+   *   passes the claimId to registerDebouncedRun
+   * - key holds a run ID: result of handleExistingRun
+   * - key is pending (or vanished mid-check): result of waitForExistingRun
+   */
+  async handleDebounce({
+    environmentId,
+    taskIdentifier,
+    debounce,
+    tx,
+  }: {
+    environmentId: string;
+    taskIdentifier: string;
+    debounce: DebounceOptions;
+    tx?: PrismaClientOrTransaction;
+  }): Promise {
+    return startSpan(
+      this.$.tracer,
+      "handleDebounce",
+      async (span) => {
+        span.setAttribute("debounceKey", debounce.key);
+        span.setAttribute("taskIdentifier", taskIdentifier);
+        span.setAttribute("environmentId", environmentId);
+
+        const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounce.key);
+        const claimId = nanoid(16); // Unique ID for this claim attempt
+
+        // Try to atomically claim the debounce key
+        const claimResult = await this.claimDebounceKey({
+          environmentId,
+          taskIdentifier,
+          debounceKey: debounce.key,
+          claimId,
+          ttlMs: CLAIM_TTL_MS,
+        });
+
+        if (claimResult.claimed) {
+          // We successfully claimed the key - return "new" to create the run
+          // Caller will call registerDebouncedRun after creating the run
+          this.$.logger.debug("handleDebounce: claimed key, returning new", {
+            debounceKey: debounce.key,
+            taskIdentifier,
+            environmentId,
+            claimId,
+          });
+          span.setAttribute("claimed", true);
+          span.setAttribute("claimId", claimId);
+          return { status: "new", claimId };
+        }
+
+        if (!claimResult.existingRunId) {
+          // Another server is creating - wait and retry to get the run ID
+          this.$.logger.debug("handleDebounce: key is pending, waiting for existing run", {
+            debounceKey: debounce.key,
+            taskIdentifier,
+            environmentId,
+          });
+          span.setAttribute("waitingForPending", true);
+          return await this.waitForExistingRun({
+            environmentId,
+            taskIdentifier,
+            debounce,
+            tx,
+          });
+        }
+
+        // Found existing run - lock and reschedule
+        span.setAttribute("existingRunId", claimResult.existingRunId);
+        return await this.handleExistingRun({
+          existingRunId: claimResult.existingRunId,
+          redisKey,
+          debounce,
+          tx,
+        });
+      },
+      {
+        attributes: {
+          environmentId,
+          taskIdentifier,
+          debounceKey: debounce.key,
+        },
+      }
+    );
+  }
+
+  /**
+   * Stores the debounce key -> runId mapping after creating a new debounced run.
+   *
+   * The mapping expires 1 minute after `delayUntil`, and never sooner than 1 minute
+   * from now.
+   *
+   * If claimId is provided, the mapping is written only while the key still holds our
+   * `pending:{claimId}` marker. The check and the write happen in one Redis script, so a
+   * claim that expired and was taken over by another server cannot be overwritten in
+   * between. Without a claimId the mapping is written unconditionally.
+   *
+   * @returns true if registration succeeded, false if we lost the claim
+   */
+  async registerDebouncedRun({
+    runId,
+    environmentId,
+    taskIdentifier,
+    debounceKey,
+    delayUntil,
+    claimId,
+  }: {
+    runId: string;
+    environmentId: string;
+    taskIdentifier: string;
+    debounceKey: string;
+    delayUntil: Date;
+    claimId?: string;
+  }): Promise {
+    return startSpan(
+      this.$.tracer,
+      "registerDebouncedRun",
+      async (span) => {
+        const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounceKey);
+
+        // Calculate TTL: delay until + buffer
+        const ttlMs = Math.max(
+          delayUntil.getTime() - Date.now() + 60_000, // Add 1 minute buffer
+          60_000
+        );
+
+        if (claimId) {
+          // Compare-and-set: replies with integer 1 when the pending marker was ours and
+          // has been replaced, otherwise with whatever the key currently holds (nil if gone).
+          const registerIfClaimOwned = [
+            'local current = redis.call("GET", KEYS[1])',
+            "if current == ARGV[1] then",
+            '  redis.call("SET", KEYS[1], ARGV[2], "PX", ARGV[3])',
+            "  return 1",
+            "end",
+            "return current",
+          ].join("\n");
+
+          const reply = await this.redis.eval(
+            registerIfClaimOwned,
+            1,
+            redisKey,
+            `pending:${claimId}`,
+            runId,
+            ttlMs
+          );
+
+          if (reply !== 1) {
+            // We lost the claim - another server took over or it expired
+            this.$.logger.warn("registerDebouncedRun: lost claim, not registering", {
+              runId,
+              environmentId,
+              taskIdentifier,
+              debounceKey,
+              claimId,
+              currentValue: reply,
+            });
+            span.setAttribute("claimLost", true);
+            return false;
+          }
+        } else {
+          await this.redis.set(redisKey, runId, "PX", ttlMs);
+        }
+
+        this.$.logger.debug("registerDebouncedRun: stored debounce key mapping", {
+          runId,
+          environmentId,
+          taskIdentifier,
+          debounceKey,
+          delayUntil,
+          ttlMs,
+          claimId,
+        });
+
+        span.setAttribute("registered", true);
+        return true;
+      },
+      {
+        attributes: {
+          runId,
+          environmentId,
+          taskIdentifier,
+          debounceKey,
+          claimId: claimId ?? "none",
+        },
+      }
+    );
+  }
+
+  /**
+   * Clears the debounce key when a run is enqueued or completed.
+   *
+   * Deletes the Redis entry for the given environment / task / debounce key and logs
+   * the removal at debug level.
+   */
+  async clearDebounceKey({
+    environmentId,
+    taskIdentifier,
+    debounceKey,
+  }: {
+    environmentId: string;
+    taskIdentifier: string;
+    debounceKey: string;
+  }): Promise {
+    await this.redis.del(this.getDebounceRedisKey(environmentId, taskIdentifier, debounceKey));
+
+    const logContext = { environmentId, taskIdentifier, debounceKey };
+    this.$.logger.debug("clearDebounceKey: cleared debounce key mapping", logContext);
+  }
+
+  /** Closes this system's dedicated Redis connection. */
+  async quit(): Promise {
+    await this.redis.quit();
+  }
+}
diff --git a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts index e3dca4b544..1a567a0b40 100644 --- a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts @@ -40,8 +40,8 @@ export class DelayedRunSystem { return await this.$.runLock.lock("rescheduleDelayedRun", [runId], async () => { const snapshot = await getLatestExecutionSnapshot(prisma, runId); - //if the run isn't just created then we can't reschedule it - if (snapshot.executionStatus !== "RUN_CREATED") { + // Check if the run is still in DELAYED status (or legacy RUN_CREATED for older runs) + if (snapshot.executionStatus !== "DELAYED" && snapshot.executionStatus !== "RUN_CREATED") { throw new ServiceValidationError("Cannot reschedule a run that is not delayed"); } @@ -54,9 +54,9 @@ export class DelayedRunSystem { executionSnapshots: { create: { engine: "V2", - executionStatus: "RUN_CREATED", + executionStatus: "DELAYED", description: "Delayed run was rescheduled to a future date", - runStatus: "EXPIRED", + runStatus: "DELAYED", environmentId: snapshot.environmentId, environmentType: snapshot.environmentType, projectId: snapshot.projectId, @@ -98,71 +98,88 @@ export class DelayedRunSystem { } async enqueueDelayedRun({ runId }: { runId: string }) { - const run = await this.$.prisma.taskRun.findFirst({ - where: { id: runId }, - include: { -
runtimeEnvironment: { - include: { - project: true, - organization: true, + // Use lock to prevent race with debounce rescheduling + return await this.$.runLock.lock("enqueueDelayedRun", [runId], async () => { + // Check if run is still in DELAYED status before enqueuing + // This prevents a race where debounce reschedules the run while we're about to enqueue it + const snapshot = await getLatestExecutionSnapshot(this.$.prisma, runId); + + if (snapshot.executionStatus !== "DELAYED" && snapshot.executionStatus !== "RUN_CREATED") { + this.$.logger.debug("enqueueDelayedRun: run is no longer delayed, skipping enqueue", { + runId, + executionStatus: snapshot.executionStatus, + }); + return; + } + + const run = await this.$.prisma.taskRun.findFirst({ + where: { id: runId }, + include: { + runtimeEnvironment: { + include: { + project: true, + organization: true, + }, }, }, - }, - }); - - if (!run) { - throw new Error(`#enqueueDelayedRun: run not found: ${runId}`); - } + }); - // Now we need to enqueue the run into the RunQueue - await this.enqueueSystem.enqueueRun({ - run, - env: run.runtimeEnvironment, - batchId: run.batchId ?? 
undefined, - }); - - const queuedAt = new Date(); - - const updatedRun = await this.$.prisma.taskRun.update({ - where: { id: runId }, - data: { - status: "PENDING", - queuedAt, - }, - }); + if (!run) { + throw new Error(`#enqueueDelayedRun: run not found: ${runId}`); + } - this.$.eventBus.emit("runEnqueuedAfterDelay", { - time: new Date(), - run: { - id: runId, - status: "PENDING", - queuedAt, - updatedAt: updatedRun.updatedAt, - createdAt: updatedRun.createdAt, - }, - organization: { - id: run.runtimeEnvironment.organizationId, - }, - project: { - id: run.runtimeEnvironment.projectId, - }, - environment: { - id: run.runtimeEnvironmentId, - }, - }); + // Now we need to enqueue the run into the RunQueue + // Skip the lock in enqueueRun since we already hold it + await this.enqueueSystem.enqueueRun({ + run, + env: run.runtimeEnvironment, + batchId: run.batchId ?? undefined, + skipRunLock: true, + }); + + const queuedAt = new Date(); + + const updatedRun = await this.$.prisma.taskRun.update({ + where: { id: runId }, + data: { + status: "PENDING", + queuedAt, + }, + }); + + this.$.eventBus.emit("runEnqueuedAfterDelay", { + time: new Date(), + run: { + id: runId, + status: "PENDING", + queuedAt, + updatedAt: updatedRun.updatedAt, + createdAt: updatedRun.createdAt, + }, + organization: { + id: run.runtimeEnvironment.organizationId, + }, + project: { + id: run.runtimeEnvironment.projectId, + }, + environment: { + id: run.runtimeEnvironmentId, + }, + }); - if (run.ttl) { - const expireAt = parseNaturalLanguageDuration(run.ttl); + if (run.ttl) { + const expireAt = parseNaturalLanguageDuration(run.ttl); - if (expireAt) { - await this.$.worker.enqueue({ - id: `expireRun:${runId}`, - job: "expireRun", - payload: { runId }, - availableAt: expireAt, - }); + if (expireAt) { + await this.$.worker.enqueue({ + id: `expireRun:${runId}`, + job: "expireRun", + payload: { runId }, + availableAt: expireAt, + }); + } } - } + }); } async scheduleDelayedRunEnqueuing({ runId, delayUntil }: { 
runId: string; delayUntil: Date }) { diff --git a/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts b/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts index 181a6fe277..40a92abb55 100644 --- a/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/waitpointSystem.ts @@ -576,6 +576,18 @@ export class WaitpointSystem { reason: "run is already executing", }; } + case "DELAYED": { + this.$.logger.debug(`continueRunIfUnblocked: run is delayed, skipping`, { + runId, + snapshot, + executionStatus: snapshot.executionStatus, + }); + + return { + status: "skipped", + reason: "run is delayed", + }; + } case "QUEUED": { this.$.logger.info(`continueRunIfUnblocked: run is queued, skipping`, { runId, diff --git a/internal-packages/run-engine/src/engine/tests/debounce.test.ts b/internal-packages/run-engine/src/engine/tests/debounce.test.ts new file mode 100644 index 0000000000..791dda2d59 --- /dev/null +++ b/internal-packages/run-engine/src/engine/tests/debounce.test.ts @@ -0,0 +1,1489 @@ +import { containerTest, assertNonNullable } from "@internal/testcontainers"; +import { trace } from "@internal/tracing"; +import { expect } from "vitest"; +import { RunEngine } from "../index.js"; +import { setTimeout } from "timers/promises"; +import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "./setup.js"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("RunEngine debounce", () => { + containerTest("Basic debounce: first trigger creates run", async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + 
name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, // 1 minute max debounce + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Trigger with debounce + const run = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + expect(run.friendlyId).toBe("run_deb1"); + expect(run.status).toBe("DELAYED"); + + // Verify debounce is stored in the run + const dbRun = await prisma.taskRun.findFirst({ + where: { id: run.id }, + }); + assertNonNullable(dbRun); + const debounce = dbRun.debounce as { key: string; delay: string } | null; + expect(debounce?.key).toBe("user-123"); + expect(debounce?.delay).toBe("5s"); + + // Verify execution status is DELAYED + const executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); + } finally { + await engine.quit(); + } + }); + + containerTest( + "Debounce: multiple triggers return same run", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: 
"small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger creates run + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Second trigger should return same run + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Both should return the same run (first run wins) + expect(run2.id).toBe(run1.id); + expect(run2.friendlyId).toBe(run1.friendlyId); + + // Only one run should exist in DB + const runs = await prisma.taskRun.findMany({ + where: { + taskIdentifier, + runtimeEnvironmentId: authenticatedEnvironment.id, + }, + }); + expect(runs.length).toBe(1); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: delay extension on subsequent triggers", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await 
setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const initialDelay = 1000; + const initialDelayUntil = new Date(Date.now() + initialDelay); + + // First trigger + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: initialDelayUntil, + debounce: { + key: "user-123", + delay: "1s", + }, + }, + prisma + ); + + const originalDelayUntil = run1.delayUntil; + assertNonNullable(originalDelayUntil); + + // Wait a bit then trigger again + await setTimeout(300); + + // Second trigger should extend the delay + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 1000), + debounce: { + key: "user-123", + delay: "1s", + }, + }, + prisma + ); + + // Same run returned + 
expect(run2.id).toBe(run1.id); + + // delayUntil should have been extended + const updatedRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(updatedRun); + assertNonNullable(updatedRun.delayUntil); + + // The new delayUntil should be later than the original + expect(updatedRun.delayUntil.getTime()).toBeGreaterThan(originalDelayUntil.getTime()); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: different keys create separate runs", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Trigger with key "user-123" + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Trigger with different key "user-456" + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: 
'{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-456", + delay: "5s", + }, + }, + prisma + ); + + // Different keys should create different runs + expect(run2.id).not.toBe(run1.id); + + const runs = await prisma.taskRun.findMany({ + where: { + taskIdentifier, + runtimeEnvironmentId: authenticatedEnvironment.id, + }, + }); + expect(runs.length).toBe(2); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: run executes after final delay", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger with 500ms delay + const run = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 500), + debounce: { + key: "user-123", + delay: "500ms", + }, + }, + prisma + 
); + + // Verify it's in DELAYED status + let executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); + + // Wait for delay to pass + await setTimeout(1000); + + // Should now be QUEUED + executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("QUEUED"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: no longer works after run is enqueued", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger with short delay + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 300), + debounce: { + key: "user-123", + delay: "300ms", + }, + }, + prisma + ); + + // Wait for run to be enqueued + await setTimeout(800); + + // Verify first run is now QUEUED + 
const executionData = await engine.getRunExecutionData({ runId: run1.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("QUEUED"); + + // New trigger with same key should create a NEW run + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Should be a different run + expect(run2.id).not.toBe(run1.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: max duration exceeded creates new run", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Set a very short max debounce duration + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 500, // Very short max duration + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_deb1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + 
traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 2000), + debounce: { + key: "user-123", + delay: "2s", + }, + }, + prisma + ); + + // Wait for max duration to be exceeded + await setTimeout(700); + + // Second trigger should create a new run because max duration exceeded + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_deb2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 2000), + debounce: { + key: "user-123", + delay: "2s", + }, + }, + prisma + ); + + // Should be a different run because max duration exceeded + expect(run2.id).not.toBe(run1.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce keys are scoped to task identifier", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier1 = "test-task-1"; + const taskIdentifier2 = "test-task-2"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier1); + await setupBackgroundWorker(engine, authenticatedEnvironment, 
taskIdentifier2); + + // Trigger task 1 with debounce key + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_task1", + environment: authenticatedEnvironment, + taskIdentifier: taskIdentifier1, + payload: '{"data": "task1"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: `task/${taskIdentifier1}`, + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "shared-key", + delay: "5s", + }, + }, + prisma + ); + + // Trigger task 2 with same debounce key - should create separate run + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_task2", + environment: authenticatedEnvironment, + taskIdentifier: taskIdentifier2, + payload: '{"data": "task2"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: `task/${taskIdentifier2}`, + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "shared-key", + delay: "5s", + }, + }, + prisma + ); + + // Should be different runs (debounce scoped to task) + expect(run2.id).not.toBe(run1.id); + expect(run1.taskIdentifier).toBe(taskIdentifier1); + expect(run2.taskIdentifier).toBe(taskIdentifier2); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce with triggerAndWait: parent blocked by debounced child run", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, 
+ }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const parentTask = "parent-task"; + const childTask = "child-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, [parentTask, childTask]); + + // Trigger parent run + const parentRun = await engine.trigger( + { + number: 1, + friendlyId: "run_parent1", + environment: authenticatedEnvironment, + taskIdentifier: parentTask, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + queue: `task/${parentTask}`, + isTest: false, + tags: [], + workerQueue: "main", + }, + prisma + ); + + // Dequeue parent and create the attempt + await setTimeout(500); + const dequeued = await engine.dequeueFromWorkerQueue({ + consumerId: "test_12345", + workerQueue: "main", + }); + await engine.startRunAttempt({ + runId: parentRun.id, + snapshotId: dequeued[0].snapshot.id, + }); + + // First triggerAndWait with debounce - creates child run + const childRun1 = await engine.trigger( + { + number: 1, + friendlyId: "run_child1", + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + queue: `task/${childTask}`, + isTest: false, + tags: [], + workerQueue: "main", + resumeParentOnCompletion: true, + parentTaskRunId: parentRun.id, + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Verify parent is blocked + const parentExecData1 = await engine.getRunExecutionData({ runId: parentRun.id }); + assertNonNullable(parentExecData1); + expect(parentExecData1.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); + + // Verify child run is in DELAYED status + const childExecData1 = await engine.getRunExecutionData({ runId: 
childRun1.id }); + assertNonNullable(childExecData1); + expect(childExecData1.snapshot.executionStatus).toBe("DELAYED"); + + // Check that parent is blocked by the child's waitpoint + const runWaitpoint = await prisma.taskRunWaitpoint.findFirst({ + where: { + taskRunId: parentRun.id, + }, + include: { + waitpoint: true, + }, + }); + assertNonNullable(runWaitpoint); + expect(runWaitpoint.waitpoint.type).toBe("RUN"); + expect(runWaitpoint.waitpoint.completedByTaskRunId).toBe(childRun1.id); + + // Second triggerAndWait with same debounce key should return same child run + const childRun2 = await engine.trigger( + { + number: 2, + friendlyId: "run_child2", + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12347", + spanId: "s12347", + queue: `task/${childTask}`, + isTest: false, + tags: [], + workerQueue: "main", + resumeParentOnCompletion: true, + parentTaskRunId: parentRun.id, + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "user-123", + delay: "5s", + }, + }, + prisma + ); + + // Should return the same child run (debounced) + expect(childRun2.id).toBe(childRun1.id); + + // Only one child run should exist + const childRuns = await prisma.taskRun.findMany({ + where: { + taskIdentifier: childTask, + runtimeEnvironmentId: authenticatedEnvironment.id, + }, + }); + expect(childRuns.length).toBe(1); + + // Parent should still be blocked by the same child run's waitpoint + const parentExecData2 = await engine.getRunExecutionData({ runId: parentRun.id }); + assertNonNullable(parentExecData2); + expect(parentExecData2.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce with triggerAndWait: second parent also blocked by same child", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await 
setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const parentTask = "parent-task"; + const childTask = "child-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, [parentTask, childTask]); + + // Trigger first parent run + const parentRun1 = await engine.trigger( + { + number: 1, + friendlyId: "run_parent1", + environment: authenticatedEnvironment, + taskIdentifier: parentTask, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + queue: `task/${parentTask}`, + isTest: false, + tags: [], + workerQueue: "main", + }, + prisma + ); + + // Dequeue first parent and start attempt + await setTimeout(500); + const dequeued1 = await engine.dequeueFromWorkerQueue({ + consumerId: "test_12345", + workerQueue: "main", + }); + await engine.startRunAttempt({ + runId: parentRun1.id, + snapshotId: dequeued1[0].snapshot.id, + }); + + // First parent triggers child with debounce + const childRun1 = await engine.trigger( + { + number: 1, + friendlyId: "run_child1", + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + queue: `task/${childTask}`, + isTest: false, + tags: [], + workerQueue: "main", + resumeParentOnCompletion: true, + parentTaskRunId: parentRun1.id, + delayUntil: new Date(Date.now() + 5000), + 
debounce: { + key: "shared-key", + delay: "5s", + }, + }, + prisma + ); + + // Verify first parent is blocked + const parent1ExecData = await engine.getRunExecutionData({ runId: parentRun1.id }); + assertNonNullable(parent1ExecData); + expect(parent1ExecData.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); + + // Trigger second parent run + const parentRun2 = await engine.trigger( + { + number: 2, + friendlyId: "run_parent2", + environment: authenticatedEnvironment, + taskIdentifier: parentTask, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12347", + spanId: "s12347", + queue: `task/${parentTask}`, + isTest: false, + tags: [], + workerQueue: "main", + }, + prisma + ); + + // Dequeue second parent and start attempt + await setTimeout(500); + const dequeued2 = await engine.dequeueFromWorkerQueue({ + consumerId: "test_12346", + workerQueue: "main", + }); + await engine.startRunAttempt({ + runId: parentRun2.id, + snapshotId: dequeued2[0].snapshot.id, + }); + + // Second parent triggers same child with debounce - should return existing child + const childRun2 = await engine.trigger( + { + number: 2, + friendlyId: "run_child2", + environment: authenticatedEnvironment, + taskIdentifier: childTask, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12348", + spanId: "s12348", + queue: `task/${childTask}`, + isTest: false, + tags: [], + workerQueue: "main", + resumeParentOnCompletion: true, + parentTaskRunId: parentRun2.id, + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "shared-key", + delay: "5s", + }, + }, + prisma + ); + + // Should return the same child run + expect(childRun2.id).toBe(childRun1.id); + + // Second parent should also be blocked by the same child run + const parent2ExecData = await engine.getRunExecutionData({ runId: parentRun2.id }); + assertNonNullable(parent2ExecData); + 
expect(parent2ExecData.snapshot.executionStatus).toBe("EXECUTING_WITH_WAITPOINTS"); + + // Both parents should have waitpoints pointing to the same child + const waitpoints = await prisma.taskRunWaitpoint.findMany({ + where: { + taskRunId: { in: [parentRun1.id, parentRun2.id] }, + }, + include: { + waitpoint: true, + }, + }); + expect(waitpoints.length).toBe(2); + expect(waitpoints[0].waitpoint.completedByTaskRunId).toBe(childRun1.id); + expect(waitpoints[1].waitpoint.completedByTaskRunId).toBe(childRun1.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: keys scoped to environment", + async ({ prisma, redisOptions }) => { + // Create production environment (also creates org and project) + const prodEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Create a second environment (development) within the same org/project + const devEnvironment = await prisma.runtimeEnvironment.create({ + data: { + type: "DEVELOPMENT", + slug: "dev-slug", + projectId: prodEnvironment.projectId, + organizationId: prodEnvironment.organizationId, + apiKey: "dev_api_key", + pkApiKey: "dev_pk_api_key", + shortcode: "dev_short", + maximumConcurrencyLimit: 10, + }, + include: { + project: true, + organization: true, + orgMember: true, + }, + }); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, prodEnvironment, taskIdentifier); + await setupBackgroundWorker(engine, devEnvironment, 
taskIdentifier); + + // Trigger in production environment + const runProd = await engine.trigger( + { + number: 1, + friendlyId: "run_prod1", + environment: prodEnvironment, + taskIdentifier, + payload: '{"env": "prod"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "same-key", + delay: "5s", + }, + }, + prisma + ); + + // Trigger in development environment with same key - should create separate run + const runDev = await engine.trigger( + { + number: 2, + friendlyId: "run_dev1", + environment: devEnvironment, + taskIdentifier, + payload: '{"env": "dev"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "same-key", + delay: "5s", + }, + }, + prisma + ); + + // Should be different runs (debounce scoped to environment) + expect(runDev.id).not.toBe(runProd.id); + expect(runProd.runtimeEnvironmentId).toBe(prodEnvironment.id); + expect(runDev.runtimeEnvironmentId).toBe(devEnvironment.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce: concurrent triggers only create one run (distributed race protection)", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + 
}, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Trigger multiple runs concurrently with the same debounce key + // This simulates the distributed race condition where multiple servers + // try to create runs at the exact same time + const concurrentTriggers = Promise.all([ + engine.trigger( + { + number: 1, + friendlyId: "run_conc1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "concurrent-key", + delay: "5s", + }, + }, + prisma + ), + engine.trigger( + { + number: 2, + friendlyId: "run_conc2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "concurrent-key", + delay: "5s", + }, + }, + prisma + ), + engine.trigger( + { + number: 3, + friendlyId: "run_conc3", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "third"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12347", + spanId: "s12347", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "concurrent-key", + delay: "5s", + }, + }, + prisma + ), + ]); + + const [run1, run2, run3] = await concurrentTriggers; + + // All should return the same run (one 
won the claim, others waited and got it) + expect(run2.id).toBe(run1.id); + expect(run3.id).toBe(run1.id); + + // Only one run should exist in DB + const runs = await prisma.taskRun.findMany({ + where: { + taskIdentifier, + runtimeEnvironmentId: authenticatedEnvironment.id, + }, + }); + expect(runs.length).toBe(1); + + // The run should be in DELAYED status + const executionData = await engine.getRunExecutionData({ runId: run1.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); + } finally { + await engine.quit(); + } + } + ); +}); + diff --git a/internal-packages/run-engine/src/engine/types.ts b/internal-packages/run-engine/src/engine/types.ts index bdc6da4152..e819cf8c1b 100644 --- a/internal-packages/run-engine/src/engine/types.ts +++ b/internal-packages/run-engine/src/engine/types.ts @@ -7,7 +7,7 @@ import { RetryOptions, TriggerTraceContext, } from "@trigger.dev/core/v3"; -import { PrismaClient, PrismaReplicaClient } from "@trigger.dev/database"; +import { PrismaClient, PrismaReplicaClient, TaskRun, Waitpoint } from "@trigger.dev/database"; import { Worker, type WorkerConcurrencyOptions, @@ -83,6 +83,11 @@ export type RunEngineOptions = { /** Optional global rate limiter to limit processing across all consumers */ globalRateLimiter?: GlobalRateLimiter; }; + debounce?: { + redis?: RedisOptions; + /** Maximum duration in milliseconds that a run can be debounced. Default: 1 hour */ + maxDebounceDurationMs?: number; + }; /** If not set then checkpoints won't ever be used */ retryWarmStartThresholdMs?: number; heartbeatTimeoutsMs?: Partial; @@ -164,6 +169,20 @@ export type TriggerParams = { bulkActionId?: string; planType?: string; realtimeStreamsVersion?: string; + debounce?: { + key: string; + delay: string; + }; + /** + * Called when a run is debounced (existing delayed run found with triggerAndWait). + * Return spanIdToComplete to enable span closing when the run completes. 
+ * This allows the webapp to create a trace span for the debounced trigger. + */ + onDebounced?: (params: { + existingRun: TaskRun; + waitpoint: Waitpoint; + debounceKey: string; + }) => Promise; }; export type EngineWorker = Worker; diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 1782683969..19dce63c95 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -203,6 +203,12 @@ export const TriggerTaskRequestBody = z.object({ priority: z.number().optional(), bulkActionId: z.string().optional(), region: z.string().optional(), + debounce: z + .object({ + key: z.string(), + delay: z.string(), + }) + .optional(), }) .optional(), }); diff --git a/packages/core/src/v3/schemas/runEngine.ts b/packages/core/src/v3/schemas/runEngine.ts index 376a8522de..e4057e7ca6 100644 --- a/packages/core/src/v3/schemas/runEngine.ts +++ b/packages/core/src/v3/schemas/runEngine.ts @@ -13,6 +13,7 @@ export const TaskRunExecutionStatus = { SUSPENDED: "SUSPENDED", PENDING_CANCEL: "PENDING_CANCEL", FINISHED: "FINISHED", + DELAYED: "DELAYED", } satisfies Enum; export type TaskRunExecutionStatus = diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index 857f0cc2f3..7ebad9e7bb 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -896,6 +896,40 @@ export type TriggerOptions = { * ``` */ region?: string; + + /** + * Debounce settings for consolidating multiple trigger calls into a single delayed run. + * + * When a run with the same debounce key already exists in the delayed state, subsequent triggers + * "push" the existing run's execution time later rather than creating new runs. + * + * The debounce key is scoped to the task identifier, so different tasks can use the same key without conflicts. 
+ * + * @example + * + * ```ts + * // First trigger creates a new run, delayed by 5 seconds + * await myTask.trigger({ some: "data" }, { debounce: { key: "user-123", delay: "5s" } }); + * + * // Second trigger within 5 seconds pushes the existing run's delay further + * await myTask.trigger({ some: "data" }, { debounce: { key: "user-123", delay: "5s" } }); + * + * // After no more triggers for 5 seconds, the single run executes with the first payload + * ``` + */ + debounce?: { + /** + * Unique key scoped to the task identifier. Runs with the same key will be debounced together. + */ + key: string; + /** + * Duration string specifying how long to delay the run. If another trigger with the same key + * occurs within this duration, the delay is extended. + * + * @example "5s", "1m", "30s" + */ + delay: string; + }; }; export type TriggerAndWaitOptions = Omit; diff --git a/packages/trigger-sdk/package.json b/packages/trigger-sdk/package.json index 86a0a8a5f2..1311344d50 100644 --- a/packages/trigger-sdk/package.json +++ b/packages/trigger-sdk/package.json @@ -128,4 +128,4 @@ "main": "./dist/commonjs/v3/index.js", "types": "./dist/commonjs/v3/index.d.ts", "module": "./dist/esm/v3/index.js" -} \ No newline at end of file +} diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index 1c5426ed06..1ccd81222b 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -2054,6 +2054,7 @@ async function trigger_internal( priority: options?.priority, region: options?.region, lockToVersion: options?.version ?? 
getEnvVar("TRIGGER_VERSION"), + debounce: options?.debounce, }, }, { @@ -2285,6 +2286,7 @@ async function triggerAndWait_internal }) => { + logger.info("Processing data update", { payload }); + + // Simulate some processing work + await wait.for({ seconds: 1 }); + + logger.info("Data update processed successfully", { userId: payload.userId }); + + return { + processed: true, + userId: payload.userId, + timestamp: new Date().toISOString(), + }; + }, +}); + +/** + * Example 1: Basic Debounce + * + * This demonstrates how debounce works with rapid triggers. + * When triggered multiple times with the same key within the delay period, + * only one run will execute (with the first payload). + * + * Trigger this task multiple times rapidly with the same debounceKey to see + * how only one run is created. + */ +export const basicDebounceExample = task({ + id: "basic-debounce-example", + run: async (payload: { value: string; debounceKey: string }) => { + logger.info("Starting basic debounce example", { payload }); + + // Trigger processDataUpdate with debounce + // If this task is triggered multiple times within 5 seconds with the same + // debounceKey, only one processDataUpdate run will be created + const handle = await processDataUpdate.trigger( + { + userId: payload.debounceKey, + data: { value: payload.value, triggeredAt: new Date().toISOString() }, + }, + { + debounce: { + key: payload.debounceKey, + delay: "5s", + }, + } + ); + + logger.info("Triggered processDataUpdate with debounce", { + runId: handle.id, + debounceKey: payload.debounceKey, + }); + + return { triggeredRunId: handle.id }; + }, +}); + +/** + * Demonstration: Rapid Debounce Triggering + * + * This task demonstrates debounce in action by triggering processDataUpdate + * multiple times rapidly with the same debounce key. Despite 5 triggers, + * only ONE processDataUpdate run will be created. 
+ * + * Run this task and watch the logs - you'll see: + * - 5 "Triggering attempt" logs + * - All 5 return the SAME run ID + * - Only 1 processDataUpdate run actually executes + */ +export const demonstrateDebounce = task({ + id: "demonstrate-debounce", + run: async (payload: { debounceKey?: string }) => { + const key = payload.debounceKey ?? "demo-key"; + + logger.info("Starting debounce demonstration", { debounceKey: key }); + logger.info("Will trigger processDataUpdate 5 times rapidly with the same debounce key"); + + const handles: string[] = []; + + // Trigger 5 times rapidly - all should return the same run + for (let i = 1; i <= 5; i++) { + logger.info(`Triggering attempt ${i}/5`, { attempt: i }); + + const handle = await processDataUpdate.trigger( + { + userId: key, + data: { + attempt: i, + triggeredAt: new Date().toISOString(), + message: `This is trigger attempt ${i}`, + }, + }, + { + debounce: { + key: key, + delay: "5s", + }, + } + ); + + handles.push(handle.id); + logger.info(`Attempt ${i} returned run ID: ${handle.id}`, { + attempt: i, + runId: handle.id, + }); + + // Small delay between triggers (but still within debounce window) + await new Promise((resolve) => setTimeout(resolve, 200)); + } + + // Check if all handles are the same (they should be!) 
+ const uniqueHandles = [...new Set(handles)]; + const allSameRun = uniqueHandles.length === 1; + + logger.info("Debounce demonstration complete", { + totalTriggers: 5, + uniqueRuns: uniqueHandles.length, + allSameRun, + runIds: handles, + }); + + if (allSameRun) { + logger.info("SUCCESS: All 5 triggers returned the same run ID - debounce is working!"); + } else { + logger.warn("UNEXPECTED: Multiple runs were created", { uniqueHandles }); + } + + return { + debounceKey: key, + totalTriggers: 5, + uniqueRunsCreated: uniqueHandles.length, + allSameRun, + runId: uniqueHandles[0], + }; + }, +}); + +/** + * Demonstration: Debounce with triggerAndWait + * + * This shows how multiple parent tasks can wait on the same debounced child. + * Each parent task calls triggerAndWait with the same debounce key. + * All parents will be blocked by and receive the result from the SAME child run. + * + * To test this: + * 1. Run "demonstrate-debounce-trigger-and-wait-orchestrator" + * 2. Watch as 3 parent runs are created + * 3. All 3 parents will wait for the SAME debounced child run + * 4. 
When the child completes, all 3 parents complete with the same result + */ + +// Parent task that calls triggerAndWait with debounce +export const debounceTriggerAndWaitParent = task({ + id: "debounce-trigger-and-wait-parent", + run: async (payload: { parentNumber: number; debounceKey: string }) => { + logger.info(`Parent ${payload.parentNumber}: Starting`, { + parentNumber: payload.parentNumber, + debounceKey: payload.debounceKey, + }); + + logger.info(`Parent ${payload.parentNumber}: Calling triggerAndWait with debounce`); + + // This will be debounced - if another parent calls with the same key, + // they'll both wait for the same child run + const result = await processDataUpdate.triggerAndWait( + { + userId: payload.debounceKey, + data: { + parentNumber: payload.parentNumber, + triggeredAt: new Date().toISOString(), + }, + }, + { + debounce: { + key: payload.debounceKey, + delay: "5s", + }, + } + ); + + logger.info(`Parent ${payload.parentNumber}: Got result from child`, { result }); + + if (result.ok) { + return { + parentNumber: payload.parentNumber, + childOutput: result.output, + success: true, + }; + } else { + return { + parentNumber: payload.parentNumber, + error: "Child task failed", + success: false, + }; + } + }, +}); + +// Orchestrator that triggers multiple parents (without waiting) +export const demonstrateDebounceTriggerAndWaitOrchestrator = task({ + id: "demonstrate-debounce-trigger-and-wait-orchestrator", + run: async (payload: { debounceKey?: string; parentCount?: number }) => { + const key = payload.debounceKey ?? "wait-demo-key"; + const count = payload.parentCount ?? 
3; + + logger.info("Starting debounce triggerAndWait demonstration", { + debounceKey: key, + parentCount: count, + }); + + logger.info( + `Triggering ${count} parent tasks - each will call triggerAndWait with the same debounce key` + ); + logger.info("All parents should be blocked by the SAME debounced child run"); + + const handles: string[] = []; + + // Trigger multiple parent tasks as fast as possible (no delay) to maximize race condition chance + for (let i = 1; i <= count; i++) { + const handle = await debounceTriggerAndWaitParent.trigger({ + parentNumber: i, + debounceKey: key, + }); + + logger.info(`Triggered parent ${i}`, { runId: handle.id }); + handles.push(handle.id); + } + + logger.info("All parent tasks triggered", { + parentRunIds: handles, + debounceKey: key, + }); + + logger.info( + "Watch the parent runs - they should all complete around the same time when the single debounced child finishes" + ); + + return { + debounceKey: key, + parentCount: count, + parentRunIds: handles, + message: `Triggered ${count} parent tasks. They will all wait for the same debounced child.`, + }; + }, +}); + +/** + * Example 2: User Activity Debouncing + * + * A real-world use case: debouncing user activity updates. + * When a user performs multiple actions in quick succession, + * we only want to sync once, after they've stopped (the debounced run keeps the first trigger's payload). 
+ * + * Common use cases: + * - Search-as-you-type + * - Form auto-save + * - Activity logging + * - Rate limiting user actions + */ +export const syncUserActivity = task({ + id: "sync-user-activity", + run: async (payload: { + userId: string; + activityType: string; + details: Record; + }) => { + logger.info("Syncing user activity", { payload }); + + // Simulate syncing to external service + await wait.for({ seconds: 2 }); + + logger.info("User activity synced", { + userId: payload.userId, + activityType: payload.activityType, + }); + + return { + synced: true, + syncedAt: new Date().toISOString(), + }; + }, +}); + +export const trackUserActivity = task({ + id: "track-user-activity", + run: async (payload: { userId: string; action: string; metadata?: Record }) => { + logger.info("Tracking user activity", { payload }); + + // Debounce per user - if the same user performs multiple actions, + // only sync once after 10 seconds of inactivity + const handle = await syncUserActivity.trigger( + { + userId: payload.userId, + activityType: payload.action, + details: { + ...payload.metadata, + lastAction: payload.action, + lastActionAt: new Date().toISOString(), + }, + }, + { + debounce: { + // Key is scoped to the user, so each user has their own debounce window + key: `user-${payload.userId}`, + delay: "10s", + }, + } + ); + + logger.info("User activity tracked (debounced)", { + userId: payload.userId, + runId: handle.id, + }); + + return { runId: handle.id }; + }, +}); + +/** + * Example 3: Document Auto-Save with Debounce + * + * Simulates a document editing system where saves are debounced + * to avoid excessive save operations during rapid editing. 
+ */ +export const saveDocument = task({ + id: "save-document", + run: async (payload: { documentId: string; content: string; version: number }) => { + logger.info("Saving document", { + documentId: payload.documentId, + contentLength: payload.content.length, + version: payload.version, + }); + + // Simulate save operation + await wait.for({ seconds: 1 }); + + logger.info("Document saved successfully", { + documentId: payload.documentId, + savedAt: new Date().toISOString(), + }); + + return { + saved: true, + documentId: payload.documentId, + version: payload.version, + savedAt: new Date().toISOString(), + }; + }, +}); + +export const onDocumentEdit = task({ + id: "on-document-edit", + run: async (payload: { documentId: string; content: string; editorId: string }) => { + logger.info("Document edited", { + documentId: payload.documentId, + editorId: payload.editorId, + }); + + // Debounce saves per document - save only after 3 seconds of no edits + const handle = await saveDocument.trigger( + { + documentId: payload.documentId, + content: payload.content, + version: Date.now(), + }, + { + debounce: { + // Key is scoped to the document, so each document has its own debounce + key: `doc-${payload.documentId}`, + delay: "3s", + }, + } + ); + + return { + acknowledged: true, + pendingSaveRunId: handle.id, + }; + }, +}); + +/** + * Example 4: Webhook Consolidation + * + * When receiving many webhooks from an external service, + * debounce to consolidate them into fewer processing runs. 
+ */ +export const processWebhookBatch = task({ + id: "process-webhook-batch", + run: async (payload: { source: string; eventType: string; data: unknown }) => { + logger.info("Processing webhook batch", { + source: payload.source, + eventType: payload.eventType, + }); + + // Process the webhook data + await wait.for({ seconds: 2 }); + + logger.info("Webhook batch processed", { + source: payload.source, + eventType: payload.eventType, + }); + + return { + processed: true, + processedAt: new Date().toISOString(), + }; + }, +}); + +export const handleWebhook = task({ + id: "handle-webhook", + run: async (payload: { source: string; eventType: string; webhookId: string; data: unknown }) => { + logger.info("Received webhook", { + source: payload.source, + eventType: payload.eventType, + webhookId: payload.webhookId, + }); + + // Debounce webhooks from the same source and event type + // This consolidates rapid webhook bursts into single processing runs + const handle = await processWebhookBatch.trigger( + { + source: payload.source, + eventType: payload.eventType, + data: payload.data, + }, + { + debounce: { + key: `webhook-${payload.source}-${payload.eventType}`, + delay: "2s", + }, + } + ); + + logger.info("Webhook queued for processing (debounced)", { + webhookId: payload.webhookId, + runId: handle.id, + }); + + return { + acknowledged: true, + processingRunId: handle.id, + }; + }, +}); + +/** + * Example 5: Debounce with triggerAndWait + * + * When using triggerAndWait with debounce, the parent task will be blocked + * by the debounced child run. If another parent triggers with the same + * debounce key, it will also be blocked by the SAME child run. 
+ */ +export const debouncedChildTask = task({ + id: "debounced-child-task", + run: async (payload: { key: string; value: string }) => { + logger.info("Debounced child task executing", { payload }); + + await wait.for({ seconds: 3 }); + + logger.info("Debounced child task completed", { key: payload.key }); + + return { + result: `Processed: ${payload.value}`, + completedAt: new Date().toISOString(), + }; + }, +}); + +export const parentWithDebouncedChild = task({ + id: "parent-with-debounced-child", + run: async (payload: { parentId: string; debounceKey: string; data: string }) => { + logger.info("Parent task starting", { parentId: payload.parentId }); + + // triggerAndWait with debounce - the parent will wait for the debounced child + // If another parent triggers with the same debounce key, they'll both wait + // for the same child run + const result = await debouncedChildTask.triggerAndWait( + { + key: payload.debounceKey, + value: payload.data, + }, + { + debounce: { + key: payload.debounceKey, + delay: "5s", + }, + } + ); + + logger.info("Parent task completed", { + parentId: payload.parentId, + childResult: result, + }); + + if (result.ok) { + return { + parentId: payload.parentId, + childOutput: result.output, + }; + } else { + return { + parentId: payload.parentId, + error: "Child task failed", + }; + } + }, +}); + +/** + * Example 6: Different Delay Durations + * + * Shows various delay duration formats supported by debounce. 
+ */ +export const shortDebounce = task({ + id: "short-debounce", + run: async (payload: { key: string }) => { + logger.info("Short debounce task (500ms)", { key: payload.key }); + return { key: payload.key, delay: "500ms" }; + }, +}); + +export const mediumDebounce = task({ + id: "medium-debounce", + run: async (payload: { key: string }) => { + logger.info("Medium debounce task (5s)", { key: payload.key }); + return { key: payload.key, delay: "5s" }; + }, +}); + +export const longDebounce = task({ + id: "long-debounce", + run: async (payload: { key: string }) => { + logger.info("Long debounce task (1m)", { key: payload.key }); + return { key: payload.key, delay: "1m" }; + }, +}); + +export const testDifferentDelays = task({ + id: "test-different-delays", + run: async (payload: { key: string }) => { + logger.info("Testing different debounce delays", { key: payload.key }); + + // 500ms debounce - good for rapid UI updates + await shortDebounce.trigger( + { key: `${payload.key}-short` }, + { debounce: { key: `${payload.key}-short`, delay: "500ms" } } + ); + + // 5 second debounce - good for user input + await mediumDebounce.trigger( + { key: `${payload.key}-medium` }, + { debounce: { key: `${payload.key}-medium`, delay: "5s" } } + ); + + // 1 minute debounce - good for batch processing + await longDebounce.trigger( + { key: `${payload.key}-long` }, + { debounce: { key: `${payload.key}-long`, delay: "1m" } } + ); + + return { triggered: true }; + }, +}); From 57f45f15fc62954e5d079227ddc0b9ea9b552732 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 17 Dec 2025 15:46:42 +0000 Subject: [PATCH 02/15] support debounce options when batch triggering --- .changeset/ninety-cows-lay.md | 5 + packages/cli-v3/src/build/manifests.ts | 2 +- .../src/entryPoints/dev-run-controller.ts | 3 +- .../src/entryPoints/managed/execution.ts | 5 +- packages/core/src/v3/schemas/api.ts | 6 + packages/core/src/v3/zodSocket.ts | 2 +- packages/trigger-sdk/src/v3/shared.ts | 10 ++ 
.../hello-world/src/trigger/debounce.ts | 162 +++++++++++++++++- 8 files changed, 189 insertions(+), 6 deletions(-) create mode 100644 .changeset/ninety-cows-lay.md diff --git a/.changeset/ninety-cows-lay.md b/.changeset/ninety-cows-lay.md new file mode 100644 index 0000000000..67e588ec94 --- /dev/null +++ b/.changeset/ninety-cows-lay.md @@ -0,0 +1,5 @@ +--- +"@trigger.dev/sdk": patch +--- + +feat(sdk): Support debouncing runs when triggering with new debounce options diff --git a/packages/cli-v3/src/build/manifests.ts b/packages/cli-v3/src/build/manifests.ts index 8b1da98ceb..f1188233a5 100644 --- a/packages/cli-v3/src/build/manifests.ts +++ b/packages/cli-v3/src/build/manifests.ts @@ -54,7 +54,7 @@ export async function copyManifestToDir( */ async function computeFileHash(filePath: string): Promise { const contents = await readFile(filePath); - return createHash("sha256").update(contents).digest("hex").slice(0, 16); + return createHash("sha256").update(contents as Uint8Array).digest("hex").slice(0, 16); } /** diff --git a/packages/cli-v3/src/entryPoints/dev-run-controller.ts b/packages/cli-v3/src/entryPoints/dev-run-controller.ts index e5578567b4..5db271f9d6 100644 --- a/packages/cli-v3/src/entryPoints/dev-run-controller.ts +++ b/packages/cli-v3/src/entryPoints/dev-run-controller.ts @@ -428,7 +428,8 @@ export class DevRunController { } case "RUN_CREATED": case "QUEUED_EXECUTING": - case "QUEUED": { + case "QUEUED": + case "DELAYED": { logger.debug("Status change not handled", { status: snapshot.executionStatus }); return; } diff --git a/packages/cli-v3/src/entryPoints/managed/execution.ts b/packages/cli-v3/src/entryPoints/managed/execution.ts index 2dd3e6838e..ec927627f3 100644 --- a/packages/cli-v3/src/entryPoints/managed/execution.ts +++ b/packages/cli-v3/src/entryPoints/managed/execution.ts @@ -372,9 +372,10 @@ export class RunExecution { return; } - case "RUN_CREATED": { + case "RUN_CREATED": + case "DELAYED": { this.sendDebugLog( - "aborting execution: 
invalid status change: RUN_CREATED", + "aborting execution: invalid status change: RUN_CREATED or DELAYED", snapshotMetadata ); diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 19dce63c95..49e5869ab4 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -257,6 +257,12 @@ export const BatchTriggerTaskItem = z.object({ ttl: z.string().or(z.number().nonnegative().int()).optional(), priority: z.number().optional(), region: z.string().optional(), + debounce: z + .object({ + key: z.string(), + delay: z.string(), + }) + .optional(), }) .optional(), }); diff --git a/packages/core/src/v3/zodSocket.ts b/packages/core/src/v3/zodSocket.ts index 160620c42c..5ec1f179a4 100644 --- a/packages/core/src/v3/zodSocket.ts +++ b/packages/core/src/v3/zodSocket.ts @@ -100,7 +100,7 @@ export class ZodSocketMessageHandler( priority: item.options?.priority, region: item.options?.region, lockToVersion: item.options?.version ?? getEnvVar("TRIGGER_VERSION"), + debounce: item.options?.debounce, }, }; }) @@ -904,6 +905,7 @@ export async function batchTriggerByIdAndWait( machine: item.options?.machine, priority: item.options?.priority, region: item.options?.region, + debounce: item.options?.debounce, }, }; }) @@ -1163,6 +1165,7 @@ export async function batchTriggerTasks( priority: item.options?.priority, region: item.options?.region, lockToVersion: item.options?.version ?? getEnvVar("TRIGGER_VERSION"), + debounce: item.options?.debounce, }, }; }) @@ -1423,6 +1426,7 @@ export async function batchTriggerAndWaitTasks( priority: item.options?.priority, region: item.options?.region, lockToVersion: item.options?.version ?? 
getEnvVar("TRIGGER_VERSION"), + debounce: item.options?.debounce, }, }; } @@ -1809,6 +1814,7 @@ async function* transformBatchItemsStreamForWait( machine: item.options?.machine, priority: item.options?.priority, region: item.options?.region, + debounce: item.options?.debounce, }, }; } @@ -1859,6 +1865,7 @@ async function* transformBatchByTaskItemsStream( priority: item.options?.priority, region: item.options?.region, lockToVersion: item.options?.version ?? getEnvVar("TRIGGER_VERSION"), + debounce: item.options?.debounce, }, }; } @@ -2013,6 +2022,7 @@ async function* transformSingleTaskBatchItemsStreamForWait( machine: item.options?.machine, priority: item.options?.priority, region: item.options?.region, + debounce: item.options?.debounce, }, }; } diff --git a/references/hello-world/src/trigger/debounce.ts b/references/hello-world/src/trigger/debounce.ts index a70194a628..81b489ee54 100644 --- a/references/hello-world/src/trigger/debounce.ts +++ b/references/hello-world/src/trigger/debounce.ts @@ -1,4 +1,4 @@ -import { logger, task, wait } from "@trigger.dev/sdk/v3"; +import { batch, logger, task, wait } from "@trigger.dev/sdk/v3"; /** * A simple task that processes data updates. @@ -570,3 +570,163 @@ export const testDifferentDelays = task({ return { triggered: true }; }, }); + +/** + * Example 7: Batch Trigger with Debounce + * + * Demonstrates using debounce with batchTrigger. + * Each item in the batch can have its own debounce key and delay. + * Items with the same debounce key will be consolidated into a single run. 
+ */ +export const batchItemTask = task({ + id: "batch-item-task", + run: async (payload: { itemId: string; data: string }) => { + logger.info("Processing batch item", { payload }); + + await wait.for({ seconds: 1 }); + + logger.info("Batch item processed", { itemId: payload.itemId }); + + return { + processed: true, + itemId: payload.itemId, + processedAt: new Date().toISOString(), + }; + }, +}); + +/** + * Demonstrates batch.trigger() with debounce options on individual items. + * + * This shows how you can: + * - Use different debounce keys for different items + * - Items with the same debounce key will be consolidated + * - Items with different keys will create separate runs + * + * Run this task and watch: + * - Items 1 and 3 share debounce key "group-a" -> ONE run + * - Items 2 and 4 share debounce key "group-b" -> ONE run + * - Item 5 has unique key "group-c" -> ONE run + * - Total: 3 runs instead of 5 (but batch shows 5 items) + * + * Note: The batch itself still reports 5 items, but only 3 actual task runs + * will execute due to debouncing. + */ +export const demonstrateBatchDebounce = task({ + id: "demonstrate-batch-debounce", + run: async (payload: { prefix?: string }) => { + const prefix = payload.prefix ?? 
"batch-demo"; + + logger.info("Starting batch debounce demonstration"); + logger.info("Will trigger 5 items with 3 different debounce keys"); + logger.info( + "Items 1&3 share key 'group-a', items 2&4 share key 'group-b', item 5 has key 'group-c'" + ); + + // Use batch.trigger with debounce options on each item + const result = await batch.trigger([ + { + id: "batch-item-task", + payload: { itemId: `${prefix}-1`, data: "First item in group A" }, + options: { + debounce: { key: `${prefix}-group-a`, delay: "5s" }, + }, + }, + { + id: "batch-item-task", + payload: { itemId: `${prefix}-2`, data: "First item in group B" }, + options: { + debounce: { key: `${prefix}-group-b`, delay: "5s" }, + }, + }, + { + id: "batch-item-task", + payload: { itemId: `${prefix}-3`, data: "Second item in group A (debounced)" }, + options: { + debounce: { key: `${prefix}-group-a`, delay: "5s" }, + }, + }, + { + id: "batch-item-task", + payload: { itemId: `${prefix}-4`, data: "Second item in group B (debounced)" }, + options: { + debounce: { key: `${prefix}-group-b`, delay: "5s" }, + }, + }, + { + id: "batch-item-task", + payload: { itemId: `${prefix}-5`, data: "Only item in group C" }, + options: { + debounce: { key: `${prefix}-group-c`, delay: "5s" }, + }, + }, + ]); + + logger.info("Batch debounce demonstration complete", { + batchId: result.batchId, + totalItemsInBatch: result.runCount, + note: "Check the dashboard - only 3 actual task runs should execute due to debouncing", + }); + + return { + batchId: result.batchId, + totalItemsInBatch: result.runCount, + expectedUniqueRuns: 3, + message: + "5 items submitted, but only 3 runs will execute: group-a (1 run), group-b (1 run), group-c (1 run)", + }; + }, +}); + +/** + * Demonstrates batchTrigger on a single task with debounce. + * + * Similar to batch.trigger but using myTask.batchTrigger() syntax. + * Each item can have its own debounce configuration. + * + * When all items share the same debounce key, only ONE run will execute. 
+ */ +export const demonstrateSingleTaskBatchDebounce = task({ + id: "demonstrate-single-task-batch-debounce", + run: async (payload: { debounceKey?: string }) => { + const key = payload.debounceKey ?? "single-batch-demo"; + + logger.info("Starting single task batch debounce demonstration", { debounceKey: key }); + logger.info("Triggering 4 items with the SAME debounce key - only 1 run should execute"); + + // All items have the same debounce key, so they should all resolve to the same run + const result = await batchItemTask.batchTrigger([ + { + payload: { itemId: `${key}-1`, data: "Item 1" }, + options: { debounce: { key, delay: "5s" } }, + }, + { + payload: { itemId: `${key}-2`, data: "Item 2" }, + options: { debounce: { key, delay: "5s" } }, + }, + { + payload: { itemId: `${key}-3`, data: "Item 3" }, + options: { debounce: { key, delay: "5s" } }, + }, + { + payload: { itemId: `${key}-4`, data: "Item 4" }, + options: { debounce: { key, delay: "5s" } }, + }, + ]); + + logger.info("Single task batch debounce complete", { + batchId: result.batchId, + totalItemsInBatch: result.runCount, + debounceKey: key, + note: "All items share the same debounce key, so only 1 task run should execute", + }); + + return { + batchId: result.batchId, + totalItemsInBatch: result.runCount, + debounceKey: key, + expectedUniqueRuns: 1, + message: "4 items submitted with same debounce key - only 1 run will execute", + }; + }, +}); From 96680f00f8502417fb394fe4472ba83658c9aefc Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 17 Dec 2025 16:12:17 +0000 Subject: [PATCH 03/15] fixed run engine delay tests --- .../src/engine/tests/delays.test.ts | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/internal-packages/run-engine/src/engine/tests/delays.test.ts b/internal-packages/run-engine/src/engine/tests/delays.test.ts index 162967e9e9..2ae8afd88e 100644 --- a/internal-packages/run-engine/src/engine/tests/delays.test.ts +++ 
b/internal-packages/run-engine/src/engine/tests/delays.test.ts @@ -73,10 +73,10 @@ describe("RunEngine delays", () => { prisma ); - //should be created but not queued yet + //should be delayed but not queued yet const executionData = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); //wait for 1 seconds await setTimeout(1_000); @@ -155,10 +155,10 @@ describe("RunEngine delays", () => { prisma ); - //should be created but not queued yet + //should be delayed but not queued yet const executionData = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); const rescheduleTo = new Date(Date.now() + 1_500); const updatedRun = await engine.rescheduleDelayedRun({ @@ -170,10 +170,10 @@ describe("RunEngine delays", () => { //wait so the initial delay passes await setTimeout(1_000); - //should still be created + //should still be delayed (rescheduled) const executionData2 = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData2); - expect(executionData2.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData2.snapshot.executionStatus).toBe("DELAYED"); //wait so the updated delay passes await setTimeout(1_750); @@ -253,10 +253,10 @@ describe("RunEngine delays", () => { prisma ); - //should be created but not queued yet + //should be delayed but not queued yet const executionData = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); expect(run.status).toBe("DELAYED"); //wait for 1 seconds @@ -356,10 +356,10 @@ describe("RunEngine 
delays", () => { prisma ); - //verify it's created but not queued + //verify it's delayed but not queued const executionData = await engine.getRunExecutionData({ runId: run.id }); assertNonNullable(executionData); - expect(executionData.snapshot.executionStatus).toBe("RUN_CREATED"); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); expect(run.status).toBe("DELAYED"); //cancel the run From fb4138a7998ff613c3fe9fdd978688d97a005ae4 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 17 Dec 2025 16:22:22 +0000 Subject: [PATCH 04/15] better delay calculations --- .../src/engine/systems/debounceSystem.ts | 70 ++++++++++++------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts index 73a1a6f192..07e18afc26 100644 --- a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -216,10 +216,13 @@ export class DebounceSystem { // Timed out waiting - the other server may have failed // Delete the stale pending key and return "new" - this.$.logger.warn("waitForExistingRun: timed out waiting for pending claim, deleting stale key", { - redisKey, - debounceKey: debounce.key, - }); + this.$.logger.warn( + "waitForExistingRun: timed out waiting for pending claim, deleting stale key", + { + redisKey, + debounceKey: debounce.key, + } + ); await this.redis.del(redisKey); return { status: "new" }; } @@ -287,17 +290,15 @@ export class DebounceSystem { return { status: "new" }; } - // Calculate new delay - const delayMs = parseNaturalLanguageDuration(debounce.delay); - if (!delayMs) { + // Calculate new delay - parseNaturalLanguageDuration returns a Date (now + duration) + const newDelayUntil = parseNaturalLanguageDuration(debounce.delay); + if (!newDelayUntil) { this.$.logger.error("handleExistingRun: invalid delay duration", { delay: 
debounce.delay, }); return { status: "new" }; } - const newDelayUntil = new Date(Date.now() + delayMs.getTime() - Date.now()); - // Check if max debounce duration would be exceeded const runCreatedAt = existingRun.createdAt; const maxDelayUntil = new Date(runCreatedAt.getTime() + this.maxDebounceDurationMs); @@ -316,25 +317,42 @@ export class DebounceSystem { return { status: "max_duration_exceeded" }; } - // Reschedule the delayed run - await this.delayedRunSystem.rescheduleDelayedRun({ - runId: existingRunId, - delayUntil: newDelayUntil, - tx: prisma, - }); + // Only reschedule if the new delay would push the run later + // This ensures debounce always "pushes later", never earlier + const currentDelayUntil = existingRun.delayUntil; + const shouldReschedule = !currentDelayUntil || newDelayUntil > currentDelayUntil; + + if (shouldReschedule) { + // Reschedule the delayed run + await this.delayedRunSystem.rescheduleDelayedRun({ + runId: existingRunId, + delayUntil: newDelayUntil, + tx: prisma, + }); - // Update Redis TTL - const ttlMs = Math.max( - newDelayUntil.getTime() - Date.now() + 60_000, // Add 1 minute buffer - 60_000 - ); - await this.redis.pexpire(redisKey, ttlMs); + // Update Redis TTL + const ttlMs = Math.max( + newDelayUntil.getTime() - Date.now() + 60_000, // Add 1 minute buffer + 60_000 + ); + await this.redis.pexpire(redisKey, ttlMs); - this.$.logger.debug("handleExistingRun: rescheduled existing debounced run", { - existingRunId, - debounceKey: debounce.key, - newDelayUntil, - }); + this.$.logger.debug("handleExistingRun: rescheduled existing debounced run", { + existingRunId, + debounceKey: debounce.key, + newDelayUntil, + }); + } else { + this.$.logger.debug( + "handleExistingRun: skipping reschedule, new delay is not later than current", + { + existingRunId, + debounceKey: debounce.key, + currentDelayUntil, + newDelayUntil, + } + ); + } return { status: "existing", From 36cded7fce36354b379fa444ee9b47d3d50d554d Mon Sep 17 00:00:00 2001 From: Eric 
Allam Date: Wed, 17 Dec 2025 16:28:59 +0000 Subject: [PATCH 05/15] better docs for options --- .../run-engine/src/engine/tests/debounce.test.ts | 8 ++++---- packages/core/src/v3/types/tasks.ts | 6 +++++- references/hello-world/src/trigger/debounce.ts | 8 ++++---- 3 files changed, 13 insertions(+), 9 deletions(-) diff --git a/internal-packages/run-engine/src/engine/tests/debounce.test.ts b/internal-packages/run-engine/src/engine/tests/debounce.test.ts index 791dda2d59..dfa0bc4e43 100644 --- a/internal-packages/run-engine/src/engine/tests/debounce.test.ts +++ b/internal-packages/run-engine/src/engine/tests/debounce.test.ts @@ -480,7 +480,7 @@ describe("RunEngine debounce", () => { await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); - // First trigger with 500ms delay + // First trigger with 1s delay const run = await engine.trigger( { number: 1, @@ -497,10 +497,10 @@ describe("RunEngine debounce", () => { queue: "task/test-task", isTest: false, tags: [], - delayUntil: new Date(Date.now() + 500), + delayUntil: new Date(Date.now() + 1000), debounce: { key: "user-123", - delay: "500ms", + delay: "1s", }, }, prisma @@ -512,7 +512,7 @@ describe("RunEngine debounce", () => { expect(executionData.snapshot.executionStatus).toBe("DELAYED"); // Wait for delay to pass - await setTimeout(1000); + await setTimeout(1500); // Should now be QUEUED executionData = await engine.getRunExecutionData({ runId: run.id }); diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index 7ebad9e7bb..017edfac5d 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -920,13 +920,17 @@ export type TriggerOptions = { debounce?: { /** * Unique key scoped to the task identifier. Runs with the same key will be debounced together. + * Maximum length is 512 characters. */ key: string; /** * Duration string specifying how long to delay the run. 
If another trigger with the same key * occurs within this duration, the delay is extended. * - * @example "5s", "1m", "30s" + * Supported formats: `{number}s` (seconds), `{number}m` (minutes), `{number}h` (hours), + * `{number}d` (days), `{number}w` (weeks). Minimum delay is 1 second. + * + * @example "1s", "5s", "1m", "30m", "1h" */ delay: string; }; diff --git a/references/hello-world/src/trigger/debounce.ts b/references/hello-world/src/trigger/debounce.ts index 81b489ee54..62bd11966b 100644 --- a/references/hello-world/src/trigger/debounce.ts +++ b/references/hello-world/src/trigger/debounce.ts @@ -523,8 +523,8 @@ export const parentWithDebouncedChild = task({ export const shortDebounce = task({ id: "short-debounce", run: async (payload: { key: string }) => { - logger.info("Short debounce task (500ms)", { key: payload.key }); - return { key: payload.key, delay: "500ms" }; + logger.info("Short debounce task (1s)", { key: payload.key }); + return { key: payload.key, delay: "1s" }; }, }); @@ -549,10 +549,10 @@ export const testDifferentDelays = task({ run: async (payload: { key: string }) => { logger.info("Testing different debounce delays", { key: payload.key }); - // 500ms debounce - good for rapid UI updates + // 1 second debounce - good for rapid UI updates await shortDebounce.trigger( { key: `${payload.key}-short` }, - { debounce: { key: `${payload.key}-short`, delay: "500ms" } } + { debounce: { key: `${payload.key}-short`, delay: "1s" } } ); // 5 second debounce - good for user input From f0484209a275c45ed5d13636a22efaa8c4e5ebb1 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Wed, 17 Dec 2025 16:33:32 +0000 Subject: [PATCH 06/15] debounce keys with a maximum size of 512 --- packages/core/src/v3/schemas/api.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 49e5869ab4..8630e183dc 100644 --- a/packages/core/src/v3/schemas/api.ts +++ 
b/packages/core/src/v3/schemas/api.ts @@ -205,7 +205,7 @@ export const TriggerTaskRequestBody = z.object({ region: z.string().optional(), debounce: z .object({ - key: z.string(), + key: z.string().max(512), delay: z.string(), }) .optional(), @@ -259,7 +259,7 @@ export const BatchTriggerTaskItem = z.object({ region: z.string().optional(), debounce: z .object({ - key: z.string(), + key: z.string().max(512), delay: z.string(), }) .optional(), From b11562701e32e9d61771e1c8e2809cd788d250fa Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 09:38:35 +0000 Subject: [PATCH 07/15] treat DELAYED execution status as an initial state --- internal-packages/run-engine/src/engine/statuses.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal-packages/run-engine/src/engine/statuses.ts b/internal-packages/run-engine/src/engine/statuses.ts index ce7e9c8129..8483225623 100644 --- a/internal-packages/run-engine/src/engine/statuses.ts +++ b/internal-packages/run-engine/src/engine/statuses.ts @@ -37,7 +37,7 @@ export function isFinishedOrPendingFinished(status: TaskRunExecutionStatus): boo } export function isInitialState(status: TaskRunExecutionStatus): boolean { - const startedStatuses: TaskRunExecutionStatus[] = ["RUN_CREATED"]; + const startedStatuses: TaskRunExecutionStatus[] = ["RUN_CREATED", "DELAYED"]; return startedStatuses.includes(status); } From f613c00d115c9ee7967c88a8b73b927baaca35ca Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 09:49:49 +0000 Subject: [PATCH 08/15] make it clear what's happening here --- .../app/runEngine/services/triggerTask.server.ts | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 9016d4bc25..1cf42f57b9 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -379,9
+379,15 @@ export class RunEngineTriggerTaskService { this.prisma ); - // If the returned run has a different friendlyId, it was debounced - // Stop the outer span to prevent a duplicate - the debounced span was created via onDebounced - if (taskRun.friendlyId !== runFriendlyId) { + // If the returned run has a different friendlyId, it was debounced. + // For triggerAndWait: stop the outer span since a replacement debounced span was created via onDebounced. + // For regular trigger: let the span complete normally - no replacement span needed since the + // original run already has its span from when it was first created. + if ( + taskRun.friendlyId !== runFriendlyId && + body.options?.debounce && + body.options?.resumeParentOnCompletion + ) { event.stop(); } From cddc7b638dc96d6ca22d827cea436a8c5226e919 Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 10:05:19 +0000 Subject: [PATCH 09/15] fix deleting race condition using a lua script --- .../src/engine/systems/debounceSystem.ts | 109 +++++++++++++++++- 1 file changed, 104 insertions(+), 5 deletions(-) diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts index 07e18afc26..20b3320afe 100644 --- a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -1,4 +1,10 @@ -import { createRedisClient, Redis, RedisOptions } from "@internal/redis"; +import { + createRedisClient, + Redis, + RedisOptions, + type Callback, + type Result, +} from "@internal/redis"; import { startSpan } from "@internal/tracing"; import { parseNaturalLanguageDuration } from "@trigger.dev/core/v3/isomorphic"; import { PrismaClientOrTransaction, TaskRun, Waitpoint } from "@trigger.dev/database"; @@ -79,6 +85,30 @@ export class DebounceSystem { this.executionSnapshotSystem = options.executionSnapshotSystem; this.delayedRunSystem = options.delayedRunSystem; 
this.maxDebounceDurationMs = options.maxDebounceDurationMs; + + this.#registerCommands(); + } + + #registerCommands() { + // Atomically deletes a key only if its value starts with "pending:". + // Returns [1, nil] if deleted (was pending or didn't exist) + // Returns [0, value] if not deleted (has a run ID) + // This prevents the race condition where between checking "still pending?" + // and calling DEL, the original server could complete and register a valid run ID. + this.redis.defineCommand("conditionallyDeletePendingKey", { + numberOfKeys: 1, + lua: ` +local value = redis.call('GET', KEYS[1]) +if not value then + return { 1, nil } +end +if string.sub(value, 1, 8) == 'pending:' then + redis.call('DEL', KEYS[1]) + return { 1, nil } +end +return { 0, value } + `, + }); } /** @@ -89,6 +119,34 @@ export class DebounceSystem { return `${envId}:${taskId}:${debounceKey}`; } + /** + * Atomically deletes a key only if its value still starts with "pending:". + * This prevents the race condition where between the final GET check and DEL, + * the original server could complete and register a valid run ID. + * + * @returns { deleted: true } if the key was deleted or didn't exist + * @returns { deleted: false, existingRunId: string } if the key has a valid run ID + */ + private async conditionallyDeletePendingKey( + redisKey: string + ): Promise<{ deleted: true } | { deleted: false; existingRunId: string }> { + const result = await this.redis.conditionallyDeletePendingKey(redisKey); + + if (!result) { + // Should not happen, but treat as deleted if no result + return { deleted: true }; + } + + const [wasDeleted, currentValue] = result; + + if (wasDeleted === 1) { + return { deleted: true }; + } + + // Key exists with a valid run ID + return { deleted: false, existingRunId: currentValue! }; + } + /** * Atomically claims a debounce key using SET NX. 
* This prevents the race condition where two servers both check for an existing @@ -215,16 +273,43 @@ export class DebounceSystem { } // Timed out waiting - the other server may have failed - // Delete the stale pending key and return "new" + // Conditionally delete the key only if it's still pending + // This prevents the race where the original server completed between our last check and now this.$.logger.warn( - "waitForExistingRun: timed out waiting for pending claim, deleting stale key", + "waitForExistingRun: timed out waiting for pending claim, attempting conditional delete", { redisKey, debounceKey: debounce.key, } ); - await this.redis.del(redisKey); - return { status: "new" }; + + const deleteResult = await this.conditionallyDeletePendingKey(redisKey); + + if (deleteResult.deleted) { + // Key was pending (or didn't exist) - safe to create new run + this.$.logger.debug("waitForExistingRun: stale pending key deleted, returning new", { + redisKey, + debounceKey: debounce.key, + }); + return { status: "new" }; + } + + // Key now has a valid run ID - the original server completed! + // Handle the existing run instead of creating a duplicate + this.$.logger.debug( + "waitForExistingRun: original server completed during timeout, handling existing run", + { + redisKey, + debounceKey: debounce.key, + existingRunId: deleteResult.existingRunId, + } + ); + return await this.handleExistingRun({ + existingRunId: deleteResult.existingRunId, + redisKey, + debounce, + tx, + }); } /** @@ -558,3 +643,17 @@ export class DebounceSystem { await this.redis.quit(); } } + +declare module "@internal/redis" { + interface RedisCommander { + /** + * Atomically deletes a key only if its value starts with "pending:". 
+ * @returns [1, nil] if deleted (was pending or didn't exist) + * @returns [0, value] if not deleted (has a run ID) + */ + conditionallyDeletePendingKey( + key: string, + callback?: Callback<[number, string | null]> + ): Result<[number, string | null], Context>; + } +} From b383a732c7782323f20094177d867226ab8fbc4b Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 12:20:39 +0000 Subject: [PATCH 10/15] fix race condition when the delayUntil is updated while the enqueueDelayedRun worker job is executing --- .../src/engine/systems/delayedRunSystem.ts | 18 ++- .../src/engine/tests/delays.test.ts | 106 ++++++++++++++++++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts index 1a567a0b40..740ce1a849 100644 --- a/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/delayedRunSystem.ts @@ -41,7 +41,10 @@ export class DelayedRunSystem { const snapshot = await getLatestExecutionSnapshot(prisma, runId); // Check if the run is still in DELAYED status (or legacy RUN_CREATED for older runs) - if (snapshot.executionStatus !== "DELAYED" && snapshot.executionStatus !== "RUN_CREATED") { + if ( + snapshot.executionStatus !== "DELAYED" && + snapshot.executionStatus !== "RUN_CREATED" + ) { throw new ServiceValidationError("Cannot reschedule a run that is not delayed"); } @@ -128,6 +131,19 @@ export class DelayedRunSystem { throw new Error(`#enqueueDelayedRun: run not found: ${runId}`); } + // Check if delayUntil has been rescheduled to the future (e.g., by debounce) + // If so, don't enqueue - the rescheduled worker job will handle it + if (run.delayUntil && run.delayUntil > new Date()) { + this.$.logger.debug( + "enqueueDelayedRun: delay was rescheduled to the future, skipping enqueue", + { + runId, + delayUntil: run.delayUntil, + } + ); + return; + } + 
// Now we need to enqueue the run into the RunQueue // Skip the lock in enqueueRun since we already hold it await this.enqueueSystem.enqueueRun({ diff --git a/internal-packages/run-engine/src/engine/tests/delays.test.ts b/internal-packages/run-engine/src/engine/tests/delays.test.ts index 2ae8afd88e..8a93aa1ad1 100644 --- a/internal-packages/run-engine/src/engine/tests/delays.test.ts +++ b/internal-packages/run-engine/src/engine/tests/delays.test.ts @@ -401,4 +401,110 @@ describe("RunEngine delays", () => { await engine.quit(); } }); + + containerTest( + "enqueueDelayedRun respects rescheduled delayUntil", + async ({ prisma, redisOptions }) => { + // This test verifies the race condition fix where if delayUntil is updated + // (e.g., by debounce reschedule) while the worker job is executing, + // the run should NOT be enqueued at the original time. + // + // The race condition occurs when: + // 1. Worker job is scheduled for T1 + // 2. rescheduleDelayedRun updates delayUntil to T2 in DB + // 3. worker.reschedule() tries to update the job, but it's already dequeued + // 4. Original worker job fires and calls enqueueDelayedRun + // + // Without the fix: Run would be enqueued at T1 (wrong!) 
+ // With the fix: enqueueDelayedRun checks delayUntil > now and skips + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Create a delayed run with a short delay (300ms) + const run = await engine.trigger( + { + number: 1, + friendlyId: "run_1235", + environment: authenticatedEnvironment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 300), + }, + prisma + ); + + // Verify it's delayed + const executionData = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData); + expect(executionData.snapshot.executionStatus).toBe("DELAYED"); + + // Simulate race condition: directly update delayUntil in the database to a future time + // This simulates what happens when rescheduleDelayedRun updates the DB but the + // worker.reschedule() call doesn't affect the already-dequeued job + const newDelayUntil = new Date(Date.now() + 10_000); // 10 seconds in the future + await prisma.taskRun.update({ + where: { id: run.id }, + data: { delayUntil: newDelayUntil }, + }); + + // Wait past the original delay (500ms) so the worker job fires + await setTimeout(500); + + // KEY ASSERTION: The run should still 
be DELAYED because the fix checks delayUntil > now + // Without the fix, the run would be QUEUED here (wrong!) + const executionData2 = await engine.getRunExecutionData({ runId: run.id }); + assertNonNullable(executionData2); + expect(executionData2.snapshot.executionStatus).toBe("DELAYED"); + + // Note: We don't test the run eventually becoming QUEUED here because we only + // updated the DB (simulating the race). In the real scenario, rescheduleDelayedRun + // would also reschedule the worker job to fire at the new delayUntil time. + } finally { + await engine.quit(); + } + } + ); }); From 3bbee8c426e3ba11d4347a657ef7c7a33454867d Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 14:00:57 +0000 Subject: [PATCH 11/15] Add an option for choosing the debounce mode, trailing or leading, with default leading --- docs/triggering.mdx | 25 +- .../run-engine/src/engine/index.ts | 17 +- .../src/engine/systems/debounceSystem.ts | 98 +++- .../src/engine/tests/debounce.test.ts | 454 ++++++++++++++++++ .../run-engine/src/engine/types.ts | 1 + packages/core/src/v3/schemas/api.ts | 2 + packages/core/src/v3/types/tasks.ts | 24 +- .../hello-world/src/trigger/debounce.ts | 326 +++++++++++++ 8 files changed, 938 insertions(+), 9 deletions(-) diff --git a/docs/triggering.mdx b/docs/triggering.mdx index 741da53f15..a7bbc74f4e 100644 --- a/docs/triggering.mdx +++ b/docs/triggering.mdx @@ -861,14 +861,37 @@ The `debounce` option accepts: - `key` - A unique string to identify the debounce group (scoped to the task) - `delay` - Duration string specifying how long to delay (e.g., "5s", "1m", "30s") +- `mode` - Optional. Controls which trigger's data is used: `"leading"` (default) or `"trailing"` **How it works:** 1. First trigger with a debounce key creates a new delayed run 2. Subsequent triggers with the same key (while the run is still delayed) push the execution time further -3. 
Once no new triggers occur within the delay duration, the run executes with the **first** payload +3. Once no new triggers occur within the delay duration, the run executes 4. After the run starts executing, a new trigger with the same key will create a new run +**Leading vs Trailing mode:** + +By default, debounce uses **leading mode** - the run executes with data from the **first** trigger. + +With **trailing mode**, each subsequent trigger updates the run's data (payload, metadata, tags, maxAttempts, maxDuration, and machine), so the run executes with data from the **last** trigger: + +```ts +// Leading mode (default): runs with first payload +await myTask.trigger({ count: 1 }, { debounce: { key: "user-123", delay: "5s" } }); +await myTask.trigger({ count: 2 }, { debounce: { key: "user-123", delay: "5s" } }); +// After 5 seconds, runs with { count: 1 } + +// Trailing mode: runs with last payload +await myTask.trigger({ count: 1 }, { debounce: { key: "user-123", delay: "5s", mode: "trailing" } }); +await myTask.trigger({ count: 2 }, { debounce: { key: "user-123", delay: "5s", mode: "trailing" } }); +// After 5 seconds, runs with { count: 2 } +``` + +Use **trailing mode** when you want to process the most recent data, such as: +- Saving the latest version of a document after edits stop +- Processing the final state after a series of rapid updates + **With `triggerAndWait`:** When using `triggerAndWait` with debounce, the parent run blocks on the existing debounced run if one exists: diff --git a/internal-packages/run-engine/src/engine/index.ts b/internal-packages/run-engine/src/engine/index.ts index 69851cc047..1b53d6378d 100644 --- a/internal-packages/run-engine/src/engine/index.ts +++ b/internal-packages/run-engine/src/engine/index.ts @@ -457,7 +457,22 @@ export class RunEngine { const debounceResult = await this.debounceSystem.handleDebounce({ environmentId: environment.id, taskIdentifier, - debounce, + debounce: + debounce.mode === "trailing" + ? 
{ + ...debounce, + updateData: { + payload, + payloadType, + metadata, + metadataType, + tags, + maxAttempts, + maxDurationInSeconds, + machine, + }, + } + : debounce, tx: prisma, }); diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts index 20b3320afe..f749c07683 100644 --- a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -16,6 +16,18 @@ import { DelayedRunSystem } from "./delayedRunSystem.js"; export type DebounceOptions = { key: string; delay: string; + mode?: "leading" | "trailing"; + /** When mode: "trailing", these fields will be used to update the existing run */ + updateData?: { + payload: string; + payloadType: string; + metadata?: string; + metadataType?: string; + tags?: { id: string; name: string }[]; + maxAttempts?: number; + maxDurationInSeconds?: number; + machine?: string; + }; }; export type DebounceSystemOptions = { @@ -439,9 +451,24 @@ return { 0, value } ); } + // Update run data when mode is "trailing" + let updatedRun = existingRun; + if (debounce.mode === "trailing" && debounce.updateData) { + updatedRun = await this.#updateRunForTrailingMode({ + runId: existingRunId, + updateData: debounce.updateData, + tx: prisma, + }); + + this.$.logger.debug("handleExistingRun: updated run data for trailing mode", { + existingRunId, + debounceKey: debounce.key, + }); + } + return { status: "existing", - run: existingRun, + run: updatedRun, waitpoint: existingRun.associatedWaitpoint, }; }); @@ -639,6 +666,75 @@ return { 0, value } }); } + /** + * Updates a run's data for trailing mode debounce. + * Updates: payload, metadata, tags, maxAttempts, maxDurationInSeconds, machinePreset + */ + async #updateRunForTrailingMode({ + runId, + updateData, + tx, + }: { + runId: string; + updateData: NonNullable; + tx?: PrismaClientOrTransaction; + }): Promise { + const prisma = tx ?? 
this.$.prisma; + + // Build the update object + const updatePayload: { + payload: string; + payloadType: string; + metadata?: string; + metadataType?: string; + maxAttempts?: number; + maxDurationInSeconds?: number; + machinePreset?: string; + runTags?: string[]; + tags?: { + set: { id: string }[]; + }; + } = { + payload: updateData.payload, + payloadType: updateData.payloadType, + }; + + if (updateData.metadata !== undefined) { + updatePayload.metadata = updateData.metadata; + updatePayload.metadataType = updateData.metadataType ?? "application/json"; + } + + if (updateData.maxAttempts !== undefined) { + updatePayload.maxAttempts = updateData.maxAttempts; + } + + if (updateData.maxDurationInSeconds !== undefined) { + updatePayload.maxDurationInSeconds = updateData.maxDurationInSeconds; + } + + if (updateData.machine !== undefined) { + updatePayload.machinePreset = updateData.machine; + } + + // Handle tags update - replace existing tags + if (updateData.tags !== undefined) { + updatePayload.runTags = updateData.tags.map((t) => t.name); + updatePayload.tags = { + set: updateData.tags.map((t) => ({ id: t.id })), + }; + } + + const updatedRun = await prisma.taskRun.update({ + where: { id: runId }, + data: updatePayload, + include: { + associatedWaitpoint: true, + }, + }); + + return updatedRun; + } + async quit(): Promise { await this.redis.quit(); } diff --git a/internal-packages/run-engine/src/engine/tests/debounce.test.ts b/internal-packages/run-engine/src/engine/tests/debounce.test.ts index dfa0bc4e43..3d4e56af7b 100644 --- a/internal-packages/run-engine/src/engine/tests/debounce.test.ts +++ b/internal-packages/run-engine/src/engine/tests/debounce.test.ts @@ -1485,5 +1485,459 @@ describe("RunEngine debounce", () => { } } ); + + containerTest( + "Debounce trailing mode: updates payload on subsequent triggers", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new 
RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger creates run with trailing mode + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_trailing1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + expect(run1.friendlyId).toBe("run_trailing1"); + expect(run1.payload).toBe('{"data": "first"}'); + + // Second trigger with trailing mode should update the payload + const run2 = await engine.trigger( + { + number: 2, + friendlyId: "run_trailing2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Should return the same run + expect(run2.id).toBe(run1.id); + + // Verify the payload was updated to the second trigger's payload + 
const dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.payload).toBe('{"data": "second"}'); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce trailing mode: updates metadata on subsequent triggers", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger with metadata + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_trailingmeta1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + metadata: '{"version": 1}', + metadataType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-meta-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Second trigger with different metadata + await engine.trigger( + { + number: 2, + friendlyId: "run_trailingmeta2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + metadata: '{"version": 2, "extra": "field"}', + 
metadataType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-meta-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Verify metadata was updated + const dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.metadata).toBe('{"version": 2, "extra": "field"}'); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce trailing mode: updates maxAttempts and maxDuration", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger with maxAttempts and maxDuration + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_trailingopts1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + maxAttempts: 3, + maxDurationInSeconds: 60, + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: 
"trailing-opts-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Verify initial values + let dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.maxAttempts).toBe(3); + expect(dbRun.maxDurationInSeconds).toBe(60); + + // Second trigger with different maxAttempts and maxDuration + await engine.trigger( + { + number: 2, + friendlyId: "run_trailingopts2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + maxAttempts: 5, + maxDurationInSeconds: 120, + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "trailing-opts-key", + delay: "5s", + mode: "trailing", + }, + }, + prisma + ); + + // Verify values were updated + dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.maxAttempts).toBe(5); + expect(dbRun.maxDurationInSeconds).toBe(120); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "Debounce leading mode (default): does NOT update payload", + async ({ prisma, redisOptions }) => { + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + try { + const taskIdentifier = "test-task"; + + await 
setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // First trigger creates run (leading mode - default) + const run1 = await engine.trigger( + { + number: 1, + friendlyId: "run_leading1", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "first"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12345", + spanId: "s12345", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "leading-key", + delay: "5s", + // mode: "leading" is default, not specifying it + }, + }, + prisma + ); + + // Second trigger should NOT update the payload (leading mode) + await engine.trigger( + { + number: 2, + friendlyId: "run_leading2", + environment: authenticatedEnvironment, + taskIdentifier, + payload: '{"data": "second"}', + payloadType: "application/json", + context: {}, + traceContext: {}, + traceId: "t12346", + spanId: "s12346", + workerQueue: "main", + queue: "task/test-task", + isTest: false, + tags: [], + delayUntil: new Date(Date.now() + 5000), + debounce: { + key: "leading-key", + delay: "5s", + }, + }, + prisma + ); + + // Verify the payload is still the first trigger's payload + const dbRun = await prisma.taskRun.findFirst({ + where: { id: run1.id }, + }); + assertNonNullable(dbRun); + expect(dbRun.payload).toBe('{"data": "first"}'); + } finally { + await engine.quit(); + } + } + ); }); diff --git a/internal-packages/run-engine/src/engine/types.ts b/internal-packages/run-engine/src/engine/types.ts index e819cf8c1b..3b2ae8c9a1 100644 --- a/internal-packages/run-engine/src/engine/types.ts +++ b/internal-packages/run-engine/src/engine/types.ts @@ -172,6 +172,7 @@ export type TriggerParams = { debounce?: { key: string; delay: string; + mode?: "leading" | "trailing"; }; /** * Called when a run is debounced (existing delayed run found with triggerAndWait). 
diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 8630e183dc..5e5fff18ea 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -207,6 +207,7 @@ export const TriggerTaskRequestBody = z.object({ .object({ key: z.string().max(512), delay: z.string(), + mode: z.enum(["leading", "trailing"]).optional(), }) .optional(), }) @@ -261,6 +262,7 @@ export const BatchTriggerTaskItem = z.object({ .object({ key: z.string().max(512), delay: z.string(), + mode: z.enum(["leading", "trailing"]).optional(), }) .optional(), }) diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index 017edfac5d..f463b20f49 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -908,13 +908,15 @@ export type TriggerOptions = { * @example * * ```ts - * // First trigger creates a new run, delayed by 5 seconds - * await myTask.trigger({ some: "data" }, { debounce: { key: "user-123", delay: "5s" } }); + * // Leading mode (default): executes with the FIRST payload + * await myTask.trigger({ some: "data1" }, { debounce: { key: "user-123", delay: "5s" } }); + * await myTask.trigger({ some: "data2" }, { debounce: { key: "user-123", delay: "5s" } }); + * // After 5 seconds, runs with { some: "data1" } * - * // Second trigger within 5 seconds pushes the existing run's delay further - * await myTask.trigger({ some: "data" }, { debounce: { key: "user-123", delay: "5s" } }); - * - * // After no more triggers for 5 seconds, the single run executes with the first payload + * // Trailing mode: executes with the LAST payload + * await myTask.trigger({ some: "data1" }, { debounce: { key: "user-123", delay: "5s", mode: "trailing" } }); + * await myTask.trigger({ some: "data2" }, { debounce: { key: "user-123", delay: "5s", mode: "trailing" } }); + * // After 5 seconds, runs with { some: "data2" } * ``` */ debounce?: { @@ -933,6 +935,16 @@ export type TriggerOptions = { 
* @example "1s", "5s", "1m", "30m", "1h" */ delay: string; + /** + * Controls which trigger's data is used when the debounced run finally executes. + * + * - `"leading"` (default): Use data from the first trigger (payload, metadata, tags, etc.) + * - `"trailing"`: Use data from the last trigger. Each subsequent trigger updates the run's + * payload, metadata, tags, maxAttempts, maxDuration, and machine preset. + * + * @default "leading" + */ + mode?: "leading" | "trailing"; }; }; diff --git a/references/hello-world/src/trigger/debounce.ts b/references/hello-world/src/trigger/debounce.ts index 62bd11966b..e396714eb8 100644 --- a/references/hello-world/src/trigger/debounce.ts +++ b/references/hello-world/src/trigger/debounce.ts @@ -730,3 +730,329 @@ export const demonstrateSingleTaskBatchDebounce = task({ }; }, }); + +/** + * Example 8: Trailing Mode - Process Latest Data + * + * Trailing mode updates the run's payload (and other options) with each subsequent trigger. + * When the debounce window closes, the task runs with the LAST payload instead of the first. + * + * This is perfect for scenarios like: + * - Auto-saving the latest document state + * - Processing the final search query after typing stops + * - Aggregating real-time data and processing the latest snapshot + */ +export const processLatestData = task({ + id: "process-latest-data", + run: async (payload: { version: number; content: string; timestamp: string }) => { + logger.info("Processing latest data", { payload }); + + await wait.for({ seconds: 1 }); + + logger.info("Processed latest data", { + version: payload.version, + content: payload.content, + }); + + return { + processed: true, + version: payload.version, + content: payload.content, + processedAt: new Date().toISOString(), + }; + }, +}); + +/** + * Demonstrates trailing mode in action. + * + * This task triggers processLatestData 5 times rapidly with different payloads. 
+ * With mode: "trailing", the run will execute with version 5 (the LAST payload), + * not version 1 (the first payload). + * + * Compare this to the demonstrateDebounce task which uses the default leading mode. + */ +export const demonstrateTrailingMode = task({ + id: "demonstrate-trailing-mode", + run: async (payload: { debounceKey?: string }) => { + const key = payload.debounceKey ?? "trailing-demo-key"; + + logger.info("Starting trailing mode demonstration", { debounceKey: key }); + logger.info("Will trigger processLatestData 5 times with mode: 'trailing'"); + logger.info("The run should execute with version 5 (the LAST payload)"); + + const handles: string[] = []; + + // Trigger 5 times rapidly - with trailing mode, the LAST payload wins + for (let i = 1; i <= 5; i++) { + logger.info(`Triggering version ${i}/5`, { version: i }); + + const handle = await processLatestData.trigger( + { + version: i, + content: `Content version ${i}`, + timestamp: new Date().toISOString(), + }, + { + debounce: { + key: key, + delay: "5s", + mode: "trailing", // Use trailing mode - LAST payload wins + }, + } + ); + + handles.push(handle.id); + logger.info(`Version ${i} returned run ID: ${handle.id}`, { + version: i, + runId: handle.id, + }); + + // Small delay between triggers + await new Promise((resolve) => setTimeout(resolve, 200)); + } + + // All handles should be the same run + const uniqueHandles = [...new Set(handles)]; + const allSameRun = uniqueHandles.length === 1; + + logger.info("Trailing mode demonstration complete", { + totalTriggers: 5, + uniqueRuns: uniqueHandles.length, + allSameRun, + note: "The run should execute with version 5 (the LAST payload)", + }); + + return { + debounceKey: key, + totalTriggers: 5, + uniqueRunsCreated: uniqueHandles.length, + allSameRun, + runId: uniqueHandles[0], + expectedPayloadVersion: 5, + message: + "With trailing mode, the run executes with the LAST payload (version 5), not the first", + }; + }, +}); + +/** + * Example 9: Document 
Auto-Save with Trailing Mode + * + * A practical example: when editing a document, you want to save the LATEST + * version after the user stops typing, not the first version. + * + * Trailing mode is ideal for this because: + * - Each keystroke/edit triggers a save + * - Each trigger updates the pending run's payload to the latest content + * - When typing stops, the latest content is saved + */ +export const saveDocumentLatest = task({ + id: "save-document-latest", + run: async (payload: { + documentId: string; + content: string; + editCount: number; + lastEditedAt: string; + }) => { + logger.info("Saving document (latest version)", { + documentId: payload.documentId, + contentLength: payload.content.length, + editCount: payload.editCount, + }); + + // Simulate save operation + await wait.for({ seconds: 1 }); + + logger.info("Document saved successfully with latest content", { + documentId: payload.documentId, + editCount: payload.editCount, + savedAt: new Date().toISOString(), + }); + + return { + saved: true, + documentId: payload.documentId, + editCount: payload.editCount, + contentLength: payload.content.length, + savedAt: new Date().toISOString(), + }; + }, +}); + +export const onDocumentEditWithTrailing = task({ + id: "on-document-edit-with-trailing", + run: async (payload: { documentId: string; content: string; editorId: string }) => { + // Track how many edits we've made (for demonstration) + const editCount = payload.content.length; // Using content length as a simple proxy + + logger.info("Document edited (using trailing mode)", { + documentId: payload.documentId, + editorId: payload.editorId, + editCount, + }); + + // Use trailing mode - the LATEST content will be saved + const handle = await saveDocumentLatest.trigger( + { + documentId: payload.documentId, + content: payload.content, + editCount, + lastEditedAt: new Date().toISOString(), + }, + { + debounce: { + key: `doc-${payload.documentId}`, + delay: "3s", + mode: "trailing", // Save the LATEST 
content, not the first + }, + } + ); + + return { + acknowledged: true, + pendingSaveRunId: handle.id, + note: "With trailing mode, the latest content will be saved after 3 seconds of no edits", + }; + }, +}); + +/** + * Example 10: Leading vs Trailing Mode Comparison + * + * This task demonstrates the difference between leading and trailing modes + * by triggering two separate debounced tasks with the same data pattern. + * + * - Leading mode task: will process version 1 (first payload) + * - Trailing mode task: will process version 5 (last payload) + */ +export const processWithLeadingMode = task({ + id: "process-with-leading-mode", + run: async (payload: { version: number }) => { + logger.info("Leading mode: Processing data", { version: payload.version }); + return { mode: "leading", version: payload.version }; + }, +}); + +export const processWithTrailingMode = task({ + id: "process-with-trailing-mode", + run: async (payload: { version: number }) => { + logger.info("Trailing mode: Processing data", { version: payload.version }); + return { mode: "trailing", version: payload.version }; + }, +}); + +export const compareLeadingAndTrailing = task({ + id: "compare-leading-and-trailing", + run: async (payload: { prefix?: string }) => { + const prefix = payload.prefix ?? 
"compare"; + + logger.info("Starting leading vs trailing mode comparison"); + logger.info("Triggering both modes 5 times with versions 1-5"); + logger.info("Expected: Leading mode processes v1, Trailing mode processes v5"); + + // Trigger both modes 5 times + for (let i = 1; i <= 5; i++) { + // Leading mode (default) - will keep first payload + await processWithLeadingMode.trigger( + { version: i }, + { + debounce: { + key: `${prefix}-leading`, + delay: "5s", + // mode: "leading" is the default + }, + } + ); + + // Trailing mode - will update to latest payload + await processWithTrailingMode.trigger( + { version: i }, + { + debounce: { + key: `${prefix}-trailing`, + delay: "5s", + mode: "trailing", + }, + } + ); + + await new Promise((resolve) => setTimeout(resolve, 100)); + } + + logger.info("Comparison complete", { + leadingModeExpected: "version 1 (first payload)", + trailingModeExpected: "version 5 (last payload)", + }); + + return { + message: "Check the processWithLeadingMode and processWithTrailingMode runs", + leadingModeExpected: { version: 1 }, + trailingModeExpected: { version: 5 }, + }; + }, +}); + +/** + * Example 11: Trailing Mode with Metadata Updates + * + * Trailing mode also updates metadata, tags, maxAttempts, maxDuration, and machine. + * This example shows how metadata changes with each trigger. + */ +export const processWithMetadata = task({ + id: "process-with-metadata", + run: async (payload: { action: string }, { ctx }) => { + logger.info("Processing with metadata", { action: payload.action }); + + // The metadata will be from the LAST trigger when using trailing mode + logger.info("Run metadata reflects the latest trigger"); + + return { + action: payload.action, + processedAt: new Date().toISOString(), + }; + }, +}); + +export const demonstrateTrailingWithMetadata = task({ + id: "demonstrate-trailing-with-metadata", + run: async (payload: { debounceKey?: string }) => { + const key = payload.debounceKey ?? 
"metadata-trailing-demo"; + + logger.info("Demonstrating trailing mode with metadata updates"); + + const actions = ["created", "updated", "reviewed", "approved", "published"]; + + for (const action of actions) { + await processWithMetadata.trigger( + { action }, + { + debounce: { + key, + delay: "5s", + mode: "trailing", + }, + metadata: { + lastAction: action, + actionTimestamp: new Date().toISOString(), + actionIndex: actions.indexOf(action) + 1, + }, + } + ); + + await new Promise((resolve) => setTimeout(resolve, 100)); + } + + logger.info("Metadata trailing demonstration complete", { + expectedAction: "published", + expectedMetadata: { lastAction: "published", actionIndex: 5 }, + }); + + return { + debounceKey: key, + triggeredActions: actions, + expectedFinalAction: "published", + message: "The run will have metadata from the 'published' trigger (the last one)", + }; + }, +}); From 0d3bc3c44fc38d10a960bc6f977d01db3150c14b Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 14:27:49 +0000 Subject: [PATCH 12/15] Fix TOCTOU Race Condition in registerDebouncedRun --- .../src/engine/systems/debounceSystem.ts | 65 ++++++++--- .../src/engine/tests/debounce.test.ts | 102 ++++++++++++++++++ 2 files changed, 155 insertions(+), 12 deletions(-) diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts index f749c07683..a43a6b664e 100644 --- a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -121,6 +121,22 @@ end return { 0, value } `, }); + + // Atomically sets runId only if current value equals expected pending claim. + // This prevents the TOCTOU race condition where between GET (check claim) and SET (register), + // another server could claim and register a different run, which would get overwritten. + // Returns 1 if set succeeded, 0 if claim mismatch (lost the claim). 
+ this.redis.defineCommand("registerIfClaimOwned", { + numberOfKeys: 1, + lua: ` +local value = redis.call('GET', KEYS[1]) +if value == ARGV[1] then + redis.call('SET', KEYS[1], ARGV[2], 'PX', ARGV[3]) + return 1 +end +return 0 + `, + }); } /** @@ -593,10 +609,24 @@ return { 0, value } async (span) => { const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounceKey); + // Calculate TTL: delay until + buffer + const ttlMs = Math.max( + delayUntil.getTime() - Date.now() + 60_000, // Add 1 minute buffer + 60_000 + ); + if (claimId) { - // Verify we still own the pending claim before overwriting - const currentValue = await this.redis.get(redisKey); - if (currentValue !== `pending:${claimId}`) { + // Use atomic Lua script to verify claim and set runId in one operation. + // This prevents the TOCTOU race where another server could claim and register + // between our GET check and SET. + const result = await this.redis.registerIfClaimOwned( + redisKey, + `pending:${claimId}`, + runId, + ttlMs.toString() + ); + + if (result === 0) { // We lost the claim - another server took over or it expired this.$.logger.warn("registerDebouncedRun: lost claim, not registering", { runId, @@ -604,21 +634,15 @@ return { 0, value } taskIdentifier, debounceKey, claimId, - currentValue, }); span.setAttribute("claimLost", true); return false; } + } else { + // No claim to verify, just set directly + await this.redis.set(redisKey, runId, "PX", ttlMs); } - // Calculate TTL: delay until + buffer - const ttlMs = Math.max( - delayUntil.getTime() - Date.now() + 60_000, // Add 1 minute buffer - 60_000 - ); - - await this.redis.set(redisKey, runId, "PX", ttlMs); - this.$.logger.debug("registerDebouncedRun: stored debounce key mapping", { runId, environmentId, @@ -751,5 +775,22 @@ declare module "@internal/redis" { key: string, callback?: Callback<[number, string | null]> ): Result<[number, string | null], Context>; + + /** + * Atomically sets runId only if current value equals 
expected pending claim. + * Prevents TOCTOU race condition between claim verification and registration. + * @param key - The Redis key + * @param expectedClaim - Expected value "pending:{claimId}" + * @param runId - The new value (run ID) to set + * @param ttlMs - TTL in milliseconds + * @returns 1 if set succeeded, 0 if claim mismatch + */ + registerIfClaimOwned( + key: string, + expectedClaim: string, + runId: string, + ttlMs: string, + callback?: Callback + ): Result; } } diff --git a/internal-packages/run-engine/src/engine/tests/debounce.test.ts b/internal-packages/run-engine/src/engine/tests/debounce.test.ts index 3d4e56af7b..92b1d4803a 100644 --- a/internal-packages/run-engine/src/engine/tests/debounce.test.ts +++ b/internal-packages/run-engine/src/engine/tests/debounce.test.ts @@ -1939,5 +1939,107 @@ describe("RunEngine debounce", () => { } } ); + + containerTest( + "registerDebouncedRun: atomic claim prevents overwrite when claim is lost", + async ({ prisma, redisOptions }) => { + // This test verifies the fix for the TOCTOU race condition in registerDebouncedRun. + // The race occurs when: + // 1. Server A claims debounce key with claimId-A + // 2. Server B claims same key with claimId-B (after A's claim expires) + // 3. Server B registers runId-B successfully + // 4. Server A attempts to register runId-A with stale claimId-A + // Without the fix, step 4 would overwrite runId-B. With the fix, it fails atomically. 
+ + const { createRedisClient } = await import("@internal/redis"); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + // Create a separate Redis client to simulate "another server" modifying keys directly + const simulatedServerRedis = createRedisClient({ + ...redisOptions, + keyPrefix: `${redisOptions.keyPrefix ?? ""}debounce:`, + }); + + try { + const taskIdentifier = "test-task"; + const debounceKey = "race-test-key"; + const environmentId = authenticatedEnvironment.id; + const delayUntil = new Date(Date.now() + 60_000); + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Construct the Redis key (same format as DebounceSystem.getDebounceRedisKey) + const redisKey = `${environmentId}:${taskIdentifier}:${debounceKey}`; + + // Step 1: Server A claims the key with claimId-A + const claimIdA = "claim-server-A"; + await simulatedServerRedis.set(redisKey, `pending:${claimIdA}`, "PX", 60_000); + + // Step 2 & 3: Simulate Server B claiming and registering (after A's claim "expires") + // In reality, this simulates the race where B's claim overwrites A's pending claim + const runIdB = "run_server_B"; + await simulatedServerRedis.set(redisKey, runIdB, "PX", 60_000); + + // Verify Server B's registration is in place + const valueAfterB = await simulatedServerRedis.get(redisKey); + expect(valueAfterB).toBe(runIdB); + + // Step 4: Server A attempts to register with its stale 
claimId-A + // This should FAIL because the key no longer contains "pending:claim-server-A" + const runIdA = "run_server_A"; + const registered = await engine.debounceSystem.registerDebouncedRun({ + runId: runIdA, + environmentId, + taskIdentifier, + debounceKey, + delayUntil, + claimId: claimIdA, // Stale claim ID + }); + + // Step 5: Verify Server A's registration failed + expect(registered).toBe(false); + + // Step 6: Verify Redis still contains runId-B (not overwritten by Server A) + const finalValue = await simulatedServerRedis.get(redisKey); + expect(finalValue).toBe(runIdB); + } finally { + await simulatedServerRedis.quit(); + await engine.quit(); + } + } + ); }); From f3f73554eb766fc59b5513b5cca7a1403a97f46a Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 14:34:35 +0000 Subject: [PATCH 13/15] remove the docs from this PR --- docs/triggering.mdx | 84 --------------------------------------------- 1 file changed, 84 deletions(-) diff --git a/docs/triggering.mdx b/docs/triggering.mdx index a7bbc74f4e..599fe67e99 100644 --- a/docs/triggering.mdx +++ b/docs/triggering.mdx @@ -831,90 +831,6 @@ export const myTask = task({ For more information, see our [Idempotency](/idempotency) documentation. -### `debounce` - -You can debounce task triggers to consolidate multiple trigger calls into a single delayed run. When a run with the same debounce key already exists in the delayed state, subsequent triggers "push" the existing run's execution time later rather than creating new runs. 
- -This is useful for scenarios like: - -- Real-time document indexing where you want to wait for the user to finish typing -- Aggregating webhook events from the same source -- Rate limiting expensive operations while still processing the final request - -```ts -// First trigger creates a new run, delayed by 5 seconds -await myTask.trigger({ some: "data" }, { debounce: { key: "user-123", delay: "5s" } }); - -// If triggered again within 5 seconds, the existing run is pushed later -await myTask.trigger({ updated: "data" }, { debounce: { key: "user-123", delay: "5s" } }); - -// The run only executes after 5 seconds of no new triggers -// Note: The first payload is used (first trigger wins) -``` - - - Debounce keys are scoped to the task identifier, so different tasks can use the same key without - conflicts. - - -The `debounce` option accepts: - -- `key` - A unique string to identify the debounce group (scoped to the task) -- `delay` - Duration string specifying how long to delay (e.g., "5s", "1m", "30s") -- `mode` - Optional. Controls which trigger's data is used: `"leading"` (default) or `"trailing"` - -**How it works:** - -1. First trigger with a debounce key creates a new delayed run -2. Subsequent triggers with the same key (while the run is still delayed) push the execution time further -3. Once no new triggers occur within the delay duration, the run executes -4. After the run starts executing, a new trigger with the same key will create a new run - -**Leading vs Trailing mode:** - -By default, debounce uses **leading mode** - the run executes with data from the **first** trigger. 
- -With **trailing mode**, each subsequent trigger updates the run's data (payload, metadata, tags, maxAttempts, maxDuration, and machine), so the run executes with data from the **last** trigger: - -```ts -// Leading mode (default): runs with first payload -await myTask.trigger({ count: 1 }, { debounce: { key: "user-123", delay: "5s" } }); -await myTask.trigger({ count: 2 }, { debounce: { key: "user-123", delay: "5s" } }); -// After 5 seconds, runs with { count: 1 } - -// Trailing mode: runs with last payload -await myTask.trigger({ count: 1 }, { debounce: { key: "user-123", delay: "5s", mode: "trailing" } }); -await myTask.trigger({ count: 2 }, { debounce: { key: "user-123", delay: "5s", mode: "trailing" } }); -// After 5 seconds, runs with { count: 2 } -``` - -Use **trailing mode** when you want to process the most recent data, such as: -- Saving the latest version of a document after edits stop -- Processing the final state after a series of rapid updates - -**With `triggerAndWait`:** - -When using `triggerAndWait` with debounce, the parent run blocks on the existing debounced run if one exists: - -```ts -export const parentTask = task({ - id: "parent-task", - run: async (payload: string) => { - // Both will wait for the same run - const result = await childTask.triggerAndWait( - { data: payload }, - { debounce: { key: "shared-key", delay: "3s" } } - ); - return result; - }, -}); -``` - - - Idempotency keys take precedence over debounce keys. If both are provided and an idempotency match - is found, it wins. - - ### `queue` When you trigger a task you can override the concurrency limit. This is really useful if you sometimes have high priority runs. 
From d78877b5815bc6887c9a108fd5f9c8127fe3a52b Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 14:58:30 +0000 Subject: [PATCH 14/15] Fix new debounce run race --- .../src/engine/systems/debounceSystem.ts | 134 ++++++++++++++++-- .../src/engine/tests/debounce.test.ts | 129 +++++++++++++++++ 2 files changed, 251 insertions(+), 12 deletions(-) diff --git a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts index a43a6b664e..af25a31552 100644 --- a/internal-packages/run-engine/src/engine/systems/debounceSystem.ts +++ b/internal-packages/run-engine/src/engine/systems/debounceSystem.ts @@ -243,6 +243,74 @@ return 0 return { claimed: false, existingRunId: existingValue }; } + /** + * Atomically claims the debounce key before returning "new". + * This prevents the race condition where returning "new" without a claimId + * allows registerDebouncedRun to do a plain SET that can overwrite another server's registration. + * + * This method is called when we've determined there's no valid existing run but need + * to safely claim the key before creating a new one. 
+ */ + private async claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }: { + environmentId: string; + taskIdentifier: string; + debounce: DebounceOptions; + tx?: PrismaClientOrTransaction; + }): Promise { + const redisKey = this.getDebounceRedisKey(environmentId, taskIdentifier, debounce.key); + const claimId = nanoid(16); + + const claimResult = await this.claimDebounceKey({ + environmentId, + taskIdentifier, + debounceKey: debounce.key, + claimId, + ttlMs: CLAIM_TTL_MS, + }); + + if (claimResult.claimed) { + this.$.logger.debug("claimKeyForNewRun: claimed key, returning new", { + debounceKey: debounce.key, + taskIdentifier, + environmentId, + claimId, + }); + return { status: "new", claimId }; + } + + if (claimResult.existingRunId) { + // Another server registered a run while we were processing - handle it + this.$.logger.debug("claimKeyForNewRun: found existing run, handling it", { + debounceKey: debounce.key, + existingRunId: claimResult.existingRunId, + }); + return await this.handleExistingRun({ + existingRunId: claimResult.existingRunId, + redisKey, + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + + // Another server is creating (pending state) - wait for it + this.$.logger.debug("claimKeyForNewRun: key is pending, waiting for existing run", { + debounceKey: debounce.key, + }); + return await this.waitForExistingRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); + } + /** * Waits for another server to complete its claim and register a run ID. 
* Used when we detect a "pending" state, meaning another server has claimed @@ -267,13 +335,18 @@ return 0 const value = await this.redis.get(redisKey); if (!value) { - // Key expired or was deleted - return "new" to create fresh - this.$.logger.debug("waitForExistingRun: key expired/deleted, returning new", { + // Key expired or was deleted - atomically claim before returning "new" + this.$.logger.debug("waitForExistingRun: key expired/deleted, claiming key", { redisKey, debounceKey: debounce.key, attempt: i + 1, }); - return { status: "new" }; + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); } if (!value.startsWith("pending:")) { @@ -287,6 +360,8 @@ return 0 return await this.handleExistingRun({ existingRunId: value, redisKey, + environmentId, + taskIdentifier, debounce, tx, }); @@ -314,12 +389,17 @@ return 0 const deleteResult = await this.conditionallyDeletePendingKey(redisKey); if (deleteResult.deleted) { - // Key was pending (or didn't exist) - safe to create new run - this.$.logger.debug("waitForExistingRun: stale pending key deleted, returning new", { + // Key was pending (or didn't exist) - atomically claim before returning "new" + this.$.logger.debug("waitForExistingRun: stale pending key deleted, claiming key", { redisKey, debounceKey: debounce.key, }); - return { status: "new" }; + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); } // Key now has a valid run ID - the original server completed! 
@@ -335,6 +415,8 @@ return 0 return await this.handleExistingRun({ existingRunId: deleteResult.existingRunId, redisKey, + environmentId, + taskIdentifier, debounce, tx, }); @@ -347,11 +429,15 @@ return 0 private async handleExistingRun({ existingRunId, redisKey, + environmentId, + taskIdentifier, debounce, tx, }: { existingRunId: string; redisKey: string; + environmentId: string; + taskIdentifier: string; debounce: DebounceOptions; tx?: PrismaClientOrTransaction; }): Promise { @@ -369,9 +455,14 @@ return 0 debounceKey: debounce.key, error, }); - // Clean up stale Redis key + // Clean up stale Redis key and atomically claim before returning "new" await this.redis.del(redisKey); - return { status: "new" }; + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); } // Check if run is still in DELAYED status (or legacy RUN_CREATED for older runs) @@ -381,9 +472,14 @@ return 0 executionStatus: snapshot.executionStatus, debounceKey: debounce.key, }); - // Clean up Redis key since run is no longer debounceable + // Clean up Redis key and atomically claim before returning "new" await this.redis.del(redisKey); - return { status: "new" }; + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); } // Get the run to check debounce metadata and createdAt @@ -399,8 +495,14 @@ return 0 existingRunId, debounceKey: debounce.key, }); + // Clean up stale Redis key and atomically claim before returning "new" await this.redis.del(redisKey); - return { status: "new" }; + return await this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); } // Calculate new delay - parseNaturalLanguageDuration returns a Date (now + duration) @@ -409,7 +511,13 @@ return 0 this.$.logger.error("handleExistingRun: invalid delay duration", { delay: debounce.delay, }); - return { status: "new" }; + // Invalid delay but we still need to atomically claim before returning "new" + return await 
this.claimKeyForNewRun({ + environmentId, + taskIdentifier, + debounce, + tx, + }); } // Check if max debounce duration would be exceeded @@ -566,6 +674,8 @@ return 0 return await this.handleExistingRun({ existingRunId: claimResult.existingRunId, redisKey, + environmentId, + taskIdentifier, debounce, tx, }); diff --git a/internal-packages/run-engine/src/engine/tests/debounce.test.ts b/internal-packages/run-engine/src/engine/tests/debounce.test.ts index 92b1d4803a..0c3d09d887 100644 --- a/internal-packages/run-engine/src/engine/tests/debounce.test.ts +++ b/internal-packages/run-engine/src/engine/tests/debounce.test.ts @@ -2041,5 +2041,134 @@ describe("RunEngine debounce", () => { } } ); + + containerTest( + "waitForExistingRun: returns claimId when key expires during wait", + async ({ prisma, redisOptions }) => { + // This test verifies the fix for the race condition where waitForExistingRun + // returns { status: "new" } without a claimId. Without the fix: + // 1. Server A's pending claim expires + // 2. Server B's waitForExistingRun detects key is gone, returns { status: "new" } (no claimId) + // 3. Server C atomically claims the key and registers runId-C + // 4. Server B calls registerDebouncedRun without claimId, does plain SET, overwrites runId-C + // + // With the fix, step 2 atomically claims the key before returning, preventing step 4's overwrite. 
+ + const { createRedisClient } = await import("@internal/redis"); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0001, + }, + debounce: { + maxDebounceDurationMs: 60_000, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + // Create a separate Redis client to simulate "another server" modifying keys directly + const simulatedServerRedis = createRedisClient({ + ...redisOptions, + keyPrefix: `${redisOptions.keyPrefix ?? ""}debounce:`, + }); + + try { + const taskIdentifier = "test-task"; + const debounceKey = "wait-race-test-key"; + const environmentId = authenticatedEnvironment.id; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + // Construct the Redis key (same format as DebounceSystem.getDebounceRedisKey) + const redisKey = `${environmentId}:${taskIdentifier}:${debounceKey}`; + + // Step 1: Server A claims the key with a pending claim + const claimIdA = "claim-server-A"; + await simulatedServerRedis.set(redisKey, `pending:${claimIdA}`, "PX", 60_000); + + // Step 2: Delete the key to simulate Server A's claim expiring + await simulatedServerRedis.del(redisKey); + + // Step 3: Server B calls handleDebounce - since key is gone, it should atomically claim + const debounceResult = await engine.debounceSystem.handleDebounce({ + environmentId, + taskIdentifier, + debounce: { + key: debounceKey, + delay: "5s", + }, + }); + + // Step 4: Verify result is { status: "new" } WITH a claimId + expect(debounceResult.status).toBe("new"); + if (debounceResult.status === "new") { + 
expect(debounceResult.claimId).toBeDefined(); + expect(typeof debounceResult.claimId).toBe("string"); + expect(debounceResult.claimId!.length).toBeGreaterThan(0); + + // Step 5: Verify the key now contains Server B's pending claim + const valueAfterB = await simulatedServerRedis.get(redisKey); + expect(valueAfterB).toBe(`pending:${debounceResult.claimId}`); + + // Step 6: Server C tries to claim the same key - should fail + const claimIdC = "claim-server-C"; + const claimResultC = await simulatedServerRedis.set( + redisKey, + `pending:${claimIdC}`, + "PX", + 60_000, + "NX" + ); + expect(claimResultC).toBeNull(); // NX fails because key exists + + // Step 7: Server B registers its run using its claimId + const runIdB = "run_server_B"; + const delayUntil = new Date(Date.now() + 60_000); + const registered = await engine.debounceSystem.registerDebouncedRun({ + runId: runIdB, + environmentId, + taskIdentifier, + debounceKey, + delayUntil, + claimId: debounceResult.claimId, + }); + + // Step 8: Verify Server B's registration succeeded + expect(registered).toBe(true); + + // Step 9: Verify Redis contains Server B's run ID + const finalValue = await simulatedServerRedis.get(redisKey); + expect(finalValue).toBe(runIdB); + } + } finally { + await simulatedServerRedis.quit(); + await engine.quit(); + } + } + ); }); From 20f3a88faeca02c9cd9ccacd1888c8748374949a Mon Sep 17 00:00:00 2001 From: Eric Allam Date: Thu, 18 Dec 2025 15:48:09 +0000 Subject: [PATCH 15/15] better handle invalid debounce delay options --- .../runEngine/services/triggerTask.server.ts | 21 +- apps/webapp/test/engine/triggerTask.test.ts | 235 ++++++++++++++++++ 2 files changed, 253 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 1cf42f57b9..ab32682811 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -169,10 
+169,25 @@ export class RunEngineTriggerTaskService { } // Validate debounce options - if (body.options?.debounce && !delayUntil) { - throw new ServiceValidationError( - `Debounce requires a valid delay duration. Provided: ${body.options.debounce.delay}` + if (body.options?.debounce) { + if (!delayUntil) { + throw new ServiceValidationError( + `Debounce requires a valid delay duration. Provided: ${body.options.debounce.delay}` + ); + } + + // Always validate debounce.delay separately since it's used for rescheduling + // This catches the case where options.delay is valid but debounce.delay is invalid + const [debounceDelayError, debounceDelayUntil] = await tryCatch( + parseDelay(body.options.debounce.delay) ); + + if (debounceDelayError || !debounceDelayUntil) { + throw new ServiceValidationError( + `Invalid debounce delay: ${body.options.debounce.delay}. ` + + `Supported formats: {number}s, {number}m, {number}h, {number}d, {number}w` + ); + } } const ttl = diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index 91fa035806..0306c6f235 100644 --- a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -937,4 +937,239 @@ describe("RunEngineTriggerTaskService", () => { await engine.quit(); } ); + + containerTest( + "should reject invalid debounce.delay when no explicit delay is provided", + async ({ prisma, redisOptions }) => { + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, 
"PRODUCTION"); + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const queuesManager = new DefaultQueueManager(prisma, engine); + const idempotencyKeyConcern = new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: queuesManager, + idempotencyKeyConcern, + validator: new MockTriggerTaskValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Invalid debounce.delay format (ms not supported). No explicit options.delay is passed, so the test expects the generic message thrown by the service's !delayUntil guard rather than the Invalid debounce delay message (presumably delayUntil is derived from debounce.delay in this case - confirm in triggerTask.server.ts) + await expect( + triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { + payload: { test: "test" }, + options: { + debounce: { + key: "test-key", + delay: "300ms", // Invalid - ms not supported + }, + }, + }, + }) + ).rejects.toThrow("Debounce requires a valid delay duration"); + + await engine.quit(); + } + ); + + containerTest( + "should reject invalid debounce.delay even when explicit delay is valid", + async ({ prisma, redisOptions }) => { + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + const 
authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const queuesManager = new DefaultQueueManager(prisma, engine); + 
const idempotencyKeyConcern = new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: queuesManager, + idempotencyKeyConcern, + validator: new MockTriggerTaskValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Valid explicit delay but invalid debounce.delay + // This is the bug case: the explicit delay passes validation, + // but debounce.delay would fail later when rescheduling, so the service must reject it up front with the Invalid debounce delay error + await expect( + triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { + payload: { test: "test" }, + options: { + delay: "5m", // Valid explicit delay + debounce: { + key: "test-key", + delay: "invalid-delay", // Invalid debounce delay + }, + }, + }, + }) + ).rejects.toThrow("Invalid debounce delay"); + + await engine.quit(); + } + ); + + containerTest( + "should accept valid debounce.delay formats", + async ({ prisma, redisOptions }) => { + const engine = new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const queuesManager = new DefaultQueueManager(prisma, engine); + const 
idempotencyKeyConcern = new IdempotencyKeyConcern( + prisma, + engine, + new 
MockTraceEventConcern() + ); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: queuesManager, + idempotencyKeyConcern, + validator: new MockTriggerTaskValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Valid debounce.delay format ({number}s) with no explicit options.delay: the call should resolve with a run instead of throwing + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { + payload: { test: "test" }, + options: { + debounce: { + key: "test-key", + delay: "5s", // Valid format + }, + }, + }, + }); + + expect(result).toBeDefined(); + expect(result?.run.friendlyId).toBeDefined(); + + await engine.quit(); + } + ); });