Skip to content

Commit 94a669d

Browse files
authored
feat(tasks): emit duration and success/failure counts for tasks (#1364)
Emits success and failure counters, plus a gauge of execution duration, for each task run. This makes it possible to monitor the health of background tasks with HyperDX alerts.
1 parent cfba5cb commit 94a669d

File tree

5 files changed

+114
-21
lines changed

5 files changed

+114
-21
lines changed

.changeset/stale-horses-punch.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@hyperdx/api": minor
3+
---
4+
5+
Add metrics to task execution

packages/api/src/tasks/checkAlerts/__tests__/checkAlertsTask.test.ts

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ import {
1414
AlertTaskType,
1515
loadProvider,
1616
} from '@/tasks/checkAlerts/providers';
17-
import { CheckAlertsTaskArgs } from '@/tasks/types';
17+
import { CheckAlertsTaskArgs, TaskName } from '@/tasks/types';
1818

1919
jest.mock('@/tasks/checkAlerts/providers', () => {
2020
return {
@@ -65,7 +65,7 @@ describe('CheckAlertTask', () => {
6565
});
6666

6767
it('should execute successfully with no alert tasks', async () => {
68-
const args: CheckAlertsTaskArgs = { taskName: 'check-alerts' };
68+
const args: CheckAlertsTaskArgs = { taskName: TaskName.CHECK_ALERTS };
6969
const task = new CheckAlertTask(args);
7070

7171
mockAlertProvider.getAlertTasks.mockResolvedValue([]);
@@ -83,7 +83,7 @@ describe('CheckAlertTask', () => {
8383

8484
it('should execute successfully with custom provider', async () => {
8585
const args: CheckAlertsTaskArgs = {
86-
taskName: 'check-alerts',
86+
taskName: TaskName.CHECK_ALERTS,
8787
provider: 'custom-provider',
8888
};
8989
const task = new CheckAlertTask(args);
@@ -99,7 +99,7 @@ describe('CheckAlertTask', () => {
9999
});
100100

101101
it('should process alert tasks', async () => {
102-
const args: CheckAlertsTaskArgs = { taskName: 'check-alerts' };
102+
const args: CheckAlertsTaskArgs = { taskName: TaskName.CHECK_ALERTS };
103103
const task = new CheckAlertTask(args);
104104

105105
const mockAlert = {
@@ -171,7 +171,7 @@ describe('CheckAlertTask', () => {
171171
});
172172

173173
it("should ensure that the correct team's webhooks are passed to processAlert", async () => {
174-
const args: CheckAlertsTaskArgs = { taskName: 'check-alerts' };
174+
const args: CheckAlertsTaskArgs = { taskName: TaskName.CHECK_ALERTS };
175175
const task = new CheckAlertTask(args);
176176

177177
// Create two teams

packages/api/src/tasks/index.ts

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,63 +1,75 @@
11
import { CronJob } from 'cron';
22
import minimist from 'minimist';
3-
import { performance } from 'perf_hooks';
43
import { serializeError } from 'serialize-error';
54

65
import { RUN_SCHEDULED_TASKS_EXTERNALLY } from '@/config';
76
import CheckAlertTask from '@/tasks/checkAlerts';
7+
import {
8+
taskExecutionDurationGauge,
9+
taskExecutionFailureCounter,
10+
taskExecutionSuccessCounter,
11+
timeExec,
12+
} from '@/tasks/metrics';
813
import PingPongTask from '@/tasks/pingPongTask';
9-
import { asTaskArgs, HdxTask, TaskArgs } from '@/tasks/types';
14+
import { asTaskArgs, HdxTask, TaskArgs, TaskName } from '@/tasks/types';
1015
import logger from '@/utils/logger';
1116

1217
import { tasksTracer } from './tracer';
1318

1419
/**
 * Builds the task implementation selected by `argv.taskName`.
 *
 * @param argv - Parsed task arguments; `taskName` chooses the implementation.
 * @returns A `CheckAlertTask` or `PingPongTask` constructed from `argv`.
 * @throws Error when `taskName` matches none of the known cases.
 */
function createTask(argv: TaskArgs): HdxTask<TaskArgs> {
1520
const taskName = argv.taskName;
1621
switch (taskName) {
17-
case 'check-alerts':
22+
case TaskName.CHECK_ALERTS:
1823
return new CheckAlertTask(argv);
19-
case 'ping-pong':
24+
case TaskName.PING_PONG:
2025
return new PingPongTask(argv);
2126
default:
2227
// Reached only if a task name has no matching case above.
throw new Error(`Unknown task name ${taskName}`);
2328
}
2429
}
2530

26-
/**
 * Runs one task inside an active tracing span.
 *
 * The task is created from `argv`, executed, and then always disposed. A
 * successful run increments the task's success counter. An exception thrown
 * by `execute()` is logged and increments the failure counter; it is not
 * rethrown, so the returned promise still resolves in that case.
 *
 * Note: `createTask` is called before the `try`, so an error thrown while
 * creating the task propagates to the caller without reaching the
 * catch/finally below.
 *
 * @param argv - Parsed task arguments; `taskName` also names the span and
 *   selects which counters are incremented.
 */
const main = async (argv: TaskArgs) => {
31+
async function main(argv: TaskArgs): Promise<void> {
2732
await tasksTracer.startActiveSpan(argv.taskName || 'task', async span => {
2833
const task: HdxTask<TaskArgs> = createTask(argv);
2934
try {
30-
const t0 = performance.now();
31-
logger.info(`Task [${task.name()}] started at ${new Date()}`);
35+
logger.info(`${task.name()} started at ${new Date()}`);
3236
await task.execute();
33-
logger.info(
34-
`Task [${task.name()}] finished in ${(performance.now() - t0).toFixed(2)} ms`,
35-
);
37+
taskExecutionSuccessCounter.get(argv.taskName)?.add(1);
3638
} catch (e: unknown) {
3739
logger.error(
3840
{
3941
cause: e,
4042
task,
4143
},
42-
`Task [${task.name()}] failed: ${serializeError(e)}`,
44+
// serializeError() returns a plain object for Error values; interpolating
// it directly would print "[object Object]", so stringify it first.
`${task.name()} failed: ${JSON.stringify(serializeError(e))}`,
4345
);
46+
taskExecutionFailureCounter.get(argv.taskName)?.add(1);
4447
} finally {
4548
// Dispose the task and close the span whether execute() succeeded or threw.
await task.asyncDispose();
4649
span.end();
4750
}
4851
});
49-
};
52+
}
5053

5154
// Entry point
5255
const argv = asTaskArgs(minimist(process.argv.slice(2)));
56+
57+
// `main` wrapped with `timeExec`: after every run the measured duration is
// recorded on the gauge registered for this task name (if one exists) and a
// "finished in" line is logged. The `useCron` attribute is true when the
// in-app cron job drives the task (RUN_SCHEDULED_TASKS_EXTERNALLY is falsy).
const instrumentedMain = timeExec(main, duration => {
58+
const gauge = taskExecutionDurationGauge.get(argv.taskName);
59+
if (gauge) {
60+
gauge.record(duration, { useCron: !RUN_SCHEDULED_TASKS_EXTERNALLY });
61+
}
62+
logger.info(`${argv.taskName} finished in ${duration.toFixed(2)} ms`);
63+
});
64+
5365
// WARNING: the cron job will be enabled only in development mode
5466
if (!RUN_SCHEDULED_TASKS_EXTERNALLY) {
5567
logger.info('In-app cron job is enabled');
5668
// run cron job every 1 minute
5769
const job = CronJob.from({
5870
cronTime: '0 * * * * *',
5971
waitForCompletion: true,
60-
onTick: async () => main(argv),
72+
onTick: async () => instrumentedMain(argv),
6173
errorHandler: async err => {
6274
console.error(err);
6375
},
@@ -66,7 +78,7 @@ if (!RUN_SCHEDULED_TASKS_EXTERNALLY) {
6678
});
6779
} else {
6880
logger.warn('In-app cron job is disabled');
69-
main(argv)
81+
instrumentedMain(argv)
7082
.then(() => {
7183
process.exit(0);
7284
})

packages/api/src/tasks/metrics.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import {
2+
Attributes,
3+
Counter,
4+
Gauge,
5+
metrics,
6+
ValueType,
7+
} from '@opentelemetry/api';
8+
import { performance } from 'perf_hooks';
9+
10+
import { TaskName } from '@/tasks/types';
11+
12+
const meter = metrics.getMeter('hyperdx-tasks');
13+
14+
// Per-task success counters, keyed by task name. Filled by the loop below.
export const taskExecutionSuccessCounter: Map<
15+
TaskName,
16+
Counter<Attributes>
17+
> = new Map();
18+
19+
// Per-task failure counters, keyed by task name. Filled by the loop below.
export const taskExecutionFailureCounter: Map<
20+
TaskName,
21+
Counter<Attributes>
22+
> = new Map();
23+
24+
// Per-task execution-duration gauges, keyed by task name. Filled by the loop below.
export const taskExecutionDurationGauge: Map<
25+
TaskName,
26+
Gauge<Attributes>
27+
> = new Map();
28+
29+
// Register one success counter, one failure counter and one duration gauge for
// every TaskName value at module load. Instruments are named
// `hyperdx.tasks.<task name>.success|failure|duration`.
for (const name of Object.values(TaskName)) {
30+
taskExecutionSuccessCounter.set(
31+
name,
32+
meter.createCounter(`hyperdx.tasks.${name}.success`, {
33+
description:
34+
'Count of the number of times the task finished without exceptions.',
35+
}),
36+
);
37+
38+
taskExecutionFailureCounter.set(
39+
name,
40+
meter.createCounter(`hyperdx.tasks.${name}.failure`, {
41+
description:
42+
'Count of the number of times the task failed to finish because of an exception',
43+
}),
44+
);
45+
46+
// The gauge is declared with unit 'ms' and a floating-point value type.
taskExecutionDurationGauge.set(
47+
name,
48+
meter.createGauge(`hyperdx.tasks.${name}.duration`, {
49+
description: `The wall time required for the ${name} task to complete execution.`,
50+
unit: 'ms',
51+
valueType: ValueType.DOUBLE,
52+
}),
53+
);
54+
}
55+
56+
/**
 * Wraps an async function so the elapsed time of each call is measured with
 * `performance.now()` and handed to an optional callback.
 *
 * The callback runs in a `finally` block, so it is invoked whether `fn`
 * resolves or rejects. The wrapper otherwise passes through `fn`'s result or
 * rejection unchanged. Because the callback runs inside `finally`, an
 * exception thrown by `recordFn` replaces the outcome of `fn`.
 *
 * @param fn - The async function to time.
 * @param recordFn - Optional callback receiving the elapsed time
 *   (`performance.now()` difference, i.e. milliseconds). When omitted, no
 *   end timestamp is taken and the wrapper only forwards the call.
 * @returns A function with the same parameters and result as `fn`.
 */
export function timeExec<T extends unknown[], R>(
  fn: (...args: T) => Promise<R>,
  recordFn?: (duration: number) => void,
): (...args: T) => Promise<R> {
  return async (...args: T): Promise<R> => {
    const start = performance.now();
    try {
      return await fn(...args);
    } finally {
      // Report in `finally` so failed runs are timed as well as successful ones.
      if (recordFn) {
        const end = performance.now();
        recordFn(end - start);
      }
    }
  };
}

packages/api/src/tasks/types.ts

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,20 @@
11
import { z } from 'zod';
22

3+
/**
 * Names of the runnable tasks. The string values are the literals accepted as
 * `taskName` by the task argument schemas in this file.
 */
export enum TaskName {
4+
PING_PONG = 'ping-pong',
5+
CHECK_ALERTS = 'check-alerts',
6+
}
7+
38
/**
49
* Command line arguments structure for tasks.
510
* Contains task name and optional provider configuration.
611
*/
712
const pingTaskArgsSchema = z.object({
8-
taskName: z.literal('ping-pong'),
13+
taskName: z.literal(TaskName.PING_PONG),
914
});
1015

1116
const checkAlertsTaskArgsSchema = z.object({
12-
taskName: z.literal('check-alerts'),
17+
taskName: z.literal(TaskName.CHECK_ALERTS),
1318
provider: z.string().optional(),
1419
concurrency: z
1520
.number()

0 commit comments

Comments
 (0)