From 3f08d178826303e41867f252a16fbdbf24eaa9f7 Mon Sep 17 00:00:00 2001 From: Aaron Knudtson <87577305+knudtty@users.noreply.github.com> Date: Tue, 7 Oct 2025 17:17:10 -0400 Subject: [PATCH 1/3] chore: split json otel collector to enable both during dev --- docker-compose.dev.yml | 38 +++++++++++++-- package.json | 2 +- packages/api/package.json | 1 + packages/api/src/config.ts | 2 + .../src/opamp/controllers/opampController.ts | 47 ++++++++++++++++++- packages/api/src/server.ts | 25 ++++++---- 6 files changed, 99 insertions(+), 16 deletions(-) diff --git a/docker-compose.dev.yml b/docker-compose.dev.yml index 7cf0381a5..f80a13fb2 100644 --- a/docker-compose.dev.yml +++ b/docker-compose.dev.yml @@ -34,9 +34,6 @@ services: CUSTOM_OTELCOL_CONFIG_FILE: '/etc/otelcol-contrib/custom.config.yaml' # Uncomment to enable stdout logging for the OTel collector # OTEL_SUPERVISOR_LOGS: 'true' - # Uncomment to enable JSON schema in ClickHouse - # Be sure to also set BETA_CH_OTEL_JSON_SCHEMA_ENABLED to 'true' in ch-server - # OTEL_AGENT_FEATURE_GATE_ARG: '--feature-gates=clickhouse.json' volumes: - ./docker/otel-collector/config.yaml:/etc/otelcol-contrib/config.yaml - ./docker/otel-collector/supervisor_docker.yaml.tmpl:/etc/otel/supervisor.yaml.tmpl @@ -54,6 +51,41 @@ services: depends_on: ch-server: condition: service_healthy + otel-collector-json: + # image: otel/opentelemetry-collector-contrib:0.120.0 + build: + context: ./docker/otel-collector + target: dev + environment: + CLICKHOUSE_ENDPOINT: 'tcp://ch-server:9000?dial_timeout=10s' + CLICKHOUSE_PROMETHEUS_METRICS_ENDPOINT: 'ch-server:9363' + HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE: ${HYPERDX_OTEL_EXPORTER_CLICKHOUSE_DATABASE} + HYPERDX_API_KEY: ${HYPERDX_API_KEY} + HYPERDX_LOG_LEVEL: ${HYPERDX_LOG_LEVEL} + OPAMP_SERVER_URL: 'http://host.docker.internal:4321' + CUSTOM_OTELCOL_CONFIG_FILE: '/etc/otelcol-contrib/custom.config.yaml' + # Uncomment to enable stdout logging for the OTel collector + # OTEL_SUPERVISOR_LOGS: 'true' 
+ # Enables the JSON schema in ClickHouse for this collector + # Be sure to also set BETA_CH_OTEL_JSON_SCHEMA_ENABLED to 'true' in ch-server + OTEL_AGENT_FEATURE_GATE_ARG: '--feature-gates=clickhouse.json' + volumes: + - ./docker/otel-collector/config.yaml:/etc/otelcol-contrib/config.yaml + - ./docker/otel-collector/supervisor_docker.yaml.tmpl:/etc/otel/supervisor.yaml.tmpl + # Add a custom config file + - ./docker/otel-collector/custom.config.yaml:/etc/otelcol-contrib/custom.config.yaml + ports: + - '13134:13133' # health_check extension + - '24226:24225' # fluentd receiver + - '14317:4317' # OTLP gRPC receiver + - '14318:4318' # OTLP http receiver + - '18888:8888' # metrics extension + restart: always + networks: + - internal + depends_on: + ch-server: + condition: service_healthy ch-server: image: clickhouse/clickhouse-server:25.7-alpine ports: diff --git a/package.json b/package.json index 6d8e8d869..9b39a11f3 100644 --- a/package.json +++ b/package.json @@ -29,7 +29,7 @@ }, "scripts": { "setup": "yarn install && husky install", - "app:dev": "npx concurrently -k -n 'API,APP,ALERTS-TASK,COMMON-UTILS' -c 'green.bold,blue.bold,yellow.bold,magenta' 'nx run @hyperdx/api:dev' 'nx run @hyperdx/app:dev' 'nx run @hyperdx/api:dev-task check-alerts' 'nx run @hyperdx/common-utils:dev'", + "app:dev": "npx concurrently -k -n 'API,JSON-OPAMP,APP,ALERTS-TASK,COMMON-UTILS' -c 'green.bold,blue.bold,yellow.bold,magenta' 'nx run @hyperdx/api:dev' 'nx run @hyperdx/api:dev:json' 'nx run @hyperdx/app:dev' 'nx run @hyperdx/api:dev-task check-alerts' 'nx run @hyperdx/common-utils:dev'", "app:dev:local": "npx concurrently -k -n 'APP,COMMON-UTILS' -c 'blue.bold,magenta' 'nx run @hyperdx/app:dev:local' 'nx run @hyperdx/common-utils:dev'", "app:lint": "nx run @hyperdx/app:ci:lint", "dev": "docker compose -f docker-compose.dev.yml up -d && yarn app:dev && docker compose -f docker-compose.dev.yml down", diff --git a/packages/api/package.json b/packages/api/package.json index 522c8b653..67bd556fd 100644 
--- a/packages/api/package.json +++ b/packages/api/package.json @@ -77,6 +77,7 @@ "scripts": { "start": "node ./dist/index.js", "dev": "DOTENV_CONFIG_PATH=.env.development nodemon --signal SIGTERM -e ts,json --exec 'ts-node' --transpile-only -r tsconfig-paths/register -r dotenv-expand/config -r '@hyperdx/node-opentelemetry/build/src/tracing' ./src/index.ts", + "dev:json": "IS_JSON_OPAMP=true IS_OPAMP_ONLY=true OPAMP_PORT=4321 DOTENV_CONFIG_PATH=.env.development nodemon --signal SIGTERM -e ts,json --exec 'ts-node' --transpile-only -r tsconfig-paths/register -r dotenv-expand/config -r '@hyperdx/node-opentelemetry/build/src/tracing' ./src/index.ts", "dev-task": "DOTENV_CONFIG_PATH=.env.development nodemon --signal SIGTERM -e ts,json --exec 'ts-node' --transpile-only -r tsconfig-paths/register -r dotenv-expand/config -r '@hyperdx/node-opentelemetry/build/src/tracing' ./src/tasks/index.ts", "build": "tsc -p ./tsconfig.build.json && tsc-alias -p ./tsconfig.build.json && esbuild ../../node_modules/@hyperdx/node-opentelemetry/build/src/tracing --bundle --minify --platform=node --outfile=dist/tracing.js && esbuild ./build/src/index.js ./build/src/tasks/index.js --bundle --minify --platform=node --target=node22 --outdir=dist --alias:@='./src' && mkdir -p dist/opamp && cp -r src/opamp/proto dist/opamp/proto && rimraf ./build", "lint": "npx eslint --quiet . 
--ext .ts", diff --git a/packages/api/src/config.ts b/packages/api/src/config.ts index 408aedbaf..a66a320eb 100644 --- a/packages/api/src/config.ts +++ b/packages/api/src/config.ts @@ -30,6 +30,8 @@ export const MONGO_URI = env.MONGO_URI; export const OTEL_SERVICE_NAME = env.OTEL_SERVICE_NAME as string; export const PORT = Number.parseInt(env.PORT as string); export const OPAMP_PORT = Number.parseInt(env.OPAMP_PORT as string); +export const IS_OPAMP_ONLY = env.IS_OPAMP_ONLY === 'true'; +export const IS_JSON_OPAMP = env.IS_JSON_OPAMP === 'true'; export const USAGE_STATS_ENABLED = env.USAGE_STATS_ENABLED !== 'false'; export const RUN_SCHEDULED_TASKS_EXTERNALLY = env.RUN_SCHEDULED_TASKS_EXTERNALLY === 'true'; diff --git a/packages/api/src/opamp/controllers/opampController.ts b/packages/api/src/opamp/controllers/opampController.ts index d4ec9a106..bda7f32d2 100644 --- a/packages/api/src/opamp/controllers/opampController.ts +++ b/packages/api/src/opamp/controllers/opampController.ts @@ -94,6 +94,8 @@ type CollectorConfig = { password: string; ttl: string; timeout: string; + logs_table_name: string; + traces_table_name: string; retry_on_failure: { enabled: boolean; initial_interval: string; @@ -101,6 +103,13 @@ type CollectorConfig = { max_elapsed_time: string; }; }; + 'otlphttp/internal'?: { + endpoint: string; + headers: { + authorization: string; + compression: string; + }; + }; }; service: { extensions: string[]; @@ -124,6 +133,38 @@ export const buildOtelCollectorConfig = (teams: ITeam[]): CollectorConfig => { } } + let clickhouseExporterTables = { + logs_table_name: 'otel_logs', + traces_table_name: 'otel_traces', + }; + let otlpForward: string[] | undefined; + let otlpExporter: { + 'otlphttp/internal': NonNullable< + CollectorConfig['exporters'] + >['otlphttp/internal']; + } = { + 'otlphttp/internal': undefined, + }; + if (config.IS_DEV) { + if (config.IS_JSON_OPAMP) { + clickhouseExporterTables = { + logs_table_name: 'otel_logs_json', + traces_table_name: 
'otel_traces_json', + }; + } else { + otlpForward = ['otlphttp/internal']; + otlpExporter = { + 'otlphttp/internal': { + endpoint: 'http://host.docker.internal:14318', + headers: { + authorization: apiKeys.length > 0 ? apiKeys[0] : '', + compression: 'gzip', + }, + }, + }; + } + } + const collectorAuthenticationEnforced = teams[0]?.collectorAuthenticationEnforced; @@ -217,7 +258,9 @@ export const buildOtelCollectorConfig = (teams: ITeam[]): CollectorConfig => { max_interval: '30s', max_elapsed_time: '300s', }, + ...clickhouseExporterTables, }, + ...(otlpExporter ? otlpExporter : {}), }, service: { extensions: [], @@ -225,7 +268,7 @@ export const buildOtelCollectorConfig = (teams: ITeam[]): CollectorConfig => { traces: { receivers: ['nop'], processors: ['memory_limiter', 'batch'], - exporters: ['clickhouse'], + exporters: ['clickhouse', ...(otlpForward ? otlpForward : [])], }, metrics: { // TODO: prometheus needs to be authenticated @@ -236,7 +279,7 @@ export const buildOtelCollectorConfig = (teams: ITeam[]): CollectorConfig => { 'logs/in': { // TODO: fluentforward needs to be authenticated receivers: ['fluentforward'], - exporters: ['routing/logs'], + exporters: ['routing/logs', ...(otlpForward ? 
otlpForward : [])], }, 'logs/out-default': { receivers: ['routing/logs'], diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index e60779ad6..1de8cf188 100644 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -42,28 +42,33 @@ export default class Server { } async start() { - this.appServer = this.createAppServer(); - this.appServer.keepAliveTimeout = 61000; // Ensure all inactive connections are terminated by the ALB, by setting this a few seconds higher than the ALB idle timeout - this.appServer.headersTimeout = 62000; // Ensure the headersTimeout is set higher than the keepAliveTimeout due to this nodejs regression bug: https://github.com/nodejs/node/issues/27363 + const runningServers: http.Server[] = []; + if (config.IS_DEV && !config.IS_OPAMP_ONLY) { + this.appServer = this.createAppServer(); + this.appServer.keepAliveTimeout = 61000; // Ensure all inactive connections are terminated by the ALB, by setting this a few seconds higher than the ALB idle timeout + this.appServer.headersTimeout = 62000; // Ensure the headersTimeout is set higher than the keepAliveTimeout due to this nodejs regression bug: https://github.com/nodejs/node/issues/27363 + + this.appServer.listen(config.PORT, () => { + logger.info( + `API Server listening on port ${config.PORT}, NODE_ENV=${process.env.NODE_ENV}`, + ); + }); + runningServers.push(this.appServer); + } this.opampServer = this.createOpampServer(); this.opampServer.keepAliveTimeout = 61000; this.opampServer.headersTimeout = 62000; - this.appServer.listen(config.PORT, () => { - logger.info( - `API Server listening on port ${config.PORT}, NODE_ENV=${process.env.NODE_ENV}`, - ); - }); - this.opampServer.listen(config.OPAMP_PORT, () => { logger.info( `OpAMP Server listening on port ${config.OPAMP_PORT}, NODE_ENV=${process.env.NODE_ENV}`, ); }); + runningServers.push(this.opampServer); if (this.shouldHandleGracefulShutdown) { - [this.appServer, this.opampServer].forEach(server => { + 
runningServers.forEach(server => { gracefulShutdown(server, { signals: 'SIGINT SIGTERM', timeout: 10000, // 10 secs From cea967222cb5218694acbbf26011c8922ca9e2bf Mon Sep 17 00:00:00 2001 From: Aaron Knudtson <87577305+knudtty@users.noreply.github.com> Date: Thu, 9 Oct 2025 11:56:10 -0400 Subject: [PATCH 2/3] fix startup condition --- packages/api/src/server.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index 1de8cf188..c3eba43e2 100644 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -43,7 +43,7 @@ export default class Server { async start() { const runningServers: http.Server[] = []; - if (config.IS_DEV && !config.IS_OPAMP_ONLY) { + if (!config.IS_DEV || !config.IS_OPAMP_ONLY) { this.appServer = this.createAppServer(); this.appServer.keepAliveTimeout = 61000; // Ensure all inactive connections are terminated by the ALB, by setting this a few seconds higher than the ALB idle timeout this.appServer.headersTimeout = 62000; // Ensure the headersTimeout is set higher than the keepAliveTimeout due to this nodejs regression bug: https://github.com/nodejs/node/issues/27363 From f6dedd23937b8472d64cfd7390df1b7e4b68a6aa Mon Sep 17 00:00:00 2001 From: Aaron Knudtson <87577305+knudtty@users.noreply.github.com> Date: Wed, 15 Oct 2025 16:22:30 -0400 Subject: [PATCH 3/3] fix: restrict IS_OPAMP_ONLY to dev --- packages/api/src/server.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/api/src/server.ts b/packages/api/src/server.ts index b288e0a96..b55ecd3f4 100644 --- a/packages/api/src/server.ts +++ b/packages/api/src/server.ts @@ -46,7 +46,7 @@ export default class Server { async start() { const runningServers: http.Server[] = []; - if (!config.IS_DEV || !config.IS_OPAMP_ONLY) { + if (!(config.IS_DEV && !config.IS_OPAMP_ONLY)) { this.appServer = this.createAppServer(); this.appServer.keepAliveTimeout = 61000; // Ensure all inactive connections are 
terminated by the ALB, by setting this a few seconds higher than the ALB idle timeout this.appServer.headersTimeout = 62000; // Ensure the headersTimeout is set higher than the keepAliveTimeout due to this nodejs regression bug: https://github.com/nodejs/node/issues/27363