Skip to content

Commit 8190ee8

Browse files
authored
perf: Improve getKeyValues query performance for JSON keys (#1284)
Closes HDX-2623 # Summary This change improves the performance of `getKeyValues` when getting values of a JSON key. Generally, columns that are not referenced outside of a CTE will be pruned by the query planner. For JSON however, if the outer select references one field in a JSON column, then the inner select will read (it seems) the entire JSON object. This PR also adds integration tests for `getKeyValues` to ensure that the function generates queries that work as expected in ClickHouse. ## Performance impact (on single JSON Dashboard Filter) - Original: 15.03s <img width="584" height="71" alt="Screenshot 2025-10-21 at 3 28 07 PM" src="https://github.com/user-attachments/assets/184de198-cee1-4b1d-beed-ec4465d3e248" /> - Optimized: 0.443s <img width="590" height="61" alt="Screenshot 2025-10-21 at 3 25 47 PM" src="https://github.com/user-attachments/assets/690d0ef0-15b8-47c5-9a7e-8b8f6b8f5e92" />
1 parent 93edb6f commit 8190ee8

File tree

10 files changed

+255
-89
lines changed

10 files changed

+255
-89
lines changed

.changeset/sweet-vans-pump.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@hyperdx/common-utils": patch
3+
---
4+
5+
perf: Improve getKeyValues query performance for JSON keys

Makefile

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,16 @@ dev-int:
4545
npx nx run @hyperdx/api:dev:int $(FILE)
4646
docker compose -p int -f ./docker-compose.ci.yml down
4747

48+
.PHONY: dev-int-common-utils
49+
dev-int-common-utils:
50+
docker compose -p int -f ./docker-compose.ci.yml up -d
51+
npx nx run @hyperdx/common-utils:dev:int $(FILE)
52+
docker compose -p int -f ./docker-compose.ci.yml down
53+
4854
.PHONY: ci-int
4955
ci-int:
5056
docker compose -p int -f ./docker-compose.ci.yml up -d
51-
npx nx run @hyperdx/api:ci:int
57+
npx nx run-many -t ci:int --parallel=false
5258
docker compose -p int -f ./docker-compose.ci.yml down
5359

5460
.PHONY: dev-unit

packages/common-utils/.env.test

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
CLICKHOUSE_HOST=http://localhost:8123
2+
CLICKHOUSE_PASSWORD=
3+
CLICKHOUSE_USER=default
4+
NODE_ENV=test

packages/common-utils/jest.config.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ module.exports = {
55
verbose: true,
66
rootDir: './src',
77
testMatch: ['**/__tests__/*.test.ts?(x)'],
8+
testPathIgnorePatterns: ['.*\\.int\\.test\\.ts$'],
89
testTimeout: 30000,
910
moduleNameMapper: {
1011
'@/(.*)$': '<rootDir>/$1',
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
/** @type {import('ts-jest/dist/types').InitialOptionsTsJest} */
2+
module.exports = {
3+
setupFiles: ['dotenv/config'],
4+
preset: 'ts-jest',
5+
testEnvironment: 'node',
6+
verbose: true,
7+
rootDir: './src',
8+
testMatch: ['**/__tests__/*.int.test.ts?(x)'],
9+
testTimeout: 30000,
10+
moduleNameMapper: {
11+
'@/(.*)$': '<rootDir>/$1',
12+
},
13+
};

packages/common-utils/package.json

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
"@types/sqlstring": "^2.3.0",
3737
"@types/supertest": "^2.0.12",
3838
"@types/uuid": "^8.3.4",
39+
"dotenv": "^17.2.3",
3940
"jest": "^28.1.1",
4041
"nodemon": "^2.0.20",
4142
"rimraf": "^4.4.1",
@@ -56,6 +57,8 @@
5657
"lint:fix": "npx eslint . --ext .ts --fix",
5758
"ci:lint": "yarn lint && yarn tsc --noEmit",
5859
"ci:unit": "jest --runInBand --ci --forceExit --coverage",
59-
"dev:unit": "jest --watchAll --runInBand --detectOpenHandles"
60+
"dev:unit": "jest --watchAll --runInBand --detectOpenHandles",
61+
"ci:int": "DOTENV_CONFIG_PATH=.env.test jest --config jest.int.config.js --runInBand --ci --forceExit --coverage",
62+
"dev:int": "DOTENV_CONFIG_PATH=.env.test jest --config jest.int.config.js --watchAll --runInBand --detectOpenHandles"
6063
}
6164
}
Lines changed: 193 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,193 @@
1+
import { createClient } from '@clickhouse/client';
2+
import { ClickHouseClient } from '@clickhouse/client-common';
3+
4+
import { ClickhouseClient as HdxClickhouseClient } from '@/clickhouse/node';
5+
import { Metadata, MetadataCache } from '@/metadata';
6+
import { ChartConfigWithDateRange } from '@/types';
7+
8+
describe('Metadata Integration Tests', () => {
9+
let client: ClickHouseClient;
10+
let hdxClient: HdxClickhouseClient;
11+
12+
beforeAll(() => {
13+
const host = process.env.CLICKHOUSE_HOST || 'http://localhost:8123';
14+
const username = process.env.CLICKHOUSE_USER || 'default';
15+
const password = process.env.CLICKHOUSE_PASSWORD || '';
16+
17+
client = createClient({
18+
url: host,
19+
username,
20+
password,
21+
});
22+
23+
hdxClient = new HdxClickhouseClient({
24+
host,
25+
username,
26+
password,
27+
});
28+
});
29+
30+
describe('getKeyValues', () => {
31+
let metadata: Metadata;
32+
const chartConfig: ChartConfigWithDateRange = {
33+
connection: 'test_connection',
34+
from: {
35+
databaseName: 'default',
36+
tableName: 'test_table',
37+
},
38+
dateRange: [new Date('2023-01-01'), new Date('2025-01-01')],
39+
select: 'col1, col2',
40+
timestampValueExpression: 'Timestamp',
41+
where: '',
42+
};
43+
44+
beforeAll(async () => {
45+
await client.command({
46+
query: `CREATE OR REPLACE TABLE default.test_table (
47+
Timestamp DateTime64(9) CODEC(Delta(8), ZSTD(1)),
48+
SeverityText LowCardinality(String) CODEC(ZSTD(1)),
49+
TraceId String,
50+
LogAttributes JSON CODEC(ZSTD(1)),
51+
ResourceAttributes Map(LowCardinality(String), String) CODEC(ZSTD(1)),
52+
\`__hdx_materialized_k8s.pod.name\` String MATERIALIZED ResourceAttributes['k8s.pod.name'] CODEC(ZSTD(1)),
53+
)
54+
ENGINE = MergeTree()
55+
ORDER BY (Timestamp)
56+
`,
57+
});
58+
59+
await client.command({
60+
query: `INSERT INTO default.test_table (Timestamp, SeverityText, TraceId, ResourceAttributes, LogAttributes) VALUES
61+
('2023-06-01 12:00:00', 'info', '1o2udn120d8n', { 'k8s.pod.name': 'pod1', 'env': 'prod' },'{"action":"ping"}'),
62+
('2024-06-01 12:00:00', 'error', '67890-09098', { 'k8s.pod.name': 'pod2', 'env': 'prod' },'{}'),
63+
('2024-06-01 12:00:00', 'info', '11h9238re1h92', { 'env': 'staging' },'{"user":"john"}'),
64+
('2024-06-01 12:00:00', 'warning', '1o2udn120d8n', { 'k8s.pod.name': 'pod1', 'env': 'prod' }, '{"user":"jack","action":"login"}'),
65+
('2024-06-01 12:00:00', '', '1o2udn120d8n', { 'env': 'prod' }, '{"user":"jane","action":"login"}')
66+
`,
67+
});
68+
});
69+
70+
beforeEach(async () => {
71+
metadata = new Metadata(hdxClient, new MetadataCache());
72+
});
73+
74+
afterAll(async () => {
75+
await client.command({
76+
query: 'DROP TABLE IF EXISTS default.test_table',
77+
});
78+
79+
await client.close();
80+
});
81+
82+
describe.each([true, false])('with disableRowLimit=%s', disableRowLimit => {
83+
it('should return key-value pairs for a given metadata key', async () => {
84+
const resultSeverityText = await metadata.getKeyValues({
85+
chartConfig,
86+
keys: ['SeverityText'],
87+
disableRowLimit,
88+
});
89+
90+
expect(resultSeverityText).toHaveLength(1);
91+
expect(resultSeverityText[0].key).toBe('SeverityText');
92+
expect(resultSeverityText[0].value).toHaveLength(3);
93+
expect(resultSeverityText[0].value).toEqual(
94+
expect.arrayContaining(['info', 'error', 'warning']),
95+
);
96+
97+
const resultTraceId = await metadata.getKeyValues({
98+
chartConfig,
99+
keys: ['TraceId'],
100+
disableRowLimit,
101+
});
102+
103+
expect(resultTraceId).toHaveLength(1);
104+
expect(resultTraceId[0].key).toBe('TraceId');
105+
expect(resultTraceId[0].value).toHaveLength(3);
106+
expect(resultTraceId[0].value).toEqual(
107+
expect.arrayContaining([
108+
'1o2udn120d8n',
109+
'67890-09098',
110+
'11h9238re1h92',
111+
]),
112+
);
113+
114+
const resultBoth = await metadata.getKeyValues({
115+
chartConfig,
116+
keys: ['TraceId', 'SeverityText'],
117+
disableRowLimit,
118+
});
119+
120+
expect(resultBoth).toEqual([
121+
{
122+
key: 'TraceId',
123+
value: expect.arrayContaining([
124+
'1o2udn120d8n',
125+
'67890-09098',
126+
'11h9238re1h92',
127+
]),
128+
},
129+
{
130+
key: 'SeverityText',
131+
value: expect.arrayContaining(['info', 'error', 'warning']),
132+
},
133+
]);
134+
});
135+
136+
it('should handle materialized columns correctly', async () => {
137+
const resultPodName = await metadata.getKeyValues({
138+
chartConfig,
139+
keys: ['__hdx_materialized_k8s.pod.name'],
140+
disableRowLimit,
141+
});
142+
143+
expect(resultPodName).toHaveLength(1);
144+
expect(resultPodName[0].key).toBe('__hdx_materialized_k8s.pod.name');
145+
expect(resultPodName[0].value).toHaveLength(2);
146+
expect(resultPodName[0].value).toEqual(
147+
expect.arrayContaining(['pod1', 'pod2']),
148+
);
149+
});
150+
151+
it('should handle JSON columns correctly', async () => {
152+
const resultLogAttributes = await metadata.getKeyValues({
153+
chartConfig,
154+
keys: ['LogAttributes.user'],
155+
disableRowLimit,
156+
});
157+
158+
expect(resultLogAttributes).toHaveLength(1);
159+
expect(resultLogAttributes[0].key).toBe('LogAttributes.user');
160+
expect(resultLogAttributes[0].value).toHaveLength(3);
161+
expect(resultLogAttributes[0].value).toEqual(
162+
expect.arrayContaining(['john', 'jack', 'jane']),
163+
);
164+
});
165+
166+
it('should return an empty list when no keys are provided', async () => {
167+
const resultEmpty = await metadata.getKeyValues({
168+
chartConfig,
169+
keys: [],
170+
});
171+
172+
expect(resultEmpty).toEqual([]);
173+
});
174+
175+
it('should correctly limit the number of returned values', async () => {
176+
const resultLimited = await metadata.getKeyValues({
177+
chartConfig,
178+
keys: ['SeverityText'],
179+
limit: 2,
180+
});
181+
182+
expect(resultLimited).toHaveLength(1);
183+
expect(resultLimited[0].key).toBe('SeverityText');
184+
expect(resultLimited[0].value).toHaveLength(2);
185+
expect(
186+
resultLimited[0].value.every(v =>
187+
['info', 'error', 'warning'].includes(v),
188+
),
189+
).toBeTruthy();
190+
});
191+
});
192+
});
193+
});

packages/common-utils/src/__tests__/metadata.test.ts

Lines changed: 5 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -251,22 +251,6 @@ describe('Metadata', () => {
251251
],
252252
}),
253253
});
254-
255-
// Mock getColumns to return columns including materialized ones
256-
mockCache.getOrFetch.mockImplementation((key, queryFn) => {
257-
if (key.includes('.columns')) {
258-
return Promise.resolve([
259-
{ name: 'regular_column', type: 'String', default_type: '' },
260-
{
261-
name: 'materialized_column',
262-
type: 'String',
263-
default_type: 'MATERIALIZED',
264-
},
265-
{ name: 'default_column', type: 'String', default_type: 'DEFAULT' },
266-
]);
267-
}
268-
return queryFn();
269-
});
270254
});
271255

272256
it('should apply row limit when disableRowLimit is false', async () => {
@@ -355,71 +339,20 @@ describe('Metadata', () => {
355339
expect(result).toEqual([{ key: 'column1', value: ['value1', 'value2'] }]);
356340
});
357341

358-
it('should include materialized fields when selecting all columns', async () => {
359-
const renderChartConfigSpy = jest.spyOn(
360-
renderChartConfigModule,
361-
'renderChartConfig',
362-
);
363-
364-
await metadata.getKeyValues({
365-
chartConfig: mockChartConfig,
366-
keys: ['column1'],
367-
limit: 10,
368-
});
369-
370-
// Verify that renderChartConfig was called with the expanded select list
371-
// that includes all columns by name (including materialized ones)
372-
expect(renderChartConfigSpy).toHaveBeenCalledWith(
373-
expect.objectContaining({
374-
with: [
375-
expect.objectContaining({
376-
name: 'sampledData',
377-
chartConfig: expect.objectContaining({
378-
// Should expand to all column names instead of using '*'
379-
select:
380-
'`regular_column`, `materialized_column`, `default_column`',
381-
}),
382-
}),
383-
],
384-
}),
385-
metadata,
386-
);
387-
});
388-
389-
it('should fallback to * when no columns are found', async () => {
390-
// Mock getColumns to return empty array
391-
mockCache.getOrFetch.mockImplementation((key, queryFn) => {
392-
if (key.includes('.columns')) {
393-
return Promise.resolve([]);
394-
}
395-
return queryFn();
396-
});
397-
342+
it('should return an empty list when no keys are provided', async () => {
398343
const renderChartConfigSpy = jest.spyOn(
399344
renderChartConfigModule,
400345
'renderChartConfig',
401346
);
402347

403-
await metadata.getKeyValues({
348+
const results = await metadata.getKeyValues({
404349
chartConfig: mockChartConfig,
405-
keys: ['column1'],
350+
keys: [],
406351
limit: 10,
407352
});
408353

409-
// Should fallback to '*' when no columns are found
410-
expect(renderChartConfigSpy).toHaveBeenCalledWith(
411-
expect.objectContaining({
412-
with: [
413-
expect.objectContaining({
414-
name: 'sampledData',
415-
chartConfig: expect.objectContaining({
416-
select: '*',
417-
}),
418-
}),
419-
],
420-
}),
421-
metadata,
422-
);
354+
expect(results).toEqual([]);
355+
expect(renderChartConfigSpy).not.toHaveBeenCalled();
423356
});
424357
});
425358

0 commit comments

Comments
 (0)