Skip to content

Commit 2cc08b0

Browse files
authored
generator: allow outputting main_host once per interval or weekly (#204)
* generator: only output main_host once per interval

  prevents having to parse 31M host entries when generating 1M

* generator: allow setting main_host output frequency

  MAIN_HOST_FREQUENCY=1 - default, output every day
  MAIN_HOST_FREQUENCY=7 - output every 7 days
  MAIN_HOST_FREQUENCY=0 - output once, at the end of the interval

  and update run-perf-gen to use 1, 7, 0 for 10k, 100k and 1M respectively
1 parent 0c851b4 commit 2cc08b0

File tree

2 files changed

+21
-4
lines changed

2 files changed

+21
-4
lines changed

tools/perf/generator.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,7 @@ def parse_env(self):
252252
self.main_host = (
253253
int(os.getenv('MAIN_HOST_SIZE', '10000')),
254254
int(os.getenv('MAIN_HOST_UNIQUE_SIZE', '2000')),
255+
int(os.getenv('MAIN_HOST_FREQUENCY', '1')), # every N days; or once when 0
255256
)
256257
self.main_indirectmanagednodeaudit = (
257258
int(os.getenv('MAIN_INDIRECT_SIZE', '10000')),
@@ -287,6 +288,7 @@ def parse_args(self):
287288
MAIN_JOBHOSTSUMMARY_UNIQUE_SIZE (default: 2000)
288289
MAIN_HOST_SIZE (default: 10000)
289290
MAIN_HOST_UNIQUE_SIZE (default: 2000)
291+
MAIN_HOST_FREQUENCY (default: 1)
290292
MAIN_INDIRECT_SIZE (default: 10000)
291293
MAIN_INDIRECT_UNIQUE_SIZE (default: 2000)
292294
MAIN_JOBEVENT_SIZE (default: 10000)
@@ -400,11 +402,25 @@ def save_tarballs(self, table):
400402
return
401403

402404
if table == 'main_host':
403-
# main_host - only generate csvs once, not filtered by since/until; use for each daily tarball
405+
# main_host - only generate csvs once, not filtered by since/until
404406
with tempfile.TemporaryDirectory(prefix=f'metrics-generator-save-{table}') as temp_dir:
405407
file_list = self.save_csvs(table, temp_dir, df)
406408

409+
# output every N days (MAIN_HOST_FREQUENCY=N), or once at the end of the period if 0
410+
frequency = self.main_host[2]
411+
idx = 0
407412
for since, until in daily_slicing(since=self.output_from, until=self.output_to):
413+
idx += 1
414+
if not frequency:
415+
continue
416+
if idx % frequency:
417+
continue
418+
419+
logger.info(f'{table} - {since}-{until}')
420+
for file in file_list:
421+
self.tarify(table, since, until, file)
422+
423+
if not frequency:
408424
logger.info(f'{table} - {since}-{until}')
409425
for file in file_list:
410426
self.tarify(table, since, until, file)

tools/perf/run-perf-gen

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ run_generator() {
1919
export MAIN_JOBHOSTSUMMARY_UNIQUE_SIZE="$2"
2020
export MAIN_HOST_SIZE="$1"
2121
export MAIN_HOST_UNIQUE_SIZE="$2"
22+
export MAIN_HOST_FREQUENCY="$5"
2223
export MAIN_INDIRECT_SIZE="$1"
2324
export MAIN_INDIRECT_UNIQUE_SIZE="$2"
2425
export MAIN_JOBEVENT_SIZE="$1"
@@ -35,13 +36,13 @@ run_generator() {
3536

3637
## generator
3738
log "gen 10,000"
38-
run_generator 10000 2000 2026-01-01 2026-02-01
39+
run_generator 10000 2000 2026-01-01 2026-02-01 1
3940

4041
log "gen 100,000"
41-
run_generator 100000 20000 2026-02-01 2026-03-01
42+
run_generator 100000 20000 2026-02-01 2026-03-01 7
4243

4344
log "gen 1,000,000"
44-
run_generator 1000000 200000 2026-03-01 2026-04-01
45+
run_generator 1000000 200000 2026-03-01 2026-04-01 0
4546

4647
## checkup
4748
set -x

0 commit comments

Comments
 (0)