Skip to content

Commit 52ffcdc

Browse files
committed
update benchmarks/README and refactor
1 parent e15b2b0 commit 52ffcdc

File tree

8 files changed

+110
-55
lines changed

8 files changed

+110
-55
lines changed

Cargo.lock

Lines changed: 0 additions & 26 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

benchmarks/Cargo.toml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,5 @@ test-utils = { path = "../test-utils/", version = "0.1.0" }
5656
tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
5757
tokio-util = { version = "0.7.15" }
5858

59-
[target.'cfg(target_os = "linux")'.dependencies]
60-
procfs = "0.17.0"
61-
6259
[dev-dependencies]
6360
datafusion-proto = { workspace = true }

benchmarks/README.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,6 +283,7 @@ This will produce output like:
283283
└──────────────┴──────────────┴──────────────┴───────────────┘
284284
```
285285

286+
286287
# Benchmark Runner
287288

288289
The `dfbench` program contains subcommands to run the various
@@ -321,6 +322,64 @@ FLAGS:
321322
...
322323
```
323324

325+
# Profiling Memory Stats for each benchmark query
326+
The `mem_profile` program wraps benchmark execution to measure memory usage statistics, such as peak RSS. It runs each benchmark query in a separate subprocess, capturing the child process’s stdout to print structured output.
327+
328+
Subcommands supported by mem_profile are the subset of those in `dfbench`.
329+
Currently supported benchmarks include: Clickbench, H2o, Imdb, SortTpch, Tpch
330+
331+
Before running benchmarks, `mem_profile` automatically compiles the benchmark binary (`dfbench`) using `cargo build` with the same cargo profile (e.g., --release) as mem_profile itself. By prebuilding the binary and running each query in a separate process, we can ensure accurate memory statistics.
332+
333+
Currently, `mem_profile` only supports `mimalloc` as the memory allocator, since it relies on `mimalloc`'s API to collect memory statistics.
334+
335+
Because it runs the compiled binary directly from the target directory, make sure your working directory is the top-level datafusion/ directory, where the target/ is also located.
336+
337+
Example:
338+
```shell
339+
datafusion$ cargo run --profile release-nonlto --bin mem_profile -- tpch --path benchmarks/data/tpch_sf1 --partitions 4 --format parquet
340+
```
341+
Example Output:
342+
```
343+
Query Time (ms) Peak RSS Peak Commit Page Faults
344+
--------------------------------------------------------------
345+
1 539.96 252.4 MB 2.0 GB 0
346+
2 444.21 221.7 MB 2.0 GB 0
347+
3 607.90 317.7 MB 2.0 GB 0
348+
4 440.49 503.7 MB 3.0 GB 0
349+
5 673.57 361.1 MB 3.0 GB 0
350+
6 297.92 241.9 MB 2.0 GB 0
351+
7 690.04 615.8 MB 3.0 GB 0
352+
8 722.96 378.6 MB 3.0 GB 0
353+
9 817.40 581.5 MB 3.0 GB 0
354+
10 704.04 406.8 MB 2.0 GB 0
355+
11 264.40 194.2 MB 2.0 GB 0
356+
12 478.89 192.2 MB 2.0 GB 0
357+
13 502.77 349.1 MB 3.0 GB 0
358+
14 397.61 309.5 MB 2.0 GB 0
359+
15 501.35 273.4 MB 2.0 GB 0
360+
16 341.21 222.5 MB 2.0 GB 0
361+
17 724.57 481.9 MB 2.0 GB 0
362+
18 1035.77 604.2 MB 3.0 GB 0
363+
19 639.52 278.1 MB 3.0 GB 0
364+
20 566.33 405.8 MB 2.0 GB 0
365+
21 910.40 387.4 MB 3.0 GB 0
366+
22 381.24 149.2 MB 3.0 GB 0
367+
```
368+
369+
## Reported Metrics
370+
When running benchmarks, `mem_profile` collects several memory-related statistics using the mimalloc API:
371+
372+
- Peak RSS (Resident Set Size):
373+
The maximum amount of physical memory used by the process.
374+
This is a process-level metric collected via OS-specific mechanisms and is not mimalloc-specific.
375+
376+
- Peak Commit:
377+
The peak amount of memory committed by the allocator (i.e., total virtual memory reserved).
378+
This is mimalloc-specific. It gives a more allocator-aware view of memory usage than RSS.
379+
380+
- Page Faults:
381+
The number of page faults triggered during execution.
382+
This metric is obtained from the operating system and is not mimalloc-specific.
324383
# Writing a new benchmark
325384

326385
## Creating or downloading data outside of the benchmark

benchmarks/src/bin/mem_profile.rs

Lines changed: 46 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ use datafusion_benchmarks::{
3232

3333
#[derive(Debug, StructOpt)]
3434
#[structopt(about = "benchmark command")]
35-
#[allow(dead_code)]
3635
enum Options {
3736
Clickbench(clickbench::RunOpt),
3837
H2o(h2o::RunOpt),
@@ -43,31 +42,49 @@ enum Options {
4342

4443
#[tokio::main]
4544
pub async fn main() -> Result<()> {
46-
// 1. parse args and check which benchmarks should be run
47-
// let opt = MemProfileOpt::from_args();
45+
// 1. Parse args and check which benchmarks should be run
4846
let profile = env::var("PROFILE").unwrap_or_else(|_| "release".to_string());
49-
5047
let args = env::args().skip(1);
51-
// let opt = Options::from_iter(args);
5248
let query_range = match Options::from_args() {
53-
// TODO clickbench
54-
// TODO run for specific query id
55-
Options::Clickbench(_) => 0..=42,
49+
Options::Clickbench(opt) => {
50+
let entries = std::fs::read_dir(&opt.queries_path)?
51+
.filter_map(Result::ok)
52+
.filter(|e| {
53+
let path = e.path();
54+
path.extension().map(|ext| ext == "sql").unwrap_or(false)
55+
})
56+
.collect::<Vec<_>>();
57+
58+
let max_query_id = entries.len().saturating_sub(1);
59+
match opt.query {
60+
Some(query_id) => query_id..=query_id,
61+
None => 0..=max_query_id,
62+
}
63+
}
5664
Options::H2o(opt) => {
5765
let queries = AllQueries::try_new(&opt.queries_path)?;
5866
match opt.query {
5967
Some(query_id) => query_id..=query_id,
6068
None => queries.min_query_id()..=queries.max_query_id(),
6169
}
6270
}
63-
Options::Imdb(_) => imdb::IMDB_QUERY_START_ID..=imdb::IMDB_QUERY_END_ID,
64-
Options::SortTpch(_) => {
65-
sort_tpch::SORT_TPCH_QUERY_START_ID..=sort_tpch::SORT_TPCH_QUERY_END_ID
66-
}
67-
Options::Tpch(_) => tpch::TPCH_QUERY_START_ID..=tpch::TPCH_QUERY_END_ID,
71+
Options::Imdb(opt) => match opt.query {
72+
Some(query_id) => query_id..=query_id,
73+
None => imdb::IMDB_QUERY_START_ID..=imdb::IMDB_QUERY_END_ID,
74+
},
75+
Options::SortTpch(opt) => match opt.query {
76+
Some(query_id) => query_id..=query_id,
77+
None => {
78+
sort_tpch::SORT_TPCH_QUERY_START_ID..=sort_tpch::SORT_TPCH_QUERY_END_ID
79+
}
80+
},
81+
Options::Tpch(opt) => match opt.query {
82+
Some(query_id) => query_id..=query_id,
83+
None => tpch::TPCH_QUERY_START_ID..=tpch::TPCH_QUERY_END_ID,
84+
},
6885
};
6986

70-
// 2. prebuild test binary so that memory does not blow up due to build process
87+
// 2. Prebuild dfbench binary so that memory does not blow up due to build process
7188
println!("Pre-building benchmark binary...");
7289
let status = Command::new("cargo")
7390
.args([
@@ -84,9 +101,8 @@ pub async fn main() -> Result<()> {
84101
assert!(status.success());
85102
println!("Benchmark binary built successfully.");
86103

87-
// 3. spawn a new process per each benchmark query and print summary
104+
// 3. Create a new process per each benchmark query and print summary
88105
let mut dfbench_args: Vec<String> = args.collect();
89-
println!("{dfbench_args:?}");
90106
run_benchmark_as_child_process(&profile, query_range, &mut dfbench_args)?;
91107

92108
Ok(())
@@ -104,9 +120,17 @@ fn run_benchmark_as_child_process(
104120

105121
let command = format!("target/{profile}/dfbench");
106122
args.insert(0, command);
107-
args.push("--query".to_string());
108-
109123
let mut results = vec![];
124+
125+
// Run Single Query (args already contain --query num)
126+
if args.contains(&"--query".to_string()) {
127+
let _ = run_query(args, &mut results);
128+
print_summary_table(&results);
129+
return Ok(());
130+
}
131+
132+
// Run All Queries
133+
args.push("--query".to_string());
110134
for query_str in query_strings {
111135
args.push(query_str);
112136
let _ = run_query(args, &mut results);
@@ -130,18 +154,19 @@ fn run_query(args: &[String], results: &mut Vec<QueryResult>) -> Result<()> {
130154
let stdout = child.stdout.take().unwrap();
131155
let reader = BufReader::new(stdout);
132156

133-
// buffer stdout
157+
// Buffer child's stdout
134158
let lines: Result<Vec<String>, std::io::Error> =
135159
reader.lines().collect::<Result<_, _>>();
136160

137161
child
138162
.wait()
139163
.expect("Benchmark process exited with an error");
140164

141-
// parse after child process terminates
165+
// Parse after child process terminates
142166
let lines = lines?;
143167
let mut iter = lines.iter().peekable();
144168

169+
// Look for lines that contain execution time / memory stats
145170
while let Some(line) = iter.next() {
146171
if let Some((query, duration_ms)) = parse_query_time(line) {
147172
if let Some(next_line) = iter.peek() {
@@ -201,7 +226,7 @@ fn print_summary_table(results: &[QueryResult]) {
201226
"\n{:<8} {:>10} {:>12} {:>12} {:>12}",
202227
"Query", "Time (ms)", "Peak RSS", "Peak Commit", "Page Faults"
203228
);
204-
println!("{}", "-".repeat(68));
229+
println!("{}", "-".repeat(62));
205230

206231
for r in results {
207232
println!(

benchmarks/src/clickbench.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ use structopt::StructOpt;
4242
pub struct RunOpt {
4343
/// Query number (between 0 and 42). If not specified, runs all queries
4444
#[structopt(short, long)]
45-
query: Option<usize>,
45+
pub query: Option<usize>,
4646

4747
/// Common options
4848
#[structopt(flatten)]
@@ -65,7 +65,7 @@ pub struct RunOpt {
6565
long = "queries-path",
6666
default_value = "benchmarks/queries/clickbench/queries"
6767
)]
68-
queries_path: PathBuf,
68+
pub queries_path: PathBuf,
6969

7070
/// If present, write results json here
7171
#[structopt(parse(from_os_str), short = "o", long = "output")]

benchmarks/src/imdb/run.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ type BoolDefaultTrue = bool;
6262
pub struct RunOpt {
6363
/// Query number. If not specified, runs all queries
6464
#[structopt(short, long)]
65-
query: Option<usize>,
65+
pub query: Option<usize>,
6666

6767
/// Common options
6868
#[structopt(flatten)]

benchmarks/src/sort_tpch.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ pub struct RunOpt {
5050

5151
/// Sort query number. If not specified, runs all queries
5252
#[structopt(short, long)]
53-
query: Option<usize>,
53+
pub query: Option<usize>,
5454

5555
/// Path to data files (lineitem). Only parquet format is supported
5656
#[structopt(parse(from_os_str), required = true, short = "p", long = "path")]

benchmarks/src/tpch/run.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ type BoolDefaultTrue = bool;
6161
pub struct RunOpt {
6262
/// Query number. If not specified, runs all queries
6363
#[structopt(short, long)]
64-
query: Option<usize>,
64+
pub query: Option<usize>,
6565

6666
/// Common options
6767
#[structopt(flatten)]

0 commit comments

Comments
 (0)