update benchmarks/README and refactor

ding-young · ding-young · commit 52ffcdc2655b · 2025-07-25T02:42:14.000Z
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml
@@ -56,8 +56,5 @@ test-utils = { path = "../test-utils/", version = "0.1.0" }
 tokio = { workspace = true, features = ["rt-multi-thread", "parking_lot"] }
 tokio-util = { version = "0.7.15" }
 
-[target.'cfg(target_os = "linux")'.dependencies]
-procfs = "0.17.0"
-
 [dev-dependencies]
 datafusion-proto = { workspace = true }
diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -283,6 +283,7 @@ This will produce output like:
 └──────────────┴──────────────┴──────────────┴───────────────┘
 ```
 
+
 # Benchmark Runner
 
 The `dfbench` program contains subcommands to run the various
@@ -321,6 +322,64 @@ FLAGS:
 ...
 ```
 
+# Profiling Memory Stats for each benchmark query
+The `mem_profile` program wraps benchmark execution to measure memory usage statistics, such as peak RSS. It runs each benchmark query in a separate subprocess, capturing the child process’s stdout to print structured output.
+
+Subcommands supported by `mem_profile` are a subset of those in `dfbench`.
+Currently supported benchmarks include: Clickbench, H2o, Imdb, SortTpch, and Tpch.
+
+Before running benchmarks, `mem_profile` automatically compiles the benchmark binary (`dfbench`) using `cargo build` with the same cargo profile (e.g., `--release`) as `mem_profile` itself. Prebuilding the binary and running each query in a separate process keeps compilation and other queries from inflating the memory statistics reported for a query.
+
+Currently, `mem_profile` only supports `mimalloc` as the memory allocator, since it relies on `mimalloc`'s API to collect memory statistics.
+
+Because it runs the compiled binary directly from the target directory, make sure your working directory is the top-level `datafusion/` directory, where the `target/` directory is also located.
+
+Example: 
+```shell
+datafusion$ cargo run --profile release-nonlto --bin mem_profile -- tpch --path benchmarks/data/tpch_sf1 --partitions 4 --format parquet
+```
+Example Output:
+```
+Query     Time (ms)     Peak RSS  Peak Commit  Page Faults
+--------------------------------------------------------------
+1            539.96     252.4 MB       2.0 GB            0
+2            444.21     221.7 MB       2.0 GB            0
+3            607.90     317.7 MB       2.0 GB            0
+4            440.49     503.7 MB       3.0 GB            0
+5            673.57     361.1 MB       3.0 GB            0
+6            297.92     241.9 MB       2.0 GB            0
+7            690.04     615.8 MB       3.0 GB            0
+8            722.96     378.6 MB       3.0 GB            0
+9            817.40     581.5 MB       3.0 GB            0
+10           704.04     406.8 MB       2.0 GB            0
+11           264.40     194.2 MB       2.0 GB            0
+12           478.89     192.2 MB       2.0 GB            0
+13           502.77     349.1 MB       3.0 GB            0
+14           397.61     309.5 MB       2.0 GB            0
+15           501.35     273.4 MB       2.0 GB            0
+16           341.21     222.5 MB       2.0 GB            0
+17           724.57     481.9 MB       2.0 GB            0
+18          1035.77     604.2 MB       3.0 GB            0
+19           639.52     278.1 MB       3.0 GB            0
+20           566.33     405.8 MB       2.0 GB            0
+21           910.40     387.4 MB       3.0 GB            0
+22           381.24     149.2 MB       3.0 GB            0
+```
+
+## Reported Metrics
+When running benchmarks, `mem_profile` collects several memory-related statistics using the mimalloc API:
+
+- Peak RSS (Resident Set Size): 
+The maximum amount of physical memory used by the process.
+This is a process-level metric collected via OS-specific mechanisms and is not mimalloc-specific.
+
+- Peak Commit:
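+The peak amount of memory committed by the allocator, i.e., memory the allocator has asked the OS to make usable. This can be larger than the resident set, because committed pages are not necessarily touched.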
+This is mimalloc-specific. It gives a more allocator-aware view of memory usage than RSS.
+
+- Page Faults:
+The number of page faults triggered during execution.
+This metric is obtained from the operating system and is not mimalloc-specific.
 # Writing a new benchmark
 
 ## Creating or downloading data outside of the benchmark
diff --git a/benchmarks/src/bin/mem_profile.rs b/benchmarks/src/bin/mem_profile.rs
@@ -32,7 +32,6 @@ use datafusion_benchmarks::{
 
 #[derive(Debug, StructOpt)]
 #[structopt(about = "benchmark command")]
-#[allow(dead_code)]
 enum Options {
     Clickbench(clickbench::RunOpt),
     H2o(h2o::RunOpt),
@@ -43,31 +42,49 @@ enum Options {
 
 #[tokio::main]
 pub async fn main() -> Result<()> {
-    // 1. parse args and check which benchmarks should be run
-    // let opt = MemProfileOpt::from_args();
+    // 1. Parse args and check which benchmarks should be run
     let profile = env::var("PROFILE").unwrap_or_else(|_| "release".to_string());
-
     let args = env::args().skip(1);
-    // let opt = Options::from_iter(args);
     let query_range = match Options::from_args() {
-        // TODO clickbench
-        // TODO run for specific query id
-        Options::Clickbench(_) => 0..=42,
+        Options::Clickbench(opt) => {
+            let entries = std::fs::read_dir(&opt.queries_path)?
+                .filter_map(Result::ok)
+                .filter(|e| {
+                    let path = e.path();
+                    path.extension().map(|ext| ext == "sql").unwrap_or(false)
+                })
+                .collect::<Vec<_>>();
+
+            let max_query_id = entries.len().saturating_sub(1);
+            match opt.query {
+                Some(query_id) => query_id..=query_id,
+                None => 0..=max_query_id,
+            }
+        }
         Options::H2o(opt) => {
             let queries = AllQueries::try_new(&opt.queries_path)?;
             match opt.query {
                 Some(query_id) => query_id..=query_id,
                 None => queries.min_query_id()..=queries.max_query_id(),
             }
         }
-        Options::Imdb(_) => imdb::IMDB_QUERY_START_ID..=imdb::IMDB_QUERY_END_ID,
-        Options::SortTpch(_) => {
-            sort_tpch::SORT_TPCH_QUERY_START_ID..=sort_tpch::SORT_TPCH_QUERY_END_ID
-        }
-        Options::Tpch(_) => tpch::TPCH_QUERY_START_ID..=tpch::TPCH_QUERY_END_ID,
+        Options::Imdb(opt) => match opt.query {
+            Some(query_id) => query_id..=query_id,
+            None => imdb::IMDB_QUERY_START_ID..=imdb::IMDB_QUERY_END_ID,
+        },
+        Options::SortTpch(opt) => match opt.query {
+            Some(query_id) => query_id..=query_id,
+            None => {
+                sort_tpch::SORT_TPCH_QUERY_START_ID..=sort_tpch::SORT_TPCH_QUERY_END_ID
+            }
+        },
+        Options::Tpch(opt) => match opt.query {
+            Some(query_id) => query_id..=query_id,
+            None => tpch::TPCH_QUERY_START_ID..=tpch::TPCH_QUERY_END_ID,
+        },
     };
 
-    // 2. prebuild test binary so that memory does not blow up due to build process
+    // 2. Prebuild dfbench binary so that memory does not blow up due to build process
     println!("Pre-building benchmark binary...");
     let status = Command::new("cargo")
         .args([
@@ -84,9 +101,8 @@ pub async fn main() -> Result<()> {
     assert!(status.success());
     println!("Benchmark binary built successfully.");
 
-    // 3. spawn a new process per each benchmark query and print summary
+    // 3. Create a new process for each benchmark query and print summary
     let mut dfbench_args: Vec<String> = args.collect();
-    println!("{dfbench_args:?}");
     run_benchmark_as_child_process(&profile, query_range, &mut dfbench_args)?;
 
     Ok(())
@@ -104,9 +120,17 @@ fn run_benchmark_as_child_process(
 
     let command = format!("target/{profile}/dfbench");
     args.insert(0, command);
-    args.push("--query".to_string());
-
     let mut results = vec![];
+
+    // Run Single Query (args already contain --query num)
+    if args.contains(&"--query".to_string()) {
+        let _ = run_query(args, &mut results);
+        print_summary_table(&results);
+        return Ok(());
+    }
+
+    // Run All Queries
+    args.push("--query".to_string());
     for query_str in query_strings {
         args.push(query_str);
         let _ = run_query(args, &mut results);
@@ -130,18 +154,19 @@ fn run_query(args: &[String], results: &mut Vec<QueryResult>) -> Result<()> {
     let stdout = child.stdout.take().unwrap();
     let reader = BufReader::new(stdout);
 
-    // buffer stdout
+    // Buffer child's stdout
     let lines: Result<Vec<String>, std::io::Error> =
         reader.lines().collect::<Result<_, _>>();
 
     child
         .wait()
         .expect("Benchmark process exited with an error");
 
-    // parse after child process terminates
+    // Parse after child process terminates
     let lines = lines?;
     let mut iter = lines.iter().peekable();
 
+    // Look for lines that contain execution time / memory stats
     while let Some(line) = iter.next() {
         if let Some((query, duration_ms)) = parse_query_time(line) {
             if let Some(next_line) = iter.peek() {
@@ -201,7 +226,7 @@ fn print_summary_table(results: &[QueryResult]) {
         "\n{:<8} {:>10} {:>12} {:>12} {:>12}",
         "Query", "Time (ms)", "Peak RSS", "Peak Commit", "Page Faults"
     );
-    println!("{}", "-".repeat(68));
+    println!("{}", "-".repeat(62));
 
     for r in results {
         println!(
diff --git a/benchmarks/src/clickbench.rs b/benchmarks/src/clickbench.rs
@@ -42,7 +42,7 @@ use structopt::StructOpt;
 pub struct RunOpt {
     /// Query number (between 0 and 42). If not specified, runs all queries
     #[structopt(short, long)]
-    query: Option<usize>,
+    pub query: Option<usize>,
 
     /// Common options
     #[structopt(flatten)]
@@ -65,7 +65,7 @@ pub struct RunOpt {
         long = "queries-path",
         default_value = "benchmarks/queries/clickbench/queries"
     )]
-    queries_path: PathBuf,
+    pub queries_path: PathBuf,
 
     /// If present, write results json here
     #[structopt(parse(from_os_str), short = "o", long = "output")]
diff --git a/benchmarks/src/imdb/run.rs b/benchmarks/src/imdb/run.rs
@@ -62,7 +62,7 @@ type BoolDefaultTrue = bool;
 pub struct RunOpt {
     /// Query number. If not specified, runs all queries
     #[structopt(short, long)]
-    query: Option<usize>,
+    pub query: Option<usize>,
 
     /// Common options
     #[structopt(flatten)]
diff --git a/benchmarks/src/sort_tpch.rs b/benchmarks/src/sort_tpch.rs
@@ -50,7 +50,7 @@ pub struct RunOpt {
 
     /// Sort query number. If not specified, runs all queries
     #[structopt(short, long)]
-    query: Option<usize>,
+    pub query: Option<usize>,
 
     /// Path to data files (lineitem). Only parquet format is supported
     #[structopt(parse(from_os_str), required = true, short = "p", long = "path")]
diff --git a/benchmarks/src/tpch/run.rs b/benchmarks/src/tpch/run.rs
@@ -61,7 +61,7 @@ type BoolDefaultTrue = bool;
 pub struct RunOpt {
     /// Query number. If not specified, runs all queries
     #[structopt(short, long)]
-    query: Option<usize>,
+    pub query: Option<usize>,
 
     /// Common options
     #[structopt(flatten)]