|
| 1 | +use std::env; |
| 2 | +use std::fs; |
| 3 | + |
| 4 | +use criterion::{criterion_group, criterion_main, Criterion, Throughput}; |
| 5 | + |
| 6 | +use memchr::memmem; |
| 7 | +use stringzilla::StringZilla; |
| 8 | + |
| 9 | +fn configure_bench() -> Criterion { |
| 10 | + Criterion::default() |
| 11 | + .sample_size(1000) // Test this many needles. |
| 12 | + .warm_up_time(std::time::Duration::from_secs(10)) // Let the CPU frequencies settle. |
| 13 | + .measurement_time(std::time::Duration::from_secs(120)) // Actual measurement time. |
| 14 | +} |
| 15 | + |
| 16 | +fn bench_tfidf(c: &mut Criterion) { |
| 17 | + // Get the haystack path from the environment variable. |
| 18 | + let dataset_path = |
| 19 | + env::var("STRINGWARS_DATASET").expect("STRINGWARS_DATASET environment variable not set"); |
| 20 | + let haystack_content = fs::read_to_string(&dataset_path).expect("Could not read haystack"); |
| 21 | + |
| 22 | + // Tokenize the haystack content by white space. |
| 23 | + let needles: Vec<&str> = haystack_content.split_whitespace().collect(); |
| 24 | + if needles.is_empty() { |
| 25 | + panic!("No tokens found in the haystack."); |
| 26 | + } |
| 27 | + |
| 28 | + let haystack = haystack_content.as_bytes(); |
| 29 | + let haystack_length = haystack.len(); |
| 30 | + |
| 31 | + // Benchmarks for forward search |
| 32 | + let mut g = c.benchmark_group("search-forward"); |
| 33 | + g.throughput(Throughput::Bytes(haystack_length as u64)); |
| 34 | + perform_forward_benchmarks(&mut g, &needles, haystack); |
| 35 | + g.finish(); |
| 36 | + |
| 37 | + // Benchmarks for reverse search |
| 38 | + let mut g = c.benchmark_group("search-reverse"); |
| 39 | + g.throughput(Throughput::Bytes(haystack_length as u64)); |
| 40 | + perform_reverse_benchmarks(&mut g, &needles, haystack); |
| 41 | + g.finish(); |
| 42 | +} |
| 43 | + |
| 44 | +... |
| 45 | + |
| 46 | +criterion_group! { |
| 47 | + name = bench_tfidf_group; |
| 48 | + config = configure_bench(); |
| 49 | + targets = bench_tfidf |
| 50 | +} |
| 51 | +criterion_main!(bench_tfidf_group); |
0 commit comments