Skip to content

Commit e9cc593

Browse files
authored
Merge pull request #58 from github/aneubeck/bench
Add bench and speed things up
2 parents 7e50f35 + aef9a5d commit e9cc593

File tree

5 files changed

+147
-155
lines changed

5 files changed

+147
-155
lines changed

crates/bpe/benchmarks/performance.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ use bpe_benchmarks::*;
99
use criterion::{
1010
criterion_group, criterion_main, AxisScale, BenchmarkId, Criterion, PlotConfiguration,
1111
};
12-
use rand::{thread_rng, Rng};
12+
use rand::{rng, Rng};
1313

1414
fn counting_benchmark(c: &mut Criterion) {
1515
for (name, bpe, _, _) in TOKENIZERS.iter() {
@@ -22,7 +22,7 @@ fn counting_benchmark(c: &mut Criterion) {
2222
group.throughput(criterion::Throughput::Bytes(bytes as u64));
2323
group.bench_with_input(BenchmarkId::new("interval", bytes), &bytes, |b, bytes| {
2424
b.iter_batched(
25-
|| thread_rng().gen_range(0..input.len() - bytes),
25+
|| rng().random_range(0..input.len() - bytes),
2626
|start| fast.count(start..start + bytes),
2727
criterion::BatchSize::SmallInput,
2828
)
@@ -32,7 +32,7 @@ fn counting_benchmark(c: &mut Criterion) {
3232
&bytes,
3333
|b, bytes| {
3434
b.iter_batched(
35-
|| thread_rng().gen_range(0..input.len() - bytes),
35+
|| rng().random_range(0..input.len() - bytes),
3636
|start| bpe.bpe.count(&input.as_bytes()[start..start + bytes]),
3737
criterion::BatchSize::SmallInput,
3838
)

crates/string-offsets/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,9 @@ wasm-bindgen = { version = "0.2", optional = true }
2222
[dev-dependencies]
2323
rand = "0.9"
2424
rand_chacha = "0.9"
25+
criterion = "0.5"
26+
27+
[[bench]]
28+
name = "performance"
29+
path = "benchmarks/performance.rs"
30+
harness = false
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion};
2+
use rand::{rng, Rng};
3+
use string_offsets::StringOffsets;
4+
5+
fn construction_benchmark(c: &mut Criterion) {
6+
let mut group = c.benchmark_group("construction");
7+
for size in [1000, 10000, 100000] {
8+
let mut rng = rng();
9+
// Generate random ascii input.
10+
let random_input: String = (0..size)
11+
.map(|_| rng.random_range(32u8..128) as char)
12+
.collect();
13+
group.throughput(criterion::Throughput::Bytes(random_input.len() as u64));
14+
group.bench_with_input(
15+
BenchmarkId::from_parameter(size),
16+
&random_input,
17+
|b, input| b.iter(|| black_box(StringOffsets::new(input))),
18+
);
19+
}
20+
group.finish();
21+
}
22+
23+
criterion_group!(
24+
name = benches;
25+
config = Criterion::default();
26+
targets = construction_benchmark
27+
);
28+
criterion_main!(benches);

crates/string-offsets/src/bitrank.rs

Lines changed: 61 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
type SubblockBits = u128;
77

88
// Static sizing of the various components of the data structure.
9-
const BITS_PER_BLOCK: usize = 16384;
109
const BITS_PER_SUB_BLOCK: usize = SubblockBits::BITS as usize;
11-
const SUB_BLOCKS_PER_BLOCK: usize = BITS_PER_BLOCK / BITS_PER_SUB_BLOCK;
10+
const SUB_BLOCKS_PER_BLOCK: usize = 64;
11+
const BITS_PER_BLOCK: usize = SUB_BLOCKS_PER_BLOCK * BITS_PER_SUB_BLOCK; // 8192 Bits = 1 kBytes
1212

1313
/// A container for a portion of the total bit vector and the associated indices.
1414
/// The bits within each chunk are stored from least significant bit (lsb) to most significant bit (msb).
@@ -44,20 +44,20 @@ impl Block {
4444
/// In debug builds, this panics if the bit was already set, because that indicates that the original positions
4545
/// list is invalid/had duplicates.
4646
fn set(&mut self, index: usize) {
47-
assert!(index < BITS_PER_BLOCK);
47+
debug_assert!(index < BITS_PER_BLOCK);
4848
let chunk_idx = index / BITS_PER_SUB_BLOCK;
4949
let bit_idx = index % BITS_PER_SUB_BLOCK;
50-
let mask = 1 << ((BITS_PER_SUB_BLOCK - 1) - bit_idx);
51-
assert_eq!(self.bits[chunk_idx] & mask, 0, "toggling bits off indicates that the original data was incorrect, most likely containing duplicate values.");
52-
self.bits[chunk_idx] ^= mask;
50+
let mask = 1 << bit_idx;
51+
debug_assert_eq!(self.bits[chunk_idx] & mask, 0, "toggling bits off indicates that the original data was incorrect, most likely containing duplicate values.");
52+
self.bits[chunk_idx] |= mask;
5353
}
5454

5555
/// The **total rank** of the block relative local index, i.e. the rank accumulated
5656
/// by all preceding blocks (`self.rank`) plus the number of one bits within this
5757
/// block at positions below `local_idx`. Unlike the former `rank_select`, no
5858
/// "select" position is returned; the assumption is that it is cheaper to do a lookup
5959
/// in the original data structure that the bit vector was created from.
60-
fn rank_select(&self, local_idx: usize) -> (usize, Option<usize>) {
60+
fn rank(&self, local_idx: usize) -> usize {
6161
let mut rank = self.rank as usize;
6262
let sub_block = local_idx / BITS_PER_SUB_BLOCK;
6363
rank += self.sub_blocks[sub_block] as usize;
@@ -68,15 +68,9 @@ impl Block {
6868
let masked = if remainder == 0 {
6969
0
7070
} else {
71-
self.bits[last_chunk] >> (BITS_PER_SUB_BLOCK - remainder)
71+
self.bits[last_chunk] << (BITS_PER_SUB_BLOCK - remainder)
7272
};
73-
rank += masked.count_ones() as usize;
74-
let select = if masked == 0 {
75-
None
76-
} else {
77-
Some(local_idx - masked.trailing_zeros() as usize - 1)
78-
};
79-
(rank, select)
73+
rank + masked.count_ones() as usize
8074
}
8175

8276
fn total_rank(&self) -> usize {
@@ -107,62 +101,36 @@ pub struct BitRankBuilder {
107101
}
108102

109103
impl BitRankBuilder {
110-
/// Returns a new builder.
111-
#[cfg(test)]
112-
pub fn new() -> Self {
113-
Self::default()
114-
}
115-
116104
/// Returns a builder that can hold integers with values `0..cap`.
117105
pub fn with_capacity(cap: usize) -> Self {
106+
const ZERO_BLOCK: Block = Block {
107+
rank: 0,
108+
sub_blocks: [0; SUB_BLOCKS_PER_BLOCK],
109+
bits: [0; SUB_BLOCKS_PER_BLOCK],
110+
};
118111
Self {
119-
blocks: Vec::with_capacity(cap.div_ceil(BITS_PER_BLOCK)),
120-
}
121-
}
122-
123-
fn finish_last_block(&mut self) -> u64 {
124-
if let Some(block) = self.blocks.last_mut() {
125-
let mut local_rank = 0;
126-
for (i, chunk) in block.bits.iter().enumerate() {
127-
block.sub_blocks[i] = local_rank;
128-
local_rank += chunk.count_ones() as u16;
129-
}
130-
block.rank + local_rank as u64
131-
} else {
132-
0
112+
blocks: vec![ZERO_BLOCK; cap.div_ceil(BITS_PER_BLOCK)],
133113
}
134114
}
135115

136116
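/// Adds a bit. Bits must be added in order of increasing `position`, and `position` must fall into a block allocated by `with_capacity`; otherwise the block lookup panics.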
137117
pub fn push(&mut self, position: usize) {
138118
let block_id = position / BITS_PER_BLOCK;
139-
assert!(
140-
self.blocks.len() <= block_id + 1,
141-
"positions must be increasing!"
142-
);
143-
if block_id >= self.blocks.len() {
144-
let curr_rank = self.finish_last_block();
145-
while block_id >= self.blocks.len() {
146-
// Without this declared as a `const`, rustc 1.82 creates the Block value on the
147-
// stack first, then `memcpy`s it into `self.blocks`.
148-
const ZERO_BLOCK: Block = Block {
149-
rank: 0,
150-
sub_blocks: [0; SUB_BLOCKS_PER_BLOCK],
151-
bits: [0; SUB_BLOCKS_PER_BLOCK],
152-
};
153-
self.blocks.push(ZERO_BLOCK);
154-
self.blocks.last_mut().expect("just inserted").rank = curr_rank;
155-
}
156-
}
157-
self.blocks
158-
.last_mut()
159-
.expect("just ensured there are enough blocks")
160-
.set(position % BITS_PER_BLOCK);
119+
self.blocks[block_id].set(position % BITS_PER_BLOCK);
161120
}
162121

163122
/// Finishes the `BitRank` by computing the cumulative rank of every block and of each sub-block within it.
164123
pub fn finish(mut self) -> BitRank {
165-
self.finish_last_block();
124+
let mut total_rank = 0;
125+
for block in &mut self.blocks {
126+
block.rank = total_rank;
127+
let mut local_rank = 0;
128+
for (i, chunk) in block.bits.iter().enumerate() {
129+
block.sub_blocks[i] = local_rank;
130+
local_rank += chunk.count_ones() as u16;
131+
}
132+
total_rank += local_rank as u64
133+
}
166134
BitRank {
167135
blocks: self.blocks,
168136
}
@@ -181,7 +149,12 @@ impl BitRank {
181149
/// The (one) rank is defined as: `rank(i) = sum(b[j] for j in 0..i)`
182150
/// i.e. the number of elements less than `i`.
183151
pub fn rank(&self, idx: usize) -> usize {
184-
self.rank_select(idx).0
152+
let block_num = idx / BITS_PER_BLOCK;
153+
if block_num >= self.blocks.len() {
154+
self.max_rank() // fall back to 0 bits when the bitrank data structure is empty.
155+
} else {
156+
self.blocks[block_num].rank(idx % BITS_PER_BLOCK)
157+
}
185158
}
186159

187160
/// Returns the number of elements in the set.
@@ -191,25 +164,6 @@ impl BitRank {
191164
.map(|b| b.total_rank())
192165
.unwrap_or_default() // fall back to 0 when the bitrank data structure is empty.
193166
}
194-
195-
/// The rank at the specified index(exclusive) and the index of the one bit that
196-
/// establishes that rank (aka "select") **if** it occurs within that same chunk,
197-
/// otherwise ['None']. The assumption is that if you would have to look back
198-
/// through previous chunks it would actually be cheaper to do a lookup in the original
199-
/// data structure that the bit vector was created from.
200-
pub fn rank_select(&self, idx: usize) -> (usize, Option<usize>) {
201-
let block_num = idx / BITS_PER_BLOCK;
202-
// assert!(block_num < self.blocks.len(), "index out of bounds");
203-
if block_num >= self.blocks.len() {
204-
(
205-
self.max_rank(), // fall back to 0 when the bitrank data structure is empty.
206-
None,
207-
)
208-
} else {
209-
let (rank, b_idx) = self.blocks[block_num].rank_select(idx % BITS_PER_BLOCK);
210-
(rank, b_idx.map(|i| (block_num * BITS_PER_BLOCK) + i))
211-
}
212-
}
213167
}
214168

215169
#[cfg(test)]
@@ -222,8 +176,8 @@ mod tests {
222176

223177
/// Creates a `BitRank` containing the integers in `iter` (which should be strictly
224178
/// increasing).
225-
pub fn bitrank<I: IntoIterator<Item = usize>>(iter: I) -> BitRank {
226-
let mut builder = BitRankBuilder::new();
179+
pub fn bitrank<I: IntoIterator<Item = usize>>(capacity: usize, iter: I) -> BitRank {
180+
let mut builder = BitRankBuilder::with_capacity(capacity);
227181
for position in iter {
228182
builder.push(position);
229183
}
@@ -232,32 +186,32 @@ mod tests {
232186

233187
#[test]
234188
fn test_rank_zero() {
235-
let br = bitrank([0]);
189+
let br = bitrank(1, [0]);
236190
assert_eq!(br.rank(0), 0);
237191
assert_eq!(br.rank(1), 1);
238192
}
239193

240194
#[test]
241195
fn test_empty() {
242-
let br = bitrank([]);
196+
let br = bitrank(0, []);
243197
assert!(br.blocks.is_empty());
244198
}
245199

246200
#[test]
247201
fn test_index_out_of_bounds() {
248-
let br = bitrank([BITS_PER_BLOCK - 1]);
202+
let br = bitrank(BITS_PER_BLOCK, [BITS_PER_BLOCK - 1]);
249203
assert_eq!(br.rank(BITS_PER_BLOCK), 1);
250204
}
251205

252206
#[test]
253207
#[should_panic]
254208
fn test_duplicate_position() {
255-
bitrank([64, 66, 68, 68, 90]);
209+
bitrank(91, [64, 66, 68, 68, 90]);
256210
}
257211

258212
#[test]
259213
fn test_rank_exclusive() {
260-
let br = bitrank(0..132);
214+
let br = bitrank(133, 0..132);
261215
assert_eq!(br.blocks.len(), 1);
262216
assert_eq!(br.rank(64), 64);
263217
assert_eq!(br.rank(132), 132);
@@ -267,38 +221,38 @@ mod tests {
267221
fn test_rank() {
268222
let mut positions: Vec<usize> = (0..132).collect();
269223
positions.append(&mut vec![138usize, 140, 146]);
270-
let br = bitrank(positions);
224+
let br = bitrank(146, positions);
271225
assert_eq!(br.rank(135), 132);
272226

273-
let br2 = bitrank(0..BITS_PER_BLOCK - 5);
227+
let br2 = bitrank(BITS_PER_BLOCK, 0..BITS_PER_BLOCK - 5);
274228
assert_eq!(br2.rank(169), 169);
275229

276-
let br3 = bitrank(0..BITS_PER_BLOCK + 5);
230+
let br3 = bitrank(BITS_PER_BLOCK + 6, 0..BITS_PER_BLOCK + 5);
277231
assert_eq!(br3.rank(BITS_PER_BLOCK), BITS_PER_BLOCK);
278232
}
279233

280234
#[test]
281235
fn test_rank_idx() {
282236
let mut positions: Vec<usize> = (0..132).collect();
283237
positions.append(&mut vec![138usize, 140, 146]);
284-
let br = bitrank(positions);
285-
assert_eq!(br.rank_select(135), (132, Some(131)));
238+
let br = bitrank(147, positions);
239+
assert_eq!(br.rank(135), 132);
286240

287241
let bits2: Vec<usize> = (0..BITS_PER_BLOCK - 5).collect();
288-
let br2 = bitrank(bits2);
289-
assert_eq!(br2.rank_select(169), (169, Some(168)));
242+
let br2 = bitrank(BITS_PER_BLOCK, bits2);
243+
assert_eq!(br2.rank(169), 169);
290244

291245
let bits3: Vec<usize> = (0..BITS_PER_BLOCK + 5).collect();
292-
let br3 = bitrank(bits3);
293-
assert_eq!(br3.rank_select(BITS_PER_BLOCK), (BITS_PER_BLOCK, None));
246+
let br3 = bitrank(BITS_PER_BLOCK + 6, bits3);
247+
assert_eq!(br3.rank(BITS_PER_BLOCK), BITS_PER_BLOCK);
294248

295-
let bits4: Vec<usize> = vec![1, 1000, 9999, BITS_PER_BLOCK + 1];
296-
let br4 = bitrank(bits4);
297-
assert_eq!(br4.rank_select(10000), (3, Some(9999)));
249+
let bits4: Vec<usize> = vec![1, 1000, 7777, BITS_PER_BLOCK + 1];
250+
let br4 = bitrank(BITS_PER_BLOCK + 1, bits4);
251+
assert_eq!(br4.rank(8000), 3);
298252

299-
let bits5: Vec<usize> = vec![1, 1000, 9999, BITS_PER_BLOCK + 1];
300-
let br5 = bitrank(bits5);
301-
assert_eq!(br5.rank_select(BITS_PER_BLOCK), (3, None));
253+
let bits5: Vec<usize> = vec![1, 1000, 7777, BITS_PER_BLOCK + 1];
254+
let br5 = bitrank(BITS_PER_BLOCK + 1, bits5);
255+
assert_eq!(br5.rank(BITS_PER_BLOCK), 3);
302256
}
303257

304258
#[test]
@@ -313,17 +267,12 @@ mod tests {
313267
// This is required in debug builds, where a duplicate position trips the assertion in `Block::set`; it
314268
// ensures that we are meeting the contract.
315269
random_bits.dedup();
316-
let br = bitrank(random_bits.iter().copied());
270+
let br = bitrank(1_000_000, random_bits.iter().copied());
317271
let mut rank = 0;
318-
let mut select = None;
319272
for i in 0..random_bits.capacity() {
320-
if i % BITS_PER_SUB_BLOCK == 0 {
321-
select = None;
322-
}
323-
assert_eq!(br.rank_select(i), (rank, select));
273+
assert_eq!(br.rank(i), rank);
324274
if i == random_bits[rank] {
325275
rank += 1;
326-
select = Some(i);
327276
}
328277
}
329278
}
@@ -333,7 +282,7 @@ mod tests {
333282
#[test]
334283
fn test_rank_out_of_bounds() {
335284
for i in 1..30 {
336-
let br = bitrank([BITS_PER_BLOCK * i - 1]);
285+
let br = bitrank(BITS_PER_BLOCK * i, [BITS_PER_BLOCK * i - 1]);
337286
assert_eq!(br.max_rank(), 1);
338287
assert_eq!(br.rank(BITS_PER_BLOCK * i - 1), 0);
339288
for j in 0..10 {
@@ -344,7 +293,10 @@ mod tests {
344293

345294
#[test]
346295
fn test_large_gap() {
347-
let br = bitrank((3..4).chain(BITS_PER_BLOCK * 15..BITS_PER_BLOCK * 15 + 17));
296+
let br = bitrank(
297+
BITS_PER_BLOCK * 16,
298+
(3..4).chain(BITS_PER_BLOCK * 15..BITS_PER_BLOCK * 15 + 17),
299+
);
348300
for i in 1..15 {
349301
assert_eq!(br.rank(BITS_PER_BLOCK * i), 1);
350302
}

0 commit comments

Comments
 (0)