6
6
type SubblockBits = u128 ;
7
7
8
8
// Static sizing of the various components of the data structure.
9
- const BITS_PER_BLOCK : usize = 16384 ;
10
9
const BITS_PER_SUB_BLOCK : usize = SubblockBits :: BITS as usize ;
11
- const SUB_BLOCKS_PER_BLOCK : usize = BITS_PER_BLOCK / BITS_PER_SUB_BLOCK ;
10
+ const SUB_BLOCKS_PER_BLOCK : usize = 64 ;
11
+ const BITS_PER_BLOCK : usize = SUB_BLOCKS_PER_BLOCK * BITS_PER_SUB_BLOCK ; // 8192 Bits = 1 kBytes
12
12
13
13
/// A container for a portion of the total bit vector and the associated indices.
14
14
/// The bits within each chunk are stored from least significant bit (lsb) to most significant bit (msb).
@@ -44,20 +44,20 @@ impl Block {
44
44
/// This panics if the bit was already set, because that indicates that the original positions
45
45
/// list is invalid/had duplicates.
46
46
fn set ( & mut self , index : usize ) {
47
- assert ! ( index < BITS_PER_BLOCK ) ;
47
+ debug_assert ! ( index < BITS_PER_BLOCK ) ;
48
48
let chunk_idx = index / BITS_PER_SUB_BLOCK ;
49
49
let bit_idx = index % BITS_PER_SUB_BLOCK ;
50
- let mask = 1 << ( ( BITS_PER_SUB_BLOCK - 1 ) - bit_idx) ;
51
- assert_eq ! ( self . bits[ chunk_idx] & mask, 0 , "toggling bits off indicates that the original data was incorrect, most likely containing duplicate values." ) ;
52
- self . bits [ chunk_idx] ^ = mask;
50
+ let mask = 1 << bit_idx;
51
+ debug_assert_eq ! ( self . bits[ chunk_idx] & mask, 0 , "toggling bits off indicates that the original data was incorrect, most likely containing duplicate values." ) ;
52
+ self . bits [ chunk_idx] | = mask;
53
53
}
54
54
55
55
/// The **total rank** of the block relative local index, and the index of the one
56
56
/// bit that establishes that rank (aka "select") **if** it occurs within that same
57
57
/// chunk, otherwise [`None`]. The assumption is that if you would have to look back
58
58
/// through previous chunks it would actually be cheaper to do a lookup in the original
59
59
/// data structure that the bit vector was created from.
60
- fn rank_select ( & self , local_idx : usize ) -> ( usize , Option < usize > ) {
60
+ fn rank ( & self , local_idx : usize ) -> usize {
61
61
let mut rank = self . rank as usize ;
62
62
let sub_block = local_idx / BITS_PER_SUB_BLOCK ;
63
63
rank += self . sub_blocks [ sub_block] as usize ;
@@ -68,15 +68,9 @@ impl Block {
68
68
let masked = if remainder == 0 {
69
69
0
70
70
} else {
71
- self . bits [ last_chunk] >> ( BITS_PER_SUB_BLOCK - remainder)
71
+ self . bits [ last_chunk] << ( BITS_PER_SUB_BLOCK - remainder)
72
72
} ;
73
- rank += masked. count_ones ( ) as usize ;
74
- let select = if masked == 0 {
75
- None
76
- } else {
77
- Some ( local_idx - masked. trailing_zeros ( ) as usize - 1 )
78
- } ;
79
- ( rank, select)
73
+ rank + masked. count_ones ( ) as usize
80
74
}
81
75
82
76
fn total_rank ( & self ) -> usize {
@@ -107,62 +101,36 @@ pub struct BitRankBuilder {
107
101
}
108
102
109
103
impl BitRankBuilder {
110
- /// Returns a new builder.
111
- #[ cfg( test) ]
112
- pub fn new ( ) -> Self {
113
- Self :: default ( )
114
- }
115
-
116
104
/// Returns a builder that can hold integers with values `0..cap`.
117
105
pub fn with_capacity ( cap : usize ) -> Self {
106
+ const ZERO_BLOCK : Block = Block {
107
+ rank : 0 ,
108
+ sub_blocks : [ 0 ; SUB_BLOCKS_PER_BLOCK ] ,
109
+ bits : [ 0 ; SUB_BLOCKS_PER_BLOCK ] ,
110
+ } ;
118
111
Self {
119
- blocks : Vec :: with_capacity ( cap. div_ceil ( BITS_PER_BLOCK ) ) ,
120
- }
121
- }
122
-
123
- fn finish_last_block ( & mut self ) -> u64 {
124
- if let Some ( block) = self . blocks . last_mut ( ) {
125
- let mut local_rank = 0 ;
126
- for ( i, chunk) in block. bits . iter ( ) . enumerate ( ) {
127
- block. sub_blocks [ i] = local_rank;
128
- local_rank += chunk. count_ones ( ) as u16 ;
129
- }
130
- block. rank + local_rank as u64
131
- } else {
132
- 0
112
+ blocks : vec ! [ ZERO_BLOCK ; cap. div_ceil( BITS_PER_BLOCK ) ] ,
133
113
}
134
114
}
135
115
136
116
/// Adds a bit. Bits must be added in order of increasing `position`.
137
117
pub fn push ( & mut self , position : usize ) {
138
118
let block_id = position / BITS_PER_BLOCK ;
139
- assert ! (
140
- self . blocks. len( ) <= block_id + 1 ,
141
- "positions must be increasing!"
142
- ) ;
143
- if block_id >= self . blocks . len ( ) {
144
- let curr_rank = self . finish_last_block ( ) ;
145
- while block_id >= self . blocks . len ( ) {
146
- // Without this declared as a `const`, rustc 1.82 creates the Block value on the
147
- // stack first, then `memcpy`s it into `self.blocks`.
148
- const ZERO_BLOCK : Block = Block {
149
- rank : 0 ,
150
- sub_blocks : [ 0 ; SUB_BLOCKS_PER_BLOCK ] ,
151
- bits : [ 0 ; SUB_BLOCKS_PER_BLOCK ] ,
152
- } ;
153
- self . blocks . push ( ZERO_BLOCK ) ;
154
- self . blocks . last_mut ( ) . expect ( "just inserted" ) . rank = curr_rank;
155
- }
156
- }
157
- self . blocks
158
- . last_mut ( )
159
- . expect ( "just ensured there are enough blocks" )
160
- . set ( position % BITS_PER_BLOCK ) ;
119
+ self . blocks [ block_id] . set ( position % BITS_PER_BLOCK ) ;
161
120
}
162
121
163
122
/// Finishes the `BitRank` by writing the last block of data.
164
123
pub fn finish ( mut self ) -> BitRank {
165
- self . finish_last_block ( ) ;
124
+ let mut total_rank = 0 ;
125
+ for block in & mut self . blocks {
126
+ block. rank = total_rank;
127
+ let mut local_rank = 0 ;
128
+ for ( i, chunk) in block. bits . iter ( ) . enumerate ( ) {
129
+ block. sub_blocks [ i] = local_rank;
130
+ local_rank += chunk. count_ones ( ) as u16 ;
131
+ }
132
+ total_rank += local_rank as u64
133
+ }
166
134
BitRank {
167
135
blocks : self . blocks ,
168
136
}
@@ -181,7 +149,12 @@ impl BitRank {
181
149
/// The (one) rank is defined as: `rank(i) = sum(b[j] for j in 0..i)`
182
150
/// i.e. the number of elements less than `i`.
183
151
pub fn rank ( & self , idx : usize ) -> usize {
184
- self . rank_select ( idx) . 0
152
+ let block_num = idx / BITS_PER_BLOCK ;
153
+ if block_num >= self . blocks . len ( ) {
154
+ self . max_rank ( ) // fall back to 0 bits when the bitrank data structure is empty.
155
+ } else {
156
+ self . blocks [ block_num] . rank ( idx % BITS_PER_BLOCK )
157
+ }
185
158
}
186
159
187
160
/// Returns the number of elements in the set.
@@ -191,25 +164,6 @@ impl BitRank {
191
164
. map ( |b| b. total_rank ( ) )
192
165
. unwrap_or_default ( ) // fall back to 0 when the bitrank data structure is empty.
193
166
}
194
-
195
- /// The rank at the specified index(exclusive) and the index of the one bit that
196
- /// establishes that rank (aka "select") **if** it occurs within that same chunk,
197
- /// otherwise ['None']. The assumption is that if you would have to look back
198
- /// through previous chunks it would actually be cheaper to do a lookup in the original
199
- /// data structure that the bit vector was created from.
200
- pub fn rank_select ( & self , idx : usize ) -> ( usize , Option < usize > ) {
201
- let block_num = idx / BITS_PER_BLOCK ;
202
- // assert!(block_num < self.blocks.len(), "index out of bounds");
203
- if block_num >= self . blocks . len ( ) {
204
- (
205
- self . max_rank ( ) , // fall back to 0 when the bitrank data structure is empty.
206
- None ,
207
- )
208
- } else {
209
- let ( rank, b_idx) = self . blocks [ block_num] . rank_select ( idx % BITS_PER_BLOCK ) ;
210
- ( rank, b_idx. map ( |i| ( block_num * BITS_PER_BLOCK ) + i) )
211
- }
212
- }
213
167
}
214
168
215
169
#[ cfg( test) ]
@@ -222,8 +176,8 @@ mod tests {
222
176
223
177
/// Creates a `BitRank` containing the integers in `iter` (which should be strictly
224
178
/// increasing).
225
- pub fn bitrank < I : IntoIterator < Item = usize > > ( iter : I ) -> BitRank {
226
- let mut builder = BitRankBuilder :: new ( ) ;
179
+ pub fn bitrank < I : IntoIterator < Item = usize > > ( capacity : usize , iter : I ) -> BitRank {
180
+ let mut builder = BitRankBuilder :: with_capacity ( capacity ) ;
227
181
for position in iter {
228
182
builder. push ( position) ;
229
183
}
@@ -232,32 +186,32 @@ mod tests {
232
186
233
187
#[ test]
234
188
fn test_rank_zero ( ) {
235
- let br = bitrank ( [ 0 ] ) ;
189
+ let br = bitrank ( 1 , [ 0 ] ) ;
236
190
assert_eq ! ( br. rank( 0 ) , 0 ) ;
237
191
assert_eq ! ( br. rank( 1 ) , 1 ) ;
238
192
}
239
193
240
194
#[ test]
241
195
fn test_empty ( ) {
242
- let br = bitrank ( [ ] ) ;
196
+ let br = bitrank ( 0 , [ ] ) ;
243
197
assert ! ( br. blocks. is_empty( ) ) ;
244
198
}
245
199
246
200
#[ test]
247
201
fn test_index_out_of_bounds ( ) {
248
- let br = bitrank ( [ BITS_PER_BLOCK - 1 ] ) ;
202
+ let br = bitrank ( BITS_PER_BLOCK , [ BITS_PER_BLOCK - 1 ] ) ;
249
203
assert_eq ! ( br. rank( BITS_PER_BLOCK ) , 1 ) ;
250
204
}
251
205
252
206
#[ test]
253
207
#[ should_panic]
254
208
fn test_duplicate_position ( ) {
255
- bitrank ( [ 64 , 66 , 68 , 68 , 90 ] ) ;
209
+ bitrank ( 91 , [ 64 , 66 , 68 , 68 , 90 ] ) ;
256
210
}
257
211
258
212
#[ test]
259
213
fn test_rank_exclusive ( ) {
260
- let br = bitrank ( 0 ..132 ) ;
214
+ let br = bitrank ( 133 , 0 ..132 ) ;
261
215
assert_eq ! ( br. blocks. len( ) , 1 ) ;
262
216
assert_eq ! ( br. rank( 64 ) , 64 ) ;
263
217
assert_eq ! ( br. rank( 132 ) , 132 ) ;
@@ -267,38 +221,38 @@ mod tests {
267
221
fn test_rank ( ) {
268
222
let mut positions: Vec < usize > = ( 0 ..132 ) . collect ( ) ;
269
223
positions. append ( & mut vec ! [ 138usize , 140 , 146 ] ) ;
270
- let br = bitrank ( positions) ;
224
+ let br = bitrank ( 146 , positions) ;
271
225
assert_eq ! ( br. rank( 135 ) , 132 ) ;
272
226
273
- let br2 = bitrank ( 0 ..BITS_PER_BLOCK - 5 ) ;
227
+ let br2 = bitrank ( BITS_PER_BLOCK , 0 ..BITS_PER_BLOCK - 5 ) ;
274
228
assert_eq ! ( br2. rank( 169 ) , 169 ) ;
275
229
276
- let br3 = bitrank ( 0 ..BITS_PER_BLOCK + 5 ) ;
230
+ let br3 = bitrank ( BITS_PER_BLOCK + 6 , 0 ..BITS_PER_BLOCK + 5 ) ;
277
231
assert_eq ! ( br3. rank( BITS_PER_BLOCK ) , BITS_PER_BLOCK ) ;
278
232
}
279
233
280
234
#[ test]
281
235
fn test_rank_idx ( ) {
282
236
let mut positions: Vec < usize > = ( 0 ..132 ) . collect ( ) ;
283
237
positions. append ( & mut vec ! [ 138usize , 140 , 146 ] ) ;
284
- let br = bitrank ( positions) ;
285
- assert_eq ! ( br. rank_select ( 135 ) , ( 132 , Some ( 131 ) ) ) ;
238
+ let br = bitrank ( 147 , positions) ;
239
+ assert_eq ! ( br. rank ( 135 ) , 132 ) ;
286
240
287
241
let bits2: Vec < usize > = ( 0 ..BITS_PER_BLOCK - 5 ) . collect ( ) ;
288
- let br2 = bitrank ( bits2) ;
289
- assert_eq ! ( br2. rank_select ( 169 ) , ( 169 , Some ( 168 ) ) ) ;
242
+ let br2 = bitrank ( BITS_PER_BLOCK , bits2) ;
243
+ assert_eq ! ( br2. rank ( 169 ) , 169 ) ;
290
244
291
245
let bits3: Vec < usize > = ( 0 ..BITS_PER_BLOCK + 5 ) . collect ( ) ;
292
- let br3 = bitrank ( bits3) ;
293
- assert_eq ! ( br3. rank_select ( BITS_PER_BLOCK ) , ( BITS_PER_BLOCK , None ) ) ;
246
+ let br3 = bitrank ( BITS_PER_BLOCK + 6 , bits3) ;
247
+ assert_eq ! ( br3. rank ( BITS_PER_BLOCK ) , BITS_PER_BLOCK ) ;
294
248
295
- let bits4: Vec < usize > = vec ! [ 1 , 1000 , 9999 , BITS_PER_BLOCK + 1 ] ;
296
- let br4 = bitrank ( bits4) ;
297
- assert_eq ! ( br4. rank_select ( 10000 ) , ( 3 , Some ( 9999 ) ) ) ;
249
+ let bits4: Vec < usize > = vec ! [ 1 , 1000 , 7777 , BITS_PER_BLOCK + 1 ] ;
250
+ let br4 = bitrank ( BITS_PER_BLOCK + 1 , bits4) ;
251
+ assert_eq ! ( br4. rank ( 8000 ) , 3 ) ;
298
252
299
- let bits5: Vec < usize > = vec ! [ 1 , 1000 , 9999 , BITS_PER_BLOCK + 1 ] ;
300
- let br5 = bitrank ( bits5) ;
301
- assert_eq ! ( br5. rank_select ( BITS_PER_BLOCK ) , ( 3 , None ) ) ;
253
+ let bits5: Vec < usize > = vec ! [ 1 , 1000 , 7777 , BITS_PER_BLOCK + 1 ] ;
254
+ let br5 = bitrank ( BITS_PER_BLOCK + 1 , bits5) ;
255
+ assert_eq ! ( br5. rank ( BITS_PER_BLOCK ) , 3 ) ;
302
256
}
303
257
304
258
#[ test]
@@ -313,17 +267,12 @@ mod tests {
313
267
// This isn't strictly necessary, given that the bit would just be toggled again, but it
314
268
// ensures that we are meeting the contract.
315
269
random_bits. dedup ( ) ;
316
- let br = bitrank ( random_bits. iter ( ) . copied ( ) ) ;
270
+ let br = bitrank ( 1_000_000 , random_bits. iter ( ) . copied ( ) ) ;
317
271
let mut rank = 0 ;
318
- let mut select = None ;
319
272
for i in 0 ..random_bits. capacity ( ) {
320
- if i % BITS_PER_SUB_BLOCK == 0 {
321
- select = None ;
322
- }
323
- assert_eq ! ( br. rank_select( i) , ( rank, select) ) ;
273
+ assert_eq ! ( br. rank( i) , rank) ;
324
274
if i == random_bits[ rank] {
325
275
rank += 1 ;
326
- select = Some ( i) ;
327
276
}
328
277
}
329
278
}
@@ -333,7 +282,7 @@ mod tests {
333
282
#[ test]
334
283
fn test_rank_out_of_bounds ( ) {
335
284
for i in 1 ..30 {
336
- let br = bitrank ( [ BITS_PER_BLOCK * i - 1 ] ) ;
285
+ let br = bitrank ( BITS_PER_BLOCK * i , [ BITS_PER_BLOCK * i - 1 ] ) ;
337
286
assert_eq ! ( br. max_rank( ) , 1 ) ;
338
287
assert_eq ! ( br. rank( BITS_PER_BLOCK * i - 1 ) , 0 ) ;
339
288
for j in 0 ..10 {
@@ -344,7 +293,10 @@ mod tests {
344
293
345
294
#[ test]
346
295
fn test_large_gap ( ) {
347
- let br = bitrank ( ( 3 ..4 ) . chain ( BITS_PER_BLOCK * 15 ..BITS_PER_BLOCK * 15 + 17 ) ) ;
296
+ let br = bitrank (
297
+ BITS_PER_BLOCK * 16 ,
298
+ ( 3 ..4 ) . chain ( BITS_PER_BLOCK * 15 ..BITS_PER_BLOCK * 15 + 17 ) ,
299
+ ) ;
348
300
for i in 1 ..15 {
349
301
assert_eq ! ( br. rank( BITS_PER_BLOCK * i) , 1 ) ;
350
302
}
0 commit comments