File tree Expand file tree Collapse file tree 3 files changed +48
-0
lines changed Expand file tree Collapse file tree 3 files changed +48
-0
lines changed Original file line number Diff line number Diff line change @@ -7,6 +7,8 @@ For convencience it re-exports the `bpe` crate so that depending on this crate i
7
7
8
8
Supported token sets:
9
9
10
+ - r50k
11
+ - p50k
10
12
- cl100k
11
13
- o200k
12
14
Original file line number Diff line number Diff line change @@ -7,6 +7,24 @@ use serde::Serialize;
7
7
use tiktoken_rs:: CoreBPE ;
8
8
9
9
fn main ( ) {
10
+ serialize_tokens (
11
+ "r50k" ,
12
+ & tiktoken_rs:: r50k_base ( ) . expect ( "tiktoken initialization must not fail!" ) ,
13
+ 50256 ,
14
+ 1 ,
15
+ ) ;
16
+ serialize_tokens (
17
+ "p50k" ,
18
+ & tiktoken_rs:: p50k_base ( ) . expect ( "tiktoken initialization must not fail!" ) ,
19
+ 50280 ,
20
+ 1 ,
21
+ ) ;
22
+ serialize_tokens (
23
+ "cl100k" ,
24
+ & tiktoken_rs:: cl100k_base ( ) . expect ( "tiktoken initialization must not fail!" ) ,
25
+ 100256 ,
26
+ 17846336922010275747 ,
27
+ ) ;
10
28
serialize_tokens (
11
29
"cl100k" ,
12
30
& tiktoken_rs:: cl100k_base ( ) . expect ( "tiktoken initialization must not fail!" ) ,
Original file line number Diff line number Diff line change @@ -2,6 +2,16 @@ use std::sync::LazyLock;
2
2
3
3
use bpe:: byte_pair_encoding:: BytePairEncoding ;
4
4
5
+ static BPE_R50K : LazyLock < BytePairEncoding > = LazyLock :: new ( || {
6
+ let bytes = include_bytes ! ( concat!( env!( "OUT_DIR" ) , "/bpe_r50k.dict" ) ) ;
7
+ rmp_serde:: from_slice ( bytes) . expect ( "" )
8
+ } ) ;
9
+
10
+ static BPE_P50K : LazyLock < BytePairEncoding > = LazyLock :: new ( || {
11
+ let bytes = include_bytes ! ( concat!( env!( "OUT_DIR" ) , "/bpe_p50k.dict" ) ) ;
12
+ rmp_serde:: from_slice ( bytes) . expect ( "" )
13
+ } ) ;
14
+
5
15
static BPE_CL100K : LazyLock < BytePairEncoding > = LazyLock :: new ( || {
6
16
let bytes = include_bytes ! ( concat!( env!( "OUT_DIR" ) , "/bpe_cl100k.dict" ) ) ;
7
17
rmp_serde:: from_slice ( bytes) . expect ( "" )
@@ -14,6 +24,14 @@ static BPE_O200K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
14
24
15
25
pub use bpe:: * ;
16
26
27
+ pub fn r50k ( ) -> & ' static BytePairEncoding {
28
+ & BPE_R50K
29
+ }
30
+
31
+ pub fn p50k ( ) -> & ' static BytePairEncoding {
32
+ & BPE_P50K
33
+ }
34
+
17
35
pub fn cl100k ( ) -> & ' static BytePairEncoding {
18
36
& BPE_CL100K
19
37
}
@@ -26,6 +44,16 @@ pub fn o200k() -> &'static BytePairEncoding {
26
44
mod tests {
27
45
use super :: * ;
28
46
47
+ #[ test]
48
+ fn can_load_r50k ( ) {
49
+ r50k ( ) . count ( "" . as_bytes ( ) ) ;
50
+ }
51
+
52
+ #[ test]
53
+ fn can_load_p50k ( ) {
54
+ p50k ( ) . count ( "" . as_bytes ( ) ) ;
55
+ }
56
+
29
57
#[ test]
30
58
fn can_load_cl100k ( ) {
31
59
cl100k ( ) . count ( "" . as_bytes ( ) ) ;
You can’t perform that action at this time.
0 commit comments