We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 031b858, commit 4a600d2 (Copy full SHA for 4a600d2)
train.py
@@ -18,7 +18,7 @@
18
GRAD_ACCUM_EVERY = 4
19
LEARNING_RATE = 1e-4
20
VALIDATE_EVERY = 100
21
-PRIME_LENGTH = 128
+PRIME_LENGTH = 64
22
GENERATE_EVERY = 500
23
GENERATE_LENGTH = 256
24
SEQ_LEN = 256
@@ -95,9 +95,9 @@ def base_decoding(
95
use_sparse_attn = USE_SPARSE_ATTN,
96
sparse_attn_kwargs = dict(
97
sliding_window_size = 32,
98
- compress_block_size = 4,
99
- selection_block_size = 4,
100
- num_selected_blocks = 1,
+ compress_block_size = 32,
+ selection_block_size = 32,
+ num_selected_blocks = 2,
101
)
102
).cuda()
103
0 commit comments