
Commit 3eef68f

some intermittent issue with flex attention on sample, just disable and plan on writing own triton kernel
1 parent: 9e884df

2 files changed (+2, −2 lines)


native_sparse_attention_pytorch/transformer.py

Lines changed: 1 addition & 1 deletion
@@ -221,7 +221,7 @@ def sample(
             out,
             cache = cache,
             return_cache = True,
-            disable_flex = not is_first,
+            disable_flex = True,
             disable_triton_kernel = not is_first
         )
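This change affects the incremental decoding path: `disable_flex = not is_first` previously enabled flex attention only for the first (prompt-processing) forward pass of sampling, while `disable_flex = True` now turns it off for every step, pending the planned custom Triton kernel. As a rough illustration only, here is a minimal sketch of this kind of backend gating, assuming a hypothetical `attend` helper (not the library's actual code) and PyTorch >= 2.5 for `flex_attention`:

import torch
import torch.nn.functional as F

def attend(q, k, v, disable_flex = True):
    # hypothetical helper, not from the repo: gate the attention backend
    if disable_flex:
        # fallback: PyTorch's built-in scaled dot product attention,
        # taken on every sampling step once this commit is applied
        return F.scaled_dot_product_attention(q, k, v)

    # flex attention path (torch >= 2.5), the one being disabled here
    # because of the intermittent failures mentioned in the commit message
    from torch.nn.attention.flex_attention import flex_attention
    return flex_attention(q, k, v)

# usage: (batch, heads, seq, dim_head) tensors; fallback path taken
q = k = v = torch.randn(1, 8, 16, 64)
out = attend(q, k, v, disable_flex = True)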

pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -1,6 +1,6 @@
 [project]
 name = "native-sparse-attention-pytorch"
-version = "0.0.66"
+version = "0.0.68"
 description = "Native Sparse Attention"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
