@@ -109,10 +109,30 @@ Configs are typically discovered automatically through autotuning, but can also
109109
110110.. autoattribute:: Config.indexing
111111
112- Memory indexing strategy:
112+ Memory indexing strategy for load operations. Can be specified as:
113113
114- - ``"pointer"``: Pointer-based indexing
115- - ``"tensor_descriptor"``: Tensor descriptor indexing
114+ **Single strategy (applies to all loads - backward compatible):**
115+
116+ .. code-block:: python
117+
118+ indexing="block_ptr" # All loads use block pointers
119+
120+ **Per-load strategies (list, one per load operation):**
121+
122+ .. code-block:: python
123+
124+ indexing=["pointer", "block_ptr", "tensor_descriptor"]
125+
126+ **Empty/omitted (defaults to** ``"pointer"`` **for all loads):**
127+
128+ .. code-block:: python
129+
130+ # indexing not specified - all loads use pointer indexing
131+
132+ **Valid strategies:**
133+
134+ - ``"pointer"``: Pointer-based indexing (default)
135+ - ``"tensor_descriptor"``: Tensor descriptor indexing (requires Hopper+ GPU)
116136 - ``"block_ptr"``: Block pointer indexing
117137```
118138
@@ -185,6 +205,42 @@ def kernel_with_eviction(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
185205# hl.load(x, [tile], eviction_policy="evict_first")
186206```
187207
208+ ### Per-Load Indexing Example
209+
210+ ``` python
211+ import torch
212+ import helion
213+ import helion.language as hl
214+
215+ # Single indexing strategy for all loads (backward compatible)
216+ @helion.kernel(config={"indexing": "block_ptr"})
217+ def kernel_uniform_indexing(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
218+     out = torch.empty_like(x)
219+     for tile in hl.tile(x.size(0)):
220+         a = hl.load(x, [tile])  # Uses block_ptr
221+         b = hl.load(y, [tile])  # Uses block_ptr
222+         out[tile] = a + b
223+     return out
224+
225+ # Per-load indexing strategies for fine-grained control
226+ @helion.kernel(
227+     config={
228+         "block_size": 16,
229+         "indexing": ["pointer", "block_ptr", "tensor_descriptor"],
230+     }
231+ )
232+ def kernel_mixed_indexing(
233+     x: torch.Tensor, y: torch.Tensor, z: torch.Tensor
234+ ) -> torch.Tensor:
235+     out = torch.empty_like(x)
236+     for tile in hl.tile(x.size(0)):
237+         a = hl.load(x, [tile])  # First load: pointer indexing
238+         b = hl.load(y, [tile])  # Second load: block_ptr indexing
239+         c = hl.load(z, [tile])  # Third load: tensor_descriptor indexing
240+         out[tile] = a + b + c
241+     return out
242+ ```
243+
188244### Config Serialization
189245
190246``` python
0 commit comments