Support ZCH vNext in torchrec sharding pass (#3283)

emlin · facebook-github-bot · commit fdd8534577e4 · 2025-08-14T15:09:25.000-07:00
Summary: Pull Request resolved: #3283 fix torchrec_sharding_pass gaps for inference tw sharding Reviewed By: kausv, jingsh Differential Revision: D80183693 fbshipit-source-id: 62199ce112332138434772cfb4ab9780d2a8014c
diff --git a/torchrec/distributed/planner/shard_estimators.py b/torchrec/distributed/planner/shard_estimators.py
@@ -1028,6 +1028,9 @@ def estimate(
                 if sharder.fused_params
                 else KV_CACHING_RATIO
             )
+            use_virtual_table: bool = (
+                constraints.use_virtual_table if constraints else False
+            )
 
             # hardcoded as 8 bytes
             # input indices can be of int32, but in TBE they get converted to int64 anyway
@@ -1073,6 +1076,7 @@ def estimate(
                 multipass_prefetch_max_pass=mpp_conf.num_passes if mpp_conf else None,
                 key_value_params=key_value_params,
                 kv_cache_load_factor=kv_cache_load_factor,
+                use_virtual_table=use_virtual_table,
             )
             for shard, storage in zip(sharding_option.shards, shard_storages):
                 shard.storage = storage
@@ -1143,6 +1147,7 @@ def calculate_shard_storages(
     multipass_prefetch_max_pass: Optional[int] = None,
     key_value_params: Optional[KeyValueParams] = None,
     kv_cache_load_factor: float = KV_CACHING_RATIO,
+    use_virtual_table: bool = False,
 ) -> List[Storage]:
     """
     Calculates estimated storage sizes for each sharded tensor, comprised of input,
@@ -1223,11 +1228,17 @@ def calculate_shard_storages(
         is_inference=is_inference,
     )
 
-    if compute_kernel in {
-        EmbeddingComputeKernel.KEY_VALUE.value,
-        EmbeddingComputeKernel.SSD_VIRTUAL_TABLE.value,
-        EmbeddingComputeKernel.DRAM_VIRTUAL_TABLE.value,
-    }:
+    if (
+        compute_kernel
+        in {
+            EmbeddingComputeKernel.KEY_VALUE.value,
+            EmbeddingComputeKernel.SSD_VIRTUAL_TABLE.value,
+            EmbeddingComputeKernel.DRAM_VIRTUAL_TABLE.value,
+        }
+        or use_virtual_table
+    ):
+        # KVZCH does not have a dedicated inference compute kernel, so we use use_virtual_table
+        # to set up ddr_specific_sizes
         key_value_params = key_value_params or KeyValueParams(
             max_l1_cache_size=0, l2_cache_size=0
         )
diff --git a/torchrec/distributed/planner/types.py b/torchrec/distributed/planner/types.py
@@ -722,6 +722,7 @@ class ParameterConstraints:
             or a gpu device.
         key_value_params (Optional[KeyValueParams]): key value params for SSD TBE, either for
             SSD or PS.
+        use_virtual_table (bool): whether virtual table is enabled for this table.
     """
 
     sharding_types: Optional[List[str]] = None
@@ -741,6 +742,7 @@ class ParameterConstraints:
     output_dtype: Optional[DataType] = None
     device_group: Optional[str] = None
     key_value_params: Optional[KeyValueParams] = None
+    use_virtual_table: bool = False
 
     def __hash__(self) -> int:
         hashable_list = [
@@ -759,6 +761,7 @@ def __hash__(self) -> int:
             self.output_dtype,
             self.device_group,
             self.key_value_params,
+            self.use_virtual_table,
         ]
 
         return hash_sha256_to_int(hashable_list)