
Commit 7c36879
Fix handling of mixed placement in DistributedEmbedding. (#136)
The transformation from the deeply nested input structure to the intermediate representation of "placement -> path -> tensor", and the reverse transformation for the outputs, were incorrectly based on flattening and then packing as `self._placement_to_path_to_feature_config`. This used an incorrect order for the flat structure and resulted in tensors ending up in the wrong place. This fix uses `self._feature_deeply_nested_placement_and_paths` instead, which maps each feature correctly.

Also changed `PlacementAndPath` to be a `dataclass` so that `keras.tree` treats its instances as atoms and doesn't recurse into them. This simplifies some of the `map_structure{_up_to}` calls.

Fixes #134
1 parent e002b4b commit 7c36879
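
For context, a minimal sketch of the ordering problem the commit message describes. This is illustrative code, not part of the repository; the feature names and placements mirror the new `test_mixed_placement` test, and plain strings stand in for tensors and `FeatureConfig` objects.

import keras

# Deeply nested (user-facing) inputs; strings stand in for tensors.
# feature2 is placed on "sparsecore", the other two on "default_device",
# as in the new test_mixed_placement test.
inputs = {"feature1": "x1", "feature2": "x2", "feature3": "x3"}

# Intermediate two-level structure: placement -> path -> feature config
# (strings stand in for FeatureConfig objects).
placement_to_path_to_feature_config = {
    "default_device": {"feature1": "cfg1", "feature3": "cfg3"},
    "sparsecore": {"feature2": "cfg2"},
}

# Old approach: flatten the deeply nested inputs, then pack the flat list
# into the two-level structure. The two structures flatten in different
# orders, so tensors land under the wrong feature.
flat_inputs = keras.tree.flatten(inputs)  # ["x1", "x2", "x3"]
repacked = keras.tree.pack_sequence_as(
    placement_to_path_to_feature_config, flat_inputs
)
# With sorted-key flattening of the two-level structure, the slots are
# filled in the order feature1, feature3, feature2, so:
#   repacked["default_device"]["feature3"] == "x2"  (should be "x3")
#   repacked["sparsecore"]["feature2"] == "x3"      (should be "x2")
print(repacked)

Mapping through `self._feature_deeply_nested_placement_and_paths` pairs every input tensor with its own `PlacementAndPath`, so it no longer matters that the two structures flatten in different orders.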

File tree

2 files changed: +122 -38 lines changed

keras_rs/src/layers/embedding/base_distributed_embedding.py

Lines changed: 51 additions & 38 deletions
@@ -1,4 +1,5 @@
 import collections
+import dataclasses
 import importlib.util
 import typing
 from typing import Any, Sequence
@@ -20,9 +21,10 @@
 SUPPORTED_PLACEMENTS = ("auto", "default_device", "sparsecore")


-PlacementAndPath = collections.namedtuple(
-    "PlacementAndPath", ["placement", "path"]
-)
+@dataclasses.dataclass(eq=True, unsafe_hash=True, order=True)
+class PlacementAndPath:
+    placement: str
+    path: str


 def _ragged_to_dense_inputs(
@@ -518,12 +520,12 @@ def _init_feature_configs_structures(
         With these structures in place, the steps to:
         - go from the deeply nested structure to the two-level structure are:
           - `assert_same_struct` as `self._feature_configs`
-          - `flatten`
-          - `pack_sequence_as` `self._placement_to_path_to_feature_config`
+          - use `self._feature_deeply_nested_placement_and_paths` to map from
+            deeply nested to two-level
         - go from the two-level structure to the deeply nested structure:
-          - `assert_same_struct` as `self._placement_to_path_to_feature_config`
-          - `flatten`
-          - `pack_sequence_as` `self._feature_configs`
+          - `assert_same_struct` as `self._placement_to_path_to_feature_config`
+          - use `self._feature_deeply_nested_placement_and_paths` to locate each
+            output in the two-level dicts

         Args:
             feature_configs: The deeply nested structure of `FeatureConfig` or
@@ -590,14 +592,14 @@ def build(self, input_shapes: types.Nested[types.Shape]) -> None:
         ] = collections.defaultdict(dict)

         def populate_placement_to_path_to_input_shape(
-            placement_and_path: PlacementAndPath, input_shape: types.Shape
+            pp: PlacementAndPath, input_shape: types.Shape
         ) -> None:
-            placement_to_path_to_input_shape[placement_and_path.placement][
-                placement_and_path.path
-            ] = input_shape
+            placement_to_path_to_input_shape[pp.placement][pp.path] = (
+                input_shape
+            )

         keras.tree.map_structure_up_to(
-            self._feature_configs,
+            self._feature_deeply_nested_placement_and_paths,
             populate_placement_to_path_to_input_shape,
             self._feature_deeply_nested_placement_and_paths,
             input_shapes,
@@ -645,35 +647,40 @@ def preprocess(
         """
         # Verify input structure.
         keras.tree.assert_same_structure(self._feature_configs, inputs)
+        if weights is not None:
+            keras.tree.assert_same_structure(self._feature_configs, weights)

         if not self.built:
-            input_shapes = keras.tree.map_structure_up_to(
-                self._feature_configs,
+            input_shapes = keras.tree.map_structure(
                 lambda array: backend.standardize_shape(array.shape),
                 inputs,
             )
             self.build(input_shapes)

-        # Go from deeply nested structure of inputs to flat inputs.
-        flat_inputs = keras.tree.flatten(inputs)
+        # Go from deeply nested to nested dict placement -> path -> input.
+        def to_placement_to_path(
+            tensors: types.Nested[types.Tensor],
+        ) -> dict[str, dict[str, types.Tensor]]:
+            result: dict[str, dict[str, types.Tensor]] = {
+                p: dict() for p in self._placement_to_path_to_feature_config
+            }

-        # Go from flat to nested dict placement -> path -> input.
-        placement_to_path_to_inputs = keras.tree.pack_sequence_as(
-            self._placement_to_path_to_feature_config, flat_inputs
-        )
+            def populate(pp: PlacementAndPath, x: types.Tensor) -> None:
+                result[pp.placement][pp.path] = x

-        if weights is not None:
-            # Same for weights if present.
-            keras.tree.assert_same_structure(self._feature_configs, weights)
-            flat_weights = keras.tree.flatten(weights)
-            placement_to_path_to_weights = keras.tree.pack_sequence_as(
-                self._placement_to_path_to_feature_config, flat_weights
+            keras.tree.map_structure(
+                populate,
+                self._feature_deeply_nested_placement_and_paths,
+                tensors,
             )
-        else:
-            # Populate keys for weights.
-            placement_to_path_to_weights = {
-                k: None for k in placement_to_path_to_inputs
-            }
+            return result
+
+        placement_to_path_to_inputs = to_placement_to_path(inputs)
+
+        # Same for weights if present.
+        placement_to_path_to_weights = (
+            to_placement_to_path(weights) if weights is not None else None
+        )

         placement_to_path_to_preprocessed: dict[
@@ -684,7 +691,9 @@ def preprocess(
             placement_to_path_to_preprocessed["sparsecore"] = (
                 self._sparsecore_preprocess(
                     placement_to_path_to_inputs["sparsecore"],
-                    placement_to_path_to_weights["sparsecore"],
+                    placement_to_path_to_weights["sparsecore"]
+                    if placement_to_path_to_weights is not None
+                    else None,
                     training,
                 )
             )
@@ -694,7 +703,9 @@ def preprocess(
             placement_to_path_to_preprocessed["default_device"] = (
                 self._default_device_preprocess(
                     placement_to_path_to_inputs["default_device"],
-                    placement_to_path_to_weights["default_device"],
+                    placement_to_path_to_weights["default_device"]
+                    if placement_to_path_to_weights is not None
+                    else None,
                     training,
                 )
             )
@@ -780,11 +791,13 @@ def call(
             placement_to_path_to_outputs,
         )

-        # Go from placement -> path -> output to flat outputs.
-        flat_outputs = keras.tree.flatten(placement_to_path_to_outputs)
+        # Go from placement -> path -> output to deeply nested structure.
+        def populate_output(pp: PlacementAndPath) -> types.Tensor:
+            return placement_to_path_to_outputs[pp.placement][pp.path]

-        # Go from flat outputs to deeply nested structure.
-        return keras.tree.pack_sequence_as(self._feature_configs, flat_outputs)
+        return keras.tree.map_structure(
+            populate_output, self._feature_deeply_nested_placement_and_paths
+        )

     def get_embedding_tables(self) -> dict[str, types.Tensor]:
         """Return the content of the embedding tables by table name.

keras_rs/src/layers/embedding/distributed_embedding_test.py

Lines changed: 71 additions & 0 deletions
@@ -685,6 +685,77 @@ def test_shared_table(self):
             res["feature3"].shape, (batch_size, EMBEDDING_OUTPUT_DIM)
         )

+    def test_mixed_placement(self):
+        if not self.on_tpu:
+            self.skipTest("Mixed placement test requires a TPU.")
+
+        # Use different embedding dimensions to verify that the correct tables
+        # are used for each feature.
+        embedding_output_dim1 = 16
+        embedding_output_dim2 = 32
+        embedding_output_dim3 = 64
+
+        # Intermix placement to exercise the change of order of inputs.
+        table1 = config.TableConfig(
+            name="table1",
+            vocabulary_size=VOCABULARY_SIZE,
+            embedding_dim=embedding_output_dim1,
+            placement="default_device",
+        )
+        table2 = config.TableConfig(
+            name="table2",
+            vocabulary_size=VOCABULARY_SIZE,
+            embedding_dim=embedding_output_dim2,
+            placement="sparsecore",
+        )
+        table3 = config.TableConfig(
+            name="table3",
+            vocabulary_size=VOCABULARY_SIZE,
+            embedding_dim=embedding_output_dim3,
+            placement="default_device",
+        )
+
+        embedding_config = {
+            "feature1": config.FeatureConfig(
+                name="feature1",
+                table=table1,
+                input_shape=(BATCH_SIZE_PER_CORE, 1),
+                output_shape=(BATCH_SIZE_PER_CORE, embedding_output_dim1),
+            ),
+            "feature2": config.FeatureConfig(
+                name="feature2",
+                table=table2,
+                input_shape=(BATCH_SIZE_PER_CORE, 1),
+                output_shape=(BATCH_SIZE_PER_CORE, embedding_output_dim2),
+            ),
+            "feature3": config.FeatureConfig(
+                name="feature3",
+                table=table3,
+                input_shape=(BATCH_SIZE_PER_CORE, 1),
+                output_shape=(BATCH_SIZE_PER_CORE, embedding_output_dim3),
+            ),
+        }
+
+        batch_size = self._strategy.num_replicas_in_sync * BATCH_SIZE_PER_CORE
+        inputs, _, _ = self.create_inputs_weights_and_labels(
+            batch_size, "dense", embedding_config
+        )
+
+        with self._strategy.scope():
+            layer = distributed_embedding.DistributedEmbedding(embedding_config)
+
+        res = self.run_with_strategy(layer.__call__, inputs)
+
+        self.assertEqual(
+            res["feature1"].shape, (batch_size, embedding_output_dim1)
+        )
+        self.assertEqual(
+            res["feature2"].shape, (batch_size, embedding_output_dim2)
+        )
+        self.assertEqual(
+            res["feature3"].shape, (batch_size, embedding_output_dim3)
+        )
+
     def test_save_load_model(self):
         batch_size = self._strategy.num_replicas_in_sync * BATCH_SIZE_PER_CORE
         feature_configs = self.get_embedding_config("dense", self.placement)
