@@ -350,6 +350,18 @@ def allocation_scope(current_stage: ExecutorMemoryType,
     validate_feature_combination(llm_args, model_engine, llm_args.sampler_type)
 
     if llm_args.sm_disagg_config is not None:
+        if llm_args.cache_transceiver_config is not None:
+            raise ValueError(
+                "SM-level disaggregation is not compatible with disaggregated serving."
+            )
+        if llm_args.parallel_config.world_size > 1:
+            raise NotImplementedError(
+                "SM-level disaggregation is not supported with parallelism.")
+        if scheduler_config.capacity_scheduler_policy != CapacitySchedulerPolicy.GUARANTEED_NO_EVICT:
+            raise NotImplementedError(
+                "SM-level disaggregation is only supported with guaranteed no evict scheduler policy."
+            )
+
         with allocation_scope(ExecutorMemoryType.MODEL_ENGINE_CTX,
                               RestoreMode.PINNED):
             ctx_llm_args = copy.copy(llm_args)
@@ -366,23 +378,6 @@ def allocation_scope(current_stage: ExecutorMemoryType,
     else:
         ctx_model_engine = None
 
-    if llm_args.sm_disagg_config is not None:
-        with allocation_scope(ExecutorMemoryType.MODEL_ENGINE_CTX,
-                              RestoreMode.PINNED):
-            ctx_backend_config = copy.copy(pytorch_backend_config)
-            ctx_backend_config.use_cuda_graph = False
-            ctx_model_engine = PyTorchModelEngine(
-                model_path=checkpoint_dir,
-                llm_args=llm_args,
-                mapping=mapping,
-                attn_runtime_features=attn_runtime_features,
-                dist=dist,
-                spec_config=spec_config,
-                weight_sharing_model=model_engine.model,
-            )
-    else:
-        ctx_model_engine = None
-
     if has_draft_model_engine:
         with allocation_scope(ExecutorMemoryType.MODEL_ENGINE_DRAFT,
                               RestoreMode.PINNED):
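
For context, here is a minimal, self-contained sketch of the fail-fast guard logic this commit adds ahead of the context-engine construction. `LlmArgsStub`, `ParallelConfig`, and `validate_sm_disagg` are simplified stand-ins introduced only for illustration; they are not the project's real `LlmArgs`, scheduler config, or validation helpers.

```python
from dataclasses import dataclass, field
from enum import Enum, auto
from typing import Any, Optional


class CapacitySchedulerPolicy(Enum):
    GUARANTEED_NO_EVICT = auto()
    MAX_UTILIZATION = auto()


@dataclass
class ParallelConfig:
    world_size: int = 1


@dataclass
class LlmArgsStub:
    # Stand-in for the real argument object; only the fields the guards read.
    sm_disagg_config: Optional[Any] = None
    cache_transceiver_config: Optional[Any] = None
    parallel_config: ParallelConfig = field(default_factory=ParallelConfig)


def validate_sm_disagg(llm_args: LlmArgsStub,
                       capacity_scheduler_policy: CapacitySchedulerPolicy) -> None:
    """Reject unsupported combinations before any engine is built."""
    if llm_args.sm_disagg_config is None:
        return
    if llm_args.cache_transceiver_config is not None:
        raise ValueError(
            "SM-level disaggregation is not compatible with disaggregated serving.")
    if llm_args.parallel_config.world_size > 1:
        raise NotImplementedError(
            "SM-level disaggregation is not supported with parallelism.")
    if capacity_scheduler_policy != CapacitySchedulerPolicy.GUARANTEED_NO_EVICT:
        raise NotImplementedError(
            "SM-level disaggregation is only supported with guaranteed no evict "
            "scheduler policy.")


if __name__ == "__main__":
    # Example: SM-level disaggregation requested together with world_size > 1.
    args = LlmArgsStub(sm_disagg_config=object(),
                       parallel_config=ParallelConfig(world_size=2))
    try:
        validate_sm_disagg(args, CapacitySchedulerPolicy.GUARANTEED_NO_EVICT)
    except NotImplementedError as exc:
        print(f"rejected: {exc}")
```

Running the guards before any `allocation_scope` is entered means an invalid configuration is rejected without allocating the context model engine first.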