paulpak58
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
Lines changed: 0 additions & 1 deletion
diff --git a/tests/kv_transfer/test_disagg.py b/tests/kv_transfer/test_disagg.py
Lines changed: 0 additions & 120 deletions
diff --git a/vllm/distributed/kv_transfer/kv_connector/base.py b/vllm/distributed/kv_transfer/kv_connector/base.py
Lines changed: 4 additions & 136 deletions
diff --git a/vllm/distributed/kv_transfer/kv_connector/factory.py b/vllm/distributed/kv_transfer/kv_connector/factory.py
Lines changed: 13 additions & 55 deletions
@@ -749,7 +749,6 @@ steps:
   # this test fails consistently.
   # TODO: investigate and fix
   - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py
-  - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s kv_transfer/test_disagg.py
   - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown
   - pytest -v -s models/multimodal/generation/test_maverick.py
 
 
@@ -1,142 +1,10 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
-"""
-KVConnectorBase Class for Distributed KV Cache & Hidden State communication
-
-The class provides two primary abstract methods:
-1. send_kv_caches_and_hidden_states(): Send KV caches and hidden states
-2. recv_kv_caches_and_hidden_states(): Recv KV caches and hidden states
-"""
-
-from abc import ABC, abstractmethod
-from typing import TYPE_CHECKING, Optional, Union
-
-import torch
+"""Defines the base type for KV cache connectors."""
 
 from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorBase_V1
-from vllm.sequence import IntermediateTensors
-
-if TYPE_CHECKING:
-    from vllm.config import VllmConfig
-    from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata
-
-
-class KVConnectorBase(ABC):
-    """
-    Abstract base class for a KV connector.
-
-    The class provides two primary abstract methods:
-    1. send_kv_caches_and_hidden_states(): Send KV caches and hidden states
-    2. recv_kv_caches_and_hidden_states(): Recv KV caches and hidden states
-    """
-
-    @abstractmethod
-    def __init__(
-        self,
-        rank: int,
-        local_rank: int,
-        config: "VllmConfig",
-    ):
-        raise NotImplementedError
-
-    @abstractmethod
-    def close(self) -> None:
-        """Close the buffer and release resources.
-
-        This method is responsible for cleaning up resources related to the 
-        connector when it is no longer needed.
-
-        Raises:
-            NotImplementedError: This method must be implemented in subclasses.
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def send_kv_caches_and_hidden_states(
-        self,
-        model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor],
-        hidden_or_intermediate_states: Union[torch.Tensor,
-                                             IntermediateTensors],
-    ) -> None:
-        """
-        Send KV caches and hidden states to the connector.
-
-        This method processes the input tokens, KV caches, and 
-        hidden/intermediate states for a given model and sends the data to the 
-        decode instance.
-
-        Args:
-            model_executable (torch.nn.Module): The model executable containing 
-                start and end layer information.
-            model_input (ModelInputForGPUWithSamplingMetadata): The input
-                metadata from vLLM.
-            kv_caches (list[torch.Tensor]): List of KV caches (keys and values) 
-                for each layer.
-            hidden_or_intermediate_states (Union[torch.Tensor, 
-            IntermediateTensors]): 
-                The hidden or intermediate states associated with the tokens.
-
-        Returns:
-            None
-
-        """
-
-        raise NotImplementedError
-
-    @abstractmethod
-    def recv_kv_caches_and_hidden_states(
-        self, model_executable: torch.nn.Module,
-        model_input: "ModelInputForGPUWithSamplingMetadata",
-        kv_caches: list[torch.Tensor]
-    ) -> tuple[Union[torch.Tensor, IntermediateTensors], bool,
-               "ModelInputForGPUWithSamplingMetadata"]:
-        """
-        Receive KV caches and hidden states from the connector.
-
-        This method attempts to retrieve KV caches and hidden states for input
-        tokens. If all required KV caches and hidden states are received, it
-        will bypass model input, else it will fall back to normal vLLM model 
-        forwarding.
-
-        Args:
-            model_executable (torch.nn.Module): 
-                The model executable from vLLM modelrunner.
-            model_input (ModelInputForGPUWithSamplingMetadata): 
-                The model input from vLLM modelrunner.
-            kv_caches (list[torch.Tensor]): 
-                List of KV caches for each layer.
-
-        Returns:
-            - hidden_or_intermediate_states (torch.Tensor or
-            IntermediateTensors): 
-                Concatenated hidden states if all required data is retrieved, 
-                otherwise `None`.
-            - bypass_model_exec (bool): 
-                Indicates whether the model execution can be skipped (True) or 
-                needs to be redone (False).
-            - model_input (ModelInputForGPUWithSamplingMetadata): 
-                Optionally adjusted input metadata for re-execution when 
-                `bypass_model_exec=False`.
-
-        """
-
-        raise NotImplementedError
-
-    @classmethod
-    def get_required_kvcache_layout(
-            cls, vllm_config: "VllmConfig") -> Optional[str]:
-        """
-        Get the required KV cache layout for this connector.
-        Args:
-            vllm_config (VllmConfig): the vllm config.
-
-        Returns:
-            str: the required KV cache layout. e.g. HND, or NHD.
-            None if the connector does not require a specific layout.
-        """
-        return None
 
+KVConnectorBase = KVConnectorBase_V1
+KVConnectorBaseType = KVConnectorBase_V1
 
-KVConnectorBaseType = Union[KVConnectorBase, KVConnectorBase_V1]
+__all__ = ["KVConnectorBase", "KVConnectorBaseType"]
@@ -5,22 +5,18 @@
 from typing import TYPE_CHECKING, Callable
 
 import vllm.envs as envs
-from vllm.config import KVTransferConfig
-from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBaseType
-from vllm.distributed.kv_transfer.kv_connector.v1 import (KVConnectorBase_V1,
-                                                          KVConnectorRole)
+from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase
+from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorRole
 from vllm.logger import init_logger
 
-from .base import KVConnectorBase
-
 if TYPE_CHECKING:
     from vllm.config import VllmConfig
 
 logger = init_logger(__name__)
 
 
 class KVConnectorFactory:
-    _registry: dict[str, Callable[[], type[KVConnectorBaseType]]] = {}
+    _registry: dict[str, Callable[[], type[KVConnectorBase]]] = {}
 
     @classmethod
     def register_connector(cls, name: str, module_path: str,
@@ -29,28 +25,23 @@ def register_connector(cls, name: str, module_path: str,
         if name in cls._registry:
             raise ValueError(f"Connector '{name}' is already registered.")
 
-        def loader() -> type[KVConnectorBaseType]:
+        def loader() -> type[KVConnectorBase]:
             module = importlib.import_module(module_path)
             return getattr(module, class_name)
 
         cls._registry[name] = loader
 
     @classmethod
-    def create_connector_v0(cls, rank: int, local_rank: int,
-                            config: "VllmConfig") -> KVConnectorBase:
-        if envs.VLLM_USE_V1:
-            raise ValueError("Attempting to initialize a V0 Connector, "
+    def create_connector(
+        cls,
+        config: "VllmConfig",
+        role: KVConnectorRole,
+    ) -> KVConnectorBase:
+        if not envs.VLLM_USE_V1:
+            raise ValueError("Attempting to initialize a V1 Connector, "
                              f"but found {envs.VLLM_USE_V1=}")
 
-        connector_cls = cls.get_connector_class(config.kv_transfer_config)
-        assert issubclass(connector_cls, KVConnectorBase)
-        return connector_cls(rank, local_rank, config)
-
-    @classmethod
-    def get_connector_class(
-            cls, kv_transfer_config: "KVTransferConfig"
-    ) -> type[KVConnectorBaseType]:
-        """Get the connector class by name."""
+        kv_transfer_config = config.kv_transfer_config
         connector_name = kv_transfer_config.kv_connector
         if connector_name in cls._registry:
             connector_cls = cls._registry[connector_name]()
@@ -61,21 +52,7 @@ def get_connector_class(
                     f"Unsupported connector type: {connector_name}")
             connector_module = importlib.import_module(connector_module_path)
             connector_cls = getattr(connector_module, connector_name)
-        return connector_cls
-
-    @classmethod
-    def create_connector_v1(
-        cls,
-        config: "VllmConfig",
-        role: KVConnectorRole,
-    ) -> KVConnectorBase_V1:
-        if not envs.VLLM_USE_V1:
-            raise ValueError("Attempting to initialize a V1 Connector, "
-                             f"but found {envs.VLLM_USE_V1=}")
-
-        kv_transfer_config = config.kv_transfer_config
-        connector_cls = cls.get_connector_class(kv_transfer_config)
-        assert issubclass(connector_cls, KVConnectorBase_V1)
+        assert issubclass(connector_cls, KVConnectorBase)
         logger.info("Creating v1 connector with name: %s and engine_id: %s",
                     connector_cls.__name__, kv_transfer_config.engine_id)
         # NOTE(Kuntai): v1 connector is explicitly separated into two roles.
@@ -92,25 +69,6 @@ def create_connector_v1(
 # Register various connectors here.
 # The registration should not be done in each individual file, as we want to
 # only load the files corresponding to the current connector.
-KVConnectorFactory.register_connector(
-    "PyNcclConnector",
-    "vllm.distributed.kv_transfer.kv_connector.simple_connector",
-    "SimpleConnector")
-
-KVConnectorFactory.register_connector(
-    "MooncakeConnector",
-    "vllm.distributed.kv_transfer.kv_connector.simple_connector",
-    "SimpleConnector")
-
-KVConnectorFactory.register_connector(
-    "LMCacheConnector",
-    "vllm.distributed.kv_transfer.kv_connector.lmcache_connector",
-    "LMCacheConnector")
-
-KVConnectorFactory.register_connector(
-    "MooncakeStoreConnector",
-    "vllm.distributed.kv_transfer.kv_connector.mooncake_store_connector",
-    "MooncakeStoreConnector")
 
 KVConnectorFactory.register_connector(
     "SharedStorageConnector",