[Feat]Support UCM Sparse on cuda (#126)

harrisonyhq · web-flow · commit bb8585475129 · 2025-08-30T16:40:03.000+08:00
* [Feat]Support UCM Sparse on cuda

* [DOCS]Add doc for format code.
diff --git a/.gitignore b/.gitignore
@@ -48,4 +48,5 @@
 **/build/**
 **/output/**
 .venv/**
-**/__pycache__/**
+**/__pycache__/**
+*.egg-info/**
diff --git a/docs/source/developer/contributing.md b/docs/source/developer/contributing.md
@@ -0,0 +1,26 @@
+# Contributing
+## Building and testing
+It’s recommended to set up a local development environment to build and test before you submit a PR.
+### Run lint locally
+Run the following commands to format your code before submitting:
+```bash
+# Choose a base dir (~/vllm-project/) and set up venv
+cd ~/vllm-project/
+python3 -m venv .venv
+source ./.venv/bin/activate
+
+# Clone UCM and install
+git clone https://github.com/ModelEngine-Group/unified-cache-management.git 
+cd unified-cache-management
+
+# Install the lint requirements and enable the pre-commit hook
+pip install -r requirements-lint.txt
+
+# Run lint (on the first run, you need to install the pre-commit deps via a proxy network)
+bash format.sh
+```
+### Run unit test locally
+Run the unit tests locally with the following command:
+```bash
+python3 -m unittest discover -s test
+```
diff --git a/docs/source/developer/index.md b/docs/source/developer/index.md
@@ -3,6 +3,7 @@
 :::{toctree}
 :maxdepth: 2
 architecture.md
+contributing.md
 add_connector.md
 nfs_connector.md
 performance_benchmark.md
diff --git a/unifiedcache/integration/vllm/ucm_sparse/base.py b/unifiedcache/integration/vllm/ucm_sparse/base.py
@@ -35,7 +35,6 @@
 import torch
 from vllm.distributed.kv_transfer import get_kv_transfer_group, has_kv_transfer_group
 from vllm.forward_context import ForwardContext
-from vllm_ascend.worker.npu_input_batch import CachedRequestState, InputBatch
 
 INVALID_SLOT = -1
 
@@ -194,9 +193,9 @@ def update_state_after_alloc(self, request: Request, num_blocks: int):
 
     def build_sparse_meta(
         self,
-        scheduler_output: SchedulerOutput,
-        requests: dict[str, CachedRequestState],
-        input_batch: InputBatch,
+        scheduler_output,
+        requests,
+        input_batch,
     ) -> UcmSparseMetadata:
         """
         Build the sparse metadata for this step.
diff --git a/unifiedcache/patch/0.9.2/vllm-adapt-sparse.patch b/unifiedcache/patch/0.9.2/vllm-adapt-sparse.patch
diff --git a/unifiedcache/ucm_sparse/esa.py b/unifiedcache/ucm_sparse/esa.py