Skip to content

Commit e9734b6

Browse files
authored
[Fix] Fix duplicate create/commit errors upon preemption (#109)
1 parent 265136e commit e9734b6

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

unifiedcache/integration/vllm/uc_connector.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,12 @@ def get_num_new_matched_tokens(
553553
the number of tokens that can be loaded from the
554554
external KV cache beyond what is already computed.
555555
"""
556+
# When the request was preempted, commit its successfully dumped blocks
557+
# to avoid invoking the create/commit funcs a second time. Only preempted
558+
# reqs whose succeed_dumped_blocks is non-empty need this check.
559+
if hasattr(request, "succeed_dumped_blocks") and request.succeed_dumped_blocks:
560+
self.connector.commit(request.succeed_dumped_blocks, True)
561+
request.succeed_dumped_blocks.clear()
556562

557563
def md5(input) -> int:
558564
input_bytes = pickle.dumps(input, protocol=pickle.HIGHEST_PROTOCOL)
@@ -648,9 +654,6 @@ def build_connector_meta(
648654
# When prompt tokens > max_num_batched_tokens, running requests may need to save
649655
cached_request_data = scheduler_output.scheduled_cached_reqs
650656
for i, req_id in enumerate(cached_request_data.req_ids):
651-
if cached_request_data.resumed_from_preemption[i]:
652-
continue
653-
654657
save_paras = self.save_paras.get(req_id, None)
655658
if save_paras is None:
656659
continue

0 commit comments

Comments
 (0)