File tree Expand file tree Collapse file tree 1 file changed +6
-3
lines changed
unifiedcache/integration/vllm Expand file tree Collapse file tree 1 file changed +6
-3
lines changed Original file line number Diff line number Diff line change @@ -553,6 +553,12 @@ def get_num_new_matched_tokens(
553553 the number of tokens that can be loaded from the
554554 external KV cache beyond what is already computed.
555555 """
556+ # When the request was preempted, commit its successfully dumped blocks
557+ # to avoid invoking the create/commit functions a second time. Only preempted
558+ # requests whose succeed_dumped_blocks is non-empty need this check.
559+ if hasattr (request , "succeed_dumped_blocks" ) and request .succeed_dumped_blocks :
560+ self .connector .commit (request .succeed_dumped_blocks , True )
561+ request .succeed_dumped_blocks .clear ()
556562
557563 def md5 (input ) -> int :
558564 input_bytes = pickle .dumps (input , protocol = pickle .HIGHEST_PROTOCOL )
@@ -648,9 +654,6 @@ def build_connector_meta(
648654 # When prompt tokens > max_num_batched_tokens, some of the running requests may still need to save
649655 cached_request_data = scheduler_output .scheduled_cached_reqs
650656 for i , req_id in enumerate (cached_request_data .req_ids ):
651- if cached_request_data .resumed_from_preemption [i ]:
652- continue
653-
654657 save_paras = self .save_paras .get (req_id , None )
655658 if save_paras is None :
656659 continue
You can’t perform that action at this time.
0 commit comments