diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 2fa9f25c37195..e15daaac95a47 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -467,10 +467,8 @@ class GPUModelRunner(LoRAModelRunnerMixin):
             # Update the block IDs.
             if not req_data.resumed_from_preemption:
                 # Append the new blocks to the existing block IDs.
-                for block_ids, new_block_ids in zip(  # type: ignore[call-overload]
-                        req_state.block_ids,
-                        req_data.new_block_ids,
-                        strict=True):
+                for block_ids, new_block_ids in zip(req_state.block_ids,
+                                                    req_data.new_block_ids):
                     block_ids.extend(new_block_ids)
             else:
                 # The request is resumed from preemption.
diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py
index d5f40e4d3103c..89c6373b37730 100644
--- a/vllm/v1/worker/tpu_model_runner.py
+++ b/vllm/v1/worker/tpu_model_runner.py
@@ -413,10 +413,8 @@ class TPUModelRunner(LoRAModelRunnerMixin):
             req_state.num_computed_tokens = req_data.num_computed_tokens
             if not req_data.resumed_from_preemption:
                 # Append the new blocks to the existing block IDs.
-                for block_ids, new_block_ids in zip(  # type: ignore[call-overload]
-                        req_state.block_ids,
-                        req_data.new_block_ids,
-                        strict=True):
+                for block_ids, new_block_ids in zip(req_state.block_ids,
+                                                    req_data.new_block_ids):
                     block_ids.extend(new_block_ids)
             else:
                 # The request is resumed from preemption.