diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index b1e4d04717768..978224faae65e 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -5074,7 +5074,6 @@ class GPUModelRunner(
             logitsprocs_need_output_token_ids=self.input_batch.logitsprocs_need_output_token_ids,
             is_pooling_model=self.is_pooling_model,
             num_speculative_tokens=self.num_spec_tokens,
-            # sink_len=self.sink_len,
         )
 
     def _allocate_kv_cache_tensors(
diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py
index f266a1386e10d..2bcc87b63bcdf 100644
--- a/vllm/v1/worker/kv_connector_model_runner_mixin.py
+++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py
@@ -200,7 +200,7 @@ class KVConnectorModelRunnerMixin:
 
         try:
             kv_cache_stride_order = attn_backend.get_kv_cache_stride_order(
-                include_num_layers_dimension=True,
+                include_num_layers_dimension=True
             )
         except (AttributeError, NotImplementedError):
             return False
@@ -270,7 +270,7 @@ class KVConnectorModelRunnerMixin:
 
         try:
             kv_cache_stride_order = attn_backend.get_kv_cache_stride_order(
-                include_num_layers_dimension=True,
+                include_num_layers_dimension=True
             )
             assert len(kv_cache_stride_order) == len(kv_cache_shape)
         except (AttributeError, NotImplementedError):