From f961d7f6ef1441b9e3674cd9df5ddaee984ce3ac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Zhengyuan=20Su=20=28=E8=8B=8F=E6=94=BF=E6=B8=8A=29?=
Date: Tue, 22 Apr 2025 21:44:10 +0800
Subject: [PATCH] [BugFix] Pass in correct VLLM config in FlashInfer backend
 (#13207) (#16973)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: 苏政渊
Co-authored-by: 苏政渊
---
 vllm/attention/backends/flashinfer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vllm/attention/backends/flashinfer.py b/vllm/attention/backends/flashinfer.py
index 09717a1121d05..718b15e58785c 100644
--- a/vllm/attention/backends/flashinfer.py
+++ b/vllm/attention/backends/flashinfer.py
@@ -37,7 +37,7 @@ from vllm.attention.backends.utils import (PAD_SLOT_ID, compute_slot_mapping,
                                            is_block_tables_empty)
 from vllm.attention.layer import Attention
 from vllm.attention.ops.paged_attn import PagedAttention
-from vllm.config import VllmConfig, get_current_vllm_config
+from vllm.config import VllmConfig
 from vllm.logger import init_logger
 from vllm.utils import (async_tensor_h2d, get_kv_cache_torch_dtype,
                         make_tensor_with_pad)
@@ -187,7 +187,7 @@ class FlashInferState(AttentionState):
         # Global hyperparameters shared by all attention layers
         self.global_hyperparameters: Optional[PerLayerParameters] = None
 
-        self.vllm_config = get_current_vllm_config()
+        self.vllm_config = self.runner.vllm_config
 
     def _get_workspace_buffer(self):
         if self._workspace_buffer is None:
@@ -613,7 +613,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
         # Global hyperparameters shared by all attention layers
         self.global_hyperparameters: Optional[PerLayerParameters] = None
 
-        self.vllm_config = get_current_vllm_config()
+        self.vllm_config = self.runner.vllm_config
 
     def prepare(self):
         self.slot_mapping: List[int] = []
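
For context, a minimal sketch of the pattern this patch switches to: instead of fetching the engine configuration through a global "current config" lookup (the removed `get_current_vllm_config()` call), the attention state and metadata builder read it from the runner they were constructed with (`self.runner.vllm_config`). All `*Stub` names below are hypothetical stand-ins rather than vLLM classes; the sketch only illustrates why taking the config from the owning runner avoids falling back to a freshly constructed default when no global config context is active, which is one plausible failure mode the fix guards against (the patch itself does not spell out the root cause).

```python
from dataclasses import dataclass
from typing import Optional


@dataclass
class VllmConfigStub:
    """Hypothetical stand-in for vllm.config.VllmConfig; only holds a tag."""
    tag: str = "default"


# Hypothetical global "current config" lookup, mimicking the shape of a
# get_current_vllm_config()-style helper: it returns whatever config was
# most recently installed, or a default one if none is active.
_current_config: Optional[VllmConfigStub] = None


def get_current_config_stub() -> VllmConfigStub:
    return _current_config if _current_config is not None else VllmConfigStub()


class RunnerStub:
    """Hypothetical stand-in for the model runner, which owns the config."""

    def __init__(self, vllm_config: VllmConfigStub):
        self.vllm_config = vllm_config


class AttentionStateStub:
    """Mirrors the pattern in the patch: take the config from the runner."""

    def __init__(self, runner: RunnerStub):
        self.runner = runner
        # Before the fix: self.vllm_config = get_current_config_stub(),
        # which yields the default here because no config context is active.
        # After the fix: read it from the runner that was passed in.
        self.vllm_config = self.runner.vllm_config


if __name__ == "__main__":
    runner = RunnerStub(VllmConfigStub(tag="engine-config"))
    state = AttentionStateStub(runner)
    # The state sees the runner's config, not a freshly constructed default.
    assert state.vllm_config.tag == "engine-config"
    print(state.vllm_config.tag)
```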