[Bugfix][V1] Fix FlashInfer V1 backend using the wrong VllmConfig (#18086)

commit 12e6c0b41c
parent 9a2a6357de
Author: Michael Goin
Date:   2025-05-13 23:36:17 -04:00 (committed by GitHub)


@@ -14,8 +14,7 @@ import vllm.envs as envs
 from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl,
                                               AttentionType)
 from vllm.attention.layer import Attention
-from vllm.config import (VllmConfig, get_current_vllm_config,
-                         get_layers_from_vllm_config)
+from vllm.config import VllmConfig, get_layers_from_vllm_config
 from vllm.logger import init_logger
 from vllm.v1.attention.backends.flash_attn import use_cascade_attention
 from vllm.v1.attention.backends.utils import CommonAttentionMetadata
@@ -215,7 +214,7 @@ class FlashInferMetadataBuilder:
         # Global hyperparameters shared by all attention layers
         self.global_hyperparameters: Optional[PerLayerParameters] = None
-        self.vllm_config = get_current_vllm_config()
+        self.vllm_config = runner.vllm_config
         self.kv_cache_spec = kv_cache_spec
         self.block_table = block_table
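
For context on the fix: get_current_vllm_config() relies on a process-wide "current config" that is only populated inside a construction-time context, so a metadata builder instantiated outside that window can end up with a freshly built default config rather than the engine's real one, while runner.vllm_config is the config the runner already holds. The sketch below is a minimal, self-contained illustration of that pattern; Config, Runner, MetadataBuilder, set_current_config, and get_current_config are hypothetical stand-ins, not vLLM's actual classes or functions.

# Minimal, self-contained sketch of the failure mode (hypothetical names,
# not vLLM's real classes or functions).
from contextlib import contextmanager
from dataclasses import dataclass
from typing import Optional


@dataclass
class Config:
    max_model_len: int = 8192  # stand-in for a field on the engine config


_current_config: Optional[Config] = None


@contextmanager
def set_current_config(config: Config):
    """Make `config` the process-wide 'current' config, but only inside
    this context (mimicking a construction-time context manager)."""
    global _current_config
    _current_config = config
    try:
        yield
    finally:
        _current_config = None


def get_current_config() -> Config:
    # Outside the construction context this silently falls back to a
    # default config -- the wrong-config behavior the commit removes.
    return _current_config if _current_config is not None else Config()


class Runner:
    def __init__(self, config: Config):
        self.config = config  # the runner always holds the real config


class MetadataBuilder:
    def __init__(self, runner: Runner):
        # Buggy pattern: constructed after the set_current_config() block
        # has exited, so the global accessor returns a default Config().
        self.config_from_global = get_current_config()
        # Fixed pattern: read the config the runner already owns.
        self.config_from_runner = runner.config


engine_config = Config(max_model_len=32768)
with set_current_config(engine_config):
    pass  # model construction would happen inside this window

builder = MetadataBuilder(Runner(engine_config))
print(builder.config_from_global.max_model_len)  # 8192  -> wrong default
print(builder.config_from_runner.max_model_len)  # 32768 -> engine's config

Reading the config off the runner ties the builder to the object that actually owns the configuration, removing the implicit dependency on construction-time global state.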