[bug] Fix "Current vLLM config is not set." warnings when FlashInfer attention is used (#30241)
Signed-off-by: Po-Han Huang <pohanh@nvidia.com>
commit eea41804a4
parent 9f042ba26b
@@ -269,6 +269,8 @@ def supports_trtllm_attention() -> bool:

 def force_use_trtllm_attention() -> bool | None:
     """
+    This function should only be called during initialization stage when vllm config
+    is set.
     Return `None` if --attention-config.use_trtllm_attention is not set,
     return `True` if TRTLLM attention is forced to be used,
     return `False` if TRTLLM attention is forced to be not used.
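The warning in the commit title is emitted when the global vLLM config is read outside the window in which it is set, which is why the docstring now restricts this helper to the initialization stage. A minimal sketch of that config-context pattern, with hypothetical names rather than vLLM's actual implementation:

import contextlib
import warnings

_current_config = None  # populated only while initialization is running

@contextlib.contextmanager
def set_current_config(config):
    # Stand-in for the init-time context: the global is populated on entry
    # and cleared again on exit.
    global _current_config
    _current_config = config
    try:
        yield
    finally:
        _current_config = None

def get_current_config():
    # Outside the context this emits exactly the class of warning the
    # commit title refers to.
    if _current_config is None:
        warnings.warn("Current config is not set.")
    return _current_config

with set_current_config({"use_trtllm_attention": None}):
    get_current_config()   # fine: called during "initialization"
get_current_config()       # warns: called after init has torn down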
@@ -296,11 +298,12 @@ def use_trtllm_attention(
     kv_cache_dtype: str,
     q_dtype: torch.dtype,
     is_prefill: bool,
+    # None means auto-detection, True means force on, False means force off
+    force_use_trtllm: bool | None = None,
     has_sinks: bool = False,
     has_spec: bool = False,
 ) -> bool:
     """Return `True` if TRTLLM attention is used."""
-    force_use_trtllm = force_use_trtllm_attention()

     # CLI argument is set to 0 - respect it
     if force_use_trtllm is not None and not force_use_trtllm:
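The fix threads the flag through as a parameter: the config-dependent lookup now happens once at initialization, and the hot path never touches the global config. A simplified before/after sketch of that contract (hypothetical helper names; the real use_trtllm_attention takes more parameters than this hunk shows):

def resolve_force_flag() -> bool | None:
    # Stand-in for force_use_trtllm_attention(); per the docstring above it
    # may only run during initialization, while the vLLM config is set.
    return False  # pretend the CLI forced TRTLLM attention off

def use_trtllm(is_prefill: bool, force_use_trtllm: bool | None = None) -> bool:
    # Mirrors the new contract: the caller supplies the resolved flag.
    if force_use_trtllm is not None:
        return force_use_trtllm   # CLI forced on/off - respect it
    return is_prefill             # stand-in for the auto-detection heuristics

# Init stage: resolve once, while the config is available.
force_flag = resolve_force_flag()
# Hot path: no config lookup, hence no "Current vLLM config is not set." warning.
assert use_trtllm(is_prefill=True, force_use_trtllm=force_flag) is False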
@@ -429,6 +429,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
         super().__init__(kv_cache_spec, layer_names, vllm_config, device)
         self.cache_config = vllm_config.cache_config
         self.model_config = vllm_config.model_config
+        self.attention_config = vllm_config.attention_config
         self._workspace_buffer = None
         self._prefill_wrapper: (
             BatchPrefillWithPagedKVCacheWrapper | BatchDCPPrefillWrapper | None
@@ -779,6 +780,7 @@ class FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]):
             self.cache_dtype,
             self.q_data_type,
             is_prefill=True,
+            force_use_trtllm=self.attention_config.use_trtllm_attention,
             has_sinks=self.has_sinks,
             has_spec=uses_spec_reorder,
         )
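Together, these two hunks apply the same pattern inside the builder: the attention config is captured once in __init__ (while the vLLM config is set) and its flag is forwarded on every call. A minimal sketch with hypothetical stub classes, not the real FlashInferMetadataBuilder:

from dataclasses import dataclass

@dataclass
class AttentionConfigStub:
    use_trtllm_attention: bool | None = None   # None means auto-detect

class MetadataBuilderSketch:
    def __init__(self, attention_config: AttentionConfigStub) -> None:
        # Captured once at construction time, while the config is available.
        self.attention_config = attention_config

    def plan_prefill(self) -> bool:
        # Forward the cached flag instead of re-reading global state.
        force = self.attention_config.use_trtllm_attention
        return force if force is not None else True  # True = stand-in heuristic

builder = MetadataBuilderSketch(AttentionConfigStub(use_trtllm_attention=False))
assert builder.plan_prefill() is False   # the forced-off setting is respected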