diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py index 493235d724f4..5cbc8ca31752 100644 --- a/vllm/distributed/kv_transfer/kv_connector/utils.py +++ b/vllm/distributed/kv_transfer/kv_connector/utils.py @@ -97,10 +97,10 @@ def get_kv_connector_cache_layout(): # used for faster transfer. vllm_config = get_current_vllm_config() kv_config = vllm_config.kv_transfer_config - if vllm_config.model_config is None or kv_config is None: + if kv_config is not None and vllm_config.model_config is None: logger.warning_once("Unable to detect current VLLM config. " \ "Defaulting to NHD kv cache layout.") - else: + elif kv_config is not None: use_mla = vllm_config.model_config.use_mla if not use_mla and kv_config.kv_connector == "NixlConnector": logger.info_once("NixlConnector detected. Setting KV cache " \ diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py index 8083f2002602..b0ebb00d9e6b 100644 --- a/vllm/v1/attention/backends/utils.py +++ b/vllm/v1/attention/backends/utils.py @@ -138,7 +138,7 @@ def get_kv_cache_layout(): if cache_layout is None: cache_layout = get_kv_connector_cache_layout() else: - logger.info_once("`FLASHINFER_KV_CACHE_LAYOUT` environment variable " \ + logger.info_once("`VLLM_KV_CACHE_LAYOUT` environment variable " \ "detected. Setting KV cache layout to %s.", cache_layout) return cache_layout