fix logging msg for block manager (#3701)

2026-01-04 06:59:37 +08:00 · 2024-03-28 16:29:55 -07:00 · 2024-03-28 16:29:55 -07:00 · 4716a32dd4
commit 4716a32dd4
parent c0935c96d3
3 changed files with 5 additions and 4 deletions
--- a/vllm/attention/selector.py
+++ b/vllm/attention/selector.py
@@ -41,6 +41,8 @@ def _can_use_flash_attn(dtype: torch.dtype) -> bool:
    try:
        import flash_attn  # noqa: F401
    except ImportError:
-        logger.info("flash_attn is not found.")
+        logger.info(
+            "Cannot use FlashAttention because the package is not found. "
+            "Please install it for better performance.")
        return False
    return True
--- a/vllm/core/block_manager_v1.py
+++ b/vllm/core/block_manager_v1.py
@@ -230,13 +230,12 @@ class BlockSpaceManagerV1(BlockSpaceManager):
        self.watermark_blocks = int(watermark * num_gpu_blocks)

        if self.enable_caching:
-            logger.info("enable automatic prefix caching")
+            logger.info("Automatic prefix caching is enabled.")
            self.gpu_allocator = CachedBlockAllocator(Device.GPU, block_size,
                                                      num_gpu_blocks)
            self.cpu_allocator = CachedBlockAllocator(Device.CPU, block_size,
                                                      num_cpu_blocks)
        else:
-            logger.info("disable automatic prefix caching")
            self.gpu_allocator = UncachedBlockAllocator(
                Device.GPU, block_size, num_gpu_blocks)
            self.cpu_allocator = UncachedBlockAllocator(
--- a/vllm/model_executor/parallel_utils/pynccl_utils.py
+++ b/vllm/model_executor/parallel_utils/pynccl_utils.py
@@ -10,7 +10,6 @@ logger = logging.getLogger(__name__)
 try:
    from vllm.model_executor.parallel_utils.pynccl import (NCCLCommunicator,
                                                           ncclGetVersion)
-    logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
 except Exception as e:
    # in non-NVIDIA environments, we can't import the nccl module
    # e.g. when running on machines with AMD GPUs
@@ -40,6 +39,7 @@ def init_process_group(world_size: int, local_rank: int, rank: int,
                       init_method: str) -> None:
    assert not is_initialized()
    global comm
+    logger.info(f"vLLM is using nccl=={ncclGetVersion()}")
    comm = NCCLCommunicator(init_method=init_method,
                            world_size=world_size,
                            local_rank=local_rank,