From 19108ef31191e217766ffe52e8e382ddbec20fdb Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Sun, 29 Jun 2025 20:34:54 -0700
Subject: [PATCH] [Misc] Fix import (#20233)

Signed-off-by: Woosuk Kwon
---
 vllm/v1/worker/gpu_model_runner.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index 3c9de57204051..290b9a44a80e2 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -45,7 +45,7 @@ from vllm.sequence import IntermediateTensors
 from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler,
                         GiB_bytes, LazyLoader, async_tensor_h2d, cdiv,
                         check_use_alibi, get_dtype_size,
-                        is_pin_memory_available)
+                        is_pin_memory_available, round_up)
 from vllm.v1.attention.backends.mamba_attn import Mamba2AttentionBackend
 from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder,
                                               CommonAttentionMetadata)
@@ -1308,7 +1308,6 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         tp_size = self.vllm_config.parallel_config.tensor_parallel_size
         if self.compilation_config.pass_config. \
            enable_sequence_parallelism and tp_size > 1:
-            from vllm.utils import round_up
             num_input_tokens = round_up(num_scheduled_tokens, tp_size)
         else:
             num_input_tokens = num_scheduled_tokens
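
The patch hoists round_up from a function-local import into the module-level
import block of gpu_model_runner.py; runtime behavior is unchanged. For
context, below is a minimal standalone sketch of the padding logic the touched
code performs, assuming the conventional ceiling-to-multiple definition of
round_up (the helper name pad_for_sequence_parallelism is illustrative, not
part of vLLM):

    def round_up(x: int, multiple: int) -> int:
        """Round x up to the nearest multiple of `multiple` (assumed
        equivalent to vllm.utils.round_up)."""
        return ((x + multiple - 1) // multiple) * multiple

    def pad_for_sequence_parallelism(num_scheduled_tokens: int,
                                     tp_size: int,
                                     enable_sequence_parallelism: bool) -> int:
        # With sequence parallelism enabled, the token dimension is sharded
        # across tensor-parallel ranks, so the input token count must be
        # divisible by tp_size; pad it up to the next multiple.
        if enable_sequence_parallelism and tp_size > 1:
            return round_up(num_scheduled_tokens, tp_size)
        return num_scheduled_tokens

    # 13 tokens across 4 ranks pad to 16; without sequence parallelism,
    # the count passes through unchanged.
    assert pad_for_sequence_parallelism(13, 4, True) == 16
    assert pad_for_sequence_parallelism(13, 4, False) == 13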