diff --git a/tests/plugins_tests/test_scheduler_plugins.py b/tests/plugins_tests/test_scheduler_plugins.py index 98981a81e909c..f4bbfb50594fd 100644 --- a/tests/plugins_tests/test_scheduler_plugins.py +++ b/tests/plugins_tests/test_scheduler_plugins.py @@ -6,7 +6,7 @@ from vllm.core.scheduler import Scheduler from vllm.engine.arg_utils import EngineArgs from vllm.engine.llm_engine import LLMEngine from vllm.sampling_params import SamplingParams -from vllm.v1.core.scheduler import Scheduler as V1Scheduler +from vllm.v1.core.sched.scheduler import Scheduler as V1Scheduler from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py index 9413373390fe2..8916aa580000a 100644 --- a/tests/v1/core/test_scheduler.py +++ b/tests/v1/core/test_scheduler.py @@ -6,7 +6,8 @@ import pytest from vllm.config import CacheConfig, ModelConfig, SchedulerConfig, VllmConfig from vllm.multimodal.inputs import MultiModalKwargs, PlaceholderRange from vllm.sampling_params import SamplingParams -from vllm.v1.core.scheduler import Scheduler, SchedulerOutput +from vllm.v1.core.sched.output import SchedulerOutput +from vllm.v1.core.sched.scheduler import Scheduler from vllm.v1.outputs import ModelRunnerOutput from vllm.v1.request import Request, RequestStatus from vllm.v1.structured_output import StructuredOutputManager diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py index f1575b1b0fc7d..dd95a7f53064e 100644 --- a/tests/v1/worker/test_gpu_model_runner.py +++ b/tests/v1/worker/test_gpu_model_runner.py @@ -3,8 +3,8 @@ import pytest from vllm.config import CacheConfig, ModelConfig, SchedulerConfig, VllmConfig from vllm.sampling_params import SamplingParams -from vllm.v1.core.scheduler.output import (CachedRequestData, NewRequestData, - SchedulerOutput) +from vllm.v1.core.sched.output import (CachedRequestData, NewRequestData, + SchedulerOutput) from vllm.v1.sample.metadata import SamplingMetadata from vllm.v1.worker.gpu_model_runner import GPUModelRunner diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index cef3a3f78b0b9..e26ff57e7d680 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1440,7 +1440,7 @@ class EngineArgs: # V1 should use the new scheduler by default. # Swap it only if this arg is set to the original V0 default if self.scheduler_cls == EngineArgs.scheduler_cls: - self.scheduler_cls = "vllm.v1.core.scheduler.Scheduler" + self.scheduler_cls = "vllm.v1.core.sched.scheduler.Scheduler" # When no user override, set the default values based on the usage # context. diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py index c1bf2fb316d9b..a7042ca8df17c 100644 --- a/vllm/executor/ray_utils.py +++ b/vllm/executor/ray_utils.py @@ -17,7 +17,7 @@ from vllm.utils import get_ip from vllm.worker.worker_base import WorkerWrapperBase if TYPE_CHECKING: - from vllm.v1.core.scheduler import SchedulerOutput + from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.outputs import ModelRunnerOutput logger = init_logger(__name__) diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index 7c62c9a153fb3..9a5f39c3975dc 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -16,7 +16,7 @@ from vllm.platforms import current_platform from vllm.utils import cdiv if TYPE_CHECKING: - from vllm.v1.core.scheduler.output import SchedulerOutput + from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.worker.gpu_input_batch import InputBatch from vllm.v1.worker.gpu_model_runner import GPUModelRunner diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index d7a08ad4b058b..a69d82a67d8cd 100644 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -233,7 +233,7 @@ except ImportError: from flash_attn import flash_attn_varlen_func if TYPE_CHECKING: - from vllm.v1.core.scheduler.output import SchedulerOutput + from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.worker.gpu_input_batch import InputBatch from vllm.v1.worker.gpu_model_runner import GPUModelRunner diff --git a/vllm/v1/core/scheduler/__init__.py b/vllm/v1/core/sched/__init__.py similarity index 100% rename from vllm/v1/core/scheduler/__init__.py rename to vllm/v1/core/sched/__init__.py diff --git a/vllm/v1/core/scheduler/output.py b/vllm/v1/core/sched/output.py similarity index 100% rename from vllm/v1/core/scheduler/output.py rename to vllm/v1/core/sched/output.py diff --git a/vllm/v1/core/scheduler.py b/vllm/v1/core/sched/scheduler.py similarity index 99% rename from vllm/v1/core/scheduler.py rename to vllm/v1/core/sched/scheduler.py index ac5dc4e92627c..d129c65f10131 100644 --- a/vllm/v1/core/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -13,8 +13,8 @@ from vllm.logger import init_logger from vllm.v1.core.encoder_cache_manager import (EncoderCacheManager, compute_encoder_budget) from vllm.v1.core.kv_cache_manager import KVCacheManager -from vllm.v1.core.scheduler.output import (CachedRequestData, NewRequestData, - SchedulerOutput) +from vllm.v1.core.sched.output import (CachedRequestData, NewRequestData, + SchedulerOutput) from vllm.v1.engine import (EngineCoreEvent, EngineCoreEventType, EngineCoreOutput, EngineCoreOutputs) from vllm.v1.metrics.stats import SchedulerStats diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 662edce410f18..333e30bebd930 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -22,7 +22,7 @@ from vllm.transformers_utils.config import ( from vllm.utils import (get_exception_traceback, resolve_obj_by_qualname, zmq_socket_ctx) from vllm.v1.core.kv_cache_utils import get_kv_cache_configs -from vllm.v1.core.scheduler.output import SchedulerOutput +from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.engine import (EngineCoreOutputs, EngineCoreRequest, EngineCoreRequestType, UtilityOutput) from vllm.v1.engine.mm_input_cache import MMInputCacheServer diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index dfdfe9a76a8bb..a1005bbc0e7fe 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -44,7 +44,7 @@ from vllm.v1.worker.lora_model_runner_mixin import LoRAModelRunnerMixin if TYPE_CHECKING: import xgrammar as xgr - from vllm.v1.core.scheduler.output import SchedulerOutput + from vllm.v1.core.sched.output import SchedulerOutput else: xgr = LazyLoader("xgr", globals(), "xgrammar") diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index e69a8c6453fbc..706e8317c13f9 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -28,7 +28,7 @@ from vllm.v1.worker.worker_base import WorkerBase logger = init_logger(__name__) if TYPE_CHECKING: - from vllm.v1.core.scheduler.output import SchedulerOutput + from vllm.v1.core.sched.output import SchedulerOutput class Worker(WorkerBase): diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index bba016271fa07..33a8cbe0e89b4 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -35,7 +35,7 @@ from vllm.v1.utils import bind_kv_cache from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch if TYPE_CHECKING: - from vllm.v1.core.scheduler.output import SchedulerOutput + from vllm.v1.core.sched.output import SchedulerOutput logger = init_logger(__name__) diff --git a/vllm/v1/worker/tpu_worker.py b/vllm/v1/worker/tpu_worker.py index df00eedf0d943..dbb231950d08d 100644 --- a/vllm/v1/worker/tpu_worker.py +++ b/vllm/v1/worker/tpu_worker.py @@ -17,7 +17,7 @@ from vllm.distributed import (ensure_model_parallel_initialized, from vllm.logger import init_logger from vllm.model_executor import set_random_seed from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE -from vllm.v1.core.scheduler.output import SchedulerOutput +from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig, KVCacheSpec) from vllm.v1.outputs import ModelRunnerOutput