diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py index deee2324960dd..5d24e72919693 100644 --- a/vllm/model_executor/model_loader/default_loader.py +++ b/vllm/model_executor/model_loader/default_loader.py @@ -241,9 +241,6 @@ class DefaultModelLoader(BaseModelLoader): self.load_config.pt_load_map_location, ) - if current_platform.is_tpu(): - pass - if self.counter_before_loading_weights == 0.0: self.counter_before_loading_weights = time.perf_counter() # Apply the prefix. diff --git a/vllm/v1/worker/tpu_worker.py b/vllm/v1/worker/tpu_worker.py index 085b119e12600..4c73d6c92d391 100644 --- a/vllm/v1/worker/tpu_worker.py +++ b/vllm/v1/worker/tpu_worker.py @@ -11,6 +11,7 @@ logger = init_logger(__name__) _R = TypeVar("_R") +# TODO(weiyulin): Remove this file after adding an official way to use hardware plugins if USE_TPU_INFERENCE: from tpu_inference.worker.tpu_worker import TPUWorker as TpuInferenceWorker