From 762a4a6ca9020601220daf9ea11d32493b345442 Mon Sep 17 00:00:00 2001
From: Tsukasa OI
Date: Sat, 29 Nov 2025 11:32:08 +0900
Subject: [PATCH] [Frontend] Perform offline path replacement to `tokenizer`
(#29706)
Signed-off-by: Tsukasa OI
---
.../offline_mode/test_offline_mode.py | 10 ++++++++
vllm/engine/arg_utils.py | 23 ++++++++++++++-----
2 files changed, 27 insertions(+), 6 deletions(-)
diff --git a/tests/entrypoints/offline_mode/test_offline_mode.py b/tests/entrypoints/offline_mode/test_offline_mode.py
index 25e663f3af0e..539ff89abe9c 100644
--- a/tests/entrypoints/offline_mode/test_offline_mode.py
+++ b/tests/entrypoints/offline_mode/test_offline_mode.py
@@ -23,6 +23,16 @@ MODEL_CONFIGS = [
"max_num_seqs": 64,
"tensor_parallel_size": 1,
},
+ {
+ "model": "Qwen/Qwen3-0.6B",
+ "enforce_eager": True,
+ "gpu_memory_utilization": 0.50,
+ "max_model_len": 64,
+ "max_num_batched_tokens": 64,
+ "max_num_seqs": 64,
+ "tensor_parallel_size": 1,
+ "tokenizer": "Qwen/Qwen3-4B",
+ },
{
"model": "mistralai/Mistral-7B-Instruct-v0.1",
"enforce_eager": True,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
index 31825980f3a1..186a2a414187 100644
--- a/vllm/engine/arg_utils.py
+++ b/vllm/engine/arg_utils.py
@@ -581,15 +581,26 @@ class EngineArgs:
from vllm.plugins import load_general_plugins
load_general_plugins()
- # when use hf offline,replace model id to local model path
+ # When HF Hub is offline, replace the model and tokenizer IDs with local paths
if huggingface_hub.constants.HF_HUB_OFFLINE:
model_id = self.model
self.model = get_model_path(self.model, self.revision)
- logger.info(
- "HF_HUB_OFFLINE is True, replace model_id [%s] to model_path [%s]",
- model_id,
- self.model,
- )
+ if model_id is not self.model:
+ logger.info(
+ "HF_HUB_OFFLINE is True, replace model_id [%s] to model_path [%s]",
+ model_id,
+ self.model,
+ )
+ if self.tokenizer is not None:
+ tokenizer_id = self.tokenizer
+ self.tokenizer = get_model_path(self.tokenizer, self.tokenizer_revision)
+ if tokenizer_id is not self.tokenizer:
+ logger.info(
+ "HF_HUB_OFFLINE is True, replace tokenizer_id [%s] "
+ "to tokenizer_path [%s]",
+ tokenizer_id,
+ self.tokenizer,
+ )
@staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: