[Bugfix] Skip generation config fallback for GGUF to prevent multi-process hang (#30209)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-06-22 14:37:25 +08:00 · 2025-12-08 19:52:43 -06:00 · 2025-12-08 19:52:43 -06:00 · e41312a2f5
commit e41312a2f5
parent 7b35011ad1
1 changed file with 7 additions and 0 deletions
--- a/vllm/transformers_utils/config.py
+++ b/vllm/transformers_utils/config.py
@@ -954,6 +954,13 @@ def try_get_generation_config(
    revision: str | None = None,
    config_format: str | ConfigFormat = "auto",
 ) -> GenerationConfig | None:
+    # GGUF files don't have generation_config.json - their config is embedded
+    # in the file header. Skip all filesystem lookups to avoid re-reading the
+    # memory-mapped file, which can hang in multi-process scenarios when the
+    # EngineCore process already has the file mapped.
+    if is_gguf(model):
+        return None
+
    try:
        return GenerationConfig.from_pretrained(
            model,