From e41312a2f5086cc9199e024c5d451e65a303a4d1 Mon Sep 17 00:00:00 2001 From: Christina Norman Date: Mon, 8 Dec 2025 19:52:43 -0600 Subject: [PATCH] [Bugfix] Skip generation config fallback for GGUF to prevent multi-process hang (#30209) Co-authored-by: Claude Opus 4.5 --- vllm/transformers_utils/config.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 773fc05a52ef..d761802da940 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -954,6 +954,13 @@ def try_get_generation_config( revision: str | None = None, config_format: str | ConfigFormat = "auto", ) -> GenerationConfig | None: + # GGUF files don't have generation_config.json - their config is embedded + # in the file header. Skip all filesystem lookups to avoid re-reading the + # memory-mapped file, which can hang in multi-process scenarios when the + # EngineCore process already has the file mapped. + if is_gguf(model): + return None + try: return GenerationConfig.from_pretrained( model,