Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2025-12-13 05:35:01 +08:00)
[Bugfix] Skip generation config fallback for GGUF to prevent multi-process hang (#30209)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
7b35011ad1
commit
e41312a2f5
@@ -954,6 +954,13 @@ def try_get_generation_config(
|
|||||||
revision: str | None = None,
|
revision: str | None = None,
|
||||||
config_format: str | ConfigFormat = "auto",
|
config_format: str | ConfigFormat = "auto",
|
||||||
) -> GenerationConfig | None:
|
) -> GenerationConfig | None:
|
||||||
|
# GGUF files don't have generation_config.json - their config is embedded
|
||||||
|
# in the file header. Skip all filesystem lookups to avoid re-reading the
|
||||||
|
# memory-mapped file, which can hang in multi-process scenarios when the
|
||||||
|
# EngineCore process already has the file mapped.
|
||||||
|
if is_gguf(model):
|
||||||
|
return None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
return GenerationConfig.from_pretrained(
|
return GenerationConfig.from_pretrained(
|
||||||
model,
|
model,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user