[Bugfix] Fix boolean conversion for OpenVINO env variable (#13615)

2026-03-18 11:17:18 +08:00 · 2025-02-22 17:04:12 +01:00 · 2025-02-22 17:04:12 +01:00 · 382f66fb08
commit 382f66fb08
parent 8354f6640c
2 changed files with 5 additions and 3 deletions
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -360,8 +360,9 @@ environment_variables: Dict[str, Callable[[], Any]] = {
    # Enables weights compression during model export via HF Optimum
    # default is False
    "VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS":
-    lambda: bool(os.getenv("VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS", False)),
-
+    lambda:
+    (os.environ.get("VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS", "0").lower() in
+     ("on", "true", "1")),
    # If the env var is set, then all workers will execute as separate
    # processes from the engine, and we use the same mechanism to trigger
    # execution on all workers.
--- a/vllm/model_executor/model_loader/openvino.py
+++ b/vllm/model_executor/model_loader/openvino.py
@@ -125,7 +125,8 @@ class OpenVINOCausalLM(nn.Module):
                "as-is, all possible options that may affect model conversion "
                "are ignored.")

-        load_in_8bit = envs.VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
+        load_in_8bit = (envs.VLLM_OPENVINO_ENABLE_QUANTIZED_WEIGHTS
+                        if export else False)
        pt_model = OVModelForCausalLM.from_pretrained(
            model_config.model,
            export=export,