From 67532a1a6855e8262b3e1c9512c85e2fc934b3c0 Mon Sep 17 00:00:00 2001
From: Michael Goin <mgoin64@gmail.com>
Date: Tue, 16 Sep 2025 23:57:51 -0400
Subject: [PATCH] [UX] Remove "quantization is not fully optimized yet" log
 (#25012)

Signed-off-by: mgoin <mgoin64@gmail.com>
---
 vllm/config/__init__.py | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
index 535802585d18b..5f30576099714 100644
--- a/vllm/config/__init__.py
+++ b/vllm/config/__init__.py
@@ -1086,22 +1086,6 @@ class ModelConfig:
 
     def _verify_quantization(self) -> None:
         supported_quantization = me_quant.QUANTIZATION_METHODS
-        optimized_quantization_methods = [
-            "fp8",
-            "modelopt",
-            "gptq_marlin_24",
-            "gptq_marlin",
-            "awq_marlin",
-            "fbgemm_fp8",
-            "compressed-tensors",
-            "experts_int8",
-            "quark",
-            "modelopt_fp4",
-            "bitblas",
-            "gptq_bitblas",
-            "inc",
-            "petit_nvfp4",
-        ]
         if self.quantization is not None:
             self.quantization = cast(me_quant.QuantizationMethods,
                                      self.quantization)
@@ -1183,11 +1167,6 @@ class ModelConfig:
                     f"be one of {supported_quantization}.")
             from vllm.platforms import current_platform
             current_platform.verify_quantization(self.quantization)
-            if self.quantization not in optimized_quantization_methods:
-                logger.warning(
-                    "%s quantization is not fully "
-                    "optimized yet. The speed can be slower than "
-                    "non-quantized models.", self.quantization)
 
     def _verify_cuda_graph(self) -> None:
         # The `max_seq_len_to_capture` was incorrectly