[Quantization] enable compressed-tensors marlin support for turing (#31000)

Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
2025-12-24 21:55:38 +08:00 · 2025-12-19 12:29:48 +08:00 · 2025-12-19 12:29:48 +08:00 · de08b8f61b
commit de08b8f61b
parent 2ac85a4544
1 changed files with 1 additions and 1 deletions
--- a/vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
+++ b/vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
@@ -30,7 +30,7 @@ from .MPLinearKernel import MPLinearKernel, MPLinearLayerConfig
 class MarlinLinearKernel(MPLinearKernel):
    @classmethod
    def get_min_capability(cls) -> int:
-        return 80
+        return 75

    @classmethod
    def can_implement(cls, c: MPLinearLayerConfig) -> tuple[bool, str | None]: