[Quantization] enable compressed-tensors marlin support for turing (2) (#31008)
Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
parent ac1c934276
commit 9187de9fac
@@ -48,7 +48,7 @@ def query_marlin_supported_quant_types(
             -1 if capability_tuple is None else capability_tuple.to_int()
         )
 
-    if device_capability < 80:
+    if device_capability < 75:
         return []
 
     # - has_zp is True: return quant_types that has zero points
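For context on the threshold: vLLM's capability_tuple.to_int() encodes a CUDA compute capability (major, minor) as major * 10 + minor, so Turing (SM 7.5) maps to 75 and Ampere (SM 8.0) to 80; lowering the cutoff from 80 to 75 is exactly what admits Turing GPUs. A minimal sketch of the same gate, assuming a CUDA build of PyTorch (the helper names below are illustrative, not vLLM's API):

# Sketch of the capability gate changed above; helper names are
# illustrative, not part of vLLM's public API.
import torch

def capability_to_int() -> int:
    # Encode compute capability as major * 10 + minor (SM 7.5 -> 75).
    if not torch.cuda.is_available():
        return -1
    major, minor = torch.cuda.get_device_capability()
    return major * 10 + minor

def marlin_kernels_allowed() -> bool:
    # Before this commit the cutoff was 80 (Ampere); it is now 75 (Turing).
    return capability_to_int() >= 75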
@@ -23,7 +23,7 @@ logger = init_logger(__name__)
 
 
 def is_fp4_marlin_supported():
-    return current_platform.has_device_capability(80)
+    return current_platform.has_device_capability(75)
 
 
 def nvfp4_marlin_process_scales(marlin_scales):
@@ -22,7 +22,7 @@ logger = init_logger(__name__)
 
 
 def is_fp8_marlin_supported():
-    return current_platform.has_device_capability(80)
+    return current_platform.has_device_capability(75)
 
 
 def fp8_fused_exponent_bias_into_scales(scales):
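All three gates now share the same SM 7.5 threshold. As a quick sanity check on a given machine, one can mirror current_platform.has_device_capability with plain PyTorch (a sketch; the local has_device_capability below is a stand-in, not vLLM's platform helper):

import torch

def has_device_capability(threshold: int) -> bool:
    # Illustrative stand-in for current_platform.has_device_capability:
    # true iff the current GPU's (major * 10 + minor) >= threshold.
    if not torch.cuda.is_available():
        return False
    major, minor = torch.cuda.get_device_capability()
    return major * 10 + minor >= threshold

# Turing cards (SM 7.5, e.g. the T4 or RTX 20-series) now pass; pre-Turing
# GPUs still do not.
print("fp8 marlin supported:", has_device_capability(75))
print("fp4 marlin supported:", has_device_capability(75))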