[Quantization] enable compressed-tensors marlin support for turing (2) (#31008)

Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
This commit is contained in:
Jinzhen Lin 2025-12-19 16:56:35 +08:00 committed by GitHub
parent ac1c934276
commit 9187de9fac
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 3 additions and 3 deletions

View File

@@ -48,7 +48,7 @@ def query_marlin_supported_quant_types(
-1 if capability_tuple is None else capability_tuple.to_int()
)
if device_capability < 80:
if device_capability < 75:
return []
# - has_zp is True: return quant_types that has zero points

View File

@@ -23,7 +23,7 @@ logger = init_logger(__name__)
def is_fp4_marlin_supported():
return current_platform.has_device_capability(80)
return current_platform.has_device_capability(75)
def nvfp4_marlin_process_scales(marlin_scales):

View File

@@ -22,7 +22,7 @@ logger = init_logger(__name__)
def is_fp8_marlin_supported():
return current_platform.has_device_capability(80)
return current_platform.has_device_capability(75)
def fp8_fused_exponent_bias_into_scales(scales):