mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-07 07:12:16 +08:00
[Misc] Fix quantization-related typos (#31116)
Signed-off-by: c0de128 <kevin.mckay@outlook.com>
This commit is contained in:
parent
8c084de59d
commit
ec58c10ce1
@ -258,16 +258,16 @@ class Config:
|
|||||||
f"{self.fe_supported_types()}."
|
f"{self.fe_supported_types()}."
|
||||||
)
|
)
|
||||||
|
|
||||||
# Check block quanization support
|
# Check block quantization support
|
||||||
is_block_quatized = self.quant_block_shape is not None
|
is_block_quantized = self.quant_block_shape is not None
|
||||||
if is_block_quatized and self.quant_dtype is None:
|
if is_block_quantized and self.quant_dtype is None:
|
||||||
return False, "No block quantization support."
|
return False, "No block quantization support."
|
||||||
|
|
||||||
if is_block_quatized and not self.is_block_quant_supported():
|
if is_block_quantized and not self.is_block_quant_supported():
|
||||||
return False, "Mismatched block quantization support."
|
return False, "Mismatched block quantization support."
|
||||||
|
|
||||||
# deep_gemm only works with block-quantized
|
# deep_gemm only works with block-quantized
|
||||||
if self.needs_deep_gemm() and not is_block_quatized:
|
if self.needs_deep_gemm() and not is_block_quantized:
|
||||||
return False, "Needs DeepGEMM but not block quantized."
|
return False, "Needs DeepGEMM but not block quantized."
|
||||||
|
|
||||||
# Check dependencies (turn into asserts?)
|
# Check dependencies (turn into asserts?)
|
||||||
|
|||||||
@ -217,7 +217,7 @@ def test_scaled_fp8_quant(dtype) -> None:
|
|||||||
ref_y, inv_scale = ops.scaled_fp8_quant(x, None)
|
ref_y, inv_scale = ops.scaled_fp8_quant(x, None)
|
||||||
ref_y = per_tensor_dequantize(ref_y, inv_scale, dtype)
|
ref_y = per_tensor_dequantize(ref_y, inv_scale, dtype)
|
||||||
|
|
||||||
# Reference dynamic quantizaton
|
# Reference dynamic quantization
|
||||||
y = quantize_ref(x, inv_scale)
|
y = quantize_ref(x, inv_scale)
|
||||||
torch.testing.assert_close(ref_y, per_tensor_dequantize(y, inv_scale, dtype))
|
torch.testing.assert_close(ref_y, per_tensor_dequantize(y, inv_scale, dtype))
|
||||||
|
|
||||||
|
|||||||
@ -389,7 +389,7 @@ def should_use_deepgemm_for_fp8_linear(
|
|||||||
|
|
||||||
# Verify DeepGEMM N/K dims requirements
|
# Verify DeepGEMM N/K dims requirements
|
||||||
# NOTE: Also synchronized with test_w8a8_block_fp8_deep_gemm_matmul
|
# NOTE: Also synchronized with test_w8a8_block_fp8_deep_gemm_matmul
|
||||||
# test inside kernels/quatization/test_block_fp8.py
|
# test inside kernels/quantization/test_block_fp8.py
|
||||||
N_MULTIPLE = 64
|
N_MULTIPLE = 64
|
||||||
K_MULTIPLE = 128
|
K_MULTIPLE = 128
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user