From cf8eed7bef52293a9a6706a2953374b24dff2c81 Mon Sep 17 00:00:00 2001
From: Kevin McKay
Date: Sun, 21 Dec 2025 23:14:58 -0600
Subject: [PATCH] [Bugfix][ROCm] Fix typo: is_linear_fp8_enaled -> is_linear_fp8_enabled (#31109)

Signed-off-by: c0de128
Co-authored-by: Claude Sonnet 4.5
---
 vllm/_aiter_ops.py                                             | 2 +-
 .../compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py | 2 +-
 vllm/model_executor/layers/quantization/fp8.py                 | 2 +-
 vllm/model_executor/layers/quantization/input_quant_fp8.py     | 2 +-
 vllm/platforms/rocm.py                                         | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vllm/_aiter_ops.py b/vllm/_aiter_ops.py
index 0eae279acf5be..03e3bb7594910 100644
--- a/vllm/_aiter_ops.py
+++ b/vllm/_aiter_ops.py
@@ -761,7 +761,7 @@ class rocm_aiter_ops:
 
     @classmethod
     @if_aiter_supported
-    def is_linear_fp8_enaled(cls) -> bool:
+    def is_linear_fp8_enabled(cls) -> bool:
         return cls.is_linear_enabled()
 
     @classmethod
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
index ee99572f5f499..758a54c10605a 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
@@ -61,7 +61,7 @@ class CompressedTensorsW8A8Fp8(CompressedTensorsScheme):
         )
         self.cutlass_block_fp8_supported = cutlass_block_fp8_supported()
-        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enaled()
+        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enabled()
 
         if self.weight_block_size is not None:
             assert not self.is_static_input_scheme
diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py
index 1f770d6d89a13..d19b20798ed06 100644
--- a/vllm/model_executor/layers/quantization/fp8.py
+++ b/vllm/model_executor/layers/quantization/fp8.py
@@ -414,7 +414,7 @@ class Fp8LinearMethod(LinearMethodBase):
         if vllm_is_batch_invariant():
             self.use_marlin = False
 
-        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enaled()
+        self.use_aiter_and_is_supported = rocm_aiter_ops.is_linear_fp8_enabled()
         self.use_deep_gemm = is_deep_gemm_supported()
 
         self.weight_block_size = self.quant_config.weight_block_size
diff --git a/vllm/model_executor/layers/quantization/input_quant_fp8.py b/vllm/model_executor/layers/quantization/input_quant_fp8.py
index a5db086fb4729..7994c838ad548 100644
--- a/vllm/model_executor/layers/quantization/input_quant_fp8.py
+++ b/vllm/model_executor/layers/quantization/input_quant_fp8.py
@@ -51,7 +51,7 @@ class QuantFP8(CustomOp):
         self.column_major_scales = column_major_scales
         self.use_ue8m0 = use_ue8m0
-        self.use_aiter = rocm_aiter_ops.is_linear_fp8_enaled()
+        self.use_aiter = rocm_aiter_ops.is_linear_fp8_enabled()
 
         self.is_group_quant = group_shape.is_per_group()
         if self.is_group_quant:
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py
index 5892639eba406..b95287906c1fe 100644
--- a/vllm/platforms/rocm.py
+++ b/vllm/platforms/rocm.py
@@ -408,7 +408,7 @@ class RocmPlatform(Platform):
         parallel_config = vllm_config.parallel_config
         is_eager_execution = compilation_config == CUDAGraphMode.NONE
         use_aiter_rms_norm = rocm_aiter_ops.is_rmsnorm_enabled()
-        use_aiter_fp8_linear = rocm_aiter_ops.is_linear_fp8_enaled()
+        use_aiter_fp8_linear = rocm_aiter_ops.is_linear_fp8_enabled()
         if compilation_config.cudagraph_mode.has_full_cudagraphs():
             # decode context parallel does not support full cudagraphs