diff --git a/vllm/model_executor/layers/fused_moe/config.py b/vllm/model_executor/layers/fused_moe/config.py
index 742df3dbdc6af..b14bc06e913cf 100644
--- a/vllm/model_executor/layers/fused_moe/config.py
+++ b/vllm/model_executor/layers/fused_moe/config.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Optional, Union
+from typing import Optional, Union
 
 import torch
 
@@ -14,7 +14,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
 from vllm.utils import cdiv, has_triton_kernels
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
 
-if TYPE_CHECKING and has_triton_kernels:
+if has_triton_kernels():
     from triton_kernels.matmul_ogs import PrecisionConfig
 
 logger = init_logger(__name__)
diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index 28c1e60ccd08a..5c3f8a891276b 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -638,8 +638,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
             return None
 
         if self.mxfp4_backend == Mxfp4Backend.TRITON:
-            w1_scale = layer.w13_precision_config
-            w2_scale = layer.w2_precision_config
+            w1_scale = self.w13_precision_config
+            w2_scale = self.w2_precision_config
         else:
             w1_scale = layer.w13_weight_scale
             w2_scale = layer.w2_weight_scale