From b6724e95f84dc7858d4ba232ca897f2936212326 Mon Sep 17 00:00:00 2001
From: Varun Sundar Rabindranath
Date: Fri, 19 Sep 2025 15:14:09 -0400
Subject: [PATCH] [Bugfix] GPT OSS Attribute error on H100 (#25228)

Signed-off-by: Varun Sundar Rabindranath
Co-authored-by: Varun Sundar Rabindranath
Signed-off-by: yewentao256
---
 vllm/model_executor/layers/fused_moe/config.py   | 4 ++--
 vllm/model_executor/layers/quantization/mxfp4.py | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/config.py b/vllm/model_executor/layers/fused_moe/config.py
index 742df3dbdc6af..b14bc06e913cf 100644
--- a/vllm/model_executor/layers/fused_moe/config.py
+++ b/vllm/model_executor/layers/fused_moe/config.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Optional, Union
+from typing import Optional, Union
 
 import torch
 
@@ -14,7 +14,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
 from vllm.utils import cdiv, has_triton_kernels
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
 
-if TYPE_CHECKING and has_triton_kernels:
+if has_triton_kernels():
     from triton_kernels.matmul_ogs import PrecisionConfig
 
 logger = init_logger(__name__)
diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py
index 28c1e60ccd08a..5c3f8a891276b 100644
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -638,8 +638,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
             return None
 
         if self.mxfp4_backend == Mxfp4Backend.TRITON:
-            w1_scale = layer.w13_precision_config
-            w2_scale = layer.w2_precision_config
+            w1_scale = self.w13_precision_config
+            w2_scale = self.w2_precision_config
         else:
             w1_scale = layer.w13_weight_scale
             w2_scale = layer.w2_weight_scale
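
Why the first hunk matters: typing.TYPE_CHECKING is always False at runtime, so the old guard "if TYPE_CHECKING and has_triton_kernels:" never executed and PrecisionConfig was only imported for static type checkers; calling has_triton_kernels() directly makes the import happen whenever the optional triton_kernels package is actually installed. The second hunk reads the Triton precision configs from the quant method object (self) instead of from the layer module, which is what the attribute error in the subject points at. Below is a minimal sketch of the runtime-guarded optional-import pattern the patch switches to; the find_spec-based helper body and the None fallback are illustrative assumptions, not vLLM's actual implementation.

    import importlib.util


    def has_triton_kernels() -> bool:
        # Assumed probe: True when the optional triton_kernels package is installed.
        return importlib.util.find_spec("triton_kernels") is not None


    if has_triton_kernels():
        # Imported only when the dependency is available, so this module still
        # loads on hosts without triton_kernels installed.
        from triton_kernels.matmul_ogs import PrecisionConfig
    else:
        # Illustrative fallback so later code can test for the missing dependency.
        PrecisionConfig = None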