[Bugfix] GPT OSS Attribute error on H100 (#25228)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
2026-07-01 17:27:15 +08:00 · 2025-09-19 15:14:09 -04:00 · 2025-09-19 15:14:09 -04:00 · 7852b82b93
commit 7852b82b93
parent a2a5f79e09
2 changed files with 4 additions and 4 deletions
--- a/vllm/model_executor/layers/fused_moe/config.py
+++ b/vllm/model_executor/layers/fused_moe/config.py
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 from dataclasses import dataclass
-from typing import TYPE_CHECKING, Optional, Union
+from typing import Optional, Union

 import torch

@@ -14,7 +14,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
 from vllm.utils import cdiv, has_triton_kernels
 from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe

-if TYPE_CHECKING and has_triton_kernels:
+if has_triton_kernels():
    from triton_kernels.matmul_ogs import PrecisionConfig

 logger = init_logger(__name__)
--- a/vllm/model_executor/layers/quantization/mxfp4.py
+++ b/vllm/model_executor/layers/quantization/mxfp4.py
@@ -638,8 +638,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
            return None

        if self.mxfp4_backend == Mxfp4Backend.TRITON:
-            w1_scale = layer.w13_precision_config
-            w2_scale = layer.w2_precision_config
+            w1_scale = self.w13_precision_config
+            w2_scale = self.w2_precision_config
        else:
            w1_scale = layer.w13_weight_scale
            w2_scale = layer.w2_weight_scale