[Bugfix] GPT OSS Attribute error on H100 (#25228)

Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
Varun Sundar Rabindranath 2025-09-19 15:14:09 -04:00 committed by yewentao256
parent 17b9f3a83d
commit b6724e95f8
2 changed files with 4 additions and 4 deletions

View File

@@ -1,7 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from dataclasses import dataclass
from typing import TYPE_CHECKING, Optional, Union
from typing import Optional, Union
import torch
@@ -14,7 +14,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
from vllm.utils import cdiv, has_triton_kernels
from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
if TYPE_CHECKING and has_triton_kernels:
if has_triton_kernels():
from triton_kernels.matmul_ogs import PrecisionConfig
logger = init_logger(__name__)

View File

@@ -638,8 +638,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
return None
if self.mxfp4_backend == Mxfp4Backend.TRITON:
w1_scale = layer.w13_precision_config
w2_scale = layer.w2_precision_config
w1_scale = self.w13_precision_config
w2_scale = self.w2_precision_config
else:
w1_scale = layer.w13_weight_scale
w2_scale = layer.w2_weight_scale