mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 02:54:27 +08:00
[Bugfix] GPT OSS Attribute error on H100 (#25228)
Signed-off-by: Varun Sundar Rabindranath <vsundarr@redhat.com> Co-authored-by: Varun Sundar Rabindranath <vsundarr@redhat.com>
This commit is contained in:
parent
a2a5f79e09
commit
7852b82b93
@ -1,7 +1,7 @@
|
|||||||
# SPDX-License-Identifier: Apache-2.0
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import TYPE_CHECKING, Optional, Union
|
from typing import Optional, Union
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
@ -14,7 +14,7 @@ from vllm.model_executor.layers.quantization.utils.quant_utils import (
|
|||||||
from vllm.utils import cdiv, has_triton_kernels
|
from vllm.utils import cdiv, has_triton_kernels
|
||||||
from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
|
from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe
|
||||||
|
|
||||||
if TYPE_CHECKING and has_triton_kernels:
|
if has_triton_kernels():
|
||||||
from triton_kernels.matmul_ogs import PrecisionConfig
|
from triton_kernels.matmul_ogs import PrecisionConfig
|
||||||
|
|
||||||
logger = init_logger(__name__)
|
logger = init_logger(__name__)
|
||||||
|
|||||||
@ -638,8 +638,8 @@ class Mxfp4MoEMethod(FusedMoEMethodBase):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
if self.mxfp4_backend == Mxfp4Backend.TRITON:
|
if self.mxfp4_backend == Mxfp4Backend.TRITON:
|
||||||
w1_scale = layer.w13_precision_config
|
w1_scale = self.w13_precision_config
|
||||||
w2_scale = layer.w2_precision_config
|
w2_scale = self.w2_precision_config
|
||||||
else:
|
else:
|
||||||
w1_scale = layer.w13_weight_scale
|
w1_scale = layer.w13_weight_scale
|
||||||
w2_scale = layer.w2_weight_scale
|
w2_scale = layer.w2_weight_scale
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user