mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:54:56 +08:00
[Log] Optimize Log for FP8MOE (#25709)
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
a73f6491c8
commit
1244948885
@@ -467,7 +467,8 @@ class Fp8MoEMethod(FusedMoEMethodBase):
|
|||||||
logger.info_once("DeepGemm disabled: FlashInfer MOE is"
|
logger.info_once("DeepGemm disabled: FlashInfer MOE is"
|
||||||
" enabled.")
|
" enabled.")
|
||||||
elif (is_deep_gemm_supported()):
|
elif (is_deep_gemm_supported()):
|
||||||
logger.info_once("Using DeepGemm kernels for Fp8MoEMethod.")
|
logger.debug_once(
|
||||||
|
"DeepGemm kernels available for Fp8MoEMethod.")
|
||||||
self.allow_deep_gemm = True
|
self.allow_deep_gemm = True
|
||||||
else:
|
else:
|
||||||
logger.warning_once(
|
logger.warning_once(
|
||||||
@@ -481,9 +482,8 @@ class Fp8MoEMethod(FusedMoEMethodBase):
|
|||||||
elif (current_platform.is_cuda()
|
elif (current_platform.is_cuda()
|
||||||
and current_platform.is_device_capability(100)
|
and current_platform.is_device_capability(100)
|
||||||
and not self.flashinfer_moe_backend):
|
and not self.flashinfer_moe_backend):
|
||||||
logger.info_once(
|
logger.debug_once(
|
||||||
"Using CutlassBlockScaledGroupedGemm kernels for Fp8 MOE "
|
"CutlassBlockScaledGroupedGemm available for Fp8MoEMethod.")
|
||||||
"on SM100.")
|
|
||||||
self.allow_cutlass_block_scaled_grouped_gemm = True
|
self.allow_cutlass_block_scaled_grouped_gemm = True
|
||||||
|
|
||||||
def create_weights(self, layer: Module, num_experts: int, hidden_size: int,
|
def create_weights(self, layer: Module, num_experts: int, hidden_size: int,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user