mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-25 05:15:01 +08:00
Signed-off-by: Grace Ho <grho@nvidia.com> Signed-off-by: Grace Ho <146482179+gracehonv@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
63 lines
2.3 KiB
JSON
63 lines
2.3 KiB
JSON
{
|
|
"vllm": {
|
|
"llama": {
|
|
"fused_moe_kernel|GroupProblemShape|group_gemm_starts|bmm_|GemmUniversal": "moe_gemm",
|
|
"gemm|nvjet": "gemm",
|
|
"moe|sigmoid": "moe",
|
|
"CatArrayBatched|prepare_inputs": "prepare_next",
|
|
"ncclDevKernel|cross_device_reduce": "nccl_and_custom_ar",
|
|
"_norm_|Norm": "norm",
|
|
"act_and_mul_": "activation",
|
|
"Rotary": "rope",
|
|
"SoftMax": "softmax",
|
|
"flash|fmha": "attn",
|
|
"elementwise": "elementwise",
|
|
"fp8_quant|cvt_": "quantize",
|
|
"reduce_kernel": "reduce",
|
|
"triton": "triton_kernel",
|
|
"CUDA mem": "non-gpu-H_D_memops",
|
|
".*": "misc"
|
|
},
|
|
"ds": {
|
|
"block_fp8|gemm_fp8_blockwise": "block_fp8_gemm",
|
|
"fused_moe_kernel|_group_gemm|GroupProblemShape|GemmUniversal|bmm_": "moe_gemm",
|
|
"gemm|matmul|nvjet": "gemm",
|
|
"moe|sigmoid|expert": "moe",
|
|
"CatArrayBatched": "prepare_next",
|
|
"ncclDevKernel|cross_device_reduce": "nccl_and_custom_ar",
|
|
"Norm|_norm_": "norm",
|
|
"sbtopk": "topk",
|
|
"act_and_mul_": "activation",
|
|
"compute_position_kernel": "rope",
|
|
"elementwise": "elementwise",
|
|
"fp8_quant|quant_fp8|cvt_": "quantize",
|
|
"reduce": "reduce",
|
|
"SoftMax": "softmax",
|
|
"_fwd_|FlashAttn|_mla_|_attn_|fmha": "attn",
|
|
"triton": "triton_kernel",
|
|
"topk": "topk",
|
|
"CUDA mem": "non-gpu-H_D_memops",
|
|
".*": "misc"
|
|
},
|
|
"gpt-oss": {
|
|
"block_fp8|gemm_fp8_blockwise": "block_fp8_gemm",
|
|
"fused_moe_kernel|_group_gemm|GroupProblemShape|GemmUniversal|bmm_|matmul_ogs_|_topk_forward|_combined_routing|_sum_bitmatrix_rows|_compute_writeback_idx": "moe_gemm",
|
|
"gemm|matmul|nvjet": "gemm",
|
|
"moe|sigmoid|expert|splitKreduce": "moe",
|
|
"CatArrayBatched": "prepare_next",
|
|
"ncclDevKernel|cross_device_reduce": "nccl_and_custom_ar",
|
|
"Norm|_norm_": "norm",
|
|
"topk": "topk",
|
|
"act_and_mul_": "activation",
|
|
"compute_position_kernel": "rope",
|
|
"elementwise": "elementwise",
|
|
"fp8_quant|quant_fp8|cvt_|quantize": "quantize",
|
|
"reduce": "reduce",
|
|
"SoftMax": "softmax",
|
|
"_fwd_|FlashAttn|_mla_|_attn_|_flash_|flash::prepare_varlen|fmha": "attn",
|
|
"triton": "triton_kernel",
|
|
"CUDA mem": "non-gpu-H_D_memops",
|
|
".*": "misc"
|
|
}
|
|
}
|
|
} |