mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 04:15:01 +08:00
15 lines
543 B
C
15 lines
543 B
C
#pragma once
|
|
|
|
#include <torch/all.h>
|
|
|
|
// Declaration only; the definition is not part of this header.
//
// Takes four tensors by non-const reference and returns void, so results are
// presumably written into one or more of the arguments.
// NOTE(review): judging purely by the parameter names, `gating_output` looks
// like the input and `topk_weights`, `topk_indices` and
// `token_expert_indices` look like outputs -- confirm against the
// implementation. Expected shapes and dtypes are not visible from here.
void topk_softmax(torch::Tensor& topk_weights, torch::Tensor& topk_indices,
                  torch::Tensor& token_expert_indices,
                  torch::Tensor& gating_output);
|
|
|
|
// Declaration only; the definition is not part of this header.
//
// Takes both tensors by non-const reference and returns void.
// NOTE(review): the names suggest `input` is read and `output` is written --
// confirm against the implementation, along with the expected shapes and the
// dimension being summed.
void moe_sum(torch::Tensor& input, torch::Tensor& output);
|
|
|
|
// Declaration only; the definition is not part of this header.
//
// All tensors are taken by value and the function returns void.
// NOTE(review): judging purely by the parameter names, `topk_ids`,
// `num_experts` and `block_size` look like inputs while `sorted_token_ids`,
// `experts_ids` and `num_tokens_post_pad` look like buffers the function
// fills -- confirm against the implementation. Expected shapes, dtypes and
// the valid range of the two integer arguments are not visible from here.
void moe_align_block_size(torch::Tensor topk_ids, int64_t num_experts,
                          int64_t block_size, torch::Tensor sorted_token_ids,
                          torch::Tensor experts_ids,
                          torch::Tensor num_tokens_post_pad);
|