#pragma once

#include <torch/all.h>

// TODO(wentao): refactor the folder to 8bit, then include fp8 and int8 folders

// 8-bit per-token-group quantization helper used by both FP8 and INT8
void per_token_group_quant_8bit(const torch::Tensor& input,
                                torch::Tensor& output_q,
                                torch::Tensor& output_s, int64_t group_size,
                                double eps, double min_8bit, double max_8bit,
                                bool scale_ue8m0 = false);
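
// Example usage (illustrative sketch only; the tensor shapes, dtypes, group
// size, eps, and FP8 e4m3 range below are assumptions for illustration, not
// values taken from this header):
//
//   // Quantize a [num_tokens, hidden_size] activation tensor in groups of 128
//   // columns, producing FP8 values plus one float32 scale per group.
//   torch::Tensor input = torch::randn({16, 4096}, torch::kFloat32);
//   torch::Tensor output_q = torch::empty_like(input, torch::kFloat8_e4m3fn);
//   torch::Tensor output_s = torch::empty({16, 4096 / 128}, torch::kFloat32);
//   per_token_group_quant_8bit(input, output_q, output_s,
//                              /*group_size=*/128, /*eps=*/1e-10,
//                              /*min_8bit=*/-448.0, /*max_8bit=*/448.0);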