mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-06-11 04:22:14 +08:00
use absolute path
Signed-off-by: yewentao256 <zhyanwentao@126.com>
This commit is contained in:
parent
e92676ef4e
commit
afe23a2990
@ -1,6 +1,6 @@
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
|
||||
#include "../per_token_group_quant_8bit.h"
|
||||
#include "quantization/w8a8/per_token_group_quant_8bit.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
@ -8,9 +8,9 @@
|
||||
|
||||
#include <torch/all.h>
|
||||
|
||||
#include "../../vectorization.cuh"
|
||||
#include "../../vectorization_utils.cuh"
|
||||
#include "../../../dispatch_utils.h"
|
||||
#include "quantization/vectorization.cuh"
|
||||
#include "quantization/vectorization_utils.cuh"
|
||||
#include "dispatch_utils.h"
|
||||
|
||||
__device__ __forceinline__ float GroupReduceMax(float val, const int tid) {
|
||||
unsigned mask = 0xffff;
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#include <ATen/cuda/CUDAContext.h>
|
||||
#include <torch/all.h>
|
||||
|
||||
#include "../per_token_group_quant_8bit.h"
|
||||
#include "quantization/w8a8/per_token_group_quant_8bit.h"
|
||||
|
||||
void per_token_group_quant_int8(const torch::Tensor& input,
|
||||
torch::Tensor& output_q,
|
||||
|
||||
@ -3,8 +3,8 @@
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "../../../dispatch_utils.h"
|
||||
#include "../../vectorization_utils.cuh"
|
||||
#include "dispatch_utils.h"
|
||||
#include "quantization/vectorization_utils.cuh"
|
||||
|
||||
#ifndef USE_ROCM
|
||||
#include <cub/cub.cuh>
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user