mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 21:45:40 +08:00
silu-v1: Fix EPS not being used during max-reduction (#25069)
Signed-off-by: elvircrn <elvircrn@gmail.com>
This commit is contained in:
parent
4f02b77de4
commit
abdfcd4f3d
@@ -365,7 +365,6 @@ __global__ void silu_mul_fp8_quant_deep_gemm_kernel(
|
|||||||
int32_t compute_pipeline_offset_64 = 0;
|
int32_t compute_pipeline_offset_64 = 0;
|
||||||
|
|
||||||
for (int32_t t = n_tokens_lower; t < n_tokens_upper; ++t) {
|
for (int32_t t = n_tokens_lower; t < n_tokens_upper; ++t) {
|
||||||
__nv_bfloat16 y_max_bf16 = EPS;
|
|
||||||
__nv_bfloat162 results_bf162[2];
|
__nv_bfloat162 results_bf162[2];
|
||||||
|
|
||||||
cp_async_wait<NUM_STAGES - 2>();
|
cp_async_wait<NUM_STAGES - 2>();
|
||||||
@@ -405,7 +404,7 @@ __global__ void silu_mul_fp8_quant_deep_gemm_kernel(
|
|||||||
auto _y_max2 =
|
auto _y_max2 =
|
||||||
__hmax2(__habs2(results_bf162[0]), __habs2(results_bf162[1]));
|
__hmax2(__habs2(results_bf162[0]), __habs2(results_bf162[1]));
|
||||||
|
|
||||||
y_max_bf16 = __hmax(_y_max2.x, _y_max2.y);
|
__nv_bfloat16 y_max_bf16 = __hmax(EPS, __hmax(_y_max2.x, _y_max2.y));
|
||||||
|
|
||||||
// An entire group is assigned to a single warp, so a simple warp reduce
|
// An entire group is assigned to a single warp, so a simple warp reduce
|
||||||
// is used.
|
// is used.
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user