mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-10 12:05:48 +08:00
[Kernel] Remove if-else with identical branches in marlin 2:4 (#10687)
Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
This commit is contained in:
parent
15cc2a9f1a
commit
e2251109c7
@ -296,13 +296,9 @@ __global__ void Marlin_24(
|
|||||||
// We use a different scale layout for grouped and column-wise quantization as
|
// We use a different scale layout for grouped and column-wise quantization as
|
||||||
// we scale a `half2` tile in column-major layout in the former and in
|
// we scale a `half2` tile in column-major layout in the former and in
|
||||||
// row-major in the latter case.
|
// row-major in the latter case.
|
||||||
if (group_blocks != -1) {
|
s_sh_rd = 8 * ((threadIdx.x / 32) % (thread_n_blocks / 4)) +
|
||||||
s_sh_rd = 8 * ((threadIdx.x / 32) % (thread_n_blocks / 4)) +
|
(threadIdx.x % 32) / 4; // Note that in the original Marlin kernel
|
||||||
(threadIdx.x % 32) / 4;
|
// this is (threadIdx.x % 32) / 4
|
||||||
} else {
|
|
||||||
s_sh_rd = 8 * ((threadIdx.x / 32) % (thread_n_blocks / 4)) +
|
|
||||||
(threadIdx.x % 32) / 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Precompute which thread should not read memory in which iterations; this is
|
// Precompute which thread should not read memory in which iterations; this is
|
||||||
// needed if there are more threads than required for a certain tilesize or
|
// needed if there are more threads than required for a certain tilesize or
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user