[Kernel] Remove if-else with identical branches in marlin 2:4 (#10687)

Signed-off-by: Tyler Michael Smith <tyler@neuralmagic.com>
2025-12-10 12:05:48 +08:00 · 2024-11-27 01:55:32 -05:00 · 2024-11-27 01:55:32 -05:00 · e2251109c7
commit e2251109c7
parent 15cc2a9f1a
1 changed files with 3 additions and 7 deletions
--- a/csrc/quantization/marlin/sparse/marlin_24_cuda_kernel.cu
+++ b/csrc/quantization/marlin/sparse/marlin_24_cuda_kernel.cu
@@ -296,13 +296,9 @@ __global__ void Marlin_24(
  // We use a different scale layout for grouped and column-wise quantization as
  // we scale a `half2` tile in column-major layout in the former and in
  // row-major in the latter case.
-  if (group_blocks != -1) {
+  s_sh_rd = 8 * ((threadIdx.x / 32) % (thread_n_blocks / 4)) +
-    s_sh_rd = 8 * ((threadIdx.x / 32) % (thread_n_blocks / 4)) +
+            (threadIdx.x % 32) / 4;  // Note that in the original Marlin kernel
-              (threadIdx.x % 32) / 4;
+                                     // this is (threadIdx.x % 32) / 4
-  } else {
-    s_sh_rd = 8 * ((threadIdx.x / 32) % (thread_n_blocks / 4)) +
-              (threadIdx.x % 32) / 4;
-  }
  // Precompute which thread should not read memory in which iterations; this is
  // needed if there are more threads than required for a certain tilesize or