Merge 348de41b5253318b4a578d7d972511aebb50f7dc into 254f6b986720c92ddf97fbb1a6a6465da8e87e29

This commit is contained in:
Kevin McKay 2025-12-25 00:07:06 +00:00 committed by GitHub
commit ed2509b7b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1839,6 +1839,13 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
reinterpret_cast<const _B16x16*>(q_fetch_ptr);
Qlocal[qkhe_depth] = *q_fetch_ptr_32B;
}
} else {
// Zero out Qlocal for lanes that don't load Q data to prevent
// uninitialized register values from contaminating wmma results
#pragma unroll
for (int qkhe_depth = 0; qkhe_depth < QKHELOOP / 2; qkhe_depth++) {
Qlocal[qkhe_depth] = {};
}
}
} else {
// fetch Q in shared across warps and then write to registers
@@ -2608,6 +2615,13 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
reinterpret_cast<const _B16x8*>(q_fetch_ptr);
Qlocal[qkhe_depth] = *q_fetch_ptr_16B;
}
} else {
// Zero out Qlocal for lanes that don't load Q data to prevent
// uninitialized register values from contaminating wmma results
#pragma unroll
for (int qkhe_depth = 0; qkhe_depth < QKHELOOP; qkhe_depth++) {
Qlocal[qkhe_depth] = {};
}
}
} else {
// fetch Q in shared across warps and then write to registers