mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-28 08:05:16 +08:00
[Model Runner V2] Fix Triton warning on tl.where (#30355)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
parent
0b6a8a304c
commit
9e6562a3f6
@ -62,6 +62,7 @@ def _penalties_and_temperature_kernel(
|
||||
mask=packed_block < tl.cdiv(vocab_size, 32),
|
||||
)
|
||||
prompt_bin_mask = (packed_mask[:, None] >> (tl.arange(0, 32)[None, :])) & 1
|
||||
prompt_bin_mask = prompt_bin_mask.to(tl.int1)
|
||||
prompt_bin_mask = prompt_bin_mask.reshape(BLOCK_SIZE)
|
||||
|
||||
# If token appears in prompt or output, apply, otherwise use 1.0 for no-op.
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user