mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-15 18:16:46 +08:00
Update test_flash_attn.py (#17102)
Signed-off-by: ShuaibinLi <lishuaibin@live.cn>
This commit is contained in:
parent
3642c59aa8
commit
9869453c42
@@ -145,7 +145,7 @@ def test_flash_attn_with_paged_kv(
|
|||||||
v_descale = None
|
v_descale = None
|
||||||
if q_dtype is not None:
|
if q_dtype is not None:
|
||||||
# QKV are drawn from N(0, 1): no need for a fp8 scaling factor
|
# QKV are drawn from N(0, 1): no need for a fp8 scaling factor
|
||||||
maybe_quantized_query = query.to(q_dtype)
|
maybe_quantized_query = q.to(q_dtype)
|
||||||
maybe_quantized_key_cache = key_cache.to(q_dtype)
|
maybe_quantized_key_cache = key_cache.to(q_dtype)
|
||||||
maybe_quantized_value_cache = value_cache.to(q_dtype)
|
maybe_quantized_value_cache = value_cache.to(q_dtype)
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user