diff --git a/tests/v1/tpu/test_sampler.py b/tests/v1/tpu/test_sampler.py index 57c195982ca86..2bbeb3ddac91b 100644 --- a/tests/v1/tpu/test_sampler.py +++ b/tests/v1/tpu/test_sampler.py @@ -26,7 +26,7 @@ def test_sampler_different(model_name: str): enforce_eager=False, max_num_seqs=1, max_model_len=512, - max_num_batched_tokens=512) + max_num_batched_tokens=256) prompts = [ "Write a short story about a robot that dreams for the first time." ] diff --git a/vllm/v1/attention/backends/pallas.py b/vllm/v1/attention/backends/pallas.py index 79ec67b89e976..8187e457d9e61 100644 --- a/vllm/v1/attention/backends/pallas.py +++ b/vllm/v1/attention/backends/pallas.py @@ -95,7 +95,7 @@ class PallasMetadata: block_tables: torch.Tensor context_lens: torch.Tensor query_start_loc: torch.Tensor - num_seqs: int + num_seqs: torch.Tensor class PallasAttentionBackendImpl(AttentionImpl):