diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 6fd71259dbcb..828f09cbc8d8 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -2128,12 +2128,11 @@ def replace_set_lora(model): lora_a: torch.Tensor, lora_b: torch.Tensor, embeddings_tensor: torch.Tensor | None, - bias: torch.Tensor | None = None, ): # TODO: The integer index leads to a recompilation, but converting it # to a tensor doesn't seem to work anymore. This might be fixed with a # later release of torch_xla. - self._original_set_lora(index, lora_a, lora_b, embeddings_tensor, bias) + self._original_set_lora(index, lora_a, lora_b, embeddings_tensor) torch_xla.sync(wait=False) def _tpu_reset_lora(self, index: int):