diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 31895cc0832a..c2c8533c88f4 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -287,7 +287,6 @@ class GPUModelRunner(LoRAModelRunnerMixin): dtype=torch.int32, device="cpu", pin_memory=self.pin_memory) - self.input_ids_np = self.input_ids_cpu.numpy() self.positions_cpu = torch.zeros(self.max_num_tokens, dtype=torch.int64, device="cpu", diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 687dabee2290..b4daf5a34678 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -191,7 +191,6 @@ class TPUModelRunner(LoRAModelRunnerMixin): self.input_ids_cpu = torch.zeros(self.max_num_tokens, dtype=torch.int32, device="cpu") - self.input_ids_np = self.input_ids_cpu.numpy() self.positions_cpu = torch.zeros(self.max_num_tokens, dtype=torch.int32,