mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-30 12:51:49 +08:00
[BugFix] Lazily initialize _copy_stream to avoid torch initializing the wrong GPU instance (#8403)
This commit is contained in:
parent
c6202daeed
commit
8a23e93302
@ -230,12 +230,15 @@ class MultiStepModelRunner(GPUModelRunnerBase[StatefulModelInput]):
|
||||
self._base_model_runner: GPUModelRunnerBase = base_model_runner
|
||||
|
||||
self.is_multi_step = self.scheduler_config.is_multi_step
|
||||
# used to copy tensors from GPU to CPU asynchronously
|
||||
self._copy_stream = torch.cuda.Stream()
|
||||
self.pinned_sampled_token_ids: Optional[torch.Tensor] = None
|
||||
|
||||
self.pythonization_cache = PythonizationCache()
|
||||
|
||||
@functools.cached_property
def _copy_stream(self):
    """Return the CUDA stream used to copy tensors from GPU to CPU asynchronously.

    ``functools.cached_property`` postpones the ``torch.cuda.Stream()`` call
    until this attribute is first read, then stores the result on the
    instance so later reads reuse the same stream object.

    NOTE(review): deferring creation presumably lets the stream be built
    only after the intended GPU has been selected -- confirm against the
    worker's device-setup order.
    """
    copy_stream = torch.cuda.Stream()
    return copy_stream
|
||||
|
||||
def make_model_input_from_broadcasted_tensor_dict(
|
||||
self, tensor_dict: Dict[str, Any]) -> StatefulModelInput:
|
||||
model_input = (StatefulModelInput.from_broadcasted_tensor_dict(
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user