diff --git a/vllm/model_executor/input_metadata.py b/vllm/model_executor/input_metadata.py
index ebba0ba0a261..01bba70ac10a 100644
--- a/vllm/model_executor/input_metadata.py
+++ b/vllm/model_executor/input_metadata.py
@@ -1,5 +1,5 @@
-from dataclasses import dataclass
-from typing import Optional
+from dataclasses import dataclass, fields
+from typing import Optional, Any, Dict
 
 import torch
 
@@ -31,3 +31,12 @@ class InputMetadata:
     def __post_init__(self):
         # will not appear in the __repr__ and __init__
         self.attn_bias = None
+
+    def asdict_zerocopy(self) -> Dict[str, Any]:
+        """Return a {field name: value} dict without deep-copying values."""
+        # Unlike dataclasses.asdict, nested dataclass fields are not converted;
+        # if we add dataclasses as fields, they will need similar handling.
+        return {
+            field.name: getattr(self, field.name)
+            for field in fields(self)
+        }
diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py
index 1ef783da6d08..27213887ed26 100644
--- a/vllm/worker/model_runner.py
+++ b/vllm/worker/model_runner.py
@@ -1,5 +1,4 @@
 import contextlib
-import dataclasses
 import time
 from typing import Dict, List, Optional, Tuple, Set, Union
 
@@ -527,7 +526,7 @@ class ModelRunner:
                 "lora_requests": lora_requests,
                 "lora_mapping": lora_mapping,
             }
-            metadata_dict.update(dataclasses.asdict(input_metadata))
+            metadata_dict.update(input_metadata.asdict_zerocopy())
             broadcast_tensor_dict(metadata_dict, src=0)
         else:
             metadata_dict = broadcast_tensor_dict(src=0)