diff --git a/vllm/prefix.py b/vllm/prefix.py index 415da1fc6d2b..06b5b32a38fc 100644 --- a/vllm/prefix.py +++ b/vllm/prefix.py @@ -11,7 +11,6 @@ class Prefix: prefix caching in the future. Args: - prefix_id: The id of the prefix in the prefix pool. token_ids: The token ids of the prefix. block_size: The block size of the executed model. """ diff --git a/vllm/sequence.py b/vllm/sequence.py index fd10bc9b5b8c..ca647afce9f1 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -229,6 +229,7 @@ class SequenceGroup: seqs: The list of sequences. sampling_params: The sampling parameters used to generate the outputs. arrival_time: The arrival time of the request. + prefix: The prefix of the prompt of the sequence group. """ def __init__(