diff --git a/vllm/sequence.py b/vllm/sequence.py index 4f0adac743c36..942cf9ecec80e 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -35,6 +35,9 @@ class SequenceStatus(enum.Enum): elif status == SequenceStatus.FINISHED_ABORTED: finish_reason = "abort" elif status == SequenceStatus.FINISHED_IGNORED: + # The ignored sequences are the sequences whose prompt lengths + # are longer than the model's length cap. Therefore, the finish + # reason should also be "length" as in the OpenAI API. finish_reason = "length" else: finish_reason = None