From 2b1e2111b025d5f832d62167afab8756deaec22d Mon Sep 17 00:00:00 2001
From: Lu Fang <30275821+houseroad@users.noreply.github.com>
Date: Wed, 11 Jun 2025 12:54:59 +0800
Subject: [PATCH] Fix test_max_model_len in
 tests/entrypoints/llm/test_generate.py (#19451)

Signed-off-by: Lu Fang <lufang@fb.com>
---
 tests/entrypoints/llm/test_generate.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/llm/test_generate.py b/tests/entrypoints/llm/test_generate.py
index 3c3281c34d569..707891f6bdd8d 100644
--- a/tests/entrypoints/llm/test_generate.py
+++ b/tests/entrypoints/llm/test_generate.py
@@ -125,4 +125,7 @@ def test_max_model_len():
     for output in outputs:
         num_total_tokens = len(output.prompt_token_ids) + len(
             output.outputs[0].token_ids)
-        assert num_total_tokens == max_model_len
+        # Total tokens must not exceed max_model_len.
+        # It can be less if generation finishes due to other reasons (e.g., EOS)
+        # before reaching the absolute model length limit.
+        assert num_total_tokens <= max_model_len