Fix SonnetDataset sampling when prefix_len is very small by clamping num_prefix_lines to 0 (#16379)

Signed-off-by: Chenyaaang <chenyangli@google.com>
2026-03-17 08:07:06 +08:00 · 2025-04-09 22:35:07 -07:00 · 2025-04-09 22:35:07 -07:00 · 417bcefbae
commit 417bcefbae
parent baada0e737
1 changed files with 1 additions and 1 deletions
--- a/benchmarks/benchmark_dataset.py
+++ b/benchmarks/benchmark_dataset.py
@@ -472,7 +472,7 @@ class SonnetDataset(BenchmarkDataset):

        # Determine how many poem lines to use.
        num_input_lines = round((input_len - base_offset) / avg_len)
-        num_prefix_lines = round((prefix_len - base_offset) / avg_len)
+        num_prefix_lines = max(round((prefix_len - base_offset) / avg_len), 0)
        prefix_lines = self.data[:num_prefix_lines]

        samples = []