fix sonnet dataset sample when prefix len is very small (#16379)

Signed-off-by: Chenyaaang <chenyangli@google.com>
This commit is contained in:
Chenyaaang 2025-04-09 22:35:07 -07:00 committed by GitHub
parent baada0e737
commit 417bcefbae
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -472,7 +472,7 @@ class SonnetDataset(BenchmarkDataset):
# Determine how many poem lines to use.
num_input_lines = round((input_len - base_offset) / avg_len)
-num_prefix_lines = round((prefix_len - base_offset) / avg_len)
+num_prefix_lines = max(round((prefix_len - base_offset) / avg_len), 0)
prefix_lines = self.data[:num_prefix_lines]
samples = []