mirror of
https://git.datalinker.icu/deepseek-ai/DeepSeek-V3.git
synced 2025-12-09 21:04:36 +08:00
Merge 21a919d79f47829c1c9b11d7e067af4499fdf063 into 9b4e9788e4a3a731f7567338ed15d3ec549ce03b
This commit is contained in:
commit
70ddd25f08
@ -9,6 +9,7 @@ from transformers import AutoTokenizer
|
|||||||
from safetensors.torch import load_model
|
from safetensors.torch import load_model
|
||||||
|
|
||||||
from model import Transformer, ModelArgs
|
from model import Transformer, ModelArgs
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def sample(logits, temperature: float = 1.0):
|
def sample(logits, temperature: float = 1.0):
|
||||||
@ -67,6 +68,24 @@ def generate(
|
|||||||
tokens[:, cur_pos] = next_token
|
tokens[:, cur_pos] = next_token
|
||||||
finished |= torch.logical_and(~prompt_mask[:, cur_pos], next_token == eos_id)
|
finished |= torch.logical_and(~prompt_mask[:, cur_pos], next_token == eos_id)
|
||||||
prev_pos = cur_pos
|
prev_pos = cur_pos
|
||||||
|
# === FIX for DeepSeek-V3 Issue #1008 ===
|
||||||
|
# Detect infinite repeating loops like "A5A5A5A5..." or same token repetition
|
||||||
|
|
||||||
|
# Convert current generated tokens to string for pattern check
|
||||||
|
decoded_output = "".join([str(t) for t in tokens[0, :cur_pos].tolist()])
|
||||||
|
|
||||||
|
# (a) Detect if "A5" repeats 10 or more times continuously
|
||||||
|
if re.search(r"(A5){10,}", decoded_output):
|
||||||
|
print("[Warning] Detected excessive 'A5' repetition — stopping generation early.")
|
||||||
|
break
|
||||||
|
|
||||||
|
# (b) Detect same token repeated multiple times (generic loop check)
|
||||||
|
if cur_pos > 10:
|
||||||
|
last_10 = tokens[0, cur_pos-10:cur_pos].tolist()
|
||||||
|
if len(set(last_10)) == 1:
|
||||||
|
print("[Warning] Detected same token repetition — stopping generation early.")
|
||||||
|
break
|
||||||
|
# === END FIX ===
|
||||||
if finished.all():
|
if finished.all():
|
||||||
break
|
break
|
||||||
completion_tokens = []
|
completion_tokens = []
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user