mirror of https://git.datalinker.icu/vllm-project/vllm.git
[CI] Revert back prepare_prompts and check_answers (#25087)
Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
parent 8b32464ac1
commit eb68c2dcd9
@@ -8,8 +8,7 @@ import pytest
 
 from vllm.platforms import current_platform
 
 from ..conftest import HfRunner, VllmRunner
-from ..core.block.e2e.test_correctness_sliding_window import prep_prompts
-from ..utils import multi_gpu_test
+from ..utils import multi_gpu_test, prep_prompts
 from .utils import check_logprobs_close
 
@@ -8,6 +8,7 @@ import functools
 import importlib
 import json
 import os
+import random
 import signal
 import subprocess
 import sys
@@ -1150,3 +1151,49 @@ def override_cutlass_fp8_supported(value: bool):
             "vllm.model_executor.layers.quantization.utils.w8a8_utils.cutlass_fp8_supported",
             return_value=value):
         yield
+
+
+def prep_prompts(batch_size: int, ln_range: tuple[int, int] = (800, 1100)):
+    """
+    Generate prompts with a bunch of variable assignments,
+    then ask for the value of one of them.
+    The prompt is just under 10k tokens; the sliding window is 4k,
+    so the answer is outside the sliding window but should still be correct.
+
+    Args:
+        batch_size: number of prompts to generate
+        ln_range: an argument to control the length of the prompt
+    """
+    prompts: list[str] = []
+    answer: list[int] = []
+    indices: list[int] = []
+    random.seed(1)
+    for _ in range(batch_size):
+        idx = random.randint(30, 90)
+        indices.append(idx)
+        prompt = "```python\n# We set a number of variables, " + \
+                 f"x{idx} will be important later\n"
+        ln = random.randint(*ln_range)
+        for k in range(30, ln):
+            v = random.randint(10, 99)
+            if k == idx:
+                answer.append(v)
+            prompt += f"x{k} = {v}\n"
+        prompt += f"# Now, we check the value of x{idx}:\n"
+        prompt += f"assert x{idx} == "
+        prompts.append(prompt)
+    return prompts, answer, indices
+
+
+def check_answers(indices: list[int],
+                  answer: list[int],
+                  outputs: list[str],
+                  accept_rate: float = 0.7):
+    answer2 = [int(text[0:2].strip()) for text in outputs]
+    print(list(zip(indices, zip(answer, answer2))))
+    numok = 0
+    for a1, a2 in zip(answer, answer2):
+        if a1 == a2:
+            numok += 1
+    frac_ok = numok / len(answer)
+    print(f"Num OK: {numok}/{len(answer)} {frac_ok}")
+    assert frac_ok >= accept_rate
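The two helpers restored above are meant to be driven together: prep_prompts builds long synthetic prompts whose answer lies outside a 4k sliding window, and check_answers scores the completions against the recorded values. The following is a minimal usage sketch, not part of this commit; the function name run_sliding_window_smoke_test, the import path tests.utils, the model name, batch size, and sampling settings are illustrative assumptions.

# Minimal usage sketch (assumptions: import path, model, and sampling
# settings are illustrative, not taken from this commit).
from vllm import LLM, SamplingParams

from tests.utils import check_answers, prep_prompts  # assumed location


def run_sliding_window_smoke_test() -> None:
    # Build long prompts whose answer lies outside a 4k sliding window.
    prompts, answer, indices = prep_prompts(batch_size=4)

    # Greedy decoding; a handful of tokens is enough to read the value back.
    params = SamplingParams(temperature=0.0, max_tokens=5)
    llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct")  # hypothetical model choice

    outputs = llm.generate(prompts, params)
    texts = [out.outputs[0].text for out in outputs]

    # Passes if at least 70% of completions recover the assigned value.
    check_answers(indices, answer, texts)

Note that check_answers parses only the first two characters of each completion, which is why prep_prompts draws every assigned value from random.randint(10, 99), i.e. always two digits.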
@@ -6,8 +6,7 @@ import pytest
 
 from vllm import LLM, SamplingParams
 
-from ...core.block.e2e.test_correctness_sliding_window import (check_answers,
-                                                                prep_prompts)
+from ...utils import check_answers, prep_prompts
 
 
 @dataclass