[V1][CI/Test] Do basic test for top-p & top-k sampling (#12469)

Signed-off-by: Woosuk Kwon <woosuk.kwon@berkeley.edu>
This commit is contained in:
Woosuk Kwon 2025-01-27 09:40:04 -08:00 committed by GitHub
parent 01ba927040
commit 3f1fc7425a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -144,7 +144,7 @@ def test_engine_core(monkeypatch):
def test_engine_core_advanced_sampling(monkeypatch):
"""
A basic end-to-end test to verify that the engine functions correctly
when additional sampling parameters, such as min_tokens and
when additional sampling parameters, such as top_p, min_tokens, and
presence_penalty, are set.
"""
with monkeypatch.context() as m:
@ -167,11 +167,23 @@ def test_engine_core_advanced_sampling(monkeypatch):
stop_token_ids=[1001, 1002],
)
engine_core.add_request(request)
assert len(engine_core.scheduler.waiting) == 1
assert len(engine_core.scheduler.running) == 0
# Loop through until they are all done.
while len(engine_core.step().outputs) > 0:
pass
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 0
def _check_engine_state():
assert len(engine_core.scheduler.waiting) == 1
assert len(engine_core.scheduler.running) == 0
# Loop through until they are all done.
while len(engine_core.step().outputs) > 0:
pass
assert len(engine_core.scheduler.waiting) == 0
assert len(engine_core.scheduler.running) == 0
_check_engine_state()
# Second request.
request2 = make_request()
request2.sampling_params = SamplingParams(
top_p=0.99,
top_k=50,
)
engine_core.add_request(request2)
_check_engine_state()