diff --git a/tests/spec_decode/test_memory_usage.py b/tests/spec_decode/test_memory_usage.py index 7a205f2ab180..16dffe6d7d69 100644 --- a/tests/spec_decode/test_memory_usage.py +++ b/tests/spec_decode/test_memory_usage.py @@ -42,12 +42,12 @@ we can ensure we go through the _no_spec codepath for most of our engine steps. def test_memory_usage_no_spec(): previous_memory_allocated = None - llm = vllm.LLM( - model=MAIN_MODEL, - speculative_model=SPEC_MODEL, - num_speculative_tokens=3, - speculative_disable_by_batch_size=SPEC_DISABLE_BATCH_SIZE, - ) + llm = vllm.LLM(model=MAIN_MODEL, + speculative_config={ + "model": SPEC_MODEL, + "num_speculative_tokens": 3, + "disable_by_batch_size": SPEC_DISABLE_BATCH_SIZE, + }) batch_sequences = set() engine = llm.llm_engine