mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-01-24 03:14:28 +08:00
[Test] Add non-MoE DP test coverage (#28235)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
parent
da855b42d2
commit
ca90f50304
@ -20,13 +20,6 @@ from vllm.v1.metrics.stats import IterationStats, MultiModalCacheStats, Schedule
|
||||
|
||||
DP_SIZE = int(os.getenv("DP_SIZE", 2))
|
||||
|
||||
engine_args = AsyncEngineArgs(
|
||||
model="ibm-research/PowerMoE-3b",
|
||||
enforce_eager=True,
|
||||
tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
|
||||
data_parallel_size=DP_SIZE,
|
||||
)
|
||||
|
||||
|
||||
async def generate(
|
||||
engine: AsyncLLM,
|
||||
@ -65,6 +58,13 @@ async def generate(
|
||||
return count, request_id
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"model",
|
||||
[
|
||||
"ibm-research/PowerMoE-3b",
|
||||
"hmellor/tiny-random-LlamaForCausalLM",
|
||||
],
|
||||
)
|
||||
@pytest.mark.parametrize(
|
||||
"output_kind",
|
||||
[
|
||||
@ -76,7 +76,10 @@ async def generate(
|
||||
@pytest.mark.parametrize("async_scheduling", [True, False])
|
||||
@pytest.mark.asyncio
|
||||
async def test_load(
|
||||
output_kind: RequestOutputKind, data_parallel_backend: str, async_scheduling: bool
|
||||
model: str,
|
||||
output_kind: RequestOutputKind,
|
||||
data_parallel_backend: str,
|
||||
async_scheduling: bool,
|
||||
):
|
||||
if async_scheduling and data_parallel_backend == "ray":
|
||||
# TODO(NickLucche) Re-enable when async scheduling is supported
|
||||
@ -107,8 +110,14 @@ async def test_load(
|
||||
with ExitStack() as after:
|
||||
prompt = "This is a test of data parallel"
|
||||
|
||||
engine_args.data_parallel_backend = data_parallel_backend
|
||||
engine_args.async_scheduling = async_scheduling
|
||||
engine_args = AsyncEngineArgs(
|
||||
model=model,
|
||||
enforce_eager=True,
|
||||
tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
|
||||
data_parallel_size=DP_SIZE,
|
||||
data_parallel_backend=data_parallel_backend,
|
||||
async_scheduling=async_scheduling,
|
||||
)
|
||||
engine = AsyncLLM.from_engine_args(
|
||||
engine_args, stat_loggers=[SimpleStatsLogger]
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user