Mirror of https://git.datalinker.icu/vllm-project/vllm.git (synced 2026-05-13 11:05:51 +08:00)
[Test] Add non-MoE DP test coverage (#28235)
Signed-off-by: Matthew Bonanni <mbonanni@redhat.com>
This commit is contained in:
Parent: da855b42d2
Commit: ca90f50304
@ -20,13 +20,6 @@ from vllm.v1.metrics.stats import IterationStats, MultiModalCacheStats, Schedule
|
|||||||
|
|
||||||
DP_SIZE = int(os.getenv("DP_SIZE", 2))
|
DP_SIZE = int(os.getenv("DP_SIZE", 2))
|
||||||
|
|
||||||
engine_args = AsyncEngineArgs(
|
|
||||||
model="ibm-research/PowerMoE-3b",
|
|
||||||
enforce_eager=True,
|
|
||||||
tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
|
|
||||||
data_parallel_size=DP_SIZE,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def generate(
|
async def generate(
|
||||||
engine: AsyncLLM,
|
engine: AsyncLLM,
|
||||||
@ -65,6 +58,13 @@ async def generate(
|
|||||||
return count, request_id
|
return count, request_id
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"model",
|
||||||
|
[
|
||||||
|
"ibm-research/PowerMoE-3b",
|
||||||
|
"hmellor/tiny-random-LlamaForCausalLM",
|
||||||
|
],
|
||||||
|
)
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
"output_kind",
|
"output_kind",
|
||||||
[
|
[
|
||||||
@ -76,7 +76,10 @@ async def generate(
|
|||||||
@pytest.mark.parametrize("async_scheduling", [True, False])
|
@pytest.mark.parametrize("async_scheduling", [True, False])
|
||||||
@pytest.mark.asyncio
|
@pytest.mark.asyncio
|
||||||
async def test_load(
|
async def test_load(
|
||||||
output_kind: RequestOutputKind, data_parallel_backend: str, async_scheduling: bool
|
model: str,
|
||||||
|
output_kind: RequestOutputKind,
|
||||||
|
data_parallel_backend: str,
|
||||||
|
async_scheduling: bool,
|
||||||
):
|
):
|
||||||
if async_scheduling and data_parallel_backend == "ray":
|
if async_scheduling and data_parallel_backend == "ray":
|
||||||
# TODO(NickLucche) Re-enable when async scheduling is supported
|
# TODO(NickLucche) Re-enable when async scheduling is supported
|
||||||
@ -107,8 +110,14 @@ async def test_load(
|
|||||||
with ExitStack() as after:
|
with ExitStack() as after:
|
||||||
prompt = "This is a test of data parallel"
|
prompt = "This is a test of data parallel"
|
||||||
|
|
||||||
engine_args.data_parallel_backend = data_parallel_backend
|
engine_args = AsyncEngineArgs(
|
||||||
engine_args.async_scheduling = async_scheduling
|
model=model,
|
||||||
|
enforce_eager=True,
|
||||||
|
tensor_parallel_size=int(os.getenv("TP_SIZE", 1)),
|
||||||
|
data_parallel_size=DP_SIZE,
|
||||||
|
data_parallel_backend=data_parallel_backend,
|
||||||
|
async_scheduling=async_scheduling,
|
||||||
|
)
|
||||||
engine = AsyncLLM.from_engine_args(
|
engine = AsyncLLM.from_engine_args(
|
||||||
engine_args, stat_loggers=[SimpleStatsLogger]
|
engine_args, stat_loggers=[SimpleStatsLogger]
|
||||||
)
|
)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user