Fix inadvertently silenced PP tests for mp, add DeepSeek V2/V3 model family to PP tests (#20831)
Signed-off-by: Seiji Eicher <seiji@anyscale.com>
commit d0dc4cfca4
parent d31a647124
@@ -14,8 +14,9 @@ from typing import Literal, NamedTuple, Optional
 
 import pytest
 
-from vllm.config import TaskOption
+from vllm.config import _FLOAT16_NOT_SUPPORTED_MODELS, TaskOption
 from vllm.logger import init_logger
+from vllm.transformers_utils.config import get_config
 
 from ..models.registry import HF_EXAMPLE_MODELS
 from ..utils import compare_two_settings, create_new_process_for_each_test
@@ -158,7 +159,7 @@ TEXT_GENERATION_MODELS = {
     "databricks/dbrx-instruct": PPTestSettings.fast(load_format="dummy"),
     "Deci/DeciLM-7B-instruct": PPTestSettings.fast(),
     "deepseek-ai/deepseek-llm-7b-chat": PPTestSettings.fast(),
-    "deepseek-ai/DeepSeek-V2-Lite-Chat": PPTestSettings.fast(),
+    "deepseek-ai/DeepSeek-V2-Lite-Chat": PPTestSettings.fast(tp_base=2),
     "LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct": PPTestSettings.fast(),
     "tiiuae/falcon-7b": PPTestSettings.fast(),
     "google/gemma-1.1-2b-it": PPTestSettings.fast(),
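The `tp_base=2` bump means DeepSeek-V2-Lite-Chat is now exercised with tensor parallelism 2 in the fast PP matrix. As a minimal sketch of what a `fast()` factory with a `tp_base` knob might look like (the real `PPTestSettings` in this test file has more fields, such as distributed backends, task, and load format; everything beyond the TP/PP sizes here is an illustrative assumption):

# Hypothetical simplification of the test file's PPTestSettings; the
# field set is illustrative, not the real one.
from typing import NamedTuple


class ParallelSetup(NamedTuple):
    tp_size: int  # tensor-parallel degree
    pp_size: int  # pipeline-parallel degree


class PPTestSettings(NamedTuple):
    parallel_setups: list[ParallelSetup]

    @staticmethod
    def fast(*, tp_base: int = 1, pp_base: int = 2) -> "PPTestSettings":
        # "fast" = a single cheap setup; tp_base raises the TP degree
        # for models (like DeepSeek-V2-Lite-Chat) that need TP >= 2.
        return PPTestSettings(
            parallel_setups=[ParallelSetup(tp_size=tp_base,
                                           pp_size=pp_base)])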
@@ -210,9 +211,11 @@ TEXT_GENERATION_MODELS = {
 
 EMBEDDING_MODELS = {  # type: ignore[var-annotated]
     # [Text-only]
-    "intfloat/e5-mistral-7b-instruct": PPTestSettings.fast(),
-    "BAAI/bge-multilingual-gemma2": PPTestSettings.fast(),
-    "Qwen/Qwen2.5-Math-RM-72B": PPTestSettings.fast(load_format="dummy"),
+    "intfloat/e5-mistral-7b-instruct": PPTestSettings.fast(task="embed"),
+    "BAAI/bge-multilingual-gemma2": PPTestSettings.fast(task="embed"),
+    "Qwen/Qwen2.5-Math-RM-72B": PPTestSettings.fast(
+        load_format="dummy", task="embed"
+    ),
 }
 
 MULTIMODAL_MODELS = {
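The embedding models now pin `task="embed"` instead of relying on task auto-detection. A hedged sketch of how a per-model task setting would flow into the server arguments that `compare_two_settings` launches (`build_server_args` is a hypothetical helper, not part of this test file; `--task` and `--dtype` are real vLLM CLI flags):

# Sketch only, under the assumptions named above.
def build_server_args(dtype: str, task: str = "auto") -> list[str]:
    args = [
        "--dtype", dtype,
        "--max-model-len", "2048",
    ]
    if task != "auto":
        # Without an explicit `--task embed`, an embedding checkpoint may
        # be served as a generation model and the PP comparison fails.
        args.extend(["--task", task])
    return args


# e.g. build_server_args("float16", task="embed")
# -> ['--dtype', 'float16', '--max-model-len', '2048', '--task', 'embed']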
@@ -248,6 +251,7 @@ TEST_MODELS = [
     "meta-llama/Llama-3.2-1B-Instruct",
     "ArthurZ/Ilama-3.2-1B",
     "ibm/PowerLM-3b",
+    "deepseek-ai/DeepSeek-V2-Lite-Chat",
     # [LANGUAGE EMBEDDING]
     "intfloat/e5-mistral-7b-instruct",
     "BAAI/bge-multilingual-gemma2",
@@ -287,6 +291,11 @@ def _compare_tp(
     trust_remote_code = model_info.trust_remote_code
     tokenizer_mode = model_info.tokenizer_mode
     hf_overrides = model_info.hf_overrides
+    hf_config = get_config(model_id, trust_remote_code)
+
+    dtype = "float16"
+    if hf_config.model_type in _FLOAT16_NOT_SUPPORTED_MODELS:
+        dtype = "bfloat16"
 
     if load_format == "dummy":
         # Avoid OOM
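The new block consults the HF config so that model families listed in `_FLOAT16_NOT_SUPPORTED_MODELS` (imported above from `vllm.config`) are tested in bfloat16 rather than float16. A self-contained sketch of the same logic, using `transformers.AutoConfig` directly instead of vLLM's `get_config` wrapper, and with illustrative set contents:

from transformers import AutoConfig

# Illustrative contents; the real set lives in vllm.config.
_FLOAT16_NOT_SUPPORTED_MODELS = {"gemma2", "plamo2"}


def pick_test_dtype(model_id: str, trust_remote_code: bool = False) -> str:
    """Default to float16 for CI speed and memory, but fall back to
    bfloat16 for model types whose numerics degrade in float16."""
    config = AutoConfig.from_pretrained(model_id,
                                        trust_remote_code=trust_remote_code)
    if config.model_type in _FLOAT16_NOT_SUPPORTED_MODELS:
        return "bfloat16"
    return "float16"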
@@ -316,7 +325,7 @@ def _compare_tp(
     common_args = [
         # use half precision for speed and memory savings in CI environment
         "--dtype",
-        "float16",
+        dtype,
         "--max-model-len",
         "2048",
         "--max-num-seqs",
@@ -338,6 +347,7 @@ def _compare_tp(
         common_args.extend(["--hf-overrides", json.dumps(hf_overrides)])
 
     specific_case = tp_size == 2 and pp_size == 2 and chunked_prefill
+    testing_ray_compiled_graph = False
     if distributed_backend == "ray" and (vllm_major_version == "1"
                                          or specific_case):
         # For V1, test Ray Compiled Graph for all the tests
@@ -351,6 +361,7 @@ def _compare_tp(
         # Temporary. Currently when zeromq + SPMD is used, it does not properly
         # terminate because of a Ray Compiled Graph issue.
         common_args.append("--disable-frontend-multiprocessing")
+        testing_ray_compiled_graph = True
     elif distributed_backend == "mp":
         # Both V0/V1 of multiprocessing executor support PP
         pp_env = {
@@ -394,7 +405,6 @@ def _compare_tp(
                              tp_env,
                              method=method)
     except Exception:
-        testing_ray_compiled_graph = pp_env is not None
         if testing_ray_compiled_graph and vllm_major_version == "0":
            # Ray Compiled Graph tests are flaky for V0,
            # so we don't want to fail the test
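This hunk is the fix for the silenced mp tests named in the commit title: the old handler inferred "we were testing Ray Compiled Graph" from `pp_env is not None`, but the `mp` branch (visible in the previous hunk) also populates `pp_env`, so genuine pipeline-parallel failures under `mp` were misclassified as known-flaky V0 Ray runs and swallowed. The flag is now initialized to `False` and set to `True` only on the Ray Compiled Graph path. A minimal sketch of the before/after behavior (names and control flow simplified, not the test's real helper):

def run_with_flake_tolerance(run, pp_env, backend, vllm_major_version):
    # Fixed: track the Ray Compiled Graph case explicitly.
    testing_ray_compiled_graph = (backend == "ray")
    try:
        run(pp_env)
    except Exception:
        # Buggy version: `testing_ray_compiled_graph = pp_env is not None`
        # also matched the "mp" backend (which sets pp_env too), so real
        # mp failures were tolerated as flaky Ray runs and never surfaced.
        if testing_ray_compiled_graph and vllm_major_version == "0":
            print("known-flaky Ray Compiled Graph failure on V0; tolerated")
        else:
            raise  # mp (and V1 Ray) failures must fail the test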