From ed586e7724fdf91b391abcf6f3e473be641ff5d6 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Mon, 15 Dec 2025 21:45:36 +0800 Subject: [PATCH] [Refactor] [3/N] Move tool parser tests and run on CPU (#30693) Signed-off-by: DarkLight1337 --- .buildkite/test-amd.yaml | 20 +++++-------------- .buildkite/test-pipeline.yaml | 17 +++++----------- .buildkite/test_areas/misc.yaml | 4 +++- .buildkite/test_areas/tool_use.yaml | 12 +---------- tests/tool_parsers/__init__.py | 0 .../test_deepseekv31_tool_parser.py | 0 .../test_ernie45_moe_tool_parser.py | 0 .../test_glm4_moe_tool_parser.py | 2 -- .../test_jamba_tool_parser.py | 2 -- .../test_kimi_k2_tool_parser.py | 2 -- .../test_minimax_tool_parser.py | 2 -- .../test_mistral_tool_parser.py | 0 .../test_openai_tool_parser.py | 0 .../test_qwen3coder_tool_parser.py | 2 -- .../test_seed_oss_tool_parser.py | 2 -- .../test_xlam_tool_parser.py | 2 -- 16 files changed, 14 insertions(+), 53 deletions(-) create mode 100644 tests/tool_parsers/__init__.py rename tests/{tool_use => tool_parsers}/test_deepseekv31_tool_parser.py (100%) rename tests/{tool_use => tool_parsers}/test_ernie45_moe_tool_parser.py (100%) rename tests/{tool_use => tool_parsers}/test_glm4_moe_tool_parser.py (99%) rename tests/{tool_use => tool_parsers}/test_jamba_tool_parser.py (99%) rename tests/{tool_use => tool_parsers}/test_kimi_k2_tool_parser.py (99%) rename tests/{tool_use => tool_parsers}/test_minimax_tool_parser.py (99%) rename tests/{tool_use => tool_parsers}/test_mistral_tool_parser.py (100%) rename tests/{tool_use => tool_parsers}/test_openai_tool_parser.py (100%) rename tests/{tool_use => tool_parsers}/test_qwen3coder_tool_parser.py (99%) rename tests/{tool_use => tool_parsers}/test_seed_oss_tool_parser.py (99%) rename tests/{tool_use => tool_parsers}/test_xlam_tool_parser.py (99%) diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml index 0c2e4ed48dda6..3c9b8cbedcf06 100644 --- a/.buildkite/test-amd.yaml +++ b/.buildkite/test-amd.yaml @@ -61,8 +61,8 @@ steps: - pytest -v -s -m 'not cpu_test' multimodal - pytest -v -s utils_ -- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 15min - timeout_in_minutes: 20 +- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 20min + timeout_in_minutes: 30 mirror_hardwares: [amdexperimental, amdproduction, amdtentative] agent_pool: mi325_1 grade: Blocking @@ -73,6 +73,7 @@ steps: - tests/multimodal - tests/standalone_tests/lazy_imports.py - tests/tokenizers_ + - tests/tool_parsers - tests/transformers_utils - tests/config no_gpu: true @@ -82,6 +83,7 @@ steps: - pytest -v -s test_outputs.py - pytest -v -s -m 'cpu_test' multimodal - pytest -v -s tokenizers_ + - pytest -v -s tool_parsers - pytest -v -s transformers_utils - pytest -v -s config @@ -759,19 +761,7 @@ steps: - vllm/ - tests/tool_use commands: - - pytest -v -s -m 'not cpu_test' tool_use - -- label: OpenAI-Compatible Tool Use (CPU) # 5 mins - mirror_hardwares: [amdexperimental, amdproduction] - agent_pool: mi325_1 - # grade: Blocking - timeout_in_minutes: 10 - source_file_dependencies: - - vllm/ - - tests/tool_use - no_gpu: true - commands: - - pytest -v -s -m 'cpu_test' tool_use + - pytest -v -s tool_use ##### models test ##### diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 5fcf945f3e5a6..2dcca5711b3d5 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -57,8 +57,8 @@ steps: - pytest -v -s -m 'not cpu_test' multimodal - pytest -v -s utils_ -- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 15min - timeout_in_minutes: 20 +- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 20min + timeout_in_minutes: 30 source_file_dependencies: - vllm/ - tests/test_inputs.py @@ -66,6 +66,7 @@ steps: - tests/multimodal - tests/standalone_tests/lazy_imports.py - tests/tokenizers_ + - tests/tool_parsers - tests/transformers_utils - tests/config no_gpu: true @@ -75,6 +76,7 @@ steps: - pytest -v -s test_outputs.py - pytest -v -s -m 'cpu_test' multimodal - pytest -v -s tokenizers_ + - pytest -v -s tool_parsers - pytest -v -s transformers_utils - pytest -v -s config @@ -672,16 +674,7 @@ steps: - vllm/ - tests/tool_use commands: - - pytest -v -s -m 'not cpu_test' tool_use - -- label: OpenAI-Compatible Tool Use (CPU) # 5 mins - timeout_in_minutes: 10 - source_file_dependencies: - - vllm/ - - tests/tool_use - no_gpu: true - commands: - - pytest -v -s -m 'cpu_test' tool_use + - pytest -v -s tool_use ##### models test ##### diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml index 072bccadb726a..252af1e56a105 100644 --- a/.buildkite/test_areas/misc.yaml +++ b/.buildkite/test_areas/misc.yaml @@ -115,7 +115,7 @@ steps: - label: Async Engine, Inputs, Utils, Worker, Config (CPU) depends_on: ~ - timeout_in_minutes: 20 + timeout_in_minutes: 30 source_file_dependencies: - vllm/ - tests/test_inputs.py @@ -123,6 +123,7 @@ steps: - tests/multimodal - tests/standalone_tests/lazy_imports.py - tests/tokenizers_ + - tests/tool_parsers - tests/transformers_utils - tests/config no_gpu: true @@ -132,6 +133,7 @@ steps: - pytest -v -s test_outputs.py - pytest -v -s -m 'cpu_test' multimodal - pytest -v -s tokenizers_ + - pytest -v -s tool_parsers - pytest -v -s transformers_utils - pytest -v -s config diff --git a/.buildkite/test_areas/tool_use.yaml b/.buildkite/test_areas/tool_use.yaml index 7040cd1d253b3..69527a1214229 100644 --- a/.buildkite/test_areas/tool_use.yaml +++ b/.buildkite/test_areas/tool_use.yaml @@ -10,14 +10,4 @@ steps: - vllm/ - tests/tool_use commands: - - pytest -v -s -m 'not cpu_test' tool_use - -- label: OpenAI-Compatible Tool Use (CPU) - depends_on: ~ - timeout_in_minutes: 10 - source_file_dependencies: - - vllm/ - - tests/tool_use - no_gpu: true - commands: - - pytest -v -s -m 'cpu_test' tool_use + - pytest -v -s tool_use diff --git a/tests/tool_parsers/__init__.py b/tests/tool_parsers/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/tests/tool_use/test_deepseekv31_tool_parser.py b/tests/tool_parsers/test_deepseekv31_tool_parser.py similarity index 100% rename from tests/tool_use/test_deepseekv31_tool_parser.py rename to tests/tool_parsers/test_deepseekv31_tool_parser.py diff --git a/tests/tool_use/test_ernie45_moe_tool_parser.py b/tests/tool_parsers/test_ernie45_moe_tool_parser.py similarity index 100% rename from tests/tool_use/test_ernie45_moe_tool_parser.py rename to tests/tool_parsers/test_ernie45_moe_tool_parser.py diff --git a/tests/tool_use/test_glm4_moe_tool_parser.py b/tests/tool_parsers/test_glm4_moe_tool_parser.py similarity index 99% rename from tests/tool_use/test_glm4_moe_tool_parser.py rename to tests/tool_parsers/test_glm4_moe_tool_parser.py index 749b0eef4ec85..52f5a9198e9b4 100644 --- a/tests/tool_use/test_glm4_moe_tool_parser.py +++ b/tests/tool_parsers/test_glm4_moe_tool_parser.py @@ -12,8 +12,6 @@ from vllm.tool_parsers.glm4_moe_tool_parser import ( Glm4MoeModelToolParser, ) -pytestmark = pytest.mark.cpu_test - pytest.skip("skip glm4_moe parser test", allow_module_level=True) # Use a common model that is likely to be available MODEL = "zai-org/GLM-4.5" diff --git a/tests/tool_use/test_jamba_tool_parser.py b/tests/tool_parsers/test_jamba_tool_parser.py similarity index 99% rename from tests/tool_use/test_jamba_tool_parser.py rename to tests/tool_parsers/test_jamba_tool_parser.py index 70e8253708592..ccad16ae2f6b6 100644 --- a/tests/tool_use/test_jamba_tool_parser.py +++ b/tests/tool_parsers/test_jamba_tool_parser.py @@ -13,8 +13,6 @@ from vllm.tokenizers import TokenizerLike, get_tokenizer from vllm.tokenizers.detokenizer_utils import detokenize_incrementally from vllm.tool_parsers.jamba_tool_parser import JambaToolParser -pytestmark = pytest.mark.cpu_test - MODEL = "ai21labs/Jamba-tiny-dev" diff --git a/tests/tool_use/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py similarity index 99% rename from tests/tool_use/test_kimi_k2_tool_parser.py rename to tests/tool_parsers/test_kimi_k2_tool_parser.py index c014d29fa9079..d02f53c34b455 100644 --- a/tests/tool_use/test_kimi_k2_tool_parser.py +++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py @@ -10,8 +10,6 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall from vllm.tokenizers import get_tokenizer from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser -pytestmark = pytest.mark.cpu_test - # Use a common model that is likely to be available MODEL = "moonshotai/Kimi-K2-Instruct" diff --git a/tests/tool_use/test_minimax_tool_parser.py b/tests/tool_parsers/test_minimax_tool_parser.py similarity index 99% rename from tests/tool_use/test_minimax_tool_parser.py rename to tests/tool_parsers/test_minimax_tool_parser.py index a931ce4679d18..28cfc4ea7a175 100644 --- a/tests/tool_use/test_minimax_tool_parser.py +++ b/tests/tool_parsers/test_minimax_tool_parser.py @@ -15,8 +15,6 @@ from vllm.entrypoints.openai.protocol import ( from vllm.tokenizers import get_tokenizer from vllm.tool_parsers.minimax_tool_parser import MinimaxToolParser -pytestmark = pytest.mark.cpu_test - # Use a common model that is likely to be available MODEL = "MiniMaxAi/MiniMax-M1-40k" diff --git a/tests/tool_use/test_mistral_tool_parser.py b/tests/tool_parsers/test_mistral_tool_parser.py similarity index 100% rename from tests/tool_use/test_mistral_tool_parser.py rename to tests/tool_parsers/test_mistral_tool_parser.py diff --git a/tests/tool_use/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py similarity index 100% rename from tests/tool_use/test_openai_tool_parser.py rename to tests/tool_parsers/test_openai_tool_parser.py diff --git a/tests/tool_use/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py similarity index 99% rename from tests/tool_use/test_qwen3coder_tool_parser.py rename to tests/tool_parsers/test_qwen3coder_tool_parser.py index 87ad816f0837d..3a0a612d7fbfd 100644 --- a/tests/tool_use/test_qwen3coder_tool_parser.py +++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py @@ -20,8 +20,6 @@ from vllm.tool_parsers.qwen3coder_tool_parser import ( ) from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser -pytestmark = pytest.mark.cpu_test - MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" diff --git a/tests/tool_use/test_seed_oss_tool_parser.py b/tests/tool_parsers/test_seed_oss_tool_parser.py similarity index 99% rename from tests/tool_use/test_seed_oss_tool_parser.py rename to tests/tool_parsers/test_seed_oss_tool_parser.py index fda91b514edd1..c7f595830f34b 100644 --- a/tests/tool_use/test_seed_oss_tool_parser.py +++ b/tests/tool_parsers/test_seed_oss_tool_parser.py @@ -18,8 +18,6 @@ from vllm.tokenizers import TokenizerLike, get_tokenizer from vllm.tokenizers.detokenizer_utils import detokenize_incrementally from vllm.tool_parsers.seed_oss_tool_parser import SeedOssToolParser -pytestmark = pytest.mark.cpu_test - # Use a common model that is likely to be available MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct" diff --git a/tests/tool_use/test_xlam_tool_parser.py b/tests/tool_parsers/test_xlam_tool_parser.py similarity index 99% rename from tests/tool_use/test_xlam_tool_parser.py rename to tests/tool_parsers/test_xlam_tool_parser.py index ed24ba7cba1ac..380792a9926a4 100644 --- a/tests/tool_use/test_xlam_tool_parser.py +++ b/tests/tool_parsers/test_xlam_tool_parser.py @@ -16,8 +16,6 @@ from vllm.tokenizers import TokenizerLike, get_tokenizer from vllm.tokenizers.detokenizer_utils import detokenize_incrementally from vllm.tool_parsers.xlam_tool_parser import xLAMToolParser -pytestmark = pytest.mark.cpu_test - # Use a common model that is likely to be available MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"