From ed586e7724fdf91b391abcf6f3e473be641ff5d6 Mon Sep 17 00:00:00 2001
From: Cyrus Leung <tlleungac@connect.ust.hk>
Date: Mon, 15 Dec 2025 21:45:36 +0800
Subject: [PATCH] [Refactor] [3/N] Move tool parser tests and run on CPU
 (#30693)

Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
---
 .buildkite/test-amd.yaml                      | 20 +++++--------------
 .buildkite/test-pipeline.yaml                 | 17 +++++-----------
 .buildkite/test_areas/misc.yaml               |  4 +++-
 .buildkite/test_areas/tool_use.yaml           | 12 +----------
 tests/tool_parsers/__init__.py                |  0
 .../test_deepseekv31_tool_parser.py           |  0
 .../test_ernie45_moe_tool_parser.py           |  0
 .../test_glm4_moe_tool_parser.py              |  2 --
 .../test_jamba_tool_parser.py                 |  2 --
 .../test_kimi_k2_tool_parser.py               |  2 --
 .../test_minimax_tool_parser.py               |  2 --
 .../test_mistral_tool_parser.py               |  0
 .../test_openai_tool_parser.py                |  0
 .../test_qwen3coder_tool_parser.py            |  2 --
 .../test_seed_oss_tool_parser.py              |  2 --
 .../test_xlam_tool_parser.py                  |  2 --
 16 files changed, 14 insertions(+), 53 deletions(-)
 create mode 100644 tests/tool_parsers/__init__.py
 rename tests/{tool_use => tool_parsers}/test_deepseekv31_tool_parser.py (100%)
 rename tests/{tool_use => tool_parsers}/test_ernie45_moe_tool_parser.py (100%)
 rename tests/{tool_use => tool_parsers}/test_glm4_moe_tool_parser.py (99%)
 rename tests/{tool_use => tool_parsers}/test_jamba_tool_parser.py (99%)
 rename tests/{tool_use => tool_parsers}/test_kimi_k2_tool_parser.py (99%)
 rename tests/{tool_use => tool_parsers}/test_minimax_tool_parser.py (99%)
 rename tests/{tool_use => tool_parsers}/test_mistral_tool_parser.py (100%)
 rename tests/{tool_use => tool_parsers}/test_openai_tool_parser.py (100%)
 rename tests/{tool_use => tool_parsers}/test_qwen3coder_tool_parser.py (99%)
 rename tests/{tool_use => tool_parsers}/test_seed_oss_tool_parser.py (99%)
 rename tests/{tool_use => tool_parsers}/test_xlam_tool_parser.py (99%)

diff --git a/.buildkite/test-amd.yaml b/.buildkite/test-amd.yaml
index 0c2e4ed48dda6..3c9b8cbedcf06 100644
--- a/.buildkite/test-amd.yaml
+++ b/.buildkite/test-amd.yaml
@@ -61,8 +61,8 @@ steps:
   - pytest -v -s -m 'not cpu_test' multimodal
   - pytest -v -s utils_
 
-- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 15min
-  timeout_in_minutes: 20
+- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 20min
+  timeout_in_minutes: 30
   mirror_hardwares: [amdexperimental, amdproduction, amdtentative]
   agent_pool: mi325_1
   grade: Blocking
@@ -73,6 +73,7 @@ steps:
   - tests/multimodal
   - tests/standalone_tests/lazy_imports.py
   - tests/tokenizers_
+  - tests/tool_parsers
   - tests/transformers_utils
   - tests/config
   no_gpu: true
@@ -82,6 +83,7 @@ steps:
   - pytest -v -s test_outputs.py
   - pytest -v -s -m 'cpu_test' multimodal
   - pytest -v -s tokenizers_
+  - pytest -v -s tool_parsers
   - pytest -v -s transformers_utils
   - pytest -v -s config
 
@@ -759,19 +761,7 @@ steps:
     - vllm/
     - tests/tool_use
   commands:
-    - pytest -v -s -m 'not cpu_test' tool_use
-
-- label: OpenAI-Compatible Tool Use (CPU) # 5 mins
-  mirror_hardwares: [amdexperimental, amdproduction]
-  agent_pool: mi325_1
-  # grade: Blocking
-  timeout_in_minutes: 10
-  source_file_dependencies:
-    - vllm/
-    - tests/tool_use
-  no_gpu: true
-  commands:
-    - pytest -v -s -m 'cpu_test' tool_use
+    - pytest -v -s tool_use
 
 #####  models test  #####
 
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 5fcf945f3e5a6..2dcca5711b3d5 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -57,8 +57,8 @@ steps:
   - pytest -v -s -m 'not cpu_test' multimodal
   - pytest -v -s utils_
 
-- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 15min
-  timeout_in_minutes: 20
+- label: Async Engine, Inputs, Utils, Worker, Config Test (CPU) # 20min
+  timeout_in_minutes: 30
   source_file_dependencies:
   - vllm/
   - tests/test_inputs.py
@@ -66,6 +66,7 @@ steps:
   - tests/multimodal
   - tests/standalone_tests/lazy_imports.py
   - tests/tokenizers_
+  - tests/tool_parsers
   - tests/transformers_utils
   - tests/config
   no_gpu: true
@@ -75,6 +76,7 @@ steps:
   - pytest -v -s test_outputs.py
   - pytest -v -s -m 'cpu_test' multimodal
   - pytest -v -s tokenizers_
+  - pytest -v -s tool_parsers
   - pytest -v -s transformers_utils
   - pytest -v -s config
 
@@ -672,16 +674,7 @@ steps:
     - vllm/
     - tests/tool_use
   commands:
-    - pytest -v -s -m 'not cpu_test' tool_use
-
-- label: OpenAI-Compatible Tool Use (CPU) # 5 mins
-  timeout_in_minutes: 10
-  source_file_dependencies:
-    - vllm/
-    - tests/tool_use
-  no_gpu: true
-  commands:
-    - pytest -v -s -m 'cpu_test' tool_use
+    - pytest -v -s tool_use
 
 #####  models test  #####
 
diff --git a/.buildkite/test_areas/misc.yaml b/.buildkite/test_areas/misc.yaml
index 072bccadb726a..252af1e56a105 100644
--- a/.buildkite/test_areas/misc.yaml
+++ b/.buildkite/test_areas/misc.yaml
@@ -115,7 +115,7 @@ steps:
 
 - label: Async Engine, Inputs, Utils, Worker, Config (CPU)
   depends_on: ~
-  timeout_in_minutes: 20
+  timeout_in_minutes: 30
   source_file_dependencies:
   - vllm/
   - tests/test_inputs.py
@@ -123,6 +123,7 @@ steps:
   - tests/multimodal
   - tests/standalone_tests/lazy_imports.py
   - tests/tokenizers_
+  - tests/tool_parsers
   - tests/transformers_utils
   - tests/config
   no_gpu: true
@@ -132,6 +133,7 @@ steps:
   - pytest -v -s test_outputs.py
   - pytest -v -s -m 'cpu_test' multimodal
   - pytest -v -s tokenizers_
+  - pytest -v -s tool_parsers
   - pytest -v -s transformers_utils
   - pytest -v -s config
 
diff --git a/.buildkite/test_areas/tool_use.yaml b/.buildkite/test_areas/tool_use.yaml
index 7040cd1d253b3..69527a1214229 100644
--- a/.buildkite/test_areas/tool_use.yaml
+++ b/.buildkite/test_areas/tool_use.yaml
@@ -10,14 +10,4 @@ steps:
     - vllm/
     - tests/tool_use
   commands:
-    - pytest -v -s -m 'not cpu_test' tool_use
-
-- label: OpenAI-Compatible Tool Use (CPU)
-  depends_on: ~
-  timeout_in_minutes: 10
-  source_file_dependencies:
-    - vllm/
-    - tests/tool_use
-  no_gpu: true
-  commands:
-    - pytest -v -s -m 'cpu_test' tool_use
+    - pytest -v -s tool_use
diff --git a/tests/tool_parsers/__init__.py b/tests/tool_parsers/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/tests/tool_use/test_deepseekv31_tool_parser.py b/tests/tool_parsers/test_deepseekv31_tool_parser.py
similarity index 100%
rename from tests/tool_use/test_deepseekv31_tool_parser.py
rename to tests/tool_parsers/test_deepseekv31_tool_parser.py
diff --git a/tests/tool_use/test_ernie45_moe_tool_parser.py b/tests/tool_parsers/test_ernie45_moe_tool_parser.py
similarity index 100%
rename from tests/tool_use/test_ernie45_moe_tool_parser.py
rename to tests/tool_parsers/test_ernie45_moe_tool_parser.py
diff --git a/tests/tool_use/test_glm4_moe_tool_parser.py b/tests/tool_parsers/test_glm4_moe_tool_parser.py
similarity index 99%
rename from tests/tool_use/test_glm4_moe_tool_parser.py
rename to tests/tool_parsers/test_glm4_moe_tool_parser.py
index 749b0eef4ec85..52f5a9198e9b4 100644
--- a/tests/tool_use/test_glm4_moe_tool_parser.py
+++ b/tests/tool_parsers/test_glm4_moe_tool_parser.py
@@ -12,8 +12,6 @@ from vllm.tool_parsers.glm4_moe_tool_parser import (
     Glm4MoeModelToolParser,
 )
 
-pytestmark = pytest.mark.cpu_test
-
 pytest.skip("skip glm4_moe parser test", allow_module_level=True)
 # Use a common model that is likely to be available
 MODEL = "zai-org/GLM-4.5"
diff --git a/tests/tool_use/test_jamba_tool_parser.py b/tests/tool_parsers/test_jamba_tool_parser.py
similarity index 99%
rename from tests/tool_use/test_jamba_tool_parser.py
rename to tests/tool_parsers/test_jamba_tool_parser.py
index 70e8253708592..ccad16ae2f6b6 100644
--- a/tests/tool_use/test_jamba_tool_parser.py
+++ b/tests/tool_parsers/test_jamba_tool_parser.py
@@ -13,8 +13,6 @@ from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
 from vllm.tool_parsers.jamba_tool_parser import JambaToolParser
 
-pytestmark = pytest.mark.cpu_test
-
 MODEL = "ai21labs/Jamba-tiny-dev"
 
 
diff --git a/tests/tool_use/test_kimi_k2_tool_parser.py b/tests/tool_parsers/test_kimi_k2_tool_parser.py
similarity index 99%
rename from tests/tool_use/test_kimi_k2_tool_parser.py
rename to tests/tool_parsers/test_kimi_k2_tool_parser.py
index c014d29fa9079..d02f53c34b455 100644
--- a/tests/tool_use/test_kimi_k2_tool_parser.py
+++ b/tests/tool_parsers/test_kimi_k2_tool_parser.py
@@ -10,8 +10,6 @@ from vllm.entrypoints.openai.protocol import FunctionCall, ToolCall
 from vllm.tokenizers import get_tokenizer
 from vllm.tool_parsers.kimi_k2_tool_parser import KimiK2ToolParser
 
-pytestmark = pytest.mark.cpu_test
-
 # Use a common model that is likely to be available
 MODEL = "moonshotai/Kimi-K2-Instruct"
 
diff --git a/tests/tool_use/test_minimax_tool_parser.py b/tests/tool_parsers/test_minimax_tool_parser.py
similarity index 99%
rename from tests/tool_use/test_minimax_tool_parser.py
rename to tests/tool_parsers/test_minimax_tool_parser.py
index a931ce4679d18..28cfc4ea7a175 100644
--- a/tests/tool_use/test_minimax_tool_parser.py
+++ b/tests/tool_parsers/test_minimax_tool_parser.py
@@ -15,8 +15,6 @@ from vllm.entrypoints.openai.protocol import (
 from vllm.tokenizers import get_tokenizer
 from vllm.tool_parsers.minimax_tool_parser import MinimaxToolParser
 
-pytestmark = pytest.mark.cpu_test
-
 # Use a common model that is likely to be available
 MODEL = "MiniMaxAi/MiniMax-M1-40k"
 
diff --git a/tests/tool_use/test_mistral_tool_parser.py b/tests/tool_parsers/test_mistral_tool_parser.py
similarity index 100%
rename from tests/tool_use/test_mistral_tool_parser.py
rename to tests/tool_parsers/test_mistral_tool_parser.py
diff --git a/tests/tool_use/test_openai_tool_parser.py b/tests/tool_parsers/test_openai_tool_parser.py
similarity index 100%
rename from tests/tool_use/test_openai_tool_parser.py
rename to tests/tool_parsers/test_openai_tool_parser.py
diff --git a/tests/tool_use/test_qwen3coder_tool_parser.py b/tests/tool_parsers/test_qwen3coder_tool_parser.py
similarity index 99%
rename from tests/tool_use/test_qwen3coder_tool_parser.py
rename to tests/tool_parsers/test_qwen3coder_tool_parser.py
index 87ad816f0837d..3a0a612d7fbfd 100644
--- a/tests/tool_use/test_qwen3coder_tool_parser.py
+++ b/tests/tool_parsers/test_qwen3coder_tool_parser.py
@@ -20,8 +20,6 @@ from vllm.tool_parsers.qwen3coder_tool_parser import (
 )
 from vllm.tool_parsers.qwen3xml_tool_parser import Qwen3XMLToolParser
 
-pytestmark = pytest.mark.cpu_test
-
 MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8"
 
 
diff --git a/tests/tool_use/test_seed_oss_tool_parser.py b/tests/tool_parsers/test_seed_oss_tool_parser.py
similarity index 99%
rename from tests/tool_use/test_seed_oss_tool_parser.py
rename to tests/tool_parsers/test_seed_oss_tool_parser.py
index fda91b514edd1..c7f595830f34b 100644
--- a/tests/tool_use/test_seed_oss_tool_parser.py
+++ b/tests/tool_parsers/test_seed_oss_tool_parser.py
@@ -18,8 +18,6 @@ from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
 from vllm.tool_parsers.seed_oss_tool_parser import SeedOssToolParser
 
-pytestmark = pytest.mark.cpu_test
-
 # Use a common model that is likely to be available
 MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct"
 
diff --git a/tests/tool_use/test_xlam_tool_parser.py b/tests/tool_parsers/test_xlam_tool_parser.py
similarity index 99%
rename from tests/tool_use/test_xlam_tool_parser.py
rename to tests/tool_parsers/test_xlam_tool_parser.py
index ed24ba7cba1ac..380792a9926a4 100644
--- a/tests/tool_use/test_xlam_tool_parser.py
+++ b/tests/tool_parsers/test_xlam_tool_parser.py
@@ -16,8 +16,6 @@ from vllm.tokenizers import TokenizerLike, get_tokenizer
 from vllm.tokenizers.detokenizer_utils import detokenize_incrementally
 from vllm.tool_parsers.xlam_tool_parser import xLAMToolParser
 
-pytestmark = pytest.mark.cpu_test
-
 # Use a common model that is likely to be available
 MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r"