[CI] Nightly Tests (#14898)

Signed-off-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>
Co-authored-by: rshaw@neuralmagic.com <rshaw@neuralmagic.com>
This commit is contained in:
Robert Shaw 2025-03-16 22:06:43 -04:00 committed by GitHub
parent aecc780dba
commit bb3aeddfaf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 14 additions and 2 deletions

View File

@ -201,6 +201,7 @@ def test_models(
) )
@pytest.mark.skip("RE-ENABLE: test is currently failing on main.")
@pytest.mark.parametrize("model", MISTRAL_FORMAT_MODELS) @pytest.mark.parametrize("model", MISTRAL_FORMAT_MODELS)
@pytest.mark.parametrize("dtype", ["bfloat16"]) @pytest.mark.parametrize("dtype", ["bfloat16"])
@pytest.mark.parametrize("max_tokens", [64]) @pytest.mark.parametrize("max_tokens", [64])

View File

@ -46,6 +46,7 @@ CONFIGS: dict[str, ServerConfig] = {
"model": "model":
"NousResearch/Hermes-3-Llama-3.1-8B", "NousResearch/Hermes-3-Llama-3.1-8B",
"arguments": [ "arguments": [
"--enforce-eager", "--no-enable-prefix-caching",
"--tool-call-parser", "hermes", "--chat-template", "--tool-call-parser", "hermes", "--chat-template",
str(VLLM_PATH / "examples/tool_chat_template_hermes.jinja") str(VLLM_PATH / "examples/tool_chat_template_hermes.jinja")
], ],
@ -60,6 +61,7 @@ CONFIGS: dict[str, ServerConfig] = {
"model": "model":
"meta-llama/Meta-Llama-3.1-8B-Instruct", "meta-llama/Meta-Llama-3.1-8B-Instruct",
"arguments": [ "arguments": [
"--enforce-eager", "--no-enable-prefix-caching",
"--tool-call-parser", "llama3_json", "--chat-template", "--tool-call-parser", "llama3_json", "--chat-template",
str(VLLM_PATH / "examples/tool_chat_template_llama3.1_json.jinja") str(VLLM_PATH / "examples/tool_chat_template_llama3.1_json.jinja")
], ],
@ -70,6 +72,7 @@ CONFIGS: dict[str, ServerConfig] = {
"model": "model":
"meta-llama/Llama-3.2-3B-Instruct", "meta-llama/Llama-3.2-3B-Instruct",
"arguments": [ "arguments": [
"--enforce-eager", "--no-enable-prefix-caching",
"--tool-call-parser", "llama3_json", "--chat-template", "--tool-call-parser", "llama3_json", "--chat-template",
str(VLLM_PATH / "examples/tool_chat_template_llama3.2_json.jinja") str(VLLM_PATH / "examples/tool_chat_template_llama3.2_json.jinja")
], ],
@ -80,6 +83,7 @@ CONFIGS: dict[str, ServerConfig] = {
"model": "model":
"mistralai/Mistral-7B-Instruct-v0.3", "mistralai/Mistral-7B-Instruct-v0.3",
"arguments": [ "arguments": [
"--enforce-eager", "--no-enable-prefix-caching",
"--tool-call-parser", "mistral", "--chat-template", "--tool-call-parser", "mistral", "--chat-template",
str(VLLM_PATH / "examples/tool_chat_template_mistral.jinja"), str(VLLM_PATH / "examples/tool_chat_template_mistral.jinja"),
"--ignore-patterns=\"consolidated.safetensors\"" "--ignore-patterns=\"consolidated.safetensors\""
@ -111,22 +115,28 @@ CONFIGS: dict[str, ServerConfig] = {
"model": "model":
"ibm-granite/granite-3.0-8b-instruct", "ibm-granite/granite-3.0-8b-instruct",
"arguments": [ "arguments": [
"--enforce-eager", "--no-enable-prefix-caching",
"--tool-call-parser", "granite", "--chat-template", "--tool-call-parser", "granite", "--chat-template",
str(VLLM_PATH / "examples/tool_chat_template_granite.jinja") str(VLLM_PATH / "examples/tool_chat_template_granite.jinja")
], ],
}, },
"granite-3.1-8b": { "granite-3.1-8b": {
"model": "ibm-granite/granite-3.1-8b-instruct", "model":
"ibm-granite/granite-3.1-8b-instruct",
"arguments": [ "arguments": [
"--enforce-eager",
"--no-enable-prefix-caching",
"--tool-call-parser", "--tool-call-parser",
"granite", "granite",
], ],
"supports_parallel": True, "supports_parallel":
True,
}, },
"internlm": { "internlm": {
"model": "model":
"internlm/internlm2_5-7b-chat", "internlm/internlm2_5-7b-chat",
"arguments": [ "arguments": [
"--enforce-eager", "--no-enable-prefix-caching",
"--tool-call-parser", "internlm", "--chat-template", "--tool-call-parser", "internlm", "--chat-template",
str(VLLM_PATH / str(VLLM_PATH /
"examples/tool_chat_template_internlm2_tool.jinja"), "examples/tool_chat_template_internlm2_tool.jinja"),
@ -139,6 +149,7 @@ CONFIGS: dict[str, ServerConfig] = {
"model": "model":
"Team-ACE/ToolACE-8B", "Team-ACE/ToolACE-8B",
"arguments": [ "arguments": [
"--enforce-eager", "--no-enable-prefix-caching",
"--tool-call-parser", "pythonic", "--chat-template", "--tool-call-parser", "pythonic", "--chat-template",
str(VLLM_PATH / "examples/tool_chat_template_toolace.jinja") str(VLLM_PATH / "examples/tool_chat_template_toolace.jinja")
], ],