[CI][Entrypoints]: add filter to generation to filter out invalid tool calls (#22826)

Signed-off-by: Will Eaton <weaton@redhat.com>
This commit is contained in:
Will Eaton 2025-08-13 23:09:07 -04:00 committed by GitHub
parent 0ca2393b47
commit b6af24fba7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -54,38 +54,54 @@ def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
op = context.operation op = context.operation
assert op is not None assert op is not None
def no_invalid_types(case: schemathesis.models.Case):
    """
    Decide whether a generated test case should be kept.

    Returns False (skip the case) when the request body contains data that
    schemathesis generates because the schema is permissive, but that the
    server is expected to reject. Returns True (keep the case) otherwise,
    including when the body is not a dict or has no `messages` list.

    1. For `POST /tokenize` only: skips cases where any dict item in a
       message's `content` list has `"type": "file"`, which isn't
       implemented.
    2. For every endpoint: skips cases where any dict entry in a message's
       `tool_calls` list has a `type` other than `"function"` (a missing
       `type` counts as invalid) or carries a `"custom"` key.

    Non-dict messages, content items and tool calls are ignored rather
    than inspected, so malformed generated data never raises here.

    Example test cases that are skipped:
    curl -X POST -H 'Content-Type: application/json' \
        -d '{"messages": [{"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
        http://localhost:8000/tokenize

    curl -X POST -H 'Content-Type: application/json' \
        -d '{"messages": [{"role": "assistant", "tool_calls": [{"custom": {"input": "", "name": ""}, "id": "", "type": "custom"}]}]}' \
        http://localhost:8000/v1/chat/completions
    """  # noqa: E501
    body = getattr(case, "body", None)
    if not isinstance(body, dict):
        return True

    messages = body.get("messages")
    if not isinstance(messages, list):
        return True

    # `op` comes from the enclosing hook; the endpoint does not change
    # between messages, so evaluate it once.
    is_tokenize_post = (op.method.lower() == "post"
                        and op.path == "/tokenize")

    for message in messages:
        if not isinstance(message, dict):
            continue

        # Check for invalid file type in tokenize endpoint.
        if is_tokenize_post:
            content = message.get("content", [])
            # Only dict items can carry a "type"; anything else is skipped
            # instead of raising AttributeError on `.get`.
            if isinstance(content, list) and any(
                    isinstance(item, dict) and item.get("type") == "file"
                    for item in content):
                return False

        # Check for invalid tool_calls with non-function types.
        tool_calls = message.get("tool_calls", [])
        if not isinstance(tool_calls, list):
            continue
        for tool_call in tool_calls:
            if not isinstance(tool_call, dict):
                continue
            if (tool_call.get("type") != "function"
                    or "custom" in tool_call):
                return False

    return True
return strategy.filter(no_file_type) return strategy.filter(no_invalid_types)
@schema.parametrize() @schema.parametrize()