Updated xgrammar backend to not deny supported string formats (#27253)

Signed-off-by: CNE Pierre FICHEPOIL <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Signed-off-by: ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com>
Co-authored-by: CNE Pierre FICHEPOIL <pierre-1.fichepoil@gendarmerie.interieur.gouv.fr>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
This commit is contained in:
ExtReMLapin 2025-10-22 00:25:23 +02:00 committed by GitHub
parent 344a0017c0
commit 4a8a567e16
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 26 additions and 2 deletions

View File

@ -13,7 +13,7 @@ pytestmark = pytest.mark.cpu_test
@pytest.fixture
def unsupported_string_schemas():
return [
{"type": "string", "format": "email"},
{"type": "string", "format": "non_existing_format"},
]
@ -58,6 +58,7 @@ def supported_schema():
"properties": {
"name": {"type": "string"},
"age": {"type": "integer"},
"email": {"type": "string", "format": "email"},
"status": {"type": "string"},
"scores": {"type": "array", "items": {"type": "number"}},
"car_type": {"type": "string", "enum": ["sedan", "suv", "truck"]},

View File

@ -200,6 +200,25 @@ class XgrammarGrammar(StructuredOutputGrammar):
self.matcher.reset()
# JSON Schema string "format" values accepted for xgrammar, per the
# converter source linked below. A string schema whose "format" is not in
# this set is reported as unsupported by
# has_xgrammar_unsupported_json_features.
# cf https://github.com/mlc-ai/xgrammar/blob/a32ac892676d2eedc0327416105b9b06edfb94b2/cpp/json_schema_converter.cc
STRING_SUPPORTED_FORMATS = {
"email",
"date",
"time",
"date-time",
"duration",
"ipv4",
"ipv6",
"hostname",
"uuid",
"uri",
"uri-reference",
"uri-template",
"json-pointer",
"relative-json-pointer",
}
def has_xgrammar_unsupported_json_features(schema: dict[str, Any]) -> bool:
"""Check if JSON schema contains features unsupported by xgrammar."""
@ -219,7 +238,11 @@ def has_xgrammar_unsupported_json_features(schema: dict[str, Any]) -> bool:
return True
# Unsupported keywords for strings
if obj.get("type") == "string" and "format" in obj:
if (
obj.get("type") == "string"
and "format" in obj
and obj["format"] not in STRING_SUPPORTED_FORMATS
):
return True
# Unsupported keywords for objects