[Feature][V1] Add xgrammar to support minLength, maxLength with test (#16516)

Signed-off-by: Leon Seidel <leon.seidel@fau.de>
This commit is contained in:
leon-seidel 2025-04-12 08:22:07 +02:00 committed by GitHub
parent bd6028d6b0
commit e92d7085bf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 48 additions and 10 deletions

View File

@ -325,6 +325,45 @@ def test_structured_output(
output_json = json.loads(generated_text)
jsonschema.validate(instance=output_json, schema=json_schema)
#
# Test 10: Generate structured output with minLength and maxLength constraints
#
min_length = 50
max_length = 50
json_schema = {
"type": "object",
"properties": {
"description": {
"type": "string",
"maxLength": max_length,
"minLength": min_length
}
},
"required": ["description"]
}
sampling_params = SamplingParams(
temperature=1.0,
max_tokens=1000,
guided_decoding=GuidedDecodingParams(json=json_schema))
outputs = llm.generate(
prompts="Generate a description of a frog using 50 characters.",
sampling_params=sampling_params,
use_tqdm=True)
assert outputs is not None
for output in outputs:
assert output is not None
assert isinstance(output, RequestOutput)
prompt = output.prompt
generated_text = output.outputs[0].text
assert generated_text is not None
print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
output_json = json.loads(generated_text)
jsonschema.validate(instance=output_json, schema=json_schema)
@pytest.mark.skip_global_cleanup
@pytest.mark.parametrize("model_name, tokenizer_mode",

View File

@ -13,14 +13,6 @@ def unsupported_string_schemas():
"type": "string",
"pattern": "^[a-zA-Z]+$"
},
{
"type": "string",
"minLength": 1
},
{
"type": "string",
"maxLength": 100
},
{
"type": "string",
"format": "email"
@ -164,6 +156,14 @@ def supported_schema():
"type": "string",
"enum": ["sedan", "suv", "truck"]
},
"short_description": {
"type": "string",
"maxLength": 50
},
"long_description": {
"type": "string",
"minLength": 50
},
"address": {
"type": "object",
"properties": {

View File

@ -41,8 +41,7 @@ def has_xgrammar_unsupported_json_features(schema: dict[str, Any]) -> bool:
return True
# Unsupported keywords for strings
if obj.get("type") == "string" and any(
key in obj for key in ("minLength", "maxLength", "format")):
if obj.get("type") == "string" and "format" in obj:
return True
# Unsupported keywords for objects