From e92d7085bfb25200c540391d0be06cb3b7c29ea4 Mon Sep 17 00:00:00 2001 From: leon-seidel <83984854+leon-seidel@users.noreply.github.com> Date: Sat, 12 Apr 2025 08:22:07 +0200 Subject: [PATCH] [Feature][V1] Add xgrammar to support minLength, maxLength with test (#16516) Signed-off-by: Leon Seidel --- .../llm/test_struct_output_generate.py | 39 +++++++++++++++++++ tests/v1/structured_output/test_utils.py | 16 ++++---- vllm/v1/structured_output/utils.py | 3 +- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index d848490b89e8a..b179dc3b4747c 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -325,6 +325,45 @@ def test_structured_output( output_json = json.loads(generated_text) jsonschema.validate(instance=output_json, schema=json_schema) + # + # Test 10: Generate structured output with minLength and maxLength + # + min_length = 50 + max_length = 50 + json_schema = { + "type": "object", + "properties": { + "description": { + "type": "string", + "maxLength": max_length, + "minLength": min_length + } + }, + "required": ["description"] + } + + sampling_params = SamplingParams( + temperature=1.0, + max_tokens=1000, + guided_decoding=GuidedDecodingParams(json=json_schema)) + outputs = llm.generate( + prompts="Generate a description of a frog using 50 characters.", + sampling_params=sampling_params, + use_tqdm=True) + + assert outputs is not None + + for output in outputs: + assert output is not None + assert isinstance(output, RequestOutput) + prompt = output.prompt + + generated_text = output.outputs[0].text + assert generated_text is not None + print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") + output_json = json.loads(generated_text) + jsonschema.validate(instance=output_json, schema=json_schema) + @pytest.mark.skip_global_cleanup 
@pytest.mark.parametrize("model_name, tokenizer_mode", diff --git a/tests/v1/structured_output/test_utils.py b/tests/v1/structured_output/test_utils.py index 554f38926269b..0929f99016289 100644 --- a/tests/v1/structured_output/test_utils.py +++ b/tests/v1/structured_output/test_utils.py @@ -13,14 +13,6 @@ def unsupported_string_schemas(): "type": "string", "pattern": "^[a-zA-Z]+$" }, - { - "type": "string", - "minLength": 1 - }, - { - "type": "string", - "maxLength": 100 - }, { "type": "string", "format": "email" @@ -164,6 +156,14 @@ def supported_schema(): "type": "string", "enum": ["sedan", "suv", "truck"] }, + "short_description": { + "type": "string", + "maxLength": 50 + }, + "long_description": { + "type": "string", + "minLength": 50 + }, "address": { "type": "object", "properties": { diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py index a771256ef29fd..56eed95944e2f 100644 --- a/vllm/v1/structured_output/utils.py +++ b/vllm/v1/structured_output/utils.py @@ -41,8 +41,7 @@ def has_xgrammar_unsupported_json_features(schema: dict[str, Any]) -> bool: return True # Unsupported keywords for strings - if obj.get("type") == "string" and any( - key in obj for key in ("minLength", "maxLength", "format")): + if obj.get("type") == "string" and "format" in obj: return True # Unsupported keywords for objects