From e92d7085bfb25200c540391d0be06cb3b7c29ea4 Mon Sep 17 00:00:00 2001
From: leon-seidel <83984854+leon-seidel@users.noreply.github.com>
Date: Sat, 12 Apr 2025 08:22:07 +0200
Subject: [PATCH] [Feature][V1] Add xgrammar to support minLength, maxLength
 with test (#16516)

Signed-off-by: Leon Seidel <leon.seidel@fau.de>
---
 .../llm/test_struct_output_generate.py        | 39 +++++++++++++++++++
 tests/v1/structured_output/test_utils.py      | 16 ++++----
 vllm/v1/structured_output/utils.py            |  3 +-
 3 files changed, 48 insertions(+), 10 deletions(-)

diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py
index d848490b89e8a..b179dc3b4747c 100644
--- a/tests/v1/entrypoints/llm/test_struct_output_generate.py
+++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py
@@ -325,6 +325,45 @@ def test_structured_output(
         output_json = json.loads(generated_text)
         jsonschema.validate(instance=output_json, schema=json_schema)
 
+    #
+    # Test 10: Generate structured output with minLength and maxLength
+    #
+    min_length = 50
+    max_length = 50
+    json_schema = {
+        "type": "object",
+        "properties": {
+            "description": {
+                "type": "string",
+                "maxLength": max_length,
+                "minLength": min_length
+            }
+        },
+        "required": ["description"]
+    }
+
+    sampling_params = SamplingParams(
+        temperature=1.0,
+        max_tokens=1000,
+        guided_decoding=GuidedDecodingParams(json=json_schema))
+    outputs = llm.generate(
+        prompts="Generate a description of a frog using 50 characters.",
+        sampling_params=sampling_params,
+        use_tqdm=True)
+
+    assert outputs is not None
+
+    for output in outputs:
+        assert output is not None
+        assert isinstance(output, RequestOutput)
+        prompt = output.prompt
+
+        generated_text = output.outputs[0].text
+        assert generated_text is not None
+        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+        output_json = json.loads(generated_text)
+        jsonschema.validate(instance=output_json, schema=json_schema)
+
 
 @pytest.mark.skip_global_cleanup
 @pytest.mark.parametrize("model_name, tokenizer_mode",
diff --git a/tests/v1/structured_output/test_utils.py b/tests/v1/structured_output/test_utils.py
index 554f38926269b..0929f99016289 100644
--- a/tests/v1/structured_output/test_utils.py
+++ b/tests/v1/structured_output/test_utils.py
@@ -13,14 +13,6 @@ def unsupported_string_schemas():
             "type": "string",
             "pattern": "^[a-zA-Z]+$"
         },
-        {
-            "type": "string",
-            "minLength": 1
-        },
-        {
-            "type": "string",
-            "maxLength": 100
-        },
         {
             "type": "string",
             "format": "email"
@@ -164,6 +156,14 @@ def supported_schema():
                 "type": "string",
                 "enum": ["sedan", "suv", "truck"]
             },
+            "short_description": {
+                "type": "string",
+                "maxLength": 50
+            },
+            "long_description": {
+                "type": "string",
+                "minLength": 50
+            },
             "address": {
                 "type": "object",
                 "properties": {
diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py
index a771256ef29fd..56eed95944e2f 100644
--- a/vllm/v1/structured_output/utils.py
+++ b/vllm/v1/structured_output/utils.py
@@ -41,8 +41,7 @@ def has_xgrammar_unsupported_json_features(schema: dict[str, Any]) -> bool:
             return True
 
         # Unsupported keywords for strings
-        if obj.get("type") == "string" and any(
-                key in obj for key in ("minLength", "maxLength", "format")):
+        if obj.get("type") == "string" and "format" in obj:
             return True
 
         # Unsupported keywords for objects