# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# ruff: noqa

import json
import os

import pytest
import yaml
from transformers import AutoTokenizer
from pydantic import ValidationError

from vllm.transformers_utils.detokenizer_utils import convert_ids_list_to_tokens

from vllm.utils.argparse_utils import FlexibleArgumentParser
from ..utils import flat_product


# Tests for FlexibleArgumentParser
@pytest.fixture
def parser():
    """Return a FlexibleArgumentParser carrying the options used by the basic tests."""
    flexible = FlexibleArgumentParser()
    flexible.add_argument(
        "--image-input-type", choices=["pixel_values", "image_features"]
    )
    flexible.add_argument("--model-name")
    flexible.add_argument("--batch-size", type=int)
    flexible.add_argument("--enable-feature", action="store_true")
    # Both JSON-typed options are also addressed with dotted keys in the tests.
    flexible.add_argument("--hf-overrides", type=json.loads)
    flexible.add_argument("-O", "--compilation-config", type=json.loads)
    return flexible


@pytest.fixture
def parser_with_config():
    """Return a FlexibleArgumentParser shaped like a ``serve`` command line.

    It has two positionals (``serve`` and an optional ``model_tag``) plus the
    options that the config-file tests read back.
    """
    serve_parser = FlexibleArgumentParser()
    serve_parser.add_argument("serve")
    serve_parser.add_argument("model_tag", nargs="?")
    serve_parser.add_argument("--model", type=str)
    serve_parser.add_argument("--served-model-name", type=str)
    serve_parser.add_argument("--config", type=str)
    serve_parser.add_argument("--port", type=int)
    serve_parser.add_argument("--tensor-parallel-size", type=int)
    serve_parser.add_argument("--trust-remote-code", action="store_true")
    return serve_parser


def test_underscore_to_dash(parser):
    """An option spelled with underscores is accepted for a dashed option."""
    parsed = parser.parse_args(["--image_input_type", "pixel_values"])
    assert parsed.image_input_type == "pixel_values"


def test_mixed_usage(parser):
    """Underscore and dash spellings can be combined in one command line."""
    argv = ["--image_input_type", "image_features", "--model-name", "facebook/opt-125m"]
    parsed = parser.parse_args(argv)
    assert parsed.image_input_type == "image_features"
    assert parsed.model_name == "facebook/opt-125m"


def test_with_equals_sign(parser):
    """``--option=value`` syntax works for both spellings."""
    argv = ["--image_input_type=pixel_values", "--model-name=facebook/opt-125m"]
    parsed = parser.parse_args(argv)
    assert parsed.image_input_type == "pixel_values"
    assert parsed.model_name == "facebook/opt-125m"


def test_with_int_value(parser):
    """Either spelling of the batch-size option yields the integer 32."""
    for flag in ("--batch_size", "--batch-size"):
        parsed = parser.parse_args([flag, "32"])
        assert parsed.batch_size == 32


def test_with_bool_flag(parser):
    """Either spelling of the store_true flag sets it to True."""
    for flag in ("--enable_feature", "--enable-feature"):
        parsed = parser.parse_args([flag])
        assert parsed.enable_feature is True


def test_invalid_choice(parser):
    """A value outside ``choices`` makes the parser exit."""
    argv = ["--image_input_type", "invalid_choice"]
    with pytest.raises(SystemExit):
        parser.parse_args(argv)


def test_missing_required_argument(parser):
    """Omitting a required option makes the parser exit."""
    parser.add_argument("--required-arg", required=True)
    with pytest.raises(SystemExit):
        parser.parse_args([])


def test_cli_override_to_config(parser_with_config, cli_config_file):
    """Values given on the command line win over the config file,
    wherever ``--config`` appears among the arguments."""
    # --config before the overriding option.
    config_first = [
        "serve",
        "mymodel",
        "--config",
        cli_config_file,
        "--tensor-parallel-size",
        "3",
    ]
    parsed = parser_with_config.parse_args(config_first)
    assert parsed.tensor_parallel_size == 3

    # --config after the overriding option; port comes from the config file.
    config_last = [
        "serve",
        "mymodel",
        "--tensor-parallel-size",
        "3",
        "--config",
        cli_config_file,
    ]
    parsed = parser_with_config.parse_args(config_last)
    assert parsed.tensor_parallel_size == 3
    assert parsed.port == 12312

    # --config in the middle, with overrides on both sides of it.
    config_between = [
        "serve",
        "mymodel",
        "--tensor-parallel-size",
        "3",
        "--config",
        cli_config_file,
        "--port",
        "666",
    ]
    parsed = parser_with_config.parse_args(config_between)
    assert parsed.tensor_parallel_size == 3
    assert parsed.port == 666


def test_config_args(parser_with_config, cli_config_file):
    """Options that appear only in the config file are applied."""
    argv = ["serve", "mymodel", "--config", cli_config_file]
    parsed = parser_with_config.parse_args(argv)
    assert parsed.tensor_parallel_size == 2
    assert parsed.trust_remote_code


def test_config_file(parser_with_config):
    """Unusable ``--config`` values raise the expected exception types."""
    # A YAML path that does not exist.
    missing_yaml = ["serve", "mymodel", "--config", "test_config.yml"]
    with pytest.raises(FileNotFoundError):
        parser_with_config.parse_args(missing_yaml)

    # A path with a .json extension.
    json_path = ["serve", "mymodel", "--config", "./data/test_config.json"]
    with pytest.raises(ValueError):
        parser_with_config.parse_args(json_path)

    # --config immediately followed by another option instead of a path.
    no_path = [
        "serve",
        "mymodel",
        "--tensor-parallel-size",
        "3",
        "--config",
        "--batch-size",
        "32",
    ]
    with pytest.raises(ValueError):
        parser_with_config.parse_args(no_path)


def test_no_model_tag(parser_with_config, cli_config_file):
    """Parsing ``serve`` with this config and no model tag raises ValueError."""
    argv = ["serve", "--config", cli_config_file]
    with pytest.raises(ValueError):
        parser_with_config.parse_args(argv)


def test_dict_args(parser):
    """Dotted options are collected into the expected nested dictionaries."""
    argv = [
        "--model-name=something.something",
        "--hf-overrides.key1",
        "val1",
        # Test nesting
        "--hf-overrides.key2.key3",
        "val2",
        "--hf-overrides.key2.key4",
        "val3",
        # Test compile config and compilation mode
        "-O.use_inductor=true",
        "-O.backend",
        "custom",
        "-O1",
        # Test = sign
        "--hf-overrides.key5=val4",
        # Test underscore to dash conversion
        "--hf_overrides.key_6",
        "val5",
        "--hf_overrides.key-7.key_8",
        "val6",
        # Test data type detection
        "--hf_overrides.key9",
        "100",
        "--hf_overrides.key10",
        "100.0",
        "--hf_overrides.key11",
        "true",
        "--hf_overrides.key12.key13",
        "null",
        # Test '-' and '.' in value
        "--hf_overrides.key14.key15",
        "-minus.and.dot",
        # Test array values
        "-O.custom_ops+",
        "-quant_fp8",
        "-O.custom_ops+=+silu_mul,-rms_norm",
    ]
    expected_hf_overrides = {
        "key1": "val1",
        "key2": {
            "key3": "val2",
            "key4": "val3",
        },
        "key5": "val4",
        "key_6": "val5",
        "key-7": {
            "key_8": "val6",
        },
        "key9": 100,
        "key10": 100.0,
        "key11": True,
        "key12": {
            "key13": None,
        },
        "key14": {
            "key15": "-minus.and.dot",
        },
    }
    expected_compilation_config = {
        "mode": 1,
        "use_inductor": True,
        "backend": "custom",
        "custom_ops": ["-quant_fp8", "+silu_mul", "-rms_norm"],
    }

    parsed = parser.parse_args(argv)

    assert parsed.model_name == "something.something"
    assert parsed.hf_overrides == expected_hf_overrides
    assert parsed.compilation_config == expected_compilation_config


def test_duplicate_dict_args(caplog_vllm, parser):
    """Repeated dotted keys keep the last value and are reported in the log."""
    argv = [
        "--model-name=something.something",
        "--hf-overrides.key1",
        "val1",
        "--hf-overrides.key1",
        "val2",
        "-O1",
        "-O.mode",
        "2",
        "-O3",
    ]

    parsed = parser.parse_args(argv)

    # Should be the last value
    assert parsed.hf_overrides == {"key1": "val2"}
    assert parsed.compilation_config == {"mode": 3}

    # Exactly one log record, naming both duplicated keys.
    assert len(caplog_vllm.records) == 1
    captured = caplog_vllm.text
    assert "duplicate" in captured
    assert "--hf-overrides.key1" in captured
    assert "-O.mode" in captured


def test_model_specification(
    parser_with_config, cli_config_file, cli_config_file_with_model
):
    """Check how a model given on the CLI, in the config file, or via
    ``--model`` ends up in the parsed namespace."""
    # Test model in CLI takes precedence over config
    parsed = parser_with_config.parse_args(
        ["serve", "cli-model", "--config", cli_config_file_with_model]
    )
    assert parsed.model_tag == "cli-model"
    assert parsed.served_model_name == "mymodel"

    # Test model from config file works
    parsed = parser_with_config.parse_args(
        ["serve", "--config", cli_config_file_with_model]
    )
    assert parsed.model == "config-model"
    assert parsed.served_model_name == "mymodel"

    # Test no model specified anywhere raises error
    with pytest.raises(ValueError, match="No model specified!"):
        parser_with_config.parse_args(["serve", "--config", cli_config_file])

    # Test using --model option raises error
    # with pytest.raises(
    #     ValueError,
    #     match=
    #     ("With `vllm serve`, you should provide the model as a positional "
    #      "argument or in a config file instead of via the `--model` option."),
    # ):
    #     parser_with_config.parse_args(['serve', '--model', 'my-model'])

    # Test using --model option back-compatibility
    # (when back-compatibility ends, the above test should be uncommented
    # and the below test should be removed)
    back_compat_invocations = (
        [
            "serve",
            "--tensor-parallel-size",
            "2",
            "--model",
            "my-model",
            "--trust-remote-code",
            "--port",
            "8001",
        ],
        [
            "serve",
            "--tensor-parallel-size=2",
            "--model=my-model",
            "--trust-remote-code",
            "--port=8001",
        ],
    )
    for argv in back_compat_invocations:
        parsed = parser_with_config.parse_args(argv)
        assert parsed.model is None
        assert parsed.tensor_parallel_size == 2
        assert parsed.trust_remote_code is True
        assert parsed.port == 8001

    # Test other config values are preserved
    parsed = parser_with_config.parse_args(
        ["serve", "cli-model", "--config", cli_config_file_with_model]
    )
    assert parsed.tensor_parallel_size == 2
    assert parsed.trust_remote_code is True
    assert parsed.port == 12312


def test_convert_ids_list_to_tokens():
    """convert_ids_list_to_tokens yields " world" where the tokenizer's own
    conversion yields "Ġworld"."""
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
    token_ids = tokenizer.encode("Hello, world!")
    # token_ids = [9707, 11, 1879, 0]
    raw_tokens = tokenizer.convert_ids_to_tokens(token_ids)
    assert raw_tokens == ["Hello", ",", "Ġworld", "!"]
    converted = convert_ids_list_to_tokens(tokenizer, token_ids)
    assert converted == ["Hello", ",", " world", "!"]


def test_load_config_file(tmp_path):
    """load_config_file turns a YAML mapping into a flat list of CLI tokens."""
    # Configuration written to the temporary YAML file.
    config_data = {
        "enable-logging": True,
        "list-arg": ["item1", "item2"],
        "port": 12323,
        "tensor-parallel-size": 4,
    }
    # Tokens the parser is expected to produce from that file.
    expected_args = [
        "--enable-logging",
        "--list-arg",
        "item1",
        "item2",
        "--port",
        "12323",
        "--tensor-parallel-size",
        "4",
    ]

    config_file_path = tmp_path / "config.yaml"
    with open(config_file_path, "w") as stream:
        yaml.dump(config_data, stream)

    loader = FlexibleArgumentParser()
    processed_args = loader.load_config_file(str(config_file_path))

    assert processed_args == expected_args
    os.remove(str(config_file_path))


def test_compilation_mode_string_values(parser):
    """``-O.mode`` / ``-O<n>`` values come back as ints for digits and as
    unchanged strings for names."""
    cases = (
        (["-O.mode", "0"], {"mode": 0}),
        (["-O3"], {"mode": 3}),
        (["-O.mode=NONE"], {"mode": "NONE"}),
        (["-O.mode", "STOCK_TORCH_COMPILE"], {"mode": "STOCK_TORCH_COMPILE"}),
        (["-O.mode=DYNAMO_TRACE_ONCE"], {"mode": "DYNAMO_TRACE_ONCE"}),
        (["-O.mode", "VLLM_COMPILE"], {"mode": "VLLM_COMPILE"}),
        (["-O.mode=none"], {"mode": "none"}),
        (["-O.mode=vllm_compile"], {"mode": "vllm_compile"}),
    )
    for argv, expected in cases:
        parsed = parser.parse_args(argv)
        assert parsed.compilation_config == expected


def test_compilation_config_mode_validator():
    """CompilationConfig accepts ints and mode names (upper or lower case) for
    ``mode`` and rejects an unknown name."""
    from vllm.config.compilation import CompilationConfig, CompilationMode

    accepted = (
        (0, CompilationMode.NONE),
        (3, CompilationMode.VLLM_COMPILE),
        ("NONE", CompilationMode.NONE),
        ("STOCK_TORCH_COMPILE", CompilationMode.STOCK_TORCH_COMPILE),
        ("DYNAMO_TRACE_ONCE", CompilationMode.DYNAMO_TRACE_ONCE),
        ("VLLM_COMPILE", CompilationMode.VLLM_COMPILE),
        ("none", CompilationMode.NONE),
        ("vllm_compile", CompilationMode.VLLM_COMPILE),
    )
    for raw_mode, expected_mode in accepted:
        config = CompilationConfig(mode=raw_mode)
        assert config.mode == expected_mode

    with pytest.raises(ValidationError, match="Invalid compilation mode"):
        CompilationConfig(mode="INVALID_MODE")


def test_flat_product():
    """flat_product pairs plain items like a product and splices tuple items
    into the resulting tuples."""
    # Check regular itertools.product behavior
    plain = list(flat_product([1, 2, 3], ["a", "b"]))
    assert plain == [
        (1, "a"),
        (1, "b"),
        (2, "a"),
        (2, "b"),
        (3, "a"),
        (3, "b"),
    ]

    # check that the tuples get flattened
    flattened = list(flat_product([(1, 2), (3, 4)], ["a", "b"], [(5, 6)]))
    assert flattened == [
        (1, 2, "a", 5, 6),
        (1, 2, "b", 5, 6),
        (3, 4, "a", 5, 6),
        (3, 4, "b", 5, 6),
    ]