# Build requirements for the package.
[build-system]
# Should be mirrored in requirements/build.txt
requires = [
    "cmake>=3.26.1",
    "ninja",
    "packaging>=24.2",
    "setuptools>=77.0.3,<80.0.0",
    "setuptools-scm>=8.0",
    "torch == 2.8.0",
    "wheel",
    "jinja2",
]
build-backend = "setuptools.build_meta"

[project]
name = "vllm"
authors = [{ name = "vLLM Team" }]
license = "Apache-2.0"
license-files = ["LICENSE"]
readme = "README.md"
description = "A high-throughput and memory-efficient inference and serving engine for LLMs"
classifiers = [
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Intended Audience :: Developers",
    "Intended Audience :: Information Technology",
    "Intended Audience :: Science/Research",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Scientific/Engineering :: Information Analysis",
]
requires-python = ">=3.9,<3.14"
# These fields are not declared statically in this file.
dynamic = ["version", "dependencies", "optional-dependencies"]

[project.urls]
Homepage = "https://github.com/vllm-project/vllm"
Documentation = "https://docs.vllm.ai/en/latest/"
Slack = "https://slack.vllm.ai/"

[project.scripts]
vllm = "vllm.entrypoints.cli.main:main"

[project.entry-points."vllm.general_plugins"]
lora_filesystem_resolver = "vllm.plugins.lora_resolvers.filesystem_resolver:register_filesystem_resolver"

[tool.setuptools_scm]
# no extra settings needed, presence enables setuptools-scm

[tool.setuptools.packages.find]
where = ["."]
include = ["vllm*"]

[tool.ruff.lint]
select = [
    # pycodestyle
    "E",
    # Pyflakes
    "F",
    # pyupgrade
    "UP",
    # flake8-bugbear
    "B",
    # flake8-simplify
    "SIM",
    # isort
    "I",
    # flake8-logging-format
    "G",
]
ignore = [
    # star imports
    "F405",
    "F403",
    # lambda expression assignment
    "E731",
    # Loop control variable not used within loop body
    "B007",
    # f-string format
    "UP032",
    # Can remove once 3.10+ is the minimum Python version
    "UP007",
]

[tool.ruff.lint.per-file-ignores]
"vllm/third_party/**" = ["ALL"]
"vllm/version.py" = ["F401"]
"vllm/_version.py" = ["ALL"]
# TEMPORARY! These ignores will be fixed forward
# Line length violations
"csrc/cutlass_extensions/vllm_cutlass_library_extension.py" = ["E501"]
"tests/compile/piecewise/test_simple.py" = ["E501"]
"tests/compile/piecewise/test_toy_llama.py" = ["E501", "B023"]
"tests/entrypoints/conftest.py" = ["E501"]
"tests/entrypoints/openai/test_audio.py" = ["E501"]
"tests/entrypoints/openai/test_chat.py" = ["E501"]
"tests/entrypoints/openai/test_chat_template.py" = ["E501"]
"tests/entrypoints/openai/test_chat_with_tool_reasoning.py" = ["E501"]
"tests/entrypoints/openai/test_completion_with_function_calling.py" = ["E501"]
"tests/entrypoints/openai/test_video.py" = ["E501"]
"tests/entrypoints/openai/test_vision.py" = ["E501"]
"tests/entrypoints/test_chat_utils.py" = ["E501"]
"tests/kernels/moe/modular_kernel_tools/common.py" = ["E501"]
"tests/models/language/generation/test_gemma.py" = ["E501"]
"tests/models/language/generation/test_mistral.py" = ["E501"]
"tests/models/multimodal/generation/test_ultravox.py" = ["E501"]
"tests/models/multimodal/generation/test_voxtral.py" = ["E501"]
"tests/models/multimodal/generation/vlm_utils/custom_inputs.py" = ["E501"]
"tests/tool_use/test_tool_choice_required.py" = ["E501"]
"tests/v1/attention/utils.py" = ["E501"]
"tests/v1/entrypoints/openai/responses/test_image.py" = ["E501"]
"tests/v1/kv_connector/nixl_integration/test_accuracy.py" = ["E501"]
"tests/v1/kv_connector/unit/test_offloading_connector.py" = ["E501"]
"tests/v1/logits_processors/test_custom_offline.py" = ["E501"]
"vllm/attention/ops/pallas_kv_cache_update.py" = ["E501"]
"vllm/compilation/collective_fusion.py" = ["E501"]
"vllm/compilation/wrapper.py" = ["E501"]
"vllm/config/vllm.py" = ["E501"]
"vllm/distributed/device_communicators/all2all.py" = ["E501"]
"vllm/entrypoints/openai/protocol.py" = ["E501"]
"vllm/lora/layers/vocal_parallel_embedding.py" = ["E501"]
"vllm/model_executor/model_loader/bitsandbytes_loader.py" = ["E501"]
"vllm/model_executor/models/bailing_moe.py" = ["E501"]
"vllm/model_executor/models/hyperclovax_vision.py" = ["E501"]
"vllm/model_executor/models/llama4_eagle.py" = ["E501"]
"vllm/model_executor/models/longcat_flash_mtp.py" = ["E501"]
"vllm/model_executor/models/phi4mm.py" = ["E501"]
"vllm/model_executor/models/qwen3_next.py" = ["E501"]
"vllm/model_executor/layers/quantization/ptpc_fp8.py" = ["E501"]
"vllm/v1/attention/backends/mla/common.py" = ["E501"]
"vllm/v1/engine/utils.py" = ["E501"]
"vllm/v1/utils.py" = ["E501"]
"vllm/v1/worker/gpu_model_runner.py" = ["E501"]
# End of temporary ignores

[tool.ruff.format]
docstring-code-format = true

[tool.mypy]
plugins = ["pydantic.mypy"]
ignore_missing_imports = true
check_untyped_defs = true
follow_imports = "silent"

[tool.pytest.ini_options]
# Custom markers; the text after the colon is the marker's description.
markers = [
    "slow_test",
    "skip_global_cleanup",
    "core_model: enable this model test in each PR instead of only nightly",
    "hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)",
    "cpu_model: enable this model test in CPU tests",
    "cpu_test: mark test as CPU-only test",
    "split: run this test as part of a split",
    "distributed: run this test only in distributed GPU tests",
    "skip_v1: do not run this test with v1",
    "optional: optional tests that are automatically skipped, include --optional to run them",
]

[tool.ty.src]
root = "./vllm"
respect-ignore-files = true

[tool.ty.environment]
python = "./.venv"

[tool.typos.files]
# these files may be written in non english words
extend-exclude = [
    "tests/models/fixtures/*",
    "tests/prompts/*",
    "benchmarks/sonnet.txt",
    "tests/lora/data/*",
    "build/*",
    "vllm/third_party/*",
]
ignore-hidden = true
ignore-files = true
ignore-dot = true
ignore-vcs = true
ignore-global = true
ignore-parent = true

[tool.typos.default]
binary = false
check-filename = false
check-file = true
unicode = true
ignore-hex = true
identifier-leading-digits = false
locale = "en"
# Regexes for identifiers that are exempt from spell checking.
# "(UE4M3|ue4m3)" must be a parenthesised alternation: written with square
# brackets it is a character class that matches any identifier containing a
# single one of those characters, which exempts almost every identifier.
extend-ignore-identifiers-re = [
    "NVML_*",
    ".*Unc.*",
    ".*_thw",
    ".*UE8M0.*",
    ".*(UE4M3|ue4m3).*",
    ".*eles.*",
    ".*[Tt]h[rR].*",
]
extend-ignore-words-re = []
extend-ignore-re = []

# Each entry maps an identifier to itself so it is accepted as spelled.
[tool.typos.default.extend-identifiers]
bbc5b7ede = "bbc5b7ede"
womens_doubles = "womens_doubles"
v_2nd = "v_2nd"
# splitted_input = "splitted_input"
NOOPs = "NOOPs"
typ = "typ"
nin_shortcut = "nin_shortcut"
UperNetDecoder = "UperNetDecoder"
subtile = "subtile"
cudaDevAttrMaxSharedMemoryPerBlockOptin = "cudaDevAttrMaxSharedMemoryPerBlockOptin"
SFOuput = "SFOuput"
# huggingface transformers repo uses these words
depthwise_seperable_out_channel = "depthwise_seperable_out_channel"
DepthWiseSeperableConv1d = "DepthWiseSeperableConv1d"
depthwise_seperable_CNN = "depthwise_seperable_CNN"

[tool.typos.default.extend-words]
iy = "iy"
tendencias = "tendencias"
# intel cpu features
tme = "tme"
dout = "dout"
Pn = "Pn"
arange = "arange"

# Per-file-type overrides follow; empty tables are kept as placeholders.
[tool.typos.type.py]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.py.extend-identifiers]
arange = "arange"
NDArray = "NDArray"
EOFError = "EOFError"
fo = "fo"
ba = "ba"

[tool.typos.type.py.extend-words]
ba = "ba"

[tool.typos.type.cpp]
# Treat CUDA sources as part of the cpp file type.
extend-glob = ["*.cu"]
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.cpp.extend-identifiers]
countr_one = "countr_one"
k_ot = "k_ot"
ot = "ot"

[tool.typos.type.cpp.extend-words]

[tool.typos.type.rust]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.rust.extend-identifiers]
flate2 = "flate2"

[tool.typos.type.rust.extend-words]
ser = "ser"

[tool.typos.type.lock]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.lock.extend-identifiers]

[tool.typos.type.lock.extend-words]

[tool.typos.type.jl]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.jl.extend-identifiers]

[tool.typos.type.jl.extend-words]
modul = "modul"
egals = "egals"
usig = "usig"
egal = "egal"

[tool.typos.type.go]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.go.extend-identifiers]
flate = "flate"

[tool.typos.type.go.extend-words]

[tool.typos.type.css]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.css.extend-identifiers]
nd = "nd"

[tool.typos.type.css.extend-words]

[tool.typos.type.man]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.man.extend-identifiers]
Nd = "Nd"

[tool.typos.type.man.extend-words]

[tool.typos.type.cert]
extend-glob = []
check-file = false
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.cert.extend-identifiers]

[tool.typos.type.cert.extend-words]

[tool.typos.type.sh]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.sh.extend-identifiers]
ot = "ot"

[tool.typos.type.sh.extend-words]

[tool.typos.type.vimscript]
extend-glob = []
extend-ignore-identifiers-re = []
extend-ignore-words-re = []
extend-ignore-re = []

[tool.typos.type.vimscript.extend-identifiers]
windo = "windo"

[tool.typos.type.vimscript.extend-words]

[tool.uv]
no-build-isolation-package = ["torch"]