From a1fe24d961d85089c8a254032d35e4bdbca278d6 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 23 May 2025 11:09:53 +0200
Subject: [PATCH] Migrate docs from Sphinx to MkDocs (#18145)
Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
.buildkite/test-pipeline.yaml | 9 +-
.gitignore | 6 +-
.pre-commit-config.yaml | 1 +
.readthedocs.yaml | 8 +-
docker/Dockerfile | 2 +
docs/.nav.yml | 51 +
docs/Makefile | 25 -
docs/README.md | 71 +-
docs/api/README.md | 107 ++
docs/api/vllm/.meta.yml | 2 +
.../dockerfile-stages-dependency.png | Bin 0 -> 121821 bytes
.../deployment/anything-llm-chat-with-doc.png | Bin
.../anything-llm-chat-without-doc.png | Bin
.../deployment/anything-llm-provider.png | Bin
.../deployment/anything-llm-upload-doc.png | Bin
.../architecture_helm_deployment.png | Bin
.../assets/deployment/chatbox-chat.png | Bin
.../assets/deployment/chatbox-settings.png | Bin
.../assets/deployment/dify-chat.png | Bin
.../assets/deployment/dify-create-chatbot.png | Bin
.../assets/deployment/dify-settings.png | Bin
.../assets/deployment/open_webui.png | Bin
.../assets/deployment/streamlit-chat.png | Bin
.../arch_overview/entrypoints.excalidraw.png | Bin
.../arch_overview/llm_engine.excalidraw.png | Bin
docs/{source => }/assets/design/hierarchy.png | Bin
.../assets/design/v1/metrics/intervals-1.png | Bin
.../assets/design/v1/metrics/intervals-2.png | Bin
.../assets/design/v1/metrics/intervals-3.png | Bin
.../v1/prefix_caching/example-time-1.png | Bin
.../v1/prefix_caching/example-time-3.png | Bin
.../v1/prefix_caching/example-time-4.png | Bin
.../v1/prefix_caching/example-time-5.png | Bin
.../v1/prefix_caching/example-time-6.png | Bin
.../v1/prefix_caching/example-time-7.png | Bin
.../assets/design/v1/prefix_caching/free.png | Bin
.../design/v1/prefix_caching/overview.png | Bin
.../features/disagg_prefill/abstraction.jpg | Bin
.../features/disagg_prefill/overview.jpg | Bin
docs/{source => }/assets/kernel/k_vecs.png | Bin
docs/{source => }/assets/kernel/key.png | Bin
.../{source => }/assets/kernel/logits_vec.png | Bin
docs/{source => }/assets/kernel/q_vecs.png | Bin
docs/{source => }/assets/kernel/query.png | Bin
docs/{source => }/assets/kernel/v_vec.png | Bin
docs/{source => }/assets/kernel/value.png | Bin
.../assets/logos/vllm-logo-only-light.ico | Bin
.../assets/logos/vllm-logo-only-light.png | Bin
.../assets/logos/vllm-logo-text-dark.png | Bin
.../assets/logos/vllm-logo-text-light.png | Bin
docs/{source => }/community/meetups.md | 7 +-
docs/{source => }/community/sponsors.md | 0
.../contributing/deprecation_policy.md | 0
.../contributing/dockerfile/dockerfile.md | 10 +-
docs/contributing/model/README.md | 23 +
docs/{source => }/contributing/model/basic.md | 25 +-
docs/contributing/model/multimodal.md | 803 ++++++++++
.../contributing/model/registration.md | 37 +-
docs/{source => }/contributing/model/tests.md | 25 +-
docs/{source => }/contributing/overview.md | 47 +-
.../profiling.md} | 19 +-
.../contributing/vulnerability_management.md | 0
docs/deployment/docker.md | 126 ++
.../deployment/frameworks/anything-llm.md | 19 +-
.../deployment/frameworks/bentoml.md | 7 +-
.../deployment/frameworks/cerebrium.md | 9 +-
.../deployment/frameworks/chatbox.md | 13 +-
.../deployment/frameworks/dify.md | 16 +-
.../deployment/frameworks/dstack.md | 14 +-
docs/deployment/frameworks/helm.md | 95 ++
.../deployment/frameworks/litellm.md | 7 +-
.../deployment/frameworks/lobe-chat.md | 7 +-
.../{source => }/deployment/frameworks/lws.md | 7 +-
.../deployment/frameworks/modal.md | 7 +-
.../deployment/frameworks/open-webui.md | 10 +-
.../retrieval_augmented_generation.md | 7 +-
.../deployment/frameworks/skypilot.md | 25 +-
.../deployment/frameworks/streamlit.md | 10 +-
.../deployment/frameworks/triton.md | 7 +-
.../deployment/integrations/kserve.md | 7 +-
.../deployment/integrations/kubeai.md | 7 +-
.../deployment/integrations/llamastack.md | 7 +-
.../deployment/integrations/llmaz.md | 7 +-
.../integrations/production-stack.md | 9 +-
docs/{source => }/deployment/k8s.md | 12 +-
docs/{source => }/deployment/nginx.md | 40 +-
docs/{source => }/deployment/security.md | 0
docs/{source => }/design/arch_overview.md | 99 +-
.../design/automatic_prefix_caching.md | 7 +-
.../design/huggingface_integration.md | 7 +-
.../design/kernel/paged_attention.md | 97 +-
docs/{source => }/design/mm_processing.md | 25 +-
docs/{source => }/design/multiprocessing.md | 7 +-
docs/{source => }/design/plugin_system.md | 9 +-
docs/{source => }/design/v1/metrics.md | 16 +-
docs/{source => }/design/v1/prefix_caching.md | 32 +-
docs/{source => }/design/v1/torch_compile.md | 0
.../features/automatic_prefix_caching.md | 12 +-
docs/features/compatibility_matrix.md | 77 +
docs/{source => }/features/disagg_prefill.md | 30 +-
docs/{source => }/features/lora.md | 14 +-
.../features/multimodal_inputs.md | 98 +-
docs/{source => }/features/prompt_embeds.md | 7 +-
docs/features/quantization/README.md | 22 +
.../features/quantization/auto_awq.md | 7 +-
.../features/quantization/bitblas.md | 16 +-
.../{source => }/features/quantization/bnb.md | 7 +-
.../{source => }/features/quantization/fp8.md | 24 +-
.../features/quantization/gguf.md | 22 +-
.../features/quantization/gptqmodel.md | 7 +-
.../features/quantization/int4.md | 17 +-
.../features/quantization/int8.md | 17 +-
.../features/quantization/modelopt.md | 0
.../quantization/quantized_kvcache.md | 7 +-
.../features/quantization/quark.md | 20 +-
.../quantization/supported_hardware.md | 28 +
.../features/quantization/torchao.md | 0
.../features/reasoning_outputs.md | 16 +-
docs/{source => }/features/spec_decode.md | 30 +-
.../features/structured_outputs.md | 25 +-
docs/{source => }/features/tool_calling.md | 1 -
docs/{source => }/getting_started/faq.md | 13 +-
docs/getting_started/installation/.nav.yml | 5 +
docs/getting_started/installation/README.md | 20 +
.../installation/ai_accelerator.md | 117 ++
.../ai_accelerator/hpu-gaudi.inc.md | 106 +-
.../installation/ai_accelerator/neuron.inc.md | 39 +-
.../installation/ai_accelerator/tpu.inc.md | 114 +-
.../getting_started/installation/cpu.md | 164 +-
.../installation/cpu/apple.inc.md | 37 +-
.../installation/cpu/arm.inc.md | 41 +
.../installation/cpu/build.inc.md | 2 +
.../installation/cpu/s390x.inc.md | 37 +-
.../installation/cpu/x86.inc.md | 46 +
.../installation/device.template.md | 0
docs/getting_started/installation/gpu.md | 124 ++
.../installation/gpu/cuda.inc.md | 75 +-
.../installation/gpu/rocm.inc.md | 72 +-
.../installation/gpu/xpu.inc.md | 36 +-
.../installation/python_env_setup.inc.md | 0
.../getting_started/quickstart.md | 68 +-
.../getting_started/troubleshooting.md | 38 +-
.../getting_started/v1_user_guide.md | 0
docs/make.bat | 35 -
docs/mkdocs/hooks/generate_examples.py | 159 ++
docs/mkdocs/hooks/remove_announcement.py | 16 +
docs/mkdocs/hooks/url_schemes.py | 54 +
.../javascript/run_llm_widget.js} | 19 -
docs/mkdocs/overrides/main.html | 5 +
.../models/extensions/fastsafetensor.md | 0
.../models/extensions/runai_model_streamer.md | 17 +-
.../models/extensions/tensorizer.md | 12 +-
docs/{source => }/models/generative_models.md | 43 +-
docs/{source => }/models/pooling_models.md | 107 +-
docs/models/supported_models.md | 690 ++++++++
docs/{source => }/performance/benchmarks.md | 15 +-
docs/{source => }/performance/optimization.md | 9 +-
.../serving/distributed_serving.md | 39 +-
docs/serving/engine_args.md | 18 +
docs/serving/env_vars.md | 12 +
.../serving/integrations/langchain.md | 7 +-
.../serving/integrations/llamaindex.md | 7 +-
docs/{source => }/serving/metrics.md | 10 +-
.../{source => }/serving/offline_inference.md | 53 +-
.../serving/openai_compatible_server.md | 394 +++--
docs/serving/serve_args.md | 38 +
docs/{source => }/serving/usage_stats.md | 0
docs/source/_static/custom.css | 8 -
docs/source/_templates/sections/header.html | 39 -
docs/source/api/summary.md | 133 --
docs/source/autodoc2_docstring_parser.py | 21 -
docs/source/community/blog.md | 3 -
docs/source/conf.py | 263 ---
docs/source/contributing/model/index.md | 27 -
docs/source/contributing/model/multimodal.md | 834 ----------
docs/source/deployment/docker.md | 133 --
docs/source/deployment/frameworks/helm.md | 250 ---
docs/source/deployment/frameworks/index.md | 22 -
docs/source/deployment/integrations/index.md | 11 -
docs/source/features/compatibility_matrix.md | 476 ------
docs/source/features/quantization/index.md | 24 -
.../quantization/supported_hardware.md | 153 --
docs/source/generate_examples.py | 244 ---
docs/source/getting_started/installation.md | 28 -
.../installation/ai_accelerator.md | 299 ----
.../installation/cpu/arm.inc.md | 34 -
.../installation/cpu/x86.inc.md | 41 -
.../getting_started/installation/gpu.md | 301 ----
docs/source/index.md | 217 ---
docs/source/models/extensions/index.md | 9 -
docs/source/models/supported_models.md | 1406 -----------------
docs/source/serving/engine_args.md | 36 -
docs/source/serving/env_vars.md | 15 -
docs/source/serving/integrations/index.md | 8 -
docs/source/serving/serve_args.md | 47 -
docs/{source => }/training/rlhf.md | 0
docs/{source => }/training/trl.md | 9 +-
mkdocs.yaml | 117 ++
pyproject.toml | 2 +
requirements/docs.txt | 27 +-
vllm/engine/llm_engine.py | 10 +-
vllm/engine/metrics.py | 4 +-
vllm/entrypoints/llm.py | 32 +-
vllm/entrypoints/openai/protocol.py | 64 +-
vllm/envs.py | 4 +-
vllm/executor/ray_distributed_executor.py | 6 +-
vllm/model_executor/models/blip2.py | 5 +-
vllm/model_executor/models/llava.py | 5 +-
vllm/model_executor/models/llava_next.py | 5 +-
vllm/model_executor/models/mistral3.py | 5 +-
vllm/multimodal/__init__.py | 5 +-
vllm/multimodal/inputs.py | 22 +-
vllm/multimodal/registry.py | 10 +-
vllm/utils.py | 5 +-
vllm/v1/worker/gpu_worker.py | 7 +-
vllm/worker/hpu_worker.py | 7 +-
vllm/worker/worker.py | 7 +-
vllm/worker/xpu_worker.py | 7 +-
218 files changed, 4126 insertions(+), 6790 deletions(-)
create mode 100644 docs/.nav.yml
delete mode 100644 docs/Makefile
create mode 100644 docs/api/README.md
create mode 100644 docs/api/vllm/.meta.yml
create mode 100644 docs/assets/contributing/dockerfile-stages-dependency.png
rename docs/{source => }/assets/deployment/anything-llm-chat-with-doc.png (100%)
rename docs/{source => }/assets/deployment/anything-llm-chat-without-doc.png (100%)
rename docs/{source => }/assets/deployment/anything-llm-provider.png (100%)
rename docs/{source => }/assets/deployment/anything-llm-upload-doc.png (100%)
rename docs/{source => }/assets/deployment/architecture_helm_deployment.png (100%)
rename docs/{source => }/assets/deployment/chatbox-chat.png (100%)
rename docs/{source => }/assets/deployment/chatbox-settings.png (100%)
rename docs/{source => }/assets/deployment/dify-chat.png (100%)
rename docs/{source => }/assets/deployment/dify-create-chatbot.png (100%)
rename docs/{source => }/assets/deployment/dify-settings.png (100%)
rename docs/{source => }/assets/deployment/open_webui.png (100%)
rename docs/{source => }/assets/deployment/streamlit-chat.png (100%)
rename docs/{source => }/assets/design/arch_overview/entrypoints.excalidraw.png (100%)
rename docs/{source => }/assets/design/arch_overview/llm_engine.excalidraw.png (100%)
rename docs/{source => }/assets/design/hierarchy.png (100%)
rename docs/{source => }/assets/design/v1/metrics/intervals-1.png (100%)
rename docs/{source => }/assets/design/v1/metrics/intervals-2.png (100%)
rename docs/{source => }/assets/design/v1/metrics/intervals-3.png (100%)
rename docs/{source => }/assets/design/v1/prefix_caching/example-time-1.png (100%)
rename docs/{source => }/assets/design/v1/prefix_caching/example-time-3.png (100%)
rename docs/{source => }/assets/design/v1/prefix_caching/example-time-4.png (100%)
rename docs/{source => }/assets/design/v1/prefix_caching/example-time-5.png (100%)
rename docs/{source => }/assets/design/v1/prefix_caching/example-time-6.png (100%)
rename docs/{source => }/assets/design/v1/prefix_caching/example-time-7.png (100%)
rename docs/{source => }/assets/design/v1/prefix_caching/free.png (100%)
rename docs/{source => }/assets/design/v1/prefix_caching/overview.png (100%)
rename docs/{source => }/assets/features/disagg_prefill/abstraction.jpg (100%)
rename docs/{source => }/assets/features/disagg_prefill/overview.jpg (100%)
rename docs/{source => }/assets/kernel/k_vecs.png (100%)
rename docs/{source => }/assets/kernel/key.png (100%)
rename docs/{source => }/assets/kernel/logits_vec.png (100%)
rename docs/{source => }/assets/kernel/q_vecs.png (100%)
rename docs/{source => }/assets/kernel/query.png (100%)
rename docs/{source => }/assets/kernel/v_vec.png (100%)
rename docs/{source => }/assets/kernel/value.png (100%)
rename docs/{source => }/assets/logos/vllm-logo-only-light.ico (100%)
rename docs/{source => }/assets/logos/vllm-logo-only-light.png (100%)
rename docs/{source => }/assets/logos/vllm-logo-text-dark.png (100%)
rename docs/{source => }/assets/logos/vllm-logo-text-light.png (100%)
rename docs/{source => }/community/meetups.md (98%)
rename docs/{source => }/community/sponsors.md (100%)
rename docs/{source => }/contributing/deprecation_policy.md (100%)
rename docs/{source => }/contributing/dockerfile/dockerfile.md (89%)
create mode 100644 docs/contributing/model/README.md
rename docs/{source => }/contributing/model/basic.md (87%)
create mode 100644 docs/contributing/model/multimodal.md
rename docs/{source => }/contributing/model/registration.md (52%)
rename docs/{source => }/contributing/model/tests.md (75%)
rename docs/{source => }/contributing/overview.md (87%)
rename docs/{source/contributing/profiling/profiling_index.md => contributing/profiling.md} (90%)
rename docs/{source => }/contributing/vulnerability_management.md (100%)
create mode 100644 docs/deployment/docker.md
rename docs/{source => }/deployment/frameworks/anything-llm.md (78%)
rename docs/{source => }/deployment/frameworks/bentoml.md (89%)
rename docs/{source => }/deployment/frameworks/cerebrium.md (98%)
rename docs/{source => }/deployment/frameworks/chatbox.md (84%)
rename docs/{source => }/deployment/frameworks/dify.md (90%)
rename docs/{source => }/deployment/frameworks/dstack.md (83%)
create mode 100644 docs/deployment/frameworks/helm.md
rename docs/{source => }/deployment/frameworks/litellm.md (97%)
rename docs/{source => }/deployment/frameworks/lobe-chat.md (89%)
rename docs/{source => }/deployment/frameworks/lws.md (99%)
rename docs/{source => }/deployment/frameworks/modal.md (85%)
rename docs/{source => }/deployment/frameworks/open-webui.md (87%)
rename docs/{source => }/deployment/frameworks/retrieval_augmented_generation.md (96%)
rename docs/{source => }/deployment/frameworks/skypilot.md (97%)
rename docs/{source => }/deployment/frameworks/streamlit.md (91%)
rename docs/{source => }/deployment/frameworks/triton.md (87%)
rename docs/{source => }/deployment/integrations/kserve.md (85%)
rename docs/{source => }/deployment/integrations/kubeai.md (93%)
rename docs/{source => }/deployment/integrations/llamastack.md (94%)
rename docs/{source => }/deployment/integrations/llmaz.md (87%)
rename docs/{source => }/deployment/integrations/production-stack.md (98%)
rename docs/{source => }/deployment/k8s.md (98%)
rename docs/{source => }/deployment/nginx.md (77%)
rename docs/{source => }/deployment/security.md (100%)
rename docs/{source => }/design/arch_overview.md (81%)
rename docs/{source => }/design/automatic_prefix_caching.md (98%)
rename docs/{source => }/design/huggingface_integration.md (98%)
rename docs/{source => }/design/kernel/paged_attention.md (94%)
rename docs/{source => }/design/mm_processing.md (61%)
rename docs/{source => }/design/multiprocessing.md (97%)
rename docs/{source => }/design/plugin_system.md (86%)
rename docs/{source => }/design/v1/metrics.md (98%)
rename docs/{source => }/design/v1/prefix_caching.md (94%)
rename docs/{source => }/design/v1/torch_compile.md (100%)
rename docs/{source => }/features/automatic_prefix_caching.md (91%)
create mode 100644 docs/features/compatibility_matrix.md
rename docs/{source => }/features/disagg_prefill.md (87%)
rename docs/{source => }/features/lora.md (96%)
rename docs/{source => }/features/multimodal_inputs.md (84%)
rename docs/{source => }/features/prompt_embeds.md (92%)
create mode 100644 docs/features/quantization/README.md
rename docs/{source => }/features/quantization/auto_awq.md (98%)
rename docs/{source => }/features/quantization/bitblas.md (76%)
rename docs/{source => }/features/quantization/bnb.md (97%)
rename docs/{source => }/features/quantization/fp8.md (88%)
rename docs/{source => }/features/quantization/gguf.md (76%)
rename docs/{source => }/features/quantization/gptqmodel.md (98%)
rename docs/{source => }/features/quantization/int4.md (94%)
rename docs/{source => }/features/quantization/int8.md (92%)
rename docs/{source => }/features/quantization/modelopt.md (100%)
rename docs/{source => }/features/quantization/quantized_kvcache.md (98%)
rename docs/{source => }/features/quantization/quark.md (94%)
create mode 100644 docs/features/quantization/supported_hardware.md
rename docs/{source => }/features/quantization/torchao.md (100%)
rename docs/{source => }/features/reasoning_outputs.md (97%)
rename docs/{source => }/features/spec_decode.md (93%)
rename docs/{source => }/features/structured_outputs.md (96%)
rename docs/{source => }/features/tool_calling.md (99%)
rename docs/{source => }/getting_started/faq.md (91%)
create mode 100644 docs/getting_started/installation/.nav.yml
create mode 100644 docs/getting_started/installation/README.md
create mode 100644 docs/getting_started/installation/ai_accelerator.md
rename docs/{source => }/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md (84%)
rename docs/{source => }/getting_started/installation/ai_accelerator/neuron.inc.md (79%)
rename docs/{source => }/getting_started/installation/ai_accelerator/tpu.inc.md (55%)
rename docs/{source => }/getting_started/installation/cpu.md (74%)
rename docs/{source => }/getting_started/installation/cpu/apple.inc.md (58%)
create mode 100644 docs/getting_started/installation/cpu/arm.inc.md
rename docs/{source => }/getting_started/installation/cpu/build.inc.md (96%)
rename docs/{source => }/getting_started/installation/cpu/s390x.inc.md (64%)
create mode 100644 docs/getting_started/installation/cpu/x86.inc.md
rename docs/{source => }/getting_started/installation/device.template.md (100%)
create mode 100644 docs/getting_started/installation/gpu.md
rename docs/{source => }/getting_started/installation/gpu/cuda.inc.md (74%)
rename docs/{source => }/getting_started/installation/gpu/rocm.inc.md (72%)
rename docs/{source => }/getting_started/installation/gpu/xpu.inc.md (67%)
rename docs/{source => }/getting_started/installation/python_env_setup.inc.md (100%)
rename docs/{source => }/getting_started/quickstart.md (75%)
rename docs/{source => }/getting_started/troubleshooting.md (86%)
rename docs/{source => }/getting_started/v1_user_guide.md (100%)
delete mode 100644 docs/make.bat
create mode 100644 docs/mkdocs/hooks/generate_examples.py
create mode 100644 docs/mkdocs/hooks/remove_announcement.py
create mode 100644 docs/mkdocs/hooks/url_schemes.py
rename docs/{source/_static/custom.js => mkdocs/javascript/run_llm_widget.js} (54%)
create mode 100644 docs/mkdocs/overrides/main.html
rename docs/{source => }/models/extensions/fastsafetensor.md (100%)
rename docs/{source => }/models/extensions/runai_model_streamer.md (86%)
rename docs/{source => }/models/extensions/tensorizer.md (79%)
rename docs/{source => }/models/generative_models.md (63%)
rename docs/{source => }/models/pooling_models.md (62%)
create mode 100644 docs/models/supported_models.md
rename docs/{source => }/performance/benchmarks.md (86%)
rename docs/{source => }/performance/optimization.md (98%)
rename docs/{source => }/serving/distributed_serving.md (73%)
create mode 100644 docs/serving/engine_args.md
create mode 100644 docs/serving/env_vars.md
rename docs/{source => }/serving/integrations/langchain.md (93%)
rename docs/{source => }/serving/integrations/llamaindex.md (91%)
rename docs/{source => }/serving/metrics.md (90%)
rename docs/{source => }/serving/offline_inference.md (76%)
rename docs/{source => }/serving/openai_compatible_server.md (61%)
create mode 100644 docs/serving/serve_args.md
rename docs/{source => }/serving/usage_stats.md (100%)
delete mode 100644 docs/source/_static/custom.css
delete mode 100644 docs/source/_templates/sections/header.html
delete mode 100644 docs/source/api/summary.md
delete mode 100644 docs/source/autodoc2_docstring_parser.py
delete mode 100644 docs/source/community/blog.md
delete mode 100644 docs/source/conf.py
delete mode 100644 docs/source/contributing/model/index.md
delete mode 100644 docs/source/contributing/model/multimodal.md
delete mode 100644 docs/source/deployment/docker.md
delete mode 100644 docs/source/deployment/frameworks/helm.md
delete mode 100644 docs/source/deployment/frameworks/index.md
delete mode 100644 docs/source/deployment/integrations/index.md
delete mode 100644 docs/source/features/compatibility_matrix.md
delete mode 100644 docs/source/features/quantization/index.md
delete mode 100644 docs/source/features/quantization/supported_hardware.md
delete mode 100644 docs/source/generate_examples.py
delete mode 100644 docs/source/getting_started/installation.md
delete mode 100644 docs/source/getting_started/installation/ai_accelerator.md
delete mode 100644 docs/source/getting_started/installation/cpu/arm.inc.md
delete mode 100644 docs/source/getting_started/installation/cpu/x86.inc.md
delete mode 100644 docs/source/getting_started/installation/gpu.md
delete mode 100644 docs/source/index.md
delete mode 100644 docs/source/models/extensions/index.md
delete mode 100644 docs/source/models/supported_models.md
delete mode 100644 docs/source/serving/engine_args.md
delete mode 100644 docs/source/serving/env_vars.md
delete mode 100644 docs/source/serving/integrations/index.md
delete mode 100644 docs/source/serving/serve_args.md
rename docs/{source => }/training/rlhf.md (100%)
rename docs/{source => }/training/trl.md (66%)
create mode 100644 mkdocs.yaml
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 6a7d220bbdcf8..774a5df16d7f3 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -33,14 +33,13 @@ steps:
- label: Documentation Build # 2min
mirror_hardwares: [amdexperimental]
- working_dir: "/vllm-workspace/test_docs/docs"
+ working_dir: "/vllm-workspace/test_docs"
fast_check: true
no_gpu: True
commands:
- - pip install -r ../../requirements/docs.txt
- - SPHINXOPTS=\"-W\" make html
- # Check API reference (if it fails, you may have missing mock imports)
- - grep \"sig sig-object py\" build/html/api/vllm/vllm.sampling_params.html
+ - pip install -r ../requirements/docs.txt
+ # TODO: add `--strict` once warnings in docstrings are fixed
+ - mkdocs build
- label: Async Engine, Inputs, Utils, Worker Test # 24min
mirror_hardwares: [amdexperimental]
diff --git a/.gitignore b/.gitignore
index 2756c612b82f8..8d5af1bed92d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,11 +77,6 @@ instance/
# Scrapy stuff:
.scrapy
-# Sphinx documentation
-docs/_build/
-docs/source/getting_started/examples/
-docs/source/api/vllm
-
# PyBuilder
.pybuilder/
target/
@@ -151,6 +146,7 @@ venv.bak/
# mkdocs documentation
/site
+docs/getting_started/examples
# mypy
.mypy_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f5c0c368d578c..658de23cf4da9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,6 +39,7 @@ repos:
rev: v0.9.29
hooks:
- id: pymarkdown
+ exclude: '.*\.inc\.md'
args: [fix]
- repo: https://github.com/rhysd/actionlint
rev: v1.7.7
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 2781ec223b665..98c3be25f7e76 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -8,12 +8,8 @@ build:
tools:
python: "3.12"
-sphinx:
- configuration: docs/source/conf.py
- fail_on_warning: true
-
-# If using Sphinx, optionally build your docs in additional formats such as PDF
-formats: []
+mkdocs:
+ configuration: mkdocs.yaml
# Optionally declare the Python requirements required to build your docs
python:
diff --git a/docker/Dockerfile b/docker/Dockerfile
index cc3499d1f0a91..f28a1618298ff 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -329,7 +329,9 @@ COPY vllm/v1 /usr/local/lib/python3.12/dist-packages/vllm/v1
# will not be imported by other tests
RUN mkdir test_docs
RUN mv docs test_docs/
+RUN cp -r examples test_docs/
RUN mv vllm test_docs/
+RUN mv mkdocs.yaml test_docs/
#################### TEST IMAGE ####################
#################### OPENAI API SERVER ####################
diff --git a/docs/.nav.yml b/docs/.nav.yml
new file mode 100644
index 0000000000000..c410b6b8223ba
--- /dev/null
+++ b/docs/.nav.yml
@@ -0,0 +1,51 @@
+nav:
+ - Home:
+ - vLLM: README.md
+ - Getting Started:
+ - getting_started/quickstart.md
+ - getting_started/installation
+ - Examples:
+ - LMCache: getting_started/examples/lmcache
+ - getting_started/examples/offline_inference
+ - getting_started/examples/online_serving
+ - getting_started/examples/other
+ - Roadmap: https://roadmap.vllm.ai
+ - Releases: https://github.com/vllm-project/vllm/releases
+ - User Guide:
+ - Inference and Serving:
+ - serving/offline_inference.md
+ - serving/openai_compatible_server.md
+ - serving/*
+ - serving/integrations
+ - Training: training
+ - Deployment:
+ - deployment/*
+ - deployment/frameworks
+ - deployment/integrations
+ - Performance: performance
+ - Models:
+ - models/supported_models.md
+ - models/generative_models.md
+ - models/pooling_models.md
+ - models/extensions
+ - Features:
+ - features/compatibility_matrix.md
+ - features/*
+ - features/quantization
+ - Other:
+ - getting_started/*
+ - Developer Guide:
+ - contributing/overview.md
+ - glob: contributing/*
+ flatten_single_child_sections: true
+ - contributing/model
+ - Design Documents:
+ - V0: design
+ - V1: design/v1
+ - API Reference:
+ - api/README.md
+ - glob: api/vllm/*
+ preserve_directory_names: true
+ - Community:
+ - community/*
+ - vLLM Blog: https://blog.vllm.ai
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index d3b429dfb9257..0000000000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS ?=
-SPHINXBUILD ?= sphinx-build
-SOURCEDIR = source
-BUILDDIR = build
-
-# Put it first so that "make" without argument is like "make help".
-help:
- @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
- @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-clean:
- @$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
- rm -rf "$(SOURCEDIR)/getting_started/examples"
- rm -rf "$(SOURCEDIR)/api/vllm"
diff --git a/docs/README.md b/docs/README.md
index dcd5e759dfa88..57b1d03deee28 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,43 +1,50 @@
-# vLLM documents
+# Welcome to vLLM
-## Build the docs
+
+Easy, fast, and cheap LLM serving for everyone + +
-```bash -cd docs -``` + -- Install the dependencies: +vLLM is a fast and easy-to-use library for LLM inference and serving. -```bash -pip install -r ../requirements/docs.txt -``` +Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry. -- Clean the previous build (optional but recommended): +vLLM is fast with: -```bash -make clean -``` +- State-of-the-art serving throughput +- Efficient management of attention key and value memory with [**PagedAttention**](https://blog.vllm.ai/2023/06/20/vllm.html) +- Continuous batching of incoming requests +- Fast model execution with CUDA/HIP graph +- Quantization: [GPTQ](https://arxiv.org/abs/2210.17323), [AWQ](https://arxiv.org/abs/2306.00978), INT4, INT8, and FP8 +- Optimized CUDA kernels, including integration with FlashAttention and FlashInfer. +- Speculative decoding +- Chunked prefill -- Generate the HTML documentation: +vLLM is flexible and easy to use with: -```bash -make html -``` +- Seamless integration with popular HuggingFace models +- High-throughput serving with various decoding algorithms, including *parallel sampling*, *beam search*, and more +- Tensor parallelism and pipeline parallelism support for distributed inference +- Streaming outputs +- OpenAI-compatible API server +- Support NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs, Gaudi® accelerators and GPUs, IBM Power CPUs, TPU, and AWS Trainium and Inferentia Accelerators. +- Prefix caching support +- Multi-lora support -## Open the docs with your browser +For more information, check out the following: -- Serve the documentation locally: - -```bash -python -m http.server -d build/html/ -``` - -This will start a local server at http://localhost:8000. You can now open your browser and view the documentation. - -If port 8000 is already in use, you can specify a different port, for example: - -```bash -python -m http.server 3000 -d build/html/ -``` +- [vLLM announcing blog post](https://vllm.ai) (intro to PagedAttention) +- [vLLM paper](https://arxiv.org/abs/2309.06180) (SOSP 2023) +- [How continuous batching enables 23x throughput in LLM inference while reducing p50 latency](https://www.anyscale.com/blog/continuous-batching-llm-inference) by Cade Daniel et al. +- [vLLM Meetups][meetups] diff --git a/docs/api/README.md b/docs/api/README.md new file mode 100644 index 0000000000000..5c7b2ca79ee2c --- /dev/null +++ b/docs/api/README.md @@ -0,0 +1,107 @@ +# Summary + +[](){ #configuration } + +## Configuration + +API documentation for vLLM's configuration classes. + +- [vllm.config.ModelConfig][] +- [vllm.config.CacheConfig][] +- [vllm.config.TokenizerPoolConfig][] +- [vllm.config.LoadConfig][] +- [vllm.config.ParallelConfig][] +- [vllm.config.SchedulerConfig][] +- [vllm.config.DeviceConfig][] +- [vllm.config.SpeculativeConfig][] +- [vllm.config.LoRAConfig][] +- [vllm.config.PromptAdapterConfig][] +- [vllm.config.MultiModalConfig][] +- [vllm.config.PoolerConfig][] +- [vllm.config.DecodingConfig][] +- [vllm.config.ObservabilityConfig][] +- [vllm.config.KVTransferConfig][] +- [vllm.config.CompilationConfig][] +- [vllm.config.VllmConfig][] + +[](){ #offline-inference-api } + +## Offline Inference + +LLM Class. + +- [vllm.LLM][] + +LLM Inputs. + +- [vllm.inputs.PromptType][] +- [vllm.inputs.TextPrompt][] +- [vllm.inputs.TokensPrompt][] + +## vLLM Engines + +Engine classes for offline and online inference. + +- [vllm.LLMEngine][] +- [vllm.AsyncLLMEngine][] + +## Inference Parameters + +Inference parameters for vLLM APIs. + +[](){ #sampling-params } +[](){ #pooling-params } + +- [vllm.SamplingParams][] +- [vllm.PoolingParams][] + +[](){ #multi-modality } + +## Multi-Modality + +vLLM provides experimental support for multi-modal models through the [vllm.multimodal][] package. + +Multi-modal inputs can be passed alongside text and token prompts to [supported models][supported-mm-models] +via the `multi_modal_data` field in [vllm.inputs.PromptType][]. + +Looking to add your own multi-modal model? Please follow the instructions listed [here][supports-multimodal]. + +- [vllm.multimodal.MULTIMODAL_REGISTRY][] + +### Inputs + +User-facing inputs. + +- [vllm.multimodal.inputs.MultiModalDataDict][] + +Internal data structures. + +- [vllm.multimodal.inputs.PlaceholderRange][] +- [vllm.multimodal.inputs.NestedTensors][] +- [vllm.multimodal.inputs.MultiModalFieldElem][] +- [vllm.multimodal.inputs.MultiModalFieldConfig][] +- [vllm.multimodal.inputs.MultiModalKwargsItem][] +- [vllm.multimodal.inputs.MultiModalKwargs][] +- [vllm.multimodal.inputs.MultiModalInputs][] + +### Data Parsing + +- [vllm.multimodal.parse][] + +### Data Processing + +- [vllm.multimodal.processing][] + +### Memory Profiling + +- [vllm.multimodal.profiling][] + +### Registry + +- [vllm.multimodal.registry][] + +## Model Development + +- [vllm.model_executor.models.interfaces_base][] +- [vllm.model_executor.models.interfaces][] +- [vllm.model_executor.models.adapters][] diff --git a/docs/api/vllm/.meta.yml b/docs/api/vllm/.meta.yml new file mode 100644 index 0000000000000..c15adfec644cf --- /dev/null +++ b/docs/api/vllm/.meta.yml @@ -0,0 +1,2 @@ +search: + boost: 0.5 diff --git a/docs/assets/contributing/dockerfile-stages-dependency.png b/docs/assets/contributing/dockerfile-stages-dependency.png new file mode 100644 index 0000000000000000000000000000000000000000..0838bfa37fe62d60fba9adcbd18c81de0809f253 GIT binary patch literal 121821 zcmcG$cU+Et-#>mLWHii(R8~kO?b0+tT4-oWXlZHhBtnw$Y0*+r+EGeri9%`bol1M} z{e2$ind`c~*YCc6zrTLx{kSjk={(Qlc)wrcIbQGMp@N(=&3eZ5Boc|{+}TqXNhDeq z5@}^Q^-BE9;pF$~_+!mA8R=7`CF1`=Gb3M+NGzmtrz9`i2KF}@SSock3A6@1+)qWV zL%Z!%1PjY1n!6kK>N7nR?z@=#Zsm55u#eYbs+9X;1;fw2I~h?L&GhDmLgEqLtB0vr z)z&F*syyhna-HtVoy~M}cB9jVUr9ge)@jsu#5)X)i90O4&rsejy3i?R?&IUL0++J< zQdl*jxc2W4B$DfzlXKhu{iEUe{~vxj!re>$2KtES&!3lw&BdmrrA0;Q*Tico7Od#i zsCX+|QBmYr2-j5{Bc%vE6UTB-h2^`$HI2i)~p=W0_&=GteS
zhDsK0{yBL6-3exw%*H9cnuV9#gFAT==B!qbB=*6(6~vJQC~w^Av2k%dbG*Pr`Wvt5
z>dsB{si>+R;^1J~y!p88@YQpAR3xv<`ub6@W$f%e!8C=bk(`opAZQbbbavyT9R!+c
z&DgSM&tPxwd1_EJap!r#^QJ2#h^l)^ %VZoE)0SP=PM}
z#@)Mnx01+DYRpn*#gM8pe|C=05D7=!ey#^8B*EHV!X
z#=y)D{{|H#d{vjn!32;EYsk%F>w$h-@2Iimk0aV648PSwM-r(W@iOZ0d@gzHyXLo#
z&V@!tuWAW0+VT4s+ub@G2!gCUPyjpt#T<9*(7Y`$1e`nEdS(Bta4iK7714c&;)!Ss
z3KHf#`%>XAS{5Xok>nfthdlWr+M~5wX1;?(uKwRBq1T-Mi4sQN5rOfbY3pD&F)xs#
zhEAT-P)3THjO7`N$1HG^84}E2cH8oTBZuTq>Kq7;i;e%|MLX4z)E5~`bQp9&Fhd|P
zpag;w6)vY0o;AY4!AyrwIWghqp;$GaRZaqO;7HG~0)p6wXP)LdII2d3pKouvi?zJZnWx*}#LCJfvC4q}38!Yt<(#Z