From a1fe24d961d85089c8a254032d35e4bdbca278d6 Mon Sep 17 00:00:00 2001
From: Harry Mellor <19981378+hmellor@users.noreply.github.com>
Date: Fri, 23 May 2025 11:09:53 +0200
Subject: [PATCH] Migrate docs from Sphinx to MkDocs (#18145)

Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com>
---
 .buildkite/test-pipeline.yaml                 |    9 +-
 .gitignore                                    |    6 +-
 .pre-commit-config.yaml                       |    1 +
 .readthedocs.yaml                             |    8 +-
 docker/Dockerfile                             |    2 +
 docs/.nav.yml                                 |   51 +
 docs/Makefile                                 |   25 -
 docs/README.md                                |   71 +-
 docs/api/README.md                            |  107 ++
 docs/api/vllm/.meta.yml                       |    2 +
 .../dockerfile-stages-dependency.png          |  Bin 0 -> 121821 bytes
 .../deployment/anything-llm-chat-with-doc.png |  Bin
 .../anything-llm-chat-without-doc.png         |  Bin
 .../deployment/anything-llm-provider.png      |  Bin
 .../deployment/anything-llm-upload-doc.png    |  Bin
 .../architecture_helm_deployment.png          |  Bin
 .../assets/deployment/chatbox-chat.png        |  Bin
 .../assets/deployment/chatbox-settings.png    |  Bin
 .../assets/deployment/dify-chat.png           |  Bin
 .../assets/deployment/dify-create-chatbot.png |  Bin
 .../assets/deployment/dify-settings.png       |  Bin
 .../assets/deployment/open_webui.png          |  Bin
 .../assets/deployment/streamlit-chat.png      |  Bin
 .../arch_overview/entrypoints.excalidraw.png  |  Bin
 .../arch_overview/llm_engine.excalidraw.png   |  Bin
 docs/{source => }/assets/design/hierarchy.png |  Bin
 .../assets/design/v1/metrics/intervals-1.png  |  Bin
 .../assets/design/v1/metrics/intervals-2.png  |  Bin
 .../assets/design/v1/metrics/intervals-3.png  |  Bin
 .../v1/prefix_caching/example-time-1.png      |  Bin
 .../v1/prefix_caching/example-time-3.png      |  Bin
 .../v1/prefix_caching/example-time-4.png      |  Bin
 .../v1/prefix_caching/example-time-5.png      |  Bin
 .../v1/prefix_caching/example-time-6.png      |  Bin
 .../v1/prefix_caching/example-time-7.png      |  Bin
 .../assets/design/v1/prefix_caching/free.png  |  Bin
 .../design/v1/prefix_caching/overview.png     |  Bin
 .../features/disagg_prefill/abstraction.jpg   |  Bin
 .../features/disagg_prefill/overview.jpg      |  Bin
 docs/{source => }/assets/kernel/k_vecs.png    |  Bin
 docs/{source => }/assets/kernel/key.png       |  Bin
 .../{source => }/assets/kernel/logits_vec.png |  Bin
 docs/{source => }/assets/kernel/q_vecs.png    |  Bin
 docs/{source => }/assets/kernel/query.png     |  Bin
 docs/{source => }/assets/kernel/v_vec.png     |  Bin
 docs/{source => }/assets/kernel/value.png     |  Bin
 .../assets/logos/vllm-logo-only-light.ico     |  Bin
 .../assets/logos/vllm-logo-only-light.png     |  Bin
 .../assets/logos/vllm-logo-text-dark.png      |  Bin
 .../assets/logos/vllm-logo-text-light.png     |  Bin
 docs/{source => }/community/meetups.md        |    7 +-
 docs/{source => }/community/sponsors.md       |    0
 .../contributing/deprecation_policy.md        |    0
 .../contributing/dockerfile/dockerfile.md     |   10 +-
 docs/contributing/model/README.md             |   23 +
 docs/{source => }/contributing/model/basic.md |   25 +-
 docs/contributing/model/multimodal.md         |  803 ++++++++++
 .../contributing/model/registration.md        |   37 +-
 docs/{source => }/contributing/model/tests.md |   25 +-
 docs/{source => }/contributing/overview.md    |   47 +-
 .../profiling.md}                             |   19 +-
 .../contributing/vulnerability_management.md  |    0
 docs/deployment/docker.md                     |  126 ++
 .../deployment/frameworks/anything-llm.md     |   19 +-
 .../deployment/frameworks/bentoml.md          |    7 +-
 .../deployment/frameworks/cerebrium.md        |    9 +-
 .../deployment/frameworks/chatbox.md          |   13 +-
 .../deployment/frameworks/dify.md             |   16 +-
 .../deployment/frameworks/dstack.md           |   14 +-
 docs/deployment/frameworks/helm.md            |   95 ++
 .../deployment/frameworks/litellm.md          |    7 +-
 .../deployment/frameworks/lobe-chat.md        |    7 +-
 .../{source => }/deployment/frameworks/lws.md |    7 +-
 .../deployment/frameworks/modal.md            |    7 +-
 .../deployment/frameworks/open-webui.md       |   10 +-
 .../retrieval_augmented_generation.md         |    7 +-
 .../deployment/frameworks/skypilot.md         |   25 +-
 .../deployment/frameworks/streamlit.md        |   10 +-
 .../deployment/frameworks/triton.md           |    7 +-
 .../deployment/integrations/kserve.md         |    7 +-
 .../deployment/integrations/kubeai.md         |    7 +-
 .../deployment/integrations/llamastack.md     |    7 +-
 .../deployment/integrations/llmaz.md          |    7 +-
 .../integrations/production-stack.md          |    9 +-
 docs/{source => }/deployment/k8s.md           |   12 +-
 docs/{source => }/deployment/nginx.md         |   40 +-
 docs/{source => }/deployment/security.md      |    0
 docs/{source => }/design/arch_overview.md     |   99 +-
 .../design/automatic_prefix_caching.md        |    7 +-
 .../design/huggingface_integration.md         |    7 +-
 .../design/kernel/paged_attention.md          |   97 +-
 docs/{source => }/design/mm_processing.md     |   25 +-
 docs/{source => }/design/multiprocessing.md   |    7 +-
 docs/{source => }/design/plugin_system.md     |    9 +-
 docs/{source => }/design/v1/metrics.md        |   16 +-
 docs/{source => }/design/v1/prefix_caching.md |   32 +-
 docs/{source => }/design/v1/torch_compile.md  |    0
 .../features/automatic_prefix_caching.md      |   12 +-
 docs/features/compatibility_matrix.md         |   77 +
 docs/{source => }/features/disagg_prefill.md  |   30 +-
 docs/{source => }/features/lora.md            |   14 +-
 .../features/multimodal_inputs.md             |   98 +-
 docs/{source => }/features/prompt_embeds.md   |    7 +-
 docs/features/quantization/README.md          |   22 +
 .../features/quantization/auto_awq.md         |    7 +-
 .../features/quantization/bitblas.md          |   16 +-
 .../{source => }/features/quantization/bnb.md |    7 +-
 .../{source => }/features/quantization/fp8.md |   24 +-
 .../features/quantization/gguf.md             |   22 +-
 .../features/quantization/gptqmodel.md        |    7 +-
 .../features/quantization/int4.md             |   17 +-
 .../features/quantization/int8.md             |   17 +-
 .../features/quantization/modelopt.md         |    0
 .../quantization/quantized_kvcache.md         |    7 +-
 .../features/quantization/quark.md            |   20 +-
 .../quantization/supported_hardware.md        |   28 +
 .../features/quantization/torchao.md          |    0
 .../features/reasoning_outputs.md             |   16 +-
 docs/{source => }/features/spec_decode.md     |   30 +-
 .../features/structured_outputs.md            |   25 +-
 docs/{source => }/features/tool_calling.md    |    1 -
 docs/{source => }/getting_started/faq.md      |   13 +-
 docs/getting_started/installation/.nav.yml    |    5 +
 docs/getting_started/installation/README.md   |   20 +
 .../installation/ai_accelerator.md            |  117 ++
 .../ai_accelerator/hpu-gaudi.inc.md           |  106 +-
 .../installation/ai_accelerator/neuron.inc.md |   39 +-
 .../installation/ai_accelerator/tpu.inc.md    |  114 +-
 .../getting_started/installation/cpu.md       |  164 +-
 .../installation/cpu/apple.inc.md             |   37 +-
 .../installation/cpu/arm.inc.md               |   41 +
 .../installation/cpu/build.inc.md             |    2 +
 .../installation/cpu/s390x.inc.md             |   37 +-
 .../installation/cpu/x86.inc.md               |   46 +
 .../installation/device.template.md           |    0
 docs/getting_started/installation/gpu.md      |  124 ++
 .../installation/gpu/cuda.inc.md              |   75 +-
 .../installation/gpu/rocm.inc.md              |   72 +-
 .../installation/gpu/xpu.inc.md               |   36 +-
 .../installation/python_env_setup.inc.md      |    0
 .../getting_started/quickstart.md             |   68 +-
 .../getting_started/troubleshooting.md        |   38 +-
 .../getting_started/v1_user_guide.md          |    0
 docs/make.bat                                 |   35 -
 docs/mkdocs/hooks/generate_examples.py        |  159 ++
 docs/mkdocs/hooks/remove_announcement.py      |   16 +
 docs/mkdocs/hooks/url_schemes.py              |   54 +
 .../javascript/run_llm_widget.js}             |   19 -
 docs/mkdocs/overrides/main.html               |    5 +
 .../models/extensions/fastsafetensor.md       |    0
 .../models/extensions/runai_model_streamer.md |   17 +-
 .../models/extensions/tensorizer.md           |   12 +-
 docs/{source => }/models/generative_models.md |   43 +-
 docs/{source => }/models/pooling_models.md    |  107 +-
 docs/models/supported_models.md               |  690 ++++++++
 docs/{source => }/performance/benchmarks.md   |   15 +-
 docs/{source => }/performance/optimization.md |    9 +-
 .../serving/distributed_serving.md            |   39 +-
 docs/serving/engine_args.md                   |   18 +
 docs/serving/env_vars.md                      |   12 +
 .../serving/integrations/langchain.md         |    7 +-
 .../serving/integrations/llamaindex.md        |    7 +-
 docs/{source => }/serving/metrics.md          |   10 +-
 .../{source => }/serving/offline_inference.md |   53 +-
 .../serving/openai_compatible_server.md       |  394 +++--
 docs/serving/serve_args.md                    |   38 +
 docs/{source => }/serving/usage_stats.md      |    0
 docs/source/_static/custom.css                |    8 -
 docs/source/_templates/sections/header.html   |   39 -
 docs/source/api/summary.md                    |  133 --
 docs/source/autodoc2_docstring_parser.py      |   21 -
 docs/source/community/blog.md                 |    3 -
 docs/source/conf.py                           |  263 ---
 docs/source/contributing/model/index.md       |   27 -
 docs/source/contributing/model/multimodal.md  |  834 ----------
 docs/source/deployment/docker.md              |  133 --
 docs/source/deployment/frameworks/helm.md     |  250 ---
 docs/source/deployment/frameworks/index.md    |   22 -
 docs/source/deployment/integrations/index.md  |   11 -
 docs/source/features/compatibility_matrix.md  |  476 ------
 docs/source/features/quantization/index.md    |   24 -
 .../quantization/supported_hardware.md        |  153 --
 docs/source/generate_examples.py              |  244 ---
 docs/source/getting_started/installation.md   |   28 -
 .../installation/ai_accelerator.md            |  299 ----
 .../installation/cpu/arm.inc.md               |   34 -
 .../installation/cpu/x86.inc.md               |   41 -
 .../getting_started/installation/gpu.md       |  301 ----
 docs/source/index.md                          |  217 ---
 docs/source/models/extensions/index.md        |    9 -
 docs/source/models/supported_models.md        | 1406 -----------------
 docs/source/serving/engine_args.md            |   36 -
 docs/source/serving/env_vars.md               |   15 -
 docs/source/serving/integrations/index.md     |    8 -
 docs/source/serving/serve_args.md             |   47 -
 docs/{source => }/training/rlhf.md            |    0
 docs/{source => }/training/trl.md             |    9 +-
 mkdocs.yaml                                   |  117 ++
 pyproject.toml                                |    2 +
 requirements/docs.txt                         |   27 +-
 vllm/engine/llm_engine.py                     |   10 +-
 vllm/engine/metrics.py                        |    4 +-
 vllm/entrypoints/llm.py                       |   32 +-
 vllm/entrypoints/openai/protocol.py           |   64 +-
 vllm/envs.py                                  |    4 +-
 vllm/executor/ray_distributed_executor.py     |    6 +-
 vllm/model_executor/models/blip2.py           |    5 +-
 vllm/model_executor/models/llava.py           |    5 +-
 vllm/model_executor/models/llava_next.py      |    5 +-
 vllm/model_executor/models/mistral3.py        |    5 +-
 vllm/multimodal/__init__.py                   |    5 +-
 vllm/multimodal/inputs.py                     |   22 +-
 vllm/multimodal/registry.py                   |   10 +-
 vllm/utils.py                                 |    5 +-
 vllm/v1/worker/gpu_worker.py                  |    7 +-
 vllm/worker/hpu_worker.py                     |    7 +-
 vllm/worker/worker.py                         |    7 +-
 vllm/worker/xpu_worker.py                     |    7 +-
 218 files changed, 4126 insertions(+), 6790 deletions(-)
 create mode 100644 docs/.nav.yml
 delete mode 100644 docs/Makefile
 create mode 100644 docs/api/README.md
 create mode 100644 docs/api/vllm/.meta.yml
 create mode 100644 docs/assets/contributing/dockerfile-stages-dependency.png
 rename docs/{source => }/assets/deployment/anything-llm-chat-with-doc.png (100%)
 rename docs/{source => }/assets/deployment/anything-llm-chat-without-doc.png (100%)
 rename docs/{source => }/assets/deployment/anything-llm-provider.png (100%)
 rename docs/{source => }/assets/deployment/anything-llm-upload-doc.png (100%)
 rename docs/{source => }/assets/deployment/architecture_helm_deployment.png (100%)
 rename docs/{source => }/assets/deployment/chatbox-chat.png (100%)
 rename docs/{source => }/assets/deployment/chatbox-settings.png (100%)
 rename docs/{source => }/assets/deployment/dify-chat.png (100%)
 rename docs/{source => }/assets/deployment/dify-create-chatbot.png (100%)
 rename docs/{source => }/assets/deployment/dify-settings.png (100%)
 rename docs/{source => }/assets/deployment/open_webui.png (100%)
 rename docs/{source => }/assets/deployment/streamlit-chat.png (100%)
 rename docs/{source => }/assets/design/arch_overview/entrypoints.excalidraw.png (100%)
 rename docs/{source => }/assets/design/arch_overview/llm_engine.excalidraw.png (100%)
 rename docs/{source => }/assets/design/hierarchy.png (100%)
 rename docs/{source => }/assets/design/v1/metrics/intervals-1.png (100%)
 rename docs/{source => }/assets/design/v1/metrics/intervals-2.png (100%)
 rename docs/{source => }/assets/design/v1/metrics/intervals-3.png (100%)
 rename docs/{source => }/assets/design/v1/prefix_caching/example-time-1.png (100%)
 rename docs/{source => }/assets/design/v1/prefix_caching/example-time-3.png (100%)
 rename docs/{source => }/assets/design/v1/prefix_caching/example-time-4.png (100%)
 rename docs/{source => }/assets/design/v1/prefix_caching/example-time-5.png (100%)
 rename docs/{source => }/assets/design/v1/prefix_caching/example-time-6.png (100%)
 rename docs/{source => }/assets/design/v1/prefix_caching/example-time-7.png (100%)
 rename docs/{source => }/assets/design/v1/prefix_caching/free.png (100%)
 rename docs/{source => }/assets/design/v1/prefix_caching/overview.png (100%)
 rename docs/{source => }/assets/features/disagg_prefill/abstraction.jpg (100%)
 rename docs/{source => }/assets/features/disagg_prefill/overview.jpg (100%)
 rename docs/{source => }/assets/kernel/k_vecs.png (100%)
 rename docs/{source => }/assets/kernel/key.png (100%)
 rename docs/{source => }/assets/kernel/logits_vec.png (100%)
 rename docs/{source => }/assets/kernel/q_vecs.png (100%)
 rename docs/{source => }/assets/kernel/query.png (100%)
 rename docs/{source => }/assets/kernel/v_vec.png (100%)
 rename docs/{source => }/assets/kernel/value.png (100%)
 rename docs/{source => }/assets/logos/vllm-logo-only-light.ico (100%)
 rename docs/{source => }/assets/logos/vllm-logo-only-light.png (100%)
 rename docs/{source => }/assets/logos/vllm-logo-text-dark.png (100%)
 rename docs/{source => }/assets/logos/vllm-logo-text-light.png (100%)
 rename docs/{source => }/community/meetups.md (98%)
 rename docs/{source => }/community/sponsors.md (100%)
 rename docs/{source => }/contributing/deprecation_policy.md (100%)
 rename docs/{source => }/contributing/dockerfile/dockerfile.md (89%)
 create mode 100644 docs/contributing/model/README.md
 rename docs/{source => }/contributing/model/basic.md (87%)
 create mode 100644 docs/contributing/model/multimodal.md
 rename docs/{source => }/contributing/model/registration.md (52%)
 rename docs/{source => }/contributing/model/tests.md (75%)
 rename docs/{source => }/contributing/overview.md (87%)
 rename docs/{source/contributing/profiling/profiling_index.md => contributing/profiling.md} (90%)
 rename docs/{source => }/contributing/vulnerability_management.md (100%)
 create mode 100644 docs/deployment/docker.md
 rename docs/{source => }/deployment/frameworks/anything-llm.md (78%)
 rename docs/{source => }/deployment/frameworks/bentoml.md (89%)
 rename docs/{source => }/deployment/frameworks/cerebrium.md (98%)
 rename docs/{source => }/deployment/frameworks/chatbox.md (84%)
 rename docs/{source => }/deployment/frameworks/dify.md (90%)
 rename docs/{source => }/deployment/frameworks/dstack.md (83%)
 create mode 100644 docs/deployment/frameworks/helm.md
 rename docs/{source => }/deployment/frameworks/litellm.md (97%)
 rename docs/{source => }/deployment/frameworks/lobe-chat.md (89%)
 rename docs/{source => }/deployment/frameworks/lws.md (99%)
 rename docs/{source => }/deployment/frameworks/modal.md (85%)
 rename docs/{source => }/deployment/frameworks/open-webui.md (87%)
 rename docs/{source => }/deployment/frameworks/retrieval_augmented_generation.md (96%)
 rename docs/{source => }/deployment/frameworks/skypilot.md (97%)
 rename docs/{source => }/deployment/frameworks/streamlit.md (91%)
 rename docs/{source => }/deployment/frameworks/triton.md (87%)
 rename docs/{source => }/deployment/integrations/kserve.md (85%)
 rename docs/{source => }/deployment/integrations/kubeai.md (93%)
 rename docs/{source => }/deployment/integrations/llamastack.md (94%)
 rename docs/{source => }/deployment/integrations/llmaz.md (87%)
 rename docs/{source => }/deployment/integrations/production-stack.md (98%)
 rename docs/{source => }/deployment/k8s.md (98%)
 rename docs/{source => }/deployment/nginx.md (77%)
 rename docs/{source => }/deployment/security.md (100%)
 rename docs/{source => }/design/arch_overview.md (81%)
 rename docs/{source => }/design/automatic_prefix_caching.md (98%)
 rename docs/{source => }/design/huggingface_integration.md (98%)
 rename docs/{source => }/design/kernel/paged_attention.md (94%)
 rename docs/{source => }/design/mm_processing.md (61%)
 rename docs/{source => }/design/multiprocessing.md (97%)
 rename docs/{source => }/design/plugin_system.md (86%)
 rename docs/{source => }/design/v1/metrics.md (98%)
 rename docs/{source => }/design/v1/prefix_caching.md (94%)
 rename docs/{source => }/design/v1/torch_compile.md (100%)
 rename docs/{source => }/features/automatic_prefix_caching.md (91%)
 create mode 100644 docs/features/compatibility_matrix.md
 rename docs/{source => }/features/disagg_prefill.md (87%)
 rename docs/{source => }/features/lora.md (96%)
 rename docs/{source => }/features/multimodal_inputs.md (84%)
 rename docs/{source => }/features/prompt_embeds.md (92%)
 create mode 100644 docs/features/quantization/README.md
 rename docs/{source => }/features/quantization/auto_awq.md (98%)
 rename docs/{source => }/features/quantization/bitblas.md (76%)
 rename docs/{source => }/features/quantization/bnb.md (97%)
 rename docs/{source => }/features/quantization/fp8.md (88%)
 rename docs/{source => }/features/quantization/gguf.md (76%)
 rename docs/{source => }/features/quantization/gptqmodel.md (98%)
 rename docs/{source => }/features/quantization/int4.md (94%)
 rename docs/{source => }/features/quantization/int8.md (92%)
 rename docs/{source => }/features/quantization/modelopt.md (100%)
 rename docs/{source => }/features/quantization/quantized_kvcache.md (98%)
 rename docs/{source => }/features/quantization/quark.md (94%)
 create mode 100644 docs/features/quantization/supported_hardware.md
 rename docs/{source => }/features/quantization/torchao.md (100%)
 rename docs/{source => }/features/reasoning_outputs.md (97%)
 rename docs/{source => }/features/spec_decode.md (93%)
 rename docs/{source => }/features/structured_outputs.md (96%)
 rename docs/{source => }/features/tool_calling.md (99%)
 rename docs/{source => }/getting_started/faq.md (91%)
 create mode 100644 docs/getting_started/installation/.nav.yml
 create mode 100644 docs/getting_started/installation/README.md
 create mode 100644 docs/getting_started/installation/ai_accelerator.md
 rename docs/{source => }/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md (84%)
 rename docs/{source => }/getting_started/installation/ai_accelerator/neuron.inc.md (79%)
 rename docs/{source => }/getting_started/installation/ai_accelerator/tpu.inc.md (55%)
 rename docs/{source => }/getting_started/installation/cpu.md (74%)
 rename docs/{source => }/getting_started/installation/cpu/apple.inc.md (58%)
 create mode 100644 docs/getting_started/installation/cpu/arm.inc.md
 rename docs/{source => }/getting_started/installation/cpu/build.inc.md (96%)
 rename docs/{source => }/getting_started/installation/cpu/s390x.inc.md (64%)
 create mode 100644 docs/getting_started/installation/cpu/x86.inc.md
 rename docs/{source => }/getting_started/installation/device.template.md (100%)
 create mode 100644 docs/getting_started/installation/gpu.md
 rename docs/{source => }/getting_started/installation/gpu/cuda.inc.md (74%)
 rename docs/{source => }/getting_started/installation/gpu/rocm.inc.md (72%)
 rename docs/{source => }/getting_started/installation/gpu/xpu.inc.md (67%)
 rename docs/{source => }/getting_started/installation/python_env_setup.inc.md (100%)
 rename docs/{source => }/getting_started/quickstart.md (75%)
 rename docs/{source => }/getting_started/troubleshooting.md (86%)
 rename docs/{source => }/getting_started/v1_user_guide.md (100%)
 delete mode 100644 docs/make.bat
 create mode 100644 docs/mkdocs/hooks/generate_examples.py
 create mode 100644 docs/mkdocs/hooks/remove_announcement.py
 create mode 100644 docs/mkdocs/hooks/url_schemes.py
 rename docs/{source/_static/custom.js => mkdocs/javascript/run_llm_widget.js} (54%)
 create mode 100644 docs/mkdocs/overrides/main.html
 rename docs/{source => }/models/extensions/fastsafetensor.md (100%)
 rename docs/{source => }/models/extensions/runai_model_streamer.md (86%)
 rename docs/{source => }/models/extensions/tensorizer.md (79%)
 rename docs/{source => }/models/generative_models.md (63%)
 rename docs/{source => }/models/pooling_models.md (62%)
 create mode 100644 docs/models/supported_models.md
 rename docs/{source => }/performance/benchmarks.md (86%)
 rename docs/{source => }/performance/optimization.md (98%)
 rename docs/{source => }/serving/distributed_serving.md (73%)
 create mode 100644 docs/serving/engine_args.md
 create mode 100644 docs/serving/env_vars.md
 rename docs/{source => }/serving/integrations/langchain.md (93%)
 rename docs/{source => }/serving/integrations/llamaindex.md (91%)
 rename docs/{source => }/serving/metrics.md (90%)
 rename docs/{source => }/serving/offline_inference.md (76%)
 rename docs/{source => }/serving/openai_compatible_server.md (61%)
 create mode 100644 docs/serving/serve_args.md
 rename docs/{source => }/serving/usage_stats.md (100%)
 delete mode 100644 docs/source/_static/custom.css
 delete mode 100644 docs/source/_templates/sections/header.html
 delete mode 100644 docs/source/api/summary.md
 delete mode 100644 docs/source/autodoc2_docstring_parser.py
 delete mode 100644 docs/source/community/blog.md
 delete mode 100644 docs/source/conf.py
 delete mode 100644 docs/source/contributing/model/index.md
 delete mode 100644 docs/source/contributing/model/multimodal.md
 delete mode 100644 docs/source/deployment/docker.md
 delete mode 100644 docs/source/deployment/frameworks/helm.md
 delete mode 100644 docs/source/deployment/frameworks/index.md
 delete mode 100644 docs/source/deployment/integrations/index.md
 delete mode 100644 docs/source/features/compatibility_matrix.md
 delete mode 100644 docs/source/features/quantization/index.md
 delete mode 100644 docs/source/features/quantization/supported_hardware.md
 delete mode 100644 docs/source/generate_examples.py
 delete mode 100644 docs/source/getting_started/installation.md
 delete mode 100644 docs/source/getting_started/installation/ai_accelerator.md
 delete mode 100644 docs/source/getting_started/installation/cpu/arm.inc.md
 delete mode 100644 docs/source/getting_started/installation/cpu/x86.inc.md
 delete mode 100644 docs/source/getting_started/installation/gpu.md
 delete mode 100644 docs/source/index.md
 delete mode 100644 docs/source/models/extensions/index.md
 delete mode 100644 docs/source/models/supported_models.md
 delete mode 100644 docs/source/serving/engine_args.md
 delete mode 100644 docs/source/serving/env_vars.md
 delete mode 100644 docs/source/serving/integrations/index.md
 delete mode 100644 docs/source/serving/serve_args.md
 rename docs/{source => }/training/rlhf.md (100%)
 rename docs/{source => }/training/trl.md (66%)
 create mode 100644 mkdocs.yaml

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index 6a7d220bbdcf8..774a5df16d7f3 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -33,14 +33,13 @@ steps:
 
 - label: Documentation Build # 2min
   mirror_hardwares: [amdexperimental]
-  working_dir: "/vllm-workspace/test_docs/docs"
+  working_dir: "/vllm-workspace/test_docs"
   fast_check: true
   no_gpu: True
   commands:
-  - pip install -r ../../requirements/docs.txt
-  - SPHINXOPTS=\"-W\" make html
-  # Check API reference (if it fails, you may have missing mock imports)
-  - grep \"sig sig-object py\" build/html/api/vllm/vllm.sampling_params.html
+  - pip install -r ../requirements/docs.txt
+  # TODO: add `--strict` once warnings in docstrings are fixed
+  - mkdocs build
 
 - label: Async Engine, Inputs, Utils, Worker Test # 24min
   mirror_hardwares: [amdexperimental]
diff --git a/.gitignore b/.gitignore
index 2756c612b82f8..8d5af1bed92d8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -77,11 +77,6 @@ instance/
 # Scrapy stuff:
 .scrapy
 
-# Sphinx documentation
-docs/_build/
-docs/source/getting_started/examples/
-docs/source/api/vllm
-
 # PyBuilder
 .pybuilder/
 target/
@@ -151,6 +146,7 @@ venv.bak/
 
 # mkdocs documentation
 /site
+docs/getting_started/examples
 
 # mypy
 .mypy_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index f5c0c368d578c..658de23cf4da9 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -39,6 +39,7 @@ repos:
   rev: v0.9.29
   hooks:
   - id: pymarkdown
+    exclude: '.*\.inc\.md'
     args: [fix]
 - repo: https://github.com/rhysd/actionlint
   rev: v1.7.7
diff --git a/.readthedocs.yaml b/.readthedocs.yaml
index 2781ec223b665..98c3be25f7e76 100644
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -8,12 +8,8 @@ build:
   tools:
     python: "3.12"
 
-sphinx:
-  configuration: docs/source/conf.py
-  fail_on_warning: true
-
-# If using Sphinx, optionally build your docs in additional formats such as PDF
-formats: []
+mkdocs:
+  configuration: mkdocs.yaml
 
 # Optionally declare the Python requirements required to build your docs
 python:
diff --git a/docker/Dockerfile b/docker/Dockerfile
index cc3499d1f0a91..f28a1618298ff 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -329,7 +329,9 @@ COPY vllm/v1 /usr/local/lib/python3.12/dist-packages/vllm/v1
 # will not be imported by other tests
 RUN mkdir test_docs
 RUN mv docs test_docs/
+RUN cp -r examples test_docs/
 RUN mv vllm test_docs/
+RUN mv mkdocs.yaml test_docs/
 #################### TEST IMAGE ####################
 
 #################### OPENAI API SERVER ####################
diff --git a/docs/.nav.yml b/docs/.nav.yml
new file mode 100644
index 0000000000000..c410b6b8223ba
--- /dev/null
+++ b/docs/.nav.yml
@@ -0,0 +1,51 @@
+nav:
+  - Home:
+    - vLLM: README.md
+    - Getting Started:
+      - getting_started/quickstart.md
+      - getting_started/installation
+    - Examples:
+      - LMCache: getting_started/examples/lmcache
+      - getting_started/examples/offline_inference
+      - getting_started/examples/online_serving
+      - getting_started/examples/other
+    - Roadmap: https://roadmap.vllm.ai
+    - Releases: https://github.com/vllm-project/vllm/releases
+  - User Guide:
+    - Inference and Serving:
+      - serving/offline_inference.md
+      - serving/openai_compatible_server.md
+      - serving/*
+      - serving/integrations
+    - Training: training
+    - Deployment:
+      - deployment/*
+      - deployment/frameworks
+      - deployment/integrations
+    - Performance: performance
+    - Models:
+      - models/supported_models.md
+      - models/generative_models.md
+      - models/pooling_models.md
+      - models/extensions
+    - Features:
+      - features/compatibility_matrix.md
+      - features/*
+      - features/quantization
+    - Other:
+      - getting_started/*
+  - Developer Guide:
+    - contributing/overview.md
+    - glob: contributing/*
+      flatten_single_child_sections: true
+    - contributing/model
+    - Design Documents:
+      - V0: design
+      - V1: design/v1
+  - API Reference:
+    - api/README.md
+    - glob: api/vllm/*
+      preserve_directory_names: true
+  - Community:
+    - community/*
+    - vLLM Blog: https://blog.vllm.ai
diff --git a/docs/Makefile b/docs/Makefile
deleted file mode 100644
index d3b429dfb9257..0000000000000
--- a/docs/Makefile
+++ /dev/null
@@ -1,25 +0,0 @@
-# Minimal makefile for Sphinx documentation
-#
-
-# You can set these variables from the command line, and also
-# from the environment for the first two.
-SPHINXOPTS    ?=
-SPHINXBUILD   ?= sphinx-build
-SOURCEDIR     = source
-BUILDDIR      = build
-
-# Put it first so that "make" without argument is like "make help".
-help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-.PHONY: help Makefile
-
-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile
-	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-
-clean:
-	@$(SPHINXBUILD) -M clean "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
-	rm -rf "$(SOURCEDIR)/getting_started/examples"
-	rm -rf "$(SOURCEDIR)/api/vllm"
diff --git a/docs/README.md b/docs/README.md
index dcd5e759dfa88..57b1d03deee28 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,43 +1,50 @@
-# vLLM documents
+# Welcome to vLLM
 
-## Build the docs
+<figure markdown="span">
+  ![](./assets/logos/vllm-logo-text-light.png){ align="center" alt="vLLM" class="no-scaled-link" width="60%" }
+</figure>
 
-- Make sure in `docs` directory
+<p style="text-align:center">
+<strong>Easy, fast, and cheap LLM serving for everyone
+</strong>
+</p>
 
-```bash
-cd docs
-```
+<p style="text-align:center">
+<script async defer src="https://buttons.github.io/buttons.js"></script>
+<a class="github-button" href="https://github.com/vllm-project/vllm" data-show-count="true" data-size="large" aria-label="Star">Star</a>
+<a class="github-button" href="https://github.com/vllm-project/vllm/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
+<a class="github-button" href="https://github.com/vllm-project/vllm/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
+</p>
 
-- Install the dependencies:
+vLLM is a fast and easy-to-use library for LLM inference and serving.
 
-```bash
-pip install -r ../requirements/docs.txt
-```
+Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry.
 
-- Clean the previous build (optional but recommended):
+vLLM is fast with:
 
-```bash
-make clean
-```
+- State-of-the-art serving throughput
+- Efficient management of attention key and value memory with [**PagedAttention**](https://blog.vllm.ai/2023/06/20/vllm.html)
+- Continuous batching of incoming requests
+- Fast model execution with CUDA/HIP graphs
+- Quantization: [GPTQ](https://arxiv.org/abs/2210.17323), [AWQ](https://arxiv.org/abs/2306.00978), INT4, INT8, and FP8
+- Optimized CUDA kernels, including integration with FlashAttention and FlashInfer
+- Speculative decoding
+- Chunked prefill
 
-- Generate the HTML documentation:
+vLLM is flexible and easy to use with:
 
-```bash
-make html
-```
+- Seamless integration with popular HuggingFace models
+- High-throughput serving with various decoding algorithms, including *parallel sampling*, *beam search*, and more
+- Tensor parallelism and pipeline parallelism support for distributed inference
+- Streaming outputs
+- OpenAI-compatible API server
+- Support for NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs, Gaudi® accelerators and GPUs, IBM Power CPUs, TPUs, and AWS Trainium and Inferentia accelerators
+- Prefix caching support
+- Multi-LoRA support
 
-## Open the docs with your browser
+For more information, check out the following:
 
-- Serve the documentation locally:
-
-```bash
-python -m http.server -d build/html/
-```
-
-This will start a local server at http://localhost:8000. You can now open your browser and view the documentation.
-
-If port 8000 is already in use, you can specify a different port, for example:
-
-```bash
-python -m http.server 3000 -d build/html/
-```
+- [vLLM announcement blog post](https://vllm.ai) (intro to PagedAttention)
+- [vLLM paper](https://arxiv.org/abs/2309.06180) (SOSP 2023)
+- [How continuous batching enables 23x throughput in LLM inference while reducing p50 latency](https://www.anyscale.com/blog/continuous-batching-llm-inference) by Cade Daniel et al.
+- [vLLM Meetups][meetups]
diff --git a/docs/api/README.md b/docs/api/README.md
new file mode 100644
index 0000000000000..5c7b2ca79ee2c
--- /dev/null
+++ b/docs/api/README.md
@@ -0,0 +1,107 @@
+# Summary
+
+[](){ #configuration }
+
+## Configuration
+
+API documentation for vLLM's configuration classes.
+
+- [vllm.config.ModelConfig][]
+- [vllm.config.CacheConfig][]
+- [vllm.config.TokenizerPoolConfig][]
+- [vllm.config.LoadConfig][]
+- [vllm.config.ParallelConfig][]
+- [vllm.config.SchedulerConfig][]
+- [vllm.config.DeviceConfig][]
+- [vllm.config.SpeculativeConfig][]
+- [vllm.config.LoRAConfig][]
+- [vllm.config.PromptAdapterConfig][]
+- [vllm.config.MultiModalConfig][]
+- [vllm.config.PoolerConfig][]
+- [vllm.config.DecodingConfig][]
+- [vllm.config.ObservabilityConfig][]
+- [vllm.config.KVTransferConfig][]
+- [vllm.config.CompilationConfig][]
+- [vllm.config.VllmConfig][]
+
+[](){ #offline-inference-api }
+
+## Offline Inference
+
+LLM Class.
+
+- [vllm.LLM][]
+
+LLM Inputs.
+
+- [vllm.inputs.PromptType][]
+- [vllm.inputs.TextPrompt][]
+- [vllm.inputs.TokensPrompt][]
+
+## vLLM Engines
+
+Engine classes for offline and online inference.
+
+- [vllm.LLMEngine][]
+- [vllm.AsyncLLMEngine][]
+
+## Inference Parameters
+
+Inference parameters for vLLM APIs.
+
+[](){ #sampling-params }
+[](){ #pooling-params }
+
+- [vllm.SamplingParams][]
+- [vllm.PoolingParams][]
+
+[](){ #multi-modality }
+
+## Multi-Modality
+
+vLLM provides experimental support for multi-modal models through the [vllm.multimodal][] package.
+
+Multi-modal inputs can be passed alongside text and token prompts to [supported models][supported-mm-models]
+via the `multi_modal_data` field in [vllm.inputs.PromptType][].
+
+Looking to add your own multi-modal model? Please follow the instructions listed [here][supports-multimodal].
+
+- [vllm.multimodal.MULTIMODAL_REGISTRY][]
+
+### Inputs
+
+User-facing inputs.
+
+- [vllm.multimodal.inputs.MultiModalDataDict][]
+
+Internal data structures.
+
+- [vllm.multimodal.inputs.PlaceholderRange][]
+- [vllm.multimodal.inputs.NestedTensors][]
+- [vllm.multimodal.inputs.MultiModalFieldElem][]
+- [vllm.multimodal.inputs.MultiModalFieldConfig][]
+- [vllm.multimodal.inputs.MultiModalKwargsItem][]
+- [vllm.multimodal.inputs.MultiModalKwargs][]
+- [vllm.multimodal.inputs.MultiModalInputs][]
+
+### Data Parsing
+
+- [vllm.multimodal.parse][]
+
+### Data Processing
+
+- [vllm.multimodal.processing][]
+
+### Memory Profiling
+
+- [vllm.multimodal.profiling][]
+
+### Registry
+
+- [vllm.multimodal.registry][]
+
+## Model Development
+
+- [vllm.model_executor.models.interfaces_base][]
+- [vllm.model_executor.models.interfaces][]
+- [vllm.model_executor.models.adapters][]
diff --git a/docs/api/vllm/.meta.yml b/docs/api/vllm/.meta.yml
new file mode 100644
index 0000000000000..c15adfec644cf
--- /dev/null
+++ b/docs/api/vllm/.meta.yml
@@ -0,0 +1,2 @@
+search:
+  boost: 0.5
diff --git a/docs/assets/contributing/dockerfile-stages-dependency.png b/docs/assets/contributing/dockerfile-stages-dependency.png
new file mode 100644
index 0000000000000000000000000000000000000000..0838bfa37fe62d60fba9adcbd18c81de0809f253
GIT binary patch
literal 121821
zcmcG$cU+Et-#>mLWHii(R8~kO?b0+tT4-oWXlZHhBtnw$Y0*+r+EGeri9%`bol1M}
z{e2$ind`c~*YCc6zrTLx{kSjk={(Qlc)wrcIbQGMp@N(=&3eZ5Boc|{+}TqXNhDeq
z5@}^Q^-BE9;pF$~_+!mA8R=7`CF1`=Gb3M+NGzmtrz9`i2KF}@SSock3A6@1+)qWV
zL%Z!%1PjY1n!6kK>N7nR?z@=#Zsm55u#eYbs+9X;1;fw2I~h?L&GhDmLgEqLtB0vr
z)z&F*syyhna-HtVoy~M}cB9jVUr9ge)@jsu#5)X)i90O4&rsejy3i?R?&IUL0++J<
zQdl*jxc2W4B$DfzlXKhu{iEUe{~vxj!re>$2KtES&!3lw&BdmrrA0;Q*Tico7Od#i
zsCX+|QBm<qz_dADGb8E)`B!P4PjV1{SsS>GUHS9qPxeorKH1sXF*^=R;*ZLQuGDWL
zBlT|FnCP#w7;epF3$na;=FFMOhs1ka@3Y3ge}BApBk=?M2E0>Vd||vOfJc0$;{`r{
zLgw1F->xfG6^jgg_T<VE6crWi6(oLFBJ-`&2K<@hY=UdUWjT!gVDmeAWz@&-;-yOt
z<K5=9Ivx>`kqwiZY3U;(BEEh5)>o6TG?l%SdGpuHgwHR1efh6_ePG_6&t(2}lj56I
z#D_AH28ci)O{#C(wyh#kK6~I}gV_9VUi6E1@7|sCpj}v;$`-fmE*)~7%2>B<-KK+7
zVM-brpDlaJg)IIQ1qxYIMJp+FwYRr(eIEYOV4A&<m`Ce*Y!C5~3gQMWM64ycs3q%{
z)h6k-*mVU(^VHSVsc6}Cu4d49>g&5*=UAw-{Zx|;n|$yItKQ1rzkd&3xOlO=N=4hL
zfXRl}pnQM*E#ku@GQ%i0zj(8+I!>U)acqnH2}MQ4O~FDVU$@|UCl?pyq<3wMTKZ_*
zz<=ZCKN&+=b7LjqbS0L(l?J07KZrd@&Luv|b(RuSt_weL!A^spy5hAmg-`TXM3yW}
zwTNH;{`Bn06*NWFaq7&@(`^nznKpBi18o6D@t^+Wu4TSe8KwC5b0o;o=(<o)`R-D|
z6Q*4sJFyWiGlkmCW_dJNEyb>tjBmWfX1hJa=i5HpjeW`M@9!619PthkwtB?7mADvs
z(dG4Di}kP3c3IH%7TvyQk3|tTHbcDQ>B%tJ)x4I-kxOqce44B`k~x3gjYYrW?XTD?
zAMB?FnXEX8PhmM<pGy46Yunk`shj`UZn91<FONMw?e|Cl^VI4Q3F9kSjx(RSg6G;i
znAb>R;U~MqmuB9)kb5Ldxs08Z%V7K%Bw`b6Q7puxSG>`U<?vyHpZ^^8)3P_HeC@jM
z>#gjoVzIe?tUeF5TSrF+gSO+yc!j+8@@LR2e@5b?M~^n`JA;7OAW6TUjZHp8Y@z={
z$rItNi7rPtI5NA#JQt@6#MelcMk$5|@aP5G{(h+FL4*ovZzy?}C06lU_t*Y-!M2vT
zR(`$WKr9P?@2Q-{*w3Hm*NRW)V#V*W=vT+q?K1srK3*m*%|sMB(gVKbYlvRA<=|sx
zaWQJQGhU1f6A3P&$gXfXs{W;)KYtdBPBrf2xk)AYG$7#9t-ANt1NEnu-+y@d{cmQ*
zx?F}FTG-jyJ-xj{#GGblx<Z((_O?nWUc7i4iN~dy$U^Owdw1>9!jOyng$s|D7eQpX
zez}S&tE$@Ou4eeh$7i*qSs_PiRP)NDf1W=djTB#ar(5tulK!Y_Vym9`+?D0J;QxM#
zTvTXE>hwYu{Icf{P`hoB6Kq2+$eum>KzNf`&P+}ABA@MWYk7J3^z<|?k&cn~Q12<?
z&Ltk#QlfFqp9nv#^)aad0RhNKw_XnVhgnW@&4^=3`pcIuKi<jnHaVI1@0;|=DO&n;
z>$YtR-O}RgYrMr5Z3}%l!hfyW{XO4nS53Cd5)1X{*cj4jFshySCK5L#6(vp{*ZJaZ
z)mM$%LlO~DgMvw`KQc0cDthbu`SbKtbSH*$T$biqqPCE0e9}|12IJJzLF69{JCL8u
zLbp4LgG6@m8S)}-@xpJeO;?UnGp<kWiA!xhf(_u{5S^&Jgv}$qyi&-4a>-X4ElNby
zKi*pBcD27Y$z%8Jvz6u~Hx_D>^q#i})HRaFPz6$qYR)i``$?3=a*AwJPc?D+6QJkD
za#ltrW9HAR@F%N=#nn;_C)$~p1gYJS0Alz$+zRn^y@8ZUPm?#^Q{n09d6#8}(CzCD
zAB~B3xu7JxL>{nEJCD6;`N!Ay?{}&H{jL{m^0W*{e!6hkmBZWO)KY}!`aj%dnf~p`
zrR6-+xxAoOGRs?c_18;2EdB<!P~?m>vy&xmDa)|PpS$h1yvyvR<a5L&xu#S4fW)d}
zM%A%qn+7@8DYR_db0SSCMp=^n^K5U7h|S<*VOrvM6+DtniTlte-7E=a;=E6@YrFAg
zSHlDQ_Xpnod03MER`X{eHa5292E$aIwHm)1y27?EE)`{_CCf=!S-J8GDOzQsYeiea
zQQ{-7x<pWN)F~BtBm0VfwdB*FASMO|2CLn!y-ZHyWo>)8xVY-~6Td7GwEU4-F}+E3
z)7zs;vgVqG_)QuGOUYm5x)4NJ--#yN?pDrpt3Kiao6ep++g%<ZLr;Ygtb+@{%T@Tz
z-|s~WGb@W$iqU^T>ere!GDJJXrg{kz-I1iALb>Irp+}ycd8h5|?Xhgu33g+D+QOou
z4lQ3-QyJw}vtsI{-k*DMu<W3YHcINjL8JQ{cW$<dXZ+OvzA$Ug;xB=gMo$?pCXvZL
zzgg7GD-F*co<Z-;`{v7`9wtjJT9r(cqS2dy+G4f|D?!|Xr}t8bd=vYTBmebVEZc>x
zdN0ppKM@&Bi)wdNI(_<^l8UBJaBy>m&CvY(e8T5hME1QOEw=4mPM^yz$%}5&GAs@h
znzKDj+#YG~Yw|LX=;vojT!PUd|NMGoVPWAi9?pE466&tAlz1Ys9OB|iQ1kQgIqgr*
z9anofa-fk|1g9D;q`mhvy*c+HHZ~SZ&r1mbQl<98W&HI(OG@g?KZntG0ybOGlfSxG
zgpzyYA4n3il>Yu-Kb4`<Oyzqsx5TI<pod1jd4mqzgIih{3t7^*a%HDga%QUiL~qE_
ze3vwn)n(7^9E<`cj=(NK*7XJij6gjIf3k-7*qr7H@-~seE?>D~*WxsF<HikH|6@Ae
z9&JPGIc;FjQ=ekAG#)7)gyKV>m5vwkS);xYGJg3^zk+O<XMB>R_t<sx>4rGZ_Ne63
ziq8)xc%zvPw3k>dElf>ym$eUO3~8Y^qGF)qB;mTykhIKlSKnPjqxRwEFEK4<;zC@b
zD20-wn2ti|JozDn$z^^d0niUUFv)3d^7H4<yG+tp@uGGk>hjLx9et}C?FaAgGMVm?
zcTu}|aWj=9@<|(6Khak+RGYL<Tzm;JV2jmyBEC2c<P)V!4yQx`N&vfl|Gcy~m)dOK
zmuBAnr#Og!d#ShTj`Q)Q=#>N$m9=GYGBwdP0~fu4Gc7bMEUZ^E(_tzeI~p6?`2I$r
z38!<DRn0wJ>|xz6RcB{sL2H1j%Aa%-b1h4Y05cnF^n$FazuenUBG~?Td9w!UCw$dS
zt^l%OO-w$I<g0yRB9?%}O({F1Q*RR!6OC%(=|sm%YqS~pZ`I=ZzD$Ij_OxyZjT8TP
ztM1~qj2nf%*$V@vTefV;KK{bTXCh}6o#XsS0k=H47)x+a(w`J&xiC@V;n3*ZG(FPZ
z{`>criGAKDHs>ignD$n_BepngkBIdx<Xa5WTrQn+DeA+HKjc*kt^T_s!UM?_Sv?U&
zMMday_xq4S#Qq<tY<|UWtX5?&Ag+c$9&XFieNIi>hO64Y?n5)f8f~WTS5BEUvu27`
zvc6&yTRj_~GeCKpu$WkuN`h9#&f=dxADufyyp?1aK&e*r0r<8iI?KVvPliE$ekuj#
zlT~v=+1hGqjPZ2B-J$3SO&e29Tf`2&CciK02BjfLTz|=H@P6(N@VHW(fJvh&@kSKo
z=<1%Ho($&2>Jai$yUw1Y<mrP8j`OLGrvhHSR7+`oe?x|;piTvqE=~<u)$c6G%KG{V
zr6nt{Eyv8P5rC*tAGYp{|C@tZfr)S4?EgY8#iYMwisiUAXN__USdL*6HZ3!w5X5G3
z(90njo2Jx<FlwSRZTJ}X-yi+`R&Aor)GC7HR0*@Q_pvABfBU8sXEWG1Bg-&z`(CP?
zRZsZ=yfg^T(X=p`+A{lrOfvp50n)RX?r?AG?A5nSOiZfmb6i(w+wmcsqg^vlo=wxg
zyDW}{IDJoB%gAR~iE3!3si>IbI5XO`F?((>gL@s(oJbNJ%Qa!|B}GMUe7mXrL^ePe
z+C|<3;H{R@t+>t1%puR7ZHljny)q_Sb0zU=Zq}N(Bu~!tD8mInC@X;%AM{G%=g72*
zR4GO&-z?{eh!)7SL}gG>R=(fY7oQPdAAo>7u5ncBbDH6Fp_a?S1fAIQ)ezn4%F_&*
zl~`soQM=I&)B0yN`Hy!J&8p}k88;|+{!QZj`}ZRp8qEusv|S7)`|IRSjGRi_bL?u4
zKeujl=9iwxkUg9L)gOSed)O09Y7^UjvL~GA4!vv0HJS>D@=DPBT3Rzw?0}!fMP=nl
zpMa_u6>Y03B@hg5ndK#+bXn3~9<$aQo?^jvhV1EY0eZnRvwaB++qU`q^Ur52pg^Np
z9==XXOKT5RF004xQP2`rv8-7-9+%;}%*hB>4Wc}GpBVkl7UK9GDUM1d|B62jVC3-O
z!{T#wCD*3*t<D9Fo2m#|<kcv_EnpLQZWbXjPNEunFgptZ6E<f$m;$~{Hkf+fI6_VU
zL(k>J*#OA$a87%ytAtJ-;RE<!{Pn>me1c$``<CO!j}uIGvVW%aF5UgA27vxaG~oWy
z*Leb({Gy@@TrM**pnrE+YQgd*YP4IDbPIs*Q;ZuX!#p{AYm?f+vYx+q5%WjHvYY;9
zRQ>D8;XFFJl1BT!I3?Y4=gzhLAn2DQy@qw&jkdPZ*C%6c-lt_^4E{Wr-k0^f7brRC
zn2wa8p&>n$jPFsQu_7Mz6hoiq&&xkqbd9tZAZ2a;6a|37Dvd47P0?{#{o<?X?;IoV
z0*mtUmLEa|!Ho^z0-1SkUPv`*!j0joM6CO5LCluW4_=D9I3rPQT<)P}#cO;T{n%gv
zvaSu4;Nr!L9<)qyLY@CGXxX-<)My!@{M~YXnlSxC-g$HgX!_49Q?b$S`yfdK2L?92
zKIzd8(4JJV#{oPVf15r_3Bkmn-$2jd#pDgh;ISX?jtp@YrgpPwaY{w+2f&>^<E%|Y
zYtEgVW-@;y|0+r05~W)G*G~!}__(~&8X6jZacKB5AoBx}=fsYXp<6?`l~J00`I~e9
z{aZEI8#6@>jokz~0xnw5pp>0Pe243T_c9371X@dricVQkDc_dxZhG35f;V!UFHivT
zY&@EL+i%cxZ~k>TCYO<=*>7o4aHnmVvx!oZ<6Kw(l?yb%=E{{Te}QPm2xzc}Lkpn5
zGetdee)c~7`;}eALAwM@c2nh!b{2*guN?MLj#b^nX+PP&(|9xSNuhESn4{1F)Vv$;
z78o0S8QI|_Oe-+~^u@qAJKnS9|NiY#(#Bo<M>UFqMMZ!(Tdr5fULkYg5VJ4q{_7HG
za?qUezG*Mb)&S66X6ND>3ilRU@79{u{=}$0*#H~>uYbYcRZo3<tiH2)P)Qy$ZT?Il
zIIS_k061pm`8)May&^-sD5GTxLNYfu4?p9z-YxX=XGf5EV%wL1bac^RCF$S1eTyKw
z_m}MWQWk|K55yc|m>bLVFH`3afHxp2^C9mpMViznAEtKO!1ejqzJ2?aSwqt@#Mgmj
z)+E#wxP>YgZJHqU#JjxlIZ(2AG_>tHeah~W7ysI_aI&TyVJ~yx!bZ2&V28oOM~*zg
ziqcc%LfRSm!4^U#X@(mbA8Zm?{-z*G<qys6u21Rv{%jxWK(fK5D!CV=O={WB3t;tI
zs3aX7=b3C8H(Bi_R*hx*f8+pP3i|F~Qw9OPSXtWuKqHM0-qH8k!TlQz%ZZ-KPviCv
zQ<u4hWm=YWb%?BuF+P_I5?=VtkWEcq#PRMjGQrwp*7}mqNLe-0xGK8JechI@C*)@L
z^}+HY7SRYa63mDy_t7?X0AgmV-L0ev6yU|ywc^w@D6?l#%Rsb=3;lXQ1K755C-L{I
zB}cWhKD#U}qL)h2&qIOBZgtxbCP%ef)q1c|2m%#AAv?8Og}k^Ex{e|n84z77pyV*`
ziA?(mdtj}+36y8Bl<mfQS|uz{sNAa>PNKzus(Vt{A^mcq^PUYyLEtmIOiu-LK`13-
zV`JdkyAPRUPE>;eKq-N60n&Q&`t?&mWEk~eSt*&kXl_1&x}RpzRUEbQLZF~I1W!qN
z$B}OgdG~4NAP<F~U%6Xw4-$t618O%27hS;)oFXD3%Nj)NHzj4|Gh|J`b&WyiPoJg?
z8#*_Zef!!=xqcKC-2!R`!F$DPaIt15ziIOcsF46F5FLhd*Jgw50>#Dky;`|87A<b*
z?JJY@;wmm3&!posC+nAGhfNf;d6i4)^5tacJyB6n^(sJ{AP+Xkq}`7ABxXmmNV%Zs
z?T|fFcIY;xTLCFIf@6XPH>E?z{opV)sOed=7BLVO_8WcGr9Y|QOP=;ZuL^d<DES$&
zbAM?<;*^}&Ob2?#U{6N<N8`E9<0jJ~5dF@}$TVc^K)E>qg@J)fYd4kT`SU+fzQs_T
zGc&g4)EJ}xqm|ir`lz_EJn-}+WK44GkO3&maY|PAXKK&?X0T~Amp0an1MSx~>AX)X
zHr<N&cItD0RPBhw`3!BNtqqw@$e929^H2THHM<jik6yXmY}Xa1nZZx(_Vw#my<odP
z*jH$C#P!o3pg^I?W^mywfU24vj&#eOQLwj(p_V0eHW?WiQ&58Fp?l)}iFwWoecH_6
zK~{`(wcY?3jRf_>?}ETVoWrs~Gs&LH96B9DX66t4DVgCg1YH{%rYw`wR|sOaD`W*0
zz~N-&<l<*C-Xhn)#0DBt*GkeeFbLc%@+a2V7=YB%J;(VLm~f4j!vKL4hs{buB|#Xa
zpwEWOl6!FF+T{g$K?v3aRE^V~1St##h(e*C`kgs5GlO8)N;A{3_4oC)9!RNq!YrNz
z_?hx@Q>{3MM%pk!XtqFnanqi+Z{9TCF60nsw$(*bUKV}r_li;M0^VB-vlB5*8*bjb
z2^yB|gdLlVx8FsT3k;fdHP`Lo7B+t4dR}TbTzZq!R1=ekHQ^Mf#HuRB=-gS5K6ma1
z^e^$NIj)j-*C;8O0_)Uv7W#n|i>qXkZzg(K(Q_J|Zuj=yB@nO7>ErKjTcxDZ1T+b$
zD8^?)yms~?!rr;}T_l1lMvlm{NDyfhv;~OG0QqX4HkFz|T+T$QK-CT8GpYt`jV`w7
zs~&*IaH22v!+aRA`J^l<N|$vy4hYtk=P@*A_5Q;LHrbbzk@6u<*_A4>;0cfcq2M^D
zA_ciZ<mKgu>OSZ}B*NkncqBMjOFd3Ilq}mFAzo@gKLk1lKL&6{51KFJL48UG-r`K}
z4s&;RkGf4RUlYsJ%HbxG9VUv9kPuiHUI?jXF|9=GP->HP17D5CrO4B%9O@r00AT^*
zUP-rnSMt?0K}KF)9Jk^;oySb5=YM<bvL2kJDJhu?o|~6H0G@!CQRjO!Mg~pGGE{Cf
z*-PvUu@Ap|`EmuE9|}O-`x^vMt#i6#e+UEx)$#6Hx;<i!(|}p8{`qIC)m~!3LRU}<
zODH2!T4VkIz-+WNGUp~bW)t}L?{A({<J&dUE$x0kWXnP#3|Q1=s%U7;W5>2{-Fgen
zbpH4()N<-E5}yF5D4ZY!%=q~D9#Q+<Fllz~+}Sc4Us+iRl%aS1IxiYNa0e91_zKNb
zqLsQXEIUz>^i=2{G0Jg2fBmAFO8Zq(lJL>v<<qC700@W!ybxtfR+WlSX9<l3_KU)p
zL&V#+&5)S{nk+BfzI}UOfhuRk_4a%(4<>6>cq>%_0H=fBLdZRv)PL`56uDK=2e`Sp
zH8wUTnrG;jg&|Q@M5@6c0s2+=OtP~wuA>zjI#&=#l@OCrRVRh{B_Q*x->{+U*DC?o
zJbv2FcGzb0+jEt#iIsF+buW`FRMN6QTXmQlNX^SPmXj-nGIe=C71^h(T%B`g75>qC
zk7(A!Pl($gPLp+mdRb#ZLygET#@iADM~)s1giwFOPqP|}ziid5r6QL!*M-L3su+vg
zx4+~MJU<|VK6+(9_2$h#p^|sM<Qoq)XI{E^ak86}D2GtUvwEsf8)M?rd*d?(4a?vB
z2#%nbl2Lk_!^y&`1P41Or`G%H=W5fOAeKS|Q5aA~N&E2rJ-&gvV0Q&DkkcmvtVPy4
zCkONjG=vw@*wicyvZ6!^4W<N-{xDQS2;M1XZC|jYw>y6tW)B?#wZ~3_Xy~+mL{G-9
zrTDzo&UV%oufqpJYgj6m;qvnIjEUQ|qVf?q`}nwJb`W_fGwD?ZmbMUEB_XP!pz!q7
zt6lsie#jV@4u%ECd3Zj2_<%fs#R%O^L)s34&GTo^6u}Dj<QEipGx8BM$ha)*G*i>2
ztJ;Asi!+2o2c5ptPrDh?$}&2lc}TA25_yTR08qF*yc$+-*dD8%8egCE=FJ;OP<K|X
zz3)Xp4&IyVur6pjK`hiyz%HS;qMeA}OkGpE@AT=@*i0auwl-W590!}lnJ(#9WH(3u
z(BIXb_*qy8Oah$~R}P>aW9_@}2pSB6`kq$<HXSxofuLa&RtU0VRvTouteHQchNqve
z2tA11IR?~#etbV$g@Hqj_xg8wN}~(C-%sW+3f?e4I5g9FA%+VG*x=@o(Xr%Q7Im21
zxGNQG&lFbYV+&=Yk>B6eGFg_|AoQZ{k}A*Y=(M4qP0x>&aKVlMegPV?{p<Bgl1nPf
z`3&m;d*})boEkg$O;qAFt_&3LJ$(2Owc$r`G3~BYU><MS3r=dT3m2W8vwF!+kixQO
zk)%xCM)j&Rj>CuFSs&%$;SmwZh&F%q@+D(?AhGErlB4FeN^+u3y5K6VBwagQn-(PQ
zvN$`@7sG^t*9X<0HjU|oEdg_`?b<?qldEYY<@V?PbA20Lszo*aodqz>h6tlL(DB-1
zk1)iAMDqsdT3Fa`f=5UWtZZyuAlSspR7w(`=t}+dUtsR4i-M|Z8l2)p^H(okT!QyB
zpbCBmFAW)jWL8hs--N604}bsuJuQ>4BlaEaF7x*z_Tm#`MF^p&ljIvpqR`AlKiCaE
zetv#0UTonvX#vzQEZDqdONM37asARzTo<HIw7VVBt3$W{3KSYku8@lnzSZz?AdgPm
zvF#qinH}qhD|`^KT*i3!uU@@+0|O^ESH_Q%gM%aP|FN)@L0}8s%;JkS*>jpYIy&@J
z0PIF6P|HQDHi_)m<qQ%w3OtWnjX)(!^T1QlOh%|0&!qS4<TpX%h`ql$u_Y!d$`)#8
z2oefeggy#dI`S91pCZRe>(V7<<&TC{(d6>_DkMZ(QE_A<30)Ee_NJa5axDCb@cE(j
ztvQ#qw1S^LrMDs{3X3&GdO%_RUn?`u@#EJ5m$&`<fiq;Gk!9uoceVZ3n`Y0DuTfAT
z7Ir!E<W?i+mG{@t_^Y1WxOtNnPUNk|Nsu+<a=y@t+96Sl5r6`S;E!-xa*yx#N<*!S
z;vx2-@*x!O12VquUJV3(#xGVq;R9ZR-*>B#=-_ecsZY%1NVJEBK99)(VbD{d8WN84
zHwNuS=$6aT;XvV%wJz{kQA7>fzTDGkw?-CLIgda4SN}tJ;PXH|Y9m*-=2!ytiA~n(
zDtQ<f7+{P+2!G%An9Q5|H<1fNs1Ey~L*LO#ccsFQYe>?SPSHh~BG+SUNM2sM1c-~H
zCU6zZJlK3%NIH5d*_XVB4;|7cSo<g9E?r+z$T6cozMfM%>jV}}HSsFwMb#6m6IwRS
zZoy+mn<l%<_F>VkPTjV!AQ~GKLokwKy=2ENXAh-QFuEd#yE;0sWT>}zsfhRn1PD>0
z9}o?1+R;V63)0nWza3WO3ou+RZ1`e~VW7EVnE`F!AfU5=V`9x7;ZDdeT3#}GmdI3~
zMB%*+3@G3}>?+E0UZVeFwOW3kD&>9G*RNlX8;D^{#l4ZE0x=6vaOu(|U4&k*H+*4q
z!aW)A)<7#ZTaib51SP&HWq1<fFcO-zXxw1SC=l~AW6;C22UOM6)DpBZ@h`2pcSWpv
zw>J2b6kJ4yb5;?!oE#J?8s^{bF=#;xsGtNz&YjcbP>ni?aR>l@ph%te$$`|CcrZWf
z(bAK2OoC=tYtw*TOrecj+r5p%=&qG%AE%lW)|&|Y22x{=Qsqu2b*&OFC~JAu<<kFN
z(9pcpKHl1if>uhOIg-DND|5UY-rWR5(6y3gMcf6j|A||CTt~jNS`~Bz6y3rfKi2I^
z1;*7%(WR{LPF{TlTpHjMl(xs=D|4K6e>^`xtQ;f}g4dl2BC`Y9qvS=;X+Cu5kSgd*
zZ5o;h;@RMDq$+te06G4R2-kWa%3Xx&)PA<JfC!8?e6}6w3USdIP<?SwmT1h0<|4Mk
ztx)pv1=%QDTS=k$q4xhqm%CE2qzHU%W*NA&;Lz6A*0pJ(y;0%6+uL<RmdCZOQHE@$
z#ZhI+>u-&wiuF#g28*yJY%0S>s0m;3MTs_xgvJ3;ZQq&4XXP<ZLZ*wnmx<XhMLps|
z_I?11N^Nl#xz+fmqr(vcAOD#@s5_MWaaA*iNmddhB5>e9CmKKonh7Q-+=+=Uax;-w
zMQJ8$1OJsus6qXWpFY$l{R>WD>EW*sSEi7Gh!(c&RzM51x;Z`E1_Jf?7+DtNJ+*9<
z_G&-?0i;BPs+(q}=;YluEO^Op{L;r~8^4LS&>w&JY6Zk+Mb@X9E&=(I*&+rI#FpmA
zfQJYhm~w4-x@5hnmJYZ5U!`p2)BmxQk*>wQdD8(^yb!YxQ$Qf87=EINWu-Y}e=Ns2
zEHsowKtKTb4_r|2go%=uO9>FyKY~<o7n9o}h6L3p0Kzph`TrfzftwI(Lw@zklN(Pc
z>y=D<D>*qi_&O#ggV~D{3D{(%|Ft~1F8GkaW2F(`AG$BfEHGw$nz?R@E_iJpI3-e`
zCEEprhUh<n^cOHgl+m(a0`w3@X(oz~Vq#+-3+JW6J1G%coNgDjmzf%DB6Om$8Biot
zTSsq`GBnO7Od^A2>cj*Lig2|fyo{cx@F(@*>bA;!U013C^^`1vm44Zgf6&+%yp5(3
zsu{665b?J$FsNL<e2Qsplm*BbTsq+BqVL}y2p2XVVrPdrP(@*tKYiF^7MO?r=OWpf
zlZ5b{9B2T4`ve_{&>5l4p&oCtIz+;x5NsXH{NTG_)(k+M>=l#<l981)ZvJeCGK5O8
zdCS0MYKegd26NUZ;o!#T+ZmXzn(Fof>2R2C^O)-9C^<290U!0N3x(4Kg1}%I$C)R)
z?SMIk%G5ct=SM-pUzVgARNRCO<zTs?B>K;Zn3rR*hQ%MEg(yFG=6(o!P25<T88NU1
zDg2qAUutB^t*hD3(Ei|>p-d9i_sRPk&&kO>5iS!rLT;3ChK;`e{BxPu@c?L^Ks_6G
zl)|(k$`9>zTv-t$<!IW9^74bSFD=o-r2)r6?-TomPvz43EC|^frnC(T7<qmYV*^9Y
zqU`K3fb@~U_PxO-{8S|E?Vj0Pgx!pBNo|LL6nMR0&yZRA4T!302p;s?2h)XslG;ua
zRrnEPrHuV3@d|`!3*#UJvu~M0PNp5-pMmV%1huRzbHk>_swVBd)mR)N?gCk->Ip2I
ziQXze5n2q^T#A%S@>)k4b~*ESM;W>dA=M{jPO8yUVH^6&!lem3XstI;pF)gs7IJ0J
zLs&tXexL=B2OSOJGznP@gEElrk@-&;`OecYxMnJ(4m4m-i1{EwRYRyo%R$h|N=w7k
zXXc-pp61Ls40EMRr<!2mZ_h8nev&s$ifp+DQ8`g3Z?ap{VdQk}zk>JQk*p$JVhD2@
zcm=oCSaITOIW!-v;ydEf(f((C${?I!k$}l4AjA+v;b+%=g;v>sshJHMHbABt!Ni+N
zP#`zvvUc)-HcERjT1`}`q#D(5`o8<W_`?6n5stHwas*(z2?4_q^Q<<_c6#t2&_Bq-
zO<1+)sot0bIjF|a_0X)f#X|i3YoS?iYNjUx2cs8f-Q7b=Xd4)3-Nk=P6&Xi3?aIpA
zgG<mp3kx$~)840Jc80(m2+yJH4^^nnLzISC&%SuIlwLGV4FvO2(_zkdRGKycUG$1P
zah(;eYf!3b@;vC|&z~1YvU7dvcn<yabkKtYVi{n;2*b)m2~Y}xWZZc;GVr+L^e{N>
ztiuh#oNe2;XH8VALoIp<f~Xhbq--U|!C~Bp*evia`TjksB7+NblG3g!J~pCqIL(fu
z)vsE!ahQ#)05}-ux~(QAS^fIf!^=h3?SjDckS*9B^*=Chx6TO`HQ*>{VPlYgFm6kf
zRW8$KW`I!5kUTJ<@(7c*G-n4=O(7nQ1J-0hU{q2uDGU3yQ!miG`ifqPX-k&%&}x!G
z4Pg+0ci$sM15gkru^||2^yc5DL@2J#^V2B$_dLKzA~6|;91?=t58k2wVa0^(jT@a9
zvmB|sB%e7yTBy`gT8`R)zSSNb6CWMdGVnh_nKtbxfJYFhf^Xld&J*3jbyYRcLPFMT
zFSb=Fs0ViKHWdwS!^)@~F6%azvTWVHU2I{z0@Erp5Ike#h)dqWCI|cXq<bz&pWg73
zlL(Yak3IHLhFl?N*9c9*WdkSPKD3fV&qI&{XsSz4Z_U9=+5!*-=*bgLPtrc*-$5wV
zP{X^50=Dcqc<><X_6W?tmMo~Oa*ToUw|0;1Rk)T|@wNc$vRuIN2na6@Ho4$b6d(-N
z=IiULyj<fntI}j;Wt$2q^Fd46nB#(PWj^*xz_cly??eS44lxfM`=R*#rB#Uz2yS9B
z*ctz?Z2YzqRm1^9ld$@e_tB9u6paQN2%Xq@ZXh9h(cacSICwFSd2tx;F3(s^np_3q
z`Rat-C^7Lf+#=`2$Ttf&_;#^S*QubsoC!h;!psB;O<No69fulC8)6ot{<1_K@K8u2
zK9*<>(;1>dLNf#_#ZV1ROBUoi#1@AZVBqg>WWnPtAzsdo1GVTvI6xoCMsclFv&SHQ
z6o%H>*z_Q|TQ9678P-6C;)IY06u$2S;m~W^H1S>gN}Z`5`Ul0qWh$ek3F0{huMBMe
zxNnj37jsH4v05>GFr^}EfzS+4@Q~;CJusME4yUXXl+G#pI7wn53F(Gf@u4?HPY#2h
zuS~OquqpMI*S>^Mpl<T1b8&8nZi(p?sE_&Yuju_Ey&4iH3Hc0_xT~wn!(*THlfV;p
zx7F0@Q?tzBb%LHP{`~Q87VXfu2TJJ2GbB>fD4ZO^lLcS^t{Eq^LnJ@p8_nGXG7Ln;
z($Qq)<TSw$CPuC#knmx}t9j2t>?ZP`O)d~h=_aaNLJa~rH3v7u)OV;@bkO7gF>1iP
zG+lsn2B?|)#^uZn@6IXvAcVQgYHGr`dUU+0HXUf>gf)#eapqNTP7?OyL$Pp7@7#9I
ztm)tI5oWh;tChwYK&P!hU1D6M>%bqdsOwtlR}-_|=s2S+)JU!xOwB~+8)J;%sMIPF
zuLLndfH3kfYeM^lU$}tslIXpal*knf83OAyRG$JT@h+_}!C9ek!w4#E&4hD=H9UIg
z5Zo@s@H0eL!??><fR)~$dV`iVlEd&+z$d%06{s*yaT@fb&}MtkYUha<ZR3_KQEE3x
zIwvrizqoGeDumW<lr|#USg0`wF$7o3C?S)-S0QJS=q|*7In;Q1DopWyZ~SC|PW}1S
z4D!xpzMZ*Xw?z@`x!p03fl(XKTu&I3zkvnB(ReBvAd{eKL=*pngB6Cqzdpo&F`kz9
zYzMBUbD2Jg(iBF!x}S}JbHq$KM(8E!zkd7H<g&DYd6<*v+)>(waZc($_|TZKN@|}N
zu|HE72?NQGPfR>U?FMxg#J-0pBf^!7v7w=@s88A{hD!8QK$MFOEsNF26rlLIS0Kwo
zn?;LV=N1YZfEa$J!`LAK@%v4Q@@7!+mT*|HzQjxp#)wJ0mkDPB^mI>rf!D4n)NxE2
zoZ7Xnwgj4nCPt1>1UJ%|W(<7PUL5r+JF_xW>s<=)bIzTcXc{#d7QY~E!*GMvg)JUP
z=<PcTO7fKTq%W8D9x6$=V+0u5_e5|nvEc6#6Nmq~B=@7RaD!W`NXtwy0Iad`29g2;
zv>7;W1i*o=Nuntyd`XkWG_wT^`-}rVd-2mHG82X*G5>h44-@wD9c<zc`RPa#9>nD|
zLlQ@EkAYBomj&ayg@gwO%_ZvOfGLv7nlM3-Gm`X}o14LK%n%AYaju{as;|6o&l`wd
zEYv6~AK{*1$tffGu1h0;PMVlYLK-1RhyfQ}m#y(+F!H4V{}sH2={-bh*nE`1LY
zsXr4A3ev_JBMul924KFF8E1vJ6LAK+z9nk`L8fGSC%qa)2aVNopq`J~jo3PjkpgN^
z4kKW^zZKI*yD`F(R#=Pj7Gb~PYmT?`csRw##&RKnmM}Gr*?iK38)0BfDnTYhC?TW6
zXx}rxJf`yngN=7tzJLGjkCC5}U{SKI4!=YMvJJ)VAxZ=>R7Oa~w3g@-#L1CJ5@ueZ
z`UV9pfEmE-28tI)i-%rF%YyRWMAQK)NjMn5mq>gDI8u9w!A1$u6f7Uk4ZMzxJt`))
zXo`7;cag!2IJ%HCn&7fv1bZIf7Ar|86vVg&-s!rko+Gsp;2{B0$~J`N1sNG)Mu6T3
zs{v8=7gP$jRzytzpONoS_^NB~Fie7(F94GbZddV%Xj<vnbX0;l#H@@0Gf@FBAqeFa
zqa8TO&}p37w4FFSfpO(GuvLywyP?=+O*K2f^m>TZ%UKZ}8{3bmFHWN7uwc{+WOk{{
z+u8E#Ch1eKj~@gP-ZJ6tyM@BXfTBPMSHzi!I<Q7->q(4axUpa~_zQ-lULX`GR(5YV
zd3D9-&FHB>S!OWzYC^Y)<p@-dMTqu0sB#}{+6P3m9kUrrNj6<~6ylebrnydTR@|g^
zZ&LQayF(HCWirAm-U?S4XeS5u>Yr2-j5{Bc%vE6UTB-h2^`$HI2i)~p=W0_&=GteS
zhDsK0{yBL6-3exw%*H9cnuV9#gFAT==B!qbB=*6(6~vJQC~w^Av2k%dbG*Pr`Wvt5
z>dsB{si>+R;^1J~y!p88@YQpAR3xv<`ub6@W$f%e!8C=bk(`opAZQbbbavyT9R!+c
z&DgSM&tPxwd1_EJap!r#^QJ2#h^l)^<ejapEh1dHyodNj-it3^zI^uVSzB8h<Zc8N
zi#x^{Rm;yoU6;ta0_jLchn4j@=*}Vd&g+gJJEod+Z8htg&`{En58i&eo9q#gT7nSn
zb2BrWV(V9tT0HPswY9bMQkZ=j`~7W$23Zwh0j0PCZ3**zei;0*<iz)v(tpm(EbwNQ
zr!Ffm$E@<U%&lul5*MMYSXx>(XS>K#6Py^xjB<NxqRB>J>)h|YK9QS6JEYFSDGUz_
zyMF!pe&YVQd3iU|&>V0Fs@K6R-gCLN#7*KAngn%khoD(2SSmFuuuF)bdDt7Cl_VC}
z($&?~?oSCr>fXJ!pAsb3{C8()95!#>jBeB?vW@(HsqctM{nqa82j%LdX*uAAyKBJj
zFpuST>p1y0@IPK2k&$d1nN>A4A;)yw-c+n0vAENNrQ)MEOYPme7aZ<jk0loNxZ!0d
zhxuu9Bcnbz|HCu$vvTCQ8O9G#YQ>!A{prKP!+CHL;9TS?((uwQq1$iXy*td>0i&t8
zx%ofu%FN0t;touYbeWYoHd5!$KQgGx34BZ3@4Kv7exw<;1!)Rn6gOk!O7??7%Y^O6
z57FE*Hn#iBsCi@!sq~m~f|l%Eq|P~hvI!7%50~Zc;UPm7+ot(Zv~qKE-JgQrd+p>o
zLxj$J2*xWXdn!EWH`CJ(|M|6xxKz?+1t-Zov__oop=O0{_#9%@(m%xdgg*OOT$}}6
zm-P*1QnD9kV~F*cW`6tjZGC;c`%~;P8rxZ6vh^^%rX)nX1v~-{EXs9bQxjMxIRZ$P
z810hQPqgg0_~Rz3Nfpdaxx1iMGQ^cXeb5_Yx-dUa&5EuMU>CpSNdE4Tbc-(9g*BD=
z`DYM+<jbeq<DGgnccs(=36?bHg>>R#KtfD&92xd(+LQ$F3*sZ{Jg@rxFp&u<m{!2<
zogvaGH7V(tr7(%)^D9{NRs3FO<i?{%>&drI6Zf*W0o<dzTVLud^ri-YE*9bgkl|Ky
zlg5k}0*;@4A3!2ef79^!8JIv^T&qVSP!hs&a;PO_QT6iW$`}={A8!e%zKTF`T_qvw
zSV2j!q{PODzDFQJBX~srJTP#6v{UBaUku0Rj)Vj?a^Tie@_jq=gBPQoKQI2hRzXY4
z8JdaIS!ih+H*VY?L`!m|2dM3C{KUh$|JbpoC0#O<#gBT2npYewItT3qyN;uV+KE?J
z9I?d(Wx0rniSZs25b(Qo5IqR;dpU_?7Y>TPHjFk6VCTWOtlH8K;;!Z|uUxeXzA`_)
z0Y&YcuphI;g$oz#?S)ydV@3{$iM)ni%D5abnv$27cK~%9FGrq}Cr{GRc>^7tL+J+y
z*V5NNM1wWm|7sn{^-$5eM?yDHFxwSnCyX&1$m(9uZUu*4a+Fx*X}jxsdg6AYZ*Z1}
zlT)tz(>w|MMe;5Z@H`P8wCU;TyjP3Wh;JfZ3e4RFT-aLFz~{xQNfLs_#>Nn@zx0|S
z9lh47OvFjBJn;5rbB96v8tjM0hbUw@@?o&wc^`Or`P@1v?v8T+WuXwFp8ujkue5k_
z?cblzVhte$s?oveuRBOt4j@mg?l7Y<t#MYE0rg!$qbe1@aqr$ecY1h?_kR-Qs<rEN
zT%7D(6oPa7FPK+m4tmm;f6$XB!ae7XkaVWYk`@s{h^f1WQGP55lh2EGtZsuC_ZrtB
z4frAY5QJIkE+Fadm@BpX`SYhd6gm^lbHYZ+k&gh#v|O=5EQG5^!<mGV%!_HPl<Pbz
zNUf!@S9l=Fe-Sn!Rx?L_2u>^%j1y=(@SWdS;9vER4nO23%^&iw*eb@mhNOQhE!uQ!
zbX30-*}CQ&aWJP<FFY#htSSTF3XlH&TT;~C1$nRJNz=t;?3a74kT==c9*aCUI9OIz
z_VMFKC}VJY6)#`@=gkYaAk#CQ9UY~ir#-im7j&1!%P$`JxI?dv1zB!xD;d8R77Cm_
zF8&6j4CN@LJRbo%HPE24cV_+{P<l{_oaVjk?7J8lkGykim9C)<QJkEdMA`J=CytA>
z=G@u*!(&$L!%BLQS+VtT=7w%svk{@8Iql|}tZT%de*Jn1J8^!O^y3{P@D0wrS;fLX
z{&W2}AF(z1`!RwJUH7!ESjf=e;2s8sso}PhKW@Tv8Ll|_;TUOJ@zCMJWuyTl3*)Z%
zUUlT-Dku<GnBE%9yXorcu4Bb%=Kh+5kT(@GLf;As8tUqhq?@DygMu(fq&e2`r?d0k
z{rezk`Cq>}F3txy`z1t0)uQ;odDyjUSB><!YtY42RsCTAqgeQHX^)PM+MWo8o{6_X
z0x$aY3&iHg<lT!_cCn<^S1`+|R<Fif)e(MvepuODT=EctQPSbONlO<L6~#irY!U7P
zYeg?cv<BluiJDq;baeeE3q6bz?AsR(UG~8OXGdpeQ?^S>P0bVvE<~$Gk-5T{Ag4SR
z;L)Sr-d@1Ll;q^f($c8HB0o+|@aJs(6NZ(CAYPky%!%u>8k|0TuU2nyrc`>}(1{hT
za~>8VSXYT~LV^SgkUnz;o74~1fiNueLv&xne-;=2hn<w;?ItA4%?8k*zaKHT>n@tO
zh_LY6h=>JzjVA+F{I`uToA7dfT3T8Pz<!%<J2e$2Cnt`9l$Dm&W;tcNeS4GRiK+<h
z@ohvZAiB7Qwic{;#@(=SFva;?_Ws?-$Sxcf$FxOuk8<+j1jb2F1JOZ!CSge7^%)=q
zqg$}k>fn@Nxxf0DA#^drfbteGfW5@R08l|<f~gMIA{@mfjrM%iVA5tk=FXi5&LA4`
za)0{Z!2<;OIpLtCmZ^ZM5^Pfkz8T{mp!l*8n)ptv0VYmy_+1DR9Iz;kMn*+PtGzJ5
zs0(qjrZsofWIo=9V>~V{F0!(}(6u3qYu>WC`!pv<LRwmSQi@}|amTi8Rsdq)GDp>t
z&z^b9Z)zasve^IU&%vw<D?*P$sCs+C9Fq5n6)T?qq5!7QYD+6CZzhqC7~F*xpf^7~
z@^92Z+8>*w(jqJ@jESHvQqX6)Q66Upsr!b8hI)F=QQOT;#(8=cqEGiunEBAk-o@Xp
z@#!VO&NR2QjMbF<ft^Nl{G1oOnnN-&GGbz4f{F+M2jdj8v$H7KN${nhtH9!b%*KRC
zq$Z0+bb3%D^@|s!mb>syQF}|AhQK~z)FU%96AwdJy;Ab()vK14<1H>rngxT;AE%#C
zCJoCvIywdh2I?P$sSXQDN2e^)!REk$2si}({{D#j=1&%pVPSh&SXyyB4{1119k**L
z5CS<k3=Y-rbtj$^0E=?fT3YP1r<WIs;w=RQ1w4!aP?~PfiKv7GE?)<L=p-DJv1b4w
zWdQC2-oAe02GL}{KHLId2H@_=lP6+kt**(*<Ee6k9UTPV_B}xqTf9QKVI{u%Nb?u7
zl6co>DEXt#z4g-cpkr>nx{mLdb<GsgeO7Gu96gPhAiwp5@yR`@+|(O3V7!o#gG~t^
zP<VO6_M>JtHUfNn#uab9P!qw&z&U+6HSPZt?G*kX!D41<xsi@8LAMx&4Bi+PkqW?#
z(Vtm(ASo#+J(3O+y(!KM_Ii4H>0)1yXaoKIU*Df{+riB4%KRPO6rc>RuS!p6<8Wy0
zc5BWGR1(&c_aV!lN9};BFE3wGR%Q+iH2(ZC)*k&QX|7ONNlh)?dO%Ihz5#Q#DBKuQ
zibk(KbZC4CJGQuhN)(ZYi7Xs?@JQF&vtx%XKAI5+&Ws_)Bj=t0#6rnHDU}Zt#3^5P
zsqfHgx_<v2jeDVus@GEyX(dl48<Zu*8SUbR2KC;hb+oj;mco3`{rn0#iju!!#a++1
z#mp*eYC6GlfU4XBv~%}BWe$eO8{b`GTfO=o{-Lk8w|c4#iH-WVpNp#na1Kg%)l}OK
zEF711)_!+f`bI|HsN5%W6&Owtfe8T0jT877ZOud|APCe--bO~I+KyaP8&-Sx@#7@K
z4*jF_)Ts9Xrvr_;P+%OzLh@0+1~GhE#hss@zkU05Uc^a!d$z!DC!&HEy%H!XEmi1_
z2beE;$>*7mrj01=^zVYR2z!jh9TI||u<((?hm{x;Z~kILN=uzJF^T{1!GfuR^T-ht
zayE_y0Un;1`bl-nw*s>APL7V+FV5TjVq1cki$@i#V?}hw>IQpxgoH5n{^w&?$V*9W
zGhwV-n(g9(Q^T&VuJ^C7KPqU42Pt&y*f6#ZP26bJ?q};aZhTWEbN~K*06c@l_Qi3n
zY-c9~-5rUmIy&Yvqc_k?(u??=W-Xs&u358&h;4HzDJhJ9X+GJudGnXA;2_)h9%r)i
z@+NaiZynNIP2yvNbz*G%i~DMI?3LsN4%QAdLKH_S52ztQC+wc*LnSP$u8tQ*&%B9>
z!Rw4-hX!R7BESk^Pac@l3!`<1EFa&ycN!kIL4R+r+Sm`9{#t<f$mr;FSV`&o>!KPn
zY)qk6pj^XP2J#M|a01-@w9cthrz(>|$a6}>YB-uQI!qYvYa1GpUxl9j<L2m?qLLDN
zV|MADr!9uic^g;N(l8p#{ZNBb8zG4@0)^=`O<(EjRa-DVP}%w5RIK+lQe_}40|@g%
zYhbGy85ooiMkh#~lm|XboQ=&RAK)$QCJ#TqNp<W_FGQTitH<kD-LYP__V(pzz=({D
zjA3Vv|M=`Ub6i+B{oo4-uCKJ*6V2Ptz;jSzs}ppZ9?q@8Sh$J}E(?00%I(|l3F1qB
zO~)TNc#v>0QD%#ai&63Zqs)I@Rh2594W$)2r_=+~ktm;n*Veqeyzt+65$Wj{%E1AL
z$uC2ri&d4C{{B5{uSoL^rLi11pyapeVQ11xSJUH=b?`fgaAlGA2O}A@wY3>>bnye%
zny5E~qcArafzxx|8`iJSV%7Bpo<U15Q`OcE(cSNm0FaZyqj&QrtRg@9Yleoxct}ls
z#9H^Kphb{XwJu$%`2M}SyDa>-z0Tiz`T;#T?epgoXdsaHu%AHd#5g_~UXpLyHU0TB
z%a!i*&oMC%ClqxVISRYbNMC=)&YiIJWC@!1ZowN1+>~a%Pf$=0?E-@$$bPU@Yzop8
zA--v@4FNZXYK6-_c;LXu%sg<+N3cg?D0~gg`{d-<nY&wd@75r?7KV4|3*x@y<uQxe
zZ{4{wz}GjwYi!%rtq^iEtoq)9@!#(F4ol@hmzGi&8WAunFAXejC8a71vEqRxS}H12
zul~$n`UztE-hKNJdKlNWI}r>u1<(fdlkI{(OMFIN8Q({-MD`muuJ?`?#6(2+1O%L;
z{!vm=QdG3nO1hSbCZO6sr6R|B+ewX*LsDnqL&J}9yL-1K)4?ww01{0zDk}|*7Ye$6
zKtPfwL!Y|6{Vb+X!OdPge-67B`A3X5#HkCcT6Gse@bkwHKGlO0#AFQ6AvO!M4!}Xg
z+<1Ko;auWb0+m%&2mEWjN8KjkY9kq5C+LIrY}&Mm7aL#!i&^MqA0D!Up0JJ;ZU&G5
zaps7FP=c=gM#ft_20)8UPPWAKKfzbcddedjdBFT$m2~0Y`zWN}sMFUJ9!juK<1Opg
zW7@9@V-#<(gNQ$Pq%y)>)7+;{pAN!WqTB(wVH%!jka)NPJnVz=!5RZM#E}8m`mC%h
zT-o?9JLD$D%6NfqF!z63W#AHs3vqk5JI|}Cs^a!AXc-oE4OPWs<IZd30heN@_;Ab}
za010ZTnG2X9KElfUrcl~MpTg_mhd$CZ%`)jLdzI&x)pjDvzX&q>K{LT2sut46C>*-
z^cgq;q=5<Gni>H>2h>e~MIda{RwPR*0&9||i};l^Ggh1;2DE2miv+V9-)DrdKrh0{
z-us_GGD*X_2BEzTb5ld08cOwo7&C!a*^GmjfT$1$Ecb<!5MvE6^Z=kB6xE~sP0FI<
zjgPcTzxIHs?&IY}GuDo=LQM^gXj%-_Uc_64MmJe8;4shBREtZ+aphj@9M%*o55J)X
zOLV&FDG=Q(_)&xxAnY?=-}q!j7Wzhn8U{4rCHD^?is1GGjZc!Nx7D+qGw<D7Jtg@L
z*;AHNTdUgb3Wor32U_r6U~~qJ5)%Nnh;!ht?%rOA6b<N$=g;>dC5TB{-XDY^HZ@ez
zOgy&7)s-~q3k;@_?W}n@7|{lRFgh}#V_H;Li0L#Unc>I-RIC2dJm5~hdFM_$m;#_@
zm(d_DUmnMj;GT%nL68y=2{F1WjPi6-xAmjfX}&!!AfUItXRsw(9D}K^^Y9mWP`$&L
zpikeO)b_A%=V{#AKvM=_m`^4*LK;(NfG2<}Q#b{(olC2zeL?%Axp`1Rtvc1pT%4c8
z?Wn7&e!m6J8U~DvoSZX;%Fyy5Fo?o?dD)U;dc(+Q*Y4d#&0pF_^xqw5fEEVsU8{aj
zE-(!U>*emfLPB;xO5EJsMZV9UZx#z-W@2)})GOGKn!0+8w(@WL+`aC#4Gj(s4j)pf
zsa9f?<a{1X{&b){sk7jd$TIc;&G*-TU`gxLPn<Y`$s`p6e{XMj&GR{Rk$~;#mzn69
zndJ-B&(Ro;;DmmFAV2>{>#Ed#Hqz84ph*l!sLyu!K7Fd7ruNhPToG!OzDpyVnQ#by
zDQ9E1CnqOk2Cg2fa9=3!4W5iMi(Q_c{{G`ffWN=0y86;+A(yzlj&NlNbTtMBKh$IH
zuI*Cafq_v;T01*oB|kvYpo1%R8{_d}HUsrmi}N$Te_xxNL{o5HnB_Zqw5Y18>dTkk
zfQq}>K&8K8?IDEwXL2Jqpo=1!p+In`C1X4idW$h)<CN_0&dwD33B$>%^X^Z{yy^|U
z(*)=f7A+8&Qp+&;kc&k4aqI`m9zHki(<gwC@}EDWbJI16<}y!@X_Ya!`%3kyR!UVU
zUAUn1-d|vY@U1%A+H&LRF~^9P%h(^UjEp&GEMQs(4Gjt~Xh7J?A~sK)#(*KwZ}39j
zB&OAXQjwv00=DNbT!@`rT@4j5+L;-TZL3Bmw@vNFH#?hcju+dADO+=MJfY(msAF`r
zA#?-)3WH~aUymmqSisDm)n}iJqunhCQ+zces+j|N7}y+A7vD27F(HbnXHZ<UG)`Y#
zg6UwsfcGt50y;GT_(Dp&A3aJ(1vE1=L#x09Bbtz2k$<kJI-1qlv($tp2USanX&8%-
zB&xWb3^Darh`2Ux`pT70;C8~ov#2=9i#Yp^*C-My>?cbvBzk?E7Pzo_7>i3l6}1)H
znVDVCLcr1fbakOUUlt)|>I_Puhu}a79&Ds~`ZOBzH15%6sCi;^^avN1xXYp=Kn_Ng
zGL%Q>rX~l}8F`KC2?@_Czz|Fp8s_*4aqE76l=+Vzts}Va+_?if{dxrECLBg(zkU42
zxW&S$S;2^eZ+B38VYrD8+MP*L`iSjk1fi{igZ8J}5g_b%(iqG#`0Bn8x+6vK^t)Sn
zdeWZtH?CjD*+d-!gE~YBl$q}CZo*RrTM!j>LbF57i=U<I_1+6H5S0a$f?YZG7+@a$
z8y>}S{dxxq8O$HZ-a$b@p!+x*Rg?2@;SiT<iUQ;YfB&<(V9l?CqA>?q^0mr$=Kb14
z(`ngSfy+utagz^{XFmb}L3z4ZC%jKY<dt&zuAMupjGoya0Lr7WjvbOzRD707yryuB
z6=z6I0kKU?7V|VmR!;n?tDC`~47|-EY|O;?ICLKzh()bW1xEw@1pWu5$FbD2-6ADW
zE6P;z&|r~yi#WRlA7?W)6>Lht7YyWp2u_WR?1aDqJ>K2<ZNRf<+?XChuh=A&|AYzb
zKL)R`u#2jy)A$hR;1Gb0;^+um!$;oUQ{&@X?HO<)>ytsnK}Pw2<KL>QKO)7E9_anV
z#1F0n?H0c=gT}+I5?}eoi!hduh>h4xYan{q@b4312v+SXeE#g&V>~?JnwnTAaN_ed
zUw(c6Zadx$Vo4lAId$qQYAPgDVQgPC59Inzv@-Xn_#Zs9!T?XBfG7D5@+R5`++#!`
zFo3>_rnLxEa1yEfYE>!>4ZL`(Yiby%sobA_`631J3p2LZuJjm`GTfm_RkY9DyQ^3`
zFkptj3pl2;l80^}u(!QEuQC$ji6PMEl)MFT77kruv(#=TCNymW%tH@uJ;ckbh>I!s
zr<rlrE&~1ZXrC@r@_PJuJEsOZ;!o~}etrxza=|C;#=8}tJlhD|fQEaVodwMNCuFhG
z(qVL8RJDWdC;$?er_MNd90QQz;vw|q*um)4N`P)icpPzHaR;^S#$^44woo8UWD8zE
zO9+Iz)b~kAuZ>0Sy1BtR;^-}feG`$3n&iJ%P7xiWVC#9{<YI8ux+*d*n8n4_H#F2Z
zewzUS4#pux^s8}clnJa06BFScCSbrGDTTkG@@%gfn-KXYAmE3_4*`tA2y^yAxpaaU
zuy(Bnt{w;+SAn(rgb{r_;){tx>l4Zoiu&r6cOb5yi{asyd!LAunSGspj5yeS$v(rZ
zP3rCESM=qO9x%?cap&>ts4O6nNPi%=BIbK(#VU_6HNqW{x|VkLj^o#<n>g9oWtP;M
z8XM(8#AK<>EiC#l4PNRtmo8$~O4@R)D?i#lz5#q45g9i#k43;$irNhBt~ie^w1&}v
z9Y(3RVQVYM3WaSSw4h_hjzxLLew}bTO`Sx}Ks~(!$@{p%(dEZ!aq!b@eipGO$(2di
zQZHV-h|=L>*t{8M9M7}^gRW=FGwIM6=={`w?TR8(=qJ!h?zp$OiI@S#LOFu=hDDnK
zrU81r6fj+sjZ1i4jdQxN(_BFIfnL%3pd#0w(k{2V)A8-=R}4PIYJ7U087KwjxCA8?
zBRyU?^o;T$CgzNomz0zYi}!(%muWM!XD%faWfl<?46h#sG=w!gQUMhiDWf*=k(QP=
zdAz#3d=yQgsNy|J1DgLH*atx3;EzW*-sD(A4R||ljh;GwOTGtywzn4yP3|S{J9zwf
z8}!?vUAjavk@PzfZ*t>L38nD{L#YR-RT!7($qx+;#c-y1OJZo~Rg6?7j}vDXA<|x$
zia=;#qnt3Q2jYiG6HvMjA3wsOQ;K*?hXsy^NX3@JdF8%zRbsKlCKtjFH}*r#Cg$C{
zbK)n93ky*N-t59DHsaK`ni%1&46k~Ay}bmd6TElw+<$ld8eFgAH+Yc>N~ZePu6>E!
z?$r5n+mn+Wuvx0(Fl!|<@EpTY(}eVmqevSNu+cIFfTLDuZ2*WVBJK*<9W0d;`(;G2
zj0!J3H5E{2M{q$)OUN1wSi~lyAbu$>e&XAV0cyZa8U2!Ch*XuAXqX8YPI%rU$jiEW
z&o|=q9w6sSaXcPrhHWm0TNefg`uFP*;2oj$2^crLVPGHx2@=x0d8HQb4)oxcaTtYm
zTfL!I>@_BgKYSR&!!*Ghi77jLJEl0fpv?eAen?9CWZ4613H-|g@!>Ou@0erGCGH4O
z#Eo32_z9%7fGIDf0!&&yKHNSzJbYDc>!shq+GuEY4h|Kz@Gmd)a5Ufq1Z4Bmx}YfM
z`^=XP!9`h`Zl`-Y?#s%~o}pVn3x+e>j*Jd)D_f(`#}Lc>1sp{`6qFd|!E0T8Ps`|f
zHPt^gr>I}6xH_^=#zl*2(0K3IvE%I0SNPa5#w{Cu`(TP;a1aR53<eIMCXma8@|}0w
zon}TOkeO0keB1&8NX7$Rpt0bWoQ9j#)zz(gIKojmFsm0j0(sA!^W>R?r>yRHdhMm|
zxdAK;ghs{6*Py+qZfA5)p3K82rV^8`fx(Z)XFwg&r%pY1NkfDy<z=Xcg@d`mD`q!(
zz3QI)D?TG@WL|yU5!`&QTYgc&H^k-^wW~OMt+KTenuY73=dn@kV>6gSP)Z%M{k&q;
zno@ten6C%f*$c-R9(#JWN40^g5RMJhZo*>C*pK)GE|4X79!7v)=N-J@pR=|0;l1GC
zJr;K^E8(#NywlLjF>866W(ze{@}+~Dl%3|_XW4Js!FnB7QmW<r#bkE9g%NDoPxXNE
z+Gs&*C7cEcg^+E?HP#IPbNR?W*MXbyhz(Jod~{?KRX``c6PBWA>U#Bd@mD^a=}sR`
zVWFokuc(-Dbgnx73#}M%2ZHb8#~YJ&$FM#e*=Ys-0dbuXCvQ|P7v|&Hs--JC@p}Na
z?`<CTIey!^`Nz+n<ynzRBeo}Qs$gmf^+j;7-bO}KQ?vUempeVm7Q&nn>caHQ44<lU
zigWJlBFvGpGi7L~I85&u#U|u%?$m2P!^EVurA8;Rw#_N?U5O78J)=JXSi?EcE{2IJ
zOgLBVJwWdX1ecbcj>3Zji4D!oxF;>d$yc0QOz%BlT#2E<+(1wN>OV@daO*2Mb#>dp
zR!UX7Gty8-D_Gdrq$~GdU>=Xg+Y1BVf<GSWEiy1ND*P<ks&#-&5GbLLrF8!M`ifi~
zWMo*D&MiCu2YA@P5zTdKsO7^{xmEdDV{uMcrmkn?&bSp+S61RgYFw%2h14Y+$wvXU
zKfL*sk53h(AOrSNK$~OV<ZoduFtK837Uw6Fg`tKm7Rj!iBesY+NJz>9PrmS*wMwLq
z3L;X3OR=3SLPA&I^K~bp`r#nvW-0W-YMk6Z?-1}O)SVtu!IM0i9mgcjoOv5ue*vBB
zxKS=zf9c08DGUimpc`IDaX(ZgX5a6z7y2TKzfu82l%$5=D3JHx&AfO3&#81T9rovD
zoR=`jNh54AOG{o>{MHT7Cg>G_OfagKhTHBYBn%<1;WFX{%+fzFfCqzoz1=u_FkkPo
zo!tx`0ASDv5BkYoXDh1-)PsWj{LNBmAc;VK))P8SB`UxuHw`<jtgM6#JL@z_NQ7To
z7Z)Wk<X>!_zlZypJMfQ}lcOVJze`j4=c`U7(~gS(Oa`BI^46d^EA9(8ZYgyZqbX^T
zH^i6f1Y(6I<E~$ic1idmcdmOAjAXyyU`{qR;t>{V9s9O}NyE%|E}X6k!BbM>60BDt
zK|$n1Q)#IglpG|js@;dAHe2xu&|W8}1r&06DNOHRqyfmec9lPaJuM+~GY$5vUgLuY
zC%R>w6T>+=NKWoqKX67C=l5iyO}JnlIL-2UK@J2eW<TOVjN$iGbfMb2KZOyRk&#i;
zL3tPk$@RR{$&>g+FetX3Q$(y~aY1Lg@$v$E0rSVa0s@g%>4+1E-dDNw^BqU*p+7?}
zMbu^Y{sLs45k2Zl81XHv=agFvjf_0&za8z6B^1I0wW3OlA41e?ZM`b>AUoRyrKHLv
z6jNYWhW`8)Ezxh@xK|(CEM<E_3;0=Z4Y+rTwe-8y);c(y>>Jgq4kwcFvazGOmKG?v
z1tSU?mfvD`JhUkk-FV~oRSzCLy4GtC8R?7WjVi>0VT@_afqnb(plKYwjt?p2hFN_r
zzm&PpwlrPP;?D&I1$x#~($ab$N7n*)084N-0$LCm$pK(}*dUCXf?XqzPKbR&oTGo?
zC~Yh94XpN>kbUOncAj3G=)9mu*ytAWRVcn{PBJCCz<Z10(?b$!3zaNfisM20<Ky9f
zpQlFqm&JHpRIytN#~=^!WC{OiYzCG!B`fXstIE#ReHjPnsquUSsSpmF{OfRzyRP!s
z$Jh6ENzoqE2o=x9T923?Da`%cO5W3ONXiO{=PeNr!yw{{uy4A5*9oqpazmElh0g{q
zYpAVF<Z`+bq6!@a=Q9P@05+HRs!N^K*Vm8eZFqTAZZ#nYPvNmNi@0O=ry!QdW;_^>
z7<Yr35%r@8$|WWs2?ElU#X>w50%Q;#+upryPS1!55CAHgy83zyyX|CT#3>j<v;*<R
z0w?0XOt?3_%PlQ^s=I$~LpA1!CgUN^YiHW~(1V`|g113P;?zh}_&<cb2{@K(+xCA~
ztCgmeRWxXjwIXRkniW|MNF^CclqRA<BV}GIDvCx$14%N4CLxrf!B9dfDl{i08JqC^
zPU>0D`~BbT|J~cOJ*%y_@9Vy<^E}S|*pL0#592mM>7lKt*>S)^6BCpBBOHn<%4anG
z)B^ZMT-uuRM;YuQtV7&{GRjn*(opqsZvKl*kY?%Wb@YV_O?+5v-OH-bQcur2I5;>p
zwUJFRojZ*K!=yu`_o{!6FYcsnKQ`L56mnf{g%_HH?yECsg}aquFlX`HaY3W}HA(hS
z&O!%4w2f9(z4vXg!TLQBGvcUda1%iqn0Jt@?6Z2Ykjevzn2s*&C3|7<hkrhfR*;rz
zqDDkP5}OwiwUAU}n4rsBYpmsN-MWPj)x*YnUA1@TDxdxwBz-a@<k}?J<W<Ya6vI+C
zYc4Jvae&iKe!Yof<;F-r9WmSYi_4XJN20zO-s*McV?od)=eKV<ldYh0d9p2V$8qWF
zDe6AgEo>~xw~xLRTW6E2)al#4xJ}2kv7lkgODV7*5y!0>zW?uEk!&?l9>h@9F4(ea
zqjce{V|igSdnuhRj5fJ(^X7zc<6!4)kw>YhAOIP1h`cC&NzTMYmx$Wrj@~!fvXBe&
z%*+zbo<%6Q;_B;8H`bnB_@U5|!_UCt!`Ig3ATjYG3-I#Fvm)UH3#}MwCz9oR0fpZS
z%>DM*ty#SqO^NNX;NYwDUO^PNy1F_$f1@QgE+xg{%n**ewA|_YQ|-YzN_M@Hy$?nV
zXaa5uEj7uXlmcfDA#lW=2KO5otjDg%&(54oFo*D4^4#ump9isNx)B^zqS|b}Hh$`)
z$#5$2T5f|pFH*U&8<~lWNa@h(@agBYZo@0kNlMK#A~SEi>CS+0B!}KTR;lYXcI?!t
zn4<@M37i;7<SNS<i&$2r5Z|_%4)5pt&E89akQ#DG-5HKCOBAB<b!X}ZYiMl35a49J
zAlb+#r=H1Gc3AZiO>KVkfK?P(Lh}zz2#R|Yh#^np>*0H$@vcM;Z@^e_<w)%ivW=%C
z3rBKIC-xiJ)tERNXcR?zR%NC4b0e_=?or4FIU7g;dv<Tz_E-FH&m#9(g+2Rhynp?H
z^f&tpN~7<*SMzU*?bdG`Hfji$aL4UgUuv0->-qgXSKfXnTMckH^_<jL12a)j{ZYuD
znKRMadONy(o^0~1Z46bH3<}cz^{-#Pd{kV_qPwJIV;QmxCidIQ%mcy_^ztU}MF*j$
zTv+?$&C)t3hHdT~iRbtM+!&`0|8~8bqj)vgZ9HN3xuj)CgT!MEHN#Egk8{6%*D0a0
z$k|waN(72QaVJmy(|COKonBfwSy>dGmmFE7H*;ob;`;Q=Oj}Y!GJs95S-R8?$$(%!
z7c5*ja^a=jsZ~S7@Bg*GFk$nsf1Z8XLZTcS8|%3j6EeaeZ5XP~BAJ9)gef6dKm02F
ze$j%$`hK!zv-XfMhlc(c(~b(;4dzQAWTbC8H@1;V_W0z+7{jKfCJkt`*vp5ntUonq
zVV4ZIz0r406?Yh~q+|{Ac`-T~@|m2OyG7-d$5j-+0>w#bbMXUs`BL0=QIqf^C{C$w
zDaHo2Yuh#;u~>F5cuGIozWi$$WsY8Ea&=P)2_a9b?f$WRdbeN1h&Myktg9QN>vNAa
zu8OJqvnFQH!uTN$5X^XLDR>4PIWl4V_`;VZble4bN3R*5Gr%x;e<LN9h{#CS#;^X;
z`>At6{aYTo(^Ty3nz3`IRB<<JPC2v`@wcb$hM<GH-jey+a0YFT(q?tkt!CZY-Deq_
zam5Og<nC)ehnP4QFKNzv^ZNC9$4_5e0IfiRJb54wM99hTaG`Jcv&Ox-7{!a~2YW9g
zYKc`<P`KmVn^F)xk>3VdQk8+`Mlwb4r0%&atfGg?t9ctg|0C#ST4<*rBIPzBA$6iO
z(ON~Du<F4uy}`nDVBl6TdHh-fR>|Gi2W#?Ad2id64!uYbfz8!VPA)6w7*+6O)tn=v
zE2x=1e|JdynpRg($b+YBV<L+EW!+DlF#&d$s;f6#zT|8vSGvFX!=s0)GZldU?;*T;
zZpUdfc%tL%mANrT%3`lHKA4tT(JfhTN&iv5ONpM_(69uK7`A=frHi%6vI7QOfw+ag
zA_eDM^gdU6Ncy_Q6_oiaSFY=3-R;DR#M1H5y^q|L=-)aYA0Lywdh__j=SCG>bqBMi
zSM1N+<JY)Kc3Huvo1(S&;=It1kdV;OWnDY(VkhNfTvR^P&ROzpd+8}!NOLqfpPvjK
zYJG}13Kx!ODhyIq-gzlilgchcWz?hy4my%F=gy`+y+#h$GZy;*MBorRm)1Tt&o57F
zh(3FEuQw>8D484}ApbF9>dNBy=n);>@&BRXwamie39R}gP0fJJT|Pei7&>|P=NTRA
zXFYNi&RNMk`OAaE14H53gL{E`m=_mT7~4<W@q4`yQD)!1P2o9D9&))N@nzik^VOU+
zFmFMm(H}mz@|dA+R97T|BDX-G!0Vg)h<EI3R`-ETK)bQ76xU6$W22l(m_6h{XAHgN
z<>hs5lQg3?N5`it^{Jr&!okeQNDl2~|2H<odyB&5w(sqprffYBXhXn_ACcOKj9F-T
zr-EPK{vq|=?k==MasROG0SPHO@i<U<N4xGV3hQk$$-PN)%$P$r3FKO9%Rg}FbF{#A
zojG}u8~i%};M3jQCSS9KDYu)N|9+~T^sG<s-uF*+naysk57v9?eCEL`7sV2TaWr%<
z#V7vx@dMww7@EY7E_gq9UCc<;#s_Dv%!yCzHDJJi7K#+~bvZHd?6;{K3LZS5z4NrZ
z+8`yR4XherGMb>IM=~{BL*roJ-q5d?s6uf6G(;K)0rkO8sxFGFBZvi!vY8!I`Zz7u
z^IP3vY3@vV22sYq!q8BAp<@pnnCSlf`epbVQCoumW9MBaXi}j2>NHDgJTafL1QMix
zeV`u$i;BrDwzv{We0tO2Uw9rT)ZiwJ=FZhaXrlEX_3NRKlP7hCF6Uz??Nq#eJ^ku_
zXC5ZYf~-7agABj&dHlM~DNqNH4la`ypHH2er1ta=Om{UWO|o@?%+k3HxgBNc7<82E
zjZPt7I`b1Bm-?*mYTz5GeL|IQH>q2}=LffM>#kh<SJ$pxPe-Xkx;R{2T0bEL)OT|G
zdb0+H@_a9E!q#oqsSZP?n{Wy)e|}lhX@4Dg3tOCAlVgJ&S-3XOVeNZ&&8Xq8mYY2>
zwVT}F3R;f>AsbMGd^165`nz>kx;rZ^y>_h4FKyd=cr^Te!h?p5!SG(A0td(N<qL~z
zowB0-_I~x=ypVT>f1V+Qo=XThua+ws5wu9bHT+;w+zPi)9PElJkH!6cmYzBh-*mH1
z2nk!@&^@H2kCLI2(xW%h^kw~>hCh`LF_zV7Zz*ZREiPTUlv3FRGRcD_ZK0d4P4alU
zHrQ36i02;Ys6-OJtR!Mr9=B(0=@sTG$ysNU-aKlDmNGE0I9kZrf4oiv-ERnwCw10D
z3`oX$AuRva4e@WI&F#NBKS;1&e0l^T#D&O7K|COOcCnxq`wzBfZ3&4t5;6|QTWX^6
zwOO;@mr`?WzaYPUiYx8|7*<-XUOjA+WY4X>s#cOw3rWXstp*Iz%g+l`ywo0}tYu66
zVk7w5eg_)f749mZ*Be;<G;80tDX(k3800LHN$h^P)0c*F%J0;ss9fzmeAp2X*=xT#
ztt>+RS68>nRmR~{0b3=p!-WnJ(a{gTp6EJ#cWwj-MBP6N6cAniaV@u{cH}hULi?Cp
zGd|wnI|aE)jbGaSO@R_BBP>j@;r4{6%svAtlz+SI6_w~d<bCqUi{~6Cttd>RP@E-G
z_TcM#^GwJAwz`x8XCkL@_shflNQx;(Ae-~LtLjv{Qtsg4!-9^RU`Wr>M#W4R(p9NV
zvnju##+&s5D7s;{OZCf+J+QHha@f)Clkfy5HKB;XV_jx%zqq33?Wzluh|1PR<W9~!
zYc}e3PR{bB4s)JF1)TgI1&>b64-N3Ro0!5JNSew(hrd%-v9@xOYuV-|-|Ewfi_K}&
z;5Dpy*w2T}e{VFI?tBqF^c~-!Hq@L%uU?F#nyII!RwrvBhE{MqXvEg87j$=8x5chA
zrItiJfVqGPTluFRc-zk;YSrEA*Z=Wu*RI|8>DxU$?>DY{e`Ky-tQAabzXL32V&gGs
zhO6Y}wRIq)GsvmUlJ!Y?L4IswZ(np?vB$1=x4zIzR%v>inw5d~x+zoI%m}@PUu9)f
zdj}g+hxOAH#S!s<d2pOC+*M8n6dDchi-fepX@c~(1)s=c*LOpVtk&aU*vo`#J3sZg
z@!}KYqxK8veWHQiHJ8a+1Vrh4q(OduH{vHQMr*ih$YWx}tIGM`Ys=?-{T}yGeQJH5
zGB%CQt#w)}b@F!1m8rja<$cXYR~wTVI?j7H?y;G@vS0W;?rGt=>m0J<#*Y{7ysIAG
zy>rL6QMvFK-B2}uY<_dM|J?NNjg5h$Uj@45#;3*VBV^f3={I7ZfjWuI2RMP4JyM&)
zPoKWTqyV}ip#J)4ZrS@Zt+&T4v?y2I7vC?T6sb0{l*)|N;(ziHJ}FuEl<DE~;?xxU
z<lvutibZOOqEgQ6vX8d8xf5$eWA@CMou#D8#!=?!IcbK_&LEV2($eXJi=3skwNgX)
z8u9!~WpTbg(v1x<rr_`nw~DN1!R}2VfBXBJ6sA6TngyXYyIsgEn=3<X_~Fl)-@6nh
zz_=Lr=ik`X3l>ZFsfybplCQv<0OCa_i#N6Z<rm}Ro367|V;stlo!tKA>(>ni-J!vw
zXL5(f_k7uZvdg%6xsWoW=SHWJ&la>7^0J|_HMVwk>NUH}dw!=%T3CPN`1p-V1+P9K
zy*jyl>y|C+<|rPTv1dJ}8AM)>9-;yH{&JW8hy#Vz55|-44YJrAxv{I1RDkAI66#OT
zI$jQLoVja+Ht{1x$+?ev_3nLpSR)OL!!HgTr=?YT_wHJ1rhrwBplLA$J^x%kiMl#C
z3n6a!x#Z-Dw**+o_en<xxV}qt-4|iera?mcJaJyzzsu0|c}jBw$$HEtuCY9Obb)&6
z?4e225)x*!iH1hy*`n@r&5%-nWBlEGk;R+^g}>LtC29|(UvkBzaD;?znXH58O$Rtt
z^U8QoI>8Z;S%nV{&830^T7(^^0f_n^zW(t^-8}hh_4YOtt?2uSn{b+<6h&B#2L%Po
z9M7D|c>2_pUVo0RepmefKWLZleMYOGa|`{7>YjAm$?Vo?X@4h|fsWb80nFm+KVj4D
zv^0<L9oy|Rs!O@GZvFZtcb(w_-dl=X+dTi*Uw%1j=4%CowP2Rq9b{v3@5cgH`OUmU
z>h1pZ9NmY?{p{`SbJt5@t@rg~I%WCjA4+}g_f4Q?$`WLxL|3=qNiF%+qeqRGR7MYa
zhcg5h6B|3?!Za*{;2Jf4>(T`(GbF-R%k?WYtLakZKp6^a(Z7{_P7IK~?lQ!*?!{6a
zT^<sl&hvE&Wz*tyX7|I3SfN#+-}*$^)1X6F_KX*#5-=WcvG~28GZ41WLhTWd`-ZAh
z{5!dLN^!+Ru!KuvWbCNa<%G=@dR)kpLNzq^FDtKS*KS`v!J`w>sQ8U<W}7e2?W`O<
z;>HMRDdaU%zRe(ONHH27w|jRE&ESFCOxKM1f<!=|zJ=w5sC7?Ydq!yTGVS?m`}QRE
z-ruGSz`Bj+ZTtOw(I^z?QOh{8r~0omk-NmbhEM}6BQaNN?_M_ur4+Y=M3AGEb<kIB
z>C_lulPm@UQg$WyA%_EGNBL2B`9cH!N-Wn>kV%EaKZKuvh0u8YX$w3-&##}wpfch)
zIWPYLXR*R*k^bJqpt+vX_ybd-$Q!4wzJdCxPQ^fOO&yh)?mEcS?5(YDsx9GI1kV~=
z6$-k}jUUm-QSqzbr10EJ{K8Quw%E4fBt*ACHy!bv!b(8te%{_ovJS7>i3S^%h4|xt
z>F~jWkBW*Q#|q?;>jyM1>pL)d0!Q4RJx0B<@Ra_VO&bH0e*UZgn`wMt;i_Lb2S|!c
z4opBQ=fP!m{G~0A0*cH*DP9!@31+do4p0^7bLi;Ng|=zpFM9OosKCXQo?WsnVk;dK
zq{{fWUZGeGIW%{3zHqbP7M7O^<iJ&1+G;amhr45oMalB|4kPYK@FHYz6lS*mJQCrW
z)_<bu+r^67Ca5|n<O(CW%h|VX?Qr{5UyAbVXEO65XCy$A>4=ph!^K+i)VssC&}!y<
z>$jGT)3I~s9(;U@H?-$oB)F#L+jmRnUvYJQedJhLS{kN?Qm)SH*pq17rF8&(V-SrW
zIVJEjdFyn(b*<JJx>6_ziu$%vu&=1&=t>s^Z9i+bsG|Z0V{KpMuhV3g!Ji<O4E@An
z_q=rWPEO7-4cqR)qM>=i=FPIjQDX)U)Ha?o0v7!B(IS2M1)~D&zp9Lm3z46KdI|bp
z=WSP-56WgVj503mu0y0OvV6eJ_~VfI{H%<O<)l*@ze!1b7V9k<Y`dS#KS;~H<mS!o
zxo@Do5-V@z-@Auhz+_*vLF8s{D=V@3G-MGYpuv=?Lx&FZ{rKHLPTdI=h>jHMSMQLJ
zCGk7Bt@5cJ?}?=dd#XNtg03M4$i92GzM%nmz~teY{Xy$EUPs-nLcW%_<1%1}Ey9_Z
zlbJbW`0z&Pbb=l(@k=Qw<M(R!F_B1r;mq0SXyF{hsH_Dv#mt}IyMO;i##Q1Ye+e3p
zTFdi*sc7>#1`Nt`IW|6g_yDJDf`Zn_NJy~JWC8TCH#0d(bdX67_{-R4@j2aDv+y8X
zZ*DH9a0-nrWizK^J%^iTs4WECCi{FPTh-aoh=pR#NR0^-oRBJzwt;7Pn4&hAmj^CI
zXtI{WSYfKZz8$`r24R~O4ZaLi7=%Oa_B-P$j?NF7d!P6nn6-?aQz-vQTdMFENIi3A
zckUbF<0h~xh5ELC3tItj$eH?=^D1fzsi9H&`Adqb7KGOeViJ+=2nr(9z(GN7=u_#n
zvPgFDm0pTyr&06wT~Iwt`#5^@+_`hq@mvV?$SUyRKshFqOWZ6DFz@o;=KsZHnN0j3
z)Rt}EvEvrFi-?50j-F;Qqpwe&%-z2i8pj8nj|xSct!g$B`Jt$9!|yXi#&iX`QL~sN
z(S05jrQ(aQh5!Zyo}HD2X#3-nB<g(yq?4E(Z;)?*+Q)q5R4-o|(5cR>4_=hmSF?u>
zSdxS5amZgCbZ)a|Sn$q#BRX*D1R(NI^?eOf;3<uE2gX*^)N0P=_Jy+O_vR5C2NeaM
zo7^;nX4IQT(uS+2mxPB9K`#jQM=I~(LvGrij@8|3g~$wHSojyWT_^_S<cQwXD~qzT
zE6|ZGd9b-{gaQW}N;nSh&E!2O<Jj|*;3xu$>Jw}A?E(Nx1(cM@;y>8AcP!S3=qiVl
z*V&o20c3futS(4UthKm^X|wFEp$j3=qsd6RXHOB<7A=}Ra?58dM*{sW5lbSS{{BT_
zIV~hY(o~MmU};)f8DAKBm_2YOE33M$F8<P`CU)pAZKsg4H#Iga9yUqTL-0k}bL+(i
zp~||~vsbSfTACujd|peU(^XDm=S_G{Is*|R6Jm~n2rU<FASAv2L>%VZoE)0SP=PM}
z#@)Mnx01+DYRpn*#gM8pe|C=05D7=!ey#^8B*EHV<R3Vq$$H+<GrD7k{iCzjND)kb
zLqogE{1^Z7@Ct|=NKV)kD%bW;62&$%*(fT`!QL{ru2(>0u^b0^4zozyyP!73f#4o!
zdeCH6@Z{qoZaRUbPd~Ia;i~(snvY@G!4ak-{`_+zDp2bfyATA=_BJ+y?cy$78WSVG
z6m^xbSPHzfYc)uU=MN1~`)uja37b{c9LFi7?pb<=xidC7I3T^!sCj{~jdF)+eCCKk
z$SYKUr{xDP<5CG0+}|)+T`G8d@JYYh2#L?~D4;|`oVRM0fLKwz4~x>S0tB&rdLH=z
zKttL3qO5EkcSk=N#Co5`)%rt}W~hXitXZR5{`M^?1WJkHP~-Wr`g<0wTX&Wk^gZiX
zPigQA(hSt~ChrbfuU&h0Z5oCBWB{p}dw;$Mw2#hUs!ZRZJH0BS1dj(}Y?AQp^XJvT
z9Hmibku7}njC;u;%+)XLw2n=3cW$sFG7qAkx6>JApgEPK)BEF9K;*AKPSevXhoj)2
z>@ned<(|T^zds!1gUcJwyxWX5pZEaIZ&3TH{g20@4hNo6a$8sZQm=S#oC>?HZ{MS2
z2WC;#=*JrB>wAtKzOj3gY1_ZkGBeAGMJA5ODT<4VU=GO`Pa6ICn5cPg=u|K3&4`u}
zNkc7}f49^F;9=o-O=I$zJ!jsPx356Ob@9Qa?gRFaFWO^Nv-u%ME^LVHW?M!2=DF8E
z$1_LLp>G0j*?_X30Hys;DRVbY##V-rt@1>t4lgaBe}EfG%CjM-3i-c-%@Z&ek6_*I
zhK2^m4I8>&R_?av7R90mB$-zuE25BJLqx}?ixqL8vu@OAum|&<=)nV*T+UUR0?U|r
z=MKA0=wm1KKgmu9l#yl6-&P*J57BshPj~VRDlv;tbKg^H^xR9`WpU!VDM)?ZmVi@P
zZ&$W_(s{dxMS!FntY9)>7Ib_QiaCw?y1LB{4yf%FZe7)ZWn*6*!;vhajVD6)V5CIQ
zFy3;ioTOKqn-*GH9$(h|^OR>Uf=ZQW?WIy~V<Y%;Etk)Goo{HeyFO+Sv{pvv@#c&I
z*3K{2Zbt*+07^jj{LFce<a?&-E|Z0@nF!z0WN98J3QD<7mZ8HYOep5l($oK1IYg?E
zt{E6c5T2!RuosLAj`n$#DgMxsh;Mg}nYt~w_gyo3;~(T+&^{X<KRU*ms*&-XnMHQ=
zi!DINCuIHW)>W}0ac-od5f&1nw&A5N(1GqSS|`@`UnQuITYe~~J9X>U{Q1)|8vThE
zD96F8WWC%AgSdU&vwlj`o;{;H!h>QCb>+v8@-1ylMA7IIC&1_(I6Kj)-5ayxq(P8o
z64?v!(P!t*6ES^I<FjN41UJr98}=aLv9&1ukO%DExs!U<i%(lXnm|UHqb_=T>vjFM
z?YF*TK*Fo|kuDuOdc)q;e4)mQXXX36&|5I`fG@fXjXn(Gx0->RLaAk8p}GF~)DW-r
zrGv>#S%+q3jJQ}$6}Yl<Ic*XvaiIP9v6%yb#X{2;krT+kbSW_ICCE*HF%QCetX+&9
zSQ$PVjz@U!3ACI4c>Ca38<`a=SB}G~4RrBqhYpZS6HpyM+BXH(Ry@ol{w6;-yWc!G
z4k^Lv)vNi)Eu5z4ZpjdHat!{~K7q<TCMG64ob8jieb^^TU~s-%Y$}3cVh9CBz7_rO
zeObEzw9{jsC1AUg2wX<MKs^d(7eqh~MtM&(oT5=PbM8<vBzl6FI7J9dO-Uj2t(+3$
z<D<{X%@SjJtbNE>0C@u67)lcT2&6Fxkz8zoW2GjH4zf?Zv%V)moah0`YFe!Tnn>vK
z^NlF+Q)%LRit?PV+cY|_Ytav~PB?#F=LY9mI?a?)8#9J9IoZAWN}OG}McPj-K%6}f
z8%~4Z4v{j7JID9*j>>I!F8b0_b?rM?qGRWzMbGkQI*GPz+eW>fE2CQs{fsqun5@As
zK^_Fqxm&lZB54?EMh{(i+qSG1gd*x{YJpnrZotE&Z{uko;dQU+4v!7A{P}C$Dli$>
zdHM1L*A)quF2&Z@7_$0V5A?B$J-fAf5cgh7Yq6eQBpeRM4y{y6Bg<Nwup>g300CKt
zWIQJ)Zx<bgpWTG5WNSW0$2^6(VZ(<r*Fa2d6r<+ReRfLqW-1`?3X$U;WNy@Fw78&2
zlPp<Yvu4GxFQ5<zWj*S)+M{=%&0sSuB4F&ElQ)2$GA=8CYDf|!08|Vm7He9}#%~Zf
z2#Gq$6p2+M{~`Y(;xno?JuAzOg2sFuhVhmSexRLdAF_lag!_SbpRNU>_k*)t2Xp=R
zgM&{utZAn`QV+L^veqERUmKR*o7r9BCNqwqe;Vec_rCsq#T}w#)1<E?eJ5*w7<co7
z|Gu=}8@hg*M};T+8fTW`kroT1fB(7VPieV&+m{7n{=4l}o25&@i$I2Q!HkwQv-Wf+
z5JRO$9eXS~;7_Pgl_DG^ow`5>z1jIyCrMTMHSf-N89jUFIusIW8x1l)Jx1nHJrO<t
zhd2G9B?0g2=(@R0&MKCaP2}%pNC`WUO@FjpLftFk1DJ$L3|aF&VLNqCS1mXiH^>&#
zptS%kjkLm5#L=#1b;D~+{%Evrq3W;5HoEtfH6NYFa8(^$`VJ@0Y>&Z;gTt6F?lvtm
zI6Q*)@7gt!86V&(Fv=v7O2CQkgg+54Zwkm=#L5@8HIlKfHN3j{wrTYQd3~hI<lxeQ
zKWuW4XqL;hO}rnkgyZv+ZB$!Yj9xHk+r20x>sWltS8$sQ2-xH+JTBdv8y)Fnsr^HP
zoAhhCOZ`6&2V$<G7CyvfWf#f4N9#;VVWos$!TpH2gtE0~J;htN$?54e%R4&`=w*`n
z+5P*dQSWb`Jn>Ikl_APXb}xFC{T4gwRAk+(S;HS@X^a{^I$>d??SdgkvI_OH&ubj+
zbGRbtpVJyAl~vY;C?C-|;W02L?c<(}BOl72>?J*}U5!)IX6w6ej%}UTt^ec!B{kz8
zvVA?JNyawVM7AZ4g!!@uvKT!WR0dGbw{d<JE|!yFBd}ATIC<CFx$FQ>X>60_Dz>zd
zs03m@dvQJONm~y{(hkVF8LZFiEg9xp*rOg50gV`u`6qb;<56DCMFi1vDSA<!_+8k(
z012`~4vg(0(VYEkxMOE6ar->%B9R}6IcG$DfFe*@KIw%-GR%*a0{&-dSXdl;vOrR%
zcWT#7m_TCQ;fX9eqe-?c^By#PuaCB{Ej^Tx{$ZiGpdKv~7d)l`zuXz`gSYA+M`?+N
zq7!DDUeX*V;Qb9YZ9JYorR^U5<+>YOe0T9y9zGl1KT=ch)CrN2lG;0QrYx>G8_C!e
zGN|Q*C0}mm(Wm%=%^qo;5W|Q<yCn93cp2Mp0{M>tl@7J<F4@wr+~?euw|{n)SdDLS
zaW~#cW_nD^eUVfi(n%y7NiU4sz%R1ud+9*CZF^<kCLkOvM<=6>RETLbwAQQEafA}X
z<8|c)Gm0PKjgq1#DO+FblzK6b>Jwj6q9WgN$(`1Ju3BQ-<{>aA>4I?|uZKqX$SZLn
z%7<o$ADH|&X|E8;^EmTp<cA{jlonO-?Cb**f0M}gA3d53Eu=6NuuGZcB_A8Hy2TY<
z#q;=aOTxFOw6JxUIMY9*(9T4~UwSD&sdPZEa-78QK2&v>FD|81N(S)(9z}@Zaf)?v
zaY=bHMItc?!m8rp^1m38M?NIpdYW*8fvVBjNz5`|uppC>=Y~Z;ZW3+3$k$hJejpNI
zWvah>7{?g$kqQ~;W5PamYUlA~9K}|mwwJUp*azD`SueiWsCe)Z8$1E>Cc2RH#h8op
zh+4z`%D)NY`;tNLdsM!!lmobM6D8+Q2unP}xd-*b%wrBsoF-dyv#-5wUlRL$V>?P@
zK4DO)*Vq2am(${hC6bNxq@JA>*fX_GuFxm2SY&Dnj>_~s2$`a-*Dgpaj)3WWY}H~2
zMp6CJ(ZEF%SiRv^8_8{t7Qg%)UFw1^uC89v1bbxgXSw{cQf%z5G&ec7JOQR`%8HdQ
zk-Qc&Rqz`QLr_y&1q?^}erRlW$%%JH$};)ALST<DuU2&IC;1!)<wIMF)B^%!=hi7N
z)>Ffbn4&YUI0%%SvP5#?9>Trd(?g|~A|OMyw3kQEq{=HaE}^U6@}>F}zM$|1*0fuY
z&+eNjBRSEjFKCzzD7_S5z~LsAi<CkJw}115<I*STEa2cf$X{v?=v3}3ObtB(RRax>
zIF4hrOD`q6BBkFu@ps8gN3<xYntI8|V2S{L4)rs)aw@-}UAQ+m*j_ylmhBaD0V0Y`
zV=cz7eKJmAs;=&B&g#>~=d*fC#=m;=hCPV*<fG0`j5I((nT)X<*v&eO_fQ?F;%^5=
z&WC8adi>BKCHn!Ao(P0#cvQ<DYlenuTAhey%HzX~^!QU%?IQQa&01Po3RCH(70bh~
zU0VLMsZ(_eACt`hpl-7&2lg#a5N|#aF439!L!Q#0To+6%#ZRAfWk4r--CjEQHVMBy
z9(>EI=dI#mCl+ugFC8g0@_&4Pl6tf`YXNSh@ngHRPNXs8g6T!o7hk{VV5+N_6680Y
zU-sDHib*Ibmy8a~r8SucE5|B?E>PxQw^i`;^}T~qCox8$<%2wiF@Z&$lBz-m{Jk*4
ze)7aZeAHCZC#*7=yeXrDM{{3L&4$5kCATAoT^*N`n~UvjAu%{?qP;*$ukIEUh<?-j
zdVIEJ<>g0;TlbdqvAMy$*+xCkQ<_E<TMqmu-r@@1;lwnj5)d&;*foQ?Iw`mz$|9fO
zjuBlMV2t|3OPab3C4j?XX;`nYEp=l)Cx#sp&{TtevnBz6Bm?`XEzJOkjqs#^D~1Ar
zDPPY1CG8)$E!6I1WZx|@f}QbATt8OxS2=kXx6F*jvc$3yh;o6R$Mg7Tkh&{mAkt<i
z(?oF(hOzU!ckYzdp^ciInwOcWCvJD3f0%3E0Ru)nU9%A&b`=fh)*0fRo7NC@?BdO(
zIM2KYLLlbb$E7@`0e1Roti0C}Z(xT$GyLOJb9+HMM;JoNP-*ze6Jg|BAP^KHv}H-x
zoHUIlrBYe5r{DPfm)m;ihZd9zz4#jH-WO32m5_aIG(5Ceo(i!rjF6gZXvhQpho^YR
zQHA%`mQtVw0(k^1RP@~G?cGP!N}E{?PeLTd8<h@-4oAYbjL!Wt3+9CvfQ&Nxi|~F{
zii#MnK{7g&jAEHqgjEuJ!G;}eB>{)oFtbR#q?V`$(w*Y;_MQT_DWVBq@dS9hqLmt*
z6tJYXEWhh3ksMtADX0}v8%XU74u)f`6^Gj@uFXS*Q6yBGFKh^pXp+-dc@~&FVe>mF
z1+?7-WF$njtp1&}<2h5rAR12T&I)u8z?>DMJxXdBlX7S&G%yW}U_Ii^Y3V<hKara|
zr_zweS;K57Vezc9Q%I(mLQY!EgNquKx~P;Ki7Zhe17Bpwl0lBR-X<B!V9oL2Hms+Z
zR7ifIBju&@i7+tJ)q-0xlX(rJIWVYSFp*bNRn@R^T%njbPOsH^oE8<P@+)*GE=X8w
z$;Cyig;YWfeU)G?r2wr$fAd3@!|m4lI~BJKWFAyMpsf5sA8Gw)>JyzA;RPKP2w?2-
zMy4T%J21NTsbeWE$@=DXV>C5g$RkLkTOP%(^9lA|(x73??!7K5?Ufc~BmSLyacKBz
zF53DXOeV($2XB-#S(KpMXkpl}BdQ4>uRLxsIZ<Z?jz*DCcWB?9$FE(t9wq<Z?K)uh
z)med<&OCwp+~C2x8D&5xI}}vx84uo^*XNO;dE;nb<anFzB>n(x7hBj)dF1raPZ<6}
zV}%%>k)t7yaXr~Px=6~FL_w7w{9?tOB@0}*U_m3(xOH{{7#XOU$h|;}_^Ky+IxPpy
z#m4HFndlvKcCynk*mwj@Qfb)pT0ub#BcAS+ZxRgf?c1M;<m}NR8Zx|e6dW}qCM5)&
zc=Hc1VxH2RbUb`Vdb@*MZX>ZF<1~vg-%Ot{<rFq2@#|Je$rYn&v3Vtu`4hs7A`9(M
zK%u!G;i=lY3&-y}tVpO4KFO09Z^gC4Jm%86@z<ez;J)?4j4~NeXtg%Tjvzx)&?p8D
zG&i{_0C=8KYk*M6nS;xz;T;$P5b!!u^!&fv-J4+`K_YWXj1P(4R-!C?;GWvEJS|9L
zsPF32Z4EaAZ&z{1va}8iU>2Mm50`k@T$2V1AQIkZHpVLcSFc_*k1>rdBs!xfGga}I
z?Jn2yWhBk#Fgp^XlE#iTlX~{<-B%_~`wf_M#H6p};4CEQ@K9oU#Qpom3Z5}BAH+;}
z=#(UkA9Ln@xyagLZ;25MERL`pWWQoI6^3g`#}J5WFPa{@9=*Bv-|gFbgF3<|VcHA{
z^0KRS;O^-!010Aq&02xn4RkSj$@O0j9s9*lg)>DAwq*}eJsB}cxZtpCDADZi*M!IR
zcNPW!0iJ)Tg9qsr`uyVN8Ju9*!*O91uK}Ryb8utO<$B#%!%YmMWF*!FamQZ4<T;@c
zr=xf80E{M9lP?f)_$Ns~%!XB^ofMGuz>i%aDt-U%BCt8l6o2WNcH#M}E~KO@3iC`%
ze6}JPR7gvh;;>-`C036JsXR?2k6}9qMZqj%4xF`X)(l@YLSmxr=GMds#<vus7Rm#;
zuM{m`GOtA?y$AwF=r~WF?DA{6m^*<^Y3UAT`RK2D4x_{fp3|QF7mM$u2wi;ZvZ}=D
z95w!rw69T7Gj)nER$&ckI#yc9b(ZKqhYx|t{mQoKJai6fA4=@Gs|uqOS@#}aRC4QM
zUQ)IbA49ziFB{{RBaZRbCc5V4M`njVCjVIS(KN(xrN$B7fBeLW*+;wXN6Uj;9nY%(
z1Eumn_6~w6JDQnAVl=j|q-GN&6Y$A_hE}iC$JMDmN06}f>NR-u`mVpGvubzsx5m%!
z%^Uur&~CG~wq)#Izwnts`RqNxZBc<@xpwP6!8p=5OF*q{?hK0t(G9hTzXp1VF7)d=
zb_6ma`L2=yCW)Q)EQ%W|=bL|kmCnhC!LvVCk@nr+{hO&cbT&<}M3F2`$_-jz9qnn!
zRC=&sW5WiK-+f-riV<V7rKJZhQ<WJ?{}5Fd=3NarRz#A(1DY0^R|~fWFI9cBoz-t}
zV3+JmV;@#P+<mJmzDSjXFd3cLcL$=?8V-ZKM-ri*iP$%O+qizcElqp(GpG|<j~0yZ
zi7p(-$##V*w%;&kKd!zai$jG?nG%U*@JN6oa2WhD2iDQ$Jc|y~Ksn#j>~c~zb@B0{
zUmC1gUiX!CN{SR;^@SuECaBSOY)RQROpm+E=3g0pbocJv5LW*0;Hc%PUK;i9PGNw#
zf7PFW;L2+<U-MfWjw*!;q%#P1hJ4vl61{)Vn^cwm<qx0rx5P1Bs3dN&mhHkwW4^x|
z+d%f=9i60cu!E3&+~sFx2NX%`>6eFutwo?s>4ew1b$f>n^2owM;Q$+$7Yz_B2Zm!M
z`Xo(_G^W#N*6|7Pu$=yfvX-kKr<`r2N<Y^O%$P`9FMAb*lD32?#hS+m-DG_H<}fd@
zUZO^%F4D5O+i%;ofs-z_wjz_Ri2Jx9iAWD5qkh3d!?7+QJ|1h5tyWeih<!(o9^I4|
zklejZ;le3X{@!$B90VVskdp+<`)4cUJf+c6@oPsZlXA@#<J(9~{QAV(&pAF}G_l91
ze>GT4n4%f)sAUBZj1}ceSx-dg!|xPv?h)9`sQ-EHKDYR=i{?j@%v&{!{Qs9fxV%a+
z!v_qe@|_^5n;PL}^*0Y&zKM?)Q_87Bq;7cS1Nji}Cxj$5-Q5Ewnb$9j=O8-9NO-&F
z!VY!4)`?a3BLoc<>HV16?XO+sj5o{{Yt62RaEB-zR;YpMTdxsboh1?7DdQ1h!5HWz
zvnq~F51kucfaIg{al*%EN1wLw$hdh^iMAIq7&+hVkCh}6{cQb8-cIs*CD0WN|NL8}
z$!t;+1bB2&zvqkUB?Rp8^&M_f-cP%39`S~Z+kq-3@PfsqejwL@iVylaUXZwprS=^=
zHcUi=ZP%>m^4YhYmb~da@EJ2^c7uH&DCp|$yf`kF65MKc@xf`s@Vw<k-Sd{fYr`ZS
z`F+S?5jCA!L%oW_fFmDE37Ve9O`Q0cKhQExt)!?ZDKSyEorR?(1U2>U<HwHiz3GkS
z&-@J?#MIJ2i7{u+Ah^LSrI~HRqY(Tl;Tr*k@`lc%B|UHOh-iPLU<V>C&kOCa>uFm_
zR?hC?1Zq)S$(G39hI~jpd&B*~xuIXSAj8SOG8G}MzVots`6W#O4N)(*#OeF}kN?DO
zC>c6-Y(oBkwioh6&UNRA=qHv!WETSkMK^<PgY0ES9X=_S7-M)cxcs!Rp@3+P>z;4~
zyjOybm?u5RlS~0W+M%nT^Dr*0d&E!FIi)U&>(!hz-QrOOf;`m&ONDTBU^2S#B)CWY
z;hLH$XI+nS8+b9(?BJVC>(`&7wE?IOaO930J1*LQTtGdb5W2i;o2?dDoN%J!e|_T~
z!FXeX`QU*mz}e%SHc7-lN2<oW3{%*~*hc8idP3{rt;XU^9L9-LMm;dM*%`O5<%c|?
zUXdI?v&eq!F6aB2AwBCI9pmnq!SZ@|wXh>1H@8v`WTM*QM;X2(pSigs9Zto?#of;^
zi7w=9=RzW1Mzp+}B&?7FgAO&HaTk7I5|Be>e?o*ub?a1matDmJYT);bdNfdn65c`Q
z(t)a?iD&eBw$LNA-1<MJ85(vc+FD{cofP~aH}@kTM`%Nmg8T4shjzDcDR{3>K}UK&
z!`H`0ARt^R&ZQ%d4^vu;^$VjnXysC5dYwd)a}NT3w45(o6r@zzNd%Z;B-|_KyJm}B
zHxuhcFfJ_+V6t@#uOwpa?&-^PbpE-YF}imbPST<ESDRJ*DX=ZJ6X*ZfRSygzjZNKz
zNxAkS0A$zBoohaQ+DiJ%02rH9tA>)DoHbNBkeQzTC$5ARcPSTKMBd6?Ig~9#dLDnY
zP+kIJK23EVtE4oYFhbQGE!v|c$THwiIKhx5Mu4wdzs2%A5*I`yw#l4Gf*@5}`b$Mw
zTaRJ%RbK&bg=_6Gk7)v9FhQ}pHe7rgpVl#|5=k~S8QDnhRjR!a+cDvFSO3@L<wIJI
zespyUBwhI7!a@qLV#v9e4RCdI^umNn$~gV{^?Ql!`XMoSmZk1=WR$Xu59rhxt6i!v
zulz;oxkaXhgoUp)Bh6X8`po@|Nq9W)S?AKpi_k;2o*ab#S2x8IQL$LkK@90eSb7MO
zV17hm;<$M8H5zi+4qI4RSD9Y~J$T5Tlv7eR_s2s?yQ(;N@XVG)&AMiTv>>vm=1{ml
zbO`<}!;vEf(hkn{vw5~q5@7%|gv^?7e0JQ!zFoGf2Zs~P(MP+ITV<&UJD{p@Ki*th
z?dKC0%L(9i?xA!mOb0h~>}U(sXcT$o`$c|7#B|sdT=Eh?XIv-SH0ZT4-yG3a!1#@)
z^}|%+a2JOb&$}YvhD6dVhFdc)`IJ(n(Adxq@m`f^3?bfnT98Axo)%DYcf|n<A)C7B
zDqjqox5IIWe->E~UTUAB<F2S|J#g*TEgM2VnW(I^Ahx!&j*{iV?gL!8*VnA5YH5K|
z4BxBL+Y{-HBi$3qmBi?uJ1l1k9POgGhvl1Q`1Ko-Z$x$fElO0==&s$m(RD`mp<SUc
zH&PtGhBJ}W){^kpsiUyb&>ZmNaaS4RG}ZF8_{4Fzkny<BoT1=#N&p2ka)!z>{tV|8
zFXJQhwy=DU&rU$ZMMV3<8SykS;jp}wq~l9gt@>6a_(xY}!_JwUL?Q;Dp)@=HZit9U
z`$JlXm`JkbvFVM|eKBbU2+&!<TE@4CFW2%dE#R42zoKR;5F8R=VQ5}ep}Iu*;E^Mj
z-J9L`eCqY=S=pk#($Xgi?MBPZqWu11vL-@QutJ09;guw&jP{Y~-FyDpmJc^N`QyV?
zd@`3$pc$_<=+M?o&UfxlwEzE&j1*^xhn{5xr-Zn`o;FBMe{vh$4!t}Sv8D8x9bQmT
z$Hi&L`F8tAYSX!6$GnvqVz3~#(ZS(R=vDRgSD>}dIg7zhvO`TdQ4h{p%e$U1_z^ce
z^!M-ExBlJzA(7tBWLAO@ns|nT1Jq~Sl2y{UQd?CO@ls?G+Iq-2H7LWJ?I)VsfhYCv
z3TKuWOAG@IlnMYPbxw^G*FjNc%j1%hok0!kMyP5Ynf~V2#2vEcsM`nNCOMk>QAM5S
zCC-|xc0U%vz&$TtE^mo1U4H!w+3k-&9NU5)TwCMU02|(xxV47@-+-vXcL4#tP1D*N
z3WrG)T#Mr#3fV1=Vk+k!K742vnzu5(7@5%g1q=K?C{pt<jduMUSXNd>ktl>icSRSm
z5)Te-;b{4WZe*zN?q0K=KzsC@NW-hZ3TM4}#@ZKHK$7#1fnm(#wo>g4axQ5tFlkTI
z!wsW#BSSbKJV;MARxyD-xM;ox-2jaX!A<czqNh)v3Z<N<p^880{875a0+Un3olR<;
zN6aB}mpq<`cS>n3V{vGJIhRhQh5G-{(d#LzFdIu{yr}ICq?rB)9|UU3FCn~NVe6$e
zW{hcg0m%e&!eoZ@hSwXVp<x`F$8?wE%a;#*oY_$E&>l#_x@`pZ;(NT{%3x9a4r-}b
z@dv91JV)vuosT9G?}&Og{Am60Kayw{f0+Y>K>cGIdc0GBE1fXGJiLHemuln3C*3ZY
z?mK#DIZ(BAMt~gJH(QM(;26^<%~#m>sT?;Xg-%<x7~h-8XGvDHRv6gQLYj@4#=#?r
z+%Yjoz^DJ~w{IWOpg~w;)vz8-T0k&~f*9WlJv1-wAsR>8!fKvb>W7`@E6Mpzk1k|A
z=t}|gm*F*MsI2m=r6`#HbK6CoZCPFj7+0wFreMp*v>ac5aYX;~4@Xq%t%W-yb0nr!
zY`RQ`RnMenyHitL$q?fO)q+&v;7|o|v)2CHLw46!YA6jFBr|5cFur-exf@yz9!G1q
z<SB$1E%;(97LR1_J~qBnZN#EDG4h6Sl0NTafDmABHVVp8aLfulQCtrg^<zbzZ~{24
zH89xCSxBJb)UkZ8iU<jM1z?8t3nm`P`!{k-r>Q@^;yMxCBJiSepk!D!KP!yMiBzCA
zR&CG!j#OuLLiQ?(uVJXzI#pgG>G=$T!KlYWD^*Y;<S)y=pkSWeb8afT8bof|@(||l
zg#Ma7b!r#bbxsEiT`A~z*j4h&Ehl6%yeYfrze3W;E1yr2&RKNi9(Y7~{eWF($Dz4l
znStX__`o*Z_zedyglDV+)*I2?zi$sNu<!xra4;#qplv;kB1PQb=E;p+JIrx}q)>!X
z#=y)D{{|H#d{vjn!32;EYsk%F>w$h-@2Iimk0aV648PSwM-r(W@iOZ0d@gzHyXLo#
z&V@!tuWAW0+VT4s+ub@G2!gCUPyjpt#T<9*(7Y`$1e`nEdS(Bta4iK7714c&;)!Ss
z3KHf#`%>XAS{5Xok>nfthdlWr+M~5wX1;?(uKwRBq1T-Mi4sQN5rOfbY3pD&F)xs#
zhEAT-P)3THjO7`N$1HG^84}E2cH8oTBZuTq>Kq7;i;e%|MLX4z)E5~`bQp9&Fhd|P
zpag;w6)vY0o;AY4!AyrwIWghqp;$GaRZaqO;7H<CDv7A>&ZQH(nePJ{2ESHtkY|!Z
zOJFC7a~$X#oFl7Un7hW-1~VWvf?9wTaN%wta-k;uf>_LHREG*(SNH6N3(*1hB{|dn
z8#Giuc4wpj%h)cym=bUZ(w7a20^UU_{@4T(MF!!vWSN3Ro4Z^sM2BZaA~J4VBFzjM
z!ot8%1xD;)MV(MhMEV&;8WR)5haySF*i3pUE%sn0w;RiU85sT(LNpL-r~#$kE+PBE
zNpK$Q5ETe!N3}k%_!&PQ*ok335eiYc7Ch_ofAL-(Iw27)Nlb(}6{lWua^~+x(CrN_
zVJq+|j9Xm#`0?ZH>}V$0U_%AB0?^ahiBydI`iOxN;x(A?%gM=RYh5I=(3>kDLY-3G
zc!uaer`6Lsghiunbm8QNCmX-t5*s68<&PCD$r2gNH7lS#APg;@8;6FaB^s<hIW)Fy
zgCjI)eQb)5Uyi)7&Q7MoiheCF#i;y2dV1)fu*;0mMZ4cgfhNByD%U1kEF`k!KX<jf
z=9YUaH72KI<DxZg-2BiyF$;n3()y%^N&h&(JvLak<a#4VuZQ&{>7xmL@q1nDScrz*
zyNjkfe|?k8xvU|pAb<j0WGgk~$lVbg6LL(W(n;6xBY1(S8269cgPz{laJED?W4U^N
z3i6!qbT}#~gxoF>C=2{Sj~n$C&WQ7a#G)D2@8_lX(6{@)LmMItDwHUqM9X&fSkMQR
zn^yCeISLufbQIO2gY{L=yg7t(DK*+3ME(4GO`8Y_k^?qkVfm&B$48)myj|kqA>mNS
zTgd@}3SgKBaT47h2ZO{o>i>ynJW?2aQiS@Gq9mjqUzmQY!|$pP5Nc^%Mat1h0firq
z?L(E6JT_D@un^*8MtA|zbC~^xXO=4dQBWDwC8(hRBU_(`;V(aKf~1J*A1YH>&L(2x
zd)1bs>FRZ*wc6VMu2~o2Llead1<x(zVTaMn8y2V2=!NaXpPnd?($V?{{MFW4@n4ML
zwC$SVa(f6N+8Sf?Rx)0;JuKjY?jl{>()Q=Jef^w#gG7p*irEcq<Byg=&2bt5Dd>uT
z1Mxprvz{oypiE1yENL6RZrsDG;-QL+58_E&LX7&aRS$%X-;9~0LWXEPp~&mw-SLDB
zaEfRZ{jdTR!Nm!6=@5$~=cdsg@a-f;Z+#R8|8pZYF8_a%5MC^{i`+*-*5!+-qP(nm
zmkmHNKG<^$Zmm%HGn3suu*3Nz>FfC4Q>aqEE8C{CBk>=E1o#3lQ+l41GBeBwdQD&D
zX%zCFmiwYC`}4k-XcO1r#lw^;N#&b>JWF>#aZ%-g1{8kNkT?E))RmRRxPwk!OHrbW
z0m|HNm6}3USd`*{%=^%kD5n)NI0eMu$l_7>`d~tCCUuV9FKW$)-KR?S^r99&e4{gO
z@W<67j{h?^+uH(TJkI}&{)Tl7EF&={yV`bbFFh_llRktcGBPjA$_&@<Bk6}}>~tV8
zIe8=nEh0#mWsrc-)81~njLC?CCq6vLNc5dYeI+XgI#6lBY6iay*gInb14;8CW*ExK
zh82`UJRL(WxI7NWaquDMHDDG>ul2kxEB~4GMijT4mO4v%h}(T*QPnvdEI=Jc63JXC
zF3nT%f7Sa1eT@+NBdeDq_@MGB=8XA`Bp_J*jRY36x51wCaqxnmT2P!WU?w$6DFxOO
zSCLYk+d+FL4QiZbU_JmiD%!aJ+S}W<eewH)FKO8ICsv9utt?6H+oJ~omil}(OlYr(
zIWUw|_KbQYupv1Q-Lf7ccq*rLWq@l*aq-vVjwON6dZD2&=3U{!JZ|{-`U-a!b}gr+
z2!QY3TSTQ(IIev2=J4_3Yiw)|@7-JW;>Elv<J)w`x=8l;?C^&b6od$w@+B*5Y;cP`
znYZ%SF1>L|S8O@UC9;42fLGd^9sd8wn(TGW1#r-3aB^PnTu~T3mbM2Fx_`Z(CiddC
z{rQ%>yNnLR@e4rSh!4+NiL`yvOOfD|x_w)dbgSJkfxTMMEC7*o(C7VTM@QO&6WE1?
z(HIXLXx)Xz8b4S5A#9I^lBYmZ(Y~yvcJ9#<iqe!BjCz>G2#I*gpuwVhg$h=%=Je3K
zVf6p88Nz1$#tj3O^1!gvOXCnyP`LK^akB9a{BFP*5dS8T#sjq6EMiQ}3}>H^qYtW>
zAuRxH_6ZAP*ZaD<y26zj1WMK-XK(9iVQBj^R?m-UO%7m_;g(UXwXFU+_f3pKMxdOh
zhSZOTIwI3c2}dkAuGvS&#B|B7AmYtWWP<HS3Z=btgzA8T^pIzwW+d<rBao2sqN4Yd
zMq>*Kk4-zFT}LZiBCsB4&S~;TLXWwT#*lRP?BbzgY8o02x9-fWpYTJ{nqUC<35rnk
zvd)FNVMbrRePg%6!xLmVRTVr*S?uB!-IIu7!xmn`>iNu*CZ49G=RiU))f1pmf_|7F
zIVt2mizSm{!w*(ev;-$}>Hw9v;=TF_;PO2|c0yi`b|B-uwtp?qI(AX&R|tq{L2<Av
zq~IV(Mn}DR{``-bW5kD4|ID&HPI4S`7*G-vwF-KY32x)ZxBvB5q5+stxNBXEQ^%zf
zc{-Qg5Lq)Bcy~--b~sFm28e@7R3$15J3Rmx#WQ@zjvdrZjN42PJ-05ZD$%*U@oCsa
z1W-m&&nhY&L-?X)p!J`oDwzd)YTWILFP2j?lIb+<M#e8u!eGVB%#2H-b|#AFtf_To
z(Bo)~2|ApM*q$x#(h;IlHDT*S51;y~l&1o;cln90ss<snz?}o+BnW1hj@)D8mq9aD
zyo50XC-l*BOA&P4#zPsb3@|teCWIEm1E-FN*#RACMWH>Fv<<<on0f&Tau7+|#`Wxd
z-nAcA=&fZdnvW1F4Qxk(ZEydP#Y%tM)z8m&5mX?|h4@3#d?Tg5hcvdqh;s(2s-C6e
znOX*xPLUT6i{a1W<zXaR%1PC_h|<hXKff!Gw*|p9NpFI_ao~^`G}MMWsc*Y>mN5va
zi-d4HFRuLwJAx|$MMOFGAPKV?L)+}yjIRR)F>u!_>dVMotsKY*ICI95kY;_~JXtWe
zH4c(FhJiH>KR3E^G7S#scgu--TeE6NmDtA6(YFbb7XM5d#`L2d@^165xNODnW}hWv
z^2qYsUkBR2cFeEuepZ?=#}=t%>oWF%+WW5en>TI3(V>cX8su8PV0?a?y|=D5)x|Mf
z59YMzg5OIaYf4K?8GZxJQdG1c)_K9?cZiJexq?gLCFC!++C<5c!+{)#_f#5Cg2*J*
z?a;?^G2tU;Nc#2bFZHDe&QTlqO0u{5z5ei@RbYGtLYW56REycK%FB}wTZxF7UF0bZ
zRl?T|Mj%As>KuGAA>kt)PSZon!KieK!3Awi?MI@>&Y(Ph3*cMl<cf&)S&e>JwZGY-
zK=$IOT+Kh1X-fRdsA>p)3wJJ;>3|3TG5;fY5G7j+#{=WoE{TbW6ztyCeT5uE-`=ZI
zSe%d-^RCNxcaV6zvMTwltmDfg2?amM<Ft8w*xtaI>`$IR6-#bChfi>|COL!nmvqMy
z^P&sw(fTzvmNo-NCcnyTV@y@{*n*@DuaGrx;tidI4_V`bh4zA8rQET1Y+nzbb*zts
z5~o!_Ag^2iEaDGq00$|K`mgP*g21;)<Uz<SVa;VH_u=n<{~h0{;mcG(GGkwVkm!f0
zzxl7bjD?FU>YwUK;jp(*ZB-~D7--}F3{adyPcz<fXR0Pvaz_Y-uE4VF(Mle6jMW-S
zs|K|Jlvw+MY#*H!bcS>7TLpVn$tz_oEq91i?kkg#4hD6^+Jb&z64(VY@@WSG4NkNA
z2n63)F8QNaDwOTrU-*s%(x*^oiLeexXs3K3EAT%zQ@aShG3oMUIp3Q;<F@jM$>M1A
zFpUajlmjH~7S7)CCC~ws9Ul?rKo}>!X6593f4*yE&tdaX0=bq40@ASZegmym{cF3b
zT7d)27HxgSl#u+t*!puOe_2k|?t9Y!?M$wa?6nEdLPh#=tpV#7h1ufp+5NX}Ya=mS
zqiWxws#e9RWcH7QRkA{~4hSap?aUNxzR4|PA>s6X6=s=a<wgtdmXx?5w?o!5px3K5
z2k&53GM?DRn$qo7XjjL1fd8EGDrLzWtU;{rotVO}%j!_IA^MGM*~beiEhJH=*rN!9
zNB!}K&gD1LIlja<!r@T}i40$<*>^DXQ(608mz99v6SQ=--<$|TWy)-P2<n4$@^2!J
zaC-;Pi5gh--|Q*wtNHom-nD0meIlHo!ya$l&gyRh6n2dWjsp%H_|r3jc{)IHk?ukW
ztq>Flu?9_DM+Mq}Xa?9p*NsrO`8#qu0X>|0ofQx(G0$KUu53b2Xy_#_^>>w}|8=Qj
zt<pReyEn#n=@4Zc#R?wH1@gvT!yj|CtaJPq57n<mNr_&j)TPF^b{%Mmc>U_vPm-)`
z4jQoz7=)+b3FITCC=Q*`jn7g(PQ)TE>sLJ4L_;_=$?B@Ay%S|T?8Mv+Xc_@NMPGpE
znMiFYIe>;*V$xkiqwpvH?h``?qz!D_E!QQid<3<Ti-*dt2|E-6(}u1hY5DCtclxVZ
zc~Cvpp-Y&#O25fZPO-S#MdF3`g2<xys7@{NM4lMXU0hn^kEFLaNOj3|(Q1vDWgp!a
z$h~OFMzxrqE`EYcvP73mZG6t-HpUmLkN>+Z`oLU#`TSY;F^jsSq{J)wkv<RB@T!|D
z^AUZr5ov2}sVbxB+#-x*2{*GLOrK=8IlG*I-y3p@h36&BZ#$(<Mtwz9x+#JEGdn<a
zO0m!g_Zl{TAmZY}N8Gos*E-1SJ`Q%RFQOd$>P|a}w(Rwk{@P(f;<irwng1Tl<LJWI
zV3_P0P;R)f%22Iz=mq5ALp)Krl2F)ist?s_C&@-%w8;5GJiJjy1!zNTI;);+%w^%E
z9dJS5$`f=#WTF!?A5f3XS&`ywp~aGox*>+*pHbN#;5?U@NO)5IXDt3bHw;a<LL0Pn
zzw11D^hg+gBevj(H*D<VdlRuGe3yyTv-0w-82feW*0MxO00EptEkb)D8gW{@_pYkU
z!ND%9XB{cH5f1%NvQuF1cW<32S)kdNa{LGKXHx>uF{``3QiEzUQZD?&viU%(A&}nC
z{#()QDojNd$*!?ku|kmsglW`7qYK{=9GE~(63h@wA%GqT8zQRPil1Wv;LV-+YU)l|
z|K45WNkYtCne^sVMYWJUFzf?`_xwXxO~sqLVojuw!K2ao_I?!a_PM#Yub894xWbMK
z?5B9wZw_HbL-_QbkUMouCn5st>}0cwLJs0ep_jdo4%A5r+E8-0gD6q*iVEVenHg1r
zD8-f5S0Qd+k28rogIE_XT&R%IYq5($<7sJ+FQ4CE$UWsA0L4Yo<AvF0nvL|sgbY%S
zz~(uQDM4tKhwv07IluG8s>K&uRSLY~b;9Qhm=`&`qJk%ll$<(D1`)b)9asK=xI|mV
zn&Mg^L+I1X<GoO*l4rnB`|Hd0?++30EN-76M{D{0$OD<nH{ibM`t>5R2cdvh{gaIo
z;H{qf_{+_fkK<eIr|Fl1_`>2vmp2V5TqsdL!WJZ_80m>^=XVZv`UT2LF{3ze{P=Ls
z0IW!^Uhgn4lu%o)H4?px{Mfa}L>G(d$e-`M;OALjk_puM%-WGs`1QAbpR-3^6*BuU
z2BV1?Oi04<W2ViV2s_g8kF@;qI;R-Zp`I(zA@RM*hF<Z#Pl_>>3;-py*4YVssDmvP
zTKItW9N;<nMXx`c`3XE##CQjKU|N<tK@7FDWFRFlV&p6Z^cD4Z{n3>)|1z8zxrE+h
zB+Tu_mzjutEMCyMk12p#twERn3y<Y>`09UAWrR8{sKNX8DR}l~QZNu+^a8bXNZ?#G
zSYayDl-Ta#%`cK5FuAdl0tgI~#}V|mNbBrm-~!!N3ZB=j9^vjMZr5IUKEpbq%YS`_
z90nRRkNqj)!!g=kTAFKEVWlYB6{b?$Yi;$IuVL%!qL4wFe8y1uW!H9+?kJnN$-0ld
z<mZtTs*4+A*yr_!PL*o}D+nZ4S;GCM8ZWCOFcR$@EMAXheLcNTOlYkt`<oGA>{i4~
z-0}G*A>|+^ETn#ymPYtDQgrAvV=MEVEKFx7P*Ydll#fvoAG09xzwUoj$l3KnH?M1x
z<^oG2G6$WVBw<u^dFp2k@nSnXri4eamHxI+Jh+Fif%O$Vdy=i3fmV0*N88){jGo4A
zsLLQD>aKHf&EtCDvAE!qw36Jz^i3fH*^(P0qiHni8O~hw?Cha<dR)8q3`z>|2@%Jm
znVGDY!^3S@WH=b`Y?89aP^l6h^!k5(5E<OzF{cU@+oU}qL&|*m)KcnGoO>g>DznOQ
z9yh#weP_*{-BZVN`}XRgE)wh83_AUKayDi-qN7hd$PVH`#v1?EgKP_)QCNepZy{Y}
zhQs{z`Fxtd&`?0BvU2A4JKaf=vzYGpFyHGRd$yHW*@E*_-4%oXJ<w#=F<nVvDw!}j
z&#Sv_B{znCe;QJ|ul)&ix^$-&?{%t@SZ(j5b)u8jd739Q#OAxYKlicik3f?41YPIo
z;lpQ%c0AcmUR>&lExD5#j$OR=;nd$*j+EXNJSkXle6+lK4F&}pH})_<?a)&M_8PP5
zf5UA+^7UO7!(2M29UE<%pK<Nl6vN0iDwcFLLjOyj#1CKeA`agAKbsRJC(a9OV{&la
zzu4w=C59{q#~`#5?R{I?`s)h~uV<pmm-8gr*+!rD_(isrjOvAsc8yGvJVoN{Yn-em
zbFcsR#Q;bb7BhbOFwJh7fwgM5=rL4uh-1&Re@=yk>GkDH1SDPG6M4Pc2zYt%d`zZw
z{1?GkRf>#a4T;Qc(J{&tKa*mb40t!IxdAGY2$fD+r}?jBnDqbmH#;5PduDFv_7puU
zQ;yHDEmI*75g6@4AT?+Wib~$`tg?Y@&Q$KJd}E%)@*HC8x3|b@Uf<rAa9_W=!!JSb
zbqk8EqIhcBg|;P&uG<dpp?iC%n%bwltADq2Dw@d}{*Ukb&p&N#w`CVda*i2&`50XZ
zK&@74Gfe;4)p24SSv<QXGq1hGqy5s7-%dm+Kl}GQ_E15<6<Vf+fwkDhl3|uhm!7QM
zxE1)lr|&2x9A1|%^$(1(IQX)gsQu0~Y`v6d(6E>LTvEefj>qhF{yg1VLW%f4L!+OW
z>3{ypWB*SU=0ATWIVkgUH~;7FPMrG5dj03GB#J*pUjP1;@{pg|yd=l6Rpu^~l_#da
zO(df=q76xSF$ra5dJ$>f7v`Dm*uH(t%6n-g&NAHte`*0rN(u|((VQdvGG`;fvSv@x
zq>pX}v=9vs2zJ3-@%RkZ*Sk@Fpq%rqDggP9&}hJv;H?cF<VP@hd_x`z_d8U%5Mn?J
z0X)5`VBduc9|I`(5z&FA7-&w>qkPk4qytdu%znK?@%ql47)r|Y9TyXwW+~0nYm4eu
z3`C$10k3xnIpl^7cKN3=hsLFxr*#5)pnDU>-cLkU#IGbLCF!~QkXt|*-?36XSS7@6
z%8O}b7%<xIAX?<q)KrFWlk#bOeH)5%#*SMU2-9kFj*=~+Lb7Ec2okM@xR@;JgQ-q4
z%{}z}oPe5ZWheLoh{OW9wNzY4a+==sLEx)J^%r@Vle04wL_`-(Tel*i<&16MY*XAl
z_ht17A}pLXgW={^msh4|HCC7L&oFefomnkgHV;)iwiGZ<0I(fms7q>f)==O%x0M<N
zL)D&wD-i<C-CqNp%2xUcC-I3%tqu;CkdlH5c?aQdfx(jcFK%)?H+Itp!vCqO-EsFI
zd6Vke6`%cQ9ADY%uH(p!bb1^eS(KhQg&#gdbaoCQ{i~pq@2xs66v%5J3G(C5aFX3V
z-d>$w*5OlpQd4Z97%0KflXVf77pK#p<vHX0Gnap*LiOzJ+s}y=VN`zwTG?<P(xfKw
zlW-~D@{-=xUHu)<^lorpXchE1rS0<H`}7&oSg>pN?t^q48&_(T<E&+wSkW<V;W%qH
z-!unbMz7?V(Z`U>(SW{i*B#ZR8B(En^9>Ex!EvHq={fN<OAtnMPP3ovmv*>V7qF5Z
znUS?V6CMzWHK)SE-C;vmr8fo)#Uh~@zC1|!HJShl(q_ssdnRs+kFNuCtyb+Fv~E9D
z5r?<;o^@0Miixqo>Q|R3XS|V>r0ow(&%MS9n6&o#i??s*FJE_VqZkk+)5kQiBF}bB
z$l#p4(Bog;M*q^G0}Ze)WNrE@)L@$<=e?m!l>dxkp~Jd$TQ=>6y`CNJoRxr6ixrTX
zJaLb6fu}U*IN5aOi_=A1_d;qdL8=QN%|&Ampyt3b&1cmo;9D2Pjok9Kx-6ZqI`5_N
zieyc0Mit*_@`Np*&!VE{J*nUPc0IT2JK60}iHx+f@1sBD=ISkNlRWpn<xa1R5?@P$
zrE3?~K3KWb+uP@6iSNx4hYzU(mCik!|IeeLZB<;Clxn@+`ueHO-?Fj+?cGYXs>kPy
zcYa-LdBOQZ&S;zG5Uc@}#-fXbJB19)&h8x+Y5DzYNcI@>n4Pn+8UBD9x60l=cy5}`
zPAKG=6V*e>%=62r0tapqeK?_kKyO?1Eo4qBvJPghOu?U?w>cGTeKY6##H%h}HlWun
zIR1f+{W{pVKJ(=p*`Lr~a@K;RMl=sy^xW3k$Q5Ui)LmY_8Qcof_<*Dp-N0`hJ7(q>
z!?_BjgO-~Ma#bYDj-{@SxXHi{z;_YoqCuM(h=_#s3K#O;706chA(eBMO;1#yAOJn-
z$JAw7UFIk-7;z~bOf<eBsirTOVGd%@UyttHMYk{A`nEYlg6V$!0^{cr11K$0Tf?<T
znAuTh#jSbJps7)X5DZlf4PzG^F&jg+&L`m*Y!PNeiY10bu)p_P&u3I2yq%XcIe0@|
z9m?ZuWe=iT44-4AY8+{_0;md;hOo_m37Gr~+7IgVaPs^{Df8oYTR%#$r@UVZ<LxO;
zG-3V0C@sJOQc$_1NjhGgIx2_;)QSpcstFA)NrcS3dV(*f-p*5GEbbkOI)n=`j*$DH
zvy*sw$+qA%TXMqjSW;X_PpSxx*c3xy!4(aZ%|>cPQRdU6ngnhQJyc?=QP6?T_^os2
zU4DKKD(dh+D@FNoa0!zvoZ**fRGaCzoz=mqkxTF%Nwezt6?B%&(%1L#_7>LbwC+F&
zfhQ5)(nh>EAZgCD={q_EPjWt8Xh%<K2_-Ir*aJuU@9037Vkp9r@(#L7mp&(Obm-E1
z;O66#T_-3j>q@EF-H!m=i<VrcvDCsE$l+$TkB2*<!lSxC@!@;|3xgUc0lj_xyjrwM
z0uV`3$Be0O?b!5>eFZyj#z~1CWKd>6sZf#Lr%(*l)=}g$BADWR!*ie)&6=-A9H%<L
zIWdpp2d;e7sAsG>(H94*w%J%#q_o55?;kmc;|=ruUrf318S<`!QoljG!=tJ8*?Z(j
zF=EDYzx;gFQL9Sp-{$=#hl-B%S#q(R$P(noJw3N<hhE&L=gN?yW97H<HN2$de3drN
zk=fcI*v#XPVN1HkXb$XrwY<dr!@?rS%~wm%oYvOV;P|gGfE1?r*$#RAc4*uX7R)=P
zyN`=Inu9~3jqPsw0{Zsya{BYd;Q3f&a+asMH5M;7pfQcE@-Q`mD;$o-s6Cl->HA^r
zg$0N<hL!Yf=ld0r%4rV7Hsyl{<t5!WFK|N4cO8K=YG1FJtB{8y@5}UH6(z8&^F6qk
z8XRCxx9-(*S=2^SJu3Ul00XRgj3f8U7(^O{<=Bi9L|5C3eV!T&9A&#kY1B)Y4ZUUh
zhev#P`SPH{r(!t%>>8nend;r0r0{rV0KIBZJG#qging<Mv>#OJxRi6K{5L5~<$fh!
z8~n75kv%MWe(V&)^AK_G%gP4&QVn*vJSyc=l@&@Wgg%tML_1&C>?`Tp)G^BYGDH8e
zmiEj3P%_wAEn9Y={;37io;8KRQlAeqFPC8-Nbj7+nvX;pemP7Jo#@fLLBC#DRjMy?
zAAd*2O^<on=4SKv{Kki|1?4Z@okbQtec$coyU2}MW`jN7-j6H*DdCIH3yEXOr4j!L
zg=rJNLW5Ao8WfLZ>c@(TRqEOpLeRK^#(+`pYFdRvQ10}Q_^aP4lQ$i9KZ^A$De<s)
zGFik2_D}lC$QYl}{XKG$eTn9%Z$5s0*f}1TUJ9Z7I<&1MZNvPgv6fr5e5Q+p_MP2h
z>G~0)p6wXP)LdII2d3pKouvi?zJZnWx*}#LCJfvC4q}38!Yt<(#Z<M1!!g*=V&+<N
zXKbH4Rie?gx7r^c9H`UssL;N4QlQ!#P`P0#+v9*7GP65(hJWzT_g8U+QS9@f*h-a@
za|r&GQqP#*#Ok{^_98k@oK~*fZz9&yM0g-iG*u`4AHv=|tjD!`<Gy1fGK9>Mj1`GO
zX31DGB~g)3k)cQ^qV`s1c4$zBvK5j<Buz4=0c|C75haxdB{aXEEBtoPd%VZ-_Q&(=
zV{hu)eP7pF=Q`K9&gHHjbY$Vc?bPa(HS3X-(G+HCC>TQI(3F90@}T2(*u8i!AMs37
znN#=c7Xdn1!Nr>1mzJ1dEM$@>@VfJrXJcitxN`jR&r?=@tchN6c8>p$r5)@NNHIY(
z?-(Gm{xPw>^6#goG_5!r`mcuBjT^Vh$|{0wIA{;)d-%5dP}{EES@1%rgNtj|l|Slh
zOI*e}eDe5~`I!d1!-Lj`_ikBDuO37;Cnk=QFI-7p0p&#UsdUl|Uw&Vp+5Y|dai0Qf
zr}uZG-FB%aB?Z~^jey0^=(JOR=oKO6I5m1>Le1=Y&^;sqfBGVBuV|-fee$(BV$7K!
zPj&eW;c_9q1|K#L^M?q03K)UVT@`(o84HF>>JnbUEcP40h!%>#LWc)vvXho@hz=L*
zvm^|qjd9W41AX32<NnI!WM^d9oz#q0(RJ+6w5fDUR~!^#B9wlbt=+!?f4;dM8966X
z^A;)qoj;oxiq?%u*9F+ByxLfF#MM=DT9O3^Bn-JSm!++qlW)IeiyhmTpJowoUdg~V
zdvqsdcAIs?cEO*P91Q!ZQ&HgN=(p{Na?`x$kbeE{J)9+?ru}|&Zh9%st*$;lCFYLZ
zT}Qj@jd0$gxPq*c(Ek_{MOsm#X7yQ_NQ`p+(7&+T;rX);Z@6xQ0an;~zrze5z~hWN
zV_rF+6P91I%`W;T-Q7vHvww1~TZxO~(9i=rT#draGk+A^aCkfGr)*HKNV{&vu=0@8
z)J<jUw0TojJ5D)Z*BM722A>kzB;Dc?A$IpWygVhpoX&`vms;jBAg9l>eIvWJrl|V(
zy+^lhFDaO=ZDUQzUgw={<)&At|9nZ(Ob5;e#&-He5bi0Rv}~}OKjvQmEfBjt`>&_A
ze8cGd>t;aP<$lAQ!eFaNa*7i=ck@3stZQp5+&r4Ebm+>gd5UHBTZdVG!}YdoBf#Z+
z6P3i~Zl=m^+>|wzSp0dq&$AicT`NwAJeXqYU*ucevg3fRt#?u5`1zg&BpCM1EdNs0
zt(G=8S8hUPw+qIhs{zYozw0SK%p*zK+8uW-{-bC6_RG#{GH=s664s#v!S2%uu(+J?
zbj9nS9IylOa$y28y6efyn3dj4czA5Xx8w76EvDHSRr~m5*oH5k(Aohjqwj*_?1pyT
z)w<ge+b|mBd-pOm_h^*{rG@?2bqU8S@9wmJfAa9fG2>SCIiuA3&Zm=_<+jNsp1zU!
zSfDlEc^aPD`p(e$klU?y-Y&%=_E68!X)p>Q8JmeEnaj{_a)=FST)up`=@{5An5Cmn
zvluw1_^*`GmMpl<Xd^~?pskCGhRvTdd9=|l&PcjEg+XNd*TlewiNrTOZL7!p%;`Cs
zUsb1f8)S5TuCJz5;5T|xLC*^hZkzbyUDK$qmSCLxON#cixX6M`dVDR&NO^cjup6E0
zW-a91I7IgF-<i7Vfc%~N_ouFo-CCh0cc$#yw~^^<oVUFPY~Q?FLoH|6S5s4S-Tj@5
zt9vLZmE`4VH&{FDF?f9Oc_Fro$`OQV#=AOd?L6pzcEb^5${yR=5V37nIuyMme7VaL
z`Mj@&pO}XO4-wcQM)PQGa&VaA-=8DV@!79qpUxU`52lQ^_7^|b5*$s<%w`r*<@tWK
zt}C?&()W58=Iys&cZ*raDrNe_&$rHcbg}AipZaQp>h6B0pBK{8{X$UYRU-%Z*I<?i
zQ8}^#{b=Prz4Iv&Ilu0!iUSAv&Nnv{S^m$Vr!<IC4}0rzxI=&aWBDV@D<<8vv!4c`
zJ75HQ4KyyR(+0g=IaF2E26SPgG~{GxB|DM<6c0<FX5=iB-a3Fq@@;)mpB+aW=~0q&
zoEdWWdA@UW4?JHTP1_`T1+B@vN5`K7)M-GoxP-1m$bkbrv}boe`@9f^v_Y@eu%}TL
zynL~~g9Aq^OUT5~qdc6L4vfCZ9+TBBxZMEktedB_JR<)d<%RV$dJ{RFfr@3#W`H=@
z7p&E7J@^QBPcQuubSrxvdSqaF-1qas+1u}Lvy3M^iJ+e{hE~!9xDxUaaYV>D^35;!
z9A->}QW7isP8#NKFjA^;NotRJMAOA_ZjhnZkR`7kx(+7m4Av?!vfdFGc=oJcJ7GG@
zU;Fy~J5Bhte>Q2xoYsM*cY)r?GROU=<AjW$_XdIz<$Jv4M`5AWwqd6n)9rSHY<tw3
zJvw;)ml#2^*vn(ey<Qo#?tcA}w7%*e7AR38UDQnZ5l{-OpLMnj8C1z0oVfZN?bwxQ
zjX3Jgd54yg>zn&W;`24HX1y32re}<1=zx9G@{N9TbX#>EtP;62)^S-H5R2?Zi!X{c
znvkvVsge3yKaM%Kr25&bS6itb`58WTTlkZ8s0--E5)I;K9PR^<N<CljeAJ}q4Ig$@
z{-GCYoU${00KHBr%JamtgnRcG?@b6?7+v~s)a1v*d=1vTRj<y(yu<gN9WbCQk)O+K
zbNA;~G`T{tr2(?{*Y`Y6`2fJ7jC=UGkOza}Pf#Xc_BL&hI_&lRVw0X3Nu-HYTW@OO
z==krV>vb~@i&<TK(SRu(2N=8niF2_~tDpQ9am4akCZ1Db2_gGG`qz%ON$Xd>;8j9c
z7O(hW{8suW9xaZ$R4^^1*)g>LnW|r{GuQq{x~%BvJ8H(d*w52VOPoutQ)3=qi{z)^
zZG8OGn*e~>=d_9;a!#9vegE>M5(M^k-QVzB`|YA5Q9Kkoi^XJbW*_$uLUM8h^BcN*
z_>dH?eOUg6D9^%@PVvX4c+{R`V<Yp|c@KTlp$tC+vTtKGXq-xsjJbdB-eFD_oqs-B
zBU-R>oz(h8@Nw+kRIno~xR<v7(&fOaB7}D2g^Morb+o8_BALJB-w4)>#F5r%s|_Fa
zHY3GMI>|0uu|k-RLUkdNV{YTaf#(mG#fj%yXWcexcs2IvRsc$@1eCops4}OWOCP_Z
z2D|8&KSr#jmH06ayLR7({d~uqTfXH~2;BWq`Q_IF5ldcQ^WIs2wO_-RccNoltMZUl
zv4_8eZ!`^k=3xUhFng?-fDm_U)B31%h%?v7JU!Om-%Ccy+3cKdbsDv9)VlD_&p9Kg
zMFvD*;Ojt@z`QQ?C{LwMaWkR)Gy(<0V4u+su#8ER@k5r2wFvy_eqNQy2RimsQu2Q|
zaE568@0JCg3>Fj0)bxShP+$^5axPJlqUfvI*>1ur=1p~<n0O#q7|fE~?7O>B&4@l0
z97XOW;cJWcwrNCijxs=&cjZ%DA7B`K&9n#;I-G$@K7KrF7_1q!bpQBq$8fytwh=ls
zcsygyCufUa0|@D>F=Av12LM!px{{JqWB%C#*BBpmVG-VpY3+vXsJMx9KZjC~ba|sT
zHoIW=vl5jS^%31}`itv!T-F=0cW<Am`x1+`m|wU(>M~u;;5c46a8<7e{t*x4#LUb%
z-h4?E@-_v}iSha5U#}-ESCpTtJXp9UUDJGnvAOZAGWgY`BD#ukW>40cA~k|&c)`5^
zLNExCd$kO*`k1EGzAuRvXL;}<?SpB+C*Kc9cz#N6X7;y|w{L6W+S>PZadFYgJwksm
z`O}{W>}fFwMb@4@G<o;a<hYh;F|>6z&3<ZX$&RHIx{#eJ5%~3rCa1EIcvxCw6LHQW
z_A96^-{JHsG7XLq^z5m&U^RqMn$<QQILG_Zs^C{|eiY;$i2auT>tyw|L(jPDO&$Em
zqjrZ592lN%7PEO>lw$k#Uv_M0x$N3Ue<nRjW~!V#osT!sm+@z@1w8oUY|6NDy*-O#
z2G`ly_H`Szslf9o7)fX~y;~SlQu7bkW|(A!V-x*B#!GF%5ueLw_suCTydUtBEh^QF
z&Wp`{!I%?I3$7}KQlzfCa`kFq`nkXVd19ghE`<US#sr+LG;dxwzetMXfiJpupv1RF
zD#m4GPqI0gdBCzV`*u(A($uKH7l`qwQD>fI6367GbCko$QxHa^y1*g<z<vH2dAP^Y
zxD%89gQajqgCd2x(mDsihu3Fm?fOM|NBR7g%4|2--)RsB5);VdPmTd>lUtlzcyUpC
zB_-jnjfvoSqYJ!FKYt})=>zR0AJMqIGzi=9UfE}WtTJyTr~jr|&+d&_c2ng#B|X}R
zhw96DZDN#6$n4g=v0Aq-lUNn39(Uh*b3Zq+n}fM5+#RDGf-4X6n4<PO7nuOyJPT%Y
zIfW>+n?|MYTO=K`kSiovno`DqLyp#*)4Oi7SZ>XL4_=+$JUXvkzPkm}Fw~%{taZGV
zdOei!b#8P2_>K|B*bp?VZX>dxvi;b*@tM57DU;tRs|>4=rf^0+ZfUQe(64s0<BCTP
zwvFKXQ$#Ef-ZP8%wPT$tHP0NTPc(dDhN!)L<e3f;on-6gN~6}lqgK}4L*n=CHsE=$
zI3Nn)H_gOX6z&&{lAKZD<+LI`r6Epy^p&acC9Ba3(U{cBDyEwjwwPAGme$HJ!DS_M
zg>_caid%5O=x}#hyaSdnS?u*D>&AvNm2srTX|fDpGb>w~&~UlhN0=@F1qR0<l_b~*
zp-a~VxgS^h8jP6gw>F&~udQ%pKGUt&I7f>C-5%FuPb#;GN<4LU=S%gYVW$({gzY*&
zl~_xPwm)471cuFCqnYsP6(={#uo{w32x+yJ;Q;(~vm+|s^K-LLHcjc&2xUOZ?rzg#
zKm@E9xG+u8n+2`-oM=D(_O`xm;p%($?^9EaKeG(V6>EZyLZJgFI_`U!8k_kPL;mbb
zf?MYkB8)4G$3!(0_`2gcp%W5xQ;`@_3DWQOeFwr$!w(P+-PFy2&h-f)2i)W-&$ZHp
zSKx@%&o0`eF{GZ;!HqaQVcWOCXY{U_hB}nI<FVxCb$Z8u;3+0Y`Y-iTXy5(`q$+xJ
z?D;=z>yl$8sFscAnvKp-unC_vI<wQX=Yc_1b=@x;^y)RdYwP?=FIM+SY1hs=^6v>%
ziCJ0Zt0}vN4;U~Ya~ZVjw$%RCGd)$V;8WzK-`{P9zCKFP6|V|%XkQqc58raft);a!
zw2eIs_E(SZd({>O{2yL$?~ccX^Sd3$F<}X3jHMPBh9L9`wbjH0Ob|KuqOegF?kHmi
zFtUcTHGUjE^6z6xZVqCjlW$8*TA0dLj$l|szGFu;P~8qrD?f{!hEDM<?Ii=K=Pv+}
zP@WHX%{*`4Fs0AM#S_>Ae^drR^WoQ6qYb+mauMHd;o2rv<F07UEBER=aJE=0%SfZ=
zD>R2%`Sda97)%F|y2%rbSK;}>TT7d5PZxvgy`?hX1oq);f-qFDKPj+z<bYAM8s6Q}
z>Pug8-+==LG2>Yr0sC&qaD6fowc*9gHeaD3_H@FInje%l>5maFHY?wBqR+<NE#fhh
zB&K&(GfJ#6GTGrcG=>2eQF|Izi(XQTwgxe?5s+N-42d5|I*D5JjSFW}U+Y5#00kLV
z^Adz=@V3Q_v8fawf%>Z$_@ZqW?_$yU(Es6#&dJZ7ZKjKnrn><t233DGmm!QbVXSOd
zR1`Uq{>Pl-Ba3J-4L6_Nw=ac%n#`Iw^LjXJNV;4s$LO(cjjKbq$!QEhEQzpBFt^ht
zI9Vi!9^MG$e!?K2$9`8*$y8Fw0YV1!ZZ%yW0B0?=Aw>?&`mtCvVL_Gu@iK*?Xuz4r
z-biXHt_V}k!RWmr0Jq5N#fB_?B7hANpT-QDlgKMqk_@_1C=?@8=07uDhEpfsU6}At
z+lr;QV<sypC_JSZd#&BY#ZhIb(}Fe~;-#B2_{5#birzGc-wvJ}Mpb|qP7g%{M@&qt
z(b@X$BpM8Z=ks`>mNn}0F?3*ydb1B#RA5)jzkQ=;6Gi@`KD4(l1L|w@_xSyw6k95z
zJEvl>DO+)y2abOyQtLE-P6h^md0a+hy@U|Kfzg$|{rtK1kRgw2vK}e*8n^zP4i|t5
z?i{p+(FLRYgQ@)+E+c;{`TM`ZK?A|r`Sg7JYN=kC|BvUrPzr-wm5;`Ee^?D+$%OCx
zV1AzhnmN9$1tqHyU$dNO;D%(unj_9f6)7152Cyxs3%^+ExIq}b0K&x4zxktf6~Dds
z8hku<=nDdo*P&*uZZM842&yr6L>NPBL325<2oa!fk`d{J7W!4Do~K;};NlJ_td@>m
zNt&v~Z%>0Tx!~NPH@i~@>9kKvnx}k^bK`ZWf0$E<T{JhuceH90-<$ygx@lS~VsZZt
zPFvC~#qxr~LL0AUEn3W+j}p{!c*5O~uctzPYXR<l2sJ<G;2&_?;@HS_ezAsrd-+x<
zY5JXM$)c$KVOCz=Xz}rD=6a-W#)WOZUp9w|M{~q(`tc#EaZd^>tWWW8guUEy>mx+G
zPt?YC752(Ye$x4J^}X0~aN|TxYlh=%-Wi;Y@J;e)(|j^(fU`z<#SXEFM*YK3gmK6S
zEwOaKuHr8m^>Excm0Ym?h#~6vd3O!|;f1y8GUUXQ6)B#o8)>*)8{vm2WSH-RhuM>W
zLeO$SXU`_@Zb2Pv^U*1;Jo`3;w6Bw*W_i<oM@v4USv~N58+&ySavx|ucJ$~+XtC?E
z2Mk(p!Sm~S22l?5GK2Q9c?kXX=w3N2d-n%d$vH#<zi#4~v7l>I(IC5zZ=>Y@;jma|
zMV#<SN(~u&ek+uV_;UR`AM%W12lN%XB!)|Pl{5iss>(q__7dZe|DHLr%3ve4j6Lo9
zZv9>y2M9ply%;Wt3r@y^%Ah$6d4~<wbT}o=iP?mUpqSCDq8n1=w;5z0DwhP|UxuSG
zXOjWL?-bPn@KUSLBIKe8Yi%3}4)mg*1-z`HQ#EAi%^Et5(BX3o3>-FYBpEpKkqGrm
z5FAk29DiAXCh-O~-2#{rt%>F^K*X~hcpb1MhF^`H$^eTGyqVZxah0Wa+hHR}<L?SE
zx+h!sC}vO;Ae?|Os>(S!2kr2`U_$3B_9zSzAD|!MNRoGYyLBD4J|Hby1tfY2+(0!z
z_WcZ&6@q(S8h&ZyO13|vk^9DNwS9i+Bzg|tDOY&1w1O8iH;8PN&!K>G1iNEztY1bH
zj4;LYJ@3Gf6<W*e9ZPwj%~~m_5c^j8Z+}K&peB-27B)J!VS264CYJ$fLOJV@l;XGI
zwF{p=pq26zepgs+KYkSd5uN}ClMxTz;}x`tnn}zjaxR3{h37`SjY9I`+|#<m1gr+E
z&(@79nb)m#^Pm+!Plfa_pFtLop}Ra*mzRt~{nD}}on5FQ6t1x1jn~ss8DG0M^pb<_
z%DA@v;?zt^O#RVo%JmD2uDmCy4RZUGelC^iDbKboqRa+N47+GQwBN7q{p<f+2Om9l
z%#oF>!As~OTkV`w_aA`&^+yuDn7YKf?5QoxbNKDv_5X({B-e|}hIS3yg?sAA4!xIV
z*N>8-+A1xUT%GrXO^iL>Zek7W6SeeX;eqsV#zC&SOIgyu(H**!H7i$I{F@`am4<`m
z%!4mpEVAk=BbAd#)JG7)JrDkoxbVb@6M@aqugNhUJ}7t+3a`Z7iLbv*5PfsT<!qk=
zt%b9_+vV_Zwho&<PH>M^J?b>}cMIdPgFt?LanpJ^L%J3F>C>uUWta6bi86t$Bzw8a
z75j{2B#(B7BR_2a%+4^xEu>|x!E9RPcrn!cxPtqUtSr5c^TVwvnrW9U9639ujqk(P
z>+Jgpe{;UxWimaY-zMr}8)@*-ufyP0?*Fw}XLg?jl80HM()_X(T_mZph11xqbWkod
zh@b637)f|V2)1K)m6gMiEwg6-E}I4~5ri;DN}JzbdWdsv+5;m>{LqUxDtdZsq+S{{
zVn!!v(WuH5`gQC()Tx`4b!bb+?%k`HCkN5{bMAJabA(dji#9koYF}sdD%7mk*1jy;
zuW=LOhWTR*a}@a<f`3tXu0bKxsZVdv#0ozwxpw1fd38>}a>#g0=iV-=#M9lor(0U~
z6E9*5xJelwn(6dcJr5z0J28z9@F6=}tV0683{sRWJ8bZ=Xt8s<pO0teLVQ?-r1u_n
zG~_W!v!9|*7hPwBHp$bP<V*%)vGE{gi-xAAVg4ei_jW3SZR|Q78Xe1ihMHZ7#&c7g
z8$F?u%tU}s+~wg-ZfwjuByL3&^}fakL1uG?%9JbXV@RwjJz7eZZoFZsY_sI8QU2et
zNNh$N?mvPc8nm__RxTXUR>GDVBaO2TgLQ89ed}gaHL*Q}4BHaAW7<A%WCuJvbhFyt
zDzlnkx3F95U7!CJemi_QYCuGs7rxbZ-oABVnn!<kQ|T-+@=pv#xzXjjPn_~<+ahrb
zAp{Nq8Q6RjQWQ~0+0+!<OEpS>W&qaT`}Wb;a;f?njJIA|dl@M}eD2Q0p=Fz&{d!S4
zL*~L{z`)8W^9#FhZ@Kx>d|GW@-D%2*=Q&599Zgdqo1lZtvvCb3$|4j)Z*a|Ka=yCM
z`)waIyV+T4cISRq2T`tMYRCckK0tqHXz?r5Pj_|}dxzcSiF$Y}7>?=L+n>?8u&0YI
zTl~p6(J?KXJ|D~_4yt5zII`(wOFOT~5IQm=9|%WuzAXQw*X+aIJ8Xr}!{C}qkk_C7
zhg81;WCv{PqAN7L=6`h2X{*(>HCYL<K(v04VBf(+UWa6*x(7TOF^9X|0F<~Oe?4l+
z4ybAOS6u&3<KKIW3Yk(&t;v%(#okv!o>Vs-0Smb+=o@obUTvBurjkEF$!_w^cP>N!
z*6>@3U66qbt_1$A$}!Or54><)`F{`m(gvb4Poc14EBYK}Yw&9c>f1#60!Tm5b(pSf
zdsy;yuSfD8$|7i}Lh&N6Un+}RMKJhGh4;IM9%%@bA&n+_x1<K#pp3>atDrEA^VB46
za@OPD;>^Sm7mR|5_+7*v0?*jAev-;3(ow^}3DvXc&E>Cmt$B<vySgOa=T+Zn2Uxhd
zU}P_e|HI7I+xs4~22Fx!;L3_KpO;y-`8P@DaXfMQO?Zh-RbOrRv>tEUC`}m0os)e6
z0t0)^;L+~ks6rK)$Znu`B0H%lwsozsrSM|Hq=T-blarVg5U=2}LQau1I5{S_NG*6V
z@lAXss-K0(6Z;3;5Qx*#Vyyj~!xX73nCpeUNqGub9Cp(b4FQj^x0}2Wb}>SH+d>6K
zQ%Es9RnIj11qcwjsI~AOG(M;jE6U5~izhZ%mNzf=<#D4o@59yrH#X16zbD-a2?s|$
z@f?y&fn|cnuX#3D-%cjw32(U}VY2SDl5(nF|Ng3q?OkhBIGEfXItHC}S>ouC1}SXd
z!BPIB<Y?s8-#T4;e{?d>`F_gElVk-U3U-;7zemc^;N}vl5i51n{o{{6*21{(uWOX(
zMhj;&v2TFAQ)>Zq9QxjGy8&!SSnl#Pir~jJ<_5Tmdv38I@wJRf@%Bddyja+^ilkY@
zr)K}5@k>W3N1Gb@u)ZDg5U8@#4x=Ok5|8bxYe~q*W!rb?5N@BV&x7@dXJp)9jC;wa
zPd9AR2k`4lGrI8Htm0Pw7e(!ZD4f5sMn?7vR?(2yb%>k9hm2<&<4r?ZNnyxmhk9Jd
zqt9pe?BTEs4r3~R*z;fuojP{E2z6)##DrOhvAWd<8I+HNfw{@mc}2gId+$gliEIIk
z?($-rCAMDNdhWGOmNv)@7$m1m;|YE>`@;wE?lqs1eo+F8<*!0WgX-n1I1)EeLG3_@
zwl&UN_6z>vB5d%erOOT+M0<1ZoJK#92_0HYoa)ZHsE&SARx%a8BtJmPv7-7G6VpN8
zgp(0KU9CquS8YGaGLEI2JPJU{(^pX&&+%3gQ}9kEp)s=TF1FnWMYM=Vu5bQCK0iD8
zLhQ-km3_`~uF4y9UU0E)@+j?W{tdXzlk#fYN~aelgWY@f90$>kBeEM4UrK@QPULSd
zeHLcu9-P+KZO{JwYhGV3BFKopkaZ1Y38GE7f4`-2lO|1gG|Q~+=R<aAy7Pk%CEbZV
z=>RJ#zM{;igwUgy&{$Ev2T~Oa7%Y=I1nq3%agI?C8Y3v|rpTT)3`UhQ0dEEIUrfjl
zlsbov)AVQ0h6)giV$lM69g@?@qN<&KusbPPBR?U6Ynye==BUU<8-Pj2xfhAHCZ{4p
z0#m;JB>r6&7jbRLJly2Nbv?23qMpaaSfNcT9rIFXZ<Y}Nt*48A&Q&S|qM)jxf@`Fp
z?pT5+4!Fz6)lWZR&#f}=AGh#%GKo#$Rw)=C(AAsig0m_F@_a_idQb^ODKOH%_K0Gm
z+pi%a2Y3PV+X1y=jGL#ZG{#}Z%kxssYMOGi+e+~l(YGV<hGWy9Z};b7X3N$3^|K@x
zq1x2l!@)v^%{=>_O{=9?Es9`@N37FloGWGJF{@&@$s&eGjw?B8;fufJ{x=^rP6#it
zY2+1l)P=I<ver`F;Kw|F%n&K+Av`R-3Shr#ZGK)}B>x#SuGXWI>-X=#H5Q=LsF_oB
zmBNYZegRd1cmkV5dD62d&u3YFDx+xJ@cg55Cd?bj07Y4A!n#Qxvql-}R2p3sl-ED1
zs#+;yPEX$u{(mT<G0<+%<fa~D8MG0jxxh2S{7}@hq(thkDf#Li+9*Nupu<GJsE&dB
zfXK)#^kLaC;nk+Kj&1v1y|P4kK4OjjxzYt{3qWSr;!OubNG5w?6>n`z{xkMu#IGih
zM+>TGY@%G4q2)RqxvroxH-OkGTr5G7BS%W!s>;d;k=h_Rk(HA>s?BtP1z-aPdfw2X
z(o0JUI`J&`?D43>IsB&q<|;<cy*d>>mxg*ZxOLWX)jm0lp(D4BDLZ^?meD!&I@m^X
z9e03FWR{fF=w6zLx(rH&F>Q}y$5iZu+6ikloVO-SfI+=oDlPg8<5<eb0*7c$JDuL7
z*J3RVB2unk*(8~qWgH@Y!f_no$pN_snPHy<TZ-xVi_52(6Zk7z=JFrVc_b;Uq-gdD
zL>e1M|5-HIMy>Cy#K^CuHgC-?aK?Y(x9kQf&H%2DJL2>Fw%3j5=p8@j@Xs&0^3QQr
zbB3%zljhZ`gU;iB3o9D@?VbQ_sP~h>LzI<XM~*BXcb0sPPDnTu;FDyyp-G}1OA%PS
zX~O;(7`g+I%o!}?Hxj;!7mU7uUp9)bGkug2UqhUDsu3jwFG#>A?+?4_Jx6vPLS@A2
z<gJT{vGnh^lyoLc$!e=(t=tGdcY%qG;D<A)gj&{b#d0YPUwQ1H))7nuH|a1~QV4i1
z_IL2DX%HRr^b90p4L*b&3(qsYs$fur>7)Mag0RGZ(2e-R!{nDXexwH|iaqgPaiUXK
z23yA4Ayf%v7>Z7Sn~ol+6w+c1nprqI0-ps&kz@)_D#Z{|8{Sc5wE9?@|K?#LgYARr
z=IatTlmQEDxM))c@)x9>0m$jcKp;Qe3^K#@cFPt9uUUDkw&-#Ic9B$2C$J{(*%KqS
zAQ|dOT0=BU;0P{9MzSe3fJH5QcwQQJou2x*(Ii`6hTND6<Wan)F2XA$=fy0w-^~o2
zx=tNCZbVZ=5x|+#+j2o46gNgidYQ<ZX2=EC3q}*blBjMii^fRS{rdG2+G14URW7*&
zxe;s~ds-jPKnEw9WYzfrAE?M>J3130dnFJh=n_qJB9}p^af{~7t+Si#;1^vcriUSr
z!Z**xMFlSvL>_!GtBJ79_=1rQsE-0BMT<DAt;tnb5MqrqzkJ!;Ujv~c>Ex(#++oa1
zD29n_PKD3Z^W*tjM<2K5TjVoY*bx2<Zlwt$0PgpIHTfu~7@AS(-CO6(6VE{lX9vF^
zb2|ex&eLzXUaT8x%&|qScIRbu7r@Hz>YbmaW9!8Pa<5*thO3~QG7lJkr7QiAMy+bM
zKQ!3@TL83gSet@V{qe5^as+C$svx5orJvg84<N~Qk}los)I+|+^K%eEYhMiY()Us<
zZ+ff8yvDy+6Sl&DK5h~y0d+a1uMukCLt|;bi<pX~|KON~y_=Q=GJ%O@JBB};GD_|T
zfsx#HME)#<pn$>oW#1HFLbIw=xHgX9%F1iF3?h|X0=Niwww4md{f6Y?ai;w9ys&Sd
zK7k!xix!mob>IWMXvw&9$B(~;7o+m=b>GmWa=xSE4VJ?+cry>Hj8Eg7;E5Y*EW9D8
zD)guFBi&j0An3xYn;(;Cc|CupERj_HYd~NuYUs=YBd(HI-#dHu<2yDCn9xxdB7?yv
zI^H70p+dpX3&dIu<8suT{RVXK4^Ct7Xr1l8Hd3NLy83-xG*~WFKG-*u2*GU@LKyFd
z&83pPzabI7*eyeHPBvxI!&jqu@I(UWH1CM?CLRb?Pq5W6B*PX#4-JF?aom(OFT>fq
z$G-{UBlT(DL<;bvWqi>zqM$YpU{(p6r?%k2*Sh&KI^O>=P@)o{uA-0!Pp4jJiTw}q
zpSN&)7yaa2orq~q@*PY=zmf6=ocq9c#tI#uEe1eE|Jlfm;~dC|p#2+{+8Qk;PZN4h
ztIHMXvdV7=E8bG)Dd*Dhz#YukxuH6(5)zAvW)Ig$d(I?0EY8Hh+JOTGbl<|R%jcnH
zxD`_m7=@ZH2u9eED8qtLOnt<Q{Z@)Ng56WG{8$rEP3*+~ISq!qiOZKSfA`O0q8*PK
zyDXPpQc2$-LyDnu$DFi9UPBvYWImc^T%9>`^kj%?;>hVdBZ`b?&tAhhp||Ih=LEA}
zCMw0jCYo|9nz~-p{mnD+J6V|Ef%FW8!uJzG=p$Uvz3(;cJ@c5xRBS9(QJCQ#&t4kR
z_4ye*so1}{MXZ{d6OutiSPq$0qUD9!U!XNS?pfflAX`S~*+0KY1%AZkQJg|y6ESw8
zGl3d}`uL8cWEn~x3U18;+%w#`ge6GhP+qre)r#>oL4SGfgZMGnN;e5)=P)Ldw@J_D
zU<`!<&`ebD{OpxC^+Q3FkiNKqJ=$d<(>tYclv;EG0yKttv}E&;Fz9F`mGgs*OnP;4
z9sHe)L~cYojv5JN*0KK!8%2eMDfLe!X+K$3IJ9M~Cgs{3iJ+G<V%;nJmwBfO8s>{-
z=K<Hm;wfuX`mcd&Pct*eoBh??_2T5l&v!5I9Rbek2^WxuP)fb)9H(;$)nJFtozJG{
z(<a;cA)(DtA@qWlHazo$u0=9>&cmD-_7xzxax)HrIW7dB7;NPiC@d)nGY-KHZyC+x
zHzS}YX<hjGzqx&T8EgX%dL})GOP1{#pC|lZ{l+k*!v@3Hj592&<z2NsYT81IIuz??
zv1rkv)vGUT@Hum41;0>WvkhnrgSiy0&iyqt%P=o84c$y8B{mPwfAmnl$OVnPSiE4}
zg+RB2h(_WJAG}IRwEmyFB6_?;1&Hw&5U<(z%lt3jh7rWaXKzV4vAhmc4Q36_mqOc+
zu!Jg~W`dzO(CjOlF7AxeqDB#qi<7W~dK9%Ce!|3EdNMmc7;*Crd^JHAGa@6=7jaP6
z+uJAm-H_=${eRBFAHl_&>5Zdv@zTCv<U8ifLVjIv6N8{uD_tdpj`RS!cZZ@Np`Ezy
z5X@c%tVn$SCmfaS`oFl(*uQ8HePu}V1`mSI{Pt};k<f0$M$k_ebyLgmCz+YsNquOQ
zT*3lmbXqokKFMRTA^oX<7-aXQqT(dWmB1`;{I+#hKxs^=E`lAqI(phqA7}Ey(|iXW
z+KAB~N<M#ngX$Tff~%{P{6$9c?b4&iHcUW%rX#GkIn6Ak3JE2-zDP*;n7R3MdOo)D
zUpliV5j;TlEmqvwT$<WA^Qq*MMa7sAz80}TK)Z=GrIUGs?xL85=her>d|JEjU%%3g
zbA-Fz*vDA%&Y{6gO3qjv!MmNQudi7g4!Sanyfp1h%hs)HD1*GX#}?UO7GX|O7m`CV
z3^NQ$t}elo`zFbr2A8fZW-@fF<{TTEeR7SHjLwGtc~abgdxRHAfB|;&U?20(vy7Yd
z(6khbK)s7w_n+x48idTuerw+9R(S5MzuYMV`i>Y;0nK6>Ix^FWo0$ntxnvx2bw#%d
z0HHIV7Jy<#yMrrrb+f&cxUBKd99?v8Z%2yHiR@yH%{}w?1SCa^0gQ-;lHP&x7hGI4
z&vf9!MU;=URSR-+y^6A%B=#pOBG@BX5mSj*)@f#NylgNAj<~qgH)+;NrGv7hqe>=2
z56-oMi4bE@`RcUS$ctyW*K{ATiJUcTFl+;)BdA>sQgdWc6Un}HV?^{=%(wxwIQJ@c
zY)O9QQeXYnq0rU}{g5!Z_~H$kLxv+JJs&45lF6}#cTJ1lMUDRdb&rhIbS0g(j%0kQ
z)lFkE`QN4SI}lJ=S#h}4ZMHk0QtT-^06}P0R+d4UoD?P7_~Xn*q4tylo-S*SIz31}
z#pkr3{nR6s57?Z}X@qh!tV)0KMEKSs#(0>pUM8m#N(P>4+Uf|}jy@BoS@Jj0F@zdd
zS5+CdZ6-M-0GhDorT-!)H*?mk5ykcn4k$Z)agtm$93k#pK6YZ!Z$wS5k;0!-8F2$*
zVLd9}ECyZ+DuaHA_Or}nF}}?LnudLTbSa1yCucork{I%JYb^(BB%K6MAIF(n4HdxA
zm^5h;{{X^Y*iTowRXsybuce$^dcFgH6ItCZy|AyN!=9|d;0zJ{Mbe{(Pvekd>4Q+B
zo;#4RDH3qgL(AHL>K1g8wfX{CRH&Qih5+vJInpElkn62<`1us(tpfF4x?<PL>0k+k
zj*bp7MBE||J=(ORsLx3VS5{1?ud1i9=0sDSk*BLhb{Pwi#;Nl>B>Zl4%!iV0ffy=^
z4U0x{REc`9_XQO3IcBwRci%8hWq;_cu^MWlsZlvAJ1R03nsgc9T2l;Z#Ww>oiSg}&
z2S*gMyBv{}H>y}1qbE!dFmuMjUxE2;{YM-Se(nfx;AUXzF|ru$4Z;QYIXX0h(2p9n
zZ=#bmtzzAf>pxqTstq5@HTE4%49d6pV_Jz<m)IAv+IY?paUH}x!;A^N@wh1f4#$of
z6`T67`OZH(rAU@XnZ~8DGehpZkdX#H$JAc!E<2qMhlYj<%*ZW-fe0gzBdJA!anzjl
zJnB~LDn%^_KO}%sWs8i+sIEWHaCobq|L@VCIEA<}eC<n3K`y8o1}5S&F_v0^1z1*K
zCxc(;>e{LIOeq*_rF?vfRwlw!v1`{)1qFWFm(kGvK!Hl$!~Op8%{WQtl6;pgTT0(e
zB07^a=!G2rn%rduBVmN*Y2^9dO8@WzkA~P$2xhS<3|^k6;ZgmNx5at+>nEIcw4)&>
zR%{+Dv;}pH<H69F2b8Gx=pn_5!BCVaOF?Cr=BlbD-AF-IM*#phq#)-SJuX{9QM`tz
zM_E&c0C}btKD*^jU2|hN@n9@K3QrnQ%)ywPcG<`Sc;{-Jl%<@NMsGhZx33U2JYh40
z0-i5>gwa)kV*YjDOS0h*(Wx&&YrePxv1S;`ZFY(7>QUDuQ456zCNucdvlamjm0lR4
z+XfDX#2*<RzO_CR+Q?gF-h0lT?jAUN&<4)0@%7gvk3r}grUJYpUZGp^Gulj_SlcFT
z3OKBJ^Ui~e4FpcmmF?W)S0|3omUSO>uUOIwLLAty-wZPv`9m>#BgL#N%zPo!J1;6G
z22zr*puj+EmhqM#NsGOyh9wQs=eRMn`?x<ps;k!w_alO%1&b{&21)k!%4(&|BvUVW
z`Jt3HctN~;{d(o?qcT#;Q4f#xSbl{X#}GLn#gBjWW~zgK2CeYF@QmWz4Q<?8FWt<y
zn!p95jg6e#hH?EAAU;7lzqt6)+O?8LD1ygYv=22k^i{g|>UD_?Y-2JJSh4#={yo1S
zDj-3_KsjJWWg1!xU?W*)ZPwtr{+P4#_E7FJ#}0b%5`fm^o28^<^J|>?=OO{Vfx%Xm
z+u`1Xdnz{;lQ?mWG#i%*9wvlpAfzvBEkP|v9jXDPSGTd7*Md?6^=5yJ1PpJ&*Q<gn
zKd)%dQR^)jODmZ>{yK2&S`V>K1JK1@Xf&BCzi7$0p78os(9GAlLYFx|Fog(nrPCT`
zV!LmkB4#ed_D&l3uioe6h~@yVl4@%4%NLT432Qe5flU#X9=EZI_lLAmU<QM%(Y0b{
z29~|iOUSuky@~v5jxpX};kikY&bR*y=x|oR)M!1s!+oZr`X=s@dC?yz?BxR)H%>4Q
zAwI-YKpmSCZ3b(?rD4hDzGyRP=*M^O{>9k~rjR5jriECb5t4CgoD(DXvdYv;SLp;}
zDgFOC!VfKvICQx=0;2vb2NyBmb>8U93InP4ep<GQqt@$SI}q^-CRdc=8V9GHF%A*-
ziB?uY<Q&&6jq44M2{v)B`9@P=l*|~^L>V@k8Ugqw;>4mG)J`OHVbwu65tA+?lXw>w
zB+&AGe(sxbh-MZvqjB}Rm5j6fLk^r9iCfGvfIEU0;@c=zQlaqS#6y^{()<HE64vCO
z3Hupw1g5yPt#Ktd)a!aNrmfdGkAjn@dW8^9T!NmVngs<&78imB5rKkq`6;-QToJwK
z{)|Mk^AH&q9W(9{^$zgK*Vj1gIRk9N_)q*!fPuu117##Tx(ZvFYQV%lJs&mLiZnB3
zFd!*|GYW3q(t+_3e2}Gbqx#6TUIP;rpbtm$h+UhtYt}H&6^T73kfkg+p}7?02Q%3K
zIe4{W$<GZ5_yMsiD615&$A+&+zjE+1;F@q16d&(0(yg^**BL4MyIV31RU{f-%d|mD
zOaq>*qOo8JQ7K;DXPrjHV(%#lg_$f+y~OJhKiFw6DhD)+P-sKBZ4}HlCli6_0|pL^
zzq=OtVO?f)1s9%;_LHo7xb`eZIVVhH9ipi<F>3iqM6eHBEgHf8Jc049H+~<Likl*(
z00nj;vO$S2uGD9G`=oJNns#IN-nI}kO>%NVI7&OgLvceB*Bp*@nr0*r3#D?x5?c9i
z7>tUrQW)g5Q2dO?jj5T+F8mA&<yOr80*1G!#Cg?{d}t_`U$&S;M>!nU|Npnp4HJ**
z#D8*mdLVrZY-Mhx!MS`tkxlzaw5f;z5K{0vSNo|+mByIIks(QIR37NnY|;vIa}jJm
zT^&KBgu(&Rx&GDYYU|V;nU`6_*=A1XyokO2aV{01mNa)n$x8{2{_Luyu2iZ2|5KET
zgYVk^e;$In(H1uNKIIDA$Td6<_yPyAY5jw&xE49i_4P_FOO~8GdT8)&g^vm=l+O8H
zUADq=^zc?wRr1z$J>D)nq3an>--jNLHq1^MdeP(VzaHs5z3qo|Qq|vGSXKL_ZpYjB
z!q?dy-YaI>xs2Zyx3uJWg=75dOZFA9<_m2?zX5xo6;=~~U?_tk2DoZ3!v2XEifR|y
z5t+)OcjtfZM8Xn8$k1$Tf@kn1O(uW;Jzz{)b^e;!M5kMq_U-3I%UQ2nNx7!X#Q{;k
zJq5*#a@D@e*%V|EUMYkCEGWB-G9``zE~2BO=|w&H*JHpS=~n&icE2=_B<t8D>`Tc#
zh}cfBL;7IXZr$2+)6BuF+J5t9<Kf+<cl8+lICo32r9DcgilSSb`jkaVw+dC1m9r^2
z1owo&3|nl^Y?~2k90`Mh!#mxw;DuqQ3|Zk}rId*n;@6j>7Fc7Kn;R+U>RWZz+H=+c
zWiERAq3*EqQSROQoe*CS9qOp}cawJ!^7s~^$wjl*sVl1I{H(Z`p9_g#q7;&vOOA2>
z{>LA?YU4nnziQPg7EKoKh!hXZH)3PnY*g}P&r(xTs#&JQvf2nnDm!*ODyZdi=b*-V
z4;oaMJ6^?A^A48CD1zutqV9lDjSh_@q<rR|$S4?5^<A-IMYG9GB=fNjzlV#_o;}(?
z`<+sPA<oaR;%N*&ClMC7T?IKgfon%fIvW7C`^H)#1!G{0r6<dm8zRV}2?XBj(WA%w
z`SaPm1zFCA5;yhy+}m(sd<s4e##wx#d-tYH_(C@cLaiEAM+U+Hcw(kKau%2?^11m_
zjGP7;;xmn)U?p}C^i1cEQIOtkn%ejb{lU{l1!)@VcqR7mX*|O4X`wAm!rNrULbk6k
zWH0zv`_}%$2taDwT1W>LKtQmGRZv@($;4h_PYC`!ecW!K{GL1aUm<uy*Csku^tbe<
zH<e;{eEL0=3ISbMgtS4>n($oMUk&FQzAq{o59T5jsvd8Hkcjb|pcv6WM&k(c?d9(u
zHr964szm@RZv88oXBQW7bT74h_XynTLlz>(Ajbm;<>lt;eIRM^1+Q8B<jeyOBX!e-
zwX^xr?r+gD{`xA7);BPqRhdmQHz!p-^77@b{Hh%25ToFKctx&#*jxOAG^jYic~LLg
zlfuUdav>U8`^E+#R&Vw}X09<P67L*B9XA&am|ZQ{r*Vh*;*e-V`0}jOPPUgyw+8(7
z3*{4piG9ULmPN<edy7WM+|z&k{dX$ta{jw_0S!MLMH)6PNS0?i1@a(SctsQ81CMiW
zaIg&|3CzT3KrwBDi!{%utEphS==S`%ij;mBDVb)R{=jCE=ceZR|ABGol-MZA5GEuh
zbIXCx%x2C^0Rf`Xrm>&Cw3(Fa0a?T@O)9uITMN+~17E<8{O^}DGCGG5LzTU)uhVAv
zHx)6Rl{5<k9eg<WY*>6$D3Z#D|9RutWC56j=<Sho&OEEcrYmqsh=Y#g+JwOeTAP|c
zA~x~}C+8K8j*b957<UpwT0Wm1hynM(8zpp!cDy`82`eb;7!+8BF(ZFQvg*<(pr{mb
zHpwj+aB^JMoV+|aAtU5fs8(dYdhtR<QC<o-z?gL`YbB64>&jx!h=mg2m<yG@Medz1
zCZ_1hxlp{)EMSW46_@%tO4eRjQs%FcTs8L<{~lCDfw%;K`w6bl_H7Zq#Urqj<>lh{
zz?jz{@w1KrO0a3-3Aa*4blr=&5=e5C=xQhBZWUf2m_ZX$poFo#QC_U{ff^7(7|+Me
zm<(?8D0NH}I(Ga}P*BA>jHy$mtcoyIo&u4<^JE}uqD@*Lv7+I(3q-a~uKoD&V=HCk
zO_V`<NxFluT0R~C*n@s4h7FSMGjQO8loZj^*5BPk@*LeLh6SvpmYG9F#KWcwHg6S~
z!p)nG=dfWL7)jzg>$mtrQW@O4w*_|RkVLY>c!%^`paxXL&p88T;^K8@wpgC75`q{f
zMt~cBj{2^~=h034XE-TdS~UuwBP{?70}YP7%gb8{4{kxQe2I*zPvmV%4lCYK@<*A{
zX_~`_(@+n_Nsk-rE)UotN*)`v;h!`{RIHjcIt_D=O*``vOA;NOTSOi8c5K$HMsSOA
zYS{3cIi`|p{S_v+m>?348m(Hk6y8#4dr5<V?aaHfc0ztR!&zvESWsq2_zW-@I+y<V
z@2})AX28LLG;GlzMpX!#3<|iLtV9b7(+DNYnXu4|?k=Nf^2@=r*Rq;8HbY7L9l4aO
zGGGW{KDv)Uhdz>yuTG=LtiXn#z7?6c5@J%O*L^M5l|?7i<CKQnjBt@glc|#NPT-@t
z$qSoq;zuywDEB^Fo&%Rel1?~PE(`?(b(E1jr#JfU=3+$#GbGP1EJ%6%x|*6U&4P|S
zQA?O76%<@B4ECHQE2*sEd*>WNeG;v{=(0}!6e-z90CCQ@?jAzV*OIsFtXj&ym)xH;
z%DM~t5e``%N35i<fW`A3E?oM7BaHYXt3cu<&9&xR_NIoErCWB8i`up8zeUnJFB%RM
zQ(HD~?yQUd#85oH8ovFPYL(v)T{4e`_J<Jxw;KR@R8{q4mswc+OT_0^3ws_)DsenL
zm(XjhL|f)AzdC}x&Ot;1`WS=vVj4#bCqRG5vX$)srGCQ`n$q|Q2_qm_NEjV5XCaY6
zR~83D^rj$>BD_(kD;s{sCPFj*g}QrEF$BX;e*T!Yu4N$*m;Ahrx`LdixYK_iBdP3d
z6f|NL;0aJrZnU=-g9&ik?($4Yh-p)%lsF(*O9l`jZ>{N#U&xp-rVZ^a`U~#qlFE1C
zvq<k(`JTYopI@GF=qb4_ZyYF8;U6zzheWXg1J2#+<3<CM^IStc?lWYG=~e4lK~2Fe
z?V!EU#2Ch51R*CUXH_n_MnzRZ)CURcQJVNKaS{z`jHnDred(g;ndL{#J%D90vdgTj
zs1=v;k<@wVe17p@VrOGv8Oca_OJ&pRYx5a(=TqVl4N@721VXlbj~-%|W6%d8=%hvu
zUVjoge|nAtp6_%?XKr(){bB>v!M{6>qZi{So^53%$J)WbWHnZX^k9H^ZCA+ysTVP8
zW<<?!!lqqoFC5i#jmSF?vvs3MB78hUMRvg9h^oZ?LThVluC-cri)%<?p{3r$yS&ZH
z5@Ot4yPEmeb9v|m49<Y6+sw?*P{F6s&h{w)f<Qn;^!r;*Qd!7M*Xp%v2gkj}H3tp!
zj#b0`sC=0EKSeLSmxH~16(;G^4s=~+YkP@Mc56)$WLlciQ!*St;sQ_PM?Di(T!8LE
zOhd&gEE-TT5rU+!?!>Wx17JRgu6+L$%a$F+{x^eu8)GW&dF)4sj)`&M$qN~W{bA7S
zY(gY8F)-!GG{}bL?RwQzJYQ;NYMPv$mY$wFj_Q!i-s~A%>1+_O5lgWgIGt5@n@Y*2
z3Eq^jnJkbY*|8BK6tpfr245Nr(ItS}@S50&#fuXgC3fK|?#|*Mc&NW#N|K!`>3EVF
zPIjSdJ4h;P*<40#qq|24h+Pa?NS;b07uP!gUeUVAQP$GBdS@r|Mjre4^_WCI8x$lU
z4N(rJf$`Gqe<q3E8#M+e+)e%`3s?o(6<d$#wDB?lW-k~yDxFMXMF?_{OJBB!V2`S}
zd`WL9AeM!g#~Rcy<y^61h9Tlz>IKLYzW2PWWPHd$l2;(0X+|aQtglRUVK?JY1`e@l
z6L0JIB;*j8Y!Be|u@=jMv#K+R6NU6U>G5N@;S|^-dQc57OF_J>%K$UTuNb$Z@PaU_
zHCD#P&wxLGk4hpvXNW1h40z@VehljK#zc4H*fGX2)9&7#EPL3;2kX({pm;QTthE%o
z_HIpn%2r3v7VW3#%t+{%*zMKV9X>^cDM)j(PSBq~M*$Tp$R8*_9vuefUUX~ta-7E$
z8c2|_PKt~&5#piK-xJ?0hDx`3d?a~7-gVYxU@{08LX>j-3xX5Jqn#v6v9t=<g}{B@
znD#_wO_BNDRkBU^5ni822Xyz~!^;~k>{Sa~q4A2A7CWDyXJ*Q7r%z{tzc67{aTE3$
z+@-}9kKm}OCxNYcXPzYPM2FQ;mlRTOW!{FODe%cEO!BZU0)fBBCypmilKS<wO2_e7
zDcOyfghQFB7aGYWgIgI#U)I2&HJR2mtwMy0sql_ov+xmT=5Du<Le}?IQfa~&ptt!p
zdL@RZW7IFnFNakX(R?SFr_(4ahOP~jBQgr8WD2PwNUCJyeg`o*#?s;Cy?cR#>Y$T~
z(%eIghXOqBrl(&8-B70RJd=}>=79_N3YhZ#!-v}1S{g{B@gvKh<prui(=dU4Ev+Ch
z+jtRjvvh<mcOSo<Dz@DF`c9mbG{;18E!#<Ua4Zs2SMFc4#~wLxqGxcoNT@#o(2(`|
zAbuoh?((q5s<H1}WtuYk{yfa*W(`yj#tob#TUvt!*)me@TpIUqEocG?;E-~2BS3s?
z2@IOtQ2XrL%z5D~WdzS(Nh1ppm@c|cITx3DUbyzSak2c!hVXp_wyv)mgGop0J2#iC
zW)brUDQ@zd2>q2ZQtWCj^sO~>wkDa$E@KR-9VO#}S932X`oweexySB8x?PqZK4(JZ
zy{Atd2up?<p4TRdr9ew(J4-s1oA@Wv{*9<8reZ6rs~_CE2crG;G3!J>4e24jH&^)L
zlT~i=6c0UrCNLIt_f%B$5?SCV<bi(mW4iTI@&`)+bBLsFVg^Y}V2qIyht`Ng>$!#F
zpEe-EJ;GL006&5U39XZw!_w`0{{Gu%9~5k7Cu6d~Z2t+NPgYTSiCre!&T}OY;l3r}
zLn_nJ!5ncjd4XNBwEsQd{lx{d1{+a4^Cb6!TY*~>8Ix=>@whk;QWy(H1opJB2&lPp
zM*bfNe?A|>Ab-(_6;U(y0TFFbr_Wd01{~P9VZ-S4?_A~IQ#YnfxFNaP|HX-z`vRLB
zI)z3#r^yf4j(wUqUQ25u??#j@BXCo8szH%v2#H{lvlkGoGIaiPj)%BC1Zi4W)|Qs+
zDKJ~KXk3w@hzP&5v)Xp2=x$)2g{l&O^DAKFPGxf@l?jZ1C*s_86eC-ZArbQcC#|fT
zN}k#bo%*ywrf79ka@|pu?pbq>bURRh5BHZZUWoPeiId&6Z^Dq5O^f1qk<jXClTo|_
zDuq`KRo9$TmGFA+J0Cvg=cj)kFwsg0^YNALtvd0!{n$n9_UUcBOH$bRH+Z`Hr?r=K
zmX&Q*&?qK(LJGHP-I}3}|AO7jH5<g!){)`rn}u8Ity*=y_*^a(*HdP{@Js;a-CoNL
zQyo8ki5e`CC*h$gY0i9l^>atIl-_j`;X&aU;ao_r*N0C#qk4JXCBoe9-4BpM(TfNX
z{DX?WVU8g|*^uLI^y7R~R8&YvA!mbfR9#&i*R*+e+;UAoDPFk3*-;X8#Cu0k5j&sr
z<ZnFOgg}j{d+}LayFhG2kjJnl)7vpIo=Gp%TRao^J}-7ttE7^gg5Ga(8oxf#Lz#+V
z$k48xI%Uviu1sN|>5kI3jMI}_IF%~~TpKlZY~c>SKc|FbsCD>wP|WoO!}tC7i<xVY
zX61E3CY<1ULL^|7)Cxy9Way(icXZVVd;1sXG4)|M@6DRdh>RBr2hlr^9?i;9VTx!f
z1vJJGu?*)S2U08DC3YiAx5`JupOXGKA?K>6)gn$t^%KOAOXig5WG!X2nuu>-kNTh8
z_qwZ`RC@Lw5rhZwGQK|wqhdzU`}e_vP>AWEY|7#xN*5MKg&Q$+7?v2sRG!?!!2$I3
zN%W7Bk_aWLUZr-i5EUwlGLph(Pzw;ntJsgNgm8`6NUQ|)IBLs^59hfL`(E%llgG(@
zxf2w7zQb*f(3AV?HdkOfp-l8$boZb$Z=z(SiU`VETM1DA0ec&BlX0=;40wD2(RS<6
z1H*k<UWKELA;mrRQFM@fysYWDmd11DU7s(4J-crARtUx@N=PVto5>Rxlo}tbQggV^
z5h8`GvKrqH+}dYg*B(8@We8HNoHcSw@#3JqlItC@^)9X8<_HK*cx|LH?6q3O7_{1&
z2JOITpX0}4pl?fVRu_}$%Pgq)(NN@;l-R9UaR!*eLS%AdA+zZI{u*K!+rre;4}_R@
z;zv4#*-tINY3@h+h7W9pMOVTh#TZ&&oD)*082U`R(IAulKrfoJ8kDhG&RJy+-px<F
zUgKHROt+~tcN{e?-7ZvaEn2h)x+<RYtsZ@dMG($3z{TFQaf_(<semi1s{BBz$J+2`
zqN#jir`zw?adUA~$u)deLzbgOu@1_LD+ZJjqvyHP%!sN$NM&{PB2X|Qz^CJ_00@R(
zoc)aqO@)Tw<k*23yg9X`yuE;^3c7l_vh(Ms1IEX1+VK<T8$p8kPa7hsTm@ji;YqS~
z9ENw$$p$o()e~i-P*L$Jc|_x0P`^OWvWRLf3watot^0ytL;70|lp7>(ey!odh6(T^
zsR%4y`U+Kf`S<VsJT)Nn4qDn|OT7<NdSF)*ID7yVrz|(?8T>1}k+K^a2q?<sQSNZS
z`BbJxOF7RkUseX6V*mugO-`9<Y8C*W8Z!|{OtoYfN0z{&Y@FR9>DEmA_;IGQqv~v+
z5r6&~Dpa*N+ldW>#>U>m|1k{4117tq<SjFbg0z)%{uU*5_YwN}0F!sBtvxA)RGLv6
zUkeZKplg63&%lAxyNlhl94L2r(V8bNh<<y?%d3<^f%HVlq4K(7yAWB_b>TzWgsfwR
zRHEGqTLq25yVX&}+<U{OO;ie5BtUTAiWMKIlzBdz(Zv4zL$nk|edDHR|A{OWRaIws
zc^Y(=9r6t!U2L4mWZe&b<i}5>qefkkQwg_(Xi$u&sAxz5Cs4Z-@ZcD37Noi(tAF@q
zmH-)`rcpO!Ly6?#7_o*O=ztg@l4y97{Nimyw?yUczyFH;cDj2&vp$wP$RF_O0q5rB
zUrn8WOJ35WM+f)rWkKT!CZ^<d8{C&uA3RtIp~=tNh7&Qv5!`XM0MC7wu^C=Nbq<eq
z-!@s0fX!^1I74oPhPsN|&=DillT?wwU3b`rW1QZ{l<!eF=Oju0B!n%oiDi5f>wbLb
zFd@5P4p+aJw2dC22GM>{YU~lhIGc{sF!aK(lXN8oIFKRM0L-Z!FhV%w<+XsD%XfhQ
zYTc&Zr5c3vkjW(obVyWwr&3P3kd!b)>+S6wbWA8d`4(JBF}Vv<svkszwdWTDEf(kJ
zS6+<&^QT6dJ_XBBi@8xmQ<MZ8@17VFLB{GCi_sa+JerK$9WVeOY3ujQEmZCwv$H9S
z>MnK>n~XWk2*ZbuAD{W`*|&*QQa})@&%w2d#(%?*kylxeS4S->G(gNy5i^+G_{j_K
zzR!OCsKECJIb$2oZ}eW!8lOK4pAza5n9K<kL%foMPSWRx;~LEr7O0qtc9wxb&z?O6
zfkF;}Sa`)6%N#XWrw9TjIe=f3GM|6D0QdoKLkY#tFxhMS2Q}^N{m-8vcnY$6&;NR{
zdPtI1!SHOV4`r2*STaIE8FcR4AjLNK%b2sgxs>xTpW8t8UP&W)qE)g29hyv+L&%UF
z=t=?z^v^Y=K#B{G-vtbaxnaAHCQ#vHm`YXqSKymX=a2y;?M8jGZ`)$0>SJvc$0&?q
z%S-f~zcnd}fF?XO1>e)8F4{7XFK3=OQ{GQ~ykYa^o3yBc-wZ!EO^b3uAha!o?D8gR
z%lR+L8Mu*&mGL|l?CuBKt76lS2=i0wQGfq>QU5Pmq~LRo!4+sbb?1NJ{<D*)J0GQ{
zDvh?cJUo*`u1fH^(GNf*+qyN|RW*j}mH!7oPgi+x+1K}FpW>Zs@p4hsATt157A)Qt
zLLd~$GA&52*Tfyr>!F6h`^;`9CZ2~pAo*<VJbHVT1C0IGd!Go(_yKKvdnHfjdt1rV
zY>v}(0usyLLm*oa{oq2g5$Kkcm9^E}ts%sX^T&9CDF@Ki<Kx;I;I`ic9|uE!w}bXA
z?vR};!IuXbim?XzNHK_owelO~tz|iSTefUbzDF=-o#9XjKJJ;QlZgOu`r5;WwbzBo
z{#y8)d{)wIh2_eX;9%N-6<kTmO7Q3emN+-)B&+CUVy=D3FqpO|N)+Ugrbl+CuNv;}
zT$aQiM*+N@JIZ0DHYBa-`S-2@Gr}se#WCha`ql7oB=cH)25Q-E-MY<U5*}p(7z}qH
zUIMW9oV-{}Nf(QqFY8){i6M2Wf@%zkj({k<yf#4_BNd+^&JDl8!Z&*fM}8BwZzVso
z-uBBE%vxqlo0b3vKq7&r&pbb`ukt<kRv5gE4E}Z4r8<>DpqL-U0F<NS59Abq7LYF9
zRU7b3<sk!LZuUu4_nL_rp>S3~7Dk3;;t;OSf1J~ilP866nkZyyVv_suqZqThVZ2uP
z9?0tniHQQUr7}|u;T&9kC-}6b%)}AX*_7Eyw}!YRN`3<a=i9HFNw?BfGwb%}J&0M!
z5tw_n1|Ak6Qxp~)X_HD@zi4QZImQ7xr%#yzk3H?X-FP;?Lb9TUFdCx>8~L$Q{Rn4j
zma=xw_$3Sc2ge0|EGRI|dD7)=wXu(BsQ1}8$_zp7)f9vj+lCYJ=@+~KJpVbGc5B%Q
z<{t{<qt6{#I*)M;P_f3iWfZJsAJ5XfbUM8MQ<j*+rcP9Np}Qy~gVwrm>pOLw>(j^Q
zNQvv=!%t{Vwwfcjj@+qm=KB`skQvJ`_527|6H@vyxU6Zg(S`S$+k0#KsbAg@?mSf(
zEDd1vqyoS(b%<f_IjY6m3t6^9^zV`tqm7Q2d2v6w$s6gp_-wF&k@6l0B^h7pMq6{{
zwnBnfCnt4^Df$dPodsqBi^v)LU{;ZdumQ7#k^7+P!Lrd?q<}ZeOB&i!K`<5N^v)gV
z+>dEM#<71=SJ3to@aWAOxc<RyZdN=qV^S5;6(b%KBBU3@|E<Cl{6HZ&3~LD~pL{D(
z!J)5_Q-<KHRXb7}MoGd=>Aa(26Xl`(c-|nKf?iBqfe4i&wL9r1Dte@@q<Uv^=c*As
z;XHbh?s=`d4!xC{I_CoJ95+xZ)vjT!l@qG%w2#5jd;*#?gSMDeRaSDyI#1X_P&FLS
zz3ubepq8l{;{1>d+!Yd&lhgcH(zSxtDsyQUPEjz!-8iHchl#Hj|0RKBmh3O?BsQ(E
zWn6Yc^|y7Evu|pO{PO%##O2E>L-Ae2cSSS!Cb5bJtofT(8um7nc}Ms0UQNx+x}2Iz
zufD$Sd;I<J*Nj6dN=qBGPj|33=Gdj3eH=~s5CMV-#<|WBG`S&!6}M`Q2zZqkax#-9
zfIFmuh)eplLC)+*cBA~JTV+2HZ0tCJgj(i(i5QZR>2%GNoW}PPO%o;dKZoeE@PI!6
zg|9X)iz}TuEn(T+U7b#$HiMko9?a2+U)paP<bn~TT2khDcjC{ith&u)xT@#OOqR0s
zu;4T_Q5Rc9?{d<iGn?zC4q0wt(OU^s#1X%96*%-%;h-k>w>ElAfTtmL$$H}lp<6n4
zV=^N^q987SnJ$=PSOA{h|9Zk*>cSm^ggY&-&Lrnrm#Ax0X?jzUlyBIt@N%O1%j@gR
z*coWluE$7&I&+10+)O6ngHg?2p|G}{BuVBy8};X`#1c#(4OQ|ma`Uxo!$l*mfU#1@
zQs&G*uoQznpu;o-FY>fUh7bMW-*(^o>#$4_;HfjiO^SzXAHfjKJjWEny$HjM&Sw<x
z2s@2FM|Lq3N{qb6>%l}w`DSk7a#h6EpQA;Aj@+XFT(~6Gab9XTHntum&dbCtEo8)N
z>i2&URcvCBPA~$_ok(#u>~|=9HtP~Qzbf3?>wd})@qRr9OktYiMo$KD+c^yr*?F{N
zf*o(q{*=creIf6>b4Yo4x!iY^_#Z(@>)s>RD|4CAj5W;22hme2Oi<|DdBAgom>C@>
z+cud!W5&PfdhMiJ`vn{E>nor&x_St4;ptAZovoStf_>eFjvU!*oa1`#FsMBX)E}hP
zgaR9s;C=iXV@u=>Bge;ZR>*eOu3jyiyA~dw{gkkOV=~atFdOSj6el?+=oinRNKV+e
z@(18Q*VOis`k`XC^XFB@Yl)e>yjA?rT#?VfB6Xq*b%`y|XI8eyavcG7!|69SKXobz
z6~KK(ROTN0$TI~dlO|SX&L_V``;@oxtT6^<<cxmy+{MPe#13wfi^imJ2sB2AOD7<_
zJ9g{~Bc4!IU!ETK5K*X4sWNw?ZKGziXG@R?1~-!cc4$RuN)Ozm{^Ip(_!Qz*A?juj
zx#5_;V1kRNsAf*3TfrI{Bgen44HSH1#oFqsBRG#LY(b7Ranm92i4lBA-s6Cn)4{QF
zYd1Iy{PZ?wLGgZdLyGI%Z||YCIn?8qHmt%`1K>DgurywaKx_Pl)`gZ+`|*PU&z`L#
z3iJMimKQ$;o>BfWMm>hB(-YfzG-J^GApK6`r;Dr}b!h+oLH3XLFMdTF&29c2EU=yC
z7%aqULISO6J~d~h!V+steQWEpMq|g0?a{q^f$v}!dY7QEr&=B1j2iVPUGE5ADgGMV
zG<P<sY@`J*K@=E-X?!%+WR3*TmTGcpL#dDr6<MAhpjZl9!*P-yGb=jmNNmU_xyPz$
zpri%&`IpCrtM~6eb;5mOX1!l@M~69s4YGV9M+wPBA-6K>=9ZFEi??mvS`hUI4Y$|s
zPV|+>w_CYv*}ON(PHVE?f4j(ygyp59*&X3IjgU*2OuoC_bi86}YUKBIV8MMc2DSBV
zKE-F;&s;gWK+fFHG?WEW)huhx8Oz@m>QO=fpHUd+rlyA7xU@90q=E)Rg-e(qzUMrv
zk83WyYu2dM9MHN!?K%M=X_qcOzyW3uW;B3B{u$Q-KtFpVlVUE)NUlD~w{A6LADm^t
zV!_)Sc=!-{#B?#}+9BW5hu+}xIkmM@w&=1~^^|Yb_hIUnJ#Cw3R{4G^Y3}{K!;ZEb
zQ2#NTE`05=gc6%4+lfJ(lP5znupIjFrZB#D)27?U>+<{Q1uymkC_ph>;-_`!kalUw
z*i5tSoJJb4^iJ8rbEDK{s7C&_HqtFQ!Hxa;dbc3u;ltK)a%&LSQbka31{K{NtgcRD
z1if}7p3?lTya5LTiEca@At`3JIEZDA=H^+P`hE(peq2BBZ)T4Xg9gp4x6EugCsQRd
zXQqj%X=mS+ROPQalz;u|e0bb_;m9P`%Zd<p7y2(QE{><KT5`-Rq0<)ssWwafSh$9K
zio1tVfN*c5QpvxbT?gzDox4}gR?%+@39?=_+pvDm&6^=B>9lP0Zb}R8J$L}D&LH)s
zrk04#rv(Jg$Hym+A3G+z2~pCj9A~W5K82zax8e&_0o#}m3C4=FkgP+u5MNo(ncu@7
zqEw$#znn7^Xalf2GRKL+yl%v%NR5cd$md$+!3SfbXVTSAj<7wIm(?ReLER*L$|d>)
z=H}@|K=^&t)}YeD(evs<hZIlP@3Ywk;T8plD2_p%*b&Ccl|kviPn%`>S{3n9T-~6m
zeto$vR30$kWBQzlg6Tj}PAw348-kfFc3&tSA+ZQ4^F|W4Jhnt~4d~u^t+r8m1CMhI
zBFx*shScm;NE!0U|6)wuDiG<{hb+a)*f^X9otD=83-s)OROXha5JJ{Eng0o-)^mq2
z!Ra?-?Xf=j@fr4EI8{QfMejEM7KE%1qY~OvCcR!>UbeH?jTS-T>$f-`_QmD)sTNni
zXCpY`xEBdP2aH2#$&njD<EjmP`{m0u8BD^p^w_bkluVha;B%=JZ`$k4vnc+Lij*>e
zDgF?n3s6d`$7h+~weja-f4V2)XdzTe(<-m2EZ=&F2%an&a7!2W%Cwo9wraRtTm@F6
z{Iy+mzNvY@-m;rln~glAu5<hG6K-Yx=nLE05oFb$2W_%}+q|S7#PX^kwtpe!<dg<g
zk@BK<KV7|vR}^SkwbU@RX-j2qBxbbk)iNl6zoz~ny_?!dnNVtoY++#U-T`EHUM+p9
zGz-oObt`rVOgEQq$#R<h(hX*1C|i=%)z^_b(h?$RfkPe~`X@@^8FgrshJd=fl>3ij
zdDvWwplCuoLG}67&s3QeJu`7|Tg;ufabp}G?MP&zdKxhd!rEOP2J4AdQIzlT<7hZ%
zR#2-l!LdQhj#Hx9#Pc4Mz}KGaE{_4Ctj3$)Db_)wh?3*U#%t8thLk4r-g%&fUTjCT
zRDVWPM!K@xe{Jo^bJLng@1h#rt+~leUjv%}nB2hr-_H(T^K316nOd6-S;9_GoJN&H
z*ela(GJ>+js~(XSPGD0fd$k1cd+x&&Q#|+^HlX(>t0N2%VYaSabBaZKYHAk}C&M#{
zCGCOHP>Jq6R_2aF4EOBlz=;!a!^_u|ojiFm1YWJvDKmn8(0gj(`$eUGUS7y+ANwf0
z3#NM9U+61tG66j373Bo1d-U{~U(zrG2+LcA3ehN*rDo0AVwjcu=oxKluKyt0!*IVn
z$8R-~9mdgwv=~1Em%r45jV@yv%HpfY5xIcF?XpBA2=$^XOYsCx3PhwQo|cl5!rGSn
zV~~_e5Or{L!NT{Ns183H5)u*~oZN&;%1JJbaTUx?6i!_wlrk2iSHY+$t+tCUkNDvX
zSbQcQHA|q_LTLpT8K{AE9zC_dfvJSgYW5*?GDidDLbOJ;+(p*_8iF@cRX#3K#O4YD
zXnI;&D}#Sm4Tl7XqCB42s?-Q30LB^;vsnTqc5dw{rV#158CmG6n!kcq349bNw}0Qh
zR}_Foe|H`B&#ie_Q`u&YnK^T&@)T23Q@Z?Gnwpd?kOk0V_#}S*jDLc^UkhMDBQ?#f
z{n$yJ(zX-31SYcQmZvjgCpYt%r|NvV)v$4*0v$vUmOS5p^O!ZN32GQzG9EhT<rVsx
z0!PUn{wY>~)wq|C0Hen5Pb>BftISX>y!!F%^=qM?oU;VAmWq~EIRxMw=8l}5orSZY
zelo-&{J0a77dEpkEG%f5e$7ile9ih3dL0ykb<pLUM)BAor4dn?vCKgJPDaKiYMG!-
zO0AeK-t%U)Gb99sM;(6;%I{%H3XcOZ`e*VN1Z5}ZzR%bXins11ejp+mgkh=!2j+ng
zDHJ&+%tLq3rQ-;FLA8DQ*506P{|{GZ9#?bvzVTxQlRcCrYuVC@WGO1gUY1G=sVpN)
zAtEhG%}m*&DA7=%qYYUiCCZW|YbjYOp-@ODlGN}0oXpJk^*eujUtcDtbDq!h+|PYq
z*L7d_GM<U(yQJ1Yb4?0HatDw!8ryY*?6VK&D5y5k2v)UM*~w@u9G-;NQS7rZ+O}&)
zz2LlV-4_5OP9tfW0CQs*<n#C6oOyJG3A%3Q$z=_+9p~oiZkDTInz)Eemz%WI*DVp4
zN~j!qOh3+~99KYWK0#4N#&9r2vJ-G3Ii`5Vs!M~4PD&4z?%joTSK}hDq(U_~bC)wu
z_`2mEp0S7*Z4#M2@M2at{kwcy&6P92i(^Iu`{$j`1AUD8aF?W)6Mr{5JK<ayI}X2{
znxl%!$^&NZ*t!+l{^dl(ZT8uhFRKk7ULEiC>RwdRpq(T!V6Mcpxg`Qqsz@~^YKI+Z
z2WxU~c+s=3$kSqMO4PtR0I6_KI#zX%dk!X_Q!%!~a4hY2`Ryc3wN9en5QEhv%a_}i
z*0C{gOUcr%{WF~TpQ4d=?HhBB!q@_J-&jP4)%H(lUqSE@vm}JL{TiZTK!&aEajh})
zp#Lr{>Cx<;yLrrM-W=_afS`p`o8F{%4`O%H@<Y^D#sEd)ScqIjc##V0-I>peOm68x
zoqK&+MCwN43ljq0lA92GI!mp{S-I#adugl>9igs$kl;t>4msQgA#v7mpl}94fiVP5
zdyTLYAwpEd5<n2kNd6U0TSlB}W8-Jshgn%N2pz7XnGP{lX!wZ-?F3Z>Hb%ps80cMk
zvol`Tq@RL8yna0vKoigUJDO80_v>D)g@V-{GscHb15S*onc3+xXI3EO=j?+*I*Y`6
zi|&SHTus8ynp}rB>8-3>10wV+-AfNF9)6GRF20WlsZ6Svl`%{FG^L)nzX*2lpTJ*G
z?avAe1;I;E3|jjPK>zlPd}yEX;JsmY{a<t7jDv)+=1E$xi1eDhgrGe0?Gi*l#sEBu
zFSvR+5>^HR*sbzr)M2G?Ptcm<hMR`heL5Ei5vY(+s+kYzL7mpJL8W0a!({iQ=mu@w
z%EW}jhgSP{z5H1jeE8jeN>Xz*b_oU;0#u;V98-JO@B^b?H^Ax>g@hnMyfQWar!U6e
z2wIUt;eZkV9AZqIO`_mCpvO>RkT=gq&?sK%3;(CWRC-SUq0eYGoSL8pYah;arR`-A
z=;^;7c}VeN+E1=@?urQFfBtEGANVUedfK*c&*;Ngo5Zl|f9t!902tzm=k}&e00ojB
zsNGh|J~=0T>&A`XtwO5}L0+|0-0DtQTtnQL_-O}IgItIfv&+w5KvHhs^l42-Q%e1X
z430iota@(z1yh5)IEP!z9(2p$uAhu@OQr@^{<yy+I;wv70AmaSvs<OPHVd7Cs)OtU
z0GSAOfTK+1Nq)taKTqZ{@$SwoTOOymyWlMgh%5yD41)&W9t7|j7%m{{%FU)Sw*Bdp
z6hGJ><8P(A*)2CW(bzeZbEG-}xQsk&j(r|`{0CVe*dVop@Im-I?{i{}nCHwqA979)
z^RHLl@nOGX|Aym6Gj43Mr!Ebta${mAds6<{vmbFHxN1u;al_&|%!ccbLPjGtPFYY`
zSeO+)3MP;Ht53vxa}J#<e_VpJo*KxJrvXK?LyL6vZ_7+Kyg<_4nFr;WM%nJm<Oq>o
z3~3;x+0k8ZAOo#DhHysMmw-ZPe~E3pd-nrYh46OFFfu&cgH5K^XDy+WI0~#e2L!A7
z<}l6(cR42z?z5b9i$mQeN=z#ulbfS}W9Da-GsUzK!R8Hv!k}iNJOG**F=U9M6nV^S
z8VDp~{^G*sc=94M{#yRv`jsm$87O6*m~Z-8Fi{d;k+FjrHF*wt-OtQqPODK>P0jlU
zu^k$ZH<Og)3x8-UMla6!`M}HGyop16nW;JHtG0hEP(<CxXFmM`;%F<_Is3}}ZU1Ni
zcFqLWfjXnl>>M8tV1xrwF#|UN64-A<2WAX<*L<q3PCjvB6x1@(1|(7ZjPS6g%D%i)
z3~3SLO%j@}%T)A(&>uhr{h2?B{=q;=MFsNV0oW(<B_W8+R3k@YAv#LwiV$^d@`tNt
z$@IL6wvVeEzBa?vf0a}jn9iIjxC8zI3x~Qd?+r-;$@Ai@G-PW;$j@wCO;{(RWDEws
zY+`RHZH6pH4p|`OL5Xm0GTN3cVQ6@soZ{TMQL@v~c^Fo47h9XgN=zS9<OL{F{3YeI
zTp;nc6uFWwkeS+<VtUOMfK9=4=L$-5486%bRUG+kJvPu`nCA^h@`$k@%T92P;S1iP
z7hz|j_-nIYO$>4(h}An0t<m>=BN2c|heKi>$CdniYXxC;OVrnyb7~PkA|lemV}Dnw
zk@dpnca+jRk5OBE^~x(|jP=!3{*eB8$Em3t6lDg$BC%Hp!LpR&2#kYtd*;lUcqpiO
zB#AKWZ#y+HA8%b_Ev@CO4S>ccF_)b#b?WQ>zTMF9nA-qW(DA>D>xaQ@pi4r^+L7b$
z0Xlx^x2M0Dq%MkHOY4L!qv}an0~G;($=O2Yx;ioCiwK<4YYwxK6)zVS3d%n1B?POx
zheuZ_4XPg?v<7&srT1FsuZ1w9>?7g_G-9e(DL7-!_MS@03QjGrU-={&<rDX`Bek(y
z0M^Bhm6&GX6$+68WlTeyYLGohDy$c%o-;QL_!xmt&=S;cK4LUDy<@x$m3tbtBJnj3
z*}{1HpbVxAhba=m1M^AAOLL?eV1=<lkh~-P``-FclxoJ5#pXx(^l}T-p~y$)&V8!(
z`pvp+xUx=(s{e!;ET6W^-p7pVj(@~_^H#s!IRD&{CXC(~V20SOr(0hPxcBlQH$@EB
zRZ8Z8mqjO|fIms73>%Wb@lF2%zNn0QiJ~iTCL_AVUZJj#=?aVs%M-*d-1TB+h4(y%
zX7uAk4*h)+Jx@kh^nlym`+MZf;uW?9>=`@|yLIWZjwr}KsCkp~JF8EI3ZT-Vb!+S$
z2?zJcaLqpS1|Xk3aNs~>sQ{uvmdG(+s|9V}9y4ByWz%=Dp<ahX=}k-lymLA%0*(40
zF><O&IRWv)(3Ej1M==D#GgjoV)WEP80;Q2CRiRum%Yv(v>ua}c-yXUZ(qDdwRdm<r
zMWYV1`-u2A>?+g`#}0`k)F&BE(2}FHrBh29zyGwNA!4?N@Ezj_i}rth58laKy`CI;
z^2^q$XoeqgS#>&|bd*GrO&gub)%Tlw#xyfCO`nC_5KL0y+}I@cqE~0_^HZRD*b$~`
z8qm&k?;du>)@f^CTSgJhsO*JGC+#H{lmYxW*Bf`1>U+(4E-5TCp0J&%nsfJq;ljN<
zx_<rjXr1m7^Y-jc4iYb!Oa__g%93)dsghs#`5l_Rf7D6t6e>8>a>5%l*Gt5v^ku|s
zRD^qkj)}&W-d6EeYy;LP8Q(yck2A1)%#MXGg+)bJfI+C{$dR@iHW^DoJCZaK6S|1`
zSO@~_%13dmgD$u>Q!;bjtv_s-_v-^5ro-kHAAET8W_*<e;JcVLRSS7#+N}gOC~Rv=
zvA3B=VP<AuE;gAZy}oAOOJ+n2A3SnoL2j-a&VU2#?N=kow`io{XReAEP!YNVkhWru
z>X0F;ps+zJ90`-e1Pa_QmFGXO9<!bIhhZ>|+L{9+MvK6Y?O}2$Ivwak)}3hD#qkRP
zgUjz5g|h6zrjeja6u@|mEQbEGoac^}M0`;TiAgH6%ig|y+k>7~;3lT7W!M5HG;+Pu
z(IC*GWQUc1coeaas0&lQJ})*gF(Cq=cqtQ2ie`9afmHD^zf>0e{ctX8IgIpb+4rYB
ze6Oc{CF+JEj{8bo5!<wF_gK<VlHDt^u9*_rc}E3=RI@M7>x|J`%pqvaz?~{8DlmPd
zzQ(wX2Q({na3J>2EivaaZ))2e*Y0f8nH(%wXa!Y~+o0d;q;p)%_W?1VoB)g<I|P3;
zApI#!2aJ3ja4Hv;W(QOqRvxxb5Yb>rGEO0f1X~CLitb4jXevYrF8BnZ8{3EK<Dmmz
z^_B2QS7dIm@cGIuCwh05VlD3u0z+aASO9AyFvW*{8bm9KozO)CB_%<x$=^SSmmklU
z-|+*)2oO<OilPC%4X+*aFUqY;FOb!|TP${I$s9Zr$#(R(F8H!(uCtQhFEDG5ii#4l
z<c}=BUUws(T5h-iMM1G4j>uh2-*>H0F5+|3F?%50OhTPJQDm}?f9rf(VzOlMV$(pB
zP9dFRR(nPC1}g$~C<Yu2ROHf{*pR^Z*+M>fOl&ObdMkw-90N&x!iQA!D1mOIP3-=P
z3(8WMOhMdcmiT?qZ^R`yKy+?V_Pf13rbsjPJC3a@xjj-lAi$U(e>qWggl;8Ie9Db;
zIA^$X-V~LPCxIASLh{o?RRu%Vn-J1}S(|r`Z6*GSheQ=d@vfk-IVh-IhYn)Y00Apd
zvAn#TlofSn5P~miEE+8ek4*1T)Mt^{k>)W%Jxd)#*vjzwd}Peh<;x3++g$174daAh
ztK=I~?rBN%rb^sOJ&oyO{bWsvx!~G~UxA9kHv-)<Ybj|>Gq6dHo10tAq0*?G!QtV9
zl-<5AX_hPW#dgQensm4WwL`w4p3g!W`8A|h$3GzB=X%$4oBWjQ=pHFD?s>W4KpSN%
z)~ADUcj=w?V*|ka6a`^wA~fxLnKIvl8POb&Wjl1hB;$t+8FC^ydD_gG<kPz+mD(&X
zf2l`>n@{W9t_k`hM(Es3E^uIdqn2$TE9Uz(Op7HoF>MUcOy%SmJ*cOlf#lxk!C*>h
zzlVM^-I7dqthaV6Ox(4;#n*~`z>s1MQBD$JgmNB1cU{%<@v0M?ot?S1c|iey>hmg1
z*({dUf7-w43rCPN@slT!2GI>v&Kpsdb)z;&7ixFFOCZN%HiiI|MlKyq9uGxhgmg2Y
z8zDA#4iQ1VtffX~@omobzkTP9Ov;|?YiKw(!p8G^8m!(g=i|8C5%ju{o>B|zR(7Pz
zS`Uo@@eFYddo(p*wAso`|7N)ZebSg@2KI13?e((@>pl3p7A^CgYq+1y#?IxIp~yV8
zM9L|%wA>{>yj7rd*4*My?c7{Sqhg*8*3AF3*&glD1UWnWqkHz>pvhw&_@w5fmWqzo
zqdPwxMJf<kqw3@98&zX*&UV1{3mT&<t#4lVbA3(EJm;FpLJTmu&ktyNRI{R=jD8Q<
zcofk*G~)3(ayp^0CvUm<r@Xe!zR2E%nix82R4u^9r|3Wa5cOQi+qX~2X+&=mKW47M
zFXZ3(f!=cY32h!7?xNw#1Zd9(2wu9Gq?w~Kjmt>l1kfJ6l<!KC&pk@xwOgy1CzF#y
zw{G9QU2V`9ST~Od#v$1WUQ@ItIPts@F~fAc_;Nq2yNZhG8pqt@DZH=Qv}syH_C`iJ
z7S;>Ya_m*<=J4uHeuVn!nZfJYDH#OiwIKLyIixc=Wu#r&1r`uBknx&@5=rF1kt3-Y
zE^rP+v?hD3JV-#E^7L7S*IWTT_<!n0;;N<f4#+B}$S@O}j{*;|I0RGnM9=~m{t49?
zjXyr%^%$r~VP`-UI;(M`f<!lL(&3OVrCr3#2tnE$P>@8orFd3K#NdW<&lE}C)Tt+_
zfGg^$C#DJ?$u@mR#ls!JVN*z|&lB4(niq!87sSGb3u#-Q*4ijMFDU_Gvz~vVYEp)b
zUQB6YbFV17qMc743|L`-ox-QW?%b%NS7X=h<TGcqJ47`P-LvP*w9oD--E|I%<{wNX
z=G^lhc%h-sdXL_U*54X@>wB+juC#i*g@ygESuM<_C%3i#-P~$O*Hz;(wTFi5S{<1^
zv&G_paox|gbZ*sPFA19MHa<^jXxGAyh2tk=1-ZZa_Okxs=c>5wH{8shC1u<#d~NjA
zXH-x3{i&}E#%~P$`rEgH#E*0KC!Ts~AJ*%2!n=g$_F=X>O@w2l_Ofg5Rp<y413qZa
z5nUZ`qO8O*P6`IWMKw`F?Nm>}Km4!OftQ=s9|K?OYHeTY&m_u0N}y6GBS=9I>CPj)
ze(Re=JtqbPC@fg9f{D?dsV{LL$3@BCj|rU_=!8@!SX$=t#2Vc%az!PtMxMRot8-<;
zk$pCw$%zl!e{R>dt@Dg_C%KAu?3l57^+(Qjl;JpyT`QUTU}l^}Vj20q=DR(c%)5Mv
zSD@9E1CL3J9S_|;ZjWVs!ok}+)>yA=`pype`xA#55LuYjgtGyAzI-*{u*{NqhXzaU
z_wCiI;hxBxO|V|lLMzT~EprWB;Ot!S>eUD$6LGdXDs2CRBZ-NS_;s8;Bq;(%kGB$?
zua)MKVsr9KQl?ZzDLL)%;h{5!N=l|*%V;KP2%+hO>r68ME&Yl)39j(o%N7Y7>aYkL
z=WX>Ku^`!Zw!F^G&ll{ho!t`%0OG`~AHP6!P+AWPKrZr@FAc^IG@3o^w2H<}@qA$O
zgsO$zN#M{PyK?^g5)ufxHJoCDye*|~0092x9XptvB2MYpm2}%-&OzNP;GQrvOIEQ0
zUc7ZnQ%g%rSNGnXJ1aPj>46H-b;`=hLEZHx)y*j3_aX=_o6%en+&Mo*(qO?=;$Ux2
z;)`syPxtQh0)NR1;4F|FGZt=22Oi$W_PrPhFEor)vKgFl*^=YPYZO>kaYCKHcPw57
z+B4P64C(Ph@>YD`%|0*`GHUBKrfo*dduFzjkPJbrv1<r(Mqz1l#Xu2fB+5yfuy*z#
z*O8*x{w%iWyw^C1n$J>u_u<36vfm`b_|=odXf7)&G?f^&ZAVXz19t(<7+G0ac&%K6
zHk3%5cOW0d4{RMJ12;uZ+lak;w^O5X?NeFp8VWuYoW<P>vjQ+enlqfAVs1)m=VqQS
zu@8JpXM-?T)r=4C&hwtc4(~dLh8-vpa`^YppRc9{5^6S&LY={bx9r?05)KkKhcurx
zWO#U01O^6z`jLt-&GodR8c%?V1?jz1fr~N0IpGd#&*H3O2o<>|*RO$rL2FfZ2RRRN
zv;alH{gAycnzk1X4K2YkL-=B?P4=ju46f$pp3?eESD5}xVdfA%wUs1pFG%i0%8iK=
zS%|66egrRFwv2g051?jXyhQRfRF#$Sk2rkd#43V2BqmW(mT?yAX$;2SmzOW2Kl*nr
zoW6&N)R6vJ#l=QY19Ta1e{y&2HdOK-c>~;yrX-z!AqX{3IkgGn%EuBceF{IogBUqK
z)1N+J$P%0+Gk5@~&5(m~!z)XLe!9Az%|*@l@XD2bc-N9DgW`yTTV75v&`Cb;hnxLV
zeSg;&R)QY2oUE)eNFUCEkbb15dV!FVF$O9UA-K{?kBpJ{YwRK=2Jq?Fk?d**`!)AG
z5Gx&8FoB$<(yao0i9W)}(>mUCkM6t;@1N-3i3WgxEo53jb(R}o_(uK!lCqCB5XH+6
z9z0+-6;INW6!&Bv5gn<yU&xV}C8_4xTVgUD_c4DzA*m(15(t-?+6zJ@#A5h0m;3~o
zl}7BeeV_8`A>z<uRDjACQO7}Q4u3?CFa;8<D4mYhE_EY|QQ)KR9K$pKswT2lj=&s+
z1t==GzLPH2C6J}Li8lSni-@t{2#}ATsGiTYN^=pIcN{u_o62qZh9=Wz+1ra5iI|G8
zO&w&G%dW8l@funL2*n=D771G3aB-Q^s0i^lb?Qwb)5{kxAd35Ws7sPW?zp4pXv?&W
zXJm)izN&lRcM&B2wziyEDj*jBgZLlUVp<WHYB-fAPppqJ%m8hpG``577{p`a#{EHt
zD3g_~S)$UT2eD_af(4gQ_$@+9l4ZPO&3Xg)0pX|FRa*aGu(mc#7Rk3$W8OiYAsIDF
z&W|ulvR{RgEiv)?yW14(SniS3zlsPU6QWvic8=jvh<RoUYLH#~PM3KL@(z!YBXw`7
z@?^NH;uJ4uW=3m$)-Twrc!<cXtK3;)zLFw1cJb58mlu+8Knk8E$sP*4#qpTryiSZg
zK6+H^PIP2sF_ucWmx-Y~tpm4HahU|wptGgpm=92i=m60tyAUv_Ls|P4C1;d-^e}WQ
zy#d3hpbBC68FJtz`HA@~bF#9YbkyU)@^aV=jGikqY&KWCc{9a2R`-2P?JyS#Q=yH#
zc2n0j%?jsW%EzwvyuMU+g9lt$zKLX5x!!lIj4fQ|AcoQK`*pt8Z=Lr$a;e`JFCIa;
z$KI$Zf3WG*SH6g3IQ-YT55>sI*rX%kJwmVYZRv?-RZuO&%fHK=SaWsh`0)tA=uHvb
z0eA>4zN3P`By~+Q&vrj>BxA4aYIyzP-8<UGyp2B5dBHc~?W1$Msi+u?8iff^+wQ|A
zfOA%od=hzsNi%5{KFT*_mqd)Cv62h9tW1A?ro;$qK8aIIVxsVqAnfrqxQx6!J^K$D
z#I~<bP4R2)A5I-64g;-C@r*sY%7qGepLBTq0cA?t=lCA+3KPCUDEqhIYEU>%{7qip
zh{eLjTzl}~ob5mxYiSmT)I)ibD!yx`rs?F#OSlf8`?z2_OK~sdR;wia9Q^&jm}VtW
z;>loGEtvf3`?OFomepKc>C=&4I59pud-5wx%4o%FA;!b8ykbNloqY=y<RDT*h#6mP
z!z0Vd84AtHoib#Id>UF+F0~BB39Br7x(bN8;K>sMEv;$JR;?tu<Br+j%kcEc6H{4A
zU0of)5Q)oR-rs*o&VMp9s|W%bydWPKOUJO|zRW|dfhPrSfxa3jB6d~8$1h(h^Wt~Y
z=L!JJfcn=jUv}=^UBLZ-95;Hk%hY@YF-ySXfbQwdC{=OGiWaE!c!@;63;31zQoKwC
z(5dw&1P%27l@@e@EO)M$nPyvn4wPH<x8}{wz1aa84zM)Tyx29Uj-AFVb$<Q%^Xn%~
z+L(SxlCHrj(7X5)CXa<VXwFeL($)@QKwM^<rVS+jZH|r@MRS=`igB;ts1SPKfOb#{
zSn-e{Rq&ELUA(ah6Bu7*)eBaOOl-7{&T+Pes9esFixd2~+q6KAWR^F3DVj`=v5eo)
z_`?JCL+Gap&so%+zs$%gY#Cqzpdv*m&0A{VCa#30S5nN+WC7^~rSQYH6>r}Hyn~8k
zkz~G_@0##{7!92JnI3-Ol>!FHB})VrwG9V%KON=!_Y*_0lGXK?CK;xwt`1e>3jWDV
z99sVti)MXc{KKtZyA8~{i`Bm_bpCGtrE;dGnCe7)<c*u0Shs*g1P=u;_T1gQTQ}ir
zOuPk15DF0z=RidmRQ|||{X;!PuP|Q%eOe^>b$tBbXI+1Bbag&c#|0`VM6L#IX7Sd1
zR;&Ya*D&=}jBCP6iBkARSZ7I9_QC%qP=wy($3>(8CdB68PTxB#n~7TnITb;;$q4|;
zMq}{c_!B406)eonS8*%}-{MIGMaRYp+>6`-FTG>Ov^{!q3OD{Y<D^Or!6$d?y%FbE
z6dW%KXw{i5jR;;>Y-#`33~50b#tQ`@PH6SO*&q!*q*pIqEKEJzEI5d7d-m))LgD1?
z-JYX9U%YrAw^e51*8lk(KHRcIBjGi|8Vvty<Hn8jH-P8~1-ZxrO}b<2(Uvy`xR-8*
z2jfB^gIV7=;7nGFsx*4n^nN4}n{hl5?C6Y{GaHdra!Xg@*-%<K9JgjJ(;EkrAc+5G
z>FWK^MhHY3IUAnap}lF``2oo=XnKTbIZi#rfwKzu<BxG*I4q}zhoa=uZ0v~H?s>l&
zB6!jeFGx0O{bEE6HX30-i*8HE15}br<y~3w-Rs=^jNJv&Lnv&D3bOX;<;M_$Kw+R9
z@I)6+24^Zi-RXxm)PKA#XEUb|;VG1sA#fdn6C}HOWzHc)j6s}xdHdn6ks6Zdjfej;
z`*BSpv=U3bh@jlH8uEgJ1h=wsC97^LdYdJ26Nu;T@)tpPG&U^DmkVok&=bw~f(e5Q
zV@e5i9@#ZcZs3aBgM!NLw3WpD5%;h0U`acfWjyNWoqRY7_pZvy%dSnIJ{>jrz?qSS
zNa(vm)AQK!b5Kl>yQQRbXxsMojT^M>ZV3u1!V7Kn>XV8upp)QadGWw`)Sw_L6vAZX
zh4IZK`Yot-e!kLIQ@+4X9if6>?Bukp<(cGUJZ;OVwtiMqB55K9?1s-qaFvwgifEmO
zhLDKFiAC-k;7KnAHz@6pkR&t`3vU2ZC!VZwYx(bU`_DVpf8<0$9#H{&4KzxWFqm>*
zrGB|}OQ_a|U6U&%yz-gDLk|OKG7pe=GjQNyHac&)`wNsc>H{V~1hOOGHJI9=GGx9#
zeQD|Ynn_OmXP@d{K*s3}8bI7&0fzsD9VHo1&6h72CFSHi?<fCNwex@p*>=1D0YDOW
zuo1Rwp*RS+czthJn6lt9yC5XPK6)NLm27x`$eRaLB<8-JnpB$dmw&-SY>7w;d-m=v
zL?5|=cRYZbG<*J=XE*UPj<>mcd7&@{EaYHw_Gz-AuTZ*~)DiIxFW8?m{J~U~OYilu
zl;T~6!z9t!7XJ!OGF{a5_4Pr-$vUYRa@(qO>!u!N+SKeGC{-1)gK$}f-msAAZCIv(
zT?;sfk$5>dkZql$^y&$X4Vw^_g-lgQ-(;USX4FbyhWe6d)BkR$%(pLze<ZsiUcgbA
z?tF^v5;*($^MCK#Coi=5VjX}%Eg6X?6MQ%UhcCwOBLL?XlMOZ59HVs=1IGAea1mng
zn0Q!W+x`nxj$RsJ6=>Nod3g$%X<-qvIC#kX`TuAE3Sr49;^`nDNeB^l0$A%FvI|Z$
zFKR}i?;2J}lrwGACDhN5m&u`^EQEe`<fe8#Kz#L5XZ<SCX!bw*|1uq3Cn(JXF$Az0
zF-}|iSO=rpOe!G8>7aAF<|%$x)ka#7J~1tbc|mj*QGRpacI==t>!RYJbLVPVP9%bR
zN3S_TF<=>wqn6MWNe>3Iq9p^G9$>T`;xmE-S631jh#T%StlomAYoMth#dSgt@pES)
z6q*cR%9~BcuXr|hH;gGSvLIknLI(ohr}XX0E>J!4;#`pmFh<kgGQQ{VrNKM`vBqVx
z0UiXW=DdIFKtTlYpssZTz9*E;Qdbh~3Lj3w-D_zbW#d8g*wov?1Snr^E-{$_qr>1F
zl3;{SzjI{EJINCzC+3n?ynOlK#Dei?ekl~o-Oq3r(TBZg(IU1KjayA=VqZh_;sE<F
za9qA%4;tY5Fh6#7jdPR2n2Xo3<MUUqDD=n6EW0BasM|A8;;+KcF{>~dbTk_L$ou(7
zy!V&tYPt*?Pj4buHq%4P5LL9EcHm_n;AHgwB2tF8q)__2y!@JI@V~%UIaV0eUOyYk
z#Q6I=J=F;&CjPG`Iu8&kdg(#BhwBUW@y_7VXB8Kc4zQ|a-g|@x5I_Y~pSV75Mw3@B
zXRsHnBN=Gu#Us1I!$~dmsPe(h$t!t&p890Y{rFnkW29JE6g^1axoE`-A#vn6Bd)+l
zu;R_C0z-aS61SS9h+7sJHe(oT(6F3OOLML(GV=P=f5eD~Nb4wf#oH(-;0zQtQd?3o
z&%$?G1-*2A=%fNq<U<RiTz)An#bIY^sSz<m`kHOmPMHy*-tpDnSODuE_T*)0w8YaI
z!M91QOKr%9KVVOK@MXD_PxT?5BitIM8bF6Mup{*nc8zw&Afe`m4QL^Wwu54SPFVO}
zx_c(YkygM`j8>%w6-7(({pMZ8S3gN=4n@j!jW??R4%Z~*lE@Mr_}cpo+?|`W5xYhX
z65l;q3tp+@`SXt5yYC}o3=XE_nf{SB%fd}%jK|>DEg(PvSfIz{GHKnWO<B9P5>K53
zSBo;b=VJ_{VWy%n?#7KN<$IPubR!NwdUcQ;z|?@_8*L|IU1yad)S>a4V_%@<L9_jV
zfl$UCA2@JA>q3X=7>^{*g?!>OPSHs8oqVpbMa*`K!AlrZDfjYWSnW2uV<3xoY?+Y`
zC`u2Sd*W`H?m3j)n{)f;z;k??Ok=Z|GrxZQ>dIJ|)2G)EZN$Y3Oe)$VVEw6e6*_eA
zSiAP<(UdIl^(FZ?0Vb%BU^`eO(g-_7p|cRLGCawr_=V&-Bj~Zv2;;jEw04giB8eW}
zI^En?sr5tvAc7cObyPTm0u+$+BJIes9WiWJCn+}1np6ib5})UH0>^!m1KoU>;=uZ0
zgn(1H^T^*Z_ghWN${kM&iyU?cz(^m)NP=hh8TgR3=Bn{Gx-a$U%ozU!>g}irgSwsf
z6Td|;<V?WxNjC%84ID5aaN|ZW4J0Lv6jjJZhhGv!X{4LHXJ`n81LlR0((m3aUslKt
z;lgWz(fQHb?}x-v_!s!o;oa0v#wYAa3mHjrZ0w--P-zzAP2{VXDsZj!QBqp_<=q^w
z2fs*~=cfb*V)`hwDvF%f40>R)x{{c>vsl&|`GSJGsN$$izS~g>mV}3m?ZXI;CCL|{
z(l_|9XWnm*;W(h63doa7aRwMk0D}Y+pl4KjzqiLyaQQuS)22(r_X#BLR-=rl5C6>j
z1WdOM66-~VwkygQsPcJt_VW-i-N}_71!3w)90X^gWvaHemK)YY1=+cWyL-@%9SE86
zf(otFrvB&HXw&0_Un)LgrLSJmw8Dywgb#jVZE5L-)X>kb5s`$*hA3FMWpY<ovUzq^
z%)N2t;X|7zaiPU2(#>RStyH<tuuhmaxQ$IL`*N_7kh#1AaHRwxsiD4~0XVLq5drxI
znG7(e&%Mt9Te9;4R;T<+HX()U?`YkZEgQY~cDM4gcW1(PSf~T{EF|jPtJFvNp{`_U
zoqUaFA2S}Fz5!s}56vZY(FiomXpG(53;=y*Z4*Y9ALqf42a8yISY5_lr|YpNQj?Kd
zoF%CORjH}Hjl#jiL_@?E6#RG4Wq^iDH}hs^0Sd|6d3q;Lobdc8#FSm7A{xJX)$m|H
z&9^QyZov}j-<w;vmhyA#dZEwfb0Tn1d_k6jG{J#82ECxD>iD8ddQ`r8{rb1xenYm1
zx~7g~5X6ueWX{Z3Q<=B(5jx4F2dln*6?r+JP)LXcTNv~L_+@mH%$s8c%9XLzFPlg3
zu7$R1)PZT8?RkhLgi>x?K6fSX434)rqsmqz9O0@1FQ!?QdzQ=3YmJ~_Bnd&(@|N*J
ziq9uuC!%^Q_I|5<=Vyk*U+Od1hkTc*q~L(Vj$lNiIK7K%2km2x$Le)xbO~)i5rCm8
z$606wc8YNi505V9iWc71t-ev6QHaA|AVci`UJOeBAZ5_6WnD0y?1Y1`4L{bBprOBD
z@t3i8EVY-*7M&-PrW7Fq8Zh?uy?a7@Oz_3W2((h8Y_Q{93TIgm_oPkEtWcv(PR3F6
zAS+6hx^^YTZ|^z!l<BTks~a19QPrW?D;JLqL<m`72LALWvz}rMA$DD^$AKGn@6uk%
zYc3TZ1<KpJ_|JK#s8I-&!DlgefLlqR4`}e&_U+o~>*;-`l|JDOhR^905Cd=~l;H#N
zi!_v4lNuqHXvlJ?kPSS?FZ}xHQ<_`Z*1YB(z&ga=Jx|co!X(MKX({X}rSQnjooAeg
zfCP+)s3kMhl4ml-izo_k+vSC=4r|$zyOI@!q!qs^ErnO()va4Mx_Avok8an;hyh_o
z++th$7xcN{s@h??_I#(J0n<UWnI@&cK=rMrb57J15p(fwJ_kL7JBFGlkZ#fsLOLhB
z1i^7zsbbo$UgH%uB|nW!BaU6fe&Qf;(L#l3<sXE6<erT=b!s{l0fZC46;#JZsx)A5
zO*jki3l7ho$Pr8K_N?(^AI?xo2f9iNr4i1?Mni778>oo1&4pB9fR1K>x`sv_!Wbmg
zHNdhYfUQ(v1J-?fF%jSn>evAkDr-~5=evfMTfXrH8kXG!@z;-a!oVQnwYF~>dbip?
zm*Ct==kH?BbN&~33lIqthU~nh3l=;FEvDK{G*`!A_%tX2pHVnA>>p*Ja1tsnHmw&I
zs@uBCLN8=}<zK%S%=6>cSEas#F6R=G(g?e`Yh$g%f22*SU9r(N`AzG&h8AW39=qAI
z@nB?{kEyOVRVob)+;e16%5e5#syK&oFv%xDld_7~05HAQ!$XY4N!Fx_#1NeHi3K^$
z^nkBlXU>@MYJ3+-oPpIdDRRy+T-IHsjN*dmvwC<6e(zy!t_W+?1#{-O*bKsjlME-B
z0u7x59rW)56gfuNwOIl=F&KDY{!w_=X?lJv7eI}YbotY>cQD|ja%!r<vGM1xaMuRH
zv3zcp1xwq&^I#)iJa}*ccq4f6AmtcBcr`bf?uB>QbSOFf4lh`!EDSILPMkU8Ti*7b
z`@2U&i8E|NCP+n%UbC2!EZqzh`<;tXbY<{9gx}MQOzV1d{=$W7)_Y=QGV7Hig4h{o
zM>z^$!&h>1E$7Y4ph^|p1dN_JGTD_V2I)8;fDZOcJem~@^N5WtU@HJ<PjOEsL157~
zI14_C1$#tAhDC-7zFO}=UAcxCC!-o5Y#~U;Cpy))Y$i!+$AmMgXBU_AOYZen4ItWn
z`64`P&@J6Mvm>whNFALS&R&}OmwFAEZ!*yl9*X<+I=fjYR8g?;|EM1e3+<PsbCF}W
zb&j-0D*cnrI8pGAmLFn$d#Y1-R^S3bH~KX;0O%p`nRa?4(5<?99On@8CiKCaZt86j
zTnl#XC~Q`>nC>p2+(w_cf#gVa0vf;pU(!;NlPTb6^K|4y8-7TiHRqPi-yGnIH}xO-
z++=;DxLO}{n|;t6KTn|%5D{!r>(;G>urGjCl@IGbT(I>%02%$`wQ-}^BtUK$tTQiH
zBl6Dm>(}%0lB;jfp5<3pK1fx#rI)4kSqmmGW5y1}Lx81W=iLZp=)z7AQn)VKr~<hR
z40_9+;SKqEw+8|LCl#&ZM+6i?o|b**NC<F6&z@S=cHtu+z3zOWo()ulUZnQ&b+#kj
z;=l($6Luur$vi~c#*-oFF7xxXH8gVVw?-JB;ETjnKWI>)_|X1w0!F3mhh`<RY1y_{
z$Ph5OoOfKAPx6HvAiOtPa?OI)EUs@w+6n|?+@Fta)wQ&c5iKeYVZBB`&)E;fQh(0u
z*>PUjH9A0gx*el)WHkZc&iJuoF94A3aS0@t&;5LTUM}FNn{WL$GT7-!9$Rn{V^`f$
z{_CyjWp|l{UQ!~EJbKMZ3Fo0xmp^vnFqa)WuRY!QPpFJS5+)dv_*TdBf*n5(5blp9
z%9gWdlLnsWc0*!cel2<zNgE9hZbFjw<EYqe5_MfT8d5OIR3J_Gq3hr<nCUV}(7_c>
zv}s5N5~CUf5@0P$mMp1T(#%8#&}0q!UH({Y3twUbeH#?#Vc+eMS<o*ey02Lz+Od9Q
zJESKpt8CFXAlRVD$WS;9ls`va!u*XuMNp@EI<oF?ovUP4&y2oT`cZ7h1m#YgQg(&{
zlHx9-`Z?b%#8C^U!VA@E_pFI*KsbI+Tox>#lKPgkZAyGhT}eW;xjG$y^M;I*S@vT3
zgThCupN9$7Z{shZV{=AnhX^Up_*`<VPC9pq^Q7Tl=@QFo-OzHLF^jDEV-;Zp*3>uE
zW-qW6DTsatZozMVnte>&zzp<Q6}|AUGt@{&!H;E=$q%w?I?ku3a8~RK?)q@=Dd9z^
zpUrI58QHU^?4H9iq2aZ#h}gt72)kHou?~aD{UoC0A>x8glzt@FlXH6^^p<>TczWG^
z!1J`+xP1iz=rxt?8es$q%ENy3*v~uLj3>+zaqm-iPno_d1%95E#?*`71csL$z*9s2
z3hAbUx~cjcMO8|Qm=LZ1p$X_T9|t?Y3jy!r$B#K@6oG{BT?Prryhq9Z(>kU_04=R{
za|6mMd!PENM18thrNpx{dl$!0C*-7I7u|TAOl_WPZhnGf#B`nyW!csxV^V6Z4@1h_
z>x~<7ec?@3VCD*A!$YYZ{f~IQNuB;3E&Fj`QE$5zW9Mccy=jxSWxhTSAu@RZ;n{X$
z*I*4BdOp!&CM^5`-j;Fh!E?|=Kza3s=W03Ijn{{ymssX4Up2EeL-;Jqk0~A^-3I|o
z^ejI%32qtt$QUoto=S|ge)xhVG0Dd-km09dxH_6JF5=-b!5FguNbG5riAzZt*@V&f
zC)@{rp>p9l5aQ8La1>ltRwY`-BjDqw9}$E}T`<cWIB=ki#R4Yf6u<G^rt!L@BpC1A
z08YFx7VcnNe*%;bl#tGBZ+sSHOPrU5#rL#N(Dc;_lny1Dldy=MSi;e<`EK2RJkSo)
zztB?utM`VL?%SjXkyOA!9JKP8v)ZXX-%3wF^!<3`rcrTRc4hBBsW<#>I(P2*wL@J7
z&~EX3+o1GXRwtuY?OTCaN1509_2C;H!DfFtJ_L|9*W+YkY<789cER;oL$hyXXII24
zBN(tcu1v6uw7Ud;o<0<kdRE$*FJ?Z1Z#lU(ux?voe$*|z0pt5z#UsvM6QS;#cxA^I
z9@$O%^N#{>PR1Ky|LV&2CI_PU1dgI%l+@L>*eME?<S_aL_>xn_3BoY!3CXVQDrBFM
z=n5qzC0dhd(=-B1ICpNg9)`VdUc6wij3*g%#5Wr_83NatMVJHQ*ltR~u1R0EK-Yi&
zU8uV*kfb(oUi@yrIf1BdY|E<8$S(tsGy=8nisk*EFZxJ4t<42=gyk%76qdMTjUW&R
zBir^Ib^qqgsd@*uPu!{-Nef^teSu@B<fxyNr`O-krkK$!P=#F!`+MB@S@FW~cga_J
zN94qQcJY|QM8{L7!iq>yvOS~T0*Dhs-o{@-hpwsA`iz}u-D^><Q2}Ws-As3nRZblR
zscvM|i)|D197Eb`D%ZEWxB*sJ^W2}5p)G7zxw~tBX!j;+;gTh`m-@sIua%9J$;g%M
z5B`~dNUAV-^7XDGM>X1#-|-*pZ95&;3{qL|0HWwjRt(e(7$yb*0*P{f3fQsXbIJLP
z8T&#M%{5aZeZvzM<Fl$sLM!5Y^k>N6F5~IQuG7Uu_(T{q_aAzSrzMkN#=)zrqT(tx
zGz>7+L~X5S`Ht&|J9zxOs2|AK%r(NO!pNqMRd*AtC~<-;Y2@-p^J#xAfBUP6`wO!#
z!Iuc=vW;KJP;)b0begXD;Pr^)g4;eH!<SQqbEkJxy^2~~LvvNT7_U~lLP9>iD;Dui
zrXiabzcgWwnMcw6?ZRu#^iP`s)RysQYvOkfcUBl|O|DSZ8ZWS3@sh8pUAHXWAAVZm
zZ<>SP4#2ksx@cev{7qno02c@bpLE>OI>NU%Z_#3E(h@+IkUe`SmM6>eXy<6|S&Aku
zdNJ3G$tItT!r+c*t|!}Plmra1SyRRx{R%%f8P?o*e6r|ROhe`9D;o-sx_wVdYuAF~
z2mXgyO9*|<d@e#Ss1@y+Y&aru*eNZ^jX>~?%twzrNuQ;gIiZL%E%a8>WL5Oai01gm
zhp~T_1(ADYc>a)sABflBfz0RZbBQ1+pL%R2UD;7ZiQTn)`z_=Da;d9c#U?m1!Vgj6
zYnw5)0jzuSN6=8eh|?I2x<9U;^)!#_wvDQov69_x-fD^>%M~u@5K6Oo_BFKAv{B`z
zi*dXm<7${%ai_kMM~{NIT33d98o;M~{QQ~e(qVSRUAlIq-xavyJ>ka5UX{LTUd6=?
zd`Y>1Kd1nWhawIvSSjK_J;@?Rnsr7S=Ii4lj9OM>>WR((*oo4Ke~|E|Q*#jvOq}B7
zb*l?v6S%h~@W9kheRpJJZzZKX0uj_F@jEUquB(-QXckX>c$Of|&=OOA`D~e?BQYNj
zspCZ@c2bB3HX}GR(AAx(!Yqo1<hT6-YH13I)XprzqeYHk;#j5YSP1G>T<mzvMl@LE
z?k-TI2m?pzEPcn`=Gu{YAcC#M`K7azb3tl!=Nxx;wc0Ymh~=QsLHsIf@5=;-sXYiB
zN2{~+W41jfL;#rRzEtI-SqYp;n)XZL4PXP=hYdA6jf%*!c|pC$+9`Y_3oPHIuHCn<
z$&+C3iMO1<J+?OPO8BGE+RN`RqV9t+St8yjc9ED5q@7?Cj4Qo+)z<q@jl49L*?8K5
z_ci}~fbOYh&oaQ(Sq~0u#D`=^ENzFhYW_8-3=<veROGaoB&qqobOoQzK5gwXFfdZI
zh{mV*KN+g2(dm8+B!yeQP5bsb>q909u1nU3$Vlo4MiOc4cd^rkmXf%2RF%ES1He~1
zOL_94$J^Au*rpqxh{6I{<Ohxx`q6nP2Ix3z)p_6)Su!`*O1u#>j_aPu@C606{vpxN
z1n>g38xXK+_3An3Y{|VSxuKf)=wyYX<+R4hldH?eL)io<B9kX+@C4`@H|`QgmHa<E
zIJmF2^%kP@Vw~Xv_8R+bkV-F;U1)mvPNfQy1<@#BlU1iXJ^S#nV`8LhcQM&)?Yl?R
z>{J#c8V*#4Cr(t+;6uq}Ir_Y1e0_o2I+_^B%5am<88+e)M?f???Gf2l+({e*y7%k}
zSsjl0h6n>3GFOGcb<FlQqyq>f8IPztw{Mr;X(iv^Ef5)t?1I@gWX;rK>)hS-hYk%{
zho_>L<3-s{5_#t2$yRb!#5ytG$=-`leXl*!Sm-?(%`l%_de-7}SoIO)MJ`2YXB1zc
zV7gxz)_`7(miLGG`C>#kf43BgI767?XAO$~U6AnHLbpzd=52VrM6LBf_gO1K83;Gc
z6buRfS;12>VbdYq3`JSXh)A!BV46th&Y9Cnl^b^fo-Vv6r81^`PB3c`;ih3SZhjnV
zI!YHUUW}{5GvG02BaI)qnoMADKuj2*h^XM$RVU3#zlPU-0g617@UNY_`uhHWY)W47
z)JA6Mi-Lk9#kr%T#*v%Mk8%roK=9=0#o3RVhc~VH&41$w4qO;9d)3#jUM3pf^7HkP
z)*|JFp<)p~y^@EN=-eiPy&t40S|3%_1tdcJ=lpj9D{mrVgy}Ids?MEf&7NKRXTA_;
z@gQDTVq1F?o*y;>p_T1T<6k9K0}@<Sd-UohMoG3&1;9vN^;~+V7QTtnPuT0CQU<D_
z7RT_H=)|-f6fX`=%X1Py<I2jct#e$q<0!n}&|@VsBhE6$fe#-%p!cO5F+GE$00$45
zt4Br%z?vo@bRO_V4h}_Z@bjoFV`2mlsqw-WA0XVe!KUIUyz-UrPsw=0!U04^Z;d>Z
z&NboApl#IvGPS}v6Sy=Ha9so^0izLviziQxrS#@-^IVDuMd&Wd#O#g(7tXcPX^JMh
z|9><g@5rUzaA0&N&Dz-QSjLyS{4Tb&e?cS(44fzim*6AZzskUfiMG1aElf#cX3ySB
zaY>2=D$Jx@(dOw;ni}M4_7r1Kt{~?p$96U=X`$rwp&=+@F<Z*A=s6fD^g@4+o+iE7
z2Ix_~0e-TQhIHztzEn^R;D$!l&6C5cw1tsBY9$2=E30Z@pvF$h4Ua^g!V=cu26VvK
z&oM%bBIDMAQ=IW|ud6x|m3Z9-a!E+a(jAmmeNV&vV?(omMS)_LU1xv<qGFbPP|;@j
z;q@O`2qyUgP13@J!X7^D^Fc&ONE1R$We|&VT`vr1$F=7HdD5766e@({O`n*LwzjwF
zFLC8@7kvXE$EO%I)_<nhJ3_iLXvTievoBrsaYk~<Ig|xEhn=L}f)BIG{?P(}A9-CN
zv>z5jqo5n6(3>!p2Oy?k-q}BTchXUjP4Ea*Cs6O8J_Oj}Gw82a8K_7ainxL%?5dJW
z>5Bw!MWZ3x0Q;Y<<S8)vtjDbN$BrHahow#O?W<Qdb$*4quim`r*1I?C*vstpZ6sk^
zTw9^Cpf4KEwT=~ia>{8NTZ%``=S2c@ofY!HD$jWjo30lz8H|RooI8beAe{%spWaK9
zuBE$R(&=|fUN(ys26U8ne*1%c1b;}KD!-g|g*U+zV{ED8LrtA17X$^2)|?9<*TWF)
z55txzX$P;TFjPZ&K~lvZt%JKtG}<9_aP*=f4|EXZkq;r*|Ni7T#`RTyDT;BNKK*}{
zWS(B@?k;+)kOSjP<Uz{Jgc#i)4MOT_zJjB#q^LOG$%#WE;5fjRDKBY}v<D|7$cu#V
z{reS^vaU14dP*FXNs19-IMUfpkYspQG9I)PmYmq|x!Pz8S$vYQGL5G#{3~Mcl5ks+
zX=aD~OK8s*LI?@jt)aY6%62C*I*QmpMOOI~eeL3}P4d$yc0pZe#i!gd%^2xON{NXb
zH`gfUIx4m9-dRq&R2c}{{GB`dFv|;k`qnYwJud`otYt#se2Mw$|4~9lw?>{pbnl0k
z4Pql9ui5U>)J1UR%I{#NAZfz}%Ht>X+2X}EB>CYw^n!vXi5>x=N)!O!&~6f|p8qOL
z{0klc?*Srhpnsjf64pC~1LE#}fayB)Y}+$khnHt(I2x-}fwU%ZJvQ<wN&HKno|ZVb
z|8L@WQW*K7a1h$GsSmB9)#vu@dyKOWbqgZHvTQlInAt=2B0pspI64+u!kLYj2X)je
z`hO-?r?=;}AbsXTVa4NUWfjvMs`5iYQWQNsfRY3Wr)hE<=FA>tkR_1F$B*WN>^~((
zp?wv$BipXC#8KtHFKIissqqTX4}||8nb6MedPg`PZcbvW(+8~FtCvG+8|n=Fa@TMY
z;QEH@=_M9sHH)_YPXd>|>!&0Q*cqYM!D`_WA-W%_s;Td?MSBk~%4nOcQv#GWGvoX`
zAP=3ZOr<sFKlzep7J48LDjjySEy~hyv~-hc3hImm7j4N;xtT;LC<M?yaoZ9nCo~W1
zu|8(v&0yDGB*FiP<x2Ezi#<F^TkOCwkB>(Hy_YKY_;}HH>2`+>BNQJCQ`;j?cK33<
zcr8`K@UHda)yK0%!xkvYIS48%??jo5;W*cU*E--Zjq{trYBH$UR~a{7Z+$CoVH1dU
z=HI`QLTiL2R6Nwx(P>~e5Hr|6OUMnx-GmT^3><fb__RSS%*P`|kBEqP7(eSeK>6g!
zkp{DW?wL%@=$M#Nig&~`y+<tHOZ!Ob4pB3wzB0s^v1Q9WZ-ZH6h^ElD!Amr+I12eH
zxLuh+DL2G>^7Pd9G}Vdbx>2@H*Hbmlb2oq@AvzPtJdc*#f(B1sjl&z;8QYdc+~bGB
zN7;%8RP~Aut%g_!;vOdl?1(gm&~@B8anHqzLXJ#36@(}>Ai7~87RD|iDTyn-7nXh0
zLZBTYPyB;apBak<Jh<=G<!hRxyA}22k~|U2DN#@d5p;z-hHN|WeSG{+F{QrHl}w%b
zn19Q9^WXah2%liDMCyX10d!b#jomsRj}wwHQG^*5Dr)Wo=qpF>$FSt^bfDTlH~?W?
zyJRM6-~k|4_x7T?zo2*-MF2aClsMmfqgjodf0D#)LH0^A*RX^X2P|EE>;_`)r!OY~
zKkZXrWn9&cc?LDWB_8@U&TSc}te0P~m|^vGPSXPa&z_;lFLCRdFtBn8AF`3~kpj4Y
z4CN9i)2St45IqB;1&_RR?D4~gAJAIE3;vpHG_`VEhiC)ku3brfO2og`amc=LLtRH_
z&ou-L!smh-u#Vh#KVY4gv4uw0K)Ykd)AB8381ug`D}x0>)rTvJJP*Lc=Vf_0P?nwQ
z)qm52`Xu2bM=&Mjx^e5)gSPCzSZF>Wwg!ZOlUcMKnYsBo4ii;Aa)ihcE9^vdEm8~U
z*%+v7MBz(73hr-T>f+*2o|}c=9Y>R=%C6yb_DyQy5OHtOMXJja;sQIJNcIEM6@=fl
z^%&i-2Jvo^=$7J1qcflymWhTDF&$*+!U^!$$OKCnRwXglL9{Y+$`nPZa0Nth%C8dB
z6sf_>m|@H-X*&-dc!7b2v-B1&SRh2OJB{k%DEfxA{MCHTo42CwqO`gTTmzvkT$+So
z`j0wy)vHwIe07kdblk_lhpj6=e~L%D&nvDYMbT(X{78|_WGUQ_fOe13Ig+bVNe{lj
zZ%^t;c*0%_63Np0oa-|qq$yysx!SYkZ*dP{FdB@9%+s1)E~NL^<HI74LPUkad5kTU
zImZQJ4jYz-Kv2tu7#KXk1(6XZVc#R$_OX|9Ws+|YsHk6t;YsBEM&9#Vlht(|Jl2Hb
z`TQQ;=}5Vdo^zCVY`MQ(AvY5DxRBW$Xpq0fy*l<D@CtMvXbK(k+}6-_R0<>&<Efxi
zA2f?Ar$?ASUg26JMPP|$D}00>;40)17=zNEifqc!Na|uqA5rDhe*ezcyx9sl^!@Qi
z3P*j{V*5&5s3c(~e5#u_2M-(OMluR!pzx7Ak_6xjEP@-u#zpJW8)^jt0aaUy&}?nz
zzf!Dq8@jN$dx|7mu-r5Q5==O<x!PTD*gy-IP~KwJxPO`vMu#Dd#nJN#+P_r@y6{~o
z4dT8xFGp>Oitsd7LOd0MbTiT>Jo$jfZXOUR5@{ZiI;sdKC#P1bv^k{{JEEievLvFD
z$Qp^g52XqD90HV1QhVYDdU%daTy6Hhzr6hITV1>48+b?1F870D1>sgoZ>q%H8i6Y-
zxRmj)g2hKRc8Pnov}tsveike}bEwU&tojGM!Rrrb0X$Dtw6pyMBtcbO&A2%Qi`lbj
zp%qS<t&dOr8Looo5Dy{Vhr+F-{>AsO8jauu<Shq@s{+O~7~`QR)8M^$1<!*)q-px7
zWqg1li`Y~e94tI2NjE~hAZ$cIG89A?Cntm}Z+1wFe`4_`J(`CNiM1yB2-h3#`8B+J
z|6Y5n{TXL#Sz#rnJ!Fjz6l4!VFHwfRN2!<h@Zp}3o%{=SQZqDuGj4p%7^L03TQ+a*
z!(FX=;lJDiVa^?T=P4X>*O7lyUzXmH#}~IbtTTdju^D_Qe<CiAlEk&<8-K4zOp1BW
zA>O!YQ$`^IfTlZu&v2uJ7N`i!aTFIKkmicoEzd$FH|CSAK;a^gRCxaS^+?hCO|R?^
z40dwhz{OJ0*^o?98a{hM?5SBzM~yd(o<^(}vo-`5c+mVt(}1sprvQ`SV%GNdS^-Qy
z5TgxfLoS(4zvOnZ>!V2W<KmW*je~OF$fsM-y87P^0ZH(5a2t8VkW{tQ(9vPodnvbZ
zfFj|Ps_tSTQ^DupiTR8MK-H^Ah0s3E0meLj<)5b^mRr265+N7|4I0!&l`d)Oq~zqd
zMZcOK<LKh`^_Z$l^(v4CZU1)1uY5C<@ARgB$p4!AlG9WX{#vW@^W4}1&=KEZk+X9e
zBt3CJ^(xdj_OIC+wEoQ$oBq9j1&^PbwX<pf!=Ib3gpxa=ynq;Ot%?c;4!QJHpD8~#
z>Oa3VI)bbZL(9nuIUq&U7Por+mihNxhEXABPzVVQt`c12NA>@5=B9s{Pp=tvSo5B=
zS5Sx}|48?ekbG4kzYRZY+*rTnjGe=t!~bNU)}sBHYo<~@Emge1w-t|14baw}u8_l~
zA<FYxB>{>Hsba`Iq?-{!5Rb#k6o39&6J-P6y5yv!4}ifmSN9i%p#0^3n?;Uk2QPtC
z-<46&R4V*o+1$?sdE5RG`6t{b$_FTlw&A;X_x!u+CNn6&(dJ1vlLMmwo=n4GRbx5>
zctAL?H!C{-l^ES<5cN}1Z*EN#Q^cYzAkN|Z2oFX1kswTtEF>tZuh`B2z2g3oD)}6O
z9-vWm1vh^l1pfqTDob<oz5(6Fc%A^mD8vY+>2=D2IW%Wibe58GqDiqApJ6yQ?_4~p
zSGn_X9oaT$*3WK*gsp3T8>)I0M~}X)qPbMO{L?|Y=>k;^z<nzPHktP>S;oCxiCSLq
z1z|*#{~s6qvyx%v@m8qam87uvbP8q*KhKh|YObBse&YR1${x{>ojfk1r9^)ZO2A}M
z{dT>V7hg9-{AZfih2*XA798o-y$y{oGmc!DGWp>CPHwZammRob+5h#TTMG^Zx@|vk
zs^i<_PA+q9RT`MR4c=cCY?(7~Pk*I}Yj+Jiu-G{ve(?ss!l-9IE<C$)yC`qk^p8IE
z?svu(*nf4YTzveTcTo}oqbS8;^D`yd+FIkpFSZ&_b1@eb<3w^eLmbU`!=B<lNF>Ue
zw{AV80_p7B4voEz_n(JiwY;igb({XHBu<%Su{unmTp=zWN%SR{hAmsSYS1UAsX5C-
ztLf(wKGuI*Bi#)6%LB>p#qhi32$A$gjJSL278<+XKg3&%Z1yt_Wfr_*lUN;wB>efF
zstEe~srJ{@+;;DEqT$c|=~Px)>Qi4EqPP&{PbFE$BXcJwo);C}>@?MXTQZ%Qp<<8^
zKV<IOUwoE!9e?bM+rGVR&7`I{8l6shW)TGUmpGE=zOH&WRy6DPti1702=lia*L4}K
zd|SNxi4VAR^Bcbj2fs9B&_2z`pyMVHrSp3LFd2GB92%svFS(+Bw>Sr(R01SOJ1k!T
z|BbORF%ybZGsBObI1#LP2+RakkoedViAryFWP+-qLY{aNUjCmbdE++nGvas7pFDFG
zT^CwK!M@)Is1F`|De899N=0An*g7ykk>sXbyLT7*&4q5^3~TmEK6}=PnYVT?zNxkN
z?YH$_Ue*Q(kv;*A$M@^Gv}yyFJD8odfhT!2_-EXJNP|^CX4L9d258E7CG7E3uhG+x
zgwa=WSS8OmQU0+cD#H^Te^qT>qY|2e!UQ3*l>uVIr7KoUH^xN{agVu7B*PrSGrjwm
z{2E)*E9b_=6`ABC^bjAZl;6ve60>Nwex+i`wyfu?@_PLem&v`)@=sQngJ(ROA??by
zVIRNMvj-UEy;E@*KsYGZ;D#rxHOvi+6P-&?x>etNLZ-wdCDHt@hf)Wgfkf3U)jsSx
zV3-eO2&!t}*FQ|mEi4Z5V|d8H)H2Qh%f=IXc6|s-wUL->YwUN-)1TE*N<QQc7>`pN
zIrXVi!^mzq5IYbrk;C!E`n_$`m1g`sLR(L7x3?#%IR0ybdTHbMMnhArE{W?FH9fhs
z$&Cdqw_TH%-y3{x;9u{aw!BRgguO1ZXj(#vkL>EglSEr?4-$DC;2v_|Dt6qbgP#J$
zT=Z%pmfv-u5pw~G=ZFIb#-iNg<}2v?N@d&T&C%j4x_dd6p{L{3Svo;2M|jxw+F=j7
zQ(RmOJ(2H=lh7xUbH4Ayg7yc?UW-=-=|0#LdLhu7{Qx>!UkreE1KMJvh3$A<CSJJU
z!X(|K{hY<1Cb7z&I#~arjmGn`vWeESFtm2_@US&&>%N@5gH3MC6E@B)*CEMu-SfLn
z3Vg6FZLx-Qvq|4tu3DNJZjHehoEILO;%mYbp!gf9-=m&8Yi&i-8IqVdpwuEs_^zOe
z6p^V~dmhJfL!lh7g8KSKCGFm|&v12WwVIC~$;XOM{=TVtgKy?9kDKH&EUbRVE?p+A
z$O#J$#*S${h3*q0(F#TbLMv6#Cv^5K5!-e}RLCcsDKyUC0GYac`OwN4bv4kCqtH#F
z$ab+Q5VZN2-aDP?X<iJ)0L6jrVc3x)19x6WBxQsR11bhb$71vcJ$v*xzu-#CvTfVI
z*3wc_eMKa?{CpKI3~bOx@HKR1QLqJW+9XE3-Fr>q0{H)}wwCr)`<r@}q3QqZzG3V>
z<W*>)QLwa9B`#7EzI~dKL^l`)J(Q{3jW9l&*c$J#*26I?zt@<716vxp|4ao<#$SK=
zk$zlF4GjfV_6;2)wu@t<liHp7r&T=KhQ6Lp-|BTe_ieNb+O@Kv_u^{{iuQ+eV%_wa
zc=+~R%)BiWPh58NjqFg&Wjk_;R5~~z!vOI;xBB<G&A|VF<f5B>%oxt>Bn2#GP3hbL
z5{Q|0yVI7dPoMs6dSO7o^5XP;bK-KFJWa_t=-B3^tEu_<%a`_T+YZ;)w|*5fK~~T@
zx|tX_vT4(A`IwR|rLuf$)KYnduKNg91aoUNUgKrO@KQCve8^JsW{F~*n-~BF7sEMo
z_5p(+cn5EZtt|nb<ZjtLrN3#*rYOo~w63}n9xE;O7$!^{U-a{Okz|J7ynDBj)GOAb
zW#gwuj~_GkgP13TzO23bDu9-eOr@iF*p)P0*j@gmXZhycT>3-|49g2jh*gx+o40Ly
z>6S_nz%QK}v<YhJ$;sb;(>oY+Bq=Gwr9cP9sX_?nLJJ4|;4=a#SUtUVZPKgWAKBY@
zyv9C3UJ8n5@oKwR3Q5Amzz1dycdY_~iK}D$8ulw5lF2YYwlS#La~7o%7<y-E^ItAe
zFeT{MQa5nbowGkqU(~ME|5OBVA@T9$#3f|Z4`Eig##g7>yHCyw?)&d`7>z~qaDf3J
z?S@X_OQ*KKE1ZhOM3M;;?t2Bf_k}hYJ$j#FE=en^UGUf|qa|?#Nd-g@cpHziATRIe
zM{{&H;WM-jGpIB4Ziw=h30$PdK5kJ^CCk$->WzzN`byXV6_v)6FO1oi-R{1{G)Y-v
zwXJn*c-sDWI;M8^GMsb<-M^C%PE4j~&dLgOrm7xgU|@IY5<N_-X{M3hK!*a`8MZ};
zB*n|*J=9)j1}As!{IaAZx^lKkw?+R}MgI1;FQ@cuKl#EXp9Ki0lDYVZQ#YoyN!+6`
z@-VP*pQ0n01rMj$*mNmcJk^jk(iIPncF%99F5f+DNq>zCKTl<a+mY6g=IZLvPv}nx
zE4V{}cRZ_yqGI%o8)_j-LEIOfd_=Or9?fq4&wi7%i`bL)Gj-(_uhSeeX20{JCr=Xe
zgLQ*x*9P_v?OBQ{Wb0bJp+gTo#n;~EQ_S!!6As-to&9=6VyRs-lQVIvyyL&`)9Bwn
zz!~zF$8PJG!j?oJ_>w3U5d7xN(u3x4iHWz2K4FtsoZ*+mKEZ?`?efT`<&j8y0)u~M
zwd6Z{XKP)g)s;<+Dge9pVvaVa*I@K${6{7i9sJSZ<m{f1VLf}+!_$Mw!hCZ10Yc)a
zkaz7!{x5%&fZswecoXa4T@a@Mg+kj=(cA+b#&>n_D1=>i*+L4!+d%8F8N|Lq`}T+t
zSl3xt%o7E`6WBswVfg?i0?UZ-unxmI9epK*Ey<BW^FOIR@g4G}lH~HC6tnc%FY>Fn
zIDjQ!{S6$&xVR}ZXNrE*yr;Nno=eC*Rx#{A=)Qr8&{((4A!DP3aj1aXZm&e7hd;bC
zmy6>*y<KS+Y?4vx@cY1b#an^1@jd{=JYd>jk6qH=zIAJBq$xo`Kgd>*UGkARP@)}e
zN6*Jx*mepT%_dl`r{MG91J>5of~;(r0GF7H65+Sse#1^xe^S=#&nWCrcau9I{KqFI
z2Lv2J2a`tZf-DBl<d#g>NF~f?7@RL`p)p;aZNT9<kbRL;ME=Y)#`)zu4HwESDS^fe
zHp#iCYSZI28Nd2N@mTz&`285)P|FImY4;$QahGL3GLo<0Ys8iFiOm8l2QbN^2q#A}
zE3W$1{Sc9#r{a~jwCD0TNB5ioe1=!jFkESPAI#NkT-cMHx_9>izD7odz=!f57vhI^
z@2=!tY<)bx$}^pNhWzXUmn7Txm0POlRlr{U%o+E!Yhy+}wJ3z_fG532LA1!p3A_ku
z;}Qa$OSh<wdMGJX1689nq3MO)&5H%_<>qE1CHxpC>HOc-b&*aJkn#AnRgg_=A(Zhj
zhSit&SiArr;P<Z=6&-lo$jPEzXxHcsJRK19%rV25;p%<e`3bxezKnBT<mBarj^4oF
zLj<>Mgg5=vLe!a7J9KCc0w`!!zTB{RQ(Coe|K-!CgthmxvYe3A<Q`xv3LlQoucn7g
zxjH8DZ}IBUr;i`O6B>`}E)@_~YmkiQ8}n3iTDy9!865^^E?R29(l=)<teekmWv<TW
ztp{5n%~`&3C6jY*b3u@?I>*6Czv_!U0P{K&KTlb>@KCk>W0V~bxTS$A%F3+$S@v3;
z!cH!FPwo=xPBzYD(BcH24Az9_XiP8Zh7CraM@8Lx{rMX?Cxi%kwjv@(5Bpter?)PG
zr3vfq41+^TB-`);0|9p4uE*Mhng**A%fCCiQf2@>&Aa`<f-}MdLx>+SBhAN0sgDNI
z7T&;0&G0=J0FQjf&YhT5aaQ|@U$lJrG~>j}-%M{#v3g5o4~3Pqwu)+m8+&nYHvjYq
z6PkY}gLn-8t~5Fj`L`TS9`boUt-Lt3320|#WIC9=vra-#97P;6Q5}}Wk(QG+YB9<6
z%9Xzw25y~j2t=0i)PT|y->G1Z(Lbe&a9J>&3^qi2ecTsNEcAW*?l~`AYVIYQM$*hO
z-R=kai?RJcec9#miPqyWbhfp!GGFnk-LJ!jx^|diYP#^|MFtIGvFd_W0z8YaI`3sw
zf{d1<JlFjK9=p^1XZ2-+F#s*Yr-wzUjCSkqes542VljBk(8{yE-`C|gq*MbC<eCj%
zb!7fOT7WxvaDZNXs;WBYU8CVkcp6FKFl?9@P|jpN!Vy^%c1G-x*zZl^ThjGNgrHzk
z8maM)Y9T$DJ4yEZ%OCz;V=!0X)xSUgL`-$1;Uup5GK~eaDILtcgr0_&P2M*w+#5ST
z#_({Xe&nqY$gO7UYJJ?C_2L<1BIN_E|3<Uz>@*Y=@K#Y&QYwA>_B>wE2w6>aLp|cT
z5`O*V7cy!jt&@7uc;_d+_>y}Z-n&u8&)A9kLV%!|njq$Ufe74w`O6<BeHCeiqO|~!
z;{eYJ>x*3*Qc<&T6UzSL??lV<>|&!K{}t;hG4cr9f!rBw1(nR|QTy3lID@H=7_mq3
z#p~AvTsTCwM8j!9i$FuFQ=qep3wY;xC~vZJdMzQE#!xeIXIY<fuK}JSKr5&sZ{xgm
zXEZk<;_AYWL`g!#Dk2<Tf<^G4q$A8G3=12irM2C8%94#8rObLLt*ngPzkixS4ofT-
z;xB8OZpfaUGUWzqFj36{k4nBEPeNwf!+HRGJUfz7Dj=A6M8iBYI%0?q$QTIi96gdh
zF)Svr=W7BNT<5@mRipM32w*4IF%fz0-0vJPYB|&(q{L=Zr_KRFr`1W=3EO!ao_WqT
zTRZpF%vq~RSB143>(NRT#{|m7T{Di~xp4!4);R7gsp0K2MRm(5t*X;b^CvD67OJMY
z8qcfoQYmt$Q~-8dLN$k3(e6XMi3_YnKB0xj1f>fr#uE?(2xj?F_d2v|hmnK<x<z)d
z^M~pAQB$0EPEZ53_FcO+nIRw0ZxnL6UA93aK$&;$-CHMQ3U7Dy<wv~DjxE~qu!7Zo
zfMyu(lrFp{q_SXltO!+&F=G|`cUpl^9yI=p#cvl@k_TDjo3jFkpCSo)cVJ(yy}F_6
z&`;C3+^$`a(0@Iie1qe0?b?x*O?((!bTS=CJlaOI1RO=CxZGrgNr~6hRWFm?U}0%X
zg$%C+vquPVm1`P{5Z;S`#w_>>KGFm=3*0|RpW(m;pFaB63uXk_9-ZA$Noj&Y&i(s)
zDT?9mu@UOuWgAPoo=ugo_c9cRBh$60Pc&u@>avYujxG7BynHlbCpgYOOn_;AKvPp+
zD+EgE6M!i+%^$lI=`mQ$g*C6+t4=U8LqaqW-6uCT+~)>_E!=oKDbF#*fEOhEKuQO1
ztZ&<@72^r7P|IU=N4rec%a1~XK!MDUaK*sd(dwOTzfHLE5hvr%o;g#qYk(Q6kM57A
zu_~eoJ%f}~t(*VCtQg8+)USg(wY6%qDK-4xG}{+Caj3y2H#j*0{<G6dI_SBR(FkE3
z`CZ=tVV1~;=-GbL=szc@(TjpF!hirOe#eXr4vv@Xq+&o0y8^(2Baiay@yn1o-_IUB
zx}4}i#g9-lwGcr2v}FsMEfY>ruCSY7>*+J^B*hc5L(frtbz_j)nb%Ax%fE=w1FIWB
z$WXh<9*p-IQ_NE$dMc=rUXqIJ@lcaaK{$h~h3fH~J<SSBWyenO{0KH?7xc+$N%f|>
z-e9_D2J#1ur>-OlVW(VKISd|57Uf^0h?Fr*exLKLMBl$7X$`5X-nFZfR9sw~GR?3W
zde0aSp{=f7TV3ASeGz~G$<yxM71ZZeU&Od0-piqUPRJBGM~p3$J2HNo;a8V0{_W*H
z*0&B*m>_4gb?%Tg8a;i`(OX5XCF-D7nOnZz!lBcaQgN;C?9CnYdetZ{E9Spjat)E6
z8vE8Mb?mqrsiaZVOoea6PE5ejhDJxEd~8h2cX8)SVyR-jf-%C-eWvr?$Hepp?Iw2D
zbw)`jpuQ;`!uIbULl17nmC=l*=5n@4NMoSt$)!1Jy?0JlIC<)n0p-_cdyV}`q1yu(
zDQKMVT#g88=<d}(rku$w>}+PcPxP;FNp7>U{=R2N0-J|alT0jBr|lYe1_~}Ykacrf
zq|FK|_P4C!F7DvW4m`nQr+IYYI$HJ)j98&!owJBNnj5w<QCYe30VAE$U$n7-Tb(!4
z{^pYupgAl3JWAa-!{!RAq<zSo7nSedwM+U;^*%f;f4uPyCGUCi3yX}0<;E@@uf6_5
z@v`MsxwuFoQ4rvd>{BD*=g!3*kGksi>v!eJlZYp&5chvvSY?7$FHcPPe-oEX%?tv9
z2^I#1h7ZP4O*pm3NU<L5Ub>l_mUT{F!??Ct7H23Y`YOWrl9cUv{KuT>02F6H8@b{3
z;t_KiaHxKKQUPsk0Z!xfCQ@y^BBTW<PM>}ULDG|_Pi-q!aaqH;T8s{+36DL$r+0u?
z*flOBQf?AXQ<tTbn4B{7-+5vj<i4*hAJ_^~nK>M2f5Qw&{qJkmlQ4y*xOel7hrRUV
z6OgUty4Z*M`%Ann1eyXY{kzvZay*08F9NFVvY|>L9A-#2Z`^ndbQRU$qLwtv{O;YW
z_Zip0=y1Q$;R|54qi0BQ@YW-zE^GW#P`VH9+c)#+)AN@3kEjHuM;RS(W-<W*Ga)x|
zRL}MY7F~JyQDa3v$D5>aSqo{GEOI(hta16-v#9Ku0|LZ-Vp*ZBEGDX;QjO31zpBnW
zuEx9#<Beg&OjJ{rY%R8uEg}@Dm{hc#Qb}1_D0_vn4kyGQ%PBEYIGj@wl8DOKq9R&k
zZKOh3Q=*dI?^Bj}{V|`hbe8A&J@<0m*LB^~oa*pZ_j2|Ta;D=kTsQ7*`HTsH!`zuz
zbjIxtu~w|Z5uhUF3^hna!8Ef?QfZ%PZPyiet;qg1GrLYDGC*3aYKHwAF>1V|m$R7s
z2BN4X&MqfzyLx&aP~I6Gee+R3!cH7cZUR~X;|g>+i8#)K<R(>ppr{27Dl+&GA>ekM
zxP-)8CT6@~6;X{;(F+n1k&`|Jv)rSH4h;hxgNE`F7K>wW<@^NI_Mw5!sTjBkW)r*Y
zIR6&_#72K`?nyKQ9IU)Ncg76tKb<QwwvjS?C<{al;E>dmzGLI#>a6N4H8#`2!8MFp
zVL+x7)?<qv=H_nyGO^%K5-(;F&H|tYbt~U+G7Jh?<zuC7?T>a=@8IO(Vwoacv&QM*
zp+n{=EVcBiRoQYCM_k<j1N<MJc@1Xb-@VJ%`zIyA(No4e5P<VH+a=H0wIL2?pUU`<
zNk?muvCSkNNgNio)5>Y;{Fb7IXR`UX$c{#i7;!}SIZ@xgIH?YDCbt9?;ig<Cw7&?A
zgw)Gcj8ogY($Z1ZSSQ9VKKDpCiUZmS+=XmI!9wkQ6;v0gBkurma;Qo99TiHRf6;rk
zAkbNz<aB6Vxp5<`_@uB`QeTI>Kw&!lnOhcg0rY|vihMbG3g7Lm{S>~#zPB}5ml*DG
zX?7)io5%Dck#B~;woUcYO>^kW>BecsUmT9}bAkRP<cd5OaW~7CRGfZd#SGZVqvB7G
zR0$z21M-XgYf8XK@9=l;7R70$Jr4rSpe~Z1;oK#!?zanGbi$zaK)Bp0$Jg{EH*!Yx
z6`v*&JhPJ7=ugePeR~mcPc*IAU_o$j9d$o7P^cLjmF|ISs~ImGxWlq{O==~!iDgb1
zvOh_<<bN_tT+d-3mU{a7j0s{ZM-JEIV$p<&9x^iEGW(1KRfwwY=m=HR!9lff|M0mx
z13iqqIm<}IMGXWN$)F=v&jv^=^6aQ4|9G~z!$x!8!Yy>1xg&zs<~}FcI6ZT$8LS^`
zXQ@7Ffs4o}O}6w1=);kRPG36AS{k!C*?J75_mwM+H(th=)OmhkK+CE5<Og9m(r<TU
zWcI_bQ04c0DcqBkl$4@AwDRj1ncYYlmTq76`a$9C+u^G}LC&)LNPm5}vk6ngSeq_2
zPvasY?ozju1<fwbBt3i74&~{u7=2dX*4xXA0D5D~#_V#3(7&Z~=ghfnd$RN57&a1Q
zKax#$bPRydAZnnH(bvJTR$ON;7Z395rlNAkbYau+-2b3kJ1_i|%ZNGe*ZB;b^LoJ0
zBjis`ZlXkB_7hbKd_g(Eg3lWdyT(~TLX3TxQ2VmT)<dJ0mKT@Na7^fFPB5M9VQM%1
zRg_P%YiNGTKd6LR6!e_NLYur2RQO>Q+HnJ;_hn=I^GDIXV@iNZM;^Y4wBNDiRb^#8
z+<=ghX-D)DJ!nCuOzF`0j??3`KgAi_O~<(}sOR3*)QE2EF^jzoPn=Ez#zINeauOI@
ztQhJrW5vlQk6HKi@+I`Q8L(5BJoWC|H?Hh((E8%>`%kR)0l39;Bx|f%bnFxt$JBOO
zfmR<Wb)nBi*~-1r9VYcwGDAK4VxjCbs05DU_5}HbMcyGNFSt95ejKH|Ga|xyYKiPG
z0!ZO4QO-dF##+?X_frhw?=VmMOvn{0_v?3*s;7U~W@RTQ0jIwsRA!=w|NiS)Joyzl
zr?(hL(<(AH!HN1IA|+<>;EW;p<^y6on*jg#bvMVEJnj5rv??wOHZVU?pT3}$GPY*x
z3xlob9`YhWY--%PYn2h9a*hgPgrbHMXU;?{+cJis{N0EJNssJpEAXeipq_vr$8IZ#
zB(3D*fAHcZeUnnWKa+9ow~{r$QBJh|xiIvVK%W<W%?YRd_v*=Rrh{**ecqVdyrSPx
zcVQKBl5k~%xp{{*A{TfUZy%JlUEPr;rsl&3|D_QcF)wTB)^5D4M#2@>`U%nfA(720
z7nd{ff~isNE8v;(wE?B?4smzRX+6Hyax|=$ox}2`CdR~=YVWXIhDpA9KuKX%mQ|nU
z2~m`QJ&c!!E3;JVGbO*%xu4H#(=Q4S{rdIzm~7a<8U66?9mZ7!`T0x{c$Uf4<d5!7
z8$Uq`qbk7E$NtNOAk*HecP15b2+_05tjPHi7AKR%jQd--alL+i7IrwmL-Kc`sLQ0r
z%rEA5nAJAf^igXMr)+A!@hTvL!E64C9kAnfGooK*-=0qPB{c`%M{Rc9PYwQ8+OVYc
zOqis;S7%j`z-W6I<-pFC{MO9e1i!xaNY;;Or`!B^3r%JBb4x;%wUb8pCG1T3JVh}!
z@{~ZK8MyytMTNbRx3lq{ecKVl?c-H{cA9F|6DTyo7roW^6nJS_?mzi-cnC%sF`jxh
z4{#-}N4IX-9*ZNqRq}x37G&q7JGBmo7Q2pUB{H-AKB-YCXdO@@R}C38$wy_3nI-)#
zXq3F6Kv4LB>sz%OiCHAA38gZyq2ac79QeR}((XD^kd*gJLj5)IasvNi5w7KwDV-Es
zgAN!}Ta-`lWoGH((xoum^+AMdt;F^G$v3*Ici_dO);$R#9F}JpO<z{9!w(V_){Z$E
zQ8-`2mdksY?dIktlmPVmz`JCm43x@ka^si4YJ~$q*^UNF_@<IxW^MR1&_Mt6BgJ?;
z^GjyR%3ux|jU5_veQ0&77-C7(`xg!}G?Wagoag74wLh30vjMfSF9yEC`9><2-^h!K
z+B?Z}+TNYjlJ+|Crh9Api^GX`I63b3>({4`Tu=8vw01^<;!Cl6M>&&&t=!&y8^1VY
zJA&p^hwcV@;Y+KSjhJ`uqQm88BT9jMdc^j?8)UJKxiypmG(`yoS|}I$tFdD_>Fqe@
zZUE(FCKD0|pR9?U+DdYN)e8!{z<tMC4d|Hkb9HFmiwiDMf=xhTqHRJUqvbWg>EKR=
zn*^6I#zNw7NRqT45m_p*3U+@~3+Q?R`MAf(9hQN@1P|kR@GsosG?lMsfAsF!+V7YD
zWc5j%WruVf5(JL}3K}i3TBlAdJogurT)y&FyI>eF5tlC(lPCkvXXTxV%c{`T*B9h9
z91N{tkI$$EuL9wr29ceqxpdq@TYh(1yoqX<IO<eR&oJ$)YJF70&>j(#B<`WFP~4;R
z-`#b5FcW81GV1p4e;fJTq9+O~CbYd;wcI$a_!q4!IXUy#%<w*hVKxN|Y?f+qI0FO<
zwoyh#vfdjsl*24|!-$W^s2HIlC~A>3YR31;NXogC$3%kK&vA$->O;TXQHi2%?Y1DE
ziO7lpLr+|<G0<ts;lTUqW{L-L6U!HU>6l%1`SX0~^~2L`g;|g^YI9v<f<Z#m{Xgs4
z#a<c%LaZe;P5J@Nq6fxdH+iP6(9yU(WP6~^lqttWi)(3U#DeC}e@?-NFSR)<Hb^RT
z^09X^o4;O1aHEEtZmx*jMB0&t`^5H13pY)z2+h-L{OG9>y~<jVMTkd}7Di>$rEml1
zb!{*j-gA5A&Q;l`4oa^-u(!0_1QQPXaP1w#W7^0z96X*8)e?d68vn=RpOaR!I!B61
zWs@Jy1xF=W_H8;}vO;OdFr#?)O`;q1b9t=T-p2=#PdH77hsty?j~%zfw#zTSb-6fp
z3Ry&QvMAYqYRLQ@&JGTjj=9*C8akK55g(>AGdI<_?B^svacoTO#+?P<`NE_Dgn>eM
zmPZ<V-2T0S=V#~SxZ{JkJ(D3Hy{o{AbTV8b5~)o$BAe{JTyi21;NZwZD($IGza?)x
zu41ib`P8@RmchojPCkBPCP&p-CGRK*;P&Ly#a)|vB(?HO?(AeM2DPtmigIjffaGg$
z<#dqfy+tILKnexsLnUJGH1F%hu&H^SC7Q1A%#o4kLJGLZY!rE^8rtz~iO4UON|M$0
z6pp^PF!gR;+(CEUfc`!o+WHt5UfQp8Ea$-;_#5ydZKcJCJB7XL<bR^f`VztCs%Nh_
z@q=&lhj)fgmz={~f!VA$oKpSD)vI@c$5Ba|#R|&@;BMPLQ_ER#H>%sTqWv7w^x*0Z
zek{XW2zp`It>4!nrPa82csX*ptG)er6XLA5+}u4Lb}&mJ$M<`(*+Sb}yB&sbOJ8dg
zzF|8i&19^lN_ohp%+i6le~~A$%LQe+U@uVeq#3p&tqHIlW5@VQL;Tt$y_q06UpQc_
zh&V1A-N*1q&PwS6OO4UbM91PbmwKu*`A;70h+))@;DdTPJL(|9exk54vt)TH101zc
zZ)u|TbcDKE_WE_g^<P_H_$Tp&LT#BAVj|4US;-iTE^zcsA+Cv)F{uEUiyLO~VdjDb
zj^#eyIXCC;30ma8c9hz%Uwj()Hw>t={+=K_`Us+aPc~kFFi=hHaE|k8yJ7#KfDJ_n
zOg)JtjCqc1OsD&p*RdbGSDekf88bAaZ%&j%2wUnU`Yv<4yev|pQm$u<$}49|Nc*^F
zOi4zp2k_`;tQNh>fA(yhM*WSK**$RZZP8%rm2}6#gW1Qju9Pfdt|~CUHiGVZPfyMo
z&|O*CG=*dR(9}_o%|meBxqtt0xtumtP}avUUHPovxva7YKE=LU2bjxRM`lY;y|_1Y
z+znnJZCJz7TENl3X?@<NU8|O4>Fn^ON7?LN<~Q@ePlr!7*<Ej1z1IJ;@1DwuP79qr
zZWV&f>nQfruS>Tsqb3&aolxjJu>5+O<A#GA`Q@vRJ+KCvBJCqsOKH`cN9W8HqsA;d
zzPB{K4GEK9PD^<Tx<2+9>^n{V^NRE(l3%apg#H7Vb^WiSS#6p|FXArMr|C~S^P4=y
zuWG{lkY+Ul@ABafdwUh4puzgdTdBnH;;={t|CG(YJA2kFTxfy8xc!KPa(eHtsR^vT
z71)06Rv^?n{`09tNesvD8!>%gm3eP5n&b%AHsNy8av<P*vQNLKi;P5vjvce{IH1>g
zsCveACyx?%t9DA}=9g){Pb@or02&T|0>T6$z}^MH=ty#al_YDa1G8?ty0~&?n)iYQ
zQ+%gmvyl{;A@%t!BKAdDDLs6uS-{r9<>?N=z4gYgX$)I3%(Rcw>YE6mUYrTs?>%sW
z)5jMhwL5KbUH!B$L<eB2a>fy1)f8c`kez@7SJc%p;C4_N6gg(m6Vqp!wrWGzp<Cal
z87c_9^z})P!CB;w`u&UIifNZ|2|#~G(*j0m?>Wcs+9SQ^3}atK!+NS1v8puHhKYlI
zu8*&8e*9hmM}*3hvwTmyP33!L@~I|RMjB3hZ)pCN$_Q&9?TAg3nIU{KFp73sfJ&NT
z*fB$Yop{%VLF0?1W@fFdZO0yHs;l<xW1D&5+<~+sI!+zz_G3&R#}?87i5-)sXf;gJ
z5hG?egQOXHxI7Sv*G9h+y%#&5+a&)qLHMrON!+o(IH|!@+pV8gegPPeJ3S+uEC%>9
zI8RETURuqJ({PM66)90NtcbX0;OVk(uD|~@H#dj*CtW^2yV)(`7Exw)u(P>&DOI-g
z0nn|}mtajz&9(J+?0(;2W4JHTPe--<j^~<7qkk5SJa?dRBL-WRwz`6}by?>E2T_pi
zj*&YhlB@eYZyml=Azc7>?K<;J`9wHF)eG$<*hc23c;G)1?t1cTKNPb7dY4V~Xo(*P
z@>3Eltpii;j&)|g{;Rj(dbk!9iPJi6TmEYp6iGGUzh1!+9{n}Nw?}@o%Dnul?=Ba8
zfs6*~C`y0cajR>OwXTO!c7~jfY4kSz&%L14v-<afFasdV=zZLMYl)+<%Gd_{tQJ_t
zb;&u#o04%olFrOouwamNE@v>pw?bekbqihvmS`KPC~e5zjQ}<}dU7*dtY<P|up<3w
z){0;oo8PjZh?RYw2|n<yA*VdOAd#lJE%NZ_X`O6Y8-H{VS)1bIXs2z^-K`Rf;=2c&
z6j(XhJ5B_`>WHYjRBuydTR@qdJ+^?6qHPL~>Af+`L-q;0CBu35h6JzjGxn4ltCkOK
z>S?O}FydNiRhda2Tj3B@?Zg$ie!CI}<j!r`g%KXs_PUcC9g|WA&rDYd{(YV#X~1NX
zZ1hc$SQ&e#dgsm5SvOun)tDH$b?S{o+!KUfYc=+aSvDw5aue19R(jPH`Tcm5`Az)D
zAt0>rom)Cj{Vl}=gu_V-0g{OL{UT=G?|fQ*b?9bF@e9kTMl;ig-4lw%&E4L=F>&rW
z-5^TYY~ZDAQKrAEXSeUnqKie==H}}?{-N|L#hES(8bx5mrF28d)Aitq(SBPj4ic4b
zyz1L&3x!7f=L$wd7QR<#rbWdOPwD3-vD9#+<;01<IO&`LvCi3gT5s-y)4FNz_k@RE
zv^jN5Ts&a%==iYW+Uc6gU~=Q@lUI(-$g=r|Zud@~wiMK564NJD%i$Dw_lgsc!b6Z@
zJYv}|<}krQ>X{Z|$n^v`2u6$MuUe!UYj>7m7zRLQ*BRC_k@7Pxpkv{G`1ogLU2PvV
z;0$eL>|hZL>1?N)w{E4cZX8_*l_Tfo{U`|Y#u(yZ=E=X2Uz-_!o5=)+)+(89s{cYI
z6K(Bv<tr?NIRtDTt7O`++@0zn(8}<xGrJCR;a>c`yYqyInC1U(x`~M=v{nb7155)>
z9$jV~FO!9j%(v>T3dJQZAvSg>CH?3vR+AVy2A)5BSX?KZs(*<D)aK41?M%+A)`hyL
zeZD62IN4E~m1oVEVS3;xH!tULzqvev03k;oP-M?e-13!xWw(M!xoPLYXCVksVJd0~
z3x;SM-AWxYc>Wx>zJI@@^3?SjkfQtnBbY9{nk_UX?D2T!Xx#HH@Pz0*xM^NkQlK#{
zlztTh?oajgs_0HmchYyc*wc3F(2t>b_iwwfKrkYdDy;oMvrAvXQxbT`F|N}L>6ix+
zK`BFuv76pGe=dCQ%1IXEMXzbQZsaHlg(()1f$G;PNeIbFNrxZ?E@GCG*T%g~tA8lJ
zU4Dsu+rYrhX!?TjN5GZx7tW_DH^?5q_Vh5T13gAA=>9lr1gR=@VDu1Xk8A46JNrC{
zG6sFEF{q*bIc%-D$wHeIPMnn5#oc@Ks5pbrDDdtv7yV)iZ1AcEBQ7C?7#x`KxS>(y
zoFu*<*#8jIJ_PQfYljaVDrHn6>H7NUfz^-+es3QN`10t?{CzoNxpXy|g_)WCw6w;j
zZa38g>f%KTE_253OqdoWw0jIKE?j_Fo5B;K_t8K8XV_>SDGPcOKC7<dOzkY*I}mIq
zGxKQHTPma2ahF$)T>~Ut(`IH!^)sap3BwHx+)jD5YuM8g)BCWt!FAd+$JfBM746En
zn{2{3lD%<&CAomk2O?|4>PnahHOJR6^v9uDmL%KlMdnU{%JX6=Vb1BCMe1H_NNq35
zDj-TgGqdUeZ-f;}@qf>7GD<6Gw3(n7#R`C+Zj(TCndG-=HD20*bSr(+C~TYp&+FSP
zC?F8^X+cUOA{?@>^M3aJ79ZB7r8P48k=4T6UB7<4w88!S(wl%+evp8bdW_z^DhJd#
zf!PqTT(T`PLfBTiJH0NLVh@Nxzl!ZIy{O>xO5vw`W|mmcaq*3veR7g}6z(<fZr`JX
zm#wJGup!Yz2H!Dt`hkdgCUeKPbs4&DwT%5niI%=fAc6SRILHa!b9fCPLY~qz%igMr
zt@-}&xyJ=n?oLZfBY(a>I_>1FHqo8fbQsN0kFyDc%qM{E#p!?z3vF?Scdvla4b)l@
zjqdJ)J_47$Y*uGrlmLAS(gNKsm{@-`YEOK_1^)tZIN$Cwtb{xrP|i?~c4(;q-zI~}
zgjqn#5mcL}ye5f61yI4_R|<w`ki6R_-y+mzf`h_H56diN!h_^P;0^IUgj_bKt2Y?o
zsL^G}428ngL>>_EM24-*qV)8rEQsm&tYWF_Tt4U><mH^-rU$DJcUqTKBV87r!Kfx9
z?!;exRl{UinQFo3PEbePXE}moMb`7`&iXFGsFL6rYu8^jjJZ081JGoO^uCH2YdEPA
z^#f!~mhQ|Cvv7znvRX_dQR69CJkYcCQc+>Ok|sZwpU(${F)(lJfx&Ml4`qOe4NVaR
zrSf}XGNTFB;vSYtVSp0f!Kz8e1kwy(MW{FYx^hLl>Y_&ORK4vM!Azu%j-62O2sM)%
zLwq(>p|%DTq1$C+S2`cJP$M5U1K1ZdBTg!SmcFho8WHl*G^Va(M)_E}!Nmaq@!7K7
z1=x7hcfLX|A<XOD-1hF=d5U?PUV?M|u$Y8|$xW*U>FVBi{J4=ShfebJ=^E5|8Azyk
zqVbB0NleUL;z85q7bREJ;v`_gix0Mr7=eDyzLB)^M$K;Q*REbo@JJg?S=P*GrW^;>
zJ@L0_E1nKa_;K6*DQ;ei{z+k_obh5ISWkTklyA9o>OHV7ga+-j0N|iEp*|$|Kyzne
zf~sUXoup7cUP^I1Vp9cWZ^=c_3N>|gv=O~bLWcM0Bc&{YWYxd_9EwXK*5P_=H>VW0
zAJxM)IQSe01kE&NKh^Kwlh5T7J|PJONEjy$$4nM_m$OsR_`RW_ZB}grw8Lcg0rq9I
z@yQl!BPd0mimPWu_gQvYQ+mAJ^{t2T41=-Xw_iVi(Fqn7{YNoC>73noAAXL=it|;v
z+<1S^SYqN)$Hc>c>*x~0{cQJ*nCuhRpUT}4XO|1UOjQh_E<W2qlfznp;I3x0_r!A?
zwesW}F;t|Uho$J_I{fsB6EW{M_UD<SZa!*l?94o{^vxS$vWI-cVc4tOGdf2MDReq*
zVI2%5vvuS>FZ8vaD4KE6o}PMZkT6dI4q!dQ&boOpgo*UtK5EHx?#u$L!?pmf$nly2
zdT`mQ^Luy$F1ud!&lk&=z>6s<E9;#Wzg!yMPpv&_mZezbM%`Ir)SXU+$wbJ}B@akh
zaOAC%;SD#}rB8wuExLDl?yj7lN=r-CkIg7yks*#N6{W^KZ9^J{O+eIqnj$AJQ=T}m
z5P(L|GF?#N`M~~TP{6Qddt+lu$j<ES)K1*C1fQ^^;xgg{?v=VpN8cC4rG9Q~q$IbE
z>a&Y^6XvT9&dxj$9u;zKOrA7gF1hqwRTbRDunPeY#pc4`AlW$fK2dpVYStf1P{&8?
zy-(Ex?e4FqLx)UHUwJ<>^N-ZjAzEHAHVA|Q8;2OBXA;z+1Iqd<3$Zq10(xHqYXe8m
z655#^+2wZ~S_;^T_K(!+-H5M~a_61qc{}cMa$PVRRF*RACsd|ZGh*h6(p=@P--*Y#
zk-24_E!2HzLW0{`Qy%%Ry^@mj0gYpU5#k7f8<^LNk4}@Ilyg~*KS|ks@JV0+X^_T!
zz}REPf+Q9bI4v(2E;$;no9kr-r8@v@><+O|yidD~M@d1CDQP%RuMW9iP2cmXX`cr+
z3gAv<CW6sE&l0hTt#+Hd!^Ps()Ybrb1}VC`MyY9e@oj4wYbB5d9sGrAw2?ZhTuEI+
zgAYG7VtE+@&oyvKK`Erm&ct@<+LdJIgI(hLiOWB|(@F~!!Kk3ggGgKb#bfXB<8LTR
z$+DR}bD8`iI3F8aMdAWl@`7F=G4NEsPyj<=rfW_0$B(`~zW}Jc1&EKYDgI+O451wS
z8X)FLwmJSg9{eYD-U)%*%_e8_;v`Sw`lE}=uB(KYIBPiU`>FzY;$H-8B-CVG<2TPM
z^_Wg$1kdJI5H6-&JWD(@Ks|!R8=Dv51e`Q*+~Sw>=+S5Dixx*yd_?H5#mcNNA)?VG
zE@I!G@L+cA7+Lp#(nK$8DGlM~!vay^!v``fgg*+PXgWwT9>y{O=iD6~^TDpR62D0l
z`h%Ia<B94*c(=5K2CRJ`cjS;ELxv6OX6Uz@G6E+O;`F+OD{se)M`cI;Ndx<`vTD!2
z8$KMH;c|)|<n=Sy^EPa8f+~2GuvY_(I(G%qG*4J>RhAP9+kJUIf(oNaBj|k%nv9gV
zAhlIhH?CjrB5c($T=o*4)Bzw0<_yAe;)f6SNf~VkOwj$t!Kx}MRrFYVk@l_E!P>;^
zg^C8aF#&Qe$q?^=SzW!L2P0>Jc6yLm@VYj?mPw1_u2L5X`(410;PnIZP?i!ZgbUtn
zAN>ZO&tk#^T_X{c7J<jtvJ-)$4U|z`w~|g3uNBBJs2}+FyW>*g6uqAfD4clk0tt?Q
z)c`opm%Evjr8ju+LKt;aIcS++_c<jEB@(S5XJUXzg2Y_HA?!rozI|yN_Ay$g!f-q2
z;6RK7+e4dE*ROBi4ehxgFZL}=k1|?(kb9UD39D@0gVQ(g%jEk*Hf3}VDNS%gWTOCY
z#JUQ{%!T8V)eevs8TRprhS0`S@?wfB&w9c838Djv7;5?zBSy45o2JoGbP1d_L^e##
zt%IA&n?^%_CA-@k7~4pmB7=Z!6+D|wNN-}>1lMpV-sOz>uC|5h@RB>lc?jWF3oW9r
z@5>i2z`z8~K2)L{?JMt}x`Kue+fRW(Nyp6KFo%(HK-iHWSufjs{(J*-Pa$|R2qssO
zM{)f}=5dOc#8JVrBVH8tv*_34AF49hiQ0h$O6>pAi@i;`a_iQV$0dB9m9Oo-Dvr{7
z%U1nb=^(F70J{q*{j=Wop9d*HQ*X3<Y5rR|5SW)oo+UW$Z#cb$Jp?DMZ@)|%e++jO
zG|^))g%gMKz@t-*`zBFt{>N+Ut^YGPTwC8FkBlGFp1%$M^KuCsvX&S1F3A|7aQn##
zTX#O$*Y`;_1Eo@Sz~EqqWdZ7BhANkJ)Y{7@5<Q!hz5L1A_b+;+yl8sGmMvS*5s^6@
zBWbrEG6iQlYt>mN(VV(GJ}JEHaAPW4(fY>0Cc^I!;+R3_lWj27Y!)KF`m}OvVGLs7
zAC-Hu05tl|7$|tK#QgGt#kNhyT9jRL&N7daO=L@Rh|ZshSD?0Y#+Vp)1BR@RNlF?P
zB$x^L{iRenDrEA8{b#-M;&#mt-jiQX;`8Z#h3&EQzZx268#?ocF%OU$$|urB{`hvN
zQRNfaAoWdh%yh-O;u>K*NVR#VV)Lu<pZ<O@t)a_OEs&F(zA3!*ZWAgw{ZO--Xx%Hf
zwNWrV4)}rg12x(!idX%|b}POT^O|2PX0<SX`f@R!B`!gkU)9(eR7U;8=(>mE_$pk;
z>k(8%63GO`iu+0ZR@i=rW13&0U@-CLhz6#UElP3jn^XPAx3-2TCS!`ht;^1>Y2rE~
zS^sgJg`c#`xV@&LD+{MvMBV&>gvevR6^|=H7_N!!j$&aKj2%|kZexDfxV-u8zW&El
zi&Myei{_Qbn}@9sv&>}{Eq|*=VeZ398aun&+#}*8y{deSw!+#bdme0xhjr^(5$%|!
zjh*~WM&9z1rY3Ag{};<e*v;HB?GG(N_w+7=W>MWk=mXF>;yLtdvjmEpot>#-SCmMS
zWr{K1aYNyiOw)EnMKQ&hiHH6h)0Wp3J%bJpA-cC1I!$jzy~1Pqxg#!qFdkLSXj@3X
z*NVG-pTf5MSo44FUiMa<M9Bw}|8mJ9JRt=2)|*WV3SR*8E<v`H&1GY6de#?>Amwk|
z;s5;j%5tObx2V$FX{m;Jkv=Lrv}HAP)rSw#xcd(IBFVQ%@t+?dDwr|ZCsi_JZ>L2X
z{)DSoVGk7r1=fZNUUU}P!uiK0ga<T!Wjnv`^1mOiVmh7mWkN$&$nZGcMc;WLwz-J*
zNMm~54HWqUc7|C0|Ec);!$Gtx&`Sr2!$FmxFSv2BE*8C^4}q00RwngOTj+k$>mWJ)
zEr9%ZTLrtemo8ss$*tB7OU60$N=5WDL4UMX@RUPWRMDA8T!rw2R_M1LqecI8KKT9S
z6&TLz^Wy<|C#-&PJv~Q73-$%8WJpfJIr^j_iDXoeenBSNq<qTjhU!&Vbw8reHVEdQ
zS#Xa2r?SJ>h$nowf@?)ZDO5IMOz`*cXv;ZApEQJ};Y53kR}g%2xeG%E{&=Z3f>ep7
zYdJZU@&WzG|HWq6MmA_r#BJNwK}%pQGcT~?A$S)w3ePFz42oK0JVEX|WQgo;_Kxqd
zHb`*YqU_KVqYg<ufBx4{GvVrd|5eGpvVVoS?rY)(^_OE>6<5EM3rPbcY7iW4ISlb*
zTK(sf;(I@bqlHdD91b@YhYmtQ+?f8~UsS<vJ7q0Q?48OE_V&W&BV?4#Hzl11071an
z3m5)Y7Pd67^-JTob>A69GDpl8>tQRxENiDF02M6on>jN!b-fU<WI|CzYg)OjC48AS
z3LB@Pt^IDs<d1rYdRTZ=o>+HMRW+bP5y<4%fhh<Y(~}neIDJ7kxd9*;z@Ne$>VmU2
zJDh{s3QeWmGI*yjv=5Tt(MWs`l|J1&JZB%zext?OT8Gb`y?6C$pCLo+9*_U&Vq^tQ
z%-lId1bxNy;eQ(kJD4BZF!%IcxXDR+=HDc$cBTwi7_zC+EwbNs*RJ0&LME^w=$4Hc
z#9Oa@;N;;UAPZb{bNx<~Lo^%df-y%2A+I)b4-5_I#28EV?AC2Sr!D;T&C)6q<^F%G
zw^gtMJ3F_1doCQDtFwk`c~SFF#8WLDuUh+kx~lMD$k3tGLd)@^HPys2DmA$JNP2oY
z>tQxtwqG_!c%NkLF=HNHzAT)Ag3C-EXHwF9%KVy_v@FccXG3tO8s`S}6*K>43XL>m
zBCWvV*q=<-N3zxn1fGe8V@EopU>L8ohThl8W?1V8FV8Q#Lj+hXvnnwCD!5GB!pI$o
zp-q~-&@sp&EVR#2LGtHqTKy!jjE}n}T@!pA7PmAM1!FizoQ+V+-r8ai8f<8P8ab$y
z>DEcTccP0G=m|ozAZ+6mYH@1ctnGh&KV^1F@Bx6U5FG4vJ3`!YMa;&EirX@KouG|E
zOo(a4m3@o?h+bT_k<RQ9PBMgqkV7pePnH6`f4Fzn?$f7-R|?<>eQzF}*oTR26qIh!
z&rrG^I(SgFk)b@D131&v8&AI9g0&$6pa=nt;fBr!-k)T+wNgpYHJeUiHZF8>G!j?1
zhvLg6HhYGk#=A_e<l^#Ah%WwY7yxqs{VOmheNl)fx+WdLa;%c2m>ObMzyN{?6f%!p
zjEdjj<|kCiPSHO+9PB(5LoabWtDC7cY(L=4#1rp0gnN~@T&0FjW#Gm&0{oG!XIcQ>
zqk*qz$n2KWJjawlG*0<Xsa)+kB2(c?8}h`lwPgw=astO-6^b1}JP6yLv6uk=$ApGn
zrS_cf>wEM;zz-Q(WhBB>8jdMZ1`{6t+582s!0Ca#hm@y62Vj$OX4%rEj7t2zvgKm5
zSgg((zS}Uv#;y5c(@WT%#WjzP7MPIlJO?U35aOeu-2u44Tj1#SeS7vS4-TfoJq*~S
zH%|9^JbPk|6Ot!Oh?cJz6#0;@%r9D-H%1iaHuxl*!J?w67cwk+;{cp+R~H@jTy~@<
zL_TrT?EnsW)_L>gWQXp@B~KS6s7-BK<cBVo9ZI5y_&(jgFDQD6F@+jm1MVJ>1YtQH
z*d3hQA=?Aa2`h_^(9a{88jffZdCl<QWs8g|H~+3sxz<9vq`D4b6GvG9k}NHkS3lco
z*?U$k#;DUSEI-LERkoxL4E^)_$^N7Y7WftjX!FN~bV#IxxS-8o-U7+hZSW}i7qW}`
z+a{hmpbpVXHIBH)xh3Ao9_#$2M(zWymi+f?x%Y67=OJ#WukWBS{Q8|cDl1~Y9)CMP
zdPc`=OTq%qO*g-T!dm4^<x8+P1Olcbj)B+`|2!|Q@J`E#6>h46Lb{&NhVkHRe*E=y
z{-#d_je+l}udK)oU%zf057zEIdt5v`j=#CBU^<`i4_sXX<TE@ZbQwh+Cm&`#PuQ2=
z@{&9Zas0&xbo*6b3|&?tso9HDc(Mm7M2Cbg%$^HPCQwww6&`rv=G3<3K7DLT4%le)
z+xi(@Hk@y#c<{zHyWGWR$E5zT=h&gRFNrfN-W%^X9ozWJw5#{^ZaFXh{fwUUkqIC8
z;=q@wXYRWBXP@exd1uAH9~)Ox4IK12Wq025Q}d@K7(O>{>Y6z8+UIFMzVef>;*77;
z)8>zByrKS^-50+<Sn!Qf#BY?P-yD$m&D#F|^)|DRf9nAGD^08Ksh!{1|JaeOps;E5
YPZf=B{%_`FG=G9g6Q)=kH=D8Uf82kiIRF3v

literal 0
HcmV?d00001

diff --git a/docs/source/assets/deployment/anything-llm-chat-with-doc.png b/docs/assets/deployment/anything-llm-chat-with-doc.png
similarity index 100%
rename from docs/source/assets/deployment/anything-llm-chat-with-doc.png
rename to docs/assets/deployment/anything-llm-chat-with-doc.png
diff --git a/docs/source/assets/deployment/anything-llm-chat-without-doc.png b/docs/assets/deployment/anything-llm-chat-without-doc.png
similarity index 100%
rename from docs/source/assets/deployment/anything-llm-chat-without-doc.png
rename to docs/assets/deployment/anything-llm-chat-without-doc.png
diff --git a/docs/source/assets/deployment/anything-llm-provider.png b/docs/assets/deployment/anything-llm-provider.png
similarity index 100%
rename from docs/source/assets/deployment/anything-llm-provider.png
rename to docs/assets/deployment/anything-llm-provider.png
diff --git a/docs/source/assets/deployment/anything-llm-upload-doc.png b/docs/assets/deployment/anything-llm-upload-doc.png
similarity index 100%
rename from docs/source/assets/deployment/anything-llm-upload-doc.png
rename to docs/assets/deployment/anything-llm-upload-doc.png
diff --git a/docs/source/assets/deployment/architecture_helm_deployment.png b/docs/assets/deployment/architecture_helm_deployment.png
similarity index 100%
rename from docs/source/assets/deployment/architecture_helm_deployment.png
rename to docs/assets/deployment/architecture_helm_deployment.png
diff --git a/docs/source/assets/deployment/chatbox-chat.png b/docs/assets/deployment/chatbox-chat.png
similarity index 100%
rename from docs/source/assets/deployment/chatbox-chat.png
rename to docs/assets/deployment/chatbox-chat.png
diff --git a/docs/source/assets/deployment/chatbox-settings.png b/docs/assets/deployment/chatbox-settings.png
similarity index 100%
rename from docs/source/assets/deployment/chatbox-settings.png
rename to docs/assets/deployment/chatbox-settings.png
diff --git a/docs/source/assets/deployment/dify-chat.png b/docs/assets/deployment/dify-chat.png
similarity index 100%
rename from docs/source/assets/deployment/dify-chat.png
rename to docs/assets/deployment/dify-chat.png
diff --git a/docs/source/assets/deployment/dify-create-chatbot.png b/docs/assets/deployment/dify-create-chatbot.png
similarity index 100%
rename from docs/source/assets/deployment/dify-create-chatbot.png
rename to docs/assets/deployment/dify-create-chatbot.png
diff --git a/docs/source/assets/deployment/dify-settings.png b/docs/assets/deployment/dify-settings.png
similarity index 100%
rename from docs/source/assets/deployment/dify-settings.png
rename to docs/assets/deployment/dify-settings.png
diff --git a/docs/source/assets/deployment/open_webui.png b/docs/assets/deployment/open_webui.png
similarity index 100%
rename from docs/source/assets/deployment/open_webui.png
rename to docs/assets/deployment/open_webui.png
diff --git a/docs/source/assets/deployment/streamlit-chat.png b/docs/assets/deployment/streamlit-chat.png
similarity index 100%
rename from docs/source/assets/deployment/streamlit-chat.png
rename to docs/assets/deployment/streamlit-chat.png
diff --git a/docs/source/assets/design/arch_overview/entrypoints.excalidraw.png b/docs/assets/design/arch_overview/entrypoints.excalidraw.png
similarity index 100%
rename from docs/source/assets/design/arch_overview/entrypoints.excalidraw.png
rename to docs/assets/design/arch_overview/entrypoints.excalidraw.png
diff --git a/docs/source/assets/design/arch_overview/llm_engine.excalidraw.png b/docs/assets/design/arch_overview/llm_engine.excalidraw.png
similarity index 100%
rename from docs/source/assets/design/arch_overview/llm_engine.excalidraw.png
rename to docs/assets/design/arch_overview/llm_engine.excalidraw.png
diff --git a/docs/source/assets/design/hierarchy.png b/docs/assets/design/hierarchy.png
similarity index 100%
rename from docs/source/assets/design/hierarchy.png
rename to docs/assets/design/hierarchy.png
diff --git a/docs/source/assets/design/v1/metrics/intervals-1.png b/docs/assets/design/v1/metrics/intervals-1.png
similarity index 100%
rename from docs/source/assets/design/v1/metrics/intervals-1.png
rename to docs/assets/design/v1/metrics/intervals-1.png
diff --git a/docs/source/assets/design/v1/metrics/intervals-2.png b/docs/assets/design/v1/metrics/intervals-2.png
similarity index 100%
rename from docs/source/assets/design/v1/metrics/intervals-2.png
rename to docs/assets/design/v1/metrics/intervals-2.png
diff --git a/docs/source/assets/design/v1/metrics/intervals-3.png b/docs/assets/design/v1/metrics/intervals-3.png
similarity index 100%
rename from docs/source/assets/design/v1/metrics/intervals-3.png
rename to docs/assets/design/v1/metrics/intervals-3.png
diff --git a/docs/source/assets/design/v1/prefix_caching/example-time-1.png b/docs/assets/design/v1/prefix_caching/example-time-1.png
similarity index 100%
rename from docs/source/assets/design/v1/prefix_caching/example-time-1.png
rename to docs/assets/design/v1/prefix_caching/example-time-1.png
diff --git a/docs/source/assets/design/v1/prefix_caching/example-time-3.png b/docs/assets/design/v1/prefix_caching/example-time-3.png
similarity index 100%
rename from docs/source/assets/design/v1/prefix_caching/example-time-3.png
rename to docs/assets/design/v1/prefix_caching/example-time-3.png
diff --git a/docs/source/assets/design/v1/prefix_caching/example-time-4.png b/docs/assets/design/v1/prefix_caching/example-time-4.png
similarity index 100%
rename from docs/source/assets/design/v1/prefix_caching/example-time-4.png
rename to docs/assets/design/v1/prefix_caching/example-time-4.png
diff --git a/docs/source/assets/design/v1/prefix_caching/example-time-5.png b/docs/assets/design/v1/prefix_caching/example-time-5.png
similarity index 100%
rename from docs/source/assets/design/v1/prefix_caching/example-time-5.png
rename to docs/assets/design/v1/prefix_caching/example-time-5.png
diff --git a/docs/source/assets/design/v1/prefix_caching/example-time-6.png b/docs/assets/design/v1/prefix_caching/example-time-6.png
similarity index 100%
rename from docs/source/assets/design/v1/prefix_caching/example-time-6.png
rename to docs/assets/design/v1/prefix_caching/example-time-6.png
diff --git a/docs/source/assets/design/v1/prefix_caching/example-time-7.png b/docs/assets/design/v1/prefix_caching/example-time-7.png
similarity index 100%
rename from docs/source/assets/design/v1/prefix_caching/example-time-7.png
rename to docs/assets/design/v1/prefix_caching/example-time-7.png
diff --git a/docs/source/assets/design/v1/prefix_caching/free.png b/docs/assets/design/v1/prefix_caching/free.png
similarity index 100%
rename from docs/source/assets/design/v1/prefix_caching/free.png
rename to docs/assets/design/v1/prefix_caching/free.png
diff --git a/docs/source/assets/design/v1/prefix_caching/overview.png b/docs/assets/design/v1/prefix_caching/overview.png
similarity index 100%
rename from docs/source/assets/design/v1/prefix_caching/overview.png
rename to docs/assets/design/v1/prefix_caching/overview.png
diff --git a/docs/source/assets/features/disagg_prefill/abstraction.jpg b/docs/assets/features/disagg_prefill/abstraction.jpg
similarity index 100%
rename from docs/source/assets/features/disagg_prefill/abstraction.jpg
rename to docs/assets/features/disagg_prefill/abstraction.jpg
diff --git a/docs/source/assets/features/disagg_prefill/overview.jpg b/docs/assets/features/disagg_prefill/overview.jpg
similarity index 100%
rename from docs/source/assets/features/disagg_prefill/overview.jpg
rename to docs/assets/features/disagg_prefill/overview.jpg
diff --git a/docs/source/assets/kernel/k_vecs.png b/docs/assets/kernel/k_vecs.png
similarity index 100%
rename from docs/source/assets/kernel/k_vecs.png
rename to docs/assets/kernel/k_vecs.png
diff --git a/docs/source/assets/kernel/key.png b/docs/assets/kernel/key.png
similarity index 100%
rename from docs/source/assets/kernel/key.png
rename to docs/assets/kernel/key.png
diff --git a/docs/source/assets/kernel/logits_vec.png b/docs/assets/kernel/logits_vec.png
similarity index 100%
rename from docs/source/assets/kernel/logits_vec.png
rename to docs/assets/kernel/logits_vec.png
diff --git a/docs/source/assets/kernel/q_vecs.png b/docs/assets/kernel/q_vecs.png
similarity index 100%
rename from docs/source/assets/kernel/q_vecs.png
rename to docs/assets/kernel/q_vecs.png
diff --git a/docs/source/assets/kernel/query.png b/docs/assets/kernel/query.png
similarity index 100%
rename from docs/source/assets/kernel/query.png
rename to docs/assets/kernel/query.png
diff --git a/docs/source/assets/kernel/v_vec.png b/docs/assets/kernel/v_vec.png
similarity index 100%
rename from docs/source/assets/kernel/v_vec.png
rename to docs/assets/kernel/v_vec.png
diff --git a/docs/source/assets/kernel/value.png b/docs/assets/kernel/value.png
similarity index 100%
rename from docs/source/assets/kernel/value.png
rename to docs/assets/kernel/value.png
diff --git a/docs/source/assets/logos/vllm-logo-only-light.ico b/docs/assets/logos/vllm-logo-only-light.ico
similarity index 100%
rename from docs/source/assets/logos/vllm-logo-only-light.ico
rename to docs/assets/logos/vllm-logo-only-light.ico
diff --git a/docs/source/assets/logos/vllm-logo-only-light.png b/docs/assets/logos/vllm-logo-only-light.png
similarity index 100%
rename from docs/source/assets/logos/vllm-logo-only-light.png
rename to docs/assets/logos/vllm-logo-only-light.png
diff --git a/docs/source/assets/logos/vllm-logo-text-dark.png b/docs/assets/logos/vllm-logo-text-dark.png
similarity index 100%
rename from docs/source/assets/logos/vllm-logo-text-dark.png
rename to docs/assets/logos/vllm-logo-text-dark.png
diff --git a/docs/source/assets/logos/vllm-logo-text-light.png b/docs/assets/logos/vllm-logo-text-light.png
similarity index 100%
rename from docs/source/assets/logos/vllm-logo-text-light.png
rename to docs/assets/logos/vllm-logo-text-light.png
diff --git a/docs/source/community/meetups.md b/docs/community/meetups.md
similarity index 98%
rename from docs/source/community/meetups.md
rename to docs/community/meetups.md
index aa1a71c86c0a6..2c47be443a5e9 100644
--- a/docs/source/community/meetups.md
+++ b/docs/community/meetups.md
@@ -1,6 +1,7 @@
-(meetups)=
-
-# vLLM Meetups
+---
+title: vLLM Meetups
+---
+[](){ #meetups }
 
 We host regular meetups in San Francisco Bay Area every 2 months. We will share the project updates from the vLLM team and have guest speakers from the industry to share their experience and insights. Please find the materials of our previous meetups below:
 
diff --git a/docs/source/community/sponsors.md b/docs/community/sponsors.md
similarity index 100%
rename from docs/source/community/sponsors.md
rename to docs/community/sponsors.md
diff --git a/docs/source/contributing/deprecation_policy.md b/docs/contributing/deprecation_policy.md
similarity index 100%
rename from docs/source/contributing/deprecation_policy.md
rename to docs/contributing/deprecation_policy.md
diff --git a/docs/source/contributing/dockerfile/dockerfile.md b/docs/contributing/dockerfile/dockerfile.md
similarity index 89%
rename from docs/source/contributing/dockerfile/dockerfile.md
rename to docs/contributing/dockerfile/dockerfile.md
index 90b9a33cfbe62..3765996cb03f2 100644
--- a/docs/source/contributing/dockerfile/dockerfile.md
+++ b/docs/contributing/dockerfile/dockerfile.md
@@ -1,7 +1,7 @@
 # Dockerfile
 
 We provide a <gh-file:docker/Dockerfile> to construct the image for running an OpenAI compatible server with vLLM.
-More information about deploying with Docker can be found [here](#deployment-docker).
+More information about deploying with Docker can be found [here][deployment-docker].
 
 Below is a visual representation of the multi-stage Dockerfile. The build graph contains the following nodes:
 
@@ -17,11 +17,9 @@ The edges of the build graph represent:
 
 - `RUN --mount=(.\*)from=...` dependencies (with a dotted line and an empty diamond arrow head)
 
-  > :::{figure} /assets/contributing/dockerfile-stages-dependency.png
-  > :align: center
-  > :alt: query
-  > :width: 100%
-  > :::
+  > <figure markdown="span">
+  >   ![](../../assets/contributing/dockerfile-stages-dependency.png){ align="center" alt="query" width="100%" }
+  > </figure>
   >
   > Made using: <https://github.com/patrickhoefler/dockerfilegraph>
   >
diff --git a/docs/contributing/model/README.md b/docs/contributing/model/README.md
new file mode 100644
index 0000000000000..b7727f02c11bf
--- /dev/null
+++ b/docs/contributing/model/README.md
@@ -0,0 +1,23 @@
+---
+title: Adding a New Model
+---
+[](){ #new-model }
+
+This section provides more information on how to integrate a [PyTorch](https://pytorch.org/) model into vLLM.
+
+Contents:
+
+- [Basic](basic.md)
+- [Registration](registration.md)
+- [Tests](tests.md)
+- [Multimodal](multimodal.md)
+
+!!! note
+    The complexity of adding a new model depends heavily on the model's architecture.
+    The process is fairly straightforward if the model shares a similar architecture with an existing model in vLLM.
+    However, for models that include new operators (e.g., a new attention mechanism), the process can be a bit more complex.
+
+!!! tip
+    If you are encountering issues while integrating your model into vLLM, feel free to open a [GitHub issue](https://github.com/vllm-project/vllm/issues)
+    or ask on our [developer slack](https://slack.vllm.ai).
+    We will be happy to help you out!
diff --git a/docs/source/contributing/model/basic.md b/docs/contributing/model/basic.md
similarity index 87%
rename from docs/source/contributing/model/basic.md
rename to docs/contributing/model/basic.md
index 1fa56dc4728d3..0c0ba33792578 100644
--- a/docs/source/contributing/model/basic.md
+++ b/docs/contributing/model/basic.md
@@ -1,6 +1,7 @@
-(new-model-basic)=
-
-# Implementing a Basic Model
+---
+title: Implementing a Basic Model
+---
+[](){ #new-model-basic }
 
 This guide walks you through the steps to implement a basic vLLM model.
 
@@ -10,9 +11,8 @@ First, clone the PyTorch model code from the source repository.
 For instance, vLLM's [OPT model](gh-file:vllm/model_executor/models/opt.py) was adapted from
 HuggingFace's [modeling_opt.py](https://github.com/huggingface/transformers/blob/main/src/transformers/models/opt/modeling_opt.py) file.
 
-:::{warning}
-Make sure to review and adhere to the original code's copyright and licensing terms!
-:::
+!!! warning
+    Make sure to review and adhere to the original code's copyright and licensing terms!
 
 ## 2. Make your code compatible with vLLM
 
@@ -67,7 +67,7 @@ class MyModel(nn.Module):
         ... 
 ```
 
-- Rewrite the {meth}`~torch.nn.Module.forward` method of your model to remove any unnecessary code, such as training-specific code. Modify the input parameters to treat `input_ids` and `positions` as flattened tensors with a single batch size dimension, without a max-sequence length dimension.
+- Rewrite the [forward][torch.nn.Module.forward] method of your model to remove any unnecessary code, such as training-specific code. Modify the input parameters to treat `input_ids` and `positions` as flattened tensors with a single batch size dimension, without a max-sequence length dimension.
 
 ```python
 def forward(
@@ -78,10 +78,9 @@ def forward(
     ...
 ```
 
-:::{note}
-Currently, vLLM supports the basic multi-head attention mechanism and its variant with rotary positional embeddings.
-If your model employs a different attention mechanism, you will need to implement a new attention layer in vLLM.
-:::
+!!! note
+    Currently, vLLM supports the basic multi-head attention mechanism and its variant with rotary positional embeddings.
+    If your model employs a different attention mechanism, you will need to implement a new attention layer in vLLM.
 
 For reference, check out our [Llama implementation](gh-file:vllm/model_executor/models/llama.py). vLLM already supports a large number of models. It is recommended to find a model similar to yours and adapt it to your model's architecture. Check out <gh-dir:vllm/model_executor/models> for more examples.
 
@@ -89,7 +88,7 @@ For reference, check out our [Llama implementation](gh-file:vllm/model_executor/
 
 If your model is too large to fit into a single GPU, you can use tensor parallelism to manage it.
 To do this, substitute your model's linear and embedding layers with their tensor-parallel versions.
-For the embedding layer, you can simply replace {class}`torch.nn.Embedding` with `VocabParallelEmbedding`. For the output LM head, you can use `ParallelLMHead`.
+For the embedding layer, you can simply replace [torch.nn.Embedding][] with `VocabParallelEmbedding`. For the output LM head, you can use `ParallelLMHead`.
 When it comes to the linear layers, we provide the following options to parallelize them:
 
 - `ReplicatedLinear`: Replicates the inputs and weights across multiple GPUs. No memory saving.
@@ -107,7 +106,7 @@ This method should load the weights from the HuggingFace's checkpoint file and a
 
 ## 5. Register your model
 
-See [this page](#new-model-registration) for instructions on how to register your new model to be used by vLLM.
+See [this page][new-model-registration] for instructions on how to register your new model to be used by vLLM.
 
 ## Frequently Asked Questions
 
diff --git a/docs/contributing/model/multimodal.md b/docs/contributing/model/multimodal.md
new file mode 100644
index 0000000000000..892ab9098407c
--- /dev/null
+++ b/docs/contributing/model/multimodal.md
@@ -0,0 +1,803 @@
+---
+title: Multi-Modal Support
+---
+[](){ #supports-multimodal }
+
+This document walks you through the steps to extend a basic model so that it accepts [multi-modal inputs][multimodal-inputs].
+
+## 1. Update the base vLLM model
+
+It is assumed that you have already implemented the model in vLLM according to [these steps][new-model-basic].
+Further update the model as follows:
+
+- Reserve a keyword parameter in [forward][torch.nn.Module.forward] for each input tensor that corresponds to a multi-modal input, as shown in the following example:
+
+  ```diff
+    def forward(
+        self,
+        input_ids: torch.Tensor,
+        positions: torch.Tensor,
+  +     pixel_values: torch.Tensor,
+    ) -> SamplerOutput:
+  ```
+  
+  More conveniently, you can simply pass `**kwargs` to the [forward][torch.nn.Module.forward] method and retrieve the keyword parameters for multimodal inputs from it.
+
+- Implement [get_multimodal_embeddings][vllm.model_executor.models.interfaces.SupportsMultiModal.get_multimodal_embeddings] that returns the embeddings from running the multimodal inputs through the multimodal tokenizer of the model. Below we provide a boilerplate of a typical implementation pattern, but feel free to adjust it to your own needs.
+
+    ```python
+    class YourModelForImage2Seq(nn.Module):
+        ...
+
+        def _process_image_input(self, image_input: YourModelImageInputs) -> torch.Tensor:
+
+            assert self.vision_encoder is not None
+            image_features = self.vision_encoder(image_input)
+            return self.multi_modal_projector(image_features)
+
+        def get_multimodal_embeddings(
+                self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
+
+            # Validate the multimodal input keyword arguments
+            image_input = self._parse_and_validate_image_input(**kwargs)
+            if image_input is None:
+                return None
+
+            # Run multimodal inputs through encoder and projector
+            vision_embeddings = self._process_image_input(image_input)
+            return vision_embeddings
+    ```
+
+!!! warning
+    The returned `multimodal_embeddings` must be either a **3D [torch.Tensor][]** of shape `(num_items, feature_size, hidden_size)`, or a **list / tuple of 2D [torch.Tensor][]'s** of shape `(feature_size, hidden_size)`, so that `multimodal_embeddings[i]` retrieves the embeddings generated from the `i`-th multimodal data item (e.g., image) of the request.
+
+- Implement [get_input_embeddings][vllm.model_executor.models.interfaces.SupportsMultiModal.get_input_embeddings] to merge `multimodal_embeddings` with text embeddings from the `input_ids`. If input processing for the model is implemented correctly (see sections below), then you can leverage the utility function we provide to easily merge the embeddings.
+
+    ```python
+    from .utils import merge_multimodal_embeddings
+
+    class YourModelForImage2Seq(nn.Module):
+        ...
+
+        def get_input_embeddings(
+            self,
+            input_ids: torch.Tensor,
+            multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
+        ) -> torch.Tensor:
+
+            # `get_input_embeddings` should already be implemented for the language 
+            # model as one of the requirements of basic vLLM model implementation.
+            inputs_embeds = self.language_model.get_input_embeddings(input_ids)
+
+            if multimodal_embeddings is not None:
+                inputs_embeds = merge_multimodal_embeddings(
+                    input_ids=input_ids, 
+                    inputs_embeds=inputs_embeds, 
+                    multimodal_embeddings=multimodal_embeddings,
+                    placeholder_token_id=self.config.image_token_index)
+
+            return inputs_embeds
+    ```
+
+- Implement [get_language_model][vllm.model_executor.models.interfaces.SupportsMultiModal.get_language_model] getter to provide stable access to the underlying language model.
+
+    ```python
+    class YourModelForImage2Seq(nn.Module):
+        ...
+
+        def get_language_model(self) -> torch.nn.Module:
+            # Change `language_model` according to your implementation.
+            return self.language_model
+    ```
+
+- Once the above steps are done, update the model class with the [SupportsMultiModal][vllm.model_executor.models.interfaces.SupportsMultiModal] interface.
+
+  ```diff
+  + from vllm.model_executor.models.interfaces import SupportsMultiModal
+
+  - class YourModelForImage2Seq(nn.Module):
+  + class YourModelForImage2Seq(nn.Module, SupportsMultiModal):
+  ```
+
+!!! note
+      The model class does not have to be named `*ForCausalLM`.
+      Check out [the HuggingFace Transformers documentation](https://huggingface.co/docs/transformers/model_doc/auto#multimodal) for some examples.
+
+## 2. Specify processing information
+
+Next, create a subclass of [BaseProcessingInfo][vllm.multimodal.processing.BaseProcessingInfo]
+to provide basic information related to HF processing.
+
+### Maximum number of input items
+
+You need to override the abstract method [get_supported_mm_limits][vllm.multimodal.processing.BaseProcessingInfo.get_supported_mm_limits]
+to return the maximum number of input items for each modality supported by the model.
+
+For example, if the model supports any number of images but only one video per prompt:
+
+```python
+def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
+    return {"image": None, "video": 1}
+```
+
+## 3. Specify dummy inputs
+
+Then, inherit [BaseDummyInputsBuilder][vllm.multimodal.profiling.BaseDummyInputsBuilder] to construct dummy inputs for
+HF processing as well as memory profiling.
+
+### For memory profiling
+
+Override the abstract methods [get_dummy_text][vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_text] and [get_dummy_mm_data][vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_mm_data] to construct dummy inputs for memory profiling. These dummy inputs should result in the worst-case memory usage of the model so that vLLM can reserve the correct amount of memory for it.
+
+Assuming that the memory usage increases with the number of tokens, the dummy inputs can be constructed to maximize the number of output embeddings, which is the same as the number of placeholder feature tokens.
+
+=== "Basic example: LLaVA"
+
+    Looking at the code of HF's `LlavaForConditionalGeneration`:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/llava/modeling_llava.py#L530-L544
+    n_image_tokens = (input_ids == self.config.image_token_index).sum().item()
+    n_image_features = image_features.shape[0] * image_features.shape[1]
+
+    if n_image_tokens != n_image_features:
+        raise ValueError(
+            f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}"
+        )
+    special_image_mask = (
+        (input_ids == self.config.image_token_index)
+        .unsqueeze(-1)
+        .expand_as(inputs_embeds)
+        .to(inputs_embeds.device)
+    )
+    image_features = image_features.to(inputs_embeds.device, inputs_embeds.dtype)
+    inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_features)
+    ```
+
+    The number of placeholder feature tokens per image is `image_features.shape[1]`.
+    `image_features` is calculated inside the `get_image_features` method:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/llava/modeling_llava.py#L290-L300
+    image_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
+
+    selected_image_feature = image_outputs.hidden_states[vision_feature_layer]
+    if vision_feature_select_strategy == "default":
+        selected_image_feature = selected_image_feature[:, 1:]
+    elif vision_feature_select_strategy == "full":
+        selected_image_feature = selected_image_feature
+    else:
+        raise ValueError(f"Unexpected select feature strategy: {self.config.vision_feature_select_strategy}")
+    image_features = self.multi_modal_projector(selected_image_feature)
+    return image_features
+    ```
+
+    We can infer that `image_features.shape[1]` is based on `image_outputs.hidden_states.shape[1]` from the vision tower
+    (`CLIPVisionModel` for the [`llava-hf/llava-1.5-7b-hf`](https://huggingface.co/llava-hf/llava-1.5-7b-hf) model).
+    Moreover, we only need the sequence length (the second dimension of the tensor) to get `image_features.shape[1]`.
+    The sequence length is determined by the initial hidden states in `CLIPVisionTransformer` since the attention
+    mechanism doesn't change the sequence length of the output hidden states.
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/clip/modeling_clip.py#L1094-L1102
+    hidden_states = self.embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding)
+    hidden_states = self.pre_layrnorm(hidden_states)
+
+    encoder_outputs = self.encoder(
+        inputs_embeds=hidden_states,
+        output_attentions=output_attentions,
+        output_hidden_states=output_hidden_states,
+        return_dict=return_dict,
+    )
+    ```
+
+    To find the sequence length, we turn to the code of `CLIPVisionEmbeddings`:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/clip/modeling_clip.py#L247-L257
+    target_dtype = self.patch_embedding.weight.dtype
+    patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]
+    patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
+
+    class_embeds = self.class_embedding.expand(batch_size, 1, -1)
+    embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
+    if interpolate_pos_encoding:
+        embeddings = embeddings + self.interpolate_pos_encoding(embeddings, height, width)
+    else:
+        embeddings = embeddings + self.position_embedding(self.position_ids)
+    return embeddings
+    ```
+
+    We can infer that `embeddings.shape[1] == self.num_positions`, where
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/clip/modeling_clip.py#L195-L196
+    self.num_patches = (self.image_size // self.patch_size) ** 2
+    self.num_positions = self.num_patches + 1
+    ```
+
+    Overall, the number of placeholder feature tokens for an image can be calculated as:
+
+    ```python
+    def get_num_image_tokens(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+    ) -> int:
+        hf_config = self.get_hf_config()
+        hf_processor = self.get_hf_processor()
+
+        image_size = hf_config.vision_config.image_size
+        patch_size = hf_config.vision_config.patch_size
+
+        num_image_tokens = (image_size // patch_size) ** 2 + 1
+        if hf_processor.vision_feature_select_strategy == "default":
+            num_image_tokens -= 1
+
+        return num_image_tokens
+    ```
+
+    Notice that the number of image tokens doesn't depend on the image width and height.
+    We can simply use a dummy `image_size` to calculate the multimodal profiling data:
+
+    ```python
+    # NOTE: In actuality, this is usually implemented as part of the
+    # model's subclass of `BaseProcessingInfo`, but we show it as is
+    # here for simplicity.
+    def get_image_size_with_most_features(self) -> ImageSize:
+        hf_config = self.get_hf_config()
+        width = height = hf_config.image_size
+        return ImageSize(width=width, height=height)
+
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> MultiModalDataDict:
+        num_images = mm_counts.get("image", 0)
+
+        target_width, target_height = \
+            self.info.get_image_size_with_most_features()
+
+        return {
+            "image":
+            self._get_dummy_images(width=target_width,
+                                   height=target_height,
+                                   num_images=num_images)
+        }
+    ```
+
+    For the text, we simply expand the multimodal image token from the model config to match the desired number of images.
+
+    ```python
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        num_images = mm_counts.get("image", 0)
+
+        processor = self.info.get_hf_processor()
+        image_token = processor.image_token
+
+        return image_token * num_images
+    ```
+
+=== "No input placeholders: Fuyu"
+
+    Looking at the code of HF's `FuyuForCausalLM`:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/modeling_fuyu.py#L311-L322
+    if image_patches is not None and past_key_values is None:
+        patch_embeddings = [
+            self.vision_embed_tokens(patch.to(self.vision_embed_tokens.weight.dtype))
+            .squeeze(0)
+            .to(inputs_embeds.device)
+            for patch in image_patches
+        ]
+        inputs_embeds = self.gather_continuous_embeddings(
+            word_embeddings=inputs_embeds,
+            continuous_embeddings=patch_embeddings,
+            image_patch_input_indices=image_patches_indices,
+        )
+    ```
+
+    The number of placeholder feature tokens for the `i`th item in the batch is `patch_embeddings[i].shape[0]`,
+    which is the same as `image_patches[i].shape[0]`, i.e. `num_total_patches`.
+
+    Unlike LLaVA, Fuyu does not define the number of patches inside the modeling file. Where can we get more information?
+    Considering that the model input comes from the output of `FuyuProcessor`, let's **look at the preprocessing files**.
+
+    The image outputs are obtained by calling `FuyuImageProcessor.preprocess` and then
+    `FuyuImageProcessor.preprocess_with_tokenizer_info` inside `FuyuProcessor`.
+
+    In `FuyuImageProcessor.preprocess`, the images are resized and padded to the target `FuyuImageProcessor.size`,
+    returning the dimensions after resizing (but before padding) as metadata.
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/processing_fuyu.py#L541-L544
+    image_encoding = self.image_processor.preprocess(images, **output_kwargs["images_kwargs"])
+    batch_images = image_encoding["images"]
+    image_unpadded_heights = image_encoding["image_unpadded_heights"]
+    image_unpadded_widths = image_encoding["image_unpadded_widths"]
+
+    # https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/image_processing_fuyu.py#L480
+    if do_resize:
+        batch_images = [
+            [self.resize(image, size=size, input_data_format=input_data_format) for image in images]
+            for images in batch_images
+        ]
+
+    image_sizes = [get_image_size(images[0], channel_dim=input_data_format) for images in batch_images]
+    image_unpadded_heights = [[image_size[0]] for image_size in image_sizes]
+    image_unpadded_widths = [[image_size[1]] for image_size in image_sizes]
+
+    if do_pad:
+        batch_images = [
+            [
+                self.pad_image(
+                    image,
+                    size=size,
+                    mode=padding_mode,
+                    constant_values=padding_value,
+                    input_data_format=input_data_format,
+                )
+                for image in images
+            ]
+            for images in batch_images
+        ]
+    ```
+
+    In `FuyuImageProcessor.preprocess_with_tokenizer_info`, the images are split into patches based on this metadata:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/processing_fuyu.py#L417-L425
+    model_image_input = self.image_processor.preprocess_with_tokenizer_info(
+        image_input=tensor_batch_images,
+        image_present=image_present,
+        image_unpadded_h=image_unpadded_heights,
+        image_unpadded_w=image_unpadded_widths,
+        image_placeholder_id=image_placeholder_id,
+        image_newline_id=image_newline_id,
+        variable_sized=True,
+    )
+
+    # https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/image_processing_fuyu.py#L638-L658
+    image_height, image_width = image.shape[1], image.shape[2]
+    if variable_sized:  # variable_sized=True
+        new_h = min(
+            image_height,
+            math.ceil(image_unpadded_h[batch_index, subseq_index] / patch_height) * patch_height,
+        )
+        new_w = min(
+            image_width,
+            math.ceil(image_unpadded_w[batch_index, subseq_index] / patch_width) * patch_width,
+        )
+        image = image[:, :new_h, :new_w]
+        image_height, image_width = new_h, new_w
+
+    num_patches = self.get_num_patches(image_height=image_height, image_width=image_width)
+    tensor_of_image_ids = torch.full(
+        [num_patches], image_placeholder_id, dtype=torch.int32, device=image_input.device
+    )
+    patches = self.patchify_image(image=image.unsqueeze(0)).squeeze(0)
+    assert num_patches == patches.shape[0]
+    ```
+
+    The number of patches is in turn defined by `FuyuImageProcessor.get_num_patches`:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/image_processing_fuyu.py#L552-L562
+    patch_size = patch_size if patch_size is not None else self.patch_size
+    patch_height, patch_width = self.patch_size["height"], self.patch_size["width"]
+
+    if image_height % patch_height != 0:
+        raise ValueError(f"{image_height=} must be divisible by {patch_height}")
+    if image_width % patch_width != 0:
+        raise ValueError(f"{image_width=} must be divisible by {patch_width}")
+
+    num_patches_per_dim_h = image_height // patch_height
+    num_patches_per_dim_w = image_width // patch_width
+    num_patches = num_patches_per_dim_h * num_patches_per_dim_w
+    ```
+
+    These image patches correspond to placeholder tokens (`|SPEAKER|`). So, we just need to maximize the number of image patches. Since input images are first resized
+    to fit within `image_processor.size`, we can maximize the number of image patches by inputting an image with size equal to `image_processor.size`.
+
+    ```python
+    def get_image_size_with_most_features(self) -> ImageSize:
+        image_processor = self.get_image_processor()
+        return ImageSize(width=image_processor.size["width"],
+                            height=image_processor.size["height"])
+    ```
+
+    Fuyu does not expect image placeholders in the inputs to HF processor, so
+    the dummy prompt text is empty regardless of the number of images.
+
+    ```python
+    def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
+        return ""
+    ```
+
+    For the multimodal image profiling data, the logic is very similar to LLaVA:
+
+    ```python
+    def get_dummy_mm_data(
+        self,
+        seq_len: int,
+        mm_counts: Mapping[str, int],
+    ) -> MultiModalDataDict:
+        target_width, target_height = \
+            self.info.get_image_size_with_most_features()
+        num_images = mm_counts.get("image", 0)
+
+        return {
+            "image":
+            self._get_dummy_images(width=target_width,
+                                   height=target_height,
+                                   num_images=num_images)
+        }
+    ```
+
+## 4. Specify processing details
+
+Afterwards, create a subclass of [BaseMultiModalProcessor][vllm.multimodal.processing.BaseMultiModalProcessor]
+to fill in the missing details about HF processing.
+
+!!! info
+    [Multi-Modal Data Processing][mm-processing]
+
+### Multi-modal fields
+
+Override [_get_mm_fields_config][vllm.multimodal.processing.BaseMultiModalProcessor._get_mm_fields_config] to
+return a schema of the tensors outputted by the HF processor that are related to the input multi-modal items.
+
+=== "Basic example: LLaVA"
+
+    The output of `CLIPImageProcessor` is a simple tensor with shape
+    `(num_images, num_channels, image_height, image_width)`:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/clip/image_processing_clip.py#L339-L345
+    images = [
+        to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
+        for image in all_images
+    ]
+
+    data = {"pixel_values": images}
+    return BatchFeature(data=data, tensor_type=return_tensors)
+    ```
+
+    So, we override [_get_mm_fields_config][vllm.multimodal.processing.BaseMultiModalProcessor._get_mm_fields_config] as follows:
+
+    ```python
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return dict(
+            pixel_values=MultiModalFieldConfig.batched("image"),
+        )
+    ```
+
+    !!! note
+        Our [actual code](gh-file:vllm/model_executor/models/llava.py) additionally supports
+        pre-computed image embeddings, which can be passed to the model via the `image_embeds` argument.
+
+=== "With postprocessing: Fuyu"
+
+    The `image_patches` output of `FuyuImageProcessor.preprocess_with_tokenizer_info` concatenates
+    the patches from each image belonging to an item in the batch:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/image_processing_fuyu.py#L673-L679
+            image_input_ids.append(tensor_of_image_ids)
+            image_patches.append(patches)
+        else:
+            image_input_ids.append(torch.tensor([], dtype=torch.int32, device=image_input.device))
+
+    batch_image_input_ids.append(image_input_ids)
+    batch_image_patches.append(image_patches)
+    ```
+
+    The shape of `image_patches` outputted by `FuyuImageProcessor` is therefore
+    `(1, num_images, num_patches, patch_width * patch_height * num_channels)`.
+
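+    In order to support the use of [MultiModalFieldConfig.batched][vllm.multimodal.inputs.MultiModalFieldConfig.batched] like in LLaVA,
+    we remove the extra batch dimension by overriding [BaseMultiModalProcessor._call_hf_processor][vllm.multimodal.processing.BaseMultiModalProcessor._call_hf_processor]: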
+
+    ```python
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        processed_outputs = super()._call_hf_processor(
+            prompt=prompt,
+            mm_data=mm_data,
+            mm_kwargs=mm_kwargs,
+        )
+
+        image_patches = processed_outputs.get("image_patches")
+        if image_patches is not None:
+            images = mm_data["images"]
+            assert isinstance(images, list)
+
+            # Original output: (1, num_images, Pn, Px * Py * C)
+            # New output: (num_images, Pn, Px * Py * C)
+            assert (isinstance(image_patches, list)
+                    and len(image_patches) == 1)
+            assert (isinstance(image_patches[0], torch.Tensor)
+                    and len(image_patches[0]) == len(images))
+
+            processed_outputs["image_patches"] = image_patches[0]
+
+        return processed_outputs
+    ```
+
+    !!! note
+        Our [actual code](gh-file:vllm/model_executor/models/fuyu.py) has special handling
+        for text-only inputs to prevent unnecessary warnings from HF processor.
+
+    This lets us override [_get_mm_fields_config][vllm.multimodal.processing.BaseMultiModalProcessor._get_mm_fields_config] as follows:
+
+    ```python
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        return dict(image_patches=MultiModalFieldConfig.batched("image"))
+    ```
+
+### Prompt updates
+
+Override [_get_prompt_updates][vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates] to
+return a list of [PromptUpdate][vllm.multimodal.processing.PromptUpdate] instances.
+
+Each [PromptUpdate][vllm.multimodal.processing.PromptUpdate] instance specifies an update operation
+(e.g.: insertion, replacement) performed by the HF processor.
+
+=== "Basic example: LLaVA"
+
+    Looking at HF's `LlavaProcessor`:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/llava/processing_llava.py#L167-L170
+    prompt_strings = []
+    for sample in text:
+        sample = sample.replace(self.image_token, self.image_token * num_image_tokens)
+        prompt_strings.append(sample)
+    ```
+
+    It simply repeats each input `image_token` a number of times equal to the number of placeholder feature tokens (`num_image_tokens`).
+    Based on this, we override [_get_prompt_updates][vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates] as follows:
+
+    ```python
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        out_mm_kwargs: MultiModalKwargs,
+    ) -> Sequence[PromptUpdate]:
+        hf_config = self.info.get_hf_config()
+        image_token_id = hf_config.image_token_index
+
+        def get_replacement(item_idx: int):
+            images = mm_items.get_items("image", ImageProcessorItems)
+
+            image_size = images.get_image_size(item_idx)
+            num_image_tokens = self.info.get_num_image_tokens(
+                image_width=image_size.width,
+                image_height=image_size.height,
+            )
+
+            return [image_token_id] * num_image_tokens
+
+        return [
+            PromptReplacement(
+                modality="image",
+                target=[image_token_id],
+                replacement=get_replacement,
+            ),
+        ]
+    ```
+
+=== "Handling additional tokens: Fuyu"
+
+    Recall the layout of feature tokens from Step 2:
+
+    ```
+    |SPEAKER||SPEAKER|...|SPEAKER||NEWLINE|
+    |SPEAKER||SPEAKER|...|SPEAKER||NEWLINE|
+    ...
+    |SPEAKER||SPEAKER|...|SPEAKER||NEWLINE|
+    ```
+
+    We define a helper function to return `ncols` and `nrows` directly:
+
+    ```python
+    def get_image_feature_grid_size(
+        self,
+        *,
+        image_width: int,
+        image_height: int,
+    ) -> tuple[int, int]:
+        image_processor = self.get_image_processor()
+        target_width = image_processor.size["width"]
+        target_height = image_processor.size["height"]
+        patch_width = image_processor.patch_size["width"]
+        patch_height = image_processor.patch_size["height"]
+
+        if not (image_width <= target_width and image_height <= target_height):
+            height_scale_factor = target_height / image_height
+            width_scale_factor = target_width / image_width
+            optimal_scale_factor = min(height_scale_factor, width_scale_factor)
+
+            image_height = int(image_height * optimal_scale_factor)
+            image_width = int(image_width * optimal_scale_factor)
+
+        ncols = math.ceil(image_width / patch_width)
+        nrows = math.ceil(image_height / patch_height)
+        return ncols, nrows
+    ```
+
+    Based on this, we can initially define our replacement tokens as:
+
+    ```python
+    def get_replacement(item_idx: int):
+        images = mm_items.get_items("image", ImageProcessorItems)
+        image_size = images.get_image_size(item_idx)
+
+        ncols, nrows = self.info.get_image_feature_grid_size(
+            image_width=image_size.width,
+            image_height=image_size.height,
+        )
+
+        # `_IMAGE_TOKEN_ID` corresponds to `|SPEAKER|`
+        # `_NEWLINE_TOKEN_ID` corresponds to `|NEWLINE|`
+        return ([_IMAGE_TOKEN_ID] * ncols + [_NEWLINE_TOKEN_ID]) * nrows
+    ```
+
+    However, this is not entirely correct. After `FuyuImageProcessor.preprocess_with_tokenizer_info` is called,
+    a BOS token (`<s>`) is also added to the prompt:
+
+    ```python
+    # https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/processing_fuyu.py#L417-L435
+    model_image_input = self.image_processor.preprocess_with_tokenizer_info(
+        image_input=tensor_batch_images,
+        image_present=image_present,
+        image_unpadded_h=image_unpadded_heights,
+        image_unpadded_w=image_unpadded_widths,
+        image_placeholder_id=image_placeholder_id,
+        image_newline_id=image_newline_id,
+        variable_sized=True,
+    )
+    prompt_tokens, prompts_length = _tokenize_prompts_with_image_and_batch(
+        tokenizer=self.tokenizer,
+        prompts=prompts,
+        scale_factors=scale_factors,
+        max_tokens_to_generate=self.max_tokens_to_generate,
+        max_position_embeddings=self.max_position_embeddings,
+        add_BOS=True,
+        add_beginning_of_answer_token=True,
+    )
+    ```
+
+    To assign the vision embeddings to only the image tokens, instead of a string
+    you can return an instance of [PromptUpdateDetails][vllm.multimodal.processing.PromptUpdateDetails]:
+
+    ```python
+    hf_config = self.info.get_hf_config()
+    bos_token_id = hf_config.bos_token_id  # `<s>`
+    assert isinstance(bos_token_id, int)
+
+    def get_replacement_fuyu(item_idx: int):
+        images = mm_items.get_items("image", ImageProcessorItems)
+        image_size = images.get_image_size(item_idx)
+
+        ncols, nrows = self.info.get_image_feature_grid_size(
+            image_width=image_size.width,
+            image_height=image_size.height,
+        )
+        image_tokens = ([_IMAGE_TOKEN_ID] * ncols +
+                        [_NEWLINE_TOKEN_ID]) * nrows
+
+        return PromptUpdateDetails.select_token_id(
+            image_tokens + [bos_token_id],
+            embed_token_id=_IMAGE_TOKEN_ID,
+        )
+    ```
+
+    Finally, noticing that the HF processor removes the `|ENDOFTEXT|` token from the tokenized prompt,
+    we can search for it to conduct the replacement at the start of the string:
+
+    ```python
+    def _get_prompt_updates(
+        self,
+        mm_items: MultiModalDataItems,
+        hf_processor_mm_kwargs: Mapping[str, object],
+        out_mm_kwargs: MultiModalKwargs,
+    ) -> Sequence[PromptUpdate]:
+        hf_config = self.info.get_hf_config()
+        bos_token_id = hf_config.bos_token_id
+        assert isinstance(bos_token_id, int)
+
+        tokenizer = self.info.get_tokenizer()
+        eot_token_id = tokenizer.bos_token_id
+        assert isinstance(eot_token_id, int)
+
+        def get_replacement_fuyu(item_idx: int):
+            images = mm_items.get_items("image", ImageProcessorItems)
+            image_size = images.get_image_size(item_idx)
+
+            ncols, nrows = self.info.get_image_feature_grid_size(
+                image_width=image_size.width,
+                image_height=image_size.height,
+            )
+            image_tokens = ([_IMAGE_TOKEN_ID] * ncols +
+                            [_NEWLINE_TOKEN_ID]) * nrows
+
+            return PromptUpdateDetails.select_token_id(
+                image_tokens + [bos_token_id],
+                embed_token_id=_IMAGE_TOKEN_ID,
+            )
+
+        return [
+            PromptReplacement(
+                modality="image",
+                target=[eot_token_id],
+                replacement=get_replacement_fuyu,
+            )
+        ]
+    ```
+
+## 5. Register processor-related classes
+
+After you have defined [BaseProcessingInfo][vllm.multimodal.processing.BaseProcessingInfo] (Step 2),
+[BaseDummyInputsBuilder][vllm.multimodal.profiling.BaseDummyInputsBuilder] (Step 3),
+and [BaseMultiModalProcessor][vllm.multimodal.processing.BaseMultiModalProcessor] (Step 4),
+decorate the model class with [MULTIMODAL_REGISTRY.register_processor][vllm.multimodal.registry.MultiModalRegistry.register_processor]
+to register them to the multi-modal registry:
+
+```diff
+  from vllm.model_executor.models.interfaces import SupportsMultiModal
++ from vllm.multimodal import MULTIMODAL_REGISTRY
+
++ @MULTIMODAL_REGISTRY.register_processor(YourMultiModalProcessor,
++                                         info=YourProcessingInfo,
++                                         dummy_inputs=YourDummyInputsBuilder)
+  class YourModelForImage2Seq(nn.Module, SupportsMultiModal):
+```
+
+## Notes
+
+### Inserting feature tokens without replacement
+
+Some HF processors directly insert feature tokens without replacing anything in the original prompt. In that case, you can use [PromptInsertion][vllm.multimodal.processing.PromptInsertion] instead of [PromptReplacement][vllm.multimodal.processing.PromptReplacement] inside [_get_prompt_updates][vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates].
+
+Examples:
+
+- BLIP-2 (insert at start of prompt): <gh-file:vllm/model_executor/models/blip2.py>
+- Florence2 (insert at start of prompt): <gh-file:vllm/model_executor/models/florence2.py>
+- Molmo (insert after `<|endoftext|>` token): <gh-file:vllm/model_executor/models/molmo.py>
+
+### Handling prompt updates unrelated to multi-modal data
+
+[_get_prompt_updates][vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates] assumes that each application of a prompt update corresponds to one multi-modal item. If the HF processor performs additional processing regardless of how many multi-modal items there are, you should override [_apply_hf_processor_tokens_only][vllm.multimodal.processing.BaseMultiModalProcessor._apply_hf_processor_tokens_only] so that the processed token inputs are consistent with the result of applying the HF processor on text inputs. This is because token inputs bypass the HF processor according to [our design][mm-processing].
+
+Examples:
+
+- Chameleon (appends `sep_token`): <gh-file:vllm/model_executor/models/chameleon.py>
+- Fuyu (appends `boa_token`): <gh-file:vllm/model_executor/models/fuyu.py>
+- Molmo (applies chat template which is not defined elsewhere): <gh-file:vllm/model_executor/models/molmo.py>
+
+### Custom HF processor
+
+Some models don't define a HF processor class on HF Hub. In that case, you can define a custom HF processor that has the same call signature as HF processors and pass it to [_call_hf_processor][vllm.multimodal.processing.BaseMultiModalProcessor._call_hf_processor].
+
+Examples:
+
+- DeepSeek-VL2: <gh-file:vllm/model_executor/models/deepseek_vl2.py>
+- InternVL: <gh-file:vllm/model_executor/models/internvl.py>
+- Qwen-VL: <gh-file:vllm/model_executor/models/qwen_vl.py>
diff --git a/docs/source/contributing/model/registration.md b/docs/contributing/model/registration.md
similarity index 52%
rename from docs/source/contributing/model/registration.md
rename to docs/contributing/model/registration.md
index 64cd25b53807e..e796e49a75013 100644
--- a/docs/source/contributing/model/registration.md
+++ b/docs/contributing/model/registration.md
@@ -1,33 +1,32 @@
-(new-model-registration)=
-
-# Registering a Model to vLLM
+---
+title: Registering a Model to vLLM
+---
+[](){ #new-model-registration }
 
 vLLM relies on a model registry to determine how to run each model.
-A list of pre-registered architectures can be found [here](#supported-models).
+A list of pre-registered architectures can be found [here][supported-models].
 
 If your model is not on this list, you must register it to vLLM.
 This page provides detailed instructions on how to do so.
 
 ## Built-in models
 
-To add a model directly to the vLLM library, start by forking our [GitHub repository](https://github.com/vllm-project/vllm) and then [build it from source](#build-from-source).
+To add a model directly to the vLLM library, start by forking our [GitHub repository](https://github.com/vllm-project/vllm) and then [build it from source][build-from-source].
 This gives you the ability to modify the codebase and test your model.
 
-After you have implemented your model (see [tutorial](#new-model-basic)), put it into the <gh-dir:vllm/model_executor/models> directory.
+After you have implemented your model (see [tutorial][new-model-basic]), put it into the <gh-dir:vllm/model_executor/models> directory.
 Then, add your model class to `_VLLM_MODELS` in <gh-file:vllm/model_executor/models/registry.py> so that it is automatically registered upon importing vLLM.
-Finally, update our [list of supported models](#supported-models) to promote your model!
+Finally, update our [list of supported models][supported-models] to promote your model!
 
-:::{important}
-The list of models in each section should be maintained in alphabetical order.
-:::
+!!! warning
+    The list of models in each section should be maintained in alphabetical order.
 
 ## Out-of-tree models
 
 You can load an external model using a plugin without modifying the vLLM codebase.
 
-:::{seealso}
-[vLLM's Plugin System](#plugin-system)
-:::
+!!! info
+    [vLLM's Plugin System][plugin-system]
 
 To register the model, use the following code:
 
@@ -45,11 +44,9 @@ from vllm import ModelRegistry
 ModelRegistry.register_model("YourModelForCausalLM", "your_code:YourModelForCausalLM")
 ```
 
-:::{important}
-If your model is a multimodal model, ensure the model class implements the {class}`~vllm.model_executor.models.interfaces.SupportsMultiModal` interface.
-Read more about that [here](#supports-multimodal).
-:::
+!!! warning
+    If your model is a multimodal model, ensure the model class implements the [SupportsMultiModal][vllm.model_executor.models.interfaces.SupportsMultiModal] interface.
+    Read more about that [here][supports-multimodal].
 
-:::{note}
-Although you can directly put these code snippets in your script using `vllm.LLM`, the recommended way is to place these snippets in a vLLM plugin. This ensures compatibility with various vLLM features like distributed inference and the API server.
-:::
+!!! note
+    Although you can directly put these code snippets in your script using `vllm.LLM`, the recommended way is to place these snippets in a vLLM plugin. This ensures compatibility with various vLLM features like distributed inference and the API server.
diff --git a/docs/source/contributing/model/tests.md b/docs/contributing/model/tests.md
similarity index 75%
rename from docs/source/contributing/model/tests.md
rename to docs/contributing/model/tests.md
index 68d51d89f7cff..26880986181d9 100644
--- a/docs/source/contributing/model/tests.md
+++ b/docs/contributing/model/tests.md
@@ -1,6 +1,7 @@
-(new-model-tests)=
-
-# Writing Unit Tests
+---
+title: Writing Unit Tests
+---
+[](){ #new-model-tests }
 
 This page explains how to write unit tests to verify the implementation of your model.
 
@@ -14,14 +15,12 @@ Without them, the CI for your PR will fail.
 Include an example HuggingFace repository for your model in <gh-file:tests/models/registry.py>.
 This enables a unit test that loads dummy weights to ensure that the model can be initialized in vLLM.
 
-:::{important}
-The list of models in each section should be maintained in alphabetical order.
-:::
+!!! warning
+    The list of models in each section should be maintained in alphabetical order.
 
-:::{tip}
-If your model requires a development version of HF Transformers, you can set
-`min_transformers_version` to skip the test in CI until the model is released.
-:::
+!!! tip
+    If your model requires a development version of HF Transformers, you can set
+    `min_transformers_version` to skip the test in CI until the model is released.
 
 ## Optional Tests
 
@@ -34,16 +33,16 @@ These tests compare the model outputs of vLLM against [HF Transformers](https://
 
 #### Generative models
 
-For [generative models](#generative-models), there are two levels of correctness tests, as defined in <gh-file:tests/models/utils.py>:
+For [generative models][generative-models], there are two levels of correctness tests, as defined in <gh-file:tests/models/utils.py>:
 
 - Exact correctness (`check_outputs_equal`): The text outputted by vLLM should exactly match the text outputted by HF.
 - Logprobs similarity (`check_logprobs_close`): The logprobs outputted by vLLM should be in the top-k logprobs outputted by HF, and vice versa.
 
 #### Pooling models
 
-For [pooling models](#pooling-models), we simply check the cosine similarity, as defined in <gh-file:tests/models/embedding/utils.py>.
+For [pooling models][pooling-models], we simply check the cosine similarity, as defined in <gh-file:tests/models/embedding/utils.py>.
 
-(mm-processing-tests)=
+[](){ #mm-processing-tests }
 
 ### Multi-modal processing
 
diff --git a/docs/source/contributing/overview.md b/docs/contributing/overview.md
similarity index 87%
rename from docs/source/contributing/overview.md
rename to docs/contributing/overview.md
index 89b31f0311e23..7dbf8bfdcf240 100644
--- a/docs/source/contributing/overview.md
+++ b/docs/contributing/overview.md
@@ -27,7 +27,21 @@ See <gh-file:LICENSE>.
 ## Developing
 
 Depending on the kind of development you'd like to do (e.g. Python, CUDA), you can choose to build vLLM with or without compilation.
-Check out the [building from source](#build-from-source) documentation for details.
+Check out the [building from source][build-from-source] documentation for details.
+
+### Building the docs
+
+Install the dependencies:
+
+```bash
+pip install -r requirements/docs.txt
+```
+
+Start the autoreloading MkDocs server:
+
+```bash
+mkdocs serve
+```
 
 ## Testing
 
@@ -48,29 +62,25 @@ pre-commit run mypy-3.9 --hook-stage manual --all-files
 pytest tests/
 ```
 
-:::{tip}
-Since the <gh-file:docker/Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12.
+!!! tip
+    Since the <gh-file:docker/Dockerfile> ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12.
 
-Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
-:::
+    Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment.
 
-:::{note}
-Currently, the repository is not fully checked by `mypy`.
-:::
+!!! note
+    Currently, the repository is not fully checked by `mypy`.
 
-:::{note}
-Currently, not all unit tests pass when run on CPU platforms. If you don't have access to a GPU
-platform to run unit tests locally, rely on the continuous integration system to run the tests for
-now.
-:::
+!!! note
+    Currently, not all unit tests pass when run on CPU platforms. If you don't have access to a GPU
+    platform to run unit tests locally, rely on the continuous integration system to run the tests for
+    now.
 
 ## Issues
 
 If you encounter a bug or have a feature request, please [search existing issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue) first to see if it has already been reported. If not, please [file a new issue](https://github.com/vllm-project/vllm/issues/new/choose), providing as much relevant information as possible.
 
-:::{important}
-If you discover a security vulnerability, please follow the instructions [here](gh-file:SECURITY.md#reporting-a-vulnerability).
-:::
+!!! warning
+    If you discover a security vulnerability, please follow the instructions [here](gh-file:SECURITY.md#reporting-a-vulnerability).
 
 ## Pull Requests & Code Reviews
 
@@ -106,9 +116,8 @@ appropriately to indicate the type of change. Please use one of the following:
 - `[Misc]` for PRs that do not fit the above categories. Please use this
   sparingly.
 
-:::{note}
-If the PR spans more than one category, please include all relevant prefixes.
-:::
+!!! note
+    If the PR spans more than one category, please include all relevant prefixes.
 
 ### Code Quality
 
diff --git a/docs/source/contributing/profiling/profiling_index.md b/docs/contributing/profiling.md
similarity index 90%
rename from docs/source/contributing/profiling/profiling_index.md
rename to docs/contributing/profiling.md
index ce25daa39c5cb..be01b9b65f65c 100644
--- a/docs/source/contributing/profiling/profiling_index.md
+++ b/docs/contributing/profiling.md
@@ -1,8 +1,7 @@
 # Profiling vLLM
 
-:::{warning}
-Profiling is only intended for vLLM developers and maintainers to understand the proportion of time spent in different parts of the codebase. **vLLM end-users should never turn on profiling** as it will significantly slow down the inference.
-:::
+!!! warning
+    Profiling is only intended for vLLM developers and maintainers to understand the proportion of time spent in different parts of the codebase. **vLLM end-users should never turn on profiling** as it will significantly slow down the inference.
 
 ## Profile with PyTorch Profiler
 
@@ -14,15 +13,13 @@ When using `benchmarks/benchmark_serving.py`, you can enable profiling by passin
 
 Traces can be visualized using <https://ui.perfetto.dev/>.
 
-:::{tip}
-Only send a few requests through vLLM when profiling, as the traces can get quite large. Also, no need to untar the traces, they can be viewed directly.
-:::
+!!! tip
+    Only send a few requests through vLLM when profiling, as the traces can get quite large. Also, no need to untar the traces, they can be viewed directly.
 
-:::{tip}
-To stop the profiler - it flushes out all the profile trace files to the directory. This takes time, for example for about 100 requests worth of data for a llama 70b, it takes about 10 minutes to flush out on a H100.
-Set the env variable VLLM_RPC_TIMEOUT to a big number before you start the server. Say something like 30 minutes.
-`export VLLM_RPC_TIMEOUT=1800000`
-:::
+!!! tip
+    Stopping the profiler flushes all the profile trace files to the directory. This takes time; for example, for about 100 requests' worth of data for a llama 70b, it takes about 10 minutes to flush out on an H100.
+    Set the env variable VLLM_RPC_TIMEOUT to a big number before you start the server. Say something like 30 minutes.
+    `export VLLM_RPC_TIMEOUT=1800000`
 
 ### Example commands and usage
 
diff --git a/docs/source/contributing/vulnerability_management.md b/docs/contributing/vulnerability_management.md
similarity index 100%
rename from docs/source/contributing/vulnerability_management.md
rename to docs/contributing/vulnerability_management.md
diff --git a/docs/deployment/docker.md b/docs/deployment/docker.md
new file mode 100644
index 0000000000000..293536e52c4b0
--- /dev/null
+++ b/docs/deployment/docker.md
@@ -0,0 +1,126 @@
+---
+title: Using Docker
+---
+[](){ #deployment-docker }
+
+[](){ #deployment-docker-pre-built-image }
+
+## Use vLLM's Official Docker Image
+
+vLLM offers an official Docker image for deployment.
+The image can be used to run an OpenAI-compatible server and is available on Docker Hub as [vllm/vllm-openai](https://hub.docker.com/r/vllm/vllm-openai/tags).
+
+```console
+$ docker run --runtime nvidia --gpus all \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
+    -p 8000:8000 \
+    --ipc=host \
+    vllm/vllm-openai:latest \
+    --model mistralai/Mistral-7B-v0.1
+```
+
+This image can also be used with other container engines such as [Podman](https://podman.io/).
+
+```console
+$ podman run --gpus all \
+  -v ~/.cache/huggingface:/root/.cache/huggingface \
+  --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
+  -p 8000:8000 \
+  --ipc=host \
+  vllm/vllm-openai:latest \
+  --model mistralai/Mistral-7B-v0.1
+```
+
+You can add any other [engine-args][engine-args] you need after the image tag (`vllm/vllm-openai:latest`).
+
+!!! note
+    You can either use the `--ipc=host` flag or the `--shm-size` flag to allow the
+    container to access the host's shared memory. vLLM uses PyTorch, which uses shared
+    memory to share data between processes under the hood, particularly for tensor parallel inference.
+
+!!! note
+    Optional dependencies are not included in order to avoid licensing issues (e.g. <gh-issue:8030>).
+
+    If you need to use those dependencies (having accepted the license terms),
+    create a custom Dockerfile on top of the base image with an extra layer that installs them:
+
+    ```Dockerfile
+    FROM vllm/vllm-openai:v0.8.3
+
+    # e.g. install the `audio` optional dependencies
+    # NOTE: Make sure the version of vLLM matches the base image!
+    RUN uv pip install --system vllm[audio]==0.8.3
+    ```
+
+!!! tip
+    Some new models may only be available on the main branch of [HF Transformers](https://github.com/huggingface/transformers).
+
+    To use the development version of `transformers`, create a custom Dockerfile on top of the base image
+    with an extra layer that installs their code from source:
+
+    ```Dockerfile
+    FROM vllm/vllm-openai:latest
+
+    RUN uv pip install --system git+https://github.com/huggingface/transformers.git
+    ```
+
+[](){ #deployment-docker-build-image-from-source }
+
+## Building vLLM's Docker Image from Source
+
+You can build and run vLLM from source via the provided <gh-file:docker/Dockerfile>. To build vLLM:
+
+```console
+# optionally specify: --build-arg max_jobs=8 --build-arg nvcc_threads=2
+DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --file docker/Dockerfile
+```
+
+!!! note
+    By default vLLM will build for all GPU types for widest distribution. If you are just building for the
+    current GPU type the machine is running on, you can add the argument `--build-arg torch_cuda_arch_list=""`
+    for vLLM to find the current GPU type and build for that.
+
+    If you are using Podman instead of Docker, you might need to disable SELinux labeling by
+    adding `--security-opt label=disable` when running `podman build` command to avoid certain [existing issues](https://github.com/containers/buildah/discussions/4184).
+
+## Building for Arm64/aarch64
+
+A Docker container can be built for aarch64 systems such as the Nvidia Grace-Hopper. At the time of this writing, this requires the use
+of PyTorch Nightly and should be considered **experimental**. Using the flag `--platform "linux/arm64"` will attempt to build for arm64.
+
+!!! note
+    Multiple modules must be compiled, so this process can take a while. We recommend using the `--build-arg max_jobs=` & `--build-arg nvcc_threads=`
+    flags to speed up the build process. However, ensure your `max_jobs` is substantially larger than `nvcc_threads` to get the most benefit.
+    Keep an eye on memory usage with parallel jobs as it can be substantial (see example below).
+
+```console
+# Example of building on Nvidia GH200 server. (Memory usage: ~15GB, Build time: ~1475s / ~25 min, Image size: 6.93GB)
+$ python3 use_existing_torch.py
+$ DOCKER_BUILDKIT=1 docker build . \
+  --file docker/Dockerfile \
+  --target vllm-openai \
+  --platform "linux/arm64" \
+  -t vllm/vllm-gh200-openai:latest \
+  --build-arg max_jobs=66 \
+  --build-arg nvcc_threads=2 \
+  --build-arg torch_cuda_arch_list="9.0+PTX" \
+  --build-arg vllm_fa_cmake_gpu_arches="90-real"
+```
+
+## Use the custom-built vLLM Docker image
+
+To run vLLM with the custom-built Docker image:
+
+```console
+$ docker run --runtime nvidia --gpus all \
+    -v ~/.cache/huggingface:/root/.cache/huggingface \
+    -p 8000:8000 \
+    --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
+    vllm/vllm-openai <args...>
+```
+
+The argument `vllm/vllm-openai` specifies the image to run, and should be replaced with the name of the custom-built image (the `-t` tag from the build command).
+
+!!! note
+    **For versions 0.4.1 and 0.4.2 only** - the vLLM docker images under these versions are supposed to be run under the root user since a library under the root user's home directory, i.e. `/root/.config/vllm/nccl/cu12/libnccl.so.2.18.1` is required to be loaded during runtime. If you are running the container under a different user, you may need to first change the permissions of the library (and all the parent directories) to allow the user to access it, then run vLLM with environment variable `VLLM_NCCL_SO_PATH=/root/.config/vllm/nccl/cu12/libnccl.so.2.18.1`.
diff --git a/docs/source/deployment/frameworks/anything-llm.md b/docs/deployment/frameworks/anything-llm.md
similarity index 78%
rename from docs/source/deployment/frameworks/anything-llm.md
rename to docs/deployment/frameworks/anything-llm.md
index d430c170ef541..a89e633c086ea 100644
--- a/docs/source/deployment/frameworks/anything-llm.md
+++ b/docs/deployment/frameworks/anything-llm.md
@@ -1,6 +1,7 @@
-(deployment-anything-llm)=
-
-# Anything LLM
+---
+title: Anything LLM
+---
+[](){ #deployment-anything-llm }
 
 [Anything LLM](https://github.com/Mintplex-Labs/anything-llm) is a full-stack application that enables you to turn any document, resource, or piece of content into context that any LLM can use as references during chatting.
 
@@ -25,23 +26,19 @@ vllm serve Qwen/Qwen1.5-32B-Chat-AWQ --max-model-len 4096
   - Base URL: http://{vllm server host}:{vllm server port}/v1
   - Chat Model Name: `Qwen/Qwen1.5-32B-Chat-AWQ`
 
-:::{image} /assets/deployment/anything-llm-provider.png
-:::
+![](../../assets/deployment/anything-llm-provider.png)
 
 - Back to home page, New Workspace --> create `vllm` workspace, and start to chat:
 
-:::{image} /assets/deployment/anything-llm-chat-without-doc.png
-:::
+![](../../assets/deployment/anything-llm-chat-without-doc.png)
 
 - Click the upload button:
   - upload the doc
   - select the doc and move to the workspace
   - save and embed
 
-:::{image} /assets/deployment/anything-llm-upload-doc.png
-:::
+![](../../assets/deployment/anything-llm-upload-doc.png)
 
 - Chat again:
 
-:::{image} /assets/deployment/anything-llm-chat-with-doc.png
-:::
+![](../../assets/deployment/anything-llm-chat-with-doc.png)
diff --git a/docs/source/deployment/frameworks/bentoml.md b/docs/deployment/frameworks/bentoml.md
similarity index 89%
rename from docs/source/deployment/frameworks/bentoml.md
rename to docs/deployment/frameworks/bentoml.md
index 2bf435bda8380..7e64b6eb6fb03 100644
--- a/docs/source/deployment/frameworks/bentoml.md
+++ b/docs/deployment/frameworks/bentoml.md
@@ -1,6 +1,7 @@
-(deployment-bentoml)=
-
-# BentoML
+---
+title: BentoML
+---
+[](){ #deployment-bentoml }
 
 [BentoML](https://github.com/bentoml/BentoML) allows you to deploy a large language model (LLM) server with vLLM as the backend, which exposes OpenAI-compatible endpoints. You can serve the model locally or containerize it as an OCI-compliant image and deploy it on Kubernetes.
 
diff --git a/docs/source/deployment/frameworks/cerebrium.md b/docs/deployment/frameworks/cerebrium.md
similarity index 98%
rename from docs/source/deployment/frameworks/cerebrium.md
rename to docs/deployment/frameworks/cerebrium.md
index b20c95137b6e7..84cb2304fac20 100644
--- a/docs/source/deployment/frameworks/cerebrium.md
+++ b/docs/deployment/frameworks/cerebrium.md
@@ -1,12 +1,11 @@
-(deployment-cerebrium)=
+---
+title: Cerebrium
+---
+[](){ #deployment-cerebrium }
 
-# Cerebrium
-
-:::{raw} html
 <p align="center">
     <img src="https://i.ibb.co/hHcScTT/Screenshot-2024-06-13-at-10-14-54.png" alt="vLLM_plus_cerebrium"/>
 </p>
-:::
 
 vLLM can be run on a cloud based GPU machine with [Cerebrium](https://www.cerebrium.ai/), a serverless AI infrastructure platform that makes it easier for companies to build and deploy AI based applications.
 
diff --git a/docs/source/deployment/frameworks/chatbox.md b/docs/deployment/frameworks/chatbox.md
similarity index 84%
rename from docs/source/deployment/frameworks/chatbox.md
rename to docs/deployment/frameworks/chatbox.md
index e62f4647150f4..10da2fc710027 100644
--- a/docs/source/deployment/frameworks/chatbox.md
+++ b/docs/deployment/frameworks/chatbox.md
@@ -1,6 +1,7 @@
-(deployment-chatbox)=
-
-# Chatbox
+---
+title: Chatbox
+---
+[](){ #deployment-chatbox }
 
 [Chatbox](https://github.com/chatboxai/chatbox) is a desktop client for LLMs, available on Windows, Mac, Linux.
 
@@ -27,10 +28,8 @@ vllm serve qwen/Qwen1.5-0.5B-Chat
   - API Path: `/chat/completions`
   - Model: `qwen/Qwen1.5-0.5B-Chat`
 
-:::{image} /assets/deployment/chatbox-settings.png
-:::
+![](../../assets/deployment/chatbox-settings.png)
 
 - Go to `Just chat`, and start to chat:
 
-:::{image} /assets/deployment/chatbox-chat.png
-:::
+![](../../assets/deployment/chatbox-chat.png)
diff --git a/docs/source/deployment/frameworks/dify.md b/docs/deployment/frameworks/dify.md
similarity index 90%
rename from docs/source/deployment/frameworks/dify.md
rename to docs/deployment/frameworks/dify.md
index 5cdf6a3876371..886484b543475 100644
--- a/docs/source/deployment/frameworks/dify.md
+++ b/docs/deployment/frameworks/dify.md
@@ -1,6 +1,7 @@
-(deployment-dify)=
-
-# Dify
+---
+title: Dify
+---
+[](){ #deployment-dify }
 
 [Dify](https://github.com/langgenius/dify) is an open-source LLM app development platform. Its intuitive interface combines agentic AI workflow, RAG pipeline, agent capabilities, model management, observability features, and more, allowing you to quickly move from prototype to production.
 
@@ -42,15 +43,12 @@ docker compose up -d
   - **Model Name for API Endpoint**: `Qwen/Qwen1.5-7B-Chat`
   - **Completion Mode**: `Completion`
 
-:::{image} /assets/deployment/dify-settings.png
-:::
+![](../../assets/deployment/dify-settings.png)
 
 - To create a test chatbot, go to `Studio → Chatbot → Create from Blank`, then select Chatbot as the type:
 
-:::{image} /assets/deployment/dify-create-chatbot.png
-:::
+![](../../assets/deployment/dify-create-chatbot.png)
 
 - Click the chatbot you just created to open the chat interface and start interacting with the model:
 
-:::{image} /assets/deployment/dify-chat.png
-:::
+![](../../assets/deployment/dify-chat.png)
diff --git a/docs/source/deployment/frameworks/dstack.md b/docs/deployment/frameworks/dstack.md
similarity index 83%
rename from docs/source/deployment/frameworks/dstack.md
rename to docs/deployment/frameworks/dstack.md
index a16e28f2d8983..7de92855745b0 100644
--- a/docs/source/deployment/frameworks/dstack.md
+++ b/docs/deployment/frameworks/dstack.md
@@ -1,12 +1,11 @@
-(deployment-dstack)=
+---
+title: dstack
+---
+[](){ #deployment-dstack }
 
-# dstack
-
-:::{raw} html
 <p align="center">
     <img src="https://i.ibb.co/71kx6hW/vllm-dstack.png" alt="vLLM_plus_dstack"/>
 </p>
-:::
 
 vLLM can be run on a cloud based GPU machine with [dstack](https://dstack.ai/), an open-source framework for running LLMs on any cloud. This tutorial assumes that you have already configured credentials, gateway, and GPU quotas on your cloud environment.
 
@@ -97,6 +96,5 @@ completion = client.chat.completions.create(
 print(completion.choices[0].message.content)
 ```
 
-:::{note}
-dstack automatically handles authentication on the gateway using dstack's tokens. Meanwhile, if you don't want to configure a gateway, you can provision dstack `Task` instead of `Service`. The `Task` is for development purpose only. If you want to know more about hands-on materials how to serve vLLM using dstack, check out [this repository](https://github.com/dstackai/dstack-examples/tree/main/deployment/vllm)
-:::
+!!! note
+    dstack automatically handles authentication on the gateway using dstack's tokens. Meanwhile, if you don't want to configure a gateway, you can provision a dstack `Task` instead of a `Service`. The `Task` is for development purposes only. For more hands-on material on how to serve vLLM using dstack, check out [this repository](https://github.com/dstackai/dstack-examples/tree/main/deployment/vllm).
diff --git a/docs/deployment/frameworks/helm.md b/docs/deployment/frameworks/helm.md
new file mode 100644
index 0000000000000..192b90438acf0
--- /dev/null
+++ b/docs/deployment/frameworks/helm.md
@@ -0,0 +1,95 @@
+---
+title: Helm
+---
+[](){ #deployment-helm }
+
+A Helm chart to deploy vLLM for Kubernetes
+
+Helm is a package manager for Kubernetes. It will help you to deploy vLLM on k8s and automate the deployment of vLLM Kubernetes applications. With Helm, you can deploy the same framework architecture with different configurations to multiple namespaces by overriding variable values.
+
+This guide will walk you through the process of deploying vLLM with Helm, including the necessary prerequisites, steps for helm installation and documentation on architecture and values file.
+
+## Prerequisites
+
+Before you begin, ensure that you have the following:
+
+- A running Kubernetes cluster
+- NVIDIA Kubernetes Device Plugin (`k8s-device-plugin`): This can be found at [https://github.com/NVIDIA/k8s-device-plugin](https://github.com/NVIDIA/k8s-device-plugin)
+- Available GPU resources in your cluster
+- S3 with the model which will be deployed
+
+## Installing the chart
+
+To install the chart with the release name `test-vllm`:
+
+```console
+helm upgrade --install --create-namespace --namespace=ns-vllm test-vllm . -f values.yaml --set secrets.s3endpoint=$ACCESS_POINT --set secrets.s3bucketname=$BUCKET --set secrets.s3accesskeyid=$ACCESS_KEY --set secrets.s3accesskey=$SECRET_KEY
+```
+
+## Uninstalling the Chart
+
+To uninstall the `test-vllm` deployment:
+
+```console
+helm uninstall test-vllm --namespace=ns-vllm
+```
+
+The command removes all the Kubernetes components associated with the
+chart **including persistent volumes** and deletes the release.
+
+## Architecture
+
+![](../../assets/deployment/architecture_helm_deployment.png)
+
+## Values
+
+| Key                                        | Type    | Default                                                                                                                                                  | Description                                                                                                                               |
+|--------------------------------------------|---------|----------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------|
+| autoscaling                                | object  | {"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80}                                                                  | Autoscaling configuration                                                                                                                 |
+| autoscaling.enabled                        | bool    | false                                                                                                                                                    | Enable autoscaling                                                                                                                        |
+| autoscaling.maxReplicas                    | int     | 100                                                                                                                                                      | Maximum replicas                                                                                                                          |
+| autoscaling.minReplicas                    | int     | 1                                                                                                                                                        | Minimum replicas                                                                                                                          |
+| autoscaling.targetCPUUtilizationPercentage | int     | 80                                                                                                                                                       | Target CPU utilization for autoscaling                                                                                                    |
+| configs                                    | object  | {}                                                                                                                                                       | Configmap                                                                                                                                 |
+| containerPort                              | int     | 8000                                                                                                                                                     | Container port                                                                                                                            |
+| customObjects                              | list    | []                                                                                                                                                       | Custom Objects configuration                                                                                                              |
+| deploymentStrategy                         | object  | {}                                                                                                                                                       | Deployment strategy configuration                                                                                                         |
+| externalConfigs                            | list    | []                                                                                                                                                       | External configuration                                                                                                                    |
+| extraContainers                            | list    | []                                                                                                                                                       | Additional containers configuration                                                                                                       |
+| extraInit                                  | object  | {"pvcStorage":"1Gi","s3modelpath":"relative_s3_model_path/opt-125m", "awsEc2MetadataDisabled": true}                                                     | Additional configuration for the init container                                                                                           |
+| extraInit.pvcStorage                       | string  | "50Gi"                                                                                                                                                   | Storage size of the s3                                                                                                                    |
+| extraInit.s3modelpath                      | string  | "relative_s3_model_path/opt-125m"                                                                                                                        | Path of the model on the s3 which hosts model weights and config files                                                                    |
+| extraInit.awsEc2MetadataDisabled           | boolean | true                                                                                                                                                     | Disables the use of the Amazon EC2 instance metadata service                                                                              |
+| extraPorts                                 | list    | []                                                                                                                                                       | Additional ports configuration                                                                                                            |
+| gpuModels                                  | list    | ["TYPE_GPU_USED"]                                                                                                                                        | Type of gpu used                                                                                                                          |
+| image                                      | object  | {"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"} | Image configuration                                                                                                                       |
+| image.command                              | list    | ["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"]                                                            | Container launch command                                                                                                                  |
+| image.repository                           | string  | "vllm/vllm-openai"                                                                                                                                       | Image repository                                                                                                                          |
+| image.tag                                  | string  | "latest"                                                                                                                                                 | Image tag                                                                                                                                 |
+| livenessProbe                              | object  | {"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":15,"periodSeconds":10}                                              | Liveness probe configuration                                                                                                              |
+| livenessProbe.failureThreshold             | int     | 3                                                                                                                                                        | Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not alive |
+| livenessProbe.httpGet                      | object  | {"path":"/health","port":8000}                                                                                                                           | Configuration of the Kubelet http request on the server                                                                                   |
+| livenessProbe.httpGet.path                 | string  | "/health"                                                                                                                                                | Path to access on the HTTP server                                                                                                         |
+| livenessProbe.httpGet.port                 | int     | 8000                                                                                                                                                     | Name or number of the port to access on the container, on which the server is listening                                                   |
+| livenessProbe.initialDelaySeconds          | int     | 15                                                                                                                                                       | Number of seconds after the container has started before liveness probe is initiated                                                      |
+| livenessProbe.periodSeconds                | int     | 10                                                                                                                                                       | How often (in seconds) to perform the liveness probe                                                                                      |
+| maxUnavailablePodDisruptionBudget          | string  | ""                                                                                                                                                       | Disruption Budget Configuration                                                                                                           |
+| readinessProbe                             | object  | {"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":5,"periodSeconds":5}                                                | Readiness probe configuration                                                                                                             |
+| readinessProbe.failureThreshold            | int     | 3                                                                                                                                                        | Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not ready |
+| readinessProbe.httpGet                     | object  | {"path":"/health","port":8000}                                                                                                                           | Configuration of the Kubelet http request on the server                                                                                   |
+| readinessProbe.httpGet.path                | string  | "/health"                                                                                                                                                | Path to access on the HTTP server                                                                                                         |
+| readinessProbe.httpGet.port                | int     | 8000                                                                                                                                                     | Name or number of the port to access on the container, on which the server is listening                                                   |
+| readinessProbe.initialDelaySeconds         | int     | 5                                                                                                                                                        | Number of seconds after the container has started before readiness probe is initiated                                                     |
+| readinessProbe.periodSeconds               | int     | 5                                                                                                                                                        | How often (in seconds) to perform the readiness probe                                                                                     |
+| replicaCount                               | int     | 1                                                                                                                                                        | Number of replicas                                                                                                                        |
+| resources                                  | object  | {"limits":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1},"requests":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1}}                                          | Resource configuration                                                                                                                    |
+| resources.limits."nvidia.com/gpu"          | int     | 1                                                                                                                                                        | Number of gpus used                                                                                                                       |
+| resources.limits.cpu                       | int     | 4                                                                                                                                                        | Number of CPUs                                                                                                                            |
+| resources.limits.memory                    | string  | "16Gi"                                                                                                                                                   | CPU memory configuration                                                                                                                  |
+| resources.requests."nvidia.com/gpu"        | int     | 1                                                                                                                                                        | Number of gpus used                                                                                                                       |
+| resources.requests.cpu                     | int     | 4                                                                                                                                                        | Number of CPUs                                                                                                                            |
+| resources.requests.memory                  | string  | "16Gi"                                                                                                                                                   | CPU memory configuration                                                                                                                  |
+| secrets                                    | object  | {}                                                                                                                                                       | Secrets configuration                                                                                                                     |
+| serviceName                                | string  | Service name                                                                                                                                             |                                                                                                                                           |
+| servicePort                                | int     | 80                                                                                                                                                       | Service port                                                                                                                              |
+| labels.environment                         | string  | test                                                                                                                                                     | Environment name                                                                                                                          |
diff --git a/docs/source/deployment/frameworks/litellm.md b/docs/deployment/frameworks/litellm.md
similarity index 97%
rename from docs/source/deployment/frameworks/litellm.md
rename to docs/deployment/frameworks/litellm.md
index 6dd3607ca5e37..3011cde830180 100644
--- a/docs/source/deployment/frameworks/litellm.md
+++ b/docs/deployment/frameworks/litellm.md
@@ -1,6 +1,7 @@
-(deployment-litellm)=
-
-# LiteLLM
+---
+title: LiteLLM
+---
+[](){ #deployment-litellm }
 
 [LiteLLM](https://github.com/BerriAI/litellm) call all LLM APIs using the OpenAI format [Bedrock, Huggingface, VertexAI, TogetherAI, Azure, OpenAI, Groq etc.]
 
diff --git a/docs/source/deployment/frameworks/lobe-chat.md b/docs/deployment/frameworks/lobe-chat.md
similarity index 89%
rename from docs/source/deployment/frameworks/lobe-chat.md
rename to docs/deployment/frameworks/lobe-chat.md
index 6d86b7fa9cce1..cd95c028155e4 100644
--- a/docs/source/deployment/frameworks/lobe-chat.md
+++ b/docs/deployment/frameworks/lobe-chat.md
@@ -1,6 +1,7 @@
-(deployment-lobe-chat)=
-
-# Lobe Chat
+---
+title: Lobe Chat
+---
+[](){ #deployment-lobe-chat }
 
 [Lobe Chat](https://github.com/lobehub/lobe-chat) is an open-source, modern-design ChatGPT/LLMs UI/Framework.
 
diff --git a/docs/source/deployment/frameworks/lws.md b/docs/deployment/frameworks/lws.md
similarity index 99%
rename from docs/source/deployment/frameworks/lws.md
rename to docs/deployment/frameworks/lws.md
index 4e9a03b5c4c17..18282a89ddfff 100644
--- a/docs/source/deployment/frameworks/lws.md
+++ b/docs/deployment/frameworks/lws.md
@@ -1,6 +1,7 @@
-(deployment-lws)=
-
-# LWS
+---
+title: LWS
+---
+[](){ #deployment-lws }
 
 LeaderWorkerSet (LWS) is a Kubernetes API that aims to address common deployment patterns of AI/ML inference workloads.
 A major use case is for multi-host/multi-node distributed inference.
diff --git a/docs/source/deployment/frameworks/modal.md b/docs/deployment/frameworks/modal.md
similarity index 85%
rename from docs/source/deployment/frameworks/modal.md
rename to docs/deployment/frameworks/modal.md
index e7c42088e36a9..dbdb739a10005 100644
--- a/docs/source/deployment/frameworks/modal.md
+++ b/docs/deployment/frameworks/modal.md
@@ -1,6 +1,7 @@
-(deployment-modal)=
-
-# Modal
+---
+title: Modal
+---
+[](){ #deployment-modal }
 
 vLLM can be run on cloud GPUs with [Modal](https://modal.com), a serverless computing platform designed for fast auto-scaling.
 
diff --git a/docs/source/deployment/frameworks/open-webui.md b/docs/deployment/frameworks/open-webui.md
similarity index 87%
rename from docs/source/deployment/frameworks/open-webui.md
rename to docs/deployment/frameworks/open-webui.md
index 83e5303a00ef2..1ab1931068fae 100644
--- a/docs/source/deployment/frameworks/open-webui.md
+++ b/docs/deployment/frameworks/open-webui.md
@@ -1,6 +1,7 @@
-(deployment-open-webui)=
-
-# Open WebUI
+---
+title: Open WebUI
+---
+[](){ #deployment-open-webui }
 
 1. Install the [Docker](https://docs.docker.com/engine/install/)
 
@@ -25,5 +26,4 @@ ghcr.io/open-webui/open-webui:main
 
 On the top of the web page, you can see the model `qwen/Qwen1.5-0.5B-Chat`.
 
-:::{image} /assets/deployment/open_webui.png
-:::
+![](../../assets/deployment/open_webui.png)
diff --git a/docs/source/deployment/frameworks/retrieval_augmented_generation.md b/docs/deployment/frameworks/retrieval_augmented_generation.md
similarity index 96%
rename from docs/source/deployment/frameworks/retrieval_augmented_generation.md
rename to docs/deployment/frameworks/retrieval_augmented_generation.md
index f84451fafe91d..cb26c8378deec 100644
--- a/docs/source/deployment/frameworks/retrieval_augmented_generation.md
+++ b/docs/deployment/frameworks/retrieval_augmented_generation.md
@@ -1,6 +1,7 @@
-(deployment-retrieval-augmented-generation)=
-
-# Retrieval-Augmented Generation
+---
+title: Retrieval-Augmented Generation
+---
+[](){ #deployment-retrieval-augmented-generation }
 
 [Retrieval-augmented generation (RAG)](https://en.wikipedia.org/wiki/Retrieval-augmented_generation) is a technique that enables generative artificial intelligence (Gen AI) models to retrieve and incorporate new information. It modifies interactions with a large language model (LLM) so that the model responds to user queries with reference to a specified set of documents, using this information to supplement information from its pre-existing training data. This allows LLMs to use domain-specific and/or updated information. Use cases include providing chatbot access to internal company data or generating responses based on authoritative sources.
 
diff --git a/docs/source/deployment/frameworks/skypilot.md b/docs/deployment/frameworks/skypilot.md
similarity index 97%
rename from docs/source/deployment/frameworks/skypilot.md
rename to docs/deployment/frameworks/skypilot.md
index 5e101b9001033..1844a50c56041 100644
--- a/docs/source/deployment/frameworks/skypilot.md
+++ b/docs/deployment/frameworks/skypilot.md
@@ -1,12 +1,11 @@
-(deployment-skypilot)=
+---
+title: SkyPilot
+---
+[](){ #deployment-skypilot }
 
-# SkyPilot
-
-:::{raw} html
 <p align="center">
   <img src="https://imgur.com/yxtzPEu.png" alt="vLLM"/>
 </p>
-:::
 
 vLLM can be **run and scaled to multiple service replicas on clouds and Kubernetes** with [SkyPilot](https://github.com/skypilot-org/skypilot), an open-source framework for running LLMs on any cloud. More examples for various open models, such as Llama-3, Mixtral, etc, can be found in [SkyPilot AI gallery](https://skypilot.readthedocs.io/en/latest/gallery/index.html).
 
@@ -104,10 +103,8 @@ service:
   max_completion_tokens: 1
 ```
 
-:::{raw} html
 <details>
 <summary>Click to see the full recipe YAML</summary>
-:::
 
 ```yaml
 service:
@@ -153,9 +150,7 @@ run: |
     2>&1 | tee api_server.log
 ```
 
-:::{raw} html
 </details>
-:::
 
 Start the serving the Llama-3 8B model on multiple replicas:
 
@@ -169,10 +164,8 @@ Wait until the service is ready:
 watch -n10 sky serve status vllm
 ```
 
-:::{raw} html
 <details>
 <summary>Example outputs:</summary>
-:::
 
 ```console
 Services
@@ -185,9 +178,7 @@ vllm          1   1        xx.yy.zz.121  18 mins ago  1x GCP([Spot]{'L4': 1})  R
 vllm          2   1        xx.yy.zz.245  18 mins ago  1x GCP([Spot]{'L4': 1})  READY   us-east4
 ```
 
-:::{raw} html
 </details>
-:::
 
 After the service is READY, you can find a single endpoint for the service and access the service with the endpoint:
 
@@ -223,10 +214,8 @@ service:
 
 This will scale the service up to when the QPS exceeds 2 for each replica.
 
-:::{raw} html
 <details>
 <summary>Click to see the full recipe YAML</summary>
-:::
 
 ```yaml
 service:
@@ -275,9 +264,7 @@ run: |
     2>&1 | tee api_server.log
 ```
 
-:::{raw} html
 </details>
-:::
 
 To update the service with the new config:
 
@@ -295,10 +282,8 @@ sky serve down vllm
 
 It is also possible to access the Llama-3 service with a separate GUI frontend, so the user requests send to the GUI will be load-balanced across replicas.
 
-:::{raw} html
 <details>
 <summary>Click to see the full GUI YAML</summary>
-:::
 
 ```yaml
 envs:
@@ -328,9 +313,7 @@ run: |
     --stop-token-ids 128009,128001 | tee ~/gradio.log
 ```
 
-:::{raw} html
 </details>
-:::
 
 1. Start the chat web UI:
 
diff --git a/docs/source/deployment/frameworks/streamlit.md b/docs/deployment/frameworks/streamlit.md
similarity index 91%
rename from docs/source/deployment/frameworks/streamlit.md
rename to docs/deployment/frameworks/streamlit.md
index 084550ec991e1..8956d1ddc7d8d 100644
--- a/docs/source/deployment/frameworks/streamlit.md
+++ b/docs/deployment/frameworks/streamlit.md
@@ -1,6 +1,7 @@
-(deployment-streamlit)=
-
-# Streamlit
+---
+title: Streamlit
+---
+[](){ #deployment-streamlit }
 
 [Streamlit](https://github.com/streamlit/streamlit) lets you transform Python scripts into interactive web apps in minutes, instead of weeks. Build dashboards, generate reports, or create chat apps.
 
@@ -38,5 +39,4 @@ VLLM_API_BASE="http://vllm-server-host:vllm-server-port/v1" streamlit run stream
 streamlit run streamlit_openai_chatbot_webserver.py --logger.level=debug
 ```
 
-:::{image} /assets/deployment/streamlit-chat.png
-:::
+![](../../assets/deployment/streamlit-chat.png)
diff --git a/docs/source/deployment/frameworks/triton.md b/docs/deployment/frameworks/triton.md
similarity index 87%
rename from docs/source/deployment/frameworks/triton.md
rename to docs/deployment/frameworks/triton.md
index 94d87120159c6..082bc24d85aad 100644
--- a/docs/source/deployment/frameworks/triton.md
+++ b/docs/deployment/frameworks/triton.md
@@ -1,5 +1,6 @@
-(deployment-triton)=
-
-# NVIDIA Triton
+---
+title: NVIDIA Triton
+---
+[](){ #deployment-triton }
 
 The [Triton Inference Server](https://github.com/triton-inference-server) hosts a tutorial demonstrating how to quickly deploy a simple [facebook/opt-125m](https://huggingface.co/facebook/opt-125m) model using vLLM. Please see [Deploying a vLLM model in Triton](https://github.com/triton-inference-server/tutorials/blob/main/Quick_Deploy/vLLM/README.md#deploying-a-vllm-model-in-triton) for more details.
diff --git a/docs/source/deployment/integrations/kserve.md b/docs/deployment/integrations/kserve.md
similarity index 85%
rename from docs/source/deployment/integrations/kserve.md
rename to docs/deployment/integrations/kserve.md
index c780fd74e8f55..754b983dee92c 100644
--- a/docs/source/deployment/integrations/kserve.md
+++ b/docs/deployment/integrations/kserve.md
@@ -1,6 +1,7 @@
-(deployment-kserve)=
-
-# KServe
+---
+title: KServe
+---
+[](){ #deployment-kserve }
 
 vLLM can be deployed with [KServe](https://github.com/kserve/kserve) on Kubernetes for highly scalable distributed model serving.
 
diff --git a/docs/source/deployment/integrations/kubeai.md b/docs/deployment/integrations/kubeai.md
similarity index 93%
rename from docs/source/deployment/integrations/kubeai.md
rename to docs/deployment/integrations/kubeai.md
index 2f5772e075d87..ba0a3c52cca7a 100644
--- a/docs/source/deployment/integrations/kubeai.md
+++ b/docs/deployment/integrations/kubeai.md
@@ -1,6 +1,7 @@
-(deployment-kubeai)=
-
-# KubeAI
+---
+title: KubeAI
+---
+[](){ #deployment-kubeai }
 
 [KubeAI](https://github.com/substratusai/kubeai) is a Kubernetes operator that enables you to deploy and manage AI models on Kubernetes. It provides a simple and scalable way to deploy vLLM in production. Functionality such as scale-from-zero, load based autoscaling, model caching, and much more is provided out of the box with zero external dependencies.
 
diff --git a/docs/source/deployment/integrations/llamastack.md b/docs/deployment/integrations/llamastack.md
similarity index 94%
rename from docs/source/deployment/integrations/llamastack.md
rename to docs/deployment/integrations/llamastack.md
index a6c3569637abf..2ae600a423ff9 100644
--- a/docs/source/deployment/integrations/llamastack.md
+++ b/docs/deployment/integrations/llamastack.md
@@ -1,6 +1,7 @@
-(deployment-llamastack)=
-
-# Llama Stack
+---
+title: Llama Stack
+---
+[](){ #deployment-llamastack }
 
 vLLM is also available via [Llama Stack](https://github.com/meta-llama/llama-stack) .
 
diff --git a/docs/source/deployment/integrations/llmaz.md b/docs/deployment/integrations/llmaz.md
similarity index 87%
rename from docs/source/deployment/integrations/llmaz.md
rename to docs/deployment/integrations/llmaz.md
index cd4a76353d264..03d284c34769c 100644
--- a/docs/source/deployment/integrations/llmaz.md
+++ b/docs/deployment/integrations/llmaz.md
@@ -1,6 +1,7 @@
-(deployment-llmaz)=
-
-# llmaz
+---
+title: llmaz
+---
+[](){ #deployment-llmaz }
 
 [llmaz](https://github.com/InftyAI/llmaz) is an easy-to-use and advanced inference platform for large language models on Kubernetes, aimed for production use. It uses vLLM as the default model serving backend.
 
diff --git a/docs/source/deployment/integrations/production-stack.md b/docs/deployment/integrations/production-stack.md
similarity index 98%
rename from docs/source/deployment/integrations/production-stack.md
rename to docs/deployment/integrations/production-stack.md
index 05f1568306cc9..8288a4b6e6be3 100644
--- a/docs/source/deployment/integrations/production-stack.md
+++ b/docs/deployment/integrations/production-stack.md
@@ -1,6 +1,7 @@
-(deployment-production-stack)=
-
-# Production stack
+---
+title: Production stack
+---
+[](){ #deployment-production-stack }
 
 Deploying vLLM on Kubernetes is a scalable and efficient way to serve machine learning models. This guide walks you through deploying vLLM using the [vLLM production stack](https://github.com/vllm-project/production-stack). Born out of a Berkeley-UChicago collaboration, [vLLM production stack](https://github.com/vllm-project/production-stack) is an officially released, production-optimized codebase under the [vLLM project](https://github.com/vllm-project), designed for LLM deployment with:
 
@@ -114,7 +115,7 @@ To remove the deployment, run:
 sudo helm uninstall vllm
 ```
 
-------
+---
 
 ### (Advanced) Configuring vLLM production stack
 
diff --git a/docs/source/deployment/k8s.md b/docs/deployment/k8s.md
similarity index 98%
rename from docs/source/deployment/k8s.md
rename to docs/deployment/k8s.md
index 9079cfa8e1b66..bd2bd44cd5225 100644
--- a/docs/source/deployment/k8s.md
+++ b/docs/deployment/k8s.md
@@ -1,6 +1,7 @@
-(deployment-k8s)=
-
-# Using Kubernetes
+---
+title: Using Kubernetes
+---
+[](){ #deployment-k8s }
 
 Deploying vLLM on Kubernetes is a scalable and efficient way to serve machine learning models. This guide walks you through deploying vLLM using native Kubernetes.
 
@@ -19,9 +20,8 @@ Alternatively, you can deploy vLLM to Kubernetes using any of the following:
 
 ## Deployment with CPUs
 
-:::{note}
-The use of CPUs here is for demonstration and testing purposes only and its performance will not be on par with GPUs.
-:::
+!!! note
+    The use of CPUs here is for demonstration and testing purposes only and its performance will not be on par with GPUs.
 
 First, create a Kubernetes PVC and Secret for downloading and storing Hugging Face model:
 
diff --git a/docs/source/deployment/nginx.md b/docs/deployment/nginx.md
similarity index 77%
rename from docs/source/deployment/nginx.md
rename to docs/deployment/nginx.md
index bf404f1098c3b..9d1f74475781d 100644
--- a/docs/source/deployment/nginx.md
+++ b/docs/deployment/nginx.md
@@ -1,20 +1,21 @@
-(nginxloadbalancer)=
-
-# Using Nginx
+---
+title: Using Nginx
+---
+[](){ #nginxloadbalancer }
 
 This document shows how to launch multiple vLLM serving containers and use Nginx to act as a load balancer between the servers.
 
 Table of contents:
 
-1. [Build Nginx Container](#nginxloadbalancer-nginx-build)
-2. [Create Simple Nginx Config file](#nginxloadbalancer-nginx-conf)
-3. [Build vLLM Container](#nginxloadbalancer-nginx-vllm-container)
-4. [Create Docker Network](#nginxloadbalancer-nginx-docker-network)
-5. [Launch vLLM Containers](#nginxloadbalancer-nginx-launch-container)
-6. [Launch Nginx](#nginxloadbalancer-nginx-launch-nginx)
-7. [Verify That vLLM Servers Are Ready](#nginxloadbalancer-nginx-verify-nginx)
+1. [Build Nginx Container][nginxloadbalancer-nginx-build]
+2. [Create Simple Nginx Config file][nginxloadbalancer-nginx-conf]
+3. [Build vLLM Container][nginxloadbalancer-nginx-vllm-container]
+4. [Create Docker Network][nginxloadbalancer-nginx-docker-network]
+5. [Launch vLLM Containers][nginxloadbalancer-nginx-launch-container]
+6. [Launch Nginx][nginxloadbalancer-nginx-launch-nginx]
+7. [Verify That vLLM Servers Are Ready][nginxloadbalancer-nginx-verify-nginx]
 
-(nginxloadbalancer-nginx-build)=
+[](){ #nginxloadbalancer-nginx-build }
 
 ## Build Nginx Container
 
@@ -39,7 +40,7 @@ Build the container:
 docker build . -f Dockerfile.nginx --tag nginx-lb
 ```
 
-(nginxloadbalancer-nginx-conf)=
+[](){ #nginxloadbalancer-nginx-conf }
 
 ## Create Simple Nginx Config file
 
@@ -63,7 +64,7 @@ server {
 }
 ```
 
-(nginxloadbalancer-nginx-vllm-container)=
+[](){ #nginxloadbalancer-nginx-vllm-container }
 
 ## Build vLLM Container
 
@@ -79,7 +80,7 @@ cd $vllm_root
 docker build -f docker/Dockerfile . --tag vllm --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy
 ```
 
-(nginxloadbalancer-nginx-docker-network)=
+[](){ #nginxloadbalancer-nginx-docker-network }
 
 ## Create Docker Network
 
@@ -87,7 +88,7 @@ docker build -f docker/Dockerfile . --tag vllm --build-arg http_proxy=$http_prox
 docker network create vllm_nginx
 ```
 
-(nginxloadbalancer-nginx-launch-container)=
+[](){ #nginxloadbalancer-nginx-launch-container }
 
 ## Launch vLLM Containers
 
@@ -105,11 +106,10 @@ docker run -itd --ipc host --network vllm_nginx --gpus device=0 --shm-size=10.24
 docker run -itd --ipc host --network vllm_nginx --gpus device=1 --shm-size=10.24gb -v $hf_cache_dir:/root/.cache/huggingface/ -p 8082:8000 --name vllm1 vllm --model meta-llama/Llama-2-7b-chat-hf
 ```
 
-:::{note}
-If you are behind proxy, you can pass the proxy settings to the docker run command via `-e http_proxy=$http_proxy -e https_proxy=$https_proxy`.
-:::
+!!! note
+    If you are behind proxy, you can pass the proxy settings to the docker run command via `-e http_proxy=$http_proxy -e https_proxy=$https_proxy`.
 
-(nginxloadbalancer-nginx-launch-nginx)=
+[](){ #nginxloadbalancer-nginx-launch-nginx }
 
 ## Launch Nginx
 
@@ -117,7 +117,7 @@ If you are behind proxy, you can pass the proxy settings to the docker run comma
 docker run -itd -p 8000:80 --network vllm_nginx -v ./nginx_conf/:/etc/nginx/conf.d/ --name nginx-lb nginx-lb:latest
 ```
 
-(nginxloadbalancer-nginx-verify-nginx)=
+[](){ #nginxloadbalancer-nginx-verify-nginx }
 
 ## Verify That vLLM Servers Are Ready
 
diff --git a/docs/source/deployment/security.md b/docs/deployment/security.md
similarity index 100%
rename from docs/source/deployment/security.md
rename to docs/deployment/security.md
diff --git a/docs/source/design/arch_overview.md b/docs/design/arch_overview.md
similarity index 81%
rename from docs/source/design/arch_overview.md
rename to docs/design/arch_overview.md
index 94bda8b5c58d5..75d3e1b7ccc78 100644
--- a/docs/source/design/arch_overview.md
+++ b/docs/design/arch_overview.md
@@ -1,22 +1,18 @@
-(arch-overview)=
-
-# Architecture Overview
+---
+title: Architecture Overview
+---
+[](){ #arch-overview }
 
 This document provides an overview of the vLLM architecture.
 
-:::{contents} Table of Contents
-:depth: 2
-:local: true
-:::
+[TOC]
 
 ## Entrypoints
 
 vLLM provides a number of entrypoints for interacting with the system. The
 following diagram shows the relationship between them.
 
-:::{image} /assets/design/arch_overview/entrypoints.excalidraw.png
-:alt: Entrypoints Diagram
-:::
+![Entrypoints Diagram](../assets/design/arch_overview/entrypoints.excalidraw.png)
 
 ### LLM Class
 
@@ -77,16 +73,14 @@ python -m vllm.entrypoints.openai.api_server --model <model>
 
 That code can be found in <gh-file:vllm/entrypoints/openai/api_server.py>.
 
-More details on the API server can be found in the [OpenAI-Compatible Server](#openai-compatible-server) document.
+More details on the API server can be found in the [OpenAI-Compatible Server][openai-compatible-server] document.
 
 ## LLM Engine
 
 The `LLMEngine` and `AsyncLLMEngine` classes are central to the functioning of
 the vLLM system, handling model inference and asynchronous request processing.
 
-:::{image} /assets/design/arch_overview/llm_engine.excalidraw.png
-:alt: LLMEngine Diagram
-:::
+![LLMEngine Diagram](../assets/design/arch_overview/llm_engine.excalidraw.png)
 
 ### LLMEngine
 
@@ -137,18 +131,16 @@ input tensors and capturing cudagraphs.
 ## Model
 
 Every model runner object has one model object, which is the actual
-`torch.nn.Module` instance. See [huggingface_integration](#huggingface-integration) for how various
+`torch.nn.Module` instance. See [Integration with HuggingFace][huggingface-integration] for how various
 configurations affect the class we ultimately get.
 
 ## Class Hierarchy
 
 The following figure shows the class hierarchy of vLLM:
 
-> :::{figure} /assets/design/hierarchy.png
-> :align: center
-> :alt: query
-> :width: 100%
-> :::
+> <figure markdown="span">
+>   ![](../assets/design/hierarchy.png){ align="center" alt="vLLM class hierarchy" width="100%" }
+> </figure>
 
 There are several important design choices behind this class hierarchy:
 
@@ -178,44 +170,43 @@ of a vision model and a language model. By making the constructor uniform, we
 can easily create a vision model and a language model and compose them into a
 vision-language model.
 
-:::{note}
-To support this change, all vLLM models' signatures have been updated to:
+!!! note
+    To support this change, all vLLM models' signatures have been updated to:
 
-```python
-def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-```
-
-To avoid accidentally passing incorrect arguments, the constructor is now keyword-only. This ensures that the constructor will raise an error if old configurations are passed. vLLM developers have already made this change for all models within vLLM. For out-of-tree registered models, developers need to update their models, for example by adding shim code to adapt the old constructor signature to the new one:
-
-```python
-class MyOldModel(nn.Module):
-    def __init__(
-        self,
-        config,
-        cache_config: Optional[CacheConfig] = None,
-        quant_config: Optional[QuantizationConfig] = None,
-        lora_config: Optional[LoRAConfig] = None,
-        prefix: str = "",
-    ) -> None:
-        ...
-
-from vllm.config import VllmConfig
-class MyNewModel(MyOldModel):
+    ```python
     def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
-        config = vllm_config.model_config.hf_config
-        cache_config = vllm_config.cache_config
-        quant_config = vllm_config.quant_config
-        lora_config = vllm_config.lora_config
-        super().__init__(config, cache_config, quant_config, lora_config, prefix)
+    ```
 
-if __version__ >= "0.6.4":
-    MyModel = MyNewModel
-else:
-    MyModel = MyOldModel
-```
+    To avoid accidentally passing incorrect arguments, the constructor is now keyword-only. This ensures that the constructor will raise an error if old configurations are passed. vLLM developers have already made this change for all models within vLLM. For out-of-tree registered models, developers need to update their models, for example by adding shim code to adapt the old constructor signature to the new one:
 
-This way, the model can work with both old and new versions of vLLM.
-:::
+    ```python
+    class MyOldModel(nn.Module):
+        def __init__(
+            self,
+            config,
+            cache_config: Optional[CacheConfig] = None,
+            quant_config: Optional[QuantizationConfig] = None,
+            lora_config: Optional[LoRAConfig] = None,
+            prefix: str = "",
+        ) -> None:
+            ...
+
+    from vllm.config import VllmConfig
+    class MyNewModel(MyOldModel):
+        def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+            config = vllm_config.model_config.hf_config
+            cache_config = vllm_config.cache_config
+            quant_config = vllm_config.quant_config
+            lora_config = vllm_config.lora_config
+            super().__init__(config, cache_config, quant_config, lora_config, prefix)
+
+    if __version__ >= "0.6.4":
+        MyModel = MyNewModel
+    else:
+        MyModel = MyOldModel
+    ```
+
+    This way, the model can work with both old and new versions of vLLM.
 
 3\. **Sharding and Quantization at Initialization**: Certain features require
 changing the model weights. For example, tensor parallelism needs to shard the
diff --git a/docs/source/design/automatic_prefix_caching.md b/docs/design/automatic_prefix_caching.md
similarity index 98%
rename from docs/source/design/automatic_prefix_caching.md
rename to docs/design/automatic_prefix_caching.md
index 3928e0c16568b..80883bb1d90d8 100644
--- a/docs/source/design/automatic_prefix_caching.md
+++ b/docs/design/automatic_prefix_caching.md
@@ -1,6 +1,7 @@
-(design-automatic-prefix-caching)=
-
-# Automatic Prefix Caching
+---
+title: Automatic Prefix Caching
+---
+[](){ #design-automatic-prefix-caching }
 
 The core idea of [PagedAttention](https://blog.vllm.ai/2023/06/20/vllm.html) is to partition the KV cache of each request into KV Blocks. Each block contains the attention keys and values for a fixed number of tokens. The PagedAttention algorithm allows these blocks to be stored in non-contiguous physical memory so that we can eliminate memory fragmentation by allocating the memory on demand.
 
diff --git a/docs/source/design/huggingface_integration.md b/docs/design/huggingface_integration.md
similarity index 98%
rename from docs/source/design/huggingface_integration.md
rename to docs/design/huggingface_integration.md
index 7d271b1cfb3a0..68cc27ea768c6 100644
--- a/docs/source/design/huggingface_integration.md
+++ b/docs/design/huggingface_integration.md
@@ -1,6 +1,7 @@
-(huggingface-integration)=
-
-# Integration with HuggingFace
+---
+title: Integration with HuggingFace
+---
+[](){ #huggingface-integration }
 
 This document describes how vLLM integrates with HuggingFace libraries. We will explain step by step what happens under the hood when we run `vllm serve`.
 
diff --git a/docs/source/design/kernel/paged_attention.md b/docs/design/kernel/paged_attention.md
similarity index 94%
rename from docs/source/design/kernel/paged_attention.md
rename to docs/design/kernel/paged_attention.md
index e1770c8226435..ad8b5c9264d24 100644
--- a/docs/source/design/kernel/paged_attention.md
+++ b/docs/design/kernel/paged_attention.md
@@ -1,6 +1,7 @@
-(design-paged-attention)=
-
-# vLLM Paged Attention
+---
+title: vLLM Paged Attention
+---
+[](){ #design-paged-attention }
 
 - Currently, vLLM utilizes its own implementation of a multi-head query
   attention kernel (`csrc/attention/attention_kernels.cu`).
@@ -139,26 +140,22 @@
   const scalar_t* q_ptr = q + seq_idx * q_stride + head_idx * HEAD_SIZE;
   ```
 
-  :::{figure} ../../assets/kernel/query.png
-  :align: center
-  :alt: query
-  :width: 70%
-
-  Query data of one token at one head
-  :::
+  <figure markdown="span">
+    ![](../../assets/kernel/query.png){ align="center" alt="query" width="70%" }
+    <figcaption>Query data of one token at one head
+    </figcaption>
+  </figure>
 
 - Each thread defines its own `q_ptr` which points to the assigned
   query token data on global memory. For example, if `VEC_SIZE` is 4
   and `HEAD_SIZE` is 128, the `q_ptr` points to data that contains
   total of 128 elements divided into 128 / 4 = 32 vecs.
 
-  :::{figure} ../../assets/kernel/q_vecs.png
-  :align: center
-  :alt: q_vecs
-  :width: 70%
-
-  `q_vecs` for one thread group
-  :::
+  <figure markdown="span">
+    ![](../../assets/kernel/q_vecs.png){ align="center" alt="q_vecs" width="70%" }
+    <figcaption><code>q_vecs</code> for one thread group
+    </figcaption>
+  </figure>
 
   ```cpp
   __shared__ Q_vec q_vecs[THREAD_GROUP_SIZE][NUM_VECS_PER_THREAD];
@@ -195,13 +192,11 @@
   points to key token data based on `k_cache` at assigned block,
   assigned head and assigned token.
 
-  :::{figure} ../../assets/kernel/key.png
-  :align: center
-  :alt: key
-  :width: 70%
-
-  Key data of all context tokens at one head
-  :::
+  <figure markdown="span">
+    ![](../../assets/kernel/key.png){ align="center" alt="key" width="70%" }
+    <figcaption>Key data of all context tokens at one head
+    </figcaption>
+  </figure>
 
 - The diagram above illustrates the memory layout for key data. It
   assumes that the `BLOCK_SIZE` is 16, `HEAD_SIZE` is 128, `x` is
@@ -214,13 +209,11 @@
   elements for one token) that will be processed by 2 threads (one
   thread group) separately.
 
-  :::{figure} ../../assets/kernel/k_vecs.png
-  :align: center
-  :alt: k_vecs
-  :width: 70%
-
-  `k_vecs` for one thread
-  :::
+  <figure markdown="span">
+    ![](../../assets/kernel/k_vecs.png){ align="center" alt="k_vecs" width="70%" }
+    <figcaption><code>k_vecs</code> for one thread
+    </figcaption>
+  </figure>
 
   ```cpp
   K_vec k_vecs[NUM_VECS_PER_THREAD]
@@ -289,14 +282,12 @@
   should be performed across the entire thread block, encompassing
   results between the query token and all context key tokens.
 
-  :::{math}
-  :nowrap: true
-
+  $$
   \begin{gather*}
   m(x):=\max _i \quad x_i \\ \quad f(x):=\left[\begin{array}{lll}e^{x_1-m(x)} & \ldots & e^{x_B-m(x)}\end{array}\right]\\ \quad \ell(x):=\sum_i f(x)_i \\
   \quad \operatorname{softmax}(x):=\frac{f(x)}{\ell(x)}
   \end{gather*}
-  :::
+  $$
 
 ### `qk_max` and `logits`
 
@@ -379,29 +370,23 @@
 
 ## Value
 
-:::{figure} ../../assets/kernel/value.png
-:align: center
-:alt: value
-:width: 70%
+<figure markdown="span">
+  ![](../../assets/kernel/value.png){ align="center" alt="value" width="70%" }
+  <figcaption>Value data of all context tokens at one head
+  </figcaption>
+</figure>
 
-Value data of all context tokens at one head
-:::
+<figure markdown="span">
+  ![](../../assets/kernel/logits_vec.png){ align="center" alt="logits_vec" width="50%" }
+  <figcaption><code>logits_vec</code> for one thread
+  </figcaption>
+</figure>
 
-:::{figure} ../../assets/kernel/logits_vec.png
-:align: center
-:alt: logits_vec
-:width: 50%
-
-`logits_vec` for one thread
-:::
-
-:::{figure} ../../assets/kernel/v_vec.png
-:align: center
-:alt: v_vec
-:width: 70%
-
-List of `v_vec` for one thread
-:::
+<figure markdown="span">
+  ![](../../assets/kernel/v_vec.png){ align="center" alt="v_vec" width="70%" }
+  <figcaption>List of <code>v_vec</code> for one thread
+  </figcaption>
+</figure>
 
 - Now we need to retrieve the value data and perform dot multiplication
   with `logits`. Unlike query and key, there is no thread group
diff --git a/docs/source/design/mm_processing.md b/docs/design/mm_processing.md
similarity index 61%
rename from docs/source/design/mm_processing.md
rename to docs/design/mm_processing.md
index dc92a3c2c511e..f3685ce76a4bd 100644
--- a/docs/source/design/mm_processing.md
+++ b/docs/design/mm_processing.md
@@ -1,10 +1,11 @@
-(mm-processing)=
+---
+title: Multi-Modal Data Processing
+---
+[](){ #mm-processing }
 
-# Multi-Modal Data Processing
+To enable various optimizations in vLLM such as [chunked prefill][chunked-prefill] and [prefix caching][automatic-prefix-caching], we use [BaseMultiModalProcessor][vllm.multimodal.processing.BaseMultiModalProcessor] to provide the correspondence between placeholder feature tokens (e.g. `<image>`) and multi-modal inputs (e.g. the raw input image) based on the outputs of HF processor.
 
-To enable various optimizations in vLLM such as [chunked prefill](#chunked-prefill) and [prefix caching](#automatic-prefix-caching), we use {class}`~vllm.multimodal.processing.BaseMultiModalProcessor` to provide the correspondence between placeholder feature tokens (e.g. `<image>`) and multi-modal inputs (e.g. the raw input image) based on the outputs of HF processor.
-
-Here are the main features of {class}`~vllm.multimodal.processing.BaseMultiModalProcessor`:
+Here are the main features of [BaseMultiModalProcessor][vllm.multimodal.processing.BaseMultiModalProcessor]:
 
 ## Prompt Update Detection
 
@@ -15,7 +16,7 @@ One of the main responsibilities of HF processor is to update the prompt with pl
 
 The information about which tokens have been updated is key to finding the correspondence between placeholder feature tokens and multi-modal inputs.
 
-In vLLM, this information is specified using {class}`~vllm.multimodal.processing.PromptUpdate` in {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates`. We can automatically detect whether HF has updated the prompt by checking the existence of the updated tokens.
+In vLLM, this information is specified using [PromptUpdate][vllm.multimodal.processing.PromptUpdate] in [_get_prompt_updates][vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates]. We can automatically detect whether HF has updated the prompt by checking the existence of the updated tokens.
 
 ## Tokenized Prompt Inputs
 
@@ -43,22 +44,22 @@ While HF processors support text + multi-modal inputs natively, this is not so f
 
 Moreover, since the tokenized text has not passed through the HF processor, we have to apply Step 3 by ourselves to keep the output tokens and multi-modal data consistent with each other.
 
-(mm-dummy-text)=
+[](){ #mm-dummy-text }
 
 ### Dummy text
 
-We work around the first issue by requiring each model to define how to generate dummy text based on the number of multi-modal inputs, via {meth}`~vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_text`. This lets us generate dummy text corresponding to the multi-modal inputs and input them together to obtain the processed multi-modal data.
+We work around the first issue by requiring each model to define how to generate dummy text based on the number of multi-modal inputs, via [get_dummy_text][vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_text]. This lets us generate dummy text corresponding to the multi-modal inputs and input them together to obtain the processed multi-modal data.
 
-(mm-automatic-prompt-updating)=
+[](){ #mm-automatic-prompt-updating }
 
 ### Automatic prompt updating
 
 We address the second issue by implementing model-agnostic code in
-{meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._apply_prompt_updates` to automatically update the prompt with feature placeholder tokens based on the specification outputted by {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates`.
+[_apply_prompt_updates][vllm.multimodal.processing.BaseMultiModalProcessor._apply_prompt_updates] to automatically update the prompt with feature placeholder tokens based on the specification outputted by [_get_prompt_updates][vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates].
 
 ### Summary
 
-With the help of dummy text and automatic prompt updating, our multi-modal processor can finally accept both text and token prompts with multi-modal data. The detailed logic is shown in {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._apply_hf_processor_main`.
+With the help of dummy text and automatic prompt updating, our multi-modal processor can finally accept both text and token prompts with multi-modal data. The detailed logic is shown in [_apply_hf_processor_main][vllm.multimodal.processing.BaseMultiModalProcessor._apply_hf_processor_main].
 
 ## Processor Output Caching
 
@@ -66,4 +67,4 @@ Some HF processors, such as the one for Qwen2-VL, are [very slow](gh-issue:9238)
 
 When new data is passed in, we first check which items are in the cache, and which ones are missing. The missing items are passed into the HF processor in a single batch and cached, before being merged with the existing items in the cache.
 
-Since we only process the missing multi-modal data items, the number of input placeholder tokens no longer corresponds to the number of the multi-modal inputs, so they can't be passed alongside the text prompt to HF processor. Therefore, we process the text and multi-modal inputs separately, using [dummy text](#mm-dummy-text) to avoid HF errors. Since this skips HF's prompt updating code, we apply [automatic prompt updating](#mm-automatic-prompt-updating) afterwards to keep the output tokens and multi-modal data consistent with each other.
+Since we only process the missing multi-modal data items, the number of input placeholder tokens no longer corresponds to the number of the multi-modal inputs, so they can't be passed alongside the text prompt to HF processor. Therefore, we process the text and multi-modal inputs separately, using [dummy text][mm-dummy-text] to avoid HF errors. Since this skips HF's prompt updating code, we apply [automatic prompt updating][mm-automatic-prompt-updating] afterwards to keep the output tokens and multi-modal data consistent with each other.
diff --git a/docs/source/design/multiprocessing.md b/docs/design/multiprocessing.md
similarity index 97%
rename from docs/source/design/multiprocessing.md
rename to docs/design/multiprocessing.md
index 43fe5fe2e5e94..649edfcce69b2 100644
--- a/docs/source/design/multiprocessing.md
+++ b/docs/design/multiprocessing.md
@@ -2,14 +2,13 @@
 
 ## Debugging
 
-Please see the [Troubleshooting](#troubleshooting-python-multiprocessing)
+Please see the [Troubleshooting][troubleshooting-python-multiprocessing]
 page for information on known issues and how to solve them.
 
 ## Introduction
 
-:::{important}
-The source code references are to the state of the code at the time of writing in December, 2024.
-:::
+!!! warning
+    The source code references are to the state of the code at the time of writing in December, 2024.
 
 The use of Python multiprocessing in vLLM is complicated by:
 
diff --git a/docs/source/design/plugin_system.md b/docs/design/plugin_system.md
similarity index 86%
rename from docs/source/design/plugin_system.md
rename to docs/design/plugin_system.md
index 225030885f629..5027a35c23e8a 100644
--- a/docs/source/design/plugin_system.md
+++ b/docs/design/plugin_system.md
@@ -1,12 +1,13 @@
-(plugin-system)=
-
-# vLLM's Plugin System
+---
+title: vLLM's Plugin System
+---
+[](){ #plugin-system }
 
 The community frequently requests the ability to extend vLLM with custom features. To facilitate this, vLLM includes a plugin system that allows users to add custom features without modifying the vLLM codebase. This document explains how plugins work in vLLM and how to create a plugin for vLLM.
 
 ## How Plugins Work in vLLM
 
-Plugins are user-registered code that vLLM executes. Given vLLM's architecture (see [](#arch-overview)), multiple processes may be involved, especially when using distributed inference with various parallelism techniques. To enable plugins successfully, every process created by vLLM needs to load the plugin. This is done by the [load_general_plugins](https://github.com/vllm-project/vllm/blob/c76ac49d266e27aa3fea84ef2df1f813d24c91c7/vllm/plugins/__init__.py#L16) function in the `vllm.plugins` module. This function is called for every process created by vLLM before it starts any work.
+Plugins are user-registered code that vLLM executes. Given vLLM's architecture (see [Arch Overview][arch-overview]), multiple processes may be involved, especially when using distributed inference with various parallelism techniques. To enable plugins successfully, every process created by vLLM needs to load the plugin. This is done by the [load_general_plugins](https://github.com/vllm-project/vllm/blob/c76ac49d266e27aa3fea84ef2df1f813d24c91c7/vllm/plugins/__init__.py#L16) function in the `vllm.plugins` module. This function is called for every process created by vLLM before it starts any work.
 
 ## How vLLM Discovers Plugins
 
diff --git a/docs/source/design/v1/metrics.md b/docs/design/v1/metrics.md
similarity index 98%
rename from docs/source/design/v1/metrics.md
rename to docs/design/v1/metrics.md
index de80226553728..2631f28e46e42 100644
--- a/docs/source/design/v1/metrics.md
+++ b/docs/design/v1/metrics.md
@@ -57,7 +57,7 @@ In v0, the following metrics are exposed via a Prometheus-compatible `/metrics`
 - `vllm:spec_decode_num_draft_tokens_total` (Counter)
 - `vllm:spec_decode_num_emitted_tokens_total` (Counter)
 
-These are documented under [Inferencing and Serving -> Production Metrics](project:../../serving/metrics.md).
+These are documented under [Inferencing and Serving -> Production Metrics](../../serving/metrics.md).
 
 ### Grafana Dashboard
 
@@ -222,9 +222,7 @@ And the calculated intervals are:
 
 Put another way:
 
-:::{image} /assets/design/v1/metrics/intervals-1.png
-:alt: Interval calculations - common case
-:::
+![Interval calculations - common case](../../assets/design/v1/metrics/intervals-1.png)
 
 We explored the possibility of having the frontend calculate these
 intervals using the timing of events visible by the frontend. However,
@@ -239,17 +237,13 @@ When a preemption occurs during decode, since any already generated
 tokens are reused, we consider the preemption as affecting the
 inter-token, decode, and inference intervals.
 
-:::{image} /assets/design/v1/metrics/intervals-2.png
-:alt: Interval calculations - preempted decode
-:::
+![Interval calculations - preempted decode](../../assets/design/v1/metrics/intervals-2.png)
 
 When a preemption occurs during prefill (assuming such an event
 is possible), we consider the preemption as affecting the
 time-to-first-token and prefill intervals.
 
-:::{image} /assets/design/v1/metrics/intervals-3.png
-:alt: Interval calculations - preempted prefill
-:::
+![Interval calculations - preempted prefill](../../assets/design/v1/metrics/intervals-3.png)
 
 ### Frontend Stats Collection
 
@@ -467,7 +461,7 @@ In general:
    hatch](https://kubernetes.io/docs/concepts/cluster-administration/system-metrics/#show-hidden-metrics)
    for some time before deleting them.
 
-See the [deprecation policy](project:../../contributing/deprecation_policy.md) for
+See the [deprecation policy](../../contributing/deprecation_policy.md) for
 the project-wide deprecation policy.
 
 ### Unimplemented - `vllm:tokens_total`
diff --git a/docs/source/design/v1/prefix_caching.md b/docs/design/v1/prefix_caching.md
similarity index 94%
rename from docs/source/design/v1/prefix_caching.md
rename to docs/design/v1/prefix_caching.md
index 0f7475777797b..ad041b0059f58 100644
--- a/docs/source/design/v1/prefix_caching.md
+++ b/docs/design/v1/prefix_caching.md
@@ -122,9 +122,7 @@ There are two design points to highlight:
 
 As a result, we will have the following components when the KV cache manager is initialized:
 
-:::{image} /assets/design/v1/prefix_caching/overview.png
-:alt: Component Overview
-:::
+![Component Overview](../../assets/design/v1/prefix_caching/overview.png)
 
 * Block Pool: A list of KVCacheBlock.  
 * Free Block Queue: Only store the pointers of head and tail blocks for manipulations.  
@@ -194,9 +192,7 @@ As can be seen, block 3 is a new full block and is cached. However, it is redund
 
 When a request is finished, we free all its blocks if no other requests are using them (reference count = 0). In this example, we free request 1 and block 2, 3, 4, 8 associated with it. We can see that the freed blocks are added to the tail of the free queue in the *reverse* order. This is because the last block of a request must hash more tokens and is less likely to be reused by other requests. As a result, it should be evicted first.
 
-:::{image} /assets/design/v1/prefix_caching/free.png
-:alt: Free Queue after Free a Request
-:::
+![Free queue after a request is freed](../../assets/design/v1/prefix_caching/free.png)
 
 ### Eviction (LRU)
 
@@ -212,36 +208,24 @@ In this example, we assume the block size is 4 (each block can cache 4 tokens),
 
 **Time 1: The cache is empty and a new request comes in.** We allocate 4 blocks. 3 of them are already full and cached. The fourth block is partially full with 3 of 4 tokens.
 
-:::{image} /assets/design/v1/prefix_caching/example-time-1.png
-:alt: Example Time 1
-:::
+![Example Time 1](../../assets/design/v1/prefix_caching/example-time-1.png)
 
 **Time 3: Request 0 makes the block 3 full and asks for a new block to keep decoding.** We cache block 3 and allocate block 4.
 
-:::{image} /assets/design/v1/prefix_caching/example-time-3.png
-:alt: Example Time 3
-:::
+![Example Time 3](../../assets/design/v1/prefix_caching/example-time-3.png)
 
 **Time 4: Request 1 comes in with the 14 prompt tokens, where the first 10 tokens are the same as request 0.** We can see that only the first 2 blocks (8 tokens) hit the cache, because the 3rd block only matches 2 of 4 tokens.
 
-:::{image} /assets/design/v1/prefix_caching/example-time-4.png
-:alt: Example Time 4
-:::
+![Example Time 4](../../assets/design/v1/prefix_caching/example-time-4.png)
 
 **Time 5: Request 0 is finished and free.** Blocks 2, 3 and 4 are added to the free queue in the reverse order (but block 2 and 3 are still cached). Block 0 and 1 are not added to the free queue because they are being used by Request 1.
 
-:::{image} /assets/design/v1/prefix_caching/example-time-5.png
-:alt: Example Time 5
-:::
+![Example Time 5](../../assets/design/v1/prefix_caching/example-time-5.png)
 
 **Time 6: Request 1 is finished and free.**
 
-:::{image} /assets/design/v1/prefix_caching/example-time-6.png
-:alt: Example Time 6
-:::
+![Example Time 6](../../assets/design/v1/prefix_caching/example-time-6.png)
 
 **Time 7: Request 2 comes in with the 29 prompt tokens, where the first 12 tokens are the same as request 0\.** Note that even the block order in the free queue was `7 - 8 - 9 - 4 - 3 - 2 - 6 - 5 - 1 - 0`, the cache hit blocks (i.e., 0, 1, 2) are touched and removed from the queue before allocation, so the free queue becomes `7 - 8 - 9 - 4 - 3 - 6 - 5`. As a result, the allocated blocks are 0 (cached), 1 (cached), 2 (cached), 7, 8, 9, 4, 3 (evicted).
 
-:::{image} /assets/design/v1/prefix_caching/example-time-7.png
-:alt: Example Time 7
-:::
+![Example Time 7](../../assets/design/v1/prefix_caching/example-time-7.png)
diff --git a/docs/source/design/v1/torch_compile.md b/docs/design/v1/torch_compile.md
similarity index 100%
rename from docs/source/design/v1/torch_compile.md
rename to docs/design/v1/torch_compile.md
diff --git a/docs/source/features/automatic_prefix_caching.md b/docs/features/automatic_prefix_caching.md
similarity index 91%
rename from docs/source/features/automatic_prefix_caching.md
rename to docs/features/automatic_prefix_caching.md
index 5c5b37c2a071a..5e92796ddda7e 100644
--- a/docs/source/features/automatic_prefix_caching.md
+++ b/docs/features/automatic_prefix_caching.md
@@ -1,14 +1,14 @@
-(automatic-prefix-caching)=
-
-# Automatic Prefix Caching
+---
+title: Automatic Prefix Caching
+---
+[](){ #automatic-prefix-caching }
 
 ## Introduction
 
 Automatic Prefix Caching (APC in short) caches the KV cache of existing queries, so that a new query can directly reuse the KV cache if it shares the same prefix with one of the existing queries, allowing the new query to skip the computation of the shared part.
 
-:::{note}
-Technical details on how vLLM implements APC can be found [here](#design-automatic-prefix-caching).
-:::
+!!! note
+    Technical details on how vLLM implements APC can be found [here][design-automatic-prefix-caching].
 
 ## Enabling APC in vLLM
 
diff --git a/docs/features/compatibility_matrix.md b/docs/features/compatibility_matrix.md
new file mode 100644
index 0000000000000..77ceea49f1732
--- /dev/null
+++ b/docs/features/compatibility_matrix.md
@@ -0,0 +1,77 @@
+---
+title: Compatibility Matrix
+---
+[](){ #compatibility-matrix }
+
+The tables below show mutually exclusive features and the support on some hardware.
+
+The symbols used have the following meanings:
+
+- ✅ = Full compatibility
+- 🟠 = Partial compatibility
+- ❌ = No compatibility
+
+!!! note
+    Click a linked ❌ or 🟠 to see the tracking issue for that unsupported feature/hardware combination.
+
+## Feature x Feature
+
+<style>
+td:not(:first-child) {
+  text-align: center !important;
+}
+td {
+  padding: 0.5rem !important;
+  white-space: nowrap;
+}
+
+th {
+  padding: 0.5rem !important;
+  min-width: 0 !important;
+}
+
+th:not(:first-child) {
+  writing-mode: vertical-lr;
+  transform: rotate(180deg)
+}
+</style>
+
+| Feature                                                   | [CP][chunked-prefill]   | [APC][automatic-prefix-caching]   | [LoRA][lora-adapter]   | <abbr title="Prompt Adapter">prmpt adptr</abbr>   | [SD][spec-decode]   | CUDA graph   | <abbr title="Pooling Models">pooling</abbr>   | <abbr title="Encoder-Decoder Models">enc-dec</abbr>   | <abbr title="Logprobs">logP</abbr>   | <abbr title="Prompt Logprobs">prmpt logP</abbr>   | <abbr title="Async Output Processing">async output</abbr>   | multi-step         | <abbr title="Multimodal Inputs">mm</abbr>   | best-of   | beam-search   |
+|-----------------------------------------------------------|-------------------------|-----------------------------------|------------------------|---------------------------------------------------|---------------------|--------------|-----------------------------------------------|-------------------------------------------------------|--------------------------------------|---------------------------------------------------|-------------------------------------------------------------|--------------------|---------------------------------------------|-----------|---------------|
+| [CP][chunked-prefill]                                     | ✅                       |                                   |                        |                                                   |                     |              |                                               |                                                       |                                      |                                                   |                                                             |                    |                                             |           |               |
+| [APC][automatic-prefix-caching]                           | ✅                       | ✅                                 |                        |                                                   |                     |              |                                               |                                                       |                                      |                                                   |                                                             |                    |                                             |           |               |
+| [LoRA][lora-adapter]                                      | ✅                       | ✅                                 | ✅                      |                                                   |                     |              |                                               |                                                       |                                      |                                                   |                                                             |                    |                                             |           |               |
+| <abbr title="Prompt Adapter">prmpt adptr</abbr>           | ✅                       | ✅                                 | ✅                      | ✅                                                 |                     |              |                                               |                                                       |                                      |                                                   |                                                             |                    |                                             |           |               |
+| [SD][spec-decode]                                         | ✅                       | ✅                                 | ❌                      | ✅                                                 | ✅                   |              |                                               |                                                       |                                      |                                                   |                                                             |                    |                                             |           |               |
+| CUDA graph                                                | ✅                       | ✅                                 | ✅                      | ✅                                                 | ✅                   | ✅            |                                               |                                                       |                                      |                                                   |                                                             |                    |                                             |           |               |
+| <abbr title="Pooling Models">pooling</abbr>               | ❌                       | ❌                                 | ❌                      | ❌                                                 | ❌                   | ❌            | ✅                                             |                                                       |                                      |                                                   |                                                             |                    |                                             |           |               |
+| <abbr title="Encoder-Decoder Models">enc-dec</abbr>       | ❌                       | [❌](gh-issue:7366)                | ❌                      | ❌                                                 | [❌](gh-issue:7366)  | ✅            | ✅                                             | ✅                                                     |                                      |                                                   |                                                             |                    |                                             |           |               |
+| <abbr title="Logprobs">logP</abbr>                        | ✅                       | ✅                                 | ✅                      | ✅                                                 | ✅                   | ✅            | ❌                                             | ✅                                                     | ✅                                    |                                                   |                                                             |                    |                                             |           |               |
+| <abbr title="Prompt Logprobs">prmpt logP</abbr>           | ✅                       | ✅                                 | ✅                      | ✅                                                 | ✅                   | ✅            | ❌                                             | ✅                                                     | ✅                                    | ✅                                                 |                                                             |                    |                                             |           |               |
+| <abbr title="Async Output Processing">async output</abbr> | ✅                       | ✅                                 | ✅                      | ✅                                                 | ❌                   | ✅            | ❌                                             | ❌                                                     | ✅                                    | ✅                                                 | ✅                                                           |                    |                                             |           |               |
+| multi-step                                                | ❌                       | ✅                                 | ❌                      | ✅                                                 | ❌                   | ✅            | ❌                                             | ❌                                                     | ✅                                    | ✅                                                 | ✅                                                           | ✅                  |                                             |           |               |
+| <abbr title="Multimodal Inputs">mm</abbr>                 | ✅                       | [🟠](gh-pr:8348)                   | [🟠](gh-pr:4194)        | ❔                                                 | ❔                   | ✅            | ✅                                             | ✅                                                     | ✅                                    | ✅                                                 | ✅                                                           | ❔                  | ✅                                           |           |               |
+| best-of                                                   | ✅                       | ✅                                 | ✅                      | ✅                                                 | [❌](gh-issue:6137)  | ✅            | ❌                                             | ✅                                                     | ✅                                    | ✅                                                 | ❔                                                           | [❌](gh-issue:7968) | ✅                                           | ✅         |               |
+| beam-search                                               | ✅                       | ✅                                 | ✅                      | ✅                                                 | [❌](gh-issue:6137)  | ✅            | ❌                                             | ✅                                                     | ✅                                    | ✅                                                 | ❔                                                           | [❌](gh-issue:7968) | ❔                                           | ✅         | ✅             |
+
+[](){ #feature-x-hardware }
+
+## Feature x Hardware
+
+| Feature                                                   | Volta              | Turing   | Ampere   | Ada   | Hopper   | CPU                | AMD   |
+|-----------------------------------------------------------|--------------------|----------|----------|-------|----------|--------------------|-------|
+| [CP][chunked-prefill]                                     | [❌](gh-issue:2729) | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
+| [APC][automatic-prefix-caching]                           | [❌](gh-issue:3687) | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
+| [LoRA][lora-adapter]                                      | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
+| <abbr title="Prompt Adapter">prmpt adptr</abbr>           | ✅                  | ✅        | ✅        | ✅     | ✅        | [❌](gh-issue:8475) | ✅     |
+| [SD][spec-decode]                                         | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
+| CUDA graph                                                | ✅                  | ✅        | ✅        | ✅     | ✅        | ❌                  | ✅     |
+| <abbr title="Pooling Models">pooling</abbr>               | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ❔     |
+| <abbr title="Encoder-Decoder Models">enc-dec</abbr>       | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ❌     |
+| <abbr title="Multimodal Inputs">mm</abbr>                 | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
+| <abbr title="Logprobs">logP</abbr>                        | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
+| <abbr title="Prompt Logprobs">prmpt logP</abbr>           | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
+| <abbr title="Async Output Processing">async output</abbr> | ✅                  | ✅        | ✅        | ✅     | ✅        | ❌                  | ❌     |
+| multi-step                                                | ✅                  | ✅        | ✅        | ✅     | ✅        | [❌](gh-issue:8477) | ✅     |
+| best-of                                                   | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
+| beam-search                                               | ✅                  | ✅        | ✅        | ✅     | ✅        | ✅                  | ✅     |
diff --git a/docs/source/features/disagg_prefill.md b/docs/features/disagg_prefill.md
similarity index 87%
rename from docs/source/features/disagg_prefill.md
rename to docs/features/disagg_prefill.md
index 2fa20140c086d..54be05647d940 100644
--- a/docs/source/features/disagg_prefill.md
+++ b/docs/features/disagg_prefill.md
@@ -1,12 +1,12 @@
-(disagg-prefill)=
-
-# Disaggregated Prefilling (experimental)
+---
+title: Disaggregated Prefilling (experimental)
+---
+[](){ #disagg-prefill }
 
 This page introduces you the disaggregated prefilling feature in vLLM.
 
-:::{note}
-This feature is experimental and subject to change.
-:::
+!!! note
+    This feature is experimental and subject to change.
 
 ## Why disaggregated prefilling?
 
@@ -15,9 +15,8 @@ Two main reasons:
 - **Tuning time-to-first-token (TTFT) and inter-token-latency (ITL) separately**. Disaggregated prefilling put prefill and decode phase of LLM inference inside different vLLM instances. This gives you the flexibility to assign different parallel strategies (e.g. `tp` and `pp`) to tune TTFT without affecting ITL, or to tune ITL without affecting TTFT.
 - **Controlling tail ITL**. Without disaggregated prefilling, vLLM may insert some prefill jobs during the decoding of one request. This results in higher tail latency. Disaggregated prefilling helps you solve this issue and control tail ITL. Chunked prefill with a proper chunk size also can achieve the same goal, but in practice it's hard to figure out the correct chunk size value. So disaggregated prefilling is a much more reliable way to control tail ITL.
 
-:::{note}
-Disaggregated prefill DOES NOT improve throughput.
-:::
+!!! note
+    Disaggregated prefill DOES NOT improve throughput.
 
 ## Usage example
 
@@ -39,21 +38,16 @@ Key abstractions for disaggregated prefilling:
 - **LookupBuffer**: LookupBuffer provides two API: `insert` KV cache and `drop_select` KV cache. The semantics of `insert` and `drop_select` are similar to SQL, where `insert` inserts a KV cache into the buffer, and `drop_select` returns the KV cache that matches the given condition and drop it from the buffer.
 - **Pipe**: A single-direction FIFO pipe for tensor transmission. It supports `send_tensor` and `recv_tensor`.
 
-:::{note}
-`insert` is non-blocking operation but `drop_select` is blocking operation.
-:::
+!!! note
+    `insert` is a non-blocking operation, but `drop_select` is a blocking operation.
 
 Here is a figure illustrating how the above 3 abstractions are organized:
 
-:::{image} /assets/features/disagg_prefill/abstraction.jpg
-:alt: Disaggregated prefilling abstractions
-:::
+![Disaggregated prefilling abstractions](../assets/features/disagg_prefill/abstraction.jpg)
 
 The workflow of disaggregated prefilling is as follows:
 
-:::{image} /assets/features/disagg_prefill/overview.jpg
-:alt: Disaggregated prefilling workflow
-:::
+![Disaggregated prefilling workflow](../assets/features/disagg_prefill/overview.jpg)
 
 The `buffer` corresponds to `insert` API in LookupBuffer, and the `drop_select` corresponds to `drop_select` API in LookupBuffer.
 
diff --git a/docs/source/features/lora.md b/docs/features/lora.md
similarity index 96%
rename from docs/source/features/lora.md
rename to docs/features/lora.md
index 5a3ce0c01f3fa..642462f7c4557 100644
--- a/docs/source/features/lora.md
+++ b/docs/features/lora.md
@@ -1,10 +1,11 @@
-(lora-adapter)=
-
-# LoRA Adapters
+---
+title: LoRA Adapters
+---
+[](){ #lora-adapter }
 
 This document shows you how to use [LoRA adapters](https://arxiv.org/abs/2106.09685) with vLLM on top of a base model.
 
-LoRA adapters can be used with any vLLM model that implements {class}`~vllm.model_executor.models.interfaces.SupportsLoRA`.
+LoRA adapters can be used with any vLLM model that implements [SupportsLoRA][vllm.model_executor.models.interfaces.SupportsLoRA].
 
 Adapters can be efficiently served on a per request basis with minimal overhead. First we download the adapter(s) and save
 them locally with
@@ -60,9 +61,8 @@ vllm serve meta-llama/Llama-2-7b-hf \
     --lora-modules sql-lora=$HOME/.cache/huggingface/hub/models--yard1--llama-2-7b-sql-lora-test/snapshots/0dfa347e8877a4d4ed19ee56c140fa518470028c/
 ```
 
-:::{note}
-The commit ID `0dfa347e8877a4d4ed19ee56c140fa518470028c` may change over time. Please check the latest commit ID in your environment to ensure you are using the correct one.
-:::
+!!! note
+    The commit ID `0dfa347e8877a4d4ed19ee56c140fa518470028c` may change over time. Please check the latest commit ID in your environment to ensure you are using the correct one.
 
 The server entrypoint accepts all other LoRA configuration parameters (`max_loras`, `max_lora_rank`, `max_cpu_loras`,
 etc.), which will apply to all forthcoming requests. Upon querying the `/models` endpoint, we should see our LoRA along
diff --git a/docs/source/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md
similarity index 84%
rename from docs/source/features/multimodal_inputs.md
rename to docs/features/multimodal_inputs.md
index bb2997f008ed5..19b6681729028 100644
--- a/docs/source/features/multimodal_inputs.md
+++ b/docs/features/multimodal_inputs.md
@@ -1,20 +1,20 @@
-(multimodal-inputs)=
+---
+title: Multimodal Inputs
+---
+[](){ #multimodal-inputs }
 
-# Multimodal Inputs
+This page teaches you how to pass multi-modal inputs to [multi-modal models][supported-mm-models] in vLLM.
 
-This page teaches you how to pass multi-modal inputs to [multi-modal models](#supported-mm-models) in vLLM.
-
-:::{note}
-We are actively iterating on multi-modal support. See [this RFC](gh-issue:4194) for upcoming changes,
-and [open an issue on GitHub](https://github.com/vllm-project/vllm/issues/new/choose) if you have any feedback or feature requests.
-:::
+!!! note
+    We are actively iterating on multi-modal support. See [this RFC](gh-issue:4194) for upcoming changes,
+    and [open an issue on GitHub](https://github.com/vllm-project/vllm/issues/new/choose) if you have any feedback or feature requests.
 
 ## Offline Inference
 
-To input multi-modal data, follow this schema in {class}`vllm.inputs.PromptType`:
+To input multi-modal data, follow this schema in [vllm.inputs.PromptType][]:
 
 - `prompt`: The prompt should follow the format that is documented on HuggingFace.
-- `multi_modal_data`: This is a dictionary that follows the schema defined in {class}`vllm.multimodal.inputs.MultiModalDataDict`.
+- `multi_modal_data`: This is a dictionary that follows the schema defined in [vllm.multimodal.inputs.MultiModalDataDict][].
 
 ### Image Inputs
 
@@ -211,16 +211,15 @@ for o in outputs:
 
 Our OpenAI-compatible server accepts multi-modal data via the [Chat Completions API](https://platform.openai.com/docs/api-reference/chat).
 
-:::{important}
-A chat template is **required** to use Chat Completions API.
-For HF format models, the default chat template is defined inside `chat_template.json` or `tokenizer_config.json`.
+!!! warning
+    A chat template is **required** to use the Chat Completions API.
+    For HF format models, the default chat template is defined inside `chat_template.json` or `tokenizer_config.json`.
 
-If no default chat template is available, we will first look for a built-in fallback in <gh-file:vllm/transformers_utils/chat_templates/registry.py>.
-If no fallback is available, an error is raised and you have to provide the chat template manually via the `--chat-template` argument.
+    If no default chat template is available, we will first look for a built-in fallback in <gh-file:vllm/transformers_utils/chat_templates/registry.py>.
+    If no fallback is available, an error is raised and you have to provide the chat template manually via the `--chat-template` argument.
 
-For certain models, we provide alternative chat templates inside <gh-dir:vllm/examples>.
-For example, VLM2Vec uses <gh-file:examples/template_vlm2vec.jinja> which is different from the default one for Phi-3-Vision.
-:::
+    For certain models, we provide alternative chat templates inside <gh-dir:examples>.
+    For example, VLM2Vec uses <gh-file:examples/template_vlm2vec.jinja> which is different from the default one for Phi-3-Vision.
 
 ### Image Inputs
 
@@ -284,25 +283,21 @@ print("Chat completion output:", chat_response.choices[0].message.content)
 
 Full example: <gh-file:examples/online_serving/openai_chat_completion_client_for_multimodal.py>
 
-:::{tip}
-Loading from local file paths is also supported on vLLM: You can specify the allowed local media path via `--allowed-local-media-path` when launching the API server/engine,
-and pass the file path as `url` in the API request.
-:::
+!!! tip
+    Loading from local file paths is also supported on vLLM: You can specify the allowed local media path via `--allowed-local-media-path` when launching the API server/engine,
+    and pass the file path as `url` in the API request.
 
-:::{tip}
-There is no need to place image placeholders in the text content of the API request - they are already represented by the image content.
-In fact, you can place image placeholders in the middle of the text by interleaving text and image content.
-:::
+!!! tip
+    There is no need to place image placeholders in the text content of the API request - they are already represented by the image content.
+    In fact, you can place image placeholders in the middle of the text by interleaving text and image content.
 
-:::{note}
-By default, the timeout for fetching images through HTTP URL is `5` seconds.
-You can override this by setting the environment variable:
+!!! note
+    By default, the timeout for fetching images through HTTP URL is `5` seconds.
+    You can override this by setting the environment variable:
 
-```console
-export VLLM_IMAGE_FETCH_TIMEOUT=<timeout>
-```
-
-:::
+    ```console
+    export VLLM_IMAGE_FETCH_TIMEOUT=<timeout>
+    ```
 
 ### Video Inputs
 
@@ -357,15 +352,13 @@ print("Chat completion output from image url:", result)
 
 Full example: <gh-file:examples/online_serving/openai_chat_completion_client_for_multimodal.py>
 
-:::{note}
-By default, the timeout for fetching videos through HTTP URL is `30` seconds.
-You can override this by setting the environment variable:
+!!! note
+    By default, the timeout for fetching videos through HTTP URL is `30` seconds.
+    You can override this by setting the environment variable:
 
-```console
-export VLLM_VIDEO_FETCH_TIMEOUT=<timeout>
-```
-
-:::
+    ```console
+    export VLLM_VIDEO_FETCH_TIMEOUT=<timeout>
+    ```
 
 ### Audio Inputs
 
@@ -461,15 +454,13 @@ print("Chat completion output from audio url:", result)
 
 Full example: <gh-file:examples/online_serving/openai_chat_completion_client_for_multimodal.py>
 
-:::{note}
-By default, the timeout for fetching audios through HTTP URL is `10` seconds.
-You can override this by setting the environment variable:
+!!! note
+    By default, the timeout for fetching audios through HTTP URL is `10` seconds.
+    You can override this by setting the environment variable:
 
-```console
-export VLLM_AUDIO_FETCH_TIMEOUT=<timeout>
-```
-
-:::
+    ```console
+    export VLLM_AUDIO_FETCH_TIMEOUT=<timeout>
+    ```
 
 ### Embedding Inputs
 
@@ -535,7 +526,6 @@ chat_completion = client.chat.completions.create(
 )
 ```
 
-:::{note}
-Only one message can contain `{"type": "image_embeds"}`.
-If used with a model that requires additional parameters, you must also provide a tensor for each of them, e.g. `image_grid_thw`, `image_sizes`, etc.
-:::
+!!! note
+    Only one message can contain `{"type": "image_embeds"}`.
+    If used with a model that requires additional parameters, you must also provide a tensor for each of them, e.g. `image_grid_thw`, `image_sizes`, etc.
diff --git a/docs/source/features/prompt_embeds.md b/docs/features/prompt_embeds.md
similarity index 92%
rename from docs/source/features/prompt_embeds.md
rename to docs/features/prompt_embeds.md
index 9d7b242bbe51d..6f5616e05d8c1 100644
--- a/docs/source/features/prompt_embeds.md
+++ b/docs/features/prompt_embeds.md
@@ -6,13 +6,12 @@ This page teaches you how to pass prompt embedding inputs to vLLM.
 
 The traditional flow of text data for a Large Language Model goes from text to token ids (via a tokenizer) then from token ids to prompt embeddings. For a traditional decoder-only model (such as meta-llama/Llama-3.1-8B-Instruct), this step of converting token ids to prompt embeddings happens via a look-up from a learned embedding matrix, but the model is not limited to processing only the embeddings corresponding to its token vocabulary.
 
-:::{note}
-Prompt embeddings are currently only supported in the v0 engine.
-:::
+!!! note
+    Prompt embeddings are currently only supported in the v0 engine.
 
 ## Offline Inference
 
-To input multi-modal data, follow this schema in {class}`vllm.inputs.EmbedsPrompt`:
+To input prompt embeddings, follow this schema in [vllm.inputs.EmbedsPrompt][]:
 
 - `prompt_embeds`: A torch tensor representing a sequence of prompt/token embeddings. This has the shape (sequence_length, hidden_size), where sequence length is the number of tokens embeddings and hidden_size is the hidden size (embedding size) of the model.
 
diff --git a/docs/features/quantization/README.md b/docs/features/quantization/README.md
new file mode 100644
index 0000000000000..71f62065f63d2
--- /dev/null
+++ b/docs/features/quantization/README.md
@@ -0,0 +1,22 @@
+---
+title: Quantization
+---
+[](){ #quantization-index }
+
+Quantization trades off model precision for smaller memory footprint, allowing large models to be run on a wider range of devices.
+
+Contents:
+
+- [Supported Hardware](supported_hardware.md)
+- [AutoAWQ](auto_awq.md)
+- [BitsAndBytes](bnb.md)
+- [BitBLAS](bitblas.md)
+- [GGUF](gguf.md)
+- [GPTQModel](gptqmodel.md)
+- [INT4 W4A16](int4.md)
+- [INT8 W8A8](int8.md)
+- [FP8 W8A8](fp8.md)
+- [ModelOpt](modelopt.md)
+- [AMD QUARK](quark.md)
+- [Quantized KV Cache](quantized_kvcache.md)
+- [TorchAO](torchao.md)
diff --git a/docs/source/features/quantization/auto_awq.md b/docs/features/quantization/auto_awq.md
similarity index 98%
rename from docs/source/features/quantization/auto_awq.md
rename to docs/features/quantization/auto_awq.md
index b4ac597f5a79c..5879b3126fa6e 100644
--- a/docs/source/features/quantization/auto_awq.md
+++ b/docs/features/quantization/auto_awq.md
@@ -1,6 +1,7 @@
-(auto-awq)=
-
-# AutoAWQ
+---
+title: AutoAWQ
+---
+[](){ #auto-awq }
 
 To create a new 4-bit quantized model, you can leverage [AutoAWQ](https://github.com/casper-hansen/AutoAWQ).
 Quantization reduces the model's precision from BF16/FP16 to INT4 which effectively reduces the total model memory footprint.
diff --git a/docs/source/features/quantization/bitblas.md b/docs/features/quantization/bitblas.md
similarity index 76%
rename from docs/source/features/quantization/bitblas.md
rename to docs/features/quantization/bitblas.md
index d0b2bf858c9b6..8e9cf67a7a69f 100644
--- a/docs/source/features/quantization/bitblas.md
+++ b/docs/features/quantization/bitblas.md
@@ -1,14 +1,14 @@
-(bitblas)=
-
-# BitBLAS
+---
+title: BitBLAS
+---
+[](){ #bitblas }
 
 vLLM now supports [BitBLAS](https://github.com/microsoft/BitBLAS) for more efficient and flexible model inference. Compared to other quantization frameworks, BitBLAS provides more precision combinations.
 
-:::{note}
-Ensure your hardware supports the selected `dtype` (`torch.bfloat16` or `torch.float16`).
-Most recent NVIDIA GPUs support `float16`, while `bfloat16` is more common on newer architectures like Ampere or Hopper.
-For details see [supported hardware](https://docs.vllm.ai/en/latest/features/quantization/supported_hardware.html).
-:::
+!!! note
+    Ensure your hardware supports the selected `dtype` (`torch.bfloat16` or `torch.float16`).
+    Most recent NVIDIA GPUs support `float16`, while `bfloat16` is more common on newer architectures like Ampere or Hopper.
+    For details see [supported hardware](https://docs.vllm.ai/en/latest/features/quantization/supported_hardware.html).
 
 Below are the steps to utilize BitBLAS with vLLM.
 
diff --git a/docs/source/features/quantization/bnb.md b/docs/features/quantization/bnb.md
similarity index 97%
rename from docs/source/features/quantization/bnb.md
rename to docs/features/quantization/bnb.md
index 1843a33a3dfdd..990ac34eb2fdf 100644
--- a/docs/source/features/quantization/bnb.md
+++ b/docs/features/quantization/bnb.md
@@ -1,6 +1,7 @@
-(bits-and-bytes)=
-
-# BitsAndBytes
+---
+title: BitsAndBytes
+---
+[](){ #bits-and-bytes }
 
 vLLM now supports [BitsAndBytes](https://github.com/TimDettmers/bitsandbytes) for more efficient model inference.
 BitsAndBytes quantizes models to reduce memory usage and enhance performance without significantly sacrificing accuracy.
diff --git a/docs/source/features/quantization/fp8.md b/docs/features/quantization/fp8.md
similarity index 88%
rename from docs/source/features/quantization/fp8.md
rename to docs/features/quantization/fp8.md
index cb304d54726c8..01d5d9da046de 100644
--- a/docs/source/features/quantization/fp8.md
+++ b/docs/features/quantization/fp8.md
@@ -1,6 +1,7 @@
-(fp8)=
-
-# FP8 W8A8
+---
+title: FP8 W8A8
+---
+[](){ #fp8 }
 
 vLLM supports FP8 (8-bit floating point) weight and activation quantization using hardware acceleration on GPUs such as Nvidia H100 and AMD MI300x.
 Currently, only Hopper and Ada Lovelace GPUs are officially supported for W8A8.
@@ -14,10 +15,9 @@ The FP8 types typically supported in hardware have two distinct representations,
 - **E4M3**: Consists of 1 sign bit, 4 exponent bits, and 3 bits of mantissa. It can store values up to +/-448 and `nan`.
 - **E5M2**: Consists of 1 sign bit, 5 exponent bits, and 2 bits of mantissa. It can store values up to +/-57344, +/- `inf`, and `nan`. The tradeoff for the increased dynamic range is lower precision of the stored values.
 
-:::{note}
-FP8 computation is supported on NVIDIA GPUs with compute capability > 8.9 (Ada Lovelace, Hopper).
-FP8 models will run on compute capability > 8.0 (Ampere) as weight-only W8A16, utilizing FP8 Marlin.
-:::
+!!! note
+    FP8 computation is supported on NVIDIA GPUs with compute capability >= 8.9 (Ada Lovelace, Hopper).
+    FP8 models will run on compute capability >= 8.0 (Ampere) as weight-only W8A16, utilizing FP8 Marlin.
 
 ## Installation
 
@@ -94,9 +94,8 @@ print(result[0].outputs[0].text)
 
 Evaluate accuracy with `lm_eval` (for example on 250 samples of `gsm8k`):
 
-:::{note}
-Quantized models can be sensitive to the presence of the `bos` token. `lm_eval` does not add a `bos` token by default, so make sure to include the `add_bos_token=True` argument when running your evaluations.
-:::
+!!! note
+    Quantized models can be sensitive to the presence of the `bos` token. `lm_eval` does not add a `bos` token by default, so make sure to include the `add_bos_token=True` argument when running your evaluations.
 
 ```console
 $ MODEL=$PWD/Meta-Llama-3-8B-Instruct-FP8-Dynamic
@@ -133,6 +132,5 @@ result = model.generate("Hello, my name is")
 print(result[0].outputs[0].text)
 ```
 
-:::{warning}
-Currently, we load the model at original precision before quantizing down to 8-bits, so you need enough memory to load the whole model.
-:::
+!!! warning
+    Currently, we load the model at original precision before quantizing down to 8-bits, so you need enough memory to load the whole model.
diff --git a/docs/source/features/quantization/gguf.md b/docs/features/quantization/gguf.md
similarity index 76%
rename from docs/source/features/quantization/gguf.md
rename to docs/features/quantization/gguf.md
index e93e4dcd3b578..04ab5945e8f6f 100644
--- a/docs/source/features/quantization/gguf.md
+++ b/docs/features/quantization/gguf.md
@@ -1,14 +1,13 @@
-(gguf)=
+---
+title: GGUF
+---
+[](){ #gguf }
 
-# GGUF
+!!! warning
+    Please note that GGUF support in vLLM is highly experimental and under-optimized at the moment, and it might be incompatible with other features. Currently, you can use GGUF as a way to reduce memory footprint. If you encounter any issues, please report them to the vLLM team.
 
-:::{warning}
-Please note that GGUF support in vLLM is highly experimental and under-optimized at the moment, it might be incompatible with other features. Currently, you can use GGUF as a way to reduce memory footprint. If you encounter any issues, please report them to the vLLM team.
-:::
-
-:::{warning}
-Currently, vllm only supports loading single-file GGUF models. If you have a multi-files GGUF model, you can use [gguf-split](https://github.com/ggerganov/llama.cpp/pull/6135) tool to merge them to a single-file model.
-:::
+!!! warning
+    Currently, vLLM only supports loading single-file GGUF models. If you have a multi-file GGUF model, you can use the [gguf-split](https://github.com/ggerganov/llama.cpp/pull/6135) tool to merge it into a single-file model.
 
 To run a GGUF model with vLLM, you can download and use the local GGUF model from [TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF](https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF) with the following command:
 
@@ -25,9 +24,8 @@ You can also add `--tensor-parallel-size 2` to enable tensor parallelism inferen
 vllm serve ./tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf --tokenizer TinyLlama/TinyLlama-1.1B-Chat-v1.0 --tensor-parallel-size 2
 ```
 
-:::{warning}
-We recommend using the tokenizer from base model instead of GGUF model. Because the tokenizer conversion from GGUF is time-consuming and unstable, especially for some models with large vocab size.
-:::
+!!! warning
+    We recommend using the tokenizer from the base model instead of the GGUF model, because the tokenizer conversion from GGUF is time-consuming and unstable, especially for models with a large vocab size.
 
 GGUF assumes that huggingface can convert the metadata to a config file. In case huggingface doesn't support your model you can manually create a config and pass it as hf-config-path
 
diff --git a/docs/source/features/quantization/gptqmodel.md b/docs/features/quantization/gptqmodel.md
similarity index 98%
rename from docs/source/features/quantization/gptqmodel.md
rename to docs/features/quantization/gptqmodel.md
index 9771d5a4fe9ee..10660a408fd2d 100644
--- a/docs/source/features/quantization/gptqmodel.md
+++ b/docs/features/quantization/gptqmodel.md
@@ -1,6 +1,7 @@
-(gptqmodel)=
-
-# GPTQModel
+---
+title: GPTQModel
+---
+[](){ #gptqmodel }
 
 To create a new 4-bit or 8-bit GPTQ quantized model, you can leverage [GPTQModel](https://github.com/ModelCloud/GPTQModel) from ModelCloud.AI.
 
diff --git a/docs/source/features/quantization/int4.md b/docs/features/quantization/int4.md
similarity index 94%
rename from docs/source/features/quantization/int4.md
rename to docs/features/quantization/int4.md
index 7a0ab4ad229e6..b7d09206365ff 100644
--- a/docs/source/features/quantization/int4.md
+++ b/docs/features/quantization/int4.md
@@ -1,14 +1,14 @@
-(int4)=
-
-# INT4 W4A16
+---
+title: INT4 W4A16
+---
+[](){ #int4 }
 
 vLLM supports quantizing weights to INT4 for memory savings and inference acceleration. This quantization method is particularly useful for reducing model size and maintaining low latency in workloads with low queries per second (QPS).
 
 Please visit the HF collection of [quantized INT4 checkpoints of popular LLMs ready to use with vLLM](https://huggingface.co/collections/neuralmagic/int4-llms-for-vllm-668ec34bf3c9fa45f857df2c).
 
-:::{note}
-INT4 computation is supported on NVIDIA GPUs with compute capability > 8.0 (Ampere, Ada Lovelace, Hopper, Blackwell).
-:::
+!!! note
+    INT4 computation is supported on NVIDIA GPUs with compute capability >= 8.0 (Ampere, Ada Lovelace, Hopper, Blackwell).
 
 ## Prerequisites
 
@@ -121,9 +121,8 @@ $ lm_eval --model vllm \
   --batch_size 'auto'
 ```
 
-:::{note}
-Quantized models can be sensitive to the presence of the `bos` token. Make sure to include the `add_bos_token=True` argument when running evaluations.
-:::
+!!! note
+    Quantized models can be sensitive to the presence of the `bos` token. Make sure to include the `add_bos_token=True` argument when running evaluations.
 
 ## Best Practices
 
diff --git a/docs/source/features/quantization/int8.md b/docs/features/quantization/int8.md
similarity index 92%
rename from docs/source/features/quantization/int8.md
rename to docs/features/quantization/int8.md
index 1e4b01d35575c..1d9fba9dc87f1 100644
--- a/docs/source/features/quantization/int8.md
+++ b/docs/features/quantization/int8.md
@@ -1,15 +1,15 @@
-(int8)=
-
-# INT8 W8A8
+---
+title: INT8 W8A8
+---
+[](){ #int8 }
 
 vLLM supports quantizing weights and activations to INT8 for memory savings and inference acceleration.
 This quantization method is particularly useful for reducing model size while maintaining good performance.
 
 Please visit the HF collection of [quantized INT8 checkpoints of popular LLMs ready to use with vLLM](https://huggingface.co/collections/neuralmagic/int8-llms-for-vllm-668ec32c049dca0369816415).
 
-:::{note}
-INT8 computation is supported on NVIDIA GPUs with compute capability > 7.5 (Turing, Ampere, Ada Lovelace, Hopper, Blackwell).
-:::
+!!! note
+    INT8 computation is supported on NVIDIA GPUs with compute capability >= 7.5 (Turing, Ampere, Ada Lovelace, Hopper, Blackwell).
 
 ## Prerequisites
 
@@ -125,9 +125,8 @@ $ lm_eval --model vllm \
   --batch_size 'auto'
 ```
 
-:::{note}
-Quantized models can be sensitive to the presence of the `bos` token. Make sure to include the `add_bos_token=True` argument when running evaluations.
-:::
+!!! note
+    Quantized models can be sensitive to the presence of the `bos` token. Make sure to include the `add_bos_token=True` argument when running evaluations.
 
 ## Best Practices
 
diff --git a/docs/source/features/quantization/modelopt.md b/docs/features/quantization/modelopt.md
similarity index 100%
rename from docs/source/features/quantization/modelopt.md
rename to docs/features/quantization/modelopt.md
diff --git a/docs/source/features/quantization/quantized_kvcache.md b/docs/features/quantization/quantized_kvcache.md
similarity index 98%
rename from docs/source/features/quantization/quantized_kvcache.md
rename to docs/features/quantization/quantized_kvcache.md
index 86e6354ec82e0..e3ebd024bab3c 100644
--- a/docs/source/features/quantization/quantized_kvcache.md
+++ b/docs/features/quantization/quantized_kvcache.md
@@ -1,6 +1,7 @@
-(quantized-kvcache)=
-
-# Quantized KV Cache
+---
+title: Quantized KV Cache
+---
+[](){ #quantized-kvcache }
 
 ## FP8 KV Cache
 
diff --git a/docs/source/features/quantization/quark.md b/docs/features/quantization/quark.md
similarity index 94%
rename from docs/source/features/quantization/quark.md
rename to docs/features/quantization/quark.md
index 955890dbc75ba..51da98cc09d3f 100644
--- a/docs/source/features/quantization/quark.md
+++ b/docs/features/quantization/quark.md
@@ -1,6 +1,7 @@
-(quark)=
-
-# AMD QUARK
+---
+title: AMD QUARK
+---
+[](){ #quark }
 
 Quantization can effectively reduce memory and bandwidth usage, accelerate computation and improve
 throughput while with minimal accuracy loss. vLLM can leverage [Quark](https://quark.docs.amd.com/latest/),
@@ -86,13 +87,12 @@ We need to set the quantization configuration, you can check
 for further details. Here we use FP8 per-tensor quantization on weight, activation,
 kv-cache and the quantization algorithm is AutoSmoothQuant.
 
-:::{note}
-Note the quantization algorithm needs a JSON config file and the config file is located in
-[Quark Pytorch examples](https://quark.docs.amd.com/latest/pytorch/pytorch_examples.html),
-under the directory `examples/torch/language_modeling/llm_ptq/models`. For example,
-AutoSmoothQuant config file for Llama is
-`examples/torch/language_modeling/llm_ptq/models/llama/autosmoothquant_config.json`.
-:::
+!!! note
+    Note the quantization algorithm needs a JSON config file and the config file is located in
+    [Quark Pytorch examples](https://quark.docs.amd.com/latest/pytorch/pytorch_examples.html),
+    under the directory `examples/torch/language_modeling/llm_ptq/models`. For example,
+    AutoSmoothQuant config file for Llama is
+    `examples/torch/language_modeling/llm_ptq/models/llama/autosmoothquant_config.json`.
 
 ```python
 from quark.torch.quantization import (Config, QuantizationConfig,
diff --git a/docs/features/quantization/supported_hardware.md b/docs/features/quantization/supported_hardware.md
new file mode 100644
index 0000000000000..2967bf9c7504a
--- /dev/null
+++ b/docs/features/quantization/supported_hardware.md
@@ -0,0 +1,28 @@
+---
+title: Supported Hardware
+---
+[](){ #quantization-supported-hardware }
+
+The table below shows the compatibility of various quantization implementations with different hardware platforms in vLLM:
+
+| Implementation        | Volta   | Turing   | Ampere   | Ada   | Hopper   | AMD GPU   | Intel GPU   | x86 CPU   | AWS Inferentia   | Google TPU   |
+|-----------------------|---------|----------|----------|-------|----------|-----------|-------------|-----------|------------------|--------------|
+| AWQ                   | ❌       | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ✅︎          | ✅︎        | ❌                | ❌            |
+| GPTQ                  | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ✅︎          | ✅︎        | ❌                | ❌            |
+| Marlin (GPTQ/AWQ/FP8) | ❌       | ❌        | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌           | ❌         | ❌                | ❌            |
+| INT8 (W8A8)           | ❌       | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌           | ✅︎        | ❌                | ✅︎           |
+| FP8 (W8A8)            | ❌       | ❌        | ❌        | ✅︎    | ✅︎       | ✅︎        | ❌           | ❌         | ❌                | ❌            |
+| BitBLAS (GPTQ)        | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌           | ❌         | ❌                | ❌            |
+| AQLM                  | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌           | ❌         | ❌                | ❌            |
+| bitsandbytes          | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌           | ❌         | ❌                | ❌            |
+| DeepSpeedFP           | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ❌         | ❌           | ❌         | ❌                | ❌            |
+| GGUF                  | ✅︎      | ✅︎       | ✅︎       | ✅︎    | ✅︎       | ✅︎        | ❌           | ❌         | ❌                | ❌            |
+
+- Volta refers to SM 7.0, Turing to SM 7.5, Ampere to SM 8.0/8.6, Ada to SM 8.9, and Hopper to SM 9.0.
+- ✅︎ indicates that the quantization method is supported on the specified hardware.
+- ❌ indicates that the quantization method is not supported on the specified hardware.
+
+!!! note
+    This compatibility chart is subject to change as vLLM continues to evolve and expand its support for different hardware platforms and quantization methods.
+
+    For the most up-to-date information on hardware support and quantization methods, please refer to <gh-dir:vllm/model_executor/layers/quantization> or consult with the vLLM development team.
diff --git a/docs/source/features/quantization/torchao.md b/docs/features/quantization/torchao.md
similarity index 100%
rename from docs/source/features/quantization/torchao.md
rename to docs/features/quantization/torchao.md
diff --git a/docs/source/features/reasoning_outputs.md b/docs/features/reasoning_outputs.md
similarity index 97%
rename from docs/source/features/reasoning_outputs.md
rename to docs/features/reasoning_outputs.md
index bf4f8901a11a8..85464269efacd 100644
--- a/docs/source/features/reasoning_outputs.md
+++ b/docs/features/reasoning_outputs.md
@@ -1,6 +1,7 @@
-(reasoning-outputs)=
-
-# Reasoning Outputs
+---
+title: Reasoning Outputs
+---
+[](){ #reasoning-outputs }
 
 vLLM offers support for reasoning models like [DeepSeek R1](https://huggingface.co/deepseek-ai/DeepSeek-R1), which are designed to generate outputs containing both reasoning steps and final conclusions.
 
@@ -17,10 +18,9 @@ vLLM currently supports the following reasoning models:
 | [IBM Granite 3.2 language models](https://huggingface.co/collections/ibm-granite/granite-32-language-models-67b3bc8c13508f6d064cff9a) | `granite` | ❌ | ❌ |
 | [Qwen3 series](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | `qwen3` | `guided_json`, `guided_regex` | ✅ |
 
-:::{note}
-IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`.
-The reasoning feature for the Qwen3 series is enabled by default. To disable it, you must pass `enable_thinking=False` in your `chat_template_kwargs`.
-:::
+!!! note
+    IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`.
+    The reasoning feature for the Qwen3 series is enabled by default. To disable it, you must pass `enable_thinking=False` in your `chat_template_kwargs`.
 
 ## Quickstart
 
@@ -167,12 +167,10 @@ client = OpenAI(
 models = client.models.list()
 model = models.data[0].id
 
-
 class People(BaseModel):
     name: str
     age: int
 
-
 json_schema = People.model_json_schema()
 
 prompt = ("Generate a JSON with the name and age of one random person.")
diff --git a/docs/source/features/spec_decode.md b/docs/features/spec_decode.md
similarity index 93%
rename from docs/source/features/spec_decode.md
rename to docs/features/spec_decode.md
index f16e0d96522da..dce87c27896c2 100644
--- a/docs/source/features/spec_decode.md
+++ b/docs/features/spec_decode.md
@@ -1,16 +1,15 @@
-(spec-decode)=
+---
+title: Speculative Decoding
+---
+[](){ #spec-decode }
 
-# Speculative Decoding
+!!! warning
+    Please note that speculative decoding in vLLM is not yet optimized and does
+    not usually yield inter-token latency reductions for all prompt datasets or sampling parameters.
+    The work to optimize it is ongoing and can be followed here: <gh-issue:4630>
 
-:::{warning}
-Please note that speculative decoding in vLLM is not yet optimized and does
-not usually yield inter-token latency reductions for all prompt datasets or sampling parameters.
-The work to optimize it is ongoing and can be followed here: <gh-issue:4630>
-:::
-
-:::{warning}
-Currently, speculative decoding in vLLM is not compatible with pipeline parallelism.
-:::
+!!! warning
+    Currently, speculative decoding in vLLM is not compatible with pipeline parallelism.
 
 This document shows how to use [Speculative Decoding](https://x.com/karpathy/status/1697318534555336961) with vLLM.
 Speculative decoding is a technique which improves inter-token latency in memory-bound LLM inference.
@@ -51,9 +50,8 @@ python -m vllm.entrypoints.openai.api_server --host 0.0.0.0 --port 8000 --model
     --speculative_config '{"model": "facebook/opt-125m", "num_speculative_tokens": 5}'
 ```
 
-:::{warning}
-Note: Please use `--speculative_config` to set all configurations related to speculative decoding. The previous method of specifying the model through `--speculative_model` and adding related parameters (e.g., `--num_speculative_tokens`) separately has been deprecated now.
-:::
+!!! warning
+    Please use `--speculative_config` to set all configurations related to speculative decoding. The previous method of specifying the model through `--speculative_model` and adding related parameters (e.g., `--num_speculative_tokens`) separately is now deprecated.
 
 Then use a client:
 
@@ -255,7 +253,7 @@ speculative decoding, breaking down the guarantees into three key areas:
 3. **vLLM Logprob Stability**
    \- vLLM does not currently guarantee stable token log probabilities (logprobs). This can result in different outputs for the
    same request across runs. For more details, see the FAQ section
-   titled *Can the output of a prompt vary across runs in vLLM?* in the [FAQs](#faq).
+   titled *Can the output of a prompt vary across runs in vLLM?* in the [FAQs][faq].
 
 While vLLM strives to ensure losslessness in speculative decoding, variations in generated outputs with and without speculative decoding
 can occur due to following factors:
@@ -264,7 +262,7 @@ can occur due to following factors:
 - **Batch Size and Numerical Stability**: Changes in batch size may cause variations in logprobs and output probabilities, potentially
   due to non-deterministic behavior in batched operations or numerical instability.
 
-For mitigation strategies, please refer to the FAQ entry *Can the output of a prompt vary across runs in vLLM?* in the [FAQs](#faq).
+For mitigation strategies, please refer to the FAQ entry *Can the output of a prompt vary across runs in vLLM?* in the [FAQs][faq].
 
 ## Resources for vLLM contributors
 
diff --git a/docs/source/features/structured_outputs.md b/docs/features/structured_outputs.md
similarity index 96%
rename from docs/source/features/structured_outputs.md
rename to docs/features/structured_outputs.md
index 03119ec7441c9..f96b598cff98d 100644
--- a/docs/source/features/structured_outputs.md
+++ b/docs/features/structured_outputs.md
@@ -1,6 +1,7 @@
-(structured-outputs)=
-
-# Structured Outputs
+---
+title: Structured Outputs
+---
+[](){ #structured-outputs }
 
 vLLM supports the generation of structured outputs using
 [xgrammar](https://github.com/mlc-ai/xgrammar) or
@@ -20,7 +21,7 @@ The following parameters are supported, which must be added as extra parameters:
 - `guided_grammar`: the output will follow the context free grammar.
 - `structural_tag`: Follow a JSON schema within a set of specified tags within the generated text.
 
-You can see the complete list of supported parameters on the [OpenAI-Compatible Server](#openai-compatible-server) page.
+You can see the complete list of supported parameters on the [OpenAI-Compatible Server][openai-compatible-server] page.
 
 Structured outputs are supported by default in the OpenAI-Compatible Server. You
 may choose to specify the backend to use by setting the
@@ -83,13 +84,11 @@ class CarType(str, Enum):
     truck = "Truck"
     coupe = "Coupe"
 
-
 class CarDescription(BaseModel):
     brand: str
     model: str
     car_type: CarType
 
-
 json_schema = CarDescription.model_json_schema()
 
 completion = client.chat.completions.create(
@@ -105,11 +104,10 @@ completion = client.chat.completions.create(
 print(completion.choices[0].message.content)
 ```
 
-:::{tip}
-While not strictly necessary, normally it´s better to indicate in the prompt the
-JSON schema and how the fields should be populated.  This can improve the
-results notably in most cases.
-:::
+!!! tip
+    While not strictly necessary, normally it's better to indicate in the prompt the
+    JSON schema and how the fields should be populated. This can improve the
+    results notably in most cases.
 
 Finally we have the `guided_grammar` option, which is probably the most
 difficult to use, but it´s really powerful. It allows us to define complete
@@ -160,12 +158,10 @@ Here is a simple example demonstrating how to get structured output using Pydant
 from pydantic import BaseModel
 from openai import OpenAI
 
-
 class Info(BaseModel):
     name: str
     age: int
 
-
 client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="dummy")
 completion = client.beta.chat.completions.parse(
     model="meta-llama/Llama-3.1-8B-Instruct",
@@ -199,17 +195,14 @@ from typing import List
 from pydantic import BaseModel
 from openai import OpenAI
 
-
 class Step(BaseModel):
     explanation: str
     output: str
 
-
 class MathResponse(BaseModel):
     steps: list[Step]
     final_answer: str
 
-
 client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="dummy")
 completion = client.beta.chat.completions.parse(
     model="meta-llama/Llama-3.1-8B-Instruct",
diff --git a/docs/source/features/tool_calling.md b/docs/features/tool_calling.md
similarity index 99%
rename from docs/source/features/tool_calling.md
rename to docs/features/tool_calling.md
index f76128406bfd1..75cd00e24d7b0 100644
--- a/docs/source/features/tool_calling.md
+++ b/docs/features/tool_calling.md
@@ -322,7 +322,6 @@ class ExampleToolParser(ToolParser):
                                             tool_calls=[],
                                             content=text)
 
-
 ```
 
 Then you can use this plugin in the command line like this.
diff --git a/docs/source/getting_started/faq.md b/docs/getting_started/faq.md
similarity index 91%
rename from docs/source/getting_started/faq.md
rename to docs/getting_started/faq.md
index c1bb28937c144..51977d4434f5a 100644
--- a/docs/source/getting_started/faq.md
+++ b/docs/getting_started/faq.md
@@ -1,23 +1,24 @@
-(faq)=
-
-# Frequently Asked Questions
+---
+title: Frequently Asked Questions
+---
+[](){ #faq }
 
 > Q: How can I serve multiple models on a single port using the OpenAI API?
 
 A: Assuming that you're referring to using OpenAI compatible server to serve multiple models at once, that is not currently supported, you can run multiple instances of the server (each serving a different model) at the same time, and have another layer to route the incoming request to the correct server accordingly.
 
-______________________________________________________________________
+---
 
 > Q: Which model to use for offline inference embedding?
 
 A: You can try [e5-mistral-7b-instruct](https://huggingface.co/intfloat/e5-mistral-7b-instruct) and [BAAI/bge-base-en-v1.5](https://huggingface.co/BAAI/bge-base-en-v1.5);
-more are listed [here](#supported-models).
+more are listed [here][supported-models].
 
 By extracting hidden states, vLLM can automatically convert text generation models like [Llama-3-8B](https://huggingface.co/meta-llama/Meta-Llama-3-8B),
 [Mistral-7B-Instruct-v0.3](https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3) into embedding models,
 but they are expected to be inferior to models that are specifically trained on embedding tasks.
 
-______________________________________________________________________
+---
 
 > Q: Can the output of a prompt vary across runs in vLLM?
 
diff --git a/docs/getting_started/installation/.nav.yml b/docs/getting_started/installation/.nav.yml
new file mode 100644
index 0000000000000..7acfc015ff508
--- /dev/null
+++ b/docs/getting_started/installation/.nav.yml
@@ -0,0 +1,5 @@
+nav:
+  - README.md
+  - gpu.md
+  - cpu.md
+  - ai_accelerator.md
\ No newline at end of file
diff --git a/docs/getting_started/installation/README.md b/docs/getting_started/installation/README.md
new file mode 100644
index 0000000000000..36bb16cc02249
--- /dev/null
+++ b/docs/getting_started/installation/README.md
@@ -0,0 +1,20 @@
+---
+title: Installation
+---
+[](){ #installation-index }
+
+vLLM supports the following hardware platforms:
+
+- [GPU](gpu.md)
+    - [NVIDIA CUDA](gpu.md#nvidia-cuda)
+    - [AMD ROCm](gpu.md#amd-rocm)
+    - [Intel XPU](gpu.md#intel-xpu)
+- [CPU](cpu.md)
+    - [Intel/AMD x86](cpu.md#intelamd-x86)
+    - [ARM AArch64](cpu.md#arm-aarch64)
+    - [Apple silicon](cpu.md#apple-silicon)
+    - [IBM Z (S390X)](cpu.md#ibm-z-s390x)
+- [Other AI accelerators](ai_accelerator.md)
+    - [Google TPU](ai_accelerator.md#google-tpu)
+    - [Intel Gaudi](ai_accelerator.md#intel-gaudi)
+    - [AWS Neuron](ai_accelerator.md#aws-neuron)
diff --git a/docs/getting_started/installation/ai_accelerator.md b/docs/getting_started/installation/ai_accelerator.md
new file mode 100644
index 0000000000000..a4f136a172fed
--- /dev/null
+++ b/docs/getting_started/installation/ai_accelerator.md
@@ -0,0 +1,117 @@
+# Other AI accelerators
+
+vLLM is a Python library that supports the following AI accelerators. Select your AI accelerator type to see vendor specific instructions:
+
+=== "Google TPU"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/tpu.inc.md:installation"
+
+=== "Intel Gaudi"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md:installation"
+
+=== "AWS Neuron"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/neuron.inc.md:installation"
+
+## Requirements
+
+=== "Google TPU"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/tpu.inc.md:requirements"
+
+=== "Intel Gaudi"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md:requirements"
+
+=== "AWS Neuron"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/neuron.inc.md:requirements"
+
+## Configure a new environment
+
+=== "Google TPU"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/tpu.inc.md:configure-a-new-environment"
+
+=== "Intel Gaudi"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md:configure-a-new-environment"
+
+=== "AWS Neuron"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/neuron.inc.md:configure-a-new-environment"
+
+## Set up using Python
+
+### Pre-built wheels
+
+=== "Google TPU"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/tpu.inc.md:pre-built-wheels"
+
+=== "Intel Gaudi"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md:pre-built-wheels"
+
+=== "AWS Neuron"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/neuron.inc.md:pre-built-wheels"
+
+### Build wheel from source
+
+=== "Google TPU"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/tpu.inc.md:build-wheel-from-source"
+
+=== "Intel Gaudi"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md:build-wheel-from-source"
+
+=== "AWS Neuron"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/neuron.inc.md:build-wheel-from-source"
+
+## Set up using Docker
+
+### Pre-built images
+
+=== "Google TPU"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/tpu.inc.md:pre-built-images"
+
+=== "Intel Gaudi"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md:pre-built-images"
+
+=== "AWS Neuron"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/neuron.inc.md:pre-built-images"
+
+### Build image from source
+
+=== "Google TPU"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/tpu.inc.md:build-image-from-source"
+
+=== "Intel Gaudi"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md:build-image-from-source"
+
+=== "AWS Neuron"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/neuron.inc.md:build-image-from-source"
+
+## Extra information
+
+=== "Google TPU"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/tpu.inc.md:extra-information"
+
+=== "Intel Gaudi"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md:extra-information"
+
+=== "AWS Neuron"
+
+    --8<-- "docs/getting_started/installation/ai_accelerator/neuron.inc.md:extra-information"
diff --git a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md b/docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
similarity index 84%
rename from docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
rename to docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
index 78938de317c48..1ca8a9216a4ee 100644
--- a/docs/source/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
+++ b/docs/getting_started/installation/ai_accelerator/hpu-gaudi.inc.md
@@ -1,12 +1,12 @@
-# Installation
+# --8<-- [start:installation]
 
 This tab provides instructions on running vLLM with Intel Gaudi devices.
 
-:::{attention}
-There are no pre-built wheels or images for this device, so you must build vLLM from source.
-:::
+!!! warning
+    There are no pre-built wheels or images for this device, so you must build vLLM from source.
 
-## Requirements
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
 
 - OS: Ubuntu 22.04 LTS
 - Python: 3.10
@@ -48,13 +48,16 @@ docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-i
 docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest
 ```
 
-## Set up using Python
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
 
-### Pre-built wheels
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
 
 Currently, there are no pre-built Intel Gaudi wheels.
 
-### Build wheel from source
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
 
 To build and install vLLM from source, run:
 
@@ -75,29 +78,32 @@ pip install -r requirements/hpu.txt
 python setup.py develop
 ```
 
-## Set up using Docker
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
 
-### Pre-built images
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
 
 Currently, there are no pre-built Intel Gaudi images.
 
-### Build image from source
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
 
 ```console
 docker build -f docker/Dockerfile.hpu -t vllm-hpu-env  .
 docker run -it --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --rm vllm-hpu-env
 ```
 
-:::{tip}
-If you're observing the following error: `docker: Error response from daemon: Unknown runtime specified habana.`, please refer to "Install Using Containers" section of [Intel Gaudi Software Stack and Driver Installation](https://docs.habana.ai/en/v1.18.0/Installation_Guide/Bare_Metal_Fresh_OS.html). Make sure you have `habana-container-runtime` package installed and that `habana` container runtime is registered.
-:::
+!!! tip
+    If you're observing the following error: `docker: Error response from daemon: Unknown runtime specified habana.`, please refer to the "Install Using Containers" section of [Intel Gaudi Software Stack and Driver Installation](https://docs.habana.ai/en/v1.18.0/Installation_Guide/Bare_Metal_Fresh_OS.html). Make sure you have the `habana-container-runtime` package installed and that the `habana` container runtime is registered.
 
-## Extra information
+# --8<-- [end:build-image-from-source]
+# --8<-- [start:extra-information]
 
 ## Supported features
 
-- [Offline inference](#offline-inference)
-- Online serving via [OpenAI-Compatible Server](#openai-compatible-server)
+- [Offline inference][offline-inference]
+- Online serving via [OpenAI-Compatible Server][openai-compatible-server]
 - HPU autodetection - no need to manually select device within vLLM
 - Paged KV cache with algorithms enabled for Intel Gaudi accelerators
 - Custom Intel Gaudi implementations of Paged Attention, KV cache ops,
@@ -157,41 +163,26 @@ Gaudi2 devices. Configurations that are not listed may or may not work.
 
 Currently in vLLM for HPU we support four execution modes, depending on selected HPU PyTorch Bridge backend (via `PT_HPU_LAZY_MODE` environment variable), and `--enforce-eager` flag.
 
-:::{list-table} vLLM execution modes
-:widths: 25 25 50
-:header-rows: 1
+|   `PT_HPU_LAZY_MODE` |   `enforce_eager` | execution mode     |
+|----------------------|-------------------|--------------------|
+|                    0 |                 0 | torch.compile      |
+|                    0 |                 1 | PyTorch eager mode |
+|                    1 |                 0 | HPU Graphs         |
+|                    1 |                 1 | PyTorch lazy mode  |
+  <figcaption>vLLM execution modes</figcaption>
 
-- * `PT_HPU_LAZY_MODE`
-  * `enforce_eager`
-  * execution mode
-- * 0
-  * 0
-  * torch.compile
-- * 0
-  * 1
-  * PyTorch eager mode
-- * 1
-  * 0
-  * HPU Graphs
-- * 1
-  * 1
-  * PyTorch lazy mode
-:::
+!!! warning
+    In 1.18.0, all modes utilizing `PT_HPU_LAZY_MODE=0` are highly experimental and should only be used for validating functional correctness. Their performance will be improved in upcoming releases. To obtain the best performance in 1.18.0, please use HPU Graphs or PyTorch lazy mode.
 
-:::{warning}
-In 1.18.0, all modes utilizing `PT_HPU_LAZY_MODE=0` are highly experimental and should be only used for validating functional correctness. Their performance will be improved in the next releases. For obtaining the best performance in 1.18.0, please use HPU Graphs, or PyTorch lazy mode.
-:::
-
-(gaudi-bucketing-mechanism)=
+[](){ #gaudi-bucketing-mechanism }
 
 ### Bucketing mechanism
 
 Intel Gaudi accelerators work best when operating on models with fixed tensor shapes. [Intel Gaudi Graph Compiler](https://docs.habana.ai/en/latest/Gaudi_Overview/Intel_Gaudi_Software_Suite.html#graph-compiler-and-runtime) is responsible for generating optimized binary code that implements the given model topology on Gaudi. In its default configuration, the produced binary code may be heavily dependent on input and output tensor shapes, and can require graph recompilation when encountering differently shaped tensors within the same topology. While the resulting binaries utilize Gaudi efficiently, the compilation itself may introduce a noticeable overhead in end-to-end execution.
 In a dynamic inference serving scenario, there is a need to minimize the number of graph compilations and reduce the risk of graph compilation occurring during server runtime. Currently it is achieved by "bucketing" model's forward pass across two dimensions - `batch_size` and `sequence_length`.
 
-:::{note}
-Bucketing allows us to reduce the number of required graphs significantly, but it does not handle any graph compilation and device code generation - this is done in warmup and HPUGraph capture phase.
-:::
+!!! note
+    Bucketing allows us to reduce the number of required graphs significantly, but it does not handle any graph compilation and device code generation - this is done in warmup and HPUGraph capture phase.
 
 Bucketing ranges are determined with 3 parameters - `min`, `step` and `max`. They can be set separately for prompt and decode phase, and for batch size and sequence length dimension. These parameters can be observed in logs during vLLM startup:
 
@@ -224,15 +214,13 @@ min = 128, step = 128, max = 512
 
 In the logged scenario, 24 buckets were generated for prompt (prefill) runs, and 48 buckets for decode runs. Each bucket corresponds to a separate optimized device binary for a given model with specified tensor shapes. Whenever a batch of requests is processed, it is padded across batch and sequence length dimension to the smallest possible bucket.
 
-:::{warning}
-If a request exceeds maximum bucket size in any dimension, it will be processed without padding, and its processing may require a graph compilation, potentially significantly increasing end-to-end latency. The boundaries of the buckets are user-configurable via environment variables, and upper bucket boundaries can be increased to avoid such scenario.
-:::
+!!! warning
+    If a request exceeds the maximum bucket size in any dimension, it will be processed without padding, and its processing may require a graph compilation, potentially significantly increasing end-to-end latency. The boundaries of the buckets are user-configurable via environment variables, and upper bucket boundaries can be increased to avoid such a scenario.
 
 As an example, if a request of 3 sequences, with max sequence length of 412 comes in to an idle vLLM server, it will be padded executed as `(4, 512)` prefill bucket, as `batch_size` (number of sequences) will be padded to 4 (closest batch_size dimension higher than 3), and max sequence length will be padded to 512 (closest sequence length dimension higher than 412). After prefill stage, it will be executed as `(4, 512)` decode bucket and will continue as that bucket until either batch dimension changes (due to request being finished) - in which case it will become a `(2, 512)` bucket, or context length increases above 512 tokens, in which case it will become `(4, 640)` bucket.
 
-:::{note}
-Bucketing is transparent to a client -- padding in sequence length dimension is never returned to the client, and padding in batch dimension does not create new requests.
-:::
+!!! note
+    Bucketing is transparent to a client -- padding in sequence length dimension is never returned to the client, and padding in batch dimension does not create new requests.
 
 ### Warmup
 
@@ -252,11 +240,10 @@ INFO 08-01 22:27:16 hpu_model_runner.py:1066] [Warmup][Decode][47/48] batch_size
 INFO 08-01 22:27:16 hpu_model_runner.py:1066] [Warmup][Decode][48/48] batch_size:1 seq_len:128 free_mem:55.43 GiB
 ```
 
-This example uses the same buckets as in the [Bucketing Mechanism](#gaudi-bucketing-mechanism) section. Each output line corresponds to execution of a single bucket. When bucket is executed for the first time, its graph is compiled and can be reused later on, skipping further graph compilations.
+This example uses the same buckets as in the [Bucketing Mechanism][gaudi-bucketing-mechanism] section. Each output line corresponds to execution of a single bucket. When bucket is executed for the first time, its graph is compiled and can be reused later on, skipping further graph compilations.
 
-:::{tip}
-Compiling all the buckets might take some time and can be turned off with `VLLM_SKIP_WARMUP=true` environment variable. Keep in mind that if you do that, you may face graph compilations once executing a given bucket for the first time. It is fine to disable warmup for development, but it's highly recommended to enable it in deployment.
-:::
+!!! tip
+    Compiling all the buckets might take some time and can be turned off with the `VLLM_SKIP_WARMUP=true` environment variable. Keep in mind that if you do that, you may face graph compilations when executing a given bucket for the first time. It is fine to disable warmup for development, but it's highly recommended to enable it in deployment.
 
 ### HPU Graph capture
 
@@ -271,9 +258,8 @@ With its default value (`VLLM_GRAPH_RESERVED_MEM=0.1`), 10% of usable memory wil
 Environment variable `VLLM_GRAPH_PROMPT_RATIO` determines the ratio of usable graph memory reserved for prefill and decode graphs. By default (`VLLM_GRAPH_PROMPT_RATIO=0.3`), both stages have equal memory constraints.
 Lower value corresponds to less usable graph memory reserved for prefill stage, e.g. `VLLM_GRAPH_PROMPT_RATIO=0.2` will reserve 20% of usable graph memory for prefill graphs, and 80% of usable graph memory for decode graphs.
 
-:::{note}
-`gpu_memory_utilization` does not correspond to the absolute memory usage across HPU. It specifies the memory margin after loading the model and performing a profile run. If device has 100 GiB of total memory, and 50 GiB of free memory after loading model weights and executing profiling run, `gpu_memory_utilization` at its default value will mark 90% of 50 GiB as usable, leaving 5 GiB of margin, regardless of total device memory.
-:::
+!!! note
+    `gpu_memory_utilization` does not correspond to the absolute memory usage across HPU. It specifies the memory margin after loading the model and performing a profile run. If a device has 100 GiB of total memory and 50 GiB of free memory after loading model weights and executing the profiling run, `gpu_memory_utilization` at its default value will mark 90% of 50 GiB as usable, leaving 5 GiB of margin, regardless of total device memory.
 
 User can also configure the strategy for capturing HPU Graphs for prompt and decode stages separately. Strategy affects the order of capturing graphs. There are two strategies implemented:
 
@@ -282,9 +268,8 @@ User can also configure the strategy for capturing HPU Graphs for prompt and dec
 
 When there's large amount of requests pending, vLLM scheduler will attempt to fill the maximum batch size for decode as soon as possible. When a request is finished, decode batch size decreases. When that happens, vLLM will attempt to schedule a prefill iteration for requests in the waiting queue, to fill the decode batch size to its previous state. This means that in a full load scenario, decode batch size is often at its maximum, which makes large batch size HPU Graphs crucial to capture, as reflected by `max_bs` strategy. On the other hand, prefills will be executed most frequently with very low batch sizes (1-4), which is reflected in `min_tokens` strategy.
 
-:::{note}
-`VLLM_GRAPH_PROMPT_RATIO` does not set a hard limit on memory taken by graphs for each stage (prefill and decode). vLLM will first attempt to use up entirety of usable prefill graph memory (usable graph memory * `VLLM_GRAPH_PROMPT_RATIO`) for capturing prefill HPU Graphs, next it will attempt do the same for decode graphs and usable decode graph memory pool. If one stage is fully captured, and there is unused memory left within usable graph memory pool, vLLM will attempt further graph capture for the other stage, until no more HPU Graphs can be captured without exceeding reserved memory pool. The behavior on that mechanism can be observed in the example below.
-:::
+!!! note
+    `VLLM_GRAPH_PROMPT_RATIO` does not set a hard limit on memory taken by graphs for each stage (prefill and decode). vLLM will first attempt to use up the entirety of usable prefill graph memory (usable graph memory * `VLLM_GRAPH_PROMPT_RATIO`) for capturing prefill HPU Graphs, next it will attempt to do the same for decode graphs and the usable decode graph memory pool. If one stage is fully captured, and there is unused memory left within the usable graph memory pool, vLLM will attempt further graph capture for the other stage, until no more HPU Graphs can be captured without exceeding the reserved memory pool. The behavior of that mechanism can be observed in the example below.
 
 Each described step is logged by vLLM server, as follows (negative values correspond to memory being released):
 
@@ -401,3 +386,4 @@ the below:
   higher batches. You can do that by adding `--enforce-eager` flag to
   server (for online serving), or by passing `enforce_eager=True`
   argument to LLM constructor (for offline inference).
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md b/docs/getting_started/installation/ai_accelerator/neuron.inc.md
similarity index 79%
rename from docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
rename to docs/getting_started/installation/ai_accelerator/neuron.inc.md
index b4bfb696faa28..671afa8d89008 100644
--- a/docs/source/getting_started/installation/ai_accelerator/neuron.inc.md
+++ b/docs/getting_started/installation/ai_accelerator/neuron.inc.md
@@ -1,14 +1,14 @@
-# Installation
+# --8<-- [start:installation]
 
 vLLM 0.3.3 onwards supports model inferencing and serving on AWS Trainium/Inferentia with Neuron SDK with continuous batching.
 Paged Attention and Chunked Prefill are currently in development and will be available soon.
 Data types currently supported in Neuron SDK are FP16 and BF16.
 
-:::{attention}
-There are no pre-built wheels or images for this device, so you must build vLLM from source.
-:::
+!!! warning
+    There are no pre-built wheels or images for this device, so you must build vLLM from source.
 
-## Requirements
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
 
 - OS: Linux
 - Python: 3.9 -- 3.11
@@ -63,17 +63,19 @@ sudo apt-get install aws-neuronx-tools=2.* -y
 export PATH=/opt/aws/neuron/bin:$PATH
 ```
 
-## Set up using Python
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
 
-### Pre-built wheels
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
 
 Currently, there are no pre-built Neuron wheels.
 
-### Build wheel from source
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
 
-:::{note}
-The currently supported version of Pytorch for Neuron installs `triton` version `2.1.0`. This is incompatible with `vllm >= 0.5.3`. You may see an error `cannot import name 'default_dump_dir...`. To work around this, run a `pip install --upgrade triton==3.0.0` after installing the vLLM wheel.
-:::
+!!! note
+    The currently supported version of PyTorch for Neuron installs `triton` version `2.1.0`. This is incompatible with `vllm >= 0.5.3`. You may see an error `cannot import name 'default_dump_dir...`. To work around this, run `pip install --upgrade triton==3.0.0` after installing the vLLM wheel.
 
 Following instructions are applicable to Neuron SDK 2.16 and beyond.
 
@@ -122,18 +124,23 @@ VLLM_TARGET_DEVICE="neuron" pip install .
 
 If neuron packages are detected correctly in the installation process, `vllm-0.3.0+neuron212` will be installed.
 
-## Set up using Docker
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
 
-### Pre-built images
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
 
 Currently, there are no pre-built Neuron images.
 
-### Build image from source
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
 
-See <project:#deployment-docker-build-image-from-source> for instructions on building the Docker image.
+See [Building vLLM's Docker Image from Source][deployment-docker-build-image-from-source] for instructions on building the Docker image.
 
 Make sure to use <gh-file:docker/Dockerfile.neuron> in place of the default Dockerfile.
 
-## Extra information
+# --8<-- [end:build-image-from-source]
+# --8<-- [start:extra-information]
 
 There is no extra information for this device.
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md b/docs/getting_started/installation/ai_accelerator/tpu.inc.md
similarity index 55%
rename from docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
rename to docs/getting_started/installation/ai_accelerator/tpu.inc.md
index 4459cc61e1cde..d0b1681201376 100644
--- a/docs/source/getting_started/installation/ai_accelerator/tpu.inc.md
+++ b/docs/getting_started/installation/ai_accelerator/tpu.inc.md
@@ -1,4 +1,4 @@
-# Installation
+# --8<-- [start:installation]
 
 Tensor Processing Units (TPUs) are Google's custom-developed application-specific
 integrated circuits (ASICs) used to accelerate machine learning workloads. TPUs
@@ -30,11 +30,11 @@ For TPU pricing information, see [Cloud TPU pricing](https://cloud.google.com/tp
 You may need additional persistent storage for your TPU VMs. For more
 information, see [Storage options for Cloud TPU data](https://cloud.devsite.corp.google.com/tpu/docs/storage-options).
 
-:::{attention}
-There are no pre-built wheels for this device, so you must either use the pre-built Docker image or build vLLM from source.
-:::
+!!! warning
+    There are no pre-built wheels for this device, so you must either use the pre-built Docker image or build vLLM from source.
 
-## Requirements
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
 
 - Google Cloud TPU VM
 - TPU versions: v6e, v5e, v5p, v4
@@ -51,10 +51,9 @@ When you request queued resources, the request is added to a queue maintained by
 the Cloud TPU service. When the requested resource becomes available, it's
 assigned to your Google Cloud project for your immediate exclusive use.
 
-:::{note}
-In all of the following commands, replace the ALL CAPS parameter names with
-appropriate values. See the parameter descriptions table for more information.
-:::
+!!! note
+    In all of the following commands, replace the ALL CAPS parameter names with
+    appropriate values. See the parameter descriptions table for more information.
 
 ### Provision Cloud TPUs with GKE
 
@@ -79,33 +78,15 @@ gcloud alpha compute tpus queued-resources create QUEUED_RESOURCE_ID \
 --service-account SERVICE_ACCOUNT
 ```
 
-:::{list-table} Parameter descriptions
-:header-rows: 1
-
-- * Parameter name
-  * Description
-- * QUEUED_RESOURCE_ID
-  * The user-assigned ID of the queued resource request.
-- * TPU_NAME
-  * The user-assigned name of the TPU which is created when the queued
-    resource request is allocated.
-- * PROJECT_ID
-  * Your Google Cloud project
-- * ZONE
-  * The GCP zone where you want to create your Cloud TPU. The value you use
-    depends on the version of TPUs you are using. For more information, see
-    `TPU regions and zones <https://cloud.google.com/tpu/docs/regions-zones>`_
-- * ACCELERATOR_TYPE
-  * The TPU version you want to use. Specify the TPU version, for example
-    `v5litepod-4` specifies a v5e TPU with 4 cores, `v6e-1` specifies a v6e TPU with 1 core. For more information,
-    see [TPU versions](https://cloud.devsite.corp.google.com/tpu/docs/system-architecture-tpu-vm#versions).
-- * RUNTIME_VERSION
-  * The TPU VM runtime version to use. For example, use `v2-alpha-tpuv6e` for a VM loaded with one or more v6e TPU(s). For more information see [TPU VM images](https://cloud.google.com/tpu/docs/runtimes).
-- * SERVICE_ACCOUNT
-  * The email address for your service account. You can find it in the IAM
-    Cloud Console under *Service Accounts*. For example:
-    `tpu-service-account@<your_project_ID>.iam.gserviceaccount.com`
-:::
+| Parameter name     | Description                                                                                                                                                                                              |
+|--------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| QUEUED_RESOURCE_ID | The user-assigned ID of the queued resource request.                                                                                                                                                     |
+| TPU_NAME           | The user-assigned name of the TPU which is created when the queued resource request is allocated.                                                                                                        |
+| PROJECT_ID         | Your Google Cloud project                                                                                                                                                                                |
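+| ZONE               | The GCP zone where you want to create your Cloud TPU. The value you use depends on the version of TPUs you are using. For more information, see [TPU regions and zones](https://cloud.google.com/tpu/docs/regions-zones). |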
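+| ACCELERATOR_TYPE   | The TPU version you want to use. Specify the TPU version, for example `v5litepod-4` specifies a v5e TPU with 4 cores, `v6e-1` specifies a v6e TPU with 1 core. For more information, see [TPU versions](https://cloud.devsite.corp.google.com/tpu/docs/system-architecture-tpu-vm#versions). |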
+| RUNTIME_VERSION    | The TPU VM runtime version to use. For example, use `v2-alpha-tpuv6e` for a VM loaded with one or more v6e TPU(s). For more information see [TPU VM images](https://cloud.google.com/tpu/docs/runtimes). |
+  <figcaption>Parameter descriptions</figcaption>
 
 Connect to your TPU using SSH:
 
@@ -113,13 +94,16 @@ Connect to your TPU using SSH:
 gcloud compute tpus tpu-vm ssh TPU_NAME --zone ZONE
 ```
 
-## Set up using Python
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
 
-### Pre-built wheels
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
 
 Currently, there are no pre-built TPU wheels.
 
-### Build wheel from source
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
 
 Install Miniconda:
 
@@ -161,13 +145,16 @@ Run the setup script:
 VLLM_TARGET_DEVICE="tpu" python -m pip install -e .
 ```
 
-## Set up using Docker
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
 
-### Pre-built images
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
 
-See <project:#deployment-docker-pre-built-image> for instructions on using the official Docker image, making sure to substitute the image name `vllm/vllm-openai` with `vllm/vllm-tpu`.
+See [Use vLLM's Official Docker Image][deployment-docker-pre-built-image] for instructions on using the official Docker image, making sure to substitute the image name `vllm/vllm-openai` with `vllm/vllm-tpu`.
 
-### Build image from source
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
 
 You can use <gh-file:docker/Dockerfile.tpu> to build a Docker image with TPU support.
 
@@ -182,31 +169,30 @@ Run the Docker image with the following command:
 docker run --privileged --net host --shm-size=16G -it vllm-tpu
 ```
 
-:::{note}
-Since TPU relies on XLA which requires static shapes, vLLM bucketizes the
-possible input shapes and compiles an XLA graph for each shape. The
-compilation time may take 20~30 minutes in the first run. However, the
-compilation time reduces to ~5 minutes afterwards because the XLA graphs are
-cached in the disk (in {code}`VLLM_XLA_CACHE_PATH` or {code}`~/.cache/vllm/xla_cache` by default).
-:::
+!!! note
+    Since TPU relies on XLA which requires static shapes, vLLM bucketizes the
+    possible input shapes and compiles an XLA graph for each shape. The
+    compilation time may take 20~30 minutes in the first run. However, the
+    compilation time reduces to ~5 minutes afterwards because the XLA graphs are
+    cached in the disk (in `VLLM_XLA_CACHE_PATH` or `~/.cache/vllm/xla_cache` by default).
 
-:::{tip}
-If you encounter the following error:
+!!! tip
+    If you encounter the following error:
 
-```console
-from torch._C import *  # noqa: F403
-ImportError: libopenblas.so.0: cannot open shared object file: No such
-file or directory
-```
+    ```console
+    from torch._C import *  # noqa: F403
+    ImportError: libopenblas.so.0: cannot open shared object file: No such
+    file or directory
+    ```
 
-Install OpenBLAS with the following command:
+    Install OpenBLAS with the following command:
 
-```console
-sudo apt-get install libopenblas-base libopenmpi-dev libomp-dev
-```
+    ```console
+    sudo apt-get install libopenblas-base libopenmpi-dev libomp-dev
+    ```
 
-:::
-
-## Extra information
+# --8<-- [end:build-image-from-source]
+# --8<-- [start:extra-information]
 
 There is no extra information for this device.
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/cpu.md b/docs/getting_started/installation/cpu.md
similarity index 74%
rename from docs/source/getting_started/installation/cpu.md
rename to docs/getting_started/installation/cpu.md
index 2c0ec60d7100f..18c96b264ad82 100644
--- a/docs/source/getting_started/installation/cpu.md
+++ b/docs/getting_started/installation/cpu.md
@@ -2,107 +2,47 @@
 
 vLLM is a Python library that supports the following CPU variants. Select your CPU type to see vendor specific instructions:
 
-:::::{tab-set}
-:sync-group: device
+=== "Intel/AMD x86"
 
-::::{tab-item} Intel/AMD x86
-:selected:
-:sync: x86
+    --8<-- "docs/getting_started/installation/cpu/x86.inc.md:installation"
 
-:::{include} cpu/x86.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
+=== "ARM AArch64"
 
-::::
+    --8<-- "docs/getting_started/installation/cpu/arm.inc.md:installation"
 
-::::{tab-item} ARM AArch64
-:sync: arm
+=== "Apple silicon"
 
-:::{include} cpu/arm.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
+    --8<-- "docs/getting_started/installation/cpu/apple.inc.md:installation"
 
-::::
+=== "IBM Z (S390X)"
 
-::::{tab-item} Apple silicon
-:sync: apple
-
-:::{include} cpu/apple.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
-
-::::
-
-::::{tab-item} IBM Z (S390X)
-:sync: s390x
-
-:::{include} cpu/s390x.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
-
-::::
-
-:::::
+    --8<-- "docs/getting_started/installation/cpu/s390x.inc.md:installation"
 
 ## Requirements
 
 - Python: 3.9 -- 3.12
 
-:::::{tab-set}
-:sync-group: device
+=== "Intel/AMD x86"
 
-::::{tab-item} Intel/AMD x86
-:sync: x86
+    --8<-- "docs/getting_started/installation/cpu/x86.inc.md:requirements"
 
-:::{include} cpu/x86.inc.md
-:start-after: "## Requirements"
-:end-before: "## Set up using Python"
-:::
+=== "ARM AArch64"
 
-::::
+    --8<-- "docs/getting_started/installation/cpu/arm.inc.md:requirements"
 
-::::{tab-item} ARM AArch64
-:sync: arm
+=== "Apple silicon"
 
-:::{include} cpu/arm.inc.md
-:start-after: "## Requirements"
-:end-before: "## Set up using Python"
-:::
+    --8<-- "docs/getting_started/installation/cpu/apple.inc.md:requirements"
 
-::::
+=== "IBM Z (S390X)"
 
-::::{tab-item} Apple silicon
-:sync: apple
-
-:::{include} cpu/apple.inc.md
-:start-after: "## Requirements"
-:end-before: "## Set up using Python"
-:::
-
-::::
-
-::::{tab-item} IBM Z (S390X)
-:sync: s390x
-
-:::{include} cpu/s390x.inc.md
-:start-after: "## Requirements"
-:end-before: "## Set up using Python"
-:::
-
-::::
-
-:::::
+    --8<-- "docs/getting_started/installation/cpu/s390x.inc.md:requirements"
 
 ## Set up using Python
 
 ### Create a new Python environment
 
-:::{include} python_env_setup.inc.md
-:::
+--8<-- "docs/getting_started/installation/python_env_setup.inc.md"
 
 ### Pre-built wheels
 
@@ -110,69 +50,29 @@ Currently, there are no pre-built CPU wheels.
 
 ### Build wheel from source
 
-:::::{tab-set}
-:sync-group: device
+=== "Intel/AMD x86"
 
-::::{tab-item} Intel/AMD x86
-:sync: x86
+    --8<-- "docs/getting_started/installation/cpu/x86.inc.md:build-wheel-from-source"
 
-:::{include} cpu/x86.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
+=== "ARM AArch64"
 
-::::
+    --8<-- "docs/getting_started/installation/cpu/arm.inc.md:build-wheel-from-source"
 
-::::{tab-item} ARM AArch64
-:sync: arm
+=== "Apple silicon"
 
-:::{include} cpu/arm.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
+    --8<-- "docs/getting_started/installation/cpu/apple.inc.md:build-wheel-from-source"
 
-::::
+=== "IBM Z (S390X)"
 
-::::{tab-item} Apple silicon
-:sync: apple
-
-:::{include} cpu/apple.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
-
-::::
-
-::::{tab-item} IBM Z (s390x)
-:sync: s390x
-
-:::{include} cpu/s390x.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
-
-::::
-
-:::::
+    --8<-- "docs/getting_started/installation/cpu/s390x.inc.md:build-wheel-from-source"
 
 ## Set up using Docker
 
 ### Pre-built images
 
-:::::{tab-set}
-:sync-group: device
+=== "Intel/AMD x86"
 
-::::{tab-item} Intel/AMD x86
-:sync: x86
-
-:::{include} cpu/x86.inc.md
-:start-after: "### Pre-built images"
-:end-before: "### Build image from source"
-:::
-
-::::
-
-:::::
+    --8<-- "docs/getting_started/installation/cpu/x86.inc.md:pre-built-images"
 
 ### Build image from source
 
@@ -192,13 +92,11 @@ $ docker run --rm \
              other vLLM OpenAI server arguments
 ```
 
-::::{tip}
-For ARM or Apple silicon, use `docker/Dockerfile.arm`
-::::
+!!! tip
+    For ARM or Apple silicon, use `docker/Dockerfile.arm`
 
-::::{tip}
-For IBM Z (s390x), use `docker/Dockerfile.s390x` and in `docker run` use flag `--dtype float`
-::::
+!!! tip
+    For IBM Z (s390x), use `docker/Dockerfile.s390x` and in `docker run` use flag `--dtype float`
 
 ## Supported features
 
diff --git a/docs/source/getting_started/installation/cpu/apple.inc.md b/docs/getting_started/installation/cpu/apple.inc.md
similarity index 58%
rename from docs/source/getting_started/installation/cpu/apple.inc.md
rename to docs/getting_started/installation/cpu/apple.inc.md
index 7bc9e85ecd964..7a91e3ce5e5bc 100644
--- a/docs/source/getting_started/installation/cpu/apple.inc.md
+++ b/docs/getting_started/installation/cpu/apple.inc.md
@@ -1,24 +1,27 @@
-# Installation
+# --8<-- [start:installation]
 
 vLLM has experimental support for macOS with Apple silicon. For now, users shall build from the source vLLM to natively run on macOS.
 
 Currently the CPU implementation for macOS supports FP32 and FP16 datatypes.
 
-:::{attention}
-There are no pre-built wheels or images for this device, so you must build vLLM from source.
-:::
+!!! warning
+    There are no pre-built wheels or images for this device, so you must build vLLM from source.
 
-## Requirements
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
 
 - OS: `macOS Sonoma` or later
 - SDK: `XCode 15.4` or later with Command Line Tools
 - Compiler: `Apple Clang >= 15.0.0`
 
-## Set up using Python
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
 
-### Pre-built wheels
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
 
-### Build wheel from source
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
 
 After installation of XCode and the Command Line Tools, which include Apple Clang, execute the following commands to build and install vLLM from the source.
 
@@ -29,9 +32,8 @@ pip install -r requirements/cpu.txt
 pip install -e . 
 ```
 
-:::{note}
-On macOS the `VLLM_TARGET_DEVICE` is automatically set to `cpu`, which currently is the only supported device.
-:::
+!!! note
+    On macOS the `VLLM_TARGET_DEVICE` is automatically set to `cpu`, which currently is the only supported device.
 
 #### Troubleshooting
 
@@ -51,10 +53,15 @@ If the build has error like the following snippet where standard C++ headers can
       1 error generated.
 ```
 
-## Set up using Docker
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
 
-### Pre-built images
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
 
-### Build image from source
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
 
-## Extra information
+# --8<-- [end:build-image-from-source]
+# --8<-- [start:extra-information]
+# --8<-- [end:extra-information]
diff --git a/docs/getting_started/installation/cpu/arm.inc.md b/docs/getting_started/installation/cpu/arm.inc.md
new file mode 100644
index 0000000000000..59b71dcaf911a
--- /dev/null
+++ b/docs/getting_started/installation/cpu/arm.inc.md
@@ -0,0 +1,41 @@
+# --8<-- [start:installation]
+
+vLLM has been adapted to work on ARM64 CPUs with NEON support, leveraging the CPU backend initially developed for the x86 platform.
+
+ARM CPU backend currently supports Float32, FP16 and BFloat16 datatypes.
+
+!!! warning
+    There are no pre-built wheels or images for this device, so you must build vLLM from source.
+
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
+
+- OS: Linux
+- Compiler: `gcc/g++ >= 12.3.0` (optional, recommended)
+- Instruction Set Architecture (ISA): NEON support is required
+
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
+
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
+
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
+
+--8<-- "docs/getting_started/installation/cpu/build.inc.md"
+
+Testing has been conducted on AWS Graviton3 instances for compatibility.
+
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
+
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
+
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
+
+# --8<-- [end:build-image-from-source]
+# --8<-- [start:extra-information]
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/cpu/build.inc.md b/docs/getting_started/installation/cpu/build.inc.md
similarity index 96%
rename from docs/source/getting_started/installation/cpu/build.inc.md
rename to docs/getting_started/installation/cpu/build.inc.md
index f385f3d5b1984..7d6472afa7ea7 100644
--- a/docs/source/getting_started/installation/cpu/build.inc.md
+++ b/docs/getting_started/installation/cpu/build.inc.md
@@ -32,3 +32,5 @@ If you want to develop vllm, install it in editable mode instead.
 ```console
 VLLM_TARGET_DEVICE=cpu python setup.py develop
 ```
+
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/cpu/s390x.inc.md b/docs/getting_started/installation/cpu/s390x.inc.md
similarity index 64%
rename from docs/source/getting_started/installation/cpu/s390x.inc.md
rename to docs/getting_started/installation/cpu/s390x.inc.md
index 9b41173b44cee..670485feefb65 100644
--- a/docs/source/getting_started/installation/cpu/s390x.inc.md
+++ b/docs/getting_started/installation/cpu/s390x.inc.md
@@ -1,25 +1,28 @@
-# Installation
+# --8<-- [start:installation]
 
 vLLM has experimental support for s390x architecture on IBM Z platform. For now, users shall build from the vLLM source to natively run on IBM Z platform.
 
 Currently the CPU implementation for s390x architecture supports FP32 datatype only.
 
-:::{attention}
-There are no pre-built wheels or images for this device, so you must build vLLM from source.
-:::
+!!! warning
+    There are no pre-built wheels or images for this device, so you must build vLLM from source.
 
-## Requirements
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
 
 - OS: `Linux`
 - SDK: `gcc/g++ >= 12.3.0` or later with Command Line Tools
 - Instruction Set Architecture (ISA): VXE support is required. Works with Z14 and above.
 - Build install python packages: `pyarrow`, `torch` and `torchvision`
 
-## Set up using Python
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
 
-### Pre-built wheels
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
 
-### Build wheel from source
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
 
 Install the following packages from the package manager before building the vLLM. For example on RHEL 9.4:
 
@@ -39,9 +42,8 @@ curl https://sh.rustup.rs -sSf | sh -s -- -y && \
 
 Execute the following commands to build and install vLLM from the source.
 
-::::{tip}
-Please build the following dependencies, `torchvision`, `pyarrow` from the source before building vLLM.
-::::
+!!! tip
+    Please build the following dependencies, `torchvision`, `pyarrow` from the source before building vLLM.
 
 ```console
     sed -i '/^torch/d' requirements-build.txt    # remove torch from requirements-build.txt since we use nightly builds
@@ -53,10 +55,15 @@ Please build the following dependencies, `torchvision`, `pyarrow` from the sourc
     pip install dist/*.whl
 ```
 
-## Set up using Docker
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
 
-### Pre-built images
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
 
-### Build image from source
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
 
-## Extra information
+# --8<-- [end:build-image-from-source]
+# --8<-- [start:extra-information]
+# --8<-- [end:extra-information]
diff --git a/docs/getting_started/installation/cpu/x86.inc.md b/docs/getting_started/installation/cpu/x86.inc.md
new file mode 100644
index 0000000000000..9434eeea8b4a1
--- /dev/null
+++ b/docs/getting_started/installation/cpu/x86.inc.md
@@ -0,0 +1,46 @@
+# --8<-- [start:installation]
+
+vLLM initially supports basic model inferencing and serving on x86 CPU platform, with data types FP32, FP16 and BF16.
+
+!!! warning
+    There are no pre-built wheels or images for this device, so you must build vLLM from source.
+
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
+
+- OS: Linux
+- Compiler: `gcc/g++ >= 12.3.0` (optional, recommended)
+- Instruction Set Architecture (ISA): AVX512 (optional, recommended)
+
+!!! tip
+    [Intel Extension for PyTorch (IPEX)](https://github.com/intel/intel-extension-for-pytorch) extends PyTorch with up-to-date feature optimizations for an extra performance boost on Intel hardware.
+
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
+
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
+
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
+
+--8<-- "docs/getting_started/installation/cpu/build.inc.md"
+
+!!! note
+    - AVX512_BF16 is an ISA extension that provides native BF16 data type conversion and vector product instructions, which brings some performance improvement compared with pure AVX512. The CPU backend build script will check the host CPU flags to determine whether to enable AVX512_BF16.
+    - If you want to force-enable AVX512_BF16 for cross-compilation, set the environment variable `VLLM_CPU_AVX512BF16=1` before building.
+
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
+
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
+
+See [https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo](https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo)
+
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
+
+# --8<-- [end:build-image-from-source]
+# --8<-- [start:extra-information]
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/device.template.md b/docs/getting_started/installation/device.template.md
similarity index 100%
rename from docs/source/getting_started/installation/device.template.md
rename to docs/getting_started/installation/device.template.md
diff --git a/docs/getting_started/installation/gpu.md b/docs/getting_started/installation/gpu.md
new file mode 100644
index 0000000000000..3c983f600673d
--- /dev/null
+++ b/docs/getting_started/installation/gpu.md
@@ -0,0 +1,124 @@
+# GPU
+
+vLLM is a Python library that supports the following GPU variants. Select your GPU type to see vendor specific instructions:
+
+=== "NVIDIA CUDA"
+
+    --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:installation"
+
+=== "AMD ROCm"
+
+    --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:installation"
+
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:installation"
+
+## Requirements
+
+- OS: Linux
+- Python: 3.9 -- 3.12
+
+=== "NVIDIA CUDA"
+
+    --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:requirements"
+
+=== "AMD ROCm"
+
+    --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:requirements"
+
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:requirements"
+
+## Set up using Python
+
+### Create a new Python environment
+
+--8<-- "docs/getting_started/installation/python_env_setup.inc.md"
+
+=== "NVIDIA CUDA"
+
+    --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:set-up-using-python"
+
+=== "AMD ROCm"
+
+    There is no extra information on creating a new Python environment for this device.
+
+=== "Intel XPU"
+
+    There is no extra information on creating a new Python environment for this device.
+
+### Pre-built wheels
+
+=== "NVIDIA CUDA"
+
+    --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:pre-built-wheels"
+
+=== "AMD ROCm"
+
+    --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:pre-built-wheels"
+
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:pre-built-wheels"
+
+[](){ #build-from-source }
+
+### Build wheel from source
+
+=== "NVIDIA CUDA"
+
+    --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:build-wheel-from-source"
+
+=== "AMD ROCm"
+
+    --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:build-wheel-from-source"
+
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:build-wheel-from-source"
+
+## Set up using Docker
+
+### Pre-built images
+
+=== "NVIDIA CUDA"
+
+    --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:pre-built-images"
+
+=== "AMD ROCm"
+
+    --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:pre-built-images"
+
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:pre-built-images"
+
+### Build image from source
+
+=== "NVIDIA CUDA"
+
+    --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:build-image-from-source"
+
+=== "AMD ROCm"
+
+    --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:build-image-from-source"
+
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:build-image-from-source"
+
+## Supported features
+
+=== "NVIDIA CUDA"
+
+    --8<-- "docs/getting_started/installation/gpu/cuda.inc.md:supported-features"
+
+=== "AMD ROCm"
+
+    --8<-- "docs/getting_started/installation/gpu/rocm.inc.md:supported-features"
+
+=== "Intel XPU"
+
+    --8<-- "docs/getting_started/installation/gpu/xpu.inc.md:supported-features"
diff --git a/docs/source/getting_started/installation/gpu/cuda.inc.md b/docs/getting_started/installation/gpu/cuda.inc.md
similarity index 74%
rename from docs/source/getting_started/installation/gpu/cuda.inc.md
rename to docs/getting_started/installation/gpu/cuda.inc.md
index d3d4b4ef6c80f..8653f980501f4 100644
--- a/docs/source/getting_started/installation/gpu/cuda.inc.md
+++ b/docs/getting_started/installation/gpu/cuda.inc.md
@@ -1,24 +1,26 @@
-# Installation
+# --8<-- [start:installation]
 
 vLLM contains pre-compiled C++ and CUDA (12.8) binaries.
 
-## Requirements
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
 
 - GPU: compute capability 7.0 or higher (e.g., V100, T4, RTX20xx, A100, L4, H100, etc.)
 
-## Set up using Python
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
 
 ### Create a new Python environment
 
-:::{note}
-PyTorch installed via `conda` will statically link `NCCL` library, which can cause issues when vLLM tries to use `NCCL`. See <gh-issue:8420> for more details.
-:::
+!!! note
+    PyTorch installed via `conda` will statically link `NCCL` library, which can cause issues when vLLM tries to use `NCCL`. See <gh-issue:8420> for more details.
 
 In order to be performant, vLLM has to compile many cuda kernels. The compilation unfortunately introduces binary incompatibility with other CUDA versions and PyTorch versions, even for the same PyTorch version with different building configurations.
 
-Therefore, it is recommended to install vLLM with a **fresh new** environment. If either you have a different CUDA version or you want to use an existing PyTorch installation, you need to build vLLM from source. See [below](#build-from-source) for more details.
+Therefore, it is recommended to install vLLM with a **fresh new** environment. If either you have a different CUDA version or you want to use an existing PyTorch installation, you need to build vLLM from source. See [below][build-from-source] for more details.
 
-### Pre-built wheels
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
 
 You can install vLLM using either `pip` or `uv pip`:
 
@@ -32,9 +34,8 @@ uv pip install vllm --torch-backend=auto
 
 We recommend leveraging `uv` to [automatically select the appropriate PyTorch index at runtime](https://docs.astral.sh/uv/guides/integration/pytorch/#automatic-backend-selection) by inspecting the installed CUDA driver version via `--torch-backend=auto` (or `UV_TORCH_BACKEND=auto`). To select a specific backend (e.g., `cu126`), set `--torch-backend=cu126` (or `UV_TORCH_BACKEND=cu126`). If this doesn't work, try running `uv self update` to update `uv` first.
 
-:::{note}
-NVIDIA Blackwell GPUs (B200, GB200) require a minimum of CUDA 12.8, so make sure you are installing PyTorch wheels with at least that version. PyTorch itself offers a [dedicated interface](https://pytorch.org/get-started/locally/) to determine the appropriate pip command to run for a given target configuration.
-:::
+!!! note
+    NVIDIA Blackwell GPUs (B200, GB200) require a minimum of CUDA 12.8, so make sure you are installing PyTorch wheels with at least that version. PyTorch itself offers a [dedicated interface](https://pytorch.org/get-started/locally/) to determine the appropriate pip command to run for a given target configuration.
 
 As of now, vLLM's binaries are compiled with CUDA 12.8 and public PyTorch release versions by default. We also provide vLLM binaries compiled with CUDA 12.6, 11.8, and public PyTorch release versions:
 
@@ -45,7 +46,7 @@ export PYTHON_VERSION=312
 uv pip install https://github.com/vllm-project/vllm/releases/download/v${VLLM_VERSION}/vllm-${VLLM_VERSION}+cu118-cp${PYTHON_VERSION}-cp${PYTHON_VERSION}-manylinux1_x86_64.whl --extra-index-url https://download.pytorch.org/whl/cu118
 ```
 
-(install-the-latest-code)=
+[](){ #install-the-latest-code }
 
 #### Install the latest code
 
@@ -87,7 +88,8 @@ uv pip install vllm --torch-backend=auto --extra-index-url https://wheels.vllm.a
 
 The `uv` approach works for vLLM `v0.6.6` and later and offers an easy-to-remember command. A unique feature of `uv` is that packages in `--extra-index-url` have [higher priority than the default index](https://docs.astral.sh/uv/pip/compatibility/#packages-that-exist-on-multiple-indexes). If the latest public release is `v0.6.6.post1`, `uv`'s behavior allows installing a commit before `v0.6.6.post1` by specifying the `--extra-index-url`. In contrast, `pip` combines packages from `--extra-index-url` and the default index, choosing only the latest version, which makes it difficult to install a development version prior to the released version.
 
-### Build wheel from source
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
 
 #### Set up using Python-only build (without compilation)
 
@@ -105,10 +107,9 @@ This command will do the following:
 3. Download the pre-built wheel of the base commit.
 4. Use its compiled libraries in the installation.
 
-:::{note}
-1. If you change C++ or kernel code, you cannot use Python-only build; otherwise you will see an import error about library not found or undefined symbol.
-2. If you rebase your dev branch, it is recommended to uninstall vllm and re-run the above command to make sure your libraries are up to date.
-:::
+!!! note
+    1. If you change C++ or kernel code, you cannot use Python-only build; otherwise you will see an import error about library not found or undefined symbol.
+    2. If you rebase your dev branch, it is recommended to uninstall vllm and re-run the above command to make sure your libraries are up to date.
 
 In case you see an error about wheel not found when running the above command, it might be because the commit you based on in the main branch was just merged and the wheel is being built. In this case, you can wait for around an hour to try again, or manually assign the previous commit in the installation using the `VLLM_PRECOMPILED_WHEEL_LOCATION` environment variable.
 
@@ -118,12 +119,11 @@ export VLLM_PRECOMPILED_WHEEL_LOCATION=https://wheels.vllm.ai/${VLLM_COMMIT}/vll
 pip install --editable .
 ```
 
-You can find more information about vLLM's wheels in <project:#install-the-latest-code>.
+You can find more information about vLLM's wheels in [Install the latest code][install-the-latest-code].
 
-:::{note}
-There is a possibility that your source code may have a different commit ID compared to the latest vLLM wheel, which could potentially lead to unknown errors.
-It is recommended to use the same commit ID for the source code as the vLLM wheel you have installed. Please refer to <project:#install-the-latest-code> for instructions on how to install a specified wheel.
-:::
+!!! note
+    There is a possibility that your source code may have a different commit ID compared to the latest vLLM wheel, which could potentially lead to unknown errors.
+    It is recommended to use the same commit ID for the source code as the vLLM wheel you have installed. Please refer to [Install the latest code][install-the-latest-code] for instructions on how to install a specified wheel.
 
 #### Full build (with compilation)
 
@@ -135,17 +135,16 @@ cd vllm
 pip install -e .
 ```
 
-:::{tip}
-Building from source requires a lot of compilation. If you are building from source repeatedly, it's more efficient to cache the compilation results.
+!!! tip
+    Building from source requires a lot of compilation. If you are building from source repeatedly, it's more efficient to cache the compilation results.
 
-For example, you can install [ccache](https://github.com/ccache/ccache) using `conda install ccache` or `apt install ccache` .
-As long as `which ccache` command can find the `ccache` binary, it will be used automatically by the build system. After the first build, subsequent builds will be much faster.
+    For example, you can install [ccache](https://github.com/ccache/ccache) using `conda install ccache` or `apt install ccache`.
+    As long as `which ccache` command can find the `ccache` binary, it will be used automatically by the build system. After the first build, subsequent builds will be much faster.
 
-When using `ccache` with `pip install -e .`, you should run `CCACHE_NOHASHDIR="true" pip install --no-build-isolation -e .`. This is because `pip` creates a new folder with a random name for each build, preventing `ccache` from recognizing that the same files are being built.
+    When using `ccache` with `pip install -e .`, you should run `CCACHE_NOHASHDIR="true" pip install --no-build-isolation -e .`. This is because `pip` creates a new folder with a random name for each build, preventing `ccache` from recognizing that the same files are being built.
 
-[sccache](https://github.com/mozilla/sccache) works similarly to `ccache`, but has the capability to utilize caching in remote storage environments.
-The following environment variables can be set to configure the vLLM `sccache` remote: `SCCACHE_BUCKET=vllm-build-sccache SCCACHE_REGION=us-west-2 SCCACHE_S3_NO_CREDENTIALS=1`. We also recommend setting `SCCACHE_IDLE_TIMEOUT=0`.
-:::
+    [sccache](https://github.com/mozilla/sccache) works similarly to `ccache`, but has the capability to utilize caching in remote storage environments.
+    The following environment variables can be set to configure the vLLM `sccache` remote: `SCCACHE_BUCKET=vllm-build-sccache SCCACHE_REGION=us-west-2 SCCACHE_S3_NO_CREDENTIALS=1`. We also recommend setting `SCCACHE_IDLE_TIMEOUT=0`.
 
 ##### Use an existing PyTorch installation
 
@@ -220,11 +219,13 @@ export VLLM_TARGET_DEVICE=empty
 pip install -e .
 ```
 
-## Set up using Docker
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
 
-### Pre-built images
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
 
-See <project:#deployment-docker-pre-built-image> for instructions on using the official Docker image.
+See [deployment-docker-pre-built-image][deployment-docker-pre-built-image] for instructions on using the official Docker image.
 
 Another way to access the latest code is to use the docker images:
 
@@ -237,10 +238,12 @@ These docker images are used for CI and testing only, and they are not intended
 
 The latest code can contain bugs and may not be stable. Please use it with caution.
 
-### Build image from source
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
 
-See <project:#deployment-docker-build-image-from-source> for instructions on building the Docker image.
+See [deployment-docker-build-image-from-source][deployment-docker-build-image-from-source] for instructions on building the Docker image.
 
 ## Supported features
 
-See <project:#feature-x-hardware> compatibility matrix for feature support information.
+See the [feature x hardware][feature-x-hardware] compatibility matrix for feature support information.
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/gpu/rocm.inc.md b/docs/getting_started/installation/gpu/rocm.inc.md
similarity index 72%
rename from docs/source/getting_started/installation/gpu/rocm.inc.md
rename to docs/getting_started/installation/gpu/rocm.inc.md
index dc74368fe2c96..85d539b75669f 100644
--- a/docs/source/getting_started/installation/gpu/rocm.inc.md
+++ b/docs/getting_started/installation/gpu/rocm.inc.md
@@ -1,28 +1,31 @@
-# Installation
+# --8<-- [start:installation]
 
 vLLM supports AMD GPUs with ROCm 6.3.
 
-:::{attention}
-There are no pre-built wheels for this device, so you must either use the pre-built Docker image or build vLLM from source.
-:::
+!!! warning
+    There are no pre-built wheels for this device, so you must either use the pre-built Docker image or build vLLM from source.
 
-## Requirements
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
 
 - GPU: MI200s (gfx90a), MI300 (gfx942), Radeon RX 7900 series (gfx1100/1101), Radeon RX 9000 series (gfx1200/1201)
 - ROCm 6.3
 
-## Set up using Python
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
 
-### Pre-built wheels
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
 
 Currently, there are no pre-built ROCm wheels.
 
-### Build wheel from source
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
 
 0. Install prerequisites (skip if you are already in an environment/docker with the following installed):
 
-- [ROCm](https://rocm.docs.amd.com/en/latest/deploy/linux/index.html)
-- [PyTorch](https://pytorch.org/)
+    - [ROCm](https://rocm.docs.amd.com/en/latest/deploy/linux/index.html)
+    - [PyTorch](https://pytorch.org/)
 
     For installing PyTorch, you can start from a fresh docker image, e.g, `rocm/pytorch:rocm6.3_ubuntu24.04_py3.12_pytorch_release_2.4.0`, `rocm/pytorch-nightly`. If you are using docker image, you can skip to Step 3.
 
@@ -49,9 +52,8 @@ Currently, there are no pre-built ROCm wheels.
     cd ../..
     ```
 
-    :::{note}
-    If you see HTTP issue related to downloading packages during building triton, please try again as the HTTP error is intermittent.
-    :::
+    !!! note
+        If you see HTTP issue related to downloading packages during building triton, please try again as the HTTP error is intermittent.
 
 2. Optionally, if you choose to use CK flash attention, you can install [flash attention for ROCm](https://github.com/ROCm/flash-attention)
 
@@ -69,9 +71,8 @@ Currently, there are no pre-built ROCm wheels.
     cd ..
     ```
 
-    :::{note}
-    You might need to downgrade the "ninja" version to 1.10 it is not used when compiling flash-attention-2 (e.g. `pip install ninja==1.10.2.4`)
-    :::
+    !!! note
+        You might need to downgrade the "ninja" version to 1.10 as it is not used when compiling flash-attention-2 (e.g. `pip install ninja==1.10.2.4`)
 
 3. If you choose to build AITER yourself to use a certain branch or commit, you can build AITER using the following steps:
 
@@ -84,9 +85,8 @@ Currently, there are no pre-built ROCm wheels.
     python3 setup.py develop
     ```
 
-    :::{note}
-    You will need to config the `$AITER_BRANCH_OR_COMMIT` for your purpose.
-    :::
+    !!! note
+        You will need to set `$AITER_BRANCH_OR_COMMIT` to the branch or commit you want to build.
 
 4. Build vLLM. For example, vLLM on ROCM 6.3 can be built with the following steps:
 
@@ -108,31 +108,30 @@ Currently, there are no pre-built ROCm wheels.
 
     This may take 5-10 minutes. Currently, `pip install .` does not work for ROCm installation.
 
-    :::{tip}
-   - Triton flash attention is used by default. For benchmarking purposes, it is recommended to run a warm up step before collecting perf numbers.
-   - Triton flash attention does not currently support sliding window attention. If using half precision, please use CK flash-attention for sliding window support.
-   - To use CK flash-attention or PyTorch naive attention, please use this flag `export VLLM_USE_TRITON_FLASH_ATTN=0` to turn off triton flash attention.
-   - The ROCm version of PyTorch, ideally, should match the ROCm driver version.
-    :::
+    !!! tip
+        - Triton flash attention is used by default. For benchmarking purposes, it is recommended to run a warm up step before collecting perf numbers.
+        - Triton flash attention does not currently support sliding window attention. If using half precision, please use CK flash-attention for sliding window support.
+        - To use CK flash-attention or PyTorch naive attention, please use this flag `export VLLM_USE_TRITON_FLASH_ATTN=0` to turn off triton flash attention.
+        - The ROCm version of PyTorch, ideally, should match the ROCm driver version.
 
-:::{tip}
-- For MI300x (gfx942) users, to achieve optimal performance, please refer to [MI300x tuning guide](https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/index.html) for performance optimization and tuning tips on system and workflow level.
-  For vLLM, please refer to [vLLM performance optimization](https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html#vllm-performance-optimization).
-:::
+!!! tip
+    - For MI300x (gfx942) users, to achieve optimal performance, please refer to [MI300x tuning guide](https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/index.html) for performance optimization and tuning tips on system and workflow level.
+      For vLLM, please refer to [vLLM performance optimization](https://rocm.docs.amd.com/en/latest/how-to/tuning-guides/mi300x/workload.html#vllm-performance-optimization).
 
 ## Set up using Docker (Recommended)
 
-### Pre-built images
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
 
 The [AMD Infinity hub for vLLM](https://hub.docker.com/r/rocm/vllm/tags) offers a prebuilt, optimized
 docker image designed for validating inference performance on the AMD Instinct™ MI300X accelerator.
 
-:::{tip}
-Please check [LLM inference performance validation on AMD Instinct MI300X](https://rocm.docs.amd.com/en/latest/how-to/performance-validation/mi300x/vllm-benchmark.html)
-for instructions on how to use this prebuilt docker image.
-:::
+!!! tip
+    Please check [LLM inference performance validation on AMD Instinct MI300X](https://rocm.docs.amd.com/en/latest/how-to/performance-validation/mi300x/vllm-benchmark.html)
+    for instructions on how to use this prebuilt docker image.
 
-### Build image from source
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
 
 Building the Docker image from source is the recommended way to use vLLM with ROCm.
 
@@ -213,4 +212,5 @@ Where the `<path/to/model>` is the location where the model is stored, for examp
 
 ## Supported features
 
-See <project:#feature-x-hardware> compatibility matrix for feature support information.
+See the [feature x hardware][feature-x-hardware] compatibility matrix for feature support information.
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/gpu/xpu.inc.md b/docs/getting_started/installation/gpu/xpu.inc.md
similarity index 67%
rename from docs/source/getting_started/installation/gpu/xpu.inc.md
rename to docs/getting_started/installation/gpu/xpu.inc.md
index 74937a1842279..bee9a7ebb717b 100644
--- a/docs/source/getting_started/installation/gpu/xpu.inc.md
+++ b/docs/getting_started/installation/gpu/xpu.inc.md
@@ -1,23 +1,26 @@
-# Installation
+# --8<-- [start:installation]
 
 vLLM initially supports basic model inference and serving on Intel GPU platform.
 
-:::{attention}
-There are no pre-built wheels or images for this device, so you must build vLLM from source.
-:::
+!!! warning
+    There are no pre-built wheels or images for this device, so you must build vLLM from source.
 
-## Requirements
+# --8<-- [end:installation]
+# --8<-- [start:requirements]
 
 - Supported Hardware: Intel Data Center GPU, Intel ARC GPU
 - OneAPI requirements: oneAPI 2025.0
 
-## Set up using Python
+# --8<-- [end:requirements]
+# --8<-- [start:set-up-using-python]
 
-### Pre-built wheels
+# --8<-- [end:set-up-using-python]
+# --8<-- [start:pre-built-wheels]
 
 Currently, there are no pre-built XPU wheels.
 
-### Build wheel from source
+# --8<-- [end:pre-built-wheels]
+# --8<-- [start:build-wheel-from-source]
 
 - First, install required driver and Intel OneAPI 2025.0 or later.
 - Second, install Python packages for vLLM XPU backend building:
@@ -35,18 +38,20 @@ pip install -v -r requirements/xpu.txt
 VLLM_TARGET_DEVICE=xpu python setup.py install
 ```
 
-:::{note}
-- FP16 is the default data type in the current XPU backend. The BF16 data
-  type is supported on Intel Data Center GPU, not supported on Intel Arc GPU yet.
-:::
+!!! note
+    - FP16 is the default data type in the current XPU backend. The BF16 data
+      type is supported on Intel Data Center GPU, but is not yet supported on Intel Arc GPU.
 
-## Set up using Docker
+# --8<-- [end:build-wheel-from-source]
+# --8<-- [start:set-up-using-docker]
 
-### Pre-built images
+# --8<-- [end:set-up-using-docker]
+# --8<-- [start:pre-built-images]
 
 Currently, there are no pre-built XPU images.
 
-### Build image from source
+# --8<-- [end:pre-built-images]
+# --8<-- [start:build-image-from-source]
 
 ```console
 $ docker build -f docker/Dockerfile.xpu -t vllm-xpu-env --shm-size=4g .
@@ -73,3 +78,4 @@ python -m vllm.entrypoints.openai.api_server \
 ```
 
 By default, a ray instance will be launched automatically if no existing one is detected in the system, with `num-gpus` equals to `parallel_config.world_size`. We recommend properly starting a ray cluster before execution, referring to the <gh-file:examples/online_serving/run_cluster.sh> helper script.
+# --8<-- [end:extra-information]
diff --git a/docs/source/getting_started/installation/python_env_setup.inc.md b/docs/getting_started/installation/python_env_setup.inc.md
similarity index 100%
rename from docs/source/getting_started/installation/python_env_setup.inc.md
rename to docs/getting_started/installation/python_env_setup.inc.md
diff --git a/docs/source/getting_started/quickstart.md b/docs/getting_started/quickstart.md
similarity index 75%
rename from docs/source/getting_started/quickstart.md
rename to docs/getting_started/quickstart.md
index ecca296b0b0cd..d24e75e8141d8 100644
--- a/docs/source/getting_started/quickstart.md
+++ b/docs/getting_started/quickstart.md
@@ -1,11 +1,12 @@
-(quickstart)=
-
-# Quickstart
+---
+title: Quickstart
+---
+[](){ #quickstart }
 
 This guide will help you quickly get started with vLLM to perform:
 
-- [Offline batched inference](#quickstart-offline)
-- [Online serving using OpenAI-compatible server](#quickstart-online)
+- [Offline batched inference][quickstart-offline]
+- [Online serving using OpenAI-compatible server][quickstart-online]
 
 ## Prerequisites
 
@@ -41,31 +42,29 @@ pip install --upgrade uv
 uv pip install vllm --torch-backend=auto
 ```
 
-:::{note}
-For more detail and non-CUDA platforms, please refer [here](#installation-index) for specific instructions on how to install vLLM.
-:::
+!!! note
+    For more details, including non-CUDA platforms, please refer to the [installation guide][installation-index] for specific instructions on how to install vLLM.
 
-(quickstart-offline)=
+[](){ #quickstart-offline }
 
 ## Offline Batched Inference
 
 With vLLM installed, you can start generating texts for list of input prompts (i.e. offline batch inferencing). See the example script: <gh-file:examples/offline_inference/basic/basic.py>
 
-The first line of this example imports the classes {class}`~vllm.LLM` and {class}`~vllm.SamplingParams`:
+The first line of this example imports the classes [LLM][vllm.LLM] and [SamplingParams][vllm.SamplingParams]:
 
-- {class}`~vllm.LLM` is the main class for running offline inference with vLLM engine.
-- {class}`~vllm.SamplingParams` specifies the parameters for the sampling process.
+- [LLM][vllm.LLM] is the main class for running offline inference with vLLM engine.
+- [SamplingParams][vllm.SamplingParams] specifies the parameters for the sampling process.
 
 ```python
 from vllm import LLM, SamplingParams
 ```
 
-The next section defines a list of input prompts and sampling parameters for text generation. The [sampling temperature](https://arxiv.org/html/2402.05201v1) is set to `0.8` and the [nucleus sampling probability](https://en.wikipedia.org/wiki/Top-p_sampling) is set to `0.95`. You can find more information about the sampling parameters [here](#sampling-params).
-:::{important}
-By default, vLLM will use sampling parameters recommended by model creator by applying the `generation_config.json` from the Hugging Face model repository if it exists. In most cases, this will provide you with the best results by default if {class}`~vllm.SamplingParams` is not specified.
+The next section defines a list of input prompts and sampling parameters for text generation. The [sampling temperature](https://arxiv.org/html/2402.05201v1) is set to `0.8` and the [nucleus sampling probability](https://en.wikipedia.org/wiki/Top-p_sampling) is set to `0.95`. You can find more information about the sampling parameters [here][sampling-params].
+!!! warning
+    By default, vLLM will use sampling parameters recommended by the model creator by applying the `generation_config.json` from the Hugging Face model repository if it exists. In most cases, this will provide you with the best results by default if [SamplingParams][vllm.SamplingParams] is not specified.
 
-However, if vLLM's default sampling parameters are preferred, please set `generation_config="vllm"` when creating the {class}`~vllm.LLM` instance.
-:::
+    However, if vLLM's default sampling parameters are preferred, please set `generation_config="vllm"` when creating the [LLM][vllm.LLM] instance.
 
 ```python
 prompts = [
@@ -77,20 +76,18 @@ prompts = [
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 ```
 
-The {class}`~vllm.LLM` class initializes vLLM's engine and the [OPT-125M model](https://arxiv.org/abs/2205.01068) for offline inference. The list of supported models can be found [here](#supported-models).
+The [LLM][vllm.LLM] class initializes vLLM's engine and the [OPT-125M model](https://arxiv.org/abs/2205.01068) for offline inference. The list of supported models can be found [here][supported-models].
 
 ```python
 llm = LLM(model="facebook/opt-125m")
 ```
 
-:::{note}
-By default, vLLM downloads models from [Hugging Face](https://huggingface.co/). If you would like to use models from [ModelScope](https://www.modelscope.cn), set the environment variable `VLLM_USE_MODELSCOPE` before initializing the engine.
+!!! note
+    By default, vLLM downloads models from [Hugging Face](https://huggingface.co/). If you would like to use models from [ModelScope](https://www.modelscope.cn), set the environment variable `VLLM_USE_MODELSCOPE` before initializing the engine.
 
-```shell
-export VLLM_USE_MODELSCOPE=True
-```
-
-:::
+    ```shell
+    export VLLM_USE_MODELSCOPE=True
+    ```
 
 Now, the fun part! The outputs are generated using `llm.generate`. It adds the input prompts to the vLLM engine's waiting queue and executes the vLLM engine to generate the outputs with high throughput. The outputs are returned as a list of `RequestOutput` objects, which include all of the output tokens.
 
@@ -103,7 +100,7 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
-(quickstart-online)=
+[](){ #quickstart-online }
 
 ## OpenAI-Compatible Server
 
@@ -116,15 +113,13 @@ Run the following command to start the vLLM server with the [Qwen2.5-1.5B-Instru
 vllm serve Qwen/Qwen2.5-1.5B-Instruct
 ```
 
-:::{note}
-By default, the server uses a predefined chat template stored in the tokenizer.
-You can learn about overriding it [here](#chat-template).
-:::
-:::{important}
-By default, the server applies `generation_config.json` from the huggingface model repository if it exists. This means the default values of certain sampling parameters can be overridden by those recommended by the model creator.
+!!! note
+    By default, the server uses a predefined chat template stored in the tokenizer.
+    You can learn about overriding it [here][chat-template].
+!!! warning
+    By default, the server applies `generation_config.json` from the Hugging Face model repository if it exists. This means the default values of certain sampling parameters can be overridden by those recommended by the model creator.
 
-To disable this behavior, please pass `--generation-config vllm` when launching the server.
-:::
+    To disable this behavior, please pass `--generation-config vllm` when launching the server.
 
 This server can be queried in the same format as OpenAI API. For example, to list the models:
 
@@ -215,6 +210,5 @@ Currently, vLLM supports multiple backends for efficient Attention computation a
 
 If desired, you can also manually set the backend of your choice by configuring the environment variable `VLLM_ATTENTION_BACKEND` to one of the following options: `FLASH_ATTN`, `FLASHINFER` or `XFORMERS`.
 
-```{attention}
-There are no pre-built vllm wheels containing Flash Infer, so you must install it in your environment first. Refer to the [Flash Infer official docs](https://docs.flashinfer.ai/) or see <gh-file:docker/Dockerfile> for instructions on how to install it.
-```
+!!! warning
+    There are no pre-built vllm wheels containing FlashInfer, so you must install it in your environment first. Refer to the [FlashInfer official docs](https://docs.flashinfer.ai/) or see <gh-file:docker/Dockerfile> for instructions on how to install it.
diff --git a/docs/source/getting_started/troubleshooting.md b/docs/getting_started/troubleshooting.md
similarity index 86%
rename from docs/source/getting_started/troubleshooting.md
rename to docs/getting_started/troubleshooting.md
index a4744827f2268..07e30f9684ae7 100644
--- a/docs/source/getting_started/troubleshooting.md
+++ b/docs/getting_started/troubleshooting.md
@@ -1,12 +1,12 @@
-(troubleshooting)=
-
-# Troubleshooting
+---
+title: Troubleshooting
+---
+[](){ #troubleshooting }
 
 This document outlines some troubleshooting strategies you can consider. If you think you've discovered a bug, please [search existing issues](https://github.com/vllm-project/vllm/issues?q=is%3Aissue) first to see if it has already been reported. If not, please [file a new issue](https://github.com/vllm-project/vllm/issues/new/choose), providing as much relevant information as possible.
 
-:::{note}
-Once you've debugged a problem, remember to turn off any debugging environment variables defined, or simply start a new shell to avoid being affected by lingering debugging settings. Otherwise, the system might be slow with debugging functionalities left activated.
-:::
+!!! note
+    Once you've debugged a problem, remember to turn off any debugging environment variables defined, or simply start a new shell to avoid being affected by lingering debugging settings. Otherwise, the system might be slow with debugging functionalities left activated.
 
 ## Hangs downloading a model
 
@@ -18,13 +18,12 @@ It's recommended to download the model first using the [huggingface-cli](https:/
 If the model is large, it can take a long time to load it from disk. Pay attention to where you store the model. Some clusters have shared filesystems across nodes, e.g. a distributed filesystem or a network filesystem, which can be slow.
 It'd be better to store the model in a local disk. Additionally, have a look at the CPU memory usage, when the model is too large it might take a lot of CPU memory, slowing down the operating system because it needs to frequently swap between disk and memory.
 
-:::{note}
-To isolate the model downloading and loading issue, you can use the `--load-format dummy` argument to skip loading the model weights. This way, you can check if the model downloading and loading is the bottleneck.
-:::
+!!! note
+    To isolate the model downloading and loading issue, you can use the `--load-format dummy` argument to skip loading the model weights. This way, you can check if the model downloading and loading is the bottleneck.
 
 ## Out of memory
 
-If the model is too large to fit in a single GPU, you will get an out-of-memory (OOM) error. Consider adopting [these options](#reducing-memory-usage) to reduce the memory consumption.
+If the model is too large to fit in a single GPU, you will get an out-of-memory (OOM) error. Consider adopting [these options][reducing-memory-usage] to reduce the memory consumption.
 
 ## Generation quality changed
 
@@ -53,9 +52,9 @@ You might also need to set `export NCCL_SOCKET_IFNAME=<your_network_interface>`
 ## Error near `self.graph.replay()`
 
 If vLLM crashes and the error trace captures it somewhere around `self.graph.replay()` in `vllm/worker/model_runner.py`, it is a CUDA error inside CUDAGraph.
-To identify the particular CUDA operation that causes the error, you can add `--enforce-eager` to the command line, or `enforce_eager=True` to the {class}`~vllm.LLM` class to disable the CUDAGraph optimization and isolate the exact CUDA operation that causes the error.
+To identify the particular CUDA operation that causes the error, you can add `--enforce-eager` to the command line, or `enforce_eager=True` to the [LLM][vllm.LLM] class to disable the CUDAGraph optimization and isolate the exact CUDA operation that causes the error.
 
-(troubleshooting-incorrect-hardware-driver)=
+[](){ #troubleshooting-incorrect-hardware-driver }
 
 ## Incorrect hardware/driver
 
@@ -140,16 +139,15 @@ If the script runs successfully, you should see the message `sanity check is suc
 
 If the test script hangs or crashes, usually it means the hardware/drivers are broken in some sense. You should try to contact your system administrator or hardware vendor for further assistance. As a common workaround, you can try to tune some NCCL environment variables, such as `export NCCL_P2P_DISABLE=1` to see if it helps. Please check [their documentation](https://docs.nvidia.com/deeplearning/nccl/user-guide/docs/env.html) for more information. Please only use these environment variables as a temporary workaround, as they might affect the performance of the system. The best solution is still to fix the hardware/drivers so that the test script can run successfully.
 
-:::{note}
-A multi-node environment is more complicated than a single-node one. If you see errors such as `torch.distributed.DistNetworkError`, it is likely that the network/DNS setup is incorrect. In that case, you can manually assign node rank and specify the IP via command line arguments:
+!!! note
+    A multi-node environment is more complicated than a single-node one. If you see errors such as `torch.distributed.DistNetworkError`, it is likely that the network/DNS setup is incorrect. In that case, you can manually assign node rank and specify the IP via command line arguments:
 
-- In the first node, run `NCCL_DEBUG=TRACE torchrun --nnodes 2 --nproc-per-node=2 --node-rank 0 --master_addr $MASTER_ADDR test.py`.
-- In the second node, run `NCCL_DEBUG=TRACE torchrun --nnodes 2 --nproc-per-node=2 --node-rank 1 --master_addr $MASTER_ADDR test.py`.
+    - In the first node, run `NCCL_DEBUG=TRACE torchrun --nnodes 2 --nproc-per-node=2 --node-rank 0 --master_addr $MASTER_ADDR test.py`.
+    - In the second node, run `NCCL_DEBUG=TRACE torchrun --nnodes 2 --nproc-per-node=2 --node-rank 1 --master_addr $MASTER_ADDR test.py`.
 
-Adjust `--nproc-per-node`, `--nnodes`, and `--node-rank` according to your setup, being sure to execute different commands (with different `--node-rank`) on different nodes.
-:::
+    Adjust `--nproc-per-node`, `--nnodes`, and `--node-rank` according to your setup, being sure to execute different commands (with different `--node-rank`) on different nodes.
 
-(troubleshooting-python-multiprocessing)=
+[](){ #troubleshooting-python-multiprocessing }
 
 ## Python multiprocessing
 
@@ -260,7 +258,7 @@ or:
 ValueError: Model architectures ['<arch>'] are not supported for now. Supported architectures: [...]
 ```
 
-But you are sure that the model is in the [list of supported models](#supported-models), there may be some issue with vLLM's model resolution. In that case, please follow [these steps](#model-resolution) to explicitly specify the vLLM implementation for the model.
+But you are sure that the model is in the [list of supported models][supported-models], there may be some issue with vLLM's model resolution. In that case, please follow [these steps][model-resolution] to explicitly specify the vLLM implementation for the model.
 
 ## Failed to infer device type
 
diff --git a/docs/source/getting_started/v1_user_guide.md b/docs/getting_started/v1_user_guide.md
similarity index 100%
rename from docs/source/getting_started/v1_user_guide.md
rename to docs/getting_started/v1_user_guide.md
diff --git a/docs/make.bat b/docs/make.bat
deleted file mode 100644
index 747ffb7b30336..0000000000000
--- a/docs/make.bat
+++ /dev/null
@@ -1,35 +0,0 @@
-@ECHO OFF
-
-pushd %~dp0
-
-REM Command file for Sphinx documentation
-
-if "%SPHINXBUILD%" == "" (
-	set SPHINXBUILD=sphinx-build
-)
-set SOURCEDIR=source
-set BUILDDIR=build
-
-%SPHINXBUILD% >NUL 2>NUL
-if errorlevel 9009 (
-	echo.
-	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
-	echo.installed, then set the SPHINXBUILD environment variable to point
-	echo.to the full path of the 'sphinx-build' executable. Alternatively you
-	echo.may add the Sphinx directory to PATH.
-	echo.
-	echo.If you don't have Sphinx installed, grab it from
-	echo.https://www.sphinx-doc.org/
-	exit /b 1
-)
-
-if "%1" == "" goto help
-
-%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-goto end
-
-:help
-%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
-
-:end
-popd
diff --git a/docs/mkdocs/hooks/generate_examples.py b/docs/mkdocs/hooks/generate_examples.py
new file mode 100644
index 0000000000000..9144f6824b09a
--- /dev/null
+++ b/docs/mkdocs/hooks/generate_examples.py
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: Apache-2.0
+
+import itertools
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Literal
+
+ROOT_DIR = Path(__file__).parent.parent.parent.parent
+ROOT_DIR_RELATIVE = '../../../../..'
+EXAMPLE_DIR = ROOT_DIR / "examples"
+EXAMPLE_DOC_DIR = ROOT_DIR / "docs/getting_started/examples"
+print(ROOT_DIR.resolve())
+print(EXAMPLE_DIR.resolve())
+print(EXAMPLE_DOC_DIR.resolve())
+
+
+def fix_case(text: str) -> str:
+    subs = {
+        "api": "API",
+        "cli": "CLI",
+        "cpu": "CPU",
+        "llm": "LLM",
+        "mae": "MAE",
+        "tpu": "TPU",
+        "aqlm": "AQLM",
+        "gguf": "GGUF",
+        "lora": "LoRA",
+        "rlhf": "RLHF",
+        "vllm": "vLLM",
+        "openai": "OpenAI",
+        "lmcache": "LMCache",
+        "multilora": "MultiLoRA",
+        "mlpspeculator": "MLPSpeculator",
+        r"fp\d+": lambda x: x.group(0).upper(),  # e.g. fp16, fp32
+        r"int\d+": lambda x: x.group(0).upper(),  # e.g. int8, int16
+    }
+    for pattern, repl in subs.items():
+        text = re.sub(rf'\b{pattern}\b', repl, text, flags=re.IGNORECASE)
+    return text
+
+
+@dataclass
+class Example:
+    """
+    Example class for generating documentation content from a given path.
+
+    Attributes:
+        path (Path): The path to the main directory or file.
+        category (str): The category of the document.
+        main_file (Path): The main file in the directory.
+        other_files (list[Path]): list of other files in the directory.
+        title (str): The title of the document.
+
+    Methods:
+        __post_init__(): Initializes the main_file, other_files, and title attributes.
+        determine_main_file() -> Path: Determines the main file in the given path.
+        determine_other_files() -> list[Path]: Determines other files in the directory excluding the main file.
+        determine_title() -> str: Determines the title of the document.
+        generate() -> str: Generates the documentation content.
+    """ # noqa: E501
+    path: Path
+    category: str = None
+    main_file: Path = field(init=False)
+    other_files: list[Path] = field(init=False)
+    title: str = field(init=False)
+
+    def __post_init__(self):
+        self.main_file = self.determine_main_file()
+        self.other_files = self.determine_other_files()
+        self.title = self.determine_title()
+
+    def determine_main_file(self) -> Path:
+        """
+        Determines the main file in the given path.
+        If the path is a file, it returns the path itself. Otherwise, it searches
+        for Markdown files (*.md) in the directory and returns the first one in sorted order.
+        Returns:
+            Path: The main file path, either the original path if it's a file or the first
+            Markdown file (in sorted order, so the choice is deterministic) in the directory.
+        Raises:
+            IndexError: If no Markdown files are found in the directory.
+        """ # noqa: E501
+        return self.path if self.path.is_file() else sorted(
+            self.path.glob("*.md"))[0]
+
+    def determine_other_files(self) -> list[Path]:
+        """
+        Determine other files in the directory excluding the main file.
+
+        This method checks if the given path is a file. If it is, it returns an empty list.
+        Otherwise, it recursively searches through the directory and returns a list of all
+        files that are not the main file.
+
+        Returns:
+            list[Path]: A list of Path objects representing the other files in the directory.
+        """ # noqa: E501
+        if self.path.is_file():
+            return []
+        is_other_file = lambda file: file.is_file() and file != self.main_file
+        return [file for file in self.path.rglob("*") if is_other_file(file)]
+
+    def determine_title(self) -> str:
+        return fix_case(self.path.stem.replace("_", " ").title())
+
+    def generate(self) -> str:
+        content = f"---\ntitle: {self.title}\n---\n\n"
+        content += f"Source <gh-file:{self.path.relative_to(ROOT_DIR)}>.\n\n"
+
+        is_code = self.main_file.suffix != ".md"
+        if is_code:
+            content += f"```{self.main_file.suffix[1:]}\n"
+        content += f'--8<-- "{self.main_file}"\n'
+        if is_code:
+            content += "```\n"
+        content += "\n"
+
+        if not self.other_files:
+            return content
+
+        content += "## Example materials\n\n"
+        for file in sorted(self.other_files):
+            content += f'??? abstract "{file.relative_to(self.path)}"\n'
+            if file.suffix != ".md":
+                content += f"    ```{file.suffix[1:]}\n"
+            content += f'    --8<-- "{file}"\n'
+            if file.suffix != ".md":
+                content += "    ```\n"
+
+        return content
+
+
+def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
+    # Create the EXAMPLE_DOC_DIR if it doesn't exist
+    if not EXAMPLE_DOC_DIR.exists():
+        EXAMPLE_DOC_DIR.mkdir(parents=True)
+
+    categories = sorted(p for p in EXAMPLE_DIR.iterdir() if p.is_dir())
+
+    examples = []
+    glob_patterns = ["*.py", "*.md", "*.sh"]
+    # Find categorised examples
+    for category in categories:
+        globs = [category.glob(pattern) for pattern in glob_patterns]
+        for path in itertools.chain(*globs):
+            examples.append(Example(path, category.stem))
+        # Find examples in subdirectories
+        for path in category.glob("*/*.md"):
+            examples.append(Example(path.parent, category.stem))
+
+    # Generate the example documentation
+    for example in sorted(examples, key=lambda e: e.path.stem):
+        example_name = f"{example.path.stem}.md"
+        doc_path = EXAMPLE_DOC_DIR / example.category / example_name
+        print(doc_path)
+        if not doc_path.parent.exists():
+            doc_path.parent.mkdir(parents=True)
+        with open(doc_path, "w+") as f:
+            f.write(example.generate())
diff --git a/docs/mkdocs/hooks/remove_announcement.py b/docs/mkdocs/hooks/remove_announcement.py
new file mode 100644
index 0000000000000..e5f8549d83837
--- /dev/null
+++ b/docs/mkdocs/hooks/remove_announcement.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: Apache-2.0
+import os
+from typing import Literal
+
+
+def on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool):
+    """Delete the announcement override when Read the Docs builds a tag."""
+    # see https://docs.readthedocs.io/en/stable/reference/environment-variables.html # noqa
+    if os.getenv('READTHEDOCS_VERSION_TYPE') == "tag":
+        # this hook lives in docs/mkdocs/hooks; the banner is in docs/mkdocs
+        mkdocs_dir = os.path.dirname(os.path.dirname(__file__))
+        announcement_path = os.path.join(mkdocs_dir, "overrides/main.html")
+        # The file might be removed already if the build is triggered multiple
+        # times (readthedocs build both HTML and PDF versions separately)
+        if os.path.exists(announcement_path):
+            os.remove(announcement_path)
diff --git a/docs/mkdocs/hooks/url_schemes.py b/docs/mkdocs/hooks/url_schemes.py
new file mode 100644
index 0000000000000..03e7ffbb2733a
--- /dev/null
+++ b/docs/mkdocs/hooks/url_schemes.py
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: Apache-2.0
+import re
+
+from mkdocs.config.defaults import MkDocsConfig
+from mkdocs.structure.files import Files
+from mkdocs.structure.pages import Page
+
+
+def on_page_markdown(markdown: str, *, page: Page, config: MkDocsConfig,
+                     files: Files):
+    gh_icon = ":octicons-mark-github-16:"
+    gh_url = "https://github.com"
+    repo_url = f"{gh_url}/vllm-project/vllm"
+    org_url = f"{gh_url}/orgs/vllm-project"
+    urls = {
+        "issue": f"{repo_url}/issues",
+        "pr": f"{repo_url}/pull",
+        "project": f"{org_url}/projects",
+        "dir": f"{repo_url}/tree/main",
+        "file": f"{repo_url}/blob/main",
+    }
+    titles = {
+        "issue": "Issue #",
+        "pr": "Pull Request #",
+        "project": "Project #",
+        "dir": "",
+        "file": "",
+    }
+
+    scheme = r"gh-(?P<type>.+?):(?P<path>.+?)(#(?P<fragment>.+?))?"
+    inline_link = re.compile(r"\[(?P<title>[^\[]+?)\]\(" + scheme + r"\)")
+    auto_link = re.compile(f"<{scheme}>")
+
+    def replace_inline_link(match: re.Match) -> str:
+        url = f'{urls[match.group("type")]}/{match.group("path")}'
+        if fragment := match.group("fragment"):
+            url += f"#{fragment}"
+
+        return f'[{gh_icon} {match.group("title")}]({url})'
+
+    def replace_auto_link(match: re.Match) -> str:
+        type = match.group("type")
+        path = match.group("path")
+        title = f"{titles[type]}{path}"
+        url = f"{urls[type]}/{path}"
+        if fragment := match.group("fragment"):
+            url += f"#{fragment}"
+
+        return f"[{gh_icon} {title}]({url})"
+
+    markdown = inline_link.sub(replace_inline_link, markdown)
+    markdown = auto_link.sub(replace_auto_link, markdown)
+
+    return markdown
diff --git a/docs/source/_static/custom.js b/docs/mkdocs/javascript/run_llm_widget.js
similarity index 54%
rename from docs/source/_static/custom.js
rename to docs/mkdocs/javascript/run_llm_widget.js
index 58bc2ebb9614b..d0e5560e92b4e 100644
--- a/docs/source/_static/custom.js
+++ b/docs/mkdocs/javascript/run_llm_widget.js
@@ -17,22 +17,3 @@ document.addEventListener("DOMContentLoaded", function () {
     script.async = true;
     document.head.appendChild(script);
   });
-
-// Update URL search params when tab is clicked
-  document.addEventListener("DOMContentLoaded", function () {
-    const tabs = document.querySelectorAll(".sd-tab-label");
-
-    function updateURL(tab) {
-      const syncGroup = tab.getAttribute("data-sync-group");
-      const syncId = tab.getAttribute("data-sync-id");
-      if (syncGroup && syncId) {
-          const url = new URL(window.location);
-          url.searchParams.set(syncGroup, syncId);
-          window.history.replaceState(null, "", url);
-      }
-    }
-
-    tabs.forEach(tab => {
-        tab.addEventListener("click", () => updateURL(tab));
-    });
-});
diff --git a/docs/mkdocs/overrides/main.html b/docs/mkdocs/overrides/main.html
new file mode 100644
index 0000000000000..bdd62ebc158df
--- /dev/null
+++ b/docs/mkdocs/overrides/main.html
@@ -0,0 +1,5 @@
+{% extends "base.html" %}
+
+{% block announce %}
+  <p>You are viewing the latest developer preview docs. <a href="https://docs.vllm.ai/en/stable/">Click here</a> to view docs for the latest stable release.</p>
+{% endblock %}
diff --git a/docs/source/models/extensions/fastsafetensor.md b/docs/models/extensions/fastsafetensor.md
similarity index 100%
rename from docs/source/models/extensions/fastsafetensor.md
rename to docs/models/extensions/fastsafetensor.md
diff --git a/docs/source/models/extensions/runai_model_streamer.md b/docs/models/extensions/runai_model_streamer.md
similarity index 86%
rename from docs/source/models/extensions/runai_model_streamer.md
rename to docs/models/extensions/runai_model_streamer.md
index e0daa6f86dde4..c80120fa98f27 100644
--- a/docs/source/models/extensions/runai_model_streamer.md
+++ b/docs/models/extensions/runai_model_streamer.md
@@ -1,6 +1,7 @@
-(runai-model-streamer)=
-
-# Loading models with Run:ai Model Streamer
+---
+title: Loading models with Run:ai Model Streamer
+---
+[](){ #runai-model-streamer }
 
 Run:ai Model Streamer is a library to read tensors in concurrency, while streaming it to GPU memory.
 Further reading can be found in [Run:ai Model Streamer Documentation](https://github.com/run-ai/runai-model-streamer/blob/master/docs/README.md).
@@ -48,9 +49,8 @@ You can read further about CPU buffer memory limiting [here](https://github.com/
 vllm serve /home/meta-llama/Llama-3.2-3B-Instruct --load-format runai_streamer --model-loader-extra-config '{"memory_limit":5368709120}'
 ```
 
-:::{note}
-For further instructions about tunable parameters and additional parameters configurable through environment variables, read the [Environment Variables Documentation](https://github.com/run-ai/runai-model-streamer/blob/master/docs/src/env-vars.md).
-:::
+!!! note
+    For further instructions about tunable parameters and additional parameters configurable through environment variables, read the [Environment Variables Documentation](https://github.com/run-ai/runai-model-streamer/blob/master/docs/src/env-vars.md).
 
 ## Sharded Model Loading
 
@@ -74,6 +74,5 @@ The sharded loader supports all the same tunable parameters as the regular Run:a
 vllm serve /path/to/sharded/model --load-format runai_streamer_sharded --model-loader-extra-config '{"concurrency":16, "memory_limit":5368709120}'
 ```
 
-:::{note}
-The sharded loader is particularly efficient for tensor or pipeline parallel models where each worker only needs to read its own shard rather than the entire checkpoint.
-:::
+!!! note
+    The sharded loader is particularly efficient for tensor or pipeline parallel models where each worker only needs to read its own shard rather than the entire checkpoint.
diff --git a/docs/source/models/extensions/tensorizer.md b/docs/models/extensions/tensorizer.md
similarity index 79%
rename from docs/source/models/extensions/tensorizer.md
rename to docs/models/extensions/tensorizer.md
index cd94c81e620a2..36b49626d47db 100644
--- a/docs/source/models/extensions/tensorizer.md
+++ b/docs/models/extensions/tensorizer.md
@@ -1,6 +1,7 @@
-(tensorizer)=
-
-# Loading models with CoreWeave's Tensorizer
+---
+title: Loading models with CoreWeave's Tensorizer
+---
+[](){ #tensorizer }
 
 vLLM supports loading models with [CoreWeave's Tensorizer](https://docs.coreweave.com/coreweave-machine-learning-and-ai/inference/tensorizer).
 vLLM model tensors that have been serialized to disk, an HTTP/HTTPS endpoint, or S3 endpoint can be deserialized
@@ -11,6 +12,5 @@ For more information on CoreWeave's Tensorizer, please refer to
 [CoreWeave's Tensorizer documentation](https://github.com/coreweave/tensorizer). For more information on serializing a vLLM model, as well a general usage guide to using Tensorizer with vLLM, see
 the [vLLM example script](https://docs.vllm.ai/en/latest/getting_started/examples/tensorize_vllm_model.html).
 
-:::{note}
-Note that to use this feature you will need to install `tensorizer` by running `pip install vllm[tensorizer]`.
-:::
+!!! note
+    Note that to use this feature you will need to install `tensorizer` by running `pip install vllm[tensorizer]`.
diff --git a/docs/source/models/generative_models.md b/docs/models/generative_models.md
similarity index 63%
rename from docs/source/models/generative_models.md
rename to docs/models/generative_models.md
index dd765e4a97658..566b1c29fca9f 100644
--- a/docs/source/models/generative_models.md
+++ b/docs/models/generative_models.md
@@ -1,24 +1,25 @@
-(generative-models)=
-
-# Generative Models
+---
+title: Generative Models
+---
+[](){ #generative-models }
 
 vLLM provides first-class support for generative models, which covers most of LLMs.
 
-In vLLM, generative models implement the {class}`~vllm.model_executor.models.VllmModelForTextGeneration` interface.
+In vLLM, generative models implement the [VllmModelForTextGeneration][vllm.model_executor.models.VllmModelForTextGeneration] interface.
 Based on the final hidden states of the input, these models output log probabilities of the tokens to generate,
-which are then passed through {class}`~vllm.model_executor.layers.Sampler` to obtain the final text.
+which are then passed through [Sampler][vllm.model_executor.layers.Sampler] to obtain the final text.
 
 For generative models, the only supported `--task` option is `"generate"`.
 Usually, this is automatically inferred so you don't have to specify it.
 
 ## Offline Inference
 
-The {class}`~vllm.LLM` class provides various methods for offline inference.
-See <project:#configuration> for a list of options when initializing the model.
+The [LLM][vllm.LLM] class provides various methods for offline inference.
+See [configuration][configuration] for a list of options when initializing the model.
 
 ### `LLM.generate`
 
-The {class}`~vllm.LLM.generate` method is available to all generative models in vLLM.
+The [generate][vllm.LLM.generate] method is available to all generative models in vLLM.
 It is similar to [its counterpart in HF Transformers](https://huggingface.co/docs/transformers/main/en/main_classes/text_generation#transformers.GenerationMixin.generate),
 except that tokenization and detokenization are also performed automatically.
 
@@ -34,7 +35,7 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
-You can optionally control the language generation by passing {class}`~vllm.SamplingParams`.
+You can optionally control the language generation by passing [SamplingParams][vllm.SamplingParams].
 For example, you can use greedy sampling by setting `temperature=0`:
 
 ```python
@@ -50,16 +51,15 @@ for output in outputs:
     print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
 ```
 
-:::{important}
-By default, vLLM will use sampling parameters recommended by model creator by applying the `generation_config.json` from the huggingface model repository if it exists. In most cases, this will provide you with the best results by default if {class}`~vllm.SamplingParams` is not specified.
+!!! warning
+    By default, vLLM will use sampling parameters recommended by model creator by applying the `generation_config.json` from the huggingface model repository if it exists. In most cases, this will provide you with the best results by default if [SamplingParams][vllm.SamplingParams] is not specified.
 
-However, if vLLM's default sampling parameters are preferred, please pass `generation_config="vllm"` when creating the {class}`~vllm.LLM` instance.
-:::
+    However, if vLLM's default sampling parameters are preferred, please pass `generation_config="vllm"` when creating the [LLM][vllm.LLM] instance.
 A code example can be found here: <gh-file:examples/offline_inference/basic/basic.py>
 
 ### `LLM.beam_search`
 
-The {class}`~vllm.LLM.beam_search` method implements [beam search](https://huggingface.co/docs/transformers/en/generation_strategies#beam-search) on top of {class}`~vllm.LLM.generate`.
+The [beam_search][vllm.LLM.beam_search] method implements [beam search](https://huggingface.co/docs/transformers/en/generation_strategies#beam-search) on top of [generate][vllm.LLM.generate].
 For example, to search using 5 beams and output at most 50 tokens:
 
 ```python
@@ -77,14 +77,13 @@ for output in outputs:
 
 ### `LLM.chat`
 
-The {class}`~vllm.LLM.chat` method implements chat functionality on top of {class}`~vllm.LLM.generate`.
+The [chat][vllm.LLM.chat] method implements chat functionality on top of [generate][vllm.LLM.generate].
 In particular, it accepts input similar to [OpenAI Chat Completions API](https://platform.openai.com/docs/api-reference/chat)
 and automatically applies the model's [chat template](https://huggingface.co/docs/transformers/en/chat_templating) to format the prompt.
 
-:::{important}
-In general, only instruction-tuned models have a chat template.
-Base models may perform poorly as they are not trained to respond to the chat conversation.
-:::
+!!! warning
+    In general, only instruction-tuned models have a chat template.
+    Base models may perform poorly as they are not trained to respond to the chat conversation.
 
 ```python
 from vllm import LLM
@@ -133,7 +132,7 @@ outputs = llm.chat(conversation, chat_template=custom_template)
 
 ## Online Serving
 
-Our [OpenAI-Compatible Server](#openai-compatible-server) provides endpoints that correspond to the offline APIs:
+Our [OpenAI-Compatible Server][openai-compatible-server] provides endpoints that correspond to the offline APIs:
 
-- [Completions API](#completions-api) is similar to `LLM.generate` but only accepts text.
-- [Chat API](#chat-api)  is similar to `LLM.chat`, accepting both text and [multi-modal inputs](#multimodal-inputs) for models with a chat template.
+- [Completions API][completions-api] is similar to `LLM.generate` but only accepts text.
+- [Chat API][chat-api] is similar to `LLM.chat`, accepting both text and [multi-modal inputs][multimodal-inputs] for models with a chat template.
diff --git a/docs/source/models/pooling_models.md b/docs/models/pooling_models.md
similarity index 62%
rename from docs/source/models/pooling_models.md
rename to docs/models/pooling_models.md
index 3fd35e2e8bd17..89a128915a76c 100644
--- a/docs/source/models/pooling_models.md
+++ b/docs/models/pooling_models.md
@@ -1,70 +1,48 @@
-(pooling-models)=
-
-# Pooling Models
+---
+title: Pooling Models
+---
+[](){ #pooling-models }
 
 vLLM also supports pooling models, including embedding, reranking and reward models.
 
-In vLLM, pooling models implement the {class}`~vllm.model_executor.models.VllmModelForPooling` interface.
-These models use a {class}`~vllm.model_executor.layers.Pooler` to extract the final hidden states of the input
+In vLLM, pooling models implement the [VllmModelForPooling][vllm.model_executor.models.VllmModelForPooling] interface.
+These models use a [Pooler][vllm.model_executor.layers.Pooler] to extract the final hidden states of the input
 before returning them.
 
-:::{note}
-We currently support pooling models primarily as a matter of convenience.
-As shown in the [Compatibility Matrix](#compatibility-matrix), most vLLM features are not applicable to
-pooling models as they only work on the generation or decode stage, so performance may not improve as much.
-:::
+!!! note
+    We currently support pooling models primarily as a matter of convenience.
+    As shown in the [Compatibility Matrix][compatibility-matrix], most vLLM features are not applicable to
+    pooling models as they only work on the generation or decode stage, so performance may not improve as much.
 
 For pooling models, we support the following `--task` options.
 The selected option sets the default pooler used to extract the final hidden states:
 
-:::{list-table}
-:widths: 50 25 25 25
-:header-rows: 1
-
-- * Task
-  * Pooling Type
-  * Normalization
-  * Softmax
-- * Embedding (`embed`)
-  * `LAST`
-  * ✅︎
-  * ❌
-- * Classification (`classify`)
-  * `LAST`
-  * ❌
-  * ✅︎
-- * Sentence Pair Scoring (`score`)
-  * \*
-  * \*
-  * \*
-- * Reward Modeling (`reward`)
-  * `ALL`
-  * ❌
-  * ❌
-:::
+| Task                            | Pooling Type   | Normalization   | Softmax   |
+|---------------------------------|----------------|-----------------|-----------|
+| Embedding (`embed`)             | `LAST`         | ✅︎              | ❌         |
+| Classification (`classify`)     | `LAST`         | ❌               | ✅︎        |
+| Sentence Pair Scoring (`score`) | \*             | \*              | \*        |
 
 \*The default pooler is always defined by the model.
 
-:::{note}
-If the model's implementation in vLLM defines its own pooler, the default pooler is set to that instead of the one specified in this table.
-:::
+!!! note
+    If the model's implementation in vLLM defines its own pooler, the default pooler is set to that instead of the one specified in this table.
 
 When loading [Sentence Transformers](https://huggingface.co/sentence-transformers) models,
 we attempt to override the default pooler based on its Sentence Transformers configuration file (`modules.json`).
 
-:::{tip}
-You can customize the model's pooling method via the `--override-pooler-config` option,
-which takes priority over both the model's and Sentence Transformers's defaults.
-:::
+!!! tip
+    You can customize the model's pooling method via the `--override-pooler-config` option,
+    which takes priority over both the model's and Sentence Transformers's defaults.
 
 ## Offline Inference
 
-The {class}`~vllm.LLM` class provides various methods for offline inference.
-See <project:#configuration> for a list of options when initializing the model.
+The [LLM][vllm.LLM] class provides various methods for offline inference.
+See [configuration][configuration] for a list of options when initializing the model.
 
 ### `LLM.encode`
 
-The {class}`~vllm.LLM.encode` method is available to all pooling models in vLLM.
+The [encode][vllm.LLM.encode] method is available to all pooling models in vLLM.
 It returns the extracted hidden states directly, which is useful for reward models.
 
 ```python
@@ -79,7 +57,7 @@ print(f"Data: {data!r}")
 
 ### `LLM.embed`
 
-The {class}`~vllm.LLM.embed` method outputs an embedding vector for each prompt.
+The [embed][vllm.LLM.embed] method outputs an embedding vector for each prompt.
 It is primarily designed for embedding models.
 
 ```python
@@ -96,7 +74,7 @@ A code example can be found here: <gh-file:examples/offline_inference/basic/embe
 
 ### `LLM.classify`
 
-The {class}`~vllm.LLM.classify` method outputs a probability vector for each prompt.
+The [classify][vllm.LLM.classify] method outputs a probability vector for each prompt.
 It is primarily designed for classification models.
 
 ```python
@@ -113,13 +91,12 @@ A code example can be found here: <gh-file:examples/offline_inference/basic/clas
 
 ### `LLM.score`
 
-The {class}`~vllm.LLM.score` method outputs similarity scores between sentence pairs.
+The [score][vllm.LLM.score] method outputs similarity scores between sentence pairs.
 It is designed for embedding models and cross encoder models. Embedding models use cosine similarity, and [cross-encoder models](https://www.sbert.net/examples/applications/cross-encoder/README.html) serve as rerankers between candidate query-document pairs in RAG systems.
 
-:::{note}
-vLLM can only perform the model inference component (e.g. embedding, reranking) of RAG.
-To handle RAG at a higher level, you should use integration frameworks such as [LangChain](https://github.com/langchain-ai/langchain).
-:::
+!!! note
+    vLLM can only perform the model inference component (e.g. embedding, reranking) of RAG.
+    To handle RAG at a higher level, you should use integration frameworks such as [LangChain](https://github.com/langchain-ai/langchain).
 
 ```python
 from vllm import LLM
@@ -136,27 +113,25 @@ A code example can be found here: <gh-file:examples/offline_inference/basic/scor
 
 ## Online Serving
 
-Our [OpenAI-Compatible Server](#openai-compatible-server) provides endpoints that correspond to the offline APIs:
+Our [OpenAI-Compatible Server][openai-compatible-server] provides endpoints that correspond to the offline APIs:
 
-- [Pooling API](#pooling-api) is similar to `LLM.encode`, being applicable to all types of pooling models.
-- [Embeddings API](#embeddings-api) is similar to `LLM.embed`, accepting both text and [multi-modal inputs](#multimodal-inputs) for embedding models.
-- [Classification API](#classification-api) is similar to `LLM.classify` and is applicable to sequence classification models.
-- [Score API](#score-api) is similar to `LLM.score` for cross-encoder models.
+- [Pooling API][pooling-api] is similar to `LLM.encode`, being applicable to all types of pooling models.
+- [Embeddings API][embeddings-api] is similar to `LLM.embed`, accepting both text and [multi-modal inputs][multimodal-inputs] for embedding models.
+- [Classification API][classification-api] is similar to `LLM.classify` and is applicable to sequence classification models.
+- [Score API][score-api] is similar to `LLM.score` for cross-encoder models.
 
 ## Matryoshka Embeddings
 
 [Matryoshka Embeddings](https://sbert.net/examples/sentence_transformer/training/matryoshka/README.html#matryoshka-embeddings) or [Matryoshka Representation Learning (MRL)](https://arxiv.org/abs/2205.13147) is a technique used in training embedding models. It allows user to trade off between performance and cost.
 
-:::{warning}
-Not all embedding models are trained using Matryoshka Representation Learning. To avoid misuse of the `dimensions` parameter, vLLM returns an error for requests that attempt to change the output dimension of models that do not support Matryoshka Embeddings.
+!!! warning
+    Not all embedding models are trained using Matryoshka Representation Learning. To avoid misuse of the `dimensions` parameter, vLLM returns an error for requests that attempt to change the output dimension of models that do not support Matryoshka Embeddings.
 
-For example, setting `dimensions` parameter while using the `BAAI/bge-m3` model will result in the following error.
+    For example, setting `dimensions` parameter while using the `BAAI/bge-m3` model will result in the following error.
 
-```json
-{"object":"error","message":"Model \"BAAI/bge-m3\" does not support matryoshka representation, changing output dimensions will lead to poor results.","type":"BadRequestError","param":null,"code":400}
-```
-
-:::
+    ```json
+    {"object":"error","message":"Model \"BAAI/bge-m3\" does not support matryoshka representation, changing output dimensions will lead to poor results.","type":"BadRequestError","param":null,"code":400}
+    ```
 
 ### Manually enable Matryoshka Embeddings
 
@@ -172,7 +147,7 @@ vllm serve Snowflake/snowflake-arctic-embed-m-v1.5 --hf_overrides '{"matryoshka_
 
 ### Offline Inference
 
-You can change the output dimensions of embedding models that support Matryoshka Embeddings by using the dimensions parameter in {class}`~vllm.PoolingParams`.
+You can change the output dimensions of embedding models that support Matryoshka Embeddings by using the dimensions parameter in [PoolingParams][vllm.PoolingParams].
 
 ```python
 from vllm import LLM, PoolingParams
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
new file mode 100644
index 0000000000000..416fe42fcb799
--- /dev/null
+++ b/docs/models/supported_models.md
@@ -0,0 +1,690 @@
+---
+title: Supported Models
+---
+[](){ #supported-models }
+
+vLLM supports [generative][generative-models] and [pooling][pooling-models] models across various tasks.
+If a model supports more than one task, you can set the task via the `--task` argument.
+
+For each task, we list the model architectures that have been implemented in vLLM.
+Alongside each architecture, we include some popular models that use it.
+
+## Model Implementation
+
+### vLLM
+
+If vLLM natively supports a model, its implementation can be found in <gh-file:vllm/model_executor/models>.
+
+These models are what we list in the [list of text-only language models][supported-text-models] and the [list of multimodal language models][supported-mm-models].
+
+[](){ #transformers-backend }
+
+### Transformers
+
+vLLM also supports model implementations that are available in Transformers. This does not currently work for all models, but most decoder language models are supported, and vision language model support is planned!
+
+To check if the modeling backend is Transformers, you can simply do this:
+
+```python
+from vllm import LLM
+llm = LLM(model=..., task="generate")  # Name or path of your model
+llm.apply_model(lambda model: print(type(model)))
+```
+
+If it is `TransformersForCausalLM` then it means it's based on Transformers!
+
+!!! tip
+    You can force the use of `TransformersForCausalLM` by setting `model_impl="transformers"` for [offline inference][offline-inference] or `--model-impl transformers` for the [OpenAI-compatible server][openai-compatible-server].
+
+!!! note
+    vLLM may not fully optimise the Transformers implementation so you may see degraded performance if comparing a native model to a Transformers model in vLLM.
+
+#### Custom models
+
+If a model is natively supported by neither vLLM nor Transformers, it can still be used in vLLM!
+
+For a model to be compatible with the Transformers backend for vLLM it must:
+
+- be a Transformers compatible custom model (see [Transformers - Customizing models](https://huggingface.co/docs/transformers/en/custom_models)):
+    * The model directory must have the correct structure (e.g. `config.json` is present).
+    * `config.json` must contain `auto_map.AutoModel`.
+- be a Transformers backend for vLLM compatible model (see [Writing custom models][writing-custom-models]):
+    * Customisation should be done in the base model (e.g. in `MyModel`, not `MyModelForCausalLM`).
+
+If the compatible model is:
+
+- on the Hugging Face Model Hub, simply set `trust_remote_code=True` for [offline inference][offline-inference] or `--trust-remote-code` for the [OpenAI-compatible server][openai-compatible-server].
+- in a local directory, simply pass the directory path to `model=<MODEL_DIR>` for [offline inference][offline-inference] or `vllm serve <MODEL_DIR>` for the [OpenAI-compatible server][openai-compatible-server].
+
+This means that, with the Transformers backend for vLLM, new models can be used before they are officially supported in Transformers or vLLM!
+
+[](){ #writing-custom-models }
+
+#### Writing custom models
+
+This section details the necessary modifications to make to a Transformers compatible custom model that make it compatible with the Transformers backend for vLLM. (We assume that a Transformers compatible custom model has already been created, see [Transformers - Customizing models](https://huggingface.co/docs/transformers/en/custom_models)).
+
+To make your model compatible with the Transformers backend, it needs:
+
+1. `kwargs` passed down through all modules from `MyModel` to `MyAttention`.
+2. `MyAttention` must use `ALL_ATTENTION_FUNCTIONS` to call attention.
+3. `MyModel` must contain `_supports_attention_backend = True`.
+
+```python title="modeling_my_model.py"
+
+from transformers import PreTrainedModel
+from torch import nn
+
+class MyAttention(nn.Module):
+
+    def forward(self, hidden_states, **kwargs):
+        ...
+        attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
+        attn_output, attn_weights = attention_interface(
+            self,
+            query_states,
+            key_states,
+            value_states,
+            **kwargs,
+        )
+        ...
+
+class MyModel(PreTrainedModel):
+    _supports_attention_backend = True
+```
+
+Here is what happens in the background when this model is loaded:
+
+1. The config is loaded.
+2. `MyModel` Python class is loaded from the `auto_map` in config, and we check that the model `is_backend_compatible()`.
+3. `MyModel` is loaded into `TransformersForCausalLM` (see <gh-file:vllm/model_executor/models/transformers.py>) which sets `self.config._attn_implementation = "vllm"` so that vLLM's attention layer is used.
+
+That's it!
+
+For your model to be compatible with vLLM's tensor parallel and/or pipeline parallel features, you must add `base_model_tp_plan` and/or `base_model_pp_plan` to your model's config class:
+
+```python title="configuration_my_model.py"
+
+from transformers import PretrainedConfig
+
+class MyConfig(PretrainedConfig):
+    base_model_tp_plan = {
+        "layers.*.self_attn.k_proj": "colwise",
+        "layers.*.self_attn.v_proj": "colwise",
+        "layers.*.self_attn.o_proj": "rowwise",
+        "layers.*.mlp.gate_proj": "colwise",
+        "layers.*.mlp.up_proj": "colwise",
+        "layers.*.mlp.down_proj": "rowwise",
+    }
+    base_model_pp_plan = {
+        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
+        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
+        "norm": (["hidden_states"], ["hidden_states"]),
+    }
+```
+
+- `base_model_tp_plan` is a `dict` that maps fully qualified layer name patterns to tensor parallel styles (currently only `"colwise"` and `"rowwise"` are supported).
+- `base_model_pp_plan` is a `dict` that maps direct child layer names to `tuple`s of `list`s of `str`s:
+    * You only need to do this for layers which are not present on all pipeline stages
+    * vLLM assumes that there will be only one `nn.ModuleList`, which is distributed across the pipeline stages
+    * The `list` in the first element of the `tuple` contains the names of the input arguments
+    * The `list` in the last element of the `tuple` contains the names of the variables the layer outputs to in your modeling code
+
+## Loading a Model
+
+### Hugging Face Hub
+
+By default, vLLM loads models from [Hugging Face (HF) Hub](https://huggingface.co/models). To change the download path for models, you can set the `HF_HOME` environment variable; for more details, refer to [their official documentation](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhome).
+
+To determine whether a given model is natively supported, you can check the `config.json` file inside the HF repository.
+If the `"architectures"` field contains a model architecture listed below, then it should be natively supported.
+
+Models do not _need_ to be natively supported to be used in vLLM.
+The [Transformers backend][transformers-backend] enables you to run models directly using their Transformers implementation (or even remote code on the Hugging Face Model Hub!).
+
+!!! tip
+    The easiest way to check if your model is really supported at runtime is to run the program below:
+
+    ```python
+    from vllm import LLM
+
+    # For generative models (task=generate) only
+    llm = LLM(model=..., task="generate")  # Name or path of your model
+    output = llm.generate("Hello, my name is")
+    print(output)
+
+    # For pooling models (task={embed,classify,reward,score}) only
+    llm = LLM(model=..., task="embed")  # Name or path of your model
+    output = llm.encode("Hello, my name is")
+    print(output)
+    ```
+
+    If vLLM successfully returns text (for generative models) or hidden states (for pooling models), it indicates that your model is supported.
+
+Otherwise, please refer to [Adding a New Model][new-model] for instructions on how to implement your model in vLLM.
+Alternatively, you can [open an issue on GitHub](https://github.com/vllm-project/vllm/issues/new/choose) to request vLLM support.
+
+#### Download a model
+
+If you prefer, you can use the Hugging Face CLI to [download a model](https://huggingface.co/docs/huggingface_hub/guides/cli#huggingface-cli-download) or specific files from a model repository:
+
+```console
+# Download a model
+huggingface-cli download HuggingFaceH4/zephyr-7b-beta
+
+# Specify a custom cache directory
+huggingface-cli download HuggingFaceH4/zephyr-7b-beta --cache-dir ./path/to/cache
+
+# Download a specific file from a model repo
+huggingface-cli download HuggingFaceH4/zephyr-7b-beta eval_results.json
+```
+
+#### List the downloaded models
+
+Use the Hugging Face CLI to [manage models](https://huggingface.co/docs/huggingface_hub/guides/manage-cache#scan-your-cache) stored in local cache:
+
+```console
+# List cached models
+huggingface-cli scan-cache
+
+# Show detailed (verbose) output
+huggingface-cli scan-cache -v
+
+# Specify a custom cache directory
+huggingface-cli scan-cache --dir ~/.cache/huggingface/hub
+```
+
+#### Delete a cached model
+
+Use the Hugging Face CLI to interactively [delete downloaded models](https://huggingface.co/docs/huggingface_hub/guides/manage-cache#clean-your-cache) from the cache:
+
+```console
+# The `delete-cache` command requires extra dependencies to work with the TUI.
+# Please run `pip install huggingface_hub[cli]` to install them.
+
+# Launch the interactive TUI to select models to delete
+$ huggingface-cli delete-cache
+? Select revisions to delete: 1 revisions selected counting for 438.9M.
+  ○ None of the following (if selected, nothing will be deleted).
+Model BAAI/bge-base-en-v1.5 (438.9M, used 1 week ago)
+❯ ◉ a5beb1e3: main # modified 1 week ago
+
+Model BAAI/bge-large-en-v1.5 (1.3G, used 1 week ago)
+  ○ d4aa6901: main # modified 1 week ago
+
+Model BAAI/bge-reranker-base (1.1G, used 4 weeks ago)
+  ○ 2cfc18c9: main # modified 4 weeks ago
+
+Press <space> to select, <enter> to validate and <ctrl+c> to quit without modification.
+
+# Need to confirm after selected
+? Select revisions to delete: 1 revision(s) selected.
+? 1 revisions selected counting for 438.9M. Confirm deletion ? Yes
+Start deletion.
+Done. Deleted 1 repo(s) and 0 revision(s) for a total of 438.9M.
+```
+
+#### Using a proxy
+
+Here are some tips for loading/downloading models from Hugging Face using a proxy:
+
+- Set the proxy globally for your session (or set it in the profile file):
+
+```shell
+export http_proxy=http://your.proxy.server:port
+export https_proxy=http://your.proxy.server:port
+```
+
+- Set the proxy for just the current command:
+
+```shell
+https_proxy=http://your.proxy.server:port huggingface-cli download <model_name>
+
+# or use vllm cmd directly
+https_proxy=http://your.proxy.server:port  vllm serve <model_name> --disable-log-requests
+```
+
+- Set the proxy in the Python interpreter:
+
+```python
+import os
+
+os.environ['http_proxy'] = 'http://your.proxy.server:port'
+os.environ['https_proxy'] = 'http://your.proxy.server:port'
+```
+
+### ModelScope
+
+To use models from [ModelScope](https://www.modelscope.cn) instead of Hugging Face Hub, set an environment variable:
+
+```shell
+export VLLM_USE_MODELSCOPE=True
+```
+
+Then load the model with `trust_remote_code=True`:
+
+```python
+from vllm import LLM
+
+llm = LLM(model=..., revision=..., task=..., trust_remote_code=True)
+
+# For generative models (task=generate) only
+output = llm.generate("Hello, my name is")
+print(output)
+
+# For pooling models (task={embed,classify,reward,score}) only
+output = llm.encode("Hello, my name is")
+print(output)
+```
+
+[](){ #feature-status-legend }
+
+## Feature Status Legend
+
+- ✅︎ indicates that the feature is supported for the model.
+
+- 🚧 indicates that the feature is planned but not yet supported for the model.
+
+- ⚠️ indicates that the feature is available but may have known issues or limitations.
+
+[](){ #supported-text-models }
+
+## List of Text-only Language Models
+
+### Generative Models
+
+See [this page][generative-models] for more information on how to use generative models.
+
+#### Text Generation
+
+Specified using `--task generate`.
+
+| Architecture                                      | Models                                              | Example HF Models                                                                                                                                                            | [LoRA][lora-adapter]   | [PP][distributed-serving]   |
+|---------------------------------------------------|-----------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------|
+| `AquilaForCausalLM`                               | Aquila, Aquila2                                     | `BAAI/Aquila-7B`, `BAAI/AquilaChat-7B`, etc.                                                                                                                                 | ✅︎                     | ✅︎                          |
+| `ArcticForCausalLM`                               | Arctic                                              | `Snowflake/snowflake-arctic-base`, `Snowflake/snowflake-arctic-instruct`, etc.                                                                                               | ✅︎                     |                             |
+| `BaiChuanForCausalLM`                             | Baichuan2, Baichuan                                 | `baichuan-inc/Baichuan2-13B-Chat`, `baichuan-inc/Baichuan-7B`, etc.                                                                                                          | ✅︎                     | ✅︎                          |
+| `BambaForCausalLM`                                | Bamba                                               | `ibm-ai-platform/Bamba-9B-fp8`, `ibm-ai-platform/Bamba-9B`                                                                                                                   |                        |                             |
+| `BloomForCausalLM`                                | BLOOM, BLOOMZ, BLOOMChat                            | `bigscience/bloom`, `bigscience/bloomz`, etc.                                                                                                                                | ✅︎                     |                             |
+| `BartForConditionalGeneration`                    | BART                                                | `facebook/bart-base`, `facebook/bart-large-cnn`, etc.                                                                                                                        |                        |                             |
+| `ChatGLMModel`, `ChatGLMForConditionalGeneration` | ChatGLM                                             | `THUDM/chatglm2-6b`, `THUDM/chatglm3-6b`, `ShieldLM-6B-chatglm3`, etc.                                                                                                       | ✅︎                     | ✅︎                          |
+| `CohereForCausalLM`, `Cohere2ForCausalLM`         | Command-R                                           | `CohereForAI/c4ai-command-r-v01`, `CohereForAI/c4ai-command-r7b-12-2024`, etc.                                                                                               | ✅︎                     | ✅︎                          |
+| `DbrxForCausalLM`                                 | DBRX                                                | `databricks/dbrx-base`, `databricks/dbrx-instruct`, etc.                                                                                                                     | ✅︎                     |                             |
+| `DeciLMForCausalLM`                               | DeciLM                                              | `nvidia/Llama-3_3-Nemotron-Super-49B-v1`, etc.                                                                                                                               | ✅︎                     |                             |
+| `DeepseekForCausalLM`                             | DeepSeek                                            | `deepseek-ai/deepseek-llm-67b-base`, `deepseek-ai/deepseek-llm-7b-chat`, etc.                                                                                                | ✅︎                     |                             |
+| `DeepseekV2ForCausalLM`                           | DeepSeek-V2                                         | `deepseek-ai/DeepSeek-V2`, `deepseek-ai/DeepSeek-V2-Chat`, etc.                                                                                                              | ✅︎                     |                             |
+| `DeepseekV3ForCausalLM`                           | DeepSeek-V3                                         | `deepseek-ai/DeepSeek-V3-Base`, `deepseek-ai/DeepSeek-V3`, etc.                                                                                                              | ✅︎                     |                             |
+| `ExaoneForCausalLM`                               | EXAONE-3                                            | `LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc.                                                                                                                                 | ✅︎                     | ✅︎                          |
+| `FalconForCausalLM`                               | Falcon                                              | `tiiuae/falcon-7b`, `tiiuae/falcon-40b`, `tiiuae/falcon-rw-7b`, etc.                                                                                                         | ✅︎                     |                             |
+| `FalconMambaForCausalLM`                          | FalconMamba                                         | `tiiuae/falcon-mamba-7b`, `tiiuae/falcon-mamba-7b-instruct`, etc.                                                                                                            | ✅︎                     | ✅︎                          |
+| `FalconH1ForCausalLM`                             | Falcon-H1                                           | `tiiuae/Falcon-H1-34B-Base`, `tiiuae/Falcon-H1-34B-Instruct`, etc.                                                                                                           | ✅︎                     | ✅︎                          |
+| `GemmaForCausalLM`                                | Gemma                                               | `google/gemma-2b`, `google/gemma-1.1-2b-it`, etc.                                                                                                                            | ✅︎                     | ✅︎                          |
+| `Gemma2ForCausalLM`                               | Gemma 2                                             | `google/gemma-2-9b`, `google/gemma-2-27b`, etc.                                                                                                                              | ✅︎                     | ✅︎                          |
+| `Gemma3ForCausalLM`                               | Gemma 3                                             | `google/gemma-3-1b-it`, etc.                                                                                                                                                 | ✅︎                     | ✅︎                          |
+| `GlmForCausalLM`                                  | GLM-4                                               | `THUDM/glm-4-9b-chat-hf`, etc.                                                                                                                                               | ✅︎                     | ✅︎                          |
+| `Glm4ForCausalLM`                                 | GLM-4-0414                                          | `THUDM/GLM-4-32B-0414`, etc.                                                                                                                                                 | ✅︎                     | ✅︎                          |
+| `GPT2LMHeadModel`                                 | GPT-2                                               | `gpt2`, `gpt2-xl`, etc.                                                                                                                                                      | ✅︎                     |                             |
+| `GPTBigCodeForCausalLM`                           | StarCoder, SantaCoder, WizardCoder                  | `bigcode/starcoder`, `bigcode/gpt_bigcode-santacoder`, `WizardLM/WizardCoder-15B-V1.0`, etc.                                                                                 | ✅︎                     | ✅︎                          |
+| `GPTJForCausalLM`                                 | GPT-J                                               | `EleutherAI/gpt-j-6b`, `nomic-ai/gpt4all-j`, etc.                                                                                                                            | ✅︎                     |                             |
+| `GPTNeoXForCausalLM`                              | GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM | `EleutherAI/gpt-neox-20b`, `EleutherAI/pythia-12b`, `OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5`, `databricks/dolly-v2-12b`, `stabilityai/stablelm-tuned-alpha-7b`, etc. | ✅︎                     |                             |
+| `GraniteForCausalLM`                              | Granite 3.0, Granite 3.1, PowerLM                   | `ibm-granite/granite-3.0-2b-base`, `ibm-granite/granite-3.1-8b-instruct`, `ibm/PowerLM-3b`, etc.                                                                             | ✅︎                     | ✅︎                          |
+| `GraniteMoeForCausalLM`                           | Granite 3.0 MoE, PowerMoE                           | `ibm-granite/granite-3.0-1b-a400m-base`, `ibm-granite/granite-3.0-3b-a800m-instruct`, `ibm/PowerMoE-3b`, etc.                                                                | ✅︎                     | ✅︎                          |
+| `GraniteMoeHybridForCausalLM`                     | Granite 4.0 MoE Hybrid                              | `ibm-granite/granite-4.0-tiny-preview`, etc.                                                                                                                                 | ✅︎                     | ✅︎                          |
+| `GraniteMoeSharedForCausalLM`                     | Granite MoE Shared                                  | `ibm-research/moe-7b-1b-active-shared-experts` (test model)                                                                                                                  | ✅︎                     | ✅︎                          |
+| `GritLM`                                          | GritLM                                              | `parasail-ai/GritLM-7B-vllm`.                                                                                                                                                | ✅︎                     | ✅︎                          |
+| `Grok1ModelForCausalLM`                           | Grok1                                               | `hpcai-tech/grok-1`.                                                                                                                                                         | ✅︎                     | ✅︎                          |
+| `InternLMForCausalLM`                             | InternLM                                            | `internlm/internlm-7b`, `internlm/internlm-chat-7b`, etc.                                                                                                                    | ✅︎                     | ✅︎                          |
+| `InternLM2ForCausalLM`                            | InternLM2                                           | `internlm/internlm2-7b`, `internlm/internlm2-chat-7b`, etc.                                                                                                                  | ✅︎                     | ✅︎                          |
+| `InternLM3ForCausalLM`                            | InternLM3                                           | `internlm/internlm3-8b-instruct`, etc.                                                                                                                                       | ✅︎                     | ✅︎                          |
+| `JAISLMHeadModel`                                 | Jais                                                | `inceptionai/jais-13b`, `inceptionai/jais-13b-chat`, `inceptionai/jais-30b-v3`, `inceptionai/jais-30b-chat-v3`, etc.                                                         | ✅︎                     |                             |
+| `JambaForCausalLM`                                | Jamba                                               | `ai21labs/AI21-Jamba-1.5-Large`, `ai21labs/AI21-Jamba-1.5-Mini`, `ai21labs/Jamba-v0.1`, etc.                                                                                 | ✅︎                     | ✅︎                          |
+| `LlamaForCausalLM`                                | Llama 3.1, Llama 3, Llama 2, LLaMA, Yi              | `meta-llama/Meta-Llama-3.1-405B-Instruct`, `meta-llama/Meta-Llama-3.1-70B`, `meta-llama/Meta-Llama-3-70B-Instruct`, `meta-llama/Llama-2-70b-hf`, `01-ai/Yi-34B`, etc.        | ✅︎                     | ✅︎                          |
+| `MambaForCausalLM`                                | Mamba                                               | `state-spaces/mamba-130m-hf`, `state-spaces/mamba-790m-hf`, `state-spaces/mamba-2.8b-hf`, etc.                                                                               | ✅︎                     |                             |
+| `MiniCPMForCausalLM`                              | MiniCPM                                             | `openbmb/MiniCPM-2B-sft-bf16`, `openbmb/MiniCPM-2B-dpo-bf16`, `openbmb/MiniCPM-S-1B-sft`, etc.                                                                               | ✅︎                     | ✅︎                          |
+| `MiniCPM3ForCausalLM`                             | MiniCPM3                                            | `openbmb/MiniCPM3-4B`, etc.                                                                                                                                                  | ✅︎                     | ✅︎                          |
+| `MistralForCausalLM`                              | Mistral, Mistral-Instruct                           | `mistralai/Mistral-7B-v0.1`, `mistralai/Mistral-7B-Instruct-v0.1`, etc.                                                                                                      | ✅︎                     | ✅︎                          |
+| `MixtralForCausalLM`                              | Mixtral-8x7B, Mixtral-8x7B-Instruct                 | `mistralai/Mixtral-8x7B-v0.1`, `mistralai/Mixtral-8x7B-Instruct-v0.1`, `mistral-community/Mixtral-8x22B-v0.1`, etc.                                                          | ✅︎                     | ✅︎                          |
+| `MPTForCausalLM`                                  | MPT, MPT-Instruct, MPT-Chat, MPT-StoryWriter        | `mosaicml/mpt-7b`, `mosaicml/mpt-7b-storywriter`, `mosaicml/mpt-30b`, etc.                                                                                                   | ✅︎                     |                             |
+| `NemotronForCausalLM`                             | Nemotron-3, Nemotron-4, Minitron                    | `nvidia/Minitron-8B-Base`, `mgoin/Nemotron-4-340B-Base-hf-FP8`, etc.                                                                                                         | ✅︎                     | ✅︎                          |
+| `OLMoForCausalLM`                                 | OLMo                                                | `allenai/OLMo-1B-hf`, `allenai/OLMo-7B-hf`, etc.                                                                                                                             | ✅︎                     |                             |
+| `OLMo2ForCausalLM`                                | OLMo2                                               | `allenai/OLMo-2-0425-1B`, etc.                                                                                                                                               | ✅︎                     |                             |
+| `OLMoEForCausalLM`                                | OLMoE                                               | `allenai/OLMoE-1B-7B-0924`, `allenai/OLMoE-1B-7B-0924-Instruct`, etc.                                                                                                        | ✅︎                     | ✅︎                          |
+| `OPTForCausalLM`                                  | OPT, OPT-IML                                        | `facebook/opt-66b`, `facebook/opt-iml-max-30b`, etc.                                                                                                                         | ✅︎                     |                             |
+| `OrionForCausalLM`                                | Orion                                               | `OrionStarAI/Orion-14B-Base`, `OrionStarAI/Orion-14B-Chat`, etc.                                                                                                             | ✅︎                     |                             |
+| `PhiForCausalLM`                                  | Phi                                                 | `microsoft/phi-1_5`, `microsoft/phi-2`, etc.                                                                                                                                 | ✅︎                     | ✅︎                          |
+| `Phi3ForCausalLM`                                 | Phi-4, Phi-3                                        | `microsoft/Phi-4-mini-instruct`, `microsoft/Phi-4`, `microsoft/Phi-3-mini-4k-instruct`, `microsoft/Phi-3-mini-128k-instruct`, `microsoft/Phi-3-medium-128k-instruct`, etc.   | ✅︎                     | ✅︎                          |
+| `Phi3SmallForCausalLM`                            | Phi-3-Small                                         | `microsoft/Phi-3-small-8k-instruct`, `microsoft/Phi-3-small-128k-instruct`, etc.                                                                                             | ✅︎                     |                             |
+| `PhiMoEForCausalLM`                               | Phi-3.5-MoE                                         | `microsoft/Phi-3.5-MoE-instruct`, etc.                                                                                                                                       | ✅︎                     | ✅︎                          |
+| `PersimmonForCausalLM`                            | Persimmon                                           | `adept/persimmon-8b-base`, `adept/persimmon-8b-chat`, etc.                                                                                                                   | ✅︎                     |                             |
+| `Plamo2ForCausalLM`                               | PLaMo2                                              | `pfnet/plamo-2-1b`, `pfnet/plamo-2-8b`, etc.                                                                                                                                 |                        |                             |
+| `QWenLMHeadModel`                                 | Qwen                                                | `Qwen/Qwen-7B`, `Qwen/Qwen-7B-Chat`, etc.                                                                                                                                    | ✅︎                     | ✅︎                          |
+| `Qwen2ForCausalLM`                                | QwQ, Qwen2                                          | `Qwen/QwQ-32B-Preview`, `Qwen/Qwen2-7B-Instruct`, `Qwen/Qwen2-7B`, etc.                                                                                                      | ✅︎                     | ✅︎                          |
+| `Qwen2MoeForCausalLM`                             | Qwen2MoE                                            | `Qwen/Qwen1.5-MoE-A2.7B`, `Qwen/Qwen1.5-MoE-A2.7B-Chat`, etc.                                                                                                                | ✅︎                     |                             |
+| `Qwen3ForCausalLM`                                | Qwen3                                               | `Qwen/Qwen3-8B`, etc.                                                                                                                                                        | ✅︎                     | ✅︎                          |
+| `Qwen3MoeForCausalLM`                             | Qwen3MoE                                            | `Qwen/Qwen3-30B-A3B`, etc.                                                                                                                                                   | ✅︎                     |                             |
+| `StableLmForCausalLM`                             | StableLM                                            | `stabilityai/stablelm-3b-4e1t`, `stabilityai/stablelm-base-alpha-7b-v2`, etc.                                                                                                | ✅︎                     |                             |
+| `Starcoder2ForCausalLM`                           | Starcoder2                                          | `bigcode/starcoder2-3b`, `bigcode/starcoder2-7b`, `bigcode/starcoder2-15b`, etc.                                                                                             | ✅︎                     |                             |
+| `SolarForCausalLM`                                | Solar Pro                                           | `upstage/solar-pro-preview-instruct`, etc.                                                                                                                                   | ✅︎                     | ✅︎                          |
+| `TeleChat2ForCausalLM`                            | TeleChat2                                           | `Tele-AI/TeleChat2-3B`, `Tele-AI/TeleChat2-7B`, `Tele-AI/TeleChat2-35B`, etc.                                                                                                | ✅︎                     | ✅︎                          |
+| `TeleFLMForCausalLM`                              | TeleFLM                                             | `CofeAI/FLM-2-52B-Instruct-2407`, `CofeAI/Tele-FLM`, etc.                                                                                                                    | ✅︎                     | ✅︎                          |
+| `XverseForCausalLM`                               | XVERSE                                              | `xverse/XVERSE-7B-Chat`, `xverse/XVERSE-13B-Chat`, `xverse/XVERSE-65B-Chat`, etc.                                                                                            | ✅︎                     | ✅︎                          |
+| `MiniMaxText01ForCausalLM`                        | MiniMax-Text                                        | `MiniMaxAI/MiniMax-Text-01`, etc.                                                                                                                                            | ✅︎                     |                             |
+| `Zamba2ForCausalLM`                               | Zamba2                                              | `Zyphra/Zamba2-7B-instruct`, `Zyphra/Zamba2-2.7B-instruct`, `Zyphra/Zamba2-1.2B-instruct`, etc.                                                                              |                        |                             |
+
+!!! note
+    Currently, the ROCm version of vLLM supports Mistral and Mixtral only for context lengths up to 4096.
+
+### Pooling Models
+
+See [this page][pooling-models] for more information on how to use pooling models.
+
+!!! warning
+    Since some model architectures support both generative and pooling tasks,
+    you should explicitly specify the task type to ensure that the model is used in pooling mode instead of generative mode.
+
+#### Text Embedding
+
+Specified using `--task embed`.
+
+| Architecture                                           | Models              | Example HF Models                                                                                                   | [LoRA][lora-adapter]   | [PP][distributed-serving]   |
+|--------------------------------------------------------|---------------------|---------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------|
+| `BertModel`                                            | BERT-based          | `BAAI/bge-base-en-v1.5`, `Snowflake/snowflake-arctic-embed-xs`, etc.                                                |                        |                             |
+| `Gemma2Model`                                          | Gemma 2-based       | `BAAI/bge-multilingual-gemma2`, etc.                                                                                | ✅︎                     |                             |
+| `GritLM`                                               | GritLM              | `parasail-ai/GritLM-7B-vllm`.                                                                                       | ✅︎                     | ✅︎                          |
+| `GteModel`                                             | Arctic-Embed-2.0-M  | `Snowflake/snowflake-arctic-embed-m-v2.0`.                                                                          | ︎                      |                             |
+| `GteNewModel`                                          | mGTE-TRM (see note) | `Alibaba-NLP/gte-multilingual-base`, etc.                                                                           | ︎                      | ︎                           |
+| `ModernBertModel`                                      | ModernBERT-based    | `Alibaba-NLP/gte-modernbert-base`, etc.                                                                             | ︎                      | ︎                           |
+| `NomicBertModel`                                       | Nomic BERT          | `nomic-ai/nomic-embed-text-v1`, `nomic-ai/nomic-embed-text-v2-moe`, `Snowflake/snowflake-arctic-embed-m-long`, etc. | ︎                      | ︎                           |
+| `LlamaModel`, `LlamaForCausalLM`, `MistralModel`, etc. | Llama-based         | `intfloat/e5-mistral-7b-instruct`, etc.                                                                             | ✅︎                     | ✅︎                          |
+| `Qwen2Model`, `Qwen2ForCausalLM`                       | Qwen2-based         | `ssmits/Qwen2-7B-Instruct-embed-base` (see note), `Alibaba-NLP/gte-Qwen2-7B-instruct` (see note), etc.              | ✅︎                     | ✅︎                          |
+| `RobertaModel`, `RobertaForMaskedLM`                   | RoBERTa-based       | `sentence-transformers/all-roberta-large-v1`, etc.                                                                  |                        |                             |
+
+!!! note
+    `ssmits/Qwen2-7B-Instruct-embed-base` has an improperly defined Sentence Transformers config.
+    You should manually set mean pooling by passing `--override-pooler-config '{"pooling_type": "MEAN"}'`.
+
+!!! note
+    The HF implementation of `Alibaba-NLP/gte-Qwen2-1.5B-instruct` is hardcoded to use causal attention despite what is shown in `config.json`. To compare vLLM vs HF results,
+    you should set `--hf-overrides '{"is_causal": true}'` in vLLM so that the two implementations are consistent with each other.
+
+    For both the 1.5B and 7B variants, you also need to enable `--trust-remote-code` for the correct tokenizer to be loaded.
+    See [relevant issue on HF Transformers](https://github.com/huggingface/transformers/issues/34882).
+
+!!! note
+    `jinaai/jina-embeddings-v3` supports multiple tasks through LoRA, while vLLM currently only supports text-matching tasks by merging the LoRA weights.
+
+!!! note
+    The second-generation GTE model (mGTE-TRM) is named `NewModel`. Because the name `NewModel` is too generic, you should set `--hf-overrides '{"architectures": ["GteNewModel"]}'` to specify the use of the `GteNewModel` architecture.
+
+If your model is not in the above list, we will try to automatically convert the model using
+[as_embedding_model][vllm.model_executor.models.adapters.as_embedding_model]. By default, the embeddings
+of the whole prompt are extracted from the normalized hidden state corresponding to the last token.
+
+#### Reward Modeling
+
+Specified using `--task reward`.
+
+| Architecture              | Models          | Example HF Models                                                      | [LoRA][lora-adapter]   | [PP][distributed-serving]   |
+|---------------------------|-----------------|------------------------------------------------------------------------|------------------------|-----------------------------|
+| `InternLM2ForRewardModel` | InternLM2-based | `internlm/internlm2-1_8b-reward`, `internlm/internlm2-7b-reward`, etc. | ✅︎                     | ✅︎                          |
+| `LlamaForCausalLM`        | Llama-based     | `peiyi9979/math-shepherd-mistral-7b-prm`, etc.                         | ✅︎                     | ✅︎                          |
+| `Qwen2ForRewardModel`     | Qwen2-based     | `Qwen/Qwen2.5-Math-RM-72B`, etc.                                       | ✅︎                     | ✅︎                          |
+
+If your model is not in the above list, we will try to automatically convert the model using
+[as_reward_model][vllm.model_executor.models.adapters.as_reward_model]. By default, we return the hidden states of each token directly.
+
+!!! warning
+    For process-supervised reward models such as `peiyi9979/math-shepherd-mistral-7b-prm`, the pooling config should be set explicitly,
+    e.g.: `--override-pooler-config '{"pooling_type": "STEP", "step_tag_id": 123, "returned_token_ids": [456, 789]}'`.
+
+#### Classification
+
+Specified using `--task classify`.
+
+| Architecture                     | Models   | Example HF Models                      | [LoRA][lora-adapter]   | [PP][distributed-serving]   |
+|----------------------------------|----------|----------------------------------------|------------------------|-----------------------------|
+| `JambaForSequenceClassification` | Jamba    | `ai21labs/Jamba-tiny-reward-dev`, etc. | ✅︎                     | ✅︎                          |
+
+If your model is not in the above list, we will try to automatically convert the model using
+[as_classification_model][vllm.model_executor.models.adapters.as_classification_model]. By default, the class probabilities are extracted from the softmaxed hidden state corresponding to the last token.
+
+#### Sentence Pair Scoring
+
+Specified using `--task score`.
+
+| Architecture                          | Models            | Example HF Models                            |
+|---------------------------------------|-------------------|----------------------------------------------|
+| `BertForSequenceClassification`       | BERT-based        | `cross-encoder/ms-marco-MiniLM-L-6-v2`, etc. |
+| `RobertaForSequenceClassification`    | RoBERTa-based     | `cross-encoder/quora-roberta-base`, etc.     |
+| `XLMRobertaForSequenceClassification` | XLM-RoBERTa-based | `BAAI/bge-reranker-v2-m3`, etc.              |
+
+[](){ #supported-mm-models }
+
+## List of Multimodal Language Models
+
+The following modalities are supported depending on the model:
+
+- **T**ext
+- **I**mage
+- **V**ideo
+- **A**udio
+
+Any combination of modalities joined by `+` is supported.
+
+- e.g.: `T + I` means that the model supports text-only, image-only, and text-with-image inputs.
+
+On the other hand, modalities separated by `/` are mutually exclusive.
+
+- e.g.: `T / I` means that the model supports text-only and image-only inputs, but not text-with-image inputs.
+
+See [this page][multimodal-inputs] on how to pass multi-modal inputs to the model.
+
+!!! warning
+    **To enable multiple multi-modal items per text prompt in vLLM V0**, you have to set `limit_mm_per_prompt` (offline inference)
+    or `--limit-mm-per-prompt` (online serving). For example, to enable passing up to 4 images per text prompt:
+
+    Offline inference:
+
+    ```python
+    from vllm import LLM
+
+    llm = LLM(
+        model="Qwen/Qwen2-VL-7B-Instruct",
+        limit_mm_per_prompt={"image": 4},
+    )
+    ```
+
+    Online serving:
+
+    ```bash
+    vllm serve Qwen/Qwen2-VL-7B-Instruct --limit-mm-per-prompt '{"image":4}'
+    ```
+
+    **This is no longer required if you are using vLLM V1.**
+
+!!! note
+    vLLM currently only supports adding LoRA to the language backbone of multimodal models.
+
+### Generative Models
+
+See [this page][generative-models] for more information on how to use generative models.
+
+#### Text Generation
+
+Specified using `--task generate`.
+
+| Architecture                                 | Models                                                                   | Inputs                                                                | Example HF Models                                                                                                                                       | [LoRA][lora-adapter]   | [PP][distributed-serving]   | [V1](gh-issue:8779)   |
+|----------------------------------------------|--------------------------------------------------------------------------|-----------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------|-----------------------------|-----------------------|
+| `AriaForConditionalGeneration`               | Aria                                                                     | T + I<sup>+</sup>                                                     | `rhymes-ai/Aria`                                                                                                                                        | ✅︎                     | ✅︎                          |                       |
+| `AyaVisionForConditionalGeneration`          | Aya Vision                                                               | T + I<sup>+</sup>                                                     | `CohereForAI/aya-vision-8b`, `CohereForAI/aya-vision-32b`, etc.                                                                                         | ✅︎                     | ✅︎                          |                       |
+| `Blip2ForConditionalGeneration`              | BLIP-2                                                                   | T + I<sup>E</sup>                                                     | `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc.                                                                                          | ✅︎                     | ✅︎                          |                       |
+| `ChameleonForConditionalGeneration`          | Chameleon                                                                | T + I                                                                 | `facebook/chameleon-7b` etc.                                                                                                                            | ✅︎                     | ✅︎                          |                       |
+| `DeepseekVLV2ForCausalLM`<sup>^</sup>        | DeepSeek-VL2                                                             | T + I<sup>+</sup>                                                     | `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc.                                                      | ✅︎                     | ✅︎                          |                       |
+| `Florence2ForConditionalGeneration`          | Florence-2                                                               | T + I                                                                 | `microsoft/Florence-2-base`, `microsoft/Florence-2-large` etc.                                                                                          |                        |                             |                       |
+| `FuyuForCausalLM`                            | Fuyu                                                                     | T + I                                                                 | `adept/fuyu-8b` etc.                                                                                                                                    | ✅︎                     | ✅︎                          |                       |
+| `Gemma3ForConditionalGeneration`             | Gemma 3                                                                  | T + I<sup>+</sup>                                                     | `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc.                                                                                                   | ✅︎                     | ✅︎                          | ⚠️                    |
+| `GLM4VForCausalLM`<sup>^</sup>               | GLM-4V                                                                   | T + I                                                                 | `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220` etc.                                                                                                    | ✅︎                     | ✅︎                          | ✅︎                    |
+| `GraniteSpeechForConditionalGeneration`      | Granite Speech                                                           | T + A                                                                 | `ibm-granite/granite-speech-3.3-8b`                                                                                                                     | ✅︎                     | ✅︎                          | ✅︎                    |
+| `H2OVLChatModel`                             | H2OVL                                                                    | T + I<sup>E+</sup>                                                    | `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc.                                                                                      | ✅︎                     | ✅︎\*                        |                       |
+| `Idefics3ForConditionalGeneration`           | Idefics3                                                                 | T + I                                                                 | `HuggingFaceM4/Idefics3-8B-Llama3` etc.                                                                                                                 | ✅︎                     | ✅︎                          |                       |
+| `InternVLChatModel`                          | InternVL 3.0, InternVideo 2.5, InternVL 2.5, Mono-InternVL, InternVL 2.0 | T + I<sup>E+</sup>                                                    | `OpenGVLab/InternVL3-9B`, `OpenGVLab/InternVideo2_5_Chat_8B`, `OpenGVLab/InternVL2_5-4B`, `OpenGVLab/Mono-InternVL-2B`, `OpenGVLab/InternVL2-4B`, etc.  | ✅︎                     | ✅︎                          |                       |
+| `KimiVLForConditionalGeneration`             | Kimi-VL-A3B-Instruct, Kimi-VL-A3B-Thinking                               | T + I<sup>+</sup>                                                     | `moonshotai/Kimi-VL-A3B-Instruct`, `moonshotai/Kimi-VL-A3B-Thinking`                                                                                    | ✅︎                     |                             |                       |
+| `Llama4ForConditionalGeneration`             | Llama 4                                                                  | T + I<sup>+</sup>                                                     | `meta-llama/Llama-4-Scout-17B-16E-Instruct`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct`, etc. | ✅︎                     | ✅︎                          |                       |
+| `LlavaForConditionalGeneration`              | LLaVA-1.5                                                                | T + I<sup>E+</sup>                                                    | `llava-hf/llava-1.5-7b-hf`, `TIGER-Lab/Mantis-8B-siglip-llama3` (see note), etc.                                                                        | ✅︎                     | ✅︎                          |                       |
+| `LlavaNextForConditionalGeneration`          | LLaVA-NeXT                                                               | T + I<sup>E+</sup>                                                    | `llava-hf/llava-v1.6-mistral-7b-hf`, `llava-hf/llava-v1.6-vicuna-7b-hf`, etc.                                                                           | ✅︎                     | ✅︎                          |                       |
+| `LlavaNextVideoForConditionalGeneration`     | LLaVA-NeXT-Video                                                         | T + V                                                                 | `llava-hf/LLaVA-NeXT-Video-7B-hf`, etc.                                                                                                                 | ✅︎                     | ✅︎                          |                       |
+| `LlavaOnevisionForConditionalGeneration`     | LLaVA-Onevision                                                          | T + I<sup>+</sup> + V<sup>+</sup>                                     | `llava-hf/llava-onevision-qwen2-7b-ov-hf`, `llava-hf/llava-onevision-qwen2-0.5b-ov-hf`, etc.                                                            | ✅︎                     | ✅︎                          |                       |
+| `MiniCPMO`                                   | MiniCPM-O                                                                | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>E+</sup>                  | `openbmb/MiniCPM-o-2_6`, etc.                                                                                                                           | ✅︎                     | ✅︎                          | ✅︎                    |
+| `MiniCPMV`                                   | MiniCPM-V                                                                | T + I<sup>E+</sup> + V<sup>E+</sup>                                   | `openbmb/MiniCPM-V-2` (see note), `openbmb/MiniCPM-Llama3-V-2_5`, `openbmb/MiniCPM-V-2_6`, etc.                                                         | ✅︎                     | ✅︎                          | ✅︎                    |
+| `MiniMaxVL01ForConditionalGeneration`        | MiniMax-VL                                                               | T + I<sup>E+</sup>                                                    | `MiniMaxAI/MiniMax-VL-01`, etc.                                                                                                                         | ✅︎                     | ✅︎                          |                       |
+| `Mistral3ForConditionalGeneration`           | Mistral3                                                                 | T + I<sup>+</sup>                                                     | `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, etc.                                                                                                   | ✅︎                     | ✅︎                          | ✅︎                    |
+| `MllamaForConditionalGeneration`             | Llama 3.2                                                                | T + I<sup>+</sup>                                                     | `meta-llama/Llama-3.2-90B-Vision-Instruct`, `meta-llama/Llama-3.2-11B-Vision`, etc.                                                                     |                        |                             |                       |
+| `MolmoForCausalLM`                           | Molmo                                                                    | T + I<sup>+</sup>                                                     | `allenai/Molmo-7B-D-0924`, `allenai/Molmo-7B-O-0924`, etc.                                                                                              | ✅︎                     | ✅︎                          | ✅︎                    |
+| `NVLM_D_Model`                               | NVLM-D 1.0                                                               | T + I<sup>+</sup>                                                     | `nvidia/NVLM-D-72B`, etc.                                                                                                                               | ✅︎                     | ✅︎                          |                       |
+| `Ovis`                                       | Ovis2, Ovis1.6                                                           | T + I<sup>+</sup>                                                     | `AIDC-AI/Ovis2-1B`, `AIDC-AI/Ovis1.6-Llama3.2-3B`, etc.                                                                                                 | ✅︎                     |                             |                       |
+| `PaliGemmaForConditionalGeneration`          | PaliGemma, PaliGemma 2                                                   | T + I<sup>E</sup>                                                     | `google/paligemma-3b-pt-224`, `google/paligemma-3b-mix-224`, `google/paligemma2-3b-ft-docci-448`, etc.                                                  | ✅︎                     | ⚠️                          |                       |
+| `Phi3VForCausalLM`                           | Phi-3-Vision, Phi-3.5-Vision                                             | T + I<sup>E+</sup>                                                    | `microsoft/Phi-3-vision-128k-instruct`, `microsoft/Phi-3.5-vision-instruct`, etc.                                                                       | ✅︎                     | ✅︎                          |                       |
+| `Phi4MMForCausalLM`                          | Phi-4-multimodal                                                         | T + I<sup>+</sup> / T + A<sup>+</sup> / I<sup>+</sup> + A<sup>+</sup> | `microsoft/Phi-4-multimodal-instruct`, etc.                                                                                                             | ✅︎                     | ✅︎                          |                       |
+| `PixtralForConditionalGeneration`            | Pixtral                                                                  | T + I<sup>+</sup>                                                     | `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, `mistral-community/pixtral-12b`, etc.                                                                  | ✅︎                     | ✅︎                          |                       |
+| `QwenVLForConditionalGeneration`<sup>^</sup> | Qwen-VL                                                                  | T + I<sup>E+</sup>                                                    | `Qwen/Qwen-VL`, `Qwen/Qwen-VL-Chat`, etc.                                                                                                               | ✅︎                     | ✅︎                          | ✅︎                    |
+| `Qwen2AudioForConditionalGeneration`         | Qwen2-Audio                                                              | T + A<sup>+</sup>                                                     | `Qwen/Qwen2-Audio-7B-Instruct`                                                                                                                          | ✅︎                     | ✅︎                          |                       |
+| `Qwen2VLForConditionalGeneration`            | QVQ, Qwen2-VL                                                            | T + I<sup>E+</sup> + V<sup>E+</sup>                                   | `Qwen/QVQ-72B-Preview`, `Qwen/Qwen2-VL-7B-Instruct`, `Qwen/Qwen2-VL-72B-Instruct`, etc.                                                                 | ✅︎                     | ✅︎                          | ✅︎                    |
+| `Qwen2_5_VLForConditionalGeneration`         | Qwen2.5-VL                                                               | T + I<sup>E+</sup> + V<sup>E+</sup>                                   | `Qwen/Qwen2.5-VL-3B-Instruct`, `Qwen/Qwen2.5-VL-72B-Instruct`, etc.                                                                                     | ✅︎                     | ✅︎                          | ✅︎                    |
+| `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni                                                             | T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup>                   | `Qwen/Qwen2.5-Omni-7B`                                                                                                                                  | ✅︎                     | ✅︎\*                        |                       |
+| `SkyworkR1VChatModel`                        | Skywork-R1V-38B                                                          | T + I                                                                 | `Skywork/Skywork-R1V-38B`                                                                                                                               | ✅︎                     | ✅︎                          |                       |
+| `SmolVLMForConditionalGeneration`            | SmolVLM2                                                                 | T + I                                                                 | `SmolVLM2-2.2B-Instruct`                                                                                                                                | ✅︎                     | ✅︎                          |                       |
+
+<sup>^</sup> You need to set the architecture name via `--hf-overrides` to match the one in vLLM.  
+&nbsp;&nbsp;&nbsp;&nbsp;• For example, to use DeepSeek-VL2 series models:  
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`--hf-overrides '{"architectures": ["DeepseekVLV2ForCausalLM"]}'`  
+<sup>E</sup> Pre-computed embeddings can be inputted for this modality.  
+<sup>+</sup> Multiple items can be inputted per text prompt for this modality.
+
+!!! warning
+    Both V0 and V1 support `Gemma3ForConditionalGeneration` for text-only inputs.
+    However, there are differences in how they handle text + image inputs:
+
+    V0 correctly implements the model's attention pattern:
+    - Uses bidirectional attention between the image tokens corresponding to the same image
+    - Uses causal attention for other tokens
+    - Implemented via (naive) PyTorch SDPA with masking tensors
+    - Note: May use significant memory for long prompts with image
+
+    V1 currently uses a simplified attention pattern:
+    - Uses causal attention for all tokens, including image tokens
+    - Generates reasonable outputs but does not match the original model's attention for text + image inputs, especially when `{"do_pan_and_scan": true}`
+    - Will be updated in the future to support the correct behavior
+
+    This limitation exists because the model's mixed attention pattern (bidirectional for images, causal otherwise) is not yet supported by vLLM's attention backends.
+
+!!! note
+    `h2oai/h2ovl-mississippi-2b` will be available in V1 once we support head size 80.
+
+!!! note
+    To use `TIGER-Lab/Mantis-8B-siglip-llama3`, you have to pass `--hf-overrides '{"architectures": ["MantisForConditionalGeneration"]}'` when running vLLM.
+
+!!! warning
+    The output quality of `AllenAI/Molmo-7B-D-0924` (especially in object localization tasks) has deteriorated in recent updates.
+
+    For the best results, we recommend using the following dependency versions (tested on A10 and L40):
+
+    ```text
+    # Core vLLM-compatible dependencies with Molmo accuracy setup (tested on L40)
+    torch==2.5.1
+    torchvision==0.20.1
+    transformers==4.48.1
+    tokenizers==0.21.0
+    tiktoken==0.7.0
+    vllm==0.7.0
+
+    # Optional but recommended for improved performance and stability
+    triton==3.1.0
+    xformers==0.0.28.post3
+    uvloop==0.21.0
+    protobuf==5.29.3
+    openai==1.60.2
+    opencv-python-headless==4.11.0.86
+    pillow==10.4.0
+
+    # Installed FlashAttention (for float16 only)
+    flash-attn>=2.5.6  # Not used in float32, but should be documented
+    ```
+
+    **Note:** Make sure you understand the security implications of using outdated packages.
+
+!!! note
+    The official `openbmb/MiniCPM-V-2` doesn't work yet, so we need to use a fork (`HwwwH/MiniCPM-V-2`) for now.
+    For more details, please see: <gh-pr:4087#issuecomment-2250397630>
+
+!!! warning
+    Our PaliGemma implementations have the same problem as Gemma 3 (see above) for both V0 and V1.
+
+!!! note
+    To use Qwen2.5-Omni, you have to install Hugging Face Transformers library from source via
+    `pip install git+https://github.com/huggingface/transformers.git`.
+
+    Reading audio from video during pre-processing is currently supported on V0 (but not V1), because overlapping modalities are not yet supported in V1.
+    To enable it, pass `--mm-processor-kwargs '{"use_audio_in_video": true}'`.
+
+### Pooling Models
+
+See [this page][pooling-models] for more information on how to use pooling models.
+
+!!! warning
+    Since some model architectures support both generative and pooling tasks,
+    you should explicitly specify the task type to ensure that the model is used in pooling mode instead of generative mode.
+
+#### Text Embedding
+
+Specified using `--task embed`.
+
+Any text generation model can be converted into an embedding model by passing `--task embed`.
+
+!!! note
+    To get the best results, you should use pooling models that are specifically trained as such.
+
+The following table lists those that are tested in vLLM.
+
+| Architecture                        | Models             | Inputs   | Example HF Models        | [LoRA][lora-adapter]   | [PP][distributed-serving]   |
+|-------------------------------------|--------------------|----------|--------------------------|------------------------|-----------------------------|
+| `LlavaNextForConditionalGeneration` | LLaVA-NeXT-based   | T / I    | `royokong/e5-v`          | ✅︎                     |                             |
+| `Phi3VForCausalLM`                  | Phi-3-Vision-based | T + I    | `TIGER-Lab/VLM2Vec-Full` | 🚧                      | ✅︎                          |
+
+#### Transcription
+
+Specified using `--task transcription`.
+
+Speech2Text models trained specifically for Automatic Speech Recognition.
+
+| Architecture   | Models   | Example HF Models   | [LoRA][lora-adapter]   | [PP][distributed-serving]   |
+|----------------|----------|---------------------|------------------------|-----------------------------|
+
+---
+
+## Model Support Policy
+
+At vLLM, we are committed to facilitating the integration and support of third-party models within our ecosystem. Our approach is designed to balance the need for robustness and the practical limitations of supporting a wide range of models. Here’s how we manage third-party model support:
+
+1. **Community-Driven Support**: We encourage community contributions for adding new models. When a user requests support for a new model, we welcome pull requests (PRs) from the community. These contributions are evaluated primarily on the sensibility of the output they generate, rather than strict consistency with existing implementations such as those in transformers. **Call for contribution:** PRs coming directly from model vendors are greatly appreciated!
+
+2. **Best-Effort Consistency**: While we aim to maintain a level of consistency between the models implemented in vLLM and other frameworks like transformers, complete alignment is not always feasible. Factors like acceleration techniques and the use of low-precision computations can introduce discrepancies. Our commitment is to ensure that the implemented models are functional and produce sensible results.
+
+    !!! tip
+        When comparing the output of `model.generate` from Hugging Face Transformers with the output of `llm.generate` from vLLM, note that the former reads the model's generation config file (i.e., [generation_config.json](https://github.com/huggingface/transformers/blob/19dabe96362803fb0a9ae7073d03533966598b17/src/transformers/generation/utils.py#L1945)) and applies the default parameters for generation, while the latter only uses the parameters passed to the function. Ensure all sampling parameters are identical when comparing outputs.
+
+3. **Issue Resolution and Model Updates**: Users are encouraged to report any bugs or issues they encounter with third-party models. Proposed fixes should be submitted via PRs, with a clear explanation of the problem and the rationale behind the proposed solution. If a fix for one model impacts another, we rely on the community to highlight and address these cross-model dependencies. Note: for bugfix PRs, it is good etiquette to inform the original author to seek their feedback.
+
+4. **Monitoring and Updates**: Users interested in specific models should monitor the commit history for those models (e.g., by tracking changes in the main/vllm/model_executor/models directory). This proactive approach helps users stay informed about updates and changes that may affect the models they use.
+
+5. **Selective Focus**: Our resources are primarily directed towards models with significant user interest and impact. Models that are less frequently used may receive less attention, and we rely on the community to play a more active role in their upkeep and improvement.
+
+Through this approach, vLLM fosters a collaborative environment where both the core development team and the broader community contribute to the robustness and diversity of the third-party models supported in our ecosystem.
+
+Note that, as an inference engine, vLLM does not introduce new models. Therefore, all models supported by vLLM are third-party models in this regard.
+
+We have the following levels of testing for models:
+
+1. **Strict Consistency**: We compare the output of the model with the output of the model in the HuggingFace Transformers library under greedy decoding. This is the most stringent test. Please refer to [models tests](https://github.com/vllm-project/vllm/blob/main/tests/models) for the models that have passed this test.
+2. **Output Sensibility**: We check if the output of the model is sensible and coherent, by measuring the perplexity of the output and checking for any obvious errors. This is a less stringent test.
+3. **Runtime Functionality**: We check if the model can be loaded and run without errors. This is the least stringent test. Please refer to [functionality tests](gh-dir:tests) and [examples](gh-dir:examples) for the models that have passed this test.
+4. **Community Feedback**: We rely on the community to provide feedback on the models. If a model is broken or not working as expected, we encourage users to raise issues to report it or open pull requests to fix it. The rest of the models fall under this category.
diff --git a/docs/source/performance/benchmarks.md b/docs/performance/benchmarks.md
similarity index 86%
rename from docs/source/performance/benchmarks.md
rename to docs/performance/benchmarks.md
index 39dc470a1c708..00505fc6f2a98 100644
--- a/docs/source/performance/benchmarks.md
+++ b/docs/performance/benchmarks.md
@@ -1,13 +1,14 @@
-(benchmarks)=
-
-# Benchmark Suites
+---
+title: Benchmark Suites
+---
+[](){ #benchmarks }
 
 vLLM contains two sets of benchmarks:
 
-- [Performance benchmarks](#performance-benchmarks)
-- [Nightly benchmarks](#nightly-benchmarks)
+- [Performance benchmarks][performance-benchmarks]
+- [Nightly benchmarks][nightly-benchmarks]
 
-(performance-benchmarks)=
+[](){ #performance-benchmarks }
 
 ## Performance Benchmarks
 
@@ -17,7 +18,7 @@ The latest performance results are hosted on the public [vLLM Performance Dashbo
 
 More information on the performance benchmarks and their parameters can be found [here](gh-file:.buildkite/nightly-benchmarks/performance-benchmarks-descriptions.md).
 
-(nightly-benchmarks)=
+[](){ #nightly-benchmarks }
 
 ## Nightly Benchmarks
 
diff --git a/docs/source/performance/optimization.md b/docs/performance/optimization.md
similarity index 98%
rename from docs/source/performance/optimization.md
rename to docs/performance/optimization.md
index 4160f07849626..57e01a384b524 100644
--- a/docs/source/performance/optimization.md
+++ b/docs/performance/optimization.md
@@ -1,6 +1,7 @@
-(optimization-and-tuning)=
-
-# Optimization and Tuning
+---
+title: Optimization and Tuning
+---
+[](){ #optimization-and-tuning }
 
 This guide covers optimization strategies and performance tuning for vLLM V1.
 
@@ -26,7 +27,7 @@ You can monitor the number of preemption requests through Prometheus metrics exp
 
 In vLLM V1, the default preemption mode is `RECOMPUTE` rather than `SWAP`, as recomputation has lower overhead in the V1 architecture.
 
-(chunked-prefill)=
+[](){ #chunked-prefill }
 
 ## Chunked Prefill
 
diff --git a/docs/source/serving/distributed_serving.md b/docs/serving/distributed_serving.md
similarity index 73%
rename from docs/source/serving/distributed_serving.md
rename to docs/serving/distributed_serving.md
index c285ef3e8e1c1..259af5cabcb8f 100644
--- a/docs/source/serving/distributed_serving.md
+++ b/docs/serving/distributed_serving.md
@@ -1,6 +1,7 @@
-(distributed-serving)=
-
-# Distributed Inference and Serving
+---
+title: Distributed Inference and Serving
+---
+[](){ #distributed-serving }
 
 ## How to decide the distributed inference strategy?
 
@@ -14,9 +15,8 @@ In short, you should increase the number of GPUs and the number of nodes until y
 
 After adding enough GPUs and nodes to hold the model, you can run vLLM first, which will print some logs like `# GPU blocks: 790`. Multiply the number by `16` (the block size), and you can get roughly the maximum number of tokens that can be served on the current configuration. If this number is not satisfying, e.g. you want higher throughput, you can further increase the number of GPUs or nodes, until the number of blocks is enough.
 
-:::{note}
-There is one edge case: if the model fits in a single node with multiple GPUs, but the number of GPUs cannot divide the model size evenly, you can use pipeline parallelism, which splits the model along layers and supports uneven splits. In this case, the tensor parallel size should be 1 and the pipeline parallel size should be the number of GPUs.
-:::
+!!! note
+    There is one edge case: if the model fits in a single node with multiple GPUs, but the number of GPUs cannot divide the model size evenly, you can use pipeline parallelism, which splits the model along layers and supports uneven splits. In this case, the tensor parallel size should be 1 and the pipeline parallel size should be the number of GPUs.
 
 ## Running vLLM on a single node
 
@@ -77,13 +77,11 @@ bash run_cluster.sh \
 
 Then you get a ray cluster of **containers**. Note that you need to keep the shells running these commands alive to hold the cluster. Any shell disconnect will terminate the cluster. In addition, please note that the argument `ip_of_head_node` should be the IP address of the head node, which is accessible by all the worker nodes. The IP addresses of each worker node should be specified in the `VLLM_HOST_IP` environment variable, and should be different for each worker node. Please check the network configuration of your cluster to make sure the nodes can communicate with each other through the specified IP addresses.
 
-:::{warning}
-It is considered best practice to set `VLLM_HOST_IP` to an address on a private network segment for the vLLM cluster. The traffic sent here is not encrypted. The endpoints are also exchanging data in a format that could be exploited to execute arbitrary code should a malicious party gain access to the network. Please ensure that this network is not reachable by any untrusted parties.
-:::
+!!! warning
+    It is considered best practice to set `VLLM_HOST_IP` to an address on a private network segment for the vLLM cluster. The traffic sent here is not encrypted. The endpoints are also exchanging data in a format that could be exploited to execute arbitrary code should a malicious party gain access to the network. Please ensure that this network is not reachable by any untrusted parties.
 
-:::{warning}
-Since this is a ray cluster of **containers**, all the following commands should be executed in the **containers**, otherwise you are executing the commands on the host machine, which is not connected to the ray cluster. To enter the container, you can use `docker exec -it node /bin/bash`.
-:::
+!!! warning
+    Since this is a ray cluster of **containers**, all the following commands should be executed in the **containers**, otherwise you are executing the commands on the host machine, which is not connected to the ray cluster. To enter the container, you can use `docker exec -it node /bin/bash`.
 
 Then, on any node, use `docker exec -it node /bin/bash` to enter the container, execute `ray status` and `ray list nodes` to check the status of the Ray cluster. You should see the right number of nodes and GPUs.
 
@@ -104,16 +102,13 @@ vllm serve /path/to/the/model/in/the/container \
 
 To make tensor parallel performant, you should make sure the communication between nodes is efficient, e.g. using high-speed network cards like Infiniband. To correctly set up the cluster to use Infiniband, append additional arguments like `--privileged -e NCCL_IB_HCA=mlx5` to the `run_cluster.sh` script. Please contact your system administrator for more information on how to set up the flags. One way to confirm if the Infiniband is working is to run vLLM with `NCCL_DEBUG=TRACE` environment variable set, e.g. `NCCL_DEBUG=TRACE vllm serve ...` and check the logs for the NCCL version and the network used. If you find `[send] via NET/Socket` in the logs, it means NCCL uses raw TCP Socket, which is not efficient for cross-node tensor parallel. If you find `[send] via NET/IB/GDRDMA` in the logs, it means NCCL uses Infiniband with GPU-Direct RDMA, which is efficient.
 
-:::{warning}
-After you start the Ray cluster, you'd better also check the GPU-GPU communication between nodes. It can be non-trivial to set up. Please refer to the [sanity check script](#troubleshooting-incorrect-hardware-driver) for more information. If you need to set some environment variables for the communication configuration, you can append them to the `run_cluster.sh` script, e.g. `-e NCCL_SOCKET_IFNAME=eth0`. Note that setting environment variables in the shell (e.g. `NCCL_SOCKET_IFNAME=eth0 vllm serve ...`) only works for the processes in the same node, not for the processes in the other nodes. Setting environment variables when you create the cluster is the recommended way. See <gh-issue:6803> for more information.
-:::
+!!! warning
+    After you start the Ray cluster, you'd better also check the GPU-GPU communication between nodes. It can be non-trivial to set up. Please refer to the [sanity check script][troubleshooting-incorrect-hardware-driver] for more information. If you need to set some environment variables for the communication configuration, you can append them to the `run_cluster.sh` script, e.g. `-e NCCL_SOCKET_IFNAME=eth0`. Note that setting environment variables in the shell (e.g. `NCCL_SOCKET_IFNAME=eth0 vllm serve ...`) only works for the processes in the same node, not for the processes in the other nodes. Setting environment variables when you create the cluster is the recommended way. See <gh-issue:6803> for more information.
 
-:::{warning}
-Please make sure you downloaded the model to all the nodes (with the same path), or the model is downloaded to some distributed file system that is accessible by all nodes.
+!!! warning
+    Please make sure you downloaded the model to all the nodes (with the same path), or the model is downloaded to some distributed file system that is accessible by all nodes.
 
-When you use huggingface repo id to refer to the model, you should append your huggingface token to the `run_cluster.sh` script, e.g. `-e HF_TOKEN=`. The recommended way is to download the model first, and then use the path to refer to the model.
-:::
+    When you use huggingface repo id to refer to the model, you should append your huggingface token to the `run_cluster.sh` script, e.g. `-e HF_TOKEN=`. The recommended way is to download the model first, and then use the path to refer to the model.
 
-:::{warning}
-If you keep receiving the error message `Error: No available node types can fulfill resource request` but you have enough GPUs in the cluster, chances are your nodes have multiple IP addresses and vLLM cannot find the right one, especially when you are using multi-node inference. Please make sure vLLM and ray use the same IP address. You can set the `VLLM_HOST_IP` environment variable to the right IP address in the `run_cluster.sh` script (different for each node!), and check `ray status` and `ray list nodes` to see the IP address used by Ray. See <gh-issue:7815> for more information.
-:::
+!!! warning
+    If you keep receiving the error message `Error: No available node types can fulfill resource request` but you have enough GPUs in the cluster, chances are your nodes have multiple IP addresses and vLLM cannot find the right one, especially when you are using multi-node inference. Please make sure vLLM and ray use the same IP address. You can set the `VLLM_HOST_IP` environment variable to the right IP address in the `run_cluster.sh` script (different for each node!), and check `ray status` and `ray list nodes` to see the IP address used by Ray. See <gh-issue:7815> for more information.
diff --git a/docs/serving/engine_args.md b/docs/serving/engine_args.md
new file mode 100644
index 0000000000000..fb2689a56391b
--- /dev/null
+++ b/docs/serving/engine_args.md
@@ -0,0 +1,18 @@
+---
+title: Engine Arguments
+---
+[](){ #engine-args }
+
+Engine arguments control the behavior of the vLLM engine.
+
+- For [offline inference][offline-inference], they are part of the arguments to the [LLM][vllm.LLM] class.
+- For [online serving][openai-compatible-server], they are part of the arguments to `vllm serve`.
+
+You can look at [EngineArgs][vllm.engine.arg_utils.EngineArgs] and [AsyncEngineArgs][vllm.engine.arg_utils.AsyncEngineArgs] to see the available engine arguments.
+
+However, these classes are a combination of the configuration classes defined in [vllm.config][]. Therefore, we would recommend you read about them there where they are best documented.
+
+For offline inference you will have access to these configuration classes and for online serving you can cross-reference the configs with `vllm serve --help`, which has its arguments grouped by config.
+
+!!! note
+    Additional arguments are available to the [AsyncLLMEngine][vllm.engine.async_llm_engine.AsyncLLMEngine], which is used for online serving. These can be found by running `vllm serve --help`.
diff --git a/docs/serving/env_vars.md b/docs/serving/env_vars.md
new file mode 100644
index 0000000000000..f6d548a19d91f
--- /dev/null
+++ b/docs/serving/env_vars.md
@@ -0,0 +1,12 @@
+# Environment Variables
+
+vLLM uses the following environment variables to configure the system:
+
+!!! warning
+    Please note that `VLLM_PORT` and `VLLM_HOST_IP` set the port and IP address for vLLM's **internal usage**. They are not the port and IP address for the API server. If you use `--host $VLLM_HOST_IP` and `--port $VLLM_PORT` to start the API server, it will not work.
+
+    All environment variables used by vLLM are prefixed with `VLLM_`. **Special care should be taken for Kubernetes users**: please do not name the service as `vllm`, otherwise environment variables set by Kubernetes might conflict with vLLM's environment variables, because [Kubernetes sets environment variables for each service with the capitalized service name as the prefix](https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables).
+
+```python
+--8<-- "vllm/envs.py:env-vars-definition"
+```
diff --git a/docs/source/serving/integrations/langchain.md b/docs/serving/integrations/langchain.md
similarity index 93%
rename from docs/source/serving/integrations/langchain.md
rename to docs/serving/integrations/langchain.md
index 03142d23b145a..14ea6a0443415 100644
--- a/docs/source/serving/integrations/langchain.md
+++ b/docs/serving/integrations/langchain.md
@@ -1,6 +1,7 @@
-(serving-langchain)=
-
-# LangChain
+---
+title: LangChain
+---
+[](){ #serving-langchain }
 
 vLLM is also available via [LangChain](https://github.com/langchain-ai/langchain) .
 
diff --git a/docs/source/serving/integrations/llamaindex.md b/docs/serving/integrations/llamaindex.md
similarity index 91%
rename from docs/source/serving/integrations/llamaindex.md
rename to docs/serving/integrations/llamaindex.md
index 8c72605202cf5..251b7155c5567 100644
--- a/docs/source/serving/integrations/llamaindex.md
+++ b/docs/serving/integrations/llamaindex.md
@@ -1,6 +1,7 @@
-(serving-llamaindex)=
-
-# LlamaIndex
+---
+title: LlamaIndex
+---
+[](){ #serving-llamaindex }
 
 vLLM is also available via [LlamaIndex](https://github.com/run-llama/llama_index) .
 
diff --git a/docs/source/serving/metrics.md b/docs/serving/metrics.md
similarity index 90%
rename from docs/source/serving/metrics.md
rename to docs/serving/metrics.md
index 647ece3f85f06..9ad7253184d9d 100644
--- a/docs/source/serving/metrics.md
+++ b/docs/serving/metrics.md
@@ -4,7 +4,7 @@ vLLM exposes a number of metrics that can be used to monitor the health of the
 system. These metrics are exposed via the `/metrics` endpoint on the vLLM
 OpenAI compatible API server.
 
-You can start the server using Python, or using [Docker](#deployment-docker):
+You can start the server using Python, or using [Docker][deployment-docker]:
 
 ```console
 vllm serve unsloth/Llama-3.2-1B-Instruct
@@ -31,11 +31,9 @@ vllm:iteration_tokens_total_bucket{le="512.0",model_name="unsloth/Llama-3.2-1B-I
 
 The following metrics are exposed:
 
-:::{literalinclude} ../../../vllm/engine/metrics.py
-:end-before: end-metrics-definitions
-:language: python
-:start-after: begin-metrics-definitions
-:::
+```python
+--8<-- "vllm/engine/metrics.py:metrics-definitions"
+```
 
 The following metrics are deprecated and due to be removed in a future version:
 
diff --git a/docs/source/serving/offline_inference.md b/docs/serving/offline_inference.md
similarity index 76%
rename from docs/source/serving/offline_inference.md
rename to docs/serving/offline_inference.md
index 433d2e894dd8d..584d7cd143bc3 100644
--- a/docs/source/serving/offline_inference.md
+++ b/docs/serving/offline_inference.md
@@ -1,10 +1,11 @@
-(offline-inference)=
-
-# Offline Inference
+---
+title: Offline Inference
+---
+[](){ #offline-inference }
 
 You can run vLLM in your own code on a list of prompts.
 
-The offline API is based on the {class}`~vllm.LLM` class.
+The offline API is based on the [LLM][vllm.LLM] class.
 To initialize the vLLM engine, create a new instance of `LLM` and specify the model to run.
 
 For example, the following code downloads the [`facebook/opt-125m`](https://huggingface.co/facebook/opt-125m) model from HuggingFace
@@ -19,23 +20,22 @@ llm = LLM(model="facebook/opt-125m")
 After initializing the `LLM` instance, you can perform model inference using various APIs.
 The available APIs depend on the type of model that is being run:
 
-- [Generative models](#generative-models) output logprobs which are sampled from to obtain the final output text.
-- [Pooling models](#pooling-models) output their hidden states directly.
+- [Generative models][generative-models] output logprobs which are sampled from to obtain the final output text.
+- [Pooling models][pooling-models] output their hidden states directly.
 
 Please refer to the above pages for more details about each API.
 
-:::{seealso}
-[API Reference](#offline-inference-api)
-:::
+!!! info
+    [API Reference][offline-inference-api]
 
-(configuration-options)=
+[](){ #configuration-options }
 
 ## Configuration Options
 
 This section lists the most common options for running the vLLM engine.
-For a full list, refer to the <project:#configuration> page.
+For a full list, refer to the [configuration][configuration] page.
 
-(model-resolution)=
+[](){ #model-resolution }
 
 ### Model resolution
 
@@ -59,9 +59,9 @@ model = LLM(
 )
 ```
 
-Our [list of supported models](#supported-models) shows the model architectures that are recognized by vLLM.
+Our [list of supported models][supported-models] shows the model architectures that are recognized by vLLM.
 
-(reducing-memory-usage)=
+[](){ #reducing-memory-usage }
 
 ### Reducing memory usage
 
@@ -80,18 +80,16 @@ llm = LLM(model="ibm-granite/granite-3.1-8b-instruct",
           tensor_parallel_size=2)
 ```
 
-:::{important}
-To ensure that vLLM initializes CUDA correctly, you should avoid calling related functions (e.g. {func}`torch.cuda.set_device`)
-before initializing vLLM. Otherwise, you may run into an error like `RuntimeError: Cannot re-initialize CUDA in forked subprocess`.
+!!! warning
+    To ensure that vLLM initializes CUDA correctly, you should avoid calling related functions (e.g. [torch.cuda.set_device][])
+    before initializing vLLM. Otherwise, you may run into an error like `RuntimeError: Cannot re-initialize CUDA in forked subprocess`.
 
-To control which devices are used, please instead set the `CUDA_VISIBLE_DEVICES` environment variable.
-:::
+    To control which devices are used, please instead set the `CUDA_VISIBLE_DEVICES` environment variable.
 
-:::{note}
-With tensor parallelism enabled, each process will read the whole model and split it into chunks, which makes the disk reading time even longer (proportional to the size of tensor parallelism).
+!!! note
+    With tensor parallelism enabled, each process will read the whole model and split it into chunks, which makes the disk reading time even longer (proportional to the size of tensor parallelism).
 
-You can convert the model checkpoint to a sharded checkpoint using <gh-file:examples/offline_inference/save_sharded_state.py>. The conversion process might take some time, but later you can load the sharded checkpoint much faster. The model loading time should remain constant regardless of the size of tensor parallelism.
-:::
+    You can convert the model checkpoint to a sharded checkpoint using <gh-file:examples/offline_inference/save_sharded_state.py>. The conversion process might take some time, but later you can load the sharded checkpoint much faster. The model loading time should remain constant regardless of the size of tensor parallelism.
 
 #### Quantization
 
@@ -100,7 +98,7 @@ Quantized models take less memory at the cost of lower precision.
 Statically quantized models can be downloaded from HF Hub (some popular ones are available at [Red Hat AI](https://huggingface.co/RedHatAI))
 and used directly without extra configuration.
 
-Dynamic quantization is also supported via the `quantization` option -- see [here](#quantization-index) for more details.
+Dynamic quantization is also supported via the `quantization` option -- see [here][quantization-index] for more details.
 
 #### Context length and batch size
 
@@ -119,9 +117,8 @@ llm = LLM(model="adept/fuyu-8b",
 
 By default, we optimize model inference using CUDA graphs which take up extra memory in the GPU.
 
-:::{important}
-CUDA graph capture takes up more memory in V1 than in V0.
-:::
+!!! warning
+    CUDA graph capture takes up more memory in V1 than in V0.
 
 You can adjust `compilation_config` to achieve a better balance between inference speed and memory usage:
 
@@ -214,4 +211,4 @@ llm = LLM(model="OpenGVLab/InternVL2-2B",
 ### Performance optimization and tuning
 
 You can potentially improve the performance of vLLM by finetuning various options.
-Please refer to [this guide](#optimization-and-tuning) for more details.
+Please refer to [this guide][optimization-and-tuning] for more details.
diff --git a/docs/source/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md
similarity index 61%
rename from docs/source/serving/openai_compatible_server.md
rename to docs/serving/openai_compatible_server.md
index 61f7e98bf1088..27cb9310c516a 100644
--- a/docs/source/serving/openai_compatible_server.md
+++ b/docs/serving/openai_compatible_server.md
@@ -1,10 +1,11 @@
-(openai-compatible-server)=
-
-# OpenAI-Compatible Server
+---
+title: OpenAI-Compatible Server
+---
+[](){ #openai-compatible-server }
 
 vLLM provides an HTTP server that implements OpenAI's [Completions API](https://platform.openai.com/docs/api-reference/completions), [Chat API](https://platform.openai.com/docs/api-reference/chat), and more! This functionality lets you serve models and interact with them using an HTTP client.
 
-In your terminal, you can [install](../getting_started/installation.md) vLLM, then start the server with the [`vllm serve`](#serve-args) command. (You can also use our [Docker](#deployment-docker) image.)
+In your terminal, you can [install](../getting_started/installation.md) vLLM, then start the server with the [`vllm serve`][serve-args] command. (You can also use our [Docker][deployment-docker] image.)
 
 ```bash
 vllm serve NousResearch/Meta-Llama-3-8B-Instruct --dtype auto --api-key token-abc123
@@ -20,58 +21,56 @@ client = OpenAI(
 )
 
 completion = client.chat.completions.create(
-  model="NousResearch/Meta-Llama-3-8B-Instruct",
-  messages=[
-    {"role": "user", "content": "Hello!"}
-  ]
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    messages=[
+        {"role": "user", "content": "Hello!"}
+    ]
 )
 
 print(completion.choices[0].message)
 ```
 
-:::{tip}
-vLLM supports some parameters that are not supported by OpenAI, `top_k` for example.
-You can pass these parameters to vLLM using the OpenAI client in the `extra_body` parameter of your requests, i.e. `extra_body={"top_k": 50}` for `top_k`.
-:::
+!!! tip
+    vLLM supports some parameters that are not supported by OpenAI, `top_k` for example.
+    You can pass these parameters to vLLM using the OpenAI client in the `extra_body` parameter of your requests, e.g. `extra_body={"top_k": 50}` for `top_k`.
 
-:::{important}
-By default, the server applies `generation_config.json` from the Hugging Face model repository if it exists. This means the default values of certain sampling parameters can be overridden by those recommended by the model creator.
+!!! warning
+    By default, the server applies `generation_config.json` from the Hugging Face model repository if it exists. This means the default values of certain sampling parameters can be overridden by those recommended by the model creator.
 
-To disable this behavior, please pass `--generation-config vllm` when launching the server.
-:::
+    To disable this behavior, please pass `--generation-config vllm` when launching the server.
 
 ## Supported APIs
 
 We currently support the following OpenAI APIs:
 
-- [Completions API](#completions-api) (`/v1/completions`)
-  - Only applicable to [text generation models](../models/generative_models.md) (`--task generate`).
-  - *Note: `suffix` parameter is not supported.*
-- [Chat Completions API](#chat-api) (`/v1/chat/completions`)
-  - Only applicable to [text generation models](../models/generative_models.md) (`--task generate`) with a [chat template](#chat-template).
-  - *Note: `parallel_tool_calls` and `user` parameters are ignored.*
-- [Embeddings API](#embeddings-api) (`/v1/embeddings`)
-  - Only applicable to [embedding models](../models/pooling_models.md) (`--task embed`).
-- [Transcriptions API](#transcriptions-api) (`/v1/audio/transcriptions`)
-  - Only applicable to Automatic Speech Recognition (ASR) models (OpenAI Whisper) (`--task generate`).
+- [Completions API][completions-api] (`/v1/completions`)
+    - Only applicable to [text generation models](../models/generative_models.md) (`--task generate`).
+    - *Note: `suffix` parameter is not supported.*
+- [Chat Completions API][chat-api] (`/v1/chat/completions`)
+    - Only applicable to [text generation models](../models/generative_models.md) (`--task generate`) with a [chat template][chat-template].
+    - *Note: `parallel_tool_calls` and `user` parameters are ignored.*
+- [Embeddings API][embeddings-api] (`/v1/embeddings`)
+    - Only applicable to [embedding models](../models/pooling_models.md) (`--task embed`).
+- [Transcriptions API][transcriptions-api] (`/v1/audio/transcriptions`)
+    - Only applicable to Automatic Speech Recognition (ASR) models (OpenAI Whisper) (`--task generate`).
 
 In addition, we have the following custom APIs:
 
-- [Tokenizer API](#tokenizer-api) (`/tokenize`, `/detokenize`)
-  - Applicable to any model with a tokenizer.
-- [Pooling API](#pooling-api) (`/pooling`)
-  - Applicable to all [pooling models](../models/pooling_models.md).
-- [Classification API](#classification-api) (`/classify`)
-  - Only applicable to [classification models](../models/pooling_models.md) (`--task classify`).
-- [Score API](#score-api) (`/score`)
-  - Applicable to embedding models and [cross-encoder models](../models/pooling_models.md) (`--task score`).
-- [Re-rank API](#rerank-api) (`/rerank`, `/v1/rerank`, `/v2/rerank`)
-  - Implements [Jina AI's v1 re-rank API](https://jina.ai/reranker/)
-  - Also compatible with [Cohere's v1 & v2 re-rank APIs](https://docs.cohere.com/v2/reference/rerank)
-  - Jina and Cohere's APIs are very similar; Jina's includes extra information in the rerank endpoint's response.
-  - Only applicable to [cross-encoder models](../models/pooling_models.md) (`--task score`).
+- [Tokenizer API][tokenizer-api] (`/tokenize`, `/detokenize`)
+    - Applicable to any model with a tokenizer.
+- [Pooling API][pooling-api] (`/pooling`)
+    - Applicable to all [pooling models](../models/pooling_models.md).
+- [Classification API][classification-api] (`/classify`)
+    - Only applicable to [classification models](../models/pooling_models.md) (`--task classify`).
+- [Score API][score-api] (`/score`)
+    - Applicable to embedding models and [cross-encoder models](../models/pooling_models.md) (`--task score`).
+- [Re-rank API][rerank-api] (`/rerank`, `/v1/rerank`, `/v2/rerank`)
+    - Implements [Jina AI's v1 re-rank API](https://jina.ai/reranker/)
+    - Also compatible with [Cohere's v1 & v2 re-rank APIs](https://docs.cohere.com/v2/reference/rerank)
+    - Jina and Cohere's APIs are very similar; Jina's includes extra information in the rerank endpoint's response.
+    - Only applicable to [cross-encoder models](../models/pooling_models.md) (`--task score`).
 
-(chat-template)=
+[](){ #chat-template }
 
 ## Chat Template
 
@@ -97,10 +96,10 @@ both a `type` and a `text` field. An example is provided below:
 
 ```python
 completion = client.chat.completions.create(
-  model="NousResearch/Meta-Llama-3-8B-Instruct",
-  messages=[
-    {"role": "user", "content": [{"type": "text", "text": "Classify this sentiment: vLLM is wonderful!"}]}
-  ]
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    messages=[
+        {"role": "user", "content": [{"type": "text", "text": "Classify this sentiment: vLLM is wonderful!"}]}
+    ]
 )
 ```
 
@@ -111,9 +110,9 @@ request. vLLM provides best-effort support to detect this automatically, which i
 the detected format, which can be one of:
 
 - `"string"`: A string.
-  - Example: `"Hello world"`
+    - Example: `"Hello world"`
 - `"openai"`: A list of dictionaries, similar to OpenAI schema.
-  - Example: `[{"type": "text", "text": "Hello world!"}]`
+    - Example: `[{"type": "text", "text": "Hello world!"}]`
 
 If the result is not what you expect, you can set the `--chat-template-content-format` CLI argument
 to override which format to use.
@@ -126,13 +125,13 @@ Or directly merge them into the JSON payload if you are using HTTP call directly
 
 ```python
 completion = client.chat.completions.create(
-  model="NousResearch/Meta-Llama-3-8B-Instruct",
-  messages=[
-    {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
-  ],
-  extra_body={
-    "guided_choice": ["positive", "negative"]
-  }
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    messages=[
+        {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
+    ],
+    extra_body={
+        "guided_choice": ["positive", "negative"]
+    }
 )
 ```
 
@@ -148,29 +147,29 @@ with `--enable-request-id-headers`.
 
 ```python
 completion = client.chat.completions.create(
-  model="NousResearch/Meta-Llama-3-8B-Instruct",
-  messages=[
-    {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
-  ],
-  extra_headers={
-    "x-request-id": "sentiment-classification-00001",
-  }
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    messages=[
+        {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"}
+    ],
+    extra_headers={
+        "x-request-id": "sentiment-classification-00001",
+    }
 )
 print(completion._request_id)
 
 completion = client.completions.create(
-  model="NousResearch/Meta-Llama-3-8B-Instruct",
-  prompt="A robot may not injure a human being",
-  extra_headers={
-    "x-request-id": "completion-test",
-  }
+    model="NousResearch/Meta-Llama-3-8B-Instruct",
+    prompt="A robot may not injure a human being",
+    extra_headers={
+        "x-request-id": "completion-test",
+    }
 )
 print(completion._request_id)
 ```
 
 ## API Reference
 
-(completions-api)=
+[](){ #completions-api }
 
 ### Completions API
 
@@ -181,23 +180,19 @@ Code example: <gh-file:examples/online_serving/openai_completion_client.py>
 
 #### Extra parameters
 
-The following [sampling parameters](#sampling-params) are supported.
+The following [sampling parameters][sampling-params] are supported.
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-completion-sampling-params
-:end-before: end-completion-sampling-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:completion-sampling-params"
+```
 
 The following extra parameters are supported:
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-completion-extra-params
-:end-before: end-completion-extra-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:completion-extra-params"
+```
 
-(chat-api)=
+[](){ #chat-api }
 
 ### Chat API
 
@@ -206,37 +201,33 @@ you can use the [official OpenAI Python client](https://github.com/openai/openai
 
 We support both [Vision](https://platform.openai.com/docs/guides/vision)- and
 [Audio](https://platform.openai.com/docs/guides/audio?audio-generation-quickstart-example=audio-in)-related parameters;
-see our [Multimodal Inputs](#multimodal-inputs) guide for more information.
+see our [Multimodal Inputs][multimodal-inputs] guide for more information.
 - *Note: `image_url.detail` parameter is not supported.*
 
 Code example: <gh-file:examples/online_serving/openai_chat_completion_client.py>
 
 #### Extra parameters
 
-The following [sampling parameters](#sampling-params) are supported.
+The following [sampling parameters][sampling-params] are supported.
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-chat-completion-sampling-params
-:end-before: end-chat-completion-sampling-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:chat-completion-sampling-params"
+```
 
 The following extra parameters are supported:
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-chat-completion-extra-params
-:end-before: end-chat-completion-extra-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:chat-completion-extra-params"
+```
 
-(embeddings-api)=
+[](){ #embeddings-api }
 
 ### Embeddings API
 
 Our Embeddings API is compatible with [OpenAI's Embeddings API](https://platform.openai.com/docs/api-reference/embeddings);
 you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
 
-If the model has a [chat template](#chat-template), you can replace `inputs` with a list of `messages` (same schema as [Chat API](#chat-api))
+If the model has a [chat template][chat-template], you can replace `inputs` with a list of `messages` (same schema as [Chat API][chat-api])
 which will be treated as a single prompt to the model.
 
 Code example: <gh-file:examples/online_serving/openai_embedding_client.py>
@@ -246,138 +237,117 @@ Code example: <gh-file:examples/online_serving/openai_embedding_client.py>
 You can pass multi-modal inputs to embedding models by defining a custom chat template for the server
 and passing a list of `messages` in the request. Refer to the examples below for illustration.
 
-:::::{tab-set}
-::::{tab-item} VLM2Vec
+=== "VLM2Vec"
 
-To serve the model:
+    To serve the model:
 
-```bash
-vllm serve TIGER-Lab/VLM2Vec-Full --task embed \
-  --trust-remote-code --max-model-len 4096 --chat-template examples/template_vlm2vec.jinja
-```
+    ```bash
+    vllm serve TIGER-Lab/VLM2Vec-Full --task embed \
+      --trust-remote-code --max-model-len 4096 --chat-template examples/template_vlm2vec.jinja
+    ```
 
-:::{important}
-Since VLM2Vec has the same model architecture as Phi-3.5-Vision, we have to explicitly pass `--task embed`
-to run this model in embedding mode instead of text generation mode.
+    !!! warning
+        Since VLM2Vec has the same model architecture as Phi-3.5-Vision, we have to explicitly pass `--task embed`
+        to run this model in embedding mode instead of text generation mode.
 
-The custom chat template is completely different from the original one for this model,
-and can be found here: <gh-file:examples/template_vlm2vec.jinja>
-:::
+        The custom chat template is completely different from the original one for this model,
+        and can be found here: <gh-file:examples/template_vlm2vec.jinja>
 
-Since the request schema is not defined by OpenAI client, we post a request to the server using the lower-level `requests` library:
+    Since the request schema is not defined by the OpenAI client, we post a request to the server using the lower-level `requests` library:
 
-```python
-import requests
+    ```python
+    import requests
 
-image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
+    image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
 
-response = requests.post(
-    "http://localhost:8000/v1/embeddings",
-    json={
-        "model": "TIGER-Lab/VLM2Vec-Full",
-        "messages": [{
-            "role": "user",
-            "content": [
-                {"type": "image_url", "image_url": {"url": image_url}},
-                {"type": "text", "text": "Represent the given image."},
-            ],
-        }],
-        "encoding_format": "float",
-    },
-)
-response.raise_for_status()
-response_json = response.json()
-print("Embedding output:", response_json["data"][0]["embedding"])
-```
+    response = requests.post(
+        "http://localhost:8000/v1/embeddings",
+        json={
+            "model": "TIGER-Lab/VLM2Vec-Full",
+            "messages": [{
+                "role": "user",
+                "content": [
+                    {"type": "image_url", "image_url": {"url": image_url}},
+                    {"type": "text", "text": "Represent the given image."},
+                ],
+            }],
+            "encoding_format": "float",
+        },
+    )
+    response.raise_for_status()
+    response_json = response.json()
+    print("Embedding output:", response_json["data"][0]["embedding"])
+    ```
 
-::::
+=== "DSE-Qwen2-MRL"
 
-::::{tab-item} DSE-Qwen2-MRL
+    To serve the model:
 
-To serve the model:
+    ```bash
+    vllm serve MrLight/dse-qwen2-2b-mrl-v1 --task embed \
+      --trust-remote-code --max-model-len 8192 --chat-template examples/template_dse_qwen2_vl.jinja
+    ```
 
-```bash
-vllm serve MrLight/dse-qwen2-2b-mrl-v1 --task embed \
-  --trust-remote-code --max-model-len 8192 --chat-template examples/template_dse_qwen2_vl.jinja
-```
+    !!! warning
+        Like with VLM2Vec, we have to explicitly pass `--task embed`.
 
-:::{important}
-Like with VLM2Vec, we have to explicitly pass `--task embed`.
+        Additionally, `MrLight/dse-qwen2-2b-mrl-v1` requires an EOS token for embeddings, which is handled
+        by a custom chat template: <gh-file:examples/template_dse_qwen2_vl.jinja>
 
-Additionally, `MrLight/dse-qwen2-2b-mrl-v1` requires an EOS token for embeddings, which is handled
-by a custom chat template: <gh-file:examples/template_dse_qwen2_vl.jinja>
-:::
-
-:::{important}
-`MrLight/dse-qwen2-2b-mrl-v1` requires a placeholder image of the minimum image size for text query embeddings. See the full code
-example below for details.
-:::
-
-::::
-
-:::::
+    !!! warning
+        `MrLight/dse-qwen2-2b-mrl-v1` requires a placeholder image of the minimum image size for text query embeddings. See the full code
+        example below for details.
 
 Full example: <gh-file:examples/online_serving/openai_chat_embedding_client_for_multimodal.py>
 
 #### Extra parameters
 
-The following [pooling parameters](#pooling-params) are supported.
+The following [pooling parameters][pooling-params] are supported.
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-embedding-pooling-params
-:end-before: end-embedding-pooling-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:embedding-pooling-params"
+```
 
 The following extra parameters are supported by default:
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-embedding-extra-params
-:end-before: end-embedding-extra-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:embedding-extra-params"
+```
 
 For chat-like input (i.e. if `messages` is passed), these extra parameters are supported instead:
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-chat-embedding-extra-params
-:end-before: end-chat-embedding-extra-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:chat-embedding-extra-params"
+```
 
-(transcriptions-api)=
+[](){ #transcriptions-api }
 
 ### Transcriptions API
 
 Our Transcriptions API is compatible with [OpenAI's Transcriptions API](https://platform.openai.com/docs/api-reference/audio/createTranscription);
 you can use the [official OpenAI Python client](https://github.com/openai/openai-python) to interact with it.
 
-:::{note}
-To use the Transcriptions API, please install with extra audio dependencies using `pip install vllm[audio]`.
-:::
+!!! note
+    To use the Transcriptions API, please install with extra audio dependencies using `pip install vllm[audio]`.
 
 Code example: <gh-file:examples/online_serving/openai_transcription_client.py>
 <!-- TODO: api enforced limits + uploading audios -->
 
 #### Extra Parameters
 
-The following [sampling parameters](#sampling-params) are supported.
+The following [sampling parameters][sampling-params] are supported.
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-transcription-sampling-params
-:end-before: end-transcription-sampling-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:transcription-sampling-params"
+```
 
 The following extra parameters are supported:
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-transcription-extra-params
-:end-before: end-transcription-extra-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:transcription-extra-params"
+```
 
-(tokenizer-api)=
+[](){ #tokenizer-api }
 
 ### Tokenizer API
 
@@ -387,17 +357,17 @@ It consists of two endpoints:
 - `/tokenize` corresponds to calling `tokenizer.encode()`.
 - `/detokenize` corresponds to calling `tokenizer.decode()`.
 
-(pooling-api)=
+[](){ #pooling-api }
 
 ### Pooling API
 
 Our Pooling API encodes input prompts using a [pooling model](../models/pooling_models.md) and returns the corresponding hidden states.
 
-The input format is the same as [Embeddings API](#embeddings-api), but the output data can contain an arbitrary nested list, not just a 1-D list of floats.
+The input format is the same as [Embeddings API][embeddings-api], but the output data can contain an arbitrary nested list, not just a 1-D list of floats.
 
 Code example: <gh-file:examples/online_serving/openai_pooling_client.py>
 
-(classification-api)=
+[](){ #classification-api }
 
 ### Classification API
 
@@ -505,23 +475,19 @@ Response:
 
 #### Extra parameters
 
-The following [pooling parameters](#pooling-params) are supported.
+The following [pooling parameters][pooling-params] are supported.
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-classification-pooling-params
-:end-before: end-classification-pooling-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:classification-pooling-params"
+```
 
 The following extra parameters are supported:
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-classification-extra-params
-:end-before: end-classification-extra-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:classification-extra-params"
+```
 
-(score-api)=
+[](){ #score-api }
 
 ### Score API
 
@@ -668,23 +634,19 @@ Response:
 
 #### Extra parameters
 
-The following [pooling parameters](#pooling-params) are supported.
+The following [pooling parameters][pooling-params] are supported.
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-score-pooling-params
-:end-before: end-score-pooling-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:score-pooling-params"
+```
 
 The following extra parameters are supported:
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-score-extra-params
-:end-before: end-score-extra-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:score-extra-params"
+```
 
-(rerank-api)=
+[](){ #rerank-api }
 
 ### Re-rank API
 
@@ -755,18 +717,14 @@ Response:
 
 #### Extra parameters
 
-The following [pooling parameters](#pooling-params) are supported.
+The following [pooling parameters][pooling-params] are supported.
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-rerank-pooling-params
-:end-before: end-rerank-pooling-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:rerank-pooling-params"
+```
 
 The following extra parameters are supported:
 
-:::{literalinclude} ../../../vllm/entrypoints/openai/protocol.py
-:language: python
-:start-after: begin-rerank-extra-params
-:end-before: end-rerank-extra-params
-:::
+```python
+--8<-- "vllm/entrypoints/openai/protocol.py:rerank-extra-params"
+```
diff --git a/docs/serving/serve_args.md b/docs/serving/serve_args.md
new file mode 100644
index 0000000000000..16b4b29f45d98
--- /dev/null
+++ b/docs/serving/serve_args.md
@@ -0,0 +1,38 @@
+---
+title: Server Arguments
+---
+[](){ #serve-args }
+
+The `vllm serve` command is used to launch the OpenAI-compatible server.
+
+## CLI Arguments
+
+The server is configured through command-line arguments passed to `vllm serve`.
+To see the available CLI arguments, run `vllm serve --help`!
+
+## Configuration file
+
+You can load CLI arguments via a [YAML](https://yaml.org/) config file.
+The argument names must be the long form of those outlined [above][serve-args].
+
+For example:
+
+```yaml
+# config.yaml
+
+model: meta-llama/Llama-3.1-8B-Instruct
+host: "127.0.0.1"
+port: 6379
+uvicorn-log-level: "info"
+```
+
+To use the above config file:
+
+```bash
+vllm serve --config config.yaml
+```
+
+!!! note
+    In case an argument is supplied simultaneously using the command line and the config file, the value from the command line will take precedence.
+    The order of priorities is `command line > config file values > defaults`.
+    For example, in `vllm serve SOME_MODEL --config config.yaml`, `SOME_MODEL` takes precedence over `model` in the config file.
diff --git a/docs/source/serving/usage_stats.md b/docs/serving/usage_stats.md
similarity index 100%
rename from docs/source/serving/usage_stats.md
rename to docs/serving/usage_stats.md
diff --git a/docs/source/_static/custom.css b/docs/source/_static/custom.css
deleted file mode 100644
index 79bd2082b49e8..0000000000000
--- a/docs/source/_static/custom.css
+++ /dev/null
@@ -1,8 +0,0 @@
-.vertical-table-header th.head:not(.stub) {
-    writing-mode: sideways-lr;
-    white-space: nowrap;
-    max-width: 0;
-    p {
-       margin: 0;
-    }
-}
diff --git a/docs/source/_templates/sections/header.html b/docs/source/_templates/sections/header.html
deleted file mode 100644
index 7174431b10272..0000000000000
--- a/docs/source/_templates/sections/header.html
+++ /dev/null
@@ -1,39 +0,0 @@
-<style>
-  .notification-bar {
-    width: 100vw;
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    font-size: 16px;
-    padding: 0 6px 0 6px;
-  }
-  .notification-bar p {
-    margin: 0;
-  }
-  .notification-bar a {
-    font-weight: bold;
-    text-decoration: none;
-  }
-
-  /* Light mode styles (default) */
-  .notification-bar {
-    background-color: #fff3cd;
-    color: #856404;
-  }
-  .notification-bar a {
-    color: #d97706;
-  }
-
-  /* Dark mode styles */
-  html[data-theme=dark] .notification-bar {
-    background-color: #333;
-    color: #ddd;
-  }
-  html[data-theme=dark] .notification-bar a {
-    color: #ffa500; /* Brighter color for visibility */
-  }
-</style>
-
-<div class="notification-bar">
-  <p>You are viewing the latest developer preview docs. <a href="https://docs.vllm.ai/en/stable/">Click here</a> to view docs for the latest stable release.</p>
-</div>
diff --git a/docs/source/api/summary.md b/docs/source/api/summary.md
deleted file mode 100644
index 46de545f9ded4..0000000000000
--- a/docs/source/api/summary.md
+++ /dev/null
@@ -1,133 +0,0 @@
-# Summary
-
-(configuration)=
-
-## Configuration
-
-API documentation for vLLM's configuration classes.
-
-```{autodoc2-summary}
-    vllm.config.ModelConfig
-    vllm.config.CacheConfig
-    vllm.config.TokenizerPoolConfig
-    vllm.config.LoadConfig
-    vllm.config.ParallelConfig
-    vllm.config.SchedulerConfig
-    vllm.config.DeviceConfig
-    vllm.config.SpeculativeConfig
-    vllm.config.LoRAConfig
-    vllm.config.PromptAdapterConfig
-    vllm.config.MultiModalConfig
-    vllm.config.PoolerConfig
-    vllm.config.DecodingConfig
-    vllm.config.ObservabilityConfig
-    vllm.config.KVTransferConfig
-    vllm.config.CompilationConfig
-    vllm.config.VllmConfig
-```
-
-(offline-inference-api)=
-
-## Offline Inference
-
-LLM Class.
-
-```{autodoc2-summary}
-    vllm.LLM
-```
-
-LLM Inputs.
-
-```{autodoc2-summary}
-    vllm.inputs.PromptType
-    vllm.inputs.TextPrompt
-    vllm.inputs.TokensPrompt
-```
-
-## vLLM Engines
-
-Engine classes for offline and online inference.
-
-```{autodoc2-summary}
-    vllm.LLMEngine
-    vllm.AsyncLLMEngine
-```
-
-## Inference Parameters
-
-Inference parameters for vLLM APIs.
-
-(sampling-params)=
-(pooling-params)=
-
-```{autodoc2-summary}
-    vllm.SamplingParams
-    vllm.PoolingParams
-```
-
-(multi-modality)=
-
-## Multi-Modality
-
-vLLM provides experimental support for multi-modal models through the {mod}`vllm.multimodal` package.
-
-Multi-modal inputs can be passed alongside text and token prompts to [supported models](#supported-mm-models)
-via the `multi_modal_data` field in {class}`vllm.inputs.PromptType`.
-
-Looking to add your own multi-modal model? Please follow the instructions listed [here](#supports-multimodal).
-
-```{autodoc2-summary}
-    vllm.multimodal.MULTIMODAL_REGISTRY
-```
-
-### Inputs
-
-User-facing inputs.
-
-```{autodoc2-summary}
-    vllm.multimodal.inputs.MultiModalDataDict
-```
-
-Internal data structures.
-
-```{autodoc2-summary}
-    vllm.multimodal.inputs.PlaceholderRange
-    vllm.multimodal.inputs.NestedTensors
-    vllm.multimodal.inputs.MultiModalFieldElem
-    vllm.multimodal.inputs.MultiModalFieldConfig
-    vllm.multimodal.inputs.MultiModalKwargsItem
-    vllm.multimodal.inputs.MultiModalKwargs
-    vllm.multimodal.inputs.MultiModalInputs
-```
-
-### Data Parsing
-
-```{autodoc2-summary}
-    vllm.multimodal.parse
-```
-
-### Data Processing
-
-```{autodoc2-summary}
-    vllm.multimodal.processing
-```
-
-### Memory Profiling
-
-```{autodoc2-summary}
-    vllm.multimodal.profiling
-```
-
-### Registry
-
-```{autodoc2-summary}
-    vllm.multimodal.registry
-```
-
-## Model Development
-
-```{autodoc2-summary}
-    vllm.model_executor.models.interfaces_base
-    vllm.model_executor.models.interfaces
-    vllm.model_executor.models.adapters
-```
diff --git a/docs/source/autodoc2_docstring_parser.py b/docs/source/autodoc2_docstring_parser.py
deleted file mode 100644
index 41c49ed1c545a..0000000000000
--- a/docs/source/autodoc2_docstring_parser.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-from docutils import nodes
-from myst_parser.parsers.sphinx_ import MystParser
-from sphinx.ext.napoleon import docstring
-
-
-class NapoleonParser(MystParser):
-
-    def parse(self, input_string: str, document: nodes.document) -> None:
-        # Get the Sphinx configuration
-        config = document.settings.env.config
-
-        parsed_content = str(
-            docstring.GoogleDocstring(
-                str(docstring.NumpyDocstring(input_string, config)),
-                config,
-            ))
-        return super().parse(parsed_content, document)
-
-
-Parser = NapoleonParser
diff --git a/docs/source/community/blog.md b/docs/source/community/blog.md
deleted file mode 100644
index e8030edfa02ee..0000000000000
--- a/docs/source/community/blog.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# vLLM Blog
-
-vLLM blog posts are published [here](https://blog.vllm.ai/).
diff --git a/docs/source/conf.py b/docs/source/conf.py
deleted file mode 100644
index 5620d6de2c59b..0000000000000
--- a/docs/source/conf.py
+++ /dev/null
@@ -1,263 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-
-# Configuration file for the Sphinx documentation builder.
-#
-# This file only contains a selection of the most common options. For a full
-# list see the documentation:
-# https://www.sphinx-doc.org/en/master/usage/configuration.html
-
-# -- Path setup --------------------------------------------------------------
-
-# If extensions (or modules to document with autodoc) are in another directory,
-# add these directories to sys.path here. If the directory is relative to the
-# documentation root, use os.path.abspath to make it absolute, like shown here.
-
-import datetime
-import logging
-import os
-import re
-import sys
-from pathlib import Path
-
-import requests
-
-logger = logging.getLogger(__name__)
-REPO_ROOT = Path(__file__).resolve().parent.parent.parent
-sys.path.append(os.path.abspath(REPO_ROOT))
-
-# -- Project information -----------------------------------------------------
-
-project = 'vLLM'
-copyright = f'{datetime.datetime.now().year}, vLLM Team'
-author = 'the vLLM Team'
-
-# -- General configuration ---------------------------------------------------
-
-# Add any Sphinx extension module names here, as strings. They can be
-# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
-# ones.
-extensions = [
-    "sphinx.ext.napoleon",
-    "sphinx.ext.linkcode",
-    "sphinx.ext.intersphinx",
-    "sphinx_copybutton",
-    "autodoc2",
-    "myst_parser",
-    "sphinxarg.ext",
-    "sphinx_design",
-    "sphinx_togglebutton",
-]
-myst_enable_extensions = [
-    "colon_fence",
-    "fieldlist",
-]
-autodoc2_packages = [
-    {
-        "path": "../../vllm",
-        "exclude_dirs": ["__pycache__", "third_party"],
-    },
-]
-autodoc2_output_dir = "api"
-autodoc2_render_plugin = "myst"
-autodoc2_hidden_objects = ["dunder", "private", "inherited"]
-autodoc2_sort_names = True
-autodoc2_index_template = None
-
-# Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
-
-# List of patterns, relative to source directory, that match files and
-# directories to ignore when looking for source files.
-# This pattern also affects html_static_path and html_extra_path.
-exclude_patterns: list[str] = ["**/*.template.md", "**/*.inc.md"]
-
-# Exclude the prompt "$" when copying code
-copybutton_prompt_text = r"\$ "
-copybutton_prompt_is_regexp = True
-
-# -- Options for HTML output -------------------------------------------------
-
-# The theme to use for HTML and HTML Help pages.  See the documentation for
-# a list of builtin themes.
-#
-html_title = project
-html_theme = 'sphinx_book_theme'
-html_logo = 'assets/logos/vllm-logo-text-light.png'
-html_favicon = 'assets/logos/vllm-logo-only-light.ico'
-html_theme_options = {
-    'path_to_docs': 'docs/source',
-    'repository_url': 'https://github.com/vllm-project/vllm',
-    'use_repository_button': True,
-    'use_edit_page_button': True,
-    # Prevents the full API being added to the left sidebar of every page.
-    # Reduces build time by 2.5x and reduces build size from ~225MB to ~95MB.
-    'collapse_navbar': True,
-    # Makes API visible in the right sidebar on API reference pages.
-    'show_toc_level': 3,
-}
-# Add any paths that contain custom static files (such as style sheets) here,
-# relative to this directory. They are copied after the builtin static files,
-# so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ["_static"]
-html_js_files = ["custom.js"]
-html_css_files = ["custom.css"]
-
-myst_heading_anchors = 2
-myst_url_schemes = {
-    'http': None,
-    'https': None,
-    'mailto': None,
-    'ftp': None,
-    "gh-issue": {
-        "url":
-        "https://github.com/vllm-project/vllm/issues/{{path}}#{{fragment}}",
-        "title": "Issue #{{path}}",
-        "classes": ["github"],
-    },
-    "gh-pr": {
-        "url":
-        "https://github.com/vllm-project/vllm/pull/{{path}}#{{fragment}}",
-        "title": "Pull Request #{{path}}",
-        "classes": ["github"],
-    },
-    "gh-project": {
-        "url": "https://github.com/orgs/vllm-project/projects/{{path}}",
-        "title": "Project #{{path}}",
-        "classes": ["github"],
-    },
-    "gh-dir": {
-        "url": "https://github.com/vllm-project/vllm/tree/main/{{path}}",
-        "title": "{{path}}",
-        "classes": ["github"],
-    },
-    "gh-file": {
-        "url": "https://github.com/vllm-project/vllm/blob/main/{{path}}",
-        "title": "{{path}}",
-        "classes": ["github"],
-    },
-}
-
-# see https://docs.readthedocs.io/en/stable/reference/environment-variables.html # noqa
-READTHEDOCS_VERSION_TYPE = os.environ.get('READTHEDOCS_VERSION_TYPE')
-if READTHEDOCS_VERSION_TYPE == "tag":
-    # remove the warning banner if the version is a tagged release
-    header_file = os.path.join(os.path.dirname(__file__),
-                               "_templates/sections/header.html")
-    # The file might be removed already if the build is triggered multiple times
-    # (readthedocs build both HTML and PDF versions separately)
-    if os.path.exists(header_file):
-        os.remove(header_file)
-
-
-# Generate additional rst documentation here.
-def setup(app):
-    from docs.source.generate_examples import generate_examples
-    generate_examples()
-
-
-_cached_base: str = ""
-_cached_branch: str = ""
-
-
-def get_repo_base_and_branch(pr_number):
-    global _cached_base, _cached_branch
-    if _cached_base and _cached_branch:
-        return _cached_base, _cached_branch
-
-    url = f"https://api.github.com/repos/vllm-project/vllm/pulls/{pr_number}"
-    response = requests.get(url)
-    if response.status_code == 200:
-        data = response.json()
-        _cached_base = data['head']['repo']['full_name']
-        _cached_branch = data['head']['ref']
-        return _cached_base, _cached_branch
-    else:
-        logger.error("Failed to fetch PR details: %s", response)
-        return None, None
-
-
-def linkcode_resolve(domain, info):
-    if domain != 'py':
-        return None
-    if not info['module']:
-        return None
-
-    # Get path from module name
-    file = Path(f"{info['module'].replace('.', '/')}.py")
-    path = REPO_ROOT / file
-    if not path.exists():
-        path = REPO_ROOT / file.with_suffix("") / "__init__.py"
-    if not path.exists():
-        return None
-
-    # Get the line number of the object
-    with open(path) as f:
-        lines = f.readlines()
-    name = info['fullname'].split(".")[-1]
-    pattern = fr"^( {{4}})*((def|class) )?{name}\b.*"
-    for lineno, line in enumerate(lines, 1):
-        if not line or line.startswith("#"):
-            continue
-        if re.match(pattern, line):
-            break
-
-    # If the line number is not found, return None
-    if lineno == len(lines):
-        return None
-
-    # If the line number is found, create the URL
-    filename = path.relative_to(REPO_ROOT)
-    if "checkouts" in path.parts:
-        # a PR build on readthedocs
-        pr_number = REPO_ROOT.name
-        base, branch = get_repo_base_and_branch(pr_number)
-        if base and branch:
-            return f"https://github.com/{base}/blob/{branch}/{filename}#L{lineno}"
-    # Otherwise, link to the source file on the main branch
-    return f"https://github.com/vllm-project/vllm/blob/main/{filename}#L{lineno}"
-
-
-# Mock out external dependencies here, otherwise sphinx-argparse won't work.
-autodoc_mock_imports = [
-    "huggingface_hub",
-    "pydantic",
-    "zmq",
-    "cloudpickle",
-    "aiohttp",
-    "starlette",
-    "blake3",
-    "cpuinfo",
-    "transformers",
-    "psutil",
-    "vllm._C",
-    "PIL",
-    "numpy",
-    "tqdm",
-    # The mocks below are required by
-    # docs/source/serving/openai_compatible_server.md's
-    # vllm.entrypoints.openai.cli_args
-    "openai",
-    "fastapi",
-    "partial_json_parser",
-]
-
-for mock_target in autodoc_mock_imports:
-    if mock_target in sys.modules:
-        logger.info(
-            "Potentially problematic mock target (%s) found; "
-            "autodoc_mock_imports cannot mock modules that have already "
-            "been loaded into sys.modules when the sphinx build starts.",
-            mock_target)
-
-intersphinx_mapping = {
-    "python": ("https://docs.python.org/3", None),
-    "typing_extensions":
-    ("https://typing-extensions.readthedocs.io/en/latest", None),
-    "aiohttp": ("https://docs.aiohttp.org/en/stable", None),
-    "pillow": ("https://pillow.readthedocs.io/en/stable", None),
-    "numpy": ("https://numpy.org/doc/stable", None),
-    "torch": ("https://pytorch.org/docs/stable", None),
-    "psutil": ("https://psutil.readthedocs.io/en/stable", None),
-}
-
-navigation_with_keys = False
diff --git a/docs/source/contributing/model/index.md b/docs/source/contributing/model/index.md
deleted file mode 100644
index 721ee3cd2047c..0000000000000
--- a/docs/source/contributing/model/index.md
+++ /dev/null
@@ -1,27 +0,0 @@
-(new-model)=
-
-# Adding a New Model
-
-This section provides more information on how to integrate a [PyTorch](https://pytorch.org/) model into vLLM.
-
-:::{toctree}
-:caption: Contents
-:maxdepth: 1
-
-basic
-registration
-tests
-multimodal
-:::
-
-:::{note}
-The complexity of adding a new model depends heavily on the model's architecture.
-The process is considerably straightforward if the model shares a similar architecture with an existing model in vLLM.
-However, for models that include new operators (e.g., a new attention mechanism), the process can be a bit more complex.
-:::
-
-:::{tip}
-If you are encountering issues while integrating your model into vLLM, feel free to open a [GitHub issue](https://github.com/vllm-project/vllm/issues)
-or ask on our [developer slack](https://slack.vllm.ai).
-We will be happy to help you out!
-:::
diff --git a/docs/source/contributing/model/multimodal.md b/docs/source/contributing/model/multimodal.md
deleted file mode 100644
index b42536f054d76..0000000000000
--- a/docs/source/contributing/model/multimodal.md
+++ /dev/null
@@ -1,834 +0,0 @@
-(supports-multimodal)=
-
-# Multi-Modal Support
-
-This document walks you through the steps to extend a basic model so that it accepts [multi-modal inputs](#multimodal-inputs).
-
-## 1. Update the base vLLM model
-
-It is assumed that you have already implemented the model in vLLM according to [these steps](#new-model-basic).
-Further update the model as follows:
-
-- Reserve a keyword parameter in {meth}`~torch.nn.Module.forward` for each input tensor that corresponds to a multi-modal input, as shown in the following example:
-
-  ```diff
-    def forward(
-        self,
-        input_ids: torch.Tensor,
-        positions: torch.Tensor,
-  +     pixel_values: torch.Tensor,
-    ) -> SamplerOutput:
-  ```
-  
-  More conveniently, you can simply pass `**kwargs` to the {meth}`~torch.nn.Module.forward` method and retrieve the keyword parameters for multimodal inputs from it.
-
-- Implement {meth}`~vllm.model_executor.models.interfaces.SupportsMultiModal.get_multimodal_embeddings` that returns the embeddings from running the multimodal inputs through the multimodal tokenizer of the model. Below we provide a boilerplate of a typical implementation pattern, but feel free to adjust it to your own needs.
-
-    ```python
-    class YourModelForImage2Seq(nn.Module):
-        ...
-
-        def _process_image_input(self, image_input: YourModelImageInputs) -> torch.Tensor:
-
-            assert self.vision_encoder is not None
-            image_features = self.vision_encoder(image_input)
-            return self.multi_modal_projector(image_features)
-
-        def get_multimodal_embeddings(
-                self, **kwargs: object) -> Optional[MultiModalEmbeddings]:
-
-            # Validate the multimodal input keyword arguments
-            image_input = self._parse_and_validate_image_input(**kwargs)
-            if image_input is None:
-                return None
-
-            # Run multimodal inputs through encoder and projector
-            vision_embeddings = self._process_image_input(image_input)
-            return vision_embeddings
-    ```
-
-    :::{important}
-    The returned `multimodal_embeddings` must be either a **3D {class}`torch.Tensor`** of shape `(num_items, feature_size, hidden_size)`, or a **list / tuple of 2D {class}`torch.Tensor`'s** of shape `(feature_size, hidden_size)`, so that `multimodal_embeddings[i]` retrieves the embeddings generated from the `i`-th multimodal data item (e.g, image) of the request.
-    :::
-
-- Implement {meth}`~vllm.model_executor.models.interfaces.SupportsMultiModal.get_input_embeddings` to merge `multimodal_embeddings` with text embeddings from the `input_ids`. If input processing for the model is implemented correctly (see sections below), then you can leverage the utility function we provide to easily merge the embeddings.
-
-    ```python
-    from .utils import merge_multimodal_embeddings
-
-    class YourModelForImage2Seq(nn.Module):
-        ...
-
-        def get_input_embeddings(
-            self,
-            input_ids: torch.Tensor,
-            multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
-        ) -> torch.Tensor:
-
-            # `get_input_embeddings` should already be implemented for the language 
-            # model as one of the requirements of basic vLLM model implementation.
-            inputs_embeds = self.language_model.get_input_embeddings(input_ids)
-
-            if multimodal_embeddings is not None:
-                inputs_embeds = merge_multimodal_embeddings(
-                    input_ids=input_ids, 
-                    inputs_embeds=inputs_embeds, 
-                    multimodal_embeddings=multimodal_embeddings,
-                    placeholder_token_id=self.config.image_token_index)
-
-            return inputs_embeds
-    ```
-
-- Implement {meth}`~vllm.model_executor.models.interfaces.SupportsMultiModal.get_language_model` getter to provide stable access to the underlying language model.
-
-    ```python
-    class YourModelForImage2Seq(nn.Module):
-        ...
-
-        def get_language_model(self) -> torch.nn.Module:
-            # Change `language_model` according to your implementation.
-            return self.language_model
-    ```
-
-- Once the above steps are done, update the model class with the {class}`~vllm.model_executor.models.interfaces.SupportsMultiModal` interface.
-
-  ```diff
-  + from vllm.model_executor.models.interfaces import SupportsMultiModal
-
-  - class YourModelForImage2Seq(nn.Module):
-  + class YourModelForImage2Seq(nn.Module, SupportsMultiModal):
-  ```
-
-  :::{note}
-  The model class does not have to be named {code}`*ForCausalLM`.
-  Check out [the HuggingFace Transformers documentation](https://huggingface.co/docs/transformers/model_doc/auto#multimodal) for some examples.
-  :::
-
-## 2. Specify processing information
-
-Next, create a subclass of {class}`~vllm.multimodal.processing.BaseProcessingInfo`
-to provide basic information related to HF processing.
-
-### Maximum number of input items
-
-You need to override the abstract method {meth}`~vllm.multimodal.processing.BaseProcessingInfo.get_supported_mm_limits`
-to return the maximum number of input items for each modality supported by the model.
-
-For example, if the model supports any number of images but only one video per prompt:
-
-```python
-def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]:
-    return {"image": None, "video": 1}
-```
-
-## 3. Specify dummy inputs
-
-Then, inherit {class}`~vllm.multimodal.profiling.BaseDummyInputsBuilder` to construct dummy inputs for
-HF processing as well as memory profiling.
-
-### For memory profiling
-
-Override the abstract methods {meth}`~vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_text` and {meth}`~vllm.multimodal.profiling.BaseDummyInputsBuilder.get_dummy_mm_data` to construct dummy inputs for memory profiling. These dummy inputs should result in the worst-case memory usage of the model so that vLLM can reserve the correct amount of memory for it.
-
-Assuming that the memory usage increases with the number of tokens, the dummy inputs can be constructed to maximize the number of output embeddings, which is the same number as placeholder feature tokens.
-
-::::{tab-set}
-:::{tab-item} Basic example: LLaVA
-:sync: llava
-
-Looking at the code of HF's `LlavaForConditionalGeneration`:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/llava/modeling_llava.py#L530-L544
-n_image_tokens = (input_ids == self.config.image_token_index).sum().item()
-n_image_features = image_features.shape[0] * image_features.shape[1]
-
-if n_image_tokens != n_image_features:
-    raise ValueError(
-        f"Image features and image tokens do not match: tokens: {n_image_tokens}, features {n_image_features}"
-    )
-special_image_mask = (
-    (input_ids == self.config.image_token_index)
-    .unsqueeze(-1)
-    .expand_as(inputs_embeds)
-    .to(inputs_embeds.device)
-)
-image_features = image_features.to(inputs_embeds.device, inputs_embeds.dtype)
-inputs_embeds = inputs_embeds.masked_scatter(special_image_mask, image_features)
-```
-
-The number of placeholder feature tokens per image is `image_features.shape[1]`.
-`image_features` is calculated inside the `get_image_features` method:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/llava/modeling_llava.py#L290-L300
-image_outputs = self.vision_tower(pixel_values, output_hidden_states=True)
-
-selected_image_feature = image_outputs.hidden_states[vision_feature_layer]
-if vision_feature_select_strategy == "default":
-    selected_image_feature = selected_image_feature[:, 1:]
-elif vision_feature_select_strategy == "full":
-    selected_image_feature = selected_image_feature
-else:
-    raise ValueError(f"Unexpected select feature strategy: {self.config.vision_feature_select_strategy}")
-image_features = self.multi_modal_projector(selected_image_feature)
-return image_features
-```
-
-We can infer that `image_features.shape[1]` is based on `image_outputs.hidden_states.shape[1]` from the vision tower
-(`CLIPVisionModel` for the [`llava-hf/llava-1.5-7b-hf`](https://huggingface.co/llava-hf/llava-1.5-7b-hf) model).
-Moreover, we only need the sequence length (the second dimension of the tensor) to get `image_features.shape[1]`.
-The sequence length is determined by the initial hidden states in `CLIPVisionTransformer` since the attention
-mechanism doesn't change the sequence length of the output hidden states.
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/clip/modeling_clip.py#L1094-L1102
-hidden_states = self.embeddings(pixel_values, interpolate_pos_encoding=interpolate_pos_encoding)
-hidden_states = self.pre_layrnorm(hidden_states)
-
-encoder_outputs = self.encoder(
-    inputs_embeds=hidden_states,
-    output_attentions=output_attentions,
-    output_hidden_states=output_hidden_states,
-    return_dict=return_dict,
-)
-```
-
-To find the sequence length, we turn to the code of `CLIPVisionEmbeddings`:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/clip/modeling_clip.py#L247-L257
-target_dtype = self.patch_embedding.weight.dtype
-patch_embeds = self.patch_embedding(pixel_values.to(dtype=target_dtype))  # shape = [*, width, grid, grid]
-patch_embeds = patch_embeds.flatten(2).transpose(1, 2)
-
-class_embeds = self.class_embedding.expand(batch_size, 1, -1)
-embeddings = torch.cat([class_embeds, patch_embeds], dim=1)
-if interpolate_pos_encoding:
-    embeddings = embeddings + self.interpolate_pos_encoding(embeddings, height, width)
-else:
-    embeddings = embeddings + self.position_embedding(self.position_ids)
-return embeddings
-```
-
-We can infer that `embeddings.shape[1] == self.num_positions`, where
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/clip/modeling_clip.py#L195-L196
-self.num_patches = (self.image_size // self.patch_size) ** 2
-self.num_positions = self.num_patches + 1
-```
-
-Overall, the number of placeholder feature tokens for an image can be calculated as:
-
-```python
-def get_num_image_tokens(
-    self,
-    *,
-    image_width: int,
-    image_height: int,
-) -> int:
-    hf_config = self.get_hf_config()
-    hf_processor = self.get_hf_processor()
-
-    image_size = hf_config.vision_config.image_size
-    patch_size = hf_config.vision_config.patch_size
-
-    num_image_tokens = (image_size // patch_size) ** 2 + 1
-    if hf_processor.vision_feature_select_strategy == "default":
-        num_image_tokens -= 1
-
-    return num_image_tokens
-```
-
-Notice that the number of image tokens doesn't depend on the image width and height.
-We can simply use a dummy `image_size` to calculate the multimodal profiling data:
-
-```python
-# NOTE: In actuality, this is usually implemented as part of the
-# model's subclass of `BaseProcessingInfo`, but we show it as is
-# here for simplicity.
-def get_image_size_with_most_features(self) -> ImageSize:
-    hf_config = self.get_hf_config()
-    width = height = hf_config.image_size
-    return ImageSize(width=width, height=height)
-
-def get_dummy_mm_data(
-    self,
-    seq_len: int,
-    mm_counts: Mapping[str, int],
-) -> MultiModalDataDict:
-    num_images = mm_counts.get("image", 0)
-
-    target_width, target_height = \
-        self.info.get_image_size_with_most_features()
-
-    return {
-        "image":
-        self._get_dummy_images(width=target_width,
-                               height=target_height,
-                               num_images=num_images)
-    }
-```
-
-For the text, we simply expand the multimodal image token from the model config to match the desired number of images.
-
-```python
-def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
-    num_images = mm_counts.get("image", 0)
-
-    processor = self.info.get_hf_processor()
-    image_token = processor.image_token
-
-    return image_token * num_images
-```
-
-:::
-
-:::{tab-item} No input placeholders: Fuyu
-:sync: fuyu
-
-Looking at the code of HF's `FuyuForCausalLM`:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/modeling_fuyu.py#L311-L322
-if image_patches is not None and past_key_values is None:
-    patch_embeddings = [
-        self.vision_embed_tokens(patch.to(self.vision_embed_tokens.weight.dtype))
-        .squeeze(0)
-        .to(inputs_embeds.device)
-        for patch in image_patches
-    ]
-    inputs_embeds = self.gather_continuous_embeddings(
-        word_embeddings=inputs_embeds,
-        continuous_embeddings=patch_embeddings,
-        image_patch_input_indices=image_patches_indices,
-    )
-```
-
-The number of placeholder feature tokens for the `i`th item in the batch is `patch_embeddings[i].shape[0]`,
-which is the same as `image_patches[i].shape[0]`, i.e. `num_total_patches`.
-
-Unlike LLaVA, Fuyu does not define the number of patches inside the modeling file. Where can we get more information?
-Considering that the model input comes from the output of `FuyuProcessor`, let's **look at the preprocessing files**.
-
-The image outputs are obtained by calling `FuyuImageProcessor.preprocess` and then
-`FuyuImageProcessor.preprocess_with_tokenizer_info` inside `FuyuProcessor`.
-
-In `FuyuImageProcessor.preprocess`, the images are resized and padded to the target `FuyuImageProcessor.size`,
-returning the dimensions after resizing (but before padding) as metadata.
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/processing_fuyu.py#L541-L544
-image_encoding = self.image_processor.preprocess(images, **output_kwargs["images_kwargs"])
-batch_images = image_encoding["images"]
-image_unpadded_heights = image_encoding["image_unpadded_heights"]
-image_unpadded_widths = image_encoding["image_unpadded_widths"]
-
-# https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/image_processing_fuyu.py#L480-L
-if do_resize:
-    batch_images = [
-        [self.resize(image, size=size, input_data_format=input_data_format) for image in images]
-        for images in batch_images
-    ]
-
-image_sizes = [get_image_size(images[0], channel_dim=input_data_format) for images in batch_images]
-image_unpadded_heights = [[image_size[0]] for image_size in image_sizes]
-image_unpadded_widths = [[image_size[1]] for image_size in image_sizes]
-
-if do_pad:
-    batch_images = [
-        [
-            self.pad_image(
-                image,
-                size=size,
-                mode=padding_mode,
-                constant_values=padding_value,
-                input_data_format=input_data_format,
-            )
-            for image in images
-        ]
-        for images in batch_images
-    ]
-```
-
-In `FuyuImageProcessor.preprocess_with_tokenizer_info`, the images are split into patches based on this metadata:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/processing_fuyu.py#L417-L425
-model_image_input = self.image_processor.preprocess_with_tokenizer_info(
-    image_input=tensor_batch_images,
-    image_present=image_present,
-    image_unpadded_h=image_unpadded_heights,
-    image_unpadded_w=image_unpadded_widths,
-    image_placeholder_id=image_placeholder_id,
-    image_newline_id=image_newline_id,
-    variable_sized=True,
-)
-
-# https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/image_processing_fuyu.py#L638-L658
-image_height, image_width = image.shape[1], image.shape[2]
-if variable_sized:  # variable_sized=True
-    new_h = min(
-        image_height,
-        math.ceil(image_unpadded_h[batch_index, subseq_index] / patch_height) * patch_height,
-    )
-    new_w = min(
-        image_width,
-        math.ceil(image_unpadded_w[batch_index, subseq_index] / patch_width) * patch_width,
-    )
-    image = image[:, :new_h, :new_w]
-    image_height, image_width = new_h, new_w
-
-num_patches = self.get_num_patches(image_height=image_height, image_width=image_width)
-tensor_of_image_ids = torch.full(
-    [num_patches], image_placeholder_id, dtype=torch.int32, device=image_input.device
-)
-patches = self.patchify_image(image=image.unsqueeze(0)).squeeze(0)
-assert num_patches == patches.shape[0]
-```
-
-The number of patches is in turn defined by `FuyuImageProcessor.get_num_patches`:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/image_processing_fuyu.py#L552-L562
-patch_size = patch_size if patch_size is not None else self.patch_size
-patch_height, patch_width = self.patch_size["height"], self.patch_size["width"]
-
-if image_height % patch_height != 0:
-    raise ValueError(f"{image_height=} must be divisible by {patch_height}")
-if image_width % patch_width != 0:
-    raise ValueError(f"{image_width=} must be divisible by {patch_width}")
-
-num_patches_per_dim_h = image_height // patch_height
-num_patches_per_dim_w = image_width // patch_width
-num_patches = num_patches_per_dim_h * num_patches_per_dim_w
-```
-
-These image patches correspond to placeholder tokens (`|SPEAKER|`). So, we just need to maximize the number of image patches. Since input images are first resized
-to fit within `image_processor.size`, we can maximize the number of image patches by inputting an image with size equal to `image_processor.size`.
-
-```python
-def get_image_size_with_most_features(self) -> ImageSize:
-    image_processor = self.get_image_processor()
-    return ImageSize(width=image_processor.size["width"],
-                        height=image_processor.size["height"])
-```
-
-Fuyu does not expect image placeholders in the inputs to HF processor, so
-the dummy prompt text is empty regardless of the number of images.
-
-```python
-def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str:
-    return ""
-```
-
-For the multimodal image profiling data, the logic is very similar to LLaVA:
-
-```python
-def get_dummy_mm_data(
-    self,
-    seq_len: int,
-    mm_counts: Mapping[str, int],
-) -> MultiModalDataDict:
-    target_width, target_height = \
-        self.info.get_image_size_with_most_features()
-    num_images = mm_counts.get("image", 0)
-
-    return {
-        "image":
-        self._get_dummy_images(width=target_width,
-                               height=target_height,
-                               num_images=num_images)
-    }
-```
-
-:::
-
-::::
-
-## 4. Specify processing details
-
-Afterwards, create a subclass of {class}`~vllm.multimodal.processing.BaseMultiModalProcessor`
-to fill in the missing details about HF processing.
-
-:::{seealso}
-[Multi-Modal Data Processing](#mm-processing)
-:::
-
-### Multi-modal fields
-
-Override {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_mm_fields_config` to
-return a schema of the tensors outputted by the HF processor that are related to the input multi-modal items.
-
-:::::{tab-set}
-::::{tab-item} Basic example: LLaVA
-:sync: llava
-
-The output of `CLIPImageProcessor` is a simple tensor with shape
-`(num_images, num_channels, image_height, image_width)`:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/clip/image_processing_clip.py#L339-L345
-images = [
-    to_channel_dimension_format(image, data_format, input_channel_dim=input_data_format)
-    for image in all_images
-]
-
-data = {"pixel_values": images}
-return BatchFeature(data=data, tensor_type=return_tensors)
-```
-
-So, we override {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_mm_fields_config` as follows:
-
-```python
-def _get_mm_fields_config(
-    self,
-    hf_inputs: BatchFeature,
-    hf_processor_mm_kwargs: Mapping[str, object],
-) -> Mapping[str, MultiModalFieldConfig]:
-    return dict(
-        pixel_values=MultiModalFieldConfig.batched("image"),
-    )
-```
-
-:::{note}
-Our [actual code](gh-file:vllm/model_executor/models/llava.py) additionally supports
-pre-computed image embeddings, which can be passed to be model via the `image_embeds` argument.
-:::
-
-::::
-
-::::{tab-item} With postprocessing: Fuyu
-:sync: fuyu
-
-The `image_patches` output of `FuyuImageProcessor.preprocess_with_tokenizer_info` concatenates
-the patches from each image belonging to an item in the batch:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/image_processing_fuyu.py#L673-L679
-        image_input_ids.append(tensor_of_image_ids)
-        image_patches.append(patches)
-    else:
-        image_input_ids.append(torch.tensor([], dtype=torch.int32, device=image_input.device))
-
-batch_image_input_ids.append(image_input_ids)
-batch_image_patches.append(image_patches)
-```
-
-The shape of `image_patches` outputted by `FuyuImageProcessor` is therefore
-`(1, num_images, num_patches, patch_width * patch_height * num_channels)`.
-
-In order to support the use of {func}`MultiModalFieldConfig.batched` like in LLaVA,
-we remove the extra batch dimension by overriding {meth}`BaseMultiModalProcessor._call_hf_processor`:
-
-```python
-def _call_hf_processor(
-    self,
-    prompt: str,
-    mm_data: Mapping[str, object],
-    mm_kwargs: Mapping[str, object],
-) -> BatchFeature:
-    processed_outputs = super()._call_hf_processor(
-        prompt=prompt,
-        mm_data=mm_data,
-        mm_kwargs=mm_kwargs,
-    )
-
-    image_patches = processed_outputs.get("image_patches")
-    if image_patches is not None:
-        images = mm_data["images"]
-        assert isinstance(images, list)
-
-        # Original output: (1, num_images, Pn, Px * Py * C)
-        # New output: (num_images, Pn, Px * Py * C)
-        assert (isinstance(image_patches, list)
-                and len(image_patches) == 1)
-        assert (isinstance(image_patches[0], torch.Tensor)
-                and len(image_patches[0]) == len(images))
-
-        processed_outputs["image_patches"] = image_patches[0]
-
-    return processed_outputs
-```
-
-:::{note}
-Our [actual code](gh-file:vllm/model_executor/models/fuyu.py) has special handling
-for text-only inputs to prevent unnecessary warnings from HF processor.
-:::
-
-This lets us override {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_mm_fields_config` as follows:
-
-```python
-def _get_mm_fields_config(
-    self,
-    hf_inputs: BatchFeature,
-    hf_processor_mm_kwargs: Mapping[str, object],
-) -> Mapping[str, MultiModalFieldConfig]:
-    return dict(image_patches=MultiModalFieldConfig.batched("image"))
-```
-
-::::
-
-:::::
-
-### Prompt updates
-
-Override {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates` to
-return a list of {class}`~vllm.multimodal.processing.PromptUpdate` instances.
-
-Each {class}`~vllm.multimodal.processing.PromptUpdate` instance specifies an update operation
-(e.g.: insertion, replacement) performed by the HF processor.
-
-::::{tab-set}
-:::{tab-item} Basic example: LLaVA
-:sync: llava
-
-Looking at HF's `LlavaProcessor`:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.47.1/src/transformers/models/llava/processing_llava.py#L167-L170
-prompt_strings = []
-for sample in text:
-    sample = sample.replace(self.image_token, self.image_token * num_image_tokens)
-    prompt_strings.append(sample)
-```
-
-It simply repeats each input `image_token` a number of times equal to the number of placeholder feature tokens (`num_image_tokens`).
-Based on this, we override {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates` as follows:
-
-```python
-def _get_prompt_updates(
-    self,
-    mm_items: MultiModalDataItems,
-    hf_processor_mm_kwargs: Mapping[str, object],
-    out_mm_kwargs: MultiModalKwargs,
-) -> Sequence[PromptUpdate]:
-    hf_config = self.info.get_hf_config()
-    image_token_id = hf_config.image_token_index
-
-    def get_replacement(item_idx: int):
-        images = mm_items.get_items("image", ImageProcessorItems)
-
-        image_size = images.get_image_size(item_idx)
-        num_image_tokens = self.info.get_num_image_tokens(
-            image_width=image_size.width,
-            image_height=image_size.height,
-        )
-
-        return [image_token_id] * num_image_tokens
-
-    return [
-        PromptReplacement(
-            modality="image",
-            target=[image_token_id],
-            replacement=get_replacement,
-        ),
-    ]
-```
-
-:::
-
-:::{tab-item} Handling additional tokens: Fuyu
-:sync: fuyu
-
-Recall the layout of feature tokens from Step 2:
-
-```
-|SPEAKER||SPEAKER|...|SPEAKER||NEWLINE|
-|SPEAKER||SPEAKER|...|SPEAKER||NEWLINE|
-...
-|SPEAKER||SPEAKER|...|SPEAKER||NEWLINE|
-```
-
-We define a helper function to return `ncols` and `nrows` directly:
-
-```python
-def get_image_feature_grid_size(
-    self,
-    *,
-    image_width: int,
-    image_height: int,
-) -> tuple[int, int]:
-    image_processor = self.get_image_processor()
-    target_width = image_processor.size["width"]
-    target_height = image_processor.size["height"]
-    patch_width = image_processor.patch_size["width"]
-    patch_height = image_processor.patch_size["height"]
-
-    if not (image_width <= target_width and image_height <= target_height):
-        height_scale_factor = target_height / image_height
-        width_scale_factor = target_width / image_width
-        optimal_scale_factor = min(height_scale_factor, width_scale_factor)
-
-        image_height = int(image_height * optimal_scale_factor)
-        image_width = int(image_width * optimal_scale_factor)
-
-    ncols = math.ceil(image_width / patch_width)
-    nrows = math.ceil(image_height / patch_height)
-    return ncols, nrows
-```
-
-Based on this, we can initially define our replacement tokens as:
-
-```python
-def get_replacement(item_idx: int):
-    images = mm_items.get_items("image", ImageProcessorItems)
-    image_size = images.get_image_size(item_idx)
-
-    ncols, nrows = self.info.get_image_feature_grid_size(
-        image_width=image_size.width,
-        image_height=image_size.height,
-    )
-
-    # `_IMAGE_TOKEN_ID` corresponds to `|SPEAKER|`
-    # `_NEWLINE_TOKEN_ID` corresponds to `|NEWLINE|`
-    return ([_IMAGE_TOKEN_ID] * ncols + [_NEWLINE_TOKEN_ID]) * nrows
-```
-
-However, this is not entirely correct. After `FuyuImageProcessor.preprocess_with_tokenizer_info` is called,
-a BOS token (`<s>`) is also added to the prompt:
-
-```python
-# https://github.com/huggingface/transformers/blob/v4.48.3/src/transformers/models/fuyu/processing_fuyu.py#L417-L435
-model_image_input = self.image_processor.preprocess_with_tokenizer_info(
-    image_input=tensor_batch_images,
-    image_present=image_present,
-    image_unpadded_h=image_unpadded_heights,
-    image_unpadded_w=image_unpadded_widths,
-    image_placeholder_id=image_placeholder_id,
-    image_newline_id=image_newline_id,
-    variable_sized=True,
-)
-prompt_tokens, prompts_length = _tokenize_prompts_with_image_and_batch(
-    tokenizer=self.tokenizer,
-    prompts=prompts,
-    scale_factors=scale_factors,
-    max_tokens_to_generate=self.max_tokens_to_generate,
-    max_position_embeddings=self.max_position_embeddings,
-    add_BOS=True,
-    add_beginning_of_answer_token=True,
-)
-```
-
-To assign the vision embeddings to only the image tokens, instead of a string
-you can return an instance of {class}`~vllm.multimodal.processing.PromptUpdateDetails`:
-
-```python
-hf_config = self.info.get_hf_config()
-bos_token_id = hf_config.bos_token_id  # `<s>`
-assert isinstance(bos_token_id, int)
-
-def get_replacement_fuyu(item_idx: int):
-    images = mm_items.get_items("image", ImageProcessorItems)
-    image_size = images.get_image_size(item_idx)
-
-    ncols, nrows = self.info.get_image_feature_grid_size(
-        image_width=image_size.width,
-        image_height=image_size.height,
-    )
-    image_tokens = ([_IMAGE_TOKEN_ID] * ncols +
-                    [_NEWLINE_TOKEN_ID]) * nrows
-
-    return PromptUpdateDetails.select_token_id(
-        image_tokens + [bos_token_id],
-        embed_token_id=_IMAGE_TOKEN_ID,
-    )
-```
-
-Finally, noticing that the HF processor removes the `|ENDOFTEXT|` token from the tokenized prompt,
-we can search for it to conduct the replacement at the start of the string:
-
-```python
-def _get_prompt_updates(
-    self,
-    mm_items: MultiModalDataItems,
-    hf_processor_mm_kwargs: Mapping[str, object],
-    out_mm_kwargs: MultiModalKwargs,
-) -> Sequence[PromptUpdate]:
-    hf_config = self.info.get_hf_config()
-    bos_token_id = hf_config.bos_token_id
-    assert isinstance(bos_token_id, int)
-
-    tokenizer = self.info.get_tokenizer()
-    eot_token_id = tokenizer.bos_token_id
-    assert isinstance(eot_token_id, int)
-
-    def get_replacement_fuyu(item_idx: int):
-        images = mm_items.get_items("image", ImageProcessorItems)
-        image_size = images.get_image_size(item_idx)
-
-        ncols, nrows = self.info.get_image_feature_grid_size(
-            image_width=image_size.width,
-            image_height=image_size.height,
-        )
-        image_tokens = ([_IMAGE_TOKEN_ID] * ncols +
-                        [_NEWLINE_TOKEN_ID]) * nrows
-
-        return PromptUpdateDetails.select_token_id(
-            image_tokens + [bos_token_id],
-            embed_token_id=_IMAGE_TOKEN_ID,
-        )
-
-    return [
-        PromptReplacement(
-            modality="image",
-            target=[eot_token_id],
-            replacement=get_replacement_fuyu,
-        )
-    ]
-```
-
-:::
-
-::::
-
-## 5. Register processor-related classes
-
-After you have defined {class}`~vllm.multimodal.processing.BaseProcessingInfo` (Step 2),
-{class}`~vllm.multimodal.profiling.BaseDummyInputsBuilder` (Step 3),
-and {class}`~vllm.multimodal.processing.BaseMultiModalProcessor` (Step 4),
-decorate the model class with {meth}`MULTIMODAL_REGISTRY.register_processor <vllm.multimodal.registry.MultiModalRegistry.register_processor>`
-to register them to the multi-modal registry:
-
-```diff
-  from vllm.model_executor.models.interfaces import SupportsMultiModal
-+ from vllm.multimodal import MULTIMODAL_REGISTRY
-
-+ @MULTIMODAL_REGISTRY.register_processor(YourMultiModalProcessor,
-+                                         info=YourProcessingInfo,
-+                                         dummy_inputs=YourDummyInputsBuilder)
-  class YourModelForImage2Seq(nn.Module, SupportsMultiModal):
-```
-
-## Notes
-
-### Inserting feature tokens without replacement
-
-Some HF processors directly insert feature tokens without replacing anything in the original prompt. In that case, you can use {class}`~vllm.multimodal.processing.PromptInsertion` instead of {class}`~vllm.multimodal.processing.PromptReplacement` inside {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates`.
-
-Examples:
-
-- BLIP-2 (insert at start of prompt): <gh-file:vllm/model_executor/models/blip2.py>
-- Florence2 (insert at start of prompt): <gh-file:vllm/model_executor/models/florence2.py>
-- Molmo (insert after `<|endoftext|>` token): <gh-file:vllm/model_executor/models/molmo.py>
-
-### Handling prompt updates unrelated to multi-modal data
-
-{meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._get_prompt_updates` assumes that each application of prompt update corresponds to one multi-modal item. If the HF processor performs additional processing regardless of how many multi-modal items there are, you should override {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._apply_hf_processor_tokens_only` so that the processed token inputs are consistent with the result of applying the HF processor on text inputs. This is because token inputs bypass the HF processor according to [our design](#mm-processing).
-
-Examples:
-
-- Chameleon (appends `sep_token`): <gh-file:vllm/model_executor/models/chameleon.py>
-- Fuyu (appends `boa_token`): <gh-file:vllm/model_executor/models/fuyu.py>
-- Molmo (applies chat template which is not defined elsewhere): <gh-file:vllm/model_executor/models/molmo.py>
-
-### Custom HF processor
-
-Some models don't define a HF processor class on HF Hub. In that case, you can define a custom HF processor that has the same call signature as HF processors and pass it to {meth}`~vllm.multimodal.processing.BaseMultiModalProcessor._call_hf_processor`.
-
-Examples:
-
-- DeepSeek-VL2: <gh-file:vllm/model_executor/models/deepseek_vl2.py>
-- InternVL: <gh-file:vllm/model_executor/models/internvl.py>
-- Qwen-VL: <gh-file:vllm/model_executor/models/qwen_vl.py>
diff --git a/docs/source/deployment/docker.md b/docs/source/deployment/docker.md
deleted file mode 100644
index ca56710bc2ef2..0000000000000
--- a/docs/source/deployment/docker.md
+++ /dev/null
@@ -1,133 +0,0 @@
-(deployment-docker)=
-
-# Using Docker
-
-(deployment-docker-pre-built-image)=
-
-## Use vLLM's Official Docker Image
-
-vLLM offers an official Docker image for deployment.
-The image can be used to run OpenAI compatible server and is available on Docker Hub as [vllm/vllm-openai](https://hub.docker.com/r/vllm/vllm-openai/tags).
-
-```console
-$ docker run --runtime nvidia --gpus all \
-    -v ~/.cache/huggingface:/root/.cache/huggingface \
-    --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
-    -p 8000:8000 \
-    --ipc=host \
-    vllm/vllm-openai:latest \
-    --model mistralai/Mistral-7B-v0.1
-```
-
-This image can also be used with other container engines such as [Podman](https://podman.io/).
-
-```console
-$ podman run --gpus all \
-  -v ~/.cache/huggingface:/root/.cache/huggingface \
-  --env "HUGGING_FACE_HUB_TOKEN=$HF_TOKEN" \
-  -p 8000:8000 \
-  --ipc=host \
-  vllm/vllm-openai:latest \
-  --model mistralai/Mistral-7B-v0.1
-```
-
-You can add any other <project:#engine-args> you need after the image tag (`vllm/vllm-openai:latest`).
-
-:::{note}
-You can either use the `--ipc=host` flag or `--shm-size` flag to allow the
-container to access the host's shared memory. vLLM uses PyTorch, which uses shared
-memory to share data between processes under the hood, particularly for tensor parallel inference.
-:::
-
-:::{note}
-Optional dependencies are not included in order to avoid licensing issues (e.g. <gh-issue:8030>).
-
-If you need to use those dependencies (having accepted the license terms),
-create a custom Dockerfile on top of the base image with an extra layer that installs them:
-
-```Dockerfile
-FROM vllm/vllm-openai:v0.8.3
-
-# e.g. install the `audio` optional dependencies
-# NOTE: Make sure the version of vLLM matches the base image!
-RUN uv pip install --system vllm[audio]==0.8.3
-```
-
-:::
-
-:::{tip}
-Some new models may only be available on the main branch of [HF Transformers](https://github.com/huggingface/transformers).
-
-To use the development version of `transformers`, create a custom Dockerfile on top of the base image
-with an extra layer that installs their code from source:
-
-```Dockerfile
-FROM vllm/vllm-openai:latest
-
-RUN uv pip install --system git+https://github.com/huggingface/transformers.git
-```
-
-:::
-
-(deployment-docker-build-image-from-source)=
-
-## Building vLLM's Docker Image from Source
-
-You can build and run vLLM from source via the provided <gh-file:docker/Dockerfile>. To build vLLM:
-
-```console
-# optionally specifies: --build-arg max_jobs=8 --build-arg nvcc_threads=2
-DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --file docker/Dockerfile
-```
-
-:::{note}
-By default vLLM will build for all GPU types for widest distribution. If you are just building for the
-current GPU type the machine is running on, you can add the argument `--build-arg torch_cuda_arch_list=""`
-for vLLM to find the current GPU type and build for that.
-
-If you are using Podman instead of Docker, you might need to disable SELinux labeling by
-adding `--security-opt label=disable` when running `podman build` command to avoid certain [existing issues](https://github.com/containers/buildah/discussions/4184).
-:::
-
-## Building for Arm64/aarch64
-
-A docker container can be built for aarch64 systems such as the Nvidia Grace-Hopper. At the time of writing, this requires the use
-of PyTorch Nightly and should be considered **experimental**. Using the flag `--platform "linux/arm64"` will attempt to build for arm64.
-
-:::{note}
-Multiple modules must be compiled, so this process can take a while. We recommend using the `--build-arg max_jobs=` and `--build-arg nvcc_threads=`
-flags to speed up the build process. However, ensure your `max_jobs` is substantially larger than `nvcc_threads` to get the most benefits.
-Keep an eye on memory usage with parallel jobs as it can be substantial (see example below).
-:::
-
-```console
-# Example of building on Nvidia GH200 server. (Memory usage: ~15GB, Build time: ~1475s / ~25 min, Image size: 6.93GB)
-$ python3 use_existing_torch.py
-$ DOCKER_BUILDKIT=1 docker build . \
-  --file docker/Dockerfile \
-  --target vllm-openai \
-  --platform "linux/arm64" \
-  -t vllm/vllm-gh200-openai:latest \
-  --build-arg max_jobs=66 \
-  --build-arg nvcc_threads=2 \
-  --build-arg torch_cuda_arch_list="9.0+PTX" \
-  --build-arg vllm_fa_cmake_gpu_arches="90-real"
-```
-
-## Use the custom-built vLLM Docker image
-
-To run vLLM with the custom-built Docker image:
-
-```console
-$ docker run --runtime nvidia --gpus all \
-    -v ~/.cache/huggingface:/root/.cache/huggingface \
-    -p 8000:8000 \
-    --env "HUGGING_FACE_HUB_TOKEN=<secret>" \
-    vllm/vllm-openai <args...>
-```
-
-The argument `vllm/vllm-openai` specifies the image to run, and should be replaced with the name of the custom-built image (the `-t` tag from the build command).
-
-:::{note}
-**For version 0.4.1 and 0.4.2 only** - the vLLM docker images under these versions are supposed to be run under the root user since a library under the root user's home directory, i.e. `/root/.config/vllm/nccl/cu12/libnccl.so.2.18.1` is required to be loaded during runtime. If you are running the container under a different user, you may need to first change the permissions of the library (and all the parent directories) to allow the user to access it, then run vLLM with environment variable `VLLM_NCCL_SO_PATH=/root/.config/vllm/nccl/cu12/libnccl.so.2.18.1` .
-:::
diff --git a/docs/source/deployment/frameworks/helm.md b/docs/source/deployment/frameworks/helm.md
deleted file mode 100644
index 7320d727fbaa4..0000000000000
--- a/docs/source/deployment/frameworks/helm.md
+++ /dev/null
@@ -1,250 +0,0 @@
-(deployment-helm)=
-
-# Helm
-
-A Helm chart to deploy vLLM for Kubernetes
-
-Helm is a package manager for Kubernetes. It will help you to deploy vLLM on k8s and automate the deployment of vLLM Kubernetes applications. With Helm, you can deploy the same framework architecture with different configurations to multiple namespaces by overriding variable values.
-
-This guide will walk you through the process of deploying vLLM with Helm, including the necessary prerequisites, steps for helm installation and documentation on architecture and values file.
-
-## Prerequisites
-
-Before you begin, ensure that you have the following:
-
-- A running Kubernetes cluster
-- NVIDIA Kubernetes Device Plugin (`k8s-device-plugin`): This can be found at [https://github.com/NVIDIA/k8s-device-plugin](https://github.com/NVIDIA/k8s-device-plugin)
-- Available GPU resources in your cluster
-- An S3 bucket containing the model to be deployed
-
-## Installing the chart
-
-To install the chart with the release name `test-vllm`:
-
-```console
-helm upgrade --install --create-namespace --namespace=ns-vllm test-vllm . -f values.yaml --set secrets.s3endpoint=$ACCESS_POINT --set secrets.s3bucketname=$BUCKET --set secrets.s3accesskeyid=$ACCESS_KEY --set secrets.s3accesskey=$SECRET_KEY
-```
-
-## Uninstalling the Chart
-
-To uninstall the `test-vllm` deployment:
-
-```console
-helm uninstall test-vllm --namespace=ns-vllm
-```
-
-The command removes all the Kubernetes components associated with the
-chart **including persistent volumes** and deletes the release.
-
-## Architecture
-
-:::{image} /assets/deployment/architecture_helm_deployment.png
-:::
-
-## Values
-
-:::{list-table}
-:widths: 25 25 25 25
-:header-rows: 1
-
-- * Key
-  * Type
-  * Default
-  * Description
-- * autoscaling
-  * object
-  * {"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80}
-  * Autoscaling configuration
-- * autoscaling.enabled
-  * bool
-  * false
-  * Enable autoscaling
-- * autoscaling.maxReplicas
-  * int
-  * 100
-  * Maximum replicas
-- * autoscaling.minReplicas
-  * int
-  * 1
-  * Minimum replicas
-- * autoscaling.targetCPUUtilizationPercentage
-  * int
-  * 80
-  * Target CPU utilization for autoscaling
-- * configs
-  * object
-  * {}
-  * Configmap
-- * containerPort
-  * int
-  * 8000
-  * Container port
-- * customObjects
-  * list
-  * []
-  * Custom Objects configuration
-- * deploymentStrategy
-  * object
-  * {}
-  * Deployment strategy configuration
-- * externalConfigs
-  * list
-  * []
-  * External configuration
-- * extraContainers
-  * list
-  * []
-  * Additional containers configuration
-- * extraInit
-  * object
-  * {"pvcStorage":"1Gi","s3modelpath":"relative_s3_model_path/opt-125m", "awsEc2MetadataDisabled": true}
-  * Additional configuration for the init container
-- * extraInit.pvcStorage
-  * string
-  * "50Gi"
-  * Storage size of the s3
-- * extraInit.s3modelpath
-  * string
-  * "relative_s3_model_path/opt-125m"
-  * Path of the model on the s3 which hosts model weights and config files
-- * extraInit.awsEc2MetadataDisabled
-  * boolean
-  * true
-  * Disables the use of the Amazon EC2 instance metadata service
-- * extraPorts
-  * list
-  * []
-  * Additional ports configuration
-- * gpuModels
-  * list
-  * ["TYPE_GPU_USED"]
-  * Type of gpu used
-- * image
-  * object
-  * {"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"}
-  * Image configuration
-- * image.command
-  * list
-  * ["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"]
-  * Container launch command
-- * image.repository
-  * string
-  * "vllm/vllm-openai"
-  * Image repository
-- * image.tag
-  * string
-  * "latest"
-  * Image tag
-- * livenessProbe
-  * object
-  * {"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":15,"periodSeconds":10}
-  * Liveness probe configuration
-- * livenessProbe.failureThreshold
-  * int
-  * 3
-  * Number of consecutive probe failures after which Kubernetes considers the overall check to have failed: the container is not alive
-- * livenessProbe.httpGet
-  * object
-  * {"path":"/health","port":8000}
-  * Configuration of the Kubelet http request on the server
-- * livenessProbe.httpGet.path
-  * string
-  * "/health"
-  * Path to access on the HTTP server
-- * livenessProbe.httpGet.port
-  * int
-  * 8000
-  * Name or number of the port to access on the container, on which the server is listening
-- * livenessProbe.initialDelaySeconds
-  * int
-  * 15
-  * Number of seconds after the container has started before liveness probe is initiated
-- * livenessProbe.periodSeconds
-  * int
-  * 10
-  * How often (in seconds) to perform the liveness probe
-- * maxUnavailablePodDisruptionBudget
-  * string
-  * ""
-  * Disruption Budget Configuration
-- * readinessProbe
-  * object
-  * {"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":5,"periodSeconds":5}
-  * Readiness probe configuration
-- * readinessProbe.failureThreshold
-  * int
-  * 3
-  * Number of consecutive probe failures after which Kubernetes considers the overall check to have failed: the container is not ready
-- * readinessProbe.httpGet
-  * object
-  * {"path":"/health","port":8000}
-  * Configuration of the Kubelet http request on the server
-- * readinessProbe.httpGet.path
-  * string
-  * "/health"
-  * Path to access on the HTTP server
-- * readinessProbe.httpGet.port
-  * int
-  * 8000
-  * Name or number of the port to access on the container, on which the server is listening
-- * readinessProbe.initialDelaySeconds
-  * int
-  * 5
-  * Number of seconds after the container has started before readiness probe is initiated
-- * readinessProbe.periodSeconds
-  * int
-  * 5
-  * How often (in seconds) to perform the readiness probe
-- * replicaCount
-  * int
-  * 1
-  * Number of replicas
-- * resources
-  * object
-  * {"limits":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1},"requests":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1}}
-  * Resource configuration
-- * resources.limits."nvidia.com/gpu"
-  * int
-  * 1
-  * Number of gpus used
-- * resources.limits.cpu
-  * int
-  * 4
-  * Number of CPUs
-- * resources.limits.memory
-  * string
-  * "16Gi"
-  * CPU memory configuration
-- * resources.requests."nvidia.com/gpu"
-  * int
-  * 1
-  * Number of gpus used
-- * resources.requests.cpu
-  * int
-  * 4
-  * Number of CPUs
-- * resources.requests.memory
-  * string
-  * "16Gi"
-  * CPU memory configuration
-- * secrets
-  * object
-  * {}
-  * Secrets configuration
-- * serviceName
-  * string
-  *
-  * Service name
-- * servicePort
-  * int
-  * 80
-  * Service port
-- * labels.environment
-  * string
-  * test
-  * Environment name
-- * labels.release
-  * string
-  * test
-  * Release name
-:::
diff --git a/docs/source/deployment/frameworks/index.md b/docs/source/deployment/frameworks/index.md
deleted file mode 100644
index 3408c6c10edef..0000000000000
--- a/docs/source/deployment/frameworks/index.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Using other frameworks
-
-:::{toctree}
-:maxdepth: 1
-
-anything-llm
-bentoml
-cerebrium
-chatbox
-dify
-dstack
-helm
-litellm
-lobe-chat
-lws
-modal
-open-webui
-retrieval_augmented_generation
-skypilot
-streamlit
-triton
-:::
diff --git a/docs/source/deployment/integrations/index.md b/docs/source/deployment/integrations/index.md
deleted file mode 100644
index 410742b88c735..0000000000000
--- a/docs/source/deployment/integrations/index.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# External Integrations
-
-:::{toctree}
-:maxdepth: 1
-
-kserve
-kubeai
-llamastack
-llmaz
-production-stack
-:::
diff --git a/docs/source/features/compatibility_matrix.md b/docs/source/features/compatibility_matrix.md
deleted file mode 100644
index 8865d26deaeda..0000000000000
--- a/docs/source/features/compatibility_matrix.md
+++ /dev/null
@@ -1,476 +0,0 @@
-(compatibility-matrix)=
-
-# Compatibility Matrix
-
-The tables below show mutually exclusive features and the support on some hardware.
-
-The symbols used have the following meanings:
-
-- ✅ = Full compatibility
-- 🟠 = Partial compatibility
-- ❌ = No compatibility
-
-:::{note}
-Check the ❌ or 🟠 with links to see the tracking issue for the unsupported feature/hardware combination.
-:::
-
-## Feature x Feature
-
-:::{raw} html
-<style>
-  /* Make smaller to try to improve readability  */
-  td {
-    font-size: 0.8rem;
-    text-align: center;
-  }
-
-  th {
-    text-align: center;
-    font-size: 0.8rem;
-  }
-</style>
-:::
-
-:::{list-table}
-:header-rows: 1
-:stub-columns: 1
-:widths: auto
-:class: vertical-table-header
-
-- * Feature
-  * [CP](#chunked-prefill)
-  * [APC](#automatic-prefix-caching)
-  * [LoRA](#lora-adapter)
-  * <abbr title="Prompt Adapter">prmpt adptr</abbr>
-  * [SD](#spec-decode)
-  * CUDA graph
-  * <abbr title="Pooling Models">pooling</abbr>
-  * <abbr title="Encoder-Decoder Models">enc-dec</abbr>
-  * <abbr title="Logprobs">logP</abbr>
-  * <abbr title="Prompt Logprobs">prmpt logP</abbr>
-  * <abbr title="Async Output Processing">async output</abbr>
-  * multi-step
-  * <abbr title="Multimodal Inputs">mm</abbr>
-  * best-of
-  * beam-search
-  * <abbr title="Guided Decoding">guided dec</abbr>
-- * [CP](#chunked-prefill)
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * [APC](#automatic-prefix-caching)
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * [LoRA](#lora-adapter)
-  * ✅
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * <abbr title="Prompt Adapter">prmpt adptr</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * [SD](#spec-decode)
-  * ✅
-  * ✅
-  * ❌
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * CUDA graph
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * <abbr title="Pooling Models">pooling</abbr>
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * <abbr title="Encoder-Decoder Models">enc-dec</abbr>
-  * ❌
-  * [❌](gh-issue:7366)
-  * ❌
-  * ❌
-  * [❌](gh-issue:7366)
-  * ✅
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * <abbr title="Logprobs">logP</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ❌
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-  *
-- * <abbr title="Prompt Logprobs">prmpt logP</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ❌
-  * ✅
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-  *
-- * <abbr title="Async Output Processing">async output</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ❌
-  * ✅
-  * ❌
-  * ❌
-  * ✅
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-  *
-- * multi-step
-  * ❌
-  * ✅
-  * ❌
-  * ✅
-  * ❌
-  * ✅
-  * ❌
-  * ❌
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  *
-  *
-  *
-  *
-- * <abbr title="Multimodal Inputs">mm</abbr>
-  * ✅
-  * [🟠](gh-pr:8348)
-  * [🟠](gh-pr:4194)
-  * ❔
-  * ❔
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ❔
-  * ✅
-  *
-  *
-  *
-- * best-of
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * [❌](gh-issue:6137)
-  * ✅
-  * ❌
-  * ✅
-  * ✅
-  * ✅
-  * ❔
-  * [❌](gh-issue:7968)
-  * ✅
-  * ✅
-  *
-  *
-- * beam-search
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * [❌](gh-issue:6137)
-  * ✅
-  * ❌
-  * ✅
-  * ✅
-  * ✅
-  * ❔
-  * [❌](gh-issue:7968)
-  * ❔
-  * ✅
-  * ✅
-  *
-- * <abbr title="Guided Decoding">guided dec</abbr>
-  * ✅
-  * ✅
-  * ❔
-  * ❔
-  * [❌](gh-issue:11484)
-  * ✅
-  * ❌
-  * ❔
-  * ✅
-  * ✅
-  * ✅
-  * [❌](gh-issue:9893)
-  * ❔
-  * ✅
-  * ✅
-  * ✅
-:::
-
-(feature-x-hardware)=
-
-## Feature x Hardware
-
-:::{list-table}
-:header-rows: 1
-:stub-columns: 1
-:widths: auto
-
-- * Feature
-  * Volta
-  * Turing
-  * Ampere
-  * Ada
-  * Hopper
-  * CPU
-  * AMD
-- * [CP](#chunked-prefill)
-  * [❌](gh-issue:2729)
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * [APC](#automatic-prefix-caching)
-  * [❌](gh-issue:3687)
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * [LoRA](#lora-adapter)
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * <abbr title="Prompt Adapter">prmpt adptr</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * [❌](gh-issue:8475)
-  * ✅
-- * [SD](#spec-decode)
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * CUDA graph
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ❌
-  * ✅
-- * <abbr title="Pooling Models">pooling</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ❔
-- * <abbr title="Encoder-Decoder Models">enc-dec</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ❌
-- * <abbr title="Multimodal Inputs">mm</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * <abbr title="Logprobs">logP</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * <abbr title="Prompt Logprobs">prmpt logP</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * <abbr title="Async Output Processing">async output</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ❌
-  * ❌
-- * multi-step
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * [❌](gh-issue:8477)
-  * ✅
-- * best-of
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * beam-search
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-- * <abbr title="Guided Decoding">guided dec</abbr>
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-  * ✅
-:::
diff --git a/docs/source/features/quantization/index.md b/docs/source/features/quantization/index.md
deleted file mode 100644
index 7ad46b7094ee9..0000000000000
--- a/docs/source/features/quantization/index.md
+++ /dev/null
@@ -1,24 +0,0 @@
-(quantization-index)=
-
-# Quantization
-
-Quantization trades off model precision for smaller memory footprint, allowing large models to be run on a wider range of devices.
-
-:::{toctree}
-:caption: Contents
-:maxdepth: 1
-
-supported_hardware
-auto_awq
-bnb
-bitblas
-gguf
-gptqmodel
-int4
-int8
-fp8
-modelopt
-quark
-quantized_kvcache
-torchao
-:::
diff --git a/docs/source/features/quantization/supported_hardware.md b/docs/source/features/quantization/supported_hardware.md
deleted file mode 100644
index f8af1ba60b125..0000000000000
--- a/docs/source/features/quantization/supported_hardware.md
+++ /dev/null
@@ -1,153 +0,0 @@
-(quantization-supported-hardware)=
-
-# Supported Hardware
-
-The table below shows the compatibility of various quantization implementations with different hardware platforms in vLLM:
-
-:::{list-table}
-:header-rows: 1
-:widths: 20 8 8 8 8 8 8 8 8 8 8
-
-- * Implementation
-  * Volta
-  * Turing
-  * Ampere
-  * Ada
-  * Hopper
-  * AMD GPU
-  * Intel GPU
-  * x86 CPU
-  * AWS Inferentia
-  * Google TPU
-- * AWQ
-  * ❌
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-- * GPTQ
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-- * Marlin (GPTQ/AWQ/FP8)
-  * ❌
-  * ❌
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-- * INT8 (W8A8)
-  * ❌
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-  * ✅︎
-  * ❌
-  * ✅︎
-- * FP8 (W8A8)
-  * ❌
-  * ❌
-  * ❌
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-- * BitBLAS (GPTQ)
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-- * AQLM
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-- * bitsandbytes
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-- * DeepSpeedFP
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-- * GGUF
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-- * modelopt
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎
-  * ✅︎︎
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-  * ❌
-:::
-
-- Volta refers to SM 7.0, Turing to SM 7.5, Ampere to SM 8.0/8.6, Ada to SM 8.9, and Hopper to SM 9.0.
-- ✅︎ indicates that the quantization method is supported on the specified hardware.
-- ❌ indicates that the quantization method is not supported on the specified hardware.
-
-:::{note}
-This compatibility chart is subject to change as vLLM continues to evolve and expand its support for different hardware platforms and quantization methods.
-
-For the most up-to-date information on hardware support and quantization methods, please refer to <gh-dir:vllm/model_executor/layers/quantization> or consult with the vLLM development team.
-:::
diff --git a/docs/source/generate_examples.py b/docs/source/generate_examples.py
deleted file mode 100644
index f77dbefb0a018..0000000000000
--- a/docs/source/generate_examples.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# SPDX-License-Identifier: Apache-2.0
-
-import itertools
-import re
-from dataclasses import dataclass, field
-from pathlib import Path
-
-ROOT_DIR = Path(__file__).parent.parent.parent.resolve()
-ROOT_DIR_RELATIVE = '../../../..'
-EXAMPLE_DIR = ROOT_DIR / "examples"
-EXAMPLE_DOC_DIR = ROOT_DIR / "docs/source/getting_started/examples"
-
-
-def fix_case(text: str) -> str:
-    subs = {
-        "api": "API",
-        "cli": "CLI",
-        "cpu": "CPU",
-        "llm": "LLM",
-        "mae": "MAE",
-        "tpu": "TPU",
-        "aqlm": "AQLM",
-        "gguf": "GGUF",
-        "lora": "LoRA",
-        "rlhf": "RLHF",
-        "vllm": "vLLM",
-        "openai": "OpenAI",
-        "lmcache": "LMCache",
-        "multilora": "MultiLoRA",
-        "mlpspeculator": "MLPSpeculator",
-        r"fp\d+": lambda x: x.group(0).upper(),  # e.g. fp16, fp32
-        r"int\d+": lambda x: x.group(0).upper(),  # e.g. int8, int16
-    }
-    for pattern, repl in subs.items():
-        text = re.sub(rf'\b{pattern}\b', repl, text, flags=re.IGNORECASE)
-    return text
-
-
-@dataclass
-class Index:
-    """
-    Index class to generate a structured document index.
-
-    Attributes:
-        path (Path): The path save the index file to.
-        title (str): The title of the index.
-        description (str): A brief description of the index.
-        caption (str): An optional caption for the table of contents.
-        maxdepth (int): The maximum depth of the table of contents. Defaults to 1.
-        documents (list[str]): A list of document paths to include in the index. Defaults to an empty list.
-
-    Methods:
-        generate() -> str:
-            Generates the index content as a string in the specified format.
-    """ # noqa: E501
-    path: Path
-    title: str
-    description: str
-    caption: str
-    maxdepth: int = 1
-    documents: list[str] = field(default_factory=list)
-
-    def generate(self) -> str:
-        content = f"# {self.title}\n\n{self.description}\n\n"
-        content += ":::{toctree}\n"
-        content += f":caption: {self.caption}\n:maxdepth: {self.maxdepth}\n"
-        content += "\n".join(self.documents) + "\n:::\n"
-        return content
-
-
-@dataclass
-class Example:
-    """
-    Example class for generating documentation content from a given path.
-
-    Attributes:
-        path (Path): The path to the main directory or file.
-        category (str): The category of the document.
-        main_file (Path): The main file in the directory.
-        other_files (list[Path]): list of other files in the directory.
-        title (str): The title of the document.
-
-    Methods:
-        __post_init__(): Initializes the main_file, other_files, and title attributes.
-        determine_main_file() -> Path: Determines the main file in the given path.
-        determine_other_files() -> list[Path]: Determines other files in the directory excluding the main file.
-        determine_title() -> str: Determines the title of the document.
-        generate() -> str: Generates the documentation content.
-    """ # noqa: E501
-    path: Path
-    category: str = None
-    main_file: Path = field(init=False)
-    other_files: list[Path] = field(init=False)
-    title: str = field(init=False)
-
-    def __post_init__(self):
-        self.main_file = self.determine_main_file()
-        self.other_files = self.determine_other_files()
-        self.title = self.determine_title()
-
-    def determine_main_file(self) -> Path:
-        """
-        Determines the main file in the given path.
-        If the path is a file, it returns the path itself. Otherwise, it searches
-        for Markdown files (*.md) in the directory and returns the first one found.
-        Returns:
-            Path: The main file path, either the original path if it's a file or the first
-            Markdown file found in the directory.
-        Raises:
-            IndexError: If no Markdown files are found in the directory.
-        """ # noqa: E501
-        return self.path if self.path.is_file() else list(
-            self.path.glob("*.md")).pop()
-
-    def determine_other_files(self) -> list[Path]:
-        """
-        Determine other files in the directory excluding the main file.
-
-        This method checks if the given path is a file. If it is, it returns an empty list.
-        Otherwise, it recursively searches through the directory and returns a list of all
-        files that are not the main file.
-
-        Returns:
-            list[Path]: A list of Path objects representing the other files in the directory.
-        """ # noqa: E501
-        if self.path.is_file():
-            return []
-        is_other_file = lambda file: file.is_file() and file != self.main_file
-        return [file for file in self.path.rglob("*") if is_other_file(file)]
-
-    def determine_title(self) -> str:
-        return fix_case(self.path.stem.replace("_", " ").title())
-
-    def generate(self) -> str:
-        # Convert the path to a relative path from __file__
-        make_relative = lambda path: ROOT_DIR_RELATIVE / path.relative_to(
-            ROOT_DIR)
-
-        content = f"Source <gh-file:{self.path.relative_to(ROOT_DIR)}>.\n\n"
-        include = "include" if self.main_file.suffix == ".md" else \
-            "literalinclude"
-        if include == "literalinclude":
-            content += f"# {self.title}\n\n"
-        content += f":::{{{include}}} {make_relative(self.main_file)}\n"
-        if include == "literalinclude":
-            content += f":language: {self.main_file.suffix[1:]}\n"
-        content += ":::\n\n"
-
-        if not self.other_files:
-            return content
-
-        content += "## Example materials\n\n"
-        for file in sorted(self.other_files):
-            include = "include" if file.suffix == ".md" else "literalinclude"
-            content += f":::{{admonition}} {file.relative_to(self.path)}\n"
-            content += ":class: dropdown\n\n"
-            content += f":::{{{include}}} {make_relative(file)}\n:::\n"
-            content += ":::\n\n"
-
-        return content
-
-
-def generate_examples():
-    # Create the EXAMPLE_DOC_DIR if it doesn't exist
-    if not EXAMPLE_DOC_DIR.exists():
-        EXAMPLE_DOC_DIR.mkdir(parents=True)
-
-    # Create empty indices
-    examples_index = Index(
-        path=EXAMPLE_DOC_DIR / "examples_index.md",
-        title="Examples",
-        description=
-        "A collection of examples demonstrating usage of vLLM.\nAll documented examples are autogenerated using <gh-file:docs/source/generate_examples.py> from examples found in <gh-file:examples>.",  # noqa: E501
-        caption="Examples",
-        maxdepth=2)
-    # Category indices stored in reverse order because they are inserted into
-    # examples_index.documents at index 0 in order
-    category_indices = {
-        "other":
-        Index(
-            path=EXAMPLE_DOC_DIR / "examples_other_index.md",
-            title="Other",
-            description=
-            "Other examples that don't strongly fit into the online or offline serving categories.",  # noqa: E501
-            caption="Examples",
-        ),
-        "online_serving":
-        Index(
-            path=EXAMPLE_DOC_DIR / "examples_online_serving_index.md",
-            title="Online Serving",
-            description=
-            "Online serving examples demonstrate how to use vLLM in an online setting, where the model is queried for predictions in real-time.",  # noqa: E501
-            caption="Examples",
-        ),
-        "offline_inference":
-        Index(
-            path=EXAMPLE_DOC_DIR / "examples_offline_inference_index.md",
-            title="Offline Inference",
-            description=
-            "Offline inference examples demonstrate how to use vLLM in an offline setting, where the model is queried for predictions in batches. We recommend starting with <project:basic.md>.",  # noqa: E501
-            caption="Examples",
-        ),
-    }
-
-    examples = []
-    glob_patterns = ["*.py", "*.md", "*.sh"]
-    # Find categorised examples
-    for category in category_indices:
-        category_dir = EXAMPLE_DIR / category
-        globs = [category_dir.glob(pattern) for pattern in glob_patterns]
-        for path in itertools.chain(*globs):
-            examples.append(Example(path, category))
-        # Find examples in subdirectories
-        for path in category_dir.glob("*/*.md"):
-            examples.append(Example(path.parent, category))
-    # Find uncategorised examples
-    globs = [EXAMPLE_DIR.glob(pattern) for pattern in glob_patterns]
-    for path in itertools.chain(*globs):
-        examples.append(Example(path))
-    # Find examples in subdirectories
-    for path in EXAMPLE_DIR.glob("*/*.md"):
-        # Skip categorised examples
-        if path.parent.name in category_indices:
-            continue
-        examples.append(Example(path.parent))
-
-    # Generate the example documentation
-    for example in sorted(examples, key=lambda e: e.path.stem):
-        doc_path = EXAMPLE_DOC_DIR / f"{example.path.stem}.md"
-        with open(doc_path, "w+") as f:
-            f.write(example.generate())
-        # Add the example to the appropriate index
-        index = category_indices.get(example.category, examples_index)
-        index.documents.append(example.path.stem)
-
-    # Generate the index files
-    for category_index in category_indices.values():
-        if category_index.documents:
-            examples_index.documents.insert(0, category_index.path.name)
-            with open(category_index.path, "w+") as f:
-                f.write(category_index.generate())
-
-    with open(examples_index.path, "w+") as f:
-        f.write(examples_index.generate())
diff --git a/docs/source/getting_started/installation.md b/docs/source/getting_started/installation.md
deleted file mode 100644
index 44134bf01b76c..0000000000000
--- a/docs/source/getting_started/installation.md
+++ /dev/null
@@ -1,28 +0,0 @@
-(installation-index)=
-
-# Installation
-
-vLLM supports the following hardware platforms:
-
-:::{toctree}
-:maxdepth: 1
-:hidden:
-
-installation/gpu
-installation/cpu
-installation/ai_accelerator
-:::
-
-- <project:installation/gpu.md>
-  - NVIDIA CUDA
-  - AMD ROCm
-  - Intel XPU
-- <project:installation/cpu.md>
-  - Intel/AMD x86
-  - ARM AArch64
-  - Apple silicon
-  - IBM Z (S390X)
-- <project:installation/ai_accelerator.md>
-  - Google TPU
-  - Intel Gaudi
-  - AWS Neuron
diff --git a/docs/source/getting_started/installation/ai_accelerator.md b/docs/source/getting_started/installation/ai_accelerator.md
deleted file mode 100644
index 0a207af1a4c75..0000000000000
--- a/docs/source/getting_started/installation/ai_accelerator.md
+++ /dev/null
@@ -1,299 +0,0 @@
-# Other AI accelerators
-
-vLLM is a Python library that supports the following AI accelerators. Select your AI accelerator type to see vendor specific instructions:
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} Google TPU
-:selected:
-:sync: tpu
-
-:::{include} ai_accelerator/tpu.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
-
-::::
-
-::::{tab-item} Intel Gaudi
-:sync: hpu-gaudi
-
-:::{include} ai_accelerator/hpu-gaudi.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
-
-::::
-
-::::{tab-item} AWS Neuron
-:sync: neuron
-
-:::{include} ai_accelerator/neuron.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
-
-::::
-
-:::::
-
-## Requirements
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} Google TPU
-:sync: tpu
-
-:::{include} ai_accelerator/tpu.inc.md
-:start-after: "## Requirements"
-:end-before: "## Configure a new environment"
-:::
-
-::::
-
-::::{tab-item} Intel Gaudi
-:sync: hpu-gaudi
-
-:::{include} ai_accelerator/hpu-gaudi.inc.md
-:start-after: "## Requirements"
-:end-before: "## Configure a new environment"
-:::
-
-::::
-
-::::{tab-item} AWS Neuron
-:sync: neuron
-
-:::{include} ai_accelerator/neuron.inc.md
-:start-after: "## Requirements"
-:end-before: "## Configure a new environment"
-:::
-
-::::
-
-:::::
-
-## Configure a new environment
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} Google TPU
-:sync: tpu
-
-:::{include} ai_accelerator/tpu.inc.md
-:start-after: "## Configure a new environment"
-:end-before: "## Set up using Python"
-:::
-
-::::
-
-::::{tab-item} Intel Gaudi
-:sync: hpu-gaudi
-
-:::{include} ai_accelerator/hpu-gaudi.inc.md
-:start-after: "## Configure a new environment"
-:end-before: "## Set up using Python"
-:::
-
-::::
-
-::::{tab-item} AWS Neuron
-:sync: neuron
-
-:::{include} ai_accelerator/neuron.inc.md
-:start-after: "## Configure a new environment"
-:end-before: "## Set up using Python"
-:::
-
-::::
-
-:::::
-
-## Set up using Python
-
-### Pre-built wheels
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} Google TPU
-:sync: tpu
-
-:::{include} ai_accelerator/tpu.inc.md
-:start-after: "### Pre-built wheels"
-:end-before: "### Build wheel from source"
-:::
-
-::::
-
-::::{tab-item} Intel Gaudi
-:sync: hpu-gaudi
-
-:::{include} ai_accelerator/hpu-gaudi.inc.md
-:start-after: "### Pre-built wheels"
-:end-before: "### Build wheel from source"
-:::
-
-::::
-
-::::{tab-item} AWS Neuron
-:sync: neuron
-
-:::{include} ai_accelerator/neuron.inc.md
-:start-after: "### Pre-built wheels"
-:end-before: "### Build wheel from source"
-:::
-
-::::
-
-:::::
-
-### Build wheel from source
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} Google TPU
-:sync: tpu
-
-:::{include} ai_accelerator/tpu.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
-
-::::
-
-::::{tab-item} Intel Gaudi
-:sync: hpu-gaudi
-
-:::{include} ai_accelerator/hpu-gaudi.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
-
-::::
-
-::::{tab-item} AWS Neuron
-:sync: neuron
-
-:::{include} ai_accelerator/neuron.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
-
-::::
-
-:::::
-
-## Set up using Docker
-
-### Pre-built images
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} Google TPU
-:sync: tpu
-
-:::{include} ai_accelerator/tpu.inc.md
-:start-after: "### Pre-built images"
-:end-before: "### Build image from source"
-:::
-
-::::
-
-::::{tab-item} Intel Gaudi
-:sync: hpu-gaudi
-
-:::{include} ai_accelerator/hpu-gaudi.inc.md
-:start-after: "### Pre-built images"
-:end-before: "### Build image from source"
-:::
-
-::::
-
-::::{tab-item} AWS Neuron
-:sync: neuron
-
-:::{include} ai_accelerator/neuron.inc.md
-:start-after: "### Pre-built images"
-:end-before: "### Build image from source"
-:::
-
-::::
-
-:::::
-
-### Build image from source
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} Google TPU
-:sync: tpu
-
-:::{include} ai_accelerator/tpu.inc.md
-:start-after: "### Build image from source"
-:end-before: "## Extra information"
-:::
-
-::::
-
-::::{tab-item} Intel Gaudi
-:sync: hpu-gaudi
-
-:::{include} ai_accelerator/hpu-gaudi.inc.md
-:start-after: "### Build image from source"
-:end-before: "## Extra information"
-:::
-
-::::
-
-::::{tab-item} AWS Neuron
-:sync: neuron
-
-:::{include} ai_accelerator/neuron.inc.md
-:start-after: "### Build image from source"
-:end-before: "## Extra information"
-:::
-
-::::
-
-:::::
-
-## Extra information
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} Google TPU
-:sync: tpu
-
-:::{include} ai_accelerator/tpu.inc.md
-:start-after: "## Extra information"
-:::
-
-::::
-
-::::{tab-item} Intel Gaudi
-:sync: hpu-gaudi
-
-:::{include} ai_accelerator/hpu-gaudi.inc.md
-:start-after: "## Extra information"
-:::
-
-::::
-
-::::{tab-item} AWS Neuron
-:sync: neuron
-
-:::{include} ai_accelerator/neuron.inc.md
-:start-after: "## Extra information"
-:::
-
-::::
-
-:::::
diff --git a/docs/source/getting_started/installation/cpu/arm.inc.md b/docs/source/getting_started/installation/cpu/arm.inc.md
deleted file mode 100644
index e7d8d60630dc0..0000000000000
--- a/docs/source/getting_started/installation/cpu/arm.inc.md
+++ /dev/null
@@ -1,34 +0,0 @@
-# Installation
-
-vLLM has been adapted to work on ARM64 CPUs with NEON support, leveraging the CPU backend initially developed for the x86 platform.
-
-ARM CPU backend currently supports Float32, FP16 and BFloat16 datatypes.
-
-:::{attention}
-There are no pre-built wheels or images for this device, so you must build vLLM from source.
-:::
-
-## Requirements
-
-- OS: Linux
-- Compiler: `gcc/g++ >= 12.3.0` (optional, recommended)
-- Instruction Set Architecture (ISA): NEON support is required
-
-## Set up using Python
-
-### Pre-built wheels
-
-### Build wheel from source
-
-:::{include} cpu/build.inc.md
-:::
-
-Testing has been conducted on AWS Graviton3 instances for compatibility.
-
-## Set up using Docker
-
-### Pre-built images
-
-### Build image from source
-
-## Extra information
diff --git a/docs/source/getting_started/installation/cpu/x86.inc.md b/docs/source/getting_started/installation/cpu/x86.inc.md
deleted file mode 100644
index 9ae2035db5433..0000000000000
--- a/docs/source/getting_started/installation/cpu/x86.inc.md
+++ /dev/null
@@ -1,41 +0,0 @@
-# Installation
-
-vLLM initially supports basic model inferencing and serving on x86 CPU platform, with data types FP32, FP16 and BF16.
-
-:::{attention}
-There are no pre-built wheels or images for this device, so you must build vLLM from source.
-:::
-
-## Requirements
-
-- OS: Linux
-- Compiler: `gcc/g++ >= 12.3.0` (optional, recommended)
-- Instruction Set Architecture (ISA): AVX512 (optional, recommended)
-
-:::{tip}
-[Intel Extension for PyTorch (IPEX)](https://github.com/intel/intel-extension-for-pytorch) extends PyTorch with up-to-date features optimizations for an extra performance boost on Intel hardware.
-:::
-
-## Set up using Python
-
-### Pre-built wheels
-
-### Build wheel from source
-
-:::{include} cpu/build.inc.md
-:::
-
-:::{note}
-- AVX512_BF16 is an extension ISA provides native BF16 data type conversion and vector product instructions, which brings some performance improvement compared with pure AVX512. The CPU backend build script will check the host CPU flags to determine whether to enable AVX512_BF16.
-- If you want to force enable AVX512_BF16 for the cross-compilation, please set environment variable `VLLM_CPU_AVX512BF16=1` before the building.
-:::
-
-## Set up using Docker
-
-### Pre-built images
-
-See [https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo](https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo)
-
-### Build image from source
-
-## Extra information
diff --git a/docs/source/getting_started/installation/gpu.md b/docs/source/getting_started/installation/gpu.md
deleted file mode 100644
index 22db992354fb1..0000000000000
--- a/docs/source/getting_started/installation/gpu.md
+++ /dev/null
@@ -1,301 +0,0 @@
-# GPU
-
-vLLM is a Python library that supports the following GPU variants. Select your GPU type to see vendor specific instructions:
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} NVIDIA CUDA
-:selected:
-:sync: cuda
-
-:::{include} gpu/cuda.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
-
-::::
-
-::::{tab-item} AMD ROCm
-:sync: rocm
-
-:::{include} gpu/rocm.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
-
-::::
-
-::::{tab-item} Intel XPU
-:sync: xpu
-
-:::{include} gpu/xpu.inc.md
-:start-after: "# Installation"
-:end-before: "## Requirements"
-:::
-
-::::
-
-:::::
-
-## Requirements
-
-- OS: Linux
-- Python: 3.9 -- 3.12
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} NVIDIA CUDA
-:sync: cuda
-
-:::{include} gpu/cuda.inc.md
-:start-after: "## Requirements"
-:end-before: "## Set up using Python"
-:::
-
-::::
-
-::::{tab-item} AMD ROCm
-:sync: rocm
-
-:::{include} gpu/rocm.inc.md
-:start-after: "## Requirements"
-:end-before: "## Set up using Python"
-:::
-
-::::
-
-::::{tab-item} Intel XPU
-:sync: xpu
-
-:::{include} gpu/xpu.inc.md
-:start-after: "## Requirements"
-:end-before: "## Set up using Python"
-:::
-
-::::
-
-:::::
-
-## Set up using Python
-
-### Create a new Python environment
-
-:::{include} python_env_setup.inc.md
-:::
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} NVIDIA CUDA
-:sync: cuda
-
-:::{include} gpu/cuda.inc.md
-:start-after: "## Create a new Python environment"
-:end-before: "### Pre-built wheels"
-:::
-
-::::
-
-::::{tab-item} AMD ROCm
-:sync: rocm
-
-There is no extra information on creating a new Python environment for this device.
-
-::::
-
-::::{tab-item} Intel XPU
-:sync: xpu
-
-There is no extra information on creating a new Python environment for this device.
-
-::::
-
-:::::
-
-### Pre-built wheels
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} NVIDIA CUDA
-:sync: cuda
-
-:::{include} gpu/cuda.inc.md
-:start-after: "### Pre-built wheels"
-:end-before: "### Build wheel from source"
-:::
-
-::::
-
-::::{tab-item} AMD ROCm
-:sync: rocm
-
-:::{include} gpu/rocm.inc.md
-:start-after: "### Pre-built wheels"
-:end-before: "### Build wheel from source"
-:::
-
-::::
-
-::::{tab-item} Intel XPU
-:sync: xpu
-
-:::{include} gpu/xpu.inc.md
-:start-after: "### Pre-built wheels"
-:end-before: "### Build wheel from source"
-:::
-
-::::
-
-:::::
-
-(build-from-source)=
-
-### Build wheel from source
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} NVIDIA CUDA
-:sync: cuda
-
-:::{include} gpu/cuda.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
-
-::::
-
-::::{tab-item} AMD ROCm
-:sync: rocm
-
-:::{include} gpu/rocm.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
-
-::::
-
-::::{tab-item} Intel XPU
-:sync: xpu
-
-:::{include} gpu/xpu.inc.md
-:start-after: "### Build wheel from source"
-:end-before: "## Set up using Docker"
-:::
-
-::::
-
-:::::
-
-## Set up using Docker
-
-### Pre-built images
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} NVIDIA CUDA
-:sync: cuda
-
-:::{include} gpu/cuda.inc.md
-:start-after: "### Pre-built images"
-:end-before: "### Build image from source"
-:::
-
-::::
-
-::::{tab-item} AMD ROCm
-:sync: rocm
-
-:::{include} gpu/rocm.inc.md
-:start-after: "### Pre-built images"
-:end-before: "### Build image from source"
-:::
-
-::::
-
-::::{tab-item} Intel XPU
-:sync: xpu
-
-:::{include} gpu/xpu.inc.md
-:start-after: "### Pre-built images"
-:end-before: "### Build image from source"
-:::
-
-::::
-
-:::::
-
-### Build image from source
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} NVIDIA CUDA
-:sync: cuda
-
-:::{include} gpu/cuda.inc.md
-:start-after: "### Build image from source"
-:end-before: "## Supported features"
-:::
-
-::::
-
-::::{tab-item} AMD ROCm
-:sync: rocm
-
-:::{include} gpu/rocm.inc.md
-:start-after: "### Build image from source"
-:end-before: "## Supported features"
-:::
-
-::::
-
-::::{tab-item} Intel XPU
-:sync: xpu
-
-:::{include} gpu/xpu.inc.md
-:start-after: "### Build image from source"
-:end-before: "## Supported features"
-:::
-
-::::
-
-:::::
-
-## Supported features
-
-:::::{tab-set}
-:sync-group: device
-
-::::{tab-item} NVIDIA CUDA
-:sync: cuda
-
-:::{include} gpu/cuda.inc.md
-:start-after: "## Supported features"
-:::
-
-::::
-
-::::{tab-item} AMD ROCm
-:sync: rocm
-
-:::{include} gpu/rocm.inc.md
-:start-after: "## Supported features"
-:::
-
-::::
-
-::::{tab-item} Intel XPU
-:sync: xpu
-
-:::{include} gpu/xpu.inc.md
-:start-after: "## Supported features"
-:::
-
-::::
-
-:::::
diff --git a/docs/source/index.md b/docs/source/index.md
deleted file mode 100644
index db2192e87dcf2..0000000000000
--- a/docs/source/index.md
+++ /dev/null
@@ -1,217 +0,0 @@
-# Welcome to vLLM
-
-:::{figure} ./assets/logos/vllm-logo-text-light.png
-:align: center
-:alt: vLLM
-:class: no-scaled-link
-:width: 60%
-:::
-
-:::{raw} html
-<p style="text-align:center">
-<strong>Easy, fast, and cheap LLM serving for everyone
-</strong>
-</p>
-
-<p style="text-align:center">
-<script async defer src="https://buttons.github.io/buttons.js"></script>
-<a class="github-button" href="https://github.com/vllm-project/vllm" data-show-count="true" data-size="large" aria-label="Star">Star</a>
-<a class="github-button" href="https://github.com/vllm-project/vllm/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
-<a class="github-button" href="https://github.com/vllm-project/vllm/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
-</p>
-:::
-
-vLLM is a fast and easy-to-use library for LLM inference and serving.
-
-Originally developed in the [Sky Computing Lab](https://sky.cs.berkeley.edu) at UC Berkeley, vLLM has evolved into a community-driven project with contributions from both academia and industry.
-
-vLLM is fast with:
-
-- State-of-the-art serving throughput
-- Efficient management of attention key and value memory with [**PagedAttention**](https://blog.vllm.ai/2023/06/20/vllm.html)
-- Continuous batching of incoming requests
-- Fast model execution with CUDA/HIP graph
-- Quantization: [GPTQ](https://arxiv.org/abs/2210.17323), [AWQ](https://arxiv.org/abs/2306.00978), INT4, INT8, and FP8
-- Optimized CUDA kernels, including integration with FlashAttention and FlashInfer.
-- Speculative decoding
-- Chunked prefill
-
-vLLM is flexible and easy to use with:
-
-- Seamless integration with popular HuggingFace models
-- High-throughput serving with various decoding algorithms, including *parallel sampling*, *beam search*, and more
-- Tensor parallelism and pipeline parallelism support for distributed inference
-- Streaming outputs
-- OpenAI-compatible API server
-- Support NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs, Gaudi® accelerators and GPUs, IBM Power CPUs, TPU, and AWS Trainium and Inferentia Accelerators.
-- Prefix caching support
-- Multi-lora support
-
-For more information, check out the following:
-
-- [vLLM announcing blog post](https://vllm.ai) (intro to PagedAttention)
-- [vLLM paper](https://arxiv.org/abs/2309.06180) (SOSP 2023)
-- [How continuous batching enables 23x throughput in LLM inference while reducing p50 latency](https://www.anyscale.com/blog/continuous-batching-llm-inference) by Cade Daniel et al.
-- [vLLM Meetups](#meetups)
-
-## Documentation
-
-% How to start using vLLM?
-
-:::{toctree}
-:caption: Getting Started
-:maxdepth: 1
-
-getting_started/installation
-getting_started/quickstart
-getting_started/examples/examples_index
-getting_started/troubleshooting
-getting_started/faq
-getting_started/v1_user_guide
-
-:::
-
-% What does vLLM support?
-
-:::{toctree}
-:caption: Models
-:maxdepth: 1
-
-models/supported_models
-models/generative_models
-models/pooling_models
-models/extensions/index
-:::
-
-% Additional capabilities
-
-:::{toctree}
-:caption: Features
-:maxdepth: 1
-
-features/quantization/index
-features/multimodal_inputs
-features/prompt_embeds
-features/lora
-features/tool_calling
-features/reasoning_outputs
-features/structured_outputs
-features/automatic_prefix_caching
-features/disagg_prefill
-features/spec_decode
-features/compatibility_matrix
-:::
-
-% Details about running vLLM
-
-:::{toctree}
-:caption: Training
-:maxdepth: 1
-
-training/trl.md
-training/rlhf.md
-
-:::
-
-:::{toctree}
-:caption: Inference and Serving
-:maxdepth: 1
-
-serving/offline_inference
-serving/openai_compatible_server
-serving/serve_args
-serving/distributed_serving
-serving/metrics
-serving/engine_args
-serving/env_vars
-serving/usage_stats
-serving/integrations/index
-:::
-
-% Scaling up vLLM for production
-
-:::{toctree}
-:caption: Deployment
-:maxdepth: 1
-
-deployment/security
-deployment/docker
-deployment/k8s
-deployment/nginx
-deployment/frameworks/index
-deployment/integrations/index
-:::
-
-% Making the most out of vLLM
-
-:::{toctree}
-:caption: Performance
-:maxdepth: 1
-
-performance/optimization
-performance/benchmarks
-:::
-
-% Explanation of vLLM internals
-
-:::{toctree}
-:caption: Design Documents
-:maxdepth: 2
-
-design/arch_overview
-design/huggingface_integration
-design/plugin_system
-design/kernel/paged_attention
-design/mm_processing
-design/automatic_prefix_caching
-design/multiprocessing
-:::
-
-:::{toctree}
-:caption: V1 Design Documents
-:maxdepth: 2
-
-design/v1/torch_compile
-design/v1/prefix_caching
-design/v1/metrics
-:::
-
-% How to contribute to the vLLM project
-
-:::{toctree}
-:caption: Developer Guide
-:maxdepth: 2
-
-contributing/overview
-contributing/deprecation_policy
-contributing/profiling/profiling_index
-contributing/dockerfile/dockerfile
-contributing/model/index
-contributing/vulnerability_management
-:::
-
-% Technical API specifications
-
-:::{toctree}
-:caption: API Reference
-:maxdepth: 2
-
-api/summary
-api/vllm/vllm
-:::
-
-% Latest news and acknowledgements
-
-:::{toctree}
-:caption: Community
-:maxdepth: 1
-
-community/blog
-community/meetups
-community/sponsors
-:::
-
-## Indices and tables
-
-- {ref}`genindex`
-- {ref}`modindex`
diff --git a/docs/source/models/extensions/index.md b/docs/source/models/extensions/index.md
deleted file mode 100644
index cdcdaa5b35018..0000000000000
--- a/docs/source/models/extensions/index.md
+++ /dev/null
@@ -1,9 +0,0 @@
-# Built-in Extensions
-
-:::{toctree}
-:maxdepth: 1
-
-runai_model_streamer
-tensorizer
-fastsafetensor
-:::
diff --git a/docs/source/models/supported_models.md b/docs/source/models/supported_models.md
deleted file mode 100644
index 6022dfb9c2c6c..0000000000000
--- a/docs/source/models/supported_models.md
+++ /dev/null
@@ -1,1406 +0,0 @@
-(supported-models)=
-
-# Supported Models
-
-vLLM supports [generative](generative-models) and [pooling](pooling-models) models across various tasks.
-If a model supports more than one task, you can set the task via the `--task` argument.
-
-For each task, we list the model architectures that have been implemented in vLLM.
-Alongside each architecture, we include some popular models that use it.
-
-## Model Implementation
-
-### vLLM
-
-If vLLM natively supports a model, its implementation can be found in <gh-file:vllm/model_executor/models>.
-
-These models are what we list in <project:#supported-text-models> and <project:#supported-mm-models>.
-
-(transformers-backend)=
-
-### Transformers
-
-vLLM also supports model implementations that are available in Transformers. This does not currently work for all models, but most decoder language models are supported, and vision language model support is planned!
-
-To check if the modeling backend is Transformers, you can simply do this:
-
-```python
-from vllm import LLM
-llm = LLM(model=..., task="generate")  # Name or path of your model
-llm.apply_model(lambda model: print(type(model)))
-```
-
-If it is `TransformersForCausalLM` then it means it's based on Transformers!
-
-:::{tip}
-You can force the use of `TransformersForCausalLM` by setting `model_impl="transformers"` for <project:#offline-inference> or `--model-impl transformers` for the <project:#openai-compatible-server>.
-:::
-
-:::{note}
-vLLM may not fully optimise the Transformers implementation so you may see degraded performance if comparing a native model to a Transformers model in vLLM.
-:::
-
-#### Custom models
-
-If a model is supported natively by neither vLLM nor Transformers, it can still be used in vLLM!
-
-For a model to be compatible with the Transformers backend for vLLM it must:
-
-- be a Transformers compatible custom model (see [Transformers - Customizing models](https://huggingface.co/docs/transformers/en/custom_models)):
-  * The model directory must have the correct structure (e.g. `config.json` is present).
-  * `config.json` must contain `auto_map.AutoModel`.
-- be a Transformers backend for vLLM compatible model (see <project:#writing-custom-models>):
-  * Customisation should be done in the base model (e.g. in `MyModel`, not `MyModelForCausalLM`).
-
-If the compatible model is:
-
-- on the Hugging Face Model Hub, simply set `trust_remote_code=True` for <project:#offline-inference> or `--trust-remote-code` for the <project:#openai-compatible-server>.
-- in a local directory, simply pass the directory path to `model=<MODEL_DIR>` for <project:#offline-inference> or `vllm serve <MODEL_DIR>` for the <project:#openai-compatible-server>.
-
-This means that, with the Transformers backend for vLLM, new models can be used before they are officially supported in Transformers or vLLM!
-
-(writing-custom-models)=
-
-#### Writing custom models
-
-This section details the necessary modifications to make to a Transformers compatible custom model that make it compatible with the Transformers backend for vLLM. (We assume that a Transformers compatible custom model has already been created, see [Transformers - Customizing models](https://huggingface.co/docs/transformers/en/custom_models)).
-
-To make your model compatible with the Transformers backend, it needs:
-
-1. `kwargs` passed down through all modules from `MyModel` to `MyAttention`.
-2. `MyAttention` must use `ALL_ATTENTION_FUNCTIONS` to call attention.
-3. `MyModel` must contain `_supports_attention_backend = True`.
-
-```{code-block} python
-:caption: modeling_my_model.py
-
-from transformers import PreTrainedModel
-from torch import nn
-
-class MyAttention(nn.Module):
-
-  def forward(self, hidden_states, **kwargs):
-    ...
-    attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]
-    attn_output, attn_weights = attention_interface(
-      self,
-      query_states,
-      key_states,
-      value_states,
-      **kwargs,
-    )
-    ...
-
-class MyModel(PreTrainedModel):
-  _supports_attention_backend = True
-```
-
-Here is what happens in the background when this model is loaded:
-
-1. The config is loaded.
-2. `MyModel` Python class is loaded from the `auto_map` in config, and we check that the model `is_backend_compatible()`.
-3. `MyModel` is loaded into `TransformersForCausalLM` (see <gh-file:vllm/model_executor/models/transformers.py>) which sets `self.config._attn_implementation = "vllm"` so that vLLM's attention layer is used.
-
-That's it!
-
-For your model to be compatible with vLLM's tensor parallel and/or pipeline parallel features, you must add `base_model_tp_plan` and/or `base_model_pp_plan` to your model's config class:
-
-```{code-block} python
-:caption: configuration_my_model.py
-
-from transformers import PretrainedConfig
-
-class MyConfig(PretrainedConfig):
-  base_model_tp_plan = {
-    "layers.*.self_attn.k_proj": "colwise",
-    "layers.*.self_attn.v_proj": "colwise",
-    "layers.*.self_attn.o_proj": "rowwise",
-    "layers.*.mlp.gate_proj": "colwise",
-    "layers.*.mlp.up_proj": "colwise",
-    "layers.*.mlp.down_proj": "rowwise",
-  }
-  base_model_pp_plan = {
-    "embed_tokens": (["input_ids"], ["inputs_embeds"]),
-    "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
-    "norm": (["hidden_states"], ["hidden_states"]),
-  }
-```
-
-- `base_model_tp_plan` is a `dict` that maps fully qualified layer name patterns to tensor parallel styles (currently only `"colwise"` and `"rowwise"` are supported).
-- `base_model_pp_plan` is a `dict` that maps direct child layer names to `tuple`s of `list`s of `str`s:
-  * You only need to do this for layers which are not present on all pipeline stages
-  * vLLM assumes that there will be only one `nn.ModuleList`, which is distributed across the pipeline stages
-  * The `list` in the first element of the `tuple` contains the names of the input arguments
-  * The `list` in the last element of the `tuple` contains the names of the variables the layer outputs to in your modeling code
-
-## Loading a Model
-
-### Hugging Face Hub
-
-By default, vLLM loads models from [Hugging Face (HF) Hub](https://huggingface.co/models). To change the download path for models, you can set the `HF_HOME` environment variable; for more details, refer to [their official documentation](https://huggingface.co/docs/huggingface_hub/package_reference/environment_variables#hfhome).
-
-To determine whether a given model is natively supported, you can check the `config.json` file inside the HF repository.
-If the `"architectures"` field contains a model architecture listed below, then it should be natively supported.
-
-Models do not _need_ to be natively supported to be used in vLLM.
-The [Transformers backend](#transformers-backend) enables you to run models directly using their Transformers implementation (or even remote code on the Hugging Face Model Hub!).
-
-:::{tip}
-The easiest way to check if your model is really supported at runtime is to run the program below:
-
-```python
-from vllm import LLM
-
-# For generative models (task=generate) only
-llm = LLM(model=..., task="generate")  # Name or path of your model
-output = llm.generate("Hello, my name is")
-print(output)
-
-# For pooling models (task={embed,classify,reward,score}) only
-llm = LLM(model=..., task="embed")  # Name or path of your model
-output = llm.encode("Hello, my name is")
-print(output)
-```
-
-If vLLM successfully returns text (for generative models) or hidden states (for pooling models), it indicates that your model is supported.
-:::
-
-Otherwise, please refer to [Adding a New Model](#new-model) for instructions on how to implement your model in vLLM.
-Alternatively, you can [open an issue on GitHub](https://github.com/vllm-project/vllm/issues/new/choose) to request vLLM support.
-
-#### Download a model
-
-If you prefer, you can use the Hugging Face CLI to [download a model](https://huggingface.co/docs/huggingface_hub/guides/cli#huggingface-cli-download) or specific files from a model repository:
-
-```console
-# Download a model
-huggingface-cli download HuggingFaceH4/zephyr-7b-beta
-
-# Specify a custom cache directory
-huggingface-cli download HuggingFaceH4/zephyr-7b-beta --cache-dir ./path/to/cache
-
-# Download a specific file from a model repo
-huggingface-cli download HuggingFaceH4/zephyr-7b-beta eval_results.json
-```
-
-#### List the downloaded models
-
-Use the Hugging Face CLI to [manage models](https://huggingface.co/docs/huggingface_hub/guides/manage-cache#scan-your-cache) stored in local cache:
-
-```console
-# List cached models
-huggingface-cli scan-cache
-
-# Show detailed (verbose) output
-huggingface-cli scan-cache -v
-
-# Specify a custom cache directory
-huggingface-cli scan-cache --dir ~/.cache/huggingface/hub
-```
-
-#### Delete a cached model
-
-Use the Hugging Face CLI to interactively [delete downloaded models](https://huggingface.co/docs/huggingface_hub/guides/manage-cache#clean-your-cache) from the cache:
-
-```console
-# The `delete-cache` command requires extra dependencies to work with the TUI.
-# Please run `pip install huggingface_hub[cli]` to install them.
-
-# Launch the interactive TUI to select models to delete
-$ huggingface-cli delete-cache
-? Select revisions to delete: 1 revisions selected counting for 438.9M.
-  ○ None of the following (if selected, nothing will be deleted).
-Model BAAI/bge-base-en-v1.5 (438.9M, used 1 week ago)
-❯ ◉ a5beb1e3: main # modified 1 week ago
-
-Model BAAI/bge-large-en-v1.5 (1.3G, used 1 week ago)
-  ○ d4aa6901: main # modified 1 week ago
-
-Model BAAI/bge-reranker-base (1.1G, used 4 weeks ago)
-  ○ 2cfc18c9: main # modified 4 weeks ago
-
-Press <space> to select, <enter> to validate and <ctrl+c> to quit without modification.
-
-# Need to confirm after selected
-? Select revisions to delete: 1 revision(s) selected.
-? 1 revisions selected counting for 438.9M. Confirm deletion ? Yes
-Start deletion.
-Done. Deleted 1 repo(s) and 0 revision(s) for a total of 438.9M.
-```
-
-#### Using a proxy
-
-Here are some tips for loading/downloading models from Hugging Face using a proxy:
-
-- Set the proxy globally for your session (or set it in the profile file):
-
-```shell
-export http_proxy=http://your.proxy.server:port
-export https_proxy=http://your.proxy.server:port
-```
-
-- Set the proxy for just the current command:
-
-```shell
-https_proxy=http://your.proxy.server:port huggingface-cli download <model_name>
-
-# or use vllm cmd directly
-https_proxy=http://your.proxy.server:port  vllm serve <model_name> --disable-log-requests
-```
-
-- Set the proxy in the Python interpreter:
-
-```python
-import os
-
-os.environ['http_proxy'] = 'http://your.proxy.server:port'
-os.environ['https_proxy'] = 'http://your.proxy.server:port'
-```
-
-### ModelScope
-
-To use models from [ModelScope](https://www.modelscope.cn) instead of Hugging Face Hub, set an environment variable:
-
-```shell
-export VLLM_USE_MODELSCOPE=True
-```
-
-Then load the model with `trust_remote_code=True`:
-
-```python
-from vllm import LLM
-
-llm = LLM(model=..., revision=..., task=..., trust_remote_code=True)
-
-# For generative models (task=generate) only
-output = llm.generate("Hello, my name is")
-print(output)
-
-# For pooling models (task={embed,classify,reward,score}) only
-output = llm.encode("Hello, my name is")
-print(output)
-```
-
-(feature-status-legend)=
-
-## Feature Status Legend
-
-- ✅︎ indicates that the feature is supported for the model.
-
-- 🚧 indicates that the feature is planned but not yet supported for the model.
-
-- ⚠️ indicates that the feature is available but may have known issues or limitations.
-
-(supported-text-models)=
-
-## List of Text-only Language Models
-
-### Generative Models
-
-See [this page](#generative-models) for more information on how to use generative models.
-
-#### Text Generation
-
-Specified using `--task generate`.
-
-:::{list-table}
-:widths: 25 25 50 5 5
-:header-rows: 1
-
-- * Architecture
-  * Models
-  * Example HF Models
-  * [LoRA](#lora-adapter)
-  * [PP](#distributed-serving)
-- * `AquilaForCausalLM`
-  * Aquila, Aquila2
-  * `BAAI/Aquila-7B`, `BAAI/AquilaChat-7B`, etc.
-  * ✅︎
-  * ✅︎
-- * `ArcticForCausalLM`
-  * Arctic
-  * `Snowflake/snowflake-arctic-base`, `Snowflake/snowflake-arctic-instruct`, etc.
-  *
-  * ✅︎
-- * `BaiChuanForCausalLM`
-  * Baichuan2, Baichuan
-  * `baichuan-inc/Baichuan2-13B-Chat`, `baichuan-inc/Baichuan-7B`, etc.
-  * ✅︎
-  * ✅︎
-- * `BambaForCausalLM`
-  * Bamba
-  * `ibm-ai-platform/Bamba-9B-fp8`, `ibm-ai-platform/Bamba-9B`
-  *
-  *
-- * `BloomForCausalLM`
-  * BLOOM, BLOOMZ, BLOOMChat
-  * `bigscience/bloom`, `bigscience/bloomz`, etc.
-  *
-  * ✅︎
-- * `BartForConditionalGeneration`
-  * BART
-  * `facebook/bart-base`, `facebook/bart-large-cnn`, etc.
-  *
-  *
-- * `ChatGLMModel`, `ChatGLMForConditionalGeneration`
-  * ChatGLM
-  * `THUDM/chatglm2-6b`, `THUDM/chatglm3-6b`, `ShieldLM-6B-chatglm3`, etc.
-  * ✅︎
-  * ✅︎
-- * `CohereForCausalLM`, `Cohere2ForCausalLM`
-  * Command-R
-  * `CohereForAI/c4ai-command-r-v01`, `CohereForAI/c4ai-command-r7b-12-2024`, etc.
-  * ✅︎
-  * ✅︎
-- * `DbrxForCausalLM`
-  * DBRX
-  * `databricks/dbrx-base`, `databricks/dbrx-instruct`, etc.
-  *
-  * ✅︎
-- * `DeciLMForCausalLM`
-  * DeciLM
-  * `nvidia/Llama-3_3-Nemotron-Super-49B-v1`, etc.
-  *
-  * ✅︎
-- * `DeepseekForCausalLM`
-  * DeepSeek
-  * `deepseek-ai/deepseek-llm-67b-base`, `deepseek-ai/deepseek-llm-7b-chat` etc.
-  *
-  * ✅︎
-- * `DeepseekV2ForCausalLM`
-  * DeepSeek-V2
-  * `deepseek-ai/DeepSeek-V2`, `deepseek-ai/DeepSeek-V2-Chat` etc.
-  *
-  * ✅︎
-- * `DeepseekV3ForCausalLM`
-  * DeepSeek-V3
-  * `deepseek-ai/DeepSeek-V3-Base`, `deepseek-ai/DeepSeek-V3` etc.
-  *
-  * ✅︎
-- * `ExaoneForCausalLM`
-  * EXAONE-3
-  * `LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `FalconForCausalLM`
-  * Falcon
-  * `tiiuae/falcon-7b`, `tiiuae/falcon-40b`, `tiiuae/falcon-rw-7b`, etc.
-  *
-  * ✅︎
-- * `FalconMambaForCausalLM`
-  * FalconMamba
-  * `tiiuae/falcon-mamba-7b`, `tiiuae/falcon-mamba-7b-instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `FalconH1ForCausalLM`
-  * Falcon-H1
-  * `tiiuae/Falcon-H1-34B-Base`, `tiiuae/Falcon-H1-34B-Instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `GemmaForCausalLM`
-  * Gemma
-  * `google/gemma-2b`, `google/gemma-1.1-2b-it`, etc.
-  * ✅︎
-  * ✅︎
-- * `Gemma2ForCausalLM`
-  * Gemma 2
-  * `google/gemma-2-9b`, `google/gemma-2-27b`, etc.
-  * ✅︎
-  * ✅︎
-- * `Gemma3ForCausalLM`
-  * Gemma 3
-  * `google/gemma-3-1b-it`, etc.
-  * ✅︎
-  * ✅︎
-- * `GlmForCausalLM`
-  * GLM-4
-  * `THUDM/glm-4-9b-chat-hf`, etc.
-  * ✅︎
-  * ✅︎
-- * `Glm4ForCausalLM`
-  * GLM-4-0414
-  * `THUDM/GLM-4-32B-0414`, etc.
-  * ✅︎
-  * ✅︎
-- * `GPT2LMHeadModel`
-  * GPT-2
-  * `gpt2`, `gpt2-xl`, etc.
-  *
-  * ✅︎
-- * `GPTBigCodeForCausalLM`
-  * StarCoder, SantaCoder, WizardCoder
-  * `bigcode/starcoder`, `bigcode/gpt_bigcode-santacoder`, `WizardLM/WizardCoder-15B-V1.0`, etc.
-  * ✅︎
-  * ✅︎
-- * `GPTJForCausalLM`
-  * GPT-J
-  * `EleutherAI/gpt-j-6b`, `nomic-ai/gpt4all-j`, etc.
-  *
-  * ✅︎
-- * `GPTNeoXForCausalLM`
-  * GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM
-  * `EleutherAI/gpt-neox-20b`, `EleutherAI/pythia-12b`, `OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5`, `databricks/dolly-v2-12b`, `stabilityai/stablelm-tuned-alpha-7b`, etc.
-  *
-  * ✅︎
-- * `GraniteForCausalLM`
-  * Granite 3.0, Granite 3.1, PowerLM
-  * `ibm-granite/granite-3.0-2b-base`, `ibm-granite/granite-3.1-8b-instruct`, `ibm/PowerLM-3b`, etc.
-  * ✅︎
-  * ✅︎
-- * `GraniteMoeForCausalLM`
-  * Granite 3.0 MoE, PowerMoE
-  * `ibm-granite/granite-3.0-1b-a400m-base`, `ibm-granite/granite-3.0-3b-a800m-instruct`, `ibm/PowerMoE-3b`, etc.
-  * ✅︎
-  * ✅︎
-- * `GraniteMoeHybridForCausalLM`
-  * Granite 4.0 MoE Hybrid
-  * `ibm-granite/granite-4.0-tiny-preview`, etc.
-  * ✅︎
-  * ✅︎
-- * `GraniteMoeSharedForCausalLM`
-  * Granite MoE Shared
-  * `ibm-research/moe-7b-1b-active-shared-experts` (test model)
-  * ✅︎
-  * ✅︎
-- * `GritLM`
-  * GritLM
-  * `parasail-ai/GritLM-7B-vllm`.
-  * ✅︎
-  * ✅︎
-- * `Grok1ModelForCausalLM`
-  * Grok1
-  * `hpcai-tech/grok-1`.
-  * ✅︎
-  * ✅︎
-- * `InternLMForCausalLM`
-  * InternLM
-  * `internlm/internlm-7b`, `internlm/internlm-chat-7b`, etc.
-  * ✅︎
-  * ✅︎
-- * `InternLM2ForCausalLM`
-  * InternLM2
-  * `internlm/internlm2-7b`, `internlm/internlm2-chat-7b`, etc.
-  * ✅︎
-  * ✅︎
-- * `InternLM3ForCausalLM`
-  * InternLM3
-  * `internlm/internlm3-8b-instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `JAISLMHeadModel`
-  * Jais
-  * `inceptionai/jais-13b`, `inceptionai/jais-13b-chat`, `inceptionai/jais-30b-v3`, `inceptionai/jais-30b-chat-v3`, etc.
-  *
-  * ✅︎
-- * `JambaForCausalLM`
-  * Jamba
-  * `ai21labs/AI21-Jamba-1.5-Large`, `ai21labs/AI21-Jamba-1.5-Mini`, `ai21labs/Jamba-v0.1`, etc.
-  * ✅︎
-  * ✅︎
-- * `LlamaForCausalLM`
-  * Llama 3.1, Llama 3, Llama 2, LLaMA, Yi
-  * `meta-llama/Meta-Llama-3.1-405B-Instruct`, `meta-llama/Meta-Llama-3.1-70B`, `meta-llama/Meta-Llama-3-70B-Instruct`, `meta-llama/Llama-2-70b-hf`, `01-ai/Yi-34B`, etc.
-  * ✅︎
-  * ✅︎
-- * `MambaForCausalLM`
-  * Mamba
-  * `state-spaces/mamba-130m-hf`, `state-spaces/mamba-790m-hf`, `state-spaces/mamba-2.8b-hf`, etc.
-  *
-  * ✅︎
-- * `MiniCPMForCausalLM`
-  * MiniCPM
-  * `openbmb/MiniCPM-2B-sft-bf16`, `openbmb/MiniCPM-2B-dpo-bf16`, `openbmb/MiniCPM-S-1B-sft`, etc.
-  * ✅︎
-  * ✅︎
-- * `MiniCPM3ForCausalLM`
-  * MiniCPM3
-  * `openbmb/MiniCPM3-4B`, etc.
-  * ✅︎
-  * ✅︎
-- * `MistralForCausalLM`
-  * Mistral, Mistral-Instruct
-  * `mistralai/Mistral-7B-v0.1`, `mistralai/Mistral-7B-Instruct-v0.1`, etc.
-  * ✅︎
-  * ✅︎
-- * `MixtralForCausalLM`
-  * Mixtral-8x7B, Mixtral-8x7B-Instruct
-  * `mistralai/Mixtral-8x7B-v0.1`, `mistralai/Mixtral-8x7B-Instruct-v0.1`, `mistral-community/Mixtral-8x22B-v0.1`, etc.
-  * ✅︎
-  * ✅︎
-- * `MPTForCausalLM`
-  * MPT, MPT-Instruct, MPT-Chat, MPT-StoryWriter
-  * `mosaicml/mpt-7b`, `mosaicml/mpt-7b-storywriter`, `mosaicml/mpt-30b`, etc.
-  *
-  * ✅︎
-- * `NemotronForCausalLM`
-  * Nemotron-3, Nemotron-4, Minitron
-  * `nvidia/Minitron-8B-Base`, `mgoin/Nemotron-4-340B-Base-hf-FP8`, etc.
-  * ✅︎
-  * ✅︎
-- * `OLMoForCausalLM`
-  * OLMo
-  * `allenai/OLMo-1B-hf`, `allenai/OLMo-7B-hf`, etc.
-  *
-  * ✅︎
-- * `OLMo2ForCausalLM`
-  * OLMo2
-  * `allenai/OLMo-2-0425-1B`, etc.
-  *
-  * ✅︎
-- * `OLMoEForCausalLM`
-  * OLMoE
-  * `allenai/OLMoE-1B-7B-0924`, `allenai/OLMoE-1B-7B-0924-Instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `OPTForCausalLM`
-  * OPT, OPT-IML
-  * `facebook/opt-66b`, `facebook/opt-iml-max-30b`, etc.
-  *
-  * ✅︎
-- * `OrionForCausalLM`
-  * Orion
-  * `OrionStarAI/Orion-14B-Base`, `OrionStarAI/Orion-14B-Chat`, etc.
-  *
-  * ✅︎
-- * `PhiForCausalLM`
-  * Phi
-  * `microsoft/phi-1_5`, `microsoft/phi-2`, etc.
-  * ✅︎
-  * ✅︎
-- * `Phi3ForCausalLM`
-  * Phi-4, Phi-3
-  * `microsoft/Phi-4-mini-instruct`, `microsoft/Phi-4`, `microsoft/Phi-3-mini-4k-instruct`, `microsoft/Phi-3-mini-128k-instruct`, `microsoft/Phi-3-medium-128k-instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `Phi3SmallForCausalLM`
-  * Phi-3-Small
-  * `microsoft/Phi-3-small-8k-instruct`, `microsoft/Phi-3-small-128k-instruct`, etc.
-  *
-  * ✅︎
-- * `PhiMoEForCausalLM`
-  * Phi-3.5-MoE
-  * `microsoft/Phi-3.5-MoE-instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `PersimmonForCausalLM`
-  * Persimmon
-  * `adept/persimmon-8b-base`, `adept/persimmon-8b-chat`, etc.
-  *
-  * ✅︎
-- * `Plamo2ForCausalLM`
-  * PLaMo2
-  * `pfnet/plamo-2-1b`, `pfnet/plamo-2-8b`, etc.
-  *
-  *
-- * `QWenLMHeadModel`
-  * Qwen
-  * `Qwen/Qwen-7B`, `Qwen/Qwen-7B-Chat`, etc.
-  * ✅︎
-  * ✅︎
-- * `Qwen2ForCausalLM`
-  * QwQ, Qwen2
-  * `Qwen/QwQ-32B-Preview`, `Qwen/Qwen2-7B-Instruct`, `Qwen/Qwen2-7B`, etc.
-  * ✅︎
-  * ✅︎
-- * `Qwen2MoeForCausalLM`
-  * Qwen2MoE
-  * `Qwen/Qwen1.5-MoE-A2.7B`, `Qwen/Qwen1.5-MoE-A2.7B-Chat`, etc.
-  *
-  * ✅︎
-- * `Qwen3ForCausalLM`
-  * Qwen3
-  * `Qwen/Qwen3-8B`, etc.
-  * ✅︎
-  * ✅︎
-- * `Qwen3MoeForCausalLM`
-  * Qwen3MoE
-  * `Qwen/Qwen3-30B-A3B`, etc.
-  *
-  * ✅︎
-- * `StableLmForCausalLM`
-  * StableLM
-  * `stabilityai/stablelm-3b-4e1t`, `stabilityai/stablelm-base-alpha-7b-v2`, etc.
-  *
-  * ✅︎
-- * `Starcoder2ForCausalLM`
-  * Starcoder2
-  * `bigcode/starcoder2-3b`, `bigcode/starcoder2-7b`, `bigcode/starcoder2-15b`, etc.
-  *
-  * ✅︎
-- * `SolarForCausalLM`
-  * Solar Pro
-  * `upstage/solar-pro-preview-instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `TeleChat2ForCausalLM`
-  * TeleChat2
-  * `Tele-AI/TeleChat2-3B`, `Tele-AI/TeleChat2-7B`, `Tele-AI/TeleChat2-35B`, etc.
-  * ✅︎
-  * ✅︎
-- * `TeleFLMForCausalLM`
-  * TeleFLM
-  * `CofeAI/FLM-2-52B-Instruct-2407`, `CofeAI/Tele-FLM`, etc.
-  * ✅︎
-  * ✅︎
-- * `XverseForCausalLM`
-  * XVERSE
-  * `xverse/XVERSE-7B-Chat`, `xverse/XVERSE-13B-Chat`, `xverse/XVERSE-65B-Chat`, etc.
-  * ✅︎
-  * ✅︎
-- * `MiniMaxText01ForCausalLM`
-  * MiniMax-Text
-  * `MiniMaxAI/MiniMax-Text-01`, etc.
-  *
-  * ✅︎
-- * `Zamba2ForCausalLM`
-  * Zamba2
-  * `Zyphra/Zamba2-7B-instruct`, `Zyphra/Zamba2-2.7B-instruct`, `Zyphra/Zamba2-1.2B-instruct`, etc.
-  *
-  *
-- * `MiMoForCausalLM`
-  * MiMo
-  * `XiaomiMiMo/MiMo-7B-RL`, etc.
-  *
-  *
-:::
-
-:::{note}
-Currently, the ROCm version of vLLM supports Mistral and Mixtral only for context lengths up to 4096.
-:::
-
-### Pooling Models
-
-See [this page](pooling-models) for more information on how to use pooling models.
-
-:::{important}
-Since some model architectures support both generative and pooling tasks,
-you should explicitly specify the task type to ensure that the model is used in pooling mode instead of generative mode.
-:::
-
-#### Text Embedding
-
-Specified using `--task embed`.
-
-:::{list-table}
-:widths: 25 25 50 5 5
-:header-rows: 1
-
-- * Architecture
-  * Models
-  * Example HF Models
-  * [LoRA](#lora-adapter)
-  * [PP](#distributed-serving)
-- * `BertModel`
-  * BERT-based
-  * `BAAI/bge-base-en-v1.5`, `Snowflake/snowflake-arctic-embed-xs`, etc.
-  *
-  *
-- * `Gemma2Model`
-  * Gemma 2-based
-  * `BAAI/bge-multilingual-gemma2`, etc.
-  *
-  * ✅︎
-- * `GritLM`
-  * GritLM
-  * `parasail-ai/GritLM-7B-vllm`.
-  * ✅︎
-  * ✅︎
-- * `GteModel`
-  * Arctic-Embed-2.0-M
-  * `Snowflake/snowflake-arctic-embed-m-v2.0`.
-  *
-  *
-- * `GteNewModel`
-  * mGTE-TRM (see note)
-  * `Alibaba-NLP/gte-multilingual-base`, etc.
-  *
-  *
-- * `ModernBertModel`
-  * ModernBERT-based
-  * `Alibaba-NLP/gte-modernbert-base`, etc.
-  *
-  *
-- * `NomicBertModel`
-  * Nomic BERT
-  * `nomic-ai/nomic-embed-text-v1`, `nomic-ai/nomic-embed-text-v2-moe`, `Snowflake/snowflake-arctic-embed-m-long`, etc.
-  *
-  *
-- * `LlamaModel`, `LlamaForCausalLM`, `MistralModel`, etc.
-  * Llama-based
-  * `intfloat/e5-mistral-7b-instruct`, etc.
-  * ✅︎
-  * ✅︎
-- * `Qwen2Model`, `Qwen2ForCausalLM`
-  * Qwen2-based
-  * `ssmits/Qwen2-7B-Instruct-embed-base` (see note), `Alibaba-NLP/gte-Qwen2-7B-instruct` (see note), etc.
-  * ✅︎
-  * ✅︎
-- * `RobertaModel`, `RobertaForMaskedLM`
-  * RoBERTa-based
-  * `sentence-transformers/all-roberta-large-v1`, etc.
-  *
-  *
-- * `XLMRobertaModel`
-  * XLM-RoBERTa-based
-  * `intfloat/multilingual-e5-large`, `jinaai/jina-reranker-v2-base-multilingual`, `Snowflake/snowflake-arctic-embed-l-v2.0`, `jinaai/jina-embeddings-v3` (see note), etc.
-  *
-  *
-:::
-
-:::{note}
-`ssmits/Qwen2-7B-Instruct-embed-base` has an improperly defined Sentence Transformers config.
-You should manually set mean pooling by passing `--override-pooler-config '{"pooling_type": "MEAN"}'`.
-:::
-
-:::{note}
-The HF implementation of `Alibaba-NLP/gte-Qwen2-1.5B-instruct` is hardcoded to use causal attention despite what is shown in `config.json`. To compare vLLM vs HF results,
-you should set `--hf-overrides '{"is_causal": true}'` in vLLM so that the two implementations are consistent with each other.
-
-For both the 1.5B and 7B variants, you also need to enable `--trust-remote-code` for the correct tokenizer to be loaded.
-See [relevant issue on HF Transformers](https://github.com/huggingface/transformers/issues/34882).
-:::
-
-:::{note}
-`jinaai/jina-embeddings-v3` supports multiple tasks through LoRA, while vLLM currently only supports text-matching tasks by merging LoRA weights.
-:::
-
-:::{note}
-The second-generation GTE model (mGTE-TRM) is named `NewModel`. Because the name `NewModel` is too generic, you should set `--hf-overrides '{"architectures": ["GteNewModel"]}'` to specify the use of the `GteNewModel` architecture.
-:::
-
-If your model is not in the above list, we will try to automatically convert the model using
-{func}`~vllm.model_executor.models.adapters.as_embedding_model`. By default, the embeddings
-of the whole prompt are extracted from the normalized hidden state corresponding to the last token.
-
-#### Reward Modeling
-
-Specified using `--task reward`.
-
-:::{list-table}
-:widths: 25 25 50 5 5
-:header-rows: 1
-
-- * Architecture
-  * Models
-  * Example HF Models
-  * [LoRA](#lora-adapter)
-  * [PP](#distributed-serving)
-- * `InternLM2ForRewardModel`
-  * InternLM2-based
-  * `internlm/internlm2-1_8b-reward`, `internlm/internlm2-7b-reward`, etc.
-  * ✅︎
-  * ✅︎
-- * `LlamaForCausalLM`
-  * Llama-based
-  * `peiyi9979/math-shepherd-mistral-7b-prm`, etc.
-  * ✅︎
-  * ✅︎
-- * `Qwen2ForRewardModel`
-  * Qwen2-based
-  * `Qwen/Qwen2.5-Math-RM-72B`, etc.
-  * ✅︎
-  * ✅︎
-- * `Qwen2ForProcessRewardModel`
-  * Qwen2-based
-  * `Qwen/Qwen2.5-Math-PRM-7B`, `Qwen/Qwen2.5-Math-PRM-72B`, etc.
-  * ✅︎
-  * ✅︎
-:::
-
-If your model is not in the above list, we will try to automatically convert the model using
-{func}`~vllm.model_executor.models.adapters.as_reward_model`. By default, we return the hidden states of each token directly.
-
-:::{important}
-For process-supervised reward models such as `peiyi9979/math-shepherd-mistral-7b-prm`, the pooling config should be set explicitly,
-e.g.: `--override-pooler-config '{"pooling_type": "STEP", "step_tag_id": 123, "returned_token_ids": [456, 789]}'`.
-:::
-
-#### Classification
-
-Specified using `--task classify`.
-
-:::{list-table}
-:widths: 25 25 50 5 5
-:header-rows: 1
-
-- * Architecture
-  * Models
-  * Example HF Models
-  * [LoRA](#lora-adapter)
-  * [PP](#distributed-serving)
-- * `JambaForSequenceClassification`
-  * Jamba
-  * `ai21labs/Jamba-tiny-reward-dev`, etc.
-  * ✅︎
-  * ✅︎
-- * `Qwen2ForSequenceClassification`
-  * Qwen2-based
-  * `jason9693/Qwen2.5-1.5B-apeach`, etc.
-  * ✅︎
-  * ✅︎
-:::
-
-If your model is not in the above list, we will try to automatically convert the model using
-{func}`~vllm.model_executor.models.adapters.as_classification_model`. By default, the class probabilities are extracted from the softmaxed hidden state corresponding to the last token.
-
-#### Sentence Pair Scoring
-
-Specified using `--task score`.
-
-:::{list-table}
-:widths: 25 25 50 5 5
-:header-rows: 1
-
-- * Architecture
-  * Models
-  * Example HF Models
-  * [LoRA](#lora-adapter)
-  * [PP](#distributed-serving)
-- * `BertForSequenceClassification`
-  * BERT-based
-  * `cross-encoder/ms-marco-MiniLM-L-6-v2`, etc.
-  *
-  *
-- * `RobertaForSequenceClassification`
-  * RoBERTa-based
-  * `cross-encoder/quora-roberta-base`, etc.
-  *
-  *
-- * `XLMRobertaForSequenceClassification`
-  * XLM-RoBERTa-based
-  * `BAAI/bge-reranker-v2-m3`, etc.
-  *
-  *
-- * `ModernBertForSequenceClassification`
-  * ModernBert-based
-  * `Alibaba-NLP/gte-reranker-modernbert-base`, etc.
-  *
-  *
-:::
-
-(supported-mm-models)=
-
-## List of Multimodal Language Models
-
-The following modalities are supported depending on the model:
-
-- **T**ext
-- **I**mage
-- **V**ideo
-- **A**udio
-
-Any combination of modalities joined by `+` is supported.
-
-- e.g.: `T + I` means that the model supports text-only, image-only, and text-with-image inputs.
-
-On the other hand, modalities separated by `/` are mutually exclusive.
-
-- e.g.: `T / I` means that the model supports text-only and image-only inputs, but not text-with-image inputs.
-
-See [this page](#multimodal-inputs) on how to pass multi-modal inputs to the model.
-
-:::{important}
-**To enable multiple multi-modal items per text prompt in vLLM V0**, you have to set `limit_mm_per_prompt` (offline inference)
-or `--limit-mm-per-prompt` (online serving). For example, to enable passing up to 4 images per text prompt:
-
-Offline inference:
-
-```python
-from vllm import LLM
-
-llm = LLM(
-    model="Qwen/Qwen2-VL-7B-Instruct",
-    limit_mm_per_prompt={"image": 4},
-)
-```
-
-Online serving:
-
-```bash
-vllm serve Qwen/Qwen2-VL-7B-Instruct --limit-mm-per-prompt '{"image":4}'
-```
-
-**This is no longer required if you are using vLLM V1.**
-
-:::
-
-:::{note}
-vLLM currently only supports adding LoRA to the language backbone of multimodal models.
-:::
-
-### Generative Models
-
-See [this page](#generative-models) for more information on how to use generative models.
-
-#### Text Generation
-
-Specified using `--task generate`.
-
-:::{list-table}
-:widths: 25 25 15 20 5 5 5
-:header-rows: 1
-
-- * Architecture
-  * Models
-  * Inputs
-  * Example HF Models
-  * [LoRA](#lora-adapter)
-  * [PP](#distributed-serving)
-  * [V1](gh-issue:8779)
-- * `AriaForConditionalGeneration`
-  * Aria
-  * T + I<sup>+</sup>
-  * `rhymes-ai/Aria`
-  *
-  * ✅︎
-  * ✅︎
-- * `AyaVisionForConditionalGeneration`
-  * Aya Vision
-  * T + I<sup>+</sup>
-  * `CohereForAI/aya-vision-8b`, `CohereForAI/aya-vision-32b`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `Blip2ForConditionalGeneration`
-  * BLIP-2
-  * T + I<sup>E</sup>
-  * `Salesforce/blip2-opt-2.7b`, `Salesforce/blip2-opt-6.7b`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `ChameleonForConditionalGeneration`
-  * Chameleon
-  * T + I
-  * `facebook/chameleon-7b` etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `DeepseekVLV2ForCausalLM`<sup>^</sup>
-  * DeepSeek-VL2
-  * T + I<sup>+</sup>
-  * `deepseek-ai/deepseek-vl2-tiny`, `deepseek-ai/deepseek-vl2-small`, `deepseek-ai/deepseek-vl2` etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `Florence2ForConditionalGeneration`
-  * Florence-2
-  * T + I
-  * `microsoft/Florence-2-base`, `microsoft/Florence-2-large` etc.
-  *
-  *
-  *
-- * `FuyuForCausalLM`
-  * Fuyu
-  * T + I
-  * `adept/fuyu-8b` etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `Gemma3ForConditionalGeneration`
-  * Gemma 3
-  * T + I<sup>+</sup>
-  * `google/gemma-3-4b-it`, `google/gemma-3-27b-it`, etc.
-  * ✅︎
-  * ✅︎
-  * ⚠️
-- * `GLM4VForCausalLM`<sup>^</sup>
-  * GLM-4V
-  * T + I
-  * `THUDM/glm-4v-9b`, `THUDM/cogagent-9b-20241220` etc.
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `GraniteSpeechForConditionalGeneration`
-  * Granite Speech
-  * T + A
-  * `ibm-granite/granite-speech-3.3-8b`
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `H2OVLChatModel`
-  * H2OVL
-  * T + I<sup>E+</sup>
-  * `h2oai/h2ovl-mississippi-800m`, `h2oai/h2ovl-mississippi-2b`, etc.
-  *
-  * ✅︎
-  * ✅︎\*
-- * `Idefics3ForConditionalGeneration`
-  * Idefics3
-  * T + I
-  * `HuggingFaceM4/Idefics3-8B-Llama3` etc.
-  * ✅︎
-  *
-  * ✅︎
-- * `InternVLChatModel`
-  * InternVL 3.0, InternVideo 2.5, InternVL 2.5, Mono-InternVL, InternVL 2.0
-  * T + I<sup>E+</sup>
-  * `OpenGVLab/InternVL3-9B`, `OpenGVLab/InternVideo2_5_Chat_8B`, `OpenGVLab/InternVL2_5-4B`, `OpenGVLab/Mono-InternVL-2B`, `OpenGVLab/InternVL2-4B`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `KimiVLForConditionalGeneration`
-  * Kimi-VL-A3B-Instruct, Kimi-VL-A3B-Thinking
-  * T + I<sup>+</sup>
-  * `moonshotai/Kimi-VL-A3B-Instruct`, `moonshotai/Kimi-VL-A3B-Thinking`
-  *
-  *
-  * ✅︎
-- * `Llama4ForConditionalGeneration`
-  * Llama 4
-  * T + I<sup>+</sup>
-  * `meta-llama/Llama-4-Scout-17B-16E-Instruct`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `LlavaForConditionalGeneration`
-  * LLaVA-1.5
-  * T + I<sup>E+</sup>
-  * `llava-hf/llava-1.5-7b-hf`, `TIGER-Lab/Mantis-8B-siglip-llama3` (see note), etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `LlavaNextForConditionalGeneration`
-  * LLaVA-NeXT
-  * T + I<sup>E+</sup>
-  * `llava-hf/llava-v1.6-mistral-7b-hf`, `llava-hf/llava-v1.6-vicuna-7b-hf`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `LlavaNextVideoForConditionalGeneration`
-  * LLaVA-NeXT-Video
-  * T + V
-  * `llava-hf/LLaVA-NeXT-Video-7B-hf`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `LlavaOnevisionForConditionalGeneration`
-  * LLaVA-Onevision
-  * T + I<sup>+</sup> + V<sup>+</sup>
-  * `llava-hf/llava-onevision-qwen2-7b-ov-hf`, `llava-hf/llava-onevision-qwen2-0.5b-ov-hf`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `MiniCPMO`
-  * MiniCPM-O
-  * T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>E+</sup>
-  * `openbmb/MiniCPM-o-2_6`, etc.
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `MiniCPMV`
-  * MiniCPM-V
-  * T + I<sup>E+</sup> + V<sup>E+</sup>
-  * `openbmb/MiniCPM-V-2` (see note), `openbmb/MiniCPM-Llama3-V-2_5`, `openbmb/MiniCPM-V-2_6`, etc.
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `MiniMaxVL01ForConditionalGeneration`
-  * MiniMax-VL
-  * T + I<sup>E+</sup>
-  * `MiniMaxAI/MiniMax-VL-01`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `Mistral3ForConditionalGeneration`
-  * Mistral3
-  * T + I<sup>+</sup>
-  * `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, etc.
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `MllamaForConditionalGeneration`
-  * Llama 3.2
-  * T + I<sup>+</sup>
-  * `meta-llama/Llama-3.2-90B-Vision-Instruct`, `meta-llama/Llama-3.2-11B-Vision`, etc.
-  *
-  *
-  *
-- * `MolmoForCausalLM`
-  * Molmo
-  * T + I<sup>+</sup>
-  * `allenai/Molmo-7B-D-0924`, `allenai/Molmo-7B-O-0924`, etc.
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `NVLM_D_Model`
-  * NVLM-D 1.0
-  * T + I<sup>+</sup>
-  * `nvidia/NVLM-D-72B`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `Ovis`
-  * Ovis2, Ovis1.6
-  * T + I<sup>+</sup>
-  * `AIDC-AI/Ovis2-1B`, `AIDC-AI/Ovis1.6-Llama3.2-3B`, etc.
-  *
-  *
-  * ✅︎
-- * `PaliGemmaForConditionalGeneration`
-  * PaliGemma, PaliGemma 2
-  * T + I<sup>E</sup>
-  * `google/paligemma-3b-pt-224`, `google/paligemma-3b-mix-224`, `google/paligemma2-3b-ft-docci-448`, etc.
-  *
-  * ✅︎
-  * ⚠️
-- * `Phi3VForCausalLM`
-  * Phi-3-Vision, Phi-3.5-Vision
-  * T + I<sup>E+</sup>
-  * `microsoft/Phi-3-vision-128k-instruct`, `microsoft/Phi-3.5-vision-instruct`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `Phi4MMForCausalLM`
-  * Phi-4-multimodal
-  * T + I<sup>+</sup> / T + A<sup>+</sup> / I<sup>+</sup> + A<sup>+</sup>
-  * `microsoft/Phi-4-multimodal-instruct`, etc.
-  * ✅︎
-  *
-  * ✅︎
-- * `PixtralForConditionalGeneration`
-  * Pixtral
-  * T + I<sup>+</sup>
-  * `mistralai/Mistral-Small-3.1-24B-Instruct-2503`, `mistral-community/pixtral-12b`, etc.
-  *
-  * ✅︎
-  * ✅︎
-- * `QwenVLForConditionalGeneration`<sup>^</sup>
-  * Qwen-VL
-  * T + I<sup>E+</sup>
-  * `Qwen/Qwen-VL`, `Qwen/Qwen-VL-Chat`, etc.
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `Qwen2AudioForConditionalGeneration`
-  * Qwen2-Audio
-  * T + A<sup>+</sup>
-  * `Qwen/Qwen2-Audio-7B-Instruct`
-  *
-  * ✅︎
-  * ✅︎
-- * `Qwen2VLForConditionalGeneration`
-  * QVQ, Qwen2-VL
-  * T + I<sup>E+</sup> + V<sup>E+</sup>
-  * `Qwen/QVQ-72B-Preview`, `Qwen/Qwen2-VL-7B-Instruct`, `Qwen/Qwen2-VL-72B-Instruct`, etc.
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `Qwen2_5_VLForConditionalGeneration`
-  * Qwen2.5-VL
-  * T + I<sup>E+</sup> + V<sup>E+</sup>
-  * `Qwen/Qwen2.5-VL-3B-Instruct`, `Qwen/Qwen2.5-VL-72B-Instruct`, etc.
-  * ✅︎
-  * ✅︎
-  * ✅︎
-- * `Qwen2_5OmniThinkerForConditionalGeneration`
-  * Qwen2.5-Omni
-  * T + I<sup>E+</sup> + V<sup>E+</sup> + A<sup>+</sup>
-  * `Qwen/Qwen2.5-Omni-7B`
-  *
-  * ✅︎
-  * ✅︎\*
-- * `SkyworkR1VChatModel`
-  * Skywork-R1V-38B
-  * T + I
-  * `Skywork/Skywork-R1V-38B`
-  *
-  * ✅︎
-  * ✅︎
-- * `SmolVLMForConditionalGeneration`
-  * SmolVLM2
-  * T + I
-  * `SmolVLM2-2.2B-Instruct`
-  *
-  * ✅︎
-  * ✅︎
-- * `UltravoxModel`
-  * Ultravox
-  * T + A<sup>E+</sup>
-  * `fixie-ai/ultravox-v0_5-llama-3_2-1b`
-  * ✅︎
-  * ✅︎
-  * ✅︎
-:::
-
-<sup>^</sup> You need to set the architecture name via `--hf-overrides` to match the one in vLLM.  
-&nbsp;&nbsp;&nbsp;&nbsp;• For example, to use DeepSeek-VL2 series models:  
-&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;`--hf-overrides '{"architectures": ["DeepseekVLV2ForCausalLM"]}'`  
-<sup>E</sup> Pre-computed embeddings can be inputted for this modality.  
-<sup>+</sup> Multiple items can be inputted per text prompt for this modality.
-
-:::{warning}
-Both V0 and V1 support `Gemma3ForConditionalGeneration` for text-only inputs.
-However, there are differences in how they handle text + image inputs:
-
-V0 correctly implements the model's attention pattern:
-- Uses bidirectional attention between the image tokens corresponding to the same image
-- Uses causal attention for other tokens
-- Implemented via (naive) PyTorch SDPA with masking tensors
-- Note: May use significant memory for long prompts with image
-
-V1 currently uses a simplified attention pattern:
-- Uses causal attention for all tokens, including image tokens
-- Generates reasonable outputs but does not match the original model's attention for text + image inputs, especially when `{"do_pan_and_scan": true}`
-- Will be updated in the future to support the correct behavior
-
-This limitation exists because the model's mixed attention pattern (bidirectional for images, causal otherwise) is not yet supported by vLLM's attention backends.
-:::
-
-:::{note}
-`h2oai/h2ovl-mississippi-2b` will be available in V1 once we support head size 80.
-:::
-
-:::{note}
-To use `TIGER-Lab/Mantis-8B-siglip-llama3`, you have to pass `--hf_overrides '{"architectures": ["MantisForConditionalGeneration"]}'` when running vLLM.
-:::
-
-:::{warning}
-The output quality of `AllenAI/Molmo-7B-D-0924` (especially in object localization tasks) has deteriorated in recent updates.
-
-For the best results, we recommend using the following dependency versions (tested on A10 and L40):
-
-```text
-# Core vLLM-compatible dependencies with Molmo accuracy setup (tested on L40)
-torch==2.5.1
-torchvision==0.20.1
-transformers==4.48.1
-tokenizers==0.21.0
-tiktoken==0.7.0
-vllm==0.7.0
-
-# Optional but recommended for improved performance and stability
-triton==3.1.0
-xformers==0.0.28.post3
-uvloop==0.21.0
-protobuf==5.29.3
-openai==1.60.2
-opencv-python-headless==4.11.0.86
-pillow==10.4.0
-
-# Installed FlashAttention (for float16 only)
-flash-attn>=2.5.6  # Not used in float32, but should be documented
-```
-
-**Note:** Make sure you understand the security implications of using outdated packages.
-:::
-
-:::{note}
-The official `openbmb/MiniCPM-V-2` doesn't work yet, so we need to use a fork (`HwwwH/MiniCPM-V-2`) for now.
-For more details, please see: <gh-pr:4087#issuecomment-2250397630>
-:::
-
-:::{warning}
-Our PaliGemma implementations have the same problem as Gemma 3 (see above) for both V0 and V1.
-:::
-
-:::{note}
-To use Qwen2.5-Omni, you have to install Hugging Face Transformers library from source via
-`pip install git+https://github.com/huggingface/transformers.git`.
-
-Read audio from video pre-processing is currently supported on V0 (but not V1), because overlapping modalities is not yet supported in V1.
-`--mm-processor-kwargs '{"use_audio_in_video": true}'`.
-:::
-
-### Pooling Models
-
-See [this page](pooling-models) for more information on how to use pooling models.
-
-:::{important}
-Since some model architectures support both generative and pooling tasks,
-you should explicitly specify the task type to ensure that the model is used in pooling mode instead of generative mode.
-:::
-
-#### Text Embedding
-
-Specified using `--task embed`.
-
-Any text generation model can be converted into an embedding model by passing `--task embed`.
-
-:::{note}
-To get the best results, you should use pooling models that are specifically trained as such.
-:::
-
-The following table lists those that are tested in vLLM.
-
-:::{list-table}
-:widths: 25 25 15 25 5 5
-:header-rows: 1
-
-- * Architecture
-  * Models
-  * Inputs
-  * Example HF Models
-  * [LoRA](#lora-adapter)
-  * [PP](#distributed-serving)
-- * `LlavaNextForConditionalGeneration`
-  * LLaVA-NeXT-based
-  * T / I
-  * `royokong/e5-v`
-  *
-  * ✅︎
-- * `Phi3VForCausalLM`
-  * Phi-3-Vision-based
-  * T + I
-  * `TIGER-Lab/VLM2Vec-Full`
-  * 🚧
-  * ✅︎
-- * `Qwen2VLForConditionalGeneration`
-  * Qwen2-VL-based
-  * T + I
-  * `MrLight/dse-qwen2-2b-mrl-v1`
-  *
-  * ✅︎
-:::
-
-#### Transcription
-
-Specified using `--task transcription`.
-
-Speech2Text models trained specifically for Automatic Speech Recognition.
-
-:::{list-table}
-:widths: 25 25 25 5 5
-:header-rows: 1
-
-- * Architecture
-  * Models
-  * Example HF Models
-  * [LoRA](#lora-adapter)
-  * [PP](#distributed-serving)
-- * `Whisper`
-  * Whisper-based
-  * `openai/whisper-large-v3-turbo`
-  * 🚧
-  * 🚧
-:::
-
-_________________
-
-## Model Support Policy
-
-At vLLM, we are committed to facilitating the integration and support of third-party models within our ecosystem. Our approach is designed to balance the need for robustness and the practical limitations of supporting a wide range of models. Here’s how we manage third-party model support:
-
-1. **Community-Driven Support**: We encourage community contributions for adding new models. When a user requests support for a new model, we welcome pull requests (PRs) from the community. These contributions are evaluated primarily on the sensibility of the output they generate, rather than strict consistency with existing implementations such as those in transformers. **Call for contribution:** PRs coming directly from model vendors are greatly appreciated!
-
-2. **Best-Effort Consistency**: While we aim to maintain a level of consistency between the models implemented in vLLM and other frameworks like transformers, complete alignment is not always feasible. Factors like acceleration techniques and the use of low-precision computations can introduce discrepancies. Our commitment is to ensure that the implemented models are functional and produce sensible results.
-
-    :::{tip}
-    When comparing the output of `model.generate` from Hugging Face Transformers with the output of `llm.generate` from vLLM, note that the former reads the model's generation config file (i.e., [generation_config.json](https://github.com/huggingface/transformers/blob/19dabe96362803fb0a9ae7073d03533966598b17/src/transformers/generation/utils.py#L1945)) and applies the default parameters for generation, while the latter only uses the parameters passed to the function. Ensure all sampling parameters are identical when comparing outputs.
-    :::
-
-3. **Issue Resolution and Model Updates**: Users are encouraged to report any bugs or issues they encounter with third-party models. Proposed fixes should be submitted via PRs, with a clear explanation of the problem and the rationale behind the proposed solution. If a fix for one model impacts another, we rely on the community to highlight and address these cross-model dependencies. Note: for bugfix PRs, it is good etiquette to inform the original author to seek their feedback.
-
-4. **Monitoring and Updates**: Users interested in specific models should monitor the commit history for those models (e.g., by tracking changes in the main/vllm/model_executor/models directory). This proactive approach helps users stay informed about updates and changes that may affect the models they use.
-
-5. **Selective Focus**: Our resources are primarily directed towards models with significant user interest and impact. Models that are less frequently used may receive less attention, and we rely on the community to play a more active role in their upkeep and improvement.
-
-Through this approach, vLLM fosters a collaborative environment where both the core development team and the broader community contribute to the robustness and diversity of the third-party models supported in our ecosystem.
-
-Note that, as an inference engine, vLLM does not introduce new models. Therefore, all models supported by vLLM are third-party models in this regard.
-
-We have the following levels of testing for models:
-
-1. **Strict Consistency**: We compare the output of the model with the output of the model in the HuggingFace Transformers library under greedy decoding. This is the most stringent test. Please refer to [models tests](https://github.com/vllm-project/vllm/blob/main/tests/models) for the models that have passed this test.
-2. **Output Sensibility**: We check if the output of the model is sensible and coherent, by measuring the perplexity of the output and checking for any obvious errors. This is a less stringent test.
-3. **Runtime Functionality**: We check if the model can be loaded and run without errors. This is the least stringent test. Please refer to [functionality tests](gh-dir:tests) and [examples](gh-dir:examples) for the models that have passed this test.
-4. **Community Feedback**: We rely on the community to provide feedback on the models. If a model is broken or not working as expected, we encourage users to raise issues to report it or open pull requests to fix it. The rest of the models fall under this category.
diff --git a/docs/source/serving/engine_args.md b/docs/source/serving/engine_args.md
deleted file mode 100644
index 9325a2406e8ca..0000000000000
--- a/docs/source/serving/engine_args.md
+++ /dev/null
@@ -1,36 +0,0 @@
-(engine-args)=
-
-# Engine Arguments
-
-Engine arguments control the behavior of the vLLM engine.
-
-- For [offline inference](#offline-inference), they are part of the arguments to `LLM` class.
-- For [online serving](#openai-compatible-server), they are part of the arguments to `vllm serve`.
-
-For references to all arguments available from `vllm serve` see the [serve args](#serve-args) documentation.
-
-Below, you can find an explanation of every engine argument:
-
-<!--- pyml disable-num-lines 7 no-space-in-emphasis -->
-```{eval-rst}
-.. argparse::
-    :module: vllm.engine.arg_utils
-    :func: _engine_args_parser
-    :prog: vllm serve
-    :nodefaultconst:
-    :markdownhelp:
-```
-
-## Async Engine Arguments
-
-Additional arguments are available to the asynchronous engine which is used for online serving:
-
-<!--- pyml disable-num-lines 7 no-space-in-emphasis -->
-```{eval-rst}
-.. argparse::
-    :module: vllm.engine.arg_utils
-    :func: _async_engine_args_parser
-    :prog: vllm serve
-    :nodefaultconst:
-    :markdownhelp:
-```
diff --git a/docs/source/serving/env_vars.md b/docs/source/serving/env_vars.md
deleted file mode 100644
index 9845241930a40..0000000000000
--- a/docs/source/serving/env_vars.md
+++ /dev/null
@@ -1,15 +0,0 @@
-# Environment Variables
-
-vLLM uses the following environment variables to configure the system:
-
-:::{warning}
-Please note that `VLLM_PORT` and `VLLM_HOST_IP` set the port and ip for vLLM's **internal usage**. It is not the port and ip for the API server. If you use `--host $VLLM_HOST_IP` and `--port $VLLM_PORT` to start the API server, it will not work.
-
-All environment variables used by vLLM are prefixed with `VLLM_`. **Special care should be taken for Kubernetes users**: please do not name the service as `vllm`, otherwise environment variables set by Kubernetes might conflict with vLLM's environment variables, because [Kubernetes sets environment variables for each service with the capitalized service name as the prefix](https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables).
-:::
-
-:::{literalinclude} ../../../vllm/envs.py
-:end-before: end-env-vars-definition
-:language: python
-:start-after: begin-env-vars-definition
-:::
diff --git a/docs/source/serving/integrations/index.md b/docs/source/serving/integrations/index.md
deleted file mode 100644
index e2b4c0814605b..0000000000000
--- a/docs/source/serving/integrations/index.md
+++ /dev/null
@@ -1,8 +0,0 @@
-# External Integrations
-
-:::{toctree}
-:maxdepth: 1
-
-langchain
-llamaindex
-:::
diff --git a/docs/source/serving/serve_args.md b/docs/source/serving/serve_args.md
deleted file mode 100644
index edb49f4ba6de4..0000000000000
--- a/docs/source/serving/serve_args.md
+++ /dev/null
@@ -1,47 +0,0 @@
-(serve-args)=
-
-# Server Arguments
-
-The `vllm serve` command is used to launch the OpenAI-compatible server.
-
-## CLI Arguments
-
-The following are all arguments available from the `vllm serve` command:
-
-<!--- pyml disable-num-lines 7 no-space-in-emphasis -->
-```{eval-rst}
-.. argparse::
-    :module: vllm.entrypoints.openai.cli_args
-    :func: create_parser_for_docs
-    :prog: vllm serve
-    :nodefaultconst:
-    :markdownhelp:
-```
-
-## Configuration file
-
-You can load CLI arguments via a [YAML](https://yaml.org/) config file.
-The argument names must be the long form of those outlined [above](#serve-args).
-
-For example:
-
-```yaml
-# config.yaml
-
-model: meta-llama/Llama-3.1-8B-Instruct
-host: "127.0.0.1"
-port: 6379
-uvicorn-log-level: "info"
-```
-
-To use the above config file:
-
-```bash
-vllm serve --config config.yaml
-```
-
-:::{note}
-In case an argument is supplied simultaneously using command line and the config file, the value from the command line will take precedence.
-The order of priorities is `command line > config file values > defaults`.
-e.g. `vllm serve SOME_MODEL --config config.yaml`, SOME_MODEL takes precedence over `model` in config file.
-:::
diff --git a/docs/source/training/rlhf.md b/docs/training/rlhf.md
similarity index 100%
rename from docs/source/training/rlhf.md
rename to docs/training/rlhf.md
diff --git a/docs/source/training/trl.md b/docs/training/trl.md
similarity index 66%
rename from docs/source/training/trl.md
rename to docs/training/trl.md
index ebdf593dbde52..c7c1a5a3bbd1e 100644
--- a/docs/source/training/trl.md
+++ b/docs/training/trl.md
@@ -6,8 +6,8 @@ Online methods such as GRPO or Online DPO require the model to generate completi
 
 See the guide [vLLM for fast generation in online methods](https://huggingface.co/docs/trl/main/en/speeding_up_training#vllm-for-fast-generation-in-online-methods) in the TRL documentation for more information.
 
-:::{seealso}
-For more information on the `use_vllm` flag you can provide to the configs of these online methods, see:
-- [`trl.GRPOConfig.use_vllm`](https://huggingface.co/docs/trl/main/en/grpo_trainer#trl.GRPOConfig.use_vllm)
-- [`trl.OnlineDPOConfig.use_vllm`](https://huggingface.co/docs/trl/main/en/online_dpo_trainer#trl.OnlineDPOConfig.use_vllm)
-:::
+!!! info
+    For more information on the `use_vllm` flag you can provide to the configs of these online methods, see:
+
+    - [`trl.GRPOConfig.use_vllm`](https://huggingface.co/docs/trl/main/en/grpo_trainer#trl.GRPOConfig.use_vllm)
+    - [`trl.OnlineDPOConfig.use_vllm`](https://huggingface.co/docs/trl/main/en/online_dpo_trainer#trl.OnlineDPOConfig.use_vllm)
diff --git a/mkdocs.yaml b/mkdocs.yaml
new file mode 100644
index 0000000000000..a1c6319bb0080
--- /dev/null
+++ b/mkdocs.yaml
@@ -0,0 +1,117 @@
+site_name: vLLM
+site_url: https://docs.vllm.ai
+repo_url: https://github.com/vllm-project/vllm
+exclude_docs: |
+  *.inc.md
+  *.template.md
+theme:
+  name: material
+  logo: assets/logos/vllm-logo-only-light.ico
+  favicon: assets/logos/vllm-logo-only-light.ico
+  palette:
+    # Palette toggle for automatic mode
+    - media: "(prefers-color-scheme)"
+      toggle:
+        icon: material/brightness-auto
+        name: Switch to light mode
+    # Palette toggle for light mode
+    - media: "(prefers-color-scheme: light)"
+      scheme: default
+      primary: white
+      toggle:
+        icon: material/brightness-7
+        name: Switch to dark mode
+    # Palette toggle for dark mode
+    - media: "(prefers-color-scheme: dark)"
+      scheme: slate
+      primary: black
+      toggle:
+        icon: material/brightness-2
+        name: Switch to system preference
+  features:
+    - content.code.copy
+    - content.tabs.link
+    - navigation.tracking
+    - navigation.tabs
+    - navigation.sections
+    - navigation.prune
+    - navigation.top
+    - search.highlight
+    - search.share
+    - toc.follow
+  custom_dir: docs/mkdocs/overrides
+
+hooks:
+  - docs/mkdocs/hooks/remove_announcement.py
+  - docs/mkdocs/hooks/generate_examples.py
+  - docs/mkdocs/hooks/url_schemes.py
+
+# Required to stop api-autonav from raising an error
+# https://github.com/tlambert03/mkdocs-api-autonav/issues/16
+nav:
+  - api
+
+plugins:
+  - meta
+  - search
+  - autorefs
+  - awesome-nav
+  # For API reference generation
+  - api-autonav:
+      modules: ["vllm"]
+      api_root_uri: "api"
+  - mkdocstrings:
+      handlers:
+        python:
+          options:
+            show_symbol_type_heading: true
+            show_symbol_type_toc: true
+            summary:
+              modules: true
+            show_if_no_docstring: true
+            show_signature_annotations: true
+            separate_signature: true
+            show_overloads: true
+            signature_crossrefs: true
+          inventories:
+          - https://docs.python.org/3/objects.inv
+          - https://typing-extensions.readthedocs.io/en/latest/objects.inv
+          - https://docs.aiohttp.org/en/stable/objects.inv
+          - https://pillow.readthedocs.io/en/stable/objects.inv
+          - https://numpy.org/doc/stable/objects.inv
+          - https://pytorch.org/docs/stable/objects.inv
+          - https://psutil.readthedocs.io/en/stable/objects.inv
+
+markdown_extensions:
+  - attr_list
+  - md_in_html
+  - admonition
+  - pymdownx.details
+  # For content tabs
+  - pymdownx.superfences
+  - pymdownx.tabbed:
+      slugify: !!python/object/apply:pymdownx.slugs.slugify
+        kwds:
+          case: lower
+      alternate_style: true
+  # For code highlighting
+  - pymdownx.highlight:
+      anchor_linenums: true
+      line_spans: __span
+      pygments_lang_class: true
+  - pymdownx.inlinehilite
+  - pymdownx.snippets
+  # For emoji and icons
+  - pymdownx.emoji:
+      emoji_index: !!python/name:material.extensions.emoji.twemoji
+      emoji_generator: !!python/name:material.extensions.emoji.to_svg
+  # For in page [TOC] (not sidebar)
+  - toc:
+      permalink: true
+  # For math rendering
+  - mdx_math:
+      enable_dollar_delimiter: true
+
+extra_javascript:
+  - mkdocs/javascript/run_llm_widget.js
+  - https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.7/MathJax.js?config=TeX-AMS_HTML
diff --git a/pyproject.toml b/pyproject.toml
index 3011cffb8f1e2..29186d5ff0278 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -165,9 +165,11 @@ markers = [
 
 [tool.pymarkdown]
 plugins.md004.style = "sublist" # ul-style
+plugins.md007.indent = 4 # ul-indent
 plugins.md013.enabled = false # line-length
 plugins.md041.enabled = false # first-line-h1
 plugins.md033.enabled = false # inline-html
+plugins.md046.enabled = false # code-block-style
 plugins.md024.allow_different_nesting = true # no-duplicate-headers
 
 [tool.ty]
diff --git a/requirements/docs.txt b/requirements/docs.txt
index 9c267edaceaf1..a1f51334ed81a 100644
--- a/requirements/docs.txt
+++ b/requirements/docs.txt
@@ -1,19 +1,8 @@
-sphinx==7.4.7
-sphinx-argparse==0.5.2
-sphinx-book-theme==1.1.4
-sphinx-copybutton==0.5.2
-sphinx-design==0.6.1
-sphinx-togglebutton==0.3.2
-myst-parser==3.0.1  # `myst-parser==4.0.1` breaks inline code in titles
-msgspec
-snowballstemmer<3  # https://github.com/snowballstem/snowball/issues/229
-commonmark # Required by sphinx-argparse when using :markdownhelp:
-
-# Custom autodoc2 is necessary for faster docstring processing
-# see: https://github.com/sphinx-extensions2/sphinx-autodoc2/issues/33#issuecomment-2856386035
-git+https://github.com/hmellor/sphinx-autodoc2.git # sphinx-autodoc2==0.5.0
-
-# packages to install to build the documentation
-cachetools
--f https://download.pytorch.org/whl/cpu
-torch
\ No newline at end of file
+mkdocs
+mkdocs-api-autonav
+mkdocs-material
+mkdocstrings-python
+mkdocs-gen-files
+mkdocs-awesome-nav
+python-markdown-math
+ruff
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 2a27afe9757e1..c48d8a3869699 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1263,12 +1263,10 @@ class LLMEngine:
     def step(self) -> List[Union[RequestOutput, PoolingRequestOutput]]:
         """Performs one decoding iteration and returns newly generated results.
 
-        :::{figure} https://i.imgur.com/sv2HssD.png
-        :alt: Overview of the step function
-        :align: center
-
-        Overview of the step function.
-        :::
+        <figure markdown="span">
+        ![Overview of the step function](https://i.imgur.com/sv2HssD.png)
+        <figcaption>Overview of the step function</figcaption>
+        </figure>
 
         Details:
         - Step 1: Schedules the sequences to be executed in the next
diff --git a/vllm/engine/metrics.py b/vllm/engine/metrics.py
index 033551d07c39f..34b48f83b6436 100644
--- a/vllm/engine/metrics.py
+++ b/vllm/engine/metrics.py
@@ -29,7 +29,7 @@ prometheus_client.disable_created_metrics()
 # to extract the metrics definitions.
 
 
-# begin-metrics-definitions
+# --8<-- [start:metrics-definitions]
 class Metrics:
     """
     vLLM uses a multiprocessing-based frontend for the OpenAI server.
@@ -293,7 +293,7 @@ class Metrics:
             labelnames=labelnames))
 
 
-# end-metrics-definitions
+# --8<-- [end:metrics-definitions]
 
     def _unregister_vllm_metrics(self) -> None:
         for collector in list(prometheus_client.REGISTRY._collector_to_names):
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 52b50229b8d16..0465302c5a1c8 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -131,10 +131,9 @@ class LLM:
         **kwargs: Arguments for {class}`~vllm.EngineArgs`. (See
             {ref}`engine-args`)
 
-    :::{note}
-    This class is intended to be used for offline inference. For online
-    serving, use the {class}`~vllm.AsyncLLMEngine` class instead.
-    :::
+    Note:
+        This class is intended to be used for offline inference. For online
+        serving, use the {class}`~vllm.AsyncLLMEngine` class instead.
     """
 
     DEPRECATE_LEGACY: ClassVar[bool] = True
@@ -422,11 +421,10 @@ class LLM:
             A list of `RequestOutput` objects containing the
             generated completions in the same order as the input prompts.
 
-        :::{note}
-        Using `prompts` and `prompt_token_ids` as keyword parameters is
-        considered legacy and may be deprecated in the future. You should
-        instead pass them via the `inputs` parameter.
-        :::
+        Note:
+            Using `prompts` and `prompt_token_ids` as keyword parameters is
+            considered legacy and may be deprecated in the future. You should
+            instead pass them via the `inputs` parameter.
         """
         runner_type = self.llm_engine.model_config.runner_type
         if runner_type not in ["generate", "transcription"]:
@@ -502,10 +500,9 @@ class LLM:
         Returns:
             A list containing the results from each worker.
 
-        :::{note}
-        It is recommended to use this API to only pass control messages,
-        and set up data-plane communication to pass data.
-        :::
+        Note:
+            It is recommended to use this API to only pass control messages,
+            and set up data-plane communication to pass data.
         """
 
         return self.llm_engine.collective_rpc(method, timeout, args, kwargs)
@@ -924,11 +921,10 @@ class LLM:
             A list of `PoolingRequestOutput` objects containing the
             pooled hidden states in the same order as the input prompts.
 
-        :::{note}
-        Using `prompts` and `prompt_token_ids` as keyword parameters is
-        considered legacy and may be deprecated in the future. You should
-        instead pass them via the `inputs` parameter.
-        :::
+        Note:
+            Using `prompts` and `prompt_token_ids` as keyword parameters is
+            considered legacy and may be deprecated in the future. You should
+            instead pass them via the `inputs` parameter.
         """
         runner_type = self.llm_engine.model_config.runner_type
         if runner_type != "pooling":
diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py
index 5ab2356a0898a..da01eb472c441 100644
--- a/vllm/entrypoints/openai/protocol.py
+++ b/vllm/entrypoints/openai/protocol.py
@@ -251,7 +251,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
     parallel_tool_calls: Optional[bool] = False
     user: Optional[str] = None
 
-    # doc: begin-chat-completion-sampling-params
+    # --8<-- [start:chat-completion-sampling-params]
     best_of: Optional[int] = None
     use_beam_search: bool = False
     top_k: Optional[int] = None
@@ -266,9 +266,9 @@ class ChatCompletionRequest(OpenAIBaseModel):
     spaces_between_special_tokens: bool = True
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
     prompt_logprobs: Optional[int] = None
-    # doc: end-chat-completion-sampling-params
+    # --8<-- [end:chat-completion-sampling-params]
 
-    # doc: begin-chat-completion-extra-params
+    # --8<-- [start:chat-completion-extra-params]
     echo: bool = Field(
         default=False,
         description=(
@@ -407,7 +407,7 @@ class ChatCompletionRequest(OpenAIBaseModel):
         default=None,
         description="KVTransfer parameters used for disaggregated serving.")
 
-    # doc: end-chat-completion-extra-params
+    # --8<-- [end:chat-completion-extra-params]
 
     # Default sampling parameters for chat completion requests
     _DEFAULT_SAMPLING_PARAMS: dict = {
@@ -764,7 +764,7 @@ class CompletionRequest(OpenAIBaseModel):
     top_p: Optional[float] = None
     user: Optional[str] = None
 
-    # doc: begin-completion-sampling-params
+    # --8<-- [start:completion-sampling-params]
     use_beam_search: bool = False
     top_k: Optional[int] = None
     min_p: Optional[float] = None
@@ -779,9 +779,9 @@ class CompletionRequest(OpenAIBaseModel):
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=1)]] = None
     allowed_token_ids: Optional[list[int]] = None
     prompt_logprobs: Optional[int] = None
-    # doc: end-completion-sampling-params
+    # --8<-- [end:completion-sampling-params]
 
-    # doc: begin-completion-extra-params
+    # --8<-- [start:completion-extra-params]
     add_special_tokens: bool = Field(
         default=True,
         description=(
@@ -858,7 +858,7 @@ class CompletionRequest(OpenAIBaseModel):
         default=None,
         description="KVTransfer parameters used for disaggregated serving.")
 
-    # doc: end-completion-extra-params
+    # --8<-- [end:completion-extra-params]
 
     # Default sampling parameters for completion requests
     _DEFAULT_SAMPLING_PARAMS: dict = {
@@ -1045,11 +1045,11 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
     user: Optional[str] = None
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
 
-    # doc: begin-embedding-pooling-params
+    # --8<-- [start:embedding-pooling-params]
     additional_data: Optional[Any] = None
-    # doc: end-embedding-pooling-params
+    # --8<-- [end:embedding-pooling-params]
 
-    # doc: begin-embedding-extra-params
+    # --8<-- [start:embedding-extra-params]
     add_special_tokens: bool = Field(
         default=True,
         description=(
@@ -1064,7 +1064,7 @@ class EmbeddingCompletionRequest(OpenAIBaseModel):
             "if the served model does not use priority scheduling."),
     )
 
-    # doc: end-embedding-extra-params
+    # --8<-- [end:embedding-extra-params]
 
     def to_pooling_params(self):
         return PoolingParams(dimensions=self.dimensions,
@@ -1080,11 +1080,11 @@ class EmbeddingChatRequest(OpenAIBaseModel):
     user: Optional[str] = None
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
 
-    # doc: begin-chat-embedding-pooling-params
+    # --8<-- [start:chat-embedding-pooling-params]
     additional_data: Optional[Any] = None
-    # doc: end-chat-embedding-pooling-params
+    # --8<-- [end:chat-embedding-pooling-params]
 
-    # doc: begin-chat-embedding-extra-params
+    # --8<-- [start:chat-embedding-extra-params]
     add_special_tokens: bool = Field(
         default=False,
         description=(
@@ -1118,7 +1118,7 @@ class EmbeddingChatRequest(OpenAIBaseModel):
             "default: 0). Any priority other than 0 will raise an error "
             "if the served model does not use priority scheduling."),
     )
-    # doc: end-chat-embedding-extra-params
+    # --8<-- [end:chat-embedding-extra-params]
 
     @model_validator(mode="before")
     @classmethod
@@ -1147,11 +1147,11 @@ class ScoreRequest(OpenAIBaseModel):
     text_2: Union[list[str], str]
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
 
-    # doc: begin-score-pooling-params
+    # --8<-- [start:score-pooling-params]
     additional_data: Optional[Any] = None
-    # doc: end-score-pooling-params
+    # --8<-- [end:score-pooling-params]
 
-    # doc: begin-score-extra-params
+    # --8<-- [start:score-extra-params]
     priority: int = Field(
         default=0,
         description=(
@@ -1160,7 +1160,7 @@ class ScoreRequest(OpenAIBaseModel):
             "if the served model does not use priority scheduling."),
     )
 
-    # doc: end-score-extra-params
+    # --8<-- [end:score-extra-params]
 
     def to_pooling_params(self):
         return PoolingParams(additional_data=self.additional_data)
@@ -1173,11 +1173,11 @@ class RerankRequest(OpenAIBaseModel):
     top_n: int = Field(default_factory=lambda: 0)
     truncate_prompt_tokens: Optional[Annotated[int, Field(ge=-1)]] = None
 
-    # doc: begin-rerank-pooling-params
+    # --8<-- [start:rerank-pooling-params]
     additional_data: Optional[Any] = None
-    # doc: end-rerank-pooling-params
+    # --8<-- [end:rerank-pooling-params]
 
-    # doc: begin-rerank-extra-params
+    # --8<-- [start:rerank-extra-params]
     priority: int = Field(
         default=0,
         description=(
@@ -1186,7 +1186,7 @@ class RerankRequest(OpenAIBaseModel):
             "if the served model does not use priority scheduling."),
     )
 
-    # doc: end-rerank-extra-params
+    # --8<-- [end:rerank-extra-params]
 
     def to_pooling_params(self):
         return PoolingParams(additional_data=self.additional_data)
@@ -1321,11 +1321,11 @@ class ClassificationRequest(OpenAIBaseModel):
     truncate_prompt_tokens: Optional[int] = None
     user: Optional[str] = None
 
-    # doc: begin-classification-pooling-params
+    # --8<-- [start:classification-pooling-params]
     additional_data: Optional[Any] = None
-    # doc: end-classification-pooling-params
+    # --8<-- [end:classification-pooling-params]
 
-    # doc: begin-classification-extra-params
+    # --8<-- [start:classification-extra-params]
     priority: int = Field(
         default=0,
         description=(
@@ -1334,7 +1334,7 @@ class ClassificationRequest(OpenAIBaseModel):
             "if the served model does not use priority scheduling."),
     )
 
-    # doc: end-classification-extra-params
+    # --8<-- [end:classification-extra-params]
 
     def to_pooling_params(self):
         return PoolingParams(additional_data=self.additional_data)
@@ -1698,7 +1698,7 @@ class TranscriptionRequest(OpenAIBaseModel):
     timestamps incurs additional latency.
     """
 
-    # doc: begin-transcription-extra-params
+    # --8<-- [start:transcription-extra-params]
     stream: Optional[bool] = False
     """Custom field not present in the original OpenAI definition. When set,
     it will enable output to be streamed in a similar fashion as the Chat
@@ -1707,9 +1707,9 @@ class TranscriptionRequest(OpenAIBaseModel):
     # Flattened stream option to simplify form data.
     stream_include_usage: Optional[bool] = False
     stream_continuous_usage_stats: Optional[bool] = False
-    # doc: end-transcription-extra-params
+    # --8<-- [end:transcription-extra-params]
 
-    # doc: begin-transcription-sampling-params
+    # --8<-- [start:transcription-sampling-params]
     temperature: float = Field(default=0.0)
     """The sampling temperature, between 0 and 1.
 
@@ -1743,7 +1743,7 @@ class TranscriptionRequest(OpenAIBaseModel):
 
     presence_penalty: Optional[float] = 0.0
     """The presence penalty to use for sampling."""
-    # doc: end-transcription-sampling-params
+    # --8<-- [end:transcription-sampling-params]
 
     # Default sampling parameters for transcription requests.
     _DEFAULT_SAMPLING_PARAMS: dict = {
diff --git a/vllm/envs.py b/vllm/envs.py
index dc23c8ea5314d..2d330b8fbee80 100644
--- a/vllm/envs.py
+++ b/vllm/envs.py
@@ -175,7 +175,7 @@ def get_vllm_port() -> Optional[int]:
 # The begin-* and end* here are used by the documentation generator
 # to extract the used env vars.
 
-# begin-env-vars-definition
+# --8<-- [start:env-vars-definition]
 
 environment_variables: dict[str, Callable[[], Any]] = {
 
@@ -813,7 +813,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
     lambda: os.getenv("VLLM_ALL2ALL_BACKEND", "naive"),
 }
 
-# end-env-vars-definition
+# --8<-- [end:env-vars-definition]
 
 
 def __getattr__(name: str):
diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py
index 9b0b98731e033..8e67c7a41bb19 100644
--- a/vllm/executor/ray_distributed_executor.py
+++ b/vllm/executor/ray_distributed_executor.py
@@ -528,12 +528,12 @@ class RayDistributedExecutor(DistributedExecutorBase):
         ray.get(parallel_worker_tasks)
 
     def _check_ray_cgraph_installation(self):
-        import pkg_resources
+        import importlib.metadata
+
         from packaging import version
 
         required_version = version.parse("2.43.0")
-        current_version = version.parse(
-            pkg_resources.get_distribution("ray").version)
+        current_version = version.parse(importlib.metadata.version("ray"))
         if current_version < required_version:
             raise ValueError(f"Ray version {required_version} is "
                              f"required, but found {current_version}")
diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py
index 2ff7e394a4163..db0dd2051d527 100644
--- a/vllm/model_executor/models/blip2.py
+++ b/vllm/model_executor/models/blip2.py
@@ -681,9 +681,8 @@ class Blip2ForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP,
                 batch.
             pixel_values: The pixels in each input image.
         
-        :::{seealso}
-        {class}`Blip2ImageInputs`
-        :::
+        Info:
+            [Blip2ImageInputs][]
         """
 
         if intermediate_tensors is not None:
diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py
index 95c1a0ca0b981..ced71b6dcdebe 100644
--- a/vllm/model_executor/models/llava.py
+++ b/vllm/model_executor/models/llava.py
@@ -721,9 +721,8 @@ class LlavaForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsPP):
                 batch.
             pixel_values: The pixels in each input image.
 
-        :::{seealso}
-        {class}`LlavaImageInputs`
-        :::
+        Info:
+            [LlavaImageInputs][]
         """
         if intermediate_tensors is not None:
             inputs_embeds = None
diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py
index 581a32325d4c7..10261aa423c01 100644
--- a/vllm/model_executor/models/llava_next.py
+++ b/vllm/model_executor/models/llava_next.py
@@ -551,9 +551,8 @@ class LlavaNextForConditionalGeneration(nn.Module, SupportsMultiModal,
             pixel_values: The pixels in each grid patch for each input image.
             image_sizes: The original `(height, width)` for each input image.
 
-        :::{seealso}
-        {class}`LlavaNextImageInputs`
-        :::
+        Info:
+            [LlavaNextImageInputs][]
         """
         if intermediate_tensors is not None:
             inputs_embeds = None
diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py
index 2b9cbf10440ab..051a73120838e 100644
--- a/vllm/model_executor/models/mistral3.py
+++ b/vllm/model_executor/models/mistral3.py
@@ -559,9 +559,8 @@ class Mistral3ForConditionalGeneration(nn.Module, SupportsLoRA,
                 batch.
             pixel_values: The pixels in each input image.
 
-        :::{seealso}
-        {class}`Mistral3ImagePixelInputs`
-        :::
+        Info:
+            [Mistral3ImagePixelInputs][]
         """
         if intermediate_tensors is not None:
             inputs_embeds = None
diff --git a/vllm/multimodal/__init__.py b/vllm/multimodal/__init__.py
index 756ea11311daf..70568a195fd83 100644
--- a/vllm/multimodal/__init__.py
+++ b/vllm/multimodal/__init__.py
@@ -11,9 +11,8 @@ MULTIMODAL_REGISTRY = MultiModalRegistry()
 The global {class}`~MultiModalRegistry` is used by model runners to
 dispatch data processing according to the target model.
 
-:::{seealso}
-{ref}`mm-processing`
-:::
+Info:
+    [mm-processing][]
 """
 
 __all__ = [
diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py
index 2335af843ed5e..71ef1a98e0d06 100644
--- a/vllm/multimodal/inputs.py
+++ b/vllm/multimodal/inputs.py
@@ -289,9 +289,8 @@ class BaseMultiModalField(ABC):
 @dataclass(frozen=True)
 class MultiModalBatchedField(BaseMultiModalField):
     """
-    :::{seealso}
-    {func}`MultiModalFieldConfig.batched`
-    :::
+    Info:
+        [MultiModalFieldConfig.batched][]
     """
 
     def build_elems(
@@ -320,10 +319,9 @@ class MultiModalBatchedField(BaseMultiModalField):
 @dataclass(frozen=True)
 class MultiModalFlatField(BaseMultiModalField):
     """
-    :::{seealso}
-    {func}`MultiModalFieldConfig.flat`
-    {func}`MultiModalFieldConfig.flat_from_sizes`
-    :::
+    Info:
+        [MultiModalFieldConfig.flat][]
+        [MultiModalFieldConfig.flat_from_sizes][]
     """
     slices: Union[Sequence[slice], Sequence[Sequence[slice]]]
     dim: int = 0
@@ -363,9 +361,8 @@ class MultiModalFlatField(BaseMultiModalField):
 @dataclass(frozen=True)
 class MultiModalSharedField(BaseMultiModalField):
     """
-    :::{seealso}
-    {func}`MultiModalFieldConfig.shared`
-    :::
+    Info:
+        [MultiModalFieldConfig.shared][]
     """
     batch_size: int
 
@@ -510,9 +507,8 @@ class MultiModalFieldConfig:
             Element 3: [[C],[C]]
         ```
 
-        :::{seealso}
-        {func}`MultiModalFieldConfig.flat`
-        :::
+        Info:
+            [MultiModalFieldConfig.flat][]
         """
 
         if size_per_item.ndim != 1:
diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py
index 67d0d7fc11834..8a27d866e88e3 100644
--- a/vllm/multimodal/registry.py
+++ b/vllm/multimodal/registry.py
@@ -214,9 +214,8 @@ class MultiModalRegistry:
         When the model receives multi-modal data, the provided function is
         invoked to transform the data into a dictionary of model inputs.
 
-        :::{seealso}
-        {ref}`mm-processing`
-        :::
+        Info:
+            [mm-processing][]
         """
 
         def wrapper(model_cls: N) -> N:
@@ -260,9 +259,8 @@ class MultiModalRegistry:
         """
         Create a multi-modal processor for a specific model and tokenizer.
 
-        :::{seealso}
-        {ref}`mm-processing`
-        :::
+        Info:
+            [mm-processing][]
         """
         if not model_config.is_multimodal_model:
             raise ValueError(f"{model_config.model} is not a multimodal model")
diff --git a/vllm/utils.py b/vllm/utils.py
index bfc01972bbd24..fcc0ab3b237ae 100644
--- a/vllm/utils.py
+++ b/vllm/utils.py
@@ -1926,9 +1926,8 @@ class _PlaceholderBase:
     We need to explicitly override each dunder method because
     {meth}`__getattr__` is not called when they are accessed.
 
-    :::{seealso}
-    [Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
-    :::
+    Info:
+        [Special method lookup](https://docs.python.org/3/reference/datamodel.html#special-lookup)
     """
 
     def __getattr__(self, key: str) -> Never:
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index 2b945cc4111a4..a7c70fec042cd 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -172,10 +172,9 @@ class Worker(WorkerBase):
         Then, it calculate the free memory that can be used for KV cache in
         bytes.
 
-        :::{tip}
-        You may limit the usage of GPU memory
-        by adjusting the `gpu_memory_utilization` parameter.
-        :::
+        Tip:
+            You may limit the usage of GPU memory
+            by adjusting the `gpu_memory_utilization` parameter.
         """
         torch.cuda.empty_cache()
         torch.cuda.reset_peak_memory_stats()
diff --git a/vllm/worker/hpu_worker.py b/vllm/worker/hpu_worker.py
index 42882992f2da2..d7fe0fe0fe4c1 100644
--- a/vllm/worker/hpu_worker.py
+++ b/vllm/worker/hpu_worker.py
@@ -201,10 +201,9 @@ class HPUWorker(LocalOrDistributedWorkerBase):
         Then, it calculate the maximum possible number of GPU and CPU blocks
         that can be allocated with the remaining free memory.
 
-        :::{tip}
-        You may limit the usage of GPU memory
-        by adjusting the `gpu_memory_utilization` parameter.
-        :::
+        Tip:
+            You may limit the usage of GPU memory
+            by adjusting the `gpu_memory_utilization` parameter.
         """
         # Profile the memory usage of the model and get the maximum number of
         # cache blocks that can be allocated with the remaining free memory.
diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py
index 41546462e5c4b..5e3b6e4b62ea2 100644
--- a/vllm/worker/worker.py
+++ b/vllm/worker/worker.py
@@ -234,10 +234,9 @@ class Worker(LocalOrDistributedWorkerBase):
         Then, it calculate the maximum possible number of GPU and CPU blocks
         that can be allocated with the remaining free memory.
 
-        :::{tip}
-        You may limit the usage of GPU memory
-        by adjusting the `gpu_memory_utilization` parameter.
-        :::
+        Tip:
+            You may limit the usage of GPU memory
+            by adjusting the `gpu_memory_utilization` parameter.
         """
         # Profile the memory usage of the model and get the maximum number of
         # cache blocks that can be allocated with the remaining free memory.
diff --git a/vllm/worker/xpu_worker.py b/vllm/worker/xpu_worker.py
index 65085f80f97ae..a78a41e03ea3d 100644
--- a/vllm/worker/xpu_worker.py
+++ b/vllm/worker/xpu_worker.py
@@ -93,10 +93,9 @@ class XPUWorker(LoRANotSupportedWorkerBase, Worker):
         Then, it calculate the maximum possible number of GPU and CPU blocks
         that can be allocated with the remaining free memory.
 
-        :::{tip}
-        You may limit the usage of GPU memory
-        by adjusting the `gpu_memory_utilization` parameter.
-        :::
+        Tip:
+            You may limit the usage of GPU memory
+            by adjusting the `gpu_memory_utilization` parameter.
         """
         # Profile the memory usage of the model and get the maximum number of
         # cache blocks that can be allocated with the remaining free memory.