From 7ed27f3cb55e3f64614300ec7acde1b382a48541 Mon Sep 17 00:00:00 2001 From: Didier Durand <2927957+didier-durand@users.noreply.github.com> Date: Wed, 19 Nov 2025 07:52:30 +0100 Subject: [PATCH] [Doc]: fix typos in various files (#28945) Signed-off-by: Didier Durand --- docs/design/moe_kernel_features.md | 4 ++-- docs/design/plugin_system.md | 2 +- docs/features/quantization/quark.md | 2 +- examples/online_serving/prometheus_grafana/README.md | 2 +- vllm/engine/arg_utils.py | 2 +- vllm/envs.py | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/design/moe_kernel_features.md b/docs/design/moe_kernel_features.md index 7663b82266f0b..36ae9506b65fb 100644 --- a/docs/design/moe_kernel_features.md +++ b/docs/design/moe_kernel_features.md @@ -4,7 +4,7 @@ The purpose of this document is to provide an overview of the various MoE kernel ## Fused MoE Modular All2All backends -There are a number of all2all communication backends that are used to implement expert parallelism (EP) for the `FusedMoE` layer. The different `FusedMoEPrepareAndFinalize` sub-classes provide an interface for each all2all backend. +There are a number of all2all communication backends that are used to implement expert parallelism (EP) for the `FusedMoE` layer. The different `FusedMoEPrepareAndFinalize` subclasses provide an interface for each all2all backend. The following table describes the relevant features of each backend, i.e. activation format, supported quantization schemes and async support. @@ -68,7 +68,7 @@ Modular kernels are supported by the following `FusedMoEMethodBase` classes. ## Fused MoE Experts Kernels -The are a number of MoE experts kernel implementations for different quantization types and architectures. Most follow the general API of the base Triton [`fused_experts`][vllm.model_executor.layers.fused_moe.fused_moe.fused_experts] function. Many have modular kernel adapters so they can be used with compatible all2all backends. 
This table lists each experts kernel and its particular properties. +There are a number of MoE experts kernel implementations for different quantization types and architectures. Most follow the general API of the base Triton [`fused_experts`][vllm.model_executor.layers.fused_moe.fused_moe.fused_experts] function. Many have modular kernel adapters so they can be used with compatible all2all backends. This table lists each experts kernel and its particular properties. Each kernel must be provided with one of the supported input activation formats. Some flavors of kernels support both standard and batched formats through different entry points, e.g. `TritonExperts` and `BatchedTritonExperts`. Batched format kernels are currently only needed for matching with certain all2all backends, e.g. `pplx`, `DeepEPLLPrepareAndFinalize`. diff --git a/docs/design/plugin_system.md b/docs/design/plugin_system.md index dc2f7c4aed3c3..e8db8047ca4e6 100644 --- a/docs/design/plugin_system.md +++ b/docs/design/plugin_system.md @@ -49,7 +49,7 @@ Every plugin has three parts: - **Platform plugins** (with group name `vllm.platform_plugins`): The primary use case for these plugins is to register custom, out-of-the-tree platforms into vLLM. The plugin function should return `None` when the platform is not supported in the current environment, or the platform class's fully qualified name when the platform is supported. -- **IO Processor plugins** (with group name `vllm.io_processor_plugins`): The primary use case for these plugins is to register custom pre/post processing of the model prompt and model output for pooling models. The plugin function returns the IOProcessor's class fully qualified name. +- **IO Processor plugins** (with group name `vllm.io_processor_plugins`): The primary use case for these plugins is to register custom pre-/post-processing of the model prompt and model output for pooling models. The plugin function returns the IOProcessor class's fully qualified name. 
- **Stat logger plugins** (with group name `vllm.stat_logger_plugins`): The primary use case for these plugins is to register custom, out-of-the-tree loggers into vLLM. The entry point should be a class that subclasses StatLoggerBase. diff --git a/docs/features/quantization/quark.md b/docs/features/quantization/quark.md index bd7bc186e13aa..c54d7d2251999 100644 --- a/docs/features/quantization/quark.md +++ b/docs/features/quantization/quark.md @@ -306,7 +306,7 @@ As examples, we provide some ready-to-use quantized mixed precision model to sho ### 2. inference the quantized mixed precision model in vLLM -Models quantized with AMD Quark using mixed precision can natively be reload in vLLM, and e.g. evaluated using lm-evaluation-harness as follow: +Models quantized with AMD Quark using mixed precision can natively be reloaded in vLLM, and e.g. evaluated using lm-evaluation-harness as follows: ```bash lm_eval --model vllm \ diff --git a/examples/online_serving/prometheus_grafana/README.md b/examples/online_serving/prometheus_grafana/README.md index 5cd4dab5a8fa7..9615210a2ad80 100644 --- a/examples/online_serving/prometheus_grafana/README.md +++ b/examples/online_serving/prometheus_grafana/README.md @@ -46,7 +46,7 @@ Navigate to [`http://localhost:3000`](http://localhost:3000). Log in with the de Navigate to [`http://localhost:3000/connections/datasources/new`](http://localhost:3000/connections/datasources/new) and select Prometheus. -On Prometheus configuration page, we need to add the `Prometheus Server URL` in `Connection`. For this setup, Grafana and Prometheus are running in separate containers, but Docker creates DNS name for each containers. You can just use `http://prometheus:9090`. +On the Prometheus configuration page, we need to add the `Prometheus Server URL` in `Connection`. For this setup, Grafana and Prometheus are running in separate containers, but Docker creates a DNS name for each container. You can just use `http://prometheus:9090`. Click `Save & Test`. 
You should get a green check saying "Successfully queried the Prometheus API.". diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index ab6e5e594c239..e2f7326448b3a 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1500,7 +1500,7 @@ class EngineArgs: # Local DP rank = 1, use pure-external LB. if data_parallel_external_lb: assert self.data_parallel_rank is not None, ( - "data_parallel_rank or node_rank must be spefified if " + "data_parallel_rank or node_rank must be specified if " "data_parallel_external_lb is enable." ) assert self.data_parallel_size_local in (1, None), ( diff --git a/vllm/envs.py b/vllm/envs.py index 6d92d5afee501..e61fb114325c6 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -1261,7 +1261,7 @@ environment_variables: dict[str, Callable[[], Any]] = { # MoE routing strategy selector. # See `RoutingSimulator.get_available_strategies()` # for available # strategies. - # Cutstom routing strategies can be registered by + # Custom routing strategies can be registered by # RoutingSimulator.register_strategy() # Note: custom strategies may not produce correct model outputs "VLLM_MOE_ROUTING_SIMULATION_STRATEGY": lambda: os.environ.get(