diff --git a/README.md b/README.md
index df294c600770f..5b87ae838885c 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,7 @@ vLLM is flexible and easy to use with:
 - OpenAI-compatible API server
 - Support NVIDIA GPUs, AMD CPUs and GPUs, Intel CPUs and GPUs, PowerPC CPUs, TPU, and AWS Neuron.
 - Prefix caching support
-- Multi-lora support
+- Multi-LoRA support
 
 vLLM seamlessly supports most popular open-source models on HuggingFace, including:
 - Transformer-like LLMs (e.g., Llama)
diff --git a/vllm/model_executor/layers/logits_processor.py b/vllm/model_executor/layers/logits_processor.py
index 4a359725bad0f..6b69a260826b1 100644
--- a/vllm/model_executor/layers/logits_processor.py
+++ b/vllm/model_executor/layers/logits_processor.py
@@ -119,7 +119,7 @@ class LogitsProcessor(nn.Module):
 
     def extra_repr(self) -> str:
         s = f"vocab_size={self.vocab_size}"
-        s += f", forg_vocab_size={self.org_vocab_size}"
+        s += f", org_vocab_size={self.org_vocab_size}"
         s += f", scale={self.scale}, logits_as_input={self.logits_as_input}"
         return s
 
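Note on the second hunk: `extra_repr` feeds directly into `nn.Module.__repr__`, so the stray "f" surfaced as `forg_vocab_size` whenever a `LogitsProcessor` instance was printed. A minimal sketch of that mechanism, using a hypothetical `LogitsProcessorDemo` class with made-up default values rather than vLLM's real constructor:

```python
# Sketch only: demonstrates how PyTorch renders extra_repr() inside the
# module's repr. Class name and default values here are illustrative,
# not vLLM's actual LogitsProcessor initialization.
import torch.nn as nn


class LogitsProcessorDemo(nn.Module):

    def __init__(self, vocab_size: int = 32000) -> None:
        super().__init__()
        self.vocab_size = vocab_size
        self.org_vocab_size = vocab_size  # assumed default for the demo
        self.scale = 1.0
        self.logits_as_input = False

    def extra_repr(self) -> str:
        # Same string construction as the corrected hunk above.
        s = f"vocab_size={self.vocab_size}"
        s += f", org_vocab_size={self.org_vocab_size}"
        s += f", scale={self.scale}, logits_as_input={self.logits_as_input}"
        return s


# nn.Module.__repr__ inserts extra_repr() between the parentheses:
print(LogitsProcessorDemo())
# LogitsProcessorDemo(vocab_size=32000, org_vocab_size=32000, scale=1.0, logits_as_input=False)
```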