diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py
index c999eb858bcc..e955b15e87fe 100644
--- a/vllm/benchmarks/datasets.py
+++ b/vllm/benchmarks/datasets.py
@@ -626,7 +626,7 @@ class RandomDataset(BenchmarkDataset):
         # Decode, then re-encode and truncate to preserve token count invariants
         total_input_len = prefix_len + int(input_len)
         prompt, adjusted_token_sequence, token_mismatch = (
-            gen_prompt_decode_to_target_len(  # noqa: E501
+            gen_prompt_decode_to_target_len(
                 tokenizer=tokenizer,
                 token_sequence=token_sequence,
                 target_token_len=total_input_len,
@@ -2855,7 +2855,7 @@ class PrefixRepetitionRandomDataset(BenchmarkDataset):
         for _ in range(prompts_per_prefix):
             suffix_tokens, token_mistmatch = _generate_exact_length_tokens(
                 suffix_len
-            )  # noqa: E501
+            )
             token_mismatch_total += token_mistmatch
             combined_tokens = prefix_tokens + suffix_tokens
             prompt = tokenizer.decode(combined_tokens)
diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py
index 721eb554d026..181a3e196586 100644
--- a/vllm/benchmarks/throughput.py
+++ b/vllm/benchmarks/throughput.py
@@ -459,14 +459,14 @@ def validate_args(args):
         ):
             assert args.backend == "vllm-chat", (
                 f"{args.dataset_path} needs to use vllm-chat as the backend."
-            )  # noqa: E501
+            )
         elif args.dataset_path in (
             InstructCoderDataset.SUPPORTED_DATASET_PATHS
             | AIMODataset.SUPPORTED_DATASET_PATHS
         ):
             assert args.backend == "vllm", (
                 f"{args.dataset_path} needs to use vllm as the backend."
-            )  # noqa: E501
+            )
         else:
             raise ValueError(f"{args.dataset_path} is not supported by hf dataset.")
 
diff --git a/vllm/compilation/inductor_pass.py b/vllm/compilation/inductor_pass.py
index 70392c5ac972..9085448d2397 100644
--- a/vllm/compilation/inductor_pass.py
+++ b/vllm/compilation/inductor_pass.py
@@ -19,7 +19,7 @@ if is_torch_equal_or_newer("2.6"):
     from torch._inductor.custom_graph_pass import CustomGraphPass
 else:
     # CustomGraphPass is not present in 2.5 or lower, import our version
-    from .torch25_custom_graph_pass import (  # noqa: E501
+    from .torch25_custom_graph_pass import (
         Torch25CustomGraphPass as CustomGraphPass,
     )
 
diff --git a/vllm/model_executor/layers/quantization/__init__.py b/vllm/model_executor/layers/quantization/__init__.py
index f43d38cba878..9d1c66e56e91 100644
--- a/vllm/model_executor/layers/quantization/__init__.py
+++ b/vllm/model_executor/layers/quantization/__init__.py
@@ -95,7 +95,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
     from .awq_marlin import AWQMarlinConfig
     from .bitblas import BitBLASConfig
     from .bitsandbytes import BitsAndBytesConfig
-    from .compressed_tensors.compressed_tensors import (  # noqa: E501
+    from .compressed_tensors.compressed_tensors import (
         CompressedTensorsConfig,
     )
     from .deepspeedfp import DeepSpeedFPConfig
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
index 1c3fe2dd9869..e89d002078ac 100644
--- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
+++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
@@ -26,7 +26,7 @@ from vllm.model_executor.layers.linear import (
     UnquantizedLinearMethod,
 )
 from vllm.model_executor.layers.quantization import QuantizationMethods
-from vllm.model_executor.layers.quantization.base_config import (  # noqa: E501
+from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig,
     QuantizeMethodBase,
 )
@@ -256,7 +256,7 @@ class CompressedTensorsConfig(QuantizationConfig):
                 )
             else:
                 target_scheme_map[target]["input_activations"] = (
-                    QuantizationArgs.model_validate(  # noqa: E501
+                    QuantizationArgs.model_validate(
                         quant_config.get("input_activations")
                     )
                 )
diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py
index bef087b7a0b6..0e69fcfd8feb 100644
--- a/vllm/model_executor/models/gemma3n_mm.py
+++ b/vllm/model_executor/models/gemma3n_mm.py
@@ -176,7 +176,7 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
         processor = self.info.get_hf_processor()
         audio_feature_extractor: Gemma3nAudioFeatureExtractor = (
             processor.feature_extractor
-        )  # noqa: E501
+        )
         audio_len = audio_feature_extractor.fft_length
         image_processor: SiglipImageProcessorFast = processor.image_processor
         img_width = image_processor.size.get("width", 224)
diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py
index b640bdd1deeb..7c324b7e7872 100644
--- a/vllm/model_executor/models/registry.py
+++ b/vllm/model_executor/models/registry.py
@@ -120,7 +120,7 @@ _TEXT_GENERATION_MODELS = {
     "JambaForCausalLM": ("jamba", "JambaForCausalLM"),
     "Lfm2ForCausalLM": ("lfm2", "Lfm2ForCausalLM"),
     "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
-    "Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"),  # noqa: E501
+    "Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"),
     # For decapoda-research/llama-*
     "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
     "LongcatFlashForCausalLM": ("longcat_flash", "LongcatFlashForCausalLM"),
@@ -204,7 +204,7 @@ _EMBEDDING_MODELS = {
     "LlavaNextForConditionalGeneration": (
         "llava_next",
         "LlavaNextForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
     # Technically Terratorch models work on images, both in
@@ -240,46 +240,46 @@ _MULTIMODAL_MODELS = {
     "AyaVisionForConditionalGeneration": (
         "aya_vision",
         "AyaVisionForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Blip2ForConditionalGeneration": ("blip2", "Blip2ForConditionalGeneration"),
     "ChameleonForConditionalGeneration": (
         "chameleon",
         "ChameleonForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Cohere2VisionForConditionalGeneration": (
         "cohere2_vision",
         "Cohere2VisionForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "DeepseekVLV2ForCausalLM": ("deepseek_vl2", "DeepseekVLV2ForCausalLM"),
     "DotsOCRForCausalLM": ("dots_ocr", "DotsOCRForCausalLM"),
     "Ernie4_5_VLMoeForConditionalGeneration": (
         "ernie45_vl",
         "Ernie4_5_VLMoeForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"),
     "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"),  # noqa: E501
     "Gemma3nForConditionalGeneration": (
         "gemma3n_mm",
         "Gemma3nForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
     "Glm4vMoeForConditionalGeneration": ("glm4_1v", "Glm4vMoeForConditionalGeneration"),  # noqa: E501
     "GraniteSpeechForConditionalGeneration": (
         "granite_speech",
         "GraniteSpeechForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
     "InternVLChatModel": ("internvl", "InternVLChatModel"),
     "NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
     "InternS1ForConditionalGeneration": (
         "interns1",
         "InternS1ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "InternVLForConditionalGeneration": (
         "interns1",
         "InternS1ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Idefics3ForConditionalGeneration": (
         "idefics3",
         "Idefics3ForConditionalGeneration",
@@ -289,7 +289,7 @@ _MULTIMODAL_MODELS = {
     "KeyeVL1_5ForConditionalGeneration": (
         "keye_vl1_5",
         "KeyeVL1_5ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "RForConditionalGeneration": ("rvl", "RForConditionalGeneration"),
     "KimiVLForConditionalGeneration": ("kimi_vl", "KimiVLForConditionalGeneration"),  # noqa: E501
     "Llama_Nemotron_Nano_VL": ("nemotron_vl", "LlamaNemotronVLChatModel"),
@@ -298,27 +298,27 @@ _MULTIMODAL_MODELS = {
     "LlavaNextForConditionalGeneration": (
         "llava_next",
         "LlavaNextForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "LlavaNextVideoForConditionalGeneration": (
         "llava_next_video",
         "LlavaNextVideoForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "LlavaOnevisionForConditionalGeneration": (
         "llava_onevision",
         "LlavaOnevisionForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"),  # noqa: E501
     "MiDashengLMModel": ("midashenglm", "MiDashengLMModel"),
     "MiniMaxVL01ForConditionalGeneration": (
         "minimax_vl_01",
         "MiniMaxVL01ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "MiniCPMO": ("minicpmo", "MiniCPMO"),
     "MiniCPMV": ("minicpmv", "MiniCPMV"),
     "Mistral3ForConditionalGeneration": (
         "mistral3",
         "Mistral3ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
     "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
     "Ovis": ("ovis", "Ovis"),
@@ -326,7 +326,7 @@ _MULTIMODAL_MODELS = {
     "PaliGemmaForConditionalGeneration": (
         "paligemma",
         "PaliGemmaForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
     "Phi4MultimodalForCausalLM": ("phi4_multimodal", "Phi4MultimodalForCausalLM"),  # noqa: E501
@@ -336,31 +336,31 @@ _MULTIMODAL_MODELS = {
     "Qwen2_5_VLForConditionalGeneration": (
         "qwen2_5_vl",
         "Qwen2_5_VLForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Qwen2AudioForConditionalGeneration": (
         "qwen2_audio",
         "Qwen2AudioForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Qwen2_5OmniModel": (
         "qwen2_5_omni_thinker",
         "Qwen2_5OmniThinkerForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Qwen2_5OmniForConditionalGeneration": (
         "qwen2_5_omni_thinker",
         "Qwen2_5OmniThinkerForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"),  # noqa: E501
     "Qwen3VLMoeForConditionalGeneration": (
         "qwen3_vl_moe",
         "Qwen3VLMoeForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"),
     "Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"),  # noqa: E501
     "TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"),  # noqa: E501
     "Tarsier2ForConditionalGeneration": (
         "qwen2_vl",
         "Tarsier2ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "UltravoxModel": ("ultravox", "UltravoxModel"),
     "VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"),  # noqa: E501
     # [Encoder-decoder]
@@ -401,23 +401,23 @@ _TRANSFORMERS_BACKEND_MODELS = {
     "TransformersMoEForMultimodalLM": (
         "transformers_moe",
         "TransformersMoEForMultimodalLM",
-    ),  # noqa: E501
+    ),
     "TransformersEmbeddingModel": (
         "transformers_pooling",
         "TransformersEmbeddingModel",
-    ),  # noqa: E501
+    ),
     "TransformersForSequenceClassification": (
         "transformers_pooling",
         "TransformersForSequenceClassification",
-    ),  # noqa: E501
+    ),
     "TransformersMoEForSequenceClassification": (
         "transformers_pooling",
         "TransformersMoEForSequenceClassification",
-    ),  # noqa: E501
+    ),
     "TransformersMoEEmbeddingModel": (
         "transformers_pooling",
         "TransformersMoEEmbeddingModel",
-    ),  # noqa: E501
+    ),
 }
 
 _VLLM_MODELS = {
diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py
index 0e271da5fbe4..21fc2ab72768 100644
--- a/vllm/v1/attention/backends/gdn_attn.py
+++ b/vllm/v1/attention/backends/gdn_attn.py
@@ -79,7 +79,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata]
         self.speculative_config = vllm_config.speculative_config
         self.kv_cache_spec = kv_cache_spec
         if self.speculative_config:
-            self.num_spec = self.speculative_config.num_speculative_tokens  # noqa: E501
+            self.num_spec = self.speculative_config.num_speculative_tokens
         else:
             self.num_spec = 0
         self.use_spec_decode = self.num_spec > 0