[Misc] Clean up unnecessary E501 ignore (#26274)

Signed-off-by: Roger Wang <hey@rogerw.io>

commit 43c146ca42 (parent 7c2ec0fe87)
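The diff below removes `# noqa: E501` (line-too-long) suppressions that no longer suppress anything: each flagged statement had already been wrapped so that every physical line fits within the length limit, leaving the directive dead. A minimal sketch of the pattern, with illustrative names rather than code from the vLLM tree:

    # Before: a single long call line exceeded the limit and carried a suppression:
    #     tokens = generate_exact_length_tokens(target_length=requested_suffix_length)  # noqa: E501
    # After wrapping, each physical line is short, so the directive would itself
    # be flagged as unused (ruff's RUF100 rule reports dead noqa comments).
    def generate_exact_length_tokens(target_length: int) -> list[int]:
        # Illustrative stub: return a dummy token sequence of the requested length.
        return list(range(target_length))

    tokens = generate_exact_length_tokens(
        target_length=32,
    )

In a ruff-managed codebase (vLLM lints with ruff), stale directives like these can be removed in bulk with `ruff check --extend-select RUF100 --fix`; treating that as the workflow behind this commit is an assumption.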
@@ -626,7 +626,7 @@ class RandomDataset(BenchmarkDataset):
             # Decode, then re-encode and truncate to preserve token count invariants
             total_input_len = prefix_len + int(input_len)
             prompt, adjusted_token_sequence, token_mismatch = (
-                gen_prompt_decode_to_target_len(  # noqa: E501
+                gen_prompt_decode_to_target_len(
                     tokenizer=tokenizer,
                     token_sequence=token_sequence,
                     target_token_len=total_input_len,
@@ -2855,7 +2855,7 @@ class PrefixRepetitionRandomDataset(BenchmarkDataset):
         for _ in range(prompts_per_prefix):
             suffix_tokens, token_mistmatch = _generate_exact_length_tokens(
                 suffix_len
-            )  # noqa: E501
+            )
             token_mismatch_total += token_mistmatch
             combined_tokens = prefix_tokens + suffix_tokens
             prompt = tokenizer.decode(combined_tokens)
@@ -459,14 +459,14 @@ def validate_args(args):
     ):
         assert args.backend == "vllm-chat", (
             f"{args.dataset_path} needs to use vllm-chat as the backend."
-        )  # noqa: E501
+        )
     elif args.dataset_path in (
         InstructCoderDataset.SUPPORTED_DATASET_PATHS
         | AIMODataset.SUPPORTED_DATASET_PATHS
     ):
         assert args.backend == "vllm", (
             f"{args.dataset_path} needs to use vllm as the backend."
-        )  # noqa: E501
+        )
     else:
         raise ValueError(f"{args.dataset_path} is not supported by hf dataset.")
@@ -19,7 +19,7 @@ if is_torch_equal_or_newer("2.6"):
     from torch._inductor.custom_graph_pass import CustomGraphPass
 else:
     # CustomGraphPass is not present in 2.5 or lower, import our version
-    from .torch25_custom_graph_pass import (  # noqa: E501
+    from .torch25_custom_graph_pass import (
         Torch25CustomGraphPass as CustomGraphPass,
     )
@@ -95,7 +95,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
    from .awq_marlin import AWQMarlinConfig
    from .bitblas import BitBLASConfig
    from .bitsandbytes import BitsAndBytesConfig
-    from .compressed_tensors.compressed_tensors import (  # noqa: E501
+    from .compressed_tensors.compressed_tensors import (
        CompressedTensorsConfig,
    )
    from .deepspeedfp import DeepSpeedFPConfig
@@ -26,7 +26,7 @@ from vllm.model_executor.layers.linear import (
     UnquantizedLinearMethod,
 )
 from vllm.model_executor.layers.quantization import QuantizationMethods
-from vllm.model_executor.layers.quantization.base_config import (  # noqa: E501
+from vllm.model_executor.layers.quantization.base_config import (
     QuantizationConfig,
     QuantizeMethodBase,
 )
@@ -256,7 +256,7 @@ class CompressedTensorsConfig(QuantizationConfig):
                 )
             else:
                 target_scheme_map[target]["input_activations"] = (
-                    QuantizationArgs.model_validate(  # noqa: E501
+                    QuantizationArgs.model_validate(
                         quant_config.get("input_activations")
                     )
                 )
@@ -176,7 +176,7 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
         processor = self.info.get_hf_processor()
         audio_feature_extractor: Gemma3nAudioFeatureExtractor = (
             processor.feature_extractor
-        )  # noqa: E501
+        )
         audio_len = audio_feature_extractor.fft_length
         image_processor: SiglipImageProcessorFast = processor.image_processor
         img_width = image_processor.size.get("width", 224)
@@ -120,7 +120,7 @@ _TEXT_GENERATION_MODELS = {
     "JambaForCausalLM": ("jamba", "JambaForCausalLM"),
     "Lfm2ForCausalLM": ("lfm2", "Lfm2ForCausalLM"),
     "LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
-    "Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"),  # noqa: E501
+    "Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"),
     # For decapoda-research/llama-*
     "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
     "LongcatFlashForCausalLM": ("longcat_flash", "LongcatFlashForCausalLM"),
@@ -204,7 +204,7 @@ _EMBEDDING_MODELS = {
     "LlavaNextForConditionalGeneration": (
         "llava_next",
         "LlavaNextForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"),  # noqa: E501
     # Technically Terratorch models work on images, both in
@@ -240,46 +240,46 @@ _MULTIMODAL_MODELS = {
     "AyaVisionForConditionalGeneration": (
         "aya_vision",
         "AyaVisionForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Blip2ForConditionalGeneration": ("blip2", "Blip2ForConditionalGeneration"),
     "ChameleonForConditionalGeneration": (
         "chameleon",
         "ChameleonForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Cohere2VisionForConditionalGeneration": (
         "cohere2_vision",
         "Cohere2VisionForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "DeepseekVLV2ForCausalLM": ("deepseek_vl2", "DeepseekVLV2ForCausalLM"),
     "DotsOCRForCausalLM": ("dots_ocr", "DotsOCRForCausalLM"),
     "Ernie4_5_VLMoeForConditionalGeneration": (
         "ernie45_vl",
         "Ernie4_5_VLMoeForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"),
     "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"),  # noqa: E501
     "Gemma3nForConditionalGeneration": (
         "gemma3n_mm",
         "Gemma3nForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
     "Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"),  # noqa: E501
     "Glm4vMoeForConditionalGeneration": ("glm4_1v", "Glm4vMoeForConditionalGeneration"),  # noqa: E501
     "GraniteSpeechForConditionalGeneration": (
         "granite_speech",
         "GraniteSpeechForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
     "InternVLChatModel": ("internvl", "InternVLChatModel"),
     "NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
     "InternS1ForConditionalGeneration": (
         "interns1",
         "InternS1ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "InternVLForConditionalGeneration": (
         "interns1",
         "InternS1ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Idefics3ForConditionalGeneration": (
         "idefics3",
         "Idefics3ForConditionalGeneration",
@@ -289,7 +289,7 @@ _MULTIMODAL_MODELS = {
     "KeyeVL1_5ForConditionalGeneration": (
         "keye_vl1_5",
         "KeyeVL1_5ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "RForConditionalGeneration": ("rvl", "RForConditionalGeneration"),
     "KimiVLForConditionalGeneration": ("kimi_vl", "KimiVLForConditionalGeneration"),  # noqa: E501
     "Llama_Nemotron_Nano_VL": ("nemotron_vl", "LlamaNemotronVLChatModel"),
@@ -298,27 +298,27 @@ _MULTIMODAL_MODELS = {
     "LlavaNextForConditionalGeneration": (
         "llava_next",
         "LlavaNextForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "LlavaNextVideoForConditionalGeneration": (
         "llava_next_video",
         "LlavaNextVideoForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "LlavaOnevisionForConditionalGeneration": (
         "llava_onevision",
         "LlavaOnevisionForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"),  # noqa: E501
     "MiDashengLMModel": ("midashenglm", "MiDashengLMModel"),
     "MiniMaxVL01ForConditionalGeneration": (
         "minimax_vl_01",
         "MiniMaxVL01ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "MiniCPMO": ("minicpmo", "MiniCPMO"),
     "MiniCPMV": ("minicpmv", "MiniCPMV"),
     "Mistral3ForConditionalGeneration": (
         "mistral3",
         "Mistral3ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
     "NVLM_D": ("nvlm_d", "NVLM_D_Model"),
     "Ovis": ("ovis", "Ovis"),
@@ -326,7 +326,7 @@ _MULTIMODAL_MODELS = {
     "PaliGemmaForConditionalGeneration": (
         "paligemma",
         "PaliGemmaForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
     "Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
     "Phi4MultimodalForCausalLM": ("phi4_multimodal", "Phi4MultimodalForCausalLM"),  # noqa: E501
@@ -336,31 +336,31 @@ _MULTIMODAL_MODELS = {
     "Qwen2_5_VLForConditionalGeneration": (
         "qwen2_5_vl",
         "Qwen2_5_VLForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Qwen2AudioForConditionalGeneration": (
         "qwen2_audio",
         "Qwen2AudioForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Qwen2_5OmniModel": (
         "qwen2_5_omni_thinker",
         "Qwen2_5OmniThinkerForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Qwen2_5OmniForConditionalGeneration": (
         "qwen2_5_omni_thinker",
         "Qwen2_5OmniThinkerForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"),  # noqa: E501
     "Qwen3VLMoeForConditionalGeneration": (
         "qwen3_vl_moe",
         "Qwen3VLMoeForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"),
     "Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"),  # noqa: E501
     "TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"),  # noqa: E501
     "Tarsier2ForConditionalGeneration": (
         "qwen2_vl",
         "Tarsier2ForConditionalGeneration",
-    ),  # noqa: E501
+    ),
     "UltravoxModel": ("ultravox", "UltravoxModel"),
     "VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"),  # noqa: E501
     # [Encoder-decoder]
@@ -401,23 +401,23 @@ _TRANSFORMERS_BACKEND_MODELS = {
     "TransformersMoEForMultimodalLM": (
         "transformers_moe",
         "TransformersMoEForMultimodalLM",
-    ),  # noqa: E501
+    ),
     "TransformersEmbeddingModel": (
         "transformers_pooling",
         "TransformersEmbeddingModel",
-    ),  # noqa: E501
+    ),
     "TransformersForSequenceClassification": (
         "transformers_pooling",
         "TransformersForSequenceClassification",
-    ),  # noqa: E501
+    ),
     "TransformersMoEForSequenceClassification": (
         "transformers_pooling",
         "TransformersMoEForSequenceClassification",
-    ),  # noqa: E501
+    ),
     "TransformersMoEEmbeddingModel": (
         "transformers_pooling",
         "TransformersMoEEmbeddingModel",
-    ),  # noqa: E501
+    ),
 }

 _VLLM_MODELS = {
@@ -79,7 +79,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata]
         self.speculative_config = vllm_config.speculative_config
         self.kv_cache_spec = kv_cache_spec
         if self.speculative_config:
-            self.num_spec = self.speculative_config.num_speculative_tokens  # noqa: E501
+            self.num_spec = self.speculative_config.num_speculative_tokens
         else:
             self.num_spec = 0
         self.use_spec_decode = self.num_spec > 0