mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2025-12-16 04:05:25 +08:00
[Misc] Clean up unnecessary E501 ignore (#26274)
Signed-off-by: Roger Wang <hey@rogerw.io>
This commit is contained in:
parent
7c2ec0fe87
commit
43c146ca42
@ -626,7 +626,7 @@ class RandomDataset(BenchmarkDataset):
|
|||||||
# Decode, then re-encode and truncate to preserve token count invariants
|
# Decode, then re-encode and truncate to preserve token count invariants
|
||||||
total_input_len = prefix_len + int(input_len)
|
total_input_len = prefix_len + int(input_len)
|
||||||
prompt, adjusted_token_sequence, token_mismatch = (
|
prompt, adjusted_token_sequence, token_mismatch = (
|
||||||
gen_prompt_decode_to_target_len( # noqa: E501
|
gen_prompt_decode_to_target_len(
|
||||||
tokenizer=tokenizer,
|
tokenizer=tokenizer,
|
||||||
token_sequence=token_sequence,
|
token_sequence=token_sequence,
|
||||||
target_token_len=total_input_len,
|
target_token_len=total_input_len,
|
||||||
@ -2855,7 +2855,7 @@ class PrefixRepetitionRandomDataset(BenchmarkDataset):
|
|||||||
for _ in range(prompts_per_prefix):
|
for _ in range(prompts_per_prefix):
|
||||||
suffix_tokens, token_mistmatch = _generate_exact_length_tokens(
|
suffix_tokens, token_mistmatch = _generate_exact_length_tokens(
|
||||||
suffix_len
|
suffix_len
|
||||||
) # noqa: E501
|
)
|
||||||
token_mismatch_total += token_mistmatch
|
token_mismatch_total += token_mistmatch
|
||||||
combined_tokens = prefix_tokens + suffix_tokens
|
combined_tokens = prefix_tokens + suffix_tokens
|
||||||
prompt = tokenizer.decode(combined_tokens)
|
prompt = tokenizer.decode(combined_tokens)
|
||||||
|
|||||||
@ -459,14 +459,14 @@ def validate_args(args):
|
|||||||
):
|
):
|
||||||
assert args.backend == "vllm-chat", (
|
assert args.backend == "vllm-chat", (
|
||||||
f"{args.dataset_path} needs to use vllm-chat as the backend."
|
f"{args.dataset_path} needs to use vllm-chat as the backend."
|
||||||
) # noqa: E501
|
)
|
||||||
elif args.dataset_path in (
|
elif args.dataset_path in (
|
||||||
InstructCoderDataset.SUPPORTED_DATASET_PATHS
|
InstructCoderDataset.SUPPORTED_DATASET_PATHS
|
||||||
| AIMODataset.SUPPORTED_DATASET_PATHS
|
| AIMODataset.SUPPORTED_DATASET_PATHS
|
||||||
):
|
):
|
||||||
assert args.backend == "vllm", (
|
assert args.backend == "vllm", (
|
||||||
f"{args.dataset_path} needs to use vllm as the backend."
|
f"{args.dataset_path} needs to use vllm as the backend."
|
||||||
) # noqa: E501
|
)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"{args.dataset_path} is not supported by hf dataset.")
|
raise ValueError(f"{args.dataset_path} is not supported by hf dataset.")
|
||||||
|
|
||||||
|
|||||||
@ -19,7 +19,7 @@ if is_torch_equal_or_newer("2.6"):
|
|||||||
from torch._inductor.custom_graph_pass import CustomGraphPass
|
from torch._inductor.custom_graph_pass import CustomGraphPass
|
||||||
else:
|
else:
|
||||||
# CustomGraphPass is not present in 2.5 or lower, import our version
|
# CustomGraphPass is not present in 2.5 or lower, import our version
|
||||||
from .torch25_custom_graph_pass import ( # noqa: E501
|
from .torch25_custom_graph_pass import (
|
||||||
Torch25CustomGraphPass as CustomGraphPass,
|
Torch25CustomGraphPass as CustomGraphPass,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -95,7 +95,7 @@ def get_quantization_config(quantization: str) -> type[QuantizationConfig]:
|
|||||||
from .awq_marlin import AWQMarlinConfig
|
from .awq_marlin import AWQMarlinConfig
|
||||||
from .bitblas import BitBLASConfig
|
from .bitblas import BitBLASConfig
|
||||||
from .bitsandbytes import BitsAndBytesConfig
|
from .bitsandbytes import BitsAndBytesConfig
|
||||||
from .compressed_tensors.compressed_tensors import ( # noqa: E501
|
from .compressed_tensors.compressed_tensors import (
|
||||||
CompressedTensorsConfig,
|
CompressedTensorsConfig,
|
||||||
)
|
)
|
||||||
from .deepspeedfp import DeepSpeedFPConfig
|
from .deepspeedfp import DeepSpeedFPConfig
|
||||||
|
|||||||
@ -26,7 +26,7 @@ from vllm.model_executor.layers.linear import (
|
|||||||
UnquantizedLinearMethod,
|
UnquantizedLinearMethod,
|
||||||
)
|
)
|
||||||
from vllm.model_executor.layers.quantization import QuantizationMethods
|
from vllm.model_executor.layers.quantization import QuantizationMethods
|
||||||
from vllm.model_executor.layers.quantization.base_config import ( # noqa: E501
|
from vllm.model_executor.layers.quantization.base_config import (
|
||||||
QuantizationConfig,
|
QuantizationConfig,
|
||||||
QuantizeMethodBase,
|
QuantizeMethodBase,
|
||||||
)
|
)
|
||||||
@ -256,7 +256,7 @@ class CompressedTensorsConfig(QuantizationConfig):
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
target_scheme_map[target]["input_activations"] = (
|
target_scheme_map[target]["input_activations"] = (
|
||||||
QuantizationArgs.model_validate( # noqa: E501
|
QuantizationArgs.model_validate(
|
||||||
quant_config.get("input_activations")
|
quant_config.get("input_activations")
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@ -176,7 +176,7 @@ class Gemma3nDummyInputsBuilder(BaseDummyInputsBuilder[Gemma3nProcessingInfo]):
|
|||||||
processor = self.info.get_hf_processor()
|
processor = self.info.get_hf_processor()
|
||||||
audio_feature_extractor: Gemma3nAudioFeatureExtractor = (
|
audio_feature_extractor: Gemma3nAudioFeatureExtractor = (
|
||||||
processor.feature_extractor
|
processor.feature_extractor
|
||||||
) # noqa: E501
|
)
|
||||||
audio_len = audio_feature_extractor.fft_length
|
audio_len = audio_feature_extractor.fft_length
|
||||||
image_processor: SiglipImageProcessorFast = processor.image_processor
|
image_processor: SiglipImageProcessorFast = processor.image_processor
|
||||||
img_width = image_processor.size.get("width", 224)
|
img_width = image_processor.size.get("width", 224)
|
||||||
|
|||||||
@ -120,7 +120,7 @@ _TEXT_GENERATION_MODELS = {
|
|||||||
"JambaForCausalLM": ("jamba", "JambaForCausalLM"),
|
"JambaForCausalLM": ("jamba", "JambaForCausalLM"),
|
||||||
"Lfm2ForCausalLM": ("lfm2", "Lfm2ForCausalLM"),
|
"Lfm2ForCausalLM": ("lfm2", "Lfm2ForCausalLM"),
|
||||||
"LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
|
"LlamaForCausalLM": ("llama", "LlamaForCausalLM"),
|
||||||
"Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"), # noqa: E501
|
"Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"),
|
||||||
# For decapoda-research/llama-*
|
# For decapoda-research/llama-*
|
||||||
"LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
|
"LLaMAForCausalLM": ("llama", "LlamaForCausalLM"),
|
||||||
"LongcatFlashForCausalLM": ("longcat_flash", "LongcatFlashForCausalLM"),
|
"LongcatFlashForCausalLM": ("longcat_flash", "LongcatFlashForCausalLM"),
|
||||||
@ -204,7 +204,7 @@ _EMBEDDING_MODELS = {
|
|||||||
"LlavaNextForConditionalGeneration": (
|
"LlavaNextForConditionalGeneration": (
|
||||||
"llava_next",
|
"llava_next",
|
||||||
"LlavaNextForConditionalGeneration",
|
"LlavaNextForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
|
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
|
||||||
"Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"), # noqa: E501
|
"Qwen2VLForConditionalGeneration": ("qwen2_vl", "Qwen2VLForConditionalGeneration"), # noqa: E501
|
||||||
# Technically Terratorch models work on images, both in
|
# Technically Terratorch models work on images, both in
|
||||||
@ -240,46 +240,46 @@ _MULTIMODAL_MODELS = {
|
|||||||
"AyaVisionForConditionalGeneration": (
|
"AyaVisionForConditionalGeneration": (
|
||||||
"aya_vision",
|
"aya_vision",
|
||||||
"AyaVisionForConditionalGeneration",
|
"AyaVisionForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Blip2ForConditionalGeneration": ("blip2", "Blip2ForConditionalGeneration"),
|
"Blip2ForConditionalGeneration": ("blip2", "Blip2ForConditionalGeneration"),
|
||||||
"ChameleonForConditionalGeneration": (
|
"ChameleonForConditionalGeneration": (
|
||||||
"chameleon",
|
"chameleon",
|
||||||
"ChameleonForConditionalGeneration",
|
"ChameleonForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Cohere2VisionForConditionalGeneration": (
|
"Cohere2VisionForConditionalGeneration": (
|
||||||
"cohere2_vision",
|
"cohere2_vision",
|
||||||
"Cohere2VisionForConditionalGeneration",
|
"Cohere2VisionForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"DeepseekVLV2ForCausalLM": ("deepseek_vl2", "DeepseekVLV2ForCausalLM"),
|
"DeepseekVLV2ForCausalLM": ("deepseek_vl2", "DeepseekVLV2ForCausalLM"),
|
||||||
"DotsOCRForCausalLM": ("dots_ocr", "DotsOCRForCausalLM"),
|
"DotsOCRForCausalLM": ("dots_ocr", "DotsOCRForCausalLM"),
|
||||||
"Ernie4_5_VLMoeForConditionalGeneration": (
|
"Ernie4_5_VLMoeForConditionalGeneration": (
|
||||||
"ernie45_vl",
|
"ernie45_vl",
|
||||||
"Ernie4_5_VLMoeForConditionalGeneration",
|
"Ernie4_5_VLMoeForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"),
|
"FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"),
|
||||||
"Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"), # noqa: E501
|
"Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"), # noqa: E501
|
||||||
"Gemma3nForConditionalGeneration": (
|
"Gemma3nForConditionalGeneration": (
|
||||||
"gemma3n_mm",
|
"gemma3n_mm",
|
||||||
"Gemma3nForConditionalGeneration",
|
"Gemma3nForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
|
"GLM4VForCausalLM": ("glm4v", "GLM4VForCausalLM"),
|
||||||
"Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"), # noqa: E501
|
"Glm4vForConditionalGeneration": ("glm4_1v", "Glm4vForConditionalGeneration"), # noqa: E501
|
||||||
"Glm4vMoeForConditionalGeneration": ("glm4_1v", "Glm4vMoeForConditionalGeneration"), # noqa: E501
|
"Glm4vMoeForConditionalGeneration": ("glm4_1v", "Glm4vMoeForConditionalGeneration"), # noqa: E501
|
||||||
"GraniteSpeechForConditionalGeneration": (
|
"GraniteSpeechForConditionalGeneration": (
|
||||||
"granite_speech",
|
"granite_speech",
|
||||||
"GraniteSpeechForConditionalGeneration",
|
"GraniteSpeechForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
|
"H2OVLChatModel": ("h2ovl", "H2OVLChatModel"),
|
||||||
"InternVLChatModel": ("internvl", "InternVLChatModel"),
|
"InternVLChatModel": ("internvl", "InternVLChatModel"),
|
||||||
"NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
|
"NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"),
|
||||||
"InternS1ForConditionalGeneration": (
|
"InternS1ForConditionalGeneration": (
|
||||||
"interns1",
|
"interns1",
|
||||||
"InternS1ForConditionalGeneration",
|
"InternS1ForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"InternVLForConditionalGeneration": (
|
"InternVLForConditionalGeneration": (
|
||||||
"interns1",
|
"interns1",
|
||||||
"InternS1ForConditionalGeneration",
|
"InternS1ForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Idefics3ForConditionalGeneration": (
|
"Idefics3ForConditionalGeneration": (
|
||||||
"idefics3",
|
"idefics3",
|
||||||
"Idefics3ForConditionalGeneration",
|
"Idefics3ForConditionalGeneration",
|
||||||
@ -289,7 +289,7 @@ _MULTIMODAL_MODELS = {
|
|||||||
"KeyeVL1_5ForConditionalGeneration": (
|
"KeyeVL1_5ForConditionalGeneration": (
|
||||||
"keye_vl1_5",
|
"keye_vl1_5",
|
||||||
"KeyeVL1_5ForConditionalGeneration",
|
"KeyeVL1_5ForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"RForConditionalGeneration": ("rvl", "RForConditionalGeneration"),
|
"RForConditionalGeneration": ("rvl", "RForConditionalGeneration"),
|
||||||
"KimiVLForConditionalGeneration": ("kimi_vl", "KimiVLForConditionalGeneration"), # noqa: E501
|
"KimiVLForConditionalGeneration": ("kimi_vl", "KimiVLForConditionalGeneration"), # noqa: E501
|
||||||
"Llama_Nemotron_Nano_VL": ("nemotron_vl", "LlamaNemotronVLChatModel"),
|
"Llama_Nemotron_Nano_VL": ("nemotron_vl", "LlamaNemotronVLChatModel"),
|
||||||
@ -298,27 +298,27 @@ _MULTIMODAL_MODELS = {
|
|||||||
"LlavaNextForConditionalGeneration": (
|
"LlavaNextForConditionalGeneration": (
|
||||||
"llava_next",
|
"llava_next",
|
||||||
"LlavaNextForConditionalGeneration",
|
"LlavaNextForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"LlavaNextVideoForConditionalGeneration": (
|
"LlavaNextVideoForConditionalGeneration": (
|
||||||
"llava_next_video",
|
"llava_next_video",
|
||||||
"LlavaNextVideoForConditionalGeneration",
|
"LlavaNextVideoForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"LlavaOnevisionForConditionalGeneration": (
|
"LlavaOnevisionForConditionalGeneration": (
|
||||||
"llava_onevision",
|
"llava_onevision",
|
||||||
"LlavaOnevisionForConditionalGeneration",
|
"LlavaOnevisionForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"), # noqa: E501
|
"MantisForConditionalGeneration": ("llava", "MantisForConditionalGeneration"), # noqa: E501
|
||||||
"MiDashengLMModel": ("midashenglm", "MiDashengLMModel"),
|
"MiDashengLMModel": ("midashenglm", "MiDashengLMModel"),
|
||||||
"MiniMaxVL01ForConditionalGeneration": (
|
"MiniMaxVL01ForConditionalGeneration": (
|
||||||
"minimax_vl_01",
|
"minimax_vl_01",
|
||||||
"MiniMaxVL01ForConditionalGeneration",
|
"MiniMaxVL01ForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"MiniCPMO": ("minicpmo", "MiniCPMO"),
|
"MiniCPMO": ("minicpmo", "MiniCPMO"),
|
||||||
"MiniCPMV": ("minicpmv", "MiniCPMV"),
|
"MiniCPMV": ("minicpmv", "MiniCPMV"),
|
||||||
"Mistral3ForConditionalGeneration": (
|
"Mistral3ForConditionalGeneration": (
|
||||||
"mistral3",
|
"mistral3",
|
||||||
"Mistral3ForConditionalGeneration",
|
"Mistral3ForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
|
"MolmoForCausalLM": ("molmo", "MolmoForCausalLM"),
|
||||||
"NVLM_D": ("nvlm_d", "NVLM_D_Model"),
|
"NVLM_D": ("nvlm_d", "NVLM_D_Model"),
|
||||||
"Ovis": ("ovis", "Ovis"),
|
"Ovis": ("ovis", "Ovis"),
|
||||||
@ -326,7 +326,7 @@ _MULTIMODAL_MODELS = {
|
|||||||
"PaliGemmaForConditionalGeneration": (
|
"PaliGemmaForConditionalGeneration": (
|
||||||
"paligemma",
|
"paligemma",
|
||||||
"PaliGemmaForConditionalGeneration",
|
"PaliGemmaForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
|
"Phi3VForCausalLM": ("phi3v", "Phi3VForCausalLM"),
|
||||||
"Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
|
"Phi4MMForCausalLM": ("phi4mm", "Phi4MMForCausalLM"),
|
||||||
"Phi4MultimodalForCausalLM": ("phi4_multimodal", "Phi4MultimodalForCausalLM"), # noqa: E501
|
"Phi4MultimodalForCausalLM": ("phi4_multimodal", "Phi4MultimodalForCausalLM"), # noqa: E501
|
||||||
@ -336,31 +336,31 @@ _MULTIMODAL_MODELS = {
|
|||||||
"Qwen2_5_VLForConditionalGeneration": (
|
"Qwen2_5_VLForConditionalGeneration": (
|
||||||
"qwen2_5_vl",
|
"qwen2_5_vl",
|
||||||
"Qwen2_5_VLForConditionalGeneration",
|
"Qwen2_5_VLForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Qwen2AudioForConditionalGeneration": (
|
"Qwen2AudioForConditionalGeneration": (
|
||||||
"qwen2_audio",
|
"qwen2_audio",
|
||||||
"Qwen2AudioForConditionalGeneration",
|
"Qwen2AudioForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Qwen2_5OmniModel": (
|
"Qwen2_5OmniModel": (
|
||||||
"qwen2_5_omni_thinker",
|
"qwen2_5_omni_thinker",
|
||||||
"Qwen2_5OmniThinkerForConditionalGeneration",
|
"Qwen2_5OmniThinkerForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Qwen2_5OmniForConditionalGeneration": (
|
"Qwen2_5OmniForConditionalGeneration": (
|
||||||
"qwen2_5_omni_thinker",
|
"qwen2_5_omni_thinker",
|
||||||
"Qwen2_5OmniThinkerForConditionalGeneration",
|
"Qwen2_5OmniThinkerForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"), # noqa: E501
|
"Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"), # noqa: E501
|
||||||
"Qwen3VLMoeForConditionalGeneration": (
|
"Qwen3VLMoeForConditionalGeneration": (
|
||||||
"qwen3_vl_moe",
|
"qwen3_vl_moe",
|
||||||
"Qwen3VLMoeForConditionalGeneration",
|
"Qwen3VLMoeForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"),
|
"SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"),
|
||||||
"Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"), # noqa: E501
|
"Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"), # noqa: E501
|
||||||
"TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"), # noqa: E501
|
"TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"), # noqa: E501
|
||||||
"Tarsier2ForConditionalGeneration": (
|
"Tarsier2ForConditionalGeneration": (
|
||||||
"qwen2_vl",
|
"qwen2_vl",
|
||||||
"Tarsier2ForConditionalGeneration",
|
"Tarsier2ForConditionalGeneration",
|
||||||
), # noqa: E501
|
),
|
||||||
"UltravoxModel": ("ultravox", "UltravoxModel"),
|
"UltravoxModel": ("ultravox", "UltravoxModel"),
|
||||||
"VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"), # noqa: E501
|
"VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"), # noqa: E501
|
||||||
# [Encoder-decoder]
|
# [Encoder-decoder]
|
||||||
@ -401,23 +401,23 @@ _TRANSFORMERS_BACKEND_MODELS = {
|
|||||||
"TransformersMoEForMultimodalLM": (
|
"TransformersMoEForMultimodalLM": (
|
||||||
"transformers_moe",
|
"transformers_moe",
|
||||||
"TransformersMoEForMultimodalLM",
|
"TransformersMoEForMultimodalLM",
|
||||||
), # noqa: E501
|
),
|
||||||
"TransformersEmbeddingModel": (
|
"TransformersEmbeddingModel": (
|
||||||
"transformers_pooling",
|
"transformers_pooling",
|
||||||
"TransformersEmbeddingModel",
|
"TransformersEmbeddingModel",
|
||||||
), # noqa: E501
|
),
|
||||||
"TransformersForSequenceClassification": (
|
"TransformersForSequenceClassification": (
|
||||||
"transformers_pooling",
|
"transformers_pooling",
|
||||||
"TransformersForSequenceClassification",
|
"TransformersForSequenceClassification",
|
||||||
), # noqa: E501
|
),
|
||||||
"TransformersMoEForSequenceClassification": (
|
"TransformersMoEForSequenceClassification": (
|
||||||
"transformers_pooling",
|
"transformers_pooling",
|
||||||
"TransformersMoEForSequenceClassification",
|
"TransformersMoEForSequenceClassification",
|
||||||
), # noqa: E501
|
),
|
||||||
"TransformersMoEEmbeddingModel": (
|
"TransformersMoEEmbeddingModel": (
|
||||||
"transformers_pooling",
|
"transformers_pooling",
|
||||||
"TransformersMoEEmbeddingModel",
|
"TransformersMoEEmbeddingModel",
|
||||||
), # noqa: E501
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
_VLLM_MODELS = {
|
_VLLM_MODELS = {
|
||||||
|
|||||||
@ -79,7 +79,7 @@ class GDNAttentionMetadataBuilder(AttentionMetadataBuilder[GDNAttentionMetadata]
|
|||||||
self.speculative_config = vllm_config.speculative_config
|
self.speculative_config = vllm_config.speculative_config
|
||||||
self.kv_cache_spec = kv_cache_spec
|
self.kv_cache_spec = kv_cache_spec
|
||||||
if self.speculative_config:
|
if self.speculative_config:
|
||||||
self.num_spec = self.speculative_config.num_speculative_tokens # noqa: E501
|
self.num_spec = self.speculative_config.num_speculative_tokens
|
||||||
else:
|
else:
|
||||||
self.num_spec = 0
|
self.num_spec = 0
|
||||||
self.use_spec_decode = self.num_spec > 0
|
self.use_spec_decode = self.num_spec > 0
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user