diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index f13ce935ec4b6..b31e4931f2295 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -75,7 +75,6 @@ from vllm.platforms import CpuArchEnum, current_platform from vllm.plugins import load_general_plugins from vllm.ray.lazy_utils import is_in_ray_actor, is_ray_initialized from vllm.reasoning import ReasoningParserManager -from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3 from vllm.transformers_utils.config import ( get_model_path, is_interleaved, @@ -1126,15 +1125,6 @@ class EngineArgs: if check_gguf_file(self.model): self.quantization = self.load_format = "gguf" - # NOTE: This is to allow model loading from S3 in CI - if ( - not isinstance(self, AsyncEngineArgs) - and envs.VLLM_CI_USE_S3 - and self.model in MODELS_ON_S3 - and self.load_format == "auto" - ): - self.model = f"{MODEL_WEIGHTS_S3_BUCKET}/{self.model}" - if self.disable_mm_preprocessor_cache: logger.warning( "`--disable-mm-preprocessor-cache` is deprecated " diff --git a/vllm/test_utils.py b/vllm/test_utils.py deleted file mode 100644 index 91dcc2fd84e17..0000000000000 --- a/vllm/test_utils.py +++ /dev/null @@ -1,129 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -MODELS_ON_S3 = [ - "adept/fuyu-8b", - "ai21labs/AI21-Jamba-1.5-Mini", - "ai21labs/Jamba-tiny-random", - "ai21labs/Jamba-tiny-reward-dev", - "allenai/Molmo-7B-D-0924", - "allenai/OLMo-1B-hf", - "allenai/OLMoE-1B-7B-0924-Instruct", - "amd/Llama-3.1-8B-Instruct-FP8-KV-Quark-test", - "AMead10/Llama-3.2-1B-Instruct-AWQ", - "hmellor/Ilama-3.2-1B", - "BAAI/bge-base-en-v1.5", - "BAAI/bge-multilingual-gemma2", - "BAAI/bge-reranker-v2-m3", - "bigcode/starcoder2-3b", - "cross-encoder/ms-marco-MiniLM-L-6-v2", - "cross-encoder/quora-roberta-base", - "deepseek-ai/deepseek-vl2-tiny", - "distilbert/distilgpt2", - "facebook/bart-base", - "facebook/bart-large-cnn", - # "fixie-ai/ultravox-v0_5-llama-3_2-1b", - "google/gemma-1.1-2b-it", - "google/gemma-2-2b-it", - "google/paligemma-3b-pt-224", - "h2oai/h2ovl-mississippi-800m", - "HuggingFaceM4/Idefics3-8B-Llama3", - "internlm/internlm2-1_8b-reward", - "intfloat/e5-mistral-7b-instruct", - "intfloat/multilingual-e5-small", - "jason9693/Qwen2.5-1.5B-apeach", - "llava-hf/llava-1.5-7b-hf", - "llava-hf/llava-onevision-qwen2-0.5b-ov-hf", - "llava-hf/llava-v1.6-mistral-7b-hf", - "llava-hf/LLaVA-NeXT-Video-7B-hf", - # "meta-llama/Llama-2-7b-hf", - "meta-llama/Llama-3.2-1B", - "meta-llama/Llama-3.2-1B-Instruct", - "meta-llama/Meta-Llama-3-8B", - "microsoft/phi-2", - "microsoft/Phi-3-mini-4k-instruct", - "microsoft/Phi-3-small-8k-instruct", - "microsoft/Phi-3-vision-128k-instruct", - "microsoft/Phi-3.5-MoE-instruct", - "microsoft/Phi-3.5-vision-instruct", - # "mistralai/Mistral-7B-Instruct-v0.1", - "mistralai/Mixtral-8x7B-Instruct-v0.1", - "mistralai/Pixtral-12B-2409", - "mistral-community/Mixtral-8x22B-v0.1-AWQ", - "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head", - "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symFalse", - "ModelCloud/Qwen1.5-1.8B-Chat-GPTQ-4bits-dynamic-cfg-with-lm_head-symTrue", - "ModelCloud/TinyLlama-1.1B-Chat-v1.0-GPTQ-4bit-10-25-2024", - "neuralmagic/Llama-3.2-1B-quantized.w8a8", - "neuralmagic/Meta-Llama-3-8B-Instruct-FP8", - "neuralmagic/Meta-Llama-3-8B-Instruct-FP8-KV", - "nm-testing/asym-w8w8-int8-static-per-tensor-tiny-llama", - "nm-testing/llama2.c-stories42M-pruned2.4-compressed", - "nm-testing/llama7b-one-shot-2_4-w4a16-marlin24-t", - "nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test", - "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Dynamic-2of4-testing", - "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Dynamic-IA-Per-Tensor-Weight-testing", - "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Static-Per-Tensor-testing", - "nm-testing/Meta-Llama-3-8B-Instruct-FP8-Static-testing", - "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Dynamic-Asym", - "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Asym", - "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Sym", - "nm-testing/Phi-3-mini-128k-instruct-FP8", - "nm-testing/Qwen2-0.5B-Instruct-FP8-SkipQKV", - "nm-testing/Qwen2-1.5B-Instruct-FP8-K-V", - "nm-testing/tinyllama-oneshot-w4a16-channel-v2", - "nm-testing/tinyllama-oneshot-w4a16-group128-v2", - "nm-testing/tinyllama-oneshot-w8-channel-a8-tensor", - "nm-testing/tinyllama-oneshot-w8a16-per-channel", - "nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2", - "nm-testing/tinyllama-oneshot-w8a8-channel-dynamic-token-v2-asym", - "nm-testing/tinyllama-oneshot-w8a8-dynamic-token-v2", - "nm-testing/tinyllama-oneshot-w8a8-dynamic-token-v2-asym", - "nm-testing/tinyllama-oneshot-w8w8-test-static-shape-change", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-2of4-Sparse-Dense-Compressor", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_fp8-BitM", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_per_tok_dyn_act_int8-BitM", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_tensor_act_fp8-BitM", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-chnl_wts_tensor_act_int8-BitM", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_per_tok_dyn_act_fp8-BitM", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_per_tok_dyn_act_int8-BitM", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_tensor_act_fp8-BitM", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-gsm8k-pruned.2of4-tensor_wts_tensor_act_int8-BitM", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Channel-Weight-testing", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Dynamic-IA-Per-Tensor-Weight-testing", - "nm-testing/TinyLlama-1.1B-Chat-v1.0-INT8-Static-testing", - "nm-testing/TinyLlama-1.1B-compressed-tensors-kv-cache-scheme", - "nvidia/NVLM-D-72B", - "openai-community/gpt2", - # "openai/whisper-large-v3", - "openbmb/MiniCPM-o-2_6", - "openbmb/MiniCPM-V-2_6", - "OpenGVLab/InternVL2-1B", - "parasail-ai/GritLM-7B-vllm", - "Qwen/Qwen1.5-MoE-A2.7B-Chat", - "Qwen/Qwen2-7B-Instruct", - "Qwen/Qwen2-Audio-7B-Instruct", - "Qwen/Qwen2-VL-2B-Instruct", - "Qwen/Qwen2.5-1.5B-Instruct", - "Qwen/Qwen2.5-Math-PRM-7B", - "Qwen/Qwen2.5-Math-RM-72B", - "Qwen/Qwen2.5-VL-3B-Instruct", - "royokong/e5-v", - "sentence-transformers/all-roberta-large-v1", - "sentence-transformers/stsb-roberta-base-v2", - "allenai/OLMo-2-0425-1B", - "shuyuej/Llama-3.2-1B-Instruct-GPTQ", - "ssmits/Qwen2-7B-Instruct-embed-base", - "stabilityai/stablelm-3b-4e1t", - "stabilityai/stablelm-zephyr-3b", - "state-spaces/mamba-130m-hf", - "TheBloke/TinyLlama-1.1B-Chat-v1.0-GPTQ", - "zai-org/glm-4v-9b", - "TIGER-Lab/Mantis-8B-siglip-llama3", - "TIGER-Lab/VLM2Vec-Full", - "tiiuae/falcon-40b", - "tiiuae/falcon-mamba-7b-instruct", - "TinyLlama/TinyLlama-1.1B-Chat-v1.0", - "upstage/solar-pro-preview-instruct", -] - -MODEL_WEIGHTS_S3_BUCKET = "s3://vllm-ci-model-weights"