Add TP CLI argument to multimodal inference examples (#29301)
Signed-off-by: Lin, Fanli <fanli.lin@intel.com>
parent 316c8492bf
commit a21256c463
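All three examples pick up the same three-step pattern: expose a `--tensor-parallel-size`/`-tp` flag, reject non-positive values, and override the per-model `EngineArgs` default only when the flag was actually given. Below is a minimal, self-contained sketch of that pattern, assuming an installed vLLM; the standalone script shape and the model name are illustrative, and only the flag wiring comes from this commit:

```python
# Sketch of the pattern this commit applies in each example script.
# `LLM` and `EngineArgs` are vLLM's public API; the model name is illustrative.
from argparse import ArgumentParser
from dataclasses import asdict

from vllm import LLM, EngineArgs


def parse_args():
    parser = ArgumentParser()
    parser.add_argument(
        "--tensor-parallel-size",
        "-tp",
        type=int,
        default=None,
        help="Tensor parallel size to override the model's default setting.",
    )
    return parser.parse_args()


def main(args):
    # Reject 0 and negatives; None means "keep the model's default".
    if args.tensor_parallel_size is not None and args.tensor_parallel_size < 1:
        raise ValueError(
            f"tensor_parallel_size must be a positive integer, "
            f"got {args.tensor_parallel_size}"
        )

    # Start from the model's default engine arguments, then layer the CLI
    # override on top, mirroring what the diff does with `req_data.engine_args`.
    engine_args = asdict(EngineArgs(model="Qwen/Qwen2-Audio-7B-Instruct"))
    if args.tensor_parallel_size is not None:
        engine_args["tensor_parallel_size"] = args.tensor_parallel_size
    llm = LLM(**engine_args)  # ready for llm.generate(...)


if __name__ == "__main__":
    main(parse_args())
```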
examples/offline_inference/audio_language.py (+15, Normal file → Executable file)
```diff
@@ -425,6 +425,13 @@ def parse_args():
         default=None,
         help="Set the seed when initializing `vllm.LLM`.",
     )
+    parser.add_argument(
+        "--tensor-parallel-size",
+        "-tp",
+        type=int,
+        default=None,
+        help="Tensor parallel size to override the model's default setting. ",
+    )

     return parser.parse_args()

@@ -434,6 +441,12 @@ def main(args):
     if model not in model_example_map:
         raise ValueError(f"Model type {model} is not supported.")

+    if args.tensor_parallel_size is not None and args.tensor_parallel_size < 1:
+        raise ValueError(
+            f"tensor_parallel_size must be a positive integer, "
+            f"got {args.tensor_parallel_size}"
+        )
+
     audio_count = args.num_audios
     req_data = model_example_map[model](
         question_per_audio_count[audio_count], audio_count
@@ -446,6 +459,8 @@ def main(args):
     )

     engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
+    if args.tensor_parallel_size is not None:
+        engine_args["tensor_parallel_size"] = args.tensor_parallel_size
     llm = LLM(**engine_args)

     # We set temperature to 0.2 so that outputs can be different
```
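With the flag in place, the audio example can be sharded at launch, for instance `python examples/offline_inference/audio_language.py -tp 2` alongside the script's existing model-selection options; omitting `-tp` leaves `tensor_parallel_size` at whatever the chosen model's `EngineArgs` default is.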
examples/offline_inference/vision_language.py (+15, Normal file → Executable file)
```diff
@@ -2064,6 +2064,13 @@ def parse_args():
         help="If True, will send all requests in a second batch with empty mm "
         "data to verify cache hits with UUIDs.",
     )
+    parser.add_argument(
+        "--tensor-parallel-size",
+        "-tp",
+        type=int,
+        default=None,
+        help="Tensor parallel size to override the model's default setting. ",
+    )
     return parser.parse_args()


@@ -2072,6 +2079,12 @@ def main(args):
     if model not in model_example_map:
         raise ValueError(f"Model type {model} is not supported.")

+    if args.tensor_parallel_size is not None and args.tensor_parallel_size < 1:
+        raise ValueError(
+            f"tensor_parallel_size must be a positive integer, "
+            f"got {args.tensor_parallel_size}"
+        )
+
     modality = args.modality
     mm_input = get_multi_modal_input(args)
     data = mm_input["data"]
@@ -2089,6 +2102,8 @@ def main(args):
         "seed": args.seed,
         "mm_processor_cache_gb": 0 if args.disable_mm_processor_cache else 4,
     }
+    if args.tensor_parallel_size is not None:
+        engine_args["tensor_parallel_size"] = args.tensor_parallel_size
     llm = LLM(**engine_args)

     # Don't want to check the flag multiple times, so just hijack `prompts`.
```
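The guarded assignment, rather than folding the flag into the `|` merge, is what keeps an unset flag from clobbering a model's default: with dict union the right-hand keys win. A quick illustration with plain dicts, values hypothetical:

```python
# Why the diff guards the override instead of merging it unconditionally.
defaults = {"tensor_parallel_size": 4, "seed": None}  # hypothetical model defaults

# Dict union (PEP 584): the right-hand operand takes precedence, so merging
# an unset flag as None would erase the default.
clobbered = defaults | {"tensor_parallel_size": None}
assert clobbered["tensor_parallel_size"] is None

# The committed pattern assigns only when the user actually passed -tp.
tp = None  # the flag was omitted
engine_args = dict(defaults)
if tp is not None:
    engine_args["tensor_parallel_size"] = tp
assert engine_args["tensor_parallel_size"] == 4  # default preserved
```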
examples/offline_inference/vision_language_multi_image.py (+40, Normal file → Executable file)
```diff
@@ -1352,10 +1352,18 @@ model_example_map = {
 }


-def run_generate(model, question: str, image_urls: list[str], seed: int | None):
+def run_generate(
+    model,
+    question: str,
+    image_urls: list[str],
+    seed: int | None,
+    tensor_parallel_size: int | None,
+):
     req_data = model_example_map[model](question, image_urls)

-    engine_args = asdict(req_data.engine_args) | {"seed": args.seed}
+    engine_args = asdict(req_data.engine_args) | {"seed": seed}
+    if tensor_parallel_size is not None:
+        engine_args["tensor_parallel_size"] = tensor_parallel_size
     llm = LLM(**engine_args)

     sampling_params = SamplingParams(
@@ -1378,7 +1386,13 @@ def run_generate(model, question: str, image_urls: list[str], seed: int | None):
     print("-" * 50)


-def run_chat(model: str, question: str, image_urls: list[str], seed: int | None):
+def run_chat(
+    model: str,
+    question: str,
+    image_urls: list[str],
+    seed: int | None,
+    tensor_parallel_size: int | None,
+):
     req_data = model_example_map[model](question, image_urls)

     # Disable other modalities to save memory
@@ -1388,6 +1402,8 @@ def run_chat(model: str, question: str, image_urls: list[str], seed: int | None)
     )

     engine_args = asdict(req_data.engine_args) | {"seed": seed}
+    if tensor_parallel_size is not None:
+        engine_args["tensor_parallel_size"] = tensor_parallel_size
     llm = LLM(**engine_args)

     sampling_params = (
@@ -1463,6 +1479,13 @@ def parse_args():
         default=2,
         help="Number of images to use for the demo.",
     )
+    parser.add_argument(
+        "--tensor-parallel-size",
+        "-tp",
+        type=int,
+        default=None,
+        help="Tensor parallel size to override the model's default setting. ",
+    )
     return parser.parse_args()


@@ -1470,13 +1493,20 @@ def main(args: Namespace):
     model = args.model_type
     method = args.method
     seed = args.seed
+    tensor_parallel_size = args.tensor_parallel_size

+    if tensor_parallel_size is not None and tensor_parallel_size < 1:
+        raise ValueError(
+            f"tensor_parallel_size must be a positive integer, "
+            f"got {tensor_parallel_size}"
+        )
+
     image_urls = IMAGE_URLS[: args.num_images]

     if method == "generate":
-        run_generate(model, QUESTION, image_urls, seed)
+        run_generate(model, QUESTION, image_urls, seed, tensor_parallel_size)
     elif method == "chat":
-        run_chat(model, QUESTION, image_urls, seed)
+        run_chat(model, QUESTION, image_urls, seed, tensor_parallel_size)
     else:
         raise ValueError(f"Invalid method: {method}")

```
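Besides threading `tensor_parallel_size` through both helpers, this file's first hunk also fixes a latent bug in `run_generate`: the old `{"seed": args.seed}` relied on an `args` name from the enclosing scope instead of the function's own `seed` parameter; the rewrite uses the parameter, matching `run_chat`.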