From be2e1632fdc58596a13018b3348696ef5d56fbd1 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Thu, 3 Jul 2025 13:01:01 +0000 Subject: [PATCH] delete basic-ub.py Signed-off-by: Sage Moore --- examples/basic-ub.py | 64 -------------------------------------------- 1 file changed, 64 deletions(-) delete mode 100644 examples/basic-ub.py diff --git a/examples/basic-ub.py b/examples/basic-ub.py deleted file mode 100644 index 3f6fd2fdb82ea..0000000000000 --- a/examples/basic-ub.py +++ /dev/null @@ -1,64 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 - -import logging -import os - -from vllm import LLM, SamplingParams - -# Sample prompts. -prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", -] -# Configure logging level for vllm (optional, uses VLLM_LOGGING_LEVEL env var). -logging_level = os.getenv("VLLM_LOGGING_LEVEL", "").upper() -if logging_level: - logging.basicConfig(level=getattr(logging, logging_level, logging.INFO)) - -# Create a sampling params object, optionally limiting output tokens via MAX_TOKENS env var. -param_kwargs = {"temperature": 0.8, "top_p": 0.95} -max_tokens_env = os.getenv("MAX_TOKENS") -if max_tokens_env is not None: - try: - param_kwargs["max_tokens"] = int(max_tokens_env) - except ValueError: - raise ValueError(f"Invalid MAX_TOKENS value: {max_tokens_env}") -sampling_params = SamplingParams(**param_kwargs) - - -def main(): - # Create an LLM. - model = "deepseek-ai/DeepSeek-V2-Lite" - # model = "facebook/opt-125m" - llm = LLM(model=model, - enforce_eager=True, - compilation_config=2, - ############### - trust_remote_code=True, - max_model_len=1024, - #load_format="dummy", - ############### - #tensor_parallel_size=1, - data_parallel_size=2, - enable_expert_parallel=True, - ############### - #enable_microbatching=True, - ) - # Generate texts from the prompts. - # The output is a list of RequestOutput objects - # that contain the prompt, generated text, and other information. - outputs = llm.generate(prompts, sampling_params) - # Print the outputs. - print("\nGenerated Outputs:\n" + "-" * 60) - for output in outputs: - prompt = output.prompt - generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}") - print(f"Output: {generated_text!r}") - print("-" * 60) - - -if __name__ == "__main__": - main()