diff --git a/examples/offline_inference/mistral-small.py b/examples/offline_inference/mistral-small.py
index bf8fb7dc521c..efa1aa5b0369 100644
--- a/examples/offline_inference/mistral-small.py
+++ b/examples/offline_inference/mistral-small.py
@@ -90,8 +90,9 @@ def run_simple_demo(args: argparse.Namespace):
         },
     ]
     outputs = llm.chat(messages, sampling_params=sampling_params)
-
+    print("-" * 50)
     print(outputs[0].outputs[0].text)
+    print("-" * 50)
 
 
 def run_advanced_demo(args: argparse.Namespace):
@@ -162,7 +163,9 @@ def run_advanced_demo(args: argparse.Namespace):
     ]
 
     outputs = llm.chat(messages=messages, sampling_params=sampling_params)
+    print("-" * 50)
     print(outputs[0].outputs[0].text)
+    print("-" * 50)
 
 
 def main():
diff --git a/examples/offline_inference/multilora_inference.py b/examples/offline_inference/multilora_inference.py
index 4b0d115e6609..de409740292a 100644
--- a/examples/offline_inference/multilora_inference.py
+++ b/examples/offline_inference/multilora_inference.py
@@ -61,6 +61,7 @@ def process_requests(engine: LLMEngine,
     """Continuously process a list of prompts and handle the outputs."""
     request_id = 0
 
+    print("-" * 50)
     while test_prompts or engine.has_unfinished_requests():
         if test_prompts:
             prompt, sampling_params, lora_request = test_prompts.pop(0)
@@ -75,6 +76,7 @@ def process_requests(engine: LLMEngine,
         for request_output in request_outputs:
             if request_output.finished:
                 print(request_output)
+                print("-" * 50)
 
 
 def initialize_engine() -> LLMEngine:
diff --git a/examples/offline_inference/neuron.py b/examples/offline_inference/neuron.py
index 517d1bfce95d..5906c7b2c6b3 100644
--- a/examples/offline_inference/neuron.py
+++ b/examples/offline_inference/neuron.py
@@ -12,27 +12,36 @@ prompts = [
 # Create a sampling params object.
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 
-# Create an LLM.
-llm = LLM(
-    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    max_num_seqs=8,
-    # The max_model_len and block_size arguments are required to be same as
-    # max sequence length when targeting neuron device.
-    # Currently, this is a known limitation in continuous batching support
-    # in transformers-neuronx.
-    # TODO(liangfu): Support paged-attention in transformers-neuronx.
-    max_model_len=1024,
-    block_size=1024,
-    # The device can be automatically detected when AWS Neuron SDK is installed.
-    # The device argument can be either unspecified for automated detection,
-    # or explicitly assigned.
-    device="neuron",
-    tensor_parallel_size=2)
-# Generate texts from the prompts. The output is a list of RequestOutput objects
-# that contain the prompt, generated text, and other information.
-outputs = llm.generate(prompts, sampling_params)
-# Print the outputs.
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+def main():
+    # Create an LLM.
+    llm = LLM(
+        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        max_num_seqs=8,
+        # The max_model_len and block_size arguments are required to be same as
+        # max sequence length when targeting neuron device.
+        # Currently, this is a known limitation in continuous batching support
+        # in transformers-neuronx.
+        # TODO(liangfu): Support paged-attention in transformers-neuronx.
+        max_model_len=1024,
+        block_size=1024,
+        # ruff: noqa: E501
+        # The device can be automatically detected when AWS Neuron SDK is installed.
+        # The device argument can be either unspecified for automated detection,
+        # or explicitly assigned.
+        device="neuron",
+        tensor_parallel_size=2)
+    # Generate texts from the prompts. The output is a list of RequestOutput objects
+    # that contain the prompt, generated text, and other information.
+    outputs = llm.generate(prompts, sampling_params)
+    # Print the outputs.
+    print("-" * 50)
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
+        print("-" * 50)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/offline_inference/neuron_int8_quantization.py b/examples/offline_inference/neuron_int8_quantization.py
index c899a01a0bb9..af21274a3a5b 100644
--- a/examples/offline_inference/neuron_int8_quantization.py
+++ b/examples/offline_inference/neuron_int8_quantization.py
@@ -22,31 +22,40 @@ prompts = [
 # Create a sampling params object.
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 
-# Create an LLM.
-llm = LLM(
-    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    max_num_seqs=8,
-    # The max_model_len and block_size arguments are required to be same as
-    # max sequence length when targeting neuron device.
-    # Currently, this is a known limitation in continuous batching support
-    # in transformers-neuronx.
-    # TODO(liangfu): Support paged-attention in transformers-neuronx.
-    max_model_len=2048,
-    block_size=2048,
-    # The device can be automatically detected when AWS Neuron SDK is installed.
-    # The device argument can be either unspecified for automated detection,
-    # or explicitly assigned.
-    device="neuron",
-    quantization="neuron_quant",
-    override_neuron_config={
-        "cast_logits_dtype": "bfloat16",
-    },
-    tensor_parallel_size=2)
-# Generate texts from the prompts. The output is a list of RequestOutput objects
-# that contain the prompt, generated text, and other information.
-outputs = llm.generate(prompts, sampling_params)
-# Print the outputs.
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+def main():
+    # Create an LLM.
+    llm = LLM(
+        model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+        max_num_seqs=8,
+        # The max_model_len and block_size arguments are required to be same as
+        # max sequence length when targeting neuron device.
+        # Currently, this is a known limitation in continuous batching support
+        # in transformers-neuronx.
+        # TODO(liangfu): Support paged-attention in transformers-neuronx.
+        max_model_len=2048,
+        block_size=2048,
+        # ruff: noqa: E501
+        # The device can be automatically detected when AWS Neuron SDK is installed.
+        # The device argument can be either unspecified for automated detection,
+        # or explicitly assigned.
+        device="neuron",
+        quantization="neuron_quant",
+        override_neuron_config={
+            "cast_logits_dtype": "bfloat16",
+        },
+        tensor_parallel_size=2)
+    # Generate texts from the prompts. The output is a list of RequestOutput objects
+    # that contain the prompt, generated text, and other information.
+    outputs = llm.generate(prompts, sampling_params)
+    # Print the outputs.
+    print("-" * 50)
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
+        print("-" * 50)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/offline_inference/prefix_caching.py b/examples/offline_inference/prefix_caching.py
index 4c326c417b4d..f0bec387d3a9 100644
--- a/examples/offline_inference/prefix_caching.py
+++ b/examples/offline_inference/prefix_caching.py
@@ -31,55 +31,62 @@ generating_prompts = [prefix + prompt for prompt in prompts]
 # Create a sampling params object.
 sampling_params = SamplingParams(temperature=0.0)
 
-# Create an LLM without prefix caching as a baseline.
-regular_llm = LLM(model="facebook/opt-125m", gpu_memory_utilization=0.4)
-print("Results without `enable_prefix_caching`")
+def main():
+    # Create an LLM without prefix caching as a baseline.
+    regular_llm = LLM(model="facebook/opt-125m", gpu_memory_utilization=0.4)
 
-# Generate texts from the prompts. The output is a list of RequestOutput objects
-# that contain the prompt, generated text, and other information.
-outputs = regular_llm.generate(generating_prompts, sampling_params)
+    print("Results without `enable_prefix_caching`")
 
-regular_generated_texts = []
-# Print the outputs.
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    regular_generated_texts.append(generated_text)
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    # ruff: noqa: E501
+    # Generate texts from the prompts. The output is a list of RequestOutput objects
+    # that contain the prompt, generated text, and other information.
+    outputs = regular_llm.generate(generating_prompts, sampling_params)
 
-print("-" * 80)
+    regular_generated_texts = []
+    # Print the outputs.
+    print("-" * 50)
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        regular_generated_texts.append(generated_text)
+        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
+        print("-" * 50)
 
-# Destroy the LLM object and free up the GPU memory.
-del regular_llm
-cleanup_dist_env_and_memory()
+    # Destroy the LLM object and free up the GPU memory.
+    del regular_llm
+    cleanup_dist_env_and_memory()
 
-# Create an LLM with prefix caching enabled.
-prefix_cached_llm = LLM(model="facebook/opt-125m",
-                        enable_prefix_caching=True,
-                        gpu_memory_utilization=0.4)
+    # Create an LLM with prefix caching enabled.
+    prefix_cached_llm = LLM(model="facebook/opt-125m",
+                            enable_prefix_caching=True,
+                            gpu_memory_utilization=0.4)
 
-# Warmup so that the shared prompt's KV cache is computed.
-prefix_cached_llm.generate(generating_prompts[0], sampling_params)
+    # Warmup so that the shared prompt's KV cache is computed.
+    prefix_cached_llm.generate(generating_prompts[0], sampling_params)
 
-# Generate with prefix caching.
-outputs = prefix_cached_llm.generate(generating_prompts, sampling_params)
+    # Generate with prefix caching.
+    outputs = prefix_cached_llm.generate(generating_prompts, sampling_params)
 
-print("Results with `enable_prefix_caching`")
+    print("Results with `enable_prefix_caching`")
 
-cached_generated_texts = []
-# Print the outputs. You should see the same outputs as before.
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    cached_generated_texts.append(generated_text)
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+    cached_generated_texts = []
+    # Print the outputs. You should see the same outputs as before.
+    print("-" * 50)
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        cached_generated_texts.append(generated_text)
+        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
+        print("-" * 50)
 
-print("-" * 80)
+    # Compare the results and display the speedup
+    generated_same = all([
+        regular_generated_texts[i] == cached_generated_texts[i]
+        for i in range(len(prompts))
+    ])
+    print(f"Generated answers are the same: {generated_same}")
 
-# Compare the results and display the speedup
-generated_same = all([
-    regular_generated_texts[i] == cached_generated_texts[i]
-    for i in range(len(prompts))
-])
-print(f"Generated answers are the same: {generated_same}")
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/offline_inference/reproduciblity.py b/examples/offline_inference/reproduciblity.py
index d0197bf6d5ba..b2be117d1a0a 100644
--- a/examples/offline_inference/reproduciblity.py
+++ b/examples/offline_inference/reproduciblity.py
@@ -19,8 +19,6 @@ SEED = 42
 # because it is almost impossible to make the scheduling deterministic in the
 # online serving setting.
 
-llm = LLM(model="facebook/opt-125m", seed=SEED)
-
 prompts = [
     "Hello, my name is",
     "The president of the United States is",
@@ -29,8 +27,17 @@ prompts = [
 ]
 sampling_params = SamplingParams(temperature=0.8, top_p=0.95)
 
-outputs = llm.generate(prompts, sampling_params)
-for output in outputs:
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+
+def main():
+    llm = LLM(model="facebook/opt-125m", seed=SEED)
+    outputs = llm.generate(prompts, sampling_params)
+    print("-" * 50)
+    for output in outputs:
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
+        print("-" * 50)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/offline_inference/rlhf.py b/examples/offline_inference/rlhf.py
index b0418c092ca3..e0ed0ac49754 100644
--- a/examples/offline_inference/rlhf.py
+++ b/examples/offline_inference/rlhf.py
@@ -85,11 +85,13 @@ sampling_params = SamplingParams(temperature=0)
 
 outputs = ray.get(llm.generate.remote(prompts, sampling_params))
 
+print("-" * 50)
 for output in outputs:
     prompt = output.prompt
     generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, "
+    print(f"Prompt: {prompt!r}\n"
          f"Generated text: {generated_text!r}")
+    print("-" * 50)
 
 # set up the communication between the training process
 # and the inference engine.
@@ -120,8 +122,10 @@ assert all(ray.get(llm.collective_rpc.remote("check_weights_changed")))
 # use the updated model to generate texts, they will be nonsense
 # because the weights are all zeros.
 outputs_updated = ray.get(llm.generate.remote(prompts, sampling_params))
+print("-" * 50)
 for output in outputs_updated:
     prompt = output.prompt
     generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, "
+    print(f"Prompt: {prompt!r}\n"
          f"Generated text: {generated_text!r}")
+    print("-" * 50)
diff --git a/examples/offline_inference/simple_profiling.py b/examples/offline_inference/simple_profiling.py
index b45954b3bd54..6a8e3a5a3e75 100644
--- a/examples/offline_inference/simple_profiling.py
+++ b/examples/offline_inference/simple_profiling.py
@@ -32,10 +32,12 @@ if __name__ == "__main__":
     llm.stop_profile()
 
     # Print the outputs.
+    print("-" * 50)
     for output in outputs:
         prompt = output.prompt
         generated_text = output.outputs[0].text
-        print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
+        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
+        print("-" * 50)
 
     # Add a buffer to wait for profiler in the background process
     # (in case MP is on) to finish writing profiling output.
diff --git a/examples/offline_inference/torchrun_example.py b/examples/offline_inference/torchrun_example.py
index 7a57f29a07fa..c6d9e6b47e21 100644
--- a/examples/offline_inference/torchrun_example.py
+++ b/examples/offline_inference/torchrun_example.py
@@ -36,11 +36,13 @@ llm = LLM(
 outputs = llm.generate(prompts, sampling_params)
 
 # all ranks will have the same outputs
+print("-" * 50)
 for output in outputs:
     prompt = output.prompt
     generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, "
+    print(f"Prompt: {prompt!r}\n"
          f"Generated text: {generated_text!r}")
+    print("-" * 50)
 
 """
 Further tips:
diff --git a/examples/offline_inference/tpu.py b/examples/offline_inference/tpu.py
index 956219d30f38..dea717c36082 100644
--- a/examples/offline_inference/tpu.py
+++ b/examples/offline_inference/tpu.py
@@ -16,14 +16,22 @@ N = 1
 # Currently, top-p sampling is disabled. `top_p` should be 1.0.
 sampling_params = SamplingParams(temperature=0, top_p=1.0, n=N, max_tokens=16)
 
-# Set `enforce_eager=True` to avoid ahead-of-time compilation.
-# In real workloads, `enforace_eager` should be `False`.
-llm = LLM(model="Qwen/Qwen2-1.5B-Instruct",
-          max_num_batched_tokens=64,
-          max_num_seqs=4)
-outputs = llm.generate(prompts, sampling_params)
-for output, answer in zip(outputs, answers):
-    prompt = output.prompt
-    generated_text = output.outputs[0].text
-    print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}")
-    assert generated_text.startswith(answer)
+
+def main():
+    # Set `enforce_eager=True` to avoid ahead-of-time compilation.
+    # In real workloads, `enforce_eager` should be `False`.
+    llm = LLM(model="Qwen/Qwen2-1.5B-Instruct",
+              max_num_batched_tokens=64,
+              max_num_seqs=4)
+    outputs = llm.generate(prompts, sampling_params)
+    print("-" * 50)
+    for output, answer in zip(outputs, answers):
+        prompt = output.prompt
+        generated_text = output.outputs[0].text
+        print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}")
+        assert generated_text.startswith(answer)
+        print("-" * 50)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
index 61d53dda1c47..a944260c2692 100644
--- a/examples/offline_inference/vision_language.py
+++ b/examples/offline_inference/vision_language.py
@@ -1089,14 +1089,18 @@ def main(args):
         start_time = time.time()
         outputs = llm.generate(inputs, sampling_params=sampling_params)
         elapsed_time = time.time() - start_time
+        print("-" * 50)
         print("-- generate time = {}".format(elapsed_time))
+        print("-" * 50)
 
     else:
         outputs = llm.generate(inputs, sampling_params=sampling_params)
 
+    print("-" * 50)
     for o in outputs:
         generated_text = o.outputs[0].text
         print(generated_text)
+        print("-" * 50)
 
 
 if __name__ == "__main__":
diff --git a/examples/offline_inference/vision_language_embedding.py b/examples/offline_inference/vision_language_embedding.py
index a0b2b44b4e82..8321d3e254a2 100644
--- a/examples/offline_inference/vision_language_embedding.py
+++ b/examples/offline_inference/vision_language_embedding.py
@@ -143,8 +143,10 @@ def run_encode(model: str, modality: QueryModality, seed: Optional[int]):
         "multi_modal_data": mm_data,
     })
 
+    print("-" * 50)
     for output in outputs:
         print(output.outputs.embedding)
+        print("-" * 50)
 
 
 def main(args: Namespace):
diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py
index e03ebe485eaa..39465c9b0ce4 100644
--- a/examples/offline_inference/vision_language_multi_image.py
+++ b/examples/offline_inference/vision_language_multi_image.py
@@ -644,9 +644,11 @@ def run_generate(model, question: str, image_urls: list[str],
         },
         sampling_params=sampling_params)
 
+    print("-" * 50)
     for o in outputs:
         generated_text = o.outputs[0].text
         print(generated_text)
+        print("-" * 50)
 
 
 def run_chat(model: str, question: str, image_urls: list[str],
@@ -687,9 +689,11 @@ def run_chat(model: str, question: str, image_urls: list[str],
         chat_template=req_data.chat_template,
     )
 
+    print("-" * 50)
     for o in outputs:
         generated_text = o.outputs[0].text
         print(generated_text)
+        print("-" * 50)
 
 
 def main(args: Namespace):