diff --git a/tests/v1/engine/test_async_llm.py b/tests/v1/engine/test_async_llm.py
index 05197f44f93b..d864cb2af23e 100644
--- a/tests/v1/engine/test_async_llm.py
+++ b/tests/v1/engine/test_async_llm.py
@@ -8,7 +8,9 @@ import pytest
 
 from tests.v1.engine.utils import PLP_APC_UNSUPPORTED_MSG
 from vllm import SamplingParams
+from vllm.assets.image import ImageAsset
 from vllm.engine.arg_utils import AsyncEngineArgs
+from vllm.inputs import PromptType
 from vllm.platforms import current_platform
 from vllm.sampling_params import RequestOutputKind
 from vllm.v1.engine.async_llm import AsyncLLM
@@ -17,13 +19,32 @@ if not current_platform.is_cuda():
     pytest.skip(reason="V1 currently only supported on CUDA.",
                 allow_module_level=True)
 
-ENGINE_ARGS = AsyncEngineArgs(model="meta-llama/Llama-3.2-1B-Instruct",
-                              enforce_eager=True,
-                              disable_log_requests=True)
+TEXT_ENGINE_ARGS = AsyncEngineArgs(model="meta-llama/Llama-3.2-1B-Instruct",
+                                   enforce_eager=True,
+                                   disable_log_requests=True)
+
+VISION_ENGINE_ARGS = AsyncEngineArgs(model="Qwen/Qwen2-VL-2B-Instruct",
+                                     enforce_eager=True,
+                                     disable_log_requests=True)
+
+TEXT_PROMPT = "Hello my name is Robert and"
+
+VISION_PROMPT_TEMPLATE = (
+    "<|im_start|>system\nYou are a helpful assistant.<|im_end|>"
+    "\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
+    "What is in the image?<|im_end|>\n"
+    "<|im_start|>assistant\n")
+VISION_PROMPT = {
+    "prompt": VISION_PROMPT_TEMPLATE,
+    "multi_modal_data": {
+        "image": ImageAsset("stop_sign").pil_image
+    }
+}
 
 
 async def generate(engine: AsyncLLM,
                    request_id: str,
+                   prompt: PromptType,
                    output_kind: RequestOutputKind,
                    max_tokens: int,
                    prompt_logprobs: Optional[int] = None) -> Tuple[int, str]:
@@ -32,11 +53,12 @@ async def generate(engine: AsyncLLM,
 
     count = 0
     sampling_params = SamplingParams(max_tokens=max_tokens,
+                                     ignore_eos=True,
                                      output_kind=output_kind,
                                      temperature=0,
                                      prompt_logprobs=prompt_logprobs)
     async for out in engine.generate(request_id=request_id,
-                                     prompt="Hello my name is Robert and",
+                                     prompt=prompt,
                                      sampling_params=sampling_params):
 
         num_tokens = len(out.outputs[0].token_ids)
@@ -74,6 +96,7 @@ async def test_async_llm_refuses_prompt_logprobs_with_apc(
         await asyncio.create_task(
             generate(engine,
                      "request-0",
+                     TEXT_PROMPT,
                      output_kind,
                      10,
                      prompt_logprobs=5))
@@ -86,18 +109,24 @@ async def test_async_llm_refuses_prompt_logprobs_with_apc(
 
 @pytest.mark.parametrize(
     "output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
+@pytest.mark.parametrize("engine_args_and_prompt",
+                         [(TEXT_ENGINE_ARGS, TEXT_PROMPT),
+                          (VISION_ENGINE_ARGS, VISION_PROMPT)])
 @pytest.mark.asyncio
-async def test_load(monkeypatch, output_kind: RequestOutputKind):
+async def test_load(monkeypatch, output_kind: RequestOutputKind,
+                    engine_args_and_prompt: Tuple[AsyncEngineArgs,
+                                                  PromptType]):
     # TODO(rickyx): Remove monkeypatch once we have a better way to test V1
     # so that in the future when we switch, we don't have to change all the
     # tests.
     with monkeypatch.context() as m, ExitStack() as after:
         m.setenv("VLLM_USE_V1", "1")
+        engine_args, prompt = engine_args_and_prompt
 
-        engine = AsyncLLM.from_engine_args(ENGINE_ARGS)
+        engine = AsyncLLM.from_engine_args(engine_args)
         after.callback(engine.shutdown)
 
-        NUM_REQUESTS = 10000
+        NUM_REQUESTS = 100
         NUM_EXPECTED_TOKENS = 10
 
         request_ids = [f"request-{i}" for i in range(NUM_REQUESTS)]
@@ -107,7 +136,7 @@ async def test_load(monkeypatch, output_kind: RequestOutputKind):
         for request_id in request_ids:
             tasks.append(
                 asyncio.create_task(
-                    generate(engine, request_id, output_kind,
+                    generate(engine, request_id, prompt, output_kind,
                              NUM_EXPECTED_TOKENS)))
 
         # Confirm that we got all the EXPECTED tokens from the requests.
@@ -126,13 +155,19 @@ async def test_load(monkeypatch, output_kind: RequestOutputKind):
 
 @pytest.mark.parametrize(
     "output_kind", [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY])
+@pytest.mark.parametrize("engine_args_and_prompt",
+                         [(TEXT_ENGINE_ARGS, TEXT_PROMPT),
+                          (VISION_ENGINE_ARGS, VISION_PROMPT)])
 @pytest.mark.asyncio
-async def test_abort(monkeypatch, output_kind: RequestOutputKind):
+async def test_abort(monkeypatch, output_kind: RequestOutputKind,
+                     engine_args_and_prompt: Tuple[AsyncEngineArgs,
+                                                   PromptType]):
 
     with monkeypatch.context() as m, ExitStack() as after:
         m.setenv("VLLM_USE_V1", "1")
+        engine_args, prompt = engine_args_and_prompt
 
-        engine = AsyncLLM.from_engine_args(ENGINE_ARGS)
+        engine = AsyncLLM.from_engine_args(engine_args)
         after.callback(engine.shutdown)
 
         NUM_REQUESTS = 100
@@ -146,7 +181,7 @@ async def test_abort(monkeypatch, output_kind: RequestOutputKind):
         for request_id in request_ids:
             tasks.append(
                 asyncio.create_task(
-                    generate(engine, request_id, output_kind,
+                    generate(engine, request_id, prompt, output_kind,
                              NUM_EXPECTED_TOKENS)))
 
         # API server cancels requests when they disconnect.
@@ -172,7 +207,8 @@ async def test_abort(monkeypatch, output_kind: RequestOutputKind):
         # Confirm we can do another generation.
         request_id = f"request-{REQUEST_IDS_TO_ABORT[0]}"
         task = asyncio.create_task(
-            generate(engine, request_id, output_kind, NUM_EXPECTED_TOKENS))
+            generate(engine, request_id, prompt, output_kind,
+                     NUM_EXPECTED_TOKENS))
         num_generated_tokens, request_id = await task
         assert num_generated_tokens == NUM_EXPECTED_TOKENS
         assert not engine.output_processor.has_unfinished_requests()