diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py
index ccfee165368e7..b04916f17088c 100644
--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -946,9 +946,12 @@ def pad_images(
 
 
 def input_processor_for_molmo(ctx: InputContext, llm_inputs: LLMInputs):
-    prompt = llm_inputs["prompt"]
-    multi_modal_data = llm_inputs.get("multi_modal_data")
-    image = multi_modal_data.get("image")
+    prompt = llm_inputs.get("prompt", None)
+    multi_modal_data = llm_inputs.get("multi_modal_data", None)
+    if multi_modal_data is not None:
+        image = multi_modal_data.get("image", None)
+    else:
+        image = None
     processor = cached_get_processor(ctx.model_config.model,
                                      trust_remote_code=True,
                                      revision=ctx.model_config.code_revision)