diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
index 35a5fa0c2e42f..20cf75873af76 100644
--- a/docs/models/supported_models.md
+++ b/docs/models/supported_models.md
@@ -629,6 +629,7 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen
| `Idefics3ForConditionalGeneration` | Idefics3 | T + I | `HuggingFaceM4/Idefics3-8B-Llama3`, etc. | ✅︎ | | ✅︎ |
| `InternS1ForConditionalGeneration` | Intern-S1 | T + IE+ + VE+ | `internlm/Intern-S1`, etc. | ✅︎ | ✅︎ | ✅︎ |
| `InternVLChatModel` | InternVL 3.5, InternVL 3.0, InternVideo 2.5, InternVL 2.5, Mono-InternVL, InternVL 2.0 | T + IE+ + (VE+) | `OpenGVLab/InternVL3_5-14B`, `OpenGVLab/InternVL3-9B`, `OpenGVLab/InternVideo2_5_Chat_8B`, `OpenGVLab/InternVL2_5-4B`, `OpenGVLab/Mono-InternVL-2B`, `OpenGVLab/InternVL2-4B`, etc. | ✅︎ | ✅︎ | ✅︎ |
+| `InternVLForConditionalGeneration` | InternVL 3.0 (HF format) | T + IE+ + VE+ | `OpenGVLab/InternVL3-1B-hf`, etc. | ✅︎ | ✅︎ | ✅︎ |
| `KeyeForConditionalGeneration` | Keye-VL-8B-Preview | T + IE+ + VE+ | `Kwai-Keye/Keye-VL-8B-Preview` | | | ✅︎ |
| `KimiVLForConditionalGeneration` | Kimi-VL-A3B-Instruct, Kimi-VL-A3B-Thinking | T + I+ | `moonshotai/Kimi-VL-A3B-Instruct`, `moonshotai/Kimi-VL-A3B-Thinking` | | ✅︎ | ✅︎ |
| `Llama4ForConditionalGeneration` | Llama 4 | T + I+ | `meta-llama/Llama-4-Scout-17B-16E-Instruct`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8`, `meta-llama/Llama-4-Maverick-17B-128E-Instruct`, etc. | | ✅︎ | ✅︎ |
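A minimal offline-inference sketch for the HF-format checkpoint added to the table above; only the model name comes from this PR, while the image URL, prompt, and sampling settings are illustrative placeholders:

```python
# Hypothetical smoke test for the newly listed HF-format InternVL3 checkpoint.
# The model name matches the docs table; everything else is an assumption.
from vllm import LLM, SamplingParams

llm = LLM(model="OpenGVLab/InternVL3-1B-hf", max_model_len=4096)

messages = [{
    "role": "user",
    "content": [
        {"type": "image_url",
         "image_url": {"url": "https://example.com/cat.jpg"}},  # placeholder image
        {"type": "text", "text": "Describe this image in one sentence."},
    ],
}]

# LLM.chat applies the model's chat template, so image placeholder tokens
# do not need to be inserted by hand.
outputs = llm.chat(messages, SamplingParams(max_tokens=64, temperature=0.0))
print(outputs[0].outputs[0].text)
```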
diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py
index 96208f8eda628..2b60faae8ec0b 100644
--- a/tests/models/multimodal/generation/test_common.py
+++ b/tests/models/multimodal/generation/test_common.py
@@ -222,21 +222,6 @@ VLM_TEST_SETTINGS = {
},
marks=[large_gpu_mark(min_gb=32)],
),
- # Check "auto" with fallback to transformers
- "internvl-transformers": VLMTestInfo(
- models=["OpenGVLab/InternVL3-1B-hf"],
- test_type=(VLMTestType.IMAGE, VLMTestType.MULTI_IMAGE),
- prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
- img_idx_to_prompt=lambda idx: "",
- max_model_len=4096,
- use_tokenizer_eos=True,
- image_size_factors=[(0.25, 0.5, 1.0)],
- vllm_runner_kwargs={
- "model_impl": "auto",
- },
- auto_cls=AutoModelForImageTextToText,
- marks=[pytest.mark.core_model],
- ),
#### Extended model tests
"aria": VLMTestInfo(
models=["rhymes-ai/Aria"],
@@ -461,6 +446,20 @@ VLM_TEST_SETTINGS = {
use_tokenizer_eos=True,
patch_hf_runner=model_utils.internvl_patch_hf_runner,
),
+ "intern_vl-hf": VLMTestInfo(
+ models=["OpenGVLab/InternVL3-1B-hf"],
+ test_type=(
+ VLMTestType.IMAGE,
+ VLMTestType.MULTI_IMAGE,
+ VLMTestType.VIDEO,
+ ),
+ prompt_formatter=lambda img_prompt: f"<|im_start|>User\n{img_prompt}<|im_end|>\n<|im_start|>Assistant\n", # noqa: E501
+ img_idx_to_prompt=lambda idx: "",
+ video_idx_to_prompt=lambda idx: "