mirror of
https://git.datalinker.icu/vllm-project/vllm.git
synced 2026-05-28 02:47:04 +08:00
[MODEL] New model support for naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B (#20931)
Signed-off-by: bigshanedogg <bigshane319@gmail.com>
This commit is contained in:
parent
c72f049cb4
commit
29c6fbe58c
@ -365,6 +365,7 @@ th {
|
|||||||
| `Grok1ModelForCausalLM` | Grok1 | `hpcai-tech/grok-1`. | ✅︎ | ✅︎ | ✅︎ |
|
| `Grok1ModelForCausalLM` | Grok1 | `hpcai-tech/grok-1`. | ✅︎ | ✅︎ | ✅︎ |
|
||||||
| `HunYuanDenseV1ForCausalLM` | Hunyuan-7B-Instruct-0124 | `tencent/Hunyuan-7B-Instruct-0124` | ✅︎ | | ✅︎ |
|
| `HunYuanDenseV1ForCausalLM` | Hunyuan-7B-Instruct-0124 | `tencent/Hunyuan-7B-Instruct-0124` | ✅︎ | | ✅︎ |
|
||||||
| `HunYuanMoEV1ForCausalLM` | Hunyuan-80B-A13B | `tencent/Hunyuan-A13B-Instruct`, `tencent/Hunyuan-A13B-Pretrain`, `tencent/Hunyuan-A13B-Instruct-FP8`, etc. | ✅︎ | | ✅︎ |
|
| `HunYuanMoEV1ForCausalLM` | Hunyuan-80B-A13B | `tencent/Hunyuan-A13B-Instruct`, `tencent/Hunyuan-A13B-Pretrain`, `tencent/Hunyuan-A13B-Instruct-FP8`, etc. | ✅︎ | | ✅︎ |
|
||||||
|
| `HCXVisionForCausalLM` | HyperCLOVAX-SEED-Vision-Instruct-3B | `naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B` | | | ✅︎ |
|
||||||
| `InternLMForCausalLM` | InternLM | `internlm/internlm-7b`, `internlm/internlm-chat-7b`, etc. | ✅︎ | ✅︎ | ✅︎ |
|
| `InternLMForCausalLM` | InternLM | `internlm/internlm-7b`, `internlm/internlm-chat-7b`, etc. | ✅︎ | ✅︎ | ✅︎ |
|
||||||
| `InternLM2ForCausalLM` | InternLM2 | `internlm/internlm2-7b`, `internlm/internlm2-chat-7b`, etc. | ✅︎ | ✅︎ | ✅︎ |
|
| `InternLM2ForCausalLM` | InternLM2 | `internlm/internlm2-7b`, `internlm/internlm2-chat-7b`, etc. | ✅︎ | ✅︎ | ✅︎ |
|
||||||
| `InternLM3ForCausalLM` | InternLM3 | `internlm/internlm3-8b-instruct`, etc. | ✅︎ | ✅︎ | ✅︎ |
|
| `InternLM3ForCausalLM` | InternLM3 | `internlm/internlm3-8b-instruct`, etc. | ✅︎ | ✅︎ | ✅︎ |
|
||||||
|
|||||||
@ -316,6 +316,85 @@ def run_h2ovl(questions: list[str], modality: str) -> ModelRequestData:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
# naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B
|
||||||
|
def run_hyperclovax_seed_vision(
    questions: list[str], modality: str
) -> ModelRequestData:
    """Prepare engine arguments and prompts for HyperCLOVAX-SEED-Vision-Instruct-3B.

    One single-turn user conversation is built per question; each holds a
    media placeholder part followed by the question text. All conversations
    are rendered to prompt strings in one ``apply_chat_template`` call.

    Args:
        questions: Question texts, one conversation is produced for each.
        modality: ``"image"`` or ``"video"``.

    Returns:
        A ``ModelRequestData`` carrying the engine arguments and the rendered
        prompts, with ``stop_token_ids`` left as ``None``.

    Raises:
        ValueError: If ``modality`` is neither ``"image"`` nor ``"video"``
            (raised while building the conversation for the first question).
    """
    checkpoint = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
    chat_tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

    # Image prompts get the shorter context window; anything else gets 16384.
    context_len = 8192 if modality == "image" else 16384
    engine_args = EngineArgs(
        model=checkpoint,
        trust_remote_code=True,
        max_model_len=context_len,
        limit_mm_per_prompt={modality: 1},
    )

    def _conversation(question: str) -> list[dict]:
        """Return the single-turn conversation for one question."""
        if modality == "image":
            # Optional hint fields accepted on an image part:
            #   ocr: List the words in the image in raster order.
            #       Even if the word order feels unnatural for reading,
            #       the model will handle it as long as it follows raster order.
            #       e.g. "Naver, CLOVA, bigshane"
            #   lens_keywords: List the entity names in the image.
            #       e.g. "iPhone"
            #   lens_local_keywords: List the entity names with quads in the image.
            #       e.g. "[0.07, 0.21, 0.92, 0.90] iPhone"
            media_part = {
                "type": "image",
                "ocr": "",
                "lens_keywords": "",
                "lens_local_keywords": "",
            }
        elif modality == "video":
            media_part = {"type": "video"}
        else:
            raise ValueError(f"Unsupported modality: {modality}")

        text_part = {"type": "text", "text": question}
        return [{"role": "user", "content": [media_part, text_part]}]

    conversations = [_conversation(question) for question in questions]

    prompts = chat_tokenizer.apply_chat_template(
        conversations,
        tokenize=False,
        add_generation_prompt=True,
    )

    return ModelRequestData(
        engine_args=engine_args,
        prompts=prompts,
        stop_token_ids=None,
    )
|
||||||
|
|
||||||
|
|
||||||
# Idefics3-8B-Llama3
|
# Idefics3-8B-Llama3
|
||||||
def run_idefics3(questions: list[str], modality: str) -> ModelRequestData:
|
def run_idefics3(questions: list[str], modality: str) -> ModelRequestData:
|
||||||
assert modality == "image"
|
assert modality == "image"
|
||||||
@ -1222,6 +1301,7 @@ model_example_map = {
|
|||||||
"glm4v": run_glm4v,
|
"glm4v": run_glm4v,
|
||||||
"glm4_1v": run_glm4_1v,
|
"glm4_1v": run_glm4_1v,
|
||||||
"h2ovl_chat": run_h2ovl,
|
"h2ovl_chat": run_h2ovl,
|
||||||
|
"hyperclovax_seed_vision": run_hyperclovax_seed_vision,
|
||||||
"idefics3": run_idefics3,
|
"idefics3": run_idefics3,
|
||||||
"internvl_chat": run_internvl,
|
"internvl_chat": run_internvl,
|
||||||
"nemotron_vl": run_nemotron_vl,
|
"nemotron_vl": run_nemotron_vl,
|
||||||
|
|||||||
@ -289,6 +289,53 @@ def load_internvl(question: str, image_urls: list[str]) -> ModelRequestData:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def load_hyperclovax_seed_vision(
    question: str, image_urls: list[str]
) -> ModelRequestData:
    """Prepare a multi-image request for HyperCLOVAX-SEED-Vision-Instruct-3B.

    A single user turn is built with one image part per URL (in the given
    order) followed by the question text, then rendered to a prompt string
    with the tokenizer's chat template.

    Args:
        question: The question text placed after the image parts.
        image_urls: URLs of the images; each is referenced in the prompt and
            fetched for the returned request.

    Returns:
        A ``ModelRequestData`` with the engine arguments, the rendered prompt,
        ``stop_token_ids`` set to ``None`` and the fetched images.
    """
    checkpoint = "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B"
    chat_tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

    engine_args = EngineArgs(
        model=checkpoint,
        trust_remote_code=True,
        max_model_len=16384,
        # Allow exactly as many images per prompt as were supplied.
        limit_mm_per_prompt={"image": len(image_urls)},
    )

    # The hint fields (ocr / lens_keywords / lens_local_keywords) are sent empty.
    image_parts = [
        {
            "type": "image",
            "image": url,
            "ocr": "",
            "lens_keywords": "",
            "lens_local_keywords": "",
        }
        for url in image_urls
    ]
    text_part = {"type": "text", "text": question}
    user_turn = {"role": "user", "content": [*image_parts, text_part]}

    prompt = chat_tokenizer.apply_chat_template(
        [user_turn],
        tokenize=False,
        add_generation_prompt=True,
    )

    fetched_images = [fetch_image(url) for url in image_urls]
    return ModelRequestData(
        engine_args=engine_args,
        prompt=prompt,
        stop_token_ids=None,
        image_data=fetched_images,
    )
|
||||||
|
|
||||||
|
|
||||||
def load_llava(question: str, image_urls: list[str]) -> ModelRequestData:
|
def load_llava(question: str, image_urls: list[str]) -> ModelRequestData:
|
||||||
# NOTE: CAUTION! Original Llava models weren't really trained on multi-image inputs,
|
# NOTE: CAUTION! Original Llava models weren't really trained on multi-image inputs,
|
||||||
# it will generate poor response for multi-image inputs!
|
# it will generate poor response for multi-image inputs!
|
||||||
@ -900,6 +947,7 @@ model_example_map = {
|
|||||||
"h2ovl_chat": load_h2ovl,
|
"h2ovl_chat": load_h2ovl,
|
||||||
"idefics3": load_idefics3,
|
"idefics3": load_idefics3,
|
||||||
"internvl_chat": load_internvl,
|
"internvl_chat": load_internvl,
|
||||||
|
"hyperclovax_seed_vision": load_hyperclovax_seed_vision,
|
||||||
"keye_vl": load_keye_vl,
|
"keye_vl": load_keye_vl,
|
||||||
"kimi_vl": load_kimi_vl,
|
"kimi_vl": load_kimi_vl,
|
||||||
"llava": load_llava,
|
"llava": load_llava,
|
||||||
|
|||||||
@ -278,6 +278,7 @@ def _test_processing_correctness_one(
|
|||||||
"HuggingFaceTB/SmolVLM2-2.2B-Instruct",
|
"HuggingFaceTB/SmolVLM2-2.2B-Instruct",
|
||||||
"moonshotai/Kimi-VL-A3B-Instruct",
|
"moonshotai/Kimi-VL-A3B-Instruct",
|
||||||
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
"meta-llama/Llama-4-Scout-17B-16E-Instruct",
|
||||||
|
"naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
|
||||||
"llava-hf/llava-1.5-7b-hf",
|
"llava-hf/llava-1.5-7b-hf",
|
||||||
"llava-hf/llava-v1.6-mistral-7b-hf",
|
"llava-hf/llava-v1.6-mistral-7b-hf",
|
||||||
"llava-hf/LLaVA-NeXT-Video-7B-hf",
|
"llava-hf/LLaVA-NeXT-Video-7B-hf",
|
||||||
|
|||||||
@ -201,6 +201,9 @@ _TEXT_GENERATION_EXAMPLE_MODELS = {
|
|||||||
trust_remote_code=True),
|
trust_remote_code=True),
|
||||||
"HunYuanDenseV1ForCausalLM":_HfExamplesInfo("tencent/Hunyuan-7B-Instruct-0124",
|
"HunYuanDenseV1ForCausalLM":_HfExamplesInfo("tencent/Hunyuan-7B-Instruct-0124",
|
||||||
trust_remote_code=True),
|
trust_remote_code=True),
|
||||||
|
"HCXVisionForCausalLM": _HfExamplesInfo(
|
||||||
|
"naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B",
|
||||||
|
trust_remote_code=True),
|
||||||
"InternLMForCausalLM": _HfExamplesInfo("internlm/internlm-chat-7b",
|
"InternLMForCausalLM": _HfExamplesInfo("internlm/internlm-chat-7b",
|
||||||
trust_remote_code=True),
|
trust_remote_code=True),
|
||||||
"InternLM2ForCausalLM": _HfExamplesInfo("internlm/internlm2-chat-7b",
|
"InternLM2ForCausalLM": _HfExamplesInfo("internlm/internlm2-chat-7b",
|
||||||
|
|||||||
1231
vllm/model_executor/models/hyperclovax_vision.py
Normal file
1231
vllm/model_executor/models/hyperclovax_vision.py
Normal file
File diff suppressed because it is too large
Load Diff
@ -81,6 +81,7 @@ _TEXT_GENERATION_MODELS = {
|
|||||||
"Grok1ModelForCausalLM": ("grok1", "Grok1ForCausalLM"),
|
"Grok1ModelForCausalLM": ("grok1", "Grok1ForCausalLM"),
|
||||||
"HunYuanMoEV1ForCausalLM": ("hunyuan_v1", "HunYuanMoEV1ForCausalLM"),
|
"HunYuanMoEV1ForCausalLM": ("hunyuan_v1", "HunYuanMoEV1ForCausalLM"),
|
||||||
"HunYuanDenseV1ForCausalLM": ("hunyuan_v1", "HunYuanDenseV1ForCausalLM"),
|
"HunYuanDenseV1ForCausalLM": ("hunyuan_v1", "HunYuanDenseV1ForCausalLM"),
|
||||||
|
"HCXVisionForCausalLM": ("hyperclovax_vision", "HCXVisionForCausalLM"),
|
||||||
"InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
|
"InternLMForCausalLM": ("llama", "LlamaForCausalLM"),
|
||||||
"InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"),
|
"InternLM2ForCausalLM": ("internlm2", "InternLM2ForCausalLM"),
|
||||||
"InternLM2VEForCausalLM": ("internlm2_ve", "InternLM2VEForCausalLM"),
|
"InternLM2VEForCausalLM": ("internlm2_ve", "InternLM2VEForCausalLM"),
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user