[Bugfix] Fix LLaVA-NeXT feature size precision error (for real) (#11772)
Signed-off-by: DarkLight1337 <tlleungac@connect.ust.hk>
parent 91b361ae89
commit 08fb75c72e

@@ -17,7 +17,8 @@ def processor_for_llava_next():
 
 @pytest.mark.parametrize("model_id", ["llava-hf/llava-v1.6-mistral-7b-hf"])
 @pytest.mark.parametrize("image_size", [(1669, 2560), (2560, 1669), (183, 488),
-                                        (488, 183), (198, 176), (176, 198)])
+                                        (488, 183), (198, 176), (176, 198),
+                                        (161, 184), (184, 161)])
 @pytest.mark.parametrize("num_imgs", [1, 2])
 def test_processor_prompt_replacements(
     processor_for_llava_next,

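The two sizes added here, (161, 184) and (184, 161), are presumably the inputs that still reproduced the feature-size mismatch this PR fixes; both orientations are included so that each branch of the aspect-ratio comparison is exercised. A hedged sketch of how the HF-side ground truth for these cases can be inspected directly (this mirrors the intent of the test, not its body; the bare "<image>" prompt is an assumption for illustration):

    from PIL import Image
    from transformers import AutoProcessor

    # Count how many <image> placeholder ids the HF processor expands to for
    # each new regression size; this is the reference the vLLM processor
    # tests compare their own prompt replacements against.
    processor = AutoProcessor.from_pretrained(
        "llava-hf/llava-v1.6-mistral-7b-hf")
    image_token_id = processor.tokenizer.convert_tokens_to_ids("<image>")

    for size in [(161, 184), (184, 161)]:
        image = Image.new("RGB", size)
        inputs = processor(text="<image>", images=image, return_tensors="pt")
        print(size, (inputs["input_ids"] == image_token_id).sum().item())
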
@@ -18,7 +18,8 @@ def processor_for_llava_onevision():
 @pytest.mark.parametrize("model_id",
                          ["llava-hf/llava-onevision-qwen2-0.5b-ov-hf"])
 @pytest.mark.parametrize("image_size", [(1669, 2560), (2560, 1669), (183, 488),
-                                        (488, 183), (198, 176), (176, 198)])
+                                        (488, 183), (198, 176), (176, 198),
+                                        (161, 184), (184, 161)])
 @pytest.mark.parametrize("num_imgs", [1, 2])
 def test_processor_prompt_replacements(
     processor_for_llava_onevision,

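The LLaVA-OneVision test gains the same two regression sizes, and the sketch above carries over after swapping the model id to "llava-hf/llava-onevision-qwen2-0.5b-ov-hf". Its ground truth can differ from LLaVA-NeXT's for the same image because OneVision additionally caps the feature count once the unpadded grid grows past the nine-tile budget, which is the ratio branch touched in the last hunk below.
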
@@ -121,30 +121,29 @@ class LlavaNextProcessingMixin(BaseLlavaProcessingMixin):
         num_patch_height: int,
         num_patch_width: int,
     ) -> tuple[int, int]:
-        current_height = npatches * num_patch_height
-        current_width = npatches * num_patch_width
-
         # NOTE: Use float32 to remain consistent with HF output
-        original_aspect_ratio = np.array(original_width / original_height,
-                                         dtype=np.float32)
-        current_aspect_ratio = np.array(current_width / current_height,
-                                        dtype=np.float32)
+        current_height_f = np.float32(npatches * num_patch_height)
+        current_width_f = np.float32(npatches * num_patch_width)
+
+        original_width_f = np.float32(original_width)
+        original_height_f = np.float32(original_height)
+
+        original_aspect_ratio = original_width_f / original_height_f
+        current_aspect_ratio = current_width_f / current_height_f
 
         if original_aspect_ratio > current_aspect_ratio:
-            scale_factor = np.array(current_width / original_width,
-                                    dtype=np.float32)
-            new_height = int(original_height * scale_factor)
-            padding = (current_height - new_height) // 2
-            current_height -= 2 * padding
+            scale_factor = current_width_f / original_width_f
+            new_height = int(original_height_f * scale_factor)
+            padding = (current_height_f - new_height) // 2
+            current_height_f -= 2 * padding
         else:
-            scale_factor = np.array(current_height / original_height,
-                                    dtype=np.float32)
-            new_width = int(original_width * scale_factor)
-            padding = (current_width - new_width) // 2
-            current_width -= 2 * padding
+            scale_factor = current_height_f / original_height_f
+            new_width = int(original_width_f * scale_factor)
+            padding = (current_width_f - new_width) // 2
+            current_width_f -= 2 * padding
 
-        unpadded_features = current_height * current_width
-        newline_features = current_height
+        unpadded_features = int(current_height_f * current_width_f)
+        newline_features = int(current_height_f)
 
         return (unpadded_features, newline_features)
 
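The change in LlavaNextProcessingMixin is that every intermediate now starts from float32 operands: the old code divided in Python float64 and only then cast the quotient to float32 via np.array(..., dtype=np.float32), a divide-then-round recipe that does not always agree with dividing the float32 operands directly (which, per the NOTE comment, is what matches HF's output), and the int() / // truncations downstream can flip on that last-bit difference. A standalone sketch of the failure mode (not the vLLM code path; the 336-pixel target width is an assumption for illustration):

    import numpy as np

    # Old recipe: divide in float64, cast the result to float32 afterwards.
    def new_height_old_style(orig_h, orig_w, cur_w=336):
        scale = np.array(cur_w / orig_w, dtype=np.float32)
        return int(orig_h * scale)

    # New recipe: cast the operands first, then divide in float32.
    def new_height_new_style(orig_h, orig_w, cur_w=336):
        scale = np.float32(cur_w) / np.float32(orig_w)
        return int(np.float32(orig_h) * scale)

    # Report any sizes in a small range where the two recipes disagree; each
    # such off-by-one can shift the padding and hence the unpadded feature
    # count computed above.
    diffs = [(w, h) for w in range(150, 500) for h in range(150, 500)
             if new_height_old_style(h, w) != new_height_new_style(h, w)]
    print(len(diffs), diffs[:5])

Keeping the running height and width as float32 values (current_height_f, current_width_f) until the final int() serves the same purpose: the truncation is applied to the same float32 numbers the HF reference sees.
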
@@ -107,36 +107,37 @@ class LlavaOnevisionProcessingMixin(LlavaNextProcessingMixin):
         num_patch_height: int,
         num_patch_width: int,
     ) -> tuple[int, int]:
-        current_height = npatches * num_patch_height
-        current_width = npatches * num_patch_width
-
         # NOTE: Use float32 to remain consistent with HF output
-        original_aspect_ratio = np.array(original_width / original_height,
-                                         dtype=np.float32)
-        current_aspect_ratio = np.array(current_width / current_height,
-                                        dtype=np.float32)
+        current_height_f = np.float32(npatches * num_patch_height)
+        current_width_f = np.float32(npatches * num_patch_width)
+
+        original_width_f = np.float32(original_width)
+        original_height_f = np.float32(original_height)
+
+        original_aspect_ratio = original_width_f / original_height_f
+        current_aspect_ratio = current_width_f / current_height_f
 
         if original_aspect_ratio > current_aspect_ratio:
-            scale_factor = np.array(current_width / original_width,
-                                    dtype=np.float32)
-            new_height = int(original_height * scale_factor)
-            padding = (current_height - new_height) // 2
-            current_height -= 2 * padding
+            scale_factor = current_width_f / original_width_f
+            new_height = int(original_height_f * scale_factor)
+            padding = (current_height_f - new_height) // 2
+            current_height_f -= 2 * padding
         else:
-            scale_factor = np.array(current_height / original_height,
-                                    dtype=np.float32)
-            new_width = int(original_width * scale_factor)
-            padding = (current_width - new_width) // 2
-            current_width -= 2 * padding
+            scale_factor = current_height_f / original_height_f
+            new_width = int(original_width_f * scale_factor)
+            padding = (current_width_f - new_width) // 2
+            current_width_f -= 2 * padding
 
-        unpadded_features = current_height * current_width
-        newline_features = current_height
+        unpadded_features = int(current_height_f * current_width_f)
+        newline_features = int(current_height_f)
 
-        ratio = math.sqrt(current_height * current_width / (9 * npatches**2))
+        ratio = math.sqrt(current_height_f * current_width_f /
+                          (9 * npatches**2))
         if ratio > 1.1:
-            unpadded_features = int(current_height // ratio) * int(
-                current_width // ratio)
-            newline_features = int(current_height // ratio)
+            height_factor = int(current_height_f // ratio)
+            width_factor = int(current_width_f // ratio)
+            unpadded_features = height_factor * width_factor
+            newline_features = height_factor
 
         return (unpadded_features, newline_features)
 
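LlavaOnevisionProcessingMixin gets the same float32 treatment plus a small refactor of its extra cap: when the unpadded grid exceeds the nine-tile budget (9 * npatches**2) by more than 10%, both dimensions are scaled down by sqrt(ratio) before the counts are taken, and the shared int(current_height_f // ratio) is now computed once as height_factor. A condensed sketch of just that tail, adapted from the new code above (the wrapper name and example inputs are assumptions):

    import math

    import numpy as np

    def cap_features(current_height_f, current_width_f, npatches):
        unpadded_features = int(current_height_f * current_width_f)
        newline_features = int(current_height_f)

        # Shrink both dimensions when the unpadded grid is more than 10% over
        # the 9-tile budget, mirroring the ratio branch in the hunk above.
        ratio = math.sqrt(current_height_f * current_width_f /
                          (9 * npatches**2))
        if ratio > 1.1:
            height_factor = int(current_height_f // ratio)
            width_factor = int(current_width_f // ratio)
            unpadded_features = height_factor * width_factor
            newline_features = height_factor

        return unpadded_features, newline_features

    # Example inputs: a 27-patch-per-side tower with a 4x3 patch grid, i.e.
    # current_height = 108 and current_width = 81; numbers are illustrative.
    print(cap_features(np.float32(108.0), np.float32(81.0), npatches=27))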