From 1df43de9bb2cceecdc0dc2dc5c650a327aeabe0f Mon Sep 17 00:00:00 2001
From: xwjiang2010 <87673679+xwjiang2010@users.noreply.github.com>
Date: Thu, 11 Jul 2024 10:21:10 -0700
Subject: [PATCH] [bug fix] Fix llava next feature size calculation. (#6339)

Signed-off-by: Xiaowei Jiang
---
 tests/models/test_llava_next.py          | 14 +++++++++++++-
 vllm/model_executor/models/llava_next.py | 18 ++++++++++--------
 2 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/tests/models/test_llava_next.py b/tests/models/test_llava_next.py
index 581cbcf9..163741a5 100644
--- a/tests/models/test_llava_next.py
+++ b/tests/models/test_llava_next.py
@@ -1,8 +1,10 @@
 from typing import List, Optional, Tuple
 
 import pytest
-from transformers import AutoTokenizer
+from transformers import AutoConfig, AutoTokenizer
 
+from vllm.model_executor.models.llava_next import (
+    get_llava_next_image_feature_size)
 from vllm.multimodal.utils import rescale_image_size
 from vllm.sequence import SampleLogprobs
 
@@ -120,3 +122,13 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
         name_0="hf",
         name_1="vllm",
     )
+
+
+@pytest.mark.parametrize("height_and_width_and_result", [(1669, 2560, 2144),
+                                                         (183, 488, 776)])
+def test_image_feature_size(height_and_width_and_result):
+    height, width, result = height_and_width_and_result
+    config = AutoConfig.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
+    assert get_llava_next_image_feature_size(config,
+                                             input_height=height,
+                                             input_width=width) == result
diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py
index 7e06f1e9..9369ec89 100644
--- a/vllm/model_executor/models/llava_next.py
+++ b/vllm/model_executor/models/llava_next.py
@@ -74,19 +74,21 @@ def _get_llava_next_num_unpadded_features(
 ) -> Tuple[int, int]:
     current_height = npatches * num_patch_height
     current_width = npatches * num_patch_width
+    current_height = torch.tensor(current_height).to("cuda")
+    current_width = torch.tensor(current_width).to("cuda")
 
     aspect_ratio: float = width / height
     current_aspect_ratio: float = current_width / current_height
     if aspect_ratio > current_aspect_ratio:
-        new_height = (height * current_width) // width
-        if new_height % 2 == 1:
-            new_height += 1
-        current_height = new_height
+        scale_factor = current_width / width
+        new_height = int(height * scale_factor)
+        padding = (current_height - new_height) // 2
+        current_height -= padding * 2
     else:
-        new_width = (width * current_height) // height
-        if new_width % 2 == 1:
-            new_width += 1
-        current_width = new_width
+        scale_factor = current_height / height
+        new_width = int(width * scale_factor)
+        padding = (current_width - new_width) // 2
+        current_width -= padding * 2
 
     unpadded_features = current_height * current_width
     newline_features = current_height