[Bugfix] Fix Phi-3v crash when input images are of certain sizes (#7840)

This commit is contained in:
zifeitong 2024-08-24 18:16:24 -07:00 committed by GitHub
parent aab0fcdb63
commit 80162c44b1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 22 additions and 7 deletions

View File

@ -3,13 +3,14 @@ import re
from typing import List, Optional, Tuple, Type from typing import List, Optional, Tuple, Type
import pytest import pytest
from PIL import Image
from transformers import AutoTokenizer from transformers import AutoTokenizer
from vllm.multimodal.utils import rescale_image_size from vllm.multimodal.utils import rescale_image_size
from vllm.sequence import SampleLogprobs from vllm.sequence import SampleLogprobs
from vllm.utils import is_cpu, is_hip from vllm.utils import is_cpu, is_hip
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner
from .utils import check_logprobs_close from .utils import check_logprobs_close
pytestmark = pytest.mark.vlm pytestmark = pytest.mark.vlm
@ -58,7 +59,7 @@ if is_hip():
def run_test( def run_test(
hf_runner: Type[HfRunner], hf_runner: Type[HfRunner],
vllm_runner: Type[VllmRunner], vllm_runner: Type[VllmRunner],
image_assets: _ImageAssets, images: List[Image.Image],
model: str, model: str,
*, *,
size_factors: List[float], size_factors: List[float],
@ -77,8 +78,6 @@ def run_test(
Note, the text input is also adjusted to abide by vllm contract. Note, the text input is also adjusted to abide by vllm contract.
The text output is sanitized to be able to compare with hf. The text output is sanitized to be able to compare with hf.
""" """
images = [asset.pil_image for asset in image_assets]
inputs_per_image = [( inputs_per_image = [(
[prompt for _ in size_factors], [prompt for _ in size_factors],
[ [
@ -159,7 +158,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
run_test( run_test(
hf_runner, hf_runner,
vllm_runner, vllm_runner,
image_assets, [asset.pil_image for asset in image_assets],
model, model,
size_factors=size_factors, size_factors=size_factors,
dtype=dtype, dtype=dtype,
@ -167,3 +166,21 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
num_logprobs=num_logprobs, num_logprobs=num_logprobs,
tensor_parallel_size=1, tensor_parallel_size=1,
) )
@pytest.mark.parametrize("model", models)
@pytest.mark.parametrize("dtype", [target_dtype])
def test_regression_7840(hf_runner, vllm_runner, image_assets, model,
                         dtype) -> None:
    """Regression test for #7840.

    Feeds ``run_test`` a single image: the first image asset resized to
    465x226, at a size factor of 1.0 only.
    """
    # NOTE(review): 465x226 is presumably one of the input sizes that
    # triggered the crash reported in #7840 -- confirm against the issue.
    resized_image = image_assets[0].pil_image.resize((465, 226))

    run_test(
        hf_runner,
        vllm_runner,
        [resized_image],
        model,
        size_factors=[1.0],
        dtype=dtype,
        max_tokens=128,
        num_logprobs=10,
        tensor_parallel_size=1,
    )

View File

@ -400,8 +400,6 @@ def input_processor_for_phi3v(ctx: InputContext, llm_inputs: LLMInputs):
image_data = multi_modal_data["image"] image_data = multi_modal_data["image"]
if isinstance(image_data, Image.Image): if isinstance(image_data, Image.Image):
w, h = image_data.size w, h = image_data.size
w, h = _calc_hd_transform_size(width=w, height=h)
image_feature_size = get_phi3v_image_feature_size(hf_config, image_feature_size = get_phi3v_image_feature_size(hf_config,
input_width=w, input_width=w,
input_height=h) input_height=h)