[Bugfix] Fix Phi-3v crash when input images are of certain sizes (#7840)
This commit is contained in:
parent aab0fcdb63
commit 80162c44b1
@ -3,13 +3,14 @@ import re
|
|||||||
from typing import List, Optional, Tuple, Type
|
from typing import List, Optional, Tuple, Type
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
from PIL import Image
|
||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
|
|
||||||
from vllm.multimodal.utils import rescale_image_size
|
from vllm.multimodal.utils import rescale_image_size
|
||||||
from vllm.sequence import SampleLogprobs
|
from vllm.sequence import SampleLogprobs
|
||||||
from vllm.utils import is_cpu, is_hip
|
from vllm.utils import is_cpu, is_hip
|
||||||
|
|
||||||
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner, _ImageAssets
|
from ..conftest import IMAGE_ASSETS, HfRunner, VllmRunner
|
||||||
from .utils import check_logprobs_close
|
from .utils import check_logprobs_close
|
||||||
|
|
||||||
pytestmark = pytest.mark.vlm
|
pytestmark = pytest.mark.vlm
|
||||||
@ -58,7 +59,7 @@ if is_hip():
|
|||||||
def run_test(
|
def run_test(
|
||||||
hf_runner: Type[HfRunner],
|
hf_runner: Type[HfRunner],
|
||||||
vllm_runner: Type[VllmRunner],
|
vllm_runner: Type[VllmRunner],
|
||||||
image_assets: _ImageAssets,
|
images: List[Image.Image],
|
||||||
model: str,
|
model: str,
|
||||||
*,
|
*,
|
||||||
size_factors: List[float],
|
size_factors: List[float],
|
||||||
@ -77,8 +78,6 @@ def run_test(
|
|||||||
Note, the text input is also adjusted to abide by vllm contract.
|
Note, the text input is also adjusted to abide by vllm contract.
|
||||||
The text output is sanitized to be able to compare with hf.
|
The text output is sanitized to be able to compare with hf.
|
||||||
"""
|
"""
|
||||||
images = [asset.pil_image for asset in image_assets]
|
|
||||||
|
|
||||||
inputs_per_image = [(
|
inputs_per_image = [(
|
||||||
[prompt for _ in size_factors],
|
[prompt for _ in size_factors],
|
||||||
[
|
[
|
||||||
@ -159,7 +158,7 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
|
|||||||
run_test(
|
run_test(
|
||||||
hf_runner,
|
hf_runner,
|
||||||
vllm_runner,
|
vllm_runner,
|
||||||
image_assets,
|
[asset.pil_image for asset in image_assets],
|
||||||
model,
|
model,
|
||||||
size_factors=size_factors,
|
size_factors=size_factors,
|
||||||
dtype=dtype,
|
dtype=dtype,
|
||||||
@ -167,3 +166,21 @@ def test_models(hf_runner, vllm_runner, image_assets, model, size_factors,
|
|||||||
num_logprobs=num_logprobs,
|
num_logprobs=num_logprobs,
|
||||||
tensor_parallel_size=1,
|
tensor_parallel_size=1,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("model", models)
|
||||||
|
@pytest.mark.parametrize("dtype", [target_dtype])
|
||||||
|
def test_regression_7840(hf_runner, vllm_runner, image_assets, model,
|
||||||
|
dtype) -> None:
|
||||||
|
# Regression test for #7840.
|
||||||
|
run_test(
|
||||||
|
hf_runner,
|
||||||
|
vllm_runner,
|
||||||
|
[image_assets[0].pil_image.resize((465, 226))],
|
||||||
|
model,
|
||||||
|
size_factors=[1.0],
|
||||||
|
dtype=dtype,
|
||||||
|
max_tokens=128,
|
||||||
|
num_logprobs=10,
|
||||||
|
tensor_parallel_size=1,
|
||||||
|
)
|
||||||
|
|||||||
@ -400,8 +400,6 @@ def input_processor_for_phi3v(ctx: InputContext, llm_inputs: LLMInputs):
|
|||||||
image_data = multi_modal_data["image"]
|
image_data = multi_modal_data["image"]
|
||||||
if isinstance(image_data, Image.Image):
|
if isinstance(image_data, Image.Image):
|
||||||
w, h = image_data.size
|
w, h = image_data.size
|
||||||
w, h = _calc_hd_transform_size(width=w, height=h)
|
|
||||||
|
|
||||||
image_feature_size = get_phi3v_image_feature_size(hf_config,
|
image_feature_size = get_phi3v_image_feature_size(hf_config,
|
||||||
input_width=w,
|
input_width=w,
|
||||||
input_height=h)
|
input_height=h)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user