From 3dd507083f4d8416d5fed9827e91d22f29b0b723 Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Thu, 4 Jul 2024 09:58:18 +0800
Subject: [PATCH] [CI/Build] Cleanup VLM tests (#6107)

---
 tests/models/test_llava_next.py | 7 ++-----
 tests/models/test_phi3v.py      | 3 +--
 tests/models/utils.py           | 1 +
 vllm/multimodal/image.py        | 2 +-
 4 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/tests/models/test_llava_next.py b/tests/models/test_llava_next.py
index bf911b5c..581cbcf9 100644
--- a/tests/models/test_llava_next.py
+++ b/tests/models/test_llava_next.py
@@ -1,4 +1,3 @@
-import re
 from typing import List, Optional, Tuple
 
 import pytest
@@ -36,7 +35,6 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
     output_ids, output_str, out_logprobs = vllm_output
 
     tokenizer = AutoTokenizer.from_pretrained(model)
-    image_token_str = tokenizer.decode(IMAGE_TOKEN_ID)
     eos_token_id = tokenizer.eos_token_id
 
     hf_output_ids = [
@@ -44,9 +42,8 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
         if token_id != IMAGE_TOKEN_ID or output_ids[idx - 1] != IMAGE_TOKEN_ID
     ]
 
-    hf_output_str = re.sub(fr"({image_token_str})+", "", output_str)
-    assert hf_output_str[0] == " "
-    hf_output_str = hf_output_str[1:]
+    assert output_str[0] == " "
+    hf_output_str = output_str[1:]
     if hf_output_ids[-1] == eos_token_id:
         hf_output_str = hf_output_str + tokenizer.decode(eos_token_id)
 
diff --git a/tests/models/test_phi3v.py b/tests/models/test_phi3v.py
index cb32a047..faadab22 100644
--- a/tests/models/test_phi3v.py
+++ b/tests/models/test_phi3v.py
@@ -35,8 +35,7 @@ def vllm_to_hf_output(vllm_output: Tuple[List[int], str,
     assert output_str_without_image[0] == " "
     output_str_without_image = output_str_without_image[1:]
 
-    hf_output_str = output_str_without_image.replace("<|user|>", "") \
-        .replace("<|end|>\n<|assistant|>", " ")
+    hf_output_str = output_str_without_image + "<|end|><|endoftext|>"
 
     tokenizer = AutoTokenizer.from_pretrained(model)
     hf_output_ids = tokenizer.encode(output_str_without_image)
diff --git a/tests/models/utils.py b/tests/models/utils.py
index 51d57129..425f57ef 100644
--- a/tests/models/utils.py
+++ b/tests/models/utils.py
@@ -77,6 +77,7 @@ def check_logprobs_close(
                 # Each predicted token must be in top N logprobs of the other
                 fail_msg = (
                     f"Test{prompt_idx}:"
+                    f"\nMatched tokens:\t{output_ids_0[:idx]}"
                     f"\n{name_0}:\t{output_str_0!r}\t{logprobs_elem_0}"
                     f"\n{name_1}:\t{output_str_1!r}\t{logprobs_elem_1}")
 
diff --git a/vllm/multimodal/image.py b/vllm/multimodal/image.py
index dfef3312..27010fa6 100644
--- a/vllm/multimodal/image.py
+++ b/vllm/multimodal/image.py
@@ -115,7 +115,7 @@ class ImagePlugin(MultiModalPlugin):
         if isinstance(data, Image.Image):
            image_processor = self._get_hf_image_processor(model_config)
             if image_processor is None:
-                raise RuntimeError("No HuggingFace processor is available"
+                raise RuntimeError("No HuggingFace processor is available "
                                    "to process the image object")
             try:
                 batch_data = image_processor \
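
Note on the vllm/multimodal/image.py hunk: adjacent Python string literals
are joined with no separator, so the pre-patch message rendered as
"...availableto process the image object". A minimal standalone sketch of
the behavior (illustration only, not part of the patch):

    # Adjacent literals fuse into one word without the trailing space:
    broken = ("No HuggingFace processor is available"
              "to process the image object")
    assert "availableto" in broken

    # The one-character fix restores the intended message:
    fixed = ("No HuggingFace processor is available "
             "to process the image object")
    assert "available to process" in fixed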