diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index d7cac22c..51fb8d9e 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -64,7 +64,7 @@ async def async_request_tgi(
                     output.ttft = ttft
                     output.latency = time.perf_counter() - st
 
-                    body = data.decode("utf-8").lstrip("data:")
+                    body = remove_prefix(data.decode("utf-8"), "data:")
                     output.generated_text = json.loads(body)["generated_text"]
                     output.success = True
                 else:
@@ -158,7 +158,7 @@ async def async_request_trt_llm(
                     output.ttft = ttft
                     output.latency = time.perf_counter() - st
 
-                    body = data.decode("utf-8").lstrip("data:")
+                    body = remove_prefix(data.decode("utf-8"), "data:")
                     output.generated_text = json.loads(body)["text_output"]
                     output.success = True
 
@@ -255,7 +255,7 @@ async def async_request_openai_completions(
                     if not chunk:
                         continue
 
-                    chunk = chunk.decode("utf-8").lstrip("data: ")
+                    chunk = remove_prefix(chunk.decode("utf-8"), "data: ")
                     if chunk == "[DONE]":
                         latency = time.perf_counter() - st
                     else:
@@ -322,7 +322,7 @@ async def async_request_openai_chat_completions(
                     if not chunk:
                         continue
 
-                    chunk = chunk.decode("utf-8").lstrip("data: ")
+                    chunk = remove_prefix(chunk.decode("utf-8"), "data: ")
                     if chunk == "[DONE]":
                         latency = time.perf_counter() - st
                     else:
@@ -344,6 +344,13 @@ async def async_request_openai_chat_completions(
     return output
 
 
+# Since vllm must support Python 3.8, we can't use str.removeprefix(prefix) introduced in Python 3.9
+def remove_prefix(text: str, prefix: str) -> str:
+    if text.startswith(prefix):
+        return text[len(prefix):]
+    return text
+
+
 ASYNC_REQUEST_FUNCS = {
     "tgi": async_request_tgi,
     "vllm": async_request_vllm,
diff --git a/pyproject.toml b/pyproject.toml
index d6fa5d7a..e0a01215 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,8 +33,6 @@ ignore = [
     "F405", "F403",
     # lambda expression assignment
     "E731",
-    # .strip() with multi-character strings
-    "B005",
     # Loop control variable not used within loop body
     "B007",
 ]
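
For context (an illustrative note, not part of the diff): str.lstrip with a multi-character argument treats it as a set of characters to strip rather than a literal prefix, which is exactly what the B005 rule (".strip() with multi-character strings") flags and why its ignore entry is dropped above. A minimal sketch of the difference:

    # lstrip("data: ") keeps stripping any of the characters {d, a, t, :, space},
    # so it can also eat the start of the payload; remove_prefix (equivalent to
    # str.removeprefix on Python 3.9+) strips the exact prefix only.
    def remove_prefix(text: str, prefix: str) -> str:
        if text.startswith(prefix):
            return text[len(prefix):]
        return text

    assert "data: attention".lstrip("data: ") == "ention"  # leading "att" also stripped
    assert remove_prefix("data: attention", "data: ") == "attention"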