fix python 3.8 syntax (#2716)

Simon Mo authored on 2024-02-01 14:00:58 -08:00; committed by GitHub
parent 923797fea4
commit b9e96b17de
2 changed files with 21 additions and 8 deletions

Dockerfile

@@ -4,8 +4,21 @@
 #################### BASE BUILD IMAGE ####################
 FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev
 
+# Set the DEBIAN_FRONTEND variable to noninteractive to avoid interactive prompts
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Preconfigure tzdata for US Central Time (build running in us-central-1 but this really doesn't matter.)
+RUN echo 'tzdata tzdata/Areas select America' | debconf-set-selections \
+    && echo 'tzdata tzdata/Zones/America select Chicago' | debconf-set-selections
+
+# We install an older version of python here for testing to make sure vllm works with older versions of Python.
+# For the actual openai compatible server, we will use the latest version of Python.
 RUN apt-get update -y \
-    && apt-get install -y python3-pip git
+    && apt-get install -y software-properties-common \
+    && add-apt-repository ppa:deadsnakes/ppa -y \
+    && apt-get update -y \
+    && apt-get install -y python3.8 python3.8-dev python3.8-venv python3-pip git \
+    && update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1
 
 # Workaround for https://github.com/openai/triton/issues/2507 and
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
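The deadsnakes install only matters if /usr/bin/python3 really resolves to 3.8 inside the built image. A minimal sketch of such a check, assuming the dev stage has been built and tagged locally as "vllm-dev" (a hypothetical tag, not part of this commit):

# check_python_version.py -- hypothetical helper, not part of this commit
import subprocess

# update-alternatives in the Dockerfile points /usr/bin/python3 at python3.8,
# so the dev image should report a 3.8.x interpreter.
result = subprocess.run(
    ["docker", "run", "--rm", "vllm-dev", "python3", "--version"],
    capture_output=True,
    text=True,
    check=True,
)
version = result.stdout.strip()
print(version)
assert version.startswith("Python 3.8"), f"unexpected interpreter: {version}"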

vllm/entrypoints/openai/serving_completion.py

@ -1,7 +1,7 @@
import asyncio import asyncio
import time import time
from fastapi import Request from fastapi import Request
from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional from typing import AsyncGenerator, AsyncIterator, Callable, List, Optional, Dict, Tuple
from vllm.logger import init_logger from vllm.logger import init_logger
from vllm.utils import random_uuid from vllm.utils import random_uuid
from vllm.engine.async_llm_engine import AsyncLLMEngine from vllm.engine.async_llm_engine import AsyncLLMEngine
@@ -19,8 +19,8 @@ from vllm.entrypoints.openai.serving_engine import OpenAIServing
 logger = init_logger(__name__)
 
-TypeTokenIDs = list[int]
-TypeTopLogProbs = List[Optional[dict[int, float]]]
+TypeTokenIDs = List[int]
+TypeTopLogProbs = List[Optional[Dict[int, float]]]
 TypeCreateLogProbsFn = Callable[
     [TypeTokenIDs, TypeTopLogProbs, Optional[int], int], LogProbs]
@ -29,7 +29,7 @@ async def completion_stream_generator(
request: CompletionRequest, request: CompletionRequest,
raw_request: Request, raw_request: Request,
on_abort, on_abort,
result_generator: AsyncIterator[tuple[int, RequestOutput]], result_generator: AsyncIterator[Tuple[int, RequestOutput]],
create_logprobs_fn: TypeCreateLogProbsFn, create_logprobs_fn: TypeCreateLogProbsFn,
request_id: str, request_id: str,
created_time: int, created_time: int,
@@ -126,7 +126,7 @@ async def completion_stream_generator(
     yield "data: [DONE]\n\n"
 
 
-def parse_prompt_format(prompt) -> tuple[bool, list]:
+def parse_prompt_format(prompt) -> Tuple[bool, list]:
     # get the prompt, openai supports the following
     # "a string, array of strings, array of tokens, or array of token arrays."
     prompt_is_tokens = False
@@ -151,7 +151,7 @@ def parse_prompt_format(prompt) -> tuple[bool, list]:
 def request_output_to_completion_response(
-        final_res_batch: list[RequestOutput],
+        final_res_batch: List[RequestOutput],
         request: CompletionRequest,
         create_logprobs_fn: TypeCreateLogProbsFn,
         request_id: str,
@ -302,7 +302,7 @@ class OpenAIServingCompletion(OpenAIServing):
except ValueError as e: except ValueError as e:
return self.create_error_response(str(e)) return self.create_error_response(str(e))
result_generator: AsyncIterator[tuple[ result_generator: AsyncIterator[Tuple[
int, RequestOutput]] = merge_async_iterators(*generators) int, RequestOutput]] = merge_async_iterators(*generators)
# Similar to the OpenAI API, when n != best_of, we do not stream the # Similar to the OpenAI API, when n != best_of, we do not stream the
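For context on why every `list[...]`, `dict[...]`, and `tuple[...]` annotation is rewritten to its `typing` counterpart: subscripting the built-in containers is only supported from Python 3.9 onward (PEP 585), so evaluating `list[int]` on the Python 3.8 interpreter installed in the Dockerfile above raises a TypeError at import time. A minimal sketch of the difference, assuming a plain Python 3.8 interpreter (illustrative only, not part of the diff):

# run under python3.8 -- illustrative only, not part of the diff
from typing import List, Tuple

TypeTokenIDs = List[int]          # works on 3.8: typing.List is subscriptable
ParsedPrompt = Tuple[bool, list]  # same for typing.Tuple

try:
    bad = list[int]               # PEP 585 built-in generics need Python >= 3.9
except TypeError as exc:
    print(f"3.8 rejects built-in generics: {exc}")

Note that `from __future__ import annotations` would only defer evaluation of annotations inside function signatures; module-level aliases such as TypeTokenIDs are evaluated at import time regardless, so switching to the `typing` aliases is the fix that covers both cases.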