From 62ec38ea4148bb8147f346f7e01cab6b8a2ec7b6 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Fri, 2 Jun 2023 22:35:17 -0700
Subject: [PATCH] Document supported models (#127)

---
 cacheflow/entrypoints/llm.py            |  4 ++-
 docs/README.md                          |  3 +-
 docs/source/index.rst                   |  7 +++++
 docs/source/models/adding_model.rst     |  7 +++++
 docs/source/models/supported_models.rst | 40 +++++++++++++++++++++++++
 5 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/models/adding_model.rst
 create mode 100644 docs/source/models/supported_models.rst

diff --git a/cacheflow/entrypoints/llm.py b/cacheflow/entrypoints/llm.py
index 337231e3..7f2fdd7b 100644
--- a/cacheflow/entrypoints/llm.py
+++ b/cacheflow/entrypoints/llm.py
@@ -39,11 +39,13 @@ class LLM:
 
     def generate(
         self,
-        prompts: List[str],
+        prompts: Union[str, List[str]],
         sampling_params: Optional[SamplingParams] = None,
         prompt_token_ids: Optional[List[List[int]]] = None,
         use_tqdm: bool = True,
     ) -> List[RequestOutput]:
+        if isinstance(prompts, str):
+            prompts = [prompts]
         if sampling_params is None:
             # Use default sampling params.
             sampling_params = SamplingParams()
diff --git a/docs/README.md b/docs/README.md
index e1b4c6cd..a1d4203d 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -14,7 +14,6 @@ make html
 ## Open the docs with your brower
 
 ```bash
-cd build/html
-python -m http.server
+python -m http.server -d build/html/
 ```
 Launch your browser and open localhost:8000.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index d31498a8..3f8cd651 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -10,3 +10,10 @@ Documentation
 
    getting_started/installation
    getting_started/quickstart
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Models
+
+   models/supported_models
+   models/adding_model
diff --git a/docs/source/models/adding_model.rst b/docs/source/models/adding_model.rst
new file mode 100644
index 00000000..b7f7f4d6
--- /dev/null
+++ b/docs/source/models/adding_model.rst
@@ -0,0 +1,7 @@
+.. _adding_a_new_model:
+
+Adding a New Model
+==================
+
+
+Placeholder
diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst
new file mode 100644
index 00000000..e73ae6bd
--- /dev/null
+++ b/docs/source/models/supported_models.rst
@@ -0,0 +1,40 @@
+.. _supported_models:
+
+Supported Models
+================
+
+CacheFlow supports a variety of generative Transformer models in `HuggingFace Transformers `_.
+The following is the list of model architectures that are currently supported by CacheFlow.
+Alongside each architecture, we include some popular models that use it.
+
+.. list-table::
+  :widths: 25 75
+  :header-rows: 1
+
+  * - Architecture
+    - Models
+  * - :code:`GPT2LMHeadModel`
+    - GPT-2
+  * - :code:`GPTNeoXForCausalLM`
+    - GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM
+  * - :code:`LlamaForCausalLM`
+    - LLaMA, Vicuna, Alpaca, Koala
+  * - :code:`OPTForCausalLM`
+    - OPT, OPT-IML
+
+If your model uses one of the above model architectures, you can seamlessly run your model with CacheFlow.
+Otherwise, please refer to :ref:`Adding a New Model <adding_a_new_model>` for instructions on how to implement support for your model.
+Alternatively, you can raise an issue on our `GitHub `_ project.
+
+.. tip::
+    The easiest way to check if your model is supported is to run the program below:
+
+    .. code-block:: python
+
+        from cacheflow import LLM
+
+        llm = LLM(model=...)  # Name or path of your model
+        output = llm.generate("Hello, my name is")
+        print(output)
+
+    If CacheFlow successfully generates text, it indicates that your model is supported.
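
For reference, a minimal sketch of how the patched `generate` signature behaves, assuming a working CacheFlow build; the checkpoint name `facebook/opt-125m` and the `temperature` setting are illustrative choices, not part of this patch:

```python
from cacheflow import LLM, SamplingParams

# Illustrative checkpoint: any model using a supported architecture
# (here OPTForCausalLM) should behave the same way.
llm = LLM(model="facebook/opt-125m")

# After this patch, a bare string is wrapped into a one-element list
# internally, so both calls below return a List[RequestOutput].
single = llm.generate("Hello, my name is")
batch = llm.generate(
    ["Hello, my name is", "The capital of France is"],
    sampling_params=SamplingParams(temperature=0.8),
)

print(single)
print(batch)
```

Wrapping the bare string internally keeps the return type stable across both call forms, so existing callers consume results exactly as before while gaining the single-prompt convenience.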