From 62ec38ea4148bb8147f346f7e01cab6b8a2ec7b6 Mon Sep 17 00:00:00 2001
From: Woosuk Kwon
Date: Fri, 2 Jun 2023 22:35:17 -0700
Subject: [PATCH] Document supported models (#127)

---
 cacheflow/entrypoints/llm.py            |  4 ++-
 docs/README.md                          |  3 +-
 docs/source/index.rst                   |  7 +++++
 docs/source/models/adding_model.rst     |  7 +++++
 docs/source/models/supported_models.rst | 40 +++++++++++++++++++++++++
 5 files changed, 58 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/models/adding_model.rst
 create mode 100644 docs/source/models/supported_models.rst

diff --git a/cacheflow/entrypoints/llm.py b/cacheflow/entrypoints/llm.py
index 337231e3..7f2fdd7b 100644
--- a/cacheflow/entrypoints/llm.py
+++ b/cacheflow/entrypoints/llm.py
@@ -39,11 +39,13 @@ class LLM:
 
     def generate(
         self,
-        prompts: List[str],
+        prompts: Union[str, List[str]],
         sampling_params: Optional[SamplingParams] = None,
         prompt_token_ids: Optional[List[List[int]]] = None,
         use_tqdm: bool = True,
     ) -> List[RequestOutput]:
+        if isinstance(prompts, str):
+            prompts = [prompts]
         if sampling_params is None:
             # Use default sampling params.
             sampling_params = SamplingParams()
diff --git a/docs/README.md b/docs/README.md
index e1b4c6cd..a1d4203d 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -14,7 +14,6 @@ make html
 ## Open the docs with your brower
 
 ```bash
-cd build/html
-python -m http.server
+python -m http.server -d build/html/
 ```
 Launch your browser and open localhost:8000.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index d31498a8..3f8cd651 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -10,3 +10,10 @@ Documentation
 
    getting_started/installation
    getting_started/quickstart
+
+.. toctree::
+   :maxdepth: 1
+   :caption: Models
+
+   models/supported_models
+   models/adding_model
diff --git a/docs/source/models/adding_model.rst b/docs/source/models/adding_model.rst
new file mode 100644
index 00000000..b7f7f4d6
--- /dev/null
+++ b/docs/source/models/adding_model.rst
@@ -0,0 +1,7 @@
+.. _adding_a_new_model:
+
+Adding a New Model
+==================
+
+
+Placeholder
diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst
new file mode 100644
index 00000000..e73ae6bd
--- /dev/null
+++ b/docs/source/models/supported_models.rst
@@ -0,0 +1,40 @@
+.. _supported_models:
+
+Supported Models
+================
+
+CacheFlow supports a variety of generative Transformer models in `HuggingFace Transformers `_.
+The following is the list of model architectures that are currently supported by CacheFlow.
+Alongside each architecture, we include some popular models that use it.
+
+.. list-table::
+  :widths: 25 75
+  :header-rows: 1
+
+  * - Architecture
+    - Models
+  * - :code:`GPT2LMHeadModel`
+    - GPT-2
+  * - :code:`GPTNeoXForCausalLM`
+    - GPT-NeoX, Pythia, OpenAssistant, Dolly V2, StableLM
+  * - :code:`LlamaForCausalLM`
+    - LLaMA, Vicuna, Alpaca, Koala
+  * - :code:`OPTForCausalLM`
+    - OPT, OPT-IML
+
+If your model uses one of the above model architectures, you can seamlessly run your model with CacheFlow.
+Otherwise, please refer to :ref:`Adding a New Model <adding_a_new_model>` for instructions on how to implement support for your model.
+Alternatively, you can raise an issue on our `GitHub `_ project.
+
+.. tip::
+    The easiest way to check if your model is supported is to run the program below:
+
+    .. code-block:: python
+
+        from cacheflow import LLM
+
+        llm = LLM(model=...)  # Name or path of your model
+        output = llm.generate("Hello, my name is")
+        print(output)
+
+    If CacheFlow successfully generates text, it indicates that your model is supported.
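
For reference, a minimal sketch of how the patched `generate` signature behaves, assuming a working CacheFlow build; the checkpoint name `facebook/opt-125m` and the `temperature` setting are illustrative choices, not part of this patch:

```python
from cacheflow import LLM, SamplingParams

# Illustrative checkpoint: any model using a supported architecture
# (here OPTForCausalLM) should behave the same way.
llm = LLM(model="facebook/opt-125m")

# After this patch, a bare string is wrapped into a one-element list
# internally, so both calls below return a List[RequestOutput].
single = llm.generate("Hello, my name is")
batch = llm.generate(
    ["Hello, my name is", "The capital of France is"],
    sampling_params=SamplingParams(temperature=0.8),
)

print(single)
print(batch)
```

Wrapping the bare string internally keeps the return type stable across both call forms, so existing callers consume results exactly as before while gaining the single-prompt convenience.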