From 4040256b5e4b2f8a3fd5f314d61c1d87171aff49 Mon Sep 17 00:00:00 2001
From: Tri Dao
Date: Tue, 15 Nov 2022 14:10:36 -0800
Subject: [PATCH] Update pip install instructions, bump to 0.2

---
 README.md | 11 ++++++++---
 setup.py  |  4 ++--
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 6f77216..8bff416 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,14 @@
 and experiment with. The notations in the Triton implementation are also closer
 to what's used in our paper.
 
-## Alpha release (0.1).
+## Beta release (0.2).
 
-To compile (requiring CUDA 11, NVCC, and an Turing or Ampere GPU):
+To install (requiring CUDA 11, NVCC, and a Turing or Ampere GPU):
+```sh
+pip install flash-attn
+```
+
+Alternatively, you can compile from source:
 ```
 python setup.py install
 ```
@@ -44,7 +49,7 @@ FlashAttention currently supports:
 3. Head dimensions that are multiples of 8, up to 128 (e.g., 8, 16, 24, ..., 128). Head dim > 64 backward requires A100.
 
 Our tentative roadmap:
-1. [Jun 2022] Make package pip-installable.
+1. ~~[Jun 2022] Make package pip-installable~~[Done, thanks to lucidrains].
 2. ~~[Jun 2022] Support SM86 GPUs (e.g., RTX 3080, 3090)~~[Done].
 3. [Jun 2022] Refactor to use Cutlass.
 4. ~~[Jun 2022] Support SM75 GPUs (e.g. T4)~~[Done].
diff --git a/setup.py b/setup.py
index fa27226..722f011 100644
--- a/setup.py
+++ b/setup.py
@@ -152,7 +152,7 @@ ext_modules.append(
 
 setup(
     name="flash_attn",
-    version="0.1",
+    version="0.2",
     packages=find_packages(
         exclude=("build", "csrc", "include", "tests", "dist", "docs", "benchmarks", "flash_attn.egg-info",)
     ),
@@ -164,7 +164,7 @@ setup(
     url="https://github.com/HazyResearch/flash-attention",
     classifiers=[
         "Programming Language :: Python :: 3",
-        "License :: OSI Approved :: Apache Software License",
+        "License :: OSI Approved :: BSD License",
         "Operating System :: Unix",
     ],
     ext_modules=ext_modules,
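
As a quick sanity check of the install path this patch documents, something like the following should work; it is only a sketch, assuming CUDA 11, NVCC, and PyTorch are already set up (the package builds CUDA extensions), and it uses only the module name `flash_attn` declared in the setup.py above:

```sh
# Install from PyPI as described in the updated README
pip install flash-attn

# Or, alternatively, build from a checkout of the repository:
# python setup.py install

# Verify the package is importable; `flash_attn` is the package name from setup.py
python -c "import flash_attn; print('flash_attn installed at:', flash_attn.__file__)"
```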