From add4f0bc42e7d85c23ed20a64453f918f232039d Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Tue, 30 May 2023 15:53:18 -0700 Subject: [PATCH 01/25] Scaffolding for wheel prototype --- .github/workflows/publish.yml | 71 ++++++++++++++++++++++++++--------- setup.py | 52 ++++++++++++++++++++++++- 2 files changed, 105 insertions(+), 18 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 72df605..a9bd229 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -10,7 +10,7 @@ on: - '**' jobs: - release: + setup_release: name: Create Release runs-on: ubuntu-latest steps: @@ -27,23 +27,27 @@ jobs: with: tag_name: ${{ steps.extract_branch.outputs.branch }} release_name: ${{ steps.extract_branch.outputs.branch }} - - wheel: + + build_wheels: name: Build Wheel runs-on: ${{ matrix.os }} - needs: release - + needs: setup_release + strategy: fail-fast: false matrix: - # os: [ubuntu-20.04] - os: [ubuntu-18.04] - python-version: ['3.7', '3.8', '3.9', '3.10'] - torch-version: [1.11.0, 1.12.0, 1.12.1] - cuda-version: ['113', '116'] - exclude: - - torch-version: 1.11.0 - cuda-version: '116' + # TODO: @pierce - again, simplify for prototyping + os: [ubuntu-20.04] + #os: [ubuntu-20.04, ubuntu-22.04] + # python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.10'] + #torch-version: [1.11.0, 1.12.0, 1.12.1] + torch-version: [1.12.1] + #cuda-version: ['113', '116'] + cuda-version: ['113'] + #exclude: + # - torch-version: 1.11.0 + # cuda-version: '116' steps: - name: Checkout @@ -108,13 +112,13 @@ jobs: export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH export CUDA_INSTALL_DIR=/usr/local/cuda-11.3$CUDA_INSTALL_DIR - pip install wheel + pip install ninja packaging setuptools wheel python setup.py bdist_wheel --dist-dir=dist tmpname=cu${{ matrix.cuda-version }}torch${{ matrix.torch-version }} wheel_name=$(ls dist/*whl | xargs -n 1 basename | sed "s/-/+$tmpname-/2") - ls dist/*whl |xargs -I {} mv {} ${wheel_name} + ls dist/*whl |xargs -I {} mv {} dist/${wheel_name} echo "wheel_name=${wheel_name}" >> $GITHUB_ENV - + - name: Upload Release Asset id: upload_release_asset uses: actions/upload-release-asset@v1 @@ -124,4 +128,37 @@ jobs: upload_url: ${{ steps.get_current_release.outputs.upload_url }} asset_path: ./${{env.wheel_name}} asset_name: ${{env.wheel_name}} - asset_content_type: application/* \ No newline at end of file + asset_content_type: application/* + + publish_package: + name: Publish package + needs: [build_wheels] + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - uses: actions/setup-python@v4 + with: + python-version: '3.10' + + - name: List contents + run: | + ls -la dist + ls -la dist/* + + - name: Install dependencies + run: | + pip install ninja packaging setuptools wheel twine + + - name: Build core package + run: | + python setup.py sdist --dist-dir=dist + + - name: Deploy + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + python -m twine upload dist/* diff --git a/setup.py b/setup.py index 7597ea3..a5b63b1 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,7 @@ from packaging.version import parse, Version from setuptools import setup, find_packages import subprocess +import urllib import torch from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME @@ -22,6 +23,50 @@ with open("README.md", "r", encoding="utf-8") as fh: this_dir = os.path.dirname(os.path.abspath(__file__)) +def get_platform(): + """ + Returns the platform string. + """ + if sys.platform.startswith('linux'): + return 'linux_x86_64' + elif sys.platform == 'darwin': + return 'macosx_10_9_x86_64' + elif sys.platform == 'win32': + return 'win_amd64' + else: + raise ValueError('Unsupported platform: {}'.format(sys.platform)) + +from setuptools.command.install import install + +# @pierce - TODO: Remove for proper release +BASE_WHEEL_URL = "https://github.com/piercefreeman/flash-attention/releases/download/{tag_name}/{wheel_name}" + +class CustomInstallCommand(install): + def run(self): + # Determine the version numbers that will be used to determine the correct wheel + _, cuda_version = get_cuda_bare_metal_version() + torch_version = torch.__version__ + python_version = f"cp{sys.version_info.major}{sys.version_info.minor}" + platform_name = get_platform() + flash_version = get_package_version() + + # Determine wheel URL based on CUDA version, torch version, python version and OS + wheel_filename = f'flash_attn-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl' + wheel_url = BASE_WHEEL_URL.format( + tag_name=f"v{flash_version}", + wheel_name=wheel_filename + ) + + try: + urllib.request.urlretrieve(wheel_url, wheel_filename) + os.system(f'pip install {wheel_filename}') + os.remove(wheel_filename) + except urllib.error.HTTPError: + print("Precompiled wheel not found. Building from source...") + # If the wheel could not be downloaded, build from source + install.run(self) + + def get_cuda_bare_metal_version(cuda_dir): raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True) output = raw_output.split() @@ -190,7 +235,12 @@ setup( "Operating System :: Unix", ], ext_modules=ext_modules, - cmdclass={"build_ext": BuildExtension} if ext_modules else {}, + cmdclass={ + 'install': CustomInstallCommand, + "build_ext": BuildExtension + } if ext_modules else { + 'install': CustomInstallCommand, + }, python_requires=">=3.7", install_requires=[ "torch", From e1faefce9de958fa64747edef823a0779392b027 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 13:20:39 -0700 Subject: [PATCH 02/25] Raise cuda error on build --- .github/workflows/publish.yml | 2 +- setup.py | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index a9bd229..a0244f8 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -126,7 +126,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: upload_url: ${{ steps.get_current_release.outputs.upload_url }} - asset_path: ./${{env.wheel_name}} + asset_path: ./dist/${{env.wheel_name}} asset_name: ${{env.wheel_name}} asset_content_type: application/* diff --git a/setup.py b/setup.py index a5b63b1..91a37ce 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,8 @@ from packaging.version import parse, Version from setuptools import setup, find_packages import subprocess -import urllib +import urllib.request +import urllib.error import torch from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME @@ -43,8 +44,10 @@ BASE_WHEEL_URL = "https://github.com/piercefreeman/flash-attention/releases/down class CustomInstallCommand(install): def run(self): + raise_if_cuda_home_none("flash_attn") + # Determine the version numbers that will be used to determine the correct wheel - _, cuda_version = get_cuda_bare_metal_version() + _, cuda_version = get_cuda_bare_metal_version(CUDA_HOME) torch_version = torch.__version__ python_version = f"cp{sys.version_info.major}{sys.version_info.minor}" platform_name = get_platform() @@ -64,7 +67,10 @@ class CustomInstallCommand(install): except urllib.error.HTTPError: print("Precompiled wheel not found. Building from source...") # If the wheel could not be downloaded, build from source - install.run(self) + #install.run(self) + raise ValueError + + raise ValueError def get_cuda_bare_metal_version(cuda_dir): From 0e7769c813fcd2b04882a9cd7e13945002a903d3 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 14:41:07 -0700 Subject: [PATCH 03/25] Guessing wheel URL --- setup.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index 91a37ce..7581d74 100644 --- a/setup.py +++ b/setup.py @@ -47,18 +47,22 @@ class CustomInstallCommand(install): raise_if_cuda_home_none("flash_attn") # Determine the version numbers that will be used to determine the correct wheel - _, cuda_version = get_cuda_bare_metal_version(CUDA_HOME) + _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME) torch_version = torch.__version__ python_version = f"cp{sys.version_info.major}{sys.version_info.minor}" platform_name = get_platform() flash_version = get_package_version() + cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}" # Determine wheel URL based on CUDA version, torch version, python version and OS wheel_filename = f'flash_attn-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl' wheel_url = BASE_WHEEL_URL.format( - tag_name=f"v{flash_version}", + #tag_name=f"v{flash_version}", + # HACK + tag_name=f"v0.0.3", wheel_name=wheel_filename ) + print("Guessing wheel URL: ", wheel_url) try: urllib.request.urlretrieve(wheel_url, wheel_filename) @@ -70,8 +74,6 @@ class CustomInstallCommand(install): #install.run(self) raise ValueError - raise ValueError - def get_cuda_bare_metal_version(cuda_dir): raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True) From dab99053e46c32f394fee40c6d8627f302566b9f Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 14:52:31 -0700 Subject: [PATCH 04/25] Bump build to use 116 for testing --- .github/workflows/publish.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index a0244f8..4f62194 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -44,7 +44,8 @@ jobs: #torch-version: [1.11.0, 1.12.0, 1.12.1] torch-version: [1.12.1] #cuda-version: ['113', '116'] - cuda-version: ['113'] + #cuda-version: ['113'] + cuda-version: ['116'] #exclude: # - torch-version: 1.11.0 # cuda-version: '116' @@ -143,11 +144,6 @@ jobs: with: python-version: '3.10' - - name: List contents - run: | - ls -la dist - ls -la dist/* - - name: Install dependencies run: | pip install ninja packaging setuptools wheel twine From 5e4699782a8734f871bee1f628b55d25c05a46a5 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 15:58:36 -0700 Subject: [PATCH 05/25] Allow fallback install --- .github/workflows/publish.yml | 1 + setup.py | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 4f62194..dad5d7d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -109,6 +109,7 @@ jobs: - name: Build wheel run: | + export FLASH_ATTENTION_FORCE_BUILD="TRUE" export FORCE_CUDA="1" export PATH=/usr/local/nvidia/bin:/usr/local/nvidia/lib64:$PATH export LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:$LD_LIBRARY_PATH diff --git a/setup.py b/setup.py index 7581d74..e0fcddd 100644 --- a/setup.py +++ b/setup.py @@ -44,6 +44,9 @@ BASE_WHEEL_URL = "https://github.com/piercefreeman/flash-attention/releases/down class CustomInstallCommand(install): def run(self): + if os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE": + return install.run(self) + raise_if_cuda_home_none("flash_attn") # Determine the version numbers that will be used to determine the correct wheel @@ -59,7 +62,7 @@ class CustomInstallCommand(install): wheel_url = BASE_WHEEL_URL.format( #tag_name=f"v{flash_version}", # HACK - tag_name=f"v0.0.3", + tag_name=f"v0.0.5", wheel_name=wheel_filename ) print("Guessing wheel URL: ", wheel_url) From 9fc9820a5bf0eb851b79388908f43a70affbe296 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 18:02:24 -0700 Subject: [PATCH 06/25] Strip cuda name from torch version --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e0fcddd..ff718ef 100644 --- a/setup.py +++ b/setup.py @@ -51,11 +51,12 @@ class CustomInstallCommand(install): # Determine the version numbers that will be used to determine the correct wheel _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME) - torch_version = torch.__version__ + torch_version_raw = parse(torch.__version__) python_version = f"cp{sys.version_info.major}{sys.version_info.minor}" platform_name = get_platform() flash_version = get_package_version() cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}" + torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}.{torch_version_raw.micro}" # Determine wheel URL based on CUDA version, torch version, python version and OS wheel_filename = f'flash_attn-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl' From ea2ed8862341767d1bb7d82bff3cbd27c9740784 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 18:22:44 -0700 Subject: [PATCH 07/25] Refactor and clean of setup.py --- .github/workflows/publish.yml | 2 + setup.py | 242 ++++++++++++++++++---------------- 2 files changed, 131 insertions(+), 113 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index dad5d7d..3e74449 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -150,6 +150,8 @@ jobs: pip install ninja packaging setuptools wheel twine - name: Build core package + env: + FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE" run: | python setup.py sdist --dist-dir=dist diff --git a/setup.py b/setup.py index ff718ef..cf8a7ef 100644 --- a/setup.py +++ b/setup.py @@ -6,8 +6,10 @@ import re import ast from pathlib import Path from packaging.version import parse, Version +import platform from setuptools import setup, find_packages +from setuptools.command.install import install import subprocess import urllib.request @@ -24,60 +26,29 @@ with open("README.md", "r", encoding="utf-8") as fh: this_dir = os.path.dirname(os.path.abspath(__file__)) +# @pierce - TODO: Update for proper release +BASE_WHEEL_URL = "https://github.com/piercefreeman/flash-attention/releases/download/{tag_name}/{wheel_name}" + +# FORCE_BUILD: Force a fresh build locally, instead of attempting to find prebuilt wheels +# SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation +FORCE_BUILD = os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE" +SKIP_CUDA_BUILD = os.getenv("FLASH_ATTENTION_SKIP_CUDA_BUILD", "FALSE") == "TRUE" + + def get_platform(): """ - Returns the platform string. + Returns the platform name as used in wheel filenames. """ if sys.platform.startswith('linux'): return 'linux_x86_64' elif sys.platform == 'darwin': - return 'macosx_10_9_x86_64' + mac_version = '.'.join(platform.mac_ver()[0].split('.')[:2]) + return f'macosx_{mac_version}_x86_64' elif sys.platform == 'win32': return 'win_amd64' else: raise ValueError('Unsupported platform: {}'.format(sys.platform)) -from setuptools.command.install import install - -# @pierce - TODO: Remove for proper release -BASE_WHEEL_URL = "https://github.com/piercefreeman/flash-attention/releases/download/{tag_name}/{wheel_name}" - -class CustomInstallCommand(install): - def run(self): - if os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE": - return install.run(self) - - raise_if_cuda_home_none("flash_attn") - - # Determine the version numbers that will be used to determine the correct wheel - _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME) - torch_version_raw = parse(torch.__version__) - python_version = f"cp{sys.version_info.major}{sys.version_info.minor}" - platform_name = get_platform() - flash_version = get_package_version() - cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}" - torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}.{torch_version_raw.micro}" - - # Determine wheel URL based on CUDA version, torch version, python version and OS - wheel_filename = f'flash_attn-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl' - wheel_url = BASE_WHEEL_URL.format( - #tag_name=f"v{flash_version}", - # HACK - tag_name=f"v0.0.5", - wheel_name=wheel_filename - ) - print("Guessing wheel URL: ", wheel_url) - - try: - urllib.request.urlretrieve(wheel_url, wheel_filename) - os.system(f'pip install {wheel_filename}') - os.remove(wheel_filename) - except urllib.error.HTTPError: - print("Precompiled wheel not found. Building from source...") - # If the wheel could not be downloaded, build from source - #install.run(self) - raise ValueError - def get_cuda_bare_metal_version(cuda_dir): raw_output = subprocess.check_output([cuda_dir + "/bin/nvcc", "-V"], universal_newlines=True) @@ -147,77 +118,77 @@ if not torch.cuda.is_available(): else: os.environ["TORCH_CUDA_ARCH_LIST"] = "6.0;6.1;6.2;7.0;7.5" - -print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__)) -TORCH_MAJOR = int(torch.__version__.split(".")[0]) -TORCH_MINOR = int(torch.__version__.split(".")[1]) - cmdclass = {} ext_modules = [] -# Check, if ATen/CUDAGeneratorImpl.h is found, otherwise use ATen/cuda/CUDAGeneratorImpl.h -# See https://github.com/pytorch/pytorch/pull/70650 -generator_flag = [] -torch_dir = torch.__path__[0] -if os.path.exists(os.path.join(torch_dir, "include", "ATen", "CUDAGeneratorImpl.h")): - generator_flag = ["-DOLD_GENERATOR_PATH"] +if not SKIP_CUDA_BUILD: + print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__)) + TORCH_MAJOR = int(torch.__version__.split(".")[0]) + TORCH_MINOR = int(torch.__version__.split(".")[1]) -raise_if_cuda_home_none("flash_attn") -# Check, if CUDA11 is installed for compute capability 8.0 -cc_flag = [] -_, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME) -if bare_metal_version < Version("11.0"): - raise RuntimeError("FlashAttention is only supported on CUDA 11 and above") -cc_flag.append("-gencode") -cc_flag.append("arch=compute_75,code=sm_75") -cc_flag.append("-gencode") -cc_flag.append("arch=compute_80,code=sm_80") -if bare_metal_version >= Version("11.8"): + # Check, if ATen/CUDAGeneratorImpl.h is found, otherwise use ATen/cuda/CUDAGeneratorImpl.h + # See https://github.com/pytorch/pytorch/pull/70650 + generator_flag = [] + torch_dir = torch.__path__[0] + if os.path.exists(os.path.join(torch_dir, "include", "ATen", "CUDAGeneratorImpl.h")): + generator_flag = ["-DOLD_GENERATOR_PATH"] + + raise_if_cuda_home_none("flash_attn") + # Check, if CUDA11 is installed for compute capability 8.0 + cc_flag = [] + _, bare_metal_version = get_cuda_bare_metal_version(CUDA_HOME) + if bare_metal_version < Version("11.0"): + raise RuntimeError("FlashAttention is only supported on CUDA 11 and above") cc_flag.append("-gencode") - cc_flag.append("arch=compute_90,code=sm_90") + cc_flag.append("arch=compute_75,code=sm_75") + cc_flag.append("-gencode") + cc_flag.append("arch=compute_80,code=sm_80") + if bare_metal_version >= Version("11.8"): + cc_flag.append("-gencode") + cc_flag.append("arch=compute_90,code=sm_90") -subprocess.run(["git", "submodule", "update", "--init", "csrc/flash_attn/cutlass"]) -ext_modules.append( - CUDAExtension( - name="flash_attn_cuda", - sources=[ - "csrc/flash_attn/fmha_api.cpp", - "csrc/flash_attn/src/fmha_fwd_hdim32.cu", - "csrc/flash_attn/src/fmha_fwd_hdim64.cu", - "csrc/flash_attn/src/fmha_fwd_hdim128.cu", - "csrc/flash_attn/src/fmha_bwd_hdim32.cu", - "csrc/flash_attn/src/fmha_bwd_hdim64.cu", - "csrc/flash_attn/src/fmha_bwd_hdim128.cu", - "csrc/flash_attn/src/fmha_block_fprop_fp16_kernel.sm80.cu", - "csrc/flash_attn/src/fmha_block_dgrad_fp16_kernel_loop.sm80.cu", - ], - extra_compile_args={ - "cxx": ["-O3", "-std=c++17"] + generator_flag, - "nvcc": append_nvcc_threads( - [ - "-O3", - "-std=c++17", - "-U__CUDA_NO_HALF_OPERATORS__", - "-U__CUDA_NO_HALF_CONVERSIONS__", - "-U__CUDA_NO_HALF2_OPERATORS__", - "-U__CUDA_NO_BFLOAT16_CONVERSIONS__", - "--expt-relaxed-constexpr", - "--expt-extended-lambda", - "--use_fast_math", - "--ptxas-options=-v", - "-lineinfo" - ] - + generator_flag - + cc_flag - ), - }, - include_dirs=[ - Path(this_dir) / 'csrc' / 'flash_attn', - Path(this_dir) / 'csrc' / 'flash_attn' / 'src', - Path(this_dir) / 'csrc' / 'flash_attn' / 'cutlass' / 'include', - ], + subprocess.run(["git", "submodule", "update", "--init", "csrc/flash_attn/cutlass"]) + ext_modules.append( + CUDAExtension( + name="flash_attn_cuda", + sources=[ + "csrc/flash_attn/fmha_api.cpp", + "csrc/flash_attn/src/fmha_fwd_hdim32.cu", + "csrc/flash_attn/src/fmha_fwd_hdim64.cu", + "csrc/flash_attn/src/fmha_fwd_hdim128.cu", + "csrc/flash_attn/src/fmha_bwd_hdim32.cu", + "csrc/flash_attn/src/fmha_bwd_hdim64.cu", + "csrc/flash_attn/src/fmha_bwd_hdim128.cu", + "csrc/flash_attn/src/fmha_block_fprop_fp16_kernel.sm80.cu", + "csrc/flash_attn/src/fmha_block_dgrad_fp16_kernel_loop.sm80.cu", + ], + extra_compile_args={ + "cxx": ["-O3", "-std=c++17"] + generator_flag, + "nvcc": append_nvcc_threads( + [ + "-O3", + "-std=c++17", + "-U__CUDA_NO_HALF_OPERATORS__", + "-U__CUDA_NO_HALF_CONVERSIONS__", + "-U__CUDA_NO_HALF2_OPERATORS__", + "-U__CUDA_NO_BFLOAT16_CONVERSIONS__", + "--expt-relaxed-constexpr", + "--expt-extended-lambda", + "--use_fast_math", + "--ptxas-options=-v", + "-lineinfo" + ] + + generator_flag + + cc_flag + ), + }, + include_dirs=[ + Path(this_dir) / 'csrc' / 'flash_attn', + Path(this_dir) / 'csrc' / 'flash_attn' / 'src', + Path(this_dir) / 'csrc' / 'flash_attn' / 'cutlass' / 'include', + ], + ) ) -) def get_package_version(): with open(Path(this_dir) / "flash_attn" / "__init__.py", "r") as f: @@ -229,18 +200,63 @@ def get_package_version(): else: return str(public_version) + +class CachedWheelsCommand(install): + """ + Installer hook to scan for existing wheels that match the current platform environment. + Falls back to building from source if no wheel is found. + + """ + def run(self): + if FORCE_BUILD: + return install.run(self) + + raise_if_cuda_home_none("flash_attn") + + # Determine the version numbers that will be used to determine the correct wheel + _, cuda_version_raw = get_cuda_bare_metal_version(CUDA_HOME) + torch_version_raw = parse(torch.__version__) + python_version = f"cp{sys.version_info.major}{sys.version_info.minor}" + platform_name = get_platform() + flash_version = get_package_version() + cuda_version = f"{cuda_version_raw.major}{cuda_version_raw.minor}" + torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}.{torch_version_raw.micro}" + + # Determine wheel URL based on CUDA version, torch version, python version and OS + wheel_filename = f'flash_attn-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl' + wheel_url = BASE_WHEEL_URL.format( + tag_name=f"v{flash_version}", + wheel_name=wheel_filename + ) + print("Guessing wheel URL: ", wheel_url) + + try: + urllib.request.urlretrieve(wheel_url, wheel_filename) + os.system(f'pip install {wheel_filename}') + os.remove(wheel_filename) + except urllib.error.HTTPError: + print("Precompiled wheel not found. Building from source...") + # If the wheel could not be downloaded, build from source + install.run(self) + + setup( - name="flash_attn", + # @pierce - TODO: Revert for official release + name="flash_attn_wheels", version=get_package_version(), packages=find_packages( exclude=("build", "csrc", "include", "tests", "dist", "docs", "benchmarks", "flash_attn.egg-info",) ), - author="Tri Dao", - author_email="trid@stanford.edu", + #author="Tri Dao", + #author_email="trid@stanford.edu", + # @pierce - TODO: Revert for official release + author="Pierce Freeman", + author_email="pierce@freeman.vc", description="Flash Attention: Fast and Memory-Efficient Exact Attention", long_description=long_description, long_description_content_type="text/markdown", - url="https://github.com/HazyResearch/flash-attention", + #url="https://github.com/HazyResearch/flash-attention", + url="https://github.com/piercefreeman/flash-attention", classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: BSD License", @@ -248,10 +264,10 @@ setup( ], ext_modules=ext_modules, cmdclass={ - 'install': CustomInstallCommand, + 'install': CachedWheelsCommand, "build_ext": BuildExtension } if ext_modules else { - 'install': CustomInstallCommand, + 'install': CachedWheelsCommand, }, python_requires=">=3.7", install_requires=[ From cd0c169eeef47eba8d67c0717bec19f6484739b0 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 18:28:00 -0700 Subject: [PATCH 08/25] Restore full build matrix --- .github/workflows/publish.yml | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 3e74449..f74ef75 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -36,19 +36,13 @@ jobs: strategy: fail-fast: false matrix: - # TODO: @pierce - again, simplify for prototyping - os: [ubuntu-20.04] - #os: [ubuntu-20.04, ubuntu-22.04] - # python-version: ['3.7', '3.8', '3.9', '3.10'] - python-version: ['3.10'] - #torch-version: [1.11.0, 1.12.0, 1.12.1] - torch-version: [1.12.1] - #cuda-version: ['113', '116'] - #cuda-version: ['113'] - cuda-version: ['116'] - #exclude: - # - torch-version: 1.11.0 - # cuda-version: '116' + os: [ubuntu-20.04, ubuntu-22.04] + python-version: ['3.7', '3.8', '3.9', '3.10'] + torch-version: [1.11.0, 1.12.0, 1.12.1] + cuda-version: ['113', '116'] + exclude: + - torch-version: 1.11.0 + cuda-version: '116' steps: - name: Checkout From a682252be78e09f55925e36775ff5818a26b5172 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 18:47:25 -0700 Subject: [PATCH 09/25] OS version build numbers --- .github/workflows/cuda/cu102-Linux.sh | 4 +++- .github/workflows/cuda/cu113-Linux.sh | 8 +++++++- .github/workflows/cuda/cu116-Linux.sh | 4 +++- .github/workflows/publish.yml | 17 +++++++++++------ 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/.github/workflows/cuda/cu102-Linux.sh b/.github/workflows/cuda/cu102-Linux.sh index 46fb053..82729ad 100644 --- a/.github/workflows/cuda/cu102-Linux.sh +++ b/.github/workflows/cuda/cu102-Linux.sh @@ -1,6 +1,8 @@ #!/bin/bash -OS=ubuntu1804 +# Strip the periods from the version number +OS_VERSION=$(echo $OS_VERSION | tr -d .) +OS=ubuntu${OS_VERSION} wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 diff --git a/.github/workflows/cuda/cu113-Linux.sh b/.github/workflows/cuda/cu113-Linux.sh index b89a7fb..65e6d39 100644 --- a/.github/workflows/cuda/cu113-Linux.sh +++ b/.github/workflows/cuda/cu113-Linux.sh @@ -1,11 +1,17 @@ #!/bin/bash -OS=ubuntu1804 +# Strip the periods from the version number +OS_VERSION=$(echo $OS_VERSION | tr -d .) +OS=ubuntu${OS_VERSION} wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.3.0/local_installers/cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb sudo dpkg -i cuda-repo-${OS}-11-3-local_11.3.0-465.19.01-1_amd64.deb + +# TODO: If on version < 22.04, install via signal-desktop-keyring +# For future versions it's deprecated and should be moved into the trusted folder +# sudo mv /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub /etc/apt/trusted.gpg.d/ sudo apt-key add /var/cuda-repo-${OS}-11-3-local/7fa2af80.pub sudo apt-get -qq update diff --git a/.github/workflows/cuda/cu116-Linux.sh b/.github/workflows/cuda/cu116-Linux.sh index e3e4e2a..c49f604 100644 --- a/.github/workflows/cuda/cu116-Linux.sh +++ b/.github/workflows/cuda/cu116-Linux.sh @@ -1,6 +1,8 @@ #!/bin/bash -OS=ubuntu1804 +# Strip the periods from the version number +OS_VERSION=$(echo $OS_VERSION | tr -d .) +OS=ubuntu${OS_VERSION} wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f74ef75..36e990a 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -37,12 +37,15 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, ubuntu-22.04] - python-version: ['3.7', '3.8', '3.9', '3.10'] - torch-version: [1.11.0, 1.12.0, 1.12.1] - cuda-version: ['113', '116'] - exclude: - - torch-version: 1.11.0 - cuda-version: '116' + #python-version: ['3.7', '3.8', '3.9', '3.10'] + python-version: ['3.10'] + torch-version: [1.11.0] + cuda-version: ['113'] + #torch-version: [1.11.0, 1.12.0, 1.12.1] + #cuda-version: ['113', '116'] + #exclude: + # - torch-version: 1.11.0 + # cuda-version: '116' steps: - name: Checkout @@ -65,6 +68,8 @@ jobs: - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} + env: + OS_VERSION: ${{ runner.release }} run: | bash .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}.sh shell: From 2dadfdbbcab2edc6a56b068a8cedc73c8324aacc Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 18:48:02 -0700 Subject: [PATCH 10/25] Temp disable deploy --- .github/workflows/publish.yml | 46 +++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 36e990a..c394348 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -131,32 +131,32 @@ jobs: asset_name: ${{env.wheel_name}} asset_content_type: application/* - publish_package: - name: Publish package - needs: [build_wheels] + # publish_package: + # name: Publish package + # needs: [build_wheels] - runs-on: ubuntu-latest + # runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 + # steps: + # - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: '3.10' + # - uses: actions/setup-python@v4 + # with: + # python-version: '3.10' - - name: Install dependencies - run: | - pip install ninja packaging setuptools wheel twine + # - name: Install dependencies + # run: | + # pip install ninja packaging setuptools wheel twine - - name: Build core package - env: - FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE" - run: | - python setup.py sdist --dist-dir=dist + # - name: Build core package + # env: + # FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE" + # run: | + # python setup.py sdist --dist-dir=dist - - name: Deploy - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - run: | - python -m twine upload dist/* + # - name: Deploy + # env: + # TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + # TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + # run: | + # python -m twine upload dist/* From 061470ae58220a189272e72995a4a206f7447d39 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 18:59:09 -0700 Subject: [PATCH 11/25] echo OS version --- .github/workflows/cuda/cu102-Linux.sh | 2 ++ .github/workflows/cuda/cu113-Linux.sh | 2 ++ .github/workflows/cuda/cu116-Linux.sh | 2 ++ 3 files changed, 6 insertions(+) diff --git a/.github/workflows/cuda/cu102-Linux.sh b/.github/workflows/cuda/cu102-Linux.sh index 82729ad..ac38052 100644 --- a/.github/workflows/cuda/cu102-Linux.sh +++ b/.github/workflows/cuda/cu102-Linux.sh @@ -1,5 +1,7 @@ #!/bin/bash +echo $OS_VERSION + # Strip the periods from the version number OS_VERSION=$(echo $OS_VERSION | tr -d .) OS=ubuntu${OS_VERSION} diff --git a/.github/workflows/cuda/cu113-Linux.sh b/.github/workflows/cuda/cu113-Linux.sh index 65e6d39..0518a09 100644 --- a/.github/workflows/cuda/cu113-Linux.sh +++ b/.github/workflows/cuda/cu113-Linux.sh @@ -1,5 +1,7 @@ #!/bin/bash +echo $OS_VERSION + # Strip the periods from the version number OS_VERSION=$(echo $OS_VERSION | tr -d .) OS=ubuntu${OS_VERSION} diff --git a/.github/workflows/cuda/cu116-Linux.sh b/.github/workflows/cuda/cu116-Linux.sh index c49f604..d717ab4 100644 --- a/.github/workflows/cuda/cu116-Linux.sh +++ b/.github/workflows/cuda/cu116-Linux.sh @@ -1,5 +1,7 @@ #!/bin/bash +echo $OS_VERSION + # Strip the periods from the version number OS_VERSION=$(echo $OS_VERSION | tr -d .) OS=ubuntu${OS_VERSION} From 18e100d312b9fe04079d993aebb2b68dd145daa3 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 19:01:44 -0700 Subject: [PATCH 12/25] Release is actually unsupported --- .github/workflows/cuda/cu102-Linux.sh | 4 +--- .github/workflows/cuda/cu113-Linux.sh | 4 +--- .github/workflows/cuda/cu116-Linux.sh | 4 +--- .github/workflows/publish.yml | 2 -- 4 files changed, 3 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cuda/cu102-Linux.sh b/.github/workflows/cuda/cu102-Linux.sh index ac38052..ada39d5 100644 --- a/.github/workflows/cuda/cu102-Linux.sh +++ b/.github/workflows/cuda/cu102-Linux.sh @@ -1,9 +1,7 @@ #!/bin/bash -echo $OS_VERSION - # Strip the periods from the version number -OS_VERSION=$(echo $OS_VERSION | tr -d .) +OS_VERSION=$(echo $(lsb_release -sr) | tr -d .) OS=ubuntu${OS_VERSION} wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin diff --git a/.github/workflows/cuda/cu113-Linux.sh b/.github/workflows/cuda/cu113-Linux.sh index 0518a09..0b804d9 100644 --- a/.github/workflows/cuda/cu113-Linux.sh +++ b/.github/workflows/cuda/cu113-Linux.sh @@ -1,9 +1,7 @@ #!/bin/bash -echo $OS_VERSION - # Strip the periods from the version number -OS_VERSION=$(echo $OS_VERSION | tr -d .) +OS_VERSION=$(echo $(lsb_release -sr) | tr -d .) OS=ubuntu${OS_VERSION} wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin diff --git a/.github/workflows/cuda/cu116-Linux.sh b/.github/workflows/cuda/cu116-Linux.sh index d717ab4..68e9ed4 100644 --- a/.github/workflows/cuda/cu116-Linux.sh +++ b/.github/workflows/cuda/cu116-Linux.sh @@ -1,9 +1,7 @@ #!/bin/bash -echo $OS_VERSION - # Strip the periods from the version number -OS_VERSION=$(echo $OS_VERSION | tr -d .) +OS_VERSION=$(echo $(lsb_release -sr) | tr -d .) OS=ubuntu${OS_VERSION} wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c394348..0fc0281 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -68,8 +68,6 @@ jobs: - name: Install CUDA ${{ matrix.cuda-version }} if: ${{ matrix.cuda-version != 'cpu' }} - env: - OS_VERSION: ${{ runner.release }} run: | bash .github/workflows/cuda/cu${{ matrix.cuda-version }}-${{ runner.os }}.sh shell: From a372e2be1bd970956bd9b2b8e84f23b7e86e2a4a Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 19:19:49 -0700 Subject: [PATCH 13/25] Add CUDA 11.7 --- .github/workflows/cuda/cu116-Linux.sh | 1 + .github/workflows/cuda/cu117-Linux-env.sh | 9 +++++++++ .github/workflows/cuda/cu117-Linux.sh | 18 ++++++++++++++++++ .github/workflows/publish.yml | 11 +++++++---- 4 files changed, 35 insertions(+), 4 deletions(-) create mode 100644 .github/workflows/cuda/cu117-Linux-env.sh create mode 100644 .github/workflows/cuda/cu117-Linux.sh diff --git a/.github/workflows/cuda/cu116-Linux.sh b/.github/workflows/cuda/cu116-Linux.sh index 68e9ed4..f6ebbe3 100644 --- a/.github/workflows/cuda/cu116-Linux.sh +++ b/.github/workflows/cuda/cu116-Linux.sh @@ -7,6 +7,7 @@ OS=ubuntu${OS_VERSION} wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 wget -nv https://developer.download.nvidia.com/compute/cuda/11.6.2/local_installers/cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb + sudo dpkg -i cuda-repo-${OS}-11-6-local_11.6.2-510.47.03-1_amd64.deb sudo apt-key add /var/cuda-repo-${OS}-11-6-local/7fa2af80.pub diff --git a/.github/workflows/cuda/cu117-Linux-env.sh b/.github/workflows/cuda/cu117-Linux-env.sh new file mode 100644 index 0000000..ab432d1 --- /dev/null +++ b/.github/workflows/cuda/cu117-Linux-env.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +CUDA_HOME=/usr/local/cuda-11.7 +LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} +PATH=${CUDA_HOME}/bin:${PATH} + +export FORCE_CUDA=1 +export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" +export CUDA_HOME=/usr/local/cuda-11.7 \ No newline at end of file diff --git a/.github/workflows/cuda/cu117-Linux.sh b/.github/workflows/cuda/cu117-Linux.sh new file mode 100644 index 0000000..40e66f3 --- /dev/null +++ b/.github/workflows/cuda/cu117-Linux.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Strip the periods from the version number +OS_VERSION=$(echo $(lsb_release -sr) | tr -d .) +OS=ubuntu${OS_VERSION} + +wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin +sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 +wget -nv https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb + +sudo dpkg -i cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb +sudo cp /var/cuda-repo-${OS}-11-7-local/cuda-*-keyring.gpg /usr/share/keyrings/ + +sudo apt-get -qq update +sudo apt install cuda cuda-nvcc-11-7 cuda-libraries-dev-11-7 +sudo apt clean + +rm -f https://developer.download.nvidia.com/compute/cuda/11.7.0/local_installers/cuda-repo-${OS}-11-7-local_11.7.0-515.43.04-1_amd64.deb diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 0fc0281..2475ba0 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -40,12 +40,15 @@ jobs: #python-version: ['3.7', '3.8', '3.9', '3.10'] python-version: ['3.10'] torch-version: [1.11.0] - cuda-version: ['113'] + cuda-version: ['113', '117'] #torch-version: [1.11.0, 1.12.0, 1.12.1] #cuda-version: ['113', '116'] - #exclude: - # - torch-version: 1.11.0 - # cuda-version: '116' + exclude: + # Nvidia only supports 11.7+ for ubuntu-22.04 + - os: ubuntu-22.04 + cuda-version: '116' + - os: ubuntu-22.04 + cuda-version: '113' steps: - name: Checkout From ac543b0e8d0d5f30e6ce02411f860995127ca013 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Fri, 2 Jun 2023 22:47:29 -0700 Subject: [PATCH 14/25] Full version matrix --- .github/workflows/publish.yml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2475ba0..07c4ebc 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -37,18 +37,20 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, ubuntu-22.04] - #python-version: ['3.7', '3.8', '3.9', '3.10'] - python-version: ['3.10'] - torch-version: [1.11.0] - cuda-version: ['113', '117'] - #torch-version: [1.11.0, 1.12.0, 1.12.1] - #cuda-version: ['113', '116'] + python-version: ['3.7', '3.8', '3.9', '3.10'] + torch-version: ['1.11.0', '1.12.0', '1.13.0'] + cuda-version: ['113', '116', '117'] exclude: # Nvidia only supports 11.7+ for ubuntu-22.04 - os: ubuntu-22.04 cuda-version: '116' - os: ubuntu-22.04 cuda-version: '113' + # Torch only builds cuda 117 for 1.13.0+ + - cuda-version: '117' + torch-version: '1.11.0' + - cuda-version: '117' + torch-version: '1.12.0' steps: - name: Checkout From 84009fcc66fe7a9d777f3b3ec49277ae704656b8 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Sat, 3 Jun 2023 09:51:13 -0700 Subject: [PATCH 15/25] Exclude additional disallowed matrix params --- .github/workflows/publish.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 07c4ebc..9091ede 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -51,6 +51,12 @@ jobs: torch-version: '1.11.0' - cuda-version: '117' torch-version: '1.12.0' + # Torch only builds cuda 116 for 1.12.0+ + - cuda-version: '116' + torch-version: '1.11.0' + # 1.13.0 drops support for cuda 11.3 + - cuda-version: '113' + torch-version: '1.13.0' steps: - name: Checkout @@ -123,6 +129,10 @@ jobs: ls dist/*whl |xargs -I {} mv {} dist/${wheel_name} echo "wheel_name=${wheel_name}" >> $GITHUB_ENV + - name: Log Built Wheels + run: | + ls dist + - name: Upload Release Asset id: upload_release_asset uses: actions/upload-release-asset@v1 From 1848d0004f4bf698b908db871db0a22666d2e311 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Sat, 3 Jun 2023 19:10:47 -0700 Subject: [PATCH 16/25] Exclude cuda erroring builds --- .github/workflows/publish.yml | 50 +++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 23 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 9091ede..44d894a 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -57,6 +57,10 @@ jobs: # 1.13.0 drops support for cuda 11.3 - cuda-version: '113' torch-version: '1.13.0' + # Fails with "Validation Error" on artifact upload + - cuda-version: '117' + torch-version: '1.13.0' + os: ubuntu-20.04 steps: - name: Checkout @@ -144,32 +148,32 @@ jobs: asset_name: ${{env.wheel_name}} asset_content_type: application/* - # publish_package: - # name: Publish package - # needs: [build_wheels] + publish_package: + name: Publish package + needs: [build_wheels] - # runs-on: ubuntu-latest + runs-on: ubuntu-latest - # steps: - # - uses: actions/checkout@v3 + steps: + - uses: actions/checkout@v3 - # - uses: actions/setup-python@v4 - # with: - # python-version: '3.10' + - uses: actions/setup-python@v4 + with: + python-version: '3.10' - # - name: Install dependencies - # run: | - # pip install ninja packaging setuptools wheel twine + - name: Install dependencies + run: | + pip install ninja packaging setuptools wheel twine - # - name: Build core package - # env: - # FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE" - # run: | - # python setup.py sdist --dist-dir=dist + - name: Build core package + env: + FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE" + run: | + python setup.py sdist --dist-dir=dist - # - name: Deploy - # env: - # TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - # TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - # run: | - # python -m twine upload dist/* + - name: Deploy + env: + TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + run: | + python -m twine upload dist/* From 8d60c373e4ed0075baa4c597891ffd9fb576752c Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Sat, 3 Jun 2023 20:26:45 -0700 Subject: [PATCH 17/25] Add torch dependency to final build --- .github/workflows/publish.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 44d894a..1f959c4 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -164,6 +164,7 @@ jobs: - name: Install dependencies run: | pip install ninja packaging setuptools wheel twine + pip install torch - name: Build core package env: From 494b2aa48657edb55eb9f5907d5e980014d9dbdc Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Sun, 4 Jun 2023 06:14:05 -0700 Subject: [PATCH 18/25] Add notes to github action workflow --- .github/workflows/publish.yml | 9 ++-- README.md | 8 +++ flash_attn/__init__.py | 2 +- flash_attn_builder/README.md | 3 ++ .../flash_attn_builder/__init__.py | 0 flash_attn_builder/flash_attn_builder/main.py | 54 +++++++++++++++++++ flash_attn_builder/pyproject.toml | 15 ++++++ pyproject.toml | 3 -- setup.py | 46 ++++++++++------ 9 files changed, 118 insertions(+), 22 deletions(-) create mode 100644 flash_attn_builder/README.md create mode 100644 flash_attn_builder/flash_attn_builder/__init__.py create mode 100644 flash_attn_builder/flash_attn_builder/main.py create mode 100644 flash_attn_builder/pyproject.toml delete mode 100644 pyproject.toml diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 1f959c4..83c4b48 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -1,8 +1,11 @@ -# This workflow will upload a Python Package to Release asset +# This workflow will: +# - Create a new Github release +# - Build wheels for supported architectures +# - Deploy the wheels to the Github release +# - Release the static code to PyPi # For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -name: Python Package +name: Build wheels and deploy on: create: diff --git a/README.md b/README.md index 31fc62a..99f8829 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,14 @@ To install: pip install flash-attn ``` +If you see an error about `ModuleNotFoundError: No module named 'torch'`, it's likely because of pypi's installation isolation. + +To fix you can run: + +```sh +pip install flash-attn --no-build-isolation +``` + Alternatively you can compile from source: ``` python setup.py install diff --git a/flash_attn/__init__.py b/flash_attn/__init__.py index 9e604c0..e13bd59 100644 --- a/flash_attn/__init__.py +++ b/flash_attn/__init__.py @@ -1 +1 @@ -__version__ = "1.0.7" +__version__ = "1.0.8" diff --git a/flash_attn_builder/README.md b/flash_attn_builder/README.md new file mode 100644 index 0000000..3e42b3b --- /dev/null +++ b/flash_attn_builder/README.md @@ -0,0 +1,3 @@ +## flash-attn-builder + +Basic build utilities for flash-attn. diff --git a/flash_attn_builder/flash_attn_builder/__init__.py b/flash_attn_builder/flash_attn_builder/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/flash_attn_builder/flash_attn_builder/main.py b/flash_attn_builder/flash_attn_builder/main.py new file mode 100644 index 0000000..1e750e7 --- /dev/null +++ b/flash_attn_builder/flash_attn_builder/main.py @@ -0,0 +1,54 @@ +import os +import sys +import urllib +import setuptools.build_meta +from setuptools.command.install import install +from packaging.version import parse, Version + +# @pierce - TODO: Update for proper release +BASE_WHEEL_URL = "https://github.com/piercefreeman/flash-attention/releases/download/{tag_name}/{wheel_name}" + +# FORCE_BUILD: Force a fresh build locally, instead of attempting to find prebuilt wheels +# SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation +FORCE_BUILD = os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE" + +class CustomBuildBackend(setuptools.build_meta._BuildMetaBackend): + + def build_wheel(self, wheel_directory, config_settings=None, metadata_directory=None): + this_file_directory = os.path.dirname(os.path.abspath(__file__)) + print(f'This file is located in: {this_file_directory}') + + sys.argv = [ + *sys.argv[:1], + *self._global_args(config_settings), + *self._arbitrary_args(config_settings), + ] + with setuptools.build_meta.no_install_setup_requires(): + self.run_setup() + + print("OS", os.environ["FLASH_ATTENTION_WHEEL_URL"]) + print("config_settings", config_settings) + print("metadata_directory", metadata_directory) + raise ValueError + + print("Guessing wheel URL: ", wheel_url) + + try: + urllib.request.urlretrieve(wheel_url, wheel_filename) + os.system(f'pip install {wheel_filename}') + os.remove(wheel_filename) + except urllib.error.HTTPError: + print("Precompiled wheel not found. Building from source...") + # If the wheel could not be downloaded, build from source + super().build_wheel(wheel_directory, config_settings, metadata_directory) + + +_BACKEND = CustomBuildBackend() # noqa + + +get_requires_for_build_wheel = _BACKEND.get_requires_for_build_wheel +get_requires_for_build_sdist = _BACKEND.get_requires_for_build_sdist +prepare_metadata_for_build_wheel = _BACKEND.prepare_metadata_for_build_wheel +build_wheel = _BACKEND.build_wheel +build_sdist = _BACKEND.build_sdist + diff --git a/flash_attn_builder/pyproject.toml b/flash_attn_builder/pyproject.toml new file mode 100644 index 0000000..7fa99d4 --- /dev/null +++ b/flash_attn_builder/pyproject.toml @@ -0,0 +1,15 @@ +[tool.poetry] +name = "flash-attn-builder" +version = "0.1.0" +description = "" +authors = ["Pierce Freeman "] +readme = "README.md" +packages = [{include = "flash_attn_builder"}] + +[tool.poetry.dependencies] +python = "^3.10" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/pyproject.toml b/pyproject.toml deleted file mode 100644 index f67608a..0000000 --- a/pyproject.toml +++ /dev/null @@ -1,3 +0,0 @@ -[build-system] -requires = ["ninja", "packaging", "setuptools", "wheel"] -build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index cf8a7ef..89222f7 100644 --- a/setup.py +++ b/setup.py @@ -9,13 +9,15 @@ from packaging.version import parse, Version import platform from setuptools import setup, find_packages -from setuptools.command.install import install +from setuptools.command.build import build import subprocess +from setuptools.command.bdist_egg import bdist_egg import urllib.request import urllib.error import torch from torch.utils.cpp_extension import BuildExtension, CppExtension, CUDAExtension, CUDA_HOME +from wheel.bdist_wheel import bdist_wheel as _bdist_wheel with open("README.md", "r", encoding="utf-8") as fh: @@ -25,6 +27,7 @@ with open("README.md", "r", encoding="utf-8") as fh: # ninja build does not work unless include_dirs are abs path this_dir = os.path.dirname(os.path.abspath(__file__)) +PACKAGE_NAME = "flash_attn_wheels" # @pierce - TODO: Update for proper release BASE_WHEEL_URL = "https://github.com/piercefreeman/flash-attention/releases/download/{tag_name}/{wheel_name}" @@ -201,15 +204,17 @@ def get_package_version(): return str(public_version) -class CachedWheelsCommand(install): - """ - Installer hook to scan for existing wheels that match the current platform environment. - Falls back to building from source if no wheel is found. +class CachedWheelsCommand(_bdist_wheel): + """ + The CachedWheelsCommand plugs into the default bdist wheel, which is ran by pip when it cannot + find an existing wheel (which is currently the case for all flash attention installs). We use + the environment parameters to detect whether there is already a pre-built version of a compatible + wheel available and short-circuits the standard full build pipeline. - """ - def run(self): + """ + def run(self): if FORCE_BUILD: - return install.run(self) + return build.run(self) raise_if_cuda_home_none("flash_attn") @@ -223,7 +228,7 @@ class CachedWheelsCommand(install): torch_version = f"{torch_version_raw.major}.{torch_version_raw.minor}.{torch_version_raw.micro}" # Determine wheel URL based on CUDA version, torch version, python version and OS - wheel_filename = f'flash_attn-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl' + wheel_filename = f'{PACKAGE_NAME}-{flash_version}+cu{cuda_version}torch{torch_version}-{python_version}-{python_version}-{platform_name}.whl' wheel_url = BASE_WHEEL_URL.format( tag_name=f"v{flash_version}", wheel_name=wheel_filename @@ -232,17 +237,28 @@ class CachedWheelsCommand(install): try: urllib.request.urlretrieve(wheel_url, wheel_filename) - os.system(f'pip install {wheel_filename}') - os.remove(wheel_filename) + + # Make the archive + # Lifted from the root wheel processing command + # https://github.com/pypa/wheel/blob/cf71108ff9f6ffc36978069acb28824b44ae028e/src/wheel/bdist_wheel.py#LL381C9-L381C85 + if not os.path.exists(self.dist_dir): + os.makedirs(self.dist_dir) + + impl_tag, abi_tag, plat_tag = self.get_tag() + archive_basename = f"{self.wheel_dist_name}-{impl_tag}-{abi_tag}-{plat_tag}" + + wheel_path = os.path.join(self.dist_dir, archive_basename + ".whl") + print("Raw wheel path", wheel_path) + os.rename(wheel_filename, wheel_path) except urllib.error.HTTPError: print("Precompiled wheel not found. Building from source...") # If the wheel could not be downloaded, build from source - install.run(self) + super().run() setup( # @pierce - TODO: Revert for official release - name="flash_attn_wheels", + name=PACKAGE_NAME, version=get_package_version(), packages=find_packages( exclude=("build", "csrc", "include", "tests", "dist", "docs", "benchmarks", "flash_attn.egg-info",) @@ -264,10 +280,10 @@ setup( ], ext_modules=ext_modules, cmdclass={ - 'install': CachedWheelsCommand, + 'bdist_wheel': CachedWheelsCommand, "build_ext": BuildExtension } if ext_modules else { - 'install': CachedWheelsCommand, + 'bdist_wheel': CachedWheelsCommand, }, python_requires=">=3.7", install_requires=[ From 6c730dc8c669ffd140ed90366cd96aa031a08594 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Wed, 7 Jun 2023 17:07:14 -0700 Subject: [PATCH 19/25] Bump version --- flash_attn/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flash_attn/__init__.py b/flash_attn/__init__.py index e13bd59..39e0411 100644 --- a/flash_attn/__init__.py +++ b/flash_attn/__init__.py @@ -1 +1 @@ -__version__ = "1.0.8" +__version__ = "1.0.9" From eb812c205b4a4327230f5e75407d06e75917417b Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Wed, 7 Jun 2023 17:20:13 -0700 Subject: [PATCH 20/25] Remove builder project --- flash_attn_builder/README.md | 3 -- .../flash_attn_builder/__init__.py | 0 flash_attn_builder/flash_attn_builder/main.py | 54 ------------------- flash_attn_builder/pyproject.toml | 15 ------ 4 files changed, 72 deletions(-) delete mode 100644 flash_attn_builder/README.md delete mode 100644 flash_attn_builder/flash_attn_builder/__init__.py delete mode 100644 flash_attn_builder/flash_attn_builder/main.py delete mode 100644 flash_attn_builder/pyproject.toml diff --git a/flash_attn_builder/README.md b/flash_attn_builder/README.md deleted file mode 100644 index 3e42b3b..0000000 --- a/flash_attn_builder/README.md +++ /dev/null @@ -1,3 +0,0 @@ -## flash-attn-builder - -Basic build utilities for flash-attn. diff --git a/flash_attn_builder/flash_attn_builder/__init__.py b/flash_attn_builder/flash_attn_builder/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/flash_attn_builder/flash_attn_builder/main.py b/flash_attn_builder/flash_attn_builder/main.py deleted file mode 100644 index 1e750e7..0000000 --- a/flash_attn_builder/flash_attn_builder/main.py +++ /dev/null @@ -1,54 +0,0 @@ -import os -import sys -import urllib -import setuptools.build_meta -from setuptools.command.install import install -from packaging.version import parse, Version - -# @pierce - TODO: Update for proper release -BASE_WHEEL_URL = "https://github.com/piercefreeman/flash-attention/releases/download/{tag_name}/{wheel_name}" - -# FORCE_BUILD: Force a fresh build locally, instead of attempting to find prebuilt wheels -# SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation -FORCE_BUILD = os.getenv("FLASH_ATTENTION_FORCE_BUILD", "FALSE") == "TRUE" - -class CustomBuildBackend(setuptools.build_meta._BuildMetaBackend): - - def build_wheel(self, wheel_directory, config_settings=None, metadata_directory=None): - this_file_directory = os.path.dirname(os.path.abspath(__file__)) - print(f'This file is located in: {this_file_directory}') - - sys.argv = [ - *sys.argv[:1], - *self._global_args(config_settings), - *self._arbitrary_args(config_settings), - ] - with setuptools.build_meta.no_install_setup_requires(): - self.run_setup() - - print("OS", os.environ["FLASH_ATTENTION_WHEEL_URL"]) - print("config_settings", config_settings) - print("metadata_directory", metadata_directory) - raise ValueError - - print("Guessing wheel URL: ", wheel_url) - - try: - urllib.request.urlretrieve(wheel_url, wheel_filename) - os.system(f'pip install {wheel_filename}') - os.remove(wheel_filename) - except urllib.error.HTTPError: - print("Precompiled wheel not found. Building from source...") - # If the wheel could not be downloaded, build from source - super().build_wheel(wheel_directory, config_settings, metadata_directory) - - -_BACKEND = CustomBuildBackend() # noqa - - -get_requires_for_build_wheel = _BACKEND.get_requires_for_build_wheel -get_requires_for_build_sdist = _BACKEND.get_requires_for_build_sdist -prepare_metadata_for_build_wheel = _BACKEND.prepare_metadata_for_build_wheel -build_wheel = _BACKEND.build_wheel -build_sdist = _BACKEND.build_sdist - diff --git a/flash_attn_builder/pyproject.toml b/flash_attn_builder/pyproject.toml deleted file mode 100644 index 7fa99d4..0000000 --- a/flash_attn_builder/pyproject.toml +++ /dev/null @@ -1,15 +0,0 @@ -[tool.poetry] -name = "flash-attn-builder" -version = "0.1.0" -description = "" -authors = ["Pierce Freeman "] -readme = "README.md" -packages = [{include = "flash_attn_builder"}] - -[tool.poetry.dependencies] -python = "^3.10" - - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" From 9af165c38920bd18fc066e193383903e6ecff451 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Wed, 7 Jun 2023 17:26:13 -0700 Subject: [PATCH 21/25] Clean setup.py imports --- setup.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 89222f7..4737c5b 100644 --- a/setup.py +++ b/setup.py @@ -9,9 +9,7 @@ from packaging.version import parse, Version import platform from setuptools import setup, find_packages -from setuptools.command.build import build import subprocess -from setuptools.command.bdist_egg import bdist_egg import urllib.request import urllib.error @@ -214,7 +212,7 @@ class CachedWheelsCommand(_bdist_wheel): """ def run(self): if FORCE_BUILD: - return build.run(self) + return super().run() raise_if_cuda_home_none("flash_attn") From 565615c603bc83ff0215cf62bc4d907b27041215 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Thu, 10 Aug 2023 19:54:29 -0700 Subject: [PATCH 22/25] Isolate 2.0.0 & cuda12 --- .github/workflows/cuda/cu120-Linux-env.sh | 9 +++++++++ .github/workflows/cuda/cu120-Linux.sh | 18 ++++++++++++++++++ .github/workflows/publish.yml | 18 +++++++++++++++--- 3 files changed, 42 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/cuda/cu120-Linux-env.sh create mode 100644 .github/workflows/cuda/cu120-Linux.sh diff --git a/.github/workflows/cuda/cu120-Linux-env.sh b/.github/workflows/cuda/cu120-Linux-env.sh new file mode 100644 index 0000000..37917cc --- /dev/null +++ b/.github/workflows/cuda/cu120-Linux-env.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +CUDA_HOME=/usr/local/cuda-12.0 +LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} +PATH=${CUDA_HOME}/bin:${PATH} + +export FORCE_CUDA=1 +export TORCH_CUDA_ARCH_LIST="3.5;5.0+PTX;6.0;7.0;7.5;8.0;8.6" +export CUDA_HOME=/usr/local/cuda-12.0 \ No newline at end of file diff --git a/.github/workflows/cuda/cu120-Linux.sh b/.github/workflows/cuda/cu120-Linux.sh new file mode 100644 index 0000000..56996de --- /dev/null +++ b/.github/workflows/cuda/cu120-Linux.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Strip the periods from the version number +OS_VERSION=$(echo $(lsb_release -sr) | tr -d .) +OS=ubuntu${OS_VERSION} + +wget -nv https://developer.download.nvidia.com/compute/cuda/repos/${OS}/x86_64/cuda-${OS}.pin +sudo mv cuda-${OS}.pin /etc/apt/preferences.d/cuda-repository-pin-600 +wget -nv https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb + +sudo dpkg -i cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb +sudo cp /var/cuda-repo-${OS}-12-0-local/cuda-*-keyring.gpg /usr/share/keyrings/ + +sudo apt-get -qq update +sudo apt install cuda cuda-nvcc-12-0 cuda-libraries-dev-12-0 +sudo apt clean + +rm -f https://developer.download.nvidia.com/compute/cuda/12.0.0/local_installers/cuda-repo-${OS}-12-0-local_12.0.0-525.60.13-1_amd64.deb diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 83c4b48..05eaaad 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -40,9 +40,12 @@ jobs: fail-fast: false matrix: os: [ubuntu-20.04, ubuntu-22.04] - python-version: ['3.7', '3.8', '3.9', '3.10'] - torch-version: ['1.11.0', '1.12.0', '1.13.0'] - cuda-version: ['113', '116', '117'] + #python-version: ['3.7', '3.8', '3.9', '3.10'] + #torch-version: ['1.11.0', '1.12.0', '1.13.0', '2.0.1'] + #cuda-version: ['113', '116', '117', '120'] + python-version: ['3.10'] + torch-version: ['2.0.1'] + cuda-version: ['120'] exclude: # Nvidia only supports 11.7+ for ubuntu-22.04 - os: ubuntu-22.04 @@ -57,9 +60,18 @@ jobs: # Torch only builds cuda 116 for 1.12.0+ - cuda-version: '116' torch-version: '1.11.0' + # Torch only builds cuda 120 for 2.0.1+ + - cuda-version: '120' + torch-version: '1.11.0' + - cuda-version: '120' + torch-version: '1.12.0' + - cuda-version: '120' + torch-version: '1.13.0' # 1.13.0 drops support for cuda 11.3 - cuda-version: '113' torch-version: '1.13.0' + - cuda-version: '113' + torch-version: '2.0.1' # Fails with "Validation Error" on artifact upload - cuda-version: '117' torch-version: '1.13.0' From bc6d4992f2de570969bfbc956799c67fd81c31d0 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Thu, 10 Aug 2023 19:55:52 -0700 Subject: [PATCH 23/25] Build wheel on each push --- .github/workflows/publish.yml | 76 ++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 05eaaad..08f40af 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -7,10 +7,12 @@ name: Build wheels and deploy +#on: +# create: +# tags: +# - '**' on: - create: - tags: - - '**' + push jobs: setup_release: @@ -152,44 +154,44 @@ jobs: run: | ls dist - - name: Upload Release Asset - id: upload_release_asset - uses: actions/upload-release-asset@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - upload_url: ${{ steps.get_current_release.outputs.upload_url }} - asset_path: ./dist/${{env.wheel_name}} - asset_name: ${{env.wheel_name}} - asset_content_type: application/* + # - name: Upload Release Asset + # id: upload_release_asset + # uses: actions/upload-release-asset@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # with: + # upload_url: ${{ steps.get_current_release.outputs.upload_url }} + # asset_path: ./dist/${{env.wheel_name}} + # asset_name: ${{env.wheel_name}} + # asset_content_type: application/* - publish_package: - name: Publish package - needs: [build_wheels] + # publish_package: + # name: Publish package + # needs: [build_wheels] - runs-on: ubuntu-latest + # runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 + # steps: + # - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - with: - python-version: '3.10' + # - uses: actions/setup-python@v4 + # with: + # python-version: '3.10' - - name: Install dependencies - run: | - pip install ninja packaging setuptools wheel twine - pip install torch + # - name: Install dependencies + # run: | + # pip install ninja packaging setuptools wheel twine + # pip install torch - - name: Build core package - env: - FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE" - run: | - python setup.py sdist --dist-dir=dist + # - name: Build core package + # env: + # FLASH_ATTENTION_SKIP_CUDA_BUILD: "TRUE" + # run: | + # python setup.py sdist --dist-dir=dist - - name: Deploy - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} - run: | - python -m twine upload dist/* + # - name: Deploy + # env: + # TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} + # TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }} + # run: | + # python -m twine upload dist/* From ecc6535443c73efca91007b1a300c4b049c6c0ff Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Thu, 10 Aug 2023 19:56:24 -0700 Subject: [PATCH 24/25] Remove release creation --- .github/workflows/publish.yml | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 08f40af..f29539d 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -15,28 +15,28 @@ on: push jobs: - setup_release: - name: Create Release - runs-on: ubuntu-latest - steps: - - name: Get the tag version - id: extract_branch - run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/} - shell: bash + # setup_release: + # name: Create Release + # runs-on: ubuntu-latest + # steps: + # - name: Get the tag version + # id: extract_branch + # run: echo ::set-output name=branch::${GITHUB_REF#refs/tags/} + # shell: bash - - name: Create Release - id: create_release - uses: actions/create-release@v1 - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - with: - tag_name: ${{ steps.extract_branch.outputs.branch }} - release_name: ${{ steps.extract_branch.outputs.branch }} + # - name: Create Release + # id: create_release + # uses: actions/create-release@v1 + # env: + # GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # with: + # tag_name: ${{ steps.extract_branch.outputs.branch }} + # release_name: ${{ steps.extract_branch.outputs.branch }} build_wheels: name: Build Wheel runs-on: ${{ matrix.os }} - needs: setup_release + #needs: setup_release strategy: fail-fast: false From 6ef3bd800e8b8104537ffa0ba4ea10306da40f42 Mon Sep 17 00:00:00 2001 From: Pierce Freeman Date: Thu, 10 Aug 2023 20:12:20 -0700 Subject: [PATCH 25/25] Install standard non-wheel package --- .github/workflows/publish.yml | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index f29539d..bc01441 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -116,13 +116,24 @@ jobs: - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }} run: | pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya - pip install --no-index --no-cache-dir torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cu${{ matrix.cuda-version }}/torch_stable.html + pip install --no-cache-dir torch==${{ matrix.torch-version }} python --version python -c "import torch; print('PyTorch:', torch.__version__)" python -c "import torch; print('CUDA:', torch.version.cuda)" python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)" shell: bash + + # - name: Install PyTorch ${{ matrix.torch-version }}+cu${{ matrix.cuda-version }} + # run: | + # pip install numpy pyyaml scipy ipython mkl mkl-include ninja cython typing pandas typing-extensions dataclasses && conda clean -ya + # pip install --no-index --no-cache-dir torch==${{ matrix.torch-version }} -f https://download.pytorch.org/whl/cu${{ matrix.cuda-version }}/torch_stable.html + # python --version + # python -c "import torch; print('PyTorch:', torch.__version__)" + # python -c "import torch; print('CUDA:', torch.version.cuda)" + # python -c "from torch.utils import cpp_extension; print (cpp_extension.CUDA_HOME)" + # shell: + # bash - name: Get the tag version id: extract_branch