diff --git a/.buildkite/check-wheel-size.py b/.buildkite/check-wheel-size.py
new file mode 100644
index 00000000..8178fba5
--- /dev/null
+++ b/.buildkite/check-wheel-size.py
@@ -0,0 +1,35 @@
+"""Fail the Docker build if any built wheel exceeds MAX_SIZE_MB."""
+import os
+import zipfile
+
+MAX_SIZE_MB = 100
+
+
+def print_top_10_largest_files(zip_file):
+    with zipfile.ZipFile(zip_file, 'r') as z:
+        file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()]
+        file_sizes.sort(key=lambda x: x[1], reverse=True)
+        for f, size in file_sizes[:10]:
+            print(f"{f}: {size/(1024*1024)} MBs uncompressed.")
+
+
+def check_wheel_size(directory):
+    for root, _, files in os.walk(directory):
+        for f in files:
+            if f.endswith(".whl"):
+                wheel_path = os.path.join(root, f)
+                wheel_size = os.path.getsize(wheel_path)
+                wheel_size_mb = wheel_size / (1024 * 1024)
+                if wheel_size_mb > MAX_SIZE_MB:
+                    print(
+                        f"Wheel {wheel_path} is too large ({wheel_size_mb} MB) "
+                        f"compared to the allowed size ({MAX_SIZE_MB} MB).")
+                    print_top_10_largest_files(wheel_path)
+                    # a non-zero exit code fails the calling build step
+                    return 1
+    return 0
+
+
+if __name__ == "__main__":
+    import sys
+    sys.exit(check_wheel_size(sys.argv[1]))
diff --git a/Dockerfile b/Dockerfile
index e8a9842c..90be3a30 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -7,7 +7,7 @@
 
 #################### BASE BUILD IMAGE ####################
 # prepare basic build environment
-FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS dev
+FROM nvidia/cuda:12.4.1-devel-ubuntu22.04 AS dev
 
 RUN apt-get update -y \
     && apt-get install -y python3-pip git
@@ -16,7 +16,7 @@ RUN apt-get update -y \
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
 # this won't be needed for future versions of this docker image
 # or future versions of triton.
-RUN ldconfig /usr/local/cuda-12.1/compat/
+RUN ldconfig /usr/local/cuda-12.4/compat/
 
 WORKDIR /workspace
 
@@ -75,6 +75,10 @@ RUN --mount=type=cache,target=/root/.cache/ccache \
     --mount=type=cache,target=/root/.cache/pip \
     python3 setup.py bdist_wheel --dist-dir=dist
 
+# check the size of the wheel; we cannot upload wheels larger than 100 MB
+COPY .buildkite/check-wheel-size.py check-wheel-size.py
+RUN python3 check-wheel-size.py dist
+
 # the `vllm_nccl` package must be installed from source distribution
 # pip is too smart to store a wheel in the cache, and other CI jobs
 # will directly use the wheel from the cache, which is not what we want.
@@ -102,7 +106,7 @@ RUN pip --verbose wheel flash-attn==${FLASH_ATTN_VERSION} \
 
 #################### vLLM installation IMAGE ####################
 # image with vLLM installed
-FROM nvidia/cuda:12.1.0-base-ubuntu22.04 AS vllm-base
+FROM nvidia/cuda:12.4.1-base-ubuntu22.04 AS vllm-base
 WORKDIR /vllm-workspace
 
 RUN apt-get update -y \
@@ -112,7 +116,7 @@ RUN apt-get update -y \
 # https://github.com/pytorch/pytorch/issues/107960 -- hopefully
 # this won't be needed for future versions of this docker image
 # or future versions of triton.
-RUN ldconfig /usr/local/cuda-12.1/compat/
+RUN ldconfig /usr/local/cuda-12.4/compat/
 
 # install vllm wheel first, so that torch etc will be installed
 RUN --mount=type=bind,from=build,src=/workspace/dist,target=/vllm-workspace/dist \
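
A quick way to exercise the new checker outside the Docker build is the following minimal sketch (not part of the patch; the path-based import shim and the dummy wheel name are illustrative assumptions). It loads .buildkite/check-wheel-size.py by file path, since the hyphenated filename is not an importable module name, builds a tiny fake wheel in a temp directory, and asserts that the check passes:

# sanity-check sketch for .buildkite/check-wheel-size.py (illustrative only)
import importlib.util
import os
import tempfile
import zipfile

# Load the script by path; its hyphenated filename is not a valid module name.
spec = importlib.util.spec_from_file_location(
    "check_wheel_size", ".buildkite/check-wheel-size.py")
checker = importlib.util.module_from_spec(spec)
spec.loader.exec_module(checker)

with tempfile.TemporaryDirectory() as d:
    # a tiny fake wheel, far below the 100 MB limit
    wheel_path = os.path.join(d, "dummy-0.0.1-py3-none-any.whl")
    with zipfile.ZipFile(wheel_path, "w") as z:
        z.writestr("dummy/__init__.py", "")
    assert checker.check_wheel_size(d) == 0  # small wheel should pass
    print("size check passed for the dummy wheel")

Run it from the repo root. An oversized wheel would instead make check_wheel_size return 1, which the new RUN step in the Dockerfile turns into a build failure.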