Hotfix(MInference): fix the pip setup issue (#6)
* Hotfix(MInference): fix the pip setup issue
* Hotfix(MInference): fix the torch version

Co-authored-by: Yucheng Li <liyucheng09@gmail.com>
Co-authored-by: Chengruidong Zhang <chengzhang@microsoft.com>
3 people authored Jul 3, 2024
1 parent 038e005 commit cfda78c
Showing 10 changed files with 122 additions and 38 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/release.yml
@@ -73,6 +73,12 @@ jobs:
with:
python-version: ${{ matrix.python-version }}

- name: Set CUDA and PyTorch versions
run: |
echo "MATRIX_CUDA_VERSION=$(echo ${{ matrix.cuda-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
echo "MATRIX_TORCH_VERSION=$(echo ${{ matrix.pytorch-version }} | awk -F \. {'print $1 "." $2'})" >> $GITHUB_ENV
echo "MATRIX_PYTHON_VERSION=$(echo ${{ matrix.python-version }} | awk -F \. {'print $1 $2'})" >> $GITHUB_ENV
- name: Install CUDA ${{ matrix.cuda-version }}
run: |
bash -x .github/workflows/scripts/cuda-install.sh ${{ matrix.cuda-version }} ${{ matrix.os }}
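For orientation, here is a minimal Python sketch of what the new "Set CUDA and PyTorch versions" step computes; the version values below (CUDA 12.1, PyTorch 2.3.0, Python 3.10) are hypothetical stand-ins for the workflow matrix, which is not shown in this diff:

```python
# Rough Python equivalent of the three awk one-liners above, using
# hypothetical matrix values purely for illustration.
def squash(version: str, keep_dot: bool = False) -> str:
    major, minor = version.split(".")[:2]
    return f"{major}.{minor}" if keep_dot else f"{major}{minor}"

print(squash("12.1"))                  # MATRIX_CUDA_VERSION   -> "121"
print(squash("2.3.0", keep_dot=True))  # MATRIX_TORCH_VERSION  -> "2.3"
print(squash("3.10"))                  # MATRIX_PYTHON_VERSION -> "310"
```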
7 changes: 6 additions & 1 deletion .github/workflows/scripts/build.sh
@@ -16,4 +16,9 @@ export MAX_JOBS=1
# Make sure release wheels are built for the following architectures
export TORCH_CUDA_ARCH_LIST="7.0 7.5 8.0 8.6 8.9 9.0+PTX"
# Build
$python_executable setup.py $3 --dist-dir=dist
if [ "$3" = sdist ];
then
MINFERENCE_SKIP_CUDA_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
else
MINFERENCE_LOCAL_VERSION=cu${MATRIX_CUDA_VERSION}torch${MATRIX_TORCH_VERSION} MINFERENCE_FORCE_BUILD="TRUE" $python_executable setup.py $3 --dist-dir=dist
fi
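Taken together with the release workflow above, the wheel branch stamps the build with a PEP 440 local version derived from the CI matrix, while the sdist branch skips CUDA compilation entirely. A small sketch of the resulting version string, assuming a base version of 0.1.1 and the hypothetical matrix values from the earlier example:

```python
import os

# Hypothetical example: MATRIX_CUDA_VERSION=121 and MATRIX_TORCH_VERSION=2.3
# yield MINFERENCE_LOCAL_VERSION=cu121torch2.3, which setup.py appends to the
# base package version (see get_minference_version further down).
os.environ["MINFERENCE_LOCAL_VERSION"] = "cu121torch2.3"

base_version = "0.1.1"  # assumed; comes from minference/version.py
local_version = os.environ.get("MINFERENCE_LOCAL_VERSION")
wheel_version = f"{base_version}+{local_version}" if local_version else base_version
print(wheel_version)  # -> "0.1.1+cu121torch2.3"
```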
5 changes: 4 additions & 1 deletion .github/workflows/unittest.yml
@@ -1,7 +1,10 @@
name: Unit Test

# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
on: [] # Trigger the workflow on pull request or merge
on:
push:
branches:
- 'test/**'
# pull_request:
# merge_group:
# types: [checks_requested]
1 change: 1 addition & 0 deletions .gitignore
@@ -414,3 +414,4 @@ __pycache__
build/
*.egg-info/
*.so
dist
4 changes: 2 additions & 2 deletions README.md
@@ -8,14 +8,14 @@

<p align="center">
| <a href="https://aka.ms/MInference"><b>Project Page</b></a> |
<a href="https://arxiv.org/abs/2407.02490"><b>Paper</b></a> |
<a href="https://export.arxiv.org/pdf/2407.02490"><b>Paper</b></a> |
<a href="https://huggingface.co/spaces/microsoft/MInference"><b>HF Demo</b></a> |
</p>

https://github.com/microsoft/MInference/assets/30883354/52613efc-738f-4081-8367-7123c81d6b19

## News
- 📃 [24/07/03] Due to an issue with arXiv, the PDF is currently unavailable there. You can find the paper at this [link](https://github.com/microsoft/MInference/blob/main/papers/MInference1_Arxiv.pdf).
- 📃 [24/07/03] Due to an issue with arXiv, the PDF is currently unavailable there. You can find the paper at this [link](https://export.arxiv.org/pdf/2407.02490).
- 🧩 [24/07/03] We will present **MInference 1.0** at the _**Microsoft Booth**_ and _**ES-FoMo**_ at ICML'24. See you in Vienna!

## TL;DR
Empty file added minference/configs/__init__.py
Empty file.
Empty file added minference/modules/__init__.py
Empty file.
Empty file added minference/ops/__init__.py
Empty file.
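These empty __init__.py files look like the heart of the pip fix: setuptools' find_packages() only picks up directories that contain an __init__.py, so without them the configs, modules, and ops subdirectories were presumably left out of the built distribution. A minimal sketch, assuming the repository layout implied by this commit:

```python
from setuptools import find_packages

# With the new __init__.py files present, find_packages() should now report
# the subpackages as well; before this commit it would have skipped them.
packages = find_packages(exclude=("csrc",))
print(packages)
# expected to include: "minference", "minference.configs",
#                      "minference.modules", "minference.ops", ...
```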
2 changes: 1 addition & 1 deletion minference/version.py
@@ -5,7 +5,7 @@
_MINOR = "1"
# On master and in a nightly release the patch should be one ahead of the last
# released build.
_PATCH = "0"
_PATCH = "1"
# This is mainly for nightly builds which have the suffix ".dev$DATE". See
# https://semver.org/#is-v123-a-semantic-version for the semantics.
_SUFFIX = ""
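The only change here is the patch bump from 0 to 1. A hypothetical reconstruction of how the final version string is assembled (the assembly itself and the _MAJOR value are outside this hunk and are assumptions):

```python
# Hypothetical reconstruction of minference/version.py after this commit;
# only _MINOR and _PATCH appear in the hunk, the rest follows the usual
# _MAJOR/_MINOR/_PATCH/_SUFFIX pattern and is assumed.
_MAJOR = "0"
_MINOR = "1"
_PATCH = "1"
_SUFFIX = ""

VERSION = f"{_MAJOR}.{_MINOR}.{_PATCH}{_SUFFIX}"
print(VERSION)  # -> "0.1.1"
```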
135 changes: 102 additions & 33 deletions setup.py
@@ -8,6 +8,7 @@
from packaging.version import Version, parse
from setuptools import find_packages, setup
from torch.utils.cpp_extension import CUDA_HOME, BuildExtension, CUDAExtension
from wheel.bdist_wheel import bdist_wheel as _bdist_wheel

# PEP0440 compatible formatted version, see:
# https://www.python.org/dev/peps/pep-0440/
@@ -46,52 +47,117 @@
]
DEV_REQUIRES = INSTALL_REQUIRES + QUANLITY_REQUIRES

MAIN_CUDA_VERSION = "12.1"

# ninja build does not work unless include_dirs are abs path
this_dir = os.path.dirname(os.path.abspath(__file__))

def _is_cuda() -> bool:
return torch.version.cuda is not None
PACKAGE_NAME = "minference"

BASE_WHEEL_URL = (
"https://github.com/microsoft/MInference/releases/download/{tag_name}/{wheel_name}"
)

def get_nvcc_cuda_version() -> Version:
"""Get the CUDA version from nvcc.
# FORCE_BUILD: Force a fresh build locally, instead of attempting to find prebuilt wheels
# SKIP_CUDA_BUILD: Intended to allow CI to use a simple `python setup.py sdist` run to copy over raw files, without any cuda compilation
FORCE_BUILD = os.getenv("MINFERENCE_FORCE_BUILD", "FALSE") == "TRUE"
SKIP_CUDA_BUILD = os.getenv("MINFERENCE_SKIP_CUDA_BUILD", "FALSE") == "TRUE"
# For CI, we want the option to build with C++11 ABI since the nvcr images use C++11 ABI
FORCE_CXX11_ABI = os.getenv("MINFERENCE_FORCE_CXX11_ABI", "FALSE") == "TRUE"


def check_if_cuda_home_none(global_option: str) -> None:
if CUDA_HOME is not None:
return
# warn instead of error because user could be downloading prebuilt wheels, so nvcc won't be necessary
# in that case.
warnings.warn(
f"{global_option} was requested, but nvcc was not found. Are you sure your environment has nvcc available? "
"If you're installing within a container from https://hub.docker.com/r/pytorch/pytorch, "
"only images whose names contain 'devel' will provide nvcc."
)

Adapted from https://github.com/NVIDIA/apex/blob/8b7a1ff183741dd8f9b87e7bafd04cfde99cea28/setup.py
"""
assert CUDA_HOME is not None, "CUDA_HOME is not set"
nvcc_output = subprocess.check_output(
[CUDA_HOME + "/bin/nvcc", "-V"], universal_newlines=True

cmdclass = {}
ext_modules = []

if not SKIP_CUDA_BUILD:
print("\n\ntorch.__version__ = {}\n\n".format(torch.__version__))
TORCH_MAJOR = int(torch.__version__.split(".")[0])
TORCH_MINOR = int(torch.__version__.split(".")[1])

# Check, if ATen/CUDAGeneratorImpl.h is found, otherwise use ATen/cuda/CUDAGeneratorImpl.h
# See https://github.com/pytorch/pytorch/pull/70650
generator_flag = []
torch_dir = torch.__path__[0]
if os.path.exists(
os.path.join(torch_dir, "include", "ATen", "CUDAGeneratorImpl.h")
):
generator_flag = ["-DOLD_GENERATOR_PATH"]

check_if_cuda_home_none("minference")

# HACK: The compiler flag -D_GLIBCXX_USE_CXX11_ABI is set to be the same as
# torch._C._GLIBCXX_USE_CXX11_ABI
# https://github.com/pytorch/pytorch/blob/8472c24e3b5b60150096486616d98b7bea01500b/torch/utils/cpp_extension.py#L920
if FORCE_CXX11_ABI:
torch._C._GLIBCXX_USE_CXX11_ABI = True
ext_modules.append(
CUDAExtension(
name="minference.cuda",
sources=[
os.path.join("csrc", "kernels.cpp"),
os.path.join("csrc", "vertical_slash_index.cu"),
],
extra_compile_args=["-std=c++17", "-O3"],
)
)
output = nvcc_output.split()
release_idx = output.index("release") + 1
nvcc_cuda_version = parse(output[release_idx].split(",")[0])
return nvcc_cuda_version


def get_minference_version() -> str:
version = VERSION["VERSION"]

if _is_cuda():
cuda_version = str(get_nvcc_cuda_version())
if cuda_version != MAIN_CUDA_VERSION:
cuda_version_str = cuda_version.replace(".", "")[:3]
version += f"+cu{cuda_version_str}"
local_version = os.environ.get("MINFERENCE_LOCAL_VERSION")
if local_version:
return f"{version}+{local_version}"
else:
raise RuntimeError("Unknown runtime environment")
return str(version)

return version

class CachedWheelsCommand(_bdist_wheel):
"""
The CachedWheelsCommand plugs into the default bdist_wheel command, which is run by pip when it
cannot find an existing wheel (which is currently the case for all flash attention installs). We use
the environment parameters to detect whether a pre-built compatible wheel is already available
and, if so, short-circuit the standard full build pipeline.
"""

def run(self):
return super().run()


class NinjaBuildExtension(BuildExtension):
def __init__(self, *args, **kwargs) -> None:
# do not override env MAX_JOBS if already exists
if not os.environ.get("MAX_JOBS"):
import psutil

# calculate the maximum allowed NUM_JOBS based on cores
max_num_jobs_cores = max(1, os.cpu_count() // 2)

# calculate the maximum allowed NUM_JOBS based on free memory
free_memory_gb = psutil.virtual_memory().available / (
1024**3
) # free memory in GB
max_num_jobs_memory = int(
free_memory_gb / 9
) # each JOB peak memory cost is ~8-9GB when threads = 4

# pick lower value of jobs based on cores vs memory metric to minimize oom and swap usage during compilation
max_jobs = max(1, min(max_num_jobs_cores, max_num_jobs_memory))
os.environ["MAX_JOBS"] = str(max_jobs)

super().__init__(*args, **kwargs)

ext_modules = [
CUDAExtension(
name="minference.cuda",
sources=[
os.path.join("csrc", "kernels.cpp"),
os.path.join("csrc", "vertical_slash_index.cu"),
],
extra_compile_args=["-std=c++17", "-O3"],
)
]

setup(
name="minference",
@@ -110,7 +176,6 @@ def get_minference_version() -> str:
"Programming Language :: Python :: 3",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
],
package_dir={"": "."},
packages=find_packages(
exclude=(
"csrc",
@@ -136,5 +201,9 @@ def get_minference_version() -> str:
python_requires=">=3.8.0",
zip_safe=False,
ext_modules=ext_modules,
cmdclass={"build_ext": BuildExtension},
cmdclass={"bdist_wheel": CachedWheelsCommand, "build_ext": NinjaBuildExtension}
if ext_modules
else {
"bdist_wheel": CachedWheelsCommand,
},
)
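One design note: the new NinjaBuildExtension caps compile parallelism by both core count and free RAM before delegating to PyTorch's BuildExtension, which avoids out-of-memory failures when ninja fans out. A standalone sketch of that heuristic (psutil is assumed to be installed, as it is imported lazily in the diff):

```python
import os

import psutil

# Same MAX_JOBS heuristic as NinjaBuildExtension: use half the CPU cores, but
# never more jobs than free memory allows at roughly 9 GB per compile job.
max_jobs_by_cores = max(1, os.cpu_count() // 2)
free_memory_gb = psutil.virtual_memory().available / (1024**3)
max_jobs_by_memory = int(free_memory_gb / 9)

max_jobs = max(1, min(max_jobs_by_cores, max_jobs_by_memory))
os.environ.setdefault("MAX_JOBS", str(max_jobs))
print(os.environ["MAX_JOBS"])
```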
