#!/bin/bash
set -e
CUR_DIR=$(
cd $(dirname $0)
pwd
)
TORCH_MUSA_HOME=$CUR_DIR
PYTORCH_PATH=${PYTORCH_REPO_PATH:-$(realpath ${TORCH_MUSA_HOME}/../pytorch)}
KINETO_PATH=${PYTORCH_PATH}/third_party/kineto
TORCH_PATCHES_DIR=${TORCH_MUSA_HOME}/torch_patches/
KINETO_PATCHES_DIR=${TORCH_MUSA_HOME}/kineto_patches/
KINETO_URL=${KINETO_URL:-https://github.com/MooreThreads/kineto.git}
KINETO_TAG=v1.2.2
BUILD_WHEEL=0
DEBUG_MODE=0
ASAN_MODE=0
BUILD_TORCH=1
BUILD_TORCH_MUSA=1
USE_KINETO=1
ONLY_PATCH=0
CLEAN=0
COMPILE_FP64=1
PYTORCH_TAG=v2.2.0
PYTORCH_BUILD_VERSION="${PYTORCH_TAG:1}"
PYTORCH_BUILD_NUMBER=0 # This is used for official torch distribution.
USE_STATIC_MKL=${USE_STATIC_MKL:-1}
USE_MCCL=${USE_MCCL:-1}
MUSA_DIR="/usr/local/musa"
UPDATE_MUSA=0
UPDATE_DAILY_MUSA=0
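# The variables above that use ${VAR:-default} can be overridden from the
# environment before invoking this script; for example (values are illustrative):
#   PYTORCH_REPO_PATH=/path/to/pytorch                       # reuse an existing PyTorch checkout
#   KINETO_URL=https://github.com/MooreThreads/kineto.git    # alternative kineto source
#   USE_STATIC_MKL=0 USE_MCCL=0                              # link MKL dynamically / build without MCCL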
usage() {
echo -e "\033[1;32mThis script is used to build PyTorch and Torch_MUSA. \033[0m"
echo -e "\033[1;32mParameters usage: \033[0m"
echo -e "\033[32m --all : Means building both PyTorch and Torch_MUSA. \033[0m"
echo -e "\033[32m --fp64 : Means compiling fp64 data type in kernels using mcc in Torch_MUSA. \033[0m"
echo -e "\033[32m --update_musa : Update latest RELEASED MUSA software stack. \033[0m"
echo -e "\033[32m --update_daily_musa : Update latest DAILY MUSA software stack. \033[0m"
echo -e "\033[32m -m/--musa : Means building Torch_MUSA only. \033[0m"
echo -e "\033[32m -t/--torch : Means building original PyTorch only. \033[0m"
echo -e "\033[32m -d/--debug : Means building in debug mode. \033[0m"
echo -e "\033[32m -a/--asan : Means building in asan mode. \033[0m"
echo -e "\033[32m -c/--clean : Means cleaning everything that has been built. \033[0m"
echo -e "\033[32m -p/--patch : Means applying patches only. \033[0m"
echo -e "\033[32m -w/--wheel : Means generating wheel after building. \033[0m"
echo -e "\033[32m -n/--no_kineto : Disable kineto. \033[0m"
echo -e "\033[32m -h/--help : Help information. \033[0m"
}
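# Typical invocations (a sketch; see usage() above for the full flag list):
#   bash build.sh            # build both PyTorch and torch_musa
#   bash build.sh -m -d      # rebuild torch_musa only, in debug mode
#   bash build.sh --all -w   # build both and generate wheels
#   bash build.sh -c         # clean previous builds, then rebuild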
# parse parameters
if ! parameters=$(getopt -o +mtdacpwnh --long all,fp64,update_musa,update_daily_musa,musa,torch,debug,asan,clean,patch,wheel,no_kineto,help -n "$0" -- "$@"); then
echo -e "\033[34mTry '$0 --help' for more information. \033[0m"
exit 1
fi
eval set -- "$parameters"
while true; do
case "$1" in
--all)
BUILD_TORCH=1
BUILD_TORCH_MUSA=1
shift
;;
--fp64)
COMPILE_FP64=1
shift
;;
--update_musa)
UPDATE_MUSA=1
shift
;;
--update_daily_musa)
UPDATE_DAILY_MUSA=1
shift
;;
-m | --musa)
BUILD_TORCH_MUSA=1
BUILD_TORCH=0
shift
;;
-t | --torch)
BUILD_TORCH_MUSA=0
BUILD_TORCH=1
shift
;;
-d | --debug)
DEBUG_MODE=1
shift
;;
-a | --asan)
ASAN_MODE=1
shift
;;
-c | --clean)
CLEAN=1
shift
;;
-w | --wheel)
BUILD_WHEEL=1
shift
;;
-n | --no_kineto)
USE_KINETO=0
shift
;;
-p | --patch)
ONLY_PATCH=1
shift
;;
-h | --help)
usage
exit
;;
--)
shift
break
;;
*)
usage
exit 1
;;
esac
done
cmd_check(){
cmd="$1"
if command -v ${cmd} >/dev/null 2>&1; then
echo "- cmd exist : ${cmd}"
else
echo -e "\033[34m- cmd does not exist, automatically install \"${cmd}\"\033[0m"
pip install -r ${TORCH_MUSA_HOME}/requirements.txt # extra requirements
fi
}
precommit_install(){
cmd_check "pre-commit"
root_dir="$(dirname "$(realpath "${BASH_SOURCE:-$0}" )")"
if [ ! -f ${root_dir}/.git/hooks/pre-commit ]; then
pushd $root_dir
pre-commit install
popd
fi
}
precommit_install
clone_pytorch() {
# if the PyTorch repo already exists, skip cloning it
if [ -d ${PYTORCH_PATH} ]; then
echo -e "\033[34mPyTorch repo path is ${PYTORCH_PATH} ...\033[0m"
pushd ${PYTORCH_PATH}
git checkout ${PYTORCH_TAG}
echo -e "\033[34m Switch the Pytorch repo to tag ${PYTORCH_TAG} \033[0m"
popd
else
ABSOLUTE_PATH=$(cd $(dirname ${PYTORCH_PATH}) && pwd)"/pytorch"
echo -e "\033[34mUsing default pytorch repo path: ${ABSOLUTE_PATH}\033[0m"
if [ ! -d "${PYTORCH_PATH}" ]; then
pushd ${TORCH_MUSA_HOME}/..
echo -e "\033[34mPyTorch repo does not exist, now git clone PyTorch to ${ABSOLUTE_PATH} ...\033[0m"
git clone -b ${PYTORCH_TAG} https://github.com/pytorch/pytorch.git --depth=1
popd
fi
fi
# make sure submodules are fetched
pushd ${PYTORCH_PATH}
git submodule update --init --recursive
popd
}
apply_torch_patches() {
# apply patches to PyTorch
echo -e "\033[34mApplying patches to ${PYTORCH_PATH} ...\033[0m"
# clean PyTorch before patching
if [ -d "$PYTORCH_PATH/.git" ]; then
echo -e "\033[34mStash and checkout the PyTorch environment before patching. \033[0m"
pushd $PYTORCH_PATH
git stash -u
git checkout ${PYTORCH_TAG}
popd
fi
for file in $(find ${TORCH_PATCHES_DIR} -type f -print); do
if [ "${file##*.}"x = "patch"x ]; then
echo -e "\033[34mapplying patch: $file \033[0m"
pushd $PYTORCH_PATH
git apply --check $file
git apply $file
popd
fi
done
# apply kineto-related patches to PyTorch
if [ ${USE_KINETO} -eq 1 ]; then
for file in $(find ${KINETO_PATCHES_DIR} -type f -print); do
if [ "${file##*.}"x = "patch"x ]; then
echo -e "\033[34mapplying kineto-related patch: $file \033[0m"
pushd $PYTORCH_PATH
git apply --check $file
git apply $file
popd
fi
done
fi
}
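# To drop applied patches manually and return PyTorch to a pristine state, the
# following sketch mirrors the reset that apply_torch_patches performs before patching:
#   (cd "${PYTORCH_PATH}" && git stash -u && git checkout ${PYTORCH_TAG})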
update_kineto_source() {
echo -e "\033[34mUpdating kineto...\033[0m"
pushd ${PYTORCH_PATH}/third_party
rm -rf ./kineto
if [ -d /home/kineto ]; then
pushd /home/kineto
git checkout ${KINETO_TAG}
git submodule update --init --recursive
popd
cp -r /home/kineto .
else
git clone ${KINETO_URL} -b ${KINETO_TAG} --depth 1 --recursive
fi
popd
}
build_pytorch() {
echo -e "\033[34mBuilding PyTorch...\033[0m"
status=0
if [ ! -d ${PYTORCH_PATH} ]; then
echo -e "\033[34mAn error occurred while building PyTorch, the specified PyTorch repo [${PYTORCH_PATH}] does not exist \033[0m"
exit 1
fi
pushd ${PYTORCH_PATH}
pip install -r requirements.txt
pip install -r ${TORCH_MUSA_HOME}/requirements.txt # extra requirements
if [ $BUILD_WHEEL -eq 1 ]; then
rm -rf dist
pip uninstall torch -y
PYTORCH_BUILD_NUMBER=${PYTORCH_BUILD_NUMBER} PYTORCH_BUILD_VERSION=${PYTORCH_BUILD_VERSION} DEBUG=${DEBUG_MODE} USE_ASAN=${ASAN_MODE} USE_STATIC_MKL=${USE_STATIC_MKL} USE_MKL=1 USE_MKLDNN=1 USE_MKLDNN_CBLAS=1 python setup.py bdist_wheel
status=$?
rm -rf torch.egg-info
pip install dist/*.whl
else
PYTORCH_BUILD_NUMBER=${PYTORCH_BUILD_NUMBER} PYTORCH_BUILD_VERSION=${PYTORCH_BUILD_VERSION} DEBUG=${DEBUG_MODE} USE_ASAN=${ASAN_MODE} USE_STATIC_MKL=${USE_STATIC_MKL} USE_MKL=1 USE_MKLDNN=1 USE_MKLDNN_CBLAS=1 python setup.py install
status=$?
fi
popd
return $status
}
clean_pytorch() {
echo -e "\033[34mCleaning PyTorch...\033[0m"
pushd ${PYTORCH_PATH}
python setup.py clean
popd
}
clean_torch_musa() {
echo -e "\033[34mCleaning torch_musa...\033[0m"
pushd ${TORCH_MUSA_HOME}
python setup.py clean
rm -rf $CUR_DIR/build
popd
}
build_torch_musa() {
echo -e "\033[34mBuilding torch_musa...\033[0m"
status=0
pushd ${TORCH_MUSA_HOME}
if [ $BUILD_WHEEL -eq 1 ]; then
rm -rf dist build
PYTORCH_REPO_PATH=${PYTORCH_PATH} DEBUG=${DEBUG_MODE} USE_ASAN=${ASAN_MODE} ENABLE_COMPILE_FP64=${COMPILE_FP64} USE_MCCL=${USE_MCCL} python setup.py bdist_wheel
status=$?
rm -rf torch_musa.egg-info
pip install dist/*.whl
else
PYTORCH_REPO_PATH=${PYTORCH_PATH} DEBUG=${DEBUG_MODE} USE_ASAN=${ASAN_MODE} ENABLE_COMPILE_FP64=${COMPILE_FP64} USE_MCCL=${USE_MCCL} python setup.py install
status=$?
fi
if [ $status -ne 0 ]; then
popd
return $status
fi
# scan and output ops list for each building
bash ${CUR_DIR}/scripts/scan_ops.sh
popd
return $status
}
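# After a successful build, a minimal smoke test (assuming the package is
# importable under the name torch_musa, as built above) could be:
#   python -c "import torch; import torch_musa"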
main() {
# ======== install MUSA ========
if [ ! -d ${MUSA_DIR} ] || [ -z "$(ls -A ${MUSA_DIR})" ]; then
echo -e "\033[34mStart installing MUSA software stack, including musatoolkits/mudnn/mccl/muThrust/muSparse/muAlg ... \033[0m"
. ${CUR_DIR}/docker/common/release/update_release_all.sh
fi
if [ ${UPDATE_MUSA} -eq 1 ]; then
echo -e "\033[34mStart updating MUSA software stack to latest released version ... \033[0m"
. ${CUR_DIR}/docker/common/release/update_release_all.sh
exit 0
fi
if [ ${UPDATE_DAILY_MUSA} -eq 1 ]; then
echo -e "\033[34mStart updating MUSA software stack to latest daily version ... \033[0m"
. ${CUR_DIR}/docker/common/daily/update_daily_all.sh
exit 0
fi
# ==============================
if [[ ${CLEAN} -eq 1 ]] && [[ ${BUILD_TORCH} -ne 1 ]] && [[ ${BUILD_TORCH_MUSA} -ne 1 ]]; then
clean_pytorch
clean_torch_musa
exit 0
fi
if [ ${ONLY_PATCH} -eq 1 ]; then
apply_torch_patches
exit 0
fi
if [ ${BUILD_TORCH} -eq 1 ]; then
clone_pytorch
if [ ${CLEAN} -eq 1 ]; then
clean_pytorch
fi
apply_torch_patches
if [ ${USE_KINETO} -eq 1 ]; then
update_kineto_source
fi
build_pytorch_status=0
build_pytorch || build_pytorch_status=$?
if [ $build_pytorch_status -ne 0 ]; then
clean_and_build="bash build.sh -c # Clean PyTorch/torch_musa and build"
echo -e "\033[31mBuilding PyTorch failed, please try cleaning first before building: \033[0m"
echo -e "\033[32m$clean_and_build \033[0m"
exit 1
fi
fi
if [ ${BUILD_TORCH_MUSA} -eq 1 ]; then
if [ ${CLEAN} -eq 1 ]; then
clean_torch_musa
fi
build_torch_musa_status=0
build_torch_musa || build_torch_musa_status=$?
if [ $build_torch_musa_status -ne 0 ]; then
echo -e "\033[31mPlease try the following commands once building torch_musa is failed: \033[0m"
echo -e "\033[32mClean PyTorch/torch_musa and build: \033[0m"
echo "cmd1: bash build.sh -c"
echo -e "\033[32mIf cmd1 still failed, update torch_musa to newest and build: \033[0m"
echo "cmd2: git fetch && git rebase origin/main && bash build.sh -c"
echo -e "\033[32mIf cmd2 still failed, update libraries and build: \033[0m"
echo "cmd3: bash docker/common/daily/update_daily_musart.sh && bash docker/common/daily/update_daily_mudnn.sh && bash build.sh -c"
echo -e "\033[32mIf cmd3 still failed, please check driver version on your host machine. \033[0m"
exit 1
fi
fi
}
main