Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NPU] fix NPU ci scripts, test=develop #35095

Merged
merged 1 commit into from
Aug 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 42 additions & 17 deletions paddle/scripts/paddle_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1059,7 +1059,7 @@ function get_quickly_disable_ut() {

function card_test() {
set -m
CTEST_PARALLEL_LEVEL=2

case_count $1 $2
ut_startTime_s=`date +%s`

Expand Down Expand Up @@ -1725,16 +1725,22 @@ set +x
single_card_tests="$single_card_tests|^$testcase$"
fi
done <<< "$test_cases";
card_test "$single_card_tests" 1

ut_actual_total_startTime_s=`date +%s`

card_test "$single_card_tests" 1 # run cases 1 job each time with single GPU
collect_failed_tests

# add unit test retry for NPU
rm -f $tmp_dir/*
exec_times=0
retry_unittests_record=''
retry_time=3
exec_time_array=('first' 'second' 'third')
retry_time=4
exec_time_array=('first' 'second' 'third' 'fourth')
parallel_failed_tests_exec_retry_threshold=80
exec_retry_threshold=10
is_retry_execuate=0
rerun_ut_startTime_s=`date +%s`
if [ -n "$failed_test_lists" ];then
if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then
bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest
Expand All @@ -1743,14 +1749,30 @@ set +x
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
while ( [ $exec_times -lt $retry_time ] )
do
while ( [ $exec_times -lt $retry_time ] )
do
if [[ "${exec_times}" == "0" ]] ;then
if [ $need_retry_ut_count -lt $parallel_failed_tests_exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
elif [[ "${exec_times}" == "1" ]] ;then
read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' )
need_retry_ut_arr=(${need_retry_ut_str})
need_retry_ut_count=${#need_retry_ut_arr[@]}
if [ $need_retry_ut_count -lt $exec_retry_threshold ];then
is_retry_execuate=0
else
is_retry_execuate=1
fi
fi
if [[ "$is_retry_execuate" == "0" ]];then
set +e
retry_unittests_record="$retry_unittests_record$failed_test_lists"
failed_test_lists_ult=`echo "${failed_test_lists}" |grep -Po '[^ ].*$'`
set -e
if [[ "${exec_times}" == "1" ]];then
if [[ "${exec_times}" == "1" ]] || [[ "${exec_times}" == "3" ]];then
if [[ "${failed_test_lists}" == "" ]];then
break
else
Expand All @@ -1761,35 +1783,38 @@ set +x
echo "This is the ${exec_time_array[$exec_times]} time to re-run"
echo "========================================="
echo "The following unittest will be re-run:"
echo "${retry_unittests}"

echo "${retry_unittests}"
for line in ${retry_unittests[@]} ;
do
read tmp_one_tmp <<< "$( echo $single_card_tests | grep -oEi $line )"

if [[ "$tmp_one_tmp" != "" ]]; then
if [[ "$one_card_retry" == "" ]]; then
one_card_retry="^$line$"
else
one_card_retry="$one_card_retry|^$line$"
fi
fi

done

if [[ "$one_card_retry" != "" ]]; then
card_test "$one_card_retry" 1
card_test "$one_card_retry" 1 # run cases 1 job each time with single GPU
fi

exec_times=$[$exec_times+1]
failed_test_lists=''
collect_failed_tests
rm -f $tmp_dir/*
one_card_retry=''
done
else
# There are more than 10 failed unit tests, so no unit test retry
is_retry_execuate=1
fi
fi
done
fi

rerun_ut_endTime_s=`date +%s`

echo "ipipe_log_param_Rerun_TestCases_Total_Time: $[ $rerun_ut_endTime_s - $rerun_ut_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
ut_actual_total_endTime_s=`date +%s`
echo "ipipe_log_param_actual_TestCases_Total_Time: $[ $ut_actual_total_endTime_s - $ut_actual_total_startTime_s ]s" >> ${PADDLE_ROOT}/build/build_summary.txt
if [[ "$EXIT_CODE" != "0" ]]; then
show_ut_retry_result
fi
Expand Down
24 changes: 24 additions & 0 deletions tools/coverage/paddle_coverage.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,8 +85,30 @@ function gen_full_html_report_xpu() {
mv -f coverage-full.tmp coverage-full.info
}

function gen_full_html_report_npu() {
lcov --extract coverage.info \
'/paddle/paddle/fluid/operators/*npu*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0

mv -f coverage-full.tmp coverage-full.info

lcov --remove coverage-full.info \
'/paddle/paddle/fluid/framework/*_test*' \
'/paddle/paddle/fluid/*/*test*' \
'/paddle/paddle/fluid/*/*/*test*' \
'/paddle/paddle/fluid/inference/tests/*' \
'/paddle/paddle/fluid/inference/api/demo_ci/*' \
-o coverage-full.tmp \
--rc lcov_branch_coverage=0

mv -f coverage-full.tmp coverage-full.info
}

if [ ${WITH_XPU:-OFF} == "ON" ]; then
gen_full_html_report_xpu || true
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
gen_full_html_report_npu || true
else
gen_full_html_report || true
fi
Expand Down Expand Up @@ -183,6 +205,8 @@ echo "Assert Python Diff Coverage"

if [ ${WITH_XPU:-OFF} == "ON" ]; then
echo "XPU has no python coverage!"
elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then
echo "NPU has no python coverage!"
else
if [[ "${NO_PYTHON_COVERAGE_DATA}" != "1" ]];then
python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1
Expand Down