Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve CI unittest parallel execution strategy #44334

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 98 additions & 3 deletions paddle/scripts/paddle_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -1880,6 +1880,18 @@ function precise_card_test_single {
done
}

function parallel_card_test_single {
    # Run the given test cases one at a time through precise_card_test.
    #   $1 - case patterns joined with '|', each wrapped as ^name$
    #   $2 - optional value forwarded as precise_card_test's second argument
    # Errors do not abort the script and command tracing is switched off.
    set +ex
    testcases=$1
    num=$2
    # tr rewrites the separator characters '$', '|' and '^' to newlines
    # (GNU tr pads the shorter second set with its last character), so
    # word splitting of the substitution yields the bare case names.
    for case in $(echo $testcases | tr '$|^' '\n'); do
        # Return to the build directory before every case.
        cd ${PADDLE_ROOT}/build
        # $num is left unquoted on purpose: when it is empty, no second
        # argument is passed to precise_card_test.
        precise_card_test "^${case}$" $num
    done
}

function precise_card_test() {
set -m
testcases=$1
Expand Down Expand Up @@ -1927,6 +1939,8 @@ function get_precise_tests_map_file {
multiple_card_tests='' # cases list which would take multiple GPUs, most cases would be two GPUs
is_exclusive='' # indicate whether the case is exclusive type
is_multicard='' # indicate whether the case is multiple GPUs type

single_card_test_num=0
set +x

while read -r line; do
Expand Down Expand Up @@ -1962,7 +1976,8 @@ set +x
multiple_card_tests="$multiple_card_tests|^$testcase$"
fi
else
if [[ "${single_card_tests}" -gt 3000 ]];then
single_card_test_num=$(($single_card_test_num+1))
if [[ $single_card_test_num -gt 3000 ]];then
if [[ "$single_card_tests_1" == "" ]]; then
single_card_tests_1="^$testcase$"
else
Expand Down Expand Up @@ -2006,7 +2021,86 @@ set -x

#generate ut file map
python ${PADDLE_ROOT}/tools/get_ut_file_map.py 'get_ut_map' ${PADDLE_ROOT}

}

function get_parallel_tests_map_file {
cd ${PADDLE_ROOT}/build
pip install ${PADDLE_ROOT}/build/python/dist/*whl
ut_total_startTime_s=`date +%s`
EXIT_CODE=0;
test_cases=$(ctest -N -V) # get all test cases
single_card_tests='' # all cases list which would take one graph card
exclusive_tests='' # cases list which would be run exclusively
multiple_card_tests='' # cases list which would take multiple GPUs, most cases would be two GPUs
is_exclusive='' # indicate whether the case is exclusive type
is_multicard='' # indicate whether the case is multiple GPUs type
single_card_test_num=0
set +x

while read -r line; do
if [[ "$line" == "" ]]; then
continue
fi
read matchstr <<< $(echo "$line"|grep -oEi 'Test[ \t]+#')
if [[ "$matchstr" == "" ]]; then
# Any test case with a LABELS property is parsed here
# RUN_TYPE=EXCLUSIVE means the case will be run exclusively
# RUN_TYPE=DIST means the case will take two GPUs during runtime
read is_exclusive <<< $(echo "$line"|grep -oEi "RUN_TYPE=EXCLUSIVE")
read is_multicard <<< $(echo "$line"|grep -oEi "RUN_TYPE=DIST")
continue
fi
read testcase <<< $(echo "$line"|grep -oEi "\w+$")

if [[ "$is_multicard" == "" ]]; then
# trick: treat all test cases with the prefix "test_dist_" as dist cases, which run on 2 GPUs
read is_multicard <<< $(echo "$testcase"|grep -oEi "test_dist_")
fi

if [[ "$is_exclusive" != "" ]]; then
if [[ "$exclusive_tests" == "" ]]; then
exclusive_tests="^$testcase$"
else
exclusive_tests="$exclusive_tests|^$testcase$"
fi
elif [[ "$is_multicard" != "" ]]; then
if [[ "$multiple_card_tests" == "" ]]; then
multiple_card_tests="^$testcase$"
else
multiple_card_tests="$multiple_card_tests|^$testcase$"
fi
else
single_card_test_num=$(($single_card_test_num+1))
if [[ $single_card_test_num -gt 3000 ]];then
if [[ "$single_card_tests_1" == "" ]]; then
single_card_tests_1="^$testcase$"
else
single_card_tests_1="$single_card_tests_1|^$testcase$"
fi
continue
fi
if [[ "$single_card_tests" == "" ]]; then
single_card_tests="^$testcase$"
else
single_card_tests="$single_card_tests|^$testcase$"
fi
fi
is_exclusive=''
is_multicard=''
is_nightly=''
matchstr=''
testcase=''
done <<< "$test_cases";

set -x
mkdir -p ${PADDLE_ROOT}/build/ut_map
mkdir -p ${PADDLE_ROOT}/build/pytest

parallel_card_test_single "$single_card_tests" 1
parallel_card_test_single "$single_card_tests_1" 1
parallel_card_test_single "$multiple_card_tests" 2
parallel_card_test_single "$exclusive_tests"

wait;
#classify_case_by_cardNum
Expand Down Expand Up @@ -2385,7 +2479,6 @@ set +x
else
break
fi

done
fi

Expand Down Expand Up @@ -3468,9 +3561,11 @@ function main() {
ci_preciseTest)
insert_pile_to_h_cu_diff
cmake_gen_and_build ${PYTHON_ABI:-""} ${parallel_number}
enable_unused_var_check
get_precise_tests_map_file
;;
ci_parallelTest)
get_parallel_tests_map_file
;;
cicheck_brpc)
cmake_gen ${PYTHON_ABI:-""}
build ${parallel_number}
Expand Down
5 changes: 4 additions & 1 deletion tools/final_ut_parallel_rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ def classify_cases_by_mem(rootPath):
case = case.replace('^', '').replace('$', '').strip()
all_tests_by_card['exclusive_card_tests'].append(case)

if not os.path.exists("/pre_test"):
os.mkdir("/pre_test")

with open("/pre_test/classify_case_by_cardNum.json", "w") as f:
json.dump(all_tests_by_card, f)

Expand Down Expand Up @@ -120,7 +123,7 @@ def classify_cases_by_mem(rootPath):
mem_1_sum = 0
with open('/pre_test/%s' % cardType, 'w') as f_not_0:
for index in case_mem_1_sort:
if mem_1_sum < 16 * 1024 * 2:
if mem_1_sum < 14 * 1024 * 2:
mem_1_sum += index[1]
case_mem_1_line = case_mem_1_line + '|^' + index[0] + '$'
else:
Expand Down
8 changes: 7 additions & 1 deletion tools/get_ut_mem_map.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
Expand Down Expand Up @@ -26,12 +28,13 @@ def get_ut_mem(rootPath):
ut = f.replace('^', '').replace('$.log', '')
case_dic[ut] = {}
filename = '%s/%s' % (parent, f)
fi = open(filename)
fi = open(filename, mode='rb')
lines = fi.readlines()
mem_reserved1 = -1
mem_nvidia1 = -1
caseTime = -1
for line in lines:
line = line.decode('utf-8', errors='ignore')
if '[Memory Usage (Byte)] gpu' in line:
mem_reserved = round(
float(
Expand All @@ -56,7 +59,10 @@ def get_ut_mem(rootPath):
case_dic[ut]['mem_nvidia'] = mem_nvidia1
if caseTime != -1:
case_dic[ut]['time'] = caseTime
fi.close()

if not os.path.exists("/pre_test"):
os.mkdir("/pre_test")
ut_mem_map_file = "/pre_test/ut_mem_map.json"
with open(ut_mem_map_file, "w") as f:
json.dump(case_dic, f)
Expand Down
6 changes: 3 additions & 3 deletions tools/group_case_for_parallel.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,16 @@ def group_case_for_parallel(rootPath):

#wget file
for filename in [
'nightly_tests', 'single_card_tests', 'single_card_tests_mem0',
'nightly_case', 'single_card_tests', 'single_card_tests_mem0',
'multiple_card_tests', 'multiple_card_tests_mem0',
'exclusive_card_tests', 'exclusive_card_tests_mem0'
]:
os.system(
'cd %s/tools && wget --no-proxy https://paddle-docker-tar.bj.bcebos.com/pre_test/%s --no-check-certificate'
'cd %s/tools && wget --no-proxy https://paddle-docker-tar.bj.bcebos.com/pre_test_bak/%s --no-check-certificate'
% (rootPath, filename))

#get nightly tests
nightly_tests_file = open('%s/tools/nightly_tests' % rootPath, 'r')
nightly_tests_file = open('%s/tools/nightly_case' % rootPath, 'r')
nightly_tests = nightly_tests_file.read().strip().split('\n')
nightly_tests_file.close()

Expand Down