forked from PaddlePaddle/Paddle
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
support distributed with poprun (PaddlePaddle#669)
* support distributed with poprun * fix typo * fix mpi-global-args * add debug info * fix mpi-local-args * fix launch failed * update samples * perfect ut * perfect ut 2 * perfect ut 3 * rm useless code * fix ut * fix script
- Loading branch information
Showing
7 changed files
with
689 additions
and
103 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
80 changes: 80 additions & 0 deletions
80
python/paddle/fluid/tests/unittests/ipu/disabled/run_dist_ipu.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/bin/bash

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Checks a distributed IPU run against a single-process reference run.
#
# For every combination of optimizer (lamb, sgd, adam), `onchip` flag and
# `rts` flag, this script:
#   1. launches test_dist_data_parallel_ipu.py through
#      paddle.distributed.fleet.launch, writing results to ipu_res.txt and
#      stdout to ipu.log;
#   2. runs test_dist_data_parallel_ipu.py directly with POPLAR_IPUMODEL=1,
#      writing results to cpu_res.txt and stdout to cpu.log;
#   3. compares the two result files with numpy.allclose.
#
# Exit status: 0 when every combination matches, non-zero as soon as any
# step fails. On failure the logs and result files are left in place so
# they can be inspected; they are removed only after a fully successful run.

# Abort on the first failing command. Commands used as an `if` condition are
# exempt from this, which is why the comparison below is tested directly
# instead of through `$?`.
set -e

readonly partition_name=pod64-lr17
readonly vipu_server=lr17-1-ctrl

# Python snippet that exits 0 when the two result files agree to within
# atol=1e-6 and 1 otherwise. Only every 16th row of ipu_res.txt is compared
# against cpu_res.txt.
# NOTE(review): presumably the IPU run emits 16 rows per reference row --
# confirm against test_dist_data_parallel_ipu.py.
readonly allclose_script='
import sys
import numpy as np
data1 = np.loadtxt("ipu_res.txt")
data2 = np.loadtxt("cpu_res.txt")
if np.allclose(data1[::16], data2, atol=1e-6):
    sys.exit(0)
else:
    sys.exit(1)
'

for opt in lamb sgd adam; do
  for onchip in False True; do
    for rts in False True; do
      echo "Testcase: opt: ${opt}, onchip: ${onchip}, rts: ${rts}"
      echo "paddle.distributed.fleet.launch test with IPUs..."
      python3.7 -m paddle.distributed.fleet.launch \
        --run_mode=collective \
        --ips=localhost \
        --nproc_per_node=2 \
        --ipus_per_replica=2 \
        --num_ipus=8 \
        --partition_name="${partition_name}" \
        --vipu_server="${vipu_server}" \
        test_dist_data_parallel_ipu.py "${opt}" ipu_res.txt "${onchip}" "${rts}" > ipu.log
      echo "paddle.distributed.fleet.launch test with IPUs...Done"

      echo "paddle normal test with CPU..."
      # POPLAR_IPUMODEL is exported only for the reference run and removed
      # again so it does not leak into the next fleet.launch invocation.
      export POPLAR_IPUMODEL=1
      python3.7 test_dist_data_parallel_ipu.py "${opt}" cpu_res.txt > cpu.log
      unset POPLAR_IPUMODEL
      echo "paddle normal test with CPU...Done"

      echo "Compare results..."
      # Run the comparison as the `if` condition: with `set -e` active, a
      # bare failing command would terminate the script before any
      # diagnostic could be printed.
      if python3.7 -c "${allclose_script}"; then
        echo "Compare results...Done"
      else
        echo "Error occurs. Please check ipu.log, cpu.log, ipu_res.txt and cpu_res.txt" >&2
        # A mismatch is a test failure and must be visible to the caller.
        exit 1
      fi
    done
  done
done

# Every combination passed: remove the intermediate artifacts. `-f` keeps
# this quiet if any of them is already missing.
rm -f -- ipu.log cpu.log ipu_res.txt cpu_res.txt
Oops, something went wrong.