Skip to content

Commit

Permalink
Kokoro automation - Log rotation using logrotate (#1008)
Browse files Browse the repository at this point in the history
* Removing the dependency of log_rotation branch

* Minor correction

* Removing experimental flag from setup_container

* Fixing permission issue

* Fixing permission related issue

* Half change log-rotation

* Half change log-rotation

* Refactoring to use the same code by tf/workflow

* Fixing issue that arose due to refactoring

* Fixing the source env variable issue

* Changing to a 7-day run period and changing the logrotate config
  • Loading branch information
raj-prince authored Mar 15, 2023
1 parent 1901dde commit 2ad9b90
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,15 @@ cd "${KOKORO_ARTIFACTS_DIR}/github/gcsfuse"
echo "Building docker image containing all pytorch libraries..."
sudo docker build . -f perfmetrics/scripts/ml_tests/pytorch/dino/Dockerfile --tag pytorch-gcsfuse

mkdir container_artifacts
mkdir -p container_artifacts

echo "Running the docker image build in the previous step..."
sudo docker run --runtime=nvidia --name=pytorch_automation_container --privileged -d -v ${KOKORO_ARTIFACTS_DIR}/github/gcsfuse/container_artifacts:/pytorch_dino/run_artifacts:rw,rshared \
--shm-size=128g pytorch-gcsfuse:latest

echo "Setting up cron job to delete the gcsfuse_logs."
echo "0 */1 * * * cd ${KOKORO_ARTIFACTS_DIR}/github/gcsfuse && sudo sh ./perfmetrics/scripts/ml_tests/smart_log_deleter.sh container_artifacts/gcsfuse_logs/" | crontab -
# Setup the log_rotation.
chmod +x perfmetrics/scripts/ml_tests/setup_log_rotation.sh
source perfmetrics/scripts/ml_tests/setup_log_rotation.sh ${KOKORO_ARTIFACTS_DIR}/github/gcsfuse/container_artifacts/gcsfuse.log

# Wait for the script completion as well as logs output.
sudo docker logs -f pytorch_automation_container
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
build_file: "gcsfuse/perfmetrics/scripts/continuous_test/ml_tests/pytorch/dino/build.sh"

# Setting the 16 days (23040 mins) timeout for kokoro-jobs.
timeout_mins: 23040
# Setting the 8 days (11520 mins) timeout for kokoro-jobs.
timeout_mins: 11520


# Post the gcsfuse logs as an artifact to GCSBucket
Expand Down
7 changes: 2 additions & 5 deletions perfmetrics/scripts/ml_tests/pytorch/dino/setup_container.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,10 @@ wget -O go_tar.tar.gz https://go.dev/dl/go1.19.5.linux-amd64.tar.gz
rm -rf /usr/local/go && tar -C /usr/local -xzf go_tar.tar.gz
export PATH=$PATH:/usr/local/go/bin

# Todo: please update the branch, when log-rotation changes are merged.
# Log-rotation branch will create the logs.txt file after every 6 hours.
# Hence, we need to setup the job to delete the logs file if not required.
git clone https://github.com/GoogleCloudPlatform/gcsfuse.git
cd gcsfuse
git checkout log_rotation
go build .
cd -

Expand All @@ -22,11 +20,10 @@ nohup /pytorch_dino/gcsfuse/gcsfuse --foreground --type-cache-ttl=1728000s \
--stat-cache-capacity=1320000 \
--stackdriver-export-interval=60s \
--implicit-dirs \
--experimental-enable-storage-client-library \
--max-conns-per-host=100 \
--debug_fs \
--debug_gcs \
--log-file run_artifacts/gcsfuse_logs/logs.txt \
--log-file run_artifacts/gcsfuse.log \
--log-format text \
gcsfuse-ml-data gcsfuse_data > "run_artifacts/gcsfuse.out" 2> "run_artifacts/gcsfuse.err" &

Expand Down Expand Up @@ -67,7 +64,7 @@ python3 -m torch.distributed.launch \
--norm_last_layer False \
--use_fp16 False \
--clip_grad 0 \
--epochs 200 \
--epochs 100 \
--global_crops_scale 0.25 1.0 \
--local_crops_number 10 \
--local_crops_scale 0.05 0.25 \
Expand Down
53 changes: 53 additions & 0 deletions perfmetrics/scripts/ml_tests/setup_log_rotation.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash

# Sets up hourly, size-based rotation (via logrotate) of a single log file.
#
# Usage:
#   setup_log_rotation.sh <absolute-path-of-log-file>
#
# Arguments:
#   $1 - absolute path of the log file to rotate.
# Environment:
#   KOKORO_ARTIFACTS_DIR - the logrotate config and state files are kept under
#                          ${KOKORO_ARTIFACTS_DIR}/github/gcsfuse.
# Side effects:
#   - writes the logrotate config (owned by root, mode 0644),
#   - installs logrotate through apt-get when /usr/sbin/logrotate is missing,
#   - writes /etc/cron.hourly/gcsfuse_logrotate and restarts the cron service.
#
# NOTE: fatal errors end in `exit 1`; if this file is sourced rather than
# executed, that also terminates the calling shell.

log_file="${1:-}"
if [[ -z "${log_file}" ]]; then
  echo "Usage: setup_log_rotation.sh <absolute-path-of-log-file>" >&2
  exit 1
fi

# Without this variable every path below would silently resolve under
# /github/gcsfuse, so refuse to continue.
if [[ -z "${KOKORO_ARTIFACTS_DIR:-}" ]]; then
  echo "KOKORO_ARTIFACTS_DIR must be set." >&2
  exit 1
fi

logrotate_conf="${KOKORO_ARTIFACTS_DIR}/github/gcsfuse/gcsfuse_logrotate.conf"
logrotate_state="${KOKORO_ARTIFACTS_DIR}/github/gcsfuse/gcsfuse_logrotate_status"
logrotate_cron_script="/etc/cron.hourly/gcsfuse_logrotate"

echo "Creating logrotate configuration..."
# Keep at most 3 compressed rotations, rotate once the file exceeds 1G, and
# use copytruncate so the process writing the log can keep its file open.
if ! sudo tee "${logrotate_conf}" << EOF
${log_file} {
su root adm
rotate 3
size 1G
missingok
notifempty
compress
dateext
dateformat -%Y%m%d-%s
copytruncate
}
EOF
then
  echo "Failed to write logrotate configuration: ${logrotate_conf}" >&2
  exit 1
fi

# Set the correct access permission to the config file.
sudo chmod 0644 "${logrotate_conf}"
sudo chown root "${logrotate_conf}"

# Make sure logrotate is installed on the system.
if test -x /usr/sbin/logrotate; then
  echo "Logrotate already installed on the system."
else
  echo "Installing logrotate on the system..."
  # -y: this runs unattended, so apt-get must not wait for a confirmation.
  if ! sudo apt-get install -y logrotate; then
    # Best effort, as before: the hourly job is a no-op without logrotate.
    echo "WARNING: could not install logrotate; logs will not be rotated." >&2
  fi
fi

# Add a shell script to /etc/cron.hourly which runs logrotate with the config
# created above and a dedicated state file next to it. The heredoc delimiter
# is unquoted on purpose: the paths are expanded now, while writing the script.
if sudo tee "${logrotate_cron_script}" << EOF
#!/bin/bash
test -x /usr/sbin/logrotate || exit 0
/usr/sbin/logrotate "${logrotate_conf}" --state "${logrotate_state}"
EOF
then
  echo "Hourly cron setup for logrotate completed successfully"
else
  # Writing fails e.g. when /etc/cron.hourly does not exist.
  echo "Please install linux package - cron" >&2
  exit 1
fi

sudo chmod 775 "${logrotate_cron_script}"

# Restart the cron service
sudo service cron restart

0 comments on commit 2ad9b90

Please sign in to comment.