Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scripts: support minos 2.0 #298

Merged
merged 5 commits into from
Mar 6, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions scripts/minos_common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/bin/bash

#
# Set at least one pair of these environment variables
# (MINOS_* for minos 1.0, MINOS2_* for minos 2.0):
# * MINOS_CONFIG_FILE
# * MINOS_CLIENT_DIR
# * MINOS2_CONFIG_FILE
# * MINOS2_CLIENT_DIR
#
# For example:
# export MINOS_CONFIG_FILE=$HOME/infra/deployment-config/deploy.cfg
# export MINOS_CLIENT_DIR=$HOME/infra/minos/client
# export MINOS2_CONFIG_FILE=$HOME/infra/deployment/deploy.cfg
# export MINOS2_CLIENT_DIR=$HOME/infra/minos2/client
#

# usage: find_cluster <cluster_name>
#
# Looks for the minos deployment that manages <cluster_name>. The minos 1.0
# settings (MINOS_CONFIG_FILE + MINOS_CLIENT_DIR) are probed first, then the
# minos 2.0 settings (MINOS2_CONFIG_FILE + MINOS2_CLIENT_DIR). A probe
# succeeds when both the per-cluster config file and the "deploy" client
# exist as regular files.
#
# return 0 if found, 1 otherwise
#
# if found, then these global variables will be set:
# * minos_type        1 for minos 1.0, 2 for minos 2.0
# * minos_config      <config dir>/xiaomi-config/conf/pegasus/pegasus-<cluster_name>.cfg
#                     (minos 1.0) or .yaml (minos 2.0)
# * minos_client_dir  directory that contains the "deploy" client
#
# NOTE: on failure the globals keep the values of the last probe that was
# attempted, so they must only be used after a 0 return.
function find_cluster()
{
    if [[ -n "${MINOS_CONFIG_FILE:-}" && -n "${MINOS_CLIENT_DIR:-}" ]]; then
        minos_type=1
        # Quote the path so directories containing whitespace are not split.
        minos_config="$(dirname -- "$MINOS_CONFIG_FILE")/xiaomi-config/conf/pegasus/pegasus-${1}.cfg"
        minos_client_dir=$MINOS_CLIENT_DIR
        if [[ -f "$minos_config" && -f "$minos_client_dir/deploy" ]]; then
            return 0
        fi
    fi

    if [[ -n "${MINOS2_CONFIG_FILE:-}" && -n "${MINOS2_CLIENT_DIR:-}" ]]; then
        minos_type=2
        minos_config="$(dirname -- "$MINOS2_CONFIG_FILE")/xiaomi-config/conf/pegasus/pegasus-${1}.yaml"
        minos_client_dir=$MINOS2_CLIENT_DIR
        if [[ -f "$minos_config" && -f "$minos_client_dir/deploy" ]]; then
            return 0
        fi
    fi

    return 1
}

# usage: minos_show_replica <cluster_name> <result_file>
#
# Runs "./deploy show pegasus <cluster_name> --job replica" inside
# $minos_client_dir and writes one "<task_id> <host>" line per replica task
# to <result_file>.
#
# Reads the globals minos_client_dir and minos_type (as set by find_cluster).
# The raw output of the show command is kept in
# /tmp/$UID.<pid>.pegasus.minos.show for troubleshooting.
#
# NOTE: <result_file> is opened after changing into $minos_client_dir, so
# pass an absolute path.
#
# Exits the whole script with status 1 if the client dir cannot be entered
# or the show command fails.
function minos_show_replica()
{
    local start_dir
    start_dir=$(pwd)
    # PID is not a bash built-in; NOTE(review): callers presumably define
    # PID=$$ -- confirm. Fall back to $$ so the name is still per-process.
    local tmp_file="/tmp/$UID.${PID:-$$}.pegasus.minos.show"

    if ! cd "$minos_client_dir"; then
        echo "ERROR: cannot enter minos client dir \"$minos_client_dir\""
        exit 1
    fi
    if ! ./deploy show pegasus "$1" --job replica &>"$tmp_file"; then
        echo "ERROR: minos show replica failed, refer to $tmp_file"
        exit 1
    fi

    # The two minos versions print the task lines in different formats.
    if [[ "$minos_type" -eq 1 ]]; then
        grep -o 'Showing task [0-9][0-9]* of replica on [^(]*' "$tmp_file" | awk '{print $3,$7}' >"$2"
    else
        grep -o 'Task [0-9][0-9]* of replica on [^:]*' "$tmp_file" | awk '{print $2,$6}' >"$2"
    fi
    cd "$start_dir"
}

# usage: minos_rolling_update <cluster_name> <job_name> [task_id]
#
# Runs "./deploy rolling_update pegasus <cluster_name> ..." inside
# $minos_client_dir for the given job, updating both package and config.
# When [task_id] is given only that task is updated (--task).
# For minos 2.0 (minos_type == 2) "--confirm_install" is appended.
#
# Reads the globals minos_client_dir and minos_type (as set by find_cluster).
# The command line is echoed to stdout before it is executed.
#
# Exits the whole script with status 1 if the client dir cannot be entered
# or the rolling update fails.
function minos_rolling_update()
{
    local start_dir
    start_dir=$(pwd)

    # Build the argument list as an array so each value stays one argument.
    local -a options=(--job "$2")
    if [[ -n "$3" ]]; then
        options+=(--task "$3")
    fi
    options+=(--update_package --update_config --time_interval 20 --skip_confirm)
    if [[ "$minos_type" -eq 2 ]]; then
        options+=(--confirm_install)
    fi

    if ! cd "$minos_client_dir"; then
        echo "ERROR: cannot enter minos client dir \"$minos_client_dir\""
        exit 1
    fi
    echo "./deploy rolling_update pegasus $1 ${options[*]}"
    if ! ./deploy rolling_update pegasus "$1" "${options[@]}"; then
        echo "ERROR: minos rolling update failed"
        exit 1
    fi
    cd "$start_dir"
}

# usage: minos_stop <cluster_name> <job_name> [task_id]
#
# Runs "./deploy stop pegasus <cluster_name> --job <job_name> ..." inside
# $minos_client_dir. When [task_id] is given only that task is stopped
# (--task); confirmation is always skipped (--skip_confirm).
#
# Reads the global minos_client_dir (as set by find_cluster).
# The command line is echoed to stdout before it is executed.
#
# Exits the whole script with status 1 if the client dir cannot be entered
# or the stop command fails.
function minos_stop()
{
    local start_dir
    start_dir=$(pwd)

    # Build the argument list as an array so each value stays one argument.
    local -a options=(--job "$2")
    if [[ -n "$3" ]]; then
        options+=(--task "$3")
    fi
    options+=(--skip_confirm)

    if ! cd "$minos_client_dir"; then
        echo "ERROR: cannot enter minos client dir \"$minos_client_dir\""
        exit 1
    fi
    echo "./deploy stop pegasus $1 ${options[*]}"
    # Review note from the pull request: a reviewer asked whether minos 2.0
    # can also be stopped directly like this; the author replied that it can
    # and that it was tried. Hence no minos_type-specific option here.
    if ! ./deploy stop pegasus "$1" "${options[@]}"; then
        echo "ERROR: minos stop failed"
        exit 1
    fi
    cd "$start_dir"
}

# usage: minos_restart <cluster_name> <job_name> [task_id]
#
# Runs "./deploy restart pegasus <cluster_name> --job <job_name> ..." inside
# $minos_client_dir. When [task_id] is given only that task is restarted
# (--task); confirmation is always skipped (--skip_confirm).
#
# Reads the global minos_client_dir (as set by find_cluster).
# The command line is echoed to stdout before it is executed.
#
# Exits the whole script with status 1 if the client dir cannot be entered
# or the restart command fails.
function minos_restart()
{
    local start_dir
    start_dir=$(pwd)

    # Build the argument list as an array so each value stays one argument.
    local -a options=(--job "$2")
    if [[ -n "$3" ]]; then
        options+=(--task "$3")
    fi
    options+=(--skip_confirm)

    if ! cd "$minos_client_dir"; then
        echo "ERROR: cannot enter minos client dir \"$minos_client_dir\""
        exit 1
    fi
    echo "./deploy restart pegasus $1 ${options[*]}"
    if ! ./deploy restart pegasus "$1" "${options[@]}"; then
        echo "ERROR: minos restart failed"
        exit 1
    fi
    cd "$start_dir"
}
52 changes: 9 additions & 43 deletions scripts/pegasus_migrate_zookeeper.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,12 @@ target_zk=$3

pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd $pwd/.. && pwd )"
minos_config_dir=$(dirname $MINOS_CONFIG_FILE)/xiaomi-config/conf/pegasus
minos_client_dir=/home/work/pegasus/infra/minos/client
cd $shell_dir

minos_config=$minos_config_dir/pegasus-${cluster}.cfg
if [ ! -f $minos_config ]; then
echo "ERROR: minos config \"$minos_config\" not found"
exit 1
fi

minos_client=$minos_client_dir/deploy
if [ ! -f $minos_client ]; then
echo "ERROR: minos client \"$minos_client\" not found"
source ./scripts/minos_common.sh
find_cluster $cluster
if [ $? -ne 0 ]; then
echo "ERROR: cluster \"$cluster\" not found"
exit 1
fi

Expand Down Expand Up @@ -85,41 +78,20 @@ sed -i "s/ recover_from_replica_server = .*/ recover_from_replica_server = true/
sed -i "s/ hosts_list = .*/ hosts_list = ${target_zk}/" $minos_config

echo ">>>> Stopping all meta-servers..."
cd $minos_client_dir
./deploy stop pegasus $cluster --skip_confirm --job meta 2>&1 | tee /tmp/$UID.$PID.pegasus.migrate_zookeeper.minos.stop.meta.all
cd $shell_dir
minos_stop $cluster meta

echo ">>>> Sleep for 15 seconds..."
sleep 15

# usage: rolling_update_meta <task_id>
#
# Rolling-updates a single meta task (config only) via the minos client and
# decides success by inspecting the captured client output.
# Uses $cluster, $minos_client_dir and $shell_dir from the enclosing script.
# Returns 0 on success, 1 on failure; the working directory is reset to
# $shell_dir in both cases.
function rolling_update_meta()
{
# task_id is not declared local, so it is visible to the rest of the script.
task_id=$1
cd $minos_client_dir
# Output is shown on the terminal and also saved to a per-task log in /tmp.
./deploy rolling_update pegasus $cluster --skip_confirm --time_interval 10 --update_config --job meta --task $task_id 2>&1 | tee /tmp/$UID.$PID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id
# Success means exactly one "Start task <id> of meta ... success" line in the log.
if [ `cat /tmp/$UID.$PID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id | grep "Start task $task_id of meta .* success" | wc -l` -ne 1 ]; then
echo "ERROR: rolling update meta-servers task $task_id failed, refer to /tmp/$UID.$PID.pegasus.migrate_zookeeper.minos.rolling.meta.$task_id"
cd $shell_dir
return 1
fi
cd $shell_dir
return 0
}

# usage: undo
#
# Rolls back the config change: moves ${minos_config}.bak back over
# $minos_config and then rolling-updates the meta job so the restored
# config is applied.
# NOTE(review): this text comes from a diff view without +/- markers; the
# two rolling_update_meta calls look like the removed lines and the
# minos_rolling_update call like their replacement -- confirm against the
# merged file.
function undo()
{
echo ">>>> Undo..."
mv -f ${minos_config}.bak $minos_config
rolling_update_meta 0
rolling_update_meta 1
minos_rolling_update $cluster meta
}

echo ">>>> Rolling update meta-server task 0..."
rolling_update_meta 0
if [ $? -ne 0 ]; then
undo
exit 1
fi
minos_rolling_update $cluster meta 0

echo ">>>> Sending recover command..."
echo "recover -f ${cluster}.recover.nodes" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.migrate_zookeeper.shell.recover
Expand Down Expand Up @@ -152,16 +124,10 @@ echo ">>>> Modifying config..."
sed -i "s/ recover_from_replica_server = .*/ recover_from_replica_server = false/" $minos_config

echo ">>>> Rolling update meta-server task 1..."
rolling_update_meta 1
if [ $? -ne 0 ]; then
exit 1
fi
minos_rolling_update $cluster meta 1

echo ">>>> Rolling update meta-server task 0..."
rolling_update_meta 0
if [ $? -ne 0 ]; then
exit 1
fi
minos_rolling_update $cluster meta 0

echo ">>>> Querying cluster info..."
echo "cluster_info" | ./run.sh shell --cluster $meta_list
Expand Down
33 changes: 10 additions & 23 deletions scripts/pegasus_offline_node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,12 @@ replica_task_id=$3

pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd $pwd/.. && pwd )"
minos_config_dir=$(dirname $MINOS_CONFIG_FILE)/xiaomi-config/conf/pegasus
minos_client_dir=/home/work/pegasus/infra/minos/client
cd $shell_dir

minos_config=$minos_config_dir/pegasus-${cluster}.cfg
if [ ! -f $minos_config ]; then
echo "ERROR: minos config \"$minos_config\" not found"
exit 1
fi

minos_client=$minos_client_dir/deploy
if [ ! -f $minos_client ]; then
echo "ERROR: minos client \"$minos_client\" not found"
source ./scripts/minos_common.sh
find_cluster $cluster
if [ $? -ne 0 ]; then
echo "ERROR: cluster \"$cluster\" not found"
exit 1
fi

Expand All @@ -42,18 +35,14 @@ echo "Start time: `date`"
all_start_time=$((`date +%s`))
echo

echo "Generating /tmp/$UID.$PID.pegasus.offline_node.minos.show..."
cd $minos_client_dir
./deploy show pegasus $cluster &>/tmp/$UID.$PID.pegasus.offline_node.minos.show

echo "Generating /tmp/$UID.$PID.pegasus.offline_node.rs.list..."
grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.$PID.pegasus.offline_node.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.$PID.pegasus.offline_node.rs.list
replica_server_count=`cat /tmp/$UID.$PID.pegasus.offline_node.rs.list | wc -l`
rs_list_file="/tmp/$UID.$PID.pegasus.rolling_update.rs.list"
echo "Generating $rs_list_file..."
minos_show_replica $cluster $rs_list_file
replica_server_count=`cat $rs_list_file | wc -l`
if [ $replica_server_count -eq 0 ]; then
echo "ERROR: replica server count is 0 by minos show"
exit 1
fi
cd $shell_dir

echo "Generating /tmp/$UID.$PID.pegasus.offline_node.cluster_info..."
echo cluster_info | ./run.sh shell --cluster $meta_list 2>&1 | sed 's/ *$//' >/tmp/$UID.$PID.pegasus.offline_node.cluster_info
Expand Down Expand Up @@ -161,9 +150,7 @@ do
sleep 1

echo "Stop node by minos..."
cd $minos_client_dir
./deploy stop pegasus $cluster --skip_confirm --job replica --task $task_id
cd $shell_dir
minos_stop $cluster replica $task_id
echo "Stop node by minos done."
echo
sleep 1
Expand All @@ -182,7 +169,7 @@ do
done
echo
sleep 1
done </tmp/$UID.$PID.pegasus.offline_node.rs.list
done <$rs_list_file

echo "Set lb.assign_delay_ms to DEFAULT..."
echo "remote_command -l $pmeta meta.lb.assign_delay_ms DEFAULT" | ./run.sh shell --cluster $meta_list &>/tmp/$UID.$PID.pegasus.offline_node.assign_delay_ms
Expand Down
40 changes: 16 additions & 24 deletions scripts/pegasus_offline_node_list.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,12 @@ replica_task_id_list=$3

pwd="$( cd "$( dirname "$0" )" && pwd )"
shell_dir="$( cd $pwd/.. && pwd )"
minos_config_dir=$(dirname $MINOS_CONFIG_FILE)/xiaomi-config/conf/pegasus
minos_client_dir=/home/work/pegasus/infra/minos/client
cd $shell_dir

minos_config=$minos_config_dir/pegasus-${cluster}.cfg
if [ ! -f $minos_config ]; then
echo "ERROR: minos config \"$minos_config\" not found"
exit 1
fi

minos_client=$minos_client_dir/deploy
if [ ! -f $minos_client ]; then
echo "ERROR: minos client \"$minos_client\" not found"
source ./scripts/minos_common.sh
find_cluster $cluster
if [ $? -ne 0 ]; then
echo "ERROR: cluster \"$cluster\" not found"
exit 1
fi

Expand All @@ -42,18 +35,15 @@ echo "Start time: `date`"
all_start_time=$((`date +%s`))
echo

echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.minos.show..."
cd $minos_client_dir
./deploy show pegasus $cluster &>/tmp/$UID.$PID.pegasus.offline_node_list.minos.show

echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.rs.list..."
grep 'Showing task [0-9][0-9]* of replica' /tmp/$UID.$PID.pegasus.offline_node_list.minos.show | awk '{print $5,$9}' | sed 's/(.*)$//' >/tmp/$UID.$PID.pegasus.offline_node_list.rs.list
replica_server_count=`cat /tmp/$UID.$PID.pegasus.offline_node_list.rs.list | wc -l`
rs_list_file="/tmp/$UID.$PID.pegasus.rolling_update.rs.list"
echo "Generating $rs_list_file..."
minos_show_replica $cluster $rs_list_file
replica_server_count=`cat $rs_list_file | wc -l`
if [ $replica_server_count -eq 0 ]; then
echo "ERROR: replica server count is 0 by minos show"
exit 1
fi
cd $shell_dir


echo "Generating /tmp/$UID.$PID.pegasus.offline_node_list.cluster_info..."
echo cluster_info | ./run.sh shell --cluster $meta_list 2>&1 | sed 's/ *$//' >/tmp/$UID.$PID.pegasus.offline_node_list.cluster_info
Expand Down Expand Up @@ -86,18 +76,20 @@ for id in `echo $replica_task_id_list | sed 's/,/ /g'` ; do
exit 1;
fi
fi
pair=`grep "^$id " /tmp/$UID.$PID.pegasus.offline_node_list.rs.list`
pair=`grep "^$id " $rs_list_file`
if [ "$pair" == "" ]; then
echo "ERROR: replica task id $id not found, refer to /tmp/$UID.$PID.pegasus.offline_node_list.minos.show"
echo "ERROR: replica task id $id not found, refer to $rs_list_file"
exit 1;
fi
address=`echo $pair | awk '{print $2}'`
node_str=`echo $pair | awk '{print $2}'`
node_ip=`getent hosts $node_str | awk '{print $1}'`
node=${node_ip}:${rs_port}
if [ "$id_list" != "" ]; then
id_list="$id_list $id"
address_list="$address_list,$address:$rs_port"
address_list="$address_list,$node"
else
id_list="$id"
address_list="$address:$rs_port"
address_list="$node"
fi
done

Expand Down
Loading