Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

scripts: check health status for all clusters #244

Merged
merged 4 commits into from
Dec 29, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions scripts/pegasus_check_clusters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#!/usr/bin/python
#
# Copyright (c) 2018, Xiaomi, Inc. All rights reserved.
# This source code is licensed under the Apache License Version 2.0, which
# can be found in the LICENSE file in the root directory of this source tree.
"""
Basic usage:

> vim ~/.bashrc
export PYTHONPATH=$PYTHONPATH:$HOME/.local/lib/python2.7/site-packages/
export PEGASUS_CONFIG_PATH=$HOME/work/conf_pegasus
export PEGASUS_SHELL_PATH=$HOME/work/pegasus
> pip install --user click
> ./pegasus_check_clusters.py --env c3srv
"""

import os
import click

from py_utils import *


# CLI entry point: for every cluster whose config file is found under
# $PEGASUS_CONFIG_PATH (optionally restricted by --env), print the nodes
# report when primaries are imbalanced and a warning when apps are unhealthy.
#
# NOTE: documented with comments rather than a docstring on purpose; click
# would otherwise show the docstring as the command's --help text.
@click.command()
@click.option(
    "--env", default="", help="Env of pegasus cluster, eg. c3srv or c4tst")
@click.option('-v', '--verbose', count=True)
def main(env, verbose):
    pegasus_config_path = os.getenv("PEGASUS_CONFIG_PATH")
    if pegasus_config_path is None:
        echo(
            "Please configure environment variable PEGASUS_CONFIG_PATH in your bashrc or zshrc",
            "red")
        # exit() is a helper installed by the site module for interactive
        # use; raising SystemExit is what it does and is always available.
        raise SystemExit(1)
    if env != "":
        echo("env = " + env)
    # The -v count is forwarded as the verbosity flag of py_utils.
    set_global_verbose(verbose)
    for cluster in list_pegasus_clusters(pegasus_config_path, env):
        echo("=== " + cluster.name())
        try:
            cluster.print_imbalance_nodes()
            cluster.print_unhealthy_partitions()
        except RuntimeError as e:
            # Report the failure and stop; remaining clusters are not checked.
            echo(str(e), "red")
            return
        echo("===")


if __name__ == "__main__":
    main()
61 changes: 61 additions & 0 deletions scripts/pegasus_check_ports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/python
#
# Copyright (c) 2018, Xiaomi, Inc. All rights reserved.
# This source code is licensed under the Apache License Version 2.0, which
# can be found in the LICENSE file in the root directory of this source tree.
"""
Basic usage:

> vim ~/.bashrc
export PYTHONPATH=$PYTHONPATH:$HOME/.local/lib/python2.7/site-packages/
export PEGASUS_CONFIG_PATH=$HOME/work/conf_pegasus
export PEGASUS_SHELL_PATH=$HOME/work/pegasus
> pip install --user click
> ./pegasus_check_ports.py --env c3srv
"""

import os
import click

from py_utils import *


# CLI entry point: list the meta port and meta host of every cluster found
# under $PEGASUS_CONFIG_PATH, flag port numbers reused on the same host, and
# print a recommended next port (highest port seen + 1000) per host.
#
# NOTE: documented with comments rather than a docstring on purpose; click
# would otherwise show the docstring as the command's --help text.
@click.command()
@click.option(
    # default="" keeps env a string when --env is omitted; with click's
    # implicit default of None the "pegasus-" + env prefix match would raise
    # TypeError.
    "--env", default="", help="Env of pegasus cluster, eg. c3srv or c4tst")
def main(env):
    pegasus_config_path = os.getenv("PEGASUS_CONFIG_PATH")
    if pegasus_config_path is None:
        echo(
            "Please configure environment variable PEGASUS_CONFIG_PATH in your bashrc or zshrc",
            "red")
        # exit() is a helper installed by the site module for interactive
        # use; raising SystemExit is what it does and is always available.
        raise SystemExit(1)
    clusters = list_pegasus_clusters(pegasus_config_path, env)
    # meta host -> set of meta ports already claimed on that host
    host_to_ports = {}
    for cluster in clusters:
        try:
            p = cluster.get_meta_port()
            h = cluster.get_meta_host()
            ports = host_to_ports.setdefault(h, set())
            if p in ports:
                echo(
                    "port number conflicted: {0} {1} [{2}]".format(
                        p, cluster.name(), h), "red")
                continue
            ports.add(p)
            echo("cluster {0}: {1} [{2}]".format(cluster.name(), p, h))
        except RuntimeError as e:
            # Report the failure and stop without printing the summary.
            echo(str(e), "red")
            return

    echo("")
    for h in host_to_ports:
        echo("recommended port number for [{0}] is: {1}".format(
            h, str(max(host_to_ports[h]) + 1000)))
        echo("host [{0}] has in total {1} clusters on it".format(
            h, len(host_to_ports[h])))
        echo("")


if __name__ == "__main__":
    main()
11 changes: 11 additions & 0 deletions scripts/py_utils/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/usr/bin/python
#
# Copyright (c) 2018, Xiaomi, Inc. All rights reserved.
# This source code is licensed under the Apache License Version 2.0, which
# can be found in the LICENSE file in the root directory of this source tree.

from .lib import set_global_verbose, echo, list_pegasus_clusters, PegasusCluster

# Names made available by `from py_utils import *`.
__all__ = [
'set_global_verbose', 'echo', 'list_pegasus_clusters', 'PegasusCluster'
]
126 changes: 126 additions & 0 deletions scripts/py_utils/lib.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
#!/usr/bin/python
#
# Copyright (c) 2018, Xiaomi, Inc. All rights reserved.
# This source code is licensed under the Apache License Version 2.0, which
# can be found in the LICENSE file in the root directory of this source tree.

import click
import commands
import os

# Verbosity flag read by PegasusCluster._run_shell to decide whether to echo
# the shell command line before running it.
_global_verbose = False


def set_global_verbose(val):
    """Set the module-wide verbosity flag.

    :param val: truthy to make ``PegasusCluster._run_shell`` echo each shell
        command before executing it
    """
    # Without this declaration the assignment below would only bind a
    # function-local name and the module-level flag would never change.
    global _global_verbose
    _global_verbose = val


def echo(message, color=None):
    """Print ``message`` through click, using ``color`` as the foreground.

    :param message: text to print
    :param color: value passed to ``click.style`` as ``fg``; ``None`` by
        default
    """
    styled = click.style(message, fg=color)
    click.echo(styled)


class PegasusCluster(object):
    """One Pegasus cluster, identified by its ``pegasus-<name>.cfg`` file.

    Health queries are answered by piping a command into ``./run.sh shell``
    inside the directory named by the PEGASUS_SHELL_PATH environment
    variable; port and host are read straight from the config file.
    """

    # print_imbalance_nodes() reports the cluster when the smallest per-node
    # primary count is below this fraction of the largest one.
    _MIN_BALANCE_RATIO = 0.8

    def __init__(self, cfg_file_name):
        """
        :param cfg_file_name: path of the cluster config file; the cluster
            name is its base name with "pegasus-" and ".cfg" removed

        Exits the process with status 1 when PEGASUS_SHELL_PATH is not set.
        """
        self._cluster_name = os.path.basename(cfg_file_name).replace(
            "pegasus-", "").replace(".cfg", "")
        self._shell_path = os.getenv("PEGASUS_SHELL_PATH")
        self._cfg_file_name = cfg_file_name
        if self._shell_path is None:
            echo(
                "Please configure environment variable PEGASUS_SHELL_PATH in your bashrc or zshrc",
                "red")
            # exit() is a helper installed by the site module for interactive
            # use; raising SystemExit is what it does and is always available.
            raise SystemExit(1)

    def print_unhealthy_partitions(self):
        """Echo one warning line when ``ls -d`` reports unhealthy apps.

        A non-zero write-unhealthy count takes precedence over the
        read-unhealthy count; nothing is printed when both are zero.

        :raises RuntimeError: if the shell command fails, or its output has
            no line for one of the two counters
        """
        list_detail = self._run_shell("ls -d").strip()

        read_unhealthy_app_count = self._parse_counter(
            list_detail, "read_unhealthy_app_count")
        write_unhealthy_app_count = self._parse_counter(
            list_detail, "write_unhealthy_app_count")

        if write_unhealthy_app_count > 0:
            echo("cluster is write unhealthy, write_unhealthy_app_count = " +
                 str(write_unhealthy_app_count))
            return
        if read_unhealthy_app_count > 0:
            echo("cluster is read unhealthy, read_unhealthy_app_count = " +
                 str(read_unhealthy_app_count))

    def print_imbalance_nodes(self):
        """Echo the ``nodes -d`` output when primaries are unevenly spread.

        Nothing is printed when no node row can be parsed or when every node
        holds zero primaries.

        :raises RuntimeError: if the shell command fails
        """
        nodes_detail = self._run_shell("nodes -d").strip()
        if self._is_imbalanced(self._primary_counts(nodes_detail)):
            echo(nodes_detail)

    def get_meta_port(self):
        """
        :return: integer value of the first config line starting with
            "base_port", or None when there is no such line
        """
        value = self._cfg_value("base_port")
        return None if value is None else int(value)

    def get_meta_host(self):
        """
        :return: stripped value of the first config line starting with
            "host.0", or None when there is no such line
        """
        return self._cfg_value("host.0")

    def name(self):
        """
        :return: cluster name derived from the config file name
        """
        return self._cluster_name

    @staticmethod
    def _parse_counter(text, key):
        """Return the integer after ':' on the first line starting with key.

        :raises RuntimeError: if no line of ``text`` starts with ``key``
        """
        for line in text.splitlines():
            if line.startswith(key):
                return int(line.split(":")[1])
        raise RuntimeError(
            "\"{0}\" not found in shell output".format(key))

    @staticmethod
    def _primary_counts(nodes_detail):
        """Collect the per-node primary counts from ``nodes -d`` output.

        The first line is skipped, as are rows with fewer than five columns
        or with a non-numeric fourth or fifth column; the fourth column is
        taken as the primary count.
        """
        counts = []
        for line in nodes_detail.splitlines()[1:]:
            columns = line.split()
            if len(columns) < 5:
                continue
            # Both columns must be numeric: the fifth filters out non-node
            # rows, the fourth is the value actually converted below.
            if not (columns[3].isdigit() and columns[4].isdigit()):
                continue
            counts.append(int(columns[3]))
        return counts

    @classmethod
    def _is_imbalanced(cls, counts):
        """Tell whether min(counts) / max(counts) is below the threshold.

        An empty list and an all-zero list count as balanced, which also
        avoids indexing an empty list or dividing by zero.
        """
        if not counts:
            return False
        largest = max(counts)
        if largest == 0:
            return False
        return float(min(counts)) / float(largest) < cls._MIN_BALANCE_RATIO

    def _cfg_value(self, key_prefix):
        """Return the stripped text between the first and second '=' of the
        first config line whose stripped form starts with ``key_prefix``;
        None when no line matches."""
        with open(self._cfg_file_name) as cfg:
            for line in cfg:
                if line.strip().startswith(key_prefix):
                    return line.split("=")[1].strip()
        return None

    def _run_shell(self, args):
        """
        :param args: arguments passed to ./run.sh shell (type `string`)
        :return: the output lines found after the "The cluster meta list is:"
            line and before the "dsn exit with code" line, each terminated
            by a newline
        :raises RuntimeError: if the command exits with a non-zero status
        """
        # NOTE(review): args and the cluster name are interpolated into the
        # shell command line unquoted; callers pass fixed command strings.
        cmd = "cd {1}; echo {0} | ./run.sh shell -n {2}".format(
            args, self._shell_path, self._cluster_name)
        if _global_verbose:
            echo("executing command: \"{0}\"".format(cmd))

        status, output = commands.getstatusoutput(cmd)
        if status != 0:
            raise RuntimeError("failed to execute \"{0}\": {1}".format(
                cmd, output))

        result_lines = []
        result_begin = False
        for line in output.splitlines():
            if line.startswith("The cluster meta list is:"):
                result_begin = True
                continue
            if line.startswith("dsn exit with code"):
                break
            if result_begin:
                result_lines.append(line + "\n")
        return "".join(result_lines)


def list_pegasus_clusters(config_path, env):
    """List the clusters that have a config file under ``config_path``.

    A directory entry counts as a cluster config when it is a regular file
    whose name starts with "pegasus-" + env, ends with ".cfg" and does not
    end with "proxy.cfg".

    :param config_path: directory holding the cluster config files
    :param env: cluster-name prefix to match; "" or None matches every
        cluster
    :return: list of PegasusCluster, ordered by config file name
    """
    # env may be None when a CLI option without a default is omitted;
    # concatenating it directly would raise TypeError.
    prefix = "pegasus-" + (env or "")
    clusters = []
    # os.listdir returns entries in arbitrary order; sort for stable output.
    for fname in sorted(os.listdir(config_path)):
        if not fname.startswith(prefix):
            continue
        if not fname.endswith(".cfg") or fname.endswith("proxy.cfg"):
            continue
        path = os.path.join(config_path, fname)
        if not os.path.isfile(path):
            continue
        clusters.append(PegasusCluster(path))
    return clusters