diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index f94d0ccb..54c8e889 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -13,7 +13,7 @@ jobs: container-test-job: runs-on: ubuntu-latest container: - image: scslab/coeus:0.9.1 + image: scslab/coeus:0.9.3 steps: - name: Check for dockerenv file run: (ls /.dockerenv && echo Found dockerenv) || (echo No dockerenv) diff --git a/CI/jarvis-util/.coveragerc b/CI/jarvis-util/.coveragerc new file mode 100644 index 00000000..f1db9952 --- /dev/null +++ b/CI/jarvis-util/.coveragerc @@ -0,0 +1,4 @@ +# .coveragerc +[run] +source = . +omit = *test* \ No newline at end of file diff --git a/CI/jarvis-util/.gitignore b/CI/jarvis-util/.gitignore new file mode 100644 index 00000000..80ced04e --- /dev/null +++ b/CI/jarvis-util/.gitignore @@ -0,0 +1,139 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class +.idea +hostfile.txt +lcov.info + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ +/.idea/jarvis-cd.iml +/.idea/misc.xml +/.idea/modules.xml +/.idea/inspectionProfiles/profiles_settings.xml +/.idea/inspectionProfiles/Project_Default.xml +/.idea/vcs.xml +/.idea/deployment.xml diff --git a/CI/jarvis-util/LICENSE b/CI/jarvis-util/LICENSE new file mode 100644 index 00000000..5db7c2cc --- /dev/null +++ b/CI/jarvis-util/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2022-present Luke Logan and other contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files 
(the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/CI/jarvis-util/README.md b/CI/jarvis-util/README.md new file mode 100644 index 00000000..711a1466 --- /dev/null +++ b/CI/jarvis-util/README.md @@ -0,0 +1,118 @@ +# Jarvis UTIL + +Jarvis-util is a library which contains various utilities to aid with +creating shell scripts within Python. This library contains wrappers +for executing shell commands locally, SSH, SCP, MPI, argument parsing, +and various other random utilities. + +![Build](https://github.com/lukemartinlogan/jarvis-util/workflows/GitHub%20Actions/badge.svg) + +[![Coverage Status](https://coveralls.io/repos/github/lukemartinlogan/jarvis-util/badge.svg?branch=master)](https://coveralls.io/github/lukemartinlogan/jarvis-util?branch=master) + +## Installation + +For now, we only consider manual installation +```bash +cd jarvis-util +python3 -m pip install -r requirements.txt +python3 -m pip install -e . +``` + +## Executing a program + +The following code will execute a command on the local machine. +The output will NOT be collected into an in-memory buffer. 
+The output will be printed to the terminal as it occurs. + +```python +from jarvis_util.shell.exec import Exec +from jarvis_util.shell.local_exec import LocalExecInfo + +node = Exec('echo hello', LocalExecInfo(collect_output=False)) +``` + +Programs can also be executed asynchronously: +```python +from jarvis_util.shell.exec import Exec +from jarvis_util.shell.local_exec import LocalExecInfo + +node = Exec('echo hello', LocalExecInfo(collect_output=False, + exec_async=True)) +node.wait() +``` + +Various examples of outputs: +```python +from jarvis_util.shell.exec import Exec +from jarvis_util.shell.local_exec import LocalExecInfo + +# Will ONLY print to the terminal +node = Exec('echo hello', LocalExecInfo(collect_output=False)) +# Will collect AND print to the terminal +node = Exec('echo hello', LocalExecInfo(collect_output=True)) +# Will collect BUT NOT print to the terminal +node = Exec('echo hello', LocalExecInfo(collect_output=True, + hide_output=True)) +# Will collect, pipe to file, and print to terminal +node = Exec('echo hello', LocalExecInfo(collect_output=True, + pipe_stdout='/tmp/stdout.txt', + pipe_stderr='/tmp/stderr.txt')) +``` + +This is useful if you have a program which runs using a daemon mode. + +## Executing an MPI program + +The following code will execute the "hostname" command on the local +machine 24 times using MPI. 
+ +```python +from jarvis_util.shell.exec import Exec +from jarvis_util.shell.mpi_exec import MpiExecInfo + +node = Exec('hostname', MpiExecInfo(hostfile=None, + nprocs=24, + ppn=None, + collect_output=False)) +``` + +## Executing an SSH program + +The following code will execute the "hostname" command on all machines +in the hostfile + +```python +from jarvis_util.shell.exec import Exec +from jarvis_util.shell.pssh_exec import PsshExecInfo + +node = Exec('hostname', PsshExecInfo(hostfile="/tmp/hostfile.txt", + collect_output=False)) +``` + +## The contents of a hostfile + +A hostfile can have the following syntax: +``` +ares-comp-01 +ares-comp-[02-04] +ares-comp-[05-09,11,12-14]-40g +``` + +These will be expanded internally by PSSH and MPI. + +# Unit tests + +We run our unit tests in a docker container, which is located underneath +the CI directory. This is because we need to test multi-node functionality, +without having multiple nodes. To setup unit testing, perform the following: + +1. Install Docker +2. Setup our "ci" container +3. Run the unit tests + +``` +``` + +# Contributing + +We use the Google Python Style guide (pylintrc). 
\ No newline at end of file diff --git a/CI/jarvis-util/bin/jarvis-imports b/CI/jarvis-util/bin/jarvis-imports new file mode 100644 index 00000000..25fc19a9 --- /dev/null +++ b/CI/jarvis-util/bin/jarvis-imports @@ -0,0 +1,8 @@ +#!/usr/bin/env python3 + +from jarvis_util.util.import_all import * +import pathlib +import os + + +build_global_import_from_bin('jarvis_util') diff --git a/CI/jarvis-util/ci/cluster/Dockerfile b/CI/jarvis-util/ci/cluster/Dockerfile new file mode 100644 index 00000000..548937e0 --- /dev/null +++ b/CI/jarvis-util/ci/cluster/Dockerfile @@ -0,0 +1,55 @@ +# Install ubuntu 20.04 +FROM ubuntu:20.04 +LABEL maintainer="llogan@hawk.iit.edu" +LABEL version="0.0" +LABEL description="An example docker image" + +# Disable Prompt During Packages Installation +ARG DEBIAN_FRONTEND=noninteractive + +# Update ubuntu +RUN apt update && apt install + +# Install some basic packages +RUN apt install -y \ + openssh-server \ + sudo git nano vim \ + docker \ + mpich \ + gcc \ + g++ \ + gfortran \ + libtool \ + libtool-bin \ + automake \ + autoconf + +# Create a new user +RUN useradd -m sshuser +RUN usermod -aG sudo sshuser +RUN passwd -d sshuser + +# Copy the host's SSH keys +# Docker requires COPY be relative to the current working +# directory. We cannot pass ~/.ssh/id_rsa unfortunately... 
+ENV SSHDIR=/home/sshuser/.ssh +RUN sudo -u sshuser mkdir ${SSHDIR} +COPY id_rsa ${SSHDIR}/id_rsa +COPY id_rsa.pub ${SSHDIR}/id_rsa.pub + +# Authorize host SSH keys +RUN sudo -u sshuser touch ${SSHDIR}/authorized_keys +RUN cat ${SSHDIR}/id_rsa.pub >> ${SSHDIR}/authorized_keys + +# Set SSH permissions +RUN chmod 700 ${SSHDIR} +RUN chmod 644 ${SSHDIR}/id_rsa.pub +RUN chmod 600 ${SSHDIR}/id_rsa +RUN chmod 600 ${SSHDIR}/authorized_keys + +# Enable passwordless SSH +RUN sed -i 's/#PermitEmptyPasswords no/PermitEmptyPasswords yes/' /etc/ssh/sshd_config + +# Start SSHD and wait forever +RUN mkdir /run/sshd +CMD ["/usr/sbin/sshd", "-D"] \ No newline at end of file diff --git a/CI/jarvis-util/ci/cluster/docker-compose.yml b/CI/jarvis-util/ci/cluster/docker-compose.yml new file mode 100644 index 00000000..aa747708 --- /dev/null +++ b/CI/jarvis-util/ci/cluster/docker-compose.yml @@ -0,0 +1,24 @@ +version: "3" + +services: + node1: + build: . + links: + - node2 + networks: + - net + hostname: node1 + stdin_open: true + tty: true + + node2: + build: . + networks: + - net + hostname: node2 + stdin_open: true + tty: true + +networks: + net: + driver: bridge \ No newline at end of file diff --git a/CI/jarvis-util/ci/install_deps.sh b/CI/jarvis-util/ci/install_deps.sh new file mode 100644 index 00000000..0d1f0447 --- /dev/null +++ b/CI/jarvis-util/ci/install_deps.sh @@ -0,0 +1,12 @@ +#!/bin/bash +sudo apt update +sudo apt install -y \ +docker \ +mpich \ +gcc \ +g++ \ +gfortran \ +libtool \ +libtool-bin \ +automake \ +autoconf \ No newline at end of file diff --git a/CI/jarvis-util/ci/install_jarvis.sh b/CI/jarvis-util/ci/install_jarvis.sh new file mode 100644 index 00000000..58aa283c --- /dev/null +++ b/CI/jarvis-util/ci/install_jarvis.sh @@ -0,0 +1,3 @@ +#!/bin/bash +python3 -m pip install -r requirements.txt +python3 -m pip install -e . 
diff --git a/CI/jarvis-util/ci/install_spack.sh b/CI/jarvis-util/ci/install_spack.sh new file mode 100644 index 00000000..0d1f0447 --- /dev/null +++ b/CI/jarvis-util/ci/install_spack.sh @@ -0,0 +1,12 @@ +#!/bin/bash +sudo apt update +sudo apt install -y \ +docker \ +mpich \ +gcc \ +g++ \ +gfortran \ +libtool \ +libtool-bin \ +automake \ +autoconf \ No newline at end of file diff --git a/CI/jarvis-util/ci/lint.sh b/CI/jarvis-util/ci/lint.sh new file mode 100644 index 00000000..a1af3ff4 --- /dev/null +++ b/CI/jarvis-util/ci/lint.sh @@ -0,0 +1,2 @@ +#!/bin/bash +pylint "${PWD}"/jarvis_util \ No newline at end of file diff --git a/CI/jarvis-util/ci/run_tests.sh b/CI/jarvis-util/ci/run_tests.sh new file mode 100644 index 00000000..abd4e290 --- /dev/null +++ b/CI/jarvis-util/ci/run_tests.sh @@ -0,0 +1,5 @@ +#!/bin/bash +coverage run -m pytest +rm -rf "*.pyc" +coverage report +coverage-lcov \ No newline at end of file diff --git a/CI/jarvis-util/jarvis_util/__init__.py b/CI/jarvis-util/jarvis_util/__init__.py new file mode 100644 index 00000000..1019dd21 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/__init__.py @@ -0,0 +1,25 @@ +"""Import all modules""" +from jarvis_util.util.expand_env import * +from jarvis_util.util.naming import * +from jarvis_util.util.hostfile import * +from jarvis_util.util.size_conv import * +from jarvis_util.util.import_all import * +from jarvis_util.util.import_mod import * +from jarvis_util.util.argparse import * +from jarvis_util.serialize.ini_file import * +from jarvis_util.serialize.yaml_file import * +from jarvis_util.serialize.text_file import * +from jarvis_util.serialize.serializer import * +from jarvis_util.serialize.pickle import * +from jarvis_util.shell.filesystem import * +from jarvis_util.shell.exec import * +from jarvis_util.shell.exec_info import * +from jarvis_util.shell.ssh_exec import * +from jarvis_util.shell.pssh_exec import * +from jarvis_util.shell.process import * +from jarvis_util.shell.pscp import * +from 
jarvis_util.shell.scp import * +from jarvis_util.shell.mpi_exec import * +from jarvis_util.shell.local_exec import * +from jarvis_util.introspect.system_info import * +from jarvis_util.jutil_manager import * diff --git a/CI/jarvis-util/jarvis_util/introspect/__init__.py b/CI/jarvis-util/jarvis_util/introspect/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/CI/jarvis-util/jarvis_util/introspect/system_info.py b/CI/jarvis-util/jarvis_util/introspect/system_info.py new file mode 100644 index 00000000..5a9d9448 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/introspect/system_info.py @@ -0,0 +1,635 @@ +""" +This module provides methods for querying the information of the host +system. This can be used to make scripts more portable. +""" + +import re +import platform +from jarvis_util.shell.exec import Exec +from jarvis_util.util.size_conv import SizeConv +from jarvis_util.serialize.yaml_file import YamlFile +import json +import pandas as pd +import numpy as np +from enum import Enum +import shlex + +# pylint: disable=C0121 + +class SystemInfo: + """ + This class queries information about the host machine, such as OS, + CPU, and kernel + """ + + instance_ = None + + @staticmethod + def get_instance(): + if SystemInfo.instance_ is None: + SystemInfo.instance_ = SystemInfo() + return SystemInfo.instance_ + + def __init__(self): + with open('/etc/os-release', 'r', encoding='utf-8') as fp: + lines = fp.read().splitlines() + self.os = self._detect_os_type(lines) + self.os_like = self._detect_os_like_type(lines) + self.os_version = self._detect_os_version(lines) + self.ksemantic = platform.platform() + self.krelease = platform.release() + self.ktype = platform.system() + self.cpu = platform.processor() + self.cpu_family = platform.machine() + + def _detect_os_type(self, lines): + for line in lines: + if 'ID=' in line: + if 'ubuntu' in line: + return 'ubuntu' + elif 'centos' in line: + return 'centos' + elif 'debian' in line: + return 'debian' + + def 
_detect_os_like_type(self, lines): + for line in lines: + if 'ID_LIKE=' in line: + if 'ubuntu' in line: + return 'ubuntu' + elif 'centos' in line: + return 'centos' + elif 'debian' in line: + return 'debian' + + def _detect_os_version(self, lines): + for line in lines: + grp = re.match('VERSION_ID=\"(.*)\"', line) + if grp: + return grp.group(1) + + def __hash__(self): + return hash(str([self.os, self.os_like, + self.os_version, self.ksemantic, + self.krelease, self.ktype, + self.cpu, self.cpu_family])) + + def __eq__(self, other): + return ( + (self.os == other.os) and + (self.os_like == other.os_like) and + (self.os_version == other.os_version) and + (self.ksemantic == other.ksemantic) and + (self.krelease == other.krelease) and + (self.cpu == other.cpu) and + (self.cpu_family == other.cpu_family) + ) + + +class Lsblk(Exec): + """ + List all block devices in the system per-node. Lsblk will return + a JSON output + + A table is stored per-host: + parent: the parent device of the partition (e.g., /dev/sda or NaN) + device: the name of the partition (e.g., /dev/sda1) + size: total size of the partition (bytes) + mount: where the partition is mounted (if anywhere) + model: the exact model of the device + tran: the transport of the device (e.g., /dev/nvme) + rota: whether or not the device is rotational + host: the host this record corresponds to + """ + + def __init__(self, exec_info): + cmd = 'lsblk -o NAME,SIZE,MODEL,TRAN,MOUNTPOINT,ROTA -J -s' + super().__init__(cmd, exec_info.mod(collect_output=True)) + self.exec_async = exec_info.exec_async + self.graph = {} + if not self.exec_async: + self.wait() + + def wait(self): + super().wait() + for host, stdout in self.stdout.items(): + lsblk_data = json.loads(stdout)['blockdevices'] + partitions = [] + devs = {} + for partition in lsblk_data: + dev = partition['children'][0] + partitions.append({ + 'parent': f'/dev/{dev["name"]}', + 'device': f'/dev/{partition["name"]}', + 'size': SizeConv.to_int(partition['size']), + 
'mount': partition['mountpoint'], + 'host': host + }) + devs[dev['name']] = { + 'parent': f'/dev/{dev["name"]}', + 'size': SizeConv.to_int(dev['size']), + 'model': dev['model'], + 'tran': dev['tran'].lower(), + 'mount': dev['mountpoint'], + 'rota': dev['rota'], + 'host': host + } + devs = list(devs.values()) + part_df = pd.DataFrame(partitions) + dev_df = pd.DataFrame(devs) + total_df = pd.merge(part_df, + dev_df[['parent', 'model', 'tran', 'host']], + on=['parent', 'host']) + dev_df = dev_df.rename(columns={'parent': 'device'}) + total_df = pd.concat([total_df, dev_df]) + self.df = total_df + + +class Blkid(Exec): + """ + List all filesystems (even those unmounted) and their properties + + Stores a per-host table with the following: + device: the device (or partition) which stores the data (e.g., /dev/sda) + fs_type: the type of filesystem (e.g., ext4) + uuid: filesystem-levle uuid from the FS metadata + partuuid: the partition-lable UUID for the partition + host: the host this entry corresponds to + """ + def __init__(self, exec_info): + cmd = 'blkid' + super().__init__(cmd, exec_info.mod(collect_output=True)) + self.exec_async = exec_info.exec_async + self.graph = {} + if not self.exec_async: + self.wait() + + def wait(self): + super().wait() + for host, stdout in self.stdout.items(): + devices = stdout.splitlines() + dev_list = [] + for dev in devices: + dev_dict = {} + toks = shlex.split(dev) + dev_name = toks[0].split(':')[0] + dev_dict['device'] = dev_name + dev_dict['host'] = host + for tok in toks[1:]: + keyval = tok.split('=') + key = keyval[0].lower() + val = ' '.join(keyval[1:]) + dev_dict[key] = val + dev_list.append(dev_dict) + df = pd.DataFrame(dev_list) + df = df.rename(columns={'type': 'fs_type'}) + self.df = df + + +class ListFses(Exec): + """ + List all mounted filesystems + + Will store a per-host dictionary containing: + device: the device which contains the filesystem + fs_size: total size of the filesystem + used: total nubmer of bytes used + 
avail: total number of bytes remaining + use%: the percent of capacity used + fs_mount: where the filesystem is mounted + host: the host this entry corresponds to + """ + + def __init__(self, exec_info): + cmd = 'df -h' + super().__init__(cmd, exec_info.mod(collect_output=True)) + self.exec_async = exec_info.exec_async + self.graph = {} + if not self.exec_async: + self.wait() + + def wait(self): + super().wait() + for host, stdout in self.stdout.items(): + lines = stdout.strip().splitlines() + columns = ['device', 'fs_size', 'used', + 'avail', 'use%', 'fs_mount', 'host'] + rows = [line.split() + [host] for line in lines[1:]] + df = pd.DataFrame(rows, columns=columns) + # pylint: disable=W0108 + df.loc[:, 'fs_size'] = df['fs_size'].apply( + lambda x : SizeConv.to_int(x)) + df.loc[:, 'used'] = df['used'].apply( + lambda x: SizeConv.to_int(x)) + df.loc[:, 'avail'] = df['avail'].apply( + lambda x : SizeConv.to_int(x)) + # pylint: enable=W0108 + self.df = df + + +class FiInfo(Exec): + """ + List all networks and their information + provider: network protocol (e.g., sockets, tcp, ib) + fabric: IP address + domain: network domain + version: network version + type: packet type (e.g., DGRAM) + protocol: protocol constant + host: the host this network corresponds to + """ + def __init__(self, exec_info): + super().__init__('fi_info', exec_info.mod(collect_output=True)) + self.exec_async = exec_info.exec_async + self.graph = {} + if not self.exec_async: + self.wait() + + def wait(self): + super().wait() + for host, stdout in self.stdout.items(): + lines = stdout.strip().splitlines() + providers = [] + for line in lines: + if 'provider' in line: + providers.append({ + 'provider': line.split(':')[1], + 'host': host + }) + else: + splits = line.split(':') + key = splits[0].strip() + val = splits[1].strip() + if 'fabric' in key: + val = val.split('/')[0] + providers[-1][key] = val + self.df = pd.DataFrame(providers) + + +class StorageDeviceType(Enum): + PMEM='pmem' + NVME='nvme' 
+ SSD='ssd' + HDD='hdd' + + +class ResourceGraph: + """ + Stores helpful information about storage and networking info for machines. + + Two tables are stored to make decisions on application deployment. + fs: + parent: the parent device of the partition (e.g., /dev/sda or NaN) + device: the name of the device (e.g., /dev/sda1 or /dev/sda) + size: total size of the device (bytes) + mount: where the device is mounted (if anywhere) + model: the exact model of the device + rota: whether the device is rotational or not + tran: the transport of the device (e.g., /dev/nvme) + fs_type: the type of filesystem (e.g., ext4) + uuid: filesystem-levle uuid from the FS metadata + fs_size: total size of the filesystem + avail: total number of bytes remaining + shared: whether the this is a shared service or not + host: the host this record corresponds to + net: + provider: network protocol (e.g., sockets, tcp, ib) + fabric: IP address + domain: network domain + host: the host this network corresponds to + + TODO: Need to verify on more than ubuntu20.04 + TODO: Can we make this work for windows? + TODO: Can we make this work even when hosts have different OSes? + """ + + def __init__(self): + self.lsblk = None + self.blkid = None + self.list_fs = None + self.fi_info = None + self.fs_columns = [ + 'parent', 'device', 'size', 'mount', 'model', 'rota', + 'tran', 'fs_type', 'uuid', 'fs_size', + 'avail', 'shared', 'host' + ] + self.net_columns = [ + 'provider', 'fabric', 'domain', 'host' + ] + self.all_fs = pd.DataFrame(columns=self.fs_columns) + self.all_net = pd.DataFrame(columns=self.net_columns) + self.fs_settings = { + 'register': [], + 'filter_mounts': {} + } + self.net_settings = { + 'register': [], + 'track_ips': {} + } + self.hosts = None + + def build(self, exec_info, introspect=True): + """ + Build a resource graph. 
+ + :param exec_info: Where to collect resource information + :param introspect: Whether to introspect system info, or rely solely + on admin-defined settings + :return: self + """ + if introspect: + self._introspect(exec_info) + self.apply() + return self + + def _introspect(self, exec_info): + """ + Introspect the cluster for resources. + + :param exec_info: Where to collect resource information + :return: None + """ + self.lsblk = Lsblk(exec_info) + self.blkid = Blkid(exec_info) + self.list_fs = ListFses(exec_info) + self.fi_info = FiInfo(exec_info) + self.hosts = exec_info.hostfile.hosts + self.all_fs = pd.merge(self.lsblk.df, + self.blkid.df, + on=['device', 'host'], + how='outer') + self.all_fs['shared'] = False + self.all_fs = pd.merge(self.all_fs, + self.list_fs.df, + on=['device', 'host'], + how='outer') + self.all_fs['shared'].fillna(True, inplace=True) + self.all_fs.drop(['used', 'use%', 'fs_mount', 'partuuid'], + axis=1, inplace=True) + self.all_fs['mount'].fillna(value='', inplace=True) + net_df = self.fi_info.df + net_df['speed'] = np.nan + net_df.drop(['version', 'type', 'protocol'], + axis=1, inplace=True) + self.all_net = net_df + + def save(self, path): + """ + Save the resource graph YAML file + + :param path: the path to save the file + :return: None + """ + graph = { + 'hosts': self.hosts, + 'fs': self.all_fs.to_dict('records'), + 'net': self.all_net.to_dict('records'), + 'fs_settings': self.fs_settings, + 'net_settings': self.net_settings + } + YamlFile(path).save(graph) + + def load(self, path): + """ + Load resource graph from storage. 
+ + :param path: The path to the resource graph YAML file + :return: self + """ + graph = YamlFile(path).load() + self.hosts = graph['hosts'] + self.all_fs = pd.DataFrame(graph['fs']) + self.all_net = pd.DataFrame(graph['net']) + self.fs = None + self.net = None + self.fs_settings = graph['fs_settings'] + self.net_settings = graph['net_settings'] + self.apply() + return self + + def set_hosts(self, hosts): + """ + Set the set of hosts this resource graph covers + + :param hosts: Hostfile() + :return: None + """ + self.hosts = hosts.hosts_ip + + def add_storage(self, hosts, **kwargs): + """ + Register a storage device record + + :param hosts: Hostfile() indicating set of hosts to make record for + :param kwargs: storage record + :return: None + """ + for host in hosts.hosts: + record = kwargs.copy() + record['host'] = host + self.fs_settings['register'].append(record) + + def add_net(self, hosts, **kwargs): + """ + Register a network record + + :param hosts: Hostfile() indicating set of hosts to make record for + :param kwargs: net record + :return: None + """ + for host, ip in zip(hosts.hosts, hosts.hosts_ip): + record = kwargs.copy() + record['fabric'] = ip + record['host'] = host + self.net_settings['register'].append(record) + + def filter_fs(self, mount_re, + mount_suffix=None, tran=None): + """ + Track all filesystems + devices matching the mount regex. + + :param mount_re: The regex to match a set of mountpoints + :param mount_suffix: After the mount_re is matched, append this path + to the mountpoint to indicate where users can access data. A typical + value for this is /${USER}, indicating the mountpoint has a subdirectory + per-user where they can access data. 
+ :param shared: Whether this mount point is shared + :param tran: The transport of this device + :return: self + """ + self.fs_settings['filter_mounts']['mount_re'] = { + 'mount_re': mount_re, + 'mount_suffix': mount_suffix, + 'tran': tran + } + return self + + def filter_ip(self, ip_re, speed=None): + """ + Track all IPs matching the regex. The IPs with this regex all have + a certain speed. + + :param ip_re: The regex to match + :param speed: The speed of the fabric + :return: self + """ + self.net_settings['track_ips'][ip_re] = { + 'ip_re': ip_re, + 'speed': SizeConv.to_int(speed) if speed is not None else speed + } + return self + + def filter_hosts(self, hosts, speed=None): + """ + Track all ips matching the hostnames. + + :param hosts: Hostfile() of the hosts to filter for + :param speed: Speed of the interconnect (e.g., 1gbps) + :return: self + """ + for host in hosts.hosts_ip: + self.filter_ip(host, speed) + return self + + def apply(self): + """ + Apply fs and net settings to the resource graph + + :return: self + """ + self._apply_fs_settings() + self._apply_net_settings() + # self.fs.size = self.fs.size.fillna(0) + # self.fs.avail = self.fs.avail.fillna(0) + # self.fs.fs_size = self.fs.fs_size.fillna(0) + return self + + def _apply_fs_settings(self): + if len(self.fs_settings) == 0: + self.fs = self.all_fs + return + df = self.all_fs + self.fs = pd.DataFrame(columns=self.all_net.columns) + for fs_set in self.fs_settings['filter_mounts'].values(): + mount_re = fs_set['mount_re'] + mount_suffix = fs_set['mount_suffix'] + tran = fs_set['tran'] + with_mount = df[df.mount.str.contains(mount_re)] + if mount_suffix is not None: + with_mount['mount'] += mount_suffix + if tran is not None: + with_mount['tran'] = tran + self.fs = pd.concat([self.fs, with_mount]) + admin_df = pd.DataFrame(self.fs_settings['register'], + columns=self.fs_columns) + self.fs = pd.concat([self.fs, admin_df]) + + def _apply_net_settings(self): + if len(self.net_settings) == 0: + 
self.net = self.all_net + return + self.net = pd.DataFrame(columns=self.all_net.columns) + df = self.all_net + for net_set in self.net_settings['track_ips'].values(): + ip_re = net_set['ip_re'] + speed = net_set['speed'] + with_ip = df[df['fabric'].str.contains(ip_re)] + with_ip['speed'] = speed + self.net = pd.concat([self.net, with_ip]) + admin_df = pd.DataFrame(self.net_settings['register'], + columns=self.net_columns) + self.net = pd.concat([self.net, admin_df]) + + def find_shared_storage(self): + """ + Find the set of shared storage services + + :return: Dataframe + """ + df = self.fs + return df[df.shared == True] + + def find_storage(self, + dev_types=None, + is_mounted=True, + common=False, + count_per_node=None, + count_per_dev=None, + min_cap=None, + min_avail=None): + """ + Find a set of storage devices. + + :param dev_types: Search for devices of type in order. Either a list + or a string. + :param is_mounted: Search only for mounted devices + :param common: Remove mount points that are not common across all hosts + :param count_per_node: Choose only a subset of devices matching query + :param count_per_dev: Choose only a subset of devices matching query + :param min_cap: Remove devices with too little overall capacity + :param min_avail: Remove devices with too little available space + :return: Dataframe + """ + df = self.fs + # Remove pfs + df = df[df.shared == False] + # Filter devices by whether or not a mount is needed + if is_mounted: + df = df[df.mount.notna()] + # Find devices of a particular type + if dev_types is not None: + matching_devs = pd.DataFrame(columns=df.columns) + if isinstance(dev_types, str): + dev_types = [dev_types] + for dev_type in dev_types: + if dev_type == StorageDeviceType.HDD: + devs = df[(df.tran == 'sata') & (df.rota == True)] + elif dev_type == StorageDeviceType.SSD: + devs = df[(df.tran == 'sata') & (df.rota == False)] + elif dev_type == StorageDeviceType.NVME: + devs = df[(df.tran == 'nvme')] + matching_devs = 
pd.concat([matching_devs, devs]) + df = matching_devs + # Get the set of mounts common between all hosts + if common: + df = df.groupby(['mount']).filter( + lambda x: len(x) == len(self.hosts)).reset_index(drop=True) + # Remove storage with too little capacity + if min_cap is not None: + df = df[df.size >= min_cap] + # Remove storage with too little available space + if min_avail is not None: + df = df[df.avail >= min_avail] + # Take a certain number of each device per-host + if count_per_dev is not None: + df = df.groupby(['tran', 'rota', 'host']).\ + head(count_per_dev).reset_index(drop=True) + # Take a certain number of matched devices per-host + if count_per_node is not None: + df = df.groupby('host').head(count_per_node).reset_index(drop=True) + return df + + def find_net_info(self, hosts, + providers=None): + """ + Find the set of networks common between each host. + + :param hosts: A Hostfile() data structure containing the set of + all hosts to find network information for + :param providers: The network protocols to search for. + :return: Dataframe + """ + df = self.net + # Get the set of fabrics corresponding to these hosts + df = df[df.fabric.isin(hosts.hosts_ip)] + # Filter out protocols which are not common between these hosts + df = df.groupby('provider').filter( + lambda x: len(x) == len(hosts)).reset_index(drop=True) + # Choose only a subset of providers + if providers is not None: + if isinstance(providers, str): + providers = [providers] + df = df[df.provider.isin(providers)] + return df + +# pylint: enable=C0121 diff --git a/CI/jarvis-util/jarvis_util/jutil_manager.py b/CI/jarvis-util/jarvis_util/jutil_manager.py new file mode 100644 index 00000000..c229b244 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/jutil_manager.py @@ -0,0 +1,28 @@ +""" +This file contains properties which are globally accessible to all +jarvis-util modules. This can be used to configure various aspects +of jarvis, such as output. 
+""" + + +class JutilManager: + """ + A singleton which stores various properties that can be queried by + internally by jutil modules. This includes properties such output + management. + """ + + instance_ = None + + @staticmethod + def get_instance(): + if JutilManager.instance_ is None: + JutilManager.instance_ = JutilManager() + return JutilManager.instance_ + + def __init__(self): + self.collect_output = False + self.hide_output = False + self.debug_mpi_exec = False + self.debug_local_exec = False + diff --git a/CI/jarvis-util/jarvis_util/serialize/__init__.py b/CI/jarvis-util/jarvis_util/serialize/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/CI/jarvis-util/jarvis_util/serialize/ini_file.py b/CI/jarvis-util/jarvis_util/serialize/ini_file.py new file mode 100644 index 00000000..8da174ff --- /dev/null +++ b/CI/jarvis-util/jarvis_util/serialize/ini_file.py @@ -0,0 +1,24 @@ +""" +This module contains methods to serialize and deserialize data from +a human-readable ini file. +""" +import configparser +from jarvis_util.serialize.serializer import Serializer + + +class IniFile(Serializer): + """ + This class contains methods to serialize and deserialize data from + a human-readable ini file. + """ + def __init__(self, path): + self.path = path + + def load(self): + config = configparser.ConfigParser() + config.read(self.path) + return config + + def save(self, data): + with open(self.path, 'w', encoding='utf-8') as fp: + data.write(fp) diff --git a/CI/jarvis-util/jarvis_util/serialize/pickle.py b/CI/jarvis-util/jarvis_util/serialize/pickle.py new file mode 100644 index 00000000..a90ae12d --- /dev/null +++ b/CI/jarvis-util/jarvis_util/serialize/pickle.py @@ -0,0 +1,24 @@ +""" +This module contains methods to serialize and deserialize data from +a pickle file. 
+""" + +import pickle as pkl +from jarvis_util.serialize.serializer import Serializer + + +class PickleFile(Serializer): + """ + This class serializes and deserializes data from a pickle file + """ + + def __init__(self, path): + self.path = path + + def load(self): + with open(self.path, 'rb') as fp: + return pkl.load(fp) + + def save(self, data): + with open(self.path, 'wb') as fp: + pkl.dump(data, fp) diff --git a/CI/jarvis-util/jarvis_util/serialize/serializer.py b/CI/jarvis-util/jarvis_util/serialize/serializer.py new file mode 100644 index 00000000..8647945a --- /dev/null +++ b/CI/jarvis-util/jarvis_util/serialize/serializer.py @@ -0,0 +1,21 @@ +""" +This module contains an abstract class used to define classes which +serialize data to a file. +""" + +from abc import ABC, abstractmethod + + +class Serializer(ABC): + """ + An abstract class which loads serialized data from a file and + saves serialized data to a file. + """ + + @abstractmethod + def load(self): + pass + + @abstractmethod + def save(self, data): + pass diff --git a/CI/jarvis-util/jarvis_util/serialize/text_file.py b/CI/jarvis-util/jarvis_util/serialize/text_file.py new file mode 100644 index 00000000..214c6bb9 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/serialize/text_file.py @@ -0,0 +1,22 @@ +""" +This module stores data into a file in a human-readable way +""" +from jarvis_util.serialize.serializer import Serializer + + +class TextFile(Serializer): + """ + This class stores data directly into a file using str() as the + serialization method. The data is intended to be human-readable. 
+ """ + def __init__(self, path): + self.path = path + + def load(self): + with open(self.path, 'r', encoding='utf-8') as fp: + data = fp.read() + return data + + def save(self, data): + with open(self.path, 'w', encoding='utf-8') as fp: + fp.write(data) diff --git a/CI/jarvis-util/jarvis_util/serialize/yaml_file.py b/CI/jarvis-util/jarvis_util/serialize/yaml_file.py new file mode 100644 index 00000000..5892f6bc --- /dev/null +++ b/CI/jarvis-util/jarvis_util/serialize/yaml_file.py @@ -0,0 +1,24 @@ +""" +This module contains methods to serialize and deserialize data from +a human-readable YAML file. +""" +from jarvis_util.serialize.serializer import Serializer +import yaml + + +class YamlFile(Serializer): + """ + This class contains methods to serialize and deserialize data from + a human-readable YAML file. + """ + def __init__(self, path): + self.path = path + + def load(self): + with open(self.path, 'r', encoding='utf-8') as fp: + return yaml.load(fp, Loader=yaml.FullLoader) + return None + + def save(self, data): + with open(self.path, 'w', encoding='utf-8') as fp: + yaml.dump(data, fp) diff --git a/CI/jarvis-util/jarvis_util/shell/__init__.py b/CI/jarvis-util/jarvis_util/shell/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/CI/jarvis-util/jarvis_util/shell/exec.py b/CI/jarvis-util/jarvis_util/shell/exec.py new file mode 100644 index 00000000..f9fef066 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/shell/exec.py @@ -0,0 +1,59 @@ +""" +This module provides mechanisms to execute binaries either locally or +remotely. +""" + +from .local_exec import LocalExec +from .pssh_exec import PsshExec +from .pssh_exec import SshExec +from .mpi_exec import MpiExec +from .exec_info import ExecInfo, ExecType, Executable + + +class Exec(Executable): + """ + This class is a factory which wraps around various shell command + execution stragies, such as MPI and SSH. 
+ """ + + def __init__(self, cmd, exec_info=None): + """ + Execute a command or list of commands + + :param cmd: list of commands or a single command string + :param exec_info: Info needed to execute processes locally + """ + super().__init__() + if exec_info is None: + exec_info = ExecInfo() + if exec_info.exec_type == ExecType.LOCAL: + self.exec_ = LocalExec(cmd, exec_info) + elif exec_info.exec_type == ExecType.SSH: + self.exec_ = SshExec(cmd, exec_info) + elif exec_info.exec_type == ExecType.PSSH: + self.exec_ = PsshExec(cmd, exec_info) + elif exec_info.exec_type == ExecType.MPI: + self.exec_ = MpiExec(cmd, exec_info) + self.set_exit_code() + self.set_output() + + def wait(self): + self.exec_.wait() + self.set_output() + self.set_exit_code() + return self.exit_code + + def set_output(self): + self.stdout = self.exec_.stdout + self.stderr = self.exec_.stderr + if isinstance(self.stdout, str): + if hasattr(self.exec_, 'addr'): + host = self.exec_.addr + else: + host = 'localhost' + self.stdout = {host: self.stdout} + self.stderr = {host: self.stderr} + + def set_exit_code(self): + self.exec_.set_exit_code() + self.exit_code = self.exec_.exit_code diff --git a/CI/jarvis-util/jarvis_util/shell/exec_info.py b/CI/jarvis-util/jarvis_util/shell/exec_info.py new file mode 100644 index 00000000..2b28246a --- /dev/null +++ b/CI/jarvis-util/jarvis_util/shell/exec_info.py @@ -0,0 +1,216 @@ +""" +This module contains data structures for determining how to execute +a subcommand. This includes information such as storing SSH keys, +passwords, working directory, etc. +""" + +from enum import Enum +from jarvis_util.util.hostfile import Hostfile +import os +from abc import ABC, abstractmethod + + +class ExecType(Enum): + """ + Different program execution methods. + """ + + LOCAL = 'LOCAL' + SSH = 'SSH' + PSSH = 'PSSH' + MPI = 'MPI' + + +class ExecInfo: + """ + Contains all information needed to execute a program. 
This includes + parameters such as the path to key-pairs, the hosts to run the program + on, number of processes, etc. + """ + def __init__(self, exec_type=ExecType.LOCAL, nprocs=None, ppn=None, + user=None, pkey=None, port=None, + hostfile=None, hosts=None, env=None, + sleep_ms=0, sudo=False, cwd=None, + collect_output=None, pipe_stdout=None, pipe_stderr=None, + hide_output=None, exec_async=False, stdin=None): + """ + + :param exec_type: How to execute a program. SSH, MPI, Local, etc. + :param nprocs: Number of processes to spawn. E.g., MPI uses this + :param ppn: Number of processes per node. E.g., MPI uses this + :param user: The user to execute command under. E.g., SSH, PSSH + :param pkey: The path to the private key. E.g., SSH, PSSH + :param port: The port to use for connection. E.g., SSH, PSSH + :param hostfile: The hosts to launch command on. E.g., PSSH, MPI + :param hosts: A list (or single string) of host names to run command on. + :param env: The environment variables to use for command. + :param sleep_ms: Sleep for a period of time AFTER executing + :param sudo: Execute command with root privilege. E.g., SSH, PSSH + :param cwd: Set current working directory. E.g., SSH, PSSH + :param collect_output: Collect program output in python buffer + :param pipe_stdout: Pipe STDOUT into a file. (path string) + :param pipe_stderr: Pipe STDERR into a file. (path string) + :param hide_output: Whether to print output to console + :param exec_async: Whether to execute program asynchronously + :param stdin: Any input needed by the program. 
Only local + """ + + self.exec_type = exec_type + self.nprocs = nprocs + self.user = user + self.pkey = pkey + self.port = port + self.ppn = ppn + self.hostfile = hostfile + self._set_hostfile(hostfile=hostfile, hosts=hosts) + self.env = env + self.basic_env = {} + self._set_env(env) + self.cwd = cwd + self.sudo = sudo + self.sleep_ms = sleep_ms + self.collect_output = collect_output + self.pipe_stdout = pipe_stdout + self.pipe_stderr = pipe_stderr + self.hide_output = hide_output + self.exec_async = exec_async + self.stdin = stdin + self.keys = ['exec_type', 'nprocs', 'ppn', 'user', 'pkey', 'port', + 'hostfile', 'env', 'sleep_ms', 'sudo', + 'cwd', 'hosts', 'collect_output', + 'pipe_stdout', 'pipe_stderr', 'hide_output', + 'exec_async', 'stdin'] + + def _set_env(self, env): + if env is None: + self.env = {} + else: + self.env = env + basic_env = [ + 'PATH', 'LD_LIBRARY_PATH', 'LIBRARY_PATH', 'CMAKE_PREFIX_PATH', + 'PYTHON_PATH', 'CPATH', 'INCLUDE', 'JAVA_HOME' + ] + self.basic_env = {} + for key in basic_env: + if key not in os.environ: + continue + self.basic_env[key] = os.getenv(key) + for key, val in self.basic_env.items(): + if key not in self.env: + self.env[key] = val + self.basic_env.update(self.env) + if 'LD_PRELOAD' in self.basic_env: + del self.basic_env['LD_PRELOAD'] + + def _set_hostfile(self, hostfile=None, hosts=None): + if hostfile is not None: + if isinstance(hostfile, str): + self.hostfile = Hostfile(hostfile=hostfile) + elif isinstance(hostfile, Hostfile): + self.hostfile = hostfile + else: + raise Exception('Hostfile is neither string nor Hostfile') + if hosts is not None: + if isinstance(hosts, list): + self.hostfile = Hostfile(all_hosts=hosts) + elif isinstance(hosts, str): + self.hostfile = Hostfile(all_hosts=[hosts]) + elif isinstance(hosts, Hostfile): + self.hostfile = hosts + else: + raise Exception('Host set is neither str, list or Hostfile') + + if hosts is not None and hostfile is not None: + raise Exception('Must choose either hosts or 
hostfile, not both') + + if self.hostfile is None: + self.hostfile = Hostfile() + + def mod(self, **kwargs): + self._mod_kwargs(kwargs) + return ExecInfo(**kwargs) + + def _mod_kwargs(self, kwargs): + for key in self.keys: + if key not in kwargs and hasattr(self, key): + kwargs[key] = getattr(self, key) + + def copy(self): + return self.mod() + + +class Executable(ABC): + """ + An abstract class representing a class which is intended to run + shell commands. This includes SSH, MPI, etc. + """ + + def __init__(self): + self.exit_code = None + self.stdout = '' + self.stderr = '' + + def failed(self): + return self.exit_code != 0 + + @abstractmethod + def set_exit_code(self): + pass + + @abstractmethod + def wait(self): + pass + + def smash_cmd(self, cmds): + """ + Convert a list of commands into a single command for the shell + to execute. + + :param cmds: A list of commands or a single command string + :return: + """ + if isinstance(cmds, list): + return ' && '.join(cmds) + elif isinstance(cmds, str): + return cmds + else: + raise Exception('Command must be either list or string') + + def wait_list(self, nodes): + for node in nodes: + node.wait() + + def smash_list_outputs(self, nodes): + """ + Combine the outputs of a set of nodes into a single output. + For example, used if executing multiple commands in sequence. + + :param nodes: + :return: + """ + self.stdout = '\n'.join([node.stdout for node in nodes]) + self.stderr = '\n'.join([node.stderr for node in nodes]) + + def per_host_outputs(self, nodes): + """ + Convert the outputs of a set of nodes to a per-host dictionary. + Used if sending commands to multiple hosts + + :param nodes: + :return: + """ + self.stdout = {} + self.stderr = {} + self.stdout = {node.addr: node.stdout for node in nodes} + self.stderr = {node.addr: node.stderr for node in nodes} + + def set_exit_code_list(self, nodes): + """ + Set the exit code from a set of nodes. 
+ + :param nodes: The set of execution nodes that have been executed + :return: + """ + for node in nodes: + if node.exit_code: + self.exit_code = node.exit_code diff --git a/CI/jarvis-util/jarvis_util/shell/filesystem.py b/CI/jarvis-util/jarvis_util/shell/filesystem.py new file mode 100644 index 00000000..a5cb2b55 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/shell/filesystem.py @@ -0,0 +1,69 @@ +""" +This module contains various wrappers over typical filesystem commands seen +in shell scripts. This includes operations such as creating directories, +changing file permissions, etc. +""" +from .exec import Exec + + +class Mkdir(Exec): + """ + Create directories + subdirectories. + """ + + def __init__(self, paths, exec_info=None): + """ + Create directories + subdirectories. Does not fail if the dirs + already exist. + + :param paths: A list of paths or a single path string. + :param exec_info: Info needed to execute the mkdir command + """ + + if isinstance(paths, str): + paths = [paths] + path = ' '.join(paths) + super().__init__(f'mkdir -p {path}', exec_info) + + +class Rm(Exec): + """ + Remove a file and its subdirectories + """ + + def __init__(self, paths, exec_info=None): + """ + Execute file or directory remove. 
+ + :param paths: Either a list of paths or a single path string + :param exec_info: Information needed to execute rm + """ + + if isinstance(paths, str): + paths = [paths] + path = ' '.join(paths) + super().__init__(f'rm -rf {path}', exec_info) + + +class Chmod(Exec): + """ + Change the mode of a file + """ + + def __init__(self, path=None, mode=None, modes=None, exec_info=None): + """ + Change the mode of a file + + :param path: path to file to mode change + :param mode: the mode to change to + :param modes: A list of tuples [(Path, Mode)] + :param exec_info: How to execute commands + """ + cmds = [] + if path is not None and mode is not None: + cmds.append(f'chmod {mode} {path}') + if modes is not None: + cmds += [f'chmod {mode[1]} {mode[0]}' for mode in modes] + if len(cmds) == 0: + raise Exception('Must set either path+mode or modes') + super().__init__(cmds, exec_info) diff --git a/CI/jarvis-util/jarvis_util/shell/local_exec.py b/CI/jarvis-util/jarvis_util/shell/local_exec.py new file mode 100644 index 00000000..c5487fe1 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/shell/local_exec.py @@ -0,0 +1,103 @@ +""" +Provides methods for executing a program or workflow locally. This class +is intended to be called from Exec, not by general users. +""" + +import time +import subprocess +import os +import sys +import io +import threading +from jarvis_util.jutil_manager import JutilManager +from .exec_info import ExecInfo, ExecType, Executable + + +class LocalExec(Executable): + """ + Provides methods for executing a program or workflow locally. 
+ """ + + def __init__(self, cmd, exec_info): + """ + Execute a program or workflow + + :param cmd: list of commands or a single command string + :param exec_info: Info needed to execute processes locally + """ + + super().__init__() + jutil = JutilManager.get_instance() + cmd = self.smash_cmd(cmd) + + # Managing console output and collection + self.collect_output = exec_info.collect_output + self.pipe_stdout = exec_info.pipe_stdout + self.pipe_stderr = exec_info.pipe_stderr + self.pipe_stdout_fp = None + self.pipe_stderr_fp = None + self.hide_output = exec_info.hide_output + # pylint: disable=R1732 + if self.collect_output is None: + self.collect_output = jutil.collect_output + if self.hide_output is None: + self.hide_output = jutil.hide_output + # pylint: enable=R1732 + self.stdout = io.StringIO() + self.stderr = io.StringIO() + self.last_stdout_size = 0 + self.last_stderr_size = 0 + self.executing_ = True + self.exit_code = 0 + + # Copy ENV + self.env = exec_info.env.copy() + for key, val in os.environ.items(): + if key not in self.env: + self.env[key] = val + + # Managing command execution + self.cmd = cmd + self.sudo = exec_info.sudo + self.stdin = exec_info.stdin + self.exec_async = exec_info.exec_async + self.sleep_ms = exec_info.sleep_ms + if exec_info.cwd is None: + self.cwd = os.getcwd() + else: + self.cwd = exec_info.cwd + if jutil.debug_local_exec: + print(cmd) + self._start_bash_processes() + + def _start_bash_processes(self): + if self.sudo: + self.cmd = f'sudo {self.cmd}' + time.sleep(self.sleep_ms) + # pylint: disable=R1732 + self.proc = subprocess.Popen(self.cmd, + cwd=self.cwd, + env=self.env, + shell=True) + # pylint: enable=R1732 + if not self.exec_async: + self.wait() + + def wait(self): + self.proc.wait() + self.set_exit_code() + return self.exit_code + + def set_exit_code(self): + self.exit_code = self.proc.returncode + + def get_pid(self): + if self.proc is not None: + return self.proc.pid + else: + return None + + +class 
LocalExecInfo(ExecInfo): + def __init__(self, **kwargs): + super().__init__(exec_type=ExecType.LOCAL, **kwargs) diff --git a/CI/jarvis-util/jarvis_util/shell/mpi_exec.py b/CI/jarvis-util/jarvis_util/shell/mpi_exec.py new file mode 100644 index 00000000..e104bf50 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/shell/mpi_exec.py @@ -0,0 +1,55 @@ +""" +This module provides methods to execute a process in parallel using the +Message Passing Interface (MPI). This module assumes MPI is installed +on the system. This class is intended to be called from Exec, +not by general users. +""" + +from jarvis_util.jutil_manager import JutilManager +from jarvis_util.shell.local_exec import LocalExec +from .exec_info import ExecInfo, ExecType + + +class MpiExec(LocalExec): + """ + This class contains methods for executing a command in parallel + using MPI. + """ + + def __init__(self, cmd, exec_info): + """ + Execute a command using MPI + + :param cmd: A command (string) to execute + :param exec_info: Information needed by MPI + """ + + self.cmd = cmd + self.nprocs = exec_info.nprocs + self.ppn = exec_info.ppn + self.hostfile = exec_info.hostfile + self.mpi_env = exec_info.env + super().__init__(self.mpicmd(), + exec_info.mod(env=exec_info.basic_env)) + + def mpicmd(self): + params = [f"mpirun -n {self.nprocs}"] + if self.ppn is not None: + params.append(f"-ppn {self.ppn}") + if len(self.hostfile): + if self.hostfile.is_subset() or self.hostfile.path is None: + params.append(f"--host {','.join(self.hostfile.hosts)}") + else: + params.append(f"--hostfile {self.hostfile.path}") + params += [f"-genv {key}={val}" for key, val in self.mpi_env.items()] + params.append(self.cmd) + cmd = " ".join(params) + jutil = JutilManager.get_instance() + if jutil.debug_mpi_exec: + print(cmd) + return cmd + + +class MpiExecInfo(ExecInfo): + def __init__(self, **kwargs): + super().__init__(exec_type=ExecType.MPI, **kwargs) diff --git a/CI/jarvis-util/jarvis_util/shell/process.py 
b/CI/jarvis-util/jarvis_util/shell/process.py new file mode 100644 index 00000000..7aba2f0f --- /dev/null +++ b/CI/jarvis-util/jarvis_util/shell/process.py @@ -0,0 +1,22 @@ +""" +This module provides various wrappers for methods which manage processes +in the cluster. Examples include killing processes, determining whether +or not a process exists, etc. +""" + +from .exec import Exec + + +class Kill(Exec): + """ + Kill all processes which match the name regex. + """ + + def __init__(self, cmd, exec_info): + """ + Kill all processes which match the name regex. + + :param cmd: A regex for the command to kill + :param exec_info: Info needed to execute the command + """ + super().__init__(f"pkill {cmd}", exec_info) diff --git a/CI/jarvis-util/jarvis_util/shell/pscp.py b/CI/jarvis-util/jarvis_util/shell/pscp.py new file mode 100644 index 00000000..f74da34d --- /dev/null +++ b/CI/jarvis-util/jarvis_util/shell/pscp.py @@ -0,0 +1,58 @@ +""" +This module provides methods to distribute a command among multiple +nodes using SSH. This class is intended to be called from Exec, +not by general users. +""" + +from .scp import Scp +from .exec_info import Executable + + +class Pscp(Executable): + """ + Execute commands on multiple hosts using SSH. + """ + + def __init__(self, paths, exec_info): + """ + Copy files to a set of remote hosts via rsync. + + Case 1: Paths is a single file: + paths = '/tmp/hi.txt' + '/tmp/hi.txt' will be copied to user@host:/tmp/hi.txt + + Case 2: Paths is a list of files: + paths = ['/tmp/hi1.txt', '/tmp/hi2.txt'] + Repeat Case 1 twice. + + Case 3: Paths is a list of tuples of files: + paths = [('/tmp/hi.txt', '/tmp/remote_hi.txt')] + '/tmp/hi.txt' will be copied to user@host:'/tmp/remote_hi.txt' + + :param paths: Either a path to a file, a list of files, or a list of + tuples of files. 
+ :param exec_info: Connection information for SSH + """ + super().__init__() + self.exec_async = exec_info.exec_async + self.hosts = exec_info.hostfile.hosts + self.scp_nodes = [] + self.stdout = {} + self.stderr = {} + self.hosts = exec_info.hostfile.hosts + for host in self.hosts: + ssh_exec_info = exec_info.mod(hostfile=None, + hosts=host, + exec_async=True) + self.scp_nodes.append(Scp(paths, ssh_exec_info)) + if self.exec_async: + self.wait() + + def wait(self): + self.wait_list(self.scp_nodes) + self.per_host_outputs(self.scp_nodes) + self.set_exit_code() + + def set_exit_code(self): + self.set_exit_code_list(self.scp_nodes) + diff --git a/CI/jarvis-util/jarvis_util/shell/pssh_exec.py b/CI/jarvis-util/jarvis_util/shell/pssh_exec.py new file mode 100644 index 00000000..72086939 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/shell/pssh_exec.py @@ -0,0 +1,56 @@ +""" +This module provides methods to distribute a command among multiple +nodes using SSH. This class is intended to be called from Exec, +not by general users. +""" + +from .ssh_exec import SshExec +from .local_exec import LocalExec +from .exec_info import ExecInfo, ExecType, Executable + + +class PsshExec(Executable): + """ + Execute commands on multiple hosts using SSH. + """ + + def __init__(self, cmd, exec_info): + """ + Execute commands on multiple hosts. 
        :param cmd: A list of commands or a single command string
        :param exec_info: Info needed to execute command with SSH
        """
        super().__init__()
        self.cmd = self.smash_cmd(cmd)
        self.exec_async = exec_info.exec_async
        self.hosts = exec_info.hostfile.hosts
        self.execs_ = []
        self.stdout = {}
        self.stderr = {}
        if len(self.hosts):
            # One asynchronous SshExec per host; hostfile is cleared so each
            # SshExec targets exactly one host.
            for host in self.hosts:
                ssh_exec_info = exec_info.mod(hostfile=None,
                                              hosts=host,
                                              exec_async=True)
                self.execs_.append(SshExec(cmd, ssh_exec_info))
        else:
            # No hosts: fall back to plain local execution.
            self.execs_.append(
                LocalExec(cmd, exec_info))
            # NOTE(review): this early return skips the synchronous wait()
            # below, so stdout/stderr are never aggregated for the local
            # fallback (and LocalExec has no .addr for per_host_outputs) --
            # confirm this is intentional.
            return
        if not self.exec_async:
            self.wait()

    def wait(self):
        # Wait on every host, then aggregate {host: output} and exit code.
        self.wait_list(self.execs_)
        self.per_host_outputs(self.execs_)
        self.set_exit_code()

    def set_exit_code(self):
        self.set_exit_code_list(self.execs_)


class PsshExecInfo(ExecInfo):
    # ExecInfo preconfigured for parallel-SSH execution.
    def __init__(self, **kwargs):
        super().__init__(exec_type=ExecType.PSSH, **kwargs)


"""
This module provides methods to execute a single command remotely using SSH.
This class is intended to be called from Exec, not by general users.
"""
from .local_exec import LocalExec
from .exec_info import Executable


class _Scp(LocalExec):
    """
    This class provides methods to copy data over SSH using the "rsync"
    command utility in Linux
    """

    def __init__(self, src_path, dst_path, exec_info):
        """
        Copy a file or directory from source to destination via rsync

        :param src_path: The path to the file on the host
        :param dst_path: The desired file path on the remote host
        :param exec_info: Info needed to execute command with SSH
        """

        # Target the first (and only) host in the hostfile.
        self.addr = exec_info.hostfile.hosts[0]
        self.src_path = src_path
        self.dst_path = dst_path
        self.user = exec_info.user
        self.pkey = exec_info.pkey
        self.port = exec_info.port
        self.sudo = exec_info.sudo
        # Build the rsync command and run it as a local subprocess.
        super().__init__(self.rsync_cmd(src_path, dst_path),
                         exec_info.mod(env=exec_info.basic_env))

    def rsync_cmd(self, src_path, dst_path):
        # Build: rsync -ha [-e 'ssh -i key -p port'] src [user@]host:dst
        lines = ['rsync -ha']
        if self.pkey is not None or self.port is not None:
            ssh_lines = ['ssh']
            if self.pkey is not None:
                ssh_lines.append(f'-i {self.pkey}')
            if self.port is not None:
                ssh_lines.append(f'-p {self.port}')
            ssh_cmd = ' '.join(ssh_lines)
            lines.append(f'-e \'{ssh_cmd}\'')
        lines.append(src_path)
        if self.user is not None:
            lines.append(f'{self.user}@{self.addr}:{dst_path}')
        else:
            lines.append(f'{self.addr}:{dst_path}')
        rsync_cmd = ' '.join(lines)
        return rsync_cmd


class Scp(Executable):
    """
    Secure copy data between two hosts.
    """

    def __init__(self, paths, exec_info):
        """
        Copy files via rsync.

        Case 1: Paths is a single file:
        paths = '/tmp/hi.txt'
        '/tmp/hi.txt' will be copied to user@host:/tmp/hi.txt

        Case 2: Paths is a list of files:
        paths = ['/tmp/hi1.txt', '/tmp/hi2.txt']
        Repeat Case 1 twice.

        Case 3: Paths is a list of tuples of files:
        paths = [('/tmp/hi.txt', '/tmp/remote_hi.txt')]
        '/tmp/hi.txt' will be copied to user@host:'/tmp/remote_hi.txt'

        :param paths: Either a path to a file, a list of files, or a list of
        tuples of files.
        :param exec_info: Connection information for SSH
        """

        super().__init__()
        self.paths = paths
        self.exec_info = exec_info
        self.scp_nodes = []
        # Dispatch on the shape of 'paths' (str, list of str, list of pairs).
        if isinstance(paths, str):
            self._exec_single_path(paths)
        if isinstance(paths, list):
            if len(paths) == 0:
                raise Exception('Must have at least one path to scp')
            elif isinstance(paths[0], str):
                self._exec_many_paths(paths)
            elif isinstance(paths[0], tuple):
                self._exec_many_paths_tuple(paths)
            elif isinstance(paths[0], list):
                self._exec_many_paths_tuple(paths)
        if not self.exec_info.exec_async:
            self.wait()

    def _exec_single_path(self, path):
        # Same path on source and destination.
        self.scp_nodes.append(_Scp(path, path, self.exec_info))

    def _exec_many_paths(self, paths):
        for path in paths:
            self.scp_nodes.append(_Scp(path, path, self.exec_info))

    def _exec_many_paths_tuple(self, path_tlist):
        for src, dst in path_tlist:
            self.scp_nodes.append(_Scp(src, dst, self.exec_info))

    def wait(self):
        # Sequential copies: concatenate outputs rather than keying by host.
        self.wait_list(self.scp_nodes)
        self.smash_list_outputs(self.scp_nodes)
        self.set_exit_code()
        return self.exit_code

    def set_exit_code(self):
        self.set_exit_code_list(self.scp_nodes)


"""
This module provides methods to execute a single command remotely using SSH.
This class is intended to be called from Exec, not by general users.
"""
from .local_exec import LocalExec
from .exec_info import ExecInfo, ExecType


class SshExec(LocalExec):
    """
    This class provides methods to execute a command via SSH.
+ """ + + def __init__(self, cmd, exec_info): + """ + Execute a command remotely via SSH + + :param cmd: A list of commands or a single command string + :param exec_info: Info needed to execute command with SSH + """ + + cmd = self.smash_cmd(cmd) + self.addr = exec_info.hostfile.hosts[0] + self.user = exec_info.user + self.pkey = exec_info.pkey + self.port = exec_info.port + self.sudo = exec_info.sudo + self.ssh_env = exec_info.env + super().__init__(self.ssh_cmd(cmd), + exec_info.mod(env=exec_info.basic_env)) + + def ssh_cmd(self, cmd): + lines = ['ssh'] + if self.pkey is not None: + lines.append(f'-i {self.pkey}') + if self.port is not None: + lines.append(f'-p {self.port}') + if self.user is not None: + lines.append(f'{self.user}@{self.addr}') + else: + lines.append(f'{self.addr}') + ssh_cmd = ' '.join(lines) + + cmd_lines = [] + if self.ssh_env is not None: + for key, val in self.ssh_env.items(): + cmd_lines.append(f'{key}={val}') + cmd_lines.append(cmd) + env_cmd = ' '.join(cmd_lines) + real_cmd = f'{ssh_cmd} \"{env_cmd}\"' + return real_cmd + + +class SshExecInfo(ExecInfo): + def __init__(self, **kwargs): + super().__init__(exec_type=ExecType.SSH, **kwargs) diff --git a/CI/jarvis-util/jarvis_util/util/__init__.py b/CI/jarvis-util/jarvis_util/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/CI/jarvis-util/jarvis_util/util/argparse.py b/CI/jarvis-util/jarvis_util/util/argparse.py new file mode 100644 index 00000000..e58486be --- /dev/null +++ b/CI/jarvis-util/jarvis_util/util/argparse.py @@ -0,0 +1,454 @@ +""" +This module contains an argument parser which defines +""" + +import sys +import os +from abc import ABC, abstractmethod +import shlex +from tabulate import tabulate + + +class ArgParse(ABC): + """ + A class for parsing command line arguments. 
+ Parsed menu name stored in self.menu_name + Parsed menu arguments stored in self.kwargs + Parsed remaining arguments stored in self.remainder + """ + + def __init__(self, args=None, exit_on_fail=True): + if args is None: + args = sys.argv[1:] + elif isinstance(args, str): + args = shlex.split(args) + args = ' '.join(args) + self.binary_name = os.path.basename(sys.argv[0]) + self.orig_args = shlex.split(args) + self.args = self.orig_args + self.error = None + self.exit_on_fail = exit_on_fail + self.menus = [] + self.vars = {} + self.remainder = None + self.pos_required = False + self.use_remainder = False + + self.menu = None + self.menu_name = None + self.kwargs = {} + self.define_options() + self._parse() + + @abstractmethod + def define_options(self): + """ + User-defined options menu + + :return: + """ + pass + + def process_args(self): + """ + After args have been parsed, can call this function to process + the arguments. Assumes that derived ArgParse class has a function + for each menu option. + + :return: + """ + + func_name = self.menu_name.replace(' ', '_') + func = getattr(self, func_name) + func(self) + + def add_menu(self, name=None, msg=None, + use_remainder=False): + """ + A menu is a container of arguments. + + :param name: The name that appears in the CLI to trigger the menu. + Spaces indicate menu nesting. E.g., 'repo add' will trigger the + menu argparser only if 'repo' and 'add' appear next to each other + in the argument list. + :param msg: The message to print if the user selects an improper menu + in the CLI. + :param use_remainder: Whether or not the menu should store all remaining + arguments for further use later. 
+ :return: + """ + + toks = [] + if name is not None: + toks = name.split() + self.menus.append({ + 'name_str': ' '.join(toks), + 'name': toks, + 'msg': msg, + 'num_required': 0, + 'pos_opts': [], + 'kw_opts': {}, + 'use_remainder': use_remainder + }) + self.pos_required = False + self.menu = self.menus[-1] + + def start_required(self): + """ + Define a set of required positional arguments. + + :return: None + """ + self.pos_required = True + + def end_required(self): + """ + Finish the set of required positional arguments. + + :return: None + """ + self.pos_required = False + + def add_arg(self, + name, + argtype=str, + choices=None, + default=None, + msg=None, + action=None, + aliases=None): + """ + Append an argument to a menu. + Arguments can either be positional or key-value. + Positional arguments do NOT start with a dash + Key-value arguments are separated by dashes + + :param name: The name of the argument. If name starts with a dash, + it will be interpreted as a positional arg + :param argtype: The type of the argument being stored. 
+ :param choices: The set of acceptable inputs as a list + :param default: The default value to store + :param msg: The help message to print if there is a problem + :param action: An action to execute if the argument exists + :param aliases: Other names for the same thing (list) + :return: + """ + + # Add all aliases + if aliases is not None: + for alias in aliases: + if '-' in alias: + self.add_arg(alias, argtype, choices, default, msg, action) + else: + raise f"Can't have a non-keyword alias: {alias}" + # Handle the specific boolean argument case + is_kwarg = '-' in name + if is_kwarg and argtype == bool: + self._add_bool_kw_arg(name, default, msg) + return + # Add general argument + menu = self.menu + arg = { + 'name': name, + 'dict_name': self._get_opt_name(name), + 'type': argtype, + 'choices': choices, + 'default': default, + 'action': action, + 'msg': msg, + 'required': self.pos_required, + 'has_input': True + } + if is_kwarg: + self.pos_required = False + menu['kw_opts'][name] = arg + else: + if self.pos_required: + menu['num_required'] += 1 + menu['pos_opts'].append(arg) + self.kwargs[arg['dict_name']] = default + + def _add_bool_kw_arg(self, + name, + default, + msg=None, + is_other=False, + dict_name=None): + """ + Boolean arguments can be indicated using a +-. + + indicates true, - indicates false. + + :param name: The name of the boolean arg + :param default: Default value of the boolean arg + :param msg: Help message + :param is_other: Indicates this is an alias of the +- syntax. 
+ :param dict_name: Name to make the argument in final kwargs + :return: None + """ + menu = self.menu + if dict_name is None: + dict_name = self._get_opt_name(name, True) + arg = { + 'name': name, + 'dict_name': dict_name, + 'type': bool, + 'choices': None, + 'default': default, + 'action': None, + 'msg': msg, + 'required': False, + 'has_input': not is_other + } + if not is_other: + self._add_bool_kw_arg('--with-' + name.strip('-'), + True, msg, True, dict_name) + self._add_bool_kw_arg('--no-' + name.strip('-'), + False, msg, True, dict_name) + self.pos_required = False + menu['kw_opts'][name] = arg + + def _parse(self): + """ + Parse the CLI arguments. + Will modify self.menu to indicate which menu is used + Will modify self.args to create a key-value store of arguments + + :return: None. + """ + self.menus.sort(key=lambda x: len(x['name']), reverse=True) + self._parse_menu() + + def _parse_menu(self): + """ + Determine which menu is used in the CLI. + + :return: Modify self.menu. No return value. 
+ """ + + self.menu = None + for menu in self.menus: + menu_name = menu['name'] + if len(menu_name) > len(self.args): + continue + if menu_name == self.args[0:len(menu_name)]: + self.menu = menu + break + if self.menu is None: + self._invalid_menu() + self.menu_name = self.menu['name_str'] + self.add_arg('-h', + default=None, + msg='print help message', + action=self._print_help, + aliases=['--help']) + menu_name = self.menu['name'] + self.use_remainder = self.menu['use_remainder'] + self.args = self.args[len(menu_name):] + self._parse_args() + + def _parse_args(self): + self._set_defaults() + i = self._parse_pos_args() + self._parse_kw_args(i) + + def _set_defaults(self): + all_opts = self.menu['pos_opts'] + list(self.menu['kw_opts'].values()) + for opt_info in all_opts: + if opt_info['default'] is None: + continue + self.__dict__[opt_info['dict_name']] = opt_info['default'] + + def _parse_pos_args(self): + """ + Parse positional arguments + Modify the self.kwargs dictionary + + :return: + """ + + i = 0 + args = self.args + menu = self.menu + while i < len(menu['pos_opts']): + # Get the positional arg info + opt_name = menu['pos_opts'][i]['name'] + opt_dict_name = menu['pos_opts'][i]['dict_name'] + opt_type = menu['pos_opts'][i]['type'] + opt_choices = menu['pos_opts'][i]['choices'] + if i >= len(args): + if i >= menu['num_required']: + break + else: + self._missing_positional(opt_name) + + # Get the arg value + arg = args[i] + if arg in menu['kw_opts']: + break + arg = self._convert_opt(opt_name, opt_type, opt_choices, arg) + + # Set the argument + self.kwargs[opt_dict_name] = arg + i += 1 + return i + + def _parse_kw_args(self, i): + """ + Parse key-word arguments. 
+ Modify the self.kwargs dictionary + + :param i: The starting index in the self.args list where kv pairs start + :return: + """ + + menu = self.menu + args = self.args + while i < len(args): + # Get argument name + opt_name = args[i] + if opt_name not in menu['kw_opts']: + if self.use_remainder: + self.remainder = ' '.join(args[i:]) + return + else: + self._invalid_kwarg(opt_name) + + # Get argument type + opt = menu['kw_opts'][opt_name] + opt_has_input = opt['has_input'] + opt_dict_name = opt['dict_name'] + opt_type = opt['type'] + opt_default = opt['default'] + opt_action = opt['action'] + opt_choices = opt['choices'] + if not opt_has_input: + arg = opt_default + i += 1 + elif opt_action is not None: + opt_action() + arg = None + i += 1 + elif self._next_is_kw_value(i): + arg = args[i + 1] + i += 2 + elif opt_default is not None: + arg = opt_default + i += 1 + else: + arg = None + self._invalid_kwarg_default(opt_name) + + # Convert argument to type + arg = self._convert_opt(opt_name, opt_type, opt_choices, arg) + + # Set the argument + self.kwargs[opt_dict_name] = arg + + def _convert_opt(self, opt_name, opt_type, opt_choices, arg): + if opt_type is not None: + # pylint: disable=W0702 + try: + arg = opt_type(arg) + if opt_choices is not None: + if arg not in opt_choices: + self._invalid_choice(opt_name, arg) + except: + self._invalid_type(opt_name, opt_type) + # pylint: enable=W0702 + return arg + + def _next_is_kw_value(self, i): + if i + 1 >= len(self.args): + return False + return self.args[i + 1] not in self.menu['kw_opts'] + + def _get_opt_name(self, opt_name, is_bool_arg=False): + """ + Normalize option names + '-' are converted into '_' + '--with-' and '--no-' are removed + '+' and '-' for boolean args are removed + + :param opt_name: The menu option name + :param is_bool_arg: Whether the arg is a boolean arg + :return: + """ + + if not is_bool_arg: + return opt_name.strip('-').replace('-', '_') + else: + return opt_name.replace('--with-', '', 1)\ + 
.replace('--no-', '', 1).\ + strip('-').replace('-', '_') + + def _invalid_menu(self): + self._print_error('Could not find a menu') + + def _invalid_choice(self, opt_name, arg): + self._print_menu_error(f'{opt_name}={arg} is not a valid choice') + + def _missing_positional(self, opt_name): + self._print_menu_error(f'{opt_name} was required, but not defined') + + def _invalid_kwarg(self, opt_name): + self._print_menu_error(f'{opt_name} is not a valid key-word argument') + + def _invalid_kwarg_default(self, opt_name): + self._print_menu_error( + f'{opt_name} was not given a value, but requires one') + + def _invalid_type(self, opt_name, opt_type): + self._print_menu_error(f'{opt_name} was not of type {opt_type}') + + def _print_menu_error(self, msg): + self._print_error(f'{self.menu["name_str"]} {msg}') + + def _print_error(self, msg): + print(f'{msg}') + self._print_help() + if self.exit_on_fail: + sys.exit(1) + else: + raise Exception(msg) + + def _print_help(self): + if self.menu is not None: + self._print_menu_help() + else: + self._print_menus() + + def _print_menus(self): + for menu in self.menus: + self.menu = menu + self._print_menu_help(True) + + def _print_menu_help(self, only_usage=False): + pos_args = [] + for arg in self.menu['pos_opts']: + if arg['required']: + pos_args.append(f'[{arg["name"]}]') + else: + pos_args.append(f'[{arg["name"]} (opt)]') + pos_args = ' '.join(pos_args) + menu_str = self.menu['name_str'] + print(f'USAGE: {self.binary_name} {menu_str} {pos_args} ...') + if self.menu['msg'] is not None: + print(self.menu['msg']) + print() + if only_usage: + return + + headers = ['Name', 'Default', 'Type', 'Description'] + table = [] + all_opts = self.menu['pos_opts'] + list(self.menu['kw_opts'].values()) + for arg in all_opts: + default = arg['default'] + if self._is_bool_arg(arg): + default = None + table.append( + [arg['name'], default, arg['type'], arg['msg']]) + print(tabulate(table, headers=headers)) + + def _is_bool_arg(self, arg): + return 
arg['type'] == bool and (arg['name'].startswith('--with-') or + arg['name'].startswith('--no-')) diff --git a/CI/jarvis-util/jarvis_util/util/expand_env.py b/CI/jarvis-util/jarvis_util/util/expand_env.py new file mode 100644 index 00000000..a9d8aefa --- /dev/null +++ b/CI/jarvis-util/jarvis_util/util/expand_env.py @@ -0,0 +1,25 @@ +""" +This module contains functions for expanding environment variables for +dictionaries +""" + +import os + + +def expand_env(data): + """ + Expand environment variables for dictionaries + + :param data: A dict where strings may contain environment variables to + expand + :return: + """ + if isinstance(data, str): + return os.path.expandvars(data) + if isinstance(data, dict): + for key, val in data.items(): + data[key] = expand_env(val) + if isinstance(data, (list, tuple)): + for i, val in enumerate(data): + data[i] = expand_env(val) + return data diff --git a/CI/jarvis-util/jarvis_util/util/hostfile.py b/CI/jarvis-util/jarvis_util/util/hostfile.py new file mode 100644 index 00000000..4d22ae23 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/util/hostfile.py @@ -0,0 +1,216 @@ +""" +This module contains methods for parsing hostfiles and storing hosts +""" + +import os +import socket +import re +import itertools + + +class Hostfile: + """ + Parse a hostfile or store a set of hosts passed in manually. + """ + + def __init__(self, hostfile=None, all_hosts=None, all_hosts_ip=None, + text=None, find_ips=True): + """ + Constructor. Parse hostfile or store existing host list. 
+ + :param hostfile: The path to the hostfile + :param all_hosts: a list of strings representing all hostnames + :param all_hosts_ip: a list of strings representing all host IPs + :param text: Text of a hostfile + :param find_ips: Whether to construct host_ip and all_host_ip fields + """ + self.hosts_ip = [] + self.hosts = [] + self.all_hosts = [] + self.all_hosts_ip = [] + self.path = hostfile + self.find_ips = find_ips + + # Set the host ips directly + if all_hosts_ip is not None: + self.all_hosts_ip = all_hosts_ip + self.hosts_ip = all_hosts_ip + self.find_ips = False + + # Direct constructor + if all_hosts is not None: + self._set_hosts(all_hosts) + + # From hostfile path + elif hostfile is not None: + self._load_hostfile(self.path) + + # From hostfile text + elif text is not None: + self.parse(text) + + # Both hostfile and hosts are None + else: + self._set_hosts(['localhost']) + + def _load_hostfile(self, path): + """ + Expand a hostfile + + :param path: the path to the hostfile + :return: + """ + if not os.path.exists(path): + raise Exception('hostfile not found') + self.path = path + with open(path, 'r', encoding='utf-8') as fp: + text = fp.read() + self.parse(text) + return self + + def parse(self, text): + """ + Parse a hostfile text. + + :param text: Hostfile text + :param set_hosts: Whether or not to set hosts + :return: None + """ + + lines = text.strip().splitlines() + hosts = [] + for line in lines: + self._expand_line(hosts, line) + self._set_hosts(hosts) + + def _expand_line(self, hosts, line): + """ + Will expand brackets in a host declaration. 
+ E.g., host-[0-5,...]-name + + :param hosts: the current set of hosts + :param line: the line to parse + :return: None + """ + toks = re.split(r'[\[\]]', line) + brkts = [tok for i, tok in enumerate(toks) if i % 2 == 1] + num_set = [] + + # Get the expanded set of numbers for each bracket + for i, brkt in enumerate(brkts): + num_set.append([]) + self._expand_set(num_set[-1], brkt) + + # Expand the host string + host_nums = self._product(num_set) + for host_num in host_nums: + host = [] + for i, tok in enumerate(toks): + if i % 2 == 1: + host.append(host_num[int(i/2)]) + else: + host.append(tok) + hosts.append(''.join(host)) + + def _expand_set(self, num_set, brkt): + """ + Expand a bracket set. + The bracket initially has notation: [0-5,0-9,...] + """ + + rngs = brkt.split(',') + for rng in rngs: + self._expand_range(num_set, rng) + + def _expand_range(self, num_set, brkt): + """ + Expand a range. + The range has notation: A-B or A + + :param num_set: the numbers in the range + :param brkt: + :return: + """ + if len(brkt) == 0: + return + if '-' in brkt: + min_max = brkt.split('-') + if len(min_max[0]) == len(min_max[1]): + nums = range(int(min_max[0]), int(min_max[1]) + 1) + num_set += [str(num).zfill(len(min_max[0])) for num in nums] + else: + nums = range(int(min_max[0]), int(min_max[1]) + 1) + num_set += [str(num) for num in nums] + else: + num_set.append(brkt) + + def _product(self, num_set): + """ + Return the cartesian product of the number set + + :param num_set: The numbers to product + :return: + """ + return list(itertools.product(*num_set)) + + def _set_hosts(self, all_hosts): + self.all_hosts = all_hosts + if self.find_ips: + self.all_hosts_ip = [socket.gethostbyname(host) + for host in all_hosts] + self.hosts = self.all_hosts + if self.find_ips: + self.hosts_ip = self.all_hosts_ip + return self + + def subset(self, count): + sub = Hostfile() + sub.path = self.path + sub.all_hosts = self.all_hosts + sub.all_hosts_ip = self.all_hosts_ip + sub.hosts = 
self.hosts[:count] + sub.hosts_ip = self.hosts_ip[:count] + return sub + + def is_subset(self): + return len(self.hosts) != len(self.all_hosts) + + def save(self, path): + self.all_hosts = self.hosts + self.all_hosts_ip = self.hosts_ip + self.path = path + with open(path, 'w', encoding='utf-8') as fp: + fp.write('\n'.join(self.all_hosts)) + return self + + def ip_list(self): + return self.hosts_ip + + def hostname_list(self): + return self.hosts + + def enumerate(self): + return enumerate(self.hosts) + + def host_str(self, sep=','): + return sep.join(self.hosts) + + def ip_str(self, sep=','): + return sep.join(self.hosts_ip) + + def __len__(self): + return len(self.hosts) + + def __getitem__(self, idx): + return self.hosts[idx] + + def __str__(self): + return str(self.hosts) + + def __repr__(self): + return str(self) + + def __eq__(self, other): + return (self.hosts == other.hosts and + self.all_hosts == other.all_hosts) + diff --git a/CI/jarvis-util/jarvis_util/util/import_all.py b/CI/jarvis-util/jarvis_util/util/import_all.py new file mode 100644 index 00000000..8a10e612 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/util/import_all.py @@ -0,0 +1,72 @@ +""" +This file contains methods to automate large import __init__.py files +""" + +import pathlib +import os + + +def _import_recurse(root_path, root, stmts): + """ + Identify the set of files in the current "root" directory + + :param root_path: The path to the root of the python package + :param root: The current subdirectory of the python package + :param stmts: The current set of import statements + :return: + """ + for file in os.listdir(root): + file = os.path.join(root, file) + if os.path.isfile(file): + file = os.path.relpath(file, root_path) + ext = file.split('.') + if ext[-1] == 'py': + toks = ext[0].split('/') + if toks[-1] == '__init__': + continue + import_stmt = '.'.join(toks) + stmts.append(f'from {import_stmt} import *') + elif os.path.isdir(file): + _import_recurse(root_path, file, stmts) + 
return stmts + + +def import_all(root_path, root): + """ + Create all import statement to do: from root import *. + + :param root_path: The root of the python repo + :param root: The current directory we are in within the repo + :return: + """ + stmts = [] + _import_recurse(root_path, root, stmts) + return '\"\"\"Import all modules\"\"\"\n' + '\n'.join(stmts) + '\n' + + +def build_global_import_file(root_path, pkg_name): + """ + Build a file to be able to do: from pkg_name import * + + :param root_path: The path to the python package's root directory + :param pkg_name: The name of the python package + :return: + """ + path = os.path.join(root_path, pkg_name) + imports = import_all(root_path, path) + with open(os.path.join(path, '__init__.py'), 'w', + encoding='utf-8') as fp: + fp.write(imports) + + +def build_global_import_from_bin(pkg_name): + """ + Build a file to be able to do: from pkg_name import * + This function is assumed to be called in the "bin" directory + of the main python repo + + :param pkg_name: The name of the python package being built + :return: + """ + root_path = str(pathlib.Path(__file__).parent.parent.parent.resolve()) + build_global_import_file(root_path, pkg_name) diff --git a/CI/jarvis-util/jarvis_util/util/import_mod.py b/CI/jarvis-util/jarvis_util/util/import_mod.py new file mode 100644 index 00000000..9c55c508 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/util/import_mod.py @@ -0,0 +1,28 @@ +""" +This file contains helper methods to load a class dynamically from a file +""" + +import sys + +# NOTE(llogan): To get the path of the directory this file is in, use +# str(pathlib.Path(__file__).parent.resolve()) + + +def load_class(import_str, path, class_name): + """ + Loads a class from a python file. + + :param import_str: A python import string. E.g., for "myrepo.dir1.pkg" + :param path: The absolute path to the directory which contains the + beginning of the import statement. 
Let's say you have a git repo located + at "/home/hello/myrepo". The git repo has a subdirectory called "myrepo", + so "/home/hello/myrepo/myrepo". In this case, path would be + "/home/hello/myrepo". The import string "myrepo.dir1.pkg" will find + the "myrepo" part of the import string at "/home/hello/myrepo/myrepo". + :param class_name: The name of the class in the file + :return: + """ + sys.path.insert(0, path) + module = __import__(import_str, fromlist=[class_name]) + sys.path.pop(0) + return getattr(module, class_name) diff --git a/CI/jarvis-util/jarvis_util/util/naming.py b/CI/jarvis-util/jarvis_util/util/naming.py new file mode 100644 index 00000000..af650563 --- /dev/null +++ b/CI/jarvis-util/jarvis_util/util/naming.py @@ -0,0 +1,34 @@ +""" +This module contains methods to create strings which follow a particular +naming convention. +""" + +import re + + +def to_camel_case(string): + """ + Convert a string in snake case to camel case + + :param string: + :return: + """ + if string is None: + return + words = re.sub(r'(_|-)+', ' ', string).split() + words = [word.capitalize() for word in words] + return ''.join(words) + + +def to_snake_case(string): + """ + Convert a string in CamelCase to snake case + :param string: + :return: + """ + if string is None: + return + words = re.split('([A-Z][a-z0-9_]*)', string) + words = [word for word in words if len(word)] + string = '_'.join(words) + return string.lower() diff --git a/CI/jarvis-util/jarvis_util/util/size_conv.py b/CI/jarvis-util/jarvis_util/util/size_conv.py new file mode 100644 index 00000000..266959bf --- /dev/null +++ b/CI/jarvis-util/jarvis_util/util/size_conv.py @@ -0,0 +1,44 @@ +""" +This module provides methods to convert a semantic size string to an integer. +""" + + +class SizeConv: + """ + A class which provides methods to convert a semantic size string to an int. 
+ """ + + @staticmethod + def to_int(text): + text = text.lower() + if 'k' in text: + return SizeConv.kb(text) + if 'm' in text: + return SizeConv.mb(text) + if 'g' in text: + return SizeConv.gb(text) + if 't' in text: + return SizeConv.tb(text) + if 'p' in text: + return SizeConv.pb(text) + return int(text) + + @staticmethod + def kb(num): + return int(float(num.split('k')[0]) * (1 << 10)) + + @staticmethod + def mb(num): + return int(float(num.split('m')[0]) * (1 << 20)) + + @staticmethod + def gb(num): + return int(float(num.split('g')[0]) * (1 << 30)) + + @staticmethod + def tb(num): + return int(float(num.split('t')[0]) * (1 << 40)) + + @staticmethod + def pb(num): + return int(float(num.split('p')[0]) * (1 << 50)) diff --git a/CI/jarvis-util/pylintrc b/CI/jarvis-util/pylintrc new file mode 100644 index 00000000..5371019b --- /dev/null +++ b/CI/jarvis-util/pylintrc @@ -0,0 +1,429 @@ +# This Pylint rcfile contains a best-effort configuration to uphold the +# best-practices and style described in the Google Python style guide: +# https://google.github.io/styleguide/pyguide.html +# +# Its canonical open-source location is: +# https://google.github.io/styleguide/pylintrc + +[MASTER] + +# Files or directories to be skipped. They should be base names, not paths. +ignore=third_party + +# Files or directories matching the regex patterns are skipped. The regex +# matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=no + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + +# Use multiple processes to speed up Pylint. +jobs=4 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + + +[MESSAGES CONTROL] + +# Only show warnings with the listed confidence levels. Leave empty to show +# all. 
Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED +confidence= + +# Enable the message, report, category or checker with the given id(s). You can +# either give multiple identifier separated by comma (,) or put this option +# multiple time (only on the command line, not in the configuration file where +# it should appear only once). See also the "--disable" option for examples. +#enable= + +# Disable the message, report, category or checker with the given id(s). You +# can either give multiple identifiers separated by comma (,) or put this +# option multiple times (only on the command line, not in the configuration +# file where it should appear only once).You can also use "--disable=all" to +# disable everything first and then reenable specific checks. For example, if +# you want to run only the similarities checker, you can use "--disable=all +# --enable=similarities". If you want to run only the classes checker, but have +# no Warning level messages displayed, use"--disable=all --enable=classes +# --disable=W" +disable=abstract-method, + apply-builtin, + arguments-differ, + attribute-defined-outside-init, + backtick, + bad-option-value, + basestring-builtin, + buffer-builtin, + c-extension-no-member, + consider-using-enumerate, + cmp-builtin, + cmp-method, + coerce-builtin, + coerce-method, + delslice-method, + div-method, + duplicate-code, + eq-without-hash, + execfile-builtin, + file-builtin, + filter-builtin-not-iterating, + fixme, + getslice-method, + global-statement, + hex-method, + idiv-method, + implicit-str-concat, + import-error, + import-self, + import-star-module-level, + inconsistent-return-statements, + input-builtin, + intern-builtin, + invalid-str-codec, + locally-disabled, + long-builtin, + long-suffix, + map-builtin-not-iterating, + misplaced-comparison-constant, + missing-function-docstring, + metaclass-assignment, + next-method-called, + next-method-defined, + no-absolute-import, + no-else-break, + no-else-continue, + no-else-raise, 
+ no-else-return, + no-init, # added + no-member, + no-name-in-module, + no-self-use, + nonzero-method, + oct-method, + old-division, + old-ne-operator, + old-octal-literal, + old-raise-syntax, + parameter-unpacking, + print-statement, + raising-string, + range-builtin-not-iterating, + raw_input-builtin, + rdiv-method, + reduce-builtin, + relative-import, + reload-builtin, + round-builtin, + setslice-method, + signature-differs, + standarderror-builtin, + suppressed-message, + sys-max-int, + too-few-public-methods, + too-many-ancestors, + too-many-arguments, + too-many-boolean-expressions, + too-many-branches, + too-many-instance-attributes, + too-many-locals, + too-many-nested-blocks, + too-many-public-methods, + too-many-return-statements, + too-many-statements, + trailing-newlines, + unichr-builtin, + unicode-builtin, + unnecessary-pass, + unpacking-in-except, + useless-else-on-loop, + useless-object-inheritance, + useless-suppression, + using-cmp-argument, + wrong-import-order, + xrange-builtin, + zip-builtin-not-iterating, + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages +reports=no + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. 
See doc for all details +#msg-template= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=main,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names= + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes=abc.abstractproperty,cached_property.cached_property,cached_property.threaded_cached_property,cached_property.cached_property_with_ttl,cached_property.threaded_cached_property_with_ttl + +# Regular expression matching correct function names +function-rgx=^(?:(?PsetUp|tearDown|setUpModule|tearDownModule)|(?P_?[A-Z][a-zA-Z0-9]*)|(?P_?[a-z][a-z0-9_]*))$ + +# Regular expression matching correct variable names +variable-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct constant names +const-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Regular expression matching correct attribute names +attr-rgx=^_{0,2}[a-z][a-z0-9_]*$ + +# Regular expression matching correct argument names +argument-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=^(_?[A-Z][A-Z0-9_]*|__[a-z0-9_]+__|_?[a-z][a-z0-9_]*)$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=^[a-z][a-z0-9_]*$ + +# Regular expression matching correct class names +class-rgx=^_?[A-Z][a-zA-Z0-9]*$ + +# Regular expression matching correct module names +module-rgx=^(_?[a-z][a-z0-9_]*|__init__)$ + +# Regular expression matching correct method names 
+method-rgx=(?x)^(?:(?P_[a-z0-9_]+__|runTest|setUp|tearDown|setUpTestCase|tearDownTestCase|setupSelf|tearDownClass|setUpClass|(test|assert)_*[A-Z0-9][a-zA-Z0-9_]*|next)|(?P_{0,2}[A-Z][a-zA-Z0-9_]*)|(?P_{0,2}[a-z][a-z0-9_]*))$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=(__.*__|main|test.*|.*test|.*Test)$ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=10 + + +[TYPECHECK] + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager,contextlib2.contextmanager + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules= + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# TODO(https://github.com/PyCQA/pylint/issues/3352): Direct pylint to exempt +# lines made too long by directives to pytype. 
+ +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=(?x)( + ^\s*(\#\ )??$| + ^\s*(from\s+\S+\s+)?import\s+.+$) + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=yes + +# Maximum number of lines in a module +max-module-lines=99999 + +# String used as indentation unit. The internal Google style guide mandates 2 +# spaces. Google's externaly-published style guide says 4, consistent with +# PEP 8. Here, we use 2 spaces, for conformity with many open-sourced Google +# projects (like TensorFlow). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=TODO + + +[STRING] + +# This flag controls whether inconsistent-quotes generates a warning when the +# character used as a quote delimiter is used inconsistently within a module. +check-quote-consistency=yes + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=^\*{0,2}(_$|unused_|dummy_) + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. 
+redefining-builtins-modules=six,six.moves,past.builtins,future.builtins,functools + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging,absl.logging,tensorflow.io.logging + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=4 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub, + TERMIOS, + Bastion, + rexec, + sets + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant, absl + +# Analyse import fallback blocks. 
This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__, + __new__, + setUp + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict, + _fields, + _replace, + _source, + _make + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls, + class_ + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=StandardError, + Exception, + BaseException diff --git a/CI/jarvis-util/requirements.txt b/CI/jarvis-util/requirements.txt new file mode 100644 index 00000000..70849fea --- /dev/null +++ b/CI/jarvis-util/requirements.txt @@ -0,0 +1,9 @@ +pyyaml +pylint==2.15.0 +coverage==5.5 +# coverage +coverage-lcov==0.2.4 +# coverage-lcov +pytest==6.2.5 +pandas +tabulate \ No newline at end of file diff --git a/CI/jarvis-util/setup.py b/CI/jarvis-util/setup.py new file mode 100644 index 00000000..a1bdd4e9 --- /dev/null +++ b/CI/jarvis-util/setup.py @@ -0,0 +1,23 @@ +import setuptools + +setuptools.setup( + name="jarvis_util", + packages=setuptools.find_packages(), + version="0.0.1", + author="Luke Logan", + author_email="llogan@hawk.iit.edu", + description="Various wrappers around shell utilities", + url="https://github.com/scs-lab/jarvis-util", + classifiers = [ + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Development Status :: 0 - Pre-Alpha", + "Environment :: Other Environment", + "Intended Audience :: Developers", + "License :: None", + "Operating System 
:: OS Independent", + "Topic :: Software Development :: Libraries :: Python Modules", + "Topic :: Application Configuration", + ], + long_description="" +) diff --git a/CI/jarvis-util/test/unit/argparse_main.py b/CI/jarvis-util/test/unit/argparse_main.py new file mode 100644 index 00000000..7b636ca6 --- /dev/null +++ b/CI/jarvis-util/test/unit/argparse_main.py @@ -0,0 +1,4 @@ +from jarvis_util.util.argparse import ArgParse + +if __name__ == 'main': + args = ArgParse() \ No newline at end of file diff --git a/CI/jarvis-util/test/unit/print5s.py b/CI/jarvis-util/test/unit/print5s.py new file mode 100644 index 00000000..a6581b31 --- /dev/null +++ b/CI/jarvis-util/test/unit/print5s.py @@ -0,0 +1,12 @@ +""" +NOTE: this is a helper utility for test_local_exec +""" + +import time +import sys + + +for i in range(5): + sys.stdout.write(f"COUT: {i}\n") + sys.stderr.write(f"CERR: {i}\n") + time.sleep(1) \ No newline at end of file diff --git a/CI/jarvis-util/test/unit/printNone.py b/CI/jarvis-util/test/unit/printNone.py new file mode 100644 index 00000000..cf728ec3 --- /dev/null +++ b/CI/jarvis-util/test/unit/printNone.py @@ -0,0 +1,4 @@ +from jarvis_util.shell.local_exec import LocalExec, LocalExecInfo + +spawn_info = LocalExecInfo(hide_output=True) +LocalExec("echo hello", spawn_info) diff --git a/CI/jarvis-util/test/unit/test_argparse.py b/CI/jarvis-util/test/unit/test_argparse.py new file mode 100644 index 00000000..3c3f3d32 --- /dev/null +++ b/CI/jarvis-util/test/unit/test_argparse.py @@ -0,0 +1,10 @@ +from jarvis_util.util.argparse import ArgParse +from jarvis_util.shell.exec import Exec +from jarvis_util.shell.local_exec import LocalExecInfo +import pathlib +from unittest import TestCase + + +class TestArgparse(TestCase): + def test_argparse_main(self): + pass \ No newline at end of file diff --git a/CI/jarvis-util/test/unit/test_hostfile.py b/CI/jarvis-util/test/unit/test_hostfile.py new file mode 100644 index 00000000..47bd7c85 --- /dev/null +++ 
b/CI/jarvis-util/test/unit/test_hostfile.py @@ -0,0 +1,57 @@ +from jarvis_util.util.hostfile import Hostfile +import pathlib +from unittest import TestCase + + +class TestHostfile(TestCase): + def test_no_expand_int(self): + host = Hostfile(text='0', find_ips=False) + self.assertTrue(len(host.hosts) == 1) + self.assertTrue(host.hosts[0] == '0') + + def test_no_expand(self): + host = Hostfile(text='ares-comp-01', find_ips=False) + self.assertTrue(len(host.hosts) == 1) + self.assertTrue(host.hosts[0] == 'ares-comp-01') + + def test_expand_set(self): + host = Hostfile(text='ares-comp-[01-04]-40g', find_ips=False) + self.assertTrue(len(host.hosts) == 4) + self.assertTrue(host.hosts[0] == 'ares-comp-01-40g') + self.assertTrue(host.hosts[1] == 'ares-comp-02-40g') + self.assertTrue(host.hosts[2] == 'ares-comp-03-40g') + self.assertTrue(host.hosts[3] == 'ares-comp-04-40g') + + def test_expand_two_sets(self): + host = Hostfile(text='ares-comp-[01-02]-40g-[01-02]', find_ips=False) + self.assertTrue(len(host.hosts) == 4) + self.assertTrue(host.hosts[0] == 'ares-comp-01-40g-01') + self.assertTrue(host.hosts[1] == 'ares-comp-01-40g-02') + self.assertTrue(host.hosts[2] == 'ares-comp-02-40g-01') + self.assertTrue(host.hosts[3] == 'ares-comp-02-40g-02') + + def test_subset(self): + host = Hostfile(text='ares-comp-[01-02]-40g-[01-02]', find_ips=False) + host = host.subset(3) + self.assertTrue(len(host.hosts) == 3) + self.assertTrue(host.is_subset()) + self.assertTrue(host.hosts[0] == 'ares-comp-01-40g-01') + self.assertTrue(host.hosts[1] == 'ares-comp-01-40g-02') + self.assertTrue(host.hosts[2] == 'ares-comp-02-40g-01') + + def test_read_hostfile(self): + HERE = str(pathlib.Path(__file__).parent.resolve()) + hf = Hostfile(hostfile=f'{HERE}/test_hostfile.txt', find_ips=False) + print(hf.hosts) + self.assertEqual(len(hf), 15) + + def test_save_hostfile(self): + HERE = str(pathlib.Path(__file__).parent.resolve()) + hf = Hostfile(hostfile=f'{HERE}/test_hostfile.txt', find_ips=False) + 
hf_sub = hf.subset(4) + self.assertEqual(len(hf_sub), 4) + hf_sub.save('/tmp/test_hostfile.txt') + hf_sub_reload = Hostfile(hostfile=f'/tmp/test_hostfile.txt', + find_ips=False) + self.assertEqual(len(hf_sub_reload), 4) + self.assertEqual(hf_sub, hf_sub_reload) diff --git a/CI/jarvis-util/test/unit/test_hostfile.txt b/CI/jarvis-util/test/unit/test_hostfile.txt new file mode 100644 index 00000000..7ecc164e --- /dev/null +++ b/CI/jarvis-util/test/unit/test_hostfile.txt @@ -0,0 +1 @@ +ares-comp-[01-10,11,12-15]-40g \ No newline at end of file diff --git a/CI/jarvis-util/test/unit/test_local_exec.py b/CI/jarvis-util/test/unit/test_local_exec.py new file mode 100644 index 00000000..c0ced8c3 --- /dev/null +++ b/CI/jarvis-util/test/unit/test_local_exec.py @@ -0,0 +1,65 @@ +import pathlib +import os +from jarvis_util.shell.local_exec import LocalExec, LocalExecInfo +from jarvis_util.shell.exec import Exec +from unittest import TestCase + + +class TestLocalExec(TestCase): + def _setup_files(self): + self.stdout = '/tmp/test_out.log' + self.stderr = '/tmp/test_err.log' + try: + os.remove(self.stdout) + except OSError: + pass + try: + os.remove(self.stderr) + except: + pass + + def test_default(self): + ret = Exec("echo hello") + self.assertEqual(ret.exit_code, 0) + self.assertEqual(len(ret.stdout['localhost']), 0) + + def test_pipe_stdout(self): + self._setup_files() + spawn_info = LocalExecInfo(pipe_stdout=self.stdout, + pipe_stderr=self.stderr, + collect_output=True) + ret = Exec("echo hello", spawn_info) + self.assertEqual(ret.stdout['localhost'].strip(), "hello") + self.assertEqual(ret.stderr['localhost'].strip(), "") + self.assertFile(self.stdout, "hello") + self.assertFile(self.stderr, "") + + def test_hide_stdout(self): + HERE = str(pathlib.Path(__file__).parent.resolve()) + PRINTNONE = os.path.join(HERE, 'printNone.py') + spawn_info = LocalExecInfo(collect_output=True) + ret = Exec(f"python3 {PRINTNONE}", spawn_info) + 
self.assertEqual(ret.stdout['localhost'].strip(), "") + self.assertEqual(ret.stderr['localhost'].strip(), "") + + def test_periodic_print(self): + self._setup_files() + HERE = str(pathlib.Path(__file__).parent.resolve()) + PRINT5s = os.path.join(HERE, 'print5s.py') + ret = Exec(f"python3 {PRINT5s}", + LocalExecInfo(pipe_stdout=self.stdout, + pipe_stderr=self.stderr)) + stdout_data = "\n".join([f"COUT: {i}" for i in range(5)]) + stderr_data = "\n".join([f"CERR: {i}" for i in range(5)]) + self.assertFile(self.stdout, stdout_data) + self.assertFile(self.stderr, stderr_data) + + def assertFile(self, path, data, strip=True): + self.assertTrue(os.path.exists(path)) + with open(path, 'r') as fp: + if strip: + data = data.strip() + file_data = fp.read().strip() + else: + file_data = fp.read() + self.assertEqual(data, file_data) diff --git a/CI/jarvis-util/test/unit/test_system_info.py b/CI/jarvis-util/test/unit/test_system_info.py new file mode 100644 index 00000000..a9b6c9d9 --- /dev/null +++ b/CI/jarvis-util/test/unit/test_system_info.py @@ -0,0 +1,137 @@ +from jarvis_util.util.argparse import ArgParse +from jarvis_util.shell.exec import Exec +from jarvis_util.shell.local_exec import LocalExecInfo +from jarvis_util.util.hostfile import Hostfile +from jarvis_util.introspect.system_info import Lsblk, \ + ListFses, FiInfo, Blkid, ResourceGraph, StorageDeviceType +from jarvis_util.util.size_conv import SizeConv +import pathlib +import itertools +from unittest import TestCase + + +class TestSystemInfo(TestCase): + def test_lsblk(self): + Lsblk(LocalExecInfo(hide_output=True)) + + def test_list_fses(self): + ListFses(LocalExecInfo(hide_output=True)) + + def test_fi_info(self): + FiInfo(LocalExecInfo(hide_output=True)) + + def test_blkid(self): + Blkid(LocalExecInfo(hide_output=True)) + + def test_resource_graph(self): + rg = ResourceGraph() + rg.build(LocalExecInfo(hide_output=True)) + rg.save('/tmp/resource_graph.yaml') + rg.load('/tmp/resource_graph.yaml') + 
rg.filter_fs(r'/$', '/${USER}', 'NVME') + rg.filter_hosts(Hostfile(), '1gbps') + rg.save('/tmp/resource_graph.yaml') + + def test_custom_resource_graph(self): + rg = ResourceGraph() + all_hosts = ['host1', 'host2', 'host3'] + all_hosts_ip = ['192.168.1.0', '192.168.1.1', '192.168.1.2'] + providers = ['tcp', 'ib', 'roce'] + hosts = Hostfile(all_hosts=all_hosts, all_hosts_ip=all_hosts_ip) + + # Add networks for each node + rg.set_hosts(hosts) + for provider in providers: + rg.add_net(hosts, + provider=provider) + rg.add_net(hosts.subset(1), + provider='uncommon') + + # Add common storage for each node + rg.add_storage(hosts, + device='/dev/sda1', + mount='/', + tran='sata', + rota=True, + size=SizeConv.to_int('10g'), + shared=False) + rg.add_storage(hosts, + device='/dev/sda2', + mount='/mnt/hdd/$USER', + tran='sata', + rota=True, + size=SizeConv.to_int('200g'), + shared=False) + rg.add_storage(hosts, + device='/dev/sdb1', + mount='/mnt/ssd/$USER', + tran='sata', + rota=False, + size=SizeConv.to_int('50g'), + shared=False) + rg.add_storage(hosts, + device='/dev/nvme0n1', + mount='/mnt/nvme/$USER', + tran='nvme', + rota=False, + size=SizeConv.to_int('100g'), + shared=False) + rg.add_storage(hosts.subset(1), + device='/dev/nvme0n2', + mount='/mnt/nvme2/$USER', + tran='nvme', + rota=False, + size=SizeConv.to_int('10g'), + shared=False) + rg.add_storage(hosts, + device='/dev/nvme0n3', + tran='nvme', + rota=False, + size=SizeConv.to_int('100g'), + shared=False) + + # Filter only mounts in '/mnt' + rg.filter_fs('/mnt/*') + + # Apply changes + rg.apply() + + # Find all mounted NVMes + df = rg.find_storage([StorageDeviceType.NVME]) + self.assertTrue(len(df[df.tran == 'nvme']) == 4) + self.assertTrue(len(df[df.tran == 'sata']) == 0) + self.assertTrue(len(df) == 4) + + # Find all mounted & common NVMes and SSDs + df = rg.find_storage([StorageDeviceType.NVME, + StorageDeviceType.SSD], + common=True) + self.assertTrue(len(df[df.tran == 'nvme']) == 3) + 
self.assertTrue(len(df[df.tran == 'sata']) == 3) + self.assertTrue(len(df) == 6) + + # Select a single nvme per-node + df = rg.find_storage([StorageDeviceType.NVME, + StorageDeviceType.SSD], + common=True, + count_per_node=1) + self.assertTrue(len(df[df.tran == 'nvme']) == 3) + self.assertTrue(len(df[df.tran == 'sata']) == 0) + self.assertTrue(len(df) == 3) + + # Select a single nvme and ssd per-node + df = rg.find_storage([StorageDeviceType.NVME, + StorageDeviceType.SSD], + common=True, + count_per_dev=1) + self.assertTrue(len(df[df.tran == 'nvme']) == 3) + self.assertTrue(len(df[df.tran == 'sata']) == 3) + self.assertTrue(len(df) == 6) + + # Find common networks between hosts + df = rg.find_net_info(hosts) + self.assertTrue(len(df) == 9) + + # Find common tcp networks + df = rg.find_net_info(hosts, providers='tcp') + self.assertTrue(len(df) == 3) diff --git a/CI/py_coeus_ci/bin/run_test.py b/CI/py_coeus_ci/bin/run_test.py new file mode 100644 index 00000000..46bd4172 --- /dev/null +++ b/CI/py_coeus_ci/bin/run_test.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python3 +import sys, os +import pathlib + +if __name__ == '__main__': + if len(sys.argv) != 4: + print("USAGE: ./run_test [TEST_TYPE] [TEST_NAME] [CMAKE_BINARY_DIR]") + exit(1) + test_type = sys.argv[1] + test_name = sys.argv[2] + cmake_binary_dir = sys.argv[3] + address_sanitizer = False + + # The root of Coeus + COEUS_ROOT = str(pathlib.Path(__file__).parent.parent.parent.parent.resolve()) + ADAPTER_TEST_ROOT = f"{COEUS_ROOT}/adapter/test" + + # Ensure that all calls beneath know how to resolve jarvis_util + sys.path.insert(0, f"{COEUS_ROOT}/CI/jarvis-util") + + # Ensure subsequent classes know how to resolve py_coeus_ci package + sys.path.insert(0, f"{COEUS_ROOT}/CI/py_coeus_ci") + + # Unit test file to load + if test_type == 'native': + pkg_dir = f"{COEUS_ROOT}/test/unit" + else: + raise Exception("Could not find the unit test") + + # Load the unit test + from jarvis_util.util.naming import to_camel_case + from 
jarvis_util.util.import_mod import load_class + test_cls = load_class(f"tests", pkg_dir, to_camel_case(f"{test_type}_test_manager")) + tests = test_cls(COEUS_ROOT, cmake_binary_dir, address_sanitizer) + tests.call(test_name) \ No newline at end of file diff --git a/CI/py_coeus_ci/py_coeus_ci/test_manager.py b/CI/py_coeus_ci/py_coeus_ci/test_manager.py new file mode 100644 index 00000000..b4b0f6f7 --- /dev/null +++ b/CI/py_coeus_ci/py_coeus_ci/test_manager.py @@ -0,0 +1,187 @@ +from jarvis_util import * +import time +import os, sys +import pathlib +import inspect +from abc import ABC, abstractmethod + + +class SpawnInfo(MpiExecInfo): + def __init__(self, nprocs, hermes_conf=None, hermes_mode=None, api=None, + **kwargs): + super().__init__(nprocs=nprocs, **kwargs) + self.hermes_conf = hermes_conf + self.hermes_mode = hermes_mode + self.api = api + + +class TestManager(ABC): + """======================================================================""" + """ Test Case Constructor """ + """======================================================================""" + def __init__(self, cmake_source_dir, cmake_binary_dir, address_sanitizer): + """ + Initialize test manager + """ + jutil = JutilManager.get_instance() + jutil.collect_output = False + jutil.hide_output = False + jutil.debug_mpi_exec = False + jutil.debug_local_exec = False + self.MY_DIR = str(pathlib.Path(inspect.getfile(LocalExecInfo)).parent) + self.CMAKE_SOURCE_DIR = cmake_source_dir + self.CMAKE_BINARY_DIR = cmake_binary_dir + self.HERMES_TRAIT_PATH = f"{self.CMAKE_BINARY_DIR}/bin" # Change path? + self.HERMES_CLIENT_CONF = f"{self.CMAKE_SOURCE_DIR}/test/data/hermes_client.yaml" # Change path? 
+ self.ADDRESS_SANITIZER = address_sanitizer + self.daemon = None + self.disable_testing = False + + os.makedirs("/tmp/test_hermes", exist_ok=True) + self.tests_ = {} + self.devices = {} + self.set_devices() + self.find_tests() + + def spawn_info(self, nprocs=None, ppn=None, hostfile=None, + hermes_conf=None, hermes_mode=None, api=None, cwd=None): + # Whether to deploy hermes + use_hermes = hermes_mode is not None \ + or api == 'native' \ + or hermes_conf is not None + + # Get the hermes configuration path + if use_hermes: + if hermes_conf is None: + hermes_conf = os.path.join(self.CMAKE_SOURCE_DIR, + 'test', 'data', + 'hermes_server.yaml') + else: + hermes_conf = os.path.join(self.CMAKE_SOURCE_DIR, + 'test', 'data', + f"{hermes_conf}.yaml") + + # Basic environment variables + env = {} + if use_hermes: + env.update({ + 'HERMES_LOG_OUT': "/tmp/hermes_log.txt", + 'HERMES_CONF': hermes_conf, + 'HERMES_CLIENT_CONF': self.HERMES_CLIENT_CONF, + 'HERMES_TRAIT_PATH': self.HERMES_TRAIT_PATH, + }) + if hostfile: + env['HERMES_HOSTFILE'] = hostfile.path + + # Get libasan path + # Assumes GCC for now + # TODO(llogan): check if ADDRESS_SANITIZER is enabled... 
+ if 'LD_PRELOAD' in env: + node = Exec('gcc -print-file-name=libasan.so', + LocalExecInfo(collect_output=True, hide_output=True)) + env['LD_PRELOAD'] = f"{node.stdout.strip()}:{env['LD_PRELOAD']}" + + # Hermes mode + if hermes_mode == 'kDefault': + env['HERMES_ADAPTER_MODE'] = 'kDefault' + if hermes_mode == 'kScratch': + env['HERMES_ADAPTER_MODE'] = 'kScratch' + if hermes_mode == 'kBypass': + env['HERMES_ADAPTER_MODE'] = 'kBypass' + + env['LD_LIBRARY_PATH'] = f'{self.CMAKE_BINARY_DIR}/bin' + + return SpawnInfo(nprocs=nprocs, + ppn=ppn, + hostfile=hostfile, + hermes_conf=hermes_conf, + hermes_mode=hermes_mode, + api=api, + env=env, + cwd=cwd) + + @abstractmethod + def set_paths(self): + pass + + def set_devices(self): + self.devices['nvme'] = '/tmp/test_hermes/' + self.devices['ssd'] = '/tmp/test_hermes/' + self.devices['hdd'] = '/tmp/test_hermes/' + self.devices['pfs'] = '/tmp/test_hermes/' + + @abstractmethod + def spawn_all_nodes(self): + pass + + def cleanup(self): + dirs = " ".join([os.path.join(d, '*') for d in self.devices.values()]) + Rm(dirs, LocalExecInfo(hostfile=self.spawn_all_nodes().hostfile)) + + def find_tests(self): + # Filter the list to include only attributes that start with "test" + test_attributes = [attr for attr in dir(self) if + attr.startswith("test")] + + # Get a reference to each test method + for attr in test_attributes: + if callable(getattr(self, attr)): + self.tests_[attr] = getattr(self, attr) + + def call(self, test_name): + self.set_paths() + if self.disable_testing: + return + test_name = test_name.strip() + if test_name in self.tests_: + print(f"Running test: {test_name}") + exit_code = self.tests_[test_name]() + else: + print(f"{test_name} was not found. 
Available tests: ") + for i, test in enumerate(self.tests_): + print(f"{i}: {test}") + exit_code = 1 + exit(1) + self.cleanup() + exit(exit_code) + + """======================================================================""" + """ General Test Helper Functions """ + """======================================================================""" + + def start_daemon(self, spawn_info): + """ + Helper function. Start the Hermes daemon + + :param env: Hermes environment variables + :return: None + """ + print("Killing daemon") + Kill("hermes_daemon", + LocalExecInfo( + hostfile=spawn_info.hostfile, + collect_output=False)) + + print("Start daemon") + #self.daemon = Exec(f"{self.CMAKE_BINARY_DIR}/bin/hermes_daemon", + self.daemon = Exec("hermes_daemon", + LocalExecInfo( + hostfile=spawn_info.hostfile, + env=spawn_info.basic_env, + exec_async=True)) + time.sleep(5) + print("Launched") + + def stop_daemon(self, spawn_info): + """ + Helper function. Stop the Hermes daemon. + + :param env: Hermes environment variables + :return: None + """ + print("Stop daemon") + Exec("finalize_hermes", + LocalExecInfo( + env=spawn_info.basic_env)) + self.daemon.wait() + print("Stopped daemon") diff --git a/CMakeLists.txt b/CMakeLists.txt index 0d754916..8077a39c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -81,7 +81,6 @@ endif() #------------------------------------------------------------------------------ # External libraries #------------------------------------------------------------------------------ - # Hermes find_package(Hermes CONFIG REQUIRED) message(STATUS "found Hermes at ${Hermes_DIR}") @@ -114,6 +113,15 @@ if(COEUS_ENABLE_COVERAGE) endmacro() endif() +#------------------------------------------------------------------------------ +# Enable Testing +#------------------------------------------------------------------------------ +function(pytest test_type test_name) + set(script ${CMAKE_SOURCE_DIR}/CI/py_coeus_ci/bin/run_test.py) + add_test(NAME ${test_name} + COMMAND 
python3 ${script} ${test_type} ${test_name} ${CMAKE_BINARY_DIR}) +endfunction() + #------------------------------------------------------------------------------ # Documentation #------------------------------------------------------------------------------ diff --git a/test/data/hermes_client.yaml b/test/data/hermes_client.yaml new file mode 100644 index 00000000..e95d7173 --- /dev/null +++ b/test/data/hermes_client.yaml @@ -0,0 +1,9 @@ +stop_daemon: false +path_inclusions: ["/tmp/test_hermes"] +path_exclusions: ["/"] +file_page_size: 1024KB +base_adapter_mode: kDefault +file_adapter_configs: + - path: "/" + page_size: 1MB + mode: kDefault \ No newline at end of file diff --git a/test/data/hermes_server.yaml b/test/data/hermes_server.yaml new file mode 100644 index 00000000..348ce1e3 --- /dev/null +++ b/test/data/hermes_server.yaml @@ -0,0 +1,130 @@ +# Example Hermes configuration file + +### Define properties of the storage devices +devices: + # The name of the device. + # It can be whatever the user wants, there are no special names + ram: + # The mount point of each device. RAM should be the empty string. For block + # devices, this is the directory where Hermes will create buffering files. For + # object storage or cloud targets, this will be a url. + mount_point: "" + + # The maximum buffering capacity in MiB of each device. Here we say that all 4 + # devices get 50 MiB of buffering capacity. + capacity: 1000MB + + # The size of the smallest available buffer in KiB. In general this should be + # the page size of your system for byte addressable storage, and the block size + # of the storage device for block addressable storage. + block_size: 4KB + + # The number of blocks (the size of which is chosen in block_sizes_kb) that each + # device should contain for each slab (controlled by num_slabs). This allows for + # precise control of the distibution of buffer sizes. 
+ slab_sizes: [ 4KB, 16KB, 64KB, 1MB ] + + # The maximum theoretical bandwidth (as advertised by the manufacturer) in + # Possible units: KBps, MBps, GBps + bandwidth: 6000MBps + + # The latency of each device (as advertised by the manufacturer). + # Possible units: ns, us, ms, s + latency: 15us + + # For each device, indicate '1' if it is shared among nodes (e.g., burst + # buffers), or '0' if it is per node (e.g., local NVMe). + is_shared_device: false + + # For each device, the minimum and maximum percent capacity threshold at which + # the BufferOrganizer will trigger. Decreasing the maximum thresholds will cause + # the BufferOrganizer to move data to lower devices, making more room in faster + # devices (ideal for write-heavy workloads). Conversely, increasing the minimum + # threshold will cause data to be moved from slower devices into faster devices + # (ideal for read-heavy workloads). For example, a maximum capacity threshold of + # 0.8 would have the effect of always keeping 20% of the device's space free for + # incoming writes. Conversely, a minimum capacity threshold of 0.3 would ensure + # that the device is always at least 30% occupied. + borg_capacity_thresh: [0.0, 1.0] + + nvme: + mount_point: "./" + capacity: 50MB + block_size: 4KB + slab_sizes: [ 4KB, 16KB, 64KB, 1MB ] + bandwidth: 1GBps + latency: 600us + is_shared_device: false + borg_capacity_thresh: [ 0.0, 1.0 ] + + ssd: + mount_point: "./" + capacity: 50MB + block_size: 4KB + slab_sizes: [ 4KB, 16KB, 64KB, 1MB ] + bandwidth: 500MBps + latency: 1200us + is_shared_device: false + borg_capacity_thresh: [ 0.0, 1.0 ] + + pfs: + mount_point: "./" + capacity: 16GB + block_size: 64KB # The stripe size of PFS + slab_sizes: [ 4KB, 16KB, 64KB, 1MB ] + bandwidth: 100MBps # Per-device bandwidth + latency: 200ms + is_shared_device: true + borg_capacity_thresh: [ 0.0, 1.0 ] + +### Define properties of RPCs +rpc: + # A path to a file containing a list of server names, 1 per line. 
If your + # servers are named according to a pattern (e.g., server-1, server-2, etc.), + # prefer the `rpc_server_base_name` and `rpc_host_number_range` options. If this + # option is not empty, it will override anything in `rpc_server_base_name`. + host_file: "" + + # Host names are constructed as "base_name + + # host_number + rpc_server_suffix." Each entry in the rpc_host_number_range_list + # can be either a single number, or a range, which is 2 numbers separated by a + # hyphen (-). For example the list {01, 03-05, 07, 08-10} will be expanded to + # {01, 03, 04, 05, 07, 08, 09, 10}. + host_names: ["localhost"] + + # The RPC protocol. This must come from the documentation of the specific RPC + # library in use. + protocol: "ofi+sockets" + + # RPC domain name for verbs transport. Blank for tcp. + domain: "" + + # Desired RPC port number. + port: 8080 + + # The number of handler threads for each RPC server. + num_threads: 4 + +### Define properties of the BORG +buffer_organizer: + # The number of threads used in the background organization of internal Hermes buffers. + num_threads: 1 + + # Desired RPC port number for buffer organizer. + port: 8081 + +### Define the default data placement policy +dpe: + # Choose Random, RoundRobin, or MinimizeIoTime + default_placement_policy: "MinimizeIoTime" + + # If true (1) the RoundRobin placement policy algorithm will split each Blob + # into a random number of smaller Blobs. + default_rr_split: 0 + +# The shared memory prefix for the hermes shared memory segment. A user name +# will be automatically appended. +shmem_name: "/hermes_shm_" + +# The interval in milliseconds at which to update the global system view. 
+system_view_state_update_interval_ms: 1000 \ No newline at end of file diff --git a/test/unit/CMakeLists.txt b/test/unit/CMakeLists.txt index cf4c7c82..10c854f6 100644 --- a/test/unit/CMakeLists.txt +++ b/test/unit/CMakeLists.txt @@ -2,7 +2,6 @@ add_executable(basic ${CMAKE_CURRENT_SOURCE_DIR}/basic.cc) add_dependencies(basic hermes_engine) target_link_libraries(basic PRIVATE hermes_engine MPI::MPI_CXX) -add_test(NAME basic_plugin_test - COMMAND ${CMAKE_BINARY_DIR}/bin/basic ${CMAKE_BINARY_DIR}/bin) -set_tests_properties(basic_plugin_test PROPERTIES ENVIRONMENT - "LD_LIBRARY_PATH=${COEUS_BINARY_DIR}/bin:$ENV{LD_LIBRARY_PATH}") \ No newline at end of file +pytest(native test_basic) +set_tests_properties(${test_name} PROPERTIES ENVIRONMENT + "LD_LIBRARY_PATH=${COEUS_BINARY_DIR}/bin:$ENV{LD_LIBRARY_PATH}") \ No newline at end of file diff --git a/test/unit/tests.py b/test/unit/tests.py new file mode 100644 index 00000000..78ad5ea1 --- /dev/null +++ b/test/unit/tests.py @@ -0,0 +1,17 @@ +from py_coeus_ci.test_manager import TestManager +from jarvis_util import * + +class NativeTestManager(TestManager): + def spawn_all_nodes(self): + return self.spawn_info() + + def set_paths(self): + self.BASIC_CMD = f"{self.CMAKE_BINARY_DIR}/bin/basic" + + def test_basic(self): + spawn_info = self.spawn_info(nprocs=1, + hermes_conf='hermes_server') + self.start_daemon(spawn_info) + node = Exec(self.BASIC_CMD, spawn_info) + self.stop_daemon(spawn_info) + return node.exit_code