Skip to content

Commit

Permalink
Merge pull request #2 from sychan/main
Browse files Browse the repository at this point in the history
Add support for JSON output, backwards compatible
  • Loading branch information
tylern4 authored Nov 4, 2022
2 parents 5622678 + 66c814a commit 38d99d6
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 30 deletions.
12 changes: 8 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@ pip install pagurus
### Options

```
usage: pagurus [-h] [-t TAG] [-o OUTFILE] [-p PATH] [-d] [-r RATE] [-u USER] [-noh] [-mv]
usage: pagurus [-h] [-o OUTFILE] [-p PATH] [-d] [-r RATE] [-u USER] [-noh] [-mv] [-l ROLLING] [--json] [--envvar ENVVAR]
optional arguments:
options:
-h, --help show this help message and exit
-o OUTFILE, --outfile OUTFILE
File name for csv.
Expand All @@ -23,14 +23,18 @@ optional arguments:
-r RATE, --rate RATE Polling rate for process.
-u USER, --user USER Username to get stats for.
-noh, --no-header Turn off writing the header.
-mv, --move Moves file from 'running' to 'complete'
-mv, --move Moves file from 'running' to 'done' directories
-l ROLLING, --rolling ROLLING
Time to roll file over to number to file name in ~minutes.
--json Output JSON strings instead of CSV lines
--envvar ENVVAR add environment var to output (can be specified multiple times)
```


### Running pagurus as a wrapper for a single user
```bash
# Start running wrapper in the background for username
pagurus -u username -mv -p /path/to/output/dir -o test.csv
pagurus -u $USER -mv -p /path/to/output/dir -o test.csv
# Get the previous running PID of pagurus
export PID=$!
# Sleep for a few seconds to let everything start running
Expand Down
87 changes: 62 additions & 25 deletions bin/pagurus
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ import os
import logging
import sys
import signal
import json

from typing import Dict, List
from pathlib import Path

Expand All @@ -27,26 +29,47 @@ class FileWriter:
def __init__(self, outfile,
header: List[str] = [""],
write_header: bool = True,
rolling: bool = False) -> None:
rolling: bool = False,
jsonout: bool = False,
env: Dict = {}) -> None:
self.extensions = {
'gz': 'csv.gz',
'bz2': 'csv.bz2',
'csv': 'csv'
}

self.header: List[str] = header
self.number: int = 0
self.write_header: bool = write_header
self.rolling: bool = rolling
self.env: Dict = env

# Create an appropriate formatting function
if jsonout:
# Formatter function that outputs a dictionary in JSON
if header == [""]:
raise Exception("header cannot be blank for JSON output")
if write_header:
logging.debug("forcing write_header to false due to --json flag")
self.write_header = False

# Adds envs to the end of the dict for fmt_writer
def fmt(*args):
temp = dict(zip(self.header, args))
temp.update(env)
return "{}\n".format(json.dumps(temp))
self.fmt_func = lambda *args: fmt(*args)

# Make formater based on number of metrics in header
fmt = ",".join(["{}" for _ in range(len(header))])
self.fmt_writer = fmt + "\n"

else:
# Make formatter function based on number of metrics in header
fmt = ",".join(["{}" for _ in range(len(self.header))])
fmt_writer = fmt + "\n"
self.fmt_func = lambda *args: fmt_writer.format(*args)
self.outfile: Path = outfile
self.next_file()

def write(self, *args):
self.output_file.write(self.fmt_writer.format(*args))
self.output_file.write(self.fmt_func(*args))

def flush(self):
self.output_file.flush()
Expand All @@ -70,7 +93,7 @@ class FileWriter:
self.output_file = open(self.outfile, "w")

def _write_header(self):
self.output_file.write(self.fmt_writer.format(*self.header))
self.output_file.write(self.fmt_func(*self.header))
self.output_file.flush()

def _renamer(self):
Expand Down Expand Up @@ -231,10 +254,12 @@ def runner(
path: str = ".", filename: str = "stats.csv",
pole_rate: float = 0.1, username: str = "",
write_header: bool = True,
move: bool = False, rolling: int = 0):
move: bool = False, rolling: int = 0,
json: bool = False, env: Dict = {}):
"""
Runs while your executable is still running and logs info
about running process to a csv file.
about running process to the output file, defaulting to CSV format
unless the --json flag is set
Args:
outfile (str, optional): output filename. Defaults to "stats.csv".
Expand Down Expand Up @@ -270,8 +295,10 @@ def runner(
"num_fds", "read_count", "write_count", "read_chars",
"write_chars", "cmdline", "current_dir"]

stats_file = FileWriter(
outfile=outfile, header=header, write_header=write_header, rolling=True if rolling > 0 else False)
stats_file = FileWriter(outfile=outfile, header=header,
write_header=write_header,
rolling=True if rolling > 0 else False,
jsonout=json, env=env)
itteration = 0
# Keep pulling data from the process while it's running
while not killer.kill_now:
Expand All @@ -282,26 +309,29 @@ def runner(
pData = proc.as_dict()

# Add new line to the file with relevant data
stats_file.write(datetime.now().strftime("%m-%d-%Y %H:%M:%S.%f"),
proc_num,
pData['ppid'],
pData['name'],
pData['num_threads'],
*get_cputimes(pData),
*get_meminfo(pData),
pData['memory_percent'],
pData['num_fds'],
*get_iocounters(pData),
cmd_data(pData),
pData['cwd']
)
stats = [datetime.now().strftime("%m-%d-%Y %H:%M:%S.%f"),
proc_num,
pData['ppid'],
pData['name'],
pData['num_threads'],
*get_cputimes(pData),
*get_meminfo(pData),
pData['memory_percent'],
pData['num_fds'],
*get_iocounters(pData),
cmd_data(pData),
pData['cwd']]

stats_file.write(*stats)

except psutil.NoSuchProcess as e:
# Comes when a process is killed between getting the number and getting the data
pass
except AttributeError as e:
# logging.debug(f'Error ({type(e).__name__}): {e}')
pass
except TypeError as e:
# logging.debug(f'Error ({type(e).__name__}): {e}')
pass
except Exception as e:
logging.error(f'Error ({type(e).__name__}): {e}')
Expand Down Expand Up @@ -342,6 +372,9 @@ if __name__ == '__main__':
help="Moves file from 'running' to 'done' directories", default=False, action='store_true')
parser.add_argument("-l", "--rolling", type=int, help="Time to roll file over to number to file name in ~minutes.",
default=0)
parser.add_argument("--json", default=False, action="store_true", help="Output JSON strings instead of CSV lines")
parser.add_argument("--envvar", action="append", default=[],
help="add environment var to output (can be specified multiple times)")

args = parser.parse_args()

Expand All @@ -358,6 +391,10 @@ if __name__ == '__main__':
# so rolling*10 should be okay for minutes (on average)
rolling = args.rolling * 10

# Gets the environment variables once and places them into a dict
env = {ev: os.getenv(ev) for ev in args.envvar}

# Start the recorder
runner(path=args.path, filename=args.outfile, pole_rate=args.rate,
username=args.user, write_header=args.no_header, move=args.move, rolling=rolling)
username=args.user, write_header=args.no_header, move=args.move,
rolling=rolling, json=args.json, env=env)
117 changes: 117 additions & 0 deletions bin/test_FileWriter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import pytest
import json
import os
from importlib.util import spec_from_loader, module_from_spec
from importlib.machinery import SourceFileLoader

# Do some funny business to load the modules from a file without the .py extension
# see https://stackoverflow.com/a/43602645
from importlib.util import spec_from_loader, module_from_spec
from importlib.machinery import SourceFileLoader

# Resolve the extension-less ``pagurus`` script relative to this test file, so
# the tests can be collected from any working directory instead of only when
# pytest is launched from the directory that contains the script.
_PAGURUS_SCRIPT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pagurus")

# SourceFileLoader is used because the script has no ``.py`` suffix and so
# cannot be picked up by a regular ``import`` statement.
spec = spec_from_loader("pagurus", SourceFileLoader("pagurus", _PAGURUS_SCRIPT))
pagurus = module_from_spec(spec)
spec.loader.exec_module(pagurus)

def test_FileWriter_csv(tmp_path):
    """Check CSV mode: a header row comes first, then one comma-joined row per write()."""
    print("Output file path", tmp_path)
    columns = ["name1", "name2", "name3"]
    target = tmp_path / "test.csv"

    writer = pagurus.FileWriter(outfile=target, header=columns, write_header=True)

    # Constructor state: arguments are stored as given and the file already exists.
    assert writer.header == columns
    assert writer.write_header is True
    assert writer.outfile == target
    assert target.is_file()

    writer.write(0, 1, 2)
    writer.close()

    text = target.read_text()
    print(text)
    rows = text.splitlines()
    assert rows[0] == ",".join(columns)
    assert rows[1] == "0,1,2"

    writer.outfile.unlink()

def test_FileWriter_csv_envvar(tmp_path):
    """Check that CSV output carries the requested environment variables as extra columns.

    NOTE(review): this test passes ``env`` as a list of variable names and
    expects them to be appended to ``fw.header``, whereas the ``FileWriter``
    constructor annotates ``env`` as a dict -- confirm the test still matches
    the current ``FileWriter`` API.
    """
    print("Output file path", tmp_path)
    header = ["name1", "name2", "name3"]
    outfile = tmp_path / "test.csv"
    env_names = ["testytesty", "testytoasty"]

    # Clear out the environment variables we will be testing with. Each name is
    # removed independently: pop() with a default never raises, so a missing
    # first variable can no longer prevent the second one from being removed,
    # and no unrelated exception is silently swallowed.
    for name in env_names:
        os.environ.pop(name, None)
    os.environ["testytesty"] = "test"
    os.environ["testytoasty"] = "test2"

    fw = pagurus.FileWriter(outfile=outfile, header=header, write_header=True, env=env_names)
    header2 = header + env_names

    assert fw.header == header2
    assert fw.write_header is True
    assert fw.outfile == outfile
    assert outfile.is_file()

    fw.write(0, 1, 2, "test", "test2")
    fw.close()

    with open(outfile) as f:
        contents = f.read()
    print(contents)
    lines = contents.splitlines()
    assert lines[0] == ",".join(header2)
    assert lines[1] == "0,1,2,test,test2"

    fw.outfile.unlink()


def test_FileWriter_json(tmp_path):
    """Check JSON mode: the header flag is forced off and each write() emits one JSON object line."""
    print("Output file path", tmp_path)
    columns = ["name1", "name2", "name3"]
    target = tmp_path / "test.json"

    writer = pagurus.FileWriter(outfile=target, header=columns, write_header=True, jsonout=True)

    # write_header was requested, but JSON mode is expected to switch it off.
    assert writer.header == columns
    assert writer.write_header is False
    assert writer.outfile == target
    assert target.is_file()

    writer.write(0, 1, 2)
    writer.close()

    text = target.read_text()
    print(text)
    rows = text.splitlines()
    expected = json.dumps(dict(zip(columns, [0, 1, 2])))
    assert rows[0] == expected

    writer.outfile.unlink()

def test_FileWriter_json_envvar(tmp_path):
    """Check that JSON output includes the requested environment variables.

    The test first expects a ``KeyError`` while the variables are unset, then
    sets them and expects their values to appear in the emitted JSON object.

    NOTE(review): this test passes ``env`` as a list of variable names and
    expects them to be appended to ``fw.header``, whereas the ``FileWriter``
    constructor annotates ``env`` as a dict -- confirm the test still matches
    the current ``FileWriter`` API.
    """
    print("Output file path", tmp_path)
    header = ["name1", "name2", "name3"]
    outfile = tmp_path / "test.json"
    env_names = ["testytesty", "testytoasty"]

    # Clear out the environment variables we will be testing with. Both must be
    # unset for the KeyError check below, so each is removed independently:
    # pop() with a default never raises, which means a missing first variable
    # can no longer leave the second one behind.
    for name in env_names:
        os.environ.pop(name, None)

    with pytest.raises(KeyError):
        pagurus.FileWriter(outfile=outfile, header=header, write_header=True,
                           jsonout=True, env=env_names)

    os.environ["testytesty"] = "test"
    os.environ["testytoasty"] = "test2"
    fw = pagurus.FileWriter(outfile=outfile, header=header, write_header=True,
                            jsonout=True, env=env_names)
    header2 = header + env_names

    assert fw.header == header2
    assert fw.write_header is False
    assert fw.outfile == outfile
    assert outfile.is_file()

    fw.write(0, 1, 2, "test", "test2")
    fw.close()

    with open(outfile) as f:
        contents = f.read()
    print(contents)
    lines = contents.splitlines()
    jsonout = json.dumps(dict(zip(header2, [0, 1, 2, "test", "test2"])))
    assert lines[0] == jsonout

    fw.outfile.unlink()


2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[flake8]
max-line-length = 160
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@
url="https://github.com/tylern4/pagurus",
author="Nick Tyler",
author_email="[email protected]",
version='1.1',
version='1.2',
scripts=glob('bin/*'),
py_modules=[],
install_requires=[
'psutil==5.8.0',
],
Expand Down

0 comments on commit 38d99d6

Please sign in to comment.