Skip to content

Commit

Permalink
Add a parse_membership_data command
Browse files Browse the repository at this point in the history
  • Loading branch information
tmcadam committed Dec 3, 2024
1 parent 827edb2 commit 0d1cb01
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -66,3 +66,7 @@ target/

# Temporary files
._*


# vscode
.vscode/
2 changes: 2 additions & 0 deletions conda/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@ requirements:
- parse_account_usage_data = ncigrafana.parse_account_usage_data:main_argv
- nci_account_json = ncigrafana.nci_account:main_argv
- parse_lquota_data = ncigrafana.parse_lquota:main_argv
- parse_membership_data = ncigrafana.parse_membership:main_argv
has_prefix_files:
- bin/parse_user_storage_data
- bin/parse_account_usage_data
- bin/nci_account_json
- bin/parse_lquota_data
- bin/parse_membership_data

test:
imports:
Expand Down
22 changes: 22 additions & 0 deletions ncigrafana/UsageDataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -643,3 +643,25 @@ def top_usage(self, year, quarter, storagepoint, measure='size', count=10, scale
quarter,
storagept=storagepoint,
datafield=measure).ix[-1].sort_values(ascending=False).head(count).divide(scale)

def addprojectmembership(self, project, system, date, members_list, members_count):
    """
    Add (or refresh) an entry recording the membership of a project.

    One row is kept per (project, system, day): calling this again for the
    same project, system and day overwrites the stored member list and
    count instead of adding a second row.

    project:       project code, resolved to an id via self.addproject
    system:        system name, resolved to an id via self.addsystem
    date:          datetime (only its calendar day is stored) or a plain date
    members_list:  iterable of user names belonging to the project
    members_count: number of members to record alongside the list

    Returns whatever the underlying table's upsert() returns.
    """
    # need to lookup ids of, or create entries for project_id and system_id
    project_id = self.addproject(project)
    system_id = self.addsystem(system)

    # Materialise once so a generator argument can be iterated twice below
    members = list(members_list)

    # not strictly needed by foreign key constraint
    for member in members:
        self.adduser(member)

    # would prefer array, but not supported in dataset library
    members_csv = ",".join(members)

    # datetime objects expose .date(); plain date objects are stored as given
    to_day = getattr(date, "date", None)
    day = to_day() if callable(to_day) else date

    data = dict(project_id=project_id,
                system_id=system_id,
                date=day,
                members=members_csv,
                members_count=members_count)

    # Only the columns that identify a snapshot are used as upsert keys.
    # The previous key list also named 'members' (payload, so a changed
    # member list could never update the existing row) and 'members_list',
    # which is not a column of this row at all.
    return self.db['ProjectMembership'].upsert(data, ['project_id', 'system_id', 'date'])
123 changes: 123 additions & 0 deletions ncigrafana/parse_membership.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env python

"""
Copyright 2020 ARC Centre of Excellence for Climate Systems Science
author: Aidan Heerdegen <[email protected]>
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from __future__ import print_function

import argparse
import datetime
import sys
from .UsageDataset import *
from .DBcommon import extract_num_unit, parse_size, mkdir, archive
from .DBcommon import datetoyearquarter, date_range_from_quarter

# NOTE(review): neither of these constants is referenced in the visible part
# of this module -- presumably carried over from a sibling parser; confirm
# before relying on or removing them.
dbfileprefix = '.'
nfields = 7

# Account-name suffixes that mark service users; these are not counted as members
_SERVICE_USER_SUFFIXES = ("_thredds", "_ci")


def _parse_dump_date(date_line):
    """
    Return the (naive) datetime written on the second line of a dump file.

    The zone abbreviation (e.g. AEDT) is only accepted by strptime's %Z when
    the process time zone happens to define it, so if that parse fails the
    abbreviation is dropped and the remaining fields are parsed on their own.
    Either way the result carries no tzinfo.
    """
    text = date_line.strip()
    try:
        return datetime.datetime.strptime(text, '%a %b %d %H:%M:%S %Z %Y')
    except ValueError:
        tokens = text.split()
        if len(tokens) != 6:
            raise
        # tokens: weekday, month, day, time, zone, year -> discard the zone
        del tokens[4]
        return datetime.datetime.strptime(' '.join(tokens), '%a %b %d %H:%M:%S %Y')


def parse_membership(filename, verbose, db=None, dburl=None):
    """
    Parse a project membership dump file and record it in the database.

    The dump is expected to hold three lines: a header, a date and a
    colon-separated record whose first field is the project code and whose
    fourth field is a comma-separated list of user names, e.g.

        %%%%%%%%%%%%%%%%
        Tue Dec 03 11:09:29 AEDT 2024
        au88:*:8950:user502,user581,user561

    filename: path of the dump file to read
    verbose:  when true, echo the lines read and the record being stored
    db:       dataset object providing addprojectmembership(); if None one is
              created from dburl
    dburl:    database url, used only when db is None

    Raises ValueError if neither db nor dburl is given, if the date line
    cannot be parsed, or if the data line has fewer than four fields.
    """
    if db is None:
        if dburl is None:
            raise ValueError("parse_membership requires either db or dburl")
        db = ProjectDataset(dburl=dburl)

    system = 'gadi'

    with open(filename) as f:

        print("Parsing {file}".format(file=filename))

        # read 3 lines in membership dump file
        header = f.readline()
        if verbose: print("> ", header)

        date_line = f.readline()
        if verbose: print("> ", date_line)

        data_line = f.readline()

    # parse date from file
    date = _parse_dump_date(date_line)

    # parse project_id and members from file
    data_line_parts = data_line.strip().split(":")
    if len(data_line_parts) < 4:
        raise ValueError("Malformed membership line in {file}: {line!r}".format(
            file=filename, line=data_line))
    project = data_line_parts[0]
    members_csv = data_line_parts[3]

    # remove service users, and the empty name that splitting an empty
    # member field would otherwise produce
    members_list = [m for m in members_csv.split(",")
                    if m and not m.endswith(_SERVICE_USER_SUFFIXES)]

    members_count = len(members_list)

    # write the results to database
    if verbose: print('Add project membership', project, system, date, members_count)
    db.addprojectmembership(project, system, date, members_list, members_count)

def main(args):
    """
    Parse every dump file named in args.inputs and, unless args.noarchive
    is set, archive each file once it has been parsed successfully.

    A dataset is opened only when args.dburl is given; otherwise None is
    handed to parse_membership. Any exception raised while parsing
    propagates to the caller and the offending file is not archived.
    """
    be_verbose = args.verbose

    dataset = ProjectDataset(dburl=args.dburl) if args.dburl else None

    for dumpfile in args.inputs:
        parse_membership(dumpfile, be_verbose, db=dataset)
        # Only reached when parsing did not raise
        if not args.noarchive:
            archive(dumpfile)

def parse_args(args):
    """
    Parse arguments given as list (args)

    args: list of command line tokens, without the program name
          (e.g. sys.argv[1:])

    Returns the argparse namespace with attributes verbose, dburl,
    noarchive and inputs.
    """
    parser = argparse.ArgumentParser(description="Parse project membership report dumps")
    parser.add_argument("-v","--verbose", help="Verbose output", action='store_true')
    parser.add_argument("-db","--dburl", help="Database file url", default=None)
    parser.add_argument("-n","--noarchive", help="Do not archive input files after parsing", action='store_true')
    parser.add_argument("inputs", help="dumpfiles", nargs='+')

    # Parse the list we were handed; calling parse_args() with no argument
    # would silently read sys.argv instead and ignore the caller's list
    return parser.parse_args(args)

def main_parse_args(args):
    """
    Parse the given argument list and run main on the result.
    Usable directly from tests.
    """
    parsed = parse_args(args)
    # Hand main's return value back to the caller; py.test-driven callers
    # rely on receiving it
    outcome = main(parsed)
    return outcome

def main_argv():
    """
    Entry point for the setup.py console script: run the command using the
    arguments found on the process command line.
    """
    # Drop the program name, keep only the user-supplied arguments
    cli_arguments = sys.argv[1:]
    main_parse_args(cli_arguments)

# Run the command line entry point when this file is executed as a script
if __name__ == "__main__":

    main_argv()

1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ console_scripts =
parse_account_usage_data = ncigrafana.parse_account_usage_data:main_argv
nci_account_json = ncigrafana.nci_account:main_argv
parse_lquota_data = ncigrafana.parse_lquota:main_argv
parse_membership_data = ncigrafana.parse_membership:main_argv

[extras]
# Optional dependencies
Expand Down
3 changes: 3 additions & 0 deletions test/2024-12-03T11:09:28.au88.membership.dump
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
%%%%%%%%%%%%%%%%
Tue Dec 03 11:09:29 AEDT 2024
au88:*:8950:test502,test581,test561,blah_ci
41 changes: 41 additions & 0 deletions test/test_parse_membership.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/usr/bin/env python

from __future__ import print_function

import datetime
from numpy.testing import assert_array_equal, assert_array_almost_equal
from numpy import arange
import os
import pytest
import time

from ncigrafana.UsageDataset import *
from ncigrafana.DBcommon import datetoyearquarter
from ncigrafana.parse_membership import parse_membership

# Define a TZ rule that names both the AEST and AEDT abbreviations so that
# the "AEDT" timestamp in the test dump file can be parsed
os.environ['TZ'] = 'AEST-10AEDT-11,M10.5.0,M3.5.0'
time.tzset()
# NOTE(review): dbfileprefix is not used by the tests visible here -- confirm
dbfileprefix = '.'
# Verbosity flag handed to parse_membership by the test below
verbose = False

@pytest.fixture(scope='session')
def db():
    """Session-wide ProjectDataset for project 'xx00' on an in-memory SQLite url."""
    return ProjectDataset('xx00', "sqlite:///:memory:")

def test_parse_membership(db):
    """
    Parse the sample au88 dump and check the users, project and membership
    rows that end up in the database.
    """
    dump_path = 'test/2024-12-03T11:09:28.au88.membership.dump'
    parse_membership(dump_path, verbose=verbose, db=db)

    # The dump lists four names, one of which is a "_ci" service user
    users = db.db["Users"]
    assert users.count() == 3
    assert users.find_one(user='test581')

    projects = db.db["Projects"]
    assert projects.find_one(project='au88')
    project_id = projects.find_one(project='au88')['id']

    snapshot_day = datetime.date(2024, 12, 3)
    memberships = db.db["ProjectMembership"]
    assert memberships.find_one(project_id=project_id, date=snapshot_day)

    membership_record = memberships.find_one(project_id=project_id, date=snapshot_day)
    stored_members = membership_record["members"].split(",")
    assert len(stored_members) == 3

0 comments on commit 0d1cb01

Please sign in to comment.