From a6a4b460689c5e39777773e8ae32ba8f5e389671 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 11 Jan 2024 11:35:56 +0100 Subject: [PATCH 01/22] make lanesplit check on sampleIDs rather then samplenames, and lane aware --- src/dissectBCL/classes.py | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/dissectBCL/classes.py b/src/dissectBCL/classes.py index 280ff00..76f6f54 100644 --- a/src/dissectBCL/classes.py +++ b/src/dissectBCL/classes.py @@ -179,11 +179,18 @@ def decideSplit(self): laneSplitStatus = True # Do we need lane splitting or not ? # If there is at least one sample in more then 1 lane, we cannot split: - if sum(self.fullSS['Sample_Name'].value_counts() > 1) > 0: - logging.info( - "No lane splitting: >= 1 sample in multiple lanes." - ) - laneSplitStatus = False + samples = list(self.fullSS['Sample_ID'].unique()) + for _s in samples: + if len( + list(self.fullSS[ + self.fullSS['Sample_ID'] == _s + ]['Lane'].unique() + ) + ) > 1: + logging.info( + "No lane splitting: >= 1 sample in multiple lanes." + ) + laneSplitStatus = False # If one project is split over multiple lanes, we also don't split: projects = list(self.fullSS['Sample_Project'].unique()) for project in projects: From c392c741546d66b4283d64f3574ec6a5da103a42 Mon Sep 17 00:00:00 2001 From: adRn-s Date: Mon, 15 Jan 2024 12:27:38 +0100 Subject: [PATCH 02/22] get_contact_details API endpoint // deprecated userList text file --- ChangeLog | 1 + dissectBCL.ini | 2 +- docs/config.rst | 6 +-- src/tools/emailProjectFinished.py | 67 +++++++++---------------------- 4 files changed, 23 insertions(+), 53 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8f2a4f0..df494c4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,7 @@ CHANGES ======= +* emailProjectFinished.py now uses Parkour2's API endpoint (#161 , #175) * actual seq\_data dir in email * mycoplasma include in prep * include mycoplasma hyorhinis diff --git a/dissectBCL.ini b/dissectBCL.ini index 80ad3b5..06be865 100644 --- a/dissectBCL.ini +++ b/dissectBCL.ini @@ -18,7 +18,7 @@ pushURL=parkour.push.url/api user=parkourUser password=parkourPw cert=/path/to/cert.pem -userList=filename_with_parkour_users +URL=parkour.domain.tld [software] bclconvert=/path/to/bclconvert diff --git a/docs/config.rst b/docs/config.rst index 6e04f8c..efebc15 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -68,9 +68,7 @@ Note that this block contains sensitive information. #. user: the username for API requests #. pw: the password for API requests #. cert: the pem certificate for API requests -#. userList: a headerless tsv file containing firstname lastname emailaddress lines. - -Note that the userList is used implicitly for the email command to notify end users. +#. URL: the URL to Parkour2, `https://` is implicit! .. _software: @@ -128,7 +126,7 @@ example user=parkourUser password=parkourPw cert=/path/to/cert.pem - userList=filename_with_parkour_users + URL=parkour.domain.tld [software] bclconvert=/path/to/bclconvert diff --git a/src/tools/emailProjectFinished.py b/src/tools/emailProjectFinished.py index 5fd9670..4b0b8f2 100755 --- a/src/tools/emailProjectFinished.py +++ b/src/tools/emailProjectFinished.py @@ -3,57 +3,26 @@ import sys import smtplib import os +import requests from dissectBCL.misc import getConf from email.mime.text import MIMEText import glob - -def fetchFirstNameAndEmail(lastName, config): - # search in dictionary file defined in config for lastName - try: - fn = config['parkour']['userList'] - except KeyError: - print("Error: fetchFirstNameAndEmail\n\ - No dictionary defined. \ - Specify --toEmail and --toName explicitly!") - sys.exit(1) - - if not os.path.exists(fn): - print("{} does not exist!".format(fn)) - sys.exit(1) - - f = open(fn) - d = dict() - for line in f: - cols = line.rstrip().split("\t") - - # only accept format: firstName, lastName, email - if (len(cols) < 3): - continue - - # ignore all other lastNames - if cols[1] != lastName: - continue - - # check if lastName occurs more than once in list - if cols[1] in d: - print("Error: fetchFirstNameAndEmail\n\ - Name {} exists more than once. \ - Specify --toEmail and --toName explicitly!".format(cols[1])) - print('now: ', cols[1], cols[0], cols[2]) - print('previous: ', cols[1], d[cols[1]]) - sys.exit(1) - - # add to dictionary - d[cols[1]] = [cols[0], cols[2]] - f.close() - - if lastName not in d: - print("Error: fetchFirstNameAndEmail\n\ - No Information for lastName={}. {} needs update".format(lastName, fn)) - sys.exit(1) - - return d[lastName] +def getContactDetails(projectID, config): + """ + Retrieve user data from a given sequencing request + """ + res = requests.get( + config["parkour"]["URL"] + + "/api/requests/" + + projectID + + "/get_contact_details", + auth=(config["parkour"]["user"], config["parkour"]["password"]), + verify=config["parkour"]["cert"], + ) + if res.status_code != 200: + raise RuntimeError(f"API error: {res.json()}") + return res.json() def getProjectIDs(projects, config): @@ -167,7 +136,9 @@ def main(): # get lastName (user) from project name lastName = args.project[0].split("_")[2] if not args.toEmail or not args.toName: - firstName, email = fetchFirstNameAndEmail(lastName, config) + my_dict = getContactDetails(args.project[0].split("_")[1], config) + assert lastName == my_dict["last_name"] + firstName, email = my_dict["first_name"], my_dict["email"] else: firstName, email = args.toName, args.toEmail From 8e2e15d38d24ddaffce36fd43c583db7069f92cd Mon Sep 17 00:00:00 2001 From: adRn-s Date: Mon, 15 Jan 2024 12:36:20 +0100 Subject: [PATCH 03/22] flake8 E302 --- src/tools/emailProjectFinished.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/tools/emailProjectFinished.py b/src/tools/emailProjectFinished.py index 4b0b8f2..aec0ffe 100755 --- a/src/tools/emailProjectFinished.py +++ b/src/tools/emailProjectFinished.py @@ -8,6 +8,7 @@ from email.mime.text import MIMEText import glob + def getContactDetails(projectID, config): """ Retrieve user data from a given sequencing request From 35c480355cdf869489fc36cde286bf118708582e Mon Sep 17 00:00:00 2001 From: adRn-s Date: Fri, 19 Jan 2024 11:48:34 +0100 Subject: [PATCH 04/22] use 1 config param (URL) instead of 3 --- dissectBCL.ini | 2 -- docs/config.rst | 2 -- src/dissectBCL/fakeNews.py | 6 ++++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/dissectBCL.ini b/dissectBCL.ini index 06be865..748dcec 100644 --- a/dissectBCL.ini +++ b/dissectBCL.ini @@ -13,8 +13,6 @@ seqDir=seqfolderstr fex=False [parkour] -pullURL=parkour.pull.url/api -pushURL=parkour.push.url/api user=parkourUser password=parkourPw cert=/path/to/cert.pem diff --git a/docs/config.rst b/docs/config.rst index efebc15..47a47fb 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -63,8 +63,6 @@ parkour The *parkour block* contains all necessary information to communicate with `parkour `. Note that this block contains sensitive information. -#. pullURL: the URL to pull flowcell information from. Is parkoururl/api/analysis_list/analysis_list -#. pushURL: the URL to push flowcell statistics to. Is parkoururl/api/run_statistics/upload #. user: the username for API requests #. pw: the password for API requests #. cert: the pem certificate for API requests diff --git a/src/dissectBCL/fakeNews.py b/src/dissectBCL/fakeNews.py index 8613452..ef2244e 100644 --- a/src/dissectBCL/fakeNews.py +++ b/src/dissectBCL/fakeNews.py @@ -48,7 +48,8 @@ def pullParkour(flowcellID, config): ) d = {'flowcell_id': FID} res = requests.get( - config['parkour']['pullURL'], + config['parkour']['URL'] + + '/api/analysis_list/analysis_list/', auth=( config['parkour']['user'], config['parkour']['password'] @@ -185,7 +186,8 @@ def pushParkour(flowcellID, sampleSheet, config, flowcellBase): d['matrix'] = json.dumps(list(laneDict.values())) logging.info("Pushing FID with dic {} {}".format(FID, d)) pushParkStat = requests.post( - config.get("parkour", "pushURL"), + config.get("parkour", "URL") + + '/api/run_statistics/upload/', auth=( config.get("parkour", "user"), config.get("parkour", "password") From 010386be26bbe59b4ed52a813c54619bf2be528f Mon Sep 17 00:00:00 2001 From: adRn-s Date: Fri, 19 Jan 2024 11:50:16 +0100 Subject: [PATCH 05/22] URL should be complete --- docs/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/config.rst b/docs/config.rst index 47a47fb..5e5fe9a 100644 --- a/docs/config.rst +++ b/docs/config.rst @@ -66,7 +66,7 @@ Note that this block contains sensitive information. #. user: the username for API requests #. pw: the password for API requests #. cert: the pem certificate for API requests -#. URL: the URL to Parkour2, `https://` is implicit! +#. URL: the URL to Parkour2, e.g. `https://parkour.yourdomain.tld`. .. _software: From 88c7d11fa7148a19566ca4940bdf37ff895eb6fe Mon Sep 17 00:00:00 2001 From: adRn-s Date: Fri, 19 Jan 2024 14:28:53 +0100 Subject: [PATCH 06/22] flake8 fix --- src/dissectBCL/fakeNews.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/dissectBCL/fakeNews.py b/src/dissectBCL/fakeNews.py index ef2244e..7c3672f 100644 --- a/src/dissectBCL/fakeNews.py +++ b/src/dissectBCL/fakeNews.py @@ -48,8 +48,7 @@ def pullParkour(flowcellID, config): ) d = {'flowcell_id': FID} res = requests.get( - config['parkour']['URL'] + - '/api/analysis_list/analysis_list/', + config['parkour']['URL'] + '/api/analysis_list/analysis_list/', auth=( config['parkour']['user'], config['parkour']['password'] @@ -186,8 +185,7 @@ def pushParkour(flowcellID, sampleSheet, config, flowcellBase): d['matrix'] = json.dumps(list(laneDict.values())) logging.info("Pushing FID with dic {} {}".format(FID, d)) pushParkStat = requests.post( - config.get("parkour", "URL") + - '/api/run_statistics/upload/', + config.get("parkour", "URL") + '/api/run_statistics/upload/', auth=( config.get("parkour", "user"), config.get("parkour", "password") From fda62aae58fe9c0c1b1c4918c070d1eeab9b254a Mon Sep 17 00:00:00 2001 From: adRn-s Date: Fri, 26 Jan 2024 13:24:35 +0100 Subject: [PATCH 07/22] FutureWarning: Calling int on a single element Series is deprecated and will raise a TypeError in the future. --- src/dissectBCL/drHouse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dissectBCL/drHouse.py b/src/dissectBCL/drHouse.py index 533cb5e..8c335fb 100644 --- a/src/dissectBCL/drHouse.py +++ b/src/dissectBCL/drHouse.py @@ -142,7 +142,7 @@ def initClass( muxDF = pd.read_csv(muxPath) totalReads = int(muxDF['# Reads'].sum()) if len(muxDF[muxDF['SampleID'] == 'Undetermined']) == 1: - undReads = int(muxDF[muxDF['SampleID'] == 'Undetermined']['# Reads']) + undReads = int(muxDF[muxDF['SampleID'] == 'Undetermined']['# Reads'].iloc[0]) else: undDic = dict( muxDF[ From 413c207937c7678ffcacb32d418df1aa7ce66c0c Mon Sep 17 00:00:00 2001 From: adRn-s Date: Fri, 26 Jan 2024 14:10:22 +0100 Subject: [PATCH 08/22] works in my server :P --- src/wd40/release.py | 43 ++++++++++++++++++++++++++++++++++++++++++- src/wd40/wd40.py | 15 ++++++++++++++- 2 files changed, 56 insertions(+), 2 deletions(-) diff --git a/src/wd40/release.py b/src/wd40/release.py index ebf30e5..a4aa7b7 100644 --- a/src/wd40/release.py +++ b/src/wd40/release.py @@ -1,4 +1,6 @@ import os +import requests +from subprocess import check_output import sys import glob from pathlib import Path @@ -154,7 +156,7 @@ def release_rights(F, grp): return (successRate) -def rel(flowcellPath, piList, prefix, postfix): +def rel(flowcellPath, piList, prefix, postfix, parkourURL, parkourAuth, parkourCert, fexBool, fromAddress): projDic = fetchFolders( flowcellPath, piList, @@ -185,3 +187,42 @@ def rel(flowcellPath, piList, prefix, postfix): successes[2] ) ) + projectPath = projDic[proj][1][1].split('/')[-1] + PI = projectPath.split('_')[-1].lower().replace( + "cabezas-wallscheid", "cabezas" + ) + d = None + if PI in piList: + d = { + "data": projDic[proj][1][1], + "metadata": projDic[proj][1][1] + '/multiqc_report.html' + } + elif fexBool: + fexList = check_output( + [ + 'fexsend', + '-l', + fromAddress + ] + ).decode("utf-8").replace("\n", " ").split(' ') + tar_lane, tar_proj = projDic[proj][1][1].split('/')[-2:] + # e.g. ['230731_M01358_0029_000000000-KYBFN_lanes_1', 'Project_2852_Trancoso_Boehm'] + tarBall = tar_lane + '_' + tar_proj + '.tar' + if tarBall in fexList: + d = { + "data": tarBall, + "metadata": None + } + else: + print("fexLink: ", tarBall, " not found!") + if d: + parkourURL = "https://parkour-test.ie-freiburg.mpg.de" + #TODO testing purposes only, remove this line. ^^ + print("Adding filepaths to Parkour2:", + requests.post( + parkourURL + '/api/requests/' + proj.split('_')[1] + '/put_filepaths/', + auth = parkourAuth, + data = d, + verify = parkourCert + ) + ) # print the returned answer from the API diff --git a/src/wd40/wd40.py b/src/wd40/wd40.py index 017cfaa..6789cfc 100644 --- a/src/wd40/wd40.py +++ b/src/wd40/wd40.py @@ -73,6 +73,14 @@ def cli(ctx, configpath, debug): ctx.obj['postfixDir'] = cnf['Internals']['seqDir'] ctx.obj['fastqDir'] = cnf['Dirs']['outputDir'] ctx.obj['solDir'] = cnf['Dirs']['baseDir'] + ctx.obj['parkourURL'] = cnf['parkour']['URL'] + ctx.obj['parkourAuth'] = ( + cnf['parkour']['user'], + cnf['parkour']['password'] + ) + ctx.obj['parkourCert'] = cnf['parkour']['cert'] + ctx.obj['fexBool'] = cnf['Internals'].getboolean('fex') + ctx.obj['fromAddress'] = cnf['communication']['fromAddress'] @cli.command() @@ -88,7 +96,12 @@ def rel(ctx, flowcell): flowcell, ctx.obj['piList'], ctx.obj['prefixDir'], - ctx.obj['postfixDir'] + ctx.obj['postfixDir'], + ctx.obj['parkourURL'], + ctx.obj['parkourAuth'], + ctx.obj['parkourCert'], + ctx.obj['fexBool'], + ctx.obj['fromAddress'] ) From ac566213649381cdec5ed186ded8169a85c03a26 Mon Sep 17 00:00:00 2001 From: adRn-s Date: Fri, 26 Jan 2024 14:19:16 +0100 Subject: [PATCH 09/22] fixes --- src/dissectBCL/drHouse.py | 6 +++++- src/wd40/release.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/dissectBCL/drHouse.py b/src/dissectBCL/drHouse.py index 8c335fb..4e2df8f 100644 --- a/src/dissectBCL/drHouse.py +++ b/src/dissectBCL/drHouse.py @@ -142,7 +142,11 @@ def initClass( muxDF = pd.read_csv(muxPath) totalReads = int(muxDF['# Reads'].sum()) if len(muxDF[muxDF['SampleID'] == 'Undetermined']) == 1: - undReads = int(muxDF[muxDF['SampleID'] == 'Undetermined']['# Reads'].iloc[0]) + undReads = int( + muxDF[ + muxDF['SampleID'] == 'Undetermined' + ]['# Reads'].iloc[0] + ) else: undDic = dict( muxDF[ diff --git a/src/wd40/release.py b/src/wd40/release.py index a4aa7b7..d430f50 100644 --- a/src/wd40/release.py +++ b/src/wd40/release.py @@ -216,8 +216,6 @@ def rel(flowcellPath, piList, prefix, postfix, parkourURL, parkourAuth, parkourC else: print("fexLink: ", tarBall, " not found!") if d: - parkourURL = "https://parkour-test.ie-freiburg.mpg.de" - #TODO testing purposes only, remove this line. ^^ print("Adding filepaths to Parkour2:", requests.post( parkourURL + '/api/requests/' + proj.split('_')[1] + '/put_filepaths/', @@ -226,3 +224,5 @@ def rel(flowcellPath, piList, prefix, postfix, parkourURL, parkourAuth, parkourC verify = parkourCert ) ) # print the returned answer from the API + else: + print("Warning: Unrecognized PI or fexBool was False") From 7189dc7acb0973f6e19bef6ab53c6fe0214def10 Mon Sep 17 00:00:00 2001 From: adRn-s Date: Fri, 26 Jan 2024 14:30:19 +0100 Subject: [PATCH 10/22] autoreformatted for flake8 rule --- src/wd40/release.py | 156 ++++++++++++++++++-------------------------- 1 file changed, 63 insertions(+), 93 deletions(-) diff --git a/src/wd40/release.py b/src/wd40/release.py index d430f50..ca41c26 100644 --- a/src/wd40/release.py +++ b/src/wd40/release.py @@ -8,22 +8,12 @@ def fetchLatestSeqDir(pref, PI, postfix): - globStr = os.path.join( - pref, - PI, - postfix + '*' - ) + globStr = os.path.join(pref, PI, postfix + "*") if len(glob.glob(globStr)) == 1: return glob.glob(globStr)[0] else: maxFolder = 0 - for seqDir in glob.glob( - os.path.join( - pref, - PI, - postfix + '*' - ) - ): + for seqDir in glob.glob(os.path.join(pref, PI, postfix + "*")): try: seqInt = int(seqDir[-1]) except ValueError: @@ -31,17 +21,13 @@ def fetchLatestSeqDir(pref, PI, postfix): continue if seqInt > maxFolder: maxFolder = seqInt - return (os.path.join( - pref, - PI, - postfix + str(maxFolder) - )) + return os.path.join(pref, PI, postfix + str(maxFolder)) def fetchFolders(flowcellPath, piList, prefix, postfix): institute_PIs = piList flowcellPath = os.path.abspath(flowcellPath) - FID = flowcellPath.split('/')[-1] + FID = flowcellPath.split("/")[-1] projDic = {} try: int(FID[:6]) @@ -50,33 +36,26 @@ def fetchFolders(flowcellPath, piList, prefix, postfix): sys.exit( "First 6 digits of flowcellpath don't convert to an int. Exiting." ) - for projF in glob.glob( - os.path.join( - flowcellPath, - 'Project_*' - ) - ): - proj = projF.split('/')[-1] + for projF in glob.glob(os.path.join(flowcellPath, "Project_*")): + proj = projF.split("/")[-1] PI = proj.split("_")[-1].lower() - if PI == 'cabezas-wallscheid': - PI = 'cabezas' + if PI == "cabezas-wallscheid": + PI = "cabezas" if PI in institute_PIs: seqFolder = fetchLatestSeqDir(prefix, PI, postfix) - if os.path.exists( - os.path.join(seqFolder, FID) - ): + if os.path.exists(os.path.join(seqFolder, FID)): projDic[proj] = [ - PI + 'grp', + PI + "grp", [ os.path.join(seqFolder, FID), os.path.join(seqFolder, FID, proj), - os.path.join(seqFolder, FID, 'FASTQC_' + proj), + os.path.join(seqFolder, FID, "FASTQC_" + proj), os.path.join( seqFolder, FID, - 'Analysis_' + proj.replace('Project_', '') - ) - ] + "Analysis_" + proj.replace("Project_", ""), + ), + ], ] else: print( @@ -109,12 +88,8 @@ def release_folder(grp, lis): succes_fqc = release_rights(fastqcF, grp) if os.path.exists(analysisF): succes_analysis = release_rights(analysisF, grp) - return ( - [succes_project, succes_fqc, succes_analysis] - ) - return ( - [succes_project, succes_fqc] - ) + return [succes_project, succes_fqc, succes_analysis] + return [succes_project, succes_fqc] def release_rights(F, grp): @@ -124,10 +99,7 @@ def release_rights(F, grp): for r, dirs, files in os.walk(F): for d in dirs: try: - os.chmod( - os.path.join(r, d), - 0o750 - ) + os.chmod(os.path.join(r, d), 0o750) changed += 1 except PermissionError: print("Permission error for {}".format(d)) @@ -138,10 +110,7 @@ def release_rights(F, grp): if grp != Path(fil).group(): grouperror = True try: - os.chmod( - fil, - 0o750 - ) + os.chmod(fil, 0o750) changed += 1 except PermissionError: print("Permission error for {}".format(f)) @@ -153,76 +122,77 @@ def release_rights(F, grp): F ) ) - return (successRate) + return successRate -def rel(flowcellPath, piList, prefix, postfix, parkourURL, parkourAuth, parkourCert, fexBool, fromAddress): - projDic = fetchFolders( - flowcellPath, - piList, - prefix, - postfix - ) +def rel( + flowcellPath, + piList, + prefix, + postfix, + parkourURL, + parkourAuth, + parkourCert, + fexBool, + fromAddress, +): + projDic = fetchFolders(flowcellPath, piList, prefix, postfix) print("Print number of changed/(changed+unchanged)!") for proj in projDic: - ''' + """ every projDic[proj] is a nested list of: [grp, [flowcell, project, fastqc]] - ''' + """ successes = release_folder(projDic[proj][0], projDic[proj][1]) if len(successes) == 2: print( "[green]Project[/green] {},{} proj,{} fqc".format( - proj, - successes[0], - successes[1] + proj, successes[0], successes[1] ) ) else: print( "[green]Project[/green] {},{} proj,{} fqc,{} analysis".format( - proj, - successes[0], - successes[1], - successes[2] + proj, successes[0], successes[1], successes[2] ) ) - projectPath = projDic[proj][1][1].split('/')[-1] - PI = projectPath.split('_')[-1].lower().replace( - "cabezas-wallscheid", "cabezas" + projectPath = projDic[proj][1][1].split("/")[-1] + PI = ( + projectPath.split("_")[-1] + .lower() + .replace("cabezas-wallscheid", "cabezas") ) d = None if PI in piList: d = { - "data": projDic[proj][1][1], - "metadata": projDic[proj][1][1] + '/multiqc_report.html' - } + "data": projDic[proj][1][1], + "metadata": projDic[proj][1][1] + "/multiqc_report.html", + } elif fexBool: - fexList = check_output( - [ - 'fexsend', - '-l', - fromAddress - ] - ).decode("utf-8").replace("\n", " ").split(' ') - tar_lane, tar_proj = projDic[proj][1][1].split('/')[-2:] - # e.g. ['230731_M01358_0029_000000000-KYBFN_lanes_1', 'Project_2852_Trancoso_Boehm'] - tarBall = tar_lane + '_' + tar_proj + '.tar' + fexList = ( + check_output(["fexsend", "-l", fromAddress]) + .decode("utf-8") + .replace("\n", " ") + .split(" ") + ) + tar_lane, tar_proj = projDic[proj][1][1].split("/")[-2:] + tarBall = tar_lane + "_" + tar_proj + ".tar" if tarBall in fexList: - d = { - "data": tarBall, - "metadata": None - } + d = {"data": tarBall, "metadata": None} else: - print("fexLink: ", tarBall, " not found!") + print("fexLink: ", tarBall, " not found!") if d: - print("Adding filepaths to Parkour2:", + print( + "Adding filepaths to Parkour2:", requests.post( - parkourURL + '/api/requests/' + proj.split('_')[1] + '/put_filepaths/', - auth = parkourAuth, - data = d, - verify = parkourCert - ) + parkourURL + + "/api/requests/" + + proj.split("_")[1] + + "/put_filepaths/", + auth=parkourAuth, + data=d, + verify=parkourCert, + ), ) # print the returned answer from the API else: print("Warning: Unrecognized PI or fexBool was False") From 0caa9765141b764e8515ee4ef6065a07062d1c58 Mon Sep 17 00:00:00 2001 From: adRn-s Date: Fri, 26 Jan 2024 14:33:30 +0100 Subject: [PATCH 11/22] or it wasn't found on fexList // let's just omit info, code is the doc (?) --- src/wd40/release.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/wd40/release.py b/src/wd40/release.py index ca41c26..9873a38 100644 --- a/src/wd40/release.py +++ b/src/wd40/release.py @@ -194,5 +194,3 @@ def rel( verify=parkourCert, ), ) # print the returned answer from the API - else: - print("Warning: Unrecognized PI or fexBool was False") From 7e49dd406ff78040b923af0cd4cd10f95eb45f67 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 9 Feb 2024 15:26:21 +0100 Subject: [PATCH 12/22] split up zebrafish contamination into mito - rrna - zebrafish --- contaminome.yml | 12 +++++++++++- src/tools/prep_contaminome.py | 10 +++++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/contaminome.yml b/contaminome.yml index 26c90ed..6a11283 100644 --- a/contaminome.yml +++ b/contaminome.yml @@ -162,6 +162,11 @@ rrna: vulgarname: aedesaegyptirrna accession: NC_035159.1 taxid: 71591111 + Zebrafish rRNA: + URL: https://raw.githubusercontent.com/WardDeb/customcontamination/main/fna/zebrarRNA.fna.gz + vulgarname: zebrafishrrna + accession: NR_145818.1 + taxid: 79551111 mito: Homo sapiens mitochondrion: URL: https://raw.githubusercontent.com/WardDeb/customcontamination/main/fna/humanmito.fna.gz @@ -182,4 +187,9 @@ mito: URL: https://raw.githubusercontent.com/WardDeb/customcontamination/main/fna/aedesaegyptimito.fna.gz vulgarname: aedesaegyptimito accession: NC_035159.1 - taxid: 71592222 \ No newline at end of file + taxid: 71592222 + Zebrafish mitochondrion: + URL: https://raw.githubusercontent.com/WardDeb/customcontamination/main/fna/zebramito.fna.gz + vulgarname: zebrafishmito + accession: NC_002333.2 + taxid: 79552222 diff --git a/src/tools/prep_contaminome.py b/src/tools/prep_contaminome.py index 914d86b..1accbc5 100644 --- a/src/tools/prep_contaminome.py +++ b/src/tools/prep_contaminome.py @@ -12,14 +12,16 @@ 'human': ['NC_012920.1'], # human mito 'mouse': ['NC_005089.1'], # mouse mito 'fly': ['NC_024511.2'], # fly mito - 'aedes-aegypti': ['NC_035159.1'] # aedes mito + 'aedes-aegypti': ['NC_035159.1'], # aedes mito + 'zebrafish': ['NC_002333.2'] # zebrafish mito } rrna_mask = [ ('human', 'humanrrna'), ('mouse', 'mouserrna'), ('fly', 'flyrrna'), - ('aedes-aegypti', 'aedesaegyptirrna') + ('aedes-aegypti', 'aedesaegyptirrna'), + ('zebrafish', 'zebrafishrrna') ] taxmap = { @@ -68,11 +70,13 @@ 'humanrrna': [96061111, 9, 'species'], 'mouserrna': [100901111, 10, 'species'], 'aedesaegyptirrna': [71591111, 13, 'species'], + 'zebrafishrrna': [79551111, 13, 'species'], 'flyrrna': [72271111, 11, 'species'], 'humanmito': [96062222, 9, 'species'], 'mousemito': [100902222, 10, 'species'], 'flymito': [72272222, 11, 'species'], - 'aedesaegyptimito': [71592222, 13, 'species'] + 'aedesaegyptimito': [71592222, 13, 'species'], + 'zebrafishmito': [79552222, 13, 'species'] } From 02afa7e035c263138fa7b467f8eea5841de8a8ad Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 9 Feb 2024 15:26:50 +0100 Subject: [PATCH 13/22] Changelog --- ChangeLog | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index df494c4..9377de6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,11 +1,29 @@ CHANGES ======= -* emailProjectFinished.py now uses Parkour2's API endpoint (#161 , #175) +* split up zebrafish contamination into mito - rrna - zebrafish +* or it wasn't found on fexList // let's just omit info, code is the doc (?) +* autoreformatted for flake8 rule +* fixes +* works in my server :P +* FutureWarning: Calling int on a single element Series is deprecated and will raise a TypeError in the future +* flake8 fix +* URL should be complete +* use 1 config param (URL) instead of 3 +* flake8 E302 +* get\_contact\_details API endpoint // deprecated userList text file +* Lanesplit samples (#173) +* make lanesplit check on sampleIDs rather then samplenames, and lane aware +* Mycoplasma implement, email update (#172) +* Contam emails (#171) +* ChangeLog * actual seq\_data dir in email * mycoplasma include in prep * include mycoplasma hyorhinis +* docs updates (#170) +* auto version for docs, fix readthedocs yaml (#169) * auto version for docs, fix readthedocs yaml +* Docs (#168) * update changelog * include authors * make sure doc pytest includes reqs from the doc folder From dcf8a867c9f97114525c778359dccdea73da01c3 Mon Sep 17 00:00:00 2001 From: adRn-s Date: Tue, 13 Feb 2024 08:42:11 +0100 Subject: [PATCH 14/22] name from folder is processed, we'd need to do the same. better avoid this. --- src/tools/emailProjectFinished.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tools/emailProjectFinished.py b/src/tools/emailProjectFinished.py index aec0ffe..6369457 100755 --- a/src/tools/emailProjectFinished.py +++ b/src/tools/emailProjectFinished.py @@ -138,7 +138,6 @@ def main(): lastName = args.project[0].split("_")[2] if not args.toEmail or not args.toName: my_dict = getContactDetails(args.project[0].split("_")[1], config) - assert lastName == my_dict["last_name"] firstName, email = my_dict["first_name"], my_dict["email"] else: firstName, email = args.toName, args.toEmail From d96c6df307a503a0d3e78b79318a807960f8a7fb Mon Sep 17 00:00:00 2001 From: adRn-s Date: Wed, 14 Feb 2024 15:52:20 +0100 Subject: [PATCH 15/22] better safe than sorry - subject for emails is configurable so that testing emails are discerned --- src/dissectBCL/fakeNews.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/dissectBCL/fakeNews.py b/src/dissectBCL/fakeNews.py index 7c3672f..f060a9a 100644 --- a/src/dissectBCL/fakeNews.py +++ b/src/dissectBCL/fakeNews.py @@ -360,7 +360,8 @@ def multiQC_yaml(config, flowcell, ssDic, project, laneFolder): def mailHome(subject, _html, config, toCore=False): mailer = MIMEMultipart('alternative') - mailer['Subject'] = '[dissectBCL] [{}] '.format( + mailer['Subject'] = '[{}] [{}] '.format( + config['communication']['subject'], version('dissectBCL') ) + subject mailer['From'] = config['communication']['fromAddress'] From 33c433ae8bb7e42fe73bc0ed714ce2732d178fce Mon Sep 17 00:00:00 2001 From: adRn-s Date: Thu, 15 Feb 2024 10:21:47 +0100 Subject: [PATCH 16/22] works with latest ruamel, dump fn now is a obj.method --- src/dissectBCL/fakeNews.py | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/dissectBCL/fakeNews.py b/src/dissectBCL/fakeNews.py index f060a9a..62db139 100644 --- a/src/dissectBCL/fakeNews.py +++ b/src/dissectBCL/fakeNews.py @@ -581,21 +581,30 @@ def organiseLogs(flowcell, sampleSheet): mvFile ) shutil.move(fileIn, fileOut) + # Write out ssdf. outssdf = os.path.join(_logDir, 'sampleSheetdf.tsv') sampleSheet.ssDic[outLane]['sampleSheet'].to_csv(outssdf, sep='\t') - # Write out the yaml files. - yaml = ruamel.yaml.YAML() - yaml.indent(mapping=2, sequence=4, offset=2) # write out outLaneInfo.yaml + dic0 = sampleSheet.ssDic[outLane] + del dic0['sampleSheet'] + yaml0 = ruamel.yaml.YAML() + yaml0.indent(mapping=2, sequence=4, offset=2) outLaneInfo = os.path.join(_logDir, 'outLaneInfo.yaml') - dic = sampleSheet.ssDic[outLane] - del dic['sampleSheet'] with open(outLaneInfo, 'w') as f: - ruamel.yaml.dump(dic, f) + yaml0.dump(dic0, f) + + # write out config.ini + dic1 = flowcell.asdict() + flowcellConfig = os.path.join(_logDir, 'config.ini') + with open(flowcellConfig, 'w') as f: + dic1['config'].write(f) + # write out flowcellInfo.yaml + del dic1['config'] + yaml1 = ruamel.yaml.YAML() + yaml1.indent(mapping=2, sequence=4, offset=2) flowcellInfo = os.path.join(_logDir, 'flowcellInfo.yaml') - dic = flowcell.asdict() with open(flowcellInfo, 'w') as f: - ruamel.yaml.dump(dic, f) + yaml1.dump(dic1, f) From f5e8cd090a615de8b3360a37bbcd91cc24c7b2f8 Mon Sep 17 00:00:00 2001 From: adRn-s Date: Thu, 15 Feb 2024 10:57:43 +0100 Subject: [PATCH 17/22] fix flake8 E721 misc.py 159:8 --- src/dissectBCL/misc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dissectBCL/misc.py b/src/dissectBCL/misc.py index 33ea039..6881a91 100644 --- a/src/dissectBCL/misc.py +++ b/src/dissectBCL/misc.py @@ -156,7 +156,7 @@ def hamming(s1, s2): # We have some basket cases (multimodal) # Where barcode is nan (type as float) # Ignore these for now. - if type(s1) == float or type(s2) == float: + if isinstance(s1, float) or isinstance(s2, float): return 0 if s1 is None or s2 is None: return 0 From d1b0f9027114f82c89593c2a2edb616cc3d5b834 Mon Sep 17 00:00:00 2001 From: adRn-s Date: Thu, 15 Feb 2024 11:00:36 +0100 Subject: [PATCH 18/22] fix flake8 F841 emailProjectFinished.py 138:5 --- src/tools/emailProjectFinished.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/emailProjectFinished.py b/src/tools/emailProjectFinished.py index 6369457..8d1e923 100755 --- a/src/tools/emailProjectFinished.py +++ b/src/tools/emailProjectFinished.py @@ -135,7 +135,7 @@ def main(): sys.exit("Project folder {} not found.".format(p)) # get lastName (user) from project name - lastName = args.project[0].split("_")[2] + ## lastName = args.project[0].split("_")[2] if not args.toEmail or not args.toName: my_dict = getContactDetails(args.project[0].split("_")[1], config) firstName, email = my_dict["first_name"], my_dict["email"] From 6452edb7d396c26c58f7da90fec12c05090329dd Mon Sep 17 00:00:00 2001 From: adRn-s Date: Thu, 15 Feb 2024 11:02:40 +0100 Subject: [PATCH 19/22] new param, communication.subject --- dissectBCL.ini | 1 + 1 file changed, 1 insertion(+) diff --git a/dissectBCL.ini b/dissectBCL.ini index 748dcec..1eaf806 100644 --- a/dissectBCL.ini +++ b/dissectBCL.ini @@ -28,6 +28,7 @@ mpiImg=/path/to/multiqc_headerimg.jpg krakenExpl=" Kraken is used to classify the reads and to detect contamination.
For this we use a *custom* database, with a simplified taxonomical hierarchy (that no longer resembles any true taxonomical classification.
In brief, by default we screen for:
  • eukaryotes (human, mouse, fly, mosquito, lamprey, medaka, c-elegans, yeast, zebrafish and the moss-piglet)
  • prokaryotes (Ecoli, pseudomonas, mycoplasma and haemophilus influenza)
  • viruses (sars-cov2, influenza A,B & C, norwalk virus, rhinoviruses, drosophila C virus, phiX and lambda phage )
  • custom databases (ERCC spikes, univec core DB)
  • Note that for human, mouse, fly and mosquito we scan for mitochondrial and ribosomal contamination separately).
    Only the top (most abundant) five hits and unclassified hits are shown, all other hits are grouped under an 'other' tag.
    " [communication] +subject=dissectBCL deepSeq=email@seqfacility.de bioinfoCore=email@bioinfocore.de fromAddress=sender@dissectbcl.de From 4da7a4a4c31c5020a5750dc6036e018e26cfdce4 Mon Sep 17 00:00:00 2001 From: adRn-s Date: Thu, 15 Feb 2024 11:05:54 +0100 Subject: [PATCH 20/22] fix flake8 E266 emailProjectFinished.py --- src/tools/emailProjectFinished.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tools/emailProjectFinished.py b/src/tools/emailProjectFinished.py index 8d1e923..21de996 100755 --- a/src/tools/emailProjectFinished.py +++ b/src/tools/emailProjectFinished.py @@ -134,8 +134,7 @@ def main(): if not os.path.exists(p): sys.exit("Project folder {} not found.".format(p)) - # get lastName (user) from project name - ## lastName = args.project[0].split("_")[2] + # get user from project name, lastName = args.project[0].split("_")[2] if not args.toEmail or not args.toName: my_dict = getContactDetails(args.project[0].split("_")[1], config) firstName, email = my_dict["first_name"], my_dict["email"] From 85bd918e3dbc407cf28122cf78ed9e9cf2ecc20d Mon Sep 17 00:00:00 2001 From: adRn-s Date: Mon, 19 Feb 2024 14:17:44 +0100 Subject: [PATCH 21/22] compound surnames, support how our IT handles them --- src/tools/emailProjectFinished.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/emailProjectFinished.py b/src/tools/emailProjectFinished.py index 21de996..67c4847 100755 --- a/src/tools/emailProjectFinished.py +++ b/src/tools/emailProjectFinished.py @@ -32,7 +32,7 @@ def getProjectIDs(projects, config): # Sanity check assert (p.startswith("Project_")) IDs.append(p.split("_")[1]) - PI = p.split("_")[-1].lower() + PI = p.split("_")[-1].split("-")[0].lower() # compound surnames use minus, we use 1st only. # Get the actual sequencing_data dir # Assume if multiple projects are given, they all in the same flowcell. flowcell = getFlowCell() From bff8e64513bf571ce85de0cec553eeddc9d78af3 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 7 Mar 2024 11:44:50 +0100 Subject: [PATCH 22/22] flake fix emailproject --- src/tools/emailProjectFinished.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/tools/emailProjectFinished.py b/src/tools/emailProjectFinished.py index 67c4847..0d9fd68 100755 --- a/src/tools/emailProjectFinished.py +++ b/src/tools/emailProjectFinished.py @@ -32,7 +32,8 @@ def getProjectIDs(projects, config): # Sanity check assert (p.startswith("Project_")) IDs.append(p.split("_")[1]) - PI = p.split("_")[-1].split("-")[0].lower() # compound surnames use minus, we use 1st only. + # compound surnames use minus, we use 1st only. + PI = p.split("_")[-1].split("-")[0].lower() # Get the actual sequencing_data dir # Assume if multiple projects are given, they all in the same flowcell. flowcell = getFlowCell()