From 51e4ff9c8086563dfc5defcfac2eda02f82cc37c Mon Sep 17 00:00:00 2001 From: Katherine Thiessen Date: Fri, 21 Apr 2023 12:17:35 -0500 Subject: [PATCH 1/3] adding start and end dates to python script --- .../sequence-files/purge_sequence_files.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/main/resources/scripts/sequence-files/purge_sequence_files.py b/src/main/resources/scripts/sequence-files/purge_sequence_files.py index 8dfd7b1117..d2083ac6e3 100644 --- a/src/main/resources/scripts/sequence-files/purge_sequence_files.py +++ b/src/main/resources/scripts/sequence-files/purge_sequence_files.py @@ -1,5 +1,6 @@ #!/usr/bin/python import argparse +import datetime import mysql.connector import os @@ -17,7 +18,7 @@ def remove(path, purge): else: print(path) -def list_sequence_files(host, user, password, database): +def list_sequence_files(startDate, endDate, host, user, password, database): db = mysql.connector.connect( host=host, user=user, @@ -26,7 +27,14 @@ def list_sequence_files(host, user, password, database): ) cursor = db.cursor() # TODO: Should we double check this file doesn't exist in the actual table in case it was manually restored? 
- cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2") + if(startDate and endDate): + cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2 AND modified_date BETWEEN %s AND %s", (startDate, endDate)) + elif(startDate): + cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2 AND modified_date >= %s", (startDate,)) + elif(endDate): + cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2 AND modified_date <= %s", (endDate,)) + else: + cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2") result = cursor.fetchall() cursor.close() db.close() @@ -35,6 +43,8 @@ def list_sequence_files(host, user, password, database): def main(): parser = argparse.ArgumentParser(description="This program lists the sequence files and folders that have been previously deleted in IRIDA.") parser.add_argument('--purge', help="Deletes the sequence files and folders from the filesystem.", action="store_true") + parser.add_argument('--startDate', type=datetime.date.fromisoformat, help="The start date in format YYYY-MM-DD (inclusive).", required=False) + parser.add_argument('--endDate', type=datetime.date.fromisoformat, help="The end date in format YYYY-MM-DD (inclusive).", required=False) parser.add_argument('--baseDirectory', default='/tmp/irida/sequence-files', help="The sequence file base directory.", required=False) parser.add_argument('--host', default='localhost', help="The database host name.", required=False) parser.add_argument('--database', default='irida_test', help="The database name.", required=False) @@ -42,7 +52,7 @@ def main(): parser.add_argument('--password', default='test', help="The database password.", required=False) args = parser.parse_args() - rows = list_sequence_files(args.host, args.user, args.password, args.database) + rows = list_sequence_files(args.startDate, args.endDate, args.host, args.user, args.password, args.database) if 
rows: for row in rows: sequence_file_directory = os.path.dirname(os.path.dirname(os.path.join(args.baseDirectory, row[0]))) From afef746421001e503113cbdf757ecfc8f39a9c55 Mon Sep 17 00:00:00 2001 From: Katherine Thiessen Date: Fri, 21 Apr 2023 12:26:11 -0500 Subject: [PATCH 2/3] updating changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bf5f2118f..64d0aca9e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * [Developer]: Added override flag that determines if files should be deleted from file storage. [See PR 1486](https://github.com/phac-nml/irida/pull/1486) * [Developer]: Fixed flaky text in `PipelinesPhylogenomicsPageIT#testPageSetup` test. See [PR 1490](https://github.com/phac-nml/irida/pull/1492) * [ALL]: Added LDAP/ADLDAP support. +* [Developer]: Added start and end dates to filesystem clean up script. [See PR 1487](https://github.com/phac-nml/irida/pull/1487) ## [23.01.3] - 2023/05/09 * [Developer]: Fixed issue with metadata uploader removing existing data. 
See [PR 1489](https://github.com/phac-nml/irida/pull/1489) From f46182b3c6e4fb53b96d3a9aaa8882151f7bddce Mon Sep 17 00:00:00 2001 From: Katherine Thiessen Date: Fri, 21 Apr 2023 12:28:01 -0500 Subject: [PATCH 3/3] fixing formatting --- .../resources/scripts/sequence-files/purge_sequence_files.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/resources/scripts/sequence-files/purge_sequence_files.py b/src/main/resources/scripts/sequence-files/purge_sequence_files.py index d2083ac6e3..8009819b0b 100644 --- a/src/main/resources/scripts/sequence-files/purge_sequence_files.py +++ b/src/main/resources/scripts/sequence-files/purge_sequence_files.py @@ -43,8 +43,8 @@ def list_sequence_files(startDate, endDate, host, user, password, database): def main(): parser = argparse.ArgumentParser(description="This program lists the sequence files and folders that have been previously deleted in IRIDA.") parser.add_argument('--purge', help="Deletes the sequence files and folders from the filesystem.", action="store_true") - parser.add_argument('--startDate', type=datetime.date.fromisoformat, help="The start date in format YYYY-MM-DD (inclusive).", required=False) - parser.add_argument('--endDate', type=datetime.date.fromisoformat, help="The end date in format YYYY-MM-DD (inclusive).", required=False) + parser.add_argument('--startDate', type=datetime.date.fromisoformat, help="The start date in format YYYY-MM-DD (inclusive).", required=False) + parser.add_argument('--endDate', type=datetime.date.fromisoformat, help="The end date in format YYYY-MM-DD (inclusive).", required=False) parser.add_argument('--baseDirectory', default='/tmp/irida/sequence-files', help="The sequence file base directory.", required=False) parser.add_argument('--host', default='localhost', help="The database host name.", required=False) parser.add_argument('--database', default='irida_test', help="The database name.", required=False)