diff --git a/CHANGELOG.md b/CHANGELOG.md index 2bf5f2118f..64d0aca9e1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * [Developer]: Added override flag that determines if files should be deleted from file storage. [See PR 1486](https://github.com/phac-nml/irida/pull/1486) * [Developer]: Fixed flaky text in `PipelinesPhylogenomicsPageIT#testPageSetup` test. See [PR 1490](https://github.com/phac-nml/irida/pull/1492) * [ALL]: Added LDAP/ADLDAP support. +* [Developer]: Added start and end dates to filesystem clean up script. [See PR 1487](https://github.com/phac-nml/irida/pull/1487) ## [23.01.3] - 2023/05/09 * [Developer]: Fixed issue with metadata uploader removing existing data. See [PR 1489](https://github.com/phac-nml/irida/pull/1489) diff --git a/src/main/resources/scripts/sequence-files/purge_sequence_files.py b/src/main/resources/scripts/sequence-files/purge_sequence_files.py index 8dfd7b1117..8009819b0b 100644 --- a/src/main/resources/scripts/sequence-files/purge_sequence_files.py +++ b/src/main/resources/scripts/sequence-files/purge_sequence_files.py @@ -1,5 +1,6 @@ #!/usr/bin/python import argparse +import datetime import mysql.connector import os @@ -17,7 +18,7 @@ def remove(path, purge): else: print(path) -def list_sequence_files(host, user, password, database): +def list_sequence_files(startDate, endDate, host, user, password, database): db = mysql.connector.connect( host=host, user=user, @@ -26,7 +27,14 @@ def list_sequence_files(host, user, password, database): ) cursor = db.cursor() # TODO: Should we double check this file doesn't exist in the actual table in case it was manually restored? 
- cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2") + if(startDate and endDate): + cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2 AND modified_date BETWEEN %s AND %s", (startDate, endDate)) + elif(startDate): + cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2 AND modified_date >= %s", (startDate,)) + elif(endDate): + cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2 AND modified_date <= %s", (endDate,)) + else: + cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2") result = cursor.fetchall() cursor.close() db.close() @@ -35,6 +43,8 @@ def list_sequence_files(host, user, password, database): def main(): parser = argparse.ArgumentParser(description="This program lists the sequence files and folders that have been previously deleted in IRIDA.") parser.add_argument('--purge', help="Deletes the sequence files and folders from the filesystem.", action="store_true") + parser.add_argument('--startDate', type=datetime.date.fromisoformat, help="The start date in format YYYY-MM-DD (inclusive).", required=False) + parser.add_argument('--endDate', type=datetime.date.fromisoformat, help="The end date in format YYYY-MM-DD (inclusive).", required=False) parser.add_argument('--baseDirectory', default='/tmp/irida/sequence-files', help="The sequence file base directory.", required=False) parser.add_argument('--host', default='localhost', help="The database host name.", required=False) parser.add_argument('--database', default='irida_test', help="The database name.", required=False) @@ -42,7 +52,7 @@ def main(): parser.add_argument('--password', default='test', help="The database password.", required=False) args = parser.parse_args() - rows = list_sequence_files(args.host, args.user, args.password, args.database) + rows = list_sequence_files(args.startDate, args.endDate, args.host, args.user, args.password, args.database) if 
rows: for row in rows: sequence_file_directory = os.path.dirname(os.path.dirname(os.path.join(args.baseDirectory, row[0])))