diff --git a/.gitignore b/.gitignore index 942c885c657..20334f34c10 100644 --- a/.gitignore +++ b/.gitignore @@ -58,3 +58,4 @@ src/main/webapp/node_modules /src/main/resources/application-local.properties java_pid*.hprof gradle.properties +.virtualenv diff --git a/CHANGELOG.md b/CHANGELOG.md index d4104da7cfe..2426bd22908 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ * [Developer]: Fixed bug in setting a `default_sequencing_object and default_genome_assembly to `NULL` for a sample when the default sequencing object or genome assembly were removed. [See PR 1466](https://github.com/phac-nml/irida/pull/1466) * [Developer]: Fixed bug preventing a `sample` with an analysis submission from being deleted. [See PR 1467](https://github.com/phac-nml/irida/pull/1467) * [Developer]: Added functionality to delete sequence files from file system when a sequence run is removed. [See PR 1468](https://github.com/phac-nml/irida/pull/1468) +* [Developer]: Added script to do initial cleanup of sequence files from file system. [See PR 1469](https://github.com/phac-nml/irida/pull/1469) ## [22.09.7] - 2023/01/24 * [UI]: Fixed bugs on NCBI Export page preventing the NCBI `submission.xml` file from being properly written. See [PR 1451](https://github.com/phac-nml/irida/pull/1451) diff --git a/UPGRADING.md b/UPGRADING.md index 86cbe1520c5..d193126feb1 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -4,6 +4,10 @@ Upgrading This document summarizes the environmental changes that need to be made when upgrading IRIDA that cannot be automated. +Unreleased +---------- +* This upgrade deletes sequence files from the file system when they are removed from IRIDA. To clean up all previously removed sequence files, a script can be found under the `src/main/resources/scripts/sequence-files` folder in the IRIDA repo. 
+ 22.05 to 22.09 -------------- * This upgrade switches the OAuth2 implementation from using spring-security-oauth to spring-security-oauth2-authorization-server and spring-security-oauth2-resource-server. Due to the dependency updates we have changed the format of the OAuth2 access tokens, they are now JWT Tokens (https://jwt.io/introduction) and are encrypted/decrypted using a certificate within a java keystore. No default java keystore is provided, so administrators will need to update their deployments to configure an appropriate java keystore. The same java keystore will need to be present on all servers which allow api access, otherwise access tokens generated on one server will not work on any other server. diff --git a/src/main/resources/scripts/sequence-files/README.txt b/src/main/resources/scripts/sequence-files/README.txt new file mode 100644 index 00000000000..f558890d94b --- /dev/null +++ b/src/main/resources/scripts/sequence-files/README.txt @@ -0,0 +1,20 @@ +Please follow the instructions below on how to run the purge_sequence_files.py script. +The assumption is that Python3 and pip3 are already installed. + +Install virtual env. +$ pip3 install virtualenv + +Create a virtual python environment. +$ python3 -m venv .virtualenv + +Activate the environment. +$ source .virtualenv/bin/activate + +Install libraries. +$ pip3 install -r requirements.txt + +Run the script to purge the sequence files on the filesystem. +$ python3 purge_sequence_files.py --help + +Deactivate the environment. 
#!/usr/bin/env python3
"""List or purge the on-disk directories of sequence files deleted in IRIDA.

The script reads the distinct ``file_path`` values of delete revisions
(``revtype=2``) from the ``sequence_file_AUD`` table.  For every path it takes
the directory two levels above the file (relative to ``--baseDirectory``) and
either prints everything found below it (default, dry run) or deletes it
(``--purge``).

Usage (Python 3 and pip3 assumed to be installed)::

    $ python3 -m venv .virtualenv
    $ source .virtualenv/bin/activate
    $ pip3 install -r requirements.txt   # mysql-connector-python==8.0.22
    $ python3 purge_sequence_files.py --help
    $ deactivate
"""
import argparse
import os
import sys
from contextlib import closing


def remove(path, purge):
    """Delete ``path`` when ``purge`` is true, otherwise only print it.

    Args:
        path: File, symbolic link or empty directory to delete.
        purge: When false nothing is touched and ``path`` is printed.

    Filesystem errors (for example a directory that is not empty) are printed
    and swallowed so that a single bad entry does not abort the whole run.
    """
    if not purge:
        print(path)
        return
    try:
        # lexists() also reports dangling symlinks, which exists() hides.
        if not os.path.lexists(path):
            return
        # A symlink is always unlinked itself, even when it points at a
        # directory: os.rmdir() on a link fails and would leave the parent
        # directory non-empty.
        if os.path.islink(path) or not os.path.isdir(path):
            os.remove(path)
        else:
            os.rmdir(path)
        print("Deleted ", path)
    except OSError as e:
        print(e)


def list_sequence_files(host, user, password, database):
    """Return the file paths recorded for deleted sequence files.

    Args:
        host: Database host name.
        user: Database user name.
        password: Database password.
        database: Database name.

    Returns:
        The rows returned by ``cursor.fetchall()``; each row holds the
        ``file_path`` column at index 0.
    """
    # Imported lazily so that --help and the filesystem helpers work even
    # when the MySQL connector is not installed.
    import mysql.connector

    connection = mysql.connector.connect(
        host=host,
        user=user,
        password=password,
        database=database,
    )
    # closing() releases the cursor and the connection even if the query fails.
    with closing(connection) as db, closing(db.cursor()) as cursor:
        # TODO: Should we double check this file doesn't exist in the actual
        # table in case it was manually restored?
        cursor.execute("SELECT DISTINCT file_path FROM sequence_file_AUD WHERE revtype=2")
        return cursor.fetchall()


def _sequence_file_directory(base_directory, file_path):
    """Return the directory to clean for ``file_path`` or None if it is unsafe.

    The directory is the grandparent of ``base_directory/file_path``.  It is
    rejected when ``file_path`` is empty/NULL or when the result is not
    strictly inside ``base_directory`` (absolute paths, ``..`` segments or
    paths with fewer than two directory levels), because walking it would
    delete unrelated data.
    """
    if not file_path:
        return None
    candidate = os.path.dirname(os.path.dirname(os.path.join(base_directory, file_path)))
    base = os.path.abspath(base_directory)
    target = os.path.abspath(candidate)
    try:
        inside = target != base and os.path.commonpath([base, target]) == base
    except ValueError:
        # Paths that cannot be compared (e.g. different drives) are not inside.
        inside = False
    return candidate if inside else None


def _purge_tree(directory, purge):
    """List or delete everything below ``directory`` and then ``directory``."""
    # Bottom-up so that directories are already empty when they are removed.
    for root, dirs, files in os.walk(directory, topdown=False):
        for name in files:
            remove(os.path.join(root, name), purge)
        for name in dirs:
            remove(os.path.join(root, name), purge)
    remove(directory, purge)


def main():
    """Parse the command line and list or purge the deleted sequence files."""
    parser = argparse.ArgumentParser(description="This program lists the sequence files and folders that have been previously deleted in IRIDA.")
    parser.add_argument('--purge', help="Deletes the sequence files and folders from the filesystem.", action="store_true")
    parser.add_argument('--baseDirectory', default='/tmp/irida/sequence-files', help="The sequence file base directory.", required=False)
    parser.add_argument('--host', default='localhost', help="The database host name.", required=False)
    parser.add_argument('--database', default='irida_test', help="The database name.", required=False)
    parser.add_argument('--user', default='test', help="The database user name.", required=False)
    parser.add_argument('--password', default='test', help="The database password.", required=False)

    args = parser.parse_args()
    rows = list_sequence_files(args.host, args.user, args.password, args.database)

    # Several deleted files can map to the same directory; handle each once.
    handled = set()
    for row in rows or ():
        directory = _sequence_file_directory(args.baseDirectory, row[0])
        if directory is None:
            print("Skipping {!r}: not inside {}".format(row[0], args.baseDirectory), file=sys.stderr)
            continue
        if directory in handled:
            continue
        handled.add(directory)
        _purge_tree(directory, args.purge)
    print("All done.")


if __name__ == '__main__':
    main()