From bcaae04aab160d6caa9509dcb00c634d8716ee4c Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Thu, 2 May 2024 14:51:48 -0400 Subject: [PATCH 1/9] Add lenient file-type checking mode #10862 --- arches/app/utils/file_validator.py | 14 +++++++---- arches/apps.py | 14 +++++++++++ .../project_name/settings.py-tpl | 19 +++++++++++++-- arches/settings.py | 2 +- releases/7.6.0.md | 24 ++++++++++++++++++- tests/test_settings.py | 2 ++ 6 files changed, 67 insertions(+), 8 deletions(-) diff --git a/arches/app/utils/file_validator.py b/arches/app/utils/file_validator.py index 8f4087351ce..e76fa6c52ff 100644 --- a/arches/app/utils/file_validator.py +++ b/arches/app/utils/file_validator.py @@ -17,9 +17,14 @@ def test_unknown_filetypes(self, file, extension=None): errors = [] match extension: case "DS_Store": - self.logger.log( - logging.WARN, "DS_Store file encountered, proceeding with caution." - ) + if settings.FILE_TYPE_CHECKING: + if settings.FILE_TYPE_CHECKING == "lenient": + self.logger.log( + logging.WARN, + "DS_Store file encountered, proceeding with caution.", + ) + else: + errors.append(f"File type is not permitted: {extension}") case _ if extension not in settings.FILE_TYPES: errors.append(f"File type is not permitted: {extension}") case "xlsx": @@ -46,7 +51,8 @@ def test_unknown_filetypes(self, file, extension=None): except json.decoder.JSONDecodeError: errors.append("Invalid json file") case _: - errors.append("Cannot validate file") + if settings.FILE_TYPE_CHECKING != "lenient": + errors.append("Cannot validate file") for error in errors: self.logger.log(logging.ERROR, error) diff --git a/arches/apps.py b/arches/apps.py index 697f362da80..3c00e54dfcd 100644 --- a/arches/apps.py +++ b/arches/apps.py @@ -1,6 +1,20 @@ +import warnings + from django.conf import settings from django.core.checks import register, Tags, Error, Warning +### GLOBAL DEPRECATIONS ### +FILE_TYPE_CHECKING_MSG = ( + "Providing boolean values to FILE_TYPE_CHECKING is deprecated. " + "Starting with Arches 8.0, the only allowed options will be " + "None, 'lenient', and 'strict'." +) +if settings.FILE_TYPE_CHECKING in (True, False): + warnings.warn(FILE_TYPE_CHECKING_MSG, DeprecationWarning) + + +### SYSTEM CHECKS ### + @register(Tags.security) def check_cache_backend_for_production(app_configs, **kwargs): diff --git a/arches/install/arches-templates/project_name/settings.py-tpl b/arches/install/arches-templates/project_name/settings.py-tpl index f3c0bb607f2..61d1b4fa124 100644 --- a/arches/install/arches-templates/project_name/settings.py-tpl +++ b/arches/install/arches-templates/project_name/settings.py-tpl @@ -36,8 +36,23 @@ SEARCH_COMPONENT_LOCATIONS.append('{{ project_name }}.search_components') LOCALE_PATHS.insert(0, os.path.join(APP_ROOT, 'locale')) -FILE_TYPE_CHECKING = False -FILE_TYPES = ["bmp", "gif", "jpg", "jpeg", "json", "pdf", "png", "psd", "rtf", "tif", "tiff", "xlsx", "csv", "zip"] +FILE_TYPE_CHECKING = "lenient" +FILE_TYPES = [ + "bmp", + "gif", + "jpg", + "jpeg", + "json", + "pdf", + "png", + "psd", + "rtf", + "tif", + "tiff", + "xlsx", + "csv", + "zip", +] FILENAME_GENERATOR = "arches.app.utils.storage_filename_generator.generate_filename" UPLOADED_FILES_DIR = "uploadedfiles" diff --git a/arches/settings.py b/arches/settings.py index 8eebcaa467e..27e3ee3797e 100644 --- a/arches/settings.py +++ b/arches/settings.py @@ -674,7 +674,7 @@ "arches.app.etl_modules", ] -FILE_TYPE_CHECKING = False +FILE_TYPE_CHECKING = "lenient" FILE_TYPES = [ "bmp", "gif", diff --git a/releases/7.6.0.md b/releases/7.6.0.md index 5853295af3c..2c89778ff31 100644 --- a/releases/7.6.0.md +++ b/releases/7.6.0.md @@ -34,6 +34,7 @@ Arches 7.6.0 Release Notes - 9769 Ensure resource creation edit log timestamps precede resource update timestamps - 10738 Adds Github action for comparing test coverage between branches and rejecting branches that lower test coverage - 10842 Adds project-level testing and GitHub test runners +- 10862 Lenient file-type checking mode - 10699 Allow overriding search_results view - 10911 Styling fix in resource model manage menu - 10726 Upgrade openpyxl package to 3.1.2 and fixes ETL modules @@ -155,6 +156,15 @@ Minor incompatibilities: is now a more attractive target for overriding than `run_load_task()`. +### Deprecations + +- Boolean values for the `FILE_TYPE_CHECKING` setting are deprecated. Starting with Arches 8.0, the allowed values will be: + - `None` + - `"lenient"` + - `"strict"` + + For more, see the [documentation]() for this setting. + ### Upgrading Arches 1. You must be upgraded to at least version 7.5.0 before proceeding. If you are on an earlier version, please refer to the upgrade process in the [Version 7.5.0 release notes](https://github.com/archesproject/arches/blob/dev/7.5.x/releases/7.5.0.md) @@ -355,7 +365,19 @@ Minor incompatibilities: - `npm run build_production` This builds a production bundle. **takes up to 2hrs depending on resources** - Alternatively you can run `python manage.py build_production`. This will create a production bundle of frontend assessts and also call `collectstatic`. -16. If you are running Arches on Apache, be sure to run: +16. Test your project with deprecation warnings enabled. + + ``` + python -Wall::DeprecationWarning manage.py test --settings=tests.test_settings + ``` + +17. Run system checks against your production settings. + + ``` + python manage.py check --deploy --settings=path.to.production.settings + ``` + +18. If you are running Arches on Apache, be sure to run: ``` collectstatic diff --git a/tests/test_settings.py b/tests/test_settings.py index 3ab8f2bf335..665ea887fae 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -71,6 +71,8 @@ "arches.W001" ) # Cache backend does not support rate-limiting +FILE_TYPE_CHECKING = "lenient" + # could add Chrome, PhantomJS etc... here LOCAL_BROWSERS = [] # ['Firefox'] From 56300cc9399102608d8fb8d8696f4d483674df36 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 14 Jun 2024 17:03:24 -0400 Subject: [PATCH 2/9] nit re #10862 --- arches/app/utils/file_validator.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/arches/app/utils/file_validator.py b/arches/app/utils/file_validator.py index e76fa6c52ff..3075cd9896c 100644 --- a/arches/app/utils/file_validator.py +++ b/arches/app/utils/file_validator.py @@ -17,14 +17,13 @@ def test_unknown_filetypes(self, file, extension=None): errors = [] match extension: case "DS_Store": - if settings.FILE_TYPE_CHECKING: - if settings.FILE_TYPE_CHECKING == "lenient": - self.logger.log( - logging.WARN, - "DS_Store file encountered, proceeding with caution.", - ) - else: - errors.append(f"File type is not permitted: {extension}") + if settings.FILE_TYPE_CHECKING == "lenient": + self.logger.log( + logging.WARN, + "DS_Store file encountered, proceeding with caution.", + ) + else: + errors.append(f"File type is not permitted: {extension}") case _ if extension not in settings.FILE_TYPES: errors.append(f"File type is not permitted: {extension}") case "xlsx": From 8125407f6f171c27fea52f2c1644702fc68ae346 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 14 Jun 2024 17:21:14 -0400 Subject: [PATCH 3/9] logic nit re #10862 --- arches/app/utils/file_validator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arches/app/utils/file_validator.py b/arches/app/utils/file_validator.py index 3075cd9896c..42ece529cdb 100644 --- a/arches/app/utils/file_validator.py +++ b/arches/app/utils/file_validator.py @@ -24,7 +24,10 @@ def test_unknown_filetypes(self, file, extension=None): ) else: errors.append(f"File type is not permitted: {extension}") - case _ if extension not in settings.FILE_TYPES: + case _ if ( + extension not in settings.FILE_TYPES + and settings.FILE_TYPE_CHECKING != "lenient" + ): errors.append(f"File type is not permitted: {extension}") case "xlsx": try: From ca6469539f3946be2f5ddb95b92460d8e91babf7 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Fri, 14 Jun 2024 17:21:53 -0400 Subject: [PATCH 4/9] Surface error message if non-zip file given to JSON-LD importer --- arches/app/etl_modules/jsonld_importer.py | 30 ++++++++++++++++------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/arches/app/etl_modules/jsonld_importer.py b/arches/app/etl_modules/jsonld_importer.py index c88b5652631..00af9849faf 100644 --- a/arches/app/etl_modules/jsonld_importer.py +++ b/arches/app/etl_modules/jsonld_importer.py @@ -99,6 +99,27 @@ def read(self, request): ), } + try: + self.read_zip_file(content, result) + except zipfile.BadZipFile as e: + return { + "success": False, + "data": FileValidationError( + message=e.args[0], + code=400, + ), + } + + if not result["summary"]["files"]: + title = _("Invalid Uploaded File") + message = _( + "This file has missing information or invalid formatting. Make sure the file is complete and in the expected format." + ) + return {"success": False, "data": {"title": title, "message": message}} + + return {"success": True, "data": result} + + def read_zip_file(self, content, result): with zipfile.ZipFile(content, "r") as zip_ref: files = zip_ref.infolist() for file in files: @@ -127,15 +148,6 @@ def read(self, request): default_storage.save(destination, f) - if not result["summary"]["files"]: - title = _("Invalid Uploaded File") - message = _( - "This file has missing information or invalid formatting. Make sure the file is complete and in the expected format." - ) - return {"success": False, "data": {"title": title, "message": message}} - - return {"success": True, "data": result} - def validate_uploaded_file(self, file): path = Path(file.name) slug = path.parts[1] From 52afd2af6d8194ba20deaf717d6b210c4ee2de2d Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Mon, 17 Jun 2024 10:52:23 -0400 Subject: [PATCH 5/9] Complete test coverage of file validator #10862 --- arches/app/utils/file_validator.py | 2 +- tests/utils/test_file_validator.py | 149 +++++++++++++++++++++++++++++ 2 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 tests/utils/test_file_validator.py diff --git a/arches/app/utils/file_validator.py b/arches/app/utils/file_validator.py index 42ece529cdb..65db9c08057 100644 --- a/arches/app/utils/file_validator.py +++ b/arches/app/utils/file_validator.py @@ -26,7 +26,7 @@ def test_unknown_filetypes(self, file, extension=None): errors.append(f"File type is not permitted: {extension}") case _ if ( extension not in settings.FILE_TYPES - and settings.FILE_TYPE_CHECKING != "lenient" + and (settings.FILE_TYPE_CHECKING != "lenient" or extension is not None) ): errors.append(f"File type is not permitted: {extension}") case "xlsx": diff --git a/tests/utils/test_file_validator.py b/tests/utils/test_file_validator.py new file mode 100644 index 00000000000..20e69173556 --- /dev/null +++ b/tests/utils/test_file_validator.py @@ -0,0 +1,149 @@ +import os +import shutil +from pathlib import Path + +from unittest.mock import Mock, patch + +from django.conf import settings +from django.test import TestCase +from django.test.utils import override_settings + +from arches.app.utils.file_validator import FileValidator + +# these tests can be run from the command line via +# python manage.py test tests.utils.test_file_validator.FileValidatorTests --settings="tests.test_settings" + + +class MockFile: + @staticmethod + def read(): + """Return a jagged csv file (invalid row length)""" + return b"col1,col2\ndata1" + + @staticmethod + def seek(offset): + return + + +class MockFileType: + def __init__(self, extension): + self.extension = extension + + +class FileValidatorTests(TestCase): + """FILE_TYPE_CHECKING defaults to 'lenient': overridden as necessary.""" + + validator = FileValidator() + mock_file = MockFile() + + @override_settings(FILE_TYPE_CHECKING=None) + def test_no_file_checking(self): + errors = self.validator.validate_file_type(self.mock_file) + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_unknown_filetype_lenient(self): + errors = self.validator.validate_file_type(self.mock_file) + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + @override_settings(FILE_TYPE_CHECKING="strict") + def test_check_unknown_filetype_strict(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file) + self.assertEqual(errors, ["File type is not permitted: None"]) + + @patch("filetype.guess", Mock(return_value=MockFileType("suspicious"))) + def test_filetype_not_listed(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file) + self.assertEqual(errors, ["Unsafe file type suspicious"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_invalid_csv(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file, extension="csv") + self.assertEqual(errors, ["Invalid csv file"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_invalid_json(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file, extension="json") + self.assertEqual(errors, ["Invalid json file"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_invalid_jpeg_lenient(self): + errors = self.validator.validate_file_type(self.mock_file, extension="jpeg") + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + @override_settings(FILE_TYPE_CHECKING="strict") + def test_check_invalid_jpeg_strict(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file, extension="jpeg") + self.assertEqual(errors, ["Cannot validate file"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_invalid_jpeg_lenient(self): + errors = self.validator.validate_file_type(self.mock_file, extension="jpeg") + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + @override_settings(FILE_TYPE_CHECKING="strict") + def test_check_invalid_but_not_in_listed_types(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type( + self.mock_file, extension="notlisted" + ) + self.assertEqual(errors, ["File type is not permitted: notlisted"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_dsstore_lenient(self): + """In lenient mode, we assume these might be present in .zip files.""" + with self.assertLogs("arches.app.utils.file_validator", level="WARN"): + errors = self.validator.validate_file_type( + self.mock_file, extension="DS_Store" + ) + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + @override_settings(FILE_TYPE_CHECKING="strict") + def test_check_dsstore_strict(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type( + self.mock_file, extension="DS_Store" + ) + self.assertEqual(errors, ["File type is not permitted: DS_Store"]) + + @patch("filetype.guess", Mock(return_value=None)) + @patch("arches.app.utils.file_validator.load_workbook", lambda noop: None) + def test_valid_xlsx(self): + errors = self.validator.validate_file_type(self.mock_file, extension="xlsx") + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + def test_invalid_xlsx(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file, extension="xlsx") + self.assertEqual(errors, ["Invalid xlsx workbook"]) + + def test_zip(self): + # Zip up the files in the tests/fixtures/uploadedfiles dir + # Currently, contains a single .csv file and an empty dir. + dir_to_zip = Path(settings.MEDIA_ROOT) / "uploadedfiles" + zip_file_path = shutil.make_archive("test", "zip", dir_to_zip, dir_to_zip) + self.addCleanup(os.unlink, zip_file_path) + + with open(zip_file_path, "rb") as file: + errors = self.validator.validate_file_type(file) + self.assertEqual(errors, []) + + with ( + open(zip_file_path, "rb") as file, + self.modify_settings(FILE_TYPES={"remove": "csv"}), + self.assertLogs("arches.app.utils.file_validator", level="ERROR"), + ): + errors = self.validator.validate_file_type(file) + self.assertEqual( + errors, ["File type is not permitted: csv", "Unsafe zip file contents"] + ) From 80b39f148317f7903e75abd2cb6415bd9bdb35e8 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 18 Jun 2024 09:16:53 -0400 Subject: [PATCH 6/9] Bump filetype to 1.2.0 --- arches/install/requirements.txt | 4 ++-- releases/7.6.0.md | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arches/install/requirements.txt b/arches/install/requirements.txt index 680991ce1b4..c9b7e78fc54 100644 --- a/arches/install/requirements.txt +++ b/arches/install/requirements.txt @@ -1,4 +1,4 @@ -Django>=4.2.11,<5.0.0 +Django>=4.2.13,<5.0.0 psycopg2==2.9.9 urllib3<2 elasticsearch>=8.3.1,<9.0.0 @@ -26,7 +26,7 @@ pillow>=7.0.0 arcgis2geojson==2.0.0 openpyxl==3.1.2 django-webpack-loader==2.0.1 -filetype==1.0.13 +filetype==1.2.0 defusedxml==0.7.1 requests-oauthlib==1.3.1 pyjwt>=2.0.0,<3 diff --git a/releases/7.6.0.md b/releases/7.6.0.md index 2c89778ff31..c66140cae6e 100644 --- a/releases/7.6.0.md +++ b/releases/7.6.0.md @@ -61,8 +61,9 @@ System: Python: Upgraded: - Django == 4.2.9 + Django == 4.2.13 openpyxl == 3.0.10 + filetype == 1.2.0 Added: (dev dependencies): From 2c5386ec38917bf19f547efe0f8b542a0ae0d576 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 18 Jun 2024 11:56:14 -0400 Subject: [PATCH 7/9] Continue debugging flaky test Follow-up to 4c14610. --- tests/models/tile_model_tests.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/models/tile_model_tests.py b/tests/models/tile_model_tests.py index 317e5efba50..2c95335d81b 100644 --- a/tests/models/tile_model_tests.py +++ b/tests/models/tile_model_tests.py @@ -312,6 +312,11 @@ def test_save_provisional_from_athoritative(self): provisionaledits = provisional_tile.provisionaledits self.assertEqual(tiles.count(), 2) + self.assertIn( + "72048cb3-adbc-11e6-9ccf-14109fd34195", + provisional_tile.data, + provisional_tile, + ) self.assertEqual( provisional_tile.data["72048cb3-adbc-11e6-9ccf-14109fd34195"]["en"][ "value" From 90557ee12cfaf89b80fca5b8c8a642da3b679d47 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 18 Jun 2024 16:25:35 -0400 Subject: [PATCH 8/9] Use SimpleTestCase re #10862 --- tests/utils/test_file_validator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/utils/test_file_validator.py b/tests/utils/test_file_validator.py index 20e69173556..80aace0e7a3 100644 --- a/tests/utils/test_file_validator.py +++ b/tests/utils/test_file_validator.py @@ -5,7 +5,7 @@ from unittest.mock import Mock, patch from django.conf import settings -from django.test import TestCase +from django.test import SimpleTestCase from django.test.utils import override_settings from arches.app.utils.file_validator import FileValidator @@ -30,7 +30,7 @@ def __init__(self, extension): self.extension = extension -class FileValidatorTests(TestCase): +class FileValidatorTests(SimpleTestCase): """FILE_TYPE_CHECKING defaults to 'lenient': overridden as necessary.""" validator = FileValidator() From 28cf693dcf060e1732b312a67ac38d830efd9387 Mon Sep 17 00:00:00 2001 From: Jacob Walls Date: Tue, 25 Jun 2024 10:46:38 -0400 Subject: [PATCH 9/9] Add documentation link (content to follow) --- releases/7.6.0.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/releases/7.6.0.md b/releases/7.6.0.md index c66140cae6e..344471eaa08 100644 --- a/releases/7.6.0.md +++ b/releases/7.6.0.md @@ -161,10 +161,10 @@ Minor incompatibilities: - Boolean values for the `FILE_TYPE_CHECKING` setting are deprecated. Starting with Arches 8.0, the allowed values will be: - `None` - - `"lenient"` + - `"lenient"` (new in 7.6) - `"strict"` - For more, see the [documentation]() for this setting. + For more, see the [documentation](https://arches.readthedocs.io/en/stable/configuring/settings-beyond-the-ui/#file-type-checking) for this setting. ### Upgrading Arches