diff --git a/arches/app/etl_modules/jsonld_importer.py b/arches/app/etl_modules/jsonld_importer.py index c88b5652631..00af9849faf 100644 --- a/arches/app/etl_modules/jsonld_importer.py +++ b/arches/app/etl_modules/jsonld_importer.py @@ -99,6 +99,27 @@ def read(self, request): ), } + try: + self.read_zip_file(content, result) + except zipfile.BadZipFile as e: + return { + "success": False, + "data": FileValidationError( + message=e.args[0], + code=400, + ), + } + + if not result["summary"]["files"]: + title = _("Invalid Uploaded File") + message = _( + "This file has missing information or invalid formatting. Make sure the file is complete and in the expected format." + ) + return {"success": False, "data": {"title": title, "message": message}} + + return {"success": True, "data": result} + + def read_zip_file(self, content, result): with zipfile.ZipFile(content, "r") as zip_ref: files = zip_ref.infolist() for file in files: @@ -127,15 +148,6 @@ def read(self, request): default_storage.save(destination, f) - if not result["summary"]["files"]: - title = _("Invalid Uploaded File") - message = _( - "This file has missing information or invalid formatting. Make sure the file is complete and in the expected format." - ) - return {"success": False, "data": {"title": title, "message": message}} - - return {"success": True, "data": result} - def validate_uploaded_file(self, file): path = Path(file.name) slug = path.parts[1] diff --git a/arches/app/utils/file_validator.py b/arches/app/utils/file_validator.py index 8f4087351ce..65db9c08057 100644 --- a/arches/app/utils/file_validator.py +++ b/arches/app/utils/file_validator.py @@ -17,10 +17,17 @@ def test_unknown_filetypes(self, file, extension=None): errors = [] match extension: case "DS_Store": - self.logger.log( - logging.WARN, "DS_Store file encountered, proceeding with caution." - ) - case _ if extension not in settings.FILE_TYPES: + if settings.FILE_TYPE_CHECKING == "lenient": + self.logger.log( + logging.WARN, + "DS_Store file encountered, proceeding with caution.", + ) + else: + errors.append(f"File type is not permitted: {extension}") + case _ if ( + extension not in settings.FILE_TYPES + and (settings.FILE_TYPE_CHECKING != "lenient" or extension is not None) + ): errors.append(f"File type is not permitted: {extension}") case "xlsx": try: @@ -46,7 +53,8 @@ def test_unknown_filetypes(self, file, extension=None): except json.decoder.JSONDecodeError: errors.append("Invalid json file") case _: - errors.append("Cannot validate file") + if settings.FILE_TYPE_CHECKING != "lenient": + errors.append("Cannot validate file") for error in errors: self.logger.log(logging.ERROR, error) diff --git a/arches/apps.py b/arches/apps.py index 697f362da80..3c00e54dfcd 100644 --- a/arches/apps.py +++ b/arches/apps.py @@ -1,6 +1,20 @@ +import warnings + from django.conf import settings from django.core.checks import register, Tags, Error, Warning +### GLOBAL DEPRECATIONS ### +FILE_TYPE_CHECKING_MSG = ( + "Providing boolean values to FILE_TYPE_CHECKING is deprecated. " + "Starting with Arches 8.0, the only allowed options will be " + "None, 'lenient', and 'strict'." +) +if settings.FILE_TYPE_CHECKING in (True, False): + warnings.warn(FILE_TYPE_CHECKING_MSG, DeprecationWarning) + + +### SYSTEM CHECKS ### + @register(Tags.security) def check_cache_backend_for_production(app_configs, **kwargs): diff --git a/arches/install/arches-templates/project_name/settings.py-tpl b/arches/install/arches-templates/project_name/settings.py-tpl index f3c0bb607f2..61d1b4fa124 100644 --- a/arches/install/arches-templates/project_name/settings.py-tpl +++ b/arches/install/arches-templates/project_name/settings.py-tpl @@ -36,8 +36,23 @@ SEARCH_COMPONENT_LOCATIONS.append('{{ project_name }}.search_components') LOCALE_PATHS.insert(0, os.path.join(APP_ROOT, 'locale')) -FILE_TYPE_CHECKING = False -FILE_TYPES = ["bmp", "gif", "jpg", "jpeg", "json", "pdf", "png", "psd", "rtf", "tif", "tiff", "xlsx", "csv", "zip"] +FILE_TYPE_CHECKING = "lenient" +FILE_TYPES = [ + "bmp", + "gif", + "jpg", + "jpeg", + "json", + "pdf", + "png", + "psd", + "rtf", + "tif", + "tiff", + "xlsx", + "csv", + "zip", +] FILENAME_GENERATOR = "arches.app.utils.storage_filename_generator.generate_filename" UPLOADED_FILES_DIR = "uploadedfiles" diff --git a/arches/install/requirements.txt b/arches/install/requirements.txt index 680991ce1b4..c9b7e78fc54 100644 --- a/arches/install/requirements.txt +++ b/arches/install/requirements.txt @@ -1,4 +1,4 @@ -Django>=4.2.11,<5.0.0 +Django>=4.2.13,<5.0.0 psycopg2==2.9.9 urllib3<2 elasticsearch>=8.3.1,<9.0.0 @@ -26,7 +26,7 @@ pillow>=7.0.0 arcgis2geojson==2.0.0 openpyxl==3.1.2 django-webpack-loader==2.0.1 -filetype==1.0.13 +filetype==1.2.0 defusedxml==0.7.1 requests-oauthlib==1.3.1 pyjwt>=2.0.0,<3 diff --git a/arches/settings.py b/arches/settings.py index 8eebcaa467e..27e3ee3797e 100644 --- a/arches/settings.py +++ b/arches/settings.py @@ -674,7 +674,7 @@ "arches.app.etl_modules", ] -FILE_TYPE_CHECKING = False +FILE_TYPE_CHECKING = "lenient" FILE_TYPES = [ "bmp", "gif", diff --git a/releases/7.6.0.md b/releases/7.6.0.md index 5853295af3c..344471eaa08 100644 --- a/releases/7.6.0.md +++ b/releases/7.6.0.md @@ -34,6 +34,7 @@ Arches 7.6.0 Release Notes - 9769 Ensure resource creation edit log timestamps precede resource update timestamps - 10738 Adds Github action for comparing test coverage between branches and rejecting branches that lower test coverage - 10842 Adds project-level testing and GitHub test runners +- 10862 Lenient file-type checking mode - 10699 Allow overriding search_results view - 10911 Styling fix in resource model manage menu - 10726 Upgrade openpyxl package to 3.1.2 and fixes ETL modules @@ -60,8 +61,9 @@ System: Python: Upgraded: - Django == 4.2.9 + Django == 4.2.13 openpyxl == 3.0.10 + filetype == 1.2.0 Added: (dev dependencies): @@ -155,6 +157,15 @@ Minor incompatibilities: is now a more attractive target for overriding than `run_load_task()`. +### Deprecations + +- Boolean values for the `FILE_TYPE_CHECKING` setting are deprecated. Starting with Arches 8.0, the allowed values will be: + - `None` + - `"lenient"` (new in 7.6) + - `"strict"` + + For more, see the [documentation](https://arches.readthedocs.io/en/stable/configuring/settings-beyond-the-ui/#file-type-checking) for this setting. + ### Upgrading Arches 1. You must be upgraded to at least version 7.5.0 before proceeding. If you are on an earlier version, please refer to the upgrade process in the [Version 7.5.0 release notes](https://github.com/archesproject/arches/blob/dev/7.5.x/releases/7.5.0.md) @@ -355,7 +366,19 @@ Minor incompatibilities: - `npm run build_production` This builds a production bundle. **takes up to 2hrs depending on resources** - Alternatively you can run `python manage.py build_production`. This will create a production bundle of frontend assessts and also call `collectstatic`. -16. If you are running Arches on Apache, be sure to run: +16. Test your project with deprecation warnings enabled. + + ``` + python -Wall::DeprecationWarning manage.py test --settings=tests.test_settings + ``` + +17. Run system checks against your production settings. + + ``` + python manage.py check --deploy --settings=path.to.production.settings + ``` + +18. If you are running Arches on Apache, be sure to run: ``` collectstatic diff --git a/tests/models/tile_model_tests.py b/tests/models/tile_model_tests.py index 317e5efba50..2c95335d81b 100644 --- a/tests/models/tile_model_tests.py +++ b/tests/models/tile_model_tests.py @@ -312,6 +312,11 @@ def test_save_provisional_from_athoritative(self): provisionaledits = provisional_tile.provisionaledits self.assertEqual(tiles.count(), 2) + self.assertIn( + "72048cb3-adbc-11e6-9ccf-14109fd34195", + provisional_tile.data, + provisional_tile, + ) self.assertEqual( provisional_tile.data["72048cb3-adbc-11e6-9ccf-14109fd34195"]["en"][ "value" diff --git a/tests/test_settings.py b/tests/test_settings.py index 3ab8f2bf335..665ea887fae 100644 --- a/tests/test_settings.py +++ b/tests/test_settings.py @@ -71,6 +71,8 @@ "arches.W001" ) # Cache backend does not support rate-limiting +FILE_TYPE_CHECKING = "lenient" + # could add Chrome, PhantomJS etc... here LOCAL_BROWSERS = [] # ['Firefox'] diff --git a/tests/utils/test_file_validator.py b/tests/utils/test_file_validator.py new file mode 100644 index 00000000000..80aace0e7a3 --- /dev/null +++ b/tests/utils/test_file_validator.py @@ -0,0 +1,149 @@ +import os +import shutil +from pathlib import Path + +from unittest.mock import Mock, patch + +from django.conf import settings +from django.test import SimpleTestCase +from django.test.utils import override_settings + +from arches.app.utils.file_validator import FileValidator + +# these tests can be run from the command line via +# python manage.py test tests.utils.test_file_validator.FileValidatorTests --settings="tests.test_settings" + + +class MockFile: + @staticmethod + def read(): + """Return a jagged csv file (invalid row length)""" + return b"col1,col2\ndata1" + + @staticmethod + def seek(offset): + return + + +class MockFileType: + def __init__(self, extension): + self.extension = extension + + +class FileValidatorTests(SimpleTestCase): + """FILE_TYPE_CHECKING defaults to 'lenient': overridden as necessary.""" + + validator = FileValidator() + mock_file = MockFile() + + @override_settings(FILE_TYPE_CHECKING=None) + def test_no_file_checking(self): + errors = self.validator.validate_file_type(self.mock_file) + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_unknown_filetype_lenient(self): + errors = self.validator.validate_file_type(self.mock_file) + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + @override_settings(FILE_TYPE_CHECKING="strict") + def test_check_unknown_filetype_strict(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file) + self.assertEqual(errors, ["File type is not permitted: None"]) + + @patch("filetype.guess", Mock(return_value=MockFileType("suspicious"))) + def test_filetype_not_listed(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file) + self.assertEqual(errors, ["Unsafe file type suspicious"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_invalid_csv(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file, extension="csv") + self.assertEqual(errors, ["Invalid csv file"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_invalid_json(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file, extension="json") + self.assertEqual(errors, ["Invalid json file"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_invalid_jpeg_lenient(self): + errors = self.validator.validate_file_type(self.mock_file, extension="jpeg") + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + @override_settings(FILE_TYPE_CHECKING="strict") + def test_check_invalid_jpeg_strict(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file, extension="jpeg") + self.assertEqual(errors, ["Cannot validate file"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_invalid_jpeg_lenient(self): + errors = self.validator.validate_file_type(self.mock_file, extension="jpeg") + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + @override_settings(FILE_TYPE_CHECKING="strict") + def test_check_invalid_but_not_in_listed_types(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type( + self.mock_file, extension="notlisted" + ) + self.assertEqual(errors, ["File type is not permitted: notlisted"]) + + @patch("filetype.guess", Mock(return_value=None)) + def test_check_dsstore_lenient(self): + """In lenient mode, we assume these might be present in .zip files.""" + with self.assertLogs("arches.app.utils.file_validator", level="WARN"): + errors = self.validator.validate_file_type( + self.mock_file, extension="DS_Store" + ) + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + @override_settings(FILE_TYPE_CHECKING="strict") + def test_check_dsstore_strict(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type( + self.mock_file, extension="DS_Store" + ) + self.assertEqual(errors, ["File type is not permitted: DS_Store"]) + + @patch("filetype.guess", Mock(return_value=None)) + @patch("arches.app.utils.file_validator.load_workbook", lambda noop: None) + def test_valid_xlsx(self): + errors = self.validator.validate_file_type(self.mock_file, extension="xlsx") + self.assertEqual(errors, []) + + @patch("filetype.guess", Mock(return_value=None)) + def test_invalid_xlsx(self): + with self.assertLogs("arches.app.utils.file_validator", level="ERROR"): + errors = self.validator.validate_file_type(self.mock_file, extension="xlsx") + self.assertEqual(errors, ["Invalid xlsx workbook"]) + + def test_zip(self): + # Zip up the files in the tests/fixtures/uploadedfiles dir + # Currently, contains a single .csv file and an empty dir. + dir_to_zip = Path(settings.MEDIA_ROOT) / "uploadedfiles" + zip_file_path = shutil.make_archive("test", "zip", dir_to_zip, dir_to_zip) + self.addCleanup(os.unlink, zip_file_path) + + with open(zip_file_path, "rb") as file: + errors = self.validator.validate_file_type(file) + self.assertEqual(errors, []) + + with ( + open(zip_file_path, "rb") as file, + self.modify_settings(FILE_TYPES={"remove": "csv"}), + self.assertLogs("arches.app.utils.file_validator", level="ERROR"), + ): + errors = self.validator.validate_file_type(file) + self.assertEqual( + errors, ["File type is not permitted: csv", "Unsafe zip file contents"] + )