diff --git a/lib/galaxy/datatypes/sequence.py b/lib/galaxy/datatypes/sequence.py index 911a1a2db31a..7a721d8df465 100644 --- a/lib/galaxy/datatypes/sequence.py +++ b/lib/galaxy/datatypes/sequence.py @@ -41,6 +41,7 @@ get_headers, iter_headers, ) +from galaxy.exceptions import InvalidFileFormatError from galaxy.util import ( compression_utils, nice_size, @@ -775,15 +776,17 @@ def display_data( headers = kwd.get("headers", {}) if preview: with compression_utils.get_fileobj(dataset.get_file_name()) as fh: - max_peek_size = 1000000 # 1 MB - if os.stat(dataset.get_file_name()).st_size < max_peek_size: + max_peek_size = 100000 + try: + chunk = fh.read(max_peek_size + 1) + except UnicodeDecodeError: + raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.") + if len(chunk) <= max_peek_size: mime = "text/plain" self._clean_and_set_mime_type(trans, mime, headers) - return fh.read(), headers + return chunk[:-1], headers return ( - trans.fill_template_mako( - "/dataset/large_file.mako", truncated_data=fh.read(max_peek_size), data=dataset - ), + trans.fill_template_mako("/dataset/large_file.mako", truncated_data=chunk[:-1], data=dataset), headers, ) else: diff --git a/lib/galaxy/datatypes/tabular.py b/lib/galaxy/datatypes/tabular.py index 8bfde1202c31..fab0b5893334 100644 --- a/lib/galaxy/datatypes/tabular.py +++ b/lib/galaxy/datatypes/tabular.py @@ -65,6 +65,7 @@ iter_headers, validate_tabular, ) +from galaxy.exceptions import InvalidFileFormatError from galaxy.util import compression_utils from galaxy.util.compression_utils import ( FileObjType, @@ -156,12 +157,15 @@ def get_chunk(self, trans, dataset: HasFileName, offset: int = 0, ck_size: Optio def _read_chunk(self, trans, dataset: HasFileName, offset: int, ck_size: Optional[int] = None): with compression_utils.get_fileobj(dataset.get_file_name()) as f: f.seek(offset) - ck_data = f.read(ck_size or trans.app.config.display_chunk_size) - if ck_data and ck_data[-1] != "\n": - cursor = f.read(1) - while cursor and cursor != "\n": - ck_data += cursor + try: + ck_data = f.read(ck_size or trans.app.config.display_chunk_size) + if ck_data and ck_data[-1] != "\n": cursor = f.read(1) + while cursor and cursor != "\n": + ck_data += cursor + cursor = f.read(1) + except UnicodeDecodeError: + raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.") last_read = f.tell() return ck_data, last_read