From fce4d62caac3df467c8079d9dc3c9d436c17799d Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Mon, 6 Jan 2025 13:56:07 -0500 Subject: [PATCH 01/17] add fsspec to rdheader --- pyproject.toml | 2 ++ wfdb/io/download.py | 14 +++++++++++--- wfdb/io/record.py | 10 +++++++--- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 09fbdad9..5faaa00e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,8 @@ dependencies = [ "soundfile >= 0.10.0", "matplotlib >= 3.2.2", "requests >= 2.8.1", + "fsspec >= 2023.10.0", + "aiohttp >= 3.11.11", ] dynamic = ["version"] diff --git a/wfdb/io/download.py b/wfdb/io/download.py index 338d8b97..667ca16e 100644 --- a/wfdb/io/download.py +++ b/wfdb/io/download.py @@ -3,6 +3,7 @@ import os import posixpath +import fsspec import numpy as np from wfdb.io import _url @@ -12,6 +13,9 @@ PN_INDEX_URL = "https://physionet.org/files/" PN_CONTENT_URL = "https://physionet.org/content/" +# Cloud protocols +CLOUD_PROTOCOLS = ["az:", "azureml:", "s3:", "gs:"] + class Config(object): """ @@ -101,11 +105,15 @@ def _stream_header(file_name: str, pn_dir: str) -> str: The text contained in the header file """ - # Full url of header location - url = posixpath.join(config.db_index_url, pn_dir, file_name) + # Full cloud url + if any(pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS): + url = posixpath.join(pn_dir, file_name) + # Full physionet database url + else: + url = posixpath.join(config.db_index_url, pn_dir, file_name) # Get the content of the remote file - with _url.openurl(url, "rb") as f: + with fsspec.open(url, "rb") as f: content = f.read() return content.decode("iso-8859-1") diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 1a8855ed..8d69c64b 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -4,6 +4,7 @@ import os import re +import fsspec import numpy as np import pandas as pd @@ -1826,8 +1827,11 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): dir_name, base_record_name = os.path.split(record_name) dir_name = os.path.abspath(dir_name) - # Construct the download path using the database version - if (pn_dir is not None) and ("." not in pn_dir): + # If this is a cloud path we leave it as is + if (pn_dir is not None) and any(pn_dir.startswith(proto) for proto in download.CLOUD_PROTOCOLS): + pass + # If it isn't a cloud path, construct the download path using the database version + elif (pn_dir is not None) and ("." not in pn_dir): dir_list = pn_dir.split("/") pn_dir = posixpath.join( dir_list[0], download.get_version(dir_list[0]), *dir_list[1:] @@ -1836,7 +1840,7 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): # Read the local or remote header file. file_name = f"{base_record_name}.hea" if pn_dir is None: - with open( + with fsspec.open( os.path.join(dir_name, file_name), "r", encoding="ascii", From 2a116b9450579c29713466276e3c76d53dd5ba0a Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Mon, 6 Jan 2025 14:26:49 -0500 Subject: [PATCH 02/17] downgrade aiohttp for python 3.8 compatibility --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5faaa00e..baac2d01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ dependencies = [ "matplotlib >= 3.2.2", "requests >= 2.8.1", "fsspec >= 2023.10.0", - "aiohttp >= 3.11.11", + "aiohttp >= 3.10.11", ] dynamic = ["version"] From d72ccd791b41eb665f7e58db26442a202a0553c8 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Mon, 6 Jan 2025 14:58:37 -0500 Subject: [PATCH 03/17] add fsspec to run-tests --- .github/workflows/run-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index dc0d322e..9f00a18f 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -49,7 +49,7 @@ jobs: python3-soundfile \ python3-pytest \ git - + pip install fsspec>=2023.10.0 # Note: "actions/checkout@v2" requires libstdc++6:amd64 to be # installed in the container. To keep things simple, use # "actions/checkout@v1" instead. From 8930f1c2f0b7429968cf915603c050f24ec460a3 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Mon, 6 Jan 2025 15:02:08 -0500 Subject: [PATCH 04/17] reformat for compatibility with black --- wfdb/io/record.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 8d69c64b..33881aa2 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1828,7 +1828,9 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): dir_name = os.path.abspath(dir_name) # If this is a cloud path we leave it as is - if (pn_dir is not None) and any(pn_dir.startswith(proto) for proto in download.CLOUD_PROTOCOLS): + if (pn_dir is not None) and any( + pn_dir.startswith(proto) for proto in download.CLOUD_PROTOCOLS + ): pass # If it isn't a cloud path, construct the download path using the database version elif (pn_dir is not None) and ("." not in pn_dir): From de1483c86639d0a18c0229aabb5250b7466e9e3f Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Mon, 6 Jan 2025 15:16:59 -0500 Subject: [PATCH 05/17] install pip before calling it during run-tests --- .github/workflows/run-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 9f00a18f..4515bb3e 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -44,6 +44,7 @@ jobs: python3-matplotlib \ python3-numpy \ python3-pandas \ + python3-pip \ python3-requests \ python3-scipy \ python3-soundfile \ From 3794f9293543a360912955da6b6ac7b848906f86 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Thu, 9 Jan 2025 11:34:12 -0500 Subject: [PATCH 06/17] update tests to run on debian 11 --- .github/workflows/run-tests.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 4515bb3e..2d316672 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -33,14 +33,15 @@ jobs: - name: Check source code format run: uv run --extra dev black --check --diff . - test-deb10-i386: + test-deb11-i386: runs-on: ubuntu-latest - container: i386/debian:10 + container: i386/debian:11 steps: - name: Install dependencies run: | apt-get update apt-get install -y --no-install-recommends \ + python3-fsspec \ python3-matplotlib \ python3-numpy \ python3-pandas \ @@ -49,8 +50,7 @@ jobs: python3-scipy \ python3-soundfile \ python3-pytest \ - git - pip install fsspec>=2023.10.0 + # Note: "actions/checkout@v2" requires libstdc++6:amd64 to be # installed in the container. To keep things simple, use # "actions/checkout@v1" instead. From dfb78181d1ea30daeebdfc38a253a72122b6d653 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Thu, 9 Jan 2025 11:35:02 -0500 Subject: [PATCH 07/17] dont use fsspec for pn_dir files --- wfdb/io/download.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/wfdb/io/download.py b/wfdb/io/download.py index 667ca16e..bb54f48b 100644 --- a/wfdb/io/download.py +++ b/wfdb/io/download.py @@ -105,15 +105,11 @@ def _stream_header(file_name: str, pn_dir: str) -> str: The text contained in the header file """ - # Full cloud url - if any(pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS): - url = posixpath.join(pn_dir, file_name) - # Full physionet database url - else: - url = posixpath.join(config.db_index_url, pn_dir, file_name) + # Full url of header location + url = posixpath.join(config.db_index_url, pn_dir, file_name) # Get the content of the remote file - with fsspec.open(url, "rb") as f: + with _url.openurl(url, "rb") as f: content = f.read() return content.decode("iso-8859-1") From ccd03cca0e2e40f683ec3746f24f55a745e971d1 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Thu, 9 Jan 2025 11:41:30 -0500 Subject: [PATCH 08/17] move cloud_protocols definition --- wfdb/io/download.py | 4 ---- wfdb/io/record.py | 4 +++- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/wfdb/io/download.py b/wfdb/io/download.py index bb54f48b..338d8b97 100644 --- a/wfdb/io/download.py +++ b/wfdb/io/download.py @@ -3,7 +3,6 @@ import os import posixpath -import fsspec import numpy as np from wfdb.io import _url @@ -13,9 +12,6 @@ PN_INDEX_URL = "https://physionet.org/files/" PN_CONTENT_URL = "https://physionet.org/content/" -# Cloud protocols -CLOUD_PROTOCOLS = ["az:", "azureml:", "s3:", "gs:"] - class Config(object): """ diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 33881aa2..b185b357 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -156,6 +156,8 @@ "vtip": "mV", } +# Cloud protocols +CLOUD_PROTOCOLS = ["az:", "azureml:", "s3:", "gs:"] class BaseRecord(object): """ @@ -1829,7 +1831,7 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): # If this is a cloud path we leave it as is if (pn_dir is not None) and any( - pn_dir.startswith(proto) for proto in download.CLOUD_PROTOCOLS + pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS ): pass # If it isn't a cloud path, construct the download path using the database version From 91c20b2ffd1fbc03925fbecc5178b74a392ca69f Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Thu, 9 Jan 2025 11:50:12 -0500 Subject: [PATCH 09/17] reformat per black --- wfdb/io/record.py | 1 + 1 file changed, 1 insertion(+) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index b185b357..83886023 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -159,6 +159,7 @@ # Cloud protocols CLOUD_PROTOCOLS = ["az:", "azureml:", "s3:", "gs:"] + class BaseRecord(object): """ The base WFDB class extended by the Record and MultiRecord classes. From b3e0bd75a99b84d86507d89a03a6005a150e8ecf Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Thu, 9 Jan 2025 15:14:26 -0500 Subject: [PATCH 10/17] dont use local path separator for uri --- wfdb/io/record.py | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 83886023..66ba726f 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1829,22 +1829,28 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): """ dir_name, base_record_name = os.path.split(record_name) dir_name = os.path.abspath(dir_name) + file_name = f"{base_record_name}.hea" + + # If this is a cloud path, use posixpath to construct the path + if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): + with fsspec.open( + posixpath.join(dir_name, file_name), + mode="rb" + ) as f: + header_content = f.read() - # If this is a cloud path we leave it as is - if (pn_dir is not None) and any( - pn_dir.startswith(proto) for proto in CLOUD_PROTOCOLS - ): - pass # If it isn't a cloud path, construct the download path using the database version - elif (pn_dir is not None) and ("." not in pn_dir): - dir_list = pn_dir.split("/") - pn_dir = posixpath.join( - dir_list[0], download.get_version(dir_list[0]), *dir_list[1:] - ) + elif (pn_dir is not None): + if ("." not in pn_dir): + dir_list = pn_dir.split("/") + pn_dir = posixpath.join( + dir_list[0], download.get_version(dir_list[0]), *dir_list[1:] + ) - # Read the local or remote header file. - file_name = f"{base_record_name}.hea" - if pn_dir is None: + header_content = download._stream_header(file_name, pn_dir) + + # If it isn't a cloud path or a PhysioNet path, we treat as a local file + else: with fsspec.open( os.path.join(dir_name, file_name), "r", @@ -1852,8 +1858,6 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): errors="ignore", ) as f: header_content = f.read() - else: - header_content = download._stream_header(file_name, pn_dir) # Separate comment and non-comment lines header_lines, comment_lines = header.parse_header_content(header_content) From fa27e34e6f8c849eb1a9a9cd6dfbc484fce0c68d Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Thu, 9 Jan 2025 15:51:06 -0500 Subject: [PATCH 11/17] only call abspath for local files --- wfdb/io/record.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 66ba726f..526df830 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1828,20 +1828,16 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): """ dir_name, base_record_name = os.path.split(record_name) - dir_name = os.path.abspath(dir_name) file_name = f"{base_record_name}.hea" # If this is a cloud path, use posixpath to construct the path if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): - with fsspec.open( - posixpath.join(dir_name, file_name), - mode="rb" - ) as f: + with fsspec.open(posixpath.join(dir_name, file_name), mode="rb") as f: header_content = f.read() # If it isn't a cloud path, construct the download path using the database version - elif (pn_dir is not None): - if ("." not in pn_dir): + elif pn_dir is not None: + if "." not in pn_dir: dir_list = pn_dir.split("/") pn_dir = posixpath.join( dir_list[0], download.get_version(dir_list[0]), *dir_list[1:] @@ -1851,6 +1847,7 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): # If it isn't a cloud path or a PhysioNet path, we treat as a local file else: + dir_name = os.path.abspath(dir_name) with fsspec.open( os.path.join(dir_name, file_name), "r", From 489dcc497765642656774f578ff680ce1df3b32d Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Fri, 17 Jan 2025 15:24:35 -0500 Subject: [PATCH 12/17] use correct read mode --- wfdb/io/record.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 526df830..06dc6faf 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1832,7 +1832,7 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): # If this is a cloud path, use posixpath to construct the path if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): - with fsspec.open(posixpath.join(dir_name, file_name), mode="rb") as f: + with fsspec.open(posixpath.join(dir_name, file_name), mode="r") as f: header_content = f.read() # If it isn't a cloud path, construct the download path using the database version From 6e2b45576e42a7d19f7199aa2e0743a9db78d52e Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Fri, 17 Jan 2025 15:25:39 -0500 Subject: [PATCH 13/17] use double slash for cloud protocol urls --- wfdb/io/record.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 06dc6faf..688001bc 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -157,7 +157,7 @@ } # Cloud protocols -CLOUD_PROTOCOLS = ["az:", "azureml:", "s3:", "gs:"] +CLOUD_PROTOCOLS = ["az://", "azureml://", "s3://", "gs://"] class BaseRecord(object): From 1b6f57b1a58fbb452f0713feac9fe36dd13f91fe Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Wed, 22 Jan 2025 11:22:06 -0500 Subject: [PATCH 14/17] add fsspec to rdrecord --- wfdb/io/_coreio.py | 9 ++++++--- wfdb/io/_signal.py | 33 +++++++++++++++++++++++---------- wfdb/io/record.py | 4 ++-- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/wfdb/io/_coreio.py b/wfdb/io/_coreio.py index 9b3a7876..0a11cf1f 100644 --- a/wfdb/io/_coreio.py +++ b/wfdb/io/_coreio.py @@ -1,5 +1,7 @@ import posixpath +import fsspec + from wfdb.io import _url from wfdb.io.download import config @@ -28,8 +30,9 @@ def _open_file( The PhysioNet database directory where the file is stored, or None if file_name is a local path. file_name : str - The name of the file, either as a local filesystem path (if - `pn_dir` is None) or a URL path (if `pn_dir` is a string.) + The name of the file, either as a local filesystem path or cloud + URL (if `pn_dir` is None) or a PhysioNet URL path + (if `pn_dir` is a string.) mode : str, optional The standard I/O mode for the file ("r" by default). If `pn_dir` is not None, this must be "r", "rt", or "rb". @@ -47,7 +50,7 @@ def _open_file( """ if pn_dir is None: - return open( + return fsspec.open( file_name, mode, buffering=buffering, diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 693c6a19..7f58e141 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -1,7 +1,9 @@ import math import os +import posixpath import sys +import fsspec import numpy as np from wfdb.io import download, _coreio, util @@ -1643,10 +1645,10 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp): The name of the dat file. dir_name : str The full directory where the dat file(s) are located, if the dat - file(s) are local. + file(s) are local or in the cloud. pn_dir : str The PhysioNet directory where the dat file(s) are located, if - the dat file(s) are remote. + the dat file(s) are on a PhysioNet server. fmt : str The format of the dat file. start_byte : int @@ -1688,7 +1690,7 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp): # Local dat file if pn_dir is None: - with open(os.path.join(dir_name, file_name), "rb") as fp: + with fsspec.open(os.path.join(dir_name, file_name), "rb") as fp: fp.seek(start_byte) sig_data = np.fromfile( fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count @@ -1840,8 +1842,9 @@ def _rd_compressed_file( file_name : str The name of the signal file. dir_name : str - The full directory where the signal file is located, if local. - This argument is ignored if `pn_dir` is not None. + The full directory where the signal file is located, if this + is a local or cloud path. This argument is ignored if `pn_dir` + is not None. pn_dir : str or None The PhysioNet database directory where the signal file is located. fmt : str @@ -2585,10 +2588,10 @@ def _infer_sig_len( The byte offset of the dat file. None is equivalent to zero. dir_name : str The full directory where the dat file(s) are located, if the dat - file(s) are local. + file(s) are local or on the cloud. pn_dir : str, optional The PhysioNet directory where the dat file(s) are located, if - the dat file(s) are remote. + the dat file(s) are on a PhysioNet server. Returns ------- @@ -2600,13 +2603,23 @@ def _infer_sig_len( sig_len * tsamps_per_frame * bytes_per_sample == file_size """ - if pn_dir is None: - file_size = os.path.getsize(os.path.join(dir_name, file_name)) - else: + from wfdb.io.record import CLOUD_PROTOCOLS + + # If this is a cloud path, use posixpath to construct the path and fsspec to open file + if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): + with fsspec.open(posixpath.join(dir_name, file_name), mode="rb") as f: + file_size = f.seek(0, os.SEEK_END) + + # If the PhysioNet database path is provided, construct the download path using the database version + elif pn_dir is not None: file_size = download._remote_file_size( file_name=file_name, pn_dir=pn_dir ) + # If it isn't a cloud path or a PhysioNet path, we treat as a local file + else: + file_size = os.path.getsize(os.path.join(dir_name, file_name)) + if byte_offset is None: byte_offset = 0 data_size = file_size - byte_offset diff --git a/wfdb/io/record.py b/wfdb/io/record.py index 688001bc..c18bc149 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -1830,12 +1830,12 @@ def rdheader(record_name, pn_dir=None, rd_segments=False): dir_name, base_record_name = os.path.split(record_name) file_name = f"{base_record_name}.hea" - # If this is a cloud path, use posixpath to construct the path + # If this is a cloud path, use posixpath to construct the path and fsspec to open file if any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): with fsspec.open(posixpath.join(dir_name, file_name), mode="r") as f: header_content = f.read() - # If it isn't a cloud path, construct the download path using the database version + # If the PhysioNet database path is provided, construct the download path using the database version elif pn_dir is not None: if "." not in pn_dir: dir_list = pn_dir.split("/") From de65d8b80fbf1f9c9bcdaac8a5719576d0b83830 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Wed, 22 Jan 2025 14:55:42 -0500 Subject: [PATCH 15/17] dont call abspath when opening cloud path --- wfdb/io/record.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wfdb/io/record.py b/wfdb/io/record.py index c18bc149..2bb141e4 100644 --- a/wfdb/io/record.py +++ b/wfdb/io/record.py @@ -2027,7 +2027,9 @@ def rdrecord( """ dir_name, base_record_name = os.path.split(record_name) - dir_name = os.path.abspath(dir_name) + # Update the dir_name using abspath unless it is a cloud path + if not any(dir_name.startswith(proto) for proto in CLOUD_PROTOCOLS): + dir_name = os.path.abspath(dir_name) # Read the header fields if (pn_dir is not None) and ("." not in pn_dir): From e180259dc2c45ae991da355ca9b9a0b007b8d93b Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Fri, 24 Jan 2025 14:07:22 -0500 Subject: [PATCH 16/17] add alternative to numpy fromfile for fsspec --- wfdb/io/_signal.py | 7 ++++--- wfdb/io/util.py | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/wfdb/io/_signal.py b/wfdb/io/_signal.py index 7f58e141..da3c611d 100644 --- a/wfdb/io/_signal.py +++ b/wfdb/io/_signal.py @@ -1688,14 +1688,15 @@ def _rd_dat_file(file_name, dir_name, pn_dir, fmt, start_byte, n_samp): element_count = n_samp byte_count = n_samp * BYTES_PER_SAMPLE[fmt] - # Local dat file + # Local or cloud dat file if pn_dir is None: with fsspec.open(os.path.join(dir_name, file_name), "rb") as fp: fp.seek(start_byte) - sig_data = np.fromfile( + sig_data = util.fromfile( fp, dtype=np.dtype(DATA_LOAD_TYPES[fmt]), count=element_count ) - # Stream dat file from Physionet + + # Stream dat file from PhysioNet else: dtype_in = np.dtype(DATA_LOAD_TYPES[fmt]) sig_data = download._stream_dat( diff --git a/wfdb/io/util.py b/wfdb/io/util.py index 07b06dcc..0ad99920 100644 --- a/wfdb/io/util.py +++ b/wfdb/io/util.py @@ -2,9 +2,12 @@ A module for general utility functions """ +import io import math import os +import numpy as np + from typing import List, Sequence, Tuple @@ -121,3 +124,27 @@ def overlapping_ranges( for second in ranges_2 if max(first[0], second[0]) < min(first[1], second[1]) ] + + +def fromfile(fileobj, dtype, count=-1): + """ + Detect if the object will work with numpy.fromfile - if so, use it. If not, read the object into a numpy array and + calculate the number of elements (if not provided) - this is needed for fsspec objects. + """ + if isinstance(fileobj, io.FileIO) or ( + isinstance(fileobj, (io.BufferedReader, io.BufferedRandom)) + and isinstance(fileobj.raw, io.FileIO) + ): + return np.fromfile(fileobj, dtype=dtype, count=count) + else: + dtype = np.dtype(dtype) + if count < 0: + start = fileobj.tell() + fileobj.seek(0, os.SEEK_END) + end = fileobj.tell() + fileobj.seek(start, os.SEEK_SET) + count = (end - start) // dtype.itemsize + array = np.empty(count, dtype) + size = fileobj.readinto(array) + array.resize(size // dtype.itemsize) + return array From 9c9fffee025a3d3acce98ea8a868a238303f0407 Mon Sep 17 00:00:00 2001 From: Brian Gow Date: Fri, 31 Jan 2025 16:46:38 -0500 Subject: [PATCH 17/17] use unit8 for reading fsspec object size --- wfdb/io/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wfdb/io/util.py b/wfdb/io/util.py index 0ad99920..db998d03 100644 --- a/wfdb/io/util.py +++ b/wfdb/io/util.py @@ -145,6 +145,6 @@ def fromfile(fileobj, dtype, count=-1): fileobj.seek(start, os.SEEK_SET) count = (end - start) // dtype.itemsize array = np.empty(count, dtype) - size = fileobj.readinto(array) + size = fileobj.readinto(array.view(np.uint8)) array.resize(size // dtype.itemsize) return array