diff --git a/src/mdio/segy/utilities.py b/src/mdio/segy/utilities.py index f6c45596..9b953e19 100644 --- a/src/mdio/segy/utilities.py +++ b/src/mdio/segy/utilities.py @@ -1,8 +1,7 @@ """More utilities for reading SEG-Ys.""" - - from __future__ import annotations +import logging from enum import Enum from typing import Sequence @@ -16,6 +15,9 @@ from mdio.segy.parsers import parse_trace_headers +logger = logging.getLogger(__name__) + + class GeometryTemplateType(Enum): """Geometry template types as enum.""" @@ -107,16 +109,26 @@ def get_grid_plan( # noqa: C901 chan_idx = index_names.index("channel") if "AutoChannelTraceQC" in grid_overrides: trace_qc_count = int(grid_overrides["AutoChannelTraceQC"]) - unique_cables, cable_chan_min, cable_chan_max, geom_type = qc_index_headers( + unique_cables, cable_chan_min, _cable_chan_max, geom_type = qc_index_headers( index_headers, index_names, trace_qc_count ) + logger.info(f"Ingesting dataset as {geom_type.name}") + # TODO: Add strict=True and remove noqa when min Python is 3.10 + for cable, chan_min, chan_max in zip( # noqa: B905 + unique_cables, cable_chan_min, _cable_chan_max + ): + logger.info( + f"Cable: {cable} has min chan: {chan_min} and max chan: {chan_max}" + ) + # This might be slow and potentially could be improved with a rewrite # to prevent so many lookups if geom_type == GeometryTemplateType.STREAMER_B: for idx, cable in enumerate(unique_cables): cable_idxs = np.where(index_headers[:, cable_idx] == cable) cc_min = cable_chan_min[idx] + # print(f"idx = {idx} cable = {cable} cc_min={cc_min}") index_headers[cable_idxs, chan_idx] = ( index_headers[cable_idxs, chan_idx] - cc_min + 1 ) @@ -202,13 +214,13 @@ def qc_index_headers( geom_type = GeometryTemplateType.STREAMER_B for idx, cable in enumerate(unique_cables): min_val = cable_chan_min[idx] - max_val = cable_chan_max[cable_idx] + max_val = cable_chan_max[idx] for idx2, cable2 in enumerate(unique_cables): - if cable2 == cable: - # Don't compare with itself - pass - - if cable_chan_min[idx2] < max_val and cable_chan_max[idx2] > min_val: + if ( + cable_chan_min[idx2] < max_val + and cable_chan_max[idx2] > min_val + and (cable2 != cable) + ): geom_type = GeometryTemplateType.STREAMER_A # Return cable_chan_min values diff --git a/tests/integration/test_segy_import_export.py b/tests/integration/test_segy_import_export.py index 9d6b0525..53771f98 100644 --- a/tests/integration/test_segy_import_export.py +++ b/tests/integration/test_segy_import_export.py @@ -2,6 +2,7 @@ import os from os.path import getsize +from typing import List import dask import numpy as np @@ -18,20 +19,24 @@ dask.config.set(scheduler="synchronous") -def create_4d_segy(file_path, chan_header_type="a", **args): +def create_4d_segy( + file_path: str, + num_samples: int, + fldrs: List, + cables: List, + num_traces: List, + chan_header_type: str = "a", + **args, +): """Dummy 4D segy file for use in tests.""" import segyio - fldrs = [2, 3, 5] - cables = [0, 101, 201, 301] - num_traces = [1, 5, 7, 5] - spec = segyio.spec() d = os.path.join(file_path, "data") os.makedirs(d, exist_ok=True) segy_file = os.path.join(d, f"4d_type_{chan_header_type}.sgy") spec.format = 1 - spec.samples = range(25) + spec.samples = range(num_samples) trace_count = len(fldrs) * np.sum(num_traces) spec.tracecount = trace_count @@ -43,18 +48,18 @@ def create_4d_segy(file_path, chan_header_type="a", **args): tracf = 0 for fldr in fldrs: if chan_header_type == "b": - tracf = 0 + tracf = 1 # TODO: Add strict=True and remove noqa when min supported Python is 3.10 for cable, length in zip(cables, num_traces): # noqa: B905 if chan_header_type == "a": - tracf = 0 + tracf = 1 for _i in range(length): # segyio names and byte locations for headers can be found at: - # https://segyio.readthedocs.io/en/latest/segyio.html - # fldr is byte location 9 - shot - # ep is byte location 17 - shot - # stae is byte location 137 - cable - # tracf is byte location 13 - channel + # https://segyio.readthedocs.io/en/latest/segyio.html + # fldr is byte location 9 - shot 4 byte + # ep is byte location 17 - shot 4 byte + # stae is byte location 137 - cable 2 byte + # tracf is byte location 13 - channel 4 byte f.header[trno].update( offset=1, @@ -77,9 +82,10 @@ def create_4d_segy(file_path, chan_header_type="a", **args): @pytest.mark.parametrize("header_locations", [(17, 137, 13)]) @pytest.mark.parametrize("header_names", [("shot", "cable", "channel")]) +@pytest.mark.parametrize("header_lengths", [(4, 2, 4)]) @pytest.mark.parametrize("endian", ["big"]) @pytest.mark.parametrize( - "grid_overrides", [{"AutoChannelWrap": True, "AutoChannelTraceQC": 1000000}] + "grid_overrides", [{"AutoChannelWrap": True, "AutoChannelTraceQC": 100000}, None] ) @pytest.mark.parametrize("chan_header_type", ["a", "b"]) class TestImport4D: @@ -91,24 +97,62 @@ def test_import_4d_segy( zarr_tmp, header_locations, header_names, + header_lengths, endian, grid_overrides, chan_header_type, ): """Test importing a SEG-Y file to MDIO.""" - segy_path = create_4d_segy(tmp_path, chan_header_type=chan_header_type) + num_samples = 25 + fldrs = [2, 3, 5] + cables = [0, 101, 201, 301] + num_traces = [1, 5, 7, 5] + segy_path = create_4d_segy( + tmp_path, + num_samples=num_samples, + fldrs=fldrs, + cables=cables, + num_traces=num_traces, + chan_header_type=chan_header_type, + ) segy_to_mdio( segy_path=segy_path, mdio_path_or_buffer=zarr_tmp.__str__(), index_bytes=header_locations, index_names=header_names, + index_lengths=header_lengths, chunksize=(8, 2, 128, 1024), overwrite=True, endian=endian, grid_overrides=grid_overrides, ) + # QC mdio output + mdio = MDIOReader(zarr_tmp.__str__(), access_pattern="0123") + assert mdio.binary_header["Samples"] == num_samples + grid = mdio.grid + + print(f"chan_header_type = {chan_header_type}") + print(f"grid_overrides = {grid_overrides}") + print(f"grid.select_dim(header_names[0]) = {grid.select_dim(header_names[0])}") + print(f"grid.select_dim(header_names[1]) = {grid.select_dim(header_names[1])}") + print(f"grid.select_dim(header_names[2]) = {grid.select_dim(header_names[2])}") + assert grid.select_dim(header_names[0]) == Dimension(fldrs, header_names[0]) + assert grid.select_dim(header_names[1]) == Dimension(cables, header_names[1]) + + if "b" in chan_header_type and grid_overrides is None: + assert grid.select_dim(header_names[2]) == Dimension( + range(1, np.sum(num_traces) + 1), header_names[2] + ) + else: + assert grid.select_dim(header_names[2]) == Dimension( + range(1, np.amax(num_traces) + 1), header_names[2] + ) + assert grid.select_dim("sample") == Dimension( + range(0, num_samples, 1), "sample" + ) + @pytest.mark.parametrize("header_locations", [(17, 13)]) @pytest.mark.parametrize("header_names", [("inline", "crossline")])