Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chunk interval index #519

Draft
wants to merge 9 commits into
base: master
Choose a base branch
from
32 changes: 31 additions & 1 deletion strax/chunk.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import typing as ty

import numpy as np
import pandas as pd
import numba

import strax
Expand All @@ -17,7 +18,7 @@ class Chunk:
data_type: str
data_kind: str
dtype: np.dtype

# run_id is not superfluous to track:
# this could change during the run in superruns (in the future)
run_id: str
Expand All @@ -27,6 +28,7 @@ class Chunk:

data: np.ndarray
target_size_mb: int
_index: pd.IntervalIndex = None

def __init__(self,
*,
Expand Down Expand Up @@ -112,6 +114,12 @@ def nbytes(self):
@property
def duration(self):
    """Return the span covered by this chunk, computed as ``end - start``."""
    span = self.end - self.start
    return span

@property
def index(self):
    """Return a ``pd.IntervalIndex`` with one interval per row of ``self.data``.

    The index is built on first access from ``self.data['time']`` (left
    bounds) and ``strax.endtime(self.data)`` (right bounds), then cached
    in ``self._index`` and reused on later accesses.
    """
    # NOTE(review): nothing in this property resets the cache, so the index
    # would presumably go stale if ``self.data`` is replaced afterwards --
    # confirm against how callers treat ``data``.
    cached = self._index
    if cached is not None:
        return cached

    # NOTE(review): ``from_arrays`` is called without ``closed``, so the
    # pandas default is used -- confirm that matches the intended
    # start/endtime convention.
    left_bounds = self.data['time']
    right_bounds = strax.endtime(self.data)
    self._index = pd.IntervalIndex.from_arrays(left_bounds, right_bounds)
    return self._index

@property
def is_superrun(self):
Expand Down Expand Up @@ -292,6 +300,28 @@ def concatenate(cls, chunks):
data=np.concatenate([c.data for c in chunks]),
target_size_mb=max([c.target_size_mb for c in chunks]))

def overlaps(self, start, end=None):
    """Return the rows of ``self.data`` whose interval overlaps a query.

    The query is either a ready-made ``pd.Interval`` passed as ``start``,
    or the interval ``pd.Interval(start, end)`` built from two bounds
    (documented by the original author as ``(start, end]``).

    Args:
        start: query start time, or a ``pd.Interval`` describing the
            whole query. When a ``pd.Interval`` is given, ``end`` is
            not used.
        end: query end time. Required unless ``start`` is a
            ``pd.Interval``. Defaults to None.

    Raises:
        ValueError: if ``start`` is not a ``pd.Interval`` and ``end``
            is not given.

    Returns:
        ``self.data`` indexed by the boolean mask from
        ``self.index.overlaps``, i.e. only the overlapping rows.
    """
    if isinstance(start, pd.Interval):
        query = start
    else:
        # Compare against None explicitly so that an end time of 0 is valid.
        if end is None:
            raise ValueError("Must supply interval of start and end times.")
        query = pd.Interval(start, end)

    mask = self.index.overlaps(query)
    return self.data[mask]

@export
def continuity_check(chunk_iter):
Expand Down