From a990b8aa55d924a592cbe79ca971a8fdaaaa3ec0 Mon Sep 17 00:00:00 2001
From: Jonathan Wright
Date: Wed, 21 Apr 2021 10:54:07 +0200
Subject: [PATCH 1/5] docstring fixup

---
 src/darkflat.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/darkflat.c b/src/darkflat.c
index ee2d1051..6fa63203 100644
--- a/src/darkflat.c
+++ b/src/darkflat.c
@@ -552,7 +552,7 @@ void reorder_u16_a32_a16(uint16_t *restrict data, uint32_t *restrict a0,
 /* F2PY_WRAPPER_START
 subroutine bgcalc(img, bg, msk, ns, nf, gain, sp, st)
-!DOC bg1d computes a background on a 1d signal where gain
+!DOC bgcalc computes a background on a 1d signal where gain
 !DOC and sp and st are defined by the code!
 !DOC img - source data
 !DOC bg - computed background

From 48c1305ca986e22c2eddd9b6792fb1e5eb89c771 Mon Sep 17 00:00:00 2001
From: Jonathan Wright
Date: Thu, 22 Apr 2021 09:04:38 +0200
Subject: [PATCH 2/5] fix manifest

---
 MANIFEST.in | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index c23e14c1..f020e800 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -5,9 +5,9 @@ include *.sh
 include *.txt
 include *.yml
 include LICENSE
-recursive-include ImageD11src *.c
-recursive-include ImageD11src *.md
-recursive-include ImageD11src *.py
+recursive-include ImageD11 *.c
+recursive-include ImageD11 *.md
+recursive-include ImageD11 *.py
 recursive-include data *.spline
 recursive-include docs *.bat
 recursive-include docs *.docx

From 103a82d6878e1092d4c5896bfaa9cdf5235c0dfa Mon Sep 17 00:00:00 2001
From: Jonathan Wright
Date: Wed, 12 May 2021 13:17:58 +0200
Subject: [PATCH 3/5] add doc

---
 src/darkflat.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/darkflat.c b/src/darkflat.c
index 6fa63203..c6904d38 100644
--- a/src/darkflat.c
+++ b/src/darkflat.c
@@ -553,7 +553,11 @@ void reorder_u16_a32_a16(uint16_t *restrict data, uint32_t *restrict a0,
 /* F2PY_WRAPPER_START
 subroutine bgcalc(img, bg, msk, ns, nf, gain, sp, st)
 !DOC bgcalc computes a background on a 1d signal where gain
-!DOC and sp and st are defined by the code!
+!DOC and sp and st are defined by:
+!DOC diff = difference to neighbors or bg estimate
+!DOC sp (sigmap) = weight for abs background value
+!DOC st (sigmat) = constant weight
+!DOC gain for the update b += diff * gain
 !DOC img - source data
 !DOC bg - computed background
 !DOC msk - mask
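
The scheme those new !DOC lines describe can be sketched in Python as
below. This is only a loose reading of the documentation (sp = sigmap and
st = sigmat are assumed from the argument names); the real implementation
is the C routine in src/darkflat.c wrapped by f2py, whose clipping details
may differ.

import numpy as np

def bgcalc_sketch(img, gain, sigmap, sigmat):
    # Running background follower, as the !DOC lines suggest; a sketch,
    # not the actual darkflat.c code.
    bg = np.zeros(len(img))
    b = float(img[0])                     # current background estimate
    for i, v in enumerate(img):
        diff = v - b                      # difference to the bg estimate
        sigma = sigmat + sigmap * abs(b)  # constant + proportional weight
        if diff > sigma:                  # clip excursions so that peaks
            diff = sigma                  # do not drag the background up
        elif diff < -sigma:
            diff = -sigma
        b += diff * gain                  # the documented update step
        bg[i] = b
    return bg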

From 3b0aac00760e5bd94d33a11ba37d956ff4491fb1 Mon Sep 17 00:00:00 2001
From: Jonathan Wright
Date: Wed, 19 May 2021 11:32:11 +0200
Subject: [PATCH 4/5] updates for scipy/numpy changes

---
 test/demo/qtsplat.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/test/demo/qtsplat.py b/test/demo/qtsplat.py
index 4322af4a..af63e718 100644
--- a/test/demo/qtsplat.py
+++ b/test/demo/qtsplat.py
@@ -113,8 +113,8 @@ def __init__(self, w=2000, h=1500):
         self.origin = (0,0,0)      # origin in reciprocal space
         self.scale = 0.7*min(w,h)  # scale for Angstrom to px
         self.zsort = True
-        self.u0 = np.eye(3, dtype=np.float )
-        self.u = np.eye(3, dtype=np.float )
+        self.u0 = np.eye(3, dtype=float )
+        self.u = np.eye(3, dtype=float )
         self.zname = "gz"
         self.need_redraw = True

@@ -128,7 +128,10 @@ def rotate(self, dx, dy, axes="XYZ"):
         self.need_redraw = True
         r = Rotation.from_euler(axes, (dx, dy, 0), degrees=True )
-        self.u = r.as_dcm()
+        if hasattr(r, "as_dcm"):      # older scipy
+            self.u = r.as_dcm()
+        else:                         # newer scipy renamed as_dcm to as_matrix
+            self.u = r.as_matrix()

     def resetrot(self):
         self.need_redraw = True

From dc6b26fa025b1dc06b190f670bdcbecc3a2c394b Mon Sep 17 00:00:00 2001
From: Jonathan Wright
Date: Fri, 25 Jun 2021 16:15:45 +0200
Subject: [PATCH 5/5] sparse runner

---
 sandbox/segmenter.py | 305 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 305 insertions(+)
 create mode 100644 sandbox/segmenter.py

diff --git a/sandbox/segmenter.py b/sandbox/segmenter.py
new file mode 100644
index 00000000..238bead8
--- /dev/null
+++ b/sandbox/segmenter.py
@@ -0,0 +1,305 @@


""" Does segmentation from a master file to produce an _sparse.h5 with
segmented images in it """


import sys

# Code to clean the 2D image and reduce it to a sparse array:
# things we might edit
class Options:
    CUT = 1  # keep values above cut in first look at image
    HOWMANY = 10000  # max pixels per frame to keep
    # additional thresholds to test if you have too many pixels (tuple)
    THRESHOLDS = (2, 4, 8, 16, 32, 64, 128, 256)
    # Remove single pixel spots. How many 8-connected pixels are needed in a spot to keep it?
    PIXELS_IN_SPOT = 3
    # measurements that might be involved in scanning to transfer to output
    SCANMOTORS = ("rot", "rot_cen360", "rot_center", "fpico6", "dty", "dty_center")
    # scalars from scan/instrument/positioners/XXXX to transfer to output
    HEADERMOTORS = ("dty", "rot", "pz", "px", "py", "shtz", "shty", "shtx")
    SCANTYPES = ("fscan", "f2scan", "fscan2d")
    DETECTORNAME = "eiger"
    MASKFILE = "/data/id11/nanoscope/Eiger/mask_20210428.edf"
    CORES = None  # guess the number of cores to use
    HNAME = sys.argv[1]
    OUTPATH = "."
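
# Illustrative usage, with a hypothetical master file name: HNAME comes
# from sys.argv[1], so the runner is launched from a shell as
#     python segmenter.py /path/to/sample_master.h5
# and writes sample_master_sparse.h5 into OUTPATH (default ".").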

########################## should not need to change much below here

import time
import os
import functools
import numpy as np
import hdf5plugin
import h5py
import numba
import fabio

# if you do not have it yet: pip install ImageD11 --no-deps
from ImageD11 import sparseframe, cImageD11


def init(self):
    # validate input
    print("Calling init")
    assert self.CUT >= 0
    for i in range(1, len(self.THRESHOLDS)):
        assert self.THRESHOLDS[i] > self.THRESHOLDS[i - 1]
    self.MASK = 1 - fabio.open(self.MASKFILE).data
    print("# Opened mask", self.MASKFILE)
    if self.CORES is None:
        try:
            self.CORES = int(os.environ["SLURM_JOB_CPUS_PER_NODE"]) - 1
        except (KeyError, ValueError):  # not under SLURM, or unparseable
            self.CORES = os.cpu_count() - 1
    self.CORES = max(1, self.CORES)
    print("# Aiming to use", self.CORES, "processes")
    outh5 = os.path.split(self.HNAME)[-1].replace(".h5", "_sparse.h5")
    self.OUTNAME = os.path.join(self.OUTPATH, outh5)
    print("# Output to ", self.OUTNAME)

Options.__init__ = init
# each subprocess gets its own options (no need if we fork?)
OPTIONS = Options()


# we will use multiple processes, each with 1 core
# all sub-processes had better do this!
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"  # not sure we want/need this here?
os.environ["OMP_NUM_THREADS"] = "1"
os.environ["NUMBA_NUM_THREADS"] = "1"
os.environ["MKL_NUM_THREADS"] = "1"


@numba.njit
def select(img, mask, row, col, val, cut):
    # Choose the pixels that are > cut and put them into sparse arrays
    k = 0
    for s in range(img.shape[0]):
        for f in range(img.shape[1]):
            if img[s, f] > cut:
                if mask[s, f]:  # skip masked
                    continue
                row[k] = s
                col[k] = f
                val[k] = img[s, f]
                k += 1
    return k


@numba.njit
def top_pixels(nnz, row, col, val, howmany, thresholds):
    """
    selects the strongest pixels from a sparse collection
    - thresholds should be a sorted array of potential cutoff values to try
      that are higher than the original cutoff used to select data
    - howmany is the maximum number of pixels to return
    """
    # quick return if there are already few enough pixels
    if nnz <= howmany:
        return nnz
    # histogram of how many pixels are above each threshold
    h = np.zeros(len(thresholds), dtype=np.uint32)
    for k in range(nnz):
        for i, t in enumerate(thresholds):
            if val[k] > t:
                h[i] += 1
            else:
                break
    # choose the cutoff to use: the first that keeps fewer than howmany
    tcut = thresholds[-1]
    for n, t in zip(h, thresholds):
        if n < howmany:
            tcut = t
            break
    # now we filter the pixels, compacting in place
    n = 0
    for k in range(nnz):
        if val[k] > tcut:
            row[n] = row[k]
            col[n] = col[k]
            val[n] = val[k]
            n += 1
            if n >= howmany:
                break
    return n


@functools.lru_cache(maxsize=1)
def get_dset(h5name, dsetname):
    """Avoids re-reading the dataset many times; the file handle stays
    open for the lifetime of the cache entry"""
    dset = h5py.File(h5name, "r")[dsetname]
    return dset
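
def _demo_top_pixels():
    # Hedged, self-contained demo of the threshold cascade in top_pixels
    # above. The values are invented for illustration, not detector data.
    rng = np.random.default_rng(0)
    val = rng.integers(2, 300, size=20000).astype(np.uint32)
    row = np.zeros(val.size, np.uint16)
    col = np.zeros(val.size, np.uint16)
    n = top_pixels(val.size, row, col, val, Options.HOWMANY, Options.THRESHOLDS)
    # only the first n entries of row/col/val remain meaningful afterwards
    assert n <= Options.HOWMANY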

def choose_parallel(args):
    """reads a frame and sends back a sparse frame"""
    h5name, address, frame_num = args
    frm = get_dset(h5name, address)[frame_num]
    if choose_parallel.cache is None:
        # cache the mallocs on this function. Should be one cache per process
        row = np.empty(OPTIONS.MASK.size, np.uint16)
        col = np.empty(OPTIONS.MASK.size, np.uint16)
        val = np.empty(OPTIONS.MASK.size, frm.dtype)
        choose_parallel.cache = row, col, val
    else:
        row, col, val = choose_parallel.cache
    nnz = select(frm, OPTIONS.MASK, row, col, val, OPTIONS.CUT)
    if nnz == 0:
        sf = None
    else:
        if nnz > OPTIONS.HOWMANY:
            nnz = top_pixels(nnz, row, col, val, OPTIONS.HOWMANY, OPTIONS.THRESHOLDS)
        # Now get rid of the single pixel 'peaks'
        # (for the mallocs, data is copied here)
        s = sparseframe.sparse_frame(row[:nnz].copy(), col[:nnz].copy(), frm.shape)
        s.set_pixels("intensity", val[:nnz].copy())
        if OPTIONS.PIXELS_IN_SPOT <= 1:
            sf = s
        else:
            # label them according to the connected objects
            sparseframe.sparse_connected_pixels(
                s, threshold=OPTIONS.CUT, data_name="intensity", label_name="cp"
            )
            # only keep spots with at least PIXELS_IN_SPOT pixels
            mom = sparseframe.sparse_moments(
                s, intensity_name="intensity", labels_name="cp"
            )
            npx = mom[:, cImageD11.s2D_1]
            pxcounts = npx[s.pixels["cp"] - 1]
            pxmsk = pxcounts >= OPTIONS.PIXELS_IN_SPOT
            if pxmsk.sum() == 0:
                sf = None
            else:
                sf = s.mask(pxmsk)
    return frame_num, sf


choose_parallel.cache = None  # malloc cache, one per process
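
# For reference, the shape of what choose_parallel returns (names follow
# ImageD11.sparseframe; the scan address below is hypothetical):
#   i, sf = choose_parallel((hname, "1.1/measurement/eiger", 0))
#   sf is None when no pixels survive; otherwise
#   sf.row, sf.col          uint16 pixel coordinates of the kept pixels
#   sf.pixels["intensity"]  their counts
#   sf.nnz                  how many pixels survived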

def segment_scans(
    fname,
    scans,
    outname,
    mypool,
    scanmotors=OPTIONS.SCANMOTORS,
    headermotors=OPTIONS.HEADERMOTORS,
    detector=OPTIONS.DETECTORNAME,
):
    """Does segmentation on a series of scans in hdf files:
    fname : input hdf file
    scans : input groups in the hdf file; [h[s] for s in scans] will be treated
    outname : output hdf file to put the results into
    mypool : pool whose map() runs choose_parallel to select which pixels to keep
    scanmotors : datasets to copy from input measurement/xxx to output
    headermotors : things to copy from input instrument/positioners to output
    detector : name of the detector data in measurement/
    """
    opts = {
        "chunks": (10000,),
        "maxshape": (None,),
        "compression": "lzf",
        "shuffle": True,
    }
    ndone = 0
    with h5py.File(outname, "a") as hout:
        for scan in scans:
            if scan.endswith(".2"):  # for fscans
                continue
            with h5py.File(fname, "r") as hin:
                hout.attrs["h5input"] = fname
                gin = hin[scan]
                bad = 1
                title = hin[scan]["title"][()]
                if isinstance(title, bytes):  # h5py may return bytes
                    title = title.decode()
                print("# ", scan, title)
                print("# time now", time.ctime(), "\n#", end=" ")
                for scantype in OPTIONS.SCANTYPES:
                    if title.find(scantype) >= 0:
                        bad = 0
                if ("measurement" in gin) and (detector not in gin["measurement"]):
                    bad += 1
                if bad:
                    print("# SKIPPING BAD SCAN", scan)
                    continue
                g = hout.create_group(scan)
                gm = g.create_group("measurement")
                for m in scanmotors:  # vary : many
                    if m in gin["measurement"]:
                        gm.create_dataset(m, data=gin["measurement"][m][:])
                    # else: the thing requested is not there -
                    # ignore the problem for now
                gip = g.create_group("instrument/positioners")
                for m in headermotors:  # fixed : scalar
                    gip.create_dataset(m, data=gin["instrument/positioners"][m][()])
                try:
                    frms = gin["measurement"][detector]
                except KeyError:
                    print(list(gin))
                    print(list(gin["measurement"]))
                    print(detector)
                    raise
                row = g.create_dataset("row", (1,), dtype=np.uint16, **opts)
                col = g.create_dataset("col", (1,), dtype=np.uint16, **opts)
                # can go over 65535 frames in a scan
                num = g.create_dataset("frame", (1,), dtype=np.uint32, **opts)
                sig = g.create_dataset("intensity", (1,), dtype=frms.dtype, **opts)
                nnz = g.create_dataset("nnz", (frms.shape[0],), dtype=np.uint32)
                g.attrs["itype"] = np.dtype(np.uint16).name
                g.attrs["nframes"] = frms.shape[0]
                g.attrs["shape0"] = frms.shape[1]
                g.attrs["shape1"] = frms.shape[2]
                npx = 0
                address = scan + "/measurement/" + detector
                nimg = frms.shape[0]
                args = [(fname, address, i) for i in range(nimg)]
            # the input file is CLOSED here; the workers re-open it themselves
            # approx 1440 frames, 40 cores
            chunksize = max(1, len(args) // OPTIONS.CORES // 8)
            for i, spf in mypool.map(
                choose_parallel, args, chunksize=chunksize, timeout=60
            ):
                if spf is None:
                    nnz[i] = 0
                    continue
                if i % 500 == 0:
                    print("%4d %d" % (i, spf.nnz), end=",")
                if spf.nnz + npx > len(row):
                    row.resize(spf.nnz + npx, axis=0)
                    col.resize(spf.nnz + npx, axis=0)
                    sig.resize(spf.nnz + npx, axis=0)
                    num.resize(spf.nnz + npx, axis=0)
                row[npx:] = spf.row[:]
                col[npx:] = spf.col[:]
                sig[npx:] = spf.pixels["intensity"]
                num[npx:] = i
                nnz[i] = spf.nnz
                npx += spf.nnz
            ndone += nimg
            print("\n# Done", scan, nimg, ndone)
            sys.stdout.flush()
    # the output file is flushed and closed when this returns
    return ndone


def main(hname, outname, mypool):
    # read all the scans : just the master process
    print("hname = ", repr(hname))
    print("outname = ", repr(outname))
    with h5py.File(hname, "r") as h:
        scans = list(h["/"])
    print("scans = ", repr(scans))
    print("#", time.ctime())
    start = time.time()
    nfrm = segment_scans(hname, scans, outname, mypool)
    end = time.time()
    print("#", time.ctime())
    print("# Elapsed", end - start, "/s, f.p.s %.2f" % (nfrm / (end - start)))


if __name__ == "__main__":
    import concurrent.futures

    with concurrent.futures.ProcessPoolExecutor(max_workers=OPTIONS.CORES) as thepool:
        main(OPTIONS.HNAME, OPTIONS.OUTNAME, thepool)
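

# Appendix sketch (illustrative): reading back one scan from the
# _sparse.h5 written above. File and scan-group names are hypothetical.
def _read_back_example():
    with h5py.File("sample_master_sparse.h5", "r") as h:
        g = h["1.1"]                                  # hypothetical scan group
        nnz = g["nnz"][:]                             # pixels kept per frame
        ipt = np.concatenate(((0,), np.cumsum(nnz)))  # frame -> slice offsets
        f = 0                                         # inspect the first frame
        s, e = ipt[f], ipt[f + 1]
        row, col = g["row"][s:e], g["col"][s:e]       # pixel coordinates
        sig = g["intensity"][s:e]                     # their counts
        print("frame", f, "kept", nnz[f], "pixels")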