From 23728da59cf8e43e9aa84c70f3a23e9d142c5906 Mon Sep 17 00:00:00 2001 From: Yuankailiu Date: Sat, 5 Sep 2020 11:03:28 -0700 Subject: [PATCH 1/6] To enhance the memory efficiency in calculating ramp, deramp, and time-series rms. --- mintpy/objects/ramp.py | 2 +- mintpy/objects/stack.py | 4 +++- mintpy/utils/utils1.py | 9 ++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/mintpy/objects/ramp.py b/mintpy/objects/ramp.py index dce861421..fc91f02f9 100644 --- a/mintpy/objects/ramp.py +++ b/mintpy/objects/ramp.py @@ -75,6 +75,7 @@ def deramp(data, mask_in, ramp_type='linear', metadata=None): # estimate ramp X = np.dot(np.linalg.pinv(G[mask, :], rcond=1e-15), data[mask, :]) ramp = np.dot(G, X) + ramp = np.array(ramp, dtype=data.dtype) del X # reference in space if metadata @@ -85,7 +86,6 @@ def deramp(data, mask_in, ramp_type='linear', metadata=None): # do not change pixel with original zero value ramp[data == 0] = 0 - ramp = np.array(ramp, dtype=data.dtype) data_out = data - ramp if len(dshape) == 3: diff --git a/mintpy/objects/stack.py b/mintpy/objects/stack.py index 3f7a92ab8..b9e1fb531 100644 --- a/mintpy/objects/stack.py +++ b/mintpy/objects/stack.py @@ -404,7 +404,9 @@ def timeseries_rms(self, maskFile=None, outFile=None): print('read mask from file: '+maskFile) mask = singleDataset(maskFile).read() data[:, mask == 0] = np.nan - self.rms = np.sqrt(np.nanmean(np.square(data), axis=(1, 2))) + self.rms = np.zeros(data.shape[0]) * np.nan + for i in range(data.shape[0]): + self.rms[i] = np.sqrt(np.nanmean(np.square(data[i,:]), axis=(0, 1))) # Write text file header = 'Root Mean Square in space for each acquisition of time-series\n' diff --git a/mintpy/utils/utils1.py b/mintpy/utils/utils1.py index 477a81b75..cacb68e01 100644 --- a/mintpy/utils/utils1.py +++ b/mintpy/utils/utils1.py @@ -719,7 +719,8 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None print('reading data ...') data = readfile.read(fname)[0] print('estimating phase ramp ...') - data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0] + for i in range(data.shape[0]): + data[i,:] = deramp(data[i,:], mask, ramp_type=ramp_type, metadata=atr)[0] writefile.write(data, out_file, ref_file=fname) elif k == 'ifgramStack': @@ -741,7 +742,8 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None prog_bar = ptime.progressBar(maxValue=obj.numIfgram) for i in range(obj.numIfgram): data = ds[i, :, :] - data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0] + for j in range(data.shape[0]): + data[j,:] = deramp(data[j,:], mask, ramp_type=ramp_type, metadata=atr)[0] dsOut[i, :, :] = data prog_bar.update(i+1, suffix='{}/{}'.format(i+1, obj.numIfgram)) prog_bar.close() @@ -750,7 +752,8 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None # Single Dataset File else: data = readfile.read(fname)[0] - data = deramp(data, mask, ramp_type, metadata=atr)[0] + for i in range(data.shape[0]): + data[i,:] = deramp(data[i,:], mask, ramp_type=ramp_type, metadata=atr)[0] print('writing >>> {}'.format(out_file)) writefile.write(data, out_file=out_file, ref_file=fname) From 3a1e85fb9982e350f751852c6edaafa5283ab970 Mon Sep 17 00:00:00 2001 From: Yuankailiu Date: Sat, 5 Sep 2020 11:38:28 -0700 Subject: [PATCH 2/6] Fixed a syntax error in readfile.py at line 799 Before: is not "none" Now: != "none" --- mintpy/utils/readfile.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mintpy/utils/readfile.py b/mintpy/utils/readfile.py index 677b8403a..963dfd099 100644 --- a/mintpy/utils/readfile.py +++ b/mintpy/utils/readfile.py @@ -796,7 +796,7 @@ def get_hdf5_dataset(name, obj): 'cfloat': 'complex64', } data_type = atr.get('DATA_TYPE', 'none').lower() - if data_type is not 'none' and data_type in dataTypeDict.keys(): + if data_type != 'none' and data_type in dataTypeDict.keys(): atr['DATA_TYPE'] = dataTypeDict[data_type] # UNIT From cda73c20d88590b14afe659226630b4250eb8eb8 Mon Sep 17 00:00:00 2001 From: Yuan-Kai Liu <55262505+liuykai@users.noreply.github.com> Date: Thu, 10 Sep 2020 11:47:55 -0700 Subject: [PATCH 3/6] Memory-saving deramp and RMS - Read the time-series dataset one date at a time. - Then do the deramp and RMS calculation and save to files. --- mintpy/ifgram_inversion.py | 3 +-- mintpy/objects/stack.py | 23 ++++++++++++++++------ mintpy/utils/utils1.py | 40 +++++++++++++++++++++++++++++++++----- mintpy/utils/writefile.py | 15 +++++++------- 4 files changed, 61 insertions(+), 20 deletions(-) diff --git a/mintpy/ifgram_inversion.py b/mintpy/ifgram_inversion.py index db7d912a6..f0c64a001 100755 --- a/mintpy/ifgram_inversion.py +++ b/mintpy/ifgram_inversion.py @@ -1002,8 +1002,7 @@ def ifgram_inversion(inps=None): meta['UNIT'] = 'm' meta['REF_DATE'] = date_list[0] - ts_obj = timeseries(inps.tsFile) - ts_obj.layout_hdf5(dsNameDict, meta) + writefile.layout_hdf5(inps.tsFile, dsNameDict, meta) # write date time-series date_list_utf8 = [dt.encode('utf-8') for dt in date_list] diff --git a/mintpy/objects/stack.py b/mintpy/objects/stack.py index b9e1fb531..535ef30e7 100644 --- a/mintpy/objects/stack.py +++ b/mintpy/objects/stack.py @@ -398,15 +398,26 @@ def timeseries_rms(self, maskFile=None, outFile=None): """Calculate the Root Mean Square for each acquisition of time-series and output result to a text file. """ - # Calculate RMS - data = self.read() + # Get date list + date_list = self.get_date_list() + num_date = len(date_list) + + # Get mask if maskFile and os.path.isfile(maskFile): print('read mask from file: '+maskFile) mask = singleDataset(maskFile).read() - data[:, mask == 0] = np.nan - self.rms = np.zeros(data.shape[0]) * np.nan - for i in range(data.shape[0]): - self.rms[i] = np.sqrt(np.nanmean(np.square(data[i,:]), axis=(0, 1))) + + # Calculate RMS one date at a time + self.rms = np.zeros(num_date) * np.nan + print('reading {} data from file: {} ...'.format(self.name, self.file)) + prog_bar = ptime.progressBar(maxValue=num_date) + for i in range(num_date): + data = self.read(datasetName='{}'.format(date_list[i]), print_msg=False) + if maskFile and os.path.isfile(maskFile): + data[mask == 0] = np.nan + self.rms[i] = np.sqrt(np.nanmean(np.square(data), axis=(0, 1))) + prog_bar.update(i+1, suffix='{}/{}'.format(i+1, num_date)) + prog_bar.close() # Write text file header = 'Root Mean Square in space for each acquisition of time-series\n' diff --git a/mintpy/utils/utils1.py b/mintpy/utils/utils1.py index cacb68e01..08add5260 100644 --- a/mintpy/utils/utils1.py +++ b/mintpy/utils/utils1.py @@ -716,12 +716,42 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None # deramping if k == 'timeseries': - print('reading data ...') - data = readfile.read(fname)[0] + # initialize dataset structure + ts_obj = timeseries(fname) + ts_obj.open() + date_list = ts_obj.dateList + num_date = len(date_list) + length = int(atr['LENGTH']) + width = int(atr['WIDTH']) + date_dtype = np.dtype('S{}'.format(len(date_list[0]))) + dsNameDict = { + "date" : (date_dtype, (num_date,)), + "bperp" : (np.float32, (num_date,)), + "timeseries" : (np.float32, (num_date, length, width)), + } + + # write HDF5 file with defined metadata and (empty) dataset structure + writefile.layout_hdf5(out_file, dsNameDict, atr) + + # write date time-series + date_list_utf8 = [dt.encode('utf-8') for dt in date_list] + writefile.write_hdf5_block(out_file, date_list_utf8, datasetName='date') + + # write bperp time-series + bperp = ts_obj.pbase + writefile.write_hdf5_block(out_file, bperp, datasetName='bperp') + + print('reading data one date at a time ...') print('estimating phase ramp ...') - for i in range(data.shape[0]): - data[i,:] = deramp(data[i,:], mask, ramp_type=ramp_type, metadata=atr)[0] - writefile.write(data, out_file, ref_file=fname) + prog_bar = ptime.progressBar(maxValue=num_date) + for i in range(num_date): + dSet = 'timeseries-{}'.format(date_list[i]) + data = readfile.read(fname, datasetName=dSet)[0] + data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0] + block = [i, i+1, 0, length, 0, width] + writefile.write_hdf5_block(out_file, data, datasetName='timeseries', block=block, print_msg=False) + prog_bar.update(i+1, suffix='{}/{}'.format(i+1, num_date)) + prog_bar.close() elif k == 'ifgramStack': obj = ifgramStack(fname) diff --git a/mintpy/utils/writefile.py b/mintpy/utils/writefile.py index 45e7337d9..7a82bba96 100644 --- a/mintpy/utils/writefile.py +++ b/mintpy/utils/writefile.py @@ -266,7 +266,7 @@ def layout_hdf5(fname, dsNameDict, metadata): return fname -def write_hdf5_block(fname, data, datasetName, block=None, mode='a'): +def write_hdf5_block(fname, data, datasetName, block=None, mode='a', print_msg=True): """Write data to existing HDF5 dataset in disk block by block. Parameters: data - np.ndarray 1/2/3D matrix datasetName - str, dataset name @@ -299,11 +299,11 @@ def write_hdf5_block(fname, data, datasetName, block=None, mode='a'): 0, shape[2]] # write - print('-'*50) - print('open HDF5 file {} in {} mode'.format(fname, mode)) - with h5py.File(fname, mode) as f: + if print_msg is True: + print('-'*50) + print('open HDF5 file {} in {} mode'.format(fname, mode)) print("writing dataset /{:<25} block: {}".format(datasetName, block)) - + with h5py.File(fname, mode) as f: if len(block) == 6: f[datasetName][block[0]:block[1], block[2]:block[3], @@ -315,8 +315,9 @@ def write_hdf5_block(fname, data, datasetName, block=None, mode='a'): elif len(block) == 2: f[datasetName][block[0]:block[1]] = data - - print('close HDF5 file {}.'.format(fname)) + + if print_msg is True: + print('close HDF5 file {}.'.format(fname)) return fname From f1a7a244e80643f677a705fb9cbd37874fc59e17 Mon Sep 17 00:00:00 2001 From: Zhang Yunjun Date: Thu, 10 Sep 2020 13:07:27 -0700 Subject: [PATCH 4/6] Update utils1.py --- mintpy/utils/utils1.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/mintpy/utils/utils1.py b/mintpy/utils/utils1.py index 08add5260..fa159fd29 100644 --- a/mintpy/utils/utils1.py +++ b/mintpy/utils/utils1.py @@ -741,17 +741,22 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None bperp = ts_obj.pbase writefile.write_hdf5_block(out_file, bperp, datasetName='bperp') - print('reading data one date at a time ...') - print('estimating phase ramp ...') + print('estimating phase ramp one date at a time ...') prog_bar = ptime.progressBar(maxValue=num_date) for i in range(num_date): - dSet = 'timeseries-{}'.format(date_list[i]) - data = readfile.read(fname, datasetName=dSet)[0] + # read + dsName = 'timeseries-{}'.format(date_list[i]) + data = readfile.read(fname, datasetName=dsName)[0] + # deramp data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0] - block = [i, i+1, 0, length, 0, width] - writefile.write_hdf5_block(out_file, data, datasetName='timeseries', block=block, print_msg=False) + # write + writefile.write_hdf5_block(out_file, data, + datasetName='timeseries', + block=[i, i+1, 0, length, 0, width], + print_msg=False) prog_bar.update(i+1, suffix='{}/{}'.format(i+1, num_date)) prog_bar.close() + print('finished writing to file: {}'.format(fname)) elif k == 'ifgramStack': obj = ifgramStack(fname) @@ -765,15 +770,17 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None dsOut = f[dsNameOut] print('access HDF5 dataset /{}'.format(dsNameOut)) else: - dsOut = f.create_dataset(dsNameOut, shape=(obj.numIfgram, obj.length, obj.width), - dtype=np.float32, chunks=True, compression=None) + dsOut = f.create_dataset(dsNameOut, + shape=(obj.numIfgram, obj.length, obj.width), + dtype=np.float32, + chunks=True, + compression=None) print('create HDF5 dataset /{}'.format(dsNameOut)) prog_bar = ptime.progressBar(maxValue=obj.numIfgram) for i in range(obj.numIfgram): data = ds[i, :, :] - for j in range(data.shape[0]): - data[j,:] = deramp(data[j,:], mask, ramp_type=ramp_type, metadata=atr)[0] + data = deramp(data, mask, ramp_type=ramp_type, metadata=atr)[0] dsOut[i, :, :] = data prog_bar.update(i+1, suffix='{}/{}'.format(i+1, obj.numIfgram)) prog_bar.close() @@ -782,8 +789,7 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None # Single Dataset File else: data = readfile.read(fname)[0] - for i in range(data.shape[0]): - data[i,:] = deramp(data[i,:], mask, ramp_type=ramp_type, metadata=atr)[0] + data = deramp(data, mask, ramp_type, metadata=atr)[0] print('writing >>> {}'.format(out_file)) writefile.write(data, out_file=out_file, ref_file=fname) From 70e810a1eb82bc679a2d56991a9419630f436cb1 Mon Sep 17 00:00:00 2001 From: Zhang Yunjun Date: Thu, 10 Sep 2020 13:17:22 -0700 Subject: [PATCH 5/6] Update utils1.py --- mintpy/utils/utils1.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/mintpy/utils/utils1.py b/mintpy/utils/utils1.py index fa159fd29..862c3cd72 100644 --- a/mintpy/utils/utils1.py +++ b/mintpy/utils/utils1.py @@ -717,29 +717,27 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None # deramping if k == 'timeseries': # initialize dataset structure - ts_obj = timeseries(fname) - ts_obj.open() + ts_obj = timeseries(fname) + ts_obj.open(print_msg=False) date_list = ts_obj.dateList num_date = len(date_list) - length = int(atr['LENGTH']) - width = int(atr['WIDTH']) date_dtype = np.dtype('S{}'.format(len(date_list[0]))) dsNameDict = { "date" : (date_dtype, (num_date,)), "bperp" : (np.float32, (num_date,)), - "timeseries" : (np.float32, (num_date, length, width)), + "timeseries" : (np.float32, (num_date, ts_obj.length, ts_obj.width)), } - + # write HDF5 file with defined metadata and (empty) dataset structure - writefile.layout_hdf5(out_file, dsNameDict, atr) + writefile.layout_hdf5(out_file, dsNameDict, atr, print_msg=False) # write date time-series date_list_utf8 = [dt.encode('utf-8') for dt in date_list] - writefile.write_hdf5_block(out_file, date_list_utf8, datasetName='date') + writefile.write_hdf5_block(out_file, date_list_utf8, datasetName='date', print_msg=False) # write bperp time-series bperp = ts_obj.pbase - writefile.write_hdf5_block(out_file, bperp, datasetName='bperp') + writefile.write_hdf5_block(out_file, bperp, datasetName='bperp', print_msg=False) print('estimating phase ramp one date at a time ...') prog_bar = ptime.progressBar(maxValue=num_date) @@ -752,11 +750,11 @@ def run_deramp(fname, ramp_type, mask_file=None, out_file=None, datasetName=None # write writefile.write_hdf5_block(out_file, data, datasetName='timeseries', - block=[i, i+1, 0, length, 0, width], + block=[i, i+1, 0, ts_obj.length, 0, ts_obj.width], print_msg=False) prog_bar.update(i+1, suffix='{}/{}'.format(i+1, num_date)) prog_bar.close() - print('finished writing to file: {}'.format(fname)) + print('finished writing to file: {}'.format(out_file)) elif k == 'ifgramStack': obj = ifgramStack(fname) From ca6651ce4a99ec66fad9e40fc44717412c381545 Mon Sep 17 00:00:00 2001 From: Zhang Yunjun Date: Thu, 10 Sep 2020 13:19:05 -0700 Subject: [PATCH 6/6] Update writefile.py --- mintpy/utils/writefile.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/mintpy/utils/writefile.py b/mintpy/utils/writefile.py index 7a82bba96..1b36132fa 100644 --- a/mintpy/utils/writefile.py +++ b/mintpy/utils/writefile.py @@ -191,7 +191,7 @@ def write(datasetDict, out_file, metadata=None, ref_file=None, compression=None) ######################################################################### -def layout_hdf5(fname, dsNameDict, metadata): +def layout_hdf5(fname, dsNameDict, metadata, print_msg=True): """Create HDF5 file with defined metadata and (empty) dataset structure Parameters: fname - str, HDF5 file path @@ -226,9 +226,10 @@ def layout_hdf5(fname, dsNameDict, metadata): } """ - print('-'*50) - print('create HDF5 file {} with w mode'.format(fname)) h5 = h5py.File(fname, "w") + if print_msg: + print('-'*50) + print('create HDF5 file {} with w mode'.format(fname)) # initiate dataset for key in dsNameDict.keys(): @@ -247,9 +248,10 @@ def layout_hdf5(fname, dsNameDict, metadata): else: maxShape = data_shape - print("create dataset: {d:<25} of {t:<25} in size of {s}".format(d=key, - t=str(data_type), - s=data_shape)) + if print_msg: + print("create dataset: {d:<25} of {t:<25} in size of {s}".format(d=key, + t=str(data_type), + s=data_shape)) h5.create_dataset(key, shape=data_shape, maxshape=maxShape, @@ -262,7 +264,8 @@ def layout_hdf5(fname, dsNameDict, metadata): h5.attrs[key] = metadata[key] h5.close() - print('close HDF5 file {}'.format(fname)) + if print_msg: + print('close HDF5 file {}'.format(fname)) return fname @@ -299,7 +302,7 @@ def write_hdf5_block(fname, data, datasetName, block=None, mode='a', print_msg=T 0, shape[2]] # write - if print_msg is True: + if print_msg: print('-'*50) print('open HDF5 file {} in {} mode'.format(fname, mode)) print("writing dataset /{:<25} block: {}".format(datasetName, block)) @@ -315,8 +318,8 @@ def write_hdf5_block(fname, data, datasetName, block=None, mode='a', print_msg=T elif len(block) == 2: f[datasetName][block[0]:block[1]] = data - - if print_msg is True: + + if print_msg: print('close HDF5 file {}.'.format(fname)) return fname