From 0be27f98e9ee3d58d9ff5357af1c9725af96925f Mon Sep 17 00:00:00 2001 From: Aitor Morales-Gregorio <43403140+morales-gregorio@users.noreply.github.com> Date: Tue, 12 Jan 2021 16:50:00 +0100 Subject: [PATCH] Improve memory efficiency of _create_sparse_matrix in BinnedSpikeTrain class (#395) Co-authored-by: kleinjohann --- elephant/conversion.py | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/elephant/conversion.py b/elephant/conversion.py index b35602acf..1f6ffa8b8 100644 --- a/elephant/conversion.py +++ b/elephant/conversion.py @@ -1065,11 +1065,23 @@ def _create_sparse_matrix(self, spiketrains): Spike trains to bin. """ + + # The data type for numeric values + data_dtype = np.int32 + if not _check_neo_spiketrain(spiketrains): # a binned numpy array - sparse_matrix = sps.csr_matrix(spiketrains, dtype=np.int32) + sparse_matrix = sps.csr_matrix(spiketrains, dtype=data_dtype) return sparse_matrix + # Get index dtype that can accommodate the largest index + # (this is the same dtype that will be used for the index arrays of the + # sparse matrix, so already using it here avoids array duplication) + shape = (len(spiketrains), self.n_bins) + numtype = np.int32 + if max(shape) > np.iinfo(numtype).max: + numtype = np.int64 + row_ids, column_ids = [], [] # data counts = [] @@ -1089,21 +1101,29 @@ def _create_sparse_matrix(self, spiketrains): valid_bins = bins[bins < self.n_bins] n_discarded += len(bins) - len(valid_bins) f, c = np.unique(valid_bins, return_counts=True) + # f inherits the dtype np.int32 from bins, but c is created in + # np.unique with the default int dtype (usually np.int64) + c = c.astype(data_dtype) column_ids.append(f) counts.append(c) - row_ids.append(np.repeat(idx, repeats=len(f))) + row_ids.append(np.repeat(idx, repeats=len(f)).astype(numtype)) if n_discarded > 0: warnings.warn("Binning discarded {} last spike(s) of the " "input spiketrain".format(n_discarded)) + # Stacking preserves the data type. 
In any case, while creating + # the sparse matrix, a copy is performed even if we set 'copy' to False + # explicitly (however, this might change in future scipy versions - + # this depends on scipy csr matrix initialization implementation). counts = np.hstack(counts) - row_ids = np.hstack(row_ids) column_ids = np.hstack(column_ids) + row_ids = np.hstack(row_ids) sparse_matrix = sps.csr_matrix((counts, (row_ids, column_ids)), - shape=(len(spiketrains), self.n_bins), - dtype=np.int32, copy=False) + shape=shape, dtype=data_dtype, + copy=False) + return sparse_matrix