diff --git a/bridgescaler/distributed.py b/bridgescaler/distributed.py index f8dd727..8da4db7 100644 --- a/bridgescaler/distributed.py +++ b/bridgescaler/distributed.py @@ -390,10 +390,12 @@ def inv_transform_variable(td_obj, xv, td_centroids = td_obj.centroids() x_transformed = np.zeros_like(xv) if distribution == "normal": - x_transformed = ndtr(xv) + x_intermediate = ndtr(xv) elif distribution == "logistic": - x_transformed = logistic.cdf(xv) - tdigest_quantile(xv, td_centroids["mean"], td_centroids["weight"], + x_intermediate = logistic.cdf(xv) + else: + x_intermediate = xv + tdigest_quantile(x_intermediate, td_centroids["mean"], td_centroids["weight"], td_obj.min(), td_obj.max(), x_transformed) return x_transformed @@ -503,6 +505,8 @@ class DQuantileScaler(DBaseScaler): datasets in parallel. The library can perform fitting, transforms, and inverse transforms across variables in parallel using the multiprocessing library. Multidimensional arrays are stored in shared memory across processes to minimize inter-process communication. + + DQuantileScaler supports Attributes: compression: Recommended number of centroids to use. 
@@ -637,7 +641,7 @@ def inverse_transform(self, x, channels_last=None, pool=None): del outputs[:] else: for td_obj in td_i_objs: - x_transformed[..., td_obj[0]] = inv_trans_var_func(td_obj[1], xv[:, td_obj[0]]) + x_transformed[..., td_obj[0]] = inv_trans_var_func(td_obj[1], xv[..., td_obj[0]]) else: if pool is not None: split_indices = np.round(np.linspace(0, xv[..., 0].size, pool._processes)).astype(int) diff --git a/doc/source/_static/bridgescaler_logo.png b/doc/source/_static/bridgescaler_logo.png new file mode 100644 index 0000000..0e852b5 Binary files /dev/null and b/doc/source/_static/bridgescaler_logo.png differ diff --git a/doc/source/_static/logo.graffle b/doc/source/_static/logo.graffle new file mode 100644 index 0000000..6cc4d1a Binary files /dev/null and b/doc/source/_static/logo.graffle differ diff --git a/doc/source/conf.py b/doc/source/conf.py index cf077f1..fb97e88 100644 --- a/doc/source/conf.py +++ b/doc/source/conf.py @@ -7,9 +7,9 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information project = 'bridgescaler' -copyright = '2024, David John Gagne' +copyright = '2024, University Corporation for Atmospheric Research' author = 'David John Gagne' -release = '0.7.0' +release = '0.8.0' # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -26,3 +26,4 @@ html_theme = 'sphinx_book_theme' html_static_path = ['_static'] +html_logo = "_static/bridgescaler_logo.png" diff --git a/doc/source/index.rst b/doc/source/index.rst index 1d97472..cdf22da 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -12,7 +12,6 @@ distributed scaling of data for pre-processing of AI and ML models. :maxdepth: 2 :caption: Contents: - index.rst gettingstarted.rst usage.rst distributed.rst