diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md index 65cde7cc6ed..99e35d659b3 100644 --- a/RELEASE-NOTES.md +++ b/RELEASE-NOTES.md @@ -1,10 +1,11 @@ # Release Notes -## PyMC3 vNext (on deck) +## PyMC3 vNext (3.11.1) ### Breaking Changes ### New Features ++ Automatic imputations now also work with `ndarray` data, not just `pd.Series` or `pd.DataFrame` (see [#4439](https://github.com/pymc-devs/pymc3/pull/4439)). ### Maintenance - `math.log1mexp_numpy` no longer raises RuntimeWarning when given very small inputs. These were commonly observed during NUTS sampling (see [#4428](https://github.com/pymc-devs/pymc3/pull/4428)). diff --git a/pymc3/model.py b/pymc3/model.py index 393c4d2f6a2..ceccb0ab403 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -21,6 +21,7 @@ from typing import TYPE_CHECKING, Any, List, Optional, Type, TypeVar, Union, cast import numpy as np +import pandas as pd import scipy.sparse as sps import theano import theano.graph.basic @@ -1695,16 +1696,31 @@ def pandas_to_array(data): XXX: When `data` is a generator, this will return a Theano tensor! 
""" - if hasattr(data, "values"): # pandas - if data.isnull().any().any(): # missing values - ret = np.ma.MaskedArray(data.values, data.isnull().values) + if hasattr(data, "to_numpy"): + # typically, but not limited to pandas objects + vals = data.to_numpy() + mask = np.isnan(vals) + if mask.any(): + # there are missing values + ret = np.ma.MaskedArray(vals, mask) else: - ret = data.values - elif hasattr(data, "mask"): - if data.mask.any(): - ret = data - else: # empty mask - ret = data.filled() + ret = vals + elif isinstance(data, np.ndarray): + if isinstance(data, np.ma.MaskedArray): + if not data.mask.any(): + # empty mask + ret = data.filled() + else: + # already masked and rightly so + ret = data + else: + # already a ndarray, but not masked + mask = np.isnan(data) + if np.any(mask): + ret = np.ma.MaskedArray(data, mask) + else: + # no masking required + ret = data elif isinstance(data, theano.graph.basic.Variable): ret = data elif sps.issparse(data):