SMC Sampling fails with multivariate distribution #312

mathDR · 2019-11-29T01:53:50Z

Summary:

When a sequence of thresholds having length > 1 is passed to smc, the inference fails with ValueError: In executing node '_theta_logpdf': Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,)..

Description:

Using a simulator of a multinomial model, having a dirichlet prior, attempting to run smc for a sequence of thresholds produces an error.

Reproducible Steps:

import numpy as np
import scipy.stats as ss
import elfi
num_categories = 10
num_experiments = 100
theta = elfi.Prior('dirichlet', np.ones(num_categories), name='theta')
def simulator(num_experiments, theta, batch_size=1, random_state=None):
    if len(theta.shape)==1:
        theta = theta.reshape(1,-1)
    return_value = []

    for i in range(theta.shape[0]):
        return_value.append(
            ss.multinomial.rvs(
                100, 
                theta[i,:], 
                random_state=random_state
            )
        )
    return np.array(return_value)
                       
def mean(y):
    return np.mean(y, axis=1)

def ret_identity(y):
    return y

def distance_in_variation(p1,observed=[]):
    arr_p1 = np.asarray(p1)
    y = np.tile(observed[0],(np.asarray(p1).shape[0],1))
    
    return 0.5*np.sum(np.abs(arr_p1-y),axis=1)

theta0 = np.random.dirichlet(np.ones(num_categories))
np.random.seed(20191126) 
y0 = simulator(num_experiments,theta0)

from functools import partial
# Add the simulator node and observed data to the model
sim_fn = partial(simulator,num_experiments)
sim = elfi.Simulator(sim_fn, theta, observed=y0)
d = elfi.Discrepancy(
    distance_in_variation, 
    elfi.Summary(ret_identity, sim, name='ret_identity'), 
    name='d')

smc = elfi.SMC(d, batch_size=10000)

N = 1000
schedule = [100,99]
%time result_smc = smc.sample(N, schedule)

Current Output:

When the threshold schedule is [100], everything runs. When it is changed to [100,99] it fails with the error:

Progress: |█████████████████████████-------------------------| 50.0% Complete
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     69                 try:
---> 70                     G.node[node] = cls._run(op, node, G)
     71                 except Exception as exc:

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in _run(fn, node, G)
    153 
--> 154         output_dict = {'output': fn(*args, **kwargs)}
    155         return output_dict

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in logpdf(self, x, alpha)
   1448         alpha = _dirichlet_check_parameters(alpha)
-> 1449         x = _dirichlet_check_input(alpha, x)
   1450 

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in _dirichlet_check_input(alpha, x)
   1242                          "parameter vector 'a', but alpha.shape = %s "
-> 1243                          "and x.shape = %s." % (alpha.shape, x.shape))
   1244 

ValueError: Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,).

During handling of the above exception, another exception occurred:

ValueError                                Traceback (most recent call last)
<timed exec> in <module>

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in sample(self, n_samples, *args, **kwargs)
    404         bar = kwargs.pop('bar', True)
    405 
--> 406         return self.infer(n_samples, *args, bar=bar, **kwargs)
    407 
    408     def _extract_result_kwargs(self):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in infer(self, vis, bar, *args, **kwargs)
    262 
    263         while not self.finished:
--> 264             self.iterate()
    265             if vis:
    266                 self.plot_state(interactive=True, **vis_opt)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in iterate(self)
    299         # Submit new batches if allowed
    300         while self._allow_submit(self.batches.next_index):
--> 301             next_batch = self.prepare_new_batch(self.batches.next_index)
    302             logger.debug("Submitting batch %d" % self.batches.next_index)
    303             self.batches.submit(next_batch)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/parameter_inference.py in prepare_new_batch(self, batch_index)
    716         params = GMDistribution.rvs(*self._gm_params, size=self.batch_size,
    717                                     prior_logpdf=self._prior.logpdf,
--> 718                                     random_state=self._round_random_state)
    719 
    720         batch = arr2d_to_batch(params, self.parameter_names)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in rvs(cls, means, cov, weights, size, prior_logpdf, random_state)
    229             # check validity of x
    230             if prior_logpdf is not None:
--> 231                 x = x[np.isfinite(prior_logpdf(x))]
    232 
    233             n_accepted1 = len(x)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in logpdf(self, x)
    356     def logpdf(self, x):
    357         """Return the log density of the joint prior at x."""
--> 358         return self._evaluate_pdf(x, log=True)
    359 
    360     def _evaluate_pdf(self, x, log=False):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/methods/utils.py in _evaluate_pdf(self, x, log)
    380             loaded_net.node[k] = {'output': v}
    381 
--> 382         val = self.client.compute(loaded_net)[node]
    383         if ndim == 0 or (ndim == 1 and self.dim > 1):
    384             val = val[0]

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/client.py in compute(self, loaded_net)
    271     def compute(self, loaded_net):
    272         """Request evaluation of `loaded_net` and wait for result."""
--> 273         return self.apply_sync(Executor.execute, loaded_net)
    274 
    275     @property

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/clients/native.py in apply_sync(self, kallable, *args, **kwargs)
     51 
     52         """
---> 53         return kallable(*args, **kwargs)
     54 
     55     def get_result(self, task_id):

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     71                 except Exception as exc:
     72                     raise exc.__class__("In executing node '{}': {}."
---> 73                                         .format(node, exc)).with_traceback(exc.__traceback__)
     74 
     75             elif 'output' not in attr:

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in execute(cls, G)
     68                 op = attr['operation']
     69                 try:
---> 70                     G.node[node] = cls._run(op, node, G)
     71                 except Exception as exc:
     72                     raise exc.__class__("In executing node '{}': {}."

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/elfi/executor.py in _run(fn, node, G)
    152         args = [a[1] for a in sorted(args, key=itemgetter(0))]
    153 
--> 154         output_dict = {'output': fn(*args, **kwargs)}
    155         return output_dict
    156 

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in logpdf(self, x, alpha)
   1447         """
   1448         alpha = _dirichlet_check_parameters(alpha)
-> 1449         x = _dirichlet_check_input(alpha, x)
   1450 
   1451         out = self._logpdf(x, alpha)

~/.pyenv/versions/3.7.4/envs/work/lib/python3.7/site-packages/scipy/stats/_multivariate.py in _dirichlet_check_input(alpha, x)
   1241                          "of entries as, or one entry fewer than, "
   1242                          "parameter vector 'a', but alpha.shape = %s "
-> 1243                          "and x.shape = %s." % (alpha.shape, x.shape))
   1244 
   1245     if x.shape[0] != alpha.shape[0]:

ValueError: In executing node '_theta_logpdf': Vector 'x' must have either the same number of entries as, or one entry fewer than, parameter vector 'a', but alpha.shape = (10,) and x.shape = (100000,)..

Expected Output:

I expect the output to have samples similar to running with threshold [99].

ELFI Version:

0.7.4

Python Version:

3.7.4

Operating System:

MAC OSX

The text was updated successfully, but these errors were encountered:

hpesonen · 2019-12-03T21:12:36Z

Hi,
unfortunately SMC-ABC in ELFI doesn't support multidimensional priors. However, in the case of Dirichlet-prior, you could use the following change of variables based on independent gamma-priors.

import numpy as np
import scipy.stats as ss
import elfi
import collections
from functools import partial

def simulator(*alpha, num_experiments, batch_size=1, random_state=None):

    n_dim=len(alpha)
    batches_alpha = np.zeros(shape=(batch_size, n_dim))

    for idx_dim, param_alpha in enumerate(alpha):
        batches_alpha[:, idx_dim] = param_alpha

    return_value = []
    theta = batches_alpha / np.sum(batches_alpha,axis=1, keepdims=True)
    return_value = np.zeros((theta.shape))
    for i in range(batch_size):
        return_value[i,:] = ss.multinomial.rvs(num_experiments, 
                                           theta[i,:], random_state=random_state)
        
    return np.array(return_value)
                       
def mean(y):
    return np.mean(y, axis=1)

def ret_identity(y):
    return y

def distance_in_variation(p1,observed=[]):
    arr_p1 = np.asarray(p1)
    y = np.tile(observed[0],(np.asarray(p1).shape[0],1))
    return 0.5*np.sum(np.abs(arr_p1-y),axis=1)

num_categories = 10
num_experiments = 100
theta0 = np.random.dirichlet(np.ones(num_categories))
y0 = simulator(*theta0, num_experiments = num_experiments)
m = elfi.new_model()

priors = []

for i in range(num_categories):
    name_prior = 'alpha_{}'.format(i)
    prior_alpha = elfi.Prior('gamma', 1, model=m, name=name_prior)
    priors.append(prior_alpha)

np.random.seed(20191126) 

# Add the simulator node and observed data to the model
sim_fn = partial(simulator,num_experiments=num_experiments)
sim = elfi.Simulator(sim_fn, *priors, observed=y0)
d = elfi.Discrepancy(
    distance_in_variation, 
    elfi.Summary(ret_identity, sim, model=m, name='ret_identity'), name='d')

smc = elfi.SMC(d, batch_size=1000)

N = 10000
schedule = [100,90,80,70,50,40]
result_smc = smc.sample(N, thresholds = schedule, bar=False)

alpha = result_smc.sample_means.values()
total_sum = sum(alpha) 
theta_values = list(alpha) / total_sum

for i in range(num_categories):
    print("theta_% 1d % 10.4f % 10.4f" %(i, theta0[i], theta_values[i]))

mathDR · 2019-12-04T01:23:57Z

Thanks for the tip. I will definitely put this into practice.

If this is functionality you think would help out in ELFI, I would be willing to take a shot at a PR extending the SMC functionality.

Please advise and thanks!

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

SMC Sampling fails with multivariate distribution #312

SMC Sampling fails with multivariate distribution #312

mathDR commented Nov 29, 2019

hpesonen commented Dec 3, 2019

mathDR commented Dec 4, 2019

SMC Sampling fails with multivariate distribution #312

SMC Sampling fails with multivariate distribution #312

Comments

mathDR commented Nov 29, 2019

Summary:

Description:

Reproducible Steps:

Current Output:

Expected Output:

ELFI Version:

Python Version:

Operating System:

hpesonen commented Dec 3, 2019

mathDR commented Dec 4, 2019