Skip to content

Commit

Permalink
add black formatter to pre-commit (#4163)
Browse files Browse the repository at this point in the history
* add black formatter to pre-commit

* fmt off benchmarks

Co-authored-by: Marco Gorelli <[email protected]>
  • Loading branch information
MarcoGorelli and Marco Gorelli authored Oct 8, 2020
1 parent a248c3d commit 2482a35
Show file tree
Hide file tree
Showing 7 changed files with 170 additions and 138 deletions.
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ repos:
- id: end-of-file-fixer
- id: check-toml
- repo: https://github.com/nbQA-dev/nbQA
rev: 0.2.1
rev: 0.2.3
hooks:
- id: nbqa
args: ['isort']
Expand All @@ -22,3 +22,7 @@ repos:
hooks:
- id: pyupgrade
args: ['--py36-plus']
- repo: https://github.com/psf/black
rev: 20.8b1
hooks:
- id: black
230 changes: 137 additions & 93 deletions benchmarks/benchmarks/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,23 @@
def glm_hierarchical_model(random_seed=123):
"""Sample glm hierarchical model to use in benchmarks"""
np.random.seed(random_seed)
data = pd.read_csv(pm.get_data('radon.csv'))
data['log_radon'] = data['log_radon'].astype(theano.config.floatX)
data = pd.read_csv(pm.get_data("radon.csv"))
data["log_radon"] = data["log_radon"].astype(theano.config.floatX)
county_idx = data.county_code.values

n_counties = len(data.county.unique())
with pm.Model() as model:
mu_a = pm.Normal('mu_a', mu=0., sd=100**2)
sigma_a = pm.HalfCauchy('sigma_a', 5)
mu_b = pm.Normal('mu_b', mu=0., sd=100**2)
sigma_b = pm.HalfCauchy('sigma_b', 5)
a = pm.Normal('a', mu=0, sd=1, shape=n_counties)
b = pm.Normal('b', mu=0, sd=1, shape=n_counties)
mu_a = pm.Normal("mu_a", mu=0.0, sd=100 ** 2)
sigma_a = pm.HalfCauchy("sigma_a", 5)
mu_b = pm.Normal("mu_b", mu=0.0, sd=100 ** 2)
sigma_b = pm.HalfCauchy("sigma_b", 5)
a = pm.Normal("a", mu=0, sd=1, shape=n_counties)
b = pm.Normal("b", mu=0, sd=1, shape=n_counties)
a = mu_a + sigma_a * a
b = mu_b + sigma_b * b
eps = pm.HalfCauchy('eps', 5)
eps = pm.HalfCauchy("eps", 5)
radon_est = a[county_idx] + b[county_idx] * data.floor.values
pm.Normal('radon_like', mu=radon_est, sd=eps, observed=data.log_radon)
pm.Normal("radon_like", mu=radon_est, sd=eps, observed=data.log_radon)
return model


Expand All @@ -50,24 +50,27 @@ def mixture_model(random_seed=1234):
np.random.seed(1234)
size = 1000
w_true = np.array([0.35, 0.4, 0.25])
mu_true = np.array([0., 2., 5.])
sigma = np.array([0.5, 0.5, 1.])
mu_true = np.array([0.0, 2.0, 5.0])
sigma = np.array([0.5, 0.5, 1.0])
component = np.random.choice(mu_true.size, size=size, p=w_true)
x = np.random.normal(mu_true[component], sigma[component], size=size)

with pm.Model() as model:
w = pm.Dirichlet('w', a=np.ones_like(w_true))
mu = pm.Normal('mu', mu=0., sd=10., shape=w_true.shape)
enforce_order = pm.Potential('enforce_order', tt.switch(mu[0] - mu[1] <= 0, 0., -np.inf) +
tt.switch(mu[1] - mu[2] <= 0, 0., -np.inf))
tau = pm.Gamma('tau', alpha=1., beta=1., shape=w_true.shape)
pm.NormalMixture('x_obs', w=w, mu=mu, tau=tau, observed=x)
w = pm.Dirichlet("w", a=np.ones_like(w_true))
mu = pm.Normal("mu", mu=0.0, sd=10.0, shape=w_true.shape)
enforce_order = pm.Potential(
"enforce_order",
tt.switch(mu[0] - mu[1] <= 0, 0.0, -np.inf)
+ tt.switch(mu[1] - mu[2] <= 0, 0.0, -np.inf),
)
tau = pm.Gamma("tau", alpha=1.0, beta=1.0, shape=w_true.shape)
pm.NormalMixture("x_obs", w=w, mu=mu, tau=tau, observed=x)

# Initialization can be poorly specified, this is a hack to make it work
start = {
'mu': mu_true.copy(),
'tau_log__': np.log(1. / sigma**2),
'w_stickbreaking__': np.array([-0.03, 0.44])
"mu": mu_true.copy(),
"tau_log__": np.log(1.0 / sigma ** 2),
"w_stickbreaking__": np.array([-0.03, 0.44]),
}
return model, start

Expand All @@ -77,26 +80,34 @@ class OverheadSuite:
Just tests how long sampling from a normal distribution takes for various
samplers
"""

params = [pm.NUTS, pm.HamiltonianMC, pm.Metropolis, pm.Slice]
timer = timeit.default_timer

def setup(self, step):
self.n_steps = 10000
with pm.Model() as self.model:
pm.Normal('x', mu=0, sd=1)
pm.Normal("x", mu=0, sd=1)

def time_overhead_sample(self, step):
with self.model:
pm.sample(self.n_steps, step=step(), random_seed=1,
progressbar=False, compute_convergence_checks=False)
pm.sample(
self.n_steps,
step=step(),
random_seed=1,
progressbar=False,
compute_convergence_checks=False,
)


class ExampleSuite:
"""Implements examples to keep up with benchmarking them."""

timeout = 360.0 # give it a few minutes
timer = timeit.default_timer

def time_drug_evaluation(self):
# fmt: off
drug = np.array([101, 100, 102, 104, 102, 97, 105, 105, 98, 101,
100, 123, 105, 103, 100, 95, 102, 106, 109, 102, 82,
102, 100, 102, 102, 101, 102, 102, 103, 103, 97, 97,
Expand All @@ -107,46 +118,52 @@ def time_drug_evaluation(self):
100, 101, 102, 103, 97, 101, 101, 100, 101, 99,
101, 100, 100, 101, 100, 99, 101, 100, 102, 99,
100, 99])

y = pd.DataFrame({
'value': np.r_[drug, placebo],
'group': np.r_[['drug']*len(drug), ['placebo']*len(placebo)]
})
# fmt: on

y = pd.DataFrame(
{
"value": np.r_[drug, placebo],
"group": np.r_[["drug"] * len(drug), ["placebo"] * len(placebo)],
}
)
y_mean = y.value.mean()
y_std = y.value.std() * 2

sigma_low = 1
sigma_high = 10
with pm.Model():
group1_mean = pm.Normal('group1_mean', y_mean, sd=y_std)
group2_mean = pm.Normal('group2_mean', y_mean, sd=y_std)
group1_std = pm.Uniform('group1_std', lower=sigma_low, upper=sigma_high)
group2_std = pm.Uniform('group2_std', lower=sigma_low, upper=sigma_high)
lambda_1 = group1_std**-2
lambda_2 = group2_std**-2

nu = pm.Exponential('ν_minus_one', 1/29.) + 1

pm.StudentT('drug', nu=nu, mu=group1_mean, lam=lambda_1, observed=drug)
pm.StudentT('placebo', nu=nu, mu=group2_mean, lam=lambda_2, observed=placebo)
diff_of_means = pm.Deterministic('difference of means', group1_mean - group2_mean)
pm.Deterministic('difference of stds', group1_std - group2_std)
group1_mean = pm.Normal("group1_mean", y_mean, sd=y_std)
group2_mean = pm.Normal("group2_mean", y_mean, sd=y_std)
group1_std = pm.Uniform("group1_std", lower=sigma_low, upper=sigma_high)
group2_std = pm.Uniform("group2_std", lower=sigma_low, upper=sigma_high)
lambda_1 = group1_std ** -2
lambda_2 = group2_std ** -2

nu = pm.Exponential("ν_minus_one", 1 / 29.0) + 1

pm.StudentT("drug", nu=nu, mu=group1_mean, lam=lambda_1, observed=drug)
pm.StudentT("placebo", nu=nu, mu=group2_mean, lam=lambda_2, observed=placebo)
diff_of_means = pm.Deterministic("difference of means", group1_mean - group2_mean)
pm.Deterministic("difference of stds", group1_std - group2_std)
pm.Deterministic(
'effect size', diff_of_means / np.sqrt((group1_std**2 + group2_std**2) / 2))
pm.sample(draws=20000, cores=4, chains=4,
progressbar=False, compute_convergence_checks=False)
"effect size", diff_of_means / np.sqrt((group1_std ** 2 + group2_std ** 2) / 2)
)
pm.sample(
draws=20000, cores=4, chains=4, progressbar=False, compute_convergence_checks=False
)

def time_glm_hierarchical(self):
with glm_hierarchical_model():
pm.sample(draws=20000, cores=4, chains=4,
progressbar=False, compute_convergence_checks=False)
pm.sample(
draws=20000, cores=4, chains=4, progressbar=False, compute_convergence_checks=False
)


class NUTSInitSuite:
"""Tests initializations for NUTS sampler on models
"""
"""Tests initializations for NUTS sampler on models"""

timeout = 360.0
params = ('adapt_diag', 'jitter+adapt_diag', 'jitter+adapt_full', 'adapt_full')
params = ("adapt_diag", "jitter+adapt_diag", "jitter+adapt_full", "adapt_full")
number = 1
repeat = 1
draws = 10000
Expand All @@ -159,32 +176,49 @@ def time_glm_hierarchical_init(self, init):

def track_glm_hierarchical_ess(self, init):
with glm_hierarchical_model():
start, step = pm.init_nuts(init=init, chains=self.chains, progressbar=False, random_seed=123)
start, step = pm.init_nuts(
init=init, chains=self.chains, progressbar=False, random_seed=123
)
t0 = time.time()
trace = pm.sample(draws=self.draws, step=step, cores=4, chains=self.chains,
start=start, random_seed=100, progressbar=False,
compute_convergence_checks=False)
trace = pm.sample(
draws=self.draws,
step=step,
cores=4,
chains=self.chains,
start=start,
random_seed=100,
progressbar=False,
compute_convergence_checks=False,
)
tot = time.time() - t0
ess = float(pm.ess(trace, var_names=['mu_a'])['mu_a'].values)
ess = float(pm.ess(trace, var_names=["mu_a"])["mu_a"].values)
return ess / tot

def track_marginal_mixture_model_ess(self, init):
model, start = mixture_model()
with model:
_, step = pm.init_nuts(init=init, chains=self.chains,
progressbar=False, random_seed=123)
_, step = pm.init_nuts(
init=init, chains=self.chains, progressbar=False, random_seed=123
)
start = [{k: v for k, v in start.items()} for _ in range(self.chains)]
t0 = time.time()
trace = pm.sample(draws=self.draws, step=step, cores=4, chains=self.chains,
start=start, random_seed=100, progressbar=False,
compute_convergence_checks=False)
trace = pm.sample(
draws=self.draws,
step=step,
cores=4,
chains=self.chains,
start=start,
random_seed=100,
progressbar=False,
compute_convergence_checks=False,
)
tot = time.time() - t0
ess = pm.ess(trace, var_names=['mu'])['mu'].values.min() # worst case
ess = pm.ess(trace, var_names=["mu"])["mu"].values.min() # worst case
return ess / tot


NUTSInitSuite.track_glm_hierarchical_ess.unit = 'Effective samples per second'
NUTSInitSuite.track_marginal_mixture_model_ess.unit = 'Effective samples per second'
NUTSInitSuite.track_glm_hierarchical_ess.unit = "Effective samples per second"
NUTSInitSuite.track_marginal_mixture_model_ess.unit = "Effective samples per second"


class CompareMetropolisNUTSSuite:
Expand All @@ -200,15 +234,21 @@ def track_glm_hierarchical_ess(self, step):
if step is not None:
step = step()
t0 = time.time()
trace = pm.sample(draws=self.draws, step=step, cores=4, chains=4,
random_seed=100, progressbar=False,
compute_convergence_checks=False)
trace = pm.sample(
draws=self.draws,
step=step,
cores=4,
chains=4,
random_seed=100,
progressbar=False,
compute_convergence_checks=False,
)
tot = time.time() - t0
ess = float(pm.ess(trace, var_names=['mu_a'])['mu_a'].values)
ess = float(pm.ess(trace, var_names=["mu_a"])["mu_a"].values)
return ess / tot


CompareMetropolisNUTSSuite.track_glm_hierarchical_ess.unit = 'Effective samples per second'
CompareMetropolisNUTSSuite.track_glm_hierarchical_ess.unit = "Effective samples per second"


class DifferentialEquationSuite:
Expand All @@ -223,30 +263,34 @@ def freefall(y, t, p):

# Times for observation
times = np.arange(0, 10, 0.5)
y = np.array([
-2.01,
9.49,
15.58,
16.57,
27.58,
32.26,
35.13,
38.07,
37.36,
38.83,
44.86,
43.58,
44.59,
42.75,
46.9,
49.32,
44.06,
49.86,
46.48,
48.18
]).reshape(-1, 1)

ode_model = pm.ode.DifferentialEquation(func=freefall, times=times, n_states=1, n_theta=2, t0=0)
y = np.array(
[
-2.01,
9.49,
15.58,
16.57,
27.58,
32.26,
35.13,
38.07,
37.36,
38.83,
44.86,
43.58,
44.59,
42.75,
46.9,
49.32,
44.06,
49.86,
46.48,
48.18,
]
).reshape(-1, 1)

ode_model = pm.ode.DifferentialEquation(
func=freefall, times=times, n_states=1, n_theta=2, t0=0
)
with pm.Model() as model:
# Specify prior distributions for some of our model parameters
sigma = pm.HalfCauchy("sigma", 1)
Expand All @@ -263,4 +307,4 @@ def freefall(y, t, p):
return np.mean([ess.sigma, ess.gamma]) / tot


DifferentialEquationSuite.track_1var_2par_ode_ess.unit = 'Effective samples per second'
DifferentialEquationSuite.track_1var_2par_ode_ess.unit = "Effective samples per second"
10 changes: 3 additions & 7 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@
("Books + Videos", "learn"),
("API", "api"),
("Developer Guide", "developer_guide"),
("About PyMC3", "about")
("About PyMC3", "about"),
],
# "fixed_sidebar": "false",
# "description": "Probabilistic Programming in Python: Bayesian Modeling and Probabilistic Machine Learning with Theano"
Expand Down Expand Up @@ -264,9 +264,7 @@
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, "pymc3.tex", "PyMC3 Documentation", "PyMC developers", "manual")
]
latex_documents = [(master_doc, "pymc3.tex", "PyMC3 Documentation", "PyMC developers", "manual")]

# The name of an image file (relative to this directory) to place at the top of
# the title page.
Expand Down Expand Up @@ -330,7 +328,5 @@


def setup(app):
app.add_css_file(
"https://cdn.jsdelivr.net/npm/[email protected]/dist/semantic.min.css"
)
app.add_css_file("https://cdn.jsdelivr.net/npm/[email protected]/dist/semantic.min.css")
app.add_css_file("default.css")
Loading

0 comments on commit 2482a35

Please sign in to comment.