From 273187b75d790b1d41796d82c36545894a44c8dd Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Sat, 25 Jan 2020 09:41:51 -0300 Subject: [PATCH 01/24] update from master --- pymc3/distributions/__init__.py | 4 + pymc3/distributions/bart.py | 263 +++++++++++++++++ pymc3/distributions/tree.py | 508 ++++++++++++++++++++++++++++++++ pymc3/model.py | 35 +-- pymc3/sampling.py | 22 +- pymc3/step_methods/__init__.py | 2 + pymc3/step_methods/hmc/nuts.py | 6 +- pymc3/step_methods/pgbart.py | 214 ++++++++++++++ 8 files changed, 1009 insertions(+), 45 deletions(-) create mode 100644 pymc3/distributions/bart.py create mode 100644 pymc3/distributions/tree.py create mode 100644 pymc3/step_methods/pgbart.py diff --git a/pymc3/distributions/__init__.py b/pymc3/distributions/__init__.py index d396d61dd6d..8efb515f212 100644 --- a/pymc3/distributions/__init__.py +++ b/pymc3/distributions/__init__.py @@ -98,8 +98,11 @@ from .timeseries import MvGaussianRandomWalk from .timeseries import MvStudentTRandomWalk +from .bart import BART + from .bound import Bound + __all__ = [ "Uniform", "Flat", @@ -175,4 +178,5 @@ "Moyal", "Simulator", "fast_sample_posterior_predictive", + "BART", ] diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py new file mode 100644 index 00000000000..b48e3d5b165 --- /dev/null +++ b/pymc3/distributions/bart.py @@ -0,0 +1,263 @@ +import numpy as np +from .distribution import NoDistribution +from .tree import Tree, SplitNode, LeafNode +from pymc3.util import get_variable_name + +# from ..step_methods.bart.exceptions import ( +# BARTParamsError, +# ) + +# __all__ = ["BART"] + + +class BARTParamsError(Exception): + """Base (catch-all) BART hyper parameters exception.""" + + +class BaseBART(NoDistribution): + def __init__(self, X, Y, m=200, alpha=0.95, beta=2.0, cache_size=5000, *args, **kwargs): + + self.Y_shared = Y + self.X = X + self.Y = Y.eval() + super().__init__( + shape=X.shape[0], dtype="float64", testval=0, *args, **kwargs + ) # FIXME dtype and testvalue are nonsensical + + if not isinstance(self.X, np.ndarray) or self.X.dtype.type is not np.float64: + raise BARTParamsError( + "The design matrix X type must be numpy.ndarray where every item" + " type is numpy.float64" + ) + if self.X.ndim != 2: + raise BARTParamsError("The design matrix X must have two dimensions") + if not isinstance(self.Y, np.ndarray) or self.Y.dtype.type is not np.float64: + raise BARTParamsError( + "The response matrix Y type must be numpy.ndarray where every item" + " type is numpy.float64" + ) + if self.Y.ndim != 1: + raise BARTParamsError("The response matrix Y must have one dimension") + if self.X.shape[0] != self.Y.shape[0]: + raise BARTParamsError( + "The design matrix X and the response matrix Y must have the same number of elements" + ) + if not isinstance(m, int): + raise BARTParamsError("The number of trees m type must be int") + if m < 1: + raise BARTParamsError("The number of trees m must be greater than zero") + if not isinstance(alpha, float): + raise BARTParamsError( + "The type for the alpha parameter for the tree structure must be float" + ) + if alpha <= 0 or 1 <= alpha: + raise BARTParamsError( + "The value for the alpha parameter for the tree structure " + "must be in the interval (0, 1)" + ) + if not isinstance(beta, float): + raise BARTParamsError( + "The type for the beta parameter for the tree structure must be float" + ) + if beta < 0: + raise BARTParamsError( + "The value for the beta parameter for the tree structure " + 'must be in the interval [0, float("inf"))' + ) + + 
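+        # The (alpha, beta) pair encodes the tree-structure prior of Chipman et al.
+        # (2010), eq. 7: a node at depth d is a split node with probability
+        # alpha * (1 + d) ** -beta (see prior_log_probability_node in tree.py).
+        # A quick illustration with the defaults alpha=0.95, beta=2.0 (values are
+        # illustrative, not used by the code):
+        # >>> [round(0.95 * (1 + d) ** -2.0, 4) for d in range(3)]
+        # [0.95, 0.2375, 0.1056]
+        # so deeper nodes are increasingly likely to remain leaves.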
self.num_observations = X.shape[0]
+        self.number_variates = X.shape[1]
+        self.m = m
+        self.alpha = alpha
+        self.beta = beta
+        self._normal_dist_sampler = NormalDistributionSampler(cache_size)
+        self._disc_uniform_dist_sampler = DiscreteUniformDistributionSampler(cache_size)
+        self.trees = self.init_list_of_trees()
+
+    def init_list_of_trees(self):
+        initial_value_leaf_nodes = self.Y.mean() / self.m
+        initial_idx_data_points_leaf_nodes = np.array(range(self.num_observations), dtype="int32")
+        list_of_trees = []
+        for i in range(self.m):
+            new_tree = Tree.init_tree(
+                tree_id=i,
+                leaf_node_value=initial_value_leaf_nodes,
+                idx_data_points=initial_idx_data_points_leaf_nodes,
+            )
+            list_of_trees.append(new_tree)
+        # Diff trick to speed up the computation of residuals.
+        # Taken from Section 3.1 of Kapelner, A and Bleich, J. bartMachine: A Powerful Tool for Machine Learning in R. ArXiv e-prints, 2013
+        # The sum_trees_output will contain the sum of the predicted output for all trees.
+        # When R_j is needed we subtract the current predicted output for tree T_j.
+        self.sum_trees_output = np.ones_like(self.Y) * self.Y.mean()
+
+        return list_of_trees
+
+    def __iter__(self):
+        return iter(self.trees)
+
+    def _repr_latex_(self):
+        raise NotImplementedError
+
+    def prediction_untransformed(self, x):
+        sum_of_trees = 0.0
+        for t in self.trees:
+            sum_of_trees += t.out_of_sample_predict(x=x)
+        return sum_of_trees
+
+    def sample_dist_splitting_variable(self, value):
+        return self._disc_uniform_dist_sampler.sample(0, value)
+
+    def sample_dist_splitting_rule_assignment(self, value):
+        return self._disc_uniform_dist_sampler.sample(0, value)
+
+    def get_available_predictors(self, idx_data_points_split_node):
+        possible_splitting_variables = []
+        for j in range(self.number_variates):
+            x_j = self.X[idx_data_points_split_node, j]
+            x_j = x_j[~np.isnan(x_j)]
+            for i in range(1, len(x_j)):
+                if x_j[i - 1] != x_j[i]:
+                    possible_splitting_variables.append(j)
+                    break
+        return possible_splitting_variables
+
+    def get_available_splitting_rules(self, idx_data_points_split_node, idx_split_variable):
+        x_j = self.X[idx_data_points_split_node, idx_split_variable]
+        x_j = x_j[~np.isnan(x_j)]
+        values, indices = np.unique(x_j, return_index=True)
+        # The last value is not considered since, if we chose it as the value of
+        # the splitting rule assignment, it would leave the right subtree empty.
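+        # Worked example (values are illustrative): if x_j = [1., 1., 2., 3.],
+        # np.unique returns values [1., 2., 3.] and first-occurrence indices
+        # [0, 2, 3], so the candidate split values are [1., 2.] with indices [0, 2].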
+        return values[:-1], indices[:-1]
+
+    def grow_tree(self, tree, index_leaf_node):
+        # This can be unsuccessful when there are no available predictors
+        successful_grow_tree = False
+        current_node = tree.get_node(index_leaf_node)
+
+        available_predictors = self.get_available_predictors(current_node.idx_data_points)
+
+        if not available_predictors:
+            return successful_grow_tree
+
+        index_selected_predictor = self.sample_dist_splitting_variable(len(available_predictors))
+        selected_predictor = available_predictors[index_selected_predictor]
+
+        available_splitting_rules, _ = self.get_available_splitting_rules(
+            current_node.idx_data_points, selected_predictor
+        )
+        index_selected_splitting_rule = self.sample_dist_splitting_rule_assignment(
+            len(available_splitting_rules)
+        )
+        selected_splitting_rule = available_splitting_rules[index_selected_splitting_rule]
+
+        new_split_node = SplitNode(
+            index=index_leaf_node,
+            idx_split_variable=selected_predictor,
+            split_value=selected_splitting_rule,
+        )
+
+        left_node_idx_data_points, right_node_idx_data_points = self.get_new_idx_data_points(
+            new_split_node, current_node.idx_data_points
+        )
+
+        left_node_value = self.draw_leaf_value(tree, left_node_idx_data_points)
+        right_node_value = self.draw_leaf_value(tree, right_node_idx_data_points)
+
+        new_left_node = LeafNode(
+            index=current_node.get_idx_left_child(),
+            value=left_node_value,
+            idx_data_points=left_node_idx_data_points,
+        )
+        new_right_node = LeafNode(
+            index=current_node.get_idx_right_child(),
+            value=right_node_value,
+            idx_data_points=right_node_idx_data_points,
+        )
+        tree.grow_tree(index_leaf_node, new_split_node, new_left_node, new_right_node)
+        successful_grow_tree = True
+
+        return successful_grow_tree
+
+    def get_new_idx_data_points(self, current_split_node, idx_data_points):
+        idx_split_variable = current_split_node.idx_split_variable
+        split_value = current_split_node.split_value
+
+        left_idx = np.nonzero(self.X[idx_data_points, idx_split_variable] <= split_value)
+        left_node_idx_data_points = idx_data_points[left_idx]
+        right_idx = np.nonzero(~(self.X[idx_data_points, idx_split_variable] <= split_value))
+        right_node_idx_data_points = idx_data_points[right_idx]
+
+        return left_node_idx_data_points, right_node_idx_data_points
+
+    def get_residuals(self, tree):
+        R_j = self.Y - (self.sum_trees_output - tree.predict_output(self.num_observations))
+        return R_j
+
+    def draw_leaf_value(self, tree, idx_data_points):
+        raise NotImplementedError
+
+
+class DiscreteUniformDistributionSampler:
+    """
+    Draw samples from a discrete uniform distribution.
+    Samples are uniformly distributed over the half-open interval [low, high) (includes low, but excludes high).
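+    For example, sample(0, 5) returns an int in {0, 1, 2, 3, 4}, each with
+    probability 1/5.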
+ """ + + def __init__(self, cache_size=1000): + self._cache_size = cache_size + self._cache = [] + + def sample(self, lower_limit, upper_limit): + if len(self._cache) == 0: + self.refresh_cache() + return int(lower_limit + (upper_limit - lower_limit) * self._cache.pop()) + + def refresh_cache(self): + self._cache = list(np.random.random(size=self._cache_size)) + + +class NormalDistributionSampler: + def __init__(self, cache_size=1000): + self._cache_size = cache_size + self._cache = [] + + def sample(self): + if len(self._cache) == 0: + self.refresh_cache() + return self._cache.pop() + + def refresh_cache(self): + self._cache = list(np.random.normal(size=self._cache_size)) + + +class BART(BaseBART): + def __init__(self, X, Y, m=200, alpha=0.95, beta=2.0): + super().__init__(X, Y, m, alpha, beta) + + def _repr_latex_(self, name=None, dist=None): + if dist is None: + dist = self + X = (type(self.X),) + Y = (type(self.Y),) + m = (self.m,) + alpha = self.alpha + beta = self.beta + m = self.m + name = r"\text{%s}" % name + return r"""${} \sim \text{{BART}}(\mathit{{alpha}}={},~\mathit{{beta}}={},~\mathit{{m}}={})$""".format( + name, alpha, beta, m + ) + + def draw_leaf_value(self, tree, idx_data_points): + R_j = self.get_residuals(tree) + node_responses = R_j[idx_data_points] + + data_mean = ( + node_responses.mean() / self.m + ) # for skewed distribution use median or sample from data-points? + data_std_scaled = node_responses.std() / self.m + + draw = data_mean # (data_mean + self._normal_dist_sampler.sample()) * data_std_scaled + + return draw diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py new file mode 100644 index 00000000000..8cc6d188227 --- /dev/null +++ b/pymc3/distributions/tree.py @@ -0,0 +1,508 @@ +import numpy as np +import math +from copy import deepcopy + + +class TreeStructureError(Exception): + """Base (catch-all) tree structure exception.""" + + +class TreeNodeError(Exception): + """Base (catch-all) tree node exception.""" + + +class Tree: + """ Full binary tree + A full binary tree is a tree where each node has exactly zero or two children. + This structure is used as the basic component of the Bayesian Additive Regression Tree (BART) + Attributes + ---------- + tree_structure : dict + A dictionary that represents the nodes stored in breadth-first order, based in the array method + for storing binary trees (https://en.wikipedia.org/wiki/Binary_tree#Arrays). + The dictionary's keys are integers that represent the nodes position. + The dictionary's values are objects of type SplitNode or LeafNode that represent the nodes of the tree itself. + num_nodes : int + Total number of nodes. + idx_leaf_nodes : list + List with the index of the leaf nodes of the tree. + idx_prunable_split_nodes : list + List with the index of the prunable splitting nodes of the tree. A splitting node is prunable if both + its children are leaf nodes. + tree_id : int + Identifier used to get the previous tree in the ParticleGibbs algorithm used in BART. 
+ + Parameters + ---------- + tree_id : int, optional + """ + + def __init__(self, tree_id=0): + self.tree_structure = {} + self.num_nodes = 0 + self.idx_leaf_nodes = [] + self.idx_prunable_split_nodes = [] + self.tree_id = tree_id + + def __getitem__(self, index): + return self.get_node(index) + + def __setitem__(self, index, node): + self.set_node(index, node) + + def __delitem__(self, index): + self.delete_node(index) + + def __iter__(self): + return iter(self.tree_structure.values()) + + def __eq__(self, other): + # The idx_leaf_nodes and idx_prunable_split_nodes are transformed to sets to correctly check for equivalence + # in case the values are not ordered in the same way. + return ( + self.tree_structure == other.tree_structure + and self.num_nodes == other.num_nodes + and set(self.idx_leaf_nodes) == set(other.idx_leaf_nodes) + and set(self.idx_prunable_split_nodes) == set(other.idx_prunable_split_nodes) + and self.tree_id == other.tree_id + ) + + def __hash__(self): + # Method added to create a set of trees. + return 0 + + def __len__(self): + return len(self.tree_structure) + + def __repr__(self): + return "Tree(num_nodes={})".format(self.num_nodes) + + def __str__(self): + lines = self._build_tree_string(index=0, show_index=False, delimiter="-")[0] + return "\n" + "\n".join((line.rstrip() for line in lines)) + + def _build_tree_string(self, index, show_index=False, delimiter="-"): + """Recursively walk down the binary tree and build a pretty-print string. + + In each recursive call, a "box" of characters visually representing the + current (sub)tree is constructed line by line. Each line is padded with + whitespaces to ensure all lines in the box have the same length. Then the + box, its width, and start-end positions of its root node value repr string + (required for drawing branches) are sent up to the parent call. The parent + call then combines its left and right sub-boxes to build a larger box etc. 
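+
+        For a leaf node the box is a single line holding the leaf value; for a
+        split node the first line holds the ``x[idx] <= value`` text and the
+        second line holds the ``/`` and ``\`` branches drawn above the two
+        sub-boxes.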
+ """ + if index not in self.tree_structure: + return [], 0, 0, 0 + + line1 = [] + line2 = [] + current_node = self.get_node(index) + if show_index: + node_repr = "{}{}{}".format(index, delimiter, str(current_node)) + else: + node_repr = str(current_node) + + new_root_width = gap_size = len(node_repr) + + left_child = current_node.get_idx_left_child() + right_child = current_node.get_idx_right_child() + + # Get the left and right sub-boxes, their widths, and root repr positions + l_box, l_box_width, l_root_start, l_root_end = self._build_tree_string( + left_child, show_index, delimiter + ) + r_box, r_box_width, r_root_start, r_root_end = self._build_tree_string( + right_child, show_index, delimiter + ) + + # Draw the branch connecting the current root node to the left sub-box + # Pad the line with whitespaces where necessary + if l_box_width > 0: + l_root = (l_root_start + l_root_end) // 2 + 1 + line1.append(" " * (l_root + 1)) + line1.append("_" * (l_box_width - l_root)) + line2.append(" " * l_root + "/") + line2.append(" " * (l_box_width - l_root)) + new_root_start = l_box_width + 1 + gap_size += 1 + else: + new_root_start = 0 + + # Draw the representation of the current root node + line1.append(node_repr) + line2.append(" " * new_root_width) + + # Draw the branch connecting the current root node to the right sub-box + # Pad the line with whitespaces where necessary + if r_box_width > 0: + r_root = (r_root_start + r_root_end) // 2 + line1.append("_" * r_root) + line1.append(" " * (r_box_width - r_root + 1)) + line2.append(" " * r_root + "\\") + line2.append(" " * (r_box_width - r_root)) + gap_size += 1 + new_root_end = new_root_start + new_root_width - 1 + + # Combine the left and right sub-boxes with the branches drawn above + gap = " " * gap_size + new_box = ["".join(line1), "".join(line2)] + for i in range(max(len(l_box), len(r_box))): + l_line = l_box[i] if i < len(l_box) else " " * l_box_width + r_line = r_box[i] if i < len(r_box) else " " * r_box_width + new_box.append(l_line + gap + r_line) + + # Return the new box, its width and its root repr positions + return new_box, len(new_box[0]), new_root_start, new_root_end + + def copy(self): + return deepcopy(self) + + def get_node(self, index): + if not isinstance(index, int) or index < 0: + raise TreeStructureError("Node index must be a non-negative int") + if index not in self.tree_structure: + raise TreeStructureError("Node missing at index {}".format(index)) + return self.tree_structure[index] + + def set_node(self, index, node): + if not isinstance(index, int) or index < 0: + raise TreeStructureError("Node index must be a non-negative int") + if not isinstance(node, SplitNode) and not isinstance(node, LeafNode): + raise TreeStructureError("Node class must be SplitNode or LeafNode") + if index in self.tree_structure: + raise TreeStructureError("Node index already exist in tree") + if self.num_nodes == 0 and index != 0: + raise TreeStructureError("Root node must have index zero") + parent_index = node.get_idx_parent_node() + if self.num_nodes != 0 and parent_index not in self.tree_structure: + raise TreeStructureError("Invalid index, node must have a parent node") + if self.num_nodes != 0 and not isinstance(self.get_node(parent_index), SplitNode): + raise TreeStructureError("Parent node must be of class SplitNode") + if index != node.index: + raise TreeStructureError("Node must have same index as tree index") + self.tree_structure[index] = node + self.num_nodes += 1 + if isinstance(node, LeafNode): + 
self.idx_leaf_nodes.append(index) + + def delete_node(self, index): + if not isinstance(index, int) or index < 0: + raise TreeStructureError("Node index must be a non-negative int") + if index not in self.tree_structure: + raise TreeStructureError("Node missing at index {}".format(index)) + current_node = self.get_node(index) + left_child_idx = current_node.get_idx_left_child() + right_child_idx = current_node.get_idx_right_child() + if left_child_idx in self.tree_structure or right_child_idx in self.tree_structure: + raise TreeStructureError("Invalid removal of node, leaving two orphans nodes") + if isinstance(current_node, LeafNode): + self.idx_leaf_nodes.remove(index) + del self.tree_structure[index] + self.num_nodes -= 1 + + def make_digraph(self, name="Tree"): + """Make graphviz Digraph of the tree. + + Parameters + ---------- + name : str + Name used for the Digraph. Useful to differentiate digraph of trees. + + Returns + ------- + graphviz.Digraph + """ + try: + import graphviz + except ImportError: + raise ImportError( + "This function requires the python library graphviz, along with binaries. " + "The easiest way to install all of this is by running\n\n" + "\tconda install -c conda-forge python-graphviz" + ) + graph = graphviz.Digraph(name) + graph = self._digraph_tree_traversal(0, graph) + return graph + + def _digraph_tree_traversal(self, index, graph): + if index not in self.tree_structure.keys(): + return graph + current_node = self.get_node(index) + style = "" + if isinstance(current_node, SplitNode): + shape = "box" + if index in self.idx_prunable_split_nodes: + style = "filled" + else: + shape = "ellipse" + node_name = "{}_{}".format(graph.name, index) + graph.node(name=node_name, label=str(current_node), shape=shape, style=style) + + parent_index = current_node.get_idx_parent_node() + if parent_index in self.tree_structure: + tail_name = "{}_{}".format(graph.name, parent_index) + head_name = "{}_{}".format(graph.name, index) + if current_node.is_left_child(): + graph.edge(tail_name=tail_name, head_name=head_name, label="T") + else: + graph.edge(tail_name=tail_name, head_name=head_name, label="F") + + left_child = current_node.get_idx_left_child() + right_child = current_node.get_idx_right_child() + graph = self._digraph_tree_traversal(left_child, graph) + graph = self._digraph_tree_traversal(right_child, graph) + + return graph + + def predict_output(self, num_observations): + output = np.zeros(num_observations) + for node_index in self.idx_leaf_nodes: + current_node = self.get_node(node_index) + output[current_node.idx_data_points] = current_node.value + return output + + def out_of_sample_predict(self, x): + """ + Predict output of tree for an unobserved point. + + Parameters + ---------- + x : np.ndarray + + Returns + ------- + float + Value of the leaf value where the unobserved point lies. + """ + leaf_node = self._traverse_tree(x=x, node_index=0) + return leaf_node.value + + def _traverse_tree(self, x, node_index=0): + """ + Traverse the tree starting from a particular node given an unobserved point. 
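+        At each SplitNode the rule ``x[idx_split_variable] <= split_value`` is
+        evaluated: the traversal descends to the left child when the rule holds
+        and to the right child otherwise, until a LeafNode is reached.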
+ + Parameters + ---------- + x : np.ndarray + node_index : int + + Returns + ------- + LeafNode + """ + current_node = self.get_node(node_index) + if isinstance(current_node, SplitNode): + if current_node.evaluate_splitting_rule(x): + left_child = current_node.get_idx_left_child() + final_node = self._traverse_tree(x, left_child) + else: + right_child = current_node.get_idx_right_child() + final_node = self._traverse_tree(x, right_child) + else: + final_node = current_node + return final_node + + def grow_tree(self, index_leaf_node, new_split_node, new_left_node, new_right_node): + """ + Grow the tree from a particular node. + + Parameters + ---------- + index_leaf_node : int + new_split_node : SplitNode + new_left_node : LeafNode + new_right_node : LeafNode + """ + current_node = self.get_node(index_leaf_node) + if not isinstance(current_node, LeafNode): + raise TreeStructureError("The tree grows from the leaves") + if not isinstance(new_split_node, SplitNode): + raise TreeStructureError("The node that replaces the leaf node must be SplitNode") + if not isinstance(new_left_node, LeafNode) or not isinstance(new_right_node, LeafNode): + raise TreeStructureError("The new leaves must be LeafNode") + + self.delete_node(index_leaf_node) + self.set_node(index_leaf_node, new_split_node) + self.set_node(new_left_node.index, new_left_node) + self.set_node(new_right_node.index, new_right_node) + + # The new SplitNode is a prunable node since it has both children. + self.idx_prunable_split_nodes.append(index_leaf_node) + # If the parent of the node from which the tree is growing was a prunable node, + # remove from the list since one of its children is a SplitNode now + parent_index = current_node.get_idx_parent_node() + if parent_index in self.idx_prunable_split_nodes: + self.idx_prunable_split_nodes.remove(parent_index) + + @staticmethod + def init_tree(tree_id, leaf_node_value, idx_data_points): + """ + + Parameters + ---------- + tree_id + leaf_node_value + idx_data_points + + Returns + ------- + + """ + new_tree = Tree(tree_id) + new_tree[0] = LeafNode(index=0, value=leaf_node_value, idx_data_points=idx_data_points) + return new_tree + + def get_tree_depth(self): + """ + + Returns + ------- + + """ + max_depth = 0 + for idx_leaf_node in self.idx_leaf_nodes: + leaf_node = self.get_node(idx_leaf_node) + if max_depth < leaf_node.depth: + max_depth = leaf_node.depth + return max_depth + + +class BaseNode: + def __init__(self, index): + if not isinstance(index, int) or index < 0: + raise TreeNodeError("Node index must be a non-negative int") + self.index = index + self.depth = int(math.floor(math.log(index + 1, 2))) + + def __eq__(self, other): + return self.index == other.index and self.depth == other.depth + + def get_idx_parent_node(self): + return (self.index - 1) // 2 + + def get_idx_left_child(self): + return self.index * 2 + 1 + + def get_idx_right_child(self): + return self.get_idx_left_child() + 1 + + def is_left_child(self): + return bool(self.index % 2) + + def get_idx_sibling(self): + return (self.index + 1) if self.is_left_child() else (self.index - 1) + + +class SplitNode(BaseNode): + def __init__(self, index, idx_split_variable, split_value): + super().__init__(index) + + if not isinstance(idx_split_variable, int) or idx_split_variable < 0: + raise TreeNodeError("Index of split variable must be a non-negative int") + if not isinstance(split_value, float): + raise TreeNodeError("Node split value type must be float") + + self.idx_split_variable = idx_split_variable + self.split_value = 
split_value + + def __repr__(self): + return "SplitNode(index={}, idx_split_variable={}, split_value={})".format( + self.index, self.idx_split_variable, self.split_value + ) + + def __str__(self): + return "x[{}] <= {}".format(self.idx_split_variable, self.split_value) + + def __eq__(self, other): + if isinstance(other, SplitNode): + return ( + super().__eq__(other) + and self.idx_split_variable == other.idx_split_variable + and self.split_value == other.split_value + ) + else: + return NotImplemented + + def evaluate_splitting_rule(self, x): + if x is np.NaN: + return False + else: + return x[self.idx_split_variable] <= self.split_value + + def prior_log_probability_node(self, alpha, beta): + """ + Calculate the log probability of the node being a SplitNode. + Taken from equation 7 in [Chipman2010]. + + Parameters + ---------- + alpha : float + beta : float + + Returns + ------- + float + + References + ---------- + .. [Chipman2010] Chipman, H. A., George, E. I., & McCulloch, R. E. (2010). BART: Bayesian + additive regression trees. The Annals of Applied Statistics, 4(1), 266-298., + `link `__ + """ + return np.log(alpha * np.power(1.0 + self.depth, -beta)) + + +class LeafNode(BaseNode): + def __init__(self, index, value, idx_data_points): + super().__init__(index) + if not isinstance(value, float): + raise TreeNodeError("Leaf node value type must be float") + if ( + not isinstance(idx_data_points, np.ndarray) + or idx_data_points.dtype.type is not np.int32 + ): + raise TreeNodeError("Index of data points must be a numpy.ndarray of np.int32") + if len(idx_data_points) == 0: + raise TreeNodeError("Index of data points can not be empty") + self.value = value + self.idx_data_points = idx_data_points + + def __repr__(self): + return "LeafNode(index={}, value={}, len(idx_data_points)={})".format( + self.index, self.value, len(self.idx_data_points) + ) + + def __str__(self): + return "{}".format(self.value) + + def __eq__(self, other): + if isinstance(other, LeafNode): + return ( + super().__eq__(other) + and self.value == other.value + and np.array_equal(self.idx_data_points, other.idx_data_points) + ) + else: + return NotImplemented + + def prior_log_probability_node(self, alpha, beta): + """ + Calculate the log probability of the node being a LeafNode. + Taken from equation 7 in [Chipman2010]. + + Parameters + ---------- + alpha : float + beta : float + + Returns + ------- + float + + References + ---------- + .. [Chipman2010] Chipman, H. A., George, E. I., & McCulloch, R. E. (2010). BART: Bayesian + additive regression trees. 
The Annals of Applied Statistics, 4(1), 266-298., + `link `__ + """ + return np.log(1.0 - alpha * np.power(1.0 + self.depth, -beta)) diff --git a/pymc3/model.py b/pymc3/model.py index ca18a3278d0..0b9f2e810b5 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1144,11 +1144,7 @@ def Var(self, name, dist, data=None, total_size=None, dims=None): elif isinstance(data, dict): with self: var = MultiObservedRV( - name=name, - data=data, - distribution=dist, - total_size=total_size, - model=self, + name=name, data=data, distribution=dist, total_size=total_size, model=self, ) self.observed_RVs.append(var) if var.missing_values: @@ -1159,11 +1155,7 @@ def Var(self, name, dist, data=None, total_size=None, dims=None): else: with self: var = ObservedRV( - name=name, - data=data, - distribution=dist, - total_size=total_size, - model=self, + name=name, data=data, distribution=dist, total_size=total_size, model=self, ) self.observed_RVs.append(var) if var.missing_values: @@ -1662,10 +1654,7 @@ def __init__( self.scaling = _get_scaling(total_size, self.shape, self.ndim) incorporate_methods( - source=distribution, - destination=self, - methods=["random"], - wrapper=InstanceMethod, + source=distribution, destination=self, methods=["random"], wrapper=InstanceMethod, ) @property @@ -1721,10 +1710,7 @@ def as_tensor(data, name, model, distribution): testval = np.broadcast_to(distribution.default(), data.shape)[data.mask] fakedist = NoDistribution.dist( - shape=data.mask.sum(), - dtype=dtype, - testval=testval, - parent_dist=distribution, + shape=data.mask.sum(), dtype=dtype, testval=testval, parent_dist=distribution, ) missing_values = FreeRV(name=name + "_missing", distribution=fakedist, model=model) constant = tt.as_tensor_variable(data.filled()) @@ -1977,10 +1963,7 @@ def __init__( self.tag.test_value = normalRV.tag.test_value self.scaling = _get_scaling(total_size, self.shape, self.ndim) incorporate_methods( - source=distribution, - destination=self, - methods=["random"], - wrapper=InstanceMethod, + source=distribution, destination=self, methods=["random"], wrapper=InstanceMethod, ) @property @@ -1997,10 +1980,12 @@ def as_iterargs(data): def all_continuous(vars): - """Check that vars not include discrete variables, excepting - ObservedRVs.""" + """Check that vars not include discrete variables or BART variables, excepting ObservedRVs.""" + vars_ = [var for var in vars if not isinstance(var, pm.model.ObservedRV)] - if any([var.dtype in pm.discrete_types for var in vars_]): + if any( + [(var.dtype in pm.discrete_types or isinstance(var.distribution, pm.BART)) for var in vars_] + ): return False else: return True diff --git a/pymc3/sampling.py b/pymc3/sampling.py index f577a5aa8e5..65126c28911 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -51,6 +51,7 @@ Slice, CompoundStep, arraystep, + PGBART, ) from .util import ( update_start_vals, @@ -90,6 +91,7 @@ BinaryGibbsMetropolis, Slice, CategoricalGibbsMetropolis, + PGBART, ) ArrayLike = Union[np.ndarray, List[float]] @@ -657,14 +659,7 @@ def _check_start_shape(model, start): def _sample_many( - draws, - chain: int, - chains: int, - start: list, - random_seed: list, - step, - callback=None, - **kwargs, + draws, chain: int, chains: int, start: list, random_seed: list, step, callback=None, **kwargs, ): """Samples all chains sequentially. 
@@ -1001,8 +996,7 @@ def _iter_sample( if callback is not None: warns = getattr(step, "warnings", None) callback( - trace=strace, - draw=Draw(chain, i == draws, i, i < tune, stats, point, warns), + trace=strace, draw=Draw(chain, i == draws, i, i < tune, stats, point, warns), ) yield strace, diverging @@ -1955,13 +1949,7 @@ def sample_prior_predictive( def init_nuts( - init="auto", - chains=1, - n_init=500000, - model=None, - random_seed=None, - progressbar=True, - **kwargs, + init="auto", chains=1, n_init=500000, model=None, random_seed=None, progressbar=True, **kwargs, ): """Set up the mass matrix initialization for NUTS. diff --git a/pymc3/step_methods/__init__.py b/pymc3/step_methods/__init__.py index 5755e99eb51..f2cd57e60c9 100644 --- a/pymc3/step_methods/__init__.py +++ b/pymc3/step_methods/__init__.py @@ -33,3 +33,5 @@ from .slicer import Slice from .elliptical_slice import EllipticalSlice + +from .pgbart import PGBART diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py index 782a350654c..5d3bcc52870 100644 --- a/pymc3/step_methods/hmc/nuts.py +++ b/pymc3/step_methods/hmc/nuts.py @@ -23,9 +23,9 @@ from pymc3.math import logbern, logdiffexp_numpy from pymc3.theanof import floatX from pymc3.vartypes import continuous_types +from ...distributions import BART - -__all__ = ["NUTS"] +__all__ = ['NUTS'] class NUTS(BaseHMC): @@ -196,7 +196,7 @@ def _hamiltonian_step(self, start, p0, step_size): @staticmethod def competence(var, has_grad): """Check how appropriate this class is for sampling a random variable.""" - if var.dtype in continuous_types and has_grad: + if var.dtype in continuous_types and has_grad and not isinstance(var.distribution, BART): return Competence.IDEAL return Competence.INCOMPATIBLE diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py new file mode 100644 index 00000000000..f72aedb5160 --- /dev/null +++ b/pymc3/step_methods/pgbart.py @@ -0,0 +1,214 @@ +import numpy as np + +from .arraystep import ArrayStepShared, Competence +from ..distributions import BART +from ..distributions.tree import Tree +from ..model import modelcontext +from ..theanof import inputvars, make_shared_replacements, floatX +from ..smc.smc_utils import logp_forw + + +class PGBART(ArrayStepShared): + """ + """ + + name = "bartsampler" + default_blocked = False + + def __init__(self, vars=None, m=2, num_particles=10, max_stages=5000, model=None): + model = modelcontext(model) + vars = inputvars(vars) + self.bart = vars[0].distribution + + self.num_particles = num_particles + self.max_stages = max_stages + self.previous_trees_particles_list = [] + for i in range(self.bart.m): + p = Particle(self.bart.trees[i]) + self.previous_trees_particles_list.append(p) + + shared = make_shared_replacements(vars, model) + self.likelihood_logp = logp_forw([model.datalogpt], vars, shared) + super().__init__(vars, shared) + + def astep(self, q_0): + # Step 4 of algorithm + bart = self.bart + num_observations = bart.num_observations + likelihood_logp = self.likelihood_logp + output = np.zeros((bart.m, num_observations)) + log_num_particles = np.log(self.num_particles) + for idx, tree in enumerate(bart.trees): + R_j = bart.get_residuals(tree) + bart.Y_shared.set_value(R_j) + list_of_particles = self.init_particles(tree.tree_id, R_j, bart.num_observations) + # Step 5 of algorithm + old_likelihoods = np.array( + [ + likelihood_logp(p.tree.predict_output(num_observations)) + for p in list_of_particles + ] + ) + log_weights = np.array(old_likelihoods) + + list_of_particles[0] = 
self.get_previous_tree_particle(tree.tree_id, 0)
+            log_weights[0] = likelihood_logp(
+                list_of_particles[0].tree.predict_output(num_observations)
+            )
+            old_likelihoods[0] = log_weights[0]
+            log_weights -= log_num_particles
+
+            for p_idx, p in enumerate(list_of_particles):
+                p.weight = log_weights[p_idx]
+
+            for t in range(1, self.max_stages + 2):
+                # Step 7 of algorithm
+                list_of_particles[0] = self.get_previous_tree_particle(tree.tree_id, t)
+                for c in range(
+                    1, self.num_particles
+                ):  # This should be embarrassingly parallelizable
+                    # Step 9 of algorithm
+                    list_of_particles[c].sample_tree_sequential(bart)
+                # for c in range(self.num_particles):
+                #     list_of_particles[c].update_weight()
+
+                # Step 12 of algorithm
+                for p_idx, p in enumerate(list_of_particles):
+                    new_likelihood = likelihood_logp(p.tree.predict_output(num_observations))
+                    # p.weight is actually log_weight
+                    # print(new_likelihood - old_likelihoods[p_idx])
+                    p.weight += new_likelihood - old_likelihoods[p_idx]
+                    old_likelihoods[p_idx] = new_likelihood
+                    log_weights[p_idx] = p.weight
+
+                W, normalized_weights = self._normalize(log_weights)
+
+                # Step 15 of algorithm
+                # list_of_particles = self.resample(list_of_particles, normalized_weights)
+                # resample all but first particle
+                re_n_w = normalized_weights[1:] / normalized_weights[1:].sum()
+                indices = range(1, len(list_of_particles))
+                new_indices = np.random.choice(indices, size=len(list_of_particles) - 1, p=re_n_w)
+                list_of_particles[1:] = np.array(list_of_particles)[new_indices]
+                old_likelihoods[1:] = old_likelihoods[new_indices]
+
+                # Step 16 of algorithm
+                w_t = W - log_num_particles
+                for c in range(self.num_particles):
+                    list_of_particles[c].weight = w_t
+
+                # ### XXX Review this!!!!
+                # # Step 17 of algorithm
+                # non_available_nodes_for_expansion = np.ones(self.num_particles)
+                # for c in range(self.num_particles):
+                #     if len(list_of_particles[c].expansion_nodes) != 0:
+                #         non_available_nodes_for_expansion[c] = 0
+                # if np.all(non_available_nodes_for_expansion):
+                #     break
+                non_available_nodes_for_expansion = True
+                for c in range(self.num_particles):
+                    if len(list_of_particles[c].expansion_nodes) != 0:
+                        non_available_nodes_for_expansion = False
+                        break
+                if non_available_nodes_for_expansion:
+                    break
+
+            # print(idx, t, normalized_weights)
+            new_tree = np.random.choice(list_of_particles, p=normalized_weights)
+            self.previous_trees_particles_list[tree.tree_id] = new_tree
+            bart.trees[idx] = new_tree.tree
+            new_prediction = new_tree.tree.predict_output(num_observations)
+            output[idx] = new_prediction
+            bart.sum_trees_output = bart.Y - R_j + new_prediction
+
+        return np.sum(output, axis=0)
+
+    @staticmethod
+    def competence(var, has_grad):
+        """
+        PGBART is only suitable for BART distributions
+        """
+        if isinstance(var.distribution, BART):
+            return Competence.IDEAL
+        return Competence.INCOMPATIBLE
+
+    def _normalize(self, log_w):  # this function needs a home sweet home
+        """
+        use logsumexp trick to get W and softmax to get normalized_weights
+        """
+        log_w_max = log_w.max()
+        log_w_ = log_w - log_w_max
+        w_ = np.exp(log_w_)
+        w_sum = w_.sum()
+        W = log_w_max + np.log(w_sum)
+        normalized_weights = w_ / w_sum
+        # stabilize weights to avoid assigning exactly zero probability to a particle
+        normalized_weights += 1e-12
+
+        return W, normalized_weights
+
+    def get_previous_tree_particle(self, tree_id, t):
+        previous_tree_particle = self.previous_trees_particles_list[tree_id]
+        previous_tree_particle.set_particle_to_step(t)
+        return previous_tree_particle
+
+    def init_particles(self, tree_id, R_j, num_observations):
+        list_of_particles = []
+        initial_value_leaf_nodes = R_j.mean()
+        initial_idx_data_points_leaf_nodes = np.array(range(num_observations), dtype="int32")
+        new_tree = Tree.init_tree(
+            tree_id=tree_id,
+            leaf_node_value=initial_value_leaf_nodes,
+            idx_data_points=initial_idx_data_points_leaf_nodes,
+        )
+        for _ in range(self.num_particles):
+            new_particle = Particle(new_tree)
+            list_of_particles.append(new_particle)
+        return list_of_particles
+
+    def resample(self, list_of_particles, normalized_weights):
+        list_of_particles = np.random.choice(
+            list_of_particles, size=len(list_of_particles), p=normalized_weights
+        )
+        return list_of_particles
+
+
+class Particle:
+    def __init__(self, tree):
+        self.tree = tree.copy()  # Holds the tree we are currently interested in
+        self.expansion_nodes = self.tree.idx_leaf_nodes.copy()  # This should be the array [0]
+        self.tree_history = [self.tree.copy()]
+        self.expansion_nodes_history = [self.expansion_nodes.copy()]
+        self.weight = 0.0
+
+    def sample_tree_sequential(self, bart):
+        if self.expansion_nodes:
+            index_leaf_node = self.expansion_nodes.pop(0)
+            # Probability that this node will remain a leaf node
+            log_prob = self.tree[index_leaf_node].prior_log_probability_node(bart.alpha, bart.beta)
+
+            if np.exp(log_prob) < np.random.random():
+                self.grow_successful = bart.grow_tree(self.tree, index_leaf_node)
+                # TODO: in case the grow_tree fails, should we try to sample the tree from another leaf node?
+                if self.grow_successful:
+                    # Add new leaf nodes indexes
+                    new_indexes = self.tree.idx_leaf_nodes[-2:]
+                    self.expansion_nodes.extend(new_indexes)
+            self.tree_history.append(self.tree.copy())
+            self.expansion_nodes_history.append(self.expansion_nodes.copy())
+
+    def set_particle_to_step(self, t):
+        if len(self.tree_history) <= t:
+            self.tree = self.tree_history[-1]
+            self.expansion_nodes = self.expansion_nodes_history[-1]
+        else:
+            self.tree = self.tree_history[t]
+            self.expansion_nodes = self.expansion_nodes_history[t]
+
+    def init_weight(self):
+        # TODO
+        return 1.0
+
+    def update_weight(self):
+        # TODO
+        pass

From 9d4f73fc4aa9acd67896897032700f97bc4f8348 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Sat, 25 Jan 2020 09:46:09 -0300
Subject: [PATCH 02/24] black

---
 pymc3/step_methods/hmc/nuts.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
index 5d3bcc52870..5810470f300 100644
--- a/pymc3/step_methods/hmc/nuts.py
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -196,7 +196,7 @@ def _hamiltonian_step(self, start, p0, step_size):
     @staticmethod
     def competence(var, has_grad):
         """Check how appropriate this class is for sampling a random variable."""
-        if var.dtype in continuous_types and has_grad  and not isinstance(var.distribution, BART):
+        if var.dtype in continuous_types and has_grad and not isinstance(var.distribution, BART):
             return Competence.IDEAL
         return Competence.INCOMPATIBLE

From beaf184d5663d02a1d6a312d2b32725392cb42c9 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Mon, 27 Jan 2020 13:15:24 -0300
Subject: [PATCH 03/24] minor fix

---
 pymc3/distributions/bart.py  | 6 ++----
 pymc3/smc/smc.py             | 1 +
 pymc3/step_methods/pgbart.py | 2 +-
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index b48e3d5b165..08f61bf1d3f 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -253,11 +253,9 @@ def draw_leaf_value(self, tree, idx_data_points):
         R_j = self.get_residuals(tree)
         node_responses = R_j[idx_data_points]
 
-        data_mean = (
-            node_responses.mean() / self.m
-        )  # for skewed distribution use median or sample from data-points?
+        data_mean = node_responses.mean()  # for skewed distribution use median or sample from data-points?
         data_std_scaled = node_responses.std() / self.m
 
-        draw = data_mean  # (data_mean + self._normal_dist_sampler.sample()) * data_std_scaled
+        draw = (data_mean + self._normal_dist_sampler.sample() * data_std_scaled)
 
         return draw
diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index d6069122952..69b94054da0 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -392,3 +392,4 @@ def __call__(self, posterior):
         if self.save_log_pseudolikelihood:
             self.save_lpl(elemwise)
         return elemwise.sum()
+
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index f72aedb5160..30d422003c9 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -5,7 +5,7 @@
 from ..distributions.tree import Tree
 from ..model import modelcontext
 from ..theanof import inputvars, make_shared_replacements, floatX
-from ..smc.smc_utils import logp_forw
+from ..smc.smc import logp_forw
 
 
 class PGBART(ArrayStepShared):

From f11a57f6e4aabe131c52f142af57c55089666300 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Thu, 30 Jan 2020 13:08:12 -0300
Subject: [PATCH 04/24] clean code

---
 pymc3/step_methods/pgbart.py | 62 ++++++++++++++++++++----------------
 1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 30d422003c9..2f60325c5f5 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -10,12 +10,26 @@
 
 class PGBART(ArrayStepShared):
     """
+    Particle Gibbs sampling step for Bayesian Additive Regression Trees (BART).
+
+    References
+    ----------
+    .. [Lakshminarayanan2015] Lakshminarayanan, B. and Roy, D.M. and Teh, Y. W., (2015),
+        Particle Gibbs for Bayesian Additive Regression Trees.
+        arXiv,
+        `link `__
+
+    Parameters
+    ----------
+    vars : list
+        List of variables for the sampler; it should contain the model's BART variable.
+    num_particles : int
+        Number of particles for the conditional SMC sampler. Defaults to 10.
+    max_stages : int
+        Maximum number of stages (tree-growing iterations) of the conditional SMC sampler.
+        Defaults to 5000.
+    model : PyMC3 Model
+        Optional model for the sampling step. Defaults to None (taken from the context).
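+
+    Example
+    -------
+    A minimal sketch. ``X`` is an (n, p) float64 design matrix and ``Y`` a
+    shared variable holding the float64 response (the sampler updates it in
+    place); the step is assigned automatically by ``pm.sample`` through the
+    competence mechanism:
+
+    >>> with pm.Model() as model:
+    ...     mu = pm.BART("mu", X, Y, m=20)
+    ...     sigma = pm.HalfNormal("sigma", 1.0)
+    ...     obs = pm.Normal("obs", mu, sigma, observed=Y.eval())
+    ...     trace = pm.sample()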
""" - name = "bartsampler" default_blocked = False - def __init__(self, vars=None, m=2, num_particles=10, max_stages=5000, model=None): + def __init__(self, vars=None, num_particles=10, max_stages=5000, model=None): model = modelcontext(model) vars = inputvars(vars) self.bart = vars[0].distribution @@ -59,32 +73,26 @@ def astep(self, q_0): log_weights -= log_num_particles for p_idx, p in enumerate(list_of_particles): - p.weight = log_weights[p_idx] + p.log_weight = log_weights[p_idx] for t in range(1, self.max_stages + 2): # Step 7 of algorithm list_of_particles[0] = self.get_previous_tree_particle(tree.tree_id, t) - for c in range( - 1, self.num_particles - ): # This should be embarrassingly parallelizable + # This should be embarrassingly parallelizable + for c in range(1, self.num_particles): # Step 9 of algorithm list_of_particles[c].sample_tree_sequential(bart) - # for c in range(self.num_particles): - # list_of_particles[c].update_weight() # Step 12 of algorithm for p_idx, p in enumerate(list_of_particles): new_likelihood = likelihood_logp(p.tree.predict_output(num_observations)) - # p.weight is actually log_weight - # print(new_likelihood - old_likelihoods[p_idx]) - p.weight += new_likelihood - old_likelihoods[p_idx] + p.log_weight += new_likelihood - old_likelihoods[p_idx] old_likelihoods[p_idx] = new_likelihood - log_weights[p_idx] = p.weight + log_weights[p_idx] = p.log_weight W, normalized_weights = self._normalize(log_weights) # Step 15 of algorithm - # list_of_particles = self.resample(list_of_particles, normalized_weights) # resample all but first particle re_n_w = normalized_weights[1:] / normalized_weights[1:].sum() indices = range(1, len(list_of_particles)) @@ -95,16 +103,17 @@ def astep(self, q_0): # Step 16 of algorithm w_t = W - log_num_particles for c in range(self.num_particles): - list_of_particles[c].weight = w_t - - # ### XXX Revisar esto!!!! - # # Step 17 of algorithm - # non_available_nodes_for_expansion = np.ones(self.num_particles) - # for c in range(self.num_particles): - # if len(list_of_particles[c].expansion_nodes) != 0: - # non_available_nodes_for_expansion[c] = 0 - # if np.all(non_available_nodes_for_expansion): - # break + list_of_particles[c].log_weight = w_t + + ### Should we used this one!!! + # Step 17 of algorithm + # non_available_nodes_for_expansion = np.ones(self.num_particles) + # for c in range(self.num_particles): + # if len(list_of_particles[c].expansion_nodes) != 0: + # non_available_nodes_for_expansion[c] = 0 + # if np.all(non_available_nodes_for_expansion): + # break + ### Or this one !!! 
                 non_available_nodes_for_expansion = True
                 for c in range(self.num_particles):
                     if len(list_of_particles[c].expansion_nodes) != 0:
                         non_available_nodes_for_expansion = False
                         break
                 if non_available_nodes_for_expansion:
                     break
 
-            # print(idx, t, normalized_weights)
             new_tree = np.random.choice(list_of_particles, p=normalized_weights)
             self.previous_trees_particles_list[tree.tree_id] = new_tree
             bart.trees[idx] = new_tree.tree
@@ -134,7 +142,7 @@ def competence(var, has_grad):
 
     def _normalize(self, log_w):  # this function needs a home sweet home
         """
-        use logsumexp trick to get W and softmax to get normalized_weights
+        Use logsumexp trick to get W and softmax to get normalized_weights
         """
         log_w_max = log_w.max()
         log_w_ = log_w - log_w_max
         w_ = np.exp(log_w_)
@@ -155,7 +163,7 @@ def get_previous_tree_particle(self, tree_id, t):
     def init_particles(self, tree_id, R_j, num_observations):
         list_of_particles = []
         initial_value_leaf_nodes = R_j.mean()
-        initial_idx_data_points_leaf_nodes = np.array(range(num_observations), dtype="int32")
+        initial_idx_data_points_leaf_nodes = np.arange(num_observations, dtype="int32")
         new_tree = Tree.init_tree(
             tree_id=tree_id,
             leaf_node_value=initial_value_leaf_nodes,
@@ -179,7 +187,7 @@ def __init__(self, tree):
         self.tree = tree.copy()  # Holds the tree we are currently interested in
         self.expansion_nodes = self.tree.idx_leaf_nodes.copy()  # This should be the array [0]
         self.tree_history = [self.tree.copy()]
         self.expansion_nodes_history = [self.expansion_nodes.copy()]
-        self.weight = 0.0
+        self.log_weight = 0.0
 
     def sample_tree_sequential(self, bart):
         if self.expansion_nodes:
             index_leaf_node = self.expansion_nodes.pop(0)

From 43fed87f38710ff6a819912a9814314b5aeab7d6 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 9 Oct 2020 08:39:47 -0300
Subject: [PATCH 05/24] blackify

---
 pymc3/distributions/bart.py    |  6 ++++--
 pymc3/distributions/tree.py    |  2 +-
 pymc3/model.py                 | 27 ++++++++++++++++++++++-----
 pymc3/sampling.py              | 20 +++++++++++++++++---
 pymc3/step_methods/hmc/nuts.py |  2 +-
 pymc3/step_methods/pgbart.py   |  1 +
 6 files changed, 46 insertions(+), 12 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 08f61bf1d3f..23f1a60997a 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -253,9 +253,11 @@ def draw_leaf_value(self, tree, idx_data_points):
         R_j = self.get_residuals(tree)
         node_responses = R_j[idx_data_points]
 
-        data_mean = node_responses.mean()  # for skewed distribution use median or sample from data-points?
+        data_mean = (
+            node_responses.mean()
+        )  # for skewed distribution use median or sample from data-points?
         data_std_scaled = node_responses.std() / self.m
 
-        draw = (data_mean + self._normal_dist_sampler.sample() * data_std_scaled)
+        draw = data_mean + self._normal_dist_sampler.sample() * data_std_scaled
 
         return draw
diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py
index 8cc6d188227..2ec0cebc738 100644
--- a/pymc3/distributions/tree.py
+++ b/pymc3/distributions/tree.py
@@ -12,7 +12,7 @@
 
 
 class Tree:
-    """ Full binary tree
+    """Full binary tree
     A full binary tree is a tree where each node has exactly zero or two children.
This structure is used as the basic component of the Bayesian Additive Regression Tree (BART) Attributes diff --git a/pymc3/model.py b/pymc3/model.py index 0b9f2e810b5..5d881c172b6 100644 --- a/pymc3/model.py +++ b/pymc3/model.py @@ -1144,7 +1144,11 @@ def Var(self, name, dist, data=None, total_size=None, dims=None): elif isinstance(data, dict): with self: var = MultiObservedRV( - name=name, data=data, distribution=dist, total_size=total_size, model=self, + name=name, + data=data, + distribution=dist, + total_size=total_size, + model=self, ) self.observed_RVs.append(var) if var.missing_values: @@ -1155,7 +1159,11 @@ def Var(self, name, dist, data=None, total_size=None, dims=None): else: with self: var = ObservedRV( - name=name, data=data, distribution=dist, total_size=total_size, model=self, + name=name, + data=data, + distribution=dist, + total_size=total_size, + model=self, ) self.observed_RVs.append(var) if var.missing_values: @@ -1654,7 +1662,10 @@ def __init__( self.scaling = _get_scaling(total_size, self.shape, self.ndim) incorporate_methods( - source=distribution, destination=self, methods=["random"], wrapper=InstanceMethod, + source=distribution, + destination=self, + methods=["random"], + wrapper=InstanceMethod, ) @property @@ -1710,7 +1721,10 @@ def as_tensor(data, name, model, distribution): testval = np.broadcast_to(distribution.default(), data.shape)[data.mask] fakedist = NoDistribution.dist( - shape=data.mask.sum(), dtype=dtype, testval=testval, parent_dist=distribution, + shape=data.mask.sum(), + dtype=dtype, + testval=testval, + parent_dist=distribution, ) missing_values = FreeRV(name=name + "_missing", distribution=fakedist, model=model) constant = tt.as_tensor_variable(data.filled()) @@ -1963,7 +1977,10 @@ def __init__( self.tag.test_value = normalRV.tag.test_value self.scaling = _get_scaling(total_size, self.shape, self.ndim) incorporate_methods( - source=distribution, destination=self, methods=["random"], wrapper=InstanceMethod, + source=distribution, + destination=self, + methods=["random"], + wrapper=InstanceMethod, ) @property diff --git a/pymc3/sampling.py b/pymc3/sampling.py index 65126c28911..c819abaeb05 100644 --- a/pymc3/sampling.py +++ b/pymc3/sampling.py @@ -659,7 +659,14 @@ def _check_start_shape(model, start): def _sample_many( - draws, chain: int, chains: int, start: list, random_seed: list, step, callback=None, **kwargs, + draws, + chain: int, + chains: int, + start: list, + random_seed: list, + step, + callback=None, + **kwargs, ): """Samples all chains sequentially. @@ -996,7 +1003,8 @@ def _iter_sample( if callback is not None: warns = getattr(step, "warnings", None) callback( - trace=strace, draw=Draw(chain, i == draws, i, i < tune, stats, point, warns), + trace=strace, + draw=Draw(chain, i == draws, i, i < tune, stats, point, warns), ) yield strace, diverging @@ -1949,7 +1957,13 @@ def sample_prior_predictive( def init_nuts( - init="auto", chains=1, n_init=500000, model=None, random_seed=None, progressbar=True, **kwargs, + init="auto", + chains=1, + n_init=500000, + model=None, + random_seed=None, + progressbar=True, + **kwargs, ): """Set up the mass matrix initialization for NUTS. 
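# Note on the dispatch that the nuts.py change below relies on (a sketch,
# simplified from pymc3.sampling.assign_step_methods; the exact internals may
# differ): for each free variable, every class in STEP_METHODS reports a
# Competence level and the best one wins. After this series:
#     NUTS.competence(var, has_grad)   -> INCOMPATIBLE if isinstance(var.distribution, BART)
#     PGBART.competence(var, has_grad) -> IDEAL        if isinstance(var.distribution, BART)
# so pm.sample() assigns PGBART to the BART variable and NUTS to the rest.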
diff --git a/pymc3/step_methods/hmc/nuts.py b/pymc3/step_methods/hmc/nuts.py
index 5810470f300..7ffbf106d41 100644
--- a/pymc3/step_methods/hmc/nuts.py
+++ b/pymc3/step_methods/hmc/nuts.py
@@ -25,7 +25,7 @@
 from pymc3.vartypes import continuous_types
 from ...distributions import BART
 
-__all__ = ['NUTS']
+__all__ = ["NUTS"]
 
 
 class NUTS(BaseHMC):
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 2f60325c5f5..c7088fcd64c 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -26,6 +26,7 @@ class PGBART(ArrayStepShared):
     ...     trace = pm.sample()
     """
+
     name = "bartsampler"
     default_blocked = False
 
From f847f6605c99d55937a7389f85b6b325f46b60a4 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 9 Oct 2020 16:07:37 -0300
Subject: [PATCH 06/24] fix error residuals

---
 pymc3/distributions/bart.py  | 24 +++++++++++++-----------
 pymc3/distributions/tree.py  |  2 +-
 pymc3/step_methods/pgbart.py | 29 ++++++++++++++++++++++++++---
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 23f1a60997a..648cf401300 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -1,7 +1,7 @@
 import numpy as np
 from .distribution import NoDistribution
 from .tree import Tree, SplitNode, LeafNode
-from pymc3.util import get_variable_name
+from pymc3.util import get_var_name
 
 # from ..step_methods.bart.exceptions import (
 #     BARTParamsError,
 # )
@@ -89,7 +89,7 @@ def init_list_of_trees(self):
         # Taken from Section 3.1 of Kapelner, A and Bleich, J. bartMachine: A Powerful Tool for Machine Learning in R. ArXiv e-prints, 2013
         # The sum_trees_output will contain the sum of the predicted output for all trees.
         # When R_j is needed we subtract the current predicted output for tree T_j.
-        self.sum_trees_output = np.ones_like(self.Y) * self.Y.mean()
+        self.sum_trees_output = np.full_like(self.Y, self.Y.mean())
 
         return list_of_trees
 
@@ -190,10 +190,14 @@ def get_new_idx_data_points(self, current_split_node, idx_data_points):
 
         return left_node_idx_data_points, right_node_idx_data_points
 
-    def get_residuals(self, tree):
+    def get_residuals_loo(self, tree):
         R_j = self.Y - (self.sum_trees_output - tree.predict_output(self.num_observations))
         return R_j
 
+    def get_residuals(self):
+        R_j = self.Y - self.sum_trees_output
+        return R_j
+
     def draw_leaf_value(self, tree, idx_data_points):
         raise NotImplementedError
 
@@ -250,14 +254,12 @@ def _repr_latex_(self, name=None, dist=None):
         )
 
     def draw_leaf_value(self, tree, idx_data_points):
-        R_j = self.get_residuals(tree)
-        node_responses = R_j[idx_data_points]
-
-        data_mean = (
-            node_responses.mean()
-        )  # for skewed distribution use median or sample from data-points?
-        data_std_scaled = node_responses.std() / self.m
-
-        draw = data_mean + self._normal_dist_sampler.sample() * data_std_scaled
+        R_j = self.get_residuals()[idx_data_points]
+
+        # for skewed distribution use median or sample from data-points?
+        # data_mean = R_j.mean()
+        # data_std_scaled = R_j.std() / self.m
+        # draw = data_mean + self._normal_dist_sampler.sample() * data_std_scaled
+        draw = R_j.mean()
 
         return draw
diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py
index 2ec0cebc738..b7bfaa8b186 100644
--- a/pymc3/distributions/tree.py
+++ b/pymc3/distributions/tree.py
@@ -487,7 +487,7 @@ def __eq__(self, other):
 
     def prior_log_probability_node(self, alpha, beta):
         """
-        Calculate the log probability of the node being a LeafNode.
+ Calculate the log probability of the node being a LeafNode (1 - p(being SplitNode)). Taken from equation 7 in [Chipman2010]. Parameters diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py index c7088fcd64c..879f217d476 100644 --- a/pymc3/step_methods/pgbart.py +++ b/pymc3/step_methods/pgbart.py @@ -5,7 +5,8 @@ from ..distributions.tree import Tree from ..model import modelcontext from ..theanof import inputvars, make_shared_replacements, floatX -from ..smc.smc import logp_forw + +# from ..smc.smc import logp_forw class PGBART(ArrayStepShared): @@ -30,7 +31,7 @@ class PGBART(ArrayStepShared): name = "bartsampler" default_blocked = False - def __init__(self, vars=None, num_particles=10, max_stages=5000, model=None): + def __init__(self, vars=None, num_particles=10, max_stages=1, model=None): model = modelcontext(model) vars = inputvars(vars) self.bart = vars[0].distribution @@ -54,7 +55,7 @@ def astep(self, q_0): output = np.zeros((bart.m, num_observations)) log_num_particles = np.log(self.num_particles) for idx, tree in enumerate(bart.trees): - R_j = bart.get_residuals(tree) + R_j = bart.get_residuals_loo(tree) bart.Y_shared.set_value(R_j) list_of_particles = self.init_particles(tree.tree_id, R_j, bart.num_observations) # Step 5 of algorithm @@ -221,3 +222,25 @@ def init_weight(self): def update_weight(self): # TODO pass + + +from theano import function as theano_function +from ..theanof import join_nonshared_inputs + + +def logp_forw(out_vars, vars, shared): + """Compile Theano function of the model and the input and output variables. + + Parameters + ---------- + out_vars: List + containing :class:`pymc3.Distribution` for the output variables + vars: List + containing :class:`pymc3.Distribution` for the input variables + shared: List + containing :class:`theano.tensor.Tensor` for depended shared data + """ + out_list, inarray0 = join_nonshared_inputs(out_vars, vars, shared) + f = theano_function([inarray0], out_list[0]) + f.trust_input = True + return f From 6700a74ca4a6567a58265a4e1b9d9a3f56487c8e Mon Sep 17 00:00:00 2001 From: aloctavodia Date: Fri, 16 Oct 2020 16:02:42 -0300 Subject: [PATCH 07/24] use a low number of max_stages for the first iteration, remove not necessary errors --- pymc3/distributions/bart.py | 5 ---- pymc3/distributions/tree.py | 57 ------------------------------------ pymc3/step_methods/pgbart.py | 19 ++++++++++-- 3 files changed, 16 insertions(+), 65 deletions(-) diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py index 648cf401300..48c6d25464f 100644 --- a/pymc3/distributions/bart.py +++ b/pymc3/distributions/bart.py @@ -1,11 +1,6 @@ import numpy as np from .distribution import NoDistribution from .tree import Tree, SplitNode, LeafNode -from pymc3.util import get_var_name - -# from ..step_methods.bart.exceptions import ( -# BARTParamsError, -# ) # __all__ = ["BART"] diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py index b7bfaa8b186..fd534b06cc8 100644 --- a/pymc3/distributions/tree.py +++ b/pymc3/distributions/tree.py @@ -3,14 +3,6 @@ from copy import deepcopy -class TreeStructureError(Exception): - """Base (catch-all) tree structure exception.""" - - -class TreeNodeError(Exception): - """Base (catch-all) tree node exception.""" - - class Tree: """Full binary tree A full binary tree is a tree where each node has exactly zero or two children. 
From 6700a74ca4a6567a58265a4e1b9d9a3f56487c8e Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 16 Oct 2020 16:02:42 -0300
Subject: [PATCH 07/24] use a low number of max_stages for the first iteration,
 remove unnecessary errors

---
 pymc3/distributions/bart.py  |  5 ----
 pymc3/distributions/tree.py  | 57 ------------------------------------
 pymc3/step_methods/pgbart.py | 19 ++++++++++--
 3 files changed, 16 insertions(+), 65 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 648cf401300..48c6d25464f 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -1,11 +1,6 @@
 import numpy as np
 from .distribution import NoDistribution
 from .tree import Tree, SplitNode, LeafNode
-from pymc3.util import get_var_name
-
-# from ..step_methods.bart.exceptions import (
-#     BARTParamsError,
-# )

 # __all__ = ["BART"]

diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py
index b7bfaa8b186..fd534b06cc8 100644
--- a/pymc3/distributions/tree.py
+++ b/pymc3/distributions/tree.py
@@ -3,14 +3,6 @@
 from copy import deepcopy


-class TreeStructureError(Exception):
-    """Base (catch-all) tree structure exception."""
-
-
-class TreeNodeError(Exception):
-    """Base (catch-all) tree node exception."""
-
-
 class Tree:
     """Full binary tree
     A full binary tree is a tree where each node has exactly zero or two children.
@@ -158,43 +150,16 @@ def copy(self):
         return deepcopy(self)

     def get_node(self, index):
-        if not isinstance(index, int) or index < 0:
-            raise TreeStructureError("Node index must be a non-negative int")
-        if index not in self.tree_structure:
-            raise TreeStructureError("Node missing at index {}".format(index))
         return self.tree_structure[index]

     def set_node(self, index, node):
-        if not isinstance(index, int) or index < 0:
-            raise TreeStructureError("Node index must be a non-negative int")
-        if not isinstance(node, SplitNode) and not isinstance(node, LeafNode):
-            raise TreeStructureError("Node class must be SplitNode or LeafNode")
-        if index in self.tree_structure:
-            raise TreeStructureError("Node index already exist in tree")
-        if self.num_nodes == 0 and index != 0:
-            raise TreeStructureError("Root node must have index zero")
-        parent_index = node.get_idx_parent_node()
-        if self.num_nodes != 0 and parent_index not in self.tree_structure:
-            raise TreeStructureError("Invalid index, node must have a parent node")
-        if self.num_nodes != 0 and not isinstance(self.get_node(parent_index), SplitNode):
-            raise TreeStructureError("Parent node must be of class SplitNode")
-        if index != node.index:
-            raise TreeStructureError("Node must have same index as tree index")
         self.tree_structure[index] = node
         self.num_nodes += 1
         if isinstance(node, LeafNode):
             self.idx_leaf_nodes.append(index)

     def delete_node(self, index):
-        if not isinstance(index, int) or index < 0:
-            raise TreeStructureError("Node index must be a non-negative int")
-        if index not in self.tree_structure:
-            raise TreeStructureError("Node missing at index {}".format(index))
         current_node = self.get_node(index)
-        left_child_idx = current_node.get_idx_left_child()
-        right_child_idx = current_node.get_idx_right_child()
-        if left_child_idx in self.tree_structure or right_child_idx in self.tree_structure:
-            raise TreeStructureError("Invalid removal of node, leaving two orphans nodes")
         if isinstance(current_node, LeafNode):
             self.idx_leaf_nodes.remove(index)
         del self.tree_structure[index]
@@ -314,12 +279,6 @@ def grow_tree(self, index_leaf_node, new_split_node, new_left_node, new_right_no
         new_right_node : LeafNode
         """
         current_node = self.get_node(index_leaf_node)
-        if not isinstance(current_node, LeafNode):
-            raise TreeStructureError("The tree grows from the leaves")
-        if not isinstance(new_split_node, SplitNode):
-            raise TreeStructureError("The node that replaces the leaf node must be SplitNode")
-        if not isinstance(new_left_node, LeafNode) or not isinstance(new_right_node, LeafNode):
-            raise TreeStructureError("The new leaves must be LeafNode")

         self.delete_node(index_leaf_node)
         self.set_node(index_leaf_node, new_split_node)
@@ -369,8 +328,6 @@ def get_tree_depth(self):

 class BaseNode:
     def __init__(self, index):
-        if not isinstance(index, int) or index < 0:
-            raise TreeNodeError("Node index must be a non-negative int")
         self.index = index
         self.depth = int(math.floor(math.log(index + 1, 2)))

@@ -397,11 +354,6 @@ class SplitNode(BaseNode):
     def __init__(self, index, idx_split_variable, split_value):
         super().__init__(index)

-        if not isinstance(idx_split_variable, int) or idx_split_variable < 0:
-            raise TreeNodeError("Index of split variable must be a non-negative int")
-        if not isinstance(split_value, float):
-            raise TreeNodeError("Node split value type must be float")
-
         self.idx_split_variable = idx_split_variable
         self.split_value = split_value

@@ -455,15 +407,6 @@ class LeafNode(BaseNode):
     def __init__(self, index, value, idx_data_points):
         super().__init__(index)
-        if not isinstance(value, float):
-            raise TreeNodeError("Leaf node value type must be float")
-        if (
-            not isinstance(idx_data_points, np.ndarray)
-            or idx_data_points.dtype.type is not np.int32
-        ):
-            raise TreeNodeError("Index of data points must be a numpy.ndarray of np.int32")
-        if len(idx_data_points) == 0:
-            raise TreeNodeError("Index of data points can not be empty")
         self.value = value
         self.idx_data_points = idx_data_points
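With the validation stripped out, the node bookkeeping rests entirely on the usual
full-binary-tree (heap) index arithmetic that these classes use. A small self-contained
check of those formulas (helper names here are illustrative):

    import math

    def idx_parent(index):  # as in BaseNode.get_idx_parent_node: (index - 1) // 2
        return (index - 1) // 2

    def idx_left(index):  # as in BaseNode.get_idx_left_child: index * 2 + 1
        return index * 2 + 1

    def node_depth(index):  # as in BaseNode.depth
        return int(math.floor(math.log(index + 1, 2)))

    # Root is 0; children of node i are 2i + 1 and 2i + 2.
    assert idx_parent(idx_left(0)) == 0
    assert node_depth(0) == 0 and node_depth(6) == 2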
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 879f217d476..43784b28b8b 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -4,7 +4,7 @@
 from ..distributions import BART
 from ..distributions.tree import Tree
 from ..model import modelcontext
-from ..theanof import inputvars, make_shared_replacements, floatX
+from ..theanof import inputvars, make_shared_replacements

 # from ..smc.smc import logp_forw

@@ -31,13 +31,14 @@ class PGBART(ArrayStepShared):
     name = "bartsampler"
     default_blocked = False

-    def __init__(self, vars=None, num_particles=10, max_stages=1, model=None):
+    def __init__(self, vars=None, num_particles=10, max_stages=5000, model=None):
         model = modelcontext(model)
         vars = inputvars(vars)
         self.bart = vars[0].distribution

         self.num_particles = num_particles
         self.max_stages = max_stages
+        self.first_iteration = True
         self.previous_trees_particles_list = []
         for i in range(self.bart.m):
             p = Particle(self.bart.trees[i])
@@ -48,8 +49,18 @@ def __init__(self, vars=None, num_particles=10, max_stages=1, model=None):
         super().__init__(vars, shared)

     def astep(self, q_0):
+        # For the first iteration we restrict max_stages to a low number, otherwise it is almost sure
+        # we will reach max_stages given that our first set of m trees is not good at all.
+        # maybe we can set max_stages by using some function of the number of variables/dimensions.
+        if self.first_iteration:
+            max_stages = 5
+            self.first_iteration = False
+        else:
+            max_stages = self.max_stages
+
         # Step 4 of algorithm
         bart = self.bart
+
         num_observations = bart.num_observations
         likelihood_logp = self.likelihood_logp
         output = np.zeros((bart.m, num_observations))
@@ -57,6 +68,8 @@ def astep(self, q_0):
         for idx, tree in enumerate(bart.trees):
             R_j = bart.get_residuals_loo(tree)
             bart.Y_shared.set_value(R_j)
+            # generate an initial set of SMC particles
+            # At the end of the algorithm we return one of these particles as a new tree
             list_of_particles = self.init_particles(tree.tree_id, R_j, bart.num_observations)
             # Step 5 of algorithm
             old_likelihoods = np.array(
                 [
                     likelihood_logp(p.tree.predict_output(num_observations))
                     for p in list_of_particles
                 ]
             )
             log_weights = np.array(old_likelihoods)

             list_of_particles[0] = self.get_previous_tree_particle(tree.tree_id, 0)
             log_weights[0] = likelihood_logp(
                 list_of_particles[0].tree.predict_output(num_observations)
             )
             old_likelihoods[0] = log_weights[0]
             log_weights -= log_num_particles

             for p_idx, p in enumerate(list_of_particles):
                 p.log_weight = log_weights[p_idx]

-            for t in range(1, self.max_stages + 2):
+            for t in range(1, max_stages):
                 # Step 7 of algorithm
                 list_of_particles[0] = self.get_previous_tree_particle(tree.tree_id, t)
                 # This should be embarrassingly parallelizable
From ac96b1a38004884090586c6e4aacc0cff11ed821 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 16 Oct 2020 20:21:33 -0300
Subject: [PATCH 08/24] use Rockova prior, refactor prior leaf prob computation

---
 pymc3/distributions/bart.py  | 25 ++++++--------------
 pymc3/distributions/tree.py  | 44 ------------------------------------
 pymc3/step_methods/pgbart.py | 43 +++++++++++++++++++++++++++++++----
 3 files changed, 45 insertions(+), 67 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 48c6d25464f..80aa5200963 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -10,7 +10,7 @@ class BARTParamsError(Exception):


 class BaseBART(NoDistribution):
-    def __init__(self, X, Y, m=200, alpha=0.95, beta=2.0, cache_size=5000, *args, **kwargs):
+    def __init__(self, X, Y, m=200, alpha=0.25, cache_size=5000, *args, **kwargs):

         self.Y_shared = Y
         self.X = X
@@ -45,26 +45,16 @@ def __init__(self, X, Y, m=200, alpha=0.95, beta=2.0, cache_size=5000, *args, **
             raise BARTParamsError(
                 "The type for the alpha parameter for the tree structure must be float"
             )
-        if alpha <= 0 or 1 <= alpha:
+        if alpha <= 0 or 0.5 <= alpha:
             raise BARTParamsError(
                 "The value for the alpha parameter for the tree structure "
-                "must be in the interval (0, 1)"
+                "must be in the interval (0, 0.5)"
             )
-        if not isinstance(beta, float):
-            raise BARTParamsError(
-                "The type for the beta parameter for the tree structure must be float"
-            )
-        if beta < 0:
-            raise BARTParamsError(
-                "The value for the beta parameter for the tree structure "
-                'must be in the interval [0, float("inf"))'
-            )

         self.num_observations = X.shape[0]
         self.number_variates = X.shape[1]
         self.m = m
         self.alpha = alpha
-        self.beta = beta
         self._normal_dist_sampler = NormalDistributionSampler(cache_size)
         self._disc_uniform_dist_sampler = DiscreteUniformDistributionSampler(cache_size)
         self.trees = self.init_list_of_trees()
@@ -231,8 +221,8 @@ def refresh_cache(self):


 class BART(BaseBART):
-    def __init__(self, X, Y, m=200, alpha=0.95, beta=2.0):
-        super().__init__(X, Y, m, alpha, beta)
+    def __init__(self, X, Y, m=200, alpha=0.25):
+        super().__init__(X, Y, m, alpha)

     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         X = (type(self.X),)
         Y = (type(self.Y),)
         m = (self.m,)
         alpha = self.alpha
-        beta = self.beta
         m = self.m
         name = r"\text{%s}" % name
-        return r"""${} \sim \text{{BART}}(\mathit{{alpha}}={},~\mathit{{beta}}={},~\mathit{{m}}={})$""".format(
-            name, alpha, beta, m
+        return r"""${} \sim \text{{BART}}(\mathit{{alpha}}={},~\mathit{{m}}={})$""".format(
+            name, alpha, m
         )

     def draw_leaf_value(self, tree, idx_data_points):
diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py
index fd534b06cc8..39fffcc7906 100644
--- a/pymc3/distributions/tree.py
+++ b/pymc3/distributions/tree.py
@@ -381,28 +381,6 @@ def evaluate_splitting_rule(self, x):
         else:
             return x[self.idx_split_variable] <= self.split_value

-    def prior_log_probability_node(self, alpha, beta):
-        """
-        Calculate the log probability of the node being a SplitNode.
-        Taken from equation 7 in [Chipman2010].
-
-        Parameters
-        ----------
-        alpha : float
-        beta : float
-
-        Returns
-        -------
-        float
-
-        References
-        ----------
-        .. [Chipman2010] Chipman, H. A., George, E. I., & McCulloch, R. E. (2010). BART: Bayesian
-            additive regression trees. The Annals of Applied Statistics, 4(1), 266-298.,
-            `link `__
-        """
-        return np.log(alpha * np.power(1.0 + self.depth, -beta))
-

 class LeafNode(BaseNode):
     def __init__(self, index, value, idx_data_points):
@@ -427,25 +405,3 @@ def __eq__(self, other):
             )
         else:
             return NotImplemented
-
-    def prior_log_probability_node(self, alpha, beta):
-        """
-        Calculate the log probability of the node being a LeafNode (1 - p(being SplitNode)).
-        Taken from equation 7 in [Chipman2010].
-
-        Parameters
-        ----------
-        alpha : float
-        beta : float
-
-        Returns
-        -------
-        float
-
-        References
-        ----------
-        .. [Chipman2010] Chipman, H. A., George, E. I., & McCulloch, R. E. (2010). BART: Bayesian
-            additive regression trees. The Annals of Applied Statistics, 4(1), 266-298.,
-            `link `__
-        """
-        return np.log(1.0 - alpha * np.power(1.0 + self.depth, -beta))
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 43784b28b8b..fee09b6649c 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -35,13 +35,14 @@ def __init__(self, vars=None, num_particles=10, max_stages=5000, model=None):
         model = modelcontext(model)
         vars = inputvars(vars)
         self.bart = vars[0].distribution
+        self.prior_prob_leaf_node = _compute_prior_probability(self.bart.alpha)

         self.num_particles = num_particles
         self.max_stages = max_stages
         self.first_iteration = True
         self.previous_trees_particles_list = []
         for i in range(self.bart.m):
-            p = Particle(self.bart.trees[i])
+            p = Particle(self.bart.trees[i], self.prior_prob_leaf_node)
             self.previous_trees_particles_list.append(p)
@@ -185,7 +186,7 @@ def init_particles(self, tree_id, R_j, num_observations):
             idx_data_points=initial_idx_data_points_leaf_nodes,
         )
         for _ in range(self.num_particles):
-            new_particle = Particle(new_tree)
+            new_particle = Particle(new_tree, self.prior_prob_leaf_node)
             list_of_particles.append(new_particle)
         return list_of_particles

@@ -197,20 +198,24 @@ def resample(self, list_of_particles, normalized_weights):

 class Particle:
-    def __init__(self, tree):
+    def __init__(self, tree, prior_prob_leaf_node):
         self.tree = tree.copy()  # Mantiene el arbol que nos interesa en este momento
         self.expansion_nodes = self.tree.idx_leaf_nodes.copy()  # This should be the array [0]
         self.tree_history = [self.tree.copy()]
         self.expansion_nodes_history = [self.expansion_nodes.copy()]
         self.log_weight = 0.0
+        self.prior_prob_leaf_node = prior_prob_leaf_node

     def sample_tree_sequential(self, bart):
         if self.expansion_nodes:
             index_leaf_node = self.expansion_nodes.pop(0)
             # Probability that this node will remain a leaf node
-            log_prob = self.tree[index_leaf_node].prior_log_probability_node(bart.alpha, bart.beta)
+            try:
+                prob_leaf = self.prior_prob_leaf_node[self.tree[index_leaf_node].depth]
+            except IndexError:
+                prob_leaf = 1

-            if np.exp(log_prob) < np.random.random():
+            if prob_leaf < np.random.random():
                 self.grow_successful = bart.grow_tree(self.tree, index_leaf_node)
                 # TODO: in case the grow_tree fails, should we try to sample the tree from another leaf node?
                 if self.grow_successful:
@@ -237,6 +242,34 @@ def update_weight(self):
         pass


+def _compute_prior_probability(alpha):
+    """
+    Calculate the probability of the node being a LeafNode (1 - p(being SplitNode)).
+    Taken from equation 19 in [On Theory for BART]. XXX FIX reference below!
+
+    Parameters
+    ----------
+    alpha : float
+
+    Returns
+    -------
+    float
+
+    References
+    ----------
+    .. [Chipman2010] Chipman, H. A., George, E. I., & McCulloch, R. E. (2010). BART: Bayesian
+        additive regression trees. The Annals of Applied Statistics, 4(1), 266-298.,
+        `link `__
+    """
+    prior_leaf_prob = [0]
+    depth = 1
+    prob = 1
+    while prob < 1:
+        prob = 1 - alpha ** depth
+        depth += 1
+    return prior_leaf_prob
+
+
 from theano import function as theano_function
 from ..theanof import join_nonshared_inputs
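For intuition about the Rockova-style prior introduced here: the table that
`_compute_prior_probability` is meant to build assigns a node at depth d the leaf
probability 1 - alpha**d. The version in the diff never fills the list because of its
`prob = 1` initialization; a self-contained sketch of the intended computation (matching
the form the next patch settles on), with the new default alpha:

    alpha = 0.25

    prior_leaf_prob = [0]
    depth = 1
    while prior_leaf_prob[-1] < 1:
        prior_leaf_prob.append(1 - alpha ** depth)
        depth += 1

    # P(leaf) by depth: [0, 0.75, 0.9375, 0.984375, ...] -- deep trees are rare.
    print(prior_leaf_prob[:4])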
From 7d54bfa71985494b514c7ebcc9dd3149d45dd33b Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Mon, 19 Oct 2020 16:25:07 -0300
Subject: [PATCH 09/24] clean code, add docstring

---
 pymc3/distributions/bart.py  | 42 ++++++++++++++++++++----------------
 pymc3/step_methods/pgbart.py | 14 ++++++------
 2 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 80aa5200963..16c9a84301c 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -10,11 +10,12 @@ class BARTParamsError(Exception):


 class BaseBART(NoDistribution):
-    def __init__(self, X, Y, m=200, alpha=0.25, cache_size=5000, *args, **kwargs):
+    def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):

         self.Y_shared = Y
         self.X = X
         self.Y = Y.eval()
+        self.cache_size = 1000
         super().__init__(
             shape=X.shape[0], dtype="float64", testval=0, *args, **kwargs
         )  # FIXME dtype and testvalue are nonsensical
@@ -45,18 +46,17 @@ def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
             raise BARTParamsError(
                 "The type for the alpha parameter for the tree structure must be float"
             )
-        if alpha <= 0 or 0.5 <= alpha:
+        if alpha <= 0 or 1 <= alpha:
             raise BARTParamsError(
                 "The value for the alpha parameter for the tree structure "
-                "must be in the interval (0, 0.5)"
+                "must be in the interval (0, 1)"
             )

         self.num_observations = X.shape[0]
         self.number_variates = X.shape[1]
         self.m = m
         self.alpha = alpha
-        self._normal_dist_sampler = NormalDistributionSampler(cache_size)
-        self._disc_uniform_dist_sampler = DiscreteUniformDistributionSampler(cache_size)
+        self._disc_uniform_dist_sampler = DiscreteUniformDistributionSampler(self.cache_size)
         self.trees = self.init_list_of_trees()

     def init_list_of_trees(self):
@@ -206,21 +206,25 @@ def refresh_cache(self):
         self._cache = list(np.random.random(size=self._cache_size))


-class NormalDistributionSampler:
-    def __init__(self, cache_size=1000):
-        self._cache_size = cache_size
-        self._cache = []
-
-    def sample(self):
-        if len(self._cache) == 0:
-            self.refresh_cache()
-        return self._cache.pop()
-
-    def refresh_cache(self):
-        self._cache = list(np.random.normal(size=self._cache_size))
-
-
 class BART(BaseBART):
+    """
+    BART distribution.
+
+    Distribution representing a sum over trees
+
+    Parameters
+    ----------
+    X :
+        The design matrix.
+    Y :
+        The response vector.
+    m : int
+        Number of trees
+    alpha : float
+        Control the prior probability over the depth of the trees. Must be in the interval (0, 1),
+        although it is recommended to be between in the interval (0, 0.5].
+    """
+
     def __init__(self, X, Y, m=200, alpha=0.25):
         super().__init__(X, Y, m, alpha)

diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index fee09b6649c..e5d21a3dc39 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -210,10 +210,11 @@ def sample_tree_sequential(self, bart):
         if self.expansion_nodes:
             index_leaf_node = self.expansion_nodes.pop(0)
             # Probability that this node will remain a leaf node
-            # try:
-            #     print(self.tree[index_leaf_node].depth)
             prob_leaf = self.prior_prob_leaf_node[self.tree[index_leaf_node].depth]
-            # except IndexError:
-            #     prob_leaf = 1

             if prob_leaf < np.random.random():
@@ -263,9 +264,8 @@ def _compute_prior_probability(alpha):
     """
     prior_leaf_prob = [0]
     depth = 1
-    prob = 1
-    while prob < 1:
-        prob = 1 - alpha ** depth
+    while prior_leaf_prob[-1] < 1:
+        prior_leaf_prob.append(1 - alpha ** depth)
         depth += 1
     return prior_leaf_prob

From b566d50dc5c702a81fb4d87bab51b3f1f4e1c143 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Tue, 20 Oct 2020 18:44:34 -0300
Subject: [PATCH 10/24] reduce code

---
 pymc3/distributions/bart.py  |  77 +++--------
 pymc3/distributions/tree.py  | 242 +----------------------------------
 pymc3/step_methods/pgbart.py |  28 ++--
 3 files changed, 30 insertions(+), 317 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 16c9a84301c..40189afa51e 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -2,7 +2,7 @@
 from .distribution import NoDistribution
 from .tree import Tree, SplitNode, LeafNode

-# __all__ = ["BART"]
+__all__ = ["BART"]


 class BARTParamsError(Exception):
@@ -11,10 +11,8 @@ class BARTParamsError(Exception):

 class BaseBART(NoDistribution):
     def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
-
-        self.Y_shared = Y
         self.X = X
-        self.Y = Y.eval()
+        self.Y = Y
         super().__init__(
             shape=X.shape[0], dtype="float64", testval=0, *args, **kwargs
         )  # FIXME dtype and testvalue are nonsensical
@@ -27,11 +26,11 @@ def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
             )
         if self.X.ndim != 2:
             raise BARTParamsError("The design matrix X must have two dimensions")
-        if not isinstance(self.Y, np.ndarray) or self.Y.dtype.type is not np.float64:
-            raise BARTParamsError(
-                "The response matrix Y type must be numpy.ndarray where every item"
-                " type is numpy.float64"
-            )
+        # if not isinstance(self.Y, np.ndarray) or self.Y.dtype.type is not np.float64:
+        #     raise BARTParamsError(
+        #         "The response matrix Y type must be numpy.ndarray where every item"
+        #         " type is numpy.float64"
+        #     )
         if self.Y.ndim != 1:
             raise BARTParamsError("The response matrix Y must have one dimension")
         if self.X.shape[0] != self.Y.shape[0]:
@@ -42,10 +41,7 @@ def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
             raise BARTParamsError("The number of trees m type must be int")
         if m < 1:
             raise BARTParamsError("The number of trees m must be greater than zero")
-        if not isinstance(alpha, float):
-            raise BARTParamsError(
-                "The type for the alpha parameter for the tree structure must be float"
-            )
+
         if alpha <= 0 or 1 <= alpha:
             raise BARTParamsError(
                 "The value for the alpha parameter for the tree structure "
@@ -56,7 +52,6 @@ def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
         self.number_variates = X.shape[1]
         self.m = m
         self.alpha = alpha
-        self._disc_uniform_dist_sampler = DiscreteUniformDistributionSampler(self.cache_size)
         self.trees = self.init_list_of_trees()

     def init_list_of_trees(self):
@@ -84,18 +79,6 @@ def __iter__(self):
     def __repr_latex(self):
         raise NotImplementedError

-    def prediction_untransformed(self, x):
-        sum_of_trees = 0.0
-        for t in self.trees:
-            sum_of_trees += t.out_of_sample_predict(x=x)
-        return sum_of_trees
-
-    def sample_dist_splitting_variable(self, value):
-        return self._disc_uniform_dist_sampler.sample(0, value)
-
-    def sample_dist_splitting_rule_assignment(self, value):
-        return self._disc_uniform_dist_sampler.sample(0, value)
-
     def get_available_predictors(self, idx_data_points_split_node):
         possible_splitting_variables = []
         for j in range(self.number_variates):
@@ -125,15 +108,13 @@ def grow_tree(self, tree, index_leaf_node):
         if not available_predictors:
             return successful_grow_tree

-        index_selected_predictor = self.sample_dist_splitting_variable(len(available_predictors))
+        index_selected_predictor = discrete_uniform_sampler(len(available_predictors))
         selected_predictor = available_predictors[index_selected_predictor]
         available_splitting_rules, _ = self.get_available_splitting_rules(
             current_node.idx_data_points, selected_predictor
         )
-        index_selected_splitting_rule = self.sample_dist_splitting_rule_assignment(
-            len(available_splitting_rules)
-        )
+        index_selected_splitting_rule = discrete_uniform_sampler(len(available_splitting_rules))
         selected_splitting_rule = available_splitting_rules[index_selected_splitting_rule]

         new_split_node = SplitNode(
@@ -146,8 +127,8 @@ def grow_tree(self, tree, index_leaf_node):
             new_split_node, current_node.idx_data_points
         )

-        left_node_value = self.draw_leaf_value(tree, left_node_idx_data_points)
-        right_node_value = self.draw_leaf_value(tree, right_node_idx_data_points)
+        left_node_value = self.draw_leaf_value(left_node_idx_data_points)
+        right_node_value = self.draw_leaf_value(right_node_idx_data_points)

         new_left_node = LeafNode(
             index=current_node.get_idx_left_child(),
@@ -183,27 +164,16 @@ def get_residuals(self):
         R_j = self.Y - self.sum_trees_output
         return R_j

-    def draw_leaf_value(self, tree, idx_data_points):
-        raise NotImplementedError
-
-
-class DiscreteUniformDistributionSampler:
-    """
-    Draw samples from a discrete uniform distribution.
-    Samples are uniformly distributed over the half-open interval [low, high) (includes low, but excludes high).
-    """
-
-    def __init__(self, cache_size=1000):
-        self._cache_size = cache_size
-        self._cache = []
+    def draw_leaf_value(self, idx_data_points):
+        # draw the residual mean
+        R_j = self.get_residuals()[idx_data_points]
+        draw = R_j.mean()

-    def sample(self, lower_limit, upper_limit):
-        if len(self._cache) == 0:
-            self.refresh_cache()
-        return int(lower_limit + (upper_limit - lower_limit) * self._cache.pop())
+        return draw

-    def refresh_cache(self):
-        self._cache = list(np.random.random(size=self._cache_size))
+
+def discrete_uniform_sampler(upper_value):
+    return int(np.random.random() * upper_value)


 class BART(BaseBART):
diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py
index 39fffcc7906..5b7d9f2a99f 100644
--- a/pymc3/distributions/tree.py
+++ b/pymc3/distributions/tree.py
@@ -42,110 +42,6 @@ def __getitem__(self, index):
     def __setitem__(self, index, node):
         self.set_node(index, node)

-    def __delitem__(self, index):
-        self.delete_node(index)
-
-    def __iter__(self):
-        return iter(self.tree_structure.values())
-
-    def __eq__(self, other):
-        # The idx_leaf_nodes and idx_prunable_split_nodes are transformed to sets to correctly check for equivalence
-        # in case the values are not ordered in the same way.
-        return (
-            self.tree_structure == other.tree_structure
-            and self.num_nodes == other.num_nodes
-            and set(self.idx_leaf_nodes) == set(other.idx_leaf_nodes)
-            and set(self.idx_prunable_split_nodes) == set(other.idx_prunable_split_nodes)
-            and self.tree_id == other.tree_id
-        )
-
-    def __hash__(self):
-        # Method added to create a set of trees.
-        return 0
-
-    def __len__(self):
-        return len(self.tree_structure)
-
-    def __repr__(self):
-        return "Tree(num_nodes={})".format(self.num_nodes)
-
-    def __str__(self):
-        lines = self._build_tree_string(index=0, show_index=False, delimiter="-")[0]
-        return "\n" + "\n".join((line.rstrip() for line in lines))
-
-    def _build_tree_string(self, index, show_index=False, delimiter="-"):
-        """Recursively walk down the binary tree and build a pretty-print string.
-
-        In each recursive call, a "box" of characters visually representing the
-        current (sub)tree is constructed line by line. Each line is padded with
-        whitespaces to ensure all lines in the box have the same length. Then the
-        box, its width, and start-end positions of its root node value repr string
-        (required for drawing branches) are sent up to the parent call. The parent
-        call then combines its left and right sub-boxes to build a larger box etc.
-        """
-        if index not in self.tree_structure:
-            return [], 0, 0, 0
-
-        line1 = []
-        line2 = []
-        current_node = self.get_node(index)
-        if show_index:
-            node_repr = "{}{}{}".format(index, delimiter, str(current_node))
-        else:
-            node_repr = str(current_node)
-
-        new_root_width = gap_size = len(node_repr)
-
-        left_child = current_node.get_idx_left_child()
-        right_child = current_node.get_idx_right_child()
-
-        # Get the left and right sub-boxes, their widths, and root repr positions
-        l_box, l_box_width, l_root_start, l_root_end = self._build_tree_string(
-            left_child, show_index, delimiter
-        )
-        r_box, r_box_width, r_root_start, r_root_end = self._build_tree_string(
-            right_child, show_index, delimiter
-        )
-
-        # Draw the branch connecting the current root node to the left sub-box
-        # Pad the line with whitespaces where necessary
-        if l_box_width > 0:
-            l_root = (l_root_start + l_root_end) // 2 + 1
-            line1.append(" " * (l_root + 1))
-            line1.append("_" * (l_box_width - l_root))
-            line2.append(" " * l_root + "/")
-            line2.append(" " * (l_box_width - l_root))
-            new_root_start = l_box_width + 1
-            gap_size += 1
-        else:
-            new_root_start = 0
-
-        # Draw the representation of the current root node
-        line1.append(node_repr)
-        line2.append(" " * new_root_width)
-
-        # Draw the branch connecting the current root node to the right sub-box
-        # Pad the line with whitespaces where necessary
-        if r_box_width > 0:
-            r_root = (r_root_start + r_root_end) // 2
-            line1.append("_" * r_root)
-            line1.append(" " * (r_box_width - r_root + 1))
-            line2.append(" " * r_root + "\\")
-            line2.append(" " * (r_box_width - r_root))
-            gap_size += 1
-        new_root_end = new_root_start + new_root_width - 1
-
-        # Combine the left and right sub-boxes with the branches drawn above
-        gap = " " * gap_size
-        new_box = ["".join(line1), "".join(line2)]
-        for i in range(max(len(l_box), len(r_box))):
-            l_line = l_box[i] if i < len(l_box) else " " * l_box_width
-            r_line = r_box[i] if i < len(r_box) else " " * r_box_width
-            new_box.append(l_line + gap + r_line)
-
-        # Return the new box, its width and its root repr positions
-        return new_box, len(new_box[0]), new_root_start, new_root_end
-
     def copy(self):
         return deepcopy(self)

@@ -165,60 +61,6 @@ def delete_node(self, index):
         del self.tree_structure[index]
         self.num_nodes -= 1

-    def make_digraph(self, name="Tree"):
-        """Make graphviz Digraph of the tree.
-
-        Parameters
-        ----------
-        name : str
-            Name used for the Digraph. Useful to differentiate digraph of trees.
-
-        Returns
-        -------
-        graphviz.Digraph
-        """
-        try:
-            import graphviz
-        except ImportError:
-            raise ImportError(
-                "This function requires the python library graphviz, along with binaries. "
-                "The easiest way to install all of this is by running\n\n"
-                "\tconda install -c conda-forge python-graphviz"
-            )
-        graph = graphviz.Digraph(name)
-        graph = self._digraph_tree_traversal(0, graph)
-        return graph
-
-    def _digraph_tree_traversal(self, index, graph):
-        if index not in self.tree_structure.keys():
-            return graph
-        current_node = self.get_node(index)
-        style = ""
-        if isinstance(current_node, SplitNode):
-            shape = "box"
-            if index in self.idx_prunable_split_nodes:
-                style = "filled"
-        else:
-            shape = "ellipse"
-        node_name = "{}_{}".format(graph.name, index)
-        graph.node(name=node_name, label=str(current_node), shape=shape, style=style)
-
-        parent_index = current_node.get_idx_parent_node()
-        if parent_index in self.tree_structure:
-            tail_name = "{}_{}".format(graph.name, parent_index)
-            head_name = "{}_{}".format(graph.name, index)
-            if current_node.is_left_child():
-                graph.edge(tail_name=tail_name, head_name=head_name, label="T")
-            else:
-                graph.edge(tail_name=tail_name, head_name=head_name, label="F")
-
-        left_child = current_node.get_idx_left_child()
-        right_child = current_node.get_idx_right_child()
-        graph = self._digraph_tree_traversal(left_child, graph)
-        graph = self._digraph_tree_traversal(right_child, graph)
-
-        return graph
-
     def predict_output(self, num_observations):
         output = np.zeros(num_observations)
         for node_index in self.idx_leaf_nodes:
             current_node = self.get_node(node_index)
             output[current_node.idx_data_points] = current_node.value
         return output

-    def out_of_sample_predict(self, x):
-        """
-        Predict output of tree for an unobserved point.
-
-        Parameters
-        ----------
-        x : np.ndarray
-
-        Returns
-        -------
-        float
-            Value of the leaf value where the unobserved point lies.
-        """
-        leaf_node = self._traverse_tree(x=x, node_index=0)
-        return leaf_node.value
-
     def _traverse_tree(self, x, node_index=0):
         """
         Traverse the tree starting from a particular node given an unobserved point.
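Among the methods that survive this cleanup, `predict_output` is the one the sampler calls
in its inner loop; it simply scatters each leaf's value onto the data points that leaf owns.
A toy rendering of the same computation (values invented):

    import numpy as np

    leaves = [  # (value, idx_data_points) pairs, as stored on LeafNode
        (0.3, np.array([0, 1, 4])),
        (-0.1, np.array([2, 3])),
    ]

    num_observations = 5
    output = np.zeros(num_observations)
    for value, idx_data_points in leaves:
        output[idx_data_points] = value

    print(output)  # [ 0.3  0.3 -0.1 -0.1  0.3] -- this tree's share of the sum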
@@ -257,7 +83,8 @@ def _traverse_tree(self, x, node_index=0):
         """
         current_node = self.get_node(node_index)
         if isinstance(current_node, SplitNode):
-            if current_node.evaluate_splitting_rule(x):
+            # if current_node.evaluate_splitting_rule(x):
+            if x is not np.NaN:
                 left_child = current_node.get_idx_left_child()
                 final_node = self._traverse_tree(x, left_child)
             else:
@@ -311,29 +138,12 @@ def init_tree(tree_id, leaf_node_value, idx_data_points):
         new_tree[0] = LeafNode(index=0, value=leaf_node_value, idx_data_points=idx_data_points)
         return new_tree

-    def get_tree_depth(self):
-        """
-
-        Returns
-        -------
-
-        """
-        max_depth = 0
-        for idx_leaf_node in self.idx_leaf_nodes:
-            leaf_node = self.get_node(idx_leaf_node)
-            if max_depth < leaf_node.depth:
-                max_depth = leaf_node.depth
-        return max_depth
-

 class BaseNode:
     def __init__(self, index):
         self.index = index
         self.depth = int(math.floor(math.log(index + 1, 2)))

-    def __eq__(self, other):
-        return self.index == other.index and self.depth == other.depth
-
     def get_idx_parent_node(self):
         return (self.index - 1) // 2

@@ -343,12 +153,6 @@ def get_idx_left_child(self):
     def get_idx_right_child(self):
         return self.get_idx_left_child() + 1

-    def is_left_child(self):
-        return bool(self.index % 2)
-
-    def get_idx_sibling(self):
-        return (self.index + 1) if self.is_left_child() else (self.index - 1)
-

 class SplitNode(BaseNode):
     def __init__(self, index, idx_split_variable, split_value):
@@ -357,51 +161,9 @@ def __init__(self, index, idx_split_variable, split_value):
         self.idx_split_variable = idx_split_variable
         self.split_value = split_value

-    def __repr__(self):
-        return "SplitNode(index={}, idx_split_variable={}, split_value={})".format(
-            self.index, self.idx_split_variable, self.split_value
-        )
-
-    def __str__(self):
-        return "x[{}] <= {}".format(self.idx_split_variable, self.split_value)
-
-    def __eq__(self, other):
-        if isinstance(other, SplitNode):
-            return (
-                super().__eq__(other)
-                and self.idx_split_variable == other.idx_split_variable
-                and self.split_value == other.split_value
-            )
-        else:
-            return NotImplemented
-
-    def evaluate_splitting_rule(self, x):
-        if x is np.NaN:
-            return False
-        else:
-            return x[self.idx_split_variable] <= self.split_value
-

 class LeafNode(BaseNode):
     def __init__(self, index, value, idx_data_points):
         super().__init__(index)
         self.value = value
         self.idx_data_points = idx_data_points
-
-    def __repr__(self):
-        return "LeafNode(index={}, value={}, len(idx_data_points)={})".format(
-            self.index, self.value, len(self.idx_data_points)
-        )
-
-    def __str__(self):
-        return "{}".format(self.value)
-
-    def __eq__(self, other):
-        if isinstance(other, LeafNode):
-            return (
-                super().__eq__(other)
-                and self.value == other.value
-                and np.array_equal(self.idx_data_points, other.idx_data_points)
-            )
-        else:
-            return NotImplemented
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index e5d21a3dc39..4576807c0a7 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -199,10 +199,12 @@ def resample(self, list_of_particles, normalized_weights):

 class Particle:
     def __init__(self, tree, prior_prob_leaf_node):
-        self.tree = tree.copy()  # Mantiene el arbol que nos interesa en este momento
-        self.expansion_nodes = self.tree.idx_leaf_nodes.copy()  # This should be the array [0]
-        self.tree_history = [self.tree.copy()]
-        self.expansion_nodes_history = [self.expansion_nodes.copy()]
+        self.tree = tree.copy()  # keeps the tree we care about at the moment
+        self.expansion_nodes = tree.idx_leaf_nodes.copy()  # This should be the array [0]
+        # self.tree_history = [self.tree.copy()]
+        # self.expansion_nodes_history = [self.expansion_nodes.copy()]
+        self.tree_history = [self.tree]
+        self.expansion_nodes_history = [self.expansion_nodes]
         self.log_weight = 0.0
         self.prior_prob_leaf_node = prior_prob_leaf_node

@@ -210,11 +212,7 @@ def sample_tree_sequential(self, bart):
         if self.expansion_nodes:
             index_leaf_node = self.expansion_nodes.pop(0)
             # Probability that this node will remain a leaf node
-            # try:
-            #     print(self.tree[index_leaf_node].depth)
             prob_leaf = self.prior_prob_leaf_node[self.tree[index_leaf_node].depth]
-            # except IndexError:
-            #     prob_leaf = 1

             if prob_leaf < np.random.random():
                 self.grow_successful = bart.grow_tree(self.tree, index_leaf_node)
@@ -223,8 +221,10 @@ def sample_tree_sequential(self, bart):
                     # Add new leaf nodes indexes
                     new_indexes = self.tree.idx_leaf_nodes[-2:]
                     self.expansion_nodes.extend(new_indexes)
-            self.tree_history.append(self.tree.copy())
-            self.expansion_nodes_history.append(self.expansion_nodes.copy())
+            # self.tree_history.append(self.tree.copy())
+            # self.expansion_nodes_history.append(self.expansion_nodes.copy())
+            self.tree_history.append(self.tree)
+            self.expansion_nodes_history.append(self.expansion_nodes)

     def set_particle_to_step(self, t):
         if len(self.tree_history) <= t:
@@ -234,14 +234,6 @@ def set_particle_to_step(self, t):
         self.tree = self.tree_history[t]
         self.expansion_nodes = self.expansion_nodes_history[t]

-    def init_weight(self):
-        # TODO
-        return 1.0
-
-    def update_weight(self):
-        # TODO
-        pass
-
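These refactors keep the running-sum residual bookkeeping intact, so the leave-one-out
identity behind `get_residuals_loo` is worth spelling out once (toy numbers, m = 3 trees):

    import numpy as np

    predictions = np.array([[1.0, 2.0],   # tree 0
                            [0.5, 0.5],   # tree 1
                            [0.2, 0.1]])  # tree 2
    Y = np.array([2.0, 3.0])
    sum_trees_output = predictions.sum(axis=0)  # cached once, updated in place

    j = 1  # tree being refit
    R_j = Y - (sum_trees_output - predictions[j])
    assert np.allclose(R_j, Y - (predictions[0] + predictions[2]))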
From 0ff5833ddfb9d67b81d1892e52aa1ea8bacb4bac Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Wed, 21 Oct 2020 02:35:01 -0300
Subject: [PATCH 11/24] speed-up by fitting a subset of trees per step

---
 pymc3/distributions/bart.py  |  5 +----
 pymc3/step_methods/pgbart.py | 31 ++++++++++++++++++-------------
 2 files changed, 19 insertions(+), 17 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 40189afa51e..49232b6146a 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -11,10 +11,8 @@ class BARTParamsError(Exception):

 class BaseBART(NoDistribution):
     def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
-
-        self.Y_shared = Y
         self.X = X
-        self.Y = Y.eval()
+        self.Y = Y
         super().__init__(
             shape=X.shape[0], dtype="float64", testval=0, *args, **kwargs
         )  # FIXME dtype and testvalue are nonsensical
@@ -168,7 +166,6 @@ def draw_leaf_value(self, idx_data_points):
         # draw the residual mean
         R_j = self.get_residuals()[idx_data_points]
         draw = R_j.mean()
-
         return draw

diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 4576807c0a7..a664bf72814 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -6,8 +6,6 @@
 from ..model import modelcontext
 from ..theanof import inputvars, make_shared_replacements

-# from ..smc.smc import logp_forw
-

 class PGBART(ArrayStepShared):
@@ -31,15 +29,18 @@ class PGBART(ArrayStepShared):
     name = "bartsampler"
     default_blocked = False

-    def __init__(self, vars=None, num_particles=10, max_stages=5000, model=None):
+    def __init__(self, vars=None, num_particles=10, max_stages=100, chunk="auto", model=None):
         model = modelcontext(model)
         vars = inputvars(vars)
         self.bart = vars[0].distribution
         self.prior_prob_leaf_node = _compute_prior_probability(self.bart.alpha)
+        self.tune = True
+        self.idx = 0
+        if chunk == "auto":
+            self.chunk = min(1, int(self.bart.m * 0.10))
         self.num_particles = num_particles
         self.max_stages = max_stages
-        self.first_iteration = True
         self.previous_trees_particles_list = []
         for i in range(self.bart.m):
             p = Particle(self.bart.trees[i], self.prior_prob_leaf_node)
             self.previous_trees_particles_list.append(p)

         shared = make_shared_replacements(vars, model)
         self.likelihood_logp = logp_forw([model.datalogpt], vars, shared)
         super().__init__(vars, shared)

     def astep(self, q_0):
         # For the first iteration we restrict max_stages to a low number, otherwise it is almost sure
         # we will reach max_stages given that our first set of m trees is not good at all.
         # maybe we can set max_stages by using some function of the number of variables/dimensions.
-        if self.first_iteration:
+        bart = self.bart
+
+        if self.tune:
             max_stages = 5
-            self.first_iteration = False
         else:
             max_stages = self.max_stages

-        # Step 4 of algorithm
-        bart = self.bart
+        if self.idx == bart.m:
+            self.idx = 0

+        # Step 4 of algorithm
         num_observations = bart.num_observations
         likelihood_logp = self.likelihood_logp
-        output = np.zeros((bart.m, num_observations))
         log_num_particles = np.log(self.num_particles)
-        for idx, tree in enumerate(bart.trees):
+
+        for idx in range(self.idx, self.idx + self.chunk):
+            if idx > bart.m:
+                break
+            self.idx += 1
+            tree = bart.trees[idx]
             R_j = bart.get_residuals_loo(tree)
-            bart.Y_shared.set_value(R_j)
             # generate an initial set of SMC particles
             # At the end of the algorithm we return one of these particles as a new tree
             list_of_particles = self.init_particles(tree.tree_id, R_j, bart.num_observations)
@@ -142,10 +148,9 @@ def astep(self, q_0):
             new_tree = np.random.choice(list_of_particles, p=normalized_weights)
             self.previous_trees_particles_list[tree.tree_id] = new_tree
             bart.trees[idx] = new_tree.tree
             new_prediction = new_tree.tree.predict_output(num_observations)
-            output[idx] = new_prediction
             bart.sum_trees_output = bart.Y - R_j + new_prediction

-        return np.sum(output, axis=0)
+        return bart.sum_trees_output

From 3419e70d4da871d540daf6048e94655e12cd3cc4 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Wed, 21 Oct 2020 03:13:34 -0300
Subject: [PATCH 12/24] choose max

---
 pymc3/step_methods/pgbart.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index a664bf72814..8f19a849e81 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -38,7 +38,7 @@ def __init__(self, vars=None, num_particles=10, max_stages=100, chunk="auto", mo
         self.tune = True
         self.idx = 0
         if chunk == "auto":
-            self.chunk = min(1, int(self.bart.m * 0.10))
+            self.chunk = max(1, int(self.bart.m * 0.1))
         self.num_particles = num_particles
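Patch 12's one-line change matters more than it looks: with the default m = 200 trees,
`min` pinned the per-step chunk to a single tree, while `max` yields the intended 10% and
still guards the small-m case. A quick check:

    m = 200
    assert min(1, int(m * 0.1)) == 1   # old behavior: always one tree per step
    assert max(1, int(m * 0.1)) == 20  # fixed: 10% of the trees

    m = 5
    assert max(1, int(m * 0.1)) == 1   # still at least one tree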
From 51165d4b94d75908f2efcfea3be5bba5379fbd53 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Wed, 21 Oct 2020 13:41:30 -0300
Subject: [PATCH 13/24] improve docstrings

---
 pymc3/distributions/bart.py  | 60 ++++++++++++++++----------------
 pymc3/step_methods/pgbart.py | 25 +++++++++------
 2 files changed, 49 insertions(+), 36 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 49232b6146a..d87cbf1511b 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -4,31 +4,17 @@

 __all__ = ["BART"]

+
 class BaseBART(NoDistribution):
     def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
         self.X = X
         self.Y = Y
-        super().__init__(
-            shape=X.shape[0], dtype="float64", testval=0, *args, **kwargs
-        )  # FIXME dtype and testvalue are nonsensical
+        super().__init__(shape=X.shape[0], dtype="float64", testval=0, *args, **kwargs)

-        if not isinstance(self.X, np.ndarray) or self.X.dtype.type is not np.float64:
-            raise BARTParamsError(
-                "The design matrix X type must be numpy.ndarray where every item"
-                " type is numpy.float64"
-            )
         if self.X.ndim != 2:
             raise BARTParamsError("The design matrix X must have two dimensions")
-        # if not isinstance(self.Y, np.ndarray) or self.Y.dtype.type is not np.float64:
-        #     raise BARTParamsError(
-        #         "The response matrix Y type must be numpy.ndarray where every item"
-        #         " type is numpy.float64"
-        #     )
+
         if self.Y.ndim != 1:
             raise BARTParamsError("The response matrix Y must have one dimension")
         if self.X.shape[0] != self.Y.shape[0]:
@@ -38,6 +37,7 @@ def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
         self.m = m
         self.alpha = alpha
         self.trees = self.init_list_of_trees()
+        self.mean = fast_mean()

     def init_list_of_trees(self):
         initial_value_leaf_nodes = self.Y.mean() / self.m
@@ -50,8 +50,8 @@ def init_list_of_trees(self):
             idx_data_points=initial_idx_data_points_leaf_nodes,
         )
         list_of_trees.append(new_tree)
-        # Diff trick to speed computation of residuals.
-        # Taken from Section 3.1 of Kapelner, A and Bleich, J. bartMachine: A Powerful Tool for Machine Learning in R. ArXiv e-prints, 2013
+        # Diff trick to speed computation of residuals. From Section 3.1 of Kapelner, A and Bleich, J.
+        # bartMachine: A Powerful Tool for Machine Learning in R. ArXiv e-prints, 2013
         # The sum_trees_output will contain the sum of the predicted output for all trees.
         # When R_j is needed we subtract the current predicted output for tree T_j.
         self.sum_trees_output = np.full_like(self.Y, self.Y.mean())
@@ -141,7 +141,6 @@ def get_new_idx_data_points(self, current_split_node, idx_data_points):

         return left_node_idx_data_points, right_node_idx_data_points

-    def get_residuals_loo(self, tree):
-        R_j = self.Y - (self.sum_trees_output - tree.predict_output(self.num_observations))
-        return R_j

     def get_residuals(self):
+        """Compute the residuals."""
         R_j = self.Y - self.sum_trees_output
         return R_j

+    def get_residuals_loo(self, tree):
+        """Compute the residuals without leaving the passed tree out."""
+        R_j = self.Y - (self.sum_trees_output - tree.predict_output(self.num_observations))
+        return R_j
+
     def draw_leaf_value(self, idx_data_points):
-        # draw the residual mean
+        """Draw the residual mean."""
         R_j = self.get_residuals()[idx_data_points]
-        draw = R_j.mean()
+        draw = self.mean(R_j)
         return draw


+def fast_mean():
+    """If available use Numba to speed up the computation of the mean."""
+    try:
+        from numba import jit
+    except ImportError:
+        return np.mean
+
+    @jit
+    def mean(a):
+        count = a.shape[0]
+        suma = 0
+        for i in range(count):
+            suma += a[i]
+        return suma / count
+
+    return mean
+
+
 def discrete_uniform_sampler(upper_value):
+    """Draw from the uniform distribution with bounds [0, upper_value)."""
     return int(np.random.random() * upper_value)


 class BART(BaseBART):
     """
     BART distribution.

-    Distributon representing a sum over trees
+    Distribution representing a sum over trees

     Parameters
     ----------
     X :
         The design matrix.
     Y :
         The response vector.
     m : int
         Number of trees
     alpha : float
         Control the prior probability over the depth of the trees. Must be in the interval (0, 1),
-        although it is recommended to be between in the interval (0, 0.5].
+        although it is recommended to be in the interval (0, 0.5].
     """

     def __init__(self, X, Y, m=200, alpha=0.25):
         super().__init__(X, Y, m, alpha)

     def _repr_latex_(self, name=None, dist=None):
         if dist is None:
             dist = self
         X = (type(self.X),)
         Y = (type(self.Y),)
-        m = (self.m,)
         alpha = self.alpha
         m = self.m
         name = r"\text{%s}" % name
+def compute_prior_probability(alpha):
+    """
+    Calculate the probability of the node being a LeafNode (1 - p(being SplitNode)).
+    Taken from equation 19 in [Rockova2018].
+
+    Parameters
+    ----------
+    alpha : float
+
+    Returns
+    -------
+    float
+
+    References
+    ----------
+    .. [Rockova2018] Veronika Rockova, Enakshi Saha (2018). On the theory of BART.
+        arXiv, `link `__
+    """
+    prior_leaf_prob = [0]
+    depth = 1
+    while prior_leaf_prob[-1] < 1:
+        prior_leaf_prob.append(1 - alpha ** depth)
+        depth += 1
+    return prior_leaf_prob
+
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 8f19a849e81..5edbf6b3044 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -9,21 +9,26 @@ class PGBART(ArrayStepShared):
     """
-    Write me!!!
+    Particle Gibbs BART sampling step
+
+    Parameters
+    ----------)
+    vars: list
+        List of variables for sampler
+    num_particles : int
+        Number of particles for the SMC sampler. Defaults to 10.
+    max_stages : int
+        Maximum number of iterations of the SMC sampler. Defaults to 100.
+    chunk : int
+        Number of trees fitted per step. Defaults to "auto", which is 10% of the `m` trees.
+    model: PyMC Model
+        Optional model for sampling step. Defaults to None (taken from context).

     References
     ----------
     .. [Lakshminarayanan2015] Lakshminarayanan, B. and Roy, D.M. and Teh, Y. W., (2015),
         Particle Gibbs for Bayesian Additive Regression Trees.
-        ArviX,
-        `link `__
-
-    Parameters
-    ----------
-    lower: float
-        Lower limit.
-    upper: float
-        Upper limit.
+        ArXiv, `link `__
     """

     name = "bartsampler"
     default_blocked = False
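With the docstrings in place, the intended user-facing pattern looks roughly like the
following sketch. It is not taken from the diff: the data, the `sigma` prior, and the
model shape are invented, and it assumes `BART` and `PGBART` are reachable from the
`pymc3` namespace and that the sampler assigns `PGBART` to the BART variable through its
`competence` method shown earlier in this series:

    import numpy as np
    import pymc3 as pm

    X = np.random.uniform(size=(100, 2))   # float64 2-D design matrix, as required
    Y = np.random.normal(size=100)         # float64 1-D response

    with pm.Model() as model:
        sigma = pm.HalfNormal("sigma", 1.0)
        mu = pm.BART("mu", X, Y, m=50, alpha=0.25)
        y = pm.Normal("y", mu=mu, sigma=sigma, observed=Y)
        trace = pm.sample()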
From 03758a4758f7065adc524b56f57ffc1f20d1b3c1 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Thu, 22 Oct 2020 15:27:51 -0300
Subject: [PATCH 14/24] refactor and clean code

---
 pymc3/distributions/bart.py  |  35 ++++++-
 pymc3/step_methods/pgbart.py | 197 +++++++++++++----------------------
 2 files changed, 103 insertions(+), 129 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index d87cbf1511b..e95753f6faf 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -4,13 +4,12 @@

 __all__ = ["BART"]

+
 class BaseBART(NoDistribution):
     def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
         self.X = X
         self.Y = Y
         super().__init__(shape=X.shape[0], dtype="float64", testval=0, *args, **kwargs)
@@ -38,6 +37,7 @@ def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
         self.alpha = alpha
         self.trees = self.init_list_of_trees()
         self.mean = fast_mean()
+        self.prior_prob_leaf_node = compute_prior_probability(alpha)

     def init_list_of_trees(self):
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 5edbf6b3044..28257afedab 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -1,10 +1,11 @@
 import numpy as np
+from theano import function as theano_function

 from .arraystep import ArrayStepShared, Competence
 from ..distributions import BART
 from ..distributions.tree import Tree
 from ..model import modelcontext
-from ..theanof import inputvars, make_shared_replacements
+from ..theanof import inputvars, make_shared_replacements, join_nonshared_inputs

@@ -34,45 +35,45 @@ class PGBART(ArrayStepShared):
     name = "bartsampler"
     default_blocked = False

-    def __init__(self, vars=None, num_particles=10, max_stages=100, chunk="auto", model=None):
+    def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", model=None):
         model = modelcontext(model)
         vars = inputvars(vars)
         self.bart = vars[0].distribution
-        self.prior_prob_leaf_node = _compute_prior_probability(self.bart.alpha)
         self.tune = True
         self.idx = 0
         if chunk == "auto":
             self.chunk = max(1, int(self.bart.m * 0.1))
         self.num_particles = num_particles
+        self.log_num_particles = np.log(num_particles)
+        self.indices = list(range(1, num_particles))

         self.max_stages = max_stages
         self.previous_trees_particles_list = []
         for i in range(self.bart.m):
-            p = Particle(self.bart.trees[i], self.prior_prob_leaf_node)
+            p = Particle(self.bart.trees[i], self.bart.prior_prob_leaf_node)
             self.previous_trees_particles_list.append(p)

         shared = make_shared_replacements(vars, model)
-        self.likelihood_logp = logp_forw([model.datalogpt], vars, shared)
+        self.likelihood_logp = logp([model.datalogpt], vars, shared)
         super().__init__(vars, shared)

-    def astep(self, q_0):
+    def astep(self, _):
         bart = self.bart

-        # For the first iteration we restrict max_stages to a low number, otherwise it is almost sure
-        # we will reach max_stages given that our first set of m trees is not good at all.
-        # maybe we can set max_stages by using some function of the number of variables/dimensions.
+        # For the tuning phase we restrict max_stages to a low number, otherwise it is almost sure
+        # we will reach max_stages given that our first set of m trees is not good at all.
+        # Can set max_stages as a function of the number of variables/dimensions?
+        # if self.tune:
+        #     max_stages = 5
+        # else:
+        #     max_stages = self.max_stages

         if self.idx == bart.m:
             self.idx = 0

         # Step 4 of algorithm
         num_observations = bart.num_observations

         for idx in range(self.idx, self.idx + self.chunk):
             if idx > bart.m:
                 break
             self.idx += 1
             tree = bart.trees[idx]
             R_j = bart.get_residuals_loo(tree)
-            # generate an initial set of SMC particles
-            # At the end of the algorithm we return one of these particles as a new tree
+            # Generate an initial set of SMC particles
+            # at the end of the algorithm we return one of these particles as the new tree
             particles = self.init_particles(tree.tree_id, R_j, bart.num_observations)

             for t in range(1, max_stages):
-                # Step 7 of algorithm
-                list_of_particles[0] = self.get_previous_tree_particle(tree.tree_id, t)
-                # This should be embarrassingly parallelizable
+                # get previous particle at stage t
+                particles[0] = self.get_previous_tree_particle(tree.tree_id, t)
+                # sample each particle (try to grow each tree)
                 for c in range(1, self.num_particles):
-                    # Step 9 of algorithm
-                    list_of_particles[c].sample_tree_sequential(bart)
-
-                # Step 12 of algorithm
-                for p_idx, p in enumerate(list_of_particles):
-                    new_likelihood = likelihood_logp(p.tree.predict_output(num_observations))
-                    p.log_weight += new_likelihood - old_likelihoods[p_idx]
-                    old_likelihoods[p_idx] = new_likelihood
-                    log_weights[p_idx] = p.log_weight
+                    particles[c].sample_tree_sequential(bart)

+                # Update weights
+                # Since the prior is used as the proposal, the weights are updated additively
+                # as the ratio of the new and old log_likelihoods
+                for p_idx, p in enumerate(particles):
+                    new_likelihood = self.likelihood_logp(p.tree.predict_output(num_observations))
+                    p.log_weight += new_likelihood - p.old_likelihood_logp
+                    p.old_likelihood_logp = new_likelihood
+
+                # Normalize weights
+                W, normalized_weights = self.normalize(particles)

-                W, normalized_weights = self._normalize(log_weights)
-
-                # Step 15 of algorithm
                 # resample all but first particle
                 re_n_w = normalized_weights[1:] / normalized_weights[1:].sum()
-                indices = range(1, len(list_of_particles))
-                new_indices = np.random.choice(indices, size=len(list_of_particles) - 1, p=re_n_w)
-                list_of_particles[1:] = np.array(list_of_particles)[new_indices]
-                old_likelihoods[1:] = old_likelihoods[new_indices]
-
-                # Step 16 of algorithm
-                w_t = W - log_num_particles
-                for c in range(self.num_particles):
-                    list_of_particles[c].log_weight = w_t
-
-                ### Should we used this one!!!
-                # Step 17 of algorithm
-                # non_available_nodes_for_expansion = np.ones(self.num_particles)
-                # for c in range(self.num_particles):
-                #     if len(list_of_particles[c].expansion_nodes) != 0:
-                #         non_available_nodes_for_expansion[c] = 0
-                # if np.all(non_available_nodes_for_expansion):
-                #     break
-                ### Or this one !!!
-                non_available_nodes_for_expansion = True
-                for c in range(self.num_particles):
-                    if len(list_of_particles[c].expansion_nodes) != 0:
-                        non_available_nodes_for_expansion = False
-                        break
-                if non_available_nodes_for_expansion:
+                new_indices = np.random.choice(self.indices, size=len(self.indices), p=re_n_w)
+                particles[1:] = particles[new_indices]
+
+                # Set the new weights
+                w_t = W - self.log_num_particles
+                for p in particles:
+                    p.log_weight = w_t
+
+                # Check if particles can keep growing, otherwise stop iterating
+                non_available_nodes_for_expansion = np.ones(self.num_particles - 1)
+                for c in range(1, self.num_particles):
+                    if len(particles[c].expansion_nodes) != 0:
+                        non_available_nodes_for_expansion[c - 1] = 0
+                if np.all(non_available_nodes_for_expansion):
                     break

-            new_tree = np.random.choice(list_of_particles, p=normalized_weights)
+            # Get the new tree and update
+            new_tree = np.random.choice(particles, p=normalized_weights)
             self.previous_trees_particles_list[tree.tree_id] = new_tree
             bart.trees[idx] = new_tree.tree
             new_prediction = new_tree.tree.predict_output(num_observations)
             bart.sum_trees_output = bart.Y - R_j + new_prediction

         return bart.sum_trees_output

     @staticmethod
     def competence(var, has_grad):
         if isinstance(var.distribution, BART):
             return Competence.IDEAL
         return Competence.INCOMPATIBLE

-    def _normalize(self, log_w):  # this function needs a home sweet home
+    def normalize(self, particles):
         """
         Use logsumexp trick to get W and softmax to get normalized_weights
         """
+        log_w = np.array([p.log_weight for p in particles])
         log_w_max = log_w.max()
         log_w_ = log_w - log_w_max
         w_ = np.exp(log_w_)
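`normalize` is a standard logsumexp/softmax pair; spelled out with made-up particle
weights:

    import numpy as np

    log_w = np.array([-10.0, -11.0, -9.5])  # particle log-weights (invented)
    log_w_max = log_w.max()
    w_ = np.exp(log_w - log_w_max)
    w_sum = w_.sum()

    W = np.log(w_sum) + log_w_max       # log of the total unnormalized weight
    normalized_weights = w_ / w_sum     # softmax: sums to 1, underflow-safe
    assert np.isclose(normalized_weights.sum(), 1.0)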
 self.tree_history = [self.tree]
 self.expansion_nodes_history = [self.expansion_nodes]
- self.log_weight = 0.0
+ self.log_weight = 0
 self.prior_prob_leaf_node = prior_prob_leaf_node
+ self.old_likelihood_logp = likelihood
 def sample_tree_sequential(self, bart):
 if self.expansion_nodes:
@@ -245,38 +225,7 @@ def set_particle_to_step(self, t):
 self.expansion_nodes = self.expansion_nodes_history[t]
-def _compute_prior_probability(alpha):
- """
- Calculate the probability of the node being a LeafNode (1 - p(being SplitNode)).
- Taken from equation 19 in [On Theory for BART]. XXX FIX reference below!
-
- Parameters
- ----------
- alpha : float
-
- Returns
- -------
- float
-
- References
- ----------
- .. [Chipman2010] Chipman, H. A., George, E. I., & McCulloch, R. E. (2010). BART: Bayesian
- additive regression trees. The Annals of Applied Statistics, 4(1), 266-298.,
- `link `__
- """
- prior_leaf_prob = [0]
- depth = 1
- while prior_leaf_prob[-1] < 1:
- prior_leaf_prob.append(1 - alpha ** depth)
- depth += 1
- return prior_leaf_prob
-
-
-from theano import function as theano_function
-from ..theanof import join_nonshared_inputs
-
-
-def logp_forw(out_vars, vars, shared):
+def logp(out_vars, vars, shared):
 """Compile Theano function of the model and the input and output variables.
 Parameters

From c3c3929349ec9fb90794994f2ac49c1ab56c7a93 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Thu, 22 Oct 2020 16:07:02 -0300
Subject: [PATCH 15/24] clean docstrings

---
 pymc3/distributions/tree.py | 1 -
 pymc3/step_methods/pgbart.py | 66 ++++++++++++++++++++----------------
 2 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py
index 5b7d9f2a99f..35413e2f6d3 100644
--- a/pymc3/distributions/tree.py
+++ b/pymc3/distributions/tree.py
@@ -83,7 +83,6 @@ def _traverse_tree(self, x, node_index=0):
 """
 current_node = self.get_node(node_index)
 if isinstance(current_node, SplitNode):
- # if current_node.evaluate_splitting_rule(x):
 if x is not np.NaN:
 left_child = current_node.get_idx_left_child()
 final_node = self._traverse_tree(x, left_child)
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 28257afedab..850d12281b7 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -13,7 +13,7 @@ class PGBART(ArrayStepShared):
 Particle Gibbs BART sampling step
 Parameters
- ----------)
+ ----------
 vars: list
 List of variables for sampler
 num_particles : int
@@ -49,10 +49,10 @@ def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", m
 self.indices = list(range(1, num_particles))
 self.max_stages = max_stages
- self.previous_trees_particles_list = []
+ self.old_trees_particles_list = []
 for i in range(self.bart.m):
 p = Particle(self.bart.trees[i], self.bart.prior_prob_leaf_node)
- self.previous_trees_particles_list.append(p)
+ self.old_trees_particles_list.append(p)
 shared = make_shared_replacements(vars, model)
 self.likelihood_logp = logp([model.datalogpt], vars, shared)
@@ -60,40 +60,37 @@ def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", m
 def astep(self, _):
 bart = self.bart
+ num_observations = bart.num_observations
 # For the tuning phase we restrict max_stages to a low number, otherwise it is almost certain
 # we will reach max_stages given that our first set of m trees is not good at all.
 # Can set max_stages as a function of the number of variables/dimensions?
- # if self.tune:
- # max_stages = 5
- # else:
- # max_stages = self.max_stages
+ if self.tune:
+ max_stages = 5
+ else:
+ max_stages = self.max_stages
 if self.idx == bart.m:
 self.idx = 0
- # Step 4 of algorithm
- num_observations = bart.num_observations
-
 for idx in range(self.idx, self.idx + self.chunk):
 if idx > bart.m:
 break
 self.idx += 1
 tree = bart.trees[idx]
 R_j = bart.get_residuals_loo(tree)
- # generate an initial set of SMC particles
- # at the end of the algorithm we return one of these particles as a new tree
+ # Generate an initial set of SMC particles
+ # at the end of the algorithm we return one of these particles as the new tree
 particles = self.init_particles(tree.tree_id, R_j, bart.num_observations)
 for t in range(1, max_stages):
- # get previous particle at stage t
- particles[0] = self.get_previous_tree_particle(tree.tree_id, t)
+ # Get old particle at stage t
+ particles[0] = self.get_old_tree_particle(tree.tree_id, t)
 # sample each particle (try to grow each tree)
 for c in range(1, self.num_particles):
 particles[c].sample_tree_sequential(bart)
- # Update weights
- # Since the prior is used as the proposal, the weights are updated additively
- # by the difference between the new and old log-likelihoods
+ # Update weights. Since the prior is used as the proposal, the weights
+ # are updated additively by the difference between the new and old log-likelihoods
 for p_idx, p in enumerate(particles):
 new_likelihood = self.likelihood_logp(p.tree.predict_output(num_observations))
 p.log_weight += new_likelihood - p.old_likelihood_logp
@@ -102,7 +99,7 @@ def astep(self, _):
 # Normalize weights
 W, normalized_weights = self.normalize(particles)
- # resample all but first particle
+ # Resample all but first particle
 re_n_w = normalized_weights[1:] / normalized_weights[1:].sum()
 new_indices = np.random.choice(self.indices, size=len(self.indices), p=re_n_w)
 particles[1:] = particles[new_indices]
@@ -122,7 +119,7 @@ def astep(self, _):
 # Get the new tree and update
 new_tree = np.random.choice(particles, p=normalized_weights)
- self.previous_trees_particles_list[tree.tree_id] = new_tree
+ self.old_trees_particles_list[tree.tree_id] = new_tree
 bart.trees[idx] = new_tree.tree
 new_prediction = new_tree.tree.predict_output(num_observations)
 bart.sum_trees_output = bart.Y - R_j + new_prediction
@@ -154,19 +151,23 @@ def normalize(self, particles):
 return W, normalized_weights
- def get_previous_tree_particle(self, tree_id, t):
- previous_tree_particle = self.previous_trees_particles_list[tree_id]
- previous_tree_particle.set_particle_to_step(t)
- return previous_tree_particle
+ def get_old_tree_particle(self, tree_id, t):
+ old_tree_particle = self.old_trees_particles_list[tree_id]
+ old_tree_particle.set_particle_to_step(t)
+ return old_tree_particle
 def init_particles(self, tree_id, R_j, num_observations):
-
- prev_tree = self.get_previous_tree_particle(tree_id, 0)
+ """
+ Initialize particles
+ """
+ # The first particle is from the tree we are trying to replace
+ prev_tree = self.get_old_tree_particle(tree_id, 0)
 likelihood = self.likelihood_logp(prev_tree.tree.predict_output(num_observations))
 prev_tree.old_likelihood_logp = likelihood
 prev_tree.log_weight = likelihood - self.log_num_particles
 particles = [prev_tree]
+ # The rest of the particles are identically initialized
 initial_value_leaf_nodes = R_j.mean()
 initial_idx_data_points_leaf_nodes = np.arange(num_observations, dtype="int32")
 new_tree = Tree.init_tree(
@@ -183,12 +184,19 @@ def init_particles(self, tree_id, R_j, num_observations):
 return np.array(particles)
- def resample(self, particles, normalized_weights):
- particles = np.random.choice(particles, size=len(particles), p=normalized_weights)
+ def resample(self, particles, weights):
+ """
+ Resample a set of particles given their weights
+ """
+ particles = np.random.choice(particles, size=len(particles), p=weights)
 return particles
 class Particle:
+ """
+ Particle tree
+ """
+
 def __init__(self, tree, prior_prob_leaf_node, log_weight=0, likelihood=0):
 self.tree = tree.copy() # keeps the tree that we care about at the moment
 self.expansion_nodes = tree.idx_leaf_nodes.copy() # This should be the array [0]
@@ -206,13 +214,11 @@ def sample_tree_sequential(self, bart):
 if prob_leaf < np.random.random():
 self.grow_successful = bart.grow_tree(self.tree, index_leaf_node)
- # TODO: in case the grow_tree fails, should we try to sample the tree from another leaf node?
 if self.grow_successful:
 # Add new leaf nodes indexes
 new_indexes = self.tree.idx_leaf_nodes[-2:]
 self.expansion_nodes.extend(new_indexes)
- # self.tree_history.append(self.tree.copy())
- # self.expansion_nodes_history.append(self.expansion_nodes.copy())
+ self.tree_history.append(self.tree)
 self.expansion_nodes_history.append(self.expansion_nodes)

From 6a58daaeaf02ae8a67ff74b608db4b1844bf7e92 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 23 Oct 2020 10:43:07 -0300
Subject: [PATCH 16/24] add tests and minor fixes.

Co-authored-by: aloctavodia
Co-authored-by: jmloyola
---
 pymc3/distributions/bart.py | 33 ++++++++++++++++++++++---------
 pymc3/distributions/tree.py | 14 ++++++++++++++
 pymc3/step_methods/pgbart.py | 14 ++++++++++++++
 3 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index e95753f6faf..0896617d028 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -1,3 +1,17 @@
+# Copyright 2020 The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 from .distribution import NoDistribution
 from .tree import Tree, SplitNode, LeafNode
@@ -32,7 +46,7 @@ def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
 )
 self.num_observations = X.shape[0]
- self.number_variates = X.shape[1]
+ self.num_variates = X.shape[1]
 self.m = m
 self.alpha = alpha
 self.trees = self.init_list_of_trees()
@@ -66,7 +80,7 @@ def __repr_latex(self):
 def get_available_predictors(self, idx_data_points_split_node):
 possible_splitting_variables = []
- for j in range(self.number_variates):
+ for j in range(self.num_variates):
 x_j = self.X[idx_data_points_split_node, j]
 x_j = x_j[~np.isnan(x_j)]
 for i in range(1, len(x_j)):
@@ -169,7 +183,7 @@ def compute_prior_probability(alpha):
 Returns
 -------
- float
+ list with probabilities for leaf nodes
 References
 ----------
@@ -203,7 +217,7 @@ def mean(a):
 def discrete_uniform_sampler(upper_value):
- """Draw from the uniform distribuion with bounds [0, upper_value)."""
+ """Draw from the uniform distribution with bounds [0, upper_value)."""
 return int(np.random.random() * upper_value)
@@ -229,14 +243,15 @@ class BART(BaseBART):
 def __init__(self, X, Y, m=200, alpha=0.25):
 super().__init__(X, Y, m, alpha)
- def _repr_latex_(self, name=None, dist=None):
+ def _str_repr(self, name=None, dist=None, formatting="plain"):
 if dist is None:
 dist = self
 X = (type(self.X),)
 Y = (type(self.Y),)
 alpha = self.alpha
 m = self.m
- name = r"\text{%s}" % name
- return r"""${} \sim \text{{BART}}(\mathit{{alpha}}={},~\mathit{{m}}={})$""".format(
- name, alpha, m
- )
+
+ if formatting == "latex":
+ return f"$\\text{{{name}}} \\sim \\text{{BART}}(\\text{{alpha = }}\\text{{{alpha}}}, \\text{{m = }}\\text{{{m}}})$"
+ else:
+ return f"{name} ~ BART(alpha = {alpha}, m = {m})"
diff --git a/pymc3/distributions/tree.py b/pymc3/distributions/tree.py
index 35413e2f6d3..3b2c098e896 100644
--- a/pymc3/distributions/tree.py
+++ b/pymc3/distributions/tree.py
@@ -1,3 +1,17 @@
+# Copyright 2020 The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 import math
 from copy import deepcopy
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 850d12281b7..726fea4cf83 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -1,3 +1,17 @@
+# Copyright 2020 The PyMC Developers
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import numpy as np
 from theano import function as theano_function

From 9050469110b9ce00548cabbaf4c0a16e1bdca13f Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 23 Oct 2020 11:46:48 -0300
Subject: [PATCH 17/24] remove space.

Co-authored-by: aloctavodia
Co-authored-by: jmloyola
---
 pymc3/smc/smc.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pymc3/smc/smc.py b/pymc3/smc/smc.py
index 69b94054da0..d6069122952 100644
--- a/pymc3/smc/smc.py
+++ b/pymc3/smc/smc.py
@@ -392,4 +392,3 @@ def __call__(self, posterior):
 if self.save_log_pseudolikelihood:
 self.save_lpl(elemwise)
 return elemwise.sum()
-

From 5fdd999adb1e8ae5a7ebf778f242343c08af36ee Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Thu, 29 Oct 2020 19:55:35 -0300
Subject: [PATCH 18/24] add variable importance report

---
 pymc3/distributions/bart.py | 8 ++------
 pymc3/sampling.py | 4 ++++
 pymc3/step_methods/pgbart.py | 30 +++++++++++++++++++++---------
 3 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index 0896617d028..f20f5bc0da3 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -99,23 +99,20 @@ def get_available_splitting_rules(self, idx_data_points_split_node, idx_split_va
 def grow_tree(self, tree, index_leaf_node):
 # This can be unsuccessful when there are not available predictors
- successful_grow_tree = False
 current_node = tree.get_node(index_leaf_node)
 available_predictors = self.get_available_predictors(current_node.idx_data_points)
 if not available_predictors:
- return successful_grow_tree
+ return False, None
 index_selected_predictor = discrete_uniform_sampler(len(available_predictors))
 selected_predictor = available_predictors[index_selected_predictor]
-
 available_splitting_rules, _ = self.get_available_splitting_rules(
 current_node.idx_data_points, selected_predictor
 )
 index_selected_splitting_rule = discrete_uniform_sampler(len(available_splitting_rules))
 selected_splitting_rule = available_splitting_rules[index_selected_splitting_rule]
-
 new_split_node = SplitNode(
 index=index_leaf_node,
 idx_split_variable=selected_predictor,
@@ -140,9 +137,8 @@ def grow_tree(self, tree, index_leaf_node):
 idx_data_points=right_node_idx_data_points,
 )
 tree.grow_tree(index_leaf_node, new_split_node, new_left_node, new_right_node)
- successful_grow_tree = True
- return successful_grow_tree
+ return True, index_selected_predictor
 def get_new_idx_data_points(self, current_split_node, idx_data_points):
 idx_split_variable = current_split_node.idx_split_variable
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index c819abaeb05..5004d412362 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -606,6 +606,10 @@ def sample(
 trace.report._n_draws = n_draws
 trace.report._t_sampling = t_sampling
+ if "variable_inclusion" in trace.stat_names:
+ variable_inclusion = trace.get_sampler_stats("variable_inclusion")[::-draws]
+ trace.report.variable_importance = np.mean([vi / vi.sum() for vi in variable_inclusion], 0)
+
 n_chains = len(trace.chains)
 _log.info(
 f'Sampling {n_chains} chain{"s" if n_chains > 1 else ""} for {n_tune:_d} tune and {n_draws:_d} draw iterations '
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 726fea4cf83..8a2744ead2d 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -31,9 +31,9 @@ class PGBART(ArrayStepShared):
 vars: list
 List of variables for sampler
 num_particles : int
- Number of particles for the SMC sampler. Defaults to 10
+ Number of particles for the conditional SMC sampler. Defaults to 10.
 max_stages : int
- Maximum number of Iterations of the SMC sampler Defaults to 100.
+ Maximum number of iterations of the conditional SMC sampler. Defaults to 100.
 chunk : int
 Number of trees fitted per step. Defaults to "auto", which is 10% of the `m` trees.
 model: PyMC Model
@@ -48,6 +48,8 @@ class PGBART(ArrayStepShared):
 name = "bartsampler"
 default_blocked = False
+ generates_stats = True
+ stats_dtypes = [{"variable_inclusion": np.ndarray}]
 def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", model=None):
 model = modelcontext(model)
@@ -58,14 +60,14 @@ def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", m
 self.idx = 0
 if chunk == "auto":
 self.chunk = max(1, int(self.bart.m * 0.1))
+ self.variable_inclusion = np.zeros(self.bart.num_variates)
 self.num_particles = num_particles
 self.log_num_particles = np.log(num_particles)
 self.indices = list(range(1, num_particles))
- self.max_stages = max_stages
 self.old_trees_particles_list = []
 for i in range(self.bart.m):
- p = Particle(self.bart.trees[i], self.bart.prior_prob_leaf_node)
+ p = ParticleTree(self.bart.trees[i], self.bart.prior_prob_leaf_node)
 self.old_trees_particles_list.append(p)
 shared = make_shared_replacements(vars, model)
 self.likelihood_logp = logp([model.datalogpt], vars, shared)
@@ -138,7 +140,13 @@ def astep(self, _):
 new_prediction = new_tree.tree.predict_output(num_observations)
 bart.sum_trees_output = bart.Y - R_j + new_prediction
- return bart.sum_trees_output
+ if not self.tune:
+ for index in new_tree.used_variates:
+ self.variable_inclusion[index] += 1
+
+ stats = {"variable_inclusion": self.variable_inclusion}
+
+ return bart.sum_trees_output, [stats]
 @staticmethod
 def competence(var, has_grad):
@@ -193,7 +201,7 @@ def init_particles(self, tree_id, R_j, num_observations):
 log_weight = likelihood_logp - self.log_num_particles
 for i in range(1, self.num_particles):
 particles.append(
- Particle(new_tree, self.bart.prior_prob_leaf_node, log_weight, likelihood_logp)
+ ParticleTree(new_tree, self.bart.prior_prob_leaf_node, log_weight, likelihood_logp)
 )
 return np.array(particles)
@@ -206,7 +214,7 @@ def resample(self, particles, weights):
 return particles
-class Particle:
+class ParticleTree:
 """
 Particle tree
 """
@@ -219,6 +227,7 @@ def __init__(self, tree, prior_prob_leaf_node, log_weight=0, likelihood=0):
 self.log_weight = 0
 self.prior_prob_leaf_node = prior_prob_leaf_node
 self.old_likelihood_logp = likelihood
+ self.used_variates = []
 def sample_tree_sequential(self, bart):
 if self.expansion_nodes:
@@ -227,11 +236,14 @@ def sample_tree_sequential(self, bart):
 prob_leaf = self.prior_prob_leaf_node[self.tree[index_leaf_node].depth]
 if prob_leaf < np.random.random():
- self.grow_successful = bart.grow_tree(self.tree, index_leaf_node)
- if self.grow_successful:
+ grow_successful, index_selected_predictor = bart.grow_tree(
+ self.tree, index_leaf_node
+ )
+ if grow_successful:
 # Add new leaf nodes indexes
 new_indexes = self.tree.idx_leaf_nodes[-2:]
 self.expansion_nodes.extend(new_indexes)
+ self.used_variates.append(index_selected_predictor)
 self.tree_history.append(self.tree)
 self.expansion_nodes_history.append(self.expansion_nodes)

From acc5290dda071fbc459e84743c2e7cb04ba7d3dc Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Wed, 4 Nov 2020 11:58:17 -0300
Subject: [PATCH 19/24] use ValueError

---
 pymc3/distributions/bart.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index f20f5bc0da3..cbffcda918d 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -26,21 +26,21 @@ def __init__(self, X, Y, m=200, alpha=0.25, *args, **kwargs):
 super().__init__(shape=X.shape[0], dtype="float64", testval=0, *args, **kwargs)
 if self.X.ndim != 2:
- raise BARTParamsError("The design matrix X must have two dimensions")
+ raise ValueError("The design matrix X must have two dimensions")
 if self.Y.ndim != 1:
- raise BARTParamsError("The response matrix Y must have one dimension")
+ raise ValueError("The response matrix Y must have one dimension")
 if self.X.shape[0] != self.Y.shape[0]:
- raise BARTParamsError(
+ raise ValueError(
 "The design matrix X and the response matrix Y must have the same number of elements"
 )
 if not isinstance(m, int):
- raise BARTParamsError("The number of trees m type must be int")
+ raise ValueError("The number of trees m type must be int")
 if m < 1:
- raise BARTParamsError("The number of trees m must be greater than zero")
+ raise ValueError("The number of trees m must be greater than zero")
 if alpha <= 0 or 1 <= alpha:
- raise BARTParamsError(
+ raise ValueError(
 "The value for the alpha parameter for the tree structure "
 "must be in the interval (0, 1)"
 )

From 7ac976b47722a6e561d3eda1155ee34b1ef67283 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Fri, 6 Nov 2020 16:05:30 -0300
Subject: [PATCH 20/24] wip return mean and std variable importance

---
 pymc3/distributions/bart.py | 5 ++---
 pymc3/sampling.py | 7 +++++--
 pymc3/step_methods/pgbart.py | 8 +++++---
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/pymc3/distributions/bart.py b/pymc3/distributions/bart.py
index cbffcda918d..bba94f8d22b 100644
--- a/pymc3/distributions/bart.py
+++ b/pymc3/distributions/bart.py
@@ -144,10 +144,9 @@ def get_new_idx_data_points(self, current_split_node, idx_data_points):
 idx_split_variable = current_split_node.idx_split_variable
 split_value = current_split_node.split_value
- left_idx = np.nonzero(self.X[idx_data_points, idx_split_variable] <= split_value)
+ left_idx = self.X[idx_data_points, idx_split_variable] <= split_value
 left_node_idx_data_points = idx_data_points[left_idx]
- right_idx = np.nonzero(~(self.X[idx_data_points, idx_split_variable] <= split_value))
- right_node_idx_data_points = idx_data_points[right_idx]
+ right_node_idx_data_points = idx_data_points[~left_idx]
 return left_node_idx_data_points, right_node_idx_data_points
diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 5004d412362..a93a8aea079 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -607,8 +607,11 @@ def sample(
 trace.report._t_sampling = t_sampling
 if "variable_inclusion" in trace.stat_names:
- variable_inclusion = trace.get_sampler_stats("variable_inclusion")[::-draws]
- trace.report.variable_importance = np.mean([vi / vi.sum() for vi in variable_inclusion], 0)
+ variable_inclusion = np.vstack(trace.get_sampler_stats("variable_inclusion"))
+ variable_inclusion = np.split(variable_inclusion, 50)
+ dada = np.vstack([v.sum(0) / v.sum() for v in variable_inclusion])
+ trace.report.variable_importance_m = dada.mean(0)
+ trace.report.variable_importance_s = dada.std(0)
 n_chains = len(trace.chains)
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index 8a2744ead2d..b53f4e8ecdb 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -60,7 +60,7 @@ def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", m
 self.idx = 0
 if chunk == "auto":
 self.chunk = max(1, int(self.bart.m * 0.1))
- self.variable_inclusion = np.zeros(self.bart.num_variates)
+ self.variable_inclusion = np.zeros(self.bart.num_variates, dtype="int")
 self.num_particles = num_particles
@@ -77,6 +77,7 @@ def astep(self, _):
 bart = self.bart
 num_observations = bart.num_observations
+ variable_inclusion = self.variable_inclusion
 # For the tuning phase we restrict max_stages to a low number, otherwise it is almost certain
 # we will reach max_stages given that our first set of m trees is not good at all.
@@ -142,7 +141,6 @@ def astep(self, _):
 bart.sum_trees_output = bart.Y - R_j + new_prediction
 if not self.tune:
- variable_inclusion = self.variable_inclusion
 for index in new_tree.used_variates:
 variable_inclusion[index] += 1

From 2dda3b020ab4da3ab259ec1cb8136bef0e1abfc4 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Sat, 14 Nov 2020 09:39:12 -0300
Subject: [PATCH 22/24] update release notes, remove vi hdi report

---
 RELEASE-NOTES.md | 3 ++-
 pymc3/sampling.py | 9 ++-------
 pymc3/step_methods/pgbart.py | 5 +++++
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/RELEASE-NOTES.md b/RELEASE-NOTES.md
index 1640bf52514..0eb41630a74 100644
--- a/RELEASE-NOTES.md
+++ b/RELEASE-NOTES.md
@@ -14,8 +14,9 @@
 ### New features
 - `sample_posterior_predictive_w` can now feed on `xarray.Dataset` - e.g. from `InferenceData.posterior`. (see [#4042](https://github.com/pymc-devs/pymc3/pull/4042))
 - Add MLDA, a new stepper for multilevel sampling. MLDA can be used when a hierarchy of approximate posteriors of varying accuracy is available, offering improved sampling efficiency especially in high-dimensional problems and/or where gradients are not available (see [#3926](https://github.com/pymc-devs/pymc3/pull/3926))
-- Change SMC metropolis kernel to independent metropolis kernel [#4115](https://github.com/pymc-devs/pymc3/pull/3926))
+- Change SMC metropolis kernel to independent metropolis kernel [#4115](https://github.com/pymc-devs/pymc3/pull/4115))
 - Add alternative parametrization to NegativeBinomial distribution in terms of n and p (see [#4126](https://github.com/pymc-devs/pymc3/issues/4126))
+- Add Bayesian Additive Regression Trees (BARTs) [#4183](https://github.com/pymc-devs/pymc3/pull/4183))

 ## PyMC3 3.9.3 (11 August 2020)

diff --git a/pymc3/sampling.py b/pymc3/sampling.py
index 1fdff6a3438..6a44f11cb94 100644
--- a/pymc3/sampling.py
+++ b/pymc3/sampling.py
@@ -607,13 +607,8 @@ def sample(
 trace.report._t_sampling = t_sampling
 if "variable_inclusion" in trace.stat_names:
- variable_inclusion = np.vstack(trace.get_sampler_stats("variable_inclusion"))
- variable_inclusion = np.vstack(
- [v.sum(0) / v.sum() for v in np.array_split(variable_inclusion, 50)]
- )
- trace.report.variable_importance = np.empty((variable_inclusion.shape[1], 3))
- trace.report.variable_importance[:, 0] = variable_inclusion.mean(0)
- trace.report.variable_importance[:, 1:3] = arviz.hdi(variable_inclusion, hdi_prob=0.68)
+ variable_inclusion = np.stack(trace.get_sampler_stats("variable_inclusion")).mean(0)
+ trace.report.variable_importance = variable_inclusion / variable_inclusion.sum()
 n_chains = len(trace.chains)
diff --git a/pymc3/step_methods/pgbart.py b/pymc3/step_methods/pgbart.py
index e3ed5503bc9..1d6a503c4c7 100644
--- a/pymc3/step_methods/pgbart.py
+++ b/pymc3/step_methods/pgbart.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+import logging
+
 import numpy as np
 from theano import function as theano_function
@@ -21,6 +23,8 @@
 from ..model import modelcontext
 from ..theanof import inputvars, make_shared_replacements, join_nonshared_inputs
+_log = logging.getLogger("pymc3")
+
 class PGBART(ArrayStepShared):
 """
@@ -52,6 +56,7 @@ class PGBART(ArrayStepShared):
 stats_dtypes = [{"variable_inclusion": np.ndarray}]
 def __init__(self, vars=None, num_particles=10, max_stages=5000, chunk="auto", model=None):
+ _log.warning("The BART model is experimental. Use with caution.")
 model = modelcontext(model)
 vars = inputvars(vars)
 self.bart = vars[0].distribution

From bb69a7642a697402f50ee30600d39a0abe2b5f98 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Sat, 14 Nov 2020 12:26:50 -0300
Subject: [PATCH 23/24] test variable importance

---
 pymc3/tests/test_sampling.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index 389b3eb4520..cb7cb5ab88d 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -166,6 +166,20 @@ def test_trace_report(self, step_cls, discard):
 assert isinstance(trace.report.t_sampling, float)
 pass
+ def test_trace_report_bart():
+ X = np.random.normal(0, 1, size=(3, 250)).T
+ Y = np.random.normal(0, 1, size=250)
+ X[:, 0] = np.random.normal(Y, 0.1)
+
+ with pm.Model() as model:
+ mu = pm.BART("mu", X, Y, m=20)
+ sigma = pm.HalfNormal("sigma", 1)
+ y = pm.Normal("y", mu, sigma, observed=Y)
+ trace = pm.sample(500, tune=100, chains=1, random_seed=3415)
+ var_imp = trace.report.variable_importance
+ assert var_imp[0] > var_imp[1:].sum()
+ npt.assert_almost_equal(var_imp.sum(), 1)
+
 def test_return_inferencedata(self):
 with self.model:
 kwargs = dict(draws=100, tune=50, cores=1, chains=2, step=pm.Metropolis())

From 5a7b55284d97b3b72db56eab80139fb814350c10 Mon Sep 17 00:00:00 2001
From: aloctavodia
Date: Sat, 14 Nov 2020 13:29:18 -0300
Subject: [PATCH 24/24] fix test

---
 pymc3/tests/test_sampling.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pymc3/tests/test_sampling.py b/pymc3/tests/test_sampling.py
index cb7cb5ab88d..a415f648ea4 100644
--- a/pymc3/tests/test_sampling.py
+++ b/pymc3/tests/test_sampling.py
@@ -166,7 +166,7 @@ def test_trace_report(self, step_cls, discard):
 assert isinstance(trace.report.t_sampling, float)
 pass
- def test_trace_report_bart():
+ def test_trace_report_bart(self):
 X = np.random.normal(0, 1, size=(3, 250)).T
 Y = np.random.normal(0, 1, size=250)
 X[:, 0] = np.random.normal(Y, 0.1)
@@ -175,7 +175,7 @@ def test_trace_report_bart(self):
 mu = pm.BART("mu", X, Y, m=20)
 sigma = pm.HalfNormal("sigma", 1)
 y = pm.Normal("y", mu, sigma, observed=Y)
- trace = pm.sample(500, tune=100, chains=1, random_seed=3415)
+ trace = pm.sample(500, tune=100, random_seed=3415)
 var_imp = trace.report.variable_importance
 assert var_imp[0] > var_imp[1:].sum()
 npt.assert_almost_equal(var_imp.sum(), 1)