diff --git a/doc/api.rst b/doc/api.rst
index 18fdf5195..793adb7a5 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -150,6 +150,7 @@ tree models.
    PermutationForestClassifier
    PermutationForestRegressor
    build_coleman_forest
+   build_permutation_forest
    build_hyppo_oob_forest
    build_hyppo_cv_forest
    PermutationHonestForestClassifier
diff --git a/doc/whats_new/v0.7.rst b/doc/whats_new/v0.7.rst
index 717faee65..2d474beca 100644
--- a/doc/whats_new/v0.7.rst
+++ b/doc/whats_new/v0.7.rst
@@ -25,6 +25,8 @@ Changelog
     by `Adam Li`_ (:pr:`#211`)
 - |Feature| Introduce a new set of simulations based on Marron and Wand 1992.
     by `Sambit Panda`_ (:pr:`#203`)
+- |Feature| :func:`sktree.stats.build_coleman_forest` and :func:`sktree.stats.build_permutation_forest`
+    are added to compute p-values given an estimator and permutation-estimator, `Adam Li`_ (:pr:`#222`)
 
 Code and Documentation Contributors
 -----------------------------------
diff --git a/sktree/_lib/sklearn_fork b/sktree/_lib/sklearn_fork
index d9c9a3918..db91ddb6f 160000
--- a/sktree/_lib/sklearn_fork
+++ b/sktree/_lib/sklearn_fork
@@ -1 +1 @@
-Subproject commit d9c9a3918a8de5f0984a25d4a7c1968ff18c916d
+Subproject commit db91ddb6f090ac091588dd844be6d9c60afc341f
diff --git a/sktree/stats/__init__.py b/sktree/stats/__init__.py
index ede7a5f8f..39d09234b 100644
--- a/sktree/stats/__init__.py
+++ b/sktree/stats/__init__.py
@@ -4,6 +4,7 @@
     build_coleman_forest,
     build_hyppo_cv_forest,
     build_hyppo_oob_forest,
+    build_permutation_forest,
 )
 from .monte_carlo import PermutationTest
 from .permutationforest import PermutationForestClassifier, PermutationForestRegressor
@@ -18,5 +19,6 @@
     "build_hyppo_cv_forest",
     "build_hyppo_oob_forest",
     "build_coleman_forest",
+    "build_permutation_forest",
     "PermutationHonestForestClassifier",
 ]
diff --git a/sktree/stats/forestht.py b/sktree/stats/forestht.py
index 9fa3a837a..9b9aef692 100644
--- a/sktree/stats/forestht.py
+++ b/sktree/stats/forestht.py
@@ -1,4 +1,5 @@
 import threading
+from collections import namedtuple
 from typing import Callable, Optional, Tuple, Union
 
 import numpy as np
@@ -24,6 +25,7 @@
 from ..experimental import conditional_resample
 from ..tree import DecisionTreeClassifier, DecisionTreeRegressor
 from ..tree._classes import DTYPE
+from .permuteforest import PermutationHonestForestClassifier
 from .utils import (
     METRIC_FUNCTIONS,
     POSITIVE_METRICS,
@@ -1212,6 +1214,11 @@ def _parallel_predict_proba_oob(predict_proba, X, out, idx, test_idx, lock):
     return prediction
 
 
+ForestTestResult = namedtuple(
+    "ForestTestResult", ["observe_test_stat", "permuted_stat", "observe_stat", "pvalue"]
+)
+
+
 def build_coleman_forest(
     est,
     perm_est,
@@ -1219,13 +1226,20 @@ def build_coleman_forest(
     y,
     covariate_index=None,
     metric="s@98",
-    n_repeats=1000,
+    n_repeats=10_000,
     verbose=False,
     seed=None,
     return_posteriors=True,
     **metric_kwargs,
 ):
-    """Build a hypothesis testing forest using oob samples.
+    """Build a hypothesis testing forest using a two-forest approach.
+
+    The two-forest approach stems from the Coleman et al. 2022 paper, where
+    two forests are trained: one on the original dataset, and one on the
+    permuted dataset. The dataset is either permuted once, or independently for
+    each tree in the permuted forest. The original test statistic is computed by
+    comparing the metric on both forests ``(metric_forest - metric_perm_forest)``.
+    For full details, see :footcite:`coleman2022scalable`.
 
     Parameters
     ----------
@@ -1244,7 +1258,9 @@ def build_coleman_forest(
         98% specificity.
     n_repeats : int, optional
         Number of times to bootstrap sample the two forests to construct
-        the null distribution, by default 1000.
+        the null distribution, by default 10000. The construction of the
+        null forests will be parallelized according to the ``n_jobs``
+        argument of the ``est`` forest.
     verbose : bool, optional
         Verbosity, by default False.
     seed : int, optional
@@ -1267,31 +1283,30 @@ def build_coleman_forest(
     perm_forest_proba : ArrayLike of shape (n_estimators, n_samples, n_outputs)
         The predicted posterior probabilities for each of the permuted estimators
         on their out of bag samples.
+
+    References
+    ----------
+    .. footbibliography::
     """
-    rng = np.random.default_rng(seed)
     metric_func: Callable[[ArrayLike, ArrayLike], float] = METRIC_FUNCTIONS[metric]
 
     if covariate_index is None:
         covariate_index = np.arange(X.shape[1], dtype=int)
 
-    # perform permutation of covariates
-    # TODO: refactor permutations into the HonestForest(?)
-    n_samples_train = X.shape[0]
-    index_arr = rng.choice(
-        np.arange(n_samples_train, dtype=int),
-        size=(n_samples_train, 1),
-        replace=False,
-        shuffle=True,
-    )
-    X_permute = X.copy()
-    X_permute[:, covariate_index] = X_permute[index_arr, covariate_index]
+    if not isinstance(perm_est, PermutationHonestForestClassifier):
+        raise RuntimeError(
+            f"Permutation forest must be a PermutationHonestForestClassifier, got {type(perm_est)}"
+        )
 
     # build two sets of forests
     est, orig_forest_proba = build_hyppo_oob_forest(est, X, y, verbose=verbose)
     perm_est, perm_forest_proba = build_hyppo_oob_forest(
-        perm_est, X_permute, y, verbose=verbose, covariate_index=covariate_index
+        perm_est, X, y, verbose=verbose, covariate_index=covariate_index
     )
 
+    # get the number of jobs
+    n_jobs = est.n_jobs
+
     metric_star, metric_star_pi = _compute_null_distribution_coleman(
         y,
         orig_forest_proba,
@@ -1299,6 +1314,7 @@ def build_coleman_forest(
         metric,
         n_repeats=n_repeats,
         seed=seed,
+        n_jobs=n_jobs,
         **metric_kwargs,
     )
 
@@ -1320,10 +1336,133 @@ def build_coleman_forest(
     else:
         pvalue = (1 + (null_dist >= observe_test_stat).sum()) / (1 + n_repeats)
 
+    forest_result = ForestTestResult(observe_test_stat, permute_stat, observe_stat, pvalue)
     if return_posteriors:
-        return observe_test_stat, pvalue, orig_forest_proba, perm_forest_proba
+        return forest_result, orig_forest_proba, perm_forest_proba
     else:
-        return observe_test_stat, pvalue
+        return forest_result
+
+
+def build_permutation_forest(
+    est,
+    perm_est,
+    X,
+    y,
+    covariate_index=None,
+    metric="s@98",
+    n_repeats=500,
+    verbose=False,
+    seed=None,
+    return_posteriors=True,
+    **metric_kwargs,
+):
+    """Build a hypothesis testing forest using a permutation-forest approach.
+
+    The permutation-forest approach stems from standard permutaiton-testing, where
+    each forest is trained on a new permutation of the dataset. The original test
+    statistic is computed on the original data. Then the pvalue is computed
+    by comparing the original test statistic to the null distribution of the
+    test statistic computed from the permuted forests.
+
+    Parameters
+    ----------
+    est : Forest
+        The type of forest to use. Must be enabled with ``bootstrap=True``.
+    perm_est : Forest
+        The forest to use for the permuted dataset. Should be
+        ``PermutationHonestForestClassifier``.
+    X : ArrayLike of shape (n_samples, n_features)
+        Data.
+    y : ArrayLike of shape (n_samples, n_outputs)
+        Binary target, so ``n_outputs`` should be at most 1.
+    covariate_index : ArrayLike, optional of shape (n_covariates,)
+        The index array of covariates to shuffle, by default None.
+    metric : str, optional
+        The metric to compute, by default "s@98", for sensitivity at
+        98% specificity.
+    n_repeats : int, optional
+        Number of times to bootstrap sample the two forests to construct
+        the null distribution, by default 10000. The construction of the
+        null forests will be parallelized according to the ``n_jobs``
+        argument of the ``est`` forest.
+    verbose : bool, optional
+        Verbosity, by default False.
+    seed : int, optional
+        Random seed, by default None.
+    return_posteriors : bool, optional
+        Whether or not to return the posteriors, by default False.
+    **metric_kwargs : dict, optional
+        Additional keyword arguments to pass to the metric function.
+
+    Returns
+    -------
+    observe_stat : float
+        The test statistic. To compute the test statistic, take
+        ``permute_stat_`` and subtract ``observe_stat_``.
+    pvalue : float
+        The p-value of the test statistic.
+    orig_forest_proba : ArrayLike of shape (n_estimators, n_samples, n_outputs)
+        The predicted posterior probabilities for each estimator on their
+        out of bag samples.
+    perm_forest_proba : ArrayLike of shape (n_estimators, n_samples, n_outputs)
+        The predicted posterior probabilities for each of the permuted estimators
+        on their out of bag samples.
+
+    References
+    ----------
+    .. footbibliography::
+    """
+    rng = np.random.default_rng(seed)
+    metric_func: Callable[[ArrayLike, ArrayLike], float] = METRIC_FUNCTIONS[metric]
+
+    if covariate_index is None:
+        covariate_index = np.arange(X.shape[1], dtype=int)
+
+    if not isinstance(perm_est, PermutationHonestForestClassifier):
+        raise RuntimeError(
+            f"Permutation forest must be a PermutationHonestForestClassifier, got {type(perm_est)}"
+        )
+
+    # train the original forest on unpermuted data
+    est, orig_forest_proba = build_hyppo_oob_forest(est, X, y, verbose=verbose)
+    y_pred_proba_orig = np.nanmean(orig_forest_proba, axis=0)
+    observe_test_stat = metric_func(y, y_pred_proba_orig, **metric_kwargs)
+
+    # get the number of jobs
+    index_arr = np.arange(X.shape[0], dtype=int).reshape(-1, 1)
+
+    # train many null forests
+    X_perm = X.copy()
+    null_dist = []
+    for _ in range(n_repeats):
+        rng.shuffle(index_arr)
+        perm_X_cov = X_perm[index_arr, covariate_index]
+        X_perm[:, covariate_index] = perm_X_cov
+
+        #
+        perm_est = clone(perm_est)
+        perm_est.set_params(random_state=rng.integers(0, np.iinfo(np.int32).max))
+
+        perm_est, perm_forest_proba = build_hyppo_oob_forest(
+            perm_est, X_perm, y, verbose=verbose, covariate_index=covariate_index
+        )
+
+        y_pred_proba_perm = np.nanmean(perm_forest_proba, axis=0)
+        permute_stat = metric_func(y, y_pred_proba_perm, **metric_kwargs)
+        null_dist.append(permute_stat)
+
+    # compute pvalue, which note is opposite that of the Coleman approach, since
+    # we are testing if the null distribution results in a test statistic greater
+    if metric in POSITIVE_METRICS:
+        pvalue = (1 + (null_dist >= observe_test_stat).sum()) / (1 + n_repeats)
+    else:
+        pvalue = (1 + (null_dist <= observe_test_stat).sum()) / (1 + n_repeats)
+
+    forest_result = ForestTestResult(observe_test_stat, permute_stat, None, pvalue)
+    if return_posteriors:
+        return forest_result, orig_forest_proba, perm_forest_proba
+    else:
+        return forest_result
 
 
 def build_hyppo_oob_forest(est, X, y, verbose=False, **est_kwargs):
@@ -1474,36 +1613,3 @@ def build_hyppo_cv_forest(
         est_list.append(est)
 
     return est_list, all_proba_list
-
-
-def predict_oob_proba(est, X, verbose=False):
-    """Predict out of bag posterior probabilities."""
-    from sklearn.utils.validation import check_is_fitted
-
-    check_is_fitted(est)
-
-    # now evaluate
-    X = est._validate_X_predict(X)
-
-    # if we trained a binning tree, then we should re-bin the data
-    # XXX: this is inefficient and should be improved to be in line with what
-    # the Histogram Gradient Boosting Tree does, where the binning thresholds
-    # are passed into the tree itself, thus allowing us to set the node feature
-    # value thresholds within the tree itself.
-    if est.max_bins is not None:
-        X = est._bin_data(X, is_training_data=False).astype(DTYPE)
-
-    # Assign chunk of trees to jobs
-    n_jobs, _, _ = _partition_estimators(est.n_estimators, est.n_jobs)
-
-    # avoid storing the output of every estimator by summing them here
-    lock = threading.Lock()
-    # accumulate the predictions across all trees
-    all_proba = np.full(
-        (len(est.estimators_), X.shape[0], est.n_classes_), np.nan, dtype=np.float64
-    )
-    Parallel(n_jobs=n_jobs, verbose=verbose, require="sharedmem")(
-        delayed(_parallel_predict_proba_oob)(e.predict_proba, X, all_proba, idx, test_idx, lock)
-        for idx, (e, test_idx) in enumerate(zip(est.estimators_, est.oob_samples_))
-    )
-    return all_proba
diff --git a/sktree/stats/tests/test_forestht.py b/sktree/stats/tests/test_forestht.py
index 001ca9669..ca9d7a1de 100644
--- a/sktree/stats/tests/test_forestht.py
+++ b/sktree/stats/tests/test_forestht.py
@@ -13,7 +13,13 @@
 
 from sktree import HonestForestClassifier, RandomForestClassifier, RandomForestRegressor
 from sktree._lib.sklearn.tree import DecisionTreeClassifier
-from sktree.stats import FeatureImportanceForestClassifier, FeatureImportanceForestRegressor
+from sktree.stats import (
+    FeatureImportanceForestClassifier,
+    FeatureImportanceForestRegressor,
+    PermutationHonestForestClassifier,
+    build_coleman_forest,
+    build_permutation_forest,
+)
 from sktree.stats.utils import _non_nan_samples
 from sktree.tree import MultiViewDecisionTreeClassifier, ObliqueDecisionTreeClassifier
 
@@ -772,3 +778,86 @@ def test_null_with_partial_auc():
     null_dist_pvalue = wilcoxon(first_null_dist, second_null_dist).pvalue
     assert null_dist_pvalue < 0.05, null_dist_pvalue
     assert pvalue > 0.05, f"pvalue: {pvalue}"
+
+
+def test_build_coleman_forest():
+    """Simple test for building a Coleman forest.
+
+    Test the function under alternative and null hypothesis for a very simple dataset.
+    """
+    n_estimators = 10
+    n_samples = 20
+    n_features = 3
+    rng = np.random.default_rng(seed)
+
+    _X = rng.uniform(size=(n_samples, n_features))
+    _X = rng.uniform(size=(n_samples // 2, n_features))
+    X2 = _X + 3
+    X = np.vstack([_X, X2])
+    y = np.vstack(
+        [np.zeros((n_samples // 2, 1)), np.ones((n_samples // 2, 1))]
+    )  # Binary classification
+
+    clf = HonestForestClassifier(
+        n_estimators=n_estimators, random_state=seed, n_jobs=-1, honest_fraction=0.5, bootstrap=True
+    )
+    perm_clf = PermutationHonestForestClassifier(
+        n_estimators=n_estimators, random_state=seed, n_jobs=-1, honest_fraction=0.5, bootstrap=True
+    )
+    with pytest.raises(
+        RuntimeError, match="Permutation forest must be a PermutationHonestForestClassifier"
+    ):
+        build_coleman_forest(clf, clf, X, y)
+
+    forest_result, orig_forest_proba, perm_forest_proba = build_coleman_forest(
+        clf, perm_clf, X, y, metric="s@98"
+    )
+    assert forest_result.pvalue <= 0.05, f"{forest_result.pvalue}"
+    assert forest_result.observe_stat > 0.1, f"{forest_result.observe_stat}"
+    assert_array_equal(orig_forest_proba.shape, perm_forest_proba.shape)
+
+    X = np.vstack([_X, _X])
+    forest_result, _, _ = build_coleman_forest(clf, perm_clf, X, y, metric="s@98")
+    assert forest_result.pvalue > 0.05, f"{forest_result.pvalue}"
+    assert forest_result.observe_stat < 0.05, f"{forest_result.observe_stat}"
+
+
+def test_build_permutation_forest():
+    """Simple test for building a permutation forest."""
+    n_estimators = 30
+    n_samples = 100
+    n_features = 3
+    rng = np.random.default_rng(seed)
+
+    _X = rng.uniform(size=(n_samples, n_features))
+    _X = rng.uniform(size=(n_samples // 2, n_features))
+    X2 = _X + 10
+    X = np.vstack([_X, X2])
+    y = np.vstack(
+        [np.zeros((n_samples // 2, 1)), np.ones((n_samples // 2, 1))]
+    )  # Binary classification
+
+    clf = HonestForestClassifier(
+        n_estimators=n_estimators, random_state=seed, n_jobs=-1, honest_fraction=0.5, bootstrap=True
+    )
+    perm_clf = PermutationHonestForestClassifier(
+        n_estimators=n_estimators, random_state=seed, n_jobs=-1, honest_fraction=0.5, bootstrap=True
+    )
+    with pytest.raises(
+        RuntimeError, match="Permutation forest must be a PermutationHonestForestClassifier"
+    ):
+        build_permutation_forest(clf, clf, X, y, seed=seed)
+
+    forest_result, orig_forest_proba, perm_forest_proba = build_permutation_forest(
+        clf, perm_clf, X, y, metric="s@98", n_repeats=20, seed=seed
+    )
+    assert forest_result.observe_test_stat > 0.1, f"{forest_result.observe_stat}"
+    assert forest_result.pvalue <= 0.05, f"{forest_result.pvalue}"
+    assert_array_equal(orig_forest_proba.shape, perm_forest_proba.shape)
+
+    X = np.vstack([_X, _X])
+    forest_result, _, _ = build_permutation_forest(
+        clf, perm_clf, X, y, metric="s@98", n_repeats=10, seed=seed
+    )
+    assert forest_result.pvalue > 0.05, f"{forest_result.pvalue}"
+    assert forest_result.observe_test_stat < 0.05, f"{forest_result.observe_test_stat}"
diff --git a/sktree/stats/utils.py b/sktree/stats/utils.py
index ffda338a2..3dd4d9da8 100644
--- a/sktree/stats/utils.py
+++ b/sktree/stats/utils.py
@@ -1,6 +1,7 @@
 from typing import Optional, Tuple
 
 import numpy as np
+from joblib import Parallel, delayed
 from numpy.typing import ArrayLike
 from scipy.stats import entropy
 from sklearn.ensemble._forest import _generate_unsampled_indices, _get_n_samples_bootstrap
@@ -204,6 +205,7 @@ def _compute_null_distribution_coleman(
     metric: str = "mse",
     n_repeats: int = 1000,
     seed: Optional[int] = None,
+    n_jobs: Optional[int] = None,
     **metric_kwargs,
 ) -> Tuple[ArrayLike, ArrayLike]:
     """Compute null distribution using Coleman method.
@@ -238,9 +240,6 @@ def _compute_null_distribution_coleman(
     metric_star_pi : ArrayLike of shape (n_samples,)
         An array of the metrics computed on the other half of the trees.
     """
-    rng = np.random.default_rng(seed)
-    metric_func = METRIC_FUNCTIONS[metric]
-
     # sample two sets of equal number of trees from the combined forest these are the posteriors
     # (n_estimators * 2, n_samples, n_outputs)
     all_y_pred = np.concatenate((y_pred_proba_normal, y_pred_proba_perm), axis=0)
@@ -258,51 +257,82 @@ def _compute_null_distribution_coleman(
 
     metric_star = np.zeros((n_repeats,))
     metric_star_pi = np.zeros((n_repeats,))
-    for idx in range(n_repeats):
-        # two sets of random indices from 1 : 2N are sampled using Fisher-Yates
-        rng.shuffle(y_pred_ind_arr)
-
-        # get random half of the posteriors from two sets of trees
-        first_forest_inds = y_pred_ind_arr[: n_estimators // 2]
-        second_forest_inds = y_pred_ind_arr[n_estimators // 2 :]
-
-        # get random half of the posteriors as one forest
-        first_forest_pred = all_y_pred[first_forest_inds, ...]
-        second_forest_pred = all_y_pred[second_forest_inds, ...]
-
-        # determine if there are any nans in the final posterior array, when
-        # averaged over the trees
-        first_forest_samples = _non_nan_samples(first_forest_pred)
-        second_forest_samples = _non_nan_samples(second_forest_pred)
-
-        # todo: is this step necessary?
-        # non_nan_samples = np.intersect1d(
-        #     first_forest_samples, second_forest_samples, assume_unique=True
-        # )
-        # now average the posteriors over the trees for the non-nan samples
-        # y_pred_first_half = np.nanmean(first_forest_pred[:, non_nan_samples, :], axis=0)
-        # y_pred_second_half = np.nanmean(second_forest_pred[:, non_nan_samples, :], axis=0)
-        # # compute two instances of the metric from the sampled trees
-        # first_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_first_half)
-        # second_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_second_half)
-
-        y_pred_first_half = np.nanmean(first_forest_pred[:, first_forest_samples, :], axis=0)
-        y_pred_second_half = np.nanmean(second_forest_pred[:, second_forest_samples, :], axis=0)
 
-        # compute two instances of the metric from the sampled trees
-        first_half_metric = metric_func(
-            y_test[first_forest_samples, :], y_pred_first_half, **metric_kwargs
-        )
-        second_half_metric = metric_func(
-            y_test[second_forest_samples, :], y_pred_second_half, **metric_kwargs
+    # generate the random seeds for the parallel jobs
+    ss = np.random.SeedSequence(seed)
+    out = Parallel(n_jobs=n_jobs)(
+        delayed(_parallel_build_null_forests)(
+            y_pred_ind_arr,
+            n_estimators,
+            all_y_pred,
+            y_test,
+            seed,
+            metric,
+            **metric_kwargs,
         )
+        for i, seed in zip(range(n_repeats), ss.spawn(n_repeats))
+    )
 
+    for idx, (first_half_metric, second_half_metric) in enumerate(out):
         metric_star[idx] = first_half_metric
         metric_star_pi[idx] = second_half_metric
 
     return metric_star, metric_star_pi
 
 
+def _parallel_build_null_forests(
+    index_arr: ArrayLike,
+    n_estimators: int,
+    all_y_pred: ArrayLike,
+    y_test: ArrayLike,
+    seed: int,
+    metric: str,
+    **metric_kwargs: dict,
+):
+    """Randomly sample two sets of forests and compute the metric on each."""
+    rng = np.random.default_rng(seed)
+    metric_func = METRIC_FUNCTIONS[metric]
+
+    # two sets of random indices from 1 : 2N are sampled using Fisher-Yates
+    rng.shuffle(index_arr)
+
+    # get random half of the posteriors from two sets of trees
+    first_forest_inds = index_arr[: n_estimators // 2]
+    second_forest_inds = index_arr[n_estimators // 2 :]
+
+    # get random half of the posteriors as one forest
+    first_forest_pred = all_y_pred[first_forest_inds, ...]
+    second_forest_pred = all_y_pred[second_forest_inds, ...]
+
+    # determine if there are any nans in the final posterior array, when
+    # averaged over the trees
+    first_forest_samples = _non_nan_samples(first_forest_pred)
+    second_forest_samples = _non_nan_samples(second_forest_pred)
+
+    # todo: is this step necessary?
+    # non_nan_samples = np.intersect1d(
+    #     first_forest_samples, second_forest_samples, assume_unique=True
+    # )
+    # now average the posteriors over the trees for the non-nan samples
+    # y_pred_first_half = np.nanmean(first_forest_pred[:, non_nan_samples, :], axis=0)
+    # y_pred_second_half = np.nanmean(second_forest_pred[:, non_nan_samples, :], axis=0)
+    # # compute two instances of the metric from the sampled trees
+    # first_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_first_half)
+    # second_half_metric = metric_func(y_test[non_nan_samples, :], y_pred_second_half)
+
+    y_pred_first_half = np.nanmean(first_forest_pred[:, first_forest_samples, :], axis=0)
+    y_pred_second_half = np.nanmean(second_forest_pred[:, second_forest_samples, :], axis=0)
+
+    # compute two instances of the metric from the sampled trees
+    first_half_metric = metric_func(
+        y_test[first_forest_samples, :], y_pred_first_half, **metric_kwargs
+    )
+    second_half_metric = metric_func(
+        y_test[second_forest_samples, :], y_pred_second_half, **metric_kwargs
+    )
+    return first_half_metric, second_half_metric
+
+
 def get_per_tree_oob_samples(est: BaseForest):
     """The sample indices that are out-of-bag.
 
diff --git a/sktree/tree/_honest_tree.py b/sktree/tree/_honest_tree.py
index 6e9260e57..2a434daea 100644
--- a/sktree/tree/_honest_tree.py
+++ b/sktree/tree/_honest_tree.py
@@ -4,6 +4,7 @@
 import numpy as np
 from sklearn.base import ClassifierMixin, MetaEstimatorMixin, _fit_context, clone
 from sklearn.model_selection import StratifiedShuffleSplit
+from sklearn.utils._param_validation import Interval, RealNotInt, StrOptions
 from sklearn.utils.multiclass import _check_partial_fit_first_call, check_classification_targets
 from sklearn.utils.validation import check_is_fitted, check_X_y
 
@@ -269,6 +270,13 @@ class frequency in the voting subsample.
            0.8       , 0.8       , 0.93333333, 1.        , 1.        ])
     """
 
+    _parameter_constraints: dict = {
+        **BaseDecisionTree._parameter_constraints,
+        "honest_fraction": [Interval(RealNotInt, 0.0, 1.0, closed="neither")],
+        "honest_prior": [StrOptions({"empirical", "uniform", "ignore"})],
+        "stratify": ["boolean"],
+    }
+
     def __init__(
         self,
         criterion="gini",