LSSTDESC · vitenti · Jan 30, 2024 · Dec 6, 2023 · Dec 6, 2023 · Dec 7, 2023
diff --git a/examples/des_y1_3x2pt/des_y1_3x2pt_PT.py b/examples/des_y1_3x2pt/des_y1_3x2pt_PT.py
@@ -43,6 +43,9 @@ class CclSetup:
 
 @dataclass
 class CElls:
+    """A package of related C_ell values, to reduce the number of variables
+    used in the :func:`run_likelihood` function."""
+
     GG: np.ndarray
     GI: np.ndarray
     II: np.ndarray
@@ -234,6 +237,7 @@ def run_likelihood() -> None:
     assert likelihood.cov is not None
 
     stat0 = likelihood.statistics[0].statistic
+    assert isinstance(stat0, TwoPoint)
 
     # x = likelihood.statistics[0].ell_or_theta_
     # y_data = likelihood.statistics[0].measured_statistic_
@@ -243,11 +247,12 @@ def run_likelihood() -> None:
 
     print(list(stat0.cells.keys()))
 
-    stat2 = likelihood.statistics[2].statistic
+    stat2 = likelihood.statistics[2].statistic  # pylint: disable=no-member
     assert isinstance(stat2, TwoPoint)
     print(list(stat2.cells.keys()))
 
-    stat3 = likelihood.statistics[3].statistic
+    stat3 = likelihood.statistics[3].statistic  # pylint: disable=no-member
+    assert isinstance(stat3, TwoPoint)
     print(list(stat3.cells.keys()))
 
     plot_predicted_and_measured_statistics(

diff --git a/examples/des_y1_3x2pt/des_y1_cosmic_shear_TATT.py b/examples/des_y1_3x2pt/des_y1_cosmic_shear_TATT.py
@@ -158,6 +158,7 @@ def run_likelihood() -> None:
     print(f"Log-like = {log_like:.1f}")
 
     # Plot the predicted and measured statistic
+    assert isinstance(likelihood, ConstGaussian)
     two_point_0 = likelihood.statistics[0].statistic
     assert isinstance(two_point_0, TwoPoint)
 

diff --git a/examples/des_y1_3x2pt/des_y1_cosmic_shear_pk_modifier.py b/examples/des_y1_3x2pt/des_y1_cosmic_shear_pk_modifier.py
@@ -156,10 +156,9 @@ def run_likelihood() -> None:
     print(f"Log-like = {log_like:.1f}")
 
     # Plot the predicted and measured statistic
+    assert isinstance(likelihood, ConstGaussian)
     two_point_0 = likelihood.statistics[0].statistic
     assert isinstance(two_point_0, TwoPoint)
-
-    assert isinstance(likelihood, ConstGaussian)
     assert likelihood.cov is not None
 
     # Predict CCL Cl

diff --git a/firecrown/connector/cosmosis/likelihood.py b/firecrown/connector/cosmosis/likelihood.py
@@ -40,9 +40,6 @@ class FirecrownLikelihood:
     :param config: current CosmoSIS datablock
     """
 
-    likelihood: Likelihood
-    map: MappingCosmoSIS
-
     def __init__(self, config: cosmosis.datablock):
         """Create the FirecrownLikelihood object from the given configuration."""
         likelihood_source = config.get_string(option_section, "likelihood_source", "")
@@ -60,6 +57,7 @@ def __init__(self, config: cosmosis.datablock):
 
         self.firecrown_module_name = option_section
         self.sampling_sections = sections
+        self.likelihood: Likelihood
         try:
             self.likelihood, self.tools = load_likelihood(
                 likelihood_source, build_parameters
@@ -69,7 +67,7 @@ def __init__(self, config: cosmosis.datablock):
             print(f"The Firecrown likelihood needs a required parameter: {err}")
             print("*" * 30)
             raise
-        self.map = mapping_builder(
+        self.map: MappingCosmoSIS = mapping_builder(
             input_style="CosmoSIS", require_nonlinear_pk=require_nonlinear_pk
         )
 
@@ -126,23 +124,31 @@ def execute(self, sample: cosmosis.datablock) -> int:
         for section, name, val in derived_params_collection:
             sample.put(section, name, val)
 
-        self.likelihood.reset()
-        self.tools.reset()
+        if not isinstance(self.likelihood, GaussFamily):
+            self.likelihood.reset()
+            self.tools.reset()
+            return 0
 
-        # Save concatenated data vector and inverse covariance to enable support
+        # If we get here, we have a GaussFamily likelihood, and we need to
+        # save the concatenated data vector and inverse covariance to enable support
         # for the CosmoSIS Fisher sampler. This can only work for likelihoods
         # that have these quantities. Currently, this is only GaussFamily.
 
-        if isinstance(self.likelihood, GaussFamily):
-            sample.put(
-                "data_vector", "firecrown_theory", self.likelihood.predicted_data_vector
-            )
-            sample.put(
-                "data_vector", "firecrown_data", self.likelihood.measured_data_vector
-            )
-            sample.put(
-                "data_vector", "firecrown_inverse_covariance", self.likelihood.inv_cov
-            )
+        sample.put(
+            "data_vector",
+            "firecrown_theory",
+            self.likelihood.get_theory_vector(),
+        )
+        sample.put(
+            "data_vector",
+            "firecrown_data",
+            self.likelihood.get_data_vector(),
+        )
+        sample.put(
+            "data_vector",
+            "firecrown_inverse_covariance",
+            self.likelihood.inv_cov,
+        )
 
         # Write out theory and data vectors to the data block the ease
         # debugging.
@@ -163,14 +169,16 @@ def execute(self, sample: cosmosis.datablock) -> int:
                 sample.put(
                     "data_vector",
                     f"theory_{stat.sacc_data_type}_{tracer}",
-                    stat.predicted_statistic_,
+                    stat.get_theory_vector(),
                 )
                 sample.put(
                     "data_vector",
                     f"data_{stat.sacc_data_type}_{tracer}",
-                    stat.measured_statistic_,
+                    stat.get_data_vector(),
                 )
 
+        self.likelihood.reset()
+        self.tools.reset()
         return 0
 
     def form_error_message(self, exc: MissingSamplerParameterError) -> str:

diff --git a/firecrown/likelihood/gauss_family/gauss_family.py b/firecrown/likelihood/gauss_family/gauss_family.py
@@ -8,8 +8,9 @@
 """
 
 from __future__ import annotations
+
 from enum import Enum
-from typing import List, Optional, Tuple, Sequence
+from typing import List, Optional, Tuple, Sequence, Dict
 from typing import final
 import warnings
 
@@ -61,12 +62,24 @@ def __init__(
         self.state: State = State.INITIALIZED
         if len(statistics) == 0:
             raise ValueError("GaussFamily requires at least one statistic")
-        self.statistics: UpdatableCollection = UpdatableCollection(
+
+        for i, s in enumerate(statistics):
+            if not isinstance(s, Statistic):
+                raise ValueError(
+                    f"statistics[{i}] is not an instance of Statistic: {s}"
+                    f"it is a {type(s)} instead."
+                )
+
+        self.statistics: UpdatableCollection[GuardedStatistic] = UpdatableCollection(
             GuardedStatistic(s) for s in statistics
         )
         self.cov: Optional[npt.NDArray[np.float64]] = None
         self.cholesky: Optional[npt.NDArray[np.float64]] = None
         self.inv_cov: Optional[npt.NDArray[np.float64]] = None
+        self.cov_index_map: Optional[Dict[int, int]] = None
+        self.computed_theory_vector = False
+        self.theory_vector: Optional[npt.NDArray[np.double]] = None
+        self.data_vector: Optional[npt.NDArray[np.double]] = None
 
     def _update(self, _: ParamsMap) -> None:
         """Handle the state resetting required by :class:`GaussFamily`
@@ -84,6 +97,10 @@ def _reset(self) -> None:
         at the start of the method, and change the state at the end of the
         method."""
         assert self.state == State.UPDATED, "update() must be called before reset()"
+
+        self.computed_theory_vector = False
+        self.theory_vector = None
+
         self.state = State.READY
 
     def read(self, sacc_data: sacc.Sacc) -> None:
@@ -98,28 +115,50 @@ def read(self, sacc_data: sacc.Sacc) -> None:
             raise RuntimeError(msg)
 
         covariance = sacc_data.covariance.dense
+
+        indices_list = []
+        data_vector_list = []
         for stat in self.statistics:
             stat.read(sacc_data)
+            if stat.statistic.sacc_indices is None:
+                raise RuntimeError(
+                    f"The statistic {stat.statistic} has no sacc_indices."
+                )
+            indices_list.append(stat.statistic.sacc_indices.copy())
+            data_vector_list.append(stat.statistic.get_data_vector())
 
-        indices_list = [s.statistic.sacc_indices.copy() for s in self.statistics]
         indices = np.concatenate(indices_list)
+        data_vector = np.concatenate(data_vector_list)
         cov = np.zeros((len(indices), len(indices)))
 
         for new_i, old_i in enumerate(indices):
             for new_j, old_j in enumerate(indices):
                 cov[new_i, new_j] = covariance[old_i, old_j]
 
+        self.data_vector = data_vector
+        self.cov_index_map = {old_i: new_i for new_i, old_i in enumerate(indices)}
         self.cov = cov
         self.cholesky = scipy.linalg.cholesky(self.cov, lower=True)
         self.inv_cov = np.linalg.inv(cov)
 
         self.state = State.READY
 
     @final
-    def get_cov(self) -> npt.NDArray[np.float64]:
-        """Gets the current covariance matrix."""
+    def get_cov(self, statistic: Optional[Statistic] = None) -> npt.NDArray[np.float64]:
+        """Gets the current covariance matrix.
+
+        :param statistic: The statistic for which the sub-covariance matrix
+        should be returned. If not specified, return the covariance of all
+        statistics.
+        """
         assert self._is_ready(), "read() must be called before get_cov()"
         assert self.cov is not None
+        if statistic is not None:
+            assert statistic.sacc_indices is not None
+            assert self.cov_index_map is not None
+            idx = [self.cov_index_map[idx] for idx in statistic.sacc_indices]
+            # We do not change the state.
+            return self.cov[np.ix_(idx, idx)]
         # We do not change the state.
         return self.cov
 
@@ -129,11 +168,8 @@ def get_data_vector(self) -> npt.NDArray[np.float64]:
         order."""
         assert self._is_ready(), "read() must be called before get_data_vector()"
 
-        data_vector_list: List[npt.NDArray[np.float64]] = [
-            stat.get_data_vector() for stat in self.statistics
-        ]
-        # We do not change the state.
-        return np.concatenate(data_vector_list)
+        assert self.data_vector is not None
+        return self.data_vector
 
     @final
     def compute_theory_vector(self, tools: ModelingTools) -> npt.NDArray[np.float64]:
@@ -148,8 +184,30 @@ def compute_theory_vector(self, tools: ModelingTools) -> npt.NDArray[np.float64]
         theory_vector_list: List[npt.NDArray[np.float64]] = [
             stat.compute_theory_vector(tools) for stat in self.statistics
         ]
-        # We do not change the state
-        return np.concatenate(theory_vector_list)
+        self.computed_theory_vector = True
+        self.theory_vector = np.concatenate(theory_vector_list)
+
+        return self.theory_vector
+
+    @final
+    def get_theory_vector(self) -> npt.NDArray[np.float64]:
+        """Return the theory vector already computed by compute_theory_vector
+        (all statistics concatenated in the right order); it is not recomputed."""
+
+        assert (
+            self.state == State.UPDATED
+        ), "update() must be called before get_theory_vector()"
+
+        if not self.computed_theory_vector:
+            raise RuntimeError(
+                "The theory vector has not been computed yet. "
+                "Call compute_theory_vector first."
+            )
+        assert self.theory_vector is not None, (
+            "Implementation error, "
+            "computed_theory_vector is True but theory_vector is None"
+        )
+        return self.theory_vector
 
     @final
     def compute(
@@ -186,9 +244,6 @@ def compute_chisq(self, tools: ModelingTools) -> float:
         assert len(data_vector) == len(theory_vector)
         residuals = data_vector - theory_vector
 
-        self.predicted_data_vector: npt.NDArray[np.float64] = theory_vector
-        self.measured_data_vector: npt.NDArray[np.float64] = data_vector
-
         x = scipy.linalg.solve_triangular(self.cholesky, residuals, lower=True)
         chisq = np.dot(x, x)
 
@@ -198,3 +253,35 @@ def compute_chisq(self, tools: ModelingTools) -> float:
     def _is_ready(self) -> bool:
         """Return True if the state is either READY or UPDATED."""
         return self.state in (State.READY, State.UPDATED)
+
+    def make_realization(
+        self, sacc_data: sacc.Sacc, add_noise: bool = True, strict: bool = True
+    ) -> sacc.Sacc:
+        new_sacc = sacc_data.copy()
+
+        sacc_indices_list = []
+        for stat in self.statistics:
+            assert stat.statistic.sacc_indices is not None
+            sacc_indices_list.append(stat.statistic.sacc_indices.copy())
+
+        sacc_indices = np.concatenate(sacc_indices_list)
+
+        if add_noise:
+            new_data_vector = self.make_realization_vector()
+        else:
+            new_data_vector = self.get_theory_vector()
+
+        assert len(sacc_indices) == len(new_data_vector)
+
+        if strict:
+            if set(sacc_indices.tolist()) != set(sacc_data.indices()):
+                raise RuntimeError(
+                    "The predicted data does not cover all the data in the "
+                    "sacc object. To write only the calculated predictions, "
+                    "set strict=False."
+                )
+
+        for prediction_idx, sacc_idx in enumerate(sacc_indices):
+            new_sacc.data[sacc_idx].value = new_data_vector[prediction_idx]
+
+        return new_sacc
diff --git a/firecrown/likelihood/gauss_family/gaussian.py b/firecrown/likelihood/gauss_family/gaussian.py
@@ -3,6 +3,7 @@
 """
 
 from __future__ import annotations
+import numpy as np
 
 from .gauss_family import GaussFamily
 from ...modeling_tools import ModelingTools
@@ -15,3 +16,12 @@ def compute_loglike(self, tools: ModelingTools):
         """Compute the log-likelihood."""
 
         return -0.5 * self.compute_chisq(tools)
+
+    def make_realization_vector(self) -> np.ndarray:
+        theory_vector = self.get_theory_vector()
+        assert self.cholesky is not None
+        new_data_vector = theory_vector + np.dot(
+            self.cholesky, np.random.randn(len(theory_vector))
+        )
+
+        return new_data_vector
diff --git a/firecrown/likelihood/gauss_family/statistic/binned_cluster_number_counts.py b/firecrown/likelihood/gauss_family/statistic/binned_cluster_number_counts.py
@@ -74,7 +74,7 @@ def get_data_vector(self) -> DataVector:
         assert self.data_vector is not None
         return self.data_vector
 
-    def compute_theory_vector(self, tools: ModelingTools) -> TheoryVector:
+    def _compute_theory_vector(self, tools: ModelingTools) -> TheoryVector:
         assert tools.cluster_abundance is not None
 
         theory_vector_list: List[float] = []