Introduce dataframe to ExperimentData (step2) (#1253)

### Summary Executing the approved design proposal in https://github.com/Qiskit/rfcs/blob/master/0007-experiment-dataframe.md. This PR replaces the representation of curve data points with a data frame. This object will be added to the artifact in a follow-up PR. ### Details and comments In this PR, the representation of the intermediate data for `CurveAnalysis` is replaced with the data frame `ScatterTable`. Experimentalists want easier access to the XY data points after curve analysis for various reasons, e.g. plotting data with their own code, rerunning analysis outside the CurveAnalysis framework, or analyzing the time series of curve data. Curve data consists not only of x and y values, but also of multiple metadata such as the associated fit model, the process status (raw, formatted, fitted), and some circuit metadata for each data point. The data frame representation is convenient for managing such a complicated data set, and also allows us to record all information in a single object. In addition, `CurveAnalysis` gains a `_create_figures` method thanks to the `ScatterTable` representation, which could not be implemented with the conventional `CurveData` object. This allows a curve analysis subclass to override the method to flexibly customize figure generation. For example, the current `StarkRamseyXYAmpScanAnalysis` [overrides the entire `_run_analysis` method](https://github.com/Qiskit-Extensions/qiskit-experiments/blob/c01b0fad86a42ffb3437757a146b79d501992cf4/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py#L539-L685) just to add a second axis to the figure.
qiskit-community · Sep 27, 2023 · f33bed7 · f33bed7
1 parent 49a5218
commit f33bed7
Show file tree

Hide file tree

Showing 27 changed files with 800 additions and 380 deletions.
diff --git a/docs/tutorials/calibrations.rst b/docs/tutorials/calibrations.rst
@@ -34,10 +34,6 @@ This automatic updating can also be disabled using the ``auto_update`` flag.
     This tutorial requires the :mod:`qiskit_dynamics` package to run simulations.
     You can install it with ``python -m pip install qiskit-dynamics``.
 
-.. note::
-    This tutorial requires the ``pandas`` package to visualize calibration tables.
-    You can install it with ``python -m pip install pandas``.
-
 .. jupyter-execute::
 
     import pandas as pd

diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
@@ -266,6 +266,11 @@ every logic defined in ``AnalysisA``.
 Curve Analysis workflow
 -----------------------
 
+.. warning::
+
+    :class:`CurveData` dataclass is replaced with :class:`.ScatterTable` dataframe.
+    This class will be deprecated and removed in the future release.
+
 Typically curve analysis performs fitting as follows.
 This workflow is defined in the method :meth:`CurveAnalysis._run_analysis`.
 

diff --git a/qiskit_experiments/curve_analysis/__init__.py b/qiskit_experiments/curve_analysis/__init__.py
@@ -117,6 +117,7 @@
 from .base_curve_analysis import BaseCurveAnalysis
 from .curve_analysis import CurveAnalysis
 from .composite_curve_analysis import CompositeCurveAnalysis
+from .scatter_table import ScatterTable
 from .curve_data import (
     CurveData,
     CurveFitResult,

diff --git a/qiskit_experiments/curve_analysis/base_curve_analysis.py b/qiskit_experiments/curve_analysis/base_curve_analysis.py
@@ -38,7 +38,8 @@
     MplDrawer,
 )
 
-from .curve_data import CurveData, CurveFitResult, ParameterRepr
+from .curve_data import CurveFitResult, ParameterRepr
+from .scatter_table import ScatterTable
 
 PARAMS_ENTRY_PREFIX = "@Parameters_"
 DATA_ENTRY_PREFIX = "@Data_"
@@ -99,11 +100,16 @@ class BaseCurveAnalysis(BaseAnalysis, ABC):
 
     .. rubric:: _create_curve_data
 
-    This method to creates analysis results for the formatted dataset, i.e. data used for the fitting.
+    This method creates analysis results for the formatted dataset, i.e. data used for the fitting.
     Entries are created when the analysis option ``return_data_points`` is ``True``.
     If analysis consists of multiple series, analysis result is created for
     each curve data in the series definitions.
 
+    .. rubric:: _create_figures
+
+    This method creates figures by consuming the scatter table data.
+    Figures are created when the analysis option ``plot`` is ``True``.
+
     .. rubric:: _initialize
 
     This method initializes analysis options against input experiment data.
@@ -277,29 +283,21 @@ def set_options(self, **fields):
     def _run_data_processing(
         self,
         raw_data: List[Dict],
-        models: List[lmfit.Model],
-    ) -> CurveData:
+    ) -> ScatterTable:
         """Perform data processing from the experiment result payload.
 
         Args:
             raw_data: Payload in the experiment data.
-            models: A list of LMFIT models that provide the model name and
-                optionally data sorting keys.
 
         Returns:
             Processed data that will be sent to the formatter method.
-
-        Raises:
-            DataProcessorError: When model is multi-objective function but
-                data sorting option is not provided.
-            DataProcessorError: When key for x values is not found in the metadata.
         """
 
     @abstractmethod
     def _format_data(
         self,
-        curve_data: CurveData,
-    ) -> CurveData:
+        curve_data: ScatterTable,
+    ) -> ScatterTable:
         """Postprocessing for the processed dataset.
 
         Args:
@@ -312,15 +310,12 @@ def _format_data(
     @abstractmethod
     def _run_curve_fit(
         self,
-        curve_data: CurveData,
-        models: List[lmfit.Model],
+        curve_data: ScatterTable,
     ) -> CurveFitResult:
         """Perform curve fitting on given data collection and fit models.
 
         Args:
             curve_data: Formatted data to fit.
-            models: A list of LMFIT models that are used to build a cost function
-                for the LMFIT minimizer.
 
         Returns:
             The best fitting outcome with minimum reduced chi-squared value.
@@ -387,42 +382,53 @@ def _create_analysis_results(
 
         return outcomes
 
+    # pylint: disable=unused-argument
     def _create_curve_data(
         self,
-        curve_data: CurveData,
-        models: List[lmfit.Model],
+        curve_data: ScatterTable,
         **metadata,
     ) -> List[AnalysisResultData]:
         """Create analysis results for raw curve data.
 
         Args:
             curve_data: Formatted data that is used for the fitting.
-            models: A list of LMFIT models that provides model names
-                to extract subsets of experiment data.
 
         Returns:
             List of analysis result data.
         """
         samples = []
 
-        for model in models:
-            sub_data = curve_data.get_subset_of(model._name)
+        for model_name, sub_data in list(curve_data.groupby("model_name")):
             raw_datum = AnalysisResultData(
                 name=DATA_ENTRY_PREFIX + self.__class__.__name__,
                 value={
-                    "xdata": sub_data.x,
-                    "ydata": sub_data.y,
-                    "sigma": sub_data.y_err,
+                    "xdata": sub_data.xval.to_numpy(),
+                    "ydata": sub_data.yval.to_numpy(),
+                    "sigma": sub_data.yerr.to_numpy(),
                 },
                 extra={
-                    "name": model._name,
+                    "name": model_name,
                     **metadata,
                 },
             )
             samples.append(raw_datum)
 
         return samples
 
+    def _create_figures(
+        self,
+        curve_data: ScatterTable,
+    ) -> List["matplotlib.figure.Figure"]:
+        """Create a list of figures from the curve data.
+
+        Args:
+            curve_data: Scatter data table containing all data points.
+
+        Returns:
+            A list of figures.
+        """
+        return []
+
     def _initialize(
         self,
         experiment_data: ExperimentData,