diff --git a/docs/tutorials/calibrations.rst b/docs/tutorials/calibrations.rst
index b1557a0c0e..de1cff5e6a 100644
--- a/docs/tutorials/calibrations.rst
+++ b/docs/tutorials/calibrations.rst
@@ -34,10 +34,6 @@ This automatic updating can also be disabled using the ``auto_update`` flag.
     This tutorial requires the :mod:`qiskit_dynamics` package to run simulations.
     You can install it with ``python -m pip install qiskit-dynamics``.
 
-.. note::
-    This tutorial requires the ``pandas`` package to visualize calibration tables.
-    You can install it with ``python -m pip install pandas``.
-
 .. jupyter-execute::
 
     import pandas as pd
diff --git a/docs/tutorials/curve_analysis.rst b/docs/tutorials/curve_analysis.rst
index 0f9abe6295..0bcabcb43d 100644
--- a/docs/tutorials/curve_analysis.rst
+++ b/docs/tutorials/curve_analysis.rst
@@ -266,6 +266,11 @@ every logic defined in ``AnalysisA``.
 Curve Analysis workflow
 -----------------------
 
+.. warning::
+
+    The :class:`CurveData` dataclass is replaced with the :class:`.ScatterTable` dataframe.
+    :class:`CurveData` will be deprecated and removed in a future release.
+
 Typically curve analysis performs fitting as follows.
 This workflow is defined in the method :meth:`CurveAnalysis._run_analysis`.
diff --git a/qiskit_experiments/curve_analysis/__init__.py b/qiskit_experiments/curve_analysis/__init__.py
index 2a3b77e7a0..2db6044fff 100644
--- a/qiskit_experiments/curve_analysis/__init__.py
+++ b/qiskit_experiments/curve_analysis/__init__.py
@@ -117,6 +117,7 @@
 from .base_curve_analysis import BaseCurveAnalysis
 from .curve_analysis import CurveAnalysis
 from .composite_curve_analysis import CompositeCurveAnalysis
+from .scatter_table import ScatterTable
 from .curve_data import (
     CurveData,
     CurveFitResult,
diff --git a/qiskit_experiments/curve_analysis/base_curve_analysis.py b/qiskit_experiments/curve_analysis/base_curve_analysis.py
index 408aff89a4..1fed4abdba 100644
--- a/qiskit_experiments/curve_analysis/base_curve_analysis.py
+++ b/qiskit_experiments/curve_analysis/base_curve_analysis.py
@@ -38,7 +38,8 @@
     MplDrawer,
 )
 
-from .curve_data import CurveData, CurveFitResult, ParameterRepr
+from .curve_data import CurveFitResult, ParameterRepr
+from .scatter_table import ScatterTable
 
 PARAMS_ENTRY_PREFIX = "@Parameters_"
 DATA_ENTRY_PREFIX = "@Data_"
@@ -99,11 +100,16 @@ class BaseCurveAnalysis(BaseAnalysis, ABC):
 
     .. rubric:: _create_curve_data
 
-    This method to creates analysis results for the formatted dataset, i.e. data used for the fitting.
+    This method creates analysis results for the formatted dataset, i.e. data used for the fitting.
     Entries are created when the analysis option ``return_data_points`` is ``True``.
     If analysis consists of multiple series, analysis result is created for
     each curve data in the series definitions.
 
+    .. rubric:: _create_figures
+
+    This method creates figures by consuming the scatter table data.
+    Figures are created when the analysis option ``plot`` is ``True``.
+
     .. rubric:: _initialize
 
     This method initializes analysis options against input experiment data.
@@ -277,29 +283,21 @@ def set_options(self, **fields):
     def _run_data_processing(
         self,
         raw_data: List[Dict],
-        models: List[lmfit.Model],
-    ) -> CurveData:
+    ) -> ScatterTable:
         """Perform data processing from the experiment result payload.
 
         Args:
             raw_data: Payload in the experiment data.
-            models: A list of LMFIT models that provide the model name and
-                optionally data sorting keys.
 
         Returns:
             Processed data that will be sent to the formatter method.
- - Raises: - DataProcessorError: When model is multi-objective function but - data sorting option is not provided. - DataProcessorError: When key for x values is not found in the metadata. """ @abstractmethod def _format_data( self, - curve_data: CurveData, - ) -> CurveData: + curve_data: ScatterTable, + ) -> ScatterTable: """Postprocessing for the processed dataset. Args: @@ -312,15 +310,12 @@ def _format_data( @abstractmethod def _run_curve_fit( self, - curve_data: CurveData, - models: List[lmfit.Model], + curve_data: ScatterTable, ) -> CurveFitResult: """Perform curve fitting on given data collection and fit models. Args: curve_data: Formatted data to fit. - models: A list of LMFIT models that are used to build a cost function - for the LMFIT minimizer. Returns: The best fitting outcome with minimum reduced chi-squared value. @@ -387,35 +382,32 @@ def _create_analysis_results( return outcomes + # pylint: disable=unused-argument def _create_curve_data( self, - curve_data: CurveData, - models: List[lmfit.Model], + curve_data: ScatterTable, **metadata, ) -> List[AnalysisResultData]: """Create analysis results for raw curve data. Args: curve_data: Formatted data that is used for the fitting. - models: A list of LMFIT models that provides model names - to extract subsets of experiment data. Returns: List of analysis result data. """ samples = [] - for model in models: - sub_data = curve_data.get_subset_of(model._name) + for model_name, sub_data in list(curve_data.groupby("model_name")): raw_datum = AnalysisResultData( name=DATA_ENTRY_PREFIX + self.__class__.__name__, value={ - "xdata": sub_data.x, - "ydata": sub_data.y, - "sigma": sub_data.y_err, + "xdata": sub_data.xval.to_numpy(), + "ydata": sub_data.yval.to_numpy(), + "sigma": sub_data.yerr.to_numpy(), }, extra={ - "name": model._name, + "name": model_name, **metadata, }, ) @@ -423,6 +415,20 @@ def _create_curve_data( return samples + def _create_figures( + self, + curve_data: ScatterTable, + ) -> List["matplotlib.figure.Figure"]: + """Create a list of figures from the curve data. + + Args: + curve_data: Scatter data table containing all data points. + + Returns: + A list of figures. + """ + return [] + def _initialize( self, experiment_data: ExperimentData, diff --git a/qiskit_experiments/curve_analysis/composite_curve_analysis.py b/qiskit_experiments/curve_analysis/composite_curve_analysis.py index 093bc5f791..de3898e316 100644 --- a/qiskit_experiments/curve_analysis/composite_curve_analysis.py +++ b/qiskit_experiments/curve_analysis/composite_curve_analysis.py @@ -19,6 +19,7 @@ import lmfit import numpy as np +import pandas as pd from uncertainties import unumpy as unp from qiskit.utils.deprecation import deprecate_func @@ -39,6 +40,7 @@ from .base_curve_analysis import PARAMS_ENTRY_PREFIX, BaseCurveAnalysis from .curve_data import CurveFitResult +from .scatter_table import ScatterTable from .utils import eval_with_uncertainties @@ -101,6 +103,11 @@ class CompositeCurveAnalysis(BaseAnalysis): This method is passed all the group fit outcomes and can return a list of new values to be stored in the analysis results. + .. rubric:: _create_figures + + This method creates figures by consuming the scatter table data. + Figures are created when the analysis option ``plot`` is ``True``. + """ def __init__( @@ -210,6 +217,52 @@ def _create_analysis_results( """ return [] + def _create_figures( + self, + curve_data: ScatterTable, + ) -> List["matplotlib.figure.Figure"]: + """Create a list of figures from the curve data. 
+
+        Args:
+            curve_data: Scatter data table containing all data points.
+
+        Returns:
+            A list of figures.
+        """
+        for analysis in self.analyses():
+            sub_data = curve_data[curve_data.model_name.str.endswith(f"_{analysis.name}")]
+            for model_id, data in list(sub_data.groupby("model_id")):
+                model_name = analysis._models[model_id]._name
+                # Plot raw data scatters
+                if analysis.options.plot_raw_data:
+                    raw_data = data.filter(like="processed", axis="index")
+                    self.plotter.set_series_data(
+                        series_name=model_name,
+                        x=raw_data.xval.to_numpy(),
+                        y=raw_data.yval.to_numpy(),
+                    )
+                # Plot formatted data scatters
+                formatted_data = data.filter(like="formatted", axis="index")
+                self.plotter.set_series_data(
+                    series_name=model_name,
+                    x_formatted=formatted_data.xval.to_numpy(),
+                    y_formatted=formatted_data.yval.to_numpy(),
+                    y_formatted_err=formatted_data.yerr.to_numpy(),
+                )
+                # Plot fit lines
+                line_data = data.filter(like="fitted", axis="index")
+                if len(line_data) == 0:
+                    continue
+                fit_stdev = line_data.yerr.to_numpy()
+                self.plotter.set_series_data(
+                    series_name=model_name,
+                    x_interp=line_data.xval.to_numpy(),
+                    y_interp=line_data.yval.to_numpy(),
+                    y_interp_err=fit_stdev if np.isfinite(fit_stdev).all() else None,
+                )
+
+        return [self.plotter.figure()]
+
     @classmethod
     def _default_options(cls) -> Options:
         """Default analysis options.
@@ -280,54 +333,31 @@ def _run_analysis(
     ) -> Tuple[List[AnalysisResultData], List["matplotlib.figure.Figure"]]:
 
         analysis_results = []
+        figures = []
 
         fit_dataset = {}
-        red_chi = {}
+        curve_data_set = []
         for analysis in self._analyses:
             analysis._initialize(experiment_data)
+            analysis.set_options(plot=False)
 
             metadata = analysis.options.extra.copy()
             metadata["group"] = analysis.name
 
-            processed_data = analysis._run_data_processing(
-                raw_data=experiment_data.data(),
-                models=analysis.models,
-            )
-
-            if self.options.plot and analysis.options.plot_raw_data:
-                for model in analysis.models:
-                    sub_data = processed_data.get_subset_of(model._name)
-                    self.plotter.set_series_data(
-                        model._name + f"_{analysis.name}",
-                        x=sub_data.x,
-                        y=sub_data.y,
-                    )
-
-            # Format data
-            formatted_data = analysis._format_data(processed_data)
-            if self.options.plot:
-                for model in analysis.models:
-                    sub_data = formatted_data.get_subset_of(model._name)
-                    self.plotter.set_series_data(
-                        model._name + f"_{analysis.name}",
-                        x_formatted=sub_data.x,
-                        y_formatted=sub_data.y,
-                        y_formatted_err=sub_data.y_err,
-                    )
-
-            # Run fitting
-            fit_data = analysis._run_curve_fit(
-                curve_data=formatted_data,
-                models=analysis.models,
+            curve_data = analysis._format_data(
+                analysis._run_data_processing(experiment_data.data())
             )
+            fit_data = analysis._run_curve_fit(curve_data.filter(like="formatted", axis="index"))
+            fit_dataset[analysis.name] = fit_data
 
             if fit_data.success:
                 quality = analysis._evaluate_quality(fit_data)
-                red_chi[analysis.name] = fit_data.reduced_chisq
             else:
                 quality = "bad"
 
             if self.options.return_fit_parameters:
+                # Store fit status overview entry regardless of success.
+                # This is sometimes useful when debugging the fitting code.
                 overview = AnalysisResultData(
                     name=PARAMS_ENTRY_PREFIX + analysis.name,
                     value=fit_data,
@@ -337,65 +367,73 @@
                 analysis_results.append(overview)
 
             if fit_data.success:
-                # Add extra analysis results
+                # Add fit data to curve data table
+                fit_curves = []
+                formatted = curve_data.filter(like="formatted", axis="index")
+                columns = list(curve_data.columns)
+                for i, sub_data in list(formatted.groupby("model_id")):
+                    name = analysis._models[i]._name
+                    xval = sub_data.xval.to_numpy()
+                    if len(xval) == 0:
+                        # If data is empty, skip drawing this model.
+                        # This is the case when a fit model exists but no data to fit is provided.
+                        continue
+                    # Compute X, Y values with fit parameters.
+                    xval_fit = np.linspace(np.min(xval), np.max(xval), num=100)
+                    yval_fit = eval_with_uncertainties(
+                        x=xval_fit,
+                        model=analysis.models[i],
+                        params=fit_data.ufloat_params,
+                    )
+                    model_fit = np.full((100, len(columns)), np.nan, dtype=object)
+                    fit_curves.append(model_fit)
+                    model_fit[:, columns.index("xval")] = xval_fit
+                    model_fit[:, columns.index("yval")] = unp.nominal_values(yval_fit)
+                    if fit_data.covar is not None:
+                        model_fit[:, columns.index("yerr")] = unp.std_devs(yval_fit)
+                    model_fit[:, columns.index("model_name")] = name
+                    model_fit[:, columns.index("model_id")] = i
+                curve_data = curve_data.append_list_values(
+                    other=np.vstack(fit_curves),
+                    prefix="fitted",
+                )
                 analysis_results.extend(
                     analysis._create_analysis_results(
-                        fit_data=fit_data, quality=quality, **metadata.copy()
+                        fit_data=fit_data,
+                        quality=quality,
+                        **metadata.copy(),
                     )
                 )
 
-            # Draw fit result
-            if self.options.plot:
-                x_interp = np.linspace(
-                    np.min(formatted_data.x), np.max(formatted_data.x), num=100
-                )
-                for model in analysis.models:
-                    y_data_with_uncertainty = eval_with_uncertainties(
-                        x=x_interp,
-                        model=model,
-                        params=fit_data.ufloat_params,
-                    )
-                    y_interp = unp.nominal_values(y_data_with_uncertainty)
-                    # Add fit line data
-                    self.plotter.set_series_data(
-                        model._name + f"_{analysis.name}",
-                        x_interp=x_interp,
-                        y_interp=y_interp,
-                    )
-                    if fit_data.covar is not None:
-                        # Add confidence interval data
-                        y_interp_err = unp.std_devs(y_data_with_uncertainty)
-                        if np.isfinite(y_interp_err).all():
-                            self.plotter.set_series_data(
-                                model._name + f"_{analysis.name}",
-                                y_interp_err=y_interp_err,
-                            )
-
-            # Add raw data points
             if self.options.return_data_points:
+                # Add raw data points
                 analysis_results.extend(
                     analysis._create_curve_data(
-                        curve_data=formatted_data,
-                        models=analysis.models,
+                        curve_data=curve_data.filter(like="formatted", axis="index"),
                         **metadata,
                     )
                 )
 
-            fit_dataset[analysis.name] = fit_data
+            curve_data.model_name += f"_{analysis.name}"
+            curve_data_set.append(curve_data)
 
+        combined_curve_data = pd.concat(curve_data_set)
         total_quality = self._evaluate_quality(fit_dataset)
-        if red_chi:
-            self.plotter.set_supplementary_data(fit_red_chi=red_chi)
 
         # Create analysis results by combining all fit data
         if all(fit_data.success for fit_data in fit_dataset.values()):
-            primary_results = self._create_analysis_results(
+            composite_results = self._create_analysis_results(
                 fit_data=fit_dataset, quality=total_quality, **self.options.extra.copy()
             )
-            analysis_results.extend(primary_results)
-            self.plotter.set_supplementary_data(primary_results=primary_results)
+            analysis_results.extend(composite_results)
+        else:
+            composite_results = []
 
         if self.options.plot:
-            return analysis_results, [self.plotter.figure()]
+            self.plotter.set_supplementary_data(
+                fit_red_chi={k: v.reduced_chisq for k, v in fit_dataset.items() if
v.success}, + primary_results=composite_results, + ) + figures.extend(self._create_figures(curve_data=combined_curve_data)) - return analysis_results, [] + return analysis_results, figures diff --git a/qiskit_experiments/curve_analysis/curve_analysis.py b/qiskit_experiments/curve_analysis/curve_analysis.py index fd1a5ff413..4ddd06cdec 100644 --- a/qiskit_experiments/curve_analysis/curve_analysis.py +++ b/qiskit_experiments/curve_analysis/curve_analysis.py @@ -16,6 +16,9 @@ # pylint: disable=invalid-name from typing import Dict, List, Tuple, Union, Optional +from functools import partial +from itertools import groupby +from operator import itemgetter import lmfit import numpy as np @@ -25,8 +28,15 @@ from qiskit_experiments.data_processing.exceptions import DataProcessorError from .base_curve_analysis import BaseCurveAnalysis, PARAMS_ENTRY_PREFIX -from .curve_data import CurveData, FitOptions, CurveFitResult -from .utils import eval_with_uncertainties, convert_lmfit_result, multi_mean_xy_data, data_sort +from .curve_data import FitOptions, CurveFitResult +from .scatter_table import ScatterTable +from .utils import ( + eval_with_uncertainties, + convert_lmfit_result, + shot_weighted_average, + inverse_weighted_variance, + sample_average, +) class CurveAnalysis(BaseCurveAnalysis): @@ -84,6 +94,11 @@ class CurveAnalysis(BaseCurveAnalysis): If analysis consists of multiple series, an analysis result is created for each series definition. + .. rubric:: _create_figures + + This method creates figures by consuming the scatter table data. + Figures are created when the analysis option ``plot`` is ``True``. + .. rubric:: _initialize This method initializes analysis options against input experiment data. @@ -135,122 +150,168 @@ def models(self) -> List[lmfit.Model]: def _run_data_processing( self, raw_data: List[Dict], - models: List[lmfit.Model], - ) -> CurveData: + ) -> ScatterTable: """Perform data processing from the experiment result payload. Args: raw_data: Payload in the experiment data. - models: A list of LMFIT models that provide the model name and - optionally data sorting keys. Returns: Processed data that will be sent to the formatter method. Raises: - DataProcessorError: When model is a multi-objective function but - data sorting option is not provided. DataProcessorError: When key for x values is not found in the metadata. + DataProcessorError: When data_subfit_map information for a fit model is missing. + ValueError: When input data has incomplete metadata to specify fit model. """ + opt = self.options - def _matched(metadata, **filters): - try: - return all(metadata[key] == val for key, val in filters.items()) - except KeyError: - return False - - if not self.options.filter_data: - analyzed_data = raw_data + # Create table + if opt.filter_data: + to_process = [d for d in raw_data if opt.filter_data.items() <= d["metadata"].items()] else: - analyzed_data = [ - d for d in raw_data if _matched(d["metadata"], **self.options.filter_data) + to_process = raw_data + + # This must align with ScatterTable columns. Use struct array. + dtypes = np.dtype( + [ + ("xval", float), + ("yval", float), + ("yerr", float), + ("model_name", "U30"), # builtin str is U0 which is zero-length unicode in numpy + ("model_id", int), + ("shots", int), ] - - x_key = self.options.x_key - - try: - xdata = np.asarray([datum["metadata"][x_key] for datum in analyzed_data], dtype=float) - except KeyError as ex: - raise DataProcessorError( - f"X value key {x_key} is not defined in circuit metadata." 
- ) from ex - - ydata = self.options.data_processor(analyzed_data) - shots = np.asarray([datum.get("shots", np.nan) for datum in analyzed_data]) - - if len(models) == 1: - # all data belongs to the single model - data_allocation = np.full(xdata.size, 0, dtype=int) - else: - data_allocation = np.full(xdata.size, -1, dtype=int) - for idx, sub_model in enumerate(models): - try: - tags = self.options.data_subfit_map[sub_model._name] - except KeyError as ex: - raise DataProcessorError( - f"Data sort options for model {sub_model._name} is not defined. " - "Please provide the 'data_subfit_map' analysis option for this model." - ) from ex - if tags is None: - continue - matched_inds = np.asarray( - [_matched(d["metadata"], **tags) for d in analyzed_data], dtype=bool + ) + table_data = np.empty(len(to_process), dtype=dtypes) + for idx, datum in enumerate(to_process): + metadata = datum["metadata"].copy() + # Get xval from metadata + try: + xval = metadata.pop(opt.x_key) + except KeyError as ex: + raise DataProcessorError( + f"X value key {opt.x_key} is not defined in the circuit metadata." + ) from ex + # Classify fit model + if len(self._models) == 1: + m_id = 0 + m_name = self._models[0]._name + else: + for i, model in enumerate(self._models): + try: + model_spec = self.options.data_subfit_map[model._name] + except KeyError as ex: + raise DataProcessorError( + f"Mapping to data for the fit model {model._name} is not provided." + ) from ex + if model_spec.items() <= metadata.items(): + m_id = i + m_name = model._name + break + else: + raise ValueError(f"Experiment data {datum} doesn't belong to any fit model.") + table_data[idx]["xval"] = xval + table_data[idx]["shots"] = datum.get("shots", -1) + table_data[idx]["model_id"] = m_id + table_data[idx]["model_name"] = m_name + + # Add extra metadata + add_key = metadata.keys() - table_data.dtype.fields + if add_key: + # Add missing keys to struct array + # This code is lengthy but faster than merge_arrays function + add_dtypes = [] + for k in add_key: + if isinstance(metadata[k], str): + new_dtype = "U30" + else: + new_dtype = type(metadata[k]) + add_dtypes.append((k, new_dtype)) + new_table_data = np.empty( + len(to_process), dtype=sum((table_data.dtype.descr, add_dtypes), []) ) - data_allocation[matched_inds] = idx - - return CurveData( - x=xdata, - y=unp.nominal_values(ydata), - y_err=unp.std_devs(ydata), - shots=shots, - data_allocation=data_allocation, - labels=[sub_model._name for sub_model in models], + for k in table_data.dtype.fields: + new_table_data[k] = table_data[k] + table_data = new_table_data + for k, v in metadata.items(): + table_data[idx][k] = v + + # Compute y value + if not self.options.data_processor: + raise ValueError( + f"Data processor is not set for the {self.__class__.__name__} instance. " + "Initialize the instance with the experiment data, or set the " + "data_processor analysis options." + ) + processed_values = self.options.data_processor(to_process) + table_data["yval"] = unp.nominal_values(processed_values).flatten() + table_data["yerr"] = unp.std_devs(processed_values).flatten() + + out = ScatterTable( + data=table_data, + index=[f"processed-{i:04d}" for i in range(len(to_process))], ) + return out def _format_data( self, - curve_data: CurveData, - ) -> CurveData: + curve_data: ScatterTable, + ) -> ScatterTable: """Postprocessing for the processed dataset. Args: curve_data: Processed dataset created from experiment results. Returns: - Formatted data. + New scatter table instance including raw and formatted data. 
""" - # take average over the same x value by keeping sigma - data_allocation, xdata, ydata, sigma, shots = multi_mean_xy_data( - series=curve_data.data_allocation, - xdata=curve_data.x, - ydata=curve_data.y, - sigma=curve_data.y_err, - shots=curve_data.shots, - method=self.options.average_method, - ) - - # sort by x value in ascending order - data_allocation, xdata, ydata, sigma, shots = data_sort( - series=data_allocation, - xdata=xdata, - ydata=ydata, - sigma=sigma, - shots=shots, + averaging_methods = { + "shots_weighted": shot_weighted_average, + "iwv": inverse_weighted_variance, + "sample": sample_average, + } + + columns = list(curve_data.columns) + sort_by = itemgetter( + columns.index("model_id"), + columns.index("xval"), ) - - return CurveData( - x=xdata, - y=ydata, - y_err=sigma, - shots=shots, - data_allocation=data_allocation, - labels=curve_data.labels, + # Use python native groupby method on ndarray. This is more performant than pandas one. + average = averaging_methods[self.options.average_method] + formatted = [] + for (mid, xv), g in groupby(sorted(curve_data.values, key=sort_by), key=sort_by): + g_values = np.array(list(g)) + g_dict = dict(zip(columns, g_values.T)) + avg_yval, avg_yerr, shots = average(g_dict["yval"], g_dict["yerr"], g_dict["shots"]) + averaged = dict.fromkeys(columns) + averaged["xval"] = xv + averaged["yval"] = avg_yval + averaged["yerr"] = avg_yerr + averaged["model_id"] = mid + averaged["shots"] = shots + for k, v in g_dict.items(): + if averaged[k] is not None: + continue + if len(g_values) == 1: + averaged[k] = v[0] + else: + unique = set(v) + if len(unique) == 1: + averaged[k] = next(iter(unique)) + else: + averaged[k] = list(unique) + formatted.append(list(averaged.values())) + + return curve_data.append_list_values( + other=formatted, + prefix="formatted", ) def _generate_fit_guesses( self, user_opt: FitOptions, - curve_data: CurveData, # pylint: disable=unused-argument + curve_data: ScatterTable, # pylint: disable=unused-argument ) -> Union[FitOptions, List[FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. @@ -265,21 +326,18 @@ def _generate_fit_guesses( def _run_curve_fit( self, - curve_data: CurveData, - models: List[lmfit.Model], + curve_data: ScatterTable, ) -> CurveFitResult: """Perform curve fitting on given data collection and fit models. Args: curve_data: Formatted data to fit. - models: A list of LMFIT models that are used to build a cost function - for the LMFIT minimizer. Returns: The best fitting outcome with minimum reduced chi-squared value. """ unite_parameter_names = [] - for model in models: + for model in self._models: # Seems like this is not efficient looping, but using set operation sometimes # yields bad fit. Not sure if this is an edge case, but # `TestRamseyXY` unittest failed due to the significant chisq value @@ -314,36 +372,32 @@ def _run_curve_fit( if isinstance(fit_options, FitOptions): fit_options = [fit_options] - valid_uncertainty = np.all(np.isfinite(curve_data.y_err)) - - model_weights = {} - if valid_uncertainty: - for model in models: - sub_yerr = curve_data.get_subset_of(model._name).y_err - if len(sub_yerr) == 0: - continue - nonzero_yerr = np.where(np.isclose(sub_yerr, 0.0), np.finfo(float).eps, sub_yerr) + # Create convenient function to compute residual of the models. 
+ partial_residuals = [] + valid_uncertainty = np.all(np.isfinite(curve_data.yerr.to_numpy())) + for i, sub_data in list(curve_data.groupby("model_id")): + if valid_uncertainty: + nonzero_yerr = np.where( + np.isclose(sub_data.yerr, 0.0), + np.finfo(float).eps, + sub_data.yerr, + ) raw_weights = 1 / nonzero_yerr # Remove outlier. When all sample values are the same with sample average, # or sampling error is zero with shot-weighted average, # some yerr values might be very close to zero, yielding significant weights. # With such outlier, the fit doesn't sense residual of other data points. maximum_weight = np.percentile(raw_weights, 90) - model_weights[model._name] = np.clip(raw_weights, 0.0, maximum_weight) - - # Objective function for minimize. This computes composite residuals of sub models. - def _objective(_params): - ys = [] - for model in models: - sub_data = curve_data.get_subset_of(model._name) - yi = model._residual( - params=_params, - data=sub_data.y, - weights=model_weights.get(model._name, None), - x=sub_data.x, - ) - ys.append(yi) - return np.concatenate(ys) + weights = np.clip(raw_weights, 0.0, maximum_weight) + else: + weights = None + model_residual = partial( + self._models[i]._residual, + data=sub_data.yval.to_numpy(), + weights=weights, + x=sub_data.xval.to_numpy(), + ) + partial_residuals.append(model_residual) # Run fit for each configuration res = None @@ -363,7 +417,7 @@ def _objective(_params): try: with np.errstate(all="ignore"): new = lmfit.minimize( - fcn=_objective, + fcn=lambda x: np.concatenate([p(x) for p in partial_residuals]), params=guess_params, method=self.options.fit_method, scale_covar=not valid_uncertainty, @@ -380,53 +434,76 @@ def _objective(_params): if new.success and res.redchi > new.redchi: res = new - return convert_lmfit_result(res, models, curve_data.x, curve_data.y) - - def _run_analysis( - self, experiment_data: ExperimentData - ) -> Tuple[List[AnalysisResultData], List["pyplot.Figure"]]: - - # Prepare for fitting - self._initialize(experiment_data) + return convert_lmfit_result( + res, + self._models, + curve_data.xval.to_numpy(), + curve_data.yval.to_numpy(), + ) - analysis_results = [] + def _create_figures( + self, + curve_data: ScatterTable, + ) -> List["matplotlib.figure.Figure"]: + """Create a list of figures from the curve data. - # Run data processing - processed_data = self._run_data_processing( - raw_data=experiment_data.data(), - models=self._models, - ) + Args: + curve_data: Scatter data table containing all data points. - if self.options.plot and self.options.plot_raw_data: - for model in self._models: - sub_data = processed_data.get_subset_of(model._name) + Returns: + A list of figures. 
+ """ + for model_id, data in list(curve_data.groupby("model_id")): + model_name = self._models[model_id]._name + # Plot raw data scatters + if self.options.plot_raw_data: + raw_data = data.filter(like="processed", axis="index") self.plotter.set_series_data( - model._name, - x=sub_data.x, - y=sub_data.y, + series_name=model_name, + x=raw_data.xval.to_numpy(), + y=raw_data.yval.to_numpy(), ) - - # Format data - formatted_data = self._format_data(processed_data) - if self.options.plot: - for model in self._models: - sub_data = formatted_data.get_subset_of(model._name) + # Plot formatted data scatters + formatted_data = data.filter(like="formatted", axis="index") + self.plotter.set_series_data( + series_name=model_name, + x_formatted=formatted_data.xval.to_numpy(), + y_formatted=formatted_data.yval.to_numpy(), + y_formatted_err=formatted_data.yerr.to_numpy(), + ) + # Plot fit lines + line_data = data.filter(like="fitted", axis="index") + if len(line_data) == 0: + continue + self.plotter.set_series_data( + series_name=model_name, + x_interp=line_data.xval.to_numpy(), + y_interp=line_data.yval.to_numpy(), + ) + fit_stdev = line_data.yerr.to_numpy() + if np.isfinite(fit_stdev).all(): self.plotter.set_series_data( - model._name, - x_formatted=sub_data.x, - y_formatted=sub_data.y, - y_formatted_err=sub_data.y_err, + series_name=model_name, + y_interp_err=fit_stdev, ) - # Run fitting - fit_data = self._run_curve_fit( - curve_data=formatted_data, - models=self._models, - ) + return [self.plotter.figure()] + + def _run_analysis( + self, + experiment_data: ExperimentData, + ) -> Tuple[List[AnalysisResultData], List["pyplot.Figure"]]: + analysis_results = [] + figures = [] + + # Prepare for fitting + self._initialize(experiment_data) + + curve_data = self._format_data(self._run_data_processing(experiment_data.data())) + fit_data = self._run_curve_fit(curve_data.filter(like="formatted", axis="index")) if fit_data.success: quality = self._evaluate_quality(fit_data) - self.plotter.set_supplementary_data(fit_red_chi=fit_data.reduced_chisq) else: quality = "bad" @@ -441,58 +518,62 @@ def _run_analysis( ) analysis_results.append(overview) - # Create figure and result data if fit_data.success: - # Create analysis results - primary_results = self._create_analysis_results( - fit_data=fit_data, quality=quality, **self.options.extra.copy() + # Add fit data to curve data table + fit_curves = [] + formatted = curve_data.filter(like="formatted", axis="index") + columns = list(curve_data.columns) + for i, sub_data in list(formatted.groupby("model_id")): + name = self._models[i]._name + xval = sub_data.xval.to_numpy() + if len(xval) == 0: + # If data is empty, skip drawing this model. + # This is the case when fit model exist but no data to fit is provided. + continue + # Compute X, Y values with fit parameters. 
+ xval_fit = np.linspace(np.min(xval), np.max(xval), num=100, dtype=float) + yval_fit = eval_with_uncertainties( + x=xval_fit, + model=self._models[i], + params=fit_data.ufloat_params, + ) + model_fit = np.full((100, len(columns)), None, dtype=object) + fit_curves.append(model_fit) + model_fit[:, columns.index("xval")] = xval_fit + model_fit[:, columns.index("yval")] = unp.nominal_values(yval_fit) + if fit_data.covar is not None: + model_fit[:, columns.index("yerr")] = unp.std_devs(yval_fit) + model_fit[:, columns.index("model_name")] = name + model_fit[:, columns.index("model_id")] = i + curve_data = curve_data.append_list_values( + other=np.vstack(fit_curves), + prefix="fitted", ) - analysis_results.extend(primary_results) - self.plotter.set_supplementary_data(primary_results=primary_results) - - # Draw fit curves and report - if self.options.plot: - for model in self._models: - sub_data = formatted_data.get_subset_of(model._name) - if sub_data.x.size == 0: - # If data is empty, skip drawing this model. - # This is the case when fit model exist but no data to fit is provided. - # For example, experiment may omit experimenting with some setting. - continue - x_interp = np.linspace(np.min(sub_data.x), np.max(sub_data.x), num=100) - - y_data_with_uncertainty = eval_with_uncertainties( - x=x_interp, - model=model, - params=fit_data.ufloat_params, - ) - y_interp = unp.nominal_values(y_data_with_uncertainty) - # Add fit line data - self.plotter.set_series_data( - model._name, - x_interp=x_interp, - y_interp=y_interp, - ) - if fit_data.covar is not None: - # Add confidence interval data - y_interp_err = unp.std_devs(y_data_with_uncertainty) - if np.isfinite(y_interp_err).all(): - self.plotter.set_series_data( - model._name, - y_interp_err=y_interp_err, - ) - - # Add raw data points + analysis_results.extend( + self._create_analysis_results( + fit_data=fit_data, + quality=quality, + **self.options.extra.copy(), + ) + ) + if self.options.return_data_points: + # Add raw data points analysis_results.extend( - self._create_curve_data(curve_data=formatted_data, models=self._models) + self._create_curve_data( + curve_data=curve_data.filter(like="formatted", axis="index"), + ) ) - # Finalize plot if self.options.plot: - return analysis_results, [self.plotter.figure()] + if fit_data.success: + self.plotter.set_supplementary_data( + fit_red_chi=fit_data.reduced_chisq, + primary_results=[r for r in analysis_results if not r.name.startswith("@")], + ) + figures.extend(self._create_figures(curve_data=curve_data)) - return analysis_results, [] + return analysis_results, figures def __getstate__(self): state = self.__dict__.copy() diff --git a/qiskit_experiments/curve_analysis/curve_data.py b/qiskit_experiments/curve_analysis/curve_data.py index c344956895..62214e9d9b 100644 --- a/qiskit_experiments/curve_analysis/curve_data.py +++ b/qiskit_experiments/curve_analysis/curve_data.py @@ -112,6 +112,15 @@ class CurveData: data_allocation: np.ndarray labels: List[str] + @deprecate_func( + since="0.6", + additional_msg="CurveData is replaced with 'ScatterTable' with dataframe representation.", + removal_timeline="after 0.7", + package_name="qiskit-experiments", + ) + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + def get_subset_of(self, index: Union[str, int]) -> "CurveData": """Filter data by series name or index. 
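For downstream code that still uses the deprecated ``CurveData`` accessors shown above, the dataframe-based equivalent looks roughly as follows. This is a hedged migration sketch: ``curve_data`` is assumed to be a ``ScatterTable`` with the default columns, and ``model_abc`` is a placeholder model name, not an identifier from this patch.

.. code-block:: python

    # Deprecated CurveData idiom:
    #     sub_data = curve_data.get_subset_of("model_abc")
    #     x, y, e = sub_data.x, sub_data.y, sub_data.y_err
    # ScatterTable (dataframe) idiom:
    formatted = curve_data.filter(like="formatted", axis="index")
    sub_data = formatted[formatted.model_name == "model_abc"]
    x = sub_data.xval.to_numpy()
    y = sub_data.yval.to_numpy()
    e = sub_data.yerr.to_numpy()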
diff --git a/qiskit_experiments/curve_analysis/scatter_table.py b/qiskit_experiments/curve_analysis/scatter_table.py
new file mode 100644
index 0000000000..7d16cedd44
--- /dev/null
+++ b/qiskit_experiments/curve_analysis/scatter_table.py
@@ -0,0 +1,198 @@
+# This code is part of Qiskit.
+#
+# (C) Copyright IBM 2023.
+#
+# This code is licensed under the Apache License, Version 2.0. You may
+# obtain a copy of this license in the LICENSE.txt file in the root directory
+# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
+#
+# Any modifications or derivative works of this code must retain this
+# copyright notice, and modified files need to carry a notice indicating
+# that they have been altered from the originals.
+"""Table representation of the x, y data for curve fitting."""
+
+import logging
+from typing import List, Sequence, Dict, Any, Union
+
+import numpy as np
+import pandas as pd
+
+from qiskit.utils import deprecate_func
+
+from qiskit_experiments.framework.table_mixin import DefaultColumnsMixIn
+
+
+LOG = logging.getLogger(__name__)
+
+
+class ScatterTable(pd.DataFrame, DefaultColumnsMixIn):
+    """A table to store x and y data with metadata associated with the data point.
+
+    This class is implemented upon the pandas dataframe.
+    See the `pandas dataframe documentation <https://pandas.pydata.org/docs/>`_
+    for the base class API documentation.
+
+    A single ``ScatterTable`` object can contain different kinds of intermediate data
+    generated through the curve fitting, which are categorized by the fit model.
+    When an experiment has sub-data for ``model_abc``, the formatted x, y, and y-error
+    array data may be obtained from the original table object as follows:
+
+    .. code-block:: python
+
+        formatted = table.filter(like="formatted", axis="index")
+        abc_data = formatted[formatted.model_name == "model_abc"]
+        x, y, e = abc_data.xval.to_numpy(), abc_data.yval.to_numpy(), abc_data.yerr.to_numpy()
+
+    """
+
+    # TODO Add this to toctree. In the current mechanism all pandas DataFrame members are rendered
+    #  and it fails in the Sphinx build process. We may need a custom directive to
+    #  exclude class members from an external package.
+
+    @classmethod
+    def _default_columns(cls) -> List[str]:
+        return [
+            "xval",
+            "yval",
+            "yerr",
+            "model_name",
+            "model_id",
+            "shots",
+        ]
+
+    @deprecate_func(
+        since="0.6",
+        additional_msg="Curve data uses dataframe representation. Use dataframe filtering method.",
+        pending=True,
+        package_name="qiskit-experiments",
+    )
+    def get_subset_of(self, index: Union[str, int]) -> "ScatterTable":
+        """Filter data by series name or index.
+
+        Args:
+            index: Series index or name.
+
+        Returns:
+            A subset of data corresponding to a particular series.
+        """
+        if isinstance(index, int):
+            index = self.labels[index]
+        return self[self.model_name == index]
+
+    @property
+    @deprecate_func(
+        since="0.6",
+        additional_msg="Curve data uses dataframe representation. Call .xval.to_numpy() instead.",
+        pending=True,
+        package_name="qiskit-experiments",
+        is_property=True,
+    )
+    def x(self) -> np.ndarray:
+        """X values."""
+        return self.xval.to_numpy()
+
+    @property
+    @deprecate_func(
+        since="0.6",
+        additional_msg="Curve data uses dataframe representation. Call .yval.to_numpy() instead.",
+        pending=True,
+        package_name="qiskit-experiments",
+        is_property=True,
+    )
+    def y(self) -> np.ndarray:
+        """Y values."""
+        return self.yval.to_numpy()
+
+    @property
+    @deprecate_func(
+        since="0.6",
+        additional_msg="Curve data uses dataframe representation.
Call .yerr.to_numpy() instead.",
+        pending=True,
+        package_name="qiskit-experiments",
+        is_property=True,
+    )
+    def y_err(self) -> np.ndarray:
+        """Standard deviation of y values."""
+        return self.yerr.to_numpy()
+
+    @property
+    @deprecate_func(
+        since="0.6",
+        additional_msg="Curve data uses dataframe representation. Call .shots.to_numpy() instead.",
+        pending=True,
+        package_name="qiskit-experiments",
+        is_property=True,
+    )
+    def shots(self):
+        """Shot number of data points."""
+        # Use item access here; attribute access would resolve to this property and recurse.
+        return self["shots"].to_numpy()
+
+    @property
+    @deprecate_func(
+        since="0.6",
+        additional_msg="Curve data uses dataframe representation. Call .model_id.to_numpy() instead.",
+        pending=True,
+        package_name="qiskit-experiments",
+        is_property=True,
+    )
+    def data_allocation(self) -> np.ndarray:
+        """Index of corresponding fit model."""
+        # pylint: disable=no-member
+        return self.model_id.to_numpy()
+
+    @property
+    @deprecate_func(
+        since="0.6",
+        additional_msg="Curve data uses dataframe representation. Labels are a part of the table.",
+        pending=True,
+        package_name="qiskit-experiments",
+        is_property=True,
+    )
+    def labels(self) -> List[str]:
+        """List of model names."""
+        # Order sensitive
+        name_id_tups = self.groupby(["model_name", "model_id"]).groups.keys()
+        return [k[0] for k in sorted(name_id_tups, key=lambda k: k[1])]
+
+    def append_list_values(
+        self,
+        other: Sequence,
+        prefix: str,
+    ) -> "ScatterTable":
+        """Add another list of dataframe values to this dataframe.
+
+        Args:
+            other: List of dataframe values to be added.
+            prefix: Prefix of row labels of the added values.
+
+        Returns:
+            New scatter table instance including both self and added data.
+        """
+        other_index = [f"{prefix}-{i:04d}" for i in range(len(other))]
+        return ScatterTable(
+            data=[*self.values, *other],
+            columns=self.columns,
+            index=[*self.index, *other_index],
+        )
+
+    def __json_encode__(self) -> Dict[str, Any]:
+        return {
+            "class": "ScatterTable",
+            "data": self.to_dict(orient="index"),
+        }
+
+    @classmethod
+    def __json_decode__(cls, value: Dict[str, Any]) -> "ScatterTable":
+        if not value.get("class", None) == "ScatterTable":
+            raise ValueError("JSON decoded value for ScatterTable is not valid class type.")
+
+        instance = cls.from_dict(
+            data=value.get("data", {}),
+            orient="index",
+        ).replace({np.nan: None})
+        return instance
+
+    @property
+    def _constructor(self):
+        # https://pandas.pydata.org/pandas-docs/stable/development/extending.html
+        return ScatterTable
diff --git a/qiskit_experiments/curve_analysis/standard_analysis/bloch_trajectory.py b/qiskit_experiments/curve_analysis/standard_analysis/bloch_trajectory.py
index 9e4885ba9b..a155eebe58 100644
--- a/qiskit_experiments/curve_analysis/standard_analysis/bloch_trajectory.py
+++ b/qiskit_experiments/curve_analysis/standard_analysis/bloch_trajectory.py
@@ -154,7 +154,7 @@ def _default_options(cls):
     def _generate_fit_guesses(
         self,
         user_opt: curve.FitOptions,
-        curve_data: curve.CurveData,
+        curve_data: curve.ScatterTable,
     ) -> Union[curve.FitOptions, List[curve.FitOptions]]:
         """Create algorithmic initial fit guess from analysis options and curve data.
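To make the row-label convention concrete, here is a small self-contained sketch of how ``append_list_values`` assigns the ``processed-``/``fitted-`` prefixes that the analysis code above filters on. All values are made up, and only the default columns are used.

.. code-block:: python

    from qiskit_experiments.curve_analysis import ScatterTable

    table = ScatterTable(
        data=[[0.0, 0.10, 0.01, "model_a", 0, 1024]],
        columns=["xval", "yval", "yerr", "model_name", "model_id", "shots"],
        index=["processed-0000"],
    )
    # Rows appended here receive labels "fitted-0000" and "fitted-0001".
    table = table.append_list_values(
        other=[
            [0.0, 0.12, 0.005, "model_a", 0, -1],
            [1.0, 0.34, 0.006, "model_a", 0, -1],
        ],
        prefix="fitted",
    )
    fit_rows = table.filter(like="fitted", axis="index")  # only the two new rows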
diff --git a/qiskit_experiments/curve_analysis/standard_analysis/decay.py b/qiskit_experiments/curve_analysis/standard_analysis/decay.py index 1ff1daaf58..11044afd28 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/decay.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/decay.py @@ -65,7 +65,7 @@ def __init__( def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. diff --git a/qiskit_experiments/curve_analysis/standard_analysis/error_amplification_analysis.py b/qiskit_experiments/curve_analysis/standard_analysis/error_amplification_analysis.py index 116430f2d9..cc8ae54a1a 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/error_amplification_analysis.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/error_amplification_analysis.py @@ -118,7 +118,7 @@ def _default_options(cls): def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. diff --git a/qiskit_experiments/curve_analysis/standard_analysis/gaussian.py b/qiskit_experiments/curve_analysis/standard_analysis/gaussian.py index 2a17f54ac0..13b22d3975 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/gaussian.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/gaussian.py @@ -88,7 +88,7 @@ def _default_options(cls) -> Options: def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. diff --git a/qiskit_experiments/curve_analysis/standard_analysis/oscillation.py b/qiskit_experiments/curve_analysis/standard_analysis/oscillation.py index 461698c367..27d564cb37 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/oscillation.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/oscillation.py @@ -72,7 +72,7 @@ def __init__( def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. @@ -189,7 +189,7 @@ def __init__( def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. diff --git a/qiskit_experiments/curve_analysis/standard_analysis/resonance.py b/qiskit_experiments/curve_analysis/standard_analysis/resonance.py index 558de514d8..1c6b811038 100644 --- a/qiskit_experiments/curve_analysis/standard_analysis/resonance.py +++ b/qiskit_experiments/curve_analysis/standard_analysis/resonance.py @@ -88,7 +88,7 @@ def _default_options(cls) -> Options: def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. 
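The signature swaps above are mechanical; a subclass's guess logic migrates by reading dataframe columns instead of ``CurveData`` attributes. A hedged sketch follows, in which ``MyDecayAnalysis`` and the ``amp``/``tau`` parameter names are hypothetical and not part of this patch:

.. code-block:: python

    import numpy as np
    import qiskit_experiments.curve_analysis as curve

    class MyDecayAnalysis(curve.CurveAnalysis):
        """Hypothetical subclass; only the guess logic is shown."""

        def _generate_fit_guesses(self, user_opt, curve_data: curve.ScatterTable):
            # Dataframe columns replace the old CurveData attributes (x -> xval, y -> yval).
            xvals = curve_data.xval.to_numpy()
            yvals = curve_data.yval.to_numpy()
            user_opt.bounds.set_if_empty(amp=(0, 1))
            # Illustrative guesses: peak height and the span of the x values.
            user_opt.p0.set_if_empty(amp=float(np.max(yvals)), tau=float(np.ptp(xvals)))
            return user_opt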
diff --git a/qiskit_experiments/curve_analysis/utils.py b/qiskit_experiments/curve_analysis/utils.py
index 84d4613584..66a8855254 100644
--- a/qiskit_experiments/curve_analysis/utils.py
+++ b/qiskit_experiments/curve_analysis/utils.py
@@ -18,6 +18,7 @@
 import asteval
 import lmfit
 import numpy as np
+from qiskit.utils.deprecation import deprecate_func
 from qiskit.utils import detach_prefix
 from uncertainties import UFloat, wrap as wrap_function
 from uncertainties import unumpy
@@ -222,6 +223,104 @@ def eval_with_uncertainties(
     return wrapfunc(x=x, **sub_params)
 
 
+def shot_weighted_average(
+    yvals: np.ndarray,
+    yerrs: np.ndarray,
+    shots: np.ndarray,
+) -> Tuple[float, float, float]:
+    """Compute shot-based variance and weighted average of the categorized data frame.
+
+    Sample is weighted by the shot number.
+
+    Args:
+        yvals: Y values to average.
+        yerrs: Y errors to average.
+        shots: Number of shots used to obtain Y value and error.
+
+    Returns:
+        Averaged Y value, Y error, and total shots.
+    """
+    if len(yvals) == 1:
+        return yvals[0], yerrs[0], shots[0]
+
+    if np.any(shots < 0):
+        # Shot number is unknown (stored as -1); fall back to an unweighted mean.
+        return np.mean(yvals), np.nan, -1
+
+    total_shots = np.sum(shots)
+    weights = shots / total_shots
+
+    avg_yval = np.sum(weights * yvals)
+    avg_yerr = np.sqrt(np.sum(weights**2 * yerrs**2))
+
+    return avg_yval, avg_yerr, total_shots
+
+
+def inverse_weighted_variance(
+    yvals: np.ndarray,
+    yerrs: np.ndarray,
+    shots: np.ndarray,
+) -> Tuple[float, float, int]:
+    """Compute inverse-weighted variance and weighted average of the categorized data frame.
+
+    Sample is weighted by the inverse of the data variance.
+
+    Args:
+        yvals: Y values to average.
+        yerrs: Y errors to average.
+        shots: Number of shots used to obtain Y value and error.
+
+    Returns:
+        Averaged Y value, Y error, and total shots.
+    """
+    if len(yvals) == 1:
+        return yvals[0], yerrs[0], shots[0]
+
+    total_shots = np.sum(shots) if all(shots > 0) else -1
+    weights = 1 / yerrs**2
+    yvar = 1 / np.sum(weights)
+
+    avg_yval = yvar * np.sum(weights * yvals)
+    avg_yerr = np.sqrt(yvar)
+
+    return avg_yval, avg_yerr, total_shots
+
+
+# pylint: disable=unused-argument
+def sample_average(
+    yvals: np.ndarray,
+    yerrs: np.ndarray,
+    shots: np.ndarray,
+) -> Tuple[float, float, int]:
+    """Compute sample-based variance and average of the categorized data frame.
+
+    Original variance of the data is ignored and variance is computed with the y values.
+
+    Args:
+        yvals: Y values to average.
+        yerrs: Y errors to average (ignored).
+        shots: Number of shots used to obtain Y value and error.
+
+    Returns:
+        Averaged Y value, Y error, and total shots.
+ """ + if len(yvals) == 1: + return yvals[0], 0.0, shots[0] + + total_shots = np.sum(shots) if all(shots > 0) else -1 + + avg_yval = np.mean(yvals) + avg_yerr = np.sqrt(np.mean((avg_yval - yvals) ** 2) / len(yvals)) + + return avg_yval, avg_yerr, total_shots + + +@deprecate_func( + since="0.6", + additional_msg="The curve data representation is replaced with dataframe format.", + package_name="qiskit-experiments", + pending=True, +) def filter_data(data: List[Dict[str, any]], **filters) -> List[Dict[str, any]]: """Return the list of filtered data @@ -249,6 +348,12 @@ def filter_data(data: List[Dict[str, any]], **filters) -> List[Dict[str, any]]: return filtered_data +@deprecate_func( + since="0.6", + additional_msg="The curve data representation is replaced with dataframe format.", + package_name="qiskit-experiments", + pending=True, +) def mean_xy_data( xdata: np.ndarray, ydata: np.ndarray, @@ -369,6 +474,12 @@ def mean_xy_data( raise QiskitError(f"Unsupported method {method}") +@deprecate_func( + since="0.6", + additional_msg="The curve data representation is replaced with dataframe format.", + package_name="qiskit-experiments", + pending=True, +) def multi_mean_xy_data( series: np.ndarray, xdata: np.ndarray, @@ -427,6 +538,12 @@ def multi_mean_xy_data( ) +@deprecate_func( + since="0.6", + additional_msg="The curve data representation is replaced with dataframe format.", + package_name="qiskit-experiments", + pending=True, +) def data_sort( series: np.ndarray, xdata: np.ndarray, diff --git a/qiskit_experiments/framework/experiment_data.py b/qiskit_experiments/framework/experiment_data.py index c83a4fa175..20fab44cc8 100644 --- a/qiskit_experiments/framework/experiment_data.py +++ b/qiskit_experiments/framework/experiment_data.py @@ -1405,7 +1405,7 @@ def add_analysis_results( tags = tags or [] backend = backend or self.backend_name - series = self._analysis_results.add_entry( + self._analysis_results.add_entry( result_id=result_id, name=name, value=value, @@ -1420,8 +1420,9 @@ def add_analysis_results( **extra_values, ) if self.auto_save: + last_index = self._analysis_results.result_ids()[-1][:8] service_result = _series_to_service_result( - series=series, + series=self._analysis_results.get_entry(last_index), service=self._service, auto_save=False, ) diff --git a/qiskit_experiments/library/characterization/analysis/drag_analysis.py b/qiskit_experiments/library/characterization/analysis/drag_analysis.py index 2aae5e1c9d..111179d081 100644 --- a/qiskit_experiments/library/characterization/analysis/drag_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/drag_analysis.py @@ -104,7 +104,7 @@ def set_options(self, **fields): def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. @@ -156,8 +156,7 @@ def _generate_fit_guesses( def _run_curve_fit( self, - curve_data: curve.CurveData, - models: List[lmfit.Model], + curve_data: curve.ScatterTable, ) -> curve.CurveFitResult: r"""Perform curve fitting on given data collection and fit models. @@ -187,13 +186,11 @@ def _run_curve_fit( Args: curve_data: Formatted data to fit. - models: A list of LMFIT models that are used to build a cost function - for the LMFIT minimizer. Returns: The best fitting outcome with minimum reduced chi-squared value. 
""" - fit_result = super()._run_curve_fit(curve_data, models) + fit_result = super()._run_curve_fit(curve_data) if fit_result and fit_result.params is not None: beta = fit_result.params["beta"] diff --git a/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py b/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py index 12c3c34bfd..7e73afd8a6 100644 --- a/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/ramsey_xy_analysis.py @@ -104,7 +104,7 @@ def _default_options(cls): def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. @@ -425,7 +425,7 @@ def _default_options(cls): def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. diff --git a/qiskit_experiments/library/characterization/analysis/t1_analysis.py b/qiskit_experiments/library/characterization/analysis/t1_analysis.py index ea77e6b569..cbc0929f20 100644 --- a/qiskit_experiments/library/characterization/analysis/t1_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/t1_analysis.py @@ -22,7 +22,6 @@ import qiskit_experiments.curve_analysis as curve import qiskit_experiments.data_processing as dp import qiskit_experiments.visualization as vis -from qiskit_experiments.curve_analysis.curve_data import CurveData from qiskit_experiments.data_processing.exceptions import DataProcessorError from qiskit_experiments.database_service.device_component import Qubit from qiskit_experiments.framework import BaseAnalysis, ExperimentData, AnalysisResultData, Options @@ -122,8 +121,8 @@ def _evaluate_quality(self, fit_data: curve.CurveFitResult) -> Union[str, None]: def _format_data( self, - curve_data: curve.CurveData, - ) -> curve.CurveData: + curve_data: curve.ScatterTable, + ) -> curve.ScatterTable: """Postprocessing for the processed dataset. Args: @@ -133,20 +132,10 @@ def _format_data( Formatted data. 
""" # check if the SVD decomposition categorized 0 as 1 by calculating the average slope - diff_y = np.diff(unp.nominal_values(curve_data.y), axis=0) + diff_y = np.diff(curve_data.yval) avg_slope = sum(diff_y) / len(diff_y) - if avg_slope[0] > 0: - new_y_data = 1 - curve_data.y - new_curve_data = CurveData( - x=curve_data.x, - y=new_y_data, - y_err=curve_data.y_err, - shots=curve_data.shots, - data_allocation=curve_data.data_allocation, - labels=curve_data.labels, - ) - - return super()._format_data(new_curve_data) + if avg_slope > 0: + curve_data.yval = 1 - curve_data.yval return super()._format_data(curve_data) diff --git a/qiskit_experiments/library/characterization/analysis/zz_ramsey_analysis.py b/qiskit_experiments/library/characterization/analysis/zz_ramsey_analysis.py index 77de28f7ef..5f5c9770bc 100644 --- a/qiskit_experiments/library/characterization/analysis/zz_ramsey_analysis.py +++ b/qiskit_experiments/library/characterization/analysis/zz_ramsey_analysis.py @@ -21,7 +21,12 @@ from qiskit.providers.options import Options import qiskit_experiments.curve_analysis as curve -from qiskit_experiments.curve_analysis import CurveAnalysis, CurveData, CurveFitResult, FitOptions +from qiskit_experiments.curve_analysis import ( + CurveAnalysis, + ScatterTable, + CurveFitResult, + FitOptions, +) from qiskit_experiments.curve_analysis.utils import is_error_not_significant @@ -121,7 +126,7 @@ def _default_options(cls) -> Options: def _generate_fit_guesses( self, user_opt: FitOptions, - curve_data: CurveData, + curve_data: ScatterTable, ) -> Union[FitOptions, List[FitOptions]]: """Compute the initial guesses. diff --git a/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py b/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py index 266ea0703b..7864b20436 100644 --- a/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py +++ b/qiskit_experiments/library/randomized_benchmarking/interleaved_rb_analysis.py @@ -120,7 +120,7 @@ def _default_options(cls): def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. diff --git a/qiskit_experiments/library/randomized_benchmarking/rb_analysis.py b/qiskit_experiments/library/randomized_benchmarking/rb_analysis.py index 28ba5c7e23..6670934c94 100644 --- a/qiskit_experiments/library/randomized_benchmarking/rb_analysis.py +++ b/qiskit_experiments/library/randomized_benchmarking/rb_analysis.py @@ -110,7 +110,7 @@ def _default_options(cls): def _generate_fit_guesses( self, user_opt: curve.FitOptions, - curve_data: curve.CurveData, + curve_data: curve.ScatterTable, ) -> Union[curve.FitOptions, List[curve.FitOptions]]: """Create algorithmic initial fit guess from analysis options and curve data. 
diff --git a/qiskit_experiments/test/fake_service.py b/qiskit_experiments/test/fake_service.py
index 2034380842..5029193921 100644
--- a/qiskit_experiments/test/fake_service.py
+++ b/qiskit_experiments/test/fake_service.py
@@ -12,12 +12,12 @@
 """Fake service class for tests."""
 
-from typing import Optional, List, Dict, Type, Any, Union, Tuple, Callable
-import functools
+from typing import Optional, List, Dict, Type, Any, Union, Tuple
 import json
 from datetime import datetime, timedelta
 import uuid
 
+import pandas as pd
 from qiskit_ibm_experiment import AnalysisResultData
 from qiskit_experiments.test.fake_backend import FakeBackend
@@ -28,41 +28,6 @@
 )
 
 
-# Check if PANDAS package is installed
-try:
-    import pandas as pd
-
-    HAS_PANDAS = True
-except ImportError:
-    pd = None
-    HAS_PANDAS = False
-
-
-def requires_pandas(func: Callable) -> Callable:
-    """Function decorator for functions requiring Pandas.
-
-    Args:
-        func: a function requiring Pandas.
-
-    Returns:
-        The decorated function.
-
-    Raises:
-        QiskitError: If Pandas is not installed.
-    """
-
-    @functools.wraps(func)
-    def decorated_func(*args, **kwargs):
-        if not HAS_PANDAS:
-            raise ImportError(
-                f"The pandas python package is required for {func}."
-                "You can install it with 'pip install pandas'."
-            )
-        return func(*args, **kwargs)
-
-    return decorated_func
-
-
 class FakeService:
     """
     This extremely simple database is designated for testing and as a playground for developers.
@@ -71,7 +36,6 @@ class FakeService:
     It implements most of the methods of `DatabaseService`.
     """
 
-    @requires_pandas
     def __init__(self):
         self.exps = pd.DataFrame(
             columns=[
diff --git a/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml b/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml
new file mode 100644
index 0000000000..7af3b7320f
--- /dev/null
+++ b/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml
@@ -0,0 +1,16 @@
+---
+features:
+  - |
+    :class:`.ScatterTable` is introduced as a drop-in replacement for :class:`.CurveData`.
+
+    This is a data format to store intermediate data in curve analysis, built on top of
+    the pandas DataFrame. Each table row corresponds to a single data point,
+    and the table contains all data points generated by the :class:`.CurveAnalysis`.
+    All properties and methods of :class:`.CurveData` are implemented for backward compatibility,
+    but these will be removed in a future release.
+developer:
+  - |
+    The :meth:`.CurveAnalysis._create_figures` method is added to the curve analysis base class.
+    A curve analysis subclass can override this method to customize the output figures.
+    The method is called with the :class:`.ScatterTable` containing all intermediate data points
+    generated during the curve analysis.
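As the developer note describes, subclasses may override :meth:`_create_figures` to customize output. A minimal sketch that bypasses the default plotter entirely is shown below; ``MyAnalysis`` is hypothetical, and only the figure hook is shown:

.. code-block:: python

    from typing import List
    import matplotlib.pyplot as plt
    from qiskit_experiments.curve_analysis import CurveAnalysis, ScatterTable

    class MyAnalysis(CurveAnalysis):
        """Hypothetical subclass demonstrating the new figure hook."""

        def _create_figures(self, curve_data: ScatterTable) -> List["plt.Figure"]:
            # Draw only the formatted (averaged) points, one series per model.
            fig, ax = plt.subplots()
            formatted = curve_data.filter(like="formatted", axis="index")
            for name, sub in formatted.groupby("model_name"):
                ax.errorbar(sub.xval, sub.yval, yerr=sub.yerr, fmt="o", label=str(name))
            ax.legend()
            return [fig]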
diff --git a/requirements-dev.txt b/requirements-dev.txt index 98abe635b4..0d0200aa7e 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -12,7 +12,6 @@ reno>=4.0.0 nbsphinx arxiv ddt>=1.6.0 -pandas>=1.1.5 pylatexenc multimethod sphinx-copybutton diff --git a/requirements.txt b/requirements.txt index 3fa61a3e18..7264029843 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ qiskit-ibm-experiment>=0.3.4 matplotlib>=3.4 uncertainties lmfit -rustworkx \ No newline at end of file +rustworkx +pandas>=1.1.5 diff --git a/test/curve_analysis/test_baseclass.py b/test/curve_analysis/test_baseclass.py index bc037b51b5..2aec96f5bc 100644 --- a/test/curve_analysis/test_baseclass.py +++ b/test/curve_analysis/test_baseclass.py @@ -133,10 +133,7 @@ def test_data_extraction(self): }, ) - curve_data = analysis._run_data_processing( - raw_data=expdata1.data() + expdata2.data(), - models=analysis._models, - ) + curve_data = analysis._run_data_processing(raw_data=expdata1.data() + expdata2.data()) self.assertListEqual(curve_data.labels, ["s1", "s2"]) # check data of series1
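For reference, the three row-index prefixes that the analysis code and tests above rely on partition a single table by processing stage; a sketch of pulling each stage back out, assuming ``curve_data`` is a fully populated ``ScatterTable``:

.. code-block:: python

    processed = curve_data.filter(like="processed", axis="index")  # raw extracted points
    formatted = curve_data.filter(like="formatted", axis="index")  # averaged and sorted
    fitted = curve_data.filter(like="fitted", axis="index")        # interpolated fit curves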