From 4aa850504657806822bae3e048fd81aaf3843a18 Mon Sep 17 00:00:00 2001 From: Naoki Kanazawa Date: Fri, 17 Nov 2023 10:38:11 +0900 Subject: [PATCH] Add upgrade doc about behavior change and replace class ID for unassigned data with null value. --- .../curve_analysis/curve_analysis.py | 22 ++++++++++++------- ...dataframe-curve-data-a8905c450748b281.yaml | 9 ++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/qiskit_experiments/curve_analysis/curve_analysis.py b/qiskit_experiments/curve_analysis/curve_analysis.py index c8296c1512..9215917c98 100644 --- a/qiskit_experiments/curve_analysis/curve_analysis.py +++ b/qiskit_experiments/curve_analysis/curve_analysis.py @@ -211,14 +211,17 @@ def _run_data_processing( source[idx]["shots"] = datum.get("shots", -1) # Assign entry name and class id - # Enumerate starts at 1 so that unclassified data becomes class_id = 0. - # This class_id is just defined for result data according to the data_subfit_map - # and this doesn't need to match with the actual fit model index. - for class_id, (name, spec) in enumerate(classifier.items(), 1): + for class_id, (name, spec) in enumerate(classifier.items()): if spec.items() <= metadata.items(): source[idx]["class_id"] = class_id source[idx]["name"] = name break + else: + # This is unclassified data. + # Assume that normal ID will never become negative number. + # This is numpy struct array object and cannot store pandas nullable integer. 
+ source[idx]["class_id"] = -1 + source[idx]["name"] = "" # Compute y value if not self.options.data_processor: @@ -232,7 +235,13 @@ def _run_data_processing( source["yerr"] = unp.std_devs(processed_values).flatten() source["category"] = category - return ScatterTable(data=source) + table = ScatterTable(data=source) + + # Replace temporary -1 value with nullable integer + table["class_id"] = table["class_id"].replace(-1, pd.NA) + table["shots"] = table["shots"].replace(-1, pd.NA) + + return table def _format_data( self, @@ -264,9 +273,6 @@ def _format_data( model_names = self.model_names() formatted = [] for (class_id, xv), g in groupby(sorted(curve_data.values, key=sort_by), key=sort_by): - if class_id == 0: - # This is unclassified data - continue g_values = np.array(list(g)) g_dict = dict(zip(columns, g_values.T)) avg_yval, avg_yerr, shots = average(g_dict["yval"], g_dict["yerr"], g_dict["shots"]) diff --git a/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml b/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml index b5ba718812..4072ee0471 100644 --- a/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml +++ b/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml @@ -12,6 +12,15 @@ features: New analysis option ``fit_category`` is added to :class:`.CurveAnalysis` subclasses. This option controls which data subset within the :class:`.ScatterTable` is used for the curve fitting. +upgrade: + - | + The behavior of :class:`.CurveAnalysis` data processing has changed. + It used to raise a ``DataProcessorError`` when it encountered an experiment result + that could not be classified into any fit model, but this restriction has been relaxed + and the analysis now continues with unclassified data. + Unclassified data is stored as-is in the :class:`.ScatterTable` with + a null class ID assigned. Such data is ignored in the remaining analysis steps, + such as formatting, fitting, and visualization. 
developer: - | :meth:`.CurveAnalysis._create_figures` method is added to the curve analysis base class.