From 4aa850504657806822bae3e048fd81aaf3843a18 Mon Sep 17 00:00:00 2001
From: Naoki Kanazawa <nkanazawa1989@gmail.com>
Date: Fri, 17 Nov 2023 10:38:11 +0900
Subject: [PATCH] Add upgrade doc about behavior change and replace class ID
 for unassigned data with null value.

---
 .../curve_analysis/curve_analysis.py          | 22 ++++++++++++-------
 ...dataframe-curve-data-a8905c450748b281.yaml |  9 ++++++++
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/qiskit_experiments/curve_analysis/curve_analysis.py b/qiskit_experiments/curve_analysis/curve_analysis.py
index c8296c1512..9215917c98 100644
--- a/qiskit_experiments/curve_analysis/curve_analysis.py
+++ b/qiskit_experiments/curve_analysis/curve_analysis.py
@@ -211,14 +211,17 @@ def _run_data_processing(
             source[idx]["shots"] = datum.get("shots", -1)
 
             # Assign entry name and class id
-            # Enumerate starts at 1 so that unclassified data becomes class_id = 0.
-            # This class_id is just defined for result data according to the data_subfit_map
-            # and this doesn't need to match with the actual fit model index.
-            for class_id, (name, spec) in enumerate(classifier.items(), 1):
+            for class_id, (name, spec) in enumerate(classifier.items()):
                 if spec.items() <= metadata.items():
                     source[idx]["class_id"] = class_id
                     source[idx]["name"] = name
                     break
+            else:
+                # This is unclassified data.
+                # Use -1 as a temporary sentinel; a valid class ID is never negative.
+                # A numpy structured array cannot store a pandas nullable integer.
+                source[idx]["class_id"] = -1
+                source[idx]["name"] = ""
 
         # Compute y value
         if not self.options.data_processor:
@@ -232,7 +235,13 @@ def _run_data_processing(
         source["yerr"] = unp.std_devs(processed_values).flatten()
         source["category"] = category
 
-        return ScatterTable(data=source)
+        table = ScatterTable(data=source)
+
+        # Replace temporary -1 value with nullable integer
+        table["class_id"] = table["class_id"].replace(-1, pd.NA)
+        table["shots"] = table["shots"].replace(-1, pd.NA)
+
+        return table
 
     def _format_data(
         self,
@@ -264,9 +273,6 @@ def _format_data(
         model_names = self.model_names()
         formatted = []
         for (class_id, xv), g in groupby(sorted(curve_data.values, key=sort_by), key=sort_by):
-            if class_id == 0:
-                # This is unclassified data
-                continue
             g_values = np.array(list(g))
             g_dict = dict(zip(columns, g_values.T))
             avg_yval, avg_yerr, shots = average(g_dict["yval"], g_dict["yerr"], g_dict["shots"])
diff --git a/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml b/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml
index b5ba718812..4072ee0471 100644
--- a/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml
+++ b/releasenotes/notes/add-dataframe-curve-data-a8905c450748b281.yaml
@@ -12,6 +12,15 @@ features:
     New analysis option ``fit_category`` is added to :class:`.CurveAnalysis` subclasses.
     This option controls which data subset within the :class:`.ScatterTable` 
     is used for the curve fitting.
+upgrade:
+  - |
+    The behavior of :class:`.CurveAnalysis` data processing has changed.
+    It used to raise a ``DataProcessorError`` when it encountered an experiment result
+    that could not be classified into any fit model. This restriction has been relaxed,
+    and the analysis now continues with unclassified data.
+    Unclassified data is stored as-is in the :class:`.ScatterTable` with
+    a null class ID assigned. Such data is ignored in the remaining analysis steps,
+    such as formatting, fitting, and visualization.
 developer:
   - |
     :meth:`.CurveAnalysis._create_figures` method is added to the curve analysis base class.