fix: notebook for python>3.8 deps

zama-ai · Dec 10, 2024 · 14af92f · 14af92f
1 parent 720bf36
commit 14af92f
Show file tree

Hide file tree

Showing 3 changed files with 27 additions and 10 deletions.
diff --git a/docs/advanced_examples/DecisionTreeClassifier.ipynb b/docs/advanced_examples/DecisionTreeClassifier.ipynb
@@ -78,7 +78,7 @@
     "\n",
     "# List of hyper parameters to tune\n",
     "param_grid = {\n",
-    "    \"max_features\": [None, \"auto\", \"sqrt\", \"log2\"],\n",
+    "    \"max_features\": [None, \"sqrt\", \"log2\"],\n",
     "    \"min_samples_leaf\": [1, 10, 100],\n",
     "    \"min_samples_split\": [2, 10, 100],\n",
     "    \"max_depth\": [None, 2, 4, 6, 8],\n",

diff --git a/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb b/docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb
@@ -130,28 +130,45 @@
     "        y (np.array): Target labels of the dataset.\n",
     "    \"\"\"\n",
     "    if data_id is not None:\n",
-    "        X, y = fetch_openml(data_id=data_id, as_frame=False, cache=True, return_X_y=True)\n",
+    "        X, y = fetch_openml(data_id=data_id, as_frame=True, cache=True, return_X_y=True)\n",
     "    else:\n",
-    "        X, y = fetch_openml(name=name, as_frame=False, cache=True, return_X_y=True)\n",
+    "        X, y = fetch_openml(name=name, as_frame=True, cache=True, return_X_y=True)\n",
     "    return X, y\n",
     "\n",
     "\n",
+    "def preprocess_features(X):\n",
+    "    \"\"\"Convert categorical columns to numerical.\"\"\"\n",
+    "    X_processed = X.copy()\n",
+    "\n",
+    "    for column in X_processed.columns:\n",
+    "        if X_processed[column].dtype == \"object\" or X_processed[column].dtype.name == \"category\":\n",
+    "            # Convert categorical columns to numeric using label encoding\n",
+    "            X_processed[column] = X_processed[column].astype(\"category\").cat.codes\n",
+    "\n",
+    "    return X_processed.astype(np.float32)\n",
+    "\n",
+    "\n",
     "for ds_name, ds_id in dataset_names.items():\n",
     "    print(f\"Loading {ds_name}\")\n",
     "\n",
     "    X, y = load_dataset(ds_name, ds_id)\n",
     "\n",
+    "    # Preprocess features (handle categorical data)\n",
+    "    X = preprocess_features(X)\n",
+    "\n",
     "    # Remove rows with NaN values\n",
-    "    not_nan_idx = np.where(~np.isnan(X).any(axis=1))\n",
-    "    X = X[not_nan_idx]\n",
-    "    y = y[not_nan_idx]\n",
+    "    not_nan_mask = ~np.isnan(X).any(axis=1)\n",
+    "    X = X[not_nan_mask]\n",
+    "    y = y[not_nan_mask]\n",
     "\n",
     "    # Convert non-integer target labels to integers\n",
     "    if not y.dtype == np.int64:\n",
     "        encoder = OrdinalEncoder()\n",
-    "        y = encoder.fit_transform(y.reshape(-1, 1)).astype(np.int32).squeeze()\n",
+    "        # Convert pandas Series to numpy array before reshaping\n",
+    "        y = encoder.fit_transform(np.array(y).reshape(-1, 1)).astype(np.int32).squeeze()\n",
     "\n",
-    "    datasets[ds_name] = {\"X\": X, \"y\": y}"
+    "    # Ensure both X and y are numpy arrays before storing\n",
+    "    datasets[ds_name] = {\"X\": np.array(X), \"y\": np.array(y)}"
    ]
   },
   {

diff --git a/docs/advanced_examples/LogisticRegressionTraining.ipynb b/docs/advanced_examples/LogisticRegressionTraining.ipynb
@@ -111,7 +111,7 @@
     "\n",
     "# Load the Iris dataset\n",
     "X_full, y_full = datasets.load_iris(return_X_y=True)\n",
-    "X_full = MinMaxScaler(feature_range=[-1, 1]).fit_transform(X_full)\n",
+    "X_full = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X_full)\n",
     "\n",
     "# Select petal length and petal width for visualization\n",
     "X = X_full[:, 2:4]  # Petal length and petal width\n",
@@ -384,7 +384,7 @@
     "X, y = datasets.load_breast_cancer(return_X_y=True)\n",
     "x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)\n",
     "\n",
-    "scaler = MinMaxScaler(feature_range=[-1, 1])\n",
+    "scaler = MinMaxScaler(feature_range=(-1, 1))\n",
     "x_train = scaler.fit_transform(x_train)\n",
     "x_test = scaler.transform(x_test)\n",
     "\n",