Skip to content

Commit

Permalink
fix: notebook for python>3.8 deps
Browse files: browse the repository at this point in the history
  • Loading branch information
jfrery committed Dec 10, 2024
1 parent 720bf36 commit 14af92f
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 10 deletions.
2 changes: 1 addition & 1 deletion docs/advanced_examples/DecisionTreeClassifier.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@
"\n",
"# List of hyper parameters to tune\n",
"param_grid = {\n",
" \"max_features\": [None, \"auto\", \"sqrt\", \"log2\"],\n",
" \"max_features\": [None, \"sqrt\", \"log2\"],\n",
" \"min_samples_leaf\": [1, 10, 100],\n",
" \"min_samples_split\": [2, 10, 100],\n",
" \"max_depth\": [None, 2, 4, 6, 8],\n",
Expand Down
31 changes: 24 additions & 7 deletions docs/advanced_examples/ExperimentPrivacyTreePaper.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -130,28 +130,45 @@
" y (np.array): Target labels of the dataset.\n",
" \"\"\"\n",
" if data_id is not None:\n",
" X, y = fetch_openml(data_id=data_id, as_frame=False, cache=True, return_X_y=True)\n",
" X, y = fetch_openml(data_id=data_id, as_frame=True, cache=True, return_X_y=True)\n",
" else:\n",
" X, y = fetch_openml(name=name, as_frame=False, cache=True, return_X_y=True)\n",
" X, y = fetch_openml(name=name, as_frame=True, cache=True, return_X_y=True)\n",
" return X, y\n",
"\n",
"\n",
"def preprocess_features(X):\n",
" \"\"\"Convert categorical columns to numerical.\"\"\"\n",
" X_processed = X.copy()\n",
"\n",
" for column in X_processed.columns:\n",
" if X_processed[column].dtype == \"object\" or X_processed[column].dtype.name == \"category\":\n",
" # Convert categorical columns to numeric using label encoding\n",
" X_processed[column] = X_processed[column].astype(\"category\").cat.codes\n",
"\n",
" return X_processed.astype(np.float32)\n",
"\n",
"\n",
"for ds_name, ds_id in dataset_names.items():\n",
" print(f\"Loading {ds_name}\")\n",
"\n",
" X, y = load_dataset(ds_name, ds_id)\n",
"\n",
" # Preprocess features (handle categorical data)\n",
" X = preprocess_features(X)\n",
"\n",
" # Remove rows with NaN values\n",
" not_nan_idx = np.where(~np.isnan(X).any(axis=1))\n",
" X = X[not_nan_idx]\n",
" y = y[not_nan_idx]\n",
" not_nan_mask = ~np.isnan(X).any(axis=1)\n",
" X = X[not_nan_mask]\n",
" y = y[not_nan_mask]\n",
"\n",
" # Convert non-integer target labels to integers\n",
" if not y.dtype == np.int64:\n",
" encoder = OrdinalEncoder()\n",
" y = encoder.fit_transform(y.reshape(-1, 1)).astype(np.int32).squeeze()\n",
" # Convert pandas Series to numpy array before reshaping\n",
" y = encoder.fit_transform(np.array(y).reshape(-1, 1)).astype(np.int32).squeeze()\n",
"\n",
" datasets[ds_name] = {\"X\": X, \"y\": y}"
" # Ensure both X and y are numpy arrays before storing\n",
" datasets[ds_name] = {\"X\": np.array(X), \"y\": np.array(y)}"
]
},
{
Expand Down
4 changes: 2 additions & 2 deletions docs/advanced_examples/LogisticRegressionTraining.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@
"\n",
"# Load the Iris dataset\n",
"X_full, y_full = datasets.load_iris(return_X_y=True)\n",
"X_full = MinMaxScaler(feature_range=[-1, 1]).fit_transform(X_full)\n",
"X_full = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X_full)\n",
"\n",
"# Select petal length and petal width for visualization\n",
"X = X_full[:, 2:4] # Petal length and petal width\n",
Expand Down Expand Up @@ -384,7 +384,7 @@
"X, y = datasets.load_breast_cancer(return_X_y=True)\n",
"x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)\n",
"\n",
"scaler = MinMaxScaler(feature_range=[-1, 1])\n",
"scaler = MinMaxScaler(feature_range=(-1, 1))\n",
"x_train = scaler.fit_transform(x_train)\n",
"x_test = scaler.transform(x_test)\n",
"\n",
Expand Down

0 comments on commit 14af92f

Please sign in to comment.