From f0130a28596fafb35b458cbe47f450ff84ad3571 Mon Sep 17 00:00:00 2001
From: Jon Niehof
Date: Mon, 27 Feb 2023 09:08:08 -0500
Subject: [PATCH 1/2] Convert chapter 2 to new Boston source

---
 02_fundamentals/Code.ipynb | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/02_fundamentals/Code.ipynb b/02_fundamentals/Code.ipynb
index 2995817..81ff5a1 100644
--- a/02_fundamentals/Code.ipynb
+++ b/02_fundamentals/Code.ipynb
@@ -47,7 +47,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.datasets import load_boston"
+    "import pandas as pd"
    ]
   },
   {
@@ -56,7 +56,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "boston = load_boston()"
+    "# New source for Boston housing data per https://scikit-learn.org/1.0/whats_new/v1.0.html#changes-1-0\n",
+    "data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
+    "raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)"
    ]
   },
   {
@@ -65,9 +67,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "data = boston.data\n",
-    "target = boston.target\n",
-    "features = boston.feature_names"
+    "data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
+    "target = raw_df.values[1::2, 2]\n",
+    "features = np.array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',\n",
+    "                     'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'])"
    ]
   },
   {

From 30d6de87db037c1b4980a4dc1df47ad53943b4e7 Mon Sep 17 00:00:00 2001
From: Jonathan Niehof
Date: Fri, 10 Mar 2023 13:48:10 -0500
Subject: [PATCH 2/2] Convert chapters 3, 7 to new Boston source

---
 03_dlfs/Code.ipynb    | 11 ++++++-----
 07_PyTorch/Code.ipynb | 12 ++++++------
 2 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/03_dlfs/Code.ipynb b/03_dlfs/Code.ipynb
index 70401bc..c38d07f 100644
--- a/03_dlfs/Code.ipynb
+++ b/03_dlfs/Code.ipynb
@@ -820,12 +820,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.datasets import load_boston\n",
+    "import pandas as pd\n",
+    "data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
+    "raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
     "\n",
-    "boston = load_boston()\n",
-    "data = boston.data\n",
-    "target = boston.target\n",
-    "features = boston.feature_names"
+    "data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
+    "target = raw_df.values[1::2, 2]\n",
+    "features = np.array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'])"
    ]
   },
   {

diff --git a/07_PyTorch/Code.ipynb b/07_PyTorch/Code.ipynb
index ad4c8eb..b0c3b8b 100644
--- a/07_PyTorch/Code.ipynb
+++ b/07_PyTorch/Code.ipynb
@@ -61,13 +61,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from sklearn.datasets import load_boston\n",
-    "\n",
-    "boston = load_boston()\n",
+    "import pandas as pd\n",
+    "data_url = \"http://lib.stat.cmu.edu/datasets/boston\"\n",
+    "raw_df = pd.read_csv(data_url, sep=\"\\s+\", skiprows=22, header=None)\n",
     "\n",
-    "data = boston.data\n",
-    "target = boston.target\n",
-    "features = boston.feature_names\n",
+    "data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])\n",
+    "target = raw_df.values[1::2, 2]\n",
+    "features = np.array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'])\n",
     "\n",
     "from sklearn.preprocessing import StandardScaler\n",
     "s = StandardScaler()\n",
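
A minimal standalone sketch of the replacement loader (not part of the patches above), assuming network access to lib.stat.cmu.edu; it can be run outside the notebooks to confirm the rebuilt arrays have the same shape as the old load_boston() output. numpy is imported explicitly here, whereas the patched cells presumably rely on an earlier "import numpy as np" cell in each notebook.

# Standalone sketch of the new Boston loading code (assumes lib.stat.cmu.edu is reachable).
import numpy as np   # the notebooks presumably import this in an earlier cell
import pandas as pd

data_url = "http://lib.stat.cmu.edu/datasets/boston"
# After a 22-line header, each record spans two physical lines:
# 11 values on the first, 3 on the second (the last of which is the MEDV target).
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)

data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]
features = np.array(['CRIM', 'ZN', 'INDUS', 'CHAS', 'NOX', 'RM', 'AGE', 'DIS',
                     'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT'])

# Same shapes as the arrays load_boston() used to return: 506 samples, 13 features.
assert data.shape == (506, 13) and target.shape == (506,) and features.shape == (13,)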