diff --git a/CPC251_Project_Part1_Group-Cancer_2.code-workspace b/CPC251_Project_Part1_Group-Cancer_2.code-workspace new file mode 100644 index 0000000..876a149 --- /dev/null +++ b/CPC251_Project_Part1_Group-Cancer_2.code-workspace @@ -0,0 +1,8 @@ +{ + "folders": [ + { + "path": "." + } + ], + "settings": {} +} \ No newline at end of file diff --git a/Project Part 1 (Template).ipynb b/Project Part 1 (Template).ipynb index e3d7c45..f5a2b39 100644 --- a/Project Part 1 (Template).ipynb +++ b/Project Part 1 (Template).ipynb @@ -72,9 +72,7 @@ "execution_count": 9, "metadata": {}, "outputs": [], - "source": [ - "#split into 80:10:10" - ] + "source": [] }, { "attachments": {}, diff --git a/Project Part 1.ipynb b/Project Part 1.ipynb index 3b8d0d7..6dbf1b2 100644 --- a/Project Part 1.ipynb +++ b/Project Part 1.ipynb @@ -448,7 +448,10 @@ } ], "source": [ + "#Read the dataset from 'risk_factors.csv' into a pandas dataframe\n", "df= pd.read_csv('risk_factors.csv')\n", + "\n", + "#Display the dataframe\n", "df" ] }, @@ -508,6 +511,7 @@ } ], "source": [ + "#Display information about the dataframe\n", "df.info()" ] }, @@ -566,7 +570,10 @@ } ], "source": [ + "#Drop the 'Biopsy' column to create a new dataframe called 'X'\n", "X = df.drop('Biopsy', axis = 1)\n", + "\n", + "#Display information about 'X'\n", "X.info()" ] }, @@ -587,6 +594,7 @@ } ], "source": [ + "#Get the dimensions (rows, columns) of 'X'\n", "X.shape\n" ] }, @@ -613,7 +621,10 @@ } ], "source": [ + "#Assign the 'Biopsy' column to the variable 'y'\n", "y = df['Biopsy']\n", + "\n", + "#Display information about the 'y' variable\n", "y.info()" ] }, @@ -652,8 +663,6 @@ "metadata": {}, "outputs": [], "source": [ - "#split into 80:10:10\n", - "\n", "#splitting into training and test set\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1,random_state=0)\n", "\n",