Skip to content

Commit

Permalink
split df into X and y ('Biopsy')
Browse files Browse the repository at this point in the history
  • Loading branch information
diminecjean committed May 18, 2023
1 parent ccccb0f commit a8f5fa9
Showing 1 changed file with 164 additions and 15 deletions.
179 changes: 164 additions & 15 deletions Project Part 1 (Template).ipynb
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -26,6 +27,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -34,7 +36,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -45,6 +47,7 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -53,7 +56,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -371,8 +374,8 @@
"</div>"
],
"text/plain": [
" Age Number of sexual partners First sexual intercourse \n",
"0 18 4.0 15.0 \\\n",
" Age Number of sexual partners First sexual intercourse \\\n",
"0 18 4.0 15.0 \n",
"1 15 1.0 14.0 \n",
"2 34 1.0 ? \n",
"3 52 5.0 16.0 \n",
Expand All @@ -384,8 +387,8 @@
"856 33 2.0 24.0 \n",
"857 29 2.0 20.0 \n",
"\n",
" Num of pregnancies Smokes Smokes (years) Smokes (packs/year) \n",
"0 1.0 0.0 0.0 0.0 \\\n",
" Num of pregnancies Smokes Smokes (years) Smokes (packs/year) \\\n",
"0 1.0 0.0 0.0 0.0 \n",
"1 1.0 0.0 0.0 0.0 \n",
"2 1.0 0.0 0.0 0.0 \n",
"3 4.0 1.0 37.0 37.0 \n",
Expand All @@ -397,8 +400,8 @@
"856 2.0 0.0 0.0 0.0 \n",
"857 1.0 0.0 0.0 0.0 \n",
"\n",
" Hormonal Contraceptives Hormonal Contraceptives (years) IUD ... \n",
"0 0.0 0.0 0.0 ... \\\n",
" Hormonal Contraceptives Hormonal Contraceptives (years) IUD ... \\\n",
"0 0.0 0.0 0.0 ... \n",
"1 0.0 0.0 0.0 ... \n",
"2 0.0 0.0 0.0 ... \n",
"3 1.0 3.0 0.0 ... \n",
Expand All @@ -410,8 +413,8 @@
"856 1.0 0.08 0.0 ... \n",
"857 1.0 0.5 0.0 ... \n",
"\n",
" STDs: Time since first diagnosis STDs: Time since last diagnosis \n",
"0 ? ? \\\n",
" STDs: Time since first diagnosis STDs: Time since last diagnosis \\\n",
"0 ? ? \n",
"1 ? ? \n",
"2 ? ? \n",
"3 ? ? \n",
Expand Down Expand Up @@ -439,7 +442,7 @@
"[858 rows x 36 columns]"
]
},
"execution_count": 2,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -450,6 +453,150 @@
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 858 entries, 0 to 857\n",
"Data columns (total 36 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Age 858 non-null int64 \n",
" 1 Number of sexual partners 858 non-null object\n",
" 2 First sexual intercourse 858 non-null object\n",
" 3 Num of pregnancies 858 non-null object\n",
" 4 Smokes 858 non-null object\n",
" 5 Smokes (years) 858 non-null object\n",
" 6 Smokes (packs/year) 858 non-null object\n",
" 7 Hormonal Contraceptives 858 non-null object\n",
" 8 Hormonal Contraceptives (years) 858 non-null object\n",
" 9 IUD 858 non-null object\n",
" 10 IUD (years) 858 non-null object\n",
" 11 STDs 858 non-null object\n",
" 12 STDs (number) 858 non-null object\n",
" 13 STDs:condylomatosis 858 non-null object\n",
" 14 STDs:cervical condylomatosis 858 non-null object\n",
" 15 STDs:vaginal condylomatosis 858 non-null object\n",
" 16 STDs:vulvo-perineal condylomatosis 858 non-null object\n",
" 17 STDs:syphilis 858 non-null object\n",
" 18 STDs:pelvic inflammatory disease 858 non-null object\n",
" 19 STDs:genital herpes 858 non-null object\n",
" 20 STDs:molluscum contagiosum 858 non-null object\n",
" 21 STDs:AIDS 858 non-null object\n",
" 22 STDs:HIV 858 non-null object\n",
" 23 STDs:Hepatitis B 858 non-null object\n",
" 24 STDs:HPV 858 non-null object\n",
" 25 STDs: Number of diagnosis 858 non-null int64 \n",
" 26 STDs: Time since first diagnosis 858 non-null object\n",
" 27 STDs: Time since last diagnosis 858 non-null object\n",
" 28 Dx:Cancer 858 non-null int64 \n",
" 29 Dx:CIN 858 non-null int64 \n",
" 30 Dx:HPV 858 non-null int64 \n",
" 31 Dx 858 non-null int64 \n",
" 32 Hinselmann 858 non-null int64 \n",
" 33 Schiller 858 non-null int64 \n",
" 34 Citology 858 non-null int64 \n",
" 35 Biopsy 858 non-null int64 \n",
"dtypes: int64(10), object(26)\n",
"memory usage: 241.4+ KB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 858 entries, 0 to 857\n",
"Data columns (total 35 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Age 858 non-null int64 \n",
" 1 Number of sexual partners 858 non-null object\n",
" 2 First sexual intercourse 858 non-null object\n",
" 3 Num of pregnancies 858 non-null object\n",
" 4 Smokes 858 non-null object\n",
" 5 Smokes (years) 858 non-null object\n",
" 6 Smokes (packs/year) 858 non-null object\n",
" 7 Hormonal Contraceptives 858 non-null object\n",
" 8 Hormonal Contraceptives (years) 858 non-null object\n",
" 9 IUD 858 non-null object\n",
" 10 IUD (years) 858 non-null object\n",
" 11 STDs 858 non-null object\n",
" 12 STDs (number) 858 non-null object\n",
" 13 STDs:condylomatosis 858 non-null object\n",
" 14 STDs:cervical condylomatosis 858 non-null object\n",
" 15 STDs:vaginal condylomatosis 858 non-null object\n",
" 16 STDs:vulvo-perineal condylomatosis 858 non-null object\n",
" 17 STDs:syphilis 858 non-null object\n",
" 18 STDs:pelvic inflammatory disease 858 non-null object\n",
" 19 STDs:genital herpes 858 non-null object\n",
" 20 STDs:molluscum contagiosum 858 non-null object\n",
" 21 STDs:AIDS 858 non-null object\n",
" 22 STDs:HIV 858 non-null object\n",
" 23 STDs:Hepatitis B 858 non-null object\n",
" 24 STDs:HPV 858 non-null object\n",
" 25 STDs: Number of diagnosis 858 non-null int64 \n",
" 26 STDs: Time since first diagnosis 858 non-null object\n",
" 27 STDs: Time since last diagnosis 858 non-null object\n",
" 28 Dx:Cancer 858 non-null int64 \n",
" 29 Dx:CIN 858 non-null int64 \n",
" 30 Dx:HPV 858 non-null int64 \n",
" 31 Dx 858 non-null int64 \n",
" 32 Hinselmann 858 non-null int64 \n",
" 33 Schiller 858 non-null int64 \n",
" 34 Citology 858 non-null int64 \n",
"dtypes: int64(9), object(26)\n",
"memory usage: 234.7+ KB\n"
]
}
],
"source": [
"X = df.drop('Biopsy', axis = 1)\n",
"X.info()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.series.Series'>\n",
"RangeIndex: 858 entries, 0 to 857\n",
"Series name: Biopsy\n",
"Non-Null Count Dtype\n",
"-------------- -----\n",
"858 non-null int64\n",
"dtypes: int64(1)\n",
"memory usage: 6.8 KB\n"
]
}
],
"source": [
"y = df['Biopsy']\n",
"y.info()"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -459,16 +606,15 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#split into 80:10:10\n",
"\n",
"\n"
"#split into 80:10:10\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -486,6 +632,7 @@
"source": []
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -503,6 +650,7 @@
"source": []
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand All @@ -520,6 +668,7 @@
"source": []
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
Expand Down

0 comments on commit a8f5fa9

Please sign in to comment.