From 43f3c8113bb1e30508eaae9fccf027605f62fcd8 Mon Sep 17 00:00:00 2001 From: wbenbihi Date: Fri, 19 Aug 2022 15:48:05 +0800 Subject: [PATCH] [ADD][DOC] Export data in notebook 02 --- notebooks/02 - Prepare Data.ipynb | 622 +++++++++++++++++++++++++----- 1 file changed, 518 insertions(+), 104 deletions(-) diff --git a/notebooks/02 - Prepare Data.ipynb b/notebooks/02 - Prepare Data.ipynb index e8ff777..80eaced 100644 --- a/notebooks/02 - Prepare Data.ipynb +++ b/notebooks/02 - Prepare Data.ipynb @@ -27,7 +27,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 82, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -516,7 +516,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -540,7 +540,7 @@ " 15: HTFPersonJoint(x=374, y=258, id=15, visible=True)}" ] }, - "execution_count": 20, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -561,7 +561,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 16, "metadata": {}, "outputs": [], "source": [ @@ -590,7 +590,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -622,7 +622,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -643,7 +643,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -652,14 +652,21 @@ }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 20, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sizes valid: True\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "/var/folders/yt/n2trg5vn7q1gldwvjlgl8gc00000gn/T/ipykernel_47621/3915166120.py:2: DeprecationWarning: Sampling from a set deprecated\n", + "/var/folders/yt/n2trg5vn7q1gldwvjlgl8gc00000gn/T/ipykernel_66838/3915166120.py:2: DeprecationWarning: Sampling from a set deprecated\n", "since Python 3.9 and will be removed in a subsequent version.\n", " train_images = random.sample(image_names, int(0.8*(len(image_names))))\n" ] @@ -668,10 +675,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Sizes valid: True\n", - "Set sizes [ 5695 23188]\n", - "Validation Set Size (5695, 56)\n", - "Train Set Size (23188, 56)\n" + "Set sizes [ 5771 23112]\n", + "Validation Set Size (5771, 56)\n", + "Train Set Size (23112, 56)\n" ] } ], @@ -693,7 +699,7 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -714,12 +720,12 @@ "]\n", "# Visualize as DataFrame \n", "train_df = pd.DataFrame.from_records(train_array, columns=headers)\n", - "validation_df = pd.DataFrame.from_records(train_array, columns=headers)" + "validation_df = pd.DataFrame.from_records(validation_array, columns=headers)" ] }, { "cell_type": "code", - "execution_count": 103, + "execution_count": 22, "metadata": {}, "outputs": [], "source": [ @@ -741,7 +747,7 @@ }, { "cell_type": "code", - "execution_count": 105, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -934,7 +940,7 @@ " ...\n", " \n", " \n", - " 23183\n", + " 23107\n", " 084761779.jpg\n", " 1.189877\n", " 287\n", @@ -958,7 +964,7 @@ " 0\n", " \n", " \n", - " 23184\n", + " 23108\n", " 084761779.jpg\n", " 0.900880\n", " 333\n", @@ -982,7 +988,7 @@ " 0\n", " \n", " \n", - " 23185\n", + " 23109\n", " 084761779.jpg\n", " 0.950352\n", " 400\n", @@ -1006,7 +1012,7 @@ " 0\n", " \n", " \n", - " 23186\n", + " 23110\n", " 084761779.jpg\n", " 0.905662\n", " 212\n", @@ -1030,7 +1036,7 @@ " 0\n", " \n", " \n", - " 23187\n", + " 23111\n", " 084761779.jpg\n", " 2.665729\n", " 0\n", @@ -1055,7 +1061,7 @@ " \n", " \n", "\n", - "

23188 rows × 56 columns

\n", + "

23112 rows × 56 columns

\n", "" ], "text/plain": [ @@ -1066,11 +1072,11 @@ "3 015599452.jpg 6.071051 903 903 1070 1070 \n", "4 015599452.jpg 5.728162 27 27 186 186 \n", "... ... ... ... ... ... ... \n", - "23183 084761779.jpg 1.189877 287 287 324 324 \n", - "23184 084761779.jpg 0.900880 333 333 361 361 \n", - "23185 084761779.jpg 0.950352 400 400 428 428 \n", - "23186 084761779.jpg 0.905662 212 212 244 244 \n", - "23187 084761779.jpg 2.665729 0 0 76 76 \n", + "23107 084761779.jpg 1.189877 287 287 324 324 \n", + "23108 084761779.jpg 0.900880 333 333 361 361 \n", + "23109 084761779.jpg 0.950352 400 400 428 428 \n", + "23110 084761779.jpg 0.905662 212 212 244 244 \n", + "23111 084761779.jpg 2.665729 0 0 76 76 \n", "\n", " center_x center_y joint_0_X joint_0_Y ... joint_12_visible \\\n", "0 594 257 620 394 ... 1 \n", @@ -1079,11 +1085,11 @@ "3 1010 412 -1 -1 ... 1 \n", "4 133 315 -1 -1 ... 1 \n", "... ... ... ... ... ... ... \n", - "23183 330 208 362 350 ... 1 \n", - "23184 372 216 457 324 ... 0 \n", - "23185 437 207 483 292 ... 0 \n", - "23186 230 216 -1 -1 ... 0 \n", - "23187 62 338 -1 -1 ... 1 \n", + "23107 330 208 362 350 ... 1 \n", + "23108 372 216 457 324 ... 0 \n", + "23109 437 207 483 292 ... 0 \n", + "23110 230 216 -1 -1 ... 0 \n", + "23111 62 338 -1 -1 ... 1 \n", "\n", " joint_13_X joint_13_Y joint_13_visible joint_14_X joint_14_Y \\\n", "0 692 185 1 693 240 \n", @@ -1092,11 +1098,11 @@ "3 1145 269 1 1226 475 \n", "4 26 251 1 26 423 \n", "... ... ... ... ... ... \n", - "23183 288 177 1 279 199 \n", - "23184 349 171 1 334 193 \n", - "23185 403 179 1 403 205 \n", - "23186 214 179 1 190 202 \n", - "23187 -1 -1 0 -1 -1 \n", + "23107 288 177 1 279 199 \n", + "23108 349 171 1 334 193 \n", + "23109 403 179 1 403 205 \n", + "23110 214 179 1 190 202 \n", + "23111 -1 -1 0 -1 -1 \n", "\n", " joint_14_visible joint_15_X joint_15_Y joint_15_visible \n", "0 1 688 313 1 \n", @@ -1105,16 +1111,16 @@ "3 1 1096 433 1 \n", "4 1 -1 -1 0 \n", "... ... ... ... ... \n", - "23183 0 268 194 0 \n", - "23184 0 319 194 0 \n", - "23185 0 386 198 0 \n", - "23186 1 190 201 0 \n", - "23187 0 -1 -1 0 \n", + "23107 0 268 194 0 \n", + "23108 0 319 194 0 \n", + "23109 0 386 198 0 \n", + "23110 1 190 201 0 \n", + "23111 0 -1 -1 0 \n", "\n", - "[23188 rows x 56 columns]" + "[23112 rows x 56 columns]" ] }, - "execution_count": 105, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1134,7 +1140,7 @@ }, { "cell_type": "code", - "execution_count": 106, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -1142,66 +1148,66 @@ "output_type": "stream", "text": [ "\n", - "RangeIndex: 23188 entries, 0 to 23187\n", + "RangeIndex: 23112 entries, 0 to 23111\n", "Data columns (total 56 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 image 23188 non-null object \n", - " 1 scale 23188 non-null float64\n", - " 2 bbox_tl_x 23188 non-null int64 \n", - " 3 bbox_tl_y 23188 non-null int64 \n", - " 4 bbox_br_x 23188 non-null int64 \n", - " 5 bbox_br_y 23188 non-null int64 \n", - " 6 center_x 23188 non-null int64 \n", - " 7 center_y 23188 non-null int64 \n", - " 8 joint_0_X 23188 non-null int64 \n", - " 9 joint_0_Y 23188 non-null int64 \n", - " 10 joint_0_visible 23188 non-null int64 \n", - " 11 joint_1_X 23188 non-null int64 \n", - " 12 joint_1_Y 23188 non-null int64 \n", - " 13 joint_1_visible 23188 non-null int64 \n", - " 14 joint_2_X 23188 non-null int64 \n", - " 15 joint_2_Y 23188 non-null int64 \n", - " 16 joint_2_visible 23188 non-null int64 \n", - " 17 joint_3_X 23188 non-null int64 \n", - " 18 joint_3_Y 23188 non-null int64 \n", - " 19 joint_3_visible 23188 non-null int64 \n", - " 20 joint_4_X 23188 non-null int64 \n", - " 21 joint_4_Y 23188 non-null int64 \n", - " 22 joint_4_visible 23188 non-null int64 \n", - " 23 joint_5_X 23188 non-null int64 \n", - " 24 joint_5_Y 23188 non-null int64 \n", - " 25 joint_5_visible 23188 non-null int64 \n", - " 26 joint_6_X 23188 non-null int64 \n", - " 27 joint_6_Y 23188 non-null int64 \n", - " 28 joint_6_visible 23188 non-null int64 \n", - " 29 joint_7_X 23188 non-null int64 \n", - " 30 joint_7_Y 23188 non-null int64 \n", - " 31 joint_7_visible 23188 non-null int64 \n", - " 32 joint_8_X 23188 non-null int64 \n", - " 33 joint_8_Y 23188 non-null int64 \n", - " 34 joint_8_visible 23188 non-null int64 \n", - " 35 joint_9_X 23188 non-null int64 \n", - " 36 joint_9_Y 23188 non-null int64 \n", - " 37 joint_9_visible 23188 non-null int64 \n", - " 38 joint_10_X 23188 non-null int64 \n", - " 39 joint_10_Y 23188 non-null int64 \n", - " 40 joint_10_visible 23188 non-null int64 \n", - " 41 joint_11_X 23188 non-null int64 \n", - " 42 joint_11_Y 23188 non-null int64 \n", - " 43 joint_11_visible 23188 non-null int64 \n", - " 44 joint_12_X 23188 non-null int64 \n", - " 45 joint_12_Y 23188 non-null int64 \n", - " 46 joint_12_visible 23188 non-null int64 \n", - " 47 joint_13_X 23188 non-null int64 \n", - " 48 joint_13_Y 23188 non-null int64 \n", - " 49 joint_13_visible 23188 non-null int64 \n", - " 50 joint_14_X 23188 non-null int64 \n", - " 51 joint_14_Y 23188 non-null int64 \n", - " 52 joint_14_visible 23188 non-null int64 \n", - " 53 joint_15_X 23188 non-null int64 \n", - " 54 joint_15_Y 23188 non-null int64 \n", - " 55 joint_15_visible 23188 non-null int64 \n", + " 0 image 23112 non-null object \n", + " 1 scale 23112 non-null float64\n", + " 2 bbox_tl_x 23112 non-null int64 \n", + " 3 bbox_tl_y 23112 non-null int64 \n", + " 4 bbox_br_x 23112 non-null int64 \n", + " 5 bbox_br_y 23112 non-null int64 \n", + " 6 center_x 23112 non-null int64 \n", + " 7 center_y 23112 non-null int64 \n", + " 8 joint_0_X 23112 non-null int64 \n", + " 9 joint_0_Y 23112 non-null int64 \n", + " 10 joint_0_visible 23112 non-null int64 \n", + " 11 joint_1_X 23112 non-null int64 \n", + " 12 joint_1_Y 23112 non-null int64 \n", + " 13 joint_1_visible 23112 non-null int64 \n", + " 14 joint_2_X 23112 non-null int64 \n", + " 15 joint_2_Y 23112 non-null int64 \n", + " 16 joint_2_visible 23112 non-null int64 \n", + " 17 joint_3_X 23112 non-null int64 \n", + " 18 joint_3_Y 23112 non-null int64 \n", + " 19 joint_3_visible 23112 non-null int64 \n", + " 20 joint_4_X 23112 non-null int64 \n", + " 21 joint_4_Y 23112 non-null int64 \n", + " 22 joint_4_visible 23112 non-null int64 \n", + " 23 joint_5_X 23112 non-null int64 \n", + " 24 joint_5_Y 23112 non-null int64 \n", + " 25 joint_5_visible 23112 non-null int64 \n", + " 26 joint_6_X 23112 non-null int64 \n", + " 27 joint_6_Y 23112 non-null int64 \n", + " 28 joint_6_visible 23112 non-null int64 \n", + " 29 joint_7_X 23112 non-null int64 \n", + " 30 joint_7_Y 23112 non-null int64 \n", + " 31 joint_7_visible 23112 non-null int64 \n", + " 32 joint_8_X 23112 non-null int64 \n", + " 33 joint_8_Y 23112 non-null int64 \n", + " 34 joint_8_visible 23112 non-null int64 \n", + " 35 joint_9_X 23112 non-null int64 \n", + " 36 joint_9_Y 23112 non-null int64 \n", + " 37 joint_9_visible 23112 non-null int64 \n", + " 38 joint_10_X 23112 non-null int64 \n", + " 39 joint_10_Y 23112 non-null int64 \n", + " 40 joint_10_visible 23112 non-null int64 \n", + " 41 joint_11_X 23112 non-null int64 \n", + " 42 joint_11_Y 23112 non-null int64 \n", + " 43 joint_11_visible 23112 non-null int64 \n", + " 44 joint_12_X 23112 non-null int64 \n", + " 45 joint_12_Y 23112 non-null int64 \n", + " 46 joint_12_visible 23112 non-null int64 \n", + " 47 joint_13_X 23112 non-null int64 \n", + " 48 joint_13_Y 23112 non-null int64 \n", + " 49 joint_13_visible 23112 non-null int64 \n", + " 50 joint_14_X 23112 non-null int64 \n", + " 51 joint_14_Y 23112 non-null int64 \n", + " 52 joint_14_visible 23112 non-null int64 \n", + " 53 joint_15_X 23112 non-null int64 \n", + " 54 joint_15_Y 23112 non-null int64 \n", + " 55 joint_15_visible 23112 non-null int64 \n", "dtypes: float64(1), int64(54), object(1)\n", "memory usage: 9.9+ MB\n" ] @@ -1211,6 +1217,414 @@ "sanitize_train_df.info()" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Export Records" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1 23112\n", + "0 5771\n", + "Name: is_training, dtype: int64\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
imagescalebbox_tl_xbbox_tl_ybbox_br_xbbox_br_ycenter_xcenter_yjoint_0_Xjoint_0_Y...joint_13_Xjoint_13_Yjoint_13_visiblejoint_14_Xjoint_14_Yjoint_14_visiblejoint_15_Xjoint_15_Yjoint_15_visibleis_training
0015601864.jpg3.021046627627706706594257620394...6921851693240168831311
1015601864.jpg2.472117841841902902952222895293...92420611013203195526311
2015599452.jpg5.641276607607752752619329-1-1...7192991711516154546611
3015599452.jpg6.071051903903107010701010412-1-1...1145269112264751109643311
4015599452.jpg5.7281622727186186133315-1-1...262511264231-1-101
..................................................................
5766035723542.jpg1.70467610211021106210621009410987549...1015360010024040101744300
5767003820079.jpg1.943852582582626626616240508441...6611471672212161522510
5768005622743.jpg2.035477601601653653658175572393...6591051680164167618110
5769094821945.jpg1.327614606606642642614246691390...5892091585257161525300
5770066294927.jpg3.908759174174268268252255-1-1...2141820249276029929910
\n", + "

28883 rows × 57 columns

\n", + "
" + ], + "text/plain": [ + " image scale bbox_tl_x bbox_tl_y bbox_br_x bbox_br_y \\\n", + "0 015601864.jpg 3.021046 627 627 706 706 \n", + "1 015601864.jpg 2.472117 841 841 902 902 \n", + "2 015599452.jpg 5.641276 607 607 752 752 \n", + "3 015599452.jpg 6.071051 903 903 1070 1070 \n", + "4 015599452.jpg 5.728162 27 27 186 186 \n", + "... ... ... ... ... ... ... \n", + "5766 035723542.jpg 1.704676 1021 1021 1062 1062 \n", + "5767 003820079.jpg 1.943852 582 582 626 626 \n", + "5768 005622743.jpg 2.035477 601 601 653 653 \n", + "5769 094821945.jpg 1.327614 606 606 642 642 \n", + "5770 066294927.jpg 3.908759 174 174 268 268 \n", + "\n", + " center_x center_y joint_0_X joint_0_Y ... joint_13_X joint_13_Y \\\n", + "0 594 257 620 394 ... 692 185 \n", + "1 952 222 895 293 ... 924 206 \n", + "2 619 329 -1 -1 ... 719 299 \n", + "3 1010 412 -1 -1 ... 1145 269 \n", + "4 133 315 -1 -1 ... 26 251 \n", + "... ... ... ... ... ... ... ... \n", + "5766 1009 410 987 549 ... 1015 360 \n", + "5767 616 240 508 441 ... 661 147 \n", + "5768 658 175 572 393 ... 659 105 \n", + "5769 614 246 691 390 ... 589 209 \n", + "5770 252 255 -1 -1 ... 214 182 \n", + "\n", + " joint_13_visible joint_14_X joint_14_Y joint_14_visible joint_15_X \\\n", + "0 1 693 240 1 688 \n", + "1 1 1013 203 1 955 \n", + "2 1 711 516 1 545 \n", + "3 1 1226 475 1 1096 \n", + "4 1 26 423 1 -1 \n", + "... ... ... ... ... ... \n", + "5766 0 1002 404 0 1017 \n", + "5767 1 672 212 1 615 \n", + "5768 1 680 164 1 676 \n", + "5769 1 585 257 1 615 \n", + "5770 0 249 276 0 299 \n", + "\n", + " joint_15_Y joint_15_visible is_training \n", + "0 313 1 1 \n", + "1 263 1 1 \n", + "2 466 1 1 \n", + "3 433 1 1 \n", + "4 -1 0 1 \n", + "... ... ... ... \n", + "5766 443 0 0 \n", + "5767 225 1 0 \n", + "5768 181 1 0 \n", + "5769 253 0 0 \n", + "5770 299 1 0 \n", + "\n", + "[28883 rows x 57 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "sanitize_train_df['is_training'] = 1\n", + "sanitize_validation_df['is_training'] = 0\n", + "export_df = pd.concat(\n", + " [sanitize_train_df, sanitize_validation_df]\n", + ")\n", + "print(export_df.is_training.value_counts())\n", + "export_df.to_csv(os.path.join(ROOT_FOLDER, \"data\", \"labels.csv\"), index=False)\n", + "export_df.to_json(os.path.join(ROOT_FOLDER, \"data\", \"labels.json\"), orient='records')\n", + "export_df" + ] + }, { "cell_type": "code", "execution_count": null,