From 3bd18687d07bddbffa35fe6bf82f9515f80c4187 Mon Sep 17 00:00:00 2001 From: Billy Li Date: Thu, 18 Jul 2024 13:51:41 -0700 Subject: [PATCH] dev: mix baseline with bdt for boosted prediction --- notebooks/dev/mix_basline_bdt.ipynb | 86 ++++++++++++++++++----------- 1 file changed, 54 insertions(+), 32 deletions(-) diff --git a/notebooks/dev/mix_basline_bdt.ipynb b/notebooks/dev/mix_basline_bdt.ipynb index ea7bb36..6fdce27 100644 --- a/notebooks/dev/mix_basline_bdt.ipynb +++ b/notebooks/dev/mix_basline_bdt.ipynb @@ -130,30 +130,55 @@ " return ak.fill_none(ak.pad_none(ak_array, max_n, clip=True, axis=axis), pad, axis=axis).to_numpy()" ] }, + { + "cell_type": "markdown", + "id": "42e8de48", + "metadata": {}, + "source": [ + "### BDT WP by background misidentification rate\n", + "Tight: 0.3%\n", + "\n", + "Medium: 1%\n", + "\n", + "Loose: 2%" + ] + }, { "cell_type": "code", "execution_count": 7, - "id": "d7cceadc", + "id": "f2ad1605", "metadata": {}, "outputs": [], "source": [ - "WP = 0.911" + "WP_tight = 0.95626426\n", + "WP_medium = 0.93498826\n", + "WP_loose = 0.911348" ] }, { "cell_type": "code", "execution_count": 8, + "id": "d7cceadc", + "metadata": {}, + "outputs": [], + "source": [ + "WP = WP_loose\n", + "pred_file = \"//Users/billyli/UCSD/hhh/reports/bv2/pred_baseline_bdt_loose.h5\"" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "id": "6d2502b7", "metadata": {}, "outputs": [], "source": [ - "pred_file = \"//Users/billyli/UCSD/hhh/reports/bv2/pred_baseline_bdt_loose.h5\"\n", "test_file = \"//Users/billyli/UCSD/hhh/reports/bv2/hhh_test.h5\"" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "ea6b8317", "metadata": {}, "outputs": [], @@ -163,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "401061e3", "metadata": {}, "outputs": [ @@ -173,7 +198,7 @@ "" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -184,7 +209,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "1632da1f", "metadata": {}, "outputs": [], @@ -204,7 +229,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "1820e38a", "metadata": {}, "outputs": [], @@ -261,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "ad91ad5a", "metadata": {}, "outputs": [ @@ -280,7 +305,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "30133d49", "metadata": {}, "outputs": [], @@ -305,7 +330,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "9133ec8b", "metadata": {}, "outputs": [], @@ -335,7 +360,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "60993088", "metadata": {}, "outputs": [ @@ -355,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "118d59dc", "metadata": {}, "outputs": [], @@ -388,23 +413,19 @@ "id": "9f12c161", "metadata": {}, "source": [ - "### Remove overlapped jets" + "### Select un_padded jets" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "e3d30de8", "metadata": {}, "outputs": [], "source": [ "# find ak4jets that matched to selected ak8jets (dR check)\n", - "matched_fj_idx = match_fjet_to_jet(fjs_selected, js, ak.ArrayBuilder()).snapshot()\n", - "\n", - "# remove overlapped ak4jets and padded jets\n", - "unoverlapped = matched_fj_idx==-1\n", "not_padded = js['mask']\n", - "j_cond = unoverlapped & not_padded\n", + "j_cond = not_padded\n", "js_selected = js[j_cond]" ] }, @@ -418,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "8eda8e8b", "metadata": {}, "outputs": [], @@ -428,12 +449,13 @@ "# and how many boosted Higgs that you have reconstructed\n", "N_jet = ak.num(js_selected, axis=-1).to_numpy(allow_missing=False)\n", "N_bH = ak.num(fjs_selected, axis=-1).to_numpy(allow_missing=False)\n", - "N_rH = np.minimum(np.floor(N_jet/2), 3-N_bH)" + "# N_rH = np.minimum(np.floor(N_jet/2), 3-N_bH)\n", + "N_rH = np.minimum(np.floor(N_jet/2), 3)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "b4f5933a", "metadata": {}, "outputs": [], @@ -454,7 +476,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "3b38686d", "metadata": {}, "outputs": [], @@ -492,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "1662a585", "metadata": {}, "outputs": [], @@ -523,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "id": "b6b04fe7", "metadata": {}, "outputs": [], @@ -537,7 +559,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "id": "d7607a1b", "metadata": {}, "outputs": [], @@ -555,7 +577,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "id": "f620bdfc", "metadata": {}, "outputs": [], @@ -565,7 +587,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "id": "d91e4dc0", "metadata": {}, "outputs": [ @@ -575,7 +597,7 @@ "" ] }, - "execution_count": 26, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } @@ -586,7 +608,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "id": "17aa80d9", "metadata": {}, "outputs": [ @@ -596,7 +618,7 @@ "" ] }, - "execution_count": 27, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" }