diff --git a/notebook/L200-plotting-concatenate_runs_periods.ipynb b/notebook/L200-plotting-concatenate_runs_periods.ipynb
new file mode 100644
index 0000000..dca5c67
--- /dev/null
+++ b/notebook/L200-plotting-concatenate_runs_periods.ipynb
@@ -0,0 +1,569 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "308b2266-c882-465f-89d0-c6ffe46e1b08",
+   "metadata": {},
+   "source": [
+    "### Introduction\n",
+    "\n",
+    "This notebook gives a first look at the saved output by reading the hdf files. It concatenates several runs and periods, one after the other, which is useful for monitoring the system over a time window larger than a single run.\n",
+    "\n",
+    "It requires the 'legend-data-monitor' package. In particular, after cloning the repo, enter the folder and install the package by typing\n",
+    "\n",
+    "```console\n",
+    "foo@bar:~$ pip install .\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "acd13756-4007-4cda-bed2-3ee1b6056d15",
+   "metadata": {},
+   "source": [
+    "# Select period to inspect"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5de1e10c-b02d-45eb-9088-3e8103b3cbff",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ------------------------------------------------------------------------------------------ which data do you want to read? CHANGE ME!\n",
+    "subsystem = \"geds\"  # KEEP 'geds' for the moment\n",
+    "folder = \"prod-ref-v2\"  # you can change me\n",
+    "version = \"\"  # leave an empty string if you're looking at p03 data\n",
+    "periods = [\n",
+    "    \"p06\"\n",
+    "]  # one or more, e.g. = sorted(os.listdir(f\"/data1/users/calgaro/{folder}/generated/plt/phy/\"))\n",
+    "\n",
+    "# ------------------------------------------------------------------------------------------ remove detectors from the plots\n",
+    "# do you want to remove some detectors?
If so, put here their names (or empty list if you want everything included)\n", + "to_be_excluded = (\n", + " []\n", + ") # [\"V01406A\", \"V01415A\", \"V01387A\", \"P00665C\", \"P00748B\", \"P00748A\", \"B00089D\"]" + ] + }, + { + "cell_type": "markdown", + "id": "ab6a56d1-ec1e-4162-8b41-49e8df7b5f16", + "metadata": {}, + "source": [ + "# Select event type, parameter and original or PULS01ANA-rescaled values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3348d46-78a7-4be3-80de-a88610d88f00", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ------------------------------------------------------------------------------------------ ...from here, you don't need to change anything in the code\n", + "import os\n", + "import json\n", + "import sys\n", + "import h5py\n", + "import shelve\n", + "import matplotlib\n", + "import pandas as pd\n", + "import numpy as np\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.patches import Rectangle\n", + "from legend_data_monitor import plot_styles, plotting, utils\n", + "import legend_data_monitor as ldm\n", + "\n", + "%matplotlib widget\n", + "\n", + "# ------------------------------------------------------------------------------------------ select one data file\n", + "# hypothesis: being these files under the same production folder, we expect them to contain the same keys - if not, an error will appear\n", + "run = sorted(\n", + " os.listdir(f\"/data1/users/calgaro/{folder}/generated/plt/phy/{periods[0]}/\")\n", + ")[0]\n", + "if version == \"\":\n", + " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{periods[0]}/{run}/l200-{periods[0]}-{run}-phy-{subsystem}.hdf\"\n", + "else:\n", + " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{periods[0]}/{run}/l200-{periods[0]}-{run}-phy-{subsystem}.hdf\"\n", + "\n", + "# ------------------------------------------------------------------------------------------ building channel map\n", + "# this is period/run dependent, but for now it was kept equal among p03-p06\n", + "dataset = {\n", + " \"experiment\": \"L200\",\n", + " \"period\": periods[0],\n", + " \"type\": \"phy\",\n", + " \"version\": version,\n", + " \"path\": \"/data2/public/prodenv/prod-blind/tmp/auto\",\n", + " \"runs\": int(run[1:]),\n", + "}\n", + "\n", + "geds = ldm.Subsystem(f\"{subsystem}\", dataset=dataset)\n", + "channel_map = geds.channel_map\n", + "\n", + "for det in to_be_excluded:\n", + " channel_map = channel_map[channel_map.name != det]\n", + "\n", + "# ------------------------------------------------------------------------------------------ load data\n", + "# Load the hdf file\n", + "hdf_file = h5py.File(data_file, \"r\")\n", + "keys = list(hdf_file.keys())\n", + "hdf_file.close()\n", + "\n", + "# available flags - get the list of available event types\n", + "event_types = list(set([key.split(\"_\")[0] for key in keys]))\n", + "\n", + "# Create a dropdown widget for the event type\n", + "evt_type_widget = widgets.Dropdown(options=event_types, description=\"Event Type:\")\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ parameter\n", + "# Define a function to update the parameter dropdown based on the selected event type\n", + "def update_params(*args):\n", + " selected_evt_type = evt_type_widget.value\n", + " params = list(\n", + " set(\n", + " [\n", + " key.split(\"_\")[1]\n", + " for 
key in keys\n", + " if key.split(\"_\")[0] == selected_evt_type\n", + " ]\n", + " )\n", + " )\n", + " param_widget.options = params\n", + "\n", + "\n", + "# Call the update_params function when the event type is changed\n", + "evt_type_widget.observe(update_params, \"value\")\n", + "\n", + "# Create a dropdown widget for the parameter\n", + "param_widget = widgets.Dropdown(description=\"Parameter:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ data format\n", + "data_format = [\"absolute values\", \"% values\"]\n", + "\n", + "# Create a dropdown widget\n", + "data_format_widget = widgets.Dropdown(options=data_format, description=\"data format:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ plot structure\n", + "plot_structures = [\"per string\", \"per channel\"]\n", + "\n", + "# Create a dropdown widget\n", + "plot_structures_widget = widgets.Dropdown(\n", + " options=plot_structures, description=\"Plot structure:\"\n", + ")\n", + "\n", + "# ------------------------------------------------------------------------------------------ plot style\n", + "plot_styles = [\"vs time\", \"histogram\"]\n", + "\n", + "# Create a dropdown widget\n", + "plot_styles_widget = widgets.Dropdown(options=plot_styles, description=\"Plot style:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ resampling\n", + "resampled = [\"no\", \"only\", \"also\"]\n", + "\n", + "# Create a dropdown widget\n", + "resampled_widget = widgets.Dropdown(options=resampled, description=\"Resampled:\")\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ get one or all strings\n", + "if subsystem == \"geds\":\n", + " strings_widg = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, \"all\"]\n", + "if subsystem == \"pulser01ana\":\n", + " strings_widg = [-1]\n", + "\n", + "# Create a dropdown widget\n", + "strings_widget = widgets.Dropdown(options=strings_widg, description=\"String:\")\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ display widgets\n", + "display(evt_type_widget)\n", + "display(param_widget)\n", + "\n", + "# ------------------------------------------------------------------------------------------ get params (based on event type)\n", + "evt_type = evt_type_widget.value\n", + "params = list(set([key.split(\"_\")[1] for key in keys if key.split(\"_\")[0] == evt_type]))\n", + "param_widget.options = params\n", + "\n", + "\n", + "aux_widget = widgets.Dropdown(description=\"Options:\")\n", + "print(\n", + " \"Pick the way you want to include PULS01ANA info\\n(this is not available for EventRate, CuspEmaxCtcCal \\nand AoECustom; in this case, select None):\"\n", + ")\n", + "display(aux_widget)\n", + "\n", + "aux_info = [\"pulser01anaRatio\", \"pulser01anaDiff\", \"None\"]\n", + "aux_dict = {\n", + " \"pulser01anaRatio\": f\"Ratio: {subsystem} / PULS01ANA\",\n", + " \"pulser01anaDiff\": f\"Difference: {subsystem} - PULS01ANA\",\n", + " \"None\": f\"None (ie just plain {subsystem} data)\",\n", + "}\n", + "aux_info = [aux_dict[info] for info in aux_info]\n", + "aux_widget.options = aux_info\n", + "\n", + "print(\"\\033[91mIf you change me, then RUN AGAIN the next cell!!!\\033[0m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "508896aa-8f5c-4bed-a731-bb9aeca61bef", + "metadata": {}, + "outputs": [], + 
"source": [ + "def to_None(string):\n", + " return None if string == \"None\" else string\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ get dataframe\n", + "def display_param_value(*args):\n", + " selected_evt_type = evt_type_widget.value\n", + " selected_param = param_widget.value\n", + " selected_aux_info = aux_widget.value\n", + " print(\n", + " f\"You are going to plot '{selected_param}' for '{selected_evt_type}' events...\"\n", + " )\n", + "\n", + " df_info = pd.DataFrame()\n", + " df_param_orig = pd.DataFrame()\n", + " df_param_var = pd.DataFrame()\n", + " df_param_mean = pd.DataFrame()\n", + "\n", + " for period in periods:\n", + " runs = sorted(\n", + " os.listdir(f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/\")\n", + " )\n", + "\n", + " for run in runs:\n", + " if version == \"\":\n", + " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", + " else:\n", + " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", + "\n", + " # some info\n", + " key = f\"{selected_evt_type}_{selected_param}\"\n", + " df_info = pd.read_hdf(data_file, f\"{key}_info\")\n", + "\n", + " if \"None\" not in selected_aux_info:\n", + " # Iterate over the dictionary items\n", + " for k, v in aux_dict.items():\n", + " if v == selected_aux_info:\n", + " option = k\n", + " break\n", + " key = f\"{selected_evt_type}_{selected_param}_{option}\"\n", + "\n", + " # get dataframe\n", + " tmp_df_param_orig = pd.read_hdf(data_file, f\"{key}\")\n", + " tmp_df_param_var = pd.read_hdf(data_file, f\"{key}_var\")\n", + " tmp_df_param_mean = pd.read_hdf(data_file, f\"{key}_mean\")\n", + "\n", + " df_param_orig = pd.concat([df_param_orig, tmp_df_param_orig])\n", + " df_param_var = pd.concat([df_param_var, tmp_df_param_var])\n", + " df_param_mean = pd.concat([df_param_mean, tmp_df_param_mean])\n", + "\n", + " print(f\"...{period}-{run}: loaded!\")\n", + "\n", + " return df_param_orig, df_param_var, df_param_mean, df_info\n", + "\n", + "\n", + "df_param_orig, df_param_var, df_param_mean, df_info = display_param_value()\n", + "print(f\"...data have been loaded!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bff94f92-e85b-4fa8-b82f-46deab8d4773", + "metadata": {}, + "outputs": [], + "source": [ + "# ---------------------------------------------------------------------------------- get back the usual df shape for legend-data-monitor plots\n", + "pivot_table = df_param_orig.copy()\n", + "pivot_table.reset_index(inplace=True)\n", + "new_df = pd.melt(\n", + " pivot_table, id_vars=[\"datetime\"], var_name=\"channel\", value_name=\"value\"\n", + ")\n", + "new_df_param_orig = new_df.copy().merge(channel_map, on=\"channel\")\n", + "\n", + "pivot_table_var = df_param_var.copy()\n", + "pivot_table_var.reset_index(inplace=True)\n", + "new_df_var = pd.melt(\n", + " pivot_table_var, id_vars=[\"datetime\"], var_name=\"channel\", value_name=\"value\"\n", + ")\n", + "new_df_param_var = new_df_var.copy().merge(channel_map, on=\"channel\")\n", + "\n", + "\n", + "# ---------------------------------------------------------------------------------- remove global spikes (if you are looking at cuspEmax)\n", + "# remove global spikes events by selecting their amplitude\n", + "if \"Cusp\" in param_widget.value:\n", + " new_df_param_orig = new_df_param_orig.loc[new_df_param_var[\"value\"] > 
-10]\n",
+    "    new_df_param_var = new_df_param_var.loc[new_df_param_var[\"value\"] > -10]\n",
+    "    print(\"--> global spikes were removed from cusp plot (threshold: +-10%)!\")\n",
+    "\n",
+    "# ---------------------------------------------------------------------------------- recalculate % variation wrt new mean value for all channels\n",
+    "channel_list = new_df_param_var[\"channel\"].unique()\n",
+    "channel_df = pd.DataFrame()\n",
+    "\"\"\"\n",
+    "for ch in channel_list:\n",
+    "    channel_df = pd.DataFrame()\n",
+    "    new_ch_var = pd.DataFrame()\n",
+    "\n",
+    "    channel_df = (\n",
+    "        new_df_param_orig[new_df_param_orig[\"channel\"] == ch]\n",
+    "        .sort_values(by=\"datetime\")\n",
+    "        .copy()\n",
+    "    )\n",
+    "    channel_mean = channel_df[\"value\"].iloc[0 : int(0.1 * len(channel_df))].mean()\n",
+    "    new_ch_var = (channel_df[\"value\"] - channel_mean) / channel_mean * 100\n",
+    "    new_df_param_var.loc[\n",
+    "        new_df_param_var[\"channel\"] == ch, \"value\"\n",
+    "    ] = new_ch_var\n",
+    "\"\"\"\n",
+    "print(\n",
+    "    \"...% variations were calculated again over the larger time window (mute me if you don't want to keep run-oriented % variations)!\"\n",
+    ")\n",
+    "\n",
+    "\n",
+    "# ---------------------------------------------------------------------------------- change column names (again, needed for legend-data-monitor plots)\n",
+    "def convert_to_original_format(camel_case_string: str) -> str:\n",
+    "    \"\"\"Convert a camel case string to its original format.\"\"\"\n",
+    "    original_string = \"\"\n",
+    "    for i, char in enumerate(camel_case_string):\n",
+    "        if char.isupper() and i > 0:\n",
+    "            original_string += \"_\" + char.lower()\n",
+    "        else:\n",
+    "            original_string += char.lower()\n",
+    "\n",
+    "    return original_string\n",
+    "\n",
+    "\n",
+    "new_df_param_orig = (new_df_param_orig.copy()).rename(\n",
+    "    columns={\n",
+    "        \"value\": convert_to_original_format(param_widget.value)\n",
+    "        if param_widget.value != \"BlMean\"\n",
+    "        else param_widget.value\n",
+    "    }\n",
+    ")\n",
+    "new_df_param_var = (new_df_param_var.copy()).rename(\n",
+    "    columns={\n",
+    "        \"value\": convert_to_original_format(param_widget.value) + \"_var\"\n",
+    "        if param_widget.value != \"BlMean\"\n",
+    "        else param_widget.value + \"_var\"\n",
+    "    }\n",
+    ")\n",
+    "\n",
+    "print(\"...data have been formatted to the right structure!\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f1c10c0f-9bed-400f-8174-c6d7e185648b",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# Plot data\n",
+    "For the selected parameter, choose the plot style (you can play with different data formats, plot structures, ... among the available ones).\n",
+    "\n",
+    "### Notes\n",
+    "1. When you select **plot_style='histogram', you'll always plot NOT resampled values** (ie values for each timestamp entry). Indeed, if you choose different resampled options while keeping plot_style='histogram', nothing will change in the plots.\n",
+    "2. **resampled='no'** means you look at each timestamp entry\n",
+    "3. **resampled='only'** means you look at each timestamp entry averaged over a 1H time window (use the buttons below to resample according to your needs; available options: 1min, 5min, 10min, 30min, 60min)\n",
+    "4. **resampled='also'** means you look at each timestamp entry averaged over a 1H time window AND at each timestamp entry TOGETHER -> suggestion: use 'also' only when you choose plot_structures='per channel'; if you have selected 'per string', the plot will be unreadable"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6fde51f-89b0-49f8-82ed-74d24235cbe0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# Define the time interval options\n",
+    "time_intervals = [\"1min\", \"5min\", \"10min\", \"30min\", \"60min\"]\n",
+    "\n",
+    "# Create RadioButtons with circular style\n",
+    "radio_buttons = widgets.RadioButtons(\n",
+    "    options=time_intervals,\n",
+    "    button_style=\"circle\",\n",
+    "    description=\"\\t\",\n",
+    "    layout={\"width\": \"max-content\"},\n",
+    ")\n",
+    "\n",
+    "# Create a label widget to display the selected time interval\n",
+    "selected_interval_label = widgets.Label()\n",
+    "\n",
+    "\n",
+    "# Define a callback function for button selection\n",
+    "def on_button_selected(change):\n",
+    "    selected_interval_label.value = change.new\n",
+    "\n",
+    "\n",
+    "# Assign the callback function to the RadioButtons\n",
+    "radio_buttons.observe(on_button_selected, names=\"value\")\n",
+    "\n",
+    "# Create a horizontal box to contain the RadioButtons and label\n",
+    "box_layout = widgets.Layout(display=\"flex\", flex_flow=\"row\", align_items=\"center\")\n",
+    "container_resampling = widgets.HBox(\n",
+    "    [radio_buttons, selected_interval_label], layout=box_layout\n",
+    ")\n",
+    "\n",
+    "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+    "# Define the yes/no options for drawing limit lines\n",
+    "answer = [\"no\", \"yes\"]\n",
+    "\n",
+    "# Create RadioButtons with circular style\n",
+    "limits_buttons = widgets.RadioButtons(\n",
+    "    options=answer,\n",
+    "    button_style=\"circle\",\n",
+    "    description=\"\\t\",\n",
+    "    layout={\"width\": \"max-content\"},\n",
+    ")\n",
+    "\n",
+    "# Assign the callback function to the RadioButtons\n",
+    "limits_buttons.observe(on_button_selected, names=\"value\")\n",
+    "\n",
+    "# Create a horizontal box to contain the RadioButtons and label\n",
+    "container_limits = widgets.HBox(\n",
+    "    [limits_buttons, selected_interval_label], layout=box_layout\n",
+    ")\n",
+    "\n",
+    "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
+    "# Create text input boxes for min and max values\n",
+    "min_input = widgets.FloatText(\n",
+    "    description=\"Min y-axis:\", layout=widgets.Layout(width=\"150px\")\n",
+    ")\n",
+    "max_input = widgets.FloatText(\n",
+    "    description=\"Max y-axis:\", layout=widgets.Layout(width=\"150px\")\n",
+    ")\n",
+    "\n",
+    "# ------------------------------------------------------------------------------------------ get plots\n",
+    "display(data_format_widget)\n",
+    "display(plot_structures_widget)\n",
+    "display(plot_styles_widget)\n",
+    "display(strings_widget)\n",
+    "display(resampled_widget)\n",
+    "\n",
+    "print(\"Choose the resampling time among the available options:\")\n",
+    "display(container_resampling)\n",
+    "\n",
+    "print(\"Do you want to display horizontal lines for limits in the plots?\")\n",
+    "display(container_limits)\n",
+    "\n",
+    "print(\"Set y-axis range; use min=0=max if you don't want to use any fixed range:\")\n",
+    "display(widgets.VBox([min_input, max_input]))\n",
+    "\n",
+    "print(\"\\033[91mIf you change me, then RUN AGAIN the next cell!!!\\033[0m\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2122008e-2a6c-49b6-8a81-d351c1bfd57e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# set plotting options\n",
+    "plot_info = {\n",
+    "    \"unit\": df_info.loc[\"unit\", \"Value\"],\n",
+    "    \"label\": df_info.loc[\"label\", \"Value\"],\n",
+    "    \"lower_lim_var\": float(df_info.loc[\"lower_lim_var\", \"Value\"])\n",
+    "    if limits_buttons.value == \"yes\"\n",
+    "    and to_None(df_info.loc[\"lower_lim_var\", \"Value\"]) is not None\n",
+    "    else None,\n",
+    "    \"upper_lim_var\": float(df_info.loc[\"upper_lim_var\", \"Value\"])\n",
+    "    if limits_buttons.value == \"yes\"\n",
+    "    and to_None(df_info.loc[\"upper_lim_var\", \"Value\"]) is not None\n",
+    "    else None,\n",
+    "    \"lower_lim_abs\": float(df_info.loc[\"lower_lim_abs\", \"Value\"])\n",
+    "    if limits_buttons.value == \"yes\"\n",
+    "    and to_None(df_info.loc[\"lower_lim_abs\", \"Value\"]) is not None\n",
+    "    else None,\n",
+    "    \"upper_lim_abs\": float(df_info.loc[\"upper_lim_abs\", \"Value\"])\n",
+    "    if limits_buttons.value == \"yes\"\n",
+    "    and to_None(df_info.loc[\"upper_lim_abs\", \"Value\"]) is not None\n",
+    "    else None,\n",
+    "    \"plot_style\": plot_styles_widget.value,\n",
+    "    \"plot_structure\": plot_structures_widget.value,\n",
+    "    \"resampled\": resampled_widget.value,\n",
+    "    \"title\": \"\",\n",
+    "    \"subsystem\": \"\",\n",
+    "    \"std\": False,\n",
+    "    \"locname\": {\n",
+    "        \"geds\": \"string\",\n",
+    "        \"spms\": \"fiber\",\n",
+    "        \"pulser\": \"puls\",\n",
+    "        \"pulser01ana\": \"pulser01ana\",\n",
+    "        \"FCbsln\": \"FC bsln\",\n",
+    "        \"muon\": \"muon\",\n",
+    "    }[subsystem],\n",
+    "    \"range\": [min_input.value, max_input.value]\n",
+    "    if min_input.value < max_input.value\n",
+    "    else [None, None],\n",
+    "    \"event_type\": None,\n",
+    "    \"unit_label\": \"%\"\n",
+    "    if data_format_widget.value == \"% values\"\n",
+    "    else df_info.loc[\"unit\", \"Value\"],\n",
+    "    \"parameters\": \"\",\n",
+    "    \"time_window\": radio_buttons.value.split(\"min\")[0] + \"T\",\n",
+    "}\n",
+    "\n",
+    "\n",
+    "# turn on the std when plotting individual channels together\n",
+    "if plot_info[\"plot_structure\"] == \"per channel\":\n",
+    "    plot_info[\"std\"] = True\n",
+    "\n",
+    "if data_format_widget.value == \"absolute values\":\n",
+    "    plot_info[\"limits\"] = [plot_info[\"lower_lim_abs\"], plot_info[\"upper_lim_abs\"]]\n",
+    "    plot_info[\"parameter\"] = (\n",
+    "        convert_to_original_format(param_widget.value)\n",
+    "        if param_widget.value != \"BlMean\"\n",
+    "        else param_widget.value\n",
+    "    )\n",
+    "    df_to_plot = new_df_param_orig.copy()\n",
+    "if data_format_widget.value == \"% values\":\n",
+    "    plot_info[\"limits\"] = [plot_info[\"lower_lim_var\"], plot_info[\"upper_lim_var\"]]\n",
+    "    plot_info[\"parameter\"] = (\n",
+    "        convert_to_original_format(param_widget.value) + \"_var\"\n",
+    "        if param_widget.value != \"BlMean\"\n",
+    "        else param_widget.value + \"_var\"\n",
+    "    )\n",
+    "    df_to_plot = new_df_param_var.copy()\n",
+    "\n",
+    "print(f\"Making plots now...\")\n",
+    "\n",
+    "if isinstance(strings_widget.value, str):  # let's get all strings in output\n",
+    "    strings = [s for s in strings_widg if s != \"all\"]  # list.remove() returns None, so build the list instead\n",
+    "    for string in strings:\n",
+    "        if plot_structures_widget.value == \"per channel\":\n",
+    "            plotting.plot_per_ch(\n",
+    "                df_to_plot[df_to_plot[\"location\"] == string], plot_info, \"\"\n",
+    "            )  # plot one canvas per channel\n",
+    "        elif plot_structures_widget.value == \"per string\":\n",
+    "            
plotting.plot_per_string(\n", + " df_to_plot[df_to_plot[\"location\"] == string], plot_info, \"\"\n", + " ) # plot one canvas per string\n", + "else: # let's get one string in output\n", + " if plot_structures_widget.value == \"per channel\":\n", + " plotting.plot_per_ch(\n", + " df_to_plot[df_to_plot[\"location\"] == strings_widget.value], plot_info, \"\"\n", + " ) # plot one canvas per channel\n", + " elif plot_structures_widget.value == \"per string\":\n", + " plotting.plot_per_string(\n", + " df_to_plot[df_to_plot[\"location\"] == strings_widget.value], plot_info, \"\"\n", + " ) # plot one canvas per string" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebook/L200-plotting-hdf-widgets.ipynb b/notebook/L200-plotting-individual-runs.ipynb similarity index 67% rename from notebook/L200-plotting-hdf-widgets.ipynb rename to notebook/L200-plotting-individual-runs.ipynb index 574f585..6a8cdfa 100644 --- a/notebook/L200-plotting-hdf-widgets.ipynb +++ b/notebook/L200-plotting-individual-runs.ipynb @@ -32,16 +32,17 @@ "outputs": [], "source": [ "# ------------------------------------------------------------------------------------------ which data do you want to read? CHANGE ME!\n", - "run = \"r000\" # r000, r001, ...\n", + "run = \"r003\" # r000, r001, ...\n", "subsystem = \"geds\" # KEEP 'geds' for the moment\n", "folder = \"prod-ref-v2\" # you can change me\n", "period = \"p06\"\n", "version = \"\" # leave an empty string if you're looking at p03 data\n", "\n", - "if version == \"\":\n", - " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", - "else:\n", - " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"" + "# ------------------------------------------------------------------------------------------ remove detectors from the plots\n", + "# do you want to remove some detectors? 
If so, put here their names (or empty list if you want everything included)\n", + "to_be_excluded = (\n", + " []\n", + ") # [\"V01406A\", \"V01415A\", \"V01387A\", \"P00665C\", \"P00748B\", \"P00748A\", \"B00089D\"]" ] }, { @@ -56,14 +57,10 @@ "cell_type": "code", "execution_count": null, "id": "c3348d46-78a7-4be3-80de-a88610d88f00", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "# ------------------------------------------------------------------------------------------ ...from here, you don't need to change anything in the code\n", - "import os\n", - "import json\n", "import sys\n", "import h5py\n", "import shelve\n", @@ -79,6 +76,11 @@ "\n", "%matplotlib widget\n", "\n", + "if version == \"\":\n", + " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", + "else:\n", + " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", + "\n", "# ------------------------------------------------------------------------------------------ building channel map\n", "dataset = {\n", " \"experiment\": \"L200\",\n", @@ -89,23 +91,13 @@ " \"runs\": int(run[1:]),\n", "}\n", "\n", - "geds = ldm.Subsystem(f\"{subsystem}\", dataset=dataset)\n", + "geds = ldm.Subsystem(\"geds\", dataset=dataset)\n", "channel_map = geds.channel_map\n", "\n", "# remove probl dets\n", - "to_be_excluded = [\n", - " \"V01406A\",\n", - " \"V01415A\",\n", - " \"V01387A\",\n", - " \"P00665C\",\n", - " \"P00748B\",\n", - " \"P00748A\",\n", - " \"B00089D\",\n", - "]\n", "for det in to_be_excluded:\n", " channel_map = channel_map[channel_map.name != det]\n", - "# remove OFF dets\n", - "channel_map = channel_map[channel_map.status == \"on\"]\n", + "\n", "\n", "# ------------------------------------------------------------------------------------------ load data\n", "# Load the hdf file\n", @@ -170,16 +162,11 @@ "\n", "\n", "# ------------------------------------------------------------------------------------------ get one or all strings\n", - "if subsystem == \"geds\":\n", - " strings = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, \"all\"]\n", - "if subsystem == \"pulser01ana\":\n", - " strings = [-1]\n", + "strings = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, \"all\"]\n", "\n", "# Create a dropdown widget\n", "strings_widget = widgets.Dropdown(options=strings, description=\"String:\")\n", "\n", - "\n", - "print(strings)\n", "# ------------------------------------------------------------------------------------------ display widgets\n", "display(evt_type_widget)\n", "display(param_widget)\n", @@ -231,64 +218,27 @@ " key = f\"{selected_evt_type}_{selected_param}\"\n", " print(key)\n", " print(selected_aux_info)\n", - "\n", - " df_info = pd.DataFrame()\n", - " df_param_orig = pd.DataFrame()\n", - " df_param_var = pd.DataFrame()\n", - " df_param_mean = pd.DataFrame()\n", - "\n", - " # ------------------------------------------------------------------------------------------ which data do you want to read? CHANGE ME!\n", - " folder = \"prod-ref-v2\" # you can change me\n", - " version = \"\" # leave an empty string if you're looking at >p03 data\n", - " subsystem = \"geds\" # KEEP 'geds' for the moment\n", - "\n", - " # ------------------------------------------------------------------------------------------ plot all periods available or just specify in a list e.g. 
[\"p001\", ...]\n", - " # periods = sorted(os.listdir(f\"/data1/users/calgaro/{folder}/generated/plt/phy/\"))\n", - " periods = [\"p06\"]\n", - "\n", - " for period in periods:\n", - " # load all runs available for this period or just specify in a list e.g. [\"p001\", ...]\n", - " runs = sorted(\n", - " os.listdir(f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/\")\n", - " )\n", - " runs = [\"r002\", \"r003\"]\n", - " print(\"period\\t\", period, \"\\t loading runs\\t\", runs)\n", - "\n", - " for run in runs:\n", - " if version == \"\":\n", - " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", - " else:\n", - " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", - "\n", - " # some info\n", - " df_info = pd.read_hdf(data_file, f\"{key}_info\")\n", - "\n", - " if \"None\" not in selected_aux_info:\n", - " print(f\"... plus you are going to apply the option {selected_aux_info}\")\n", - "\n", - " # Iterate over the dictionary items\n", - " for k, v in aux_dict.items():\n", - " if v == selected_aux_info:\n", - " option = k\n", - " break\n", - " key += f\"_{option}\"\n", - "\n", - " # get dataframe\n", - " tmp_df_param_orig = pd.read_hdf(data_file, f\"{key}\")\n", - " tmp_df_param_var = pd.read_hdf(data_file, f\"{key}_var\")\n", - " tmp_df_param_mean = pd.read_hdf(data_file, f\"{key}_mean\")\n", - "\n", - " df_param_orig = pd.concat([df_param_orig, tmp_df_param_orig])\n", - " df_param_var = pd.concat([df_param_var, tmp_df_param_var])\n", - " df_param_mean = pd.concat([df_param_mean, tmp_df_param_mean])\n", - "\n", - " print(run, \" loaded\")\n", + " # some info\n", + " df_info = pd.read_hdf(data_file, f\"{key}_info\")\n", + "\n", + " if \"None\" not in selected_aux_info:\n", + " # Iterate over the dictionary items\n", + " for k, v in aux_dict.items():\n", + " if v == selected_aux_info:\n", + " option = k\n", + " break\n", + " key += f\"_{option}\"\n", + "\n", + " # get dataframe\n", + " df_param_orig = pd.read_hdf(data_file, f\"{key}\")\n", + " df_param_var = pd.read_hdf(data_file, f\"{key}_var\")\n", + " df_param_mean = pd.read_hdf(data_file, f\"{key}_mean\")\n", "\n", " return df_param_orig, df_param_var, df_param_mean, df_info\n", "\n", "\n", "df_param_orig, df_param_var, df_param_mean, df_info = display_param_value()\n", - "print(f\"...data have been loaded!\")\n", + "print(f\"...data have beeng loaded!\")\n", "\n", "\n", "pivot_table = df_param_orig.copy()\n", @@ -336,72 +286,10 @@ "print(\"...data have been formatted to the right structure!\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "33f555ff-ea2c-4a18-a901-248c9d7eddb3", - "metadata": {}, - "outputs": [], - "source": [ - "# if you want to plot a specific day\n", - "# NOTE: this action removes\n", - "\n", - "new_df_param_var = new_df_param_var[new_df_param_var.datetime.dt.day > 1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b1c4d8f0-7977-4b79-9414-6be06dfab720", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# remove global spikes events by selecting their amplitude\n", - "# and\n", - "# compute mean over initial hours of all DataFrame\n", - "# useful also if you load more runs/periods\n", - "\n", - "param = {\n", - " \"Cuspemax\": \"cuspemax_var\",\n", - " \"Baseline\": \"baseline_var\",\n", - " \"BlMean\": \"blmean_var\",\n", - " \"CuspemaxCtcCal\": \"cuspemax_ctc_cal_var\",\n", - "}\n", - "\n", 
- "new_df_param_var = new_df_param_var.loc[\n", - " new_df_param_var[param[param_widget.value]] > -10\n", - "]\n", - "\n", - "channel_list = new_df_param_var[\"channel\"].unique()\n", - "channel_df = pd.DataFrame()\n", - "\n", - "# recalculate % variation wrt new mean value for all channels\n", - "\n", - "for ch in channel_list:\n", - " channel_df = pd.DataFrame()\n", - " new_ch_var = pd.DataFrame()\n", - "\n", - " channel_df = (\n", - " new_df_param_orig[new_df_param_orig[\"channel\"] == ch]\n", - " .sort_values(by=\"datetime\")\n", - " .copy()\n", - " )\n", - " channel_mean = channel_df[\"cuspemax\"].iloc[0 : int(0.1 * len(channel_df))].mean()\n", - " new_ch_var = (channel_df[\"cuspemax\"] - channel_mean) / channel_mean * 100\n", - " # new_df_param_var.loc[new_df_param_var[\"channel\"] == ch, param[param_widget.value + \"_var\"]] = 1\n", - " new_df_param_var.loc[\n", - " new_df_param_var[\"channel\"] == ch, param[param_widget.value]\n", - " ] = new_ch_var" - ] - }, { "cell_type": "markdown", "id": "f1c10c0f-9bed-400f-8174-c6d7e185648b", - "metadata": { - "tags": [] - }, + "metadata": {}, "source": [ "# Plot data\n", "For the selected parameter, choose the plot style (you can play with different data formats, plot structures, ... among the available ones).\n", @@ -417,9 +305,7 @@ "cell_type": "code", "execution_count": null, "id": "a6fde51f-89b0-49f8-82ed-74d24235cbe0", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "# Define the time interval options\n", @@ -478,18 +364,8 @@ ")\n", "max_input = widgets.FloatText(\n", " description=\"Max y-axis:\", layout=widgets.Layout(width=\"150px\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "084e9d36-1478-4833-96ff-555134e9a64c", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ + ")\n", + "\n", "# ------------------------------------------------------------------------------------------ get plots\n", "display(data_format_widget)\n", "display(plot_structures_widget)\n", @@ -513,9 +389,7 @@ "cell_type": "code", "execution_count": null, "id": "2122008e-2a6c-49b6-8a81-d351c1bfd57e", - "metadata": { - "tags": [] - }, + "metadata": {}, "outputs": [], "source": [ "# set plotting options\n", @@ -588,9 +462,7 @@ "print(f\"Making plots now...\")\n", "\n", "if isinstance(strings_widget.value, str): # let's get all strings in output\n", - " if \"all\" in strings:\n", - " strings.remove(\"all\")\n", - " for string in strings:\n", + " for string in [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]:\n", " if plot_structures_widget.value == \"per channel\":\n", " plotting.plot_per_ch(\n", " df_to_plot[df_to_plot[\"location\"] == string], plot_info, \"\"\n", @@ -610,16 +482,6 @@ " ) # plot one canvas per string" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "238aae30-1e4c-4e0b-bb4d-13c1c8d7da44", - "metadata": {}, - "outputs": [], - "source": [ - "print(strings)" - ] - }, { "cell_type": "markdown", "id": "17542fbd-a2fb-4474-829a-adb0ef99aae3", @@ -627,18 +489,14 @@ "tags": [] }, "source": [ - "# Summary plots vs channels\n", - "Here you can monitor the distribution of a parameter across an entire run for all channels, grouped by string. 
\n", - "Shown in this plot:\n", - "* **mean** value (horizontal green line) of the distribution\n", - "* **std** (blue box)\n", - "* **min/max** (black horizontal lines below/above the box)" + "# Plot means vs channels\n", + "Here you can monitor the **mean** ('x' green marker) and **median** (horizontal green line) behaves separately for different channels, grouped by string. The box shows the IQR (interquartile range), ie the distance between the upper and lower quartiles, q(0.75)-q(0.25). Vertical lines end up to the min and max value of a given parameter's distribution for each channel." ] }, { "cell_type": "code", "execution_count": null, - "id": "9c275a1b-3354-4a93-80f6-2b8c0a3940c6", + "id": "017b16e9-da40-4a0b-9503-ce4c9e65070c", "metadata": {}, "outputs": [], "source": [ @@ -651,22 +509,12 @@ { "cell_type": "code", "execution_count": null, - "id": "6bbe32fc-f1b5-47d3-a5b7-93a3f2ae30d9", - "metadata": {}, - "outputs": [], - "source": [ - "channel_list = set(new_df_param_var.channel)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dc79d742-05e6-46f7-a8da-370a74b9cc97", - "metadata": {}, + "id": "51ae3c7f-19d2-4760-96c6-fafdfe6e6316", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "channel_dict = {}\n", - "\n", "param = {\n", " \"Cuspemax\": \"cuspemax_var\",\n", " \"Baseline\": \"baseline_var\",\n", @@ -674,56 +522,24 @@ " \"CuspemaxCtcCal\": \"cuspemax_ctc_cal_var\",\n", "}\n", "\n", - "for channel in channel_list:\n", - " timestamp = os.listdir(\n", - " f\"/data2/public/prodenv/prod-blind/tmp/auto/generated/par/hit/cal/{period}/{run}\"\n", - " )[-1].split(\"-\")[-2]\n", - " pars = json.load(\n", - " open(\n", - " f\"/data2/public/prodenv/prod-blind/tmp/auto/generated/par/hit/cal/{period}/{run}/l200-{period}-{run}-cal-{timestamp}-par_hit_results.json\",\n", - " \"r\",\n", - " )\n", - " )\n", - "\n", - " Qbb_FWHM = pars[\"ch\" + str(channel)][\"ecal\"][\"cuspEmax_ctc_cal\"][\"Qbb_fwhm\"]\n", - " Qbb_sig = Qbb_FWHM / 2.355\n", - " # channel_dict[channel] = Qbb_sig\n", - " channel_dict[channel] = Qbb_sig\n", - "\n", - "if param_widget.value == \"Cuspemax\":\n", - " new_df_param_var[\"resolution\"] = new_df_param_var[\"channel\"].map(channel_dict)\n", - "\n", "grouped_df = new_df_param_var.groupby([\"location\", \"position\", \"name\"])[\n", - " [param[param_widget.value], \"resolution\"]\n", + " param[param_widget.value]\n", "]\n", "\n", - "resolution = 2 # FWHM [keV]\n", - "\n", "my_df = pd.DataFrame()\n", - "\n", - "if param_widget.value == \"Cuspemax\":\n", - " my_df[\"mean\"] = grouped_df.mean()[param[param_widget.value]] / resolution * 20.39\n", - " my_df[\"std\"] = grouped_df.std()[param[param_widget.value]] / resolution * 20.39\n", - " my_df[\"minimum\"] = grouped_df.min()[param[param_widget.value]] / resolution * 20.39\n", - " my_df[\"maximum\"] = grouped_df.max()[param[param_widget.value]] / resolution * 20.39\n", - " my_df[\"resolution\"] = grouped_df.mean()[\"resolution\"]\n", - "\n", - " my_df[[\"mean\", \"std\", \"minimum\", \"maximum\"]] = my_df[\n", - " [\"mean\", \"std\", \"minimum\", \"maximum\"]\n", - " ].apply(lambda x: x / my_df.resolution)\n", - "\n", - "else:\n", - " my_df[\"mean\"] = grouped_df.mean()\n", - " my_df[\"std\"] = grouped_df.std()\n", - " my_df[\"minimum\"] = grouped_df.min()\n", - " my_df[\"maximum\"] = grouped_df.max()\n", + "my_df[\"mean\"] = grouped_df.mean()\n", + "my_df[\"std\"] = grouped_df.std()\n", + "my_df[\"std_2\"] = 2 * grouped_df.std()\n", + "my_df[\"std_3\"] = 3 * grouped_df.std()\n", + 
"my_df[\"minimum\"] = grouped_df.min()\n", + "my_df[\"maximum\"] = grouped_df.max()\n", "\n", "# Create boxes for mean ± std and plot mean as a horizontal line\n", "box_width = 0.5 # Width of the boxes\n", "box_positions = np.arange(len(my_df))\n", "\n", "# Create the figure and axis\n", - "fig, ax = plt.subplots(figsize=(16, 4))\n", + "fig, ax = plt.subplots(figsize=(16, 6))\n", "\n", "l = 0.15\n", "\n", @@ -735,28 +551,55 @@ "for index, row in my_df.reset_index().iterrows():\n", " if current_string != row[\"location\"]:\n", " current_index += 1\n", - " ax.vlines(current_index, -10000, 10000, color=\"black\", linewidth=2, zorder=10)\n", + " ax.vlines(current_index, -100, 100, color=\"black\", linewidth=2, zorder=10)\n", " current_string = row[\"location\"]\n", " name_list.append(f\"string {row.location}\")\n", "\n", " current_index += 1\n", "\n", + " rect3 = Rectangle(\n", + " (current_index - box_width / 2, row[\"mean\"] - row[\"std_3\"]),\n", + " box_width,\n", + " 2 * row[\"std_3\"],\n", + " fill=True,\n", + " alpha=0.15,\n", + " color=\"gray\",\n", + " linewidth=0,\n", + " zorder=3,\n", + " )\n", + "\n", + " rect2 = Rectangle(\n", + " (current_index - box_width / 2, row[\"mean\"] - row[\"std_2\"]),\n", + " box_width,\n", + " 2 * row[\"std_2\"],\n", + " fill=True,\n", + " alpha=0.5,\n", + " color=\"gray\",\n", + " linewidth=0,\n", + " zorder=3,\n", + " )\n", + "\n", " rect = Rectangle(\n", " (current_index - box_width / 2, row[\"mean\"] - row[\"std\"]),\n", " box_width,\n", " 2 * row[\"std\"],\n", - " fill=False,\n", - " edgecolor=\"tab:blue\",\n", - " linewidth=1,\n", + " fill=True,\n", + " alpha=0.9,\n", + " color=\"gray\",\n", + " linewidth=0,\n", " zorder=2,\n", " )\n", + "\n", + " ax.add_patch(rect3)\n", + " ax.add_patch(rect2)\n", " ax.add_patch(rect)\n", " ax.plot(\n", " [current_index - box_width / 2, current_index + box_width / 2],\n", " [row[\"mean\"], row[\"mean\"]],\n", - " color=\"tab:green\",\n", - " zorder=2,\n", + " color=\"tab:red\",\n", + " zorder=10,\n", " )\n", + " ax.grid()\n", "\n", " # Plot horizontal black lines at min and max values\n", " ax.hlines(\n", @@ -781,73 +624,56 @@ " current_index,\n", " row[\"std\"] + row[\"mean\"],\n", " row[\"maximum\"],\n", - " color=\"tab:blue\",\n", + " color=\"k\",\n", " linewidth=1,\n", " )\n", " ax.vlines(\n", " current_index,\n", " row[\"minimum\"],\n", " -row[\"std\"] + row[\"mean\"],\n", - " color=\"tab:blue\",\n", + " color=\"k\",\n", " linewidth=1,\n", " )\n", "\n", " name_list.append(row[\"name\"])\n", "\n", "\n", - "if limits_buttons.value == \"yes\":\n", - " # Plot lines for mean value thresholds\n", - " # ax.hlines(0.025, 0, len(name_list) - 1, color=\"tab:orange\", zorder=3, linewidth=1)\n", - " # ax.hlines(-0.025, 0, len(name_list) - 1, color=\"tab:orange\", zorder=3, linewidth=1)\n", + "# Plot lines for mean value thresholds\n", + "ax.hlines(5, 0, len(name_list) - 1, color=\"tab:green\", zorder=3, linewidth=1)\n", + "ax.hlines(-5, 0, len(name_list) - 1, color=\"tab:green\", zorder=3, linewidth=1)\n", "\n", - " # Plot lines for std value thresholds\n", - " ax.hlines(-1, 0, len(name_list) - 1, color=\"tab:red\", zorder=3, linewidth=1)\n", - " ax.hlines(1, 0, len(name_list) - 1, color=\"tab:red\", zorder=3, linewidth=1)\n", + "# Plot lines for std value thresholds\n", + "ax.hlines(\n", + " 10, 0, len(name_list) - 1, color=\"tab:orange\", zorder=3, linewidth=1, linestyle=\"--\"\n", + ")\n", + "ax.hlines(\n", + " -10,\n", + " 0,\n", + " len(name_list) - 1,\n", + " color=\"tab:orange\",\n", + " zorder=3,\n", + " 
linewidth=1,\n", + " linestyle=\"--\",\n", + ")\n", "\n", "# Set labels and title\n", "ax.set_xticks(np.arange(len(name_list)))\n", "ax.set_xticklabels(name_list, rotation=90)\n", "\n", "# Show plot\n", - "ax.set_ylim([-0.2, 0.2])\n", - "if min_input.value < max_input.value:\n", - " ax.set_ylim([min_input.value, max_input.value])\n", - "# ax.set_ylabel(f\"{param_widget.value} % variation\")\n", - "ax.set_ylabel(f\"energy shift @Qbb / resolution\")\n", + "x_min = min_input.value\n", + "x_max = max_input.value\n", + "if x_min == 0 and x_max == 0:\n", + " x_min = -50\n", + " x_max = 50\n", + "div = 12\n", + "ax.set_ylim([x_min, x_max])\n", + "ax.set_yticks(np.arange(x_min, x_max, div))\n", + "ax.set_ylabel(f\"{param_widget.value} % variation\")\n", "ax.set_title(f\"{period}-{run}\")\n", - "plt.grid()\n", "plt.tight_layout()\n", "plt.show()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "37e5f237-2470-49c8-a607-2c3796d7798b", - "metadata": {}, - "outputs": [], - "source": [ - "# remove global spikes events by selecting their amplitude\n", - "# and\n", - "# compute mean over initial hours of all DataFrame\n", - "\n", - "new_df_param_var = new_df_param_var.loc[\n", - " new_df_param_var[param[param_widget.value]] > -10\n", - "]\n", - "\n", - "channel_list = new_df_param_var[\"channel\"].unique()\n", - "\n", - "# recalculate % variation wrt new mean value for all channels\n", - "for ch in channel_list:\n", - " channel_df = new_df_param_var[new_df_param_var[\"channel\"] == ch]\n", - " channel_mean = (\n", - " channel_df[\"cuspemax_var\"].iloc[0 : int(0.1 * len(channel_df))].mean()\n", - " )\n", - " new_ch_var = (channel_df[\"cuspemax_var\"] - channel_mean) / channel_mean * 100\n", - " new_df_param_var.loc[\n", - " new_df_param_var[\"channel\"] == ch, param_widget.value + \"_var\"\n", - " ] = new_ch_var" - ] } ], "metadata": {}, diff --git a/notebook/L200-plotting-widgets.ipynb b/notebook/L200-plotting-widgets.ipynb deleted file mode 100644 index 02e60d5..0000000 --- a/notebook/L200-plotting-widgets.ipynb +++ /dev/null @@ -1,310 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "308b2266-c882-465f-89d0-c6ffe46e1b08", - "metadata": {}, - "source": [ - "### Introduction\n", - "\n", - "This notebook helps to have a first look at the saved output. \n", - "\n", - "It works after having installed the repo 'legend-data-monitor'. In particular, after the cloning, enter into the folder and install the package by typing\n", - "\n", - "```console\n", - "foo@bar:~$ pip install .\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "ab6a56d1-ec1e-4162-8b41-49e8df7b5f16", - "metadata": {}, - "source": [ - "# Select event type and parameter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c3348d46-78a7-4be3-80de-a88610d88f00", - "metadata": {}, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ which data do you want to read? 
CHANGE ME!\n", - "run = \"r005\" # r000, r001, ...\n", - "subsystem = \"geds\" # KEEP 'geds' for the moment\n", - "folder = \"prod-ref-temp\" # you can change me\n", - "period = \"p03\"\n", - "version = \"\" # leave an empty string if you're looking at p03 data\n", - "\n", - "if version == \"\":\n", - " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}\"\n", - "else:\n", - " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}\"\n", - "\n", - "\n", - "# ------------------------------------------------------------------------------------------ ...from here, you don't need to change anything in the code\n", - "import sys\n", - "import shelve\n", - "import matplotlib\n", - "import ipywidgets as widgets\n", - "from IPython.display import display\n", - "from matplotlib import pyplot as plt\n", - "from legend_data_monitor import plot_styles, plotting, utils\n", - "\n", - "%matplotlib widget\n", - "\n", - "# ------------------------------------------------------------------------------------------ load data\n", - "# Load the shelve object\n", - "shelf = shelve.open(data_file)\n", - "\n", - "# ------------------------------------------------------------------------------------------ evt type\n", - "# Get the list of available event types\n", - "event_types = list(shelf[\"monitoring\"].keys())\n", - "\n", - "# Create a dropdown widget for the event type\n", - "evt_type_widget = widgets.Dropdown(options=event_types, description=\"Event Type:\")\n", - "\n", - "\n", - "# ------------------------------------------------------------------------------------------ parameter\n", - "# Define a function to update the parameter dropdown based on the selected event type\n", - "def update_params(*args):\n", - " selected_evt_type = evt_type_widget.value\n", - " params = list(shelf[\"monitoring\"][selected_evt_type].keys())\n", - " param_widget.options = params\n", - "\n", - "\n", - "# Call the update_params function when the event type is changed\n", - "evt_type_widget.observe(update_params, \"value\")\n", - "\n", - "# Create a dropdown widget for the parameter\n", - "param_widget = widgets.Dropdown(description=\"Parameter:\")\n", - "\n", - "# ------------------------------------------------------------------------------------------ data format\n", - "data_format = [\"absolute values\", \"% values\"]\n", - "\n", - "# Create a dropdown widget\n", - "data_format_widget = widgets.Dropdown(options=data_format, description=\"data format:\")\n", - "\n", - "# ------------------------------------------------------------------------------------------ plot structure\n", - "plot_structures = [\"per string\", \"per channel\"]\n", - "\n", - "# Create a dropdown widget\n", - "plot_structures_widget = widgets.Dropdown(\n", - " options=plot_structures, description=\"Plot structure:\"\n", - ")\n", - "\n", - "# ------------------------------------------------------------------------------------------ plot style\n", - "plot_styles = [\"vs time\", \"histogram\"]\n", - "\n", - "# Create a dropdown widget\n", - "plot_styles_widget = widgets.Dropdown(options=plot_styles, description=\"Plot style:\")\n", - "\n", - "# ------------------------------------------------------------------------------------------ resampling\n", - "resampled = [\"no\", \"only\", \"also\"]\n", - "\n", - "# Create a dropdown widget\n", - "resampled_widget = widgets.Dropdown(options=resampled, description=\"Resampled:\")\n", - 
"\n", - "\n", - "# ------------------------------------------------------------------------------------------ get one or all strings\n", - "strings = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, \"all\"]\n", - "\n", - "# Create a dropdown widget\n", - "strings_widget = widgets.Dropdown(options=strings, description=\"String:\")\n", - "\n", - "# ------------------------------------------------------------------------------------------ display widgets\n", - "display(evt_type_widget)\n", - "display(\n", - " param_widget\n", - ") # it takes a while before displaying available parameters in the corresponding widget\n", - "\n", - "# ------------------------------------------------------------------------------------------ get params (based on event type)\n", - "evt_type = evt_type_widget.value\n", - "# params = list(shelf[\"monitoring\"][evt_type].keys())\n", - "param_widget.options = [\"cuspEmax\"]\n", - "\n", - "print(\"\\033[91mIf you change me, then RUN AGAIN the next cell!!!\\033[0m\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "508896aa-8f5c-4bed-a731-bb9aeca61bef", - "metadata": {}, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ get dataframe\n", - "def display_param_value(*args):\n", - " selected_evt_type = evt_type_widget.value\n", - " selected_param = param_widget.value\n", - " print(\n", - " f\"You are going to plot '{selected_param}' for '{selected_evt_type}' events...\"\n", - " )\n", - " # get dataframe\n", - " df_param = shelf[\"monitoring\"][selected_evt_type][selected_param][\"df_geds\"]\n", - " # get plot info\n", - " plot_info = shelf[\"monitoring\"][selected_evt_type][selected_param][\"plot_info\"]\n", - "\n", - " return df_param, plot_info\n", - "\n", - "\n", - "df_param, plot_info = display_param_value()\n", - "print(f\"...data have beeng loaded!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b836b69d-b7f5-4131-b6d5-26b637aed57b", - "metadata": {}, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ remove problematic dets in cal data\n", - "# df_param = df_param.set_index(\"name\")\n", - "# df_param = df_param.drop(['V01406A', 'V01415A', 'V01387A', 'P00665C', 'P00748B', 'P00748A', 'B00089D'])\n", - "# df_param = df_param.reset_index()" - ] - }, - { - "cell_type": "markdown", - "id": "f1c10c0f-9bed-400f-8174-c6d7e185648b", - "metadata": {}, - "source": [ - "# Plot data (select style and string)\n", - "For the selected parameter, choose the plot style (you can play with different data formats, plot structures, ... among the available ones).\n", - "\n", - "### Notes\n", - "1. I recommend using just **\"absolute values\" when plotting 'bl_std'** to see how noisy is each detector.\n", - "2. When you select **plot_style='histogram', you'll always plot NOT resampled values** (ie values for each timestamp entry). Indeed, if you choose different resampled options while keeping plot_style='histogram', nothing will change in plots.\n", - "4. **resampled='no'** means you look at each timestamp entry\n", - "5. **resampled='only'** means you look at each timestamp entry mediated over 1H time window ('1H' might change - in case, you can see what value was used for the resampling by printing ```print(plot_info['time_window'])``` (T=minutes, H=hours, D=days)\n", - "6. 
**resampled='also'** means you look at each timestamp entry mediated over 1H time window AND at each timestamp entry TOGETHER -> suggestion: use 'also' just when you choose plot_structures='per channel'; if you have selected 'per string', then you're not going to understand anything" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "084e9d36-1478-4833-96ff-555134e9a64c", - "metadata": {}, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ get plots\n", - "display(data_format_widget)\n", - "display(plot_structures_widget)\n", - "display(plot_styles_widget)\n", - "display(resampled_widget)\n", - "display(strings_widget)\n", - "print(\"\\033[91mIf you change me, then RUN AGAIN the next cell!!!\\033[0m\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2122008e-2a6c-49b6-8a81-d351c1bfd57e", - "metadata": {}, - "outputs": [], - "source": [ - "# set plotting options\n", - "plot_info[\"plot_style\"] = plot_styles_widget.value\n", - "plot_info[\"plot_structure\"] = plot_structures_widget.value\n", - "plot_info[\"resampled\"] = resampled_widget.value\n", - "plot_info[\"title\"] = \"\" # for plotting purposes\n", - "plot_info[\"subsystem\"] = \"\" # for plotting purposes\n", - "plot_info[\"std\"] = False\n", - "\n", - "df_to_plot = df_param\n", - "\n", - "# turn on the std when plotting individual channels together\n", - "if plot_info[\"plot_structure\"] == \"per channel\":\n", - " plot_info[\"std\"] = True\n", - "\n", - "if data_format_widget.value == \"absolute values\":\n", - " plot_info[\"parameter\"] = (\n", - " plot_info[\"parameter\"].split(\"_var\")[0]\n", - " if \"_var\" in plot_info[\"parameter\"]\n", - " else plot_info[\"parameter\"]\n", - " )\n", - " plot_info[\"limits\"] = utils.PLOT_INFO[plot_info[\"parameter\"]][\"limits\"][subsystem][\n", - " \"absolute\"\n", - " ]\n", - " plot_info[\"unit_label\"] = plot_info[\"unit\"]\n", - " if plot_info[\"parameter\"] not in df_to_plot:\n", - " print(\"There is no\", plot_info[\"parameter\"])\n", - " sys.exit(\"Stopping notebook.\")\n", - "if data_format_widget.value == \"% values\":\n", - " plot_info[\"parameter\"] = (\n", - " plot_info[\"parameter\"]\n", - " if \"_var\" in plot_info[\"parameter\"]\n", - " else plot_info[\"parameter\"] + \"_var\"\n", - " )\n", - " plot_info[\"limits\"] = utils.PLOT_INFO[plot_info[\"parameter\"].split(\"_var\")[0]][\n", - " \"limits\"\n", - " ][subsystem][\"variation\"]\n", - " plot_info[\"unit_label\"] = \"%\"\n", - " if plot_info[\"parameter\"] not in df_to_plot:\n", - " print(\"There is no\", plot_info[\"parameter\"])\n", - " sys.exit(\"Stopping notebook.\")\n", - "\n", - "print(f\"Making plots now...\")\n", - "if isinstance(strings_widget.value, str): # let's get all strings in output\n", - " for string in [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]:\n", - " if plot_structures_widget.value == \"per channel\":\n", - " plotting.plot_per_ch(\n", - " df_to_plot[df_to_plot[\"location\"] == string], plot_info, \"\"\n", - " ) # plot one canvas per channel\n", - " elif plot_structures_widget.value == \"per string\":\n", - " plotting.plot_per_string(\n", - " df_to_plot[df_to_plot[\"location\"] == string], plot_info, \"\"\n", - " ) # plot one canvas per string\n", - "else: # let's get one string in output\n", - " if plot_structures_widget.value == \"per channel\":\n", - " plotting.plot_per_ch(\n", - " df_to_plot[df_to_plot[\"location\"] == strings_widget.value], plot_info, \"\"\n", - " ) # plot one canvas per 
channel\n", - " elif plot_structures_widget.value == \"per string\":\n", - " plotting.plot_per_string(\n", - " df_to_plot[df_to_plot[\"location\"] == strings_widget.value], plot_info, \"\"\n", - " ) # plot one canvas per string" - ] - }, - { - "cell_type": "markdown", - "id": "17542fbd-a2fb-4474-829a-adb0ef99aae3", - "metadata": { - "tags": [] - }, - "source": [ - "# Plot means vs channels\n", - "Here you can monitor how the **mean value (evaluated over the first 10% of data)** behaves separately for different channels, grouped by string. These mean values are the ones **used to compute percentage variations**. The average value displayed in the legend on the right of the plot generated below shows the average of mean values for a given string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1c483113-044b-4b98-89fa-9596002a3752", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ get means plot\n", - "plot_info[\"plot_style\"] = \"vs ch\"\n", - "plot_info[\"unit_label\"] = plot_info[\"unit\"]\n", - "plot_info[\"parameter\"] = (\n", - " plot_info[\"parameter\"].split(\"_var\")[0]\n", - " if \"_var\" in plot_info[\"parameter\"]\n", - " else plot_info[\"parameter\"]\n", - ")\n", - "plot_info[\"unit_label\"] = plot_info[\"unit\"]\n", - "data = df_param.drop(columns=[param_widget.value])\n", - "data = data.rename(columns={param_widget.value + \"_mean\": param_widget.value})\n", - "plotting.plot_array(data, plot_info, \"\")" - ] - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/src/legend_data_monitor/subsystem.py b/src/legend_data_monitor/subsystem.py index 5d25acf..138c849 100644 --- a/src/legend_data_monitor/subsystem.py +++ b/src/legend_data_monitor/subsystem.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import JsonDB from pygama.flow import DataLoader from . import utils @@ -472,13 +472,11 @@ def get_channel_map(self): utils.logger.info("... getting channel map") # ------------------------------------------------------------------------- - # load full channel map of this exp and period + # load full channel map of this exp and period (and version) # ------------------------------------------------------------------------- - lmeta = LegendMetadata() - full_channel_map = lmeta.hardware.configuration.channelmaps.on( - timestamp=self.first_timestamp - ) + map_file = os.path.join(self.path, "inputs/hardware/configuration/channelmaps") + full_channel_map = JsonDB(map_file).on(timestamp=self.first_timestamp) df_map = pd.DataFrame(columns=utils.COLUMNS_TO_LOAD) df_map = df_map.set_index("channel") @@ -655,11 +653,11 @@ def get_channel_status(self): utils.logger.info("... getting channel status") # ------------------------------------------------------------------------- - # load full status map of this time selection + # load full status map of this time selection (and version) # ------------------------------------------------------------------------- - lmeta = LegendMetadata() - full_status_map = lmeta.dataprod.config.on( + map_file = os.path.join(self.path, "inputs/dataprod/config") + full_status_map = JsonDB(map_file).on( timestamp=self.first_timestamp, system=self.datatype )["analysis"]
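
Context for the `subsystem.py` change above: channel maps and status maps are now resolved from the local production environment (`self.path`) with `JsonDB`, instead of a `LegendMetadata` query. A minimal sketch of that access pattern follows, assuming a local prodenv layout; the path and timestamp below are illustrative placeholders, and only the `JsonDB(...).on(timestamp=..., system=...)` calls already shown in the diff are relied upon.

```python
import os

from legendmeta import JsonDB  # same import used by subsystem.py after this change

# illustrative values only - any prodenv path and file-key timestamp would do
prodenv_path = "/data2/public/prodenv/prod-blind/tmp/auto"
first_timestamp = "20230601T000000Z"  # hypothetical timestamp of the first loaded key

# hardware channel map valid at that timestamp (pattern used in get_channel_map)
map_file = os.path.join(prodenv_path, "inputs/hardware/configuration/channelmaps")
full_channel_map = JsonDB(map_file).on(timestamp=first_timestamp)

# detector status map for the selected data type (pattern used in get_channel_status)
config_file = os.path.join(prodenv_path, "inputs/dataprod/config")
full_status_map = JsonDB(config_file).on(timestamp=first_timestamp, system="phy")[
    "analysis"
]
```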