diff --git a/attic/auto_prod/README.md b/attic/auto_prod/README.md
new file mode 100644
index 0000000..752653e
--- /dev/null
+++ b/attic/auto_prod/README.md
@@ -0,0 +1,24 @@
+This basic example can be used to automatically generate monitoring plots whenever new .lh5 dsp/hit files appear in the production folders. Slow Control data are retrieved automatically from the database (you need to provide the port you use to connect to the database, together with the password available on Confluence).
+
+Specify the period and run you want to analyze in the script. You can then run the code with
+
+```console
+$ python main_sync_code.py
+```
+
+The output text is saved in a file called "output.log".
+
+You can run this command as a cron job. Run
+
+```console
+$ crontab -e
+```
+
+and add a new line of the form
+
+```console
+0 */6 * * * rm output.log && python main_sync_code.py >> output.log 2>&1
+```
+
+This will automatically look for new processed .lh5 files every 6 hours.
+You need to specify all input and output folders within the script itself.
diff --git a/attic/auto_prod/main_sync_code.py b/attic/auto_prod/main_sync_code.py
new file mode 100644
index 0000000..d175e0c
--- /dev/null
+++ b/attic/auto_prod/main_sync_code.py
@@ -0,0 +1,334 @@
+import os
+import re
+import json
+import subprocess
+from pathlib import Path
+import monitoring
+from legendmeta import LegendMetadata
+lmeta = LegendMetadata()
+from legend_data_monitor import utils
+
+# paths
+auto_dir_path = "/data2/public/prodenv/prod-blind/tmp/auto"
+rsync_path = "/data1/users/calgaro/rsync-env/output/"
+
+search_directory = f'{auto_dir_path}/generated/tier/dsp/phy'
+
+def search_latest_folder(my_dir):
+    directories = [d for d in os.listdir(my_dir) if os.path.isdir(os.path.join(my_dir, d))]
+    directories.sort(key=lambda x: Path(my_dir, x).stat().st_ctime)
+    return directories[-1]
+
+# Period to monitor
+period = "p07"  # search_latest_folder(search_directory)
+# Run to monitor
+search_directory = os.path.join(search_directory, period)
+run = search_latest_folder(search_directory)
+
+source_dir = f"{auto_dir_path}/generated/tier/hit/phy/{period}/{run}/"
+
+# commands to run the container
+cmd = "apptainer run"
+arg = "/data2/public/prodenv/containers/legendexp_legend-base_latest.sif"
+output_folder = "/data1/users/calgaro/prod-ref-v2"  # "auto_prova"
+
+# ===========================================================================================
+# BEGINNING OF THE ANALYSIS
+# ===========================================================================================
+# Configs definition
+
+# define slow control dict
+scdb = {
+    "output": output_folder,
+    "dataset": {
+        "experiment": "L200",
+        "period": period,
+        "version": "",
+        "path": auto_dir_path,
+        "type": "phy",
+        "runs": int(run.split('r')[-1])
+    },
+    "saving": "overwrite",
+    "slow_control": {
+        "parameters": [
+            "DaqLeft-Temp1",
+            "DaqLeft-Temp2",
+            "DaqRight-Temp1",
+            "DaqRight-Temp2",
+            "RREiT",
+            "RRNTe",
+            "RRSTe",
+            "ZUL_T_RR"
+        ]
+    }
+}
+with open(f"{rsync_path}auto_slow_control.json", "w") as f:
+    json.dump(scdb, f)
+
+# define geds dict
+my_config = {
+    "output": output_folder,
+    "dataset": {
+        "experiment": "L200",
+        "period": period,
+        "version": "",
+        "path": auto_dir_path,
+        "type": "phy",
+        "runs": int(run.split('r')[-1])
+    },
+    "saving": "append",
+    "subsystems": {
+        "geds": {
+            "Event rate in pulser events": {
+                "parameters": "event_rate",
+                "event_type": "pulser",
+                "plot_structure": "per string",
+                "resampled": "only",
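+                # note: every plot entry below repeats the same keys; "time_window" is a pandas
+                # offset alias (e.g. "20S" = 20 seconds, "10T" = 10 minutes), while "resampled"
+                # and "plot_style" take the same options exposed in the plotting notebooks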
"plot_style": "vs time", + "time_window": "20S" + }, + "Event rate in FCbsln events": { + "parameters": "event_rate", + "event_type": "FCbsln", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "time_window": "20S" + }, + "Baselines (dsp/baseline) in pulser events": { + "parameters": "baseline", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": True, + "variation": True, + "time_window": "10T" + }, + "Baselines (dsp/baseline) in FCbsln events": { + "parameters": "baseline", + "event_type": "FCbsln", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "variation": True, + "time_window": "10T" + }, + "Mean baselines (dsp/bl_mean) in pulser events": { + "parameters": "bl_mean", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": True, + "variation": True, + "time_window": "10T" + }, + "Mean baselines (dsp/bl_mean) in FCbsln events": { + "parameters": "bl_mean", + "event_type": "FCbsln", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "variation": True, + "time_window": "10T" + }, + "Uncalibrated gain (dsp/cuspEmax) in pulser events": { + "parameters": "cuspEmax", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": True, + "variation": True, + "time_window": "10T" + }, + "Uncalibrated gain (dsp/cuspEmax) in FCbsln events": { + "parameters": "cuspEmax", + "event_type": "FCbsln", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": True, + "variation": True, + "time_window": "10T" + }, + "Calibrated gain (hit/cuspEmax_ctc_cal) in pulser events": { + "parameters": "cuspEmax_ctc_cal", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "variation": True, + "time_window": "10T" + }, + "Calibrated gain (hit/cuspEmax_ctc_cal) in FCbsln events": { + "parameters": "cuspEmax_ctc_cal", + "event_type": "FCbsln", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "variation": True, + "time_window": "10T" + }, + "Noise (dsp/bl_std) in pulser events": { + "parameters": "bl_std", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": True, + "variation": True, + "time_window": "10T" + }, + "Noise (dsp/bl_std) in FCbsln events": { + "parameters": "bl_std", + "event_type": "FCbsln", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": True, + "variation": True, + "time_window": "10T" + }, + "A/E (from dsp) in pulser events": { + "parameters": "AoE_Custom", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "variation": True, + "time_window": "10T" + }, + "A/E (from dsp) in FCbsln events": { + "parameters": "AoE_Custom", + "event_type": "FCbsln", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "variation": True, + "time_window": "10T" + } + } + } +} +with open(f"{rsync_path}auto_config.json", "w") as f: + json.dump(my_config, f) + +# =========================================================================================== +# Get not-analyzed files +# 
=========================================================================================== + +# File to store the timestamp of the last check +timestamp_file = f'{rsync_path}last_checked_{period}_{run}.txt' + +# Read the last checked timestamp +last_checked = None +if os.path.exists(timestamp_file): + with open(timestamp_file, 'r') as file: + last_checked = file.read().strip() + +# Get the current timestamp +current_files = os.listdir(source_dir) +new_files = [] + +# Compare the timestamps of files and find new files +for file in current_files: + file_path = os.path.join(source_dir, file) + if last_checked is None or os.path.getmtime(file_path) > float(last_checked): + new_files.append(file) + +# If new files are found, check if they are ok or not +if new_files: + pattern = r"\d+" + correct_files = [] + + for new_file in new_files: + matches = re.findall(pattern, new_file) + # get only files with correct ending (and discard the ones that are still under processing) + if len(matches) == 6: + correct_files.append(new_file) + + new_files = correct_files + +# =========================================================================================== +# Analyze not-analyzed files +# =========================================================================================== + +# If new files are found, run the shell command +if new_files: + # Replace this command with your desired shell command + command = 'echo New files found: \033[91m{}\033[0m'.format(' '.join(new_files)) + subprocess.run(command, shell=True) + + # create the file containing the keys with correct format to be later used by legend-data-monitor (it must be created every time with the new keys; NOT APPEND) + utils.logger.debug("\nCreating the file containing the keys to inspect...") + with open(f'{rsync_path}new_keys.filekeylist', 'w') as f: + for new_file in new_files: + new_file = new_file.split('-tier')[0] + f.write(new_file + '\n') + utils.logger.debug("...done!") + + # ...run the plot production + utils.logger.debug("\nRunning the generation of plots...") + config_file = f"{rsync_path}auto_config.json" + keys_file = f"{rsync_path}new_keys.filekeylist" + + bash_command = f"{cmd} --cleanenv {arg} ~/.local/bin/legend-data-monitor user_rsync_prod --config {config_file} --keys {keys_file}" + utils.logger.debug(f"...running command \033[95m{bash_command}\033[0m") + subprocess.run(bash_command, shell=True) + utils.logger.debug("...done!") + + # =========================================================================================== + # Analyze Slow Control data (for the full run - overwrite of previous info) + # =========================================================================================== + # run slow control data retrieving + utils.logger.debug("\nRetrieving Slow Control data...") + scdb_config_file = f"{rsync_path}auto_slow_control.json" + + bash_command = f"{cmd} --cleanenv {arg} ~/.local/bin/legend-data-monitor user_scdb --config {scdb_config_file} --port 8282 --pswd THE_PASSWORD" + utils.logger.debug(f"...running command \033[92m{bash_command}\033[0m") + subprocess.run(bash_command, shell=True) + utils.logger.debug("...SC done!") + +# Update the last checked timestamp +with open(timestamp_file, 'w') as file: + file.write(str(os.path.getmtime(max([os.path.join(source_dir, file) for file in current_files], key=os.path.getmtime)))) + +# =========================================================================================== +# Generate Static Plots (eg gain monitoring) +# 
===========================================================================================
+
+# create monitoring-plots folder
+mtg_folder = os.path.join(output_folder, 'generated/mtg')
+if not os.path.exists(mtg_folder):
+    os.makedirs(mtg_folder)
+    utils.logger.debug(f"Folder '{mtg_folder}' created.")
+mtg_folder = os.path.join(mtg_folder, 'phy')
+if not os.path.exists(mtg_folder):
+    os.makedirs(mtg_folder)
+    utils.logger.debug(f"Folder '{mtg_folder}' created.")
+
+# define dataset depending on the (latest) monitored period/run
+avail_runs = sorted(os.listdir(os.path.join(mtg_folder.replace('mtg', 'plt'), period)))
+dataset = {
+    period: avail_runs
+}
+utils.logger.debug(f'This is the dataset: {dataset}')
+
+# get first timestamp of first run of the given period
+start_key = (sorted(os.listdir(os.path.join(search_directory, avail_runs[0])))[0]).split('-')[4]
+meta = LegendMetadata("/data2/public/prodenv/prod-blind/tmp/auto/inputs/")
+# get channel map
+chmap = meta.channelmap(start_key)
+# get string info
+str_chns = {}
+for string in range(13):
+    if string in [0, 6]:
+        continue
+    channels = [f"ch{chmap[ged].daq.rawid}" for ged, dic in chmap.items() if dic["system"] == 'geds' and dic["analysis"]["processable"] == True and dic["location"]["string"] == string]
+    if len(channels) > 0:
+        str_chns[string] = channels
+
+# get pulser monitoring plot for a full period
+phy_mtg_data = mtg_folder.replace('mtg', 'plt')
+if dataset[period] != []:
+    monitoring.stability(phy_mtg_data, mtg_folder, dataset, chmap, str_chns, 1, False)
\ No newline at end of file
diff --git a/attic/auto_prod/monitoring.py b/attic/auto_prod/monitoring.py
new file mode 100644
index 0000000..2efbdbd
--- /dev/null
+++ b/attic/auto_prod/monitoring.py
@@ -0,0 +1,385 @@
+#
+# Large part of this code was written by William Quinn - this is an adaptation that reads the automatic
+# monitoring hdf files for phy data and creates monitoring plots that will later be uploaded to the dashboard.
+# !!!
this is not taking account of global pulser spike tagging +# + +import matplotlib +import matplotlib.pyplot as plt +from matplotlib import cycler, patches +from matplotlib.colors import LogNorm +import os, json +import lgdo.lh5_store as lh5 +import numpy as np +from lgdo import ls , show +from legendmeta import LegendMetadata +import pandas as pd +import h5py + +from tqdm.notebook import tqdm + +IPython_default = plt.rcParams.copy() +SMALL_SIZE = 8 +MEDIUM_SIZE = 10 +BIGGER_SIZE = 12 + +figsize = (4.5, 3) + +plt.rc('font', size=SMALL_SIZE) # controls default text sizes +plt.rc('axes', titlesize=SMALL_SIZE) # fontsize of the axes title +plt.rc('axes', labelsize=SMALL_SIZE) # fontsize of the x and y labels +plt.rc('xtick', labelsize=SMALL_SIZE) # fontsize of the tick labels +plt.rc('ytick', labelsize=SMALL_SIZE) # fontsize of the tick labels +plt.rc('legend', fontsize=SMALL_SIZE) # legend fontsize +plt.rc('figure', titlesize=SMALL_SIZE) # fontsize of the figure title +plt.rcParams["font.family"] = "serif" + +matplotlib.rcParams['mathtext.fontset'] = 'stix' +#matplotlib.rcParams['font.family'] = 'STIXGeneral' + +marker_size = 2 +line_width = 0.5 +cap_size = 0.5 +cap_thick = 0.5 + +# colors = cycler('color', ['b', 'g', 'r', 'm', 'y', 'k', 'c', '#8c564b']) +plt.rc('axes', facecolor='white', edgecolor='black', + axisbelow=True, grid=True) + +def get_calib_pars(period, run_list, channel, partition, escale=2039, fit='linear', path='/data2/public/prodenv/prod-blind/tmp/auto'):#'/data2/public/prodenv/prod-blind/ref/v02.00'): + sto = lh5.LH5Store() + + calib_data = { + 'fep': [], + 'cal_const': [], + 'run_start': [], + 'run_end': [], + 'res': [], + 'res_quad': [] + } + + tier = 'pht' if partition is True else 'hit' + key_result = 'partition_ecal' if partition is True else 'ecal' + + for run in run_list: + prod_ref = path + timestamp = os.listdir(f'{path}/generated/par/{tier}/cal/{period}/{run}')[-1].split('-')[-2] + if tier == 'pht': + pars = json.load( + open( + f'{path}/generated/par/{tier}/cal/{period}/{run}/l200-{period}-{run}-cal-{timestamp}-par_{tier}.json', + 'r') + ) + else: + pars = json.load( + open( + f'{path}/generated/par/{tier}/cal/{period}/{run}/l200-{period}-{run}-cal-{timestamp}-par_{tier}_results.json', + 'r') + ) + + # for FEP peak, we want to look at the behaviour over time --> take 'ecal' results (not partition ones!) 
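+        # the par-file layout differs between partition ("pht") and run-wise ("hit") calibrations,
+        # hence the two branches below; if the fit keys are missing, the FEP position/gain fall
+        # back to 0 and the resolution to NaN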
+ if tier == 'pht': + try: + fep_peak_pos = pars[channel]['results']['ecal']['cuspEmax_ctc_cal']['pk_fits']['2614.5']['parameters_in_ADC']['mu'] + fep_gain = fep_peak_pos/2614.5 + except: + fep_peak_pos=0 + fep_gain=0 + else: + try: + fep_peak_pos = pars[channel]['ecal']['cuspEmax_ctc_cal']['peak_fit_pars']['2614.5'][1] + fep_gain = fep_peak_pos/2614.5 + except: + fep_peak_pos=0 + fep_gain=0 + + if tier == 'pht': + try: + if fit == "linear": + Qbb_fwhm = pars[channel]['results'][key_result]['cuspEmax_ctc_cal']['eres_linear']['Qbb_fwhm(keV)'] + Qbb_fwhm_quad = pars[channel]['results'][key_result]['cuspEmax_ctc_cal']['eres_quadratic']['Qbb_fwhm(keV)'] + else: + Qbb_fwhm = pars[channel]['results'][key_result]['cuspEmax_ctc_cal']['eres_quadratic']['Qbb_fwhm(keV)'] + except: + Qbb_fwhm = np.nan + else: + try: + Qbb_fwhm = pars[channel][key_result]['cuspEmax_ctc_cal']['Qbb_fwhm'] + Qbb_fwhm_quad = np.nan + except: + Qbb_fwhm = np.nan + Qbb_fwhm_quad = np.nan + + pars = json.load( + open( + f'{path}/generated/par/{tier}/cal/{period}/{run}/l200-{period}-{run}-cal-{timestamp}-par_{tier}.json', + 'r') + ) + + if tier == 'pht': + try: + cal_const_a = pars[channel]['pars']['operations']['cuspEmax_ctc_cal']['parameters']['a'] + cal_const_b = pars[channel]['pars']['operations']['cuspEmax_ctc_cal']['parameters']['b'] + cal_const_c = pars[channel]['pars']['operations']['cuspEmax_ctc_cal']['parameters']['c'] + fep_cal = cal_const_c + fep_peak_pos * cal_const_b + cal_const_a * fep_peak_pos ** 2 + except: + fep_cal = np.nan + else: + try: + cal_const_a = pars[channel]['operations']['cuspEmax_ctc_cal']['parameters']['a'] + cal_const_b = pars[channel]['operations']['cuspEmax_ctc_cal']['parameters']['b'] + if period in ['p07'] or (period == 'p06' and run == 'r005'): + cal_const_c = pars[channel]['operations']['cuspEmax_ctc_cal']['parameters']['c'] + fep_cal = cal_const_c + fep_peak_pos * cal_const_b + cal_const_a * fep_peak_pos ** 2 + else: + fep_cal = cal_const_b + cal_const_a * fep_peak_pos + except: + fep_cal = np.nan + + if run not in os.listdir(f'{prod_ref}/generated/tier/dsp/phy/{period}'): + # get timestamp for additional-final cal run (only for FEP gain display) + run_files = sorted(os.listdir(f'{prod_ref}/generated/tier/dsp/cal/{period}/{run}/')) + run_end_time = pd.to_datetime( + sto.read_object("ch1027201/dsp/timestamp", + f'{prod_ref}/generated/tier/dsp/cal/{period}/{run}/' + run_files[-1])[0][-1], + unit='s' + ) + run_start_time = run_end_time + Qbb_fwhm = np.nan + Qbb_fwhm_quad = np.nan + else: + run_files = sorted(os.listdir(f'{prod_ref}/generated/tier/dsp/phy/{period}/{run}/')) + run_start_time = pd.to_datetime( + sto.read_object("ch1027201/dsp/timestamp", + f'{prod_ref}/generated/tier/dsp/phy/{period}/{run}/' + run_files[0])[0][0], + unit='s' + ) + run_end_time = pd.to_datetime( + sto.read_object("ch1027201/dsp/timestamp", + f'{prod_ref}/generated/tier/dsp/phy/{period}/{run}/' + run_files[-1])[0][-1], + unit='s' + ) + + calib_data['fep'].append(fep_gain) + calib_data['cal_const'].append(fep_cal) + calib_data['run_start'].append(run_start_time) + calib_data['run_end'].append(run_end_time) + calib_data['res'].append(Qbb_fwhm) + calib_data['res_quad'].append(Qbb_fwhm_quad) + + print(channel, calib_data['res']) + + for key, item in calib_data.items(): calib_data[key] = np.array(item) + + init_cal_const, init_fep = 0, 0 + for cal_, fep_ in zip(calib_data['cal_const'], calib_data['fep']): + if init_fep == 0 and fep_ != 0: init_fep = fep_ + if init_cal_const == 0 and cal_ != 0: init_cal_const = cal_ + 
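+    # express the calibration constant and the FEP gain as fractional deviations from their first
+    # non-zero value, scaled by escale (default 2039 keV, i.e. Qbb) so that shifts read directly in keV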
+ if init_cal_const == 0: + calib_data['cal_const_diff'] = np.array([np.nan for i in range(len(calib_data['cal_const']))]) + else: + calib_data['cal_const_diff'] = (calib_data['cal_const'] - init_cal_const)/init_cal_const * escale + + if init_fep == 0: + calib_data['fep_diff'] = np.array([np.nan for i in range(len(calib_data['fep']))]) + else: + calib_data['fep_diff'] = (calib_data['fep'] - init_fep)/init_fep * escale + + return calib_data + +def custom_resampler(group, min_required_data_points=100): + if len(group) >= min_required_data_points: + return group + else: + return None + +def get_dfs(phy_mtg_data, period, run_list): + phy_mtg_data = os.path.join(phy_mtg_data, period) + runs = os.listdir(phy_mtg_data) + geds_df_cuspEmax_abs = pd.DataFrame() + geds_df_cuspEmax_var = pd.DataFrame() + geds_df_cuspEmax_abs_corr = pd.DataFrame() + geds_df_cuspEmax_var_corr = pd.DataFrame() + puls_df_cuspEmax_abs = pd.DataFrame() + puls_df_cuspEmax_var = pd.DataFrame() + + for r in runs: + # keep only specified runs + if r not in run_list: + continue + files = os.listdir(os.path.join(phy_mtg_data, r)) + # get only geds files + hdf_geds = [f for f in files if "hdf" in f and "geds" in f] + if len(hdf_geds) == 0: + return None, None, None + hdf_geds = os.path.join(phy_mtg_data, r, hdf_geds[0]) # should be 1 + # get only puls files + hdf_puls = [f for f in files if "hdf" in f and "pulser01ana" in f] + hdf_puls = os.path.join(phy_mtg_data, r, hdf_puls[0]) # should be 1 + + # GEDS DATA ======================================================================================================== + geds_abs = pd.read_hdf(hdf_geds, key=f'IsPulser_Cuspemax') + geds_df_cuspEmax_abs = pd.concat([geds_df_cuspEmax_abs, geds_abs], ignore_index=False, axis=0) + # GEDS PULS-CORRECTED DATA ========================================================================================= + geds_puls_abs = pd.read_hdf(hdf_geds, key=f'IsPulser_Cuspemax_pulser01anaDiff') + geds_df_cuspEmax_abs_corr = pd.concat([geds_df_cuspEmax_abs_corr, geds_puls_abs], ignore_index=False, axis=0) + # PULS DATA ======================================================================================================== + puls_abs = pd.read_hdf(hdf_puls, key=f'IsPulser_Cuspemax') + puls_df_cuspEmax_abs = pd.concat([puls_df_cuspEmax_abs, puls_abs], ignore_index=False, axis=0) + + return geds_df_cuspEmax_abs, geds_df_cuspEmax_abs_corr, puls_df_cuspEmax_abs + +def get_pulser_data(period, dfs, channel, escale): + + ser_pul_cusp = dfs[2][1027203] # selection of pulser channel + ser_ged_cusp = dfs[0][channel] # selection of ged channel + + ser_ged_cusp = ser_ged_cusp.dropna() + ser_pul_cusp = ser_pul_cusp.loc[ser_ged_cusp.index] + hour_counts = ser_pul_cusp.resample("H").count() >= 100 + + ged_cusp_av = np.average(ser_ged_cusp.values[:360]) # switch to first 10% of available time interval? + pul_cusp_av = np.average(ser_pul_cusp.values[:360]) + # first entries of dataframe are NaN ... how to solve it? 
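+    # if the first entries are all NaN, the reference average is NaN too: the channel cannot be
+    # normalised in that case, so None is returned and the caller skips it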
+ if np.isnan(ged_cusp_av): + print('the average is a nan') + print(ser_pul_cusp_without_nan) + return None + + ser_ged_cuspdiff = pd.Series((ser_ged_cusp.values - ged_cusp_av)/ged_cusp_av, index=ser_ged_cusp.index.values).dropna() + ser_pul_cuspdiff = pd.Series((ser_pul_cusp.values - pul_cusp_av)/pul_cusp_av, index=ser_pul_cusp.index.values).dropna() + ser_ged_cuspdiff_kev = pd.Series(ser_ged_cuspdiff*escale, index=ser_ged_cuspdiff.index.values) + ser_pul_cuspdiff_kev = pd.Series(ser_pul_cuspdiff*escale, index=ser_pul_cuspdiff.index.values) + + #is_valid = (df_ged.tp_0_est < 5e4) & (df_ged.tp_0_est > 4.8e4) & (df_ged.trapTmax > 200) # global pulser removal (these columns are not present in our dfs) + + ged_cusp_hr_av_ = ser_ged_cuspdiff_kev.resample('H').mean() + ged_cusp_hr_av_[~hour_counts.values] = np.nan + ged_cusp_hr_std = ser_ged_cuspdiff_kev.resample('H').std() + ged_cusp_hr_std[~hour_counts.values] = np.nan + pul_cusp_hr_av_ = ser_pul_cuspdiff_kev.resample('H').mean() + pul_cusp_hr_av_[~hour_counts.values] = np.nan + pul_cusp_hr_std = ser_pul_cuspdiff_kev.resample('H').std() + pul_cusp_hr_std[~hour_counts.values] = np.nan + + ged_cusp_corr = ser_ged_cuspdiff - ser_pul_cuspdiff + ged_cusp_corr = pd.Series(ged_cusp_corr[ser_ged_cuspdiff.index.values]) + ged_cusp_corr_kev = ged_cusp_corr*escale + ged_cusp_corr_kev = pd.Series(ged_cusp_corr_kev[ged_cusp_corr.index.values]) + ged_cusp_cor_hr_av_ = ged_cusp_corr_kev.resample('H').mean() + ged_cusp_cor_hr_av_[~hour_counts.values] = np.nan + ged_cusp_cor_hr_std = ged_cusp_corr_kev.resample('H').std() + ged_cusp_cor_hr_std[~hour_counts.values] = np.nan + + return { + 'ged': { + 'cusp': ser_ged_cusp, + 'cuspdiff': ser_ged_cuspdiff, + 'cuspdiff_kev': ser_ged_cuspdiff_kev, + 'cusp_av': ged_cusp_hr_av_, + 'cusp_std': ged_cusp_hr_std + }, + 'pul_cusp': { + 'raw': ser_pul_cusp, + 'rawdiff': ser_pul_cuspdiff, + 'kevdiff': ser_pul_cuspdiff_kev, + 'kevdiff_av': pul_cusp_hr_av_, + 'kevdiff_std': pul_cusp_hr_std + }, + 'diff': { + 'raw': None, + 'rawdiff': ged_cusp_corr, + 'kevdiff': ged_cusp_corr_kev, + 'kevdiff_av': ged_cusp_cor_hr_av_, + 'kevdiff_std': ged_cusp_cor_hr_std + } + } + + +def stability(phy_mtg_data, output_folder, dataset, chmap, str_chns, xlim_idx, partition=False, quadratic=False, zoom=True): + + period_list = list(dataset.keys()) + for index_i in tqdm(range(len(period_list))): + period = period_list[index_i] + run_list = dataset[period] + + geds_df_cuspEmax_abs, geds_df_cuspEmax_abs_corr, puls_df_cuspEmax_abs = get_dfs(phy_mtg_data, period, run_list) + if geds_df_cuspEmax_abs is None or geds_df_cuspEmax_abs_corr is None or puls_df_cuspEmax_abs is None: + continue + dfs = [geds_df_cuspEmax_abs, geds_df_cuspEmax_abs_corr, puls_df_cuspEmax_abs] + + string_list = list(str_chns.keys()) + for index_j in tqdm(range(len(string_list))): + string = string_list[index_j] + + channel_list = str_chns[string] + do_channel = True + for index_k in range(len(channel_list)): + channel = channel_list[index_k] + pulser_data = get_pulser_data(period, dfs, int(channel.split('ch')[-1]), escale=2039) + if pulser_data is None: + continue + + fig, ax = plt.subplots(figsize=(12,4)) + + pars_data = get_calib_pars(period, run_list, channel, partition, escale=2039) + + if channel != 'ch1120004': + + # plt.plot(pulser_data['ged']['cusp_av'], 'C0', label='GED') + plt.plot(pulser_data['pul_cusp']['kevdiff_av'], 'C2', label='PULS01') + plt.plot(pulser_data['diff']['kevdiff_av'], 'C4', label='GED corrected') + + plt.fill_between( + 
pulser_data['diff']['kevdiff_av'].index.values, + y1=[float(i) - float(j) for i, j in zip(pulser_data['diff']['kevdiff_av'].values, pulser_data['diff']['kevdiff_std'].values)], + y2=[float(i) + float(j) for i, j in zip(pulser_data['diff']['kevdiff_av'].values, pulser_data['diff']['kevdiff_std'].values)], + color='k', alpha=0.2, label=r'±1$\sigma$' + ) + + plt.plot(pars_data['run_start'] - pd.Timedelta(hours=5), pars_data['fep_diff'], 'kx', label='FEP gain') + plt.plot(pars_data['run_start'] - pd.Timedelta(hours=5), pars_data['cal_const_diff'], 'rx', label='cal. const. diff') + + for ti in pars_data['run_start']: plt.axvline(ti, color='k') + + t0 = pars_data['run_start'] + for i in range(len(t0)): + if i == len(pars_data['run_start'])-1: + plt.plot([t0[i], t0[i] + pd.Timedelta(days=7)], [pars_data['res'][i]/2, pars_data['res'][i]/2], 'b-') + plt.plot([t0[i], t0[i] + pd.Timedelta(days=7)], [-pars_data['res'][i]/2, -pars_data['res'][i]/2], 'b-') + if quadratic: + plt.plot([t0[i], t0[i] + pd.Timedelta(days=7)], [pars_data['res_quad'][i]/2, pars_data['res_quad'][i]/2], 'y-') + plt.plot([t0[i], t0[i] + pd.Timedelta(days=7)], [-pars_data['res_quad'][i]/2, -pars_data['res_quad'][i]/2], 'y-') + else: + plt.plot([t0[i], t0[i+1]], [pars_data['res'][i]/2, pars_data['res'][i]/2], 'b-') + plt.plot([t0[i], t0[i+1]], [-pars_data['res'][i]/2, -pars_data['res'][i]/2], 'b-') + if quadratic: + plt.plot([t0[i], t0[i+1]], [pars_data['res_quad'][i]/2, pars_data['res_quad'][i]/2], 'y-') + plt.plot([t0[i], t0[i+1]], [-pars_data['res_quad'][i]/2, -pars_data['res_quad'][i]/2], 'y-') + if str(pars_data['res'][i]/2*1.1) != 'nan' and i`_ instructions. +Data are loaded following the ``pylegendmeta`` tutorial , which shows how to inspect the database. + + +... put here some text on how to specify the plotting of a SC parameter in the config file (no ideas for the moment)... + + +Files are collected in the output folder specified in the ``output`` config entry: + +.. code-block:: json + + { + "output": "/out", + // ... + +In principle, for plotting the SC data you would need just the start and the end of a time interval of interest. This means that SC data does not depend on any dataset info (``experiment``, ``period``, ``version``, ``type``) but ``time_selection``. +However, there are cases were we want to inspect a given run or time period made of keys as we usually do with germanium. + +In the first case, we end up saving data in the following folder: + +.. code-block:: + + /out/ + └── generated + └── plt + └── SC + └── + ├── SC-.pdf + ├── SC-.log + └── SC-.{dat,bak,dir} + +Otherwise, we store the SC data/plots as usual: + +.. code-block:: + + /out/ + └── generated + └── plt + └── + └── + └── SC + └── + ├── SC-.pdf + ├── SC-.log + └── SC-.{dat,bak,dir} + + +.. note:: + + ``time_selection`` can assume one of the following formats, depending on what we put as a time range into ``dataset``: + + - if ``{'start': '20220928T080000Z', 'end': '20220928T093000Z'}`` (start + end), then = ``20220928T080000Z_20220928T093000Z``; + - if ``{'timestamps': ['20230207T103123Z']}`` (one key), then = ``20230207T103123Z``; + - if ``{'timestamps': ['20230207T103123Z', '20230207T141123Z', '20230207T083323Z']}`` (multiple keys), then = ``20230207T083323Z_20230207T141123Z`` (min/max timestamp interval) + - if ``{'runs': 1}`` (one run), then = ``r001``; + - if ``{'runs': [1, 2, 3]}`` (multiple runs), then = ``r001_r002_r003``. 
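+
+A minimal Slow Control configuration, sketched from the automatic-production example in
+``attic/auto_prod/main_sync_code.py``, lists the parameters to retrieve under a ``slow_control``
+block next to the usual ``output`` and ``dataset`` entries (the parameter names here are an
+illustrative subset of those listed at the bottom of this page):
+
+.. code-block:: json
+
+   {
+     "output": "/out",
+     "dataset": {
+       "experiment": "L200",
+       "period": "p07",
+       "version": "",
+       "path": "/data2/public/prodenv/prod-blind/tmp/auto",
+       "type": "phy",
+       "runs": 7
+     },
+     "saving": "overwrite",
+     "slow_control": {
+       "parameters": ["PT114", "RREiT", "DaqLeft-Temp1"]
+     }
+   }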
+ +Shelve output objects +~~~~~~~~~~~~~~~~~~~~~ +*Under construction...* + + +Available SC parameters +----------------------- + +Available parameters include: + +- ``PT114``, ``PT115``, ``PT118`` (cryostat pressures) +- ``PT202``, ``PT205``, ``PT208`` (cryostat vacuum) +- ``LT01`` (water loop fine fill level) +- ``RREiT`` (injected air temperature clean room), ``RRNTe`` (clean room temperature north), ``RRSTe`` (clean room temperature south), ``ZUL_T_RR`` (supply air temperature clean room) +- ``DaqLeft-Temp1``, ``DaqLeft-Temp2``, ``DaqRight-Temp1``, ``DaqRight-Temp2`` (rack present temperatures) diff --git a/notebook/L200-plotting-concatenate_runs_periods.ipynb b/notebook/L200-plotting-concatenate_runs_periods.ipynb new file mode 100644 index 0000000..dca5c67 --- /dev/null +++ b/notebook/L200-plotting-concatenate_runs_periods.ipynb @@ -0,0 +1,569 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "308b2266-c882-465f-89d0-c6ffe46e1b08", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "This notebook helps to have a first look at the saved output, reading into hdf files. It helps to concatenate more runs and more periods, one after the other. It is helpful to monitor the system over a larger period of time usually set as a run.\n", + "\n", + "It works after having installed the repo 'legend-data-monitor'. In particular, after the cloning, enter into the folder and install the package by typing\n", + "\n", + "```console\n", + "foo@bar:~$ pip install .\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "acd13756-4007-4cda-bed2-3ee1b6056d15", + "metadata": {}, + "source": [ + "# Select period to inspect" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5de1e10c-b02d-45eb-9088-3e8103b3cbff", + "metadata": {}, + "outputs": [], + "source": [ + "# ------------------------------------------------------------------------------------------ which data do you want to read? CHANGE ME!\n", + "subsystem = \"geds\" # KEEP 'geds' for the moment\n", + "folder = \"prod-ref-v2\" # you can change me\n", + "version = \"\" # leave an empty string if you're looking at p03 data\n", + "periods = [\n", + " \"p06\"\n", + "] # one or more, eg = sorted(os.listdir(f\"/data1/users/calgaro/{folder}/generated/plt/phy/\"))\n", + "\n", + "# ------------------------------------------------------------------------------------------ remove detectors from the plots\n", + "# do you want to remove some detectors? 
If so, put here their names (or empty list if you want everything included)\n", + "to_be_excluded = (\n", + " []\n", + ") # [\"V01406A\", \"V01415A\", \"V01387A\", \"P00665C\", \"P00748B\", \"P00748A\", \"B00089D\"]" + ] + }, + { + "cell_type": "markdown", + "id": "ab6a56d1-ec1e-4162-8b41-49e8df7b5f16", + "metadata": {}, + "source": [ + "# Select event type, parameter and original or PULS01ANA-rescaled values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3348d46-78a7-4be3-80de-a88610d88f00", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ------------------------------------------------------------------------------------------ ...from here, you don't need to change anything in the code\n", + "import os\n", + "import json\n", + "import sys\n", + "import h5py\n", + "import shelve\n", + "import matplotlib\n", + "import pandas as pd\n", + "import numpy as np\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.patches import Rectangle\n", + "from legend_data_monitor import plot_styles, plotting, utils\n", + "import legend_data_monitor as ldm\n", + "\n", + "%matplotlib widget\n", + "\n", + "# ------------------------------------------------------------------------------------------ select one data file\n", + "# hypothesis: being these files under the same production folder, we expect them to contain the same keys - if not, an error will appear\n", + "run = sorted(\n", + " os.listdir(f\"/data1/users/calgaro/{folder}/generated/plt/phy/{periods[0]}/\")\n", + ")[0]\n", + "if version == \"\":\n", + " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{periods[0]}/{run}/l200-{periods[0]}-{run}-phy-{subsystem}.hdf\"\n", + "else:\n", + " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{periods[0]}/{run}/l200-{periods[0]}-{run}-phy-{subsystem}.hdf\"\n", + "\n", + "# ------------------------------------------------------------------------------------------ building channel map\n", + "# this is period/run dependent, but for now it was kept equal among p03-p06\n", + "dataset = {\n", + " \"experiment\": \"L200\",\n", + " \"period\": periods[0],\n", + " \"type\": \"phy\",\n", + " \"version\": version,\n", + " \"path\": \"/data2/public/prodenv/prod-blind/tmp/auto\",\n", + " \"runs\": int(run[1:]),\n", + "}\n", + "\n", + "geds = ldm.Subsystem(f\"{subsystem}\", dataset=dataset)\n", + "channel_map = geds.channel_map\n", + "\n", + "for det in to_be_excluded:\n", + " channel_map = channel_map[channel_map.name != det]\n", + "\n", + "# ------------------------------------------------------------------------------------------ load data\n", + "# Load the hdf file\n", + "hdf_file = h5py.File(data_file, \"r\")\n", + "keys = list(hdf_file.keys())\n", + "hdf_file.close()\n", + "\n", + "# available flags - get the list of available event types\n", + "event_types = list(set([key.split(\"_\")[0] for key in keys]))\n", + "\n", + "# Create a dropdown widget for the event type\n", + "evt_type_widget = widgets.Dropdown(options=event_types, description=\"Event Type:\")\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ parameter\n", + "# Define a function to update the parameter dropdown based on the selected event type\n", + "def update_params(*args):\n", + " selected_evt_type = evt_type_widget.value\n", + " params = list(\n", + " set(\n", + " [\n", + " key.split(\"_\")[1]\n", + " for 
key in keys\n", + " if key.split(\"_\")[0] == selected_evt_type\n", + " ]\n", + " )\n", + " )\n", + " param_widget.options = params\n", + "\n", + "\n", + "# Call the update_params function when the event type is changed\n", + "evt_type_widget.observe(update_params, \"value\")\n", + "\n", + "# Create a dropdown widget for the parameter\n", + "param_widget = widgets.Dropdown(description=\"Parameter:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ data format\n", + "data_format = [\"absolute values\", \"% values\"]\n", + "\n", + "# Create a dropdown widget\n", + "data_format_widget = widgets.Dropdown(options=data_format, description=\"data format:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ plot structure\n", + "plot_structures = [\"per string\", \"per channel\"]\n", + "\n", + "# Create a dropdown widget\n", + "plot_structures_widget = widgets.Dropdown(\n", + " options=plot_structures, description=\"Plot structure:\"\n", + ")\n", + "\n", + "# ------------------------------------------------------------------------------------------ plot style\n", + "plot_styles = [\"vs time\", \"histogram\"]\n", + "\n", + "# Create a dropdown widget\n", + "plot_styles_widget = widgets.Dropdown(options=plot_styles, description=\"Plot style:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ resampling\n", + "resampled = [\"no\", \"only\", \"also\"]\n", + "\n", + "# Create a dropdown widget\n", + "resampled_widget = widgets.Dropdown(options=resampled, description=\"Resampled:\")\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ get one or all strings\n", + "if subsystem == \"geds\":\n", + " strings_widg = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, \"all\"]\n", + "if subsystem == \"pulser01ana\":\n", + " strings_widg = [-1]\n", + "\n", + "# Create a dropdown widget\n", + "strings_widget = widgets.Dropdown(options=strings_widg, description=\"String:\")\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ display widgets\n", + "display(evt_type_widget)\n", + "display(param_widget)\n", + "\n", + "# ------------------------------------------------------------------------------------------ get params (based on event type)\n", + "evt_type = evt_type_widget.value\n", + "params = list(set([key.split(\"_\")[1] for key in keys if key.split(\"_\")[0] == evt_type]))\n", + "param_widget.options = params\n", + "\n", + "\n", + "aux_widget = widgets.Dropdown(description=\"Options:\")\n", + "print(\n", + " \"Pick the way you want to include PULS01ANA info\\n(this is not available for EventRate, CuspEmaxCtcCal \\nand AoECustom; in this case, select None):\"\n", + ")\n", + "display(aux_widget)\n", + "\n", + "aux_info = [\"pulser01anaRatio\", \"pulser01anaDiff\", \"None\"]\n", + "aux_dict = {\n", + " \"pulser01anaRatio\": f\"Ratio: {subsystem} / PULS01ANA\",\n", + " \"pulser01anaDiff\": f\"Difference: {subsystem} - PULS01ANA\",\n", + " \"None\": f\"None (ie just plain {subsystem} data)\",\n", + "}\n", + "aux_info = [aux_dict[info] for info in aux_info]\n", + "aux_widget.options = aux_info\n", + "\n", + "print(\"\\033[91mIf you change me, then RUN AGAIN the next cell!!!\\033[0m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "508896aa-8f5c-4bed-a731-bb9aeca61bef", + "metadata": {}, + "outputs": [], + 
"source": [ + "def to_None(string):\n", + " return None if string == \"None\" else string\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ get dataframe\n", + "def display_param_value(*args):\n", + " selected_evt_type = evt_type_widget.value\n", + " selected_param = param_widget.value\n", + " selected_aux_info = aux_widget.value\n", + " print(\n", + " f\"You are going to plot '{selected_param}' for '{selected_evt_type}' events...\"\n", + " )\n", + "\n", + " df_info = pd.DataFrame()\n", + " df_param_orig = pd.DataFrame()\n", + " df_param_var = pd.DataFrame()\n", + " df_param_mean = pd.DataFrame()\n", + "\n", + " for period in periods:\n", + " runs = sorted(\n", + " os.listdir(f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/\")\n", + " )\n", + "\n", + " for run in runs:\n", + " if version == \"\":\n", + " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", + " else:\n", + " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", + "\n", + " # some info\n", + " key = f\"{selected_evt_type}_{selected_param}\"\n", + " df_info = pd.read_hdf(data_file, f\"{key}_info\")\n", + "\n", + " if \"None\" not in selected_aux_info:\n", + " # Iterate over the dictionary items\n", + " for k, v in aux_dict.items():\n", + " if v == selected_aux_info:\n", + " option = k\n", + " break\n", + " key = f\"{selected_evt_type}_{selected_param}_{option}\"\n", + "\n", + " # get dataframe\n", + " tmp_df_param_orig = pd.read_hdf(data_file, f\"{key}\")\n", + " tmp_df_param_var = pd.read_hdf(data_file, f\"{key}_var\")\n", + " tmp_df_param_mean = pd.read_hdf(data_file, f\"{key}_mean\")\n", + "\n", + " df_param_orig = pd.concat([df_param_orig, tmp_df_param_orig])\n", + " df_param_var = pd.concat([df_param_var, tmp_df_param_var])\n", + " df_param_mean = pd.concat([df_param_mean, tmp_df_param_mean])\n", + "\n", + " print(f\"...{period}-{run}: loaded!\")\n", + "\n", + " return df_param_orig, df_param_var, df_param_mean, df_info\n", + "\n", + "\n", + "df_param_orig, df_param_var, df_param_mean, df_info = display_param_value()\n", + "print(f\"...data have been loaded!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bff94f92-e85b-4fa8-b82f-46deab8d4773", + "metadata": {}, + "outputs": [], + "source": [ + "# ---------------------------------------------------------------------------------- get back the usual df shape for legend-data-monitor plots\n", + "pivot_table = df_param_orig.copy()\n", + "pivot_table.reset_index(inplace=True)\n", + "new_df = pd.melt(\n", + " pivot_table, id_vars=[\"datetime\"], var_name=\"channel\", value_name=\"value\"\n", + ")\n", + "new_df_param_orig = new_df.copy().merge(channel_map, on=\"channel\")\n", + "\n", + "pivot_table_var = df_param_var.copy()\n", + "pivot_table_var.reset_index(inplace=True)\n", + "new_df_var = pd.melt(\n", + " pivot_table_var, id_vars=[\"datetime\"], var_name=\"channel\", value_name=\"value\"\n", + ")\n", + "new_df_param_var = new_df_var.copy().merge(channel_map, on=\"channel\")\n", + "\n", + "\n", + "# ---------------------------------------------------------------------------------- remove global spikes (if you are looking at cuspEmax)\n", + "# remove global spikes events by selecting their amplitude\n", + "if \"Cusp\" in param_widget.value:\n", + " new_df_param_orig = new_df_param_orig.loc[new_df_param_var[\"value\"] > 
-10]\n", + " new_df_param_var = new_df_param_var.loc[new_df_param_var[\"value\"] > -10]\n", + " print(\"--> global spikes were removed from cusp plot (threshold: +-10%)!\")\n", + "\n", + "# ---------------------------------------------------------------------------------- recalculate % variation wrt new mean value for all channels\n", + "channel_list = new_df_param_var[\"channel\"].unique()\n", + "channel_df = pd.DataFrame()\n", + "\"\"\"\n", + "for ch in channel_list:\n", + " channel_df = pd.DataFrame()\n", + " new_ch_var = pd.DataFrame()\n", + "\n", + " channel_df = (\n", + " new_df_param_orig[new_df_param_orig[\"channel\"] == ch]\n", + " .sort_values(by=\"datetime\")\n", + " .copy()\n", + " )\n", + " channel_mean = channel_df[\"value\"].iloc[0 : int(0.1 * len(channel_df))].mean()\n", + " new_ch_var = (channel_df[\"value\"] - channel_mean) / channel_mean * 100\n", + " new_df_param_var.loc[\n", + " new_df_param_var[\"channel\"] == ch, \"value\"\n", + " ] = new_ch_var\n", + "\"\"\"\n", + "print(\n", + " \"...% variations were calculated again over the larger time window (mute me if you don't want to keep run-oriented % variations)!\"\n", + ")\n", + "\n", + "\n", + "# ---------------------------------------------------------------------------------- change column names (again, needed for legend-data-monitor plots)\n", + "def convert_to_original_format(camel_case_string: str) -> str:\n", + " \"\"\"Convert a camel case string to its original format.\"\"\"\n", + " original_string = \"\"\n", + " for i, char in enumerate(camel_case_string):\n", + " if char.isupper() and i > 0:\n", + " original_string += \"_\" + char.lower()\n", + " else:\n", + " original_string += char.lower()\n", + "\n", + " return original_string\n", + "\n", + "\n", + "new_df_param_orig = (new_df_param_orig.copy()).rename(\n", + " columns={\n", + " \"value\": convert_to_original_format(param_widget.value)\n", + " if param_widget.value != \"BlMean\"\n", + " else param_widget.value\n", + " }\n", + ")\n", + "new_df_param_var = (new_df_param_var.copy()).rename(\n", + " columns={\n", + " \"value\": convert_to_original_format(param_widget.value) + \"_var\"\n", + " if param_widget.value != \"BlMean\"\n", + " else param_widget.value + \"_var\"\n", + " }\n", + ")\n", + "\n", + "print(\"...data have been formatted to the right structure!\")" + ] + }, + { + "cell_type": "markdown", + "id": "f1c10c0f-9bed-400f-8174-c6d7e185648b", + "metadata": { + "tags": [] + }, + "source": [ + "# Plot data\n", + "For the selected parameter, choose the plot style (you can play with different data formats, plot structures, ... among the available ones).\n", + "\n", + "### Notes\n", + "1. When you select **plot_style='histogram', you'll always plot NOT resampled values** (ie values for each timestamp entry). Indeed, if you choose different resampled options while keeping plot_style='histogram', nothing will change in plots.\n", + "2. **resampled='no'** means you look at each timestamp entry\n", + "3. **resampled='only'** means you look at each timestamp entry mediated over 1H time window (use the button to resampled according to your needs; available options: 1min, 5min, 10min, 30min, 60min)\n", + "4. 
**resampled='also'** means you look at each timestamp entry mediated over 1H time window AND at each timestamp entry TOGETHER -> suggestion: use 'also' just when you choose plot_structures='per channel'; if you have selected 'per string', then you're not going to understand anything" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6fde51f-89b0-49f8-82ed-74d24235cbe0", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Define the time interval options\n", + "time_intervals = [\"1min\", \"5min\", \"10min\", \"30min\", \"60min\"]\n", + "\n", + "# Create RadioButtons with circular style\n", + "radio_buttons = widgets.RadioButtons(\n", + " options=time_intervals,\n", + " button_style=\"circle\",\n", + " description=\"\\t\",\n", + " layout={\"width\": \"max-content\"},\n", + ")\n", + "\n", + "# Create a label widget to display the selected time interval\n", + "selected_interval_label = widgets.Label()\n", + "\n", + "\n", + "# Define a callback function for button selection\n", + "def on_button_selected(change):\n", + " selected_interval_label.value = change.new\n", + "\n", + "\n", + "# Assign the callback function to the RadioButtons\n", + "radio_buttons.observe(on_button_selected, names=\"value\")\n", + "\n", + "# Create a horizontal box to contain the RadioButtons and label\n", + "box_layout = widgets.Layout(display=\"flex\", flex_flow=\"row\", align_items=\"center\")\n", + "container_resampling = widgets.HBox(\n", + " [radio_buttons, selected_interval_label], layout=box_layout\n", + ")\n", + "\n", + "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "# Define the time interval options\n", + "answer = [\"no\", \"yes\"]\n", + "\n", + "# Create RadioButtons with circular style\n", + "limits_buttons = widgets.RadioButtons(\n", + " options=answer,\n", + " button_style=\"circle\",\n", + " description=\"\\t\",\n", + " layout={\"width\": \"max-content\"},\n", + ")\n", + "\n", + "# Assign the callback function to the RadioButtons\n", + "limits_buttons.observe(on_button_selected, names=\"value\")\n", + "\n", + "# Create a horizontal box to contain the RadioButtons and label\n", + "container_limits = widgets.HBox(\n", + " [limits_buttons, selected_interval_label], layout=box_layout\n", + ")\n", + "\n", + "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "# Create text input boxes for min and max values\n", + "min_input = widgets.FloatText(\n", + " description=\"Min y-axis:\", layout=widgets.Layout(width=\"150px\")\n", + ")\n", + "max_input = widgets.FloatText(\n", + " description=\"Max y-axis:\", layout=widgets.Layout(width=\"150px\")\n", + ")\n", + "\n", + "# ------------------------------------------------------------------------------------------ get plots\n", + "display(data_format_widget)\n", + "display(plot_structures_widget)\n", + "display(plot_styles_widget)\n", + "display(strings_widget)\n", + "display(resampled_widget)\n", + "\n", + "print(\"Chose resampling time among the available options:\")\n", + "display(container_resampling)\n", + "\n", + "print(\"Do you want to display horizontal lines for limits in the plots?\")\n", + "display(container_limits)\n", + "\n", + "print(\"Set y-axis range; use min=0=max if you don't want to use any fixed range:\")\n", + "display(widgets.VBox([min_input, max_input]))\n", + "\n", + "print(\"\\033[91mIf you change 
me, then RUN AGAIN the next cell!!!\\033[0m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2122008e-2a6c-49b6-8a81-d351c1bfd57e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# set plotting options\n", + "plot_info = {\n", + " \"unit\": df_info.loc[\"unit\", \"Value\"],\n", + " \"label\": df_info.loc[\"label\", \"Value\"],\n", + " \"lower_lim_var\": float(df_info.loc[\"lower_lim_var\", \"Value\"])\n", + " if limits_buttons.value == \"yes\"\n", + " and to_None(df_info.loc[\"lower_lim_var\", \"Value\"]) is not None\n", + " else None,\n", + " \"upper_lim_var\": float(df_info.loc[\"upper_lim_var\", \"Value\"])\n", + " if limits_buttons.value == \"yes\"\n", + " and to_None(df_info.loc[\"upper_lim_var\", \"Value\"]) is not None\n", + " else None,\n", + " \"lower_lim_abs\": float(df_info.loc[\"lower_lim_abs\", \"Value\"])\n", + " if limits_buttons.value == \"yes\"\n", + " and to_None(df_info.loc[\"lower_lim_abs\", \"Value\"]) is not None\n", + " else None,\n", + " \"upper_lim_abs\": float(df_info.loc[\"upper_lim_abs\", \"Value\"])\n", + " if limits_buttons.value == \"yes\"\n", + " and to_None(df_info.loc[\"upper_lim_abs\", \"Value\"]) is not None\n", + " else None,\n", + " \"plot_style\": plot_styles_widget.value,\n", + " \"plot_structure\": plot_structures_widget.value,\n", + " \"resampled\": resampled_widget.value,\n", + " \"title\": \"\",\n", + " \"subsystem\": \"\",\n", + " \"std\": False,\n", + " \"locname\": {\n", + " \"geds\": \"string\",\n", + " \"spms\": \"fiber\",\n", + " \"pulser\": \"puls\",\n", + " \"pulser01ana\": \"pulser01ana\",\n", + " \"FCbsln\": \"FC bsln\",\n", + " \"muon\": \"muon\",\n", + " }[subsystem],\n", + " \"range\": [min_input.value, max_input.value]\n", + " if min_input.value < max_input.value\n", + " else [None, None],\n", + " \"event_type\": None,\n", + " \"unit_label\": \"%\"\n", + " if data_format_widget.value == \"% values\"\n", + " else df_info.loc[\"unit\", \"Value\"],\n", + " \"parameters\": \"\",\n", + " \"time_window\": radio_buttons.value.split(\"min\")[0] + \"T\",\n", + "}\n", + "\n", + "\n", + "# turn on the std when plotting individual channels together\n", + "if plot_info[\"plot_structure\"] == \"per channel\":\n", + " plot_info[\"std\"] = True\n", + "\n", + "if data_format_widget.value == \"absolute values\":\n", + " plot_info[\"limits\"] = [plot_info[\"lower_lim_abs\"], plot_info[\"upper_lim_abs\"]]\n", + " plot_info[\"parameter\"] = (\n", + " convert_to_original_format(param_widget.value)\n", + " if param_widget.value != \"BlMean\"\n", + " else param_widget.value\n", + " )\n", + " df_to_plot = new_df_param_orig.copy()\n", + "if data_format_widget.value == \"% values\":\n", + " plot_info[\"limits\"] = [plot_info[\"lower_lim_var\"], plot_info[\"upper_lim_var\"]]\n", + " plot_info[\"parameter\"] = (\n", + " convert_to_original_format(param_widget.value) + \"_var\"\n", + " if param_widget.value != \"BlMean\"\n", + " else param_widget.value + \"_var\"\n", + " )\n", + " df_to_plot = new_df_param_var.copy()\n", + "\n", + "print(f\"Making plots now...\")\n", + "\n", + "if isinstance(strings_widget.value, str): # let's get all strings in output\n", + " strings = strings_widg.remove(\"all\")\n", + " for string in strings:\n", + " if plot_structures_widget.value == \"per channel\":\n", + " plotting.plot_per_ch(\n", + " df_to_plot[df_to_plot[\"location\"] == string], plot_info, \"\"\n", + " ) # plot one canvas per channel\n", + " elif plot_structures_widget.value == \"per string\":\n", + " 
plotting.plot_per_string(\n", + " df_to_plot[df_to_plot[\"location\"] == string], plot_info, \"\"\n", + " ) # plot one canvas per string\n", + "else: # let's get one string in output\n", + " if plot_structures_widget.value == \"per channel\":\n", + " plotting.plot_per_ch(\n", + " df_to_plot[df_to_plot[\"location\"] == strings_widget.value], plot_info, \"\"\n", + " ) # plot one canvas per channel\n", + " elif plot_structures_widget.value == \"per string\":\n", + " plotting.plot_per_string(\n", + " df_to_plot[df_to_plot[\"location\"] == strings_widget.value], plot_info, \"\"\n", + " ) # plot one canvas per string" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebook/L200-plotting-individual-runs.ipynb b/notebook/L200-plotting-individual-runs.ipynb new file mode 100644 index 0000000..6a8cdfa --- /dev/null +++ b/notebook/L200-plotting-individual-runs.ipynb @@ -0,0 +1,682 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "308b2266-c882-465f-89d0-c6ffe46e1b08", + "metadata": {}, + "source": [ + "### Introduction\n", + "\n", + "This notebook helps to have a first look at the saved output, reading into hdf files. \n", + "\n", + "It works after having installed the repo 'legend-data-monitor'. In particular, after the cloning, enter into the folder and install the package by typing\n", + "\n", + "```console\n", + "foo@bar:~$ pip install .\n", + "```" + ] + }, + { + "cell_type": "markdown", + "id": "acd13756-4007-4cda-bed2-3ee1b6056d15", + "metadata": {}, + "source": [ + "# Select run to inspect" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5de1e10c-b02d-45eb-9088-3e8103b3cbff", + "metadata": {}, + "outputs": [], + "source": [ + "# ------------------------------------------------------------------------------------------ which data do you want to read? CHANGE ME!\n", + "run = \"r003\" # r000, r001, ...\n", + "subsystem = \"geds\" # KEEP 'geds' for the moment\n", + "folder = \"prod-ref-v2\" # you can change me\n", + "period = \"p06\"\n", + "version = \"\" # leave an empty string if you're looking at p03 data\n", + "\n", + "# ------------------------------------------------------------------------------------------ remove detectors from the plots\n", + "# do you want to remove some detectors? 
If so, put here their names (or empty list if you want everything included)\n", + "to_be_excluded = (\n", + " []\n", + ") # [\"V01406A\", \"V01415A\", \"V01387A\", \"P00665C\", \"P00748B\", \"P00748A\", \"B00089D\"]" + ] + }, + { + "cell_type": "markdown", + "id": "ab6a56d1-ec1e-4162-8b41-49e8df7b5f16", + "metadata": {}, + "source": [ + "# Select event type, parameter and original or PULS01ANA-rescaled values" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3348d46-78a7-4be3-80de-a88610d88f00", + "metadata": {}, + "outputs": [], + "source": [ + "# ------------------------------------------------------------------------------------------ ...from here, you don't need to change anything in the code\n", + "import sys\n", + "import h5py\n", + "import shelve\n", + "import matplotlib\n", + "import pandas as pd\n", + "import numpy as np\n", + "import ipywidgets as widgets\n", + "from IPython.display import display\n", + "from matplotlib import pyplot as plt\n", + "from matplotlib.patches import Rectangle\n", + "from legend_data_monitor import plot_styles, plotting, utils\n", + "import legend_data_monitor as ldm\n", + "\n", + "%matplotlib widget\n", + "\n", + "if version == \"\":\n", + " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", + "else:\n", + " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}.hdf\"\n", + "\n", + "# ------------------------------------------------------------------------------------------ building channel map\n", + "dataset = {\n", + " \"experiment\": \"L200\",\n", + " \"period\": period,\n", + " \"type\": \"phy\",\n", + " \"version\": version,\n", + " \"path\": \"/data2/public/prodenv/prod-blind/tmp/auto\",\n", + " \"runs\": int(run[1:]),\n", + "}\n", + "\n", + "geds = ldm.Subsystem(\"geds\", dataset=dataset)\n", + "channel_map = geds.channel_map\n", + "\n", + "# remove probl dets\n", + "for det in to_be_excluded:\n", + " channel_map = channel_map[channel_map.name != det]\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ load data\n", + "# Load the hdf file\n", + "hdf_file = h5py.File(data_file, \"r\")\n", + "keys = list(hdf_file.keys())\n", + "hdf_file.close()\n", + "\n", + "# available flags - get the list of available event types\n", + "event_types = list(set([key.split(\"_\")[0] for key in keys]))\n", + "\n", + "# Create a dropdown widget for the event type\n", + "evt_type_widget = widgets.Dropdown(options=event_types, description=\"Event Type:\")\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ parameter\n", + "# Define a function to update the parameter dropdown based on the selected event type\n", + "def update_params(*args):\n", + " selected_evt_type = evt_type_widget.value\n", + " params = list(\n", + " set(\n", + " [\n", + " key.split(\"_\")[1]\n", + " for key in keys\n", + " if key.split(\"_\")[0] == selected_evt_type\n", + " ]\n", + " )\n", + " )\n", + " param_widget.options = params\n", + "\n", + "\n", + "# Call the update_params function when the event type is changed\n", + "evt_type_widget.observe(update_params, \"value\")\n", + "\n", + "# Create a dropdown widget for the parameter\n", + "param_widget = widgets.Dropdown(description=\"Parameter:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ 
data format\n", + "data_format = [\"absolute values\", \"% values\"]\n", + "\n", + "# Create a dropdown widget\n", + "data_format_widget = widgets.Dropdown(options=data_format, description=\"data format:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ plot structure\n", + "plot_structures = [\"per string\", \"per channel\"]\n", + "\n", + "# Create a dropdown widget\n", + "plot_structures_widget = widgets.Dropdown(\n", + " options=plot_structures, description=\"Plot structure:\"\n", + ")\n", + "\n", + "# ------------------------------------------------------------------------------------------ plot style\n", + "plot_styles = [\"vs time\", \"histogram\"]\n", + "\n", + "# Create a dropdown widget\n", + "plot_styles_widget = widgets.Dropdown(options=plot_styles, description=\"Plot style:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ resampling\n", + "resampled = [\"no\", \"only\", \"also\"]\n", + "\n", + "# Create a dropdown widget\n", + "resampled_widget = widgets.Dropdown(options=resampled, description=\"Resampled:\")\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ get one or all strings\n", + "strings = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, \"all\"]\n", + "\n", + "# Create a dropdown widget\n", + "strings_widget = widgets.Dropdown(options=strings, description=\"String:\")\n", + "\n", + "# ------------------------------------------------------------------------------------------ display widgets\n", + "display(evt_type_widget)\n", + "display(param_widget)\n", + "\n", + "# ------------------------------------------------------------------------------------------ get params (based on event type)\n", + "evt_type = evt_type_widget.value\n", + "params = list(set([key.split(\"_\")[1] for key in keys if key.split(\"_\")[0] == evt_type]))\n", + "param_widget.options = params\n", + "\n", + "\n", + "aux_widget = widgets.Dropdown(description=\"Options:\")\n", + "print(\n", + " \"Pick the way you want to include PULS01ANA info\\n(this is not available for EventRate, CuspEmaxCtcCal \\nand AoECustom; in this case, select None):\"\n", + ")\n", + "display(aux_widget)\n", + "\n", + "aux_info = [\"pulser01anaRatio\", \"pulser01anaDiff\", \"None\"]\n", + "aux_dict = {\n", + " \"pulser01anaRatio\": f\"Ratio: {subsystem} / PULS01ANA\",\n", + " \"pulser01anaDiff\": f\"Difference: {subsystem} - PULS01ANA\",\n", + " \"None\": f\"None (ie just plain {subsystem} data)\",\n", + "}\n", + "aux_info = [aux_dict[info] for info in aux_info]\n", + "aux_widget.options = aux_info\n", + "\n", + "print(\"\\033[91mIf you change me, then RUN AGAIN the next cell!!!\\033[0m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "508896aa-8f5c-4bed-a731-bb9aeca61bef", + "metadata": {}, + "outputs": [], + "source": [ + "def to_None(string):\n", + " return None if string == \"None\" else string\n", + "\n", + "\n", + "# ------------------------------------------------------------------------------------------ get dataframe\n", + "def display_param_value(*args):\n", + " selected_evt_type = evt_type_widget.value\n", + " selected_param = param_widget.value\n", + " selected_aux_info = aux_widget.value\n", + " print(\n", + " f\"You are going to plot '{selected_param}' for '{selected_evt_type}' events...\"\n", + " )\n", + "\n", + " key = f\"{selected_evt_type}_{selected_param}\"\n", + " print(key)\n", + " print(selected_aux_info)\n", + 
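For reference, the keys read in this cell follow a fixed naming scheme in the HDF files written by legend-data-monitor. Below is a minimal sketch of how those keys can be inspected by hand; the file path and the `pulser_Baseline` key are illustrative examples only.

```python
import h5py
import pandas as pd

# illustrative path, following the same pattern built earlier in this notebook
data_file = "/data1/users/calgaro/prod-ref-v2/generated/plt/phy/p06/r003/l200-p06-r003-phy-geds.hdf"

# each monitored quantity is saved under "<event_type>_<parameter>" (e.g. "pulser_Baseline"),
# with companion keys "<key>_info" (unit, label, thresholds), "<key>_var" (% variation)
# and "<key>_mean" (channel means); PULS01ANA-rescaled data, when present, add the
# "<key>_pulser01anaRatio" and "<key>_pulser01anaDiff" variants
with h5py.File(data_file, "r") as f:
    print(sorted(f.keys()))

key = "pulser_Baseline"  # hypothetical key, pick one of those printed above
df_abs = pd.read_hdf(data_file, key)             # absolute values
df_var = pd.read_hdf(data_file, f"{key}_var")    # % variations wrt the channel mean
df_info = pd.read_hdf(data_file, f"{key}_info")  # metadata table
```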
" # some info\n", + " df_info = pd.read_hdf(data_file, f\"{key}_info\")\n", + "\n", + " if \"None\" not in selected_aux_info:\n", + " # Iterate over the dictionary items\n", + " for k, v in aux_dict.items():\n", + " if v == selected_aux_info:\n", + " option = k\n", + " break\n", + " key += f\"_{option}\"\n", + "\n", + " # get dataframe\n", + " df_param_orig = pd.read_hdf(data_file, f\"{key}\")\n", + " df_param_var = pd.read_hdf(data_file, f\"{key}_var\")\n", + " df_param_mean = pd.read_hdf(data_file, f\"{key}_mean\")\n", + "\n", + " return df_param_orig, df_param_var, df_param_mean, df_info\n", + "\n", + "\n", + "df_param_orig, df_param_var, df_param_mean, df_info = display_param_value()\n", + "print(f\"...data have beeng loaded!\")\n", + "\n", + "\n", + "pivot_table = df_param_orig.copy()\n", + "pivot_table.reset_index(inplace=True)\n", + "new_df = pd.melt(\n", + " pivot_table, id_vars=[\"datetime\"], var_name=\"channel\", value_name=\"value\"\n", + ")\n", + "new_df_param_orig = new_df.copy().merge(channel_map, on=\"channel\")\n", + "\n", + "pivot_table_var = df_param_var.copy()\n", + "pivot_table_var.reset_index(inplace=True)\n", + "new_df_var = pd.melt(\n", + " pivot_table_var, id_vars=[\"datetime\"], var_name=\"channel\", value_name=\"value\"\n", + ")\n", + "new_df_param_var = new_df_var.copy().merge(channel_map, on=\"channel\")\n", + "\n", + "\n", + "def convert_to_original_format(camel_case_string: str) -> str:\n", + " \"\"\"Convert a camel case string to its original format.\"\"\"\n", + " original_string = \"\"\n", + " for i, char in enumerate(camel_case_string):\n", + " if char.isupper() and i > 0:\n", + " original_string += \"_\" + char.lower()\n", + " else:\n", + " original_string += char.lower()\n", + "\n", + " return original_string\n", + "\n", + "\n", + "new_df_param_orig = (new_df_param_orig.copy()).rename(\n", + " columns={\n", + " \"value\": convert_to_original_format(param_widget.value)\n", + " if param_widget.value != \"BlMean\"\n", + " else param_widget.value\n", + " }\n", + ")\n", + "new_df_param_var = (new_df_param_var.copy()).rename(\n", + " columns={\n", + " \"value\": convert_to_original_format(param_widget.value) + \"_var\"\n", + " if param_widget.value != \"BlMean\"\n", + " else param_widget.value + \"_var\"\n", + " }\n", + ")\n", + "\n", + "print(\"...data have been formatted to the right structure!\")" + ] + }, + { + "cell_type": "markdown", + "id": "f1c10c0f-9bed-400f-8174-c6d7e185648b", + "metadata": {}, + "source": [ + "# Plot data\n", + "For the selected parameter, choose the plot style (you can play with different data formats, plot structures, ... among the available ones).\n", + "\n", + "### Notes\n", + "1. When you select **plot_style='histogram', you'll always plot NOT resampled values** (ie values for each timestamp entry). Indeed, if you choose different resampled options while keeping plot_style='histogram', nothing will change in plots.\n", + "2. **resampled='no'** means you look at each timestamp entry\n", + "3. **resampled='only'** means you look at each timestamp entry mediated over 1H time window (use the button to resampled according to your needs; available options: 1min, 5min, 10min, 30min, 60min)\n", + "4. 
**resampled='also'** means you look at each timestamp entry mediated over 1H time window AND at each timestamp entry TOGETHER -> suggestion: use 'also' just when you choose plot_structures='per channel'; if you have selected 'per string', then you're not going to understand anything" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6fde51f-89b0-49f8-82ed-74d24235cbe0", + "metadata": {}, + "outputs": [], + "source": [ + "# Define the time interval options\n", + "time_intervals = [\"1min\", \"5min\", \"10min\", \"30min\", \"60min\"]\n", + "\n", + "# Create RadioButtons with circular style\n", + "radio_buttons = widgets.RadioButtons(\n", + " options=time_intervals,\n", + " button_style=\"circle\",\n", + " description=\"\\t\",\n", + " layout={\"width\": \"max-content\"},\n", + ")\n", + "\n", + "# Create a label widget to display the selected time interval\n", + "selected_interval_label = widgets.Label()\n", + "\n", + "\n", + "# Define a callback function for button selection\n", + "def on_button_selected(change):\n", + " selected_interval_label.value = change.new\n", + "\n", + "\n", + "# Assign the callback function to the RadioButtons\n", + "radio_buttons.observe(on_button_selected, names=\"value\")\n", + "\n", + "# Create a horizontal box to contain the RadioButtons and label\n", + "box_layout = widgets.Layout(display=\"flex\", flex_flow=\"row\", align_items=\"center\")\n", + "container_resampling = widgets.HBox(\n", + " [radio_buttons, selected_interval_label], layout=box_layout\n", + ")\n", + "\n", + "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "# Define the time interval options\n", + "answer = [\"no\", \"yes\"]\n", + "\n", + "# Create RadioButtons with circular style\n", + "limits_buttons = widgets.RadioButtons(\n", + " options=answer,\n", + " button_style=\"circle\",\n", + " description=\"\\t\",\n", + " layout={\"width\": \"max-content\"},\n", + ")\n", + "\n", + "# Assign the callback function to the RadioButtons\n", + "limits_buttons.observe(on_button_selected, names=\"value\")\n", + "\n", + "# Create a horizontal box to contain the RadioButtons and label\n", + "container_limits = widgets.HBox(\n", + " [limits_buttons, selected_interval_label], layout=box_layout\n", + ")\n", + "\n", + "# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", + "# Create text input boxes for min and max values\n", + "min_input = widgets.FloatText(\n", + " description=\"Min y-axis:\", layout=widgets.Layout(width=\"150px\")\n", + ")\n", + "max_input = widgets.FloatText(\n", + " description=\"Max y-axis:\", layout=widgets.Layout(width=\"150px\")\n", + ")\n", + "\n", + "# ------------------------------------------------------------------------------------------ get plots\n", + "display(data_format_widget)\n", + "display(plot_structures_widget)\n", + "display(plot_styles_widget)\n", + "display(strings_widget)\n", + "display(resampled_widget)\n", + "\n", + "print(\"Chose resampling time among the available options:\")\n", + "display(container_resampling)\n", + "\n", + "print(\"Do you want to display horizontal lines for limits in the plots?\")\n", + "display(container_limits)\n", + "\n", + "print(\"Set y-axis range; use min=0=max if you don't want to use any fixed range:\")\n", + "display(widgets.VBox([min_input, max_input]))\n", + "\n", + "print(\"\\033[91mIf you change me, then RUN 
AGAIN the next cell!!!\\033[0m\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2122008e-2a6c-49b6-8a81-d351c1bfd57e", + "metadata": {}, + "outputs": [], + "source": [ + "# set plotting options\n", + "plot_info = {\n", + " \"unit\": df_info.loc[\"unit\", \"Value\"],\n", + " \"label\": df_info.loc[\"label\", \"Value\"],\n", + " \"lower_lim_var\": float(df_info.loc[\"lower_lim_var\", \"Value\"])\n", + " if limits_buttons.value == \"yes\"\n", + " and to_None(df_info.loc[\"lower_lim_var\", \"Value\"]) is not None\n", + " else None,\n", + " \"upper_lim_var\": float(df_info.loc[\"upper_lim_var\", \"Value\"])\n", + " if limits_buttons.value == \"yes\"\n", + " and to_None(df_info.loc[\"upper_lim_var\", \"Value\"]) is not None\n", + " else None,\n", + " \"lower_lim_abs\": float(df_info.loc[\"lower_lim_abs\", \"Value\"])\n", + " if limits_buttons.value == \"yes\"\n", + " and to_None(df_info.loc[\"lower_lim_abs\", \"Value\"]) is not None\n", + " else None,\n", + " \"upper_lim_abs\": float(df_info.loc[\"upper_lim_abs\", \"Value\"])\n", + " if limits_buttons.value == \"yes\"\n", + " and to_None(df_info.loc[\"upper_lim_abs\", \"Value\"]) is not None\n", + " else None,\n", + " \"plot_style\": plot_styles_widget.value,\n", + " \"plot_structure\": plot_structures_widget.value,\n", + " \"resampled\": resampled_widget.value,\n", + " \"title\": \"\",\n", + " \"subsystem\": \"\",\n", + " \"std\": False,\n", + " \"locname\": {\n", + " \"geds\": \"string\",\n", + " \"spms\": \"fiber\",\n", + " \"pulser\": \"puls\",\n", + " \"pulser01ana\": \"pulser01ana\",\n", + " \"FCbsln\": \"FC bsln\",\n", + " \"muon\": \"muon\",\n", + " }[subsystem],\n", + " \"range\": [min_input.value, max_input.value]\n", + " if min_input.value < max_input.value\n", + " else [None, None],\n", + " \"event_type\": None,\n", + " \"unit_label\": \"%\"\n", + " if data_format_widget.value == \"% values\"\n", + " else df_info.loc[\"unit\", \"Value\"],\n", + " \"parameters\": \"\",\n", + " \"time_window\": radio_buttons.value.split(\"min\")[0] + \"T\",\n", + "}\n", + "\n", + "\n", + "# turn on the std when plotting individual channels together\n", + "if plot_info[\"plot_structure\"] == \"per channel\":\n", + " plot_info[\"std\"] = True\n", + "\n", + "if data_format_widget.value == \"absolute values\":\n", + " plot_info[\"limits\"] = [plot_info[\"lower_lim_abs\"], plot_info[\"upper_lim_abs\"]]\n", + " plot_info[\"parameter\"] = (\n", + " convert_to_original_format(param_widget.value)\n", + " if param_widget.value != \"BlMean\"\n", + " else param_widget.value\n", + " )\n", + " df_to_plot = new_df_param_orig.copy()\n", + "if data_format_widget.value == \"% values\":\n", + " plot_info[\"limits\"] = [plot_info[\"lower_lim_var\"], plot_info[\"upper_lim_var\"]]\n", + " plot_info[\"parameter\"] = (\n", + " convert_to_original_format(param_widget.value) + \"_var\"\n", + " if param_widget.value != \"BlMean\"\n", + " else param_widget.value + \"_var\"\n", + " )\n", + " df_to_plot = new_df_param_var.copy()\n", + "\n", + "print(f\"Making plots now...\")\n", + "\n", + "if isinstance(strings_widget.value, str): # let's get all strings in output\n", + " for string in [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]:\n", + " if plot_structures_widget.value == \"per channel\":\n", + " plotting.plot_per_ch(\n", + " df_to_plot[df_to_plot[\"location\"] == string], plot_info, \"\"\n", + " ) # plot one canvas per channel\n", + " elif plot_structures_widget.value == \"per string\":\n", + " plotting.plot_per_string(\n", + " df_to_plot[df_to_plot[\"location\"] 
== string], plot_info, \"\"\n", + " ) # plot one canvas per string\n", + "else: # let's get one string in output\n", + " if plot_structures_widget.value == \"per channel\":\n", + " plotting.plot_per_ch(\n", + " df_to_plot[df_to_plot[\"location\"] == strings_widget.value], plot_info, \"\"\n", + " ) # plot one canvas per channel\n", + " elif plot_structures_widget.value == \"per string\":\n", + " plotting.plot_per_string(\n", + " df_to_plot[df_to_plot[\"location\"] == strings_widget.value], plot_info, \"\"\n", + " ) # plot one canvas per string" + ] + }, + { + "cell_type": "markdown", + "id": "17542fbd-a2fb-4474-829a-adb0ef99aae3", + "metadata": { + "tags": [] + }, + "source": [ + "# Plot means vs channels\n", + "Here you can monitor the **mean** ('x' green marker) and **median** (horizontal green line) behaves separately for different channels, grouped by string. The box shows the IQR (interquartile range), ie the distance between the upper and lower quartiles, q(0.75)-q(0.25). Vertical lines end up to the min and max value of a given parameter's distribution for each channel." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "017b16e9-da40-4a0b-9503-ce4c9e65070c", + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Do you want to display horizontal lines for limits in the plots?\")\n", + "display(container_limits)\n", + "print(\"Set y-axis range; use min=0=max if you don't want to use any fixed range:\")\n", + "display(widgets.VBox([min_input, max_input]))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51ae3c7f-19d2-4760-96c6-fafdfe6e6316", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "param = {\n", + " \"Cuspemax\": \"cuspemax_var\",\n", + " \"Baseline\": \"baseline_var\",\n", + " \"BlMean\": \"blmean_var\",\n", + " \"CuspemaxCtcCal\": \"cuspemax_ctc_cal_var\",\n", + "}\n", + "\n", + "grouped_df = new_df_param_var.groupby([\"location\", \"position\", \"name\"])[\n", + " param[param_widget.value]\n", + "]\n", + "\n", + "my_df = pd.DataFrame()\n", + "my_df[\"mean\"] = grouped_df.mean()\n", + "my_df[\"std\"] = grouped_df.std()\n", + "my_df[\"std_2\"] = 2 * grouped_df.std()\n", + "my_df[\"std_3\"] = 3 * grouped_df.std()\n", + "my_df[\"minimum\"] = grouped_df.min()\n", + "my_df[\"maximum\"] = grouped_df.max()\n", + "\n", + "# Create boxes for mean ± std and plot mean as a horizontal line\n", + "box_width = 0.5 # Width of the boxes\n", + "box_positions = np.arange(len(my_df))\n", + "\n", + "# Create the figure and axis\n", + "fig, ax = plt.subplots(figsize=(16, 6))\n", + "\n", + "l = 0.15\n", + "\n", + "current_string = 0\n", + "current_index = -1\n", + "name_list = []\n", + "my_df.reset_index()\n", + "\n", + "for index, row in my_df.reset_index().iterrows():\n", + " if current_string != row[\"location\"]:\n", + " current_index += 1\n", + " ax.vlines(current_index, -100, 100, color=\"black\", linewidth=2, zorder=10)\n", + " current_string = row[\"location\"]\n", + " name_list.append(f\"string {row.location}\")\n", + "\n", + " current_index += 1\n", + "\n", + " rect3 = Rectangle(\n", + " (current_index - box_width / 2, row[\"mean\"] - row[\"std_3\"]),\n", + " box_width,\n", + " 2 * row[\"std_3\"],\n", + " fill=True,\n", + " alpha=0.15,\n", + " color=\"gray\",\n", + " linewidth=0,\n", + " zorder=3,\n", + " )\n", + "\n", + " rect2 = Rectangle(\n", + " (current_index - box_width / 2, row[\"mean\"] - row[\"std_2\"]),\n", + " box_width,\n", + " 2 * row[\"std_2\"],\n", + " fill=True,\n", + " alpha=0.5,\n", + " 
color=\"gray\",\n", + " linewidth=0,\n", + " zorder=3,\n", + " )\n", + "\n", + " rect = Rectangle(\n", + " (current_index - box_width / 2, row[\"mean\"] - row[\"std\"]),\n", + " box_width,\n", + " 2 * row[\"std\"],\n", + " fill=True,\n", + " alpha=0.9,\n", + " color=\"gray\",\n", + " linewidth=0,\n", + " zorder=2,\n", + " )\n", + "\n", + " ax.add_patch(rect3)\n", + " ax.add_patch(rect2)\n", + " ax.add_patch(rect)\n", + " ax.plot(\n", + " [current_index - box_width / 2, current_index + box_width / 2],\n", + " [row[\"mean\"], row[\"mean\"]],\n", + " color=\"tab:red\",\n", + " zorder=10,\n", + " )\n", + " ax.grid()\n", + "\n", + " # Plot horizontal black lines at min and max values\n", + " ax.hlines(\n", + " row[\"minimum\"],\n", + " current_index - l,\n", + " current_index + l,\n", + " color=\"k\",\n", + " zorder=2,\n", + " linewidth=1,\n", + " )\n", + " ax.hlines(\n", + " row[\"maximum\"],\n", + " current_index - l,\n", + " current_index + l,\n", + " color=\"k\",\n", + " zorder=2,\n", + " linewidth=1,\n", + " )\n", + "\n", + " # Plot vertical lines min and max values\n", + " ax.vlines(\n", + " current_index,\n", + " row[\"std\"] + row[\"mean\"],\n", + " row[\"maximum\"],\n", + " color=\"k\",\n", + " linewidth=1,\n", + " )\n", + " ax.vlines(\n", + " current_index,\n", + " row[\"minimum\"],\n", + " -row[\"std\"] + row[\"mean\"],\n", + " color=\"k\",\n", + " linewidth=1,\n", + " )\n", + "\n", + " name_list.append(row[\"name\"])\n", + "\n", + "\n", + "# Plot lines for mean value thresholds\n", + "ax.hlines(5, 0, len(name_list) - 1, color=\"tab:green\", zorder=3, linewidth=1)\n", + "ax.hlines(-5, 0, len(name_list) - 1, color=\"tab:green\", zorder=3, linewidth=1)\n", + "\n", + "# Plot lines for std value thresholds\n", + "ax.hlines(\n", + " 10, 0, len(name_list) - 1, color=\"tab:orange\", zorder=3, linewidth=1, linestyle=\"--\"\n", + ")\n", + "ax.hlines(\n", + " -10,\n", + " 0,\n", + " len(name_list) - 1,\n", + " color=\"tab:orange\",\n", + " zorder=3,\n", + " linewidth=1,\n", + " linestyle=\"--\",\n", + ")\n", + "\n", + "# Set labels and title\n", + "ax.set_xticks(np.arange(len(name_list)))\n", + "ax.set_xticklabels(name_list, rotation=90)\n", + "\n", + "# Show plot\n", + "x_min = min_input.value\n", + "x_max = max_input.value\n", + "if x_min == 0 and x_max == 0:\n", + " x_min = -50\n", + " x_max = 50\n", + "div = 12\n", + "ax.set_ylim([x_min, x_max])\n", + "ax.set_yticks(np.arange(x_min, x_max, div))\n", + "ax.set_ylabel(f\"{param_widget.value} % variation\")\n", + "ax.set_title(f\"{period}-{run}\")\n", + "plt.tight_layout()\n", + "plt.show()" + ] + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebook/L200-plotting-widgets.ipynb b/notebook/L200-plotting-widgets.ipynb deleted file mode 100644 index 425c7e1..0000000 --- a/notebook/L200-plotting-widgets.ipynb +++ /dev/null @@ -1,291 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0", - "metadata": {}, - "source": [ - "### Introduction\n", - "\n", - "This notebook helps to have a first look at the saved output. \n", - "\n", - "It works after having installed the repo 'legend-data-monitor'. 
In particular, after the cloning, enter into the folder and install the package by typing\n", - "\n", - "```console\n", - "foo@bar:~$ pip install .\n", - "```" - ] - }, - { - "cell_type": "markdown", - "id": "1", - "metadata": {}, - "source": [ - "# Select event type and parameter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ which data do you want to read? CHANGE ME!\n", - "run = \"r001\" # r000, r001, ...\n", - "subsystem = \"geds\" # KEEP 'geds' for the moment\n", - "folder = \"prod-ref\"\n", - "period = \"p04\"\n", - "version = \"\" # leave an empty string if you're looking at p03 data\n", - "\n", - "if version == \"\":\n", - " data_file = f\"/data1/users/calgaro/{folder}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}\"\n", - "else:\n", - " data_file = f\"/data1/users/calgaro/{folder}/{version}/generated/plt/phy/{period}/{run}/l200-{period}-{run}-phy-{subsystem}\"\n", - "\n", - "\n", - "# ------------------------------------------------------------------------------------------ ...from here, you don't need to change anything in the code\n", - "import sys\n", - "import shelve\n", - "import matplotlib\n", - "import ipywidgets as widgets\n", - "from IPython.display import display\n", - "from matplotlib import pyplot as plt\n", - "from legend_data_monitor import plot_styles, plotting, utils\n", - "\n", - "%matplotlib widget\n", - "\n", - "# ------------------------------------------------------------------------------------------ load data\n", - "# Load the shelve object\n", - "shelf = shelve.open(data_file)\n", - "\n", - "# ------------------------------------------------------------------------------------------ evt type\n", - "# Get the list of available event types\n", - "event_types = list(shelf[\"monitoring\"].keys())\n", - "\n", - "# Create a dropdown widget for the event type\n", - "evt_type_widget = widgets.Dropdown(options=event_types, description=\"Event Type:\")\n", - "\n", - "\n", - "# ------------------------------------------------------------------------------------------ parameter\n", - "# Define a function to update the parameter dropdown based on the selected event type\n", - "def update_params(*args):\n", - " selected_evt_type = evt_type_widget.value\n", - " params = list(shelf[\"monitoring\"][selected_evt_type].keys())\n", - " param_widget.options = params\n", - "\n", - "\n", - "# Call the update_params function when the event type is changed\n", - "evt_type_widget.observe(update_params, \"value\")\n", - "\n", - "# Create a dropdown widget for the parameter\n", - "param_widget = widgets.Dropdown(description=\"Parameter:\")\n", - "\n", - "# ------------------------------------------------------------------------------------------ data format\n", - "data_format = [\"absolute values\", \"% values\"]\n", - "\n", - "# Create a dropdown widget\n", - "data_format_widget = widgets.Dropdown(options=data_format, description=\"data format:\")\n", - "\n", - "# ------------------------------------------------------------------------------------------ plot structure\n", - "plot_structures = [\"per string\", \"per channel\"]\n", - "\n", - "# Create a dropdown widget\n", - "plot_structures_widget = widgets.Dropdown(\n", - " options=plot_structures, description=\"Plot structure:\"\n", - ")\n", - "\n", - "# 
------------------------------------------------------------------------------------------ plot style\n", - "plot_styles = [\"vs time\", \"histogram\"]\n", - "\n", - "# Create a dropdown widget\n", - "plot_styles_widget = widgets.Dropdown(options=plot_styles, description=\"Plot style:\")\n", - "\n", - "# ------------------------------------------------------------------------------------------ resampling\n", - "resampled = [\"no\", \"only\", \"also\"]\n", - "\n", - "# Create a dropdown widget\n", - "resampled_widget = widgets.Dropdown(options=resampled, description=\"Resampled:\")\n", - "\n", - "\n", - "# ------------------------------------------------------------------------------------------ get one or all strings\n", - "strings = [1, 2, 3, 4, 5, 7, 8, 9, 10, 11, \"all\"]\n", - "\n", - "# Create a dropdown widget\n", - "strings_widget = widgets.Dropdown(options=strings, description=\"String:\")\n", - "\n", - "# ------------------------------------------------------------------------------------------ display widgets\n", - "display(evt_type_widget)\n", - "display(\n", - " param_widget\n", - ") # it takes a while before displaying available parameters in the corresponding widget\n", - "\n", - "# ------------------------------------------------------------------------------------------ get params (based on event type)\n", - "evt_type = evt_type_widget.value\n", - "params = list(shelf[\"monitoring\"][evt_type].keys())\n", - "param_widget.options = params\n", - "\n", - "print(\"\\033[91mIf you change me, then RUN AGAIN the next cell!!!\\033[0m\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ get dataframe\n", - "def display_param_value(*args):\n", - " selected_evt_type = evt_type_widget.value\n", - " selected_param = param_widget.value\n", - " print(\n", - " f\"You are going to plot '{selected_param}' for '{selected_evt_type}' events...\"\n", - " )\n", - " # get dataframe\n", - " df_param = shelf[\"monitoring\"][selected_evt_type][selected_param][\"df_geds\"]\n", - " # get plot info\n", - " plot_info = shelf[\"monitoring\"][selected_evt_type][selected_param][\"plot_info\"]\n", - "\n", - " return df_param, plot_info\n", - "\n", - "\n", - "df_param, plot_info = display_param_value()\n", - "print(f\"...data have beeng loaded!\")" - ] - }, - { - "cell_type": "markdown", - "id": "4", - "metadata": {}, - "source": [ - "# Plot data (select style and string)\n", - "For the selected parameter, choose the plot style (you can play with different data formats, plot structures, ... among the available ones).\n", - "\n", - "### Notes\n", - "1. I recommend using just **\"absolute values\" when plotting 'bl_std'** to see how noisy is each detector.\n", - "2. When you select **plot_style='histogram', you'll always plot NOT resampled values** (ie values for each timestamp entry). Indeed, if you choose different resampled options while keeping plot_style='histogram', nothing will change in plots.\n", - "4. **resampled='no'** means you look at each timestamp entry\n", - "5. **resampled='only'** means you look at each timestamp entry mediated over 1H time window ('1H' might change - in case, you can see what value was used for the resampling by printing ```print(plot_info['time_window'])``` (T=minutes, H=hours, D=days)\n", - "6. 
**resampled='also'** means you look at each timestamp entry mediated over 1H time window AND at each timestamp entry TOGETHER -> suggestion: use 'also' just when you choose plot_structures='per channel'; if you have selected 'per string', then you're not going to understand anything" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ get plots\n", - "display(data_format_widget)\n", - "display(plot_structures_widget)\n", - "display(plot_styles_widget)\n", - "display(resampled_widget)\n", - "display(strings_widget)\n", - "print(\"\\033[91mIf you change me, then RUN AGAIN the next cell!!!\\033[0m\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# set plotting options\n", - "plot_info[\"plot_style\"] = plot_styles_widget.value\n", - "plot_info[\"resampled\"] = resampled_widget.value\n", - "plot_info[\"title\"] = \"\" # for plotting purposes\n", - "plot_info[\"subsystem\"] = \"\" # for plotting purposes\n", - "\n", - "if data_format_widget.value == \"absolute values\":\n", - " plot_info[\"parameter\"] = (\n", - " plot_info[\"parameter\"].split(\"_var\")[0]\n", - " if \"_var\" in plot_info[\"parameter\"]\n", - " else plot_info[\"parameter\"]\n", - " )\n", - " plot_info[\"limits\"] = utils.PLOT_INFO[plot_info[\"parameter\"]][\"limits\"][subsystem][\n", - " \"absolute\"\n", - " ]\n", - " plot_info[\"unit_label\"] = plot_info[\"unit\"]\n", - " if plot_info[\"parameter\"] not in df_param:\n", - " print(\"There is no\", plot_info[\"parameter\"])\n", - " sys.exit(\"Stopping notebook.\")\n", - "if data_format_widget.value == \"% values\":\n", - " plot_info[\"parameter\"] = (\n", - " plot_info[\"parameter\"]\n", - " if \"_var\" in plot_info[\"parameter\"]\n", - " else plot_info[\"parameter\"] + \"_var\"\n", - " )\n", - " plot_info[\"limits\"] = utils.PLOT_INFO[plot_info[\"parameter\"].split(\"_var\")[0]][\n", - " \"limits\"\n", - " ][subsystem][\"variation\"]\n", - " plot_info[\"unit_label\"] = \"%\"\n", - " if plot_info[\"parameter\"] not in df_param:\n", - " print(\"There is no\", plot_info[\"parameter\"])\n", - " sys.exit(\"Stopping notebook.\")\n", - "\n", - "print(f\"Making plots now...\")\n", - "if isinstance(strings_widget.value, str): # let's get all strings in output\n", - " for string in [1, 2, 3, 4, 5, 7, 8, 9, 10, 11]:\n", - " if plot_structures_widget.value == \"per channel\":\n", - " plotting.plot_per_ch(\n", - " df_param[df_param[\"location\"] == string], plot_info, \"\"\n", - " ) # plot one canvas per channel\n", - " elif plot_structures_widget.value == \"per string\":\n", - " plotting.plot_per_string(\n", - " df_param[df_param[\"location\"] == string], plot_info, \"\"\n", - " ) # plot one canvas per string\n", - "else: # let's get one string in output\n", - " if plot_structures_widget.value == \"per channel\":\n", - " plotting.plot_per_ch(\n", - " df_param[df_param[\"location\"] == strings_widget.value], plot_info, \"\"\n", - " ) # plot one canvas per channel\n", - " elif plot_structures_widget.value == \"per string\":\n", - " plotting.plot_per_string(\n", - " df_param[df_param[\"location\"] == strings_widget.value], plot_info, \"\"\n", - " ) # plot one canvas per string" - ] - }, - { - "cell_type": "markdown", - "id": "7", - "metadata": { - "tags": [] - }, - "source": [ - "# Plot means vs channels\n", - "Here you can 
monitor how the **mean value (evaluated over the first 10% of data)** behaves separately for different channels, grouped by string. These mean values are the ones **used to compute percentage variations**. The average value displayed in the legend on the right of the plot generated below shows the average of mean values for a given string." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# ------------------------------------------------------------------------------------------ get means plot\n", - "plot_info[\"plot_style\"] = \"vs ch\"\n", - "plot_info[\"unit_label\"] = plot_info[\"unit\"]\n", - "plot_info[\"parameter\"] = (\n", - " plot_info[\"parameter\"].split(\"_var\")[0]\n", - " if \"_var\" in plot_info[\"parameter\"]\n", - " else plot_info[\"parameter\"]\n", - ")\n", - "plot_info[\"unit_label\"] = plot_info[\"unit\"]\n", - "data = df_param.drop(columns=[param_widget.value])\n", - "data = data.rename(columns={param_widget.value + \"_mean\": param_widget.value})\n", - "plotting.plot_array(data, plot_info, \"\")" - ] - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/pyproject.toml b/pyproject.toml index fa06b85..e5f9094 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "setuptools>=42.0.0", + "setuptools>=43.0.0", "setuptools_scm[toml]>=3.4" ] @@ -13,7 +13,7 @@ write_to = "src/legend_data_monitor/_version.py" minversion = "6.0" addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"] xfail_strict = true -filterwarnings = "error" +filterwarnings = ["error", "ignore::DeprecationWarning"] log_cli_level = "info" testpaths = "tests" diff --git a/setup.cfg b/setup.cfg index fa8de60..eb145b5 100644 --- a/setup.cfg +++ b/setup.cfg @@ -67,4 +67,4 @@ legend_data_monitor = settings/*.json extend-ignore = E203, E501, D10 [codespell] -ignore-words-list = crate, nd, unparseable, compiletime, puls, livetime +ignore-words-list = crate, nd, unparseable, compiletime, puls, livetime, whis diff --git a/src/legend_data_monitor/__init__.py b/src/legend_data_monitor/__init__.py index e2a82b6..b71e542 100644 --- a/src/legend_data_monitor/__init__.py +++ b/src/legend_data_monitor/__init__.py @@ -1,7 +1,14 @@ from legend_data_monitor._version import version as __version__ from legend_data_monitor.analysis_data import AnalysisData from legend_data_monitor.core import control_plots -from legend_data_monitor.cuts import apply_cut +from legend_data_monitor.slow_control import SlowControl from legend_data_monitor.subsystem import Subsystem -__all__ = ["__version__", "control_plots", "Subsystem", "AnalysisData", "apply_cut"] +__all__ = [ + "__version__", + "control_plots", + "Subsystem", + "AnalysisData", + "SlowControl", + "apply_cut", +] diff --git a/src/legend_data_monitor/analysis_data.py b/src/legend_data_monitor/analysis_data.py index 188d8b6..52e82ce 100644 --- a/src/legend_data_monitor/analysis_data.py +++ b/src/legend_data_monitor/analysis_data.py @@ -1,13 +1,14 @@ import os import shelve +import sys import numpy as np import pandas as pd -from pandas import DataFrame, concat +from legendmeta import LegendMetadata # needed to know which parameters are not in DataLoader # but need to be calculated, such as event rate -from . import cuts, utils +from . 
import utils # ------------------------------------------------------------------------- @@ -29,11 +30,11 @@ class AnalysisData: - 'time_window' [str]: [optional] time window in which to calculate event rate, in case that's the parameter of interest. Format: time_window='NA', where N is integer, and A is M for months, D for days, T for minutes, and S for seconds. Default: None + aux_info= + str that has info regarding pulser operations (as difference or ratio wrt geds (spms?) data). Available options are: + - "pulser01anaRatio" + - "pulser01anaDiff" Or input kwargs directly parameters=, event_type=, cuts=, variation=, time_window= - - To apply a single cut, use data_after_cut = ldm.apply_cut() - To apply all cuts, use data_after_all_cuts = .apply_all_cuts() - where is the AnalysisData object you created. """ def __init__(self, sub_data: pd.DataFrame, **kwargs): @@ -43,6 +44,7 @@ def __init__(self, sub_data: pd.DataFrame, **kwargs): analysis_info = ( kwargs["selection"].copy() if "selection" in kwargs else kwargs.copy() ) + aux_info = kwargs["aux_info"] if "aux_info" in kwargs else None # ------------------------------------------------------------------------- # validity checks @@ -51,8 +53,6 @@ def __init__(self, sub_data: pd.DataFrame, **kwargs): # defaults if "time_window" not in analysis_info: analysis_info["time_window"] = None - if "variation" not in analysis_info: - analysis_info["variation"] = False if "cuts" not in analysis_info: analysis_info["cuts"] = [] if "plt_path" not in analysis_info: @@ -63,15 +63,33 @@ def __init__(self, sub_data: pd.DataFrame, **kwargs): if isinstance(analysis_info[input], str): analysis_info[input] = [analysis_info[input]] - if analysis_info["event_type"] != "all" and "flag_pulser" not in sub_data: - utils.logger.error( - f"\033[91mYour subsystem data does not have a pulser flag! We need it to subselect event type {analysis_info['event_type']}\033[0m" - ) + event_type_flags = { + "pulser": ("flag_pulser", "pulser"), + "FCbsln": ("flag_fc_bsln", "FCbsln"), + "muon": ("flag_muon", "muon"), + } + + event_type = analysis_info["event_type"] + + # check if the selected event type is within the available ones + if ( + event_type not in ["all", "phy"] + and event_type not in event_type_flags.keys() + ): utils.logger.error( - "\033[91mRun the function .flag_pulser_events() first, where is your Subsystem object, \033[0m" - + "\033[91mand is a Subsystem object of type 'pulser', which already has it data loaded with .get_data(); then create AnalysisData object.\033[0m" + f"\033[91mThe event type '{event_type}' does not exist and cannot be flagged! Try again with one among {list(event_type_flags.keys())}.\033[0m" ) - return + sys.exit() + + if event_type not in ["all", "phy"] and event_type in event_type_flags: + flag, subsystem_name = event_type_flags[event_type] + if flag not in sub_data: + utils.logger.error( + f"\033[91mYour subsystem data does not have a {subsystem_name} flag! 
We need it to subselect event type {event_type}\033[0m" + + f"\033[91mRun the function .flag_{subsystem_name}_events(<{subsystem_name}>) first, where is your Subsystem object, \033[0m" + + f"\033[91mand <{subsystem_name}> is a Subsystem object of type '{subsystem_name}', which already has its data loaded with <{subsystem_name}>.get_data(); then create an AnalysisData object.\033[0m" + ) + sys.exit() # cannot do event rate and another parameter at the same time # since event rate is calculated in windows @@ -88,7 +106,7 @@ def __init__(self, sub_data: pd.DataFrame, **kwargs): # time window must be provided for event rate if ( - analysis_info["parameters"][0] == "event_rate" + "event_rate" in analysis_info["parameters"] and not analysis_info["time_window"] ): utils.logger.error( @@ -100,36 +118,28 @@ def __init__(self, sub_data: pd.DataFrame, **kwargs): self.parameters = analysis_info["parameters"] self.evt_type = analysis_info["event_type"] self.time_window = analysis_info["time_window"] - self.variation = analysis_info["variation"] self.cuts = analysis_info["cuts"] self.saving = analysis_info["saving"] self.plt_path = analysis_info["plt_path"] + # evaluate the variation in any case, so we can save it (later useful for dashboard; + # when plotting, no variation will be included as specified in the config file) + self.variation = True + self.aux_info = aux_info # ------------------------------------------------------------------------- # subselect data # ------------------------------------------------------------------------- # always get basic parameters - params_to_get = [ - "timestamp", - "datetime", - "channel", - "name", - "location", - "position", - "cc4_id", - "cc4_channel", - "daq_crate", - "daq_card", - "HV_card", - "HV_channel", - "det_type", - "status", - ] - # pulser flag is present only if subsystem.flag_pulser_events() was called - # needed to subselect phy/pulser events - if "flag_pulser" in sub_data: - params_to_get.append("flag_pulser") + params_to_get = ["datetime"] + utils.COLUMNS_TO_LOAD + ["status"] + + for col in sub_data.columns: + # pulser flag is present only if subsystem.flag_pulser_events() was called -> needed to subselect phy/pulser events + if "flag_pulser" in col or "flag_fc_bsln" in col or "flag_muon" in col: + params_to_get.append(col) + # QC flag is present only if inserted as a cut in the config file -> this part is needed to apply + if "is_" in col: + params_to_get.append(col) # if special parameter, get columns needed to calculate it for param in self.parameters: @@ -149,10 +159,13 @@ def __init__(self, sub_data: pd.DataFrame, **kwargs): # the parameter does not exist else: utils.logger.error( - "\033[91m'%s' either does not exist in 'par-settings.json' or you misspelled the parameter's name. TRY AGAIN.\033[0m", + "\033[91m'%s' either does not exist in 'par-settings.json' or you misspelled the parameter's name. " + + "Another possibility is that the parameter does not exists in .lh5 processed files, so if the problem " + + "persists check if in the production environment you are looking at the parameter is included. " + + "Check also that you are not trying to plot a flag (ie a quality cut), which is not a parameter by definition.\033[0m", param, ) - exit() + sys.exit() # avoid repetition params_to_get = list(np.unique(params_to_get)) @@ -165,15 +178,17 @@ def __init__(self, sub_data: pd.DataFrame, **kwargs): "\033[91mOne/more entry/entries among %s is/are not present in the dataframe. 
TRY AGAIN.\033[0m",
                 params_to_get,
             )
-            exit()
+            sys.exit()
 
         # -------------------------------------------------------------------------
-
-        # selec phy/puls/all events
+        # select phy/puls/all/Klines events
         bad = self.select_events()
         if bad:
             return
 
+        # apply cuts, if any
+        self.apply_all_cuts()
+
         # calculate if special parameter
         self.special_parameter()
 
@@ -183,8 +198,7 @@ def __init__(self, sub_data: pd.DataFrame, **kwargs):
         # calculate variation if needed - only works after channel mean
         self.calculate_variation()
 
-        # -------------------------------------------------------------------------
-
+        # little sorting, before closing the function
         self.data = self.data.sort_values(["channel", "datetime"])
 
     def select_events(self):
@@ -192,10 +206,16 @@ def select_events(self):
         if self.evt_type == "pulser":
             utils.logger.info("... keeping only pulser events")
             self.data = self.data[self.data["flag_pulser"]]
+        elif self.evt_type == "FCbsln":
+            utils.logger.info("... keeping only FC baseline events")
+            self.data = self.data[self.data["flag_fc_bsln"]]
+        elif self.evt_type == "muon":
+            utils.logger.info("... keeping only muon events")
+            self.data = self.data[self.data["flag_muon"]]
         elif self.evt_type == "phy":
-            utils.logger.info("... keeping only physical (non-pulser) events")
-            self.data = self.data[~self.data["flag_pulser"]]
-        elif self.evt_type == "K_lines":
+            utils.logger.info("... keeping only physical (non-pulser & non-FCbsln & non-muon) events")
+            self.data = self.data[(~self.data["flag_pulser"]) & (~self.data["flag_fc_bsln"]) & (~self.data["flag_muon"])]
+        elif self.evt_type == "K_events":
             utils.logger.info("... selecting K lines in physical (non-pulser) events")
             self.data = self.data[~self.data["flag_pulser"]]
             energy = utils.SPECIAL_PARAMETERS["K_events"][0]
@@ -209,6 +229,34 @@ def select_events(self):
         utils.logger.error("\033[91m%s\033[0m", self.__doc__)
         return "bad"
 
+    def apply_cut(self, cut: str):
+        """
+        Apply given boolean cut.
+
+        Format: cut name as in lh5 files ("is_*") to apply given cut, or cut name preceded by "~" to apply a "not" cut.
+        """
+        if cut not in list(self.data.columns):
+            utils.logger.warning(
+                "\033[93mThe cut '%s' is not available "
+                + "(you either misspelled the cut's name or it is not available for the data you are inspecting). "
+                + "We do not apply any cut and keep everything, so as not to stop the flow.\033[0m",
+                cut,
+            )
+        else:
+            utils.logger.info("... 
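As a quick illustration of the cut convention documented in `apply_cut` above, here is a self-contained sketch; the flag names `is_valid_cal` and `is_saturated` are hypothetical and not taken from the production files.

```python
import pandas as pd

df = pd.DataFrame(
    {"channel": [1, 1, 2], "is_valid_cal": [1, 0, 1], "is_saturated": [0, 0, 1]}
)

def apply_cut(data: pd.DataFrame, cut: str) -> pd.DataFrame:
    # same convention as AnalysisData.apply_cut: a leading "~" inverts the boolean flag
    keep_value = 0 if cut.startswith("~") else 1
    flag = cut.lstrip("~")
    return data[data[flag] == keep_value]

# keep events passing the calibration flag and not flagged as saturated
cleaned = apply_cut(apply_cut(df, "is_valid_cal"), "~is_saturated")
print(cleaned)
```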
applying cut: " + cut) + + cut_value = 1 + # check if the cut has "not" in it + if cut[0] == "~": + cut_value = 0 + cut = cut[1:] + + self.data = self.data[self.data[cut] == cut_value] + + def apply_all_cuts(self): + for cut in self.cuts: + self.apply_cut(cut) + def special_parameter(self): for param in self.parameters: if param == "wf_max_rel": @@ -230,9 +278,11 @@ def special_parameter(self): .reset_index() ) + # ToDo: check time_window for event rate is smaller than the time window, but bigger than the rate (otherwise plots make no sense) + # divide event count in each time window by sampling window in seconds to get Hz dt_seconds = get_seconds(self.time_window) - event_rate["event_rate"] = event_rate["event_rate"] / dt_seconds + event_rate["event_rate"] = event_rate["event_rate"] * 1.0 / dt_seconds # --- get rid of last value # as the data range does not equally divide by the time window, the count in the last "window" will be smaller @@ -253,20 +303,21 @@ def special_parameter(self): # - reindex to match event rate table index # - put the columns in with concat event_rate = event_rate.set_index("channel") + # need to copy, otherwise next line removes "channel" from original, and crashes next time over not finding channel + columns = utils.COLUMNS_TO_LOAD[:] + columns.remove("channel") self.data = pd.concat( [ event_rate, self.data.groupby("channel") .first() - .reindex(event_rate.index)[["name", "location", "position"]], + .reindex(event_rate.index)[columns], ], axis=1, ) # put the channel back as column self.data = self.data.reset_index() elif param == "FWHM": - self.data = self.data.reset_index() - # calculate FWHM for each channel (substitute 'param' column with it) channel_fwhm = ( self.data.groupby("channel")[utils.SPECIAL_PARAMETERS[param][0]] @@ -282,11 +333,66 @@ def special_parameter(self): # put channel back in self.data.reset_index() - elif param == "K_events": - self.data = self.data.reset_index() - self.data = self.data.rename( - columns={utils.SPECIAL_PARAMETERS[param][0]: "K_events"} + elif param == "exposure": + # ------ get pulser rate for this experiment + + # retrieve first timestamp + first_timestamp = self.data["datetime"].iloc[0] + + # ToDo: already loaded before in Subsystem => 1) load mass already then, 2) inherit channel map from Subsystem ? + # get channel map at this timestamp + lmeta = LegendMetadata() + full_channel_map = lmeta.hardware.configuration.channelmaps.on( + timestamp=first_timestamp + ) + + # get pulser rate + if "PULS01" in full_channel_map.keys(): + rate = 0.05 # full_channel_map["PULS01"]["rate_in_Hz"] # L200: p02, p03 + else: + rate = full_channel_map["AUX00"]["rate_in_Hz"]["puls"] # L60 + + # ------ count number of pulser events + + # - subselect only pulser events (flag_pulser True) + # - count number of rows i.e. events for each detector + # - select arbitrary column that is definitely not NaN in each row e.g. 
channel to represent the count + # - rename to "pulser_events" + # now we have a table with number of pulser events as column with DETECTOR NAME AS INDEX + df_livetime = ( + self.data[self.data["flag_pulser"]] + .groupby("name") + .count()["channel"] + .to_frame("pulser_events") + ) + + # ------ calculate livetime for each detector and add it to original dataframe + df_livetime["livetime_in_s"] = df_livetime["pulser_events"] / rate + + self.data = self.data.set_index("name") + self.data = pd.concat( + [self.data, df_livetime.reindex(self.data.index)], axis=1 ) + # drop the pulser events column we don't need it + self.data = self.data.drop("pulser_events", axis=1) + + # --- calculate exposure for each detector + # get diodes map + dets_map = lmeta.hardware.detectors.germanium.diodes + + # add a new column "mass" to self.data containing mass values evaluated from dets_map[channel_name]["production"]["mass_in_g"], where channel_name is the value in "name" column + for det_name in self.data.index.unique(): + mass_in_kg = dets_map[det_name]["production"]["mass_in_g"] / 1000 + # exposure in kg*yr + self.data.at[det_name, "exposure"] = ( + mass_in_kg + * df_livetime.at[det_name, "livetime_in_s"] + / (60 * 60 * 24 * 365.25) + ) + + self.data.reset_index() + elif param == "AoE_Custom": + self.data["AoE_Custom"] = self.data["A_max"] / self.data["cuspEmax"] def channel_mean(self): """ @@ -305,11 +411,14 @@ def channel_mean(self): # congratulations, it's a sipm! if self.is_spms(): channels = (self.data["channel"]).unique() + # !! need to update for multiple parameter case! channel_mean = pd.DataFrame( {"channel": channels, self.parameters[0]: [None] * len(channels)} ) channel_mean = channel_mean.set_index("channel") - # otherwise, it's either the pulser or geds + # !! need to update for multiple parameter case! + self.data = concat_channel_mean(self, channel_mean) + # otherwise, it's either an aux or geds else: if self.saving is None or self.saving == "overwrite": # get the dataframe for timestamps below 10% of data present in the selected time window @@ -318,9 +427,11 @@ def channel_mean(self): channel_mean = self_data_time_cut.groupby("channel").mean( numeric_only=True )[self.parameters] + # concatenate column with mean values + self.data = concat_channel_mean(self, channel_mean) - if self.saving == "append": - subsys = self.get_subsys() + elif self.saving == "append": + subsys = self.get_subsys() if self.aux_info is None else self.aux_info # the file does not exist, so we get the mean as usual if not os.path.exists(self.plt_path + "-" + subsys + ".dat"): self_data_time_cut = cut_dataframe(self.data) @@ -328,64 +439,38 @@ def channel_mean(self): channel_mean = self_data_time_cut.groupby("channel").mean( numeric_only=True )[self.parameters] + # concatenate column with mean values + self.data = concat_channel_mean(self, channel_mean) # the file exist: we have to combine previous data with new data, and re-compute the mean over the first 10% of data (that now, are more than before) else: # open already existing shelve file with shelve.open(self.plt_path + "-" + subsys, "r") as shelf: old_dict = dict(shelf) - # get old dataframe (we are interested only in the column with mean values) - old_df = old_dict["monitoring"][self.evt_type][self.parameters[0]][ - "df_" + subsys - ] - """ - # to use in the future for a more refined version of updated mean values... 
- - # if previously we chose to plot % variations, we do not have anymore the absolute values to use when computing this new mean; - # what we can do, is to get absolute values starting from the mean and the % values present in the old dataframe' - # Later, we need to put these absolute values in the corresponding parameter column - if self.variation: - old_df[self.parameters[0]] = (old_df[self.parameters[0]] / 100 + 1) * old_df[self.parameters[0] + "_mean"] - - merged_df = concat([old_df, self.data], ignore_index=True, axis=0) - # remove 'level_0' column (if present) - merged_df = utils.check_level0(merged_df) - merged_df = merged_df.reset_index() - - self_data_time_cut = cut_dataframe(merged_df) - - # ...still we have to re-compute the % variations of previous time windows because now the mean estimate is different!!! - """ - # a column of mean values - mean_df = old_df[self.parameters[0] + "_mean"] - # a column of channels - channels = old_df["channel"] - # two columns: one of channels, one of mean values - channel_mean = concat( - [channels, mean_df], ignore_index=True, axis=1 - ).rename(columns={0: "channel", 1: self.parameters[0]}) - # drop potential duplicate rows - channel_mean = channel_mean.drop_duplicates(subset=["channel"]) - # set 'channel' column as index - channel_mean = channel_mean.set_index("channel") - - # FWHM mean is meaningless -> drop (special parameter for SiPMs); no need to get previous mean values for these parameters - if "FWHM" in self.parameters: - channel_mean.drop("FWHM", axis=1) - if "K_events" in self.parameters: - channel_mean.drop("K_events", axis=1) - - # rename columns to be param_mean - channel_mean = channel_mean.rename( - columns={param: param + "_mean" for param in self.parameters} - ) - # add it as column for convenience - repeating redundant information, but convenient - self.data = self.data.set_index("channel") - self.data = pd.concat( - [self.data, channel_mean.reindex(self.data.index)], axis=1 - ) - # put channel back in - self.data = self.data.reset_index() + + if len(self.parameters) == 1: + param = self.parameters[0] + channel_mean = get_saved_df( + self, subsys, param, old_dict, self.evt_type + ) + # concatenate column with mean values + self.data = concat_channel_mean(self, channel_mean) + + if len(self.parameters) > 1: + for param in self.parameters: + parameter = ( + param.split("_var")[0] if "_var" in param else param + ) + channel_mean = get_saved_df( + self, subsys, parameter, old_dict, self.evt_type + ) + # we need to repeat this operation for each param, otherwise only the mean of the last one survives + self.data = concat_channel_mean(self, channel_mean) + + if self.data.empty: + utils.logger.error( + f"\033[91mFor '{self.evt_type}' there are no flagged data (empty dataframe) -> no entries in the output file! 
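Both the "overwrite" and "append" branches of `channel_mean` compute the reference mean over the first 10% of the covered time range (in append mode, over the old plus new data combined). A small sketch of that selection, with invented timestamps, mirroring the `cut_dataframe()` helper defined further down in this module:

```python
import pandas as pd

# hypothetical time series for two channels
df = pd.DataFrame(
    {
        "channel": [1] * 10 + [2] * 10,
        "datetime": list(pd.date_range("2023-06-01", periods=10, freq="1H")) * 2,
        "cuspEmax": range(20),
    }
)

def cut_dataframe(data: pd.DataFrame) -> pd.DataFrame:
    # keep only the first 10% of the covered time range
    start, stop = data["datetime"].min(), data["datetime"].max()
    return data.loc[data["datetime"] < start + (stop - start) * 0.1]

channel_mean = cut_dataframe(df).groupby("channel")["cuspEmax"].mean()
print(channel_mean)  # reference values later used for the % variation
```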
Stop here the study.\033[0m" + ) def calculate_variation(self): """ @@ -403,14 +488,11 @@ def calculate_variation(self): self.data[param] / self.data[param + "_mean"] - 1 ) * 100 # % - def apply_all_cuts(self) -> DataFrame: - data_after_cuts = self.data.copy() - for cut in self.cuts: - data_after_cuts = cuts.apply_cut(data_after_cuts, cut) - return data_after_cuts - def is_spms(self) -> bool: """Return True if 'location' (=fiber) and 'position' (=top, bottom) are strings.""" + if self.data.empty: + return False + if isinstance(self.data.iloc[0]["location"], str) and isinstance( self.data.iloc[0]["position"], str ): @@ -420,28 +502,59 @@ def is_spms(self) -> bool: def is_geds(self) -> bool: """Return True if 'location' (=string) and 'position' are NOT strings.""" - if not self.is_spms(): - return True - else: - False + return not self.is_spms() def is_pulser(self) -> bool: - """Return True if 'location' (=string) and 'position' are NOT strings.""" - if self.is_geds(): - if ( - self.data.iloc[0]["location"] == 0 - and self.data.iloc[0]["position"] == 0 - ): - return True - else: - return False - else: - return False + """Return True if the system is the pulser channel.""" + return ( + self.is_geds() + and self.data.iloc[0]["location"] == 0 + and self.data.iloc[0]["position"] == 0 + ) + + def is_pulser01ana(self) -> bool: + """Return True if the system is the pulser channel.""" + return ( + self.is_geds() + and self.data.iloc[0]["location"] == -1 + and self.data.iloc[0]["position"] == -1 + ) + + def is_fc_bsln(self) -> bool: + """Return True if the system is the FC baseline channel.""" + return ( + self.is_geds() + and self.data.iloc[0]["location"] == -2 + and self.data.iloc[0]["position"] == -2 + ) + + def is_muon(self) -> bool: + """Return True if the system is the muon channel.""" + return ( + self.is_geds() + and self.data.iloc[0]["location"] == -3 + and self.data.iloc[0]["position"] == -3 + ) + + def is_aux(self) -> bool: + """Return True if the system is an AUX channel.""" + return ( + self.is_pulser() + or self.is_pulser01ana() + or self.is_fc_bsln() + or self.is_muon() + ) def get_subsys(self) -> str: - """Return 'pulser', 'geds' or 'spms'.""" + """Return 'pulser', 'pulser01ana', 'FCbsln', 'muon', 'geds' or 'spms' depending on the subsystem type.""" if self.is_pulser(): return "pulser" + if self.is_pulser01ana(): + return "pulser01ana" + if self.is_fc_bsln(): + return "FCbsln" + if self.is_muon(): + return "muon" if self.is_spms(): return "spms" if self.is_geds(): @@ -470,12 +583,178 @@ def get_seconds(time_window: str): return int(time_window.rstrip(time_unit)) * str_to_seconds[time_unit] -def cut_dataframe(data: DataFrame) -> DataFrame: - """Get mean value of the parameters under study over the first 10% of data present in the selected time range.""" - min_datetime = data["datetime"].min() # first timestamp - max_datetime = data["datetime"].max() # last timestamp +def cut_dataframe(df: pd.DataFrame) -> pd.DataFrame: + """Get mean value of the parameters under study over the first 10% of data present in the selected time range of the input dataframe.""" + min_datetime = df["datetime"].min() # first timestamp + max_datetime = df["datetime"].max() # last timestamp duration = max_datetime - min_datetime ten_percent_duration = duration * 0.1 thr_datetime = min_datetime + ten_percent_duration # 10% timestamp # get only the rows for datetimes before the 10% of the specified time range - return data.loc[data["datetime"] < thr_datetime] + return df.loc[df["datetime"] < thr_datetime] + + +def 
get_saved_df( + self, subsys: str, param: str, old_dict: dict, evt_type: str +) -> pd.DataFrame: + """Get the already saved dataframe from the already saved output shelve file, for a given parameter ```param```. In particular, it evaluates again the mean over the new 10% of data in the new larger time window.""" + # get old dataframe (we are interested only in the column with mean values) + old_df = old_dict["monitoring"][evt_type][param]["df_" + subsys] + + # we need to re-calculate the mean value over the new bigger time window! + # we retrieve absolute values of already saved df, we use + old_absolute_values = old_df.copy().filter(items=["channel", "datetime", param]) + new_absolute_values = self.data.copy().filter(items=["channel", "datetime", param]) + + concatenated_df = pd.concat( + [old_absolute_values, new_absolute_values], ignore_index=True + ) + # get the dataframe for timestamps below 10% of data present in the selected time window + concatenated_df_time_cut = cut_dataframe(concatenated_df) + # remove 'datetime' column (it was necessary just to evaluate again the first 10% of data that are necessary to evaluate the mean on the new dataset) + concatenated_df_time_cut = concatenated_df_time_cut.drop(columns=["datetime"]) + + # create a column with the mean of the cut dataframe (cut in the time window of interest) + channel_mean = ( + concatenated_df_time_cut.groupby("channel")[param].mean().reset_index() + ) + + # drop potential duplicate rows + channel_mean = channel_mean.drop_duplicates(subset=["channel"]) + # set channel to index because that's how it comes out in previous cases from df.mean() + channel_mean = channel_mean.set_index("channel") + + return channel_mean + + +def get_aux_df( + df: pd.DataFrame, parameter: list, plot_settings: dict, aux_ch: str +) -> pd.DataFrame: + """Get dataframes containing auxiliary (PULS01ANA) data, storing absolute/diff&ratio/mean/% variations values.""" + if len(parameter) == 1: + param = parameter[0] + if ( + param in utils.PARAMETER_TIERS.keys() + and utils.PARAMETER_TIERS[param] == "hit" + ) or param in utils.SPECIAL_PARAMETERS.keys(): + return pd.DataFrame(), pd.DataFrame(), pd.DataFrame() + + # get abs/mean/% variation for data of aux channel --> objects to save + utils.logger.debug(f"Getting {aux_ch} data for {param}") + aux_data = df.copy() + aux_data[param] = aux_data[f"{param}_{aux_ch}"] + aux_data = aux_data.drop( + columns=[ + f"{param}_{aux_ch}Ratio", + f"{param}_{aux_ch}", + f"{param}_{aux_ch}Diff", + ] + ) + # right now, we have the same values repeated for each ged channel + # -> keep one and substytute with AUX channel ID + # (only for this aux df, the others still maintain a relation with geds values) + # keep one channel only + first_ch = aux_data.iloc[0]["channel"] + aux_data = aux_data[aux_data["channel"] == first_ch] + first_timestamp = utils.unix_timestamp_to_string( + aux_data["datetime"].dt.to_pydatetime()[0].timestamp() + ) + if aux_ch == "pulser01ana": + chmap = LegendMetadata().hardware.configuration.channelmaps.on( + timestamp=first_timestamp + ) + # PULS01ANA channel + if "PULS01ANA" in chmap.keys(): + aux_data = get_aux_info(aux_data, chmap, "PULS01ANA") + # PULS (=AUX00) channel (for periods below p03) + else: + aux_data = get_aux_info(aux_data, chmap, "PULS01") + + # get channel mean and blabla + aux_analysis = AnalysisData(aux_data, selection=plot_settings) + utils.logger.debug("... 
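`get_aux_df` repeats the same column shuffle three times: it promotes one of the PULS01ANA-derived columns (plain value, Ratio or Diff) to the parameter column and drops the other auxiliary columns before building a new AnalysisData object. A compact sketch of that step on an invented dataframe:

```python
import pandas as pd

param, aux_ch = "cuspEmax", "pulser01ana"

# hypothetical geds data with the auxiliary columns produced upstream
df = pd.DataFrame(
    {
        "channel": [1, 1],
        f"{param}": [1000.0, 1002.0],
        f"{param}_{aux_ch}": [500.0, 500.5],
        f"{param}_{aux_ch}Ratio": [2.0, 2.002],
        f"{param}_{aux_ch}Diff": [500.0, 501.5],
    }
)

aux_cols = [f"{param}_{aux_ch}", f"{param}_{aux_ch}Ratio", f"{param}_{aux_ch}Diff"]

# ratio wrt PULS01ANA: overwrite the parameter column, then drop the aux columns
ratio_df = df.copy()
ratio_df[param] = ratio_df[f"{param}_{aux_ch}Ratio"]
ratio_df = ratio_df.drop(columns=aux_cols)
print(ratio_df)
```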
aux dataframe \n%s", aux_analysis.data) + + # get abs/mean/% variation for ratio values with aux channel data --> objects to save + utils.logger.debug(f"Getting ratio wrt {aux_ch} data for {param}") + aux_ratio_data = df.copy() + aux_ratio_data[param] = aux_ratio_data[f"{param}_{aux_ch}Ratio"] + aux_ratio_data = aux_ratio_data.drop( + columns=[ + f"{param}_{aux_ch}Ratio", + f"{param}_{aux_ch}", + f"{param}_{aux_ch}Diff", + ] + ) + + aux_ratio_analysis = AnalysisData( + aux_ratio_data, selection=plot_settings, aux_info="pulser01anaRatio" + ) + utils.logger.debug("... aux ratio dataframe \n%s", aux_ratio_analysis.data) + + # get abs/mean/% variation for difference values with aux channel data --> objects to save + utils.logger.debug(f"Getting difference wrt {aux_ch} data for {param}") + aux_diff_data = df.copy() + aux_diff_data[param] = aux_diff_data[f"{param}_{aux_ch}Diff"] + aux_diff_data = aux_diff_data.drop( + columns=[ + f"{param}_{aux_ch}Ratio", + f"{param}_{aux_ch}", + f"{param}_{aux_ch}Diff", + ] + ) + aux_diff_analysis = AnalysisData( + aux_diff_data, selection=plot_settings, aux_info="pulser01anaDiff" + ) + utils.logger.debug("... aux difference dataframe \n%s", aux_diff_analysis.data) + + if len(parameter) > 1: + utils.logger.warning( + "\033[93mThe aux subtraction/difference is not implemented for multi parameters! We skip it and plot the normal quantities, not corrected for the aux channel.\033[0m" + ) + if "AUX_ratio" in plot_settings.keys(): + del plot_settings["AUX_ratio"] + if "AUX_diff" in plot_settings.keys(): + del plot_settings["AUX_diff"] + return None, None, None + + return aux_analysis, aux_ratio_analysis, aux_diff_analysis + + +def get_aux_info(df: pd.DataFrame, chmap: dict, aux_ch: str) -> pd.DataFrame: + """Return a DataFrame with correct pulser AUX info.""" + df["channel"] = LegendMetadata().channelmap().PULS01ANA.daq.rawid + df["HV_card"] = None + df["HV_channel"] = None + df["cc4_channel"] = None + df["cc4_id"] = None + df["daq_card"] = LegendMetadata().channelmap().PULS01ANA.daq.card.id + df["daq_crate"] = LegendMetadata().channelmap().PULS01ANA.daq.crate + df["det_type"] = None + df["location"] = ( + utils.SPECIAL_SYSTEMS["pulser01ana"] + if aux_ch == "PULS01ANA" + else utils.SPECIAL_SYSTEMS["pulser"] + ) + df["position"] = df["location"] + df["name"] = aux_ch + + return df + + +def concat_channel_mean(self, channel_mean) -> pd.DataFrame: + """Add a new column containing the mean values of the inspected parameter.""" + # some means are meaningless -> drop the corresponding column + if "FWHM" in self.parameters: + channel_mean.drop("FWHM", axis=1) + if "exposure" in self.parameters: + channel_mean.drop("exposure", axis=1) + + # rename columns to be param_mean + channel_mean = channel_mean.rename( + columns={param: param + "_mean" for param in self.parameters} + ) + # add it as column for convenience - repeating redundant information, but convenient + self.data = self.data.set_index("channel") + self.data = pd.concat([self.data, channel_mean.reindex(self.data.index)], axis=1) + + return self.data.reset_index() diff --git a/src/legend_data_monitor/config/p03_L200_phy_r000.json b/src/legend_data_monitor/config/p03_L200_phy_r000.json new file mode 100644 index 0000000..ca65054 --- /dev/null +++ b/src/legend_data_monitor/config/p03_L200_phy_r000.json @@ -0,0 +1,99 @@ +{ + "output": "/data1/users/calgaro/auto_prova", + "dataset": { + "experiment": "L200", + "period": "p03", + "version": "", + "path": "/data2/public/prodenv/prod-blind/tmp/auto", + "type": "phy", + 
"runs": 0 + }, + "saving": "append", + "subsystems": { + "geds": { + "FWHM in pulser events": { + "parameters": "FWHM", + "event_type": "pulser", + "plot_structure": "array", + "plot_style": "vs ch" + }, + "Baselines (dsp/baseline) in pulser events": { + "parameters": "baseline", + "event_type": "pulser", + "plot_structure": "per channel", + "resampled": "also", + "plot_style": "vs time", + "variation": true, + "time_window": "1H", + "status": true + }, + "Uncalibrated gain (dsp/cuspEmax) in pulser events": { + "parameters": "cuspEmax", + "event_type": "pulser", + "plot_structure": "per channel", + "resampled": "also", + "plot_style": "vs time", + "variation": true, + "time_window": "1H", + "status": true + }, + "Calibrated gain (hit/cuspEmax_ctc_cal) in pulser events": { + "parameters": "cuspEmax_ctc_cal", + "event_type": "pulser", + "plot_structure": "per channel", + "resampled": "also", + "plot_style": "vs time", + "variation": true, + "time_window": "1H", + "status": true + }, + "Uncalibrated gain (dsp/trapEmax) in pulser events": { + "parameters": "trapEmax", + "event_type": "pulser", + "plot_structure": "per channel", + "resampled": "also", + "plot_style": "vs time", + "variation": true, + "time_window": "1H", + "status": true + }, + "Calibrated gain (hit/trapEmax_ctc_cal) in pulser events": { + "parameters": "trapEmax_ctc_cal", + "event_type": "pulser", + "plot_structure": "per channel", + "resampled": "also", + "plot_style": "vs time", + "variation": true, + "time_window": "1H", + "status": true + }, + "Noise (dsp/bl_std) in pulser events": { + "parameters": "bl_std", + "event_type": "pulser", + "plot_structure": "per channel", + "resampled": "only", + "plot_style": "vs time", + "variation": true, + "time_window": "1H" + }, + "A/E corrected (hit/AoE_Corrected) in pulser events": { + "parameters": "AoE_Corrected", + "event_type": "pulser", + "plot_structure": "per channel", + "resampled": "only", + "plot_style": "vs time", + "variation": true, + "time_window": "1H" + }, + "A/E classifier (hit/AoE_Classifier) in pulser events": { + "parameters": "AoE_Classifier", + "event_type": "pulser", + "plot_structure": "per channel", + "resampled": "only", + "plot_style": "vs time", + "variation": true, + "time_window": "1H" + } + } + } +} diff --git a/src/legend_data_monitor/config/p03_r000_L200_hdf_example.json b/src/legend_data_monitor/config/p03_r000_L200_hdf_example.json new file mode 100644 index 0000000..408fc3d --- /dev/null +++ b/src/legend_data_monitor/config/p03_r000_L200_hdf_example.json @@ -0,0 +1,72 @@ +{ + "output": "/data1/users/calgaro/auto_prova", + "dataset": { + "experiment": "L200", + "period": "p03", + "version": "", + "path": "/data2/public/prodenv/prod-blind/tmp/auto", + "type": "phy", + "runs": 0 + }, + "saving": "overwrite", + "subsystems": { + "geds": { + "Event rate in pulser events": { + "parameters": "event_rate", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "time_window": "20S" + }, + "Baselines (dsp/baseline) in pulser events": { + "parameters": "baseline", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": true, + "variation": true, + "time_window": "10T" + }, + "Uncalibrated gain (dsp/cuspEmax) in pulser events": { + "parameters": "cuspEmax", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": true, + "variation": true, + "time_window": "10T" + }, + 
"Calibrated gain (hit/cuspEmax_ctc_cal) in FCbsln events": { + "parameters": "cuspEmax_ctc_cal", + "event_type": "FCbsln", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "variation": true, + "time_window": "10T" + }, + "Noise (dsp/bl_std) in pulser events": { + "parameters": "bl_std", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "AUX_ratio": true, + "variation": true, + "time_window": "10T" + }, + "A/E (from dsp) in pulser events": { + "parameters": "AoE_Custom", + "event_type": "pulser", + "plot_structure": "per string", + "resampled": "only", + "plot_style": "vs time", + "variation": true, + "time_window": "10T" + } + } + } +} diff --git a/src/legend_data_monitor/config/slow_control_example.json b/src/legend_data_monitor/config/slow_control_example.json new file mode 100644 index 0000000..12e656e --- /dev/null +++ b/src/legend_data_monitor/config/slow_control_example.json @@ -0,0 +1,24 @@ +{ + "output": "/data1/users/calgaro/auto_prova", + "dataset": { + "experiment": "L200", + "period": "p03", + "version": "", + "path": "/data2/public/prodenv/prod-blind/tmp/auto", + "type": "phy", + "runs": 0 + }, + "saving": "overwrite", + "slow_control": { + "parameters": [ + "DaqLeft-Temp1", + "DaqLeft-Temp2", + "DaqRight-Temp1", + "DaqRight-Temp2", + "RREiT", + "RRNTe", + "RRSTe", + "ZUL_T_RR" + ] + } +} diff --git a/src/legend_data_monitor/core.py b/src/legend_data_monitor/core.py index 267fd66..220896f 100644 --- a/src/legend_data_monitor/core.py +++ b/src/legend_data_monitor/core.py @@ -1,19 +1,37 @@ import json +import os import re +import subprocess +import sys -from . import plotting, subsystem, utils +from . import plotting, slow_control, subsystem, utils -def control_plots(user_config_path: str): - """Set the configuration file and the output paths when a user config file is provided. The function to generate plots is then automatically called.""" +def retrieve_scdb(user_config_path: str, port: int, pswd: str): + """Set the configuration file and the output paths when a user config file is provided. The function to retrieve Slow Control data from database is then automatically called.""" + # ------------------------------------------------------------------------- + # SSH tunnel to the Slow Control database + # ------------------------------------------------------------------------- + # for the settings, see instructions on Confluence + try: + subprocess.run("ssh -T -N -f ugnet-proxy", shell=True, check=True) + utils.logger.debug( + "SSH tunnel to Slow Control database established successfully." 
+ ) + except subprocess.CalledProcessError as e: + utils.logger.error( + f"\033[91mError running SSH tunnel to Slow Control database command: {e}\033[0m" + ) + sys.exit() + # ------------------------------------------------------------------------- # Read user settings # ------------------------------------------------------------------------- with open(user_config_path) as f: config = json.load(f) - # check validity of plot settings - valid = utils.check_plot_settings(config) + # check validity of scdb settings + valid = utils.check_scdb_settings(config) if not valid: return @@ -22,55 +40,80 @@ def control_plots(user_config_path: str): # ------------------------------------------------------------------------- # Format: l200-p02-{run}-{data_type}; One pdf/log/shelve file for each subsystem + out_path = utils.get_output_path(config) + "-slow_control.hdf" - try: - data_types = ( - [config["dataset"]["type"]] - if isinstance(config["dataset"]["type"], str) - else config["dataset"]["type"] + # ------------------------------------------------------------------------- + # Load and save data + # ------------------------------------------------------------------------- + for idx, param in enumerate(config["slow_control"]["parameters"]): + utils.logger.info( + "\33[34m~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\33[0m" + ) + utils.logger.info(f"\33[34m~~~ R E T R I E V I N G : {param}\33[0m") + utils.logger.info( + "\33[34m~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\33[0m" ) - plt_basename = "{}-{}-".format( - config["dataset"]["experiment"].lower(), - config["dataset"]["period"], + # build a SlowControl object + # - select parameter of interest from a list of available parameters + # - apply time interval cuts + # - get values from SC database (available from LNGS only) + # - get limits/units/... from SC databasee (available from LNGS only) + sc_analysis = slow_control.SlowControl( + param, port, pswd, dataset=config["dataset"] ) - except (KeyError, TypeError): - # means something about dataset is wrong -> print Subsystem.get_data doc - utils.logger.error( - "\033[91mSomething is missing or wrong in your 'dataset' field of the config. 
You can see the format here under 'dataset=':\033[0m" + + # check if the dataframe is empty or not (no data) + if utils.check_empty_df(sc_analysis): + utils.logger.warning( + "\033[93m'%s' is not inspected, we continue with the next parameter (if present).\033[0m", + param, + ) + continue + + # remove the slow control hdf file if + # 1) it already exists + # 2) we specified "overwrite" as saving option + # 3) it is the first parameter we want to save (idx==0) + if os.path.exists(out_path) and config["saving"] == "overwrite" and idx == 0: + os.remove(out_path) + + # save data to hdf file + sc_analysis.data.copy().to_hdf( + out_path, + key=param.replace("-", "_"), + mode="a", ) - utils.logger.info("\033[91m%s\033[0m", subsystem.Subsystem.get_data.__doc__) - return - user_time_range = utils.get_query_timerange(dataset=config["dataset"]) - # will be returned as None if something is wrong, and print an error message - if not user_time_range: - return - # create output folders for plots - period_dir = utils.make_output_paths(config, user_time_range) - # get correct time info for subfolder's name - name_time = ( - utils.get_run_name(config, user_time_range) - if "timestamp" in user_time_range.keys() - else utils.get_time_name(user_time_range) - ) - output_paths = period_dir + name_time + "/" - utils.make_dir(output_paths) - if not output_paths: +def control_plots(user_config_path: str, n_files=None): + """Set the configuration file and the output paths when a user config file is provided. The function to generate plots is then automatically called.""" + # ------------------------------------------------------------------------- + # Read user settings + # ------------------------------------------------------------------------- + with open(user_config_path) as f: + config = json.load(f) + + # check validity of plot settings + valid = utils.check_plot_settings(config) + if not valid: return - # we don't care here about the time keyword timestamp/run -> just get the value - plt_basename += name_time - plt_path = output_paths + plt_basename - plt_path += "-{}".format("_".join(data_types)) + # ------------------------------------------------------------------------- + # Define PDF file basename + # ------------------------------------------------------------------------- - # plot - generate_plots(config, plt_path) + # Format: l200-p02-{run}-{data_type}; One pdf/log/shelve file for each subsystem + plt_path = utils.get_output_path(config) + + # ------------------------------------------------------------------------- + # Plot + # ------------------------------------------------------------------------- + generate_plots(config, plt_path, n_files) def auto_control_plots( - plot_config: str, file_keys: str, prod_path: str, prod_config: str + plot_config: str, file_keys: str, prod_path: str, prod_config: str, n_files=None ): """Set the configuration file and the output paths when a config file is provided during automathic plot production.""" # ------------------------------------------------------------------------- @@ -92,80 +135,70 @@ def auto_control_plots( # ------------------------------------------------------------------------- # Define PDF file basename # ------------------------------------------------------------------------- - # Format: l200-p02-{run}-{data_type}; One pdf/log/shelve file for each subsystem - - try: - data_types = ( - [config["dataset"]["type"]] - if isinstance(config["dataset"]["type"], str) - else config["dataset"]["type"] - ) - plt_basename = "{}-{}-".format( - 
config["dataset"]["experiment"].lower(), - config["dataset"]["period"], - ) - except (KeyError, TypeError): - # means something about dataset is wrong -> print Subsystem.get_data doc - utils.logger.error( - "\033[91mSomething is missing or wrong in your 'dataset' field of the config. You can see the format here under 'dataset=':\033[0m" - ) - utils.logger.info("\033[91m%s\033[0m", subsystem.Subsystem.get_data.__doc__) - return - - user_time_range = utils.get_query_timerange(dataset=config["dataset"]) - # will be returned as None if something is wrong, and print an error message - if not user_time_range: - return - - # create output folders for plots - period_dir = utils.make_output_paths(config, user_time_range) - # get correct time info for subfolder's name - name_time = config["dataset"]["run"] - output_paths = period_dir + name_time + "/" - utils.make_dir(output_paths) - if not output_paths: - return - # we don't care here about the time keyword timestamp/run -> just get the value - plt_basename += name_time - plt_path = output_paths + plt_basename - plt_path += "-{}".format("_".join(data_types)) + # Format: l200-p02-{run}-{data_type}; One pdf/log/shelve file for each subsystem + plt_path = utils.get_output_path(config) # plot - generate_plots(config, plt_path) - - -def generate_plots(config: dict, plt_path: str): - """Generate plots once the config file is set and once we provide the path and name in which store results.""" + generate_plots(config, plt_path, n_files) + + +def generate_plots(config: dict, plt_path: str, n_files=None): + """Generate plots once the config file is set and once we provide the path and name in which store results. n_files specifies if we want to inspect the entire time window (if n_files is not specified), otherwise we subdivide the time window in smaller datasets, each one being composed by n_files files.""" + # no subdivision of data (useful when the inspected time window is short enough) + if n_files is None: + # some output messages, just to warn the user... + if config["saving"] is None: + utils.logger.warning( + "\033[93mData will not be saved, but the pdf will be.\033[0m" + ) + elif config["saving"] == "append": + utils.logger.warning( + "\033[93mYou're going to append new data to already existing data. If not present, you first create the output file as a very first step.\033[0m" + ) + elif config["saving"] == "overwrite": + utils.logger.warning( + "\033[93mYou have accepted to overwrite already generated files, there's no way back until you manually stop the code NOW!\033[0m" + ) + else: + utils.logger.error( + "\033[91mThe selected saving option in the config file is wrong. 
Try again with 'overwrite', 'append' or nothing!\033[0m" + ) + sys.exit() + # do the plots + make_plots(config, plt_path, config["saving"]) + + # for subdivision of data, let's loop over lists of timestamps, each one of length n_files + else: + # list of datasets to loop over later on + bunches = utils.bunch_dataset(config.copy(), n_files) + + # remove unnecessary keys for precaution - we will replace the time selections with individual timestamps/file keys + config["dataset"].pop("start", None) + config["dataset"].pop("end", None) + config["dataset"].pop("runs", None) + + for idx, bunch in enumerate(bunches): + utils.logger.debug( + f"\33[44mYou are inspecting bunch #{idx+1}/{len(bunches)}...\33[0m" + ) + # if it is the first dataset, just override previous content + if idx == 0: + config["saving"] = "overwrite" + # if we already inspected the first dataset, append the ones coming after + if idx > 0: + config["saving"] = "append" + + # get the dataset + config["dataset"]["timestamps"] = bunch + # make the plots / load data for the dataset of interest + make_plots(config.copy(), plt_path, config["saving"]) + + +def make_plots(config: dict, plt_path: str, saving: str): # ------------------------------------------------------------------------- - # Get pulser first - needed to flag pulser events + # flag events - PULSER # ------------------------------------------------------------------------- - - # get saving option - if "saving" in config: - saving = config["saving"] - else: - saving = None - - # some output messages, just to warn the user... - if saving is None: - utils.logger.warning( - "\033[93mData will not be saed, but the pdf will be.\033[0m" - ) - elif saving == "append": - utils.logger.warning( - "\033[93mYou're going to append new data to already existing data. If not present, you first create the output file as a very first step.\033[0m" - ) - elif saving == "overwrite": - utils.logger.warning( - "\033[93mYou have accepted to overwrite already generated files, there's no way back until you manually stop the code NOW!\033[0m" - ) - else: - utils.logger.error( - "\033[91mThe selected saving option in the config file is wrong. 
Try again with 'overwrite', 'append' or nothing!\033[0m" - ) - exit() - # put it in a dict, so that later, if pulser is also wanted to be plotted, we don't have to load it twice subsystems = {"pulser": subsystem.Subsystem("pulser", dataset=config["dataset"])} # get list of all parameters needed for all requested plots, if any @@ -176,7 +209,26 @@ def generate_plots(config: dict, plt_path: str): utils.logger.debug(subsystems["pulser"].data) # ------------------------------------------------------------------------- + # flag events - FC baseline + # ------------------------------------------------------------------------- + subsystems["FCbsln"] = subsystem.Subsystem("FCbsln", dataset=config["dataset"]) + parameters = utils.get_all_plot_parameters("FCbsln", config) + subsystems["FCbsln"].get_data(parameters) + # the following 3 lines help to tag FC bsln events that are not in coincidence with a pulser + subsystems["FCbsln"].flag_pulser_events(subsystems["pulser"]) + subsystems["FCbsln"].flag_fcbsln_only_events() + subsystems["FCbsln"].data = subsystems["FCbsln"].data.drop(columns={"flag_pulser"}) + utils.logger.debug(subsystems["FCbsln"].data) + + # ------------------------------------------------------------------------- + # flag events - muon + # ------------------------------------------------------------------------- + subsystems["muon"] = subsystem.Subsystem("muon", dataset=config["dataset"]) + parameters = utils.get_all_plot_parameters("muon", config) + subsystems["muon"].get_data(parameters) + utils.logger.debug(subsystems["muon"].data) + # ------------------------------------------------------------------------- # What subsystems do we want to plot? subsystems_to_plot = list(config["subsystems"].keys()) @@ -193,9 +245,32 @@ def generate_plots(config: dict, plt_path: str): parameters = utils.get_all_plot_parameters(system, config) # get data for these parameters and dataset range subsystems[system].get_data(parameters) - utils.logger.debug(subsystems[system].data) - # flag pulser events for future parameter data selection - subsystems[system].flag_pulser_events(subsystems["pulser"]) + + # load also aux channel if necessary (FOR ALL SYSTEMS), and add it to the already existing df + for plot in config["subsystems"][system].keys(): + # !!! add if for sipms... 
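# note (inferred from analysis_data.get_aux_df, not from include_aux itself): for each
# requested parameter <param>, include_aux is expected to add the columns
# "<param>_pulser01ana", "<param>_pulser01anaRatio" and "<param>_pulser01anaDiff",
# i.e. the PULS01ANA value and the ratio/difference of the geds value with respect to it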
+ subsystems[system].include_aux( + config["subsystems"][system][plot]["parameters"], + config["dataset"], + config["subsystems"][system][plot], + "pulser01ana", + ) + + utils.logger.debug(subsystems[system].data) + + # ------------------------------------------------------------------------- + # flag events (FOR ALL SYSTEMS) + # ------------------------------------------------------------------------- + # flag pulser events for future parameter data selection + subsystems[system].flag_pulser_events(subsystems["pulser"]) + # flag FC baseline events (not in correspondence with any pulser event) for future parameter data selection + subsystems[system].flag_fcbsln_events(subsystems["FCbsln"]) + # flag muon events for future parameter data selection + subsystems[system].flag_muon_events(subsystems["muon"]) + + # remove timestamps for given detectors (moved here cause otherwise timestamps for flagging don't match) + subsystems[system].remove_timestamps(utils.REMOVE_KEYS) + utils.logger.debug(subsystems[system].data) # ------------------------------------------------------------------------- # make subsystem plots diff --git a/src/legend_data_monitor/cuts.py b/src/legend_data_monitor/cuts.py deleted file mode 100644 index 7cc2b4a..0000000 --- a/src/legend_data_monitor/cuts.py +++ /dev/null @@ -1,28 +0,0 @@ -from . import utils - - -def cut_k_lines(data): - # if we are not plotting "K_events", then there is still the case were the user might want to plot a given parameter (eg. baseline) - # in correspondence ok K line entries. To do this, we go and look at the corresponding energy column. In particular, the energy is decided a priori in 'special-parameters.json' - if utils.SPECIAL_PARAMETERS["K_events"][0] in data.columns: - energy = utils.SPECIAL_PARAMETERS["K_events"][0] - # when we are plotting "K_events", then we already re-named the energy column with the parameter's name (due to how the code was built) - if "K_events" in data.columns: - energy = "K_events" - # if something is not properly working, exit from the code - else: - utils.logger.error( - "\033[91mThe cut over K lines entries is not working. Check again your subsystem options!\033[0m" - ) - exit() - - return data[(data[energy] > 1430) & (data[energy] < 1575)] - - -def apply_cut(data, cut): - cut_function = CUTS[cut] - utils.logger.info("...... applying cut: " + cut) - return cut_function(data) - - -CUTS = {"K lines": cut_k_lines} diff --git a/src/legend_data_monitor/plot_styles.py b/src/legend_data_monitor/plot_styles.py index 7d9ea54..f19feee 100644 --- a/src/legend_data_monitor/plot_styles.py +++ b/src/legend_data_monitor/plot_styles.py @@ -4,15 +4,22 @@ # See mapping user plot structure keywords to corresponding functions in the end of this file + +from datetime import datetime + import numpy as np import pandas as pd from matplotlib.axes import Axes from matplotlib.dates import DateFormatter, date2num, num2date from matplotlib.figure import Figure -from pandas import DataFrame, Timedelta +from pandas import DataFrame, Timedelta, concat from . 
import utils +# ------------------------------------------------------------------------------- +# single parameter plotting functions +# ------------------------------------------------------------------------------- + def plot_vs_time( data_channel: DataFrame, fig: Figure, ax: Axes, plot_info: dict, color=None @@ -26,6 +33,10 @@ def plot_vs_time( # changing the type of the column itself with the table does not work data_channel = data_channel.sort_values("datetime") + # if you inspect event rate, change the 'resampled' option from 'only' (if so) to 'no' + if plot_info["parameter"] == "event_rate" and plot_info["resampled"] == "only": + plot_info["resampled"] = "no" + res_col = color all_col = ( color @@ -39,6 +50,7 @@ def plot_vs_time( data_channel[plot_info["parameter"]], zorder=0, color=all_col, + linewidth=1, ) # ------------------------------------------------------------------------- @@ -48,6 +60,7 @@ def plot_vs_time( if plot_info["resampled"] != "no": # unless event rate - already resampled and counted in some time window if not plot_info["parameter"] == "event_rate": + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 1 - resampling # resample in given time window, as start pick the first timestamp in table resampled = ( data_channel.set_index("datetime") @@ -71,15 +84,55 @@ def plot_vs_time( linestyle="-", ) + # evaluation of std bands, if enabled + if plot_info["std"] is True: + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 2 - std evaluation + std_data = ( + data_channel.set_index("datetime") + .resample(plot_info["time_window"], origin="start") + .std(numeric_only=True) + ) + std_data = std_data.reset_index() + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 3 - appending std to the resampled dataframe + std_data = std_data.rename(columns={plot_info["parameter"]: "std"}) + new_dataframe = concat( + [resampled, std_data[["std"]]], ignore_index=False, axis=1 + ) + + ax.fill_between( + resampled["datetime"].dt.to_pydatetime(), + resampled[plot_info["parameter"]] - new_dataframe["std"], + resampled[plot_info["parameter"]] + new_dataframe["std"], + alpha=0.25, + color=res_col, + ) + # ------------------------------------------------------------------------- # beautification # ------------------------------------------------------------------------- + # set range if provided + if plot_info["range"][0] is not None: + ax.set_ylim(ymin=plot_info["range"][0]) + if plot_info["range"][1] is not None: + ax.set_ylim(ymax=plot_info["range"][1]) + + # plot the position of the two K lines + if plot_info["event_type"] == "K_events": + ax.axhline(y=1460.822, color="gray", linestyle="--") + ax.axhline(y=1524.6, color="gray", linestyle="--") + # --- time ticks/labels on x-axis min_x = date2num(data_channel.iloc[0]["datetime"]) max_x = date2num(data_channel.iloc[-1]["datetime"]) time_points = np.linspace(min_x, max_x, 10) - labels = [num2date(time).strftime("%Y\n%m/%d\n%H:%M") for time in time_points] + labels = [ + num2date(time, tz=datetime.now().astimezone().tzinfo).strftime( + "%Y\n%m/%d\n%H:%M" + ) + for time in time_points + ] # set ticks ax.set_xticks(time_points) @@ -87,11 +140,24 @@ def plot_vs_time( # --- set labels fig.supxlabel("UTC Time") - y_label = ( - f"{plot_info['label']}, {plot_info['unit_label']}" - if plot_info["unit_label"] == "%" - else f"{plot_info['label']} [{plot_info['unit_label']}]" - ) + y_label = plot_info["label"] + if plot_info["unit_label"] == "%": + y_label += ", %" + else: + if ( + 
"(PULS01ANA)" in y_label + or "(PULS01)" in y_label + or "(BSLN01)" in y_label + or "(MUON01)" in y_label + ): + separator = "-" if "-" in y_label else "/" + parts = y_label.split(separator) + + if len(parts) == 2 and separator == "-": + y_label += f" [{plot_info['unit']}]" + else: + y_label += f" [{plot_info['unit']}]" + fig.supylabel(y_label) @@ -138,18 +204,15 @@ def plot_histo( data_channel: DataFrame, fig: Figure, ax: Axes, plot_info: dict, color=None ): # --- histo range - # !! in the future take from par-settings - # needed for cuspEmax because with geant outliers not possible to view normal histo - hrange = {"keV": [0, 2500]} # take full range if not specified x_min = ( - hrange[plot_info["unit"]][0] - if plot_info["unit"] in hrange + plot_info["range"][0] + if plot_info["range"][0] is not None else data_channel[plot_info["parameter"]].min() ) x_max = ( - hrange[plot_info["unit"]][1] - if plot_info["unit"] in hrange + plot_info["range"][1] + if plot_info["range"][1] is not None else data_channel[plot_info["parameter"]].max() ) @@ -158,44 +221,44 @@ def plot_histo( bin_width = bwidth[plot_info["unit"]] if plot_info["unit"] in bwidth else 1 # Compute number of bins - if bin_width: - bin_edges = ( - np.arange(x_min, x_max + bin_width, bin_width / 5) - if plot_info["unit_label"] == "%" - else np.arange(x_min, x_max + bin_width, bin_width) + # sometimes e.g. A/E is always 0.0 => mean = 0 => var = NaN => x_min = NaN => cannot do np.arange + # why arange tho? why not just number of bins (xmax - xmin) / binwidth? + if not np.isnan(x_min): + if bin_width: + bin_edges = ( + np.arange(x_min, x_max + bin_width, bin_width / 5) + if plot_info["unit_label"] == "%" + else np.arange(x_min, x_max + bin_width, bin_width) + ) + # this never happens unless somebody puts 0 in the bwidth dictionary? 
+ else: + bin_edges = 50 + + # ------------------------------------------------------------------------- + # Plot histogram + data_channel[plot_info["parameter"]].plot.hist( + bins=bin_edges, + range=[x_min, x_max], + histtype="step", + linewidth=1.5, + ax=ax, + color=color, ) - else: - bin_edges = 50 # ------------------------------------------------------------------------- - # Plot histogram - data_channel[plot_info["parameter"]].plot.hist( - bins=bin_edges, - range=[x_min, x_max], - histtype="step", - linewidth=1.5, - ax=ax, - color=color, - ) - # ------------------------------------------------------------------------- + # plot the position of the two K lines + if plot_info["event_type"] == "K_events": + ax.axvline(x=1460.822, color="gray", linestyle="--") + ax.axvline(x=1524.6, color="gray", linestyle="--") + ax.set_yscale("log") x_label = ( f"{plot_info['label']}, {plot_info['unit_label']}" if plot_info["unit_label"] == "%" else f"{plot_info['label']} [{plot_info['unit_label']}]" ) - fig.supylabel(x_label) - - # saving x,y data into output files - ch_dict = { - "values": {}, - "mean": "", - "plot_info": plot_info, - "timestamp": {}, - } - - return ch_dict + fig.supxlabel(x_label) def plot_scatter( @@ -211,6 +274,10 @@ def plot_scatter( # edgecolors=color, ) + if plot_info["event_type"] == "K_events": + ax.axhline(y=1460.822, color="gray", linestyle="--") + ax.axhline(y=1524.6, color="gray", linestyle="--") + # --- time ticks/labels on x-axis ax.xaxis.set_major_formatter(DateFormatter("%Y\n%m/%d\n%H:%M")) @@ -222,18 +289,93 @@ def plot_scatter( ) fig.supylabel(y_label) - # saving x,y data into output files - ch_dict = { - "values": {"all": data_channel[plot_info["parameter"]], "resampled": []}, - "mean": "", - "plot_info": plot_info, - "timestamp": { - "all": data_channel["datetime"].dt.to_pydatetime(), - "resampled": [], - }, - } - return ch_dict +# ------------------------------------------------------------------------------- +# multi parameter plotting functions +# ------------------------------------------------------------------------------- + + +def plot_par_vs_par( + data_channel: DataFrame, fig: Figure, ax: Axes, plot_info: dict, color=None +): + par_x = plot_info["parameters"][0] + par_y = plot_info["parameters"][1] + + ax.scatter(data_channel[par_x], data_channel[par_y], color=color) + + labels = [] + for param in plot_info["parameters"]: + # construct label + label = ( + f"{plot_info['label'][param]}, {plot_info['unit_label'][param]}" + if plot_info["unit_label"][param] == "%" + else f"{plot_info['label'][param]} [{plot_info['unit_label'][param]}]" + ) + labels.append(label) + + fig.supxlabel(labels[0]) + fig.supylabel(labels[1]) + + # apply range + # parameter not in range means 1) none was given and defaulted to [None, None], or 2) this parameter was not mentioned in range + # ? cut data before plotting, not after? could be more efficient to plot smaller data sample? + if par_x in plot_info["range"]: + ax.set_xlim(plot_info["range"][par_x]) + if par_y in plot_info["range"]: + ax.set_ylim(plot_info["range"][par_y]) + + +# !!! WORK IN PROGRESS !!! 
+# hard to test because A/E vs E is weird with huge ranges of strange large and negative values, kills memory with many bins +# will come back to this later after clarifying what A/E makes sense to plot +# def plot_par_vs_par_hist(data_channel: DataFrame, fig: Figure, ax: Axes, plot_info: dict, color=None): +# # Compute number of bins +# # 0 = x, 1 = y +# nbins = []; ranges = [] +# # NaN check +# # anynan = False +# for param in plot_info["parameters"]: +# # range +# par_range = [data_channel[param].min(), data_channel[param].max()] + +# # bin width +# if param == "AoE_Custom": +# bin_width = 0.1 +# # par_range = [0,2] +# elif plot_info["unit"][param] == "keV": +# bin_width = 2.5 +# par_range = [0,3000] # avoid negative values +# else: +# bin_width = 1 # default + + +# # number of bins +# nbins.append( int( (par_range[1] - par_range[0])/bin_width ) ) +# ranges.append(par_range) +# # sometimes e.g. A/E is always 0.0 => mean = 0 => var = NaN => x_min = NaN => cannot plot range [nan, nan] +# # anynan = anynan or np.isnan(nbins[-1]) + +# print(nbins) +# print(ranges) +# # if not anynan: +# h, xedges, yedges, image = ax.hist2d(data_channel[plot_info["parameters"][0]], data_channel[plot_info["parameters"][1]], range=ranges, bins=nbins) + +# labels = [] +# for param in plot_info["parameters"]: +# label = ( +# f"{plot_info['label'][param]}, {plot_info['unit_label'][param]}" +# if plot_info["unit_label"][param] == "%" +# else f"{plot_info['label'][param]} [{plot_info['unit_label'][param]}]" +# ) +# labels.append(label) + +# fig.supxlabel(labels[0]) +# fig.supylabel(labels[1]) + +# del h +# del xedges +# del yedges +# del image # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -335,4 +477,6 @@ def plot_heatmap( "histogram": plot_histo, "scatter": plot_scatter, "heatmap": plot_heatmap, + "par vs par": plot_par_vs_par, + # "par vs par histo": plot_par_vs_par_hist } diff --git a/src/legend_data_monitor/plotting.py b/src/legend_data_monitor/plotting.py index 47c9a57..e006146 100644 --- a/src/legend_data_monitor/plotting.py +++ b/src/legend_data_monitor/plotting.py @@ -1,12 +1,22 @@ import io import shelve +from typing import Union +import matplotlib.patches as mpatches import matplotlib.pyplot as plt +import numpy as np from matplotlib.backends.backend_pdf import PdfPages from pandas import DataFrame from seaborn import color_palette -from . import analysis_data, plot_styles, status_plot, subsystem, utils +from . 
import ( + analysis_data, + plot_styles, + save_data, + string_visualization, + subsystem, + utils, +) # ------------------------------------------------------------------------- @@ -27,8 +37,10 @@ def make_subsystem_plots( ): pdf = PdfPages(plt_path + "-" + subsystem.type + ".pdf") out_dict = {} + aux_out_dict = {} + aux_ratio_out_dict = {} + aux_diff_out_dict = {} - # for param in subsys.parameters: for plot_title in plots: utils.logger.info( "\33[95m~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\33[0m" @@ -38,11 +50,11 @@ def make_subsystem_plots( "\33[95m~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\33[0m" ) + # ------------------------------------------------------------------------- + # settings checks + # ------------------------------------------------------------------------- + # --- original plot settings provided in json - # - parameter of interest - # - event type all/pulser/phy/Klines - # - variation (bool) - # - time window (for event rate or vs time plot) plot_settings = plots[plot_title] # --- defaults @@ -53,40 +65,123 @@ def make_subsystem_plots( # same, here need to account for unit label % if "variation" not in plot_settings: plot_settings["variation"] = False + # range for parameter + if "range" not in plot_settings: + plot_settings["range"] = [None, None] + # resampling: applies only to vs time plot + if "resampled" not in plot_settings: + plot_settings["resampled"] = None + # status plot requires no plot style option (for now) + if "plot_style" not in plot_settings: + plot_settings["plot_style"] = None + if plot_settings["plot_style"] != "par vs par" and ( + isinstance(plot_settings["parameters"], list) + and len(plot_settings["parameters"]) > 1 + ): + utils.logger.warning( + "\033[93m'%s' is not enabled for multiple parameters. " + + "We switch to the 'par vs par' option.\033[0m", + plot_settings["plot_style"], + ) + plot_settings["plot_style"] = "par vs par" + + # --- additional not in json # add saving info + plot where we save things plot_settings["saving"] = saving plot_settings["plt_path"] = plt_path + # --- checks + # resampled not provided for vs time -> set default + if plot_settings["plot_style"] == "vs time": + if not plot_settings["resampled"]: + plot_settings["resampled"] = "also" + utils.logger.warning( + "\033[93mNo 'resampled' option was specified. Both resampled and all entries will be plotted (otherwise you can try again using the option 'no', 'only', 'also').\033[0m" + ) + # resampled provided for irrelevant plot + elif plot_settings["resampled"]: + utils.logger.warning( + "\033[93mYou're using the option 'resampled' for a plot style that does not need it. 
For this reason, that option will be ignored.\033[0m" + ) + # ------------------------------------------------------------------------- # set up analysis data # ------------------------------------------------------------------------- # --- AnalysisData: - # - select parameter of interest + # - select parameter(s) of interest # - subselect type of events (pulser/phy/all/klines) + # - apply cuts + # - calculate special parameters if present + # - get channel mean # - calculate variation from mean, if asked + # note: subsystem.data contains: absolute value of a param, the respective value for aux channel (with ratio and diff already computed) data_analysis = analysis_data.AnalysisData( subsystem.data, selection=plot_settings ) - # cuts will be loaded but not applied; for our purposes, need to apply the cuts right away - # currently only K lines cut is used, and only data after cut is plotted -> just replace - data_analysis.data = data_analysis.apply_all_cuts() + # check if the dataframe is empty; if so, skip this parameter + if utils.check_empty_df(data_analysis): + continue utils.logger.debug(data_analysis.data) + # get list of parameters + params = plot_settings["parameters"] + if isinstance(params, str): + params = [params] + + # this is ok for geds, but for spms? maybe another function will be necessary for this???? + # note: this will not do anything in case the parameter is from hit tier + aux_analysis, aux_ratio_analysis, aux_diff_analysis = analysis_data.get_aux_df( + subsystem.data.copy(), params, plot_settings, "pulser01ana" + ) + + # ------------------------------------------------------------------------- + # switch to aux data (if specified in config file) + # ------------------------------------------------------------------------- + # check if the aux objects are not empty + # !!! not handled for spms + if not utils.check_empty_df(aux_ratio_analysis) and not utils.check_empty_df( + aux_diff_analysis + ): + if ( + "AUX_ratio" in plot_settings.keys() + and plot_settings["AUX_ratio"] is True + ): + data_to_plot = aux_ratio_analysis + if "AUX_diff" in plot_settings.keys() and plot_settings["AUX_diff"] is True: + data_to_plot = aux_diff_analysis + if ( + ("AUX_ratio" not in plot_settings and "AUX_diff" not in plot_settings) + or (plot_settings.get("AUX_ratio") is False) + or (plot_settings.get("AUX_diff") is False) + ): + data_to_plot = data_analysis + # if empty, ... + else: + data_to_plot = data_analysis + # ------------------------------------------------------------------------- # set up plot info # ------------------------------------------------------------------------- - # --- color settings using a pre-defined palette + # ------------------------------------------------------------------------- + # color settings using a pre-defined palette + # num colors needed = max number of channels per string # - find number of unique positions in each string # - get maximum occurring - if plot_settings["plot_structure"] == "per cc4": + plot_structure = ( + PLOT_STRUCTURE[plot_settings["plot_structure"]] + if "plot_structure" in plot_settings + else None + ) + + if plot_structure == "per cc4": if ( - data_analysis.data.iloc[0]["cc4_id"] is None - or data_analysis.data.iloc[0]["cc4_channel"] is None + data_to_plot.data.iloc[0]["cc4_id"] is None + or data_to_plot.data.iloc[0]["cc4_channel"] is None ): - if subsystem.type in ["spms", "pulser"]: + if subsystem.type in ["spms", "pulser", "pulser01ana", "bsln"]: utils.logger.error( "\033[91mPlotting per CC4 is not available for %s. 
Try again!\033[0m", subsystem.type, @@ -99,110 +194,194 @@ def make_subsystem_plots( exit() # ...if cc4 are present, group by them max_ch_per_string = ( - data_analysis.data.groupby("cc4_id")["cc4_channel"].nunique().max() + data_to_plot.data.groupby("cc4_id")["cc4_channel"].nunique().max() ) else: max_ch_per_string = ( - data_analysis.data.groupby("location")["position"].nunique().max() + data_to_plot.data.groupby("location")["position"].nunique().max() ) global COLORS COLORS = color_palette("hls", max_ch_per_string).as_hex() - # --- information needed for plot structure - # ! currently "parameters" is one parameter ! - # subject to change if one day want to plot multiple in one plot + # ------------------------------------------------------------------------- + # basic information needed for plot structure plot_info = { "title": plot_title, "subsystem": subsystem.type, - "locname": {"geds": "string", "spms": "fiber", "pulser": "puls"}[ - subsystem.type - ], - "unit": utils.PLOT_INFO[plot_settings["parameters"]]["unit"], - "plot_style": plot_settings["plot_style"], + "locname": { + "geds": "string", + "spms": "fiber", + "pulser": "puls", + "pulser01ana": "pulser01ana", + "FCbsln": "FC bsln", + "muon": "muon", + }[subsystem.type], } - # information for having the resampled or all entries (needed only for 'vs time' style option) - plot_info["resampled"] = ( - plot_settings["resampled"] if "resampled" in plot_settings else "" - ) + # parameters from plot settings to be simply propagated + plot_info["plot_style"] = plot_settings["plot_style"] + plot_info["time_window"] = plot_settings["time_window"] + plot_info["resampled"] = plot_settings["resampled"] + plot_info["range"] = plot_settings["range"] - if plot_settings["plot_style"] == "vs time": - if plot_info["resampled"] == "": - plot_info["resampled"] = "also" - utils.logger.warning( - "\033[93mNo 'resampled' option was specified. Both resampled and all entries will be plotted (otherwise you can try again using the option 'no', 'only', 'also').\033[0m" - ) - else: - if plot_info["resampled"] != "": - utils.logger.warning( - "\033[93mYou're using the option 'resampled' for a plot style that does not need it. 
For this reason, that option will be ignored.\033[0m" - ) + # information for shifting the channels or not (not needed only for the 'per channel' structure option) when plotting the std + plot_info["std"] = True if plot_structure == "per channel" else False - # --- information needed for plot style - plot_info["label"] = utils.PLOT_INFO[plot_settings["parameters"]]["label"] - # unit label should be % if variation was asked - plot_info["unit_label"] = ( - "%" if plot_settings["variation"] else plot_info["unit"] - ) - plot_info["cuts"] = plot_settings["cuts"] if "cuts" in plot_settings else "" - # time window might be needed fort he vs time function - plot_info["time_window"] = plot_settings["time_window"] - # threshold values are needed for status map; might be needed for plotting limits on canvas too - if subsystem.type != "pulser": - plot_info["limits"] = ( - utils.PLOT_INFO[plot_settings["parameters"]]["limits"][subsystem.type][ - "variation" - ] - if plot_settings["variation"] - else utils.PLOT_INFO[plot_settings["parameters"]]["limits"][ - subsystem.type - ]["absolute"] + # ------------------------------------------------------------------------- + # information needed for plot style depending on parameters + + # first, treat it like multiple parameters, add dictionary to each entry with values for each parameter + multi_param_info = ["unit", "label", "unit_label", "limits", "event_type"] + for info in multi_param_info: + plot_info[info] = {} + + # name(s) of parameter(s) to plot - always list + plot_info["parameters"] = params + # preserve original param_mean before potentially adding _var to name + plot_info["param_mean"] = [x + "_mean" for x in params] + # add _var if variation asked + if plot_settings["variation"]: + plot_info["parameters"] = [x + "_var" for x in params] + + for param in plot_info["parameters"]: + # plot info should contain final parameter to plot i.e. _var if var is asked + # unit, label and limits are connected to original parameter name + # this is messy AF need to rethink + param_orig = param.rstrip("_var") + plot_info["unit"][param] = utils.PLOT_INFO[param_orig]["unit"] + plot_info["label"][param] = utils.PLOT_INFO[param_orig]["label"] + + # modify the labels in case we perform a ratio/diff with aux channel data + if param_orig in utils.PARAMETER_TIERS.keys(): + if ( + "AUX_ratio" in plot_settings.keys() + and utils.PARAMETER_TIERS[param_orig] != "hit" + ): + if plot_settings["AUX_ratio"] is True: + plot_info["label"][param] += ( + " / " + plot_info["label"][param] + "(PULS01ANA)" + ) + if ( + "AUX_diff" in plot_settings.keys() + and utils.PARAMETER_TIERS[param_orig] != "hit" + ): + if plot_settings["AUX_diff"] is True: + plot_info["label"][param] += ( + " - " + plot_info["label"][param] + "(PULS01ANA)" + ) + + keyword = "variation" if plot_settings["variation"] else "absolute" + plot_info["limits"][param] = ( + utils.PLOT_INFO[param_orig]["limits"][subsystem.type][keyword] + if subsystem.type in utils.PLOT_INFO[param_orig]["limits"].keys() + else [None, None] ) - plot_info["parameter"] = ( - plot_settings["parameters"] + "_var" - if plot_info["unit_label"] == "%" - else plot_settings["parameters"] - ) # could be multiple in the future! 
- plot_info["param_mean"] = plot_settings["parameters"] + "_mean" + # unit label should be % if variation was asked + plot_info["unit_label"][param] = ( + "%" if plot_settings["variation"] else plot_info["unit"][param_orig] + ) + plot_info["event_type"][param] = plot_settings["event_type"] + + if len(params) == 1: + # change "parameters" to "parameter" - for single-param plotting functions + plot_info["parameter"] = plot_info["parameters"][0] + # now, if it was actually a single parameter, convert {param: value} dict structure to just the value + # this is how one-parameter plotting functions are designed + for info in multi_param_info: + plot_info[info] = plot_info[info][plot_info["parameter"]] + # same for mean + plot_info["param_mean"] = plot_info["param_mean"][0] + + # threshold values are needed for status map; might be needed for plotting limits on canvas too + # only needed for single param plots (for now) + if subsystem.type not in ["pulser", "pulser01ana", "FCbsln", "muon"]: + keyword = "variation" if plot_settings["variation"] else "absolute" + plot_info["limits"] = utils.PLOT_INFO[params[0]]["limits"][ + subsystem.type + ][keyword] + + # needed for grey lines for K lines, in case we are looking at energy itself (not event rate for example) + plot_info["event_type"] = plot_settings["event_type"] # ------------------------------------------------------------------------- - # call chosen plot structure + # call chosen plot structure + plotting # ------------------------------------------------------------------------- - # choose plot function based on user requested structure e.g. per channel or all ch together - plot_structure = PLOT_STRUCTURE[plot_settings["plot_structure"]] - utils.logger.debug("Plot structure: " + plot_settings["plot_structure"]) - - # plotting - plot_structure(data_analysis.data, plot_info, pdf) + if "exposure" in plot_info["parameters"]: + string_visualization.exposure_plot( + subsystem, data_to_plot.data, plot_info, pdf + ) + else: + utils.logger.debug("Plot structure: %s", plot_settings["plot_structure"]) + plot_structure(data_to_plot.data, plot_info, pdf) # For some reason, after some plotting functions the index is set to "channel". - # We need to set it back otherwise status_plot.py gets crazy and everything crashes. - data_analysis.data = data_analysis.data.reset_index() + # We need to set it back otherwise string_visualization.py gets crazy and everything crashes. + data_to_plot.data = data_to_plot.data.reset_index() # ------------------------------------------------------------------------- # saving dataframe + plot info # ------------------------------------------------------------------------- - - par_dict_content = {} - - # saving dataframe data for each parameter - par_dict_content["df_" + plot_info["subsystem"]] = data_analysis.data - par_dict_content["plot_info"] = plot_info + # here we are not checking if we are plotting one or more than one parameter + # the output dataframe and plot_info objects are merged for more than one parameters + # this will be split at a later stage, when building the output dictionary through utils.build_out_dict(...) + + # --- save shelf + # normal geds values (??? do we want the rescaled ones to be saved as shelf?) 
+ par_dict_content = save_data.save_df_and_info(data_analysis.data, plot_info) + # aux values as shelf (necessary to get the right mean) - if not empty + if not utils.check_empty_df(aux_analysis): + aux_plot_info = plot_info.copy() + aux_plot_info["subsystem"] = "pulser01ana" + aux_par_dict_content = save_data.save_df_and_info( + aux_analysis.data, aux_plot_info + ) + if not utils.check_empty_df(aux_ratio_analysis): + aux_ratio_plot_info = plot_info.copy() + aux_ratio_plot_info["subsystem"] = "pulser01anaRatio" + aux_ratio_par_dict_content = save_data.save_df_and_info( + aux_ratio_analysis.data, aux_ratio_plot_info + ) + if not utils.check_empty_df(aux_diff_analysis): + aux_diff_plot_info = plot_info.copy() + aux_diff_plot_info["subsystem"] = "pulser01anaDiff" + aux_diff_par_dict_content = save_data.save_df_and_info( + aux_diff_analysis.data, aux_diff_plot_info + ) + # --- save hdf + save_data.save_hdf( + saving, + plt_path + f"-{subsystem.type}.hdf", + data_analysis, + "pulser01ana", + aux_analysis, + aux_ratio_analysis, + aux_diff_analysis, + plot_info, + ) # ------------------------------------------------------------------------- # call status plot # ------------------------------------------------------------------------- if "status" in plot_settings and plot_settings["status"]: - if subsystem.type == "pulser": + if subsystem.type in ["pulser", "pulser01ana", "FCbsln", "muon"]: utils.logger.debug( - "Thresholds are not enabled for pulser! Use you own eyes to do checks there" + f"Thresholds are not enabled for {subsystem.type}! Use you own eyes to do checks there" ) else: - _ = status_plot.status_plot( - subsystem, data_analysis.data, plot_info, pdf - ) + # take care of one parameter and multiple parameters cases + for param in params: + if len(params) == 1: + _ = string_visualization.status_plot( + subsystem, data_analysis.data, plot_info, pdf + ) + if len(params) > 1: + # retrieved the necessary info for the specific parameter under study (just in the multi-parameters case) + plot_info_param = save_data.get_param_info(param, plot_info) + _ = string_visualization.status_plot( + subsystem, data_analysis.data, plot_info_param, pdf + ) # ------------------------------------------------------------------------- # save results @@ -210,16 +389,47 @@ def make_subsystem_plots( # building a dictionary with dataframe/plot_info to be later stored in a shelve object if saving is not None: - out_dict = utils.build_out_dict( - plot_settings, plot_info, par_dict_content, out_dict, saving, plt_path + out_dict = save_data.build_out_dict( + plot_settings, par_dict_content, out_dict ) + # check if the parameter is a hit or special parameter (still need to include MORE PARAMS case) + params = params[0] + if ( + params in utils.PARAMETER_TIERS.keys() + and utils.PARAMETER_TIERS[params] != "hit" + ) and params not in utils.SPECIAL_PARAMETERS: + # aux data + aux_out_dict = save_data.build_out_dict( + plot_settings, aux_par_dict_content, aux_out_dict + ) + # subsystem data / aux data + aux_ratio_out_dict = save_data.build_out_dict( + plot_settings, aux_ratio_par_dict_content, aux_ratio_out_dict + ) + # subsystem data - aux data + aux_diff_out_dict = save_data.build_out_dict( + plot_settings, aux_diff_par_dict_content, aux_diff_out_dict + ) + # save in shelve object, overwriting the already existing file with new content (either completely new or new bunches) if saving is not None: out_file = shelve.open(plt_path + f"-{subsystem.type}") out_file["monitoring"] = out_dict out_file.close() + aux_out_file = 
shelve.open(plt_path + "-pulser01ana") + aux_out_file["monitoring"] = aux_out_dict + aux_out_file.close() + + aux_ratio_out_file = shelve.open(plt_path + "-pulser01anaRatio") + aux_ratio_out_file["monitoring"] = aux_ratio_out_dict + aux_ratio_out_file.close() + + aux_diff_out_file = shelve.open(plt_path + "-pulser01anaDiff") + aux_diff_out_file["monitoring"] = aux_diff_out_dict + aux_diff_out_file.close() + # save in pdf object pdf.close() @@ -283,27 +493,35 @@ def plot_per_ch(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): COLORS = color_palette("hls", max_ch_per_string).as_hex() # plot selected style on this axis - _ = plot_style( - data_channel, fig, axes[ax_idx], plot_info, color=COLORS[ax_idx] - ) + plot_style(data_channel, fig, axes[ax_idx], plot_info, color=COLORS[ax_idx]) - # --- add summary to axis + # --- add summary to axis - only for single channel plots # name, position and mean are unique for each channel - take first value - t = data_channel.iloc[0][ - ["channel", "position", "name", plot_info["param_mean"]] - ] - - text = ( - t["name"] - + "\n" - + f"channel {t['channel']}\n" - + f"position {t['position']}\n" - + ( - f"mean {round(t[plot_info['param_mean']],3)} [{plot_info['unit']}]" - if t[plot_info["param_mean"]] is not None + df_text = data_channel.iloc[0][["channel", "position", "name"]] + text = df_text["name"] + "\n" + f"channel {df_text['channel']}\n" + text += ( + f"position {df_text['position']}" + if plot_info["subsystem"] + not in ["pulser", "pulser01ana", "FCbsln", "muon"] + else "" + ) + if len(plot_info["parameters"]) == 1: + # in case of 1 parameter, "param mean" entry is a single string param_mean + # in case of > 1, it's a list of parameters -> ignore for now and plot mean only for 1 param case + par_mean = data_channel.iloc[0][ + plot_info["param_mean"] + ] # single number + if plot_info["parameter"] != "event_rate": + fwhm_ch = ( + 0 # get_fwhm_for_fixed_ch(data_channel, plot_info["parameter"]) + ) + text += f"\nFWHM {fwhm_ch}" if fwhm_ch != 0 else "" + + text += "\n" + ( + f"mean {round(par_mean,3)} [{plot_info['unit']}]" + if par_mean is not None else "" ) # handle with care mean='None' situations - ) axes[ax_idx].text(1.01, 0.5, text, transform=axes[ax_idx].transAxes) # add grid @@ -312,14 +530,15 @@ def plot_per_ch(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): # remove automatic y label since there will be a shared one axes[ax_idx].set_ylabel("") - # plot line at 0% for variation - if plot_info["unit_label"] == "%": - axes[ax_idx].axhline(y=0, color="gray", linestyle="--") + # plot limits + # check if "limits" present, is not for pulser (otherwise crash when plotting e.g. 
event rate) + if "limits" in plot_info: + plot_limits(axes[ax_idx], plot_info["parameters"], plot_info["limits"]) ax_idx += 1 # ------------------------------------------------------------------------------- - if plot_info["subsystem"] == "pulser": + if plot_info["subsystem"] in ["pulser", "pulser01ana", "FCbsln", "muon"]: y_title = 1.05 axes[0].set_title("") else: @@ -327,18 +546,16 @@ def plot_per_ch(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): axes[0].set_title(f"{plot_info['locname']} {location}") fig.suptitle(f"{plot_info['subsystem']} - {plot_info['title']}", y=y_title) - if pdf: - plt.savefig(pdf, format="pdf", bbox_inches="tight") - # figures are retained until explicitly closed; close to not consume too much memory - plt.close() + save_pdf(plt, pdf) return fig def plot_per_cc4(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): - if plot_info["subsystem"] == "pulser": + if plot_info["subsystem"] in ["pulser", "pulser01ana", "FCbsln", "muon"]: utils.logger.error( - "\033[91mPlotting per CC4 is not available for the pulser channel.\nTry again with a different plot structure!\033[0m" + "\033[91mPlotting per CC4 is not available for %s channel.\nTry again with a different plot structure!\033[0m", + plot_info["subsystem"], ) exit() # --- choose plot function based on user requested style e.g. vs time or histogram @@ -364,9 +581,9 @@ def plot_per_cc4(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): ["name", "position", "location", "cc4_channel", "cc4_id"] ] labels["channel"] = labels.index - labels["label"] = labels[ - ["location", "position", "channel", "name", "cc4_channel"] - ].apply(lambda x: f"s{x[0]}-p{x[1]}-ch{str(x[2]).zfill(3)}-{x[3]}-{x[4]}", axis=1) + labels["label"] = labels[["location", "position", "name", "cc4_channel"]].apply( + lambda x: f"s{x[0]}-p{x[1]}-{x[2]}-cc4 ch.{x[3]}", axis=1 + ) # put it in the table data_analysis = data_analysis.set_index("channel") data_analysis["label"] = labels["label"] @@ -390,9 +607,20 @@ def plot_per_cc4(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): labels = [] for label, data_channel in data_cc4_id.groupby("label"): cc4_channel = (label.split("-"))[-1] - utils.logger.debug(f"...... channel {cc4_channel}") - _ = plot_style(data_channel, fig, axes[ax_idx], plot_info, COLORS[col_idx]) + utils.logger.debug(f"...... {cc4_channel}") + plot_style(data_channel, fig, axes[ax_idx], plot_info, COLORS[col_idx]) + labels.append(label) + if len(plot_info["parameters"]) == 1: + if plot_info["parameter"] != "event_rate": + fwhm_ch = ( + 0 # get_fwhm_for_fixed_ch(data_channel, plot_info["parameter"]) + ) + labels[-1] = ( + label + f" - FWHM: {fwhm_ch}" if fwhm_ch != 0 else label + ) + else: + labels[-1] = label col_idx += 1 # add grid @@ -403,24 +631,21 @@ def plot_per_cc4(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): axes[ax_idx].set_ylabel("") axes[ax_idx].legend(labels=labels, loc="center left", bbox_to_anchor=(1, 0.5)) - # plot the position of the two K lines - if plot_info["parameter"] == "K_events": - axes[ax_idx].axhline(y=1460.822, color="gray", linestyle="--") - axes[ax_idx].axhline(y=1524.6, color="gray", linestyle="--") + # plot limits + # check if "limits" present, is not for pulser (otherwise crash when plotting e.g. 
event rate) + if "limits" in plot_info: + plot_limits(axes[ax_idx], plot_info["parameters"], plot_info["limits"]) - # plot line at 0% for variation - if plot_info["unit_label"] == "%": - axes[ax_idx].axhline(y=0, color="gray", linestyle="--") ax_idx += 1 # ------------------------------------------------------------------------------- - y_title = 1.05 if plot_info["subsystem"] == "pulser" else 1.01 + y_title = ( + 1.05 + if plot_info["subsystem"] in ["pulser", "pulser01ana", "FCbsln", "muon"] + else 1.01 + ) fig.suptitle(f"{plot_info['subsystem']} - {plot_info['title']}", y=y_title) - # if no pdf is specified, then the function is not being called by make_subsystem_plots() - if pdf: - plt.savefig(pdf, format="pdf", bbox_inches="tight") - # figures are retained until explicitly closed; close to not consume too much memory - plt.close() + save_pdf(plt, pdf) return fig @@ -481,8 +706,18 @@ def plot_per_string(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): col_idx = 0 labels = [] for label, data_channel in data_location.groupby("label"): - _ = plot_style(data_channel, fig, axes[ax_idx], plot_info, COLORS[col_idx]) + plot_style(data_channel, fig, axes[ax_idx], plot_info, COLORS[col_idx]) labels.append(label) + if len(plot_info["parameters"]) == 1: + if plot_info["parameter"] != "event_rate": + fwhm_ch = ( + 0 # get_fwhm_for_fixed_ch(data_channel, plot_info["parameter"]) + ) + labels[-1] = ( + label + f" - FWHM: {fwhm_ch}" if fwhm_ch != 0 else label + ) + else: + labels[-1] = label col_idx += 1 # add grid @@ -493,25 +728,22 @@ def plot_per_string(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): axes[ax_idx].set_ylabel("") axes[ax_idx].legend(labels=labels, loc="center left", bbox_to_anchor=(1, 0.5)) - # plot the position of the two K lines - if plot_info["parameter"] == "K_events": - axes[ax_idx].axhline(y=1460.822, color="gray", linestyle="--") - axes[ax_idx].axhline(y=1524.6, color="gray", linestyle="--") + # plot limits + # check if "limits" present, is not for pulser (otherwise crash when plotting e.g. 
event rate) + if "limits" in plot_info: + plot_limits(axes[ax_idx], plot_info["parameters"], plot_info["limits"]) - # plot line at 0% for variation - if plot_info["unit_label"] == "%": - axes[ax_idx].axhline(y=0, color="gray", linestyle="--") ax_idx += 1 # ------------------------------------------------------------------------------- - y_title = 1.05 if plot_info["subsystem"] == "pulser" else 1.01 + y_title = ( + 1.05 + if plot_info["subsystem"] in ["pulser", "pulser01ana", "FCbsln", "muon"] + else 1.01 + ) fig.suptitle(f"{plot_info['subsystem']} - {plot_info['title']}", y=y_title) - # if no pdf is specified, then the function is not being called by make_subsystem_plots() - if pdf: - plt.savefig(pdf, format="pdf", bbox_inches="tight") - # figures are retained until explicitly closed; close to not consume too much memory - plt.close() + save_pdf(plt, pdf) return fig @@ -523,8 +755,6 @@ def plot_array(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): ) exit() - import matplotlib.patches as mpatches - # --- choose plot function based on user requested style plot_style = plot_styles.PLOT_STYLE[plot_info["plot_style"]] utils.logger.debug("Plot style: " + plot_info["plot_style"]) @@ -585,28 +815,32 @@ def plot_array(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): labels.append(label.split("-")[-1]) channels.append(map_dict[str(location)][str(position)]) - values_per_string.append(data_channel[plot_info["parameter"]].unique()[0]) - channels_per_string.append(map_dict[str(location)][str(position)]) - - # get average of plotted parameter per string (print horizontal line) - avg_of_string = sum(values_per_string) / len(values_per_string) - axes.hlines( - y=avg_of_string, - xmin=min(channels_per_string), - xmax=max(channels_per_string), - color="k", - linestyle="-", - linewidth=1, - ) - utils.logger.debug(f"..... average: {round(avg_of_string, 2)}") + if len(plot_info["parameters"]) == 1: + values_per_string.append( + data_channel[plot_info["parameter"]].unique()[0] + ) + channels_per_string.append(map_dict[str(location)][str(position)]) + + if len(plot_info["parameters"]) == 1: + # get average of plotted parameter per string (print horizontal line) + avg_of_string = sum(values_per_string) / len(values_per_string) + axes.hlines( + y=avg_of_string, + xmin=min(channels_per_string), + xmax=max(channels_per_string), + color="k", + linestyle="-", + linewidth=1, + ) + utils.logger.debug(f"..... 
average: {round(avg_of_string, 2)}") - # get legend entry (print string + colour) - legend.append( - mpatches.Patch( - color=COLORS[col_idx], - label=f"s{location} - avg: {round(avg_of_string, 2)} {plot_info['unit_label']}", + # get legend entry (print string + colour) + legend.append( + mpatches.Patch( + color=COLORS[col_idx], + label=f"s{location} - avg: {round(avg_of_string, 2)} {plot_info['unit_label']}", + ) ) - ) # LAST thing to update col_idx += 1 @@ -637,14 +871,9 @@ def plot_array(data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): fig.supxlabel("") fig.suptitle(f"{plot_info['subsystem']} - {plot_info['title']}", y=1.05) - # ------------------------------------------------------------------------------- - # if no pdf is specified, then the function is not being called by make_subsystem_plots() - if pdf: - plt.savefig(pdf, format="pdf", bbox_inches="tight") - # figures are retained until explicitly closed; close to not consume too much memory - plt.close() + save_pdf(plt, pdf) - # return fig + return fig # ------------------------------------------------------------------------------- @@ -663,7 +892,6 @@ def plot_per_fiber_and_barrel(data_analysis: DataFrame, plot_info: dict, pdf: Pd # - each figure has subplots with N columns and M rows where N is the number of fibers, and M is the number of positions (top/bottom -> 2) # this function will only work for SiPMs requiring a columns 'barrel' in the channel map # add a check in config settings check to make sure geds are not called with this structure to avoid crash - pass # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -762,13 +990,13 @@ def plot_per_barrel_and_position( det_idx += 1 continue - ch_dict = plot_style( + plot_style( data_position, fig, axes, plot_info, color=COLORS[det_idx] ) labels.append(data_position["label"]) if channel[det_idx] not in par_dict.keys(): - par_dict[channel[det_idx]] = ch_dict + par_dict[channel[det_idx]] = {} # set label as title for each axes text = ( @@ -804,6 +1032,63 @@ def plot_per_barrel_and_position( return par_dict +# ------------------------------------------------------------------------------- +# plotting functions +# ------------------------------------------------------------------------------- + + +def get_fwhm_for_fixed_ch(data_channel: DataFrame, parameter: str) -> float: + """Calculate the FWHM of a given parameter for a given channel.""" + entries = data_channel[parameter] + entries_avg = np.mean(entries) + fwhm_ch = 2.355 * np.sqrt(np.mean(np.square(entries - entries_avg))) + + if fwhm_ch != 0: + # Determine the number of decimal places based on the magnitude of the value + decimal_places = max(0, int(-np.floor(np.log10(abs(fwhm_ch)))) + 2) + # Format the FWHM value with the appropriate number of decimal places + formatted_fwhm = "{:.{dp}f}".format(fwhm_ch, dp=decimal_places) + # Remove trailing zeros from the formatted value + formatted_fwhm = formatted_fwhm.rstrip("0").rstrip(".") + + return formatted_fwhm + else: + return 0 + + +def plot_limits(ax: plt.Axes, params: list, limits: Union[list, dict]): + """Plot limits (if present) on the plot. 
The multi-params case is carefully handled.""" + # one parameter case + if (isinstance(params, list) and len(params) == 1) or isinstance(params, str): + if not all([x is None for x in limits]): + if limits[0] is not None: + ax.axhline(y=limits[0], color="red", linestyle="--") + if limits[1] is not None: + ax.axhline(y=limits[1], color="red", linestyle="--") + # multi-parameters case + else: + for idx, param in enumerate(params): + limits_param = limits[param] + if not all([x is None for x in limits_param]): + if limits_param[0] is not None: + if idx == 0: + ax.axvline(x=limits_param[0], color="red", linestyle="--") + if idx == 1: + ax.axhline(y=limits_param[0], color="red", linestyle="--") + if limits_param[1] is not None: + if idx == 0: + ax.axvline(x=limits_param[1], color="red", linestyle="--") + if idx == 1: + ax.axhline(y=limits_param[1], color="red", linestyle="--") + + +def save_pdf(plt, pdf: PdfPages): + """Save the plot to a PDF file. The plot is closed after save_data.""" + if pdf: + plt.savefig(pdf, format="pdf", bbox_inches="tight") + plt.close() + + # ------------------------------------------------------------------------------- # mapping user keywords to plot style functions # ------------------------------------------------------------------------------- diff --git a/src/legend_data_monitor/run.py b/src/legend_data_monitor/run.py index f722eed..2fd47fd 100644 --- a/src/legend_data_monitor/run.py +++ b/src/legend_data_monitor/run.py @@ -17,6 +17,7 @@ def main(): $ legend-data-monitor --help # help section Example JSON configuration file: + .. code-block:: json { "dataset": { @@ -66,7 +67,9 @@ def main(): subparsers = parser.add_subparsers() # functions for different purpouses + add_user_scdb(subparsers) add_user_config_parser(subparsers) + add_user_bunch_parser(subparsers) add_user_rsync_parser(subparsers) add_auto_prod_parser(subparsers) @@ -85,6 +88,40 @@ def main(): args.func(args) +def add_user_scdb(subparsers): + """Configure :func:`.core.control_plots` command line interface.""" + parser_auto_prod = subparsers.add_parser( + "user_scdb", + description="""Retrieve Slow Control data from database by giving a full config file with parameters/subsystems info to plot. Available only when working in LNGS machines.""", + ) + parser_auto_prod.add_argument( + "--config", + help="""Path to config file (e.g. 
\"some_path/config_L200_r001_phy.json\").""", + ) + parser_auto_prod.add_argument( + "--port", + help="""Local port.""", + ) + parser_auto_prod.add_argument( + "--pswd", + help="""Password to get access to the Slow Control database (check on Confluence).""", + ) + parser_auto_prod.set_defaults(func=user_scdb_cli) + + +def user_scdb_cli(args): + """Pass command line arguments to :func:`.core.retrieve_scdb`.""" + # get the path to the user config file + config_file = args.config + # get the local port + port = args.port + # get the password to the SC database + password = args.pswd + + # start loading data + legend_data_monitor.core.retrieve_scdb(config_file, port, password) + + def add_user_config_parser(subparsers): """Configure :func:`.core.control_plots` command line interface.""" parser_auto_prod = subparsers.add_parser( @@ -107,6 +144,34 @@ def user_config_cli(args): legend_data_monitor.core.control_plots(config_file) +def add_user_bunch_parser(subparsers): + """Configure :func:`.core.control_plots` command line interface.""" + parser_auto_prod = subparsers.add_parser( + "user_bunch", + description="""Inspect LEGEND HDF5 (LH5) processed data by giving a full config file with parameters/subsystems info to plot. Files will be bunched in groups of n_files files each, and every time the code is run you will append new data to the previously generated ones.""", + ) + parser_auto_prod.add_argument( + "--config", + help="""Path to config file (e.g. \"some_path/config_L200_r001_phy.json\").""", + ) + parser_auto_prod.add_argument( + "--n_files", + help="""Number (int) of files of a given run you want to inspect at each cycle.""", + ) + parser_auto_prod.set_defaults(func=user_bunch_cli) + + +def user_bunch_cli(args): + """Pass command line arguments to :func:`.core.control_plots`.""" + # get the path to the user config file + config_file = args.config + # get the number of files for each cycle + n_files = args.n_files + + # start loading data & generating plots + legend_data_monitor.core.control_plots(config_file, n_files) + + def add_user_rsync_parser(subparsers): """Configure :func:`.core.control_rsync_plots` command line interface.""" parser_auto_prod = subparsers.add_parser( diff --git a/src/legend_data_monitor/save_data.py b/src/legend_data_monitor/save_data.py new file mode 100644 index 0000000..06392d1 --- /dev/null +++ b/src/legend_data_monitor/save_data.py @@ -0,0 +1,757 @@ +import os +import shelve + +import h5py +from pandas import DataFrame, concat, read_hdf + +from . import analysis_data, utils + +# ------------------------------------------------------------------------- +# Saving related functions +# ------------------------------------------------------------------------- + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# SHELVE OBJECTS +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +def save_df_and_info(df: DataFrame, plot_info: dict) -> dict: + """Return a dictionary containing a dataframe for the parameter(s) under study for a given subsystem. 
The plotting info are saved too.""" + columns_to_drop = [ + "name", + "location", + "position", + "cc4_channel", + "cc4_id", + "status", + "det_type", + "flag_muon", + "flag_pulser", + "flag_fc_bsln", + "daq_crate", + "daq_card", + "HV_card", + "HV_channel", + ] + columns_existing = [col for col in columns_to_drop if col in df.copy().columns] + + if columns_existing: + df = df.drop(columns=columns_existing) + + par_dict_content = { + "df_" + plot_info["subsystem"]: df, # saving dataframe + "plot_info": plot_info, # saving plotting info + } + + return par_dict_content + + +def build_out_dict( + plot_settings: list, + par_dict_content: dict, + out_dict: dict, +): + """ + Build the output dictionary based on the input 'saving' option. + + Parameters + ---------- + plot_settings + Dictionary with settings for plotting. It contains the following keys: 'parameters', 'event_type', 'plot_structure', 'resampled', 'plot_style', 'variation', 'time_window', 'range', 'saving', 'plt_path' + par_dict_content + Dictionary containing, for a given parameter, the dataframe with data and a dictionary with info for plotting (e.g. plot style, title, units, labels, ...) + out_dict + Dictionary that is returned, containing the objects that need to be saved. + """ + saving = plot_settings["saving"] if "saving" in plot_settings.keys() else None + plt_path = plot_settings["plt_path"] if "plt_path" in plot_settings.keys() else None + plot_info = par_dict_content["plot_info"] + + # we overwrite the object with a new one + if saving == "overwrite": + out_dict = build_dict(plot_settings, plot_info, par_dict_content, out_dict) + + # we retrieve the already existing shelve object, and we append new things to it; the parameter here is fixed + if saving == "append": + # the file does not exist, so we create it + if not os.path.exists(plt_path + "-" + plot_info["subsystem"] + ".dat"): + out_dict = build_dict(plot_settings, plot_info, par_dict_content, out_dict) + + # the file exists, so we are going to append data + else: + utils.logger.info( + "There is already a file containing output data. Appending new data to it right now..." + ) + # open already existing shelve file + with shelve.open(plt_path + "-" + plot_info["subsystem"], "r") as shelf: + old_dict = dict(shelf) + + # one parameter case + if ( + isinstance(plot_settings["parameters"], list) + and len(plot_settings["parameters"]) == 1 + ) or isinstance(plot_settings["parameters"], str): + utils.logger.debug("... appending new data for the one-parameter case") + out_dict = append_new_data( + plot_settings["parameters"][0] + if isinstance(plot_settings["parameters"], list) + else plot_settings["parameters"], + plot_settings, + plot_info, + old_dict, + par_dict_content, + plt_path, + ) + # multi-parameters case + if ( + isinstance(plot_settings["parameters"], list) + and len(plot_settings["parameters"]) > 1 + ): + utils.logger.debug( + "... 
appending new data for the multi-parameters case" + ) + for param in plot_settings["parameters"]: + out_dict = append_new_data( + param, + plot_settings, + plot_info, + old_dict, + par_dict_content, + plt_path, + ) + + return out_dict + + +def build_dict( + plot_settings: list, plot_info: list, par_dict_content: dict, out_dict: dict +) -> dict: + """Create a dictionary with the correct format for being saved in the final shelve object.""" + # get the parameters under study (can be one, can be more for 'par vs par' plot style) + params = ( + plot_info["parameters"] + if "parameters" in plot_info.keys() + else plot_info["parameter"] + ) + + # one parameter + if (isinstance(params, list) and len(params) == 1) or isinstance(params, str): + utils.logger.debug("Building the output dictionary in the one-parameter case") + if isinstance(params, list): + param = params[0] + if isinstance(params, str): + param = params + parameter = param.split("_var")[0] if "_var" in param else param + par_dict_content["plot_info"] = get_param_info( + param, par_dict_content["plot_info"] + ) + # --- building up the output dictionary + # event type key is already there + if plot_settings["event_type"] in out_dict.keys(): + out_dict[plot_settings["event_type"]][parameter] = par_dict_content + # event type key is NOT there + else: + # empty dictionary (not filled yet) + if len(out_dict.keys()) == 0: + out_dict = {plot_settings["event_type"]: {parameter: par_dict_content}} + # the dictionary already contains something (but for another event type selection) + else: + out_dict[plot_settings["event_type"]] = {parameter: par_dict_content} + # more than one parameter + if isinstance(params, list) and len(params) > 1: + utils.logger.debug( + "Building the output dictionary in the multi-parameters case" + ) + # we have to polish our dataframe and plot_info dictionary from other parameters... 
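+                # (e.g. with parameters = ["cuspEmax_var", "baseline_var"], each parameter ends up with its own cleaned plot_info and dataframe, stored under its own key of the output dictionary)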
+ # --- original plot info + # ::::::::::::::::::::::::::::::::::::::::::: example 'plot_info_all' ::::::::::::::::::::::::::::::::::::::::::: + # {'title': 'Plotting cuspEmax vs baseline', 'subsystem': 'geds', 'locname': 'string', + # 'plot_style': 'par vs par', 'time_window': '10T', 'resampled': 'no', 'range': [None, None], 'std': False, + # 'unit': {'cuspEmax_var': 'ADC', 'baseline_var': 'ADC'}, + # 'label': {'cuspEmax_var': 'cuspEmax', 'baseline_var': 'FPGA baseline'}, + # 'unit_label': {'cuspEmax_var': '%', 'baseline_var': '%'}, + # 'limits': {'cuspEmax_var': [-0.025, 0.025], 'baseline_var': [-5, 5]}, + # 'parameters': ['cuspEmax_var', 'baseline_var'], + # 'param_mean': ['cuspEmax_mean', 'baseline_mean']} + plot_info_all = par_dict_content["plot_info"] + + # --- original dataframes coming from the analysis + df_all = par_dict_content["df_" + plot_info_all["subsystem"]] + + for param in params: + parameter = param.split("_var")[0] if "_var" in param else param + + # --- cleaned plot info + # ::::::::::::::::::::::::::::::::::::::::::: example 'plot_info_param' ::::::::::::::::::::::::::::::::::::::::::: + # {'title': 'Prove in corso', 'subsystem': 'geds', 'locname': 'string', 'plot_style': 'par vs par', 'time_window': '10T', + # 'resampled': 'no', 'range': [None, None], 'std': False, 'unit': 'ADC', 'label': 'cuspEmax', 'unit_label': '%', + # 'limits': [-0.025, 0.025], 'param_mean': 'cuspEmax_mean', 'parameter': 'cuspEmax_var', 'variation': True} + plot_info_param = get_param_info(param, plot_info_all) + + # --- cleaned df + df_param = get_param_df(parameter, df_all) + + # --- rebuilding the 'par_dict_content' for the parameter under study + par_dict_content = save_df_and_info(df_param, plot_info_param) + + # --- building up the output dictionary + # event type key is already there + if plot_settings["event_type"] in out_dict.keys(): + out_dict[plot_settings["event_type"]][parameter] = par_dict_content + # event type key is NOT there + else: + # empty dictionary (not filled yet) + if len(out_dict.keys()) == 0: + out_dict = { + plot_settings["event_type"]: {parameter: par_dict_content} + } + # the dictionary already contains something (but for another event type selection) + else: + out_dict[plot_settings["event_type"]] = { + parameter: par_dict_content + } + + return out_dict + + +def append_new_data( + param: str, + plot_settings: dict, + plot_info: dict, + old_dict: dict, + par_dict_content: dict, + plt_path: str, +) -> dict: + # the parameter is there + parameter = param.split("_var")[0] if "_var" in param else param + event_type = plot_settings["event_type"] + + utils.logger.info("\33[95m**************************************************\33[0m") + utils.logger.info(f"\33[95m*** S A V I N G : {plot_info['subsystem']}\33[0m") + utils.logger.info("\33[95m**************************************************\33[0m") + + if old_dict["monitoring"][event_type][parameter]: + # get already present df + old_df = old_dict["monitoring"][event_type][parameter][ + "df_" + plot_info["subsystem"] + ].copy() + old_df = check_level0(old_df) + + # get new df (plot_info object is the same as before, no need to get it and update it) + new_df = par_dict_content["df_" + plot_info["subsystem"]].copy() + # --- cleaned df + new_df = get_param_df(parameter, new_df) + + # --- we have to copy the new means in the old one, otherwise we end up with two values (consider they have different lengths!) 
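+        # (new_df carries the freshly computed per-channel mean, while old_df still holds the mean from the previous cycle, so the old entries are remapped below)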
+ # Create a dictionary mapping 'channel' values to 'parameter_mean' values from new_df + mean_dict = new_df.set_index("channel")[parameter + "_mean"].to_dict() + # Update 'parameter_mean' values in old_df based on the dictionary mapping + old_df[parameter + "_mean"] = ( + old_df["channel"].map(mean_dict).fillna(old_df[parameter + "_mean"]) + ) + + # we have to re-calculate the % variations based on the new mean values (new-df is ok, but old_df isn't!) + old_df = old_df.drop(columns={parameter + "_var"}) + old_df[parameter + "_var"] = ( + old_df[parameter] / old_df[parameter + "_mean"] - 1 + ) * 100 + old_df = old_df.reset_index(drop=True) + + # concatenate the two dfs (channels are no more grouped; not a problem) + merged_df = DataFrame.empty + merged_df = concat([old_df, new_df], ignore_index=True, axis=0) + merged_df = merged_df.reset_index(drop=True) + # re-order content in order of channels/timestamps + merged_df = merged_df.sort_values(["channel", "datetime"]) + + # redefine the dict containing the df and plot_info + par_dict_content = {} + par_dict_content["df_" + plot_info["subsystem"]] = merged_df + par_dict_content["plot_info"] = plot_info + + # saved the merged df as usual (but for the given parameter) + plot_info = get_param_info(param, plot_info) + out_dict = build_dict( + plot_settings, plot_info, par_dict_content, old_dict["monitoring"] + ) + + # we need to save it, otherwise when looping over the next parameter we lose the appended info for the already inspected parameter + out_file = shelve.open(plt_path + "-" + plot_info["subsystem"]) + out_file["monitoring"] = out_dict + out_file.close() + + return out_dict + + +def check_level0(dataframe: DataFrame) -> DataFrame: + """Check if a dataframe contains the 'level_0' column. If so, remove it.""" + if "level_0" in dataframe.columns: + return dataframe.drop(columns=["level_0"]) + else: + return dataframe + + +def get_param_info(param: str, plot_info: dict) -> dict: + """Subselect from 'plot_info' the plotting info for the specified parameter ```param```. This is needed for the multi-parameters case.""" + # get the *naked* parameter name and apply some if statements to avoid problems + param = param + "_var" if "_var" not in param else param + parameter = param.split("_var")[0] + + # but what if there is no % variation? We don't want any "_var" in our parameters! 
+ if ( + isinstance(plot_info["unit_label"], dict) + and param not in plot_info["unit_label"].keys() + ): + if plot_info["unit_label"][parameter] != "%": + param = parameter + if isinstance(plot_info["unit_label"], str): + if plot_info["unit_label"] != "%": + param = parameter + + # re-shape the plot_info dictionary for the given parameter under study + plot_info_param = plot_info.copy() + plot_info_param["title"] = f"Plotting {param}" + plot_info_param["unit"] = ( + plot_info["unit"][param] + if isinstance(plot_info["unit"], dict) + else plot_info["unit"] + ) + plot_info_param["label"] = ( + plot_info["label"][param] + if isinstance(plot_info["label"], dict) + else plot_info["label"] + ) + plot_info_param["unit_label"] = ( + plot_info["unit_label"][param] + if isinstance(plot_info["unit_label"], dict) + else plot_info["unit_label"] + ) + plot_info_param["limits"] = ( + plot_info["limits"][param] + if isinstance(plot_info["limits"], dict) + else plot_info["limits"] + ) + plot_info_param["event_type"] = ( + plot_info["event_type"][param] + if isinstance(plot_info["event_type"], dict) + else plot_info["event_type"] + ) + plot_info_param["param_mean"] = parameter + "_mean" + plot_info_param["variation"] = ( + True if plot_info_param["unit_label"] == "%" else False + ) + plot_info_param["parameters"] = ( + param if plot_info_param["variation"] is True else parameter + ) + + # ... need to go back to the one parameter case ... + # if "parameters" in plot_info_param.keys(): + # plot_info_param["parameter"] = plot_info_param.pop("parameters") + + return plot_info_param + + +def get_param_df(parameter: str, df: DataFrame) -> DataFrame: + """Subselect from 'df' only the dataframe columns that refer to a given parameter. The case of 'parameter' being a special parameter is carefully handled.""" + # list needed to better divide the parameters stored in the dataframe... 
+ keep_cols = [ + "index", + "channel", + "HV_card", + "HV_channel", + "cc4_channel", + "cc4_id", + "daq_card", + "daq_crate", + "datetime", + "det_type", + "flag_fc_bsln", + "flag_muon", + "flag_pulser", + "location", + "name", + "position", + "status", + ] + df_param = df.copy().drop(columns={x for x in df.columns if parameter not in x}) + df_cols = df.copy().drop(columns={x for x in df.columns if x not in keep_cols}) + + # check if the parameter belongs to a special one + if parameter in utils.SPECIAL_PARAMETERS: + # get the other columns to keep in the new dataframe + other_cols_to_keep = utils.SPECIAL_PARAMETERS[parameter] + # initialize an empty dataframe + df_other_cols = DataFrame() + # we might want to load one or more special columns + # (of course, avoid to load columns if the special parameter does not request any special parameter, + # eg event rate or exposure are not build on the basis of any other parameter) + + # + one column only + if isinstance(other_cols_to_keep, str) and other_cols_to_keep is not None: + df_other_cols = df.copy().drop( + columns={x for x in df.columns if x != other_cols_to_keep} + ) + # + more than one column + if isinstance(other_cols_to_keep, list): + for col in other_cols_to_keep: + if col is not None: + # this is the first column we are putting in 'df_other_cols' + if df_other_cols.empty: + df_other_cols = df.copy().drop( + columns={x for x in df.columns if x != col} + ) + # there are already column(s) in 'df_other_cols' + else: + new_col = df.copy().drop( + columns={x for x in df.columns if x != col} + ) + df_other_cols = concat([df_other_cols, new_col], axis=1) + else: + df_other_cols = DataFrame() + + # concatenate everything + df_param = concat([df_param, df_cols, df_other_cols], axis=1) + + return df_param + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# HDF OBJECTS +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +def save_hdf( + saving: str, + file_path: str, + df: analysis_data.AnalysisData, + aux_ch: str, + aux_analysis: analysis_data.AnalysisData, + aux_ratio_analysis: analysis_data.AnalysisData, + aux_diff_analysis: analysis_data.AnalysisData, + plot_info: dict, +) -> dict: + """Save the input dataframe in an external hdf file, using a different structure (time vs channel, with values in cells). 
Plot info are saved too.""" + utils.logger.info("Building HDF file(s)") + # save the final dataframe as a hdf object + parameters = plot_info["parameters"] + keys_to_drop = [ + "std", + "range", + "plot_style", + "variation", + "limits", + "title", + "parameters", + "parameter", + "param_mean", + "locname", + "time_window", + "resampled", + "unit_label", + ] + flag_rename = { + "pulser": "IsPulser", + "FCbsln": "IsBsln", + "muon": "IsMuon", + "phy": "IsPhysics", + "all": "All", + } + + for param in parameters: + evt_type = ( + plot_info["event_type"][param] + if isinstance(plot_info["event_type"], dict) + else plot_info["event_type"] + ) + param_orig = param.rstrip("_var") if "_var" in param else param + param_orig_camel = utils.convert_to_camel_case(param_orig, "_") + + # get dictionary with useful plotting info + plot_info_param = get_param_info(param, plot_info) + # drop the list, and get directly lower/upper limits (set to False if no limits are provided); + # this helps to avoid mixing types with PyTables + + # fix the label (in general, it could contain info for aux data too - here, we want a simple version of the label) + plot_info_param["label"] = utils.PLOT_INFO[param_orig]["label"] + + limits_var = ( + utils.PLOT_INFO[param_orig]["limits"][plot_info_param["subsystem"]][ + "variation" + ] + if plot_info_param["subsystem"] + in utils.PLOT_INFO[param_orig]["limits"].keys() + else [None, None] + ) + limits_abs = ( + utils.PLOT_INFO[param_orig]["limits"][plot_info_param["subsystem"]][ + "absolute" + ] + if plot_info_param["subsystem"] + in utils.PLOT_INFO[param_orig]["limits"].keys() + else [None, None] + ) + + # for limits, change from 'None' to 'False' to be hdf-friendly + plot_info_param["lower_lim_var"] = str(limits_var[0]) or False + plot_info_param["upper_lim_var"] = str(limits_var[1]) or False + plot_info_param["lower_lim_abs"] = str(limits_abs[0]) or False + plot_info_param["upper_lim_abs"] = str(limits_abs[1]) or False + + # drop useless keys + for key in keys_to_drop: + del plot_info_param[key] + + # one-param case + if len(parameters) == 1: + df_to_save = df.data.copy() + if not utils.check_empty_df(aux_analysis): + df_aux_to_save = aux_analysis.data.copy() + if not utils.check_empty_df(aux_ratio_analysis): + df_aux_ratio_to_save = aux_ratio_analysis.data.copy() + if not utils.check_empty_df(aux_diff_analysis): + df_aux_diff_to_save = aux_diff_analysis.data.copy() + # multi-param case (get only the df for the param of interest) + if len(parameters) > 1: + df_to_save = get_param_df(param_orig, df.data) + if not utils.check_empty_df(aux_analysis): + df_aux_to_save = get_param_df(param_orig, aux_analysis.data) + if not utils.check_empty_df(aux_ratio_analysis): + df_aux_ratio_to_save = get_param_df(param_orig, aux_ratio_analysis.data) + if not utils.check_empty_df(aux_diff_analysis): + df_aux_diff_to_save = get_param_df(param_orig, aux_diff_analysis.data) + + # still need to check overwrite/append (and existence of file!!!) + # SOLVE THIS!!! 
+ # if saving == "overwrite": + # check_existence_and_overwrite(file_path) + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # PLOTTING INFO + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # this is constant over time, so with 'append' we simply overwrite previous content + df_info = DataFrame.from_dict( + plot_info_param, orient="index", columns=["Value"] + ) + + df_info.to_hdf( + file_path, key=f"{flag_rename[evt_type]}_{param_orig_camel}_info", mode="a" + ) + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # PURE VALUES - AUX CHANNEL + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if not utils.check_empty_df(aux_analysis): + # SOLVE THIS!!! + # if saving == "overwrite": + # check_existence_and_overwrite( + # file_path.replace(plot_info_param["subsystem"], aux_ch) + # ) + + plot_info_aux = plot_info_param.copy() + plot_info_aux["subsystem"] = aux_ch + # --- plotting info + df_info_aux = DataFrame.from_dict( + plot_info_aux, orient="index", columns=["Value"] + ) + df_info_aux.to_hdf( + file_path.replace(plot_info_param["subsystem"], aux_ch), + key=f"{flag_rename[evt_type]}_{param_orig_camel}_info", + mode="a", + ) + + # ... absolute values + get_pivot( + df_aux_to_save, + param_orig, + f"{flag_rename[evt_type]}_{param_orig_camel}", + file_path.replace(plot_info_param["subsystem"], aux_ch), + saving, + ) + # ... mean values + get_pivot( + df_aux_to_save, + param_orig + "_mean", + f"{flag_rename[evt_type]}_{param_orig_camel}_mean", + file_path.replace(plot_info_param["subsystem"], aux_ch), + saving, + ) + # ... % variations wrt absolute values + get_pivot( + df_aux_to_save, + param_orig + "_var", + f"{flag_rename[evt_type]}_{param_orig_camel}_var", + file_path.replace(plot_info_param["subsystem"], aux_ch), + saving, + ) + utils.logger.info( + f"... HDF file for {aux_ch} - pure AUX values - saved in: \33[4m{file_path.replace(plot_info_param['subsystem'], aux_ch)}\33[0m" + ) + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # PURE VALUES + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # ... absolute values + get_pivot( + df_to_save, + param_orig, + f"{flag_rename[evt_type]}_{param_orig_camel}", + file_path, + saving, + ) + # ... mean values + get_pivot( + df_to_save, + param_orig + "_mean", + f"{flag_rename[evt_type]}_{param_orig_camel}_mean", + file_path, + saving, + ) + # ... % variations wrt absolute values + get_pivot( + df_to_save, + param_orig + "_var", + f"{flag_rename[evt_type]}_{param_orig_camel}_var", + file_path, + saving, + ) + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # RATIO WRT AUX CHANNEL + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if not utils.check_empty_df(aux_ratio_analysis): + # ... absolute values + get_pivot( + df_aux_ratio_to_save, + param_orig, + f"{flag_rename[evt_type]}_{param_orig_camel}_{aux_ch}Ratio", + file_path, + saving, + ) + # ... mean values + get_pivot( + df_aux_ratio_to_save, + param_orig + "_mean", + f"{flag_rename[evt_type]}_{param_orig_camel}_{aux_ch}Ratio_mean", + file_path, + saving, + ) + # ... % variations wrt absolute values + get_pivot( + df_aux_ratio_to_save, + param_orig + "_var", + f"{flag_rename[evt_type]}_{param_orig_camel}_{aux_ch}Ratio_var", + file_path, + saving, + ) + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + # DIFFERENCE WRT AUX CHANNEL + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + if not utils.check_empty_df(aux_diff_analysis): + # ... 
absolute values + get_pivot( + df_aux_diff_to_save, + param_orig, + f"{flag_rename[evt_type]}_{param_orig_camel}_{aux_ch}Diff", + file_path, + saving, + ) + # ... mean values + get_pivot( + df_aux_diff_to_save, + param_orig + "_mean", + f"{flag_rename[evt_type]}_{param_orig_camel}_{aux_ch}Diff_mean", + file_path, + saving, + ) + # ... % variations wrt absolute values + get_pivot( + df_aux_diff_to_save, + param_orig + "_var", + f"{flag_rename[evt_type]}_{param_orig_camel}_{aux_ch}Diff_var", + file_path, + saving, + ) + + utils.logger.info( + f"... HDF file for {plot_info_param['subsystem']} saved in: \33[4m{file_path}\33[0m" + ) + + +def get_pivot( + df: DataFrame, parameter: str, key_name: str, file_path: str, saving: str +): + """Get pivot: datetimes (first column) vs channels (other columns).""" + df_pivot = df.pivot(index="datetime", columns="channel", values=parameter) + # just select one row for mean values (since mean is constant over time for a given channel) + # take into consideration parameters that are named with 'mean' in it, eg "bl_mean" + if "_mean" in parameter and parameter.count("mean") > 1: + df_pivot = df_pivot.iloc[[0]] + + # append new data + if saving == "append": + # check if the file exists: if not, create a new one + if not os.path.exists(file_path): + df_pivot.to_hdf(file_path, key=key_name, mode="a") + return + # the file exists, but this specific key was not saved - create the new key + saved_keys = [] + with h5py.File(file_path, "r") as file: + saved_keys = list(file.keys()) + if os.path.exists(file_path) and key_name not in saved_keys: + df_pivot.to_hdf(file_path, key=key_name, mode="a") + return + + # for the mean entry, we overwrite the already existing content with the new mean value + if "_mean" in parameter and parameter.count("mean") > 1: + df_pivot.to_hdf(file_path, key=key_name, mode="a") + if "_mean" not in parameter or ( + "_mean" in parameter and parameter.count("mean") == 1 + ): + # if % variations, we have to re-calculate all of them for the new mean values + if "_var" in parameter: + key_name_orig = key_name.replace("_var", "") + new_mean = read_hdf( + file_path, key=key_name_orig + "_mean" + ) # gia' aggiornata (perche' la media la aggiorniamo prima delle variazioni %) + all_abs_data = read_hdf( + file_path, key=key_name_orig + ) # df vecchio con TUTTI i valori assoluti (anche quelli di prima) + new_var_data = all_abs_data.copy() + + # one channel (AUX) + channels = list(df["channel"].unique()) + if len(channels) == 1: + channel = channels[0] + new_var_data[channel] = ( + all_abs_data[channel] / new_mean[channel][0] - 1 + ) * 100 + # more channels (geds) + else: + for channel in channels: + new_var_data[channel] = ( + all_abs_data[channel] / new_mean[channel][0] - 1 + ) * 100 + + # Write the combined DataFrame to the HDF5 file + new_var_data.to_hdf(file_path, key=key_name, mode="a") + + # otherwise, just read the existing HDF5 file + else: + # Read the existing HDF5 file + existing_data = read_hdf(file_path, key=key_name) + # Concatenate the existing data and the new data + combined_data = concat([existing_data, df_pivot]) + # Write the combined DataFrame to the HDF5 file + combined_data.to_hdf(file_path, key=key_name, mode="a") + + # overwrite already existing data + else: + df_pivot.to_hdf(file_path, key=key_name, mode="a") + + +def check_existence_and_overwrite(file: str): + """Check for the existence of a file, and if it exists removes it.""" + if os.path.exists(file): + os.remove(file) diff --git 
a/src/legend_data_monitor/settings/SC-params.json b/src/legend_data_monitor/settings/SC-params.json new file mode 100644 index 0000000..7975296 --- /dev/null +++ b/src/legend_data_monitor/settings/SC-params.json @@ -0,0 +1,154 @@ +{ + "SC_DB_params": { + "diode_vmon": { + "table": "diode_snap", + "flags": [] + }, + "diode_imon": { + "table": "diode_snap", + "flags": [] + }, + "PT114": { + "table": "cryostat_snap", + "flags": ["is_Pressure", "is_PT114"] + }, + "PT115": { + "table": "cryostat_snap", + "flags": ["is_Pressure", "is_PT115"] + }, + "PT118": { + "table": "cryostat_snap", + "flags": ["is_Pressure", "is_PT118"] + }, + "PT202": { + "table": "cryostat_snap", + "flags": ["is_Vacuum", "is_PT202"] + }, + "PT205": { + "table": "cryostat_snap", + "flags": ["is_Vacuum", "is_PT205"] + }, + "PT208": { + "table": "cryostat_snap", + "flags": ["is_Vacuum", "is_PT208"] + }, + "LT01": { + "table": "waterloop_snap", + "flags": ["is_WaterLoop", "is_LT01"] + }, + "RREiT": { + "table": "cleanroom_snap", + "flags": ["is_clean", "is_RREiT"] + }, + "RRNTe": { + "table": "cleanroom_snap", + "flags": ["is_clean", "is_RRNTe"] + }, + "RRSTe": { + "table": "cleanroom_snap", + "flags": ["is_clean", "is_RRSTe"] + }, + "ZUL_T_RR": { + "table": "cleanroom_snap", + "flags": ["is_clean", "is_ZUL_T_RR"] + }, + "DaqLeft-Temp1": { + "table": "rack_snap", + "flags": ["is_temperature", "is_DaqLeft", "is_Temp_1"] + }, + "DaqRight-Temp1": { + "table": "rack_snap", + "flags": ["is_temperature", "is_DaqRight", "is_Temp_1"] + }, + "DaqLeft-Temp2": { + "table": "rack_snap", + "flags": ["is_temperature", "is_DaqLeft", "is_Temp_2"] + }, + "DaqRight-Temp2": { + "table": "rack_snap", + "flags": ["is_temperature", "is_DaqRight", "is_Temp_2"] + } + }, + "expressions": { + "is_Pressure": { + "column": "group", + "entry": "Pressure" + }, + "is_Vacuum": { + "column": "group", + "entry": "Vacuum" + }, + "is_WaterLoop": { + "column": "group", + "entry": "WaterLoop" + }, + "is_clean": { + "column": "group", + "entry": "clean" + }, + "is_PT114": { + "column": "name", + "entry": "PT114" + }, + "is_PT115": { + "column": "name", + "entry": "PT115" + }, + "is_PT118": { + "column": "name", + "entry": "PT118" + }, + "is_PT202": { + "column": "name", + "entry": "PT202" + }, + "is_PT205": { + "column": "name", + "entry": "PT205" + }, + "is_PT208": { + "column": "name", + "entry": "PT208" + }, + "is_LT01": { + "column": "name", + "entry": "LT01" + }, + "is_RREiT": { + "column": "name", + "entry": "RREiT" + }, + "is_RRNTe": { + "column": "name", + "entry": "RRNTe" + }, + "is_RRSTe": { + "column": "name", + "entry": "RRSTe" + }, + "is_ZUL_T_RR": { + "column": "name", + "entry": "ZUL_T_RR" + }, + "is_temperature": { + "column": "name", + "entry": "Temp" + }, + "is_DaqLeft": { + "column": "rack", + "entry": "CleanRoom-DaqLeft" + }, + "is_DaqRight": { + "column": "rack", + "entry": "CleanRoom-DaqRight" + }, + "is_Temp_1": { + "column": "sensor", + "entry": "Temp-1" + }, + "is_Temp_2": { + "column": "sensor", + "entry": "Temp-2" + } + } +} diff --git a/src/legend_data_monitor/settings/auto_config.json b/src/legend_data_monitor/settings/auto_config.json deleted file mode 100644 index aeec0fa..0000000 --- a/src/legend_data_monitor/settings/auto_config.json +++ /dev/null @@ -1,15 +0,0 @@ -{ - "subsystems": { - "geds": { - "Baselines in pulser events": { - "parameters": "baseline", - "event_type": "pulser", - "plot_structure": "per channel", - "plot_style": "vs time", - "variation": true, - "time_window": "1H", - "status": false - } - } - } -} diff --git 
a/src/legend_data_monitor/settings/par-settings.json b/src/legend_data_monitor/settings/par-settings.json index e753e32..ef9a194 100644 --- a/src/legend_data_monitor/settings/par-settings.json +++ b/src/legend_data_monitor/settings/par-settings.json @@ -70,13 +70,13 @@ }, "geds": { "variation": [null, null], - "absolute": [null, null] + "absolute": [0, 20] } } }, "A_max": { "label": "Max. Current Pulse", - "unit": "a.u.", + "unit": "ADC/sample", "facecol": [0.96, 0.73, 1.0], "limits": { "spms": { @@ -91,7 +91,7 @@ }, "QDrift": { "label": "QDrift", - "unit": "a.u.", + "unit": "ADC", "facecol": [0.56, 0.74, 0.56], "limits": { "spms": { @@ -106,7 +106,7 @@ }, "cuspEftp": { "label": "cuspEftp", - "unit": "a.u.", + "unit": "ADC", "facecol": [0.86, 0.86, 0.86], "limits": { "spms": { @@ -121,7 +121,7 @@ }, "cuspEmax": { "label": "cuspEmax", - "unit": "a.u.", + "unit": "ADC", "facecol": [0.86, 0.86, 0.86], "limits": { "spms": { @@ -129,7 +129,7 @@ "absolute": [null, null] }, "geds": { - "variation": [-0.25, 0.25], + "variation": [-0.025, 0.025], "absolute": [null, null] } } @@ -144,7 +144,7 @@ "absolute": [null, null] }, "geds": { - "variation": [-0.25, 0.25], + "variation": [-0.025, 0.025], "absolute": [null, null] } } @@ -166,7 +166,7 @@ }, "dt_eff": { "label": "dt_eff", - "unit": "a.u.", + "unit": "ns", "facecol": "peachpuff", "limits": { "spms": { @@ -181,7 +181,7 @@ }, "pz_mean": { "label": "pz_mean", - "unit": "a.u.", + "unit": "ADC", "facecol": "paleturquoise", "limits": { "spms": { @@ -196,7 +196,7 @@ }, "pz_slope": { "label": "pz_slope", - "unit": "a.u.", + "unit": "ADC", "facecol": "paleturquoise", "limits": { "spms": { @@ -211,7 +211,7 @@ }, "pz_std": { "label": "pz_std", - "unit": "a.u.", + "unit": "ADC", "facecol": "paleturquoise", "limits": { "spms": { @@ -376,7 +376,7 @@ }, "tp_aoe_max": { "label": "tp_aoe_max", - "unit": "a.u.", + "unit": "ns", "facecol": "plum", "limits": { "spms": { @@ -406,7 +406,7 @@ }, "tp_max": { "label": "tp_max", - "unit": "a.u.", + "unit": "ns", "facecol": "palegreen", "limits": { "spms": { @@ -421,7 +421,7 @@ }, "tp_min": { "label": "tp_max", - "unit": "a.u.", + "unit": "ns", "facecol": "palegreen", "limits": { "spms": { @@ -436,7 +436,7 @@ }, "trapEftp": { "label": "trapEftp", - "unit": "a.u.", + "unit": "ADC", "facecol": "palegoldenrod", "limits": { "spms": { @@ -451,7 +451,7 @@ }, "trapEmax": { "label": "trapEmax", - "unit": "a.u.", + "unit": "ADC", "facecol": "palegoldenrod", "limits": { "spms": { @@ -459,7 +459,7 @@ "absolute": [null, null] }, "geds": { - "variation": [-0.25, 0.25], + "variation": [-0.025, 0.025], "absolute": [null, null] } } @@ -474,14 +474,14 @@ "absolute": [null, null] }, "geds": { - "variation": [-0.25, 0.25], + "variation": [-0.025, 0.025], "absolute": [null, null] } } }, "trapTmax": { "label": "trapTmax", - "unit": "a.u.", + "unit": "ADC", "facecol": "palegoldenrod", "limits": { "spms": { @@ -489,7 +489,7 @@ "absolute": [null, null] }, "geds": { - "variation": [-0.25, 0.25], + "variation": [-0.025, 0.025], "absolute": [null, null] } } @@ -526,7 +526,7 @@ }, "wf_min": { "label": "wf_min", - "unit": "a.u.", + "unit": "ADC", "facecol": "pink", "limits": { "spms": { @@ -541,7 +541,7 @@ }, "zacEftp": { "label": "zacEftp", - "unit": "a.u.", + "unit": "ADC", "facecol": "aquamarine", "limits": { "spms": { @@ -556,7 +556,7 @@ }, "zacEmax": { "label": "zacEmax", - "unit": "a.u.", + "unit": "ADC", "facecol": "aquamarine", "limits": { "spms": { @@ -584,6 +584,21 @@ } } }, + "exposure": { + "label": "Exposure", + "unit": "kg yr", + 
"facecol": [0.82, 0.94, 0.75], + "limits": { + "spms": { + "variation": [null, null], + "absolute": [null, null] + }, + "geds": { + "variation": [null, null], + "absolute": [null, null] + } + } + }, "bl_rms": { "label": "Baseline RMS", "unit": null, @@ -593,18 +608,9 @@ "geds": [null, 0.0] } }, - "lc": { - "label": "LC", - "unit": "?", - "facecol": [0.94, 0.87, 0.9], - "limits": { - "spms": [null, 0.0], - "geds": [null, 0.0] - } - }, "gain": { "label": "Uncalibrated Gain", - "unit": "a.u.", + "unit": "ADC", "facecol": [0.68, 0.87, 0.68], "limits": { "spms": { @@ -766,5 +772,20 @@ "absolute": [null, null] } } + }, + "AoE_Custom": { + "label": "Custom A/E (A_max / cuspEmax)", + "unit": "a.u.", + "facecol": [0.74, 0.77, 0.87], + "limits": { + "spms": { + "variation": [null, null], + "absolute": [null, null] + }, + "geds": { + "variation": [null, null], + "absolute": [null, null] + } + } } } diff --git a/src/legend_data_monitor/settings/parameter-tiers.json b/src/legend_data_monitor/settings/parameter-tiers.json index 653fb53..eb3cb18 100644 --- a/src/legend_data_monitor/settings/parameter-tiers.json +++ b/src/legend_data_monitor/settings/parameter-tiers.json @@ -2,7 +2,78 @@ "baseline": "dsp", "wf_max": "dsp", "timestamp": "dsp", + "tp_min": "dsp", + "tp_max": "dsp", + "wf_min": "dsp", + "t_sat_lo": "dsp", + "t_sat_hi": "dsp", + "t_discharge": "dsp", + "bl_mean": "dsp", + "bl_std": "dsp", + "bl_slope": "dsp", + "bl_intercept": "dsp", + "pz_slope": "dsp", + "pz_std": "dsp", + "pz_mean": "dsp", + "trapTmax": "dsp", + "trapSmax": "dsp", + "trapEmax": "dsp", + "trapEftp": "dsp", + "cuspEmax": "dsp", + "zacEmax": "dsp", + "zacEftp": "dsp", + "cuspEftp": "dsp", + "tp_0_est": "dsp", + "tp_0_atrap": "dsp", + "tp_01": "dsp", + "tp_10": "dsp", + "tp_20": "dsp", + "tp_50": "dsp", + "tp_80": "dsp", + "tp_90": "dsp", + "tp_95": "dsp", + "tp_99": "dsp", + "tp_100": "dsp", + "A_max": "dsp", + "tp_aoe_max": "dsp", + "QDrift": "dsp", + "dt_eff": "dsp", + "lq80": "dsp", + "dt_eff_invert": "dsp", + "trapTmax_invert": "dsp", + "trapTftp_invert": "dsp", + "tp_0_invert": "dsp", + "tp_100_invert": "dsp", + "tp_99_invert": "dsp", + "tp_90_invert": "dsp", + "tp_80_invert": "dsp", + "tp_50_invert": "dsp", + "tp_20_invert": "dsp", + "tp_10_invert": "dsp", "cuspEmax_ctc_cal": "hit", + "zacEmax_ctc_cal": "hit", + "trapEmax_ctc_cal": "hit", + "trapTmax_cal": "hit", "AoE_Corrected": "hit", - "zacEmax_ctc_cal": "hit" + "AoE_Classifier": "hit", + "AoE_Low_Cut": "hit", + "AoE_Double_Sided_Cut": "hit", + "is_valid_cal": "hit", + "is_valid_0vbb": "hit", + "is_negative": "hit", + "is_saturated": "hit", + "is_valid_rt": "hit", + "is_valid_t0": "hit", + "is_valid_tmax": "hit", + "is_valid_dteff": "hit", + "is_valid_ediff": "hit", + "is_valid_efrac": "hit", + "is_negative_crosstalk": "hit", + "is_discharge": "hit", + "is_neg_energy": "hit", + "is_valid_tail": "hit", + "is_downgoing_baseline": "hit", + "is_upgoing_baseline": "hit", + "is_noise_burst": "hit", + "is_valid_baseline": "hit" } diff --git a/src/legend_data_monitor/settings/remove-dets.json b/src/legend_data_monitor/settings/remove-dets.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/src/legend_data_monitor/settings/remove-dets.json @@ -0,0 +1 @@ +{} diff --git a/src/legend_data_monitor/settings/remove-keys-COAXp04.json b/src/legend_data_monitor/settings/remove-keys-COAXp04.json new file mode 100644 index 0000000..405baf5 --- /dev/null +++ b/src/legend_data_monitor/settings/remove-keys-COAXp04.json @@ -0,0 +1,56 @@ +{ + "C00ANG3": [ + { + "from": 
"20230330T043441Z", + "to": "20230401T012732Z" + }, + { + "from": "20230411T170538Z", + "to": "20230411T210547Z" + }, + { + "from": "20230413T064408Z", + "to": "20230413T084412Z" + }, + { + "from": "20230415T133659Z", + "to": "20230424T185631Z" + } + ], + "C00ANG5": [ + { + "from": "20230330T043441Z", + "to": "20230401T012732Z" + }, + { + "from": "20230411T170538Z", + "to": "20230411T210547Z" + }, + { + "from": "20230413T064408Z", + "to": "20230413T084412Z" + }, + { + "from": "20230415T133659Z", + "to": "20230424T185631Z" + } + ], + "C00ANG2": [ + { + "from": "20230330T043441Z", + "to": "20230401T012732Z" + }, + { + "from": "20230411T170538Z", + "to": "20230411T210547Z" + }, + { + "from": "20230413T064408Z", + "to": "20230413T084412Z" + }, + { + "from": "20230415T133659Z", + "to": "20230424T185631Z" + } + ] +} diff --git a/src/legend_data_monitor/settings/remove-keys.json b/src/legend_data_monitor/settings/remove-keys.json new file mode 100644 index 0000000..0967ef4 --- /dev/null +++ b/src/legend_data_monitor/settings/remove-keys.json @@ -0,0 +1 @@ +{} diff --git a/src/legend_data_monitor/settings/special-parameters.json b/src/legend_data_monitor/settings/special-parameters.json index 9fb9c93..f81c191 100644 --- a/src/legend_data_monitor/settings/special-parameters.json +++ b/src/legend_data_monitor/settings/special-parameters.json @@ -2,5 +2,7 @@ "K_events": "cuspEmax_ctc_cal", "FWHM": "cuspEmax_ctc_cal", "wf_max_rel": ["wf_max", "baseline"], - "event_rate": null + "event_rate": null, + "exposure": null, + "AoE_Custom": ["A_max", "cuspEmax"] } diff --git a/src/legend_data_monitor/settings/user_config_example_L200.json b/src/legend_data_monitor/settings/user_config_example_L200.json deleted file mode 100644 index f5cc5e4..0000000 --- a/src/legend_data_monitor/settings/user_config_example_L200.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "output": "/data1/users/morella/testing-dev-michele", - "dataset": { - "experiment": "L200", - "period": "p02", - "version": "v06.00", - "path": "/data1/users/marshall/prod-ref", - "type": "phy", - "start": "2023-01-26 04:30:00", - "end": "2023-01-26 07:00:00" - }, - "subsystems": { - "geds": { - "Pulser Gain in pulser events": { - "parameters": "cuspEmax", - "event_type": "pulser", - "plot_structure": "per string", - "plot_style": "vs time", - "resampled": "yes", - "variation": true, - "time_window": "5T" - }, - "Baseline in pulser events": { - "parameters": "baseline", - "event_type": "pulser", - "plot_structure": "per string", - "plot_style": "vs time", - "resampled": "no", - "variation": true, - "time_window": "5T" - }, - "Noise in pulser events": { - "parameters": "bl_std", - "event_type": "pulser", - "plot_structure": "per string", - "plot_style": "vs time", - "resampled": "only", - "variation": true, - "time_window": "5T" - } - } - } -} diff --git a/src/legend_data_monitor/slow_control.py b/src/legend_data_monitor/slow_control.py new file mode 100644 index 0000000..f7fc564 --- /dev/null +++ b/src/legend_data_monitor/slow_control.py @@ -0,0 +1,292 @@ +import sys +from datetime import datetime, timezone +from typing import Tuple + +import pandas as pd +from legendmeta import LegendSlowControlDB +from pandas import DataFrame + +from . 
import utils + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# SLOW CONTROL LOADING/PLOTTING FUNCTIONS +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +class SlowControl: + """ + Object containing Slow Control database information for a data subselected based on given criteria. + + parameter [str] : diode_vmon | diode_imon | PT114 | PT115 | PT118 | PT202 | PT205 | PT208 | LT01 | RREiT | RRNTe | RRSTe | ZUL_T_RR | DaqLeft-Temp1 | DaqLeft-Temp2 | DaqRight-Temp1 | DaqRight-Temp2 + + Options for kwargs + + dataset= + dict with the following keys: + - 'experiment' [str]: 'L60' or 'L200' + - 'period' [str]: period format pXX + - 'path' [str]: path to prod-ref folder (before version) + - 'version' [str]: version of pygama data processing format vXX.XX + - 'type' [str]: 'phy' or 'cal' + - the following key(s) depending in time selection + 1. 'start' : , 'end': where input is of format 'YYYY-MM-DD hh:mm:ss' + 2. 'window'[str]: time window in the past from current time point, format: 'Xd Xh Xm' for days, hours, minutes + 2. 'timestamps': str or list of str in format 'YYYYMMDDThhmmssZ' + 3. 'runs': int or list of ints for run number(s) e.g. 10 for r010 + Or input kwargs separately experiment=, period=, path=, version=, type=; start=&end=, (or window= - ???), or timestamps=, or runs= + """ + + def __init__(self, parameter: str, port: int, pswd: str, **kwargs): + # if setup= kwarg was provided, get dict provided + # otherwise kwargs is itself already the dict we need with experiment= and period= + data_info = kwargs["dataset"] if "dataset" in kwargs else kwargs + + # validity check of kwarg + utils.dataset_validity_check(data_info) + + # needed to know for making 'if' statement over different experiments/periods + self.experiment = data_info["experiment"] + self.period = data_info["period"] + # need to remember for channel status query + # ! now needs to be single ! + self.datatype = data_info["type"] + # need to remember for DataLoader config + self.path = data_info["path"] + self.version = data_info["version"] + + # load info from settings/SC-params.json + self.parameter = parameter + self.sc_parameters = utils.SC_PARAMETERS + self.data = pd.DataFrame() + self.scdb = LegendSlowControlDB() + self.scdb.connect(port=port, password=pswd) + + # check if parameter is within the one listed in settings/SC-params.json + if parameter not in self.sc_parameters["SC_DB_params"].keys(): + utils.logger.error( + f"\033[91mThe parameter '{self.parameter}' is not present in 'settings/SC-params.json'. Try again with another parameter or update the json file!\033[0m" + ) + return + + ( + self.timerange, + self.first_timestamp, + self.last_timestamp, + ) = utils.get_query_times(**kwargs) + + # None will be returned if something went wrong + if not self.timerange: + utils.logger.error("\033[91m%s\033[0m", self.get_data.__doc__) + return + + # ------------------------------------------------------------------------- + self.data = self.get_sc_param() + + def get_sc_param(self): + """Load the corresponding table from SC database for the process of interest and apply already the flags for the parameter under study.""" + # getting the process and flags of interest from 'settings/SC-params.json' for the provided parameter + table_param = self.sc_parameters["SC_DB_params"][self.parameter]["table"] + flags_param = self.sc_parameters["SC_DB_params"][self.parameter]["flags"] + + # check if the selected table is present in the SC database. 
If not, arise an error and exit + if table_param not in self.scdb.get_tables(): + utils.logger.error( + "\033[91mThis is not present in the SC database! Try again.\033[0m" + ) + sys.exit() + + # get the dataframe for the process of interest + utils.logger.debug( + f"... getting the dataframe for '{table_param}' in the time range of interest\n" + ) + # SQL query to filter the dataframe based on the time range + query = f"SELECT * FROM {table_param} WHERE tstamp >= '{self.first_timestamp}' AND tstamp <= '{self.last_timestamp}'" + get_table_df = self.scdb.dataframe(query) + + # remove unnecessary columns (necessary when retrieving diode parameters) + # note: there will be a 'status' column such that ON=1 and OFF=0 - right now we are keeping every detector, without removing the OFF ones as we usually do for geds + if "vmon" in self.parameter and "imon" in list(get_table_df.columns): + get_table_df = get_table_df.drop(columns="imon") + # rename the column of interest to 'value' to be consistent with other parameter dataframes + get_table_df = get_table_df.rename(columns={"vmon": "value"}) + if "imon" in self.parameter and "vmon" in list(get_table_df.columns): + get_table_df = get_table_df.drop(columns="vmon") + get_table_df = get_table_df.rename(columns={"imon": "value"}) + # in case of geds parameters, add the info about the channel name and channel id (right now, there is only crate&slot info) + if self.parameter == "diode_vmon" or self.parameter == "diode_imon": + get_table_df = include_more_diode_info(get_table_df, self.scdb) + + # order by timestamp (not automatically done) + get_table_df = get_table_df.sort_values(by="tstamp") + + # let's apply the flags for keeping only the parameter of interest + utils.logger.debug( + f"... applying flags to get the parameter '{self.parameter}'" + ) + get_table_df = apply_flags(get_table_df, self.sc_parameters, flags_param) + + # get units and lower/upper limits for the parameter of interest + if "diode" not in self.parameter: + unit, lower_lim, upper_lim = get_plotting_info( + self.parameter, + self.sc_parameters, + self.first_timestamp, + self.last_timestamp, + self.scdb, + ) + else: + lower_lim = ( + upper_lim + ) = None # there are just 'set values', no actual thresholds + if "vmon" in self.parameter: + unit = "V" + elif "imon" in self.parameter: + unit = "\u03BCA" + else: + unit = None + + # append unit, lower_lim, upper_lim to the dataframe + get_table_df["unit"] = unit + get_table_df["lower_lim"] = lower_lim + get_table_df["upper_lim"] = upper_lim + + # fix time column + get_table_df['tstamp'] = pd.to_datetime(get_table_df['tstamp'], utc=True) + # fix value column + get_table_df['value'] = pd.to_numeric(get_table_df['value'], errors='coerce') # handle errors as NaN + + # remove unnecessary columns + remove_cols = ["rack", "group", "sensor", "name", "almask"] + for col in remove_cols: + if col in list(get_table_df.columns): + get_table_df = get_table_df.drop(columns={col}) + + get_table_df = get_table_df.reset_index(drop=True) + + utils.logger.debug( + "... 
final dataframe (after flagging the events):\n%s", get_table_df + ) + + return get_table_df + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Other functions +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +def get_plotting_info( + parameter: str, + sc_parameters: dict, + first_tstmp: str, + last_tstmp: str, + scdb: LegendSlowControlDB, +) -> Tuple[str, float, float]: + """Return units and low/high limits of a given parameter.""" + table_param = sc_parameters["SC_DB_params"][parameter]["table"] + flags_param = sc_parameters["SC_DB_params"][parameter]["flags"] + + # get info dataframe of the corresponding process under study (do I need to specify the param????) + get_table_info = scdb.dataframe(table_param.replace("snap", "info")) + + # let's apply the flags for keeping only the parameter of interest + get_table_info = apply_flags(get_table_info, sc_parameters, flags_param) + utils.logger.debug( + "... units and thresholds will be retrieved from the following object:\n%s", + get_table_info, + ) + + # Convert first_tstmp and last_tstmp to datetime objects in the UTC timezone + first_tstmp = datetime.strptime(first_tstmp, "%Y%m%dT%H%M%SZ").replace( + tzinfo=timezone.utc + ) + last_tstmp = datetime.strptime(last_tstmp, "%Y%m%dT%H%M%SZ").replace( + tzinfo=timezone.utc + ) + + # Filter the DataFrame based on the time interval, starting to look from the latest entry ('reversed(...)') + times = list(get_table_info["tstamp"].unique()) + + for time in reversed(times): + if first_tstmp < time < last_tstmp: + unit = list(get_table_info["unit"].unique())[0] + lower_lim = upper_lim = False + utils.logger.warning( + f"\033[93mParameter {parameter} has no valid range in the time period you selected. Upper and lower thresholds are set to False, while units={unit}\033[0m" + ) + return unit, lower_lim, upper_lim + + if time < first_tstmp and time < last_tstmp: + unit = list( + get_table_info[get_table_info["tstamp"] == time]["unit"].unique() + )[0] + lower_lim = get_table_info[get_table_info["tstamp"] == time][ + "ltol" + ].tolist()[-1] + upper_lim = get_table_info[get_table_info["tstamp"] == time][ + "utol" + ].tolist()[-1] + utils.logger.debug( + f"... parameter {parameter} must be within [{lower_lim};{upper_lim}] {unit}" + ) + return unit, lower_lim, upper_lim + + if time > first_tstmp and time > last_tstmp: + if time == times[0]: + utils.logger.error( + "\033[91mYou're travelling too far in the past, there were no SC data in the time period you selected. 
Try again!\033[0m" + ) + sys.exit() + + return unit, lower_lim, upper_lim + + +def apply_flags(df: DataFrame, sc_parameters: dict, flags_param: list) -> DataFrame: + """Apply the flags read from 'settings/SC-params.json' to the input dataframe.""" + for flag in flags_param: + column = sc_parameters["expressions"][flag]["column"] + entry = sc_parameters["expressions"][flag]["entry"] + df = df[df[column] == entry] + + # check if the dataframe is empty, if so, skip this plot + if utils.is_empty(df): + return # or exit - depending on how we will include these data in plotting + + return df + + +def include_more_diode_info(df: DataFrame, scdb: LegendSlowControlDB) -> DataFrame: + """Include more diode info, such as the channel name and the string number to which it belongs.""" + # get the diode info dataframe from the SC database + df_info = scdb.dataframe("diode_info") + # remove duplicates of detector names + df_info = df_info.drop_duplicates(subset="label") + # remove unnecessary columns (otherwise, they are repeated after the merging) + df_info = df_info.drop(columns={"status", "tstamp"}) + # there is a repeated detector! Once with an additional blank space in front of its name: removed in case it is found + if " V00050B" in list(df_info["label"].unique()): + df_info = df_info[df_info["label"] != " V00050B"] + + # remove 'HV filter test' and 'no cable' entries + df_info = df_info[~df_info["label"].str.contains("Ch")] + # remove other stuff (???) + if "?" in list(df_info["label"].unique()): + df_info = df_info[df_info["label"] != "?"] + if " routed" in list(df_info["label"].unique()): + df_info = df_info[df_info["label"] != " routed"] + if "routed" in list(df_info["label"].unique()): + df_info = df_info[df_info["label"] != "routed"] + + # Merge df_info into df based on 'crate' and 'slot' + merged_df = df.merge( + df_info[["crate", "slot", "channel", "label", "group"]], + on=["crate", "slot", "channel"], + how="left", + ) + merged_df = merged_df.rename(columns={"label": "name", "group": "string"}) + # remove "name"=NaN (ie entries for which there was not a correspondence among the two merged dataframes) + merged_df = merged_df.dropna(subset=["name"]) + # switch from "String X" (str) to "X" (int) for entries of the 'string' column + merged_df["string"] = merged_df["string"].str.extract(r"(\d+)").astype(int) + + return merged_df diff --git a/src/legend_data_monitor/status_plot.py b/src/legend_data_monitor/string_visualization.py similarity index 57% rename from src/legend_data_monitor/status_plot.py rename to src/legend_data_monitor/string_visualization.py index 6ec0ed6..d043a02 100644 --- a/src/legend_data_monitor/status_plot.py +++ b/src/legend_data_monitor/string_visualization.py @@ -11,9 +11,12 @@ from matplotlib.backends.backend_pdf import PdfPages from pandas import DataFrame, Timedelta, concat -from . import utils +from . import plotting, utils +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# CHANNELS' STATUS FUNCTION +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ def status_plot(subsystem, data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): # ------------------------------------------------------------------------- # plot a map with statuses of channels @@ -28,7 +31,7 @@ def status_plot(subsystem, data_analysis: DataFrame, plot_info: dict, pdf: PdfPa low_thr = plot_info["limits"][0] high_thr = plot_info["limits"][1] utils.logger.debug( - "...low threshold for " + "... 
low threshold for " + plot_info["parameter"] + " set at: " + str(low_thr) @@ -36,7 +39,7 @@ def status_plot(subsystem, data_analysis: DataFrame, plot_info: dict, pdf: PdfPa + plot_info["unit_label"] ) utils.logger.debug( - "...high threshold for " + "... high threshold for " + plot_info["parameter"] + " set at: " + str(high_thr) @@ -58,7 +61,9 @@ def status_plot(subsystem, data_analysis: DataFrame, plot_info: dict, pdf: PdfPa if low_thr is not None and high_thr is not None: plot_title += f"{plot_info['parameter']} < {low_thr} {plot_info['unit_label']} || {plot_info['parameter']} > {high_thr} {plot_info['unit_label']}" if low_thr is None and high_thr is None: - plot_title += f"{plot_info['parameter']} (no checks)" + # there is no point to check values if there are no thresholds + utils.logger.debug("... there are no thresholds to check for. We skip this!") + return new_dataframe = DataFrame() # loop over individual channels (otherwise, the problematic timestamps apply to all detectors, even the OK ones) and create a summary dataframe @@ -166,15 +171,9 @@ def status_plot(subsystem, data_analysis: DataFrame, plot_info: dict, pdf: PdfPa ) # get position within the array + other necessary info - name = subsystem.channel_map.loc[ - subsystem.channel_map["channel"] == channel - ]["name"].iloc[0] - location = subsystem.channel_map.loc[ - subsystem.channel_map["channel"] == channel - ]["location"].iloc[0] - position = subsystem.channel_map.loc[ - subsystem.channel_map["channel"] == channel - ]["position"].iloc[0] + name, location, position = get_info_from_channel( + subsystem.channel_map, channel + ) # define new row for not-ON detectors new_row = [[channel, name, location, position, status]] @@ -237,7 +236,7 @@ def status_plot(subsystem, data_analysis: DataFrame, plot_info: dict, pdf: PdfPa ) ] - # to account for empty strings: not a good idea actually... + # to account for empty strings: ...not a good idea actually... # In L60, there are S1,S2,S7,S8: do we really want to display 4 empty strings, i.e. S3-S6? There is no need! 
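The comment above points at a readability choice for the heatmap axes: forcing a full `S1…Smax` range would draw empty strings (S3–S6 in L60), while building labels only from the strings actually present keeps the map compact. A minimal sketch of the two label constructions on an invented set of locations (toy values, not a real channel map):

```python
import pandas as pd

# toy locations of the strings actually populated in the data (L60-like: S1, S2, S7, S8)
df = pd.DataFrame({"location": [1, 1, 2, 7, 8]})

# a full range forces empty columns (S3-S6) into the map
full_range = [f"S{no}" for no in range(df["location"].min(), df["location"].max() + 1)]
# labels built only from the strings that are present
present_only = [f"S{no}" for no in sorted(df["location"].unique())]

print(full_range)    # ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8']
print(present_only)  # ['S1', 'S2', 'S7', 'S8']
```

The exposure map further below takes the second route via `sorted(data_analysis["location"].unique())`.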
# x_axis_labels = [f"S{no}" for no in range(min(new_dataframe["location"].unique()), max(new_dataframe["location"].unique()+1))] @@ -279,7 +278,219 @@ def status_plot(subsystem, data_analysis: DataFrame, plot_info: dict, pdf: PdfPa ) plt.yticks(rotation=0) plt.title(plot_title) - pdf.savefig(bbox_inches="tight") + + # saving + plotting.save_pdf(plt, pdf) # returning the figure return fig + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# EXPOSURE FUNCTION +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +def exposure_plot(subsystem, data_analysis: DataFrame, plot_info: dict, pdf: PdfPages): + if plot_info["subsystem"] == "spms": + utils.logger.error( + "\033[91mPlotting the summary is not available for the spms.\nTry again!\033[0m" + ) + exit() + + # cbar unit (either 'kg d', if exposure is less than 0.1 kg yr, or 'kg yr'); note: exposure, at this point, is evaluated as 'kg yr' + if data_analysis["exposure"].max() < 0.1: + cbar_unit = "kg d" + else: + cbar_unit = "kg yr" + + # convert exposure into [kg day] if data_analysis["exposure"].max() < 0.1 kg yr + if cbar_unit == "kg d": + data_analysis["exposure"] = data_analysis["exposure"] * 365.25 + # data_analysis.loc[data_analysis["exposure"] < 0.1, "exposure"] = data_analysis.loc[data_analysis["exposure"] < 0.1, "exposure"] * 365.25 + # drop duplicate rows, based on channel entry (exposure is constant for a fixed channel) + data_analysis = data_analysis.drop_duplicates(subset=["channel"]) + # total exposure + tot_expo = data_analysis["exposure"].sum() + utils.logger.info(f"Total exposure: {tot_expo:.3f} {cbar_unit}") + + data_analysis = data_analysis.filter( + ["channel", "name", "location", "position", "exposure", "livetime_in_s"] + ) + + # ------------------------------------------------------------------------------- + # OFF detectors + # ------------------------------------------------------------------------------- + + # include OFF channels and see what is their status + off_channels = subsystem.channel_map[subsystem.channel_map["status"] == "off"][ + "channel" + ].unique() + + if len(off_channels) != 0: + for channel in off_channels: + # check if the channel is already in the exposure dataframe; if not, add a new row for it + if channel not in data_analysis["channel"].values: + status_info = subsystem.channel_map[ + subsystem.channel_map["channel"] == channel + ]["status"].iloc[0] + + # get status info + if status_info != "on": + exposure = 0.0 + livetime_in_s = 0.0 + + # get position within the array + other necessary info + name, location, position = get_info_from_channel( + subsystem.channel_map, channel + ) + + # define new row for not-ON detectors + new_row = [[channel, name, location, position, exposure, livetime_in_s]] + new_df = DataFrame( + new_row, + columns=[ + "channel", + "name", + "location", + "position", + "exposure", + "livetime_in_s", + ], + ) + # add the new row to the dataframe + data_analysis = concat( + [data_analysis, new_df], ignore_index=True, axis=0 + ) + + # ------------------------------------------------------------------------------- + # ON but NULL exposure detectors + # ------------------------------------------------------------------------------- + on_channels = subsystem.channel_map[subsystem.channel_map["status"] == "on"][ + "channel" + ].unique() + + for channel in on_channels: + if channel in list(data_analysis["channel"].unique()): + continue + + # if not there, set exposure to zero + exposure = 0.0 + livetime_in_s = 0.0 + 
+ # get position within the array + other necessary info + name, location, position = get_info_from_channel(subsystem.channel_map, channel) + + # define new row for not-ON detectors + new_row = [[channel, name, location, position, exposure, livetime_in_s]] + new_df = DataFrame( + new_row, + columns=[ + "channel", + "name", + "location", + "position", + "exposure", + "livetime_in_s", + ], + ) + # add the new row to the dataframe + data_analysis = concat([data_analysis, new_df], ignore_index=True, axis=0) + + # values to plot + result = data_analysis.pivot( + index="position", columns="location", values="exposure" + ) + result = result.round(3) + + # display it + if utils.logger.getEffectiveLevel() is utils.logging.DEBUG: + from tabulate import tabulate + + output_result = tabulate( + result, headers="keys", tablefmt="psql", showindex=False, stralign="center" + ) + utils.logger.debug( + "Status map summary for " + plot_info["parameter"] + ":\n%s", output_result + ) + + # calculate total livetime as sum of content of livetime_in_s column (and then convert it a human readable format) + tot_livetime = data_analysis["livetime_in_s"].unique()[0] + tot_livetime, unit = utils.get_livetime(tot_livetime) + + # ------------------------------------------------------------------------------- + # plot + # ------------------------------------------------------------------------------- + + # create the figure + fig = plt.figure(num=None, figsize=(8, 12), dpi=80, facecolor="w", edgecolor="k") + sns.set(font_scale=1) + + # create labels for dets, with exposure values + labels = result.astype(str) + + # labels definition (AFTER having included OFF detectors too) ------------------------------- + # LOCATION: + x_axis_labels = [f"S{no}" for no in sorted(data_analysis["location"].unique())] + # POSITION: + y_axis_labels = [ + no + for no in range( + min(data_analysis["position"].unique()), + max(data_analysis["position"].unique() + 1), + ) + ] + + # create the heatmap + status_map = sns.heatmap( + data=result, + annot=labels, + annot_kws={"size": 6}, + yticklabels=y_axis_labels, + xticklabels=x_axis_labels, + fmt="s", + cbar=True, + cbar_kws={"shrink": 0.5}, + linewidths=1, + linecolor="white", + square=True, + rasterized=True, + ) + + # add title "kg yr" as text on top of the cbar + plt.text( + 1.08, + 0.89, + f"({cbar_unit})", + transform=status_map.transAxes, + horizontalalignment="center", + verticalalignment="center", + ) + + plt.tick_params( + axis="both", + which="major", + labelbottom=False, + bottom=False, + top=False, + labeltop=True, + ) + plt.yticks(rotation=0) + plt.title( + f"{plot_info['subsystem']} - {plot_info['title']}\nTotal livetime: {tot_livetime:.2f}{unit}\nTotal exposure: {tot_expo:.3f} {cbar_unit}" + ) + + # saving + plotting.save_pdf(plt, pdf) + + return fig + + +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# Plotting recurring functions +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +def get_info_from_channel(channel_map: DataFrame, channel: int): + """Get info (name, location, position) from a channel number, once the channel map is provided as a DataFrame.""" + name = channel_map.loc[channel_map["channel"] == channel]["name"].iloc[0] + location = channel_map.loc[channel_map["channel"] == channel]["location"].iloc[0] + position = channel_map.loc[channel_map["channel"] == channel]["position"].iloc[0] + + return name, location, position diff --git a/src/legend_data_monitor/subsystem.py 
b/src/legend_data_monitor/subsystem.py index ab92de6..071fc14 100644 --- a/src/legend_data_monitor/subsystem.py +++ b/src/legend_data_monitor/subsystem.py @@ -1,10 +1,12 @@ import os +import sys import typing from datetime import datetime +from typing import Union import numpy as np import pandas as pd -from legendmeta import LegendMetadata +from legendmeta import JsonDB from pygama.flow import DataLoader from . import utils @@ -17,28 +19,32 @@ class Subsystem: """ Object containing information for a given subsystem such as channel map, channels status etc. - sub_type [str]: geds | spms | pulser + sub_type [str]: geds | spms | pulser | pulser01ana | FCbsln | muon Options for kwargs dataset= dict with the following keys: - 'experiment' [str]: 'L60' or 'L200' - - 'path' [str]: < move description here from get_data() > - - 'version' [str]: < move description here from get_data() > + - 'period' [str]: period format pXX + - 'path' [str]: path to prod-ref folder (before version) + - 'version' [str]: version of pygama data processing format vXX.XX - 'type' [str]: 'phy' or 'cal' - the following key(s) depending in time selection - 1) 'start' : , 'end': where input is of format 'YYYY-MM-DD hh:mm:ss' - 2) 'window'[str]: time window in the past from current time point, format: 'Xd Xh Xm' for days, hours, minutes - 2) 'timestamps': str or list of str in format 'YYYYMMDDThhmmssZ' - 3) 'runs': int or list of ints for run number(s) e.g. 10 for r010 - Or input kwargs separately path=, version=, type=; start=&end=, or window=, or timestamps=, or runs= - - Experiment is needed to know which channel belongs to the pulser Subsystem, AUX0 (L60) or AUX1 (L200) + 1. 'start' : , 'end': where input is of format 'YYYY-MM-DD hh:mm:ss' + 2. 'window' [str]: time window in the past from current time point, format: 'Xd Xh Xm' for days, hours, minutes + 2. 'timestamps': str or list of str in format 'YYYYMMDDThhmmssZ' + 3. 'runs': int or list of ints for run number(s) e.g. 10 for r010 + Or input kwargs separately experiment=, period=, path=, version=, type=; start=&end=, or window=, or timestamps=, or runs= + + Experiment is needed to know which channel belongs to the pulser Subsystem (and its name), "auxs" ch0 (L60) or "puls" ch1 (L200) + Period is needed to know channel name ("fcid" or "rawid") Selection range is needed for the channel map and status information at that time point, and should be the only information needed, however, pylegendmeta only allows query .on(timestamp=...) but not .on(run=...); therefore, to be able to get info in case of `runs` selection, we need to know - path, version, and run type to look up first timestamp of the run + path, version, and run type to look up first timestamp of the run. + If this changes in the future, the path will only be asked when data is requested to be loaded with Subsystem.get_data(), + but not to just load the channel map and status for given run Might set default "latest" for version, but gotta be careful. 
""" @@ -58,55 +64,8 @@ def __init__(self, sub_type: str, **kwargs): # otherwise kwargs is itself already the dict we need with experiment= and period= data_info = kwargs["dataset"] if "dataset" in kwargs else kwargs - if "experiment" not in data_info: - utils.logger.error("\033[91mProvide experiment name!\033[0m") - utils.logger.error("\033[91m%s\033[0m", self.__doc__) - return - - if "type" not in data_info: - utils.logger.error("\033[91mProvide data type!\033[0m") - utils.logger.error("\033[91m%s\033[0m", self.__doc__) - return - - # convert to list for convenience - # ! currently not possible with channel status - # if isinstance(data_info["type"], str): - # data_info["type"] = [data_info["type"]] - - data_types = ["phy", "cal"] - # ! currently not possible with channel status - # for datatype in data_info["type"]: - # if datatype not in data_types: - if not data_info["type"] in data_types: - utils.logger.error("\033[91mInvalid data type provided!\033[0m") - utils.logger.error("\033[91m%s\033[0m", self.__doc__) - return - - if "path" not in data_info: - utils.logger.error("\033[91mProvide path to data!\033[0m") - utils.logger.error("\033[91m%s\033[0m", self.__doc__) - return - if not os.path.exists(data_info["path"]): - utils.logger.error( - "\033[91mThe data path you provided does not exist!\033[0m" - ) - return - - if "version" not in data_info: - utils.logger.error( - '\033[91mProvide processing version! If not needed, just put an empty string, "".\033[0m' - ) - utils.logger.error("\033[91m%s\033[0m", self.__doc__) - return - - # in p03 things change again!!!! - # There is no version in '/data2/public/prodenv/prod-blind/tmp/auto/generated/tier/dsp/phy/p03', so for the moment we skip this check... - if data_info["period"] != "p03" and not os.path.exists( - os.path.join(data_info["path"], data_info["version"]) - ): - utils.logger.error("\033[91mProvide valid processing version!\033[0m") - utils.logger.error("\033[91m%s\033[0m", self.__doc__) - return + # validity check of kwarg + utils.dataset_validity_check(data_info) # validity of time selection will be checked in utils @@ -126,7 +85,17 @@ def __init__(self, sub_type: str, **kwargs): self.path = data_info["path"] self.version = data_info["version"] - self.timerange, self.first_timestamp = utils.get_query_times(**kwargs) + # data stored under these folders have been partitioned! 
+ if "tmp-auto" not in self.path: + self.partition = True + else: + self.partition = False + + ( + self.timerange, + self.first_timestamp, + self.last_timestamp, + ) = utils.get_query_times(**kwargs) # None will be returned if something went wrong if not self.timerange: @@ -217,14 +186,19 @@ def get_data(self, parameters: typing.Union[str, list_of_str, tuple_of_str] = () now = datetime.now() self.data = dl.load() utils.logger.info(f"Total time to load data: {(datetime.now() - now)}") - + # ------------------------------------------------------------------------- # polish things up # ------------------------------------------------------------------------- - tier = "hit" if "hit" in dbconfig["columns"] else "dsp" + tier = "dsp" + if "hit" in dbconfig["columns"]: + tier = "hit" + if self.partition and "pht" in dbconfig["columns"]: + tier = "pht" # remove columns we don't need - self.data = self.data.drop([f"{tier}_idx", "file"], axis=1) + if "{tier}_idx" in list(self.data.columns): + self.data = self.data.drop([f"{tier}_idx", "file"], axis=1) # rename channel to channel self.data = self.data.rename(columns={f"{tier}_table": "channel"}) @@ -236,6 +210,7 @@ def get_data(self, parameters: typing.Union[str, list_of_str, tuple_of_str] = () self.data["datetime"] = pd.to_datetime( self.data["timestamp"], origin="unix", utc=True, unit="s" ) + self.data = self.data.drop("timestamp", axis=1) # ------------------------------------------------------------------------- # add detector name, location and position from map @@ -262,8 +237,114 @@ def get_data(self, parameters: typing.Union[str, list_of_str, tuple_of_str] = () if self.type == "pulser": self.flag_pulser_events() + if self.type == "FCbsln": + self.flag_fcbsln_events() + if self.type == "muon": + self.flag_muon_events() + + def include_aux( + self, params: Union[str, list], dataset: dict, plot: dict, aux_ch: str + ): + """Include in a new column data coming from PULS01ANA aux channel, to either compute a ratio or a difference with data coming from the inspected subsystem.""" + # auxiliary channel of reference (fixed for the moment) + aux_channel = "pulser01ana" + # both options (diff and ratio) are present -> BAD! For this parameter we do not subtract/divide for any AUX entry + if "AUX_ratio" in plot.keys() and "AUX_diff" in plot.keys(): + utils.logger.error( + "\033[91mYou selected both 'AUX_ratio' and 'AUX_diff' for %s. Pick one!\033[0m", + plot["parameters"], + ) + sys.exit() + # one option (either diff or ratio) is present + if "AUX_ratio" in plot.keys() or "AUX_diff" in plot.keys(): + # check if the selected AUX channel exists, otherwise continue + if "AUX_ratio" in plot.keys() and plot["AUX_ratio"] is True: + utils.logger.debug( + "... you are going to plot the parameter accounting for the ratio wrt PULS01ANA data" + ) + if "AUX_diff" in plot.keys() and plot["AUX_diff"] is True: + utils.logger.debug( + "... you are going to plot the parameter accounting for the difference wrt PULS01ANA data" + ) + + utils.logger.debug( + "... but now we are going to perform diff/ratio with PULS01ANA entries" + ) + + def add_aux(param): + aux_subsys = Subsystem(aux_channel, dataset=dataset) + # get data for these parameters and time range given in the dataset + # (if no parameters given to plot, baseline and wfmax will always be loaded to flag pulser events anyway) + aux_subsys.get_data(param) + + # Merge the dataframes based on the 'datetime' column + utils.logger.debug( + "... 
merging the PULS01ANA dataframe with the original one" + ) + self.data = self.data.merge( + aux_subsys.data[["datetime", param]], on="datetime", how="left" + ) + + # ratio + self.data[f"{param}_{aux_ch}Ratio"] = ( + self.data[f"{param}_x"] / self.data[f"{param}_y"] + ) + # diff + self.data[f"{param}_{aux_ch}Diff"] = ( + self.data[f"{param}_x"] - self.data[f"{param}_y"] + ) + # rename columns (absolute values) + self.data = self.data.rename( + columns={f"{param}_x": param, f"{param}_y": f"{param}_{aux_ch}"} + ) + + # one-parameter case + if (isinstance(params, list) and len(params) == 1) or isinstance(params, str): + param = params if isinstance(params, str) else params[0] + # check if the parameter under study is special; if so, skip it + if param in utils.SPECIAL_PARAMETERS.keys(): + utils.logger.warning( + "\033[93m'%s' is a special parameter. " + + "For the moment, we skip the ratio/diff wrt the AUX channel and plot the parameter as it is.\033[0m", + params, + ) + return + # check if the parameter under study is from 'hit' tier; if so, skip it + if ( + param in utils.PARAMETER_TIERS.keys() + and utils.PARAMETER_TIERS[param] == "hit" + ): + utils.logger.warning( + "\033[93m'%s' is saved in hit tier, for which no AUX channel is present. " + + "We skip the ratio/diff wrt the AUX channel and plot the parameter as it is.\033[0m", + params, + ) + return + if f"{param}_{aux_channel}" not in list(self.data.columns): + add_aux(params) + + # multiple-parameters case + if isinstance(params, list) and len(params) > 1: + for param in params: + if param in utils.SPECIAL_PARAMETERS.keys(): + utils.logger.warning( + "\033[93m'%s' is a special parameter. " + + "For the moment, we skip the ratio/diff wrt the AUX channel and plot the parameter as it is.\033[0m", + params, + ) + return + if utils.PARAMETER_TIERS[param] == "hit": + utils.logger.warning( + "\033[93m'%s' is saved in hit tier, for which no AUX channel is present. " + + "We skip the ratio/diff wrt the AUX channel and plot the parameter as it is.\033[0m", + param, + ) + continue + if f"{param}_{aux_channel}" not in list(self.data.columns): + add_aux(params) def flag_pulser_events(self, pulser=None): + """Flag pulser events. If a pulser object was provided, flag pulser events in data based on its flag.""" utils.logger.info("... flagging pulser events") # --- if a pulser object was provided, flag pulser events in data based on its flag @@ -287,14 +368,98 @@ def flag_pulser_events(self, pulser=None): else: # --- if no object was provided, it's understood that this itself is a pulser + trapTmax = self.data["trapTmax"] + pulser_timestamps = self.data[trapTmax > 200].index + # flag them + self.data["flag_pulser"] = False + self.data.loc[pulser_timestamps, "flag_pulser"] = True + + self.data = self.data.reset_index() + + def flag_fcbsln_events(self, fc_bsln=None): + """Flag FC baseline events, keeping the ones that are in correspondence with a pulser event too. If a FC baseline object was provided, flag FC baseline events in data based on its flag.""" + utils.logger.info("... 
flagging FC baseline events") + + # --- if a FC baseline object was provided, flag FC baseline events in data based on its flag + if fc_bsln: + try: + fc_bsln_timestamps = fc_bsln.data[fc_bsln.data["flag_fc_bsln"]][ + "datetime" + ] # .set_index('datetime').index + self.data["flag_fc_bsln"] = False + self.data = self.data.set_index("datetime") + self.data.loc[fc_bsln_timestamps, "flag_fc_bsln"] = True + except KeyError: + utils.logger.warning( + "\033[93mWarning: cannot flag FC baseline events, timestamps don't match!\n \ + If you are you looking at calibration data, it's not possible to flag FC baseline events in it this way.\n \ + Contact the developers if you would like them to focus on advanced flagging methods.\033[0m" + ) + utils.logger.warning( + "\033[93m! Proceeding without FC baseline flag !\033[0m" + ) + + else: + # --- if no object was provided, it's understood that this itself is a FC baseline # find timestamps over threshold - high_thr = 12500 + high_thr = 3000 self.data = self.data.set_index("datetime") wf_max_rel = self.data["wf_max"] - self.data["baseline"] - pulser_timestamps = self.data[wf_max_rel > high_thr].index + fc_bsln_timestamps = self.data[wf_max_rel > high_thr].index # flag them - self.data["flag_pulser"] = False - self.data.loc[pulser_timestamps, "flag_pulser"] = True + self.data["flag_fc_bsln"] = False + self.data.loc[fc_bsln_timestamps, "flag_fc_bsln"] = True + + self.data = self.data.reset_index() + + def flag_fcbsln_only_events(self, fc_bsln=None): + """Flag FC baseline events. If a FC baseline object was provided, flag FC baseline events in data based on its flag.""" + utils.logger.info("... flagging FC baseline ONLY events") + + # --- if a FC baseline object was provided, flag FC baseline events in data + if fc_bsln: + self.data = self.data.merge( + fc_bsln.data[["datetime", "flag_fc_bsln"]], on="datetime" + ) + + # in any case, define FC bsln events as FC bsln events for which there was not a pulser event + self.data["flag_fc_bsln"] = ( + self.data["flag_fc_bsln"] & ~self.data["flag_pulser"] + ) + + self.data = self.data.reset_index() + + def flag_muon_events(self, muon=None): + """Flag muon events. If a muon object was provided, flag muon events in data based on its flag.""" + utils.logger.info("... flagging muon events") + + # --- if a muon object was provided, flag muon events in data based on its flag + if muon: + try: + muon_timestamps = muon.data[muon.data["flag_muon"]][ + "datetime" + ] # .set_index('datetime').index + self.data["flag_muon"] = False + self.data = self.data.set_index("datetime") + self.data.loc[muon_timestamps, "flag_muon"] = True + except KeyError: + utils.logger.warning( + "\033[93mWarning: cannot flag muon events, timestamps don't match!\n \ + If you are you looking at calibration data, it's not possible to flag muon events in it this way.\n \ + Contact the developers if you would like them to focus on advanced flagging methods.\033[0m" + ) + utils.logger.warning("\033[93m! 
Proceeding without muon flag !\033[0m") + + else: + # --- if no object was provided, it's understood that this itself is a muon + # find timestamps over threshold + high_thr = 500 + self.data = self.data.set_index("datetime") + wf_max_rel = self.data["wf_max"] - self.data["baseline"] + muon_timestamps = self.data[wf_max_rel > high_thr].index + # flag them + self.data["flag_muon"] = False + self.data.loc[muon_timestamps, "flag_muon"] = True self.data = self.data.reset_index() @@ -304,36 +469,22 @@ def get_channel_map(self): setup_info: dict with the keys 'experiment' and 'period' - Later will probably be changed to get channel map by timestamp (or hopefully run, if possible) + Later will probably be changed to get channel map by run, if possible Planning to add: - barrel column for SiPMs special case """ utils.logger.info("... getting channel map") # ------------------------------------------------------------------------- - # load full channel map of this exp and period + # load full channel map of this exp and period (and version) # ------------------------------------------------------------------------- - lmeta = LegendMetadata() - full_channel_map = lmeta.hardware.configuration.channelmaps.on( - timestamp=self.first_timestamp + map_file = os.path.join( + self.path, self.version, "inputs/hardware/configuration/channelmaps" ) + full_channel_map = JsonDB(map_file).on(timestamp=self.first_timestamp) - df_map = pd.DataFrame( - columns=[ - "name", - "location", - "channel", - "position", - "cc4_id", - "cc4_channel", - "daq_crate", - "daq_card", - "HV_card", - "HV_channel", - "det_type", - ], - ) + df_map = pd.DataFrame(columns=utils.COLUMNS_TO_LOAD) df_map = df_map.set_index("channel") # ------------------------------------------------------------------------- @@ -341,10 +492,10 @@ def get_channel_map(self): # ------------------------------------------------------------------------- # for L60-p01 and L200-p02, keep using 'fcid' as channel - if int(self.period[-1]) < 3: + if int(self.period.split('p')[-1]) < 3: ch_flag = "fcid" # from L200-p03 included, uses 'rawid' as channel - if int(self.period[-1]) >= 3: + if int(self.period.split('p')[-1]) >= 3: ch_flag = "rawid" # dct_key is the subdict corresponding to one chmap entry @@ -354,13 +505,55 @@ def is_subsystem(entry): if self.experiment == "L60": return entry["system"] == "auxs" and entry["daq"]["fcid"] == 0 if self.experiment == "L200": - if int(self.period[-1]) < 3: + # we get PULS01 + if self.below_period_3_excluded(): return entry["system"] == "puls" and entry["daq"][ch_flag] == 1 - if int(self.period[-1]) >= 3: + # we get PULS01ANA + if self.above_period_3_included(): return ( entry["system"] == "puls" + # and entry["daq"][ch_flag] == 1027203 and entry["daq"][ch_flag] == 1027201 ) + # special case for pulser AUX + if self.type == "pulser01ana": + if self.experiment == "L60": + utils.logger.error( + "\033[91mThere is no pulser AUX channel in L60. 
Remove this subsystem!\033[0m" + ) + exit() + if self.experiment == "L200": + if self.below_period_3_excluded(): + return entry["system"] == "puls" and entry["daq"][ch_flag] == 3 + if self.above_period_3_included(): + return ( + entry["system"] == "puls" + and entry["daq"][ch_flag] == 1027203 + ) + # special case for baseline + if self.type == "FCbsln": + if self.experiment == "L60": + return entry["system"] == "auxs" and entry["daq"]["fcid"] == 0 + if self.experiment == "L200": + if self.below_period_3_excluded(): + return entry["system"] == "bsln" and entry["daq"][ch_flag] == 0 + if self.above_period_3_included(): + return ( + entry["system"] == "bsln" + and entry["daq"][ch_flag] == 1027200 + ) + # special case for muon channel + if self.type == "muon": + if self.experiment == "L60": + return entry["system"] == "auxs" and entry["daq"]["fcid"] == 1 + if self.experiment == "L200": + if self.below_period_3_excluded(): + return entry["system"] == "auxs" and entry["daq"][ch_flag] == 2 + if self.above_period_3_included(): + return ( + entry["system"] == "auxs" + and entry["daq"][ch_flag] == 1027202 + ) # for geds or spms return entry["system"] == self.type @@ -370,14 +563,17 @@ def is_subsystem(entry): # detector type for geds in the channel map type_code = {"B": "bege", "C": "coax", "V": "icpc", "P": "ppc"} + # systems for which the location/position has to be handled carefully; values were chosen arbitrarily to avoid conflicts + special_systems = utils.SPECIAL_SYSTEMS + # ------------------------------------------------------------------------- # loop over entries and find out subsystem # ------------------------------------------------------------------------- # config.channel_map is already a dict read from the channel map json for entry in full_channel_map: - # skip 'BF' (! not needed since BF is auxs) - if "BF" in entry: + # skip dummy channels + if "BF" in entry or "DUMMY" in entry: continue entry_info = full_channel_map[entry] @@ -386,19 +582,21 @@ def is_subsystem(entry): if not is_subsystem(entry_info): continue - # --- add info for this channel - Raw/FlashCam ID, unique for geds/spms/pulser + # --- add info for this channel - Raw/FlashCam ID, unique for geds/spms/pulser/pulser01ana/FCbsln/muon ch = entry_info["daq"][ch_flag] df_map.at[ch, "name"] = entry_info["name"] - # number/name of string/fiber for geds/spms, dummy for pulser + # number/name of string/fiber for geds/spms, dummy for pulser/pulser01ana/FCbsln/muon df_map.at[ch, "location"] = ( - 0 - if self.type == "pulser" + special_systems[self.type] + if self.type in special_systems else entry_info["location"][loc_code[self.type]] ) - # position in string/fiber for geds/spms, dummy for pulser (works if there is only one pulser channel) + # position in string/fiber for geds/spms, dummy for pulser/pulser01ana/FCbsln/muon df_map.at[ch, "position"] = ( - 0 if self.type == "pulser" else entry_info["location"]["position"] + special_systems[self.type] + if self.type in special_systems + else entry_info["location"]["position"] ) # CC4 information - will be None for L60 (set to 'null') or spms (there, but no CC4s) df_map.at[ch, "cc4_id"] = ( @@ -461,24 +659,33 @@ def get_channel_status(self): utils.logger.info("... 
getting channel status") # ------------------------------------------------------------------------- - # load full status map of this time selection + # load full status map of this time selection (and version) # ------------------------------------------------------------------------- - lmeta = LegendMetadata() - full_status_map = lmeta.dataprod.config.on( + map_file = os.path.join(self.path, self.version, "inputs/dataprod/config") + full_status_map = JsonDB(map_file).on( timestamp=self.first_timestamp, system=self.datatype )["analysis"] - # AUX channels are not in status map, so at least for pulser need default on + # AUX channels are not in status map, so at least for pulser/pulser01ana/FCbsln/muon need default on self.channel_map["status"] = "on" + self.channel_map = self.channel_map.set_index("name") - # 'channel_name', for instance, has the format 'DNNXXXS' (= "name" column) + # 'channel_name' has the format 'DNNXXXS' (= "name" column) for channel_name in full_status_map: # status map contains all channels, check if this channel is in our subsystem if channel_name in self.channel_map.index: self.channel_map.at[channel_name, "status"] = full_status_map[ channel_name - ]["usability"] + ]["usability"] + + # ------------------------------------------------------------------------- + # quick-fix to remove detectors while status maps are not updated + # ------------------------------------------------------------------------- + for channel_name in utils.REMOVE_DETS: + # status map contains all channels, check if this channel is in our subsystem + if channel_name in self.channel_map.index: + self.channel_map.at[channel_name, "status"] = "off" self.channel_map = self.channel_map.reset_index() @@ -493,8 +700,8 @@ def get_parameters_for_dataloader(self, parameters: typing.Union[str, list_of_st # --- always read timestamp params = ["timestamp"] # --- always get wf_max & baseline for pulser for flagging - if self.type == "pulser": - params += ["wf_max", "baseline"] + if self.type in ["pulser", "pulser01ana", "FCbsln", "muon"]: + params += ["wf_max", "baseline", "trapTmax"] # --- add user requested parameters # change to list for convenience, if input was single @@ -518,12 +725,12 @@ def get_parameters_for_dataloader(self, parameters: typing.Union[str, list_of_st # some parameters might be repeated twice - remove return list(np.unique(params)) + def construct_dataloader_configs(self, params: list_of_str): """ Construct DL and DB configs for DataLoader based on parameters and which tiers they belong to. params: list of parameters to load - data_info: dict of containing type:, path:, version: """ # ------------------------------------------------------------------------- # which parameters belong to which tiers @@ -535,6 +742,9 @@ def construct_dataloader_configs(self, params: list_of_str): # ... 
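As described above, auxiliary-type subsystems always pre-load `wf_max`, `baseline` and `trapTmax` for event flagging, `timestamp` is always read, and repeated entries are dropped with `np.unique` before the DataLoader configs are built. A toy version of that parameter assembly (the user list is invented):

```python
import numpy as np

# timestamp is always read
params = ["timestamp"]

# auxiliary subsystems also need what is required to flag their events
subsystem_type = "pulser"  # toy choice
if subsystem_type in ["pulser", "pulser01ana", "FCbsln", "muon"]:
    params += ["wf_max", "baseline", "trapTmax"]

# user-requested parameters (invented; 'baseline' repeated on purpose)
params += ["baseline", "cuspEmax"]

# repeated entries are dropped before building the DataLoader configs
params = list(np.unique(params))
print(params)  # ['baseline', 'cuspEmax', 'timestamp', 'trapTmax', 'wf_max']
```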
param_tiers = pd.DataFrame.from_dict(utils.PARAMETER_TIERS.items()) param_tiers.columns = ["param", "tier"] + # change from 'hit' to 'pht' when loading data for partitioned files + if self.partition: + param_tiers["tier"] = param_tiers["tier"].replace("hit", "pht") # which of these are requested by user param_tiers = param_tiers[param_tiers["param"].isin(params)] @@ -559,19 +769,26 @@ def construct_dataloader_configs(self, params: list_of_str): # set up tiers depending on what parameters we need # ------------------------------------------------------------------------- - # ronly load channels that are on (off channels will crash DataLoader) - chlist = list(self.channel_map[self.channel_map["status"] == "on"]["channel"]) + # only load channels that are on or ac + chlist = list(self.channel_map[(self.channel_map["status"] == "on") | (self.channel_map["status"] == "ac")]["channel"]) + # remove off channels removed_chs = list( - self.channel_map[self.channel_map["status"] == "off"]["channel"] + self.channel_map[self.channel_map["status"] == "off"]["name"] ) - utils.logger.info(f"...... not loading channels with status off: {removed_chs}") + """ + # remove on channels that are not processable (ie have no hit entries) + removed_unprocessable_chs = list( + self.channel_map[self.channel_map["status"] == "on_not_process"]["name"] + ) + utils.logger.info(f"...... not loading on channels that are not processable: {removed_unprocessable_chs}") + """ # for L60-p01 and L200-p02, keep using 3 digits - if int(self.period[-1]) < 3: + if int(self.period.split('p')[-1]) < 3: ch_format = "ch:03d" # from L200-p03 included, uses 7 digits - if int(self.period[-1]) >= 3: + if int(self.period.split('p')[-1]) >= 3: ch_format = "ch:07d" # --- settings for each tier @@ -587,7 +804,8 @@ def construct_dataloader_configs(self, params: list_of_str): + tier + ".lh5" ) - dict_dbconfig["table_format"][tier] = "ch{" + ch_format + "}/" + tier + dict_dbconfig["table_format"][tier] = "ch{" + ch_format + "}/" + dict_dbconfig["table_format"][tier] += "hit" if tier == "pht" else tier dict_dbconfig["tables"][tier] = chlist @@ -596,7 +814,7 @@ def construct_dataloader_configs(self, params: list_of_str): # dict_dlconfig['levels'][tier] = {'tiers': [tier]} # --- settings based on tier hierarchy - order = {"hit": 3, "dsp": 2, "raw": 1} + order = {"pht": 3, "dsp": 2, "raw": 1} if self.partition else {"hit": 3, "dsp": 2, "raw": 1} param_tiers["order"] = param_tiers["tier"].apply(lambda x: order[x]) # find highest tier max_tier = param_tiers[param_tiers["order"] == param_tiers["order"].max()][ @@ -608,3 +826,120 @@ def construct_dataloader_configs(self, params: list_of_str): } return dict_dlconfig, dict_dbconfig + + + def construct_dataloader_configs_unprocess(self, params: list_of_str): + """ + Construct DL and DB configs for DataLoader based on parameters and which tiers they belong to. + + params: list of parameters to load + """ + + param_tiers = pd.DataFrame.from_dict(utils.PARAMETER_TIERS.items()) + param_tiers.columns = ["param", "tier"] + + param_tiers = param_tiers[param_tiers["param"].isin(params)] + utils.logger.info("...... 
loading parameters from the following tiers:") + utils.logger.debug(param_tiers) + + # ------------------------------------------------------------------------- + # set up config templates + # ------------------------------------------------------------------------- + + dict_dbconfig = { + "data_dir": os.path.join(self.path, self.version, "generated", "tier"), + "tier_dirs": {}, + "file_format": {}, + "table_format": {}, + "tables": {}, + "columns": {}, + } + dict_dlconfig = {"channel_map": {}, "levels": {}} + + # ------------------------------------------------------------------------- + # set up tiers depending on what parameters we need + # ------------------------------------------------------------------------- + + chlist = list(self.channel_map[(self.channel_map["status"] == "on_not_process") | (self.channel_map["status"] == "ac")]["channel"]) + utils.logger.info(f"...... loading on channels that are not processable: {chlist}") + + # for L60-p01 and L200-p02, keep using 3 digits + if int(self.period.split('p')[-1]) < 3: + ch_format = "ch:03d" + # from L200-p03 included, uses 7 digits + if int(self.period.split('p')[-1]) >= 3: + ch_format = "ch:07d" + + for tier, tier_params in param_tiers.groupby("tier"): + dict_dbconfig["tier_dirs"][tier] = f"/{tier}" + dict_dbconfig["file_format"][tier] = ( + "/{type}/" + + self.period # {period} + + "/{run}/{exp}-" + + self.period # {period} + + "-{run}-{type}-{timestamp}-tier_" + + tier + + ".lh5" + ) + dict_dbconfig["table_format"][tier] = "ch{" + ch_format + "}/" + tier + + dict_dbconfig["tables"][tier] = chlist + + dict_dbconfig["columns"][tier] = list(tier_params["param"]) + + # --- settings based on tier hierarchy + order = {"hit": 3, "dsp": 2, "raw": 1} + param_tiers["order"] = param_tiers["tier"].apply(lambda x: order[x]) + max_tier = param_tiers[param_tiers["order"] == param_tiers["order"].max()][ + "tier" + ].iloc[0] + dict_dlconfig["levels"][max_tier] = { + "tiers": list(param_tiers["tier"].unique()) + } + + return dict_dlconfig, dict_dbconfig + + + def remove_timestamps(self, remove_keys: dict): + """Remove timestamps from the dataframes for a given channel. + + The time interval in which to remove the channel is provided through an external json file. + """ + # all timestamps we are considering are expressed in UTC0 + utils.logger.debug("... removing timestamps from the following detectors:") + + # loop over channels for which we want to remove timestamps + for detector in remove_keys: + if detector in self.data["name"].unique(): + utils.logger.debug(f".... 
{detector}") + # remove timestamps from self.data that are within time_from and time_to, for a given channel + for chunk in remove_keys[detector]: + utils.logger.debug(f"from {chunk['from']} to {chunk['to']}") + # times are in format YYYYMMDDTHHMMSSZ, convert them into a UTC0 timestamp + for point in ["from", "to"]: + # convert UTC timestamp to datetime (unix epoch time) + chunk[point] = pd.to_datetime( + chunk[point], utc=True, format="%Y%m%dT%H%M%SZ" + ) + + # entries to drop for this chunk + rows_to_drop = self.data[ + (self.data["name"] == detector) + & (self.data["datetime"] >= chunk["from"]) + & (self.data["datetime"] <= chunk["to"]) + ] + self.data = self.data.drop(rows_to_drop.index) + + self.data = self.data.reset_index() + + def below_period_3_excluded(self) -> bool: + if int(self.period.split('p')[-1]) < 3: + return True + else: + return False + + def above_period_3_included(self) -> bool: + if int(self.period.split('p')[-1]) >= 3: + return True + else: + return False diff --git a/src/legend_data_monitor/utils.py b/src/legend_data_monitor/utils.py index 7078455..f237d60 100644 --- a/src/legend_data_monitor/utils.py +++ b/src/legend_data_monitor/utils.py @@ -4,12 +4,15 @@ import logging import os import re -import shelve +import sys # for getting DataLoader time range from datetime import datetime, timedelta -from pandas import DataFrame, concat +import lgdo.lh5_store as lh5 +from pandas import DataFrame + +from . import subsystem # ------------------------------------------------------------------------- @@ -23,12 +26,11 @@ # format formatter = logging.Formatter("%(asctime)s: %(message)s") stream_handler.setFormatter(formatter) -# file_handler.setFormatter(formatter) # add to logger logger.addHandler(stream_handler) -# ------------------------------------------------------------------------- +# ------------------------------------------------------------------------- SOME DICTIONARIES LOADING/DEFINITION pkg = importlib.resources.files("legend_data_monitor") @@ -44,15 +46,45 @@ with open(pkg / "settings" / "special-parameters.json") as f: SPECIAL_PARAMETERS = json.load(f) -# dictionary map (helpful when we want to map channels based on their location/position) -with open(pkg / "settings" / "map-channels.json") as f: - MAP_DICT = json.load(f) - # convert all to lists for convenience for param in SPECIAL_PARAMETERS: if isinstance(SPECIAL_PARAMETERS[param], str): SPECIAL_PARAMETERS[param] = [SPECIAL_PARAMETERS[param]] +# load SC params and corresponding flags to get specific parameters from big dfs that are stored in the database +with open(pkg / "settings" / "SC-params.json") as f: + SC_PARAMETERS = json.load(f) + +# load list of columns to load for a dataframe +COLUMNS_TO_LOAD = [ + "name", + "location", + "channel", + "position", + "cc4_id", + "cc4_channel", + "daq_crate", + "daq_card", + "HV_card", + "HV_channel", + "det_type", +] + +# map position/location for special systems +SPECIAL_SYSTEMS = {"pulser": 0, "pulser01ana": -1, "FCbsln": -2, "muon": -3} + +# dictionary map (helpful when we want to map channels based on their location/position) +with open(pkg / "settings" / "map-channels.json") as f: + MAP_DICT = json.load(f) + +# dictionary with timestamps to remove for specific channels +with open(pkg / "settings" / "remove-keys.json") as f: + REMOVE_KEYS = json.load(f) + +# dictionary with detectors to remove +with open(pkg / "settings" / "remove-dets.json") as f: + REMOVE_DETS = json.load(f) + # ------------------------------------------------------------------------- # 
Subsystem related functions (for getting channel map & status) # ------------------------------------------------------------------------- @@ -60,7 +92,7 @@ def get_query_times(**kwargs): """ - Get time ranges for DataLoader query from user input, as well as first timestamp for channel map/status query. + Get time ranges for DataLoader query from user input, as well as first/last timestamp for channel map / status / SC query. Available kwargs: @@ -71,10 +103,10 @@ def get_query_times(**kwargs): - 'version' [str]: < move description here from get_data() > - 'type' [str]: < move description here > ! not possible for multiple types now! - the following keys depending in time selection mode (choose one) - 1) 'start' : , 'end': where input is of format 'YYYY-MM-DD hh:mm:ss' - 2) 'window'[str]: time window in the past from current time point, format: 'Xd Xh Xm' for days, hours, minutes - 2) 'timestamps': str or list of str in format 'YYYYMMDDThhmmssZ' - 3) 'runs': int or list of ints for run number(s) e.g. 10 for r010 + 1. 'start' : , 'end': where input is of format 'YYYY-MM-DD hh:mm:ss' + 2. 'window'[str]: time window in the past from current time point, format: 'Xd Xh Xm' for days, hours, minutes + 2. 'timestamps': str or list of str in format 'YYYYMMDDThhmmssZ' + 3. 'runs': int or list of ints for run number(s) e.g. 10 for r010 Or input kwargs separately path=, ...; start=&end=, or window=, or timestamps=, or runs= Designed in such a way to accommodate Subsystem init kwargs. A bit cumbersome and can probably be done better. @@ -92,42 +124,88 @@ def get_query_times(**kwargs): timerange = get_query_timerange(**kwargs) first_timestamp = "" - # get first timestamp in case keyword is timestamp + # get first/last timestamp in case keyword is timestamp if "timestamp" in timerange: if "start" in timerange["timestamp"]: first_timestamp = timerange["timestamp"]["start"] - else: + if "end" in timerange["timestamp"]: + last_timestamp = timerange["timestamp"]["end"] + if ( + "start" not in timerange["timestamp"] + and "end" not in timerange["timestamp"] + ): first_timestamp = min(timerange["timestamp"]) + last_timestamp = max(timerange["timestamp"]) # look in path to find first timestamp if keyword is run else: # currently only list of runs and not 'start' and 'end', so always list - # find earliest run, format rXXX + # find earliest/latest run, format rXXX first_run = min(timerange["run"]) + last_run = max(timerange["run"]) # --- get dsp filelist of this run # if setup= keyword was used, get dict; otherwise kwargs is already the dict we need path_info = kwargs["dataset"] if "dataset" in kwargs else kwargs - # format to search /path_to_prod-ref[/v06.00]/generated/tier/**/phy/**/r027 (version might not be there) - glob_path = os.path.join( + first_glob_path = os.path.join( path_info["path"], path_info["version"], "generated", "tier", - "**", + "dsp", path_info["type"], - "**", + path_info["period"], first_run, + ) + last_glob_path = os.path.join( + path_info["path"], + path_info["version"], + "generated", + "tier", + "dsp", + path_info["type"], + path_info["period"], + last_run, + ) + + if not os.path.exists(first_glob_path): + logger.warning( + "\033[93mThe path '%s' does not exist, check config['dataset'] and try again.\033[0m", + first_glob_path, + ) + exit() + if not os.path.exists(last_glob_path): + logger.warning( + "\033[93mThe path '%s' does not exist, check config['dataset'] and try again.\033[0m", + last_glob_path, + ) + exit() + + # format to search 
/path_to_prod-ref[/vXX.XX]/generated/tier/dsp/phy/pXX/rXXX (version 'vXX.XX' might not be there). + # NOTICE that we fixed the tier, otherwise it picks the last one it finds (eg tcm). + # NOTICE that this is PERIOD SPECIFIC (unlikely we're gonna inspect two periods together, so we fix it) + first_glob_path = os.path.join( + first_glob_path, "*.lh5", ) - dsp_files = glob.glob(glob_path) + last_glob_path = os.path.join( + last_glob_path, + "*.lh5", + ) + first_dsp_files = glob.glob(first_glob_path) + last_dsp_files = glob.glob(last_glob_path) # find earliest - dsp_files.sort() - first_file = dsp_files[0] - # extract timestamp + first_dsp_files.sort() + first_file = first_dsp_files[0] + # find latest + last_dsp_files.sort() + last_file = last_dsp_files[-1] + # extract timestamps first_timestamp = get_key(first_file) + # last timestamp is not the key of last file: it's the last timestamp saved in the last file + last_timestamp = get_last_timestamp(last_file) - return timerange, first_timestamp + return timerange, first_timestamp, last_timestamp def get_query_timerange(**kwargs): @@ -138,10 +216,10 @@ def get_query_timerange(**kwargs): dataset= dict with the following keys depending in time selection mode (choose one) - 1) 'start' : , 'end': where input is of format 'YYYY-MM-DD hh:mm:ss' - 2) 'window'[str]: time window in the past from current time point, format: 'Xd Xh Xm' for days, hours, minutes - 2) 'timestamps': str or list of str in format 'YYYYMMDDThhmmssZ' - 3) 'runs': int or list of ints for run number(s) e.g. 10 for r010 + 1. 'start' : , 'end': where input is of format 'YYYY-MM-DD hh:mm:ss' + 2. 'window'[str]: time window in the past from current time point, format: 'Xd Xh Xm' for days, hours, minutes + 2. 'timestamps': str or list of str in format 'YYYYMMDDThhmmssZ' + 3. 'runs': int or list of ints for run number(s) e.g. 10 for r010 Or enter kwargs separately start=&end=, or window=, or timestamp=, or runs= Designed in such a way to accommodate Subsystem init kwargs. A bit cumbersome and can probably be done better. @@ -235,12 +313,98 @@ def get_query_timerange(**kwargs): return time_range +def dataset_validity_check(data_info: dict): + """Check the validity of the input dictionary to see if it contains all necessary info. Used in Subsystem and SlowControl classes.""" + if "experiment" not in data_info: + logger.error("\033[91mProvide experiment name!\033[0m") + logger.error("\033[91m%s\033[0m", subsystem.Subsystem.__doc__) + return + + if "type" not in data_info: + logger.error("\033[91mProvide data type!\033[0m") + logger.error("\033[91m%s\033[0m", subsystem.Subsystem.__doc__) + return + + if "period" not in data_info: + logger.error("\033[91mProvide period!\033[0m") + logger.error("\033[91m%s\033[0m", subsystem.Subsystem.__doc__) + return + + # convert to list for convenience + # ! currently not possible with channel status + # if isinstance(data_info["type"], str): + # data_info["type"] = [data_info["type"]] + + data_types = ["phy", "cal"] + # ! 
currently not possible with channel status + # for datatype in data_info["type"]: + # if datatype not in data_types: + if not data_info["type"] in data_types: + logger.error("\033[91mInvalid data type provided!\033[0m") + logger.error("\033[91m%s\033[0m", subsystem.Subsystem.__doc__) + return + + if "path" not in data_info: + logger.error("\033[91mProvide path to data!\033[0m") + logger.error("\033[91m%s\033[0m", subsystem.Subsystem.__doc__) + return + if not os.path.exists(data_info["path"]): + logger.error("\033[91mThe data path you provided does not exist!\033[0m") + return + + if "version" not in data_info: + logger.error( + '\033[91mProvide processing version! If not needed, just put an empty string, "".\033[0m' + ) + logger.error("\033[91m%s\033[0m", subsystem.Subsystem.__doc__) + return + + # in p03 things change again!!!! + # There is no version in '/data2/public/prodenv/prod-blind/tmp/auto/generated/tier/dsp/phy/p03', so for the moment we skip this check... + if data_info["period"] != "p03" and not os.path.exists( + os.path.join(data_info["path"], data_info["version"]) + ): + logger.error("\033[91mProvide valid processing version!\033[0m") + logger.error("\033[91m%s\033[0m", subsystem.Subsystem.__doc__) + return + + # ------------------------------------------------------------------------- # Plotting related functions # ------------------------------------------------------------------------- -def check_plot_settings(conf: dict): +def check_scdb_settings(conf: dict) -> bool: + """Check if the 'slow_control' entry in config file is good or not.""" + # there is no "slow_control" key + if "slow_control" not in conf.keys(): + logger.warning( + "\033[93mThere is no 'slow_control' key in the config file. Try again if you want to retrieve slow control data.\033[0m" + ) + return False + # there is "slow_control" key, but ... + else: + # ... there is no "parameters" key + if "parameters" not in conf["slow_control"].keys(): + logger.warning( + "\033[93mThere is no 'parameters' key in config 'slow_control' entry. Try again if you want to retrieve slow control data.\033[0m" + ) + return False + # ... there is "parameters" key, but ... + else: + # ... it is not a string or a list (of strings) + if not isinstance( + conf["slow_control"]["parameters"], str + ) and not isinstance(conf["slow_control"]["parameters"], list): + logger.error( + "\033[91mSlow control parameters must be a string or a list of strings. Try again if you want to retrieve slow control data.\033[0m" + ) + return False + + return True + + +def check_plot_settings(conf: dict) -> bool: from . import plot_styles, plotting options = { @@ -248,13 +412,29 @@ def check_plot_settings(conf: dict): "plot_style": plot_styles.PLOT_STYLE.keys(), } + if "subsystems" not in conf.keys(): + logger.error( + "\033[91mThere is no 'subsystems' key in the config file. Try again if you want to plot data.\033[0m" + ) + exit() + for subsys in conf["subsystems"]: for plot in conf["subsystems"][subsys]: # settings for this plot plot_settings = conf["subsystems"][subsys][plot] + # ---------------------------------------------------------------------------------------------- + # general check + # ---------------------------------------------------------------------------------------------- # check if all necessary fields for param settings were provided for field in options: + # when plot_structure is summary, plot_style is not needed... + # ToDo: neater way to skip the whole loop but still do special checks; break? ugly... 
+ # future ToDo: exposure can be plotted in various plot styles e.g. string viz, or plot array, will change + if plot_settings["parameters"] == "exposure": + continue + + # ...otherwise, it is required # if this field is not provided by user, tell them to provide it # (if optional to provided, will have been set with defaults before calling set_defaults()) if field not in plot_settings: @@ -282,6 +462,25 @@ def check_plot_settings(conf: dict): ) return False + # ---------------------------------------------------------------------------------------------- + # special checks + # ---------------------------------------------------------------------------------------------- + + # exposure check + if plot_settings["parameters"] == "exposure" and ( + plot_settings["event_type"] not in ["pulser", "all"] + ): + logger.error( + "\033[91mPulser events are needed to calculate livetime/exposure; choose 'pulser' or 'all' event type\033[0m" + ) + return False + + # ToDo: neater way to skip the whole loop but still do special checks; break? ugly... + if plot_settings["parameters"] == "exposure": + continue + + # other non-exposure checks + # if vs time was provided, need time window if ( plot_settings["plot_style"] == "vs time" @@ -361,16 +560,16 @@ def get_time_name(user_time_range: dict) -> str: """Get a name for each available time selection. careful handling of folder name depending on the selected time range. The possibilities are: - 1) user_time_range = {'timestamp': {'start': '20220928T080000Z', 'end': '20220928T093000Z'}} => start + end + 1. user_time_range = {'timestamp': {'start': '20220928T080000Z', 'end': '20220928T093000Z'}} => start + end -> folder: 20220928T080000Z_20220928T093000Z/ - 2) user_time_range = {'timestamp': ['20230207T103123Z']} => one key + 2. user_time_range = {'timestamp': ['20230207T103123Z']} => one key -> folder: 20230207T103123Z/ - 3) user_time_range = {'timestamp': ['20230207T103123Z', '20230207T141123Z', '20230207T083323Z']} => multiple keys + 3. user_time_range = {'timestamp': ['20230207T103123Z', '20230207T141123Z', '20230207T083323Z']} => multiple keys -> get min/max and use in the folder name -> folder: 20230207T083323Z_20230207T141123Z/ - 4) user_time_range = {'run': ['r010']} => one run + 4. user_time_range = {'run': ['r010']} => one run -> folder: r010/ - 5) user_time_range = {'run': ['r010', 'r014']} => multiple runs + 5. user_time_range = {'run': ['r010', 'r014']} => multiple runs -> folder: r010_r014/ """ name_time = "" @@ -401,6 +600,7 @@ def get_time_name(user_time_range: dict) -> str: def get_timestamp(filename): + """Get the timestamp from a filename. 
For instance, if file='l200-p04-r000-phy-20230421T055556Z-tier_dsp.lh5', then it returns '20230421T055556Z'.""" # Assumes that the timestamp is in the format YYYYMMDDTHHMMSSZ return filename.split("-")[-2] @@ -413,8 +613,14 @@ def get_run_name(config, user_time_range: dict) -> str: ) # start/end timestamps of the selected time range of interest - start_timestamp = user_time_range["timestamp"]["start"] - end_timestamp = user_time_range["timestamp"]["end"] + # if range was given, will have keywords "start" and "end" + if "start" in user_time_range["timestamp"]: + start_timestamp = user_time_range["timestamp"]["start"] + end_timestamp = user_time_range["timestamp"]["end"] + # if list of timestamps was given (may be not consecutive or in order), it's just a list + else: + start_timestamp = min(user_time_range["timestamp"]) + end_timestamp = max(user_time_range["timestamp"]) run_list = [] # this will be updated with the run ID @@ -451,7 +657,7 @@ def search_for_timestamp(folder): logger.error( "\033[91mThe selected timestamps were not find anywhere. Try again with another time range!\033[0m" ) - exit() + sys.exit() if len(run_list) > 1: return get_multiple_run_id(user_time_range) @@ -469,11 +675,22 @@ def get_all_plot_parameters(subsystem: str, config: dict): else: all_parameters += parameters + # check if event type asked needs a special parameter (K lines need energy) + event_type = config["subsystems"][subsystem][plot]["event_type"] + if event_type in SPECIAL_PARAMETERS: + all_parameters += SPECIAL_PARAMETERS[event_type] + # check if there is any QC entry; if so, add it to the list of parameters to load - if "quality_cuts" in config["subsystems"][subsystem][plot]: - all_parameters.append( - config["subsystems"][subsystem][plot]["quality_cuts"] - ) + if "cuts" in config["subsystems"][subsystem][plot]: + cuts = config["subsystems"][subsystem][plot]["cuts"] + # convert to list for convenience + if isinstance(cuts, str): + cuts = [cuts] + for cut in cuts: + # append original name of the cut to load (remove the "not" ~ symbol if present) + if cut[0] == "~": + cut = cut[1:] + all_parameters.append(cut) return all_parameters @@ -483,6 +700,104 @@ def get_key(dsp_fname: str) -> str: return re.search(r"-\d{8}T\d{6}Z", dsp_fname).group(0)[1:] +def unix_timestamp_to_string(unix_timestamp): + """Convert a Unix timestamp to a string in the format 'YYYYMMDDTHHMMSSZ' with the timezone indicating UTC+00.""" + utc_datetime = datetime.utcfromtimestamp(unix_timestamp) + formatted_string = utc_datetime.strftime("%Y%m%dT%H%M%SZ") + return formatted_string + + +def get_last_timestamp(dsp_fname: str) -> str: + """Read a lh5 file and return the last timestamp saved in the file. This works only in case of a global trigger where the whole array is entirely recorded for a given timestamp.""" + # pick a random channel + first_channel = lh5.ls(dsp_fname, "")[0] + # get array of timestamps stored in the lh5 file + timestamp = lh5.load_nda(dsp_fname, ["timestamp"], f"{first_channel}/dsp/")[ + "timestamp" + ] + # get the last entry + last_timestamp = timestamp[-1] + # convert from UNIX tstamp to string tstmp of format YYYYMMDDTHHMMSSZ + last_timestamp = unix_timestamp_to_string(last_timestamp) + + return last_timestamp + + +def bunch_dataset(config: dict, n_files=None): + """Bunch the full datasets into smaller pieces, based on the number of files we want to inspect at each iteration. + + It works for "start+end", "runs" and "timestamps" in "dataset" present in the config file. 
+ """ + # --- get dsp filelist of this run + path_info = config["dataset"] + user_time_range = get_query_timerange(dataset=config["dataset"]) + + run = ( + get_run_name(config, user_time_range) + if "timestamp" in user_time_range.keys() + else get_time_name(user_time_range) + ) + # format to search /path_to_prod-ref[/vXX.XX]/generated/tier/dsp/phy/pXX/rXXX (version 'vXX.XX' might not be there). + # NOTICE that we fixed the tier, otherwise it picks the last one it finds (eg tcm). + # NOTICE that this is PERIOD SPECIFIC (unlikely we're gonna inspect two periods together, so we fix it) + path_to_files = os.path.join( + path_info["path"], + path_info["version"], + "generated", + "tier", + "dsp", + path_info["type"], + path_info["period"], + run, + "*.lh5", + ) + # get all dsp files + dsp_files = glob.glob(path_to_files) + dsp_files.sort() + + if "timestamp" in user_time_range.keys(): + if isinstance(user_time_range["timestamp"], list): + # sort in crescent order + user_time_range["timestamp"].sort() + start_time = datetime.strptime( + user_time_range["timestamp"][0], "%Y%m%dT%H%M%SZ" + ) + end_time = datetime.strptime( + user_time_range["timestamp"][-1], "%Y%m%dT%H%M%SZ" + ) + + else: + start_time = datetime.strptime( + user_time_range["timestamp"]["start"], "%Y%m%dT%H%M%SZ" + ) + end_time = datetime.strptime( + user_time_range["timestamp"]["end"], "%Y%m%dT%H%M%SZ" + ) + + if "run" in user_time_range.keys(): + timerange, start_tmstmp, end_tmstmp = get_query_times(dataset=config["dataset"]) + start_time = datetime.strptime(start_tmstmp, "%Y%m%dT%H%M%SZ") + end_time = datetime.strptime(end_tmstmp, "%Y%m%dT%H%M%SZ") + + # filter files and keep the ones within the time range of interest + filtered_files = [] + for dsp_file in dsp_files: + # Extract the timestamp from the file name + timestamp_str = dsp_file.split("-")[-2] + file_timestamp = datetime.strptime(timestamp_str, "%Y%m%dT%H%M%SZ") + # Check if the file timestamp is within the specified range + if start_time <= file_timestamp <= end_time: + filtered_files.append(dsp_file) + + filtered_files = [filtered_file.split("-")[-2] for filtered_file in filtered_files] + filtered_files = [ + filtered_files[i : i + int(n_files)] + for i in range(0, len(filtered_files), int(n_files)) + ] + + return filtered_files + + # ------------------------------------------------------------------------- # Config file related functions (for building files) # ------------------------------------------------------------------------- @@ -495,6 +810,19 @@ def add_config_entries( prod_config: dict, ) -> dict: """Add missing information (output, dataset) to the configuration file. This function is generally used during automathic data production, where the initiali config file has only the 'subsystem' entry.""" + # check if there is an output folder specified in the config file + if "output" not in config.keys(): + logger.error( + "\033[91mThe config file is missing the 'output' key. Add it and try again!\033[0m" + ) + sys.exit() + # check if there is the saving option specified in the config file + if "saving" not in config.keys(): + logger.error( + "\033[91mThe config file is missing the 'saving' key. 
Add it and try again!\033[0m" + ) + sys.exit() + # Get the keys with open(file_keys) as f: keys = f.readlines() @@ -517,42 +845,47 @@ def add_config_entries( if "version" in config["dataset"].keys(): version = config["dataset"]["version"] else: - version = ( - (prod_path.split("/"))[-2] - if prod_path.endswith("/") - else (prod_path.split("/"))[-1] - ) + # case of rsync when inspecting temp files to plot for the dashboard + if prod_path == "": + version = "" + # prod-ref version where the version is specified + else: + version = ( + (prod_path.split("/"))[-2] + if prod_path.endswith("/") + else (prod_path.split("/"))[-1] + ) if "type" in config["dataset"].keys(): type = config["dataset"]["type"] else: logger.error("\033[91mYou need to provide data type! Try again.\033[0m") - exit() + sys.exit() if "path" in config["dataset"].keys(): path = config["dataset"]["path"] else: logger.error( "\033[91mYou need to provide path to lh5 files! Try again.\033[0m" ) - exit() + sys.exit() else: # get phy/cal lists phy_keys = [key for key in keys if "phy" in key] cal_keys = [key for key in keys if "cal" in key] if len(phy_keys) == 0 and len(cal_keys) == 0: logger.error("\033[91mNo keys to load. Try again.\033[0m") - return + sys.exit() if len(phy_keys) != 0 and len(cal_keys) == 0: type = "phy" if len(phy_keys) == 0 and len(cal_keys) != 0: type = "cal" logger.error("\033[91mcal is still under development! Try again.\033[0m") - return + sys.exit() if len(phy_keys) != 0 and len(cal_keys) != 0: type = ["cal", "phy"] logger.error( "\033[91mBoth cal and phy are still under development! Try again.\033[0m" ) - return + sys.exit() # Get the production path path = ( prod_path.split("prod-ref")[0] + "prod-ref" @@ -560,9 +893,6 @@ def add_config_entries( else prod_path.split("prod-ref")[0] + "/prod-ref" ) - if "output" in config.keys(): - prod_path = config["output"] - # create the dataset dictionary dataset_dict = { "experiment": experiment, @@ -574,7 +904,7 @@ def add_config_entries( "timestamps": timestamp, } - more_info = {"output": prod_path, "dataset": dataset_dict} + more_info = {"dataset": dataset_dict} # 'saving' and 'subsystem' info must be already there config.update(more_info) @@ -585,112 +915,113 @@ def add_config_entries( '\033[91mThere are missing entries among ["output", "dataset", "saving", "subsystems"] in the config file (found keys: %s). Try again and check you start with "output" and "dataset" info!\033[0m', config.keys(), ) - exit() + sys.exit() return config # ------------------------------------------------------------------------- -# Saving related functions +# Other functions # ------------------------------------------------------------------------- -def build_out_dict( - plot_settings: list, - plot_info: list, - par_dict_content: dict, - out_dict: dict, - saving: str, - plt_path: str, -): - """Build the output dictionary based on the input 'saving' option.""" - # we overwrite the object with a new one - if saving == "overwrite": - out_dict = save_dict(plot_settings, plot_info, par_dict_content, out_dict) - - # we retrieve the already existing shelve object, and we append new things to it; the parameter here is fixed - if saving == "append": - # the file does not exist, so first we create it and then, at the next step, we'll append things - if not os.path.exists(plt_path + "-" + plot_info["subsystem"] + ".dat"): - # logger.warning( - # "\033[93mYou selected 'append' when saving, but the file with already saved data does not exist. 
For this reason, it will be created first.\033[0m" - # ) - out_dict = save_dict(plot_settings, plot_info, par_dict_content, out_dict) - - # the file exists, so we are going to append data - else: - logger.info( - "There is already a file containing output data. Appending new data to it right now..." - ) - # open already existing shelve file - with shelve.open(plt_path + "-" + plot_info["subsystem"], "r") as shelf: - old_dict = dict(shelf) - - # the parameter is there - parameter = ( - plot_info["parameter"].split("_var")[0] - if "_var" in plot_info["parameter"] - else plot_info["parameter"] - ) - if old_dict["monitoring"]["pulser"][parameter]: - # get already present df - old_df = old_dict["monitoring"]["pulser"][parameter][ - "df_" + plot_info["subsystem"] - ] - old_df = check_level0(old_df) - # get new df (plot_info object is the same as before, no need to get it and update it) - new_df = par_dict_content["df_" + plot_info["subsystem"]] - # concatenate the two dfs (channels are no more grouped; not a problem) - merged_df = DataFrame.empty - merged_df = concat([old_df, new_df], ignore_index=True, axis=0) - merged_df = merged_df.reset_index() - merged_df = check_level0(merged_df) - # re-order content in order of channels/timestamps - merged_df = merged_df.sort_values(["channel", "datetime"]) - - # redefine the dict containing the df and plot_info - par_dict_content = {} - par_dict_content["df_" + plot_info["subsystem"]] = merged_df - par_dict_content["plot_info"] = plot_info - - # saved the merged df as usual - out_dict = save_dict( - plot_settings, plot_info, par_dict_content, old_dict["monitoring"] - ) - # we need to save it, otherwise when looping over the next parameter we lose the appended info for the already inspected parameter - out_file = shelve.open(plt_path + "-" + plot_info["subsystem"]) - out_file["monitoring"] = out_dict - out_file.close() - - return out_dict - - -def save_dict( - plot_settings: list, plot_info: list, par_dict_content: dict, out_dict: dict -): - """Create a dictionary with the correct format for being saved in the final shelve object.""" - parameter = ( - plot_info["parameter"].split("_var")[0] - if "_var" in plot_info["parameter"] - else plot_info["parameter"] - ) - # event type key is already there - if plot_settings["event_type"] in out_dict.keys(): - out_dict[plot_settings["event_type"]][parameter] = par_dict_content - # event type key is NOT there +def get_livetime(tot_livetime: float): + """Get the livetime in a human readable format, starting from livetime in seconds. + + If tot_livetime is more than 0.1 yr, convert it to years. + If tot_livetime is less than 0.1 yr but more than 1 day, convert it to days. + If tot_livetime is less than 1 day but more than 1 hour, convert it to hours. + If tot_livetime is less than 1 hour but more than 1 minute, convert it to minutes. 
+    """
+    if tot_livetime > 60 * 60 * 24 * 365.25:
+        tot_livetime = tot_livetime / 60 / 60 / 24 / 365.25
+        unit = " yr"
+    elif tot_livetime > 60 * 60 * 24:
+        tot_livetime = tot_livetime / 60 / 60 / 24
+        unit = " days"
+    elif tot_livetime > 60 * 60:
+        tot_livetime = tot_livetime / 60 / 60
+        unit = " hrs"
+    elif tot_livetime > 60:
+        tot_livetime = tot_livetime / 60
+        unit = " min"
     else:
-        # empty dictionary (not filled yet)
-        if len(out_dict.keys()) == 0:
-            out_dict = {plot_settings["event_type"]: {parameter: par_dict_content}}
-        # the dictionary already contains something (but for another event type selection)
-        else:
-            out_dict[plot_settings["event_type"]] = {parameter: par_dict_content}
+        unit = " sec"
+    logger.info(f"Total livetime: {tot_livetime:.2f}{unit}")
+
+    return tot_livetime, unit
+
+
+def is_empty(df: DataFrame):
+    """Check if a dataframe is empty."""
+    if df.empty:
+        return True
+    return False
+
 
-    return out_dict
+
+def check_empty_df(df) -> bool:
+    """Check if df (DataFrame | analysis_data.AnalysisData) exists and is not empty."""
+    # the dataframe is of type DataFrame
+    if isinstance(df, DataFrame):
+        return is_empty(df)
+    # the dataframe is of type analysis_data.AnalysisData
+    else:
+        return is_empty(df.data)
+
+
+def convert_to_camel_case(string: str, char: str) -> str:
+    """Remove a character from a string and capitalize all initial letters."""
+    # Split the string by the given character
+    words = string.split(char)
+    # Capitalize the initial letters of each word
+    words = [word.capitalize() for word in words]
+    # Join the words back together without any separator
+    camel_case_string = "".join(words)
+
+    return camel_case_string
+
+
+def get_output_path(config: dict):
+    """Get the output path, given the 'dataset' entry of the config file. The path will be used to save and store pdf/hdf/etc. output files."""
+    try:
+        data_types = (
+            [config["dataset"]["type"]]
+            if isinstance(config["dataset"]["type"], str)
+            else config["dataset"]["type"]
+        )
+
+        plt_basename = "{}-{}-".format(
+            config["dataset"]["experiment"].lower(),
+            config["dataset"]["period"],
+        )
+    except (KeyError, TypeError):
+        # means something about dataset is wrong -> print Subsystem doc
+        logger.error(
+            "\033[91mSomething is missing or wrong in your 'dataset' field of the config. You can see the format here under 'dataset=':\033[0m"
+        )
+        logger.info("\033[91m%s\033[0m", subsystem.Subsystem.__doc__)
+        exit()
+
+    user_time_range = get_query_timerange(dataset=config["dataset"])
+    # will be returned as None if something is wrong, and print an error message
+    if not user_time_range:
+        return
+
+    # create output folders for plots
+    period_dir = make_output_paths(config, user_time_range)
+    # get correct time info for subfolder's name
+    name_time = (
+        get_run_name(config, user_time_range)
+        if "timestamp" in user_time_range.keys()
+        else get_time_name(user_time_range)
+    )
+    output_paths = period_dir + name_time + "/"
+    make_dir(output_paths)
+    if not output_paths:
+        return
+    # we don't care here about the time keyword timestamp/run -> just get the value
+    plt_basename += name_time
+    out_path = output_paths + plt_basename
+    out_path += "-{}".format("_".join(data_types))
 
-def check_level0(dataframe: DataFrame) -> DataFrame:
-    """Check if a dataframe contains the 'level_0' column. If so, remove it."""
-    if "level_0" in dataframe.columns:
-        dataframe = dataframe.drop(columns=["level_0"])
-    return dataframe
+
+    return out_path
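
As an illustration (not part of the patch above), the sketch below re-implements in miniature two of the helpers introduced here: the Unix-timestamp-to-key conversion used by `get_last_timestamp()` and the final grouping step of `bunch_dataset()`. The function names in the sketch are illustrative, not the ones exported by `legend_data_monitor.utils`.

```python
from datetime import datetime, timezone


def unix_to_key(unix_ts: float) -> str:
    """Convert a Unix timestamp to the 'YYYYMMDDTHHMMSSZ' key format (UTC)."""
    # Equivalent in spirit to unix_timestamp_to_string() in the patch, but using
    # the timezone-aware API instead of the deprecated datetime.utcfromtimestamp().
    return datetime.fromtimestamp(unix_ts, tz=timezone.utc).strftime("%Y%m%dT%H%M%SZ")


def bunch_keys(keys: list, n_files: int) -> list:
    """Split a sorted list of file keys into consecutive groups of n_files."""
    # Same slicing logic as the last step of bunch_dataset().
    return [keys[i : i + n_files] for i in range(0, len(keys), n_files)]


if __name__ == "__main__":
    keys = sorted(unix_to_key(t) for t in (1682056556, 1682060156, 1682063756))
    print(bunch_keys(keys, 2))
    # [['20230421T055556Z', '20230421T065556Z'], ['20230421T075556Z']]
```

Each inner list corresponds to one bunch of files inspected per iteration, as described in the `bunch_dataset()` docstring.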