Skip to content

Commit

Permalink
Drop events with no L1/L2/L3 impacts (or NULL impacts) (#195)
Browse files Browse the repository at this point in the history
  • Loading branch information
i-be-snek authored Nov 27, 2024
1 parent 0280d9e commit 3ddd749
Show file tree
Hide file tree
Showing 4,187 changed files with 1,312,770 additions and 2 deletions.
The diff you're trying to view is too large. We only load the first 3000 changed files.
42 changes: 41 additions & 1 deletion Database/fill_data_gap.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
logger.info(
f"Dropping any records of {e} in {name} for impact {impact}. Shape before: {level[impact].shape}"
)
level[impact] = level[impact][~(level[impact]["Event_ID"] == e)]
level[impact] = level[impact][~(level[impact][dg_utils.event_id] == e)]
logger.info(
f"Dropped any records of {e} in {name} for impact {impact}. Shape after: {level[impact].shape}"
)
Expand Down Expand Up @@ -322,6 +322,46 @@
for impact in level.keys():
level[impact].replace(float("nan"), None, inplace=True)

# Drop events that carry no impact information at any level (L1, L2 or L3).
# NOTE(review): indentation was lost in this rendering of the diff; the nesting
# described in the comments below is inferred from variable usage -- confirm
# against the original file.

# Rows of l1 where every column whose name contains "Total_" is null.
null_mask_total = l1[[x for x in l1.columns if "Total_" in x]].isnull().all(axis=1)
# Unique event IDs of those rows: candidates for removal.
# NOTE(review): uses the literal `Event_ID` attribute while the rest of this
# section uses `dg_utils.event_id` -- presumably the same column; verify.
e_ids_missing_l1_impacts: list[str] = list(l1[null_mask_total].Event_ID.unique())
missing_event_ids_to_drop: list[str] = []
logger.info(f"Found {len(e_ids_missing_l1_impacts)} Event IDs without impacts")
for e_id in e_ids_missing_l1_impacts:
# Assume the event can be dropped until a non-null L2 or L3 impact is found.
drop_l2, drop_l3 = True, True
for impact in l2.keys():
# L2 rows where both the num_min and num_max columns are null.
null_mask_l2 = l2[impact][[dg_utils.num_min, dg_utils.num_max]].isnull().all(axis=1)
# L2 rows of this event that have at least one of the two values set.
l2_df = l2[impact][(~null_mask_l2) & (l2[impact][dg_utils.event_id] == e_id)]
if not l2_df.empty:
logger.warning(f"L2 {e_id} contains impacts not propagated to L1!\n{l2_df}")
drop_l2 = False
del l2_df

# Same check against L3. `l3` is indexed with `impact`, so this presumably
# sits inside the `for impact in l2.keys()` loop and assumes l3 has the same
# keys as l2 -- TODO confirm.
null_mask_l3 = l3[impact][[dg_utils.num_min, dg_utils.num_max]].isnull().all(axis=1)
l3_df = l3[impact][(~null_mask_l3) & (l3[impact][dg_utils.event_id] == e_id)]
if not l3_df.empty:
logger.warning(f"L3 {e_id} contains impacts not propagated to L1!\n{l3_df}")
drop_l3 = False
del l3_df

# Only events with no non-null impacts in either L2 or L3 are dropped.
if drop_l3 and drop_l2:
missing_event_ids_to_drop.append(e_id)

# De-duplicate the collected IDs (the resulting order is not preserved by set()).
missing_event_ids_to_drop = list(set(missing_event_ids_to_drop))

logger.warning(
f"Dropping the following {len(missing_event_ids_to_drop)} IDs from L1/L2/L3: {missing_event_ids_to_drop}"
)
# Remove the selected events from l1 in a single vectorised filter ...
l1 = l1[~l1[dg_utils.event_id].isin(missing_event_ids_to_drop)]
# ... and from every l2/l3 table, filtering one event ID at a time.
for level in [l2, l3]:
for impact in level.keys():
for e in missing_event_ids_to_drop:
level[impact] = level[impact][~(level[impact][dg_utils.event_id] == e)]

# Replace NaNs with None
for level in [l2, l3]:
for impact in level.keys():
level[impact].replace(float("nan"), None, inplace=True)

logger.info(f"Storing results in {args.output_dir}")
pathlib.Path(args.output_dir).mkdir(parents=True, exist_ok=True)

Expand Down
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Loading

0 comments on commit 3ddd749

Please sign in to comment.