Skip to content

Commit

Permalink
Fix the way we replace in the dataframe and convert to numeric
Browse files Browse the repository at this point in the history
  • Loading branch information
eldraco committed Aug 5, 2024
1 parent 9f00ae7 commit d0127a3
Showing 1 changed file with 18 additions and 6 deletions.
24 changes: 18 additions & 6 deletions zeek_anomaly_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,19 +54,31 @@ def detect(file, amountanom, dumptocsv):
# is better than not using the lines.
# Also fill the missing values with 0
# Finally, assign a type to each column
bro_df['orig_bytes'].replace('-', '0', inplace=True)
bro_df.replace({'orig_bytes': '-'}, '0', inplace=True)
bro_df['orig_bytes'] = pd.to_numeric(bro_df['orig_bytes'], errors='coerce')
bro_df['orig_bytes'] = bro_df['orig_bytes'].fillna(0).astype('int32')
bro_df['resp_bytes'].replace('-', '0', inplace=True)

bro_df.replace({'resp_bytes': '-'}, '0', inplace=True)
bro_df['resp_bytes'] = pd.to_numeric(bro_df['resp_bytes'], errors='coerce')
bro_df['resp_bytes'] = bro_df['resp_bytes'].fillna(0).astype('int32')
bro_df['resp_pkts'].replace('-', '0', inplace=True)

bro_df.replace({'resp_pkts': '-'}, '0', inplace=True)
bro_df['resp_pkts'] = pd.to_numeric(bro_df['resp_pkts'], errors='coerce')
bro_df['resp_pkts'] = bro_df['resp_pkts'].fillna(0).astype('int32')
bro_df['orig_ip_bytes'].replace('-', '0', inplace=True)

bro_df.replace({'orig_ip_bytes': '-'}, '0', inplace=True)
bro_df['orig_ip_bytes'] = pd.to_numeric(bro_df['orig_ip_bytes'], errors='coerce')
bro_df['orig_ip_bytes'] = bro_df['orig_ip_bytes'].fillna(0).astype('int32')
bro_df['resp_ip_bytes'].replace('-', '0', inplace=True)

bro_df.replace({'resp_ip_bytes': '-'}, '0', inplace=True)
bro_df['resp_ip_bytes'] = pd.to_numeric(bro_df['resp_ip_bytes'], errors='coerce')
bro_df['resp_ip_bytes'] = bro_df['resp_ip_bytes'].fillna(0).astype('int32')
bro_df['duration'].replace('-', '0', inplace=True)

bro_df.replace({'duration': '-'}, '0', inplace=True)
bro_df['duration'] = pd.to_numeric(bro_df['duration'], errors='coerce')
bro_df['duration'] = bro_df['duration'].fillna(0).astype('float64')


# Save dataframe to disk as CSV
if dumptocsv != "None":
bro_df.to_csv(dumptocsv)
Expand Down

0 comments on commit d0127a3

Please sign in to comment.