-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #274 from mikaylathompson/human-readable-tuples
Add script to transform tuples to human readable format
- Loading branch information
Showing
3 changed files
with
187 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
146 changes: 146 additions & 0 deletions
146
TrafficCapture/dockerSolution/src/main/docker/migrationConsole/humanReadableLogs.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,146 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse
import base64
import gzip
import json
import logging
import pathlib
from typing import Any, Optional

from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm
|
||
# Module-level logger used by every function in this script.
logger = logging.getLogger(__name__)

# Key of each input log record (one JSON object per line) whose value is the
# JSON-encoded tuple.
LOG_JSON_TUPLE_FIELD = "message"
# Dotted paths, inside a tuple, of the base64-encoded bodies to make readable.
BASE64_ENCODED_TUPLE_PATHS = ["request.body", "primaryResponse.body", "shadowResponse.body"]
# TODO: The capitalization of the Content-Encoding and Content-Type header keys
# in logged tuples is not confirmed. These lower-case spellings worked on the
# original author's test cases, but are not guaranteed to match in all cases.
# Maps each body path to the dotted path of its Content-Encoding header.
CONTENT_ENCODING_PATH = {
    BASE64_ENCODED_TUPLE_PATHS[0]: "request.content-encoding",
    BASE64_ENCODED_TUPLE_PATHS[1]: "primaryResponse.content-encoding",
    BASE64_ENCODED_TUPLE_PATHS[2]: "shadowResponse.content-encoding",
}
# Maps each body path to the dotted path of its Content-Type header.
CONTENT_TYPE_PATH = {
    BASE64_ENCODED_TUPLE_PATHS[0]: "request.content-type",
    BASE64_ENCODED_TUPLE_PATHS[1]: "primaryResponse.content-type",
    BASE64_ENCODED_TUPLE_PATHS[2]: "shadowResponse.content-type",
}
# Substring of a Content-Type value that marks the body as JSON.
CONTENT_TYPE_JSON = "application/json"
# Content-Encoding value that marks the body as gzip-compressed.
CONTENT_ENCODING_GZIP = "gzip"
# Dotted path of the request URI inside a tuple.
URI_PATH = "request.Request-URI"
# Substring of the request URI that marks the request as a bulk request.
BULK_URI_PATH = "_bulk"
|
||
|
||
class DictionaryPathException(Exception):
    """Raised when a dotted path cannot be resolved inside a nested dictionary."""
|
||
|
||
def get_element(element: str, dict_: dict, raise_on_error=False) -> Optional[Any]:
    """Return the value stored at a dotted path inside a nested dictionary.

    Args:
        element: Keys joined by ``.`` (e.g. ``"request.body"``), followed one
            level at a time starting from ``dict_``.
        dict_: The nested dictionary to descend into.
        raise_on_error: When true, raise instead of returning ``None`` if the
            path cannot be followed.

    Returns:
        The value found at the path, or ``None`` when the path cannot be
        followed and ``raise_on_error`` is false.

    Raises:
        DictionaryPathException: If the path cannot be followed and
            ``raise_on_error`` is true.
    """
    rv = dict_
    for key in element.split('.'):
        try:
            rv = rv[key]
        except (KeyError, TypeError) as err:
            # KeyError: the key is missing from a dict.
            # TypeError: an intermediate value is not a mapping at all
            # (e.g. None, a string or a list), so the path cannot continue.
            if raise_on_error:
                raise DictionaryPathException(f"Key {key} was not present.") from err
            return None
    return rv
|
||
|
||
def set_element(element: str, dict_: dict, value: Any) -> None:
    """Store ``value`` at a dotted path inside a nested dictionary, in place.

    Args:
        element: Keys joined by ``.`` (e.g. ``"request.body"``). Every key
            except the last must already exist in ``dict_``.
        dict_: The nested dictionary to modify.
        value: The value to assign to the final key.

    Raises:
        KeyError: If one of the intermediate keys is missing.
    """
    *parent_keys, last_key = element.split('.')
    target = dict_
    for key in parent_keys:
        target = target[key]
    target[last_key] = value
|
||
|
||
def parse_args(argv=None):
    """Parse the command-line arguments of the script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default),
            argparse reads the arguments from ``sys.argv``.

    Returns:
        An ``argparse.Namespace`` with ``infile`` (a ``pathlib.Path``) and
        ``outfile`` (a ``pathlib.Path``, or ``None`` when not given).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("infile", type=pathlib.Path, help="Path to input logged tuple file.")
    parser.add_argument("--outfile", type=pathlib.Path, help="Path for output human readable tuple file.")
    return parser.parse_args(argv)
|
||
|
||
def parse_body_value(raw_value: str, content_encoding: Optional[str],
                     content_type: Optional[str], is_bulk: bool, line_no: int):
    """Turn a base64-encoded body into the most readable form that can be produced.

    The value is base64-decoded, gunzipped when ``content_encoding`` is gzip,
    decoded as UTF-8 and, when ``content_type`` contains ``application/json``,
    parsed as JSON. Each failure is logged with ``line_no``.

    Args:
        raw_value: The base64-encoded body.
        content_encoding: Value of the Content-Encoding header, if any.
        content_type: Value of the Content-Type header, if any.
        is_bulk: Whether the body belongs to a bulk request, i.e. is a
            sequence of JSON documents, one per line.
        line_no: Input line number, used only in log messages.

    Returns:
        * a list of parsed documents for a bulk JSON body,
        * the parsed document for any other JSON body,
        * the decoded text for non-JSON bodies, empty bodies, or JSON bodies
          that fail to parse,
        * ``None`` when the value cannot be base64-decoded, gunzipped or
          decoded as UTF-8. Raw bytes are never returned because they are not
          JSON-serializable; ``None`` tells the caller to leave the original
          value untouched.
    """
    # Broad catches are deliberate: this is a best-effort conversion and any
    # failure should be logged and skipped, not abort the run.
    try:
        b64decoded = base64.b64decode(raw_value)
    except Exception as e:
        logger.error(f"Body value on line {line_no} could not be decoded: {e}. Skipping parsing body value.")
        return None
    # Content codings are case-insensitive, so normalize before comparing.
    is_gzipped = isinstance(content_encoding, str) and content_encoding.strip().lower() == CONTENT_ENCODING_GZIP
    is_json = content_type is not None and CONTENT_TYPE_JSON in content_type
    if is_gzipped:
        try:
            unzipped = gzip.decompress(b64decoded)
        except Exception as e:
            logger.error(f"Body value on line {line_no} should be gzipped but could not be unzipped: {e}. "
                         "Skipping parsing body value.")
            return None
    else:
        unzipped = b64decoded
    try:
        decoded = unzipped.decode("utf-8")
    except Exception as e:
        logger.error(f"Body value on line {line_no} could not be decoded to utf-8: {e}. "
                     "Skipping parsing body value.")
        return None
    if is_json and decoded:
        if is_bulk:
            try:
                return [json.loads(line) for line in decoded.splitlines()]
            except Exception as e:
                logger.error(f"Body value on line {line_no} should be a bulk json (list of json lines) but "
                             f"could not be parsed: {e}. Skipping parsing body value.")
                return decoded
        try:
            return json.loads(decoded)
        except Exception as e:
            logger.error(f"Body value on line {line_no} should be a json but could not be parsed: {e}. "
                         "Skipping parsing body value.")
            return decoded
    return decoded
|
||
|
||
def _get_header(path: str, tuple_: dict):
    """Look up a header by dotted path, matching the final key case-insensitively."""
    value = get_element(path, tuple_)
    if value is not None:
        return value
    # HTTP header names are case-insensitive, so fall back to comparing the
    # last path segment against the keys of its parent without regard to case.
    parent_path, _, header_name = path.rpartition('.')
    parent = get_element(parent_path, tuple_) if parent_path else tuple_
    if not isinstance(parent, dict):
        return None
    wanted = header_name.lower()
    for key, candidate in parent.items():
        if isinstance(key, str) and key.lower() == wanted:
            return candidate
    return None


def parse_tuple(line: str, line_no: int) -> dict:
    """Parse one logged line and replace its base64 bodies with readable values.

    The line is a JSON object whose ``LOG_JSON_TUPLE_FIELD`` value is itself a
    JSON-encoded tuple. For each path in ``BASE64_ENCODED_TUPLE_PATHS`` that is
    present, the body is passed to ``parse_body_value`` and, when that yields
    a usable result, the body is overwritten in place.

    Args:
        line: One line of the input file.
        line_no: Line number of ``line``, used only in log messages.

    Returns:
        The tuple as a dictionary. If the request URI is missing, the tuple is
        returned with its bodies unchanged.

    Raises:
        ValueError: If ``line`` or the embedded tuple is not valid JSON
            (``json.JSONDecodeError``).
        KeyError: If the line has no ``LOG_JSON_TUPLE_FIELD`` entry.
    """
    item = json.loads(line)
    message = item[LOG_JSON_TUPLE_FIELD]
    tuple_ = json.loads(message)
    try:
        uri = get_element(URI_PATH, tuple_, raise_on_error=True)
    except DictionaryPathException as e:
        logger.error(f"`{URI_PATH}` on line {line_no} could not be loaded: {e} "
                     f"Skipping parsing tuple.")
        return tuple_
    # A present-but-non-string URI (e.g. null) cannot be a bulk path.
    is_bulk_path = isinstance(uri, str) and BULK_URI_PATH in uri
    for body_path in BASE64_ENCODED_TUPLE_PATHS:
        base64value = get_element(body_path, tuple_)
        if base64value is None:
            # This component has no body element, which is potentially valid.
            continue
        content_encoding = _get_header(CONTENT_ENCODING_PATH[body_path], tuple_)
        content_type = _get_header(CONTENT_TYPE_PATH[body_path], tuple_)
        value = parse_body_value(base64value, content_encoding, content_type, is_bulk_path, line_no)
        # None means the body could not be converted. Raw bytes are not
        # JSON-serializable. In both cases keep the original base64 text.
        if value is None or isinstance(value, bytes):
            continue
        # Compare against None, not truthiness, so that valid falsy bodies
        # such as {} or [] are still written back.
        set_element(body_path, tuple_, value)
    return tuple_
|
||
|
||
def main() -> None:
    """Convert the input tuple log into a human readable file, line by line.

    The output path is ``--outfile`` when given; otherwise it is the input
    file name prefixed with ``readable-`` in the input file's directory.
    Blank lines are skipped. A line that cannot be parsed is logged and
    written to the output unchanged so one bad record does not abort the run.
    """
    args = parse_args()
    if args.outfile:
        outfile = args.outfile
    else:
        outfile = args.infile.parent / f"readable-{args.infile.name}"
    print(f"Input file: {args.infile}; Output file: {outfile}")

    logging.basicConfig(level=logging.INFO)
    # logging_redirect_tqdm keeps log output from corrupting the progress bar.
    with logging_redirect_tqdm(), \
            open(args.infile, 'r', encoding="utf-8") as in_f, \
            open(outfile, 'w', encoding="utf-8") as out_f:
        for line_no, line in tqdm(enumerate(in_f, start=1)):
            if not line.strip():
                continue
            try:
                readable = json.dumps(parse_tuple(line, line_no))
            except (ValueError, KeyError, TypeError) as e:
                logger.error(f"Line {line_no} could not be parsed as a logged tuple: {e}. "
                             "Writing it to the output unchanged.")
                readable = line.rstrip("\n")
            print(readable, file=out_f)


if __name__ == "__main__":
    main()