From 266bc82dfcc3d3c2f86b20aa8ee126fab28a5f00 Mon Sep 17 00:00:00 2001 From: Lu Peng <61207760+lupengamzn@users.noreply.github.com> Date: Mon, 22 Mar 2021 16:03:24 -0700 Subject: [PATCH] Replace jsonpickle with json to serialize entity (#275) * Replace jsonpickle with json to serialize entity * Added workflow to create release tag * Pinned sqlalchemy and Flask-SQLAlchemy for unit test * Fixed version * Changed log to debug level * Update logging * Added empty line --- .github/workflows/Release.yaml | 27 ++ aws_xray_sdk/core/models/entity.py | 57 +-- aws_xray_sdk/core/models/segment.py | 22 +- aws_xray_sdk/core/models/subsegment.py | 19 +- aws_xray_sdk/core/models/throwable.py | 21 +- aws_xray_sdk/core/utils/conversion.py | 35 ++ setup.py | 1 - tests/test_serialize_entities.py | 508 +++++++++++++++++++++++++ tests/util.py | 7 +- tox.ini | 4 +- 10 files changed, 640 insertions(+), 61 deletions(-) create mode 100644 .github/workflows/Release.yaml create mode 100644 aws_xray_sdk/core/utils/conversion.py create mode 100644 tests/test_serialize_entities.py diff --git a/.github/workflows/Release.yaml b/.github/workflows/Release.yaml new file mode 100644 index 00000000..8070a7f0 --- /dev/null +++ b/.github/workflows/Release.yaml @@ -0,0 +1,27 @@ +name: Release X-Ray Python SDK + +on: + workflow_dispatch: + inputs: + version: + description: The version to tag the release with, e.g., 1.2.0, 1.3.0 + required: true + +jobs: + release: + runs-on: ubuntu-latest + steps: + - name: Checkout master branch + uses: actions/checkout@v2 + + - name: Create Release + id: create_release + uses: actions/create-release@v1 + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + with: + tag_name: '${{ github.event.inputs.version }}' + release_name: '${{ github.event.inputs.version }} Release' + body: 'See details in [CHANGELOG](https://github.com/aws/aws-xray-sdk-python/blob/master/CHANGELOG.rst)' + draft: true + prerelease: false diff --git a/aws_xray_sdk/core/models/entity.py b/aws_xray_sdk/core/models/entity.py index 07641062..293f220b 100644 --- a/aws_xray_sdk/core/models/entity.py +++ b/aws_xray_sdk/core/models/entity.py @@ -4,9 +4,10 @@ import time import string -import jsonpickle +import json from ..utils.compat import annotation_value_types, string_types +from ..utils.conversion import metadata_to_dict from .throwable import Throwable from . import http from ..exceptions.exceptions import AlreadyEndedException @@ -256,36 +257,42 @@ def get_origin_trace_header(self): def serialize(self): """ Serialize to JSON document that can be accepted by the - X-Ray backend service. It uses jsonpickle to perform - serialization. + X-Ray backend service. It uses json to perform serialization. """ try: - return jsonpickle.encode(self, unpicklable=False) + return json.dumps(self.to_dict(), default=str) except Exception: - log.exception("got an exception during serialization") + log.exception("Failed to serialize %s", self.name) - def _delete_empty_properties(self, properties): + def to_dict(self): """ - Delete empty properties before serialization to avoid - extra keys with empty values in the output json. + Convert Entity(Segment/Subsegment) object to dict + with required properties that have non-empty values. """ - if not self.parent_id: - del properties['parent_id'] - if not self.subsegments: - del properties['subsegments'] - if not self.aws: - del properties['aws'] - if not self.http: - del properties['http'] - if not self.cause: - del properties['cause'] - if not self.annotations: - del properties['annotations'] - if not self.metadata: - del properties['metadata'] - properties.pop(ORIGIN_TRACE_HEADER_ATTR_KEY, None) - - del properties['sampled'] + entity_dict = {} + + for key, value in vars(self).items(): + if isinstance(value, bool) or value: + if key == 'subsegments': + # child subsegments are stored as List + subsegments = [] + for subsegment in value: + subsegments.append(subsegment.to_dict()) + entity_dict[key] = subsegments + elif key == 'cause': + entity_dict[key] = {} + entity_dict[key]['working_directory'] = self.cause['working_directory'] + # exceptions are stored as List + throwables = [] + for throwable in value['exceptions']: + throwables.append(throwable.to_dict()) + entity_dict[key]['exceptions'] = throwables + elif key == 'metadata': + entity_dict[key] = metadata_to_dict(value) + elif key != 'sampled' and key != ORIGIN_TRACE_HEADER_ATTR_KEY: + entity_dict[key] = value + + return entity_dict def _check_ended(self): if not self.in_progress: diff --git a/aws_xray_sdk/core/models/segment.py b/aws_xray_sdk/core/models/segment.py index b07d952c..ce6ca8b7 100644 --- a/aws_xray_sdk/core/models/segment.py +++ b/aws_xray_sdk/core/models/segment.py @@ -155,14 +155,14 @@ def set_rule_name(self, rule_name): self.aws['xray'] = {} self.aws['xray']['sampling_rule_name'] = rule_name - def __getstate__(self): - """ - Used by jsonpikle to remove unwanted fields. - """ - properties = copy.copy(self.__dict__) - super(Segment, self)._delete_empty_properties(properties) - if not self.user: - del properties['user'] - del properties['ref_counter'] - del properties['_subsegments_counter'] - return properties + def to_dict(self): + """ + Convert Segment object to dict with required properties + that have non-empty values. + """ + segment_dict = super(Segment, self).to_dict() + + del segment_dict['ref_counter'] + del segment_dict['_subsegments_counter'] + + return segment_dict diff --git a/aws_xray_sdk/core/models/subsegment.py b/aws_xray_sdk/core/models/subsegment.py index 801bc8b7..6737d18d 100644 --- a/aws_xray_sdk/core/models/subsegment.py +++ b/aws_xray_sdk/core/models/subsegment.py @@ -149,12 +149,13 @@ def set_sql(self, sql): """ self.sql = sql - def __getstate__(self): - - properties = copy.copy(self.__dict__) - super(Subsegment, self)._delete_empty_properties(properties) - - del properties['parent_segment'] - if not self.sql: - del properties['sql'] - return properties + def to_dict(self): + """ + Convert Subsegment object to dict with required properties + that have non-empty values. + """ + subsegment_dict = super(Subsegment, self).to_dict() + + del subsegment_dict['parent_segment'] + + return subsegment_dict diff --git a/aws_xray_sdk/core/models/throwable.py b/aws_xray_sdk/core/models/throwable.py index 5081a4fb..43f76b7c 100644 --- a/aws_xray_sdk/core/models/throwable.py +++ b/aws_xray_sdk/core/models/throwable.py @@ -46,6 +46,19 @@ def __init__(self, exception, stack, remote=False): if exception: setattr(exception, '_recorded', True) setattr(exception, '_cause_id', self.id) + + def to_dict(self): + """ + Convert Throwable object to dict with required properties that + have non-empty values. + """ + throwable_dict = {} + + for key, value in vars(self).items(): + if isinstance(value, bool) or value: + throwable_dict[key] = value + + return throwable_dict def _normalize_stack_trace(self, stack): if stack is None: @@ -66,11 +79,3 @@ def _normalize_stack_trace(self, stack): normalized['label'] = label.strip() self.stack.append(normalized) - - def __getstate__(self): - properties = copy.copy(self.__dict__) - - if not self.stack: - del properties['stack'] - - return properties diff --git a/aws_xray_sdk/core/utils/conversion.py b/aws_xray_sdk/core/utils/conversion.py new file mode 100644 index 00000000..0d40d463 --- /dev/null +++ b/aws_xray_sdk/core/utils/conversion.py @@ -0,0 +1,35 @@ +import logging + +log = logging.getLogger(__name__) + +def metadata_to_dict(obj): + """ + Convert object to dict with all serializable properties like: + dict, list, set, tuple, str, bool, int, float, type, object, etc. + """ + try: + if isinstance(obj, dict): + metadata = {} + for key, value in obj.items(): + metadata[key] = metadata_to_dict(value) + return metadata + elif isinstance(obj, type): + return str(obj) + elif hasattr(obj, "_ast"): + return metadata_to_dict(obj._ast()) + elif hasattr(obj, "__iter__") and not isinstance(obj, str): + metadata = [] + for item in obj: + metadata.append(metadata_to_dict(item)) + return metadata + elif hasattr(obj, "__dict__"): + metadata = {} + for key, value in vars(obj).items(): + if not callable(value) and not key.startswith('_'): + metadata[key] = metadata_to_dict(value) + return metadata + else: + return obj + except Exception: + log.exception("Failed to convert {} to dict".format(str(obj))) + return {} diff --git a/setup.py b/setup.py index c6b386ff..6468d9fc 100644 --- a/setup.py +++ b/setup.py @@ -44,7 +44,6 @@ ], install_requires=[ - 'jsonpickle', 'enum34;python_version<"3.4"', 'wrapt', 'future', diff --git a/tests/test_serialize_entities.py b/tests/test_serialize_entities.py new file mode 100644 index 00000000..da5d50a2 --- /dev/null +++ b/tests/test_serialize_entities.py @@ -0,0 +1,508 @@ +import ast +import datetime +import json +import platform +import pytest + +from aws_xray_sdk.version import VERSION +from aws_xray_sdk.core.models import http +from aws_xray_sdk.core.models.segment import Segment +from aws_xray_sdk.core.models.subsegment import Subsegment + +from .util import entity_to_dict + +def test_serialize_segment(): + + segment = Segment('test') + segment.close() + + expected_segment_dict = { + "name": "test", + "start_time": segment.start_time, + "trace_id": segment.trace_id, + "end_time": segment.end_time, + "in_progress": False, + "id": segment.id + } + + actual_segment_dict = entity_to_dict(segment) + + assert expected_segment_dict == actual_segment_dict + +def test_serialize_segment_with_aws(): + + segment = Segment('test') + + XRAY_META = { + 'xray': { + 'sdk': 'X-Ray for Python', + 'sdk_version': VERSION + } + } + + segment.set_aws(XRAY_META) + + segment.close() + + expected_segment_dict = { + "name": "test", + "start_time": segment.start_time, + "trace_id": segment.trace_id, + "end_time": segment.end_time, + "in_progress": False, + "aws": { + "xray": { + "sdk": "X-Ray for Python", + "sdk_version": VERSION + } + }, + "id": segment.id + } + + actual_segment_dict = entity_to_dict(segment) + + assert expected_segment_dict == actual_segment_dict + +def test_serialize_segment_with_services(): + + segment = Segment('test') + + SERVICE_INFO = { + 'runtime': platform.python_implementation(), + 'runtime_version': platform.python_version() + } + + segment.set_service(SERVICE_INFO) + + segment.close() + + expected_segment_dict = { + "name": "test", + "start_time": segment.start_time, + "trace_id": segment.trace_id, + "end_time": segment.end_time, + "in_progress": False, + "service": { + "runtime": segment.service['runtime'], + "runtime_version": segment.service['runtime_version'] + }, + "id": segment.id + } + + actual_segment_dict = entity_to_dict(segment) + + assert expected_segment_dict == actual_segment_dict + +def test_serialize_segment_with_annotation(): + + segment = Segment('test') + + segment.put_annotation('key', 'value') + + segment.close() + + expected_segment_dict = { + "id": segment.id, + "name": "test", + "start_time": segment.start_time, + "in_progress": False, + "annotations": { + "key": "value" + }, + "trace_id": segment.trace_id, + "end_time": segment.end_time + } + + actual_segment_dict = entity_to_dict(segment) + + assert expected_segment_dict == actual_segment_dict + +def test_serialize_segment_with_metadata(): + + class TestMetadata(): + def __init__(self, parameter_one, parameter_two): + self.parameter_one = parameter_one + self.parameter_two = parameter_two + + self.parameter_three = {'test'} #set + self.parameter_four = {'a': [1, 2, 3], 'b': True, 'c': (1.1, 2.2), 'd': list} #dict + self.parameter_five = [TestSubMetadata(datetime.time(9, 25, 31)), TestSubMetadata(datetime.time(23, 14, 6))] #list + + class TestSubMetadata(): + def __init__(self, time): + self.time = time + + segment = Segment('test') + + segment.put_metadata('key_one', TestMetadata(1,2), 'namespace_one') + segment.put_metadata('key_two', TestMetadata(3,4), 'namespace_two') + + segment.close() + + expected_segment_dict = { + "id": segment.id, + "name": "test", + "start_time": segment.start_time, + "in_progress": False, + "metadata": { + "namespace_one": { + "key_one": { + "parameter_one": 1, + "parameter_two": 2, + "parameter_three": [ + "test" + ], + "parameter_four": { + "a": [ + 1, + 2, + 3 + ], + "b": True, + "c": [ + 1.1, + 2.2 + ], + "d": str(list) + }, + "parameter_five": [ + { + "time": "09:25:31" + }, + { + "time": "23:14:06" + } + ] + } + }, + "namespace_two": { + "key_two": { + "parameter_one": 3, + "parameter_two": 4, + "parameter_three": [ + "test" + ], + "parameter_four": { + "a": [ + 1, + 2, + 3 + ], + "b": True, + "c": [ + 1.1, + 2.2 + ], + "d": str(list) + }, + "parameter_five": [ + { + "time": "09:25:31" + }, + { + "time": "23:14:06" + } + ] + } + } + }, + "trace_id": segment.trace_id, + "end_time": segment.end_time + } + + actual_segment_dict = entity_to_dict(segment) + + assert expected_segment_dict == actual_segment_dict + +def test_serialize_segment_with_http(): + + segment = Segment('test') + + segment.put_http_meta(http.URL, 'https://aws.amazon.com') + segment.put_http_meta(http.METHOD, 'get') + segment.put_http_meta(http.USER_AGENT, 'test') + segment.put_http_meta(http.CLIENT_IP, '127.0.0.1') + segment.put_http_meta(http.X_FORWARDED_FOR, True) + segment.put_http_meta(http.STATUS, 200) + segment.put_http_meta(http.CONTENT_LENGTH, 0) + + segment.close() + + expected_segment_dict = { + "id": segment.id, + "name": "test", + "start_time": segment.start_time, + "in_progress": False, + "http": { + "request": { + "url": "https://aws.amazon.com", + "method": "get", + "user_agent": "test", + "client_ip": "127.0.0.1", + "x_forwarded_for": True + }, + "response": { + "status": 200, + "content_length": 0 + } + }, + "trace_id": segment.trace_id, + "end_time": segment.end_time + } + + actual_segment_dict = entity_to_dict(segment) + + assert expected_segment_dict == actual_segment_dict + +def test_serialize_segment_with_exception(): + + class TestException(Exception): + def __init__(self, message): + super(TestException, self).__init__(message) + + segment = Segment('test') + + stack_one = [ + ('/path/to/test.py', 10, 'module', 'another_function()'), + ('/path/to/test.py', 3, 'another_function', 'wrong syntax') + ] + + stack_two = [ + ('/path/to/test.py', 11, 'module', 'another_function()'), + ('/path/to/test.py', 4, 'another_function', 'wrong syntax') + ] + + exception_one = TestException('test message one') + exception_two = TestException('test message two') + + segment.add_exception(exception_one, stack_one, True) + segment.add_exception(exception_two, stack_two, False) + + segment.close() + + expected_segment_dict = { + "id": segment.id, + "name": "test", + "start_time": segment.start_time, + "in_progress": False, + "cause": { + "working_directory": segment.cause['working_directory'], + "exceptions": [ + { + "id": exception_one._cause_id, + "message": "test message one", + "type": "TestException", + "remote": True, + "stack": [ + { + "path": "test.py", + "line": 10, + "label": "module" + }, + { + "path": "test.py", + "line": 3, + "label": "another_function" + } + ] + }, + { + "id": exception_two._cause_id, + "message": "test message two", + "type": "TestException", + "remote": False, + "stack": [ + { + "path": "test.py", + "line": 11, + "label": "module" + }, + { + "path": "test.py", + "line": 4, + "label": "another_function" + } + ] + } + ] + }, + "trace_id": segment.trace_id, + "fault": True, + "end_time": segment.end_time + } + + actual_segment_dict = entity_to_dict(segment) + + assert expected_segment_dict == actual_segment_dict + +def test_serialize_subsegment(): + + segment = Segment('test') + subsegment = Subsegment('test', 'local', segment) + + subsegment.close() + segment.close() + + expected_subsegment_dict = { + "id": subsegment.id, + "name": "test", + "start_time": subsegment.start_time, + "in_progress": False, + "trace_id": subsegment.trace_id, + "type": "subsegment", + "namespace": "local", + "end_time": subsegment.end_time + } + + actual_subsegment_dict = entity_to_dict(subsegment) + + assert expected_subsegment_dict == actual_subsegment_dict + +def test_serialize_subsegment_with_http(): + + segment = Segment('test') + subsegment = Subsegment('test', 'remote', segment) + + subsegment.put_http_meta(http.URL, 'https://aws.amazon.com') + subsegment.put_http_meta(http.METHOD, 'get') + + subsegment.put_http_meta(http.STATUS, 200) + subsegment.put_http_meta(http.CONTENT_LENGTH, 0) + + subsegment.close() + segment.close() + + expected_subsegment_dict = { + "id": subsegment.id, + "name": "test", + "start_time": subsegment.start_time, + "in_progress": False, + "http": { + "request": { + "url": "https://aws.amazon.com", + "method": "get" + }, + "response": { + "status": 200, + "content_length": 0 + } + }, + "trace_id": subsegment.trace_id, + "type": "subsegment", + "namespace": "remote", + "end_time": subsegment.end_time + } + + actual_subsegment_dict = entity_to_dict(subsegment) + + assert expected_subsegment_dict == actual_subsegment_dict + +def test_serialize_subsegment_with_sql(): + + segment = Segment('test') + subsegment = Subsegment('test', 'remote', segment) + + sql = { + "url": "jdbc:postgresql://aawijb5u25wdoy.cpamxznpdoq8.us-west-2.rds.amazonaws.com:5432/ebdb", + "preparation": "statement", + "database_type": "PostgreSQL", + "database_version": "9.5.4", + "driver_version": "PostgreSQL 9.4.1211.jre7", + "user" : "dbuser", + "sanitized_query" : "SELECT * FROM customers WHERE customer_id=?;" + } + + subsegment.set_sql(sql) + + subsegment.close() + segment.close() + + expected_subsegment_dict = { + "id": subsegment.id, + "name": "test", + "start_time": subsegment.start_time, + "in_progress": False, + "trace_id": subsegment.trace_id, + "type": "subsegment", + "namespace": "remote", + "sql": { + "url": "jdbc:postgresql://aawijb5u25wdoy.cpamxznpdoq8.us-west-2.rds.amazonaws.com:5432/ebdb", + "preparation": "statement", + "database_type": "PostgreSQL", + "database_version": "9.5.4", + "driver_version": "PostgreSQL 9.4.1211.jre7", + "user": "dbuser", + "sanitized_query": "SELECT * FROM customers WHERE customer_id=?;" + }, + "end_time": subsegment.end_time + } + + actual_subsegment_dict = entity_to_dict(subsegment) + + assert expected_subsegment_dict == actual_subsegment_dict + +def test_serialize_subsegment_with_aws(): + + segment = Segment('test') + subsegment = Subsegment('test', 'aws', segment) + + aws = { + "bucket_name": "testbucket", + "region": "us-east-1", + "operation": "GetObject", + "request_id": "0000000000000000", + "key": "123", + "resource_names": [ + "testbucket" + ] + } + + subsegment.set_aws(aws) + + subsegment.close() + segment.close() + + expected_subsegment_dict = { + "id": subsegment.id, + "name": "test", + "start_time": subsegment.start_time, + "in_progress": False, + "aws": { + "bucket_name": "testbucket", + "region": "us-east-1", + "operation": "GetObject", + "request_id": "0000000000000000", + "key": "123", + "resource_names": [ + "testbucket" + ] + }, + "trace_id": subsegment.trace_id, + "type": "subsegment", + "namespace": "aws", + "end_time": subsegment.end_time + } + + actual_subsegment_dict = entity_to_dict(subsegment) + + assert expected_subsegment_dict == actual_subsegment_dict + +def test_serialize_with_ast_metadata(): + + class_string = """\ +class A: + def __init__(self, a): + self.a = a +""" + + ast_obj = ast.parse(class_string) + + segment = Segment('test') + + segment.put_metadata('ast', ast_obj) + + segment.close() + + actual_segment_dict = entity_to_dict(segment) + + assert 'ast' in actual_segment_dict['metadata']['default'] diff --git a/tests/util.py b/tests/util.py index e3c7762d..074e712d 100644 --- a/tests/util.py +++ b/tests/util.py @@ -1,6 +1,5 @@ import json import threading -import jsonpickle from aws_xray_sdk.core.recorder import AWSXRayRecorder from aws_xray_sdk.core.emitters.udp_emitter import UDPEmitter @@ -71,8 +70,7 @@ def _search_entity(entity, name): def find_subsegment(segment, name): """Helper function to find a subsegment by name in the entity tree""" - segment = jsonpickle.encode(segment, unpicklable=False) - segment = json.loads(segment) + segment = entity_to_dict(segment) for entity in segment['subsegments']: result = _search_entity(entity, name) if result is not None: @@ -82,8 +80,7 @@ def find_subsegment(segment, name): def find_subsegment_by_annotation(segment, key, value): """Helper function to find a subsegment by annoation key & value in the entity tree""" - segment = jsonpickle.encode(segment, unpicklable=False) - segment = json.loads(segment) + segment = entity_to_dict(segment) for entity in segment['subsegments']: result = _search_entity_by_annotation(entity, key, value) if result is not None: diff --git a/tox.ini b/tox.ini index 61b5270a..c916610b 100644 --- a/tox.ini +++ b/tox.ini @@ -18,8 +18,8 @@ deps = requests bottle >= 0.10 flask >= 0.10 - sqlalchemy - Flask-SQLAlchemy + sqlalchemy==1.3.* + Flask-SQLAlchemy==2.4.* future django22: Django==2.2.* django30: Django==3.0.*