From e9914bc561c473757c86dcdebd1b2cc8c4acec42 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Mon, 27 Feb 2023 17:23:52 -0800 Subject: [PATCH 1/6] Adds JSON/CBOR support and an Io-type option to benchmark different types of sources. --- amazon/ionbenchmark/Command.py | 7 + amazon/ionbenchmark/Format.py | 31 ++ amazon/ionbenchmark/Io_type.py | 8 + amazon/ionbenchmark/ion_benchmark_cli.py | 545 +++++++++++++++++------ requirements.txt | 3 +- tests/test_benchmark_cli.py | 79 +++- 6 files changed, 525 insertions(+), 148 deletions(-) create mode 100644 amazon/ionbenchmark/Command.py create mode 100644 amazon/ionbenchmark/Io_type.py diff --git a/amazon/ionbenchmark/Command.py b/amazon/ionbenchmark/Command.py new file mode 100644 index 000000000..67686491f --- /dev/null +++ b/amazon/ionbenchmark/Command.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class Command(Enum): + """Enumeration of the commands.""" + READ = 'read' + WRITE = 'write' diff --git a/amazon/ionbenchmark/Format.py b/amazon/ionbenchmark/Format.py index 708e8a7f9..8f93bfcc1 100644 --- a/amazon/ionbenchmark/Format.py +++ b/amazon/ionbenchmark/Format.py @@ -1,8 +1,39 @@ from enum import Enum +def format_is_ion(format_option): + return (format_option == Format.ION_BINARY.value) or (format_option == Format.ION_TEXT.value) + + +def format_is_json(format_option): + return (format_option == Format.JSON.value) or (format_option == Format.SIMPLEJSON.value) \ + or (format_option == Format.UJSON.value) or (format_option == Format.RAPIDJSON.value) or \ + (format_option == Format.ORJSON.value) + + +def format_is_cbor(format_option): + return (format_option == Format.CBOR.value) or (format_option == Format.CBOR2.value) + + +def format_is_binary(format_option): + return format_is_cbor(format_option) or (format_option == Format.ION_BINARY.value) \ + or (format_option == Format.ORJSON.value) + + +def rewrite_file_to_format(file, format_option): + return file + + class Format(Enum): """Enumeration of the formats.""" ION_TEXT = 'ion_text' ION_BINARY = 'ion_binary' + JSON = 'json' + SIMPLEJSON = 'simplejson' + UJSON = 'ujson' + RAPIDJSON = 'rapidjson' + ORJSON = 'orjson' + CBOR = 'cbor' + CBOR2 = 'cbor2' DEFAULT = 'ion_binary' + diff --git a/amazon/ionbenchmark/Io_type.py b/amazon/ionbenchmark/Io_type.py new file mode 100644 index 000000000..53df79a01 --- /dev/null +++ b/amazon/ionbenchmark/Io_type.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class Io_type(Enum): + """Enumeration of the IO types.""" + FILE = 'file' + BUFFER = 'buffer' + DEFAULT = 'file' diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index e061a3580..44411ad9b 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -15,9 +15,9 @@ Usage: ion_python_benchmark_cli.py write [--api ]... [--warmups ] [--c-extension ] [--iterations ] - [--format ]... + [--format ]... [--io-type ]... ion_python_benchmark_cli.py read [--api ]... [--iterator ] [--warmups ] [--iterations ] - [--c-extension ] [--format ]... + [--c-extension ] [--format ]... [--io-type ]... ion_python_benchmark_cli.py (-h | --help) ion_python_benchmark_cli.py (-v | --version) @@ -55,6 +55,10 @@ error will be raised if this option is used when multiple values are specified for other options. Not enabled by default. + -i --io-type The source or destination type, from the set (buffer | file). If buffer is selected, the input data is buffered in memory before reading, and the output data is written to an in-memory buffer instead of a file.
[default: file] + -u --time-unit (NOT SUPPORTED YET) -o --results-file (NOT SUPPORTED YET) -I --ion-imports-for-input (NOT SUPPORTED YET) @@ -62,17 +66,29 @@ """ import itertools +import json +import os import timeit from pathlib import Path import platform +import cbor2 +import orjson +import rapidjson +import simplejson +import ujson +from cbor import cbor + import amazon.ion.simpleion as ion from docopt import docopt from tabulate import tabulate from amazon.ionbenchmark.API import API -from amazon.ionbenchmark.Format import Format +from amazon.ionbenchmark.Command import Command +from amazon.ionbenchmark.Format import Format, format_is_ion, format_is_json, format_is_cbor, rewrite_file_to_format, \ + format_is_binary from amazon.ionbenchmark.util import str_to_bool, format_percentage, format_decimal, TOOL_VERSION +from amazon.ionbenchmark.Io_type import Io_type # Relate pypy incompatible issue - https://github.com/amazon-ion/ion-python/issues/227 pypy = platform.python_implementation() == 'PyPy' @@ -84,52 +100,169 @@ write_memory_usage_peak = 0 read_memory_usage_peak = 0 +JSON_PRIMARY_BASELINE = Format.JSON +CBOR_PRIMARY_BASELINE = Format.CBOR2 + +output_file = 'dump_output' -# Generates benchmark code for simpleion load API -def generate_simpleion_load_test_code(file, memory_profiling, iterator=False, single_value=False, + +# Generates benchmark code for simpleion load/loads APIs +def generate_simpleion_read_test_code(file, memory_profiling, io_type, iterator=False, single_value=False, emit_bare_values=False): - if not memory_profiling: - if not iterator: - def test_func(): - with open(file, "br") as fp: - data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) - return data + if io_type == Io_type.BUFFER.value: + with open(file, "br") as fp: + benchmark_data = fp.read() + if not memory_profiling: + if not iterator: + def test_func(): + data = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values) + return data + else: + def test_func(): + it = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) + while True: + try: + next(it) + except StopIteration: + break + return it else: - def test_func(): - with open(file, "br") as fp: - it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=False) + if not iterator: + def test_func(): + tracemalloc.start() + data = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return data + else: + def test_func(): + tracemalloc.start() + it = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) while True: try: next(it) except StopIteration: break - return it + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return it else: - if not iterator: + if not memory_profiling: + if not iterator: + def test_func(): + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) + return data + else: + def test_func(): + with open(file, "br") as fp: + it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) + while True: + try: + next(it) + except StopIteration: + break + return it + else: + if not 
iterator: + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return data + else: + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) + while True: + try: + next(it) + except StopIteration: + break + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return it + return test_func + + +# Generates benchmark code for json/cbor load/loads APIs +def generate_read_test_code(file, memory_profiling, format_option, binary, io_type): + if format_option == Format.JSON.value: + benchmark_api = json.loads if io_type == Io_type.BUFFER.value else json.load + elif format_option == Format.SIMPLEJSON.value: + benchmark_api = simplejson.loads if io_type == Io_type.BUFFER.value else simplejson.load + elif format_option == Format.UJSON.value: + benchmark_api = ujson.loads if io_type == Io_type.BUFFER.value else ujson.load + elif format_option == Format.RAPIDJSON.value: + benchmark_api = rapidjson.loads if io_type == Io_type.BUFFER.value else rapidjson.load + elif format_option == Format.ORJSON.value: + # orjson doesn't provide load API, so use loads for both file and buffer io_types. + benchmark_api = orjson.loads + elif format_option == Format.CBOR.value: + benchmark_api = cbor.loads if io_type == Io_type.BUFFER.value else cbor.load + elif format_option == Format.CBOR2.value: + benchmark_api = cbor2.loads if io_type == Io_type.BUFFER.value else cbor2.load + else: + raise Exception('unknown JSON/CBOR format to generate setup code.') + + if io_type == Io_type.BUFFER.value: + with open(file, 'br') as fp: + benchmark_data = fp.read() + + if not memory_profiling: + def test_func(): + data = benchmark_api(benchmark_data) + return data + else: def test_func(): tracemalloc.start() - with open(file, "br") as fp: - data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) + data = benchmark_api(benchmark_data) global read_memory_usage_peak read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB tracemalloc.stop() return data + elif format_option == Format.ORJSON.value: + if not memory_profiling: + def test_func(): + with open(file, 'br') as benchmark_file: + data = benchmark_api(benchmark_file.read()) + return data else: def test_func(): tracemalloc.start() - with open(file, "br") as fp: - it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=False) - while True: - try: - next(it) - except StopIteration: - break + with open(file, 'br') as benchmark_file: + data = benchmark_api(benchmark_file.read()) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return data + else: + if not memory_profiling: + def test_func(): + with open(file, 'br' if binary else 'r') as benchmark_file: + data = benchmark_api(benchmark_file) + return data + else: + def test_func(): + tracemalloc.start() + with open(file, 'br' if binary else 'r') as benchmark_file: + data = benchmark_api(benchmark_file) global read_memory_usage_peak read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB tracemalloc.stop() - return it + return data + return 
test_func @@ -139,10 +272,18 @@ def generate_event_test_code(file): # Generates setup code for simpleion benchmark code -def generate_simpleion_setup(c_extension, memory_profiling, gc=True): - rtn = f'import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ion.c_ext ={c_extension}' - if memory_profiling: - rtn += '; import tracemalloc' +def generate_simpleion_setup(c_extension, gc=False): + rtn = f'import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ion.c_ext = ' \ + f'{c_extension}; import tracemalloc' + if gc: + rtn += '; import gc; gc.enable()' + + return rtn + + +# Generates setup code for json/cbor benchmark code +def generate_setup(gc=False): + rtn = 'import tracemalloc' if gc: rtn += '; import gc; gc.enable()' @@ -150,39 +291,53 @@ def generate_simpleion_setup(c_extension, memory_profiling, gc=True): # Generates setup code for event based non_blocking benchmark code -def generate_event_setup(file, gc=True): +def generate_event_setup(file, gc=False): pass -# Benchmarks simpleion load API -def read_micro_benchmark_simpleion(iterations, warmups, c_extension, file, memory_profiling, iterator=False): +# Benchmarks json/cbor loads/load APIs +def read_micro_benchmark(iterations, warmups, c_extension, file, memory_profiling, format_option, binary, io_type, + iterator=False): file_size = Path(file).stat().st_size / BYTES_TO_MB - setup_with_gc = generate_simpleion_setup(c_extension=c_extension, gc=False, memory_profiling=memory_profiling) + setup_with_gc = generate_setup(gc=False) - test_code = generate_simpleion_load_test_code(file, emit_bare_values=False, memory_profiling=memory_profiling, - iterator=iterator) - test_code_without_wrapper = generate_simpleion_load_test_code(file, emit_bare_values=True, - memory_profiling=memory_profiling, - iterator=iterator) + test_code = generate_read_test_code(file, memory_profiling=memory_profiling, + format_option=format_option, io_type=io_type, binary=binary) # warm up timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) - timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=warmups) # iteration result_with_gc = timeit.timeit(stmt=test_code, setup=setup_with_gc, number=iterations) / iterations - result_with_raw_value = \ - (timeit.timeit(stmt=test_code_without_wrapper, setup=setup_with_gc, number=iterations) / iterations) \ - if c_extension else result_with_gc - return file_size, result_with_gc, result_with_raw_value + return file_size, result_with_gc + + +# Benchmarks simpleion load/loads APIs +def read_micro_benchmark_simpleion(iterations, warmups, c_extension, file, memory_profiling, format_option, binary, + io_type, iterator=False): + file_size = Path(file).stat().st_size / BYTES_TO_MB + + setup_with_gc = generate_simpleion_setup(c_extension=c_extension, gc=False) + + test_code = generate_simpleion_read_test_code(file, emit_bare_values=False, memory_profiling=memory_profiling, + iterator=iterator, io_type=io_type) + + # warm up + timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) + + # iteration + result_with_gc = timeit.timeit(stmt=test_code, setup=setup_with_gc, number=iterations) / iterations + + return file_size, result_with_gc # Benchmarks pure python implementation event based APIs # https://github.com/amazon-ion/ion-python/issues/236 -def read_micro_benchmark_event(iterations, warmups, c_extension, file, memory_profiling, iterator=False): - return 0, 0, 0 +def read_micro_benchmark_event(iterations, warmups, c_extension, 
file, memory_profiling, format_option, binary, io_type, + iterator=False): + return 0, 0 # Framework for benchmarking read methods, this functions includes @@ -190,7 +345,7 @@ def read_micro_benchmark_event(iterations, warmups, c_extension, file, memory_pr # 2. benchmark performance, # 3. generate report def read_micro_benchmark_and_profiling(table, read_micro_benchmark_function, iterations, warmups, file, c_extension, - iterator, each_option): + binary, iterator, each_option, io_type): if not file: raise Exception("Invalid file: file can not be none.") if not read_micro_benchmark_function: @@ -199,61 +354,73 @@ def read_micro_benchmark_and_profiling(table, read_micro_benchmark_function, ite # memory profiling if not pypy: read_micro_benchmark_function(iterations=1, warmups=0, file=file, c_extension=c_extension, - memory_profiling=True, - iterator=iterator) + memory_profiling=True, iterator=iterator, format_option=each_option[1], + io_type=io_type, binary=binary) # performance benchmark - file_size, result_with_gc, result_with_raw_value = \ + file_size, result_with_gc = \ read_micro_benchmark_function(iterations=iterations, warmups=warmups, file=file, c_extension=c_extension, - memory_profiling=False, iterator=iterator) + memory_profiling=False, iterator=iterator, format_option=each_option[1], + io_type=io_type, binary=binary) - # calculate metrics - conversion_time = result_with_gc - result_with_raw_value # generate report - read_generate_report(table, file_size, each_option, result_with_gc, - conversion_time if conversion_time > 0 else 0, - (conversion_time / result_with_gc) if conversion_time > 0 else 0, - read_memory_usage_peak) + read_generate_report(table, file_size, each_option, result_with_gc, read_memory_usage_peak) - return file_size, result_with_gc, conversion_time, read_memory_usage_peak + return file_size, result_with_gc, read_memory_usage_peak # Generates and prints benchmark report -def read_generate_report(table, file_size, each_option, total_time, conversion_time, wrapper_time_percentage, memory_usage_peak): +def read_generate_report(table, file_size, each_option, total_time, memory_usage_peak): insert_into_report_table(table, [format_decimal(file_size), each_option, format_decimal(total_time), - format_decimal(conversion_time), - format_percentage(wrapper_time_percentage), format_decimal(memory_usage_peak)]) # Generates benchmark code for simpleion dump API -def generate_simpleion_dump_test_code(obj, memory_profiling, binary=True): - if not memory_profiling: - def test_func(): - return ion.dumps(obj=obj, binary=binary) - else: - def test_func(): - tracemalloc.start() - data = ion.dumps(obj=obj, binary=binary) - global write_memory_usage_peak - write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() +def generate_simpleion_write_test_code(obj, memory_profiling, io_type, binary): + if io_type == Io_type.BUFFER.value: + if not memory_profiling: + def test_func(): + return ion.dumps(obj=obj, binary=binary) + else: + def test_func(): + tracemalloc.start() + data = ion.dumps(obj=obj, binary=binary) + global write_memory_usage_peak + write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() - return data + return data + else: + if not memory_profiling: + def test_func(): + with open(output_file, 'bw') as fp: + ion.dump(obj, fp, binary=binary) + else: + def test_func(): + tracemalloc.start() + with open(output_file, 'bw') as fp: + ion.dump(obj, fp, binary=binary) + global write_memory_usage_peak + 
write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() return test_func # Benchmarks simpleion dump API -def write_micro_benchmark_simpleion(iterations, warmups, c_extension, obj, file, binary, memory_profiling): +def write_micro_benchmark_simpleion(iterations, warmups, c_extension, file, binary, memory_profiling, + format_option, io_type): file_size = Path(file).stat().st_size / BYTES_TO_MB + with open(file) as fp: + obj = ion.load(fp, parse_eagerly=True, single_value=False) # GC refers to reference cycles, not reference count - setup_with_gc = generate_simpleion_setup(gc=True, c_extension=c_extension, memory_profiling=memory_profiling) + setup_with_gc = generate_simpleion_setup(gc=False, c_extension=c_extension) - test_func = generate_simpleion_dump_test_code(obj, memory_profiling=memory_profiling, binary=binary) + test_func = generate_simpleion_write_test_code(obj, memory_profiling=memory_profiling, binary=binary, + io_type=io_type) # warm up timeit.timeit(stmt=test_func, setup=setup_with_gc, number=warmups) @@ -264,9 +431,104 @@ def write_micro_benchmark_simpleion(iterations, warmups, c_extension, obj, file, return file_size, result_with_gc +# Benchmarks JSON/CBOR APIs +def write_micro_benchmark(iterations, warmups, c_extension, file, binary, memory_profiling, format_option, io_type): + file_size = Path(file).stat().st_size / BYTES_TO_MB + obj = generate_json_and_cbor_obj_for_write(file, format_option) + # GC refers to reference cycles, not reference count + setup_with_gc = generate_setup(gc=False) + + test_func = generate_write_test_code(obj, memory_profiling=memory_profiling, format_option=format_option, + io_type=io_type, binary=binary) + + # warm up + timeit.timeit(stmt=test_func, setup=setup_with_gc, number=warmups) + + # iteration + result_with_gc = timeit.timeit(stmt=test_func, setup=setup_with_gc, number=iterations) / iterations + + return file_size, result_with_gc + + +# Generates benchmark code for json dump API +def generate_write_test_code(obj, memory_profiling, format_option, io_type, binary): + if format_option == Format.JSON.value: + benchmark_api = json.dumps if io_type == Io_type.BUFFER.value else json.dump + elif format_option == Format.SIMPLEJSON.value: + benchmark_api = simplejson.dumps if io_type == Io_type.BUFFER.value else simplejson.dump + elif format_option == Format.UJSON.value: + benchmark_api = ujson.dumps if io_type == Io_type.BUFFER.value else ujson.dump + elif format_option == Format.RAPIDJSON.value: + benchmark_api = rapidjson.dumps if io_type == Io_type.BUFFER.value else rapidjson.dump + elif format_option == Format.ORJSON.value: + benchmark_api = orjson.dumps + elif format_option == Format.CBOR.value: + benchmark_api = cbor.dumps if io_type == Io_type.BUFFER.value else cbor.dump + elif format_option == Format.CBOR2.value: + benchmark_api = cbor2.dumps if io_type == Io_type.BUFFER.value else cbor2.dump + else: + raise Exception('unknown JSON/CBOR format to generate setup code.') + + if io_type == Io_type.BUFFER.value: + if not memory_profiling: + def test_func(): + return benchmark_api(obj) + else: + def test_func(): + tracemalloc.start() + data = benchmark_api(obj) + global write_memory_usage_peak + write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + + return data + elif format_option == Format.ORJSON.value: + if not memory_profiling: + def test_func(): + with open(output_file, 'bw' if binary else 'w') as fp: + data = benchmark_api(obj) + fp.write(data) + else: + def 
test_func(): + tracemalloc.start() + with open(output_file, 'bw' if binary else 'w') as fp: + data = benchmark_api(obj) + fp.write(data) + global write_memory_usage_peak + write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + + return data + else: + if not memory_profiling: + def test_func(): + with open(output_file, 'bw' if binary else 'w') as fp: + benchmark_api(obj, fp) + else: + def test_func(): + tracemalloc.start() + with open(output_file, 'bw' if binary else 'w') as fp: + benchmark_api(obj, fp) + global write_memory_usage_peak + write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + + return test_func + + +# Generates setup code for json benchmark code +def generate_setup(gc=False): + rtn = 'import tracemalloc; import gc' + if gc: + rtn += '; gc.enable()' + + return rtn + + # Benchmarks pure python event based write API # https://github.com/amazon-ion/ion-python/issues/236 -def write_micro_benchmark_event(iterations, warmups, c_extension, obj, file, binary, memory_profiling): +def write_micro_benchmark_event(iterations, warmups, c_extension, file, binary, memory_profiling, io_type, + format_option): return 0, 0 @@ -274,23 +536,21 @@ def write_micro_benchmark_event(iterations, warmups, c_extension, obj, file, bin # 1. profile memory usage, # 2. benchmark performance, # 3. generate report -def write_micro_benchmark_and_profiling(table, write_micro_benchmark_function, iterations, warmups, obj, c_extension, - binary, file, each_option): - if not obj: - raise Exception("Invalid obj: object can not be none.") +def write_micro_benchmark_and_profiling(table, write_micro_benchmark_function, iterations, warmups, file, c_extension, + binary, each_option, io_type): if not write_micro_benchmark_function: raise Exception("Invalid micro benchmark function: micro benchmark function can not be none.") # Memory Profiling if not pypy: - write_micro_benchmark_function(iterations=1, warmups=0, obj=obj, c_extension=c_extension, file=file, - binary=binary, - memory_profiling=True) + write_micro_benchmark_function(iterations=1, warmups=0, c_extension=c_extension, file=file, + binary=binary, memory_profiling=True, format_option=each_option[1], + io_type=io_type) # Performance Benchmark file_size, result_with_gc = \ - write_micro_benchmark_function(iterations=iterations, warmups=warmups, obj=obj, c_extension=c_extension, - file=file, - binary=binary, memory_profiling=False) + write_micro_benchmark_function(iterations=iterations, warmups=warmups, c_extension=c_extension, + file=file, binary=binary, memory_profiling=False, format_option=each_option[1], + io_type=io_type) # generate report write_generate_report(table, file_size, each_option, result_with_gc, write_memory_usage_peak) @@ -318,8 +578,7 @@ def insert_into_report_table(table, row): def identify_report_table(command): if command == 'read': return identify_report_table_helper( - ['file_size (MB)', 'options', 'total_time (s)', 'conversion_\ntime (s)', 'conversion_time/\ntotal_time (%)', - 'memory_usage_peak (MB)']) + ['file_size (MB)', 'options', 'total_time (s)', 'memory_usage_peak (MB)']) elif command == 'write': return identify_report_table_helper( ['file_size (MB)', 'options', 'total_time (s)', 'memory_usage_peak (MB)'] @@ -340,8 +599,34 @@ def reset_for_each_execution(each_option): write_memory_usage_peak = 0 api = each_option[0] format_option = each_option[1] + io_type = each_option[2] + + return api, format_option, io_type + + +def 
generate_json_and_cbor_obj_for_write(file, format_option): + with open(file) as fp: + if format_option == Format.JSON.value: + return json.load(fp) + elif format_option == Format.SIMPLEJSON.value: + return simplejson.load(fp) + elif format_option == Format.UJSON.value: + return ujson.load(fp) + elif format_option == Format.RAPIDJSON.value: + return rapidjson.load(fp) + elif format_option == Format.ORJSON.value: + return orjson.loads(fp.read()) + elif format_option == Format.CBOR.value: + return cbor.load(fp) + elif format_option == Format.CBOR2.value: + return cbor2.load(fp) + else: + raise Exception('unknown JSON format to generate setup code.') + - return api, format_option +def clean_up(): + if os.path.exists(output_file): + os.remove(output_file) def ion_python_benchmark_cli(arguments): @@ -351,6 +636,7 @@ def ion_python_benchmark_cli(arguments): if not arguments['']: raise Exception('Invalid input file') file = arguments[''] + command = Command.READ.value if arguments['read'] else Command.WRITE.value iterations = int(arguments['--iterations']) warmups = int(arguments['--warmups']) c_extension = str_to_bool(arguments['--c-extension']) if not pypy else False @@ -360,55 +646,50 @@ def ion_python_benchmark_cli(arguments): # initialize options that might show up multiple times api = [*set(arguments['--api'])] if arguments['--api'] else [API.DEFAULT.value] format_option = [*set(arguments['--format'])] if arguments['--format'] else [Format.DEFAULT.value] + io_type = [*set(arguments['--io-type'])] if arguments['--io-type'] else [Io_type.DEFAULT.value] # option_configuration is used for tracking options may show up multiple times. - option_configuration = [api, format_option] + option_configuration = [api, format_option, io_type] option_configuration_combination = list(itertools.product(*option_configuration)) - print(option_configuration_combination) - if arguments['read']: - # initialize benchmark report table - table = identify_report_table('read') - - for each_option in option_configuration_combination: - # reset - api, format_option = reset_for_each_execution(each_option) - + # initialize benchmark report table + table = identify_report_table(command) + + for each_option in option_configuration_combination: + print(f'Generating option {each_option}...') + # reset each option configuration + api, format_option, io_type = reset_for_each_execution(each_option) + binary = format_is_binary(format_option) + # TODO. currently, we must provide the tool a corresponding file format for read benchmarking. For example, + # we must provide a CBOR file for CBOR APIs benchmarking. We cannot benchmark CBOR APIs by giving a JSON + # file. Lack of format conversion prevents us from benchmarking different formats concurrently. 
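+        # Note: rewrite_file_to_format (Format.py) is currently a pass-through that
+        # returns the input path unchanged, so the input file must already be in the
+        # format named by format_option.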
+ file = rewrite_file_to_format(file, format_option) + + # Generate microbenchmark API according to read/write command + if format_is_ion(format_option): if not api or api == API.SIMPLE_ION.value: - read_micro_benchmark_function = read_micro_benchmark_simpleion + micro_benchmark_function = read_micro_benchmark_simpleion if command == 'read' \ + else write_micro_benchmark_simpleion elif api == API.EVENT.value: - read_micro_benchmark_function = read_micro_benchmark_event - else: - raise Exception(f'Invalid API option {api}.') - - file_size, result_with_gc, conversion_time, read_memory_usage_peak = \ - read_micro_benchmark_and_profiling(table, read_micro_benchmark_function, iterations, warmups, file, - c_extension, iterator, each_option) - - print(tabulate(table, tablefmt='fancy_grid')) - - - elif arguments['write']: - # initialize benchmark report table - table = identify_report_table('write') - - for each_option in option_configuration_combination: - # reset - api, format_option = reset_for_each_execution(each_option) - binary = format_option == Format.ION_BINARY.value - - if not api or api == API.SIMPLE_ION.value: - write_micro_benchmark_function = write_micro_benchmark_simpleion - elif api == API.EVENT.value: - write_micro_benchmark_function = write_micro_benchmark_event + micro_benchmark_function = read_micro_benchmark_event if command == 'read' \ + else write_micro_benchmark_event else: raise Exception(f'Invalid API option {api}.') + elif format_is_json(format_option): + micro_benchmark_function = read_micro_benchmark if command == 'read' else write_micro_benchmark + elif format_is_cbor(format_option): + micro_benchmark_function = read_micro_benchmark if command == 'read' else write_micro_benchmark + else: + raise Exception(f'Invalid format option {format_option}.') - with open(file) as fp: - obj = ion.load(fp, parse_eagerly=True, single_value=False) + if command == 'read': + read_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, file, + c_extension, binary, iterator, each_option, io_type) + else: + write_micro_benchmark_and_profiling(table, micro_benchmark_function, iterations, warmups, file, + c_extension, binary, each_option, io_type) - write_micro_benchmark_and_profiling(table, write_micro_benchmark_function, iterations, warmups, obj, - c_extension, binary, file, each_option) - print(tabulate(table, tablefmt='fancy_grid')) + print(tabulate(table, tablefmt='fancy_grid')) + clean_up() return table diff --git a/requirements.txt b/requirements.txt index c3dfeb673..c95a9dc61 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,5 @@ tox==3.23.1 virtualenv==20.4.7 setuptools<=60.5.0 docopt==0.6.2 -tabulate==0.9.0 \ No newline at end of file +tabulate==0.9.0 +simplejson \ No newline at end of file diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py index caa6c1ae5..5761356d7 100644 --- a/tests/test_benchmark_cli.py +++ b/tests/test_benchmark_cli.py @@ -5,9 +5,9 @@ from docopt import docopt from amazon.ion import simpleion -from amazon.ionbenchmark import ion_benchmark_cli -from amazon.ionbenchmark.ion_benchmark_cli import generate_simpleion_load_test_code, generate_simpleion_dump_test_code,\ - ion_python_benchmark_cli +from amazon.ionbenchmark import ion_benchmark_cli, Format, Io_type +from amazon.ionbenchmark.ion_benchmark_cli import generate_simpleion_read_test_code, \ + generate_simpleion_write_test_code, ion_python_benchmark_cli from amazon.ionbenchmark.util import str_to_bool, TOOL_VERSION from tests import parametrize 
from tests.test_simpleion import generate_scalars_text @@ -44,8 +44,9 @@ def generate_test_path(p): @parametrize( generate_test_path('integers.ion') ) -def test_generate_simpleion_load_test_code(path): - actual = generate_simpleion_load_test_code(path, memory_profiling=False, single_value=False, emit_bare_values=False) +def test_generate_simpleion_read_test_code(path): + actual = generate_simpleion_read_test_code(path, memory_profiling=False, single_value=False, + emit_bare_values=False, io_type='buffer') # make sure we generated the desired load function with open(path) as fp: @@ -60,8 +61,8 @@ def test_generate_simpleion_load_test_code(path): generate_scalars_text(SIMPLE_SCALARS_MAP_TEXT), )) ) -def test_generate_simpleion_dump_test_code(obj): - actual = generate_simpleion_dump_test_code(obj, memory_profiling=False, binary=False) +def test_generate_simpleion_write_test_code(obj): + actual = generate_simpleion_write_test_code(obj, memory_profiling=False, binary=False, io_type='buffer') # make sure we generated the desired dumps function expect = simpleion.dumps(obj, binary=False) @@ -135,39 +136,87 @@ def gather_all_options_in_list(table): def test_read_multi_api(file=generate_test_path('integers.ion')): table = execution_with_command(['read', file, '--api', 'simple_ion', '--api', 'event']) - assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary'), ('simple_ion', 'ion_binary')]) + assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary', 'file'), ('simple_ion', 'ion_binary', 'file')]) def test_write_multi_api(file=generate_test_path('integers.ion')): table = execution_with_command(['write', file, '--api', 'simple_ion', '--api', 'event']) - assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary'), ('simple_ion', 'ion_binary')]) + assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary', 'file'), ('simple_ion', 'ion_binary', 'file')]) def test_read_multi_duplicated_api(file=generate_test_path('integers.ion')): table = execution_with_command(['read', file, '--api', 'simple_ion', '--api', 'event', '--api', 'event']) - assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary'), ('simple_ion', 'ion_binary')]) + assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary', 'file'), ('simple_ion', 'ion_binary', 'file')]) def test_write_multi_duplicated_api(file=generate_test_path('integers.ion')): table = execution_with_command(['write', file, '--api', 'simple_ion', '--api', 'event', '--api', 'event']) - assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary'), ('simple_ion', 'ion_binary')]) + assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary', 'file'), ('simple_ion', 'ion_binary', 'file')]) def test_read_multi_format(file=generate_test_path('integers.ion')): table = execution_with_command(['read', file, '--format', 'ion_text', '--format', 'ion_binary']) - assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_binary'), ('simple_ion', 'ion_text')]) + assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_binary', 'file'), ('simple_ion', 'ion_text', 'file')]) def test_write_multi_format(file=generate_test_path('integers.ion')): table = execution_with_command(['write', file, '--format', 'ion_text', '--format', 'ion_binary']) - assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text'), ('simple_ion', 'ion_binary')]) + assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text', 
'file'), ('simple_ion', 'ion_binary', 'file')]) def test_read_multi_duplicated_format(file=generate_test_path('integers.ion')): table = execution_with_command(['read', file, '--format', 'ion_text', '--format', 'ion_binary', '--format', 'ion_text']) - assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text'), ('simple_ion', 'ion_binary')]) + assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text', 'file'), ('simple_ion', 'ion_binary', 'file')]) def test_write_multi_duplicated_format(file=generate_test_path('integers.ion')): table = execution_with_command(['write', file, '--format', 'ion_text', '--format', 'ion_binary', '--format', 'ion_text',]) - assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text'), ('simple_ion', 'ion_binary')]) + assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text', 'file'), ('simple_ion', 'ion_binary', 'file')]) + + +@parametrize( + *tuple((f.value for f in Format.Format if Format.format_is_json(f.value))) +) +def test_write_json_format(f): + table = execution_with_command(['write', generate_test_path('integers.ion'), '--format', f'{f}']) + assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')]) + + +@parametrize( + *tuple((f.value for f in Format.Format if Format.format_is_json(f.value))) +) +def test_read_json_format(f): + table = execution_with_command(['read', generate_test_path('integers.ion'), '--format', f'{f}']) + assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')]) + + +@parametrize( + *tuple((f.value for f in Format.Format if Format.format_is_json(f.value))) +) +def test_write_json_format(f): + table = execution_with_command(['write', generate_test_path('integers.ion'), '--format', f'{f}']) + assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')]) + + +@parametrize( + *tuple((f.value for f in Format.Format if Format.format_is_cbor(f.value))) +) +def test_read_cbor_format(f): + table = execution_with_command(['read', generate_test_path('integers.ion'), '--format', f'{f}']) + assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')]) + + +@parametrize( + *tuple((io.value for io in Io_type.Io_type)) +) +def test_write_io_type(f): + table = execution_with_command(['write', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'json']) + assert gather_all_options_in_list(table) == sorted([('simple_ion', 'json', f'{f}')]) + + +@parametrize( + *tuple((io.value for io in Io_type.Io_type)) +) +def test_read_io_type(f): + table = execution_with_command(['read', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'json', '--format', 'ion_binary']) + assert gather_all_options_in_list(table) == sorted([('simple_ion', 'json', f'{f}'), ('simple_ion', 'ion_binary', f'{f}')]) From 8bcd38521b137811e3c51f257181642681f81e8a Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Tue, 14 Mar 2023 00:56:36 -0700 Subject: [PATCH 2/6] Adds dependencies. 
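Pins simplejson and adds the serializers exercised by the new format options (cbor, orjson, cbor2, python-rapidjson, ujson), plus pip and six; also bumps the ion-c submodule.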
--- ion-c | 2 +- requirements.txt | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/ion-c b/ion-c index 093cc1c32..6c8cc1a44 160000 --- a/ion-c +++ b/ion-c @@ -1 +1 @@ -Subproject commit 093cc1c32d9ac3ba4b5f203b7538a9a8189bc9a2 +Subproject commit 6c8cc1a4436d5ac4c1aa876e24abb811b5fd5825 diff --git a/requirements.txt b/requirements.txt index c95a9dc61..2bf0b1274 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,4 +16,11 @@ virtualenv==20.4.7 setuptools<=60.5.0 docopt==0.6.2 tabulate==0.9.0 -simplejson \ No newline at end of file +simplejson~=3.18.3 +pip~=23.0 +six~=1.16.0 +cbor~=1.0.0 +orjson~=3.8.6 +cbor2~=5.4.6 +python-rapidjson~=1.9 +ujson~=5.7.0 \ No newline at end of file From 87e7f65f9d05fc527820226a5dc93c9ee1d22954 Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Wed, 15 Mar 2023 13:38:25 -0700 Subject: [PATCH 3/6] Adds formats to documentation, changes a test namespace. --- amazon/ionbenchmark/ion_benchmark_cli.py | 5 +++-- ion-c | 2 +- tests/test_benchmark_cli.py | 4 ++-- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 44411ad9b..1945f4462 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -46,8 +46,9 @@ -c --c-extension If the C extension is enabled, note that it only applies to simpleIon module. [default: True] - -f --format Format to benchmark, from the set (ion_binary | ion_text). May be specified - multiple times to compare different formats. [default: ion_binary] + -f --format Format to benchmark, from the set (ion_binary | ion_text | json | simplejson | + ujson | rapidjson | orjson | cbor | cbor2). May be specified multiple times to + compare different formats. [default: ion_binary] -p --profile (NOT SUPPORTED YET) Initiates a single iteration that repeats indefinitely until terminated, allowing users to attach profiling tools. If this option is diff --git a/ion-c b/ion-c index 6c8cc1a44..621e37969 160000 --- a/ion-c +++ b/ion-c @@ -1 +1 @@ -Subproject commit 6c8cc1a4436d5ac4c1aa876e24abb811b5fd5825 +Subproject commit 621e3796968170c520ea06a89b4baea26c43b070 diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py index 5761356d7..42e8c4f33 100644 --- a/tests/test_benchmark_cli.py +++ b/tests/test_benchmark_cli.py @@ -191,9 +191,9 @@ def test_read_json_format(f): @parametrize( - *tuple((f.value for f in Format.Format if Format.format_is_json(f.value))) + *tuple((f.value for f in Format.Format if Format.format_is_cbor(f.value))) ) -def test_write_json_format(f): +def test_write_cbor_format(f): table = execution_with_command(['write', generate_test_path('integers.ion'), '--format', f'{f}']) assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')]) From ed5597356535e084d05991eb646269bf6b84b324 Mon Sep 17 00:00:00 2001 From: Eric Chen <67451029+cheqianh@users.noreply.github.com> Date: Fri, 17 Mar 2023 15:28:05 -0700 Subject: [PATCH 4/6] Uses specific ion-c version to build ion-python C extension. (#250) --- install.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/install.py b/install.py index bf44e1085..3f50cc547 100644 --- a/install.py +++ b/install.py @@ -89,6 +89,9 @@ def _download_ionc(): os.chdir(_CURRENT_ION_C_DIR) + # TODO Use ion-c 1.1.0 for now - https://github.com/amazon-ion/ion-python/issues/249 + check_call(['git', 'reset', '--hard', 'v1.1.0']) + # Initialize submodule. 
check_call(['git', 'submodule', 'update', '--init']) From 3c99a29260763a192386b8698a6ddd3019536d5e Mon Sep 17 00:00:00 2001 From: Eric Chen Date: Sun, 19 Mar 2023 15:28:28 -0700 Subject: [PATCH 5/6] Address comments Adds. table --- amazon/ionbenchmark/API.py | 6 +- amazon/ionbenchmark/ion_benchmark_cli.py | 307 +++++++++++------------ ion-c | 2 +- requirements.txt | 1 - tests/benchmark_sample_data/cbor/sample | 1 + tests/test_benchmark_cli.py | 164 +++++++++--- 6 files changed, 289 insertions(+), 192 deletions(-) create mode 100644 tests/benchmark_sample_data/cbor/sample diff --git a/amazon/ionbenchmark/API.py b/amazon/ionbenchmark/API.py index 95b005bc9..b370d299f 100644 --- a/amazon/ionbenchmark/API.py +++ b/amazon/ionbenchmark/API.py @@ -4,6 +4,6 @@ # Serialization/deserialization APIs to benchmark. class API(Enum): """Enumeration of the APIs.""" - SIMPLE_ION = 'simple_ion' - EVENT = 'event' - DEFAULT = 'simple_ion' + LOAD_DUMP = 'load_dump' + STREAMING = 'streaming' + DEFAULT = 'load_dump' diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py index 1945f4462..22120c227 100644 --- a/amazon/ionbenchmark/ion_benchmark_cli.py +++ b/amazon/ionbenchmark/ion_benchmark_cli.py @@ -33,9 +33,9 @@ Options: -h, --help Show this screen. - --api The API to exercise (simple_ion, event). `simple_ion` refers to - simpleIon's load method. `event` refers to ion-python's event - based non-blocking API. Default to `simpleIon`. + --api The API to exercise (load_dump, streaming). `load_dump` refers to + the load/dump method. `streaming` refers to ion-python's event + based non-blocking API specifically. Default to `load_dump`. -t --iterator If returns an iterator for simpleIon C extension read API. [default: False] @@ -74,7 +74,6 @@ import platform import cbor2 -import orjson import rapidjson import simplejson import ujson @@ -95,6 +94,9 @@ pypy = platform.python_implementation() == 'PyPy' if not pypy: import tracemalloc + import orjson +else: + import json as orjson BYTES_TO_MB = 1024 * 1024 _IVM = b'\xE0\x01\x00\xEA' @@ -107,99 +109,97 @@ output_file = 'dump_output' -# Generates benchmark code for simpleion load/loads APIs -def generate_simpleion_read_test_code(file, memory_profiling, io_type, iterator=False, single_value=False, - emit_bare_values=False): - if io_type == Io_type.BUFFER.value: - with open(file, "br") as fp: - benchmark_data = fp.read() - if not memory_profiling: - if not iterator: - def test_func(): - data = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values) - return data - else: - def test_func(): - it = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=False) - while True: - try: - next(it) - except StopIteration: - break - return it - else: - if not iterator: - def test_func(): - tracemalloc.start() - data = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values) - global read_memory_usage_peak - read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - return data - else: - def test_func(): - tracemalloc.start() - it = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=False) - while True: - try: - next(it) - except StopIteration: - break - global read_memory_usage_peak - read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - return it - else: - if not memory_profiling: - if not iterator: - 
def test_func(): - with open(file, "br") as fp: - data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) - return data - else: - def test_func(): - with open(file, "br") as fp: - it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=False) +# Generates benchmark code for json/cbor/Ion load/loads APIs +def generate_read_test_code(file, memory_profiling, format_option, binary, io_type, iterator=False, single_value=False, + emit_bare_values=False): + # if format_option == Format.ION_TEXT.value or format_option == Format.ION_BINARY.value: + if format_is_ion(format_option): + if io_type == Io_type.BUFFER.value: + with open(file, "br") as fp: + benchmark_data = fp.read() + if not memory_profiling: + if not iterator: + def test_func(): + data = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values) + return data + else: + def test_func(): + it = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) while True: try: next(it) except StopIteration: break - return it - else: - if not iterator: - def test_func(): - tracemalloc.start() - with open(file, "br") as fp: - data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) - global read_memory_usage_peak - read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - return data + return it else: - def test_func(): - tracemalloc.start() - with open(file, "br") as fp: - it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, - parse_eagerly=False) + if not iterator: + def test_func(): + tracemalloc.start() + data = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return data + else: + def test_func(): + tracemalloc.start() + it = ion.loads(benchmark_data, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) while True: try: next(it) except StopIteration: break - global read_memory_usage_peak - read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - return it - return test_func - - -# Generates benchmark code for json/cbor load/loads APIs -def generate_read_test_code(file, memory_profiling, format_option, binary, io_type): - if format_option == Format.JSON.value: + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return it + else: + if not memory_profiling: + if not iterator: + def test_func(): + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values) + return data + else: + def test_func(): + with open(file, "br") as fp: + it = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) + while True: + try: + next(it) + except StopIteration: + break + return it + else: + if not iterator: + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + data = ion.load(fp, single_value=single_value, emit_bare_values=emit_bare_values, parse_eagerly=True) + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return data + else: + def test_func(): + tracemalloc.start() + with open(file, "br") as fp: + it = ion.load(fp, 
single_value=single_value, emit_bare_values=emit_bare_values, + parse_eagerly=False) + while True: + try: + next(it) + except StopIteration: + break + global read_memory_usage_peak + read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + return it + return test_func + elif format_option == Format.JSON.value: benchmark_api = json.loads if io_type == Io_type.BUFFER.value else json.load elif format_option == Format.SIMPLEJSON.value: benchmark_api = simplejson.loads if io_type == Io_type.BUFFER.value else simplejson.load @@ -215,7 +215,7 @@ def generate_read_test_code(file, memory_profiling, format_option, binary, io_ty elif format_option == Format.CBOR2.value: benchmark_api = cbor2.loads if io_type == Io_type.BUFFER.value else cbor2.load else: - raise Exception('unknown JSON/CBOR format to generate setup code.') + raise Exception(f'unknown JSON/CBOR/Ion format {format_option} to generate setup code.') if io_type == Io_type.BUFFER.value: with open(file, 'br') as fp: @@ -272,22 +272,18 @@ def generate_event_test_code(file): pass -# Generates setup code for simpleion benchmark code -def generate_simpleion_setup(c_extension, gc=False): - rtn = f'import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ion.c_ext = ' \ - f'{c_extension}; import tracemalloc' - if gc: - rtn += '; import gc; gc.enable()' - - return rtn - - # Generates setup code for json/cbor benchmark code -def generate_setup(gc=False): - rtn = 'import tracemalloc' +def generate_setup(format_option, c_extension=False, gc=False): + if format_is_ion(format_option): + rtn = f'import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ion.c_ext = ' \ + f'{c_extension}; import tracemalloc' + else: + rtn = 'import tracemalloc' + if gc: rtn += '; import gc; gc.enable()' - + else: + rtn += '; import gc; gc.disable()' return rtn @@ -301,7 +297,7 @@ def read_micro_benchmark(iterations, warmups, c_extension, file, memory_profilin iterator=False): file_size = Path(file).stat().st_size / BYTES_TO_MB - setup_with_gc = generate_setup(gc=False) + setup_with_gc = generate_setup(format_option=format_option, gc=False) test_code = generate_read_test_code(file, memory_profiling=memory_profiling, format_option=format_option, io_type=io_type, binary=binary) @@ -320,10 +316,11 @@ def read_micro_benchmark_simpleion(iterations, warmups, c_extension, file, memor io_type, iterator=False): file_size = Path(file).stat().st_size / BYTES_TO_MB - setup_with_gc = generate_simpleion_setup(c_extension=c_extension, gc=False) + setup_with_gc = generate_setup(format_option=format_option, c_extension=c_extension, gc=False) - test_code = generate_simpleion_read_test_code(file, emit_bare_values=False, memory_profiling=memory_profiling, - iterator=iterator, io_type=io_type) + test_code = generate_read_test_code(file, format_option=format_option, emit_bare_values=False, + memory_profiling=memory_profiling, iterator=iterator, io_type=io_type, + binary=binary) # warm up timeit.timeit(stmt=test_code, setup=setup_with_gc, number=warmups) @@ -378,39 +375,7 @@ def read_generate_report(table, file_size, each_option, total_time, memory_usage format_decimal(memory_usage_peak)]) -# Generates benchmark code for simpleion dump API -def generate_simpleion_write_test_code(obj, memory_profiling, io_type, binary): - if io_type == Io_type.BUFFER.value: - if not memory_profiling: - def test_func(): - return ion.dumps(obj=obj, binary=binary) - else: - def test_func(): - tracemalloc.start() - data = 
ion.dumps(obj=obj, binary=binary) - global write_memory_usage_peak - write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - - return data - else: - if not memory_profiling: - def test_func(): - with open(output_file, 'bw') as fp: - ion.dump(obj, fp, binary=binary) - else: - def test_func(): - tracemalloc.start() - with open(output_file, 'bw') as fp: - ion.dump(obj, fp, binary=binary) - global write_memory_usage_peak - write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB - tracemalloc.stop() - - return test_func - - -# Benchmarks simpleion dump API +# Benchmarks simpleion dump/dumps API def write_micro_benchmark_simpleion(iterations, warmups, c_extension, file, binary, memory_profiling, format_option, io_type): file_size = Path(file).stat().st_size / BYTES_TO_MB @@ -418,10 +383,10 @@ def write_micro_benchmark_simpleion(iterations, warmups, c_extension, file, bina obj = ion.load(fp, parse_eagerly=True, single_value=False) # GC refers to reference cycles, not reference count - setup_with_gc = generate_simpleion_setup(gc=False, c_extension=c_extension) + setup_with_gc = generate_setup(format_option=format_option, gc=False, c_extension=c_extension) - test_func = generate_simpleion_write_test_code(obj, memory_profiling=memory_profiling, binary=binary, - io_type=io_type) + test_func = generate_write_test_code(obj, memory_profiling=memory_profiling, binary=binary, + io_type=io_type, format_option=format_option) # warm up timeit.timeit(stmt=test_func, setup=setup_with_gc, number=warmups) @@ -432,12 +397,12 @@ def write_micro_benchmark_simpleion(iterations, warmups, c_extension, file, bina return file_size, result_with_gc -# Benchmarks JSON/CBOR APIs +# Benchmarks JSON/CBOR dump/dumps APIs def write_micro_benchmark(iterations, warmups, c_extension, file, binary, memory_profiling, format_option, io_type): file_size = Path(file).stat().st_size / BYTES_TO_MB obj = generate_json_and_cbor_obj_for_write(file, format_option) # GC refers to reference cycles, not reference count - setup_with_gc = generate_setup(gc=False) + setup_with_gc = generate_setup(format_option=format_option, gc=False) test_func = generate_write_test_code(obj, memory_profiling=memory_profiling, format_option=format_option, io_type=io_type, binary=binary) @@ -451,9 +416,38 @@ def write_micro_benchmark(iterations, warmups, c_extension, file, binary, memory return file_size, result_with_gc -# Generates benchmark code for json dump API +# Generates benchmark code for json/cbor/Ion dump/dumps API def generate_write_test_code(obj, memory_profiling, format_option, io_type, binary): - if format_option == Format.JSON.value: + if format_is_ion(format_option): + if io_type == Io_type.BUFFER.value: + if not memory_profiling: + def test_func(): + return ion.dumps(obj=obj, binary=binary) + else: + def test_func(): + tracemalloc.start() + data = ion.dumps(obj=obj, binary=binary) + global write_memory_usage_peak + write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + + return data + else: + if not memory_profiling: + def test_func(): + with open(output_file, 'bw') as fp: + ion.dump(obj, fp, binary=binary) + else: + def test_func(): + tracemalloc.start() + with open(output_file, 'bw') as fp: + ion.dump(obj, fp, binary=binary) + global write_memory_usage_peak + write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB + tracemalloc.stop() + + return test_func + elif format_option == Format.JSON.value: benchmark_api = json.dumps 
if io_type == Io_type.BUFFER.value else json.dump elif format_option == Format.SIMPLEJSON.value: benchmark_api = simplejson.dumps if io_type == Io_type.BUFFER.value else simplejson.dump @@ -468,7 +462,7 @@ def generate_write_test_code(obj, memory_profiling, format_option, io_type, bina elif format_option == Format.CBOR2.value: benchmark_api = cbor2.dumps if io_type == Io_type.BUFFER.value else cbor2.dump else: - raise Exception('unknown JSON/CBOR format to generate setup code.') + raise Exception(f'unknown JSON/CBOR/Ion format {format_option} to generate setup code.') if io_type == Io_type.BUFFER.value: if not memory_profiling: @@ -517,15 +511,6 @@ def test_func(): return test_func -# Generates setup code for json benchmark code -def generate_setup(gc=False): - rtn = 'import tracemalloc; import gc' - if gc: - rtn += '; gc.enable()' - - return rtn - - # Benchmarks pure python event based write API # https://github.com/amazon-ion/ion-python/issues/236 def write_micro_benchmark_event(iterations, warmups, c_extension, file, binary, memory_profiling, io_type, @@ -630,6 +615,10 @@ def clean_up(): os.remove(output_file) +def options_validation(format_options): + return True + + def ion_python_benchmark_cli(arguments): if arguments['--version'] or arguments['-v']: print(TOOL_VERSION) @@ -660,17 +649,17 @@ def ion_python_benchmark_cli(arguments): # reset each option configuration api, format_option, io_type = reset_for_each_execution(each_option) binary = format_is_binary(format_option) - # TODO. currently, we must provide the tool a corresponding file format for read benchmarking. For example, - # we must provide a CBOR file for CBOR APIs benchmarking. We cannot benchmark CBOR APIs by giving a JSON - # file. Lack of format conversion prevents us from benchmarking different formats concurrently. + # TODO. currently, we must provide the tool to convert to a corresponding file format for read benchmarking. + # For example, we must provide a CBOR file for CBOR APIs benchmarking. We cannot benchmark CBOR APIs by giving + # a JSON file. Lack of format conversion prevents us from benchmarking different formats concurrently. 
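+        # Example (with the default api): `read <input_file> --format json --io-type buffer`
+        # yields each_option ('load_dump', 'json', 'buffer'), which selects
+        # read_micro_benchmark and times json.loads over an in-memory buffer.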
@@ -660,17 +649,17 @@ def ion_python_benchmark_cli(arguments):
         # reset each option configuration
         api, format_option, io_type = reset_for_each_execution(each_option)
         binary = format_is_binary(format_option)
-        # TODO. currently, we must provide the tool a corresponding file format for read benchmarking. For example,
-        # we must provide a CBOR file for CBOR APIs benchmarking. We cannot benchmark CBOR APIs by giving a JSON
-        # file. Lack of format conversion prevents us from benchmarking different formats concurrently.
+        # TODO. currently, the input file must already be in the format under test for read benchmarking. For
+        # example, we must provide a CBOR file to benchmark the CBOR APIs; we cannot benchmark them by giving a
+        # JSON file. The lack of format conversion prevents us from benchmarking different formats concurrently.
         file = rewrite_file_to_format(file, format_option)

         # Generate microbenchmark API according to read/write command
         if format_is_ion(format_option):
-            if not api or api == API.SIMPLE_ION.value:
+            if not api or api == API.LOAD_DUMP.value:
                 micro_benchmark_function = read_micro_benchmark_simpleion if command == 'read' \
                     else write_micro_benchmark_simpleion
-            elif api == API.EVENT.value:
+            elif api == API.STREAMING.value:
                 micro_benchmark_function = read_micro_benchmark_event if command == 'read' \
                     else write_micro_benchmark_event
             else:
diff --git a/ion-c b/ion-c
index 621e37969..093cc1c32 160000
--- a/ion-c
+++ b/ion-c
@@ -1 +1 @@
-Subproject commit 621e3796968170c520ea06a89b4baea26c43b070
+Subproject commit 093cc1c32d9ac3ba4b5f203b7538a9a8189bc9a2
diff --git a/requirements.txt b/requirements.txt
index 2bf0b1274..aa554a017 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -20,7 +20,6 @@ simplejson~=3.18.3
 pip~=23.0
 six~=1.16.0
 cbor~=1.0.0
-orjson~=3.8.6
 cbor2~=5.4.6
 python-rapidjson~=1.9
 ujson~=5.7.0
\ No newline at end of file
diff --git a/tests/benchmark_sample_data/cbor/sample b/tests/benchmark_sample_data/cbor/sample
new file mode 100644
index 000000000..45fc55b07
--- /dev/null
+++ b/tests/benchmark_sample_data/cbor/sample
@@ -0,0 +1 @@
+sthis is a test file
\ No newline at end of file
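The new cbor/sample fixture above is a single CBOR text string: 0x73 ('s') is the header byte for a 19-character string, which is why the raw bytes happen to read as ASCII 'sthis is a test file'. Assuming cbor2 (already pinned in requirements.txt), an equivalent fixture can be regenerated with a sketch like this (the path is illustrative):

    import cbor2

    # cbor2.dumps('this is a test file') == b'sthis is a test file'
    with open('tests/benchmark_sample_data/cbor/sample', 'bw') as fp:
        fp.write(cbor2.dumps('this is a test file'))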
diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py
index 42e8c4f33..4a9e237a1 100644
--- a/tests/test_benchmark_cli.py
+++ b/tests/test_benchmark_cli.py
@@ -1,13 +1,16 @@
+import json
 import time
 from itertools import chain
 from os.path import abspath, join, dirname

+import cbor2
 from docopt import docopt

 from amazon.ion import simpleion
 from amazon.ionbenchmark import ion_benchmark_cli, Format, Io_type
-from amazon.ionbenchmark.ion_benchmark_cli import generate_simpleion_read_test_code, \
-    generate_simpleion_write_test_code, ion_python_benchmark_cli
+from amazon.ionbenchmark.Format import format_is_ion, format_is_cbor, format_is_json
+from amazon.ionbenchmark.ion_benchmark_cli import generate_read_test_code, \
+    generate_write_test_code, ion_python_benchmark_cli
 from amazon.ionbenchmark.util import str_to_bool, TOOL_VERSION
 from tests import parametrize
 from tests.test_simpleion import generate_scalars_text
@@ -45,12 +48,45 @@ def generate_test_path(p):
     generate_test_path('integers.ion')
 )
 def test_generate_simpleion_read_test_code(path):
-    actual = generate_simpleion_read_test_code(path, memory_profiling=False, single_value=False,
-                                               emit_bare_values=False, io_type='buffer')
+    actual = generate_read_test_code(path, memory_profiling=False, single_value=False,
+                                     format_option=Format.Format.ION_TEXT.value, emit_bare_values=False,
+                                     io_type=Io_type.Io_type.FILE, binary=False)

     # make sure we generated the desired load function
     with open(path) as fp:
-        expect = simpleion.load(fp, single_value=False, parse_eagerly=True)
+        expect = simpleion.load(fp, single_value=False, emit_bare_values=False, parse_eagerly=True)
+
+    # make sure the return values are same
+    assert actual() == expect
+
+
+@parametrize(
+    generate_test_path('integers.ion')
+)
+def test_generate_json_read_test_code(path):
+    actual = generate_read_test_code(path, memory_profiling=False, single_value=False,
+                                     format_option=Format.Format.JSON.value, emit_bare_values=False,
+                                     io_type=Io_type.Io_type.FILE, binary=False)
+
+    # make sure we generated the desired load function
+    with open(path) as fp:
+        expect = json.load(fp)
+
+    # make sure the return values are same
+    assert actual() == expect
+
+
+@parametrize(
+    generate_test_path('integers.ion')
+)
+def test_generate_cbor_read_test_code(path):
+    actual = generate_read_test_code(path, memory_profiling=False, single_value=False,
+                                     format_option=Format.Format.CBOR2.value, emit_bare_values=False,
+                                     io_type=Io_type.Io_type.FILE, binary=False)
+
+    # make sure we generated the desired load function
+    with open(path) as fp:
+        expect = cbor2.load(fp)

     # make sure the return values are same
     assert actual() == expect
@@ -62,7 +98,8 @@ def test_generate_simpleion_read_test_code(path):
     ))
 )
 def test_generate_simpleion_write_test_code(obj):
-    actual = generate_simpleion_write_test_code(obj, memory_profiling=False, binary=False, io_type='buffer')
+    actual = generate_write_test_code(obj, format_option=Format.Format.ION_TEXT.value, memory_profiling=False,
+                                      binary=False, io_type=Io_type.Io_type.BUFFER.value)

     # make sure we generated the desired dumps function
     expect = simpleion.dumps(obj, binary=False)
@@ -71,6 +108,38 @@ def test_generate_simpleion_write_test_code(obj):
     assert actual() == expect


+@parametrize(
+    generate_test_path('./json/object.json'),
+)
+def test_generate_json_write_test_code(file):
+    with open(file) as fp:
+        obj = json.load(fp)
+    actual = generate_write_test_code(obj, format_option=Format.Format.JSON.value, memory_profiling=False, binary=False,
+                                      io_type=Io_type.Io_type.BUFFER.value)
+
+    # make sure we generated the desired dumps function
+    expect = json.dumps(obj)
+
+    # make sure the return values are same
+    assert actual() == expect
+
+
+@parametrize(
+    generate_test_path('./cbor/sample')
+)
+def test_generate_cbor_write_test_code(file):
+    with open(file, 'br') as fp:
+        obj = cbor2.load(fp)
+    actual = generate_write_test_code(obj, format_option=Format.Format.CBOR2.value, memory_profiling=False,
+                                      binary=False, io_type=Io_type.Io_type.BUFFER.value)
+
+    # make sure we generated the desired dumps function
+    expect = cbor2.dumps(obj)
+
+    # make sure the return values are same
+    assert actual() == expect
+
+
 def execution_with_command(c):
     return ion_python_benchmark_cli(docopt(doc, argv=c))

@@ -135,43 +204,53 @@ def gather_all_options_in_list(table):


 def test_read_multi_api(file=generate_test_path('integers.ion')):
-    table = execution_with_command(['read', file, '--api', 'simple_ion', '--api', 'event'])
-    assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary', 'file'), ('simple_ion', 'ion_binary', 'file')])
+    table = execution_with_command(['read', file, '--api', 'load_dump', '--api', 'streaming'])
+    assert gather_all_options_in_list(table) == sorted(
+        [('streaming', 'ion_binary', 'file'), ('load_dump', 'ion_binary', 'file')])


 def test_write_multi_api(file=generate_test_path('integers.ion')):
-    table = execution_with_command(['write', file, '--api', 'simple_ion', '--api', 'event'])
-    assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary', 'file'), ('simple_ion', 'ion_binary', 'file')])
+    table = execution_with_command(['write', file, '--api', 'load_dump', '--api', 'streaming'])
+    assert gather_all_options_in_list(table) == sorted(
+        [('streaming', 'ion_binary', 'file'), ('load_dump', 'ion_binary', 'file')])


 def test_read_multi_duplicated_api(file=generate_test_path('integers.ion')):
-    table = execution_with_command(['read', file, '--api', 'simple_ion', '--api', 'event', '--api', 'event'])
-    assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary', 'file'), ('simple_ion', 'ion_binary', 'file')])
+    table = execution_with_command(['read', file, '--api', 'load_dump', '--api', 'streaming', '--api', 'streaming'])
+    assert gather_all_options_in_list(table) == sorted(
+        [('streaming', 'ion_binary', 'file'), ('load_dump', 'ion_binary', 'file')])


 def test_write_multi_duplicated_api(file=generate_test_path('integers.ion')):
-    table = execution_with_command(['write', file, '--api', 'simple_ion', '--api', 'event', '--api', 'event'])
-    assert gather_all_options_in_list(table) == sorted([('event', 'ion_binary', 'file'), ('simple_ion', 'ion_binary', 'file')])
+    table = execution_with_command(['write', file, '--api', 'load_dump', '--api', 'streaming', '--api', 'streaming'])
+    assert gather_all_options_in_list(table) == sorted(
+        [('streaming', 'ion_binary', 'file'), ('load_dump', 'ion_binary', 'file')])


 def test_read_multi_format(file=generate_test_path('integers.ion')):
     table = execution_with_command(['read', file, '--format', 'ion_text', '--format', 'ion_binary'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_binary', 'file'), ('simple_ion', 'ion_text', 'file')])
+    assert gather_all_options_in_list(table) == sorted(
+        [('load_dump', 'ion_binary', 'file'), ('load_dump', 'ion_text', 'file')])


 def test_write_multi_format(file=generate_test_path('integers.ion')):
     table = execution_with_command(['write', file, '--format', 'ion_text', '--format', 'ion_binary'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text', 'file'), ('simple_ion', 'ion_binary', 'file')])
+    assert gather_all_options_in_list(table) == sorted(
+        [('load_dump', 'ion_text', 'file'), ('load_dump', 'ion_binary', 'file')])


 def test_read_multi_duplicated_format(file=generate_test_path('integers.ion')):
-    table = execution_with_command(['read', file, '--format', 'ion_text', '--format', 'ion_binary', '--format', 'ion_text'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text', 'file'), ('simple_ion', 'ion_binary', 'file')])
+    table = execution_with_command(
+        ['read', file, '--format', 'ion_text', '--format', 'ion_binary', '--format', 'ion_text'])
+    assert gather_all_options_in_list(table) == sorted(
+        [('load_dump', 'ion_text', 'file'), ('load_dump', 'ion_binary', 'file')])


 def test_write_multi_duplicated_format(file=generate_test_path('integers.ion')):
-    table = execution_with_command(['write', file, '--format', 'ion_text', '--format', 'ion_binary', '--format', 'ion_text',])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', 'ion_text', 'file'), ('simple_ion', 'ion_binary', 'file')])
+    table = execution_with_command(
+        ['write', file, '--format', 'ion_text', '--format', 'ion_binary', '--format', 'ion_text', ])
+    assert gather_all_options_in_list(table) == sorted(
+        [('load_dump', 'ion_text', 'file'), ('load_dump', 'ion_binary', 'file')])


 @parametrize(
@@ -179,7 +258,7 @@ def test_write_multi_duplicated_format(file=generate_test_path('integers.ion')):
 )
 def test_write_json_format(f):
     table = execution_with_command(['write', generate_test_path('integers.ion'), '--format', f'{f}'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')])
+    assert gather_all_options_in_list(table) == sorted([('load_dump', f'{f}', 'file')])


 @parametrize(
@@ -187,7 +266,7 @@ def test_write_json_format(f):
 )
 def test_read_json_format(f):
     table = execution_with_command(['read', generate_test_path('integers.ion'), '--format', f'{f}'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')])
+    assert gather_all_options_in_list(table) == sorted([('load_dump', f'{f}', 'file')])


 @parametrize(
@@ -195,7 +274,7 @@ def test_read_json_format(f):
 )
 def test_write_cbor_format(f):
     table = execution_with_command(['write', generate_test_path('integers.ion'), '--format', f'{f}'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')])
+    assert gather_all_options_in_list(table) == sorted([('load_dump', f'{f}', 'file')])


 @parametrize(
@@ -203,20 +282,49 @@ def test_write_cbor_format(f):
 )
 def test_read_cbor_format(f):
     table = execution_with_command(['read', generate_test_path('integers.ion'), '--format', f'{f}'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', f'{f}', 'file')])
+    assert gather_all_options_in_list(table) == sorted([('load_dump', f'{f}', 'file')])


 @parametrize(
     *tuple((io.value for io in Io_type.Io_type))
 )
 def test_write_io_type(f):
-    table = execution_with_command(['write', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'json'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', 'json', f'{f}')])
+    table = execution_with_command(
+        ['write', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'json'])
+    assert gather_all_options_in_list(table) == sorted([('load_dump', 'json', f'{f}')])


 @parametrize(
     *tuple((io.value for io in Io_type.Io_type))
 )
 def test_read_io_type(f):
-    table = execution_with_command(['read', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'json', '--format', 'ion_binary'])
-    assert gather_all_options_in_list(table) == sorted([('simple_ion', 'json', f'{f}'), ('simple_ion', 'ion_binary', f'{f}')])
+    table = execution_with_command(
+        ['read', generate_test_path('integers.ion'), '--io-type', f'{f}', '--format', 'json', '--format', 'ion_binary'])
+    assert gather_all_options_in_list(table) == sorted(
+        [('load_dump', 'json', f'{f}'), ('load_dump', 'ion_binary', f'{f}')])
+
+
+@parametrize(
+    *tuple((Format.Format.ION_TEXT, Format.Format.ION_BINARY))
+)
+def test_format_is_ion(f):
+    assert format_is_ion(f.value) is True
+
+
+@parametrize(
+    *tuple((Format.Format.JSON,
+            Format.Format.UJSON,
+            Format.Format.RAPIDJSON,
+            Format.Format.SIMPLEJSON
+            ))
+)
+def test_format_is_json(f):
+    assert format_is_json(f.value) is True
+
+
+@parametrize(
+    Format.Format.CBOR,
+    Format.Format.CBOR2
+)
+def test_format_is_cbor(f):
+    assert format_is_cbor(f.value) is True
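The format_is_* predicates covered by the last three tests are what let the CLI derive behavior such as byte- versus text-mode I/O from a plain format string. A minimal sketch of that pattern, assuming the predicates behave as tested above (open_mode is a hypothetical helper, not part of the patch):

    from amazon.ionbenchmark.Format import Format, format_is_binary

    def open_mode(format_option, write=False):
        # ion_binary and the CBOR formats need byte-oriented files.
        mode = 'w' if write else 'r'
        return 'b' + mode if format_is_binary(format_option) else mode

    assert open_mode(Format.ION_BINARY.value) == 'br'
    assert open_mode(Format.JSON.value, write=True) == 'w'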
From 2cbbdd898277f52557fa0dd1f6211cdb239d64bc Mon Sep 17 00:00:00 2001
From: Eric Chen
Date: Tue, 28 Mar 2023 12:44:19 -0700
Subject: [PATCH 6/6] Addressed feedback, passed all CI/CD, and deprecated orjson for now.

---
 amazon/ionbenchmark/Format.py                |  7 +-
 amazon/ionbenchmark/ion_benchmark_cli.py     | 77 +++++---------------
 tests/benchmark_sample_data/json/object.json |  1 +
 tests/test_benchmark_cli.py                  | 16 ++--
 4 files changed, 31 insertions(+), 70 deletions(-)
 create mode 100644 tests/benchmark_sample_data/json/object.json

diff --git a/amazon/ionbenchmark/Format.py b/amazon/ionbenchmark/Format.py
index 8f93bfcc1..c25d06b39 100644
--- a/amazon/ionbenchmark/Format.py
+++ b/amazon/ionbenchmark/Format.py
@@ -7,8 +7,7 @@ def format_is_ion(format_option):

 def format_is_json(format_option):
     return (format_option == Format.JSON.value) or (format_option == Format.SIMPLEJSON.value) \
-        or (format_option == Format.UJSON.value) or (format_option == Format.RAPIDJSON.value) or \
-        (format_option == Format.ORJSON.value)
+        or (format_option == Format.UJSON.value) or (format_option == Format.RAPIDJSON.value)


@@ -16,8 +15,7 @@ def format_is_cbor(format_option):


 def format_is_binary(format_option):
-    return format_is_cbor(format_option) or (format_option == Format.ION_BINARY.value) \
-        or (format_option == Format.ORJSON.value)
+    return format_is_cbor(format_option) or (format_option == Format.ION_BINARY.value)


@@ -32,7 +30,6 @@ class Format(Enum):
     SIMPLEJSON = 'simplejson'
     UJSON = 'ujson'
     RAPIDJSON = 'rapidjson'
-    ORJSON = 'orjson'
     CBOR = 'cbor'
     CBOR2 = 'cbor2'
     DEFAULT = 'ion_binary'
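Dropping orjson also simplifies the dispatch code below, because every remaining JSON library exposes the same load/loads and dump/dumps pairs, so a single benchmark_api variable can stand in for any of them. A quick parity check, as a sketch (the payload matches the object.json fixture added later in this patch):

    import json

    import rapidjson
    import simplejson
    import ujson

    doc = '{"name":"John", "age":30, "car":null}'
    # All four parsers accept the same call shape and agree on standard JSON input.
    assert all(lib.loads(doc) == json.loads(doc) for lib in (json, simplejson, ujson, rapidjson))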
diff --git a/amazon/ionbenchmark/ion_benchmark_cli.py b/amazon/ionbenchmark/ion_benchmark_cli.py
index 22120c227..36016644d 100644
--- a/amazon/ionbenchmark/ion_benchmark_cli.py
+++ b/amazon/ionbenchmark/ion_benchmark_cli.py
@@ -47,7 +47,7 @@
                            [default: True]

    -f --format             Format to benchmark, from the set (ion_binary | ion_text | json | simplejson |
-                           ujson | rapidjson | orjson | cbor | cbor2). May be specified multiple times to
+                           ujson | rapidjson | cbor | cbor2). May be specified multiple times to
                            compare different formats.
                            [default: ion_binary]

    -p --profile            (NOT SUPPORTED YET) Initiates a single iteration that repeats indefinitely until
@@ -94,9 +94,6 @@
 pypy = platform.python_implementation() == 'PyPy'
 if not pypy:
     import tracemalloc
-    import orjson
-else:
-    import json as orjson

 BYTES_TO_MB = 1024 * 1024
 _IVM = b'\xE0\x01\x00\xEA'
@@ -112,7 +109,6 @@
 # Generates benchmark code for json/cbor/Ion load/loads APIs
 def generate_read_test_code(file, memory_profiling, format_option, binary, io_type, iterator=False, single_value=False,
                             emit_bare_values=False):
-    # if format_option == Format.ION_TEXT.value or format_option == Format.ION_BINARY.value:
     if format_is_ion(format_option):
         if io_type == Io_type.BUFFER.value:
             with open(file, "br") as fp:
@@ -207,9 +203,6 @@ def test_func():
         benchmark_api = ujson.loads if io_type == Io_type.BUFFER.value else ujson.load
     elif format_option == Format.RAPIDJSON.value:
         benchmark_api = rapidjson.loads if io_type == Io_type.BUFFER.value else rapidjson.load
-    elif format_option == Format.ORJSON.value:
-        # orjson doesn't provide load API, so use loads for both file and buffer io_types.
-        benchmark_api = orjson.loads
     elif format_option == Format.CBOR.value:
         benchmark_api = cbor.loads if io_type == Io_type.BUFFER.value else cbor.load
     elif format_option == Format.CBOR2.value:
@@ -233,21 +226,6 @@ def test_func():
                 read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB
                 tracemalloc.stop()
                 return data
-    elif format_option == Format.ORJSON.value:
-        if not memory_profiling:
-            def test_func():
-                with open(file, 'br') as benchmark_file:
-                    data = benchmark_api(benchmark_file.read())
-                return data
-        else:
-            def test_func():
-                tracemalloc.start()
-                with open(file, 'br') as benchmark_file:
-                    data = benchmark_api(benchmark_file.read())
-                global read_memory_usage_peak
-                read_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB
-                tracemalloc.stop()
-                return data
     else:
         if not memory_profiling:
             def test_func():
@@ -273,17 +251,21 @@ def generate_event_test_code(file):


 # Generates setup code for json/cbor benchmark code
-def generate_setup(format_option, c_extension=False, gc=False):
+def generate_setup(format_option, c_extension=False, gc=False, memory_profiling=False):
     if format_is_ion(format_option):
-        rtn = f'import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ion.c_ext = ' \
-              f'{c_extension}; import tracemalloc'
+        rtn = f'import gc; import amazon.ion.simpleion as ion;from amazon.ion.simple_types import IonPySymbol; ' \
+              f'ion.c_ext = {c_extension}'
     else:
-        rtn = 'import tracemalloc'
+        rtn = 'import gc'

     if gc:
-        rtn += '; import gc; gc.enable()'
+        rtn += '; gc.enable()'
     else:
-        rtn += '; import gc; gc.disable()'
+        rtn += '; gc.disable()'
+
+    if memory_profiling:
+        rtn += '; import tracemalloc'

     return rtn

@@ -297,7 +279,7 @@ def read_micro_benchmark(iterations, warmups, c_extension, file, memory_profiling, format_option, binary, io_type,
                          iterator=False):
     file_size = Path(file).stat().st_size / BYTES_TO_MB

-    setup_with_gc = generate_setup(format_option=format_option, gc=False)
+    setup_with_gc = generate_setup(format_option=format_option, gc=False, memory_profiling=memory_profiling)

     test_code = generate_read_test_code(file, memory_profiling=memory_profiling, format_option=format_option,
                                         io_type=io_type, binary=binary)
@@ -316,7 +298,8 @@ def read_micro_benchmark_simpleion(iterations, warmups, c_extension, file, memory_profiling, format_option, binary,
                                    io_type, iterator=False):
     file_size = Path(file).stat().st_size / BYTES_TO_MB

-    setup_with_gc = generate_setup(format_option=format_option, c_extension=c_extension, gc=False)
+    setup_with_gc = generate_setup(format_option=format_option, c_extension=c_extension, gc=False,
+                                   memory_profiling=memory_profiling)

     test_code = generate_read_test_code(file, format_option=format_option, emit_bare_values=False,
                                         memory_profiling=memory_profiling, iterator=iterator, io_type=io_type,
@@ -383,7 +366,8 @@ def write_micro_benchmark_simpleion(iterations, warmups, c_extension, file, binary, memory_profiling, format_option,
         obj = ion.load(fp, parse_eagerly=True, single_value=False)

     # GC refers to reference cycles, not reference count
-    setup_with_gc = generate_setup(format_option=format_option, gc=False, c_extension=c_extension)
+    setup_with_gc = generate_setup(format_option=format_option, gc=False, c_extension=c_extension,
+                                   memory_profiling=memory_profiling)

     test_func = generate_write_test_code(obj, memory_profiling=memory_profiling, binary=binary,
                                          io_type=io_type, format_option=format_option)
@@ -400,9 +384,9 @@

 # Benchmarks JSON/CBOR dump/dumps APIs
 def write_micro_benchmark(iterations, warmups, c_extension, file, binary, memory_profiling, format_option, io_type):
     file_size = Path(file).stat().st_size / BYTES_TO_MB
-    obj = generate_json_and_cbor_obj_for_write(file, format_option)
+    obj = generate_json_and_cbor_obj_for_write(file, format_option, binary=binary)
     # GC refers to reference cycles, not reference count
-    setup_with_gc = generate_setup(format_option=format_option, gc=False)
+    setup_with_gc = generate_setup(format_option=format_option, gc=False, memory_profiling=memory_profiling)

     test_func = generate_write_test_code(obj, memory_profiling=memory_profiling, format_option=format_option,
                                          io_type=io_type, binary=binary)
@@ -455,8 +439,6 @@
         benchmark_api = ujson.dumps if io_type == Io_type.BUFFER.value else ujson.dump
     elif format_option == Format.RAPIDJSON.value:
         benchmark_api = rapidjson.dumps if io_type == Io_type.BUFFER.value else rapidjson.dump
-    elif format_option == Format.ORJSON.value:
-        benchmark_api = orjson.dumps
     elif format_option == Format.CBOR.value:
         benchmark_api = cbor.dumps if io_type == Io_type.BUFFER.value else cbor.dump
     elif format_option == Format.CBOR2.value:
@@ -476,23 +458,6 @@ def test_func():
                 write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB
                 tracemalloc.stop()

                 return data
-    elif format_option == Format.ORJSON.value:
-        if not memory_profiling:
-            def test_func():
-                with open(output_file, 'bw' if binary else 'w') as fp:
-                    data = benchmark_api(obj)
-                    fp.write(data)
-        else:
-            def test_func():
-                tracemalloc.start()
-                with open(output_file, 'bw' if binary else 'w') as fp:
-                    data = benchmark_api(obj)
-                    fp.write(data)
-                global write_memory_usage_peak
-                write_memory_usage_peak = tracemalloc.get_traced_memory()[1] / BYTES_TO_MB
-                tracemalloc.stop()
-
-                return data
     else:
         if not memory_profiling:
             def test_func():
@@ -590,8 +555,8 @@ def reset_for_each_execution(each_option):
     return api, format_option, io_type


-def generate_json_and_cbor_obj_for_write(file, format_option):
-    with open(file) as fp:
+def generate_json_and_cbor_obj_for_write(file, format_option, binary):
+    with open(file, 'br' if binary else 'r') as fp:
         if format_option == Format.JSON.value:
             return json.load(fp)
         elif format_option == Format.SIMPLEJSON.value:
             return simplejson.load(fp)
@@ -600,8 +565,6 @@ def generate_json_and_cbor_obj_for_write(file, format_option):
         elif format_option == Format.UJSON.value:
             return ujson.load(fp)
         elif format_option == Format.RAPIDJSON.value:
             return rapidjson.load(fp)
-        elif format_option == Format.ORJSON.value:
-            return orjson.loads(fp.read())
         elif format_option == Format.CBOR.value:
             return cbor.load(fp)
         elif format_option == Format.CBOR2.value:
diff --git a/tests/benchmark_sample_data/json/object.json b/tests/benchmark_sample_data/json/object.json
new file mode 100644
index 000000000..978d70e54
--- /dev/null
+++ b/tests/benchmark_sample_data/json/object.json
@@ -0,0 +1 @@
+{"name":"John", "age":30, "car":null}
\ No newline at end of file
diff --git a/tests/test_benchmark_cli.py b/tests/test_benchmark_cli.py
index 4a9e237a1..970f516d6 100644
--- a/tests/test_benchmark_cli.py
+++ b/tests/test_benchmark_cli.py
@@ -82,10 +82,10 @@ def test_generate_json_read_test_code(path):
 def test_generate_cbor_read_test_code(path):
     actual = generate_read_test_code(path, memory_profiling=False, single_value=False,
                                      format_option=Format.Format.CBOR2.value, emit_bare_values=False,
-                                     io_type=Io_type.Io_type.FILE, binary=False)
+                                     io_type=Io_type.Io_type.FILE, binary=True)

     # make sure we generated the desired load function
-    with open(path) as fp:
+    with open(path, 'br') as fp:
         expect = cbor2.load(fp)

     # make sure the return values are same
     assert actual() == expect
@@ -109,7 +109,7 @@ def test_generate_simpleion_write_test_code(obj):


 @parametrize(
-    generate_test_path('./json/object.json'),
+    generate_test_path('json/object.json'),
 )
 def test_generate_json_write_test_code(file):
     with open(file) as fp:
@@ -125,7 +125,7 @@ def test_generate_json_write_test_code(file):


 @parametrize(
-    generate_test_path('./cbor/sample')
+    generate_test_path('cbor/sample')
 )
 def test_generate_cbor_write_test_code(file):
     with open(file, 'br') as fp:
@@ -257,7 +257,7 @@ def test_write_multi_duplicated_format(file=generate_test_path('integers.ion')):
     *tuple((f.value for f in Format.Format if Format.format_is_json(f.value)))
 )
 def test_write_json_format(f):
-    table = execution_with_command(['write', generate_test_path('integers.ion'), '--format', f'{f}'])
+    table = execution_with_command(['write', generate_test_path('json/object.json'), '--format', f'{f}'])
     assert gather_all_options_in_list(table) == sorted([('load_dump', f'{f}', 'file')])


@@ -265,7 +265,7 @@ def test_write_json_format(f):
     *tuple((f.value for f in Format.Format if Format.format_is_json(f.value)))
 )
 def test_read_json_format(f):
-    table = execution_with_command(['read', generate_test_path('integers.ion'), '--format', f'{f}'])
+    table = execution_with_command(['read', generate_test_path('json/object.json'), '--format', f'{f}'])
     assert gather_all_options_in_list(table) == sorted([('load_dump', f'{f}', 'file')])


@@ -273,7 +273,7 @@ def test_read_json_format(f):
     *tuple((f.value for f in Format.Format if Format.format_is_cbor(f.value)))
 )
 def test_write_cbor_format(f):
-    table = execution_with_command(['write', generate_test_path('integers.ion'), '--format', f'{f}'])
+    table = execution_with_command(['write', generate_test_path('cbor/sample'), '--format', f'{f}'])
     assert gather_all_options_in_list(table) == sorted([('load_dump', f'{f}', 'file')])


@@ -281,7 +281,7 @@ def test_write_cbor_format(f):
     *tuple((f.value for f in Format.Format if Format.format_is_cbor(f.value)))
 )
 def test_read_cbor_format(f):
-    table = execution_with_command(['read', generate_test_path('integers.ion'), '--format', f'{f}'])
+    table = execution_with_command(['read', generate_test_path('cbor/sample'), '--format', f'{f}'])
     assert gather_all_options_in_list(table) == sorted([('load_dump', f'{f}', 'file')])
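Taken together, the retargeted tests above drive the CLI exactly the way an end user would: they parse an argv list with docopt and pass the result to the entry point. The same run can be reproduced from a Python shell; this is a sketch that assumes the module docstring is the docopt usage text, as the tests' doc variable suggests:

    import amazon.ionbenchmark.ion_benchmark_cli as cli
    from docopt import docopt

    # Mirrors execution_with_command in the tests: parse the arguments, run the
    # benchmark, and collect the results table that the CLI returns.
    argv = ['read', 'tests/benchmark_sample_data/json/object.json', '--format', 'json']
    table = cli.ion_python_benchmark_cli(docopt(cli.__doc__, argv=argv))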