From 574f3818e1a23dca6fd626a1a11cb672f7ac2a11 Mon Sep 17 00:00:00 2001 From: Alon Zakai Date: Wed, 12 Aug 2020 09:50:51 -0700 Subject: [PATCH] Remove fastcomp-only ELIMINATE_DUPLICATE_FUNCTIONS* options (#11876) This has been a no-op on wasm, and this PR keeps it that way (we do duplicate function elimination in binaryen automatically). See #11860 --- emcc.py | 9 - .../docs/optimizing/Optimizing-Code.rst | 1 - src/settings.js | 16 +- tests/fuzz/25.c | 5 - tests/test_core.py | 28 - tests/test_other.py | 127 +--- tools/building.py | 5 - tools/duplicate_function_eliminator.py | 413 ------------- tools/eliminate-duplicate-functions.js | 547 ------------------ 9 files changed, 4 insertions(+), 1147 deletions(-) delete mode 100644 tools/duplicate_function_eliminator.py delete mode 100644 tools/eliminate-duplicate-functions.js diff --git a/emcc.py b/emcc.py index 192abecd3ec7c..2f780c86535f0 100755 --- a/emcc.py +++ b/emcc.py @@ -1869,9 +1869,6 @@ def include_and_export(name): shared.Settings.WASM_BINARY_FILE = shared.JS.escape_for_js_string(os.path.basename(wasm_binary_target)) shared.Settings.ASMJS_CODE_FILE = shared.JS.escape_for_js_string(os.path.basename(asm_target)) shared.Settings.ASM_JS = 2 # when targeting wasm, we use a wasm Memory, but that is not compatible with asm.js opts - if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS: - diagnostics.warning('emcc', 'for wasm there is no need to set ELIMINATE_DUPLICATE_FUNCTIONS, the binaryen optimizer does it automatically') - shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS = 0 if options.js_opts and not options.force_js_opts: options.js_opts = None logger.debug('asm.js opts not forced by user or an option that depends them, and we do not intend to run the asm.js, so disabling and leaving opts to the binaryen optimizer') @@ -2695,12 +2692,6 @@ def get_eliminate(): else: optimizer.queue += ['registerize'] - # NOTE: Important that this comes after registerize/registerizeHarder - if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS and shared.Settings.OPT_LEVEL >= 2: - optimizer.flush() - building.eliminate_duplicate_funcs(final) - save_intermediate('dfe') - if shared.Settings.EVAL_CTORS and options.memory_init_file and not use_source_map(options) and not shared.Settings.WASM: optimizer.flush() building.eval_ctors(final, memfile) diff --git a/site/source/docs/optimizing/Optimizing-Code.rst b/site/source/docs/optimizing/Optimizing-Code.rst index 52cbb76eb6368..6c442f456b72b 100644 --- a/site/source/docs/optimizing/Optimizing-Code.rst +++ b/site/source/docs/optimizing/Optimizing-Code.rst @@ -86,7 +86,6 @@ The following compiler settings can help (see ``src/settings.js`` for more detai - Disable inlining when possible, using ``-s INLINING_LIMIT=1``. Compiling with -Os or -Oz generally avoids inlining too. (Inlining can make code faster, though, so use this carefully.) - You can use the ``-s FILESYSTEM=0`` option to disable bundling of filesystem support code (the compiler should optimize it out if not used, but may not always succeed). This can be useful if you are building a pure computational library, for example. - The ``ENVIRONMENT`` flag lets you specify that the output will only run on the web, or only run in node.js, etc. This prevents the compiler from emitting code to support all possible runtime environments, saving ~2KB. -- You can use ``ELIMINATE_DUPLICATE_FUNCTIONS`` to remove duplicate functions, which C++ templates often create. (This is already done by default for wasm, in ``-O1`` and above.) LTO === diff --git a/src/settings.js b/src/settings.js index dec5125f04e76..521da17f01d4b 100644 --- a/src/settings.js +++ b/src/settings.js @@ -1412,19 +1412,6 @@ var PTHREADS_DEBUG = 0; // If true, building against Emscripten's asm.js/wasm heap memory profiler. var MEMORYPROFILER = 0; -// Duplicate function elimination. This coalesces function bodies that are -// identical, which can happen e.g. if two methods have different C/C++ or LLVM -// types, but end up identical at the asm.js level (all pointers are the same as -// int32_t in asm.js, for example). -// -// This option is quite slow to run, as it processes and hashes all methods in -// the codebase in multiple passes. -// -// [fastcomp-only] -var ELIMINATE_DUPLICATE_FUNCTIONS = 0; // disabled by default -var ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS = 0; -var ELIMINATE_DUPLICATE_FUNCTIONS_PASSES = 5; - // This tries to evaluate global ctors at compile-time, applying their effects // into the mem init file. This saves running code during startup, and also // allows removing the global ctor functions and other code that only they used, @@ -1766,6 +1753,9 @@ var LEGACY_SETTINGS = [ ['SKIP_STACK_IN_SMALL', [0, 1], 'SKIP_STACK_IN_SMALL is no longer needed as the backend can optimize it directly'], ['SAFE_STACK', [0], 'Replace SAFE_STACK=1 with STACK_OVERFLOW_CHECK=2'], ['MEMORY_GROWTH_STEP', 'MEMORY_GROWTH_LINEAR_STEP'], + ['ELIMINATE_DUPLICATE_FUNCTIONS', [0, 1], 'Duplicate function elimination for wasm is handled automatically by binaryen'], + ['ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS', [0], 'Duplicate function elimination for wasm is handled automatically by binaryen'], + ['ELIMINATE_DUPLICATE_FUNCTIONS_PASSES', [5], 'Duplicate function elimination for wasm is handled automatically by binaryen'], // WASM_OBJECT_FILES is handled in emcc.py, supporting both 0 and 1 for now. ['WASM_OBJECT_FILES', [0, 1], 'For LTO, use -flto or -fto=thin instead; to disable LTO, just do not pass WASM_OBJECT_FILES=1 as 1 is the default anyhow'], ['TOTAL_MEMORY', 'INITIAL_MEMORY'], diff --git a/tests/fuzz/25.c b/tests/fuzz/25.c index 07823c70b8505..df74b0b5ab9db 100644 --- a/tests/fuzz/25.c +++ b/tests/fuzz/25.c @@ -1785,8 +1785,3 @@ XXX max block depth: 5 XXX percentage a fresh-made variable is used: 17.8 XXX percentage an existing variable is used: 82.2 ********************* end of statistics **********************/ - - -// /usr/bin/python /Users/achoudhury/Code/emscripten/emscripten/emcc -Oz --llvm-opts 1 /Users/achoudhury/Code/emscripten/emscripten/tests/fuzz/temp_fuzzcode28225_.cpp -o /Users/achoudhury/Code/emscripten/emscripten/tests/fuzz/fuzz.cpp -I /usr/local/Cellar/csmith/2.2.0/include/csmith-2.2.0/runtime -s ELIMINATE_DUPLICATE_FUNCTIONS=1 --emit-symbol-map -w -s MAIN_MODULE=1 -s EMTERPRETIFY=1 -s EMTERPRETIFY_WHITELIST=["_main"] - - diff --git a/tests/test_core.py b/tests/test_core.py index bf2b01318ec18..4744b435c940b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -264,18 +264,6 @@ class TestCoreBase(RunnerCore): def is_wasm2js(self): return self.is_wasm_backend() and not self.get_setting('WASM') - # whether the test mode supports duplicate function elimination in js - def supports_js_dfe(self): - # wasm does this when optimizing anyhow, and the wasm backend always - # optimizes the wasm even if it does wasm2js later - if self.is_wasm() or self.is_wasm_backend(): - return False - supported_opt_levels = ['-O2', '-O3', '-Oz', '-Os'] - for opt_level in supported_opt_levels: - if opt_level in self.emcc_args: - return True - return False - # Use closure in some tests for some additional coverage def maybe_closure(self): if '-g' not in self.emcc_args and ('-O2' in self.emcc_args or '-Os' in self.emcc_args): @@ -6258,14 +6246,6 @@ def test(): test() - if self.supports_js_dfe(): - print("Testing poppler with ELIMINATE_DUPLICATE_FUNCTIONS set to 1", file=sys.stderr) - num_original_funcs = self.count_funcs('src.cpp.o.js') - self.set_setting('ELIMINATE_DUPLICATE_FUNCTIONS', 1) - test() - # Make sure that DFE ends up eliminating more than 200 functions (if we can view source) - assert (num_original_funcs - self.count_funcs('src.cpp.o.js')) > 200 - @needs_make('make') @is_slow_test def test_openjpeg(self): @@ -8290,14 +8270,6 @@ def test(assert_returncode=0): print('ENVIRONMENT =', self.get_setting('ENVIRONMENT')) test() - def test_dfe(self): - if not self.supports_js_dfe(): - self.skipTest('dfe-only') - self.set_setting('ELIMINATE_DUPLICATE_FUNCTIONS', 1) - self.do_run_in_out_file_test('tests', 'core', 'test_hello_world') - self.emcc_args += ['-g2'] # test for issue #6331 - self.do_run_in_out_file_test('tests', 'core', 'test_hello_world') - def test_postrun_exception(self): # verify that an exception thrown in postRun() will not trigger the # compilation failed handler, and will be printed to stderr. diff --git a/tests/test_other.py b/tests/test_other.py index 49640d2ea5df7..aec6150f107ad 100644 --- a/tests/test_other.py +++ b/tests/test_other.py @@ -43,7 +43,6 @@ import tools.line_endings import tools.js_optimizer import tools.tempfiles -import tools.duplicate_function_eliminator scons_path = shared.which('scons') emmake = shared.bat_suffix(path_from_root('emmake')) @@ -6999,130 +6998,6 @@ def test_warn_unexported_main(self): proc = self.run_process([EMCC, path_from_root('tests', 'hello_world.c'), '-s', 'EXPORTED_FUNCTIONS=[]'], stderr=PIPE) self.assertContained(WARNING, proc.stderr) - ############################################################ - # Function eliminator tests - ############################################################ - def normalize_line_endings(self, input): - return input.replace('\r\n', '\n').replace('\n\n', '\n').replace('\n\n', '\n') - - def get_file_contents(self, file): - file_contents = "" - with open(file) as fout: - file_contents = "".join(fout.readlines()) - - file_contents = self.normalize_line_endings(file_contents) - - return file_contents - - def function_eliminator_test_helper(self, input_file, expected_output_file, use_hash_info=False): - input_file = path_from_root('tests', 'optimizer', input_file) - expected_output_file = path_from_root('tests', 'optimizer', expected_output_file) - command = [path_from_root('tools', 'eliminate-duplicate-functions.js'), input_file, '--no-minimize-whitespace', '--use-asm-ast'] - - if use_hash_info: - command.append('--use-hash-info') - - proc = self.run_process(NODE_JS + command, stdin=PIPE, stderr=PIPE, stdout=PIPE) - assert proc.stderr == '', proc.stderr - expected_output = self.get_file_contents(expected_output_file) - output = self.normalize_line_endings(proc.stdout) - - self.assertIdentical(expected_output, output) - - def test_function_eliminator_simple(self): - self.function_eliminator_test_helper('test-function-eliminator-simple.js', - 'test-function-eliminator-simple-output.js') - - def test_function_eliminator_replace_function_call(self): - self.function_eliminator_test_helper('test-function-eliminator-replace-function-call.js', - 'test-function-eliminator-replace-function-call-output.js') - - def test_function_eliminator_replace_function_call_two_passes(self): - self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-output.js', - 'test-function-eliminator-replace-function-call-two-passes-output.js') - - def test_function_eliminator_replace_array_value(self): - output_file = 'output.js' - - try: - shared.safe_copy(path_from_root('tests', 'optimizer', 'test-function-eliminator-replace-array-value.js'), output_file) - - tools.duplicate_function_eliminator.run(output_file) - - output_file_contents = self.get_file_contents(output_file) - - expected_file_contents = self.get_file_contents(path_from_root('tests', 'optimizer', 'test-function-eliminator-replace-array-value-output.js')) - - self.assertIdentical(expected_file_contents, output_file_contents) - finally: - tools.tempfiles.try_delete(output_file) - - def test_function_eliminator_replace_object_value_assignment(self): - self.function_eliminator_test_helper('test-function-eliminator-replace-object-value-assignment.js', - 'test-function-eliminator-replace-object-value-assignment-output.js') - - def test_function_eliminator_variable_clash(self): - self.function_eliminator_test_helper('test-function-eliminator-variable-clash.js', - 'test-function-eliminator-variable-clash-output.js') - - def test_function_eliminator_replace_variable_value(self): - self.function_eliminator_test_helper('test-function-eliminator-replace-variable-value.js', - 'test-function-eliminator-replace-variable-value-output.js') - - @no_wasm_backend('tests native asm.js optimizer, which is never build for wasm backend') - def test_function_eliminator_double_parsed_correctly(self): - # This is a test that makes sure that when we perform final optimization on - # the JS file, doubles are preserved (and not converted to ints). - output_file = 'output.js' - - try: - shared.safe_copy(path_from_root('tests', 'optimizer', 'test-function-eliminator-double-parsed-correctly.js'), output_file) - - # Run duplicate function elimination - tools.duplicate_function_eliminator.run(output_file) - - # Run last opts - shutil.move(tools.js_optimizer.run(output_file, ['last', 'asm']), output_file) - output_file_contents = self.get_file_contents(output_file) - - # Compare - expected_file_contents = self.get_file_contents(path_from_root('tests', 'optimizer', 'test-function-eliminator-double-parsed-correctly-output.js')) - self.assertIdentical(expected_file_contents, output_file_contents) - finally: - tools.tempfiles.try_delete(output_file) - - # Now do the same, but using a pre-generated equivalent function hash info that - # comes in handy for parallel processing - def test_function_eliminator_simple_with_hash_info(self): - self.function_eliminator_test_helper('test-function-eliminator-simple-with-hash-info.js', - 'test-function-eliminator-simple-output.js', - use_hash_info=True) - - def test_function_eliminator_replace_function_call_with_hash_info(self): - self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-with-hash-info.js', - 'test-function-eliminator-replace-function-call-output.js', - use_hash_info=True) - - def test_function_eliminator_replace_function_call_two_passes_with_hash_info(self): - self.function_eliminator_test_helper('test-function-eliminator-replace-function-call-output-with-hash-info.js', - 'test-function-eliminator-replace-function-call-two-passes-output.js', - use_hash_info=True) - - def test_function_eliminator_replace_object_value_assignment_with_hash_info(self): - self.function_eliminator_test_helper('test-function-eliminator-replace-object-value-assignment-with-hash-info.js', - 'test-function-eliminator-replace-object-value-assignment-output.js', - use_hash_info=True) - - def test_function_eliminator_variable_clash_with_hash_info(self): - self.function_eliminator_test_helper('test-function-eliminator-variable-clash-with-hash-info.js', - 'test-function-eliminator-variable-clash-output.js', - use_hash_info=True) - - def test_function_eliminator_replace_variable_value_with_hash_info(self): - self.function_eliminator_test_helper('test-function-eliminator-replace-variable-value-with-hash-info.js', - 'test-function-eliminator-replace-variable-value-output.js', - use_hash_info=True) - def test_source_file_with_fixed_language_mode(self): create_test_file('src_tmp_fixed_lang', ''' #include @@ -9035,7 +8910,7 @@ def test_minimal_runtime_code_size(self): '-DNDEBUG', '-ffast-math'] - asmjs = ['-s', 'WASM=0', '--separate-asm', '-s', 'ELIMINATE_DUPLICATE_FUNCTIONS=1', '--memory-init-file', '1'] + asmjs = ['-s', 'WASM=0', '--separate-asm', '-s', '--memory-init-file', '1'] wasm2js = ['-s', 'WASM=0', '--memory-init-file', '1'] hello_world_sources = [path_from_root('tests', 'small_hello_world.c'), diff --git a/tools/building.py b/tools/building.py index 86c2a31268a00..a1d3a205c7867 100644 --- a/tools/building.py +++ b/tools/building.py @@ -986,11 +986,6 @@ def eval_ctors(js_file, binary_file, binaryen_bin='', debug_info=False): check_call(cmd) -def eliminate_duplicate_funcs(filename): - from . import duplicate_function_eliminator - duplicate_function_eliminator.eliminate_duplicate_funcs(filename) - - def calculate_reachable_functions(infile, initial_list, can_reach=True): with ToolchainProfiler.profile_block('calculate_reachable_functions'): from . import asm_module diff --git a/tools/duplicate_function_eliminator.py b/tools/duplicate_function_eliminator.py deleted file mode 100644 index bc9f334301d2a..0000000000000 --- a/tools/duplicate_function_eliminator.py +++ /dev/null @@ -1,413 +0,0 @@ -# Copyright 2016 The Emscripten Authors. All rights reserved. -# Emscripten is available under two separate licenses, the MIT license and the -# University of Illinois/NCSA Open Source License. Both these licenses can be -# found in the LICENSE file. - -from __future__ import print_function -import os -import sys -import subprocess -import re -import json -import shutil -import tempfile -import logging -import traceback - -sys.path.insert(1, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -from tools import shared, building -from tools.js_optimizer import DEBUG, temp_files, start_funcs_marker, end_funcs_marker, split_funcs, start_asm_marker, end_asm_marker -from tools.js_optimizer import MIN_CHUNK_SIZE, MAX_CHUNK_SIZE, NUM_CHUNKS_PER_CORE - -DUPLICATE_FUNCTION_ELIMINATOR = shared.path_from_root('tools', 'eliminate-duplicate-functions.js') - - -def process_shell(js_engine, shell, equivalentfn_hash_info=None): - suffix = '.eliminatedupes' - - with temp_files.get_file(suffix + '.js') as temp_file: - with open(temp_file, 'w') as f: - f.write(shell) - f.write('\n') - - f.write(equivalentfn_hash_info) - - proc = shared.run_process( - js_engine + - [DUPLICATE_FUNCTION_ELIMINATOR, temp_file, '--use-hash-info', '--no-minimize-whitespace'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - assert len(proc.stdout) - assert len(proc.stderr) == 0 - - return proc.stdout - - -def run_on_chunk(command): - try: - file_suffix = '.js' - index = command.index(DUPLICATE_FUNCTION_ELIMINATOR) - filename = command[index + 1] - - if '--gen-hash-info' in command: - file_suffix = '.json' - - if os.environ.get('EMCC_SAVE_OPT_TEMP') and os.environ.get('EMCC_SAVE_OPT_TEMP') != '0': - saved = 'save_' + os.path.basename(filename) - while os.path.exists(saved): - saved = 'input' + str(int(saved.replace('input', '').replace('.txt', '')) + 1) + '.txt' - print('running DFE command', ' '.join([c if c != filename else saved for c in command]), file=sys.stderr) - shutil.copyfile(filename, os.path.join(shared.get_emscripten_temp_dir(), saved)) - - if shared.EM_BUILD_VERBOSE >= 3: - print('run_on_chunk: ' + str(command), file=sys.stderr) - - proc = shared.run_process(command, stdout=subprocess.PIPE) - output = proc.stdout - assert proc.returncode == 0, 'Error in optimizer (return code ' + str(proc.returncode) + '): ' + output - assert len(output) and not output.startswith('Assertion failed'), 'Error in optimizer: ' + output - filename = temp_files.get(os.path.basename(filename) + '.dfjo' + file_suffix).name - - with open(filename, 'w') as f: - f.write(output) - if DEBUG and not shared.WINDOWS: - print('.', file=sys.stderr) # Skip debug progress indicator on Windows, since it doesn't buffer well with multiple threads printing to console. - return filename - except KeyboardInterrupt: - # avoid throwing keyboard interrupts from a child process - raise Exception() - except (TypeError, ValueError): - formatted_lines = traceback.format_exc().splitlines() - - print(">>>>>>>>>>>>>>>>>", file=sys.stderr) - for formatted_line in formatted_lines: - print(formatted_line, file=sys.stderr) - print("<<<<<<<<<<<<<<<<<", file=sys.stderr) - - raise - - -def dump_equivalent_functions(passed_in_filename, global_data): - # Represents the sets of equivalent functions for the passed in filename - equivalent_fn_info = {} - equivalent_fn_json_file = passed_in_filename + ".equivalent_functions.json" - - # If we are running more than one pass, then we want to merge - # all the hash infos into one - if os.path.isfile(equivalent_fn_json_file): - print("Merging data from current pass for {} into {}".format(passed_in_filename, equivalent_fn_json_file), file=sys.stderr) - with open(equivalent_fn_json_file) as data_file: - equivalent_fn_info = json.load(data_file) - else: - print("Writing equivalent functions for {} to {}".format(passed_in_filename, equivalent_fn_json_file), file=sys.stderr) - - # Merge the global data's fn_hash_to_fn_name structure into - # the equivalent function info hash. - for fn_hash, fn_names in global_data['fn_hash_to_fn_name'].items(): - if fn_hash not in equivalent_fn_info: - # Exclude single item arrays as they are of no use to us. - if len(fn_names) > 1: - equivalent_fn_info[fn_hash] = fn_names[:] - else: - for fn_name in fn_names: - if fn_name not in equivalent_fn_info[fn_hash]: - equivalent_fn_info[fn_hash].append(fn_name) - - with open(equivalent_fn_json_file, 'w') as fout: - fout.write(json.dumps(equivalent_fn_info)) - - -def write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename): - # Represents the aggregated info for all the json files passed in - # Each json file contains info for one of the processed chunks - global_data = {} - global_data['fn_hash_to_fn_name'] = {} - global_data['fn_hash_to_fn_body'] = {} - global_data['variable_names'] = {} - - for json_file in json_files: - with open(json_file) as data_file: - data = json.load(data_file) - - # Merge the data's fn_hash_to_fn_name structure into - # the global data hash. - for fn_hash, fn_names in data['fn_hash_to_fn_name'].items(): - if fn_hash not in global_data['fn_hash_to_fn_name']: - global_data['fn_hash_to_fn_name'][fn_hash] = fn_names[:] - global_data['fn_hash_to_fn_body'][fn_hash] = data['fn_hash_to_fn_body'][fn_hash] - else: - assert(data['fn_hash_to_fn_body'][fn_hash] == global_data['fn_hash_to_fn_body'][fn_hash]) - - for fn_name in fn_names: - if fn_name not in global_data['fn_hash_to_fn_name'][fn_hash]: - global_data['fn_hash_to_fn_name'][fn_hash].append(fn_name) - - # Merge the data's variable_names structure into - # the global data hash. - for variable, value in data['variable_names'].items(): - if variable not in global_data['variable_names']: - global_data['variable_names'][variable] = value - - variable_names = global_data['variable_names'] - - # Lets generate the equivalent function hash from the global data set - equivalent_fn_hash = {} - for fn_hash, fn_names in global_data['fn_hash_to_fn_name'].items(): - shortest_fn = None - for fn_name in fn_names: - if (fn_name not in variable_names) and (shortest_fn is None or (len(fn_name) < len(shortest_fn))): - shortest_fn = fn_name - - if shortest_fn is not None: - for fn_name in fn_names: - if fn_name not in variable_names and fn_name != shortest_fn: - equivalent_fn_hash[fn_name] = shortest_fn - - # Dump the sets of equivalent functions if the user desires it - # This comes in handy for debugging - if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS: - dump_equivalent_functions(passed_in_filename, global_data) - - # Now write the equivalent function hash to the last line of the file - f.write('// ' + json.dumps(equivalent_fn_hash, separators=(',', ':'))) - - -# gen_hash_info is used to determine whether we are generating -# the global set of function implementation hashes. If set to -# False, we assume that we have to use the global hash info to -# reduce the set of duplicate functions -# Returns the filename of the processed JS file, which is expected to be -# deleted by the caller once done. -def run_on_js(filename, gen_hash_info=False): - js_engine = shared.NODE_JS - - js = open(filename).read() - if os.linesep != '\n': - js = js.replace(os.linesep, '\n') # we assume \n in the splitting code - - equivalentfn_hash_info = None - passed_in_filename = filename - - # Find markers - start_funcs = js.find(start_funcs_marker) - end_funcs = js.rfind(end_funcs_marker) - - if start_funcs < 0 or end_funcs < start_funcs: - logging.critical('Invalid input file. Did not contain appropriate markers. (start_funcs: %s, end_funcs: %s)' % (start_funcs, end_funcs)) - sys.exit(1) - - if not gen_hash_info: - equivalentfn_hash_info = js[js.rfind('//'):] - - start_asm = js.find(start_asm_marker) - end_asm = js.rfind(end_asm_marker) - assert (start_asm >= 0) == (end_asm >= 0) - - # We need to split out the asm shell as well, for minification - pre = js[:start_asm + len(start_asm_marker)] - post = js[end_asm:] - asm_shell_pre = js[start_asm + len(start_asm_marker):start_funcs + len(start_funcs_marker)] - # Prevent "uglify" from turning 0.0 into 0 in variables' initialization. To do this we first replace 0.0 with - # ZERO$DOT$ZERO and then replace it back. - asm_shell_pre = re.sub(r'(\S+\s*=\s*)0\.0', r'\1ZERO$DOT$ZERO', asm_shell_pre) - asm_shell_post = js[end_funcs + len(end_funcs_marker):end_asm + len(end_asm_marker)] - asm_shell = asm_shell_pre + '\nEMSCRIPTEN_FUNCS();\n' + asm_shell_post - js = js[start_funcs + len(start_funcs_marker):end_funcs] - - # we assume there is a maximum of one new name per line - asm_shell_pre, asm_shell_post = process_shell(js_engine, asm_shell, equivalentfn_hash_info).split('EMSCRIPTEN_FUNCS();') - asm_shell_pre = re.sub(r'(\S+\s*=\s*)ZERO\$DOT\$ZERO', r'\g<1>0.0', asm_shell_pre) - asm_shell_post = asm_shell_post.replace('});', '})') - pre += asm_shell_pre + '\n' + start_funcs_marker - post = end_funcs_marker + asm_shell_post + post - - if not gen_hash_info: - # We don't need the extra info at the end - post = post[:post.rfind('//')].strip() - else: - pre = js[:start_funcs + len(start_funcs_marker)] - post = js[end_funcs + len(end_funcs_marker):] - js = js[start_funcs + len(start_funcs_marker):end_funcs] - post = end_funcs_marker + post - - total_size = len(js) - funcs = split_funcs(js, False) - - js = None - - # if we are making source maps, we want our debug numbering to start from the - # top of the file, so avoid breaking the JS into chunks - cores = building.get_num_cores() - - intended_num_chunks = int(round(cores * NUM_CHUNKS_PER_CORE)) - chunk_size = min(MAX_CHUNK_SIZE, max(MIN_CHUNK_SIZE, total_size / intended_num_chunks)) - chunks = shared.chunkify(funcs, chunk_size) - - chunks = [chunk for chunk in chunks if len(chunk)] - if DEBUG and len(chunks): - print('chunkification: num funcs:', len(funcs), 'actual num chunks:', len(chunks), 'chunk size range:', max(map(len, chunks)), '-', min(map(len, chunks)), file=sys.stderr) - funcs = None - - if len(chunks): - def write_chunk(chunk, i): - temp_file = temp_files.get('.jsfunc_%d.js' % i).name - with open(temp_file, 'w') as f: - f.write(chunk) - if not gen_hash_info: - f.write('\n') - f.write(equivalentfn_hash_info) - return temp_file - filenames = [write_chunk(chunks[i], i) for i in range(len(chunks))] - else: - filenames = [] - - old_filenames = filenames[:] - if len(filenames): - commands = [js_engine + [DUPLICATE_FUNCTION_ELIMINATOR, f, '--gen-hash-info' if gen_hash_info else '--use-hash-info', '--no-minimize-whitespace'] for f in filenames] - - if DEBUG and commands is not None: - print([' '.join(command if command is not None else '(null)') for command in commands], file=sys.stderr) - - cores = min(cores, len(filenames)) - if len(chunks) > 1 and cores >= 2: - # We can parallelize - if DEBUG: - print('splitting up js optimization into %d chunks, using %d cores (total: %.2f MB)' % (len(chunks), cores, total_size / (1024 * 1024.)), file=sys.stderr) - pool = building.get_multiprocessing_pool() - filenames = pool.map(run_on_chunk, commands, chunksize=1) - else: - # We can't parallize, but still break into chunks to avoid uglify/node memory issues - if len(chunks) > 1 and DEBUG: - print('splitting up js optimization into %d chunks' % (len(chunks)), file=sys.stderr) - filenames = [run_on_chunk(command) for command in commands] - else: - filenames = [] - - # we create temp files in the child threads, clean them up here when we are done - for filename in filenames: - temp_files.note(filename) - - json_files = [] - - # We're going to be coalescing the files back at the end - # Just replace the file list with the ones provided in - # the command list - and save off the generated Json - if gen_hash_info: - json_files = filenames[:] - filenames = old_filenames[:] - - for filename in filenames: - temp_files.note(filename) - - filename += '.jo.js' - f = open(filename, 'w') - f.write(pre) - pre = None - - # sort functions by size, to make diffing easier and to improve aot times - funcses = [] - for out_file in filenames: - funcses.append(split_funcs(open(out_file).read(), False)) - funcs = [item for sublist in funcses for item in sublist] - funcses = None - if not os.environ.get('EMCC_NO_OPT_SORT'): - funcs.sort(key=lambda x: (len(x[1]), x[0]), reverse=True) - - for func in funcs: - f.write(func[1]) - funcs = None - - f.write('\n') - f.write(post) - # No need to write suffix: if there was one, it is inside post which exists when suffix is there - f.write('\n') - - if gen_hash_info and len(json_files): - write_equivalent_fn_hash_to_file(f, json_files, passed_in_filename) - f.close() - - return filename - - -def save_temp_file(file_to_process): - if os.environ.get('EMSCRIPTEN_SAVE_TEMP_FILES') and os.environ.get('EMSCRIPTEN_TEMP_FILES_DIR'): - destinationFile = file_to_process - - temp_dir_name = tempfile.gettempdir() - destinationFile = destinationFile.replace(temp_dir_name, os.environ.get('EMSCRIPTEN_TEMP_FILES_DIR')) - - if not os.path.exists(os.path.dirname(destinationFile)): - os.makedirs(os.path.dirname(destinationFile)) - - print("Copying {} to {}".format(file_to_process, destinationFile), file=sys.stderr) - shutil.copyfile(file_to_process, destinationFile) - - -def get_func_names(javascript_file): - func_names = [] - start_tok = "// EMSCRIPTEN_START_FUNCS" - end_tok = "// EMSCRIPTEN_END_FUNCS" - start_off = 0 - end_off = 0 - - with open(javascript_file, 'rt') as fin: - blob = "".join(fin.readlines()) - start_off = blob.find(start_tok) + len(start_tok) - end_off = blob.find(end_tok) - asm_chunk = blob[start_off:end_off] - - for match in re.finditer(r'function (\S+?)\s*\(', asm_chunk): - func_names.append(match.groups(1)[0]) - - return func_names - - -def eliminate_duplicate_funcs(file_name): - if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS != 0: - # Remove previous log file if it exists - equivalent_fn_json_file = file_name + ".equivalent_functions.json" - if os.path.isfile(equivalent_fn_json_file): - print("Deleting old json: " + equivalent_fn_json_file, file=sys.stderr) - os.remove(equivalent_fn_json_file) - - old_funcs = get_func_names(file_name) - - for pass_num in range(shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_PASSES): - if DEBUG: - print("[PASS {}]: eliminating duplicate functions in: {}.".format(pass_num, file_name), file=sys.stderr) - - # Generate the JSON for the equivalent hash first - processed_file = run_on_js(filename=file_name, gen_hash_info=True) - try: - save_temp_file(processed_file) - # Use the hash to reduce the JS file - final_file = run_on_js(filename=processed_file, gen_hash_info=False) - finally: - os.remove(processed_file) - - save_temp_file(final_file) - - shared.safe_move(final_file, file_name) - - if shared.Settings.ELIMINATE_DUPLICATE_FUNCTIONS_DUMP_EQUIVALENT_FUNCTIONS != 0: - new_funcs = get_func_names(file_name) - - eliminated_funcs_file = file_name + ".eliminated_functions.json" - print("Writing eliminated functions to file: {}".format(eliminated_funcs_file), file=sys.stderr) - - with open(eliminated_funcs_file, 'w') as fout: - eliminated_functions = list(set(old_funcs) - set(new_funcs)) - eliminated_functions.sort() - for eliminated_function in eliminated_functions: - fout.write('{}\n'.format(eliminated_function)) - - -def run(filename, js_engine=shared.NODE_JS): - js_engine = shared.listify(js_engine) - - return temp_files.run_and_clean(lambda: eliminate_duplicate_funcs(filename)) - - -if __name__ == '__main__': - run(sys.argv[1], sys.argv[2:]) - sys.exit(0) diff --git a/tools/eliminate-duplicate-functions.js b/tools/eliminate-duplicate-functions.js deleted file mode 100644 index 9fa2983a6921e..0000000000000 --- a/tools/eliminate-duplicate-functions.js +++ /dev/null @@ -1,547 +0,0 @@ -// Copyright 2016 The Emscripten Authors. All rights reserved. -// Emscripten is available under two separate licenses, the MIT license and the -// University of Illinois/NCSA Open Source License. Both these licenses can be -// found in the LICENSE file. -// -// Duplicate Function Elimination. -// -// This is a Javascript file that is used to post-process an Emscripten -// transpiled JS file. It will remove all the duplicate functions from the -// generated ASM. In its current form, the input JS file is expected to be a -// 'chunk' from an Emscripten generated ASM.JS file. -// -// An ASM JS chunk consists of a number of ASM.JS function definitions. It can -// also represent the ASM JS 'shell' which consists of the global variable -// declarations for the generated ASM JS. -// -// The file will remove all the generated functions that are deemed to be -// identical. Currently, the file will only run one pass of the algorithm. The -// caller of this JS file can run multiple passes to ensure that higher level -// functions which will become identical after a pass can be further eliminated. -// -// Usually, 4 or at most 5 passes will result in an optimal reduction - i.e., in -// a file that cannot be reduced any further. - -var crypto = require('crypto'); -var uglify = require('../third_party/uglify-js/uglify-js'); - -var nodeFS = require('fs'); -var nodePath = require('path'); -var debug = false; -var debugFile = undefined; -var debugFileName = 'function_eliminator.log'; -var genHashInfo = false; -var useHashInfo = false; -var useAsmAst = false; - -// Variables that helps control verbosity of debug spew -// Set appropriate zones here (to 0 or 1) for debugging various -// parts of the algorithm. -var ZONE_IDENTIFY_DUPLICATE_FUNCS = 1; -var ZONE_REPLACE_FUNCTION_REFERENCES = 1; -var ZONE_REPLACE_DUPLICATE_FUNCS = 1; -var ZONE_EQUIVALENT_FUNCTION_HASH = 1; -var ZONE_TOP_LEVEL = 1; -var ZONE_DUMP_AST = 0; - -if (!nodeFS.existsSync) { - nodeFS.existsSync = function(path) { - try { - return !!nodeFS.readFileSync(path); - } catch (e) { - return false; - } - } -} - -function srcToAst(src) { - return uglify.parser.parse(src, false, false); -} - -function astToSrc(ast, minifyWhitespace) { - return uglify.uglify.gen_code(ast, { - debug: debug, - ascii_only: true, - beautify: !minifyWhitespace, - indent_level: 1 - }); -} - -// Traverses the children of a node. If the traverse function returns an object, -// replaces the child. If it returns true, stop the traversal and return true. -function traverseChildren(node, traverse, pre, post) { - for (var i = 0; i < node.length; i++) { - var subnode = node[i]; - if (Array.isArray(subnode)) { - var subresult = traverse(subnode, pre, post); - if (subresult === true) return true; - if (subresult !== null && typeof subresult === 'object') node[i] = subresult; - } - } -} - -print = function(x) { - process['stdout'].write(x + '\n'); -}; - -printErr = function(x) { - process['stderr'].write(x + '\n'); -}; - -function debugLog(zone, str) { - if (debug && (zone !== 0)) { - nodeFS.writeSync(debugFile, str + '\n'); - } -} - -// Traverses a JavaScript syntax tree rooted at the given node calling the given -// callback for each node. -// @arg node: The root of the AST. -// @arg pre: The pre to call for each node. This will be called with -// the node as the first argument and its type as the second. If true is -// returned, the traversal is stopped. If an object is returned, -// it replaces the passed node in the tree. If null is returned, we stop -// traversing the subelements (but continue otherwise). -// @arg post: A callback to call after traversing all children. -// @returns: If the root node was replaced, the new root node. If the traversal -// was stopped, true. Otherwise undefined. -function traverse(node, pre, post) { - var type = node[0], - result, len; - var relevant = typeof type === 'string'; - if (relevant) { - var result = pre(node, type); - if (result === true) return true; - if (result && result !== null) node = result; // Continue processing on this node - } - if (result !== null) { - if (traverseChildren(node, traverse, pre, post) === true) return true; - } - if (relevant) { - if (post) { - var postResult = post(node, type); - result = result || postResult; - } - } - return result; -} - -function dumpAst(ast) { - debugLog(ZONE_DUMP_AST, JSON.stringify(ast, null, ' ')); -} - -function getFunctionBody(node) { - // Remove the function part of the source for the function - var functionSrc = astToSrc(node, true); - var functionNameRegex = /(function .*?)\(/; - return functionSrc.replace(functionNameRegex, "("); -} - -function traverseFunctions(ast, callback) { - var topLevelList = useAsmAst ? ast : ast[1]; - - for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) { - var node = topLevelList[listIndex]; - - if (node[0] === 'defun') { - callback(node); - } - } -} - -function identifyDuplicateFunctions(ast) { - debugLog(ZONE_TOP_LEVEL, "identifyDuplicateFunctions"); - - var functionHashToFunctionName = {}; - - traverseFunctions(ast, function(node) { - debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, "Node: " + node); - var functionBody = getFunctionBody(node); - - debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, "Function Body: " + functionBody + "\n"); - var functionHash = crypto.createHash('sha256').update(functionBody).digest('hex'); - - if (functionHashToFunctionName[functionHash] === undefined) { - functionHashToFunctionName[functionHash] = []; - } - - debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, typeof node[1]); - functionHashToFunctionName[functionHash].push(node[1]); - debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, functionHash + '->' + node[1]); - }); - - if (debug) { - for (var key in functionHashToFunctionName) { - debugLog(ZONE_IDENTIFY_DUPLICATE_FUNCS, key + "->" + functionHashToFunctionName[key]); - } - } - - return functionHashToFunctionName; -} - -function getVariableNames(ast) { - var variableNames = {}; - traverse(ast, function(node, type) { - if (type === 'var') { - - var vars = node[1]; - - if (Array.isArray(vars)) { - for (var i = 0; i < vars.length; i++) { - var ident = vars[i][0]; - - variableNames[ident] = 1; - } - } - } - }); - - return variableNames; -} - -function replaceFunctionDefinitions(ast, equivalentFunctionHash) { - debugLog(ZONE_TOP_LEVEL, 'replaceFunctionDefinitions'); - - var topLevelList = useAsmAst ? ast : ast[1]; - var indicesToRemove = []; - for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) { - var node = topLevelList[listIndex]; - - if (node[0] === 'defun' && equivalentFunctionHash[node[1]] !== undefined) { - indicesToRemove.push(listIndex); - } - } - - if (indicesToRemove.length > 0) { - for (var i = indicesToRemove.length - 1; i >= 0; --i) { - debugLog(ZONE_REPLACE_DUPLICATE_FUNCS, "Removing " + topLevelList[indicesToRemove[i]][1]); - topLevelList.splice(indicesToRemove[i], 1); - } - } -} - -function replaceFunctionReferences(ast, equivalentFunctionHash) { - debugLog(ZONE_TOP_LEVEL, 'replaceFunctionReferences'); - traverse(ast, function(node, type) { - if (type === 'call') { - var functionName = node[1][1]; - - // Replace the call with a call to the equivalent function if there is one - if (equivalentFunctionHash[functionName] !== undefined) { - node[1][1] = equivalentFunctionHash[functionName]; - } - } else if (type === 'var') { - var vars = node[1]; - for (var i = 0; i < vars.length; i++) { - debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable: ' + vars[i]); - var value = vars[i][1][1]; - debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable value: ' + value); - - if (equivalentFunctionHash[value] !== undefined) { - debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Variable value replacement: ' + equivalentFunctionHash[value]); - vars[i][1][1] = equivalentFunctionHash[value]; - } - } - } else if (type === 'assign') { - if (node[3][0] === 'name' && equivalentFunctionHash[node[3][1]] !== undefined) { - node[3][1] = equivalentFunctionHash[node[3][1]]; - } - } else if (type === 'object') { - var assignments = node[1]; - - for (var i = 0; i < assignments.length; i++) { - debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, 'Object Value Assignment: ' + assignments[i][1][1]); - - if (equivalentFunctionHash[assignments[i][1][1]] !== undefined) { - assignments[i][1][1] = equivalentFunctionHash[assignments[i][1][1]]; - } - } - } else if (type === 'array') { - var arrayVars = node[1]; - - if (Array.isArray(arrayVars)) { - for (var i = 0; i < arrayVars.length; i++) { - debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "Array: " + arrayVars[i][0] + ", " + arrayVars[i][1]); - // First element contains type, 2nd contains value - if (arrayVars[i][0] == 'name' && equivalentFunctionHash[arrayVars[i][1]] !== undefined) { - debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "Replacing array value " + arrayVars[i][1]); - arrayVars[i][1] = equivalentFunctionHash[arrayVars[i][1]]; - } - } - } else { - debugLog(ZONE_REPLACE_FUNCTION_REFERENCES, "ArrayVars (not an array): " + arrayVars + ", node: " + node); - } - } - }); -} - -function replaceDuplicateFuncs(ast, equivalentFunctionHash) { - debugLog(ZONE_TOP_LEVEL, "replaceDuplicateFuncs"); - - // Replace references to all functions with their equivalent function - replaceFunctionReferences(ast, equivalentFunctionHash); - - // Now lets replace the function definitions - replaceFunctionDefinitions(ast, equivalentFunctionHash); -} - -function logEquivalentFunctionHash(equivalentFunctionHash) { - if (debug && ZONE_EQUIVALENT_FUNCTION_HASH != 0) { - debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, "Equivalent Function Hash:"); - for (var fn in equivalentFunctionHash) { - debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, fn + "->" + equivalentFunctionHash[fn]); - } - } -} - -function generateEquivalentFunctionHash(functionHashToFunctionName, variableNames) { - var equivalentFunctionHash = {}; - - debugLog(ZONE_TOP_LEVEL, "generateEquivalentFunctionHash"); - - if (debug && ZONE_EQUIVALENT_FUNCTION_HASH != 0) { - debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, "Equivalent Functions:"); - - for (var fnHash in functionHashToFunctionName) { - if (functionHashToFunctionName[fnHash].length > 1) { - debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, JSON.stringify(functionHashToFunctionName[fnHash], null, ' ')); - } - } - } - - for (var fnHash in functionHashToFunctionName) { - var equivalentFunctions = functionHashToFunctionName[fnHash]; - var shortestFunction = undefined; - var equivalentFn = undefined; - - // From each list of equivalent functions, pick the - // shortest one that is not also a variable name - for (var index in equivalentFunctions) { - equivalentFn = equivalentFunctions[index]; - - // If one of the variables is not the same name as the equivalent function, - // and the equivalent function is shorter than the shortest function. - if ((variableNames[equivalentFn] === undefined) && - (shortestFunction === undefined || equivalentFn.length < shortestFunction.length)) { - shortestFunction = equivalentFn; - } - - if (debug && variableNames[equivalentFn] !== undefined) { - debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, equivalentFn + " is a variable"); - } - } - - if (shortestFunction !== undefined) { - // Populate the equivalent function hash with this info - for (var index in equivalentFunctions) { - equivalentFn = equivalentFunctions[index]; - - // If we're not the shortest function, and - // we are not a variable name - if ((equivalentFn !== shortestFunction) && variableNames[equivalentFn] === undefined) { - equivalentFunctionHash[equivalentFn] = shortestFunction; - debugLog(ZONE_EQUIVALENT_FUNCTION_HASH, equivalentFn + "->" + shortestFunction); - } - } - } - } - - return equivalentFunctionHash; -} - -function getBodyForFunction(ast, functionName) { - var functionBody = undefined; - var topLevelList = ast[1]; - - for (var listIndex = 0; listIndex < topLevelList.length; ++listIndex) { - var node = topLevelList[listIndex]; - - if (node[0] === 'defun' && node[1] === functionName) { - functionBody = getFunctionBody(node); - break; - } - } - - return functionBody; -} - -function checkForHashCollisions(ast, functionHashToFunctionName) { - var functionHashToFunctionBody = {}; - - for (var functionHash in functionHashToFunctionName) { - var equivalentFunctions = functionHashToFunctionName[functionHash]; - var functionBody = getBodyForFunction(ast, equivalentFunctions[0]); - - functionHashToFunctionBody[functionHash] = functionBody; - - // If we have more than one equivalent function, make sure - // that the bodies are the same from the hash values - if (equivalentFunctions.length > 1) { - for (var functionIndex = 1; functionIndex < equivalentFunctions.length; ++functionIndex) { - var curFunctionBody = getBodyForFunction(ast, equivalentFunctions[functionIndex]); - - if (curFunctionBody !== functionBody) { - printErr("ERROR!!! Function bodies for two hash-equivalent functions differ!!! Candidates: " - + equivalentFunctions[0] + ", " + equivalentFunctions[functionIndex]); - process.exit(1); - } - } - } - } - - return functionHashToFunctionBody; -} - -function eliminateDuplicateFuncs(ast) { - debugLog(ZONE_TOP_LEVEL, "eliminateDuplicateFuncs"); - - // Phase 1 - identify duplicate functions - var functionHashToFunctionName = identifyDuplicateFunctions(ast); - - // Phase 1.1 - Check for hash collisions - checkForHashCollisions(ast, functionHashToFunctionName); - - // Phase 2 - identify variables that conflict with function names - var variableNames = getVariableNames(ast); - - // Phase 3 - generate the equivalent function hash - var equivalentFunctionHash = generateEquivalentFunctionHash(functionHashToFunctionName, variableNames); - - // Phase 4 - for each set of equivalent functions, pick one and - // use it to replace the other equivalent functions. - replaceDuplicateFuncs(ast, equivalentFunctionHash); - - return; -} - -function find(filename) { - var prefixes = [nodePath.join(__dirname, '..', 'src'), process.cwd()]; - for (var i = 0; i < prefixes.length; ++i) { - var combined = nodePath.join(prefixes[i], filename); - if (nodeFS.existsSync(combined)) { - return combined; - } - } - return filename; -} - -function findAsmAst(ast) { - var asmNode = undefined; - traverse(ast, function(node, type) { - if (type === 'var') { - - var vars = node[1]; - for (var i = 0; i < vars.length; i++) { - var ident = vars[i][0]; - - if (ident === 'asm') { - asmNode = vars[i][1][1][3]; // asm->call->toplevel-ast - } - } - } - }); - - return asmNode; -} - -function printHashInfo(ast) { - debugLog(ZONE_TOP_LEVEL, "printHashInfo"); - - var infoHash = {}; - infoHash['variable_names'] = getVariableNames(ast); - infoHash['fn_hash_to_fn_name'] = identifyDuplicateFunctions(ast); - infoHash['fn_hash_to_fn_body'] = checkForHashCollisions(ast, infoHash['fn_hash_to_fn_name']); - - print(JSON.stringify(infoHash)); -} - -read = function(filename) { - var absolute = find(filename); - return nodeFS['readFileSync'](absolute).toString(); -}; - -// Main -var arguments_ = process['argv'].slice(2); -var noMinimizeWhitespace = false; // Eliminate whitespace by default -var functionName = undefined; -var src = undefined; - -for (var argIndex = 0; argIndex < arguments_.length; ++argIndex) { - var arg = arguments_[argIndex]; - if (arg === '--debug') { - debug = true; - debugFile = nodeFS.openSync(debugFileName, 'w'); - } else if (arg === '--no-minimize-whitespace') { - noMinimizeWhitespace = true; - } else if (arg === '--gen-hash-info') { - genHashInfo = true; - } else if (arg === '--use-hash-info') { - useHashInfo = true; - } else if (arg === '--use-asm-ast') { - useAsmAst = true; - } else if (arg === '--get-function-body') { - if (argIndex === arguments_.length_ - 1) { - throw new Error('Please specify valid arguments!'); - } - - functionName = arguments_[argIndex+1]; - argIndex += 1; - } else if (/^--/.test(arg)) { - throw new Error('Please specify valid arguments!'); - } else if (src === undefined) { - src = read(arg); - } else { - throw new Error('Please specify valid arguments!'); - } -} - -var ast = srcToAst(src); -var asmAst = ast; - -if (useAsmAst) { - asmAst = findAsmAst(ast); -} - -if (debug) { - dumpAst(ast); -} - -if (functionName !== undefined) { - var functionBody = getBodyForFunction(ast, functionName); - - if (functionBody === undefined) { - throw new Error('Could not find body for function ' + functionName + '!!!'); - } - - print(functionBody); -} else if (genHashInfo) { - printHashInfo(asmAst); -} else { - equivalentFunctionHash = {}; - - if (useHashInfo) { - // The last line has the required info - infoHashJsonStart = src.lastIndexOf("//") + 2 // 2 for going past the // - - if (infoHashJsonStart == -1) { - throw new Error('--use-hash-info specified but no JSON found at the end of the file!'); - } - - equivalentFunctionHash = JSON.parse(src.substring(infoHashJsonStart)); - - logEquivalentFunctionHash(equivalentFunctionHash); - replaceDuplicateFuncs(asmAst, equivalentFunctionHash); - } else { - eliminateDuplicateFuncs(asmAst); - } - - var minimizeWhitespace = (debug || noMinimizeWhitespace) ? false : true; - var js = astToSrc(ast, minimizeWhitespace); - - print(js); -} - -if (debug && debugFile !== undefined) { - printErr('Wrote debug log to ' + debugFileName); - nodeFS.close(debugFile); -}