diff --git a/rbql-js/build_engine.js b/rbql-js/build_engine.js deleted file mode 100755 index b3bfb87..0000000 --- a/rbql-js/build_engine.js +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/env node -const path = require('path'); -const fs = require('fs'); - -const rbql_home_dir = __dirname; - - -function replace_all(src, search, replacement) { - return src.split(search).join(replacement); -} - - -function escape_string_literal_backtick(src) { - src = replace_all(src, '\\', '\\\\'); - src = replace_all(src, "`", "\\`"); - src = replace_all(src, "${", "\\${"); - src = "`" + src + "`"; - return src; -} - - -function read_engine_text() { - try { - return fs.readFileSync(path.join(rbql_home_dir, 'rbql.js'), 'utf-8'); - } catch (e) { - return ''; - } -} - - -function build_engine_text(for_web) { - let proto_engine_dir = path.join(rbql_home_dir, 'proto_engine'); - let builder_text = fs.readFileSync(path.join(proto_engine_dir, 'builder.js'), 'utf-8'); - let template_text = fs.readFileSync(path.join(proto_engine_dir, 'template.js'), 'utf-8'); - let marker = 'codegeneration_pseudo_function_include_combine("template.js")'; - let do_not_edit_warning = '// DO NOT EDIT!\n// This file was autogenerated from builder.js and template.js using build_engine.js script\n\n'; - let engine_body = builder_text.replace(marker, escape_string_literal_backtick(template_text)); - if (for_web) { - engine_body = "let module = {'exports': {}};\n" + 'rbql = module.exports;\n' + engine_body; - engine_body = '( function() {\n' + engine_body + '})()'; - engine_body = 'let rbql = null;\n' + engine_body; - } - let engine_text = do_not_edit_warning + engine_body + '\n\n' + do_not_edit_warning; - return engine_text; -} - - -function build_engine() { - let engine_text = build_engine_text(false); - fs.writeFileSync(path.join(rbql_home_dir, 'rbql.js'), engine_text, 'utf-8'); - let web_engine_text = build_engine_text(true); - fs.writeFileSync(path.join(rbql_home_dir, 'web_rbql.js'), web_engine_text, 'utf-8'); -} - 
- -module.exports.build_engine = build_engine; -module.exports.read_engine_text = read_engine_text; -module.exports.build_engine_text = build_engine_text; - -if (require.main === module) { - build_engine(); -} - - diff --git a/rbql-js/cli_rbql.js b/rbql-js/cli_rbql.js index 0f33536..eed9469 100755 --- a/rbql-js/cli_rbql.js +++ b/rbql-js/cli_rbql.js @@ -8,6 +8,14 @@ var rbql_csv = null; const csv_utils = require('./csv_utils.js'); const cli_parser = require('./cli_parser.js'); +let out_format_names = ['csv', 'tsv', 'monocolumn', 'input']; + +var tmp_worker_module_path = null; +var error_format = 'hr'; +var interactive_mode = false; +var user_input_reader = null; +var args = null; + // TODO implement query history like in Python version. "readline" modules allows to do that, see "completer" parameter. @@ -17,20 +25,12 @@ function die(error_msg) { process.exit(1); } -let out_format_names = ['csv', 'tsv', 'monocolumn', 'input']; -var tmp_worker_module_path = null; -var error_format = 'hr'; -var interactive_mode = false; -var user_input_reader = null; -var args = null; - - -function show_error(msg) { +function show_error(error_type, error_msg) { if (interactive_mode) { - console.log('\x1b[31;1mError:\x1b[0m ' + msg); + console.log(`\x1b[31;1mError [${error_type}]:\x1b[0m ${error_msg}`); } else { - console.error('Error: ' + msg); + console.error(`Error [${error_type}]: ${error_msg}`); } if (fs.existsSync(tmp_worker_module_path)) { let output_func = interactive_mode ? 
console.log : console.error; @@ -69,14 +69,6 @@ function cleanup_tmp() { } -function report_warnings_json(warnings) { - if (warnings !== null) { - var warnings_report = JSON.stringify({'warnings': warnings}); - process.stderr.write(warnings_report); - } -} - - function report_error_json(error_type, error_msg) { let report = new Object(); report.error_type = error_type; @@ -90,7 +82,7 @@ function report_error_json(error_type, error_msg) { function finish_query_with_error(error_type, error_msg) { if (error_format == 'hr') { - show_error(error_type + ': ' + error_msg); + show_error(error_type, error_msg); } else { report_error_json(error_type, error_msg); } @@ -209,7 +201,10 @@ function handle_query_success(warnings, output_path, delim, policy) { }); } } else { - report_warnings_json(warnings); + if (warnings !== null && warnings.length) { + var warnings_report = JSON.stringify({'warnings': warnings}); + process.stderr.write(warnings_report); + } } } @@ -288,11 +283,11 @@ function show_preview(input_path, delim, policy) { function start_preview_mode(args) { let input_path = get_default(args, 'input', null); if (!input_path) { - show_error('Input file must be provided in interactive mode. You can use stdin input only in non-interactive mode'); + show_error('generic', 'Input file must be provided in interactive mode. 
You can use stdin input only in non-interactive mode'); process.exit(1); } if (error_format != 'hr') { - show_error('Only default "hr" error format is supported in interactive mode'); + show_error('generic', 'Only default "hr" error format is supported in interactive mode'); process.exit(1); } let delim = get_default(args, 'delim', null); diff --git a/rbql-js/proto_engine/builder.js b/rbql-js/proto_engine/builder.js deleted file mode 100644 index 33c0e3c..0000000 --- a/rbql-js/proto_engine/builder.js +++ /dev/null @@ -1,690 +0,0 @@ -const external_js_template_text = codegeneration_pseudo_function_include_combine("template.js"); -// ^ The expression above will cause builder.js and tempalte.js to be combined to autogenerate rbql.js: builder.js + template.js -> ../rbql.js -// Expression is written as a function to pacify the linter. -// Unit tests will ensure that rbql.js is indeed a concatenation of builder.js and template.js - - -// This module works with records only. It is CSV-agnostic. 
-// Do not add CSV-related logic or variables/functions/objects like "delim", "separator" etc - - -// TODO get rid of functions with "_js" suffix - - -// TODO replace prototypes with classes: this improves readability - - -const version = '0.8.0'; - -const GROUP_BY = 'GROUP BY'; -const UPDATE = 'UPDATE'; -const SELECT = 'SELECT'; -const JOIN = 'JOIN'; -const INNER_JOIN = 'INNER JOIN'; -const LEFT_JOIN = 'LEFT JOIN'; -const STRICT_LEFT_JOIN = 'STRICT LEFT JOIN'; -const ORDER_BY = 'ORDER BY'; -const WHERE = 'WHERE'; -const LIMIT = 'LIMIT'; -const EXCEPT = 'EXCEPT'; - - -class RbqlParsingError extends Error {} -class RbqlIOHandlingError extends Error {} -class AssertionError extends Error {} - -var debug_mode = false; - -function assert(condition, message=null) { - if (!condition) { - if (!message) { - message = 'Assertion error'; - } - throw new AssertionError(message); - } -} - - -function get_all_matches(regexp, text) { - var result = []; - let match_obj = null; - while((match_obj = regexp.exec(text)) !== null) { - result.push(match_obj); - } - return result; -} - - -function replace_all(src, search, replacement) { - return src.split(search).join(replacement); -} - - -function str_strip(src) { - return src.replace(/^ +| +$/g, ''); -} - - -function rbql_meta_format(template_src, meta_params) { - for (var key in meta_params) { - if (!meta_params.hasOwnProperty(key)) - continue; - var value = meta_params[key]; - var template_src_upd = replace_all(template_src, key, value); - assert(template_src_upd != template_src); - template_src = template_src_upd; - } - return template_src; -} - - -function strip_comments(cline) { - cline = cline.trim(); - if (cline.startsWith('//')) - return ''; - return cline; -} - - -function parse_join_expression(src) { - var rgx = /^ *([^ ]+) +on +([ab][0-9]+) *== *([ab][0-9]+) *$/i; - var match = rgx.exec(src); - if (match === null) { - throw new RbqlParsingError('Invalid join syntax. 
Must be: " /path/to/B/table on a == b"'); - } - var table_id = match[1]; - var avar = match[2]; - var bvar = match[3]; - if (avar.charAt(0) == 'b') { - [avar, bvar] = [bvar, avar]; - } - if (avar.charAt(0) != 'a' || bvar.charAt(0) != 'b') { - throw new RbqlParsingError('Invalid join syntax. Must be: " /path/to/B/table on a == b"'); - } - avar = parseInt(avar.substr(1)) - 1; - var lhs_join_var = `safe_join_get(afields, ${avar})`; - let rhs_key_index = parseInt(bvar.substr(1)) - 1; - return [table_id, lhs_join_var, rhs_key_index]; -} - - -function generate_init_statements(column_vars, indent) { - var init_statements = []; - for (var i = 0; i < column_vars.length; i++) { - var var_name = column_vars[i]; - var var_group = var_name.charAt(0); - var zero_based_idx = parseInt(var_name.substr(1)) - 1; - if (var_group == 'a') { - init_statements.push(`var ${var_name} = safe_get(afields, ${zero_based_idx});`); - } else { - init_statements.push(`var ${var_name} = bfields === null ? null : safe_get(bfields, ${zero_based_idx});`); - } - } - for (var i = 1; i < init_statements.length; i++) { - init_statements[i] = indent + init_statements[i]; - } - return init_statements.join('\n'); -} - - -function replace_star_count(aggregate_expression) { - var rgx = /(^|,) *COUNT\( *\* *\) *(?:$|(?=,))/g; - var result = aggregate_expression.replace(rgx, '$1 COUNT(1)'); - return str_strip(result); -} - - -function replace_star_vars(rbql_expression) { - var middle_star_rgx = /(?:^|,) *\* *(?=, *\* *($|,))/g; - rbql_expression = rbql_expression.replace(middle_star_rgx, ']).concat(star_fields).concat(['); - var last_star_rgx = /(?:^|,) *\* *(?:$|,)/g; - rbql_expression = rbql_expression.replace(last_star_rgx, ']).concat(star_fields).concat(['); - return rbql_expression; -} - - -function translate_update_expression(update_expression, indent) { - var rgx = /(?:^|,) *a([1-9][0-9]*) *=(?=[^=])/g; - var translated = update_expression.replace(rgx, '\nsafe_set(up_fields, $1,'); - var update_statements 
= translated.split('\n'); - update_statements = update_statements.map(str_strip); - if (update_statements.length < 2 || update_statements[0] != '') { - throw new RbqlParsingError('Unable to parse "UPDATE" expression'); - } - update_statements = update_statements.slice(1); - for (var i = 0; i < update_statements.length; i++) { - update_statements[i] = update_statements[i] + ')'; - } - for (var i = 1; i < update_statements.length; i++) { - update_statements[i] = indent + update_statements[i]; - } - var translated = update_statements.join('\n'); - return translated; -} - - -function translate_select_expression_js(select_expression) { - var translated = replace_star_count(select_expression); - translated = replace_star_vars(translated); - translated = str_strip(translated); - if (!translated.length) { - throw new RbqlParsingError('"SELECT" expression is empty'); - } - return `[].concat([${translated}])`; -} - - -function separate_string_literals_js(rbql_expression) { - // The regex consists of 3 almost identicall parts, the only difference is quote type - var rgx = /('(\\(\\\\)*'|[^'])*')|("(\\(\\\\)*"|[^"])*")|(`(\\(\\\\)*`|[^`])*`)/g; - var match_obj = null; - var format_parts = []; - var string_literals = []; - var idx_before = 0; - while((match_obj = rgx.exec(rbql_expression)) !== null) { - var literal_id = string_literals.length; - var string_literal = match_obj[0]; - string_literals.push(string_literal); - var start_index = match_obj.index; - format_parts.push(rbql_expression.substring(idx_before, start_index)); - format_parts.push(`###RBQL_STRING_LITERAL###${literal_id}`); - idx_before = rgx.lastIndex; - } - format_parts.push(rbql_expression.substring(idx_before)); - var format_expression = format_parts.join(''); - format_expression = format_expression.replace(/\t/g, ' '); - return [format_expression, string_literals]; -} - - -function combine_string_literals(backend_expression, string_literals) { - for (var i = 0; i < string_literals.length; i++) { - 
backend_expression = replace_all(backend_expression, `###RBQL_STRING_LITERAL###${i}`, string_literals[i]); - } - return backend_expression; -} - - -function locate_statements(rbql_expression) { - let statement_groups = []; - statement_groups.push([STRICT_LEFT_JOIN, LEFT_JOIN, INNER_JOIN, JOIN]); - statement_groups.push([SELECT]); - statement_groups.push([ORDER_BY]); - statement_groups.push([WHERE]); - statement_groups.push([UPDATE]); - statement_groups.push([GROUP_BY]); - statement_groups.push([LIMIT]); - statement_groups.push([EXCEPT]); - var result = []; - for (var ig = 0; ig < statement_groups.length; ig++) { - for (var is = 0; is < statement_groups[ig].length; is++) { - var statement = statement_groups[ig][is]; - var rgxp = new RegExp('(?:^| )' + replace_all(statement, ' ', ' *') + '(?= )', 'ig'); - var matches = get_all_matches(rgxp, rbql_expression); - if (!matches.length) - continue; - if (matches.length > 1) - throw new RbqlParsingError(`More than one ${statement} statements found`); - assert(matches.length == 1); - var match = matches[0]; - var match_str = match[0]; - result.push([match.index, match.index + match_str.length, statement]); - break; // Break to avoid matching a sub-statement from the same group e.g. "INNER JOIN" -> "JOIN" - } - } - result.sort(function(a, b) { return a[0] - b[0]; }); - return result; -} - - -function separate_actions(rbql_expression) { - rbql_expression = str_strip(rbql_expression); - var ordered_statements = locate_statements(rbql_expression); - var result = {}; - for (var i = 0; i < ordered_statements.length; i++) { - var statement_start = ordered_statements[i][0]; - var span_start = ordered_statements[i][1]; - var statement = ordered_statements[i][2]; - var span_end = i + 1 < ordered_statements.length ? 
ordered_statements[i + 1][0] : rbql_expression.length; - assert(statement_start < span_start); - assert(span_start <= span_end); - var span = rbql_expression.substring(span_start, span_end); - var statement_params = {}; - if ([STRICT_LEFT_JOIN, LEFT_JOIN, INNER_JOIN, JOIN].indexOf(statement) != -1) { - statement_params['join_subtype'] = statement; - statement = JOIN; - } - - if (statement == UPDATE) { - if (statement_start != 0) - throw new RbqlParsingError('UPDATE keyword must be at the beginning of the query'); - span = span.replace(/^ *SET/i, ''); - } - - if (statement == ORDER_BY) { - span = span.replace(/ ASC *$/i, ''); - var new_span = span.replace(/ DESC *$/i, ''); - if (new_span != span) { - span = new_span; - statement_params['reverse'] = true; - } else { - statement_params['reverse'] = false; - } - } - - if (statement == SELECT) { - if (statement_start != 0) - throw new RbqlParsingError('SELECT keyword must be at the beginning of the query'); - var match = /^ *TOP *([0-9]+) /i.exec(span); - if (match !== null) { - statement_params['top'] = parseInt(match[1]); - span = span.substr(match.index + match[0].length); - } - match = /^ *DISTINCT *(COUNT)? 
/i.exec(span); - if (match !== null) { - statement_params['distinct'] = true; - if (match[1]) { - statement_params['distinct_count'] = true; - } - span = span.substr(match.index + match[0].length); - } - } - statement_params['text'] = str_strip(span); - result[statement] = statement_params; - } - if (!result.hasOwnProperty(SELECT) && !result.hasOwnProperty(UPDATE)) { - throw new RbqlParsingError('Query must contain either SELECT or UPDATE statement'); - } - assert(result.hasOwnProperty(SELECT) != result.hasOwnProperty(UPDATE)); - return result; -} - - -function find_top(rb_actions) { - if (rb_actions.hasOwnProperty(LIMIT)) { - var result = parseInt(rb_actions[LIMIT]['text']); - if (isNaN(result)) { - throw new RbqlParsingError('LIMIT keyword must be followed by an integer'); - } - return result; - } - var select_action = rb_actions[SELECT]; - if (select_action && select_action.hasOwnProperty('top')) { - return select_action['top']; - } - return null; -} - - -function indent_user_init_code(user_init_code) { - let source_lines = user_init_code.split(/(?:\r\n)|\r|\n/); - source_lines = source_lines.map(line => ' ' + line); - return source_lines.join('\n'); -} - - -function extract_column_vars(rbql_expression) { - var rgx = /(?:^|[^_a-zA-Z0-9])([ab][1-9][0-9]*)(?:$|(?=[^_a-zA-Z0-9]))/g; - var result = []; - var seen = {}; - var matches = get_all_matches(rgx, rbql_expression); - for (var i = 0; i < matches.length; i++) { - var var_name = matches[i][1]; - if (!seen.hasOwnProperty(var_name)) { - result.push(var_name); - seen[var_name] = 1; - } - } - return result; -} - - -function translate_except_expression(except_expression) { - let skip_vars = except_expression.split(','); - let skip_indices = []; - let rgx = /^a[1-9][0-9]*$/; - for (let i = 0; i < skip_vars.length; i++) { - let skip_var = str_strip(skip_vars[i]); - let match = rgx.exec(skip_var); - if (match === null) { - throw new RbqlParsingError('Invalid EXCEPT syntax'); - } - 
skip_indices.push(parseInt(skip_var.substring(1)) - 1); - } - skip_indices = skip_indices.sort((a, b) => a - b); - let indices_str = skip_indices.join(','); - return `select_except(afields, [${indices_str}])`; -} - - -function HashJoinMap(record_iterator, key_index) { - this.max_record_len = 0; - this.hash_map = new Map(); - this.record_iterator = record_iterator; - this.key_index = key_index; - this.error_msg = null; - this.external_error_handler = null; - this.external_success_handler = null; - this.nr = 0; - - this.finish_build = function() { - if (this.error_msg === null) { - this.external_success_handler(); - } else { - this.external_error_handler('IO handling', this.error_msg); - } - }; - - this.add_record = function(record) { - this.nr += 1; - let num_fields = record.length; - this.max_record_len = Math.max(this.max_record_len, num_fields); - if (this.key_index >= num_fields) { - this.error_msg = `No "b${this.key_index + 1}" field at record: ${this.nr} in "B" table`; - this.record_iterator.finish(); - } - let key = record[this.key_index]; - let key_records = this.hash_map.get(key); - if (key_records === undefined) { - this.hash_map.set(key, [record]); - } else { - key_records.push(record); - } - }; - - this.build = function(success_callback, error_callback) { - this.external_success_handler = success_callback; - this.external_error_handler = error_callback; - this.record_iterator.set_record_callback((record) => { this.add_record(record); }); - this.record_iterator.set_finish_callback(() => { this.finish_build(); }); - this.record_iterator.start(); - }; - - this.get_join_records = function(key) { - let result = this.hash_map.get(key); - if (result === undefined) - return []; - return result; - }; - - this.get_warnings = function() { - return this.record_iterator.get_warnings(); - }; -} - - -function parse_to_js(query, js_template_text, join_tables_registry, user_init_code) { - let rbql_lines = query.split('\n'); - rbql_lines = rbql_lines.map(strip_comments); 
- rbql_lines = rbql_lines.filter(line => line.length); - var full_rbql_expression = rbql_lines.join(' '); - var column_vars = extract_column_vars(full_rbql_expression); - var [format_expression, string_literals] = separate_string_literals_js(full_rbql_expression); - var rb_actions = separate_actions(format_expression); - - var js_meta_params = {}; - js_meta_params['__RBQLMP__user_init_code'] = user_init_code; - - if (rb_actions.hasOwnProperty(ORDER_BY) && rb_actions.hasOwnProperty(UPDATE)) - throw new RbqlParsingError('"ORDER BY" is not allowed in "UPDATE" queries'); - - if (rb_actions.hasOwnProperty(GROUP_BY)) { - if (rb_actions.hasOwnProperty(ORDER_BY) || rb_actions.hasOwnProperty(UPDATE)) - throw new RbqlParsingError('"ORDER BY" and "UPDATE" are not allowed in aggregate queries'); - var aggregation_key_expression = rb_actions[GROUP_BY]['text']; - js_meta_params['__RBQLMP__aggregation_key_expression'] = '[' + combine_string_literals(aggregation_key_expression, string_literals) + ']'; - } else { - js_meta_params['__RBQLMP__aggregation_key_expression'] = 'null'; - } - - let join_map = null; - if (rb_actions.hasOwnProperty(JOIN)) { - var [rhs_table_id, lhs_join_var, rhs_key_index] = parse_join_expression(rb_actions[JOIN]['text']); - js_meta_params['__RBQLMP__join_operation'] = rb_actions[JOIN]['join_subtype']; - js_meta_params['__RBQLMP__lhs_join_var'] = lhs_join_var; - if (join_tables_registry === null) - throw new RbqlParsingError('JOIN operations were disabled'); - let join_record_iterator = join_tables_registry.get_iterator_by_table_id(rhs_table_id); - if (!join_record_iterator) - throw new RbqlParsingError(`Unable to find join table: "${rhs_table_id}"`); - join_map = new HashJoinMap(join_record_iterator, rhs_key_index); - } else { - js_meta_params['__RBQLMP__join_operation'] = 'VOID'; - js_meta_params['__RBQLMP__lhs_join_var'] = 'null'; - } - - if (rb_actions.hasOwnProperty(WHERE)) { - var where_expression = rb_actions[WHERE]['text']; - if 
(/[^!=]=[^=]/.exec(where_expression)) { - throw new RbqlParsingError('Assignments "=" are not allowed in "WHERE" expressions. For equality test use "==" or "==="'); - } - js_meta_params['__RBQLMP__where_expression'] = combine_string_literals(where_expression, string_literals); - } else { - js_meta_params['__RBQLMP__where_expression'] = 'true'; - } - - - if (rb_actions.hasOwnProperty(UPDATE)) { - var update_expression = translate_update_expression(rb_actions[UPDATE]['text'], ' '.repeat(8)); - js_meta_params['__RBQLMP__writer_type'] = 'simple'; - js_meta_params['__RBQLMP__select_expression'] = 'null'; - js_meta_params['__RBQLMP__update_statements'] = combine_string_literals(update_expression, string_literals); - js_meta_params['__RBQLMP__is_select_query'] = 'false'; - js_meta_params['__RBQLMP__top_count'] = 'null'; - } - - js_meta_params['__RBQLMP__init_column_vars_update'] = generate_init_statements(column_vars, ' '.repeat(4)); - js_meta_params['__RBQLMP__init_column_vars_select'] = generate_init_statements(column_vars, ' '.repeat(8)); - - if (rb_actions.hasOwnProperty(SELECT)) { - var top_count = find_top(rb_actions); - js_meta_params['__RBQLMP__top_count'] = top_count === null ? 
'null' : String(top_count); - if (rb_actions[SELECT].hasOwnProperty('distinct_count')) { - js_meta_params['__RBQLMP__writer_type'] = 'uniq_count'; - } else if (rb_actions[SELECT].hasOwnProperty('distinct')) { - js_meta_params['__RBQLMP__writer_type'] = 'uniq'; - } else { - js_meta_params['__RBQLMP__writer_type'] = 'simple'; - } - if (rb_actions.hasOwnProperty(EXCEPT)) { - js_meta_params['__RBQLMP__select_expression'] = translate_except_expression(rb_actions[EXCEPT]['text']); - } else { - let select_expression = translate_select_expression_js(rb_actions[SELECT]['text']); - js_meta_params['__RBQLMP__select_expression'] = combine_string_literals(select_expression, string_literals); - } - js_meta_params['__RBQLMP__update_statements'] = ''; - js_meta_params['__RBQLMP__is_select_query'] = 'true'; - } - - if (rb_actions.hasOwnProperty(ORDER_BY)) { - var order_expression = rb_actions[ORDER_BY]['text']; - js_meta_params['__RBQLMP__sort_key_expression'] = combine_string_literals(order_expression, string_literals); - js_meta_params['__RBQLMP__reverse_flag'] = rb_actions[ORDER_BY]['reverse'] ? 
'true' : 'false'; - js_meta_params['__RBQLMP__sort_flag'] = 'true'; - } else { - js_meta_params['__RBQLMP__sort_key_expression'] = 'null'; - js_meta_params['__RBQLMP__reverse_flag'] = 'false'; - js_meta_params['__RBQLMP__sort_flag'] = 'false'; - } - var js_code = rbql_meta_format(js_template_text, js_meta_params); - return [js_code, join_map]; -} - - -function load_module_from_file(js_code) { - let os = require('os'); - let path = require('path'); - let fs = require('fs'); - var tmp_dir = os.tmpdir(); - var script_filename = 'rbconvert_' + String(Math.random()).replace('.', '_') + '.js'; - let tmp_worker_module_path = path.join(tmp_dir, script_filename); - fs.writeFileSync(tmp_worker_module_path, js_code); - let worker_module = require(tmp_worker_module_path); - return worker_module; -} - - -function generic_run(user_query, input_iterator, output_writer, success_handler, error_handler, join_tables_registry=null, user_init_code='') { - try { - user_init_code = indent_user_init_code(user_init_code); - let [js_code, join_map] = parse_to_js(user_query, external_js_template_text, join_tables_registry, user_init_code); - let rbql_worker = null; - if (debug_mode) { - rbql_worker = load_module_from_file(js_code); - } else { - let module = {'exports': {}}; - eval('(function(){' + js_code + '})()'); - rbql_worker = module.exports; - } - rbql_worker.rb_transform(input_iterator, join_map, output_writer, success_handler, error_handler, debug_mode); - } catch (e) { - if (e instanceof RbqlParsingError) { - error_handler('query parsing', e.message); - } else { - if (debug_mode) { - console.log('Unexpected exception, dumping stack trace:'); - console.log(e.stack); - } - error_handler('unexpected', 'Unexpected exception: ' + e); - } - } -} - - -function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) { - let keys = Object.keys(inconsistent_records_info); - let entries = []; - for (let i = 0; i < keys.length; i++) { - let key = keys[i]; - let record_id = 
inconsistent_records_info[key]; - entries.push([record_id, key]); - } - entries.sort(function(a, b) { return a[0] - b[0]; }); - assert(entries.length > 1); - let [record_1, num_fields_1] = entries[0]; - let [record_2, num_fields_2] = entries[1]; - let warn_msg = `Number of fields in "${table_name}" table is not consistent: `; - warn_msg += `e.g. record ${record_1} -> ${num_fields_1} fields, record ${record_2} -> ${num_fields_2} fields`; - return warn_msg; -} - - -function TableIterator(input_table) { - this.input_table = input_table; - this.NR = 0; - this.fields_info = new Object(); - this.external_record_callback = null; - this.external_finish_callback = null; - this.finished = false; - - - this.set_record_callback = function(external_record_callback) { - this.external_record_callback = external_record_callback; - }; - - - this.set_finish_callback = function(external_finish_callback) { - this.external_finish_callback = external_finish_callback; - }; - - - this.start = function() { - while (!this.finished) { - let record = this.get_record(); - if (record === null) { - this.finish(); - } else { - this.external_record_callback(record); - } - } - }; - - - this.finish = function() { - if (!this.finished) { - this.finished = true; - this.external_finish_callback(); - } - }; - - - this.get_record = function() { - if (this.NR >= this.input_table.length) - return null; - let record = this.input_table[this.NR]; - this.NR += 1; - let num_fields = record.length; - if (!this.fields_info.hasOwnProperty(num_fields)) - this.fields_info[num_fields] = this.NR; - return record; - }; - - this.get_warnings = function() { - if (Object.keys(this.fields_info).length > 1) - return [make_inconsistent_num_fields_warning('input', this.fields_info)]; - return []; - }; -} - - -function TableWriter(external_table) { - this.table = external_table; - - this.write = function(fields) { - this.table.push(fields); - }; - - this.finish = function(after_finish_callback) { - after_finish_callback(); - 
}; - - this.get_warnings = function() { - return []; - }; -} - - -function SingleTableRegistry(table, table_id='B') { - this.table = table; - this.table_id = table_id; - - this.get_iterator_by_table_id = function(table_id) { - if (table_id !== this.table_id) { - throw new RbqlIOHandlingError(`Unable to find join table: "${table_id}"`); - } - return new TableIterator(this.table); - }; -} - - -function table_run(user_query, input_table, output_table, success_handler, error_handler, join_table=null, user_init_code='') { - let input_iterator = new TableIterator(input_table); - let output_writer = new TableWriter(output_table); - let join_tables_registry = join_table === null ? null : new SingleTableRegistry(join_table); - generic_run(user_query, input_iterator, output_writer, success_handler, error_handler, join_tables_registry, user_init_code); -} - - -function set_debug_mode() { - debug_mode = true; -} - - -module.exports.version = version; -module.exports.generic_run = generic_run; -module.exports.table_run = table_run; - -module.exports.TableIterator = TableIterator; -module.exports.TableWriter = TableWriter; -module.exports.SingleTableRegistry = SingleTableRegistry; - -module.exports.strip_comments = strip_comments; -module.exports.separate_actions = separate_actions; -module.exports.separate_string_literals_js = separate_string_literals_js; -module.exports.combine_string_literals = combine_string_literals; -module.exports.translate_except_expression = translate_except_expression; -module.exports.parse_join_expression = parse_join_expression; -module.exports.translate_update_expression = translate_update_expression; -module.exports.translate_select_expression_js = translate_select_expression_js; - -module.exports.set_debug_mode = set_debug_mode; diff --git a/rbql-js/proto_engine/template.js b/rbql-js/proto_engine/template.js deleted file mode 100644 index 1ddec64..0000000 --- a/rbql-js/proto_engine/template.js +++ /dev/null @@ -1,744 +0,0 @@ -try { 
-__RBQLMP__user_init_code -} catch (e) { - throw new Error('Exception while executing user-provided init code: ' + e); -} - - -class RbqlRuntimeError extends Error {} - - -function InternalBadFieldError(idx) { - this.idx = idx; - this.name = 'InternalBadFieldError'; -} - - - -var unfold_list = null; - -var module_was_used_failsafe = false; - -// Aggregators: -var aggregation_stage = 0; -var aggr_init_counter = 0; -var functional_aggregators = []; - -var writer = null; - -var NU = 0; // NU - Num Updated. Alternative variables: NW (Num Where) - Not Practical. NW (Num Written) - Impossible to implement. -var NR = 0; - -var finished_with_error = false; - -var external_success_handler = null; -var external_error_handler = null; - -var external_input_iterator = null; -var external_writer = null; -var external_join_map_impl = null; - -var process_function = null; -var join_map = null; -var node_debug_mode_flag = false; - - -function finish_processing_error(error_type, error_msg) { - if (finished_with_error) - return; - finished_with_error = true; - // Stopping input_iterator to trigger exit procedure. - external_input_iterator.finish(); - external_error_handler(error_type, error_msg); -} - - -function finish_processing_success() { - if (finished_with_error) - return; - try { - writer.finish(() => { - var join_warnings = external_join_map_impl ? 
external_join_map_impl.get_warnings() : []; - var warnings = join_warnings.concat(external_writer.get_warnings()).concat(external_input_iterator.get_warnings()); - external_success_handler(warnings); - }); - } catch (e) { - if (e instanceof RbqlRuntimeError) { - finish_processing_error('query execution', e.message); - } else { - if (node_debug_mode_flag) { - console.log('Unexpected exception, dumping stack trace:'); - console.log(e.stack); - } - finish_processing_error('unexpected', String(e)); - } - return; - } -} - - -function assert(condition, message) { - if (!condition) { - finish_processing_error('unexpected', message); - } -} - - -function stable_compare(a, b) { - for (var i = 0; i < a.length; i++) { - if (a[i] !== b[i]) - return a[i] < b[i] ? -1 : 1; - } -} - - -function safe_get(record, idx) { - return idx < record.length ? record[idx] : null; -} - - -function safe_join_get(record, idx) { - if (idx < record.length) { - return record[idx]; - } - throw new InternalBadFieldError(idx); -} - - -function safe_set(record, idx, value) { - if (idx - 1 < record.length) { - record[idx - 1] = value; - } else { - throw new InternalBadFieldError(idx - 1); - } -} - - -function Marker(marker_id, value) { - this.marker_id = marker_id; - this.value = value; - this.toString = function() { - throw new RbqlRuntimeError('Unsupported aggregate expression'); - } -} - - -function UnfoldMarker() {} - - -function UNFOLD(vals) { - if (unfold_list !== null) { - // Technically we can support multiple UNFOLD's but the implementation/algorithm is more complex and just doesn't worth it - throw new RbqlRuntimeError('Only one UNFOLD is allowed per query'); - } - unfold_list = vals; - return new UnfoldMarker(); -} - - - -function MinAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - // JS version doesn't need "NumHandler" hack like in Python impl because it has only one "number" type, no ints/floats - val = parseFloat(val); - var cur_aggr = 
this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, val); - } else { - this.stats.set(key, Math.min(cur_aggr, val)); - } - } - - this.get_final = function(key) { - return this.stats.get(key); - } -} - - - -function MaxAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, val); - } else { - this.stats.set(key, Math.max(cur_aggr, val)); - } - } - - this.get_final = function(key) { - return this.stats.get(key); - } -} - - -function CountAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, 1); - } else { - this.stats.set(key, cur_aggr + 1); - } - } - - this.get_final = function(key) { - return this.stats.get(key); - } -} - - -function SumAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, val); - } else { - this.stats.set(key, cur_aggr + val); - } - } - - this.get_final = function(key) { - return this.stats.get(key); - } -} - - -function AvgAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, [val, 1]); - } else { - var cur_sum = cur_aggr[0]; - var cur_cnt = cur_aggr[1]; - this.stats.set(key, [cur_sum + val, cur_cnt + 1]); - } - } - - this.get_final = function(key) { - var cur_aggr = this.stats.get(key); - var cur_sum = cur_aggr[0]; - var cur_cnt = cur_aggr[1]; - var avg = cur_sum / cur_cnt; - return avg; - } -} - - -function VarianceAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr 
=== undefined) { - this.stats.set(key, [val, val * val, 1]); - } else { - var cur_sum = cur_aggr[0]; - var cur_sum_sq = cur_aggr[1]; - var cur_cnt = cur_aggr[2]; - this.stats.set(key, [cur_sum + val, cur_sum_sq + val * val, cur_cnt + 1]); - } - } - - this.get_final = function(key) { - var cur_aggr = this.stats.get(key); - var cur_sum = cur_aggr[0]; - var cur_sum_sq = cur_aggr[1]; - var cur_cnt = cur_aggr[2]; - var avg_val = cur_sum / cur_cnt; - var variance = cur_sum_sq / cur_cnt - avg_val * avg_val; - return variance; - } -} - - -function MedianAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, [val]); - } else { - cur_aggr.push(val); - } - } - - this.get_final = function(key) { - var cur_aggr = this.stats.get(key); - cur_aggr.sort(function(a, b) { return a - b; }); - var m = Math.floor(cur_aggr.length / 2); - if (cur_aggr.length % 2) { - return cur_aggr[m]; - } else { - return (cur_aggr[m - 1] + cur_aggr[m]) / 2.0; - } - } -} - - -function FoldAggregator(post_proc) { - this.post_proc = post_proc; - this.stats = new Map(); - - this.increment = function(key, val) { - let cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, [val]); - } else { - cur_aggr.push(val); - } - } - - this.get_final = function(key) { - let cur_aggr = this.stats.get(key); - return this.post_proc(cur_aggr); - } -} - - -function SubkeyChecker() { - this.subkeys = new Map(); - - this.increment = function(key, subkey) { - var old_subkey = this.subkeys.get(key); - if (old_subkey === undefined) { - this.subkeys.set(key, subkey); - } else if (old_subkey != subkey) { - throw 'Unable to group by "' + key + '", different values in output: "' + old_subkey + '" and "' + subkey + '"'; - } - } - - this.get_final = function(key) { - return this.subkeys.get(key); - } -} - - -function init_aggregator(generator_name, val, 
post_proc=null) { - aggregation_stage = 1; - assert(aggr_init_counter == functional_aggregators.length, 'Unable to process aggregation expression'); - if (post_proc === null) { - functional_aggregators.push(new generator_name()); - } else { - functional_aggregators.push(new generator_name(post_proc)); - } - var res = new Marker(aggr_init_counter, val); - aggr_init_counter += 1; - return res; -} - - -function MIN(val) { - return aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val; -} - - -function MAX(val) { - return aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val; -} - -function COUNT(val) { - return aggregation_stage < 2 ? init_aggregator(CountAggregator, 1) : 1; -} - -function SUM(val) { - return aggregation_stage < 2 ? init_aggregator(SumAggregator, val) : val; -} - -function AVG(val) { - return aggregation_stage < 2 ? init_aggregator(AvgAggregator, val) : val; -} - -function VARIANCE(val) { - return aggregation_stage < 2 ? init_aggregator(VarianceAggregator, val) : val; -} - -function MEDIAN(val) { - return aggregation_stage < 2 ? init_aggregator(MedianAggregator, val) : val; -} - -function FOLD(val, post_proc = v => v.join('|')) { - return aggregation_stage < 2 ? 
init_aggregator(FoldAggregator, val, post_proc) : val; -} - - -function add_to_set(dst_set, value) { - var len_before = dst_set.size; - dst_set.add(value); - return len_before != dst_set.size; -} - - -function TopWriter(subwriter) { - this.subwriter = subwriter; - this.NW = 0; - - this.write = function(record) { - if (__RBQLMP__top_count !== null && this.NW >= __RBQLMP__top_count) - return false; - this.subwriter.write(record); - this.NW += 1; - return true; - } - - this.finish = function(after_finish_callback) { - this.subwriter.finish(after_finish_callback); - } -} - - -function UniqWriter(subwriter) { - this.subwriter = subwriter; - this.seen = new Set(); - - this.write = function(record) { - if (!add_to_set(this.seen, JSON.stringify(record))) - return true; - if (!this.subwriter.write(record)) - return false; - return true; - } - - this.finish = function(after_finish_callback) { - this.subwriter.finish(after_finish_callback); - } -} - - -function UniqCountWriter(subwriter) { - this.subwriter = subwriter; - this.records = new Map(); - - this.write = function(record) { - var key = JSON.stringify(record); - var old_val = this.records.get(key); - if (old_val) { - old_val[0] += 1; - } else { - this.records.set(key, [1, record]); - } - return true; - } - - this.finish = function(after_finish_callback) { - for (var [key, value] of this.records) { - let [count, record] = value; - record.unshift(count); - if (!this.subwriter.write(record)) - break; - } - this.subwriter.finish(after_finish_callback); - } -} - - -function SortedWriter(subwriter) { - this.subwriter = subwriter; - this.unsorted_entries = []; - - this.write = function(stable_entry) { - this.unsorted_entries.push(stable_entry); - return true; - } - - this.finish = function(after_finish_callback) { - var unsorted_entries = this.unsorted_entries; - unsorted_entries.sort(stable_compare); - if (__RBQLMP__reverse_flag) - unsorted_entries.reverse(); - for (var i = 0; i < unsorted_entries.length; i++) { - var entry 
= unsorted_entries[i]; - if (!this.subwriter.write(entry[entry.length - 1])) - break; - } - this.subwriter.finish(after_finish_callback); - } -} - - -function AggregateWriter(subwriter) { - this.subwriter = subwriter; - this.aggregators = []; - this.aggregation_keys = new Set(); - - this.finish = function(after_finish_callback) { - var all_keys = Array.from(this.aggregation_keys); - all_keys.sort(); - for (var i = 0; i < all_keys.length; i++) { - var key = all_keys[i]; - var out_fields = []; - for (var ag of this.aggregators) { - out_fields.push(ag.get_final(key)); - } - if (!this.subwriter.write(out_fields)) - break; - } - this.subwriter.finish(after_finish_callback); - } -} - - - -function FakeJoiner(join_map) { - this.get_rhs = function(lhs_key) { - return [null]; - } -} - - -function InnerJoiner(join_map) { - this.join_map = join_map; - - this.get_rhs = function(lhs_key) { - return this.join_map.get_join_records(lhs_key); - } -} - - -function LeftJoiner(join_map) { - this.join_map = join_map; - this.null_record = [Array(join_map.max_record_len).fill(null)]; - - this.get_rhs = function(lhs_key) { - let result = this.join_map.get_join_records(lhs_key); - if (result.length == 0) { - return this.null_record; - } - return result; - } -} - - -function StrictLeftJoiner(join_map) { - this.join_map = join_map; - - this.get_rhs = function(lhs_key) { - let result = this.join_map.get_join_records(lhs_key); - if (result.length != 1) { - throw new RbqlRuntimeError('In "STRICT LEFT JOIN" each key in A must have exactly one match in B. 
Bad A key: "' + lhs_key + '"'); - } - return result; - } -} - - -function select_except(src, except_fields) { - let result = []; - for (let i = 0; i < src.length; i++) { - if (except_fields.indexOf(i) == -1) - result.push(src[i]); - } - return result; -} - - -function process_update(NF, afields, rhs_records) { - if (rhs_records.length > 1) - throw new RbqlRuntimeError('More than one record in UPDATE query matched A-key in join table B'); - var bfields = null; - if (rhs_records.length == 1) - bfields = rhs_records[0]; - var up_fields = afields; - __RBQLMP__init_column_vars_select - if (rhs_records.length == 1 && (__RBQLMP__where_expression)) { - NU += 1; - __RBQLMP__update_statements - } - return writer.write(up_fields); -} - - -function select_simple(sort_key, out_fields) { - if (__RBQLMP__sort_flag) { - var sort_entry = sort_key.concat([NR, out_fields]); - if (!writer.write(sort_entry)) - return false; - } else { - if (!writer.write(out_fields)) - return false; - } - return true; -} - - -function select_aggregated(key, transparent_values) { - if (key !== null) { - key = JSON.stringify(key); - } - if (aggregation_stage === 1) { - if (!(writer instanceof TopWriter)) { - throw new RbqlRuntimeError('Unable to use "ORDER BY" or "DISTINCT" keywords in aggregate query'); - } - writer = new AggregateWriter(writer); - for (var i = 0; i < transparent_values.length; i++) { - var trans_value = transparent_values[i]; - if (trans_value instanceof Marker) { - writer.aggregators.push(functional_aggregators[trans_value.marker_id]); - writer.aggregators[writer.aggregators.length - 1].increment(key, trans_value.value); - } else { - writer.aggregators.push(new SubkeyChecker()); - writer.aggregators[writer.aggregators.length - 1].increment(key, trans_value); - } - } - aggregation_stage = 2; - } else { - for (var i = 0; i < transparent_values.length; i++) { - var trans_value = transparent_values[i]; - writer.aggregators[i].increment(key, trans_value); - } - } - 
writer.aggregation_keys.add(key) -} - - -function select_unfolded(sort_key, folded_fields) { - let out_fields = folded_fields.slice(); - let unfold_pos = folded_fields.findIndex(val => val instanceof UnfoldMarker); - for (var i = 0; i < unfold_list.length; i++) { - out_fields[unfold_pos] = unfold_list[i]; - if (!select_simple(sort_key, out_fields.slice())) - return false; - } - return true; -} - - -function process_select(NF, afields, rhs_records) { - for (var i = 0; i < rhs_records.length; i++) { - unfold_list = null; - var bfields = rhs_records[i]; - var star_fields = afields; - if (bfields != null) - star_fields = afields.concat(bfields); - __RBQLMP__init_column_vars_update - if (!(__RBQLMP__where_expression)) - continue; - // TODO wrap all user expression in try/catch block to improve error reporting - var out_fields = __RBQLMP__select_expression; - if (aggregation_stage > 0) { - var key = __RBQLMP__aggregation_key_expression; - select_aggregated(key, out_fields); - } else { - var sort_key = [__RBQLMP__sort_key_expression]; - if (unfold_list !== null) { - if (!select_unfolded(sort_key, out_fields)) - return false; - } else { - if (!select_simple(sort_key, out_fields)) - return false; - } - } - } - return true; -} - - -function process_record(record) { - NR += 1; - if (finished_with_error) - return; - try { - do_process_record(record); - } catch (e) { - if (e instanceof InternalBadFieldError) { - finish_processing_error('query execution', 'No "a' + (e.idx + 1) + '" column at record: ' + NR); - } else if (e instanceof RbqlRuntimeError) { - finish_processing_error('query execution', e.message); - } else { - if (node_debug_mode_flag) { - console.log('Unexpected exception, dumping stack trace:'); - console.log(e.stack); - } - finish_processing_error('unexpected', `At record: ${NR}, Details: ${String(e)}`); - } - } -} - - -function do_process_record(afields) { - let rhs_records = join_map.get_rhs(__RBQLMP__lhs_join_var); - let NF = afields.length; - if 
(!process_function(NF, afields, rhs_records)) { - external_input_iterator.finish(); - return; - } -} - - -function do_rb_transform(input_iterator, output_writer) { - process_function = __RBQLMP__is_select_query ? process_select : process_update; - var sql_join_type = {'VOID': FakeJoiner, 'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}['__RBQLMP__join_operation']; - - join_map = new sql_join_type(external_join_map_impl); - - writer = new TopWriter(output_writer); - - if ('__RBQLMP__writer_type' == 'uniq') { - writer = new UniqWriter(writer); - } else if ('__RBQLMP__writer_type' == 'uniq_count') { - writer = new UniqCountWriter(writer); - } - - if (__RBQLMP__sort_flag) - writer = new SortedWriter(writer); - - input_iterator.set_record_callback(process_record); - input_iterator.start(); -} - - -function rb_transform(input_iterator, join_map_impl, output_writer, external_success_cb, external_error_cb, node_debug_mode=false) { - node_debug_mode_flag = node_debug_mode; - external_success_handler = external_success_cb; - external_error_handler = external_error_cb; - external_input_iterator = input_iterator; - external_writer = output_writer; - external_join_map_impl = join_map_impl; - - input_iterator.set_finish_callback(finish_processing_success); - - if (module_was_used_failsafe) { - finish_processing_error('unexpected', 'Module can only be used once'); - return; - } - module_was_used_failsafe = true; - - try { - if (external_join_map_impl !== null) { - external_join_map_impl.build(function() { do_rb_transform(input_iterator, output_writer); }, finish_processing_error); - } else { - do_rb_transform(input_iterator, output_writer); - } - - } catch (e) { - if (e instanceof RbqlRuntimeError) { - finish_processing_error('query execution', e.message); - } else { - if (node_debug_mode_flag) { - console.log('Unexpected exception, dumping stack trace:'); - console.log(e.stack); - } - 
finish_processing_error('unexpected', String(e)); - } - } -} - - -module.exports.rb_transform = rb_transform; diff --git a/rbql-js/rbql.js b/rbql-js/rbql.js index 210758e..fcf4cdf 100644 --- a/rbql-js/rbql.js +++ b/rbql-js/rbql.js @@ -1,13 +1,10 @@ // DO NOT EDIT! // This file was autogenerated from builder.js and template.js using build_engine.js script -const external_js_template_text = `try { -__RBQLMP__user_init_code -} catch (e) { - throw new Error('Exception while executing user-provided init code: ' + e); -} +const external_js_template_text = `__RBQLMP__user_init_code +class RbqlParsingError extends Error {} class RbqlRuntimeError extends Error {} @@ -18,13 +15,12 @@ function InternalBadFieldError(idx) { -var unfold_list = null; +var unnest_list = null; var module_was_used_failsafe = false; // Aggregators: var aggregation_stage = 0; -var aggr_init_counter = 0; var functional_aggregators = []; var writer = null; @@ -41,10 +37,11 @@ var external_input_iterator = null; var external_writer = null; var external_join_map_impl = null; -var process_function = null; +var polymorphic_process = null; var join_map = null; var node_debug_mode_flag = false; +const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs'; function finish_processing_error(error_type, error_msg) { if (finished_with_error) @@ -117,35 +114,48 @@ function safe_set(record, idx, value) { } -function Marker(marker_id, value) { +function RBQLAggregationToken(marker_id, value) { this.marker_id = marker_id; this.value = value; this.toString = function() { - throw new RbqlRuntimeError('Unsupported aggregate expression'); + throw new RbqlParsingError(wrong_aggregation_usage_error); } } -function UnfoldMarker() {} +function UnnestMarker() {} -function UNFOLD(vals) { - if (unfold_list !== null) { - // Technically we can support multiple UNFOLD's but the implementation/algorithm is more complex and just doesn't worth it - throw new 
RbqlRuntimeError('Only one UNFOLD is allowed per query'); +function UNNEST(vals) { + if (unnest_list !== null) { + // Technically we can support multiple UNNEST's but the implementation/algorithm is more complex and just doesn't worth it + throw new RbqlParsingError('Only one UNNEST is allowed per query'); } - unfold_list = vals; - return new UnfoldMarker(); + unnest_list = vals; + return new UnnestMarker(); } +const unnest = UNNEST; +const Unnest = UNNEST; +const UNFOLD = UNNEST; // "UNFOLD" is deprecated, just for backward compatibility + +function parse_number(val) { + // We can do a more pedantic number test like \`/^ *-{0,1}[0-9]+\\.{0,1}[0-9]* *$/.test(val)\`, but user will probably use just Number(val) or parseInt/parseFloat + let result = Number(val); + if (isNaN(result)) { + throw new RbqlRuntimeError(\`Unable to convert value "\${val}" to number. MIN, MAX, SUM, AVG, MEDIAN and VARIANCE aggregate functions convert their string arguments to numeric values\`); + } + return result; +} + + function MinAggregator() { this.stats = new Map(); this.increment = function(key, val) { - // JS version doesn't need "NumHandler" hack like in Python impl because it has only one "number" type, no ints/floats - val = parseFloat(val); + val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { this.stats.set(key, val); @@ -165,7 +175,7 @@ function MaxAggregator() { this.stats = new Map(); this.increment = function(key, val) { - val = parseFloat(val); + val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { this.stats.set(key, val); @@ -180,29 +190,11 @@ function MaxAggregator() { } -function CountAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, 1); - } else { - this.stats.set(key, cur_aggr + 1); - } - } - - this.get_final = function(key) { - return this.stats.get(key); - } -} - - 
function SumAggregator() { this.stats = new Map(); this.increment = function(key, val) { - val = parseFloat(val); + val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { this.stats.set(key, val); @@ -221,7 +213,7 @@ function AvgAggregator() { this.stats = new Map(); this.increment = function(key, val) { - val = parseFloat(val); + val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { this.stats.set(key, [val, 1]); @@ -246,7 +238,7 @@ function VarianceAggregator() { this.stats = new Map(); this.increment = function(key, val) { - val = parseFloat(val); + val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { this.stats.set(key, [val, val * val, 1]); @@ -274,7 +266,7 @@ function MedianAggregator() { this.stats = new Map(); this.increment = function(key, val) { - val = parseFloat(val); + val = parse_number(val); var cur_aggr = this.stats.get(key); if (cur_aggr === undefined) { this.stats.set(key, [val]); @@ -296,7 +288,25 @@ function MedianAggregator() { } -function FoldAggregator(post_proc) { +function CountAggregator() { + this.stats = new Map(); + + this.increment = function(key, val) { + var cur_aggr = this.stats.get(key); + if (cur_aggr === undefined) { + this.stats.set(key, 1); + } else { + this.stats.set(key, cur_aggr + 1); + } + } + + this.get_final = function(key) { + return this.stats.get(key); + } +} + + +function ArrayAggAggregator(post_proc) { this.post_proc = post_proc; this.stats = new Map(); @@ -316,34 +326,33 @@ function FoldAggregator(post_proc) { } -function SubkeyChecker() { - this.subkeys = new Map(); +function ConstGroupVerifier(output_index) { + this.output_index = output_index; + this.const_values = new Map(); - this.increment = function(key, subkey) { - var old_subkey = this.subkeys.get(key); - if (old_subkey === undefined) { - this.subkeys.set(key, subkey); - } else if (old_subkey != subkey) { - throw 'Unable to group by "' + key + '", 
different values in output: "' + old_subkey + '" and "' + subkey + '"'; + this.increment = function(key, value) { + var old_value = this.const_values.get(key); + if (old_value === undefined) { + this.const_values.set(key, value); + } else if (old_value != value) { + throw new RbqlRuntimeError(\`Invalid aggregate expression: non-constant values in output column \${this.output_index + 1}. E.g. "\${old_value}" and "\${value}"\`); } } this.get_final = function(key) { - return this.subkeys.get(key); + return this.const_values.get(key); } } function init_aggregator(generator_name, val, post_proc=null) { aggregation_stage = 1; - assert(aggr_init_counter == functional_aggregators.length, 'Unable to process aggregation expression'); + var res = new RBQLAggregationToken(functional_aggregators.length, val); if (post_proc === null) { functional_aggregators.push(new generator_name()); } else { functional_aggregators.push(new generator_name(post_proc)); } - var res = new Marker(aggr_init_counter, val); - aggr_init_counter += 1; return res; } @@ -351,35 +360,51 @@ function init_aggregator(generator_name, val, post_proc=null) { function MIN(val) { return aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val; } +const min = MIN; +const Min = MIN; function MAX(val) { return aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val; } +const max = MAX; +const Max = MAX; function COUNT(val) { return aggregation_stage < 2 ? init_aggregator(CountAggregator, 1) : 1; } +const count = COUNT; +const Count = COUNT; function SUM(val) { return aggregation_stage < 2 ? init_aggregator(SumAggregator, val) : val; } +const sum = SUM; +const Sum = SUM; function AVG(val) { return aggregation_stage < 2 ? init_aggregator(AvgAggregator, val) : val; } +const avg = AVG; +const Avg = AVG; function VARIANCE(val) { return aggregation_stage < 2 ? 
init_aggregator(VarianceAggregator, val) : val; } +const variance = VARIANCE; +const Variance = VARIANCE; function MEDIAN(val) { return aggregation_stage < 2 ? init_aggregator(MedianAggregator, val) : val; } +const median = MEDIAN; +const Median = MEDIAN; -function FOLD(val, post_proc = v => v.join('|')) { - return aggregation_stage < 2 ? init_aggregator(FoldAggregator, val, post_proc) : val; +function ARRAY_AGG(val, post_proc = v => v.join('|')) { + return aggregation_stage < 2 ? init_aggregator(ArrayAggAggregator, val, post_proc) : val; } +const array_agg = ARRAY_AGG; +const FOLD = ARRAY_AGG; // "FOLD" is deprecated, just for backward compatibility function add_to_set(dst_set, value) { @@ -587,19 +612,24 @@ function select_aggregated(key, transparent_values) { } if (aggregation_stage === 1) { if (!(writer instanceof TopWriter)) { - throw new RbqlRuntimeError('Unable to use "ORDER BY" or "DISTINCT" keywords in aggregate query'); + throw new RbqlParsingError('Unable to use "ORDER BY" or "DISTINCT" keywords in aggregate query'); } writer = new AggregateWriter(writer); + let num_aggregators_found = 0; for (var i = 0; i < transparent_values.length; i++) { var trans_value = transparent_values[i]; - if (trans_value instanceof Marker) { + if (trans_value instanceof RBQLAggregationToken) { writer.aggregators.push(functional_aggregators[trans_value.marker_id]); writer.aggregators[writer.aggregators.length - 1].increment(key, trans_value.value); + num_aggregators_found += 1; } else { - writer.aggregators.push(new SubkeyChecker()); + writer.aggregators.push(new ConstGroupVerifier(writer.aggregators.length)); writer.aggregators[writer.aggregators.length - 1].increment(key, trans_value); } } + if (num_aggregators_found != functional_aggregators.length) { + throw new RbqlParsingError(wrong_aggregation_usage_error); + } aggregation_stage = 2; } else { for (var i = 0; i < transparent_values.length; i++) { @@ -611,11 +641,11 @@ function select_aggregated(key, transparent_values) { 
} -function select_unfolded(sort_key, folded_fields) { +function select_unnested(sort_key, folded_fields) { let out_fields = folded_fields.slice(); - let unfold_pos = folded_fields.findIndex(val => val instanceof UnfoldMarker); - for (var i = 0; i < unfold_list.length; i++) { - out_fields[unfold_pos] = unfold_list[i]; + let unnest_pos = folded_fields.findIndex(val => val instanceof UnnestMarker); + for (var i = 0; i < unnest_list.length; i++) { + out_fields[unnest_pos] = unnest_list[i]; if (!select_simple(sort_key, out_fields.slice())) return false; } @@ -625,7 +655,7 @@ function select_unfolded(sort_key, folded_fields) { function process_select(NF, afields, rhs_records) { for (var i = 0; i < rhs_records.length; i++) { - unfold_list = null; + unnest_list = null; var bfields = rhs_records[i]; var star_fields = afields; if (bfields != null) @@ -640,8 +670,8 @@ function process_select(NF, afields, rhs_records) { select_aggregated(key, out_fields); } else { var sort_key = [__RBQLMP__sort_key_expression]; - if (unfold_list !== null) { - if (!select_unfolded(sort_key, out_fields)) + if (unnest_list !== null) { + if (!select_unnested(sort_key, out_fields)) return false; } else { if (!select_simple(sort_key, out_fields)) @@ -664,12 +694,14 @@ function process_record(record) { finish_processing_error('query execution', 'No "a' + (e.idx + 1) + '" column at record: ' + NR); } else if (e instanceof RbqlRuntimeError) { finish_processing_error('query execution', e.message); + } else if (e instanceof RbqlParsingError) { + finish_processing_error('query parsing', e.message); } else { if (node_debug_mode_flag) { console.log('Unexpected exception, dumping stack trace:'); console.log(e.stack); } - finish_processing_error('unexpected', \`At record: \${NR}, Details: \${String(e)}\`); + finish_processing_error('query execution', \`At record: \${NR}, Details: \${String(e)}\`); } } } @@ -678,7 +710,7 @@ function process_record(record) { function do_process_record(afields) { let 
rhs_records = join_map.get_rhs(__RBQLMP__lhs_join_var); let NF = afields.length; - if (!process_function(NF, afields, rhs_records)) { + if (!polymorphic_process(NF, afields, rhs_records)) { external_input_iterator.finish(); return; } @@ -686,7 +718,7 @@ function do_process_record(afields) { function do_rb_transform(input_iterator, output_writer) { - process_function = __RBQLMP__is_select_query ? process_select : process_update; + polymorphic_process = __RBQLMP__is_select_query ? process_select : process_update; var sql_join_type = {'VOID': FakeJoiner, 'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}['__RBQLMP__join_operation']; join_map = new sql_join_type(external_join_map_impl); @@ -733,6 +765,8 @@ function rb_transform(input_iterator, join_map_impl, output_writer, external_suc } catch (e) { if (e instanceof RbqlRuntimeError) { finish_processing_error('query execution', e.message); + } else if (e instanceof RbqlParsingError) { + finish_processing_error('query parsing', e.message); } else { if (node_debug_mode_flag) { console.log('Unexpected exception, dumping stack trace:'); @@ -761,7 +795,7 @@ module.exports.rb_transform = rb_transform; // TODO replace prototypes with classes: this improves readability -const version = '0.8.0'; +const version = '0.9.0'; const GROUP_BY = 'GROUP BY'; const UPDATE = 'UPDATE'; @@ -875,7 +909,7 @@ function generate_init_statements(column_vars, indent) { function replace_star_count(aggregate_expression) { - var rgx = /(^|,) *COUNT\( *\* *\) *(?:$|(?=,))/g; + var rgx = /(^|,) *COUNT\( *\* *\) *(?:$|(?=,))/ig; var result = aggregate_expression.replace(rgx, '$1 COUNT(1)'); return str_strip(result); } @@ -945,7 +979,7 @@ function separate_string_literals_js(rbql_expression) { function combine_string_literals(backend_expression, string_literals) { - for (var i = 0; i < string_literals.length; i++) { + for (var i = string_literals.length - 1; i >= 0; i--) { backend_expression = 
replace_all(backend_expression, `###RBQL_STRING_LITERAL###${i}`, string_literals[i]); } return backend_expression; diff --git a/rbql-js/rbql_csv.js b/rbql-js/rbql_csv.js index 7c1b7ab..07b0165 100644 --- a/rbql-js/rbql_csv.js +++ b/rbql-js/rbql_csv.js @@ -433,6 +433,10 @@ function csv_run(user_query, input_path, input_delim, input_policy, output_path, error_handler('IO handling', 'To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary'); return; } + if ((!is_ascii(input_delim) || !is_ascii(output_delim)) && csv_encoding == 'binary') { + error_handler('IO handling', 'To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary'); + return; + } let default_init_source_path = path.join(os.homedir(), '.rbql_init_source.js'); if (user_init_code == '' && fs.existsSync(default_init_source_path)) { diff --git a/rbql-js/web_rbql.js b/rbql-js/web_rbql.js deleted file mode 100644 index e54ed9c..0000000 --- a/rbql-js/web_rbql.js +++ /dev/null @@ -1,1446 +0,0 @@ -// DO NOT EDIT! -// This file was autogenerated from builder.js and template.js using build_engine.js script - -let rbql = null; -( function() { -let module = {'exports': {}}; -rbql = module.exports; -const external_js_template_text = `try { -__RBQLMP__user_init_code -} catch (e) { - throw new Error('Exception while executing user-provided init code: ' + e); -} - - -class RbqlRuntimeError extends Error {} - - -function InternalBadFieldError(idx) { - this.idx = idx; - this.name = 'InternalBadFieldError'; -} - - - -var unfold_list = null; - -var module_was_used_failsafe = false; - -// Aggregators: -var aggregation_stage = 0; -var aggr_init_counter = 0; -var functional_aggregators = []; - -var writer = null; - -var NU = 0; // NU - Num Updated. Alternative variables: NW (Num Where) - Not Practical. NW (Num Written) - Impossible to implement. 
-var NR = 0; - -var finished_with_error = false; - -var external_success_handler = null; -var external_error_handler = null; - -var external_input_iterator = null; -var external_writer = null; -var external_join_map_impl = null; - -var process_function = null; -var join_map = null; -var node_debug_mode_flag = false; - - -function finish_processing_error(error_type, error_msg) { - if (finished_with_error) - return; - finished_with_error = true; - // Stopping input_iterator to trigger exit procedure. - external_input_iterator.finish(); - external_error_handler(error_type, error_msg); -} - - -function finish_processing_success() { - if (finished_with_error) - return; - try { - writer.finish(() => { - var join_warnings = external_join_map_impl ? external_join_map_impl.get_warnings() : []; - var warnings = join_warnings.concat(external_writer.get_warnings()).concat(external_input_iterator.get_warnings()); - external_success_handler(warnings); - }); - } catch (e) { - if (e instanceof RbqlRuntimeError) { - finish_processing_error('query execution', e.message); - } else { - if (node_debug_mode_flag) { - console.log('Unexpected exception, dumping stack trace:'); - console.log(e.stack); - } - finish_processing_error('unexpected', String(e)); - } - return; - } -} - - -function assert(condition, message) { - if (!condition) { - finish_processing_error('unexpected', message); - } -} - - -function stable_compare(a, b) { - for (var i = 0; i < a.length; i++) { - if (a[i] !== b[i]) - return a[i] < b[i] ? -1 : 1; - } -} - - -function safe_get(record, idx) { - return idx < record.length ? 
record[idx] : null; -} - - -function safe_join_get(record, idx) { - if (idx < record.length) { - return record[idx]; - } - throw new InternalBadFieldError(idx); -} - - -function safe_set(record, idx, value) { - if (idx - 1 < record.length) { - record[idx - 1] = value; - } else { - throw new InternalBadFieldError(idx - 1); - } -} - - -function Marker(marker_id, value) { - this.marker_id = marker_id; - this.value = value; - this.toString = function() { - throw new RbqlRuntimeError('Unsupported aggregate expression'); - } -} - - -function UnfoldMarker() {} - - -function UNFOLD(vals) { - if (unfold_list !== null) { - // Technically we can support multiple UNFOLD's but the implementation/algorithm is more complex and just doesn't worth it - throw new RbqlRuntimeError('Only one UNFOLD is allowed per query'); - } - unfold_list = vals; - return new UnfoldMarker(); -} - - - -function MinAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - // JS version doesn't need "NumHandler" hack like in Python impl because it has only one "number" type, no ints/floats - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, val); - } else { - this.stats.set(key, Math.min(cur_aggr, val)); - } - } - - this.get_final = function(key) { - return this.stats.get(key); - } -} - - - -function MaxAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, val); - } else { - this.stats.set(key, Math.max(cur_aggr, val)); - } - } - - this.get_final = function(key) { - return this.stats.get(key); - } -} - - -function CountAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, 1); - } else { - this.stats.set(key, cur_aggr + 1); - } - } - - this.get_final = 
function(key) { - return this.stats.get(key); - } -} - - -function SumAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, val); - } else { - this.stats.set(key, cur_aggr + val); - } - } - - this.get_final = function(key) { - return this.stats.get(key); - } -} - - -function AvgAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, [val, 1]); - } else { - var cur_sum = cur_aggr[0]; - var cur_cnt = cur_aggr[1]; - this.stats.set(key, [cur_sum + val, cur_cnt + 1]); - } - } - - this.get_final = function(key) { - var cur_aggr = this.stats.get(key); - var cur_sum = cur_aggr[0]; - var cur_cnt = cur_aggr[1]; - var avg = cur_sum / cur_cnt; - return avg; - } -} - - -function VarianceAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, [val, val * val, 1]); - } else { - var cur_sum = cur_aggr[0]; - var cur_sum_sq = cur_aggr[1]; - var cur_cnt = cur_aggr[2]; - this.stats.set(key, [cur_sum + val, cur_sum_sq + val * val, cur_cnt + 1]); - } - } - - this.get_final = function(key) { - var cur_aggr = this.stats.get(key); - var cur_sum = cur_aggr[0]; - var cur_sum_sq = cur_aggr[1]; - var cur_cnt = cur_aggr[2]; - var avg_val = cur_sum / cur_cnt; - var variance = cur_sum_sq / cur_cnt - avg_val * avg_val; - return variance; - } -} - - -function MedianAggregator() { - this.stats = new Map(); - - this.increment = function(key, val) { - val = parseFloat(val); - var cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, [val]); - } else { - cur_aggr.push(val); - } - } - - this.get_final = function(key) { - var cur_aggr = 
this.stats.get(key); - cur_aggr.sort(function(a, b) { return a - b; }); - var m = Math.floor(cur_aggr.length / 2); - if (cur_aggr.length % 2) { - return cur_aggr[m]; - } else { - return (cur_aggr[m - 1] + cur_aggr[m]) / 2.0; - } - } -} - - -function FoldAggregator(post_proc) { - this.post_proc = post_proc; - this.stats = new Map(); - - this.increment = function(key, val) { - let cur_aggr = this.stats.get(key); - if (cur_aggr === undefined) { - this.stats.set(key, [val]); - } else { - cur_aggr.push(val); - } - } - - this.get_final = function(key) { - let cur_aggr = this.stats.get(key); - return this.post_proc(cur_aggr); - } -} - - -function SubkeyChecker() { - this.subkeys = new Map(); - - this.increment = function(key, subkey) { - var old_subkey = this.subkeys.get(key); - if (old_subkey === undefined) { - this.subkeys.set(key, subkey); - } else if (old_subkey != subkey) { - throw 'Unable to group by "' + key + '", different values in output: "' + old_subkey + '" and "' + subkey + '"'; - } - } - - this.get_final = function(key) { - return this.subkeys.get(key); - } -} - - -function init_aggregator(generator_name, val, post_proc=null) { - aggregation_stage = 1; - assert(aggr_init_counter == functional_aggregators.length, 'Unable to process aggregation expression'); - if (post_proc === null) { - functional_aggregators.push(new generator_name()); - } else { - functional_aggregators.push(new generator_name(post_proc)); - } - var res = new Marker(aggr_init_counter, val); - aggr_init_counter += 1; - return res; -} - - -function MIN(val) { - return aggregation_stage < 2 ? init_aggregator(MinAggregator, val) : val; -} - - -function MAX(val) { - return aggregation_stage < 2 ? init_aggregator(MaxAggregator, val) : val; -} - -function COUNT(val) { - return aggregation_stage < 2 ? init_aggregator(CountAggregator, 1) : 1; -} - -function SUM(val) { - return aggregation_stage < 2 ? 
init_aggregator(SumAggregator, val) : val; -} - -function AVG(val) { - return aggregation_stage < 2 ? init_aggregator(AvgAggregator, val) : val; -} - -function VARIANCE(val) { - return aggregation_stage < 2 ? init_aggregator(VarianceAggregator, val) : val; -} - -function MEDIAN(val) { - return aggregation_stage < 2 ? init_aggregator(MedianAggregator, val) : val; -} - -function FOLD(val, post_proc = v => v.join('|')) { - return aggregation_stage < 2 ? init_aggregator(FoldAggregator, val, post_proc) : val; -} - - -function add_to_set(dst_set, value) { - var len_before = dst_set.size; - dst_set.add(value); - return len_before != dst_set.size; -} - - -function TopWriter(subwriter) { - this.subwriter = subwriter; - this.NW = 0; - - this.write = function(record) { - if (__RBQLMP__top_count !== null && this.NW >= __RBQLMP__top_count) - return false; - this.subwriter.write(record); - this.NW += 1; - return true; - } - - this.finish = function(after_finish_callback) { - this.subwriter.finish(after_finish_callback); - } -} - - -function UniqWriter(subwriter) { - this.subwriter = subwriter; - this.seen = new Set(); - - this.write = function(record) { - if (!add_to_set(this.seen, JSON.stringify(record))) - return true; - if (!this.subwriter.write(record)) - return false; - return true; - } - - this.finish = function(after_finish_callback) { - this.subwriter.finish(after_finish_callback); - } -} - - -function UniqCountWriter(subwriter) { - this.subwriter = subwriter; - this.records = new Map(); - - this.write = function(record) { - var key = JSON.stringify(record); - var old_val = this.records.get(key); - if (old_val) { - old_val[0] += 1; - } else { - this.records.set(key, [1, record]); - } - return true; - } - - this.finish = function(after_finish_callback) { - for (var [key, value] of this.records) { - let [count, record] = value; - record.unshift(count); - if (!this.subwriter.write(record)) - break; - } - this.subwriter.finish(after_finish_callback); - } -} - - -function 
SortedWriter(subwriter) { - this.subwriter = subwriter; - this.unsorted_entries = []; - - this.write = function(stable_entry) { - this.unsorted_entries.push(stable_entry); - return true; - } - - this.finish = function(after_finish_callback) { - var unsorted_entries = this.unsorted_entries; - unsorted_entries.sort(stable_compare); - if (__RBQLMP__reverse_flag) - unsorted_entries.reverse(); - for (var i = 0; i < unsorted_entries.length; i++) { - var entry = unsorted_entries[i]; - if (!this.subwriter.write(entry[entry.length - 1])) - break; - } - this.subwriter.finish(after_finish_callback); - } -} - - -function AggregateWriter(subwriter) { - this.subwriter = subwriter; - this.aggregators = []; - this.aggregation_keys = new Set(); - - this.finish = function(after_finish_callback) { - var all_keys = Array.from(this.aggregation_keys); - all_keys.sort(); - for (var i = 0; i < all_keys.length; i++) { - var key = all_keys[i]; - var out_fields = []; - for (var ag of this.aggregators) { - out_fields.push(ag.get_final(key)); - } - if (!this.subwriter.write(out_fields)) - break; - } - this.subwriter.finish(after_finish_callback); - } -} - - - -function FakeJoiner(join_map) { - this.get_rhs = function(lhs_key) { - return [null]; - } -} - - -function InnerJoiner(join_map) { - this.join_map = join_map; - - this.get_rhs = function(lhs_key) { - return this.join_map.get_join_records(lhs_key); - } -} - - -function LeftJoiner(join_map) { - this.join_map = join_map; - this.null_record = [Array(join_map.max_record_len).fill(null)]; - - this.get_rhs = function(lhs_key) { - let result = this.join_map.get_join_records(lhs_key); - if (result.length == 0) { - return this.null_record; - } - return result; - } -} - - -function StrictLeftJoiner(join_map) { - this.join_map = join_map; - - this.get_rhs = function(lhs_key) { - let result = this.join_map.get_join_records(lhs_key); - if (result.length != 1) { - throw new RbqlRuntimeError('In "STRICT LEFT JOIN" each key in A must have exactly one 
match in B. Bad A key: "' + lhs_key + '"'); - } - return result; - } -} - - -function select_except(src, except_fields) { - let result = []; - for (let i = 0; i < src.length; i++) { - if (except_fields.indexOf(i) == -1) - result.push(src[i]); - } - return result; -} - - -function process_update(NF, afields, rhs_records) { - if (rhs_records.length > 1) - throw new RbqlRuntimeError('More than one record in UPDATE query matched A-key in join table B'); - var bfields = null; - if (rhs_records.length == 1) - bfields = rhs_records[0]; - var up_fields = afields; - __RBQLMP__init_column_vars_select - if (rhs_records.length == 1 && (__RBQLMP__where_expression)) { - NU += 1; - __RBQLMP__update_statements - } - return writer.write(up_fields); -} - - -function select_simple(sort_key, out_fields) { - if (__RBQLMP__sort_flag) { - var sort_entry = sort_key.concat([NR, out_fields]); - if (!writer.write(sort_entry)) - return false; - } else { - if (!writer.write(out_fields)) - return false; - } - return true; -} - - -function select_aggregated(key, transparent_values) { - if (key !== null) { - key = JSON.stringify(key); - } - if (aggregation_stage === 1) { - if (!(writer instanceof TopWriter)) { - throw new RbqlRuntimeError('Unable to use "ORDER BY" or "DISTINCT" keywords in aggregate query'); - } - writer = new AggregateWriter(writer); - for (var i = 0; i < transparent_values.length; i++) { - var trans_value = transparent_values[i]; - if (trans_value instanceof Marker) { - writer.aggregators.push(functional_aggregators[trans_value.marker_id]); - writer.aggregators[writer.aggregators.length - 1].increment(key, trans_value.value); - } else { - writer.aggregators.push(new SubkeyChecker()); - writer.aggregators[writer.aggregators.length - 1].increment(key, trans_value); - } - } - aggregation_stage = 2; - } else { - for (var i = 0; i < transparent_values.length; i++) { - var trans_value = transparent_values[i]; - writer.aggregators[i].increment(key, trans_value); - } - } - 
writer.aggregation_keys.add(key) -} - - -function select_unfolded(sort_key, folded_fields) { - let out_fields = folded_fields.slice(); - let unfold_pos = folded_fields.findIndex(val => val instanceof UnfoldMarker); - for (var i = 0; i < unfold_list.length; i++) { - out_fields[unfold_pos] = unfold_list[i]; - if (!select_simple(sort_key, out_fields.slice())) - return false; - } - return true; -} - - -function process_select(NF, afields, rhs_records) { - for (var i = 0; i < rhs_records.length; i++) { - unfold_list = null; - var bfields = rhs_records[i]; - var star_fields = afields; - if (bfields != null) - star_fields = afields.concat(bfields); - __RBQLMP__init_column_vars_update - if (!(__RBQLMP__where_expression)) - continue; - // TODO wrap all user expression in try/catch block to improve error reporting - var out_fields = __RBQLMP__select_expression; - if (aggregation_stage > 0) { - var key = __RBQLMP__aggregation_key_expression; - select_aggregated(key, out_fields); - } else { - var sort_key = [__RBQLMP__sort_key_expression]; - if (unfold_list !== null) { - if (!select_unfolded(sort_key, out_fields)) - return false; - } else { - if (!select_simple(sort_key, out_fields)) - return false; - } - } - } - return true; -} - - -function process_record(record) { - NR += 1; - if (finished_with_error) - return; - try { - do_process_record(record); - } catch (e) { - if (e instanceof InternalBadFieldError) { - finish_processing_error('query execution', 'No "a' + (e.idx + 1) + '" column at record: ' + NR); - } else if (e instanceof RbqlRuntimeError) { - finish_processing_error('query execution', e.message); - } else { - if (node_debug_mode_flag) { - console.log('Unexpected exception, dumping stack trace:'); - console.log(e.stack); - } - finish_processing_error('unexpected', \`At record: \${NR}, Details: \${String(e)}\`); - } - } -} - - -function do_process_record(afields) { - let rhs_records = join_map.get_rhs(__RBQLMP__lhs_join_var); - let NF = afields.length; - if 
(!process_function(NF, afields, rhs_records)) { - external_input_iterator.finish(); - return; - } -} - - -function do_rb_transform(input_iterator, output_writer) { - process_function = __RBQLMP__is_select_query ? process_select : process_update; - var sql_join_type = {'VOID': FakeJoiner, 'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}['__RBQLMP__join_operation']; - - join_map = new sql_join_type(external_join_map_impl); - - writer = new TopWriter(output_writer); - - if ('__RBQLMP__writer_type' == 'uniq') { - writer = new UniqWriter(writer); - } else if ('__RBQLMP__writer_type' == 'uniq_count') { - writer = new UniqCountWriter(writer); - } - - if (__RBQLMP__sort_flag) - writer = new SortedWriter(writer); - - input_iterator.set_record_callback(process_record); - input_iterator.start(); -} - - -function rb_transform(input_iterator, join_map_impl, output_writer, external_success_cb, external_error_cb, node_debug_mode=false) { - node_debug_mode_flag = node_debug_mode; - external_success_handler = external_success_cb; - external_error_handler = external_error_cb; - external_input_iterator = input_iterator; - external_writer = output_writer; - external_join_map_impl = join_map_impl; - - input_iterator.set_finish_callback(finish_processing_success); - - if (module_was_used_failsafe) { - finish_processing_error('unexpected', 'Module can only be used once'); - return; - } - module_was_used_failsafe = true; - - try { - if (external_join_map_impl !== null) { - external_join_map_impl.build(function() { do_rb_transform(input_iterator, output_writer); }, finish_processing_error); - } else { - do_rb_transform(input_iterator, output_writer); - } - - } catch (e) { - if (e instanceof RbqlRuntimeError) { - finish_processing_error('query execution', e.message); - } else { - if (node_debug_mode_flag) { - console.log('Unexpected exception, dumping stack trace:'); - console.log(e.stack); - } - 
finish_processing_error('unexpected', String(e)); - } - } -} - - -module.exports.rb_transform = rb_transform; -`; -// ^ The expression above will cause builder.js and tempalte.js to be combined to autogenerate rbql.js: builder.js + template.js -> ../rbql.js -// Expression is written as a function to pacify the linter. -// Unit tests will ensure that rbql.js is indeed a concatenation of builder.js and template.js - - -// This module works with records only. It is CSV-agnostic. -// Do not add CSV-related logic or variables/functions/objects like "delim", "separator" etc - - -// TODO get rid of functions with "_js" suffix - - -// TODO replace prototypes with classes: this improves readability - - -const version = '0.8.0'; - -const GROUP_BY = 'GROUP BY'; -const UPDATE = 'UPDATE'; -const SELECT = 'SELECT'; -const JOIN = 'JOIN'; -const INNER_JOIN = 'INNER JOIN'; -const LEFT_JOIN = 'LEFT JOIN'; -const STRICT_LEFT_JOIN = 'STRICT LEFT JOIN'; -const ORDER_BY = 'ORDER BY'; -const WHERE = 'WHERE'; -const LIMIT = 'LIMIT'; -const EXCEPT = 'EXCEPT'; - - -class RbqlParsingError extends Error {} -class RbqlIOHandlingError extends Error {} -class AssertionError extends Error {} - -var debug_mode = false; - -function assert(condition, message=null) { - if (!condition) { - if (!message) { - message = 'Assertion error'; - } - throw new AssertionError(message); - } -} - - -function get_all_matches(regexp, text) { - var result = []; - let match_obj = null; - while((match_obj = regexp.exec(text)) !== null) { - result.push(match_obj); - } - return result; -} - - -function replace_all(src, search, replacement) { - return src.split(search).join(replacement); -} - - -function str_strip(src) { - return src.replace(/^ +| +$/g, ''); -} - - -function rbql_meta_format(template_src, meta_params) { - for (var key in meta_params) { - if (!meta_params.hasOwnProperty(key)) - continue; - var value = meta_params[key]; - var template_src_upd = replace_all(template_src, key, value); - 
assert(template_src_upd != template_src); - template_src = template_src_upd; - } - return template_src; -} - - -function strip_comments(cline) { - cline = cline.trim(); - if (cline.startsWith('//')) - return ''; - return cline; -} - - -function parse_join_expression(src) { - var rgx = /^ *([^ ]+) +on +([ab][0-9]+) *== *([ab][0-9]+) *$/i; - var match = rgx.exec(src); - if (match === null) { - throw new RbqlParsingError('Invalid join syntax. Must be: " /path/to/B/table on a == b"'); - } - var table_id = match[1]; - var avar = match[2]; - var bvar = match[3]; - if (avar.charAt(0) == 'b') { - [avar, bvar] = [bvar, avar]; - } - if (avar.charAt(0) != 'a' || bvar.charAt(0) != 'b') { - throw new RbqlParsingError('Invalid join syntax. Must be: " /path/to/B/table on a == b"'); - } - avar = parseInt(avar.substr(1)) - 1; - var lhs_join_var = `safe_join_get(afields, ${avar})`; - let rhs_key_index = parseInt(bvar.substr(1)) - 1; - return [table_id, lhs_join_var, rhs_key_index]; -} - - -function generate_init_statements(column_vars, indent) { - var init_statements = []; - for (var i = 0; i < column_vars.length; i++) { - var var_name = column_vars[i]; - var var_group = var_name.charAt(0); - var zero_based_idx = parseInt(var_name.substr(1)) - 1; - if (var_group == 'a') { - init_statements.push(`var ${var_name} = safe_get(afields, ${zero_based_idx});`); - } else { - init_statements.push(`var ${var_name} = bfields === null ? 
null : safe_get(bfields, ${zero_based_idx});`); - } - } - for (var i = 1; i < init_statements.length; i++) { - init_statements[i] = indent + init_statements[i]; - } - return init_statements.join('\n'); -} - - -function replace_star_count(aggregate_expression) { - var rgx = /(^|,) *COUNT\( *\* *\) *(?:$|(?=,))/g; - var result = aggregate_expression.replace(rgx, '$1 COUNT(1)'); - return str_strip(result); -} - - -function replace_star_vars(rbql_expression) { - var middle_star_rgx = /(?:^|,) *\* *(?=, *\* *($|,))/g; - rbql_expression = rbql_expression.replace(middle_star_rgx, ']).concat(star_fields).concat(['); - var last_star_rgx = /(?:^|,) *\* *(?:$|,)/g; - rbql_expression = rbql_expression.replace(last_star_rgx, ']).concat(star_fields).concat(['); - return rbql_expression; -} - - -function translate_update_expression(update_expression, indent) { - var rgx = /(?:^|,) *a([1-9][0-9]*) *=(?=[^=])/g; - var translated = update_expression.replace(rgx, '\nsafe_set(up_fields, $1,'); - var update_statements = translated.split('\n'); - update_statements = update_statements.map(str_strip); - if (update_statements.length < 2 || update_statements[0] != '') { - throw new RbqlParsingError('Unable to parse "UPDATE" expression'); - } - update_statements = update_statements.slice(1); - for (var i = 0; i < update_statements.length; i++) { - update_statements[i] = update_statements[i] + ')'; - } - for (var i = 1; i < update_statements.length; i++) { - update_statements[i] = indent + update_statements[i]; - } - var translated = update_statements.join('\n'); - return translated; -} - - -function translate_select_expression_js(select_expression) { - var translated = replace_star_count(select_expression); - translated = replace_star_vars(translated); - translated = str_strip(translated); - if (!translated.length) { - throw new RbqlParsingError('"SELECT" expression is empty'); - } - return `[].concat([${translated}])`; -} - - -function separate_string_literals_js(rbql_expression) { - // The 
regex consists of 3 almost identicall parts, the only difference is quote type - var rgx = /('(\\(\\\\)*'|[^'])*')|("(\\(\\\\)*"|[^"])*")|(`(\\(\\\\)*`|[^`])*`)/g; - var match_obj = null; - var format_parts = []; - var string_literals = []; - var idx_before = 0; - while((match_obj = rgx.exec(rbql_expression)) !== null) { - var literal_id = string_literals.length; - var string_literal = match_obj[0]; - string_literals.push(string_literal); - var start_index = match_obj.index; - format_parts.push(rbql_expression.substring(idx_before, start_index)); - format_parts.push(`###RBQL_STRING_LITERAL###${literal_id}`); - idx_before = rgx.lastIndex; - } - format_parts.push(rbql_expression.substring(idx_before)); - var format_expression = format_parts.join(''); - format_expression = format_expression.replace(/\t/g, ' '); - return [format_expression, string_literals]; -} - - -function combine_string_literals(backend_expression, string_literals) { - for (var i = 0; i < string_literals.length; i++) { - backend_expression = replace_all(backend_expression, `###RBQL_STRING_LITERAL###${i}`, string_literals[i]); - } - return backend_expression; -} - - -function locate_statements(rbql_expression) { - let statement_groups = []; - statement_groups.push([STRICT_LEFT_JOIN, LEFT_JOIN, INNER_JOIN, JOIN]); - statement_groups.push([SELECT]); - statement_groups.push([ORDER_BY]); - statement_groups.push([WHERE]); - statement_groups.push([UPDATE]); - statement_groups.push([GROUP_BY]); - statement_groups.push([LIMIT]); - statement_groups.push([EXCEPT]); - var result = []; - for (var ig = 0; ig < statement_groups.length; ig++) { - for (var is = 0; is < statement_groups[ig].length; is++) { - var statement = statement_groups[ig][is]; - var rgxp = new RegExp('(?:^| )' + replace_all(statement, ' ', ' *') + '(?= )', 'ig'); - var matches = get_all_matches(rgxp, rbql_expression); - if (!matches.length) - continue; - if (matches.length > 1) - throw new RbqlParsingError(`More than one ${statement} statements 
found`); - assert(matches.length == 1); - var match = matches[0]; - var match_str = match[0]; - result.push([match.index, match.index + match_str.length, statement]); - break; // Break to avoid matching a sub-statement from the same group e.g. "INNER JOIN" -> "JOIN" - } - } - result.sort(function(a, b) { return a[0] - b[0]; }); - return result; -} - - -function separate_actions(rbql_expression) { - rbql_expression = str_strip(rbql_expression); - var ordered_statements = locate_statements(rbql_expression); - var result = {}; - for (var i = 0; i < ordered_statements.length; i++) { - var statement_start = ordered_statements[i][0]; - var span_start = ordered_statements[i][1]; - var statement = ordered_statements[i][2]; - var span_end = i + 1 < ordered_statements.length ? ordered_statements[i + 1][0] : rbql_expression.length; - assert(statement_start < span_start); - assert(span_start <= span_end); - var span = rbql_expression.substring(span_start, span_end); - var statement_params = {}; - if ([STRICT_LEFT_JOIN, LEFT_JOIN, INNER_JOIN, JOIN].indexOf(statement) != -1) { - statement_params['join_subtype'] = statement; - statement = JOIN; - } - - if (statement == UPDATE) { - if (statement_start != 0) - throw new RbqlParsingError('UPDATE keyword must be at the beginning of the query'); - span = span.replace(/^ *SET/i, ''); - } - - if (statement == ORDER_BY) { - span = span.replace(/ ASC *$/i, ''); - var new_span = span.replace(/ DESC *$/i, ''); - if (new_span != span) { - span = new_span; - statement_params['reverse'] = true; - } else { - statement_params['reverse'] = false; - } - } - - if (statement == SELECT) { - if (statement_start != 0) - throw new RbqlParsingError('SELECT keyword must be at the beginning of the query'); - var match = /^ *TOP *([0-9]+) /i.exec(span); - if (match !== null) { - statement_params['top'] = parseInt(match[1]); - span = span.substr(match.index + match[0].length); - } - match = /^ *DISTINCT *(COUNT)? 
/i.exec(span); - if (match !== null) { - statement_params['distinct'] = true; - if (match[1]) { - statement_params['distinct_count'] = true; - } - span = span.substr(match.index + match[0].length); - } - } - statement_params['text'] = str_strip(span); - result[statement] = statement_params; - } - if (!result.hasOwnProperty(SELECT) && !result.hasOwnProperty(UPDATE)) { - throw new RbqlParsingError('Query must contain either SELECT or UPDATE statement'); - } - assert(result.hasOwnProperty(SELECT) != result.hasOwnProperty(UPDATE)); - return result; -} - - -function find_top(rb_actions) { - if (rb_actions.hasOwnProperty(LIMIT)) { - var result = parseInt(rb_actions[LIMIT]['text']); - if (isNaN(result)) { - throw new RbqlParsingError('LIMIT keyword must be followed by an integer'); - } - return result; - } - var select_action = rb_actions[SELECT]; - if (select_action && select_action.hasOwnProperty('top')) { - return select_action['top']; - } - return null; -} - - -function indent_user_init_code(user_init_code) { - let source_lines = user_init_code.split(/(?:\r\n)|\r|\n/); - source_lines = source_lines.map(line => ' ' + line); - return source_lines.join('\n'); -} - - -function extract_column_vars(rbql_expression) { - var rgx = /(?:^|[^_a-zA-Z0-9])([ab][1-9][0-9]*)(?:$|(?=[^_a-zA-Z0-9]))/g; - var result = []; - var seen = {}; - var matches = get_all_matches(rgx, rbql_expression); - for (var i = 0; i < matches.length; i++) { - var var_name = matches[i][1]; - if (!seen.hasOwnProperty(var_name)) { - result.push(var_name); - seen[var_name] = 1; - } - } - return result; -} - - -function translate_except_expression(except_expression) { - let skip_vars = except_expression.split(','); - let skip_indices = []; - let rgx = /^a[1-9][0-9]*$/; - for (let i = 0; i < skip_vars.length; i++) { - let skip_var = str_strip(skip_vars[i]); - let match = rgx.exec(skip_var); - if (match === null) { - throw new RbqlParsingError('Invalid EXCEPT syntax'); - } - 
skip_indices.push(parseInt(skip_var.substring(1)) - 1); - } - skip_indices = skip_indices.sort((a, b) => a - b); - let indices_str = skip_indices.join(','); - return `select_except(afields, [${indices_str}])`; -} - - -function HashJoinMap(record_iterator, key_index) { - this.max_record_len = 0; - this.hash_map = new Map(); - this.record_iterator = record_iterator; - this.key_index = key_index; - this.error_msg = null; - this.external_error_handler = null; - this.external_success_handler = null; - this.nr = 0; - - this.finish_build = function() { - if (this.error_msg === null) { - this.external_success_handler(); - } else { - this.external_error_handler('IO handling', this.error_msg); - } - }; - - this.add_record = function(record) { - this.nr += 1; - let num_fields = record.length; - this.max_record_len = Math.max(this.max_record_len, num_fields); - if (this.key_index >= num_fields) { - this.error_msg = `No "b${this.key_index + 1}" field at record: ${this.nr} in "B" table`; - this.record_iterator.finish(); - } - let key = record[this.key_index]; - let key_records = this.hash_map.get(key); - if (key_records === undefined) { - this.hash_map.set(key, [record]); - } else { - key_records.push(record); - } - }; - - this.build = function(success_callback, error_callback) { - this.external_success_handler = success_callback; - this.external_error_handler = error_callback; - this.record_iterator.set_record_callback((record) => { this.add_record(record); }); - this.record_iterator.set_finish_callback(() => { this.finish_build(); }); - this.record_iterator.start(); - }; - - this.get_join_records = function(key) { - let result = this.hash_map.get(key); - if (result === undefined) - return []; - return result; - }; - - this.get_warnings = function() { - return this.record_iterator.get_warnings(); - }; -} - - -function parse_to_js(query, js_template_text, join_tables_registry, user_init_code) { - let rbql_lines = query.split('\n'); - rbql_lines = rbql_lines.map(strip_comments); 
- rbql_lines = rbql_lines.filter(line => line.length); - var full_rbql_expression = rbql_lines.join(' '); - var column_vars = extract_column_vars(full_rbql_expression); - var [format_expression, string_literals] = separate_string_literals_js(full_rbql_expression); - var rb_actions = separate_actions(format_expression); - - var js_meta_params = {}; - js_meta_params['__RBQLMP__user_init_code'] = user_init_code; - - if (rb_actions.hasOwnProperty(ORDER_BY) && rb_actions.hasOwnProperty(UPDATE)) - throw new RbqlParsingError('"ORDER BY" is not allowed in "UPDATE" queries'); - - if (rb_actions.hasOwnProperty(GROUP_BY)) { - if (rb_actions.hasOwnProperty(ORDER_BY) || rb_actions.hasOwnProperty(UPDATE)) - throw new RbqlParsingError('"ORDER BY" and "UPDATE" are not allowed in aggregate queries'); - var aggregation_key_expression = rb_actions[GROUP_BY]['text']; - js_meta_params['__RBQLMP__aggregation_key_expression'] = '[' + combine_string_literals(aggregation_key_expression, string_literals) + ']'; - } else { - js_meta_params['__RBQLMP__aggregation_key_expression'] = 'null'; - } - - let join_map = null; - if (rb_actions.hasOwnProperty(JOIN)) { - var [rhs_table_id, lhs_join_var, rhs_key_index] = parse_join_expression(rb_actions[JOIN]['text']); - js_meta_params['__RBQLMP__join_operation'] = rb_actions[JOIN]['join_subtype']; - js_meta_params['__RBQLMP__lhs_join_var'] = lhs_join_var; - if (join_tables_registry === null) - throw new RbqlParsingError('JOIN operations were disabled'); - let join_record_iterator = join_tables_registry.get_iterator_by_table_id(rhs_table_id); - if (!join_record_iterator) - throw new RbqlParsingError(`Unable to find join table: "${rhs_table_id}"`); - join_map = new HashJoinMap(join_record_iterator, rhs_key_index); - } else { - js_meta_params['__RBQLMP__join_operation'] = 'VOID'; - js_meta_params['__RBQLMP__lhs_join_var'] = 'null'; - } - - if (rb_actions.hasOwnProperty(WHERE)) { - var where_expression = rb_actions[WHERE]['text']; - if 
(/[^!=]=[^=]/.exec(where_expression)) { - throw new RbqlParsingError('Assignments "=" are not allowed in "WHERE" expressions. For equality test use "==" or "==="'); - } - js_meta_params['__RBQLMP__where_expression'] = combine_string_literals(where_expression, string_literals); - } else { - js_meta_params['__RBQLMP__where_expression'] = 'true'; - } - - - if (rb_actions.hasOwnProperty(UPDATE)) { - var update_expression = translate_update_expression(rb_actions[UPDATE]['text'], ' '.repeat(8)); - js_meta_params['__RBQLMP__writer_type'] = 'simple'; - js_meta_params['__RBQLMP__select_expression'] = 'null'; - js_meta_params['__RBQLMP__update_statements'] = combine_string_literals(update_expression, string_literals); - js_meta_params['__RBQLMP__is_select_query'] = 'false'; - js_meta_params['__RBQLMP__top_count'] = 'null'; - } - - js_meta_params['__RBQLMP__init_column_vars_update'] = generate_init_statements(column_vars, ' '.repeat(4)); - js_meta_params['__RBQLMP__init_column_vars_select'] = generate_init_statements(column_vars, ' '.repeat(8)); - - if (rb_actions.hasOwnProperty(SELECT)) { - var top_count = find_top(rb_actions); - js_meta_params['__RBQLMP__top_count'] = top_count === null ? 
'null' : String(top_count); - if (rb_actions[SELECT].hasOwnProperty('distinct_count')) { - js_meta_params['__RBQLMP__writer_type'] = 'uniq_count'; - } else if (rb_actions[SELECT].hasOwnProperty('distinct')) { - js_meta_params['__RBQLMP__writer_type'] = 'uniq'; - } else { - js_meta_params['__RBQLMP__writer_type'] = 'simple'; - } - if (rb_actions.hasOwnProperty(EXCEPT)) { - js_meta_params['__RBQLMP__select_expression'] = translate_except_expression(rb_actions[EXCEPT]['text']); - } else { - let select_expression = translate_select_expression_js(rb_actions[SELECT]['text']); - js_meta_params['__RBQLMP__select_expression'] = combine_string_literals(select_expression, string_literals); - } - js_meta_params['__RBQLMP__update_statements'] = ''; - js_meta_params['__RBQLMP__is_select_query'] = 'true'; - } - - if (rb_actions.hasOwnProperty(ORDER_BY)) { - var order_expression = rb_actions[ORDER_BY]['text']; - js_meta_params['__RBQLMP__sort_key_expression'] = combine_string_literals(order_expression, string_literals); - js_meta_params['__RBQLMP__reverse_flag'] = rb_actions[ORDER_BY]['reverse'] ? 
'true' : 'false'; - js_meta_params['__RBQLMP__sort_flag'] = 'true'; - } else { - js_meta_params['__RBQLMP__sort_key_expression'] = 'null'; - js_meta_params['__RBQLMP__reverse_flag'] = 'false'; - js_meta_params['__RBQLMP__sort_flag'] = 'false'; - } - var js_code = rbql_meta_format(js_template_text, js_meta_params); - return [js_code, join_map]; -} - - -function load_module_from_file(js_code) { - let os = require('os'); - let path = require('path'); - let fs = require('fs'); - var tmp_dir = os.tmpdir(); - var script_filename = 'rbconvert_' + String(Math.random()).replace('.', '_') + '.js'; - let tmp_worker_module_path = path.join(tmp_dir, script_filename); - fs.writeFileSync(tmp_worker_module_path, js_code); - let worker_module = require(tmp_worker_module_path); - return worker_module; -} - - -function generic_run(user_query, input_iterator, output_writer, success_handler, error_handler, join_tables_registry=null, user_init_code='') { - try { - user_init_code = indent_user_init_code(user_init_code); - let [js_code, join_map] = parse_to_js(user_query, external_js_template_text, join_tables_registry, user_init_code); - let rbql_worker = null; - if (debug_mode) { - rbql_worker = load_module_from_file(js_code); - } else { - let module = {'exports': {}}; - eval('(function(){' + js_code + '})()'); - rbql_worker = module.exports; - } - rbql_worker.rb_transform(input_iterator, join_map, output_writer, success_handler, error_handler, debug_mode); - } catch (e) { - if (e instanceof RbqlParsingError) { - error_handler('query parsing', e.message); - } else { - if (debug_mode) { - console.log('Unexpected exception, dumping stack trace:'); - console.log(e.stack); - } - error_handler('unexpected', 'Unexpected exception: ' + e); - } - } -} - - -function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) { - let keys = Object.keys(inconsistent_records_info); - let entries = []; - for (let i = 0; i < keys.length; i++) { - let key = keys[i]; - let record_id = 
inconsistent_records_info[key]; - entries.push([record_id, key]); - } - entries.sort(function(a, b) { return a[0] - b[0]; }); - assert(entries.length > 1); - let [record_1, num_fields_1] = entries[0]; - let [record_2, num_fields_2] = entries[1]; - let warn_msg = `Number of fields in "${table_name}" table is not consistent: `; - warn_msg += `e.g. record ${record_1} -> ${num_fields_1} fields, record ${record_2} -> ${num_fields_2} fields`; - return warn_msg; -} - - -function TableIterator(input_table) { - this.input_table = input_table; - this.NR = 0; - this.fields_info = new Object(); - this.external_record_callback = null; - this.external_finish_callback = null; - this.finished = false; - - - this.set_record_callback = function(external_record_callback) { - this.external_record_callback = external_record_callback; - }; - - - this.set_finish_callback = function(external_finish_callback) { - this.external_finish_callback = external_finish_callback; - }; - - - this.start = function() { - while (!this.finished) { - let record = this.get_record(); - if (record === null) { - this.finish(); - } else { - this.external_record_callback(record); - } - } - }; - - - this.finish = function() { - if (!this.finished) { - this.finished = true; - this.external_finish_callback(); - } - }; - - - this.get_record = function() { - if (this.NR >= this.input_table.length) - return null; - let record = this.input_table[this.NR]; - this.NR += 1; - let num_fields = record.length; - if (!this.fields_info.hasOwnProperty(num_fields)) - this.fields_info[num_fields] = this.NR; - return record; - }; - - this.get_warnings = function() { - if (Object.keys(this.fields_info).length > 1) - return [make_inconsistent_num_fields_warning('input', this.fields_info)]; - return []; - }; -} - - -function TableWriter(external_table) { - this.table = external_table; - - this.write = function(fields) { - this.table.push(fields); - }; - - this.finish = function(after_finish_callback) { - after_finish_callback(); - 
}; - - this.get_warnings = function() { - return []; - }; -} - - -function SingleTableRegistry(table, table_id='B') { - this.table = table; - this.table_id = table_id; - - this.get_iterator_by_table_id = function(table_id) { - if (table_id !== this.table_id) { - throw new RbqlIOHandlingError(`Unable to find join table: "${table_id}"`); - } - return new TableIterator(this.table); - }; -} - - -function table_run(user_query, input_table, output_table, success_handler, error_handler, join_table=null, user_init_code='') { - let input_iterator = new TableIterator(input_table); - let output_writer = new TableWriter(output_table); - let join_tables_registry = join_table === null ? null : new SingleTableRegistry(join_table); - generic_run(user_query, input_iterator, output_writer, success_handler, error_handler, join_tables_registry, user_init_code); -} - - -function set_debug_mode() { - debug_mode = true; -} - - -module.exports.version = version; -module.exports.generic_run = generic_run; -module.exports.table_run = table_run; - -module.exports.TableIterator = TableIterator; -module.exports.TableWriter = TableWriter; -module.exports.SingleTableRegistry = SingleTableRegistry; - -module.exports.strip_comments = strip_comments; -module.exports.separate_actions = separate_actions; -module.exports.separate_string_literals_js = separate_string_literals_js; -module.exports.combine_string_literals = combine_string_literals; -module.exports.translate_except_expression = translate_except_expression; -module.exports.parse_join_expression = parse_join_expression; -module.exports.translate_update_expression = translate_update_expression; -module.exports.translate_select_expression_js = translate_select_expression_js; - -module.exports.set_debug_mode = set_debug_mode; -})() - -// DO NOT EDIT! 
-// This file was autogenerated from builder.js and template.js using build_engine.js script - diff --git a/rbql/_version.py b/rbql/_version.py index 9540737..2163ab1 100644 --- a/rbql/_version.py +++ b/rbql/_version.py @@ -1,3 +1,3 @@ # Explanation of this file purpose: https://stackoverflow.com/a/16084844/2898283 -__version__ = '0.8.0' +__version__ = '0.9.0' diff --git a/rbql/engine/builder.py b/rbql/engine/builder.py index ff983e0..5edfa39 100755 --- a/rbql/engine/builder.py +++ b/rbql/engine/builder.py @@ -28,10 +28,17 @@ # Do not add CSV-related logic or variables/functions/objects like "delim", "separator" etc -# TODO get rid of functions with "_py" suffix +# TODO cosmetic refactoring: get rid of functions with "_py" suffix # TODO new feature: allow record iterator provide custom column names. +# TODO catch exceptions in user expression to report the exact place where it occurred: "SELECT" expression, "WHERE" expression, etc + +# TODO gracefully handle unknown encoding: generate RbqlIOHandlingError + +# TODO modify ARRAY_AGG: it should return array instead of joined string +# TODO document the second callback argument of ARRAY_AGG in the README.md + GROUP_BY = 'GROUP BY' UPDATE = 'UPDATE' @@ -47,7 +54,7 @@ -class RbqlRutimeError(Exception): +class RbqlRuntimeError(Exception): pass class RbqlIOHandlingError(Exception): @@ -59,7 +66,7 @@ class RbqlParsingError(Exception): def exception_to_error_info(e): exceptions_type_map = { - 'RbqlRutimeError': 'query execution', + 'RbqlRuntimeError': 'query execution', 'RbqlParsingError': 'query parsing', 'RbqlIOHandlingError': 'IO handling' } @@ -122,7 +129,7 @@ def generate_init_statements(column_vars, indent): def replace_star_count(aggregate_expression): - return re.sub(r'(^|(?<=,)) *COUNT\( *\* *\) *($|(?=,))', ' COUNT(1)', aggregate_expression).lstrip(' ') + return re.sub(r'(^|(?<=,)) *COUNT\( *\* *\) *($|(?=,))', ' COUNT(1)', aggregate_expression, flags=re.IGNORECASE).lstrip(' ') def replace_star_vars(rbql_expression): 
@@ -174,7 +181,7 @@ def separate_string_literals_py(rbql_expression): def combine_string_literals(backend_expression, string_literals): - for i in range(len(string_literals)): + for i in reversed(range(len(string_literals))): backend_expression = backend_expression.replace('###RBQL_STRING_LITERAL###{}'.format(i), string_literals[i]) return backend_expression @@ -316,7 +323,7 @@ def build(self): num_fields = len(fields) self.max_record_len = max(self.max_record_len, num_fields) if self.key_index >= num_fields: - raise RbqlRutimeError('No "b' + str(self.key_index + 1) + '" field at record: ' + str(nr) + ' in "B" table') + raise RbqlRuntimeError('No "b' + str(self.key_index + 1) + '" field at record: ' + str(nr) + ' in "B" table') key = fields[self.key_index] self.hash_map[key].append(fields) self.record_iterator.finish() diff --git a/rbql/engine/template.py b/rbql/engine/template.py index b2b8efe..a0a30f7 100644 --- a/rbql/engine/template.py +++ b/rbql/engine/template.py @@ -16,9 +16,6 @@ # Do not add CSV-related logic or variables/functions/objects like "delim", "separator", "split", "line", "path" etc -# TODO implement arrays passing to output_writer, e.g. for FOLD() - - try: pass __RBQLMP__user_init_code @@ -28,13 +25,12 @@ PY3 = sys.version_info[0] == 3 -unfold_list = None +unnest_list = None module_was_used_failsafe = False # Aggregators: aggregation_stage = 0 -aggr_init_counter = 0 functional_aggregators = list() writer = None @@ -42,6 +38,9 @@ NU = 0 # NU - Num Updated. Alternative variables: NW (Num Where) - Not Practical. NW (Num Written) - Impossible to implement. +wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside Python expressions is not allowed, see the docs' +numeric_conversion_error = 'Unable to convert value "{}" to int or float. 
MIN, MAX, SUM, AVG, MEDIAN and VARIANCE aggregate functions convert their string arguments to numeric values' + def iteritems6(x): if PY3: @@ -58,6 +57,10 @@ class RbqlRuntimeError(Exception): pass +class RbqlParsingError(Exception): + pass + + def safe_get(record, idx): return record[idx] if idx < len(record) else None @@ -76,45 +79,61 @@ def safe_set(record, idx, value): raise InternalBadFieldError(idx - 1) -class Marker(object): +class RBQLAggregationToken(object): def __init__(self, marker_id, value): self.marker_id = marker_id self.value = value def __str__(self): - raise TypeError('Marker') + raise TypeError('RBQLAggregationToken') -class UNFOLD: +class UNNEST: def __init__(self, vals): - global unfold_list - if unfold_list is not None: - # Technically we can support multiple UNFOLD's but the implementation/algorithm is more complex and just doesn't worth it - raise RbqlRuntimeError('Only one UNFOLD is allowed per query') - unfold_list = vals + global unnest_list + if unnest_list is not None: + # Technically we can support multiple UNNEST's but the implementation/algorithm is more complex and just doesn't worth it + raise RbqlParsingError('Only one UNNEST is allowed per query') + unnest_list = vals def __str__(self): - raise TypeError('UNFOLD') + raise TypeError('UNNEST') + +unnest = UNNEST +Unnest = UNNEST +UNFOLD = UNNEST # "UNFOLD" is deprecated, just for backward compatibility class NumHandler: - def __init__(self): - self.is_int = True + def __init__(self, start_with_int): + self.is_int = start_with_int + self.string_detection_done = False + self.is_str = False - def parse(self, str_val): - if not self.is_int: - return float(str_val) + def parse(self, val): + if not self.string_detection_done: + self.string_detection_done = True + if PY3 and isinstance(val, str): + self.is_str = True + if not PY3 and isinstance(val, basestring): + self.is_str = True + if not self.is_str: + return val + if self.is_int: + try: + return int(val) + except ValueError: + 
self.is_int = False try: - return int(str_val) + return float(val) except ValueError: - self.is_int = False - return float(str_val) + raise RbqlRuntimeError(numeric_conversion_error.format(val)) class MinAggregator: def __init__(self): self.stats = dict() - self.num_handler = NumHandler() + self.num_handler = NumHandler(True) def increment(self, key, val): val = self.num_handler.parse(val) @@ -122,7 +141,7 @@ def increment(self, key, val): if cur_aggr is None: self.stats[key] = val else: - self.stats[key] = min(cur_aggr, val) + self.stats[key] = builtin_min(cur_aggr, val) def get_final(self, key): return self.stats[key] @@ -131,7 +150,7 @@ def get_final(self, key): class MaxAggregator: def __init__(self): self.stats = dict() - self.num_handler = NumHandler() + self.num_handler = NumHandler(True) def increment(self, key, val): val = self.num_handler.parse(val) @@ -139,18 +158,7 @@ def increment(self, key, val): if cur_aggr is None: self.stats[key] = val else: - self.stats[key] = max(cur_aggr, val) - - def get_final(self, key): - return self.stats[key] - - -class CountAggregator: - def __init__(self): - self.stats = defaultdict(int) - - def increment(self, key, val): - self.stats[key] += 1 + self.stats[key] = builtin_max(cur_aggr, val) def get_final(self, key): return self.stats[key] @@ -159,7 +167,7 @@ def get_final(self, key): class SumAggregator: def __init__(self): self.stats = defaultdict(int) - self.num_handler = NumHandler() + self.num_handler = NumHandler(True) def increment(self, key, val): val = self.num_handler.parse(val) @@ -172,9 +180,10 @@ def get_final(self, key): class AvgAggregator: def __init__(self): self.stats = dict() + self.num_handler = NumHandler(False) def increment(self, key, val): - val = float(val) + val = self.num_handler.parse(val) cur_aggr = self.stats.get(key) if cur_aggr is None: self.stats[key] = (val, 1) @@ -190,9 +199,10 @@ def get_final(self, key): class VarianceAggregator: def __init__(self): self.stats = dict() + 
self.num_handler = NumHandler(False) def increment(self, key, val): - val = float(val) + val = self.num_handler.parse(val) cur_aggr = self.stats.get(key) if cur_aggr is None: self.stats[key] = (val, val ** 2, 1) @@ -208,7 +218,7 @@ def get_final(self, key): class MedianAggregator: def __init__(self): self.stats = defaultdict(list) - self.num_handler = NumHandler() + self.num_handler = NumHandler(True) def increment(self, key, val): val = self.num_handler.parse(val) @@ -226,7 +236,18 @@ def get_final(self, key): return a if a == b else (a + b) / 2.0 -class FoldAggregator: +class CountAggregator: + def __init__(self): + self.stats = defaultdict(int) + + def increment(self, key, val): + self.stats[key] += 1 + + def get_final(self, key): + return self.stats[key] + + +class ArrayAggAggregator: def __init__(self, post_proc): self.stats = defaultdict(list) self.post_proc = post_proc @@ -239,66 +260,147 @@ def get_final(self, key): return self.post_proc(res) -class SubkeyChecker: - def __init__(self): - self.subkeys = dict() +class ConstGroupVerifier: + def __init__(self, output_index): + self.const_values = dict() + self.output_index = output_index - def increment(self, key, subkey): - old_subkey = self.subkeys.get(key) - if old_subkey is None: - self.subkeys[key] = subkey - elif old_subkey != subkey: - raise RuntimeError('Unable to group by "{}", different values in output: "{}" and "{}"'.format(key, old_subkey, subkey)) + def increment(self, key, value): + old_value = self.const_values.get(key) + if old_value is None: + self.const_values[key] = value + elif old_value != value: + raise RbqlRuntimeError('Invalid aggregate expression: non-constant values in output column {}. E.g. 
"{}" and "{}"'.format(self.output_index + 1, old_value, value)) def get_final(self, key): - return self.subkeys[key] + return self.const_values[key] def init_aggregator(generator_name, val, post_proc=None): global aggregation_stage - global aggr_init_counter aggregation_stage = 1 - assert aggr_init_counter == len(functional_aggregators) + res = RBQLAggregationToken(len(functional_aggregators), val) if post_proc is not None: functional_aggregators.append(generator_name(post_proc)) else: functional_aggregators.append(generator_name()) - res = Marker(aggr_init_counter, val) - aggr_init_counter += 1 return res def MIN(val): return init_aggregator(MinAggregator, val) if aggregation_stage < 2 else val +# min = MIN - see the mad max copypaste below +Min = MIN + def MAX(val): return init_aggregator(MaxAggregator, val) if aggregation_stage < 2 else val +# max = MAX - see the mad max copypaste below +Max = MAX + def COUNT(val): return init_aggregator(CountAggregator, 1) if aggregation_stage < 2 else 1 +count = COUNT +Count = COUNT + def SUM(val): return init_aggregator(SumAggregator, val) if aggregation_stage < 2 else val +# sum = SUM - see the mad max copypaste below +Sum = SUM + def AVG(val): return init_aggregator(AvgAggregator, val) if aggregation_stage < 2 else val +avg = AVG +Avg = AVG + def VARIANCE(val): return init_aggregator(VarianceAggregator, val) if aggregation_stage < 2 else val +variance = VARIANCE +Variance = VARIANCE + def MEDIAN(val): return init_aggregator(MedianAggregator, val) if aggregation_stage < 2 else val +median = MEDIAN +Median = MEDIAN + -def FOLD(val, post_proc=lambda v: '|'.join(v)): +def ARRAY_AGG(val, post_proc=lambda v: '|'.join(v)): # TODO consider passing array to output writer - return init_aggregator(FoldAggregator, val, post_proc) if aggregation_stage < 2 else val + return init_aggregator(ArrayAggAggregator, val, post_proc) if aggregation_stage < 2 else val + +array_agg = ARRAY_AGG +FOLD = ARRAY_AGG # "FOLD" is deprecated, just for 
backward compatibility + + +# <<<< COPYPASTE FROM "mad_max.py" +##################################### +##################################### +# This is to ensure that "mad_max.py" file has exactly the same content as this fragment. This condition will be ensured by test_mad_max.py +# To edit this code you need to simultaneously edit this fragment and content of mad_max.py, otherwise test_mad_max.py will fail. + +builtin_max = max +builtin_min = min +builtin_sum = sum + + +def max(*args, **kwargs): + single_arg = len(args) == 1 and not kwargs + if single_arg: + if PY3 and isinstance(args[0], str): + return MAX(args[0]) + if not PY3 and isinstance(args[0], basestring): + return MAX(args[0]) + if isinstance(args[0], int) or isinstance(args[0], float): + return MAX(args[0]) + try: + return builtin_max(*args, **kwargs) + except TypeError: + if single_arg: + return MAX(args[0]) + raise + + +def min(*args, **kwargs): + single_arg = len(args) == 1 and not kwargs + if single_arg: + if PY3 and isinstance(args[0], str): + return MIN(args[0]) + if not PY3 and isinstance(args[0], basestring): + return MIN(args[0]) + if isinstance(args[0], int) or isinstance(args[0], float): + return MIN(args[0]) + try: + return builtin_min(*args, **kwargs) + except TypeError: + if single_arg: + return MIN(args[0]) + raise + + +def sum(*args): + try: + return builtin_sum(*args) + except TypeError: + if len(args) == 1: + return SUM(args[0]) + raise + +##################################### +##################################### +# >>>> COPYPASTE END + def add_to_set(dst_set, value): @@ -473,15 +575,19 @@ def select_aggregated(key, transparent_values): if aggregation_stage == 1: global writer if type(writer) is not TopWriter: - raise RbqlRuntimeError('Unable to use "ORDER BY" or "DISTINCT" keywords in aggregate query') + raise RbqlParsingError('Unable to use "ORDER BY" or "DISTINCT" keywords in aggregate query') writer = AggregateWriter(writer) + num_aggregators_found = 0 for i, trans_value in 
enumerate(transparent_values): - if isinstance(trans_value, Marker): + if isinstance(trans_value, RBQLAggregationToken): + num_aggregators_found += 1 writer.aggregators.append(functional_aggregators[trans_value.marker_id]) writer.aggregators[-1].increment(key, trans_value.value) else: - writer.aggregators.append(SubkeyChecker()) + writer.aggregators.append(ConstGroupVerifier(len(writer.aggregators))) writer.aggregators[-1].increment(key, trans_value) + if num_aggregators_found != len(functional_aggregators): + raise RbqlParsingError(wrong_aggregation_usage_error) aggregation_stage = 2 else: for i, trans_value in enumerate(transparent_values): @@ -489,25 +595,25 @@ def select_aggregated(key, transparent_values): writer.aggregation_keys.add(key) -def select_unfolded(sort_key, folded_fields): - unfold_pos = None +def select_unnested(sort_key, folded_fields): + unnest_pos = None for i, trans_value in enumerate(folded_fields): - if isinstance(trans_value, UNFOLD): - unfold_pos = i + if isinstance(trans_value, UNNEST): + unnest_pos = i break - assert unfold_pos is not None - for v in unfold_list: + assert unnest_pos is not None + for v in unnest_list: out_fields = folded_fields[:] - out_fields[unfold_pos] = v + out_fields[unnest_pos] = v if not select_simple(sort_key, out_fields): return False return True def process_select(NR, NF, afields, rhs_records): - global unfold_list + global unnest_list for bfields in rhs_records: - unfold_list = None + unnest_list = None if bfields is None: star_fields = afields else: @@ -521,8 +627,8 @@ def process_select(NR, NF, afields, rhs_records): select_aggregated(key, out_fields) else: sort_key = (__RBQLMP__sort_key_expression) - if unfold_list is not None: - if not select_unfolded(sort_key, out_fields): + if unnest_list is not None: + if not select_unnested(sort_key, out_fields): return False else: if not select_simple(sort_key, out_fields): @@ -537,7 +643,7 @@ def rb_transform(input_iterator, join_map_impl, output_writer): global 
writer - process_function = process_select if __RBQLMP__is_select_query else process_update + polymorphic_process = process_select if __RBQLMP__is_select_query else process_update sql_join_type = {'VOID': FakeJoiner, 'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}['__RBQLMP__join_operation'] if join_map_impl is not None: @@ -563,12 +669,16 @@ def rb_transform(input_iterator, join_map_impl, output_writer): NF = len(afields) try: rhs_records = join_map.get_rhs(__RBQLMP__lhs_join_var) - if not process_function(NR, NF, afields, rhs_records): + if not polymorphic_process(NR, NF, afields, rhs_records): break except InternalBadFieldError as e: bad_idx = e.bad_idx raise RbqlRuntimeError('No "a' + str(bad_idx + 1) + '" field at record: ' + str(NR)) + except RbqlParsingError: + raise except Exception as e: + if str(e).find('RBQLAggregationToken') != -1: + raise RbqlParsingError(wrong_aggregation_usage_error) raise RbqlRuntimeError('At record: ' + str(NR) + ', Details: ' + str(e)) writer.finish() return True diff --git a/rbql/rbql_csv.py b/rbql/rbql_csv.py index 86c98e1..be66f34 100755 --- a/rbql/rbql_csv.py +++ b/rbql/rbql_csv.py @@ -415,6 +415,9 @@ def csv_run(user_query, input_path, input_delim, input_policy, output_path, outp if not is_ascii(user_query) and csv_encoding == 'latin-1': raise RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary') + if (not is_ascii(input_delim) or not is_ascii(output_delim)) and csv_encoding == 'latin-1': + raise RbqlIOHandlingError('To use non-ascii separators enable UTF-8 encoding instead of latin-1/binary') + default_init_source_path = os.path.join(os.path.expanduser('~'), '.rbql_init_source.py') if user_init_code == '' and os.path.exists(default_init_source_path): user_init_code = read_user_init_code(default_init_source_path) diff --git a/rbql/rbql_main.py b/rbql/rbql_main.py index a168ae9..0c6e856 100755 --- 
a/rbql/rbql_main.py +++ b/rbql/rbql_main.py @@ -273,6 +273,12 @@ def main(): show_error('generic', 'Using "--policy" without "--delim" is not allowed', is_interactive=False) sys.exit(1) + if args.encoding != 'latin-1' and not PY3: + if args.delim is not None: + args.delim = args.delim.decode(args.encoding) + if args.query is not None: + args.query = args.query.decode(args.encoding) + if args.query: if args.delim is None: show_error('generic', 'Separator must be provided with "--delim" option in non-interactive mode', is_interactive=False)