From 3effe9e2d36187533300785d6279579027334aec Mon Sep 17 00:00:00 2001
From: mechatroner <mechatroner@yandex.ru>
Date: Sat, 12 Jun 2021 21:16:57 -0400
Subject: [PATCH] merge from RBQL

---
 rbql-js/cli_rbql.js |  25 ++--
 rbql-js/rbql.js     | 336 +++++++++++++++++++++++++++++++++++++++-----
 rbql-js/rbql_csv.js | 129 ++++++++++++-----
 rbql/_version.py    |   2 +-
 rbql/csv_utils.py   |  16 +--
 rbql/rbql_csv.py    | 166 ++++++++++++----------
 rbql/rbql_engine.py | 290 ++++++++++++++++++++++++++++++++------
 rbql/rbql_main.py   | 325 ++++++++++++++++++++++++++++++++++--------
 8 files changed, 1027 insertions(+), 262 deletions(-)

diff --git a/rbql-js/cli_rbql.js b/rbql-js/cli_rbql.js
index db25c12..3a7a4c2 100755
--- a/rbql-js/cli_rbql.js
+++ b/rbql-js/cli_rbql.js
@@ -158,7 +158,7 @@ async function autodetect_delim_policy(table_path) {
 }
 
 
-function print_colorized(records, delim, show_column_names, skip_header) {
+function print_colorized(records, delim, show_column_names, with_headers) {
     let reset_color_code = '\x1b[0m';
     let color_codes = ['\x1b[0m', '\x1b[31m', '\x1b[32m', '\x1b[33m', '\x1b[34m', '\x1b[35m', '\x1b[36m', '\x1b[31;1m', '\x1b[32;1m', '\x1b[33;1m'];
     for (let r = 0; r < records.length; r++) {
@@ -166,7 +166,7 @@ function print_colorized(records, delim, show_column_names, skip_header) {
         for (let c = 0; c < records[r].length; c++) {
             let color_code = color_codes[c % color_codes.length];
             let field = records[r][c];
-            let colored_field = (!show_column_names || (skip_header && r == 0)) ? color_code + field : `${color_code}a${c + 1}:${field}`;
+            let colored_field = (!show_column_names || (with_headers && r == 0)) ? color_code + field : `${color_code}a${c + 1}:${field}`;
             out_fields.push(colored_field);
         }
         let out_line = out_fields.join(delim) + reset_color_code;
@@ -208,7 +208,8 @@ async function run_with_js(args) {
     var input_path = get_default(args, 'input', null);
     var output_path = get_default(args, 'output', null);
     var csv_encoding = args['encoding'];
-    var skip_header = args['skip-header'];
+    var with_headers = args['with-headers'];
+    var comment_prefix = args['comment-prefix'];
     var output_delim = get_default(args, 'out-delim', null);
     var output_policy = get_default(args, 'out-policy', null);
     let init_source_file = get_default(args, 'init-source-file', null);
@@ -222,7 +223,14 @@ async function run_with_js(args) {
         user_init_code = rbql_csv.read_user_init_code(init_source_file);
     try {
         let warnings = [];
-        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, skip_header, user_init_code, {'bulk_read': true});
+        // Do not use bulk_read mode here because:
+        // * Bulk read can't handle large files since node is unable to read the whole file into a string, see https://github.com/mechatroner/rainbow_csv/issues/19
+        // * In case of stdin read we would have to use the util.TextDecoder anyway
+        // * binary/latin-1 do not require the decoder anyway
+        // * This is CLI so no way we are in the Electron environment which can't use the TextDecoder
+        // * Streaming mode works a little faster (since we don't need to do the manual validation)
+        // TODO check if the current node installation doesn't have ICU enabled (which is typically provided by Node.js by default, see https://nodejs.org/api/intl.html) and report a user-friendly error with an option to use latin-1 encoding or switch the interpreter
+        await rbql_csv.query_csv(query, input_path, delim, policy, output_path, output_delim, output_policy, csv_encoding, warnings, with_headers, comment_prefix, user_init_code/*, {'bulk_read': true}*/);
         await handle_query_success(warnings, output_path, csv_encoding, output_delim, output_policy);
         return true;
     } catch (e) {
@@ -242,11 +250,11 @@ function get_default_output_path(input_path, delim) {
 }
 
 
-async function show_preview(input_path, encoding, delim, policy, skip_header) {
+async function show_preview(input_path, encoding, delim, policy, with_headers) {
     let [records, warnings] = await sample_records(input_path, encoding, delim, policy);
     console.log('Input table preview:');
     console.log('====================================');
-    print_colorized(records, delim, true, skip_header);
+    print_colorized(records, delim, true, with_headers);
     console.log('====================================\n');
     for (let warning of warnings) {
         show_warning(warning);
@@ -272,7 +280,7 @@ async function run_interactive_loop(args) {
         if (!delim)
             throw new GenericError('Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option');
     }
-    await show_preview(input_path, args['encoding'], delim, policy, args['skip-header']);
+    await show_preview(input_path, args['encoding'], delim, policy, args['with-headers']);
     args.delim = delim;
     args.policy = policy;
     if (!args.output) {
@@ -357,7 +365,8 @@ function main() {
         '--output': {'help': 'Write output table to FILE instead of stdout', 'metavar': 'FILE'},
         '--delim': {'help': 'Delimiter character or multicharacter string, e.g. "," or "###". Can be autodetected in interactive mode', 'metavar': 'DELIM'},
         '--policy': {'help': 'Split policy, see the explanation below. Supported values: "simple", "quoted", "quoted_rfc", "whitespace", "monocolumn". Can be autodetected in interactive mode', 'metavar': 'POLICY'},
-        '--skip-header': {'boolean': true, 'help': 'Skip header line in input and join tables. Roughly equivalent of ... WHERE NR > 1 ... in your Query'},
+        '--with-headers': {'boolean': true, 'help': 'Indicates that input (and join) table has header'},
+        '--comment-prefix': {'help': 'Ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"', 'metavar': 'PREFIX'},
         '--encoding': {'default': 'utf-8', 'help': 'Manually set csv encoding', 'metavar': 'ENCODING'},
         '--out-format': {'default': 'input', 'help': 'Output format. Supported values: ' + out_format_names.map(v => `"${v}"`).join(', '), 'metavar': 'FORMAT'},
         '--out-delim': {'help': 'Output delim. Use with "out-policy". Overrides out-format', 'metavar': 'DELIM'},
diff --git a/rbql-js/rbql.js b/rbql-js/rbql.js
index 16ab5a3..8ab6e63 100755
--- a/rbql-js/rbql.js
+++ b/rbql-js/rbql.js
@@ -70,7 +70,113 @@ var query_context = null; // Needs to be global for MIN(), MAX(), etc functions
 
 
 const wrong_aggregation_usage_error = 'Usage of RBQL aggregation functions inside JavaScript expressions is not allowed, see the docs';
-const RBQL_VERSION = '0.17.0';
+const RBQL_VERSION = '0.20.0';
+
+
+function check_if_brackets_match(opening_bracket, closing_bracket) {
+    return (opening_bracket == '[' && closing_bracket == ']') || (opening_bracket == '(' && closing_bracket == ')') || (opening_bracket == '{' && closing_bracket == '}');
+}
+
+
+function parse_root_bracket_level_text_spans(select_expression) {
+    let text_spans = []; // parts of text separated by commas at the root parenthesis level
+    let last_pos = 0;
+    let bracket_stack = [];
+    for (let i = 0; i < select_expression.length; i++) {
+        let cur_char = select_expression[i];
+        if (cur_char == ',' && bracket_stack.length == 0) {
+            text_spans.push(select_expression.substring(last_pos, i));
+            last_pos = i + 1;
+        } else if (['[', '{', '('].indexOf(cur_char) != -1) {
+            bracket_stack.push(cur_char);
+        } else if ([']', '}', ')'].indexOf(cur_char) != -1) {
+            if (bracket_stack.length && check_if_brackets_match(bracket_stack[bracket_stack.length - 1], cur_char)) {
+                bracket_stack.pop();
+            } else {
+                throw new RbqlParsingError(`Unable to parse column headers in SELECT expression: No matching opening bracket for closing "${cur_char}"`);
+            }
+        }
+    }
+    if (bracket_stack.length) {
+        throw new RbqlParsingError(`Unable to parse column headers in SELECT expression: No matching closing bracket for opening "${bracket_stack[0]}"`);
+    }
+    text_spans.push(select_expression.substring(last_pos, select_expression.length));
+    text_spans = text_spans.map(span => span.trim());
+    return text_spans;
+}
+
+
+function unquote_string(quoted_str) {
+    // It's possible to use eval here to unquote the quoted_column_name, but it would be a little barbaric, let's do it manually instead
+    if (!quoted_str || quoted_str.length < 2)
+        return null;
+    if (quoted_str[0] == "'" && quoted_str[quoted_str.length - 1] == "'") {
+        return quoted_str.substring(1, quoted_str.length - 1).replace(/\\'/g, "'").replace(/\\\\/g, "\\");
+    } else if (quoted_str[0] == '"' && quoted_str[quoted_str.length - 1] == '"') {
+        return quoted_str.substring(1, quoted_str.length - 1).replace(/\\"/g, '"').replace(/\\\\/g, "\\");
+    } else {
+        return null;
+    }
+}
+
+
+function column_info_from_text_span(text_span, string_literals) {
+    // This function is a rough equivalent of "column_info_from_node()" function in python version of RBQL
+    text_span = text_span.trim();
+    let rbql_star_marker = '__RBQL_INTERNAL_STAR';
+    let simple_var_match = /^[_a-zA-Z][_a-zA-Z0-9]*$/.exec(text_span);
+    let attribute_match = /^([ab])\.([_a-zA-Z][_a-zA-Z0-9]*)$/.exec(text_span);
+    let subscript_int_match = /^([ab])\[([0-9]+)\]$/.exec(text_span);
+    let subscript_str_match = /^([ab])\[___RBQL_STRING_LITERAL([0-9]+)___\]$/.exec(text_span);
+    if (simple_var_match !== null) {
+        if (text_span == rbql_star_marker)
+            return {table_name: null, column_index: null, column_name: null, is_star: true};
+        if (text_span.startsWith('___RBQL_STRING_LITERAL'))
+            return null;
+        let match = /^([ab])([0-9]+)$/.exec(text_span);
+        if (match !== null) {
+            return {table_name: match[1], column_index: parseInt(match[2]) - 1, column_name: null, is_star: false};
+        }
+        // Some examples for this branch: NR, NF
+        return {table_name: null, column_index: null, column_name: text_span, is_star: false};
+    } else if (attribute_match !== null) {
+        let table_name = attribute_match[1];
+        let column_name = attribute_match[2];
+        if (column_name == rbql_star_marker) {
+            return {table_name: table_name, column_index: null, column_name: null, is_star: true};
+        }
+        return {table_name: null, column_index: null, column_name: column_name, is_star: false};
+    } else if (subscript_int_match != null) {
+        let table_name = subscript_int_match[1];
+        let column_index = parseInt(subscript_int_match[2]) - 1;
+        return {table_name: table_name, column_index: column_index, column_name: null, is_star: false};
+    } else if (subscript_str_match != null) {
+        let table_name = subscript_str_match[1];
+        let replaced_string_literal_id = subscript_str_match[2];
+        if (replaced_string_literal_id < string_literals.length) {
+            let quoted_column_name = string_literals[replaced_string_literal_id];
+            let unquoted_column_name = unquote_string(quoted_column_name);
+            if (unquoted_column_name) {
+                return {table_name: null, column_index: null, column_name: unquoted_column_name, is_star: false};
+            }
+        }
+    }
+    return null;
+}
+
+
+function adhoc_parse_select_expression_to_column_infos(select_expression, string_literals) {
+    // It is acceptable for the algorithm to provide a null column name when it could be theoretically possible to deduce the name.
+    // I.e. this algorithm guarantees precision but doesn't guarantee completeness in all theoretically possible queries.
+    // Although the algorithm should be complete in all practical scenarios, i.e. it should be hard to come up with a query that doesn't produce a complete set of column names.
+    // The null column name just means that the output column will be named as col{i}, so the failure to detect the proper column name can be tolerated.
+    // Specifically this function guarantees the following:
+    // 1. The number of column_infos is correct and will match the number of fields in each record in the output - otherwise the exception should be thrown
+    // 2. If column_info at pos j is not null, it is guaranteed to correctly represent that column name in the output
+    let text_spans = parse_root_bracket_level_text_spans(select_expression);
+    let column_infos = text_spans.map(ts => column_info_from_text_span(ts, string_literals));
+    return column_infos;
+}
 
 
 function stable_compare(a, b) {
@@ -860,13 +966,22 @@ async function compile_and_run(query_context) {
         if (e instanceof SyntaxError) {
             // SyntaxError's from eval() function do not contain detailed explanation of what has caused the syntax error, so to guess what was wrong we can only use the original query
             // v8 issue to fix eval: https://bugs.chromium.org/p/v8/issues/detail?id=2589
-            if (query_context.query_text.toLowerCase().indexOf(' having ') != -1)
+            let lower_case_query = query_context.query_text.toLowerCase();
+            if (lower_case_query.indexOf(' having ') != -1)
                 throw new SyntaxError(e.message + "\nRBQL doesn't support \"HAVING\" keyword");
-            if (query_context.query_text.toLowerCase().indexOf(' like ') != -1)
+            if (lower_case_query.indexOf(' like ') != -1)
                 throw new SyntaxError(e.message + "\nRBQL doesn't support \"LIKE\" operator, use like() function instead e.g. ... WHERE like(a1, 'foo%bar') ... "); // UT JSON
-            if (query_context.query_text.toLowerCase().indexOf(' from ') != -1)
+            if (lower_case_query.indexOf(' from ') != -1)
                 throw new SyntaxError(e.message + "\nRBQL doesn't use \"FROM\" keyword, e.g. you can query 'SELECT *' without FROM"); // UT JSON
+            if (e && e.message && String(e.message).toLowerCase().indexOf('unexpected identifier') != -1) {
+                if (lower_case_query.indexOf(' and ') != -1)
+                    throw new SyntaxError(e.message + "\nDid you use 'and' keyword in your query?\nJavaScript backend doesn't support 'and' keyword, use '&&' operator instead!");
+                if (lower_case_query.indexOf(' or ') != -1)
+                    throw new SyntaxError(e.message + "\nDid you use 'or' keyword in your query?\nJavaScript backend doesn't support 'or' keyword, use '||' operator instead!");
+            }
         }
+        if (e && e.message && e.message.indexOf('Received an instance of RBQLAggregationToken') != -1)
+            throw new RbqlParsingError(wrong_aggregation_usage_error);
         throw e;
     }
 }
@@ -884,6 +999,7 @@ const ORDER_BY = 'ORDER BY';
 const WHERE = 'WHERE';
 const LIMIT = 'LIMIT';
 const EXCEPT = 'EXCEPT';
+const WITH = 'WITH';
 
 
 function get_ambiguous_error_msg(variable_name) {
@@ -916,7 +1032,7 @@ function strip_comments(cline) {
 
 function combine_string_literals(backend_expression, string_literals) {
     for (var i = 0; i < string_literals.length; i++) {
-        backend_expression = replace_all(backend_expression, `###RBQL_STRING_LITERAL${i}###`, string_literals[i]);
+        backend_expression = replace_all(backend_expression, `___RBQL_STRING_LITERAL${i}___`, string_literals[i]);
     }
     return backend_expression;
 }
@@ -1163,6 +1279,24 @@ function replace_star_vars(rbql_expression) {
 }
 
 
+function replace_star_vars_for_header_parsing(rbql_expression) {
+    let star_rgx = /(?:(?<=^)|(?<=,)) *(\*|a\.\*|b\.\*) *(?=$|,)/g;
+    let matches = get_all_matches(star_rgx, rbql_expression);
+    let last_pos = 0;
+    let result = '';
+    for (let match of matches) {
+        let star_expression = match[1];
+        let replacement_expression = {'*': '__RBQL_INTERNAL_STAR', 'a.*': 'a.__RBQL_INTERNAL_STAR', 'b.*': 'b.__RBQL_INTERNAL_STAR'}[star_expression];
+        if (last_pos < match.index)
+            result += rbql_expression.substring(last_pos, match.index);
+        result += replacement_expression;
+        last_pos = match.index + match[0].length;
+    }
+    result += rbql_expression.substring(last_pos);
+    return result;
+}
+
+
 function translate_update_expression(update_expression, input_variables_map, string_literals, indent) {
     let first_assignment = str_strip(update_expression.split('=')[0]);
     let first_assignment_error = `Unable to parse "UPDATE" expression: the expression must start with assignment, but "${first_assignment}" does not look like an assignable field name`;
@@ -1194,12 +1328,12 @@ function translate_update_expression(update_expression, input_variables_map, str
 
 
 function translate_select_expression(select_expression) {
-    var translated = replace_star_count(select_expression);
-    translated = replace_star_vars(translated);
-    translated = str_strip(translated);
+    let expression_without_stars = replace_star_count(select_expression);
+    let translated = str_strip(replace_star_vars(expression_without_stars));
+    let translated_for_header = str_strip(replace_star_vars_for_header_parsing(expression_without_stars));
     if (!translated.length)
         throw new RbqlParsingError('"SELECT" expression is empty');
-    return `[].concat([${translated}])`;
+    return [`[].concat([${translated}])`, translated_for_header];
 }
 
 
@@ -1216,7 +1350,7 @@ function separate_string_literals(rbql_expression) {
         string_literals.push(string_literal);
         var start_index = match_obj.index;
         format_parts.push(rbql_expression.substring(idx_before, start_index));
-        format_parts.push(`###RBQL_STRING_LITERAL${literal_id}###`);
+        format_parts.push(`___RBQL_STRING_LITERAL${literal_id}___`);
         idx_before = rgx.lastIndex;
     }
     format_parts.push(rbql_expression.substring(idx_before));
@@ -1260,8 +1394,13 @@ function locate_statements(rbql_expression) {
 
 function separate_actions(rbql_expression) {
     rbql_expression = str_strip(rbql_expression);
-    var ordered_statements = locate_statements(rbql_expression);
     var result = {};
+    let with_match = /^(.*)  *[Ww][Ii][Tt][Hh] *\(([a-z]{4,20})\) *$/.exec(rbql_expression);
+    if (with_match !== null) {
+        rbql_expression = with_match[1];
+        result[WITH] = with_match[2];
+    }
+    var ordered_statements = locate_statements(rbql_expression);
     for (var i = 0; i < ordered_statements.length; i++) {
         var statement_start = ordered_statements[i][0];
         var span_start = ordered_statements[i][1];
@@ -1296,7 +1435,7 @@ function separate_actions(rbql_expression) {
         if (statement == SELECT) {
             if (statement_start != 0)
                 throw new RbqlParsingError('SELECT keyword must be at the beginning of the query');
-            var match = /^ *TOP *([0-9]+) /i.exec(span);
+            let match = /^ *TOP *([0-9]+) /i.exec(span);
             if (match !== null) {
                 statement_params['top'] = parseInt(match[1]);
                 span = span.substr(match.index + match[0].length);
@@ -1338,7 +1477,7 @@ function find_top(rb_actions) {
 }
 
 
-function translate_except_expression(except_expression, input_variables_map, string_literals) {
+function translate_except_expression(except_expression, input_variables_map, string_literals, input_header) {
     let skip_vars = except_expression.split(',');
     skip_vars = skip_vars.map(str_strip);
     let skip_indices = [];
@@ -1349,8 +1488,9 @@ function translate_except_expression(except_expression, input_variables_map, str
         skip_indices.push(input_variables_map[var_name].index);
     }
     skip_indices = skip_indices.sort((a, b) => a - b);
+    let output_header = input_header === null ? null : select_except(input_header, skip_indices);
     let indices_str = skip_indices.join(',');
-    return `select_except(record_a, [${indices_str}])`;
+    return [output_header, `select_except(record_a, [${indices_str}])`];
 }
 
 
@@ -1430,6 +1570,44 @@ function remove_redundant_table_name(query_text) {
 }
 
 
+function select_output_header(input_header, join_header, query_column_infos) {
+    if (input_header === null && join_header === null)
+        return null;
+    if (input_header === null)
+        input_header = [];
+    if (join_header === null)
+        join_header = [];
+    let output_header = [];
+    for (let qci of query_column_infos) {
+        // TODO refactor this and python version: extract this code into a function instead to always return something
+        if (qci === null) {
+            output_header.push('col' + (output_header.length + 1));
+        } else if (qci.is_star) {
+            if (qci.table_name === null) {
+                output_header = output_header.concat(input_header).concat(join_header);
+            } else if (qci.table_name === 'a') {
+                output_header = output_header.concat(input_header);
+            } else if (qci.table_name === 'b') {
+                output_header = output_header.concat(join_header);
+            }
+        } else if (qci.column_name !== null) {
+            output_header.push(qci.column_name);
+        } else if (qci.column_index !== null) {
+            if (qci.table_name == 'a' && qci.column_index < input_header.length) {
+                output_header.push(input_header[qci.column_index]);
+            } else if (qci.table_name == 'b' && qci.column_index < join_header.length) {
+                output_header.push(join_header[qci.column_index]);
+            } else {
+                output_header.push('col' + (output_header.length + 1));
+            }
+        } else { // Should never happen
+            output_header.push('col' + (output_header.length + 1));
+        }
+    }
+    return output_header;
+}
+
+
 function make_inconsistent_num_fields_warning(table_name, inconsistent_records_info) {
     let keys = Object.keys(inconsistent_records_info);
     let entries = [];
@@ -1448,8 +1626,66 @@ function make_inconsistent_num_fields_warning(table_name, inconsistent_records_i
 }
 
 
-class TableIterator {
+class RBQLInputIterator {
+    constructor(){}
+    stop() {
+        throw new Error("Unable to call the interface method");
+    }
+    async get_variables_map(query_text) {
+        throw new Error("Unable to call the interface method");
+    }
+    async get_record() {
+        throw new Error("Unable to call the interface method");
+    }
+    handle_query_modifier() {
+        return; // Reimplement if you need to handle a boolean query modifier that can be used like this: `SELECT * WITH (modifiername)`
+    }
+    get_warnings() {
+        return []; // Reimplement if your class can produce warnings
+    }
+    async get_header() {
+        return null; // Reimplement if your class can provide input header
+    }
+}
+
+
+class RBQLOutputWriter {
+    constructor(){}
+
+    write(fields) {
+        throw new Error("Unable to call the interface method");
+    }
+
+    async finish() {
+        // Reimplement if your class needs to do something on finish e.g. cleanup
+    };
+
+    get_warnings() {
+        return []; // Reimplement if your class can produce warnings
+    };
+
+    set_header() {
+        return; // Reimplement if your class can handle output headers in a meaningful way
+    }
+}
+
+
+class RBQLTableRegistry {
+    constructor(){}
+
+    get_iterator_by_table_id(table_id) {
+        throw new Error("Unable to call the interface method");
+    }
+
+    get_warnings() {
+        return []; // Reimplement if your class can produce warnings
+    };
+}
+
+
+class TableIterator extends RBQLInputIterator {
     constructor(table, column_names=null, normalize_column_names=true, variable_prefix='a') {
+        super();
         this.table = table;
         this.column_names = column_names;
         this.normalize_column_names = normalize_column_names;
@@ -1501,12 +1737,18 @@ class TableIterator {
             return [make_inconsistent_num_fields_warning('input', this.fields_info)];
         return [];
     };
+
+    async get_header() {
+        return this.column_names;
+    }
 }
 
 
-class TableWriter {
+class TableWriter extends RBQLOutputWriter {
     constructor(external_table) {
+        super();
         this.table = external_table;
+        this.header = null;
     }
 
     write(fields) {
@@ -1514,16 +1756,15 @@ class TableWriter {
         return true;
     };
 
-    get_warnings() {
-        return [];
-    };
-
-    async finish() {};
+    set_header(header) {
+        this.header = header;
+    }
 }
 
 
-class SingleTableRegistry {
+class SingleTableRegistry extends RBQLTableRegistry {
     constructor(table, column_names=null, normalize_column_names=true, table_id='b') {
+        super();
         this.table = table;
         this.table_id = table_id;
         this.column_names = column_names;
@@ -1545,6 +1786,9 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
     var input_variables_map = await input_iterator.get_variables_map(query_text);
 
     var rb_actions = separate_actions(format_expression);
+    if (rb_actions.hasOwnProperty(WITH)) {
+        input_iterator.handle_query_modifier(rb_actions[WITH]);
+    }
 
     if (rb_actions.hasOwnProperty(ORDER_BY) && rb_actions.hasOwnProperty(UPDATE))
         throw new RbqlParsingError('"ORDER BY" is not allowed in "UPDATE" queries');
@@ -1556,6 +1800,7 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
     }
 
     let join_variables_map = null;
+    let join_header = null;
     if (rb_actions.hasOwnProperty(JOIN)) {
         var [rhs_table_id, variable_pairs] = parse_join_expression(rb_actions[JOIN]['text']);
         if (join_tables_registry === null)
@@ -1563,7 +1808,11 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
         let join_record_iterator = join_tables_registry.get_iterator_by_table_id(rhs_table_id);
         if (!join_record_iterator)
             throw new RbqlParsingError(`Unable to find join table: "${rhs_table_id}"`);
+        if (rb_actions.hasOwnProperty(WITH)) {
+            join_record_iterator.handle_query_modifier(rb_actions[WITH]);
+        }
         join_variables_map = await join_record_iterator.get_variables_map(query_text);
+        join_header = await join_record_iterator.get_header();
         let [lhs_variables, rhs_indices] = resolve_join_variables(input_variables_map, join_variables_map, variable_pairs, string_literals);
         let sql_join_type = {'JOIN': InnerJoiner, 'INNER JOIN': InnerJoiner, 'LEFT JOIN': LeftJoiner, 'LEFT OUTER JOIN': LeftJoiner, 'STRICT LEFT JOIN': StrictLeftJoiner}[rb_actions[JOIN]['join_subtype']];
         query_context.lhs_join_var_expression = lhs_variables.length == 1 ? lhs_variables[0] : 'JSON.stringify([' + lhs_variables.join(',') + '])';
@@ -1576,31 +1825,38 @@ async function shallow_parse_input_query(query_text, input_iterator, join_tables
 
     if (rb_actions.hasOwnProperty(WHERE)) {
         var where_expression = rb_actions[WHERE]['text'];
-        if (/[^!=]=[^=]/.exec(where_expression))
+        if (/[^><!=]=[^=]/.exec(where_expression))
             throw new RbqlParsingError('Assignments "=" are not allowed in "WHERE" expressions. For equality test use "==" or "==="');
         query_context.where_expression = combine_string_literals(where_expression, string_literals);
     }
 
+    let input_header = await input_iterator.get_header();
     if (rb_actions.hasOwnProperty(UPDATE)) {
         var update_expression = translate_update_expression(rb_actions[UPDATE]['text'], input_variables_map, string_literals, ' '.repeat(8));
         query_context.update_expressions = combine_string_literals(update_expression, string_literals);
+        query_context.writer.set_header(input_header);
     }
 
     if (rb_actions.hasOwnProperty(SELECT)) {
         query_context.top_count = find_top(rb_actions);
-        query_context.writer = new TopWriter(query_context.writer, query_context.top_count);
+        if (rb_actions.hasOwnProperty(EXCEPT)) {
+            let [output_header, select_expression] = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals, input_header);
+            query_context.select_expression = select_expression;
+            query_context.writer.set_header(output_header);
+        } else {
+            let [select_expression, select_expression_for_ast] = translate_select_expression(rb_actions[SELECT]['text']);
+            query_context.select_expression = combine_string_literals(select_expression, string_literals);
+            let column_infos = adhoc_parse_select_expression_to_column_infos(select_expression_for_ast, string_literals);
+            let output_header = select_output_header(input_header, join_header, column_infos);
+            query_context.writer.set_header(output_header);
+        }
 
+        query_context.writer = new TopWriter(query_context.writer, query_context.top_count);
         if (rb_actions[SELECT].hasOwnProperty('distinct_count')) {
             query_context.writer = new UniqCountWriter(query_context.writer);
         } else if (rb_actions[SELECT].hasOwnProperty('distinct')) {
             query_context.writer = new UniqWriter(query_context.writer);
         }
-        if (rb_actions.hasOwnProperty(EXCEPT)) {
-            query_context.select_expression = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals);
-        } else {
-            let select_expression = translate_select_expression(rb_actions[SELECT]['text']);
-            query_context.select_expression = combine_string_literals(select_expression, string_literals);
-        }
     }
 
     if (rb_actions.hasOwnProperty(ORDER_BY)) {
@@ -1623,13 +1879,21 @@ async function query(query_text, input_iterator, output_writer, output_warnings,
 }
 
 
-async function query_table(query_text, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, normalize_column_names=true, user_init_code='') {
+async function query_table(query_text, input_table, output_table, output_warnings, join_table=null, input_column_names=null, join_column_names=null, output_column_names=null, normalize_column_names=true, user_init_code='') {
     if (!normalize_column_names && input_column_names !== null && join_column_names !== null)
         ensure_no_ambiguous_variables(query_text, input_column_names, join_column_names);
     let input_iterator = new TableIterator(input_table, input_column_names, normalize_column_names);
     let output_writer = new TableWriter(output_table);
     let join_tables_registry = join_table === null ? null : new SingleTableRegistry(join_table, join_column_names, normalize_column_names);
     await query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
+    if (output_column_names !== null) {
+        assert(output_column_names.length == 0, '`output_column_names` param must be an empty list or null');
+        if (output_writer.header !== null) {
+            for (let column_name of output_writer.header) {
+                output_column_names.push(column_name);
+            }
+        }
+    }
 }
 
 
@@ -1651,6 +1915,9 @@ function exception_to_error_info(e) {
 
 exports.query = query;
 exports.query_table = query_table;
+exports.RBQLInputIterator = RBQLInputIterator;
+exports.RBQLOutputWriter = RBQLOutputWriter;
+exports.RBQLTableRegistry = RBQLTableRegistry;
 
 exports.version = RBQL_VERSION;
 exports.TableIterator = TableIterator;
@@ -1660,6 +1927,7 @@ exports.exception_to_error_info = exception_to_error_info;
 
 
 // The functions below are exported just for unit tests, they are not part of the rbql API
+// TODO export these through a special unit_test proxy, e.g. exports.unit_test.parse_basic_variables = parse_basic_variables;
 exports.parse_basic_variables = parse_basic_variables;
 exports.parse_array_variables = parse_array_variables;
 exports.parse_dictionary_variables = parse_dictionary_variables;
@@ -1669,11 +1937,15 @@ exports.strip_comments = strip_comments;
 exports.separate_actions = separate_actions;
 exports.separate_string_literals = separate_string_literals;
 exports.combine_string_literals = combine_string_literals;
-exports.translate_except_expression = translate_except_expression;
 exports.parse_join_expression = parse_join_expression;
 exports.resolve_join_variables = resolve_join_variables;
 exports.translate_update_expression = translate_update_expression;
 exports.translate_select_expression = translate_select_expression;
+exports.translate_except_expression = translate_except_expression;
 exports.like_to_regex = like_to_regex;
+exports.adhoc_parse_select_expression_to_column_infos = adhoc_parse_select_expression_to_column_infos;
+exports.replace_star_count = replace_star_count;
+exports.replace_star_vars_for_header_parsing = replace_star_vars_for_header_parsing;
+exports.select_output_header = select_output_header;
 
 }(typeof exports === 'undefined' ? this.rbql = {} : exports));
diff --git a/rbql-js/rbql_csv.js b/rbql-js/rbql_csv.js
index 3ba2d9c..8f202f0 100755
--- a/rbql-js/rbql_csv.js
+++ b/rbql-js/rbql_csv.js
@@ -152,27 +152,30 @@ class RecordQueue {
         }
         return this.pull_stack.pop();
     }
-
-    return_to_pull_stack(record) {
-        this.pull_stack.push(record);
-    }
 }
 
 
-class CSVRecordIterator {
-    // CSVRecordIterator implements typical async producer-consumer model with an internal buffer:
+class CSVRecordIterator extends rbql.RBQLInputIterator {
+    // CSVRecordIterator implements a typical async producer-consumer model with an internal buffer:
     // get_record() - consumer
     // stream.on('data') - producer
-    constructor(stream, csv_path, encoding, delim, policy, skip_headers=false, table_name='input', variable_prefix='a') {
+    constructor(stream, csv_path, encoding, delim, policy, has_header=false, comment_prefix=null, table_name='input', variable_prefix='a') {
+        super();
         this.stream = stream;
         this.csv_path = csv_path;
         assert((this.stream === null) != (this.csv_path === null));
         this.encoding = encoding;
         this.delim = delim;
         this.policy = policy;
-        this.skip_headers = skip_headers;
+
+        this.has_header = has_header;
+        this.first_record = null;
+        this.first_record_should_be_emitted = !has_header;
+        this.header_preread_complete = false;
+
         this.table_name = table_name;
         this.variable_prefix = variable_prefix;
+        this.comment_prefix = (comment_prefix !== null && comment_prefix.length) ? comment_prefix : null;
 
         this.decoder = null;
         if (encoding == 'utf-8' && this.csv_path === null) {
@@ -195,12 +198,13 @@ class CSVRecordIterator {
         this.first_defective_line = null;
 
         this.fields_info = new Object();
-        this.NR = 0; // Record num
-        this.NL = 0; // Line num (can be different from record num for rfc dialect)
+        this.NR = 0; // Record number
+        this.NL = 0; // Line number (NL != NR when the CSV file has comments or multiline fields)
 
         this.rfc_line_buffer = [];
 
         this.partially_decoded_line = '';
+        this.partially_decoded_line_ends_with_cr = false;
 
         this.resolve_current_record = null;
         this.reject_current_record = null;
@@ -211,6 +215,20 @@ class CSVRecordIterator {
         this.process_line_polymorphic = policy == 'quoted_rfc' ? this.process_partial_rfc_record_line : this.process_record_line;
     }
 
+
+    handle_query_modifier(modifier) {
+        // For `... WITH (header) ...` syntax
+        if (['header', 'headers'].indexOf(modifier) != -1) {
+            this.has_header = true;
+            this.first_record_should_be_emitted = false;
+        }
+        if (['noheader', 'noheaders'].indexOf(modifier) != -1) {
+            this.has_header = false;
+            this.first_record_should_be_emitted = true;
+        }
+    }
+
+
     handle_exception(exception) {
         if (this.reject_current_record) {
             let reject = this.reject_current_record;
@@ -223,15 +241,17 @@ class CSVRecordIterator {
 
     }
 
-    async preread_header() {
-        let header_record = await this.get_record();
-        if (header_record === null)
-            return null;
-        if (!this.skip_headers)
-            this.produced_records_queue.return_to_pull_stack(header_record);
+    async preread_first_record() {
+        if (this.header_preread_complete)
+            return;
+        this.first_record = await this.get_record();
+        this.header_preread_complete = true; // We must set header_preread_complete to true after calling get_record(), because get_record() uses it internally.
+        if (this.first_record === null) {
+            return;
+        }
         if (this.stream)
             this.stream.pause();
-        return header_record.slice();
+        this.first_record = this.first_record.slice();
     };
 
 
@@ -240,19 +260,32 @@ class CSVRecordIterator {
         rbql.parse_basic_variables(query_text, this.variable_prefix, variable_map);
         rbql.parse_array_variables(query_text, this.variable_prefix, variable_map);
 
-        let header_record = await this.preread_header(); // TODO optimize: do not start the stream if query_text doesn't seem to have dictionary or attribute -looking patterns
-        if (header_record) {
-            rbql.parse_attribute_variables(query_text, this.variable_prefix, header_record, 'CSV header line', variable_map);
-            rbql.parse_dictionary_variables(query_text, this.variable_prefix, header_record, variable_map);
+        await this.preread_first_record();
+        if (this.first_record) {
+            rbql.parse_attribute_variables(query_text, this.variable_prefix, this.first_record, 'CSV header line', variable_map);
+            rbql.parse_dictionary_variables(query_text, this.variable_prefix, this.first_record, variable_map);
         }
         return variable_map;
     };
 
+    async get_header() {
+        await this.preread_first_record();
+        return this.has_header ? this.first_record : null;
+    }
+
 
     try_resolve_next_record() {
         if (this.resolve_current_record === null)
             return;
-        let record = this.produced_records_queue.dequeue();
+
+        let record = null;
+        if (this.first_record_should_be_emitted && this.header_preread_complete) {
+            this.first_record_should_be_emitted = false;
+            record = this.first_record;
+        } else {
+            record = this.produced_records_queue.dequeue();
+        }
+
         if (record === null && !this.input_exhausted)
             return;
         let resolve = this.resolve_current_record;
@@ -298,13 +331,15 @@ class CSVRecordIterator {
 
 
     process_record_line(line) {
+        if (this.comment_prefix !== null && line.startsWith(this.comment_prefix))
+            return; // Just skip the line
         this.NR += 1;
         var [record, warning] = csv_utils.smart_split(line, this.delim, this.policy, false);
         if (warning) {
             if (this.first_defective_line === null) {
-                this.first_defective_line = this.NR;
+                this.first_defective_line = this.NL;
                 if (this.policy == 'quoted_rfc')
-                    this.handle_exception(new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}`));
+                    this.handle_exception(new RbqlIOHandlingError(`Inconsistent double quote escaping in ${this.table_name} table at record ${this.NR}, line ${this.NL}`));
             }
         }
         let num_fields = record.length;
@@ -316,6 +351,8 @@ class CSVRecordIterator {
 
 
     process_partial_rfc_record_line(line) {
+        if (this.comment_prefix !== null && this.rfc_line_buffer.length == 0 && line.startsWith(this.comment_prefix))
+            return; // Just skip the line
         let match_list = line.match(/"/g);
         let has_unbalanced_double_quote = match_list && match_list.length % 2 == 1;
         if (this.rfc_line_buffer.length == 0 && !has_unbalanced_double_quote) {
@@ -334,14 +371,14 @@ class CSVRecordIterator {
 
 
     process_line(line) {
-        if (this.NL === 0) {
+        this.NL += 1;
+        if (this.NL === 1) {
             var clean_line = remove_utf8_bom(line, this.encoding);
             if (clean_line != line) {
                 line = clean_line;
                 this.utf8_bom_removed = true;
             }
         }
-        this.NL += 1;
         this.process_line_polymorphic(line);
     };
 
@@ -362,10 +399,14 @@ class CSVRecordIterator {
         } else {
             decoded_string = data_chunk.toString(this.encoding);
         }
+        let line_starts_with_lf = decoded_string.length && decoded_string[0] == '\n';
+        let first_line_index = line_starts_with_lf && this.partially_decoded_line_ends_with_cr ? 1 : 0;
+        this.partially_decoded_line_ends_with_cr = decoded_string.length && decoded_string[decoded_string.length - 1] == '\r';
         let lines = csv_utils.split_lines(decoded_string);
         lines[0] = this.partially_decoded_line + lines[0];
+        assert(first_line_index == 0 || lines[0].length == 0);
         this.partially_decoded_line = lines.pop();
-        for (let i = 0; i < lines.length; i++) {
+        for (let i = first_line_index; i < lines.length; i++) {
             this.process_line(lines[i]);
         }
     };
@@ -452,8 +493,9 @@ class CSVRecordIterator {
 }
 
 
-class CSVWriter {
+class CSVWriter extends rbql.RBQLOutputWriter {
     constructor(stream, close_stream_on_finish, encoding, delim, policy, line_separator='\n') {
+        super();
         this.stream = stream;
         this.encoding = encoding;
         if (encoding)
@@ -467,6 +509,7 @@ class CSVWriter {
 
         this.null_in_output = false;
         this.delim_in_simple_output = false;
+        this.header_len = null;
 
         if (policy == 'simple') {
             this.polymorphic_join = this.simple_join;
@@ -484,6 +527,14 @@ class CSVWriter {
     }
 
 
+    set_header(header) {
+        if (header !== null) {
+            this.header_len = header.length;
+            this.write(header);
+        }
+    }
+
+
     quoted_join(fields) {
         let delim = this.delim;
         var quoted_fields = fields.map(function(v) { return csv_utils.quote_field(String(v), delim); });
@@ -529,6 +580,8 @@ class CSVWriter {
 
 
     write(fields) {
+        if (this.header_len !== null && fields.length != this.header_len)
+            throw new RbqlIOHandlingError(`Inconsistent number of columns in output header and the current record: ${this.header_len} != ${fields.length}`);
         this.normalize_fields(fields);
         this.stream.write(this.polymorphic_join(fields));
         this.stream.write(this.line_separator);
@@ -570,12 +623,14 @@ class CSVWriter {
 }
 
 
-class FileSystemCSVRegistry {
-    constructor(delim, policy, encoding, skip_headers=false, options=null) {
+class FileSystemCSVRegistry extends rbql.RBQLTableRegistry {
+    constructor(delim, policy, encoding, has_header=false, comment_prefix=null, options=null) {
+        super();
         this.delim = delim;
         this.policy = policy;
         this.encoding = encoding;
-        this.skip_headers = skip_headers;
+        this.has_header = has_header;
+        this.comment_prefix = comment_prefix;
         this.stream = null;
         this.record_iterator = null;
 
@@ -594,19 +649,19 @@ class FileSystemCSVRegistry {
         } else {
             this.stream = fs.createReadStream(this.table_path);
         }
-        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.skip_headers, table_id, 'b');
+        this.record_iterator = new CSVRecordIterator(this.stream, this.bulk_input_path, this.encoding, this.delim, this.policy, this.has_header, this.comment_prefix, table_id, 'b');
         return this.record_iterator;
     };
 
     get_warnings(output_warnings) {
-        if (this.record_iterator && this.skip_headers) {
-            output_warnings.push(`The first (header) record was also skipped in the JOIN file: ${path.basename(this.table_path)}`);
+        if (this.record_iterator && this.has_header) {
+            output_warnings.push(`The first record in JOIN file ${path.basename(this.table_path)} was also treated as header (and skipped)`);
         }
     }
 }
 
 
-async function query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, skip_headers=false, user_init_code='', options=null) {
+async function query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers=false, comment_prefix=null, user_init_code='', options=null) {
     let input_stream = null;
     let bulk_input_path = null;
     if (options && options['bulk_read'] && input_path) {
@@ -629,8 +684,8 @@ async function query_csv(query_text, input_path, input_delim, input_policy, outp
         user_init_code = read_user_init_code(default_init_source_path);
     }
 
-    let join_tables_registry = new FileSystemCSVRegistry(input_delim, input_policy, csv_encoding, skip_headers, options);
-    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, skip_headers);
+    let join_tables_registry = new FileSystemCSVRegistry(input_delim, input_policy, csv_encoding, with_headers, comment_prefix, options);
+    let input_iterator = new CSVRecordIterator(input_stream, bulk_input_path, csv_encoding, input_delim, input_policy, with_headers, comment_prefix);
     let output_writer = new CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy);
 
     await rbql.query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code);
diff --git a/rbql/_version.py b/rbql/_version.py
index 4bf366c..ae41f67 100644
--- a/rbql/_version.py
+++ b/rbql/_version.py
@@ -1,3 +1,3 @@
 # Explanation of this file purpose: https://stackoverflow.com/a/16084844/2898283
-__version__ = '0.17.0'
+__version__ = '0.20.0'
 
diff --git a/rbql/csv_utils.py b/rbql/csv_utils.py
index 51bdcd2..3e7faff 100644
--- a/rbql/csv_utils.py
+++ b/rbql/csv_utils.py
@@ -83,19 +83,19 @@ def extract_line_from_data(data):
 
 
 def quote_field(src, delim):
-    if src.find(delim) != -1 or src.find('"') != -1:
-        escaped = src.replace('"', '""')
-        escaped = '"{}"'.format(escaped)
-        return escaped
+    if src.find('"') != -1:
+        return '"{}"'.format(src.replace('"', '""'))
+    if src.find(delim) != -1:
+        return '"{}"'.format(src)
     return src
 
 
 def rfc_quote_field(src, delim):
     # A single regexp can be used to find all 4 characters simultaneously, but this approach doesn't significantly improve performance according to my tests.
-    if src.find(delim) != -1 or src.find('"') != -1 or src.find('\n') != -1 or src.find('\r') != -1:
-        escaped = src.replace('"', '""')
-        escaped = '"{}"'.format(escaped)
-        return escaped
+    if src.find('"') != -1:
+        return '"{}"'.format(src.replace('"', '""'))
+    if src.find(delim) != -1 or src.find('\n') != -1 or src.find('\r') != -1:
+        return '"{}"'.format(src)
     return src
 
 
diff --git a/rbql/rbql_csv.py b/rbql/rbql_csv.py
index 17389be..03b4972 100755
--- a/rbql/rbql_csv.py
+++ b/rbql/rbql_csv.py
@@ -6,7 +6,6 @@
 import os
 import codecs
 import io
-import re
 from errno import EPIPE
 
 from . import rbql_engine
@@ -39,13 +38,6 @@
     broken_pipe_exception = IOError
 
 
-class RbqlIOHandlingError(Exception):
-    pass
-
-class RbqlParsingError(Exception):
-    pass
-
-
 def is_ascii(s):
     return all(ord(c) < 128 for c in s)
 
@@ -114,18 +106,6 @@ def remove_utf8_bom(line, assumed_source_encoding):
     return line
 
 
-def str_py2(obj):
-    return obj if isinstance(obj, basestring) else str(obj)
-
-
-def str_py3(obj):
-    return obj if isinstance(obj, str) else str(obj)
-
-
-polymorphic_str = str_py3 if PY3 else str_py2
-
-
-
 def try_read_index(index_path):
     lines = []
     try:
@@ -187,7 +167,7 @@ def init_ansi_terminal_colors():
 
 
 
-class CSVWriter:
+class CSVWriter(rbql_engine.RBQLOutputWriter):
     def __init__(self, stream, close_stream_on_finish, encoding, delim, policy, line_separator='\n', colorize_output=False):
         assert encoding in ['utf-8', 'latin-1', None]
         self.stream = encode_output_stream(stream, encoding)
@@ -221,6 +201,13 @@ def __init__(self, stream, close_stream_on_finish, encoding, delim, policy, line
 
         self.none_in_output = False
         self.delim_in_simple_output = False
+        self.header_len = None
+
+
+    def set_header(self, header):
+        if header is not None:
+            self.header_len = len(header)
+            self.write(header)
 
 
     def monocolumn_join(self, fields):
@@ -243,6 +230,8 @@ def join_by_delim(self, fields):
 
 
     def write(self, fields):
+        if self.header_len is not None and len(fields) != self.header_len:
+            raise rbql_engine.RbqlIOHandlingError('Inconsistent number of columns in output header and the current record: {} != {}'.format(self.header_len, len(fields)))
         self.normalize_fields(fields)
 
         if self.polymorphic_preprocess is not None:
@@ -287,19 +276,23 @@ def quote_fields_rfc(self, fields):
 
     def ensure_single_field(self, fields):
         if len(fields) > 1:
-            raise RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field')
+            raise rbql_engine.RbqlIOHandlingError('Unable to use "Monocolumn" output format: some records have more than one field')
 
 
     def normalize_fields(self, fields):
         for i in polymorphic_xrange(len(fields)):
-            if fields[i] is None:
+            if PY3 and isinstance(fields[i], str):
+                continue
+            elif not PY3 and isinstance(fields[i], basestring):
+                continue
+            elif fields[i] is None:
                 fields[i] = ''
                 self.none_in_output = True
             elif isinstance(fields[i], list):
                 self.normalize_fields(fields[i])
                 fields[i] = self.sub_array_delim.join(fields[i])
             else:
-                fields[i] = polymorphic_str(fields[i])
+                fields[i] = str(fields[i])
 
 
     def _write_all(self, table):
@@ -340,8 +333,8 @@ def get_warnings(self):
         return result
 
 
-class CSVRecordIterator:
-    def __init__(self, stream, encoding, delim, policy, skip_headers=False, table_name='input', variable_prefix='a', chunk_size=1024, line_mode=False):
+class CSVRecordIterator(rbql_engine.RBQLInputIterator):
+    def __init__(self, stream, encoding, delim, policy, has_header=False, comment_prefix=None, table_name='input', variable_prefix='a', chunk_size=1024, line_mode=False):
         assert encoding in ['utf-8', 'latin-1', None]
         self.encoding = encoding
         self.stream = encode_input_stream(stream, encoding)
@@ -349,33 +342,49 @@ def __init__(self, stream, encoding, delim, policy, skip_headers=False, table_na
         self.policy = policy
         self.table_name = table_name
         self.variable_prefix = variable_prefix
+        self.comment_prefix = comment_prefix if (comment_prefix is not None and len(comment_prefix)) else None
 
         self.buffer = ''
         self.detected_line_separator = '\n'
         self.exhausted = False
-        self.NR = 0
+        self.NR = 0 # Record number
+        self.NL = 0 # Line number (NL != NR when the CSV file has comments or multiline fields)
         self.chunk_size = chunk_size
         self.fields_info = dict()
 
         self.utf8_bom_removed = False
-        self.first_defective_line = None # TODO use line # instead of record # when "\n" in fields parsing is implemented
+        self.first_defective_line = None
         self.polymorphic_get_row = self.get_row_rfc if policy == 'quoted_rfc' else self.get_row_simple
+        self.has_header = has_header
+        self.first_record_should_be_emitted = False
 
         if not line_mode:
-            self.header_record = None
-            self.header_record_emitted = skip_headers
-            self.header_record = self.get_record()
+            self.first_record = None
+            self.first_record = self.get_record()
+            self.first_record_should_be_emitted = not has_header
 
 
+    def handle_query_modifier(self, modifier):
+        # For `... WITH (header) ...` syntax
+        if modifier in ['header', 'headers']:
+            self.has_header = True
+            self.first_record_should_be_emitted = False
+        if modifier in ['noheader', 'noheaders']:
+            self.has_header = False
+            self.first_record_should_be_emitted = True
+        
+
     def get_variables_map(self, query_text):
         variable_map = dict()
         rbql_engine.parse_basic_variables(query_text, self.variable_prefix, variable_map)
         rbql_engine.parse_array_variables(query_text, self.variable_prefix, variable_map)
-        if self.header_record is not None:
-            rbql_engine.parse_attribute_variables(query_text, self.variable_prefix, self.header_record, 'CSV header line', variable_map)
-            rbql_engine.parse_dictionary_variables(query_text, self.variable_prefix, self.header_record, variable_map)
+        if self.first_record is not None:
+            rbql_engine.parse_attribute_variables(query_text, self.variable_prefix, self.first_record, 'CSV header line', variable_map)
+            rbql_engine.parse_dictionary_variables(query_text, self.variable_prefix, self.first_record, variable_map)
         return variable_map
 
+    def get_header(self):
+        return self.first_record if self.has_header else None
 
     def _get_row_from_buffer(self):
         str_before, separator, str_after = csv_utils.extract_line_from_data(self.buffer)
@@ -409,27 +418,33 @@ def _read_until_found(self):
 
     def get_row_simple(self):
         try:
-            row = self._get_row_from_buffer()
-            if row is not None:
-                return row
-            self._read_until_found()
             row = self._get_row_from_buffer()
             if row is None:
-                assert self.exhausted
-                if self.buffer:
-                    tmp = self.buffer
+                self._read_until_found()
+                row = self._get_row_from_buffer()
+                if row is None:
+                    assert self.exhausted
+                    if not len(self.buffer):
+                        return None
+                    row = self.buffer
                     self.buffer = ''
-                    return tmp
-                return None
+            self.NL += 1
+            if self.NL == 1:
+                clean_line = remove_utf8_bom(row, self.encoding)
+                if clean_line != row:
+                    row = clean_line
+                    self.utf8_bom_removed = True
             return row
         except UnicodeDecodeError:
-            raise RbqlIOHandlingError('Unable to decode input table as UTF-8. Use binary (latin-1) encoding instead')
+            raise rbql_engine.RbqlIOHandlingError('Unable to decode input table as UTF-8. Use binary (latin-1) encoding instead')
 
     
     def get_row_rfc(self):
         first_row = self.get_row_simple()
         if first_row is None:
             return None
+        if self.comment_prefix is not None and first_row.startswith(self.comment_prefix):
+            return first_row
         if first_row.count('"') % 2 == 0:
             return first_row
         rows_buffer = [first_row]
@@ -443,25 +458,22 @@ def get_row_rfc(self):
 
 
     def get_record(self):
-        if not self.header_record_emitted and self.header_record is not None:
-            self.header_record_emitted = True
-            return self.header_record
-        line = self.polymorphic_get_row()
-        if line is None:
-            return None
-        if self.NR == 0:
-            clean_line = remove_utf8_bom(line, self.encoding)
-            if clean_line != line:
-                line = clean_line
-                self.utf8_bom_removed = True
+        if self.first_record_should_be_emitted:
+            self.first_record_should_be_emitted = False
+            return self.first_record
+        while True:
+            line = self.polymorphic_get_row()
+            if line is None:
+                return None
+            if self.comment_prefix is None or not line.startswith(self.comment_prefix):
+                break
         self.NR += 1
         record, warning = csv_utils.smart_split(line, self.delim, self.policy, preserve_quotes_and_whitespaces=False)
         if warning:
             if self.first_defective_line is None:
-                self.first_defective_line = self.NR
+                self.first_defective_line = self.NL
                 if self.policy == 'quoted_rfc':
-                    # TODO add line number when NL is supported
-                    raise RbqlIOHandlingError('Inconsistent double quote escaping in {} table at record {}'.format(self.table_name, self.NR))
+                    raise rbql_engine.RbqlIOHandlingError('Inconsistent double quote escaping in {} table at record {}, line {}'.format(self.table_name, self.NR, self.NL))
         num_fields = len(record)
         if num_fields not in self.fields_info:
             self.fields_info[num_fields] = self.NR
@@ -501,32 +513,37 @@ def get_warnings(self):
         return result
 
 
-class FileSystemCSVRegistry:
-    def __init__(self, delim, policy, encoding, skip_headers):
+class FileSystemCSVRegistry(rbql_engine.RBQLTableRegistry):
+    def __init__(self, delim, policy, encoding, has_header, comment_prefix):
         self.delim = delim
         self.policy = policy
         self.encoding = encoding
         self.record_iterator = None
         self.input_stream = None
-        self.skip_headers = skip_headers
+        self.has_header = has_header
+        self.comment_prefix = comment_prefix
         self.table_path = None
 
     def get_iterator_by_table_id(self, table_id):
         self.table_path = find_table_path(table_id)
         if self.table_path is None:
-            raise RbqlIOHandlingError('Unable to find join table "{}"'.format(table_id))
+            raise rbql_engine.RbqlIOHandlingError('Unable to find join table "{}"'.format(table_id))
         self.input_stream = open(self.table_path, 'rb')
-        self.record_iterator = CSVRecordIterator(self.input_stream, self.encoding, self.delim, self.policy, self.skip_headers, table_name=table_id, variable_prefix='b')
+        self.record_iterator = CSVRecordIterator(self.input_stream, self.encoding, self.delim, self.policy, self.has_header, comment_prefix=self.comment_prefix, table_name=table_id, variable_prefix='b')
         return self.record_iterator
 
-    def finish(self, output_warnings):
+    def finish(self):
         if self.input_stream is not None:
             self.input_stream.close()
-            if self.skip_headers:
-                output_warnings.append('The first (header) record was also skipped in the JOIN file: {}'.format(os.path.basename(self.table_path))) # UT JSON CSV
+
+    def get_warnings(self):
+        result = []
+        if self.record_iterator is not None and self.has_header:
+            result.append('The first record in JOIN file {} was also treated as header (and skipped)'.format(os.path.basename(self.table_path))) # UT JSON CSV
+        return result
 
 
-def query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, skip_headers=False, user_init_code='', colorize_output=False):
+def query_csv(query_text, input_path, input_delim, input_policy, output_path, output_delim, output_policy, csv_encoding, output_warnings, with_headers, comment_prefix=None, user_init_code='', colorize_output=False):
     output_stream, close_output_on_finish = (None, False)
     input_stream, close_input_on_finish = (None, False)
     join_tables_registry = None
@@ -535,22 +552,22 @@ def query_csv(query_text, input_path, input_delim, input_policy, output_path, ou
         input_stream, close_input_on_finish = (sys.stdin, False) if input_path is None else (open(input_path, 'rb'), True)
 
         if input_delim == '"' and input_policy == 'quoted':
-            raise RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy')
+            raise rbql_engine.RbqlIOHandlingError('Double quote delimiter is incompatible with "quoted" policy')
         if input_delim != ' ' and input_policy == 'whitespace':
-            raise RbqlIOHandlingError('Only whitespace " " delim is supported with "whitespace" policy')
+            raise rbql_engine.RbqlIOHandlingError('Only whitespace " " delim is supported with "whitespace" policy')
 
         if not is_ascii(query_text) and csv_encoding == 'latin-1':
-            raise RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary')
+            raise rbql_engine.RbqlIOHandlingError('To use non-ascii characters in query enable UTF-8 encoding instead of latin-1/binary')
 
         if (not is_ascii(input_delim) or not is_ascii(output_delim)) and csv_encoding == 'latin-1':
-            raise RbqlIOHandlingError('To use non-ascii separators enable UTF-8 encoding instead of latin-1/binary')
+            raise rbql_engine.RbqlIOHandlingError('To use non-ascii separators enable UTF-8 encoding instead of latin-1/binary')
 
         default_init_source_path = os.path.join(os.path.expanduser('~'), '.rbql_init_source.py')
         if user_init_code == '' and os.path.exists(default_init_source_path):
             user_init_code = read_user_init_code(default_init_source_path)
 
-        join_tables_registry = FileSystemCSVRegistry(input_delim, input_policy, csv_encoding, skip_headers)
-        input_iterator = CSVRecordIterator(input_stream, csv_encoding, input_delim, input_policy, skip_headers)
+        join_tables_registry = FileSystemCSVRegistry(input_delim, input_policy, csv_encoding, with_headers, comment_prefix)
+        input_iterator = CSVRecordIterator(input_stream, csv_encoding, input_delim, input_policy, with_headers, comment_prefix=comment_prefix)
         output_writer = CSVWriter(output_stream, close_output_on_finish, csv_encoding, output_delim, output_policy, colorize_output=colorize_output)
         if debug_mode:
             rbql_engine.set_debug_mode()
@@ -561,7 +578,8 @@ def query_csv(query_text, input_path, input_delim, input_policy, output_path, ou
         if close_output_on_finish:
             output_stream.close()
         if join_tables_registry:
-            join_tables_registry.finish(output_warnings)
+            join_tables_registry.finish()
+            output_warnings += join_tables_registry.get_warnings()
 
 
 def set_debug_mode():
diff --git a/rbql/rbql_engine.py b/rbql/rbql_engine.py
index 9e22a43..98a5a93 100755
--- a/rbql/rbql_engine.py
+++ b/rbql/rbql_engine.py
@@ -6,6 +6,7 @@
 import re
 import random
 import time
+import ast
 from collections import OrderedDict, defaultdict, namedtuple
 
 import datetime # For date operations inside user queries
@@ -26,6 +27,8 @@
 # UT JSON CSV - means json csv Unit Test exists for this case
 
 
+# TODO we can do good-enough header autodetection in CSV files to show warnings when we have a high degree of confidence that the file has a header but the user didn't skip it and vice versa
+
 # TODO catch exceptions in user expression to report the exact place where it occured: "SELECT" expression, "WHERE" expression, etc
 
 # TODO consider supporting explicit column names variables like "host" or "name" or "surname" - just parse all variable-looking sequences from the query and match them against available column names from the header, but skip all symbol defined in rbql_engine.py/rbql.js, user init code and python/js builtin keywords (show warning on intersection)
@@ -38,14 +41,18 @@
 
 # TODO support custom (virtual) headers for CSV version
 
-# TODO support RBQL variable "NL" - line number. when header is skipped it would be "2" for the first record. Also it is not equal to NR for multiline records
-
-# TODO support option to skip comment lines (lines starting with the specified prefix)
+# TODO allow to use NL in RBQL queries for CSV version
 
 # TODO add "inconsistent number of fields in output table" warning. Useful for queries like this: `*a1.split("|")` or `...a1.split("|")`, where num of fields in a1 is variable
 
+# TODO add RBQL iterators for json lines ( https://jsonlines.org/ ) and xml-by-line files
+# TODO add RBQL file-system iterator to be able to query files like fselect does
+
+# TODO use ast module to improve parsing of parse_attribute_variables / parse_dictionary_variables, like it was done for select parsing
 
-# FIXME refactor this module in sync with the JS version. There wasn't any cleanup after the last redesign
+# TODO support 'AS' keyword
+
+# FIXME consider disallowing to use values in the first row when header is not enabled (only a1, a2, ... should be allowed) and vice versa - Don't allow a1, a2 etc when header is enabled. This is to make sure that the user knows what query mode they are in.
 
 
 GROUP_BY = 'GROUP BY'
@@ -60,6 +67,7 @@
 WHERE = 'WHERE'
 LIMIT = 'LIMIT'
 EXCEPT = 'EXCEPT'
+WITH = 'WITH'
 
 ambiguous_error_msg = 'Ambiguous variable name: "{}" is present both in input and in join tables'
 invalid_keyword_in_aggregate_query_error_msg = '"ORDER BY", "UPDATE" and "DISTINCT" keywords are not allowed in aggregate queries'
@@ -132,6 +140,95 @@ def __init__(self, input_iterator, output_writer, user_init_code):
 PY3 = sys.version_info[0] == 3
 
 
+def is_str6(val):
+    return isinstance(val, str if PY3 else basestring) # Python 2/3 compatible string check: `str` in Python 3, `basestring` in Python 2
+
+
+QueryColumnInfo = namedtuple('QueryColumnInfo', ['table_name', 'column_index', 'column_name', 'is_star'])
+
+
+def get_field(root, field_name):
+    """Return the value of the field `field_name` of the AST node `root`, or None when the node has no such field."""
+    node_fields = ast.iter_fields(root) # Yields (name, value) pairs
+    matching_values = (f[1] for f in node_fields if len(f) == 2 and f[0] == field_name)
+    return next(matching_values, None)
+
+
+def column_info_from_node(root):
+    """Describe one parsed SELECT column expression (an `ast` node) as a QueryColumnInfo.
+    Returns None when the node is not a plain column reference like `a1`, `a.name`, `a["name"]`, `a[1]`, `NR` or a star marker."""
+    rbql_star_marker = '__RBQL_INTERNAL_STAR'
+    if isinstance(root, ast.Name):
+        var_name = get_field(root, 'id')
+        if var_name is None:
+            return None
+        if var_name == rbql_star_marker:
+            return QueryColumnInfo(table_name=None, column_index=None, column_name=None, is_star=True)
+        good_column_name_rgx = '^([ab])([0-9][0-9]*)$'
+        match_obj = re.match(good_column_name_rgx, var_name)
+        if match_obj is not None:
+            table_name = match_obj.group(1)
+            column_index = int(match_obj.group(2)) - 1
+            return QueryColumnInfo(table_name=table_name, column_index=column_index, column_name=None, is_star=False)
+        # Some examples for this branch: NR, NF
+        return QueryColumnInfo(table_name=None, column_index=None, column_name=var_name, is_star=False)
+    if isinstance(root, ast.Attribute):
+        column_name = get_field(root, 'attr')
+        if not column_name or not is_str6(column_name):
+            return None
+        var_root = get_field(root, 'value')
+        if not isinstance(var_root, ast.Name):
+            return None
+        table_name = get_field(var_root, 'id')
+        if table_name is None or table_name not in ['a', 'b']:
+            return None
+        if column_name == rbql_star_marker:
+            return QueryColumnInfo(table_name=table_name, column_index=None, column_name=None, is_star=True)
+        return QueryColumnInfo(table_name=None, column_index=None, column_name=column_name, is_star=False)
+    if isinstance(root, ast.Subscript):
+        var_root = get_field(root, 'value')
+        if not isinstance(var_root, ast.Name):
+            return None
+        table_name = get_field(var_root, 'id')
+        if table_name is None or table_name not in ['a', 'b']:
+            return None
+        slice_root = get_field(root, 'slice')
+        if isinstance(slice_root, getattr(ast, 'Index', ())):
+            slice_root = get_field(slice_root, 'value') # Only Python < 3.9 wraps the subscript expression into ast.Index
+        try:
+            # literal_eval() reads ast.Str / ast.Num nodes (old Python) as well as ast.Constant nodes (Python 3.8+)
+            slice_value = ast.literal_eval(slice_root)
+        except (ValueError, TypeError):
+            return None
+        if is_str6(slice_value):
+            # We don't need table name for named fields
+            return QueryColumnInfo(table_name=None, column_index=None, column_name=slice_value, is_star=False)
+        if isinstance(slice_value, int) and not isinstance(slice_value, bool) and slice_value >= 1:
+            return QueryColumnInfo(table_name=table_name, column_index=slice_value - 1, column_name=None, is_star=False)
+        return None
+    return None
+
+
+def ast_parse_select_expression_to_column_infos(select_expression):
+    """Parse the SELECT expression with the `ast` module and describe every selected column.
+    Returns a list with one column_info_from_node() result per top-level column expression.
+    Raises RbqlParsingError unless the parsed text is exactly one statement with exactly one child node."""
+    module_node = ast.parse(select_expression)
+    statement_nodes = list(ast.iter_child_nodes(module_node))
+    if 'body' not in module_node._fields:
+        raise RbqlParsingError('Unable to parse SELECT expression (error code #117)') # Should never happen
+    if len(statement_nodes) != 1:
+        raise RbqlParsingError('Unable to parse SELECT expression (error code #118)') # Should never happen
+    statement_node = statement_nodes[0]
+    expression_nodes = list(ast.iter_child_nodes(statement_node))
+    if len(expression_nodes) != 1:
+        raise RbqlParsingError('Unable to parse SELECT expression (error code #119): "{}"'.format(select_expression)) # This can be triggered with `SELECT a = 100`
+    expression_node = expression_nodes[0]
+    # Several comma-separated columns are parsed as a single tuple expression
+    column_nodes = expression_node.elts if isinstance(expression_node, ast.Tuple) else [expression_node]
+    return [column_info_from_node(column_node) for column_node in column_nodes]
+
+
 def iteritems6(x):
     if PY3:
         return x.items()
@@ -228,9 +325,7 @@ def __init__(self, start_with_int):
     def parse(self, val):
         if not self.string_detection_done:
             self.string_detection_done = True
-            if PY3 and isinstance(val, str):
-                self.is_str = True
-            if not PY3 and isinstance(val, basestring):
+            if is_str6(val):
                 self.is_str = True
         if not self.is_str:
             return val
@@ -406,14 +501,12 @@ def init_aggregator(generator_name, val, post_proc=None):
 def MIN(val):
     return init_aggregator(MinAggregator, val) if query_context.aggregation_stage < 2 else val
 
-# min = MIN - see the mad max copypaste below
 Min = MIN
 
 
 def MAX(val):
     return init_aggregator(MaxAggregator, val) if query_context.aggregation_stage < 2 else val
 
-# max = MAX - see the mad max copypaste below
 Max = MAX
 
 
@@ -427,7 +520,6 @@ def COUNT(_val):
 def SUM(val):
     return init_aggregator(SumAggregator, val) if query_context.aggregation_stage < 2 else val
 
-# sum = SUM - see the mad max copypaste below
 Sum = SUM
 
 
@@ -459,9 +551,7 @@ def ARRAY_AGG(val, post_proc=None):
 array_agg = ARRAY_AGG
 
 
-
-
-# Redefining builtin max, min and sum. See test_max_max.py unit test for explanation
+# Redefining builtin max, min and sum
 builtin_max = max
 builtin_min = min
 builtin_sum = sum
@@ -895,7 +985,7 @@ def strip_comments(cline):
 
 def combine_string_literals(backend_expression, string_literals):
     for i in range(len(string_literals)):
-        backend_expression = backend_expression.replace('###RBQL_STRING_LITERAL{}###'.format(i), string_literals[i])
+        backend_expression = backend_expression.replace('___RBQL_STRING_LITERAL{}___'.format(i), string_literals[i])
     return backend_expression
 
 
@@ -1044,6 +1134,7 @@ def ensure_no_ambiguous_variables(query_text, input_column_names, join_column_na
 def generate_common_init_code(query_text, variable_prefix):
     assert variable_prefix in ['a', 'b']
     result = list()
+    # TODO [PERFORMANCE] do not initialize RBQLRecord if we don't have `a.` or `a[` prefix in the query
     result.append('{} = RBQLRecord()'.format(variable_prefix))
     base_var = 'NR' if variable_prefix == 'a' else 'bNR'
     attr_var = '{}.NR'.format(variable_prefix)
@@ -1086,6 +1177,21 @@ def replace_star_vars(rbql_expression):
     return result
 
 
+def replace_star_vars_for_ast(rbql_expression):
+    """Replace standalone star columns with identifier-like markers that can be parsed by the `ast` module.
+    `*`, `a.*` and `b.*` which occupy a whole comma-separated column (surrounding spaces included) become
+    `__RBQL_INTERNAL_STAR`, `a.__RBQL_INTERNAL_STAR` and `b.__RBQL_INTERNAL_STAR`; all other text is kept as is."""
+    star_markers = {'*': '__RBQL_INTERNAL_STAR', 'a.*': 'a.__RBQL_INTERNAL_STAR', 'b.*': 'b.__RBQL_INTERNAL_STAR'}
+    result_parts = []
+    copied_until = 0
+    for star_match in re.finditer(r'(?:(?<=^)|(?<=,)) *(\*|a\.\*|b\.\*) *(?=$|,)', rbql_expression):
+        result_parts.append(rbql_expression[copied_until:star_match.start()])
+        result_parts.append(star_markers[star_match.group(1)])
+        copied_until = star_match.end()
+    result_parts.append(rbql_expression[copied_until:])
+    return ''.join(result_parts)
+
+
 def translate_update_expression(update_expression, input_variables_map, string_literals):
     assignment_looking_rgx = re.compile(r'(?:^|,) *(a[.#a-zA-Z0-9\[\]_]*) *=(?=[^=])')
     update_expressions = []
@@ -1110,12 +1216,12 @@ def translate_update_expression(update_expression, input_variables_map, string_l
 
 
 def translate_select_expression(select_expression):
-    translated = replace_star_count(select_expression)
-    translated = replace_star_vars(translated)
-    translated = translated.strip()
+    expression_without_stars = replace_star_count(select_expression)
+    translated = replace_star_vars(expression_without_stars).strip()
+    translated_for_ast = replace_star_vars_for_ast(expression_without_stars).strip()
     if not len(translated):
         raise RbqlParsingError('"SELECT" expression is empty') # UT JSON
-    return '[{}]'.format(translated)
+    return ('[{}]'.format(translated), translated_for_ast)
 
 
 def separate_string_literals(rbql_expression):
@@ -1129,7 +1235,7 @@ def separate_string_literals(rbql_expression):
         literal_id = len(string_literals)
         string_literals.append(m.group(0))
         format_parts.append(rbql_expression[idx_before:m.start()])
-        format_parts.append('###RBQL_STRING_LITERAL{}###'.format(literal_id))
+        format_parts.append('___RBQL_STRING_LITERAL{}___'.format(literal_id))
         idx_before = m.end()
     format_parts.append(rbql_expression[idx_before:])
     format_expression = ''.join(format_parts)
@@ -1168,8 +1274,13 @@ def separate_actions(rbql_expression):
     # TODO add more checks:
     # make sure all rbql_expression was separated and SELECT or UPDATE is at the beginning
     rbql_expression = rbql_expression.strip(' ')
-    ordered_statements = locate_statements(rbql_expression)
     result = dict()
+    # For now support no more than one query modifier per query. The pattern is a raw string: `\(` is an invalid escape sequence in a regular string literal
+    mobj = re.match(r'^(.*)  *[Ww][Ii][Tt][Hh] *\(([a-z]{4,20})\) *$', rbql_expression)
+    if mobj is not None:
+        rbql_expression = mobj.group(1)
+        result[WITH] = mobj.group(2)
+    ordered_statements = locate_statements(rbql_expression)
     for i in range(len(ordered_statements)):
         statement_start = ordered_statements[i][0]
         span_start = ordered_statements[i][1]
@@ -1217,7 +1328,8 @@ def separate_actions(rbql_expression):
         result[statement] = statement_params
     if SELECT not in result and UPDATE not in result:
         raise RbqlParsingError('Query must contain either SELECT or UPDATE statement') # UT JSON
-    assert (SELECT in result) != (UPDATE in result)
+    if SELECT in result and UPDATE in result:
+        raise RbqlParsingError('Query can not contain both SELECT and UPDATE statements')
     return result
 
 
@@ -1230,7 +1342,7 @@ def find_top(rb_actions):
     return rb_actions[SELECT].get('top', None)
 
 
-def translate_except_expression(except_expression, input_variables_map, string_literals):
+def translate_except_expression(except_expression, input_variables_map, string_literals, input_header):
     skip_vars = except_expression.split(',')
     skip_vars = [v.strip() for v in skip_vars]
     skip_indices = list()
@@ -1241,8 +1353,9 @@ def translate_except_expression(except_expression, input_variables_map, string_l
             raise RbqlParsingError('Unknown field in EXCEPT expression: "{}"'.format(var_name)) # UT JSON
         skip_indices.append(var_info.index)
     skip_indices = sorted(skip_indices)
+    output_header = None if input_header is None else select_except(input_header, skip_indices)
     skip_indices = [str(v) for v in skip_indices]
-    return 'select_except(record_a, [{}])'.format(','.join(skip_indices))
+    return (output_header, 'select_except(record_a, [{}])'.format(','.join(skip_indices)))
 
 
 class HashJoinMap:
@@ -1310,13 +1423,46 @@ def remove_redundant_input_table_name(query_text):
     return query_text
 
 
+def select_output_header(input_header, join_header, query_column_infos):
+    """Build the output header of a SELECT query from the per-column infos; return None when neither the input nor the join header is known.
+    Columns whose name can't be resolved (None info, out-of-range index, unknown table) get a positional placeholder: col1, col2, ..."""
+    if input_header is None and join_header is None:
+        return None
+    if input_header is None:
+        input_header = []
+    if join_header is None:
+        join_header = []
+    headers_by_table = {'a': input_header, 'b': join_header}
+    output_header = []
+    for qci in query_column_infos:
+        placeholder_name = 'col{}'.format(len(output_header) + 1)
+        if qci is None:
+            output_header.append(placeholder_name)
+        elif qci.is_star:
+            if qci.table_name is None:
+                output_header += input_header + join_header
+            else:
+                output_header += headers_by_table.get(qci.table_name, [])
+        elif qci.column_name is not None:
+            output_header.append(qci.column_name)
+        else:
+            table_header = headers_by_table.get(qci.table_name, [])
+            # The lower bound is checked too: a negative index (e.g. from `a0`) must not wrap around to the end of the header
+            if qci.column_index is not None and 0 <= qci.column_index < len(table_header):
+                output_header.append(table_header[qci.column_index])
+            else:
+                output_header.append(placeholder_name)
+    return output_header
+
+
 def shallow_parse_input_query(query_text, input_iterator, join_tables_registry, query_context):
     query_text = cleanup_query(query_text)
     format_expression, string_literals = separate_string_literals(query_text)
     format_expression = remove_redundant_input_table_name(format_expression)
     input_variables_map = input_iterator.get_variables_map(query_text)
-
     rb_actions = separate_actions(format_expression)
+    if WITH in rb_actions:
+        input_iterator.handle_query_modifier(rb_actions[WITH])
 
     if ORDER_BY in rb_actions and UPDATE in rb_actions:
         raise RbqlParsingError('"ORDER BY" is not allowed in "UPDATE" queries') # UT JSON
@@ -1328,6 +1474,7 @@ def shallow_parse_input_query(query_text, input_iterator, join_tables_registry,
 
 
     join_variables_map = None
+    join_header = None
     if JOIN in rb_actions:
         rhs_table_id, variable_pairs = parse_join_expression(rb_actions[JOIN]['text'])
         if join_tables_registry is None:
@@ -1335,7 +1482,10 @@ def shallow_parse_input_query(query_text, input_iterator, join_tables_registry,
         join_record_iterator = join_tables_registry.get_iterator_by_table_id(rhs_table_id)
         if join_record_iterator is None:
             raise RbqlParsingError('Unable to find join table: "{}"'.format(rhs_table_id)) # UT JSON CSV
+        if WITH in rb_actions:
+            join_record_iterator.handle_query_modifier(rb_actions[WITH])
         join_variables_map = join_record_iterator.get_variables_map(query_text)
+        join_header = join_record_iterator.get_header()
 
         lhs_variables, rhs_indices = resolve_join_variables(input_variables_map, join_variables_map, variable_pairs, string_literals)
         joiner_type = {JOIN: InnerJoiner, INNER_JOIN: InnerJoiner, LEFT_OUTER_JOIN: LeftJoiner, LEFT_JOIN: LeftJoiner, STRICT_LEFT_JOIN: StrictLeftJoiner}[rb_actions[JOIN]['join_subtype']]
@@ -1349,7 +1499,7 @@ def shallow_parse_input_query(query_text, input_iterator, join_tables_registry,
 
     if WHERE in rb_actions:
         where_expression = rb_actions[WHERE]['text']
-        if re.search(r'[^!=]=[^=]', where_expression) is not None:
+        if re.search(r'[^><!=]=[^=]', where_expression) is not None:
             raise RbqlParsingError('Assignments "=" are not allowed in "WHERE" expressions. For equality test use "=="') # UT JSON
         query_context.where_expression = combine_string_literals(where_expression, string_literals)
 
@@ -1357,20 +1507,29 @@ def shallow_parse_input_query(query_text, input_iterator, join_tables_registry,
     if UPDATE in rb_actions:
         update_expression = translate_update_expression(rb_actions[UPDATE]['text'], input_variables_map, string_literals)
         query_context.update_expressions = combine_string_literals(update_expression, string_literals)
+        query_context.writer.set_header(input_iterator.get_header())
 
 
     if SELECT in rb_actions:
         query_context.top_count = find_top(rb_actions)
+
+        if EXCEPT in rb_actions:
+            output_header, select_expression = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals, input_iterator.get_header())
+        else:
+            select_expression, select_expression_for_ast = translate_select_expression(rb_actions[SELECT]['text'])
+            select_expression = combine_string_literals(select_expression, string_literals)
+            # We need to add string literals back in order to have relevant errors in case of exceptions during parsing
+            combined_select_expression_for_ast = combine_string_literals(select_expression_for_ast, string_literals)
+            column_infos = ast_parse_select_expression_to_column_infos(combined_select_expression_for_ast)
+            output_header = select_output_header(input_iterator.get_header(), join_header, column_infos)
+        query_context.select_expression = select_expression
+        query_context.writer.set_header(output_header)
+
         query_context.writer = TopWriter(query_context.writer)
         if 'distinct_count' in rb_actions[SELECT]:
             query_context.writer = UniqCountWriter(query_context.writer)
         elif 'distinct' in rb_actions[SELECT]:
             query_context.writer = UniqWriter(query_context.writer)
-        if EXCEPT in rb_actions:
-            query_context.select_expression = translate_except_expression(rb_actions[EXCEPT]['text'], input_variables_map, string_literals)
-        else:
-            select_expression = translate_select_expression(rb_actions[SELECT]['text'])
-            query_context.select_expression = combine_string_literals(select_expression, string_literals)
 
     if ORDER_BY in rb_actions:
         query_context.sort_key_expression = '({})'.format(combine_string_literals(rb_actions[ORDER_BY]['text'], string_literals))
@@ -1400,7 +1559,50 @@ def query(query_text, input_iterator, output_writer, output_warnings, join_table
     output_warnings.extend(output_writer.get_warnings())
 
 
-class TableIterator:
+class RBQLInputIterator:
+    def get_variables_map(self, query_text):
+        raise NotImplementedError('Unable to call the interface method')
+
+    def get_record(self):
+        raise NotImplementedError('Unable to call the interface method')
+
+    def handle_query_modifier(self, modifier_name):
+        """Does nothing by default. Reimplement if you need to handle a boolean query modifier
+        that can be used like this: `SELECT * WITH (modifiername)`"""
+
+    def get_warnings(self):
+        return list() # No warnings by default. Reimplement if your class can produce warnings
+
+    def get_header(self):
+        return None # No header by default. Reimplement if your class can provide input header
+
+
+class RBQLOutputWriter:
+    def write(self, fields):
+        raise NotImplementedError('Unable to call the interface method')
+
+    def finish(self):
+        """Does nothing by default. Reimplement if your class needs to do something on finish e.g. cleanup"""
+
+    def get_warnings(self):
+        return list() # No warnings by default. Reimplement if your class can produce warnings
+
+    def set_header(self, header):
+        """Does nothing by default. Reimplement if your class can handle output headers in a meaningful way"""
+
+
+class RBQLTableRegistry:
+    def get_iterator_by_table_id(self, table_id):
+        raise NotImplementedError('Unable to call the interface method')
+
+    def finish(self):
+        """Does nothing by default. Reimplement if your class needs to do something on finish e.g. cleanup"""
+
+    def get_warnings(self):
+        return list() # No warnings by default. Reimplement if your class can produce warnings
+
+
+class TableIterator(RBQLInputIterator):
     def __init__(self, table, column_names=None, normalize_column_names=True, variable_prefix='a'):
         self.table = table
         self.column_names = column_names
@@ -1438,23 +1640,24 @@ def get_warnings(self):
             return [make_inconsistent_num_fields_warning('input', self.fields_info)]
         return []
 
+    def get_header(self):
+        return self.column_names
+
 
-class TableWriter:
+class TableWriter(RBQLOutputWriter):
     def __init__(self, external_table):
         self.table = external_table
+        self.header = None
 
     def write(self, fields):
         self.table.append(fields)
         return True
 
-    def finish(self):
-        pass
-
-    def get_warnings(self):
-        return []
+    def set_header(self, header):
+        self.header = header
 
 
-class SingleTableRegistry:
+class SingleTableRegistry(RBQLTableRegistry):
     def __init__(self, table, column_names=None, normalize_column_names=True, table_name='b'):
         self.table = table
         self.column_names = column_names
@@ -1467,16 +1670,21 @@ def get_iterator_by_table_id(self, table_id):
         return TableIterator(self.table, self.column_names, self.normalize_column_names, 'b')
 
 
-def query_table(query_text, input_table, output_table, output_warnings, join_table=None, input_column_names=None, join_column_names=None, normalize_column_names=True, user_init_code=''):
+def query_table(query_text, input_table, output_table, output_warnings, join_table=None, input_column_names=None, join_column_names=None, output_column_names=None, normalize_column_names=True, user_init_code=''):
     if not normalize_column_names and input_column_names is not None and join_column_names is not None:
         ensure_no_ambiguous_variables(query_text, input_column_names, join_column_names)
     input_iterator = TableIterator(input_table, input_column_names, normalize_column_names)
     output_writer = TableWriter(output_table)
     join_tables_registry = None if join_table is None else SingleTableRegistry(join_table, join_column_names, normalize_column_names)
     query(query_text, input_iterator, output_writer, output_warnings, join_tables_registry, user_init_code=user_init_code)
+    if output_column_names is not None:
+        assert len(output_column_names) == 0, '`output_column_names` param must be an empty list or None'
+        if output_writer.header is not None:
+            for column_name in output_writer.header:
+                output_column_names.append(column_name)
 
 
-def set_debug_mode():
+def set_debug_mode(new_value=True):
     global debug_mode
-    debug_mode = True
+    debug_mode = new_value
 
diff --git a/rbql/rbql_main.py b/rbql/rbql_main.py
index 26bb769..6123493 100755
--- a/rbql/rbql_main.py
+++ b/rbql/rbql_main.py
@@ -8,17 +8,23 @@
 
 from . import csv_utils
 from . import rbql_csv
+from . import rbql_sqlite
 from . import rbql_engine
 from . import _version
 
+# TODO support sqlite input join on both sqlite and csv tables - pass 2 join registries
+# TODO add demo gif to python package README.md for pypi website
 
-PY3 = sys.version_info[0] == 3
+# TODO add --output_header param
+# TODO add option to write to other sqlite dbs
 
-# TODO add demo gif to python package README.md for pypi website
+# TODO add an option to align columns for content preview. This would be especially useful for Windows which doesn't support terminal colors
 
 
-history_path = os.path.join(os.path.expanduser("~"), ".rbql_py_query_history")
+PY3 = sys.version_info[0] == 3
+
 
+history_path = os.path.join(os.path.expanduser("~"), ".rbql_py_query_history")
 
 polymorphic_input = input if PY3 else raw_input
 
@@ -45,42 +51,43 @@ def get_default_policy(delim):
 
 def show_error(error_type, error_msg, is_interactive):
     if is_interactive:
-        full_msg = '{}Error [{}]:{} {}'.format('\u001b[31;1m', error_type, '\u001b[0m', error_msg)
-        print(full_msg)
+        if os.name == 'nt': # Windows does not support terminal colors
+            print('Error [{}]: {}'.format(error_type, error_msg))
+        else:
+            print('{}Error [{}]:{} {}'.format('\u001b[31;1m', error_type, '\u001b[0m', error_msg))
     else:
         eprint('Error [{}]: {}'.format(error_type, error_msg))
 
 
 def show_warning(msg, is_interactive):
     if is_interactive:
-        full_msg = '{}Warning:{} {}'.format('\u001b[33;1m', '\u001b[0m', msg)
-        print(full_msg)
+        if os.name == 'nt': # Windows does not support terminal colors
+            print('Warning: ' + msg)
+        else:
+            print('{}Warning:{} {}'.format('\u001b[33;1m', '\u001b[0m', msg))
     else:
         eprint('Warning: ' + msg)
 
 
-def run_with_python(args, is_interactive):
+def run_with_python_csv(args, is_interactive):
     if args.debug_mode:
         rbql_csv.set_debug_mode()
     delim = rbql_csv.normalize_delim(args.delim)
     policy = args.policy if args.policy is not None else get_default_policy(delim)
     query = args.query
-    skip_header = args.skip_header
+    with_headers = args.with_headers
     input_path = args.input
     output_path = args.output
-    init_source_file = args.init_source_file
     csv_encoding = args.encoding
     args.output_delim, args.output_policy = (delim, policy) if args.out_format == 'input' else rbql_csv.interpret_named_csv_format(args.out_format)
     out_delim, out_policy = args.output_delim, args.output_policy
 
-    user_init_code = ''
-    if init_source_file is not None:
-        user_init_code = rbql_csv.read_user_init_code(init_source_file)
+    user_init_code = rbql_csv.read_user_init_code(args.init_source_file) if args.init_source_file is not None else ''
 
     warnings = []
     error_type, error_msg = None, None
     try:
-        rbql_csv.query_csv(query, input_path, delim, policy, output_path, out_delim, out_policy, csv_encoding, warnings, skip_header, user_init_code, args.color)
+        rbql_csv.query_csv(query, input_path, delim, policy, output_path, out_delim, out_policy, csv_encoding, warnings, with_headers, args.comment_prefix, user_init_code, args.color)
     except Exception as e:
         if args.debug_mode:
             raise
@@ -97,6 +104,35 @@ def run_with_python(args, is_interactive):
     return success
 
 
+def run_with_python_sqlite(args, is_interactive):
+    """Run `args.query` over the sqlite database `args.database` via rbql_sqlite.query_sqlite_to_csv(); show warnings or the error and return True on success."""
+    import sqlite3
+    user_init_code = rbql_csv.read_user_init_code(args.init_source_file) if args.init_source_file is not None else ''
+
+    warnings = []
+    error_type, error_msg = None, None
+    db_connection = None # Stays None when sqlite3.connect() fails, so that the `finally` clause doesn't hide the original error behind UnboundLocalError
+    try:
+        # TODO open in readonly mode
+        db_connection = sqlite3.connect(args.database)
+        if args.debug_mode:
+            rbql_engine.set_debug_mode()
+        rbql_sqlite.query_sqlite_to_csv(args.query, db_connection, args.input, args.output, args.output_delim, args.output_policy, args.encoding, warnings, user_init_code, args.color)
+    except Exception as e:
+        if args.debug_mode:
+            raise
+        error_type, error_msg = rbql_engine.exception_to_error_info(e)
+    finally:
+        if db_connection is not None:
+            db_connection.close()
+
+    if error_type is not None:
+        show_error(error_type, error_msg, is_interactive)
+        return False
+    for warning in warnings:
+        show_warning(warning, is_interactive)
+    return True
+
 
 def is_delimited_table(sampled_lines, delim, policy):
     if len(sampled_lines) < 2:
@@ -113,11 +149,11 @@ def is_delimited_table(sampled_lines, delim, policy):
     return True
 
 
-def sample_lines(src_path, encoding, delim, policy):
-    # FIXME this should be an independent function, remove sample line functionality from record iterator
+def sample_lines(src_path, encoding, delim, policy, comment_prefix=None):
+    # TODO this should be a dependency-free function, remove sample line functionality from CSVRecordIterator
     result = []
     with open(src_path, 'rb') as source:
-        line_iterator = rbql_csv.CSVRecordIterator(source, encoding, delim=delim, policy=policy, line_mode=True)
+        line_iterator = rbql_csv.CSVRecordIterator(source, encoding, delim=delim, policy=policy, line_mode=True, comment_prefix=comment_prefix)
         for _i in polymorphic_xrange(10):
             line = line_iterator.polymorphic_get_row()
             if line is None:
@@ -126,8 +162,8 @@ def sample_lines(src_path, encoding, delim, policy):
         return result
 
 
-def autodetect_delim_policy(input_path, encoding):
-    sampled_lines = sample_lines(input_path, encoding, None, None)
+def autodetect_delim_policy(input_path, encoding, comment_prefix=None):
+    sampled_lines = sample_lines(input_path, encoding, None, None, comment_prefix)
     autodetection_dialects = [('\t', 'simple'), (',', 'quoted'), (';', 'quoted'), ('|', 'simple')]
     for delim, policy in autodetection_dialects:
         if is_delimited_table(sampled_lines, delim, policy):
@@ -139,23 +175,27 @@ def autodetect_delim_policy(input_path, encoding):
     return (None, None)
 
 
-def sample_records(input_path, delim, policy, encoding):
+def sample_records(input_path, delim, policy, encoding, comment_prefix=None):
     with open(input_path, 'rb') as source:
-        record_iterator = rbql_csv.CSVRecordIterator(source, encoding, delim=delim, policy=policy)
+        record_iterator = rbql_csv.CSVRecordIterator(source, encoding, delim=delim, policy=policy, comment_prefix=comment_prefix)
         sampled_records = record_iterator.get_all_records(num_rows=10);
         warnings = record_iterator.get_warnings()
         return (sampled_records, warnings)
 
 
-def print_colorized(records, delim, encoding, show_column_names, skip_header):
+def print_colorized(records, delim, encoding, show_column_names, with_headers):
     # TODO consider colorizing a1,a2,... in different default color
-    reset_color_code = '\u001b[0m'
-    color_codes = ['\u001b[0m', '\u001b[31m', '\u001b[32m', '\u001b[33m', '\u001b[34m', '\u001b[35m', '\u001b[36m', '\u001b[31;1m', '\u001b[32;1m', '\u001b[33;1m']
+    if os.name == 'nt': # Windows does not support terminal colors
+        reset_color_code = ''
+        color_codes = ['']
+    else:
+        reset_color_code = '\u001b[0m'
+        color_codes = ['\u001b[0m', '\u001b[31m', '\u001b[32m', '\u001b[33m', '\u001b[34m', '\u001b[35m', '\u001b[36m', '\u001b[31;1m', '\u001b[32;1m', '\u001b[33;1m']
     for rnum, record in enumerate(records):
         out_fields = []
         for i, field in enumerate(record):
             color_code = color_codes[i % len(color_codes)]
-            if not show_column_names or (skip_header and rnum == 0):
+            if not show_column_names or (with_headers and rnum == 0):
                 colored_field = '{}{}'.format(color_code, field)
             else:
                 colored_field = '{}a{}:{}'.format(color_code, i + 1, field)
@@ -176,11 +216,15 @@ def get_default_output_path(input_path, delim):
     return input_path + '.txt'
 
 
-def run_interactive_loop(args):
-    import readline
-    if os.path.exists(history_path):
-        readline.read_history_file(history_path)
-    readline.set_history_length(100)
+def run_interactive_loop(mode, args):
+    assert mode in ['csv', 'sqlite']
+    try:
+        import readline # Module readline is not available on Windows
+        if os.path.exists(history_path):
+            readline.read_history_file(history_path)
+        readline.set_history_length(100)
+    except Exception:
+        pass
     while True:
         try:
             query = polymorphic_input('Input SQL-like RBQL query and press Enter:\n> ')
@@ -190,20 +234,87 @@ def run_interactive_loop(args):
             break # Ctrl-D
         if not len(query):
             break
-        readline.write_history_file(history_path)
+        try:
+            readline.write_history_file(history_path) # This can fail sometimes for no valid reason
+        except Exception:
+            pass
         args.query = query
-        success = run_with_python(args, is_interactive=True)
+        if mode == 'csv':
+            success = run_with_python_csv(args, is_interactive=True)
+        else:
+            success = run_with_python_sqlite(args, is_interactive=True)
         if success:
             print('\nOutput table preview:')
             print('====================================')
-            records, _warnings = sample_records(args.output, args.output_delim, args.output_policy, args.encoding)
-            print_colorized(records, args.output_delim, args.encoding, show_column_names=False, skip_header=False)
+            records, _warnings = sample_records(args.output, args.output_delim, args.output_policy, args.encoding, comment_prefix=None)
+            print_colorized(records, args.output_delim, args.encoding, show_column_names=False, with_headers=False)
             print('====================================')
             print('Success! Result table was saved to: ' + args.output)
             break
 
 
-def start_preview_mode(args):
+def sample_records_sqlite(db_connection, table_name):
+    import sqlite3
+    record_iterator = rbql_sqlite.SqliteRecordIterator(db_connection, table_name)
+    records = []
+    records.append(record_iterator.get_column_names())
+    records += record_iterator.get_all_records(num_rows=10)
+    return records
+
+
+def read_table_names(db_connection):
+    cursor = db_connection.cursor()
+    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
+    table_names = [r[0] for r in cursor.fetchall()]
+    return table_names
+
+
+def select_table_name_by_user_choice(db_connection):
+    table_names = read_table_names(db_connection)
+    max_to_show = 20
+    if len(table_names) > max_to_show:
+        print('Database has {} tables, showing top {}:'.format(len(table_names), max_to_show))
+    else:
+        print('Showing database tables:')
+    print(', '.join(table_names[:max_to_show]))
+    table_name = polymorphic_input('No input table was provided as a CLI argument, please type in the table name to use:\n> ')
+    table_name = table_name.strip()
+    while table_name not in table_names:
+        table_name = polymorphic_input('"{}" is not a valid table name. Please enter a valid table name:\n> '.format(table_name))
+        table_name = table_name.strip()
+    return table_name
+
+
+def start_preview_mode_sqlite(args):
+    import sqlite3
+    db_path = args.database
+    db_connection = sqlite3.connect(db_path)
+    if not args.input:
+        args.input = select_table_name_by_user_choice(db_connection)
+    try:
+        records = sample_records_sqlite(db_connection, table_name=args.input)
+    except Exception as e:
+        if args.debug_mode:
+            raise
+        error_type, error_msg = rbql_engine.exception_to_error_info(e)
+        show_error(error_type, 'Unable to sample preview records: {}'.format(error_msg), is_interactive=True)
+        sys.exit(1)
+    db_connection.close()
+
+    print('Input table preview:')
+    print('====================================')
+    print_colorized(records, '|', args.encoding, show_column_names=True, with_headers=False)
+    print('====================================\n')
+    if args.output is None:
+        args.output = get_default_output_path('rbql_sqlite_rs', args.output_delim)
+        show_warning('Output path was not provided. Result set will be saved as: ' + args.output, is_interactive=True)
+    try:
+        run_interactive_loop('sqlite', args)
+    except KeyboardInterrupt:
+        print()
+
+
+def start_preview_mode_csv(args):
     input_path = args.input
     if not input_path:
         show_error('generic', 'Input file must be provided in interactive mode. You can use stdin input only in non-interactive mode', is_interactive=True)
@@ -215,16 +326,16 @@ def start_preview_mode(args):
         delim = rbql_csv.normalize_delim(args.delim)
         policy = args.policy if args.policy is not None else get_default_policy(delim)
     else:
-        delim, policy = autodetect_delim_policy(input_path, args.encoding)
+        delim, policy = autodetect_delim_policy(input_path, args.encoding, args.comment_prefix)
         if delim is None:
             show_error('generic', 'Unable to autodetect table delimiter. Provide column separator explicitly with "--delim" option', is_interactive=True)
             return
         args.delim = delim
         args.policy = policy
-    records, warnings = sample_records(input_path, delim, policy, args.encoding)
+    records, warnings = sample_records(input_path, delim, policy, args.encoding, args.comment_prefix)
     print('Input table preview:')
     print('====================================')
-    print_colorized(records, delim, args.encoding, show_column_names=True, skip_header=args.skip_header)
+    print_colorized(records, delim, args.encoding, show_column_names=True, with_headers=args.with_headers)
     print('====================================\n')
     for warning in warnings:
         show_warning(warning, is_interactive=True)
@@ -232,22 +343,30 @@ def start_preview_mode(args):
         args.output = get_default_output_path(input_path, delim)
         show_warning('Output path was not provided. Result set will be saved as: ' + args.output, is_interactive=True)
     try:
-        run_interactive_loop(args)
+        run_interactive_loop('csv', args)
     except KeyboardInterrupt:
         print()
 
 
-tool_description = '''
-Run RBQL queries against CSV files and data streams
+csv_tool_description = '''
+Run RBQL queries against CSV files, sqlite databases
+
+rbql supports two modes: non-interactive (with "--query" option) and interactive (without "--query" option)
 
-rbql-py supports two modes: non-interactive (with "--query" option) and interactive (without "--query" option)
 Interactive mode shows source table preview which makes query editing much easier. Usage example:
-  $ rbql-py --input input.csv
-Non-interactive mode supports reading input tables from stdin. Usage example:
-  $ rbql-py --query "select a1, a2 order by a1" --delim , < input.csv
+  $ rbql --input input.csv
+
+Non-interactive mode supports reading input tables from stdin and writing output to stdout. Usage example:
+  $ rbql --query "select a1, a2 order by a1" --delim , < input.csv
+
+By default rbql works with CSV input files.
+To learn how to use rbql to query an sqlite database, run this command:
+
+  $ rbql sqlite --help
+
 '''
 
-epilog = '''
+csv_epilog = '''
 Description of the available CSV split policies:
   * "simple" - RBQL uses simple split() function and doesn't perform special handling of double quote characters
   * "quoted" - Separator can be escaped inside double-quoted fields. Double quotes inside double-quoted fields must be doubled
@@ -257,17 +376,18 @@ def start_preview_mode(args):
 '''
 
 
-def main():
-    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=tool_description, epilog=epilog)
+def csv_main():
+    parser = argparse.ArgumentParser(prog='rbql [csv]', formatter_class=argparse.RawDescriptionHelpFormatter, description=csv_tool_description, epilog=csv_epilog)
     parser.add_argument('--input', metavar='FILE', help='read csv table from FILE instead of stdin. Required in interactive mode')
     parser.add_argument('--delim', help='delimiter character or multicharacter string, e.g. "," or "###". Can be autodetected in interactive mode')
     parser.add_argument('--policy', help='CSV split policy, see the explanation below. Can be autodetected in interactive mode', choices=policy_names)
-    parser.add_argument('--skip-header', action='store_true', help='skip header line in input and join tables. Roughly equivalent of ... WHERE NR > 1 ... in your Query')
+    parser.add_argument('--with-headers', action='store_true', help='indicates that input (and join) table has header')
+    parser.add_argument('--comment-prefix', metavar='PREFIX', help='ignore lines in input and join tables that start with the comment PREFIX, e.g. "#" or ">>"')
     parser.add_argument('--query', help='query string in rbql. Run in interactive mode if empty')
     parser.add_argument('--out-format', help='output format', default='input', choices=out_format_names)
     parser.add_argument('--encoding', help='manually set csv encoding', default=rbql_csv.default_csv_encoding, choices=['latin-1', 'utf-8'])
     parser.add_argument('--output', metavar='FILE', help='write output table to FILE instead of stdout')
-    parser.add_argument('--color', action='store_true', help='colorize columns in output in non-interactive mode. Do NOT use if redirecting output to a file')
+    parser.add_argument('--color', action='store_true', help='colorize columns in output in non-interactive mode')
     parser.add_argument('--version', action='store_true', help='print RBQL version and exit')
     parser.add_argument('--init-source-file', metavar='FILE', help=argparse.SUPPRESS) # Path to init source file to use instead of ~/.rbql_init_source.py
     parser.add_argument('--debug-mode', action='store_true', help=argparse.SUPPRESS) # Run in debug mode
@@ -277,6 +397,14 @@ def main():
         print(_version.__version__)
         return
 
+    if args.color and os.name == 'nt':
+        show_error('generic', '--color option is not supported for Windows terminals', is_interactive=False)
+        sys.exit(1)
+
+    if args.output is not None and args.color:
+        show_error('generic', '"--output" is not compatible with "--color" option', is_interactive=False)
+        sys.exit(1)
+
     if args.policy == 'monocolumn':
         args.delim = ''
 
@@ -284,33 +412,108 @@ def main():
         show_error('generic', 'Using "--policy" without "--delim" is not allowed', is_interactive=False)
         sys.exit(1)
 
-    if args.output is not None and args.color:
-        show_error('generic', '"--output" is not compatible with "--color" option', is_interactive=False)
-        sys.exit(1)
-
     if args.encoding != 'latin-1' and not PY3:
         if args.delim is not None:
             args.delim = args.delim.decode(args.encoding)
         if args.query is not None:
             args.query = args.query.decode(args.encoding)
 
-    if args.query:
+    is_interactive_mode = args.query is None
+    if is_interactive_mode:
+        if args.color:
+            show_error('generic', '"--color" option is not compatible with interactive mode. Output and Input files preview would be colorized anyway', is_interactive=False)
+            sys.exit(1)
+        start_preview_mode_csv(args)
+    else:
         if args.delim is None:
             show_error('generic', 'Separator must be provided with "--delim" option in non-interactive mode', is_interactive=False)
             sys.exit(1)
-        success = run_with_python(args, is_interactive=False)
-        if not success:
+        if not run_with_python_csv(args, is_interactive=False):
             sys.exit(1)
-    else:
+
+
+sqlite_tool_description = '''
+Run RBQL queries against sqlite databases
+Although sqlite database can serve as an input data source, the query engine which will be used is RBQL (not sqlite).
+Result set will be written to a csv file. This is also true for UPDATE queries because in RBQL UPDATE is just a special case of SELECT.
+
+rbql sqlite supports two modes: non-interactive (with "--query" option) and interactive (without "--query" option)
+
+Interactive mode shows source table preview which makes query editing much easier.
+  $ rbql sqlite path/to/database.sqlite
+
+Non-interactive mode supports reading input tables from stdin and writing output to stdout. Usage example:
+  $ rbql sqlite path/to/database.sqlite --input Employee --query "select top 20 a1, random.random(), a.salary // 1000 order by a.emp_id"
+
+'''
+
+
+def sqlite_main():
+    parser = argparse.ArgumentParser(prog='rbql sqlite', formatter_class=argparse.RawDescriptionHelpFormatter, description=sqlite_tool_description)
+    parser.add_argument('database', metavar='PATH', help='PATH to sqlite db')
+    parser.add_argument('--input', metavar='NAME', help='NAME of the table in sqlite database')
+    parser.add_argument('--query', help='query string in rbql. Run in interactive mode if empty')
+    parser.add_argument('--out-format', help='output format', default='csv', choices=['csv', 'tsv'])
+    parser.add_argument('--output', metavar='FILE', help='write output table to FILE instead of stdout')
+    parser.add_argument('--color', action='store_true', help='colorize columns in output in non-interactive mode. Do NOT use if redirecting output to a file')
+    parser.add_argument('--version', action='store_true', help='print RBQL version and exit')
+    parser.add_argument('--init-source-file', metavar='FILE', help=argparse.SUPPRESS) # Path to init source file to use instead of ~/.rbql_init_source.py
+    parser.add_argument('--debug-mode', action='store_true', help=argparse.SUPPRESS) # Run in debug mode
+    args = parser.parse_args()
+
+    if args.version:
+        print(_version.__version__)
+        return
+
+    if not os.path.isfile(args.database):
+        show_error('generic', 'The database does not exist: {}'.format(args.database), is_interactive=False)
+        sys.exit(1)
+
+    is_interactive_mode = args.query is None
+
+    import sqlite3
+    if not args.input:
+        db_connection = sqlite3.connect(args.database)
+        table_names = read_table_names(db_connection)
+        db_connection.close()
+        if len(table_names) == 1:
+            args.input = table_names[0]
+            # TODO Consider showing a warning here
+        elif not is_interactive_mode:
+            show_error('generic', 'Please provide input table name with --input parameter: source database has more than one table', is_interactive=False)
+            sys.exit(1)
+
+    if args.output is not None and args.color:
+        show_error('generic', '"--output" is not compatible with "--color" option', is_interactive=False)
+        sys.exit(1)
+
+    args.encoding = 'utf-8'
+    args.output_delim, args.output_policy = (',', 'quoted_rfc') if args.out_format == 'csv' else rbql_csv.interpret_named_csv_format(args.out_format)
+
+    if is_interactive_mode:
         if args.color:
             show_error('generic', '"--color" option is not compatible with interactive mode. Output and Input files preview would be colorized anyway', is_interactive=False)
             sys.exit(1)
-        if os.name == 'nt':
-            show_error('generic', 'Interactive mode is not available on Windows', is_interactive=False)
+        start_preview_mode_sqlite(args)
+    else:
+        if not run_with_python_sqlite(args, is_interactive=False):
             sys.exit(1)
-        start_preview_mode(args)
 
 
+def main():
+    if len(sys.argv) > 1:
+        if sys.argv[1] == 'sqlite':
+            del sys.argv[1]
+            sqlite_main()
+        elif sys.argv[1] == 'csv':
+            del sys.argv[1]
+            csv_main()
+        else:
+            # TODO Consider showing "unknown mode" error if the first argument doesn't start with '--'
+            csv_main()
+    else:
+        csv_main()
+
 
 if __name__ == '__main__':
     main()