From 73b545f942908dbb841493f3cefab398b07c7c8c Mon Sep 17 00:00:00 2001 From: jirimosinger <99467904+jirimosinger@users.noreply.github.com> Date: Fri, 6 Jan 2023 15:16:44 +0100 Subject: [PATCH] feat: DB2 preprocessor statements highlighting and parsing --- clients/vscode-hlasmplugin/CHANGELOG.md | 2 +- parser_library/src/diagnostic.cpp | 21 +- parser_library/src/diagnostic.h | 6 +- parser_library/src/lexing/logical_line.h | 54 +- .../low_language_processor.cpp | 48 +- .../src/processing/preprocessor.cpp | 32 +- parser_library/src/processing/preprocessor.h | 19 +- .../preprocessors/cics_preprocessor.cpp | 44 +- .../preprocessors/db2_preprocessor.cpp | 878 ++++++++++++------ .../preprocessors/preprocessor_utils.cpp | 31 +- .../preprocessors/preprocessor_utils.h | 13 + .../statement_analyzers/lsp_analyzer.cpp | 4 +- .../src/semantics/range_provider.cpp | 24 +- parser_library/src/semantics/range_provider.h | 4 + parser_library/src/semantics/statement.cpp | 4 - parser_library/src/semantics/statement.h | 18 +- parser_library/test/lexing/CMakeLists.txt | 1 + .../lexing/logical_line_iterator_test.cpp | 190 ++++ .../test/lexing/logical_line_test.cpp | 45 +- .../lsp/lsp_context_preprocessor_test.cpp | 314 ++++++- .../test/processing/db2_preprocessor_test.cpp | 188 +++- parser_library/test/semantics/CMakeLists.txt | 1 + .../test/semantics/highlighting_test.cpp | 86 +- .../test/semantics/text_range_test.cpp | 64 ++ utils/include/utils/string_operations.h | 6 +- utils/src/string_operations.cpp | 48 +- 26 files changed, 1624 insertions(+), 521 deletions(-) create mode 100644 parser_library/test/lexing/logical_line_iterator_test.cpp create mode 100644 parser_library/test/semantics/text_range_test.cpp diff --git a/clients/vscode-hlasmplugin/CHANGELOG.md b/clients/vscode-hlasmplugin/CHANGELOG.md index b369914f5..c0163745c 100644 --- a/clients/vscode-hlasmplugin/CHANGELOG.md +++ b/clients/vscode-hlasmplugin/CHANGELOG.md @@ -6,7 +6,7 @@ - Command for downloading copybooks allows 
selections of data sets which should be downloaded - Code actions for an unknown operation code - Quick fixes for typos in instruction and macro names added to the code actions -- Endevor and CICS preprocessor statements highlighting and parsing +- Endevor, CICS and DB2 preprocessor statements highlighting and parsing - Instruction suggestions are included in the completion list - Support for the SYSCLOCK system variable - Implement step out support in the macro tracer diff --git a/parser_library/src/diagnostic.cpp b/parser_library/src/diagnostic.cpp index 650becb70..b893d1d9f 100644 --- a/parser_library/src/diagnostic.cpp +++ b/parser_library/src/diagnostic.cpp @@ -2348,18 +2348,33 @@ diagnostic_op diagnostic_op::error_DB004(const range& range) { return diagnostic_op(diagnostic_severity::error, "DB004", - std::string("DB2 preprocessor - requested SQL TYPE not recognized"), + std::string( + "DB2 preprocessor - requested 'SQL TYPE IS' not recognized (operands either missing or not recognized)"), range); } -diagnostic_op diagnostic_op::error_DB005(const range& range) +diagnostic_op diagnostic_op::warn_DB005(const range& range) { return diagnostic_op(diagnostic_severity::warning, "DB005", - std::string("DB2 preprocessor - continuation detected on SQL TYPE statement"), + std::string("DB2 preprocessor - continuation detected on 'SQL TYPE' statement"), range); } +diagnostic_op diagnostic_op::warn_DB006(const range& range) +{ + return diagnostic_op(diagnostic_severity::warning, + "DB006", + std::string("DB2 preprocessor - requested 'SQL TYPE' not recognized (operand 'IS' either missing or split)"), + range); +} + +diagnostic_op diagnostic_op::warn_DB007(const range& range) +{ + return diagnostic_op( + diagnostic_severity::warning, "DB007", std::string("DB2 preprocessor - missing INCLUDE member"), range); +} + diagnostic_op diagnostic_op::warn_CIC001(const range& range) { return diagnostic_op(diagnostic_severity::warning, diff --git a/parser_library/src/diagnostic.h 
b/parser_library/src/diagnostic.h index d83ecd30b..13cba6bea 100644 --- a/parser_library/src/diagnostic.h +++ b/parser_library/src/diagnostic.h @@ -711,7 +711,11 @@ struct diagnostic_op static diagnostic_op error_DB004(const range& range); - static diagnostic_op error_DB005(const range& range); + static diagnostic_op warn_DB005(const range& range); + + static diagnostic_op warn_DB006(const range& range); + + static diagnostic_op warn_DB007(const range& range); static diagnostic_op warn_CIC001(const range& range); diff --git a/parser_library/src/lexing/logical_line.h b/parser_library/src/lexing/logical_line.h index fa6a53067..3a7c15589 100644 --- a/parser_library/src/lexing/logical_line.h +++ b/parser_library/src/lexing/logical_line.h @@ -16,9 +16,12 @@ #define HLASMPLUGIN_HLASMPARSERLIBRARY_LOGICAL_LINE_H #include -#include +#include +#include +#include #include #include +#include #include namespace hlasm_plugin::parser_library::lexing { @@ -63,7 +66,7 @@ struct logical_line bool so_si_continuation; bool missing_next_line; - void clear() + void clear() noexcept { segments.clear(); continuation_error = false; @@ -83,26 +86,26 @@ struct logical_line using reference = const char&; const_iterator() = default; - const_iterator(segment_iterator segment, column_iterator col, const logical_line* ll) - : m_segment(segment) - , m_col(col) + const_iterator(segment_iterator segment_it, column_iterator col_it, const logical_line* ll) noexcept + : m_segment_it(segment_it) + , m_col_it(col_it) , m_logical_line(ll) {} - reference operator*() const noexcept { return *m_col; } - pointer operator->() const noexcept { return std::to_address(m_col); } + reference operator*() const noexcept { return *m_col_it; } + pointer operator->() const noexcept { return std::to_address(m_col_it); } const_iterator& operator++() noexcept { assert(m_logical_line); - ++m_col; - while (m_col == m_segment->code.end()) + ++m_col_it; + while (m_col_it == m_segment_it->code.end()) { - if (++m_segment == 
m_logical_line->segments.end()) + if (++m_segment_it == m_logical_line->segments.end()) { - m_col = column_iterator(); + m_col_it = column_iterator(); break; } - m_col = m_segment->code.begin(); + m_col_it = m_segment_it->code.begin(); } return *this; } @@ -115,12 +118,12 @@ struct logical_line const_iterator& operator--() noexcept { assert(m_logical_line); - while (m_segment == m_logical_line->segments.end() || m_col == m_segment->code.begin()) + while (m_segment_it == m_logical_line->segments.end() || m_col_it == m_segment_it->code.begin()) { - --m_segment; - m_col = m_segment->code.end(); + --m_segment_it; + m_col_it = m_segment_it->code.end(); } - --m_col; + --m_col_it; return *this; } const_iterator operator--(int) noexcept @@ -132,19 +135,30 @@ struct logical_line friend bool operator==(const const_iterator& a, const const_iterator& b) noexcept { assert(a.m_logical_line == b.m_logical_line); - return a.m_segment == b.m_segment && a.m_col == b.m_col; + return a.m_segment_it == b.m_segment_it && a.m_col_it == b.m_col_it; } friend bool operator!=(const const_iterator& a, const const_iterator& b) noexcept { return !(a == b); } bool same_line(const const_iterator& o) const noexcept { assert(m_logical_line == o.m_logical_line); - return m_segment == o.m_segment; + return m_segment_it == o.m_segment_it; + } + + std::pair get_coordinates() const noexcept + { + assert(m_logical_line); + + if (m_segment_it == m_logical_line->segments.end()) + return { 0, 0 }; + + return { m_segment_it->code_offset + std::distance(m_segment_it->code.begin(), m_col_it), + std::distance(m_logical_line->segments.begin(), m_segment_it) }; } private: - segment_iterator m_segment = segment_iterator(); - column_iterator m_col = std::string_view::const_iterator(); + segment_iterator m_segment_it = segment_iterator(); + column_iterator m_col_it = std::string_view::const_iterator(); const logical_line* m_logical_line = nullptr; }; diff --git 
a/parser_library/src/processing/instruction_sets/low_language_processor.cpp b/parser_library/src/processing/instruction_sets/low_language_processor.cpp index 93733cb2b..596028ac1 100644 --- a/parser_library/src/processing/instruction_sets/low_language_processor.cpp +++ b/parser_library/src/processing/instruction_sets/low_language_processor.cpp @@ -89,50 +89,32 @@ bool low_language_processor::create_symbol( return ok; } -// return true if the result is not empty -bool trim_right(std::string& s) -{ - auto last_non_space = s.find_last_not_of(' '); - if (last_non_space != std::string::npos) - { - s.erase(last_non_space + 1); - return true; - } - else - { - s.clear(); - return false; - } -} - low_language_processor::preprocessed_part low_language_processor::preprocess_inner(const resolved_statement& stmt) { using namespace semantics; preprocessed_part result; - std::string new_label; + const auto label_inserter = [&result, &ids = hlasm_ctx.ids()](std::string&& label, const range& r) { + label.erase(label.find_last_not_of(' ') + 1); + if (label.empty()) + result.label.emplace(r); + else + { + auto ord_id = ids.add(label); + result.label.emplace(r, ord_symbol_string { ord_id, std::move(label) }); + } + }; + // label switch (const auto& label_ref = stmt.label_ref(); label_ref.type) { case label_si_type::CONC: - new_label = concatenation_point::evaluate(std::get(label_ref.value), eval_ctx); - if (!trim_right(new_label)) - result.label.emplace(label_ref.field_range); - else - { - auto ord_id = hlasm_ctx.ids().add(new_label); - result.label.emplace(label_ref.field_range, ord_symbol_string { ord_id, std::move(new_label) }); - } + label_inserter(concatenation_point::evaluate(std::get(label_ref.value), eval_ctx), + label_ref.field_range); break; case label_si_type::VAR: - new_label = var_sym_conc::evaluate(std::get(label_ref.value)->evaluate(eval_ctx)); - if (!trim_right(new_label)) - result.label.emplace(label_ref.field_range); - else - { - auto ord_id = 
hlasm_ctx.ids().add(new_label); - result.label.emplace(label_ref.field_range, ord_symbol_string { ord_id, std::move(new_label) }); - } + label_inserter( + var_sym_conc::evaluate(std::get(label_ref.value)->evaluate(eval_ctx)), label_ref.field_range); break; case label_si_type::MAC: if (stmt.opcode_ref().value.to_string_view() != "TITLE") diff --git a/parser_library/src/processing/preprocessor.cpp b/parser_library/src/processing/preprocessor.cpp index 2238fb4e4..46254934e 100644 --- a/parser_library/src/processing/preprocessor.cpp +++ b/parser_library/src/processing/preprocessor.cpp @@ -80,14 +80,36 @@ void preprocessor::do_highlighting(const semantics::preprocessor_statement_si& s src_proc.add_hl_symbol(token_info(details.instruction.r, semantics::hl_scopes::instruction), continue_column); - for (const auto& operand : details.operands.items) - { + for (const auto& operand : details.operands) src_proc.add_hl_symbol(token_info(operand.r, semantics::hl_scopes::operand), continue_column); - } - for (const auto& remark_r : details.remarks.items) - { + for (const auto& remark_r : details.remarks) src_proc.add_hl_symbol(token_info(remark_r, semantics::hl_scopes::remark), continue_column); +} + +void preprocessor::do_highlighting(const semantics::preprocessor_statement_si& stmt, + const lexing::logical_line& ll, + semantics::source_info_processor& src_proc, + size_t continue_column) const +{ + do_highlighting(stmt, src_proc, continue_column); + + constexpr const auto continuation_column = lexing::default_ictl.end; + constexpr const auto ignore_column = lexing::default_ictl.end + 1; + for (size_t i = 0, lineno = stmt.m_details.stmt_r.start.line; i < ll.segments.size(); ++i, ++lineno) + { + const auto& segment = ll.segments[i]; + + if (!segment.continuation.empty()) + src_proc.add_hl_symbol( + token_info(range(position(lineno, continuation_column), position(lineno, ignore_column)), + semantics::hl_scopes::continuation)); + + if (!segment.ignore.empty()) + 
src_proc.add_hl_symbol(token_info( + range(position(lineno, ignore_column), + position(lineno, ignore_column + segment.ignore.length() - segment.continuation.empty())), + semantics::hl_scopes::ignored)); } } diff --git a/parser_library/src/processing/preprocessor.h b/parser_library/src/processing/preprocessor.h index db34598dd..ffa85eb5f 100644 --- a/parser_library/src/processing/preprocessor.h +++ b/parser_library/src/processing/preprocessor.h @@ -52,6 +52,8 @@ using library_fetcher = class preprocessor { public: + using line_iterator = std::vector::const_iterator; + struct included_member_details { std::string name; @@ -78,18 +80,16 @@ class preprocessor virtual const std::vector>& view_included_members(); -protected: - preprocessor() = default; - preprocessor(const preprocessor&) = default; - preprocessor(preprocessor&&) = default; - - using line_iterator = std::vector::const_iterator; - static line_iterator extract_nonempty_logical_line(lexing::logical_line& out, line_iterator it, line_iterator end, const lexing::logical_line_extractor_args& opts); +protected: + preprocessor() = default; + preprocessor(const preprocessor&) = default; + preprocessor(preprocessor&&) = default; + void reset(); void set_statement(std::shared_ptr stmt); void set_statements(std::vector> stmts); @@ -100,6 +100,11 @@ class preprocessor semantics::source_info_processor& src_proc, size_t continue_column = 15) const; + virtual void do_highlighting(const semantics::preprocessor_statement_si& stmt, + const lexing::logical_line& ll, + semantics::source_info_processor& src_proc, + size_t continue_column = 15) const; + void append_included_member(std::unique_ptr details); void append_included_members(std::vector> details); void capture_included_members(preprocessor& preproc); diff --git a/parser_library/src/processing/preprocessors/cics_preprocessor.cpp b/parser_library/src/processing/preprocessors/cics_preprocessor.cpp index 49babeb24..383663829 100644 --- 
a/parser_library/src/processing/preprocessors/cics_preprocessor.cpp +++ b/parser_library/src/processing/preprocessors/cics_preprocessor.cpp @@ -701,18 +701,6 @@ class mini_parser return r; }(); - template - std::true_type same_line_detector(const T& t, decltype(t.same_line(t)) = false); - std::false_type same_line_detector(...); - - bool same_line(It l, It r) - { - if constexpr (decltype(same_line_detector(l))::value) - return l.same_line(r); - else - return true; - } - public: const std::string& operands() const& { return m_substituted_operands; } std::string operands() && { return std::move(m_substituted_operands); } @@ -848,7 +836,6 @@ class mini_parser class cics_preprocessor final : public preprocessor { lexing::logical_line m_logical_line; - std::string m_operands; library_fetcher m_libs; diagnostic_op_consumer* m_diags = nullptr; std::vector m_result; @@ -1151,8 +1138,8 @@ class cics_preprocessor final : public preprocessor } static const stmt_part_ids part_ids { 1, { 2, 3 }, { 4 }, std::nullopt }; - auto stmt = get_preproc_statement(m_matches_ll, part_ids, lineno, 1); - do_highlighting(*stmt, m_src_proc, 1); + auto stmt = get_preproc_statement(m_matches_ll, part_ids, lineno, 1); + do_highlighting(*stmt, m_logical_line, m_src_proc, 1); set_statement(std::move(stmt)); return true; @@ -1231,8 +1218,8 @@ class cics_preprocessor final : public preprocessor ret_val = true; static const stmt_part_ids part_ids { 1, { 2 }, 3, 4 }; - auto stmt = get_preproc_statement(m_matches_ll, part_ids, lineno); - do_highlighting(*stmt, m_src_proc); + auto stmt = get_preproc_statement(m_matches_ll, part_ids, lineno); + do_highlighting(*stmt, m_logical_line, m_src_proc); set_statement(std::move(stmt)); } @@ -1330,29 +1317,6 @@ class cics_preprocessor final : public preprocessor } cics_preprocessor_options current_options() const { return m_options; } - - void do_highlighting(const semantics::preprocessor_statement_si& stmt, - semantics::source_info_processor& src_proc, - size_t 
continue_column = 15) const override - { - preprocessor::do_highlighting(stmt, src_proc, continue_column); - - size_t lineno = stmt.m_details.stmt_r.start.line; - for (size_t i = 0; i < m_logical_line.segments.size(); ++i) - { - const auto& segment = m_logical_line.segments[i]; - - if (!segment.continuation.empty()) - m_src_proc.add_hl_symbol(token_info( - range(position(lineno + i, 71), position(lineno + i, 72)), semantics::hl_scopes::continuation)); - - if (!segment.ignore.empty()) - m_src_proc.add_hl_symbol( - token_info(range(position(lineno + i, 72), - position(lineno + i, 72 + segment.ignore.length() - segment.continuation.empty())), - semantics::hl_scopes::ignored)); - } - } }; } // namespace diff --git a/parser_library/src/processing/preprocessors/db2_preprocessor.cpp b/parser_library/src/processing/preprocessors/db2_preprocessor.cpp index 2d5f5b532..12a914c84 100644 --- a/parser_library/src/processing/preprocessors/db2_preprocessor.cpp +++ b/parser_library/src/processing/preprocessors/db2_preprocessor.cpp @@ -13,12 +13,16 @@ */ #include +#include #include #include -#include +#include +#include +#include #include +#include #include -#include +#include #include #include #include @@ -26,65 +30,312 @@ #include #include "diagnostic_consumer.h" +#include "document.h" #include "lexing/logical_line.h" #include "preprocessor_options.h" +#include "preprocessor_utils.h" #include "processing/preprocessor.h" +#include "range.h" +#include "semantics/range_provider.h" #include "semantics/source_info_processor.h" +#include "semantics/statement.h" #include "utils/concat.h" #include "utils/resource_location.h" +#include "utils/string_operations.h" +#include "utils/unicode_text.h" #include "workspaces/parse_lib_provider.h" +namespace hlasm_plugin::parser_library::processing { namespace { -constexpr std::string_view trim_right(std::string_view s) +using utils::concat; + +enum class symbol_type : unsigned char { - const auto i = s.find_last_not_of(' '); + other_char, + 
ord_char, + blank, + colon, + quote, + remark_start, +}; - if (i == std::string_view::npos) - return s; +constexpr std::array symbols = []() { + std::array::max() + 1> r {}; - return s.substr(0, i + 1); -} -} // namespace + using enum symbol_type; -namespace hlasm_plugin::parser_library::processing { -namespace { -using utils::concat; + for (unsigned char c = '0'; c <= '9'; ++c) + r[c] = ord_char; + for (unsigned char c = 'A'; c <= 'Z'; ++c) + r[c] = ord_char; + for (unsigned char c = 'a'; c <= 'z'; ++c) + r[c] = ord_char; -class db2_preprocessor final : public preprocessor // TODO Take DBCS into account + r[(unsigned char)'_'] = ord_char; + r[(unsigned char)'@'] = ord_char; + r[(unsigned char)'$'] = ord_char; + r[(unsigned char)'#'] = ord_char; + r[(unsigned char)' '] = blank; + r[(unsigned char)':'] = colon; + r[(unsigned char)'\''] = quote; + r[(unsigned char)'\"'] = quote; + r[(unsigned char)'-'] = remark_start; + + return r; +}(); + +class db2_logical_line_helper { - lexing::logical_line m_logical_line; - std::string m_operands; - std::string m_version; - bool m_conditional; - library_fetcher m_libs; - diagnostic_op_consumer* m_diags = nullptr; - std::vector m_result; - bool m_source_translated = false; +public: + lexing::logical_line m_orig_ll; + lexing::logical_line m_db2_ll; + size_t m_lineno = 0; + std::vector> m_comments; + + db2_logical_line_helper() = default; - static bool remove_space(std::string_view& s) + preprocessor::line_iterator reinit(preprocessor::line_iterator it, preprocessor::line_iterator end, size_t lineno) { - if (s.empty() || s.front() != ' ') - return false; - const auto non_space = s.find_first_not_of(' '); + m_lineno = lineno; + + it = preprocessor::extract_nonempty_logical_line(m_orig_ll, it, end, lexing::default_ictl); + m_db2_ll = m_orig_ll; + extract_db2_line_comments(m_db2_ll, m_comments); + + return it; + } + + static void trim_left(lexing::logical_line::const_iterator& it, const lexing::logical_line::const_iterator& it_e) + { 
+ while (it != it_e) + { + if (*it == ' ') + it = std::next(it); + else if (auto it_n = std::next(it); *it == '-' && (it_n != it_e && *it_n == '-')) + it = std::next(it_n); + else + break; + } + } + +private: + size_t find_start_of_line_comment( + std::stack>& quotes, const std::string_view& code) const + { + bool comment_possibly_started = false; + size_t comment_start = 0; + for (const auto& c : code) + { + if (auto s = symbols[static_cast(c)]; s == symbol_type::quote) + { + if (quotes.empty() || quotes.top() != c) + quotes.push(c); + else if (quotes.top() == c) + quotes.pop(); + + comment_possibly_started = false; + } + else if (quotes.empty() && s == symbol_type::remark_start) + { + if (!comment_possibly_started) + comment_possibly_started = true; + else + break; + } - if (non_space == std::string_view::npos) + comment_start++; + } + + return comment_start; + } + + void extract_db2_line_comments( + lexing::logical_line& ll, std::vector>& comments) const + { + comments.clear(); + std::stack> quotes; + for (auto& seg : ll.segments) { - s = {}; + auto& code = seg.code; + auto& comment = comments.emplace_back(std::nullopt); + + // code part will contain the '--' separator if comment is detected + if (auto comment_start = find_start_of_line_comment(quotes, code); comment_start != code.length()) + { + comment = code.substr(comment_start + 1); + code.remove_suffix(comment->length()); + } + } + } +}; + +template +class mini_parser +{ + void skip_to_matching_character(It& b, const It& e) const + { + if (b == e) + return; + + const auto to_match = *b; + + while (++b != e && to_match != *b) + ; + } + +public: + std::vector get_args(It& b, const It& e, size_t lineno) + { + enum class consuming_state + { + NON_CONSUMING, + PREPARE_TO_CONSUME, + CONSUMING, + TRAIL, + QUOTE, + }; + + std::vector arguments; + const auto try_arg_inserter = [&arguments, &lineno](const It& start, const It& end, consuming_state state) { + if (state != consuming_state::CONSUMING) + return false; + 
+ arguments.emplace_back(semantics::preproc_details::name_range { + std::string(start, end), semantics::text_range(start, end, lineno) }); return true; + }; + + It arg_start_it; + consuming_state next_state = consuming_state::NON_CONSUMING; + while (b != e) + { + const auto state = std::exchange(next_state, consuming_state::NON_CONSUMING); + + switch (symbols[static_cast(*b)]) + { + using enum symbol_type; + case ord_char: + if (state == consuming_state::PREPARE_TO_CONSUME) + { + arg_start_it = b; + next_state = consuming_state::CONSUMING; + } + else if (state == consuming_state::CONSUMING) + next_state = state; + + break; + + case colon: + if (state == consuming_state::PREPARE_TO_CONSUME || state == consuming_state::TRAIL) + break; + + if (!try_arg_inserter(arg_start_it, b, state)) + next_state = consuming_state::PREPARE_TO_CONSUME; + + break; + + case blank: + if (try_arg_inserter(arg_start_it, b, state)) + next_state = consuming_state::TRAIL; + else + next_state = state; + + break; + + case quote: + try_arg_inserter(arg_start_it, b, state); + + if (skip_to_matching_character(b, e); b == e) + goto done; + + break; + + case remark_start: + if (auto n = std::next(b); !try_arg_inserter(arg_start_it, b, state) && n != e + && symbols[static_cast(*n)] == remark_start) + { + b = n; + next_state = state; + } + + break; + + case other_char: + try_arg_inserter(arg_start_it, b, state); + break; + + default: + assert(false); + break; + } + + ++b; } - s.remove_prefix(non_space); - return true; + try_arg_inserter(arg_start_it, b, next_state); + + done: + return arguments; } +}; - static bool consume(std::string_view& s, std::string_view lit) +struct consuming_regex_details +{ + bool needs_same_line; + bool tolerate_no_space_at_end; + const std::regex r; + + consuming_regex_details(const std::initializer_list& words_to_consume, + bool needs_same_line, + bool tolerate_no_space_at_end) + : needs_same_line(needs_same_line) + , tolerate_no_space_at_end(tolerate_no_space_at_end) + , 
r(get_consuming_regex(words_to_consume, tolerate_no_space_at_end)) + {} + +private: + static std::regex get_consuming_regex( + const std::initializer_list& words, bool tolerate_no_space_at_end) { - // case sensitive - if (s.substr(0, lit.size()) != lit) - return false; - s.remove_prefix(lit.size()); - return true; + assert(words.size()); + + auto w_it = words.begin(); + + std::string s = "("; + s.append(*w_it++); + while (w_it != words.end()) + s.append("(?:[ ]|--)+(?:").append(*w_it++).append(")"); + + s.append(")([ ]|--)"); + if (tolerate_no_space_at_end) + s.append("*"); + else + s.append("+"); + s.append("(.*)"); + + return std::regex(s); } +}; + +class db2_preprocessor final : public preprocessor // TODO Take DBCS into account +{ + std::string m_version; + bool m_conditional; + library_fetcher m_libs; + diagnostic_op_consumer* m_diags = nullptr; + std::vector m_result; + bool m_source_translated = false; + semantics::source_info_processor& m_src_proc; + db2_logical_line_helper m_ll_helper; + db2_logical_line_helper m_ll_include_helper; + + enum class line_type + { + ignore, + exec_sql, + include, + sql_type + }; void push_sql_version_data() { @@ -237,71 +488,94 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun m_result.emplace_back(replaced_line { " MEND \n" }); } - void process_include(std::string_view operands, size_t lineno) + template + static std::optional consume_words_advance_to_next(It& it, const It& it_e, const consuming_regex_details& crd) + { + if (std::match_results matches; std::regex_match(it, it_e, matches, crd.r) + && (!crd.needs_same_line || same_line(matches[1].first, std::prev(matches[1].second))) + && (!crd.tolerate_no_space_at_end || matches[2].length() || !matches[3].length() + || (matches[1].second == matches[3].first + && !same_line(std::prev(matches[1].second), matches[3].first)))) + { + it = matches[3].first; + return matches[1].second; + } + + return std::nullopt; + } + + std::optional 
try_process_include( + lexing::logical_line::const_iterator it, const lexing::logical_line::const_iterator& it_e, size_t lineno) + { + if (static const consuming_regex_details include_crd({ "INCLUDE" }, false, false); + !consume_words_advance_to_next(it, it_e, include_crd)) + return std::nullopt; + + lexing::logical_line::const_iterator inc_it_s; + lexing::logical_line::const_iterator inc_it_e; + semantics::preproc_details::name_range nr; + static const auto member_pattern = std::regex("(.*?)(?:[ ]|--)*$"); + + for (auto reg_it = std::regex_iterator(it, it_e, member_pattern), + reg_it_e = std::regex_iterator(); + reg_it != reg_it_e; + ++reg_it) + { + if (const auto& sub_match = (*reg_it)[1]; sub_match.length()) + { + if (nr.name.empty()) + inc_it_s = sub_match.first; + inc_it_e = sub_match.second; + + if (!nr.name.empty()) + nr.name.push_back(' '); + nr.name.append(sub_match.str()); + } + } + + if (!nr.name.empty()) + nr.r = semantics::text_range(inc_it_s, inc_it_e, lineno); + + return nr; + } + + std::pair process_include_member( + line_type instruction_type, std::string member, size_t lineno) { - auto operands_upper = context::to_upper_copy(std::string(operands)); + auto member_upper = context::to_upper_copy(member); - if (operands_upper == "SQLCA") + if (member_upper == "SQLCA") { inject_SQLCA(); - return; + return { instruction_type, member_upper }; } - if (operands_upper == "SQLDA") + if (member_upper == "SQLDA") { inject_SQLDA(); - return; + return { instruction_type, member_upper }; } m_result.emplace_back(replaced_line { "***$$$\n" }); std::optional> include_member; if (m_libs) - include_member = m_libs(operands_upper); + include_member = m_libs(member_upper); if (!include_member.has_value()) { if (m_diags) - m_diags->add_diagnostic(diagnostic_op::error_DB002(range(position(lineno, 0)), operands)); - return; + m_diags->add_diagnostic(diagnostic_op::error_DB002(range(position(lineno, 0)), member)); + return { instruction_type, member }; } auto& 
[include_mem_text, include_mem_loc] = *include_member; document d(include_mem_text); d.convert_to_replaced(); - generate_replacement(d.begin(), d.end(), false); + generate_replacement(d.begin(), d.end(), m_ll_include_helper, false); append_included_member(std::make_unique(included_member_details { - std::move(operands_upper), std::move(include_mem_text), std::move(include_mem_loc) })); + std::move(member_upper), std::move(include_mem_text), std::move(include_mem_loc) })); + return { line_type::include, member }; } - static bool consume_words( - std::string_view& l, std::initializer_list words, bool tolerate_no_space_at_end = false) - { - const auto init_l = l; - for (const auto& w : words) - { - if (!consume(l, w)) - { - l = init_l; // all or nothing - return false; - } - if (!remove_space(l)) - { - if (tolerate_no_space_at_end && l.empty() && &w == words.end() - 1) - return true; - l = init_l; // all or nothing - return false; - } - } - return true; - } - - static bool is_end(std::string_view s) - { - if (!consume(s, "END")) - return false; - if (s.empty() || s.front() == ' ') - return true; - - return false; - } + static bool is_end(std::string_view s) { return utils::consume(s, "END") && (s.empty() || s.front() == ' '); } static std::string_view create_line_preview(std::string_view input) { @@ -320,54 +594,53 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun static bool ignore_line(std::string_view s) { return s.empty() || s.front() == '*' || s.substr(0, 2) == ".*"; } - static std::string_view extract_label(std::string_view& s) + static semantics::preproc_details::name_range extract_label(std::string_view& s, size_t lineno) { - if (s.empty() || s.front() == ' ') + auto label = utils::next_nonblank_sequence(s); + if (!label.length()) return {}; - auto space = s.find(' '); - if (space == std::string_view::npos) - space = s.size(); - - std::string_view result = s.substr(0, space); + s.remove_prefix(label.length()); - 
s.remove_prefix(space); - - return result; + return semantics::preproc_details::name_range { std::string(label), + range((position(lineno, 0)), (position(lineno, label.length()))) }; } - enum class line_type + static std::pair extract_instruction( + const std::string_view& line_preview, size_t lineno, size_t instr_column_start) { - ignore, - exec_sql, - include, - sql_type - }; + static const std::pair ignore(line_type::ignore, {}); - static line_type consume_instruction(std::string_view& line_preview) - { if (line_preview.empty()) - return line_type::ignore; + return ignore; + + const auto consume_and_create = [&line_preview, lineno, instr_column_start](line_type line, + const consuming_regex_details& crd, + std::string_view line_id) { + auto it = line_preview.begin(); + if (auto consumed_words_end = consume_words_advance_to_next(it, line_preview.end(), crd); + consumed_words_end) + return std::make_pair(line, + semantics::preproc_details::name_range { std::string(line_id), + range((position(lineno, instr_column_start)), + (position(lineno, + instr_column_start + std::distance(line_preview.begin(), *consumed_words_end)))) }); + return ignore; + }; + + static const consuming_regex_details exec_sql_crd({ "EXEC", "SQL" }, true, false); + static const consuming_regex_details sql_type_crd({ "SQL", "TYPE" }, true, false); switch (line_preview.front()) { case 'E': - if (consume_words(line_preview, { "EXEC", "SQL" })) - { - if (consume_words(line_preview, { "INCLUDE" })) - return line_type::include; - else - return line_type::exec_sql; - } - return line_type::ignore; + return consume_and_create(line_type::exec_sql, exec_sql_crd, "EXEC SQL"); case 'S': - if (consume_words(line_preview, { "SQL", "TYPE", "IS" })) - return line_type::sql_type; - return line_type::ignore; + return consume_and_create(line_type::sql_type, sql_type_crd, "SQL TYPE"); default: - return line_type::ignore; + return ignore; } } @@ -426,13 +699,16 @@ class db2_preprocessor final : public preprocessor 
// TODO Take DBCS into accoun return result; } - bool handle_lob(const std::regex& pattern, std::string_view label, std::string_view operands) + bool handle_lob(const std::regex& pattern, + std::string_view label, + const lexing::logical_line::const_iterator& it, + const lexing::logical_line::const_iterator& it_e) { - std::match_results match; - if (!std::regex_match(operands.cbegin(), operands.cend(), match, pattern)) + std::match_results match; + if (!std::regex_match(it, it_e, match, pattern)) return false; - switch ((match[4].matched ? match[4] : match[1]).second[-1]) + switch (*std::prev((match[4].matched ? match[4] : match[1]).second)) { case 'E': // ..._FILE add_ds_line(label, "", "0FL4"); @@ -448,9 +724,7 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun default: { const auto li = lob_info(*match[1].first, match[3].matched ? *match[3].first : 0); - unsigned long long len; - std::from_chars(std::to_address(match[2].first), std::to_address(match[2].second), len); - len *= li.scale; + auto len = std::stoll(match[2].str()) * li.scale; add_ds_line(label, "", "0FL4"); add_ds_line(label, "_LENGTH", "FL4", false); @@ -466,127 +740,141 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun return true; }; - bool process_sql_type_operands(std::string_view operands, std::string_view label) + bool handle_r_starting_operands(const std::string_view& label, + const lexing::logical_line::const_iterator& it_b, + const lexing::logical_line::const_iterator& it_e) { - if (operands.size() < 2) + auto ds_line_inserter = [&label, &it_e, this](lexing::logical_line::const_iterator it, + const consuming_regex_details& crd, + std::string_view ds_line_type) { + if (!consume_words_advance_to_next(it, it_e, crd)) + return false; + add_ds_line(label, "", ds_line_type); + return true; + }; + + assert(it_b != it_e && *it_b == 'R'); + + static const consuming_regex_details result_set_crd({ "RESULT_SET_LOCATOR", "VARYING" }, 
false, true); + static const consuming_regex_details rowid_crd({ "ROWID" }, false, true); + + if (auto it_n = std::next(it_b); it_n == it_e || (*it_n != 'E' && *it_n != 'O')) + return false; + else if (*it_n == 'E') + return ds_line_inserter(it_b, result_set_crd, "FL4"); + else + return ds_line_inserter(it_b, rowid_crd, "H,CL40"); + }; + + bool process_sql_type_operands(const std::string_view& label, + const lexing::logical_line::const_iterator& it, + const lexing::logical_line::const_iterator& it_e) + { + if (it == it_e) return false; // keep the capture groups in sync - static const auto xml_type = std::regex( - "XML[ ]+AS[ ]+" - "(?:" - "(BINARY[ ]+LARGE[ ]+OBJECT|BLOB|CHARACTER[ ]+LARGE[ ]+OBJECT|CHAR[ ]+LARGE[ ]+OBJECT|CLOB|DBCLOB)" - "[ ]+([[:digit:]]{1,9})([KMG])?" - "|" - "(BLOB_FILE|CLOB_FILE|DBCLOB_FILE)" - ")" - "(?: .*)?"); - static const auto lob_type = std::regex( - "(?:" - "(BINARY[ ]+LARGE[ ]+OBJECT|BLOB|CHARACTER[ ]+LARGE[ ]+OBJECT|CHAR[ ]+LARGE[ ]+OBJECT|CLOB|DBCLOB)" - "[ ]+([[:digit:]]{1,9})([KMG])?" - "|" - "(BLOB_FILE|CLOB_FILE|DBCLOB_FILE|BLOB_LOCATOR|CLOB_LOCATOR|DBCLOB_LOCATOR)" - ")" - "(?: .*)?"); - - static const auto table_like = - std::regex("TABLE[ ]+LIKE[ ]+('(?:[^']|'')+'|(?:[^']|'')+)[ ]+AS[ ]+LOCATOR(?: .*)?"); - - switch (operands[0]) + static const auto xml_type = + std::regex("XML(?:[ ]|--)+AS(?:[ ]|--)+" + "(?:" + "(" + "BINARY(?:[ ]|--)+LARGE(?:[ ]|--)+OBJECT|BLOB|CHARACTER(?:[ ]|--)+" + "LARGE(?:[ ]|--)+OBJECT|CHAR(?:[ ]|--)+LARGE(?:[ ]|--)+OBJECT|CLOB|DBCLOB" + ")" + "(?:[ ]|--)+([[:digit:]]{1,9})([KMG])?" + "|" + "(BLOB_FILE|CLOB_FILE|DBCLOB_FILE)" + ")" + "(?: .*)?"); + static const auto lob_type = + std::regex("(?:" + "(" + "BINARY(?:[ ]|--)+LARGE(?:[ ]|--)+OBJECT|BLOB|CHARACTER(?:[ ]|--)+" + "LARGE(?:[ ]|--)+OBJECT|CHAR(?:[ ]|--)+LARGE(?:[ ]|--)+OBJECT|CLOB|DBCLOB" + ")" + "(?:[ ]|--)+([[:digit:]]{1,9})([KMG])?" 
+ "|" + "(BLOB_FILE|CLOB_FILE|DBCLOB_FILE|BLOB_LOCATOR|CLOB_LOCATOR|DBCLOB_LOCATOR)" + ")" + "(?: .*)?"); + + static const auto table_like = std::regex( + "TABLE(?:[ ]|--)+LIKE(?:[ ]|--)+('(?:[^']|'')+'|(?:[^']|'')+)(?:[ ]|--)+AS(?:[ ]|--)+LOCATOR(?: .*)?"); + + switch (*it) { case 'R': - switch (operands[1]) - { - case 'E': - if (!consume_words(operands, { "RESULT_SET_LOCATOR", "VARYING" }, true)) - break; - add_ds_line(label, "", "FL4"); - return true; - - case 'O': - if (!consume_words(operands, { "ROWID" }, true)) - break; - add_ds_line(label, "", "H,CL40"); - return true; - } - break; + return handle_r_starting_operands(label, it, it_e); case 'T': - if (!std::regex_match(operands.begin(), operands.end(), table_like)) - break; + if (!std::regex_match(it, it_e, table_like)) + return false; add_ds_line(label, "", "FL4"); return true; case 'X': - return handle_lob(xml_type, label, operands); + return handle_lob(xml_type, label, it, it_e); case 'B': case 'C': case 'D': - return handle_lob(lob_type, label, operands); + return handle_lob(lob_type, label, it, it_e); + default: + return false; } - return false; } - void process_regular_line(std::string_view label, size_t first_line_skipped) + void process_regular_line(const std::vector& ll_segments, std::string_view label) { if (!label.empty()) m_result.emplace_back(replaced_line { concat(label, " DS 0H\n") }); m_result.emplace_back(replaced_line { "***$$$\n" }); - for (const auto& segment : m_logical_line.segments) + bool first_line = true; + for (const auto& segment : ll_segments) { std::string this_line(segment.line); - auto operand_part = segment.code; - if (first_line_skipped) + if (std::exchange(first_line, false)) { const auto appended_line_size = segment.line.size(); - operand_part.remove_prefix(first_line_skipped); if (!label.empty()) this_line.replace(this_line.size() - appended_line_size, label.size(), label.size(), ' '); // mask out any label-like characters this_line[this_line.size() - appended_line_size] 
= '*'; - - first_line_skipped = 0; } + this_line.append("\n"); m_result.emplace_back(replaced_line { std::move(this_line) }); - m_operands.append(operand_part.substr(0, operand_part.find("--"))); } } - void process_sql_type_line(size_t first_line_skipped) + void process_sql_type_line(const db2_logical_line_helper& ll) { m_result.emplace_back(replaced_line { "***$$$\n" }); m_result.emplace_back(replaced_line { - concat("*", m_logical_line.segments.front().code.substr(0, lexing::default_ictl.end - 1), "\n") }); - - for (const auto& segment : m_logical_line.segments) - { - m_operands.append(segment.code.substr(first_line_skipped)); - first_line_skipped = 0; - } - + concat("*", ll.m_orig_ll.segments.front().code.substr(0, lexing::default_ictl.end - 1), "\n") }); m_result.emplace_back(replaced_line { "***$$$\n" }); } - std::tuple check_line(std::string_view input) + std::tuple check_line( + std::string_view input, size_t lineno) { - static constexpr std::tuple ignore(line_type::ignore, 0, {}); + static const std:: + tuple + ignore(line_type::ignore, {}, {}); std::string_view line_preview = create_line_preview(input); if (ignore_line(line_preview)) return ignore; - size_t first_line_skipped = line_preview.size(); - std::string_view label = extract_label(line_preview); + semantics::preproc_details::name_range label = extract_label(line_preview, lineno); - if (!remove_space(line_preview)) + auto trimmed = utils::trim_left(line_preview); + if (!trimmed) return ignore; if (is_end(line_preview)) @@ -596,108 +884,111 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun return ignore; } - auto instruction = consume_instruction(line_preview); - if (instruction == line_type::ignore) - return ignore; - - if (!line_preview.empty()) - first_line_skipped = line_preview.data() - input.data(); + if (auto [instruction_type, instruction_nr] = + extract_instruction(line_preview, lineno, label.r.end.column + trimmed); + instruction_type != line_type::ignore) 
+ return { std::move(instruction_type), label, std::move(instruction_nr) }; - return { instruction, first_line_skipped, label }; + return ignore; } - static bool ord_char(unsigned char c) { return std::isalnum(c) || c == '_' || c == '@' || c == '$' || c == '#'; } - - static size_t count_arguments(std::string_view s) + std::vector process_nonempty_line(const db2_logical_line_helper& ll, + size_t instruction_end, + bool include_allowed, + line_type& instruction_type, + std::string_view label) { - size_t result = 0; + const auto diag_adder = [diags = m_diags](diagnostic_op&& diag) { + if (diags) + diags->add_diagnostic(std::move(diag)); + }; - while (!s.empty()) - { - auto next = s.find_first_of(":'\""); - if (next == std::string_view::npos) - break; + if (ll.m_db2_ll.continuation_error) + diag_adder(diagnostic_op::error_DB001(range(position(ll.m_lineno, 0)))); - auto c = s[next]; - s.remove_prefix(next + 1); - switch (c) - { - case ':': - ++result; - while (!s.empty() && s.front() == ' ') // skip optional spaces - s.remove_prefix(1); - while (!s.empty() && ord_char(s.front())) // skip host variable name - s.remove_prefix(1); - while (!s.empty() && s.front() == ' ') // skip spaces - s.remove_prefix(1); - if (!s.empty() && s.front() == ':') // null indicator? 
- s.remove_prefix(1); - break; + static const consuming_regex_details is_crd({ "IS" }, true, true); - case '\'': - case '\"': - if (auto ending = s.find(c); ending == std::string_view::npos) - s = {}; - else - s.remove_prefix(ending + 1); - break; - } - } + std::vector args; + auto it = std::next(ll.m_db2_ll.begin(), instruction_end); + auto it_e = ll.m_db2_ll.end(); + db2_logical_line_helper::trim_left(it, it_e); - return result; - } - - void process_nonempty_line( - size_t lineno, bool include_allowed, line_type instruction, size_t first_line_skipped, std::string_view label) - { - m_operands.clear(); - - if (m_logical_line.continuation_error && m_diags) - m_diags->add_diagnostic(diagnostic_op::error_DB001(range(position(lineno, 0)))); - - switch (instruction) + switch (instruction_type) { - case line_type::exec_sql: - process_regular_line(label, first_line_skipped); - if (sql_has_codegen(m_operands)) - generate_sql_code_mock(count_arguments(m_operands)); - m_result.emplace_back(replaced_line { "***$$$\n" }); - break; + case line_type::exec_sql: { + process_regular_line(ll.m_db2_ll.segments, label); + if (auto inc_member_details = try_process_include(it, it_e, ll.m_lineno); + inc_member_details.has_value()) + { + if (inc_member_details->name.empty()) + { + diag_adder(diagnostic_op::warn_DB007(range(position(ll.m_lineno, 0)))); + break; + } + + if (include_allowed) + std::tie(instruction_type, inc_member_details->name) = + process_include_member(instruction_type, inc_member_details->name, ll.m_lineno); + else + diag_adder( + diagnostic_op::error_DB003(range(position(ll.m_lineno, 0)), inc_member_details->name)); - case line_type::include: - process_regular_line(label, first_line_skipped); + args.emplace_back(std::move(*inc_member_details)); + } + else + { + if (sql_has_codegen(it, it_e)) + { + mini_parser p; + args = p.get_args(it, it_e, ll.m_lineno); + generate_sql_code_mock(args.size()); + } + m_result.emplace_back(replaced_line { "***$$$\n" }); + } - if 
(std::string_view operands = trim_right(m_operands); include_allowed) - process_include(operands, lineno); - else if (m_diags) - m_diags->add_diagnostic(diagnostic_op::error_DB003(range(position(lineno, 0)), operands)); break; + } case line_type::sql_type: - process_sql_type_line(first_line_skipped); + process_sql_type_line(ll); // DB2 preprocessor exhibits strange behavior when SQL TYPE line is continued - if (m_logical_line.segments.size() > 1 && m_diags) - m_diags->add_diagnostic(diagnostic_op::error_DB005(range(position(lineno, 0)))); + if (ll.m_db2_ll.segments.size() > 1) + diag_adder(diagnostic_op::warn_DB005(range(position(ll.m_lineno, 0)))); + + if (!consume_words_advance_to_next(it, it_e, is_crd)) + { + diag_adder(diagnostic_op::warn_DB006(range(position(ll.m_lineno, 0)))); + break; + } + if (label.empty()) label = " "; // best matches the observed behavior - if (!process_sql_type_operands(m_operands, label) && m_diags) - m_diags->add_diagnostic(diagnostic_op::error_DB004(range(position(lineno, 0)))); + if (!process_sql_type_operands(label, it, it_e)) + diag_adder(diagnostic_op::error_DB004(range(position(ll.m_lineno, 0)))); + break; + + default: break; } + + return args; } - static bool sql_has_codegen(std::string_view sql) + bool sql_has_codegen( + const lexing::logical_line::const_iterator& it, const lexing::logical_line::const_iterator& it_e) const { // handles only the most obvious cases (imprecisely) - static const auto no_code_statements = - std::regex("(?:DECLARE|WHENEVER|BEGIN[ ]+DECLARE[ ]+SECTION|END[ ]+DECLARE[ ]+SECTION)(?: .*)?", - std::regex_constants::icase); - return !std::regex_match(sql.begin(), sql.end(), no_code_statements); + static const auto no_code_statements = std::regex( + "(?:DECLARE|WHENEVER|BEGIN(?:[ ]|--)+DECLARE(?:[ ]|--)+SECTION|END(?:[ ]|--)+DECLARE(?:[ ]|--)+SECTION)" + "(?: .*)?", + std::regex_constants::icase); + return !std::regex_match(it, it_e, no_code_statements); } + void generate_sql_code_mock(size_t in_params) { 
// this function generates semi-realistic sql statement replacement code, because people do strange things... - // output parameters + // input parameters m_result.emplace_back(replaced_line { " BRAS 15,*+56 \n" }); m_result.emplace_back(replaced_line { " DC H'0',X'0000',H'0' \n" }); m_result.emplace_back(replaced_line { " DC XL8'0000000000000000' \n" }); @@ -765,9 +1056,11 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun } } - void generate_replacement(line_iterator it, line_iterator end, bool include_allowed) + void generate_replacement( + line_iterator it, line_iterator end, db2_logical_line_helper& ll_helper, bool include_allowed) { bool skip_continuation = false; + while (it != end) { const auto text = it->text(); @@ -777,8 +1070,11 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun skip_continuation = is_continued(text); continue; } - auto [instruction, first_line_skipped, label] = check_line(text); - if (instruction == line_type::ignore) + + auto lineno = it->lineno(); // TODO: needs to be addressed for chained preprocessors + + auto [instruction_type, label_nr, instruction_nr] = check_line(text, lineno.value_or(0)); + if (instruction_type == line_type::ignore) { m_result.emplace_back(*it++); skip_continuation = is_continued(text); @@ -787,13 +1083,26 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun m_source_translated = true; - m_logical_line.clear(); + it = ll_helper.reinit(it, end, lineno.value_or(0)); - size_t lineno = it->lineno().value_or(0); // TODO: needs to be addressed for chained preprocessors + auto args = process_nonempty_line( + ll_helper, instruction_nr.r.end.column, include_allowed, instruction_type, label_nr.name); - it = extract_nonempty_logical_line(m_logical_line, it, end, lexing::default_ictl); - - process_nonempty_line(lineno, include_allowed, instruction, first_line_skipped, label); + if (lineno.has_value()) + { + auto stmt = 
std::make_shared( + semantics::preproc_details { + semantics::text_range( + ll_helper.m_orig_ll.begin(), ll_helper.m_orig_ll.end(), ll_helper.m_lineno), + std::move(label_nr), + std::move(instruction_nr) }, + instruction_type == line_type::include); + + do_highlighting(*stmt, ll_helper.m_orig_ll, m_src_proc); + + stmt->m_details.operands = std::move(args); + set_statement(std::move(stmt)); + } } } @@ -812,7 +1121,7 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun // ignores ICTL inject_SQLSECT(); - generate_replacement(it, end, true); + generate_replacement(it, end, m_ll_helper, true); if (m_source_translated || !m_conditional) return document(std::move(m_result)); @@ -820,12 +1129,47 @@ class db2_preprocessor final : public preprocessor // TODO Take DBCS into accoun return doc; } + void do_highlighting(const semantics::preprocessor_statement_si& stmt, + const lexing::logical_line& ll, + semantics::source_info_processor& src_proc, + size_t continue_column = 15) const override + { + preprocessor::do_highlighting(stmt, ll, src_proc, continue_column); + + for (size_t i = 0, lineno = stmt.m_details.stmt_r.start.line, line_start_column = 0; + i < m_ll_helper.m_db2_ll.segments.size(); + ++i, ++lineno, std::exchange(line_start_column, continue_column)) + { + const auto& code = m_ll_helper.m_db2_ll.segments[i].code; + auto comment_start_column = line_start_column + code.length(); + + if (const auto& comment = m_ll_helper.m_comments[i]; comment.has_value()) + { + comment_start_column -= 2; // Compensate for code part having the '--' separator while comment part not + src_proc.add_hl_symbol(token_info(range(position(lineno, comment_start_column), + position(lineno, comment_start_column + comment->length() + 2)), + semantics::hl_scopes::remark)); + } + + if (!code.empty()) + if (auto operand_start_column = i == 0 ? 
stmt.m_details.instruction.r.end.column : continue_column; + operand_start_column < comment_start_column) + src_proc.add_hl_symbol(token_info( + range(position(lineno, operand_start_column), position(lineno, comment_start_column)), + semantics::hl_scopes::operand)); + } + } + public: - db2_preprocessor(const db2_preprocessor_options& opts, library_fetcher libs, diagnostic_op_consumer* diags) + db2_preprocessor(const db2_preprocessor_options& opts, + library_fetcher libs, + diagnostic_op_consumer* diags, + semantics::source_info_processor& src_proc) : m_version(opts.version) , m_conditional(opts.conditional) , m_libs(std::move(libs)) , m_diags(diags) + , m_src_proc(src_proc) {} }; } // namespace @@ -835,7 +1179,7 @@ std::unique_ptr preprocessor::create(const db2_preprocessor_option diagnostic_op_consumer* diags, semantics::source_info_processor& src_proc) { - return std::make_unique(opts, std::move(libs), diags); + return std::make_unique(opts, std::move(libs), diags, src_proc); } } // namespace hlasm_plugin::parser_library::processing diff --git a/parser_library/src/processing/preprocessors/preprocessor_utils.cpp b/parser_library/src/processing/preprocessors/preprocessor_utils.cpp index a743a2e38..f9eba69fd 100644 --- a/parser_library/src/processing/preprocessors/preprocessor_utils.cpp +++ b/parser_library/src/processing/preprocessors/preprocessor_utils.cpp @@ -74,8 +74,7 @@ std::string_view extract_operand_and_argument(std::string_view s) std::pair remove_separators(std::string_view s) { - size_t trimmed = 0; - std::tie(s, trimmed) = hlasm_plugin::utils::trim_left(s); + auto trimmed = hlasm_plugin::utils::trim_left(s); if (!s.empty() && s.front() == ',') { s.remove_prefix(1); @@ -160,35 +159,27 @@ std::shared_ptr get_preproc_statement( } if (matches[ids.operands].length()) - { - auto [ops_text, ops_range] = get_stmt_part_name_range(matches, ids.operands, rp); - details.operands.items = - get_operands_list(ops_text, std::distance(matches[0].first, 
matches[ids.operands].first), rp); - details.operands.overall_r = std::move(ops_range); - } + details.operands = get_operands_list(get_stmt_part_name_range(matches, ids.operands, rp).name, + std::distance(matches[0].first, matches[ids.operands].first), + rp); if (ids.remarks && matches[*ids.remarks].length()) - { - details.remarks.overall_r = get_stmt_part_name_range(matches, *ids.remarks, rp).r; - details.remarks.items.emplace_back(details.remarks.overall_r); - } + details.remarks.emplace_back(get_stmt_part_name_range(matches, *ids.remarks, rp).r); return std::make_shared(std::move(details)); } -template std::shared_ptr -get_preproc_statement( - const std::match_results& matches, +template std::shared_ptr +get_preproc_statement( + const std::match_results& matches, const stmt_part_ids& ids, size_t lineno, size_t continuation_column); -template std::shared_ptr -get_preproc_statement( - const std::match_results& matches, +template std::shared_ptr +get_preproc_statement( + const std::match_results& matches, const stmt_part_ids& ids, size_t lineno, size_t continuation_column); - - } // namespace hlasm_plugin::parser_library::processing diff --git a/parser_library/src/processing/preprocessors/preprocessor_utils.h b/parser_library/src/processing/preprocessors/preprocessor_utils.h index 982255491..f55a78d37 100644 --- a/parser_library/src/processing/preprocessors/preprocessor_utils.h +++ b/parser_library/src/processing/preprocessors/preprocessor_utils.h @@ -43,6 +43,19 @@ template std::shared_ptr get_preproc_statement( const std::match_results& matches, const stmt_part_ids& ids, size_t lineno, size_t continue_column = 15); +template +static std::true_type same_line_detector(const It& t, decltype(t.same_line(t)) = false); +static std::false_type same_line_detector(...); + +template +static bool same_line(const It& l, const It& r) +{ + if constexpr (decltype(same_line_detector(l))::value) + return l.same_line(r); + else + return true; +} + } // namespace 
hlasm_plugin::parser_library::processing #endif \ No newline at end of file diff --git a/parser_library/src/processing/statement_analyzers/lsp_analyzer.cpp b/parser_library/src/processing/statement_analyzers/lsp_analyzer.cpp index ee147d14b..231b2c3d6 100644 --- a/parser_library/src/processing/statement_analyzers/lsp_analyzer.cpp +++ b/parser_library/src/processing/statement_analyzers/lsp_analyzer.cpp @@ -126,7 +126,7 @@ void lsp_analyzer::analyze(const semantics::preprocessor_statement_si& statement { collect_occurences(lsp::occurence_kind::ORD, statement); - if (const auto& operands = statement.m_details.operands.items; statement.m_copylike && operands.size() == 1) + if (const auto& operands = statement.m_details.operands; statement.m_copylike && operands.size() == 1) add_copy_operand(hlasm_ctx_.ids().add(operands.front().name), operands.front().r, false); assign_statement_occurences(hlasm_ctx_.opencode_location()); @@ -230,7 +230,7 @@ void lsp_analyzer::collect_occurences(lsp::occurence_kind kind, const semantics: details.instruction.r, evaluated_model); - for (const auto& ops : details.operands.items) + for (const auto& ops : details.operands) collector.occurences.emplace_back( lsp::occurence_kind::ORD, hlasm_ctx_.ids().add(ops.name), ops.r, evaluated_model); } diff --git a/parser_library/src/semantics/range_provider.cpp b/parser_library/src/semantics/range_provider.cpp index 6de011864..c978fe559 100644 --- a/parser_library/src/semantics/range_provider.cpp +++ b/parser_library/src/semantics/range_provider.cpp @@ -14,8 +14,10 @@ #include "range_provider.h" +#include + using namespace hlasm_plugin::parser_library; -using namespace hlasm_plugin::parser_library::semantics; +namespace hlasm_plugin::parser_library::semantics { range range_provider::get_range(const antlr4::Token* start, const antlr4::Token* stop) const { @@ -157,8 +159,7 @@ range_provider::range_provider(range original_field_range, assert(original_operand_ranges.empty() || original_range.start == 
original_operand_ranges.front().start); } -hlasm_plugin::parser_library::semantics::range_provider::range_provider( - std::vector, range>> ms) +range_provider::range_provider(std::vector, range>> ms) : model_substitutions(std::move(ms)) , state(adjusting_state::MODEL_REPARSE) { @@ -169,3 +170,20 @@ range_provider::range_provider() : original_range() , state(adjusting_state::NONE) {} + +range text_range( + const lexing::logical_line::const_iterator& b, const lexing::logical_line::const_iterator& e, size_t lineno_offset) +{ + assert(std::distance(b, e) >= 0); + + const auto [bx, by] = b.get_coordinates(); + position b_pos(by + lineno_offset, bx); + if (b == e) // empty range + return range(std::move(b_pos)); + else + { + const auto [ex, ey] = std::prev(e).get_coordinates(); + return range(std::move(b_pos), position(ey + lineno_offset, ex + 1)); + } +} +} // namespace hlasm_plugin::parser_library::semantics \ No newline at end of file diff --git a/parser_library/src/semantics/range_provider.h b/parser_library/src/semantics/range_provider.h index dddc7a44d..377ed0ceb 100644 --- a/parser_library/src/semantics/range_provider.h +++ b/parser_library/src/semantics/range_provider.h @@ -20,6 +20,7 @@ #include "antlr4-runtime.h" +#include "lexing/logical_line.h" #include "range.h" namespace hlasm_plugin::parser_library::semantics { @@ -64,5 +65,8 @@ struct range_provider position adjust_model_position(position pos, bool end) const; }; +range text_range( + const lexing::logical_line::const_iterator& b, const lexing::logical_line::const_iterator& e, size_t lineno_offset); + } // namespace hlasm_plugin::parser_library::semantics #endif diff --git a/parser_library/src/semantics/statement.cpp b/parser_library/src/semantics/statement.cpp index 671a97ddf..67edcf578 100644 --- a/parser_library/src/semantics/statement.cpp +++ b/parser_library/src/semantics/statement.cpp @@ -22,8 +22,4 @@ endevor_statement_si::endevor_statement_si(preproc_details details) m_details.instruction.name = 
"-INC"; } -cics_statement_si::cics_statement_si(preproc_details details) - : preprocessor_statement_si(std::move(details), false) -{} - } // namespace hlasm_plugin::parser_library::semantics \ No newline at end of file diff --git a/parser_library/src/semantics/statement.h b/parser_library/src/semantics/statement.h index f646d63a2..be39ebc9d 100644 --- a/parser_library/src/semantics/statement.h +++ b/parser_library/src/semantics/statement.h @@ -175,18 +175,11 @@ struct preproc_details bool operator==(const name_range&) const = default; }; - template - struct item_list - { - std::vector items; - range overall_r; - }; - range stmt_r; name_range label; name_range instruction; - item_list operands; - item_list remarks; + std::vector operands; + std::vector remarks; }; struct preprocessor_statement_si @@ -194,7 +187,7 @@ struct preprocessor_statement_si preproc_details m_details; const bool m_copylike; - preprocessor_statement_si(preproc_details details, bool copylike) + preprocessor_statement_si(preproc_details details, bool copylike = false) : m_details(std::move(details)) , m_copylike(copylike) {} @@ -205,11 +198,6 @@ struct endevor_statement_si : public preprocessor_statement_si explicit endevor_statement_si(preproc_details details); }; -struct cics_statement_si : public preprocessor_statement_si -{ - explicit cics_statement_si(preproc_details details); -}; - } // namespace hlasm_plugin::parser_library::semantics #endif diff --git a/parser_library/test/lexing/CMakeLists.txt b/parser_library/test/lexing/CMakeLists.txt index 0cd60f7dc..bcd63687a 100644 --- a/parser_library/test/lexing/CMakeLists.txt +++ b/parser_library/test/lexing/CMakeLists.txt @@ -12,6 +12,7 @@ target_sources(library_test PRIVATE lexer_test.cpp + logical_line_iterator_test.cpp logical_line_test.cpp ) diff --git a/parser_library/test/lexing/logical_line_iterator_test.cpp b/parser_library/test/lexing/logical_line_iterator_test.cpp new file mode 100644 index 000000000..cbe16b9b2 --- /dev/null +++ 
b/parser_library/test/lexing/logical_line_iterator_test.cpp @@ -0,0 +1,190 @@ +/* + * Copyright (c) 2023 Broadcom. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Broadcom, Inc. - initial API and implementation + */ + +#include +#include +#include +#include + +#include "gtest/gtest.h" + +#include "lexing/logical_line.h" + +using namespace hlasm_plugin::parser_library::lexing; + +namespace { +class logical_line_iterator_fixture : public ::testing::TestWithParam> +{}; +} // namespace + +TEST_P(logical_line_iterator_fixture, general_behavior) +{ + logical_line line; + const auto& parm = GetParam(); + std::transform(parm.begin(), parm.end(), std::back_inserter(line.segments), [](const auto& c) { + logical_line_segment lls; + lls.code = c; + return lls; + }); + + std::string concat_parm; + for (auto p : parm) + concat_parm.append(p); + + EXPECT_TRUE(std::equal(line.begin(), line.end(), concat_parm.begin(), concat_parm.end())); + EXPECT_TRUE(std::equal(std::make_reverse_iterator(line.end()), + std::make_reverse_iterator(line.begin()), + concat_parm.rbegin(), + concat_parm.rend())); +} + +INSTANTIATE_TEST_SUITE_P(logical_line, + logical_line_iterator_fixture, + ::testing::ValuesIn(std::vector { + std::vector {}, + std::vector { "" }, + std::vector { "", "" }, + std::vector { "a", "b" }, + std::vector { "", "b" }, + std::vector { "a", "" }, + std::vector { "a", "", "c" }, + std::vector { "a", "", "c", "" }, + std::vector { "a", "", "c", "", "e" }, + std::vector { "", "", "abc", "", "", "def", "", "", "ghi", "", "" }, + })); + +namespace { +class logical_line_iterator_coordinates_test : public testing::Test +{ +public: + logical_line_iterator_coordinates_test(std::string_view input) + : 
m_input(std::move(input)) + {} + + void SetUp() override { ASSERT_TRUE(extract_logical_line(m_line, m_input, default_ictl)); } + +protected: + logical_line m_line; + std::string_view m_input; +}; + +class logical_line_iterator_coordinates_singleline : public logical_line_iterator_coordinates_test +{ +public: + logical_line_iterator_coordinates_singleline() + : logical_line_iterator_coordinates_test("123456") + {} +}; +} // namespace + +TEST_F(logical_line_iterator_coordinates_singleline, unchanged_code_part) +{ + auto expected = std::pair(0, 0); + EXPECT_EQ(m_line.begin().get_coordinates(), expected); + EXPECT_EQ(m_line.end().get_coordinates(), expected); + + expected = std::pair(3, 0); + EXPECT_EQ(std::next(m_line.begin(), 3).get_coordinates(), expected); + + expected = std::pair(5, 0); + EXPECT_EQ(std::prev(m_line.end()).get_coordinates(), expected); +} + +TEST_F(logical_line_iterator_coordinates_singleline, removed_code_suffix) +{ + m_line.segments.front().code.remove_suffix(3); + + auto expected = std::pair(0, 0); + EXPECT_EQ(m_line.begin().get_coordinates(), expected); + + expected = std::pair(2, 0); + EXPECT_EQ(std::prev(m_line.end()).get_coordinates(), expected); +} + +namespace { +class logical_line_iterator_coordinates_multiline : public logical_line_iterator_coordinates_test +{ +public: + logical_line_iterator_coordinates_multiline() + : logical_line_iterator_coordinates_test(m_input) + {} + +private: + inline static const std::string_view m_input = + R"( EXEC SQL X00004000 + --comment X + SELECT X + 1 --rem X00050000 + INTO :B X + FROM X + SYSIBM.SYSDUMMY1)"; +}; + +} // namespace + +TEST_F(logical_line_iterator_coordinates_multiline, unchanged_code_part) +{ + auto expected = std::pair(0, 0); + EXPECT_EQ(m_line.begin().get_coordinates(), expected); + + expected = std::pair(18, 2); + EXPECT_EQ(std::next(m_line.begin(), 130).get_coordinates(), expected); + + expected = std::pair(30, 6); + EXPECT_EQ(std::prev(m_line.end()).get_coordinates(), expected); +} + 
+TEST_F(logical_line_iterator_coordinates_multiline, empty_all_lines) +{ + std::for_each(m_line.segments.begin(), m_line.segments.end(), [](auto& s) { s.code = {}; }); + + auto expected = std::pair(0, 0); + EXPECT_EQ(m_line.begin().get_coordinates(), expected); + EXPECT_EQ(m_line.end().get_coordinates(), expected); +} + +TEST_F(logical_line_iterator_coordinates_multiline, empty_last_line) +{ + m_line.segments.back().code = {}; + + auto expected = std::pair(0, 0); + EXPECT_EQ(m_line.begin().get_coordinates(), expected); + + expected = std::pair(70, 5); + EXPECT_EQ(std::prev(m_line.end()).get_coordinates(), expected); +} + +TEST_F(logical_line_iterator_coordinates_multiline, empty_some_lines) +{ + m_line.segments[1].code = {}; + m_line.segments[3].code.remove_suffix(46); + + auto expected = std::pair(0, 0); + EXPECT_EQ(m_line.begin().get_coordinates(), expected); + + expected = std::pair(70, 0); + EXPECT_EQ(std::next(m_line.begin(), 70).get_coordinates(), expected); + + expected = std::pair(15, 2); + EXPECT_EQ(std::next(m_line.begin(), 71).get_coordinates(), expected); + + expected = std::pair(70, 4); + EXPECT_EQ(std::next(m_line.begin(), 192).get_coordinates(), expected); + + expected = std::pair(15, 5); + EXPECT_EQ(std::next(m_line.begin(), 193).get_coordinates(), expected); + + expected = std::pair(30, 6); + EXPECT_EQ(std::prev(m_line.end()).get_coordinates(), expected); +} diff --git a/parser_library/test/lexing/logical_line_test.cpp b/parser_library/test/lexing/logical_line_test.cpp index 64708194f..d2583b3f4 100644 --- a/parser_library/test/lexing/logical_line_test.cpp +++ b/parser_library/test/lexing/logical_line_test.cpp @@ -12,11 +12,9 @@ * Broadcom, Inc. 
- initial API and implementation */ -#include #include -#include -#include -#include +#include +#include #include "gtest/gtest.h" @@ -230,42 +228,3 @@ TEST(logical_line, eol) EXPECT_EQ(t.first.size(), 0); } } - -class logical_line_iterator_fixture : public ::testing::TestWithParam> -{}; - -TEST_P(logical_line_iterator_fixture, iterator) -{ - logical_line line; - const auto& parm = GetParam(); - std::transform(parm.begin(), parm.end(), std::back_inserter(line.segments), [](const auto& c) { - logical_line_segment lls; - lls.code = c; - return lls; - }); - - std::string concat_parm; - for (auto p : parm) - concat_parm.append(p); - - EXPECT_TRUE(std::equal(line.begin(), line.end(), concat_parm.begin(), concat_parm.end())); - EXPECT_TRUE(std::equal(std::make_reverse_iterator(line.end()), - std::make_reverse_iterator(line.begin()), - concat_parm.rbegin(), - concat_parm.rend())); -} - -INSTANTIATE_TEST_SUITE_P(logical_line, - logical_line_iterator_fixture, - ::testing::ValuesIn(std::vector { - std::vector {}, - std::vector { "" }, - std::vector { "", "" }, - std::vector { "a", "b" }, - std::vector { "", "b" }, - std::vector { "a", "" }, - std::vector { "a", "", "c" }, - std::vector { "a", "", "c", "" }, - std::vector { "a", "", "c", "", "e" }, - std::vector { "", "", "abc", "", "", "def", "", "", "ghi", "", "" }, - })); diff --git a/parser_library/test/lsp/lsp_context_preprocessor_test.cpp b/parser_library/test/lsp/lsp_context_preprocessor_test.cpp index 6f83114c4..2ec8081a9 100644 --- a/parser_library/test/lsp/lsp_context_preprocessor_test.cpp +++ b/parser_library/test/lsp/lsp_context_preprocessor_test.cpp @@ -35,29 +35,61 @@ const resource_location mac_loc("MAC"); const resource_location source_loc("OPEN"); const resource_location member_loc("MEMBER"); const resource_location member2_loc("MEMBER2"); -} // namespace -class lsp_context_endevor_preprocessor_test : public testing::Test +const std::vector> member_list { + { "MEMBER", R"(R2 EQU 2 + LR R2,R2)" }, + { 
"MEMBER2", R"(R5 EQU 5 + LR R5,R5)" }, +}; + +class lsp_context_preprocessor_test : public testing::Test { public: - lsp_context_endevor_preprocessor_test() - : lib_provider({ { "MEMBER", R"(R2 EQU 2 - LR R2,R2)" }, - { "MEMBER2", R"(R5 EQU 5 - LR R5,R5)" } }) - , a(contents, analyzer_options { source_loc, &lib_provider, endevor_preprocessor_options() }) {}; + lsp_context_preprocessor_test(const std::string& contents, + std::shared_ptr lib_provider, + preprocessor_options preproc_options) + : lib_provider(lib_provider) + , a(contents, analyzer_options { source_loc, lib_provider.get(), preproc_options }) + {} void SetUp() override { a.analyze(); } protected: - const std::string contents = + std::shared_ptr lib_provider; + analyzer a; + + std::optional find_preproc_file(std::string_view name) + { + const auto& files = a.hlasm_ctx().get_visited_files(); + + auto it = std::find_if( + files.begin(), files.end(), [name](const resource_location& f) { return f.get_uri().ends_with(name); }); + + return it != files.end() ? 
std::make_optional(*it) : std::nullopt; + } + + inline bool reloc_symbol_checker(std::string_view source) + { + return source.find("Relocatable Symbol") != std::string_view::npos; + }; +}; +} // namespace + +class lsp_context_endevor_preprocessor_test : public lsp_context_preprocessor_test +{ +private: + inline static const std::string contents = R"( -INC MEMBER blabla ++INCLUDE MEMBER blabla -INC MEMBER2)"; - mock_parse_lib_provider lib_provider; - analyzer a; +public: + lsp_context_endevor_preprocessor_test() + : lsp_context_preprocessor_test( + contents, std::make_shared(member_list), endevor_preprocessor_options()) + {} }; namespace { @@ -132,36 +164,10 @@ TEST_F(lsp_context_endevor_preprocessor_test, refs) has_same_content(expected_blabla_locations, a.context().lsp_ctx->references(source_loc, position(2, 21)))); } -class lsp_context_cics_preprocessor_test : public testing::Test +class lsp_context_cics_preprocessor_test : public lsp_context_preprocessor_test { -public: - lsp_context_cics_preprocessor_test() - : a(contents, analyzer_options { source_loc, cics_preprocessor_options() }) {}; - - void SetUp() override - { - a.analyze(); - - preproc1_loc = find_preproc_file("PREPROCESSOR_1.hlasm"); - preproc6_loc = find_preproc_file("PREPROCESSOR_6.hlasm"); - - ASSERT_TRUE(preproc1_loc.has_value()); - ASSERT_TRUE(preproc6_loc.has_value()); - } - private: - std::optional find_preproc_file(std::string_view name) - { - const auto& files = a.hlasm_ctx().get_visited_files(); - - auto it = std::find_if( - files.begin(), files.end(), [name](const resource_location& f) { return f.get_uri().ends_with(name); }); - - return it != files.end() ? 
std::make_optional(*it) : std::nullopt; - } - -protected: - const std::string contents = + inline static const std::string contents = R"( A EXEC CICS ABEND ABCODE('1234') NODUMP EXEC CICS ALLOCATE SYSID('4321') NOQUEUE @@ -175,9 +181,26 @@ B LARL 0,DFHRESP(NORMAL) LARL 1,A LARL 1,B)"; - analyzer a; +protected: std::optional preproc1_loc; std::optional preproc6_loc; + +public: + lsp_context_cics_preprocessor_test() + : lsp_context_preprocessor_test( + contents, std::make_shared(), cics_preprocessor_options()) + {} + + void SetUp() override + { + lsp_context_preprocessor_test::SetUp(); + + preproc1_loc = find_preproc_file("PREPROCESSOR_1.hlasm"); + preproc6_loc = find_preproc_file("PREPROCESSOR_6.hlasm"); + + ASSERT_TRUE(preproc1_loc.has_value()); + ASSERT_TRUE(preproc6_loc.has_value()); + } }; TEST_F(lsp_context_cics_preprocessor_test, go_to) @@ -295,4 +318,213 @@ TEST_F(lsp_context_cics_preprocessor_test, refs_dfh) expected_dfhvalue_busy_locations, a.context().lsp_ctx->references(source_loc, position(7, 25)))); } -// TODO: hover for DFHVALUE and DHFRESP values \ No newline at end of file +// TODO: hover for DFHVALUE and DHFRESP values + +class lsp_context_db2_preprocessor_test : public lsp_context_preprocessor_test +{ +protected: + inline static const std::string contents = + R"( +A EXEC SQL INCLUDE MEMBER +B EXEC SQL INCLUDE sqlca +C EXEC SQL INCLUDE SqLdA)"; + + std::optional preproc1_loc; + +public: + lsp_context_db2_preprocessor_test() + : lsp_context_preprocessor_test( + contents, std::make_shared(member_list), db2_preprocessor_options()) + {} + + void SetUp() override + { + a.analyze(); + preproc1_loc = find_preproc_file("PREPROCESSOR_1.hlasm"); + + ASSERT_TRUE(preproc1_loc.has_value()); + } +}; + +TEST_F(lsp_context_db2_preprocessor_test, go_to_label) +{ + // jump to virtual file, label A + EXPECT_EQ(location(position(0, 0), *preproc1_loc), a.context().lsp_ctx->definition(source_loc, position(1, 1))); + // jump to virtual file, label B + 
EXPECT_EQ(location(position(6, 0), *preproc1_loc), a.context().lsp_ctx->definition(source_loc, position(2, 0))); + // jump to virtual file, label C + EXPECT_EQ(location(position(32, 0), *preproc1_loc), a.context().lsp_ctx->definition(source_loc, position(3, 1))); +} + +TEST_F(lsp_context_db2_preprocessor_test, go_to_exec_sql) +{ + // no jump, EXEC SQL + EXPECT_EQ(location(position(1, 12), source_loc), a.context().lsp_ctx->definition(source_loc, position(1, 12))); + // no jump, EXEC SQL + EXPECT_EQ(location(position(2, 12), source_loc), a.context().lsp_ctx->definition(source_loc, position(2, 12))); + // no jump, EXEC SQL + EXPECT_EQ(location(position(3, 12), source_loc), a.context().lsp_ctx->definition(source_loc, position(3, 12))); +} + +TEST_F(lsp_context_db2_preprocessor_test, go_to_include) +{ + // jump from source to MEMBER + EXPECT_EQ(location(position(0, 0), member_loc), a.context().lsp_ctx->definition(source_loc, position(1, 29))); + // jump from source to virtual file - SQLCA + EXPECT_EQ(location(position(10, 0), *preproc1_loc), a.context().lsp_ctx->definition(source_loc, position(2, 29))); + // jump from source to virtual file - SQLDA + EXPECT_EQ(location(position(42, 0), *preproc1_loc), a.context().lsp_ctx->definition(source_loc, position(3, 29))); +} + +TEST_F(lsp_context_db2_preprocessor_test, refs_label) +{ + const location_list expected_a_locations { + location(position(1, 0), source_loc), + location(position(0, 0), *preproc1_loc), + }; + const location_list expected_b_locations { + location(position(2, 0), source_loc), + location(position(6, 0), *preproc1_loc), + }; + const location_list expected_c_locations { + location(position(3, 0), source_loc), + location(position(32, 0), *preproc1_loc), + }; + + // A reference + EXPECT_TRUE(has_same_content(expected_a_locations, a.context().lsp_ctx->references(source_loc, position(1, 1)))); + // B reference + EXPECT_TRUE(has_same_content(expected_b_locations, a.context().lsp_ctx->references(source_loc, 
position(2, 0)))); + // C reference + EXPECT_TRUE(has_same_content(expected_c_locations, a.context().lsp_ctx->references(source_loc, position(3, 1)))); +} + + +TEST_F(lsp_context_db2_preprocessor_test, refs_exec_sql) +{ + const location_list expected_exec_sql_locations { + location(position(1, 7), source_loc), + location(position(2, 8), source_loc), + location(position(3, 6), source_loc), + }; + + // EXEC SQL reference + EXPECT_TRUE( + has_same_content(expected_exec_sql_locations, a.context().lsp_ctx->references(source_loc, position(1, 12)))); + // EXEC SQL reference + EXPECT_TRUE( + has_same_content(expected_exec_sql_locations, a.context().lsp_ctx->references(source_loc, position(2, 12)))); + // EXEC SQL reference + EXPECT_TRUE( + has_same_content(expected_exec_sql_locations, a.context().lsp_ctx->references(source_loc, position(3, 12)))); +} + +TEST_F(lsp_context_db2_preprocessor_test, refs_include) +{ + const location_list expected_member_locations { + location(position(1, 29), source_loc), + }; + const location_list expected_sqlca_locations { + location(position(2, 28), source_loc), + location(position(10, 0), *preproc1_loc), + }; + const location_list expected_sqlda_locations { + location(position(3, 24), source_loc), + location(position(42, 0), *preproc1_loc), + location(position(60, 0), *preproc1_loc), + }; + + // MEMBER reference + EXPECT_TRUE( + has_same_content(expected_member_locations, a.context().lsp_ctx->references(source_loc, position(1, 29)))); + // SQLCA reference + EXPECT_TRUE( + has_same_content(expected_sqlca_locations, a.context().lsp_ctx->references(source_loc, position(2, 29)))); + // SQLDA reference + EXPECT_TRUE( + has_same_content(expected_sqlda_locations, a.context().lsp_ctx->references(source_loc, position(3, 29)))); +} + +TEST_F(lsp_context_db2_preprocessor_test, hover_label) +{ + // A + EXPECT_TRUE(reloc_symbol_checker(a.context().lsp_ctx->hover(source_loc, position(1, 1)))); + // B + 
EXPECT_TRUE(reloc_symbol_checker(a.context().lsp_ctx->hover(source_loc, position(2, 0)))); + // C + EXPECT_TRUE(reloc_symbol_checker(a.context().lsp_ctx->hover(source_loc, position(3, 1)))); +} + +class lsp_context_db2_preprocessor_exec_sql_args_test : public lsp_context_preprocessor_test +{ +protected: + inline static const std::string contents = + R"( + USING *,12 + USING SQLDSECT,11 + EXEC SQL INCLUDE SQLCA + EXEC SQL SELECT 1 INX + TO : --RM ZYX + XWV FROM TABLE WHERE X = :ABCDE + +ZYXWV DS F +XWV DS F +ABCDE DS F + END +)"; + +public: + lsp_context_db2_preprocessor_exec_sql_args_test() + : lsp_context_preprocessor_test( + contents, std::make_shared(member_list), db2_preprocessor_options()) + {} +}; + +TEST_F(lsp_context_db2_preprocessor_exec_sql_args_test, go_to) +{ + // XWV + EXPECT_EQ(location(position(9, 0), source_loc), a.context().lsp_ctx->definition(source_loc, position(6, 17))); + // ABCDE + EXPECT_EQ(location(position(10, 0), source_loc), a.context().lsp_ctx->definition(source_loc, position(6, 48))); + + // ZY - no jump + EXPECT_EQ(location(position(5, 71), source_loc), a.context().lsp_ctx->definition(source_loc, position(5, 71))); +} + +TEST_F(lsp_context_db2_preprocessor_exec_sql_args_test, refs) +{ + const location_list expected_zyxwv_locations { + location(position(8, 0), source_loc), + }; + const location_list expected_xwv_locations { + location(position(6, 15), source_loc), + location(position(9, 0), source_loc), + }; + const location_list expected_abcde_locations { + location(position(6, 43), source_loc), + location(position(10, 0), source_loc), + }; + + // ZYXWV reference + EXPECT_TRUE( + has_same_content(expected_zyxwv_locations, a.context().lsp_ctx->references(source_loc, position(8, 1)))); + // XWV reference + EXPECT_TRUE(has_same_content(expected_xwv_locations, a.context().lsp_ctx->references(source_loc, position(6, 17)))); + // ABCDE reference + EXPECT_TRUE( + has_same_content(expected_abcde_locations, 
a.context().lsp_ctx->references(source_loc, position(6, 48)))); + + // ZY reference + EXPECT_TRUE(a.context().lsp_ctx->references(source_loc, position(5, 70)).empty()); +} + +TEST_F(lsp_context_db2_preprocessor_exec_sql_args_test, hover) +{ + // XWV + EXPECT_TRUE(reloc_symbol_checker(a.context().lsp_ctx->hover(source_loc, position(6, 17)))); + // ABCDE + EXPECT_TRUE(reloc_symbol_checker(a.context().lsp_ctx->hover(source_loc, position(6, 48)))); + + // ZY + EXPECT_TRUE(a.context().lsp_ctx->hover(source_loc, position(5, 71)).empty()); +} diff --git a/parser_library/test/processing/db2_preprocessor_test.cpp b/parser_library/test/processing/db2_preprocessor_test.cpp index 58ecfa0c7..70a68b4a1 100644 --- a/parser_library/test/processing/db2_preprocessor_test.cpp +++ b/parser_library/test/processing/db2_preprocessor_test.cpp @@ -211,7 +211,7 @@ TEST_F(db2_preprocessor_test, bad_continuation) auto p = create_preprocessor( db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, &m_diags); - std::string_view text = R"( EXEC SQL PRETENT SQL STATEMENT X + std::string_view text = R"( EXEC SQL PRETEND SQL STATEMENT X badcontinuation)"; auto doc = p->generate_replacement(document(text)); @@ -252,6 +252,20 @@ TEST(db2_preprocessor, sqlsect_available) EXPECT_EQ(a.diags().size(), (size_t)0); } +TEST(db2_preprocessor, instruction_not_recognized) +{ + std::string input = R"( + EXEC SQLX + INCLUDE SQLCA +)"; + + analyzer a(input, analyzer_options { db2_preprocessor_options {} }); + a.analyze(); + a.collect_diags(); + + EXPECT_TRUE(matches_message_codes(a.diags(), { "E049" })); +} + TEST(db2_preprocessor, aread_from_preprocessor) { std::string input = R"( @@ -364,20 +378,84 @@ TEST(db2_preprocessor, continuation_in_buffer) EXPECT_TRUE(a.hlasm_ctx().get_visited_files().count(member_loc)); } -TEST(db2_preprocessor, include_empty) +TEST(db2_preprocessor, include_valid) { mock_parse_lib_provider libs({ { "MEMBER", "" }, }); - std::string input = " EXEC SQL INCLUDE 
MEMBER "; - analyzer a(input, analyzer_options { &libs, db2_preprocessor_options {} }); - a.analyze(); - a.collect_diags(); + std::vector inputs = { + R"( EXEC SQL INCLUDE MEMBER )", + R"( EXEC SQL INCLUDE MEMBER--TMP)", + R"( EXEC SQL INCLUDE MEMBER--)", + R"( EXEC SQL INCLUDE X + MEMBER)", + R"( EXEC SQL INCLUDE -- TMP X + MEMBER)", + R"( EXEC SQL INCLUDE -- COMMENT X + --COMMENT X + MEMBER)", + // R"( EXEC SQLX + // INCLUDE SQLCA)", // TODO Easier to enable this with proper grammar + }; - EXPECT_EQ(a.diags().size(), (size_t)0); + for (const auto& input : inputs) + { + analyzer a(input, analyzer_options { &libs, db2_preprocessor_options {} }); + a.analyze(); + a.collect_diags(); - EXPECT_TRUE(a.hlasm_ctx().get_visited_files().count(member_loc)); + EXPECT_EQ(a.diags().size(), (size_t)0); + + EXPECT_TRUE(a.hlasm_ctx().get_visited_files().count(member_loc)); + } +} + +TEST(db2_preprocessor, include_double) +{ + mock_parse_lib_provider libs({ + { "MEMBER", "" }, + }); + + std::vector inputs = { + R"( EXEC SQL INCLUDE MEMBER MEMBER)", + R"( EXEC SQL INCLUDE MEMBER X + MEMBER)", + R"( EXEC SQL INCLUDE MEMBER X + -- COMMENT X + --COMMENT X + MEMBER)", + }; + + for (const auto& input : inputs) + { + analyzer a(input, analyzer_options { &libs, db2_preprocessor_options {} }); + a.analyze(); + a.collect_diags(); + + EXPECT_TRUE(matches_message_codes(a.diags(), { "DB002" })); + EXPECT_EQ(a.hlasm_ctx().get_visited_files().count(member_loc), 0); + } +} + +TEST(db2_preprocessor, include_member_not_present) +{ + mock_parse_lib_provider libs({ + { "MEMBER", "" }, + }); + std::vector inputs = { R"( EXEC SQL INCLUDE -- MEMBER)", + R"( EXEC SQL INCLUDE -- X + -- MEMBER)" }; + + for (const auto& input : inputs) + { + analyzer a(input, analyzer_options { &libs, db2_preprocessor_options {} }); + a.analyze(); + a.collect_diags(); + + EXPECT_TRUE(matches_message_codes(a.diags(), { "DB007" })); + EXPECT_EQ(a.hlasm_ctx().get_visited_files().count(member_loc), 0); + } } 
TEST(db2_preprocessor, include_insensitive) @@ -392,7 +470,6 @@ TEST(db2_preprocessor, include_insensitive) a.collect_diags(); EXPECT_EQ(a.diags().size(), (size_t)0); - EXPECT_TRUE(a.hlasm_ctx().get_visited_files().count(member_loc)); } @@ -404,7 +481,21 @@ TEST(db2_preprocessor, include_nonexistent) a.analyze(); a.collect_diags(); - EXPECT_EQ(a.diags().size(), (size_t)1); + EXPECT_TRUE(matches_message_codes(a.diags(), { "DB002" })); +} + +TEST(db2_preprocessor, include_invalid) +{ + mock_parse_lib_provider libs({ + { "MEMBER", "" }, + }); + std::string input = " EXEC SQL INCLUDEMEMBER "; + + analyzer a(input, analyzer_options { &libs, db2_preprocessor_options {} }); + a.analyze(); + a.collect_diags(); + + EXPECT_EQ(a.hlasm_ctx().get_visited_files().count(member_loc), 0); } TEST(db2_preprocessor, ago_in_include) @@ -890,6 +981,81 @@ TEST_F(db2_preprocessor_test, sql_type_warn_on_continuation) EXPECT_TRUE(matches_message_codes(m_diags.diags, { "DB005" })); } +// TODO - issue with msvc regex +// TEST(db2_preprocessor, sql_type_is_table_like_regex) +//{ +// std::string input = R"( +// A SQL TYPE IS TABLE LIKE A X +// A X +// A AS LOCATOR)"; +// +// analyzer a(input, analyzer_options { db2_preprocessor_options {} }); +// a.analyze(); +// +// // No expectations - it should just past +//} + +TEST_F(db2_preprocessor_test, sql_type_parse_and_warn_on_continuation) +{ + std::string_view text = R"( +RE1 SQL TYPE X + IS RESULT_SET_X + LOCATOR VARYING +RE2 SQL TYPE ISX + RESULT_SET_LOCATOR X + VARYING +RE3 SQL TYPE ISX + RESULT_SET_LOCATORX + VARYING +)"; + + + auto p = create_preprocessor( + db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, &m_diags); + + auto result = p->generate_replacement(document(text)); + + EXPECT_TRUE(matches_message_codes(m_diags.diags, { "DB005", "DB005", "DB005" })); + EXPECT_EQ(std::count_if(result.begin(), + result.end(), + [](const auto& l) { return l.text().find("DS FL4") != std::string_view::npos; }), + 3); +} + 
+TEST_F(db2_preprocessor_test, sql_type_dont_parse_and_warn_on_continuation) +{ + std::string_view text = R"( +RE1 SQL TYPE IX + S RESULT_SET_X + LOCATOR VARYING +RE2 SQL TYPE X + IS RESULT_SET_--RX + LOCATOR VARYING +RE3 SQL TYPE ISX + RESULT_SET_LOCATORX + VARYING +)"; + + auto p = create_preprocessor( + db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, &m_diags); + + auto result = p->generate_replacement(document(text)); + + EXPECT_TRUE(matches_message_codes(m_diags.diags, + { + "DB005", + "DB006", + "DB005", + "DB004", + "DB005", + "DB004", + })); + EXPECT_EQ(std::count_if(result.begin(), + result.end(), + [](const auto& l) { return l.text().find("DS FL4") != std::string_view::npos; }), + 0); +} + TEST(db2_preprocessor, no_codegen_for_unacceptable_sql_statement) { std::string input = R"( @@ -1006,6 +1172,8 @@ B DS 0C DECLARE C CURSOR FOR SELECT 1 FROM TABLE E DS 0C LEN EQU E-B + EXEC SQL -- comment X + INCLUDE SQLCA )"; analyzer a(input, analyzer_options { db2_preprocessor_options {} }); diff --git a/parser_library/test/semantics/CMakeLists.txt b/parser_library/test/semantics/CMakeLists.txt index 83f58cca4..4cb9275e5 100644 --- a/parser_library/test/semantics/CMakeLists.txt +++ b/parser_library/test/semantics/CMakeLists.txt @@ -14,4 +14,5 @@ target_sources(library_test PRIVATE concatenation_test.cpp highlighting_test.cpp operand_test.cpp + text_range_test.cpp ) diff --git a/parser_library/test/semantics/highlighting_test.cpp b/parser_library/test/semantics/highlighting_test.cpp index f4dadec30..9a2b0b3cf 100644 --- a/parser_library/test/semantics/highlighting_test.cpp +++ b/parser_library/test/semantics/highlighting_test.cpp @@ -19,6 +19,7 @@ #include "gtest/gtest.h" #include "../gtest_stringers.h" +#include "../mock_parse_lib_provider.h" #include "analyzer.h" #include "preprocessor_options.h" #include "protocol.h" @@ -466,4 +467,87 @@ B L 0,DFHRESP ( NORMAL ) bla X00000002 }; EXPECT_EQ(tokens, expected); -} \ No newline at end of file 
+} + +TEST(highlighting, db2_preprocessor_statement_include) +{ + const std::string contents = R"( +AAA EXEC SQL INCLUDE SQLCA -- REMARK 00000001 + EXEC SQL --REMX00000020 + INCLUDE SQLCA -- rem rem2 00000300 + EXEC SQL X00004000 + SELECT X + 1 --rem X00050000 + INTO :B X + FROM X + SYSIBM.SYSDUMMY1 +B SQL TYPE IS RESULT_SET_LOCATOR VARYING comment comment2 006)"; + + analyzer a( + contents, analyzer_options { source_file_loc, db2_preprocessor_options(), collect_highlighting_info::yes }); + a.analyze(); + + const auto& tokens = a.source_processor().semantic_tokens(); + const semantics::lines_info expected = { + token_info({ { 1, 0 }, { 1, 3 } }, hl_scopes::label), + token_info({ { 1, 4 }, { 1, 13 } }, hl_scopes::instruction), + token_info({ { 1, 13 }, { 1, 31 } }, hl_scopes::operand), + token_info({ { 1, 31 }, { 1, 71 } }, hl_scopes::remark), + token_info({ { 1, 72 }, { 1, 80 } }, hl_scopes::ignored), + token_info({ { 2, 57 }, { 2, 65 } }, hl_scopes::instruction), + token_info({ { 2, 65 }, { 2, 66 } }, hl_scopes::operand), + token_info({ { 2, 66 }, { 2, 71 } }, hl_scopes::remark), + token_info({ { 2, 71 }, { 2, 72 } }, hl_scopes::continuation), + token_info({ { 2, 72 }, { 2, 80 } }, hl_scopes::ignored), + token_info({ { 3, 15 }, { 3, 37 } }, hl_scopes::operand), + token_info({ { 3, 37 }, { 3, 71 } }, hl_scopes::remark), + token_info({ { 3, 72 }, { 3, 80 } }, hl_scopes::ignored), + token_info({ { 4, 18 }, { 4, 31 } }, hl_scopes::instruction), + token_info({ { 4, 31 }, { 4, 71 } }, hl_scopes::operand), + token_info({ { 4, 71 }, { 4, 72 } }, hl_scopes::continuation), + token_info({ { 4, 72 }, { 4, 80 } }, hl_scopes::ignored), + token_info({ { 5, 15 }, { 5, 71 } }, hl_scopes::operand), + token_info({ { 5, 71 }, { 5, 72 } }, hl_scopes::continuation), + token_info({ { 6, 15 }, { 6, 23 } }, hl_scopes::operand), + token_info({ { 6, 23 }, { 6, 71 } }, hl_scopes::remark), + token_info({ { 6, 71 }, { 6, 72 } }, hl_scopes::continuation), + token_info({ { 6, 72 }, { 6, 80 } }, 
hl_scopes::ignored), + token_info({ { 7, 15 }, { 7, 71 } }, hl_scopes::operand), + token_info({ { 7, 71 }, { 7, 72 } }, hl_scopes::continuation), + token_info({ { 8, 15 }, { 8, 71 } }, hl_scopes::operand), + token_info({ { 8, 71 }, { 8, 72 } }, hl_scopes::continuation), + token_info({ { 9, 15 }, { 9, 31 } }, hl_scopes::operand), + token_info({ { 10, 0 }, { 10, 1 } }, hl_scopes::label), + token_info({ { 10, 2 }, { 10, 11 } }, hl_scopes::instruction), + token_info({ { 10, 11 }, { 10, 71 } }, hl_scopes::operand), + /* TODO - Missing recognition of remarks in the SQL TYPE IS statement + token_info({ { 10, 14 }, { 10, 46 } }, hl_scopes::operand), + token_info({ { 10, 46 }, { 10, 62 } }, hl_scopes::remark), + */ + token_info({ { 10, 72 }, { 10, 75 } }, hl_scopes::ignored), + }; + + EXPECT_EQ(tokens, expected); +} + +TEST(highlighting, db2_preprocessor_statement_reinclude) +{ + mock_parse_lib_provider libs({ + { "MEMBER", " SQL TYPE IS RESULT_SET_LOCATOR VARYING" }, + }); + + const std::string contents = "ABCDE EXEC SQL INCLUDE MEMBER"; + + analyzer a(contents, + analyzer_options { source_file_loc, &libs, db2_preprocessor_options(), collect_highlighting_info::yes }); + a.analyze(); + + const auto& tokens = a.source_processor().semantic_tokens(); + const semantics::lines_info expected = { + token_info({ { 0, 0 }, { 0, 5 } }, hl_scopes::label), + token_info({ { 0, 6 }, { 0, 15 } }, hl_scopes::instruction), + token_info({ { 0, 15 }, { 0, 33 } }, hl_scopes::operand), + }; + + EXPECT_EQ(tokens, expected); +} diff --git a/parser_library/test/semantics/text_range_test.cpp b/parser_library/test/semantics/text_range_test.cpp new file mode 100644 index 000000000..f01992757 --- /dev/null +++ b/parser_library/test/semantics/text_range_test.cpp @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023 Broadcom. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. 
+ * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Broadcom, Inc. - initial API and implementation + */ + +#include + +#include "gtest/gtest.h" + +#include "lexing/logical_line.h" +#include "range.h" +#include "semantics/range_provider.h" + +using namespace hlasm_plugin::parser_library; +using namespace hlasm_plugin::parser_library::semantics; + +TEST(text_range_test, empty_line) +{ + std::string_view input = "123456"; + lexing::logical_line ll; + + ASSERT_TRUE(extract_logical_line(ll, input, lexing::default_ictl)); + ll.segments.front().code = {}; + + EXPECT_EQ(text_range(ll.begin(), ll.end(), 1), range(position(1, 0))); +} + +TEST(text_range_test, single_line) +{ + std::string_view input = "123456"; + lexing::logical_line ll; + + ASSERT_TRUE(extract_logical_line(ll, input, lexing::default_ictl)); + + EXPECT_EQ(text_range(ll.begin(), ll.end(), 2), range(position(2, 0), position(2, 6))); + EXPECT_EQ(text_range(std::next(ll.begin()), ll.end(), 0), range(position(0, 1), position(0, 6))); + EXPECT_EQ(text_range(std::next(ll.begin(), 3), std::prev(ll.end(), 1), 0), range(position(0, 3), position(0, 5))); + EXPECT_EQ(text_range(std::next(ll.begin(), 3), std::prev(ll.end(), 3), 0), range(position(0, 3))); +} + +TEST(text_range_test, multi_line) +{ + std::string_view input = R"( SOME X + TEXT X + GOES X + HERE)"; + lexing::logical_line ll; + + ASSERT_TRUE(extract_logical_line(ll, input, lexing::default_ictl)); + + EXPECT_EQ(text_range(ll.begin(), ll.end(), 2), range(position(2, 0), position(5, 19))); + EXPECT_EQ( + text_range(std::next(ll.begin(), 71), std::next(ll.begin(), 127), 0), range(position(1, 15), position(1, 71))); + EXPECT_EQ(text_range(ll.begin(), std::prev(ll.end(), 4), 0), range(position(0, 0), position(2, 71))); +} \ No newline at end of file diff --git 
namespace hlasm_plugin::utils {

// Removes leading spaces (' ' only, tabs are kept) from s in place.
// Returns the number of characters that were removed.
size_t trim_left(std::string_view& s)
{
    const auto first_kept = s.find_first_not_of(' ');
    if (first_kept == std::string_view::npos)
    {
        // Empty or all-blank input: everything goes away.
        const auto removed = s.length();
        s = {};
        return removed;
    }

    s.remove_prefix(first_kept);
    return first_kept;
}

// Removes trailing spaces (' ' only, tabs are kept) from s in place.
// Returns the number of characters that were removed, mirroring trim_left.
size_t trim_right(std::string_view& s)
{
    const auto last_kept = s.find_last_not_of(' ');
    if (last_kept == std::string_view::npos)
    {
        // Empty or all-blank input: everything goes away.
        const auto removed = s.length();
        s = {};
        return removed;
    }

    // last_kept is an index, not a count; the count of trimmed characters
    // is whatever follows the last non-blank character.
    const auto removed = s.length() - (last_kept + 1);
    s.remove_suffix(removed);
    return removed;
}

// If s begins with lit (case sensitive), strips that prefix from s and
// returns its length; otherwise leaves s untouched and returns 0.
size_t consume(std::string_view& s, std::string_view lit)
{
    // substr clamps the count, so a too-short s simply compares unequal.
    if (s.substr(0, lit.size()) != lit)
        return 0;

    s.remove_prefix(lit.size());
    return lit.size();
}

// Returns the run of characters at the start of s up to (not including) the
// first space, or the whole of s when it contains no space.
// An empty view is returned when s is empty or starts with a space.
std::string_view next_nonblank_sequence(std::string_view s)
{
    if (s.empty() || s.front() == ' ')
        return {};

    // substr clamps npos to the end of s, covering the "no space" case.
    return s.substr(0, s.find(' '));
}

} // namespace hlasm_plugin::utils