From 7ea7ce80dfa2ff3eecf6c898fee478c0b81fdf68 Mon Sep 17 00:00:00 2001 From: slavek-kucera <53339291+slavek-kucera@users.noreply.github.com> Date: Mon, 20 Jun 2022 09:54:23 +0200 Subject: [PATCH] refactor: Preprocessors --- parser_library/src/CMakeLists.txt | 2 + parser_library/src/context/hlasm_context.cpp | 13 +- parser_library/src/context/hlasm_context.h | 2 +- parser_library/src/context/source_context.cpp | 2 +- parser_library/src/context/source_context.h | 1 - parser_library/src/context/source_snapshot.h | 26 +- parser_library/src/diagnostic.cpp | 59 +-- parser_library/src/document.cpp | 67 +++ parser_library/src/document.h | 94 ++++ parser_library/src/processing/CMakeLists.txt | 1 + .../src/processing/opencode_provider.cpp | 133 +++--- .../src/processing/opencode_provider.h | 20 +- .../src/processing/preprocessor.cpp | 44 ++ parser_library/src/processing/preprocessor.h | 22 +- .../preprocessors/cics_preprocessor.cpp | 292 +++++++------ .../preprocessors/db2_preprocessor.cpp | 413 ++++++++++-------- .../src/processing/processing_manager.cpp | 5 +- .../processing/cics_preprocessor_test.cpp | 52 +-- .../test/processing/db2_preprocessor_test.cpp | 241 +++------- utils/include/utils/concat.h | 89 ++++ 20 files changed, 875 insertions(+), 703 deletions(-) create mode 100644 parser_library/src/document.cpp create mode 100644 parser_library/src/document.h create mode 100644 parser_library/src/processing/preprocessor.cpp create mode 100644 utils/include/utils/concat.h diff --git a/parser_library/src/CMakeLists.txt b/parser_library/src/CMakeLists.txt index 687821c40..9779b61ca 100644 --- a/parser_library/src/CMakeLists.txt +++ b/parser_library/src/CMakeLists.txt @@ -26,6 +26,8 @@ target_sources(parser_library PRIVATE diagnostic.h diagnostic_consumer.h diagnostic_adder.h + document.cpp + document.h ebcdic_encoding.cpp ebcdic_encoding.h error_messages.h diff --git a/parser_library/src/context/hlasm_context.cpp b/parser_library/src/context/hlasm_context.cpp index 48e2028a8..61cf0ee8e 100644 --- a/parser_library/src/context/hlasm_context.cpp +++ b/parser_library/src/context/hlasm_context.cpp @@ -401,11 +401,10 @@ hlasm_context::~hlasm_context() = default; void hlasm_context::set_source_position(position pos) { source_stack_.back().current_instruction.pos = pos; } -void hlasm_context::set_source_indices(size_t begin_index, size_t end_index, size_t end_line) +void hlasm_context::set_source_indices(size_t begin_index, size_t end_index) { source_stack_.back().begin_index = begin_index; source_stack_.back().end_index = end_index; - source_stack_.back().end_line = end_line; } std::pair hlasm_context::get_begin_snapshot(bool ignore_macros) const @@ -416,13 +415,11 @@ std::pair hlasm_context::get_begin_snapshot(bo if (!is_in_macros && current_copy_stack().empty()) { - statement_position.file_offset = current_source().begin_index; - statement_position.file_line = current_source().current_instruction.pos.line; + statement_position.rewind_target = current_source().begin_index; } else { - statement_position.file_offset = current_source().end_index; - statement_position.file_line = current_source().end_line + 1; + statement_position.rewind_target = current_source().end_index; } context::source_snapshot snapshot = current_source().create_snapshot(); @@ -436,8 +433,7 @@ std::pair hlasm_context::get_begin_snapshot(bo std::pair hlasm_context::get_end_snapshot() const { context::source_position statement_position; - statement_position.file_offset = current_source().end_index; - statement_position.file_line = current_source().end_line + 1; + statement_position.rewind_target = current_source().end_index; context::source_snapshot snapshot = current_source().create_snapshot(); @@ -896,7 +892,6 @@ void hlasm_context::apply_source_snapshot(source_snapshot snapshot) source_stack_.back().current_instruction = std::move(snapshot.instruction); source_stack_.back().begin_index = snapshot.begin_index; source_stack_.back().end_index = snapshot.end_index; - source_stack_.back().end_line = snapshot.end_line; source_stack_.back().copy_stack.clear(); diff --git a/parser_library/src/context/hlasm_context.h b/parser_library/src/context/hlasm_context.h index c7b3c3bfc..ff769a83d 100644 --- a/parser_library/src/context/hlasm_context.h +++ b/parser_library/src/context/hlasm_context.h @@ -120,7 +120,7 @@ class hlasm_context // sets current source position void set_source_position(position pos); // sets current source file indices - void set_source_indices(size_t begin_index, size_t end_index, size_t end_line); + void set_source_indices(size_t begin_index, size_t end_index); std::pair get_begin_snapshot(bool ignore_macros) const; std::pair get_end_snapshot() const; diff --git a/parser_library/src/context/source_context.cpp b/parser_library/src/context/source_context.cpp index de5301a36..bac57c998 100644 --- a/parser_library/src/context/source_context.cpp +++ b/parser_library/src/context/source_context.cpp @@ -32,7 +32,7 @@ source_snapshot source_context::create_snapshot() const if (!copy_frames.empty()) --copy_frames.back().statement_offset; - return source_snapshot { current_instruction, begin_index, end_index, end_line, std::move(copy_frames) }; + return source_snapshot { current_instruction, begin_index, end_index, std::move(copy_frames) }; } processing_frame::processing_frame( diff --git a/parser_library/src/context/source_context.h b/parser_library/src/context/source_context.h index d518f8aea..5f3f192dd 100644 --- a/parser_library/src/context/source_context.h +++ b/parser_library/src/context/source_context.h @@ -29,7 +29,6 @@ struct source_context // location in the file size_t begin_index = 0; size_t end_index = 0; - size_t end_line = 0; // stack of copy nests std::vector copy_stack; diff --git a/parser_library/src/context/source_snapshot.h b/parser_library/src/context/source_snapshot.h index 9df39b68f..25597026e 100644 --- a/parser_library/src/context/source_snapshot.h +++ b/parser_library/src/context/source_snapshot.h @@ -26,19 +26,14 @@ namespace hlasm_plugin::parser_library::context { struct source_position { // line in the file - size_t file_line; - // character offset in the file - size_t file_offset; + size_t rewind_target = 0; - source_position(size_t file_line = 0, size_t file_offset = 0) - : file_line(file_line) - , file_offset(file_offset) + source_position() = default; + explicit source_position(size_t rewind_target) + : rewind_target(rewind_target) {} - bool operator==(const source_position& oth) const - { - return file_line == oth.file_line && file_offset == oth.file_offset; - } + bool operator==(const source_position& oth) const noexcept = default; }; // helper structure representing a copy member invocation @@ -64,27 +59,20 @@ struct source_snapshot location instruction; size_t begin_index = 0; size_t end_index = 0; - size_t end_line = 0; std::vector copy_frames; source_snapshot() = default; - source_snapshot(location instruction, - size_t begin_index, - size_t end_index, - size_t end_line, - std::vector copy_frames) + source_snapshot(location instruction, size_t begin_index, size_t end_index, std::vector copy_frames) : instruction(std::move(instruction)) , begin_index(begin_index) , end_index(end_index) - , end_line(end_line) , copy_frames(std::move(copy_frames)) {} bool operator==(const source_snapshot& oth) const { - return end_line == oth.end_line && begin_index == oth.begin_index && end_index == oth.end_index - && copy_frames == oth.copy_frames; + return begin_index == oth.begin_index && end_index == oth.end_index && copy_frames == oth.copy_frames; } }; diff --git a/parser_library/src/diagnostic.cpp b/parser_library/src/diagnostic.cpp index 4121a91cd..4d08f1ce7 100644 --- a/parser_library/src/diagnostic.cpp +++ b/parser_library/src/diagnostic.cpp @@ -19,67 +19,12 @@ #include #include +#include "utils/concat.h" #include "utils/utf8text.h" namespace hlasm_plugin::parser_library { -namespace { -struct concat_helper -{ - void operator()(std::string& s, std::string_view t) const { s.append(t); } - template - std::enable_if_t> operator()(std::string& s, T&& t) const - { - s.append(std::to_string(std::forward(t))); - } - - constexpr static std::string_view span_sep = ", "; - template - void operator()(std::string& s, typename std::span span) const - { - bool first = true; - for (const auto& e : span) - { - if (!first) - s.append(span_sep); - else - first = false; - - operator()(s, e); - } - } - - size_t len(std::string_view t) const { return t.size(); } - template - std::enable_if_t, size_t> len(const T&) const - { - return 8; // arbitrary estimate for the length of the stringified argument (typically small numbers) - } - template - size_t len(const typename std::span& span) const - { - size_t result = 0; - for (const auto& e : span) - result += span_sep.size() + len(e); - - return result - (result ? span_sep.size() : 0); - } -}; - -template -std::string concat(Args&&... args) -{ - std::string result; - - concat_helper h; - - result.reserve((... + h.len(std::as_const(args)))); - - (h(result, std::forward(args)), ...); - - return result; -} -} // namespace +using hlasm_plugin::utils::concat; // diagnostic_op errors diff --git a/parser_library/src/document.cpp b/parser_library/src/document.cpp new file mode 100644 index 000000000..841fb9563 --- /dev/null +++ b/parser_library/src/document.cpp @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2022 Broadcom. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Broadcom, Inc. - initial API and implementation + */ + +#include "document.h" + +#include + +namespace hlasm_plugin::parser_library { +document::document(std::string_view text) +{ + if (text.empty()) + { + m_lines.emplace_back(original_line()); + return; + } + size_t line_no = 0; + while (!text.empty()) + { + auto p = text.find_first_of("\r\n"); + if (p == std::string_view::npos) + break; + if (text.substr(p, 2) == "\r\n") + ++p; + + m_lines.emplace_back(original_line { text.substr(0, p + 1), line_no }); + + text.remove_prefix(p + 1); + ++line_no; + } + if (!text.empty()) + m_lines.emplace_back(original_line { text, line_no }); +} + +std::string document::text() const +{ + return std::accumulate(m_lines.begin(), m_lines.end(), std::string(), [](std::string&& result, const auto& l) { + auto t = l.text(); + result.append(t); + if (t.empty() || t.back() != '\n') + result.push_back('\n'); + return std::move(result); + }); +} + +void document::convert_to_replaced() +{ + for (auto& line : m_lines) + { + if (line.is_original()) + { + line = document_line(replaced_line { std::string(line.text()) }); + } + } +} + +} // namespace hlasm_plugin::parser_library diff --git a/parser_library/src/document.h b/parser_library/src/document.h new file mode 100644 index 000000000..f8a369cb3 --- /dev/null +++ b/parser_library/src/document.h @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2022 Broadcom. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Broadcom, Inc. - initial API and implementation + */ + +#ifndef HLASMPLUGIN_PARSERLIBRARY_DOCUMENT_H +#define HLASMPLUGIN_PARSERLIBRARY_DOCUMENT_H + +#include +#include +#include +#include +#include +#include + +namespace hlasm_plugin::parser_library { + +struct replaced_line +{ + std::string m_text; +}; + +struct original_line +{ + std::string_view m_text; + size_t m_lineno = 0; +}; + +class document_line +{ + std::variant m_line; + +public: + explicit document_line(original_line l) noexcept + : m_line(std::move(l)) + {} + explicit document_line(replaced_line l) noexcept + : m_line(std::move(l)) + {} + + std::string_view text() const noexcept + { + return std::visit([](const auto& s) -> std::string_view { return s.m_text; }, m_line); + } + + std::optional lineno() const noexcept + { + if (std::holds_alternative(m_line)) + return std::get(m_line).m_lineno; + else + return std::nullopt; + } + + bool is_original() const noexcept { return std::holds_alternative(m_line); } + + bool same_type(const document_line& d) const noexcept { return m_line.index() == d.m_line.index(); } +}; + +class document +{ + std::vector m_lines; + +public: + document() = default; + explicit document(std::string_view text); + explicit document(std::vector lines) noexcept + : m_lines(std::move(lines)) + {} + + auto begin() const { return m_lines.begin(); } + + auto end() const { return m_lines.end(); } + + auto size() const { return m_lines.size(); } + + std::string text() const; + + const auto& at(size_t idx) const { return m_lines.at(idx); } + + void convert_to_replaced(); +}; + +} // namespace hlasm_plugin::parser_library + +#endif diff --git a/parser_library/src/processing/CMakeLists.txt b/parser_library/src/processing/CMakeLists.txt index 59545df76..6f1ad215d 100644 --- a/parser_library/src/processing/CMakeLists.txt +++ b/parser_library/src/processing/CMakeLists.txt @@ -18,6 +18,7 @@ target_sources(parser_library PRIVATE op_code.h opencode_provider.cpp opencode_provider.h + preprocessor.cpp preprocessor.h processing_format.h processing_manager.cpp diff --git a/parser_library/src/processing/opencode_provider.cpp b/parser_library/src/processing/opencode_provider.cpp index c734b8489..43e982452 100644 --- a/parser_library/src/processing/opencode_provider.cpp +++ b/parser_library/src/processing/opencode_provider.cpp @@ -43,8 +43,7 @@ opencode_provider::opencode_provider(std::string_view text, opencode_provider_options opts, virtual_file_monitor* virtual_file_monitor) : statement_provider(statement_provider_kind::OPEN) - , m_original_text(text) - , m_next_line_text(text) + , m_input_document(preprocessor ? preprocessor->generate_replacement(document(text)) : document(text)) , m_parser(parsing::parser_holder::create(&src_proc, ctx.hlasm_ctx.get(), &diag_consumer)) , m_lookahead_parser(parsing::parser_holder::create(nullptr, ctx.hlasm_ctx.get(), nullptr)) , m_operand_parser(parsing::parser_holder::create(nullptr, ctx.hlasm_ctx.get(), nullptr)) @@ -60,11 +59,9 @@ opencode_provider::opencode_provider(std::string_view text, void opencode_provider::rewind_input(context::source_position pos) { - apply_pending_line_changes(); - m_ainsert_buffer.clear(); // this needs to be tested, but apparently AGO clears AINSERT buffer - m_next_line_text = m_original_text.substr(pos.file_offset); - m_current_line = pos.file_line; + assert(pos.rewind_target <= m_input_document.size()); + m_next_line_index = pos.rewind_target; } void opencode_provider::generate_aread_highlighting(std::string_view text, size_t line_no) const @@ -88,8 +85,6 @@ void opencode_provider::generate_aread_highlighting(std::string_view text, size_ std::string opencode_provider::aread() { - apply_pending_line_changes(); - bool adjust_length = true; std::string result; if (!m_ainsert_buffer.empty()) @@ -114,13 +109,13 @@ std::string opencode_provider::aread() while (!opencode_stack.empty() && !opencode_stack.back().suspended()) opencode_stack.pop_back(); } - else if (!m_next_line_text.empty()) + else if (m_next_line_index < m_input_document.size()) { - result = lexing::extract_line(m_next_line_text).first; - - generate_aread_highlighting(result, m_current_line); - - m_lines_to_remove.current_text_lines++; + const auto& line = m_input_document.at(m_next_line_index++); + auto line_text = line.text(); + result = lexing::extract_line(line_text).first; + if (auto lineno = line.lineno(); lineno.has_value()) + generate_aread_highlighting(result, *lineno); } else adjust_length = false; @@ -361,21 +356,32 @@ utils::resource::resource_location generate_virtual_file_name(virtual_file_id id bool opencode_provider::try_running_preprocessor() { - const auto current_line = m_current_line; - auto result = m_preprocessor->generate_replacement(m_next_line_text, m_current_line); - if (!result.has_value() || result.value().empty()) + if (m_next_line_index >= m_input_document.size() || m_input_document.at(m_next_line_index).is_original()) return false; + const auto current_line = m_next_line_index ? m_input_document.at(m_next_line_index - 1).lineno().value() + 1 : 0; + + std::string preprocessor_text; + auto it = m_input_document.begin() + m_next_line_index; + for (; it != m_input_document.end() && !it->is_original(); ++it) + { + const auto text = it->text(); + preprocessor_text.append(text); + if (text.empty() || text.back() != '\n') + preprocessor_text.push_back('\n'); + } + const size_t stop_line = it != m_input_document.end() ? it->lineno().value() : current_line; + const auto last_index = it - m_input_document.begin(); + auto virtual_file_name = m_ctx->hlasm_ctx->ids().add("preprocessor:" + std::to_string(current_line)); - auto [new_file, inserted] = m_virtual_files.try_emplace(virtual_file_name, std::move(result.value())); + auto [new_file, inserted] = m_virtual_files.try_emplace(virtual_file_name, std::move(preprocessor_text)); // set up "call site" - const auto current_offset = - m_next_line_text.empty() ? m_original_text.size() : m_next_line_text.data() - m_original_text.data(); - const auto last_statement_line = m_current_line - (current_line != m_current_line); + const auto last_statement_line = stop_line - (stop_line != current_line); m_ctx->hlasm_ctx->set_source_position(position(last_statement_line, 0)); - m_ctx->hlasm_ctx->set_source_indices(current_offset, current_offset, last_statement_line); + m_ctx->hlasm_ctx->set_source_indices(m_next_line_index, last_index); + m_next_line_index = last_index; if (inserted) { @@ -393,7 +399,7 @@ bool opencode_provider::try_running_preprocessor() } else { - assert(result.value() == new_file->second); + assert(preprocessor_text == new_file->second); // isn't moved if insert fails } m_ctx->hlasm_ctx->enter_copy_member(virtual_file_name); @@ -534,9 +540,8 @@ context::shared_stmt_ptr opencode_provider::get_next(const statement_processor& std::move(collector.diag_container().diags)); } - m_ctx->hlasm_ctx->set_source_indices(m_current_logical_line_source.begin_offset, - m_current_logical_line_source.end_offset, - m_current_logical_line_source.end_line); + m_ctx->hlasm_ctx->set_source_indices( + m_current_logical_line_source.first_index, m_current_logical_line_source.last_index); return lookahead ? process_lookahead(proc, collector, op_text, op_range) : process_ordinary(proc, collector, op_text, op_range, diag_target); @@ -544,14 +549,12 @@ context::shared_stmt_ptr opencode_provider::get_next(const statement_processor& bool opencode_provider::finished() const { - if (!m_next_line_text.empty()) + if (m_next_line_index < m_input_document.size()) return false; if (!m_ctx->hlasm_ctx->in_opencode()) return true; if (!m_ainsert_buffer.empty()) return false; - if (m_preprocessor && !m_preprocessor->finished()) - return false; const auto& o = m_ctx->hlasm_ctx->opencode_copy_stack(); if (o.empty()) return true; @@ -566,23 +569,18 @@ parsing::hlasmparser& opencode_provider::parser() return *m_parser->parser; } -void opencode_provider::apply_pending_line_changes() -{ - m_ainsert_buffer.erase(m_ainsert_buffer.begin(), m_ainsert_buffer.begin() + m_lines_to_remove.ainsert_buffer); - - m_current_line += m_lines_to_remove.current_text_lines; - - m_lines_to_remove = {}; -} - bool opencode_provider::is_next_line_ictl() const { static constexpr std::string_view ICTL_LITERAL = "ICTL"; - const auto non_blank = m_next_line_text.find_first_not_of(' '); + const auto& current_line = m_input_document.at(m_next_line_index); + if (!current_line.is_original()) // for now, let's say that ICTL can only be specified in the original + return false; + const auto current_line_text = current_line.text(); + const auto non_blank = current_line_text.find_first_not_of(' '); if (non_blank == std::string_view::npos || non_blank == 0) return false; - const auto test_ictl = m_next_line_text.substr(non_blank); + const auto test_ictl = current_line_text.substr(non_blank); if (test_ictl.size() > ICTL_LITERAL.size() && test_ictl[ICTL_LITERAL.size()] != ' ') return false; @@ -598,10 +596,15 @@ bool opencode_provider::is_next_line_process() const { static constexpr std::string_view PROCESS_LITERAL = "*PROCESS"; - if (m_next_line_text.size() > PROCESS_LITERAL.size() && m_next_line_text[PROCESS_LITERAL.size()] != ' ') + const auto& current_line = m_input_document.at(m_next_line_index); + if (!current_line.is_original()) // for now, let's say that *PROCESS can only be specified in the original return false; - const auto test_process = m_next_line_text.substr(0, PROCESS_LITERAL.size()); + const auto current_line_text = current_line.text(); + if (current_line_text.size() > PROCESS_LITERAL.size() && current_line_text[PROCESS_LITERAL.size()] != ' ') + return false; + + const auto test_process = current_line_text.substr(0, PROCESS_LITERAL.size()); return std::equal( test_process.cbegin(), test_process.cend(), PROCESS_LITERAL.cbegin(), [](unsigned char l, unsigned char r) { return std::toupper(l) == r; @@ -637,7 +640,6 @@ extract_next_logical_line_result opencode_provider::extract_next_logical_line_fr copy_file.current_statement = resync; const auto* copy_text = m_ctx->lsp_ctx->get_file_info(copy_file.definition_location()->resource_loc); - std::string_view full_text = copy_text->data.get_lines_beginning_at({ 0, 0 }); std::string_view remaining_text = copy_text->data.get_lines_beginning_at({ line, 0 }); if (!lexing::extract_logical_line(m_current_logical_line, remaining_text, lexing::default_ictl_copy)) { @@ -646,11 +648,8 @@ extract_next_logical_line_result opencode_provider::extract_next_logical_line_fr } m_current_logical_line_source.begin_line = line; - m_current_logical_line_source.end_line = line + m_current_logical_line.segments.size() - 1; - m_current_logical_line_source.begin_offset = - m_current_logical_line.segments.front().code.data() - full_text.data(); - m_current_logical_line_source.end_offset = - remaining_text.size() ? remaining_text.data() - full_text.data() : full_text.size(); + m_current_logical_line_source.first_index = m_next_line_index; + m_current_logical_line_source.last_index = m_next_line_index; m_current_logical_line_source.source = logical_line_origin::source_type::copy; copy_file.resume(); @@ -662,8 +661,6 @@ extract_next_logical_line_result opencode_provider::extract_next_logical_line_fr extract_next_logical_line_result opencode_provider::extract_next_logical_line() { - apply_pending_line_changes(); - bool ictl_allowed = false; if (m_opts.ictl_allowed) { @@ -686,6 +683,9 @@ extract_next_logical_line_result opencode_provider::extract_next_logical_line() return extract_next_logical_line_from_copy_buffer(); } + if (m_next_line_index >= m_input_document.size()) + return extract_next_logical_line_result::failed; + if (ictl_allowed) ictl_allowed = is_next_line_ictl(); @@ -695,17 +695,16 @@ extract_next_logical_line_result opencode_provider::extract_next_logical_line() m_opts.process_remaining = 0; else { - ++m_lines_to_remove.current_text_lines; - append_to_logical_line(m_current_logical_line, m_next_line_text, lexing::default_ictl); + const auto first_index = m_next_line_index; + const auto& current_line = m_input_document.at(m_next_line_index++); + auto current_line_text = current_line.text(); + append_to_logical_line(m_current_logical_line, current_line_text, lexing::default_ictl); finish_logical_line(m_current_logical_line, lexing::default_ictl); --m_opts.process_remaining; - m_current_logical_line_source.begin_line = m_current_line; - m_current_logical_line_source.end_line = m_current_line + m_current_logical_line.segments.size() - 1; - m_current_logical_line_source.begin_offset = - m_current_logical_line.segments.front().code.data() - m_original_text.data(); - m_current_logical_line_source.end_offset = - m_next_line_text.size() ? m_next_line_text.data() - m_original_text.data() : m_original_text.size(); + m_current_logical_line_source.begin_line = current_line.lineno().value(); + m_current_logical_line_source.first_index = first_index; + m_current_logical_line_source.last_index = m_next_line_index; m_current_logical_line_source.source = logical_line_origin::source_type::file; return extract_next_logical_line_result::process; @@ -715,10 +714,13 @@ extract_next_logical_line_result opencode_provider::extract_next_logical_line() if (m_preprocessor && try_running_preprocessor()) return extract_next_logical_line_result::failed; - while (!m_next_line_text.empty()) + const auto first_index = m_next_line_index; + const auto current_lineno = m_input_document.at(m_next_line_index).lineno().value(); + while (m_next_line_index < m_input_document.size()) { - ++m_lines_to_remove.current_text_lines; - if (!append_to_logical_line(m_current_logical_line, m_next_line_text, lexing::default_ictl)) + const auto& current_line = m_input_document.at(m_next_line_index++); + auto current_line_text = current_line.text(); + if (!append_to_logical_line(m_current_logical_line, current_line_text, lexing::default_ictl)) break; } finish_logical_line(m_current_logical_line, lexing::default_ictl); @@ -726,12 +728,9 @@ extract_next_logical_line_result opencode_provider::extract_next_logical_line() if (m_current_logical_line.segments.empty()) return extract_next_logical_line_result::failed; - m_current_logical_line_source.begin_line = m_current_line; - m_current_logical_line_source.end_line = m_current_line + m_current_logical_line.segments.size() - 1; - m_current_logical_line_source.begin_offset = - m_current_logical_line.segments.front().code.data() - m_original_text.data(); - m_current_logical_line_source.end_offset = - m_next_line_text.size() ? m_next_line_text.data() - m_original_text.data() : m_original_text.size(); + m_current_logical_line_source.begin_line = current_lineno; + m_current_logical_line_source.first_index = first_index; + m_current_logical_line_source.last_index = m_next_line_index; m_current_logical_line_source.source = logical_line_origin::source_type::file; if (ictl_allowed) diff --git a/parser_library/src/processing/opencode_provider.h b/parser_library/src/processing/opencode_provider.h index b60dd91f3..87faaefd2 100644 --- a/parser_library/src/processing/opencode_provider.h +++ b/parser_library/src/processing/opencode_provider.h @@ -67,26 +67,15 @@ enum class extract_next_logical_line_result // uses the parser implementation to produce statements in the opencode(-like) scenario class opencode_provider final : public statement_provider { - struct lines_to_remove - { - size_t ainsert_buffer; - size_t copy_files; - size_t current_text_lines; - }; - lines_to_remove m_lines_to_remove = {}; - - std::string_view m_original_text; - size_t m_current_line = 0; - - std::string_view m_next_line_text; + document m_input_document; + std::size_t m_next_line_index = 0; lexing::logical_line m_current_logical_line; struct logical_line_origin { - size_t begin_offset; - size_t end_offset; size_t begin_line; - size_t end_line; + size_t first_index; + size_t last_index; enum class source_type { none, @@ -151,7 +140,6 @@ class opencode_provider final : public statement_provider void generate_continuation_error_messages(diagnostic_op_consumer* diags) const; extract_next_logical_line_result extract_next_logical_line_from_copy_buffer(); extract_next_logical_line_result extract_next_logical_line(); - void apply_pending_line_changes(); const parsing::parser_holder& prepare_operand_parser(const std::string& text, context::hlasm_context& hlasm_ctx, diagnostic_op_consumer* diag_collector, diff --git a/parser_library/src/processing/preprocessor.cpp b/parser_library/src/processing/preprocessor.cpp new file mode 100644 index 000000000..1b8df2e22 --- /dev/null +++ b/parser_library/src/processing/preprocessor.cpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 Broadcom. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Broadcom, Inc. - initial API and implementation + */ + +#include "preprocessor.h" + +#include "lexing/logical_line.h" + +namespace hlasm_plugin::parser_library::processing { + +preprocessor::line_iterator preprocessor::extract_nonempty_logical_line( + lexing::logical_line& out, line_iterator it, line_iterator end, const lexing::logical_line_extractor_args& opts) +{ + out.clear(); + + while (it != end) + { + auto text = it++->text(); + if (!append_to_logical_line(out, text, opts)) + break; + } + + finish_logical_line(out, opts); + + return it; +} + +bool preprocessor::is_continued(std::string_view s) +{ + const auto cont = lexing::utf8_substr(s, lexing::default_ictl_copy.end, 1).str; + return !cont.empty() && cont != " "; +} + +} // namespace hlasm_plugin::parser_library::processing diff --git a/parser_library/src/processing/preprocessor.h b/parser_library/src/processing/preprocessor.h index a45a4b3b7..589746de1 100644 --- a/parser_library/src/processing/preprocessor.h +++ b/parser_library/src/processing/preprocessor.h @@ -22,12 +22,20 @@ #include #include #include +#include #include "diagnostic_consumer.h" +#include "document.h" namespace hlasm_plugin::parser_library { struct cics_preprocessor_options; struct db2_preprocessor_options; + +namespace lexing { +struct logical_line; +struct logical_line_extractor_args; +} // namespace lexing + } // namespace hlasm_plugin::parser_library namespace hlasm_plugin::parser_library::processing { @@ -39,15 +47,23 @@ class preprocessor public: virtual ~preprocessor() = default; - virtual std::optional generate_replacement(std::string_view& input, size_t& lineno) = 0; - - virtual bool finished() const = 0; + virtual document generate_replacement(document doc) = 0; static std::unique_ptr create( const cics_preprocessor_options&, library_fetcher, diagnostic_op_consumer*); static std::unique_ptr create( const db2_preprocessor_options&, library_fetcher, diagnostic_op_consumer*); + +protected: + using line_iterator = std::vector::const_iterator; + + static line_iterator extract_nonempty_logical_line(lexing::logical_line& out, + line_iterator it, + line_iterator end, + const lexing::logical_line_extractor_args& opts); + + static bool is_continued(std::string_view s); }; } // namespace hlasm_plugin::parser_library::processing diff --git a/parser_library/src/processing/preprocessors/cics_preprocessor.cpp b/parser_library/src/processing/preprocessors/cics_preprocessor.cpp index 79ea376b5..da490e2d4 100644 --- a/parser_library/src/processing/preprocessors/cics_preprocessor.cpp +++ b/parser_library/src/processing/preprocessors/cics_preprocessor.cpp @@ -21,12 +21,15 @@ #include "lexing/logical_line.h" #include "preprocessor_options.h" #include "processing/preprocessor.h" +#include "utils/concat.h" #include "workspaces/parse_lib_provider.h" namespace hlasm_plugin::parser_library::processing { namespace { +using utils::concat; + const std::unordered_map DFHRESP_operands = { { "NORMAL", 0 }, { "ERROR", 1 }, @@ -815,12 +818,11 @@ class mini_parser class cics_preprocessor : public preprocessor { - const char* m_last_position = nullptr; lexing::logical_line m_logical_line; std::string m_operands; library_fetcher m_libs; diagnostic_op_consumer* m_diags = nullptr; - std::string m_buffer; + std::vector m_result; cics_preprocessor_options m_options; bool m_end_seen = false; @@ -842,8 +844,8 @@ class cics_preprocessor : public preprocessor void inject_no_end_warning() { - m_buffer.append("*DFH7041I W NO END CARD FOUND - COPYBOOK ASSUMED.\n" - " DFHEIMSG 4\n"); + m_result.emplace_back(replaced_line { "*DFH7041I W NO END CARD FOUND - COPYBOOK ASSUMED.\n" }); + m_result.emplace_back(replaced_line { " DFHEIMSG 4\n" }); } void inject_DFHEIGBL(bool rsect) @@ -851,34 +853,38 @@ class cics_preprocessor : public preprocessor if (rsect) { if (m_options.leasm) - m_buffer.append(" DFHEIGBL ,,RS,LE INSERTED BY TRANSLATOR\n"); + m_result.emplace_back(replaced_line { " DFHEIGBL ,,RS,LE INSERTED BY TRANSLATOR\n" }); else - m_buffer.append(" DFHEIGBL ,,RS,NOLE INSERTED BY TRANSLATOR\n"); + m_result.emplace_back(replaced_line { " DFHEIGBL ,,RS,NOLE INSERTED BY TRANSLATOR\n" }); } else { if (m_options.leasm) - m_buffer.append(" DFHEIGBL ,,,LE INSERTED BY TRANSLATOR\n"); + m_result.emplace_back(replaced_line { " DFHEIGBL ,,,LE INSERTED BY TRANSLATOR\n" }); else - m_buffer.append(" DFHEIGBL ,,,NOLE INSERTED BY TRANSLATOR\n"); + m_result.emplace_back(replaced_line { " DFHEIGBL ,,,NOLE INSERTED BY TRANSLATOR\n" }); } } - void inject_prolog() { m_buffer.append(" DFHEIENT INSERTED BY TRANSLATOR\n"); } + void inject_prolog() + { + m_result.emplace_back(replaced_line { " DFHEIENT INSERTED BY TRANSLATOR\n" }); + } void inject_dfh_null_error(std::string_view variable) { - m_buffer.append("*DFH7218I S SUB-OPERAND(S) OF '").append(variable).append("' CANNOT BE NULL. COMMAND NOT\n"); - m_buffer.append("* TRANSLATED.\n"); - m_buffer.append(" DFHEIMSG 12\n"); + m_result.emplace_back( + replaced_line { concat("*DFH7218I S SUB-OPERAND(S) OF '", variable, "' CANNOT BE NULL. COMMAND NOT\n") }); + m_result.emplace_back(replaced_line { "* TRANSLATED.\n" }); + m_result.emplace_back(replaced_line { " DFHEIMSG 12\n" }); } void inject_end_code() { if (m_options.epilog) - m_buffer.append(" DFHEIRET INSERTED BY TRANSLATOR\n"); + m_result.emplace_back(replaced_line { " DFHEIRET INSERTED BY TRANSLATOR\n" }); if (m_options.prolog) { - m_buffer.append(" DFHEISTG INSERTED BY TRANSLATOR\n"); - m_buffer.append(" DFHEIEND INSERTED BY TRANSLATOR\n"); + m_result.emplace_back(replaced_line { " DFHEISTG INSERTED BY TRANSLATOR\n" }); + m_result.emplace_back(replaced_line { " DFHEIEND INSERTED BY TRANSLATOR\n" }); } } @@ -970,32 +976,51 @@ class cics_preprocessor : public preprocessor size_t char_length; }; - template - void echo_text_and_inject_label(It b, It e, const label_info& li) + void echo_text(const label_info& li) { // print lines, remove continuation character and label on the first line bool first_line = true; for (const auto& l : m_logical_line.segments) { - auto buf_len = m_buffer.size(); - m_buffer.append(lexing::utf8_substr(l.line, 0, cics_extract.end).str); + std::string buffer; + buffer.append(lexing::utf8_substr(l.line, 0, cics_extract.end).str); if (auto after_cont = lexing::utf8_substr(l.line, cics_extract.end + 1).str; !after_cont.empty()) - m_buffer.append(" ").append(after_cont); + buffer.append(" ").append(after_cont); if (first_line) - m_buffer.replace(buf_len, li.byte_length, li.char_length, ' '); + buffer.replace(0, li.byte_length, li.char_length, ' '); - m_buffer[buf_len] = '*'; - m_buffer.append("\n"); + buffer[0] = '*'; + buffer.append("\n"); + m_result.emplace_back(replaced_line { std::move(buffer) }); first_line = false; } + } + + static std::string generate_label_fragment(lexing::logical_line::const_iterator label_b, + lexing::logical_line::const_iterator label_e, + const label_info& li) + { + if (li.char_length <= 8) + return std::string(label_b, label_e) + std::string(9 - li.char_length, ' '); + else + return std::string(label_b, label_e) + " DS 0H\n"; + } - m_buffer.append(b, e); + void inject_call(lexing::logical_line::const_iterator label_b, + lexing::logical_line::const_iterator label_e, + const label_info& li) + { if (li.char_length <= 8) - m_buffer.append(9 - li.char_length, ' '); + m_result.emplace_back( + replaced_line { generate_label_fragment(label_b, label_e, li) + "DFHECALL =X'0E'\n" }); else - m_buffer.append(" DS 0H\n").append(9, ' '); + { + m_result.emplace_back(replaced_line { generate_label_fragment(label_b, label_e, li) }); + m_result.emplace_back(replaced_line { " DFHECALL =X'0E'\n" }); + } + // TODO: generate correct calls } void process_exec_cics(const std::match_results& matches) @@ -1006,9 +1031,8 @@ class cics_preprocessor : public preprocessor (size_t)std::distance(label_b, label_e), (size_t)std::count_if(label_b, label_e, [](unsigned char c) { return (c & 0xc0) != 0x80; }), }; - echo_text_and_inject_label(label_b, label_e, li); - - m_buffer.append("DFHECALL =X'0E'\n"); // TODO: generate correct calls + echo_text(li); + inject_call(label_b, label_e, li); } auto try_substituting_dfh(const std::match_results& matches) @@ -1023,151 +1047,169 @@ class cics_preprocessor : public preprocessor (size_t)std::count_if(label_b, label_e, [](unsigned char c) { return (c & 0xc0) != 0x80; }), }; - echo_text_and_inject_label(label_b, label_e, li); + echo_text(li); - auto text_to_add = matches[2].str(); + std::string text_to_add = matches[2].str(); if (auto instr_len = lexing::utf8_substr(text_to_add).char_count; instr_len < 4) text_to_add.append(4 - instr_len, ' '); text_to_add.append(1, ' ').append(m_mini_parser.operands()); + text_to_add.insert(0, generate_label_fragment(label_b, label_e, li)); + std::string_view prefix; std::string_view t = text_to_add; size_t line_limit = 62; while (true) { auto part = lexing::utf8_substr(t, 0, line_limit); - m_buffer.append(part.str); t.remove_prefix(part.str.size()); + if (t.empty()) + { + m_result.emplace_back(replaced_line { concat(prefix, part.str, "\n") }); break; + } + else + m_result.emplace_back(replaced_line { concat(prefix, part.str, "*\n") }); - m_buffer.append("*\n "); - + prefix = " "; line_limit = 56; } - m_buffer.append(1, '\n'); } return events; } - /* returns number of consumed lines */ - size_t fill_buffer(std::string_view& input, size_t lineno) + // Inherited via preprocessor + document generate_replacement(document doc) override { - if (std::exchange(m_pending_prolog, false)) - inject_prolog(); - if (!m_pending_dfh_null_error.empty()) - inject_dfh_null_error(std::exchange(m_pending_dfh_null_error, std::string_view())); + m_result.clear(); + m_result.reserve(doc.size()); - if (input.empty()) - { - if (!std::exchange(m_end_seen, true)) - inject_no_end_warning(); - return 0; - } + auto it = doc.begin(); + const auto end = doc.end(); - if (lineno == 0 && try_asm_xopts(input, lineno)) - return 0; + bool skip_continuation = false; + while (it != end) + { + const auto text = it->text(); + if (skip_continuation) + { + m_result.emplace_back(*it++); + skip_continuation = is_continued(text); + continue; + } + if (std::exchange(m_pending_prolog, false)) + inject_prolog(); + if (!m_pending_dfh_null_error.empty()) + inject_dfh_null_error(std::exchange(m_pending_dfh_null_error, std::string_view())); - auto [line, line_len_chars, _] = create_line_preview(input); + const auto lineno = it->lineno().value_or(0); // TODO: preprocessor chaining - if (ignore_line(line)) - return 0; + if (lineno == 0 && try_asm_xopts(it->text(), lineno)) + { + m_result.emplace_back(*it++); + // ignores continuation + continue; + } - // apparently lines full of characters are ignored - if (line_len_chars == valid_cols && line.find(' ') == std::string_view::npos) - return 0; + auto [line, line_len_chars, _] = create_line_preview(text); - static const std::regex line_of_interest("(?:[^ ]*)[ ]+(START|CSECT|RSECT|END)(?: .+)?"); + if (ignore_line(line)) + { + m_result.emplace_back(*it++); + skip_continuation = is_continued(text); + continue; + } + // apparently lines full of characters are ignored + if (line_len_chars == valid_cols && line.find(' ') == std::string_view::npos) + { + m_result.emplace_back(*it++); + skip_continuation = is_continued(text); + continue; + } - if (std::regex_match(line.begin(), line.end(), m_matches_sv, line_of_interest)) - { - process_asm_statement(*m_matches_sv[1].first); - return 0; - } + static const std::regex line_of_interest("(?:[^ ]*)[ ]+(START|CSECT|RSECT|END)(?: .+)?"); - const std::string_view input_backup = input; + if (std::regex_match(line.begin(), line.end(), m_matches_sv, line_of_interest)) + { + process_asm_statement(*m_matches_sv[1].first); + m_result.emplace_back(*it++); + skip_continuation = is_continued(text); + continue; + } - bool extracted = lexing::extract_logical_line(m_logical_line, input, cics_extract); - assert(extracted); - bool exec_cics_continuation_error = false; - if (m_logical_line.continuation_error) - { - exec_cics_continuation_error = true; - // keep 1st line only - m_logical_line.segments.erase(m_logical_line.segments.begin() + 1, m_logical_line.segments.end()); - } + const auto it_backup = it; - static const std::regex exec_cics("([^ ]*)[ ]+(?:[eE][xX][eE][cC][ ]+[cC][iI][cC][sS])(?: .+)?"); + it = extract_nonempty_logical_line(m_logical_line, it, end, cics_extract); + bool exec_cics_continuation_error = false; + if (m_logical_line.continuation_error) + { + exec_cics_continuation_error = true; + // keep 1st line only + m_logical_line.segments.erase(m_logical_line.segments.begin() + 1, m_logical_line.segments.end()); + } - if (std::regex_match(m_logical_line.begin(), m_logical_line.end(), m_matches_ll, exec_cics)) - { - process_exec_cics(m_matches_ll); + static const std::regex exec_cics("([^ ]*)[ ]+(?:[eE][xX][eE][cC][ ]+[cC][iI][cC][sS])(?: .+)?"); - if (exec_cics_continuation_error) + if (std::regex_match(m_logical_line.begin(), m_logical_line.end(), m_matches_ll, exec_cics)) { - if (m_diags) - m_diags->add_diagnostic(diagnostic_op::warn_CIC001(range(position(lineno, 0)))); - m_buffer.append("*DFH7080I W CONTINUATION OF EXEC COMMAND IGNORED.\n" - " DFHEIMSG 4\n"); + process_exec_cics(m_matches_ll); + + if (exec_cics_continuation_error) + { + if (m_diags) + m_diags->add_diagnostic(diagnostic_op::warn_CIC001(range(position(lineno, 0)))); + m_result.emplace_back(replaced_line { "*DFH7080I W CONTINUATION OF EXEC COMMAND IGNORED.\n" }); + m_result.emplace_back(replaced_line { " DFHEIMSG 4\n" }); + } + continue; } - return m_logical_line.segments.size(); - } + static const std::regex dfh_lookup( + "([^ ]*)[ ]+([A-Z#$@][A-Z#$@0-9]*)[ ]+(.*(DFHRESP|DFHVALUE)[ ]*\\([ ]*[A-Z]*[ ]*\\).*)", + std::regex_constants::icase); - static const std::regex dfh_lookup( - "([^ ]*)[ ]+([A-Z#$@][A-Z#$@0-9]*)[ ]+(.*(DFHRESP|DFHVALUE)[ ]*\\([ ]*[A-Z]*[ ]*\\).*)", - std::regex_constants::icase); + it = it_backup; - input = input_backup; + it = extract_nonempty_logical_line(m_logical_line, it, end, lexing::default_ictl); - extracted = lexing::extract_logical_line(m_logical_line, input, lexing::default_ictl); - assert(extracted); - - if (m_logical_line.continuation_error) - { - if (m_diags) - m_diags->add_diagnostic(diagnostic_op::warn_CIC001(range(position(lineno, 0)))); - } - else if (std::regex_match(m_logical_line.begin(), m_logical_line.end(), m_matches_ll, dfh_lookup)) - { - auto r = try_substituting_dfh(m_matches_ll); - if (r.error()) + if (m_logical_line.continuation_error) { if (m_diags) - m_diags->add_diagnostic( - diagnostic_op::warn_CIC002(range(position(lineno, 0)), r.error_variable_name())); - m_pending_dfh_null_error = r.error_variable_name(); + m_diags->add_diagnostic(diagnostic_op::warn_CIC001(range(position(lineno, 0)))); + } + else if (std::regex_match(m_logical_line.begin(), m_logical_line.end(), m_matches_ll, dfh_lookup)) + { + auto r = try_substituting_dfh(m_matches_ll); + if (r.error()) + { + if (m_diags) + m_diags->add_diagnostic( + diagnostic_op::warn_CIC002(range(position(lineno, 0)), r.error_variable_name())); + m_pending_dfh_null_error = r.error_variable_name(); + } + else if (r.substitutions_performed() > 0) + { + continue; + } } - else if (r.substitutions_performed() > 0) - return m_logical_line.segments.size(); - } - - input = input_backup; - - return 0; - } - // Inherited via preprocessor - std::optional generate_replacement(std::string_view& input, size_t& lineno) override - { - if (input.data() == m_last_position) - return std::nullopt; + it = it_backup; - m_buffer.clear(); - if (std::exchange(m_last_position, input.data()) == nullptr) - { - // nothing so far + m_result.emplace_back(*it++); + skip_continuation = is_continued(text); } - lineno += fill_buffer(input, lineno); - if (m_buffer.size()) - return m_buffer; - else - return std::nullopt; - } + if (std::exchange(m_pending_prolog, false)) + inject_prolog(); + if (!m_pending_dfh_null_error.empty()) + inject_dfh_null_error(std::exchange(m_pending_dfh_null_error, std::string_view())); + if (!std::exchange(m_end_seen, true)) + inject_no_end_warning(); - bool finished() const override { return !m_pending_prolog && m_pending_dfh_null_error.empty(); } + return document(std::move(m_result)); + } cics_preprocessor_options current_options() const { return m_options; } }; diff --git a/parser_library/src/processing/preprocessors/db2_preprocessor.cpp b/parser_library/src/processing/preprocessors/db2_preprocessor.cpp index 10480974b..e545f2436 100644 --- a/parser_library/src/processing/preprocessors/db2_preprocessor.cpp +++ b/parser_library/src/processing/preprocessors/db2_preprocessor.cpp @@ -16,11 +16,14 @@ #include #include #include +#include +#include #include #include "lexing/logical_line.h" #include "preprocessor_options.h" #include "processing/preprocessor.h" +#include "utils/concat.h" #include "workspaces/parse_lib_provider.h" namespace { @@ -37,15 +40,16 @@ constexpr std::string_view trim_right(std::string_view s) namespace hlasm_plugin::parser_library::processing { namespace { +using utils::concat; + class db2_preprocessor : public preprocessor { - const char* m_last_position = nullptr; lexing::logical_line m_logical_line; std::string m_operands; std::string m_version; library_fetcher m_libs; diagnostic_op_consumer* m_diags = nullptr; - std::string m_buffer; + std::vector m_result; static bool remove_space(std::string_view& s) { @@ -79,26 +83,26 @@ class db2_preprocessor : public preprocessor constexpr auto version_chunk = (size_t)32; if (m_version.size() <= version_chunk) { - m_buffer.append("SQLVERSP DC CL4'VER.' VERSION-ID PREFIX\n"); - m_buffer.append("SQLVERD1 DC CL64'").append(m_version).append("' VERSION-ID\n"); + m_result.emplace_back(replaced_line { "SQLVERSP DC CL4'VER.' VERSION-ID PREFIX\n" }); + m_result.emplace_back(replaced_line { concat("SQLVERD1 DC CL64'", m_version, "' VERSION-ID\n") }); } else { - m_buffer.append("SQLVERS DS CL68 VERSION-ID\n" - " ORG SQLVERS+0\n" - "SQLVERSP DC CL4'VER.' VERS-ID PREFIX\n"); + m_result.emplace_back(replaced_line { "SQLVERS DS CL68 VERSION-ID\n" }); + m_result.emplace_back(replaced_line { " ORG SQLVERS+0\n" }); + m_result.emplace_back(replaced_line { "SQLVERSP DC CL4'VER.' VERS-ID PREFIX\n" }); for (auto [version, i] = std::pair(std::string_view(m_version), 1); !version.empty(); version.remove_prefix(std::min(version.size(), version_chunk)), ++i) { auto i_str = std::to_string(i); - m_buffer.append("SQLVERD") - .append(i_str) - .append(" DC CL32'") - .append(version.substr(0, version_chunk)) - .append("' VERS-ID PART-") - .append(i_str) - .append("\n"); + m_result.emplace_back(replaced_line { concat("SQLVERD", + i_str, + " DC CL32'", + version.substr(0, version_chunk), + "' VERS-ID PART-", + i_str, + "\n") }); } } } @@ -108,119 +112,119 @@ class db2_preprocessor : public preprocessor if (!m_version.empty()) push_sql_version_data(); - m_buffer.append("***$$$ SQL WORKING STORAGE \n" - "SQLDSIZ DC A(SQLDLEN) SQLDSECT SIZE \n" - "SQLDSECT DSECT \n" - "SQLTEMP DS CL128 TEMPLATE \n" - "DSNTEMP DS F INT SCROLL VALUE \n" - "DSNTMP2 DS PL16 DEC SCROLL VALUE \n" - "DSNNROWS DS F MULTI-ROW N-ROWS VALUE \n" - "DSNNTYPE DS H MULTI-ROW N-ROWS TYPE \n" - "DSNNLEN DS H MULTI-ROW N-ROWS LENGTH\n" - "DSNPARMS DS 4F DSNHMLTR PARM LIST \n" - "DSNPNM DS CL386 PROCEDURE NAME \n" - "DSNCNM DS CL128 CURSOR NAME \n" - "SQL_FILE_READ EQU 2 \n" - "SQL_FILE_CREATE EQU 8 \n" - "SQL_FILE_OVERWRITE EQU 16 \n" - "SQL_FILE_APPEND EQU 32 \n" - " DS 0D \n" - "SQLPLIST DS F \n" - "SQLPLLEN DS H PLIST LENGTH \n" - "SQLFLAGS DS XL2 FLAGS \n" - "SQLCTYPE DS H CALL-TYPE \n" - "SQLPROGN DS CL8 PROGRAM NAME \n" - "SQLTIMES DS CL8 TIMESTAMP \n" - "SQLSECTN DS H SECTION \n" - "SQLCODEP DS A CODE POINTER \n" - "SQLVPARM DS A VPARAM POINTER \n" - "SQLAPARM DS A AUX PARAM PTR \n" - "SQLSTNM7 DS H PRE_V8 STATEMENT NUMBER\n" - "SQLSTYPE DS H STATEMENT TYPE \n" - "SQLSTNUM DS F STATEMENT NUMBER \n" - "SQLFLAG2 DS H internal flags \n" - "SQLRSRVD DS CL18 RESERVED \n" - "SQLPVARS DS CL8,F,2H,0CL44 \n" - "SQLAVARS DS CL8,F,2H,0CL44 \n" - " DS 0D \n" - "SQLDLEN EQU *-SQLDSECT \n"); + m_result.emplace_back(replaced_line { "***$$$ SQL WORKING STORAGE \n" }); + m_result.emplace_back(replaced_line { "SQLDSIZ DC A(SQLDLEN) SQLDSECT SIZE \n" }); + m_result.emplace_back(replaced_line { "SQLDSECT DSECT \n" }); + m_result.emplace_back(replaced_line { "SQLTEMP DS CL128 TEMPLATE \n" }); + m_result.emplace_back(replaced_line { "DSNTEMP DS F INT SCROLL VALUE \n" }); + m_result.emplace_back(replaced_line { "DSNTMP2 DS PL16 DEC SCROLL VALUE \n" }); + m_result.emplace_back(replaced_line { "DSNNROWS DS F MULTI-ROW N-ROWS VALUE \n" }); + m_result.emplace_back(replaced_line { "DSNNTYPE DS H MULTI-ROW N-ROWS TYPE \n" }); + m_result.emplace_back(replaced_line { "DSNNLEN DS H MULTI-ROW N-ROWS LENGTH\n" }); + m_result.emplace_back(replaced_line { "DSNPARMS DS 4F DSNHMLTR PARM LIST \n" }); + m_result.emplace_back(replaced_line { "DSNPNM DS CL386 PROCEDURE NAME \n" }); + m_result.emplace_back(replaced_line { "DSNCNM DS CL128 CURSOR NAME \n" }); + m_result.emplace_back(replaced_line { "SQL_FILE_READ EQU 2 \n" }); + m_result.emplace_back(replaced_line { "SQL_FILE_CREATE EQU 8 \n" }); + m_result.emplace_back(replaced_line { "SQL_FILE_OVERWRITE EQU 16 \n" }); + m_result.emplace_back(replaced_line { "SQL_FILE_APPEND EQU 32 \n" }); + m_result.emplace_back(replaced_line { " DS 0D \n" }); + m_result.emplace_back(replaced_line { "SQLPLIST DS F \n" }); + m_result.emplace_back(replaced_line { "SQLPLLEN DS H PLIST LENGTH \n" }); + m_result.emplace_back(replaced_line { "SQLFLAGS DS XL2 FLAGS \n" }); + m_result.emplace_back(replaced_line { "SQLCTYPE DS H CALL-TYPE \n" }); + m_result.emplace_back(replaced_line { "SQLPROGN DS CL8 PROGRAM NAME \n" }); + m_result.emplace_back(replaced_line { "SQLTIMES DS CL8 TIMESTAMP \n" }); + m_result.emplace_back(replaced_line { "SQLSECTN DS H SECTION \n" }); + m_result.emplace_back(replaced_line { "SQLCODEP DS A CODE POINTER \n" }); + m_result.emplace_back(replaced_line { "SQLVPARM DS A VPARAM POINTER \n" }); + m_result.emplace_back(replaced_line { "SQLAPARM DS A AUX PARAM PTR \n" }); + m_result.emplace_back(replaced_line { "SQLSTNM7 DS H PRE_V8 STATEMENT NUMBER\n" }); + m_result.emplace_back(replaced_line { "SQLSTYPE DS H STATEMENT TYPE \n" }); + m_result.emplace_back(replaced_line { "SQLSTNUM DS F STATEMENT NUMBER \n" }); + m_result.emplace_back(replaced_line { "SQLFLAG2 DS H internal flags \n" }); + m_result.emplace_back(replaced_line { "SQLRSRVD DS CL18 RESERVED \n" }); + m_result.emplace_back(replaced_line { "SQLPVARS DS CL8,F,2H,0CL44 \n" }); + m_result.emplace_back(replaced_line { "SQLAVARS DS CL8,F,2H,0CL44 \n" }); + m_result.emplace_back(replaced_line { " DS 0D \n" }); + m_result.emplace_back(replaced_line { "SQLDLEN EQU *-SQLDSECT \n" }); } void inject_SQLCA() { - m_buffer.append("***$$$ SQLCA \n" - "SQLCA DS 0F \n" - "SQLCAID DS CL8 ID \n" - "SQLCABC DS F BYTE COUNT \n" - "SQLCODE DS F RETURN CODE \n" - "SQLERRM DS H,CL70 ERR MSG PARMS \n" - "SQLERRP DS CL8 IMPL-DEPENDENT\n" - "SQLERRD DS 6F \n" - "SQLWARN DS 0C WARNING FLAGS \n" - "SQLWARN0 DS C'W' IF ANY \n" - "SQLWARN1 DS C'W' = WARNING \n" - "SQLWARN2 DS C'W' = WARNING \n" - "SQLWARN3 DS C'W' = WARNING \n" - "SQLWARN4 DS C'W' = WARNING \n" - "SQLWARN5 DS C'W' = WARNING \n" - "SQLWARN6 DS C'W' = WARNING \n" - "SQLWARN7 DS C'W' = WARNING \n" - "SQLEXT DS 0CL8 \n" - "SQLWARN8 DS C \n" - "SQLWARN9 DS C \n" - "SQLWARNA DS C \n" - "SQLSTATE DS CL5 \n" - "***$$$\n"); + m_result.emplace_back(replaced_line { "***$$$ SQLCA \n" }); + m_result.emplace_back(replaced_line { "SQLCA DS 0F \n" }); + m_result.emplace_back(replaced_line { "SQLCAID DS CL8 ID \n" }); + m_result.emplace_back(replaced_line { "SQLCABC DS F BYTE COUNT \n" }); + m_result.emplace_back(replaced_line { "SQLCODE DS F RETURN CODE \n" }); + m_result.emplace_back(replaced_line { "SQLERRM DS H,CL70 ERR MSG PARMS \n" }); + m_result.emplace_back(replaced_line { "SQLERRP DS CL8 IMPL-DEPENDENT\n" }); + m_result.emplace_back(replaced_line { "SQLERRD DS 6F \n" }); + m_result.emplace_back(replaced_line { "SQLWARN DS 0C WARNING FLAGS \n" }); + m_result.emplace_back(replaced_line { "SQLWARN0 DS C'W' IF ANY \n" }); + m_result.emplace_back(replaced_line { "SQLWARN1 DS C'W' = WARNING \n" }); + m_result.emplace_back(replaced_line { "SQLWARN2 DS C'W' = WARNING \n" }); + m_result.emplace_back(replaced_line { "SQLWARN3 DS C'W' = WARNING \n" }); + m_result.emplace_back(replaced_line { "SQLWARN4 DS C'W' = WARNING \n" }); + m_result.emplace_back(replaced_line { "SQLWARN5 DS C'W' = WARNING \n" }); + m_result.emplace_back(replaced_line { "SQLWARN6 DS C'W' = WARNING \n" }); + m_result.emplace_back(replaced_line { "SQLWARN7 DS C'W' = WARNING \n" }); + m_result.emplace_back(replaced_line { "SQLEXT DS 0CL8 \n" }); + m_result.emplace_back(replaced_line { "SQLWARN8 DS C \n" }); + m_result.emplace_back(replaced_line { "SQLWARN9 DS C \n" }); + m_result.emplace_back(replaced_line { "SQLWARNA DS C \n" }); + m_result.emplace_back(replaced_line { "SQLSTATE DS CL5 \n" }); + m_result.emplace_back(replaced_line { "***$$$\n" }); } void inject_SQLDA() { - m_buffer.append("***$$$ SQLDA \n" - "SQLTRIPL EQU C'3' \n" - "SQLDOUBL EQU C'2' \n" - "SQLSINGL EQU C' ' \n" - "* \n" - " SQLSECT SAVE \n" - "* \n" - "SQLDA DSECT \n" - "SQLDAID DS CL8 ID \n" - "SQLDABC DS F BYTE COUNT \n" - "SQLN DS H COUNT SQLVAR/SQLVAR2 ENTRIES \n" - "SQLD DS H COUNT VARS (TWICE IF USING BOTH)\n" - "* \n" - "SQLVAR DS 0F BEGIN VARS \n" - "SQLVARN DSECT , NTH VARIABLE \n" - "SQLTYPE DS H DATA TYPE CODE \n" - "SQLLEN DS 0H LENGTH \n" - "SQLPRCSN DS X DEC PRECISION \n" - "SQLSCALE DS X DEC SCALE \n" - "SQLDATA DS A ADDR OF VAR \n" - "SQLIND DS A ADDR OF IND \n" - "SQLNAME DS H,CL30 DESCRIBE NAME \n" - "SQLVSIZ EQU *-SQLDATA \n" - "SQLSIZV EQU *-SQLVARN \n" - "* \n" - "SQLDA DSECT \n" - "SQLVAR2 DS 0F BEGIN EXTENDED FIELDS OF VARS \n" - "SQLVAR2N DSECT , EXTENDED FIELDS OF NTH VARIABLE \n" - "SQLLONGL DS F LENGTH \n" - "SQLRSVDL DS F RESERVED \n" - "SQLDATAL DS A ADDR OF LENGTH IN BYTES \n" - "SQLTNAME DS H,CL30 DESCRIBE NAME \n" - "* \n" - " SQLSECT RESTORE \n" - "***$$$\n"); + m_result.emplace_back(replaced_line { "***$$$ SQLDA \n" }); + m_result.emplace_back(replaced_line { "SQLTRIPL EQU C'3' \n" }); + m_result.emplace_back(replaced_line { "SQLDOUBL EQU C'2' \n" }); + m_result.emplace_back(replaced_line { "SQLSINGL EQU C' ' \n" }); + m_result.emplace_back(replaced_line { "* \n" }); + m_result.emplace_back(replaced_line { " SQLSECT SAVE \n" }); + m_result.emplace_back(replaced_line { "* \n" }); + m_result.emplace_back(replaced_line { "SQLDA DSECT \n" }); + m_result.emplace_back(replaced_line { "SQLDAID DS CL8 ID \n" }); + m_result.emplace_back(replaced_line { "SQLDABC DS F BYTE COUNT \n" }); + m_result.emplace_back(replaced_line { "SQLN DS H COUNT SQLVAR/SQLVAR2 ENTRIES \n" }); + m_result.emplace_back(replaced_line { "SQLD DS H COUNT VARS (TWICE IF USING BOTH)\n" }); + m_result.emplace_back(replaced_line { "* \n" }); + m_result.emplace_back(replaced_line { "SQLVAR DS 0F BEGIN VARS \n" }); + m_result.emplace_back(replaced_line { "SQLVARN DSECT , NTH VARIABLE \n" }); + m_result.emplace_back(replaced_line { "SQLTYPE DS H DATA TYPE CODE \n" }); + m_result.emplace_back(replaced_line { "SQLLEN DS 0H LENGTH \n" }); + m_result.emplace_back(replaced_line { "SQLPRCSN DS X DEC PRECISION \n" }); + m_result.emplace_back(replaced_line { "SQLSCALE DS X DEC SCALE \n" }); + m_result.emplace_back(replaced_line { "SQLDATA DS A ADDR OF VAR \n" }); + m_result.emplace_back(replaced_line { "SQLIND DS A ADDR OF IND \n" }); + m_result.emplace_back(replaced_line { "SQLNAME DS H,CL30 DESCRIBE NAME \n" }); + m_result.emplace_back(replaced_line { "SQLVSIZ EQU *-SQLDATA \n" }); + m_result.emplace_back(replaced_line { "SQLSIZV EQU *-SQLVARN \n" }); + m_result.emplace_back(replaced_line { "* \n" }); + m_result.emplace_back(replaced_line { "SQLDA DSECT \n" }); + m_result.emplace_back(replaced_line { "SQLVAR2 DS 0F BEGIN EXTENDED FIELDS OF VARS \n" }); + m_result.emplace_back(replaced_line { "SQLVAR2N DSECT , EXTENDED FIELDS OF NTH VARIABLE \n" }); + m_result.emplace_back(replaced_line { "SQLLONGL DS F LENGTH \n" }); + m_result.emplace_back(replaced_line { "SQLRSVDL DS F RESERVED \n" }); + m_result.emplace_back(replaced_line { "SQLDATAL DS A ADDR OF LENGTH IN BYTES \n" }); + m_result.emplace_back(replaced_line { "SQLTNAME DS H,CL30 DESCRIBE NAME \n" }); + m_result.emplace_back(replaced_line { "* \n" }); + m_result.emplace_back(replaced_line { " SQLSECT RESTORE \n" }); + m_result.emplace_back(replaced_line { "***$$$\n" }); } void inject_SQLSECT() { - m_buffer.append(" MACRO \n" - " SQLSECT &TYPE \n" - " GBLC &SQLSECT \n" - " AIF ('&TYPE' EQ 'RESTORE').REST\n" - "&SQLSECT SETC '&SYSECT' \n" - " MEXIT \n" - ".REST ANOP \n" - "&SQLSECT CSECT \n" - " MEND \n"); + m_result.emplace_back(replaced_line { " MACRO \n" }); + m_result.emplace_back(replaced_line { " SQLSECT &TYPE \n" }); + m_result.emplace_back(replaced_line { " GBLC &SQLSECT \n" }); + m_result.emplace_back(replaced_line { " AIF ('&TYPE' EQ 'RESTORE').REST\n" }); + m_result.emplace_back(replaced_line { "&SQLSECT SETC '&SYSECT' \n" }); + m_result.emplace_back(replaced_line { " MEXIT \n" }); + m_result.emplace_back(replaced_line { ".REST ANOP \n" }); + m_result.emplace_back(replaced_line { "&SQLSECT CSECT \n" }); + m_result.emplace_back(replaced_line { " MEND \n" }); } void process_include(std::string_view operands, size_t lineno) @@ -235,7 +239,7 @@ class db2_preprocessor : public preprocessor inject_SQLDA(); return; } - m_buffer.append("***$$$\n"); + m_result.emplace_back(replaced_line { "***$$$\n" }); std::optional include_text; if (m_libs) @@ -247,21 +251,9 @@ class db2_preprocessor : public preprocessor return; } - std::string_view include = include_text.value(); - - while (!include.empty()) - { - if (fill_buffer(include, lineno, false) > 0) - continue; - while (true) - { - const auto text = lexing::extract_line(include).first; - m_buffer.append(text); - m_buffer.append("\n"); - if (text.size() <= lexing::default_ictl_copy.end || text[lexing::default_ictl_copy.end] == ' ') - break; - } - } + document d(include_text.value()); + d.convert_to_replaced(); + generate_replacement(d.begin(), d.end(), false); } static bool consume_words( std::string_view& l, std::initializer_list words, bool tolerate_no_space_at_end = false) @@ -366,13 +358,14 @@ class db2_preprocessor : public preprocessor void add_ds_line(std::string_view label, std::string_view label_suffix, std::string_view type, bool align = true) { - m_buffer.append(label) - .append(label_suffix) - .append(align && label.size() + label_suffix.size() < 8 ? 8 - (label.size() + label_suffix.size()) : 0, ' ') - .append(" DS ") - .append(align ? 2 + (type.front() != '0') : 0, ' ') - .append(type) - .append("\n"); + m_result.emplace_back(replaced_line { concat(label, + label_suffix, + std::string( + align && label.size() + label_suffix.size() < 8 ? 8 - (label.size() + label_suffix.size()) : 0, ' '), + " DS ", + std::string(align ? 2 + (type.front() != '0') : 0, ' '), + type, + "\n") }); }; struct lob_info_t @@ -448,11 +441,10 @@ class db2_preprocessor : public preprocessor add_ds_line(label, "_LENGTH", "FL4", false); add_ds_line(label, "_DATA", li.prefix + std::to_string(len <= li.limit ? len : li.limit), false); if (len > li.limit) - m_buffer - .append(" ORG *+(") + m_result.emplace_back(replaced_line { concat(" ORG *+(", // there seems be this strage artifical limit - .append(std::to_string(std::min(len - li.limit, 1073676289ull))) - .append(")\n"); + std::min(len - li.limit, 1073676289ULL), + ")\n") }); break; } } @@ -525,13 +517,13 @@ class db2_preprocessor : public preprocessor void process_regular_line(std::string_view label, size_t first_line_skipped) { if (!label.empty()) - m_buffer.append(label).append(" DS 0H\n"); + m_result.emplace_back(replaced_line { concat(label, " DS 0H\n") }); - m_buffer.append("***$$$\n"); + m_result.emplace_back(replaced_line { "***$$$\n" }); for (const auto& segment : m_logical_line.segments) { - m_buffer.append(segment.line); + std::string this_line(segment.line); auto operand_part = segment.code; if (first_line_skipped) @@ -539,25 +531,25 @@ class db2_preprocessor : public preprocessor const auto appended_line_size = segment.line.size(); operand_part.remove_prefix(first_line_skipped); if (!label.empty()) - m_buffer.replace(m_buffer.size() - appended_line_size, + this_line.replace(this_line.size() - appended_line_size, label.size(), label.size(), ' '); // mask out any label-like characters - m_buffer[m_buffer.size() - appended_line_size] = '*'; + this_line[this_line.size() - appended_line_size] = '*'; first_line_skipped = 0; } - m_buffer.append("\n"); + this_line.append("\n"); + m_result.emplace_back(replaced_line { std::move(this_line) }); m_operands.append(operand_part); } } void process_sql_type_line(size_t first_line_skipped) { - m_buffer.append("***$$$\n"); - m_buffer.append("*") - .append(m_logical_line.segments.front().code.substr(0, lexing::default_ictl.end - 1)) - .append("\n"); + m_result.emplace_back(replaced_line { "***$$$\n" }); + m_result.emplace_back(replaced_line { + concat("*", m_logical_line.segments.front().code.substr(0, lexing::default_ictl.end - 1), "\n") }); for (const auto& segment : m_logical_line.segments) { @@ -565,46 +557,44 @@ class db2_preprocessor : public preprocessor first_line_skipped = 0; } - m_buffer.append("***$$$\n"); + m_result.emplace_back(replaced_line { "***$$$\n" }); } - /* returns number of consumed lines */ - size_t fill_buffer(std::string_view& input, size_t lineno, bool include_allowed) + std::tuple check_line(std::string_view input) { - using namespace std::literals; - + static constexpr std::tuple ignore(line_type::ignore, 0, {}); std::string_view line_preview = create_line_preview(input); if (ignore_line(line_preview)) - return 0; + return ignore; size_t first_line_skipped = line_preview.size(); std::string_view label = extract_label(line_preview); if (!remove_space(line_preview)) - return 0; + return ignore; if (is_end(line_preview)) { push_sql_working_storage(); - return 0; + return ignore; } auto instruction = consume_instruction(line_preview); if (instruction == line_type::ignore) - return 0; + return ignore; if (!line_preview.empty()) first_line_skipped = line_preview.data() - input.data(); - // now we have a valid line + return { instruction, first_line_skipped, label }; + } + void process_nonempty_line( + size_t lineno, bool include_allowed, line_type instruction, size_t first_line_skipped, std::string_view label) + { m_operands.clear(); - m_logical_line.clear(); - - bool extracted = lexing::extract_logical_line(m_logical_line, input, lexing::default_ictl); - assert(extracted); if (m_logical_line.continuation_error && m_diags) m_diags->add_diagnostic(diagnostic_op::error_DB001(range(position(lineno, 0)))); @@ -615,7 +605,7 @@ class db2_preprocessor : public preprocessor process_regular_line(label, first_line_skipped); if (sql_has_codegen(m_operands)) generate_sql_code_mock(); - m_buffer.append("***$$$\n"); + m_result.emplace_back(replaced_line { "***$$$\n" }); break; case line_type::include: @@ -638,8 +628,6 @@ class db2_preprocessor : public preprocessor m_diags->add_diagnostic(diagnostic_op::error_DB004(range(position(lineno, 0)))); break; } - - return m_logical_line.segments.size(); } static bool sql_has_codegen(std::string_view sql) @@ -653,32 +641,78 @@ class db2_preprocessor : public preprocessor void generate_sql_code_mock() { // this function generates non-realistic sql statement replacement code, because people do strange things... - m_buffer.append(" LA 15,SQLCA \n" - " L 15,=V(DSNHLI) \n" - " BALR 14,15 \n"); + m_result.emplace_back(replaced_line { " LA 15,SQLCA \n" }); + m_result.emplace_back(replaced_line { " L 15,=V(DSNHLI) \n" }); + m_result.emplace_back(replaced_line { " BALR 14,15 \n" }); } - // Inherited via preprocessor - std::optional generate_replacement(std::string_view& input, size_t& lineno) override + void skip_process(line_iterator& it, line_iterator end) { - if (input.data() == m_last_position) - return std::nullopt; - - m_buffer.clear(); - if (std::exchange(m_last_position, input.data()) == nullptr) + static constexpr std::string_view PROCESS_LITERAL = "*PROCESS"; + for (; it != end; ++it) { - // injected right after ICTL or *PROCESS - inject_SQLSECT(); + const auto text = it->text(); + if (text.size() < PROCESS_LITERAL.size()) + break; + if (text.size() > PROCESS_LITERAL.size() && text[PROCESS_LITERAL.size()] != ' ') + break; + if (!std::equal( + PROCESS_LITERAL.begin(), PROCESS_LITERAL.end(), text.begin(), [](unsigned char l, unsigned char r) { + return l == ::toupper(r); + })) + break; + + m_result.push_back(*it); } + } - lineno += fill_buffer(input, lineno, true); - if (m_buffer.size()) - return m_buffer; - else - return std::nullopt; + void generate_replacement(line_iterator it, line_iterator end, bool include_allowed) + { + bool skip_continuation = false; + while (it != end) + { + const auto text = it->text(); + if (skip_continuation) + { + m_result.emplace_back(*it++); + skip_continuation = is_continued(text); + continue; + } + auto [instruction, first_line_skipped, label] = check_line(text); + if (instruction == line_type::ignore) + { + m_result.emplace_back(*it++); + skip_continuation = is_continued(text); + continue; + } + + m_logical_line.clear(); + + size_t lineno = it->lineno().value_or(0); // TODO: needs to be addressed for chained preprocessors + + it = extract_nonempty_logical_line(m_logical_line, it, end, lexing::default_ictl); + + process_nonempty_line(lineno, include_allowed, instruction, first_line_skipped, label); + } } - bool finished() const override { return true; } + // Inherited via preprocessor + document generate_replacement(document doc) override + { + m_result.clear(); + m_result.reserve(doc.size()); + + auto it = doc.begin(); + const auto end = doc.end(); + + skip_process(it, end); + // ignores ICTL + inject_SQLSECT(); + + generate_replacement(it, end, true); + + return document(std::move(m_result)); + } public: db2_preprocessor(const db2_preprocessor_options& opts, library_fetcher libs, diagnostic_op_consumer* diags) @@ -694,4 +728,5 @@ std::unique_ptr preprocessor::create( { return std::make_unique(opts, std::move(libs), diags); } + } // namespace hlasm_plugin::parser_library::processing diff --git a/parser_library/src/processing/processing_manager.cpp b/parser_library/src/processing/processing_manager.cpp index 966fb0fe6..136ad2964 100644 --- a/parser_library/src/processing/processing_manager.cpp +++ b/parser_library/src/processing/processing_manager.cpp @@ -184,10 +184,9 @@ void processing_manager::finish_macro_definition(macrodef_processing_result resu void processing_manager::start_lookahead(lookahead_start_data start) { // jump to the statement where the previous lookahead stopped - if (hlasm_ctx_.current_source().end_index < lookahead_stop_.end_index + if (hlasm_ctx_.current_source().end_index < lookahead_stop_.begin_index && (!hlasm_ctx_.in_opencode() || hlasm_ctx_.current_ainsert_id() <= lookahead_stop_ainsert_id)) - perform_opencode_jump( - context::source_position(lookahead_stop_.end_line + 1, lookahead_stop_.end_index), lookahead_stop_); + perform_opencode_jump(context::source_position(lookahead_stop_.end_index), lookahead_stop_); hlasm_ctx_.push_statement_processing(processing_kind::LOOKAHEAD); procs_.emplace_back(std::make_unique(ctx_, *this, *this, lib_provider_, std::move(start))); diff --git a/parser_library/test/processing/cics_preprocessor_test.cpp b/parser_library/test/processing/cics_preprocessor_test.cpp index b109ed011..b3ac75c77 100644 --- a/parser_library/test/processing/cics_preprocessor_test.cpp +++ b/parser_library/test/processing/cics_preprocessor_test.cpp @@ -47,11 +47,9 @@ TEST(cics_preprocessor, asm_xopts_parsing) { auto p = preprocessor::create( cics_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, nullptr); - size_t lineno = 0; - auto text = text_template; - auto result = p->generate_replacement(text, lineno); - EXPECT_FALSE(result.has_value()); + auto result = p->generate_replacement(document(text_template)); + EXPECT_GT(result.size(), 0); using hlasm_plugin::parser_library::processing::test::test_cics_current_options; EXPECT_EQ(test_cics_current_options(*p), expected) << text_template; @@ -83,40 +81,20 @@ TEST_P(cics_preprocessor_tests, basics) auto [text_template, config] = input; auto p = preprocessor::create( config, [](std::string_view) { return std::nullopt; }, nullptr); - size_t lineno = 0; - auto text = text_template; - - auto result_it = expected.begin(); - - bool passed_empty_to_preprocessor = false; - - while (!passed_empty_to_preprocessor || !text.empty() || !p->finished()) - { - if (text.empty()) - passed_empty_to_preprocessor = true; - - auto result = p->generate_replacement(text, lineno); - if (result.has_value()) - { - std::string_view to_check = result.value(); - while (!to_check.empty()) - { - ASSERT_NE(result_it, expected.end()) << text_template; - EXPECT_EQ(lexing::extract_line(to_check).first, *result_it); - ++result_it; - } - } - else - { - if (text.empty()) - break; - ASSERT_NE(result_it, expected.end()) << text_template; - EXPECT_EQ(lexing::extract_line(text).first, *result_it); - ++result_it; - } - } - EXPECT_EQ(result_it, expected.end()); + auto result = p->generate_replacement(document(text_template)); + + EXPECT_TRUE(std::equal(expected.begin(), + expected.end(), + result.begin(), + result.end(), + [](const auto& l, const auto& r) { + auto text = r.text(); + while (!text.empty() && (text.back() == '\n' || text.back() == '\r')) + text.remove_suffix(1); + return l == text; + })) + << text_template; } INSTANTIATE_TEST_SUITE_P(cics_preprocessor, diff --git a/parser_library/test/processing/db2_preprocessor_test.cpp b/parser_library/test/processing/db2_preprocessor_test.cpp index d954c37a0..5c47a6b3b 100644 --- a/parser_library/test/processing/db2_preprocessor_test.cpp +++ b/parser_library/test/processing/db2_preprocessor_test.cpp @@ -33,16 +33,14 @@ TEST(db2_preprocessor, first_line) auto p = preprocessor::create( db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, nullptr); std::string_view text = ""; - size_t lineno = 0; - auto result = p->generate_replacement(text, lineno); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(lineno, 0); + auto result = p->generate_replacement(document()); - EXPECT_NE(result.value().find("SQLSECT"), std::string::npos); - - EXPECT_FALSE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 0); + EXPECT_EQ(std::count_if(result.begin(), + result.end(), + [](const auto& l) { return l.text().find(" SQLSECT ") != std::string_view::npos; }), + 1); + EXPECT_TRUE(std::all_of(result.begin(), result.end(), [](const auto& l) { return !l.is_original(); })); } TEST(db2_preprocessor, last_line) @@ -50,23 +48,14 @@ TEST(db2_preprocessor, last_line) auto p = preprocessor::create( db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, nullptr); std::string_view text = "\n END "; - size_t lineno = 0; - - EXPECT_TRUE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 0); - text.remove_prefix(1); - ++lineno; - std::string_view original_text = text; - auto result = p->generate_replacement(text, lineno); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(lineno, 1); - EXPECT_EQ(original_text, text); // END should remain in the text + auto result = p->generate_replacement(document(text)); - EXPECT_EQ(result.value().find("***$$$ SQL WORKING STORAGE"), 0); - - EXPECT_FALSE(p->generate_replacement(text, lineno).has_value()); // but should not be processed again - EXPECT_EQ(lineno, 1); + EXPECT_EQ(std::count_if(result.begin(), + result.end(), + [](const auto& l) { return l.text().find("***$$$ SQL WORKING STORAGE") == 0; }), + 1); + EXPECT_EQ(std::count_if(result.begin(), result.end(), [](const auto& l) { return l.text() == " END "; }), 1); } TEST(db2_preprocessor, include) @@ -79,23 +68,15 @@ TEST(db2_preprocessor, include) }, nullptr); std::string_view text = "\n EXEC SQL INCLUDE MEMBER "; - size_t lineno = 0; - - EXPECT_TRUE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 0); - text.remove_prefix(1); - ++lineno; - std::string_view original_text = text; - auto result = p->generate_replacement(text, lineno); - ASSERT_TRUE(result.has_value()); - EXPECT_NE(original_text, text); // INCLUDE should be removed - EXPECT_EQ(lineno, 2); + auto result = p->generate_replacement(document(text)); - EXPECT_NE(result.value().find("member content\n"), std::string::npos); - - EXPECT_FALSE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 2); + EXPECT_EQ( + std::count_if(result.begin(), result.end(), [](const auto& l) { return l.text() == "member content"; }), 1); + EXPECT_EQ(std::count_if(result.begin(), + result.end(), + [](const auto& l) { return l.text().starts_with(" EXEC SQL INCLUDE MEMBER"); }), + 0); } TEST(db2_preprocessor, include_sqlca) @@ -109,24 +90,13 @@ TEST(db2_preprocessor, include_sqlca) }, nullptr); std::string_view text = "\n EXEC SQL INCLUDE SQLCA "; - size_t lineno = 0; - - EXPECT_TRUE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 0); - text.remove_prefix(1); - ++lineno; - - std::string_view original_text = text; - auto result = p->generate_replacement(text, lineno); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(lineno, 2); - EXPECT_NE(original_text, text); // INCLUDE should be removed - EXPECT_FALSE(called); - EXPECT_NE(result.value().find("***$$$ SQLCA"), std::string::npos); + auto result = p->generate_replacement(document(text)); - EXPECT_FALSE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 2); + EXPECT_EQ(std::count_if(result.begin(), + result.end(), + [](const auto& l) { return l.text().find("***$$$ SQLCA") != std::string::npos; }), + 1); } TEST(db2_preprocessor, include_sqlda) @@ -140,24 +110,13 @@ TEST(db2_preprocessor, include_sqlda) }, nullptr); std::string_view text = "\n EXEC SQL INCLUDE SQLDA "; - size_t lineno = 0; - EXPECT_TRUE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 0); - text.remove_prefix(1); - ++lineno; + auto result = p->generate_replacement(document(text)); - std::string_view original_text = text; - auto result = p->generate_replacement(text, lineno); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(lineno, 2); - EXPECT_NE(original_text, text); // INCLUDE should be removed - - EXPECT_FALSE(called); - EXPECT_NE(result.value().find("***$$$ SQLDA"), std::string::npos); - - EXPECT_FALSE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 2); + EXPECT_EQ(std::count_if(result.begin(), + result.end(), + [](const auto& l) { return l.text().find("***$$$ SQLDA") != std::string::npos; }), + 1); } TEST(db2_preprocessor, sql_like) @@ -171,24 +130,15 @@ TEST(db2_preprocessor, sql_like) }, nullptr); std::string_view text = "\n EXEC SQL SELECT 1 INTO :A FROM SYSIBM.SYSDUMMY1"; - size_t lineno = 0; - - EXPECT_TRUE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 0); - text.remove_prefix(1); - ++lineno; - std::string_view original_text = text; - auto result = p->generate_replacement(text, lineno); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(lineno, 2); - EXPECT_NE(original_text, text); // SQL should be removed + auto result = p->generate_replacement(document(text)); - EXPECT_FALSE(called); - EXPECT_EQ(result.value().find("***$$$\n*EXEC SQL SELECT 1 INTO :A FROM SYSIBM.SYSDUMMY1\n"), 0); - - EXPECT_FALSE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 2); + EXPECT_NE(std::adjacent_find(result.begin(), + result.end(), + [](const auto& l, const auto& r) { + return l.text() == "***$$$\n" && r.text() == "*EXEC SQL SELECT 1 INTO :A FROM SYSIBM.SYSDUMMY1\n"; + }), + result.end()); } TEST(db2_preprocessor, with_label) @@ -196,21 +146,21 @@ TEST(db2_preprocessor, with_label) auto p = preprocessor::create( db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, nullptr); std::string_view text = "\nABC EXEC SQL WHATEVER"; - size_t lineno = 0; - - EXPECT_TRUE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 0); - text.remove_prefix(1); - ++lineno; - auto result = p->generate_replacement(text, lineno); - ASSERT_TRUE(result.has_value()); - EXPECT_EQ(lineno, 2); + auto result = p->generate_replacement(document(text)); - EXPECT_EQ(result.value().find("ABC DS 0H\n***$$$\n* EXEC SQL WHATEVER"), 0); + const auto expected = { + std::string_view("ABC DS 0H\n"), + std::string_view("***$$$\n"), + std::string_view("* EXEC SQL WHATEVER\n"), + }; - EXPECT_FALSE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 2); + EXPECT_NE(std::search(result.begin(), + result.end(), + expected.begin(), + expected.end(), + [](const auto& l, const auto& r) { return l.text() == r; }), + result.end()); } TEST(db2_preprocessor, missing_member) @@ -220,13 +170,11 @@ TEST(db2_preprocessor, missing_member) db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, &diags); std::string_view text = " EXEC SQL INCLUDE MISSING"; - size_t lineno = 0; - EXPECT_TRUE(p->generate_replacement(text, lineno)); - EXPECT_EQ(lineno, 1); + auto doc = p->generate_replacement(document(text)); - ASSERT_EQ(diags.diags.size(), 1U); - EXPECT_EQ(diags.diags[0].code, "DB002"); + EXPECT_NE(doc.size(), 0); + EXPECT_TRUE(matches_message_codes(diags.diags, { "DB002" })); } TEST(db2_preprocessor, bad_continuation) @@ -237,13 +185,11 @@ TEST(db2_preprocessor, bad_continuation) std::string_view text = R"( EXEC SQL PRETENT SQL STATEMENT X badcontinuation)"; - size_t lineno = 0; - EXPECT_TRUE(p->generate_replacement(text, lineno)); - EXPECT_EQ(lineno, 2); + auto doc = p->generate_replacement(document(text)); - ASSERT_EQ(diags.diags.size(), 1U); - EXPECT_EQ(diags.diags[0].code, "DB001"); + EXPECT_NE(doc.size(), 0); + EXPECT_TRUE(matches_message_codes(diags.diags, { "DB001" })); } TEST(db2_preprocessor, no_nested_include) @@ -257,13 +203,11 @@ TEST(db2_preprocessor, no_nested_include) }, &diags); std::string_view text = " EXEC SQL INCLUDE MEMBER "; - size_t lineno = 0; - EXPECT_TRUE(p->generate_replacement(text, lineno)); - EXPECT_EQ(lineno, 1); + auto doc = p->generate_replacement(document(text)); + EXPECT_NE(doc.size(), 0); - ASSERT_EQ(diags.diags.size(), 1U); - EXPECT_EQ(diags.diags[0].code, "DB003"); + EXPECT_TRUE(matches_message_codes(diags.diags, { "DB003" })); } TEST(db2_preprocessor, sqlsect_available) @@ -710,55 +654,37 @@ BFILE SQL TYPE IS BLOB_FILE CFILE SQL TYPE IS CLOB_FILE DFILE SQL TYPE IS DBCLOB_FILE )"; - size_t lineno = 0; - EXPECT_TRUE(p->generate_replacement(text, lineno).has_value()); - EXPECT_EQ(lineno, 0); - text.remove_prefix(1); - ++lineno; - - std::vector expected = { + std::string_view expected = { R"( ***$$$ *RE SQL TYPE IS RESULT_SET_LOCATOR VARYING ***$$$ RE DS FL4 -)", - R"( ***$$$ *RO SQL TYPE IS ROWID ***$$$ RO DS H,CL40 -)", - R"( ***$$$ *TU SQL TYPE IS TABLE LIKE A AS LOCATOR ***$$$ TU DS FL4 -)", - R"( ***$$$ *TQ SQL TYPE IS TABLE LIKE 'A''B' AS LOCATOR ***$$$ TQ DS FL4 -)", - R"( ***$$$ *XB SQL TYPE IS XML AS BLOB 10 ***$$$ XB DS 0FL4 XB_LENGTH DS FL4 XB_DATA DS CL10 -)", - R"( ***$$$ *XC SQL TYPE IS XML AS CLOB 10K ***$$$ XC DS 0FL4 XC_LENGTH DS FL4 XC_DATA DS CL10240 -)", - R"( ***$$$ *XD SQL TYPE IS XML AS DBCLOB 10M ***$$$ @@ -766,16 +692,12 @@ XD DS 0FL4 XD_LENGTH DS FL4 XD_DATA DS GL65534 ORG *+(10420226) -)", - R"( ***$$$ *BL SQL TYPE IS BINARY LARGE OBJECT 10K ***$$$ BL DS 0FL4 BL_LENGTH DS FL4 BL_DATA DS CL10240 -)", - R"( ***$$$ *CL SQL TYPE IS CHARACTER LARGE OBJECT 10M ***$$$ @@ -783,8 +705,6 @@ CL DS 0FL4 CL_LENGTH DS FL4 CL_DATA DS CL65535 ORG *+(10420225) -)", - R"( ***$$$ *DL SQL TYPE IS DBCLOB 1G ***$$$ @@ -792,26 +712,18 @@ DL DS 0FL4 DL_LENGTH DS FL4 DL_DATA DS GL65534 ORG *+(1073676289) -)", - R"( ***$$$ *BLOC SQL TYPE IS BLOB_LOCATOR ***$$$ BLOC DS FL4 -)", - R"( ***$$$ *CLOC SQL TYPE IS CLOB_LOCATOR ***$$$ CLOC DS FL4 -)", - R"( ***$$$ *DLOC SQL TYPE IS DBCLOB_LOCATOR ***$$$ DLOC DS FL4 -)", - R"( ***$$$ *BFILE SQL TYPE IS BLOB_FILE ***$$$ @@ -820,8 +732,6 @@ BFILE_NAME_LENGTH DS FL4 BFILE_DATA_LENGTH DS FL4 BFILE_FILE_OPTIONS DS FL4 BFILE_NAME DS CL255 -)", - R"( ***$$$ *CFILE SQL TYPE IS CLOB_FILE ***$$$ @@ -830,8 +740,6 @@ CFILE_NAME_LENGTH DS FL4 CFILE_DATA_LENGTH DS FL4 CFILE_FILE_OPTIONS DS FL4 CFILE_NAME DS CL255 -)", - R"( ***$$$ *DFILE SQL TYPE IS DBCLOB_FILE ***$$$ @@ -843,23 +751,12 @@ DFILE_NAME DS CL255 )" }; - size_t result_id = 0; - while (!text.empty()) - { - ASSERT_LT(result_id, expected.size()); - - auto result = p->generate_replacement(text, lineno); - ASSERT_TRUE(result.has_value()); - - std::string_view e = expected[result_id]; - e.remove_prefix(1); + auto doc = p->generate_replacement(document(text)); + EXPECT_NE(doc.size(), 0); - EXPECT_EQ(result, e); + EXPECT_NE(doc.text().find(expected), std::string_view::npos); - ++result_id; - } - EXPECT_EQ(result_id, expected.size()); - EXPECT_EQ(diags.diags.size(), 0); + EXPECT_TRUE(diags.diags.empty()); } TEST(db2_preprocessor, sql_types_with_space) @@ -931,12 +828,9 @@ TEST(db2_preprocessor, sql_type_fails) auto p = preprocessor::create( db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, &diags); - size_t lineno = 0; - - EXPECT_TRUE(p->generate_replacement(text, lineno)); + p->generate_replacement(document(text)); - ASSERT_EQ(diags.diags.size(), 1U); - EXPECT_EQ(diags.diags[0].code, "DB004"); + EXPECT_TRUE(matches_message_codes(diags.diags, { "DB004" })); } } @@ -948,12 +842,9 @@ TEST(db2_preprocessor, sql_type_warn_on_continuation) auto p = preprocessor::create( db2_preprocessor_options {}, [](std::string_view) { return std::nullopt; }, &diags); - size_t lineno = 0; - - EXPECT_TRUE(p->generate_replacement(text, lineno)); + p->generate_replacement(document(text)); - ASSERT_EQ(diags.diags.size(), 1U); - EXPECT_EQ(diags.diags[0].code, "DB005"); + EXPECT_TRUE(matches_message_codes(diags.diags, { "DB005" })); } TEST(db2_preprocessor, no_codegen_for_unacceptable_sql_statement) diff --git a/utils/include/utils/concat.h b/utils/include/utils/concat.h new file mode 100644 index 000000000..23186562c --- /dev/null +++ b/utils/include/utils/concat.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2022 Broadcom. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Broadcom, Inc. - initial API and implementation + */ + +#ifndef HLASMPLUGIN_UTILS_CONCAT_H +#define HLASMPLUGIN_UTILS_CONCAT_H + +#include +#include +#include + +namespace hlasm_plugin::utils { + +namespace detail { +struct concat_helper +{ + void operator()(std::string& s, std::string_view t) const { s.append(t); } + template + std::enable_if_t> operator()(std::string& s, T&& t) const + { + s.append(std::to_string(std::forward(t))); + } + + constexpr static std::string_view span_sep = ", "; + template + void operator()(std::string& s, typename std::span span) const + { + bool first = true; + for (const auto& e : span) + { + if (!first) + s.append(span_sep); + else + first = false; + + operator()(s, e); + } + } + + size_t len(std::string_view t) const { return t.size(); } + template + std::enable_if_t, size_t> len(const T&) const + { + return 8; // arbitrary estimate for the length of the stringified argument (typically small numbers) + } + template + size_t len(const typename std::span& span) const + { + size_t result = 0; + for (const auto& e : span) + result += span_sep.size() + len(e); + + return result - (result ? span_sep.size() : 0); + } +}; + +} // namespace detail + +struct +{ + template + std::string operator()(Args&&... args) const + { + std::string result; + + detail::concat_helper h; + + result.reserve((... + h.len(std::as_const(args)))); + + (h(result, std::forward(args)), ...); + + return result; + } + +} static constexpr concat; + +} // namespace hlasm_plugin::utils + +#endif