diff --git a/clients/vscode-hlasmplugin/CHANGELOG.md b/clients/vscode-hlasmplugin/CHANGELOG.md index 59d5982b0..c25bdef37 100644 --- a/clients/vscode-hlasmplugin/CHANGELOG.md +++ b/clients/vscode-hlasmplugin/CHANGELOG.md @@ -8,6 +8,7 @@ - DB2 preprocessor now supports the VERSION option - Instruction set versioning support - Basic GOFF, XOBJECT and SYSOPT_XOBJECT support +- MNOTE support #### Fixed - Fixed an issue preventing correct N' attribute evaluation of empty subscript arrays diff --git a/language_server/src/lsp/feature_language_features.cpp b/language_server/src/lsp/feature_language_features.cpp index f5a6eb744..a6ae6e68e 100644 --- a/language_server/src/lsp/feature_language_features.cpp +++ b/language_server/src/lsp/feature_language_features.cpp @@ -106,10 +106,11 @@ void feature_language_features::definition(const json& id, const json& params) auto definition_position_uri = ws_mngr_.definition(uri_to_path(document_uri).c_str(), pos); - document_uri = - (definition_position_uri.file()[0] == '\0') ? document_uri : path_to_uri(definition_position_uri.file()); - json to_ret { { "uri", document_uri }, - { "range", range_to_json({ definition_position_uri.pos(), definition_position_uri.pos() }) } }; + document_uri = definition_position_uri.file().empty() ? document_uri : path_to_uri(definition_position_uri.file()); + json to_ret { + { "uri", document_uri }, + { "range", range_to_json({ definition_position_uri.pos(), definition_position_uri.pos() }) }, + }; response_->respond(id, "", to_ret); } diff --git a/language_server/src/lsp/lsp_server.cpp b/language_server/src/lsp/lsp_server.cpp index 2c835a144..563c55e35 100644 --- a/language_server/src/lsp/lsp_server.cpp +++ b/language_server/src/lsp/lsp_server.cpp @@ -285,6 +285,14 @@ void server::consume_diagnostics(parser_library::diagnostic_list diagnostics) { one_json["severity"] = (int)d.severity(); } + if (auto t = d.tags(); t != parser_library::diagnostic_tag::none) + { + auto& tags = one_json["tags"] = json::array(); + if (static_cast(t) & static_cast(parser_library::diagnostic_tag::unnecessary)) + tags.push_back(1); + if (static_cast(t) & static_cast(parser_library::diagnostic_tag::deprecated)) + tags.push_back(2); + } diags_array.push_back(std::move(one_json)); } diff --git a/language_server/test/regress_test.cpp b/language_server/test/regress_test.cpp index 9b4e26407..29d8b156d 100644 --- a/language_server/test/regress_test.cpp +++ b/language_server/test/regress_test.cpp @@ -593,3 +593,31 @@ TEST(regress_test, stability_sync) for (const auto& message : messages) s.message_received(message); } + +TEST(regress_test, check_diagnostic_tags) +{ + parser_library::workspace_manager ws_mngr; + message_provider_mock mess_p; + lsp::server s(ws_mngr); + s.set_send_message_provider(&mess_p); + + auto notf = make_notification("textDocument/didOpen", + R"#({"textDocument":{"uri":"file:///c%3A/test/note_test.hlasm","languageId":"plaintext","version":1,"text":" MNOTE 'test note'"}})#"_json); + s.message_received(notf); + + ASSERT_EQ(mess_p.notfs.size(), (size_t)2); + auto publish_notif = std::find_if(mess_p.notfs.begin(), mess_p.notfs.end(), [&](json notif) { + return notif["method"] == "textDocument/publishDiagnostics"; + }); + ASSERT_NE(publish_notif, mess_p.notfs.end()); + ASSERT_EQ((*publish_notif)["method"], "textDocument/publishDiagnostics"); + auto diagnostics = (*publish_notif)["params"]["diagnostics"]; + ASSERT_EQ(diagnostics.size(), (size_t)1); + EXPECT_EQ(diagnostics[0]["code"].get(), "MNOTE"); + ASSERT_GT(diagnostics[0].count("tags"), 0); + ASSERT_TRUE(diagnostics[0]["tags"].is_array()); + ASSERT_EQ(diagnostics[0]["tags"].size(), (size_t)1); + ASSERT_EQ(diagnostics[0]["tags"][0], 1); + + mess_p.notfs.clear(); +} diff --git a/parser_library/include/protocol.h b/parser_library/include/protocol.h index be396f948..cc010547d 100644 --- a/parser_library/include/protocol.h +++ b/parser_library/include/protocol.h @@ -213,6 +213,13 @@ enum class PARSER_LIBRARY_EXPORT diagnostic_severity unspecified = 5 }; +enum class PARSER_LIBRARY_EXPORT diagnostic_tag +{ + none = 0, + unnecessary = 1 << 0, + deprecated = 1 << 1, +}; + struct PARSER_LIBRARY_EXPORT diagnostic_related_info { diagnostic_related_info(diagnostic_related_info_s&); @@ -236,6 +243,7 @@ struct PARSER_LIBRARY_EXPORT diagnostic const char* message() const; const diagnostic_related_info related_info(size_t index) const; size_t related_info_size() const; + diagnostic_tag tags() const; private: diagnostic_s& impl_; diff --git a/parser_library/src/diagnostic.cpp b/parser_library/src/diagnostic.cpp index 15ec7f5cf..bbe946269 100644 --- a/parser_library/src/diagnostic.cpp +++ b/parser_library/src/diagnostic.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace hlasm_plugin::parser_library { @@ -32,7 +33,7 @@ struct concat_helper size_t len(std::string_view t) const { return t.size(); } template - std::enable_if_t, size_t> len(T&&) const + std::enable_if_t, size_t> len(const T&) const { return 8; // arbitrary estimate for the length of the stringified argument (typically small numbers) } @@ -45,7 +46,7 @@ std::string concat(Args&&... args) concat_helper h; - result.reserve((... + h.len(std::forward(args)))); + result.reserve((... + h.len(std::as_const(args)))); (h(result, std::forward(args)), ...); @@ -2411,6 +2412,16 @@ diagnostic_op diagnostic_op::error_U006_duplicate_base_specified(const range& ra return diagnostic_op(diagnostic_severity::error, "U006", "Base registers must be distinct.", range); } +diagnostic_op diagnostic_op::mnote_diagnostic(unsigned level, std::string_view message, const range& range) +{ + const auto lvl = level >= 8 ? diagnostic_severity::error + : level >= 4 ? diagnostic_severity::warning + : level >= 2 ? diagnostic_severity::info + : diagnostic_severity::hint; + const auto tag = level >= 2 ? diagnostic_tag::none : diagnostic_tag::unnecessary; + return diagnostic_op(lvl, "MNOTE", std::string(message), range, tag); +} + diagnostic_s diagnostic_s::error_W0002(std::string_view ws_uri, std::string_view ws_name) { return diagnostic_s(std::string(ws_uri), @@ -2418,7 +2429,8 @@ diagnostic_s diagnostic_s::error_W0002(std::string_view ws_uri, std::string_view diagnostic_severity::error, "W0002", concat("The configuration file proc_grps for workspace ", ws_name, " is malformed."), - {}); + {}, + diagnostic_tag::none); } diagnostic_s diagnostic_s::error_W0003(std::string_view file_name, std::string_view ws_name) @@ -2428,7 +2440,8 @@ diagnostic_s diagnostic_s::error_W0003(std::string_view file_name, std::string_v diagnostic_severity::error, "W0003", concat("The configuration file pgm_conf for workspace ", ws_name, " is malformed."), - {}); + {}, + diagnostic_tag::none); } diagnostic_s diagnostic_s::error_W0004(std::string_view file_name, std::string_view ws_name) @@ -2440,7 +2453,8 @@ diagnostic_s diagnostic_s::error_W0004(std::string_view file_name, std::string_v concat("The configuration file pgm_conf for workspace ", ws_name, " refers to a processor group, that is not defined in proc_grps"), - {}); + {}, + diagnostic_tag::none); } diagnostic_s diagnostic_s::error_W0005(std::string_view file_name, std::string_view proc_group) @@ -2450,7 +2464,8 @@ diagnostic_s diagnostic_s::error_W0005(std::string_view file_name, std::string_v diagnostic_severity::warning, "W0005", concat("The processor group '", proc_group, "' from '", file_name, "' defines invalid assembler options."), - {}); + {}, + diagnostic_tag::none); } diagnostic_s diagnostic_s::error_W0006(std::string_view file_name, std::string_view proc_group) @@ -2460,7 +2475,8 @@ diagnostic_s diagnostic_s::error_W0006(std::string_view file_name, std::string_v diagnostic_severity::warning, "W0006", concat("The processor group '", proc_group, "' from '", file_name, "' defines invalid preprocessor options."), - {}); + {}, + diagnostic_tag::none); } diagnostic_s diagnostic_s::error_W0007(std::string_view file_name, std::string_view proc_group) @@ -2474,7 +2490,8 @@ diagnostic_s diagnostic_s::error_W0007(std::string_view file_name, std::string_v "' from '", file_name, "' refers to invalid OPTABLE value. Using value UNI as default."), - {}); + {}, + diagnostic_tag::none); } diagnostic_s diagnostic_s::error_L0001(std::string_view path) @@ -2497,7 +2514,8 @@ diagnostic_s diagnostic_s::warning_L0003(std::string_view path) concat("Macros from library '", path, "' were selected by a deprecated mechanism to specify file extensions (alwaysRecognize in pgm_conf.json)."), - {}); + {}, + diagnostic_tag::none); } diagnostic_s diagnostic_s::warning_L0004(std::string_view path, std::string_view macro_name) @@ -2507,7 +2525,8 @@ diagnostic_s diagnostic_s::warning_L0004(std::string_view path, std::string_view diagnostic_severity::warning, "L0004", concat("Library '", path, "' contains multiple definitions of the macro '", macro_name, "'."), - {}); + {}, + diagnostic_tag::none); } diagnostic_s diagnostic_s::warning_L0005(std::string_view pattern, size_t limit) @@ -2517,7 +2536,8 @@ diagnostic_s diagnostic_s::warning_L0005(std::string_view pattern, size_t limit) diagnostic_severity::warning, "L0005", concat("Limit of ", limit, " directories was reached while evaluating library pattern '", pattern, "'."), - {}); + {}, + diagnostic_tag::none); } diagnostic_op diagnostic_op::error_S100(std::string_view message, const range& range) diff --git a/parser_library/src/diagnostic.h b/parser_library/src/diagnostic.h index 74d0294d7..7504066fe 100644 --- a/parser_library/src/diagnostic.h +++ b/parser_library/src/diagnostic.h @@ -105,13 +105,20 @@ struct diagnostic_op std::string code; std::string message; range diag_range; + diagnostic_tag tag; + diagnostic_op() = default; - diagnostic_op(diagnostic_severity severity, std::string code, std::string message, range diag_range = {}) + diagnostic_op(diagnostic_severity severity, + std::string code, + std::string message, + range diag_range = {}, + diagnostic_tag tag = diagnostic_tag::none) : severity(severity) , code(std::move(code)) , message(std::move(message)) - , diag_range(std::move(diag_range)) {}; + , diag_range(std::move(diag_range)) + , tag(tag) {}; static diagnostic_op error_I999(std::string_view instr_name, const range& range); @@ -708,6 +715,8 @@ struct diagnostic_op int e_off); static diagnostic_op error_U006_duplicate_base_specified(const range& range); + + static diagnostic_op mnote_diagnostic(unsigned level, std::string_view message, const range& range); }; struct range_uri_s @@ -754,7 +763,8 @@ class diagnostic_s diagnostic_severity severity, std::string code, std::string message, - std::vector related) + std::vector related, + diagnostic_tag tag) : file_name(std::move(file_name)) , diag_range(range) , severity(severity) @@ -762,6 +772,7 @@ class diagnostic_s , source("HLASM Plugin") , message(std::move(message)) , related(std::move(related)) + , tag(tag) {} diagnostic_s(std::string file_name, diagnostic_op diag_op) : file_name(std::move(file_name)) @@ -770,6 +781,7 @@ class diagnostic_s , code(std::move(diag_op.code)) , source("HLASM Plugin") , message(std::move(diag_op.message)) + , tag(diag_op.tag) {} diagnostic_s(diagnostic_op diag_op) : diag_range(std::move(diag_op.diag_range)) @@ -777,6 +789,7 @@ class diagnostic_s , code(std::move(diag_op.code)) , source("HLASM Plugin") , message(std::move(diag_op.message)) + , tag(diag_op.tag) {} @@ -787,6 +800,7 @@ class diagnostic_s std::string source; std::string message; std::vector related; + diagnostic_tag tag = diagnostic_tag::none; /* Lxxxx - local library messages diff --git a/parser_library/src/lexing/input_source.cpp b/parser_library/src/lexing/input_source.cpp index 22117b40f..90200def8 100644 --- a/parser_library/src/lexing/input_source.cpp +++ b/parser_library/src/lexing/input_source.cpp @@ -17,6 +17,7 @@ #include #include "logical_line.h" +#include "utils/utf8text.h" namespace hlasm_plugin::parser_library::lexing { @@ -37,7 +38,7 @@ void append_utf8_to_utf32(UTF32String& t, std::string_view s) s.remove_prefix(1); continue; } - const auto cs = utf8_prefix_sizes[c]; + const auto cs = utils::utf8_prefix_sizes[c]; if (cs.utf8 && cs.utf8 <= s.size()) { uint32_t v = c & 0b0111'1111u >> cs.utf8; @@ -48,7 +49,7 @@ void append_utf8_to_utf32(UTF32String& t, std::string_view s) } else { - t.append(1, substitute_character); + t.append(1, utils::substitute_character); s.remove_prefix(1); } } diff --git a/parser_library/src/lexing/logical_line.cpp b/parser_library/src/lexing/logical_line.cpp index 110df585b..e7a7e2a36 100644 --- a/parser_library/src/lexing/logical_line.cpp +++ b/parser_library/src/lexing/logical_line.cpp @@ -14,6 +14,8 @@ #include "logical_line.h" +#include "utils/utf8text.h" + namespace hlasm_plugin::parser_library::lexing { std::pair extract_line(std::string_view& input) { @@ -64,12 +66,12 @@ std::pair substr_step(std::string_view& s, size_t& chars) continue; } - const auto cs = utf8_prefix_sizes[c]; + const auto cs = utils::utf8_prefix_sizes[c]; if constexpr (validate) { - if (!cs.utf8 || s.size() < cs.utf8) + if (cs.utf8 < 2 || s.size() < cs.utf8 || !utils::utf8_valid_multibyte_prefix(s[0], s[1])) throw hlasm_plugin::parser_library::lexing::utf8_error(); - for (const auto* p = s.data() + 1; p != s.data() + cs.utf8; ++p) + for (const auto* p = s.data() + 2; p != s.data() + cs.utf8; ++p) if ((*p & 0xc0) != 0x80) throw hlasm_plugin::parser_library::lexing::utf8_error(); } diff --git a/parser_library/src/lexing/logical_line.h b/parser_library/src/lexing/logical_line.h index 28588d00a..6f28e0093 100644 --- a/parser_library/src/lexing/logical_line.h +++ b/parser_library/src/lexing/logical_line.h @@ -15,7 +15,6 @@ #ifndef HLASMPLUGIN_HLASMPARSERLIBRARY_LOGICAL_LINE_H #define HLASMPLUGIN_HLASMPARSERLIBRARY_LOGICAL_LINE_H -#include #include #include #include @@ -24,30 +23,6 @@ namespace hlasm_plugin::parser_library::lexing { -// Length of Unicode character in 8/16-bit chunks -struct char_size -{ - uint8_t utf8 : 4; - uint8_t utf16 : 4; -}; - -// Map first byte of UTF-8 encoded Unicode character to char_size -constexpr const auto utf8_prefix_sizes = []() { - std::array sizes = {}; - static_assert(std::numeric_limits::max() < sizes.size()); - for (int i = 0b0000'0000; i <= 0b0111'1111; ++i) - sizes[i] = { 1, 1 }; - for (int i = 0b1100'0000; i <= 0b1101'1111; ++i) - sizes[i] = { 2, 1 }; - for (int i = 0b1110'0000; i <= 0b1110'1111; ++i) - sizes[i] = { 3, 1 }; - for (int i = 0b1111'0000; i <= 0b1111'0111; ++i) - sizes[i] = { 4, 2 }; - return sizes; -}(); - -constexpr const char substitute_character = 0x1a; - class utf8_error : public std::runtime_error { public: diff --git a/parser_library/src/processing/instruction_sets/asm_processor.cpp b/parser_library/src/processing/instruction_sets/asm_processor.cpp index b10adb053..7eeeb136c 100644 --- a/parser_library/src/processing/instruction_sets/asm_processor.cpp +++ b/parser_library/src/processing/instruction_sets/asm_processor.cpp @@ -14,6 +14,8 @@ #include "asm_processor.h" +#include + #include "checking/instr_operand.h" #include "context/literal_pool.h" #include "context/ordinary_assembly/ordinary_assembly_dependency_solver.h" @@ -22,7 +24,7 @@ #include "expressions/mach_expr_term.h" #include "expressions/mach_expr_visitor.h" #include "postponed_statement_impl.h" - +#include "utils/utf8text.h" namespace hlasm_plugin::parser_library::processing { @@ -49,6 +51,17 @@ std::optional try_get_abs_value(const semantics::operand* op, cont return std::nullopt; return try_get_abs_value(expr_op, dep_solver); } + +std::optional try_get_number(std::string_view s) +{ + int v = 0; + const char* b = s.data(); + const char* e = b + s.size(); + if (auto ec = std::from_chars(b, e, v); ec.ec == std::errc {} && ec.ptr == e) + return v; + return std::nullopt; +} + } // namespace void asm_processor::process_sect(const context::section_kind kind, rebuilt_statement stmt) @@ -716,6 +729,7 @@ asm_processor::process_table_t asm_processor::create_table(context::hlasm_contex table.emplace(h_ctx.ids().add("DROP"), [this](rebuilt_statement stmt) { process_DROP(std::move(stmt)); }); table.emplace(h_ctx.ids().add("PUSH"), [this](rebuilt_statement stmt) { process_PUSH(std::move(stmt)); }); table.emplace(h_ctx.ids().add("POP"), [this](rebuilt_statement stmt) { process_POP(std::move(stmt)); }); + table.emplace(h_ctx.ids().add("MNOTE"), [this](rebuilt_statement stmt) { process_MNOTE(std::move(stmt)); }); return table; } @@ -1150,4 +1164,98 @@ void asm_processor::process_POP(rebuilt_statement stmt) dep_solver.derive_current_dependency_evaluation_context()); } +void asm_processor::process_MNOTE(rebuilt_statement stmt) +{ + static constexpr std::string_view MNOTE = "MNOTE"; + const auto& ops = stmt.operands_ref().value; + + std::optional level; + size_t first_op_len = 0; + + find_sequence_symbol(stmt); + + switch (ops.size()) + { + case 1: + level = 0; + break; + case 2: + switch (ops[0]->type) + { + case semantics::operand_type::EMPTY: + level = 1; + break; + case semantics::operand_type::ASM: + if (auto expr = ops[0]->access_asm()->access_expr(); !expr) + { + // fail + } + else if (dynamic_cast(expr->expression.get())) + { + level = 0; + first_op_len = 1; + } + else + { + const auto& val = expr->get_value(); + first_op_len = val.size(); + level = try_get_number(val); + } + break; + + default: + break; + } + break; + default: + add_diagnostic(diagnostic_op::error_A012_from_to(MNOTE, 1, 2, stmt.operands_ref().field_range)); + return; + } + if (!level.has_value() || level.value() < 0 || level.value() > 255) + { + add_diagnostic(diagnostic_op::error_A119_MNOTE_first_op_format(ops[0]->operand_range)); + return; + } + + std::string_view text; + + const auto& r = ops.back()->operand_range; + if (ops.back()->type != semantics::operand_type::ASM) + { + add_diagnostic(diagnostic_op::warning_A300_op_apostrophes_missing(MNOTE, r)); + } + else + { + auto* string_op = ops.back()->access_asm(); + if (string_op->kind == semantics::asm_kind::STRING) + { + text = string_op->access_string()->value; + } + else + { + if (string_op->kind == semantics::asm_kind::EXPR) + { + text = string_op->access_expr()->get_value(); + } + add_diagnostic(diagnostic_op::warning_A300_op_apostrophes_missing(MNOTE, r)); + } + } + + if (text.size() > checking::MNOTE_max_message_length) + { + add_diagnostic(diagnostic_op::error_A117_MNOTE_message_size(r)); + text = text.substr(0, checking::MNOTE_max_message_length); + } + else if (text.size() + first_op_len > checking::MNOTE_max_operands_length) + { + add_diagnostic(diagnostic_op::error_A118_MNOTE_operands_size(r)); + } + + std::string sanitized; + sanitized.reserve(text.size()); + utils::append_utf8_sanitized(sanitized, text); + + add_diagnostic(diagnostic_op::mnote_diagnostic(level.value(), sanitized, r)); +} + } // namespace hlasm_plugin::parser_library::processing diff --git a/parser_library/src/processing/instruction_sets/asm_processor.h b/parser_library/src/processing/instruction_sets/asm_processor.h index 95fecf21b..f32e236a2 100644 --- a/parser_library/src/processing/instruction_sets/asm_processor.h +++ b/parser_library/src/processing/instruction_sets/asm_processor.h @@ -71,6 +71,7 @@ class asm_processor : public low_language_processor void process_DROP(rebuilt_statement stmt); void process_PUSH(rebuilt_statement stmt); void process_POP(rebuilt_statement stmt); + void process_MNOTE(rebuilt_statement stmt); template void process_data_instruction(rebuilt_statement stmt); diff --git a/parser_library/src/processing/statement_fields_parser.cpp b/parser_library/src/processing/statement_fields_parser.cpp index fd74194f0..e59b4a9f0 100644 --- a/parser_library/src/processing/statement_fields_parser.cpp +++ b/parser_library/src/processing/statement_fields_parser.cpp @@ -17,6 +17,7 @@ #include "hlasmparser.h" #include "lexing/token_stream.h" #include "parsing/error_strategy.h" +#include "utils/utf8text.h" namespace hlasm_plugin::parser_library::processing { @@ -52,39 +53,6 @@ const parsing::parser_holder& statement_fields_parser::prepare_parser(const std: return *m_parser; } -void append_sanitized(std::string& result, std::string_view str) -{ - auto it = str.begin(); - auto end = str.end(); - while (true) - { - auto first_complex = std::find_if(it, end, [](unsigned char c) { return c >= 0x80; }); - result.append(it, first_complex); - it = first_complex; - if (it == end) - break; - - unsigned char c = *it; - auto cs = lexing::utf8_prefix_sizes[c]; - if (cs.utf8 && (end - it) >= cs.utf8 - && std::all_of(it + 1, it + cs.utf8, [](unsigned char c) { return (c & 0xC0) == 0x80; })) - { - result.append(it, it + cs.utf8); - it += cs.utf8; - } - else - { - static const char hex_digits[] = "0123456789ABCDEF"; - result.append(1, '<'); - result.append(1, hex_digits[(c >> 4) & 0xf]); - result.append(1, hex_digits[(c >> 0) & 0xf]); - result.append(1, '>'); - - ++it; - } - } -} - std::string decorate_message(const std::string& field, const std::string& message) { static const std::string_view prefix = "While evaluating the result of substitution '"; @@ -93,7 +61,7 @@ std::string decorate_message(const std::string& field, const std::string& messag result.reserve(prefix.size() + field.size() + arrow.size() + message.size()); result.append(prefix); - append_sanitized(result, field); + utils::append_utf8_sanitized(result, field); result.append(arrow); result.append(message); diff --git a/parser_library/src/protocol.cpp b/parser_library/src/protocol.cpp index 24c2cdc57..6155331a6 100644 --- a/parser_library/src/protocol.cpp +++ b/parser_library/src/protocol.cpp @@ -110,6 +110,8 @@ const diagnostic_related_info diagnostic::related_info(size_t index) const { ret size_t diagnostic::related_info_size() const { return impl_.related.size(); } +diagnostic_tag diagnostic::tags() const { return impl_.tag; } + //********************* diagnostics_container ******************* class diagnostic_list_impl diff --git a/parser_library/test/checking/asm_instr_diag_test.cpp b/parser_library/test/checking/asm_instr_diag_test.cpp index 33a33fe2a..896537d3f 100644 --- a/parser_library/test/checking/asm_instr_diag_test.cpp +++ b/parser_library/test/checking/asm_instr_diag_test.cpp @@ -830,9 +830,8 @@ TEST(diagnostics, mnote_first_op_format) analyzer a(input); a.analyze(); a.collect_diags(); - ASSERT_EQ(a.parser().getNumberOfSyntaxErrors(), (size_t)0); - ASSERT_EQ(a.diags().size(), (size_t)1); - ASSERT_EQ(a.diags().at(0).code, "A117"); + EXPECT_EQ(a.parser().getNumberOfSyntaxErrors(), (size_t)0); + EXPECT_TRUE(matches_message_codes(a.diags(), { "A300", "MNOTE" })); } TEST(diagnostics, mnote_long_message) @@ -863,8 +862,7 @@ TEST(diagnostics, mnote_long_message) a.analyze(); a.collect_diags(); ASSERT_EQ(a.parser().getNumberOfSyntaxErrors(), (size_t)0); - ASSERT_EQ(a.diags().size(), (size_t)1); - ASSERT_EQ(a.diags().at(0).code, "A117"); + EXPECT_TRUE(matches_message_codes(a.diags(), { "A117", "MNOTE" })); } TEST(diagnostics, iseq_number_of_operands) @@ -992,3 +990,162 @@ TEST(diagnostics, org_incorrect_first_op) ASSERT_EQ(a.diags().size(), (size_t)1); ASSERT_EQ(a.diags().at(0).code, "A245"); } + +struct mnote_test +{ + int code; + std::string text; + diagnostic_severity expected; +}; + +class mnote_fixture : public ::testing::TestWithParam +{}; + +INSTANTIATE_TEST_SUITE_P(mnote, + mnote_fixture, + ::testing::Values(mnote_test { -2, "test", diagnostic_severity::hint }, + mnote_test { -1, "test", diagnostic_severity::hint }, + mnote_test { 0, "test", diagnostic_severity::hint }, + mnote_test { 1, "test", diagnostic_severity::hint }, + mnote_test { 2, "test", diagnostic_severity::info }, + mnote_test { 3, "test", diagnostic_severity::info }, + mnote_test { 4, "test", diagnostic_severity::warning }, + mnote_test { 5, "test", diagnostic_severity::warning }, + mnote_test { 6, "test", diagnostic_severity::warning }, + mnote_test { 7, "test", diagnostic_severity::warning }, + mnote_test { 8, "test", diagnostic_severity::error }, + mnote_test { 20, "test", diagnostic_severity::error }, + mnote_test { 150, "test", diagnostic_severity::error }, + mnote_test { 255, "test", diagnostic_severity::error })); + +TEST_P(mnote_fixture, diagnostic_severity) +{ + const auto& [code, text, expected] = GetParam(); + std::string input = " MNOTE " + + (code == -2 ? "" + : code == -1 ? "*," + : std::to_string(code) + ",") + + "'" + text + "'"; + + analyzer a(input); + a.analyze(); + a.collect_diags(); + + ASSERT_EQ(a.diags().size(), (size_t)1); + + const auto& d = a.diags()[0]; + EXPECT_EQ(d.code, "MNOTE"); + EXPECT_EQ(d.message, text); + EXPECT_EQ(d.severity, expected); +} + +TEST(mnote, substitution_first) +{ + std::string input = R"( +&L SETA 4 + MNOTE &L,'test message' +)"; + + analyzer a(input); + a.analyze(); + a.collect_diags(); + + ASSERT_EQ(a.diags().size(), (size_t)1); + + const auto& d = a.diags()[0]; + EXPECT_EQ(d.code, "MNOTE"); + EXPECT_EQ(d.message, "test message"); + EXPECT_EQ(d.severity, diagnostic_severity::warning); +} + +TEST(mnote, substitution_both) +{ + std::string input = R"( +&L SETA 8 +&M SETC 'test message' + MNOTE &L,'&M' +)"; + + analyzer a(input); + a.analyze(); + a.collect_diags(); + + ASSERT_EQ(a.diags().size(), (size_t)1); + + const auto& d = a.diags()[0]; + EXPECT_EQ(d.code, "MNOTE"); + EXPECT_EQ(d.message, "test message"); + EXPECT_EQ(d.severity, diagnostic_severity::error); +} + +TEST(mnote, empty_first_arg) +{ + std::string input = R"( + MNOTE ,'test message' +)"; + + analyzer a(input); + a.analyze(); + a.collect_diags(); + + ASSERT_EQ(a.diags().size(), (size_t)1); + + const auto& d = a.diags()[0]; + EXPECT_EQ(d.code, "MNOTE"); + EXPECT_EQ(d.message, "test message"); + EXPECT_EQ(d.severity, diagnostic_severity::hint); +} + +TEST(mnote, three_args) +{ + std::string input = R"( + MNOTE ,'test message', +)"; + + analyzer a(input); + a.analyze(); + a.collect_diags(); + + EXPECT_TRUE(matches_message_codes(a.diags(), { "A012" })); +} + +TEST(mnote, emtpy_second_arg) +{ + std::string input = R"( + MNOTE 0, +)"; + + analyzer a(input); + a.analyze(); + a.collect_diags(); + + EXPECT_TRUE(matches_message_codes(a.diags(), { "MNOTE", "A300" })); +} + +TEST(mnote, missing_quotes) +{ + std::string input = R"( + MNOTE 0,test +)"; + + analyzer a(input); + a.analyze(); + a.collect_diags(); + + EXPECT_TRUE(matches_message_codes(a.diags(), { "MNOTE", "A300" })); +} + +TEST(mnote, nonprintable_characters) +{ + std::string input = R"( +&C SETC X2C('0101') + MNOTE 0,'&C' +)"; + + analyzer a(input); + a.analyze(); + a.collect_diags(); + + ASSERT_TRUE(matches_message_codes(a.diags(), { "MNOTE" })); + EXPECT_EQ(a.diags()[0].message, "<01><01>"); +} diff --git a/parser_library/test/context/macro_test.cpp b/parser_library/test/context/macro_test.cpp index 320aadb9b..eb9661d8c 100644 --- a/parser_library/test/context/macro_test.cpp +++ b/parser_library/test/context/macro_test.cpp @@ -735,8 +735,8 @@ TEST(macro, apostrophe_in_substitution) a.analyze(); a.collect_diags(); - EXPECT_EQ(a.diags().size(), (size_t)0); EXPECT_EQ(a.parser().getNumberOfSyntaxErrors(), (size_t)0); + EXPECT_TRUE(matches_message_codes(a.diags(), { "MNOTE" })); } TEST(macro, macro_call_reparse_range) diff --git a/parser_library/test/diagnostics_check_test.cpp b/parser_library/test/diagnostics_check_test.cpp index b66029bbf..3d26b4da4 100644 --- a/parser_library/test/diagnostics_check_test.cpp +++ b/parser_library/test/diagnostics_check_test.cpp @@ -294,7 +294,7 @@ label1 RSECT ASSERT_EQ(a.parser().getNumberOfSyntaxErrors(), (size_t)0); - ASSERT_EQ(a.diags().size(), (size_t)0); + EXPECT_TRUE(matches_message_codes(a.diags(), { "MNOTE" })); } TEST(diagnostics, parser_diagnostics_passing) diff --git a/parser_library/test/lexing/utf8_test.cpp b/parser_library/test/lexing/utf8_test.cpp index 69a3ced5c..513f706d3 100644 --- a/parser_library/test/lexing/utf8_test.cpp +++ b/parser_library/test/lexing/utf8_test.cpp @@ -18,6 +18,7 @@ #include "gtest/gtest.h" #include "lexing/logical_line.h" +#include "utils/utf8text.h" using namespace hlasm_plugin::parser_library::lexing; @@ -55,3 +56,23 @@ TEST(utf8, substr_with_validate) EXPECT_THROW(utf8_substr(str, off, len), utf8_error) << str << ":" << off << ":" << len; } } + +TEST(utf8, multibyte_validation) +{ + using namespace hlasm_plugin::utils; + for (const auto [f, s, e] : std::initializer_list> { + { 0, 0, false }, + { 0x7f, 0, false }, + { 0xa0, 0x80, false }, + { 0xc0, 0, false }, + { 0xc0, 0x80, false }, + { 0xc0, 0x90, false }, + { 0xc2, 0x80, true }, + { 0xed, 0xa0, false }, + { 0xed, 0xbf, false }, + { 0xf4, 0x8f, true }, + { 0xf4, 0x90, false }, + { 0xff, 0xff, false }, + }) + EXPECT_EQ(utf8_valid_multibyte_prefix(f, s), e); +} diff --git a/utils/include/utils/CMakeLists.txt b/utils/include/utils/CMakeLists.txt index 0b384c4ef..d98ee07d6 100644 --- a/utils/include/utils/CMakeLists.txt +++ b/utils/include/utils/CMakeLists.txt @@ -15,4 +15,5 @@ target_sources(hlasm_utils PUBLIC path.h platform.h similar.h + utf8text.h ) diff --git a/utils/include/utils/utf8text.h b/utils/include/utils/utf8text.h new file mode 100644 index 000000000..18db36da6 --- /dev/null +++ b/utils/include/utils/utf8text.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2022 Broadcom. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Broadcom, Inc. - initial API and implementation + */ + +#ifndef HLASMPLUGIN_UTILS_UTF8TEXT_H +#define HLASMPLUGIN_UTILS_UTF8TEXT_H + +#include +#include +#include +#include + +namespace hlasm_plugin::utils { + +// Length of Unicode character in 8/16-bit chunks +struct char_size +{ + uint8_t utf8 : 4; + uint8_t utf16 : 4; +}; + +// Map first byte of UTF-8 encoded Unicode character to char_size +extern constinit const std::array utf8_prefix_sizes; + +constexpr const char substitute_character = 0x1a; + +extern constinit const std::array utf8_valid_multibyte_prefix_table; + +inline bool utf8_valid_multibyte_prefix(unsigned char first, unsigned char second) +{ + if (first < 0xc0) + return false; + unsigned bitid = (first - 0xC0) << 4 | second >> 4; + return utf8_valid_multibyte_prefix_table[bitid / 8] & (0x80 >> bitid % 8); +} + +void append_utf8_sanitized(std::string& result, std::string_view str); +} // namespace hlasm_plugin::utils + +#endif \ No newline at end of file diff --git a/utils/src/CMakeLists.txt b/utils/src/CMakeLists.txt index 5850b325d..7adcc7987 100644 --- a/utils/src/CMakeLists.txt +++ b/utils/src/CMakeLists.txt @@ -12,6 +12,7 @@ target_sources(hlasm_utils PRIVATE platform.cpp + utf8text.cpp ) if(EMSCRIPTEN) diff --git a/utils/src/utf8text.cpp b/utils/src/utf8text.cpp new file mode 100644 index 000000000..f6f6d7e4c --- /dev/null +++ b/utils/src/utf8text.cpp @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2022 Broadcom. + * The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. + * + * This program and the accompanying materials are made + * available under the terms of the Eclipse Public License 2.0 + * which is available at https://www.eclipse.org/legal/epl-2.0/ + * + * SPDX-License-Identifier: EPL-2.0 + * + * Contributors: + * Broadcom, Inc. - initial API and implementation + */ + +#include "utils/utf8text.h" + +#include + +namespace hlasm_plugin::utils { +constinit const std::array utf8_prefix_sizes = []() { + std::array sizes = {}; + static_assert(std::numeric_limits::max() < sizes.size()); + for (int i = 0b0000'0000; i <= 0b0111'1111; ++i) + sizes[i] = { 1, 1 }; + for (int i = 0b1100'0000; i <= 0b1101'1111; ++i) + sizes[i] = { 2, 1 }; + for (int i = 0b1110'0000; i <= 0b1110'1111; ++i) + sizes[i] = { 3, 1 }; + for (int i = 0b1111'0000; i <= 0b1111'0111; ++i) + sizes[i] = { 4, 2 }; + return sizes; +}(); + +constinit const std::array utf8_valid_multibyte_prefix_table = []() { + std::array result {}; + const auto update = [&result](unsigned char f, unsigned char s) { + int bitid = (f - 0xC0) << 4 | s >> 4; + result[bitid / 8] |= (0x80 >> bitid % 8); + }; + const auto update_range = [update](unsigned char fl, unsigned char fh, unsigned char sl, unsigned char sh) { + for (unsigned char f = fl; f <= fh; ++f) + for (unsigned char s = sl; s <= sh; ++s) + update(f, s); + }; + + update_range(0xc2, 0xdf, 0x80, 0xbf); + update_range(0xe0, 0xe0, 0xa0, 0xbf); + update_range(0xe1, 0xec, 0x80, 0xbf); + update_range(0xed, 0xed, 0x80, 0x9f); + update_range(0xee, 0xef, 0x80, 0xbf); + update_range(0xf0, 0xf0, 0x90, 0xbf); + update_range(0xf1, 0xf3, 0x80, 0xbf); + update_range(0xf4, 0xf4, 0x80, 0x8f); + + return result; +}(); + +void append_utf8_sanitized(std::string& result, std::string_view str) +{ + auto it = str.begin(); + auto end = str.end(); + while (true) + { + // handle ascii printable characters + auto first_complex = std::find_if(it, end, [](unsigned char c) { return c < 0x20 || c >= 0x7f; }); + result.append(it, first_complex); + it = first_complex; + if (it == end) + break; + + + unsigned char c = *it; + auto cs = utf8_prefix_sizes[c]; + if (cs.utf8 > 1 && (end - it) >= cs.utf8 && utf8_valid_multibyte_prefix(c, *std::next(it)) + && std::all_of(it + 2, it + cs.utf8, [](unsigned char c) { return (c & 0xC0) == 0x80; })) + { + char32_t combined = c & ~(0xffu << (8 - cs.utf8)); + for (auto p = it + 1; p != it + cs.utf8; ++p) + combined = combined << 6 | *p & 0x3fu; + + if (combined < 0x8d + || combined > 0x9f && (0xfffe & combined) != 0xfffe && (combined < 0xfdd0 || combined > 0xfdef)) + { + result.append(it, it + cs.utf8); + it += cs.utf8; + continue; + } + } + + static constexpr char hex_digits[] = "0123456789ABCDEF"; + + // 0x00-0x1F, 0x7F, 0x8D-0x9F, not characters and invalid sequences + result.push_back('<'); + result.push_back(hex_digits[(c >> 4) & 0xf]); + result.push_back(hex_digits[(c >> 0) & 0xf]); + result.push_back('>'); + + ++it; + } +} +} // namespace hlasm_plugin::utils