diff --git a/help/pages/bt-types.tt b/help/pages/bt-types.tt index 112adc14..f0048b27 100644 --- a/help/pages/bt-types.tt +++ b/help/pages/bt-types.tt @@ -99,6 +99,54 @@ func1(s); func2(t); [%- END %] +

Character Sets

+ +

+You can specify the character set of a string in a file using the following syntax: +

+ +[% WRAPPER "code.tt" -%] +[% WRAPPER "code-type.tt" %]char[% END %] field[32] <charset = "UTF-8">; +[%- END %] + +

+Setting the character set of a field will set the appropriate data type on the byte range in the file, so that the text is correctly displayed in the hex view.
+
+The charset attribute can only be specified on [% WRAPPER "inline-type.tt" %]char[][% END %] variables, so "wide" strings should also be declared as [% WRAPPER "inline-type.tt" %]char[][% END %] in order to take advantage of the character set handling. +

+ +

+The following character sets are currently supported: +

+ + +

Structures

diff --git a/plugins/binary-template/executor.lua b/plugins/binary-template/executor.lua index e0f974c4..69f4e5b0 100644 --- a/plugins/binary-template/executor.lua +++ b/plugins/binary-template/executor.lua @@ -1466,7 +1466,7 @@ expand_value = function(context, type_info, struct_arg_values, array_element_idx end end -local function _decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, initial_value, is_local) +local function _decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, attributes, initial_value, is_local) local filename = statement[1] local line_num = statement[2] @@ -1566,6 +1566,59 @@ local function _decl_variable(context, statement, var_type, var_name, struct_arg type_info = _make_overlay_type(type_info, { big_endian = false, rehex_type = type_info.rehex_type_le }) end + -- Variable attributes (so far) are only used for defining encoding on character arrays, so + -- we check for that attribute in this lovely kludge here. + + local array_type_info = array_size ~= nil + and _make_aray_type(type_info) + or type_info + + local string_charset + + if attributes ~= nil + then + for i = 1, #attributes + do + local attr_name = attributes[i][1] + local attr_value_type = attributes[i][2] and attributes[i][2][1] + local attr_value = attributes[i][2] and attributes[i][2][2] + + if attr_name == "charset" and _type_is_char_array(array_type_info) + then + if string_charset ~= nil + then + _template_error(context, "Attribute 'charset' specified multiple times") + end + + if not _type_is_stringish(attr_value_type) + then + _template_error(context, "Unexpected type '" .. _get_type_name(attr_value_type) .. 
"' used as value for 'charset' attribute (expected string)") + end + + local charset_name = _stringify_value(attr_value_type, attr_value) + local charset_valid = false + + for j = 1, #context.valid_charsets + do + if context.valid_charsets[j] == charset_name + then + charset_valid = true + break + end + end + + if not charset_valid + then + _template_error(context, "Unrecognised character set '" .. charset_name .. "' specified") + end + + string_charset = charset_name + else + _template_error(context, "Invalid variable attribute '" .. attr_name .. "' used with type '" .. _get_type_name(array_type_info) .. "'") + end + end + end + local root_value if array_size == nil @@ -1585,12 +1638,11 @@ local function _decl_variable(context, statement, var_type, var_name, struct_arg _template_error(context, "Expected numeric type for array size, got '" .. _get_type_name(ArrayLength_type) .. "'") end - local array_type_info = _make_aray_type(type_info) - if type_info.base ~= "struct" and not context.declaring_local_var then local data_type_fmt = (context.big_endian and ">" or "<") .. 
type_info.string_fmt root_value = FileArrayValue:new(context, context.next_variable, ArrayLength_val:get(), type_info.length, data_type_fmt) + root_value.charset = string_charset context.next_variable = context.next_variable + (ArrayLength_val:get() * type_info.length) @@ -1640,6 +1692,7 @@ _eval_variable = function(context, statement) local var_name = statement[5] local struct_args = statement[6] local array_size = statement[7] + local attributes = statement[8] local struct_arg_values = nil if struct_args ~= nil @@ -1652,7 +1705,26 @@ _eval_variable = function(context, statement) end end - _decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, nil, false) + local attributes_evaluated = nil + if attributes ~= nil + then + attributes_evaluated = {} + + for i = 1, #attributes + do + local attr_name = attributes[i][3] + local attr_value = attributes[i][4] + + if attr_value ~= nil + then + attr_value = { _eval_statement(context, attr_value) } + end + + attributes_evaluated[i] = { attr_name, attr_value } + end + end + + _decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, attributes_evaluated, nil, false) end _eval_local_variable = function(context, statement) @@ -1676,7 +1748,7 @@ _eval_local_variable = function(context, statement) local was_declaring_local_var = context.declaring_local_var context.declaring_local_var = true - _decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, initial_value, true) + _decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, nil, initial_value, true) context.declaring_local_var = was_declaring_local_var end @@ -1990,7 +2062,7 @@ _eval_struct_defn = function(context, statement) local var_args = var_decl[2] local array_size = var_decl[3] - _decl_variable(context, statement, type_info, var_name, var_args, array_size, nil, false) + _decl_variable(context, statement, type_info, var_name, var_args, array_size, nil, 
nil, false) end end @@ -2105,7 +2177,7 @@ _eval_enum = function(context, statement) local var_name = var_decl[1] local array_size = var_decl[3] - _decl_variable(context, statement, type_info, var_name, nil, array_size, nil, false) + _decl_variable(context, statement, type_info, var_name, nil, array_size, nil, nil, false) end end @@ -2546,6 +2618,8 @@ local function execute(interface, statements) st_stack = {}, template_error = _template_error, + + valid_charsets = interface.get_valid_charsets(), } for k, v in pairs(_builtin_functions) @@ -2635,7 +2709,14 @@ local function execute(interface, statements) -- for the range, else it would be displayed as a list of integers rather than a -- contiguous byte sequence. - if not (type_info.is_array and (type_info.type_key == _builtin_types.char.type_key or type_info.type_key == _builtin_types.uint8_t.type_key)) + if value.charset ~= nil + then + local data_start, data_end = value:data_range() + if data_start ~= nil + then + context.interface.set_data_type(data_start, (data_end - data_start), "text:" .. 
value.charset) + end + elseif not (type_info.is_array and (type_info.type_key == _builtin_types.char.type_key or type_info.type_key == _builtin_types.uint8_t.type_key)) then local data_start, data_end = value:data_range() if data_start ~= nil diff --git a/plugins/binary-template/executor_spec.lua b/plugins/binary-template/executor_spec.lua index 4af227b6..f2ffbb0d 100644 --- a/plugins/binary-template/executor_spec.lua +++ b/plugins/binary-template/executor_spec.lua @@ -50,6 +50,14 @@ local function test_interface(data) file_length = function() return data:len() end, + + get_valid_charsets = function() + return { + "ASCII", + "ISO-8859-1", + "ISO-8859-2", + } + end, } return interface, log @@ -6834,4 +6842,171 @@ describe("executor", function() }) end, "Attempt to use undefined variable 'localvar' at test.bt:2") end) + + it("allows setting character set on a char array", function() + local interface, log = test_interface(string.char( + 0xD2, 0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + )) + + executor.execute(interface, { + { "test.bt", 1, "variable", "char", "s", nil, + -- array length + { "test.bt", 1, "num", 8 }, + + -- attributes + { + { "test.bt", 1, "charset", { "test.bt", 1, "str", "ASCII" } }, + } }, + + { "test.bt", 2, "function", "string", "charsetfunc", {}, + { + { "test.bt", 2, "return", + { "test.bt", 2, "str", "ISO-8859-1" } }, + } }, + + { "test.bt", 3, "variable", "char", "t", nil, + -- array length + { "test.bt", 3, "num", 8 }, + + -- attributes + { + { "test.bt", 3, "charset", { "test.bt", 3, "call", "charsetfunc", {} } }, + } }, + }) + + local expect_log = { + "set_comment(0, 8, s)", + "set_data_type(0, 8, text:ASCII)", + "set_comment(8, 8, t)", + "set_data_type(8, 8, text:ISO-8859-1)", + } + + assert.are.same(expect_log, log) + end) + + it("doesn't allow setting character set on non-char[] types", function() + local interface, log = test_interface(string.char( + 0xD2, 
0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + )) + + assert.has_error(function() + executor.execute(interface, { + { "test.bt", 1, "variable", "char", "s", nil, nil, + + -- attributes + { + { "test.bt", 1, "charset", { "test.bt", 1, "str", "ASCII" } }, + } }, + }) + end, "Invalid variable attribute 'charset' used with type 'char' at test.bt:1") + + assert.has_error(function() + executor.execute(interface, { + { "test.bt", 1, "variable", "unsigned char", "s", nil, + + -- array length + { "test.bt", 1, "num", 8 }, + + -- attributes + { + { "test.bt", 1, "charset", { "test.bt", 1, "str", "ASCII" } }, + } }, + }) + end, "Invalid variable attribute 'charset' used with type 'unsigned char[]' at test.bt:1") + end) + + it("doesn't allow specifying character set multiple times", function() + local interface, log = test_interface(string.char( + 0xD2, 0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + )) + + assert.has_error(function() + executor.execute(interface, { + { "test.bt", 1, "variable", "char", "s", nil, + + -- array length + { "test.bt", 1, "num", 8 }, + + -- attributes + { + { "test.bt", 1, "charset", { "test.bt", 1, "str", "ASCII" } }, + { "test.bt", 1, "charset", { "test.bt", 1, "str", "ASCII" } }, + } }, + }) + end, "Attribute 'charset' specified multiple times at test.bt:1") + end) + + it("doesn't allow specifying character set as non-string types", function() + local interface, log = test_interface(string.char( + 0xD2, 0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + )) + + assert.has_error(function() + executor.execute(interface, { + { "test.bt", 1, "variable", "char", "s", nil, + + -- array length + { "test.bt", 1, "num", 8 }, + + -- attributes + { + { "test.bt", 1, "charset", { "test.bt", 1, "num", 1 } }, + } }, + }) + end, "Unexpected 
type 'const int' used as value for 'charset' attribute (expected string) at test.bt:1") + + assert.has_error(function() + executor.execute(interface, { + { "test.bt", 1, "variable", "char", "s", nil, + + -- array length + { "test.bt", 1, "num", 8 }, + + -- attributes + { + { "test.bt", 1, "charset" }, -- void / no value + } }, + }) + end, "Unexpected type 'void' used as value for 'charset' attribute (expected string) at test.bt:1") + end) + + it("doesn't allow specifying an unknown character set", function() + local interface, log = test_interface(string.char( + 0xD2, 0x04, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 + )) + + assert.has_error(function() + executor.execute(interface, { + { "test.bt", 1, "variable", "char", "s", nil, + + -- array length + { "test.bt", 1, "num", 8 }, + + -- attributes + { + { "test.bt", 1, "charset", { "test.bt", 1, "str", "UTF-64" } }, + } }, + }) + end, "Unrecognised character set 'UTF-64' specified at test.bt:1") + end) end) diff --git a/plugins/binary-template/parser.lua b/plugins/binary-template/parser.lua index a38d285b..5bf57a72 100644 --- a/plugins/binary-template/parser.lua +++ b/plugins/binary-template/parser.lua @@ -410,15 +410,21 @@ local _parser = spc * P{ * (V("BRACE_BLOCK_CONTINUE") * (V("STMT") * spc + _PARSE_ERROR())) ^ 0 * V("BRACE_BLOCK_CLOSE"), - EXPR = + -- An expression will slurp up as many valid tokens as it finds rather than only consuming + -- valid expressions, which is a problem when the expression terminator is itself a valid + -- token in an expression, e.g. variable attributes are terminated by ">", so we have the + -- _IN_VAR_ATTR variants which won't slurp those characters (unless inside parentheses) and + -- also form the base of the main EXPR match. 
+ + EXPR_IN_VAR_ATTR = Ct( P(_capture_position) * Cc("_expr") * Ct( ( - Ct( P(_capture_position) * Cc("cast") * P("(") * spc * P(_capture_type) * P(")") * V("EXPR") ) + - V("EXPR2") + Ct( P(_capture_position) * Cc("cast") * P("(") * spc * P(_capture_type) * P(")") * V("EXPR_IN_VAR_ATTR") ) + + V("EXPR2_IN_VAR_ATTR") ) ^ 1 ) ), - EXPR2 = + EXPR2_IN_VAR_ATTR = P("(") * spc * V("EXPR") * P(")") * spc + Ct( P(_capture_position) * Cc("_ternary_t") * P("?") * spc * V("EXPR") * P(":") * spc) + Ct( P(_capture_position) * Cc("call") * name * Ct( S("(") * spc * (V("EXPR") * (comma * V("EXPR")) ^ 0) ^ -1 * S(")") ) * spc ) + @@ -426,11 +432,28 @@ local _parser = spc * P{ Ct( P(_capture_position) * Cc("postfix-decrement") * Ct( V("VALUE") ) * P("--") * spc) + Ct( V("VALUE") ) + Ct( P(_capture_position) * Cc("_token") * - C( P("<=") + P(">=") + P("==") + P("!=") + P("&&") + P("||") + P("+=") + P("-=") + P("*=") + P("/=") + P("%=") + P("<<=") + P(">>=") + P("&=") + P("^=") + P("|=") + P("<<") + P(">>") + P("++") + P("--") + S("!~*/%+-<>&^|=") ) * spc), + C( P("<=") + P(">=") + P("==") + P("!=") + P("&&") + P("||") + P("+=") + P("-=") + P("*=") + P("/=") + P("%=") + P("<<=") + P(">>=") + P("&=") + P("^=") + P("|=") + P("<<") + P(">>") + P("++") + P("--") + S("!~*/%+-&^|=") ) * spc), + + EXPR = + Ct( P(_capture_position) * Cc("_expr") * Ct( + ( + Ct( P(_capture_position) * Cc("cast") * P("(") * spc * P(_capture_type) * P(")") * V("EXPR") ) + + V("EXPR2") + ) ^ 1 + ) ), + + EXPR2 = + V("EXPR2_IN_VAR_ATTR") + + Ct( P(_capture_position) * Cc("_token") * + C( S("<>") ) * spc), EXPR_OR_NIL = V("EXPR") + Cc(nil) * spc, ZERO_OR_MORE_EXPRS = (V("EXPR") * (comma * V("EXPR")) ^ 0) ^ -1, + VAR_ATTR = Ct( + P(_capture_position) * name * + (P("=") * spc * V("EXPR_IN_VAR_ATTR") + Cc(nil))), + -- { -- "file.bt", <line>, -- "variable", @@ -438,12 +461,15 @@ local _parser = spc * P{ -- <variable name>, -- { <struct arguments> } OR nil, -- <array size> OR nil, + -- { { "file.bt", <line>, <attribute name>, <attribute value> OR nil }, ... 
} OR nil, -- } VAR_DEFN = Ct( P(_capture_position) * Cc("variable") * P(_capture_type) * name * (P("(") * spc * Ct( V("ZERO_OR_MORE_EXPRS") ) * P(")") * spc + Cc(nil)) * - (P("[") * spc * V("EXPR") * P("]") * spc + Cc(nil)) * P(";") * spc ), + (P("[") * spc * V("EXPR") * P("]") * spc + Cc(nil)) * + (P("<") * spc * Ct( V("VAR_ATTR") * (comma * V("VAR_ATTR")) ^ 0 ) * P(">") * spc) ^ -1 * + P(";") * spc ), -- { -- "file.bt", <line>, @@ -945,6 +971,7 @@ local function _compile_statement(s) then local arguments = s[6] local array_size = s[7] + local attributes = s[8] if arguments ~= nil then @@ -955,6 +982,19 @@ local function _compile_statement(s) end if array_size then _compile_expr(array_size) end + + if attributes ~= nil + then + for i = 1, #attributes + do + _resolve_pos(attributes[i]) + + if attributes[i][4] ~= nil + then + _compile_expr(attributes[i][4]) + end + end + end elseif op == "typedef" then local array_size = s[6] diff --git a/plugins/binary-template/parser_spec.lua b/plugins/binary-template/parser_spec.lua index 036b966b..9e817f8b 100644 --- a/plugins/binary-template/parser_spec.lua +++ b/plugins/binary-template/parser_spec.lua @@ -1939,4 +1939,44 @@ describe("parser", function() assert.are.same(expect, got) end) + + it("parses variable definitions with attributes", function() + local got + local expect + + got = parser.parse_text("int var_a < param_b>;") + expect = { + { "UNKNOWN FILE", 1, "variable", "int", "var_a", nil, nil, + -- attributes + { + { "UNKNOWN FILE", 1, "param_b", nil }, + } }, + } + + assert.are.same(expect, got) + + got = parser.parse_text("int var_a <param_c = 1>;") + expect = { + { "UNKNOWN FILE", 1, "variable", "int", "var_a", nil, nil, + -- attributes + { + { "UNKNOWN FILE", 1, "param_c", { "UNKNOWN FILE", 1, "num", 1 } } + } }, + } + + assert.are.same(expect, got) + + got = parser.parse_text("struct foo bar(1, 2, 3)[100] <align = 90, hello = \"world\", yes = (yes > no)> ;") + expect = { + { "UNKNOWN FILE", 1, "variable", "struct foo", "bar", { { "UNKNOWN FILE", 1, "num", 1 }, { "UNKNOWN 
FILE", 1, "num", 2 }, { "UNKNOWN FILE", 1, "num", 3 } }, { "UNKNOWN FILE", 1, "num", 100 }, + -- attributes + { + { "UNKNOWN FILE", 1, "align", { "UNKNOWN FILE", 1, "num", 90 } }, + { "UNKNOWN FILE", 1, "hello", { "UNKNOWN FILE", 1, "str", "world" } }, + { "UNKNOWN FILE", 1, "yes", { "UNKNOWN FILE", 1, "greater-than", { "UNKNOWN FILE", 1, "ref", { "yes" } }, { "UNKNOWN FILE", 1, "ref", { "no" } } } }, + } }, + } + + assert.are.same(expect, got) + end); end); diff --git a/plugins/binary-template/plugin.lua b/plugins/binary-template/plugin.lua index 63e95b4c..cd3d8596 100644 --- a/plugins/binary-template/plugin.lua +++ b/plugins/binary-template/plugin.lua @@ -220,6 +220,18 @@ rehex.AddToToolsMenu("Execute binary template / script...", function(window) error("Template execution aborted", 0) end end, + + get_valid_charsets = function() + local all_encodings = rehex.CharacterEncoding.all_encodings() + local valid_charsets = {} + + for i = 1, #all_encodings + do + table.insert(valid_charsets, all_encodings[i].key) + end + + return valid_charsets + end, } doc:transact_begin("Binary template") diff --git a/src/CharacterEncoding.luadoc b/src/CharacterEncoding.luadoc new file mode 100644 index 00000000..b10ca466 --- /dev/null +++ b/src/CharacterEncoding.luadoc @@ -0,0 +1,40 @@ +--- +-- A text encoding (character set). +-- @classmod rehex.CharacterEncoding + +--- The fixed identifier for the encoding (read only). +rehex.CharacterEncoding.key = nil + +--- The display name for the encoding (read only). +rehex.CharacterEncoding.label = nil + +--- Get all available encodings. +-- @function all_encodings +-- +-- @return A table of CharacterEncoding objects. +-- +-- **NOTE**: This is a static method/function. +-- +-- @usage +-- +-- local encodings = rehex.CharacterEncoding.all_encodings() +-- for i = 1, #encodings +-- do +-- print("key = " .. encodings[i].key .. ", label = " .. encodings[i].label) +-- end + +--- Get an encoding by its key. 
+-- @function encoding_by_key +-- +-- @param key The key of the encoding ("ASCII", "ISO-8859-1", etc) +-- +-- @return A CharacterEncoding object, or nil. +-- +-- **NOTE**: This is a static method/function. +-- +-- @usage +-- +-- local ascii_encoding = rehex.CharacterEncoding.encoding_by_key("ASCII") +-- if ascii_encoding ~= nil then +-- -- yep, ASCII still exists +-- end diff --git a/src/lua-bindings/rehex.i b/src/lua-bindings/rehex.i index e26c9343..37774271 100644 --- a/src/lua-bindings/rehex.i +++ b/src/lua-bindings/rehex.i @@ -1,4 +1,5 @@ #include "../App.hpp" +#include "../CharacterEncoder.hpp" #include "../document.hpp" #include "../mainwindow.hpp" @@ -116,3 +117,12 @@ class REHex::TabCreatedEvent: public wxEvent // Filthy hack to get the MainWindow handle into Lua land rather than an opaque userdata. REHex::MainWindow *GetEventObject(); }; + +class REHex::CharacterEncoding +{ + const wxString key; + const wxString label; + + static const REHex::CharacterEncoding *encoding_by_key(const wxString &key); + static LuaTable all_encodings(); +}; diff --git a/src/lua-bindings/rehex_override.hpp b/src/lua-bindings/rehex_override.hpp index 82b670ed..41c8efc4 100644 --- a/src/lua-bindings/rehex_override.hpp +++ b/src/lua-bindings/rehex_override.hpp @@ -206,3 +206,35 @@ static int LUACALL wxLua_REHex_Tab_get_selection_linear(lua_State *L) } } %end + +%override wxLua_REHex_CharacterEncoding_encoding_by_key +static int LUACALL wxLua_REHex_CharacterEncoding_encoding_by_key(lua_State *L) +{ + const wxString key = wxlua_getwxStringtype(L, 1); + + const REHex::CharacterEncoding* returns = REHex::CharacterEncoding::encoding_by_key(std::string(key)); + wxluaT_pushuserdatatype(L, returns, wxluatype_REHex_CharacterEncoding); + + return 1; +} +%end + +%override wxLua_REHex_CharacterEncoding_all_encodings +static int LUACALL wxLua_REHex_CharacterEncoding_all_encodings(lua_State *L) +{ + auto all_encodings = REHex::CharacterEncoding::all_encodings(); + + lua_newtable(L); /* Table to 
return */ + lua_Integer table_idx = 1; /* Next index to use in return table */ + + for(auto e = all_encodings.begin(); e != all_encodings.end(); ++e) + { + lua_pushinteger(L, table_idx++); + wxluaT_pushuserdatatype(L, *e, wxluatype_REHex_CharacterEncoding); + + lua_settable(L, -3); + } + + return 1; +} +%end diff --git a/src/lua-plugin-preload.lua b/src/lua-plugin-preload.lua index c6222ab7..d030e0ae 100644 --- a/src/lua-plugin-preload.lua +++ b/src/lua-plugin-preload.lua @@ -86,6 +86,7 @@ if _rehex_plugin_dir ~= nil then end -- Bodge in some less-obnoxious aliases for the classes generated by genwxbind.lua +rehex.CharacterEncoding = rehex.REHex_CharacterEncoding rehex.Comment = rehex.REHex_Document_Comment rehex.Document = rehex.REHex_Document rehex.MainWindow = rehex.REHex_MainWindow