Skip to content

Commit

Permalink
Allow specifying character set in templates (#184).
Browse files Browse the repository at this point in the history
  • Loading branch information
solemnwarning committed Apr 15, 2023
1 parent 95c0cb4 commit 8c7b955
Show file tree
Hide file tree
Showing 10 changed files with 493 additions and 14 deletions.
48 changes: 48 additions & 0 deletions help/pages/bt-types.tt
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,54 @@ func1(s);
func2(t);
[%- END %]

<h4>Character Sets</h4>

<p>
You can specify the character set of a string in a file using the following syntax:
</p>

[% WRAPPER "code.tt" -%]
[% WRAPPER "code-type.tt" %]char[% END %] field[32] &lt;charset = "UTF-8"&gt;;
[%- END %]

<p>
Setting the character set of a field will set the appropriate data type on the byte range in the file, so that the text is correctly displayed in the hex view.<br>
<br>
The charset attribute can only be specified on [% WRAPPER "inline-type.tt" %]char[][% END %] variables. "Wide" strings (such as UTF-16 or UTF-32 text) should therefore also be declared as [% WRAPPER "inline-type.tt" %]char[][% END %] arrays to take advantage of the character set handling.
</p>

<p>
The following character sets are currently supported:
</p>

<ul>
<li>"ASCII" - US-ASCII (7-bit)</li>
<li>"UTF-8"</li>
<li>"UTF-16LE" - UTF-16LE (Little Endian)</li>
<li>"UTF-16BE" - UTF-16BE (Big Endian)</li>
<li>"UTF-32LE" - UTF-32LE (Little Endian)</li>
<li>"UTF-32BE" - UTF-32BE (Big Endian)</li>
<li>"ISO-8859-1" - Latin-1 (ISO-8859-1: Western European)</li>
<li>"ISO-8859-2" - Latin-2 (ISO-8859-2: Central European)</li>
<li>"ISO-8859-3" - Latin-3 (ISO-8859-3: South European and Esperanto)</li>
<li>"ISO-8859-4" - Latin-4 (ISO-8859-4: Baltic, old)</li>
<li>"ISO-8859-5" - Cyrillic (ISO-8859-5)</li>
<li>"ISO-8859-6" - Arabic (ISO-8859-6)</li>
<li>"ISO-8859-7" - Greek (ISO-8859-7)</li>
<li>"ISO-8859-8" - Hebrew (ISO-8859-8)</li>
<li>"ISO-8859-9" - Latin-5 (ISO-8859-9: Turkish)</li>
<li>"ISO-8859-10" - Latin-6 (ISO-8859-10: Nordic)</li>
<li>"ISO-8859-11" - Thai (ISO-8859-11, unofficial)</li>
<li>"ISO-8859-13" - Latin-7 (ISO-8859-13: Baltic, new)</li>
<li>"ISO-8859-14" - Latin-8 (ISO-8859-14: Celtic)</li>
<li>"ISO-8859-15" - Latin-9 (ISO-8859-15: Revised Western European)</li>
<li>"CP437" - Code page 437 (IBM)</li>
<li>"MSCP932" - Code page 932 (Windows, "Shift JIS")</li>
<li>"MSCP936" - Code page 936 (Windows, "GBK")</li>
<li>"MSCP949" - Code page 949 (Windows, "UHC")</li>
<li>"MSCP950" - Code page 950 (Windows, "Big5")</li>
</ul>

<a name="struct"><h3>Structures</h3></a>

<p>
Expand Down
97 changes: 89 additions & 8 deletions plugins/binary-template/executor.lua
Original file line number Diff line number Diff line change
Expand Up @@ -1466,7 +1466,7 @@ expand_value = function(context, type_info, struct_arg_values, array_element_idx
end
end

local function _decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, initial_value, is_local)
local function _decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, attributes, initial_value, is_local)
local filename = statement[1]
local line_num = statement[2]

Expand Down Expand Up @@ -1566,6 +1566,59 @@ local function _decl_variable(context, statement, var_type, var_name, struct_arg
type_info = _make_overlay_type(type_info, { big_endian = false, rehex_type = type_info.rehex_type_le })
end

-- Variable attributes (so far) are only used for defining encoding on character arrays, so
-- we check for that attribute in this lovely kludge here.

local array_type_info = array_size ~= nil
and _make_aray_type(type_info)
or type_info

local string_charset

if attributes ~= nil
then
for i = 1, #attributes
do
local attr_name = attributes[i][1]
local attr_value_type = attributes[i][2] and attributes[i][2][1]
local attr_value = attributes[i][2] and attributes[i][2][2]

if attr_name == "charset" and _type_is_char_array(array_type_info)
then
if string_charset ~= nil
then
_template_error(context, "Attribute 'charset' specified multiple times")
end

if not _type_is_stringish(attr_value_type)
then
_template_error(context, "Unexpected type '" .. _get_type_name(attr_value_type) .. "' used as value for 'charset' attribute (expected string)")
end

local charset_name = _stringify_value(attr_value_type, attr_value)
local charset_valid = false

for j = 1, #context.valid_charsets
do
if context.valid_charsets[j] == charset_name
then
charset_valid = true
break
end
end

if not charset_valid
then
_template_error(context, "Unrecognised character set '" .. charset_name .. "' specified")
end

string_charset = charset_name
else
_template_error(context, "Invalid variable attribute '" .. attr_name .. "' used with type '" .. _get_type_name(array_type_info) .. "'")
end
end
end

local root_value

if array_size == nil
Expand All @@ -1585,12 +1638,11 @@ local function _decl_variable(context, statement, var_type, var_name, struct_arg
_template_error(context, "Expected numeric type for array size, got '" .. _get_type_name(ArrayLength_type) .. "'")
end

local array_type_info = _make_aray_type(type_info)

if type_info.base ~= "struct" and not context.declaring_local_var
then
local data_type_fmt = (context.big_endian and ">" or "<") .. type_info.string_fmt
root_value = FileArrayValue:new(context, context.next_variable, ArrayLength_val:get(), type_info.length, data_type_fmt)
root_value.charset = string_charset

context.next_variable = context.next_variable + (ArrayLength_val:get() * type_info.length)

Expand Down Expand Up @@ -1640,6 +1692,7 @@ _eval_variable = function(context, statement)
local var_name = statement[5]
local struct_args = statement[6]
local array_size = statement[7]
local attributes = statement[8]

local struct_arg_values = nil
if struct_args ~= nil
Expand All @@ -1652,7 +1705,26 @@ _eval_variable = function(context, statement)
end
end

_decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, nil, false)
local attributes_evaluated = nil
if attributes ~= nil
then
attributes_evaluated = {}

for i = 1, #attributes
do
local attr_name = attributes[i][3]
local attr_value = attributes[i][4]

if attr_value ~= nil
then
attr_value = { _eval_statement(context, attr_value) }
end

attributes_evaluated[i] = { attr_name, attr_value }
end
end

_decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, attributes_evaluated, nil, false)
end

_eval_local_variable = function(context, statement)
Expand All @@ -1676,7 +1748,7 @@ _eval_local_variable = function(context, statement)
local was_declaring_local_var = context.declaring_local_var
context.declaring_local_var = true

_decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, initial_value, true)
_decl_variable(context, statement, var_type, var_name, struct_arg_values, array_size, nil, initial_value, true)

context.declaring_local_var = was_declaring_local_var
end
Expand Down Expand Up @@ -1990,7 +2062,7 @@ _eval_struct_defn = function(context, statement)
local var_args = var_decl[2]
local array_size = var_decl[3]

_decl_variable(context, statement, type_info, var_name, var_args, array_size, nil, false)
_decl_variable(context, statement, type_info, var_name, var_args, array_size, nil, nil, false)
end
end

Expand Down Expand Up @@ -2105,7 +2177,7 @@ _eval_enum = function(context, statement)
local var_name = var_decl[1]
local array_size = var_decl[3]

_decl_variable(context, statement, type_info, var_name, nil, array_size, nil, false)
_decl_variable(context, statement, type_info, var_name, nil, array_size, nil, nil, false)
end
end

Expand Down Expand Up @@ -2546,6 +2618,8 @@ local function execute(interface, statements)
st_stack = {},

template_error = _template_error,

valid_charsets = interface.get_valid_charsets(),
}

for k, v in pairs(_builtin_functions)
Expand Down Expand Up @@ -2635,7 +2709,14 @@ local function execute(interface, statements)
-- for the range, else it would be displayed as a list of integers rather than a
-- contiguous byte sequence.

if not (type_info.is_array and (type_info.type_key == _builtin_types.char.type_key or type_info.type_key == _builtin_types.uint8_t.type_key))
if value.charset ~= nil
then
local data_start, data_end = value:data_range()
if data_start ~= nil
then
context.interface.set_data_type(data_start, (data_end - data_start), "text:" .. value.charset)
end
elseif not (type_info.is_array and (type_info.type_key == _builtin_types.char.type_key or type_info.type_key == _builtin_types.uint8_t.type_key))
then
local data_start, data_end = value:data_range()
if data_start ~= nil
Expand Down
Loading

0 comments on commit 8c7b955

Please sign in to comment.