Skip to content

Commit

Permalink
Implement softhyphen substitution
Browse files Browse the repository at this point in the history
  • Loading branch information
zauguin committed Jul 21, 2024
1 parent 775bfc1 commit b94bd3e
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 0 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,12 @@ All notable changes to the `tagpdf` package since the

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
this project uses date-based 'snapshot' version identifiers.

## [Unreleased]

### Added
- key activate/softhyphen and code to use soft hyphens for hyphenation
if supported by the font.

## [2024-06-20]
Version 0.99c
Expand Down
83 changes: 83 additions & 0 deletions tagpdf-backend.dtx
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ local iwfontattributeid = luatexbase.new_attribute ("g_@@_interwordfont_attr")
local tagunmarkedbool= token.create("g_@@_tagunmarked_bool")
local truebool = token.create("c_true_bool")
% \end{macrocode}
% with this token we can query the state of the softhyphen boolean
% and so detect if hyphens should be marked with ActualText
% \begin{macrocode}
-- Token handle for the softhyphen boolean; the callbacks compare its `.mode`
-- with `truebool.mode` to decide whether the feature is switched on.
local softhyphenbool = token.create("g_@@_softhyphen_bool")
% \end{macrocode}

% Now a number of local versions from global tables.
% Perhaps not all of them are needed; most node variants were copied from lua-debug.
% \begin{macrocode}
Expand Down Expand Up @@ -286,6 +292,9 @@ local KERN = node.id("kern")
local PENALTY = node.id("penalty")
local LOCAL_PAR = node.id("local_par")
local MATH = node.id("math")

-- Numeric subtypes of disc nodes that the softhyphen code reacts to.
-- NOTE(review): values presumably match LuaTeX's node.subtypes("disc")
-- (1 = explicit, 3 = regular) -- confirm against the LuaTeX manual.
local explicit_disc = 1
local regular_disc = 3
% \end{macrocode}
% Now we setup the main table structure. ltx is used by other latex code too!
% \begin{macrocode}
Expand Down Expand Up @@ -1267,6 +1276,80 @@ function ltx.@@.func.output_parenttree (abspage)
end
% \end{macrocode}
% \end{macro}
%
% \begin{macro}
% {
% process_softhyphen_pre
% process_softhyphen_post
% }
% First some local definitions. Since these are only needed locally everything gets wrapped into a block.
% \begin{macrocode}
do
-- Shared node properties table; used below to attach a flag to glyph nodes.
local properties = node.get_properties_table()
-- Key under which the flag is stored in a node's properties entry.
local is_soft_hyphen_prop = 'tagpdf.rewrite-softhyphen.is_soft_hyphen'
-- U+002D HYPHEN-MINUS: the character that gets replaced.
local hyphen_char = 0x2D
-- U+00AD SOFT HYPHEN: the replacement character.
local soft_hyphen_char = 0xAD
% \end{macrocode}
%
% A lookup table to test if the font supports the soft hyphen glyph.
% \begin{macrocode}
-- Per-font cache: softhyphen_fonts[fid] is true when the font with id `fid`
-- has format 'opentype' or 'truetype' and provides a character entry for the
-- soft hyphen, and false otherwise. The __index handler only runs on the
-- first lookup of a font id; it stores the answer in the table so that later
-- lookups are plain table reads.
local softhyphen_fonts = setmetatable({}, {__index = function(t, fid)
  -- Look the font up by the id that was actually requested. (The previous
  -- code indexed with `fontid`, which is not the handler's parameter, so the
  -- requested font was never inspected.)
  -- `identifiers` is defined outside this block.
  local fdir = identifiers[fid]
  local format = fdir and fdir.format
  local result = (format == 'opentype' or format == 'truetype')
  local characters = fdir and fdir.characters
  -- `~=` binds tighter than `and`, so this always yields a boolean.
  result = result and (characters and characters[soft_hyphen_char]) ~= nil
  t[fid] = result
  return result
end})
% \end{macrocode}
%
% A pre shaping callback to mark hyphens as being hyphenation hyphens.
% This runs before shaping to avoid affecting hyphens moved into
% discretionaries during shaping.
% \begin{macrocode}
-- Pre-shaping pass. When the softhyphen boolean is set, every character node
-- in the pre-break list of an explicit or regular discretionary gets the
-- `is_soft_hyphen_prop` flag in its properties entry. Always returns true.
local function process_softhyphen_pre(head, _context, _dir)
  local enabled = softhyphenbool.mode == truebool.mode
  if enabled then
    for disc_node, subtype in node.traverse_id(DISC, head) do
      local wanted_subtype = subtype == explicit_disc or subtype == regular_disc
      if wanted_subtype then
        for glyph_node in node.traverse_char(disc_node.pre) do
          local entry = properties[glyph_node]
          if entry then
            entry[is_soft_hyphen_prop] = true
          else
            -- No properties entry yet: create one that carries the flag.
            properties[glyph_node] = {[is_soft_hyphen_prop] = true}
          end
        end
      end
    end
  end
  return true
end
% \end{macrocode}
%
% Finally do the actual replacement after shaping. No checking for double processing here
% since the operation is idempotent.
% \begin{macrocode}
-- Post-shaping pass. When the softhyphen boolean is set, every glyph in the
-- pre-break list of a discretionary that (a) uses a font listed as supporting
-- the soft hyphen, (b) is currently a hyphen character and (c) carries the
-- `is_soft_hyphen_prop` flag is switched to the soft hyphen character.
-- Always returns true.
local function process_softhyphen_post(head, _context, _dir)
  if softhyphenbool.mode == truebool.mode then
    for disc_node in node.traverse_id(DISC, head) do
      for glyph_node, char, font_id in node.traverse_glyph(disc_node.pre) do
        local entry = properties[glyph_node]
        -- The font lookup comes first so the per-font cache is filled for
        -- every glyph visited, independent of the other conditions.
        local font_ok = softhyphen_fonts[font_id]
        if font_ok and char == hyphen_char then
          if entry and entry[is_soft_hyphen_prop] then
            glyph_node.char = soft_hyphen_char
            -- NOTE(review): presumably discards cached per-glyph data that
            -- still refers to the old character -- confirm.
            entry.glyph_info = nil
          end
        end
      end
    end
  end
  return true
end
-- Hook both passes into the shaping callbacks; both registrations use the
-- same description string 'tagpdf.rewrite-softhyphen'.
luatexbase.add_to_callback('pre_shaping_filter', process_softhyphen_pre, 'tagpdf.rewrite-softhyphen')
luatexbase.add_to_callback('post_shaping_filter', process_softhyphen_post, 'tagpdf.rewrite-softhyphen')
end
% \end{macrocode}
% \end{macro}
%
% \begin{macrocode}
%</lua>
% \end{macrocode}
Expand Down
20 changes: 20 additions & 0 deletions tagpdf.dtx
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@
% marked up as artifact. The initial value is true.
% \end{function}
%
% \begin{function}{activate/softhyphen (setup-key)}
% This key activates automatic handling of hyphens inserted
% by hyphenation. It is only used in luamode and replaces hyphens
% by U+00AD if the font supports this.
% \end{function}
%
% \begin{function}{page/tabsorder (setup-key), tabsorder (deprecated)}
% This sets the tabsorder on a page. The values are |row|, |column|, |structure| (default)
% or |none|. Currently this is set more or less globally. Finer control can be
Expand Down Expand Up @@ -354,6 +360,13 @@
% \end{macrocode}
% \end{variable}
%
% \begin{variable}{\g_@@_softhyphen_bool}
% This boolean controls if the code should try to automatically
% handle hyphens from hyphenation. It is currently only used in luamode.
% \begin{macrocode}
\bool_new:N \g_@@_softhyphen_bool
% \end{macrocode}
% \end{variable}
% \section{Variants of l3 commands}
% \begin{macrocode}
\prg_generate_conditional_variant:Nnn \pdf_object_if_exist:n {e}{T,F,TF}
Expand Down Expand Up @@ -648,6 +661,13 @@
tagunmarked .bool_gset:N = \g_@@_tagunmarked_bool,
% \end{macrocode}
% \end{macro}
% \begin{macro}{activate/softhyphen (setup-key)}
% This key activates (in luamode) the handling of soft hyphens.
% \begin{macrocode}
activate/softhyphen .bool_gset:N = \g_@@_softhyphen_bool,
activate/softhyphen .initial:n = false,
% \end{macrocode}
% \end{macro}
% \begin{macro}{page/tabsorder (setup-key),tabsorder (deprecated)}
% This sets the tabsorder on a page. The values are |row|, |column|, |structure| (default)
% or |none|. Currently this is set more or less globally. Finer control can be
Expand Down

0 comments on commit b94bd3e

Please sign in to comment.