OCTO-10961-honor-offset-commands

pbs · Mar 27, 2024 · 19fbf72 · 19fbf72
1 parent c7d1cc1
commit 19fbf72
Show file tree

Hide file tree

Showing 8 changed files with 116 additions and 88 deletions.
diff --git a/pycaption/scc/constants.py b/pycaption/scc/constants.py
@@ -1,5 +1,4 @@
 from itertools import product
-from collections import defaultdict
 
 COMMANDS = {
     '9420': '',

diff --git a/pycaption/scc/specialized_collections.py b/pycaption/scc/specialized_collections.py
@@ -10,6 +10,7 @@
     PAC_BYTES_TO_POSITIONING_MAP, COMMANDS, PAC_TAB_OFFSET_COMMANDS,
     MICROSECONDS_PER_CODEWORD, INCONVERTIBLE_TO_ASCII_EXTENDED_CHARS_ASSOCIATION
 )
+from .translator import not_italics_commands
 
 PopOnCue = collections.namedtuple("PopOnCue", "buffer, start, end")
 
@@ -254,7 +255,7 @@ def create_and_store(self, node_buffer, start, end=0):
                 layout_info = _get_layout_from_tuple(instruction.position)
                 caption.nodes.append(
                     CaptionNode.create_text(
-                        instruction.get_text(), layout_info=layout_info),
+                        instruction.text, layout_info=layout_info),
                 )
                 caption.layout_info = layout_info
 
@@ -312,40 +313,59 @@ def add_chars(self, *chars):
             node = _InstructionNode(position=current_position)
             self._collection.append(node)
 
-        # handle a simple line break
+        # add any pending spaces (e.g. requested by a tab offset command)
+        if self._position_tracer._spaces_to_add:
+            node = _InstructionNode.create_text(
+                current_position,
+                " " * self._position_tracer._spaces_to_add
+            )
+            self._collection.append(node)
+            self._position_tracer._spaces_to_add = 0
+        node.add_chars(*chars)
+
+    def interpret_command(self, command):
+        """Creates instruction node for the command
+
+        :type command: str
+        """
+        self._update_positioning(command)
+        text = COMMANDS.get(command, '')
+        current_position = self._position_tracer.get_current_position()
+
+        # if a command sets the text style to something other than italics
+        # while an italics tag is open, close the tag to reset the style;
+        # this is done first so the tag is closed on the current line
+
+        has_open_italics_tag = False
+        for node in self._collection[::-1]:
+            if node.is_italics_node():
+                if node.sets_italics_on():
+                    has_open_italics_tag = True
+                break
+        if command in not_italics_commands and has_open_italics_tag:
+            self._collection.append(
+                _InstructionNode.create_italics_style(
+                    self._position_tracer.get_current_position(),
+                    turn_on=False
+                )
+            )
+
+        # add break instruction node
         if self._position_tracer.is_linebreak_required():
-            # must insert a line break here
             self._collection.append(_InstructionNode.create_break(
                 position=current_position))
-            node = _InstructionNode.create_text(current_position)
-            self._collection.append(node)
             self._position_tracer.acknowledge_linebreak_consumed()
 
-        # handle completely new positioning
-        elif self._position_tracer.is_repositioning_required():
+        # add repositioning instruction node
+        if self._position_tracer.is_repositioning_required():
             self._collection.append(
                 _InstructionNode.create_repositioning_command(
                     current_position
                 )
             )
-            node = _InstructionNode.create_text(current_position)
-            self._collection.append(node)
             self._position_tracer.acknowledge_position_changed()
 
-        node.add_chars(*chars)
-
-    def interpret_command(self, command):
-        """Given a command determines whether to turn italics on or off,
-        or to set the positioning
-
-        This is mostly used to convert from the legacy-style commands
-
-        :type command: str
-        """
-        self._update_positioning(command)
-
-        text = COMMANDS.get(command, '')
-
+        # add opening/closing italics nodes
         if 'italic' in text:
             if 'end' not in text:
                 self._collection.append(
@@ -370,6 +390,7 @@ def _update_positioning(self, command):
             prev_positioning = self._position_tracer.default
             positioning = (prev_positioning[0],
                            prev_positioning[1] + tab_offset)
+            self._position_tracer._spaces_to_add = tab_offset
         else:
             first, second = command[:2], command[2:]
 
@@ -498,7 +519,6 @@ def add_chars(self, *args):
         """
         if self.text is None:
             self.text = ''
-
         self.text += ''.join(args)
 
     def is_text_node(self):

diff --git a/pycaption/scc/state_machines.py b/pycaption/scc/state_machines.py
@@ -17,6 +17,7 @@ def __init__(self, positioning=None):
         # this attribute is used to store it and determine by comparison if the
         # next positioning is actually a Tab Offset
         self._last_column = None
+        self._spaces_to_add = 0
 
     def update_positioning(self, positioning):
         """Being notified of a position change, updates the internal state,
@@ -35,26 +36,15 @@ def update_positioning(self, positioning):
             return
 
         row, col = current
-        if self._break_required:
-            col = self._last_column
         new_row, new_col = positioning
-        is_tab_offset = new_row == row and col + 1 <= new_col <= col + 3
 
-        # One line below will be treated as line break, not repositioning
         if new_row == row + 1:
-            self._positions.append((new_row, col))
+            self._positions.append((new_row, new_col))
             self._break_required = True
-            self._last_column = new_col
-        # Tab offsets after line breaks will be ignored to avoid repositioning
-        elif self._break_required and is_tab_offset:
-            return
+            self._spaces_to_add = new_col
         else:
-            # Reset the "current" position altogether.
             self._positions = [positioning]
-            # Tab offsets are not interpreted as repositioning, but adjustments
-            # to the previous PAC command
-            if not is_tab_offset:
-                self._repositioning_required = True
+            self._repositioning_required = True
 
     def get_current_position(self):
         """Returns the current usable position

diff --git a/pycaption/scc/translator.py b/pycaption/scc/translator.py
@@ -545,6 +545,11 @@
     "94a2": "Alarm On"
 }
 
+not_italics_commands = {
+    key: value for key, value in COMMAND_LABELS.items()
+    if "italic" not in value
+}
+
 
 def translate_scc(scc_content, brackets='[]'):
     """

diff --git a/tests/fixtures/dfxp.py b/tests/fixtures/dfxp.py
@@ -939,8 +939,8 @@ def sample_dfxp_from_scc_output():
    </p>
    <p begin="00:00:03.136" end="00:00:09.709" region="r3" style="default">
     ghgh<br/>
-    ijij<br/>
-    klkl
+        ijij<br/>
+        klkl
    </p>
    <p begin="00:00:09.709" end="00:00:11.711" region="r4" style="default">
     mnmn
@@ -953,8 +953,8 @@ def sample_dfxp_from_scc_output():
    </p>
    <p begin="00:00:11.711" end="00:00:20.086" region="r7" style="default">
     stst<br/>
-    uvuv<br/>
-    wxwx
+        uvuv<br/>
+        wxwx
    </p>
    <p begin="00:00:20.086" end="00:00:22.088" region="r8" style="default">
     yzyz
@@ -967,8 +967,8 @@ def sample_dfxp_from_scc_output():
    </p>
    <p begin="00:00:22.088" end="00:00:26.088" region="r11" style="default">
     4545<br/>
-    6767<br/>
-    8989
+        6767<br/>
+        8989
    </p>
   </div>
  </body>
@@ -1004,15 +1004,15 @@ def sample_dfxp_with_properly_closing_spans_output():
     bbbb
    </p>
    <p begin="00:01:35.633" end="00:01:40.833" region="r2" style="default">
-    <span tts:fontStyle="italic" region="r2">cccc<br/>
-    bbaa</span>
+    <span tts:fontStyle="italic" region="r2">cccc</span><br/>
+    bbaa
    </p>
    <p begin="00:01:55.766" end="00:01:59.500" region="r0" style="default">
     aa
    </p>
    <p begin="00:01:55.766" end="00:01:59.500" region="r3" style="default">
-    <span tts:fontStyle="italic" region="r3">bb<br/>
-    cc</span>
+    <span tts:fontStyle="italic" region="r3">bb</span><br/>
+    cc
    </p>
    <p begin="00:01:59.500" end="00:02:03.500" region="r3" style="default">
     abcd

diff --git a/tests/fixtures/webvtt.py b/tests/fixtures/webvtt.py
@@ -319,7 +319,10 @@ def sample_webvtt_from_scc_properly_writes_newlines_output():
 
 21:30.000 --> 21:34.000 align:left position:20% line:83% size:70%
 aa
-bb
+
+
+21:30.000 --> 21:34.000 align:left position:35% line:89% size:55%
+  bb
 """
 
 

diff --git a/tests/test_scc.py b/tests/test_scc.py
@@ -94,14 +94,29 @@ def test_tab_offset(self, sample_scc_tab_offset):
         captions = SCCReader().read(sample_scc_tab_offset)
 
         # SCC generates only origin, and we always expect it.
+        # expected_positioning = [
+        #     ((37.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+        #     ((17.5, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
+        #     ((12.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+        #     ((27.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+        #     ((30.0, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
+        #     ((35.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+        #     ((17.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT))
+        # ]
+
         expected_positioning = [
             ((37.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+            ((12.5, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
             ((17.5, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
             ((12.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+            ((32.5, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
             ((27.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+            ((15.0, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
             ((30.0, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
             ((35.0, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
-            ((17.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT))
+            ((22.5, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT)),
+            ((17.5, UnitEnum.PERCENT), (83.0, UnitEnum.PERCENT)),
+            ((22.5, UnitEnum.PERCENT), (89.0, UnitEnum.PERCENT))
         ]
 
         actual_positioning = [
@@ -135,7 +150,8 @@ def switches_italics(node):
         # will most likely change
         assert switches_italics(nodes[0]) is True
         assert switches_italics(nodes[2]) is False
-        assert nodes[1].content == 'abababab'
+        # offsets are now honored: the 97a2 command adds 2 leading spaces
+        assert nodes[1].content == '  abababab'
 
     def test_default_positioning_when_no_positioning_is_specified(
             self, sample_no_positioning_at_all_scc):
@@ -200,17 +216,20 @@ def test_skip_extended_characters_ascii_duplicate(
         caption_set = SCCReader().read(sample_scc_with_extended_characters)
         captions = caption_set.get_captions('en-US')
         assert captions[0].nodes[0].content == 'MÄRTHA:'
-        expected_result = ['JUNIOR: ¡Yum!', None, 'Ya me siento mucho mejor.']
+        # 97a1 means one space before Junior
+        # 9723 means three spaces before Ya me ...
+        expected_result = [' JUNIOR: ¡Yum!', None, '   Ya me siento mucho mejor.']
         content = [node.content for node in captions[1].nodes]
         assert all(result in expected_result for result in content)
 
     def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset):
+        # 97a1 space before [Radio
         expected_lines = [
-            '[Radio reporter]',
-            'The I-10 Santa Monica Freeway',
-            'westbound is jammed,',
-            'due to a three-car accident',
-            'blocking lanes 1 and 2',
+            ' [Radio reporter]',
+            ' The I-10 Santa Monica Freeway',
+            ' westbound is jammed,',
+            '  due to a three-car accident',
+            '  blocking lanes 1 and 2'
         ]
 
         caption_set = SCCReader().read(sample_scc_duplicate_tab_offset)
@@ -220,7 +239,6 @@ def test_skip_duplicate_tab_offset(self, sample_scc_duplicate_tab_offset):
             for node in cap_.nodes
             if node.type_ == CaptionNode.TEXT
         ]
-
         assert expected_lines == actual_lines
 
     def test_skip_duplicate_special_characters(
@@ -248,9 +266,10 @@ def test_line_too_long(self, sample_scc_with_line_too_long):
         with pytest.raises(CaptionLineLengthError) as exc_info:
             SCCReader().read(sample_scc_with_line_too_long)
 
+        # 9723 - 3 spaces before "was Call" so length will be 81 + 3
         assert exc_info.value.args[0].startswith(
             "32 character limit for caption cue in scc file.")
-        assert ("was Cal l l l l l l l l l l l l l l l l l l l l l l l l l l l l Denison, a friend - Length 81"
+        assert ("   was Cal l l l l l l l l l l l l l l l l l l l l l l l l l l l l Denison, a friend - Length 84"
                 in exc_info.value.args[0].split("\n"))
 
 
@@ -294,21 +313,20 @@ def test_multiple_formats(self, sample_scc_multiple_formats):
         # Test for captions that contain both pop on and paint on formats to
         # ensure the paint on lines are not repeated
         expected_text_lines = [
-            "(Client's Voice)",
-            'Remember that degree',
-            'you got in taxation?',
-            '(Danny)',
-            "Of course you don't",
-            "because you didn't!",
-            "Your job isn't doing hard",
-            'work...',
-            "...it's making them do hard",
-            'work...',
-            '...and getting paid for it.',
-            '(VO)',
-            'Snap and sort your expenses to',
-            'save over $4,600 at tax time.',
-            'QUICKBOOKS. BACKING YOU.',
+            "  (Client's Voice)",
+            '  Remember that degree',
+            '  you got in taxation?',
+            '  (Danny)',
+            "  Of course you don't",
+            "  because you didn't!",
+            "   Your job isn't doing hard",
+            '   work...',
+            "  ...it's making them do hard",
+            '  work...',
+            '  ...and getting paid for it.',
+            ' (VO)', ' Snap and sort your expenses to',
+            ' save over $4,600 at tax time.',
+            'QUICKBOOKS. BACKING YOU.'
         ]
 
         captions = SCCReader().read(sample_scc_multiple_formats) \
@@ -319,7 +337,6 @@ def test_multiple_formats(self, sample_scc_multiple_formats):
             for node in caption.nodes
             if node.type_ == CaptionNode.TEXT
         ]
-
         assert expected_text_lines == text_lines
 
     def test_freeze_semicolon_spec_time(self, sample_scc_roll_up_ru2):
@@ -407,7 +424,7 @@ def test_italics_commands_are_formatted_properly(self):
         node_creator.add_chars('c')
         node_creator.interpret_command('91ae')  # italics ON
 
-        node_creator.interpret_command('9270')  # row 4 col 0
+        node_creator.interpret_command('9270')  # row 4 col 0 resets italics
         node_creator.add_chars('d')
 
         node_creator.interpret_command('15d0')  # row 5 col 0 - creates BR
@@ -434,17 +451,12 @@ def test_italics_commands_are_formatted_properly(self):
         assert result[8].is_text_node()
 
         assert result[9].requires_repositioning()
-        assert result[10].is_italics_node()
-        assert result[10].sets_italics_on()
-
-        assert result[11].is_text_node()
-        assert result[12].is_explicit_break()
-        assert result[13].is_text_node()
-        assert result[14].is_explicit_break()
-        assert result[15].is_text_node()
+        assert result[10].is_text_node()
 
-        assert result[16].is_italics_node()
-        assert result[16].sets_italics_off()
+        assert result[11].is_explicit_break()
+        assert result[12].is_text_node()
+        assert result[13].is_explicit_break()
+        assert result[14].is_text_node()
 
 
 class CaptionDummy:
-Original file line number
+Diff line change
@@ Expand Up @@
 :30.000 --> 21:34.000 align:left position:20% line:83% size:70%
     aa
-    bb
+:30.000 --> 21:34.000 align:left position:35% line:89% size:55%
+      bb
     """
@@ Expand Down @@