Skip to content

Commit

Permalink
changed repositioning to spaces
Browse files Browse the repository at this point in the history
  • Loading branch information
OlteanuRares committed Apr 3, 2024
1 parent 19fbf72 commit cc15cc6
Show file tree
Hide file tree
Showing 9 changed files with 180 additions and 108 deletions.
13 changes: 12 additions & 1 deletion pycaption/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class CaptionNode:
# property of the node, not a type of node itself.
STYLE = 2
BREAK = 3
REPOSITIONING = 4

def __init__(self, type_, layout_info=None, content=None, start=None):
"""
Expand All @@ -133,6 +134,8 @@ def __repr__(self):
return repr(self.content)
elif t == CaptionNode.BREAK:
return repr('BREAK')
elif t == CaptionNode.REPOSITIONING:
return repr("REPOSITIONING")
elif t == CaptionNode.STYLE:
return repr(f'STYLE: {self.start} {self.content}')
else:
Expand All @@ -153,6 +156,12 @@ def create_style(start, content, layout_info=None):
def create_break(layout_info=None):
return CaptionNode(CaptionNode.BREAK, layout_info=layout_info)

@staticmethod
def create_repositioning(text, layout_info=None):
    """Build a REPOSITIONING node that carries ``text`` as its content.

    :param text: stored unchanged as the node's ``content``
    :param layout_info: forwarded unchanged to the CaptionNode constructor
    :returns: a CaptionNode whose type is ``CaptionNode.REPOSITIONING``
    """
    # The leftover debugging print() was removed: a node factory in library
    # code should not write to stdout on every call, and its len(text)
    # argument raised TypeError for a None text.
    return CaptionNode(
        CaptionNode.REPOSITIONING, layout_info=layout_info, content=text)


class Caption:
"""
Expand Down Expand Up @@ -222,13 +231,15 @@ def get_text_for_node(node):
return node.content
if node.type_ == CaptionNode.BREAK:
return '\n'
if node.type_ == CaptionNode.REPOSITIONING:
return node.content
return ''

return [get_text_for_node(node) for node in self.nodes]

def get_text(self):
text_nodes = self.get_text_nodes()
return ''.join(text_nodes).strip()
return ''.join(text_nodes)

def _format_timestamp(self, microseconds, msec_separator=None):
duration = timedelta(microseconds=microseconds)
Expand Down
3 changes: 3 additions & 0 deletions pycaption/sami.py
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,9 @@ def _recreate_text(self, caption):
line += self._encode(node.content) + ' '
elif node.type_ == CaptionNode.BREAK:
line = line.rstrip() + '<br/>\n '
elif node.type_ == CaptionNode.REPOSITIONING:
print(self._encode(node.content))
line += node.content
elif node.type_ == CaptionNode.STYLE:
line = self._recreate_line_style(line, node)

Expand Down
89 changes: 42 additions & 47 deletions pycaption/scc/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,42 @@ def detect(self, content):
else:
return False

@staticmethod
def validate_scc_input(captions, lang):
    """Validate the captions stored under ``lang`` and return ``captions``.

    :param captions: container providing ``get_captions(lang)`` and
        ``is_empty()``
    :param lang: language key used to select the captions to check
    :returns: the ``captions`` object that was passed in
    :raises CaptionLineLengthError: when any text line of any caption is
        longer than 32 characters
    :raises CaptionReadTimingError: when a caption's ``end - start`` is
        greater than 0 and lower than 50000
    :raises CaptionReadNoCaptions: when ``captions.is_empty()`` is true
    """
    cues = captions.get_captions(lang)

    # Join each cue's text nodes, split the result on newlines and keep
    # only the rows that exceed the 32 character limit.
    overlong = [
        row
        for cue in cues
        for row in "".join(cue.get_text_nodes()).split("\n")
        if len(row) > 32
    ]
    if overlong:
        report = "".join(
            f"{row} - Length {len(row)}\n" for row in overlong)
        raise CaptionLineLengthError(
            "32 character limit for caption cue in scc file.\n"
            "Lines longer than 32:\n"
            f"{report}"
        )

    # A cue with an end time whose duration is positive but below 50000
    # (reported to the user as 0.05 seconds) is rejected; this is likely
    # caused by a standalone EOC marker in the SCC file.
    for cue in cues:
        duration = cue.end - cue.start
        if duration > 0 and duration < 50000:
            raise CaptionReadTimingError(
                f'Unsupported cue duration around {cue.format_start()} '
                f'for line beginning with "{cue.get_text()}". Duration '
                f'must be at least 0.05 seconds.')

    if captions.is_empty():
        raise CaptionReadNoCaptions("empty caption file")

    # Captions are fetched again, exactly as before, and handed to the
    # helper that deals with trailing captions lacking an end.
    fix_last_captions_without_ending(captions.get_captions(lang))
    return captions

def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):
"""Converts the unicode string into a CaptionSet
Expand Down Expand Up @@ -231,39 +267,7 @@ def read(self, content, lang='en-US', simulate_roll_up=False, offset=0):

captions = CaptionSet({lang: self.caption_stash.get_all()})

# check captions for incorrect lengths
lines = []
for caption in self.caption_stash._collection:
caption_text = "".join(caption.to_real_caption().get_text_nodes())
lines.extend(caption_text.split("\n"))
lines_too_long = [line for line in lines if len(line) > 32]

if bool(lines_too_long):
msg = ""
for line in lines_too_long:
msg += line + f" - Length { len(line)}" + "\n"
raise CaptionLineLengthError(
f"32 character limit for caption cue in scc file.\n"
f"Lines longer than 32:\n"
f"{msg}"
)

for cap in captions.get_captions(lang):
# if there's an end time on a caption and the difference is
# less than .05s kill it (this is likely caused by a standalone
# EOC marker in the SCC file)
if 0 < cap.end - cap.start < 50000:
raise CaptionReadTimingError(
f'Unsupported cue duration around {cap.format_start()} '
f'for line beginning with "{cap.get_text()}". Duration '
f'must be at least 0.05 seconds.')

if captions.is_empty():
raise CaptionReadNoCaptions("empty caption file")
else:
fix_last_captions_without_ending(captions.get_captions(lang))

return captions
return self.validate_scc_input(captions, lang)

def _flush_implicit_buffers(self, old_key=None, *args):
"""Convert to Captions those buffers whose behavior is implicit.
Expand Down Expand Up @@ -297,11 +301,11 @@ def _translate_line(self, line):
# split line in timestamp and words
r = re.compile(r"([0-9:;]*)([\s\t]*)((.)*)")
parts = r.findall(line.lower())
words = parts[0][2].split(' ')

self.time_translator.start_at(parts[0][0])

# loop through each word
for word in parts[0][2].split(' '):
for word in words:
# ignore empty results or invalid commands
word = word.strip()
if len(word) == 4:
Expand Down Expand Up @@ -333,6 +337,8 @@ def _translate_word(self, word):
self.time_translator.increment_frames()

def _handle_double_command(self, word):
print("=============")
print(word)
# If the caption is to be broadcast, each of the commands are doubled
# up for redundancy in case the signal is garbled in transmission.
# The decoder is programmed to ignore a second command when it is the
Expand All @@ -343,18 +349,6 @@ def _handle_double_command(self, word):
if word == self.last_command:
self.last_command = ''
return True
# Fix for the <position> <tab offset> <position> <tab offset>
# repetition
elif _is_pac_command(word) and word in self.last_command:
self.last_command = ''
return True
elif word in PAC_TAB_OFFSET_COMMANDS:
if _is_pac_command(self.last_command):
self.last_command += f" {word}"
return False
else:
return True

self.last_command = word
return False

Expand Down Expand Up @@ -419,6 +413,7 @@ def _translate_command(self, word):
self._pop_on(end=self.time)
if self.buffer.is_empty():
return
self.buffer._position_tracer._last_char_position = 0
cue = PopOnCue(buffer=deepcopy(self.buffer), start=self.time, end=0)
self.pop_ons_queue.appendleft(cue)
self.buffer = self.node_creator_factory.new_creator()
Expand Down
5 changes: 5 additions & 0 deletions pycaption/scc/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,3 +1046,8 @@ def _restructure_bytes_to_position_map(byte_to_pos_map):
'└': ["+"],
'┘': ["+"]
}

# Subset of COMMANDS: an entry is kept only when its value contains "break"
# and does not contain "italic".
# NOTE(review): the name suggests "every non-italic command", yet the filter
# also requires "break" -- confirm this extra condition is intended.
NOT_ITALICS_COMMANDS = {
    code: markup for code, markup in COMMANDS.items()
    if "break" in markup and "italic" not in markup
}
Loading

0 comments on commit cc15cc6

Please sign in to comment.