diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py index f932d7f15..47e9df978 100644 --- a/PyPDF2/pdf.py +++ b/PyPDF2/pdf.py @@ -2656,7 +2656,7 @@ def compressContentStreams(self): content = ContentStream(content, self.pdf) self[NameObject("/Contents")] = content.flateEncode() - def extractText(self): + def extractText(self, Tj_sep="", TJ_sep=" "): """ Locate all text drawing commands, in the order they are provided in the content stream, and extract the text. This works well for some PDF @@ -2678,6 +2678,7 @@ def extractText(self): if operator == b_("Tj"): _text = operands[0] if isinstance(_text, TextStringObject): + text += Tj_sep text += _text text += "\n" elif operator == b_("T*"): @@ -2695,7 +2696,7 @@ def extractText(self): elif operator == b_("TJ"): for i in operands[0]: if isinstance(i, TextStringObject): - text += " " + text += TJ_sep text += i text += "\n" return text