You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
File "C:\Users\jerem\PycharmProjects\testing\main.py", line 16, in <module>
main()
File "C:\Users\jerem\PycharmProjects\testing\main.py", line 8, in main
text = pypdf.PdfReader('test.pdf')
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 318, in __init__
self.read(stream)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 1548, in read
self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 1758, in _read_xref_tables_and_trailers
startxref = self._read_xref(stream)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 1794, in _read_xref
self._read_standard_xref_table(stream)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 1657, in _read_standard_xref_table
size = cast(int, read_object(stream, self))
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\generic\_data_structures.py", line 1229, in read_object
return BooleanObject.read_from_stream(stream)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\generic\_base.py", line 257, in read_from_stream
raise PdfReadError("Could not read Boolean object")
pypdf.errors.PdfReadError: Could not read Boolean object
Process finished with exit code 1
################CODE################
import pypdf

def main():
    text = pypdf.PdfReader('test.pdf') #FAILS HERE
    out = text.pages[0].extract_text()

if __name__ == '__main__':
    pdf_file = 'test.pdf'
    main()
The text was updated successfully, but these errors were encountered:
Your PDF contains an uncommon construct: the `xref` keyword is not followed by any separator. Acrobat Reader appears to accept this. I've added a change to make the parser robust to it.
I'm getting an error when I try to read a PDF from a specific company.
pycharm
test.pdf
File "C:\Users\jerem\PycharmProjects\testing\main.py", line 16, in <module>
main()
File "C:\Users\jerem\PycharmProjects\testing\main.py", line 8, in main
text = pypdf.PdfReader('test.pdf')
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 318, in __init__
self.read(stream)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 1548, in read
self._read_xref_tables_and_trailers(stream, startxref, xref_issue_nr)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 1758, in _read_xref_tables_and_trailers
startxref = self._read_xref(stream)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 1794, in _read_xref
self._read_standard_xref_table(stream)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\_reader.py", line 1657, in _read_standard_xref_table
size = cast(int, read_object(stream, self))
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\generic\_data_structures.py", line 1229, in read_object
return BooleanObject.read_from_stream(stream)
File "C:\Users\jerem\AppData\Local\Programs\Python\Python310\lib\site-packages\pypdf\generic\_base.py", line 257, in read_from_stream
raise PdfReadError("Could not read Boolean object")
pypdf.errors.PdfReadError: Could not read Boolean object
Process finished with exit code 1
################CODE################
import pypdf

def main():
    text = pypdf.PdfReader('test.pdf') #FAILS HERE
    out = text.pages[0].extract_text()

if __name__ == '__main__':
    pdf_file = 'test.pdf'
    main()
The text was updated successfully, but these errors were encountered: