"""Print the text content of ``article.pdf`` to standard output."""
from pdfminer.high_level import extract_text

# One extraction is enough: extract_text takes a path (or an open binary
# file object) and returns the document text. Calling it twice on the same
# file parses the PDF twice and throws the first result away.
text = extract_text('article.pdf')
print(text)