From 41ae4a83caae47782d653ddd784a436b90628f56 Mon Sep 17 00:00:00 2001
From: Horea Christian
Date: Wed, 24 Jan 2024 10:31:37 -0500
Subject: [PATCH] Draft body extraction script. Maybe move to RepSeP. Some journals seem to require it.

---
 publishing/extract_text.py | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)
 create mode 100644 publishing/extract_text.py

diff --git a/publishing/extract_text.py b/publishing/extract_text.py
new file mode 100644
index 00000000..76a3e6ab
--- /dev/null
+++ b/publishing/extract_text.py
@@ -0,0 +1,27 @@
+"""Extract the plain-text body of a PDF article and print it to stdout.
+
+Draft script; some journals seem to require the extracted body text.
+
+Usage:
+    python extract_text.py [PDF_PATH]
+
+PDF_PATH defaults to ``article.pdf`` in the current working directory.
+"""
+
+import sys
+
+from pdfminer.high_level import extract_text
+
+
+def main(pdf_path='article.pdf'):
+    """Print the text that pdfminer extracts from the PDF at *pdf_path*."""
+    # Parse the document once: the first draft extracted it twice (by path,
+    # then by file handle) and threw the first result away.
+    with open(pdf_path, 'rb') as f:
+        text = extract_text(f)
+    print(text)
+
+
+if __name__ == '__main__':
+    # An optional first command-line argument overrides the default path.
+    main(*sys.argv[1:2])