diff --git a/.gitignore b/.gitignore index 4f7d9483fe..c7f7f3e416 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ __pycache__ !.gitkeep db.sqlite3 *.pem +.vscode/ # pyenv .python-version diff --git a/src/tram/tram/ml/base.py b/src/tram/tram/ml/base.py index 080b4d8ecd..09bc850eb6 100644 --- a/src/tram/tram/ml/base.py +++ b/src/tram/tram/ml/base.py @@ -128,6 +128,8 @@ def _extract_text(self, document): text = self._extract_docx_text(document) elif suffix == ".html": text = self._extract_html_text(document) + elif suffix == ".txt": + text = self._extract_plain_text(document) else: raise ValueError("Unknown file suffix: %s" % suffix) @@ -228,6 +230,10 @@ def _extract_docx_text(self, document): text = " ".join([paragraph.text for paragraph in parsed_docx.paragraphs]) return text + def _extract_plain_text(self, document): + text = document.docfile.read().decode("UTF-8") + return text + def process_job(self, job): name = self._get_report_name(job) text = self._extract_text(job.document) diff --git a/src/tram/tram/templates/base.html b/src/tram/tram/templates/base.html index 73378824be..84dada2022 100644 --- a/src/tram/tram/templates/base.html +++ b/src/tram/tram/templates/base.html @@ -49,7 +49,7 @@