From 8f96b15a1d919e57c6bab2f92f014e4aba73538f Mon Sep 17 00:00:00 2001 From: Michael Clifford Date: Wed, 24 Apr 2024 10:13:02 -0400 Subject: [PATCH 1/2] set summarizer write to /tmp Signed-off-by: Michael Clifford --- .../summarizer/app/requirements.txt | 3 ++- .../summarizer/app/summarizer.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/recipes/natural_language_processing/summarizer/app/requirements.txt b/recipes/natural_language_processing/summarizer/app/requirements.txt index 8bf95409..7f30524f 100644 --- a/recipes/natural_language_processing/summarizer/app/requirements.txt +++ b/recipes/natural_language_processing/summarizer/app/requirements.txt @@ -1,3 +1,4 @@ langchain langchain_openai -streamlit \ No newline at end of file +streamlit +pypdf diff --git a/recipes/natural_language_processing/summarizer/app/summarizer.py b/recipes/natural_language_processing/summarizer/app/summarizer.py index 687a3652..798cf72a 100644 --- a/recipes/natural_language_processing/summarizer/app/summarizer.py +++ b/recipes/natural_language_processing/summarizer/app/summarizer.py @@ -49,12 +49,12 @@ def read_file(file): file_type = file.type if file_type == "application/pdf": - with open(file.name, "wb") as f: + with open(f"/tmp/{file.name}", "wb") as f: f.write(file.getvalue()) - loader = PyPDFLoader(file.name) + loader = PyPDFLoader(f"/tmp/{file.name}") pages = loader.load() text = "".join([p.page_content for p in pages]) - os.remove(file.name) + os.remove(f"/tmp/{file.name}") if file_type == "text/plain": text = file.read().decode() From 9f8dab040bcb20a2704748d597f69c993f3e9800 Mon Sep 17 00:00:00 2001 From: Michael Clifford Date: Wed, 24 Apr 2024 14:27:06 -0400 Subject: [PATCH 2/2] use tempfile for summarizer Signed-off-by: Michael Clifford --- .../summarizer/app/summarizer.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/recipes/natural_language_processing/summarizer/app/summarizer.py b/recipes/natural_language_processing/summarizer/app/summarizer.py index 798cf72a..bb39bf21 100644 --- a/recipes/natural_language_processing/summarizer/app/summarizer.py +++ b/recipes/natural_language_processing/summarizer/app/summarizer.py @@ -4,6 +4,7 @@ from langchain_community.callbacks import StreamlitCallbackHandler from langchain_community.document_loaders import PyPDFLoader import streamlit as st +import tempfile import requests import time import os @@ -49,12 +50,12 @@ def read_file(file): file_type = file.type if file_type == "application/pdf": - with open(f"/tmp/{file.name}", "wb") as f: + temp = tempfile.NamedTemporaryFile() + with open(temp.name, "wb") as f: f.write(file.getvalue()) - loader = PyPDFLoader(f"/tmp/{file.name}") + loader = PyPDFLoader(temp.name) pages = loader.load() text = "".join([p.page_content for p in pages]) - os.remove(f"/tmp/{file.name}") if file_type == "text/plain": text = file.read().decode()