Skip to content

Commit

Permalink
redeploying PDFBox (langchain4J)
Browse files Browse the repository at this point in the history
redeploying PDFBox (langchain4J)
  • Loading branch information
amirkhan-ak-sf committed Aug 6, 2024
1 parent 79c0972 commit ab8a442
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 22 deletions.
7 changes: 6 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.mule.mulechain</groupId>
<artifactId>mulechain-ai-connector</artifactId>
<version>0.2.12</version>
<version>0.2.13</version>
<packaging>mule-extension</packaging>
<name>MuleChain</name>

Expand Down Expand Up @@ -169,6 +169,11 @@
<version>2.2.21</version>
</dependency>
<!--<dependency>
<groupId>dev.langchain4j</groupId>
<artifactId>langchain4j-document-parser-apache-pdfbox</artifactId>
<version>0.31.0</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.2</version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,13 @@

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Base64;
import java.util.List;

import javax.imageio.ImageIO;

import java.io.InputStream;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.PDFRenderer;


Expand Down Expand Up @@ -95,54 +93,57 @@ public String drawImage(@Config LangchainLLMConfiguration configuration, String
* Reads a scanned document.
*/

/*

// Operation: renders every page of the PDF found at filePath to an image and sends
// each image, together with the text prompt in data, to the chat model obtained
// from the configuration. Returns a JSON string containing the total page count
// and, for each page, its 1-based page number, the model's text response and the
// value produced by JsonUtils.getTokenUsage for that response.
// NOTE(review): this listing comes from a rendered diff, so removed and added
// lines may be interleaved below - confirm against the checked-out file.
@MediaType(value = ANY, strict = false)
@Alias("IMAGE-read-scanned-documents")
public String readScannedDocumentPDF(@Config LangchainLLMConfiguration configuration, String data, String filePath) {

ChatLanguageModel model = configuration.getModel();

// Despite the name, sourceDir holds the path of a single PDF file.
String sourceDir = filePath;

// jsonObject is the overall result; docPages collects one entry per processed page.
JSONObject jsonObject = new JSONObject();
JSONArray docPages = new JSONArray();
// NOTE(review): the next line duplicates the commented-out try header further down
// and cannot coexist with the stream-based try that follows; it looks like the
// pre-change line of the diff - confirm.
try (PDDocument document = Loader.loadPDF(new File(sourceDir))) {

//try (PDDocument document = Loader.loadPDF(new File(sourceDir))) {
// Both the file stream and the document are declared as resources, so they are
// closed automatically when the try block exits.
try (InputStream inputStream = new FileInputStream(sourceDir);
PDDocument document = PDDocument.load(inputStream);) {


PDFRenderer pdfRenderer = new PDFRenderer(document);
int totalPages = document.getNumberOfPages();
LOGGER.info("Total files to be converted -> " + totalPages);
jsonObject.put(MuleChainConstants.TOTAL_PAGES, totalPages);

JSONObject docPage;
// Pages are processed one at a time: render, encode, query the model, record.
for (int pageNumber = 0; pageNumber < totalPages; pageNumber++) {

// Render the page (0-based index) to an image at 300 DPI.
BufferedImage image = pdfRenderer.renderImageWithDPI(pageNumber, 300);
LOGGER.info("Reading page -> " + pageNumber);

// NOTE(review): convertToBase64String appears to return the literal "Error" on
// failure, which would then be passed on as image data - confirm.
String imageBase64 = convertToBase64String(image);
// A single user message carrying the prompt text plus the page image (sent as image/png).
UserMessage userMessage = UserMessage.from(
TextContent.from(data),
ImageContent.from(imageBase64, "image/png"));

Response<AiMessage> response = model.generate(userMessage);

docPage = new JSONObject();
// Page numbers in the output are 1-based, hence the + 1.
docPage.put(MuleChainConstants.PAGE, pageNumber + 1);
docPage.put(MuleChainConstants.RESPONSE, response.content().text());
docPage.put(MuleChainConstants.TOKEN_USAGE, JsonUtils.getTokenUsage(response));
docPages.put(docPage);
}

} catch (IOException e) {
// The IOException is only logged (message text, INFO level); it is not rethrown,
// so execution continues and the JSON built so far is still returned.
LOGGER.info("Error occurred while processing the file: " + e.getMessage());
}

// Added after the try/catch, so the pages array is present even when an
// IOException interrupted processing (it may then be empty or partial).
jsonObject.put(MuleChainConstants.PAGES, docPages);

return jsonObject.toString();
}

private String convertToBase64String(BufferedImage image) {
String base64String;
try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
Expand All @@ -155,5 +156,5 @@ private String convertToBase64String(BufferedImage image) {
LOGGER.info("Error occurred while processing the file: " + e.getMessage());
return "Error";
}
} */
}
}

0 comments on commit ab8a442

Please sign in to comment.