Skip to content

Commit

Permalink
new operation added
Browse files Browse the repository at this point in the history
read scanned documents (pdf only)
The PDF is split into one image per page, and each image is sent to the image model to be processed according to the request in the prompt.
  • Loading branch information
amirkhan-ak-sf committed Aug 6, 2024
1 parent 3348c5f commit fc358bb
Show file tree
Hide file tree
Showing 3 changed files with 86 additions and 1 deletion.
7 changes: 6 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>com.mule.mulechain</groupId>
<artifactId>mulechain-ai-connector</artifactId>
<version>0.2.6</version>
<version>0.2.10</version>
<packaging>mule-extension</packaging>
<name>MuleChain</name>

Expand Down Expand Up @@ -168,6 +168,11 @@
<artifactId>rxjava</artifactId>
<version>2.2.21</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.2</version>
</dependency>
</dependencies>
<repositories>
<repository>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,9 @@ private MuleChainConstants() {}
public static final String DATASET = "dataset";
public static final String SENTIMENT = "sentiment";
public static final String IS_POSITIVE = "isPositive";
// JSON key for the 1-based page number of a single per-page result.
public static final String PAGE = "page";
// JSON key for the total number of pages in the processed document.
public static final String TOTAL_PAGES = "total_pages";
// JSON key for the array holding the per-page results.
public static final String PAGES = "pages";

public static class EmbeddingConstants {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
*/
package org.mule.extension.mulechain.internal.operation;

import org.json.JSONArray;
import org.json.JSONObject;
import org.mule.extension.mulechain.internal.config.LangchainLLMConfiguration;
import org.mule.extension.mulechain.internal.constants.MuleChainConstants;
Expand All @@ -26,6 +27,19 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Base64;
import java.util.List;

import javax.imageio.ImageIO;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.Loader;

/**
* This class is a container for operations, every public method in this class will be taken as an extension operation.
Expand Down Expand Up @@ -76,4 +90,67 @@ public String drawImage(@Config LangchainLLMConfiguration configuration, String

return jsonObject.toString();
}

/**
 * Reads a scanned PDF document page by page using the configured chat model.
 *
 * <p>Each page of the PDF is rendered to a PNG image, Base64-encoded and sent to the model together
 * with the prompt. The result is a JSON string containing the total page count and, for every page
 * processed, its 1-based page number, the model's response text and the token usage.
 *
 * <p>If the file cannot be loaded, or a page cannot be rendered or encoded, the error is logged and
 * processing stops; the returned JSON then contains only the pages collected up to that point.
 *
 * @param configuration connector configuration supplying the chat model
 * @param data          the prompt sent to the model along with each page image
 * @param filePath      path of the PDF file to read
 * @return a JSON string with the total page count (when the file could be loaded) and the per-page results
 */
@MediaType(value = ANY, strict = false)
@Alias("IMAGE-read-scanned-documents")
public String readScannedDocumentPDF(@Config LangchainLLMConfiguration configuration, String data, String filePath) {

  ChatLanguageModel model = configuration.getModel();

  // Resolution used when rasterising each PDF page.
  final int renderDpi = 300;

  JSONObject jsonObject = new JSONObject();
  JSONArray docPages = new JSONArray();
  try (PDDocument document = Loader.loadPDF(new File(filePath))) {

    PDFRenderer pdfRenderer = new PDFRenderer(document);
    int totalPages = document.getNumberOfPages();
    LOGGER.info("Total files to be converted -> {}", totalPages);
    jsonObject.put(MuleChainConstants.TOTAL_PAGES, totalPages);

    for (int pageNumber = 0; pageNumber < totalPages; pageNumber++) {

      BufferedImage image = pdfRenderer.renderImageWithDPI(pageNumber, renderDpi);
      // Log the same 1-based page number that is reported in the JSON output.
      LOGGER.info("Reading page -> {}", pageNumber + 1);

      // Throws IOException on failure, so a broken image is never sent to the model.
      String imageBase64 = convertToBase64String(image);
      UserMessage userMessage = UserMessage.from(
                                                 TextContent.from(data),
                                                 ImageContent.from(imageBase64, "image/png"));

      Response<AiMessage> response = model.generate(userMessage);

      JSONObject docPage = new JSONObject();
      docPage.put(MuleChainConstants.PAGE, pageNumber + 1);
      docPage.put(MuleChainConstants.RESPONSE, response.content().text());
      docPage.put(MuleChainConstants.TOKEN_USAGE, JsonUtils.getTokenUsage(response));
      docPages.put(docPage);
    }

  } catch (IOException e) {
    // Best effort: keep whatever pages were already processed, but record the failure with its stack trace.
    LOGGER.error("Error occurred while processing the file: {}", e.getMessage(), e);
  }

  jsonObject.put(MuleChainConstants.PAGES, docPages);

  return jsonObject.toString();
}

/**
 * Encodes the given image as PNG and returns the bytes as a Base64 string.
 *
 * @param image the rendered page image
 * @return the Base64 representation of the PNG-encoded image
 * @throws IOException if the image cannot be written as PNG
 */
private String convertToBase64String(BufferedImage image) throws IOException {
  try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
    // ImageIO.write reports a missing writer through its return value rather than an exception.
    if (!ImageIO.write(image, "png", outputStream)) {
      throw new IOException("No image writer available for format: png");
    }
    return Base64.getEncoder().encodeToString(outputStream.toByteArray());
  }
}
}

0 comments on commit fc358bb

Please sign in to comment.