Skip to content

Commit

Permalink
Merge pull request #16 from diging/develop
Browse files Browse the repository at this point in the history
Story/geco 90 (#15)
  • Loading branch information
jdamerow authored Feb 16, 2018
2 parents 57e2a86 + cb06d86 commit 09fb31d
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 32 deletions.
3 changes: 2 additions & 1 deletion cassiopeia/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
<org.slf4j-version>1.7.5</org.slf4j-version>
<org.springframework-version>4.3.1.RELEASE</org.springframework-version>
<spring-security-version>4.1.3.RELEASE</spring-security-version>
<geco.requests.version>0.5</geco.requests.version>
<geco.requests.version>0.6</geco.requests.version>
<geco.util.version>0.4.2</geco.util.version>
<geco.september.util.version>0.2</geco.september.util.version>

Expand All @@ -35,6 +35,7 @@
<tesseract.bin></tesseract.bin>
<tesseract.data></tesseract.data>
<tesseract.create.hocr>false</tesseract.create.hocr>
<tesseract.timeout>600</tesseract.timeout>
</properties>

<dependencies>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,59 +30,73 @@ public class KafkaRequestSender implements IKafkaRequestSender {

@Autowired
private IPropertiesManager propertyManager;

@Autowired
private IRequestFactory<ICompletedOCRRequest, CompletedOCRRequest> requestFactory;

@Autowired
private IRequestProducer requestProducer;

@Autowired
private ISystemMessageHandler messageHandler;

/**
 * Configures the injected request factory with the
 * {@code CompletedOCRRequest} class. Annotated with {@code @PostConstruct},
 * so it is intended to run once after the autowired fields have been set.
 */
@PostConstruct
public void init() {
requestFactory.config(CompletedOCRRequest.class);
}

/**
 * Builds a completed-OCR request from {@code info} and sends it to the Kafka
 * topic configured under {@code Properties.KAFKA_TOPIC_OCR_COMPLETE}.
 *
 * <p>A download URL is only generated when the status is
 * {@code RequestStatus.COMPLETE}; for any other status the URL is left
 * {@code null}. A {@code null} status on {@code info} is treated as
 * {@code COMPLETE}, so callers that never set a status keep reporting a
 * completed request with a download URL.
 *
 * <p>If the request object cannot be created, the error is reported through
 * the message handler and nothing is sent.
 *
 * @param requestId  id of the OCR request being answered
 * @param documentId id of the document the processed file belongs to
 * @param info       result details (path, size, filenames, ids, status and
 *                   error message) copied onto the outgoing request
 * @see edu.asu.diging.gilesecosystem.cassiopeia.core.service.IKafkaRequestSender
 */
@Override
public void sendRequest(String requestId, String documentId, RequestInfo info) {
    String restEndpoint = propertyManager.getProperty(Properties.BASE_URL);
    if (restEndpoint.endsWith("/")) {
        // GET_FILE_URL is appended directly, so drop a trailing slash first.
        restEndpoint = restEndpoint.substring(0, restEndpoint.length() - 1);
    }

    // Callers that do not set a status are assumed to report a successful
    // OCR run, which is what this method always reported previously.
    RequestStatus status = info.getStatus() != null ? info.getStatus()
            : RequestStatus.COMPLETE;

    // Only a completed request has a text file that can be downloaded.
    String fileEndpoint = null;
    if (status == RequestStatus.COMPLETE) {
        fileEndpoint = restEndpoint + DownloadFileController.GET_FILE_URL
                .replace(DownloadFileController.REQUEST_ID_PLACEHOLDER, requestId)
                .replace(DownloadFileController.DOCUMENT_ID_PLACEHOLDER, documentId)
                .replace(DownloadFileController.FILENAME_PLACEHOLDER,
                        info.getFilename());
    }

    ICompletedOCRRequest completedRequest;
    try {
        completedRequest = requestFactory.createRequest(requestId,
                info.getUploadId());
    } catch (InstantiationException | IllegalAccessException e) {
        // This should never happen if the factory is configured correctly,
        // but without a request object there is nothing to populate or send.
        messageHandler.handleMessage("Could not create request.", e,
                MessageType.ERROR);
        return;
    }

    completedRequest.setDocumentId(documentId);
    completedRequest.setDownloadPath(info.getPath());
    completedRequest.setSize(info.getSize());
    completedRequest.setDownloadUrl(fileEndpoint);
    completedRequest.setFilename(info.getImageFilename());
    completedRequest.setFileId(info.getFileId());
    completedRequest.setStatus(status);
    completedRequest.setErrorMsg(info.getErrorMsg());
    completedRequest.setOcrDate(OffsetDateTime.now(ZoneId.of("UTC")).toString());
    completedRequest.setTextFilename(info.getFilename());

    try {
        requestProducer.sendRequest(completedRequest,
                propertyManager.getProperty(Properties.KAFKA_TOPIC_OCR_COMPLETE));
    } catch (MessageCreationException e) {
        messageHandler.handleMessage("Could not send message.", e, MessageType.ERROR);
    }
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import edu.asu.diging.gilesecosystem.cassiopeia.core.service.IKafkaRequestSender;
import edu.asu.diging.gilesecosystem.cassiopeia.core.service.IOCRManager;
import edu.asu.diging.gilesecosystem.requests.IOCRRequest;
import edu.asu.diging.gilesecosystem.requests.RequestStatus;
import edu.asu.diging.gilesecosystem.septemberutil.properties.MessageType;
import edu.asu.diging.gilesecosystem.septemberutil.service.ISystemMessageHandler;
import edu.asu.diging.gilesecosystem.util.files.IFileStorageManager;
Expand Down Expand Up @@ -82,18 +83,25 @@ public void processOCRRequest(IOCRRequest request) {
Metadata metadata = new Metadata();
BodyContentHandler handler = new BodyContentHandler();

String ocrResult = null;
RequestInfo info = null;
try (InputStream stream = new ByteArrayInputStream(image)) {
ocrParser.parse(stream, handler, metadata, parseContext);
ocrResult = handler.toString();
String ocrResult = handler.toString();
info = saveTextToFile(request.getRequestId(), request.getDocumentId(), ocrResult, request.getFilename(), ".txt");
info.setUploadId(request.getUploadId());
info.setFileId(request.getFileId());
info.setStatus(RequestStatus.COMPLETE);
info.setImageFilename(request.getFilename());
} catch (SAXException | TikaException | IOException e) {
messageHandler.handleMessage("Error during ocr.", e, MessageType.ERROR);
// FIXME: send to monitoring app
info = new RequestInfo(null, 0, null);
info.setUploadId(request.getUploadId());
info.setFileId(request.getFileId());
info.setStatus(RequestStatus.FAILED);
info.setErrorMsg(e.getMessage());
info.setImageFilename(request.getFilename());
}

RequestInfo info = saveTextToFile(request.getRequestId(), request.getDocumentId(), ocrResult, request.getFilename(), ".txt");
info.setUploadId(request.getUploadId());
info.setFileId(request.getFileId());

kafkaRequestSender.sendRequest(request.getRequestId(), request.getDocumentId(), info);
}
Expand Down Expand Up @@ -127,7 +135,6 @@ protected RequestInfo saveTextToFile(String requestId,
if (!fileExtentions.startsWith(".")) {
fileExtentions = "." + fileExtentions;
}
String imageFilename = filename;
filename = filename + fileExtentions;

String filePath = docFolder + File.separator + filename;
Expand All @@ -151,6 +158,6 @@ protected RequestInfo saveTextToFile(String requestId,
}

String relativePath = storageManager.getFileFolderPathInBaseFolder(requestId, documentId, null);
return new RequestInfo(relativePath + File.separator + filename, fileObject.length(), imageFilename, filename);
return new RequestInfo(relativePath + File.separator + filename, fileObject.length(), filename);
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package edu.asu.diging.gilesecosystem.cassiopeia.core.service.impl;

import edu.asu.diging.gilesecosystem.requests.RequestStatus;

public class RequestInfo {

private String path;
Expand All @@ -8,12 +10,13 @@ public class RequestInfo {
private String imageFilename;
private String uploadId;
private String fileId;
private RequestStatus status;
private String errorMsg;

/**
 * Creates an info object for a stored file.
 *
 * <p>The image filename is not a constructor argument; like the upload id,
 * file id, status and error message it is supplied through its setter after
 * construction.
 *
 * @param path     path of the file, as later reported as the download path
 * @param size     size of the file
 * @param filename name of the file
 */
public RequestInfo(String path, long size, String filename) {
    this.path = path;
    this.size = size;
    this.filename = filename;
}

public String getPath() {
Expand Down Expand Up @@ -63,6 +66,22 @@ public String getFilename() {
/**
 * Sets the filename stored in this info object.
 *
 * @param filename the new filename
 */
public void setFilename(String filename) {
this.filename = filename;
}

/**
 * Returns the status recorded for this request.
 *
 * @return the status, or {@code null} if none has been set
 */
public RequestStatus getStatus() {
return status;
}

/**
 * Records the status of this request.
 *
 * @param status the status to store
 */
public void setStatus(RequestStatus status) {
this.status = status;
}

/**
 * Returns the error message recorded for this request.
 *
 * @return the error message, or {@code null} if none has been set
 */
public String getErrorMsg() {
return errorMsg;
}

/**
 * Records an error message for this request.
 *
 * @param errorMsg the error message to store
 */
public void setErrorMsg(String errorMsg) {
this.errorMsg = errorMsg;
}


}
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ public boolean accept(File dir, String name) {
String imageFilename = textFile.getName();
imageFilename = imageFilename.substring(0, imageFilename.lastIndexOf("."));

RequestInfo info = new RequestInfo(relativePath + File.separator + textFile.getName(), textFile.length(), imageFilename, textFile.getName());
RequestInfo info = new RequestInfo(relativePath + File.separator + textFile.getName(), textFile.length(), textFile.getName());
info.setImageFilename(imageFilename);
kafkaRequestSender.sendRequest(requestId, docId, info);
requestCounter++;
}
Expand Down
2 changes: 1 addition & 1 deletion cassiopeia/src/main/resources/config.properties
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ tesseract_bin_folder=${tesseract.bin}
tesseract_data_folder=${tesseract.data}
tesseract_create_hocr=${tesseract.create.hocr}
# how much time to give to tesseract before timing out (in sec)
tesseract_timeout=240
tesseract_timeout=${tesseract.timeout}

#OCR Type
ocr_plainText=Plain Text
Expand Down

0 comments on commit 09fb31d

Please sign in to comment.