From e94ee3b462e21f66b79e6cbebd959c03d631c52b Mon Sep 17 00:00:00 2001 From: khurramHashmi Date: Tue, 22 Oct 2019 17:00:56 +0200 Subject: [PATCH 1/4] tensorflow version update --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 0920ad4..74f2d05 100755 --- a/requirements.txt +++ b/requirements.txt @@ -9,5 +9,5 @@ torchvision dominate>=2.3.5 pandas keras -tensorflow-gpu +tensorflow-gpu==1.14 scikit-image From 627e95330d141096046e5d8c7220940acd974198 Mon Sep 17 00:00:00 2001 From: khurramHashmi Date: Wed, 30 Oct 2019 15:31:26 +0100 Subject: [PATCH 2/4] Using page_image passed as an argument in process_segment function --- .../cli/ocrd_anybaseocr_binarize.py | 30 ++++++++++++------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_binarize.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_binarize.py index 8525a7f..7bea5c4 100755 --- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_binarize.py +++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_binarize.py @@ -107,8 +107,14 @@ def process(self): oplevel = self.parameter['operation_level'] for (n, input_file) in enumerate(self.input_files): + file_id = input_file.ID.replace(self.input_file_grp, self.image_grp) +# page_id = input_file.pageId or input_file.ID page_id = input_file.pageId or input_file.ID +# if input_file.pageId is None: +# continue +# page_id = input_file.pageId + LOG.info("INPUT FILE %i / %s", n, page_id) pcgts = page_from_file(self.workspace.download_file(input_file)) metadata = pcgts.get_Metadata() @@ -126,7 +132,7 @@ def process(self): LOG.info("Binarizing on '%s' level in page '%s'", oplevel, page_id) if oplevel=="page": - self._process_segment(page, page_image.filename, page_id, file_id + ".bin") + self._process_segment(page_image, page, page_image.filename, page_id, file_id + ".bin") else: regions = page.get_TextRegion() + page.get_TableRegion() if not regions: @@ -152,8 +158,11 @@ def process(self): - def _process_segment(self, page, filename, page_id, file_id): - raw = ocrolib.read_image_gray(filename) + def _process_segment(self,page_image, page, filename, page_id, file_id): + #raw = ocrolib.read_image_gray(filename) + raw = ocrolib.pil2array(page_image) + raw = np.mean(raw,2) + raw = raw.astype("float64") self.dshow(raw, "input") # perform image normalization @@ -163,12 +172,12 @@ def _process_segment(self, page, filename, page_id, file_id): return image /= amax(image) - if not self.parameter['nocheck']: - check = self.check_page(amax(image)-image) - if check is not None: - LOG.error(input_file.pageId or input_file.ID+" SKIPPED. "+check + - " (use -n to disable this check)") - return +# if not self.parameter['nocheck']: +# check = self.check_page(amax(image)-image) +# if check is not None: +# LOG.error(input_file.pageId or input_file.ID+" SKIPPED. "+check + +# " (use -n to disable this check)") +# return # check whether the image is already effectively binarized if self.parameter['gray']: @@ -258,5 +267,4 @@ def _process_segment(self, page, filename, page_id, file_id): page_id=page_id, file_grp=self.image_grp ) - page.add_AlternativeImage(AlternativeImageType(filename=file_path, comment="binarized")) - + page.add_AlternativeImage(AlternativeImageType(filename=file_path)) From c59a98cfea830b7e8e306db50f803f67d0381cb8 Mon Sep 17 00:00:00 2001 From: khurramHashmi Date: Wed, 30 Oct 2019 15:33:22 +0100 Subject: [PATCH 3/4] Alternative image functionality added. --- ocrd_anybaseocr/cli/ocrd_anybaseocr_deskew.py | 35 ++++++++++++++----- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_deskew.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_deskew.py index cf1243a..4d832cc 100755 --- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_deskew.py +++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_deskew.py @@ -43,7 +43,7 @@ import os import numpy as np -from pylab import amin, amax, linspace, mean, var, plot, ginput, ones, clip, imshow +from pylab import amin,array, amax, linspace, mean, var, plot, ginput, ones, clip, imshow from scipy.ndimage import filters, interpolation, morphology from scipy import stats import ocrolib @@ -53,6 +53,7 @@ from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import ( to_xml, + AlternativeImageType, MetadataItemType, LabelsType, LabelType ) @@ -72,7 +73,7 @@ def __init__(self, *args, **kwargs): def estimate_skew_angle(self, image, angles): estimates = [] - + for a in angles: v = mean(interpolation.rotate( image, a, order=0, mode='constant'), axis=1) @@ -95,7 +96,11 @@ def process(self): for (n, input_file) in enumerate(self.input_files): file_id = input_file.ID.replace(self.input_file_grp, self.image_grp) - page_id = input_file.pageId or input_file.ID + + if input_file.mimetype !="image/png": + continue + page_id = input_file.pageId + LOG.info("INPUT FILE %i / %s", n, page_id) pcgts = page_from_file(self.workspace.download_file(input_file)) metadata = pcgts.get_Metadata() @@ -109,9 +114,10 @@ def process(self): for name in self.parameter.keys()])])) page = pcgts.get_Page() page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id) + if oplevel=="page": - self._process_segment(page, page_image.filename, page_id, file_id + ".ds") + self._process_segment(page_image, page, page_image.filename, page_id, file_id) # ASK , what should we add here, ds in skew file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp) if file_id == input_file.ID: @@ -127,12 +133,14 @@ def process(self): content=to_xml(pcgts).encode('utf-8') ) - def _process_segment(self, page, filename, page_id, file_id): + def _process_segment(self,page_image, page, filename, page_id, file_id): if self.parameter['parallel'] < 2: LOG.info("INPUT FILE %s ", filename) - raw = ocrolib.read_image_gray(filename) + + raw = ocrolib.pil2array(page_image) + #raw = ocrolib.read_image_gray(filename) - flat = raw + flat = raw.astype("float64") #flat = np.array(binImg) # estimate skew angle and rotate if self.parameter['maxskew'] > 0: @@ -198,12 +206,21 @@ def _process_segment(self, page, filename, page_id, file_id): #TODO: Need some clarification as the results effect the following pre-processing steps. #orientation = -angle #orientation = 180 - ((180 - orientation) % 360) + + if angle is None: # added piece of code + angle = 0 + page.set_orientation(angle) - file_path = self.workspace.save_image_file(bin_image, + bin_array = array(255*(deskewed>ocrolib.midrange(deskewed)),'B') + deskewed = ocrolib.array2pil(bin_array) + file_path = self.workspace.save_image_file(deskewed, file_id, page_id=page_id, file_grp=self.image_grp ) - page.add_AlternativeImage(AlternativeImageType(filename=file_path, comment="deskewed")) + page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments="deskewed")) + + + From 26faa5073a9d4ad7b87c15a6c2a82ed5843d86fb Mon Sep 17 00:00:00 2001 From: khurramHashmi Date: Wed, 30 Oct 2019 15:35:12 +0100 Subject: [PATCH 4/4] page_image passed as an argument in process_segment function --- .../cli/ocrd_anybaseocr_cropping.py | 150 ++++++++++-------- 1 file changed, 83 insertions(+), 67 deletions(-) diff --git a/ocrd_anybaseocr/cli/ocrd_anybaseocr_cropping.py b/ocrd_anybaseocr/cli/ocrd_anybaseocr_cropping.py index 2da74eb..468efca 100755 --- a/ocrd_anybaseocr/cli/ocrd_anybaseocr_cropping.py +++ b/ocrd_anybaseocr/cli/ocrd_anybaseocr_cropping.py @@ -39,25 +39,28 @@ from ..constants import OCRD_TOOL - +from pylab import array from ocrd import Processor from ocrd_utils import getLogger, concat_padded, MIMETYPE_PAGE from ocrd_modelfactory import page_from_file from ocrd_models.ocrd_page import ( CoordsType, + AlternativeImageType, to_xml, MetadataItemType, - LabelsType, LabelType + LabelsType, LabelType, ) from ocrd_models.ocrd_page_generateds import BorderType TOOL = 'ocrd-anybaseocr-crop' + LOG = getLogger('OcrdAnybaseocrCropper') FALLBACK_IMAGE_GRP = 'OCR-D-IMG-CROP' class OcrdAnybaseocrCropper(Processor): def __init__(self, *args, **kwargs): + kwargs['ocrd_tool'] = OCRD_TOOL['tools'][TOOL] kwargs['version'] = OCRD_TOOL['version'] super(OcrdAnybaseocrCropper, self).__init__(*args, **kwargs) @@ -407,18 +410,23 @@ def crop_area(self, textarea, binImg, rgb): return textarea def process(self): - try: - self.page_grp, self.image_grp = self.output_file_grp.split(',') - except ValueError: - self.page_grp = self.output_file_grp - self.image_grp = FALLBACK_IMAGE_GRP - LOG.info("No output file group for images specified, falling back to '%s'", FALLBACK_IMAGE_GRP) + """Performs border detection on the workspace. """ +# try: +# print("OUTPUT FILE ", self.output_file_grp) +# self.page_grp, self.image_grp = self.output_file_grp.split(',') +# except ValueError: +# self.page_grp = self.output_file_grp +# self.image_grp = FALLBACK_IMAGE_GRP +# LOG.info("No output file group for images specified, falling back to '%s'", FALLBACK_IMAGE_GRP) oplevel = self.parameter['operation_level'] - for (n, input_file) in enumerate(self.input_files): - file_id = input_file.ID.replace(self.input_file_grp, self.image_grp) - page_id = input_file.pageId or input_file.ID + + if input_file.mimetype =="image/png": + continue + page_id = input_file.pageId + LOG.info("INPUT FILE %i / %s", n, page_id) + pcgts = page_from_file(self.workspace.download_file(input_file)) metadata = pcgts.get_Metadata() metadata.add_MetadataItem( @@ -430,63 +438,18 @@ def process(self): value=self.parameter[name]) for name in self.parameter.keys()])])) page = pcgts.get_Page() - print(page.imageFilename) - #page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id) - - ''' - # Get image orientation - orientation = page.get_orientation() - rotated_image = self.rotate_image(orientation, page_image) - LOG.info("INPUT FILE %s ", input_file.pageId or input_file.ID) - - img_array = ocrolib.pil2array(rotated_image) - - #Check if image is RGB or not - if len(img_array.shape)==2: - img_array = np.stack((img_array,)*3, axis=-1) - - img_array_bin = np.array( - img_array > ocrolib.midrange(img_array), 'i') - - lineDetectH = [] - lineDetectV = [] - img_array_rr = self.remove_rular(img_array) - - textarea, img_array_rr_ta, height, width = self.detect_textarea( - img_array_rr) - self.parameter['colSeparator'] = int( - width * self.parameter['colSeparator']) - - if len(textarea) > 1: - textarea = self.crop_area( - textarea, img_array_bin, img_array_rr_ta) - - if len(textarea) == 0: - min_x, min_y, max_x, max_y = self.select_borderLine( - img_array_rr, lineDetectH, lineDetectV) - else: - min_x, min_y, max_x, max_y = textarea[0] - elif len(textarea) == 1 and (height*width*0.5 < (abs(textarea[0][2]-textarea[0][0]) * abs(textarea[0][3]-textarea[0][1]))): - x1, y1, x2, y2 = textarea[0] - x1 = x1-20 if x1 > 20 else 0 - x2 = x2+20 if x2 < width-20 else width - y1 = y1-40 if y1 > 40 else 0 - y2 = y2+40 if y2 < height-40 else height - - #self.save_pf(base, [x1, y1, x2, y2]) - min_x, min_y, max_x, max_y = textarea[0] - else: - min_x, min_y, max_x, max_y = self.select_borderLine( - img_array_rr, lineDetectH, lineDetectV) - - brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % ( - min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y))) - pcgts.get_Page().set_Border(brd) + + # this function is causing problem. soemhow no feature key in page at workspace.py at line # 360 + page_image, page_xywh, page_image_info = self.workspace.image_from_page(page, page_id) + + #if oplevel=="page": + self._process_segment(page_image, page, page_id) + file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp) + # Use input_file's basename for the new file - # this way the files retain the same basenames: - file_id = input_file.ID.replace(self.input_file_grp, self.output_file_grp) if file_id == input_file.ID: file_id = concat_padded(self.output_file_grp, n) self.workspace.add_file( @@ -498,9 +461,62 @@ def process(self): file_id + '.xml'), content=to_xml(pcgts).encode('utf-8') ) - ''' + def _process_segment(self,page_image, page, page_id): + # Get image orientation + #orientation = page.get_orientation() # This function is not working +# rotated_image = self.rotate_image(orientation, page_image) +# img_array = ocrolib.pil2array(rotated_image) - + img_array = ocrolib.pil2array(page_image) + + #Check if image is RGB or not + if len(img_array.shape)==2: + img_array = np.stack((img_array,)*3, axis=-1) + + img_array_bin = np.array( + img_array > ocrolib.midrange(img_array), 'i') + + lineDetectH = [] + lineDetectV = [] + img_array_rr = self.remove_rular(img_array) + + textarea, img_array_rr_ta, height, width = self.detect_textarea( + img_array_rr) + self.parameter['colSeparator'] = int( + width * self.parameter['colSeparator']) + + if len(textarea) > 1: + textarea = self.crop_area( + textarea, img_array_bin, img_array_rr_ta) + + if len(textarea) == 0: + min_x, min_y, max_x, max_y = self.select_borderLine( + img_array_rr, lineDetectH, lineDetectV) + else: + min_x, min_y, max_x, max_y = textarea[0] + elif len(textarea) == 1 and (height*width*0.5 < (abs(textarea[0][2]-textarea[0][0]) * abs(textarea[0][3]-textarea[0][1]))): + x1, y1, x2, y2 = textarea[0] + x1 = x1-20 if x1 > 20 else 0 + x2 = x2+20 if x2 < width-20 else width + y1 = y1-40 if y1 > 40 else 0 + y2 = y2+40 if y2 < height-40 else height - \ No newline at end of file + #self.save_pf(base, [x1, y1, x2, y2]) + min_x, min_y, max_x, max_y = textarea[0] + else: + min_x, min_y, max_x, max_y = self.select_borderLine( + img_array_rr, lineDetectH, lineDetectV) + + brd = BorderType(Coords=CoordsType("%i,%i %i,%i %i,%i %i,%i" % ( + min_x, min_y, max_x, min_y, max_x, max_y, min_x, max_y))) + page.set_Border(brd) +# cropped_image = img_array[min_x:max_x, min_y:max_y] +# bin_array = array(255*(cropped_image>ocrolib.midrange(cropped_image)),'B') +# cropped_image = ocrolib.array2pil(bin_array) +# file_path = self.workspace.save_image_file(cropped_image, +# file_id, +# page_id=page_id, +# file_grp=self.image_grp +# ) +# page.add_AlternativeImage(AlternativeImageType(filename=file_path, comments="cropped"))