More linting

mittagessen · Jan 30, 2024 · 096a088 · 096a088
1 parent b57ffac
commit 096a088
Show file tree

Hide file tree

Showing 4 changed files with 59 additions and 45 deletions.
diff --git a/kraken/contrib/hyperparameters/tune_training.py b/kraken/contrib/hyperparameters/tune_training.py
@@ -31,15 +31,15 @@ def train_tune(config, training_data=None, epochs=100):
                                      output='model',
                                      spec=RECOGNITION_SPEC)
 
-    data_module = PretrainDataModule(batch_size=hyper_params.pop('batch_size'),
-                                     pad=hyper_params.pop('pad'),
-                                     augment=hyper_params.pop('augment'),
-                                     training_data=training_data,
-                                     num_workers=resources_per_trial['cpu'],
-                                     height=model.height,
-                                     width=model.width,
-                                     channels=model.channels,
-                                     format_type='binary')
+    _ = PretrainDataModule(batch_size=hyper_params.pop('batch_size'),
+                           pad=hyper_params.pop('pad'),
+                           augment=hyper_params.pop('augment'),
+                           training_data=training_data,
+                           num_workers=resources_per_trial['cpu'],
+                           height=model.height,
+                           width=model.width,
+                           channels=model.channels,
+                           format_type='binary')
 
     callback = TuneReportCallback({'loss': 'CE'}, on='validation_end')
     trainer = pl.Trainer(max_epochs=epochs,
@@ -49,6 +49,10 @@ def train_tune(config, training_data=None, epochs=100):
     trainer.fit(model)
 
 
-analysis = tune.run(partial(train_tune, training_data=sys.argv[2:]), local_dir=sys.argv[1], num_samples=100, resources_per_trial=resources_per_trial, config=config)
+analysis = tune.run(partial(train_tune, training_data=sys.argv[2:]),
+                    local_dir=sys.argv[1],
+                    num_samples=100,
+                    resources_per_trial=resources_per_trial,
+                    config=config)
 
 print("Best hyperparameters found were: ", analysis.get_best_config(metric='accuracy', mode='max'))
diff --git a/kraken/contrib/print_word_spreader.py b/kraken/contrib/print_word_spreader.py
@@ -26,24 +26,21 @@ class BboxError(Exception):
 parser.add_argument('--inputDir', help='Path to directory where source files are found', required=True)
 parser.add_argument('--outputDir', help='Path to directory where output is stored', required=True)
 parser.add_argument('--imageDir', help='Path to directory where images corresponding to the html files are stored.', required=False)
-parser.add_argument(
-    '-c',
-    '--confidenceSummary',
-    default=False,
-    action="store_true",
-    help="store summaries of word confidence in xhtml data- attributes and cut all material after the first ; from the word span title attribute, making their mouseover popups less obtrusive.")
-parser.add_argument(
-    '-f',
-    '--fixBigWordSpans',
-    default=False,
-    action="store_true",
-    help="fix word_span elements whose bbox area is greater than a sixth of the whole page area by assigning them the bbox of the previous word.")
-parser.add_argument(
-    '-s',
-    '--shareSpaceSpans',
-    default=False,
-    action="store_true",
-    help="normalize hocr output from kraken, which assigns a word to every space and gives it a bbox. This removes those space words and assigns their area to the words on either side, with some space in between, generating output more like Ocropus and tesseract.")
+parser.add_argument('-c', '--confidenceSummary', default=False, action="store_true",
+                    help="store summaries of word confidence in xhtml data- "
+                         "attributes and cut all material after the first ; from the "
+                         "word span title attribute, making their mouseover popups "
+                         "less obtrusive.")
+parser.add_argument('-f', '--fixBigWordSpans', default=False, action="store_true",
+                    help="fix word_span elements whose bbox area is greater "
+                         "than a sixth of the whole page area by assigning them the "
+                         "bbox of the previous word.")
+parser.add_argument('-s', '--shareSpaceSpans', default=False, action="store_true",
+                    help="normalize hocr output from kraken, which assigns a "
+                         "word to every space and gives it a bbox. This removes those "
+                         "space words and assigns their area to the words on either "
+                         "side, with some space in between, generating output more "
+                         "like Ocropus and tesseract.")
 parser.add_argument("-v", "--verbose", help="increase output verbosity", default=False, action="store_true")
 args = parser.parse_args()
 
@@ -76,15 +73,15 @@ def get_bbox_area(span):
         if (args.verbose):
             print("this element's area is " + str(area))
         return area
-    except Exception as e:
+    except Exception:
         # print("Exception getting area on span  {}".format(etree.tostring(span)))
         raise
 
 
 def set_bbox_value(span, position, val):
     try:
         parts = span.get('title').split(';')
-    except Exception as e:
+    except Exception:
         print("Exception getting title element on span id {}.".format(span.get('id')))
         raise BboxError
     bbox_parts = parts[0].split(' ')
@@ -114,7 +111,7 @@ def share_space_spans(treeIn):
             print(e)
             raise
         # check that we have both
-        if ((not previous_span is None) and (not next_span is None)):
+        if previous_span is not None and next_span is not None:
             # this means that there is both a previous and a next
             if (args.verbose):
                 print("***")
@@ -155,7 +152,7 @@ def confidence_summary(treeIn):
             word_span.set('data-min-confidence', str(minimum))
             word_span.set('data-average-confidence', str(average))
             word_span.set('title', bbox_only)
-        except Exception as e:
+        except Exception:
             # there's not much to do if this goes wrong
             pass
 

diff --git a/kraken/lib/layers.py b/kraken/lib/layers.py
@@ -865,7 +865,8 @@ def get_shape(self, input: Tuple[int, int, int, int], target_shape: Optional[Tup
                                  self.out_channels,
                                  int(max(np.floor((input[2]+2*self.padding[0]-self.dilation[0]*(self.kernel_size[0]-1)-1) /
                                      self.stride[0]+1), 1) if input[2] != 0 else 0),
-                                 int(max(np.floor((input[3]+2*self.padding[1]-self.dilation[1]*(self.kernel_size[1]-1)-1)/self.stride[1]+1), 1) if input[3] != 0 else 0))
+                                 int(max(np.floor((input[3]+2*self.padding[1]-self.dilation[1]*(self.kernel_size[1]-1)-1) /
+                                     self.stride[1]+1), 1) if input[3] != 0 else 0))
         return self.output_shape
 
     def deserialize(self, name: str, spec) -> None:

diff --git a/kraken/lib/vgsl.py b/kraken/lib/vgsl.py
@@ -469,7 +469,8 @@ def build_rnn(self,
                   input: Tuple[int, int, int, int],
                   blocks: List[str],
                   idx: int,
-                  target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                  target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                            Tuple[Tuple[int, int, int, int], str, Callable]]:
         """
         Builds an LSTM/GRU layer returning number of outputs and layer.
         """
@@ -497,7 +498,8 @@ def build_dropout(self,
                       input: Tuple[int, int, int, int],
                       blocks: List[str],
                       idx: int,
-                      target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                      target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                                Tuple[Tuple[int, int, int, int], str, Callable]]:
         pattern = re.compile(r'(?P<type>Do)(?P<name>{\w+})?(?P<p>(\d+(\.\d*)?|\.\d+))?(,(?P<dim>\d+))?')
         m = pattern.match(blocks[idx])
         if not m:
@@ -513,7 +515,8 @@ def build_addition(self,
                        input: Tuple[int, int, int, int],
                        blocks: List[str],
                        idx: int,
-                       target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                       target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                                 Tuple[Tuple[int, int, int, int], str, Callable]]:
         pattern = re.compile(r'(?P<type>A)(?P<name>{\w+})?(?P<dim>\d+),(?P<chunk_size>\d+)')
         m = pattern.match(blocks[idx])
         if not m:
@@ -533,7 +536,8 @@ def build_identity(self,
                        input: Tuple[int, int, int, int],
                        blocks: List[str],
                        idx: int,
-                       target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                       target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                                 Tuple[Tuple[int, int, int, int], str, Callable]]:
         pattern = re.compile(r'(?P<type>I)(?P<name>{\w+})?')
         m = pattern.match(blocks[idx])
         if not m:
@@ -547,7 +551,8 @@ def build_groupnorm(self,
                         input: Tuple[int, int, int, int],
                         blocks: List[str],
                         idx: int,
-                        target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                        target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                                  Tuple[Tuple[int, int, int, int], str, Callable]]:
         pattern = re.compile(r'(?P<type>Gn)(?P<name>{\w+})?(?P<groups>\d+)')
         m = pattern.match(blocks[idx])
         if not m:
@@ -562,7 +567,8 @@ def build_wav2vec2(self,
                        input: Tuple[int, int, int, int],
                        blocks: List[str],
                        idx: int,
-                       target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                       target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                                 Tuple[Tuple[int, int, int, int], str, Callable]]:
         """
         Builds a Wav2Vec2 masking layer.
         """
@@ -604,7 +610,8 @@ def build_conv(self,
                    input: Tuple[int, int, int, int],
                    blocks: List[str],
                    idx: int,
-                   target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                   target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                             Tuple[Tuple[int, int, int, int], str, Callable]]:
         """
         Builds a 2D convolution layer.
         """
@@ -629,7 +636,8 @@ def build_maxpool(self,
                       input: Tuple[int, int, int, int],
                       blocks: List[str],
                       idx: int,
-                      target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                      target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                                Tuple[Tuple[int, int, int, int], str, Callable]]:
         """
         Builds a maxpool layer.
         """
@@ -649,7 +657,8 @@ def build_reshape(self,
                       input: Tuple[int, int, int, int],
                       blocks: List[str],
                       idx: int,
-                      target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                      target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                                Tuple[Tuple[int, int, int, int], str, Callable]]:
         """
         Builds a reshape layer
         """
@@ -688,7 +697,8 @@ def build_output(self,
                      input: Tuple[int, int, int, int],
                      blocks: List[str],
                      idx: int,
-                     target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                     target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                               Tuple[Tuple[int, int, int, int], str, Callable]]:
         """
         Builds an output layer.
         """
@@ -755,7 +765,8 @@ def build_series(self,
                      input: Tuple[int, int, int, int],
                      blocks: List[str],
                      idx: int,
-                     target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                     target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                               Tuple[Tuple[int, int, int, int], str, Callable]]:
         """
         Builds a serial block of layers.
         """
@@ -784,7 +795,8 @@ def build_parallel(self,
                        input: Tuple[int, int, int, int],
                        blocks: List[str],
                        idx: int,
-                       target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None], Tuple[Tuple[int, int, int, int], str, Callable]]:
+                       target_output_shape: Optional[Tuple[int, int, int, int]] = None) -> Union[Tuple[None, None, None],
+                                                                                                 Tuple[Tuple[int, int, int, int], str, Callable]]:
         """
         Builds a block of parallel layers.
         """