diff --git a/camelot/parsers/lattice.py b/camelot/parsers/lattice.py index 14430c8e..8a85d1df 100644 --- a/camelot/parsers/lattice.py +++ b/camelot/parsers/lattice.py @@ -271,10 +271,11 @@ def _generate_columns_and_rows(self, table_idx, tk): tk, self.vertical_segments, self.horizontal_segments) t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) - self.t_bbox = t_bbox - for direction in self.t_bbox: - self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0)) + t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0)) + t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0)) + + self.t_bbox = t_bbox cols, rows = zip(*self.table_bbox[tk]) cols, rows = list(cols), list(rows) diff --git a/camelot/parsers/stream.py b/camelot/parsers/stream.py index d9481cca..eab82762 100644 --- a/camelot/parsers/stream.py +++ b/camelot/parsers/stream.py @@ -293,10 +293,11 @@ def _generate_columns_and_rows(self, table_idx, tk): t_bbox = {} t_bbox['horizontal'] = text_in_bbox(tk, self.horizontal_text) t_bbox['vertical'] = text_in_bbox(tk, self.vertical_text) - self.t_bbox = t_bbox - for direction in self.t_bbox: - self.t_bbox[direction].sort(key=lambda x: (x.x0, -x.y0)) + t_bbox['horizontal'].sort(key=lambda x: (-x.y0, x.x0)) + t_bbox['vertical'].sort(key=lambda x: (x.x0, -x.y0)) + + self.t_bbox = t_bbox text_x_min, text_y_min, text_x_max, text_y_max = self._text_bbox(self.t_bbox) rows_grouped = self._group_rows(self.t_bbox['horizontal'], row_close_tol=self.row_close_tol) diff --git a/camelot/utils.py b/camelot/utils.py index 2d735c80..cd55e4e0 100644 --- a/camelot/utils.py +++ b/camelot/utils.py @@ -344,9 +344,9 @@ def flag_font_size(textline, direction): fchars = [t[0] for t in chars] if ''.join(fchars).strip(): flist.append(''.join(fchars)) - fstring = ''.join(flist).strip('\n') + fstring = ''.join(flist) else: - fstring = ''.join([t.get_text() for t in textline]).strip('\n') + fstring = ''.join([t.get_text() for t in textline]) return fstring @@ -419,7 +419,7 @@ def split_textline(table, textline, direction, flag_size=False): grouped_chars.append((key[0], key[1], flag_font_size([t[2] for t in chars], direction))) else: gchars = [t[2].get_text() for t in chars] - grouped_chars.append((key[0], key[1], ''.join(gchars).strip('\n'))) + grouped_chars.append((key[0], key[1], ''.join(gchars))) return grouped_chars @@ -500,7 +500,7 @@ def get_table_index(table, t, direction, split_text=False, flag_size=False): if flag_size: return [(r_idx, c_idx, flag_font_size(t._objs, direction))], error else: - return [(r_idx, c_idx, t.get_text().strip('\n'))], error + return [(r_idx, c_idx, t.get_text())], error def compute_accuracy(error_weights):