Skip to content
This repository has been archived by the owner on Jan 6, 2025. It is now read-only.

[MRG + 1] Add suppress_warnings flag #155

Merged
merged 3 commits into from
Oct 19, 2018
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions camelot/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def set_config(self, key, value):
' font size. Useful to detect super/subscripts.')
@click.option('-M', '--margins', nargs=3, default=(1.0, 0.5, 0.1),
help='PDFMiner char_margin, line_margin and word_margin.')
@click.option('-q', '--quiet', is_flag=True, help='Suppress warnings.')
@click.pass_context
def cli(ctx, *args, **kwargs):
"""Camelot: PDF Table Extraction for Humans"""
Expand Down Expand Up @@ -89,6 +90,7 @@ def lattice(c, *args, **kwargs):
output = conf.pop('output')
f = conf.pop('format')
compress = conf.pop('zip')
suppress_warnings = conf.pop('quiet')
plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop('filepath')
kwargs.update(conf)
Expand All @@ -99,7 +101,8 @@ def lattice(c, *args, **kwargs):
kwargs['copy_text'] = None if not copy_text else copy_text
kwargs['shift_text'] = list(kwargs['shift_text'])

tables = read_pdf(filepath, pages=pages, flavor='lattice', **kwargs)
tables = read_pdf(filepath, pages=pages, flavor='lattice',
suppress_warnings=suppress_warnings, **kwargs)
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
Expand Down Expand Up @@ -134,6 +137,7 @@ def stream(c, *args, **kwargs):
output = conf.pop('output')
f = conf.pop('format')
compress = conf.pop('zip')
suppress_warnings = conf.pop('quiet')
plot_type = kwargs.pop('plot_type')
filepath = kwargs.pop('filepath')
kwargs.update(conf)
Expand All @@ -143,7 +147,8 @@ def stream(c, *args, **kwargs):
columns = list(kwargs['columns'])
kwargs['columns'] = None if not columns else columns

tables = read_pdf(filepath, pages=pages, flavor='stream', **kwargs)
tables = read_pdf(filepath, pages=pages, flavor='stream',
suppress_warnings=suppress_warnings, **kwargs)
click.echo('Found {} tables'.format(tables.n))
if plot_type is not None:
for table in tables:
Expand Down
20 changes: 14 additions & 6 deletions camelot/io.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
# -*- coding: utf-8 -*-
import warnings

from .handlers import PDFHandler
from .utils import validate_input, remove_extra


def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
def read_pdf(filepath, pages='1', flavor='lattice', suppress_warnings=False,
**kwargs):
"""Read PDF and return extracted tables.

Note: kwargs annotated with ^ can only be used with flavor='stream'
Expand All @@ -20,6 +22,8 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
flavor : str (default: 'lattice')
The parsing method to use ('lattice' or 'stream').
Lattice is used by default.
suppress_warnings : bool, optional (default: False)
Prevent warnings from being emitted by Camelot.
table_area : list, optional (default: None)
List of table area strings of the form x1,y1,x2,y2
where (x1, y1) -> left-top and (x2, y2) -> right-bottom
Expand Down Expand Up @@ -85,8 +89,12 @@ def read_pdf(filepath, pages='1', flavor='lattice', **kwargs):
raise NotImplementedError("Unknown flavor specified."
" Use either 'lattice' or 'stream'")

validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(flavor=flavor, **kwargs)
return tables
with warnings.catch_warnings():
if suppress_warnings:
warnings.simplefilter("ignore")

validate_input(kwargs, flavor=flavor)
p = PDFHandler(filepath, pages)
kwargs = remove_extra(kwargs, flavor=flavor)
tables = p.parse(flavor=flavor, **kwargs)
return tables
14 changes: 14 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,3 +77,17 @@ def test_cli_output_format():
result = runner.invoke(cli, ['--zip', '--format', 'csv', '--output', outfile.format('csv'),
'stream', infile])
assert result.exit_code == 0

def test_cli_quiet_flag():
    """Check that ``--quiet`` removes the 'no tables' message from CLI output.

    The ``stream`` command is invoked twice on ``blank.pdf`` with identical
    arguments, first without and then with ``--quiet``; the message must be
    present in the first run's output and absent from the second's.
    """
    expected_message = 'No tables found on page-1'
    with TemporaryDirectory() as tempdir:
        blank_pdf = os.path.join(testdir, 'blank.pdf')
        csv_path = os.path.join(tempdir, 'blank.csv')
        # Arguments shared by both invocations; only '--quiet' differs.
        shared_args = ['--format', 'csv', '--output', csv_path,
                       'stream', blank_pdf]
        cli_runner = CliRunner()

        noisy_run = cli_runner.invoke(cli, shared_args)
        assert expected_message in noisy_run.output

        quiet_run = cli_runner.invoke(cli, ['--quiet'] + shared_args)
        assert expected_message not in quiet_run.output
19 changes: 15 additions & 4 deletions tests/test_errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,19 @@ def test_no_tables_found():
# TODO: use pytest.warns
with warnings.catch_warnings():
warnings.simplefilter('error')
try:
with pytest.raises(UserWarning) as e:
tables = camelot.read_pdf(filename)
except Exception as e:
assert type(e).__name__ == 'UserWarning'
assert str(e) == 'No tables found on page-1'
assert str(e.value) == 'No tables found on page-1'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jonathanlloyd Should this be one indent level down?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a bit weird, but that's actually how it's documented: https://docs.pytest.org/en/latest/assert.html
It seems to work 🤷‍♂️

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh. I looked at the tests for requests and they seem to do it this way too. https://github.com/requests/requests/blob/master/tests/test_requests.py#L817 If you ever find why pytest implemented it this way, do comment here!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Very odd!



def test_no_tables_found_warnings_supressed():
    """Check that ``suppress_warnings=True`` keeps ``read_pdf`` silent.

    ``blank.pdf`` is read while every warning is escalated to an exception;
    the test fails if any ``Warning`` propagates out of ``read_pdf``.
    """
    blank_pdf = os.path.join(testdir, 'blank.pdf')
    with warnings.catch_warnings():
        # Turn warnings into exceptions so that one escaping read_pdf is
        # caught below and reported as a test failure.
        warnings.simplefilter('error')
        try:
            camelot.read_pdf(blank_pdf, suppress_warnings=True)
        except Warning as unexpected:
            pytest.fail('Unexpected warning: {}'.format(str(unexpected)))