Skip to content

Commit

Permalink
add configuration options & cross-engine tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Apr 2, 2017
1 parent 99d3556 commit 71e1e5d
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 6 deletions.
11 changes: 11 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,3 +466,14 @@ def _register_xlsx(engine, other):
except ImportError:
# fallback
_register_xlsx('openpyxl', 'xlsxwriter')

# Set up the io.parquet specific configuration.
parquet_engine_doc = """
: string
The default parquet reader/writer engine. Available options:
None, 'pyarrow', 'fastparquet'
"""

with cf.config_prefix('io.parquet'):
cf.register_option('engine', 'pyarrow', parquet_engine_doc,
validator=is_one_of_factory(['pyarrow', 'fastparquet']))
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1520,7 +1520,7 @@ def to_feather(self, fname):
from pandas.io.feather_format import to_feather
to_feather(self, fname)

def to_parquet(self, fname, engine, compression=None,
def to_parquet(self, fname, engine=None, compression=None,
**kwargs):
"""
write out the binary parquet for DataFrames
Expand All @@ -1533,6 +1533,7 @@ def to_parquet(self, fname, engine, compression=None,
string file path
engine : parquet engine
supported are {'pyarrow', 'fastparquet'}
if None, will use the option: io.parquet.engine
compression : str, optional
compression method, includes {'gzip', 'snappy', 'brotli'}
kwargs passed to the engine
Expand Down
11 changes: 8 additions & 3 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
""" parquet compat """

from warnings import catch_warnings
from pandas import DataFrame, RangeIndex, Int64Index
from pandas import DataFrame, RangeIndex, Int64Index, get_option
from pandas.compat import range


def get_engine(engine):
""" return our implementation """

if engine is None:
engine = get_option('io.parquet.engine')

if engine not in ['pyarrow', 'fastparquet']:
raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")

Expand Down Expand Up @@ -71,7 +74,7 @@ def read(self, path):
return self.api.ParquetFile(path).to_pandas()


def to_parquet(df, path, engine, compression=None, **kwargs):
def to_parquet(df, path, engine=None, compression=None, **kwargs):
"""
Write a DataFrame to the pyarrow
Expand All @@ -82,6 +85,7 @@ def to_parquet(df, path, engine, compression=None, **kwargs):
File path
engine : parquet engine
supported are {'pyarrow', 'fastparquet'}
if None, will use the option: io.parquet.engine
compression : str, optional
compression method, includes {'gzip', 'snappy', 'brotli'}
kwargs are passed to the engine
Expand Down Expand Up @@ -125,7 +129,7 @@ def to_parquet(df, path, engine, compression=None, **kwargs):
return impl.write(df, path, compression=compression)


def read_parquet(path, engine, **kwargs):
def read_parquet(path, engine=None, **kwargs):
"""
Load a parquet object from the file path
Expand All @@ -137,6 +141,7 @@ def read_parquet(path, engine, **kwargs):
File path
engine : parquet engine
supported are {'pyarrow', 'fastparquet'}
if None, will use the option: io.parquet.engine
kwargs are passed to the engine
Returns
Expand Down
52 changes: 50 additions & 2 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,58 @@ def fp():
return 'fastparquet'


def test_invalid_engine():
@pytest.fixture
def df_compat():
    # small frame shared by the cross-engine compatibility tests
    data = {'A': [1, 2, 3], 'B': 'foo'}
    return pd.DataFrame(data)


def test_invalid_engine(df_compat):
    # an unsupported engine name must be rejected with ValueError
    ctx = pytest.raises(ValueError)
    with ctx:
        df_compat.to_parquet('foo', 'bar')


def test_options_py(df_compat, pa):
    # with engine unspecified, io.parquet.engine option selects pyarrow
    frame = df_compat
    with tm.ensure_clean() as path:
        with pd.option_context('io.parquet.engine', 'pyarrow'):
            frame.to_parquet(path)

            roundtripped = read_parquet(path)
            tm.assert_frame_equal(roundtripped, frame)


def test_options_fp(df_compat, fp):
    # with engine unspecified, io.parquet.engine option selects fastparquet
    frame = df_compat
    with tm.ensure_clean() as path:
        with pd.option_context('io.parquet.engine', 'fastparquet'):
            frame.to_parquet(path)

            roundtripped = read_parquet(path)
            tm.assert_frame_equal(roundtripped, frame)


def test_cross_engine(df_compat, pa, fp):
    # a file written with one engine must round-trip through the other
    frame = df_compat
    for writer, reader in ((pa, fp), (fp, pa)):
        with tm.ensure_clean() as path:
            frame.to_parquet(path, engine=writer)

            result = read_parquet(path, engine=reader)
            tm.assert_frame_equal(result, frame)


class Base(object):
Expand Down

0 comments on commit 71e1e5d

Please sign in to comment.