Skip to content

Commit

Permalink
add configuration options & cross-engine tests
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback committed Apr 2, 2017
1 parent 99d3556 commit 71e1e5d
Show file tree
Hide file tree
Showing 4 changed files with 71 additions and 6 deletions.
11 changes: 11 additions & 0 deletions pandas/core/config_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,3 +466,14 @@ def _register_xlsx(engine, other):
except ImportError:
# fallback
_register_xlsx('openpyxl', 'xlsxwriter')

# Set up the io.parquet specific configuration.
parquet_engine_doc = """
: string
The default parquet reader/writer engine. Available options:
None, 'pyarrow', 'fastparquet'
"""

with cf.config_prefix('io.parquet'):
cf.register_option('engine', 'pyarrow', parquet_engine_doc,
validator=is_one_of_factory(['pyarrow', 'fastparquet']))
3 changes: 2 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1520,7 +1520,7 @@ def to_feather(self, fname):
from pandas.io.feather_format import to_feather
to_feather(self, fname)

def to_parquet(self, fname, engine, compression=None,
def to_parquet(self, fname, engine=None, compression=None,
**kwargs):
"""
write out the binary parquet for DataFrames
Expand All @@ -1533,6 +1533,7 @@ def to_parquet(self, fname, engine, compression=None,
string file path
engine : parquet engine
supported are {'pyarrow', 'fastparquet'}
if None, will use the option: io.parquet.engine
compression : str, optional
compression method, includes {'gzip', 'snappy', 'brotli'}
kwargs passed to the engine
Expand Down
11 changes: 8 additions & 3 deletions pandas/io/parquet.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
""" parquet compat """

from warnings import catch_warnings
from pandas import DataFrame, RangeIndex, Int64Index
from pandas import DataFrame, RangeIndex, Int64Index, get_option
from pandas.compat import range


def get_engine(engine):
""" return our implementation """

if engine is None:
engine = get_option('io.parquet.engine')

if engine not in ['pyarrow', 'fastparquet']:
raise ValueError("engine must be one of 'pyarrow', 'fastparquet'")

Expand Down Expand Up @@ -71,7 +74,7 @@ def read(self, path):
return self.api.ParquetFile(path).to_pandas()


def to_parquet(df, path, engine, compression=None, **kwargs):
def to_parquet(df, path, engine=None, compression=None, **kwargs):
"""
Write a DataFrame to the pyarrow
Expand All @@ -82,6 +85,7 @@ def to_parquet(df, path, engine, compression=None, **kwargs):
File path
engine : parquet engine
supported are {'pyarrow', 'fastparquet'}
if None, will use the option: io.parquet.engine
compression : str, optional
compression method, includes {'gzip', 'snappy', 'brotli'}
kwargs are passed to the engine
Expand Down Expand Up @@ -125,7 +129,7 @@ def to_parquet(df, path, engine, compression=None, **kwargs):
return impl.write(df, path, compression=compression)


def read_parquet(path, engine, **kwargs):
def read_parquet(path, engine=None, **kwargs):
"""
Load a parquet object from the file path
Expand All @@ -137,6 +141,7 @@ def read_parquet(path, engine, **kwargs):
File path
engine : parquet engine
supported are {'pyarrow', 'fastparquet'}
if None, will use the option: io.parquet.engine
kwargs are passed to the engine
Returns
Expand Down
52 changes: 50 additions & 2 deletions pandas/tests/io/test_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,58 @@ def fp():
return 'fastparquet'


def test_invalid_engine():
@pytest.fixture
def df_compat():
    # small frame shared by the cross-engine compatibility tests
    data = {'A': [1, 2, 3], 'B': 'foo'}
    return pd.DataFrame(data)


def test_invalid_engine(df_compat):
    # an unsupported engine name must be rejected with ValueError
    ctx = pytest.raises(ValueError)
    with ctx:
        df_compat.to_parquet('foo', 'bar')


def test_options_py(df_compat, pa):
    # with engine unspecified, io.parquet.engine option selects pyarrow
    frame = df_compat
    with tm.ensure_clean() as path:
        with pd.option_context('io.parquet.engine', 'pyarrow'):
            frame.to_parquet(path)

            roundtripped = read_parquet(path)
            tm.assert_frame_equal(roundtripped, frame)


def test_options_fp(df_compat, fp):
    # with engine unspecified, io.parquet.engine option selects fastparquet
    frame = df_compat
    with tm.ensure_clean() as path:
        with pd.option_context('io.parquet.engine', 'fastparquet'):
            frame.to_parquet(path)

            roundtripped = read_parquet(path)
            tm.assert_frame_equal(roundtripped, frame)


def test_cross_engine(df_compat, pa, fp):
    # a file written with one engine must round-trip through the other
    frame = df_compat
    for writer, reader in ((pa, fp), (fp, pa)):
        with tm.ensure_clean() as path:
            frame.to_parquet(path, engine=writer)

            result = read_parquet(path, engine=reader)
            tm.assert_frame_equal(result, frame)


class Base(object):
Expand Down

0 comments on commit 71e1e5d

Please sign in to comment.