Commit

update docs as per review

jreback committed Apr 3, 2017
1 parent 8216486 commit 6a95c81

Showing 5 changed files with 28 additions and 19 deletions.
8 changes: 7 additions & 1 deletion doc/source/io.rst
@@ -4529,7 +4529,13 @@ Several caveats.
- Non supported types include ``Period`` and actual python object types. These will raise a helpful error message
on an attempt at serialization.

- See the documentation for `pyarrow <https://pyarrow.readthedocs.io/en/latest/` and `fastparquet <https://fastparquet.readthedocs.io/en/latest/necessary>`
+ See the documentation for `pyarrow <https://pyarrow.readthedocs.io/en/latest/`__ and `fastparquet <https://fastparquet.readthedocs.io/en/latest/>`__

+ .. note::
+
+ These engines are very similar and should read/write nearly identical parquet format files.
+ These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library).
+ TODO: differing options to write non-standard columns & null treatment

.. ipython:: python
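The note added above claims the two engines are interchangeable for standard frames. A minimal sketch of that claim, not part of the commit, assuming both pyarrow and fastparquet are installed (file names are illustrative):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': [1.0, 2.0, 3.0]})

    # Same API, different underlying implementation.
    df.to_parquet('example_pa.parquet', engine='pyarrow')
    df.to_parquet('example_fp.parquet', engine='fastparquet')

    # If the engines really do write near-identical files, either one
    # should be able to read the other's output.
    result = pd.read_parquet('example_pa.parquet', engine='fastparquet')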
4 changes: 3 additions & 1 deletion doc/source/options.rst
@@ -416,6 +416,8 @@ io.hdf.default_format None default format writing format,
'table'
io.hdf.dropna_table True drop ALL nan rows when appending
to a table
+ io.parquet.engine pyarrow The engine to use as a default for
+ parquet reading and writing.
mode.chained_assignment warn Raise an exception, warn, or no
action if trying to use chained
assignment, The default is warn
@@ -538,4 +540,4 @@ Only ``'display.max_rows'`` are serialized and published.
.. ipython:: python
:suppress:
- pd.reset_option('display.html.table_schema')
+ pd.reset_option('display.html.table_schema')
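A short sketch of how the new ``io.parquet.engine`` option documented above would be used, assuming pandas at this commit (where the registered default is 'pyarrow'):

    import pandas as pd

    # Make fastparquet the default; engine=None now resolves to it.
    pd.set_option('io.parquet.engine', 'fastparquet')
    print(pd.get_option('io.parquet.engine'))  # 'fastparquet'

    # Restore the registered default ('pyarrow' at this commit).
    pd.reset_option('io.parquet.engine')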
2 changes: 1 addition & 1 deletion pandas/core/config_init.py
@@ -471,7 +471,7 @@ def _register_xlsx(engine, other):
parquet_engine_doc = """
: string
The default parquet reader/writer engine. Available options:
- None, 'pyarrow', 'fastparquet'
+ 'pyarrow', 'fastparquet', the default is 'pyarrow'
"""

with cf.config_prefix('io.parquet'):
6 changes: 3 additions & 3 deletions pandas/core/frame.py
@@ -1531,9 +1531,9 @@ def to_parquet(self, fname, engine=None, compression=None,
----------
fname : str
string file path
- engine : parquet engine
- supported are {'pyarrow', 'fastparquet'}
- if None, will use the option: io.parquet.engine
+ engine : str, optional
+ The parquet engine, one of {'pyarrow', 'fastparquet'}
+ if None, will use the option: `io.parquet.engine`
compression : str, optional
compression method, includes {'gzip', 'snappy', 'brotli'}
kwargs passed to the engine
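A hedged usage sketch for the DataFrame.to_parquet signature documented above; 'snappy' compression assumes the chosen engine's codec support is installed:

    import pandas as pd

    df = pd.DataFrame({'x': range(5), 'y': [0.1, 0.2, 0.3, 0.4, 0.5]})

    # engine=None defers to the io.parquet.engine option.
    df.to_parquet('frame.parquet', engine=None, compression='snappy')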
27 changes: 14 additions & 13 deletions pandas/io/parquet.py
@@ -81,16 +81,16 @@ def read(self, path):

def to_parquet(df, path, engine=None, compression=None, **kwargs):
"""
- Write a DataFrame to the pyarrow
+ Write a DataFrame to the parquet format.
Parameters
----------
df : DataFrame
path : string
File path
- engine : parquet engine
- supported are {'pyarrow', 'fastparquet'}
- if None, will use the option: io.parquet.engine
+ engine : str, optional
+ The parquet engine, one of {'pyarrow', 'fastparquet'}
+ if None, will use the option: `io.parquet.engine`
compression : str, optional
compression method, includes {'gzip', 'snappy', 'brotli'}
kwargs are passed to the engine
@@ -110,15 +110,16 @@ def to_parquet(df, path, engine=None, compression=None, **kwargs):
# raise on anything else as we don't serialize the index

if not isinstance(df.index, Int64Index):
raise ValueError("parquet does not serializing {} "
raise ValueError("parquet does not support serializing {} "
"for the index; you can .reset_index()"
"to make the index into column(s)".format(
type(df.index)))

if not df.index.equals(RangeIndex.from_range(range(len(df)))):
raise ValueError("parquet does not serializing a non-default index "
"for the index; you can .reset_index()"
"to make the index into column(s)")
raise ValueError("parquet does not support serializing a "
"non-default index for the index; you "
"can .reset_index() to make the index "
"into column(s)")

if df.index.name is not None:
raise ValueError("parquet does not serialize index meta-data on a "
@@ -136,22 +137,22 @@ def to_parquet(df, path, engine=None, compression=None, **kwargs):

def read_parquet(path, engine=None, **kwargs):
"""
- Load a parquet object from the file path
+ Load a parquet object from the file path, returning a DataFrame.
.. versionadded 0.20.0
Parameters
----------
path : string
File path
- engine : parquet engine
- supported are {'pyarrow', 'fastparquet'}
- if None, will use the option: io.parquet.engine
+ engine : str, optional
+ The parquet engine, one of {'pyarrow', 'fastparquet'}
+ if None, will use the option: `io.parquet.engine`
kwargs are passed to the engine
Returns
-------
- type of object stored in file
+ DataFrame
"""

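A round-trip sketch for read_parquet as documented above, assuming it is exposed at the top level as pd.read_parquet:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]})
    df.to_parquet('roundtrip.parquet', engine='pyarrow')

    result = pd.read_parquet('roundtrip.parquet', engine='pyarrow')
    assert result.equals(df)  # dtypes and values should survive the trip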
