Commit

update docs as per review

jreback committed Apr 3, 2017
1 parent 8216486 commit 6a95c81

Showing 5 changed files with 28 additions and 19 deletions.
8 changes: 7 additions & 1 deletion doc/source/io.rst
@@ -4529,7 +4529,13 @@ Several caveats.
- Non supported types include ``Period`` and actual python object types. These will raise a helpful error message
on an attempt at serialization.

- See the documentation for `pyarrow <https://pyarrow.readthedocs.io/en/latest/` and `fastparquet <https://fastparquet.readthedocs.io/en/latest/necessary>`
+ See the documentation for `pyarrow <https://pyarrow.readthedocs.io/en/latest/`__ and `fastparquet <https://fastparquet.readthedocs.io/en/latest/>`__

+ .. note::
+
+ These engines are very similar and should read/write nearly identical parquet format files.
+ These libraries differ by having different underlying dependencies (``fastparquet`` by using ``numba``, while ``pyarrow`` uses a c-library).
+ TODO: differing options to write non-standard columns & null treatment

.. ipython:: python
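The note added above claims the two engines are interchangeable for standard frames. A minimal sketch of that claim, not part of the commit, assuming both pyarrow and fastparquet are installed (file names are illustrative):

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2, 3], 'b': [1.0, 2.0, 3.0]})

    # Same API, different underlying implementation.
    df.to_parquet('example_pa.parquet', engine='pyarrow')
    df.to_parquet('example_fp.parquet', engine='fastparquet')

    # If the engines really do write near-identical files, either one
    # should be able to read the other's output.
    result = pd.read_parquet('example_pa.parquet', engine='fastparquet')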
4 changes: 3 additions & 1 deletion doc/source/options.rst
@@ -416,6 +416,8 @@ io.hdf.default_format None default format writing format,
'table'
io.hdf.dropna_table True drop ALL nan rows when appending
to a table
+ io.parquet.engine pyarrow The engine to use as a default for
+ parquet reading and writing.
mode.chained_assignment warn Raise an exception, warn, or no
action if trying to use chained
assignment, The default is warn
@@ -538,4 +540,4 @@ Only ``'display.max_rows'`` are serialized and published.
.. ipython:: python
:suppress:
- pd.reset_option('display.html.table_schema')
+ pd.reset_option('display.html.table_schema')
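A short sketch of how the new ``io.parquet.engine`` option documented above would be used, assuming pandas at this commit (where the registered default is 'pyarrow'):

    import pandas as pd

    # Make fastparquet the default; engine=None now resolves to it.
    pd.set_option('io.parquet.engine', 'fastparquet')
    print(pd.get_option('io.parquet.engine'))  # 'fastparquet'

    # Restore the registered default ('pyarrow' at this commit).
    pd.reset_option('io.parquet.engine')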
2 changes: 1 addition & 1 deletion pandas/core/config_init.py
@@ -471,7 +471,7 @@ def _register_xlsx(engine, other):
parquet_engine_doc = """
: string
The default parquet reader/writer engine. Available options:
- None, 'pyarrow', 'fastparquet'
+ 'pyarrow', 'fastparquet', the default is 'pyarrow'
"""

with cf.config_prefix('io.parquet'):
6 changes: 3 additions & 3 deletions pandas/core/frame.py
@@ -1531,9 +1531,9 @@ def to_parquet(self, fname, engine=None, compression=None,
----------
fname : str
string file path
- engine : parquet engine
- supported are {'pyarrow', 'fastparquet'}
- if None, will use the option: io.parquet.engine
+ engine : str, optional
+ The parquet engine, one of {'pyarrow', 'fastparquet'}
+ if None, will use the option: `io.parquet.engine`
compression : str, optional
compression method, includes {'gzip', 'snappy', 'brotli'}
kwargs passed to the engine
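A hedged usage sketch for the DataFrame.to_parquet signature documented above; 'snappy' compression assumes the chosen engine's codec support is installed:

    import pandas as pd

    df = pd.DataFrame({'x': range(5), 'y': [0.1, 0.2, 0.3, 0.4, 0.5]})

    # engine=None defers to the io.parquet.engine option.
    df.to_parquet('frame.parquet', engine=None, compression='snappy')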
27 changes: 14 additions & 13 deletions pandas/io/parquet.py
@@ -81,16 +81,16 @@ def read(self, path):

def to_parquet(df, path, engine=None, compression=None, **kwargs):
"""
- Write a DataFrame to the pyarrow
+ Write a DataFrame to the parquet format.
Parameters
----------
df : DataFrame
path : string
File path
- engine : parquet engine
- supported are {'pyarrow', 'fastparquet'}
- if None, will use the option: io.parquet.engine
+ engine : str, optional
+ The parquet engine, one of {'pyarrow', 'fastparquet'}
+ if None, will use the option: `io.parquet.engine`
compression : str, optional
compression method, includes {'gzip', 'snappy', 'brotli'}
kwargs are passed to the engine
@@ -110,15 +110,16 @@ def to_parquet(df, path, engine=None, compression=None, **kwargs):
# raise on anything else as we don't serialize the index

if not isinstance(df.index, Int64Index):
raise ValueError("parquet does not serializing {} "
raise ValueError("parquet does not support serializing {} "
"for the index; you can .reset_index()"
"to make the index into column(s)".format(
type(df.index)))

if not df.index.equals(RangeIndex.from_range(range(len(df)))):
raise ValueError("parquet does not serializing a non-default index "
"for the index; you can .reset_index()"
"to make the index into column(s)")
raise ValueError("parquet does not support serializing a "
"non-default index for the index; you "
"can .reset_index() to make the index "
"into column(s)")

if df.index.name is not None:
raise ValueError("parquet does not serialize index meta-data on a "
@@ -136,22 +137,22 @@ def to_parquet(df, path, engine=None, compression=None, **kwargs):

def read_parquet(path, engine=None, **kwargs):
"""
- Load a parquet object from the file path
+ Load a parquet object from the file path, returning a DataFrame.
.. versionadded 0.20.0
Parameters
----------
path : string
File path
- engine : parquet engine
- supported are {'pyarrow', 'fastparquet'}
- if None, will use the option: io.parquet.engine
+ engine : str, optional
+ The parquet engine, one of {'pyarrow', 'fastparquet'}
+ if None, will use the option: `io.parquet.engine`
kwargs are passed to the engine
Returns
-------
- type of object stored in file
+ DataFrame
"""

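A round-trip sketch for read_parquet as documented above, assuming it is exposed at the top level as pd.read_parquet:

    import pandas as pd

    df = pd.DataFrame({'a': [1, 2], 'b': [0.5, 0.75]})
    df.to_parquet('roundtrip.parquet', engine='pyarrow')

    result = pd.read_parquet('roundtrip.parquet', engine='pyarrow')
    assert result.equals(df)  # dtypes and values should survive the trip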
