
ENH: Implement DataFrame.from_pyarrow and DataFrame.to_pyarrow #51769

Closed
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
@@ -281,6 +281,7 @@ Other enhancements
- Added new argument ``engine`` to :func:`read_json` to support parsing JSON with pyarrow by specifying ``engine="pyarrow"`` (:issue:`48893`)
- Added support for SQLAlchemy 2.0 (:issue:`40686`)
- :class:`Index` set operations :meth:`Index.union`, :meth:`Index.intersection`, :meth:`Index.difference`, and :meth:`Index.symmetric_difference` now support ``sort=True``, which will always return a sorted result, unlike the default ``sort=None`` which does not sort in some cases (:issue:`25151`)
- Added methods :meth:`DataFrame.from_pyarrow` and :meth:`DataFrame.to_pyarrow` to convert data from and to PyArrow tables (:issue:`51760`)

.. ---------------------------------------------------------------------------
.. _whatsnew_200.notable_bug_fixes:
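A rough usage sketch of the two methods this PR proposes (assumes pyarrow is installed; the table and column names are made up for illustration):

import pandas as pd
import pyarrow as pa

table = pa.table({"my_column": [1, 2, 3]})

df = pd.DataFrame.from_pyarrow(table)   # DataFrame with Arrow-backed (ArrowDtype) columns
table_again = df.to_pyarrow()           # back to a pyarrow.Table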
77 changes: 77 additions & 0 deletions pandas/core/frame.py
@@ -171,6 +171,10 @@
PeriodArray,
TimedeltaArray,
)
from pandas.core.arrays.arrow import (
ArrowDtype,
ArrowExtensionArray,
)
from pandas.core.arrays.sparse import SparseFrameAccessor
from pandas.core.construction import (
ensure_wrapped_if_datetimelike,
@@ -1763,6 +1767,79 @@ def create_index(indexlist, namelist):
columns = create_index(data["columns"], data["column_names"])
return cls(realdata, index=index, columns=columns, dtype=dtype)

@classmethod
def from_pyarrow(
cls,
table,
) -> DataFrame:
"""
Convert a pyarrow.Table to a DataFrame.

Parameters
----------
table : pyarrow.Table
PyArrow table to be converted into a DataFrame.

Returns
-------
DataFrame

See Also
--------
DataFrame.to_pyarrow
pyarrow.Table.to_pandas

Notes
-----
The conversion is zero-copy, and the resulting DataFrame uses
Arrow-backed data types.

Review comment (Contributor): Not sure that the term "zero-copy" is well understood, so maybe elaborate?

For customization of the conversion use the
`to_pandas <https://arrow.apache.org/docs/python/generated/\
pyarrow.Table.html#pyarrow.Table.to_pandas>`__
method of the Table object.

Examples
--------
>>> import pyarrow as pa
>>> table = pa.table([pa.array([1, 2, 3])], names=["my_column"])
>>> pd.DataFrame.from_pyarrow(table)
   my_column
0          1
1          2
2          3
"""
return table.to_pandas(types_mapper=ArrowDtype)
Review comment (Contributor): You should do an instance check here; e.g. if this is called with a non-Arrow table you would get a hard-to-understand error.
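A minimal sketch of the suggested check, assuming the module-level ArrowDtype import added earlier in this diff; the error-message wording is illustrative, not part of the PR:

@classmethod
def from_pyarrow(cls, table) -> DataFrame:
    import pyarrow as pa

    if not isinstance(table, pa.Table):
        # Fail early with a clear message instead of an opaque AttributeError
        # from calling .to_pandas() on an arbitrary object.
        raise TypeError(
            f"Expected a pyarrow.Table, got {type(table).__name__!r}"
        )
    return table.to_pandas(types_mapper=ArrowDtype)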


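Regarding the "For customization of the conversion" note in the docstring above, the caller would call to_pandas on the table directly; a sketch (pd.ArrowDtype here stands in for the module-internal ArrowDtype the PR uses):

import pandas as pd
import pyarrow as pa

table = pa.table({"my_column": [1, 2, 3]})

# Default conversion: numpy-backed dtypes (here int64), with pyarrow's own
# index and type-handling options available as keyword arguments.
df_numpy = table.to_pandas()

# Roughly what from_pyarrow does: map every Arrow type to a pandas ArrowDtype.
df_arrow = table.to_pandas(types_mapper=pd.ArrowDtype)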
def to_pyarrow(
self,
):
"""
Convert the DataFrame to a pyarrow.Table object.

Returns
-------
pyarrow.Table

See Also
--------
DataFrame.from_pyarrow
pyarrow.Table.from_pandas

Examples
--------
>>> df = pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
>>> df.to_pyarrow()
pyarrow.Table
col1: int64
col2: int64
----
col1: [[1,2]]
col2: [[3,4]]
"""
import pyarrow as pa

return pa.Table.from_pandas(self)
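For finer control in the other direction (index handling, explicit schema), a caller would presumably use pyarrow.Table.from_pandas directly rather than this wrapper; a sketch:

import pandas as pd
import pyarrow as pa

df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})

# preserve_index=False drops the RangeIndex instead of storing it in the
# table; an explicit schema= can also be supplied to control column types.
table = pa.Table.from_pandas(df, preserve_index=False)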

def to_numpy(
self,
dtype: npt.DTypeLike | None = None,
92 changes: 92 additions & 0 deletions pandas/tests/frame/constructors/test_pyarrow.py
@@ -0,0 +1,92 @@
import pytest

from pandas import (
NA,
DataFrame,
Series,
)
from pandas.core.arrays.arrow import ArrowDtype

pa = pytest.importorskip("pyarrow", minversion="7.0.0")
import pandas._testing as tm


@pytest.fixture(scope="module")
def sample_dataframe_numpy_backend():
return DataFrame(
{
"u8": Series([1, 2, 3, NA], dtype="UInt8"),
"f64": Series([float("NaN"), 1.0, 2.0, 3.0], dtype="float64"),
"s": Series(["foo", "bar", None, "foobar"], dtype="object"),
}
)


@pytest.fixture(scope="module")
def sample_dataframe_pyarrow_backend():
return DataFrame(
{
"u8": Series([1, 2, 3, NA], dtype="uint8[pyarrow]"),
"f64": Series([NA, 1.0, 2.0, 3.0], dtype="float64[pyarrow]"),
"s": Series(["foo", "bar", NA, "foobar"], dtype="string[pyarrow]"),
}
)


@pytest.fixture(scope="module")
def sample_pyarrow_table():
return pa.table(
[
pa.array([1, 2, 3, None], type=pa.uint8()),
pa.array([None, 1.0, 2.0, 3.0], type=pa.float64()),
pa.array(["foo", "bar", None, "foobar"], type=pa.string()),
],
names=["u8", "f64", "s"],
)


class TestPyArrow:
Review comment (Contributor): Need a skip if pyarrow is not installed.

@pytest.mark.parametrize(
"column,dtype", [("u8", pa.uint8()), ("f64", pa.float64()), ("s", pa.string())]
)
def test_from_pyarrow_uses_right_pandas_types(
self, sample_pyarrow_table, column, dtype
):
result = DataFrame.from_pyarrow(sample_pyarrow_table)
assert result[column].dtype == ArrowDtype(dtype)

@pytest.mark.parametrize("column", ["u8", "f64", "s"])
def test_from_pyarrow_keeps_correct_data(self, sample_pyarrow_table, column):
result = DataFrame.from_pyarrow(sample_pyarrow_table)
assert result[column]._data.array._data == sample_pyarrow_table[column]

@pytest.mark.parametrize("column", ["u8", "f64", "s"])
def test_from_pyarrow_does_not_copy_memory(self, sample_pyarrow_table, column):
result = DataFrame.from_pyarrow(sample_pyarrow_table)

result_buffers = result[column]._data.array._data.chunks[0].buffers()
expected_buffers = sample_pyarrow_table[column].chunks[0].buffers()

for result_buffer, expected_buffer in zip(result_buffers, expected_buffers):
if result_buffer is None and expected_buffer is None:
continue
assert result_buffer.address == expected_buffer.address
assert result_buffer.size == expected_buffer.size

def test_to_pyarrow_numpy_backend(
self, sample_dataframe_numpy_backend, sample_pyarrow_table
):
result = sample_dataframe_numpy_backend.to_pyarrow()
assert result == sample_pyarrow_table

def test_to_pyarrow_pyarrow_backend(
self, sample_dataframe_pyarrow_backend, sample_pyarrow_table
):
result = sample_dataframe_pyarrow_backend.to_pyarrow()
assert result == sample_pyarrow_table

def test_pyarrow_roundtrip(
self, sample_dataframe_numpy_backend, sample_dataframe_pyarrow_backend
):
result = DataFrame.from_pyarrow(sample_dataframe_numpy_backend.to_pyarrow())
tm.assert_frame_equal(result, sample_dataframe_pyarrow_backend)
1 change: 1 addition & 0 deletions pandas/tests/frame/test_constructors.py
@@ -60,6 +60,7 @@
SparseArray,
TimedeltaArray,
)
from pandas.core.arrays.arrow import ArrowDtype

MIXED_FLOAT_DTYPES = ["float16", "float32", "float64"]
MIXED_INT_DTYPES = [