From 3a0fbfd4899b1c9abec4f024d3cccc8970882691 Mon Sep 17 00:00:00 2001
From: Christopher Whelan <topherwhelan@gmail.com>
Date: Fri, 1 Feb 2019 12:56:05 -0800
Subject: [PATCH] PERF: use new to_records() argument in to_stata() (#25045)

---
 doc/source/whatsnew/v0.25.0.rst | 12 ++----------
 pandas/io/stata.py              | 20 +++++---------------
 2 files changed, 7 insertions(+), 25 deletions(-)

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
index 55b9957763ff6..09626be713c4f 100644
--- a/doc/source/whatsnew/v0.25.0.rst
+++ b/doc/source/whatsnew/v0.25.0.rst
@@ -23,14 +23,6 @@ Other Enhancements
 -
 -
 
-.. _whatsnew_0250.performance:
-
-Performance Improvements
-~~~~~~~~~~~~~~~~~~~~~~~~
- - Significant speedup in `SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`)
-
-
-
 .. _whatsnew_0250.api_breaking:
 
 Backwards incompatible API changes
@@ -69,8 +61,8 @@ Removal of prior version deprecations/changes
 Performance Improvements
 ~~~~~~~~~~~~~~~~~~~~~~~~
 
--
--
+- Significant speedup in `SparseArray` initialization that benefits most operations, fixing performance regression introduced in v0.20.0 (:issue:`24985`)
+- `DataFrame.to_stata()` is now faster when outputting data with any string or non-native endian columns (:issue:`25045`)
 -
 
 
diff --git a/pandas/io/stata.py b/pandas/io/stata.py
index 1b0660171ecac..0bd084f4e5df7 100644
--- a/pandas/io/stata.py
+++ b/pandas/io/stata.py
@@ -2385,32 +2385,22 @@ def _prepare_data(self):
         data = self._convert_strls(data)
 
         # 3. Convert bad string data to '' and pad to correct length
-        dtypes = []
-        data_cols = []
-        has_strings = False
+        dtypes = {}
         native_byteorder = self._byteorder == _set_endianness(sys.byteorder)
         for i, col in enumerate(data):
             typ = typlist[i]
             if typ <= self._max_string_length:
-                has_strings = True
                 data[col] = data[col].fillna('').apply(_pad_bytes, args=(typ,))
                 stype = 'S{type}'.format(type=typ)
-                dtypes.append(('c' + str(i), stype))
-                string = data[col].str.encode(self._encoding)
-                data_cols.append(string.values.astype(stype))
+                dtypes[col] = stype
+                data[col] = data[col].str.encode(self._encoding).astype(stype)
             else:
-                values = data[col].values
                 dtype = data[col].dtype
                 if not native_byteorder:
                     dtype = dtype.newbyteorder(self._byteorder)
-                dtypes.append(('c' + str(i), dtype))
-                data_cols.append(values)
-        dtypes = np.dtype(dtypes)
+                dtypes[col] = dtype
 
-        if has_strings or not native_byteorder:
-            self.data = np.fromiter(zip(*data_cols), dtype=dtypes)
-        else:
-            self.data = data.to_records(index=False)
+        self.data = data.to_records(index=False, column_dtypes=dtypes)
 
     def _write_data(self):
         data = self.data