diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 246eab386b2ab4..a79eb127886d69 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -114,7 +114,7 @@ Previous Behavior:
     4    NaN
     dtype: float64
 
-Current Behavior
+Current Behavior:
 
 .. ipython:: python
 
@@ -139,7 +139,7 @@ Previous Behavior:
     3    2.5
     dtype: float64
 
-Current Behavior
+Current Behavior:
 
 .. ipython:: python
 
@@ -259,6 +259,52 @@ Convert to an xarray DataArray
 
    p.to_xarray()
 
+.. _whatsnew_0230.api_breaking.apply:
+
+Apply Changes
+^^^^^^^^^^^^^
+
+:func:`DataFrame.apply` was inconsistent when applying an arbitrary user-defined function that returned a list-like with ``axis=1``. Several bugs and inconsistencies
+are resolved. If the applied function returns a Series, then pandas will return a DataFrame; otherwise a Series will be returned. This includes the case
+where a list-like (e.g. ``tuple`` or ``list``) is returned (:issue:`16353`, :issue:`17437`, :issue:`17970`, :issue:`17348`, :issue:`17892`, :issue:`18573`,
+:issue:`17602`, :issue:`18775`, :issue:`18901`, :issue:`18919`).
+
+.. ipython:: python
+
+    df = pd.DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C'])
+    df
+
+Previous Behavior: if the length of the returned list-like happened to match the number of columns, a ``DataFrame`` was returned; otherwise a ``Series`` of lists was returned.
+
+.. code-block:: python
+
+   In [3]: df.apply(lambda x: [1, 2, 3], axis=1)
+   Out[3]:
+      A  B  C
+   0  1  2  3
+   1  1  2  3
+   2  1  2  3
+   3  1  2  3
+   4  1  2  3
+   5  1  2  3
+
+   In [4]: df.apply(lambda x: [1, 2], axis=1)
+   Out[4]:
+   0    [1, 2]
+   1    [1, 2]
+   2    [1, 2]
+   3    [1, 2]
+   4    [1, 2]
+   5    [1, 2]
+   dtype: object
+
+
+New Behavior: the result is consistent. A function that returns a list-like will *always* produce a ``Series``, whatever the length of the list-like.
+
+.. ipython:: python
+
+    df.apply(lambda x: [1, 2, 3], axis=1)
+    df.apply(lambda x: [1, 2], axis=1)
 
 Build Changes
 ^^^^^^^^^^^^^
diff --git a/pandas/core/apply.py b/pandas/core/apply.py
index 4cdec54b9a07ab..193b1af9f4930b 100644
--- a/pandas/core/apply.py
+++ b/pandas/core/apply.py
@@ -19,18 +19,20 @@ def frame_apply(obj, func, axis=0, broadcast=False,
         klass = FrameColumnApply
 
     return klass(obj, func, broadcast=broadcast,
-                 raw=raw, reduce=reduce, args=args, kwds=kwds)
+                 raw=raw, reduce=reduce,
+                 args=args, kwds=kwds)
 
 
 class FrameApply(object):
 
-    def __init__(self, obj, func, broadcast, raw, reduce, args, kwds):
+    def __init__(self, obj, func, broadcast, raw, reduce,
+                 args, kwds):
         self.obj = obj
         self.broadcast = broadcast
         self.raw = raw
         self.reduce = reduce
-        self.args = args
 
+        self.args = args
         self.ignore_failures = kwds.pop('ignore_failures', False)
         self.kwds = kwds
 
@@ -94,6 +96,13 @@ def get_result(self):
         return self.apply_standard()
 
     def apply_empty_result(self):
+        """
+        we have an empty result; at least 1 axis is 0
+
+        we will try to apply the function to an empty
+        series in order to see if this is a reduction function
+        """
+
         from pandas import Series
         reduce = self.reduce
 
@@ -113,6 +122,8 @@ def apply_empty_result(self):
             return self.obj.copy()
 
     def apply_raw(self):
+        """ apply to the values as a numpy array """
+
         try:
             result = reduction.reduce(self.values, self.f, axis=self.axis)
         except Exception:
@@ -207,19 +218,57 @@ def wrap_results(self, results, res_index, res_columns):
         from pandas import Series
 
         if len(results) > 0 and is_sequence(results[0]):
-            if not isinstance(results[0], Series):
-                index = res_columns
+
+            # map to rows
+            if self.axis == 0:
+                result = self.obj._constructor(data=results)
+
+                if not isinstance(results[0], Series):
+                    try:
+                        result.index = res_columns
+                    except ValueError:
+                        pass
+
+                try:
+                    result.columns = res_index
+                except ValueError:
+                    pass
+
+            # map to columns
             else:
-                index = None
 
-            result = self.obj._constructor(data=results, index=index)
-            result.columns = res_index
+                def infer_to_same_shape():
+                    result = self.obj._constructor(data=results)
+                    result = result.T
+
+                    # try to assign the result indices;
+                    # this may fail, if so we have
+                    # received an invalid return shape
+                    try:
+                        result.index = res_index
+                    except ValueError:
+                        pass
+
+                    try:
+                        result.columns = res_columns
+                    except ValueError:
+                        pass
+
+                    # infer dtypes
+                    result = result.infer_objects()
 
-            if self.axis == 1:
-                result = result.T
-            result = result._convert(
-                datetime=True, timedelta=True, copy=False)
+                    return result
 
+                # we have a non-series and don't want inference
+                if not isinstance(results[0], Series):
+                    result = Series(results)
+                    result.index = res_index
+
+                # we may want to infer results
+                else:
+                    result = infer_to_same_shape()
+
+        # dict of scalars
         else:
 
             result = Series(results)
@@ -270,16 +319,6 @@ def result_columns(self):
 class FrameColumnApply(FrameApply):
     axis = 1
 
-    def __init__(self, obj, func, broadcast, raw, reduce, args, kwds):
-        super(FrameColumnApply, self).__init__(obj, func, broadcast,
-                                               raw, reduce, args, kwds)
-
-        # skip if we are mixed datelike and trying reduce across axes
-        # GH6125
-        if self.reduce:
-            if self.obj._is_mixed_type and self.obj._is_datelike_mixed_type:
-                self.reduce = False
-
     def apply_broadcast(self):
         return self._apply_broadcast(self.obj.T).T
 
diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py
index e0fc6c470fe573..68172ff0a86b1e 100644
--- a/pandas/tests/frame/test_apply.py
+++ b/pandas/tests/frame/test_apply.py
@@ -350,11 +350,10 @@ def test_apply_attach_name(self):
 
         result = self.frame.apply(lambda x: np.repeat(x.name, len(x)),
                                   axis=1)
-        expected = DataFrame(np.tile(self.frame.index,
-                                     (len(self.frame.columns), 1)).T,
-                             index=self.frame.index,
-                             columns=self.frame.columns)
-        assert_frame_equal(result, expected)
+        expected = Series(np.repeat(t[0], len(self.frame.columns))
+                          for t in self.frame.itertuples())
+        expected.index = self.frame.index
+        assert_series_equal(result, expected)
 
     def test_apply_multi_index(self):
         s = DataFrame([[1, 2], [3, 4], [5, 6]])
@@ -367,10 +366,10 @@ def test_apply_dict(self):
 
         # GH 8735
         A = DataFrame([['foo', 'bar'], ['spam', 'eggs']])
-        A_dicts = pd.Series([dict([(0, 'foo'), (1, 'spam')]),
-                             dict([(0, 'bar'), (1, 'eggs')])])
+        A_dicts = Series([dict([(0, 'foo'), (1, 'spam')]),
+                          dict([(0, 'bar'), (1, 'eggs')])])
         B = DataFrame([[0, 1], [2, 3]])
-        B_dicts = pd.Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])])
+        B_dicts = Series([dict([(0, 0), (1, 2)]), dict([(0, 1), (1, 3)])])
         fn = lambda x: x.to_dict()
 
         for df, dicts in [(A, A_dicts), (B, B_dicts)]:
@@ -482,6 +481,141 @@ def test_apply_non_numpy_dtype(self):
         assert_frame_equal(result, df)
 
 
+class TestInferOutputShape(object):
+    # the user has supplied an opaque UDF that transforms
+    # the input, so the shape of the output has to be
+    # inferred from what the UDF returns
+
+    def test_infer_row_shape(self):
+        # gh-17437
+        # if row shape is changing, infer it
+        df = DataFrame(np.random.rand(10, 2))
+        result = df.apply(np.fft.fft, axis=0)
+        assert result.shape == (10, 2)
+
+        result = df.apply(np.fft.rfft, axis=0)
+        assert result.shape == (6, 2)
+
+    def test_with_dictlike_columns(self):
+        # gh 17602: dict results give a Series, also with mixed dtypes
+        df = DataFrame([[1, 2], [1, 2]], columns=['a', 'b'])
+        result = df.apply(lambda x: {'s': x['a'] + x['b']}, 1)
+        expected = Series([{'s': 3} for t in df.itertuples()])
+        assert_series_equal(result, expected)
+
+        df['tm'] = [pd.Timestamp('2017-05-01 00:00:00'),
+                    pd.Timestamp('2017-05-02 00:00:00')]
+        result = df.apply(lambda x: {'s': x['a'] + x['b']}, 1)
+        assert_series_equal(result, expected)
+
+        # compose a series
+        result = (df['a'] + df['b']).apply(lambda x: {'s': x})
+        expected = Series([{'s': 3}, {'s': 3}])
+        assert_series_equal(result, expected)
+
+        # gh-18775
+        df = DataFrame()
+        df["author"] = ["X", "Y", "Z"]
+        df["publisher"] = ["BBC", "NBC", "N24"]
+        df["date"] = pd.to_datetime(['17-10-2010 07:15:30',
+                                     '13-05-2011 08:20:35',
+                                     '15-01-2013 09:09:09'])
+        result = df.apply(lambda x: {}, axis=1)
+        expected = Series([{}, {}, {}])
+        assert_series_equal(result, expected)
+
+    def test_with_listlike_columns(self):
+        # gh-17348
+        df = DataFrame({'a': Series(np.random.randn(4)),
+                        'b': ['a', 'list', 'of', 'words'],
+                        'ts': date_range('2016-10-01', periods=4, freq='H')})
+
+        result = df[['a', 'b']].apply(tuple, axis=1)
+        expected = Series([t[1:] for t in df[['a', 'b']].itertuples()])
+        assert_series_equal(result, expected)
+
+        result = df[['a', 'ts']].apply(tuple, axis=1)
+        expected = Series([t[1:] for t in df[['a', 'ts']].itertuples()])
+        assert_series_equal(result, expected)
+
+        # gh-18919
+        df = DataFrame({'x': Series([['a', 'b'], ['q']]),
+                        'y': Series([['z'], ['q', 't']])})
+        df.index = MultiIndex.from_tuples([('i0', 'j0'), ('i1', 'j1')])
+
+        result = df.apply(
+            lambda row: [el for el in row['x'] if el in row['y']],
+            axis=1)
+        expected = Series([[], ['q']], index=df.index)
+        assert_series_equal(result, expected)
+
+    def test_infer_output_shape_columns(self):
+        # gh-18573
+
+        df = DataFrame({'number': [1., 2.],
+                        'string': ['foo', 'bar'],
+                        'datetime': [pd.Timestamp('2017-11-29 03:30:00'),
+                                     pd.Timestamp('2017-11-29 03:45:00')]})
+        result = df.apply(lambda row: (row.number, row.string), axis=1)
+        expected = Series([t[2:] for t in df.itertuples()])
+        assert_series_equal(result, expected)
+
+    def test_infer_output_shape_listlike_columns(self):
+        # gh-16353
+
+        df = DataFrame(np.random.randn(6, 3), columns=['A', 'B', 'C'])
+
+        result = df.apply(lambda x: [1, 2, 3], axis=1)
+        expected = Series([[1, 2, 3] for t in df.itertuples()])
+        assert_series_equal(result, expected)
+
+        result = df.apply(lambda x: [1, 2], axis=1)
+        expected = Series([[1, 2] for t in df.itertuples()])
+        assert_series_equal(result, expected)
+
+        # gh-17970
+        df = DataFrame({"a": [1, 2, 3]}, index=list('abc'))
+
+        result = df.apply(lambda row: np.ones(1), axis=1)
+        expected = Series([np.ones(1) for t in df.itertuples()],
+                          index=df.index)
+        assert_series_equal(result, expected)
+
+        result = df.apply(lambda row: np.ones(2), axis=1)
+        expected = Series([np.ones(2) for t in df.itertuples()],
+                          index=df.index)
+        assert_series_equal(result, expected)
+
+        # gh-17892
+        df = DataFrame({'a': [pd.Timestamp('2010-02-01'),
+                              pd.Timestamp('2010-02-04'),
+                              pd.Timestamp('2010-02-05'),
+                              pd.Timestamp('2010-02-06')],
+                        'b': [9, 5, 4, 3],
+                        'c': [5, 3, 4, 2],
+                        'd': [1, 2, 3, 4]})
+
+        def fun(x):
+            return (1, 2)
+
+        result = df.apply(fun, axis=1)
+        expected = Series([(1, 2) for t in df.itertuples()])
+        assert_series_equal(result, expected)
+
+    def test_consistent_coerce_for_shapes(self):
+        # we want column names to NOT be propagated
+        # just because the shape matches the input shape
+        df = DataFrame(np.random.randn(4, 3), columns=['A', 'B', 'C'])
+
+        result = df.apply(lambda x: [1, 2, 3], axis=1)
+        expected = Series([[1, 2, 3] for t in df.itertuples()])
+        assert_series_equal(result, expected)
+
+        result = df.apply(lambda x: [1, 2], axis=1)
+        expected = Series([[1, 2] for t in df.itertuples()])
+        assert_series_equal(result, expected)
+
 def zip_frames(*frames):
     """
     take a list of frames, zip the columns together for each
@@ -659,13 +793,13 @@ def test_non_callable_aggregates(self):
 
         # Function aggregate
         result = df.agg({'A': 'count'})
-        expected = pd.Series({'A': 2})
+        expected = Series({'A': 2})
 
         assert_series_equal(result, expected)
 
         # Non-function aggregate
         result = df.agg({'A': 'size'})
-        expected = pd.Series({'A': 3})
+        expected = Series({'A': 3})
 
         assert_series_equal(result, expected)