diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 41b6519eb740f..a56212328f5c3 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -850,7 +850,8 @@ Bug Fixes
 
 - Bug in ``SparseSeries.reindex`` on single level with list of length 1 (:issue:`15447`)
 
-- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`)
+- Bug in groupby operations with timedelta64 when passing ``numeric_only=False`` (:issue:`5724`)
+- Bug in ``groupby.apply()`` coercing ``object`` dtypes to numeric types, when not all values were numeric (:issue:`14423`, :issue:`15421`, :issue:`15670`)
 
 - Bug in ``DataFrame.to_html`` with ``index=False`` and ``max_rows`` raising in ``IndexError`` (:issue:`14998`)
 
diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
index a10be078a8f96..7a017ffae284c 100644
--- a/pandas/core/groupby.py
+++ b/pandas/core/groupby.py
@@ -10,6 +10,7 @@
     zip, range, lzip,
     callable, map
 )
+
 from pandas import compat
 from pandas.compat.numpy import function as nv
 from pandas.compat.numpy import _np_version_under1p8
@@ -3424,6 +3425,7 @@ def _decide_output_index(self, output, labels):
 
     def _wrap_applied_output(self, keys, values, not_indexed_same=False):
         from pandas.core.index import _all_indexes_same
+        from pandas.tools.util import to_numeric
 
         if len(keys) == 0:
             return DataFrame(index=keys)
@@ -3566,7 +3568,8 @@ def first_non_None_value(values):
                 # as we are stacking can easily have object dtypes here
                 so = self._selected_obj
                 if (so.ndim == 2 and so.dtypes.apply(is_datetimelike).any()):
-                    result = result._convert(numeric=True)
+                    result = result.apply(
+                        lambda x: to_numeric(x, errors='ignore'))
                     date_cols = self._selected_obj.select_dtypes(
                         include=['datetime', 'timedelta']).columns
                     date_cols = date_cols.intersection(result.columns)
diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
index d7fa3beda0abf..c25974c94bfd1 100644
--- a/pandas/tests/groupby/test_groupby.py
+++ b/pandas/tests/groupby/test_groupby.py
@@ -4314,6 +4314,54 @@ def test_cummin_cummax(self):
         expected = pd.Series([1, 2, 1], name='b')
         tm.assert_series_equal(result, expected)
 
+    def test_apply_numeric_coercion_when_datetime(self):
+        # In the past, group-by/apply operations have been over-eager
+        # in converting dtypes to numeric, in the presence of datetime
+        # columns.  Various GH issues were filed, the reproductions
+        # for which are here.
+
+        # GH 15670
+        df = pd.DataFrame({'Number': [1, 2],
+                           'Date': ["2017-03-02"] * 2,
+                           'Str': ["foo", "inf"]})
+        expected = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+        df.Date = pd.to_datetime(df.Date)
+        result = df.groupby(['Number']).apply(lambda x: x.iloc[0])
+        tm.assert_series_equal(result['Str'], expected['Str'])
+
+        # GH 15421
+        df = pd.DataFrame({'A': [10, 20, 30],
+                           'B': ['foo', '3', '4'],
+                           'T': [pd.Timestamp("12:31:22")] * 3})
+
+        def get_B(g):
+            return g.iloc[0][['B']]
+        result = df.groupby('A').apply(get_B)['B']
+        expected = df.B
+        expected.index = df.A
+        tm.assert_series_equal(result, expected)
+
+        # GH 14423
+        def predictions(tool):
+            out = pd.Series(index=['p1', 'p2', 'useTime'], dtype=object)
+            if 'step1' in list(tool.State):
+                out['p1'] = str(tool[tool.State == 'step1'].Machine.values[0])
+            if 'step2' in list(tool.State):
+                out['p2'] = str(tool[tool.State == 'step2'].Machine.values[0])
+                out['useTime'] = str(
+                    tool[tool.State == 'step2'].oTime.values[0])
+            return out
+        df1 = pd.DataFrame({'Key': ['B', 'B', 'A', 'A'],
+                            'State': ['step1', 'step2', 'step1', 'step2'],
+                            'oTime': ['', '2016-09-19 05:24:33',
+                                      '', '2016-09-19 23:59:04'],
+                            'Machine': ['23', '36L', '36R', '36R']})
+        df2 = df1.copy()
+        df2.oTime = pd.to_datetime(df2.oTime)
+        expected = df1.groupby('Key').apply(predictions).p1
+        result = df2.groupby('Key').apply(predictions).p1
+        tm.assert_series_equal(expected, result)
+
 
 def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
     tups = lmap(tuple, df[keys].values)
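
Note (not part of the patch): a minimal, standalone sketch of the behaviour the new GH 15670 test exercises, assuming pandas with this change applied; the frame and column names simply mirror the test above.

    import pandas as pd

    # An object column whose values ("foo", "inf") are not all numeric,
    # next to a datetime column -- the combination that used to trigger
    # over-eager numeric coercion in groupby.apply.
    df = pd.DataFrame({'Number': [1, 2],
                       'Date': pd.to_datetime(["2017-03-02"] * 2),
                       'Str': ["foo", "inf"]})

    result = df.groupby(['Number']).apply(lambda g: g.iloc[0])

    # With the fix, 'Str' keeps its strings (and object dtype); before,
    # the presence of the datetime column could coerce "inf" to a float.
    print(result['Str'].tolist())  # ['foo', 'inf']
    print(result['Str'].dtype)     # object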