BUG: json_normalize raises broadcasting error with list-like metadata #47708

Closed (wants to merge 22 commits)
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
@@ -1047,6 +1047,7 @@ I/O
- Bug in :func:`read_parquet` with ``use_nullable_dtypes=True`` where ``float64`` dtype was returned instead of nullable ``Float64`` dtype (:issue:`45694`)
- Bug in :meth:`DataFrame.to_json` where ``PeriodDtype`` would not make the serialization roundtrip when read back with :meth:`read_json` (:issue:`44720`)
- Bug in :func:`read_xml` when reading XML files with Chinese character tags and would raise ``XMLSyntaxError`` (:issue:`47902`)
- Bug in :func:`json_normalize` where list-like metadata raised a broadcasting error (:issue:`37782`, :issue:`47182`)

Period
^^^^^^
11 changes: 10 additions & 1 deletion pandas/io/json/_normalize.py
@@ -22,6 +22,8 @@
)
from pandas.util._decorators import deprecate

from pandas.core.dtypes.common import is_list_like

import pandas as pd
from pandas import DataFrame

@@ -531,7 +533,14 @@ def _recursive_extract(data, path, seen_meta, level=0):
raise ValueError(
f"Conflicting metadata name {k}, need distinguishing prefix "
)
result[k] = np.array(v, dtype=object).repeat(lengths)
if v and is_list_like(v[0]):
Member

As an alternative can we not just do something like:

    arr = np.empty(1, dtype=object)
    arr[0] = v
    arr = arr.repeat(lengths)

Less than ideal but I think still gets us to the same place? The branching now is a little tough to follow

Contributor Author (@GYHHAHA, Aug 6, 2022)

@mroeschke suggested not constructing the np array for nested data; what I wrote before was:

    result[k] = np.array(v, dtype=object).repeat(lengths, axis=0).tolist()

Member

I think @WillAyd's suggestion is good if it works. My main issue before was one path calling tolist() and the other returning an np.ndarray

Contributor Author

Late response. Unfortunately this won't work: when v is a list, arr will be [list(...)], which raises a broadcasting error. Separate branches may still be necessary. (Now both paths return a list, for consistency and automatic type inference.) @mroeschke
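The failure is easy to reproduce with plain NumPy; a minimal sketch of the empty-list case from GH 47182 (the names `v` and `lengths` mirror the locals in `_recursive_extract`):

```python
import numpy as np

# Metadata values as _recursive_extract would collect them for
# GH 47182: each of the two records carries an empty list.
v = [[], []]
lengths = [1, 1]  # one output row per record

# np.array sees two equal-length (empty) lists and builds a (2, 0)
# array; repeat() then has 0 elements to match 2 repeat counts.
try:
    np.array(v, dtype=object).repeat(lengths)
except ValueError as exc:
    print("broadcast failure:", exc)
```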

    out = []
    for item, repeat in zip(v, lengths):
        for _ in range(repeat):
            out.append(item)
else:
    out = np.array(v, dtype=object).repeat(lengths).tolist()
Contributor Author

Here dtype=object is necessary, since np.array(["a", np.nan]) would convert the missing value into the string "nan".
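A quick illustration of that coercion:

```python
import numpy as np

# Without dtype=object, NumPy promotes everything to a string dtype,
# so the missing value becomes the literal string "nan".
coerced = np.array(["a", np.nan])
print(coerced.dtype.kind)  # 'U' -- unicode string dtype
print(coerced[1])          # 'nan', no longer a real missing value

# With dtype=object the NaN survives as an actual float.
kept = np.array(["a", np.nan], dtype=object)
print(type(kept[1]))       # <class 'float'>
```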

Member

Is the tolist call needed as well?

Contributor Author

Not necessary, but automatic type inference won't be triggered if we drop it. I think type inference is useful here. If we'd rather keep the object dtype, I can change it. Suggestions?
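The difference shows up once the repeated values land in a column; a small sketch:

```python
import numpy as np
import pandas as pd

v = [99, 100]
lengths = [1, 1]

# Keeping the object ndarray pins the resulting column to object dtype...
as_array = pd.Series(np.array(v, dtype=object).repeat(lengths))
print(as_array.dtype)  # object

# ...while converting to a plain list lets pandas infer int64.
as_list = pd.Series(np.array(v, dtype=object).repeat(lengths).tolist())
print(as_list.dtype)  # int64
```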

Member

I was hoping this line could avoid dtype=object and tolist, so that it doesn't trigger type inference and stays more performant. Otherwise, since both if/else blocks convert to a list, they are essentially the same.

Contributor Author (@GYHHAHA, Aug 18, 2022)

How about the following solution? We only force the object dtype when v is an array of strings, and call tolist() only when a nested array is detected. For numeric dtypes we stay performant and still handle both corner cases.

if v and isinstance(v[0], str):
    out = np.array(v, dtype=object)
else:
    out = np.array(v)

out = out.repeat(lengths, axis=0)
if v and is_list_like(v[0]):
    out = out.tolist()

Member

Actually, looking back at the original line, dtype=object was already there, and allowing dtype inference all the time might actually lead to more desirable behavior (more specific types).

Would it make sense to just combine both branches then and always return a list (without using np.array)? It would also be good to explore whether there's any performance hit.
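A minimal sketch of that combined single branch (illustrative only; `repeat_meta` is a hypothetical helper, not code from this PR):

```python
def repeat_meta(v, lengths):
    """Repeat each metadata value lengths[i] times, always returning a
    plain list so pandas can infer the dtype later. Handles scalar,
    string, list-like, and empty-list metadata uniformly, with no
    np.array construction and hence no broadcasting to go wrong."""
    out = []
    for item, repeat in zip(v, lengths):
        out.extend([item] * repeat)
    return out

print(repeat_meta([[1, 2]], [3]))     # [[1, 2], [1, 2], [1, 2]]
print(repeat_meta([[], []], [1, 1]))  # [[], []]
print(repeat_meta([99], [2]))         # [99, 99]
```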

result[k] = out
return result


35 changes: 31 additions & 4 deletions pandas/tests/io/json/test_normalize.py
@@ -546,7 +546,7 @@ def test_meta_non_iterable(self):

result = json_normalize(json.loads(data), record_path=["data"], meta=["id"])
expected = DataFrame(
{"one": [1], "two": [2], "id": np.array([99], dtype=object)}
{"one": [1], "two": [2], "id": np.array([99], dtype="int64")}
)
tm.assert_frame_equal(result, expected)

@@ -640,9 +640,7 @@ def test_missing_nested_meta(self):
)
ex_data = [[1, "foo", np.nan], [2, "foo", np.nan]]
columns = ["rec", "meta", "nested_meta.leaf"]
expected = DataFrame(ex_data, columns=columns).astype(
{"nested_meta.leaf": object}
)
expected = DataFrame(ex_data, columns=columns)
tm.assert_frame_equal(result, expected)

# If errors="raise" and nested metadata is null, we should raise with the
@@ -891,3 +889,32 @@ def test_series_non_zero_index(self):
}
)
tm.assert_frame_equal(result, expected)

def test_list_type_meta_data(self):
# GH 37782
data = {"values": [1, 2, 3], "metadata": {"listdata": [1, 2]}}
result = json_normalize(
data=data,
record_path=["values"],
meta=[["metadata", "listdata"]],
)
expected = DataFrame(
{
0: [1, 2, 3],
"metadata.listdata": [[1, 2], [1, 2], [1, 2]],
}
)
tm.assert_frame_equal(result, expected)

def test_empty_list_data(self):
# GH 47182
data = [
{"id": 1, "path": [{"a": 3, "b": 4}], "emptyList": []},
{"id": 2, "path": [{"a": 5, "b": 6}], "emptyList": []},
]
result = json_normalize(data, "path", ["id", "emptyList"])
expected = DataFrame(
[[3, 4, 1, []], [5, 6, 2, []]],
columns=["a", "b", "id", "emptyList"],
)
tm.assert_frame_equal(result, expected)