Skip to content

Commit

Permalink
Json fix normalize (#49920)
Browse files Browse the repository at this point in the history
* added failing test

* fix + whatsnew

* Refactor for readability

* Better compat
  • Loading branch information
WillAyd authored Nov 28, 2022
1 parent 0d2c579 commit cd58f3b
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 5 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -717,6 +717,7 @@ I/O
- Bug in :func:`read_csv` for a single-line csv with fewer columns than ``names`` raised :class:`.errors.ParserError` with ``engine="c"`` (:issue:`47566`)
- Bug in :func:`DataFrame.to_string` with ``header=False`` that printed the index name on the same line as the first row of the data (:issue:`49230`)
- Fixed memory leak which stemmed from the initialization of the internal JSON module (:issue:`49222`)
- Fixed issue where :func:`json_normalize` would incorrectly strip leading characters matching the ``sep`` argument from column names (:issue:`49861`)
-

Period
Expand Down
16 changes: 11 additions & 5 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
defaultdict,
)
import copy
import sys
from typing import (
Any,
DefaultDict,
Expand Down Expand Up @@ -148,13 +149,18 @@ def _normalise_json(
if isinstance(data, dict):
for key, value in data.items():
new_key = f"{key_string}{separator}{key}"

if not key_string:
if sys.version_info < (3, 9):
from pandas.util._str_methods import removeprefix

new_key = removeprefix(new_key, separator)
else:
new_key = new_key.removeprefix(separator)

_normalise_json(
data=value,
# to avoid adding the separator to the start of every key
# GH#43831 avoid adding key if key_string blank
key_string=new_key
if new_key[: len(separator)] != separator
else new_key[len(separator) :],
key_string=new_key,
normalized_dict=normalized_dict,
separator=separator,
)
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/io/json/test_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,14 @@ def generator_data():

tm.assert_frame_equal(result, expected)

def test_top_column_with_leading_underscore(self):
    # GH#49861
    # The top-level key "_id" begins with the same character that is
    # passed as ``sep``; the flattened column names must keep it.
    nested = {"_id": {"a1": 10, "l2": {"l3": 0}}, "gg": 4}
    expected = DataFrame(
        [[4, 10, 0]],
        columns=["gg", "_id_a1", "_id_l2_l3"],
    )

    result = json_normalize(nested, sep="_")

    tm.assert_frame_equal(result, expected)


class TestNestedToRecord:
def test_flat_stays_flat(self):
Expand Down

0 comments on commit cd58f3b

Please sign in to comment.