Skip to content

Commit

Permalink
DOC: Fix docs for io/json/* (#41284)
Browse files Browse the repository at this point in the history
  • Loading branch information
ShaharNaveh authored May 4, 2021
1 parent a997bab commit 070341c
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 82 deletions.
1 change: 1 addition & 0 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then
pandas/core/window/ \
pandas/errors/ \
pandas/io/clipboard/ \
pandas/io/json/ \
pandas/io/excel/ \
pandas/io/parsers/ \
pandas/io/sas/ \
Expand Down
30 changes: 20 additions & 10 deletions pandas/io/json/_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -526,9 +526,13 @@ def read_json(
Encoding/decoding a DataFrame using ``'split'`` formatted JSON:
>>> df.to_json(orient='split')
'{{"columns":["col 1","col 2"],
"index":["row 1","row 2"],
"data":[["a","b"],["c","d"]]}}'
'\
{{\
"columns":["col 1","col 2"],\
"index":["row 1","row 2"],\
"data":[["a","b"],["c","d"]]\
}}\
'
>>> pd.read_json(_, orient='split')
col 1 col 2
row 1 a b
Expand All @@ -538,6 +542,7 @@ def read_json(
>>> df.to_json(orient='index')
'{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}'
>>> pd.read_json(_, orient='index')
col 1 col 2
row 1 a b
Expand All @@ -556,13 +561,18 @@ def read_json(
Encoding with Table Schema:
>>> df.to_json(orient='table')
'{{"schema": {{"fields": [{{"name": "index", "type": "string"}},
{{"name": "col 1", "type": "string"}},
{{"name": "col 2", "type": "string"}}],
"primaryKey": "index",
"pandas_version": "0.20.0"}},
"data": [{{"index": "row 1", "col 1": "a", "col 2": "b"}},
{{"index": "row 2", "col 1": "c", "col 2": "d"}}]}}'
'\
{{"schema":{{"fields":[\
{{"name":"index","type":"string"}},\
{{"name":"col 1","type":"string"}},\
{{"name":"col 2","type":"string"}}],\
"primaryKey":["index"],\
"pandas_version":"0.20.0"}},\
"data":[\
{{"index":"row 1","col 1":"a","col 2":"b"}},\
{{"index":"row 2","col 1":"c","col 2":"d"}}]\
}}\
'
"""
if orient == "table" and dtype:
raise ValueError("cannot pass both dtype and orient='table'")
Expand Down
136 changes: 83 additions & 53 deletions pandas/io/json/_normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,17 @@ def nested_to_record(
Examples
--------
IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2),
nested=dict(e=dict(c=1,d=2),d=2)))
Out[52]:
{'dict1.c': 1,
'dict1.d': 2,
'flat1': 1,
'nested.d': 2,
'nested.e.c': 1,
'nested.e.d': 2}
>>> nested_to_record(
... dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2))
... )
{\
'flat1': 1, \
'dict1.c': 1, \
'dict1.d': 2, \
'nested.e.c': 1, \
'nested.e.d': 2, \
'nested.d': 2\
}
"""
singleton = False
if isinstance(ds, dict):
Expand Down Expand Up @@ -208,18 +210,21 @@ def _simple_json_normalize(
Examples
--------
IN[52]: _simple_json_normalize({
'flat1': 1,
'dict1': {'c': 1, 'd': 2},
'nested': {'e': {'c': 1, 'd': 2}, 'd': 2}
})
Out[52]:
{'dict1.c': 1,
'dict1.d': 2,
'flat1': 1,
'nested.d': 2,
'nested.e.c': 1,
'nested.e.d': 2}
>>> _simple_json_normalize(
... {
... "flat1": 1,
... "dict1": {"c": 1, "d": 2},
... "nested": {"e": {"c": 1, "d": 2}, "d": 2},
... }
... )
{\
'flat1': 1, \
'dict1.c': 1, \
'dict1.d': 2, \
'nested.e.c': 1, \
'nested.e.d': 2, \
'nested.d': 2\
}
"""
normalised_json_object = {}
Expand Down Expand Up @@ -283,22 +288,30 @@ def _json_normalize(
Examples
--------
>>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}},
... {'name': {'given': 'Mark', 'family': 'Regner'}},
... {'id': 2, 'name': 'Faye Raker'}]
>>> data = [
... {"id": 1, "name": {"first": "Coleen", "last": "Volk"}},
... {"name": {"given": "Mark", "family": "Regner"}},
... {"id": 2, "name": "Faye Raker"},
... ]
>>> pd.json_normalize(data)
id name.first name.last name.given name.family name
0 1.0 Coleen Volk NaN NaN NaN
1 NaN NaN NaN Mark Regner NaN
2 2.0 NaN NaN NaN NaN Faye Raker
>>> data = [{'id': 1,
... 'name': "Cole Volk",
... 'fitness': {'height': 130, 'weight': 60}},
... {'name': "Mark Reg",
... 'fitness': {'height': 130, 'weight': 60}},
... {'id': 2, 'name': 'Faye Raker',
... 'fitness': {'height': 130, 'weight': 60}}]
>>> data = [
... {
... "id": 1,
... "name": "Cole Volk",
... "fitness": {"height": 130, "weight": 60},
... },
... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
... {
... "id": 2,
... "name": "Faye Raker",
... "fitness": {"height": 130, "weight": 60},
... },
... ]
>>> pd.json_normalize(data, max_level=0)
id name fitness
0 1.0 Cole Volk {'height': 130, 'weight': 60}
Expand All @@ -307,32 +320,49 @@ def _json_normalize(
Normalizes nested data up to level 1.
>>> data = [{'id': 1,
... 'name': "Cole Volk",
... 'fitness': {'height': 130, 'weight': 60}},
... {'name': "Mark Reg",
... 'fitness': {'height': 130, 'weight': 60}},
... {'id': 2, 'name': 'Faye Raker',
... 'fitness': {'height': 130, 'weight': 60}}]
>>> data = [
... {
... "id": 1,
... "name": "Cole Volk",
... "fitness": {"height": 130, "weight": 60},
... },
... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}},
... {
... "id": 2,
... "name": "Faye Raker",
... "fitness": {"height": 130, "weight": 60},
... },
... ]
>>> pd.json_normalize(data, max_level=1)
id name fitness.height fitness.weight
0 1.0 Cole Volk 130 60
1 NaN Mark Reg 130 60
2 2.0 Faye Raker 130 60
>>> data = [{'state': 'Florida',
... 'shortname': 'FL',
... 'info': {'governor': 'Rick Scott'},
... 'counties': [{'name': 'Dade', 'population': 12345},
... {'name': 'Broward', 'population': 40000},
... {'name': 'Palm Beach', 'population': 60000}]},
... {'state': 'Ohio',
... 'shortname': 'OH',
... 'info': {'governor': 'John Kasich'},
... 'counties': [{'name': 'Summit', 'population': 1234},
... {'name': 'Cuyahoga', 'population': 1337}]}]
>>> result = pd.json_normalize(data, 'counties', ['state', 'shortname',
... ['info', 'governor']])
>>> data = [
... {
... "state": "Florida",
... "shortname": "FL",
... "info": {"governor": "Rick Scott"},
... "counties": [
... {"name": "Dade", "population": 12345},
... {"name": "Broward", "population": 40000},
... {"name": "Palm Beach", "population": 60000},
... ],
... },
... {
... "state": "Ohio",
... "shortname": "OH",
... "info": {"governor": "John Kasich"},
... "counties": [
... {"name": "Summit", "population": 1234},
... {"name": "Cuyahoga", "population": 1337},
... ],
... },
... ]
>>> result = pd.json_normalize(
... data, "counties", ["state", "shortname", ["info", "governor"]]
... )
>>> result
name population state shortname info.governor
0 Dade 12345 Florida FL Rick Scott
Expand All @@ -341,8 +371,8 @@ def _json_normalize(
3 Summit 1234 Ohio OH John Kasich
4 Cuyahoga 1337 Ohio OH John Kasich
>>> data = {'A': [1, 2]}
>>> pd.json_normalize(data, 'A', record_prefix='Prefix.')
>>> data = {"A": [1, 2]}
>>> pd.json_normalize(data, "A", record_prefix="Prefix.")
Prefix.0
0 1
1 2
Expand Down
43 changes: 24 additions & 19 deletions pandas/io/json/_table_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,21 +155,25 @@ def convert_json_field_to_pandas_type(field):
Examples
--------
>>> convert_json_field_to_pandas_type({'name': 'an_int',
'type': 'integer'})
>>> convert_json_field_to_pandas_type({"name": "an_int", "type": "integer"})
'int64'
>>> convert_json_field_to_pandas_type({'name': 'a_categorical',
'type': 'any',
'constraints': {'enum': [
'a', 'b', 'c']},
'ordered': True})
'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)'
>>> convert_json_field_to_pandas_type({'name': 'a_datetime',
'type': 'datetime'})
>>> convert_json_field_to_pandas_type(
... {
... "name": "a_categorical",
... "type": "any",
... "constraints": {"enum": ["a", "b", "c"]},
... "ordered": True,
... }
... )
CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)
>>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"})
'datetime64[ns]'
>>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz',
'type': 'datetime',
'tz': 'US/Central'})
>>> convert_json_field_to_pandas_type(
... {"name": "a_datetime_with_tz", "type": "datetime", "tz": "US/Central"}
... )
'datetime64[ns, US/Central]'
"""
typ = field["type"]
Expand Down Expand Up @@ -245,12 +249,13 @@ def build_table_schema(
... 'C': pd.date_range('2016-01-01', freq='d', periods=3),
... }, index=pd.Index(range(3), name='idx'))
>>> build_table_schema(df)
{'fields': [{'name': 'idx', 'type': 'integer'},
{'name': 'A', 'type': 'integer'},
{'name': 'B', 'type': 'string'},
{'name': 'C', 'type': 'datetime'}],
'pandas_version': '0.20.0',
'primaryKey': ['idx']}
{'fields': \
[{'name': 'idx', 'type': 'integer'}, \
{'name': 'A', 'type': 'integer'}, \
{'name': 'B', 'type': 'string'}, \
{'name': 'C', 'type': 'datetime'}], \
'primaryKey': ['idx'], \
'pandas_version': '0.20.0'}
"""
if index is True:
data = set_default_names(data)
Expand Down

0 comments on commit 070341c

Please sign in to comment.