From 070341cf4958652343f798c74c04a8c15de2fd04 Mon Sep 17 00:00:00 2001 From: Shahar Naveh <50263213+ShaharNaveh@users.noreply.github.com> Date: Wed, 5 May 2021 00:55:07 +0300 Subject: [PATCH] DOC: Fix docs for io/json/* (#41284) --- ci/code_checks.sh | 1 + pandas/io/json/_json.py | 30 ++++--- pandas/io/json/_normalize.py | 136 +++++++++++++++++++------------- pandas/io/json/_table_schema.py | 43 +++++----- 4 files changed, 128 insertions(+), 82 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 8e876eebf93ad..7cc171330e01a 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -140,6 +140,7 @@ if [[ -z "$CHECK" || "$CHECK" == "doctests" ]]; then pandas/core/window/ \ pandas/errors/ \ pandas/io/clipboard/ \ + pandas/io/json/ \ pandas/io/excel/ \ pandas/io/parsers/ \ pandas/io/sas/ \ diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index b7493ebeadf34..259850e9a7233 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -526,9 +526,13 @@ def read_json( Encoding/decoding a Dataframe using ``'split'`` formatted JSON: >>> df.to_json(orient='split') - '{{"columns":["col 1","col 2"], - "index":["row 1","row 2"], - "data":[["a","b"],["c","d"]]}}' + '\ +{{\ +"columns":["col 1","col 2"],\ +"index":["row 1","row 2"],\ +"data":[["a","b"],["c","d"]]\ +}}\ +' >>> pd.read_json(_, orient='split') col 1 col 2 row 1 a b @@ -538,6 +542,7 @@ def read_json( >>> df.to_json(orient='index') '{{"row 1":{{"col 1":"a","col 2":"b"}},"row 2":{{"col 1":"c","col 2":"d"}}}}' + >>> pd.read_json(_, orient='index') col 1 col 2 row 1 a b @@ -556,13 +561,18 @@ def read_json( Encoding with Table Schema >>> df.to_json(orient='table') - '{{"schema": {{"fields": [{{"name": "index", "type": "string"}}, - {{"name": "col 1", "type": "string"}}, - {{"name": "col 2", "type": "string"}}], - "primaryKey": "index", - "pandas_version": "0.20.0"}}, - "data": [{{"index": "row 1", "col 1": "a", "col 2": "b"}}, - {{"index": "row 2", "col 1": "c", "col 2": "d"}}]}}' + '\ +{{"schema":{{"fields":[\ +{{"name":"index","type":"string"}},\ +{{"name":"col 1","type":"string"}},\ +{{"name":"col 2","type":"string"}}],\ +"primaryKey":["index"],\ +"pandas_version":"0.20.0"}},\ +"data":[\ +{{"index":"row 1","col 1":"a","col 2":"b"}},\ +{{"index":"row 2","col 1":"c","col 2":"d"}}]\ +}}\ +' """ if orient == "table" and dtype: raise ValueError("cannot pass both dtype and orient='table'") diff --git a/pandas/io/json/_normalize.py b/pandas/io/json/_normalize.py index 3d07b9d98f9a9..5927d6482d3b0 100644 --- a/pandas/io/json/_normalize.py +++ b/pandas/io/json/_normalize.py @@ -70,15 +70,17 @@ def nested_to_record( Examples -------- - IN[52]: nested_to_record(dict(flat1=1,dict1=dict(c=1,d=2), - nested=dict(e=dict(c=1,d=2),d=2))) - Out[52]: - {'dict1.c': 1, - 'dict1.d': 2, - 'flat1': 1, - 'nested.d': 2, - 'nested.e.c': 1, - 'nested.e.d': 2} + >>> nested_to_record( + ... dict(flat1=1, dict1=dict(c=1, d=2), nested=dict(e=dict(c=1, d=2), d=2)) + ... ) + {\ +'flat1': 1, \ +'dict1.c': 1, \ +'dict1.d': 2, \ +'nested.e.c': 1, \ +'nested.e.d': 2, \ +'nested.d': 2\ +} """ singleton = False if isinstance(ds, dict): @@ -208,18 +210,21 @@ def _simple_json_normalize( Examples -------- - IN[52]: _simple_json_normalize({ - 'flat1': 1, - 'dict1': {'c': 1, 'd': 2}, - 'nested': {'e': {'c': 1, 'd': 2}, 'd': 2} - }) - Out[52]: - {'dict1.c': 1, - 'dict1.d': 2, - 'flat1': 1, - 'nested.d': 2, - 'nested.e.c': 1, - 'nested.e.d': 2} + >>> _simple_json_normalize( + ... { + ... "flat1": 1, + ... "dict1": {"c": 1, "d": 2}, + ... "nested": {"e": {"c": 1, "d": 2}, "d": 2}, + ... } + ... ) + {\ +'flat1': 1, \ +'dict1.c': 1, \ +'dict1.d': 2, \ +'nested.e.c': 1, \ +'nested.e.d': 2, \ +'nested.d': 2\ +} """ normalised_json_object = {} @@ -283,22 +288,30 @@ def _json_normalize( Examples -------- - >>> data = [{'id': 1, 'name': {'first': 'Coleen', 'last': 'Volk'}}, - ... {'name': {'given': 'Mark', 'family': 'Regner'}}, - ... {'id': 2, 'name': 'Faye Raker'}] + >>> data = [ + ... {"id": 1, "name": {"first": "Coleen", "last": "Volk"}}, + ... {"name": {"given": "Mark", "family": "Regner"}}, + ... {"id": 2, "name": "Faye Raker"}, + ... ] >>> pd.json_normalize(data) id name.first name.last name.given name.family name 0 1.0 Coleen Volk NaN NaN NaN 1 NaN NaN NaN Mark Regner NaN 2 2.0 NaN NaN NaN NaN Faye Raker - >>> data = [{'id': 1, - ... 'name': "Cole Volk", - ... 'fitness': {'height': 130, 'weight': 60}}, - ... {'name': "Mark Reg", - ... 'fitness': {'height': 130, 'weight': 60}}, - ... {'id': 2, 'name': 'Faye Raker', - ... 'fitness': {'height': 130, 'weight': 60}}] + >>> data = [ + ... { + ... "id": 1, + ... "name": "Cole Volk", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, + ... { + ... "id": 2, + ... "name": "Faye Raker", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... ] >>> pd.json_normalize(data, max_level=0) id name fitness 0 1.0 Cole Volk {'height': 130, 'weight': 60} @@ -307,32 +320,49 @@ def _json_normalize( Normalizes nested data up to level 1. - >>> data = [{'id': 1, - ... 'name': "Cole Volk", - ... 'fitness': {'height': 130, 'weight': 60}}, - ... {'name': "Mark Reg", - ... 'fitness': {'height': 130, 'weight': 60}}, - ... {'id': 2, 'name': 'Faye Raker', - ... 'fitness': {'height': 130, 'weight': 60}}] + >>> data = [ + ... { + ... "id": 1, + ... "name": "Cole Volk", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... {"name": "Mark Reg", "fitness": {"height": 130, "weight": 60}}, + ... { + ... "id": 2, + ... "name": "Faye Raker", + ... "fitness": {"height": 130, "weight": 60}, + ... }, + ... ] >>> pd.json_normalize(data, max_level=1) id name fitness.height fitness.weight 0 1.0 Cole Volk 130 60 1 NaN Mark Reg 130 60 2 2.0 Faye Raker 130 60 - >>> data = [{'state': 'Florida', - ... 'shortname': 'FL', - ... 'info': {'governor': 'Rick Scott'}, - ... 'counties': [{'name': 'Dade', 'population': 12345}, - ... {'name': 'Broward', 'population': 40000}, - ... {'name': 'Palm Beach', 'population': 60000}]}, - ... {'state': 'Ohio', - ... 'shortname': 'OH', - ... 'info': {'governor': 'John Kasich'}, - ... 'counties': [{'name': 'Summit', 'population': 1234}, - ... {'name': 'Cuyahoga', 'population': 1337}]}] - >>> result = pd.json_normalize(data, 'counties', ['state', 'shortname', - ... ['info', 'governor']]) + >>> data = [ + ... { + ... "state": "Florida", + ... "shortname": "FL", + ... "info": {"governor": "Rick Scott"}, + ... "counties": [ + ... {"name": "Dade", "population": 12345}, + ... {"name": "Broward", "population": 40000}, + ... {"name": "Palm Beach", "population": 60000}, + ... ], + ... }, + ... { + ... "state": "Ohio", + ... "shortname": "OH", + ... "info": {"governor": "John Kasich"}, + ... "counties": [ + ... {"name": "Summit", "population": 1234}, + ... {"name": "Cuyahoga", "population": 1337}, + ... ], + ... }, + ... ] + >>> result = pd.json_normalize( + ... data, "counties", ["state", "shortname", ["info", "governor"]] + ... ) >>> result name population state shortname info.governor 0 Dade 12345 Florida FL Rick Scott @@ -341,8 +371,8 @@ def _json_normalize( 3 Summit 1234 Ohio OH John Kasich 4 Cuyahoga 1337 Ohio OH John Kasich - >>> data = {'A': [1, 2]} - >>> pd.json_normalize(data, 'A', record_prefix='Prefix.') + >>> data = {"A": [1, 2]} + >>> pd.json_normalize(data, "A", record_prefix="Prefix.") Prefix.0 0 1 1 2 diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index ea47dca4f079e..87ea109c20f43 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -155,21 +155,25 @@ def convert_json_field_to_pandas_type(field): Examples -------- - >>> convert_json_field_to_pandas_type({'name': 'an_int', - 'type': 'integer'}) + >>> convert_json_field_to_pandas_type({"name": "an_int", "type": "integer"}) 'int64' - >>> convert_json_field_to_pandas_type({'name': 'a_categorical', - 'type': 'any', - 'constraints': {'enum': [ - 'a', 'b', 'c']}, - 'ordered': True}) - 'CategoricalDtype(categories=['a', 'b', 'c'], ordered=True)' - >>> convert_json_field_to_pandas_type({'name': 'a_datetime', - 'type': 'datetime'}) + + >>> convert_json_field_to_pandas_type( + ... { + ... "name": "a_categorical", + ... "type": "any", + ... "constraints": {"enum": ["a", "b", "c"]}, + ... "ordered": True, + ... } + ... ) + CategoricalDtype(categories=['a', 'b', 'c'], ordered=True) + + >>> convert_json_field_to_pandas_type({"name": "a_datetime", "type": "datetime"}) 'datetime64[ns]' - >>> convert_json_field_to_pandas_type({'name': 'a_datetime_with_tz', - 'type': 'datetime', - 'tz': 'US/Central'}) + + >>> convert_json_field_to_pandas_type( + ... {"name": "a_datetime_with_tz", "type": "datetime", "tz": "US/Central"} + ... ) 'datetime64[ns, US/Central]' """ typ = field["type"] @@ -245,12 +249,13 @@ def build_table_schema( ... 'C': pd.date_range('2016-01-01', freq='d', periods=3), ... }, index=pd.Index(range(3), name='idx')) >>> build_table_schema(df) - {'fields': [{'name': 'idx', 'type': 'integer'}, - {'name': 'A', 'type': 'integer'}, - {'name': 'B', 'type': 'string'}, - {'name': 'C', 'type': 'datetime'}], - 'pandas_version': '0.20.0', - 'primaryKey': ['idx']} + {'fields': \ +[{'name': 'idx', 'type': 'integer'}, \ +{'name': 'A', 'type': 'integer'}, \ +{'name': 'B', 'type': 'string'}, \ +{'name': 'C', 'type': 'datetime'}], \ +'primaryKey': ['idx'], \ +'pandas_version': '0.20.0'} """ if index is True: data = set_default_names(data)