Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: add size parameter for load table from dataframe and json methods #280

Merged
merged 8 commits into from
Oct 14, 2020
4 changes: 3 additions & 1 deletion google/cloud/bigquery/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2258,11 +2258,13 @@ def load_table_from_dataframe(
dataframe.to_parquet(tmppath, compression=parquet_compression)

with open(tmppath, "rb") as parquet_file:
file_size = os.path.getsize(tmppath)
return self.load_table_from_file(
parquet_file,
destination,
num_retries=num_retries,
rewind=True,
size=file_size,
job_id=job_id,
job_id_prefix=job_id_prefix,
location=location,
Expand Down Expand Up @@ -2365,10 +2367,10 @@ def load_table_from_json(

data_str = u"\n".join(json.dumps(item) for item in json_rows)
data_file = io.BytesIO(data_str.encode())

return self.load_table_from_file(
data_file,
destination,
size=len(data_str),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

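This length needs to be computed on the encoded byte string (the data actually written to `data_file`), not on the unicode string; otherwise the size will be wrong when non-ASCII characters are included, because such characters encode to more than one byte.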

num_retries=num_retries,
job_id=job_id,
job_id_prefix=job_id_prefix,
Expand Down
16 changes: 16 additions & 0 deletions tests/unit/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -7486,6 +7486,7 @@ def test_load_table_from_dataframe(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=None,
Expand Down Expand Up @@ -7529,6 +7530,7 @@ def test_load_table_from_dataframe_w_client_location(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7581,6 +7583,7 @@ def test_load_table_from_dataframe_w_custom_job_config_wihtout_source_format(sel
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7635,6 +7638,7 @@ def test_load_table_from_dataframe_w_custom_job_config_w_source_format(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7727,6 +7731,7 @@ def test_load_table_from_dataframe_w_automatic_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7786,6 +7791,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7831,6 +7837,7 @@ def test_load_table_from_dataframe_unknown_table(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=None,
Expand Down Expand Up @@ -7945,6 +7952,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -7991,6 +7999,7 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8065,6 +8074,7 @@ def test_load_table_from_dataframe_struct_fields(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8138,6 +8148,7 @@ def test_load_table_from_dataframe_w_partial_schema(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8232,6 +8243,7 @@ def test_load_table_from_dataframe_w_partial_schema_missing_types(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8288,6 +8300,7 @@ def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8403,6 +8416,7 @@ def test_load_table_from_dataframe_w_nulls(self):
self.TABLE_REF,
num_retries=_DEFAULT_NUM_RETRIES,
rewind=True,
size=mock.ANY,
job_id=mock.ANY,
job_id_prefix=None,
location=self.LOCATION,
Expand Down Expand Up @@ -8454,6 +8468,7 @@ def test_load_table_from_json_basic_use(self):
client,
mock.ANY,
self.TABLE_REF,
size=mock.ANY,
num_retries=_DEFAULT_NUM_RETRIES,
job_id=mock.ANY,
job_id_prefix=None,
Expand Down Expand Up @@ -8505,6 +8520,7 @@ def test_load_table_from_json_non_default_args(self):
client,
mock.ANY,
self.TABLE_REF,
size=mock.ANY,
num_retries=_DEFAULT_NUM_RETRIES,
job_id=mock.ANY,
job_id_prefix=None,
Expand Down