Skip to content

Commit

Permalink
feat: add ability to request compressed ReadRowsResponse rows (#728)
Browse files Browse the repository at this point in the history
* feat: add ability to request compressed ReadRowsResponse rows

This change allows the client to request raw lz4 compression of the ReadRowsResponse rows data for both ArrowRecordBatches and Avro rows.

PiperOrigin-RevId: 597000088

Source-Link: googleapis/googleapis@341d70f

Source-Link: googleapis/googleapis-gen@01713f3
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMDE3MTNmM2Y1NTM0YWNjNzhmMDRkNTllMTNjMDY2OGM4MTI5YmYwMyJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
gcf-owl-bot[bot] and gcf-owl-bot[bot] authored Jan 12, 2024
1 parent 90cef51 commit fe09e3b
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 2 deletions.
26 changes: 26 additions & 0 deletions google/cloud/bigquery_storage_v1/types/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,27 @@ class ReadRowsResponse(proto.Message):
Output only. Arrow schema.
This field is a member of `oneof`_ ``schema``.
uncompressed_byte_size (int):
Optional. If the row data in this ReadRowsResponse is
compressed, then uncompressed byte size is the original size
of the uncompressed row data. If it is set to a value
greater than 0, then decompress into a buffer of size
uncompressed_byte_size using the compression codec that was
requested during session creation time and which is
specified in TableReadOptions.response_compression_codec in
ReadSession. This value is not set if no
response_compression_codec was requested and it is -1 if
the requested compression would not have reduced the size of
this ReadRowsResponse's row data. This attempts to match
Apache Arrow's behavior described here
https://github.com/apache/arrow/issues/15102 where the
uncompressed length may be set to -1 to indicate that the
data that follows is not compressed, which can be useful for
cases where compression does not yield appreciable savings.
When uncompressed_byte_size is not greater than 0, the
client should skip decompression.
This field is a member of `oneof`_ ``_uncompressed_byte_size``.
"""

avro_rows: avro.AvroRows = proto.Field(
Expand Down Expand Up @@ -273,6 +294,11 @@ class ReadRowsResponse(proto.Message):
oneof="schema",
message=arrow.ArrowSchema,
)
uncompressed_byte_size: int = proto.Field(
proto.INT64,
number=9,
optional=True,
)


class SplitReadStreamRequest(proto.Message):
Expand Down
31 changes: 31 additions & 0 deletions google/cloud/bigquery_storage_v1/types/stream.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,33 @@ class TableReadOptions(proto.Message):
https://cloud.google.com/bigquery/docs/table-sampling)
This field is a member of `oneof`_ ``_sample_percentage``.
response_compression_codec (google.cloud.bigquery_storage_v1.types.ReadSession.TableReadOptions.ResponseCompressionCodec):
Optional. Set response_compression_codec when creating a
read session to enable application-level compression of
ReadRows responses.
This field is a member of `oneof`_ ``_response_compression_codec``.
"""

class ResponseCompressionCodec(proto.Enum):
r"""Specifies which compression codec to attempt on the entire
serialized response payload (either Arrow record batch or Avro
rows). This is not to be confused with the Apache Arrow native
compression codecs specified in ArrowSerializationOptions. For
performance reasons, when creating a read session requesting
Arrow responses, setting both native Arrow compression and
application-level response compression will not be allowed -
choose, at most, one kind of compression.
Values:
RESPONSE_COMPRESSION_CODEC_UNSPECIFIED (0):
Default is no compression.
RESPONSE_COMPRESSION_CODEC_LZ4 (2):
Use raw LZ4 compression.
"""
RESPONSE_COMPRESSION_CODEC_UNSPECIFIED = 0
RESPONSE_COMPRESSION_CODEC_LZ4 = 2

selected_fields: MutableSequence[str] = proto.RepeatedField(
proto.STRING,
number=1,
Expand All @@ -276,6 +301,12 @@ class TableReadOptions(proto.Message):
number=5,
optional=True,
)
response_compression_codec: "ReadSession.TableReadOptions.ResponseCompressionCodec" = proto.Field(
proto.ENUM,
number=6,
optional=True,
enum="ReadSession.TableReadOptions.ResponseCompressionCodec",
)

name: str = proto.Field(
proto.STRING,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-bigquery-storage",
"version": "2.24.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-bigquery-storage",
"version": "2.24.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down

0 comments on commit fe09e3b

Please sign in to comment.