Skip to content
This repository has been archived by the owner on Sep 20, 2023. It is now read-only.

Commit

Permalink
feat: add symbols field, and auto-format comments (#277)
Browse files Browse the repository at this point in the history
* feat: add `symbols` field, and auto-format comments

PiperOrigin-RevId: 428799963

Source-Link: googleapis/googleapis@8b6181f

Source-Link: https://github.com/googleapis/googleapis-gen/commit/2c320a956160ee86cb8d7923e37a1a3a451d1042
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiMmMzMjBhOTU2MTYwZWU4NmNiOGQ3OTIzZTM3YTFhM2E0NTFkMTA0MiJ9

* 🦉 Updates from OwlBot

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* feat: add `symbols` field, and auto-format comments

PiperOrigin-RevId: 428800506

Source-Link: googleapis/googleapis@ea20380

Source-Link: https://github.com/googleapis/googleapis-gen/commit/8b9ece508c132516f4bf79e462f08253acc85935
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiOGI5ZWNlNTA4YzEzMjUxNmY0YmY3OWU0NjJmMDgyNTNhY2M4NTkzNSJ9

* 🦉 Updates from OwlBot

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
gcf-owl-bot[bot] and gcf-owl-bot[bot] authored Feb 16, 2022
1 parent 56c96e4 commit ca016dd
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 41 deletions.
67 changes: 50 additions & 17 deletions google/cloud/documentai_v1/types/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,9 @@ class Page(proto.Message):
form_fields (Sequence[google.cloud.documentai_v1.types.Document.Page.FormField]):
A list of visually detected form fields on
the page.
symbols (Sequence[google.cloud.documentai_v1.types.Document.Page.Symbol]):
A list of visually detected symbols on the
page.
provenance (google.cloud.documentai_v1.types.Document.Provenance):
The history of this page.
"""
Expand Down Expand Up @@ -454,6 +457,26 @@ class Type(proto.Enum):
proto.MESSAGE, number=4, message="Document.Provenance",
)

class Symbol(proto.Message):
    r"""A single symbol detected on the page.

    Attributes:
        layout (google.cloud.documentai_v1.types.Document.Page.Layout):
            [Layout][google.cloud.documentai.v1.Document.Page.Layout]
            for
            [Symbol][google.cloud.documentai.v1.Document.Page.Symbol].
        detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]):
            A list of detected languages together with
            confidence.
    """

    # Field 1: singular message field referencing Document.Page.Layout.
    layout = proto.Field(
        proto.MESSAGE,
        number=1,
        message="Document.Page.Layout",
    )
    # Field 2: repeated message field referencing
    # Document.Page.DetectedLanguage.
    detected_languages = proto.RepeatedField(
        proto.MESSAGE,
        number=2,
        message="Document.Page.DetectedLanguage",
    )

class VisualElement(proto.Message):
r"""Detected non-text visual elements e.g. checkbox, signature
etc. on the page.
Expand Down Expand Up @@ -602,7 +625,7 @@ class DetectedLanguage(proto.Message):
language_code (str):
The BCP-47 language code, such as "en-US" or "sr-Latn". For
more information, see
http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
confidence (float):
Confidence of detected language. Range [0, 1].
"""
Expand Down Expand Up @@ -643,13 +666,17 @@ class DetectedLanguage(proto.Message):
form_fields = proto.RepeatedField(
proto.MESSAGE, number=11, message="Document.Page.FormField",
)
symbols = proto.RepeatedField(
proto.MESSAGE, number=12, message="Document.Page.Symbol",
)
provenance = proto.Field(
proto.MESSAGE, number=16, message="Document.Provenance",
)

class Entity(proto.Message):
r"""A phrase in the text that is a known entity type, such as a
person, an organization, or location.
r"""An entity that could be a phrase in the text or a property
that belongs to the document. It is a known entity type, such as
a person, an organization, or location.
Attributes:
text_anchor (google.cloud.documentai_v1.types.Document.TextAnchor):
Expand All @@ -660,7 +687,8 @@ class Entity(proto.Message):
Entity type from a schema e.g. ``Address``.
mention_text (str):
Optional. Text value in the document e.g.
``1600 Amphitheatre Pkwy``.
``1600 Amphitheatre Pkwy``. If the entity is not present in
the document, this field will be empty.
mention_id (str):
Optional. Deprecated. Use ``id`` field instead.
confidence (float):
Expand Down Expand Up @@ -729,10 +757,14 @@ class NormalizedValue(proto.Message):
This field is a member of `oneof`_ ``structured_value``.
text (str):
Required. Normalized entity value stored as a string. This
field is populated for supported document type (e.g.
Invoice). For some entity types, one of respective
'structured_value' fields may also be populated.
Optional. An optional field to store a normalized string.
For some entity types, one of the respective
``structured_value`` fields may also be populated. Also, not
all types of ``structured_value`` will be normalized.
For example, some processors may not generate float or int
normalized text by default.
Below are sample formats mapped to structured values.
- Money/Currency type (``money_value``) is in the ISO 4217
text format.
Expand Down Expand Up @@ -818,7 +850,8 @@ class TextAnchor(proto.Message):
[Document.text][google.cloud.documentai.v1.Document.text].
content (str):
Contains the content of the text span so that users do not
have to look it up in the text_segments.
have to look it up in the text_segments. It is always
populated for formFields.
"""

class TextSegment(proto.Message):
Expand Down Expand Up @@ -942,18 +975,18 @@ class OperationType(proto.Enum):
EVAL_SKIPPED = 6

class Parent(proto.Message):
r"""Structure for referencing parent provenances. When an
element replaces one or more other elements, parent references
identify the elements that are replaced.
r"""The parent element the current element is based on. Used for
referencing/aligning, removal and replacement operations.
Attributes:
revision (int):
The index of the [Document.revisions] identifying the parent
revision.
The index into the current revision's parent_ids
list.
index (int):
The index of the parent revisions
corresponding collection of items (eg. list of
entities, properties within entities, etc.)
The index of the parent item in the
corresponding item list (e.g. list of entities,
properties within entities, etc.) in the parent
revision.
id (int):
The id of the parent provenance.
"""
Expand Down
82 changes: 58 additions & 24 deletions google/cloud/documentai_v1beta3/types/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ class Document(proto.Message):
Optional. UTF-8 encoded text in reading order
from the document.
text_styles (Sequence[google.cloud.documentai_v1beta3.types.Document.Style]):
Styles for the
Placeholder. Styles for the
[Document.text][google.cloud.documentai.v1beta3.Document.text].
pages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page]):
Visual page layout for the
Expand All @@ -79,13 +79,13 @@ class Document(proto.Message):
For document shards, entities in this list may cross shard
boundaries.
entity_relations (Sequence[google.cloud.documentai_v1beta3.types.Document.EntityRelation]):
Relationship among
Placeholder. Relationship among
[Document.entities][google.cloud.documentai.v1beta3.Document.entities].
text_changes (Sequence[google.cloud.documentai_v1beta3.types.Document.TextChange]):
A list of text corrections made to [Document.text]. This is
usually used for annotating corrections to OCR mistakes.
Text changes for a given revision may not overlap with each
other.
Placeholder. A list of text corrections made to
[Document.text]. This is usually used for annotating
corrections to OCR mistakes. Text changes for a given
revision may not overlap with each other.
shard_info (google.cloud.documentai_v1beta3.types.Document.ShardInfo):
Information about the sharding if this
document is a sharded part of a larger document.
Expand All @@ -95,7 +95,8 @@ class Document(proto.Message):
Any error that occurred while processing this
document.
revisions (Sequence[google.cloud.documentai_v1beta3.types.Document.Revision]):
Revision history of this document.
Placeholder. Revision history of this
document.
"""

class ShardInfo(proto.Message):
Expand Down Expand Up @@ -224,6 +225,9 @@ class Page(proto.Message):
form_fields (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.FormField]):
A list of visually detected form fields on
the page.
symbols (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.Symbol]):
A list of visually detected symbols on the
page.
provenance (google.cloud.documentai_v1beta3.types.Document.Provenance):
The history of this page.
"""
Expand Down Expand Up @@ -457,6 +461,26 @@ class Type(proto.Enum):
proto.MESSAGE, number=4, message="Document.Provenance",
)

class Symbol(proto.Message):
    r"""A single symbol detected on the page.

    Attributes:
        layout (google.cloud.documentai_v1beta3.types.Document.Page.Layout):
            [Layout][google.cloud.documentai.v1beta3.Document.Page.Layout]
            for
            [Symbol][google.cloud.documentai.v1beta3.Document.Page.Symbol].
        detected_languages (Sequence[google.cloud.documentai_v1beta3.types.Document.Page.DetectedLanguage]):
            A list of detected languages together with
            confidence.
    """

    # Field 1: singular message field referencing Document.Page.Layout.
    layout = proto.Field(
        proto.MESSAGE,
        number=1,
        message="Document.Page.Layout",
    )
    # Field 2: repeated message field referencing
    # Document.Page.DetectedLanguage.
    detected_languages = proto.RepeatedField(
        proto.MESSAGE,
        number=2,
        message="Document.Page.DetectedLanguage",
    )

class VisualElement(proto.Message):
r"""Detected non-text visual elements e.g. checkbox, signature
etc. on the page.
Expand Down Expand Up @@ -606,7 +630,7 @@ class DetectedLanguage(proto.Message):
language_code (str):
The BCP-47 language code, such as "en-US" or "sr-Latn". For
more information, see
http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
confidence (float):
Confidence of detected language. Range [0, 1].
"""
Expand Down Expand Up @@ -647,13 +671,17 @@ class DetectedLanguage(proto.Message):
form_fields = proto.RepeatedField(
proto.MESSAGE, number=11, message="Document.Page.FormField",
)
symbols = proto.RepeatedField(
proto.MESSAGE, number=12, message="Document.Page.Symbol",
)
provenance = proto.Field(
proto.MESSAGE, number=16, message="Document.Provenance",
)

class Entity(proto.Message):
r"""A phrase in the text that is a known entity type, such as a
person, an organization, or location.
r"""An entity that could be a phrase in the text or a property
that belongs to the document. It is a known entity type, such as
a person, an organization, or location.
Attributes:
text_anchor (google.cloud.documentai_v1beta3.types.Document.TextAnchor):
Expand All @@ -664,7 +692,8 @@ class Entity(proto.Message):
Entity type from a schema e.g. ``Address``.
mention_text (str):
Optional. Text value in the document e.g.
``1600 Amphitheatre Pkwy``.
``1600 Amphitheatre Pkwy``. If the entity is not present in
the document, this field will be empty.
mention_id (str):
Optional. Deprecated. Use ``id`` field instead.
confidence (float):
Expand Down Expand Up @@ -733,10 +762,14 @@ class NormalizedValue(proto.Message):
This field is a member of `oneof`_ ``structured_value``.
text (str):
Required. Normalized entity value stored as a string. This
field is populated for supported document type (e.g.
Invoice). For some entity types, one of respective
'structured_value' fields may also be populated.
Optional. An optional field to store a normalized string.
For some entity types, one of the respective
``structured_value`` fields may also be populated. Also, not
all types of ``structured_value`` will be normalized.
For example, some processors may not generate float or int
normalized text by default.
Below are sample formats mapped to structured values.
- Money/Currency type (``money_value``) is in the ISO 4217
text format.
Expand Down Expand Up @@ -822,7 +855,8 @@ class TextAnchor(proto.Message):
[Document.text][google.cloud.documentai.v1beta3.Document.text].
content (str):
Contains the content of the text span so that users do not
have to look it up in the text_segments.
have to look it up in the text_segments. It is always
populated for formFields.
"""

class TextSegment(proto.Message):
Expand Down Expand Up @@ -946,18 +980,18 @@ class OperationType(proto.Enum):
EVAL_SKIPPED = 6

class Parent(proto.Message):
r"""Structure for referencing parent provenances. When an
element replaces one or more other elements, parent references
identify the elements that are replaced.
r"""The parent element the current element is based on. Used for
referencing/aligning, removal and replacement operations.
Attributes:
revision (int):
The index of the [Document.revisions] identifying the parent
revision.
The index into the current revision's parent_ids
list.
index (int):
The index of the parent revisions
corresponding collection of items (eg. list of
entities, properties within entities, etc.)
The index of the parent item in the
corresponding item list (e.g. list of entities,
properties within entities, etc.) in the parent
revision.
id (int):
The id of the parent provenance.
"""
Expand Down

0 comments on commit ca016dd

Please sign in to comment.