Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: [documentai] add StyleInfo to document.proto #4352

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 54 additions & 52 deletions packages/google-cloud-documentai/README.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -64,7 +64,8 @@ message Document {
// Font size for the text.
float size = 1;

// Unit for the font size. Follows CSS naming (in, px, pt, etc.).
// Unit for the font size. Follows CSS naming (such as `in`, `px`, and
// `pt`).
string unit = 2;
}

Expand All @@ -78,17 +79,18 @@ message Document {
// Text background color.
google.type.Color background_color = 3;

// Font weight. Possible values are normal, bold, bolder, and lighter.
// https://www.w3schools.com/cssref/pr_font_weight.asp
// [Font weight](https://www.w3schools.com/cssref/pr_font_weight.asp).
// Possible values are `normal`, `bold`, `bolder`, and `lighter`.
string font_weight = 4;

// Text style. Possible values are normal, italic, and oblique.
// https://www.w3schools.com/cssref/pr_font_font-style.asp
// [Text style](https://www.w3schools.com/cssref/pr_font_font-style.asp).
// Possible values are `normal`, `italic`, and `oblique`.
string text_style = 5;

// Text decoration. Follows CSS standard.
// <text-decoration-line> <text-decoration-color> <text-decoration-style>
// https://www.w3schools.com/cssref/pr_text_text-decoration.asp
// [Text
// decoration](https://www.w3schools.com/cssref/pr_text_text-decoration.asp).
// Follows CSS standard. <text-decoration-line> <text-decoration-color>
// <text-decoration-style>
string text_decoration = 6;

// Font size.
Expand Down Expand Up @@ -118,7 +120,9 @@ message Document {
// Raw byte content of the image.
bytes content = 1;

// Encoding mime type for the image.
// Encoding [media type (MIME
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml)
// for the image.
string mime_type = 2;

// Width of the image in pixels.
Expand Down Expand Up @@ -255,6 +259,59 @@ message Document {
Type type = 1;
}

// Font and other text style attributes.
message StyleInfo {
// Font size in points (`1` point is `¹⁄₇₂` of an inch). Stored as an
// integer; `pixel_font_size` is computed from the unrounded value.
int32 font_size = 1;

// Font size in pixels, equal to _unrounded
// [font_size][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_size]_
// * _resolution_ ÷ `72.0`.
double pixel_font_size = 2;

// Letter spacing in points.
double letter_spacing = 3;

// Name or style of the font.
string font_type = 4;

// Whether the text is bold (equivalent to
// [font_weight][google.cloud.documentai.v1.Document.Page.Token.StyleInfo.font_weight]
// being at least `700`).
bool bold = 5;

// Whether the text is italic.
bool italic = 6;

// Whether the text is underlined.
bool underlined = 7;

// Whether the text is struck through.
bool strikeout = 8;

// Whether the text is a subscript.
bool subscript = 9;

// Whether the text is a superscript.
bool superscript = 10;

// Whether the text is in small caps.
bool smallcaps = 11;

// TrueType weight on a scale from `100` (thin) to `1000` (ultra-heavy).
// Normal is `400`, bold is `700`.
int32 font_weight = 12;

// Whether the text is handwritten.
bool handwritten = 13;

// Color of the text.
google.type.Color text_color = 14;

// Color of the background.
google.type.Color background_color = 15;
}

// [Layout][google.cloud.documentai.v1.Document.Page.Layout] for
// [Token][google.cloud.documentai.v1.Document.Page.Token].
Layout layout = 1;
Expand All @@ -268,6 +325,9 @@ message Document {

// The history of this annotation.
Provenance provenance = 4 [deprecated = true];

// Text style attributes.
StyleInfo style_info = 5;
}

// A detected symbol.
Expand Down Expand Up @@ -333,7 +393,7 @@ message Document {
repeated DetectedLanguage detected_languages = 4;

// The history of this table.
Provenance provenance = 5;
Provenance provenance = 5 [deprecated = true];
}

// A form field detected on the page.
Expand Down Expand Up @@ -388,16 +448,16 @@ message Document {

// Detected language for a structural component.
message DetectedLanguage {
// The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
// information, see
// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
// The [BCP-47 language
// code](https://www.unicode.org/reports/tr35/#Unicode_locale_identifier),
// such as `en-US` or `sr-Latn`.
string language_code = 1;

// Confidence of detected language. Range `[0, 1]`.
float confidence = 2;
}

// Image Quality Scores for the page image
// Image quality scores for the page image.
message ImageQualityScores {
// Image quality defects.
message DetectedDefect {
Expand All @@ -413,12 +473,12 @@ message Document {
// - `quality/defect_glare`
string type = 1;

// Confidence of detected defect. Range `[0, 1]` where 1 indicates
// strong confidence of that the defect exists.
// Confidence of detected defect. Range `[0, 1]` where `1` indicates
// strong confidence that the defect exists.
float confidence = 2;
}

// The overall quality score. Range `[0, 1]` where 1 is perfect quality.
// The overall quality score. Range `[0, 1]` where `1` is perfect quality.
float quality_score = 1;

// A list of detected defects.
Expand Down Expand Up @@ -482,7 +542,7 @@ message Document {
// A list of detected barcodes.
repeated DetectedBarcode detected_barcodes = 15;

// Image Quality Scores.
// Image quality scores.
ImageQualityScores image_quality_scores = 17;

// The history of this page.
Expand Down Expand Up @@ -736,9 +796,9 @@ message Document {
REMOVE = 2;

// Updates any fields within the given provenance scope of the message. It
// 'overwrites' the fields rather than replacing them. This is
// especially relevant when we just want to update a field value of an
// entity without also affecting all the child properties.
// overwrites the fields rather than replacing them. Use this when you
// want to update a field value of an entity without also updating all the
// child properties.
UPDATE = 7;

// Currently unused. Replace an element identified by `parent`.
Expand Down Expand Up @@ -835,10 +895,9 @@ message Document {
// Original source document from the user.
oneof source {
// Optional. Currently supports Google Cloud Storage URI of the form
// `gs://bucket_name/object_name`. Object versioning is not supported.
// See [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris) for more
// info.
// `gs://bucket_name/object_name`. Object versioning is not supported.
// For more information, refer to [Google Cloud Storage Request
// URIs](https://cloud.google.com/storage/docs/reference-uris).
string uri = 1 [(google.api.field_behavior) = OPTIONAL];

// Optional. Inline document content, represented as a stream of bytes.
Expand All @@ -847,9 +906,8 @@ message Document {
bytes content = 2 [(google.api.field_behavior) = OPTIONAL];
}

// An IANA published MIME type (also referred to as media type). For more
// information, see
// https://www.iana.org/assignments/media-types/media-types.xhtml.
// An IANA published [media type (MIME
// type)](https://www.iana.org/assignments/media-types/media-types.xhtml).
string mime_type = 3;

// Optional. UTF-8 encoded text in reading order from the document.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down
Loading