From ac10591891eb1ce5f16cbf8dd62a2cb82068e909 Mon Sep 17 00:00:00 2001 From: Christopher Wilcox Date: Tue, 29 Jan 2019 13:28:49 -0800 Subject: [PATCH] Add protos as an artifact to library (#7205) --- .../language_v1/proto/language_service.proto | 950 ++++++++++++++++++ .../proto/language_service.proto | 950 ++++++++++++++++++ packages/google-cloud-language/synth.metadata | 10 +- packages/google-cloud-language/synth.py | 1 + 4 files changed, 1906 insertions(+), 5 deletions(-) create mode 100644 packages/google-cloud-language/google/cloud/language_v1/proto/language_service.proto create mode 100644 packages/google-cloud-language/google/cloud/language_v1beta2/proto/language_service.proto diff --git a/packages/google-cloud-language/google/cloud/language_v1/proto/language_service.proto b/packages/google-cloud-language/google/cloud/language_v1/proto/language_service.proto new file mode 100644 index 000000000000..27212ac726c4 --- /dev/null +++ b/packages/google-cloud-language/google/cloud/language_v1/proto/language_service.proto @@ -0,0 +1,950 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.language.v1beta1; + +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1beta1;language"; +option java_multiple_files = true; +option java_outer_classname = "LanguageServiceProto"; +option java_package = "com.google.cloud.language.v1beta1"; + + +// Provides text analysis operations such as sentiment analysis and entity +// recognition. +service LanguageService { + // Analyzes the sentiment of the provided text. + rpc AnalyzeSentiment(AnalyzeSentimentRequest) returns (AnalyzeSentimentResponse) { + option (google.api.http) = { post: "/v1beta1/documents:analyzeSentiment" body: "*" }; + } + + // Finds named entities (currently proper names and common nouns) in the text + // along with entity types, salience, mentions for each entity, and + // other properties. + rpc AnalyzeEntities(AnalyzeEntitiesRequest) returns (AnalyzeEntitiesResponse) { + option (google.api.http) = { post: "/v1beta1/documents:analyzeEntities" body: "*" }; + } + + // Analyzes the syntax of the text and provides sentence boundaries and + // tokenization along with part of speech tags, dependency trees, and other + // properties. + rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) { + option (google.api.http) = { post: "/v1beta1/documents:analyzeSyntax" body: "*" }; + } + + // A convenience method that provides all the features that analyzeSentiment, + // analyzeEntities, and analyzeSyntax provide in one call. + rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) { + option (google.api.http) = { post: "/v1beta1/documents:annotateText" body: "*" }; + } +} + +// ################################################################ # +// +// Represents the input to API methods. +message Document { + // The document types enum. + enum Type { + // The content type is not specified. + TYPE_UNSPECIFIED = 0; + + // Plain text + PLAIN_TEXT = 1; + + // HTML + HTML = 2; + } + + // Required. If the type is not set or is `TYPE_UNSPECIFIED`, + // returns an `INVALID_ARGUMENT` error. + Type type = 1; + + // The source of the document: a string containing the content or a + // Google Cloud Storage URI. + oneof source { + // The content of the input in string format. + string content = 2; + + // The Google Cloud Storage URI where the file content is located. + // This URI must be of the form: gs://bucket_name/object_name. For more + // details, see https://cloud.google.com/storage/docs/reference-uris. + // NOTE: Cloud Storage object versioning is not supported. + string gcs_content_uri = 3; + } + + // The language of the document (if not specified, the language is + // automatically detected). Both ISO and BCP-47 language codes are + // accepted.
+ // [Language Support](https://cloud.google.com/natural-language/docs/languages) + // lists currently supported languages for each API method. + // If the language (either specified by the caller or automatically detected) + // is not supported by the called API method, an `INVALID_ARGUMENT` error + // is returned. + string language = 4; +} + +// Represents a sentence in the input document. +message Sentence { + // The sentence text. + TextSpan text = 1; + + // For calls to [AnalyzeSentiment][] or if + // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment] is set to + // true, this field will contain the sentiment for the sentence. + Sentiment sentiment = 2; +} + +// Represents a phrase in the text that is a known entity, such as +// a person, an organization, or location. The API associates information, such +// as salience and mentions, with entities. +message Entity { + // The type of the entity. + enum Type { + // Unknown + UNKNOWN = 0; + + // Person + PERSON = 1; + + // Location + LOCATION = 2; + + // Organization + ORGANIZATION = 3; + + // Event + EVENT = 4; + + // Work of art + WORK_OF_ART = 5; + + // Consumer goods + CONSUMER_GOOD = 6; + + // Other types + OTHER = 7; + } + + // The representative name for the entity. + string name = 1; + + // The entity type. + Type type = 2; + + // Metadata associated with the entity. + // + // Currently, Wikipedia URLs and Knowledge Graph MIDs are provided, if + // available. The associated keys are "wikipedia_url" and "mid", respectively. + map metadata = 3; + + // The salience score associated with the entity in the [0, 1.0] range. + // + // The salience score for an entity provides information about the + // importance or centrality of that entity to the entire document text. + // Scores closer to 0 are less salient, while scores closer to 1.0 are highly + // salient. + float salience = 4; + + // The mentions of this entity in the input document. The API currently + // supports proper noun mentions. + repeated EntityMention mentions = 5; +} + +// Represents the smallest syntactic building block of the text. +message Token { + // The token text. + TextSpan text = 1; + + // Parts of speech tag for this token. + PartOfSpeech part_of_speech = 2; + + // Dependency tree parse for this token. + DependencyEdge dependency_edge = 3; + + // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token. + string lemma = 4; +} + +// Represents the feeling associated with the entire text or entities in +// the text. +message Sentiment { + // DEPRECATED FIELD - This field is being deprecated in + // favor of score. Please refer to our documentation at + // https://cloud.google.com/natural-language/docs for more information. + float polarity = 1; + + // A non-negative number in the [0, +inf) range, which represents + // the absolute magnitude of sentiment regardless of score (positive or + // negative). + float magnitude = 2; + + // Sentiment score between -1.0 (negative sentiment) and 1.0 + // (positive sentiment). + float score = 3; +} + +// Represents part of speech information for a token. +message PartOfSpeech { + // The part of speech tags enum. + enum Tag { + // Unknown + UNKNOWN = 0; + + // Adjective + ADJ = 1; + + // Adposition (preposition and postposition) + ADP = 2; + + // Adverb + ADV = 3; + + // Conjunction + CONJ = 4; + + // Determiner + DET = 5; + + // Noun (common and proper) + NOUN = 6; + + // Cardinal number + NUM = 7; + + // Pronoun + PRON = 8; + + // Particle or other function word + PRT = 9; + + // Punctuation + PUNCT = 10; + + // Verb (all tenses and modes) + VERB = 11; + + // Other: foreign words, typos, abbreviations + X = 12; + + // Affix + AFFIX = 13; + } + + // The characteristic of a verb that expresses time flow during an event. + enum Aspect { + // Aspect is not applicable in the analyzed language or is not predicted. + ASPECT_UNKNOWN = 0; + + // Perfective + PERFECTIVE = 1; + + // Imperfective + IMPERFECTIVE = 2; + + // Progressive + PROGRESSIVE = 3; + } + + // The grammatical function performed by a noun or pronoun in a phrase, + // clause, or sentence. In some languages, other parts of speech, such as + // adjective and determiner, take case inflection in agreement with the noun. + enum Case { + // Case is not applicable in the analyzed language or is not predicted. + CASE_UNKNOWN = 0; + + // Accusative + ACCUSATIVE = 1; + + // Adverbial + ADVERBIAL = 2; + + // Complementive + COMPLEMENTIVE = 3; + + // Dative + DATIVE = 4; + + // Genitive + GENITIVE = 5; + + // Instrumental + INSTRUMENTAL = 6; + + // Locative + LOCATIVE = 7; + + // Nominative + NOMINATIVE = 8; + + // Oblique + OBLIQUE = 9; + + // Partitive + PARTITIVE = 10; + + // Prepositional + PREPOSITIONAL = 11; + + // Reflexive + REFLEXIVE_CASE = 12; + + // Relative + RELATIVE_CASE = 13; + + // Vocative + VOCATIVE = 14; + } + + // Depending on the language, Form can be categorizing different forms of + // verbs, adjectives, adverbs, etc. For example, categorizing inflected + // endings of verbs and adjectives or distinguishing between short and long + // forms of adjectives and participles + enum Form { + // Form is not applicable in the analyzed language or is not predicted. + FORM_UNKNOWN = 0; + + // Adnomial + ADNOMIAL = 1; + + // Auxiliary + AUXILIARY = 2; + + // Complementizer + COMPLEMENTIZER = 3; + + // Final ending + FINAL_ENDING = 4; + + // Gerund + GERUND = 5; + + // Realis + REALIS = 6; + + // Irrealis + IRREALIS = 7; + + // Short form + SHORT = 8; + + // Long form + LONG = 9; + + // Order form + ORDER = 10; + + // Specific form + SPECIFIC = 11; + } + + // Gender classes of nouns reflected in the behaviour of associated words. + enum Gender { + // Gender is not applicable in the analyzed language or is not predicted. + GENDER_UNKNOWN = 0; + + // Feminine + FEMININE = 1; + + // Masculine + MASCULINE = 2; + + // Neuter + NEUTER = 3; + } + + // The grammatical feature of verbs, used for showing modality and attitude. + enum Mood { + // Mood is not applicable in the analyzed language or is not predicted. + MOOD_UNKNOWN = 0; + + // Conditional + CONDITIONAL_MOOD = 1; + + // Imperative + IMPERATIVE = 2; + + // Indicative + INDICATIVE = 3; + + // Interrogative + INTERROGATIVE = 4; + + // Jussive + JUSSIVE = 5; + + // Subjunctive + SUBJUNCTIVE = 6; + } + + // Count distinctions. + enum Number { + // Number is not applicable in the analyzed language or is not predicted. + NUMBER_UNKNOWN = 0; + + // Singular + SINGULAR = 1; + + // Plural + PLURAL = 2; + + // Dual + DUAL = 3; + } + + // The distinction between the speaker, second person, third person, etc. + enum Person { + // Person is not applicable in the analyzed language or is not predicted. + PERSON_UNKNOWN = 0; + + // First + FIRST = 1; + + // Second + SECOND = 2; + + // Third + THIRD = 3; + + // Reflexive + REFLEXIVE_PERSON = 4; + } + + // This category shows if the token is part of a proper name. + enum Proper { + // Proper is not applicable in the analyzed language or is not predicted. + PROPER_UNKNOWN = 0; + + // Proper + PROPER = 1; + + // Not proper + NOT_PROPER = 2; + } + + // Reciprocal features of a pronoun. + enum Reciprocity { + // Reciprocity is not applicable in the analyzed language or is not + // predicted. + RECIPROCITY_UNKNOWN = 0; + + // Reciprocal + RECIPROCAL = 1; + + // Non-reciprocal + NON_RECIPROCAL = 2; + } + + // Time reference. + enum Tense { + // Tense is not applicable in the analyzed language or is not predicted. + TENSE_UNKNOWN = 0; + + // Conditional + CONDITIONAL_TENSE = 1; + + // Future + FUTURE = 2; + + // Past + PAST = 3; + + // Present + PRESENT = 4; + + // Imperfect + IMPERFECT = 5; + + // Pluperfect + PLUPERFECT = 6; + } + + // The relationship between the action that a verb expresses and the + // participants identified by its arguments. + enum Voice { + // Voice is not applicable in the analyzed language or is not predicted. + VOICE_UNKNOWN = 0; + + // Active + ACTIVE = 1; + + // Causative + CAUSATIVE = 2; + + // Passive + PASSIVE = 3; + } + + // The part of speech tag. + Tag tag = 1; + + // The grammatical aspect. + Aspect aspect = 2; + + // The grammatical case. + Case case = 3; + + // The grammatical form. + Form form = 4; + + // The grammatical gender. + Gender gender = 5; + + // The grammatical mood. + Mood mood = 6; + + // The grammatical number. + Number number = 7; + + // The grammatical person. + Person person = 8; + + // The grammatical properness. + Proper proper = 9; + + // The grammatical reciprocity. + Reciprocity reciprocity = 10; + + // The grammatical tense. + Tense tense = 11; + + // The grammatical voice. + Voice voice = 12; +} + +// Represents dependency parse tree information for a token. +message DependencyEdge { + // The parse label enum for the token. + enum Label { + // Unknown + UNKNOWN = 0; + + // Abbreviation modifier + ABBREV = 1; + + // Adjectival complement + ACOMP = 2; + + // Adverbial clause modifier + ADVCL = 3; + + // Adverbial modifier + ADVMOD = 4; + + // Adjectival modifier of an NP + AMOD = 5; + + // Appositional modifier of an NP + APPOS = 6; + + // Attribute dependent of a copular verb + ATTR = 7; + + // Auxiliary (non-main) verb + AUX = 8; + + // Passive auxiliary + AUXPASS = 9; + + // Coordinating conjunction + CC = 10; + + // Clausal complement of a verb or adjective + CCOMP = 11; + + // Conjunct + CONJ = 12; + + // Clausal subject + CSUBJ = 13; + + // Clausal passive subject + CSUBJPASS = 14; + + // Dependency (unable to determine) + DEP = 15; + + // Determiner + DET = 16; + + // Discourse + DISCOURSE = 17; + + // Direct object + DOBJ = 18; + + // Expletive + EXPL = 19; + + // Goes with (part of a word in a text not well edited) + GOESWITH = 20; + + // Indirect object + IOBJ = 21; + + // Marker (word introducing a subordinate clause) + MARK = 22; + + // Multi-word expression + MWE = 23; + + // Multi-word verbal expression + MWV = 24; + + // Negation modifier + NEG = 25; + + // Noun compound modifier + NN = 26; + + // Noun phrase used as an adverbial modifier + NPADVMOD = 27; + + // Nominal subject + NSUBJ = 28; + + // Passive nominal subject + NSUBJPASS = 29; + + // Numeric modifier of a noun + NUM = 30; + + // Element of compound number + NUMBER = 31; + + // Punctuation mark + P = 32; + + // Parataxis relation + PARATAXIS = 33; + + // Participial modifier + PARTMOD = 34; + + // The complement of a preposition is a clause + PCOMP = 35; + + // Object of a preposition + POBJ = 36; + + // Possession modifier + POSS = 37; + + // Postverbal negative particle + POSTNEG = 38; + + // Predicate complement + PRECOMP = 39; + + // Preconjunt + PRECONJ = 40; + + // Predeterminer + PREDET = 41; + + // Prefix + PREF = 42; + + // Prepositional modifier + PREP = 43; + + // The relationship between a verb and verbal morpheme + PRONL = 44; + + // Particle + PRT = 45; + + // Associative or possessive marker + PS = 46; + + // Quantifier phrase modifier + QUANTMOD = 47; + + // Relative clause modifier + RCMOD = 48; + + // Complementizer in relative clause + RCMODREL = 49; + + // Ellipsis without a preceding predicate + RDROP = 50; + + // Referent + REF = 51; + + // Remnant + REMNANT = 52; + + // Reparandum + REPARANDUM = 53; + + // Root + ROOT = 54; + + // Suffix specifying a unit of number + SNUM = 55; + + // Suffix + SUFF = 56; + + // Temporal modifier + TMOD = 57; + + // Topic marker + TOPIC = 58; + + // Clause headed by an infinite form of the verb that modifies a noun + VMOD = 59; + + // Vocative + VOCATIVE = 60; + + // Open clausal complement + XCOMP = 61; + + // Name suffix + SUFFIX = 62; + + // Name title + TITLE = 63; + + // Adverbial phrase modifier + ADVPHMOD = 64; + + // Causative auxiliary + AUXCAUS = 65; + + // Helper auxiliary + AUXVV = 66; + + // Rentaishi (Prenominal modifier) + DTMOD = 67; + + // Foreign words + FOREIGN = 68; + + // Keyword + KW = 69; + + // List for chains of comparable items + LIST = 70; + + // Nominalized clause + NOMC = 71; + + // Nominalized clausal subject + NOMCSUBJ = 72; + + // Nominalized clausal passive + NOMCSUBJPASS = 73; + + // Compound of numeric modifier + NUMC = 74; + + // Copula + COP = 75; + + // Dislocated relation (for fronted/topicalized elements) + DISLOCATED = 76; + } + + // Represents the head of this token in the dependency tree. + // This is the index of the token which has an arc going to this token. + // The index is the position of the token in the array of tokens returned + // by the API method. If this token is a root token, then the + // `head_token_index` is its own index. + int32 head_token_index = 1; + + // The parse label for the token. + Label label = 2; +} + +// Represents a mention for an entity in the text. Currently, proper noun +// mentions are supported. +message EntityMention { + // The supported types of mentions. + enum Type { + // Unknown + TYPE_UNKNOWN = 0; + + // Proper name + PROPER = 1; + + // Common noun (or noun compound) + COMMON = 2; + } + + // The mention text. + TextSpan text = 1; + + // The type of the entity mention. + Type type = 2; +} + +// Represents an output piece of text. +message TextSpan { + // The content of the output text. + string content = 1; + + // The API calculates the beginning offset of the content in the original + // document according to the [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in the API request. + int32 begin_offset = 2; +} + +// The sentiment analysis request message. +message AnalyzeSentimentRequest { + // Input document. + Document document = 1; + + // The encoding type used by the API to calculate sentence offsets for the + // sentence sentiment. + EncodingType encoding_type = 2; +} + +// The sentiment analysis response message. +message AnalyzeSentimentResponse { + // The overall sentiment of the input document. + Sentiment document_sentiment = 1; + + // The language of the text, which will be the same as the language specified + // in the request or, if not specified, the automatically-detected language. + // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details. + string language = 2; + + // The sentiment for all the sentences in the document. + repeated Sentence sentences = 3; +} + +// The entity analysis request message. +message AnalyzeEntitiesRequest { + // Input document. + Document document = 1; + + // The encoding type used by the API to calculate offsets. + EncodingType encoding_type = 2; +} + +// The entity analysis response message. +message AnalyzeEntitiesResponse { + // The recognized entities in the input document. + repeated Entity entities = 1; + + // The language of the text, which will be the same as the language specified + // in the request or, if not specified, the automatically-detected language. + // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details. + string language = 2; +} + +// The syntax analysis request message. +message AnalyzeSyntaxRequest { + // Input document. + Document document = 1; + + // The encoding type used by the API to calculate offsets. + EncodingType encoding_type = 2; +} + +// The syntax analysis response message. +message AnalyzeSyntaxResponse { + // Sentences in the input document. + repeated Sentence sentences = 1; + + // Tokens, along with their syntactic information, in the input document. + repeated Token tokens = 2; + + // The language of the text, which will be the same as the language specified + // in the request or, if not specified, the automatically-detected language. + // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details. + string language = 3; +} + +// The request message for the text annotation API, which can perform multiple +// analysis types (sentiment, entities, and syntax) in one call. +message AnnotateTextRequest { + // All available features for sentiment, syntax, and semantic analysis. + // Setting each one to true will enable that specific analysis for the input. + message Features { + // Extract syntax information. + bool extract_syntax = 1; + + // Extract entities. + bool extract_entities = 2; + + // Extract document-level sentiment. + bool extract_document_sentiment = 3; + } + + // Input document. + Document document = 1; + + // The enabled features. + Features features = 2; + + // The encoding type used by the API to calculate offsets. + EncodingType encoding_type = 3; +} + +// The text annotations response message. +message AnnotateTextResponse { + // Sentences in the input document. Populated if the user enables + // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax]. + repeated Sentence sentences = 1; + + // Tokens, along with their syntactic information, in the input document. + // Populated if the user enables + // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax]. + repeated Token tokens = 2; + + // Entities, along with their semantic information, in the input document. + // Populated if the user enables + // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities]. + repeated Entity entities = 3; + + // The overall sentiment for the document. Populated if the user enables + // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment]. + Sentiment document_sentiment = 4; + + // The language of the text, which will be the same as the language specified + // in the request or, if not specified, the automatically-detected language. + // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details. + string language = 5; +} + +// Represents the text encoding that the caller uses to process the output. +// Providing an `EncodingType` is recommended because the API provides the +// beginning offsets for various outputs, such as tokens and mentions, and +// languages that natively use different text encodings may access offsets +// differently. +enum EncodingType { + // If `EncodingType` is not specified, encoding-dependent information (such as + // `begin_offset`) will be set at `-1`. + NONE = 0; + + // Encoding-dependent information (such as `begin_offset`) is calculated based + // on the UTF-8 encoding of the input. C++ and Go are examples of languages + // that use this encoding natively. + UTF8 = 1; + + // Encoding-dependent information (such as `begin_offset`) is calculated based + // on the UTF-16 encoding of the input. Java and Javascript are examples of + // languages that use this encoding natively. + UTF16 = 2; + + // Encoding-dependent information (such as `begin_offset`) is calculated based + // on the UTF-32 encoding of the input. Python is an example of a language + // that uses this encoding natively. + UTF32 = 3; +} diff --git a/packages/google-cloud-language/google/cloud/language_v1beta2/proto/language_service.proto b/packages/google-cloud-language/google/cloud/language_v1beta2/proto/language_service.proto new file mode 100644 index 000000000000..27212ac726c4 --- /dev/null +++ b/packages/google-cloud-language/google/cloud/language_v1beta2/proto/language_service.proto @@ -0,0 +1,950 @@ +// Copyright 2017 Google Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; + +package google.cloud.language.v1beta1; + +import "google/api/annotations.proto"; + +option go_package = "google.golang.org/genproto/googleapis/cloud/language/v1beta1;language"; +option java_multiple_files = true; +option java_outer_classname = "LanguageServiceProto"; +option java_package = "com.google.cloud.language.v1beta1"; + + +// Provides text analysis operations such as sentiment analysis and entity +// recognition. +service LanguageService { + // Analyzes the sentiment of the provided text. + rpc AnalyzeSentiment(AnalyzeSentimentRequest) returns (AnalyzeSentimentResponse) { + option (google.api.http) = { post: "/v1beta1/documents:analyzeSentiment" body: "*" }; + } + + // Finds named entities (currently proper names and common nouns) in the text + // along with entity types, salience, mentions for each entity, and + // other properties. + rpc AnalyzeEntities(AnalyzeEntitiesRequest) returns (AnalyzeEntitiesResponse) { + option (google.api.http) = { post: "/v1beta1/documents:analyzeEntities" body: "*" }; + } + + // Analyzes the syntax of the text and provides sentence boundaries and + // tokenization along with part of speech tags, dependency trees, and other + // properties. + rpc AnalyzeSyntax(AnalyzeSyntaxRequest) returns (AnalyzeSyntaxResponse) { + option (google.api.http) = { post: "/v1beta1/documents:analyzeSyntax" body: "*" }; + } + + // A convenience method that provides all the features that analyzeSentiment, + // analyzeEntities, and analyzeSyntax provide in one call. + rpc AnnotateText(AnnotateTextRequest) returns (AnnotateTextResponse) { + option (google.api.http) = { post: "/v1beta1/documents:annotateText" body: "*" }; + } +} + +// ################################################################ # +// +// Represents the input to API methods. +message Document { + // The document types enum. + enum Type { + // The content type is not specified. + TYPE_UNSPECIFIED = 0; + + // Plain text + PLAIN_TEXT = 1; + + // HTML + HTML = 2; + } + + // Required. If the type is not set or is `TYPE_UNSPECIFIED`, + // returns an `INVALID_ARGUMENT` error. + Type type = 1; + + // The source of the document: a string containing the content or a + // Google Cloud Storage URI. + oneof source { + // The content of the input in string format. + string content = 2; + + // The Google Cloud Storage URI where the file content is located. + // This URI must be of the form: gs://bucket_name/object_name. For more + // details, see https://cloud.google.com/storage/docs/reference-uris. + // NOTE: Cloud Storage object versioning is not supported. + string gcs_content_uri = 3; + } + + // The language of the document (if not specified, the language is + // automatically detected). Both ISO and BCP-47 language codes are + // accepted.
+ // [Language Support](https://cloud.google.com/natural-language/docs/languages) + // lists currently supported languages for each API method. + // If the language (either specified by the caller or automatically detected) + // is not supported by the called API method, an `INVALID_ARGUMENT` error + // is returned. + string language = 4; +} + +// Represents a sentence in the input document. +message Sentence { + // The sentence text. + TextSpan text = 1; + + // For calls to [AnalyzeSentiment][] or if + // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment] is set to + // true, this field will contain the sentiment for the sentence. + Sentiment sentiment = 2; +} + +// Represents a phrase in the text that is a known entity, such as +// a person, an organization, or location. The API associates information, such +// as salience and mentions, with entities. +message Entity { + // The type of the entity. + enum Type { + // Unknown + UNKNOWN = 0; + + // Person + PERSON = 1; + + // Location + LOCATION = 2; + + // Organization + ORGANIZATION = 3; + + // Event + EVENT = 4; + + // Work of art + WORK_OF_ART = 5; + + // Consumer goods + CONSUMER_GOOD = 6; + + // Other types + OTHER = 7; + } + + // The representative name for the entity. + string name = 1; + + // The entity type. + Type type = 2; + + // Metadata associated with the entity. + // + // Currently, Wikipedia URLs and Knowledge Graph MIDs are provided, if + // available. The associated keys are "wikipedia_url" and "mid", respectively. + map metadata = 3; + + // The salience score associated with the entity in the [0, 1.0] range. + // + // The salience score for an entity provides information about the + // importance or centrality of that entity to the entire document text. + // Scores closer to 0 are less salient, while scores closer to 1.0 are highly + // salient. + float salience = 4; + + // The mentions of this entity in the input document. The API currently + // supports proper noun mentions. + repeated EntityMention mentions = 5; +} + +// Represents the smallest syntactic building block of the text. +message Token { + // The token text. + TextSpan text = 1; + + // Parts of speech tag for this token. + PartOfSpeech part_of_speech = 2; + + // Dependency tree parse for this token. + DependencyEdge dependency_edge = 3; + + // [Lemma](https://en.wikipedia.org/wiki/Lemma_%28morphology%29) of the token. + string lemma = 4; +} + +// Represents the feeling associated with the entire text or entities in +// the text. +message Sentiment { + // DEPRECATED FIELD - This field is being deprecated in + // favor of score. Please refer to our documentation at + // https://cloud.google.com/natural-language/docs for more information. + float polarity = 1; + + // A non-negative number in the [0, +inf) range, which represents + // the absolute magnitude of sentiment regardless of score (positive or + // negative). + float magnitude = 2; + + // Sentiment score between -1.0 (negative sentiment) and 1.0 + // (positive sentiment). + float score = 3; +} + +// Represents part of speech information for a token. +message PartOfSpeech { + // The part of speech tags enum. + enum Tag { + // Unknown + UNKNOWN = 0; + + // Adjective + ADJ = 1; + + // Adposition (preposition and postposition) + ADP = 2; + + // Adverb + ADV = 3; + + // Conjunction + CONJ = 4; + + // Determiner + DET = 5; + + // Noun (common and proper) + NOUN = 6; + + // Cardinal number + NUM = 7; + + // Pronoun + PRON = 8; + + // Particle or other function word + PRT = 9; + + // Punctuation + PUNCT = 10; + + // Verb (all tenses and modes) + VERB = 11; + + // Other: foreign words, typos, abbreviations + X = 12; + + // Affix + AFFIX = 13; + } + + // The characteristic of a verb that expresses time flow during an event. + enum Aspect { + // Aspect is not applicable in the analyzed language or is not predicted. + ASPECT_UNKNOWN = 0; + + // Perfective + PERFECTIVE = 1; + + // Imperfective + IMPERFECTIVE = 2; + + // Progressive + PROGRESSIVE = 3; + } + + // The grammatical function performed by a noun or pronoun in a phrase, + // clause, or sentence. In some languages, other parts of speech, such as + // adjective and determiner, take case inflection in agreement with the noun. + enum Case { + // Case is not applicable in the analyzed language or is not predicted. + CASE_UNKNOWN = 0; + + // Accusative + ACCUSATIVE = 1; + + // Adverbial + ADVERBIAL = 2; + + // Complementive + COMPLEMENTIVE = 3; + + // Dative + DATIVE = 4; + + // Genitive + GENITIVE = 5; + + // Instrumental + INSTRUMENTAL = 6; + + // Locative + LOCATIVE = 7; + + // Nominative + NOMINATIVE = 8; + + // Oblique + OBLIQUE = 9; + + // Partitive + PARTITIVE = 10; + + // Prepositional + PREPOSITIONAL = 11; + + // Reflexive + REFLEXIVE_CASE = 12; + + // Relative + RELATIVE_CASE = 13; + + // Vocative + VOCATIVE = 14; + } + + // Depending on the language, Form can be categorizing different forms of + // verbs, adjectives, adverbs, etc. For example, categorizing inflected + // endings of verbs and adjectives or distinguishing between short and long + // forms of adjectives and participles + enum Form { + // Form is not applicable in the analyzed language or is not predicted. + FORM_UNKNOWN = 0; + + // Adnomial + ADNOMIAL = 1; + + // Auxiliary + AUXILIARY = 2; + + // Complementizer + COMPLEMENTIZER = 3; + + // Final ending + FINAL_ENDING = 4; + + // Gerund + GERUND = 5; + + // Realis + REALIS = 6; + + // Irrealis + IRREALIS = 7; + + // Short form + SHORT = 8; + + // Long form + LONG = 9; + + // Order form + ORDER = 10; + + // Specific form + SPECIFIC = 11; + } + + // Gender classes of nouns reflected in the behaviour of associated words. + enum Gender { + // Gender is not applicable in the analyzed language or is not predicted. + GENDER_UNKNOWN = 0; + + // Feminine + FEMININE = 1; + + // Masculine + MASCULINE = 2; + + // Neuter + NEUTER = 3; + } + + // The grammatical feature of verbs, used for showing modality and attitude. + enum Mood { + // Mood is not applicable in the analyzed language or is not predicted. + MOOD_UNKNOWN = 0; + + // Conditional + CONDITIONAL_MOOD = 1; + + // Imperative + IMPERATIVE = 2; + + // Indicative + INDICATIVE = 3; + + // Interrogative + INTERROGATIVE = 4; + + // Jussive + JUSSIVE = 5; + + // Subjunctive + SUBJUNCTIVE = 6; + } + + // Count distinctions. + enum Number { + // Number is not applicable in the analyzed language or is not predicted. + NUMBER_UNKNOWN = 0; + + // Singular + SINGULAR = 1; + + // Plural + PLURAL = 2; + + // Dual + DUAL = 3; + } + + // The distinction between the speaker, second person, third person, etc. + enum Person { + // Person is not applicable in the analyzed language or is not predicted. + PERSON_UNKNOWN = 0; + + // First + FIRST = 1; + + // Second + SECOND = 2; + + // Third + THIRD = 3; + + // Reflexive + REFLEXIVE_PERSON = 4; + } + + // This category shows if the token is part of a proper name. + enum Proper { + // Proper is not applicable in the analyzed language or is not predicted. + PROPER_UNKNOWN = 0; + + // Proper + PROPER = 1; + + // Not proper + NOT_PROPER = 2; + } + + // Reciprocal features of a pronoun. + enum Reciprocity { + // Reciprocity is not applicable in the analyzed language or is not + // predicted. + RECIPROCITY_UNKNOWN = 0; + + // Reciprocal + RECIPROCAL = 1; + + // Non-reciprocal + NON_RECIPROCAL = 2; + } + + // Time reference. + enum Tense { + // Tense is not applicable in the analyzed language or is not predicted. + TENSE_UNKNOWN = 0; + + // Conditional + CONDITIONAL_TENSE = 1; + + // Future + FUTURE = 2; + + // Past + PAST = 3; + + // Present + PRESENT = 4; + + // Imperfect + IMPERFECT = 5; + + // Pluperfect + PLUPERFECT = 6; + } + + // The relationship between the action that a verb expresses and the + // participants identified by its arguments. + enum Voice { + // Voice is not applicable in the analyzed language or is not predicted. + VOICE_UNKNOWN = 0; + + // Active + ACTIVE = 1; + + // Causative + CAUSATIVE = 2; + + // Passive + PASSIVE = 3; + } + + // The part of speech tag. + Tag tag = 1; + + // The grammatical aspect. + Aspect aspect = 2; + + // The grammatical case. + Case case = 3; + + // The grammatical form. + Form form = 4; + + // The grammatical gender. + Gender gender = 5; + + // The grammatical mood. + Mood mood = 6; + + // The grammatical number. + Number number = 7; + + // The grammatical person. + Person person = 8; + + // The grammatical properness. + Proper proper = 9; + + // The grammatical reciprocity. + Reciprocity reciprocity = 10; + + // The grammatical tense. + Tense tense = 11; + + // The grammatical voice. + Voice voice = 12; +} + +// Represents dependency parse tree information for a token. +message DependencyEdge { + // The parse label enum for the token. + enum Label { + // Unknown + UNKNOWN = 0; + + // Abbreviation modifier + ABBREV = 1; + + // Adjectival complement + ACOMP = 2; + + // Adverbial clause modifier + ADVCL = 3; + + // Adverbial modifier + ADVMOD = 4; + + // Adjectival modifier of an NP + AMOD = 5; + + // Appositional modifier of an NP + APPOS = 6; + + // Attribute dependent of a copular verb + ATTR = 7; + + // Auxiliary (non-main) verb + AUX = 8; + + // Passive auxiliary + AUXPASS = 9; + + // Coordinating conjunction + CC = 10; + + // Clausal complement of a verb or adjective + CCOMP = 11; + + // Conjunct + CONJ = 12; + + // Clausal subject + CSUBJ = 13; + + // Clausal passive subject + CSUBJPASS = 14; + + // Dependency (unable to determine) + DEP = 15; + + // Determiner + DET = 16; + + // Discourse + DISCOURSE = 17; + + // Direct object + DOBJ = 18; + + // Expletive + EXPL = 19; + + // Goes with (part of a word in a text not well edited) + GOESWITH = 20; + + // Indirect object + IOBJ = 21; + + // Marker (word introducing a subordinate clause) + MARK = 22; + + // Multi-word expression + MWE = 23; + + // Multi-word verbal expression + MWV = 24; + + // Negation modifier + NEG = 25; + + // Noun compound modifier + NN = 26; + + // Noun phrase used as an adverbial modifier + NPADVMOD = 27; + + // Nominal subject + NSUBJ = 28; + + // Passive nominal subject + NSUBJPASS = 29; + + // Numeric modifier of a noun + NUM = 30; + + // Element of compound number + NUMBER = 31; + + // Punctuation mark + P = 32; + + // Parataxis relation + PARATAXIS = 33; + + // Participial modifier + PARTMOD = 34; + + // The complement of a preposition is a clause + PCOMP = 35; + + // Object of a preposition + POBJ = 36; + + // Possession modifier + POSS = 37; + + // Postverbal negative particle + POSTNEG = 38; + + // Predicate complement + PRECOMP = 39; + + // Preconjunt + PRECONJ = 40; + + // Predeterminer + PREDET = 41; + + // Prefix + PREF = 42; + + // Prepositional modifier + PREP = 43; + + // The relationship between a verb and verbal morpheme + PRONL = 44; + + // Particle + PRT = 45; + + // Associative or possessive marker + PS = 46; + + // Quantifier phrase modifier + QUANTMOD = 47; + + // Relative clause modifier + RCMOD = 48; + + // Complementizer in relative clause + RCMODREL = 49; + + // Ellipsis without a preceding predicate + RDROP = 50; + + // Referent + REF = 51; + + // Remnant + REMNANT = 52; + + // Reparandum + REPARANDUM = 53; + + // Root + ROOT = 54; + + // Suffix specifying a unit of number + SNUM = 55; + + // Suffix + SUFF = 56; + + // Temporal modifier + TMOD = 57; + + // Topic marker + TOPIC = 58; + + // Clause headed by an infinite form of the verb that modifies a noun + VMOD = 59; + + // Vocative + VOCATIVE = 60; + + // Open clausal complement + XCOMP = 61; + + // Name suffix + SUFFIX = 62; + + // Name title + TITLE = 63; + + // Adverbial phrase modifier + ADVPHMOD = 64; + + // Causative auxiliary + AUXCAUS = 65; + + // Helper auxiliary + AUXVV = 66; + + // Rentaishi (Prenominal modifier) + DTMOD = 67; + + // Foreign words + FOREIGN = 68; + + // Keyword + KW = 69; + + // List for chains of comparable items + LIST = 70; + + // Nominalized clause + NOMC = 71; + + // Nominalized clausal subject + NOMCSUBJ = 72; + + // Nominalized clausal passive + NOMCSUBJPASS = 73; + + // Compound of numeric modifier + NUMC = 74; + + // Copula + COP = 75; + + // Dislocated relation (for fronted/topicalized elements) + DISLOCATED = 76; + } + + // Represents the head of this token in the dependency tree. + // This is the index of the token which has an arc going to this token. + // The index is the position of the token in the array of tokens returned + // by the API method. If this token is a root token, then the + // `head_token_index` is its own index. + int32 head_token_index = 1; + + // The parse label for the token. + Label label = 2; +} + +// Represents a mention for an entity in the text. Currently, proper noun +// mentions are supported. +message EntityMention { + // The supported types of mentions. + enum Type { + // Unknown + TYPE_UNKNOWN = 0; + + // Proper name + PROPER = 1; + + // Common noun (or noun compound) + COMMON = 2; + } + + // The mention text. + TextSpan text = 1; + + // The type of the entity mention. + Type type = 2; +} + +// Represents an output piece of text. +message TextSpan { + // The content of the output text. + string content = 1; + + // The API calculates the beginning offset of the content in the original + // document according to the [EncodingType][google.cloud.language.v1beta1.EncodingType] specified in the API request. + int32 begin_offset = 2; +} + +// The sentiment analysis request message. +message AnalyzeSentimentRequest { + // Input document. + Document document = 1; + + // The encoding type used by the API to calculate sentence offsets for the + // sentence sentiment. + EncodingType encoding_type = 2; +} + +// The sentiment analysis response message. +message AnalyzeSentimentResponse { + // The overall sentiment of the input document. + Sentiment document_sentiment = 1; + + // The language of the text, which will be the same as the language specified + // in the request or, if not specified, the automatically-detected language. + // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details. + string language = 2; + + // The sentiment for all the sentences in the document. + repeated Sentence sentences = 3; +} + +// The entity analysis request message. +message AnalyzeEntitiesRequest { + // Input document. + Document document = 1; + + // The encoding type used by the API to calculate offsets. + EncodingType encoding_type = 2; +} + +// The entity analysis response message. +message AnalyzeEntitiesResponse { + // The recognized entities in the input document. + repeated Entity entities = 1; + + // The language of the text, which will be the same as the language specified + // in the request or, if not specified, the automatically-detected language. + // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details. + string language = 2; +} + +// The syntax analysis request message. +message AnalyzeSyntaxRequest { + // Input document. + Document document = 1; + + // The encoding type used by the API to calculate offsets. + EncodingType encoding_type = 2; +} + +// The syntax analysis response message. +message AnalyzeSyntaxResponse { + // Sentences in the input document. + repeated Sentence sentences = 1; + + // Tokens, along with their syntactic information, in the input document. + repeated Token tokens = 2; + + // The language of the text, which will be the same as the language specified + // in the request or, if not specified, the automatically-detected language. + // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details. + string language = 3; +} + +// The request message for the text annotation API, which can perform multiple +// analysis types (sentiment, entities, and syntax) in one call. +message AnnotateTextRequest { + // All available features for sentiment, syntax, and semantic analysis. + // Setting each one to true will enable that specific analysis for the input. + message Features { + // Extract syntax information. + bool extract_syntax = 1; + + // Extract entities. + bool extract_entities = 2; + + // Extract document-level sentiment. + bool extract_document_sentiment = 3; + } + + // Input document. + Document document = 1; + + // The enabled features. + Features features = 2; + + // The encoding type used by the API to calculate offsets. + EncodingType encoding_type = 3; +} + +// The text annotations response message. +message AnnotateTextResponse { + // Sentences in the input document. Populated if the user enables + // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax]. + repeated Sentence sentences = 1; + + // Tokens, along with their syntactic information, in the input document. + // Populated if the user enables + // [AnnotateTextRequest.Features.extract_syntax][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_syntax]. + repeated Token tokens = 2; + + // Entities, along with their semantic information, in the input document. + // Populated if the user enables + // [AnnotateTextRequest.Features.extract_entities][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_entities]. + repeated Entity entities = 3; + + // The overall sentiment for the document. Populated if the user enables + // [AnnotateTextRequest.Features.extract_document_sentiment][google.cloud.language.v1beta1.AnnotateTextRequest.Features.extract_document_sentiment]. + Sentiment document_sentiment = 4; + + // The language of the text, which will be the same as the language specified + // in the request or, if not specified, the automatically-detected language. + // See [Document.language][google.cloud.language.v1beta1.Document.language] field for more details. + string language = 5; +} + +// Represents the text encoding that the caller uses to process the output. +// Providing an `EncodingType` is recommended because the API provides the +// beginning offsets for various outputs, such as tokens and mentions, and +// languages that natively use different text encodings may access offsets +// differently. +enum EncodingType { + // If `EncodingType` is not specified, encoding-dependent information (such as + // `begin_offset`) will be set at `-1`. + NONE = 0; + + // Encoding-dependent information (such as `begin_offset`) is calculated based + // on the UTF-8 encoding of the input. C++ and Go are examples of languages + // that use this encoding natively. + UTF8 = 1; + + // Encoding-dependent information (such as `begin_offset`) is calculated based + // on the UTF-16 encoding of the input. Java and Javascript are examples of + // languages that use this encoding natively. + UTF16 = 2; + + // Encoding-dependent information (such as `begin_offset`) is calculated based + // on the UTF-32 encoding of the input. Python is an example of a language + // that uses this encoding natively. + UTF32 = 3; +} diff --git a/packages/google-cloud-language/synth.metadata b/packages/google-cloud-language/synth.metadata index 88fc58cbfa27..5ebf26d28f73 100644 --- a/packages/google-cloud-language/synth.metadata +++ b/packages/google-cloud-language/synth.metadata @@ -1,19 +1,19 @@ { - "updateTime": "2019-01-17T13:21:34.263272Z", + "updateTime": "2019-01-24T01:00:35.096241Z", "sources": [ { "generator": { "name": "artman", - "version": "0.16.6", - "dockerImage": "googleapis/artman@sha256:12722f2ca3fbc3b53cc6aa5f0e569d7d221b46bd876a2136497089dec5e3634e" + "version": "0.16.7", + "dockerImage": "googleapis/artman@sha256:d6c8ced606eb49973ca95d2af7c55a681acc042db0f87d135968349e7bf6dd80" } }, { "git": { "name": "googleapis", "remote": "https://github.com/googleapis/googleapis.git", - "sha": "0ac60e21a1aa86c07c1836865b35308ba8178b05", - "internalRef": "229626798" + "sha": "9aac88a22468b1e291937f55fa1ef237adfdc63e", + "internalRef": "230568136" } }, { diff --git a/packages/google-cloud-language/synth.py b/packages/google-cloud-language/synth.py index 6abc66811b9c..d8acb69950e9 100644 --- a/packages/google-cloud-language/synth.py +++ b/packages/google-cloud-language/synth.py @@ -31,6 +31,7 @@ version, config_path=f"/google/cloud/language/artman_language_{version}.yaml", artman_output_name=f"language-{version}", + include_protos=True, ) s.move(library / f"google/cloud/language_{version}/proto")