-
Notifications
You must be signed in to change notification settings - Fork 52
/
Copy pathopenapi.json
466 lines (466 loc) · 45 KB
/
openapi.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
{
"swagger": "2.0",
"info": {
"contact": {
"x-twitter": "TisaneLabs"
},
"description": "<table><tr><td style=\"font-size:large\"> 🔬 </td><td>Detect abusive content, obtain sentiment analysis, extract entities, detect topics, automatically correct spelling errors, and more.</td></tr></table>",
"title": "Text Analysis",
"version": "1.0",
"x-apisguru-categories": [
"text"
],
"x-logo": {
"url": "https://api.apis.guru/v2/cache/logo/https_twitter.com_TisaneLabs_profile_image.jpeg"
},
"x-origin": [
{
"converter": {
"url": "https://github.com/lucybot/api-spec-converter",
"version": "2.7.31"
},
"format": "openapi",
"url": "https://dev.tisane.ai/docs/services/5a3b6668a3511b11cc292655/export?DocumentFormat=openapi",
"version": "3.0"
}
],
"x-preferred": true,
"x-providerName": "tisane.ai",
"x-datafire": {
"name": "tisane_ai",
"type": "openapi"
}
},
"host": "api.tisane.ai",
"basePath": "/",
"schemes": [
"http"
],
"paths": {
"/compare/entities": {
"post": {
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"parameters": [
{
"description": "The request body is a JSON structure made of the following elements:\n\n* `language1` (string) - a standard IETF tag for the language of the first entity\n* `entity1` (string) - 1st entity\n* `language2` (string) - a standard IETF tag for the language of the second entity\n* `entity2` (string) - 2nd entity\n* `type` (string) - the type of the entity. Currently only _person_ is supported\n",
"in": "body",
"name": "body"
}
],
"responses": {
"200": {
"description": "A JSON structure containing the comparison result and the different segments, if applicable. The elements are:\n\n* `result` (string) - the result of the comparison. Values: \n * `no_single_entity` - at least one of the entity parameters could not be parsed as a single entity. \n * `same` - it's the same entity, even if the order is different. Note that changing the order may mean the same or different name. For example, _Kevin Tan_ is the same as _Tan Kevin_, but _James David_ is not the same as _David James_.\n * `different` - there are differences between the entities. They are listed in the _differences_ element as described below. \n* `differences` (array of strings) - the different segments. Currently supported:\n * `given_name`\n * `surname`\n * `title` - Mr., Mrs., etc.\n * `social_role` - academic degrees, etc.\n * `suffix`\n * `variation` - substantially the same; the only difference is a style (e.g. hypocorism or colloquial vs formal) or a language or a spelling",
"examples": {
"application/json": {
"differences": [
"surname",
"social_role"
],
"result": "different"
}
}
}
},
"description": "Compares two compound named entities and outputs the differences found. ",
"operationId": "compare-named-entities",
"summary": "Named entity comparison"
}
},
"/helper/extract_text": {
"post": {
"consumes": [
"text/plain"
],
"produces": [
"text/plain"
],
"parameters": [
{
"description": "Text to clean up.",
"in": "body",
"name": "body",
"schema": {
"format": "binary",
"type": "string"
}
}
],
"responses": {
"200": {
"description": "The text without markup, script, CSS, JSON.",
"examples": {
"text/plain": "Clean me up"
}
}
},
"description": "A service method to remove JavaScript, CSS tags, JSON, and other markup, returning pure decoded text. ",
"operationId": "clean-up-text",
"summary": "Text clean up"
}
},
"/languages": {
"get": {
"produces": [
"application/json"
],
"parameters": [],
"responses": {
"200": {
"description": "An array of elements containing basic info for the supported languages. Every array member contains:\n\n* `id` (string) - a standard IETF language tag\n* `name` (string) - a native name of the language\n* `englishName` (string) - the English name of the language\n* `nativeEncoding` (string) - the encoding of the language\n* `preferredFont` (string) - a font family known to be compatible with the language\n* `latin` (boolean) - _true_ if the language uses Latin script, _false_ otherwise\n* `rightToLeft` (boolean) - _true_ if the language uses a bidirectional script (e.g. Arabic, Persian, Hebrew), _false_ otherwise",
"examples": {
"application/json": [
{
"englishName": "Arabic",
"id": "ar",
"latin": false,
"name": "العربية",
"nativeEncoding": "iso-8859-6",
"preferredFont": "Arial",
"rightToLeft": true
},
{
"englishName": "Danish",
"id": "da",
"latin": true,
"name": "Dansk",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Calibri"
},
{
"englishName": "German",
"id": "de",
"latin": true,
"name": "Deutsch",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Calibri"
},
{
"englishName": "English",
"id": "en",
"latin": true,
"name": "English",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Arial",
"systemLanguage": true
},
{
"englishName": "Spanish",
"id": "es",
"latin": true,
"name": "Español",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Verdana"
},
{
"englishName": "Persian",
"id": "fa",
"latin": false,
"name": "فارسي",
"nativeEncoding": "iso-8859-6",
"preferredFont": "Arial",
"rightToLeft": true
},
{
"englishName": "Finnish",
"id": "fi",
"latin": true,
"name": "suomi",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Berlin Sans FB"
},
{
"englishName": "French",
"id": "fr",
"latin": true,
"name": "Français",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Arial"
},
{
"englishName": "Hebrew",
"id": "he",
"latin": false,
"name": "עברית",
"nativeEncoding": "iso-8859-8",
"preferredFont": "Courier New",
"rightToLeft": true
},
{
"englishName": "Indonesian",
"id": "id",
"latin": true,
"name": "Bahasa Indonesia",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Book Antiqua"
},
{
"englishName": "Italian",
"id": "it",
"latin": true,
"name": "Italiano",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Book Antiqua"
},
{
"englishName": "Japanese",
"id": "ja",
"latin": false,
"name": "日本語",
"nativeEncoding": "ISO-2022",
"preferredFont": "MS Mincho"
},
{
"englishName": "Korean",
"id": "ko",
"latin": false,
"name": "한국어",
"nativeEncoding": "EUC-KR",
"preferredFont": "Arial Narrow"
},
{
"englishName": "Malay",
"id": "ms",
"latin": true,
"name": "Bahasa Melayu",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Calibri"
},
{
"englishName": "Dutch",
"id": "nl",
"latin": true,
"name": "Nederlands",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Arial"
},
{
"englishName": "Norwegian",
"id": "no",
"latin": true,
"name": "Norsk",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Book Antiqua"
},
{
"englishName": "Polish",
"id": "pl",
"latin": true,
"name": "Polski",
"nativeEncoding": "iso-8859-2",
"preferredFont": "Arial"
},
{
"englishName": "Pashto",
"id": "ps-AF",
"latin": false,
"name": "پښتو",
"nativeEncoding": "iso-8859-6",
"preferredFont": "Pashto Kror Asiatype",
"rightToLeft": true
},
{
"englishName": "Portuguese",
"id": "pt",
"latin": true,
"name": "Português",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Book Antiqua"
},
{
"englishName": "Russian",
"id": "ru",
"latin": false,
"name": "русский",
"nativeEncoding": "iso-8859-5",
"preferredFont": "Verdana"
},
{
"englishName": "Swedish",
"id": "sv",
"latin": true,
"name": "Svenska",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Lucida Console"
},
{
"englishName": "Thai",
"id": "th",
"latin": false,
"name": "ไทย",
"nativeEncoding": "iso-8859-11",
"preferredFont": "Tahoma"
},
{
"englishName": "Turkish",
"id": "tr",
"latin": true,
"name": "Türkçe",
"nativeEncoding": "iso-8859-9",
"preferredFont": "Arial"
},
{
"englishName": "Urdu",
"id": "ur",
"latin": false,
"name": "اردو",
"nativeEncoding": "iso-8859-6",
"preferredFont": "Arial",
"rightToLeft": true
},
{
"englishName": "Vietnamese",
"id": "vi",
"latin": true,
"name": "Tiếng Việt",
"nativeEncoding": "iso-8859-1",
"preferredFont": "Arial"
},
{
"englishName": "Simplified Chinese",
"id": "zh-CN",
"latin": false,
"name": "简体中文",
"nativeEncoding": "gb18030",
"preferredFont": "NSimSun"
},
{
"englishName": "Traditional Chinese",
"id": "zh-TW",
"latin": false,
"name": "繁體中文",
"nativeEncoding": "utf-8",
"preferredFont": "Arial Unicode MS"
}
]
}
}
},
"description": "Obtain a list of available languages. No parameters.",
"operationId": "5a4c8182a3511b120c2e80bd",
"summary": "List available languages"
}
},
"/parse": {
"post": {
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"parameters": [
{
"description": "A request is made of three elements:\n\n + `language` - a standard IETF tag for the language to analyze\n + `content` - a content to analyze\n + `settings` - the settings to apply when analyzing\n\n### Settings Reference\n\nThe purpose of the settings structure is to:\n\n * [provide cues about the content being sent to improve the results](#content-cues-and-instructions)\n * [customize the output and select sections to be shown](#output-customization)\n * [define standards and formats in use](#standards-and-formats)\n * [define and calculate the signal to noise ranking](#signal-to-noise-ranking)\n\n\nAll settings are optional. To leave all settings to default, simply provide an empty object (`{}`).\n\n#### Content Cues and Instructions\n\n`format` (string) - the format of the content. Some policies will be applied depending on the format. Certain logic in the underlying language models may require the content to be of a certain format (e.g. logic applied on the reviews may seek for sentiment more aggressively). The default format is empty / undefined. The format values are:\n\n* `review` - a review of a product or a service or any other review. Normally, the underlying language models will seek for sentiment expressions more aggressively in reviews.\n* `dialogue` - a comment or a post which is a part of a dialogue. An example of a logic more specific to a dialogue is name calling. A single word like \"idiot\" would not be a personal attack in any other format, but it is certainly a personal attack when part of a dialogue.\n* `shortpost` - a microblogging post, e.g. a tweet.\n* `longform` - a long post or an article.\n* `proofread` - a post which was proofread. In the proofread posts, the spellchecking is switched off. \n* `alias` - a nickname in an online community. \n* `search` - a search query. Search queries may not always be grammatically correct. 
Certain topics and items, that we may otherwise let pass, are tagged with the `search` format.\n\n`disable_spellcheck` (boolean) - determines whether the automatic spellchecking is to be disabled. Default: `false`.\n\n`lowercase_spellcheck_only` (boolean) - determines whether the automatic spellchecking is only to be applied to words in lowercase. Default: `false`\n\n`min_generic_frequency` (int) - allows excluding more esoteric terms; the valid values are 0 thru 10.\n\n`subscope` (boolean) - enables sub-scope parsing, for scenarios like hashtag, URL parsing, and obfuscated content (e.g. _ihateyou_). Default: `false`.\n\n`domain_factors` (set of pairs made of strings and numbers) - provides a session-scope cues for the domains of discourse. This is a powerful tool that allows tailoring the result based on the use case. The format is, family ID of the domain as a key and the multiplication factor as a value (e.g. _\"12345\": 5.0_). For example, when processing text looking for criminal activity, we may want to set domains relevant to drugs, firearms, crime, higher: `\"domain_factors\": {\"31058\": 5.0, \"45220\": 5.0, \"14112\": 5.0, \"14509\": 3.0, \"28309\": 5.0, \"43220\": 5.0, \"34581\": 5.0}`. The same device can be used to eliminate noise coming from domains we know are irrelevant by setting the factor to a value lower than 1. \n\n`when` (date string, format YYYY-MM-DD) - indicates when the utterance was uttered. (TO BE IMPLEMENTED) The purpose is to prune word senses that were not available at a particular point in time. 
For example, the words _troll_, _mail_, and _post_ had nothing to do with the Internet 300 years ago because there was no Internet, and so in a text that was written hundreds of years ago, we should ignore the word senses that emerged only recently.\n\n#### Output Customization\n\n`abuse` (boolean) - output instances of abusive content (default: `true`)\n\n`sentiment` (boolean) - output sentiment-bearing snippets (default: `true`)\n\n`document_sentiment` (boolean) - output document-level sentiment (default: `false`)\n\n`entities` (boolean) - output entities (default: `true`)\n\n`topics` (boolean) - output topics (default: `true`), with two more relevant settings:\n\n * `topic_stats` (boolean) - include coverage statistics in the topic output (default: `false`). When set, the topic is an object containing the attributes `topic` (string) and `coverage` (floating-point number). The coverage indicates a share of sentences touching the topic among all the sentences. \n * `optimize_topics` (boolean) - if `true`, the less specific topics are removed if they are parts of the more specific topics. For example, when the topic is `cryptocurrency`, the optimization removes `finance`. \n\n\n \n`words` (boolean) - output the lexical chunks / words for every sentence (default: `false`). In languages without white spaces (Chinese, Japanese, Thai), the tokens are tokenized words. In languages with compounds (e.g. German, Dutch, Norwegian), the compounds are split. \n\n `fetch_definitions` (boolean) - include definitions of the words in the output (default: `false`). Only relevant when the `words` setting is `true`\n\n`parses` (boolean) - output parse forests of phrases\n\n`deterministic` (boolean) - whether the n-best senses and n-best parses are to be output in addition to the detected sense. If `true`, only the detected sense will be output. 
Default: `true`\n\n`snippets` (boolean) - include the text snippets in the abuse, sentiment, and entities sections (default: `false`)\n\n`explain` (boolean) - if `true`, a reasoning for the abuse and sentiment snippets is provided when possible (see the `explanation` attribute)\n\n#### Standards and Formats\n\n`feature_standard` (string) - determines the standard used to output the features (grammar, style, semantics) in the response object. The standards we support are: \n\n* `ud`: [Universal Dependencies tags](https://universaldependencies.org/u/pos/) (default)\n* `penn`: [Penn treebank tags](https://www.ling.upenn.edu/courses/Fall_2003/ling001/penn_treebank_pos.html)\n* `native`: Tisane native feature codes\n* `description`: Tisane native feature descriptions\n\nOnly the native Tisane standards (codes and descriptions) support style and semantic features.\n\n`topic_standard` (string) - determines the standard used to output the topics in the response object. The standards we support are:\n\n* `iptc_code` - IPTC topic taxonomy code\n* `iptc_description` - IPTC topic taxonomy description\n* `iab_code` - IAB topic taxonomy code\n* `iab_description` - IAB topic taxonomy description\n* `native` - Tisane domain description, coming from the family description (default)\n\n`sentiment_analysis_type` (string) - (RESERVED) the type of the sentiment analysis strategy. The values are:\n\n* `products_and_services` - most common sentiment analysis of products and services\n* `entity` - sentiment analysis with entities as targets\n* `creative_content_review` - reviews of creative content\n* `political_essay` - political essays\n\n#### Signal to Noise Ranking\n\nWhen we're studying a bunch of posts commenting on an issue or an article, we may want to prioritise the ones more relevant to the topic, and containing more reason and logic than emotion. This is what the signal to noise ranking is meant to achieve.\n\nThe signal to noise ranking is made of two parts:\n\n1. 
Determine the most relevant concepts. This part may be omitted, depending on the use case scenario (e.g. we want to track posts most relevant to a particular set of issues). \n2. Rank the actual post in relevance to these concepts. \n\nTo determine the most relevant concepts, we need to analyze the headline or the article itself. The headline is usually enough. We need two additional settings:\n\n* `keyword_features` (an object of strings with string values) - determines the features to look for in a word. When such a feature is found, the family ID is added to the set of potentially relevant family IDs. \n* `stop_hypernyms` (an array of integers) - if a potentially relevant family ID has a hypernym listed in this setting, it will not be considered. For example, we extracted a set of nouns from the headline, but we may not be interested in abstractions or feelings. E.g. from a headline like _Fear and Loathing in Las Vegas_ we want _Las Vegas_ only. Optional.\n\nIf `keyword_features` is provided in the settings, the response will have a special attribute, `relevant`, containing a set of family IDs. \n\nAt the second stage, when ranking the actual posts or comments for relevance, this array is to be supplied among the settings. The ranking is boosted when the domain, the hypernyms, or the families related to those in the `relevant` array are mentioned, when negative and positive sentiment is linked to aspects, and penalized when the negativity is not linked to aspects, or abuse of any kind is found. The latter consideration may be disabled, e.g. when we are looking for specific criminal content. When the `abuse_not_noise` parameter is specified and set to `true`, the abuse is not penalized by the ranking calculations. \n\nTo sum it up, in order to calculate the signal to noise ranking: \n\n1. Analyze the headline with `keyword_features` and, optionally, `stop_hypernyms` in the settings. Obtain the `relevant` attribute.\n2. 
When analyzing the posts or the comments, specify the `relevant` attribute obtained in step 1. \n",
"in": "body",
"name": "body"
}
],
"responses": {
"200": {
"description": "Extract topics only:\n\n`{\"language\":\"en\", \"content\":\"An inertial force is a force that resists a change in velocity of an object.\", \"settings\":{}}`",
"examples": {
"application/json": {
"text": "An inertial force is a force that resists a change in velocity of an object.",
"topics": [
"physics"
]
}
}
}
},
"description": "The method analyzes the input, returning high-level and low-level metadata. \n\nThe request body is a JSON structure made of three elements:\n\n* `language` (string) - a standard IETF tag for the language to analyze\n* `content` (string) - a content to analyze\n* `settings` (structure) - the [settings](#settings-reference) to apply when analyzing\n\nExample: \n```json\n{\"language\": \"en\", \"content\":\"Hello Tisane API!\", \"settings\": {}}\n```\n\n\n### Response Reference\n\n* [Abusive Content](#abusive-content)\n* [Sentiment Analysis](#sentiment-analysis)\n* [Entities](#entities)\n + [Entity Types & Subtypes](#entity-types-and-subtypes)\n* [Topics](#topics)\n* [Advanced Low-Level Data: Sentences, Phrases, and Words](#advanced-low-level-data-sentences-phrases-and-words)\n + [Words](#words)\n - [Advanced](#advanced)\n + [Parse Trees & Phrases](#parse-trees-and-phrases)\n + [Context-Aware Spelling Correction](#context-aware-spelling-correction)\n\nThe response contains several sections which are displayed or hidden according to the [settings](#output-customization). \n\nThe common attributes are:\n\n* `text` (string) - the original input\n* `reduced_output` (boolean) - if the input is too big, and verbose information like the lexical chunk was requested, the verbose information will not be generated, and this flag will be set to `true` and returned as part of the response\n* `sentiment` (floating-point number) - a number in range -1 to 1 indicating the document-level sentiment. Only shown when `document_sentiment` [setting](#output-customization) is set to `true`.\n* `signal2noise` (floating-point number) - a signal to noise ranking of the text, in relation to the array of concepts specified in the `relevant` [setting](#signal-to-noise-ranking). Only shown when the `relevant` setting exists.\n\n### Abusive Content\n\nThe `abuse` section is an array of detected instances of content that may violate some terms of use. 
**NOTE**: the terms of use in online communities may vary, and so it is up to the administrators to determine whether the content is indeed abusive. For instance, it makes no sense to restrict sexual advances in a dating community, or censor profanities when it's accepted in the bulk of the community. \n\nThe section exists if instances of abuse are detected and the `abuse` [setting](#output-customization) is either omitted or set to `true`.\n\nEvery instance contains the following attributes:\n\n* `offset` (unsigned integer) - zero-based offset where the instance starts\n* `length` (unsigned integer) - length of the content\n* `sentence_index` (unsigned integer) - zero-based index of the sentence containing the instance\n* `text` (string) - fragment of text containing the instance (only included if the `snippets` [setting](#output-customization) is set to `true`)\n* `tags` (array of strings) - when exists, provides additional detail about the abuse. For instance, if the fragment is classified as an attempt to sell hard drugs, one of the tags will be _hard_drug_.\n* `type` (string) - the type of the abuse\n* `severity` (string) - how severe the abuse is. The levels of severity are `low`, `medium`, `high`, and `extreme`\n* `explanation` (string) - when available, provides rationale for the annotation; set the `explain` setting to `true` to enable.\n\nThe currently supported types are:\n\n* `personal_attack` - an insult / attack on the addressee, e.g. an instance of cyberbullying. Please note that an attack on a post or a point, or just negative sentiment is not the same as an insult. The line may be blurred at times. See [our Knowledge Base for more information](http://tisane.ai/knowledgebase/how-do-i-detect-personal-attacks/).\n* `bigotry` - hate speech aimed at one of the [protected classes](https://en.wikipedia.org/wiki/Protected_group). 
The hate speech detected is not just racial slurs, but, generally, hostile statements aimed at the group as a whole\n* `profanity` - profane language, regardless of the intent\n* `sexual_advances` - welcome or unwelcome attempts to gain some sort of sexual favor or gratification\n* `criminal_activity` - attempts to sell or procure restricted items, criminal services, issuing death threats, and so on\n* `external_contact` - attempts to establish contact or payment via external means of communication, e.g. phone, email, instant messaging (may violate the rules in certain communities, e.g. gig economy portals, e-commerce portals)\n* `adult_only` - activities restricted for minors (e.g. consumption of alcohol)\n* `mental_issues` - content indicative of suicidal thoughts or depression (LIMITED)\n* `spam` - (RESERVED) spam content\n* `generic` - undefined\n\n### Sentiment Analysis\n\nThe `sentiment_expressions` section is an array of detected fragments indicating the attitude towards aspects or entities. \n\nThe section exists if sentiment is detected and the `sentiment` [setting](#output-customization) is either omitted or set to `true`.\n\nEvery instance contains the following attributes:\n\n* `offset` (unsigned integer) - zero-based offset where the instance starts\n* `length` (unsigned integer) - length of the content\n* `sentence_index` (unsigned integer) - zero-based index of the sentence containing the instance\n* `text` (string) - fragment of text containing the instance (only included if the `snippets` setting is set to `true`)\n* `polarity` (string) - whether the attitude is `positive`, `negative`, or `mixed`. Additionally, there is a `default` sentiment used for cases when the entire snippet has been pre-classified. For instance, if a review is split into two portions, _What did you like?_ and _What did you not like?_, and the reviewer replies briefly, e.g. _The quiet. The service_, the utterance itself has no sentiment value. 
When the calling application is aware of the intended sentiment, the _default_ sentiment simply provides the targets / aspects, which will be then added the sentiment externally. \n* `targets` (array of strings) - when available, provides set of aspects and/or entities which are the targets of the sentiment. For instance, when the utterance is, _The breakfast was yummy but the staff is unfriendly_, the targets for the two sentiment expressions are `meal` and `staff`. Named entities may also be targets of the sentiment.\n* `reasons` (array of strings) - when available, provides reasons for the sentiment. In the example utterance above (_The breakfast was yummy but the staff is unfriendly_), the `reasons` array for the `staff` is `[\"unfriendly\"]`, while the `reasons` array for `meal` is `[\"tasty\"]`.\n* `explanation` (string) - when available, provides rationale for the sentiment; set the `explain` setting to `true` to enable.\n\nExample:\n\n```json\n\"sentiment_expressions\": [\n {\n \"sentence_index\": 0,\n \"offset\": 0,\n \"length\": 32,\n \"polarity\": \"positive\",\n \"reasons\": [\"close\"],\n \"targets\": [\"location\"]\n },\n {\n \"sentence_index\": 0,\n \"offset\": 38,\n \"length\": 29,\n \"polarity\": \"negative\",\n \"reasons\": [\"disrespectful\"],\n \"targets\": [\"staff\"]\n }\n ]\n```\n\n### Entities\n\nThe `entities_summary` section is an array of named entity objects detected in the text. 
\n\nThe section exists if named entities are detected and the `entities` [setting](#output-customization) is either omitted or set to `true`.\n\nEvery entity contains the following attributes:\n\n* `name` (string) - the most complete name of the entity in the text of all the mentions\n* `ref_lemma` (string) - when available, the dictionary form of the entity in the reference language (English) regardless of the input language\n* `type` (string) - a string or an array of strings specifying the type of the entity, such as `person`, `organization`, `numeric`, `amount_of_money`, `place`. Certain entities, like countries, may have several types (because a country is both a `place` and an `organization`).\n* `subtype` (string) - a string indicating the subtype of the entity\n* `mentions` (array of objects) - a set of instances where the entity was mentioned in the text\n\nEvery mention contains the following attributes:\n\n* `offset` (unsigned integer) - zero-based offset where the instance starts\n* `length` (unsigned integer) - length of the content\n* `sentence_index` (unsigned integer) - zero-based index of the sentence containing the instance\n* `text` (string) - fragment of text containing the instance (only included if the `snippets` setting is set to `true`)\n\n\nExample:\n```json\n \"entities_summary\": [\n {\n \"type\": \"person\",\n \"name\": \"John Smith\",\n \"ref_lemma\": \"John Smith\",\n \"mentions\": [\n {\n \"sentence_index\": 0,\n \"offset\": 0,\n \"length\": 10 }\n ]\n }\n ,\n {\n \"type\": [ \"organization\", \"place\" ]\n ,\n \"name\": \"UK\",\n \"ref_lemma\": \"U.K.\",\n \"mentions\": [\n {\n \"sentence_index\": 0,\n \"offset\": 40,\n \"length\": 2 }\n ]\n }\n ]\n```\n\n#### Entity Types and Subtypes\n\nThe currently supported types are: \n\n* `person`, with optional subtypes: `fictional_character`, `important_person`, `spiritual_being`\n* `organization` (note that a country is both an organization and a place)\n* `place`\n* `time_range`\n* 
`date`\n* `time`\n* `hashtag`\n* `email`\n* `amount_of_money`\n* `phone` phone number, either domestic or international, in a variety of formats\n* `role` (a social role, e.g. position in an organization)\n* `software`\n* `website` (URL), with an optional subtype: `tor` for Onion links; note that web services may also have the `software` type assigned\n* `weight`\n* `bank_account` only IBAN format is supported; subtypes: `iban`\n* `credit_card`, with optional subtypes: `visa`, `mastercard`, `american_express`, `diners_club`, `discovery`, `jcb`, `unionpay`\n* `coordinates` (GPS coordinates)\n* `credential`, with optional subtypes: `md5`, `sha-1`\n* `crypto`, with optional subtypes: `bitcoin`, `ethereum`, `monero`, `monero_payment_id`, `litecoin`, `dash`\n* `event`\n* `file` only Windows pathnames are supported; subtypes: `windows`, `facebook` (for images downloaded from Facebook)\n* `flight_code`\n* `identifier`\n* `ip_address`, subtypes: `v4`, `v6`\n* `mac_address`\n* `numeric` (an unclassified numeric entity)\n* `username`\n\n\n### Topics\n\nThe `topics` section is an array of topics (subjects, domains, themes in other terms) detected in the text. \n\nThe section exists if topics are detected and the `topics` [setting](#output-customization) is either omitted or set to `true`.\n\nBy default, a topic is a string. 
If `topic_stats` [setting](#output-customization) is set to `true`, then every entry in the array contains:\n\n* `topic` (string) - the topic itself\n* `coverage` (floating-point number) - a number between 0 and 1, indicating the ratio between the number of sentences where the topic is detected to the total number of sentences\n\n### Advanced Low-Level Data: Sentences, Phrases, and Words\n\nTisane allows obtaining more in-depth data, specifically:\n\n* sentences and their corrected form, if a misspelling was detected\n* lexical chunks and their grammatical and stylistic features\n* parse trees and phrases\n\nThe `sentence_list` section is generated if the `words` or the `parses` [setting](#output-customization) is set to `true`. \n\nEvery sentence structure in the list contains:\n\n* `offset` (unsigned integer) - zero-based offset where the sentence starts\n* `length` (unsigned integer) - length of the sentence\n* `text` (string) - the sentence itself\n* `corrected_text` (string) - if a misspelling was detected and the spellchecking is active, contains the automatically corrected text\n* `words` (array of structures) - if `words` [setting](#output-customization) is set to `true`, generates extended information about every lexical chunk. (The term \"word\" is used for the sake of simplicity, however, it may not be linguistically correct to equate lexical chunks with words.)\n* `parse_tree` (object) - if `parses` [setting](#output-customization) is set to `true`, generates information about the parse tree and the phrases detected in the sentence.\n* `nbest_parses` (array of parse objects) if `parses` [setting](#output-customization) is set to `true` and `deterministic` [setting](#output-customization) is set to `false`, generates information about the parse trees that were deemed close enough to the best one but not the best. 
\n\n#### Words\n\nEvery lexical chunk (\"word\") structure in the `words` array contains:\n\n* `type` (string) - the type of the element: `punctuation` for punctuation marks, `numeral` for numerals, or `word` for everything else\n* `text` (string) - the text\n* `offset` (unsigned integer) - zero-based offset where the element starts\n* `length` (unsigned integer) - length of the element\n* `corrected_text` (string) - if a misspelling is detected, the corrected form\n* `lettercase` (string) - the original letter case: `upper`, `capitalized`, or `mixed`. If lowercase or no case, the attribute is omitted.\n* `stopword` (boolean) - determines whether the word is a [stopword](https://en.wikipedia.org/wiki/Stop_words)\n* `grammar` (array of strings or structures) - generates the list of grammar features associated with the `word`. If the `feature_standard` [setting] is defined as `native`, then every feature is an object containing a numeral (`index`) and a string (`value`). Otherwise, every feature is a plain string\n\n##### Advanced\n\nFor lexical chunks only:\n\n* `role` (string) - semantic role, like `agent` or `patient`. Note that in passive voice, the semantic roles are reverse to the syntactic roles. E.g. 
in a sentence like _The car was driven by David_, _car_ is the patient, and _David_ is the agent.\n* `numeric_value` (floating-point number) - the numeric value, if the chunk has a value associated with it\n* `family` (integer number) - the ID of the family associated with the disambiguated word-sense of the lexical chunk\n* `definition` (string) - the definition of the family, if the `fetch_definitions` [setting](#output-customization) is set to `true`\n* `lexeme` (integer number) - the ID of the lexeme entry associated with the disambiguated word-sense of the lexical chunk\n* `nondictionary_pattern` (integer number) - the ID of a non-dictionary pattern that matched, if the word was not in the language model but was classified by the nondictionary heuristics\n* `style` (array of strings or structures) - generates the list of style features associated with the `word`. Only if the `feature_standard` [setting] is set to `native` or `description`\n* `semantics` (array of strings or structures) - generates the list of semantic features associated with the `word`. Only if the `feature_standard` [setting] is set to `native` or `description`\n* `segmentation` (structure) - generates info about the selected segmentation, if there are several possibilities to segment the current lexical chunk and the `deterministic` [setting] is set to `false`. A segmentation is simply an array of `word` structures. \n* `other_segmentations` (array of structures) - generates info about the segmentations deemed incorrect during the disambiguation process. Every entry has the same structure as the `segmentation` structure.\n* `nbest_senses` (array of structures) - when the `deterministic` [setting] is set to `false`, generates a set of hypotheses that were deemed incorrect by the disambiguation process. 
Every hypothesis contains the following attributes: `grammar`, `style`, and `semantics`, identical in structure to their counterparts above; and `senses`, an array of word-senses associated with every hypothesis. Every sense has a `family`, which is an ID of the associated family; and, if the `fetch_definitions` [setting](#output-customization) is set to `true`, `definition` and `ref_lemma` of that family.\n\nFor punctuation marks only: \n\n* `id` (integer number) - the ID of the punctuation mark\n* `behavior` (string) - the behavior code of the punctuation mark. Values: `sentenceTerminator`, `genericComma`, `bracketStart`, `bracketEnd`, `scopeDelimiter`, `hyphen`, `quoteStart`, `quoteEnd`, `listComma` (for East-Asian enumeration commas like _、_)\n\n#### Parse Trees and Phrases\n\nEvery parse tree, or more accurately, parse forest, is a collection of phrases, hierarchically linked to each other. \n\nAt the top level of the parse, there is an array of root phrases under the `phrases` element and the numeric `id` associated with it. Every phrase may have children phrases. Every phrase has the following attributes:\n\n* `type` (string) - a [Penn treebank phrase tag](http://nliblog.com/wiki/knowledge-base-2/nlp-1-natural-language-processing/penn-treebank/penn-treebank-phrase-level-tags/) denoting the type of the phrase, e.g. _S_, _VP_, _NP_, etc.\n* `family` (integer number) - an ID of the phrase family\n* `offset` (unsigned integer) - a zero-based offset where the phrase starts\n* `length` (unsigned integer) - the span of the phrase\n* `role` (string) - the semantic role of the phrase, if any, analogous to that of the words\n* `text` (string) - the phrase text, where the phrase members are delimited by the vertical bar character. Children phrases are enclosed in brackets. 
E.g., _driven|by|David_ or _(The|car)|was|(driven|by|David)_.\n\nExample:\n\n```json\n\"parse_tree\": {\n\"id\": 4,\n\"phrases\": [\n{\n \"type\": \"S\",\n \"family\": 1451,\n \"offset\": 0,\n \"length\": 27,\n \"text\": \"(The|car)|was|(driven|by|David)\",\n \"children\": [\n {\n \"type\": \"NP\",\n \"family\": 1081,\n \"offset\": 0,\n \"length\": 7,\n \"text\": \"The|car\",\n \"role\": \"patient\"\n },\n {\n \"type\": \"VP\",\n \"family\": 1172,\n \"offset\": 12,\n \"length\": 15,\n \"text\": \"driven|by|David\",\n \"role\": \"verb\"\n }\n ]\n}\n]\n}\n```\n\n#### Context-Aware Spelling Correction\n\nTisane supports automatic, context-aware spelling correction. Whether it's a misspelling or a purported obfuscation, Tisane attempts to deduce the intended meaning, if the language model does not recognize the word. \n\nWhen or if it's found, Tisane adds the `corrected_text` attribute to the word (if the words / lexical chunks are returned) and the sentence (if the sentence text is generated). Sentence-level `corrected_text` is displayed if `words` or `parses` are set to _true_.\n\nNote that as Tisane works with large dictionaries, you may need to exclude more esoteric terms by using the `min_generic_frequency` setting. \n\nNote that **the invocation of spell-checking does not depend on whether the sentences and the words sections are generated in the output**. The spellchecking can be disabled by [setting](#content-cues-and-instructions) `disable_spellcheck` to `true`. Another option is to enable the spellchecking for lowercase words only, thus excluding potential proper nouns in languages that support capitalization; to avoid spell-checking capitalized and uppercase words, set `lowercase_spellcheck_only` to `true`.\n\n",
"operationId": "5a3b7177a3511b11cc29265c",
"summary": "Analyze text"
}
},
"/similarity": {
"post": {
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"parameters": [
{
"description": "The request body contains the following nodes:\n\n* `content1` (string) - a text fragment\n* `language1` (string) - the IETF language code for `content1`\n* `content2` (string) - a text fragment to compare with\n* `language2` (string) - the IETF language code for `content2`\n* `settings` (structure) - the settings object; same specs as in [/parse method](/docs/services/5a3b6668a3511b11cc292655/operations/5a3b7177a3511b11cc29265c?#settings-reference)",
"in": "body",
"name": "body"
}
],
"responses": {
"200": {
"description": "A number in the range 0 through 1 indicating the similarity of the submitted text fragments.",
"examples": {
"application/json": 0.8341
}
}
},
"description": "Calculate semantic similarity between two text fragments, in the same language or in two different languages.",
"operationId": "semantic-similarity",
"summary": "Semantic similarity"
}
},
"/text2picture": {
"post": {
"consumes": [
"application/json"
],
"produces": [
"application/json"
],
"parameters": [
{
"description": "The request body is a JSON structure made of two elements:\n\n* `language` (string) - a standard IETF tag for the language to analyze\n* `content` (string) - the content to analyze\n\n",
"in": "body",
"name": "body"
}
],
"responses": {
"200": {
"description": "A JSON-escaped URL of a Creative Commons image on Wikimedia servers, if one exists. Null if no image is found."
}
},
"description": "Finds a URL of an image (Creative Commons) best describing the text. \n\n*WARNING*: may be slow, as Wikimedia servers are queried.",
"operationId": "generate-illustration-for-text",
"summary": "Generate image from text"
}
}
},
"definitions": {
"settings": {
"properties": {
"abuse": {
"type": "boolean"
}
},
"type": "object"
}
},
"securityDefinitions": {
"apiKeyHeader": {
"in": "header",
"name": "Ocp-Apim-Subscription-Key",
"type": "apiKey"
},
"apiKeyQuery": {
"in": "query",
"name": "subscription-key",
"type": "apiKey"
}
},
"security": [
{
"apiKeyHeader": []
},
{
"apiKeyQuery": []
}
],
"x-components": {}
}