Skip to content

Commit

Permalink
Do not include a default language. (#2985)
Browse files Browse the repository at this point in the history
The language API auto-detects the language if one is not provided, so defaulting to English is incorrect.
  • Loading branch information
lukesneeringer authored Feb 6, 2017
1 parent ec1776c commit 65213a1
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 13 deletions.
11 changes: 4 additions & 7 deletions language/google/cloud/language/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,6 @@
from google.cloud.language.syntax import Token


DEFAULT_LANGUAGE = 'en-US'
"""Default document language, English."""


Annotations = collections.namedtuple(
'Annotations',
'sentences tokens sentiment entities')
Expand Down Expand Up @@ -93,7 +89,7 @@ class Document(object):
:type language: str
:param language: (Optional) The language of the document text.
Defaults to :data:`DEFAULT_LANGUAGE`.
Defaults to None (auto-detect).
:type encoding: str
:param encoding: (Optional) The encoding of the document text.
Expand All @@ -115,7 +111,7 @@ class Document(object):
"""HTML document type."""

def __init__(self, client, content=None, gcs_url=None, doc_type=PLAIN_TEXT,
language=DEFAULT_LANGUAGE, encoding=Encoding.UTF8):
language=None, encoding=Encoding.UTF8):
if content is not None and gcs_url is not None:
raise ValueError('A Document cannot contain both local text and '
'a link to text in a Google Cloud Storage object')
Expand All @@ -139,8 +135,9 @@ def _to_dict(self):
"""
info = {
'type': self.doc_type,
'language': self.language,
}
if self.language is not None:
info['language'] = self.language
if self.content is not None:
info['content'] = self.content
elif self.gcs_url is not None:
Expand Down
14 changes: 8 additions & 6 deletions language/unit_tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,8 +125,8 @@ def test_constructor_defaults(self):
self.assertIs(document.client, client)
self.assertEqual(document.content, content)
self.assertIsNone(document.gcs_url)
self.assertIsNone(document.language)
self.assertEqual(document.doc_type, MUT.Document.PLAIN_TEXT)
self.assertEqual(document.language, MUT.DEFAULT_LANGUAGE)
self.assertEqual(document.encoding, MUT.Encoding.UTF8)

def test_constructor_explicit(self):
Expand All @@ -146,6 +146,13 @@ def test_constructor_explicit(self):
self.assertEqual(document.language, language)
self.assertEqual(document.encoding, MUT.Encoding.UTF32)

def test_constructor_explicit_language(self):
client = object()
content = 'abc'

This comment has been minimized.

Copy link
@monattar

monattar Feb 7, 2017

It would be great to test with a language other than EN, for example Spanish, to see that auto-detect of language works.

document = self._make_one(client, content, language='en-US')
self.assertEqual(document.language, 'en-US')
self.assertEqual(document._to_dict()['language'], 'en-US')

def test_constructor_no_text(self):
with self.assertRaises(ValueError):
self._make_one(None, content=None, gcs_url=None)
Expand All @@ -162,7 +169,6 @@ def test__to_dict_with_content(self):
info = document._to_dict()
self.assertEqual(info, {
'content': content,
'language': document.language,
'type': klass.PLAIN_TEXT,
})

Expand All @@ -173,7 +179,6 @@ def test__to_dict_with_gcs(self):
info = document._to_dict()
self.assertEqual(info, {
'gcsContentUri': gcs_url,
'language': document.language,
'type': klass.PLAIN_TEXT,
})

Expand All @@ -183,7 +188,6 @@ def test__to_dict_with_no_content(self):
document.content = None # Manually unset the content.
info = document._to_dict()
self.assertEqual(info, {
'language': document.language,
'type': klass.PLAIN_TEXT,
})

Expand All @@ -203,12 +207,10 @@ def _expected_data(content, encoding_type=None,
extract_sentiment=False,
extract_entities=False,
extract_syntax=False):
from google.cloud.language.document import DEFAULT_LANGUAGE
from google.cloud.language.document import Document

expected = {
'document': {
'language': DEFAULT_LANGUAGE,
'type': Document.PLAIN_TEXT,
'content': content,
},
Expand Down

0 comments on commit 65213a1

Please sign in to comment.