From c23b1a4c8cb4ac87c9e71703285393e5904e2a8a Mon Sep 17 00:00:00 2001 From: Piotr Chromiec Date: Thu, 9 Feb 2017 12:08:02 -0500 Subject: [PATCH] BUG: fix read_gbq lost precision for longs above 2^53 and floats above 10k closes #14020 closes #14305 Author: Piotr Chromiec Closes #14064 from tworec/read_gbq_full_long_support and squashes the following commits: 788ccee [Piotr Chromiec] BUG: fix read_gbq lost numeric precision --- doc/source/install.rst | 13 +- doc/source/io.rst | 61 +++++-- doc/source/whatsnew/v0.20.0.txt | 5 +- pandas/io/gbq.py | 24 +-- pandas/io/tests/test_gbq.py | 288 +++++++++++++++++++++----------- 5 files changed, 263 insertions(+), 128 deletions(-) diff --git a/doc/source/install.rst b/doc/source/install.rst index 158a6e5562b7a..4b3ea19624a0e 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -250,9 +250,9 @@ Optional Dependencies * `Feather Format `__: necessary for feather-based storage, version 0.3.1 or higher. * `SQLAlchemy `__: for SQL database support. Version 0.8.1 or higher recommended. Besides SQLAlchemy, you also need a database specific driver. You can find an overview of supported drivers for each SQL dialect in the `SQLAlchemy docs `__. Some common drivers are: - - `psycopg2 `__: for PostgreSQL - - `pymysql `__: for MySQL. - - `SQLite `__: for SQLite, this is included in Python's standard library by default. + * `psycopg2 `__: for PostgreSQL + * `pymysql `__: for MySQL. + * `SQLite `__: for SQLite, this is included in Python's standard library by default. * `matplotlib `__: for plotting * For Excel I/O: @@ -272,11 +272,8 @@ Optional Dependencies `__, or `xclip `__: necessary to use :func:`~pandas.read_clipboard`. Most package managers on Linux distributions will have ``xclip`` and/or ``xsel`` immediately available for installation. -* Google's `python-gflags <`__ , - `oauth2client `__ , - `httplib2 `__ - and `google-api-python-client `__ - : Needed for :mod:`~pandas.io.gbq` +* For Google BigQuery I/O - see :ref:`here `. + * `Backports.lzma `__: Only for Python 2, for writing to and/or reading from an xz compressed DataFrame in CSV; Python 3 support is built into the standard library. * One of the following combinations of libraries is needed to use the top-level :func:`~pandas.read_html` function: diff --git a/doc/source/io.rst b/doc/source/io.rst index 4c78758a0e2d2..22eac33a715ba 100644 --- a/doc/source/io.rst +++ b/doc/source/io.rst @@ -39,7 +39,7 @@ object. * :ref:`read_json` * :ref:`read_msgpack` * :ref:`read_html` - * :ref:`read_gbq` + * :ref:`read_gbq` * :ref:`read_stata` * :ref:`read_sas` * :ref:`read_clipboard` @@ -55,7 +55,7 @@ The corresponding ``writer`` functions are object methods that are accessed like * :ref:`to_json` * :ref:`to_msgpack` * :ref:`to_html` - * :ref:`to_gbq` + * :ref:`to_gbq` * :ref:`to_stata` * :ref:`to_clipboard` * :ref:`to_pickle` @@ -4648,16 +4648,11 @@ DataFrame with a shape and data types derived from the source table. Additionally, DataFrames can be inserted into new BigQuery tables or appended to existing tables. -You will need to install some additional dependencies: - -- Google's `python-gflags `__ -- `httplib2 `__ -- `google-api-python-client `__ - .. warning:: To use this module, you will need a valid BigQuery account. Refer to the - `BigQuery Documentation `__ for details on the service itself. + `BigQuery Documentation `__ + for details on the service itself. The key functions are: @@ -4671,7 +4666,44 @@ The key functions are: .. currentmodule:: pandas -.. 
_io.bigquery_reader:
+
+Supported Data Types
+++++++++++++++++++++
+
+Pandas supports all these `BigQuery data types `__:
+``STRING``, ``INTEGER`` (64-bit), ``FLOAT`` (64-bit), ``BOOLEAN`` and
+``TIMESTAMP`` (microsecond precision). Data types ``BYTES`` and ``RECORD``
+are not supported.
+
+Integer and boolean ``NA`` handling
++++++++++++++++++++++++++++++++++++
+
+.. versionadded:: 0.20
+
+Since all columns in BigQuery queries are nullable, and NumPy lacks ``NA``
+support for integer and boolean types, this module will store ``INTEGER`` or
+``BOOLEAN`` columns with at least one ``NULL`` value as ``dtype=object``.
+Otherwise those columns will be stored as ``dtype=int64`` or ``dtype=bool``
+respectively.
+
+This is the opposite of the default pandas behaviour, which promotes integer
+types to float in order to store NAs. See the :ref:`gotchas`
+for a detailed explanation.
+
+While this trade-off works well in most cases, it breaks down when storing
+values greater than 2**53. Such values in BigQuery commonly represent
+identifiers, and silently losing precision on an identifier is exactly what
+we want to avoid.
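+
+For illustration, the two queries below (paraphrasing this patch's own test
+cases; the project id is a placeholder) produce different dtypes:
+
+.. code-block:: python
+
+   # no NULLs -> plain int64
+   pd.read_gbq('SELECT 1 AS i', project_id='my-project')['i'].dtype
+   # dtype('int64')
+
+   # at least one NULL -> object, so large integers stay exact
+   pd.read_gbq('SELECT * FROM (SELECT 1 AS i), (SELECT NULL AS i)',
+               project_id='my-project')['i'].dtype
+   # dtype('O')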
+
+.. _io.bigquery_deps:
+
+Dependencies
+++++++++++++
+
+This module requires the following additional dependencies:
+
+- `httplib2 `__: HTTP client
+- `google-api-python-client `__: Google's API client
+- `oauth2client `__: authentication and authorization for Google's API

.. _io.bigquery_authentication:

Authentication
''''''''''''''

@@ -4686,7 +4718,7 @@ Is possible to authenticate with either user account credentials or service acco
Authenticating with user account credentials is as simple as following the prompts in a
browser window which will be automatically opened for you. You will be authenticated
to the specified ``BigQuery`` account using the product name ``pandas GBQ``.
It is only possible on local host.
-The remote authentication using user account credentials is not currently supported in Pandas.
+The remote authentication using user account credentials is not currently supported in pandas.
Additional information on the authentication mechanism can be found
`here `__.

@@ -4695,8 +4727,6 @@ is particularly useful when working on remote servers (eg. jupyter iPython noteb
Additional information on service accounts can be found
`here `__.

-You will need to install an additional dependency: `oauth2client `__.
-
Authentication via ``application default credentials`` is also possible. This is only valid
if the parameter ``private_key`` is not provided. This method also requires that
the credentials can be fetched from the environment the code is running in.

@@ -4716,6 +4746,7 @@ Additional information on
A private key can be obtained from the Google developers console by clicking
`here `__. Use JSON key type.

+.. _io.bigquery_reader:

Querying
''''''''

@@ -4775,7 +4806,6 @@ For more information about query configuration parameters see

.. _io.bigquery_writer:

-
Writing DataFrames
''''''''''''''''''

@@ -4865,6 +4895,8 @@ For example:
   often as the service seems to be changing and evolving. BiqQuery is best for analyzing large
   sets of data quickly, but it is not a direct replacement for a transactional database.

+.. _io.bigquery_create_tables:
+
Creating BigQuery Tables
''''''''''''''''''''''''

@@ -4894,6 +4926,7 @@ produce the dictionary representation schema of the specified pandas DataFrame.
   the new table with a different name. Refer to
   `Google BigQuery issue 191 `__.

+
 .. _io.stata:

Stata Format

diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index e765cdef4d219..9eae2b7a33923 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -369,7 +369,9 @@ Other API Changes
- ``pd.read_csv()`` will now raise a ``ValueError`` for the C engine if the quote character is larger than than one byte (:issue:`11592`)
- ``inplace`` arguments now require a boolean value, else a ``ValueError`` is thrown (:issue:`14189`)
- ``pandas.api.types.is_datetime64_ns_dtype`` will now report ``True`` on a tz-aware dtype, similar to ``pandas.api.types.is_datetime64_any_dtype``
-  - ``DataFrame.asof()`` will return a null filled ``Series`` instead the scalar ``NaN`` if a match is not found (:issue:`15118`)
+- ``DataFrame.asof()`` will return a null filled ``Series`` instead of the scalar ``NaN`` if a match is not found (:issue:`15118`)
+- The :func:`pd.read_gbq` method now stores ``INTEGER`` columns as ``dtype=object`` if they contain ``NULL`` values. Otherwise they are stored as ``int64``. This prevents precision loss for integers greater than 2**53. Furthermore, ``FLOAT`` columns with values above 10**4 are no longer cast to ``int64``, which also caused precision loss (:issue:`14064`, :issue:`14305`).
+
.. _whatsnew_0200.deprecations:

Deprecations
@@ -439,6 +441,7 @@ Bug Fixes

- Bug in ``DataFrame.loc`` with indexing a ``MultiIndex`` with a ``Series`` indexer (:issue:`14730`)

+
- Bug in ``pd.read_msgpack()`` in which ``Series`` categoricals were being improperly processed (:issue:`14901`)
- Bug in ``Series.ffill()`` with mixed dtypes containing tz-aware datetimes. (:issue:`14956`)
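The 2**53 cutoff mentioned above is not arbitrary: ``float64`` carries a
53-bit mantissa, so larger integers cannot survive a round trip through it.
A quick standalone check (plain Python, independent of the patch):

.. code-block:: python

   big_id = 2 ** 53 + 1           # e.g. a 64-bit identifier from BigQuery
   int(float(big_id)) == big_id   # False: float64 silently drops the +1
   int(str(big_id)) == big_id     # True: parsing the value as int, as
                                  # _parse_entry below now does, is exact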
diff --git a/pandas/io/gbq.py b/pandas/io/gbq.py
index 966f53e9d75ef..76c228418a616 100644
--- a/pandas/io/gbq.py
+++ b/pandas/io/gbq.py
@@ -603,18 +603,14 @@ def _parse_data(schema, rows):
     # see:
     # http://pandas.pydata.org/pandas-docs/dev/missing_data.html
     # #missing-data-casting-rules-and-indexing
-    dtype_map = {'INTEGER': np.dtype(float),
-                 'FLOAT': np.dtype(float),
-                 # This seems to be buggy without nanosecond indicator
+    dtype_map = {'FLOAT': np.dtype(float),
                  'TIMESTAMP': 'M8[ns]'}

     fields = schema['fields']
     col_types = [field['type'] for field in fields]
     col_names = [str(field['name']) for field in fields]
     col_dtypes = [dtype_map.get(field['type'], object) for field in fields]
-    page_array = np.zeros((len(rows),),
-                          dtype=lzip(col_names, col_dtypes))
-
+    page_array = np.zeros((len(rows),), dtype=lzip(col_names, col_dtypes))
     for row_num, raw_row in enumerate(rows):
         entries = raw_row.get('f', [])
         for col_num, field_type in enumerate(col_types):
@@ -628,7 +624,9 @@ def _parse_data(schema, rows):
 def _parse_entry(field_value, field_type):
     if field_value is None or field_value == 'null':
         return None
-    if field_type == 'INTEGER' or field_type == 'FLOAT':
+    if field_type == 'INTEGER':
+        return int(field_value)
+    elif field_type == 'FLOAT':
         return float(field_value)
     elif field_type == 'TIMESTAMP':
         timestamp = datetime.utcfromtimestamp(float(field_value))
@@ -757,10 +755,14 @@ def read_gbq(query, project_id=None, index_col=None, col_order=None,
                 'Column order does not match this DataFrame.'
             )

-    # Downcast floats to integers and objects to booleans
-    # if there are no NaN's. This is presently due to a
-    # limitation of numpy in handling missing data.
-    final_df._data = final_df._data.downcast(dtypes='infer')
+    # cast BOOLEAN and INTEGER columns from object to bool/int
+    # if they don't have any nulls
+    type_map = {'BOOLEAN': bool, 'INTEGER': int}
+    for field in schema['fields']:
+        if field['type'] in type_map and \
+                final_df[field['name']].notnull().all():
+            final_df[field['name']] = \
+                final_df[field['name']].astype(type_map[field['type']])

     connector.print_elapsed_seconds(
         'Total time taken',
diff --git a/pandas/io/tests/test_gbq.py b/pandas/io/tests/test_gbq.py
index 457e2d218cb33..1157482d7ae67 100644
--- a/pandas/io/tests/test_gbq.py
+++ b/pandas/io/tests/test_gbq.py
@@ -46,6 +46,11 @@ def _skip_if_no_project_id():
                             "Cannot run integration tests without a project id")


+def _skip_local_auth_if_in_travis_env():
+    if _in_travis_environment():
+        raise nose.SkipTest("Cannot run local auth in travis environment")
+
+
 def _skip_if_no_private_key_path():
     if not _get_private_key_path():
         raise nose.SkipTest("Cannot run integration tests without a "
@@ -248,14 +253,14 @@ def test_generate_bq_schema_deprecated():
         gbq.generate_bq_schema(df)


-class TestGBQConnectorIntegration(tm.TestCase):
+class TestGBQConnectorIntegrationWithLocalUserAccountAuth(tm.TestCase):

     def setUp(self):
         _setup_common()
         _skip_if_no_project_id()
+        _skip_local_auth_if_in_travis_env()

-        self.sut = gbq.GbqConnector(_get_project_id(),
-                                    private_key=_get_private_key_path())
+        self.sut = gbq.GbqConnector(_get_project_id())

     def test_should_be_able_to_make_a_connector(self):
         self.assertTrue(self.sut is not None,
@@ -293,8 +298,7 @@ def test_get_application_default_credentials_returns_credentials(self):
         self.assertTrue(isinstance(credentials, GoogleCredentials))


-class TestGBQConnectorServiceAccountKeyPathIntegration(tm.TestCase):
-
+class TestGBQConnectorIntegrationWithServiceAccountKeyPath(tm.TestCase):
     def setUp(self):
         _setup_common()

@@ -325,16 +329,15 @@ def test_should_be_able_to_get_results_from_query(self):
         self.assertTrue(pages is not None)


-class TestGBQConnectorServiceAccountKeyContentsIntegration(tm.TestCase):
-
+class TestGBQConnectorIntegrationWithServiceAccountKeyContents(tm.TestCase):
     def setUp(self):
         _setup_common()

         _skip_if_no_project_id()
-        _skip_if_no_private_key_path()
+        _skip_if_no_private_key_contents()

         self.sut = gbq.GbqConnector(_get_project_id(),
-                                    private_key=_get_private_key_path())
+                                    private_key=_get_private_key_contents())

     def test_should_be_able_to_make_a_connector(self):
         self.assertTrue(self.sut is not None,
@@ -373,9 +376,9 @@ def test_import_google_api_python_client(self):
         from googleapiclient.discovery import build  # noqa
         from googleapiclient.errors import HttpError  # noqa

-    def test_should_return_bigquery_integers_as_python_floats(self):
+    def test_should_return_bigquery_integers_as_python_ints(self):
         result = gbq._parse_entry(1, 'INTEGER')
-        tm.assert_equal(result, float(1))
+        tm.assert_equal(result, int(1))

     def test_should_return_bigquery_floats_as_python_floats(self):
         result = gbq._parse_entry(1, 'FLOAT')
@@ -403,15 +406,15 @@ def test_to_gbq_with_no_project_id_given_should_fail(self):

     def test_read_gbq_with_no_project_id_given_should_fail(self):
         with tm.assertRaises(TypeError):
-            gbq.read_gbq('SELECT "1" as NUMBER_1')
+            gbq.read_gbq('SELECT 1')

     def test_that_parse_data_works_properly(self):
         test_schema = {'fields': [
-            {'mode': 'NULLABLE', 'name': 'VALID_STRING', 'type': 'STRING'}]}
+            {'mode': 'NULLABLE', 'name': 'valid_string', 'type': 'STRING'}]}
         test_page = [{'f': [{'v': 'PI'}]}]

         test_output = gbq._parse_data(test_schema, test_page)
-
correct_output = DataFrame({'VALID_STRING': ['PI']}) + correct_output = DataFrame({'valid_string': ['PI']}) tm.assert_frame_equal(test_output, correct_output) def test_read_gbq_with_invalid_private_key_json_should_fail(self): @@ -435,12 +438,12 @@ def test_read_gbq_with_empty_private_key_file_should_fail(self): private_key=empty_file_path) def test_read_gbq_with_corrupted_private_key_json_should_fail(self): - _skip_if_no_private_key_path() + _skip_if_no_private_key_contents() with tm.assertRaises(gbq.InvalidPrivateKeyFormat): gbq.read_gbq( 'SELECT 1', project_id='x', - private_key=re.sub('[a-z]', '9', _get_private_key_path())) + private_key=re.sub('[a-z]', '9', _get_private_key_contents())) class TestReadGBQIntegration(tm.TestCase): @@ -475,112 +478,207 @@ def tearDown(self): pass def test_should_read_as_user_account(self): - if _in_travis_environment(): - raise nose.SkipTest("Cannot run local auth in travis environment") + _skip_local_auth_if_in_travis_env() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_read_as_service_account_with_key_path(self): _skip_if_no_private_key_path() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_read_as_service_account_with_key_contents(self): _skip_if_no_private_key_contents() - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_contents()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) + + +class TestReadGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): + + @classmethod + def setUpClass(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *BEFORE* + # executing *ALL* tests described below. + + _skip_if_no_project_id() + _skip_if_no_private_key_path() + + _setup_common() + + def setUp(self): + # - PER-TEST FIXTURES - + # put here any instruction you want to be run *BEFORE* *EVERY* test is + # executed. + pass + + @classmethod + def tearDownClass(cls): + # - GLOBAL CLASS FIXTURES - + # put here any instruction you want to execute only *ONCE* *AFTER* + # executing all tests. + pass + + def tearDown(self): + # - PER-TEST FIXTURES - + # put here any instructions you want to be run *AFTER* *EVERY* test is + # executed. 
+ pass def test_should_properly_handle_valid_strings(self): - query = 'SELECT "PI" as VALID_STRING' + query = 'SELECT "PI" AS valid_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_should_properly_handle_empty_strings(self): - query = 'SELECT "" as EMPTY_STRING' + query = 'SELECT "" AS empty_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'EMPTY_STRING': [""]})) + tm.assert_frame_equal(df, DataFrame({'empty_string': [""]})) def test_should_properly_handle_null_strings(self): - query = 'SELECT STRING(NULL) as NULL_STRING' + query = 'SELECT STRING(NULL) AS null_string' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_STRING': [None]})) + tm.assert_frame_equal(df, DataFrame({'null_string': [None]})) def test_should_properly_handle_valid_integers(self): - query = 'SELECT INTEGER(3) as VALID_INTEGER' + query = 'SELECT INTEGER(3) AS valid_integer' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame({'valid_integer': [3]})) + + def test_should_properly_handle_nullable_integers(self): + query = '''SELECT * FROM + (SELECT 1 AS nullable_integer), + (SELECT NULL AS nullable_integer)''' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'VALID_INTEGER': [3]})) + tm.assert_frame_equal( + df, DataFrame({'nullable_integer': [1, None]}).astype(object)) + + def test_should_properly_handle_valid_longs(self): + query = 'SELECT 1 << 62 AS valid_long' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'valid_long': [1 << 62]})) + + def test_should_properly_handle_nullable_longs(self): + query = '''SELECT * FROM + (SELECT 1 << 62 AS nullable_long), + (SELECT NULL AS nullable_long)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_long': [1 << 62, None]}).astype(object)) def test_should_properly_handle_null_integers(self): - query = 'SELECT INTEGER(NULL) as NULL_INTEGER' + query = 'SELECT INTEGER(NULL) AS null_integer' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_INTEGER': [np.nan]})) + tm.assert_frame_equal(df, DataFrame({'null_integer': [None]})) def test_should_properly_handle_valid_floats(self): - query = 'SELECT PI() as VALID_FLOAT' + from math import pi + query = 'SELECT PI() AS valid_float' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame( + {'valid_float': [pi]})) + + def test_should_properly_handle_nullable_floats(self): + from math import pi + query = '''SELECT * FROM + (SELECT PI() AS nullable_float), + (SELECT NULL AS nullable_float)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_float': [pi, None]})) + + def test_should_properly_handle_valid_doubles(self): + from math import pi + query = 'SELECT PI() * POW(10, 307) AS 
valid_double' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame( - {'VALID_FLOAT': [3.141592653589793]})) + {'valid_double': [pi * 10 ** 307]})) + + def test_should_properly_handle_nullable_doubles(self): + from math import pi + query = '''SELECT * FROM + (SELECT PI() * POW(10, 307) AS nullable_double), + (SELECT NULL AS nullable_double)''' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal( + df, DataFrame({'nullable_double': [pi * 10 ** 307, None]})) def test_should_properly_handle_null_floats(self): - query = 'SELECT FLOAT(NULL) as NULL_FLOAT' + query = 'SELECT FLOAT(NULL) AS null_float' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_FLOAT': [np.nan]})) + tm.assert_frame_equal(df, DataFrame({'null_float': [np.nan]})) def test_should_properly_handle_timestamp_unix_epoch(self): - query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") as UNIX_EPOCH' + query = 'SELECT TIMESTAMP("1970-01-01 00:00:00") AS unix_epoch' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame( - {'UNIX_EPOCH': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) + {'unix_epoch': [np.datetime64('1970-01-01T00:00:00.000000Z')]})) def test_should_properly_handle_arbitrary_timestamp(self): - query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") as VALID_TIMESTAMP' + query = 'SELECT TIMESTAMP("2004-09-15 05:00:00") AS valid_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, DataFrame({ - 'VALID_TIMESTAMP': [np.datetime64('2004-09-15T05:00:00.000000Z')] + 'valid_timestamp': [np.datetime64('2004-09-15T05:00:00.000000Z')] })) def test_should_properly_handle_null_timestamp(self): - query = 'SELECT TIMESTAMP(NULL) as NULL_TIMESTAMP' + query = 'SELECT TIMESTAMP(NULL) AS null_timestamp' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_TIMESTAMP': [NaT]})) + tm.assert_frame_equal(df, DataFrame({'null_timestamp': [NaT]})) def test_should_properly_handle_true_boolean(self): - query = 'SELECT BOOLEAN(TRUE) as TRUE_BOOLEAN' + query = 'SELECT BOOLEAN(TRUE) AS true_boolean' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'TRUE_BOOLEAN': [True]})) + tm.assert_frame_equal(df, DataFrame({'true_boolean': [True]})) def test_should_properly_handle_false_boolean(self): - query = 'SELECT BOOLEAN(FALSE) as FALSE_BOOLEAN' + query = 'SELECT BOOLEAN(FALSE) AS false_boolean' df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'FALSE_BOOLEAN': [False]})) + tm.assert_frame_equal(df, DataFrame({'false_boolean': [False]})) def test_should_properly_handle_null_boolean(self): - query = 'SELECT BOOLEAN(NULL) as NULL_BOOLEAN' + query = 'SELECT BOOLEAN(NULL) AS null_boolean' + df = gbq.read_gbq(query, project_id=_get_project_id(), + private_key=_get_private_key_path()) + tm.assert_frame_equal(df, DataFrame({'null_boolean': [None]})) + + def test_should_properly_handle_nullable_booleans(self): + query = '''SELECT * FROM + (SELECT BOOLEAN(TRUE) AS nullable_boolean), + (SELECT NULL AS nullable_boolean)''' df = gbq.read_gbq(query, 
project_id=_get_project_id(), private_key=_get_private_key_path()) - tm.assert_frame_equal(df, DataFrame({'NULL_BOOLEAN': [None]})) + tm.assert_frame_equal( + df, DataFrame({'nullable_boolean': [True, None]}).astype(object)) def test_unicode_string_conversion_and_normalization(self): correct_test_datatype = DataFrame( - {'UNICODE_STRING': [u("\xe9\xfc")]} + {'unicode_string': [u("\xe9\xfc")]} ) unicode_string = "\xc3\xa9\xc3\xbc" @@ -588,40 +686,40 @@ def test_unicode_string_conversion_and_normalization(self): if compat.PY3: unicode_string = unicode_string.encode('latin-1').decode('utf8') - query = 'SELECT "{0}" as UNICODE_STRING'.format(unicode_string) + query = 'SELECT "{0}" AS unicode_string'.format(unicode_string) df = gbq.read_gbq(query, project_id=_get_project_id(), private_key=_get_private_key_path()) tm.assert_frame_equal(df, correct_test_datatype) def test_index_column(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2" + query = "SELECT 'a' AS string_1, 'b' AS string_2" result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col="STRING_1", + index_col="string_1", private_key=_get_private_key_path()) correct_frame = DataFrame( - {'STRING_1': ['a'], 'STRING_2': ['b']}).set_index("STRING_1") + {'string_1': ['a'], 'string_2': ['b']}).set_index("string_1") tm.assert_equal(result_frame.index.name, correct_frame.index.name) def test_column_order(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3" - col_order = ['STRING_3', 'STRING_1', 'STRING_2'] + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + col_order = ['string_3', 'string_1', 'string_2'] result_frame = gbq.read_gbq(query, project_id=_get_project_id(), col_order=col_order, private_key=_get_private_key_path()) - correct_frame = DataFrame({'STRING_1': ['a'], 'STRING_2': [ - 'b'], 'STRING_3': ['c']})[col_order] + correct_frame = DataFrame({'string_1': ['a'], 'string_2': [ + 'b'], 'string_3': ['c']})[col_order] tm.assert_frame_equal(result_frame, correct_frame) def test_column_order_plus_index(self): - query = "SELECT 'a' as STRING_1, 'b' as STRING_2, 'c' as STRING_3" - col_order = ['STRING_3', 'STRING_2'] + query = "SELECT 'a' AS string_1, 'b' AS string_2, 'c' AS string_3" + col_order = ['string_3', 'string_2'] result_frame = gbq.read_gbq(query, project_id=_get_project_id(), - index_col='STRING_1', col_order=col_order, + index_col='string_1', col_order=col_order, private_key=_get_private_key_path()) correct_frame = DataFrame( - {'STRING_1': ['a'], 'STRING_2': ['b'], 'STRING_3': ['c']}) - correct_frame.set_index('STRING_1', inplace=True) + {'string_1': ['a'], 'string_2': ['b'], 'string_3': ['c']}) + correct_frame.set_index('string_1', inplace=True) correct_frame = correct_frame[col_order] tm.assert_frame_equal(result_frame, correct_frame) @@ -655,14 +753,17 @@ def test_download_dataset_larger_than_200k_rows(self): def test_zero_rows(self): # Bug fix for https://github.com/pandas-dev/pandas/issues/10273 - df = gbq.read_gbq("SELECT title, id " + df = gbq.read_gbq("SELECT title, id, is_bot, " + "SEC_TO_TIMESTAMP(timestamp) ts " "FROM [publicdata:samples.wikipedia] " "WHERE timestamp=-9999999", project_id=_get_project_id(), private_key=_get_private_key_path()) page_array = np.zeros( - (0,), dtype=[('title', object), ('id', np.dtype(float))]) - expected_result = DataFrame(page_array, columns=['title', 'id']) + (0,), dtype=[('title', object), ('id', np.dtype(int)), + ('is_bot', np.dtype(bool)), ('ts', 'M8[ns]')]) + expected_result = DataFrame( + page_array, 
columns=['title', 'id', 'is_bot', 'ts']) self.assert_frame_equal(df, expected_result) def test_legacy_sql(self): @@ -715,7 +816,7 @@ def test_invalid_option_for_sql_dialect(self): dialect='standard', private_key=_get_private_key_path()) def test_query_with_parameters(self): - sql_statement = "SELECT @param1 + @param2 as VALID_RESULT" + sql_statement = "SELECT @param1 + @param2 AS valid_result" config = { 'query': { "useLegacySql": False, @@ -753,11 +854,11 @@ def test_query_with_parameters(self): df = gbq.read_gbq(sql_statement, project_id=_get_project_id(), private_key=_get_private_key_path(), configuration=config) - tm.assert_frame_equal(df, DataFrame({'VALID_RESULT': [3]})) + tm.assert_frame_equal(df, DataFrame({'valid_result': [3]})) def test_query_inside_configuration(self): - query_no_use = 'SELECT "PI_WRONG" as VALID_STRING' - query = 'SELECT "PI" as VALID_STRING' + query_no_use = 'SELECT "PI_WRONG" AS valid_string' + query = 'SELECT "PI" AS valid_string' config = { 'query': { "query": query, @@ -774,7 +875,7 @@ def test_query_inside_configuration(self): df = gbq.read_gbq(None, project_id=_get_project_id(), private_key=_get_private_key_path(), configuration=config) - tm.assert_frame_equal(df, DataFrame({'VALID_STRING': ['PI']})) + tm.assert_frame_equal(df, DataFrame({'valid_string': ['PI']})) def test_configuration_without_query(self): sql_statement = 'SELECT 1' @@ -800,7 +901,7 @@ def test_configuration_without_query(self): configuration=config) -class TestToGBQIntegration(tm.TestCase): +class TestToGBQIntegrationWithServiceAccountKeyPath(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -814,6 +915,7 @@ def setUpClass(cls): # executing *ALL* tests described below. _skip_if_no_project_id() + _skip_if_no_private_key_path() _setup_common() clean_gbq_environment(_get_private_key_path()) @@ -859,11 +961,11 @@ def test_upload_data(self): sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], test_size) + self.assertEqual(result['num_rows'][0], test_size) def test_upload_data_if_table_exists_fail(self): destination_table = DESTINATION_TABLE + "2" @@ -899,11 +1001,11 @@ def test_upload_data_if_table_exists_append(self): sleep(30) # <- Curses Google!!! - result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], test_size * 2) + self.assertEqual(result['num_rows'][0], test_size * 2) # Try inserting with a different schema, confirm failure with tm.assertRaises(gbq.InvalidSchema): @@ -932,11 +1034,11 @@ def test_upload_data_if_table_exists_replace(self): sleep(30) # <- Curses Google!!! 
- result = gbq.read_gbq("SELECT COUNT(*) as NUM_ROWS FROM {0}" + result = gbq.read_gbq("SELECT COUNT(*) AS num_rows FROM {0}" .format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_path()) - self.assertEqual(result['NUM_ROWS'][0], 5) + self.assertEqual(result['num_rows'][0], 5) @tm.slow def test_google_upload_errors_should_raise_exception(self): @@ -1113,7 +1215,7 @@ def test_dataset_does_not_exist(self): DATASET_ID + "_not_found"), 'Expected dataset not to exist') -class TestToGBQIntegrationServiceAccountKeyPath(tm.TestCase): +class TestToGBQIntegrationWithLocalUserAccountAuth(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -1128,10 +1230,10 @@ def setUpClass(cls): # executing *ALL* tests described below. _skip_if_no_project_id() - _skip_if_no_private_key_path() + _skip_local_auth_if_in_travis_env() _setup_common() - clean_gbq_environment(_get_private_key_path()) + clean_gbq_environment() def setUp(self): # - PER-TEST FIXTURES - @@ -1145,7 +1247,7 @@ def tearDownClass(cls): # put here any instruction you want to execute only *ONCE* *AFTER* # executing all tests. - clean_gbq_environment(_get_private_key_path()) + clean_gbq_environment() def tearDown(self): # - PER-TEST FIXTURES - @@ -1153,26 +1255,24 @@ def tearDown(self): # is executed. pass - def test_upload_data_as_service_account_with_key_path(self): + def test_upload_data(self): destination_table = "{0}.{1}".format(DATASET_ID + "2", TABLE_ID + "1") test_size = 10 df = make_mixed_dataframe_v2(test_size) - gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000, - private_key=_get_private_key_path()) + gbq.to_gbq(df, destination_table, _get_project_id(), chunksize=10000) sleep(30) # <- Curses Google!!! result = gbq.read_gbq( - "SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table), - project_id=_get_project_id(), - private_key=_get_private_key_path()) + "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), + project_id=_get_project_id()) - self.assertEqual(result['NUM_ROWS'][0], test_size) + self.assertEqual(result['num_rows'][0], test_size) -class TestToGBQIntegrationServiceAccountKeyContents(tm.TestCase): +class TestToGBQIntegrationWithServiceAccountKeyContents(tm.TestCase): # Changes to BigQuery table schema may take up to 2 minutes as of May 2015 # As a workaround to this issue, each test should use a unique table name. # Make sure to modify the for loop range in the tearDownClass when a new @@ -1212,7 +1312,7 @@ def tearDown(self): # is executed. pass - def test_upload_data_as_service_account_with_key_contents(self): + def test_upload_data(self): destination_table = "{0}.{1}".format(DATASET_ID + "3", TABLE_ID + "1") test_size = 10 @@ -1224,7 +1324,7 @@ def test_upload_data_as_service_account_with_key_contents(self): sleep(30) # <- Curses Google!!! result = gbq.read_gbq( - "SELECT COUNT(*) as NUM_ROWS FROM {0}".format(destination_table), + "SELECT COUNT(*) AS num_rows FROM {0}".format(destination_table), project_id=_get_project_id(), private_key=_get_private_key_contents()) - self.assertEqual(result['NUM_ROWS'][0], test_size) + self.assertEqual(result['num_rows'][0], test_size)
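As a minimal, self-contained sketch of the post-processing rule this patch
adds to ``read_gbq`` (the helper name and the hand-built frame standing in
for a query result are illustrative, not pandas API):

.. code-block:: python

   import pandas as pd

   def cast_non_null_columns(df, schema):
       # Mirror the new read_gbq behaviour: object columns are narrowed
       # to bool/int only when they contain no NULLs, so integers above
       # 2**53 are never forced through float64.
       type_map = {'BOOLEAN': bool, 'INTEGER': int}
       for field in schema['fields']:
           name = field['name']
           if field['type'] in type_map and df[name].notnull().all():
               df[name] = df[name].astype(type_map[field['type']])
       return df

   frame = pd.DataFrame({'a': [1, 1 << 62], 'b': [True, None]},
                        dtype=object)
   schema = {'fields': [{'name': 'a', 'type': 'INTEGER'},
                        {'name': 'b', 'type': 'BOOLEAN'}]}
   cast_non_null_columns(frame, schema).dtypes  # a -> int64, b -> object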