From 89bf82de72a841379d56e1b353580f7756bd7913 Mon Sep 17 00:00:00 2001 From: Matti Remes Date: Sun, 26 Feb 2017 15:24:10 -0500 Subject: [PATCH] Use name and type comparison when appending a dataframe into table I modified GbqConnector.verify_schema function to parse name and type from the remote schema (basically dropping mode) and include those in the compared fields. Currently, when appending to a BQ table, comparison between the destination table's schema and a dataframe schema is done over a superset of a BQ schema definition (name, type, mode) when _generate_bq_schema parses only name and type from a dataframe. IMO it would be inconvenient to make the mode check in the module by generating completeness of columns (includes null values or not). So raising a generic GBQ error is more convenient here. closes #13 Author: Matti Remes Closes #14 from mremes/master and squashes the following commits: bf8c378 [Matti Remes] added reference to issue #13 77b1fd5 [Matti Remes] changelog for verify_schema changes 70d08ef [Matti Remes] make the syntax of the test flake-pretty 45826f1 [Matti Remes] Merge remote-tracking branch 'upstream/master' 66aa616 [Matti Remes] Added test for validate_schema ignoring field mode when comparing schemas 5dafd55 [Matti Remes] fix bug with selecting key 631d66c [Matti Remes] Use name and type of fields for comparing remote and local schemas when appending to a table --- docs/source/changelog.rst | 6 ++++-- docs/source/conf.py | 5 ++--- pandas_gbq/gbq.py | 6 +++++- pandas_gbq/tests/test_gbq.py | 28 ++++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 6 deletions(-) diff --git a/docs/source/changelog.rst b/docs/source/changelog.rst index 3ec2a3df..b2a6b83e 100644 --- a/docs/source/changelog.rst +++ b/docs/source/changelog.rst @@ -1,8 +1,10 @@ Changelog ========= -0.2.0 / 2017-? --------------- +0.2.0 / 2017-03-?? 
+------------------ + +- Bug with appending to a BigQuery table where fields have modes (NULLABLE,REQUIRED,REPEATED) specified. These modes were compared versus the remote schema and writing a table via ``to_gbq`` would previously raise. (:issue:`13`) 0.1.2 / 2017-02-23 ------------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index 32c2fcfc..94c8d229 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -353,6 +353,5 @@ intersphinx_mapping = {'https://docs.python.org/': None} extlinks = {'issue': ('https://github.com/pydata/pandas-gbq/issues/%s', - 'GH'), - 'wiki': ('https://github.com/pydata/pandas-gbq/wiki/%s', - 'wiki ')} + 'GH#'), + 'pr': ('https://github.com/pydata/pandas-gbq/pull/%s', 'GH#')} diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py index 9759e379..060724ed 100644 --- a/pandas_gbq/gbq.py +++ b/pandas_gbq/gbq.py @@ -563,8 +563,12 @@ def verify_schema(self, dataset_id, table_id, schema): datasetId=dataset_id, tableId=table_id).execute()['schema'] + remote_fields = [{'name': field_remote['name'], + 'type': field_remote['type']} + for field_remote in remote_schema['fields']] + fields_remote = set([json.dumps(field_remote) - for field_remote in remote_schema['fields']]) + for field_remote in remote_fields]) fields_local = set(json.dumps(field_local) for field_local in schema['fields']) diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py index 036e8330..6a3cad19 100644 --- a/pandas_gbq/tests/test_gbq.py +++ b/pandas_gbq/tests/test_gbq.py @@ -1161,6 +1161,34 @@ def test_upload_data_flexible_column_order(self): _get_project_id(), if_exists='append', private_key=_get_private_key_path()) + def test_verify_schema_ignores_field_mode(self): + test_id = "14" + test_schema_1 = {'fields': [{'name': 'A', + 'type': 'FLOAT', + 'mode': 'NULLABLE'}, + {'name': 'B', + 'type': 'FLOAT', + 'mode': 'NULLABLE'}, + {'name': 'C', + 'type': 'STRING', + 'mode': 'NULLABLE'}, + {'name': 'D', + 'type': 
'TIMESTAMP', + 'mode': 'REQUIRED'}]} + test_schema_2 = {'fields': [{'name': 'A', + 'type': 'FLOAT'}, + {'name': 'B', + 'type': 'FLOAT'}, + {'name': 'C', + 'type': 'STRING'}, + {'name': 'D', + 'type': 'TIMESTAMP'}]} + + self.table.create(TABLE_ID + test_id, test_schema_1) + self.assertTrue(self.sut.verify_schema( + self.dataset_prefix + "1", TABLE_ID + test_id, test_schema_2), + 'Expected schema to match') + def test_list_dataset(self): dataset_id = self.dataset_prefix + "1" self.assertTrue(dataset_id in self.dataset.datasets(),