diff --git a/pandas_gbq/gbq.py b/pandas_gbq/gbq.py
index 4eb08b60..1c1b694c 100644
--- a/pandas_gbq/gbq.py
+++ b/pandas_gbq/gbq.py
@@ -816,6 +816,11 @@ def to_gbq(dataframe, destination_table, project_id, chunksize=10000,
         Service account private key in JSON format. Can be file path
         or string contents. This is useful for remote server
         authentication (eg. jupyter iPython notebook on remote host)
+    table_schema : list of dicts
+        List of BigQuery table fields to which DataFrame columns conform,
+        e.g. `[{'name': 'col1', 'type': 'STRING'},...]`. If schema is not
+        provided, it will be generated according to dtypes of DataFrame
+        columns.
     """
 
     if if_exists not in ('fail', 'replace', 'append'):
diff --git a/pandas_gbq/tests/test_gbq.py b/pandas_gbq/tests/test_gbq.py
index 9386f17b..f72f6516 100644
--- a/pandas_gbq/tests/test_gbq.py
+++ b/pandas_gbq/tests/test_gbq.py
@@ -1258,6 +1258,31 @@ def test_verify_schema_ignores_field_mode(self):
         assert self.sut.verify_schema(
             self.dataset_prefix + "1", TABLE_ID + test_id, test_schema_2)
 
+    def test_upload_data_with_valid_user_schema(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "15"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'STRING'},
+                       {'name': 'D', 'type': 'TIMESTAMP'}]
+        destination_table = self.destination_table + test_id
+        gbq.to_gbq(df, destination_table, _get_project_id(),
+                   private_key=_get_private_key_path(), table_schema=test_schema)
+        dataset, table = destination_table.split('.')
+        assert self.table.verify_schema(dataset, table, dict(fields=test_schema))
+
+    def test_upload_data_with_invalid_user_schema_raises_error(self):
+        df = tm.makeMixedDataFrame()
+        test_id = "16"
+        test_schema = [{'name': 'A', 'type': 'FLOAT'},
+                       {'name': 'B', 'type': 'FLOAT'},
+                       {'name': 'C', 'type': 'FLOAT'},
+                       {'name': 'D', 'type': 'FLOAT'}]
+        destination_table = self.destination_table + test_id
+        with tm.assertRaises(gbq.StreamingInsertError):
+            gbq.to_gbq(df, destination_table, _get_project_id(),
+                       private_key=_get_private_key_path(), table_schema=test_schema)
+
     def test_list_dataset(self):
         dataset_id = self.dataset_prefix + "1"
         assert dataset_id in self.dataset.datasets()
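
For context, a minimal usage sketch of the new `table_schema` argument (not part of this change; the project id, destination table, and key path below are placeholder values):

    import pandas as pd
    from pandas_gbq import gbq

    df = pd.DataFrame({'col1': ['a', 'b'], 'col2': [1.5, 2.5]})

    # Explicit schema for the destination table; if omitted, the schema is
    # generated from the DataFrame dtypes, as described in the docstring above.
    schema = [{'name': 'col1', 'type': 'STRING'},
              {'name': 'col2', 'type': 'FLOAT'}]

    gbq.to_gbq(df, 'my_dataset.my_table', 'my-project-id',
               private_key='/path/to/key.json',
               table_schema=schema)

As exercised by the second test, a user schema whose types do not match the uploaded data surfaces as a gbq.StreamingInsertError.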