From a526c6296dccc96b0f415796ce9ff1ebd0bf1fae Mon Sep 17 00:00:00 2001
From: elibixby
Date: Fri, 19 Jun 2015 11:43:29 -0700
Subject: [PATCH 1/4] Added export format option

---
 bigquery/samples/export_data_to_cloud_storage.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/bigquery/samples/export_data_to_cloud_storage.py b/bigquery/samples/export_data_to_cloud_storage.py
index 62675d66d209..860584d30f72 100644
--- a/bigquery/samples/export_data_to_cloud_storage.py
+++ b/bigquery/samples/export_data_to_cloud_storage.py
@@ -20,6 +20,7 @@
 # [START export_table]
 def export_table(service, cloud_storage_path,
                  projectId, datasetId, tableId,
+                 export_format="CSV",
                  num_retries=5):
     """
     Starts an export job
@@ -30,6 +31,8 @@ def export_table(service, cloud_storage_path,
         cloud_storage_path: fully qualified
         path to a Google Cloud Storage location,
         e.g. gs://mybucket/myfolder/
+    export_format: format to export in;
+        "CSV", "NEWLINE_DELIMITED_JSON", or "AVRO".
 
     Returns: an extract job resource representing the
         job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
@@ -49,6 +52,7 @@ def export_table(service, cloud_storage_path,
                     'tableId': tableId,
                 },
                 'destinationUris': [cloud_storage_path],
+                'destinationFormat': export_format
             }
         }
     }

From d0bc45f2928c1504e7572bcbebdc05777a2ba676 Mon Sep 17 00:00:00 2001
From: elibixby
Date: Tue, 30 Jun 2015 13:01:57 -0700
Subject: [PATCH 2/4] Added tests for export data formats

---
 .../samples/export_data_to_cloud_storage.py | 11 +++++-----
 .../test_export_data_to_cloud_storage.py    | 22 +++++++++++++++++--
 2 files changed, 26 insertions(+), 7 deletions(-)

diff --git a/bigquery/samples/export_data_to_cloud_storage.py b/bigquery/samples/export_data_to_cloud_storage.py
index 860584d30f72..79c263fe2f44 100644
--- a/bigquery/samples/export_data_to_cloud_storage.py
+++ b/bigquery/samples/export_data_to_cloud_storage.py
@@ -29,10 +29,10 @@ def export_table(service, cloud_storage_path,
         service: initialized and authorized bigquery
             google-api-client object,
         cloud_storage_path: fully qualified
-        path to a Google Cloud Storage location,
-        e.g. gs://mybucket/myfolder/
-    export_format: format to export in;
-        "CSV", "NEWLINE_DELIMITED_JSON", or "AVRO".
+            path to a Google Cloud Storage location,
+            e.g. gs://mybucket/myfolder/
+        export_format: format to export in;
+            "CSV", "NEWLINE_DELIMITED_JSON", or "AVRO".
 
     Returns: an extract job resource representing the
         job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
@@ -65,7 +65,7 @@ def export_table(service, cloud_storage_path,
 # [START run]
 def run(cloud_storage_path, projectId,
         datasetId, tableId,
-        num_retries, interval):
+        num_retries, interval, export_format="CSV"):
 
     bigquery = get_service()
     resource = export_table(bigquery, cloud_storage_path,
@@ -74,6 +74,7 @@ def run(cloud_storage_path,
              resource['jobReference']['projectId'],
              resource['jobReference']['jobId'],
              interval,
+             export_format,
              num_retries)
 # [END run]

diff --git a/bigquery/tests/test_export_data_to_cloud_storage.py b/bigquery/tests/test_export_data_to_cloud_storage.py
index 7024e29143cc..d5872d87241f 100644
--- a/bigquery/tests/test_export_data_to_cloud_storage.py
+++ b/bigquery/tests/test_export_data_to_cloud_storage.py
@@ -22,14 +22,32 @@
 
 class TestExportTableToGCS(CloudBaseTest):
 
-    def test_export_table(self):
+    def test_export_table_csv(self):
         run(self.constants['cloudStorageInputURI'],
             self.constants['projectId'],
             self.constants['datasetId'],
             self.constants['newTableId'],
             5,
-            5)
+            5,
+            export_format="CSV")
+
+    def test_export_table_json(self):
+        run(self.constants['cloudStorageInputURI'],
+            self.constants['projectId'],
+            self.constants['datasetId'],
+            self.constants['newTableId'],
+            5,
+            5,
+            export_format="NEWLINE_DELIMITED_JSON")
+
+    def test_export_table_avro(self):
+        run(self.constants['cloudStorageInputURI'],
+            self.constants['projectId'],
+            self.constants['datasetId'],
+            self.constants['newTableId'],
+            5,
+            5,
+            export_format="AVRO")
 
 if __name__ == '__main__':
     unittest.main()

From 3353e3c60b7b9d023608c089d6252ffbf6071ae5 Mon Sep 17 00:00:00 2001
From: elibixby
Date: Tue, 30 Jun 2015 13:23:15 -0700
Subject: [PATCH 3/4] fixed wrong parameters

---
 bigquery/samples/export_data_to_cloud_storage.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/bigquery/samples/export_data_to_cloud_storage.py b/bigquery/samples/export_data_to_cloud_storage.py
index 79c263fe2f44..ac16142f8cc6 100644
--- a/bigquery/samples/export_data_to_cloud_storage.py
+++ b/bigquery/samples/export_data_to_cloud_storage.py
@@ -69,12 +69,13 @@ def run(cloud_storage_path,
 
     bigquery = get_service()
     resource = export_table(bigquery, cloud_storage_path,
-                            projectId, datasetId, tableId, num_retries)
+                            projectId, datasetId, tableId,
+                            num_retries=num_retries,
+                            export_format=export_format)
     poll_job(bigquery,
              resource['jobReference']['projectId'],
              resource['jobReference']['jobId'],
              interval,
-             export_format,
              num_retries)
 # [END run]

From 771d2fa2bab2d31a807fd6613ba19db15e2c27c7 Mon Sep 17 00:00:00 2001
From: elibixby
Date: Wed, 1 Jul 2015 11:01:02 -0700
Subject: [PATCH 4/4] Indentation and punctuation

---
 bigquery/samples/export_data_to_cloud_storage.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/bigquery/samples/export_data_to_cloud_storage.py b/bigquery/samples/export_data_to_cloud_storage.py
index ac16142f8cc6..4e2456370952 100644
--- a/bigquery/samples/export_data_to_cloud_storage.py
+++ b/bigquery/samples/export_data_to_cloud_storage.py
@@ -27,15 +27,15 @@ def export_table(service, cloud_storage_path,
 
     Args:
         service: initialized and authorized bigquery
-            google-api-client object,
+            google-api-client object.
         cloud_storage_path: fully qualified
-            path to a Google Cloud Storage location,
+            path to a Google Cloud Storage location.
             e.g. gs://mybucket/myfolder/
-        export_format: format to export in; 
+        export_format: format to export in;
             "CSV", "NEWLINE_DELIMITED_JSON", or "AVRO".
 
     Returns: an extract job resource representing the
-        job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs 
+        job, see https://cloud.google.com/bigquery/docs/reference/v2/jobs
     """
     # Generate a unique job_id so retries
     # don't accidentally duplicate export
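
A note for reviewers: the diffs above only show the two lines added to the job body, so here is a minimal sketch of the full extract configuration that export_table assembles after PATCH 1/4. The project, dataset, table, and bucket names are placeholders, and the jobReference block is inferred from the sample's "generate a unique job_id" comment rather than shown in the diff:

    import uuid

    # Placeholder identifiers -- substitute your own project/dataset/table.
    project_id, dataset_id, table_id = 'my-project', 'my_dataset', 'my_table'

    job_data = {
        # The sample generates a unique job id so retries don't
        # accidentally duplicate the export (see the comment above).
        'jobReference': {
            'projectId': project_id,
            'jobId': str(uuid.uuid4()),
        },
        'configuration': {
            'extract': {
                'sourceTable': {
                    'projectId': project_id,
                    'datasetId': dataset_id,
                    'tableId': table_id,
                },
                'destinationUris': ['gs://my-bucket/my-folder/export-*'],
                # The new option: "CSV" (the default),
                # "NEWLINE_DELIMITED_JSON", or "AVRO".
                'destinationFormat': 'NEWLINE_DELIMITED_JSON',
            }
        }
    }

export_table submits a body like this to the BigQuery jobs API and returns the resulting job resource, which run() then polls to completion with poll_job.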
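
The bug behind PATCH 3/4 is also worth a word, because it is easy to reintroduce: PATCH 1/4 inserted export_format ahead of num_retries in export_table's signature, so the existing call in run(), which passed num_retries positionally, quietly delivered the retry count as the export format (and poll_job was handed an extra export_format argument it never expected). A toy reproduction, not the sample's real code, showing why the fix switches to keyword arguments:

    def export_table(service, cloud_storage_path,
                     projectId, datasetId, tableId,
                     export_format="CSV",
                     num_retries=5):
        # Toy body: just report which values landed where.
        return export_format, num_retries

    # Pre-PATCH-3 call site: the sixth positional argument (meant to be
    # the retry count) fills the new export_format slot instead.
    print(export_table('svc', 'gs://b/f/', 'p', 'd', 't', 5))
    # -> (5, 5): 5 is not a valid destinationFormat

    # Post-PATCH-3 call site: trailing arguments passed by keyword.
    print(export_table('svc', 'gs://b/f/', 'p', 'd', 't',
                       num_retries=5, export_format="AVRO"))
    # -> ('AVRO', 5)

Passing everything after the required parameters by keyword keeps the call site immune to future signature reshuffles of this kind.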