From 4fe0e479c33114ed4363f6d96fa9fb459b01a369 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Wed, 29 Jul 2015 12:04:19 -0400 Subject: [PATCH 1/3] Add support for table schemas. --- gcloud/bigquery/table.py | 61 ++++++++++++++++++++++- gcloud/bigquery/test__helpers.py | 3 +- gcloud/bigquery/test_table.py | 84 ++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+), 2 deletions(-) diff --git a/gcloud/bigquery/table.py b/gcloud/bigquery/table.py index 82c5417d717d..8f5cb85f7b5b 100644 --- a/gcloud/bigquery/table.py +++ b/gcloud/bigquery/table.py @@ -22,6 +22,35 @@ from gcloud.bigquery._helpers import _prop_from_datetime +class SchemaField(object): + """Describe a single field within a table schema. + + :type name: string + :param name: the name of the field + + :type field_type: string + :param field_type: the type of the field (one of 'STRING', 'INTEGER', + 'FLOAT', 'BOOLEAN', 'TIMESTAMP' or 'RECORD') + + :type mode: string + :param mode: the mode of the field (one of 'NULLABLE', 'REQUIRED', + or 'REPEATED') + + :type description: string + :param description: optional description for the field + + :type fields: list of ``SchemaField``, or None + :param fields: subfields (requires ``field_type`` of 'RECORD'). + """ + def __init__(self, name, field_type, mode='NULLABLE', description=None, + fields=None): + self.name = name + self.field_type = field_type + self.mode = mode + self.description = description + self.fields = fields + + class Table(object): """Tables represent a set of rows whose values correspond to a schema. @@ -33,12 +62,16 @@ class Table(object): :type dataset: :class:`gcloud.bigquery.dataset.Dataset` :param dataset: The dataset which contains the table. 
+ + :type schema: list of :class:`SchemaField` + :param schema: The table's schema """ - def __init__(self, name, dataset): + def __init__(self, name, dataset, schema=()): self.name = name self._dataset = dataset self._properties = {} + self.schema = schema @property def path(self): @@ -49,6 +82,32 @@ def path(self): """ return '%s/tables/%s' % (self._dataset.path, self.name) + @property + def schema(self): + """Table's schema. + + :rtype: list of ``SchemaField`` + :returns: fields describing the schema + """ + return list(self._schema) + + @schema.setter + def schema(self, value): + """Update table's schema + + :type value: list of ``SchemaField`` + :param value: fields describing the schema + + :raises: TypeError if 'value' is not a sequence, or ValueError if + any item in the sequence is not a SchemaField + """ + fields = list(value) + if len(fields) > 0: + types = set([type(field) for field in fields]) + if types != set([SchemaField]): + raise ValueError('Schema items must be fields') + self._schema = tuple(value) + @property def created(self): """Datetime at which the table was created. 
diff --git a/gcloud/bigquery/test__helpers.py b/gcloud/bigquery/test__helpers.py index 6dca59467c64..17be6af5ac5d 100644 --- a/gcloud/bigquery/test__helpers.py +++ b/gcloud/bigquery/test__helpers.py @@ -41,7 +41,8 @@ def test_w_millis(self): import datetime import pytz from gcloud.bigquery._helpers import _total_seconds - NOW = datetime.datetime.utcnow().replace(tzinfo=pytz.utc) + NOW = datetime.datetime(2015, 7, 29, 17, 45, 21, 123456, + tzinfo=pytz.utc) EPOCH = datetime.datetime(1970, 1, 1, tzinfo=pytz.utc) MILLIS = _total_seconds(NOW - EPOCH) * 1000 self.assertEqual(self._callFUT(MILLIS), NOW) diff --git a/gcloud/bigquery/test_table.py b/gcloud/bigquery/test_table.py index 5a4bee8e9eb5..07498d9780e5 100644 --- a/gcloud/bigquery/test_table.py +++ b/gcloud/bigquery/test_table.py @@ -15,6 +15,53 @@ import unittest2 +class TestSchemaField(unittest2.TestCase): + + def _getTargetClass(self): + from gcloud.bigquery.table import SchemaField + return SchemaField + + def _makeOne(self, *args, **kw): + return self._getTargetClass()(*args, **kw) + + def test_ctor_defaults(self): + field = self._makeOne('test', 'STRING') + self.assertEqual(field.name, 'test') + self.assertEqual(field.field_type, 'STRING') + self.assertEqual(field.mode, 'NULLABLE') + self.assertEqual(field.description, None) + self.assertEqual(field.fields, None) + + def test_ctor_explicit(self): + field = self._makeOne('test', 'STRING', mode='REQUIRED', + description='Testing') + self.assertEqual(field.name, 'test') + self.assertEqual(field.field_type, 'STRING') + self.assertEqual(field.mode, 'REQUIRED') + self.assertEqual(field.description, 'Testing') + self.assertEqual(field.fields, None) + + def test_ctor_subfields(self): + field = self._makeOne('phone_number', 'RECORD', + fields=[self._makeOne('area_code', 'STRING'), + self._makeOne('local_number', 'STRING')]) + self.assertEqual(field.name, 'phone_number') + self.assertEqual(field.field_type, 'RECORD') + self.assertEqual(field.mode, 'NULLABLE') + 
self.assertEqual(field.description, None) + self.assertEqual(len(field.fields), 2) + self.assertEqual(field.fields[0].name, 'area_code') + self.assertEqual(field.fields[0].field_type, 'STRING') + self.assertEqual(field.fields[0].mode, 'NULLABLE') + self.assertEqual(field.fields[0].description, None) + self.assertEqual(field.fields[0].fields, None) + self.assertEqual(field.fields[1].name, 'local_number') + self.assertEqual(field.fields[1].field_type, 'STRING') + self.assertEqual(field.fields[1].mode, 'NULLABLE') + self.assertEqual(field.fields[1].description, None) + self.assertEqual(field.fields[1].fields, None) + + class TestTable(unittest2.TestCase): PROJECT = 'project' DS_NAME = 'dataset-name' @@ -37,6 +84,7 @@ def test_ctor(self): table.path, '/projects/%s/datasets/%s/tables/%s' % ( self.PROJECT, self.DS_NAME, self.TABLE_NAME)) + self.assertEqual(table.schema, []) self.assertEqual(table.created, None) self.assertEqual(table.etag, None) @@ -53,6 +101,42 @@ def test_ctor(self): self.assertEqual(table.location, None) self.assertEqual(table.view_query, None) + def test_ctor_w_schema(self): + from gcloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) + dataset = _Dataset(client) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table = self._makeOne(self.TABLE_NAME, dataset, + schema=[full_name, age]) + self.assertEqual(table.schema, [full_name, age]) + + def test_schema_setter_non_list(self): + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + with self.assertRaises(TypeError): + table.schema = object() + + def test_schema_setter_invalid_field(self): + from gcloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + with self.assertRaises(ValueError): + 
table.schema = [full_name, object()] + + def test_schema_setter(self): + from gcloud.bigquery.table import SchemaField + client = _Client(self.PROJECT) + dataset = _Dataset(client) + table = self._makeOne(self.TABLE_NAME, dataset) + full_name = SchemaField('full_name', 'STRING', mode='REQUIRED') + age = SchemaField('age', 'INTEGER', mode='REQUIRED') + table.schema = [full_name, age] + self.assertEqual(table.schema, [full_name, age]) + def test_props_set_by_server(self): import datetime import pytz From 938ab1b15d4777258e707c7ae4b3547e9dd0d688 Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 4 Aug 2015 11:28:13 -0400 Subject: [PATCH 2/3] Use Sphinx :class: role consistently. Addresses: https://github.com/GoogleCloudPlatform/gcloud-python/pull/1022#discussion_r35887168 --- gcloud/bigquery/table.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gcloud/bigquery/table.py b/gcloud/bigquery/table.py index 8f5cb85f7b5b..a264015f9f3e 100644 --- a/gcloud/bigquery/table.py +++ b/gcloud/bigquery/table.py @@ -39,7 +39,7 @@ class SchemaField(object): :type description: string :param description: optional description for the field - :type fields: list of ``SchemaField``, or None + :type fields: list of :class:`SchemaField`, or None :param fields: subfields (requires ``field_type`` of 'RECORD'). """ def __init__(self, name, field_type, mode='NULLABLE', description=None, @@ -86,7 +86,7 @@ def path(self): def schema(self): """Table's schema. 
- :rtype: list of ``SchemaField`` + :rtype: list of :class:`SchemaField` :returns: fields describing the schema """ return list(self._schema) @@ -95,7 +95,7 @@ def schema(self): def schema(self, value): """Update table's schema - :type value: list of ``SchemaField`` + :type value: list of :class:`SchemaField` :param value: fields describing the schema :raises: TypeError if 'value' is not a sequence, or ValueError if From a4e78d5ebdc9a334c073cdbe9680cfada6569efd Mon Sep 17 00:00:00 2001 From: Tres Seaver Date: Tue, 4 Aug 2015 11:33:27 -0400 Subject: [PATCH 3/3] Simplify test for non-fields, avoid intermediate set/list. Addresses: https://github.com/GoogleCloudPlatform/gcloud-python/pull/1022#discussion_r35887539. --- gcloud/bigquery/table.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/gcloud/bigquery/table.py b/gcloud/bigquery/table.py index a264015f9f3e..a3c63ced04ba 100644 --- a/gcloud/bigquery/table.py +++ b/gcloud/bigquery/table.py @@ -101,11 +101,8 @@ def schema(self, value): :raises: TypeError if 'value' is not a sequence, or ValueError if any item in the sequence is not a SchemaField """ - fields = list(value) - if len(fields) > 0: - types = set([type(field) for field in fields]) - if types != set([SchemaField]): - raise ValueError('Schema items must be fields') + if not all(isinstance(field, SchemaField) for field in value): + raise ValueError('Schema items must be fields') self._schema = tuple(value) @property