From c43fc066f6bd051bd744bd186bcc276fb0773575 Mon Sep 17 00:00:00 2001 From: Danny Hermes Date: Tue, 19 Jul 2016 16:57:12 -0700 Subject: [PATCH] Making HappyBase create_table() atomic. In the process, adding a column families option to Table.create() in the low-level API. Fixes #1524. --- gcloud/bigtable/column_family.py | 23 +++++----- gcloud/bigtable/happybase/connection.py | 20 +++------ gcloud/bigtable/happybase/test_connection.py | 14 ++---- gcloud/bigtable/table.py | 46 ++++++++++---------- gcloud/bigtable/test_column_family.py | 16 +++++++ gcloud/bigtable/test_table.py | 28 +++++++++++- 6 files changed, 86 insertions(+), 61 deletions(-) diff --git a/gcloud/bigtable/column_family.py b/gcloud/bigtable/column_family.py index 10127aa74961..5c33e6f456c3 100644 --- a/gcloud/bigtable/column_family.py +++ b/gcloud/bigtable/column_family.py @@ -248,13 +248,20 @@ def __eq__(self, other): def __ne__(self, other): return not self.__eq__(other) - def create(self): - """Create this column family.""" + def to_pb(self): + """Converts the column family to a protobuf. + + :rtype: :class:`.table_v2_pb2.ColumnFamily` + :returns: The converted current object. + """ if self.gc_rule is None: - column_family = table_v2_pb2.ColumnFamily() + return table_v2_pb2.ColumnFamily() else: - column_family = table_v2_pb2.ColumnFamily( - gc_rule=self.gc_rule.to_pb()) + return table_v2_pb2.ColumnFamily(gc_rule=self.gc_rule.to_pb()) + + def create(self): + """Create this column family.""" + column_family = self.to_pb() request_pb = table_admin_v2_pb2.ModifyColumnFamiliesRequest( name=self._table.name) request_pb.modifications.add( @@ -276,11 +283,7 @@ def update(self): Only the GC rule can be updated. By changing the column family ID, you will simply be referring to a different column family. """ - if self.gc_rule is None: - column_family = table_v2_pb2.ColumnFamily() - else: - column_family = table_v2_pb2.ColumnFamily( - gc_rule=self.gc_rule.to_pb()) + column_family = self.to_pb() request_pb = table_admin_v2_pb2.ModifyColumnFamiliesRequest( name=self._table.name) request_pb.modifications.add( diff --git a/gcloud/bigtable/happybase/connection.py b/gcloud/bigtable/happybase/connection.py index 3780763cd033..09428073d2c8 100644 --- a/gcloud/bigtable/happybase/connection.py +++ b/gcloud/bigtable/happybase/connection.py @@ -297,15 +297,6 @@ def create_table(self, name, families): The only column family options from HappyBase that are able to be used with Cloud Bigtable are ``max_versions`` and ``time_to_live``. - .. note:: - - This method is **not** atomic. The Cloud Bigtable API separates - the creation of a table from the creation of column families. Thus - this method needs to send 1 request for the table creation and 1 - request for each column family. If any of these fails, the method - will fail, but the progress made towards completion cannot be - rolled back. - Values in ``families`` represent column family options. In HappyBase, these are dictionaries, corresponding to the ``ColumnDescriptor`` structure in the Thrift API. The accepted keys are: @@ -353,19 +344,18 @@ def create_table(self, name, families): # Create table instance and then make API calls. name = self._table_name(name) low_level_table = _LowLevelTable(name, self._instance) + column_families = ( + low_level_table.column_family(column_family_name, gc_rule=gc_rule) + for column_family_name, gc_rule in six.iteritems(gc_rule_dict) + ) try: - low_level_table.create() + low_level_table.create(column_families=column_families) except face.NetworkError as network_err: if network_err.code == interfaces.StatusCode.ALREADY_EXISTS: raise AlreadyExists(name) else: raise - for column_family_name, gc_rule in gc_rule_dict.items(): - column_family = low_level_table.column_family( - column_family_name, gc_rule=gc_rule) - column_family.create() - def delete_table(self, name, disable=False): """Delete the specified table. diff --git a/gcloud/bigtable/happybase/test_connection.py b/gcloud/bigtable/happybase/test_connection.py index f70c69eaaea5..957122a95d41 100644 --- a/gcloud/bigtable/happybase/test_connection.py +++ b/gcloud/bigtable/happybase/test_connection.py @@ -370,14 +370,11 @@ def make_table(*args, **kwargs): col_fam_created.sort(key=operator.attrgetter('column_family_id')) self.assertEqual(col_fam_created[0].column_family_id, col_fam1) self.assertEqual(col_fam_created[0].gc_rule, mock_gc_rule) - self.assertEqual(col_fam_created[0].create_calls, 1) self.assertEqual(col_fam_created[1].column_family_id, col_fam2) self.assertEqual(col_fam_created[1].gc_rule, mock_gc_rule) - self.assertEqual(col_fam_created[1].create_calls, 1) self.assertEqual(col_fam_created[2].column_family_id, col_fam3.decode('utf-8')) self.assertEqual(col_fam_created[2].gc_rule, mock_gc_rule) - self.assertEqual(col_fam_created[2].create_calls, 1) def test_create_table_bad_type(self): instance = _Instance() # Avoid implicit environ check. @@ -696,10 +693,6 @@ class _MockLowLevelColumnFamily(object): def __init__(self, column_family_id, gc_rule=None): self.column_family_id = column_family_id self.gc_rule = gc_rule - self.create_calls = 0 - - def create(self): - self.create_calls += 1 class _MockLowLevelTable(object): @@ -715,12 +708,11 @@ def __init__(self, *args, **kwargs): def delete(self): self.delete_calls += 1 - def create(self): + def create(self, column_families=()): self.create_calls += 1 + self.col_fam_created.extend(column_families) if self.create_error: raise self.create_error def column_family(self, column_family_id, gc_rule=None): - result = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule) - self.col_fam_created.append(result) - return result + return _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule) diff --git a/gcloud/bigtable/table.py b/gcloud/bigtable/table.py index 3eef6fe2a5ad..5bcead18e906 100644 --- a/gcloud/bigtable/table.py +++ b/gcloud/bigtable/table.py @@ -19,6 +19,8 @@ bigtable_pb2 as data_messages_v2_pb2) from gcloud.bigtable._generated_v2 import ( bigtable_table_admin_pb2 as table_admin_messages_v2_pb2) +from gcloud.bigtable._generated_v2 import ( + table_pb2 as table_v2_pb2) from gcloud.bigtable.column_family import _gc_rule_from_pb from gcloud.bigtable.column_family import ColumnFamily from gcloud.bigtable.row import AppendRow @@ -32,14 +34,9 @@ class Table(object): .. note:: - We don't define any properties on a table other than the name. As - the proto says, in a request: - - The ``name`` field of the Table and all of its ColumnFamilies must - be left blank, and will be populated in the response. - - This leaves only the ``current_operation`` and ``granularity`` - fields. The ``current_operation`` is only used for responses while + We don't define any properties on a table other than the name. + The only other fields are ``column_families`` and ``granularity``, + The ``column_families`` are not stored locally and ``granularity`` is an enum with only one value. We can use a :class:`Table` to: @@ -52,7 +49,7 @@ class Table(object): :type table_id: str :param table_id: The ID of the table. - :type instance: :class:`Cluster <.instance.Instance>` + :type instance: :class:`Instance <.instance.Instance>` :param instance: The instance that owns the table. """ @@ -71,7 +68,7 @@ def name(self): The table name is of the form - ``"projects/../zones/../clusters/../tables/{table_id}"`` + ``"projects/../instances/../tables/{table_id}"`` :rtype: str :returns: The table name. @@ -136,24 +133,14 @@ def __eq__(self, other): def __ne__(self, other): return not self.__eq__(other) - def create(self, initial_split_keys=None): + def create(self, initial_split_keys=None, column_families=()): """Creates this table. - .. note:: - - Though a :class:`._generated_v2.table_pb2.Table` is also - allowed (as the ``table`` property) in a create table request, we - do not support it in this method. As mentioned in the - :class:`Table` docstring, the name is the only useful property in - the table proto. - .. note:: A create request returns a :class:`._generated_v2.table_pb2.Table` but we don't use - this response. The proto definition allows for the inclusion of a - ``current_operation`` in the response, but it does not appear that - the Cloud Bigtable API returns any operation. + this response. :type initial_split_keys: list :param initial_split_keys: (Optional) List of row keys that will be @@ -163,15 +150,28 @@ def create(self, initial_split_keys=None): ``"s1"`` and ``"s2"``, three tablets will be created, spanning the key ranges: ``[, s1)``, ``[s1, s2)``, ``[s2, )``. + + :type column_families: list + :param column_families: (Optional) List or other iterable of + :class:`.ColumnFamily` instances. """ - split_pb = table_admin_messages_v2_pb2.CreateTableRequest.Split if initial_split_keys is not None: + split_pb = table_admin_messages_v2_pb2.CreateTableRequest.Split initial_split_keys = [ split_pb(key=key) for key in initial_split_keys] + + table_pb = None + if column_families: + table_pb = table_v2_pb2.Table() + for col_fam in column_families: + curr_id = col_fam.column_family_id + table_pb.column_families[curr_id].MergeFrom(col_fam.to_pb()) + request_pb = table_admin_messages_v2_pb2.CreateTableRequest( initial_splits=initial_split_keys or [], parent=self._instance.name, table_id=self.table_id, + table=table_pb, ) client = self._instance._client # We expect a `._generated_v2.table_pb2.Table` diff --git a/gcloud/bigtable/test_column_family.py b/gcloud/bigtable/test_column_family.py index d9deaf841fa0..64fe8a46c78a 100644 --- a/gcloud/bigtable/test_column_family.py +++ b/gcloud/bigtable/test_column_family.py @@ -388,6 +388,22 @@ def test___ne__(self): column_family2 = self._makeOne('column_family_id2', None) self.assertNotEqual(column_family1, column_family2) + def test_to_pb_no_rules(self): + column_family = self._makeOne('column_family_id', None) + pb_val = column_family.to_pb() + expected = _ColumnFamilyPB() + self.assertEqual(pb_val, expected) + + def test_to_pb_with_rule(self): + from gcloud.bigtable.column_family import MaxVersionsGCRule + + gc_rule = MaxVersionsGCRule(1) + column_family = self._makeOne('column_family_id', None, + gc_rule=gc_rule) + pb_val = column_family.to_pb() + expected = _ColumnFamilyPB(gc_rule=gc_rule.to_pb()) + self.assertEqual(pb_val, expected) + def _create_test_helper(self, gc_rule=None): from gcloud.bigtable._generated_v2 import ( bigtable_table_admin_pb2 as table_admin_v2_pb2) diff --git a/gcloud/bigtable/test_table.py b/gcloud/bigtable/test_table.py index 1494b3917d91..7824291c22d2 100644 --- a/gcloud/bigtable/test_table.py +++ b/gcloud/bigtable/test_table.py @@ -133,7 +133,7 @@ def test___ne__(self): table2 = self._makeOne('table_id2', 'instance2') self.assertNotEqual(table1, table2) - def _create_test_helper(self, initial_split_keys): + def _create_test_helper(self, initial_split_keys, column_families=()): from gcloud._helpers import _to_bytes from gcloud.bigtable._testing import _FakeStub @@ -145,10 +145,18 @@ def _create_test_helper(self, initial_split_keys): splits_pb = [ _CreateTableRequestSplitPB(key=_to_bytes(key)) for key in initial_split_keys or ()] + table_pb = None + if column_families: + table_pb = _TablePB() + for cf in column_families: + cf_pb = table_pb.column_families[cf.column_family_id] + if cf.gc_rule is not None: + cf_pb.gc_rule.MergeFrom(cf.gc_rule.to_pb()) request_pb = _CreateTableRequestPB( initial_splits=splits_pb, parent=self.INSTANCE_NAME, table_id=self.TABLE_ID, + table=table_pb, ) # Create response_pb @@ -161,7 +169,8 @@ def _create_test_helper(self, initial_split_keys): expected_result = None # create() has no return value. # Perform the method and check the result. - result = table.create(initial_split_keys=initial_split_keys) + result = table.create(initial_split_keys=initial_split_keys, + column_families=column_families) self.assertEqual(result, expected_result) self.assertEqual(stub.method_calls, [( 'CreateTable', @@ -177,6 +186,21 @@ def test_create_with_split_keys(self): initial_split_keys = [b's1', b's2'] self._create_test_helper(initial_split_keys) + def test_create_with_column_families(self): + from gcloud.bigtable.column_family import ColumnFamily + from gcloud.bigtable.column_family import MaxVersionsGCRule + + cf_id1 = 'col-fam-id1' + cf1 = ColumnFamily(cf_id1, None) + cf_id2 = 'col-fam-id2' + gc_rule = MaxVersionsGCRule(42) + cf2 = ColumnFamily(cf_id2, None, gc_rule=gc_rule) + + initial_split_keys = None + column_families = [cf1, cf2] + self._create_test_helper(initial_split_keys, + column_families=column_families) + def _list_column_families_helper(self): from gcloud.bigtable._testing import _FakeStub