def create_table(self, name, families):
    """Create a table together with its column families.

    .. warning::

        Of the HappyBase column family options, Cloud Bigtable can only
        honor ``max_versions`` and ``time_to_live``.

    .. note::

        This method is **not** atomic. One request is sent to create the
        table, followed by one request per column family. A failure in
        any of them makes the method fail, and whatever was already
        created is not rolled back.

    In HappyBase the values of ``families`` are dictionaries mirroring the
    ``ColumnDescriptor`` structure of the Thrift API, with these keys:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Maps column family names to column family options.
                     Each option can be one of

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`

    :raises: :class:`TypeError` if ``families`` is not a dictionary,
             :class:`ValueError` if ``families`` has no entries
    """
    if not isinstance(families, dict):
        raise TypeError('families arg must be a dictionary')
    if not families:
        message = ('Cannot create table %r (no column '
                   'families specified)' % (name,))
        raise ValueError(message)

    # Normalize every family name and parse every option up front, so that
    # bad input is rejected before any API request is made.
    rules_by_family = {}
    for raw_family_name, family_option in families.items():
        family_id = raw_family_name
        if isinstance(family_id, six.binary_type):
            family_id = family_id.decode('utf-8')
        # HappyBase allows a trailing colon on column family names.
        if family_id.endswith(':'):
            family_id = family_id[:-1]
        rules_by_family[family_id] = _parse_family_option(family_option)

    # First request: the table itself.
    table = _LowLevelTable(self._table_name(name), self._cluster)
    table.create()

    # One further request per column family.
    for family_id, gc_rule in rules_by_family.items():
        table.column_family(family_id, gc_rule=gc_rule).create()
@@ -336,3 +408,52 @@ def compact_table(self, name, major=False): """ raise NotImplementedError('The Cloud Bigtable API does not support ' 'compacting a table.') + + +def _parse_family_option(option): + """Parses a column family option into a garbage collection rule. + + .. note:: + + If ``option`` is not a dictionary, the type is not checked. + If ``option`` is :data:`None`, there is nothing to do, since this + is the correct output. + + :type option: :class:`dict`, + :data:`NoneType `, + :class:`.GarbageCollectionRule` + :param option: A column family option passes as a dictionary value in + :meth:`Connection.create_table`. + + :rtype: :class:`.GarbageCollectionRule` + :returns: A garbage collection rule parsed from the input. + """ + result = option + if isinstance(result, dict): + if not set(result.keys()) <= set(['max_versions', 'time_to_live']): + all_keys = ', '.join(repr(key) for key in result.keys()) + warning_msg = ('Cloud Bigtable only supports max_versions and ' + 'time_to_live column family settings. 
' + 'Received: %s' % (all_keys,)) + _WARN(warning_msg) + + max_num_versions = result.get('max_versions') + max_age = None + if 'time_to_live' in result: + max_age = datetime.timedelta(seconds=result['time_to_live']) + + versions_rule = age_rule = None + if max_num_versions is not None: + versions_rule = MaxVersionsGCRule(max_num_versions) + if max_age is not None: + age_rule = MaxAgeGCRule(max_age) + + if versions_rule is None: + result = age_rule + else: + if age_rule is None: + result = versions_rule + else: + result = GCRuleIntersection(rules=[age_rule, versions_rule]) + + return result diff --git a/gcloud/bigtable/happybase/test_connection.py b/gcloud/bigtable/happybase/test_connection.py index 9cefa0069245..e503a1e25a87 100644 --- a/gcloud/bigtable/happybase/test_connection.py +++ b/gcloud/bigtable/happybase/test_connection.py @@ -311,6 +311,90 @@ def test_tables_with_prefix(self): result = connection.tables() self.assertEqual(result, [unprefixed_table_name1]) + def test_create_table(self): + import operator + from gcloud._testing import _Monkey + from gcloud.bigtable.happybase import connection as MUT + + cluster = _Cluster() # Avoid implicit environ check. + connection = self._makeOne(autoconnect=False, cluster=cluster) + mock_gc_rule = object() + called_options = [] + + def mock_parse_family_option(option): + called_options.append(option) + return mock_gc_rule + + name = 'table-name' + col_fam1 = 'cf1' + col_fam_option1 = object() + col_fam2 = u'cf2' + col_fam_option2 = object() + col_fam3 = b'cf3' + col_fam_option3 = object() + families = { + col_fam1: col_fam_option1, + # A trailing colon is also allowed. 
def test_create_table_bad_type(self):
    """``create_table`` rejects a non-dictionary ``families`` argument."""
    # Passing a cluster explicitly avoids the implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=_Cluster())

    self.assertRaises(TypeError, connection.create_table,
                      'table-name', None)

def test_create_table_bad_value(self):
    """``create_table`` rejects an empty ``families`` dictionary."""
    # Passing a cluster explicitly avoids the implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=_Cluster())

    self.assertRaises(ValueError, connection.create_table,
                      'table-name', {})
time_to_live} + result = self._callFUT(option) + + gc_rule = MaxAgeGCRule(max_age) + self.assertEqual(result, gc_rule) + + def test_dictionary_both_keys(self): + import datetime + from gcloud.bigtable.column_family import GCRuleIntersection + from gcloud.bigtable.column_family import MaxAgeGCRule + from gcloud.bigtable.column_family import MaxVersionsGCRule + + versions = 42 + time_to_live = 24 * 60 * 60 + option = { + 'max_versions': versions, + 'time_to_live': time_to_live, + } + result = self._callFUT(option) + + max_age = datetime.timedelta(days=1) + # NOTE: This relies on the order of the rules in the method we are + # calling matching this order here. + gc_rule1 = MaxAgeGCRule(max_age) + gc_rule2 = MaxVersionsGCRule(versions) + gc_rule = GCRuleIntersection(rules=[gc_rule1, gc_rule2]) + self.assertEqual(result, gc_rule) + + def test_non_dictionary(self): + option = object() + self.assertFalse(isinstance(option, dict)) + result = self._callFUT(option) + self.assertEqual(result, option) + + class _Client(object): def __init__(self, *args, **kwargs): @@ -418,12 +586,33 @@ def list_tables(self): return self.list_tables_result +class _MockLowLevelColumnFamily(object): + + def __init__(self, column_family_id, gc_rule=None): + self.column_family_id = column_family_id + self.gc_rule = gc_rule + self.create_calls = 0 + + def create(self): + self.create_calls += 1 + + class _MockLowLevelTable(object): def __init__(self, *args, **kwargs): self.args = args self.kwargs = kwargs self.delete_calls = 0 + self.create_calls = 0 + self.col_fam_created = [] def delete(self): self.delete_calls += 1 + + def create(self): + self.create_calls += 1 + + def column_family(self, column_family_id, gc_rule=None): + result = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule) + self.col_fam_created.append(result) + return result