Skip to content

Commit

Permalink
Adding HappyBase Connection.create_table().
Browse files Browse the repository at this point in the history
  • Loading branch information
dhermes committed Feb 19, 2016
1 parent 9f5d9a4 commit 10d0ea4
Show file tree
Hide file tree
Showing 2 changed files with 308 additions and 0 deletions.
124 changes: 124 additions & 0 deletions gcloud/bigtable/happybase/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,17 @@
"""Google Cloud Bigtable HappyBase connection module."""


import datetime
import warnings

import six

from gcloud.bigtable.client import Client
from gcloud.bigtable.column_family import GCRuleIntersection
from gcloud.bigtable.column_family import MaxAgeGCRule
from gcloud.bigtable.column_family import MaxVersionsGCRule
from gcloud.bigtable.happybase.table import Table
from gcloud.bigtable.table import Table as _LowLevelTable


# Constants reproduced here for HappyBase compatibility, though values
Expand Down Expand Up @@ -264,6 +269,77 @@ def tables(self):

return table_names

def create_table(self, name, families):
    """Create a table together with its column families.

    .. warning::

        Of the column family options HappyBase accepts, only
        ``max_versions`` and ``time_to_live`` can be used with
        Cloud Bigtable.

    .. note::

        This method is **not** atomic. The Cloud Bigtable API creates a
        table and its column families separately, so one request is
        sent for the table and one more for every column family. A
        failure in any of them makes this method fail, but whatever
        was already created is not rolled back.

    The values of ``families`` are column family options. HappyBase
    models them as dictionaries mirroring the ``ColumnDescriptor``
    structure of the Thrift API, with these keys:

    * ``max_versions`` (``int``)
    * ``compression`` (``str``)
    * ``in_memory`` (``bool``)
    * ``bloom_filter_type`` (``str``)
    * ``bloom_filter_vector_size`` (``int``)
    * ``bloom_filter_nb_hashes`` (``int``)
    * ``block_cache_enabled`` (``bool``)
    * ``time_to_live`` (``int``)

    :type name: str
    :param name: The name of the table to be created.

    :type families: dict
    :param families: Maps column family names to column family options.
                     Each option can be one of

                     * :class:`dict`
                     * :class:`.GarbageCollectionRule`

    :raises: :class:`TypeError <exceptions.TypeError>` if ``families`` is
             not a dictionary,
             :class:`ValueError <exceptions.ValueError>` if ``families``
             has no entries
    """
    if not isinstance(families, dict):
        raise TypeError('families arg must be a dictionary')

    if not families:
        raise ValueError(
            'Cannot create table %r (no column families specified)' % (name,))

    # Every family is normalized and parsed up front so that a bad option
    # is reported before the first API request goes out.
    rules_by_family = {}
    for raw_family_name, family_option in families.items():
        family_id = raw_family_name
        if isinstance(family_id, six.binary_type):
            family_id = family_id.decode('utf-8')
        # HappyBase allows a trailing colon on family names; drop it.
        if family_id.endswith(':'):
            family_id = family_id[:-1]
        rules_by_family[family_id] = _parse_family_option(family_option)

    # Table first, then one call per column family.
    table = _LowLevelTable(self._table_name(name), self._cluster)
    table.create()

    for family_id, rule in rules_by_family.items():
        table.column_family(family_id, gc_rule=rule).create()

def enable_table(self, name):
"""Enable the specified table.
Expand Down Expand Up @@ -311,3 +387,51 @@ def compact_table(self, name, major=False):
"""
raise NotImplementedError('The Cloud Bigtable API does not support '
'compacting a table.')


def _parse_family_option(option):
"""Parses a column family option into a garbage collection rule.
.. note::
If ``option`` is not a dictionary, the type is not checked.
If ``option`` is :data:`None`, there is nothing to do, since this
is the correct output.
:type option: :class:`dict`,
:data:`NoneType <types.NoneType>`,
:class:`.GarbageCollectionRule`
:param option: A column family option passes as a dictionary value in
:meth:`Connection.create_table`.
:rtype: :class:`.GarbageCollectionRule`
:returns: A garbage collection rule parsed from the input.
:raises: :class:`ValueError <exceptions.ValueError>` if ``option`` is a
dictionary but keys other than ``max_versions`` and
``time_to_live`` are used.
"""
result = option
if isinstance(result, dict):
if not set(result.keys()) <= set(['max_versions', 'time_to_live']):
raise ValueError('Cloud Bigtable only supports max_versions and '
'time_to_live column family settings',
'Received', result.keys())

max_num_versions = result.get('max_versions')
max_age = None
if 'time_to_live' in result:
max_age = datetime.timedelta(seconds=result['time_to_live'])

if len(result) == 0:
result = None
elif len(result) == 1:
if max_num_versions is None:
result = MaxAgeGCRule(max_age)
else:
result = MaxVersionsGCRule(max_num_versions)
else: # By our check above we know this means len(result) == 2.
rule1 = MaxAgeGCRule(max_age)
rule2 = MaxVersionsGCRule(max_num_versions)
result = GCRuleIntersection(rules=[rule1, rule2])

return result
184 changes: 184 additions & 0 deletions gcloud/bigtable/happybase/test_connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,90 @@ def test_tables_with_prefix(self):
result = connection.tables()
self.assertEqual(result, [unprefixed_table_name1])

def test_create_table(self):
    """``create_table`` makes one table and one column family per key.

    The low-level table class and the option parser are both replaced
    with recording stand-ins, so no API request is made.
    """
    import operator
    from gcloud._testing import _Monkey
    from gcloud.bigtable.happybase import connection as MUT

    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)
    fake_rule = object()
    seen_options = []

    def record_option(option):
        seen_options.append(option)
        return fake_rule

    name = 'table-name'
    col_fam1, col_fam2, col_fam3 = 'cf1', u'cf2', b'cf3'
    option1, option2, option3 = object(), object(), object()
    families = {
        col_fam1: option1,
        # A trailing colon is also allowed.
        col_fam2 + ':': option2,
        col_fam3 + b':': option3,
    }

    made_tables = []

    def make_table(*args, **kwargs):
        table = _MockLowLevelTable(*args, **kwargs)
        made_tables.append(table)
        return table

    with _Monkey(MUT, _LowLevelTable=make_table,
                 _parse_family_option=record_option):
        connection.create_table(name, families)

    # Just one table would have been created.
    [table_instance] = made_tables
    self.assertEqual(table_instance.args, (name, cluster))
    self.assertEqual(table_instance.kwargs, {})
    self.assertEqual(table_instance.create_calls, 1)

    # The parser stand-in is called once per family (three times), in an
    # order we cannot predict.
    self.assertEqual(set(seen_options), {option1, option2, option3})

    # Three column families are expected; sort them by ID because the
    # creation order follows the non-deterministic dict.items().
    self.assertEqual(len(table_instance.col_fam_created), 3)
    created = sorted(table_instance.col_fam_created,
                     key=operator.attrgetter('column_family_id'))
    expected_ids = [col_fam1, col_fam2, col_fam3.decode('utf-8')]
    for family, expected_id in zip(created, expected_ids):
        self.assertEqual(family.column_family_id, expected_id)
        self.assertEqual(family.gc_rule, fake_rule)
        self.assertEqual(family.create_calls, 1)

def test_create_table_bad_type(self):
    """``create_table`` raises ``TypeError`` when ``families`` is not a dict."""
    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    not_a_dict = None
    self.assertRaises(TypeError, connection.create_table,
                      'table-name', not_a_dict)

def test_create_table_bad_value(self):
    """``create_table`` raises ``ValueError`` when ``families`` is empty."""
    cluster = _Cluster()  # Avoid implicit environ check.
    connection = self._makeOne(autoconnect=False, cluster=cluster)

    no_families = {}
    self.assertRaises(ValueError, connection.create_table,
                      'table-name', no_families)

def test_enable_table(self):
cluster = _Cluster() # Avoid implicit environ check.
connection = self._makeOne(autoconnect=False, cluster=cluster)
Expand Down Expand Up @@ -345,6 +429,78 @@ def test_compact_table(self):
connection.compact_table(name, major=major)


class Test__parse_family_option(unittest2.TestCase):
    """Tests for the ``_parse_family_option`` helper."""

    def _callFUT(self, option):
        # Imported lazily so each test resolves the function under test
        # at call time.
        from gcloud.bigtable.happybase.connection import _parse_family_option
        return _parse_family_option(option)

    def test_dictionary_no_keys(self):
        """An empty dictionary parses to ``None``."""
        self.assertEqual(self._callFUT({}), None)

    def test_null(self):
        """``None`` is passed through as ``None``."""
        self.assertEqual(self._callFUT(None), None)

    def test_dictionary_bad_key(self):
        """A key outside the supported pair raises ``ValueError``."""
        self.assertRaises(ValueError, self._callFUT, {'badkey': None})

    def test_dictionary_versions_key(self):
        """Only ``max_versions`` yields a max-versions rule."""
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        expected = MaxVersionsGCRule(versions)

        parsed = self._callFUT({'max_versions': versions})
        self.assertEqual(parsed, expected)

    def test_dictionary_ttl_key(self):
        """Only ``time_to_live`` (seconds) yields a max-age rule."""
        import datetime
        from gcloud.bigtable.column_family import MaxAgeGCRule

        one_day = datetime.timedelta(days=1)
        one_day_in_seconds = 24 * 60 * 60

        parsed = self._callFUT({'time_to_live': one_day_in_seconds})
        self.assertEqual(parsed, MaxAgeGCRule(one_day))

    def test_dictionary_both_keys(self):
        """Both keys yield an intersection of the two rules."""
        import datetime
        from gcloud.bigtable.column_family import GCRuleIntersection
        from gcloud.bigtable.column_family import MaxAgeGCRule
        from gcloud.bigtable.column_family import MaxVersionsGCRule

        versions = 42
        one_day_in_seconds = 24 * 60 * 60
        parsed = self._callFUT({
            'max_versions': versions,
            'time_to_live': one_day_in_seconds,
        })

        # NOTE: The expected rule lists age first and versions second;
        #       this has to match the order used by the function under
        #       test.
        expected = GCRuleIntersection(rules=[
            MaxAgeGCRule(datetime.timedelta(days=1)),
            MaxVersionsGCRule(versions),
        ])
        self.assertEqual(parsed, expected)

    def test_non_dictionary(self):
        """A non-dictionary option comes back unchanged."""
        opaque = object()
        self.assertNotIsInstance(opaque, dict)
        self.assertEqual(self._callFUT(opaque), opaque)


class _Client(object):

def __init__(self, *args, **kwargs):
Expand Down Expand Up @@ -385,3 +541,31 @@ def copy(self):

def list_tables(self):
return self.list_tables_result


class _MockLowLevelColumnFamily(object):

def __init__(self, column_family_id, gc_rule=None):
self.column_family_id = column_family_id
self.gc_rule = gc_rule
self.create_calls = 0

def create(self):
self.create_calls += 1


class _MockLowLevelTable(object):

def __init__(self, *args, **kwargs):
self.args = args
self.kwargs = kwargs
self.create_calls = 0
self.col_fam_created = []

def create(self):
self.create_calls += 1

def column_family(self, column_family_id, gc_rule=None):
result = _MockLowLevelColumnFamily(column_family_id, gc_rule=gc_rule)
self.col_fam_created.append(result)
return result

0 comments on commit 10d0ea4

Please sign in to comment.