From a76d5c907cc998f49031e7ae7277e950f6d7ba0a Mon Sep 17 00:00:00 2001
From: Roger Qiu
Date: Mon, 9 Nov 2020 16:01:59 +1100
Subject: [PATCH] Allow the ability to specify custom codecs

---
Review notes (this section is not part of the commit message):

* The module-level helpers (add_prefix, remove_prefix, get_codec,
  get_prefix, is_codec, extract_prefix) become methods of a new
  Multicodec class, and the package now exports only that class, so
  existing "from multicodec import add_prefix" style imports stop working.
* Multicodec(codecs) reads codecs as {name: {'prefix': code}} and layers
  it over NAME_TABLE / CODE_TABLE; a custom entry replaces a built-in
  entry that has the same name (name table) or the same code (code table).
* The codecs={} default is only read, never mutated.
* The updated tests only construct Multicodec() without arguments.

 multicodec/__init__.py   |   2 +-
 multicodec/multicodec.py | 175 +++++++++++++++++++++------------------
 tests/test_multicodec.py |  25 +++---
 3 files changed, 110 insertions(+), 92 deletions(-)

diff --git a/multicodec/__init__.py b/multicodec/__init__.py
index 1c56ca1..fc9d905 100644
--- a/multicodec/__init__.py
+++ b/multicodec/__init__.py
@@ -7,4 +7,4 @@
 __version__ = '0.2.1'
 
 
-from .multicodec import (add_prefix, remove_prefix, get_codec, get_prefix, is_codec, extract_prefix)  # noqa: F401
+from .multicodec import Multicodec  # noqa: F401
diff --git a/multicodec/multicodec.py b/multicodec/multicodec.py
index 066766b..a41c48a 100644
--- a/multicodec/multicodec.py
+++ b/multicodec/multicodec.py
@@ -3,84 +3,97 @@
 from .constants import NAME_TABLE, CODE_TABLE
 
 
-def extract_prefix(bytes_):
-    """
-    Extracts the prefix from multicodec prefixed data
-
-    :param bytes bytes_: multicodec prefixed data
-    :return: prefix for the prefixed data
-    :rtype: bytes
-    :raises ValueError: when incorrect varint is provided
-    """
-    try:
-        return varint.decode_bytes(bytes_)
-    except TypeError:
-        raise ValueError('incorrect varint provided')
-
-
-def get_prefix(multicodec):
-    """
-    Returns prefix for a given multicodec
-
-    :param str multicodec: multicodec codec name
-    :return: the prefix for the given multicodec
-    :rtype: byte
-    :raises ValueError: if an invalid multicodec name is provided
-    """
-    try:
-        prefix = varint.encode(NAME_TABLE[multicodec])
-    except KeyError:
-        raise ValueError('{} multicodec is not supported.'.format(multicodec))
-    return prefix
-
-
-def add_prefix(multicodec, bytes_):
-    """
-    Adds multicodec prefix to the given bytes input
-
-    :param str multicodec: multicodec to use for prefixing
-    :param bytes bytes_: data to prefix
-    :return: prefixed byte data
-    :rtype: bytes
-    """
-    prefix = get_prefix(multicodec)
-    return b''.join([prefix, bytes_])
-
-
-def remove_prefix(bytes_):
-    """
-    Removes prefix from a prefixed data
-
-    :param bytes bytes_: multicodec prefixed data bytes
-    :return: prefix removed data bytes
-    :rtype: bytes
-    """
-    prefix_int = extract_prefix(bytes_)
-    prefix = varint.encode(prefix_int)
-    return bytes_[len(prefix):]
-
-
-def get_codec(bytes_):
-    """
-    Gets the codec used for prefix the multicodec prefixed data
-
-    :param bytes bytes_: multicodec prefixed data bytes
-    :return: name of the multicodec used to prefix
-    :rtype: str
-    """
-    prefix = extract_prefix(bytes_)
-    try:
-        return CODE_TABLE[prefix]
-    except KeyError:
-        raise ValueError('Prefix {} not present in the lookup table'.format(prefix))
-
-
-def is_codec(name):
-    """
-    Check if the codec is a valid codec or not
-
-    :param str name: name of the codec
-    :return: if the codec is valid or not
-    :rtype: bool
-    """
-    return name in NAME_TABLE
+class Multicodec:
+    def __init__(self, codecs={}):
+        NAME_TABLE_ = {name: value['prefix'] for name, value in codecs.items()}
+        CODE_TABLE_ = {value['prefix']: name for name, value in codecs.items()}
+        self.__NAME_TABLE = {
+            **NAME_TABLE,
+            **NAME_TABLE_
+        }
+        self.__CODE_TABLE = {
+            **CODE_TABLE,
+            **CODE_TABLE_
+        }
+
+    def extract_prefix(self, bytes_):
+        """
+        Extracts the prefix from multicodec prefixed data
+
+        :param bytes bytes_: multicodec prefixed data
+        :return: prefix for the prefixed data
+        :rtype: bytes
+        :raises ValueError: when incorrect varint is provided
+        """
+        try:
+            return varint.decode_bytes(bytes_)
+        except TypeError:
+            raise ValueError('incorrect varint provided')
+
+
+    def get_prefix(self, multicodec):
+        """
+        Returns prefix for a given multicodec
+
+        :param str multicodec: multicodec codec name
+        :return: the prefix for the given multicodec
+        :rtype: byte
+        :raises ValueError: if an invalid multicodec name is provided
+        """
+        try:
+            prefix = varint.encode(self.__NAME_TABLE[multicodec])
+        except KeyError:
+            raise ValueError('{} multicodec is not supported.'.format(multicodec))
+        return prefix
+
+
+    def add_prefix(self, multicodec, bytes_):
+        """
+        Adds multicodec prefix to the given bytes input
+
+        :param str multicodec: multicodec to use for prefixing
+        :param bytes bytes_: data to prefix
+        :return: prefixed byte data
+        :rtype: bytes
+        """
+        prefix = self.get_prefix(multicodec)
+        return b''.join([prefix, bytes_])
+
+
+    def remove_prefix(self, bytes_):
+        """
+        Removes prefix from a prefixed data
+
+        :param bytes bytes_: multicodec prefixed data bytes
+        :return: prefix removed data bytes
+        :rtype: bytes
+        """
+        prefix_int = self.extract_prefix(bytes_)
+        prefix = varint.encode(prefix_int)
+        return bytes_[len(prefix):]
+
+
+    def get_codec(self, bytes_):
+        """
+        Gets the codec used for prefix the multicodec prefixed data
+
+        :param bytes bytes_: multicodec prefixed data bytes
+        :return: name of the multicodec used to prefix
+        :rtype: str
+        """
+        prefix = self.extract_prefix(bytes_)
+        try:
+            return self.__CODE_TABLE[prefix]
+        except KeyError:
+            raise ValueError('Prefix {} not present in the lookup table'.format(prefix))
+
+
+    def is_codec(self, name):
+        """
+        Check if the codec is a valid codec or not
+
+        :param str name: name of the codec
+        :return: if the codec is valid or not
+        :rtype: bool
+        """
+        return name in self.__NAME_TABLE
diff --git a/tests/test_multicodec.py b/tests/test_multicodec.py
index 4628a2c..2dab289 100644
--- a/tests/test_multicodec.py
+++ b/tests/test_multicodec.py
@@ -6,7 +6,7 @@
 import pytest
 import varint
 
-from multicodec import add_prefix, remove_prefix, get_codec, extract_prefix, get_prefix, is_codec
+from multicodec import Multicodec
 from multicodec.constants import CODECS
 
 
@@ -19,37 +19,42 @@
 
 @pytest.mark.parametrize('multicodec,prefix', CODECS.items())
 def test_verify_prefix_complete(multicodec, prefix):
+    mc = Multicodec()
     data = b'testbytesbuffer'
     prefix_int = prefix['prefix']
-    prefixed_data = add_prefix(multicodec, data)
+    prefixed_data = mc.add_prefix(multicodec, data)
 
-    assert is_codec(multicodec)
-    assert get_codec(prefixed_data) == multicodec
-    assert remove_prefix(prefixed_data) == data
-    assert extract_prefix(prefixed_data) == prefix_int
+    assert mc.is_codec(multicodec)
+    assert mc.get_codec(prefixed_data) == multicodec
+    assert mc.remove_prefix(prefixed_data) == data
+    assert mc.extract_prefix(prefixed_data) == prefix_int
 
 
 @pytest.mark.parametrize('multicodec,_', INVALID_CODECS)
 def test_get_prefix_invalid_prefix(multicodec, _):
+    mc = Multicodec()
     with pytest.raises(ValueError) as excinfo:
-        get_prefix(multicodec)
+        mc.get_prefix(multicodec)
     assert 'multicodec is not supported' in str(excinfo.value)
 
 
 @pytest.mark.parametrize('_,prefix', INVALID_CODECS)
 def test_get_codec_invalid_prefix(_, prefix):
+    mc = Multicodec()
     prefix_bytes = varint.encode(prefix)
     with pytest.raises(ValueError) as excinfo:
-        get_codec(prefix_bytes)
+        mc.get_codec(prefix_bytes)
     assert 'not present in the lookup table' in str(excinfo.value)
 
 
 @pytest.mark.parametrize('multicodec,_', INVALID_CODECS)
 def test_is_codec_invalid_prefix(multicodec, _):
-    assert not is_codec(multicodec)
+    mc = Multicodec()
+    assert not mc.is_codec(multicodec)
 
 
 def test_extract_prefix_invalid_varint():
+    mc = Multicodec()
     with pytest.raises(ValueError) as excinfo:
-        extract_prefix(b'\xff')
+        mc.extract_prefix(b'\xff')
     assert 'incorrect varint provided' in str(excinfo.value)