Skip to content
This repository has been archived by the owner on May 9, 2023. It is now read-only.

Allow the ability to specify custom codecs #11

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion multicodec/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
# Version string of the multicodec package.
__version__ = '0.2.1'


from .multicodec import (add_prefix, remove_prefix, get_codec, get_prefix, is_codec, extract_prefix) # noqa: F401
from .multicodec import Multicodec # noqa: F401
175 changes: 94 additions & 81 deletions multicodec/multicodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,84 +3,97 @@
from .constants import NAME_TABLE, CODE_TABLE


def extract_prefix(bytes_):
    """
    Extracts the prefix from multicodec prefixed data

    The leading varint of ``bytes_`` is decoded and returned as a number;
    it is the value that ``varint.encode`` turns back into the prefix bytes.

    :param bytes bytes_: multicodec prefixed data
    :return: prefix code decoded from the start of the prefixed data
    :rtype: int
    :raises ValueError: when incorrect varint is provided
    """
    try:
        return varint.decode_bytes(bytes_)
    except (TypeError, EOFError) as err:
        # NOTE(review): some varint releases appear to signal a truncated
        # input with EOFError rather than TypeError; catching both keeps the
        # documented ValueError contract -- confirm against the pinned version.
        raise ValueError('incorrect varint provided') from err


def get_prefix(multicodec):
    """
    Look up a codec by name and return its varint-encoded prefix

    :param str multicodec: multicodec codec name
    :return: the prefix for the given multicodec
    :rtype: bytes
    :raises ValueError: if the name is not a key of the name table
    """
    try:
        code = NAME_TABLE[multicodec]
    except KeyError:
        raise ValueError('{} multicodec is not supported.'.format(multicodec))
    else:
        return varint.encode(code)


def add_prefix(multicodec, bytes_):
    """
    Prepend the prefix of the named multicodec to the given data

    :param str multicodec: multicodec to use for prefixing
    :param bytes bytes_: data to prefix
    :return: the codec prefix immediately followed by ``bytes_``
    :rtype: bytes
    """
    return b''.join((get_prefix(multicodec), bytes_))


def remove_prefix(bytes_):
    """
    Strip the leading multicodec prefix from prefixed data

    :param bytes bytes_: multicodec prefixed data bytes
    :return: the data that follows the prefix
    :rtype: bytes
    """
    # Re-encode the decoded prefix to learn how many bytes it occupies.
    prefix_length = len(varint.encode(extract_prefix(bytes_)))
    return bytes_[prefix_length:]


def get_codec(bytes_):
    """
    Return the name of the codec whose prefix starts the given data

    :param bytes bytes_: multicodec prefixed data bytes
    :return: name of the multicodec used to prefix
    :rtype: str
    :raises ValueError: if the decoded prefix is not a key of the code table
    """
    prefix = extract_prefix(bytes_)
    try:
        codec_name = CODE_TABLE[prefix]
    except KeyError:
        raise ValueError('Prefix {} not present in the lookup table'.format(prefix))
    return codec_name


def is_codec(name):
    """
    Tell whether a codec name is present in the name table

    :param str name: name of the codec
    :return: ``True`` when the name is a known codec, ``False`` otherwise
    :rtype: bool
    """
    known = name in NAME_TABLE
    return known
class Multicodec:
    """
    Multicodec helper whose lookup tables can be extended with custom codecs

    Each instance merges the module level ``NAME_TABLE`` / ``CODE_TABLE``
    with the codecs passed to the constructor. Custom entries are merged
    last, so they win over a built-in entry with the same name or the same
    prefix code.
    """

    def __init__(self, codecs=None):
        """
        Build the per-instance lookup tables

        :param dict codecs: optional mapping of codec name to a mapping that
            has a ``'prefix'`` key holding the codec's prefix code; ``None``
            (the default) adds no custom codecs
        """
        # A fresh dict per call avoids sharing one mutable default object.
        custom = {} if codecs is None else codecs
        custom_names = {name: entry['prefix'] for name, entry in custom.items()}
        custom_codes = {entry['prefix']: name for name, entry in custom.items()}
        self.__NAME_TABLE = {**NAME_TABLE, **custom_names}
        self.__CODE_TABLE = {**CODE_TABLE, **custom_codes}

    def extract_prefix(self, bytes_):
        """
        Extracts the prefix from multicodec prefixed data

        The leading varint of ``bytes_`` is decoded and returned as a number;
        it is the value that ``varint.encode`` turns back into prefix bytes.

        :param bytes bytes_: multicodec prefixed data
        :return: prefix code decoded from the start of the prefixed data
        :rtype: int
        :raises ValueError: when incorrect varint is provided
        """
        try:
            return varint.decode_bytes(bytes_)
        except (TypeError, EOFError) as err:
            # NOTE(review): some varint releases appear to signal a truncated
            # input with EOFError rather than TypeError; catching both keeps
            # the documented ValueError contract -- confirm against the
            # pinned version.
            raise ValueError('incorrect varint provided') from err

    def get_prefix(self, multicodec):
        """
        Returns prefix for a given multicodec

        :param str multicodec: multicodec codec name
        :return: the varint-encoded prefix for the given multicodec
        :rtype: bytes
        :raises ValueError: if an invalid multicodec name is provided
        """
        try:
            code = self.__NAME_TABLE[multicodec]
        except KeyError as err:
            raise ValueError(
                '{} multicodec is not supported.'.format(multicodec)) from err
        return varint.encode(code)

    def add_prefix(self, multicodec, bytes_):
        """
        Adds multicodec prefix to the given bytes input

        :param str multicodec: multicodec to use for prefixing
        :param bytes bytes_: data to prefix
        :return: prefixed byte data
        :rtype: bytes
        :raises ValueError: if an invalid multicodec name is provided
        """
        prefix = self.get_prefix(multicodec)
        return b''.join([prefix, bytes_])

    def remove_prefix(self, bytes_):
        """
        Removes prefix from a prefixed data

        :param bytes bytes_: multicodec prefixed data bytes
        :return: prefix removed data bytes
        :rtype: bytes
        :raises ValueError: when incorrect varint is provided
        """
        prefix_int = self.extract_prefix(bytes_)
        # Re-encode the decoded prefix to learn how many bytes it occupies.
        prefix = varint.encode(prefix_int)
        return bytes_[len(prefix):]

    def get_codec(self, bytes_):
        """
        Gets the codec used for prefix the multicodec prefixed data

        :param bytes bytes_: multicodec prefixed data bytes
        :return: name of the multicodec used to prefix
        :rtype: str
        :raises ValueError: when the varint is incorrect or the decoded
            prefix is not present in this instance's code table
        """
        prefix = self.extract_prefix(bytes_)
        try:
            return self.__CODE_TABLE[prefix]
        except KeyError as err:
            raise ValueError(
                'Prefix {} not present in the lookup table'.format(prefix)) from err

    def is_codec(self, name):
        """
        Check if the codec is a valid codec or not

        :param str name: name of the codec
        :return: if the codec is present in this instance's name table
        :rtype: bool
        """
        return name in self.__NAME_TABLE
25 changes: 15 additions & 10 deletions tests/test_multicodec.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import pytest
import varint

from multicodec import add_prefix, remove_prefix, get_codec, extract_prefix, get_prefix, is_codec
from multicodec import Multicodec
from multicodec.constants import CODECS


Expand All @@ -19,37 +19,42 @@

# Round-trip check, parametrized over every (name, entry) pair of CODECS:
# prefix some data with the codec, then verify that the codec is recognised,
# that its name is recovered, that stripping the prefix returns the original
# data and that the extracted prefix equals entry['prefix'].
# NOTE(review): indentation is missing here and each bare module-level call
# is immediately followed by its `mc.` counterpart -- this looks like both
# sides of a diff rendered together; confirm which side is intended.
@pytest.mark.parametrize('multicodec,prefix', CODECS.items())
def test_verify_prefix_complete(multicodec, prefix):
mc = Multicodec()
data = b'testbytesbuffer'
prefix_int = prefix['prefix']
prefixed_data = add_prefix(multicodec, data)
# Rebinds prefixed_data with the result of the instance method.
prefixed_data = mc.add_prefix(multicodec, data)

assert is_codec(multicodec)
assert get_codec(prefixed_data) == multicodec
assert remove_prefix(prefixed_data) == data
assert extract_prefix(prefixed_data) == prefix_int
assert mc.is_codec(multicodec)
assert mc.get_codec(prefixed_data) == multicodec
assert mc.remove_prefix(prefixed_data) == data
assert mc.extract_prefix(prefixed_data) == prefix_int


# For every codec name in INVALID_CODECS, get_prefix must raise ValueError
# with a message containing 'multicodec is not supported'.
# NOTE(review): indentation is missing and the bare get_prefix call sits next
# to its `mc.` counterpart inside the same `with` -- this looks like both
# sides of a diff rendered together; confirm which side is intended.
@pytest.mark.parametrize('multicodec,_', INVALID_CODECS)
def test_get_prefix_invalid_prefix(multicodec, _):
mc = Multicodec()
with pytest.raises(ValueError) as excinfo:
get_prefix(multicodec)
mc.get_prefix(multicodec)
assert 'multicodec is not supported' in str(excinfo.value)


# For every prefix code in INVALID_CODECS, get_codec on the varint-encoded
# code must raise ValueError mentioning the lookup table.
# NOTE(review): indentation is missing and the bare get_codec call sits next
# to its `mc.` counterpart inside the same `with` -- this looks like both
# sides of a diff rendered together; confirm which side is intended.
@pytest.mark.parametrize('_,prefix', INVALID_CODECS)
def test_get_codec_invalid_prefix(_, prefix):
mc = Multicodec()
# The input consists of the encoded prefix only, with no payload after it.
prefix_bytes = varint.encode(prefix)
with pytest.raises(ValueError) as excinfo:
get_codec(prefix_bytes)
mc.get_codec(prefix_bytes)
assert 'not present in the lookup table' in str(excinfo.value)


# For every codec name in INVALID_CODECS, is_codec must report False.
# NOTE(review): indentation is missing and the bare is_codec assertion sits
# next to its `mc.` counterpart -- this looks like both sides of a diff
# rendered together; confirm which side is intended.
@pytest.mark.parametrize('multicodec,_', INVALID_CODECS)
def test_is_codec_invalid_prefix(multicodec, _):
assert not is_codec(multicodec)
mc = Multicodec()
assert not mc.is_codec(multicodec)


# extract_prefix must raise ValueError('incorrect varint provided') for the
# single byte 0xff (presumably an incomplete varint, since its high bit is
# set and no further byte follows -- confirm against the varint format).
# NOTE(review): indentation is missing and the bare extract_prefix call sits
# next to its `mc.` counterpart inside the same `with` -- this looks like
# both sides of a diff rendered together; confirm which side is intended.
def test_extract_prefix_invalid_varint():
mc = Multicodec()
with pytest.raises(ValueError) as excinfo:
extract_prefix(b'\xff')
mc.extract_prefix(b'\xff')
assert 'incorrect varint provided' in str(excinfo.value)