Skip to content

Commit

Permalink
python3 support (while keeping python2 support)
Browse files Browse the repository at this point in the history
  • Loading branch information
kalmi committed Dec 20, 2015
1 parent e426027 commit 512ce06
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 111 deletions.
65 changes: 0 additions & 65 deletions base58.py

This file was deleted.

95 changes: 49 additions & 46 deletions pyMultiHash.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,56 +4,59 @@
"""

import hashlib
import base58
from collections import namedtuple

from base58 import b58encode, b58decode

"""
These first two methods are kinda inefficient, but python is not really designed to mess with bytes
"""
def int_to_byte_array(big_int):
    """Convert a non-negative integer to a little-endian list of byte values.

    The least significant byte comes first (same accumulation order as the
    original implementation).

    Fixes three defects in the previous version:
    - ``while big_int > 1`` silently dropped the most significant byte
      whenever it was exactly 1 (e.g. 256 yielded [0] instead of [0, 1]);
    - ``big_int /= 256`` produced floats under Python 3 true division;
    - the bare ``print array`` debug statements were Python-2-only syntax
      (a SyntaxError under Python 3) and leaked debug output to stdout.

    :param big_int: non-negative integer to decompose
    :return: list of ints in 0..255, least significant byte first
    """
    array = []
    while big_int > 0:
        # divmod keeps everything in integer arithmetic on both py2 and py3
        big_int, low_byte = divmod(big_int, 256)
        array.append(low_byte)
    return array
# Multihash hash-function codes (one identifying byte per algorithm),
# used as keys into hash_functions and as the first byte of an encoded
# multihash.
SHA1 = 0x11
SHA256 = 0x12
SHA512 = 0x13

def bytes_to_long(bytes):
    """Interpret a sequence of byte values (most significant first) as one
    integer, by rendering each value as two hex digits and parsing the
    concatenation as base 16.
    """
    hex_digits = ''.join('%02x' % value for value in bytes)
    return int(hex_digits, 16)
# Maps a multihash function code to the hashlib constructor that produces
# the corresponding digest.
hash_functions = {
    SHA1: hashlib.sha1,
    SHA256: hashlib.sha256,
    SHA512: hashlib.sha512,
}

DecodedMultihash = namedtuple('DecodedMultihash', ['code', 'digest'])


def decode(hashstr):
hashbytes = b58decode(hashstr)

"""
the main event!
"""
def parseHash(hashstr):
    """Parse a base58-encoded multihash string and return its digest as an int.

    Expected layout: byte 0 = hash-function id, byte 1 = digest length,
    bytes 2.. = the digest itself.
    """
    hashint = base58.decode(hashstr)
    hashbytes = int_to_byte_array(hashint)
    # A valid multihash needs the two header bytes plus at least one
    # digest byte.
    if len(hashbytes) < 3:
        raise Exception("Multihash must be at least 3 bytes")
    hash_func_id = hashbytes[0]  # multihash function code
    hash_length = hashbytes[1]   # declared digest length in bytes
    # NOTE(review): int_to_byte_array emits the least significant byte
    # first, so indexing [0]/[1] here presumes the header bytes land at
    # the front of the list — verify the byte order against the spec.
    hash_contents = hashbytes[2:hash_length+2]

    return bytes_to_long(hash_contents)

def genHash(bytes,func_id):
    """Hash the given data with the function selected by `func_id` and
    return the result as a base58-encoded multihash
    (function-id byte, length byte, digest bytes).

    Raises Exception for unsupported function ids.
    """
    hashfunc = None
    if func_id == 0x11:
        #function is sha1
        hashfunc = hashlib.sha1()
    elif func_id == 0x12:
        #function is sha256
        hashfunc = hashlib.sha256()
    elif func_id == 0x13:
        #function is sha512
        hashfunc = hashlib.sha512()
    else:
        raise Exception("Requested hash is not supported")
    hashfunc.update(bytes)
    data = hashfunc.digest()
    size = hashfunc.digest_size
    # Multihash layout: [function id, digest length, digest bytes...].
    # NOTE(review): ord() over the digest only works on Python 2, where
    # digest() returns str; under Python 3 it raises TypeError.
    bytes = [func_id,size]+[ord(x) for x in data]
    return base58.encode(bytes_to_long(bytes))

print genHash("foo",0x12)
if len(hashbytes) > 129:
raise Exception("Multihash too long. must be < 129 bytes")

This comment has been minimized.

Copy link
@BrendanBenshoof

BrendanBenshoof Dec 20, 2015

I'd sooner just implement the proper varint than to do this. It defies the standard.

This comment has been minimized.

Copy link
@kalmi

kalmi Dec 20, 2015

Author Owner

I was not aware that the standard mandates a varint. My understanding is that the current standard is this: https://github.com/jbenet/multihash
Actually this is what the go implementation does: https://github.com/jbenet/go-multihash/blob/e8d2374934f16a971d1e94a864514a21ac74bf7f/multihash.go#L132

Can you point me to the actual accepted standard?

This comment has been minimized.

Copy link
@BrendanBenshoof

BrendanBenshoof Dec 22, 2015

See section "on varints" jbenet/random-ideas#1

Right now they are avoiding it for performance reasons but in python that is not generally the optimized value. So I think we should be able to do it right.

This comment has been minimized.

Copy link
@kalmi

kalmi Dec 26, 2015

Author Owner

Yeah, we could, however I am not convinced that they have actually come to a decision about the exact format of the varints. Let's leave this matter for some other time. Doing the same thing as the go implementation is good enough for now imo.


hash_func_id, hash_length = list(bytearray(hashbytes[0:2]))
hash_contents = hashbytes[2:]

if hash_length != len(hashbytes)-2:
raise Exception("Multihash length inconsistent")

return DecodedMultihash(
code=hash_func_id,
digest=hash_contents,
)


def encode(bytes, func_id=SHA256):
    """Digest the given data and return it as a base58-encoded multihash.

    The multihash layout is one byte with the hash-function code, one
    byte with the digest length, then the digest itself.  `func_id`
    must be one of the codes registered in `hash_functions` (default:
    SHA256).

    Raises Exception for unknown function ids or digests over 127 bytes.
    """
    if func_id not in hash_functions:
        raise Exception("Requested hash type is not supported")
    hash_func = hash_functions[func_id]

    hasher = hash_func(bytes)
    digest = hasher.digest()
    digest_size = hasher.digest_size

    # A single length byte caps the representable digest size.
    if digest_size > 127:
        raise Exception("multihash does not yet support digests longer than 127 bytes")

    # chr(...).encode('ascii') yields the same single byte on both py2 and py3.
    header = chr(func_id).encode('ascii') + chr(digest_size).encode('ascii')
    return b58encode(header + digest).encode('latin-1')

if __name__ == "__main__":
    # Smoke test: round-trip a known value through encode/decode.
    expected = b"QmepSLzJZG2LpJi9fak5Sgg4nQ2y7MaMGbD54DWyDrrxJt"
    print(encode(b"Hash me!") == expected)
    print(decode(expected).digest == hashlib.sha256(b"Hash me!").digest())
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
base58==0.2.2

2 comments on commit 512ce06

@BrendanBenshoof
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd prefer to avoid the base58 import and keep the module independent. Base58 is only one of many possible bases to use, and I'd rather abstract out the code we are using already rather than import a bunch of unneeded libraries.

@kalmi
Copy link
Owner Author

@kalmi kalmi commented on 512ce06 Dec 26, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I will bring back the module.

Please sign in to comment.