Remove dependence on ethash (pyethash) #2121

Merged · 6 commits · Sep 6, 2023
13 changes: 11 additions & 2 deletions docs/guides/understanding_the_mining_process.rst
@@ -1,12 +1,21 @@
Understanding the mining process
================================

.. note::

Proof-of-Work (PoW) mining is no longer used for achieving consensus on Ethereum.
Newer virtual machines, beginning with the ``ParisVM``, assume a
Proof-of-Stake (PoS) consensus mechanism which lies beyond the scope of the
execution layer. This guide is for educational purposes only.


From the :doc:`Cookbook </cookbook/index>` we can already learn how to
use the :class:`~eth.chains.base.Chain` class to create a single
blockchain as a combination of different virtual machines for different spans
of blocks.

In this guide we want to build up on that knowledge and look into the actual mining process.
In this guide we want to build up on that knowledge and look into the actual mining
process that was once important for achieving consensus on mainnet Ethereum.


.. note::
@@ -134,7 +143,7 @@ briefly go over an example that demonstrates how we can retrieve a matching
Py-EVM currently doesn't offer a stable API for actual PoW mining. The following code is for
demonstration purpose only.

Mining on the main ethereum chain is a competition done simultanously by many miners, hence the
Mining on the main ethereum chain is a competition done simultaneously by many miners, hence the
*mining difficulty* is pretty high which means it will take a very long time to find the right
``nonce`` and ``mix_hash`` on commodity hardware. In order for us to have something that we can
tinker with on a regular laptop, we'll construct a test chain with the ``difficulty`` set to ``1``.
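For concreteness, here is a minimal sketch of what such a difficulty-1 search could look like, written against the helpers added in eth/consensus/ethash.py further down in this diff; the mining_hash below is a made-up placeholder rather than a value taken from a real header:

    # Hypothetical nonce search at difficulty 1 using the new ethash helpers.
    # At difficulty 1 every nonce satisfies the target, so the loop exits on
    # the first try; raise the difficulty to watch it actually iterate.
    from eth.consensus.ethash import get_dataset_full_size, hashimoto_light, mkcache

    block_number = 1
    difficulty = 1
    mining_hash = b"\x12" * 32  # placeholder header hash

    cache = mkcache(block_number)  # ~16 MB of cache; slow to build in pure Python
    full_size = get_dataset_full_size(block_number)

    nonce = 0
    while True:
        seal = hashimoto_light(full_size, cache, mining_hash, nonce.to_bytes(8, "big"))
        if int.from_bytes(seal["result"], "big") <= 2**256 // difficulty:
            break
        nonce += 1

    print(nonce, seal["mix_digest"].hex())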
12 changes: 9 additions & 3 deletions eth/_utils/blake2/compression.py
@@ -1,6 +1,7 @@
import struct
from typing import (
Tuple,
Union,
)

doc = """
@@ -74,7 +75,7 @@ class Blake2b(Blake2):
def blake2b_compress(
num_rounds: int,
h_starting_state: TMessageBlock,
block: bytes,
block: Union[bytes, TMessageBlock],
t_offset_counters: Tuple[int, int],
final_block_flag: bool,
) -> bytes:
@@ -101,8 +102,13 @@ def blake2b_compress(
sigma_schedule = Blake2b.sigma_schedule
sigma_schedule_len = len(sigma_schedule)

# convert block (bytes) into 16 LE words
m = struct.unpack_from("<16%s" % Blake2b.WORDFMT, bytes(block))
# convert block (if bytes) into tuple of 16 LE words
# *later versions of blake2b use the tuple form, but older versions use bytes
m = (
block
if isinstance(block, tuple)
else struct.unpack_from("<16%s" % Blake2b.WORDFMT, bytes(block))
)

v = [0] * 16
v[0:8] = h_starting_state
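The widened signature accepts either form of the message block. A small sketch of the equivalence, assuming Blake2b.WORDFMT is the 64-bit "Q" struct format and using an arbitrary placeholder state rather than real BLAKE2b constants:

    import struct

    from eth._utils.blake2.compression import blake2b_compress

    h_state = tuple(range(8))        # placeholder 8-word starting state
    block_bytes = bytes(range(128))  # one 128-byte message block
    block_words = struct.unpack("<16Q", block_bytes)  # same block as 16 LE words

    # Both forms should compress to the same output.
    as_bytes = blake2b_compress(12, h_state, block_bytes, (128, 0), True)
    as_words = blake2b_compress(12, h_state, block_words, (128, 0), True)
    assert as_bytes == as_words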
235 changes: 235 additions & 0 deletions eth/consensus/ethash.py
@@ -0,0 +1,235 @@
"""
This file was heavily inspired by and borrowed from the ethereum.org page on Ethash,
as well as the ``ethereum/execution-specs`` repository implementation of Ethash.
"""
from typing import (
Callable,
Dict,
Sequence,
Tuple,
Union,
)

from Crypto.Hash import (
keccak as pc_keccak,
)
from eth_typing import (
Hash32,
)

WORD_BYTES = 4 # bytes in word
DATASET_BYTES_INIT = 2**30 # bytes in dataset at genesis
DATASET_BYTES_GROWTH = 2**23 # dataset growth per epoch
CACHE_BYTES_INIT = 2**24 # bytes in cache at genesis
CACHE_BYTES_GROWTH = 2**17 # cache growth per epoch
CACHE_MULTIPLIER = 1024 # Size of the DAG relative to the cache
EPOCH_LENGTH = 30000 # blocks per epoch
MIX_BYTES = 128 # width of mix
HASH_BYTES = 64 # hash length in bytes
DATASET_PARENTS = 256 # number of parents of each dataset element
CACHE_ROUNDS = 3 # number of rounds in cache production
ACCESSES = 64 # number of accesses in hashimoto loop

FNV_PRIME = 0x01000193


def fnv(v1: int, v2: int) -> int:
return ((v1 * FNV_PRIME) ^ v2) % 2**32


def encode_int(num: int) -> str:
return hex(num)[2::-1] # strip off '0x', and reverse


def zpad(foo: str, length: int) -> str:
return foo + "\x00" * max(0, length - len(foo))


def keccak_256(seed: bytes) -> bytes:
hasher = pc_keccak.new(data=seed, digest_bits=256)
return hasher.digest()


def keccak_512(seed: bytes) -> bytes:
hasher = pc_keccak.new(data=seed, digest_bits=512)
return hasher.digest()


def get_cache_size(block_number: int) -> int:
sz = CACHE_BYTES_INIT + CACHE_BYTES_GROWTH * (block_number // EPOCH_LENGTH)
sz -= HASH_BYTES
while not isprime(sz // HASH_BYTES):
sz -= 2 * HASH_BYTES
return sz


def get_dataset_full_size(block_number: int) -> int:
sz = DATASET_BYTES_INIT + DATASET_BYTES_GROWTH * (block_number // EPOCH_LENGTH)
sz -= MIX_BYTES
while not isprime(sz / MIX_BYTES):
sz -= 2 * MIX_BYTES
return sz


def isprime(x: Union[int, float]) -> bool:
for i in range(2, int(x**0.5)):
if x % i == 0:
return False
return True
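# An illustrative check of the sizes these helpers imply at epoch 0: the cache
# starts just under CACHE_BYTES_INIT (16 MiB) and the full dataset just under
# DATASET_BYTES_INIT (1 GiB), each trimmed down until its row count is prime.
# For example:
#
#     assert get_cache_size(0) < CACHE_BYTES_INIT
#     assert get_cache_size(0) % HASH_BYTES == 0
#     assert isprime(get_cache_size(0) // HASH_BYTES)
#     assert get_dataset_full_size(0) < DATASET_BYTES_INIT
#     assert get_dataset_full_size(0) % MIX_BYTES == 0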


def serialize_hash(h: bytes) -> bytes:
foo = "".join([zpad(encode_int(x), 4) for x in h])
return foo.encode()


def generate_seed_hash(block_number: int) -> bytes:
epoch = block_number // EPOCH_LENGTH
seed = b"\x00" * 32
while epoch != 0:
seed = serialize_hash(keccak_256(seed))
epoch -= 1
return seed


def xor(first_item: bytes, second_item: int) -> bytes:
return bytes([a ^ b for a, b in zip(first_item, bytes(second_item))])


def mkcache(block_number: int) -> Tuple[Tuple[int, ...], ...]:
cache_size = get_cache_size(block_number)
cache_size_words = cache_size // HASH_BYTES

seed = generate_seed_hash(block_number)

# Sequentially produce the initial dataset
cache = [keccak_512(seed)]
previous_cache_item = cache[0]
for _ in range(1, cache_size_words):
cache_item = keccak_512(previous_cache_item)
cache.append(cache_item)
previous_cache_item = cache_item

# Use a low-round version of `RandMemoHash` algorithm
for _ in range(CACHE_ROUNDS):
for i in range(cache_size_words):
first_cache_item = cache[i - 1 + int(cache_size_words) % cache_size_words]
foo = bytes_to_int(cache[i][0:4])
second_cache_item = foo % cache_size_words
result = xor(first_cache_item, second_cache_item)
cache[i] = keccak_512(result)

return tuple(le_bytes_to_uint32_sequence(cache_item) for cache_item in cache)


def int_to_le_bytes(val: int, num_bytes: int = None) -> bytes:
if num_bytes is None:
bit_length = int(val).bit_length()
num_bytes = (bit_length + 7) // 8
return val.to_bytes(num_bytes, "little")


def bytes_to_int(val: bytes) -> int:
return int.from_bytes(val, "little")


def le_bytes_to_uint32_sequence(data: bytes) -> Tuple[int, ...]:
sequence = []
for i in range(0, len(data), 4):
sequence.append(bytes_to_int(data[i : i + 4]))

return tuple(sequence)


def le_uint32_sequence_to_bytes(sequence: Sequence[int]) -> bytes:
result_bytes = b""
for item in sequence:
result_bytes += int_to_le_bytes(item, 4)

return result_bytes


def from_le_bytes(data: bytes) -> int:
return bytes_to_int(data)


def le_uint32_sequence_to_uint(sequence: Sequence[int]) -> int:
sequence_as_bytes = le_uint32_sequence_to_bytes(sequence)
return from_le_bytes(sequence_as_bytes)


def fnv_hash(mix_integers: Tuple[int, ...], data: Tuple[int, ...]) -> Tuple[int, ...]:
return tuple(fnv(mix_integers[i], data[i]) for i in range(len(mix_integers)))


def calc_dataset_item(cache: Tuple[Tuple[int, ...], ...], i: int) -> Tuple[int, ...]:
n = len(cache)
r = HASH_BYTES // WORD_BYTES # 16

mix = keccak_512(
int_to_le_bytes((le_uint32_sequence_to_uint(cache[i % n]) ^ i), HASH_BYTES)
)
mix_integers = le_bytes_to_uint32_sequence(mix)

# fnv it with a lot of random cache nodes based on i
for j in range(DATASET_PARENTS):
cache_index = fnv(i ^ j, mix_integers[j % r])
mix_integers = fnv_hash(mix_integers, cache[cache_index % n])

mix = le_uint32_sequence_to_bytes(mix_integers)
return le_bytes_to_uint32_sequence(keccak_512(mix))


def _hashimoto(
header_hash: bytes,
nonce: bytes,
dataset_size: int,
fetch_dataset_item: Callable[[int], Tuple[int, ...]],
) -> Dict[str, bytes]:
mix_hashes = MIX_BYTES // HASH_BYTES

nonce_le = bytes(reversed(nonce))
seed_hash = keccak_512(header_hash + nonce_le)
seed_head = from_le_bytes(seed_hash[:4])

rows = dataset_size // 128
mix = le_bytes_to_uint32_sequence(seed_hash) * mix_hashes

for i in range(ACCESSES):
new_data: Tuple[int, ...] = ()
parent = fnv(i ^ seed_head, mix[i % len(mix)]) % rows
for j in range(MIX_BYTES // HASH_BYTES):
new_data += fetch_dataset_item(2 * parent + j)

mix = fnv_hash(mix, new_data)

compressed_mix = []
for i in range(0, len(mix), 4):
compressed_mix.append(fnv(fnv(fnv(mix[i], mix[i + 1]), mix[i + 2]), mix[i + 3]))

mix_digest = le_uint32_sequence_to_bytes(compressed_mix)
result = keccak_256(seed_hash + mix_digest)

return {"mix_digest": mix_digest, "result": result}


def hashimoto_light(
full_size: int, cache: Tuple[Tuple[int, ...], ...], header: Hash32, nonce: bytes
) -> Dict[str, bytes]:
return _hashimoto(
header,
nonce,
full_size,
lambda x: calc_dataset_item(cache, x),
)


def hashimoto(
full_size: int, dataset: Tuple[Tuple[int, ...], ...], header: Hash32, nonce: bytes
) -> Dict[str, bytes]:
return _hashimoto(
header,
nonce,
full_size,
lambda x: dataset[x],
)
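A short usage sketch tying the pieces together: since calc_dataset_item regenerates dataset rows on demand from the cache, hashimoto over a (lazily built) dataset and hashimoto_light over just the cache should agree. The LazyDataset helper below is hypothetical and only exists because materialising the full ~1 GiB dataset in pure Python is impractical:

    from eth.consensus.ethash import (
        calc_dataset_item,
        get_dataset_full_size,
        hashimoto,
        hashimoto_light,
        mkcache,
    )

    block_number = 0
    cache = mkcache(block_number)  # ~16 MB; takes a while in pure Python
    full_size = get_dataset_full_size(block_number)

    class LazyDataset(dict):
        """Hypothetical helper: build dataset rows from the cache on first access."""

        def __missing__(self, index: int):
            row = calc_dataset_item(cache, index)
            self[index] = row
            return row

    dataset = LazyDataset()
    header, nonce = b"\x00" * 32, b"\x00" * 8

    assert hashimoto(full_size, dataset, header, nonce) == hashimoto_light(
        full_size, cache, header, nonce
    )

Passing a dict where the signature says tuple works here because hashimoto only ever indexes into the dataset.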