Skip to content

Commit

Permalink
Add theil, mpr and tau to metrics (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
LadyChristina authored Jan 16, 2024
1 parent 0b7e276 commit abbd977
Show file tree
Hide file tree
Showing 8 changed files with 151 additions and 26 deletions.
4 changes: 4 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@ metrics:
gini:
hhi:
nakamoto_coefficient:
theil_index:
max_power_ratio:
tau_index:
threshold: 0.66

default_timeframe:
start_date: 2010-01-01
Expand Down
3 changes: 3 additions & 0 deletions consensus_decentralization/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
from consensus_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient # noqa: F401
from consensus_decentralization.metrics.entropy import compute_entropy, compute_entropy_percentage # noqa: F401
from consensus_decentralization.metrics.herfindahl_hirschman_index import compute_hhi # noqa: F401
from consensus_decentralization.metrics.theil_index import compute_theil_index # noqa: F401
from consensus_decentralization.metrics.max_power_ratio import compute_max_power_ratio # noqa: F401
from consensus_decentralization.metrics.tau_index import compute_tau_index # noqa: F401


def analyze(projects, aggregated_data_filename, output_dir):
Expand Down
11 changes: 11 additions & 0 deletions consensus_decentralization/metrics/max_power_ratio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
def compute_max_power_ratio(blocks_per_entity):
    """
    Computes the share of all blocks that was produced by the single most productive entity
    :param blocks_per_entity: a dictionary with entities and the blocks they have produced
    :returns: float in [0, 1] that is the largest block share held by any one entity, or 0 if there are no
        entities or no blocks at all
    """
    block_counts = list(blocks_per_entity.values())
    if not block_counts:
        return 0
    all_blocks = sum(block_counts)
    if all_blocks > 0:
        return max(block_counts) / all_blocks
    return 0
19 changes: 5 additions & 14 deletions consensus_decentralization/metrics/nakamoto_coefficient.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,10 @@
from consensus_decentralization.metrics.tau_index import compute_tau_index


def compute_nakamoto_coefficient(blocks_per_entity):
    """
    Calculates the Nakamoto coefficient of a distribution of blocks to entities
    :param blocks_per_entity: a dictionary with entities and the blocks they have produced
    :returns: int that represents the Nakamoto coefficient of the given distribution, or None if the data is empty
    """
    # The Nakamoto coefficient is the tau-decentralization index with a 50% threshold, so delegate to the
    # shared implementation instead of keeping a second hand-rolled "pick the next largest entity" loop.
    return compute_tau_index(blocks_per_entity=blocks_per_entity, threshold=0.5)
19 changes: 19 additions & 0 deletions consensus_decentralization/metrics/tau_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
def compute_tau_index(blocks_per_entity, threshold):
    """
    Calculates the tau-decentralization index of a distribution of blocks
    :param blocks_per_entity: a dictionary with entities and the blocks they have produced
    :param threshold: float, the parameter of the tau-decentralization index, i.e. the threshold for the power
        ratio that is captured by the index (e.g. 0.66 for 66%)
    :returns: int that corresponds to the tau index of the given distribution, i.e. the smallest number of
        entities whose combined share of the blocks is at least the threshold, or None if there were no blocks
    :raises ValueError: if the threshold cannot be reached even by all entities together (e.g. threshold > 1)
    """
    total_blocks = sum(blocks_per_entity.values())
    if total_blocks == 0:
        return None
    # Rank the block counts once (largest first) instead of searching for the maximum on every step.
    ranked_blocks = sorted(blocks_per_entity.values(), reverse=True)
    tau_index, blocks_covered = 0, 0
    # Keep a running total of blocks and divide once per check: adding up many small ratios accumulates
    # rounding error (ten shares of 0.1 add up to 0.9999999999999999), which could leave the covered
    # share just below a threshold that has actually been reached.
    while blocks_covered / total_blocks < threshold:
        if tau_index == len(ranked_blocks):
            raise ValueError(f'The threshold {threshold} cannot be reached by the given distribution')
        blocks_covered += ranked_blocks[tau_index]
        tau_index += 1
    return tau_index
21 changes: 21 additions & 0 deletions consensus_decentralization/metrics/theil_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from math import log


def compute_theil_index(blocks_per_entity):
    """
    Calculates the Theil index of a distribution of blocks to entities
    :param blocks_per_entity: a dictionary with entities and the blocks they have produced
    :returns: float that represents the Theil index of the given distribution (0 if there are no entities or
        no blocks at all)
    """
    n = len(blocks_per_entity)
    total_blocks = sum(blocks_per_entity.values())
    # Without any blocks the mean is 0 and the per-entity ratios below would divide by zero, so treat a
    # distribution with no blocks the same way as one with no entities.
    if n == 0 or total_blocks == 0:
        return 0
    mu = total_blocks / n
    theil = 0
    for nblocks in blocks_per_entity.values():
        x = nblocks / mu
        # Entities without blocks contribute nothing (x * log(x) tends to 0 as x tends to 0), and log(0)
        # is undefined, so they are skipped.
        if x > 0:
            theil += x * log(x)
    return theil / n
32 changes: 21 additions & 11 deletions docs/metrics.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,37 @@ A metric gets the aggregated data (see [Aggregator](aggregator.md)) and outputs
The metrics that have been implemented so far are the following:

1. **Nakamoto coefficient**: The Nakamoto coefficient represents the minimum number of entities that
collectively produce more than 50% of the total blocks within a given timeframe. The output of the metric is an
integer.
collectively produce more than 50% of the total blocks within a given timeframe. The output of the metric is an
integer.
2. **Gini coefficient**: The Gini coefficient represents the degree of inequality in block production. The
output of the metric is a decimal number in [0,1]. Values close to 0 indicate equality (all entities in
the system produce the same number of blocks) and values close to 1 indicate inequality (one entity
produces most or all blocks).
output of the metric is a decimal number in [0,1]. Values close to 0 indicate equality (all entities in
the system produce the same number of blocks) and values close to 1 indicate inequality (one entity
produces most or all blocks).
3. **Entropy**: Entropy represents the expected amount of information in the distribution of blocks across entities.
The output of the metric is a real number. Typically, a higher value of entropy indicates higher decentralization
(lower predictability). Entropy is parameterized by a base rate α, which defines different types of entropy:
The output of the metric is a real number. Typically, a higher value of entropy indicates higher decentralization
(lower predictability). Entropy is parameterized by a base rate α, which defines different types of entropy:
- α = -1: min entropy
- α = 0: Hartley entropy
- α = 1: Shannon entropy (this is used by default)
- α = 2: collision entropy
4. **HHI**: The Herfindahl-Hirschman Index (HHI) is a measure of market concentration. It is defined as the sum of the
squares of the market shares (as whole numbers, e.g. 40 for 40%) of the entities in the system. The output of the metric
is a real number in (0, 10000]. Values close to 0 indicate low concentration (many entities produce a similar number of
blocks) and values close to 1 indicate high concentration (one entity produces most or all blocks).
The U.S. Department of Justice has set the following thresholds for interpreting HHI values (in traditional markets):
squares of the market shares (as whole numbers, e.g. 40 for 40%) of the entities in the system. The output of the
metric is a real number in (0, 10000]. Values close to 0 indicate low concentration (many entities produce a similar
number of blocks) and values close to 10000 indicate high concentration (one entity produces most or all blocks).
The U.S. Department of Justice has set the following thresholds for interpreting HHI values (in traditional markets):
- (0, 1500): Competitive market
- [1500, 2500]: Moderately concentrated market
- (2500, 10000]: Highly concentrated market
5. **Theil index**: The Theil index is another measure of entropy which is intended to capture the lack of diversity,
or the redundancy, in a population. In practice, it is calculated as the maximum possible entropy minus the observed
entropy. The output is a real number in [0, ln(N)], where N is the number of entities. Values close to 0 indicate
equality and values close to ln(N) indicate inequality (one entity produces all blocks). Therefore, a high Theil
index suggests a population that is highly centralized.
6. **Max power ratio**: The max power ratio represents the share of blocks that are produced by the most "powerful"
entity, i.e. the entity that produces the most blocks. The output of the metric is a decimal number in [0,1].
7. **Tau-decentralization index**: The tau-decentralization index is a generalization of the Nakamoto coefficient.
It is defined as the minimum number of entities that collectively produce more than a given threshold of the total
blocks within a given timeframe. The threshold parameter is a decimal in [0, 1] (0.66 by default) and the output of
the metric is an integer.

Each metric is implemented in a separate Python script in the folder `metrics`.
Each script defines a function named `compute_<metric_name>`, which takes as input a dictionary of the form
Expand Down
68 changes: 67 additions & 1 deletion tests/test_metrics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from consensus_decentralization.metrics import entropy, gini, nakamoto_coefficient, herfindahl_hirschman_index
from consensus_decentralization.metrics import (entropy, gini, nakamoto_coefficient, herfindahl_hirschman_index,
theil_index, max_power_ratio, tau_index)
import numpy as np


Expand Down Expand Up @@ -111,3 +112,68 @@ def test_hhi():

hhi5 = herfindahl_hirschman_index.compute_hhi(blocks_per_entity={'a': 0, 'b': 0})
assert hhi5 is None


def test_compute_theil_index():
    """
    Check the output of compute_theil_index against reference values from online calculators,
    such as: http://www.poorcity.richcity.org/calculator/
    """
    precision = 3
    rounded_cases = [
        ({'a': 3, 'b': 2, 'c': 1}, 0.087),
        ({'a': 3, 'b': 2, 'c': 1, 'd': 1, 'e': 1, 'f': 1}, 0.115),
        ({'a': 432, 'b': 0, 'c': 0, 'd': 0}, 1.386),
        ({'a': 432}, 0),
    ]
    for distribution, expected in rounded_cases:
        result = theil_index.compute_theil_index(blocks_per_entity=distribution)
        assert round(result, precision) == expected

    # The empty distribution is compared without rounding
    assert theil_index.compute_theil_index(blocks_per_entity={}) == 0


def test_compute_max_power_ratio():
    """
    Check compute_max_power_ratio on uneven, single-entity, uniform and empty distributions
    """
    cases = [
        ({'a': 3, 'b': 2, 'c': 1}, 0.5),
        ({'a': 3, 'b': 2, 'c': 1, 'd': 1, 'e': 1, 'f': 1}, 1 / 3),
        ({'a': 1}, 1),
        ({'a': 1, 'b': 1, 'c': 1}, 1 / 3),
        ({}, 0),
    ]
    for distribution, expected in cases:
        assert max_power_ratio.compute_max_power_ratio(blocks_per_entity=distribution) == expected


def test_tau_33():
    """
    Check compute_tau_index with a threshold of 0.33, including the empty distribution
    """
    cases = [
        ({'a': 3, 'b': 2, 'c': 1}, 1),
        ({'a': 3, 'b': 2, 'c': 1, 'd': 1, 'e': 1, 'f': 1}, 1),
        ({'a': 1}, 1),
    ]
    for distribution, expected in cases:
        assert tau_index.compute_tau_index(blocks_per_entity=distribution, threshold=0.33) == expected

    assert tau_index.compute_tau_index(blocks_per_entity={}, threshold=0.33) is None


def test_tau_66():
    """
    Check compute_tau_index with a threshold of 0.66
    """
    cases = [
        ({'a': 3, 'b': 2, 'c': 1}, 2),
        ({'a': 3, 'b': 2, 'c': 1, 'd': 1, 'e': 1, 'f': 1}, 3),
        ({'a': 1}, 1),
    ]
    for distribution, expected in cases:
        assert tau_index.compute_tau_index(blocks_per_entity=distribution, threshold=0.66) == expected

0 comments on commit abbd977

Please sign in to comment.