From abbd9779192757e50268e463d10011294403a76e Mon Sep 17 00:00:00 2001 From: Christina Ovezik <20790332+LadyChristina@users.noreply.github.com> Date: Tue, 16 Jan 2024 17:25:23 +0000 Subject: [PATCH] Add theil, mpr and tau to metrics (#138) --- config.yaml | 4 ++ consensus_decentralization/analyze.py | 3 + .../metrics/max_power_ratio.py | 11 +++ .../metrics/nakamoto_coefficient.py | 19 ++---- .../metrics/tau_index.py | 19 ++++++ .../metrics/theil_index.py | 21 ++++++ docs/metrics.md | 32 ++++++--- tests/test_metrics.py | 68 ++++++++++++++++++- 8 files changed, 151 insertions(+), 26 deletions(-) create mode 100644 consensus_decentralization/metrics/max_power_ratio.py create mode 100644 consensus_decentralization/metrics/tau_index.py create mode 100644 consensus_decentralization/metrics/theil_index.py diff --git a/config.yaml b/config.yaml index d2d16ed..6b36a4f 100644 --- a/config.yaml +++ b/config.yaml @@ -6,6 +6,10 @@ metrics: gini: hhi: nakamoto_coefficient: + theil_index: + max_power_ratio: + tau_index: + threshold: 0.66 default_timeframe: start_date: 2010-01-01 diff --git a/consensus_decentralization/analyze.py b/consensus_decentralization/analyze.py index 706e1c4..ae62450 100644 --- a/consensus_decentralization/analyze.py +++ b/consensus_decentralization/analyze.py @@ -5,6 +5,9 @@ from consensus_decentralization.metrics.nakamoto_coefficient import compute_nakamoto_coefficient # noqa: F401 from consensus_decentralization.metrics.entropy import compute_entropy, compute_entropy_percentage # noqa: F401 from consensus_decentralization.metrics.herfindahl_hirschman_index import compute_hhi # noqa: F401 +from consensus_decentralization.metrics.theil_index import compute_theil_index # noqa: F401 +from consensus_decentralization.metrics.max_power_ratio import compute_max_power_ratio # noqa: F401 +from consensus_decentralization.metrics.tau_index import compute_tau_index # noqa: F401 def analyze(projects, aggregated_data_filename, output_dir): diff --git 
def compute_max_power_ratio(blocks_per_entity):
    """
    Calculates the maximum power ratio of a distribution of blocks to entities
    :param blocks_per_entity: a dictionary with entities and the blocks they have produced
    :returns: float that represents the share of all blocks that was produced by the single largest block
        producer, or 0 if there are no entities or no blocks at all
    """
    total_blocks = sum(blocks_per_entity.values())
    if total_blocks <= 0:
        # Covers the empty dictionary (the sum of nothing is 0) as well as entities
        # that produced no blocks, so max() is never called on an empty sequence
        return 0
    return max(blocks_per_entity.values()) / total_blocks
def compute_tau_index(blocks_per_entity, threshold):
    """
    Calculates the tau-decentralization index of a distribution of blocks
    :param blocks_per_entity: a dictionary with entities and the blocks they have produced
    :param threshold: float, the parameter of the tau-decentralization index, i.e. the threshold for the power
        ratio that is captured by the index (e.g. 0.66 for 66%)
    :returns: int that corresponds to the tau index of the given distribution, i.e. the smallest number of
        top block producers whose combined blocks reach the threshold share of all blocks, or None if there
        were no blocks
    :raises ValueError: if even all entities together cannot reach the threshold (i.e. threshold > 1)
    """
    total_blocks = sum(blocks_per_entity.values())
    if total_blocks == 0:
        return None
    # Compare block counts against the number of blocks needed instead of adding up per-entity ratios:
    # summed float ratios can fall just short of the threshold (e.g. ten shares of 0.1 add up to
    # 0.9999999999999999), which would wrongly exhaust the entities when threshold is 1
    blocks_needed = threshold * total_blocks
    # Sort once, largest producer first, rather than searching for the maximum on every iteration
    remaining_counts = iter(sorted(blocks_per_entity.values(), reverse=True))
    tau_index, blocks_covered = 0, 0
    while blocks_covered < blocks_needed:
        nblocks = next(remaining_counts, None)
        if nblocks is None:
            raise ValueError(f'Threshold {threshold} cannot be reached by the given distribution of blocks')
        tau_index += 1
        blocks_covered += nblocks
    return tau_index
def compute_theil_index(blocks_per_entity):
    """
    Calculates the Theil index of a distribution of blocks to entities
    :param blocks_per_entity: a dictionary with entities and the blocks they have produced
    :returns: float that represents the Theil index of the given distribution, or 0 if there are no entities
        or no blocks at all
    """
    n = len(blocks_per_entity)
    total_blocks = sum(blocks_per_entity.values())
    if n == 0 or total_blocks == 0:
        # Without any blocks the mean is 0 and the ratios below would divide by zero
        return 0
    mu = total_blocks / n
    theil = 0
    for nblocks in blocks_per_entity.values():
        x = nblocks / mu
        # Entities without blocks are skipped: log(0) is undefined and x * log(x) tends to 0 as x tends to 0
        if x > 0:
            theil += x * log(x)
    return theil / n
Entropy is parameterized by a base rate α, which defines different types of entropy: - α = -1: min entropy - α = 0: Hartley entropy - α = 1: Shannon entropy (this is used by default) - α = 2: collision entropy 4. **HHI**: The Herfindahl-Hirschman Index (HHI) is a measure of market concentration. It is defined as the sum of the -squares of the market shares (as whole numbers, e.g. 40 for 40%) of the entities in the system. The output of the metric -is a real number in (0, 10000]. Values close to 0 indicate low concentration (many entities produce a similar number of -blocks) and values close to 1 indicate high concentration (one entity produces most or all blocks). -The U.S. Department of Justice has set the following thresholds for interpreting HHI values (in traditional markets): + squares of the market shares (as whole numbers, e.g. 40 for 40%) of the entities in the system. The output of the + metric is a real number in (0, 10000]. Values close to 0 indicate low concentration (many entities produce a similar + number of blocks) and values close to 10000 indicate high concentration (one entity produces most or all blocks). + The U.S. Department of Justice has set the following thresholds for interpreting HHI values (in traditional markets): - (0, 1500): Competitive market - [1500, 2500]: Moderately concentrated market - (2500, 10000]: Highly concentrated market +5. **Theil index**: The Theil index is another measure of entropy which is intended to capture the lack of diversity, + or the redundancy, in a population. In practice, it is calculated as the maximum possible entropy minus the observed + entropy. The output is a real number. Values close to 0 indicate equality and values towards infinity indicate + inequality. Therefore, a high Theil Index suggests a population that is highly centralized. +6. **Max power ratio**: The max power ratio represents the share of blocks that are produced by the most "powerful" + entity, i.e. 
def test_compute_theil_index():
    """
    Check compute_theil_index against reference values rounded to three decimals. The values are meant to
    agree with online calculators, such as: http://www.poorcity.richcity.org/calculator/
    """
    precision = 3
    rounded_cases = [
        ({'a': 3, 'b': 2, 'c': 1}, 0.087),
        ({'a': 3, 'b': 2, 'c': 1, 'd': 1, 'e': 1, 'f': 1}, 0.115),
        ({'a': 432, 'b': 0, 'c': 0, 'd': 0}, 1.386),
        ({'a': 432}, 0),
    ]
    for distribution, expected in rounded_cases:
        result = theil_index.compute_theil_index(blocks_per_entity=distribution)
        assert round(result, precision) == expected

    # The empty distribution is compared without rounding
    assert theil_index.compute_theil_index(blocks_per_entity={}) == 0


def test_compute_max_power_ratio():
    """
    Check compute_max_power_ratio on skewed, uniform, single-entity and empty distributions
    """
    expectations = [
        ({'a': 3, 'b': 2, 'c': 1}, 0.5),
        ({'a': 3, 'b': 2, 'c': 1, 'd': 1, 'e': 1, 'f': 1}, 1 / 3),
        ({'a': 1}, 1),
        ({'a': 1, 'b': 1, 'c': 1}, 1 / 3),
        ({}, 0),
    ]
    for distribution, expected in expectations:
        assert max_power_ratio.compute_max_power_ratio(blocks_per_entity=distribution) == expected


def test_tau_33():
    """
    Check compute_tau_index with a threshold of 0.33, including the empty distribution
    """
    expectations = [
        ({'a': 3, 'b': 2, 'c': 1}, 1),
        ({'a': 3, 'b': 2, 'c': 1, 'd': 1, 'e': 1, 'f': 1}, 1),
        ({'a': 1}, 1),
    ]
    for distribution, expected in expectations:
        assert tau_index.compute_tau_index(blocks_per_entity=distribution, threshold=0.33) == expected

    # No blocks at all: the index is undefined and reported as None
    assert tau_index.compute_tau_index(blocks_per_entity={}, threshold=0.33) is None


def test_tau_66():
    """
    Check compute_tau_index with a threshold of 0.66
    """
    expectations = [
        ({'a': 3, 'b': 2, 'c': 1}, 2),
        ({'a': 3, 'b': 2, 'c': 1, 'd': 1, 'e': 1, 'f': 1}, 3),
        ({'a': 1}, 1),
    ]
    for distribution, expected in expectations:
        assert tau_index.compute_tau_index(blocks_per_entity=distribution, threshold=0.66) == expected