From 92cb6c7f7a6f72ee529f11d08c26bd99f2f27345 Mon Sep 17 00:00:00 2001 From: Michael Karg Date: Wed, 13 Mar 2024 11:33:59 +0100 Subject: [PATCH 01/15] wb | prof: new profile faststartup-24M --- Makefile | 2 +- nix/workbench/profile/prof1-variants.jq | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 1dd249e5beb..8e91ef0582f 100644 --- a/Makefile +++ b/Makefile @@ -75,7 +75,7 @@ ps: ## Plain-text list of profiles ## Profile-based cluster shells (autogenerated targets) ## PROFILES_BASE := default default-p2p plutus plutus-secp-ecdsa plutus-secp-schnorr oldtracing idle tracer-only -PROFILES_FAST := fast fast-p2p fast-plutus fast-notracer fast-oldtracing +PROFILES_FAST := fast fast-p2p fast-plutus fast-notracer fast-oldtracing faststartup-24M PROFILES_CI_TEST := ci-test ci-test-p2p ci-test-plutus ci-test-notracer ci-test-rtview ci-test-dense10 PROFILES_CI_BENCH := ci-bench ci-bench-p2p ci-bench-plutus ci-bench-plutus-secp-ecdsa ci-bench-plutus-secp-schnorr ci-bench-notracer ci-bench-rtview PROFILES_TRACE_BENCH := trace-bench trace-bench-notracer trace-bench-oldtracing trace-bench-rtview diff --git a/nix/workbench/profile/prof1-variants.jq b/nix/workbench/profile/prof1-variants.jq index 97632c82c72..fac5a1acd2a 100644 --- a/nix/workbench/profile/prof1-variants.jq +++ b/nix/workbench/profile/prof1-variants.jq @@ -52,6 +52,12 @@ def all_profile_variants: , delegators: (0.2 * $M) } } as $dataset_small + | + { genesis: + { utxo: (24 * $M) + , delegators: (1.2 * $M) + } + } as $dataset_24m | { genesis: { utxo: (30 * $M) @@ -798,6 +804,11 @@ def all_profile_variants: { name: "fast-oldtracing" } + ## Fast variants: single node with large, varying dataset sizes + , $fast_base * $solo * $dataset_24m * + { name: "faststartup-24M" + } + ## CI variants: test duration, 3 blocks , $citest_base * { name: "ci-test" From fd34093a52234ec483ebc25b841802a4b2593bc1 Mon Sep 17 00:00:00 2001 From: Michael Karg Date: Mon, 18 Mar 2024 
11:54:44 +0100 Subject: [PATCH 02/15] wb | profiles: new utxoscale-solo profile family --- Makefile | 3 ++ nix/workbench/profile/prof1-variants.jq | 63 ++++++++++++++++++++++++- nix/workbench/profile/prof3-derived.jq | 12 ++++- 3 files changed, 75 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 8e91ef0582f..09b1b52343e 100644 --- a/Makefile +++ b/Makefile @@ -101,6 +101,8 @@ PROFILES_NOMAD_PERF += plutus-nomadperf fast-nomadperf latency-nomadperf PROFILES_NOMAD_PERF_NOP2P := default-nomadperf-nop2p oldtracing-nomadperf-nop2p ci-test-nomadperf-nop2p ci-bench-nomadperf-nop2p PROFILES_NOMAD_PERF_NOP2P += value-nomadperf-nop2p value-oldtracing-nomadperf-nop2p plutus-nomadperf-nop2p fast-nomadperf-nop2p PROFILES_NOMAD_PERFSSD := fast-nomadperfssd +# single node profiles on the NomadSSD cluster on AWS +PROFILES_UTXOSCALE_SOLO := utxoscale-solo-24M64G-nomadperfssd utxoscale-solo-12M64G-nomadperfssd utxoscale-solo-12M16G-nomadperfssd LOCAL_PROFILES += $(PROFILES_BASE) LOCAL_PROFILES += $(PROFILES_FAST) @@ -120,6 +122,7 @@ LOCAL_PROFILES += $(PROFILES_VENDOR) CLOUD_PROFILES += $(PROFILES_NOMAD_PERF) CLOUD_PROFILES += $(PROFILES_NOMAD_PERF_NOP2P) CLOUD_PROFILES += $(PROFILES_NOMAD_PERFSSD) +CLOUD_PROFILES += $(PROFILES_UTXOSCALE_SOLO) ## Note: to enable a shell for a profile, just add its name (one of names from 'make ps') to SHELL_PROFILES diff --git a/nix/workbench/profile/prof1-variants.jq b/nix/workbench/profile/prof1-variants.jq index fac5a1acd2a..facc89f45b6 100644 --- a/nix/workbench/profile/prof1-variants.jq +++ b/nix/workbench/profile/prof1-variants.jq @@ -1,5 +1,18 @@ import "epoch-timeline" as timeline; +## For the Nomad perf-ssd cluster, we might want to artificially +## cap the large RAM resources the instances provide. 
+def nomad_memory_limit($limit): + { nomad: + { resources: + { producer: + { memory: $limit + , memory_max: $limit + } + } + } + }; + def all_profile_variants: 1024 as $Ki | 1000000 as $M @@ -281,8 +294,8 @@ def all_profile_variants: } as $compressed_timescale | { genesis: - { epoch_length: 1800 - , parameter_k: 9 + { epoch_length: 1200 + , parameter_k: 6 } } as $small_timescale | @@ -547,6 +560,10 @@ def all_profile_variants: ($model_timescale * $nomad_perf_tps_saturation_value * { scenario: "fixed-loaded" }) as $scenario_nomad_perf + | + ($small_timescale * $nomad_perf_tps_saturation_value * + { scenario: "fixed-loaded" + }) as $scenario_nomad_perfssd_solo | ($model_timescale * $model_tps_saturation_value * { scenario: "fixed-loaded" @@ -636,6 +653,23 @@ def all_profile_variants: ($scenario_latency * $compose_fiftytwo * $dataset_empty * $no_filtering * { desc: "AWS c5-2xlarge cluster, stop when all latency services stop" }) as $nomad_perf_latency_base + | + ($scenario_nomad_perfssd_solo * $solo * $dataset_24m * + { node: + { shutdown_on_slot_synced: 7200 + } + , analysis: + { filters: ["epoch3+", "size-full"] + } + , generator: + { epochs: 6 + } + , genesis: + { funds_balance: 20000000000000 + , max_block_size: 88000 + } + , desc: "AWS c5[d]-9xlarge utxoscale dataset, 6 epochs" + }) as $nomad_perfssd_solo_base | ($scenario_model * $quadruplet * $dataset_current * $for_7ep * { node: @@ -715,6 +749,13 @@ def all_profile_variants: ### Actual profiles ## + ### Profile templates + ### + # UTxO scaling on a single node, mainnet blocksize, ~2h runtime (6 epochs) - default: 24mio UTxO, 64GB RAM cap + ($nomad_perfssd_solo_base * $nomad_perfssd_unicircle * $costmodel_v8_preview * $p2p + ) as $utxoscale_solo_template + | + ### First, auto-named profiles: ### ## Short slots: @@ -970,6 +1011,24 @@ def all_profile_variants: { name: "fast-nomadperfssd" } +## P&T NomadSSD cluster: UTxO scale benchmarks on a single node + , $utxoscale_solo_template * + { name: 
"utxoscale-solo-24M64G-nomadperfssd" + } + , $utxoscale_solo_template * + { name: "utxoscale-solo-12M64G-nomadperfssd" + , genesis: + { utxo: (12 * $M) + } + } + , $utxoscale_solo_template * + { name: "utxoscale-solo-12M16G-nomadperfssd" + , genesis: + { utxo: (12 * $M) + } + , cluster: nomad_memory_limit(16384) + } + ## Model value variant: 7 epochs (128GB RAM needed; 16GB for testing locally) , $model_base * $costmodel_v8_preview * { name: "model-value" diff --git a/nix/workbench/profile/prof3-derived.jq b/nix/workbench/profile/prof3-derived.jq index dfd1894c94b..6588d96777f 100644 --- a/nix/workbench/profile/prof3-derived.jq +++ b/nix/workbench/profile/prof3-derived.jq @@ -85,6 +85,16 @@ def add_derived_params: | .era as $era | .node as $node | .genesis.shelley.protocolParams as $pparams +| .cluster.nomad as $nomad + +## The perf-ssd machines have abundant physical RAM, and Nomad uses cgroups to constrain resources. +## To also influence RTS / GC behaviour, -M needs to be used, as the RTS infers a heap limit from +## the system's ulimit, not the cgroup limit. 
+| $node.rts_flags_override as $rtsflags +| (if $nomad.class == "perf-ssd" + then $rtsflags + [("-M" + ($nomad.resources.producer.memory_max | tostring) + "M")] + else $rtsflags + end) as $rtsflags_derived ## Absolute durations: | ($gsis.epoch_length * $gsis.slot_duration) as $epoch_duration @@ -191,7 +201,7 @@ def add_derived_params: { tx_count: $generator_tx_count } , node: - { + { rts_flags_override: $rtsflags_derived } , analysis: { minimum_chain_density: ($gsis.active_slots_coeff * 0.5) From 4c5f43f4d799e6b628ed10eff333ef59e384412a Mon Sep 17 00:00:00 2001 From: Michael Karg Date: Tue, 19 Mar 2024 13:52:42 +0100 Subject: [PATCH 03/15] wb: separate node heap limit from nomad values --- nix/workbench/profile/prof0-defaults.jq | 1 + nix/workbench/profile/prof1-variants.jq | 4 +++- nix/workbench/profile/prof3-derived.jq | 6 +++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/nix/workbench/profile/prof0-defaults.jq b/nix/workbench/profile/prof0-defaults.jq index e233d45bccf..f4a37017604 100644 --- a/nix/workbench/profile/prof0-defaults.jq +++ b/nix/workbench/profile/prof0-defaults.jq @@ -61,6 +61,7 @@ def era_defaults($era): , node: { rts_flags_override: [] + , heap_limit: null ## optional: heap limit in MB (translates to RTS flag -M) , shutdown_on_slot_synced: null , shutdown_on_block_synced: null , tracing_backend: "trace-dispatcher" ## or "iohk-monitoring" diff --git a/nix/workbench/profile/prof1-variants.jq b/nix/workbench/profile/prof1-variants.jq index facc89f45b6..bc157c4bf4e 100644 --- a/nix/workbench/profile/prof1-variants.jq +++ b/nix/workbench/profile/prof1-variants.jq @@ -1026,7 +1026,9 @@ def all_profile_variants: , genesis: { utxo: (12 * $M) } - , cluster: nomad_memory_limit(16384) + , node: + { heap_limit: 16384 + } } ## Model value variant: 7 epochs (128GB RAM needed; 16GB for testing locally) diff --git a/nix/workbench/profile/prof3-derived.jq b/nix/workbench/profile/prof3-derived.jq index 6588d96777f..5a5e4b5ba13 100644 --- 
a/nix/workbench/profile/prof3-derived.jq +++ b/nix/workbench/profile/prof3-derived.jq @@ -85,14 +85,14 @@ def add_derived_params: | .era as $era | .node as $node | .genesis.shelley.protocolParams as $pparams -| .cluster.nomad as $nomad ## The perf-ssd machines have abundant physical RAM, and Nomad uses cgroups to constrain resources. ## To also influence RTS / GC behaviour, -M needs to be used, as the RTS infers a heap limit from ## the system's ulimit, not the cgroup limit. | $node.rts_flags_override as $rtsflags -| (if $nomad.class == "perf-ssd" - then $rtsflags + [("-M" + ($nomad.resources.producer.memory_max | tostring) + "M")] +| $node.heap_limit as $heap_limit +| (if $heap_limit | type == "number" + then $rtsflags + [("-M" + ($heap_limit | tostring) + "m")] else $rtsflags end) as $rtsflags_derived From bbaca2975555c93b9d77730e46a5170df47d55ac Mon Sep 17 00:00:00 2001 From: Michael Karg Date: Wed, 20 Mar 2024 12:51:48 +0100 Subject: [PATCH 04/15] wb | nomad: adjust to new deployment --- nix/workbench/profile/prof1-variants.jq | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/workbench/profile/prof1-variants.jq b/nix/workbench/profile/prof1-variants.jq index bc157c4bf4e..c9df5dfecec 100644 --- a/nix/workbench/profile/prof1-variants.jq +++ b/nix/workbench/profile/prof1-variants.jq @@ -233,14 +233,14 @@ def all_profile_variants: { namespace: "perf-ssd" , class: "perf-ssd" , resources: - { producer: {cores: 32, memory: 64000, memory_max: 64000} - , explorer: {cores: 32, memory: 64000, memory_max: 64000} + { producer: {cores: 16, memory: 128000, memory_max: 128000} + , explorer: {cores: 16, memory: 128000, memory_max: 128000} } , fetch_logs_ssh: true } , aws: { instance_type: - { producer: "c5.9xlarge" + { producer: "r5.4xlarge" , explorer: null } } From 4fa3066e31b1b9405a8efaab059fc18e2058abbd Mon Sep 17 00:00:00 2001 From: Michael Karg Date: Thu, 21 Mar 2024 14:30:03 +0100 Subject: [PATCH 05/15] 
=?UTF-8?q?wb=20|=C2=A0nix:=20add=20UTxO-HD=20LMDB?= =?UTF-8?q?=20config=20options=20and=20profile?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 2 +- nix/nixos/cardano-node-service.nix | 32 ++++++++++++++++++++++++- nix/workbench/profile/prof0-defaults.jq | 4 +++- nix/workbench/profile/prof1-variants.jq | 5 ++++ nix/workbench/service/nodes.nix | 6 +++++ 5 files changed, 46 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 09b1b52343e..bc701739bbc 100644 --- a/Makefile +++ b/Makefile @@ -77,7 +77,7 @@ ps: ## Plain-text list of profiles PROFILES_BASE := default default-p2p plutus plutus-secp-ecdsa plutus-secp-schnorr oldtracing idle tracer-only PROFILES_FAST := fast fast-p2p fast-plutus fast-notracer fast-oldtracing faststartup-24M PROFILES_CI_TEST := ci-test ci-test-p2p ci-test-plutus ci-test-notracer ci-test-rtview ci-test-dense10 -PROFILES_CI_BENCH := ci-bench ci-bench-p2p ci-bench-plutus ci-bench-plutus-secp-ecdsa ci-bench-plutus-secp-schnorr ci-bench-notracer ci-bench-rtview +PROFILES_CI_BENCH := ci-bench ci-bench-p2p ci-bench-plutus ci-bench-plutus-secp-ecdsa ci-bench-plutus-secp-schnorr ci-bench-notracer ci-bench-rtview ci-bench-lmdb PROFILES_TRACE_BENCH := trace-bench trace-bench-notracer trace-bench-oldtracing trace-bench-rtview PROFILES_TRACE_FULL := trace-full trace-full-rtview PROFILES_EPOCHTRANS := epoch-transition diff --git a/nix/nixos/cardano-node-service.nix b/nix/nixos/cardano-node-service.nix index ce603fba52d..4215854cd1b 100644 --- a/nix/nixos/cardano-node-service.nix +++ b/nix/nixos/cardano-node-service.nix @@ -128,6 +128,11 @@ let ]; }; instanceDbPath = cfg.databasePath i; + utxoLmdbParams = ["--v1-lmdb-ledger-db-backend"] + ++ lib.optionals (cfg.lmdbDatabasePath i != null) + [ "--ssd-database-dir ${cfg.lmdbDatabasePath i}" + "--ssd-snapshot-tables" + ]; cmd = builtins.filter (x: x != "") [ "${cfg.executable} run" "--config ${nodeConfigFile}" @@ -143,7 +148,8 @@ let 
"--tracer-socket-path-accept ${cfg.tracerSocketPathAccept i}" ] ++ lib.optionals (cfg.tracerSocketPathConnect i != null) [ "--tracer-socket-path-connect ${cfg.tracerSocketPathConnect i}" - ] ++ consensusParams.${cfg.nodeConfig.Protocol} ++ cfg.extraArgs ++ cfg.rtsArgs; + ] ++ lib.optionals (cfg.withUtxoHdLmdb i) utxoLmdbParams + ++ consensusParams.${cfg.nodeConfig.Protocol} ++ cfg.extraArgs ++ cfg.rtsArgs; in '' echo "Starting: ${concatStringsSep "\"\n echo \"" cmd}" echo "..or, once again, in a single line:" @@ -343,6 +349,16 @@ in { ''; }; + ssdDatabaseDir = mkOption { + type = nullOrStr; + default = null; + description = '' + Optional mount point of a device with high performance disk I/O. + This could be a direct-access SSD, with a specifically created journal-less file system and optimized mount options. + It'll be used as storage for UTxO-HD's LMDB backend only. + ''; + }; + databasePath = mkOption { type = funcToOr types.str; default = i : "${cfg.stateDir i}/${cfg.dbPrefix i}"; @@ -350,6 +366,13 @@ in { description = ''Node database path, for each instance.''; }; + lmdbDatabasePath = mkOption { + type = funcToOr nullOrStr; + default = i : if cfg.ssdDatabaseDir == null then null else "${cfg.ssdDatabaseDir}/lmdb-${cfg.dbPrefix i}"; + apply = x : if builtins.isFunction x then x else if x == null then _: null else i: x; + description = ''Node UTxO-HD LMDB path for performant disk I/O, for each instance.''; + }; + socketPath = mkOption { type = funcToOr types.str; default = i : "${runtimeDir i}/node.socket"; @@ -648,6 +671,13 @@ in { default = false; }; + withUtxoHdLmdb = mkOption { + type = funcToOr types.bool; + default = false; + apply = x: if builtins.isFunction x then x else _: x; + description = ''On an UTxO-HD enabled node, the in-memory backend is the default. 
This activates the on-disk backend (LMDB) instead.''; + }; + extraArgs = mkOption { type = types.listOf types.str; default = []; diff --git a/nix/workbench/profile/prof0-defaults.jq b/nix/workbench/profile/prof0-defaults.jq index f4a37017604..97b2d0b48a9 100644 --- a/nix/workbench/profile/prof0-defaults.jq +++ b/nix/workbench/profile/prof0-defaults.jq @@ -66,6 +66,7 @@ def era_defaults($era): , shutdown_on_block_synced: null , tracing_backend: "trace-dispatcher" ## or "iohk-monitoring" , tracer: true + , utxo_lmdb: false ## use LMDB backend (instead of default in-mem) on a UTxO-HD node; will be ignored by non-UTxO-HD nodes , verbatim: { } @@ -110,7 +111,8 @@ def era_defaults($era): , explorer: 14155776 # 13.5×1024×1024 } , keep_running: false + , ssd_directory: null } - } + } | (.common * (.[$era] // {})); diff --git a/nix/workbench/profile/prof1-variants.jq b/nix/workbench/profile/prof1-variants.jq index c9df5dfecec..71d90d0508b 100644 --- a/nix/workbench/profile/prof1-variants.jq +++ b/nix/workbench/profile/prof1-variants.jq @@ -901,6 +901,11 @@ def all_profile_variants: , $cibench_base * $with_rtview * { name: "ci-bench-rtview" } + , $cibench_base * $p2p * + { name: "ci-bench-lmdb" + , node: { utxo_lmdb: true } + , cluster: { ssd_directory: "/tmp" } + } , $cibench_base * $nomad_perf_torus * $p2p * { name: "ci-bench-nomadperf" , desc: "ci-bench on P&T exclusive cluster" diff --git a/nix/workbench/service/nodes.nix b/nix/workbench/service/nodes.nix index 4d2ccc97355..05f66cf0102 100644 --- a/nix/workbench/service/nodes.nix +++ b/nix/workbench/service/nodes.nix @@ -68,6 +68,12 @@ let topology = "topology.json"; nodeConfigFile = "config.json"; + # Allow for local clusters to have multiple LMDB directories in the same physical ssd_directory + withUtxoHdLmdb = profile.node.utxo_lmdb; + lmdbDatabasePath = + if (profile.cluster ? 
"ssd_directory" && profile.cluster.ssd_directory != null) + then "${profile.cluster.ssd_directory}/lmdb-node-${toString i}" + else null; ## Combine: ## 0. baseNodeConfig (coming cardanoLib's testnet environ) From 82154208be958a935d8bd6ce575631cd01715ab7 Mon Sep 17 00:00:00 2001 From: Michael Karg Date: Fri, 22 Mar 2024 10:19:39 +0100 Subject: [PATCH 06/15] wb | nix: simplify node svc --- nix/nixos/cardano-node-service.nix | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/nix/nixos/cardano-node-service.nix b/nix/nixos/cardano-node-service.nix index 4215854cd1b..e2724fcba8c 100644 --- a/nix/nixos/cardano-node-service.nix +++ b/nix/nixos/cardano-node-service.nix @@ -349,16 +349,6 @@ in { ''; }; - ssdDatabaseDir = mkOption { - type = nullOrStr; - default = null; - description = '' - Optional mount point of a device with high performance disk I/O. - This could be a direct-access SSD, with a specifically created journal-less file system and optimized mount options. - It'll be used as storage for UTxO-HD's LMDB backend only. - ''; - }; - databasePath = mkOption { type = funcToOr types.str; default = i : "${cfg.stateDir i}/${cfg.dbPrefix i}"; @@ -368,9 +358,12 @@ in { lmdbDatabasePath = mkOption { type = funcToOr nullOrStr; - default = i : if cfg.ssdDatabaseDir == null then null else "${cfg.ssdDatabaseDir}/lmdb-${cfg.dbPrefix i}"; - apply = x : if builtins.isFunction x then x else if x == null then _: null else i: x; - description = ''Node UTxO-HD LMDB path for performant disk I/O, for each instance.''; + default = null; + apply = x : if builtins.isFunction x then x else if x == null then _: null else _: x; + description = '' + Node UTxO-HD LMDB path for performant disk I/O, for each instance. + This could point to a direct-access SSD, with a specifically created journal-less file system and optimized mount options. 
+ ''; };   socketPath = mkOption { type = funcToOr types.str; default = i : "${runtimeDir i}/node.socket"; @@ -722,6 +715,7 @@ in { config = mkIf cfg.enable ( let stateDirBase = "/var/lib/"; runDirBase = "/run/"; + lmdbPaths = filter (x: x != null) (map (i: cfg.lmdbDatabasePath i) (genList (i: i) cfg.instances)); genInstanceConf = f: listToAttrs (if cfg.instances > 1 then genList (i: let n = "cardano-node-${toString i}"; in nameValuePair n (f n i)) cfg.instances else [ (nameValuePair "cardano-node" (f "cardano-node" 0)) ]); in lib.mkMerge [ @@ -823,6 +817,10 @@ in { assertion = !(cfg.systemdSocketActivation && cfg.useNewTopology); message = "Systemd socket activation cannot be used with p2p topology due to a systemd socket re-use issue."; } + { + assertion = (length lmdbPaths) == (length (lib.lists.unique lmdbPaths)); + message = "When configuring multiple LMDB enabled nodes on one instance, lmdbDatabasePath must be unique."; + } ]; } ]); From f64da4b22a946ca8f0c044201d24c61d3636591b Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Fri, 22 Mar 2024 01:46:43 +0000 Subject: [PATCH 07/15] wb | allow mounting Nomad host_volumes by name (SSD / LMDB) --- nix/workbench/backend/nomad-job.nix | 37 ++++++++++++++++++++++++- nix/workbench/profile/prof0-defaults.jq | 2 ++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/nix/workbench/backend/nomad-job.nix b/nix/workbench/backend/nomad-job.nix index c5811b385c7..de62c5285c3 100644 --- a/nix/workbench/backend/nomad-job.nix +++ b/nix/workbench/backend/nomad-job.nix @@ -483,7 +483,28 @@ let # precedence over the -consul-namespace command line argument in job run. # namespace = ""; # Not available as the documentations says: Extraneous JSON object property; No argument or block type is named "namespace". - + } + // + # If it needs host volumes add the constraints (can't be "null" or "[]".) 
+ ### - https://developer.hashicorp.com/nomad/tutorials/stateful-workloads/stateful-workloads-host-volumes + (lib.optionalAttrs (profileData.value.cluster.nomad.host_volumes != null) { + volume = lib.listToAttrs (lib.lists.imap0 + (i: v: { + # Internal name, reference to mount in this group's tasks below. + name = "volume-${taskName}-${toString i}"; + value = { + type = "host"; # We only support type "host". + read_only = v.read_only; + # How it is named in the Nomad Client's config. + # https://developer.hashicorp.com/nomad/docs/configuration/client#host_volume-block + source = v.source; + }; + }) + profileData.value.cluster.nomad.host_volumes + ); + }) + // + { # The task stanza creates an individual unit of work, such as a Docker # container, web application, or batch processing. # https://developer.hashicorp.com/nomad/docs/job-specification/task @@ -591,6 +612,20 @@ let check = null; }; + # If it needs host volumes mount them (defined above if any). + volume_mount = if profileData.value.cluster.nomad.host_volumes != null + then lib.lists.imap0 + (i: v: { + # Internal name, defined above in the group's specification. + volume = "volume-${taskName}-${toString i}"; + # Where it is going to be mounted inside the Task. + destination = v.destination; + read_only = v.read_only; + }) + profileData.value.cluster.nomad.host_volumes + else null + ; + # Specifies the set of templates to render for the task. Templates can # be used to inject both static and dynamic configuration with data # populated from environment variables, Consul and Vault. 
diff --git a/nix/workbench/profile/prof0-defaults.jq b/nix/workbench/profile/prof0-defaults.jq index 97b2d0b48a9..c8aa3135c5e 100644 --- a/nix/workbench/profile/prof0-defaults.jq +++ b/nix/workbench/profile/prof0-defaults.jq @@ -98,6 +98,8 @@ def era_defaults($era): { producer: {cores: 2, memory: 15000, memory_max: 16000} , explorer: {cores: 2, memory: 15000, memory_max: 16000} } + # Volumes like {source: "ssd1", destination: "/ssd1", read_only: false} + , host_volumes: null , fetch_logs_ssh: false } , aws: From 0afb0a4ad8eac04f903716542f1272ffcf65a94d Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Sat, 23 Mar 2024 14:57:59 +0000 Subject: [PATCH 08/15] wb | support public routing when using perf-ssd Nomad cluster --- nix/workbench/backend/nomad-job.nix | 12 ++++++------ nix/workbench/profile/prof0-defaults.jq | 2 ++ nix/workbench/profile/prof1-variants.jq | 2 ++ 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/nix/workbench/backend/nomad-job.nix b/nix/workbench/backend/nomad-job.nix index de62c5285c3..39328797e5b 100644 --- a/nix/workbench/backend/nomad-job.nix +++ b/nix/workbench/backend/nomad-job.nix @@ -578,12 +578,12 @@ let # address of an AWS EC2 instance set this to # ${attr.unique.platform.aws.public-ipv4}. address = - # When using the dedicated P&T Nomad cluster on AWS we use public - # IPs/routing, all the other cloud runs are behind a VPC/firewall. - # Local runs just use 12.0.0.1. - if lib.strings.hasInfix "-nomadperf" profileData.profileName + # When using dedicated Nomad clusters on AWS we want to use public + # IPs/routing, all the other cloud runs will run behind a + # VPC/firewall. + if profileData.value.cluster.aws.use_public_routing then "\${attr.unique.platform.aws.public-ipv4}" - else "" + else "" # Local runs just use 127.0.0.1. ; # Specifies the port to advertise for this service. 
The value of # port depends on which address_mode is being used: @@ -1398,7 +1398,7 @@ let [ # Address string to ( - if lib.strings.hasInfix "-nomadperf" profileData.profileName + if profileData.value.cluster.aws.use_public_routing then ''--host-addr {{ env "attr.unique.platform.aws.local-ipv4" }}'' else ''--host-addr 0.0.0.0'' ) diff --git a/nix/workbench/profile/prof0-defaults.jq b/nix/workbench/profile/prof0-defaults.jq index c8aa3135c5e..d0f1f76f455 100644 --- a/nix/workbench/profile/prof0-defaults.jq +++ b/nix/workbench/profile/prof0-defaults.jq @@ -107,6 +107,8 @@ def era_defaults($era): { producer: "c5.2xlarge" , explorer: "m5.4xlarge" } + # "attr.unique.platform.aws.public-ipv4" to bind and service definition. + , use_public_routing: false } , minimun_storage: { producer: 12582912 # 12×1024×1024 diff --git a/nix/workbench/profile/prof1-variants.jq b/nix/workbench/profile/prof1-variants.jq index 71d90d0508b..2d3b87b993f 100644 --- a/nix/workbench/profile/prof1-variants.jq +++ b/nix/workbench/profile/prof1-variants.jq @@ -212,6 +212,7 @@ def all_profile_variants: { producer: "c5.2xlarge" , explorer: "m5.4xlarge" } + , use_public_routing: true } # We are requiring 10.5GB on the explorer node and 9GB on the others. 
, minimun_storage: @@ -243,6 +244,7 @@ def all_profile_variants: { producer: "r5.4xlarge" , explorer: null } + , use_public_routing: true } , minimun_storage: null , keep_running: true From f72b771665a983dab5420453736fff1ac7e7325b Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Fri, 22 Mar 2024 21:09:15 +0000 Subject: [PATCH 09/15] wb | fix Nomad local runs namespace error --- nix/workbench/backend/nomad-job.nix | 7 ------- nix/workbench/backend/nomad.sh | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/nix/workbench/backend/nomad-job.nix b/nix/workbench/backend/nomad-job.nix index 39328797e5b..11ff8b1684d 100644 --- a/nix/workbench/backend/nomad-job.nix +++ b/nix/workbench/backend/nomad-job.nix @@ -476,13 +476,6 @@ let } ]; }; - - # The Consul namespace in which group and task-level services within the - # group will be registered. Use of template to access Consul KV will read - # from the specified Consul namespace. Specifying namespace takes - # precedence over the -consul-namespace command line argument in job run. - # namespace = ""; - # Not available as the documentations says: Extraneous JSON object property; No argument or block type is named "namespace". } // # If it needs host volumes add the constraints (can't be "null" or "[]".) diff --git a/nix/workbench/backend/nomad.sh b/nix/workbench/backend/nomad.sh index 81e750ccc28..1ff9f7bda7e 100644 --- a/nix/workbench/backend/nomad.sh +++ b/nix/workbench/backend/nomad.sh @@ -3855,7 +3855,7 @@ client { # Specifies an arbitrary string used to logically group client nodes by # user-defined class. This can be used during job placement as a filter. - node_class = "perf" # Using the "world.dev.cardano.org" testing class for "perf". + node_class = "" # Make sure we are not using namespaces locally. # "artifact" parameters (fail fast!!!) 
###################################### From 67b73e6ed45b9f0039f5ac5ee50a7e088225d714 Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Tue, 26 Mar 2024 16:57:27 +0000 Subject: [PATCH 10/15] wb | add perf-ssd 52+1 nodes fast, value and latency profiles --- Makefile | 2 +- nix/workbench/profile/prof1-variants.jq | 50 ++++++++++++++++++++++--- 2 files changed, 46 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index bc701739bbc..a711507eed3 100644 --- a/Makefile +++ b/Makefile @@ -100,7 +100,7 @@ PROFILES_NOMAD_PERF := default-nomadperf ci-test-nomadperf ci-bench-nomadp PROFILES_NOMAD_PERF += plutus-nomadperf fast-nomadperf latency-nomadperf PROFILES_NOMAD_PERF_NOP2P := default-nomadperf-nop2p oldtracing-nomadperf-nop2p ci-test-nomadperf-nop2p ci-bench-nomadperf-nop2p PROFILES_NOMAD_PERF_NOP2P += value-nomadperf-nop2p value-oldtracing-nomadperf-nop2p plutus-nomadperf-nop2p fast-nomadperf-nop2p -PROFILES_NOMAD_PERFSSD := fast-nomadperfssd +PROFILES_NOMAD_PERFSSD := value-nomadperfssd fast-nomadperfssd latency-nomadperfssd # single node profiles on the NomadSSD cluster on AWS PROFILES_UTXOSCALE_SOLO := utxoscale-solo-24M64G-nomadperfssd utxoscale-solo-12M64G-nomadperfssd utxoscale-solo-12M16G-nomadperfssd diff --git a/nix/workbench/profile/prof1-variants.jq b/nix/workbench/profile/prof1-variants.jq index 2d3b87b993f..102362a6381 100644 --- a/nix/workbench/profile/prof1-variants.jq +++ b/nix/workbench/profile/prof1-variants.jq @@ -234,15 +234,15 @@ def all_profile_variants: { namespace: "perf-ssd" , class: "perf-ssd" , resources: - { producer: {cores: 16, memory: 128000, memory_max: 128000} - , explorer: {cores: 16, memory: 128000, memory_max: 128000} + { producer: {cores: 16, memory: 120000, memory_max: 124000} + , explorer: {cores: 16, memory: 120000, memory_max: 124000} } , fetch_logs_ssh: true } , aws: { instance_type: { producer: "r5.4xlarge" - , explorer: null + , explorer: "r5.4xlarge" } , use_public_routing: true } @@ -277,6 +277,15 @@ def 
all_profile_variants: } } ) as $nomad_perfssd_unicircle + | + ($nomad_perfssd * + { composition: + { locations: ["eu-central-1", "us-east-1", "ap-southeast-2"] + , topology: "torus-dense" + , with_explorer: true + } + } + ) as $nomad_perfssd_dense | ## ### Definition vocabulary: filtering @@ -562,6 +571,10 @@ def all_profile_variants: ($model_timescale * $nomad_perf_tps_saturation_value * { scenario: "fixed-loaded" }) as $scenario_nomad_perf + | + ($model_timescale * $nomad_perf_tps_saturation_value * + { scenario: "fixed-loaded" + }) as $scenario_nomad_perfssd | ($small_timescale * $nomad_perf_tps_saturation_value * { scenario: "fixed-loaded" @@ -633,6 +646,23 @@ def all_profile_variants: } , desc: "AWS c5-2xlarge cluster dataset, 7 epochs" }) as $nomad_perf_base + | + ($scenario_nomad_perfssd * $compose_fiftytwo * $dataset_oct2021 * $for_8ep * + { node: + { shutdown_on_slot_synced: 64000 + } + , analysis: + { filters: ["epoch3+", "size-full"] + } + , generator: + { init_cooldown: 45 + } + , genesis: + { funds_balance: 20000000000000 + , max_block_size: 88000 + } + , desc: "AWS c5-2xlarge cluster dataset, 7 epochs" + }) as $nomad_perfssd_base | ($scenario_nomad_perf * $compose_fiftytwo * $dataset_oct2021 * $for_9ep * $plutus_base * $plutus_loop_counter * { node: @@ -653,8 +683,12 @@ def all_profile_variants: }) as $nomad_perf_plutus_base | ($scenario_latency * $compose_fiftytwo * $dataset_empty * $no_filtering * - { desc: "AWS c5-2xlarge cluster, stop when all latency services stop" + { desc: "AWS perf class cluster, stop when all latency services stop" }) as $nomad_perf_latency_base + | + ($scenario_latency * $compose_fiftytwo * $dataset_empty * $no_filtering * + { desc: "AWS perf-ssd class cluster, stop when all latency services stop" + }) as $nomad_perfssd_latency_base | ($scenario_nomad_perfssd_solo * $solo * $dataset_24m * { node: @@ -986,6 +1020,9 @@ def all_profile_variants: , $nomad_perf_base * $nomad_perf_dense * $p2p * $costmodel_v8_preview * { name: 
"value-nomadperf" } + , $nomad_perfssd_base * $nomad_perfssd_dense * $p2p * $costmodel_v8_preview * + { name: "value-nomadperfssd" + } , $nomad_perf_base * $nomad_perf_dense * $p2p * $costmodel_v8_preview * $old_tracing * { name: "value-oldtracing-nomadperf" } @@ -995,6 +1032,9 @@ def all_profile_variants: , $nomad_perf_latency_base * $nomad_perf_dense * $p2p * $costmodel_v8_preview * { name: "latency-nomadperf" } + , $nomad_perfssd_latency_base * $nomad_perfssd_dense * $p2p * $costmodel_v8_preview * + { name: "latency-nomadperfssd" + } ## P&T Nomad cluster: 52 nodes, 3 regions, value-only (with old tracing variant) and Plutus, no P2P flavour , $nomad_perf_base * $nomad_perf_dense * $costmodel_v8_preview * @@ -1014,7 +1054,7 @@ def all_profile_variants: , $fast_base * $compose_fiftytwo * $nomad_perf_dense * $costmodel_v8_preview * { name: "fast-nomadperf-nop2p" } - , $fast_base * $solo * $nomad_perfssd_unicircle * $costmodel_v8_preview * $p2p * + , $fast_base * $compose_fiftytwo * $nomad_perfssd_dense * $costmodel_v8_preview * $p2p * { name: "fast-nomadperfssd" } From afca46ab2bb2a65b7dd517cfaf5d7a9bf19e50c3 Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Fri, 29 Mar 2024 15:05:57 +0000 Subject: [PATCH 11/15] wb | fix the "ready" nodes filter --- nix/workbench/nomad.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nix/workbench/nomad.sh b/nix/workbench/nomad.sh index 479b953a575..960ab5f0c96 100644 --- a/nix/workbench/nomad.sh +++ b/nix/workbench/nomad.sh @@ -429,7 +429,7 @@ EOL local nomad_class nomad_class="$(jq -r .cluster.nomad.class "${WB_SHELL_PROFILE_DATA}"/profile.json)" local perf_nodes - perf_nodes="$(nomad node status -filter 'Status=="ready"' -filter "NodeClass==\"${nomad_class}\"" -json)" + perf_nodes="$(nomad node status -filter "Status==\"ready\" and NodeClass==\"${nomad_class}\"" -json)" # Create the base JSON string but without the "attributes" because those # are only available when fetching the status of individual 
nodes. local nodes_json From d404962b792f7f9d654d2ae595da5c9625ed0893 Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Tue, 2 Apr 2024 19:58:43 +0000 Subject: [PATCH 12/15] wb | even better error reporting when incompatible genesis file format --- nix/workbench/backend/supervisor-conf.nix | 37 ++++++++++++----------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/nix/workbench/backend/supervisor-conf.nix b/nix/workbench/backend/supervisor-conf.nix index be8f82c2241..052362e35e7 100644 --- a/nix/workbench/backend/supervisor-conf.nix +++ b/nix/workbench/backend/supervisor-conf.nix @@ -120,24 +120,25 @@ let startretries = 0; # Seconds it needs to stay running to consider the start successful # In cases with a big genesis file, like the "value" profile with ~600 - # mega, if this file has an error the node can fail after the 5 seconds - # we use as default for the other programs and the error will be catched - # later by the healthcheck service with a misleading message. - # We found with our reference machines (c5.2xlarge, 16 MB and 8 cores), - # when running the "value" profile, that with 50 seconds at least one - # node was assummed successful (its socket was created). So to the - # default 5 we add 45 seconds when the UTxO size is the one of the - # "value" profile and seconds proportionaly to this for the others. 
- ### derived.utxo_generated - ### - fast: 18000 (Default of 5s is OK) - ### - ci-test: 18000 (Default of 5s is OK) - ### - default: 43200 (Default of 5s is OK) - ### - plutus: 61200 (Default of 5s is OK) - ### - forge-stress-pre: 72000 - ### - forge-stress-large: 144000 - ### - value: 1536000 (30s more needed) - ### - chainsync-early-alonzo: 31104000 - startsecs = 5 + (profileData.derived.utxo_generated / (1536000 / 50)); + # mega, if this file has a format error the node can fail after the 5 + # seconds we use as default for the other "program"s and the error will + # be caught later by the healthcheck service with a misleading message. + # We found with our AWS reference machines (c5.2xlarge, 16 MB and 8 + # cores), when running the "value" profile, that with 50 seconds at + # least one node was assumed successful (its socket was created). So to + # the default 5 we add 50 seconds when the UTxO set size is the one of + # the "value" profile and seconds proportionally to this for the others. + # Not directly related to "genesis.extra_future_offset" or + # "derived.genesis_future_offset". 
+ ### derived.dataset_measure + ### - fast: 0 (Default of 5s is OK) + ### - ci-test: 0 (Default of 5s is OK) + ### - default: 0 (Default of 5s is OK) + ### - plutus: 0 (Default of 5s is OK) + ### - forge-stress-pre: 5000000 + ### - forge-stress-large: 11300000 + ### - value: 5000000 (50s more needed) + startsecs = 5 + (profileData.derived.dataset_measure / (5000000 / 50)); }) nodeSpecs)) // From dd997915eafcd8b194c1d1ddfe1b59cfd7500ff5 Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Wed, 3 Apr 2024 02:44:34 +0000 Subject: [PATCH 13/15] wb | better generator and explorer error reporting --- nix/workbench/backend/nomad.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/nix/workbench/backend/nomad.sh b/nix/workbench/backend/nomad.sh index 1ff9f7bda7e..0e9a3d2e6f0 100644 --- a/nix/workbench/backend/nomad.sh +++ b/nix/workbench/backend/nomad.sh @@ -2323,12 +2323,23 @@ backend_nomad() { # If the node in "${generator_task}" quits generators fails with: # tx-generator: MuxError MuxBearerClosed " closed when reading data, waiting on next header True" # Service binary 'tx-generator' returned status: 1 + msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\") quit with an error exit code!")" + # Give the node where tx-generator runs some time to quit. + msg "$(yellow " Waiting 60s to check the status of supervisord program \"${generator_task}\" (inside Nomad Task \"${generator_task}\")")" + sleep 30 if backend_nomad is-task-program-running "${dir}" "${generator_task}" "${generator_task}" 5 then # This was not expected! # But check it wasn't a race condition of a stopping cluster! if ! 
test -f "${dir}"/flag/cluster-stopping then + msg "$(red "ERROR: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\") quit with an error exit code while supervisord program \"${generator_task}\" (inside Nomad Task \"${generator_task}\") is still running!")" + # The tx-generator can fail because something happened with + # the nodes (out of memory?), this gives the nodes more time + # to shutdown properly and/or show any possible cause of + # trouble before being killed. + msg "$(yellow "WARNING: Waiting one minute so nodes are not killed immediately")" + sleep 60 touch "${dir}"/flag/cluster-stopping fatal "Generator quit unexpectedly!!!" fi @@ -2337,14 +2348,14 @@ backend_nomad() { touch "${dir}"/generator/quit # Show the warning and continue with the counter echo -ne "\n" - msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\" quit with an error exit code")" + msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\") quit with an error exit code but expected when supervisord program \"${generator_task}\" (inside Nomad Task \"${generator_task}\") is not running")" msg_ne "nomad: $(blue Waiting) until all pool nodes are stopped: 000000" fi else touch "${dir}"/generator/quit # Show the warning and continue with the counter echo -ne "\n" - msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\" quit with a non-error exit code")" + msg "$(yellow "WARNING: supervisord program \"generator\" (inside Nomad Task \"${generator_task}\") quit with a non-error exit code")" msg_ne "nomad: $(blue Waiting) until all pool nodes are stopped: 000000" fi fi # Finish generator checks. 
From bba719d162fa2c86920d963de39967bf4ff8c181 Mon Sep 17 00:00:00 2001 From: Federico Mastellone Date: Thu, 11 Apr 2024 12:00:12 +0000 Subject: [PATCH 14/15] wb | add instance type and host_volume constraints for the new perf-ssd NVMe machines --- nix/workbench/profile/prof1-variants.jq | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nix/workbench/profile/prof1-variants.jq b/nix/workbench/profile/prof1-variants.jq index 102362a6381..900a1e9f460 100644 --- a/nix/workbench/profile/prof1-variants.jq +++ b/nix/workbench/profile/prof1-variants.jq @@ -237,12 +237,16 @@ def all_profile_variants: { producer: {cores: 16, memory: 120000, memory_max: 124000} , explorer: {cores: 16, memory: 120000, memory_max: 124000} } + , host_volumes: [ + {source: "ssd1", destination: "/ssd1", read_only: false} + , {source: "ssd2", destination: "/ssd2", read_only: false} + ] , fetch_logs_ssh: true } , aws: { instance_type: - { producer: "r5.4xlarge" - , explorer: "r5.4xlarge" + { producer: "r5d.4xlarge" + , explorer: "r5d.4xlarge" } , use_public_routing: true } From 6c619b1b70855f2a275d9bf5e47526f8a58fb6cd Mon Sep 17 00:00:00 2001 From: Michael Karg Date: Thu, 11 Apr 2024 14:40:05 +0200 Subject: [PATCH 15/15] wb: LMDB - for now only put live tables on SSD device --- nix/nixos/cardano-node-service.nix | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nix/nixos/cardano-node-service.nix b/nix/nixos/cardano-node-service.nix index e2724fcba8c..b29a57f74ca 100644 --- a/nix/nixos/cardano-node-service.nix +++ b/nix/nixos/cardano-node-service.nix @@ -131,7 +131,7 @@ let utxoLmdbParams = ["--v1-lmdb-ledger-db-backend"] ++ lib.optionals (cfg.lmdbDatabasePath i != null) [ "--ssd-database-dir ${cfg.lmdbDatabasePath i}" - "--ssd-snapshot-tables" + # "--ssd-snapshot-tables" ]; cmd = builtins.filter (x: x != "") [ "${cfg.executable} run" @@ -715,7 +715,7 @@ in { config = mkIf cfg.enable ( let stateDirBase = "/var/lib/"; runDirBase = "/run/"; - lmdbPaths = 
filter (x: x != null) (map (e: lmdbDatabasePath e) cfg.instances); + lmdbPaths = filter (x: x != null) (map (e: cfg.lmdbDatabasePath e) (builtins.genList lib.trivial.id cfg.instances)); genInstanceConf = f: listToAttrs (if cfg.instances > 1 then genList (i: let n = "cardano-node-${toString i}"; in nameValuePair n (f n i)) cfg.instances else [ (nameValuePair "cardano-node" (f "cardano-node" 0)) ]); in lib.mkMerge [ @@ -818,7 +818,7 @@ in { message = "Systemd socket activation cannot be used with p2p topology due to a systemd socket re-use issue."; } { - assertion = (length lmdPaths) == (length (lib.lists.unique lmdbPaths)); + assertion = (length lmdbPaths) == (length (lib.lists.unique lmdbPaths)); message = "When configuring multiple LMDB enabled nodes on one instance, lmdbDatabasePath must be unique."; } ];