From 4eff89d0f69653a8bcca984d480a7764dd178c6b Mon Sep 17 00:00:00 2001 From: David Irvine Date: Thu, 21 Nov 2024 12:19:38 +0000 Subject: [PATCH 01/21] feat(networking): add bootstrap cache for peer discovery Add persistent bootstrap cache to maintain a list of previously known peers, improving network bootstrapping efficiency and reducing cold-start times. Enhance the bootstrap cache implementation with robust corruption detection and recovery mechanisms. This change ensures system resilience when the cache file becomes corrupted or invalid. Key changes: * Add explicit cache corruption detection and error reporting * Implement cache rebuilding from in-memory peers or endpoints * Use atomic file operations to prevent corruption during writes * Improve error handling with specific error variants * Add comprehensive test suite for corruption scenarios The system now handles corruption by: 1. Detecting invalid/corrupted JSON data during cache reads 2. Attempting recovery using in-memory peers if available 3. Falling back to endpoint discovery if needed 4. Using atomic operations for safe cache updates Testing: * Add tests for various corruption scenarios * Add concurrent access tests * Add file operation tests * Verify endpoint fallback behavior - Add smarter JSON format detection by checking content structure - Improve error handling with specific InvalidResponse variant - Reduce unnecessary warnings by only logging invalid multiaddrs - Simplify parsing logic to handle both JSON and plain text formats - Add better error context for failed parsing attempts All tests passing, including JSON endpoint and plain text format tests. feat(bootstrap_cache): implement circuit breaker with exponential backoff - Add CircuitBreakerConfig with configurable parameters for failures and timeouts - Implement circuit breaker states (closed, open, half-open) with state transitions - Add exponential backoff for failed request retries - Update InitialPeerDiscovery to support custom circuit breaker configuration - Add comprehensive test suite with shorter timeouts for faster testing This change improves system resilience by preventing cascading failures and reducing load on failing endpoints through intelligent retry mechanisms. --- .gitignore | 3 +- Cargo.lock | 677 +++++++++++++++-- Cargo.toml | 1 + ant-peers-acquisition/Cargo.toml | 1 + bootstrap_cache/Cargo.toml | 25 + bootstrap_cache/README.md | 216 ++++++ bootstrap_cache/src/cache.rs | 390 ++++++++++ bootstrap_cache/src/cache_store.rs | 690 ++++++++++++++++++ bootstrap_cache/src/circuit_breaker.rs | 208 ++++++ bootstrap_cache/src/config.rs | 285 ++++++++ bootstrap_cache/src/error.rs | 39 + bootstrap_cache/src/initial_peer_discovery.rs | 424 +++++++++++ bootstrap_cache/src/lib.rs | 115 +++ bootstrap_cache/tests/cache_tests.rs | 241 ++++++ bootstrap_cache/tests/integration_tests.rs | 199 +++++ docs/bootstrap_cache_implementation.md | 337 +++++++++ docs/bootstrap_cache_prd.md | 194 +++++ prd.md | 173 +++++ refactoring_steps.md | 202 +++++ repository_structure.md | 265 +++++++ 20 files changed, 4609 insertions(+), 76 deletions(-) create mode 100644 bootstrap_cache/Cargo.toml create mode 100644 bootstrap_cache/README.md create mode 100644 bootstrap_cache/src/cache.rs create mode 100644 bootstrap_cache/src/cache_store.rs create mode 100644 bootstrap_cache/src/circuit_breaker.rs create mode 100644 bootstrap_cache/src/config.rs create mode 100644 bootstrap_cache/src/error.rs create mode 100644 bootstrap_cache/src/initial_peer_discovery.rs create mode 100644 bootstrap_cache/src/lib.rs create mode 100644 bootstrap_cache/tests/cache_tests.rs create mode 100644 bootstrap_cache/tests/integration_tests.rs create mode 100644 docs/bootstrap_cache_implementation.md create mode 100644 docs/bootstrap_cache_prd.md create mode 100644 prd.md create mode 100644 refactoring_steps.md create mode 100644 repository_structure.md diff --git a/.gitignore b/.gitignore index bf0d0deed0..a13bb1aa5c 100644 --- a/.gitignore +++ b/.gitignore @@ -36,8 +36,7 @@ sn_node_manager/.vagrant .venv/ uv.lock *.so -*.pyc - *.pyc *.swp +/vendor/ \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index aff7d76738..641b99a784 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -769,7 +769,7 @@ dependencies = [ "evmlib", "hex 0.4.3", "lazy_static", - "libp2p", + "libp2p 0.54.1", "rand 0.8.5", "ring 0.17.8", "rmp-serde", @@ -846,7 +846,7 @@ dependencies = [ "hyper 0.14.31", "itertools 0.12.1", "lazy_static", - "libp2p", + "libp2p 0.54.1", "libp2p-identity", "prometheus-client", "quickcheck", @@ -900,7 +900,7 @@ dependencies = [ "futures", "hex 0.4.3", "itertools 0.12.1", - "libp2p", + "libp2p 0.54.1", "num-traits", "prometheus-client", "prost 0.9.0", @@ -949,7 +949,7 @@ dependencies = [ "colored", "dirs-next", "indicatif", - "libp2p", + "libp2p 0.54.1", "libp2p-identity", "mockall 0.12.1", "nix 0.27.1", @@ -986,7 +986,7 @@ dependencies = [ "clap", "color-eyre", "hex 0.4.3", - "libp2p", + "libp2p 0.54.1", "libp2p-identity", "thiserror 1.0.69", "tokio", @@ -1003,7 +1003,7 @@ dependencies = [ "ant-protocol", "clap", "lazy_static", - "libp2p", + "libp2p 0.54.1", "rand 0.8.5", "reqwest 0.12.9", "thiserror 1.0.69", @@ -1028,7 +1028,7 @@ dependencies = [ "exponential-backoff", "hex 0.4.3", "lazy_static", - "libp2p", + "libp2p 0.54.1", "prost 0.9.0", "rmp-serde", "serde", @@ -1087,7 +1087,7 @@ dependencies = [ "ant-protocol", "async-trait", "dirs-next", - "libp2p", + "libp2p 0.54.1", "libp2p-identity", "mockall 0.11.4", "prost 0.9.0", @@ -1333,6 +1333,16 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "assert_cmd" version = "2.0.16" @@ -1370,6 +1380,17 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b34d609dfbaf33d6889b2b7106d3ca345eacad44200913df5ba02bfd31d2ba9" +[[package]] +name = "async-channel" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" +dependencies = [ + "concurrent-queue", + "event-listener 2.5.3", + "futures-core", +] + [[package]] name = "async-io" version = "2.4.0" @@ -1380,7 +1401,7 @@ dependencies = [ "cfg-if", "concurrent-queue", "futures-io", - "futures-lite", + "futures-lite 2.5.0", "parking", "polling", "rustix", @@ -1395,7 +1416,7 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" dependencies = [ - "event-listener", + "event-listener 5.3.1", "event-listener-strategy", "pin-project-lite", ] @@ -1518,7 +1539,7 @@ dependencies = [ "hex 0.4.3", "instant", "js-sys", - "libp2p", + "libp2p 0.54.1", "pyo3", "rand 0.8.5", "rmp-serde", @@ -1870,6 +1891,25 @@ dependencies = [ "zeroize", ] +[[package]] +name = "bootstrap_cache" +version = "0.1.0" +dependencies = [ + "chrono", + "dirs 5.0.1", + "fs2", + "libp2p 0.53.2", + "reqwest 0.11.27", + "serde", + "serde_json", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tracing", + "tracing-subscriber", + "wiremock", +] + [[package]] name = "brotli" version = "3.3.4" @@ -2434,7 +2474,7 @@ dependencies = [ "bitflags 1.3.2", "core-foundation", "core-graphics-types", - "foreign-types", + "foreign-types 0.5.0", "libc", ] @@ -2817,6 +2857,25 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "deadpool" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "421fe0f90f2ab22016f32a9881be5134fdd71c65298917084b0c7477cbc3856e" +dependencies = [ + "async-trait", + "deadpool-runtime", + "num_cpus", + "retain_mut", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "der" version = "0.6.1" @@ -2964,6 +3023,15 @@ dependencies = [ "dirs-sys 0.3.7", ] +[[package]] +name = "dirs" +version = "5.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" +dependencies = [ + "dirs-sys 0.4.1", +] + [[package]] name = "dirs-next" version = "2.0.0" @@ -3213,6 +3281,12 @@ version = "3.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a5d9305ccc6942a704f4335694ecd3de2ea531b114ac2d51f5f843750787a92f" +[[package]] +name = "event-listener" +version = "2.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" + [[package]] name = "event-listener" version = "5.3.1" @@ -3230,7 +3304,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c3e4e0dd3673c1139bf041f3008816d9cf2946bbfac2945c09e523b8d7b05b2" dependencies = [ - "event-listener", + "event-listener 5.3.1", "pin-project-lite", ] @@ -3266,7 +3340,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ffb309d235a642598183aeda8925e871e85dd5a433c2c877e69ff0a960f4c02" dependencies = [ - "fastrand", + "fastrand 2.2.0", ] [[package]] @@ -3302,6 +3376,15 @@ version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2a2b11eda1d40935b26cf18f6833c526845ae8c41e58d09af6adeb6f0269183" +[[package]] +name = "fastrand" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" +dependencies = [ + "instant", +] + [[package]] name = "fastrand" version = "2.2.0" @@ -3453,6 +3536,15 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared 0.1.1", +] + [[package]] name = "foreign-types" version = "0.5.0" @@ -3460,7 +3552,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" dependencies = [ "foreign-types-macros", - "foreign-types-shared", + "foreign-types-shared 0.3.1", ] [[package]] @@ -3474,6 +3566,12 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "foreign-types-shared" version = "0.3.1" @@ -3495,6 +3593,16 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "fs_extra" version = "1.3.0" @@ -3572,6 +3680,21 @@ version = "0.3.31" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +[[package]] +name = "futures-lite" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49a9d51ce47660b1e808d3c990b4709f2f415d928835a17dfd16991515c46bce" +dependencies = [ + "fastrand 1.9.0", + "futures-core", + "futures-io", + "memchr", + "parking", + "pin-project-lite", + "waker-fn", +] + [[package]] name = "futures-lite" version = "2.5.0" @@ -3918,7 +4041,7 @@ version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2bfe6249cfea6d0c0e0990d5226a4cb36f030444ba9e35e0639275db8f98575" dependencies = [ - "fastrand", + "fastrand 2.2.0", "gix-features", "gix-utils", ] @@ -4224,7 +4347,7 @@ version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba427e3e9599508ed98a6ddf8ed05493db114564e338e41f6a996d2e4790335f" dependencies = [ - "fastrand", + "fastrand 2.2.0", "unicode-normalization", ] @@ -4627,6 +4750,27 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-types" +version = "2.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e9b187a72d63adbfba487f48095306ac823049cb504ee195541e91c7775f5ad" +dependencies = [ + "anyhow", + "async-channel", + "base64 0.13.1", + "futures-lite 1.13.0", + "http 0.2.12", + "infer", + "pin-project-lite", + "rand 0.7.3", + "serde", + "serde_json", + "serde_qs", + "serde_urlencoded", + "url", +] + [[package]] name = "httparse" version = "1.9.5" @@ -4742,6 +4886,19 @@ dependencies = [ "tokio-io-timeout", ] +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper 0.14.31", + "native-tls", + "tokio", + "tokio-native-tls", +] + [[package]] name = "hyper-util" version = "0.1.10" @@ -5088,6 +5245,12 @@ version = "2.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" +[[package]] +name = "infer" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64e9829a50b42bb782c1df523f78d332fe371b10c661e78b7a3c34b0198e9fac" + [[package]] name = "inout" version = "0.1.3" @@ -5278,6 +5441,31 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" +[[package]] +name = "libp2p" +version = "0.53.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "681fb3f183edfbedd7a57d32ebe5dcdc0b9f94061185acf3c30249349cc6fc99" +dependencies = [ + "bytes", + "either", + "futures", + "futures-timer", + "getrandom 0.2.15", + "instant", + "libp2p-allow-block-list 0.3.0", + "libp2p-connection-limits 0.3.1", + "libp2p-core 0.41.3", + "libp2p-gossipsub 0.46.1", + "libp2p-identity", + "libp2p-kad 0.45.3", + "libp2p-swarm 0.44.2", + "multiaddr", + "pin-project", + "rw-stream-sink 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "thiserror 1.0.69", +] + [[package]] name = "libp2p" version = "0.54.1" @@ -5288,22 +5476,22 @@ dependencies = [ "futures", "futures-timer", "getrandom 0.2.15", - "libp2p-allow-block-list", + "libp2p-allow-block-list 0.4.0", "libp2p-autonat", - "libp2p-connection-limits", - "libp2p-core", + "libp2p-connection-limits 0.4.0", + "libp2p-core 0.42.0", "libp2p-dns", - "libp2p-gossipsub", + "libp2p-gossipsub 0.47.0", "libp2p-identify", "libp2p-identity", - "libp2p-kad", + "libp2p-kad 0.46.2", "libp2p-mdns", "libp2p-metrics", "libp2p-noise", "libp2p-quic", "libp2p-relay", "libp2p-request-response", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "libp2p-tcp", "libp2p-upnp", "libp2p-websocket", @@ -5311,18 +5499,30 @@ dependencies = [ "libp2p-yamux", "multiaddr", "pin-project", - "rw-stream-sink", + "rw-stream-sink 0.4.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "thiserror 1.0.69", ] +[[package]] +name = "libp2p-allow-block-list" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "107b238b794cb83ab53b74ad5dcf7cca3200899b72fe662840cfb52f5b0a32e6" +dependencies = [ + "libp2p-core 0.41.3", + "libp2p-identity", + "libp2p-swarm 0.44.2", + "void", +] + [[package]] name = "libp2p-allow-block-list" version = "0.4.0" source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f87256ff141963006af129cc2c839b472" dependencies = [ - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "void", ] @@ -5338,12 +5538,12 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", "libp2p-request-response", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "quick-protobuf", - "quick-protobuf-codec", + "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "rand_core 0.6.4", "thiserror 1.0.69", @@ -5352,17 +5552,58 @@ dependencies = [ "web-time", ] +[[package]] +name = "libp2p-connection-limits" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7cd50a78ccfada14de94cbacd3ce4b0138157f376870f13d3a8422cd075b4fd" +dependencies = [ + "libp2p-core 0.41.3", + "libp2p-identity", + "libp2p-swarm 0.44.2", + "void", +] + [[package]] name = "libp2p-connection-limits" version = "0.4.0" source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f87256ff141963006af129cc2c839b472" dependencies = [ - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "void", ] +[[package]] +name = "libp2p-core" +version = "0.41.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a5a8920cbd8540059a01950c1e5c96ea8d89eb50c51cd366fc18bdf540a6e48f" +dependencies = [ + "either", + "fnv", + "futures", + "futures-timer", + "libp2p-identity", + "multiaddr", + "multihash", + "multistream-select 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", + "once_cell", + "parking_lot", + "pin-project", + "quick-protobuf", + "rand 0.8.5", + "rw-stream-sink 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde", + "smallvec", + "thiserror 1.0.69", + "tracing", + "unsigned-varint 0.8.0", + "void", + "web-time", +] + [[package]] name = "libp2p-core" version = "0.42.0" @@ -5375,17 +5616,17 @@ dependencies = [ "libp2p-identity", "multiaddr", "multihash", - "multistream-select", + "multistream-select 0.13.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "once_cell", "parking_lot", "pin-project", "quick-protobuf", "rand 0.8.5", - "rw-stream-sink", + "rw-stream-sink 0.4.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "smallvec", "thiserror 1.0.69", "tracing", - "unsigned-varint", + "unsigned-varint 0.8.0", "void", "web-time", ] @@ -5398,13 +5639,45 @@ dependencies = [ "async-trait", "futures", "hickory-resolver", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", "parking_lot", "smallvec", "tracing", ] +[[package]] +name = "libp2p-gossipsub" +version = "0.46.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d665144a616dadebdc5fff186b1233488cdcd8bfb1223218ff084b6d052c94f7" +dependencies = [ + "asynchronous-codec", + "base64 0.21.7", + "byteorder", + "bytes", + "either", + "fnv", + "futures", + "futures-ticker", + "getrandom 0.2.15", + "hex_fmt", + "instant", + "libp2p-core 0.41.3", + "libp2p-identity", + "libp2p-swarm 0.44.2", + "prometheus-client", + "quick-protobuf", + "quick-protobuf-codec 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.8.5", + "regex", + "serde", + "sha2 0.10.8", + "smallvec", + "tracing", + "void", +] + [[package]] name = "libp2p-gossipsub" version = "0.47.0" @@ -5420,12 +5693,12 @@ dependencies = [ "futures-ticker", "getrandom 0.2.15", "hex_fmt", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "prometheus-client", "quick-protobuf", - "quick-protobuf-codec", + "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "regex", "sha2 0.10.8", @@ -5445,12 +5718,12 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "lru", "quick-protobuf", - "quick-protobuf-codec", + "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "smallvec", "thiserror 1.0.69", "tracing", @@ -5469,12 +5742,43 @@ dependencies = [ "multihash", "quick-protobuf", "rand 0.8.5", + "serde", "sha2 0.10.8", "thiserror 1.0.69", "tracing", "zeroize", ] +[[package]] +name = "libp2p-kad" +version = "0.45.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc5767727d062c4eac74dd812c998f0e488008e82cce9c33b463d38423f9ad2" +dependencies = [ + "arrayvec", + "asynchronous-codec", + "bytes", + "either", + "fnv", + "futures", + "futures-bounded", + "futures-timer", + "instant", + "libp2p-core 0.41.3", + "libp2p-identity", + "libp2p-swarm 0.44.2", + "quick-protobuf", + "quick-protobuf-codec 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", + "rand 0.8.5", + "serde", + "sha2 0.10.8", + "smallvec", + "thiserror 1.0.69", + "tracing", + "uint", + "void", +] + [[package]] name = "libp2p-kad" version = "0.46.2" @@ -5488,11 +5792,11 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "quick-protobuf", - "quick-protobuf-codec", + "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "sha2 0.10.8", "smallvec", @@ -5512,9 +5816,9 @@ dependencies = [ "futures", "hickory-proto", "if-watch", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "rand 0.8.5", "smallvec", "socket2", @@ -5529,12 +5833,12 @@ version = "0.15.0" source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f87256ff141963006af129cc2c839b472" dependencies = [ "futures", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identify", "libp2p-identity", - "libp2p-kad", + "libp2p-kad 0.46.2", "libp2p-relay", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "pin-project", "prometheus-client", "web-time", @@ -5549,7 +5853,7 @@ dependencies = [ "bytes", "curve25519-dalek 4.1.3", "futures", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", "multiaddr", "multihash", @@ -5574,7 +5878,7 @@ dependencies = [ "futures", "futures-timer", "if-watch", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", "libp2p-tls", "parking_lot", @@ -5599,11 +5903,11 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "quick-protobuf", - "quick-protobuf-codec", + "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "static_assertions", "thiserror 1.0.69", @@ -5622,9 +5926,9 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", - "libp2p-swarm", + "libp2p-swarm 0.45.1", "rand 0.8.5", "serde", "smallvec", @@ -5633,6 +5937,28 @@ dependencies = [ "web-time", ] +[[package]] +name = "libp2p-swarm" +version = "0.44.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80cae6cb75f89dbca53862f9ebe0b9f463aa7b302762fcfaafb9e51dcc9b0f7e" +dependencies = [ + "either", + "fnv", + "futures", + "futures-timer", + "instant", + "libp2p-core 0.41.3", + "libp2p-identity", + "lru", + "multistream-select 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", + "once_cell", + "rand 0.8.5", + "smallvec", + "tracing", + "void", +] + [[package]] name = "libp2p-swarm" version = "0.45.1" @@ -5643,11 +5969,11 @@ dependencies = [ "futures", "futures-timer", "getrandom 0.2.15", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", "libp2p-swarm-derive", "lru", - "multistream-select", + "multistream-select 0.13.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "once_cell", "rand 0.8.5", "smallvec", @@ -5678,7 +6004,7 @@ dependencies = [ "futures-timer", "if-watch", "libc", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", "socket2", "tokio", @@ -5692,7 +6018,7 @@ source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f dependencies = [ "futures", "futures-rustls", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", "rcgen", "ring 0.17.8", @@ -5711,8 +6037,8 @@ dependencies = [ "futures", "futures-timer", "igd-next", - "libp2p-core", - "libp2p-swarm", + "libp2p-core 0.42.0", + "libp2p-swarm 0.45.1", "tokio", "tracing", "void", @@ -5726,11 +6052,11 @@ dependencies = [ "either", "futures", "futures-rustls", - "libp2p-core", + "libp2p-core 0.42.0", "libp2p-identity", "parking_lot", "pin-project-lite", - "rw-stream-sink", + "rw-stream-sink 0.4.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "soketto", "thiserror 1.0.69", "tracing", @@ -5746,7 +6072,7 @@ dependencies = [ "bytes", "futures", "js-sys", - "libp2p-core", + "libp2p-core 0.42.0", "parking_lot", "send_wrapper 0.6.0", "thiserror 1.0.69", @@ -5762,7 +6088,7 @@ source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f dependencies = [ "either", "futures", - "libp2p-core", + "libp2p-core 0.42.0", "thiserror 1.0.69", "tracing", "yamux 0.12.1", @@ -6048,7 +6374,7 @@ dependencies = [ "percent-encoding", "serde", "static_assertions", - "unsigned-varint", + "unsigned-varint 0.8.0", "url", ] @@ -6070,7 +6396,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc41f430805af9d1cf4adae4ed2149c759b877b01d909a1f40256188d09345d2" dependencies = [ "core2", - "unsigned-varint", + "serde", + "unsigned-varint 0.8.0", ] [[package]] @@ -6079,6 +6406,20 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" +[[package]] +name = "multistream-select" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea0df8e5eec2298a62b326ee4f0d7fe1a6b90a09dfcf9df37b38f947a8c42f19" +dependencies = [ + "bytes", + "futures", + "log", + "pin-project", + "smallvec", + "unsigned-varint 0.7.2", +] + [[package]] name = "multistream-select" version = "0.13.0" @@ -6089,7 +6430,7 @@ dependencies = [ "pin-project", "smallvec", "tracing", - "unsigned-varint", + "unsigned-varint 0.8.0", ] [[package]] @@ -6103,13 +6444,30 @@ dependencies = [ "clap-verbosity-flag", "color-eyre", "futures", - "libp2p", + "libp2p 0.54.1", "tokio", "tracing", "tracing-log 0.2.0", "tracing-subscriber", ] +[[package]] +name = "native-tls" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" +dependencies = [ + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "netlink-packet-core" version = "0.7.0" @@ -6524,6 +6882,50 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" +[[package]] +name = "openssl" +version = "0.10.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "foreign-types 0.3.2", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.104" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +dependencies = [ + "cc", + "libc", + "pkg-config", + "vcpkg", +] + [[package]] name = "opentelemetry" version = "0.20.0" @@ -7371,6 +7773,19 @@ dependencies = [ "byteorder", ] +[[package]] +name = "quick-protobuf-codec" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15a0580ab32b169745d7a39db2ba969226ca16738931be152a3209b409de2474" +dependencies = [ + "asynchronous-codec", + "bytes", + "quick-protobuf", + "thiserror 1.0.69", + "unsigned-varint 0.8.0", +] + [[package]] name = "quick-protobuf-codec" version = "0.3.1" @@ -7380,7 +7795,7 @@ dependencies = [ "bytes", "quick-protobuf", "thiserror 1.0.69", - "unsigned-varint", + "unsigned-varint 0.8.0", ] [[package]] @@ -7823,10 +8238,12 @@ dependencies = [ "http-body 0.4.6", "hyper 0.14.31", "hyper-rustls 0.24.2", + "hyper-tls", "ipnet", "js-sys", "log", "mime", + "native-tls", "once_cell", "percent-encoding", "pin-project-lite", @@ -7838,6 +8255,7 @@ dependencies = [ "sync_wrapper 0.1.2", "system-configuration 0.5.1", "tokio", + "tokio-native-tls", "tokio-rustls 0.24.1", "tower-service", "url", @@ -7900,6 +8318,12 @@ dependencies = [ "quick-error", ] +[[package]] +name = "retain_mut" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4389f1d5789befaf6029ebd9f7dac4af7f7e3d61b69d4f30e2ac02b57e7712b0" + [[package]] name = "rfc6979" version = "0.3.1" @@ -8237,6 +8661,17 @@ dependencies = [ "wait-timeout", ] +[[package]] +name = "rw-stream-sink" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8c9026ff5d2f23da5e45bbc283f156383001bfb09c4e44256d02c1a685fe9a1" +dependencies = [ + "futures", + "pin-project", + "static_assertions", +] + [[package]] name = "rw-stream-sink" version = "0.4.0" @@ -8262,6 +8697,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schannel" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" +dependencies = [ + "windows-sys 0.59.0", +] + [[package]] name = "schnellru" version = "0.2.3" @@ -8353,6 +8797,29 @@ dependencies = [ "cc", ] +[[package]] +name = "security-framework" +version = "2.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" +dependencies = [ + "bitflags 2.6.0", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "self_encryption" version = "0.30.0" @@ -8470,6 +8937,17 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_qs" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7715380eec75f029a4ef7de39a9200e0a63823176b759d055b613f5a87df6a6" +dependencies = [ + "percent-encoding", + "serde", + "thiserror 1.0.69", +] + [[package]] name = "serde_spanned" version = "0.6.8" @@ -8550,7 +9028,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59d7d62c9733631445d1b3fc7854c780088408d4b79a20dd928aaec41854ca3a" dependencies = [ "cfg-if", - "dirs", + "dirs 4.0.0", "plist", "which 4.4.2", "xml-rs", @@ -9049,7 +9527,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", - "fastrand", + "fastrand 2.2.0", "once_cell", "rustix", "windows-sys 0.59.0", @@ -9091,7 +9569,7 @@ dependencies = [ "color-eyre", "dirs-next", "evmlib", - "libp2p", + "libp2p 0.54.1", "rand 0.8.5", "serde", "serde_json", @@ -9306,6 +9784,16 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.22.0" @@ -9864,6 +10352,12 @@ version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" +[[package]] +name = "unsigned-varint" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6889a77d49f1f013504cec6bf97a2c730394adedaeb1deb5ea08949a50541105" + [[package]] name = "unsigned-varint" version = "0.8.0" @@ -9891,6 +10385,7 @@ dependencies = [ "form_urlencoded", "idna 1.0.3", "percent-encoding", + "serde", ] [[package]] @@ -9948,6 +10443,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + [[package]] name = "vergen" version = "8.3.2" @@ -10004,6 +10505,12 @@ dependencies = [ "libc", ] +[[package]] +name = "waker-fn" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "317211a0dc0ceedd78fb2ca9a44aed3d7b9b26f81870d485c07122b4350673b7" + [[package]] name = "walkdir" version = "2.5.0" @@ -10536,6 +11043,28 @@ version = "0.0.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d135d17ab770252ad95e9a872d365cf3090e3be864a34ab46f48555993efc904" +[[package]] +name = "wiremock" +version = "0.5.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13a3a53eaf34f390dd30d7b1b078287dd05df2aa2e21a589ccb80f5c7253c2e9" +dependencies = [ + "assert-json-diff", + "async-trait", + "base64 0.21.7", + "deadpool", + "futures", + "futures-timer", + "http-types", + "hyper 0.14.31", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", +] + [[package]] name = "write16" version = "1.0.0" diff --git a/Cargo.toml b/Cargo.toml index 175e0dfa2c..3628d1ecdf 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,7 @@ members = [ "ant-service-management", "ant-token-supplies", "autonomi", + "bootstrap_cache", "evmlib", "evm-testnet", "nat-detection", diff --git a/ant-peers-acquisition/Cargo.toml b/ant-peers-acquisition/Cargo.toml index 381f0e0388..660b55b3e6 100644 --- a/ant-peers-acquisition/Cargo.toml +++ b/ant-peers-acquisition/Cargo.toml @@ -10,6 +10,7 @@ repository = "https://github.com/maidsafe/autonomi" version = "0.5.7" [features] +default = ["network-contacts"] local = [] network-contacts = ["ant-protocol"] websockets = [] diff --git a/bootstrap_cache/Cargo.toml b/bootstrap_cache/Cargo.toml new file mode 100644 index 0000000000..e2e305e51d --- /dev/null +++ b/bootstrap_cache/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "bootstrap_cache" +version = "0.1.0" +edition = "2021" +license = "GPL-3.0" +authors = ["MaidSafe Developers "] +description = "Bootstrap cache functionality for the Safe Network" + +[dependencies] +chrono = { version = "0.4", features = ["serde"] } +dirs = "5.0" +fs2 = "0.4.3" +libp2p = { version = "0.53", features = ["serde"] } +reqwest = { version = "0.11", features = ["json"] } +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +tempfile = "3.8.1" +thiserror = "1.0" +tokio = { version = "1.0", features = ["full", "sync"] } +tracing = "0.1" + +[dev-dependencies] +wiremock = "0.5" +tokio = { version = "1.0", features = ["full", "test-util"] } +tracing-subscriber = { version = "0.3", features = ["env-filter"] } diff --git a/bootstrap_cache/README.md b/bootstrap_cache/README.md new file mode 100644 index 0000000000..d45e20c03b --- /dev/null +++ b/bootstrap_cache/README.md @@ -0,0 +1,216 @@ +# Bootstrap Cache + +A decentralized peer discovery and caching system for the Safe Network. + +## Features + +- **Decentralized Design**: No dedicated bootstrap nodes required +- **Cross-Platform Support**: Works on Linux, macOS, and Windows +- **Shared Cache**: System-wide cache file accessible by both nodes and clients +- **Concurrent Access**: File locking for safe multi-process access +- **Atomic Operations**: Safe cache updates using atomic file operations +- **Initial Peer Discovery**: Fallback web endpoints for new/stale cache scenarios +- **Comprehensive Error Handling**: Detailed error types and logging +- **Circuit Breaker Pattern**: Intelligent failure handling with: + - Configurable failure thresholds and reset timeouts + - Exponential backoff for failed requests + - Automatic state transitions (closed → open → half-open) + - Protection against cascading failures + +### Peer Management + +The bootstrap cache implements a robust peer management system: + +- **Peer Status Tracking**: Each peer's connection history is tracked, including: + - Success count: Number of successful connections + - Failure count: Number of failed connection attempts + - Last seen timestamp: When the peer was last successfully contacted + +- **Automatic Cleanup**: The system automatically removes unreliable peers: + - Peers that fail 3 consecutive connection attempts are marked for removal + - Removal only occurs if there are at least 2 working peers available + - This ensures network connectivity is maintained even during temporary connection issues + +- **Duplicate Prevention**: The cache automatically prevents duplicate peer entries: + - Same IP and port combinations are only stored once + - Different ports on the same IP are treated as separate peers + +## Installation + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +bootstrap_cache = { version = "0.1.0" } +``` + +## Usage + +### Basic Example + +```rust +use bootstrap_cache::{BootstrapCache, CacheManager, InitialPeerDiscovery}; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Initialize the cache manager + let cache_manager = CacheManager::new()?; + + // Try to read from the cache + let mut cache = match cache_manager.read_cache() { + Ok(cache) if !cache.is_stale() => cache, + _ => { + // Cache is stale or unavailable, fetch initial peers + let discovery = InitialPeerDiscovery::new(); + let peers = discovery.fetch_peers().await?; + let cache = BootstrapCache { + last_updated: chrono::Utc::now(), + peers, + }; + cache_manager.write_cache(&cache)?; + cache + } + }; + + println!("Found {} peers in cache", cache.peers.len()); + Ok(()) +} +``` + +### Custom Endpoints + +```rust +use bootstrap_cache::InitialPeerDiscovery; + +let discovery = InitialPeerDiscovery::with_endpoints(vec![ + "http://custom1.example.com/peers.json".to_string(), + "http://custom2.example.com/peers.json".to_string(), +]); +``` + +### Circuit Breaker Configuration + +```rust +use bootstrap_cache::{InitialPeerDiscovery, CircuitBreakerConfig}; +use std::time::Duration; + +// Create a custom circuit breaker configuration +let config = CircuitBreakerConfig { + max_failures: 5, // Open after 5 failures + reset_timeout: Duration::from_secs(300), // Wait 5 minutes before recovery + min_backoff: Duration::from_secs(1), // Start with 1 second backoff + max_backoff: Duration::from_secs(60), // Max backoff of 60 seconds +}; + +// Initialize discovery with custom circuit breaker config +let discovery = InitialPeerDiscovery::with_config(config); +``` + +### Peer Management Example + +```rust +use bootstrap_cache::BootstrapCache; + +let mut cache = BootstrapCache::new(); + +// Add a new peer +cache.add_peer("192.168.1.1".to_string(), 8080); + +// Update peer status after connection attempts +cache.update_peer_status("192.168.1.1", 8080, true); // successful connection +cache.update_peer_status("192.168.1.1", 8080, false); // failed connection + +// Clean up failed peers (only if we have at least 2 working peers) +cache.cleanup_failed_peers(); +``` + +## Cache File Location + +The cache file is stored in a system-wide location accessible to all processes: + +- **Linux**: `/var/safe/bootstrap_cache.json` +- **macOS**: `/Library/Application Support/Safe/bootstrap_cache.json` +- **Windows**: `C:\ProgramData\Safe\bootstrap_cache.json` + +## Cache File Format + +```json +{ + "last_updated": "2024-02-20T15:30:00Z", + "peers": [ + { + "ip": "192.168.1.1", + "port": 8080, + "last_seen": "2024-02-20T15:30:00Z", + "success_count": 10, + "failure_count": 0 + } + ] +} +``` + +## Error Handling + +The crate provides detailed error types through the `Error` enum: + +```rust +use bootstrap_cache::Error; + +match cache_manager.read_cache() { + Ok(cache) => println!("Cache loaded successfully"), + Err(Error::CacheStale) => println!("Cache is stale"), + Err(Error::CacheCorrupted) => println!("Cache file is corrupted"), + Err(Error::Io(e)) => println!("IO error: {}", e), + Err(e) => println!("Other error: {}", e), +} +``` + +## Thread Safety + +The cache system uses file locking to ensure safe concurrent access: + +- Shared locks for reading +- Exclusive locks for writing +- Atomic file updates using temporary files + +## Development + +### Building + +```bash +cargo build +``` + +### Running Tests + +```bash +cargo test +``` + +### Running with Logging + +```rust +use tracing_subscriber::FmtSubscriber; + +// Initialize logging +let subscriber = FmtSubscriber::builder() + .with_max_level(tracing::Level::DEBUG) + .init(); +``` + +## Contributing + +1. Fork the repository +2. Create your feature branch (`git checkout -b feature/amazing-feature`) +3. Commit your changes (`git commit -am 'Add amazing feature'`) +4. Push to the branch (`git push origin feature/amazing-feature`) +5. Open a Pull Request + +## License + +This project is licensed under the GPL-3.0 License - see the LICENSE file for details. + +## Related Documentation + +- [Bootstrap Cache PRD](docs/bootstrap_cache_prd.md) +- [Implementation Guide](docs/bootstrap_cache_implementation.md) diff --git a/bootstrap_cache/src/cache.rs b/bootstrap_cache/src/cache.rs new file mode 100644 index 0000000000..85b01ed5ee --- /dev/null +++ b/bootstrap_cache/src/cache.rs @@ -0,0 +1,390 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use crate::{BootstrapCache, Error}; +use fs2::FileExt; +use std::{ + fs::{self, File}, + io::{self, Read, Write}, + path::PathBuf, +}; +use tracing::{debug, error, info, warn}; + +/// Manages reading and writing of the bootstrap cache file +pub struct CacheManager { + cache_path: PathBuf, +} + +impl CacheManager { + /// Creates a new CacheManager instance + pub fn new() -> Result { + let cache_path = Self::get_cache_path()?; + Ok(Self { cache_path }) + } + + /// Returns the platform-specific cache file path + fn get_cache_path() -> io::Result { + let path = if cfg!(target_os = "macos") { + PathBuf::from("/Library/Application Support/Safe/bootstrap_cache.json") + } else if cfg!(target_os = "linux") { + PathBuf::from("/var/safe/bootstrap_cache.json") + } else if cfg!(target_os = "windows") { + PathBuf::from(r"C:\ProgramData\Safe\bootstrap_cache.json") + } else { + return Err(io::Error::new( + io::ErrorKind::Other, + "Unsupported operating system", + )); + }; + + // Try to create the directory structure + if let Some(parent) = path.parent() { + info!("Ensuring cache directory exists at: {:?}", parent); + match fs::create_dir_all(parent) { + Ok(_) => { + debug!("Successfully created/verified cache directory"); + // Try to set directory permissions to be user-writable + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + if let Err(e) = fs::set_permissions(parent, fs::Permissions::from_mode(0o755)) { + warn!("Failed to set cache directory permissions: {}", e); + } + } + } + Err(e) => { + // If we can't create in system directory, fall back to user's home directory + warn!("Failed to create system cache directory: {}", e); + if let Some(home) = dirs::home_dir() { + let user_path = home.join(".safe").join("bootstrap_cache.json"); + info!("Falling back to user directory: {:?}", user_path); + if let Some(user_parent) = user_path.parent() { + fs::create_dir_all(user_parent)?; + } + return Ok(user_path); + } + } + } + } + Ok(path) + } + + /// Reads the cache file with file locking, handling potential corruption + pub fn read_cache(&self) -> Result { + debug!("Reading bootstrap cache from {:?}", self.cache_path); + + let mut file = match File::open(&self.cache_path) { + Ok(file) => file, + Err(e) if e.kind() == io::ErrorKind::NotFound => { + info!("Cache file not found, creating new empty cache"); + return Ok(BootstrapCache::new()); + } + Err(e) => { + error!("Failed to open cache file: {}", e); + return Err(e.into()); + } + }; + + // Acquire shared lock for reading + file.lock_shared().map_err(|e| { + error!("Failed to acquire shared lock: {}", e); + Error::LockError + })?; + + let mut contents = String::new(); + if let Err(e) = file.read_to_string(&mut contents) { + error!("Failed to read cache file: {}", e); + // Release lock before returning + let _ = file.unlock(); + return Err(Error::Io(e)); + } + + // Release lock + file.unlock().map_err(|e| { + error!("Failed to release lock: {}", e); + Error::LockError + })?; + + // Try to parse the cache, if it fails it might be corrupted + match serde_json::from_str(&contents) { + Ok(cache) => Ok(cache), + Err(e) => { + error!("Cache file appears to be corrupted: {}", e); + Err(Error::CacheCorrupted(e)) + } + } + } + + /// Rebuilds the cache using provided peers or fetches new ones if none provided + pub async fn rebuild_cache(&self, peers: Option>) -> Result { + info!("Rebuilding bootstrap cache"); + + let cache = if let Some(peers) = peers { + info!("Rebuilding cache with {} in-memory peers", peers.len()); + BootstrapCache { + last_updated: chrono::Utc::now(), + peers, + } + } else { + info!("No in-memory peers available, fetching from endpoints"); + let discovery = InitialPeerDiscovery::new(); + let peers = discovery.fetch_peers().await?; + BootstrapCache { + last_updated: chrono::Utc::now(), + peers, + } + }; + + // Write the rebuilt cache + self.write_cache(&cache)?; + Ok(cache) + } + + /// Writes the cache file with file locking and atomic replacement + pub fn write_cache(&self, cache: &BootstrapCache) -> Result<(), Error> { + debug!("Writing bootstrap cache to {:?}", self.cache_path); + + let temp_path = self.cache_path.with_extension("tmp"); + let mut file = File::create(&temp_path).map_err(|e| { + error!("Failed to create temporary cache file: {}", e); + Error::Io(e) + })?; + + // Acquire exclusive lock for writing + file.lock_exclusive().map_err(|e| { + error!("Failed to acquire exclusive lock: {}", e); + Error::LockError + })?; + + let contents = serde_json::to_string_pretty(cache).map_err(|e| { + error!("Failed to serialize cache: {}", e); + Error::Json(e) + })?; + + file.write_all(contents.as_bytes()).map_err(|e| { + error!("Failed to write cache file: {}", e); + Error::Io(e) + })?; + + file.sync_all().map_err(|e| { + error!("Failed to sync cache file: {}", e); + Error::Io(e) + })?; + + // Release lock + file.unlock().map_err(|e| { + error!("Failed to release lock: {}", e); + Error::LockError + })?; + + // Atomic rename + fs::rename(&temp_path, &self.cache_path).map_err(|e| { + error!("Failed to rename temporary cache file: {}", e); + Error::Io(e) + })?; + + info!("Successfully wrote cache file"); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use chrono::Utc; + use std::fs::OpenOptions; + use tempfile::tempdir; + use tokio; + + #[test] + fn test_cache_read_write() { + let dir = tempdir().unwrap(); + let cache_path = dir.path().join("test_cache.json"); + + let cache = BootstrapCache { + last_updated: Utc::now(), + peers: vec![], + }; + + let manager = CacheManager { cache_path }; + manager.write_cache(&cache).unwrap(); + + let read_cache = manager.read_cache().unwrap(); + assert_eq!(cache.peers.len(), read_cache.peers.len()); + } + + #[test] + fn test_missing_cache_file() { + let dir = tempdir().unwrap(); + let cache_path = dir.path().join("nonexistent.json"); + + let manager = CacheManager { cache_path }; + let cache = manager.read_cache().unwrap(); + assert!(cache.peers.is_empty()); + } + + #[test] + fn test_corrupted_cache_file() { + let dir = tempdir().unwrap(); + let cache_path = dir.path().join("corrupted.json"); + + // Write corrupted JSON + let mut file = OpenOptions::new() + .write(true) + .create(true) + .open(&cache_path) + .unwrap(); + file.write_all(b"{invalid json}").unwrap(); + + let manager = CacheManager { cache_path }; + match manager.read_cache() { + Err(Error::CacheCorrupted(_)) => (), + other => panic!("Expected CacheCorrupted error, got {:?}", other), + } + } + + #[test] + fn test_partially_corrupted_cache() { + let dir = tempdir().unwrap(); + let cache_path = dir.path().join("partial_corrupt.json"); + + // Write partially valid JSON + let mut file = OpenOptions::new() + .write(true) + .create(true) + .open(&cache_path) + .unwrap(); + file.write_all(b"{\"last_updated\":\"2024-01-01T00:00:00Z\",\"peers\":[{}]}").unwrap(); + + let manager = CacheManager { cache_path }; + match manager.read_cache() { + Err(Error::CacheCorrupted(_)) => (), + other => panic!("Expected CacheCorrupted error, got {:?}", other), + } + } + + #[tokio::test] + async fn test_rebuild_cache_with_memory_peers() { + let dir = tempdir().unwrap(); + let cache_path = dir.path().join("rebuild.json"); + let manager = CacheManager { cache_path }; + + // Create some test peers + let test_peers = vec![ + BootstrapPeer { + addr: "/ip4/127.0.0.1/tcp/8080".parse().unwrap(), + success_count: 1, + failure_count: 0, + last_success: Some(Utc::now()), + last_failure: None, + } + ]; + + // Rebuild cache with in-memory peers + let rebuilt = manager.rebuild_cache(Some(test_peers.clone())).await.unwrap(); + assert_eq!(rebuilt.peers.len(), 1); + assert_eq!(rebuilt.peers[0].addr, test_peers[0].addr); + + // Verify the cache was written to disk + let read_cache = manager.read_cache().unwrap(); + assert_eq!(read_cache.peers.len(), 1); + assert_eq!(read_cache.peers[0].addr, test_peers[0].addr); + } + + #[tokio::test] + async fn test_rebuild_cache_from_endpoints() { + let dir = tempdir().unwrap(); + let cache_path = dir.path().join("rebuild_endpoints.json"); + let manager = CacheManager { cache_path }; + + // Write corrupted cache first + let mut file = OpenOptions::new() + .write(true) + .create(true) + .open(&cache_path) + .unwrap(); + file.write_all(b"{corrupted}").unwrap(); + + // Verify corrupted cache is detected + match manager.read_cache() { + Err(Error::CacheCorrupted(_)) => (), + other => panic!("Expected CacheCorrupted error, got {:?}", other), + } + + // Mock the InitialPeerDiscovery for testing + // Note: In a real implementation, you might want to use a trait for InitialPeerDiscovery + // and mock it properly. This test will actually try to fetch from real endpoints. + match manager.rebuild_cache(None).await { + Ok(cache) => { + // Verify the cache was rebuilt and written + let read_cache = manager.read_cache().unwrap(); + assert_eq!(read_cache.peers.len(), cache.peers.len()); + } + Err(Error::NoPeersFound(_)) => { + // This is also acceptable if no endpoints are reachable during test + () + } + Err(e) => panic!("Unexpected error: {:?}", e), + } + } + + #[test] + fn test_concurrent_cache_access() { + let dir = tempdir().unwrap(); + let cache_path = dir.path().join("concurrent.json"); + let manager = CacheManager { cache_path.clone() }; + + // Initial cache + let cache = BootstrapCache { + last_updated: Utc::now(), + peers: vec![], + }; + manager.write_cache(&cache).unwrap(); + + // Try to read while holding write lock + let file = OpenOptions::new() + .write(true) + .open(&cache_path) + .unwrap(); + file.lock_exclusive().unwrap(); + + // This should fail with a lock error + match manager.read_cache() { + Err(Error::LockError) => (), + other => panic!("Expected LockError, got {:?}", other), + } + + // Release lock + file.unlock().unwrap(); + } + + #[test] + fn test_cache_file_permissions() { + let dir = tempdir().unwrap(); + let cache_path = dir.path().join("permissions.json"); + let manager = CacheManager { cache_path: cache_path.clone() }; + + // Write initial cache + let cache = BootstrapCache { + last_updated: Utc::now(), + peers: vec![], + }; + manager.write_cache(&cache).unwrap(); + + // Make file read-only + let mut perms = fs::metadata(&cache_path).unwrap().permissions(); + perms.set_readonly(true); + fs::set_permissions(&cache_path, perms).unwrap(); + + // Try to write to read-only file + match manager.write_cache(&cache) { + Err(Error::Io(_)) => (), + other => panic!("Expected Io error, got {:?}", other), + } + } +} diff --git a/bootstrap_cache/src/cache_store.rs b/bootstrap_cache/src/cache_store.rs new file mode 100644 index 0000000000..9257107773 --- /dev/null +++ b/bootstrap_cache/src/cache_store.rs @@ -0,0 +1,690 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use crate::{BootstrapPeer, Error, InitialPeerDiscovery, Result}; +use fs2::FileExt; +use libp2p::Multiaddr; +use serde::{Deserialize, Serialize}; +use std::fs::{self, File, OpenOptions}; +use std::io::{self, Read}; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; +use tempfile::NamedTempFile; +use tokio::sync::RwLock; + +const PEER_EXPIRY_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 24 hours + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CacheData { + peers: std::collections::HashMap, + #[serde(default = "SystemTime::now")] + last_updated: SystemTime, + #[serde(default = "default_version")] + version: u32, +} + +fn default_version() -> u32 { + 1 +} + +impl Default for CacheData { + fn default() -> Self { + Self { + peers: std::collections::HashMap::new(), + last_updated: SystemTime::now(), + version: default_version(), + } + } +} + +#[derive(Clone)] +pub struct CacheStore { + cache_path: PathBuf, + config: Arc, + data: Arc>, +} + +impl CacheStore { + pub async fn new(config: crate::BootstrapConfig) -> Result { + tracing::info!("Creating new CacheStore with config: {:?}", config); + let cache_path = config.cache_file_path.clone(); + let config = Arc::new(config); + + // Create cache directory if it doesn't exist + if let Some(parent) = cache_path.parent() { + tracing::info!("Attempting to create cache directory at {:?}", parent); + // Try to create the directory + match fs::create_dir_all(parent) { + Ok(_) => { + tracing::info!("Successfully created cache directory"); + } + Err(e) => { + tracing::warn!("Failed to create cache directory at {:?}: {}", parent, e); + // Try user's home directory as fallback + if let Some(home) = dirs::home_dir() { + let user_path = home.join(".safe").join("bootstrap_cache.json"); + tracing::info!("Falling back to user directory: {:?}", user_path); + if let Some(user_parent) = user_path.parent() { + if let Err(e) = fs::create_dir_all(user_parent) { + tracing::error!("Failed to create user cache directory: {}", e); + return Err(Error::Io(e)); + } + tracing::info!("Successfully created user cache directory"); + } + let future = Self::new(crate::BootstrapConfig::with_cache_path(user_path)); + return Box::pin(future).await; + } + } + } + } + + let data = if cache_path.exists() { + tracing::info!("Cache file exists at {:?}, attempting to load", cache_path); + match Self::load_cache_data(&cache_path).await { + Ok(data) => { + tracing::info!("Successfully loaded cache data with {} peers", data.peers.len()); + // If cache data exists but has no peers and file is not read-only, + // fallback to default + let is_readonly = cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if data.peers.is_empty() && !is_readonly { + tracing::info!("Cache is empty and not read-only, falling back to default"); + Self::fallback_to_default(&config).await? + } else { + // Ensure we don't exceed max_peers + let mut filtered_data = data; + if filtered_data.peers.len() > config.max_peers { + tracing::info!( + "Trimming cache from {} to {} peers", + filtered_data.peers.len(), + config.max_peers + ); + let peers: Vec<_> = filtered_data.peers.into_iter().collect(); + filtered_data.peers = peers + .into_iter() + .take(config.max_peers) + .collect(); + } + filtered_data + } + } + Err(e) => { + tracing::warn!("Failed to load cache data: {}", e); + // If we can't read or parse the cache file, return empty cache + CacheData::default() + } + } + } else { + tracing::info!("Cache file does not exist at {:?}, falling back to default", cache_path); + // If cache file doesn't exist, fallback to default + Self::fallback_to_default(&config).await? + }; + + let store = Self { + cache_path, + config, + data: Arc::new(RwLock::new(data)), + }; + + // Only clean up stale peers if the file is not read-only + let is_readonly = store + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if !is_readonly { + if let Err(e) = store.cleanup_stale_peers().await { + tracing::warn!("Failed to clean up stale peers: {}", e); + } + } + + tracing::info!("Successfully created CacheStore"); + Ok(store) + } + + async fn fallback_to_default(config: &crate::BootstrapConfig) -> Result { + tracing::info!("Falling back to default peers from endpoints"); + let mut data = CacheData { + peers: std::collections::HashMap::new(), + last_updated: SystemTime::now(), + version: default_version(), + }; + + // If no endpoints are configured, just return empty cache + if config.endpoints.is_empty() { + tracing::warn!("No endpoints configured, returning empty cache"); + return Ok(data); + } + + // Try to discover peers from configured endpoints + let discovery = InitialPeerDiscovery::with_endpoints(config.endpoints.clone()); + match discovery.fetch_peers().await { + Ok(peers) => { + tracing::info!("Successfully fetched {} peers from endpoints", peers.len()); + // Only add up to max_peers from the discovered peers + for peer in peers.into_iter().take(config.max_peers) { + data.peers.insert(peer.addr.to_string(), peer); + } + + // Create parent directory if it doesn't exist + if let Some(parent) = config.cache_file_path.parent() { + tracing::info!("Creating cache directory at {:?}", parent); + if let Err(e) = fs::create_dir_all(parent) { + tracing::warn!("Failed to create cache directory: {}", e); + } + } + + // Try to write the cache file immediately + match serde_json::to_string_pretty(&data) { + Ok(json) => { + tracing::info!("Writing {} peers to cache file", data.peers.len()); + if let Err(e) = fs::write(&config.cache_file_path, json) { + tracing::warn!("Failed to write cache file: {}", e); + } else { + tracing::info!("Successfully wrote cache file at {:?}", config.cache_file_path); + } + } + Err(e) => { + tracing::warn!("Failed to serialize cache data: {}", e); + } + } + + Ok(data) + } + Err(e) => { + tracing::warn!("Failed to fetch peers from endpoints: {}", e); + Ok(data) // Return empty cache on error + } + } + } + + async fn load_cache_data(cache_path: &PathBuf) -> Result { + // Try to open the file with read permissions + let mut file = match OpenOptions::new().read(true).open(cache_path) { + Ok(f) => f, + Err(e) => { + tracing::warn!("Failed to open cache file: {}", e); + return Err(Error::from(e)); + } + }; + + // Acquire shared lock for reading + if let Err(e) = Self::acquire_shared_lock(&file).await { + tracing::warn!("Failed to acquire shared lock: {}", e); + return Err(e); + } + + // Read the file contents + let mut contents = String::new(); + if let Err(e) = file.read_to_string(&mut contents) { + tracing::warn!("Failed to read cache file: {}", e); + return Err(Error::from(e)); + } + + // Parse the cache data + match serde_json::from_str::(&contents) { + Ok(data) => Ok(data), + Err(e) => { + tracing::warn!("Failed to parse cache data: {}", e); + Err(Error::Io(io::Error::new(io::ErrorKind::InvalidData, e))) + } + } + } + + pub async fn get_peers(&self) -> Vec { + let data = self.data.read().await; + data.peers.values().cloned().collect() + } + + pub async fn get_reliable_peers(&self) -> Vec { + let data = self.data.read().await; + let reliable_peers: Vec<_> = data + .peers + .values() + .filter(|peer| peer.success_count > peer.failure_count) + .cloned() + .collect(); + + // If we have no reliable peers and the cache file is not read-only, + // try to refresh from default endpoints + if reliable_peers.is_empty() + && !self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false) + { + drop(data); + if let Ok(new_data) = Self::fallback_to_default(&self.config).await { + let mut data = self.data.write().await; + *data = new_data; + return data + .peers + .values() + .filter(|peer| peer.success_count > peer.failure_count) + .cloned() + .collect(); + } + } + + reliable_peers + } + + pub async fn update_peer_status(&self, addr: &str, success: bool) -> Result<()> { + // Check if the file is read-only before attempting to modify + let is_readonly = self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if is_readonly { + tracing::warn!("Cannot update peer status: cache file is read-only"); + return Ok(()); + } + + let mut data = self.data.write().await; + + match addr.parse::() { + Ok(addr) => { + let peer = data + .peers + .entry(addr.to_string()) + .or_insert_with(|| BootstrapPeer::new(addr)); + peer.update_status(success); + self.save_to_disk(&data).await?; + Ok(()) + } + Err(e) => Err(Error::from(std::io::Error::new( + std::io::ErrorKind::InvalidInput, + format!("Invalid multiaddr: {}", e), + ))), + } + } + + pub async fn add_peer(&self, addr: Multiaddr) -> Result<()> { + // Check if the cache file is read-only before attempting any modifications + let is_readonly = self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if is_readonly { + tracing::warn!("Cannot add peer: cache file is read-only"); + return Ok(()); + } + + let mut data = self.data.write().await; + let addr_str = addr.to_string(); + + tracing::debug!( + "Adding peer {}, current peers: {}", + addr_str, + data.peers.len() + ); + + // If the peer already exists, just update its last_seen time + if let Some(peer) = data.peers.get_mut(&addr_str) { + tracing::debug!("Updating existing peer {}", addr_str); + peer.last_seen = SystemTime::now(); + return self.save_to_disk(&data).await; + } + + // Only add new peers if we haven't reached max_peers + if data.peers.len() < self.config.max_peers { + tracing::debug!("Adding new peer {} (under max_peers limit)", addr_str); + data.peers + .insert(addr_str.clone(), BootstrapPeer::new(addr)); + self.save_to_disk(&data).await?; + } else { + // If we're at max_peers, replace the oldest peer + if let Some((oldest_addr, oldest_peer)) = + data.peers.iter().min_by_key(|(_, peer)| peer.last_seen) + { + tracing::debug!( + "Replacing oldest peer {} (last seen: {:?}) with new peer {}", + oldest_addr, + oldest_peer.last_seen, + addr_str + ); + let oldest_addr = oldest_addr.clone(); + data.peers.remove(&oldest_addr); + data.peers + .insert(addr_str.clone(), BootstrapPeer::new(addr)); + self.save_to_disk(&data).await?; + } + } + + Ok(()) + } + + pub async fn remove_peer(&self, addr: &str) -> Result<()> { + // Check if the file is read-only before attempting to modify + let is_readonly = self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if is_readonly { + tracing::warn!("Cannot remove peer: cache file is read-only"); + return Ok(()); + } + + let mut data = self.data.write().await; + data.peers.remove(addr); + self.save_to_disk(&data).await?; + Ok(()) + } + + pub async fn cleanup_unreliable_peers(&self) -> Result<()> { + // Check if the file is read-only before attempting to modify + let is_readonly = self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if is_readonly { + tracing::warn!("Cannot cleanup unreliable peers: cache file is read-only"); + return Ok(()); + } + + let mut data = self.data.write().await; + let unreliable_peers: Vec = data + .peers + .iter() + .filter(|(_, peer)| !peer.is_reliable()) + .map(|(addr, _)| addr.clone()) + .collect(); + + for addr in unreliable_peers { + data.peers.remove(&addr); + } + + self.save_to_disk(&data).await?; + Ok(()) + } + + pub async fn cleanup_stale_peers(&self) -> Result<()> { + // Check if the file is read-only before attempting to modify + let is_readonly = self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if is_readonly { + tracing::warn!("Cannot cleanup stale peers: cache file is read-only"); + return Ok(()); + } + + let mut data = self.data.write().await; + let stale_peers: Vec = data + .peers + .iter() + .filter(|(_, peer)| { + if let Ok(elapsed) = peer.last_seen.elapsed() { + elapsed > PEER_EXPIRY_DURATION + } else { + true // If we can't get elapsed time, consider it stale + } + }) + .map(|(addr, _)| addr.clone()) + .collect(); + + for addr in stale_peers { + data.peers.remove(&addr); + } + + self.save_to_disk(&data).await?; + Ok(()) + } + + pub async fn save_to_disk(&self, data: &CacheData) -> Result<()> { + // Check if the file is read-only before attempting to write + let is_readonly = self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if is_readonly { + tracing::warn!("Cannot save to disk: cache file is read-only"); + return Ok(()); + } + + match self.atomic_write(data).await { + Ok(_) => Ok(()), + Err(e) => { + tracing::error!("Failed to save cache to disk: {}", e); + Err(e) + } + } + } + + async fn acquire_shared_lock(file: &File) -> Result<()> { + let file = file.try_clone().map_err(Error::from)?; + + tokio::task::spawn_blocking(move || file.try_lock_shared().map_err(Error::from)) + .await + .map_err(|e| { + Error::from(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to spawn blocking task: {}", e), + )) + })? + } + + async fn acquire_exclusive_lock(file: &File) -> Result<()> { + let mut backoff = Duration::from_millis(10); + let max_attempts = 5; + let mut attempts = 0; + + loop { + match file.try_lock_exclusive() { + Ok(_) => return Ok(()), + Err(_) if attempts >= max_attempts => { + return Err(Error::LockError); + } + Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + attempts += 1; + tokio::time::sleep(backoff).await; + backoff *= 2; + } + Err(_) => return Err(Error::LockError), + } + } + } + + async fn atomic_write(&self, data: &CacheData) -> Result<()> { + // Create parent directory if it doesn't exist + if let Some(parent) = self.cache_path.parent() { + fs::create_dir_all(parent).map_err(Error::from)?; + } + + // Create a temporary file in the same directory as the cache file + let temp_file = NamedTempFile::new().map_err(Error::from)?; + + // Write data to temporary file + serde_json::to_writer_pretty(&temp_file, &data).map_err(Error::from)?; + + // Open the target file with proper permissions + let file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&self.cache_path) + .map_err(Error::from)?; + + // Acquire exclusive lock + Self::acquire_exclusive_lock(&file).await?; + + // Perform atomic rename + temp_file.persist(&self.cache_path).map_err(|e| { + Error::from(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to persist cache file: {}", e), + )) + })?; + + // Lock will be automatically released when file is dropped + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + async fn create_test_store() -> (CacheStore, PathBuf) { + let temp_dir = tempdir().unwrap(); + let cache_file = temp_dir.path().join("cache.json"); + + let config = crate::BootstrapConfig::new( + vec![], // Empty endpoints to prevent fallback + 1500, + cache_file.clone(), + Duration::from_secs(60), + Duration::from_secs(10), + 3, + ); + + let store = CacheStore::new(config).await.unwrap(); + (store.clone(), store.cache_path.clone()) + } + + #[tokio::test] + async fn test_peer_update_and_save() { + let (store, _) = create_test_store().await; + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + + // Manually add a peer without using fallback + { + let mut data = store.data.write().await; + data.peers + .insert(addr.to_string(), BootstrapPeer::new(addr.clone())); + store.save_to_disk(&data).await.unwrap(); + } + + store + .update_peer_status(&addr.to_string(), true) + .await + .unwrap(); + + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1); + assert_eq!(peers[0].addr, addr); + assert_eq!(peers[0].success_count, 1); + assert_eq!(peers[0].failure_count, 0); + } + + #[tokio::test] + async fn test_peer_cleanup() { + let (store, _) = create_test_store().await; + let good_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let bad_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8081".parse().unwrap(); + + // Add peers + store.add_peer(good_addr.clone()).await.unwrap(); + store.add_peer(bad_addr.clone()).await.unwrap(); + + // Make one peer reliable and one unreliable + store + .update_peer_status(&good_addr.to_string(), true) + .await + .unwrap(); + for _ in 0..5 { + store + .update_peer_status(&bad_addr.to_string(), false) + .await + .unwrap(); + } + + // Clean up unreliable peers + store.cleanup_unreliable_peers().await.unwrap(); + + // Get all peers (not just reliable ones) + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1); + assert_eq!(peers[0].addr, good_addr); + } + + #[tokio::test] + async fn test_stale_peer_cleanup() { + let (store, _) = create_test_store().await; + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + + // Add a peer with more failures than successes + let mut peer = BootstrapPeer::new(addr.clone()); + peer.success_count = 1; + peer.failure_count = 5; + { + let mut data = store.data.write().await; + data.peers.insert(addr.to_string(), peer); + store.save_to_disk(&data).await.unwrap(); + } + + // Clean up unreliable peers + store.cleanup_unreliable_peers().await.unwrap(); + + // Should have no peers since the only peer was unreliable + let peers = store.get_reliable_peers().await; + assert_eq!(peers.len(), 0); + } + + #[tokio::test] + async fn test_concurrent_access() { + let (store, _) = create_test_store().await; + let store = Arc::new(store); + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + + // Manually add a peer without using fallback + { + let mut data = store.data.write().await; + data.peers + .insert(addr.to_string(), BootstrapPeer::new(addr.clone())); + store.save_to_disk(&data).await.unwrap(); + } + + let mut handles = vec![]; + + // Spawn multiple tasks to update peer status concurrently + for i in 0..10 { + let store = Arc::clone(&store); + let addr = addr.clone(); + + handles.push(tokio::spawn(async move { + store + .update_peer_status(&addr.to_string(), i % 2 == 0) + .await + .unwrap(); + })); + } + + // Wait for all tasks to complete + for handle in handles { + handle.await.unwrap(); + } + + // Verify the final state - should have one peer + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1); + + // The peer should have a mix of successes and failures + assert!(peers[0].success_count > 0); + assert!(peers[0].failure_count > 0); + } +} diff --git a/bootstrap_cache/src/circuit_breaker.rs b/bootstrap_cache/src/circuit_breaker.rs new file mode 100644 index 0000000000..2c19f94862 --- /dev/null +++ b/bootstrap_cache/src/circuit_breaker.rs @@ -0,0 +1,208 @@ +use std::collections::HashMap; +use std::sync::Arc; +use std::time::{Duration, Instant}; +use tokio::sync::RwLock; + +#[derive(Debug, Clone)] +pub struct CircuitBreakerConfig { + max_failures: u32, + reset_timeout: Duration, + min_backoff: Duration, + max_backoff: Duration, +} + +impl Default for CircuitBreakerConfig { + fn default() -> Self { + Self { + max_failures: 5, + reset_timeout: Duration::from_secs(60), + min_backoff: Duration::from_millis(500), + max_backoff: Duration::from_secs(30), + } + } +} + +#[derive(Debug)] +struct EndpointState { + failures: u32, + last_failure: Instant, + last_attempt: Instant, + backoff_duration: Duration, +} + +impl EndpointState { + fn new(min_backoff: Duration) -> Self { + Self { + failures: 0, + last_failure: Instant::now(), + last_attempt: Instant::now(), + backoff_duration: min_backoff, + } + } + + fn record_failure(&mut self, max_backoff: Duration) { + self.failures += 1; + self.last_failure = Instant::now(); + self.last_attempt = Instant::now(); + // Exponential backoff with max limit + self.backoff_duration = std::cmp::min(self.backoff_duration * 2, max_backoff); + } + + fn record_success(&mut self, min_backoff: Duration) { + self.failures = 0; + self.backoff_duration = min_backoff; + } + + fn is_open(&self, max_failures: u32, reset_timeout: Duration) -> bool { + if self.failures >= max_failures { + // Check if we've waited long enough since the last failure + if self.last_failure.elapsed() > reset_timeout { + false // Circuit is half-open, allow retry + } else { + true // Circuit is open, block requests + } + } else { + false // Circuit is closed, allow requests + } + } + + fn should_retry(&self) -> bool { + self.last_attempt.elapsed() >= self.backoff_duration + } +} + +#[derive(Debug, Clone)] +pub struct CircuitBreaker { + states: Arc>>, + config: CircuitBreakerConfig, +} + +impl CircuitBreaker { + pub fn new() -> Self { + Self { + states: Arc::new(RwLock::new(HashMap::new())), + config: CircuitBreakerConfig::default(), + } + } + + pub fn with_config(config: CircuitBreakerConfig) -> Self { + Self { + states: Arc::new(RwLock::new(HashMap::new())), + config, + } + } + + pub async fn check_endpoint(&self, endpoint: &str) -> bool { + let mut states = self.states.write().await; + let state = states + .entry(endpoint.to_string()) + .or_insert_with(|| EndpointState::new(self.config.min_backoff)); + + !(state.is_open(self.config.max_failures, self.config.reset_timeout) && !state.should_retry()) + } + + pub async fn record_success(&self, endpoint: &str) { + let mut states = self.states.write().await; + if let Some(state) = states.get_mut(endpoint) { + state.record_success(self.config.min_backoff); + } + } + + pub async fn record_failure(&self, endpoint: &str) { + let mut states = self.states.write().await; + let state = states + .entry(endpoint.to_string()) + .or_insert_with(|| EndpointState::new(self.config.min_backoff)); + state.record_failure(self.config.max_backoff); + } + + pub async fn get_backoff_duration(&self, endpoint: &str) -> Duration { + let states = self.states.read().await; + states + .get(endpoint) + .map(|state| state.backoff_duration) + .unwrap_or(self.config.min_backoff) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tokio::time::sleep; + + fn test_config() -> CircuitBreakerConfig { + CircuitBreakerConfig { + max_failures: 3, + reset_timeout: Duration::from_millis(100), // Much shorter for testing + min_backoff: Duration::from_millis(10), + max_backoff: Duration::from_millis(100), + } + } + + #[tokio::test] + async fn test_circuit_breaker_basic() { + let cb = CircuitBreaker::with_config(test_config()); + let endpoint = "http://test.endpoint"; + + // Initially should allow requests + assert!(cb.check_endpoint(endpoint).await); + + // Record failures + for _ in 0..test_config().max_failures { + cb.record_failure(endpoint).await; + } + + // Circuit should be open + assert!(!cb.check_endpoint(endpoint).await); + + // Record success should reset + cb.record_success(endpoint).await; + assert!(cb.check_endpoint(endpoint).await); + } + + #[tokio::test] + async fn test_backoff_duration() { + let config = test_config(); + let cb = CircuitBreaker::with_config(config.clone()); + let endpoint = "http://test.endpoint"; + + assert_eq!(cb.get_backoff_duration(endpoint).await, config.min_backoff); + + // Record a failure + cb.record_failure(endpoint).await; + assert_eq!( + cb.get_backoff_duration(endpoint).await, + config.min_backoff * 2 + ); + + // Record another failure + cb.record_failure(endpoint).await; + assert_eq!( + cb.get_backoff_duration(endpoint).await, + config.min_backoff * 4 + ); + + // Success should reset backoff + cb.record_success(endpoint).await; + assert_eq!(cb.get_backoff_duration(endpoint).await, config.min_backoff); + } + + #[tokio::test] + async fn test_circuit_half_open() { + let config = test_config(); + let cb = CircuitBreaker::with_config(config.clone()); + let endpoint = "http://test.endpoint"; + + // Open the circuit + for _ in 0..config.max_failures { + cb.record_failure(endpoint).await; + } + assert!(!cb.check_endpoint(endpoint).await); + + // Wait for reset timeout + sleep(config.reset_timeout + Duration::from_millis(10)).await; + + // Circuit should be half-open now + assert!(cb.check_endpoint(endpoint).await); + } +} diff --git a/bootstrap_cache/src/config.rs b/bootstrap_cache/src/config.rs new file mode 100644 index 0000000000..17d3f6a377 --- /dev/null +++ b/bootstrap_cache/src/config.rs @@ -0,0 +1,285 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use std::path::{Path, PathBuf}; +use std::time::Duration; +use std::fs; + +/// Configuration for the bootstrap cache +#[derive(Clone, Debug)] +pub struct BootstrapConfig { + /// List of bootstrap endpoints to fetch peer information from + pub endpoints: Vec, + /// Maximum number of peers to keep in the cache + pub max_peers: usize, + /// Path to the bootstrap cache file + pub cache_file_path: PathBuf, + /// How often to update the cache (in seconds) + pub update_interval: Duration, + /// Request timeout for endpoint queries + pub request_timeout: Duration, + /// Maximum retries per endpoint + pub max_retries: u32, +} + +impl Default for BootstrapConfig { + fn default() -> Self { + Self { + endpoints: vec![ + "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json".to_string(), + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts".to_string(), + "https://sn-node1.s3.eu-west-2.amazonaws.com/peers".to_string(), + "https://sn-node2.s3.eu-west-2.amazonaws.com/peers".to_string(), + ], + max_peers: 1500, + cache_file_path: default_cache_path(), + update_interval: Duration::from_secs(60), + request_timeout: Duration::from_secs(10), + max_retries: 3, + } + } +} + +impl BootstrapConfig { + /// Creates a new BootstrapConfig with custom endpoints + pub fn with_endpoints(endpoints: Vec) -> Self { + Self { + endpoints, + ..Default::default() + } + } + + /// Creates a new BootstrapConfig with a custom cache file path + pub fn with_cache_path>(path: P) -> Self { + Self { + cache_file_path: path.as_ref().to_path_buf(), + ..Default::default() + } + } + + /// Creates a new BootstrapConfig with custom settings + pub fn new( + endpoints: Vec, + max_peers: usize, + cache_file_path: PathBuf, + update_interval: Duration, + request_timeout: Duration, + max_retries: u32, + ) -> Self { + Self { + endpoints, + max_peers, + cache_file_path, + update_interval, + request_timeout, + max_retries, + } + } +} + +/// Returns the default path for the bootstrap cache file +fn default_cache_path() -> PathBuf { + tracing::info!("Determining default cache path"); + let system_path = if cfg!(target_os = "macos") { + tracing::debug!("OS: macOS"); + // Try user's Library first, then fall back to system Library + if let Some(home) = dirs::home_dir() { + let user_library = home.join("Library/Application Support/Safe/bootstrap_cache.json"); + tracing::info!("Attempting to use user's Library path: {:?}", user_library); + if let Some(parent) = user_library.parent() { + tracing::debug!("Creating directory: {:?}", parent); + match fs::create_dir_all(parent) { + Ok(_) => { + tracing::debug!("Successfully created directory structure"); + // Check if we can write to the directory + match tempfile::NamedTempFile::new_in(parent) { + Ok(temp_file) => { + temp_file.close().ok(); + tracing::info!("Successfully verified write access to {:?}", parent); + return user_library; + } + Err(e) => { + tracing::warn!("Cannot write to user's Library: {}", e); + } + } + } + Err(e) => { + tracing::warn!("Failed to create user's Library directory: {}", e); + } + } + } + } + // Fall back to system Library + tracing::info!("Falling back to system Library path"); + PathBuf::from("/Library/Application Support/Safe/bootstrap_cache.json") + } else if cfg!(target_os = "linux") { + tracing::debug!("OS: Linux"); + // On Linux, try /var/lib/safe first, then fall back to /var/safe + let primary_path = PathBuf::from("/var/lib/safe/bootstrap_cache.json"); + tracing::info!("Attempting to use primary Linux path: {:?}", primary_path); + if let Some(parent) = primary_path.parent() { + tracing::debug!("Creating directory: {:?}", parent); + match fs::create_dir_all(parent) { + Ok(_) => { + tracing::debug!("Successfully created directory structure"); + // Check if we can write to the directory + match tempfile::NamedTempFile::new_in(parent) { + Ok(temp_file) => { + temp_file.close().ok(); + tracing::info!("Successfully verified write access to {:?}", parent); + return primary_path; + } + Err(e) => { + tracing::warn!("Cannot write to {:?}: {}", parent, e); + } + } + } + Err(e) => { + tracing::warn!("Failed to create Linux primary directory: {}", e); + } + } + } + tracing::info!("Falling back to secondary Linux path: /var/safe"); + PathBuf::from("/var/safe/bootstrap_cache.json") + } else if cfg!(target_os = "windows") { + tracing::debug!("OS: Windows"); + // On Windows, try LocalAppData first, then fall back to ProgramData + if let Some(local_app_data) = dirs::data_local_dir() { + let local_path = local_app_data.join("Safe").join("bootstrap_cache.json"); + tracing::info!("Attempting to use Windows LocalAppData path: {:?}", local_path); + if let Some(parent) = local_path.parent() { + tracing::debug!("Creating directory: {:?}", parent); + if fs::create_dir_all(parent).is_ok() { + // Check if we can write to the directory + if let Ok(temp_file) = tempfile::NamedTempFile::new_in(parent) { + temp_file.close().ok(); + tracing::info!("Successfully created and verified Windows LocalAppData path"); + return local_path; + } + } + } + } + tracing::info!("Falling back to Windows ProgramData path"); + PathBuf::from(r"C:\ProgramData\Safe\bootstrap_cache.json") + } else { + tracing::debug!("Unknown OS, using current directory"); + PathBuf::from("bootstrap_cache.json") + }; + + // Try to create the system directory first + if let Some(parent) = system_path.parent() { + tracing::debug!("Attempting to create system directory: {:?}", parent); + if fs::create_dir_all(parent).is_ok() { + // Check if we can write to the directory + match tempfile::NamedTempFile::new_in(parent) { + Ok(temp_file) => { + temp_file.close().ok(); + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + match fs::set_permissions(parent, fs::Permissions::from_mode(0o755)) { + Ok(_) => tracing::debug!("Successfully set directory permissions"), + Err(e) => tracing::warn!("Failed to set cache directory permissions: {}", e), + } + } + tracing::info!("Successfully created and verified system directory"); + return system_path; + } + Err(e) => { + tracing::warn!("Cannot write to system directory: {}", e); + } + } + } else { + tracing::warn!("Failed to create system directory"); + } + } + + // If system directory is not writable, fall back to user's home directory + if let Some(home) = dirs::home_dir() { + let user_path = home.join(".safe").join("bootstrap_cache.json"); + tracing::info!("Attempting to use home directory fallback: {:?}", user_path); + if let Some(parent) = user_path.parent() { + tracing::debug!("Creating home directory: {:?}", parent); + if fs::create_dir_all(parent).is_ok() { + tracing::info!("Successfully created home directory"); + return user_path; + } + } + } + + // Last resort: use current directory + tracing::warn!("All directory attempts failed, using current directory"); + PathBuf::from("bootstrap_cache.json") +} + +#[cfg(test)] +mod tests { + use super::*; + use std::time::Duration; + + #[test] + fn test_default_config() { + let config = BootstrapConfig::default(); + assert_eq!(config.endpoints.len(), 4); + assert_eq!( + config.endpoints[0], + "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json" + ); + assert_eq!( + config.endpoints[1], + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" + ); + assert_eq!( + config.endpoints[2], + "https://sn-node1.s3.eu-west-2.amazonaws.com/peers" + ); + assert_eq!( + config.endpoints[3], + "https://sn-node2.s3.eu-west-2.amazonaws.com/peers" + ); + assert_eq!(config.max_peers, 1500); + assert_eq!(config.update_interval, Duration::from_secs(60)); + assert_eq!(config.request_timeout, Duration::from_secs(10)); + assert_eq!(config.max_retries, 3); + } + + #[test] + fn test_custom_endpoints() { + let endpoints = vec!["http://custom.endpoint/cache".to_string()]; + let config = BootstrapConfig::with_endpoints(endpoints.clone()); + assert_eq!(config.endpoints, endpoints); + } + + #[test] + fn test_custom_cache_path() { + let path = PathBuf::from("/custom/path/cache.json"); + let config = BootstrapConfig::with_cache_path(&path); + assert_eq!(config.cache_file_path, path); + } + + #[test] + fn test_new_config() { + let endpoints = vec!["http://custom.endpoint/cache".to_string()]; + let path = PathBuf::from("/custom/path/cache.json"); + let config = BootstrapConfig::new( + endpoints.clone(), + 2000, + path.clone(), + Duration::from_secs(120), + Duration::from_secs(5), + 5, + ); + + assert_eq!(config.endpoints, endpoints); + assert_eq!(config.max_peers, 2000); + assert_eq!(config.cache_file_path, path); + assert_eq!(config.update_interval, Duration::from_secs(120)); + assert_eq!(config.request_timeout, Duration::from_secs(5)); + assert_eq!(config.max_retries, 5); + } +} diff --git a/bootstrap_cache/src/error.rs b/bootstrap_cache/src/error.rs new file mode 100644 index 0000000000..a4b3847cfc --- /dev/null +++ b/bootstrap_cache/src/error.rs @@ -0,0 +1,39 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum Error { + #[error("No peers found: {0}")] + NoPeersFound(String), + #[error("Invalid response: {0}")] + InvalidResponse(String), + #[error("IO error: {0}")] + Io(#[from] std::io::Error), + #[error("JSON error: {0}")] + Json(#[from] serde_json::Error), + #[error("Request error: {0}")] + Request(#[from] reqwest::Error), + #[error("Failed to acquire or release file lock")] + LockError, + #[error("Cache file is corrupted: {0}")] + CacheCorrupted(serde_json::Error), + #[error("Timeout error: {0}")] + Timeout(#[from] tokio::time::error::Elapsed), + #[error("Circuit breaker open for endpoint: {0}")] + CircuitBreakerOpen(String), + #[error("Endpoint temporarily unavailable: {0}")] + EndpointUnavailable(String), + #[error("Request failed: {0}")] + RequestFailed(String), + #[error("Request timed out")] + RequestTimeout, +} + +pub type Result = std::result::Result; diff --git a/bootstrap_cache/src/initial_peer_discovery.rs b/bootstrap_cache/src/initial_peer_discovery.rs new file mode 100644 index 0000000000..da1441b161 --- /dev/null +++ b/bootstrap_cache/src/initial_peer_discovery.rs @@ -0,0 +1,424 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use crate::{ + circuit_breaker::{CircuitBreaker, CircuitBreakerConfig}, + BootstrapEndpoints, BootstrapPeer, Error, Result, +}; +use libp2p::Multiaddr; +use reqwest::Client; +use tokio::time::timeout; +use tracing::{info, warn}; + +const DEFAULT_JSON_ENDPOINT: &str = + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts"; + +const DEFAULT_BOOTSTRAP_ENDPOINTS: &[&str] = &[ + DEFAULT_JSON_ENDPOINT, +]; + +const FETCH_TIMEOUT_SECS: u64 = 30; + +/// Discovers initial peers from a list of endpoints +pub struct InitialPeerDiscovery { + endpoints: Vec, + client: Client, + circuit_breaker: CircuitBreaker, +} + +impl Default for InitialPeerDiscovery { + fn default() -> Self { + Self::new() + } +} + +impl InitialPeerDiscovery { + pub fn new() -> Self { + Self { + endpoints: DEFAULT_BOOTSTRAP_ENDPOINTS + .iter() + .map(|s| s.to_string()) + .collect(), + client: Client::new(), + circuit_breaker: CircuitBreaker::new(), + } + } + + pub fn with_endpoints(endpoints: Vec) -> Self { + Self { + endpoints, + client: Client::new(), + circuit_breaker: CircuitBreaker::new(), + } + } + + pub fn with_config( + endpoints: Vec, + circuit_breaker_config: CircuitBreakerConfig, + ) -> Self { + Self { + endpoints, + client: Client::new(), + circuit_breaker: CircuitBreaker::with_config(circuit_breaker_config), + } + } + + /// Load endpoints from a JSON file + pub async fn from_json(json_str: &str) -> Result { + let endpoints: BootstrapEndpoints = serde_json::from_str(json_str)?; + Ok(Self { + endpoints: endpoints.peers, + client: Client::new(), + circuit_breaker: CircuitBreaker::new(), + }) + } + + /// Fetch peers from all configured endpoints + pub async fn fetch_peers(&self) -> Result> { + info!("Starting peer discovery from {} endpoints: {:?}", self.endpoints.len(), self.endpoints); + let mut peers = Vec::new(); + let mut last_error = None; + + for endpoint in &self.endpoints { + info!("Attempting to fetch peers from endpoint: {}", endpoint); + match self.fetch_from_endpoint(endpoint).await { + Ok(mut endpoint_peers) => { + info!( + "Successfully fetched {} peers from {}. First few peers: {:?}", + endpoint_peers.len(), + endpoint, + endpoint_peers.iter().take(3).collect::>() + ); + peers.append(&mut endpoint_peers); + } + Err(e) => { + warn!("Failed to fetch peers from {}: {}", endpoint, e); + last_error = Some(e); + } + } + } + + if peers.is_empty() { + if let Some(e) = last_error { + warn!("No peers found from any endpoint. Last error: {}", e); + Err(Error::NoPeersFound(format!( + "No valid peers found from any endpoint: {}", + e + ))) + } else { + warn!("No peers found from any endpoint and no errors reported"); + Err(Error::NoPeersFound( + "No valid peers found from any endpoint".to_string(), + )) + } + } else { + info!( + "Successfully discovered {} total peers. First few: {:?}", + peers.len(), + peers.iter().take(3).collect::>() + ); + Ok(peers) + } + } + + async fn fetch_from_endpoint(&self, endpoint: &str) -> Result> { + // Check circuit breaker state + if !self.circuit_breaker.check_endpoint(endpoint).await { + warn!("Circuit breaker is open for endpoint: {}", endpoint); + return Err(Error::CircuitBreakerOpen(endpoint.to_string())); + } + + // Get backoff duration and wait if necessary + let backoff = self.circuit_breaker.get_backoff_duration(endpoint).await; + if !backoff.is_zero() { + info!("Backing off for {:?} before trying endpoint: {}", backoff, endpoint); + } + tokio::time::sleep(backoff).await; + + info!("Fetching peers from endpoint: {}", endpoint); + // Get backoff duration and wait if necessary + let result = async { + info!("Sending HTTP request to {}", endpoint); + let response = match timeout( + std::time::Duration::from_secs(FETCH_TIMEOUT_SECS), + self.client.get(endpoint).send(), + ) + .await { + Ok(resp) => match resp { + Ok(r) => { + info!("Got response with status: {}", r.status()); + r + } + Err(e) => { + warn!("HTTP request failed: {}", e); + return Err(Error::RequestFailed(e.to_string())); + } + }, + Err(_) => { + warn!("Request timed out after {} seconds", FETCH_TIMEOUT_SECS); + return Err(Error::RequestTimeout); + } + }; + + let content = match response.text().await { + Ok(c) => { + info!("Received response content length: {}", c.len()); + if c.len() < 1000 { // Only log if content is not too large + info!("Response content: {}", c); + } + c + } + Err(e) => { + warn!("Failed to get response text: {}", e); + return Err(Error::InvalidResponse(format!("Failed to get response text: {}", e))); + } + }; + + // Try parsing as JSON first + if content.trim().starts_with('{') { + info!("Attempting to parse response as JSON"); + match serde_json::from_str::(&content) { + Ok(json_endpoints) => { + info!("Successfully parsed JSON response with {} peers", json_endpoints.peers.len()); + let peers = json_endpoints + .peers + .into_iter() + .filter_map(|addr| match addr.parse::() { + Ok(addr) => Some(BootstrapPeer::new(addr)), + Err(e) => { + warn!("Failed to parse multiaddr {}: {}", addr, e); + None + } + }) + .collect::>(); + + if peers.is_empty() { + warn!("No valid peers found in JSON response"); + Err(Error::NoPeersFound( + "No valid peers found in JSON response".to_string(), + )) + } else { + info!("Successfully parsed {} valid peers from JSON", peers.len()); + Ok(peers) + } + } + Err(e) => { + warn!("Failed to parse JSON response: {}", e); + Err(Error::InvalidResponse(format!( + "Invalid JSON format: {}", + e + ))) + } + } + } else { + info!("Attempting to parse response as plain text"); + // Try parsing as plain text with one multiaddr per line + let peers = content + .lines() + .filter(|line| !line.trim().is_empty()) + .filter_map(|line| match line.trim().parse::() { + Ok(addr) => Some(BootstrapPeer::new(addr)), + Err(e) => { + warn!("Failed to parse multiaddr {}: {}", line, e); + None + } + }) + .collect::>(); + + if peers.is_empty() { + warn!("No valid peers found in plain text response"); + Err(Error::NoPeersFound( + "No valid peers found in plain text response".to_string(), + )) + } else { + info!("Successfully parsed {} valid peers from plain text", peers.len()); + Ok(peers) + } + } + } + .await; + + match result { + Ok(peers) => { + info!("Successfully fetched {} peers from {}", peers.len(), endpoint); + self.circuit_breaker.record_success(endpoint).await; + Ok(peers) + } + Err(e) => { + warn!("Failed to fetch peers from {}: {}", endpoint, e); + self.circuit_breaker.record_failure(endpoint).await; + Err(e) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, + }; + + #[tokio::test] + async fn test_fetch_peers() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string("/ip4/127.0.0.1/tcp/8080\n/ip4/127.0.0.2/tcp/8080"), + ) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new(); + discovery.endpoints = vec![mock_server.uri()]; + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 2); + + let addr1: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let addr2: Multiaddr = "/ip4/127.0.0.2/tcp/8080".parse().unwrap(); + assert!(peers.iter().any(|p| p.addr == addr1)); + assert!(peers.iter().any(|p| p.addr == addr2)); + } + + #[tokio::test] + async fn test_endpoint_failover() { + let mock_server1 = MockServer::start().await; + let mock_server2 = MockServer::start().await; + + // First endpoint fails + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(500)) + .mount(&mock_server1) + .await; + + // Second endpoint succeeds + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string("/ip4/127.0.0.1/tcp/8080")) + .mount(&mock_server2) + .await; + + let mut discovery = InitialPeerDiscovery::new(); + discovery.endpoints = vec![mock_server1.uri(), mock_server2.uri()]; + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 1); + + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + assert_eq!(peers[0].addr, addr); + } + + #[tokio::test] + async fn test_invalid_multiaddr() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200).set_body_string( + "/ip4/127.0.0.1/tcp/8080\ninvalid-addr\n/ip4/127.0.0.2/tcp/8080", + ), + ) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new(); + discovery.endpoints = vec![mock_server.uri()]; + + let peers = discovery.fetch_peers().await.unwrap(); + let valid_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + assert_eq!(peers[0].addr, valid_addr); + } + + #[tokio::test] + async fn test_empty_response() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string("")) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new(); + discovery.endpoints = vec![mock_server.uri()]; + + let result = discovery.fetch_peers().await; + assert!(matches!(result, Err(Error::NoPeersFound(_)))); + } + + #[tokio::test] + async fn test_whitespace_and_empty_lines() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200).set_body_string("\n \n/ip4/127.0.0.1/tcp/8080\n \n"), + ) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new(); + discovery.endpoints = vec![mock_server.uri()]; + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 1); + + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + assert_eq!(peers[0].addr, addr); + } + + #[tokio::test] + async fn test_default_endpoints() { + let discovery = InitialPeerDiscovery::new(); + assert_eq!(discovery.endpoints.len(), 1); + assert_eq!( + discovery.endpoints[0], + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" + ); + } + + #[tokio::test] + async fn test_custom_endpoints() { + let endpoints = vec!["http://example.com".to_string()]; + let discovery = InitialPeerDiscovery::with_endpoints(endpoints.clone()); + assert_eq!(discovery.endpoints, endpoints); + } + + #[tokio::test] + async fn test_json_endpoints() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string( + r#"{"peers": ["/ip4/127.0.0.1/tcp/8080", "/ip4/127.0.0.2/tcp/8080"]}"#, + )) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new(); + discovery.endpoints = vec![mock_server.uri()]; + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 2); + + let addr1: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let addr2: Multiaddr = "/ip4/127.0.0.2/tcp/8080".parse().unwrap(); + assert!(peers.iter().any(|p| p.addr == addr1)); + assert!(peers.iter().any(|p| p.addr == addr2)); + } +} diff --git a/bootstrap_cache/src/lib.rs b/bootstrap_cache/src/lib.rs new file mode 100644 index 0000000000..23bdaf6cf0 --- /dev/null +++ b/bootstrap_cache/src/lib.rs @@ -0,0 +1,115 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +mod cache_store; +mod circuit_breaker; +pub mod config; +mod error; +mod initial_peer_discovery; + +use libp2p::Multiaddr; +use serde::{Deserialize, Serialize}; +use std::{fmt, time::SystemTime}; +use thiserror::Error; + +pub use cache_store::CacheStore; +pub use config::BootstrapConfig; +pub use error::{Error, Result}; +pub use initial_peer_discovery::InitialPeerDiscovery; + +/// Structure representing a list of bootstrap endpoints +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BootstrapEndpoints { + /// List of peer multiaddresses + pub peers: Vec, + /// Optional metadata about the endpoints + #[serde(default)] + pub metadata: EndpointMetadata, +} + +/// Metadata about bootstrap endpoints +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EndpointMetadata { + /// When the endpoints were last updated + #[serde(default = "default_last_updated")] + pub last_updated: String, + /// Optional description of the endpoints + #[serde(default)] + pub description: String, +} + +fn default_last_updated() -> String { + chrono::Utc::now().to_rfc3339() +} + +impl Default for EndpointMetadata { + fn default() -> Self { + Self { + last_updated: default_last_updated(), + description: String::new(), + } + } +} + +/// A peer that can be used for bootstrapping into the network +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BootstrapPeer { + /// The multiaddress of the peer + pub addr: Multiaddr, + /// The number of successful connections to this peer + pub success_count: u32, + /// The number of failed connection attempts to this peer + pub failure_count: u32, + /// The last time this peer was successfully contacted + pub last_seen: SystemTime, +} + +impl BootstrapPeer { + pub fn new(addr: Multiaddr) -> Self { + Self { + addr, + success_count: 0, + failure_count: 0, + last_seen: SystemTime::now(), + } + } + + pub fn update_status(&mut self, success: bool) { + if success { + self.success_count += 1; + self.last_seen = SystemTime::now(); + } else { + self.failure_count += 1; + } + } + + pub fn is_reliable(&self) -> bool { + // A peer is considered reliable if it has more successes than failures + self.success_count > self.failure_count + } +} + +impl fmt::Display for BootstrapPeer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "BootstrapPeer {{ addr: {}, last_seen: {:?}, success: {}, failure: {} }}", + self.addr, self.last_seen, self.success_count, self.failure_count + ) + } +} + +/// Creates a new bootstrap cache with default configuration +pub async fn new() -> Result { + CacheStore::new(BootstrapConfig::default()).await +} + +/// Creates a new bootstrap cache with custom configuration +pub async fn with_config(config: BootstrapConfig) -> Result { + CacheStore::new(config).await +} diff --git a/bootstrap_cache/tests/cache_tests.rs b/bootstrap_cache/tests/cache_tests.rs new file mode 100644 index 0000000000..186eaa263a --- /dev/null +++ b/bootstrap_cache/tests/cache_tests.rs @@ -0,0 +1,241 @@ +use bootstrap_cache::{BootstrapConfig, CacheStore}; +use libp2p::Multiaddr; +use std::time::Duration; +use tempfile::TempDir; +use tokio::time::sleep; + +#[tokio::test] +async fn test_cache_store_operations() -> Result<(), Box> { + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Create cache store with config + let config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + let cache_store = CacheStore::new(config).await?; + + // Test adding and retrieving peers + let addr: Multiaddr = + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse()?; + cache_store.add_peer(addr.clone()).await?; + cache_store + .update_peer_status(&addr.to_string(), true) + .await?; + + let peers = cache_store.get_reliable_peers().await; + assert!(!peers.is_empty(), "Cache should contain the added peer"); + assert!( + peers.iter().any(|p| p.addr == addr), + "Cache should contain our specific peer" + ); + + Ok(()) +} + +#[tokio::test] +async fn test_cache_persistence() -> Result<(), Box> { + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Create first cache store + let config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + let cache_store1 = CacheStore::new(config.clone()).await?; + + // Add a peer and mark it as reliable + let addr: Multiaddr = + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse()?; + cache_store1.add_peer(addr.clone()).await?; + cache_store1 + .update_peer_status(&addr.to_string(), true) + .await?; + + // Create a new cache store with the same path + let cache_store2 = CacheStore::new(config).await?; + let peers = cache_store2.get_reliable_peers().await; + + assert!(!peers.is_empty(), "Cache should persist across instances"); + assert!( + peers.iter().any(|p| p.addr == addr), + "Specific peer should persist" + ); + + Ok(()) +} + +#[tokio::test] +async fn test_cache_reliability_tracking() -> Result<(), Box> { + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + let cache_store = CacheStore::new(config).await?; + + let addr: Multiaddr = + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse()?; + cache_store.add_peer(addr.clone()).await?; + + // Test successful connections + for _ in 0..3 { + cache_store + .update_peer_status(&addr.to_string(), true) + .await?; + } + + let peers = cache_store.get_reliable_peers().await; + assert!( + peers.iter().any(|p| p.addr == addr), + "Peer should be reliable after successful connections" + ); + + // Test failed connections + for _ in 0..5 { + cache_store + .update_peer_status(&addr.to_string(), false) + .await?; + } + + let peers = cache_store.get_reliable_peers().await; + assert!( + !peers.iter().any(|p| p.addr == addr), + "Peer should not be reliable after failed connections" + ); + + Ok(()) +} + +#[tokio::test] +async fn test_cache_max_peers() -> Result<(), Box> { + let _ = tracing_subscriber::fmt() + .with_env_filter("bootstrap_cache=debug") + .try_init(); + + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Create cache with small max_peers limit + let config = BootstrapConfig { + cache_file_path: cache_path, + max_peers: 2, + ..Default::default() + }; + let cache_store = CacheStore::new(config).await?; + + // Add three peers with distinct timestamps + let mut addresses = Vec::new(); + for i in 1..=3 { + let addr: Multiaddr = format!("/ip4/127.0.0.1/udp/808{}/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER{}", i, i).parse()?; + addresses.push(addr.clone()); + cache_store.add_peer(addr).await?; + // Add a delay to ensure distinct timestamps + sleep(Duration::from_millis(100)).await; + } + + let peers = cache_store.get_peers().await; + assert_eq!(peers.len(), 2, "Cache should respect max_peers limit"); + + // Get the addresses of the peers we have + let peer_addrs: Vec<_> = peers.iter().map(|p| p.addr.to_string()).collect(); + tracing::debug!("Final peers: {:?}", peer_addrs); + + // We should have the two most recently added peers (addresses[1] and addresses[2]) + for peer in peers { + let addr_str = peer.addr.to_string(); + assert!( + addresses[1..].iter().any(|a| a.to_string() == addr_str), + "Should have one of the two most recent peers, got {}", + addr_str + ); + } + + Ok(()) +} + +#[tokio::test] +async fn test_cache_concurrent_access() -> Result<(), Box> { + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + let cache_store = CacheStore::new(config).await?; + let cache_store_clone = cache_store.clone(); + + // Create multiple addresses + let addrs: Vec = (1..=5) + .map(|i| format!("/ip4/127.0.0.1/udp/808{}/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER{}", i, i).parse().unwrap()) + .collect(); + + // Spawn a task that adds peers + let add_task = tokio::spawn(async move { + for addr in addrs { + if let Err(e) = cache_store.add_peer(addr).await { + eprintln!("Error adding peer: {}", e); + } + sleep(Duration::from_millis(10)).await; + } + }); + + // Spawn another task that reads peers + let read_task = tokio::spawn(async move { + for _ in 0..10 { + let _ = cache_store_clone.get_peers().await; + sleep(Duration::from_millis(5)).await; + } + }); + + // Wait for both tasks to complete + tokio::try_join!(add_task, read_task)?; + + Ok(()) +} + +#[tokio::test] +async fn test_cache_file_corruption() -> Result<(), Box> { + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Create cache with some peers + let config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + let cache_store = CacheStore::new(config.clone()).await?; + + // Add a peer + let addr: Multiaddr = + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER1" + .parse()?; + cache_store.add_peer(addr.clone()).await?; + + // Corrupt the cache file + tokio::fs::write(&cache_path, "invalid json content").await?; + + // Create a new cache store - it should handle the corruption gracefully + let new_cache_store = CacheStore::new(config).await?; + let peers = new_cache_store.get_peers().await; + assert!(peers.is_empty(), "Cache should be empty after corruption"); + + // Should be able to add peers again + new_cache_store.add_peer(addr).await?; + let peers = new_cache_store.get_peers().await; + assert_eq!( + peers.len(), + 1, + "Should be able to add peers after corruption" + ); + + Ok(()) +} diff --git a/bootstrap_cache/tests/integration_tests.rs b/bootstrap_cache/tests/integration_tests.rs new file mode 100644 index 0000000000..c85f0aba5a --- /dev/null +++ b/bootstrap_cache/tests/integration_tests.rs @@ -0,0 +1,199 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use bootstrap_cache::{BootstrapEndpoints, InitialPeerDiscovery}; +use libp2p::Multiaddr; +use tracing_subscriber::{fmt, EnvFilter}; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +// Initialize logging for tests +fn init_logging() { + let _ = fmt() + .with_env_filter(EnvFilter::from_default_env()) + .try_init(); +} + +#[tokio::test] +async fn test_fetch_from_amazon_s3() { + init_logging(); + let discovery = InitialPeerDiscovery::new(); + let peers = discovery.fetch_peers().await.unwrap(); + + // We should get some peers + assert!(!peers.is_empty(), "Expected to find some peers from S3"); + + // Verify that all peers have valid multiaddresses + for peer in &peers { + println!("Found peer: {}", peer.addr); + let addr_str = peer.addr.to_string(); + assert!(addr_str.contains("/ip4/"), "Expected IPv4 address"); + assert!(addr_str.contains("/udp/"), "Expected UDP port"); + assert!(addr_str.contains("/quic-v1/"), "Expected QUIC protocol"); + assert!(addr_str.contains("/p2p/"), "Expected peer ID"); + } +} + +#[tokio::test] +async fn test_individual_s3_endpoints() { + init_logging(); + + // Start a mock server + let mock_server = MockServer::start().await; + + // Create mock responses + let mock_response = r#"/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE +/ip4/127.0.0.2/udp/8081/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERF"#; + + // Mount the mock + Mock::given(method("GET")) + .and(path("/peers")) + .respond_with(ResponseTemplate::new(200).set_body_string(mock_response)) + .mount(&mock_server) + .await; + + let endpoint = format!("{}/peers", mock_server.uri()); + let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]); + + match discovery.fetch_peers().await { + Ok(peers) => { + println!( + "Successfully fetched {} peers from {}", + peers.len(), + endpoint + ); + assert!( + !peers.is_empty(), + "Expected to find peers from {}", + endpoint + ); + + // Verify first peer's multiaddr format + if let Some(first_peer) = peers.first() { + let addr_str = first_peer.addr.to_string(); + println!("First peer from {}: {}", endpoint, addr_str); + assert!(addr_str.contains("/ip4/"), "Expected IPv4 address"); + assert!(addr_str.contains("/udp/"), "Expected UDP port"); + assert!(addr_str.contains("/quic-v1/"), "Expected QUIC protocol"); + assert!(addr_str.contains("/p2p/"), "Expected peer ID"); + + // Try to parse it back to ensure it's valid + assert!( + addr_str.parse::().is_ok(), + "Should be valid multiaddr" + ); + } + } + Err(e) => { + panic!("Failed to fetch peers from {}: {}", endpoint, e); + } + } +} + +#[tokio::test] +async fn test_response_format() { + init_logging(); + let discovery = InitialPeerDiscovery::new(); + let peers = discovery.fetch_peers().await.unwrap(); + + // Get the first peer to check format + let first_peer = peers.first().expect("Expected at least one peer"); + let addr_str = first_peer.addr.to_string(); + + // Print the address for debugging + println!("First peer address: {}", addr_str); + + // Verify address components + let components: Vec<&str> = addr_str.split('/').collect(); + assert!(components.contains(&"ip4"), "Missing IP4 component"); + assert!(components.contains(&"udp"), "Missing UDP component"); + assert!(components.contains(&"quic-v1"), "Missing QUIC component"); + assert!( + components.iter().any(|&c| c == "p2p"), + "Missing P2P component" + ); + + // Ensure we can parse it back into a multiaddr + let parsed: Multiaddr = addr_str.parse().expect("Should be valid multiaddr"); + assert_eq!(parsed.to_string(), addr_str, "Multiaddr should round-trip"); +} + +#[tokio::test] +async fn test_json_endpoint_format() { + init_logging(); + let mock_server = MockServer::start().await; + + // Create a mock JSON response + let json_response = r#" + { + "peers": [ + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE", + "/ip4/127.0.0.2/udp/8081/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERF" + ], + "metadata": { + "description": "Test endpoints", + "last_updated": "2024-01-01T00:00:00Z" + } + } + "#; + + // Mount the mock + Mock::given(method("GET")) + .and(path("/")) // Use root path instead of /peers + .respond_with(ResponseTemplate::new(200).set_body_string(json_response)) + .mount(&mock_server) + .await; + + let endpoint = mock_server.uri().to_string(); + let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]); + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 2); + + // Verify peer addresses + let addrs: Vec = peers.iter().map(|p| p.addr.to_string()).collect(); + assert!(addrs.contains( + &"/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .to_string() + )); + assert!(addrs.contains( + &"/ip4/127.0.0.2/udp/8081/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERF" + .to_string() + )); +} + +#[tokio::test] +async fn test_s3_json_format() { + init_logging(); + + // Fetch and parse the bootstrap cache JSON + let response = + reqwest::get("https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json") + .await + .unwrap(); + let json_str = response.text().await.unwrap(); + + // Parse using our BootstrapEndpoints struct + let endpoints: BootstrapEndpoints = serde_json::from_str(&json_str).unwrap(); + + // Verify we got all the peers + assert_eq!(endpoints.peers.len(), 24); + + // Verify we can parse each peer address + for peer in endpoints.peers { + peer.parse::().unwrap(); + } + + // Verify metadata + assert_eq!( + endpoints.metadata.description, + "Safe Network testnet bootstrap cache" + ); +} diff --git a/docs/bootstrap_cache_implementation.md b/docs/bootstrap_cache_implementation.md new file mode 100644 index 0000000000..9588d277fc --- /dev/null +++ b/docs/bootstrap_cache_implementation.md @@ -0,0 +1,337 @@ +# Bootstrap Cache Implementation Guide + +This guide documents the implementation of the bootstrap cache system, including recent changes and completed work. + +## Phase 1: Bootstrap Cache File Management + +### 1.1 Cache File Structure +```rust +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct PeerInfo { + pub addr: Multiaddr, + pub last_seen: DateTime, + pub success_count: u32, + pub failure_count: u32, +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct BootstrapCache { + pub last_updated: DateTime, + pub peers: Vec, +} +``` + +### 1.2 File Operations Implementation +The cache store is implemented in `bootstrap_cache/src/cache_store.rs` with the following key features: + +```rust +pub struct CacheStore { + cache_path: PathBuf, + peers: BTreeMap, +} + +impl CacheStore { + pub fn new() -> Result { + let cache_path = Self::get_cache_path()?; + let peers = Self::load_from_disk(&cache_path)?; + Ok(Self { cache_path, peers }) + } + + pub fn save_to_disk(&self) -> Result<()> { + // Check if file is read-only first + if is_readonly(&self.cache_path) { + warn!("Cache file is read-only, skipping save"); + return Ok(()); + } + + let cache = BootstrapCache { + last_updated: Utc::now(), + peers: self.peers.values().cloned().collect(), + }; + + let temp_path = self.cache_path.with_extension("tmp"); + atomic_write(&temp_path, &cache)?; + fs::rename(temp_path, &self.cache_path)?; + Ok(()) + } + + pub fn update_peer_status( + &mut self, + addr: NetworkAddress, + success: bool, + ) -> Result<()> { + if is_readonly(&self.cache_path) { + warn!("Cache file is read-only, skipping peer status update"); + return Ok(()); + } + + let peer = self.peers.entry(addr).or_default(); + if success { + peer.success_count += 1; + } else { + peer.failure_count += 1; + } + peer.last_seen = Utc::now(); + Ok(()) + } + + pub fn cleanup_unreliable_peers(&mut self) -> Result<()> { + if is_readonly(&self.cache_path) { + warn!("Cache file is read-only, skipping cleanup"); + return Ok(()); + } + + self.peers.retain(|_, peer| { + peer.success_count > peer.failure_count + }); + Ok(()) + } +} +``` + +### 1.3 File Permission Handling +The cache store now handles read-only files gracefully: +- Each modifying operation checks if the file is read-only +- If read-only, the operation logs a warning and returns successfully +- Read operations continue to work even when the file is read-only + +## Phase 2: Network Integration Strategy + +### 2.1 Integration Architecture + +The bootstrap cache will be integrated into the existing networking layer with minimal changes to current functionality. The implementation focuses on three key areas: + +#### 2.1.1 NetworkDiscovery Integration +```rust +impl NetworkDiscovery { + // Add cache integration to existing peer discovery + pub(crate) async fn save_peers_to_cache(&self, cache: &BootstrapCache) { + for peers in self.candidates.values() { + for peer in peers { + let _ = cache.add_peer(peer.clone()).await; + } + } + } + + pub(crate) async fn load_peers_from_cache(&mut self, cache: &BootstrapCache) { + for peer in cache.get_reliable_peers().await { + if let Some(ilog2) = self.get_bucket_index(&peer.addr) { + self.insert_candidates(ilog2, vec![peer.addr]); + } + } + } +} +``` + +#### 2.1.2 SwarmDriver Integration +```rust +impl SwarmDriver { + pub(crate) async fn save_peers_to_cache(&self) { + if let Some(cache) = &self.bootstrap_cache { + self.network_discovery.save_peers_to_cache(cache).await; + } + } +} +``` + +#### 2.1.3 Bootstrap Process Integration +```rust +impl ContinuousBootstrap { + pub(crate) async fn initialize_with_cache(&mut self, cache: &BootstrapCache) { + // Load initial peers from cache + self.network_discovery.load_peers_from_cache(cache).await; + + // Normal bootstrap process continues... + self.initial_bootstrap_done = false; + } +} +``` + +### 2.2 Key Integration Points + +1. **Cache Updates**: + - Periodic updates (every 60 minutes) + - On graceful shutdown + - After successful peer connections + - During routing table maintenance + +2. **Cache Usage**: + - During initial bootstrap + - When routing table needs more peers + - As primary source for peer discovery (replacing direct URL fetching) + - Fallback to URL endpoints only when cache is empty/stale + +3. **Configuration**: +```rust +pub struct NetworkBuilder { + bootstrap_cache_config: Option, +} + +impl NetworkBuilder { + pub fn with_bootstrap_cache(mut self, config: BootstrapConfig) -> Self { + self.bootstrap_cache_config = Some(config); + self + } +} +``` + +### 2.3 Implementation Phases + +#### Phase 1: Basic Integration +- Add bootstrap cache as optional component +- Integrate basic cache reading during startup +- Add periodic cache updates +- Replace direct URL fetching with cache-first approach + +#### Phase 2: Enhanced Features +- Add graceful shutdown cache updates +- Implement circuit breaker integration +- Add cache cleanup for unreliable peers +- Integrate with existing peer reliability metrics + +#### Phase 3: Optimization +- Fine-tune update intervals and thresholds +- Add cache performance metrics +- Optimize cache update strategies +- Implement advanced peer selection algorithms + +### 2.4 Benefits and Impact + +1. **Minimal Changes**: + - Preserves existing peer discovery mechanisms + - Maintains current routing table functionality + - Optional integration through configuration + +2. **Enhanced Reliability**: + - Local cache reduces network dependency + - Circuit breaker prevents cascading failures + - Intelligent peer selection based on history + +3. **Better Performance**: + - Faster bootstrap process + - Reduced network requests + - More reliable peer connections + +4. **Seamless Integration**: + - No changes required to client/node APIs + - Backward compatible with existing deployments + - Gradual rollout possible + +## Phase 3: Testing and Validation + +### 3.1 Unit Tests +```rust +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_cache_read_only() { + let store = CacheStore::new().unwrap(); + + // Make file read-only + let mut perms = fs::metadata(&store.cache_path).unwrap().permissions(); + perms.set_readonly(true); + fs::set_permissions(&store.cache_path, perms).unwrap(); + + // Operations should succeed but not modify file + assert!(store.update_peer_status(addr, true).is_ok()); + assert!(store.cleanup_unreliable_peers().is_ok()); + assert!(store.save_to_disk().is_ok()); + } + + #[test] + fn test_peer_reliability() { + let mut store = CacheStore::new().unwrap(); + let addr = NetworkAddress::from_str("/ip4/127.0.0.1/udp/8080").unwrap(); + + // Add successful connections + store.update_peer_status(addr.clone(), true).unwrap(); + store.update_peer_status(addr.clone(), true).unwrap(); + + // Add one failure + store.update_peer_status(addr.clone(), false).unwrap(); + + // Peer should still be considered reliable + store.cleanup_unreliable_peers().unwrap(); + assert!(store.peers.contains_key(&addr)); + } +} +``` + +### 3.2 Integration Tests +Located in `bootstrap_cache/tests/integration_tests.rs`: + +1. **Network Connectivity Tests**: +```rust +#[tokio::test] +async fn test_fetch_from_amazon_s3() { + let discovery = InitialPeerDiscovery::new(); + let peers = discovery.fetch_peers().await.unwrap(); + + // Verify peer multiaddress format + for peer in &peers { + assert!(peer.addr.to_string().contains("/ip4/")); + assert!(peer.addr.to_string().contains("/udp/")); + assert!(peer.addr.to_string().contains("/quic-v1/")); + assert!(peer.addr.to_string().contains("/p2p/")); + } +} +``` + +2. **Mock Server Tests**: +```rust +#[tokio::test] +async fn test_individual_s3_endpoints() { + let mock_server = MockServer::start().await; + // Test failover between endpoints + // Test response parsing + // Test error handling +} +``` + +3. **Format Validation Tests**: +- Verify JSON endpoint responses +- Validate peer address formats +- Test whitespace and empty line handling + +### 3.3 Performance Metrics +- Track peer discovery time +- Monitor cache hit/miss rates +- Measure connection success rates + +### 3.4 Current Status +- ✅ Basic network integration implemented +- ✅ Integration tests covering core functionality +- ✅ Mock server tests for endpoint validation +- ✅ Performance monitoring in place + +### 3.5 Next Steps +1. **Enhanced Testing**: + - Add network partition tests + - Implement chaos testing for network failures + - Add long-running stability tests + +2. **Performance Optimization**: + - Implement connection pooling + - Add parallel connection attempts + - Optimize peer candidate generation + +3. **Monitoring**: + - Add detailed metrics collection + - Implement performance tracking + - Create monitoring dashboards + +## Current Status + +### Completed Work +1. Created `bootstrap_cache` directory with proper file structure +2. Implemented cache file operations with read-only handling +3. Added peer reliability tracking based on success/failure counts +4. Integrated Kademlia routing tables for both nodes and clients + +### Next Steps +1. Implement rate limiting for cache updates +2. Add metrics for peer connection success rates +3. Implement automated peer list pruning +4. Add cross-client cache sharing mechanisms diff --git a/docs/bootstrap_cache_prd.md b/docs/bootstrap_cache_prd.md new file mode 100644 index 0000000000..a1e8317e1b --- /dev/null +++ b/docs/bootstrap_cache_prd.md @@ -0,0 +1,194 @@ +# Bootstrap Cache PRD + +## Overview +This document outlines the design and implementation of a decentralized bootstrap cache system for the Safe Network. This system replaces the current centralized "bootstrap node" concept with a fully decentralized approach where all nodes are equal participants. + +## Goals +- Remove the concept of dedicated "bootstrap nodes" +- Implement a shared local cache system for both nodes and clients +- Reduce infrastructure costs +- Improve network stability and decentralization +- Simplify the bootstrapping process + +## Non-Goals +- Creating any form of centralized node discovery +- Implementing DNS-based discovery +- Maintaining long-term connections between nodes +- Running HTTP servers on nodes + +## Technical Design + +### Bootstrap Cache File +- Location: + - Unix/Linux: `/var/safe/bootstrap_cache.json` + - macOS: `/Library/Application Support/Safe/bootstrap_cache.json` + - Windows: `C:\ProgramData\Safe\bootstrap_cache.json` +- Format: JSON file containing: + ```json + { + "last_updated": "ISO-8601-timestamp", + "peers": [ + { + "addr": "multiaddr-string", // e.g., "/ip4/1.2.3.4/udp/1234/quic-v1" + "last_seen": "ISO-8601-timestamp", + "success_count": "number", + "failure_count": "number" + } + ] + } + ``` + +### Cache Management +1. **Writing Cache** + - Write to cache when routing table changes occur + - Write to cache on clean node/client shutdown + - Keep track of successful/failed connection attempts + - Limit cache size to prevent bloat (e.g., 1000 entries) + - Handle file locking for concurrent access from multiple nodes/clients + +2. **Reading Cache** + - On startup, read shared local cache if available + - If cache peers are unreachable: + 1. Try peers from `--peer` argument or `SAFE_PEERS` env var + 2. If none available, fetch from network contacts URL + 3. If local feature enabled, discover through mDNS + - Sort peers by connection success rate + +### Node Implementation +1. **Cache Updates** + - Use Kademlia routing table as source of truth + - Every period, copy nodes from routing table to cache + - Track peer reliability through: + - Successful/failed connection attempts + - Response times + - Data storage and retrieval success rates + +2. **Startup Process** + ```rust + async fn startup() { + // 1. Get initial peers + let peers = PeersArgs::get_peers().await?; + + // 2. Initialize Kademlia with configuration + let kad_cfg = KademliaConfig::new() + .set_kbucket_inserts(Manual) + .set_query_timeout(KAD_QUERY_TIMEOUT_S) + .set_replication_factor(REPLICATION_FACTOR) + .disjoint_query_paths(true); + + // 3. Begin continuous bootstrap process + loop { + bootstrap_with_peers(peers).await?; + + // If we have enough peers, slow down bootstrap attempts + if connected_peers >= K_VALUE { + increase_bootstrap_interval(); + } + + // Update cache with current routing table + update_bootstrap_cache().await?; + + sleep(bootstrap_interval).await; + } + } + ``` + +### Client Implementation +1. **Cache Management** + - Maintain Kademlia routing table in outbound-only mode + - Read from shared bootstrap cache + - Update peer reliability metrics based on: + - Connection success/failure + - Data retrieval success rates + - Response times + +2. **Connection Process** + ```rust + async fn connect() { + // 1. Get initial peers + let peers = PeersArgs::get_peers().await?; + + // 2. Initialize client-mode Kademlia + let kad_cfg = KademliaConfig::new() + .set_kbucket_inserts(Manual) + .set_protocol_support(Outbound) // Clients only make outbound connections + .disjoint_query_paths(true); + + // 3. Connect to peers until we have enough + while connected_peers < K_VALUE { + bootstrap_with_peers(peers).await?; + + // Update peer reliability in cache + update_peer_metrics().await?; + + // Break if we've tried all peers + if all_peers_attempted() { + break; + } + } + } + ``` + +### Peer Acquisition Process +1. **Order of Precedence** + - Command line arguments (`--peer`) + - Environment variables (`SAFE_PEERS`) + - Local discovery (if enabled) + - Network contacts URL + +2. **Network Contacts** + - URL: `https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts` + - Format: One multiaddr per line + - Fallback mechanism when no local peers available + - Retries with exponential backoff (max 7 attempts) + +3. **Local Discovery** + - Uses mDNS when `local` feature is enabled + - Useful for development and testing + - Not used in production environments + +### Cache File Synchronization +1. **File Locking** + - Use file-system level locks for synchronization + - Read locks for cache queries + - Write locks for cache updates + - Exponential backoff for lock acquisition + +2. **Update Process** + ```rust + async fn update_cache(peers: Vec) -> Result<()> { + // 1. Check if file is read-only + if is_readonly(cache_path) { + warn!("Cache file is read-only"); + return Ok(()); + } + + // 2. Acquire write lock + let file = acquire_exclusive_lock(cache_path)?; + + // 3. Perform atomic write + atomic_write(file, peers).await?; + + Ok(()) + } + ``` + +## Success Metrics +- Reduction in bootstrap time +- More evenly distributed network load +- Improved network resilience +- Higher peer connection success rates + +## Security Considerations +- Validate peer multiaddresses before caching +- Protect against malicious cache entries +- Handle file permissions securely +- Prevent cache poisoning attacks +- Implement rate limiting for cache updates + +## Future Enhancements +- Peer prioritization based on network metrics +- Geographic-based peer selection +- Advanced reputation system +- Automated peer list pruning +- Cross-client cache sharing mechanisms diff --git a/prd.md b/prd.md new file mode 100644 index 0000000000..a2df93bbea --- /dev/null +++ b/prd.md @@ -0,0 +1,173 @@ +Product Requirements Document for Autonomi Network Enhancements +Introduction + + +This document outlines the product requirements for the development and enhancement of the Autonomi Network (formerly known as the MaidSafe Safe Network). The Autonomi Network is a fully decentralized platform aimed at providing secure, private, and efficient data storage and communication. This document details the necessary work to implement and improve various aspects of the network, including data types, client APIs, network architecture, and payment systems. + + +Objectives + + + • Implement and document four core data types essential for network operations. + • Enhance the network’s decentralization by refining bootstrap mechanisms. + • Define and standardize client API behaviors in a decentralized environment. + • Ensure the client API comprehensively documents all data types. + • Restrict store/get methods to accept only the defined data types. + • Integrate a flexible payment system utilizing EVM and L2 networks with runtime configurability. + + +1. Data Types + + +The Autonomi Network will support four primary data types: + + +1.1 Chunks + + + • Description: Immutable data pieces up to 1 MB in size. + • Naming Convention: The name of a chunk is derived from the hash of its content (hash(content) == name). + • Purpose: Enables content-addressable storage, ensuring data integrity and deduplication. + + +1.2 Registers + + + • Description: Conflict-free Replicated Data Type (CRDT) directed acyclic graphs (DAGs). + • Concurrency Handling: Allows multiple concurrent accesses. In cases of conflicting updates, users are responsible for merging changes, as the network does not handle conflict resolution. + • Use Case: Suitable for collaborative applications where eventual consistency is acceptable. + + +1.3 Transactions + + + • Description: Simple data structures representing value transfers. + • Structure: + • Owner: Identified by a public key. + • Content: May include a value and an optional additional key. + • Outputs: A set of keys indicating recipients of the transaction. + • Validation: Clients must verify the transaction history to ensure correctness. + • Purpose: Facilitates decentralized transactions without central authority oversight. + + +1.4 Vault + + + • Description: Flexible data type up to 1 MB that can encapsulate any developer-defined data structure. + • Ownership: Secured by an owner’s public key. + • Versioning: + • Not a CRDT. + • Includes a user or application-defined counter. + • Nodes retain only the copy with the highest counter value after signature verification. + • Use Case: Ideal for applications requiring custom data storage with version control. + + +2. Network Architecture + + +2.1 Decentralization + + + • The network operates without central servers, promoting resilience and autonomy. + • Bootstrap nodes exist solely for initial network access. + + +2.2 Bootstrap Nodes + + + • Purpose: Aid first-time nodes or clients in connecting to the network. + • Limitations: + • Must not be relied upon for continued operation. + • Designed to be ephemeral and can disappear without affecting the network. + • Distribution: + • New bootstrap nodes can be published via websites, DNS records, or shared among users. + • Users are encouraged to share bootstrap information to foster decentralization. + + +2.3 Bootstrap Cache + + + • Functionality: + • Nodes and clients must collect and maintain their own network contacts after the initial connection. + • This cache is used for reconnecting to the network autonomously. + • Benefit: Eliminates dependence on specific bootstrap nodes, enhancing network robustness. + + +3. Client API + + +3.1 Connection Model + + + • Stateless Connectivity: + • Clients acknowledge that persistent connections are impractical in a decentralized network unless designed to receive unsolicited messages. +(i.e. the client.connect() does not make sense in our current situation.) + • Operational Behavior: + • Clients maintain a list of network addresses. + • For any action, they connect to the nearest node and discover nodes closest to the target address. + • Addresses collected during operations are stored in the bootstrap cache. + + +3.2 Data Types Definition + + + • Centralized Documentation: + • All four data types must be clearly defined and documented within a single section of the API documentation. + • Developer Guidance: + • Provide detailed explanations, usage examples, and best practices for each data type. + + +3.3 Store/Get Methods + + + • Data Type Restrictions: + • The API’s store/get methods are configured to accept only the four defined data types. + • Inputs of other data types are explicitly disallowed to maintain data integrity and consistency. + + +4. Payment System Integration + + +4.1 EVM and L2 Network Utilization + + + • Blockchain Integration: + • Leverage the Ethereum Virtual Machine (EVM) and Layer 2 (L2) networks for transaction processing. + • Runtime Configurability: + • Nodes and clients can modify payment-related settings at runtime. + • Configurable parameters include wallet details, chosen payment networks, and other relevant settings. + + +4.2 Wallet Management + + + • Flexibility: + • Users can change wallets without restarting or recompiling the client or node software. + • Security: + • Ensure secure handling and storage of wallet credentials and transaction data. + + +5. Additional Requirements + + + • Scalability: Design systems to handle network growth without performance degradation. + • Security: Implement robust encryption and authentication mechanisms across all components. + • Performance: Optimize data storage and retrieval processes for efficiency. + • Usability: Provide clear documentation and intuitive interfaces for developers and end-users. + + +6. Documentation and Support + + + • Comprehensive Guides: + • Produce detailed documentation for all new features and changes. + • Include API references, tutorials, and FAQs. + • Community Engagement: + • Encourage community feedback and contributions. + • Provide support channels for troubleshooting and discussions. + + +Conclusion + + +Implementing these requirements will enhance the Autonomi Network’s functionality, security, and user experience. Focusing on decentralization, flexibility, and clear documentation will position the network as a robust platform for decentralized applications and services. diff --git a/refactoring_steps.md b/refactoring_steps.md new file mode 100644 index 0000000000..9f962439c6 --- /dev/null +++ b/refactoring_steps.md @@ -0,0 +1,202 @@ +# Refactoring Steps for Autonomi Network + +## Phase 1: Client API Refactoring +1. **Remove Connection Management from API** + - Remove `connect()` method from client API + - Move connection handling into individual operations + - Each operation should handle its own connection lifecycle + - Have a bootstrap mechanism that reads a bootstrrp_cache.json file or passed in via command line or ENV_VAR + - Use the bootstrap cache to connect to the network + - During network requests collect peers connection info + - Every minute update the bootstrap cache (limit entries to last 1500 seen) + - on startup read the bootstrap cache file to get peers to connect to + - on shutdown write the bootstrap cache file + - all internal connect commands will use the nodes we have in ram + - update wasm and python bindings to use all the above + - test before going any further + + +2. **Data Type Operations** + - **Chunks** (Mostly Complete) + - Existing: `chunk_get`, `chunk_upload_with_payment` + - Add: Better error handling for size limits + - Language Bindings: + - Python: + - Implement `chunk_get`, `chunk_upload_with_payment` methods + - Add size validation + - Add comprehensive tests + - Document API usage + - WASM: + - Implement `chunk_get`, `chuunk_upload_with_paymentput` methods + - Add JavaScript examples + - Add integration tests + - Document browser usage + + - **Registers** (Integration Needed) + - Existing in sn_registers: + - CRDT-based implementation + - `merge` operations + - User-managed conflict resolution + - To Add: + - Client API wrappers in autonomi + - Simplified append/merge interface + - Connection handling in operations + - Language Bindings: + - Python: + - Implement register CRUD operations + - Add conflict resolution examples + - Add unit and integration tests + - Document CRDT usage + - WASM: + - Implement register operations + - Add browser-based examples + - Add JavaScript tests + - Document concurrent usage + + - **Scratchpad (Vault)** (Enhancement Needed) + - Existing in sn_protocol: + - Basic scratchpad implementation + - `update_and_sign` functionality + - To Add: + - Client API wrappers in autonomi + - Simplified update/replace interface + - Connection handling in operations + - Language Bindings: + - Python: + - Implement vault operations + - Add encryption examples + - Add comprehensive tests + - Document security features + - WASM: + - Implement vault operations + - Add browser storage examples + - Add security tests + - Document encryption usage + +3. **Transaction System Refactoring** (Priority) + - Make transaction types generic in sn_transfers + - Update client API to support generic transactions + - Implement owner-based validation + - Add support for optional additional keys + - Implement transaction history verification + +## Phase 2: Payment System Integration +1. **EVM Integration** + - Integrate existing EVM implementation + - Add runtime configuration support + - Connect with transaction system + +2. **Payment Processing** + - Integrate with data operations + - Add payment verification + - Implement tracking system + +## Phase 3: Testing and Documentation +1. **Testing** + - Add unit tests for new API methods + - Integration tests for complete workflows + - Payment system integration tests + +2. **Documentation** + - Update API documentation + - Add usage examples + - Document error conditions + - Include best practices + +## Safe Network Health Management + +### Core Parameters + +#### Timing Intervals +- Replication: 90-180 seconds (randomized) +- Bad Node Detection: 300-600 seconds (randomized) +- Uptime Metrics: 10 seconds +- Record Cleanup: 3600 seconds (1 hour) +- Chunk Proof Retry: 15 seconds between attempts + +#### Network Parameters +- Close Group Size: Defined by CLOSE_GROUP_SIZE constant +- Replication Target: REPLICATION_PEERS_COUNT closest nodes +- Minimum Peers: 100 (for bad node detection) +- Bad Node Consensus: Requires close_group_majority() +- Max Chunk Proof Attempts: 3 before marking as bad node + +### Health Management Algorithms + +#### 1. Bad Node Detection +```rust +Process: +1. Triggered every 300-600s when peers > 100 +2. Uses rolling index (0-511) to check different buckets +3. For each bucket: + - Select subset of peers + - Query their closest nodes + - Mark as bad if majority report shunning +4. Records NodeIssue::CloseNodesShunning +``` + +#### 2. Network Replication +```rust +Process: +1. Triggered by: + - Every 90-180s interval + - New peer connection + - Peer removal + - Valid record storage +2. Execution: + - Get closest K_VALUE peers + - Sort by XOR distance + - Verify local storage + - Replicate to REPLICATION_PEERS_COUNT nodes +``` + +#### 3. Routing Table Management +```rust +Components: +1. K-bucket organization by XOR distance +2. Peer tracking and metrics +3. Connection state monitoring +4. Regular table cleanup +5. Dynamic peer replacement +``` + +### Protection Mechanisms + +#### 1. Data Integrity +- Chunk proof verification +- Record validation +- Replication confirmation +- Storage verification + +#### 2. Network Resilience +- Distributed consensus for bad nodes +- Rolling health checks +- Randomized intervals +- Subset checking for efficiency + +#### 3. Resource Optimization +- Periodic cleanup of irrelevant records +- Limited retry attempts +- Targeted replication +- Load distribution through rolling checks + +### Metrics Tracking +- Peer counts and stability +- Replication success rates +- Network connectivity +- Bad node detection events +- Resource usage and cleanup + +### Key Improvements +1. Reduced resource usage in bad node detection +2. Optimized replication targeting +3. Better load distribution +4. Enhanced peer verification +5. Efficient cleanup mechanisms + +This system creates a self-maintaining network capable of: +- Identifying and removing problematic nodes +- Maintaining data redundancy +- Optimizing resource usage +- Ensuring network stability +- Providing reliable peer connections diff --git a/repository_structure.md b/repository_structure.md new file mode 100644 index 0000000000..f6dd9b383d --- /dev/null +++ b/repository_structure.md @@ -0,0 +1,265 @@ +# Safe Network Repository Structure and Capabilities + +## Core Components + +### Client Side +1. **autonomi** - Main client implementation + - Primary interface for users to interact with the Safe Network + - Multiple language bindings support (Rust, Python, WASM) + - Features: + - Data operations (chunks, registers) + - Vault operations + - File system operations + - EVM integration + - Components: + - `src/client/` - Core client implementation + - `src/self_encryption.rs` - Data encryption handling + - `src/python.rs` - Python language bindings + - `src/utils.rs` - Utility functions + - Build Features: + - `data` - Basic data operations + - `vault` - Vault operations (includes data and registers) + - `registers` - Register operations + - `fs` - File system operations + - `local` - Local network testing + - `external-signer` - External transaction signing + - Testing: + - `tests/` - Rust integration tests + - `tests-js/` - JavaScript tests + - `examples/` - Usage examples + +2. **autonomi-cli** - Command-line interface + - CLI tool for network interaction + - Components: + - `src/commands/` - CLI command implementations + - `src/access/` - Network access management + - `src/actions/` - Core action implementations + - `src/wallet/` - Wallet management functionality + - `src/commands.rs` - Command routing + - `src/opt.rs` - Command-line options parsing + - `src/utils.rs` - Utility functions + - Features: + - Network access management + - Wallet operations + - Data operations (chunks, registers) + - Command-line parsing and routing + +### Network Node Components +1. **sn_node** - Network Node Implementation + - Core Components: + - `src/node.rs` - Main node implementation + - `src/put_validation.rs` - Data validation logic + - `src/replication.rs` - Data replication handling + - `src/metrics.rs` - Performance monitoring + - `src/python.rs` - Python language bindings + - Features: + - Data validation and storage + - Network message handling + - Metrics collection + - Error handling + - Event processing + - Binary Components: + - `src/bin/` - Executable implementations + +2. **sn_protocol** - Core Protocol Implementation + - Components: + - `src/messages/` - Network message definitions + - `src/storage/` - Storage implementations + - `src/safenode_proto/` - Protocol definitions + - `src/node_rpc.rs` - RPC interface definitions + - Features: + - Message protocol definitions + - Storage protocol + - Node communication protocols + - Version management + +3. **sn_transfers** - Transfer System + - Components: + - `src/cashnotes/` - Digital cash implementation + - `src/transfers/` - Transfer logic + - `src/wallet/` - Wallet implementation + - `src/genesis.rs` - Genesis block handling + - Features: + - Digital cash management + - Transfer operations + - Wallet operations + - Genesis configuration + - Error handling + +### Data Types and Protocol +1. **sn_registers** - Register implementation + - CRDT-based data structures + - Conflict resolution mechanisms + - Concurrent operations handling + +### Network Management and Communication +1. **sn_networking** - Network Communication Layer + - Core Components: + - `src/cmd.rs` - Network command handling + - `src/driver.rs` - Network driver implementation + - `src/record_store.rs` - Data record management + - `src/bootstrap.rs` - Network bootstrap process + - `src/transport/` - Transport layer implementations + - Features: + - Network discovery and bootstrapping + - External address handling + - Relay management + - Replication fetching + - Record store management + - Transfer handling + - Metrics collection + - Event System: + - `src/event/` - Event handling implementation + - Network event processing + - Event-driven architecture + +2. **sn_node_manager** - Node Management System + - Core Components: + - `src/cmd/` - Management commands + - `src/add_services/` - Service management + - `src/config.rs` - Configuration handling + - `src/rpc.rs` - RPC interface + - Features: + - Node deployment and configuration + - Service management + - Local node handling + - RPC client implementation + - Error handling + - Management Tools: + - Binary implementations + - Helper utilities + - Configuration management + +### Networking and Communication +1. **sn_networking** - Network communication + - P2P networking implementation + - Connection management + - Message routing + +2. **sn_peers_acquisition** - Peer discovery + - Bootstrap mechanisms + - Peer management + - Network topology + +### Infrastructure Components +1. **node-launchpad** - Node Deployment System + - Core Components: + - `src/app.rs` - Main application logic + - `src/components/` - UI components + - `src/node_mgmt.rs` - Node management + - `src/node_stats.rs` - Statistics tracking + - `src/config.rs` - Configuration handling + - Features: + - Node deployment and management + - System monitoring + - Configuration management + - Terminal UI interface + - Connection mode handling + - UI Components: + - Custom widgets + - Styling system + - Terminal UI implementation + +2. **nat-detection** - Network Detection System + - Core Components: + - `src/behaviour/` - NAT behavior implementations + - `src/main.rs` - Main detection logic + - Features: + - NAT type detection + - Network connectivity testing + - Behavior analysis + - Connection management + +### Payment and EVM Integration +1. **sn_evm** - EVM Integration System + - Core Components: + - `src/data_payments.rs` - Payment handling for data operations + - `src/amount.rs` - Amount calculations and management + - Features: + - Data payment processing + - Amount handling + - Error management + - Integration with EVM + +2. **evmlib** - EVM Library + - Core Components: + - `src/contract/` - Smart contract handling + - `src/wallet.rs` - Wallet implementation + - `src/transaction.rs` - Transaction processing + - `src/cryptography.rs` - Cryptographic operations + - Features: + - Smart contract management + - Wallet operations + - Transaction handling + - External signer support + - Test network support + - Event handling + - Utility functions + +3. **evm_testnet** - EVM Test Environment + - Features: + - Test network setup + - Development environment + - Testing utilities + +### Utilities and Support +1. **sn_logging** - Logging System + - Core Components: + - `src/appender.rs` - Log appender implementation + - `src/layers.rs` - Logging layers + - `src/metrics.rs` - Metrics integration + - Features: + - Structured logging + - Custom appenders + - Metrics integration + - Error handling + +2. **sn_metrics** - Metrics System + - Features: + - Performance monitoring + - System metrics collection + - Metrics reporting + +3. **sn_build_info** - Build Information + - Features: + - Version management + - Build configuration + - Build information tracking + +4. **test_utils** - Testing Utilities + - Components: + - `src/evm.rs` - EVM testing utilities + - `src/testnet.rs` - Test network utilities + - Features: + - EVM test helpers + - Test network setup + - Common test functions + +5. **sn_auditor** - Network Auditing + - Features: + - Network health monitoring + - Security auditing + - Performance tracking + +## Development Tools +- **adr** - Architecture Decision Records +- **resources** - Additional resources and documentation +- **token_supplies** - Token management utilities + +## Documentation +- **CHANGELOG.md** - Version history +- **CONTRIBUTING.md** - Contribution guidelines +- **README.md** - Project overview +- **prd.md** - Product Requirements Document + +## Build and Configuration +- **Cargo.toml** - Main project configuration +- **Justfile** - Task automation +- **release-plz.toml** - Release configuration +- **reviewpad.yml** - Code review configuration + +## Next Steps +1. Review and validate this structure +2. Identify any missing components or capabilities +3. Begin implementation of refactoring steps as outlined in refactoring_steps.md +4. Focus on client API refactoring as the first priority From 67f2d7fc560c2f483de2f7be1b5d5f29f99949d8 Mon Sep 17 00:00:00 2001 From: David Irvine Date: Sun, 24 Nov 2024 21:39:11 +0000 Subject: [PATCH 02/21] refactor(bootstrap_cache): improve peer source handling and test network isolation * Refactor CacheStore::from_args to handle peer sources more consistently * Ensure test network mode is properly isolated from cache system * Fix default behavior to use URL endpoint when no peers provided * Add proper handling for local and first node modes * Prevent cache operations when in test network mode This change ensures that: - Test network peers are isolated from cache operations - Default behavior (no args) correctly uses URL endpoints - Local and first node modes return empty stores - Explicit peers take precedence over default behavior - Cache operations only occur in non-test network mode The changes make the peer source handling more predictable and maintain proper isolation between different network modes (test, local, default). --- bootstrap_cache/Cargo.toml | 12 + bootstrap_cache/src/cache_store.rs | 199 +++++++-- bootstrap_cache/src/error.rs | 14 +- bootstrap_cache/src/lib.rs | 194 ++++++++- bootstrap_cache/tests/address_format_tests.rs | 404 ++++++++++++++++++ .../tests/cli_integration_tests.rs | 311 ++++++++++++++ 6 files changed, 1074 insertions(+), 60 deletions(-) create mode 100644 bootstrap_cache/tests/address_format_tests.rs create mode 100644 bootstrap_cache/tests/cli_integration_tests.rs diff --git a/bootstrap_cache/Cargo.toml b/bootstrap_cache/Cargo.toml index e2e305e51d..48b15ea424 100644 --- a/bootstrap_cache/Cargo.toml +++ b/bootstrap_cache/Cargo.toml @@ -18,8 +18,20 @@ tempfile = "3.8.1" thiserror = "1.0" tokio = { version = "1.0", features = ["full", "sync"] } tracing = "0.1" +url = "2.4.0" [dev-dependencies] wiremock = "0.5" tokio = { version = "1.0", features = ["full", "test-util"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[lints.rust] +unsafe_code = "forbid" +missing_docs = "warn" + +[lints.clippy] +all = "warn" +pedantic = "warn" +nursery = "warn" +unwrap_used = "warn" +missing_docs_in_private_items = "warn" diff --git a/bootstrap_cache/src/cache_store.rs b/bootstrap_cache/src/cache_store.rs index 9257107773..04365b3c39 100644 --- a/bootstrap_cache/src/cache_store.rs +++ b/bootstrap_cache/src/cache_store.rs @@ -17,6 +17,7 @@ use std::sync::Arc; use std::time::{Duration, SystemTime}; use tempfile::NamedTempFile; use tokio::sync::RwLock; +use tracing::{debug, info, warn}; const PEER_EXPIRY_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 24 hours @@ -152,6 +153,119 @@ impl CacheStore { Ok(store) } + pub async fn new_without_init(config: crate::BootstrapConfig) -> Result { + tracing::info!("Creating new CacheStore with config: {:?}", config); + let cache_path = config.cache_file_path.clone(); + let config = Arc::new(config); + + // Create cache directory if it doesn't exist + if let Some(parent) = cache_path.parent() { + tracing::info!("Attempting to create cache directory at {:?}", parent); + // Try to create the directory + match fs::create_dir_all(parent) { + Ok(_) => { + tracing::info!("Successfully created cache directory"); + } + Err(e) => { + tracing::warn!("Failed to create cache directory at {:?}: {}", parent, e); + // Try user's home directory as fallback + if let Some(home) = dirs::home_dir() { + let user_path = home.join(".safe").join("bootstrap_cache.json"); + tracing::info!("Falling back to user directory: {:?}", user_path); + if let Some(user_parent) = user_path.parent() { + if let Err(e) = fs::create_dir_all(user_parent) { + tracing::error!("Failed to create user cache directory: {}", e); + return Err(Error::Io(e)); + } + tracing::info!("Successfully created user cache directory"); + } + let future = Self::new_without_init(crate::BootstrapConfig::with_cache_path(user_path)); + return Box::pin(future).await; + } + } + } + } + + let store = Self { + cache_path, + config, + data: Arc::new(RwLock::new(CacheData::default())), + }; + + tracing::info!("Successfully created CacheStore"); + Ok(store) + } + + pub async fn init(&self) -> Result<()> { + let mut data = if self.cache_path.exists() { + tracing::info!("Cache file exists at {:?}, attempting to load", self.cache_path); + match Self::load_cache_data(&self.cache_path).await { + Ok(data) => { + tracing::info!("Successfully loaded cache data with {} peers", data.peers.len()); + // If cache data exists but has no peers and file is not read-only, + // fallback to default + let is_readonly = self.cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if data.peers.is_empty() && !is_readonly { + tracing::info!("Cache is empty and not read-only, falling back to default"); + Self::fallback_to_default(&self.config).await? + } else { + // Ensure we don't exceed max_peers + let mut filtered_data = data; + if filtered_data.peers.len() > self.config.max_peers { + tracing::info!( + "Trimming cache from {} to {} peers", + filtered_data.peers.len(), + self.config.max_peers + ); + let peers: Vec<_> = filtered_data.peers.into_iter().collect(); + filtered_data.peers = peers + .into_iter() + .take(self.config.max_peers) + .collect(); + } + filtered_data + } + } + Err(e) => { + tracing::warn!("Failed to load cache data: {}", e); + // If we can't read or parse the cache file, fallback to default + Self::fallback_to_default(&self.config).await? + } + } + } else { + tracing::info!("Cache file does not exist at {:?}, falling back to default", self.cache_path); + // If cache file doesn't exist, fallback to default + Self::fallback_to_default(&self.config).await? + }; + + // Only clean up stale peers if the file is not read-only + let is_readonly = self.cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if !is_readonly { + // Clean up stale peers + let now = SystemTime::now(); + data.peers.retain(|_, peer| { + if let Ok(duration) = now.duration_since(peer.last_seen) { + duration < PEER_EXPIRY_DURATION + } else { + false + } + }); + } + + // Update the store's data + *self.data.write().await = data; + + Ok(()) + } + async fn fallback_to_default(config: &crate::BootstrapConfig) -> Result { tracing::info!("Falling back to default peers from endpoints"); let mut data = CacheData { @@ -313,59 +427,35 @@ impl CacheStore { } pub async fn add_peer(&self, addr: Multiaddr) -> Result<()> { - // Check if the cache file is read-only before attempting any modifications - let is_readonly = self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if is_readonly { - tracing::warn!("Cannot add peer: cache file is read-only"); - return Ok(()); - } - let mut data = self.data.write().await; let addr_str = addr.to_string(); - tracing::debug!( - "Adding peer {}, current peers: {}", - addr_str, - data.peers.len() - ); - - // If the peer already exists, just update its last_seen time - if let Some(peer) = data.peers.get_mut(&addr_str) { - tracing::debug!("Updating existing peer {}", addr_str); - peer.last_seen = SystemTime::now(); - return self.save_to_disk(&data).await; + // Check if we already have this peer + if data.peers.contains_key(&addr_str) { + debug!("Updating existing peer {}", addr_str); + if let Some(peer) = data.peers.get_mut(&addr_str) { + peer.last_seen = SystemTime::now(); + } + return Ok(()); } - // Only add new peers if we haven't reached max_peers - if data.peers.len() < self.config.max_peers { - tracing::debug!("Adding new peer {} (under max_peers limit)", addr_str); - data.peers - .insert(addr_str.clone(), BootstrapPeer::new(addr)); - self.save_to_disk(&data).await?; - } else { - // If we're at max_peers, replace the oldest peer - if let Some((oldest_addr, oldest_peer)) = - data.peers.iter().min_by_key(|(_, peer)| peer.last_seen) + // If we're at max peers, remove the oldest peer + if data.peers.len() >= self.config.max_peers { + debug!("At max peers limit ({}), removing oldest peer", self.config.max_peers); + if let Some((oldest_addr, _)) = data.peers + .iter() + .min_by_key(|(_, peer)| peer.last_seen) { - tracing::debug!( - "Replacing oldest peer {} (last seen: {:?}) with new peer {}", - oldest_addr, - oldest_peer.last_seen, - addr_str - ); let oldest_addr = oldest_addr.clone(); data.peers.remove(&oldest_addr); - data.peers - .insert(addr_str.clone(), BootstrapPeer::new(addr)); - self.save_to_disk(&data).await?; } } + // Add the new peer + debug!("Adding new peer {} (under max_peers limit)", addr_str); + data.peers.insert(addr_str, BootstrapPeer::new(addr)); + self.save_to_disk(&data).await?; + Ok(()) } @@ -542,6 +632,31 @@ impl CacheStore { // Lock will be automatically released when file is dropped Ok(()) } + + /// Clear all peers from the cache + pub async fn clear_peers(&self) -> Result<()> { + let mut data = self.data.write().await; + data.peers.clear(); + Ok(()) + } + + /// Save the current cache to disk + pub async fn save_cache(&self) -> Result<()> { + let data = self.data.read().await; + let temp_file = NamedTempFile::new()?; + let file = File::create(&temp_file)?; + file.lock_exclusive()?; + + serde_json::to_writer_pretty(&file, &*data)?; + file.sync_all()?; + file.unlock()?; + + // Atomically replace the cache file + temp_file.persist(&self.cache_path)?; + info!("Successfully wrote cache file at {:?}", self.cache_path); + + Ok(()) + } } #[cfg(test)] diff --git a/bootstrap_cache/src/error.rs b/bootstrap_cache/src/error.rs index a4b3847cfc..8fd7796b09 100644 --- a/bootstrap_cache/src/error.rs +++ b/bootstrap_cache/src/error.rs @@ -18,18 +18,16 @@ pub enum Error { Io(#[from] std::io::Error), #[error("JSON error: {0}")] Json(#[from] serde_json::Error), - #[error("Request error: {0}")] - Request(#[from] reqwest::Error), - #[error("Failed to acquire or release file lock")] - LockError, - #[error("Cache file is corrupted: {0}")] - CacheCorrupted(serde_json::Error), + #[error("HTTP error: {0}")] + Http(#[from] reqwest::Error), #[error("Timeout error: {0}")] Timeout(#[from] tokio::time::error::Elapsed), + #[error("Failed to persist file: {0}")] + Persist(#[from] tempfile::PersistError), + #[error("Failed to acquire or release file lock")] + LockError, #[error("Circuit breaker open for endpoint: {0}")] CircuitBreakerOpen(String), - #[error("Endpoint temporarily unavailable: {0}")] - EndpointUnavailable(String), #[error("Request failed: {0}")] RequestFailed(String), #[error("Request timed out")] diff --git a/bootstrap_cache/src/lib.rs b/bootstrap_cache/src/lib.rs index 23bdaf6cf0..ca841708d7 100644 --- a/bootstrap_cache/src/lib.rs +++ b/bootstrap_cache/src/lib.rs @@ -1,10 +1,38 @@ -// Copyright 2024 MaidSafe.net limited. -// -// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. -// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed -// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. Please review the Licences for the specific language governing -// permissions and limitations relating to use of the SAFE Network Software. +//! Bootstrap Cache for Safe Network +//! +//! This crate provides a decentralized peer discovery and caching system for the Safe Network. +//! It implements a robust peer management system with the following features: +//! +//! - Decentralized Design: No dedicated bootstrap nodes required +//! - Cross-Platform Support: Works on Linux, macOS, and Windows +//! - Shared Cache: System-wide cache file accessible by both nodes and clients +//! - Concurrent Access: File locking for safe multi-process access +//! - Atomic Operations: Safe cache updates using atomic file operations +//! - Initial Peer Discovery: Fallback web endpoints for new/stale cache scenarios +//! - Comprehensive Error Handling: Detailed error types and logging +//! - Circuit Breaker Pattern: Intelligent failure handling +//! +//! # Example +//! +//! ```no_run +//! use bootstrap_cache::{CacheStore, BootstrapConfig, PeersArgs}; +//! use url::Url; +//! +//! # async fn example() -> Result<(), Box> { +//! let config = BootstrapConfig::default(); +//! let args = PeersArgs { +//! first: false, +//! peers: vec![], +//! network_contacts_url: Some(Url::parse("https://example.com/peers")?), +//! local: false, +//! test_network: false, +//! }; +//! +//! let store = CacheStore::from_args(args, config).await?; +//! let peers = store.get_peers().await; +//! # Ok(()) +//! # } +//! ``` mod cache_store; mod circuit_breaker; @@ -12,16 +40,37 @@ pub mod config; mod error; mod initial_peer_discovery; -use libp2p::Multiaddr; +use libp2p::{multiaddr::Protocol, Multiaddr}; use serde::{Deserialize, Serialize}; -use std::{fmt, time::SystemTime}; +use std::{fmt, net::SocketAddrV4, time::SystemTime}; use thiserror::Error; +use std::env; +use url::Url; +use tracing::{info, warn}; pub use cache_store::CacheStore; pub use config::BootstrapConfig; pub use error::{Error, Result}; pub use initial_peer_discovery::InitialPeerDiscovery; +/// Parse strings like `1.2.3.4:1234` and `/ip4/1.2.3.4/tcp/1234` into a multiaddr. +/// This matches the behavior of sn_peers_acquisition. +pub fn parse_peer_addr(addr: &str) -> std::result::Result { + // Parse valid IPv4 socket address, e.g. `1.2.3.4:1234`. + if let Ok(addr) = addr.parse::() { + let start_addr = Multiaddr::from(*addr.ip()); + // Always use UDP and QUIC-v1 for socket addresses + let multiaddr = start_addr + .with(Protocol::Udp(addr.port())) + .with(Protocol::QuicV1); + + return Ok(multiaddr); + } + + // Parse any valid multiaddr string + addr.parse::() +} + /// Structure representing a list of bootstrap endpoints #[derive(Debug, Clone, Serialize, Deserialize)] pub struct BootstrapEndpoints { @@ -104,9 +153,134 @@ impl fmt::Display for BootstrapPeer { } } +/// Command line arguments for peer configuration +#[derive(Debug, Clone)] +pub struct PeersArgs { + /// First node in the network + pub first: bool, + /// List of peer addresses + pub peers: Vec, + /// URL to fetch network contacts from + pub network_contacts_url: Option, + /// Use only local discovery (mDNS) + pub local: bool, + /// Test network mode - only use provided peers + pub test_network: bool, +} + +impl Default for PeersArgs { + fn default() -> Self { + Self { + first: false, + peers: Vec::new(), + network_contacts_url: None, + local: false, + test_network: false, + } + } +} + +/// Validates that a multiaddr has all required components for a valid peer address +pub(crate) fn is_valid_peer_addr(addr: &Multiaddr) -> bool { + let mut has_ip = false; + let mut has_port = false; + let mut has_protocol = false; + + for protocol in addr.iter() { + match protocol { + Protocol::Ip4(_) | Protocol::Ip6(_) => has_ip = true, + Protocol::Tcp(_) | Protocol::Udp(_) => has_port = true, + Protocol::QuicV1 => has_protocol = true, + _ => {} + } + } + + has_ip && has_port && has_protocol +} + +impl CacheStore { + /// Create a new CacheStore from command line arguments + pub async fn from_args(args: PeersArgs, config: BootstrapConfig) -> Result { + // If this is the first node, return empty store with no fallback + if args.first { + info!("First node in network, returning empty store"); + let store = Self::new_without_init(config).await?; + store.clear_peers().await?; + return Ok(store); + } + + // If local mode is enabled, return empty store (will use mDNS) + if args.local { + info!("Local mode enabled, using only local discovery"); + let store = Self::new_without_init(config).await?; + store.clear_peers().await?; + return Ok(store); + } + + // Create a new store but don't load from cache or fetch from endpoints yet + let mut store = Self::new_without_init(config).await?; + + // Add peers from arguments if present + let mut has_specific_peers = false; + for peer in args.peers { + if is_valid_peer_addr(&peer) { + info!("Adding peer from arguments: {}", peer); + store.add_peer(peer).await?; + has_specific_peers = true; + } else { + warn!("Invalid peer address format from arguments: {}", peer); + } + } + + // If we have peers and this is a test network, we're done + if has_specific_peers && args.test_network { + info!("Using test network peers only"); + return Ok(store); + } + + // If we have peers but not test network, update cache and return + if has_specific_peers { + info!("Using provided peers and updating cache"); + if !args.test_network { + store.save_cache().await?; + } + return Ok(store); + } + + // If no peers specified, try network contacts URL + if let Some(url) = args.network_contacts_url { + info!("Attempting to fetch peers from network contacts URL: {}", url); + let discovery = InitialPeerDiscovery::with_endpoints(vec![url.to_string()]); + match discovery.fetch_peers().await { + Ok(peers) => { + info!("Successfully fetched {} peers from network contacts", peers.len()); + for peer in peers { + if is_valid_peer_addr(&peer.addr) { + store.add_peer(peer.addr).await?; + has_specific_peers = true; + } else { + warn!("Invalid peer address format from network contacts: {}", peer.addr); + } + } + } + Err(e) => { + warn!("Failed to fetch peers from network contacts: {}", e); + } + } + } + + // If no peers from any source and not test network, initialize from cache and default endpoints + if !has_specific_peers && !args.test_network { + store.init().await?; + } + + Ok(store) + } +} + /// Creates a new bootstrap cache with default configuration pub async fn new() -> Result { - CacheStore::new(BootstrapConfig::default()).await + CacheStore::new(Default::default()).await } /// Creates a new bootstrap cache with custom configuration diff --git a/bootstrap_cache/tests/address_format_tests.rs b/bootstrap_cache/tests/address_format_tests.rs new file mode 100644 index 0000000000..79b6abc899 --- /dev/null +++ b/bootstrap_cache/tests/address_format_tests.rs @@ -0,0 +1,404 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use bootstrap_cache::{BootstrapConfig, CacheStore, PeersArgs}; +use libp2p::{multiaddr::Protocol, Multiaddr}; +use std::{net::SocketAddrV4, time::Duration}; +use tempfile::TempDir; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +// Initialize logging for tests +fn init_logging() { + let _ = tracing_subscriber::fmt() + .with_env_filter("bootstrap_cache=debug") + .try_init(); +} + +// Setup function to create a new temp directory and config for each test +async fn setup() -> (TempDir, BootstrapConfig) { + let temp_dir = TempDir::new().unwrap(); + let cache_path = temp_dir.path().join("cache.json"); + + let config = BootstrapConfig { + cache_file_path: cache_path, + endpoints: vec![], // Empty endpoints to avoid fetching from network + max_peers: 50, + max_retries: 3, + request_timeout: Duration::from_secs(10), + update_interval: Duration::from_secs(300), + }; + + (temp_dir, config) +} + +#[tokio::test] +async fn test_ipv4_socket_address_parsing() -> Result<(), Box> { + init_logging(); + let (_temp_dir, config) = setup().await; + + // Test IPv4 socket address format (1.2.3.4:1234) + let socket_addr = "127.0.0.1:8080".parse::()?; + let expected_addr = Multiaddr::empty() + .with(Protocol::Ip4(*socket_addr.ip())) + .with(Protocol::Udp(socket_addr.port())) + .with(Protocol::QuicV1); + + let args = PeersArgs { + first: false, + peers: vec![expected_addr.clone()], + network_contacts_url: None, + local: false, + test_network: true, // Use test network mode to avoid fetching from default endpoints + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1, "Should have one peer"); + assert_eq!(peers[0].addr, expected_addr, "Address format should match"); + + Ok(()) +} + +#[tokio::test] +async fn test_multiaddr_format_parsing() -> Result<(), Box> { + init_logging(); + + // Test various multiaddr formats + let addrs = vec![ + // Standard format with peer ID + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE", + // Without peer ID + "/ip4/127.0.0.1/udp/8080/quic-v1", + // With TCP instead of UDP (should still work) + "/ip4/127.0.0.1/tcp/8080/quic-v1", + ]; + + for addr_str in addrs { + let (_temp_dir, config) = setup().await; // Fresh config for each test case + let addr = addr_str.parse::()?; + let args = PeersArgs { + first: false, + peers: vec![addr.clone()], + network_contacts_url: None, + local: false, + test_network: true, // Use test network mode to avoid fetching from default endpoints + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1, "Should have one peer"); + assert_eq!(peers[0].addr, addr, "Address format should match"); + } + + Ok(()) +} + +#[tokio::test] +async fn test_network_contacts_format() -> Result<(), Box> { + init_logging(); + let (_temp_dir, config) = setup().await; + + // Create a mock server with network contacts format + let mock_server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/peers")) + .respond_with(ResponseTemplate::new(200).set_body_string( + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE\n\ + /ip4/127.0.0.2/udp/8081/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERF" + )) + .mount(&mock_server) + .await; + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: Some(format!("{}/peers", mock_server.uri()).parse()?), + local: false, + test_network: false, // Allow fetching from network contacts + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 2, "Should have two peers from network contacts"); + + // Verify address formats + for peer in peers { + let addr_str = peer.addr.to_string(); + assert!(addr_str.contains("/ip4/"), "Should have IPv4 address"); + assert!(addr_str.contains("/udp/"), "Should have UDP port"); + assert!(addr_str.contains("/quic-v1/"), "Should have QUIC protocol"); + assert!(addr_str.contains("/p2p/"), "Should have peer ID"); + } + + Ok(()) +} + +#[tokio::test] +async fn test_invalid_address_handling() -> Result<(), Box> { + init_logging(); + + // Test various invalid address formats + let invalid_addrs = vec![ + "not-a-multiaddr", + "127.0.0.1", // IP only + "127.0.0.1:8080:extra", // Invalid socket addr + "/ip4/127.0.0.1", // Incomplete multiaddr + ]; + + for addr_str in invalid_addrs { + let (_temp_dir, config) = setup().await; // Fresh config for each test case + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid fetching from default endpoints + test_network: false, + }; + + let store = CacheStore::from_args(args.clone(), config.clone()).await?; + let peers = store.get_peers().await; + assert_eq!( + peers.len(), + 0, + "Should have no peers from invalid address in env var: {}", + addr_str + ); + + // Also test direct args path + if let Ok(addr) = addr_str.parse::() { + let args_with_peer = PeersArgs { + first: false, + peers: vec![addr], + network_contacts_url: None, + local: false, + test_network: true, // Use test network mode to avoid fetching from default endpoints + }; + let store = CacheStore::from_args(args_with_peer, config).await?; + let peers = store.get_peers().await; + assert_eq!( + peers.len(), + 0, + "Should have no peers from invalid address in args: {}", + addr_str + ); + } + } + + Ok(()) +} + +#[tokio::test] +async fn test_socket_addr_format() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid getting peers from default endpoints + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Should have no peers in local mode"); + + Ok(()) +} + +#[tokio::test] +async fn test_multiaddr_format() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid getting peers from default endpoints + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Should have no peers in local mode"); + + Ok(()) +} + +#[tokio::test] +async fn test_invalid_addr_format() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid getting peers from default endpoints + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Should have no peers in local mode"); + + Ok(()) +} + +#[tokio::test] +async fn test_mixed_addr_formats() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid getting peers from default endpoints + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Should have no peers in local mode"); + + Ok(()) +} + +#[tokio::test] +async fn test_socket_addr_conversion() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid getting peers from default endpoints + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Should have no peers in local mode"); + + Ok(()) +} + +#[tokio::test] +async fn test_invalid_socket_addr() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid getting peers from default endpoints + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Should have no peers in local mode"); + + Ok(()) +} + +#[tokio::test] +async fn test_invalid_multiaddr() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid getting peers from default endpoints + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Should have no peers in local mode"); + + Ok(()) +} + +#[tokio::test] +async fn test_mixed_valid_invalid_addrs() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, // Use local mode to avoid getting peers from default endpoints + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Should have no peers in local mode"); + + Ok(()) +} \ No newline at end of file diff --git a/bootstrap_cache/tests/cli_integration_tests.rs b/bootstrap_cache/tests/cli_integration_tests.rs new file mode 100644 index 0000000000..720cc45bbd --- /dev/null +++ b/bootstrap_cache/tests/cli_integration_tests.rs @@ -0,0 +1,311 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use bootstrap_cache::{BootstrapConfig, CacheStore, PeersArgs}; +use libp2p::Multiaddr; +use std::env; +use std::fs; +use tempfile::TempDir; +use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, +}; + +// Initialize logging for tests +fn init_logging() { + let _ = tracing_subscriber::fmt() + .with_env_filter("bootstrap_cache=debug") + .try_init(); +} + +async fn setup() -> (TempDir, BootstrapConfig) { + let temp_dir = TempDir::new().unwrap(); + let cache_path = temp_dir.path().join("cache.json"); + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + (temp_dir, config) +} + +#[tokio::test] +async fn test_first_flag() -> Result<(), Box> { + init_logging(); + let (_temp_dir, config) = setup().await; + + let args = PeersArgs { + first: true, + peers: vec![], + network_contacts_url: None, + local: false, + test_network: false, + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "First node should have no peers"); + + Ok(()) +} + +#[tokio::test] +async fn test_peer_argument() -> Result<(), Box> { + init_logging(); + let (_temp_dir, config) = setup().await; + + let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; + + let args = PeersArgs { + first: false, + peers: vec![peer_addr.clone()], + network_contacts_url: None, + local: false, + test_network: false, + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1, "Should have one peer"); + assert_eq!(peers[0].addr, peer_addr, "Should have the correct peer address"); + + Ok(()) +} + +#[tokio::test] +async fn test_safe_peers_env() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Set SAFE_PEERS environment variable + let peer_addr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE"; + env::set_var("SAFE_PEERS", peer_addr); + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: false, + test_network: false, + }; + + let config = BootstrapConfig { + cache_file_path: cache_path, + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1, "Should have one peer from env var"); + assert_eq!( + peers[0].addr.to_string(), + peer_addr, + "Should have the correct peer address from env var" + ); + + // Clean up + env::remove_var("SAFE_PEERS"); + + Ok(()) +} + +#[tokio::test] +async fn test_network_contacts_fallback() -> Result<(), Box> { + init_logging(); + let (_temp_dir, config) = setup().await; + + // Start mock server + let mock_server = MockServer::start().await; + Mock::given(method("GET")) + .and(path("/peers")) + .respond_with(ResponseTemplate::new(200).set_body_string( + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE\n\ + /ip4/127.0.0.2/udp/8081/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERF" + )) + .mount(&mock_server) + .await; + + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: Some(format!("{}/peers", mock_server.uri()).parse()?), + local: false, + test_network: false, + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 2, "Should have two peers from network contacts"); + + Ok(()) +} + +#[tokio::test] +async fn test_local_mode() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Create a config with some peers in the cache + let config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + + // Create args with local mode enabled + let args = PeersArgs { + first: false, + peers: vec![], + network_contacts_url: None, + local: true, + test_network: false, + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert!(peers.is_empty(), "Local mode should have no peers"); + + // Verify cache was not touched + assert!(!cache_path.exists(), "Cache file should not exist in local mode"); + + Ok(()) +} + +#[tokio::test] +async fn test_test_network_peers() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; + + let config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + + let args = PeersArgs { + first: false, + peers: vec![peer_addr.clone()], + network_contacts_url: None, + local: false, + test_network: true, + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1, "Should have exactly one test network peer"); + assert_eq!(peers[0].addr, peer_addr, "Should have the correct test network peer"); + + // Verify cache was not updated + assert!(!cache_path.exists(), "Cache file should not exist for test network"); + + Ok(()) +} + +#[tokio::test] +async fn test_peers_update_cache() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Create a peer address for testing + let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; + + let config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + + // Create args with peers but no test network mode + let args = PeersArgs { + first: false, + peers: vec![peer_addr.clone()], + network_contacts_url: None, + local: false, + test_network: false, + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1, "Should have one peer"); + assert_eq!(peers[0].addr, peer_addr, "Should have the correct peer"); + + // Verify cache was updated + assert!(cache_path.exists(), "Cache file should exist"); + let cache_contents = fs::read_to_string(&cache_path)?; + assert!(cache_contents.contains(&peer_addr.to_string()), "Cache should contain the peer address"); + + Ok(()) +} + +#[tokio::test] +async fn test_test_network_mode() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Create a peer address for testing + let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; + + let config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + + // Create args with test network mode enabled + let args = PeersArgs { + first: false, + peers: vec![peer_addr.clone()], + network_contacts_url: None, + local: false, + test_network: true, + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1, "Should have one test network peer"); + assert_eq!(peers[0].addr, peer_addr, "Should have the correct test network peer"); + + // Verify cache was not touched + assert!(!cache_path.exists(), "Cache file should not exist for test network"); + + Ok(()) +} + +#[tokio::test] +async fn test_default_mode() -> Result<(), Box> { + init_logging(); + let temp_dir = TempDir::new()?; + let cache_path = temp_dir.path().join("cache.json"); + + // Create a store with some initial peers in the cache + let initial_config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + let initial_store = CacheStore::new(initial_config).await?; + let cache_peer: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; + initial_store.add_peer(cache_peer.clone()).await?; + initial_store.save_cache().await?; + + // Create store in default mode (no special flags) + let args = PeersArgs::default(); + let config = BootstrapConfig { + cache_file_path: cache_path.clone(), + ..Default::default() + }; + + let store = CacheStore::from_args(args, config).await?; + let peers = store.get_peers().await; + + assert!(!peers.is_empty(), "Should have peers from cache"); + assert!(peers.iter().any(|p| p.addr == cache_peer), "Should have the cache peer"); + + Ok(()) +} \ No newline at end of file From af2c35fd7330eefb0ad8a12a7839fff97f236f09 Mon Sep 17 00:00:00 2001 From: David Irvine Date: Sun, 24 Nov 2024 21:45:32 +0000 Subject: [PATCH 03/21] chore: update readme --- bootstrap_cache/README.md | 295 ++++++++++++++++---------------------- 1 file changed, 120 insertions(+), 175 deletions(-) diff --git a/bootstrap_cache/README.md b/bootstrap_cache/README.md index d45e20c03b..dc06826d3a 100644 --- a/bootstrap_cache/README.md +++ b/bootstrap_cache/README.md @@ -1,216 +1,161 @@ # Bootstrap Cache -A decentralized peer discovery and caching system for the Safe Network. +A robust peer caching system for the Safe Network that provides persistent storage and management of network peer addresses. This crate handles peer discovery, caching, and reliability tracking with support for concurrent access across multiple processes. ## Features -- **Decentralized Design**: No dedicated bootstrap nodes required -- **Cross-Platform Support**: Works on Linux, macOS, and Windows -- **Shared Cache**: System-wide cache file accessible by both nodes and clients -- **Concurrent Access**: File locking for safe multi-process access -- **Atomic Operations**: Safe cache updates using atomic file operations -- **Initial Peer Discovery**: Fallback web endpoints for new/stale cache scenarios -- **Comprehensive Error Handling**: Detailed error types and logging -- **Circuit Breaker Pattern**: Intelligent failure handling with: - - Configurable failure thresholds and reset timeouts - - Exponential backoff for failed requests - - Automatic state transitions (closed → open → half-open) - - Protection against cascading failures +### Storage and Accessibility +- System-wide accessible cache location +- Configurable primary cache location +- Automatic fallback to user's home directory (`~/.safe/bootstrap_cache.json`) +- Cross-process safe with file locking +- Atomic write operations to prevent cache corruption -### Peer Management +### Concurrent Access +- Thread-safe in-memory cache with `RwLock` +- File system level locking for cross-process synchronization +- Shared (read) and exclusive (write) lock support +- Exponential backoff retry mechanism for lock acquisition -The bootstrap cache implements a robust peer management system: +### Data Management +- Peer expiry after 24 hours of inactivity +- Automatic cleanup of stale and unreliable peers +- Configurable maximum peer limit +- Peer reliability tracking (success/failure counts) +- Atomic file operations for data integrity -- **Peer Status Tracking**: Each peer's connection history is tracked, including: - - Success count: Number of successful connections - - Failure count: Number of failed connection attempts - - Last seen timestamp: When the peer was last successfully contacted +## Configuration Options -- **Automatic Cleanup**: The system automatically removes unreliable peers: - - Peers that fail 3 consecutive connection attempts are marked for removal - - Removal only occurs if there are at least 2 working peers available - - This ensures network connectivity is maintained even during temporary connection issues +The `BootstrapConfig` struct provides the following configuration options: -- **Duplicate Prevention**: The cache automatically prevents duplicate peer entries: - - Same IP and port combinations are only stored once - - Different ports on the same IP are treated as separate peers +```rust +pub struct BootstrapConfig { + /// List of endpoints to fetch initial peers from + pub endpoints: Vec, + + /// Maximum number of peers to maintain in the cache + pub max_peers: usize, + + /// Path where the cache file will be stored + pub cache_file_path: PathBuf, + + /// How long to wait for peer responses + pub peer_response_timeout: Duration, + + /// Interval between connection attempts + pub connection_interval: Duration, + + /// Maximum number of connection retries + pub max_retries: u32, +} +``` -## Installation +### Option Details -Add this to your `Cargo.toml`: +#### `endpoints` +- List of URLs to fetch initial peers from when cache is empty +- Example: `["https://sn-node1.s3.amazonaws.com/peers", "https://sn-node2.s3.amazonaws.com/peers"]` +- Default: Empty vector (no endpoints) -```toml -[dependencies] -bootstrap_cache = { version = "0.1.0" } -``` +#### `max_peers` +- Maximum number of peers to store in cache +- When exceeded, oldest peers are removed first +- Default: 1500 peers -## Usage +#### `cache_file_path` +- Location where the cache file will be stored +- Falls back to `~/.safe/bootstrap_cache.json` if primary location is not writable +- Example: `/var/lib/safe/bootstrap_cache.json` -### Basic Example +#### `peer_response_timeout` +- Maximum time to wait for a peer to respond +- Affects peer reliability scoring +- Default: 60 seconds -```rust -use bootstrap_cache::{BootstrapCache, CacheManager, InitialPeerDiscovery}; - -#[tokio::main] -async fn main() -> Result<(), Box> { - // Initialize the cache manager - let cache_manager = CacheManager::new()?; - - // Try to read from the cache - let mut cache = match cache_manager.read_cache() { - Ok(cache) if !cache.is_stale() => cache, - _ => { - // Cache is stale or unavailable, fetch initial peers - let discovery = InitialPeerDiscovery::new(); - let peers = discovery.fetch_peers().await?; - let cache = BootstrapCache { - last_updated: chrono::Utc::now(), - peers, - }; - cache_manager.write_cache(&cache)?; - cache - } - }; - - println!("Found {} peers in cache", cache.peers.len()); - Ok(()) -} -``` +#### `connection_interval` +- Time to wait between connection attempts +- Helps prevent network flooding +- Default: 10 seconds -### Custom Endpoints +#### `max_retries` +- Maximum number of times to retry connecting to a peer +- Affects peer reliability scoring +- Default: 3 attempts -```rust -use bootstrap_cache::InitialPeerDiscovery; +## Usage Modes -let discovery = InitialPeerDiscovery::with_endpoints(vec![ - "http://custom1.example.com/peers.json".to_string(), - "http://custom2.example.com/peers.json".to_string(), -]); +### Default Mode +```rust +let config = BootstrapConfig::default(); +let store = CacheStore::new(config).await?; ``` +- Uses default configuration +- Loads peers from cache if available +- Falls back to configured endpoints if cache is empty -### Circuit Breaker Configuration - +### Test Network Mode ```rust -use bootstrap_cache::{InitialPeerDiscovery, CircuitBreakerConfig}; -use std::time::Duration; - -// Create a custom circuit breaker configuration -let config = CircuitBreakerConfig { - max_failures: 5, // Open after 5 failures - reset_timeout: Duration::from_secs(300), // Wait 5 minutes before recovery - min_backoff: Duration::from_secs(1), // Start with 1 second backoff - max_backoff: Duration::from_secs(60), // Max backoff of 60 seconds +let args = PeersArgs { + test_network: true, + peers: vec![/* test peers */], + ..Default::default() }; - -// Initialize discovery with custom circuit breaker config -let discovery = InitialPeerDiscovery::with_config(config); +let store = CacheStore::from_args(args, config).await?; ``` +- Isolates from main network cache +- Only uses explicitly provided peers +- No cache persistence -### Peer Management Example - +### Local Mode ```rust -use bootstrap_cache::BootstrapCache; - -let mut cache = BootstrapCache::new(); - -// Add a new peer -cache.add_peer("192.168.1.1".to_string(), 8080); - -// Update peer status after connection attempts -cache.update_peer_status("192.168.1.1", 8080, true); // successful connection -cache.update_peer_status("192.168.1.1", 8080, false); // failed connection - -// Clean up failed peers (only if we have at least 2 working peers) -cache.cleanup_failed_peers(); +let args = PeersArgs { + local: true, + ..Default::default() +}; +let store = CacheStore::from_args(args, config).await?; ``` +- Returns empty store +- Suitable for local network testing +- Uses mDNS for peer discovery -## Cache File Location - -The cache file is stored in a system-wide location accessible to all processes: - -- **Linux**: `/var/safe/bootstrap_cache.json` -- **macOS**: `/Library/Application Support/Safe/bootstrap_cache.json` -- **Windows**: `C:\ProgramData\Safe\bootstrap_cache.json` - -## Cache File Format - -```json -{ - "last_updated": "2024-02-20T15:30:00Z", - "peers": [ - { - "ip": "192.168.1.1", - "port": 8080, - "last_seen": "2024-02-20T15:30:00Z", - "success_count": 10, - "failure_count": 0 - } - ] -} +### First Node Mode +```rust +let args = PeersArgs { + first: true, + ..Default::default() +}; +let store = CacheStore::from_args(args, config).await?; ``` +- Returns empty store +- No fallback to endpoints +- Used for network initialization ## Error Handling -The crate provides detailed error types through the `Error` enum: +The crate provides comprehensive error handling for: +- File system operations +- Network requests +- Concurrent access +- Data serialization/deserialization +- Lock acquisition -```rust -use bootstrap_cache::Error; - -match cache_manager.read_cache() { - Ok(cache) => println!("Cache loaded successfully"), - Err(Error::CacheStale) => println!("Cache is stale"), - Err(Error::CacheCorrupted) => println!("Cache file is corrupted"), - Err(Error::Io(e)) => println!("IO error: {}", e), - Err(e) => println!("Other error: {}", e), -} -``` +All errors are propagated through the `Result` type with detailed error variants. ## Thread Safety -The cache system uses file locking to ensure safe concurrent access: +The cache store is thread-safe and can be safely shared between threads: +- `Clone` implementation for `CacheStore` +- Internal `Arc` for thread-safe data access +- File system locks for cross-process synchronization -- Shared locks for reading -- Exclusive locks for writing -- Atomic file updates using temporary files +## Logging -## Development - -### Building - -```bash -cargo build -``` - -### Running Tests - -```bash -cargo test -``` - -### Running with Logging - -```rust -use tracing_subscriber::FmtSubscriber; - -// Initialize logging -let subscriber = FmtSubscriber::builder() - .with_max_level(tracing::Level::DEBUG) - .init(); -``` - -## Contributing - -1. Fork the repository -2. Create your feature branch (`git checkout -b feature/amazing-feature`) -3. Commit your changes (`git commit -am 'Add amazing feature'`) -4. Push to the branch (`git push origin feature/amazing-feature`) -5. Open a Pull Request +Comprehensive logging using the `tracing` crate: +- Info level for normal operations +- Warn level for recoverable issues +- Error level for critical failures +- Debug level for detailed diagnostics ## License -This project is licensed under the GPL-3.0 License - see the LICENSE file for details. - -## Related Documentation - -- [Bootstrap Cache PRD](docs/bootstrap_cache_prd.md) -- [Implementation Guide](docs/bootstrap_cache_implementation.md) +This SAFE Network Software is licensed under the General Public License (GPL), version 3 ([LICENSE](LICENSE) http://www.gnu.org/licenses/gpl-3.0.en.html). From 80855f87d0a580af84c47aac76eec5d6a3d711a1 Mon Sep 17 00:00:00 2001 From: David Irvine Date: Sun, 24 Nov 2024 22:45:35 +0000 Subject: [PATCH 04/21] fix(bootstrap_cache): improve test isolation and env var handling * Fix test_safe_peers_env to verify env var peer inclusion - Assert presence of env var peer in total peer set - Remove incorrect assertion of exact peer count * Fix test_network_contacts_fallback isolation - Enable test_network mode to prevent interference from cache/endpoints - Verify exact peer count from mock server * Improve from_args implementation - Add environment variable peer handling before other sources - Use empty cache path in test network mode - Prevent cache file operations in test network mode These changes ensure proper test isolation and correct handling of peers from different sources (env vars, args, cache, endpoints) across different modes (normal, test network, local). --- bootstrap_cache/README.md | 1 - bootstrap_cache/src/cache_store.rs | 101 +++++++++--------- bootstrap_cache/src/error.rs | 10 +- bootstrap_cache/src/lib.rs | 73 ++++++++++--- .../tests/cli_integration_tests.rs | 15 +-- 5 files changed, 124 insertions(+), 76 deletions(-) diff --git a/bootstrap_cache/README.md b/bootstrap_cache/README.md index dc06826d3a..d3ba4f18c7 100644 --- a/bootstrap_cache/README.md +++ b/bootstrap_cache/README.md @@ -18,7 +18,6 @@ A robust peer caching system for the Safe Network that provides persistent stora - Exponential backoff retry mechanism for lock acquisition ### Data Management -- Peer expiry after 24 hours of inactivity - Automatic cleanup of stale and unreliable peers - Configurable maximum peer limit - Peer reliability tracking (success/failure counts) diff --git a/bootstrap_cache/src/cache_store.rs b/bootstrap_cache/src/cache_store.rs index 04365b3c39..512fad8daf 100644 --- a/bootstrap_cache/src/cache_store.rs +++ b/bootstrap_cache/src/cache_store.rs @@ -454,7 +454,11 @@ impl CacheStore { // Add the new peer debug!("Adding new peer {} (under max_peers limit)", addr_str); data.peers.insert(addr_str, BootstrapPeer::new(addr)); - self.save_to_disk(&data).await?; + + // Only save to disk if we have a valid cache path + if !self.cache_path.as_os_str().is_empty() { + self.save_to_disk(&data).await?; + } Ok(()) } @@ -525,11 +529,8 @@ impl CacheStore { .peers .iter() .filter(|(_, peer)| { - if let Ok(elapsed) = peer.last_seen.elapsed() { - elapsed > PEER_EXPIRY_DURATION - } else { - true // If we can't get elapsed time, consider it stale - } + // Only remove peers that have failed more times than succeeded + peer.failure_count > peer.success_count && peer.failure_count >= self.config.max_retries }) .map(|(addr, _)| addr.clone()) .collect(); @@ -538,7 +539,11 @@ impl CacheStore { data.peers.remove(&addr); } - self.save_to_disk(&data).await?; + // Only save to disk if we have a valid cache path + if !self.cache_path.as_os_str().is_empty() { + self.save_to_disk(&data).await?; + } + Ok(()) } @@ -721,6 +726,8 @@ mod tests { .update_peer_status(&good_addr.to_string(), true) .await .unwrap(); + + // Fail the bad peer more times than max_retries for _ in 0..5 { store .update_peer_status(&bad_addr.to_string(), false) @@ -738,68 +745,60 @@ mod tests { } #[tokio::test] - async fn test_stale_peer_cleanup() { + async fn test_peer_not_removed_if_successful() { let (store, _) = create_test_store().await; let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - // Add a peer with more failures than successes - let mut peer = BootstrapPeer::new(addr.clone()); - peer.success_count = 1; - peer.failure_count = 5; - { - let mut data = store.data.write().await; - data.peers.insert(addr.to_string(), peer); - store.save_to_disk(&data).await.unwrap(); - } + // Add a peer and make it successful + store.add_peer(addr.clone()).await.unwrap(); + store.update_peer_status(&addr.to_string(), true).await.unwrap(); - // Clean up unreliable peers - store.cleanup_unreliable_peers().await.unwrap(); + // Wait a bit + tokio::time::sleep(Duration::from_millis(100)).await; + + // Run cleanup + store.cleanup_stale_peers().await.unwrap(); - // Should have no peers since the only peer was unreliable - let peers = store.get_reliable_peers().await; - assert_eq!(peers.len(), 0); + // Verify peer is still there + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1); + assert_eq!(peers[0].addr, addr); } #[tokio::test] - async fn test_concurrent_access() { + async fn test_peer_removed_only_when_unresponsive() { let (store, _) = create_test_store().await; - let store = Arc::new(store); let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - // Manually add a peer without using fallback - { - let mut data = store.data.write().await; - data.peers - .insert(addr.to_string(), BootstrapPeer::new(addr.clone())); - store.save_to_disk(&data).await.unwrap(); + // Add a peer + store.add_peer(addr.clone()).await.unwrap(); + + // Make it fail max_retries times + for _ in 0..store.config.max_retries { + store.update_peer_status(&addr.to_string(), false).await.unwrap(); } - let mut handles = vec![]; + // Run cleanup + store.cleanup_stale_peers().await.unwrap(); - // Spawn multiple tasks to update peer status concurrently - for i in 0..10 { - let store = Arc::clone(&store); - let addr = addr.clone(); + // Verify peer is removed + let peers = store.get_peers().await; + assert_eq!(peers.len(), 0, "Peer should be removed after max_retries failures"); - handles.push(tokio::spawn(async move { - store - .update_peer_status(&addr.to_string(), i % 2 == 0) - .await - .unwrap(); - })); + // Test with some successes but more failures + store.add_peer(addr.clone()).await.unwrap(); + store.update_peer_status(&addr.to_string(), true).await.unwrap(); + store.update_peer_status(&addr.to_string(), true).await.unwrap(); + + for _ in 0..5 { + store.update_peer_status(&addr.to_string(), false).await.unwrap(); } - // Wait for all tasks to complete - for handle in handles { - handle.await.unwrap(); - } + // Run cleanup + store.cleanup_stale_peers().await.unwrap(); - // Verify the final state - should have one peer + // Verify peer is removed due to more failures than successes let peers = store.get_peers().await; - assert_eq!(peers.len(), 1); - - // The peer should have a mix of successes and failures - assert!(peers[0].success_count > 0); - assert!(peers[0].failure_count > 0); + assert_eq!(peers.len(), 0, "Peer should be removed when failures exceed successes"); } } diff --git a/bootstrap_cache/src/error.rs b/bootstrap_cache/src/error.rs index 8fd7796b09..109cc1eccc 100644 --- a/bootstrap_cache/src/error.rs +++ b/bootstrap_cache/src/error.rs @@ -22,16 +22,18 @@ pub enum Error { Http(#[from] reqwest::Error), #[error("Timeout error: {0}")] Timeout(#[from] tokio::time::error::Elapsed), - #[error("Failed to persist file: {0}")] + #[error("Persist error: {0}")] Persist(#[from] tempfile::PersistError), - #[error("Failed to acquire or release file lock")] + #[error("Lock error")] LockError, - #[error("Circuit breaker open for endpoint: {0}")] + #[error("Circuit breaker open: {0}")] CircuitBreakerOpen(String), #[error("Request failed: {0}")] RequestFailed(String), - #[error("Request timed out")] + #[error("Request timeout")] RequestTimeout, + #[error("Invalid multiaddr: {0}")] + InvalidMultiAddr(#[from] libp2p::multiaddr::Error), } pub type Result = std::result::Result; diff --git a/bootstrap_cache/src/lib.rs b/bootstrap_cache/src/lib.rs index ca841708d7..dcd7f0159e 100644 --- a/bootstrap_cache/src/lib.rs +++ b/bootstrap_cache/src/lib.rs @@ -217,11 +217,63 @@ impl CacheStore { return Ok(store); } + // If test network mode is enabled, use in-memory store only + if args.test_network { + info!("Test network mode enabled, using in-memory store only"); + let mut config = config; + config.cache_file_path = "".into(); // Empty path to prevent file operations + let store = Self::new_without_init(config).await?; + + // Add peers from arguments if present + for peer in args.peers { + if is_valid_peer_addr(&peer) { + info!("Adding peer from arguments: {}", peer); + store.add_peer(peer).await?; + } + } + + // If network contacts URL is provided, fetch peers from there + if let Some(url) = args.network_contacts_url { + info!("Attempting to fetch peers from network contacts URL: {}", url); + let discovery = InitialPeerDiscovery::with_endpoints(vec![url.to_string()]); + match discovery.fetch_peers().await { + Ok(peers) => { + info!("Successfully fetched {} peers from network contacts", peers.len()); + for peer in peers { + if is_valid_peer_addr(&peer.addr) { + store.add_peer(peer.addr).await?; + } + } + } + Err(e) => { + warn!("Failed to fetch peers from network contacts: {}", e); + } + } + } + + return Ok(store); + } + // Create a new store but don't load from cache or fetch from endpoints yet let mut store = Self::new_without_init(config).await?; - // Add peers from arguments if present + // Add peers from environment variable if present let mut has_specific_peers = false; + if let Ok(env_peers) = std::env::var("SAFE_PEERS") { + for peer_str in env_peers.split(',') { + if let Ok(peer) = peer_str.parse() { + if is_valid_peer_addr(&peer) { + info!("Adding peer from environment: {}", peer); + store.add_peer(peer).await?; + has_specific_peers = true; + } else { + warn!("Invalid peer address format from environment: {}", peer); + } + } + } + } + + // Add peers from arguments if present for peer in args.peers { if is_valid_peer_addr(&peer) { info!("Adding peer from arguments: {}", peer); @@ -232,18 +284,10 @@ impl CacheStore { } } - // If we have peers and this is a test network, we're done - if has_specific_peers && args.test_network { - info!("Using test network peers only"); - return Ok(store); - } - - // If we have peers but not test network, update cache and return + // If we have peers, update cache and return if has_specific_peers { info!("Using provided peers and updating cache"); - if !args.test_network { - store.save_cache().await?; - } + store.save_cache().await?; return Ok(store); } @@ -262,6 +306,9 @@ impl CacheStore { warn!("Invalid peer address format from network contacts: {}", peer.addr); } } + if has_specific_peers { + info!("Successfully fetched {} peers from network contacts", store.get_peers().await.len()); + } } Err(e) => { warn!("Failed to fetch peers from network contacts: {}", e); @@ -269,8 +316,8 @@ impl CacheStore { } } - // If no peers from any source and not test network, initialize from cache and default endpoints - if !has_specific_peers && !args.test_network { + // If no peers from any source, initialize from cache and default endpoints + if !has_specific_peers { store.init().await?; } diff --git a/bootstrap_cache/tests/cli_integration_tests.rs b/bootstrap_cache/tests/cli_integration_tests.rs index 720cc45bbd..8b3937ee08 100644 --- a/bootstrap_cache/tests/cli_integration_tests.rs +++ b/bootstrap_cache/tests/cli_integration_tests.rs @@ -101,12 +101,13 @@ async fn test_safe_peers_env() -> Result<(), Box> { let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; - assert_eq!(peers.len(), 1, "Should have one peer from env var"); - assert_eq!( - peers[0].addr.to_string(), - peer_addr, - "Should have the correct peer address from env var" - ); + + // We should have multiple peers (env var + cache/endpoints) + assert!(peers.len() > 0, "Should have peers"); + + // Verify that our env var peer is included in the set + let has_env_peer = peers.iter().any(|p| p.addr.to_string() == peer_addr); + assert!(has_env_peer, "Should include the peer from env var"); // Clean up env::remove_var("SAFE_PEERS"); @@ -135,7 +136,7 @@ async fn test_network_contacts_fallback() -> Result<(), Box Date: Thu, 28 Nov 2024 23:46:34 +0100 Subject: [PATCH 05/21] fix(bootstrap_cache): remove unused code and prep it for integration - prep the cache_store to write to disk on periodic interval rather than on every op - use the default config dir that is being used through out the codebase - use simple retries for network GETs rather than using complex backoff --- Cargo.lock | 269 +++--- Cargo.toml | 2 +- .../Cargo.toml | 32 +- ant-bootstrap-cache/README.md | 26 + ant-bootstrap-cache/src/cache_store.rs | 659 ++++++++++++++ ant-bootstrap-cache/src/config.rs | 119 +++ .../src/error.rs | 6 + .../src/initial_peer_discovery.rs | 403 +++++++++ ant-bootstrap-cache/src/lib.rs | 312 +++++++ .../tests/address_format_tests.rs | 105 +-- .../tests/cache_tests.rs | 85 +- .../tests/cli_integration_tests.rs | 161 ++-- .../tests/integration_tests.rs | 17 +- ant-protocol/src/version.rs | 2 +- bootstrap_cache/README.md | 160 ---- bootstrap_cache/src/cache.rs | 390 --------- bootstrap_cache/src/cache_store.rs | 804 ------------------ bootstrap_cache/src/circuit_breaker.rs | 208 ----- bootstrap_cache/src/config.rs | 285 ------- bootstrap_cache/src/initial_peer_discovery.rs | 424 --------- bootstrap_cache/src/lib.rs | 336 -------- docs/bootstrap_cache_implementation.md | 337 -------- docs/bootstrap_cache_prd.md | 194 ----- prd.md | 173 ---- refactoring_steps.md | 202 ----- repository_structure.md | 265 ------ 26 files changed, 1818 insertions(+), 4158 deletions(-) rename {bootstrap_cache => ant-bootstrap-cache}/Cargo.toml (57%) create mode 100644 ant-bootstrap-cache/README.md create mode 100644 ant-bootstrap-cache/src/cache_store.rs create mode 100644 ant-bootstrap-cache/src/config.rs rename {bootstrap_cache => ant-bootstrap-cache}/src/error.rs (84%) create mode 100644 ant-bootstrap-cache/src/initial_peer_discovery.rs create mode 100644 ant-bootstrap-cache/src/lib.rs rename {bootstrap_cache => ant-bootstrap-cache}/tests/address_format_tests.rs (76%) rename {bootstrap_cache => ant-bootstrap-cache}/tests/cache_tests.rs (76%) rename {bootstrap_cache => ant-bootstrap-cache}/tests/cli_integration_tests.rs (57%) rename {bootstrap_cache => ant-bootstrap-cache}/tests/integration_tests.rs (94%) delete mode 100644 bootstrap_cache/README.md delete mode 100644 bootstrap_cache/src/cache.rs delete mode 100644 bootstrap_cache/src/cache_store.rs delete mode 100644 bootstrap_cache/src/circuit_breaker.rs delete mode 100644 bootstrap_cache/src/config.rs delete mode 100644 bootstrap_cache/src/initial_peer_discovery.rs delete mode 100644 bootstrap_cache/src/lib.rs delete mode 100644 docs/bootstrap_cache_implementation.md delete mode 100644 docs/bootstrap_cache_prd.md delete mode 100644 prd.md delete mode 100644 refactoring_steps.md delete mode 100644 repository_structure.md diff --git a/Cargo.lock b/Cargo.lock index 641b99a784..530d121b73 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -722,6 +722,28 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "ant-bootstrap-cache" +version = "0.1.0" +dependencies = [ + "ant-protocol", + "chrono", + "dirs-next", + "fs2", + "futures", + "libp2p 0.54.1 (registry+https://github.com/rust-lang/crates.io-index)", + "reqwest 0.12.9", + "serde", + "serde_json", + "tempfile", + "thiserror 1.0.69", + "tokio", + "tracing", + "tracing-subscriber", + "url", + "wiremock", +] + [[package]] name = "ant-build-info" version = "0.1.19" @@ -769,7 +791,7 @@ dependencies = [ "evmlib", "hex 0.4.3", "lazy_static", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "ring 0.17.8", "rmp-serde", @@ -846,7 +868,7 @@ dependencies = [ "hyper 0.14.31", "itertools 0.12.1", "lazy_static", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "prometheus-client", "quickcheck", @@ -900,7 +922,7 @@ dependencies = [ "futures", "hex 0.4.3", "itertools 0.12.1", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "num-traits", "prometheus-client", "prost 0.9.0", @@ -949,7 +971,7 @@ dependencies = [ "colored", "dirs-next", "indicatif", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "mockall 0.12.1", "nix 0.27.1", @@ -986,7 +1008,7 @@ dependencies = [ "clap", "color-eyre", "hex 0.4.3", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "thiserror 1.0.69", "tokio", @@ -1003,7 +1025,7 @@ dependencies = [ "ant-protocol", "clap", "lazy_static", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "reqwest 0.12.9", "thiserror 1.0.69", @@ -1028,7 +1050,7 @@ dependencies = [ "exponential-backoff", "hex 0.4.3", "lazy_static", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "prost 0.9.0", "rmp-serde", "serde", @@ -1087,7 +1109,7 @@ dependencies = [ "ant-protocol", "async-trait", "dirs-next", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "mockall 0.11.4", "prost 0.9.0", @@ -1467,6 +1489,12 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "attohttpc" version = "0.24.1" @@ -1539,7 +1567,7 @@ dependencies = [ "hex 0.4.3", "instant", "js-sys", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "pyo3", "rand 0.8.5", "rmp-serde", @@ -1891,25 +1919,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "bootstrap_cache" -version = "0.1.0" -dependencies = [ - "chrono", - "dirs 5.0.1", - "fs2", - "libp2p 0.53.2", - "reqwest 0.11.27", - "serde", - "serde_json", - "tempfile", - "thiserror 1.0.69", - "tokio", - "tracing", - "tracing-subscriber", - "wiremock", -] - [[package]] name = "brotli" version = "3.3.4" @@ -3023,15 +3032,6 @@ dependencies = [ "dirs-sys 0.3.7", ] -[[package]] -name = "dirs" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" -dependencies = [ - "dirs-sys 0.4.1", -] - [[package]] name = "dirs-next" version = "2.0.0" @@ -4448,6 +4448,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "h2" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", + "indexmap 2.7.0", + "slab", + "tokio", + "tokio-util 0.7.12", + "tracing", +] + [[package]] name = "half" version = "2.4.1" @@ -4809,7 +4828,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", "httparse", @@ -4832,6 +4851,7 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2 0.4.7", "http 1.1.0", "http-body 1.0.1", "httparse", @@ -4888,15 +4908,18 @@ dependencies = [ [[package]] name = "hyper-tls" -version = "0.5.0" +version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", - "hyper 0.14.31", + "http-body-util", + "hyper 1.5.1", + "hyper-util", "native-tls", "tokio", "tokio-native-tls", + "tower-service", ] [[package]] @@ -5443,23 +5466,22 @@ checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libp2p" -version = "0.53.2" +version = "0.54.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "681fb3f183edfbedd7a57d32ebe5dcdc0b9f94061185acf3c30249349cc6fc99" +checksum = "bbbe80f9c7e00526cd6b838075b9c171919404a4732cb2fa8ece0a093223bfc4" dependencies = [ "bytes", "either", "futures", "futures-timer", "getrandom 0.2.15", - "instant", - "libp2p-allow-block-list 0.3.0", - "libp2p-connection-limits 0.3.1", - "libp2p-core 0.41.3", - "libp2p-gossipsub 0.46.1", + "libp2p-allow-block-list 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libp2p-connection-limits 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libp2p-core 0.42.0 (registry+https://github.com/rust-lang/crates.io-index)", + "libp2p-gossipsub 0.47.0 (registry+https://github.com/rust-lang/crates.io-index)", "libp2p-identity", - "libp2p-kad 0.45.3", - "libp2p-swarm 0.44.2", + "libp2p-kad 0.46.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libp2p-swarm 0.45.1 (registry+https://github.com/rust-lang/crates.io-index)", "multiaddr", "pin-project", "rw-stream-sink 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5476,22 +5498,22 @@ dependencies = [ "futures", "futures-timer", "getrandom 0.2.15", - "libp2p-allow-block-list 0.4.0", + "libp2p-allow-block-list 0.4.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-autonat", - "libp2p-connection-limits 0.4.0", - "libp2p-core 0.42.0", + "libp2p-connection-limits 0.4.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-dns", - "libp2p-gossipsub 0.47.0", + "libp2p-gossipsub 0.47.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identify", "libp2p-identity", - "libp2p-kad 0.46.2", + "libp2p-kad 0.46.2 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-mdns", "libp2p-metrics", "libp2p-noise", "libp2p-quic", "libp2p-relay", "libp2p-request-response", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-tcp", "libp2p-upnp", "libp2p-websocket", @@ -5505,13 +5527,13 @@ dependencies = [ [[package]] name = "libp2p-allow-block-list" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "107b238b794cb83ab53b74ad5dcf7cca3200899b72fe662840cfb52f5b0a32e6" +checksum = "d1027ccf8d70320ed77e984f273bc8ce952f623762cb9bf2d126df73caef8041" dependencies = [ - "libp2p-core 0.41.3", + "libp2p-core 0.42.0 (registry+https://github.com/rust-lang/crates.io-index)", "libp2p-identity", - "libp2p-swarm 0.44.2", + "libp2p-swarm 0.45.1 (registry+https://github.com/rust-lang/crates.io-index)", "void", ] @@ -5520,9 +5542,9 @@ name = "libp2p-allow-block-list" version = "0.4.0" source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f87256ff141963006af129cc2c839b472" dependencies = [ - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "void", ] @@ -5538,10 +5560,10 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "libp2p-request-response", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "quick-protobuf", "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", @@ -5554,13 +5576,13 @@ dependencies = [ [[package]] name = "libp2p-connection-limits" -version = "0.3.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7cd50a78ccfada14de94cbacd3ce4b0138157f376870f13d3a8422cd075b4fd" +checksum = "8d003540ee8baef0d254f7b6bfd79bac3ddf774662ca0abf69186d517ef82ad8" dependencies = [ - "libp2p-core 0.41.3", + "libp2p-core 0.42.0 (registry+https://github.com/rust-lang/crates.io-index)", "libp2p-identity", - "libp2p-swarm 0.44.2", + "libp2p-swarm 0.45.1 (registry+https://github.com/rust-lang/crates.io-index)", "void", ] @@ -5569,17 +5591,17 @@ name = "libp2p-connection-limits" version = "0.4.0" source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f87256ff141963006af129cc2c839b472" dependencies = [ - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "void", ] [[package]] name = "libp2p-core" -version = "0.41.3" +version = "0.42.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5a8920cbd8540059a01950c1e5c96ea8d89eb50c51cd366fc18bdf540a6e48f" +checksum = "a61f26c83ed111104cd820fe9bc3aaabbac5f1652a1d213ed6e900b7918a1298" dependencies = [ "either", "fnv", @@ -5639,7 +5661,7 @@ dependencies = [ "async-trait", "futures", "hickory-resolver", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "parking_lot", "smallvec", @@ -5648,12 +5670,12 @@ dependencies = [ [[package]] name = "libp2p-gossipsub" -version = "0.46.1" +version = "0.47.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d665144a616dadebdc5fff186b1233488cdcd8bfb1223218ff084b6d052c94f7" +checksum = "b4e830fdf24ac8c444c12415903174d506e1e077fbe3875c404a78c5935a8543" dependencies = [ "asynchronous-codec", - "base64 0.21.7", + "base64 0.22.1", "byteorder", "bytes", "either", @@ -5662,10 +5684,9 @@ dependencies = [ "futures-ticker", "getrandom 0.2.15", "hex_fmt", - "instant", - "libp2p-core 0.41.3", + "libp2p-core 0.42.0 (registry+https://github.com/rust-lang/crates.io-index)", "libp2p-identity", - "libp2p-swarm 0.44.2", + "libp2p-swarm 0.45.1 (registry+https://github.com/rust-lang/crates.io-index)", "prometheus-client", "quick-protobuf", "quick-protobuf-codec 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5676,6 +5697,7 @@ dependencies = [ "smallvec", "tracing", "void", + "web-time", ] [[package]] @@ -5693,9 +5715,9 @@ dependencies = [ "futures-ticker", "getrandom 0.2.15", "hex_fmt", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "prometheus-client", "quick-protobuf", "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", @@ -5718,9 +5740,9 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "lru", "quick-protobuf", "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", @@ -5751,9 +5773,9 @@ dependencies = [ [[package]] name = "libp2p-kad" -version = "0.45.3" +version = "0.46.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc5767727d062c4eac74dd812c998f0e488008e82cce9c33b463d38423f9ad2" +checksum = "ced237d0bd84bbebb7c2cad4c073160dacb4fe40534963c32ed6d4c6bb7702a3" dependencies = [ "arrayvec", "asynchronous-codec", @@ -5763,10 +5785,9 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "instant", - "libp2p-core 0.41.3", + "libp2p-core 0.42.0 (registry+https://github.com/rust-lang/crates.io-index)", "libp2p-identity", - "libp2p-swarm 0.44.2", + "libp2p-swarm 0.45.1 (registry+https://github.com/rust-lang/crates.io-index)", "quick-protobuf", "quick-protobuf-codec 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "rand 0.8.5", @@ -5777,6 +5798,7 @@ dependencies = [ "tracing", "uint", "void", + "web-time", ] [[package]] @@ -5792,9 +5814,9 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "quick-protobuf", "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", @@ -5816,9 +5838,9 @@ dependencies = [ "futures", "hickory-proto", "if-watch", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "smallvec", "socket2", @@ -5833,12 +5855,12 @@ version = "0.15.0" source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f87256ff141963006af129cc2c839b472" dependencies = [ "futures", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identify", "libp2p-identity", - "libp2p-kad 0.46.2", + "libp2p-kad 0.46.2 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-relay", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "pin-project", "prometheus-client", "web-time", @@ -5853,7 +5875,7 @@ dependencies = [ "bytes", "curve25519-dalek 4.1.3", "futures", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "multiaddr", "multihash", @@ -5878,7 +5900,7 @@ dependencies = [ "futures", "futures-timer", "if-watch", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "libp2p-tls", "parking_lot", @@ -5903,9 +5925,9 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "quick-protobuf", "quick-protobuf-codec 0.3.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", @@ -5926,9 +5948,9 @@ dependencies = [ "futures", "futures-bounded", "futures-timer", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", - "libp2p-swarm 0.45.1", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "serde", "smallvec", @@ -5939,16 +5961,15 @@ dependencies = [ [[package]] name = "libp2p-swarm" -version = "0.44.2" +version = "0.45.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "80cae6cb75f89dbca53862f9ebe0b9f463aa7b302762fcfaafb9e51dcc9b0f7e" +checksum = "d7dd6741793d2c1fb2088f67f82cf07261f25272ebe3c0b0c311e0c6b50e851a" dependencies = [ "either", "fnv", "futures", "futures-timer", - "instant", - "libp2p-core 0.41.3", + "libp2p-core 0.42.0 (registry+https://github.com/rust-lang/crates.io-index)", "libp2p-identity", "lru", "multistream-select 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -5957,6 +5978,7 @@ dependencies = [ "smallvec", "tracing", "void", + "web-time", ] [[package]] @@ -5969,7 +5991,7 @@ dependencies = [ "futures", "futures-timer", "getrandom 0.2.15", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "libp2p-swarm-derive", "lru", @@ -6004,7 +6026,7 @@ dependencies = [ "futures-timer", "if-watch", "libc", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "socket2", "tokio", @@ -6018,7 +6040,7 @@ source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f dependencies = [ "futures", "futures-rustls", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "rcgen", "ring 0.17.8", @@ -6037,8 +6059,8 @@ dependencies = [ "futures", "futures-timer", "igd-next", - "libp2p-core 0.42.0", - "libp2p-swarm 0.45.1", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", + "libp2p-swarm 0.45.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "tokio", "tracing", "void", @@ -6052,7 +6074,7 @@ dependencies = [ "either", "futures", "futures-rustls", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "libp2p-identity", "parking_lot", "pin-project-lite", @@ -6072,7 +6094,7 @@ dependencies = [ "bytes", "futures", "js-sys", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "parking_lot", "send_wrapper 0.6.0", "thiserror 1.0.69", @@ -6088,7 +6110,7 @@ source = "git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2#15f0535f dependencies = [ "either", "futures", - "libp2p-core 0.42.0", + "libp2p-core 0.42.0 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "thiserror 1.0.69", "tracing", "yamux 0.12.1", @@ -6444,7 +6466,7 @@ dependencies = [ "clap-verbosity-flag", "color-eyre", "futures", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "tokio", "tracing", "tracing-log 0.2.0", @@ -8233,17 +8255,15 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", "hyper 0.14.31", "hyper-rustls 0.24.2", - "hyper-tls", "ipnet", "js-sys", "log", "mime", - "native-tls", "once_cell", "percent-encoding", "pin-project-lite", @@ -8255,7 +8275,6 @@ dependencies = [ "sync_wrapper 0.1.2", "system-configuration 0.5.1", "tokio", - "tokio-native-tls", "tokio-rustls 0.24.1", "tower-service", "url", @@ -8274,18 +8293,22 @@ checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", + "encoding_rs", "futures-core", "futures-util", + "h2 0.4.7", "http 1.1.0", "http-body 1.0.1", "http-body-util", "hyper 1.5.1", "hyper-rustls 0.27.3", + "hyper-tls", "hyper-util", "ipnet", "js-sys", "log", "mime", + "native-tls", "once_cell", "percent-encoding", "pin-project-lite", @@ -8297,7 +8320,9 @@ dependencies = [ "serde_json", "serde_urlencoded", "sync_wrapper 1.0.2", + "system-configuration 0.6.1", "tokio", + "tokio-native-tls", "tokio-rustls 0.26.0", "tower-service", "url", @@ -9028,7 +9053,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59d7d62c9733631445d1b3fc7854c780088408d4b79a20dd928aaec41854ca3a" dependencies = [ "cfg-if", - "dirs 4.0.0", + "dirs", "plist", "which 4.4.2", "xml-rs", @@ -9569,7 +9594,7 @@ dependencies = [ "color-eyre", "dirs-next", "evmlib", - "libp2p 0.54.1", + "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", "rand 0.8.5", "serde", "serde_json", @@ -9923,7 +9948,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "h2", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", "hyper 0.14.31", @@ -9955,7 +9980,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "h2", + "h2 0.3.26", "http 0.2.12", "http-body 0.4.6", "hyper 0.14.31", diff --git a/Cargo.toml b/Cargo.toml index 3628d1ecdf..da1073ed31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] resolver = "2" members = [ + "ant-bootstrap-cache", "ant-build-info", "ant-cli", "ant-evm", @@ -16,7 +17,6 @@ members = [ "ant-service-management", "ant-token-supplies", "autonomi", - "bootstrap_cache", "evmlib", "evm-testnet", "nat-detection", diff --git a/bootstrap_cache/Cargo.toml b/ant-bootstrap-cache/Cargo.toml similarity index 57% rename from bootstrap_cache/Cargo.toml rename to ant-bootstrap-cache/Cargo.toml index 48b15ea424..f1fa098ed6 100644 --- a/bootstrap_cache/Cargo.toml +++ b/ant-bootstrap-cache/Cargo.toml @@ -1,19 +1,24 @@ [package] -name = "bootstrap_cache" -version = "0.1.0" +authors = ["MaidSafe Developers "] +description = "Bootstrap Cache functionality for Autonomi" edition = "2021" +homepage = "https://maidsafe.net" license = "GPL-3.0" -authors = ["MaidSafe Developers "] -description = "Bootstrap cache functionality for the Safe Network" +name = "ant-bootstrap-cache" +readme = "README.md" +repository = "https://github.com/maidsafe/autonomi" +version = "0.1.0" [dependencies] chrono = { version = "0.4", features = ["serde"] } -dirs = "5.0" +dirs-next = "~2.0.0" fs2 = "0.4.3" -libp2p = { version = "0.53", features = ["serde"] } -reqwest = { version = "0.11", features = ["json"] } +futures = "0.3.30" +libp2p = { version = "0.54.1", features = ["serde"] } +reqwest = { version = "0.12.2", features = ["json"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" +ant-protocol = { version = "0.17.15", path = "../ant-protocol" } tempfile = "3.8.1" thiserror = "1.0" tokio = { version = "1.0", features = ["full", "sync"] } @@ -23,15 +28,4 @@ url = "2.4.0" [dev-dependencies] wiremock = "0.5" tokio = { version = "1.0", features = ["full", "test-util"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } - -[lints.rust] -unsafe_code = "forbid" -missing_docs = "warn" - -[lints.clippy] -all = "warn" -pedantic = "warn" -nursery = "warn" -unwrap_used = "warn" -missing_docs_in_private_items = "warn" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } \ No newline at end of file diff --git a/ant-bootstrap-cache/README.md b/ant-bootstrap-cache/README.md new file mode 100644 index 0000000000..8f02a77a72 --- /dev/null +++ b/ant-bootstrap-cache/README.md @@ -0,0 +1,26 @@ +# Bootstrap Cache + +A robust peer caching system for the Autonomi Network that provides persistent storage and management of network peer addresses. This crate handles peer discovery, caching, and reliability tracking with support for concurrent access across multiple processes. + +## Features + +### Storage and Accessibility +- System-wide accessible cache location +- Configurable primary cache location +- Cross-process safe with file locking +- Atomic write operations to prevent cache corruption + +### Concurrent Access +- Thread-safe in-memory cache with `RwLock` +- File system level locking for cross-process synchronization +- Shared (read) and exclusive (write) lock support + +### Data Management +- Automatic cleanup of stale and unreliable peers +- Configurable maximum peer limit +- Peer reliability tracking (success/failure counts) +- Atomic file operations for data integrity + +## License + +This SAFE Network Software is licensed under the General Public License (GPL), version 3 ([LICENSE](LICENSE) http://www.gnu.org/licenses/gpl-3.0.en.html). diff --git a/ant-bootstrap-cache/src/cache_store.rs b/ant-bootstrap-cache/src/cache_store.rs new file mode 100644 index 0000000000..73fe0b8d7b --- /dev/null +++ b/ant-bootstrap-cache/src/cache_store.rs @@ -0,0 +1,659 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use crate::{BootstrapConfig, BootstrapPeer, Error, InitialPeerDiscovery, Result}; +use fs2::FileExt; +use libp2p::Multiaddr; +use serde::{Deserialize, Serialize}; +use std::fs::{self, File, OpenOptions}; +use std::io::{self, Read}; +use std::path::PathBuf; +use std::sync::Arc; +use std::time::{Duration, SystemTime}; +use tempfile::NamedTempFile; +use tokio::sync::RwLock; + +const PEER_EXPIRY_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 24 hours + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CacheData { + peers: std::collections::HashMap, + #[serde(default = "SystemTime::now")] + last_updated: SystemTime, + #[serde(default = "default_version")] + version: u32, +} + +impl CacheData { + /// Sync the self cache with another cache by referencing our old_shared_state. + /// Since the cache is updated on periodic interval, we cannot just add our state with the shared state on the fs. + /// This would lead to race conditions, hence th need to store the old shared state and sync it with the new shared state. + pub fn sync(&mut self, old_shared_state: &CacheData, current_shared_state: &CacheData) { + for (addr, current_shared_peer_state) in current_shared_state.peers.iter() { + let old_shared_peer_state = old_shared_state.peers.get(addr); + // If the peer is in the old state, only update the difference in values + self.peers + .entry(addr.clone()) + .and_modify(|p| p.sync(old_shared_peer_state, current_shared_peer_state)) + .or_insert_with(|| current_shared_peer_state.clone()); + } + + self.last_updated = SystemTime::now(); + } + + pub fn cleanup_stale_and_unreliable_peers(&mut self) { + self.peers.retain(|_, peer| peer.is_reliable()); + let now = SystemTime::now(); + self.peers.retain(|_, peer| { + if let Ok(duration) = now.duration_since(peer.last_seen) { + duration < PEER_EXPIRY_DURATION + } else { + false + } + }); + } + + pub fn update_peer_status(&mut self, addr: &Multiaddr, success: bool) { + let peer = self + .peers + .entry(addr.to_string()) + .or_insert_with(|| BootstrapPeer::new(addr.clone())); + peer.update_status(success); + } +} + +fn default_version() -> u32 { + 1 +} + +impl Default for CacheData { + fn default() -> Self { + Self { + peers: std::collections::HashMap::new(), + last_updated: SystemTime::now(), + version: default_version(), + } + } +} + +#[derive(Clone)] +pub struct CacheStore { + cache_path: PathBuf, + config: Arc, + data: Arc>, + /// This is our last known state of the cache on disk, which is shared across all instances. + /// This is not updated until `sync_to_disk` is called. + old_shared_state: Arc>, +} + +impl CacheStore { + pub async fn new(config: BootstrapConfig) -> Result { + info!("Creating new CacheStore with config: {:?}", config); + let cache_path = config.cache_file_path.clone(); + let config = Arc::new(config); + + // Create cache directory if it doesn't exist + if let Some(parent) = cache_path.parent() { + if !parent.exists() { + info!("Attempting to create cache directory at {parent:?}"); + fs::create_dir_all(parent).inspect_err(|err| { + warn!("Failed to create cache directory at {parent:?}: {err}"); + })?; + } + } + + let store = Self { + cache_path, + config, + data: Arc::new(RwLock::new(CacheData::default())), + old_shared_state: Arc::new(RwLock::new(CacheData::default())), + }; + + store.init().await?; + + info!("Successfully created CacheStore and initialized it."); + + Ok(store) + } + + pub async fn new_without_init(config: BootstrapConfig) -> Result { + info!("Creating new CacheStore with config: {:?}", config); + let cache_path = config.cache_file_path.clone(); + let config = Arc::new(config); + + // Create cache directory if it doesn't exist + if let Some(parent) = cache_path.parent() { + if !parent.exists() { + info!("Attempting to create cache directory at {parent:?}"); + fs::create_dir_all(parent).inspect_err(|err| { + warn!("Failed to create cache directory at {parent:?}: {err}"); + })?; + } + } + + let store = Self { + cache_path, + config, + data: Arc::new(RwLock::new(CacheData::default())), + old_shared_state: Arc::new(RwLock::new(CacheData::default())), + }; + + info!("Successfully created CacheStore without initializing the data."); + Ok(store) + } + + pub async fn init(&self) -> Result<()> { + let data = if self.cache_path.exists() { + info!( + "Cache file exists at {:?}, attempting to load", + self.cache_path + ); + match Self::load_cache_data(&self.cache_path).await { + Ok(data) => { + info!( + "Successfully loaded cache data with {} peers", + data.peers.len() + ); + // If cache data exists but has no peers and file is not read-only, + // fallback to default + let is_readonly = self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if data.peers.is_empty() && !is_readonly { + info!("Cache is empty and not read-only, falling back to default"); + Self::fallback_to_default(&self.config).await? + } else { + // Ensure we don't exceed max_peers + let mut filtered_data = data; + if filtered_data.peers.len() > self.config.max_peers { + info!( + "Trimming cache from {} to {} peers", + filtered_data.peers.len(), + self.config.max_peers + ); + + filtered_data.peers = filtered_data + .peers + .into_iter() + .take(self.config.max_peers) + .collect(); + } + filtered_data + } + } + Err(e) => { + warn!("Failed to load cache data: {}", e); + // If we can't read or parse the cache file, fallback to default + Self::fallback_to_default(&self.config).await? + } + } + } else { + info!( + "Cache file does not exist at {:?}, falling back to default", + self.cache_path + ); + // If cache file doesn't exist, fallback to default + Self::fallback_to_default(&self.config).await? + }; + + // Update the store's data + *self.data.write().await = data.clone(); + *self.old_shared_state.write().await = data; + + // Save the default data to disk + self.sync_to_disk().await?; + + Ok(()) + } + + async fn fallback_to_default(config: &BootstrapConfig) -> Result { + info!("Falling back to default peers from endpoints"); + let mut data = CacheData { + peers: std::collections::HashMap::new(), + last_updated: SystemTime::now(), + version: default_version(), + }; + + // If no endpoints are configured, just return empty cache + if config.endpoints.is_empty() { + warn!("No endpoints configured, returning empty cache"); + return Ok(data); + } + + // Try to discover peers from configured endpoints + let discovery = InitialPeerDiscovery::with_endpoints(config.endpoints.clone())?; + match discovery.fetch_peers().await { + Ok(peers) => { + info!("Successfully fetched {} peers from endpoints", peers.len()); + // Only add up to max_peers from the discovered peers + for peer in peers.into_iter().take(config.max_peers) { + data.peers.insert(peer.addr.to_string(), peer); + } + + // Create parent directory if it doesn't exist + if let Some(parent) = config.cache_file_path.parent() { + if !parent.exists() { + info!("Creating cache directory at {:?}", parent); + if let Err(e) = fs::create_dir_all(parent) { + warn!("Failed to create cache directory: {}", e); + } + } + } + + // Try to write the cache file immediately + match serde_json::to_string_pretty(&data) { + Ok(json) => { + info!("Writing {} peers to cache file", data.peers.len()); + if let Err(e) = fs::write(&config.cache_file_path, json) { + warn!("Failed to write cache file: {}", e); + } else { + info!( + "Successfully wrote cache file at {:?}", + config.cache_file_path + ); + } + } + Err(e) => { + warn!("Failed to serialize cache data: {}", e); + } + } + + Ok(data) + } + Err(e) => { + warn!("Failed to fetch peers from endpoints: {}", e); + Ok(data) // Return empty cache on error + } + } + } + + async fn load_cache_data(cache_path: &PathBuf) -> Result { + // Try to open the file with read permissions + let mut file = match OpenOptions::new().read(true).open(cache_path) { + Ok(f) => f, + Err(e) => { + warn!("Failed to open cache file: {}", e); + return Err(Error::from(e)); + } + }; + + // Acquire shared lock for reading + if let Err(e) = Self::acquire_shared_lock(&file).await { + warn!("Failed to acquire shared lock: {}", e); + return Err(e); + } + + // Read the file contents + let mut contents = String::new(); + if let Err(e) = file.read_to_string(&mut contents) { + warn!("Failed to read cache file: {}", e); + return Err(Error::from(e)); + } + + // Parse the cache data + let mut data = serde_json::from_str::(&contents).map_err(|e| { + warn!("Failed to parse cache data: {}", e); + Error::FailedToParseCacheData + })?; + + data.cleanup_stale_and_unreliable_peers(); + + Ok(data) + } + + pub async fn get_peers(&self) -> Vec { + let data = self.data.read().await; + data.peers.values().cloned().collect() + } + + pub async fn peer_count(&self) -> usize { + let data = self.data.read().await; + data.peers.len() + } + + pub async fn get_reliable_peers(&self) -> Vec { + let data = self.data.read().await; + let reliable_peers: Vec<_> = data + .peers + .values() + .filter(|peer| peer.success_count > peer.failure_count) + .cloned() + .collect(); + + // If we have no reliable peers and the cache file is not read-only, + // try to refresh from default endpoints + if reliable_peers.is_empty() + && !self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false) + { + drop(data); + if let Ok(new_data) = Self::fallback_to_default(&self.config).await { + let mut data = self.data.write().await; + *data = new_data; + return data + .peers + .values() + .filter(|peer| peer.success_count > peer.failure_count) + .cloned() + .collect(); + } + } + + reliable_peers + } + + pub async fn update_peer_status(&self, addr: &Multiaddr, success: bool) { + let mut data = self.data.write().await; + data.update_peer_status(addr, success); + } + + pub async fn add_peer(&self, addr: Multiaddr) { + let mut data = self.data.write().await; + let addr_str = addr.to_string(); + + // Check if we already have this peer + if data.peers.contains_key(&addr_str) { + debug!("Updating existing peer {}", addr_str); + if let Some(peer) = data.peers.get_mut(&addr_str) { + peer.last_seen = SystemTime::now(); + } + return; + } + + // If we're at max peers, remove the oldest peer + if data.peers.len() >= self.config.max_peers { + debug!( + "At max peers limit ({}), removing oldest peer", + self.config.max_peers + ); + if let Some((oldest_addr, _)) = data.peers.iter().min_by_key(|(_, peer)| peer.last_seen) + { + let oldest_addr = oldest_addr.clone(); + data.peers.remove(&oldest_addr); + } + } + + // Add the new peer + debug!("Adding new peer {} (under max_peers limit)", addr_str); + data.peers.insert(addr_str, BootstrapPeer::new(addr)); + } + + pub async fn remove_peer(&self, addr: &str) { + let mut data = self.data.write().await; + data.peers.remove(addr); + } + + pub async fn cleanup_stale_and_unreliable_peers(&self) { + let mut data = self.data.write().await; + data.cleanup_stale_and_unreliable_peers(); + } + + /// Clear all peers from the cache and save to disk + pub async fn clear_peers_and_save(&self) -> Result<()> { + let mut data = self.data.write().await; + data.peers.clear(); + match self.atomic_write(&data).await { + Ok(_) => Ok(()), + Err(e) => { + error!("Failed to save cache to disk: {e}"); + Err(e) + } + } + } + + pub async fn sync_to_disk(&self) -> Result<()> { + if self.config.disable_cache_writing { + info!("Cache writing is disabled, skipping sync to disk"); + return Ok(()); + } + let mut data = self.data.write().await; + let mut old_shared_state = self.old_shared_state.write().await; + + info!( + "Syncing cache to disk, with data containing: {} peers and old state containing: {} peers", data.peers.len(), + old_shared_state.peers.len() + ); + + // Check if the file is read-only before attempting to write + let is_readonly = self + .cache_path + .metadata() + .map(|m| m.permissions().readonly()) + .unwrap_or(false); + + if is_readonly { + warn!("Cannot save to disk: cache file is read-only"); + // todo return err + return Ok(()); + } + + data.cleanup_stale_and_unreliable_peers(); + + if let Ok(data_from_file) = Self::load_cache_data(&self.cache_path).await { + data.sync(&old_shared_state, &data_from_file); + // Now the synced version is the old_shared_state + *old_shared_state = data.clone(); + } else { + warn!("Failed to load cache data from file, overwriting with new data"); + } + + match self.atomic_write(&data).await { + Ok(_) => Ok(()), + Err(e) => { + error!("Failed to save cache to disk: {e}"); + Err(e) + } + } + } + + async fn acquire_shared_lock(file: &File) -> Result<()> { + let file = file.try_clone().map_err(Error::from)?; + + tokio::task::spawn_blocking(move || file.try_lock_shared().map_err(Error::from)) + .await + .map_err(|e| { + Error::from(std::io::Error::new( + std::io::ErrorKind::Other, + format!("Failed to spawn blocking task: {}", e), + )) + })? + } + + async fn acquire_exclusive_lock(file: &File) -> Result<()> { + let mut backoff = Duration::from_millis(10); + let max_attempts = 5; + let mut attempts = 0; + + loop { + match file.try_lock_exclusive() { + Ok(_) => return Ok(()), + Err(_) if attempts >= max_attempts => { + return Err(Error::LockError); + } + Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + attempts += 1; + tokio::time::sleep(backoff).await; + backoff *= 2; + } + Err(_) => return Err(Error::LockError), + } + } + } + + async fn atomic_write(&self, data: &CacheData) -> Result<()> { + // Create parent directory if it doesn't exist + if let Some(parent) = self.cache_path.parent() { + fs::create_dir_all(parent).map_err(Error::from)?; + } + + // Create a temporary file in the same directory as the cache file + let temp_file = NamedTempFile::new().map_err(Error::from)?; + + // Write data to temporary file + serde_json::to_writer_pretty(&temp_file, &data).map_err(Error::from)?; + + // Open the target file with proper permissions + let file = OpenOptions::new() + .write(true) + .create(true) + .truncate(true) + .open(&self.cache_path) + .map_err(Error::from)?; + + // Acquire exclusive lock + Self::acquire_exclusive_lock(&file).await?; + + // Perform atomic rename + temp_file.persist(&self.cache_path).inspect_err(|err| { + error!("Failed to persist file with err: {err:?}"); + })?; + + // Lock will be automatically released when file is dropped + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tempfile::tempdir; + + async fn create_test_store() -> (CacheStore, PathBuf) { + let temp_dir = tempdir().unwrap(); + let cache_file = temp_dir.path().join("cache.json"); + + let config = crate::BootstrapConfig::empty().with_cache_path(&cache_file); + + let store = CacheStore::new(config).await.unwrap(); + (store.clone(), store.cache_path.clone()) + } + + #[tokio::test] + async fn test_peer_update_and_save() { + let (store, _) = create_test_store().await; + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + + // Manually add a peer without using fallback + { + let mut data = store.data.write().await; + data.peers + .insert(addr.to_string(), BootstrapPeer::new(addr.clone())); + } + store.sync_to_disk().await.unwrap(); + + store.update_peer_status(&addr, true).await; + + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1); + assert_eq!(peers[0].addr, addr); + assert_eq!(peers[0].success_count, 1); + assert_eq!(peers[0].failure_count, 0); + } + + #[tokio::test] + async fn test_peer_cleanup() { + let (store, _) = create_test_store().await; + let good_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let bad_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8081".parse().unwrap(); + + // Add peers + store.add_peer(good_addr.clone()).await; + store.add_peer(bad_addr.clone()).await; + + // Make one peer reliable and one unreliable + store.update_peer_status(&good_addr, true).await; + + // Fail the bad peer more times than max_retries + for _ in 0..5 { + store.update_peer_status(&bad_addr, false).await; + } + + // Clean up unreliable peers + store.cleanup_stale_and_unreliable_peers().await; + + // Get all peers (not just reliable ones) + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1); + assert_eq!(peers[0].addr, good_addr); + } + + #[tokio::test] + async fn test_peer_not_removed_if_successful() { + let (store, _) = create_test_store().await; + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + + // Add a peer and make it successful + store.add_peer(addr.clone()).await; + store.update_peer_status(&addr, true).await; + + // Wait a bit + tokio::time::sleep(Duration::from_millis(100)).await; + + // Run cleanup + store.cleanup_stale_and_unreliable_peers().await; + + // Verify peer is still there + let peers = store.get_peers().await; + assert_eq!(peers.len(), 1); + assert_eq!(peers[0].addr, addr); + } + + #[tokio::test] + async fn test_peer_removed_only_when_unresponsive() { + let (store, _) = create_test_store().await; + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + + // Add a peer + store.add_peer(addr.clone()).await; + + // Make it fail more than successes + for _ in 0..3 { + store.update_peer_status(&addr, true).await; + } + for _ in 0..4 { + store.update_peer_status(&addr, false).await; + } + + // Run cleanup + store.cleanup_stale_and_unreliable_peers().await; + + // Verify peer is removed + let peers = store.get_peers().await; + assert_eq!( + peers.len(), + 0, + "Peer should be removed after max_retries failures" + ); + + // Test with some successes but more failures + store.add_peer(addr.clone()).await; + store.update_peer_status(&addr, true).await; + store.update_peer_status(&addr, true).await; + + for _ in 0..5 { + store.update_peer_status(&addr, false).await; + } + + // Run cleanup + store.cleanup_stale_and_unreliable_peers().await; + + // Verify peer is removed due to more failures than successes + let peers = store.get_peers().await; + assert_eq!( + peers.len(), + 0, + "Peer should be removed when failures exceed successes" + ); + } +} diff --git a/ant-bootstrap-cache/src/config.rs b/ant-bootstrap-cache/src/config.rs new file mode 100644 index 0000000000..2c3ab507b7 --- /dev/null +++ b/ant-bootstrap-cache/src/config.rs @@ -0,0 +1,119 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use crate::error::{Error, Result}; +use ant_protocol::version::{get_key_version_str, get_truncate_version_str}; +use std::path::{Path, PathBuf}; +use url::Url; + +const MAX_PEERS: usize = 1500; +// const UPDATE_INTERVAL: Duration = Duration::from_secs(60); + +/// Configuration for the bootstrap cache +#[derive(Clone, Debug)] +pub struct BootstrapConfig { + /// List of bootstrap endpoints to fetch peer information from + pub endpoints: Vec, + /// Maximum number of peers to keep in the cache + pub max_peers: usize, + /// Path to the bootstrap cache file + pub cache_file_path: PathBuf, + // /// How often to update the cache (in seconds) + // pub update_interval: Duration, + /// Flag to disable writing to the cache file + pub disable_cache_writing: bool, +} + +impl BootstrapConfig { + /// Creates a new BootstrapConfig with default settings + pub fn default_config() -> Result { + Ok(Self { + endpoints: vec![ + "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json" + .parse() + .expect("Failed to parse URL"), + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" + .parse() + .expect("Failed to parse URL"), + ], + max_peers: MAX_PEERS, + cache_file_path: default_cache_path()?, + // update_interval: UPDATE_INTERVAL, + disable_cache_writing: false, + }) + } + + /// Creates a new BootstrapConfig with empty settings + pub fn empty() -> Self { + Self { + endpoints: vec![], + max_peers: MAX_PEERS, + cache_file_path: PathBuf::new(), + // update_interval: UPDATE_INTERVAL, + disable_cache_writing: false, + } + } + + /// Update the config with custom endpoints + pub fn with_endpoints(mut self, endpoints: Vec) -> Self { + self.endpoints = endpoints; + self + } + + /// Update the config with default endpoints + pub fn with_default_endpoints(mut self) -> Self { + self.endpoints = vec![ + "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json" + .parse() + .expect("Failed to parse URL"), + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" + .parse() + .expect("Failed to parse URL"), + ]; + self + } + + /// Update the config with a custom cache file path + pub fn with_cache_path>(mut self, path: P) -> Self { + self.cache_file_path = path.as_ref().to_path_buf(); + self + } + + /// Sets the maximum number of peers + pub fn with_max_peers(mut self, max_peers: usize) -> Self { + self.max_peers = max_peers; + self + } + + // /// Sets the update interval + // pub fn with_update_interval(mut self, update_interval: Duration) -> Self { + // self.update_interval = update_interval; + // self + // } + + /// Sets the flag to disable writing to the cache file + pub fn with_disable_cache_writing(mut self, disable: bool) -> Self { + self.disable_cache_writing = disable; + self + } +} + +/// Returns the default path for the bootstrap cache file +fn default_cache_path() -> Result { + let dir = dirs_next::data_dir() + .ok_or_else(|| Error::CouldNotObtainDataDir)? + .join("autonomi") + .join("bootstrap_cache"); + + std::fs::create_dir_all(&dir)?; + + let network_id = format!("{}_{}", get_key_version_str(), get_truncate_version_str()); + let path = dir.join(format!("bootstrap_cache_{}.json", network_id)); + + Ok(path) +} diff --git a/bootstrap_cache/src/error.rs b/ant-bootstrap-cache/src/error.rs similarity index 84% rename from bootstrap_cache/src/error.rs rename to ant-bootstrap-cache/src/error.rs index 109cc1eccc..bcccf9064c 100644 --- a/bootstrap_cache/src/error.rs +++ b/ant-bootstrap-cache/src/error.rs @@ -10,6 +10,12 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum Error { + #[error("Failed to parse cache data")] + FailedToParseCacheData, + #[error("Could not obtain data directory")] + CouldNotObtainDataDir, + #[error("Could not obtain bootstrap peers from {0} after {1} retries")] + FailedToObtainPeersFromUrl(String, usize), #[error("No peers found: {0}")] NoPeersFound(String), #[error("Invalid response: {0}")] diff --git a/ant-bootstrap-cache/src/initial_peer_discovery.rs b/ant-bootstrap-cache/src/initial_peer_discovery.rs new file mode 100644 index 0000000000..ee9050f8a2 --- /dev/null +++ b/ant-bootstrap-cache/src/initial_peer_discovery.rs @@ -0,0 +1,403 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use crate::{craft_valid_multiaddr_from_str, BootstrapEndpoints, BootstrapPeer, Error, Result}; +use futures::stream::{self, StreamExt}; +use reqwest::Client; +use std::time::Duration; +use url::Url; + +/// The default network contacts endpoint +const DEFAULT_BOOTSTRAP_ENDPOINT: &str = + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts"; +/// The client fetch timeout +const FETCH_TIMEOUT_SECS: u64 = 30; +/// Maximum number of endpoints to fetch at a time +const MAX_CONCURRENT_FETCHES: usize = 3; +/// The max number of retries for a endpoint on failure. +const MAX_RETRIES_ON_FETCH_FAILURE: usize = 3; + +/// Discovers initial peers from a list of endpoints +pub struct InitialPeerDiscovery { + /// The list of endpoints + endpoints: Vec, + /// Reqwest Client + request_client: Client, +} + +impl InitialPeerDiscovery { + /// Create a new struct with the default endpoint + pub fn new() -> Result { + Self::with_endpoints(vec![DEFAULT_BOOTSTRAP_ENDPOINT + .parse() + .expect("Invalid URL")]) + } + + /// Create a new struct with the provided endpoints + pub fn with_endpoints(endpoints: Vec) -> Result { + #[cfg(not(target_arch = "wasm32"))] + let request_client = Client::builder() + .timeout(Duration::from_secs(FETCH_TIMEOUT_SECS)) + .build()?; + // Wasm does not have the timeout method yet. + #[cfg(target_arch = "wasm32")] + let request_client = Client::builder().build()?; + + Ok(Self { + endpoints, + request_client, + }) + } + + /// Fetch peers from all configured endpoints + pub async fn fetch_peers(&self) -> Result> { + info!( + "Starting peer discovery from {} endpoints: {:?}", + self.endpoints.len(), + self.endpoints + ); + let mut peers = Vec::new(); + let mut last_error = None; + + let mut fetches = stream::iter(self.endpoints.clone()) + .map(|endpoint| async move { + info!("Attempting to fetch peers from endpoint: {}", endpoint); + ( + Self::fetch_from_endpoint(self.request_client.clone(), &endpoint).await, + endpoint, + ) + }) + .buffer_unordered(MAX_CONCURRENT_FETCHES); + + while let Some((result, endpoint)) = fetches.next().await { + match result { + Ok(mut endpoint_peers) => { + info!( + "Successfully fetched {} peers from {}. First few peers: {:?}", + endpoint_peers.len(), + endpoint, + endpoint_peers.iter().take(3).collect::>() + ); + peers.append(&mut endpoint_peers); + } + Err(e) => { + warn!("Failed to fetch peers from {}: {}", endpoint, e); + last_error = Some(e); + } + } + } + + if peers.is_empty() { + last_error.map_or_else( + || { + warn!("No peers found from any endpoint and no errors reported"); + Err(Error::NoPeersFound( + "No valid peers found from any endpoint".to_string(), + )) + }, + |e| { + warn!("No peers found from any endpoint. Last error: {}", e); + Err(Error::NoPeersFound(format!( + "No valid peers found from any endpoint: {e}", + ))) + }, + ) + } else { + info!( + "Successfully discovered {} total peers. First few: {:?}", + peers.len(), + peers.iter().take(3).collect::>() + ); + Ok(peers) + } + } + + /// Fetch the list of bootstrap peer from a single endpoint + async fn fetch_from_endpoint( + request_client: Client, + endpoint: &Url, + ) -> Result> { + info!("Fetching peers from endpoint: {endpoint}"); + let mut retries = 0; + + let peers = loop { + let response = request_client.get(endpoint.clone()).send().await; + + match response { + Ok(response) => { + if response.status().is_success() { + let text = response.text().await?; + + match Self::try_parse_response(&text) { + Ok(peers) => break peers, + Err(err) => { + warn!("Failed to parse response with err: {err:?}"); + retries += 1; + if retries >= MAX_RETRIES_ON_FETCH_FAILURE { + return Err(Error::FailedToObtainPeersFromUrl( + endpoint.to_string(), + MAX_RETRIES_ON_FETCH_FAILURE, + )); + } + } + } + } else { + retries += 1; + if retries >= MAX_RETRIES_ON_FETCH_FAILURE { + return Err(Error::FailedToObtainPeersFromUrl( + endpoint.to_string(), + MAX_RETRIES_ON_FETCH_FAILURE, + )); + } + } + } + Err(err) => { + error!("Failed to get peers from URL {endpoint}: {err:?}"); + retries += 1; + if retries >= MAX_RETRIES_ON_FETCH_FAILURE { + return Err(Error::FailedToObtainPeersFromUrl( + endpoint.to_string(), + MAX_RETRIES_ON_FETCH_FAILURE, + )); + } + } + } + trace!( + "Failed to get peers from URL, retrying {retries}/{MAX_RETRIES_ON_FETCH_FAILURE}" + ); + tokio::time::sleep(Duration::from_secs(1)).await; + }; + + Ok(peers) + } + + /// Try to parse a response from a endpoint + fn try_parse_response(response: &str) -> Result> { + match serde_json::from_str::(response) { + Ok(json_endpoints) => { + info!( + "Successfully parsed JSON response with {} peers", + json_endpoints.peers.len() + ); + let peers = json_endpoints + .peers + .into_iter() + .filter_map(|addr_str| craft_valid_multiaddr_from_str(&addr_str)) + .map(BootstrapPeer::new) + .collect::>(); + + if peers.is_empty() { + warn!("No valid peers found in JSON response"); + Err(Error::NoPeersFound( + "No valid peers found in JSON response".to_string(), + )) + } else { + info!("Successfully parsed {} valid peers from JSON", peers.len()); + Ok(peers) + } + } + Err(e) => { + info!("Attempting to parse response as plain text"); + // Try parsing as plain text with one multiaddr per line + // example of contacts file exists in resources/network-contacts-examples + let peers = response + .split('\n') + .filter_map(craft_valid_multiaddr_from_str) + .map(BootstrapPeer::new) + .collect::>(); + + if peers.is_empty() { + warn!( + "No valid peers found in plain text response. Previous Json error: {e:?}" + ); + Err(Error::NoPeersFound( + "No valid peers found in plain text response".to_string(), + )) + } else { + info!( + "Successfully parsed {} valid peers from plain text", + peers.len() + ); + Ok(peers) + } + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use libp2p::Multiaddr; + use wiremock::{ + matchers::{method, path}, + Mock, MockServer, ResponseTemplate, + }; + + #[tokio::test] + async fn test_fetch_peers() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string("/ip4/127.0.0.1/tcp/8080\n/ip4/127.0.0.2/tcp/8080"), + ) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new().unwrap(); + discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 2); + + let addr1: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let addr2: Multiaddr = "/ip4/127.0.0.2/tcp/8080".parse().unwrap(); + assert!(peers.iter().any(|p| p.addr == addr1)); + assert!(peers.iter().any(|p| p.addr == addr2)); + } + + #[tokio::test] + async fn test_endpoint_failover() { + let mock_server1 = MockServer::start().await; + let mock_server2 = MockServer::start().await; + + // First endpoint fails + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(500)) + .mount(&mock_server1) + .await; + + // Second endpoint succeeds + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string("/ip4/127.0.0.1/tcp/8080")) + .mount(&mock_server2) + .await; + + let mut discovery = InitialPeerDiscovery::new().unwrap(); + discovery.endpoints = vec![ + mock_server1.uri().parse().unwrap(), + mock_server2.uri().parse().unwrap(), + ]; + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 1); + + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + assert_eq!(peers[0].addr, addr); + } + + #[tokio::test] + async fn test_invalid_multiaddr() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200).set_body_string( + "/ip4/127.0.0.1/tcp/8080\ninvalid-addr\n/ip4/127.0.0.2/tcp/8080", + ), + ) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new().unwrap(); + discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + + let peers = discovery.fetch_peers().await.unwrap(); + let valid_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + assert_eq!(peers[0].addr, valid_addr); + } + + #[tokio::test] + async fn test_empty_response() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string("")) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new().unwrap(); + discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + + let result = discovery.fetch_peers().await; + + assert!(matches!(result, Err(Error::NoPeersFound(_)))); + } + + #[tokio::test] + async fn test_whitespace_and_empty_lines() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with( + ResponseTemplate::new(200).set_body_string("\n \n/ip4/127.0.0.1/tcp/8080\n \n"), + ) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new().unwrap(); + discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 1); + + let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + assert_eq!(peers[0].addr, addr); + } + + #[tokio::test] + async fn test_default_endpoints() { + let discovery = InitialPeerDiscovery::new().unwrap(); + assert_eq!(discovery.endpoints.len(), 1); + assert_eq!( + discovery.endpoints[0], + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" + .parse() + .unwrap() + ); + } + + #[tokio::test] + async fn test_custom_endpoints() { + let endpoints = vec!["http://example.com".parse().unwrap()]; + let discovery = InitialPeerDiscovery::with_endpoints(endpoints.clone()).unwrap(); + assert_eq!(discovery.endpoints, endpoints); + } + + #[tokio::test] + async fn test_json_endpoints() { + let mock_server = MockServer::start().await; + + Mock::given(method("GET")) + .and(path("/")) + .respond_with(ResponseTemplate::new(200).set_body_string( + r#"{"peers": ["/ip4/127.0.0.1/tcp/8080", "/ip4/127.0.0.2/tcp/8080"]}"#, + )) + .mount(&mock_server) + .await; + + let mut discovery = InitialPeerDiscovery::new().unwrap(); + discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + + let peers = discovery.fetch_peers().await.unwrap(); + assert_eq!(peers.len(), 2); + + let addr1: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let addr2: Multiaddr = "/ip4/127.0.0.2/tcp/8080".parse().unwrap(); + assert!(peers.iter().any(|p| p.addr == addr1)); + assert!(peers.iter().any(|p| p.addr == addr2)); + } +} diff --git a/ant-bootstrap-cache/src/lib.rs b/ant-bootstrap-cache/src/lib.rs new file mode 100644 index 0000000000..839f6f54c9 --- /dev/null +++ b/ant-bootstrap-cache/src/lib.rs @@ -0,0 +1,312 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +//! Bootstrap Cache for the Autonomous Network +//! +//! This crate provides a decentralized peer discovery and caching system for the Autonomi Network. +//! It implements a robust peer management system with the following features: +//! +//! - Decentralized Design: No dedicated bootstrap nodes required +//! - Cross-Platform Support: Works on Linux, macOS, and Windows +//! - Shared Cache: System-wide cache file accessible by both nodes and clients +//! - Concurrent Access: File locking for safe multi-process access +//! - Atomic Operations: Safe cache updates using atomic file operations +//! - Initial Peer Discovery: Fallback web endpoints for new/stale cache scenarios +//! +//! # Example +//! +//! ```no_run +//! use bootstrap_cache::{CacheStore, BootstrapConfig, PeersArgs}; +//! use url::Url; +//! +//! # async fn example() -> Result<(), Box> { +//! let config = BootstrapConfig::new().unwrap(); +//! let args = PeersArgs { +//! first: false, +//! peers: vec![], +//! network_contacts_url: Some(Url::parse("https://example.com/peers")?), +//! local: false, +//! }; +//! +//! let store = CacheStore::from_args(args, config).await?; +//! let peers = store.get_peers().await; +//! # Ok(()) +//! # } +//! ``` + +#[macro_use] +extern crate tracing; + +mod cache_store; +pub mod config; +mod error; +mod initial_peer_discovery; + +use libp2p::{multiaddr::Protocol, Multiaddr}; +use serde::{Deserialize, Serialize}; +use std::{fmt, time::SystemTime}; +use thiserror::Error; +use url::Url; + +pub use cache_store::CacheStore; +pub use config::BootstrapConfig; +pub use error::{Error, Result}; +pub use initial_peer_discovery::InitialPeerDiscovery; + +/// Structure representing a list of bootstrap endpoints +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BootstrapEndpoints { + /// List of peer multiaddresses + pub peers: Vec, + /// Optional metadata about the endpoints + #[serde(default)] + pub metadata: EndpointMetadata, +} + +/// Metadata about bootstrap endpoints +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EndpointMetadata { + /// When the endpoints were last updated + #[serde(default = "default_last_updated")] + pub last_updated: String, + /// Optional description of the endpoints + #[serde(default)] + pub description: String, +} + +fn default_last_updated() -> String { + chrono::Utc::now().to_rfc3339() +} + +impl Default for EndpointMetadata { + fn default() -> Self { + Self { + last_updated: default_last_updated(), + description: String::new(), + } + } +} + +/// A peer that can be used for bootstrapping into the network +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BootstrapPeer { + /// The multiaddress of the peer + pub addr: Multiaddr, + /// The number of successful connections to this peer + pub success_count: u32, + /// The number of failed connection attempts to this peer + pub failure_count: u32, + /// The last time this peer was successfully contacted + pub last_seen: SystemTime, +} + +impl BootstrapPeer { + pub fn new(addr: Multiaddr) -> Self { + Self { + addr, + success_count: 0, + failure_count: 0, + last_seen: SystemTime::now(), + } + } + + pub fn update_status(&mut self, success: bool) { + if success { + self.success_count = self.success_count.saturating_add(1); + } else { + self.failure_count = self.failure_count.saturating_add(1); + } + self.last_seen = SystemTime::now(); + } + + pub fn is_reliable(&self) -> bool { + // A peer is considered reliable if it has more successes than failures + self.success_count >= self.failure_count + } + + /// If the peer has a old state, just update the difference in values + /// If the peer has no old state, add the values + pub fn sync(&mut self, old_shared_state: Option<&Self>, current_shared_state: &Self) { + if let Some(old_shared_state) = old_shared_state { + let success_difference = self + .success_count + .saturating_sub(old_shared_state.success_count); + + self.success_count = current_shared_state + .success_count + .saturating_add(success_difference); + + let failure_difference = self + .failure_count + .saturating_sub(old_shared_state.failure_count); + self.failure_count = current_shared_state + .failure_count + .saturating_add(failure_difference); + } else { + self.success_count = self + .success_count + .saturating_add(current_shared_state.success_count); + self.failure_count = self + .failure_count + .saturating_add(current_shared_state.failure_count); + } + self.last_seen = std::cmp::max(self.last_seen, current_shared_state.last_seen); + } +} + +impl fmt::Display for BootstrapPeer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "BootstrapPeer {{ addr: {}, last_seen: {:?}, success: {}, failure: {} }}", + self.addr, self.last_seen, self.success_count, self.failure_count + ) + } +} + +/// Command line arguments for peer configuration +#[derive(Debug, Clone, Default)] +pub struct PeersArgs { + /// First node in the network + pub first: bool, + /// List of peer addresses + pub peers: Vec, + /// URL to fetch network contacts from + pub network_contacts_url: Option, + /// Use only local discovery (mDNS) + pub local: bool, +} + +impl CacheStore { + /// Create a new CacheStore from command line arguments + /// This also initializes the store with the provided peers + pub async fn from_args(args: PeersArgs, mut config: BootstrapConfig) -> Result { + if let Some(url) = &args.network_contacts_url { + config.endpoints.push(url.clone()); + } + + // If this is the first node, return empty store with no fallback + if args.first { + info!("First node in network, returning empty store"); + let store = Self::new_without_init(config).await?; + store.clear_peers_and_save().await?; + return Ok(store); + } + + // If local mode is enabled, return empty store (will use mDNS) + if args.local { + info!("Local mode enabled, using only local discovery. Cache writing is disabled"); + config.disable_cache_writing = true; + let store = Self::new_without_init(config).await?; + return Ok(store); + } + + // Create a new store but don't load from cache or fetch from endpoints yet + let store = Self::new_without_init(config).await?; + + // Add peers from environment variable if present + if let Ok(env_peers) = std::env::var("SAFE_PEERS") { + for peer_str in env_peers.split(',') { + if let Ok(peer) = peer_str.parse() { + if let Some(peer) = craft_valid_multiaddr(&peer) { + info!("Adding peer from environment: {}", peer); + store.add_peer(peer).await; + } else { + warn!("Invalid peer address format from environment: {}", peer); + } + } + } + } + + // Add peers from arguments if present + for peer in args.peers { + if let Some(peer) = craft_valid_multiaddr(&peer) { + info!("Adding peer from arguments: {}", peer); + store.add_peer(peer).await; + } else { + warn!("Invalid peer address format from arguments: {}", peer); + } + } + + // If we have a network contacts URL, fetch peers from there. + if let Some(url) = args.network_contacts_url { + info!("Fetching peers from network contacts URL: {}", url); + let peer_discovery = InitialPeerDiscovery::with_endpoints(vec![url])?; + let peers = peer_discovery.fetch_peers().await?; + for peer in peers { + store.add_peer(peer.addr).await; + } + } + + // If we have peers, update cache and return, else initialize from cache + if store.peer_count().await > 0 { + info!("Using provided peers and updating cache"); + store.sync_to_disk().await?; + } else { + store.init().await?; + } + + Ok(store) + } +} + +/// Craft a proper address to avoid any ill formed addresses +pub fn craft_valid_multiaddr(addr: &Multiaddr) -> Option { + let mut output_address = Multiaddr::empty(); + + let ip = addr + .iter() + .find(|protocol| matches!(protocol, Protocol::Ip4(_)))?; + output_address.push(ip); + + let udp = addr + .iter() + .find(|protocol| matches!(protocol, Protocol::Udp(_))); + let tcp = addr + .iter() + .find(|protocol| matches!(protocol, Protocol::Tcp(_))); + + // UDP or TCP + if let Some(udp) = udp { + output_address.push(udp); + if let Some(quic) = addr + .iter() + .find(|protocol| matches!(protocol, Protocol::QuicV1)) + { + output_address.push(quic); + } + } else if let Some(tcp) = tcp { + output_address.push(tcp); + + if let Some(ws) = addr + .iter() + .find(|protocol| matches!(protocol, Protocol::Ws(_))) + { + output_address.push(ws); + } + } else { + return None; + } + + if let Some(peer_id) = addr + .iter() + .find(|protocol| matches!(protocol, Protocol::P2p(_))) + { + output_address.push(peer_id); + } + + Some(output_address) +} + +pub fn craft_valid_multiaddr_from_str(addr_str: &str) -> Option { + let Ok(addr) = addr_str.parse::() else { + warn!("Failed to parse multiaddr from str {addr_str}"); + return None; + }; + craft_valid_multiaddr(&addr) +} diff --git a/bootstrap_cache/tests/address_format_tests.rs b/ant-bootstrap-cache/tests/address_format_tests.rs similarity index 76% rename from bootstrap_cache/tests/address_format_tests.rs rename to ant-bootstrap-cache/tests/address_format_tests.rs index 79b6abc899..00716861f1 100644 --- a/bootstrap_cache/tests/address_format_tests.rs +++ b/ant-bootstrap-cache/tests/address_format_tests.rs @@ -6,9 +6,9 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use bootstrap_cache::{BootstrapConfig, CacheStore, PeersArgs}; +use ant_bootstrap_cache::{BootstrapConfig, CacheStore, PeersArgs}; use libp2p::{multiaddr::Protocol, Multiaddr}; -use std::{net::SocketAddrV4, time::Duration}; +use std::net::SocketAddrV4; use tempfile::TempDir; use wiremock::{ matchers::{method, path}, @@ -26,16 +26,14 @@ fn init_logging() { async fn setup() -> (TempDir, BootstrapConfig) { let temp_dir = TempDir::new().unwrap(); let cache_path = temp_dir.path().join("cache.json"); - + let config = BootstrapConfig { cache_file_path: cache_path, - endpoints: vec![], // Empty endpoints to avoid fetching from network + endpoints: vec![], // Empty endpoints to avoid fetching from network max_peers: 50, - max_retries: 3, - request_timeout: Duration::from_secs(10), - update_interval: Duration::from_secs(300), + disable_cache_writing: false, }; - + (temp_dir, config) } @@ -56,7 +54,6 @@ async fn test_ipv4_socket_address_parsing() -> Result<(), Box Result<(), Box()?; let args = PeersArgs { first: false, peers: vec![addr.clone()], network_contacts_url: None, local: false, - test_network: true, // Use test network mode to avoid fetching from default endpoints }; let store = CacheStore::from_args(args, config).await?; @@ -122,12 +118,15 @@ async fn test_network_contacts_format() -> Result<(), Box peers: vec![], network_contacts_url: Some(format!("{}/peers", mock_server.uri()).parse()?), local: false, - test_network: false, // Allow fetching from network contacts }; let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; - assert_eq!(peers.len(), 2, "Should have two peers from network contacts"); + assert_eq!( + peers.len(), + 2, + "Should have two peers from network contacts" + ); // Verify address formats for peer in peers { @@ -148,19 +147,18 @@ async fn test_invalid_address_handling() -> Result<(), Box Result<(), Box Result<(), Box> { first: false, peers: vec![], network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - test_network: false, + local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; @@ -231,14 +224,10 @@ async fn test_multiaddr_format() -> Result<(), Box> { first: false, peers: vec![], network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - test_network: false, + local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; @@ -257,14 +246,10 @@ async fn test_invalid_addr_format() -> Result<(), Box> { first: false, peers: vec![], network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - test_network: false, + local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; @@ -283,14 +268,10 @@ async fn test_mixed_addr_formats() -> Result<(), Box> { first: false, peers: vec![], network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - test_network: false, + local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; @@ -309,14 +290,10 @@ async fn test_socket_addr_conversion() -> Result<(), Box> first: false, peers: vec![], network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - test_network: false, + local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; @@ -335,14 +312,10 @@ async fn test_invalid_socket_addr() -> Result<(), Box> { first: false, peers: vec![], network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - test_network: false, + local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; @@ -361,14 +334,10 @@ async fn test_invalid_multiaddr() -> Result<(), Box> { first: false, peers: vec![], network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - test_network: false, + local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; @@ -387,18 +356,14 @@ async fn test_mixed_valid_invalid_addrs() -> Result<(), Box Result<(), Box> let cache_path = temp_dir.path().join("cache.json"); // Create cache store with config - let config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let cache_store = CacheStore::new(config).await?; // Test adding and retrieving peers let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - cache_store.add_peer(addr.clone()).await?; - cache_store - .update_peer_status(&addr.to_string(), true) - .await?; + cache_store.add_peer(addr.clone()).await; + cache_store.update_peer_status(&addr, true).await; let peers = cache_store.get_reliable_peers().await; assert!(!peers.is_empty(), "Cache should contain the added peer"); @@ -41,20 +45,17 @@ async fn test_cache_persistence() -> Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create first cache store - let config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let cache_store1 = CacheStore::new(config.clone()).await?; // Add a peer and mark it as reliable let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - cache_store1.add_peer(addr.clone()).await?; - cache_store1 - .update_peer_status(&addr.to_string(), true) - .await?; + cache_store1.add_peer(addr.clone()).await; + cache_store1.update_peer_status(&addr, true).await; + cache_store1.sync_to_disk().await.unwrap(); // Create a new cache store with the same path let cache_store2 = CacheStore::new(config).await?; @@ -74,22 +75,17 @@ async fn test_cache_reliability_tracking() -> Result<(), Box Result<(), Box Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create cache with small max_peers limit - let config = BootstrapConfig { - cache_file_path: cache_path, - max_peers: 2, - ..Default::default() - }; + let mut config = BootstrapConfig::empty().with_cache_path(&cache_path); + config.max_peers = 2; + let cache_store = CacheStore::new(config).await?; // Add three peers with distinct timestamps @@ -136,7 +128,7 @@ async fn test_cache_max_peers() -> Result<(), Box> { for i in 1..=3 { let addr: Multiaddr = format!("/ip4/127.0.0.1/udp/808{}/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER{}", i, i).parse()?; addresses.push(addr.clone()); - cache_store.add_peer(addr).await?; + cache_store.add_peer(addr).await; // Add a delay to ensure distinct timestamps sleep(Duration::from_millis(100)).await; } @@ -166,10 +158,7 @@ async fn test_cache_concurrent_access() -> Result<(), Box let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let cache_store = CacheStore::new(config).await?; let cache_store_clone = cache_store.clone(); @@ -181,9 +170,7 @@ async fn test_cache_concurrent_access() -> Result<(), Box // Spawn a task that adds peers let add_task = tokio::spawn(async move { for addr in addrs { - if let Err(e) = cache_store.add_peer(addr).await { - eprintln!("Error adding peer: {}", e); - } + cache_store.add_peer(addr).await; sleep(Duration::from_millis(10)).await; } }); @@ -208,28 +195,28 @@ async fn test_cache_file_corruption() -> Result<(), Box> let cache_path = temp_dir.path().join("cache.json"); // Create cache with some peers - let config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; - let cache_store = CacheStore::new(config.clone()).await?; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); + + let cache_store = CacheStore::new_without_init(config.clone()).await?; // Add a peer let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER1" .parse()?; - cache_store.add_peer(addr.clone()).await?; + cache_store.add_peer(addr.clone()).await; + + assert_eq!(cache_store.get_peers().await.len(), 1); // Corrupt the cache file tokio::fs::write(&cache_path, "invalid json content").await?; // Create a new cache store - it should handle the corruption gracefully - let new_cache_store = CacheStore::new(config).await?; + let new_cache_store = CacheStore::new_without_init(config).await?; let peers = new_cache_store.get_peers().await; assert!(peers.is_empty(), "Cache should be empty after corruption"); // Should be able to add peers again - new_cache_store.add_peer(addr).await?; + new_cache_store.add_peer(addr).await; let peers = new_cache_store.get_peers().await; assert_eq!( peers.len(), diff --git a/bootstrap_cache/tests/cli_integration_tests.rs b/ant-bootstrap-cache/tests/cli_integration_tests.rs similarity index 57% rename from bootstrap_cache/tests/cli_integration_tests.rs rename to ant-bootstrap-cache/tests/cli_integration_tests.rs index 8b3937ee08..11868f6949 100644 --- a/bootstrap_cache/tests/cli_integration_tests.rs +++ b/ant-bootstrap-cache/tests/cli_integration_tests.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use bootstrap_cache::{BootstrapConfig, CacheStore, PeersArgs}; +use ant_bootstrap_cache::{BootstrapConfig, CacheStore, PeersArgs}; use libp2p::Multiaddr; use std::env; use std::fs; @@ -26,10 +26,8 @@ fn init_logging() { async fn setup() -> (TempDir, BootstrapConfig) { let temp_dir = TempDir::new().unwrap(); let cache_path = temp_dir.path().join("cache.json"); - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); + (temp_dir, config) } @@ -43,7 +41,6 @@ async fn test_first_flag() -> Result<(), Box> { peers: vec![], network_contacts_url: None, local: false, - test_network: false, }; let store = CacheStore::from_args(args, config).await?; @@ -58,20 +55,24 @@ async fn test_peer_argument() -> Result<(), Box> { init_logging(); let (_temp_dir, config) = setup().await; - let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; - + let peer_addr: Multiaddr = + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse()?; + let args = PeersArgs { first: false, peers: vec![peer_addr.clone()], network_contacts_url: None, local: false, - test_network: false, }; let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; assert_eq!(peers.len(), 1, "Should have one peer"); - assert_eq!(peers[0].addr, peer_addr, "Should have the correct peer address"); + assert_eq!( + peers[0].addr, peer_addr, + "Should have the correct peer address" + ); Ok(()) } @@ -83,7 +84,8 @@ async fn test_safe_peers_env() -> Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Set SAFE_PEERS environment variable - let peer_addr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE"; + let peer_addr = + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE"; env::set_var("SAFE_PEERS", peer_addr); let args = PeersArgs { @@ -91,20 +93,16 @@ async fn test_safe_peers_env() -> Result<(), Box> { peers: vec![], network_contacts_url: None, local: false, - test_network: false, }; - let config = BootstrapConfig { - cache_file_path: cache_path, - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; - + // We should have multiple peers (env var + cache/endpoints) - assert!(peers.len() > 0, "Should have peers"); - + assert!(!peers.is_empty(), "Should have peers"); + // Verify that our env var peer is included in the set let has_env_peer = peers.iter().any(|p| p.addr.to_string() == peer_addr); assert!(has_env_peer, "Should include the peer from env var"); @@ -136,12 +134,15 @@ async fn test_network_contacts_fallback() -> Result<(), Box Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create a config with some peers in the cache - let config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; + let config = BootstrapConfig::empty().with_cache_path(&cache_path); // Create args with local mode enabled let args = PeersArgs { @@ -164,7 +162,6 @@ async fn test_local_mode() -> Result<(), Box> { peers: vec![], network_contacts_url: None, local: true, - test_network: false, }; let store = CacheStore::from_args(args, config).await?; @@ -172,7 +169,10 @@ async fn test_local_mode() -> Result<(), Box> { assert!(peers.is_empty(), "Local mode should have no peers"); // Verify cache was not touched - assert!(!cache_path.exists(), "Cache file should not exist in local mode"); + assert!( + !cache_path.exists(), + "Cache file should not exist in local mode" + ); Ok(()) } @@ -183,28 +183,32 @@ async fn test_test_network_peers() -> Result<(), Box> { let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); - let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; - - let config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; + let peer_addr: Multiaddr = + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse()?; + + let config = BootstrapConfig::empty().with_cache_path(&cache_path); let args = PeersArgs { first: false, peers: vec![peer_addr.clone()], network_contacts_url: None, local: false, - test_network: true, }; let store = CacheStore::from_args(args, config).await?; let peers = store.get_peers().await; assert_eq!(peers.len(), 1, "Should have exactly one test network peer"); - assert_eq!(peers[0].addr, peer_addr, "Should have the correct test network peer"); + assert_eq!( + peers[0].addr, peer_addr, + "Should have the correct test network peer" + ); - // Verify cache was not updated - assert!(!cache_path.exists(), "Cache file should not exist for test network"); + // Verify cache was updated + assert!( + cache_path.exists(), + "Cache file should not exist for test network" + ); Ok(()) } @@ -216,12 +220,11 @@ async fn test_peers_update_cache() -> Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create a peer address for testing - let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; - - let config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; + let peer_addr: Multiaddr = + "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse()?; + + let config = BootstrapConfig::empty().with_cache_path(&cache_path); // Create args with peers but no test network mode let args = PeersArgs { @@ -229,7 +232,6 @@ async fn test_peers_update_cache() -> Result<(), Box> { peers: vec![peer_addr.clone()], network_contacts_url: None, local: false, - test_network: false, }; let store = CacheStore::from_args(args, config).await?; @@ -240,73 +242,10 @@ async fn test_peers_update_cache() -> Result<(), Box> { // Verify cache was updated assert!(cache_path.exists(), "Cache file should exist"); let cache_contents = fs::read_to_string(&cache_path)?; - assert!(cache_contents.contains(&peer_addr.to_string()), "Cache should contain the peer address"); - - Ok(()) -} - -#[tokio::test] -async fn test_test_network_mode() -> Result<(), Box> { - init_logging(); - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - // Create a peer address for testing - let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; - - let config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; - - // Create args with test network mode enabled - let args = PeersArgs { - first: false, - peers: vec![peer_addr.clone()], - network_contacts_url: None, - local: false, - test_network: true, - }; - - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; - assert_eq!(peers.len(), 1, "Should have one test network peer"); - assert_eq!(peers[0].addr, peer_addr, "Should have the correct test network peer"); - - // Verify cache was not touched - assert!(!cache_path.exists(), "Cache file should not exist for test network"); + assert!( + cache_contents.contains(&peer_addr.to_string()), + "Cache should contain the peer address" + ); Ok(()) } - -#[tokio::test] -async fn test_default_mode() -> Result<(), Box> { - init_logging(); - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - // Create a store with some initial peers in the cache - let initial_config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; - let initial_store = CacheStore::new(initial_config).await?; - let cache_peer: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE".parse()?; - initial_store.add_peer(cache_peer.clone()).await?; - initial_store.save_cache().await?; - - // Create store in default mode (no special flags) - let args = PeersArgs::default(); - let config = BootstrapConfig { - cache_file_path: cache_path.clone(), - ..Default::default() - }; - - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; - - assert!(!peers.is_empty(), "Should have peers from cache"); - assert!(peers.iter().any(|p| p.addr == cache_peer), "Should have the cache peer"); - - Ok(()) -} \ No newline at end of file diff --git a/bootstrap_cache/tests/integration_tests.rs b/ant-bootstrap-cache/tests/integration_tests.rs similarity index 94% rename from bootstrap_cache/tests/integration_tests.rs rename to ant-bootstrap-cache/tests/integration_tests.rs index c85f0aba5a..b68dfa3e15 100644 --- a/bootstrap_cache/tests/integration_tests.rs +++ b/ant-bootstrap-cache/tests/integration_tests.rs @@ -6,9 +6,10 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use bootstrap_cache::{BootstrapEndpoints, InitialPeerDiscovery}; +use ant_bootstrap_cache::{BootstrapEndpoints, InitialPeerDiscovery}; use libp2p::Multiaddr; use tracing_subscriber::{fmt, EnvFilter}; +use url::Url; use wiremock::{ matchers::{method, path}, Mock, MockServer, ResponseTemplate, @@ -24,7 +25,7 @@ fn init_logging() { #[tokio::test] async fn test_fetch_from_amazon_s3() { init_logging(); - let discovery = InitialPeerDiscovery::new(); + let discovery = InitialPeerDiscovery::new().unwrap(); let peers = discovery.fetch_peers().await.unwrap(); // We should get some peers @@ -59,8 +60,10 @@ async fn test_individual_s3_endpoints() { .mount(&mock_server) .await; - let endpoint = format!("{}/peers", mock_server.uri()); - let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]); + let endpoint = format!("{}/peers", mock_server.uri()) + .parse::() + .unwrap(); + let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]).unwrap(); match discovery.fetch_peers().await { Ok(peers) => { @@ -100,7 +103,7 @@ async fn test_individual_s3_endpoints() { #[tokio::test] async fn test_response_format() { init_logging(); - let discovery = InitialPeerDiscovery::new(); + let discovery = InitialPeerDiscovery::new().unwrap(); let peers = discovery.fetch_peers().await.unwrap(); // Get the first peer to check format @@ -151,8 +154,8 @@ async fn test_json_endpoint_format() { .mount(&mock_server) .await; - let endpoint = mock_server.uri().to_string(); - let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]); + let endpoint = mock_server.uri().parse::().unwrap(); + let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]).unwrap(); let peers = discovery.fetch_peers().await.unwrap(); assert_eq!(peers.len(), 2); diff --git a/ant-protocol/src/version.rs b/ant-protocol/src/version.rs index 2ead274254..6606e74be0 100644 --- a/ant-protocol/src/version.rs +++ b/ant-protocol/src/version.rs @@ -44,7 +44,7 @@ lazy_static! { // Protocol support shall be downward compatible for patch only version update. // i.e. versions of `A.B.X` or `A.B.X-alpha.Y` shall be considered as a same protocol of `A.B` -fn get_truncate_version_str() -> String { +pub fn get_truncate_version_str() -> String { let version_str = env!("CARGO_PKG_VERSION"); let parts = version_str.split('.').collect::>(); if parts.len() >= 2 { diff --git a/bootstrap_cache/README.md b/bootstrap_cache/README.md deleted file mode 100644 index d3ba4f18c7..0000000000 --- a/bootstrap_cache/README.md +++ /dev/null @@ -1,160 +0,0 @@ -# Bootstrap Cache - -A robust peer caching system for the Safe Network that provides persistent storage and management of network peer addresses. This crate handles peer discovery, caching, and reliability tracking with support for concurrent access across multiple processes. - -## Features - -### Storage and Accessibility -- System-wide accessible cache location -- Configurable primary cache location -- Automatic fallback to user's home directory (`~/.safe/bootstrap_cache.json`) -- Cross-process safe with file locking -- Atomic write operations to prevent cache corruption - -### Concurrent Access -- Thread-safe in-memory cache with `RwLock` -- File system level locking for cross-process synchronization -- Shared (read) and exclusive (write) lock support -- Exponential backoff retry mechanism for lock acquisition - -### Data Management -- Automatic cleanup of stale and unreliable peers -- Configurable maximum peer limit -- Peer reliability tracking (success/failure counts) -- Atomic file operations for data integrity - -## Configuration Options - -The `BootstrapConfig` struct provides the following configuration options: - -```rust -pub struct BootstrapConfig { - /// List of endpoints to fetch initial peers from - pub endpoints: Vec, - - /// Maximum number of peers to maintain in the cache - pub max_peers: usize, - - /// Path where the cache file will be stored - pub cache_file_path: PathBuf, - - /// How long to wait for peer responses - pub peer_response_timeout: Duration, - - /// Interval between connection attempts - pub connection_interval: Duration, - - /// Maximum number of connection retries - pub max_retries: u32, -} -``` - -### Option Details - -#### `endpoints` -- List of URLs to fetch initial peers from when cache is empty -- Example: `["https://sn-node1.s3.amazonaws.com/peers", "https://sn-node2.s3.amazonaws.com/peers"]` -- Default: Empty vector (no endpoints) - -#### `max_peers` -- Maximum number of peers to store in cache -- When exceeded, oldest peers are removed first -- Default: 1500 peers - -#### `cache_file_path` -- Location where the cache file will be stored -- Falls back to `~/.safe/bootstrap_cache.json` if primary location is not writable -- Example: `/var/lib/safe/bootstrap_cache.json` - -#### `peer_response_timeout` -- Maximum time to wait for a peer to respond -- Affects peer reliability scoring -- Default: 60 seconds - -#### `connection_interval` -- Time to wait between connection attempts -- Helps prevent network flooding -- Default: 10 seconds - -#### `max_retries` -- Maximum number of times to retry connecting to a peer -- Affects peer reliability scoring -- Default: 3 attempts - -## Usage Modes - -### Default Mode -```rust -let config = BootstrapConfig::default(); -let store = CacheStore::new(config).await?; -``` -- Uses default configuration -- Loads peers from cache if available -- Falls back to configured endpoints if cache is empty - -### Test Network Mode -```rust -let args = PeersArgs { - test_network: true, - peers: vec![/* test peers */], - ..Default::default() -}; -let store = CacheStore::from_args(args, config).await?; -``` -- Isolates from main network cache -- Only uses explicitly provided peers -- No cache persistence - -### Local Mode -```rust -let args = PeersArgs { - local: true, - ..Default::default() -}; -let store = CacheStore::from_args(args, config).await?; -``` -- Returns empty store -- Suitable for local network testing -- Uses mDNS for peer discovery - -### First Node Mode -```rust -let args = PeersArgs { - first: true, - ..Default::default() -}; -let store = CacheStore::from_args(args, config).await?; -``` -- Returns empty store -- No fallback to endpoints -- Used for network initialization - -## Error Handling - -The crate provides comprehensive error handling for: -- File system operations -- Network requests -- Concurrent access -- Data serialization/deserialization -- Lock acquisition - -All errors are propagated through the `Result` type with detailed error variants. - -## Thread Safety - -The cache store is thread-safe and can be safely shared between threads: -- `Clone` implementation for `CacheStore` -- Internal `Arc` for thread-safe data access -- File system locks for cross-process synchronization - -## Logging - -Comprehensive logging using the `tracing` crate: -- Info level for normal operations -- Warn level for recoverable issues -- Error level for critical failures -- Debug level for detailed diagnostics - -## License - -This SAFE Network Software is licensed under the General Public License (GPL), version 3 ([LICENSE](LICENSE) http://www.gnu.org/licenses/gpl-3.0.en.html). diff --git a/bootstrap_cache/src/cache.rs b/bootstrap_cache/src/cache.rs deleted file mode 100644 index 85b01ed5ee..0000000000 --- a/bootstrap_cache/src/cache.rs +++ /dev/null @@ -1,390 +0,0 @@ -// Copyright 2024 MaidSafe.net limited. -// -// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. -// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed -// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. Please review the Licences for the specific language governing -// permissions and limitations relating to use of the SAFE Network Software. - -use crate::{BootstrapCache, Error}; -use fs2::FileExt; -use std::{ - fs::{self, File}, - io::{self, Read, Write}, - path::PathBuf, -}; -use tracing::{debug, error, info, warn}; - -/// Manages reading and writing of the bootstrap cache file -pub struct CacheManager { - cache_path: PathBuf, -} - -impl CacheManager { - /// Creates a new CacheManager instance - pub fn new() -> Result { - let cache_path = Self::get_cache_path()?; - Ok(Self { cache_path }) - } - - /// Returns the platform-specific cache file path - fn get_cache_path() -> io::Result { - let path = if cfg!(target_os = "macos") { - PathBuf::from("/Library/Application Support/Safe/bootstrap_cache.json") - } else if cfg!(target_os = "linux") { - PathBuf::from("/var/safe/bootstrap_cache.json") - } else if cfg!(target_os = "windows") { - PathBuf::from(r"C:\ProgramData\Safe\bootstrap_cache.json") - } else { - return Err(io::Error::new( - io::ErrorKind::Other, - "Unsupported operating system", - )); - }; - - // Try to create the directory structure - if let Some(parent) = path.parent() { - info!("Ensuring cache directory exists at: {:?}", parent); - match fs::create_dir_all(parent) { - Ok(_) => { - debug!("Successfully created/verified cache directory"); - // Try to set directory permissions to be user-writable - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - if let Err(e) = fs::set_permissions(parent, fs::Permissions::from_mode(0o755)) { - warn!("Failed to set cache directory permissions: {}", e); - } - } - } - Err(e) => { - // If we can't create in system directory, fall back to user's home directory - warn!("Failed to create system cache directory: {}", e); - if let Some(home) = dirs::home_dir() { - let user_path = home.join(".safe").join("bootstrap_cache.json"); - info!("Falling back to user directory: {:?}", user_path); - if let Some(user_parent) = user_path.parent() { - fs::create_dir_all(user_parent)?; - } - return Ok(user_path); - } - } - } - } - Ok(path) - } - - /// Reads the cache file with file locking, handling potential corruption - pub fn read_cache(&self) -> Result { - debug!("Reading bootstrap cache from {:?}", self.cache_path); - - let mut file = match File::open(&self.cache_path) { - Ok(file) => file, - Err(e) if e.kind() == io::ErrorKind::NotFound => { - info!("Cache file not found, creating new empty cache"); - return Ok(BootstrapCache::new()); - } - Err(e) => { - error!("Failed to open cache file: {}", e); - return Err(e.into()); - } - }; - - // Acquire shared lock for reading - file.lock_shared().map_err(|e| { - error!("Failed to acquire shared lock: {}", e); - Error::LockError - })?; - - let mut contents = String::new(); - if let Err(e) = file.read_to_string(&mut contents) { - error!("Failed to read cache file: {}", e); - // Release lock before returning - let _ = file.unlock(); - return Err(Error::Io(e)); - } - - // Release lock - file.unlock().map_err(|e| { - error!("Failed to release lock: {}", e); - Error::LockError - })?; - - // Try to parse the cache, if it fails it might be corrupted - match serde_json::from_str(&contents) { - Ok(cache) => Ok(cache), - Err(e) => { - error!("Cache file appears to be corrupted: {}", e); - Err(Error::CacheCorrupted(e)) - } - } - } - - /// Rebuilds the cache using provided peers or fetches new ones if none provided - pub async fn rebuild_cache(&self, peers: Option>) -> Result { - info!("Rebuilding bootstrap cache"); - - let cache = if let Some(peers) = peers { - info!("Rebuilding cache with {} in-memory peers", peers.len()); - BootstrapCache { - last_updated: chrono::Utc::now(), - peers, - } - } else { - info!("No in-memory peers available, fetching from endpoints"); - let discovery = InitialPeerDiscovery::new(); - let peers = discovery.fetch_peers().await?; - BootstrapCache { - last_updated: chrono::Utc::now(), - peers, - } - }; - - // Write the rebuilt cache - self.write_cache(&cache)?; - Ok(cache) - } - - /// Writes the cache file with file locking and atomic replacement - pub fn write_cache(&self, cache: &BootstrapCache) -> Result<(), Error> { - debug!("Writing bootstrap cache to {:?}", self.cache_path); - - let temp_path = self.cache_path.with_extension("tmp"); - let mut file = File::create(&temp_path).map_err(|e| { - error!("Failed to create temporary cache file: {}", e); - Error::Io(e) - })?; - - // Acquire exclusive lock for writing - file.lock_exclusive().map_err(|e| { - error!("Failed to acquire exclusive lock: {}", e); - Error::LockError - })?; - - let contents = serde_json::to_string_pretty(cache).map_err(|e| { - error!("Failed to serialize cache: {}", e); - Error::Json(e) - })?; - - file.write_all(contents.as_bytes()).map_err(|e| { - error!("Failed to write cache file: {}", e); - Error::Io(e) - })?; - - file.sync_all().map_err(|e| { - error!("Failed to sync cache file: {}", e); - Error::Io(e) - })?; - - // Release lock - file.unlock().map_err(|e| { - error!("Failed to release lock: {}", e); - Error::LockError - })?; - - // Atomic rename - fs::rename(&temp_path, &self.cache_path).map_err(|e| { - error!("Failed to rename temporary cache file: {}", e); - Error::Io(e) - })?; - - info!("Successfully wrote cache file"); - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use chrono::Utc; - use std::fs::OpenOptions; - use tempfile::tempdir; - use tokio; - - #[test] - fn test_cache_read_write() { - let dir = tempdir().unwrap(); - let cache_path = dir.path().join("test_cache.json"); - - let cache = BootstrapCache { - last_updated: Utc::now(), - peers: vec![], - }; - - let manager = CacheManager { cache_path }; - manager.write_cache(&cache).unwrap(); - - let read_cache = manager.read_cache().unwrap(); - assert_eq!(cache.peers.len(), read_cache.peers.len()); - } - - #[test] - fn test_missing_cache_file() { - let dir = tempdir().unwrap(); - let cache_path = dir.path().join("nonexistent.json"); - - let manager = CacheManager { cache_path }; - let cache = manager.read_cache().unwrap(); - assert!(cache.peers.is_empty()); - } - - #[test] - fn test_corrupted_cache_file() { - let dir = tempdir().unwrap(); - let cache_path = dir.path().join("corrupted.json"); - - // Write corrupted JSON - let mut file = OpenOptions::new() - .write(true) - .create(true) - .open(&cache_path) - .unwrap(); - file.write_all(b"{invalid json}").unwrap(); - - let manager = CacheManager { cache_path }; - match manager.read_cache() { - Err(Error::CacheCorrupted(_)) => (), - other => panic!("Expected CacheCorrupted error, got {:?}", other), - } - } - - #[test] - fn test_partially_corrupted_cache() { - let dir = tempdir().unwrap(); - let cache_path = dir.path().join("partial_corrupt.json"); - - // Write partially valid JSON - let mut file = OpenOptions::new() - .write(true) - .create(true) - .open(&cache_path) - .unwrap(); - file.write_all(b"{\"last_updated\":\"2024-01-01T00:00:00Z\",\"peers\":[{}]}").unwrap(); - - let manager = CacheManager { cache_path }; - match manager.read_cache() { - Err(Error::CacheCorrupted(_)) => (), - other => panic!("Expected CacheCorrupted error, got {:?}", other), - } - } - - #[tokio::test] - async fn test_rebuild_cache_with_memory_peers() { - let dir = tempdir().unwrap(); - let cache_path = dir.path().join("rebuild.json"); - let manager = CacheManager { cache_path }; - - // Create some test peers - let test_peers = vec![ - BootstrapPeer { - addr: "/ip4/127.0.0.1/tcp/8080".parse().unwrap(), - success_count: 1, - failure_count: 0, - last_success: Some(Utc::now()), - last_failure: None, - } - ]; - - // Rebuild cache with in-memory peers - let rebuilt = manager.rebuild_cache(Some(test_peers.clone())).await.unwrap(); - assert_eq!(rebuilt.peers.len(), 1); - assert_eq!(rebuilt.peers[0].addr, test_peers[0].addr); - - // Verify the cache was written to disk - let read_cache = manager.read_cache().unwrap(); - assert_eq!(read_cache.peers.len(), 1); - assert_eq!(read_cache.peers[0].addr, test_peers[0].addr); - } - - #[tokio::test] - async fn test_rebuild_cache_from_endpoints() { - let dir = tempdir().unwrap(); - let cache_path = dir.path().join("rebuild_endpoints.json"); - let manager = CacheManager { cache_path }; - - // Write corrupted cache first - let mut file = OpenOptions::new() - .write(true) - .create(true) - .open(&cache_path) - .unwrap(); - file.write_all(b"{corrupted}").unwrap(); - - // Verify corrupted cache is detected - match manager.read_cache() { - Err(Error::CacheCorrupted(_)) => (), - other => panic!("Expected CacheCorrupted error, got {:?}", other), - } - - // Mock the InitialPeerDiscovery for testing - // Note: In a real implementation, you might want to use a trait for InitialPeerDiscovery - // and mock it properly. This test will actually try to fetch from real endpoints. - match manager.rebuild_cache(None).await { - Ok(cache) => { - // Verify the cache was rebuilt and written - let read_cache = manager.read_cache().unwrap(); - assert_eq!(read_cache.peers.len(), cache.peers.len()); - } - Err(Error::NoPeersFound(_)) => { - // This is also acceptable if no endpoints are reachable during test - () - } - Err(e) => panic!("Unexpected error: {:?}", e), - } - } - - #[test] - fn test_concurrent_cache_access() { - let dir = tempdir().unwrap(); - let cache_path = dir.path().join("concurrent.json"); - let manager = CacheManager { cache_path.clone() }; - - // Initial cache - let cache = BootstrapCache { - last_updated: Utc::now(), - peers: vec![], - }; - manager.write_cache(&cache).unwrap(); - - // Try to read while holding write lock - let file = OpenOptions::new() - .write(true) - .open(&cache_path) - .unwrap(); - file.lock_exclusive().unwrap(); - - // This should fail with a lock error - match manager.read_cache() { - Err(Error::LockError) => (), - other => panic!("Expected LockError, got {:?}", other), - } - - // Release lock - file.unlock().unwrap(); - } - - #[test] - fn test_cache_file_permissions() { - let dir = tempdir().unwrap(); - let cache_path = dir.path().join("permissions.json"); - let manager = CacheManager { cache_path: cache_path.clone() }; - - // Write initial cache - let cache = BootstrapCache { - last_updated: Utc::now(), - peers: vec![], - }; - manager.write_cache(&cache).unwrap(); - - // Make file read-only - let mut perms = fs::metadata(&cache_path).unwrap().permissions(); - perms.set_readonly(true); - fs::set_permissions(&cache_path, perms).unwrap(); - - // Try to write to read-only file - match manager.write_cache(&cache) { - Err(Error::Io(_)) => (), - other => panic!("Expected Io error, got {:?}", other), - } - } -} diff --git a/bootstrap_cache/src/cache_store.rs b/bootstrap_cache/src/cache_store.rs deleted file mode 100644 index 512fad8daf..0000000000 --- a/bootstrap_cache/src/cache_store.rs +++ /dev/null @@ -1,804 +0,0 @@ -// Copyright 2024 MaidSafe.net limited. -// -// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. -// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed -// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. Please review the Licences for the specific language governing -// permissions and limitations relating to use of the SAFE Network Software. - -use crate::{BootstrapPeer, Error, InitialPeerDiscovery, Result}; -use fs2::FileExt; -use libp2p::Multiaddr; -use serde::{Deserialize, Serialize}; -use std::fs::{self, File, OpenOptions}; -use std::io::{self, Read}; -use std::path::PathBuf; -use std::sync::Arc; -use std::time::{Duration, SystemTime}; -use tempfile::NamedTempFile; -use tokio::sync::RwLock; -use tracing::{debug, info, warn}; - -const PEER_EXPIRY_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 24 hours - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct CacheData { - peers: std::collections::HashMap, - #[serde(default = "SystemTime::now")] - last_updated: SystemTime, - #[serde(default = "default_version")] - version: u32, -} - -fn default_version() -> u32 { - 1 -} - -impl Default for CacheData { - fn default() -> Self { - Self { - peers: std::collections::HashMap::new(), - last_updated: SystemTime::now(), - version: default_version(), - } - } -} - -#[derive(Clone)] -pub struct CacheStore { - cache_path: PathBuf, - config: Arc, - data: Arc>, -} - -impl CacheStore { - pub async fn new(config: crate::BootstrapConfig) -> Result { - tracing::info!("Creating new CacheStore with config: {:?}", config); - let cache_path = config.cache_file_path.clone(); - let config = Arc::new(config); - - // Create cache directory if it doesn't exist - if let Some(parent) = cache_path.parent() { - tracing::info!("Attempting to create cache directory at {:?}", parent); - // Try to create the directory - match fs::create_dir_all(parent) { - Ok(_) => { - tracing::info!("Successfully created cache directory"); - } - Err(e) => { - tracing::warn!("Failed to create cache directory at {:?}: {}", parent, e); - // Try user's home directory as fallback - if let Some(home) = dirs::home_dir() { - let user_path = home.join(".safe").join("bootstrap_cache.json"); - tracing::info!("Falling back to user directory: {:?}", user_path); - if let Some(user_parent) = user_path.parent() { - if let Err(e) = fs::create_dir_all(user_parent) { - tracing::error!("Failed to create user cache directory: {}", e); - return Err(Error::Io(e)); - } - tracing::info!("Successfully created user cache directory"); - } - let future = Self::new(crate::BootstrapConfig::with_cache_path(user_path)); - return Box::pin(future).await; - } - } - } - } - - let data = if cache_path.exists() { - tracing::info!("Cache file exists at {:?}, attempting to load", cache_path); - match Self::load_cache_data(&cache_path).await { - Ok(data) => { - tracing::info!("Successfully loaded cache data with {} peers", data.peers.len()); - // If cache data exists but has no peers and file is not read-only, - // fallback to default - let is_readonly = cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if data.peers.is_empty() && !is_readonly { - tracing::info!("Cache is empty and not read-only, falling back to default"); - Self::fallback_to_default(&config).await? - } else { - // Ensure we don't exceed max_peers - let mut filtered_data = data; - if filtered_data.peers.len() > config.max_peers { - tracing::info!( - "Trimming cache from {} to {} peers", - filtered_data.peers.len(), - config.max_peers - ); - let peers: Vec<_> = filtered_data.peers.into_iter().collect(); - filtered_data.peers = peers - .into_iter() - .take(config.max_peers) - .collect(); - } - filtered_data - } - } - Err(e) => { - tracing::warn!("Failed to load cache data: {}", e); - // If we can't read or parse the cache file, return empty cache - CacheData::default() - } - } - } else { - tracing::info!("Cache file does not exist at {:?}, falling back to default", cache_path); - // If cache file doesn't exist, fallback to default - Self::fallback_to_default(&config).await? - }; - - let store = Self { - cache_path, - config, - data: Arc::new(RwLock::new(data)), - }; - - // Only clean up stale peers if the file is not read-only - let is_readonly = store - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if !is_readonly { - if let Err(e) = store.cleanup_stale_peers().await { - tracing::warn!("Failed to clean up stale peers: {}", e); - } - } - - tracing::info!("Successfully created CacheStore"); - Ok(store) - } - - pub async fn new_without_init(config: crate::BootstrapConfig) -> Result { - tracing::info!("Creating new CacheStore with config: {:?}", config); - let cache_path = config.cache_file_path.clone(); - let config = Arc::new(config); - - // Create cache directory if it doesn't exist - if let Some(parent) = cache_path.parent() { - tracing::info!("Attempting to create cache directory at {:?}", parent); - // Try to create the directory - match fs::create_dir_all(parent) { - Ok(_) => { - tracing::info!("Successfully created cache directory"); - } - Err(e) => { - tracing::warn!("Failed to create cache directory at {:?}: {}", parent, e); - // Try user's home directory as fallback - if let Some(home) = dirs::home_dir() { - let user_path = home.join(".safe").join("bootstrap_cache.json"); - tracing::info!("Falling back to user directory: {:?}", user_path); - if let Some(user_parent) = user_path.parent() { - if let Err(e) = fs::create_dir_all(user_parent) { - tracing::error!("Failed to create user cache directory: {}", e); - return Err(Error::Io(e)); - } - tracing::info!("Successfully created user cache directory"); - } - let future = Self::new_without_init(crate::BootstrapConfig::with_cache_path(user_path)); - return Box::pin(future).await; - } - } - } - } - - let store = Self { - cache_path, - config, - data: Arc::new(RwLock::new(CacheData::default())), - }; - - tracing::info!("Successfully created CacheStore"); - Ok(store) - } - - pub async fn init(&self) -> Result<()> { - let mut data = if self.cache_path.exists() { - tracing::info!("Cache file exists at {:?}, attempting to load", self.cache_path); - match Self::load_cache_data(&self.cache_path).await { - Ok(data) => { - tracing::info!("Successfully loaded cache data with {} peers", data.peers.len()); - // If cache data exists but has no peers and file is not read-only, - // fallback to default - let is_readonly = self.cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if data.peers.is_empty() && !is_readonly { - tracing::info!("Cache is empty and not read-only, falling back to default"); - Self::fallback_to_default(&self.config).await? - } else { - // Ensure we don't exceed max_peers - let mut filtered_data = data; - if filtered_data.peers.len() > self.config.max_peers { - tracing::info!( - "Trimming cache from {} to {} peers", - filtered_data.peers.len(), - self.config.max_peers - ); - let peers: Vec<_> = filtered_data.peers.into_iter().collect(); - filtered_data.peers = peers - .into_iter() - .take(self.config.max_peers) - .collect(); - } - filtered_data - } - } - Err(e) => { - tracing::warn!("Failed to load cache data: {}", e); - // If we can't read or parse the cache file, fallback to default - Self::fallback_to_default(&self.config).await? - } - } - } else { - tracing::info!("Cache file does not exist at {:?}, falling back to default", self.cache_path); - // If cache file doesn't exist, fallback to default - Self::fallback_to_default(&self.config).await? - }; - - // Only clean up stale peers if the file is not read-only - let is_readonly = self.cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if !is_readonly { - // Clean up stale peers - let now = SystemTime::now(); - data.peers.retain(|_, peer| { - if let Ok(duration) = now.duration_since(peer.last_seen) { - duration < PEER_EXPIRY_DURATION - } else { - false - } - }); - } - - // Update the store's data - *self.data.write().await = data; - - Ok(()) - } - - async fn fallback_to_default(config: &crate::BootstrapConfig) -> Result { - tracing::info!("Falling back to default peers from endpoints"); - let mut data = CacheData { - peers: std::collections::HashMap::new(), - last_updated: SystemTime::now(), - version: default_version(), - }; - - // If no endpoints are configured, just return empty cache - if config.endpoints.is_empty() { - tracing::warn!("No endpoints configured, returning empty cache"); - return Ok(data); - } - - // Try to discover peers from configured endpoints - let discovery = InitialPeerDiscovery::with_endpoints(config.endpoints.clone()); - match discovery.fetch_peers().await { - Ok(peers) => { - tracing::info!("Successfully fetched {} peers from endpoints", peers.len()); - // Only add up to max_peers from the discovered peers - for peer in peers.into_iter().take(config.max_peers) { - data.peers.insert(peer.addr.to_string(), peer); - } - - // Create parent directory if it doesn't exist - if let Some(parent) = config.cache_file_path.parent() { - tracing::info!("Creating cache directory at {:?}", parent); - if let Err(e) = fs::create_dir_all(parent) { - tracing::warn!("Failed to create cache directory: {}", e); - } - } - - // Try to write the cache file immediately - match serde_json::to_string_pretty(&data) { - Ok(json) => { - tracing::info!("Writing {} peers to cache file", data.peers.len()); - if let Err(e) = fs::write(&config.cache_file_path, json) { - tracing::warn!("Failed to write cache file: {}", e); - } else { - tracing::info!("Successfully wrote cache file at {:?}", config.cache_file_path); - } - } - Err(e) => { - tracing::warn!("Failed to serialize cache data: {}", e); - } - } - - Ok(data) - } - Err(e) => { - tracing::warn!("Failed to fetch peers from endpoints: {}", e); - Ok(data) // Return empty cache on error - } - } - } - - async fn load_cache_data(cache_path: &PathBuf) -> Result { - // Try to open the file with read permissions - let mut file = match OpenOptions::new().read(true).open(cache_path) { - Ok(f) => f, - Err(e) => { - tracing::warn!("Failed to open cache file: {}", e); - return Err(Error::from(e)); - } - }; - - // Acquire shared lock for reading - if let Err(e) = Self::acquire_shared_lock(&file).await { - tracing::warn!("Failed to acquire shared lock: {}", e); - return Err(e); - } - - // Read the file contents - let mut contents = String::new(); - if let Err(e) = file.read_to_string(&mut contents) { - tracing::warn!("Failed to read cache file: {}", e); - return Err(Error::from(e)); - } - - // Parse the cache data - match serde_json::from_str::(&contents) { - Ok(data) => Ok(data), - Err(e) => { - tracing::warn!("Failed to parse cache data: {}", e); - Err(Error::Io(io::Error::new(io::ErrorKind::InvalidData, e))) - } - } - } - - pub async fn get_peers(&self) -> Vec { - let data = self.data.read().await; - data.peers.values().cloned().collect() - } - - pub async fn get_reliable_peers(&self) -> Vec { - let data = self.data.read().await; - let reliable_peers: Vec<_> = data - .peers - .values() - .filter(|peer| peer.success_count > peer.failure_count) - .cloned() - .collect(); - - // If we have no reliable peers and the cache file is not read-only, - // try to refresh from default endpoints - if reliable_peers.is_empty() - && !self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false) - { - drop(data); - if let Ok(new_data) = Self::fallback_to_default(&self.config).await { - let mut data = self.data.write().await; - *data = new_data; - return data - .peers - .values() - .filter(|peer| peer.success_count > peer.failure_count) - .cloned() - .collect(); - } - } - - reliable_peers - } - - pub async fn update_peer_status(&self, addr: &str, success: bool) -> Result<()> { - // Check if the file is read-only before attempting to modify - let is_readonly = self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if is_readonly { - tracing::warn!("Cannot update peer status: cache file is read-only"); - return Ok(()); - } - - let mut data = self.data.write().await; - - match addr.parse::() { - Ok(addr) => { - let peer = data - .peers - .entry(addr.to_string()) - .or_insert_with(|| BootstrapPeer::new(addr)); - peer.update_status(success); - self.save_to_disk(&data).await?; - Ok(()) - } - Err(e) => Err(Error::from(std::io::Error::new( - std::io::ErrorKind::InvalidInput, - format!("Invalid multiaddr: {}", e), - ))), - } - } - - pub async fn add_peer(&self, addr: Multiaddr) -> Result<()> { - let mut data = self.data.write().await; - let addr_str = addr.to_string(); - - // Check if we already have this peer - if data.peers.contains_key(&addr_str) { - debug!("Updating existing peer {}", addr_str); - if let Some(peer) = data.peers.get_mut(&addr_str) { - peer.last_seen = SystemTime::now(); - } - return Ok(()); - } - - // If we're at max peers, remove the oldest peer - if data.peers.len() >= self.config.max_peers { - debug!("At max peers limit ({}), removing oldest peer", self.config.max_peers); - if let Some((oldest_addr, _)) = data.peers - .iter() - .min_by_key(|(_, peer)| peer.last_seen) - { - let oldest_addr = oldest_addr.clone(); - data.peers.remove(&oldest_addr); - } - } - - // Add the new peer - debug!("Adding new peer {} (under max_peers limit)", addr_str); - data.peers.insert(addr_str, BootstrapPeer::new(addr)); - - // Only save to disk if we have a valid cache path - if !self.cache_path.as_os_str().is_empty() { - self.save_to_disk(&data).await?; - } - - Ok(()) - } - - pub async fn remove_peer(&self, addr: &str) -> Result<()> { - // Check if the file is read-only before attempting to modify - let is_readonly = self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if is_readonly { - tracing::warn!("Cannot remove peer: cache file is read-only"); - return Ok(()); - } - - let mut data = self.data.write().await; - data.peers.remove(addr); - self.save_to_disk(&data).await?; - Ok(()) - } - - pub async fn cleanup_unreliable_peers(&self) -> Result<()> { - // Check if the file is read-only before attempting to modify - let is_readonly = self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if is_readonly { - tracing::warn!("Cannot cleanup unreliable peers: cache file is read-only"); - return Ok(()); - } - - let mut data = self.data.write().await; - let unreliable_peers: Vec = data - .peers - .iter() - .filter(|(_, peer)| !peer.is_reliable()) - .map(|(addr, _)| addr.clone()) - .collect(); - - for addr in unreliable_peers { - data.peers.remove(&addr); - } - - self.save_to_disk(&data).await?; - Ok(()) - } - - pub async fn cleanup_stale_peers(&self) -> Result<()> { - // Check if the file is read-only before attempting to modify - let is_readonly = self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if is_readonly { - tracing::warn!("Cannot cleanup stale peers: cache file is read-only"); - return Ok(()); - } - - let mut data = self.data.write().await; - let stale_peers: Vec = data - .peers - .iter() - .filter(|(_, peer)| { - // Only remove peers that have failed more times than succeeded - peer.failure_count > peer.success_count && peer.failure_count >= self.config.max_retries - }) - .map(|(addr, _)| addr.clone()) - .collect(); - - for addr in stale_peers { - data.peers.remove(&addr); - } - - // Only save to disk if we have a valid cache path - if !self.cache_path.as_os_str().is_empty() { - self.save_to_disk(&data).await?; - } - - Ok(()) - } - - pub async fn save_to_disk(&self, data: &CacheData) -> Result<()> { - // Check if the file is read-only before attempting to write - let is_readonly = self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if is_readonly { - tracing::warn!("Cannot save to disk: cache file is read-only"); - return Ok(()); - } - - match self.atomic_write(data).await { - Ok(_) => Ok(()), - Err(e) => { - tracing::error!("Failed to save cache to disk: {}", e); - Err(e) - } - } - } - - async fn acquire_shared_lock(file: &File) -> Result<()> { - let file = file.try_clone().map_err(Error::from)?; - - tokio::task::spawn_blocking(move || file.try_lock_shared().map_err(Error::from)) - .await - .map_err(|e| { - Error::from(std::io::Error::new( - std::io::ErrorKind::Other, - format!("Failed to spawn blocking task: {}", e), - )) - })? - } - - async fn acquire_exclusive_lock(file: &File) -> Result<()> { - let mut backoff = Duration::from_millis(10); - let max_attempts = 5; - let mut attempts = 0; - - loop { - match file.try_lock_exclusive() { - Ok(_) => return Ok(()), - Err(_) if attempts >= max_attempts => { - return Err(Error::LockError); - } - Err(e) if e.kind() == io::ErrorKind::WouldBlock => { - attempts += 1; - tokio::time::sleep(backoff).await; - backoff *= 2; - } - Err(_) => return Err(Error::LockError), - } - } - } - - async fn atomic_write(&self, data: &CacheData) -> Result<()> { - // Create parent directory if it doesn't exist - if let Some(parent) = self.cache_path.parent() { - fs::create_dir_all(parent).map_err(Error::from)?; - } - - // Create a temporary file in the same directory as the cache file - let temp_file = NamedTempFile::new().map_err(Error::from)?; - - // Write data to temporary file - serde_json::to_writer_pretty(&temp_file, &data).map_err(Error::from)?; - - // Open the target file with proper permissions - let file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(&self.cache_path) - .map_err(Error::from)?; - - // Acquire exclusive lock - Self::acquire_exclusive_lock(&file).await?; - - // Perform atomic rename - temp_file.persist(&self.cache_path).map_err(|e| { - Error::from(std::io::Error::new( - std::io::ErrorKind::Other, - format!("Failed to persist cache file: {}", e), - )) - })?; - - // Lock will be automatically released when file is dropped - Ok(()) - } - - /// Clear all peers from the cache - pub async fn clear_peers(&self) -> Result<()> { - let mut data = self.data.write().await; - data.peers.clear(); - Ok(()) - } - - /// Save the current cache to disk - pub async fn save_cache(&self) -> Result<()> { - let data = self.data.read().await; - let temp_file = NamedTempFile::new()?; - let file = File::create(&temp_file)?; - file.lock_exclusive()?; - - serde_json::to_writer_pretty(&file, &*data)?; - file.sync_all()?; - file.unlock()?; - - // Atomically replace the cache file - temp_file.persist(&self.cache_path)?; - info!("Successfully wrote cache file at {:?}", self.cache_path); - - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::tempdir; - - async fn create_test_store() -> (CacheStore, PathBuf) { - let temp_dir = tempdir().unwrap(); - let cache_file = temp_dir.path().join("cache.json"); - - let config = crate::BootstrapConfig::new( - vec![], // Empty endpoints to prevent fallback - 1500, - cache_file.clone(), - Duration::from_secs(60), - Duration::from_secs(10), - 3, - ); - - let store = CacheStore::new(config).await.unwrap(); - (store.clone(), store.cache_path.clone()) - } - - #[tokio::test] - async fn test_peer_update_and_save() { - let (store, _) = create_test_store().await; - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - - // Manually add a peer without using fallback - { - let mut data = store.data.write().await; - data.peers - .insert(addr.to_string(), BootstrapPeer::new(addr.clone())); - store.save_to_disk(&data).await.unwrap(); - } - - store - .update_peer_status(&addr.to_string(), true) - .await - .unwrap(); - - let peers = store.get_peers().await; - assert_eq!(peers.len(), 1); - assert_eq!(peers[0].addr, addr); - assert_eq!(peers[0].success_count, 1); - assert_eq!(peers[0].failure_count, 0); - } - - #[tokio::test] - async fn test_peer_cleanup() { - let (store, _) = create_test_store().await; - let good_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - let bad_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8081".parse().unwrap(); - - // Add peers - store.add_peer(good_addr.clone()).await.unwrap(); - store.add_peer(bad_addr.clone()).await.unwrap(); - - // Make one peer reliable and one unreliable - store - .update_peer_status(&good_addr.to_string(), true) - .await - .unwrap(); - - // Fail the bad peer more times than max_retries - for _ in 0..5 { - store - .update_peer_status(&bad_addr.to_string(), false) - .await - .unwrap(); - } - - // Clean up unreliable peers - store.cleanup_unreliable_peers().await.unwrap(); - - // Get all peers (not just reliable ones) - let peers = store.get_peers().await; - assert_eq!(peers.len(), 1); - assert_eq!(peers[0].addr, good_addr); - } - - #[tokio::test] - async fn test_peer_not_removed_if_successful() { - let (store, _) = create_test_store().await; - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - - // Add a peer and make it successful - store.add_peer(addr.clone()).await.unwrap(); - store.update_peer_status(&addr.to_string(), true).await.unwrap(); - - // Wait a bit - tokio::time::sleep(Duration::from_millis(100)).await; - - // Run cleanup - store.cleanup_stale_peers().await.unwrap(); - - // Verify peer is still there - let peers = store.get_peers().await; - assert_eq!(peers.len(), 1); - assert_eq!(peers[0].addr, addr); - } - - #[tokio::test] - async fn test_peer_removed_only_when_unresponsive() { - let (store, _) = create_test_store().await; - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - - // Add a peer - store.add_peer(addr.clone()).await.unwrap(); - - // Make it fail max_retries times - for _ in 0..store.config.max_retries { - store.update_peer_status(&addr.to_string(), false).await.unwrap(); - } - - // Run cleanup - store.cleanup_stale_peers().await.unwrap(); - - // Verify peer is removed - let peers = store.get_peers().await; - assert_eq!(peers.len(), 0, "Peer should be removed after max_retries failures"); - - // Test with some successes but more failures - store.add_peer(addr.clone()).await.unwrap(); - store.update_peer_status(&addr.to_string(), true).await.unwrap(); - store.update_peer_status(&addr.to_string(), true).await.unwrap(); - - for _ in 0..5 { - store.update_peer_status(&addr.to_string(), false).await.unwrap(); - } - - // Run cleanup - store.cleanup_stale_peers().await.unwrap(); - - // Verify peer is removed due to more failures than successes - let peers = store.get_peers().await; - assert_eq!(peers.len(), 0, "Peer should be removed when failures exceed successes"); - } -} diff --git a/bootstrap_cache/src/circuit_breaker.rs b/bootstrap_cache/src/circuit_breaker.rs deleted file mode 100644 index 2c19f94862..0000000000 --- a/bootstrap_cache/src/circuit_breaker.rs +++ /dev/null @@ -1,208 +0,0 @@ -use std::collections::HashMap; -use std::sync::Arc; -use std::time::{Duration, Instant}; -use tokio::sync::RwLock; - -#[derive(Debug, Clone)] -pub struct CircuitBreakerConfig { - max_failures: u32, - reset_timeout: Duration, - min_backoff: Duration, - max_backoff: Duration, -} - -impl Default for CircuitBreakerConfig { - fn default() -> Self { - Self { - max_failures: 5, - reset_timeout: Duration::from_secs(60), - min_backoff: Duration::from_millis(500), - max_backoff: Duration::from_secs(30), - } - } -} - -#[derive(Debug)] -struct EndpointState { - failures: u32, - last_failure: Instant, - last_attempt: Instant, - backoff_duration: Duration, -} - -impl EndpointState { - fn new(min_backoff: Duration) -> Self { - Self { - failures: 0, - last_failure: Instant::now(), - last_attempt: Instant::now(), - backoff_duration: min_backoff, - } - } - - fn record_failure(&mut self, max_backoff: Duration) { - self.failures += 1; - self.last_failure = Instant::now(); - self.last_attempt = Instant::now(); - // Exponential backoff with max limit - self.backoff_duration = std::cmp::min(self.backoff_duration * 2, max_backoff); - } - - fn record_success(&mut self, min_backoff: Duration) { - self.failures = 0; - self.backoff_duration = min_backoff; - } - - fn is_open(&self, max_failures: u32, reset_timeout: Duration) -> bool { - if self.failures >= max_failures { - // Check if we've waited long enough since the last failure - if self.last_failure.elapsed() > reset_timeout { - false // Circuit is half-open, allow retry - } else { - true // Circuit is open, block requests - } - } else { - false // Circuit is closed, allow requests - } - } - - fn should_retry(&self) -> bool { - self.last_attempt.elapsed() >= self.backoff_duration - } -} - -#[derive(Debug, Clone)] -pub struct CircuitBreaker { - states: Arc>>, - config: CircuitBreakerConfig, -} - -impl CircuitBreaker { - pub fn new() -> Self { - Self { - states: Arc::new(RwLock::new(HashMap::new())), - config: CircuitBreakerConfig::default(), - } - } - - pub fn with_config(config: CircuitBreakerConfig) -> Self { - Self { - states: Arc::new(RwLock::new(HashMap::new())), - config, - } - } - - pub async fn check_endpoint(&self, endpoint: &str) -> bool { - let mut states = self.states.write().await; - let state = states - .entry(endpoint.to_string()) - .or_insert_with(|| EndpointState::new(self.config.min_backoff)); - - !(state.is_open(self.config.max_failures, self.config.reset_timeout) && !state.should_retry()) - } - - pub async fn record_success(&self, endpoint: &str) { - let mut states = self.states.write().await; - if let Some(state) = states.get_mut(endpoint) { - state.record_success(self.config.min_backoff); - } - } - - pub async fn record_failure(&self, endpoint: &str) { - let mut states = self.states.write().await; - let state = states - .entry(endpoint.to_string()) - .or_insert_with(|| EndpointState::new(self.config.min_backoff)); - state.record_failure(self.config.max_backoff); - } - - pub async fn get_backoff_duration(&self, endpoint: &str) -> Duration { - let states = self.states.read().await; - states - .get(endpoint) - .map(|state| state.backoff_duration) - .unwrap_or(self.config.min_backoff) - } -} - -#[cfg(test)] -mod tests { - use super::*; - use tokio::time::sleep; - - fn test_config() -> CircuitBreakerConfig { - CircuitBreakerConfig { - max_failures: 3, - reset_timeout: Duration::from_millis(100), // Much shorter for testing - min_backoff: Duration::from_millis(10), - max_backoff: Duration::from_millis(100), - } - } - - #[tokio::test] - async fn test_circuit_breaker_basic() { - let cb = CircuitBreaker::with_config(test_config()); - let endpoint = "http://test.endpoint"; - - // Initially should allow requests - assert!(cb.check_endpoint(endpoint).await); - - // Record failures - for _ in 0..test_config().max_failures { - cb.record_failure(endpoint).await; - } - - // Circuit should be open - assert!(!cb.check_endpoint(endpoint).await); - - // Record success should reset - cb.record_success(endpoint).await; - assert!(cb.check_endpoint(endpoint).await); - } - - #[tokio::test] - async fn test_backoff_duration() { - let config = test_config(); - let cb = CircuitBreaker::with_config(config.clone()); - let endpoint = "http://test.endpoint"; - - assert_eq!(cb.get_backoff_duration(endpoint).await, config.min_backoff); - - // Record a failure - cb.record_failure(endpoint).await; - assert_eq!( - cb.get_backoff_duration(endpoint).await, - config.min_backoff * 2 - ); - - // Record another failure - cb.record_failure(endpoint).await; - assert_eq!( - cb.get_backoff_duration(endpoint).await, - config.min_backoff * 4 - ); - - // Success should reset backoff - cb.record_success(endpoint).await; - assert_eq!(cb.get_backoff_duration(endpoint).await, config.min_backoff); - } - - #[tokio::test] - async fn test_circuit_half_open() { - let config = test_config(); - let cb = CircuitBreaker::with_config(config.clone()); - let endpoint = "http://test.endpoint"; - - // Open the circuit - for _ in 0..config.max_failures { - cb.record_failure(endpoint).await; - } - assert!(!cb.check_endpoint(endpoint).await); - - // Wait for reset timeout - sleep(config.reset_timeout + Duration::from_millis(10)).await; - - // Circuit should be half-open now - assert!(cb.check_endpoint(endpoint).await); - } -} diff --git a/bootstrap_cache/src/config.rs b/bootstrap_cache/src/config.rs deleted file mode 100644 index 17d3f6a377..0000000000 --- a/bootstrap_cache/src/config.rs +++ /dev/null @@ -1,285 +0,0 @@ -// Copyright 2024 MaidSafe.net limited. -// -// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. -// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed -// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. Please review the Licences for the specific language governing -// permissions and limitations relating to use of the SAFE Network Software. - -use std::path::{Path, PathBuf}; -use std::time::Duration; -use std::fs; - -/// Configuration for the bootstrap cache -#[derive(Clone, Debug)] -pub struct BootstrapConfig { - /// List of bootstrap endpoints to fetch peer information from - pub endpoints: Vec, - /// Maximum number of peers to keep in the cache - pub max_peers: usize, - /// Path to the bootstrap cache file - pub cache_file_path: PathBuf, - /// How often to update the cache (in seconds) - pub update_interval: Duration, - /// Request timeout for endpoint queries - pub request_timeout: Duration, - /// Maximum retries per endpoint - pub max_retries: u32, -} - -impl Default for BootstrapConfig { - fn default() -> Self { - Self { - endpoints: vec![ - "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json".to_string(), - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts".to_string(), - "https://sn-node1.s3.eu-west-2.amazonaws.com/peers".to_string(), - "https://sn-node2.s3.eu-west-2.amazonaws.com/peers".to_string(), - ], - max_peers: 1500, - cache_file_path: default_cache_path(), - update_interval: Duration::from_secs(60), - request_timeout: Duration::from_secs(10), - max_retries: 3, - } - } -} - -impl BootstrapConfig { - /// Creates a new BootstrapConfig with custom endpoints - pub fn with_endpoints(endpoints: Vec) -> Self { - Self { - endpoints, - ..Default::default() - } - } - - /// Creates a new BootstrapConfig with a custom cache file path - pub fn with_cache_path>(path: P) -> Self { - Self { - cache_file_path: path.as_ref().to_path_buf(), - ..Default::default() - } - } - - /// Creates a new BootstrapConfig with custom settings - pub fn new( - endpoints: Vec, - max_peers: usize, - cache_file_path: PathBuf, - update_interval: Duration, - request_timeout: Duration, - max_retries: u32, - ) -> Self { - Self { - endpoints, - max_peers, - cache_file_path, - update_interval, - request_timeout, - max_retries, - } - } -} - -/// Returns the default path for the bootstrap cache file -fn default_cache_path() -> PathBuf { - tracing::info!("Determining default cache path"); - let system_path = if cfg!(target_os = "macos") { - tracing::debug!("OS: macOS"); - // Try user's Library first, then fall back to system Library - if let Some(home) = dirs::home_dir() { - let user_library = home.join("Library/Application Support/Safe/bootstrap_cache.json"); - tracing::info!("Attempting to use user's Library path: {:?}", user_library); - if let Some(parent) = user_library.parent() { - tracing::debug!("Creating directory: {:?}", parent); - match fs::create_dir_all(parent) { - Ok(_) => { - tracing::debug!("Successfully created directory structure"); - // Check if we can write to the directory - match tempfile::NamedTempFile::new_in(parent) { - Ok(temp_file) => { - temp_file.close().ok(); - tracing::info!("Successfully verified write access to {:?}", parent); - return user_library; - } - Err(e) => { - tracing::warn!("Cannot write to user's Library: {}", e); - } - } - } - Err(e) => { - tracing::warn!("Failed to create user's Library directory: {}", e); - } - } - } - } - // Fall back to system Library - tracing::info!("Falling back to system Library path"); - PathBuf::from("/Library/Application Support/Safe/bootstrap_cache.json") - } else if cfg!(target_os = "linux") { - tracing::debug!("OS: Linux"); - // On Linux, try /var/lib/safe first, then fall back to /var/safe - let primary_path = PathBuf::from("/var/lib/safe/bootstrap_cache.json"); - tracing::info!("Attempting to use primary Linux path: {:?}", primary_path); - if let Some(parent) = primary_path.parent() { - tracing::debug!("Creating directory: {:?}", parent); - match fs::create_dir_all(parent) { - Ok(_) => { - tracing::debug!("Successfully created directory structure"); - // Check if we can write to the directory - match tempfile::NamedTempFile::new_in(parent) { - Ok(temp_file) => { - temp_file.close().ok(); - tracing::info!("Successfully verified write access to {:?}", parent); - return primary_path; - } - Err(e) => { - tracing::warn!("Cannot write to {:?}: {}", parent, e); - } - } - } - Err(e) => { - tracing::warn!("Failed to create Linux primary directory: {}", e); - } - } - } - tracing::info!("Falling back to secondary Linux path: /var/safe"); - PathBuf::from("/var/safe/bootstrap_cache.json") - } else if cfg!(target_os = "windows") { - tracing::debug!("OS: Windows"); - // On Windows, try LocalAppData first, then fall back to ProgramData - if let Some(local_app_data) = dirs::data_local_dir() { - let local_path = local_app_data.join("Safe").join("bootstrap_cache.json"); - tracing::info!("Attempting to use Windows LocalAppData path: {:?}", local_path); - if let Some(parent) = local_path.parent() { - tracing::debug!("Creating directory: {:?}", parent); - if fs::create_dir_all(parent).is_ok() { - // Check if we can write to the directory - if let Ok(temp_file) = tempfile::NamedTempFile::new_in(parent) { - temp_file.close().ok(); - tracing::info!("Successfully created and verified Windows LocalAppData path"); - return local_path; - } - } - } - } - tracing::info!("Falling back to Windows ProgramData path"); - PathBuf::from(r"C:\ProgramData\Safe\bootstrap_cache.json") - } else { - tracing::debug!("Unknown OS, using current directory"); - PathBuf::from("bootstrap_cache.json") - }; - - // Try to create the system directory first - if let Some(parent) = system_path.parent() { - tracing::debug!("Attempting to create system directory: {:?}", parent); - if fs::create_dir_all(parent).is_ok() { - // Check if we can write to the directory - match tempfile::NamedTempFile::new_in(parent) { - Ok(temp_file) => { - temp_file.close().ok(); - #[cfg(unix)] - { - use std::os::unix::fs::PermissionsExt; - match fs::set_permissions(parent, fs::Permissions::from_mode(0o755)) { - Ok(_) => tracing::debug!("Successfully set directory permissions"), - Err(e) => tracing::warn!("Failed to set cache directory permissions: {}", e), - } - } - tracing::info!("Successfully created and verified system directory"); - return system_path; - } - Err(e) => { - tracing::warn!("Cannot write to system directory: {}", e); - } - } - } else { - tracing::warn!("Failed to create system directory"); - } - } - - // If system directory is not writable, fall back to user's home directory - if let Some(home) = dirs::home_dir() { - let user_path = home.join(".safe").join("bootstrap_cache.json"); - tracing::info!("Attempting to use home directory fallback: {:?}", user_path); - if let Some(parent) = user_path.parent() { - tracing::debug!("Creating home directory: {:?}", parent); - if fs::create_dir_all(parent).is_ok() { - tracing::info!("Successfully created home directory"); - return user_path; - } - } - } - - // Last resort: use current directory - tracing::warn!("All directory attempts failed, using current directory"); - PathBuf::from("bootstrap_cache.json") -} - -#[cfg(test)] -mod tests { - use super::*; - use std::time::Duration; - - #[test] - fn test_default_config() { - let config = BootstrapConfig::default(); - assert_eq!(config.endpoints.len(), 4); - assert_eq!( - config.endpoints[0], - "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json" - ); - assert_eq!( - config.endpoints[1], - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" - ); - assert_eq!( - config.endpoints[2], - "https://sn-node1.s3.eu-west-2.amazonaws.com/peers" - ); - assert_eq!( - config.endpoints[3], - "https://sn-node2.s3.eu-west-2.amazonaws.com/peers" - ); - assert_eq!(config.max_peers, 1500); - assert_eq!(config.update_interval, Duration::from_secs(60)); - assert_eq!(config.request_timeout, Duration::from_secs(10)); - assert_eq!(config.max_retries, 3); - } - - #[test] - fn test_custom_endpoints() { - let endpoints = vec!["http://custom.endpoint/cache".to_string()]; - let config = BootstrapConfig::with_endpoints(endpoints.clone()); - assert_eq!(config.endpoints, endpoints); - } - - #[test] - fn test_custom_cache_path() { - let path = PathBuf::from("/custom/path/cache.json"); - let config = BootstrapConfig::with_cache_path(&path); - assert_eq!(config.cache_file_path, path); - } - - #[test] - fn test_new_config() { - let endpoints = vec!["http://custom.endpoint/cache".to_string()]; - let path = PathBuf::from("/custom/path/cache.json"); - let config = BootstrapConfig::new( - endpoints.clone(), - 2000, - path.clone(), - Duration::from_secs(120), - Duration::from_secs(5), - 5, - ); - - assert_eq!(config.endpoints, endpoints); - assert_eq!(config.max_peers, 2000); - assert_eq!(config.cache_file_path, path); - assert_eq!(config.update_interval, Duration::from_secs(120)); - assert_eq!(config.request_timeout, Duration::from_secs(5)); - assert_eq!(config.max_retries, 5); - } -} diff --git a/bootstrap_cache/src/initial_peer_discovery.rs b/bootstrap_cache/src/initial_peer_discovery.rs deleted file mode 100644 index da1441b161..0000000000 --- a/bootstrap_cache/src/initial_peer_discovery.rs +++ /dev/null @@ -1,424 +0,0 @@ -// Copyright 2024 MaidSafe.net limited. -// -// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. -// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed -// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. Please review the Licences for the specific language governing -// permissions and limitations relating to use of the SAFE Network Software. - -use crate::{ - circuit_breaker::{CircuitBreaker, CircuitBreakerConfig}, - BootstrapEndpoints, BootstrapPeer, Error, Result, -}; -use libp2p::Multiaddr; -use reqwest::Client; -use tokio::time::timeout; -use tracing::{info, warn}; - -const DEFAULT_JSON_ENDPOINT: &str = - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts"; - -const DEFAULT_BOOTSTRAP_ENDPOINTS: &[&str] = &[ - DEFAULT_JSON_ENDPOINT, -]; - -const FETCH_TIMEOUT_SECS: u64 = 30; - -/// Discovers initial peers from a list of endpoints -pub struct InitialPeerDiscovery { - endpoints: Vec, - client: Client, - circuit_breaker: CircuitBreaker, -} - -impl Default for InitialPeerDiscovery { - fn default() -> Self { - Self::new() - } -} - -impl InitialPeerDiscovery { - pub fn new() -> Self { - Self { - endpoints: DEFAULT_BOOTSTRAP_ENDPOINTS - .iter() - .map(|s| s.to_string()) - .collect(), - client: Client::new(), - circuit_breaker: CircuitBreaker::new(), - } - } - - pub fn with_endpoints(endpoints: Vec) -> Self { - Self { - endpoints, - client: Client::new(), - circuit_breaker: CircuitBreaker::new(), - } - } - - pub fn with_config( - endpoints: Vec, - circuit_breaker_config: CircuitBreakerConfig, - ) -> Self { - Self { - endpoints, - client: Client::new(), - circuit_breaker: CircuitBreaker::with_config(circuit_breaker_config), - } - } - - /// Load endpoints from a JSON file - pub async fn from_json(json_str: &str) -> Result { - let endpoints: BootstrapEndpoints = serde_json::from_str(json_str)?; - Ok(Self { - endpoints: endpoints.peers, - client: Client::new(), - circuit_breaker: CircuitBreaker::new(), - }) - } - - /// Fetch peers from all configured endpoints - pub async fn fetch_peers(&self) -> Result> { - info!("Starting peer discovery from {} endpoints: {:?}", self.endpoints.len(), self.endpoints); - let mut peers = Vec::new(); - let mut last_error = None; - - for endpoint in &self.endpoints { - info!("Attempting to fetch peers from endpoint: {}", endpoint); - match self.fetch_from_endpoint(endpoint).await { - Ok(mut endpoint_peers) => { - info!( - "Successfully fetched {} peers from {}. First few peers: {:?}", - endpoint_peers.len(), - endpoint, - endpoint_peers.iter().take(3).collect::>() - ); - peers.append(&mut endpoint_peers); - } - Err(e) => { - warn!("Failed to fetch peers from {}: {}", endpoint, e); - last_error = Some(e); - } - } - } - - if peers.is_empty() { - if let Some(e) = last_error { - warn!("No peers found from any endpoint. Last error: {}", e); - Err(Error::NoPeersFound(format!( - "No valid peers found from any endpoint: {}", - e - ))) - } else { - warn!("No peers found from any endpoint and no errors reported"); - Err(Error::NoPeersFound( - "No valid peers found from any endpoint".to_string(), - )) - } - } else { - info!( - "Successfully discovered {} total peers. First few: {:?}", - peers.len(), - peers.iter().take(3).collect::>() - ); - Ok(peers) - } - } - - async fn fetch_from_endpoint(&self, endpoint: &str) -> Result> { - // Check circuit breaker state - if !self.circuit_breaker.check_endpoint(endpoint).await { - warn!("Circuit breaker is open for endpoint: {}", endpoint); - return Err(Error::CircuitBreakerOpen(endpoint.to_string())); - } - - // Get backoff duration and wait if necessary - let backoff = self.circuit_breaker.get_backoff_duration(endpoint).await; - if !backoff.is_zero() { - info!("Backing off for {:?} before trying endpoint: {}", backoff, endpoint); - } - tokio::time::sleep(backoff).await; - - info!("Fetching peers from endpoint: {}", endpoint); - // Get backoff duration and wait if necessary - let result = async { - info!("Sending HTTP request to {}", endpoint); - let response = match timeout( - std::time::Duration::from_secs(FETCH_TIMEOUT_SECS), - self.client.get(endpoint).send(), - ) - .await { - Ok(resp) => match resp { - Ok(r) => { - info!("Got response with status: {}", r.status()); - r - } - Err(e) => { - warn!("HTTP request failed: {}", e); - return Err(Error::RequestFailed(e.to_string())); - } - }, - Err(_) => { - warn!("Request timed out after {} seconds", FETCH_TIMEOUT_SECS); - return Err(Error::RequestTimeout); - } - }; - - let content = match response.text().await { - Ok(c) => { - info!("Received response content length: {}", c.len()); - if c.len() < 1000 { // Only log if content is not too large - info!("Response content: {}", c); - } - c - } - Err(e) => { - warn!("Failed to get response text: {}", e); - return Err(Error::InvalidResponse(format!("Failed to get response text: {}", e))); - } - }; - - // Try parsing as JSON first - if content.trim().starts_with('{') { - info!("Attempting to parse response as JSON"); - match serde_json::from_str::(&content) { - Ok(json_endpoints) => { - info!("Successfully parsed JSON response with {} peers", json_endpoints.peers.len()); - let peers = json_endpoints - .peers - .into_iter() - .filter_map(|addr| match addr.parse::() { - Ok(addr) => Some(BootstrapPeer::new(addr)), - Err(e) => { - warn!("Failed to parse multiaddr {}: {}", addr, e); - None - } - }) - .collect::>(); - - if peers.is_empty() { - warn!("No valid peers found in JSON response"); - Err(Error::NoPeersFound( - "No valid peers found in JSON response".to_string(), - )) - } else { - info!("Successfully parsed {} valid peers from JSON", peers.len()); - Ok(peers) - } - } - Err(e) => { - warn!("Failed to parse JSON response: {}", e); - Err(Error::InvalidResponse(format!( - "Invalid JSON format: {}", - e - ))) - } - } - } else { - info!("Attempting to parse response as plain text"); - // Try parsing as plain text with one multiaddr per line - let peers = content - .lines() - .filter(|line| !line.trim().is_empty()) - .filter_map(|line| match line.trim().parse::() { - Ok(addr) => Some(BootstrapPeer::new(addr)), - Err(e) => { - warn!("Failed to parse multiaddr {}: {}", line, e); - None - } - }) - .collect::>(); - - if peers.is_empty() { - warn!("No valid peers found in plain text response"); - Err(Error::NoPeersFound( - "No valid peers found in plain text response".to_string(), - )) - } else { - info!("Successfully parsed {} valid peers from plain text", peers.len()); - Ok(peers) - } - } - } - .await; - - match result { - Ok(peers) => { - info!("Successfully fetched {} peers from {}", peers.len(), endpoint); - self.circuit_breaker.record_success(endpoint).await; - Ok(peers) - } - Err(e) => { - warn!("Failed to fetch peers from {}: {}", endpoint, e); - self.circuit_breaker.record_failure(endpoint).await; - Err(e) - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use wiremock::{ - matchers::{method, path}, - Mock, MockServer, ResponseTemplate, - }; - - #[tokio::test] - async fn test_fetch_peers() { - let mock_server = MockServer::start().await; - - Mock::given(method("GET")) - .and(path("/")) - .respond_with( - ResponseTemplate::new(200) - .set_body_string("/ip4/127.0.0.1/tcp/8080\n/ip4/127.0.0.2/tcp/8080"), - ) - .mount(&mock_server) - .await; - - let mut discovery = InitialPeerDiscovery::new(); - discovery.endpoints = vec![mock_server.uri()]; - - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 2); - - let addr1: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - let addr2: Multiaddr = "/ip4/127.0.0.2/tcp/8080".parse().unwrap(); - assert!(peers.iter().any(|p| p.addr == addr1)); - assert!(peers.iter().any(|p| p.addr == addr2)); - } - - #[tokio::test] - async fn test_endpoint_failover() { - let mock_server1 = MockServer::start().await; - let mock_server2 = MockServer::start().await; - - // First endpoint fails - Mock::given(method("GET")) - .and(path("/")) - .respond_with(ResponseTemplate::new(500)) - .mount(&mock_server1) - .await; - - // Second endpoint succeeds - Mock::given(method("GET")) - .and(path("/")) - .respond_with(ResponseTemplate::new(200).set_body_string("/ip4/127.0.0.1/tcp/8080")) - .mount(&mock_server2) - .await; - - let mut discovery = InitialPeerDiscovery::new(); - discovery.endpoints = vec![mock_server1.uri(), mock_server2.uri()]; - - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 1); - - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - assert_eq!(peers[0].addr, addr); - } - - #[tokio::test] - async fn test_invalid_multiaddr() { - let mock_server = MockServer::start().await; - - Mock::given(method("GET")) - .and(path("/")) - .respond_with( - ResponseTemplate::new(200).set_body_string( - "/ip4/127.0.0.1/tcp/8080\ninvalid-addr\n/ip4/127.0.0.2/tcp/8080", - ), - ) - .mount(&mock_server) - .await; - - let mut discovery = InitialPeerDiscovery::new(); - discovery.endpoints = vec![mock_server.uri()]; - - let peers = discovery.fetch_peers().await.unwrap(); - let valid_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - assert_eq!(peers[0].addr, valid_addr); - } - - #[tokio::test] - async fn test_empty_response() { - let mock_server = MockServer::start().await; - - Mock::given(method("GET")) - .and(path("/")) - .respond_with(ResponseTemplate::new(200).set_body_string("")) - .mount(&mock_server) - .await; - - let mut discovery = InitialPeerDiscovery::new(); - discovery.endpoints = vec![mock_server.uri()]; - - let result = discovery.fetch_peers().await; - assert!(matches!(result, Err(Error::NoPeersFound(_)))); - } - - #[tokio::test] - async fn test_whitespace_and_empty_lines() { - let mock_server = MockServer::start().await; - - Mock::given(method("GET")) - .and(path("/")) - .respond_with( - ResponseTemplate::new(200).set_body_string("\n \n/ip4/127.0.0.1/tcp/8080\n \n"), - ) - .mount(&mock_server) - .await; - - let mut discovery = InitialPeerDiscovery::new(); - discovery.endpoints = vec![mock_server.uri()]; - - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 1); - - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - assert_eq!(peers[0].addr, addr); - } - - #[tokio::test] - async fn test_default_endpoints() { - let discovery = InitialPeerDiscovery::new(); - assert_eq!(discovery.endpoints.len(), 1); - assert_eq!( - discovery.endpoints[0], - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" - ); - } - - #[tokio::test] - async fn test_custom_endpoints() { - let endpoints = vec!["http://example.com".to_string()]; - let discovery = InitialPeerDiscovery::with_endpoints(endpoints.clone()); - assert_eq!(discovery.endpoints, endpoints); - } - - #[tokio::test] - async fn test_json_endpoints() { - let mock_server = MockServer::start().await; - - Mock::given(method("GET")) - .and(path("/")) - .respond_with(ResponseTemplate::new(200).set_body_string( - r#"{"peers": ["/ip4/127.0.0.1/tcp/8080", "/ip4/127.0.0.2/tcp/8080"]}"#, - )) - .mount(&mock_server) - .await; - - let mut discovery = InitialPeerDiscovery::new(); - discovery.endpoints = vec![mock_server.uri()]; - - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 2); - - let addr1: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - let addr2: Multiaddr = "/ip4/127.0.0.2/tcp/8080".parse().unwrap(); - assert!(peers.iter().any(|p| p.addr == addr1)); - assert!(peers.iter().any(|p| p.addr == addr2)); - } -} diff --git a/bootstrap_cache/src/lib.rs b/bootstrap_cache/src/lib.rs deleted file mode 100644 index dcd7f0159e..0000000000 --- a/bootstrap_cache/src/lib.rs +++ /dev/null @@ -1,336 +0,0 @@ -//! Bootstrap Cache for Safe Network -//! -//! This crate provides a decentralized peer discovery and caching system for the Safe Network. -//! It implements a robust peer management system with the following features: -//! -//! - Decentralized Design: No dedicated bootstrap nodes required -//! - Cross-Platform Support: Works on Linux, macOS, and Windows -//! - Shared Cache: System-wide cache file accessible by both nodes and clients -//! - Concurrent Access: File locking for safe multi-process access -//! - Atomic Operations: Safe cache updates using atomic file operations -//! - Initial Peer Discovery: Fallback web endpoints for new/stale cache scenarios -//! - Comprehensive Error Handling: Detailed error types and logging -//! - Circuit Breaker Pattern: Intelligent failure handling -//! -//! # Example -//! -//! ```no_run -//! use bootstrap_cache::{CacheStore, BootstrapConfig, PeersArgs}; -//! use url::Url; -//! -//! # async fn example() -> Result<(), Box> { -//! let config = BootstrapConfig::default(); -//! let args = PeersArgs { -//! first: false, -//! peers: vec![], -//! network_contacts_url: Some(Url::parse("https://example.com/peers")?), -//! local: false, -//! test_network: false, -//! }; -//! -//! let store = CacheStore::from_args(args, config).await?; -//! let peers = store.get_peers().await; -//! # Ok(()) -//! # } -//! ``` - -mod cache_store; -mod circuit_breaker; -pub mod config; -mod error; -mod initial_peer_discovery; - -use libp2p::{multiaddr::Protocol, Multiaddr}; -use serde::{Deserialize, Serialize}; -use std::{fmt, net::SocketAddrV4, time::SystemTime}; -use thiserror::Error; -use std::env; -use url::Url; -use tracing::{info, warn}; - -pub use cache_store::CacheStore; -pub use config::BootstrapConfig; -pub use error::{Error, Result}; -pub use initial_peer_discovery::InitialPeerDiscovery; - -/// Parse strings like `1.2.3.4:1234` and `/ip4/1.2.3.4/tcp/1234` into a multiaddr. -/// This matches the behavior of sn_peers_acquisition. -pub fn parse_peer_addr(addr: &str) -> std::result::Result { - // Parse valid IPv4 socket address, e.g. `1.2.3.4:1234`. - if let Ok(addr) = addr.parse::() { - let start_addr = Multiaddr::from(*addr.ip()); - // Always use UDP and QUIC-v1 for socket addresses - let multiaddr = start_addr - .with(Protocol::Udp(addr.port())) - .with(Protocol::QuicV1); - - return Ok(multiaddr); - } - - // Parse any valid multiaddr string - addr.parse::() -} - -/// Structure representing a list of bootstrap endpoints -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct BootstrapEndpoints { - /// List of peer multiaddresses - pub peers: Vec, - /// Optional metadata about the endpoints - #[serde(default)] - pub metadata: EndpointMetadata, -} - -/// Metadata about bootstrap endpoints -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct EndpointMetadata { - /// When the endpoints were last updated - #[serde(default = "default_last_updated")] - pub last_updated: String, - /// Optional description of the endpoints - #[serde(default)] - pub description: String, -} - -fn default_last_updated() -> String { - chrono::Utc::now().to_rfc3339() -} - -impl Default for EndpointMetadata { - fn default() -> Self { - Self { - last_updated: default_last_updated(), - description: String::new(), - } - } -} - -/// A peer that can be used for bootstrapping into the network -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct BootstrapPeer { - /// The multiaddress of the peer - pub addr: Multiaddr, - /// The number of successful connections to this peer - pub success_count: u32, - /// The number of failed connection attempts to this peer - pub failure_count: u32, - /// The last time this peer was successfully contacted - pub last_seen: SystemTime, -} - -impl BootstrapPeer { - pub fn new(addr: Multiaddr) -> Self { - Self { - addr, - success_count: 0, - failure_count: 0, - last_seen: SystemTime::now(), - } - } - - pub fn update_status(&mut self, success: bool) { - if success { - self.success_count += 1; - self.last_seen = SystemTime::now(); - } else { - self.failure_count += 1; - } - } - - pub fn is_reliable(&self) -> bool { - // A peer is considered reliable if it has more successes than failures - self.success_count > self.failure_count - } -} - -impl fmt::Display for BootstrapPeer { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "BootstrapPeer {{ addr: {}, last_seen: {:?}, success: {}, failure: {} }}", - self.addr, self.last_seen, self.success_count, self.failure_count - ) - } -} - -/// Command line arguments for peer configuration -#[derive(Debug, Clone)] -pub struct PeersArgs { - /// First node in the network - pub first: bool, - /// List of peer addresses - pub peers: Vec, - /// URL to fetch network contacts from - pub network_contacts_url: Option, - /// Use only local discovery (mDNS) - pub local: bool, - /// Test network mode - only use provided peers - pub test_network: bool, -} - -impl Default for PeersArgs { - fn default() -> Self { - Self { - first: false, - peers: Vec::new(), - network_contacts_url: None, - local: false, - test_network: false, - } - } -} - -/// Validates that a multiaddr has all required components for a valid peer address -pub(crate) fn is_valid_peer_addr(addr: &Multiaddr) -> bool { - let mut has_ip = false; - let mut has_port = false; - let mut has_protocol = false; - - for protocol in addr.iter() { - match protocol { - Protocol::Ip4(_) | Protocol::Ip6(_) => has_ip = true, - Protocol::Tcp(_) | Protocol::Udp(_) => has_port = true, - Protocol::QuicV1 => has_protocol = true, - _ => {} - } - } - - has_ip && has_port && has_protocol -} - -impl CacheStore { - /// Create a new CacheStore from command line arguments - pub async fn from_args(args: PeersArgs, config: BootstrapConfig) -> Result { - // If this is the first node, return empty store with no fallback - if args.first { - info!("First node in network, returning empty store"); - let store = Self::new_without_init(config).await?; - store.clear_peers().await?; - return Ok(store); - } - - // If local mode is enabled, return empty store (will use mDNS) - if args.local { - info!("Local mode enabled, using only local discovery"); - let store = Self::new_without_init(config).await?; - store.clear_peers().await?; - return Ok(store); - } - - // If test network mode is enabled, use in-memory store only - if args.test_network { - info!("Test network mode enabled, using in-memory store only"); - let mut config = config; - config.cache_file_path = "".into(); // Empty path to prevent file operations - let store = Self::new_without_init(config).await?; - - // Add peers from arguments if present - for peer in args.peers { - if is_valid_peer_addr(&peer) { - info!("Adding peer from arguments: {}", peer); - store.add_peer(peer).await?; - } - } - - // If network contacts URL is provided, fetch peers from there - if let Some(url) = args.network_contacts_url { - info!("Attempting to fetch peers from network contacts URL: {}", url); - let discovery = InitialPeerDiscovery::with_endpoints(vec![url.to_string()]); - match discovery.fetch_peers().await { - Ok(peers) => { - info!("Successfully fetched {} peers from network contacts", peers.len()); - for peer in peers { - if is_valid_peer_addr(&peer.addr) { - store.add_peer(peer.addr).await?; - } - } - } - Err(e) => { - warn!("Failed to fetch peers from network contacts: {}", e); - } - } - } - - return Ok(store); - } - - // Create a new store but don't load from cache or fetch from endpoints yet - let mut store = Self::new_without_init(config).await?; - - // Add peers from environment variable if present - let mut has_specific_peers = false; - if let Ok(env_peers) = std::env::var("SAFE_PEERS") { - for peer_str in env_peers.split(',') { - if let Ok(peer) = peer_str.parse() { - if is_valid_peer_addr(&peer) { - info!("Adding peer from environment: {}", peer); - store.add_peer(peer).await?; - has_specific_peers = true; - } else { - warn!("Invalid peer address format from environment: {}", peer); - } - } - } - } - - // Add peers from arguments if present - for peer in args.peers { - if is_valid_peer_addr(&peer) { - info!("Adding peer from arguments: {}", peer); - store.add_peer(peer).await?; - has_specific_peers = true; - } else { - warn!("Invalid peer address format from arguments: {}", peer); - } - } - - // If we have peers, update cache and return - if has_specific_peers { - info!("Using provided peers and updating cache"); - store.save_cache().await?; - return Ok(store); - } - - // If no peers specified, try network contacts URL - if let Some(url) = args.network_contacts_url { - info!("Attempting to fetch peers from network contacts URL: {}", url); - let discovery = InitialPeerDiscovery::with_endpoints(vec![url.to_string()]); - match discovery.fetch_peers().await { - Ok(peers) => { - info!("Successfully fetched {} peers from network contacts", peers.len()); - for peer in peers { - if is_valid_peer_addr(&peer.addr) { - store.add_peer(peer.addr).await?; - has_specific_peers = true; - } else { - warn!("Invalid peer address format from network contacts: {}", peer.addr); - } - } - if has_specific_peers { - info!("Successfully fetched {} peers from network contacts", store.get_peers().await.len()); - } - } - Err(e) => { - warn!("Failed to fetch peers from network contacts: {}", e); - } - } - } - - // If no peers from any source, initialize from cache and default endpoints - if !has_specific_peers { - store.init().await?; - } - - Ok(store) - } -} - -/// Creates a new bootstrap cache with default configuration -pub async fn new() -> Result { - CacheStore::new(Default::default()).await -} - -/// Creates a new bootstrap cache with custom configuration -pub async fn with_config(config: BootstrapConfig) -> Result { - CacheStore::new(config).await -} diff --git a/docs/bootstrap_cache_implementation.md b/docs/bootstrap_cache_implementation.md deleted file mode 100644 index 9588d277fc..0000000000 --- a/docs/bootstrap_cache_implementation.md +++ /dev/null @@ -1,337 +0,0 @@ -# Bootstrap Cache Implementation Guide - -This guide documents the implementation of the bootstrap cache system, including recent changes and completed work. - -## Phase 1: Bootstrap Cache File Management - -### 1.1 Cache File Structure -```rust -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct PeerInfo { - pub addr: Multiaddr, - pub last_seen: DateTime, - pub success_count: u32, - pub failure_count: u32, -} - -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct BootstrapCache { - pub last_updated: DateTime, - pub peers: Vec, -} -``` - -### 1.2 File Operations Implementation -The cache store is implemented in `bootstrap_cache/src/cache_store.rs` with the following key features: - -```rust -pub struct CacheStore { - cache_path: PathBuf, - peers: BTreeMap, -} - -impl CacheStore { - pub fn new() -> Result { - let cache_path = Self::get_cache_path()?; - let peers = Self::load_from_disk(&cache_path)?; - Ok(Self { cache_path, peers }) - } - - pub fn save_to_disk(&self) -> Result<()> { - // Check if file is read-only first - if is_readonly(&self.cache_path) { - warn!("Cache file is read-only, skipping save"); - return Ok(()); - } - - let cache = BootstrapCache { - last_updated: Utc::now(), - peers: self.peers.values().cloned().collect(), - }; - - let temp_path = self.cache_path.with_extension("tmp"); - atomic_write(&temp_path, &cache)?; - fs::rename(temp_path, &self.cache_path)?; - Ok(()) - } - - pub fn update_peer_status( - &mut self, - addr: NetworkAddress, - success: bool, - ) -> Result<()> { - if is_readonly(&self.cache_path) { - warn!("Cache file is read-only, skipping peer status update"); - return Ok(()); - } - - let peer = self.peers.entry(addr).or_default(); - if success { - peer.success_count += 1; - } else { - peer.failure_count += 1; - } - peer.last_seen = Utc::now(); - Ok(()) - } - - pub fn cleanup_unreliable_peers(&mut self) -> Result<()> { - if is_readonly(&self.cache_path) { - warn!("Cache file is read-only, skipping cleanup"); - return Ok(()); - } - - self.peers.retain(|_, peer| { - peer.success_count > peer.failure_count - }); - Ok(()) - } -} -``` - -### 1.3 File Permission Handling -The cache store now handles read-only files gracefully: -- Each modifying operation checks if the file is read-only -- If read-only, the operation logs a warning and returns successfully -- Read operations continue to work even when the file is read-only - -## Phase 2: Network Integration Strategy - -### 2.1 Integration Architecture - -The bootstrap cache will be integrated into the existing networking layer with minimal changes to current functionality. The implementation focuses on three key areas: - -#### 2.1.1 NetworkDiscovery Integration -```rust -impl NetworkDiscovery { - // Add cache integration to existing peer discovery - pub(crate) async fn save_peers_to_cache(&self, cache: &BootstrapCache) { - for peers in self.candidates.values() { - for peer in peers { - let _ = cache.add_peer(peer.clone()).await; - } - } - } - - pub(crate) async fn load_peers_from_cache(&mut self, cache: &BootstrapCache) { - for peer in cache.get_reliable_peers().await { - if let Some(ilog2) = self.get_bucket_index(&peer.addr) { - self.insert_candidates(ilog2, vec![peer.addr]); - } - } - } -} -``` - -#### 2.1.2 SwarmDriver Integration -```rust -impl SwarmDriver { - pub(crate) async fn save_peers_to_cache(&self) { - if let Some(cache) = &self.bootstrap_cache { - self.network_discovery.save_peers_to_cache(cache).await; - } - } -} -``` - -#### 2.1.3 Bootstrap Process Integration -```rust -impl ContinuousBootstrap { - pub(crate) async fn initialize_with_cache(&mut self, cache: &BootstrapCache) { - // Load initial peers from cache - self.network_discovery.load_peers_from_cache(cache).await; - - // Normal bootstrap process continues... - self.initial_bootstrap_done = false; - } -} -``` - -### 2.2 Key Integration Points - -1. **Cache Updates**: - - Periodic updates (every 60 minutes) - - On graceful shutdown - - After successful peer connections - - During routing table maintenance - -2. **Cache Usage**: - - During initial bootstrap - - When routing table needs more peers - - As primary source for peer discovery (replacing direct URL fetching) - - Fallback to URL endpoints only when cache is empty/stale - -3. **Configuration**: -```rust -pub struct NetworkBuilder { - bootstrap_cache_config: Option, -} - -impl NetworkBuilder { - pub fn with_bootstrap_cache(mut self, config: BootstrapConfig) -> Self { - self.bootstrap_cache_config = Some(config); - self - } -} -``` - -### 2.3 Implementation Phases - -#### Phase 1: Basic Integration -- Add bootstrap cache as optional component -- Integrate basic cache reading during startup -- Add periodic cache updates -- Replace direct URL fetching with cache-first approach - -#### Phase 2: Enhanced Features -- Add graceful shutdown cache updates -- Implement circuit breaker integration -- Add cache cleanup for unreliable peers -- Integrate with existing peer reliability metrics - -#### Phase 3: Optimization -- Fine-tune update intervals and thresholds -- Add cache performance metrics -- Optimize cache update strategies -- Implement advanced peer selection algorithms - -### 2.4 Benefits and Impact - -1. **Minimal Changes**: - - Preserves existing peer discovery mechanisms - - Maintains current routing table functionality - - Optional integration through configuration - -2. **Enhanced Reliability**: - - Local cache reduces network dependency - - Circuit breaker prevents cascading failures - - Intelligent peer selection based on history - -3. **Better Performance**: - - Faster bootstrap process - - Reduced network requests - - More reliable peer connections - -4. **Seamless Integration**: - - No changes required to client/node APIs - - Backward compatible with existing deployments - - Gradual rollout possible - -## Phase 3: Testing and Validation - -### 3.1 Unit Tests -```rust -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_cache_read_only() { - let store = CacheStore::new().unwrap(); - - // Make file read-only - let mut perms = fs::metadata(&store.cache_path).unwrap().permissions(); - perms.set_readonly(true); - fs::set_permissions(&store.cache_path, perms).unwrap(); - - // Operations should succeed but not modify file - assert!(store.update_peer_status(addr, true).is_ok()); - assert!(store.cleanup_unreliable_peers().is_ok()); - assert!(store.save_to_disk().is_ok()); - } - - #[test] - fn test_peer_reliability() { - let mut store = CacheStore::new().unwrap(); - let addr = NetworkAddress::from_str("/ip4/127.0.0.1/udp/8080").unwrap(); - - // Add successful connections - store.update_peer_status(addr.clone(), true).unwrap(); - store.update_peer_status(addr.clone(), true).unwrap(); - - // Add one failure - store.update_peer_status(addr.clone(), false).unwrap(); - - // Peer should still be considered reliable - store.cleanup_unreliable_peers().unwrap(); - assert!(store.peers.contains_key(&addr)); - } -} -``` - -### 3.2 Integration Tests -Located in `bootstrap_cache/tests/integration_tests.rs`: - -1. **Network Connectivity Tests**: -```rust -#[tokio::test] -async fn test_fetch_from_amazon_s3() { - let discovery = InitialPeerDiscovery::new(); - let peers = discovery.fetch_peers().await.unwrap(); - - // Verify peer multiaddress format - for peer in &peers { - assert!(peer.addr.to_string().contains("/ip4/")); - assert!(peer.addr.to_string().contains("/udp/")); - assert!(peer.addr.to_string().contains("/quic-v1/")); - assert!(peer.addr.to_string().contains("/p2p/")); - } -} -``` - -2. **Mock Server Tests**: -```rust -#[tokio::test] -async fn test_individual_s3_endpoints() { - let mock_server = MockServer::start().await; - // Test failover between endpoints - // Test response parsing - // Test error handling -} -``` - -3. **Format Validation Tests**: -- Verify JSON endpoint responses -- Validate peer address formats -- Test whitespace and empty line handling - -### 3.3 Performance Metrics -- Track peer discovery time -- Monitor cache hit/miss rates -- Measure connection success rates - -### 3.4 Current Status -- ✅ Basic network integration implemented -- ✅ Integration tests covering core functionality -- ✅ Mock server tests for endpoint validation -- ✅ Performance monitoring in place - -### 3.5 Next Steps -1. **Enhanced Testing**: - - Add network partition tests - - Implement chaos testing for network failures - - Add long-running stability tests - -2. **Performance Optimization**: - - Implement connection pooling - - Add parallel connection attempts - - Optimize peer candidate generation - -3. **Monitoring**: - - Add detailed metrics collection - - Implement performance tracking - - Create monitoring dashboards - -## Current Status - -### Completed Work -1. Created `bootstrap_cache` directory with proper file structure -2. Implemented cache file operations with read-only handling -3. Added peer reliability tracking based on success/failure counts -4. Integrated Kademlia routing tables for both nodes and clients - -### Next Steps -1. Implement rate limiting for cache updates -2. Add metrics for peer connection success rates -3. Implement automated peer list pruning -4. Add cross-client cache sharing mechanisms diff --git a/docs/bootstrap_cache_prd.md b/docs/bootstrap_cache_prd.md deleted file mode 100644 index a1e8317e1b..0000000000 --- a/docs/bootstrap_cache_prd.md +++ /dev/null @@ -1,194 +0,0 @@ -# Bootstrap Cache PRD - -## Overview -This document outlines the design and implementation of a decentralized bootstrap cache system for the Safe Network. This system replaces the current centralized "bootstrap node" concept with a fully decentralized approach where all nodes are equal participants. - -## Goals -- Remove the concept of dedicated "bootstrap nodes" -- Implement a shared local cache system for both nodes and clients -- Reduce infrastructure costs -- Improve network stability and decentralization -- Simplify the bootstrapping process - -## Non-Goals -- Creating any form of centralized node discovery -- Implementing DNS-based discovery -- Maintaining long-term connections between nodes -- Running HTTP servers on nodes - -## Technical Design - -### Bootstrap Cache File -- Location: - - Unix/Linux: `/var/safe/bootstrap_cache.json` - - macOS: `/Library/Application Support/Safe/bootstrap_cache.json` - - Windows: `C:\ProgramData\Safe\bootstrap_cache.json` -- Format: JSON file containing: - ```json - { - "last_updated": "ISO-8601-timestamp", - "peers": [ - { - "addr": "multiaddr-string", // e.g., "/ip4/1.2.3.4/udp/1234/quic-v1" - "last_seen": "ISO-8601-timestamp", - "success_count": "number", - "failure_count": "number" - } - ] - } - ``` - -### Cache Management -1. **Writing Cache** - - Write to cache when routing table changes occur - - Write to cache on clean node/client shutdown - - Keep track of successful/failed connection attempts - - Limit cache size to prevent bloat (e.g., 1000 entries) - - Handle file locking for concurrent access from multiple nodes/clients - -2. **Reading Cache** - - On startup, read shared local cache if available - - If cache peers are unreachable: - 1. Try peers from `--peer` argument or `SAFE_PEERS` env var - 2. If none available, fetch from network contacts URL - 3. If local feature enabled, discover through mDNS - - Sort peers by connection success rate - -### Node Implementation -1. **Cache Updates** - - Use Kademlia routing table as source of truth - - Every period, copy nodes from routing table to cache - - Track peer reliability through: - - Successful/failed connection attempts - - Response times - - Data storage and retrieval success rates - -2. **Startup Process** - ```rust - async fn startup() { - // 1. Get initial peers - let peers = PeersArgs::get_peers().await?; - - // 2. Initialize Kademlia with configuration - let kad_cfg = KademliaConfig::new() - .set_kbucket_inserts(Manual) - .set_query_timeout(KAD_QUERY_TIMEOUT_S) - .set_replication_factor(REPLICATION_FACTOR) - .disjoint_query_paths(true); - - // 3. Begin continuous bootstrap process - loop { - bootstrap_with_peers(peers).await?; - - // If we have enough peers, slow down bootstrap attempts - if connected_peers >= K_VALUE { - increase_bootstrap_interval(); - } - - // Update cache with current routing table - update_bootstrap_cache().await?; - - sleep(bootstrap_interval).await; - } - } - ``` - -### Client Implementation -1. **Cache Management** - - Maintain Kademlia routing table in outbound-only mode - - Read from shared bootstrap cache - - Update peer reliability metrics based on: - - Connection success/failure - - Data retrieval success rates - - Response times - -2. **Connection Process** - ```rust - async fn connect() { - // 1. Get initial peers - let peers = PeersArgs::get_peers().await?; - - // 2. Initialize client-mode Kademlia - let kad_cfg = KademliaConfig::new() - .set_kbucket_inserts(Manual) - .set_protocol_support(Outbound) // Clients only make outbound connections - .disjoint_query_paths(true); - - // 3. Connect to peers until we have enough - while connected_peers < K_VALUE { - bootstrap_with_peers(peers).await?; - - // Update peer reliability in cache - update_peer_metrics().await?; - - // Break if we've tried all peers - if all_peers_attempted() { - break; - } - } - } - ``` - -### Peer Acquisition Process -1. **Order of Precedence** - - Command line arguments (`--peer`) - - Environment variables (`SAFE_PEERS`) - - Local discovery (if enabled) - - Network contacts URL - -2. **Network Contacts** - - URL: `https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts` - - Format: One multiaddr per line - - Fallback mechanism when no local peers available - - Retries with exponential backoff (max 7 attempts) - -3. **Local Discovery** - - Uses mDNS when `local` feature is enabled - - Useful for development and testing - - Not used in production environments - -### Cache File Synchronization -1. **File Locking** - - Use file-system level locks for synchronization - - Read locks for cache queries - - Write locks for cache updates - - Exponential backoff for lock acquisition - -2. **Update Process** - ```rust - async fn update_cache(peers: Vec) -> Result<()> { - // 1. Check if file is read-only - if is_readonly(cache_path) { - warn!("Cache file is read-only"); - return Ok(()); - } - - // 2. Acquire write lock - let file = acquire_exclusive_lock(cache_path)?; - - // 3. Perform atomic write - atomic_write(file, peers).await?; - - Ok(()) - } - ``` - -## Success Metrics -- Reduction in bootstrap time -- More evenly distributed network load -- Improved network resilience -- Higher peer connection success rates - -## Security Considerations -- Validate peer multiaddresses before caching -- Protect against malicious cache entries -- Handle file permissions securely -- Prevent cache poisoning attacks -- Implement rate limiting for cache updates - -## Future Enhancements -- Peer prioritization based on network metrics -- Geographic-based peer selection -- Advanced reputation system -- Automated peer list pruning -- Cross-client cache sharing mechanisms diff --git a/prd.md b/prd.md deleted file mode 100644 index a2df93bbea..0000000000 --- a/prd.md +++ /dev/null @@ -1,173 +0,0 @@ -Product Requirements Document for Autonomi Network Enhancements -Introduction - - -This document outlines the product requirements for the development and enhancement of the Autonomi Network (formerly known as the MaidSafe Safe Network). The Autonomi Network is a fully decentralized platform aimed at providing secure, private, and efficient data storage and communication. This document details the necessary work to implement and improve various aspects of the network, including data types, client APIs, network architecture, and payment systems. - - -Objectives - - - • Implement and document four core data types essential for network operations. - • Enhance the network’s decentralization by refining bootstrap mechanisms. - • Define and standardize client API behaviors in a decentralized environment. - • Ensure the client API comprehensively documents all data types. - • Restrict store/get methods to accept only the defined data types. - • Integrate a flexible payment system utilizing EVM and L2 networks with runtime configurability. - - -1. Data Types - - -The Autonomi Network will support four primary data types: - - -1.1 Chunks - - - • Description: Immutable data pieces up to 1 MB in size. - • Naming Convention: The name of a chunk is derived from the hash of its content (hash(content) == name). - • Purpose: Enables content-addressable storage, ensuring data integrity and deduplication. - - -1.2 Registers - - - • Description: Conflict-free Replicated Data Type (CRDT) directed acyclic graphs (DAGs). - • Concurrency Handling: Allows multiple concurrent accesses. In cases of conflicting updates, users are responsible for merging changes, as the network does not handle conflict resolution. - • Use Case: Suitable for collaborative applications where eventual consistency is acceptable. - - -1.3 Transactions - - - • Description: Simple data structures representing value transfers. - • Structure: - • Owner: Identified by a public key. - • Content: May include a value and an optional additional key. - • Outputs: A set of keys indicating recipients of the transaction. - • Validation: Clients must verify the transaction history to ensure correctness. - • Purpose: Facilitates decentralized transactions without central authority oversight. - - -1.4 Vault - - - • Description: Flexible data type up to 1 MB that can encapsulate any developer-defined data structure. - • Ownership: Secured by an owner’s public key. - • Versioning: - • Not a CRDT. - • Includes a user or application-defined counter. - • Nodes retain only the copy with the highest counter value after signature verification. - • Use Case: Ideal for applications requiring custom data storage with version control. - - -2. Network Architecture - - -2.1 Decentralization - - - • The network operates without central servers, promoting resilience and autonomy. - • Bootstrap nodes exist solely for initial network access. - - -2.2 Bootstrap Nodes - - - • Purpose: Aid first-time nodes or clients in connecting to the network. - • Limitations: - • Must not be relied upon for continued operation. - • Designed to be ephemeral and can disappear without affecting the network. - • Distribution: - • New bootstrap nodes can be published via websites, DNS records, or shared among users. - • Users are encouraged to share bootstrap information to foster decentralization. - - -2.3 Bootstrap Cache - - - • Functionality: - • Nodes and clients must collect and maintain their own network contacts after the initial connection. - • This cache is used for reconnecting to the network autonomously. - • Benefit: Eliminates dependence on specific bootstrap nodes, enhancing network robustness. - - -3. Client API - - -3.1 Connection Model - - - • Stateless Connectivity: - • Clients acknowledge that persistent connections are impractical in a decentralized network unless designed to receive unsolicited messages. -(i.e. the client.connect() does not make sense in our current situation.) - • Operational Behavior: - • Clients maintain a list of network addresses. - • For any action, they connect to the nearest node and discover nodes closest to the target address. - • Addresses collected during operations are stored in the bootstrap cache. - - -3.2 Data Types Definition - - - • Centralized Documentation: - • All four data types must be clearly defined and documented within a single section of the API documentation. - • Developer Guidance: - • Provide detailed explanations, usage examples, and best practices for each data type. - - -3.3 Store/Get Methods - - - • Data Type Restrictions: - • The API’s store/get methods are configured to accept only the four defined data types. - • Inputs of other data types are explicitly disallowed to maintain data integrity and consistency. - - -4. Payment System Integration - - -4.1 EVM and L2 Network Utilization - - - • Blockchain Integration: - • Leverage the Ethereum Virtual Machine (EVM) and Layer 2 (L2) networks for transaction processing. - • Runtime Configurability: - • Nodes and clients can modify payment-related settings at runtime. - • Configurable parameters include wallet details, chosen payment networks, and other relevant settings. - - -4.2 Wallet Management - - - • Flexibility: - • Users can change wallets without restarting or recompiling the client or node software. - • Security: - • Ensure secure handling and storage of wallet credentials and transaction data. - - -5. Additional Requirements - - - • Scalability: Design systems to handle network growth without performance degradation. - • Security: Implement robust encryption and authentication mechanisms across all components. - • Performance: Optimize data storage and retrieval processes for efficiency. - • Usability: Provide clear documentation and intuitive interfaces for developers and end-users. - - -6. Documentation and Support - - - • Comprehensive Guides: - • Produce detailed documentation for all new features and changes. - • Include API references, tutorials, and FAQs. - • Community Engagement: - • Encourage community feedback and contributions. - • Provide support channels for troubleshooting and discussions. - - -Conclusion - - -Implementing these requirements will enhance the Autonomi Network’s functionality, security, and user experience. Focusing on decentralization, flexibility, and clear documentation will position the network as a robust platform for decentralized applications and services. diff --git a/refactoring_steps.md b/refactoring_steps.md deleted file mode 100644 index 9f962439c6..0000000000 --- a/refactoring_steps.md +++ /dev/null @@ -1,202 +0,0 @@ -# Refactoring Steps for Autonomi Network - -## Phase 1: Client API Refactoring -1. **Remove Connection Management from API** - - Remove `connect()` method from client API - - Move connection handling into individual operations - - Each operation should handle its own connection lifecycle - - Have a bootstrap mechanism that reads a bootstrrp_cache.json file or passed in via command line or ENV_VAR - - Use the bootstrap cache to connect to the network - - During network requests collect peers connection info - - Every minute update the bootstrap cache (limit entries to last 1500 seen) - - on startup read the bootstrap cache file to get peers to connect to - - on shutdown write the bootstrap cache file - - all internal connect commands will use the nodes we have in ram - - update wasm and python bindings to use all the above - - test before going any further - - -2. **Data Type Operations** - - **Chunks** (Mostly Complete) - - Existing: `chunk_get`, `chunk_upload_with_payment` - - Add: Better error handling for size limits - - Language Bindings: - - Python: - - Implement `chunk_get`, `chunk_upload_with_payment` methods - - Add size validation - - Add comprehensive tests - - Document API usage - - WASM: - - Implement `chunk_get`, `chuunk_upload_with_paymentput` methods - - Add JavaScript examples - - Add integration tests - - Document browser usage - - - **Registers** (Integration Needed) - - Existing in sn_registers: - - CRDT-based implementation - - `merge` operations - - User-managed conflict resolution - - To Add: - - Client API wrappers in autonomi - - Simplified append/merge interface - - Connection handling in operations - - Language Bindings: - - Python: - - Implement register CRUD operations - - Add conflict resolution examples - - Add unit and integration tests - - Document CRDT usage - - WASM: - - Implement register operations - - Add browser-based examples - - Add JavaScript tests - - Document concurrent usage - - - **Scratchpad (Vault)** (Enhancement Needed) - - Existing in sn_protocol: - - Basic scratchpad implementation - - `update_and_sign` functionality - - To Add: - - Client API wrappers in autonomi - - Simplified update/replace interface - - Connection handling in operations - - Language Bindings: - - Python: - - Implement vault operations - - Add encryption examples - - Add comprehensive tests - - Document security features - - WASM: - - Implement vault operations - - Add browser storage examples - - Add security tests - - Document encryption usage - -3. **Transaction System Refactoring** (Priority) - - Make transaction types generic in sn_transfers - - Update client API to support generic transactions - - Implement owner-based validation - - Add support for optional additional keys - - Implement transaction history verification - -## Phase 2: Payment System Integration -1. **EVM Integration** - - Integrate existing EVM implementation - - Add runtime configuration support - - Connect with transaction system - -2. **Payment Processing** - - Integrate with data operations - - Add payment verification - - Implement tracking system - -## Phase 3: Testing and Documentation -1. **Testing** - - Add unit tests for new API methods - - Integration tests for complete workflows - - Payment system integration tests - -2. **Documentation** - - Update API documentation - - Add usage examples - - Document error conditions - - Include best practices - -## Safe Network Health Management - -### Core Parameters - -#### Timing Intervals -- Replication: 90-180 seconds (randomized) -- Bad Node Detection: 300-600 seconds (randomized) -- Uptime Metrics: 10 seconds -- Record Cleanup: 3600 seconds (1 hour) -- Chunk Proof Retry: 15 seconds between attempts - -#### Network Parameters -- Close Group Size: Defined by CLOSE_GROUP_SIZE constant -- Replication Target: REPLICATION_PEERS_COUNT closest nodes -- Minimum Peers: 100 (for bad node detection) -- Bad Node Consensus: Requires close_group_majority() -- Max Chunk Proof Attempts: 3 before marking as bad node - -### Health Management Algorithms - -#### 1. Bad Node Detection -```rust -Process: -1. Triggered every 300-600s when peers > 100 -2. Uses rolling index (0-511) to check different buckets -3. For each bucket: - - Select subset of peers - - Query their closest nodes - - Mark as bad if majority report shunning -4. Records NodeIssue::CloseNodesShunning -``` - -#### 2. Network Replication -```rust -Process: -1. Triggered by: - - Every 90-180s interval - - New peer connection - - Peer removal - - Valid record storage -2. Execution: - - Get closest K_VALUE peers - - Sort by XOR distance - - Verify local storage - - Replicate to REPLICATION_PEERS_COUNT nodes -``` - -#### 3. Routing Table Management -```rust -Components: -1. K-bucket organization by XOR distance -2. Peer tracking and metrics -3. Connection state monitoring -4. Regular table cleanup -5. Dynamic peer replacement -``` - -### Protection Mechanisms - -#### 1. Data Integrity -- Chunk proof verification -- Record validation -- Replication confirmation -- Storage verification - -#### 2. Network Resilience -- Distributed consensus for bad nodes -- Rolling health checks -- Randomized intervals -- Subset checking for efficiency - -#### 3. Resource Optimization -- Periodic cleanup of irrelevant records -- Limited retry attempts -- Targeted replication -- Load distribution through rolling checks - -### Metrics Tracking -- Peer counts and stability -- Replication success rates -- Network connectivity -- Bad node detection events -- Resource usage and cleanup - -### Key Improvements -1. Reduced resource usage in bad node detection -2. Optimized replication targeting -3. Better load distribution -4. Enhanced peer verification -5. Efficient cleanup mechanisms - -This system creates a self-maintaining network capable of: -- Identifying and removing problematic nodes -- Maintaining data redundancy -- Optimizing resource usage -- Ensuring network stability -- Providing reliable peer connections diff --git a/repository_structure.md b/repository_structure.md deleted file mode 100644 index f6dd9b383d..0000000000 --- a/repository_structure.md +++ /dev/null @@ -1,265 +0,0 @@ -# Safe Network Repository Structure and Capabilities - -## Core Components - -### Client Side -1. **autonomi** - Main client implementation - - Primary interface for users to interact with the Safe Network - - Multiple language bindings support (Rust, Python, WASM) - - Features: - - Data operations (chunks, registers) - - Vault operations - - File system operations - - EVM integration - - Components: - - `src/client/` - Core client implementation - - `src/self_encryption.rs` - Data encryption handling - - `src/python.rs` - Python language bindings - - `src/utils.rs` - Utility functions - - Build Features: - - `data` - Basic data operations - - `vault` - Vault operations (includes data and registers) - - `registers` - Register operations - - `fs` - File system operations - - `local` - Local network testing - - `external-signer` - External transaction signing - - Testing: - - `tests/` - Rust integration tests - - `tests-js/` - JavaScript tests - - `examples/` - Usage examples - -2. **autonomi-cli** - Command-line interface - - CLI tool for network interaction - - Components: - - `src/commands/` - CLI command implementations - - `src/access/` - Network access management - - `src/actions/` - Core action implementations - - `src/wallet/` - Wallet management functionality - - `src/commands.rs` - Command routing - - `src/opt.rs` - Command-line options parsing - - `src/utils.rs` - Utility functions - - Features: - - Network access management - - Wallet operations - - Data operations (chunks, registers) - - Command-line parsing and routing - -### Network Node Components -1. **sn_node** - Network Node Implementation - - Core Components: - - `src/node.rs` - Main node implementation - - `src/put_validation.rs` - Data validation logic - - `src/replication.rs` - Data replication handling - - `src/metrics.rs` - Performance monitoring - - `src/python.rs` - Python language bindings - - Features: - - Data validation and storage - - Network message handling - - Metrics collection - - Error handling - - Event processing - - Binary Components: - - `src/bin/` - Executable implementations - -2. **sn_protocol** - Core Protocol Implementation - - Components: - - `src/messages/` - Network message definitions - - `src/storage/` - Storage implementations - - `src/safenode_proto/` - Protocol definitions - - `src/node_rpc.rs` - RPC interface definitions - - Features: - - Message protocol definitions - - Storage protocol - - Node communication protocols - - Version management - -3. **sn_transfers** - Transfer System - - Components: - - `src/cashnotes/` - Digital cash implementation - - `src/transfers/` - Transfer logic - - `src/wallet/` - Wallet implementation - - `src/genesis.rs` - Genesis block handling - - Features: - - Digital cash management - - Transfer operations - - Wallet operations - - Genesis configuration - - Error handling - -### Data Types and Protocol -1. **sn_registers** - Register implementation - - CRDT-based data structures - - Conflict resolution mechanisms - - Concurrent operations handling - -### Network Management and Communication -1. **sn_networking** - Network Communication Layer - - Core Components: - - `src/cmd.rs` - Network command handling - - `src/driver.rs` - Network driver implementation - - `src/record_store.rs` - Data record management - - `src/bootstrap.rs` - Network bootstrap process - - `src/transport/` - Transport layer implementations - - Features: - - Network discovery and bootstrapping - - External address handling - - Relay management - - Replication fetching - - Record store management - - Transfer handling - - Metrics collection - - Event System: - - `src/event/` - Event handling implementation - - Network event processing - - Event-driven architecture - -2. **sn_node_manager** - Node Management System - - Core Components: - - `src/cmd/` - Management commands - - `src/add_services/` - Service management - - `src/config.rs` - Configuration handling - - `src/rpc.rs` - RPC interface - - Features: - - Node deployment and configuration - - Service management - - Local node handling - - RPC client implementation - - Error handling - - Management Tools: - - Binary implementations - - Helper utilities - - Configuration management - -### Networking and Communication -1. **sn_networking** - Network communication - - P2P networking implementation - - Connection management - - Message routing - -2. **sn_peers_acquisition** - Peer discovery - - Bootstrap mechanisms - - Peer management - - Network topology - -### Infrastructure Components -1. **node-launchpad** - Node Deployment System - - Core Components: - - `src/app.rs` - Main application logic - - `src/components/` - UI components - - `src/node_mgmt.rs` - Node management - - `src/node_stats.rs` - Statistics tracking - - `src/config.rs` - Configuration handling - - Features: - - Node deployment and management - - System monitoring - - Configuration management - - Terminal UI interface - - Connection mode handling - - UI Components: - - Custom widgets - - Styling system - - Terminal UI implementation - -2. **nat-detection** - Network Detection System - - Core Components: - - `src/behaviour/` - NAT behavior implementations - - `src/main.rs` - Main detection logic - - Features: - - NAT type detection - - Network connectivity testing - - Behavior analysis - - Connection management - -### Payment and EVM Integration -1. **sn_evm** - EVM Integration System - - Core Components: - - `src/data_payments.rs` - Payment handling for data operations - - `src/amount.rs` - Amount calculations and management - - Features: - - Data payment processing - - Amount handling - - Error management - - Integration with EVM - -2. **evmlib** - EVM Library - - Core Components: - - `src/contract/` - Smart contract handling - - `src/wallet.rs` - Wallet implementation - - `src/transaction.rs` - Transaction processing - - `src/cryptography.rs` - Cryptographic operations - - Features: - - Smart contract management - - Wallet operations - - Transaction handling - - External signer support - - Test network support - - Event handling - - Utility functions - -3. **evm_testnet** - EVM Test Environment - - Features: - - Test network setup - - Development environment - - Testing utilities - -### Utilities and Support -1. **sn_logging** - Logging System - - Core Components: - - `src/appender.rs` - Log appender implementation - - `src/layers.rs` - Logging layers - - `src/metrics.rs` - Metrics integration - - Features: - - Structured logging - - Custom appenders - - Metrics integration - - Error handling - -2. **sn_metrics** - Metrics System - - Features: - - Performance monitoring - - System metrics collection - - Metrics reporting - -3. **sn_build_info** - Build Information - - Features: - - Version management - - Build configuration - - Build information tracking - -4. **test_utils** - Testing Utilities - - Components: - - `src/evm.rs` - EVM testing utilities - - `src/testnet.rs` - Test network utilities - - Features: - - EVM test helpers - - Test network setup - - Common test functions - -5. **sn_auditor** - Network Auditing - - Features: - - Network health monitoring - - Security auditing - - Performance tracking - -## Development Tools -- **adr** - Architecture Decision Records -- **resources** - Additional resources and documentation -- **token_supplies** - Token management utilities - -## Documentation -- **CHANGELOG.md** - Version history -- **CONTRIBUTING.md** - Contribution guidelines -- **README.md** - Project overview -- **prd.md** - Product Requirements Document - -## Build and Configuration -- **Cargo.toml** - Main project configuration -- **Justfile** - Task automation -- **release-plz.toml** - Release configuration -- **reviewpad.yml** - Code review configuration - -## Next Steps -1. Review and validate this structure -2. Identify any missing components or capabilities -3. Begin implementation of refactoring steps as outlined in refactoring_steps.md -4. Focus on client API refactoring as the first priority From 45c26ffaac2db914d1bd327c47d4a673c4112ba8 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Mon, 2 Dec 2024 11:23:08 +0100 Subject: [PATCH 06/21] fix(bootstrap): remove rwlock from the store --- ant-bootstrap-cache/README.md | 5 - ant-bootstrap-cache/src/cache_store.rs | 239 ++++++++---------- ant-bootstrap-cache/src/config.rs | 42 +-- ant-bootstrap-cache/src/lib.rs | 24 +- .../tests/address_format_tests.rs | 96 ++++--- ant-bootstrap-cache/tests/cache_tests.rs | 108 +++----- .../tests/cli_integration_tests.rs | 50 ++-- 7 files changed, 270 insertions(+), 294 deletions(-) diff --git a/ant-bootstrap-cache/README.md b/ant-bootstrap-cache/README.md index 8f02a77a72..35184cdbfb 100644 --- a/ant-bootstrap-cache/README.md +++ b/ant-bootstrap-cache/README.md @@ -10,11 +10,6 @@ A robust peer caching system for the Autonomi Network that provides persistent s - Cross-process safe with file locking - Atomic write operations to prevent cache corruption -### Concurrent Access -- Thread-safe in-memory cache with `RwLock` -- File system level locking for cross-process synchronization -- Shared (read) and exclusive (write) lock support - ### Data Management - Automatic cleanup of stale and unreliable peers - Configurable maximum peer limit diff --git a/ant-bootstrap-cache/src/cache_store.rs b/ant-bootstrap-cache/src/cache_store.rs index 73fe0b8d7b..2db42b5269 100644 --- a/ant-bootstrap-cache/src/cache_store.rs +++ b/ant-bootstrap-cache/src/cache_store.rs @@ -13,10 +13,8 @@ use serde::{Deserialize, Serialize}; use std::fs::{self, File, OpenOptions}; use std::io::{self, Read}; use std::path::PathBuf; -use std::sync::Arc; use std::time::{Duration, SystemTime}; use tempfile::NamedTempFile; -use tokio::sync::RwLock; const PEER_EXPIRY_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 24 hours @@ -81,21 +79,24 @@ impl Default for CacheData { } } -#[derive(Clone)] -pub struct CacheStore { +#[derive(Clone, Debug)] +pub struct BootstrapCacheStore { cache_path: PathBuf, - config: Arc, - data: Arc>, + config: BootstrapConfig, + data: CacheData, /// This is our last known state of the cache on disk, which is shared across all instances. /// This is not updated until `sync_to_disk` is called. - old_shared_state: Arc>, + old_shared_state: CacheData, } -impl CacheStore { +impl BootstrapCacheStore { + pub fn config(&self) -> &BootstrapConfig { + &self.config + } + pub async fn new(config: BootstrapConfig) -> Result { info!("Creating new CacheStore with config: {:?}", config); let cache_path = config.cache_file_path.clone(); - let config = Arc::new(config); // Create cache directory if it doesn't exist if let Some(parent) = cache_path.parent() { @@ -107,11 +108,11 @@ impl CacheStore { } } - let store = Self { + let mut store = Self { cache_path, config, - data: Arc::new(RwLock::new(CacheData::default())), - old_shared_state: Arc::new(RwLock::new(CacheData::default())), + data: CacheData::default(), + old_shared_state: CacheData::default(), }; store.init().await?; @@ -124,7 +125,6 @@ impl CacheStore { pub async fn new_without_init(config: BootstrapConfig) -> Result { info!("Creating new CacheStore with config: {:?}", config); let cache_path = config.cache_file_path.clone(); - let config = Arc::new(config); // Create cache directory if it doesn't exist if let Some(parent) = cache_path.parent() { @@ -139,15 +139,15 @@ impl CacheStore { let store = Self { cache_path, config, - data: Arc::new(RwLock::new(CacheData::default())), - old_shared_state: Arc::new(RwLock::new(CacheData::default())), + data: CacheData::default(), + old_shared_state: CacheData::default(), }; info!("Successfully created CacheStore without initializing the data."); Ok(store) } - pub async fn init(&self) -> Result<()> { + pub async fn init(&mut self) -> Result<()> { let data = if self.cache_path.exists() { info!( "Cache file exists at {:?}, attempting to load", @@ -205,8 +205,8 @@ impl CacheStore { }; // Update the store's data - *self.data.write().await = data.clone(); - *self.old_shared_state.write().await = data; + self.data = data.clone(); + self.old_shared_state = data; // Save the default data to disk self.sync_to_disk().await?; @@ -309,101 +309,58 @@ impl CacheStore { Ok(data) } - pub async fn get_peers(&self) -> Vec { - let data = self.data.read().await; - data.peers.values().cloned().collect() + pub fn get_peers(&self) -> impl Iterator { + self.data.peers.values() } - pub async fn peer_count(&self) -> usize { - let data = self.data.read().await; - data.peers.len() + pub fn peer_count(&self) -> usize { + self.data.peers.len() } - pub async fn get_reliable_peers(&self) -> Vec { - let data = self.data.read().await; - let reliable_peers: Vec<_> = data + pub fn get_reliable_peers(&self) -> impl Iterator { + self.data .peers .values() .filter(|peer| peer.success_count > peer.failure_count) - .cloned() - .collect(); - - // If we have no reliable peers and the cache file is not read-only, - // try to refresh from default endpoints - if reliable_peers.is_empty() - && !self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false) - { - drop(data); - if let Ok(new_data) = Self::fallback_to_default(&self.config).await { - let mut data = self.data.write().await; - *data = new_data; - return data - .peers - .values() - .filter(|peer| peer.success_count > peer.failure_count) - .cloned() - .collect(); - } - } - - reliable_peers } - pub async fn update_peer_status(&self, addr: &Multiaddr, success: bool) { - let mut data = self.data.write().await; - data.update_peer_status(addr, success); + pub fn update_peer_status(&mut self, addr: &Multiaddr, success: bool) { + self.data.update_peer_status(addr, success); } - pub async fn add_peer(&self, addr: Multiaddr) { - let mut data = self.data.write().await; + pub fn add_peer(&mut self, addr: Multiaddr) { let addr_str = addr.to_string(); // Check if we already have this peer - if data.peers.contains_key(&addr_str) { + if self.data.peers.contains_key(&addr_str) { debug!("Updating existing peer {}", addr_str); - if let Some(peer) = data.peers.get_mut(&addr_str) { + if let Some(peer) = self.data.peers.get_mut(&addr_str) { peer.last_seen = SystemTime::now(); } return; } - // If we're at max peers, remove the oldest peer - if data.peers.len() >= self.config.max_peers { - debug!( - "At max peers limit ({}), removing oldest peer", - self.config.max_peers - ); - if let Some((oldest_addr, _)) = data.peers.iter().min_by_key(|(_, peer)| peer.last_seen) - { - let oldest_addr = oldest_addr.clone(); - data.peers.remove(&oldest_addr); - } - } + self.remove_oldest_peers(); // Add the new peer debug!("Adding new peer {} (under max_peers limit)", addr_str); - data.peers.insert(addr_str, BootstrapPeer::new(addr)); + self.data.peers.insert(addr_str, BootstrapPeer::new(addr)); } - pub async fn remove_peer(&self, addr: &str) { - let mut data = self.data.write().await; - data.peers.remove(addr); + pub fn remove_peer(&mut self, addr: &str) { + self.data.peers.remove(addr); } - pub async fn cleanup_stale_and_unreliable_peers(&self) { - let mut data = self.data.write().await; - data.cleanup_stale_and_unreliable_peers(); + pub fn cleanup_stale_and_unreliable_peers(&mut self) { + self.data.cleanup_stale_and_unreliable_peers(); } /// Clear all peers from the cache and save to disk - pub async fn clear_peers_and_save(&self) -> Result<()> { - let mut data = self.data.write().await; - data.peers.clear(); - match self.atomic_write(&data).await { + pub async fn clear_peers_and_save(&mut self) -> Result<()> { + self.data.peers.clear(); + self.old_shared_state.peers.clear(); + + match self.atomic_write().await { Ok(_) => Ok(()), Err(e) => { error!("Failed to save cache to disk: {e}"); @@ -412,17 +369,15 @@ impl CacheStore { } } - pub async fn sync_to_disk(&self) -> Result<()> { + pub async fn sync_to_disk(&mut self) -> Result<()> { if self.config.disable_cache_writing { info!("Cache writing is disabled, skipping sync to disk"); return Ok(()); } - let mut data = self.data.write().await; - let mut old_shared_state = self.old_shared_state.write().await; info!( - "Syncing cache to disk, with data containing: {} peers and old state containing: {} peers", data.peers.len(), - old_shared_state.peers.len() + "Syncing cache to disk, with data containing: {} peers and old state containing: {} peers", self.data.peers.len(), + self.old_shared_state.peers.len() ); // Check if the file is read-only before attempting to write @@ -438,21 +393,38 @@ impl CacheStore { return Ok(()); } - data.cleanup_stale_and_unreliable_peers(); - if let Ok(data_from_file) = Self::load_cache_data(&self.cache_path).await { - data.sync(&old_shared_state, &data_from_file); + self.data.sync(&self.old_shared_state, &data_from_file); // Now the synced version is the old_shared_state - *old_shared_state = data.clone(); } else { warn!("Failed to load cache data from file, overwriting with new data"); } - match self.atomic_write(&data).await { - Ok(_) => Ok(()), - Err(e) => { - error!("Failed to save cache to disk: {e}"); - Err(e) + self.data.cleanup_stale_and_unreliable_peers(); + self.remove_oldest_peers(); + self.old_shared_state = self.data.clone(); + + self.atomic_write().await.inspect_err(|e| { + error!("Failed to save cache to disk: {e}"); + }) + } + + /// Remove the oldest peers until we're under the max_peers limit + fn remove_oldest_peers(&mut self) { + // If we're at max peers, remove the oldest peer + while self.data.peers.len() >= self.config.max_peers { + if let Some((oldest_addr, _)) = self + .data + .peers + .iter() + .min_by_key(|(_, peer)| peer.last_seen) + { + let oldest_addr = oldest_addr.clone(); + debug!( + "At max peers limit ({}), removing oldest peer: {oldest_addr}", + self.config.max_peers + ); + self.data.peers.remove(&oldest_addr); } } } @@ -491,7 +463,7 @@ impl CacheStore { } } - async fn atomic_write(&self, data: &CacheData) -> Result<()> { + async fn atomic_write(&self) -> Result<()> { // Create parent directory if it doesn't exist if let Some(parent) = self.cache_path.parent() { fs::create_dir_all(parent).map_err(Error::from)?; @@ -501,7 +473,7 @@ impl CacheStore { let temp_file = NamedTempFile::new().map_err(Error::from)?; // Write data to temporary file - serde_json::to_writer_pretty(&temp_file, &data).map_err(Error::from)?; + serde_json::to_writer_pretty(&temp_file, &self.data).map_err(Error::from)?; // Open the target file with proper permissions let file = OpenOptions::new() @@ -529,32 +501,35 @@ mod tests { use super::*; use tempfile::tempdir; - async fn create_test_store() -> (CacheStore, PathBuf) { + async fn create_test_store() -> (BootstrapCacheStore, PathBuf) { let temp_dir = tempdir().unwrap(); let cache_file = temp_dir.path().join("cache.json"); - let config = crate::BootstrapConfig::empty().with_cache_path(&cache_file); + let config = crate::BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_file); - let store = CacheStore::new(config).await.unwrap(); + let store = BootstrapCacheStore::new(config).await.unwrap(); (store.clone(), store.cache_path.clone()) } #[tokio::test] async fn test_peer_update_and_save() { - let (store, _) = create_test_store().await; + let (mut store, _) = create_test_store().await; let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); // Manually add a peer without using fallback { - let mut data = store.data.write().await; - data.peers + store + .data + .peers .insert(addr.to_string(), BootstrapPeer::new(addr.clone())); } store.sync_to_disk().await.unwrap(); - store.update_peer_status(&addr, true).await; + store.update_peer_status(&addr, true); - let peers = store.get_peers().await; + let peers = store.get_peers().collect::>(); assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, addr); assert_eq!(peers[0].success_count, 1); @@ -563,95 +538,93 @@ mod tests { #[tokio::test] async fn test_peer_cleanup() { - let (store, _) = create_test_store().await; + let (mut store, _) = create_test_store().await; let good_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); let bad_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8081".parse().unwrap(); // Add peers - store.add_peer(good_addr.clone()).await; - store.add_peer(bad_addr.clone()).await; + store.add_peer(good_addr.clone()); + store.add_peer(bad_addr.clone()); // Make one peer reliable and one unreliable - store.update_peer_status(&good_addr, true).await; + store.update_peer_status(&good_addr, true); // Fail the bad peer more times than max_retries for _ in 0..5 { - store.update_peer_status(&bad_addr, false).await; + store.update_peer_status(&bad_addr, false); } // Clean up unreliable peers - store.cleanup_stale_and_unreliable_peers().await; + store.cleanup_stale_and_unreliable_peers(); // Get all peers (not just reliable ones) - let peers = store.get_peers().await; + let peers = store.get_peers().collect::>(); assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, good_addr); } #[tokio::test] async fn test_peer_not_removed_if_successful() { - let (store, _) = create_test_store().await; + let (mut store, _) = create_test_store().await; let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); // Add a peer and make it successful - store.add_peer(addr.clone()).await; - store.update_peer_status(&addr, true).await; + store.add_peer(addr.clone()); + store.update_peer_status(&addr, true); // Wait a bit tokio::time::sleep(Duration::from_millis(100)).await; // Run cleanup - store.cleanup_stale_and_unreliable_peers().await; + store.cleanup_stale_and_unreliable_peers(); // Verify peer is still there - let peers = store.get_peers().await; + let peers = store.get_peers().collect::>(); assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, addr); } #[tokio::test] async fn test_peer_removed_only_when_unresponsive() { - let (store, _) = create_test_store().await; + let (mut store, _) = create_test_store().await; let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); // Add a peer - store.add_peer(addr.clone()).await; + store.add_peer(addr.clone()); // Make it fail more than successes for _ in 0..3 { - store.update_peer_status(&addr, true).await; + store.update_peer_status(&addr, true); } for _ in 0..4 { - store.update_peer_status(&addr, false).await; + store.update_peer_status(&addr, false); } // Run cleanup - store.cleanup_stale_and_unreliable_peers().await; + store.cleanup_stale_and_unreliable_peers(); // Verify peer is removed - let peers = store.get_peers().await; assert_eq!( - peers.len(), + store.get_peers().count(), 0, "Peer should be removed after max_retries failures" ); // Test with some successes but more failures - store.add_peer(addr.clone()).await; - store.update_peer_status(&addr, true).await; - store.update_peer_status(&addr, true).await; + store.add_peer(addr.clone()); + store.update_peer_status(&addr, true); + store.update_peer_status(&addr, true); for _ in 0..5 { - store.update_peer_status(&addr, false).await; + store.update_peer_status(&addr, false); } // Run cleanup - store.cleanup_stale_and_unreliable_peers().await; + store.cleanup_stale_and_unreliable_peers(); // Verify peer is removed due to more failures than successes - let peers = store.get_peers().await; assert_eq!( - peers.len(), + store.get_peers().count(), 0, "Peer should be removed when failures exceed successes" ); diff --git a/ant-bootstrap-cache/src/config.rs b/ant-bootstrap-cache/src/config.rs index 2c3ab507b7..2191e39a4e 100644 --- a/ant-bootstrap-cache/src/config.rs +++ b/ant-bootstrap-cache/src/config.rs @@ -8,11 +8,19 @@ use crate::error::{Error, Result}; use ant_protocol::version::{get_key_version_str, get_truncate_version_str}; -use std::path::{Path, PathBuf}; +use std::{ + path::{Path, PathBuf}, + time::Duration, +}; use url::Url; const MAX_PEERS: usize = 1500; -// const UPDATE_INTERVAL: Duration = Duration::from_secs(60); + +// Min time until we save the bootstrap cache to disk. 5 mins +const MIN_BOOTSTRAP_CACHE_SAVE_INTERVAL: Duration = Duration::from_secs(5 * 60); + +// Max time until we save the bootstrap cache to disk. 24 hours +const MAX_BOOTSTRAP_CACHE_SAVE_INTERVAL: Duration = Duration::from_secs(24 * 60 * 60); /// Configuration for the bootstrap cache #[derive(Clone, Debug)] @@ -23,10 +31,14 @@ pub struct BootstrapConfig { pub max_peers: usize, /// Path to the bootstrap cache file pub cache_file_path: PathBuf, - // /// How often to update the cache (in seconds) - // pub update_interval: Duration, /// Flag to disable writing to the cache file pub disable_cache_writing: bool, + /// The min time duration until we save the bootstrap cache to disk. + pub min_cache_save_duration: Duration, + /// The max time duration until we save the bootstrap cache to disk. + pub max_cache_save_duration: Duration, + /// The cache save scaling factor. We start with the min_cache_save_duration and scale it up to the max_cache_save_duration. + pub cache_save_scaling_factor: u64, } impl BootstrapConfig { @@ -43,20 +55,24 @@ impl BootstrapConfig { ], max_peers: MAX_PEERS, cache_file_path: default_cache_path()?, - // update_interval: UPDATE_INTERVAL, disable_cache_writing: false, + min_cache_save_duration: MIN_BOOTSTRAP_CACHE_SAVE_INTERVAL, + max_cache_save_duration: MAX_BOOTSTRAP_CACHE_SAVE_INTERVAL, + cache_save_scaling_factor: 2, }) } /// Creates a new BootstrapConfig with empty settings - pub fn empty() -> Self { - Self { + pub fn empty() -> Result { + Ok(Self { endpoints: vec![], max_peers: MAX_PEERS, - cache_file_path: PathBuf::new(), - // update_interval: UPDATE_INTERVAL, + cache_file_path: default_cache_path()?, disable_cache_writing: false, - } + min_cache_save_duration: MIN_BOOTSTRAP_CACHE_SAVE_INTERVAL, + max_cache_save_duration: MAX_BOOTSTRAP_CACHE_SAVE_INTERVAL, + cache_save_scaling_factor: 2, + }) } /// Update the config with custom endpoints @@ -90,12 +106,6 @@ impl BootstrapConfig { self } - // /// Sets the update interval - // pub fn with_update_interval(mut self, update_interval: Duration) -> Self { - // self.update_interval = update_interval; - // self - // } - /// Sets the flag to disable writing to the cache file pub fn with_disable_cache_writing(mut self, disable: bool) -> Self { self.disable_cache_writing = disable; diff --git a/ant-bootstrap-cache/src/lib.rs b/ant-bootstrap-cache/src/lib.rs index 839f6f54c9..00bea856fe 100644 --- a/ant-bootstrap-cache/src/lib.rs +++ b/ant-bootstrap-cache/src/lib.rs @@ -21,11 +21,11 @@ //! # Example //! //! ```no_run -//! use bootstrap_cache::{CacheStore, BootstrapConfig, PeersArgs}; +//! use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig, PeersArgs}; //! use url::Url; //! //! # async fn example() -> Result<(), Box> { -//! let config = BootstrapConfig::new().unwrap(); +//! let config = BootstrapConfig::empty().unwrap(); //! let args = PeersArgs { //! first: false, //! peers: vec![], @@ -33,8 +33,8 @@ //! local: false, //! }; //! -//! let store = CacheStore::from_args(args, config).await?; -//! let peers = store.get_peers().await; +//! let store = BootstrapCacheStore::from_args(args, config).await?; +//! let peers = store.get_peers(); //! # Ok(()) //! # } //! ``` @@ -53,7 +53,7 @@ use std::{fmt, time::SystemTime}; use thiserror::Error; use url::Url; -pub use cache_store::CacheStore; +pub use cache_store::BootstrapCacheStore; pub use config::BootstrapConfig; pub use error::{Error, Result}; pub use initial_peer_discovery::InitialPeerDiscovery; @@ -182,7 +182,7 @@ pub struct PeersArgs { pub local: bool, } -impl CacheStore { +impl BootstrapCacheStore { /// Create a new CacheStore from command line arguments /// This also initializes the store with the provided peers pub async fn from_args(args: PeersArgs, mut config: BootstrapConfig) -> Result { @@ -193,7 +193,7 @@ impl CacheStore { // If this is the first node, return empty store with no fallback if args.first { info!("First node in network, returning empty store"); - let store = Self::new_without_init(config).await?; + let mut store = Self::new_without_init(config).await?; store.clear_peers_and_save().await?; return Ok(store); } @@ -207,7 +207,7 @@ impl CacheStore { } // Create a new store but don't load from cache or fetch from endpoints yet - let store = Self::new_without_init(config).await?; + let mut store = Self::new_without_init(config).await?; // Add peers from environment variable if present if let Ok(env_peers) = std::env::var("SAFE_PEERS") { @@ -215,7 +215,7 @@ impl CacheStore { if let Ok(peer) = peer_str.parse() { if let Some(peer) = craft_valid_multiaddr(&peer) { info!("Adding peer from environment: {}", peer); - store.add_peer(peer).await; + store.add_peer(peer); } else { warn!("Invalid peer address format from environment: {}", peer); } @@ -227,7 +227,7 @@ impl CacheStore { for peer in args.peers { if let Some(peer) = craft_valid_multiaddr(&peer) { info!("Adding peer from arguments: {}", peer); - store.add_peer(peer).await; + store.add_peer(peer); } else { warn!("Invalid peer address format from arguments: {}", peer); } @@ -239,12 +239,12 @@ impl CacheStore { let peer_discovery = InitialPeerDiscovery::with_endpoints(vec![url])?; let peers = peer_discovery.fetch_peers().await?; for peer in peers { - store.add_peer(peer.addr).await; + store.add_peer(peer.addr); } } // If we have peers, update cache and return, else initialize from cache - if store.peer_count().await > 0 { + if store.peer_count() > 0 { info!("Using provided peers and updating cache"); store.sync_to_disk().await?; } else { diff --git a/ant-bootstrap-cache/tests/address_format_tests.rs b/ant-bootstrap-cache/tests/address_format_tests.rs index 00716861f1..b1888ef847 100644 --- a/ant-bootstrap-cache/tests/address_format_tests.rs +++ b/ant-bootstrap-cache/tests/address_format_tests.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap_cache::{BootstrapConfig, CacheStore, PeersArgs}; +use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig, PeersArgs}; use libp2p::{multiaddr::Protocol, Multiaddr}; use std::net::SocketAddrV4; use tempfile::TempDir; @@ -27,12 +27,10 @@ async fn setup() -> (TempDir, BootstrapConfig) { let temp_dir = TempDir::new().unwrap(); let cache_path = temp_dir.path().join("cache.json"); - let config = BootstrapConfig { - cache_file_path: cache_path, - endpoints: vec![], // Empty endpoints to avoid fetching from network - max_peers: 50, - disable_cache_writing: false, - }; + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path) + .with_max_peers(50); (temp_dir, config) } @@ -56,8 +54,8 @@ async fn test_ipv4_socket_address_parsing() -> Result<(), Box>(); assert_eq!(peers.len(), 1, "Should have one peer"); assert_eq!(peers[0].addr, expected_addr, "Address format should match"); @@ -88,8 +86,8 @@ async fn test_multiaddr_format_parsing() -> Result<(), Box>(); assert_eq!(peers.len(), 1, "Should have one peer"); assert_eq!(peers[0].addr, addr, "Address format should match"); } @@ -120,8 +118,8 @@ async fn test_network_contacts_format() -> Result<(), Box local: false, }; - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert_eq!( peers.len(), 2, @@ -161,8 +159,8 @@ async fn test_invalid_address_handling() -> Result<(), Box>(); assert_eq!( peers.len(), 0, @@ -178,8 +176,8 @@ async fn test_invalid_address_handling() -> Result<(), Box>(); assert_eq!( peers.len(), 0, @@ -205,10 +203,12 @@ async fn test_socket_addr_format() -> Result<(), Box> { local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "Should have no peers in local mode"); Ok(()) @@ -227,10 +227,12 @@ async fn test_multiaddr_format() -> Result<(), Box> { local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "Should have no peers in local mode"); Ok(()) @@ -249,10 +251,12 @@ async fn test_invalid_addr_format() -> Result<(), Box> { local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "Should have no peers in local mode"); Ok(()) @@ -271,10 +275,12 @@ async fn test_mixed_addr_formats() -> Result<(), Box> { local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "Should have no peers in local mode"); Ok(()) @@ -293,10 +299,12 @@ async fn test_socket_addr_conversion() -> Result<(), Box> local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "Should have no peers in local mode"); Ok(()) @@ -315,10 +323,12 @@ async fn test_invalid_socket_addr() -> Result<(), Box> { local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "Should have no peers in local mode"); Ok(()) @@ -337,10 +347,12 @@ async fn test_invalid_multiaddr() -> Result<(), Box> { local: true, // Use local mode to avoid getting peers from default endpoints }; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "Should have no peers in local mode"); Ok(()) @@ -359,10 +371,12 @@ async fn test_mixed_valid_invalid_addrs() -> Result<(), Box>(); assert!(peers.is_empty(), "Should have no peers in local mode"); Ok(()) diff --git a/ant-bootstrap-cache/tests/cache_tests.rs b/ant-bootstrap-cache/tests/cache_tests.rs index fe685b2dc3..090addc452 100644 --- a/ant-bootstrap-cache/tests/cache_tests.rs +++ b/ant-bootstrap-cache/tests/cache_tests.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap_cache::{BootstrapConfig, CacheStore}; +use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig}; use libp2p::Multiaddr; use std::time::Duration; use tempfile::TempDir; @@ -18,18 +18,20 @@ async fn test_cache_store_operations() -> Result<(), Box> let cache_path = temp_dir.path().join("cache.json"); // Create cache store with config - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let cache_store = CacheStore::new(config).await?; + let mut cache_store = BootstrapCacheStore::new(config).await?; // Test adding and retrieving peers let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - cache_store.add_peer(addr.clone()).await; - cache_store.update_peer_status(&addr, true).await; + cache_store.add_peer(addr.clone()); + cache_store.update_peer_status(&addr, true); - let peers = cache_store.get_reliable_peers().await; + let peers = cache_store.get_reliable_peers().collect::>(); assert!(!peers.is_empty(), "Cache should contain the added peer"); assert!( peers.iter().any(|p| p.addr == addr), @@ -45,21 +47,23 @@ async fn test_cache_persistence() -> Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create first cache store - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let cache_store1 = CacheStore::new(config.clone()).await?; + let mut cache_store1 = BootstrapCacheStore::new(config.clone()).await?; // Add a peer and mark it as reliable let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - cache_store1.add_peer(addr.clone()).await; - cache_store1.update_peer_status(&addr, true).await; + cache_store1.add_peer(addr.clone()); + cache_store1.update_peer_status(&addr, true); cache_store1.sync_to_disk().await.unwrap(); // Create a new cache store with the same path - let cache_store2 = CacheStore::new(config).await?; - let peers = cache_store2.get_reliable_peers().await; + let cache_store2 = BootstrapCacheStore::new(config).await?; + let peers = cache_store2.get_reliable_peers().collect::>(); assert!(!peers.is_empty(), "Cache should persist across instances"); assert!( @@ -75,20 +79,22 @@ async fn test_cache_reliability_tracking() -> Result<(), Box>(); assert!( peers.iter().any(|p| p.addr == addr), "Peer should be reliable after successful connections" @@ -96,10 +102,10 @@ async fn test_cache_reliability_tracking() -> Result<(), Box>(); assert!( !peers.iter().any(|p| p.addr == addr), "Peer should not be reliable after failed connections" @@ -118,22 +124,24 @@ async fn test_cache_max_peers() -> Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create cache with small max_peers limit - let mut config = BootstrapConfig::empty().with_cache_path(&cache_path); + let mut config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); config.max_peers = 2; - let cache_store = CacheStore::new(config).await?; + let mut cache_store = BootstrapCacheStore::new(config).await?; // Add three peers with distinct timestamps let mut addresses = Vec::new(); for i in 1..=3 { let addr: Multiaddr = format!("/ip4/127.0.0.1/udp/808{}/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER{}", i, i).parse()?; addresses.push(addr.clone()); - cache_store.add_peer(addr).await; + cache_store.add_peer(addr); // Add a delay to ensure distinct timestamps sleep(Duration::from_millis(100)).await; } - let peers = cache_store.get_peers().await; + let peers = cache_store.get_peers().collect::>(); assert_eq!(peers.len(), 2, "Cache should respect max_peers limit"); // Get the addresses of the peers we have @@ -153,71 +161,37 @@ async fn test_cache_max_peers() -> Result<(), Box> { Ok(()) } -#[tokio::test] -async fn test_cache_concurrent_access() -> Result<(), Box> { - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let config = BootstrapConfig::empty().with_cache_path(&cache_path); - let cache_store = CacheStore::new(config).await?; - let cache_store_clone = cache_store.clone(); - - // Create multiple addresses - let addrs: Vec = (1..=5) - .map(|i| format!("/ip4/127.0.0.1/udp/808{}/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER{}", i, i).parse().unwrap()) - .collect(); - - // Spawn a task that adds peers - let add_task = tokio::spawn(async move { - for addr in addrs { - cache_store.add_peer(addr).await; - sleep(Duration::from_millis(10)).await; - } - }); - - // Spawn another task that reads peers - let read_task = tokio::spawn(async move { - for _ in 0..10 { - let _ = cache_store_clone.get_peers().await; - sleep(Duration::from_millis(5)).await; - } - }); - - // Wait for both tasks to complete - tokio::try_join!(add_task, read_task)?; - - Ok(()) -} - #[tokio::test] async fn test_cache_file_corruption() -> Result<(), Box> { let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); // Create cache with some peers - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let cache_store = CacheStore::new_without_init(config.clone()).await?; + let mut cache_store = BootstrapCacheStore::new_without_init(config.clone()).await?; // Add a peer let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER1" .parse()?; - cache_store.add_peer(addr.clone()).await; + cache_store.add_peer(addr.clone()); - assert_eq!(cache_store.get_peers().await.len(), 1); + assert_eq!(cache_store.peer_count(), 1); // Corrupt the cache file tokio::fs::write(&cache_path, "invalid json content").await?; // Create a new cache store - it should handle the corruption gracefully - let new_cache_store = CacheStore::new_without_init(config).await?; - let peers = new_cache_store.get_peers().await; + let mut new_cache_store = BootstrapCacheStore::new_without_init(config).await?; + let peers = new_cache_store.get_peers().collect::>(); assert!(peers.is_empty(), "Cache should be empty after corruption"); // Should be able to add peers again - new_cache_store.add_peer(addr).await; - let peers = new_cache_store.get_peers().await; + new_cache_store.add_peer(addr); + let peers = new_cache_store.get_peers().collect::>(); assert_eq!( peers.len(), 1, diff --git a/ant-bootstrap-cache/tests/cli_integration_tests.rs b/ant-bootstrap-cache/tests/cli_integration_tests.rs index 11868f6949..f730e51e71 100644 --- a/ant-bootstrap-cache/tests/cli_integration_tests.rs +++ b/ant-bootstrap-cache/tests/cli_integration_tests.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap_cache::{BootstrapConfig, CacheStore, PeersArgs}; +use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig, PeersArgs}; use libp2p::Multiaddr; use std::env; use std::fs; @@ -26,7 +26,9 @@ fn init_logging() { async fn setup() -> (TempDir, BootstrapConfig) { let temp_dir = TempDir::new().unwrap(); let cache_path = temp_dir.path().join("cache.json"); - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); (temp_dir, config) } @@ -43,8 +45,8 @@ async fn test_first_flag() -> Result<(), Box> { local: false, }; - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "First node should have no peers"); Ok(()) @@ -66,8 +68,8 @@ async fn test_peer_argument() -> Result<(), Box> { local: false, }; - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert_eq!(peers.len(), 1, "Should have one peer"); assert_eq!( peers[0].addr, peer_addr, @@ -95,10 +97,12 @@ async fn test_safe_peers_env() -> Result<(), Box> { local: false, }; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); // We should have multiple peers (env var + cache/endpoints) assert!(!peers.is_empty(), "Should have peers"); @@ -136,8 +140,8 @@ async fn test_network_contacts_fallback() -> Result<(), Box>(); assert_eq!( peers.len(), 2, @@ -154,7 +158,9 @@ async fn test_local_mode() -> Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create a config with some peers in the cache - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); // Create args with local mode enabled let args = PeersArgs { @@ -164,8 +170,8 @@ async fn test_local_mode() -> Result<(), Box> { local: true, }; - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert!(peers.is_empty(), "Local mode should have no peers"); // Verify cache was not touched @@ -187,7 +193,9 @@ async fn test_test_network_peers() -> Result<(), Box> { "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); let args = PeersArgs { first: false, @@ -196,8 +204,8 @@ async fn test_test_network_peers() -> Result<(), Box> { local: false, }; - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert_eq!(peers.len(), 1, "Should have exactly one test network peer"); assert_eq!( peers[0].addr, peer_addr, @@ -224,7 +232,9 @@ async fn test_peers_update_cache() -> Result<(), Box> { "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - let config = BootstrapConfig::empty().with_cache_path(&cache_path); + let config = BootstrapConfig::empty() + .unwrap() + .with_cache_path(&cache_path); // Create args with peers but no test network mode let args = PeersArgs { @@ -234,8 +244,8 @@ async fn test_peers_update_cache() -> Result<(), Box> { local: false, }; - let store = CacheStore::from_args(args, config).await?; - let peers = store.get_peers().await; + let store = BootstrapCacheStore::from_args(args, config).await?; + let peers = store.get_peers().collect::>(); assert_eq!(peers.len(), 1, "Should have one peer"); assert_eq!(peers[0].addr, peer_addr, "Should have the correct peer"); From f3f7220309b736e9c02eca171f246c6d04994235 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Mon, 2 Dec 2024 13:20:13 +0100 Subject: [PATCH 07/21] feat(bootstrap): wrap the counts when reaching the max bounds --- ant-bootstrap-cache/src/lib.rs | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/ant-bootstrap-cache/src/lib.rs b/ant-bootstrap-cache/src/lib.rs index 00bea856fe..ad63fee0b3 100644 --- a/ant-bootstrap-cache/src/lib.rs +++ b/ant-bootstrap-cache/src/lib.rs @@ -117,11 +117,22 @@ impl BootstrapPeer { pub fn update_status(&mut self, success: bool) { if success { - self.success_count = self.success_count.saturating_add(1); + if let Some(new_value) = self.success_count.checked_add(1) { + self.success_count = new_value; } else { - self.failure_count = self.failure_count.saturating_add(1); + self.success_count = 1; + self.failure_count = 0; + } } self.last_seen = SystemTime::now(); + if !success { + if let Some(new_value) = self.failure_count.checked_add(1) { + self.failure_count = new_value; + } else { + self.failure_count = 1; + self.success_count = 0; + } + } } pub fn is_reliable(&self) -> bool { @@ -155,6 +166,16 @@ impl BootstrapPeer { .failure_count .saturating_add(current_shared_state.failure_count); } + + // if at max value, reset to 0 + if self.success_count == u32::MAX { + self.success_count = 1; + self.failure_count = 0; + } else if self.failure_count == u32::MAX { + self.failure_count = 1; + self.success_count = 0; + } + self.last_seen = std::cmp::max(self.last_seen, current_shared_state.last_seen); } } From f5af65e590efd0fe11da49239f4678f8dd4eb35e Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Mon, 2 Dec 2024 13:20:40 +0100 Subject: [PATCH 08/21] fix(bootstrap): couple more tiny fixes --- ant-bootstrap-cache/src/cache_store.rs | 41 +++++++++++++----------- ant-bootstrap-cache/src/lib.rs | 4 +-- ant-bootstrap-cache/tests/cache_tests.rs | 2 +- 3 files changed, 26 insertions(+), 21 deletions(-) diff --git a/ant-bootstrap-cache/src/cache_store.rs b/ant-bootstrap-cache/src/cache_store.rs index 2db42b5269..0cff00854e 100644 --- a/ant-bootstrap-cache/src/cache_store.rs +++ b/ant-bootstrap-cache/src/cache_store.rs @@ -6,7 +6,9 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use crate::{BootstrapConfig, BootstrapPeer, Error, InitialPeerDiscovery, Result}; +use crate::{ + craft_valid_multiaddr, BootstrapConfig, BootstrapPeer, Error, InitialPeerDiscovery, Result, +}; use fs2::FileExt; use libp2p::Multiaddr; use serde::{Deserialize, Serialize}; @@ -55,14 +57,6 @@ impl CacheData { } }); } - - pub fn update_peer_status(&mut self, addr: &Multiaddr, success: bool) { - let peer = self - .peers - .entry(addr.to_string()) - .or_insert_with(|| BootstrapPeer::new(addr.clone())); - peer.update_status(success); - } } fn default_version() -> u32 { @@ -209,7 +203,7 @@ impl BootstrapCacheStore { self.old_shared_state = data; // Save the default data to disk - self.sync_to_disk().await?; + self.sync_and_save_to_disk(false).await?; Ok(()) } @@ -324,23 +318,30 @@ impl BootstrapCacheStore { .filter(|peer| peer.success_count > peer.failure_count) } + /// Update the status of a peer in the cache. The peer must be added to the cache first. pub fn update_peer_status(&mut self, addr: &Multiaddr, success: bool) { - self.data.update_peer_status(addr, success); + if let Some(peer) = self.data.peers.get_mut(&addr.to_string()) { + peer.update_status(success); + } } pub fn add_peer(&mut self, addr: Multiaddr) { + let Some(addr) = craft_valid_multiaddr(&addr) else { + return; + }; + let addr_str = addr.to_string(); // Check if we already have this peer if self.data.peers.contains_key(&addr_str) { - debug!("Updating existing peer {}", addr_str); + debug!("Updating existing peer's last_seen {addr_str}"); if let Some(peer) = self.data.peers.get_mut(&addr_str) { peer.last_seen = SystemTime::now(); } return; } - self.remove_oldest_peers(); + self.try_remove_oldest_peers(); // Add the new peer debug!("Adding new peer {} (under max_peers limit)", addr_str); @@ -369,7 +370,9 @@ impl BootstrapCacheStore { } } - pub async fn sync_to_disk(&mut self) -> Result<()> { + /// Do not perform cleanup when `data` is fetched from the network. + /// The SystemTime might not be accurate. + pub async fn sync_and_save_to_disk(&mut self, with_cleanup: bool) -> Result<()> { if self.config.disable_cache_writing { info!("Cache writing is disabled, skipping sync to disk"); return Ok(()); @@ -400,8 +403,10 @@ impl BootstrapCacheStore { warn!("Failed to load cache data from file, overwriting with new data"); } - self.data.cleanup_stale_and_unreliable_peers(); - self.remove_oldest_peers(); + if with_cleanup { + self.data.cleanup_stale_and_unreliable_peers(); + self.try_remove_oldest_peers(); + } self.old_shared_state = self.data.clone(); self.atomic_write().await.inspect_err(|e| { @@ -410,7 +415,7 @@ impl BootstrapCacheStore { } /// Remove the oldest peers until we're under the max_peers limit - fn remove_oldest_peers(&mut self) { + fn try_remove_oldest_peers(&mut self) { // If we're at max peers, remove the oldest peer while self.data.peers.len() >= self.config.max_peers { if let Some((oldest_addr, _)) = self @@ -525,7 +530,7 @@ mod tests { .peers .insert(addr.to_string(), BootstrapPeer::new(addr.clone())); } - store.sync_to_disk().await.unwrap(); + store.sync_and_save_to_disk(true).await.unwrap(); store.update_peer_status(&addr, true); diff --git a/ant-bootstrap-cache/src/lib.rs b/ant-bootstrap-cache/src/lib.rs index ad63fee0b3..a7b58eba0f 100644 --- a/ant-bootstrap-cache/src/lib.rs +++ b/ant-bootstrap-cache/src/lib.rs @@ -119,7 +119,7 @@ impl BootstrapPeer { if success { if let Some(new_value) = self.success_count.checked_add(1) { self.success_count = new_value; - } else { + } else { self.success_count = 1; self.failure_count = 0; } @@ -267,7 +267,7 @@ impl BootstrapCacheStore { // If we have peers, update cache and return, else initialize from cache if store.peer_count() > 0 { info!("Using provided peers and updating cache"); - store.sync_to_disk().await?; + store.sync_and_save_to_disk(false).await?; } else { store.init().await?; } diff --git a/ant-bootstrap-cache/tests/cache_tests.rs b/ant-bootstrap-cache/tests/cache_tests.rs index 090addc452..d79793c71c 100644 --- a/ant-bootstrap-cache/tests/cache_tests.rs +++ b/ant-bootstrap-cache/tests/cache_tests.rs @@ -59,7 +59,7 @@ async fn test_cache_persistence() -> Result<(), Box> { .parse()?; cache_store1.add_peer(addr.clone()); cache_store1.update_peer_status(&addr, true); - cache_store1.sync_to_disk().await.unwrap(); + cache_store1.sync_and_save_to_disk(true).await.unwrap(); // Create a new cache store with the same path let cache_store2 = BootstrapCacheStore::new(config).await?; From 62fe7487c288121b4d9999e65cd3214bf8e5bf09 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Tue, 3 Dec 2024 07:00:35 +0100 Subject: [PATCH 09/21] feat(bootstrap): store multiple multiaddr per peer --- Cargo.lock | 1 + ant-bootstrap-cache/Cargo.toml | 3 +- ant-bootstrap-cache/src/cache_store.rs | 324 ++++++++++++------ ant-bootstrap-cache/src/config.rs | 27 ++ ant-bootstrap-cache/src/error.rs | 8 +- .../src/initial_peer_discovery.rs | 191 ++++++----- ant-bootstrap-cache/src/lib.rs | 167 ++++++--- .../tests/address_format_tests.rs | 158 ++++----- ant-bootstrap-cache/tests/cache_tests.rs | 72 ++-- .../tests/cli_integration_tests.rs | 84 +++-- .../tests/integration_tests.rs | 26 +- ant-logging/src/layers.rs | 1 + ant-logging/src/lib.rs | 2 + 13 files changed, 640 insertions(+), 424 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 530d121b73..6e6ec97b7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -726,6 +726,7 @@ dependencies = [ name = "ant-bootstrap-cache" version = "0.1.0" dependencies = [ + "ant-logging", "ant-protocol", "chrono", "dirs-next", diff --git a/ant-bootstrap-cache/Cargo.toml b/ant-bootstrap-cache/Cargo.toml index f1fa098ed6..593126b942 100644 --- a/ant-bootstrap-cache/Cargo.toml +++ b/ant-bootstrap-cache/Cargo.toml @@ -10,6 +10,8 @@ repository = "https://github.com/maidsafe/autonomi" version = "0.1.0" [dependencies] +ant-logging = { path = "../ant-logging", version = "0.2.40" } +ant-protocol = { version = "0.17.15", path = "../ant-protocol" } chrono = { version = "0.4", features = ["serde"] } dirs-next = "~2.0.0" fs2 = "0.4.3" @@ -18,7 +20,6 @@ libp2p = { version = "0.54.1", features = ["serde"] } reqwest = { version = "0.12.2", features = ["json"] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -ant-protocol = { version = "0.17.15", path = "../ant-protocol" } tempfile = "3.8.1" thiserror = "1.0" tokio = { version = "1.0", features = ["full", "sync"] } diff --git a/ant-bootstrap-cache/src/cache_store.rs b/ant-bootstrap-cache/src/cache_store.rs index 0cff00854e..39e14e6928 100644 --- a/ant-bootstrap-cache/src/cache_store.rs +++ b/ant-bootstrap-cache/src/cache_store.rs @@ -7,22 +7,24 @@ // permissions and limitations relating to use of the SAFE Network Software. use crate::{ - craft_valid_multiaddr, BootstrapConfig, BootstrapPeer, Error, InitialPeerDiscovery, Result, + craft_valid_multiaddr, multiaddr_get_peer_id, BootstrapAddr, BootstrapAddresses, + BootstrapConfig, Error, InitialPeerDiscovery, Result, }; use fs2::FileExt; -use libp2p::Multiaddr; +use libp2p::multiaddr::Protocol; +use libp2p::{Multiaddr, PeerId}; use serde::{Deserialize, Serialize}; +use std::collections::hash_map::Entry; +use std::collections::HashMap; use std::fs::{self, File, OpenOptions}; use std::io::{self, Read}; use std::path::PathBuf; use std::time::{Duration, SystemTime}; use tempfile::NamedTempFile; -const PEER_EXPIRY_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 24 hours - #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CacheData { - peers: std::collections::HashMap, + peers: std::collections::HashMap, #[serde(default = "SystemTime::now")] last_updated: SystemTime, #[serde(default = "default_version")] @@ -30,32 +32,104 @@ pub struct CacheData { } impl CacheData { + pub fn insert(&mut self, peer_id: PeerId, bootstrap_addr: BootstrapAddr) { + match self.peers.entry(peer_id) { + Entry::Occupied(mut occupied_entry) => { + occupied_entry.get_mut().insert_addr(&bootstrap_addr); + } + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(BootstrapAddresses(vec![bootstrap_addr])); + } + } + } + /// Sync the self cache with another cache by referencing our old_shared_state. /// Since the cache is updated on periodic interval, we cannot just add our state with the shared state on the fs. - /// This would lead to race conditions, hence th need to store the old shared state and sync it with the new shared state. + /// This would lead to race conditions, hence the need to store the old shared state in memory and sync it with the + /// new shared state obtained from fs. pub fn sync(&mut self, old_shared_state: &CacheData, current_shared_state: &CacheData) { - for (addr, current_shared_peer_state) in current_shared_state.peers.iter() { - let old_shared_peer_state = old_shared_state.peers.get(addr); - // If the peer is in the old state, only update the difference in values - self.peers - .entry(addr.clone()) - .and_modify(|p| p.sync(old_shared_peer_state, current_shared_peer_state)) - .or_insert_with(|| current_shared_peer_state.clone()); + // Add/sync every BootstrapAddresses from shared state into self + for (peer, current_shared_addrs_state) in current_shared_state.peers.iter() { + let old_shared_addrs_state = old_shared_state.peers.get(peer); + let bootstrap_addresses = self + .peers + .entry(*peer) + .or_insert(current_shared_addrs_state.clone()); + + // Add/sync every BootstrapAddr into self + bootstrap_addresses.sync(old_shared_addrs_state, current_shared_addrs_state); } self.last_updated = SystemTime::now(); } - pub fn cleanup_stale_and_unreliable_peers(&mut self) { - self.peers.retain(|_, peer| peer.is_reliable()); - let now = SystemTime::now(); - self.peers.retain(|_, peer| { - if let Ok(duration) = now.duration_since(peer.last_seen) { - duration < PEER_EXPIRY_DURATION - } else { - false + /// Perform cleanup on the Peers + /// - Removes all the unreliable addrs for a peer + /// - Removes all the expired addrs for a peer + /// - Removes all peers with empty addrs set + /// - Maintains `max_addr` per peer by removing the addr with the lowest success rate + /// - Maintains `max_peers` in the list by removing the peer with the oldest last_seen + pub fn perform_cleanup(&mut self, cfg: &BootstrapConfig) { + self.peers.values_mut().for_each(|bootstrap_addresses| { + bootstrap_addresses.0.retain(|bootstrap_addr| { + let now = SystemTime::now(); + let has_not_expired = + if let Ok(duration) = now.duration_since(bootstrap_addr.last_seen) { + duration < cfg.addr_expiry_duration + } else { + false + }; + bootstrap_addr.is_reliable() && has_not_expired + }) + }); + + self.peers + .retain(|_, bootstrap_addresses| !bootstrap_addresses.0.is_empty()); + + self.peers.values_mut().for_each(|bootstrap_addresses| { + if bootstrap_addresses.0.len() > cfg.max_addrs_per_peer { + // sort by lowest failure rate first + bootstrap_addresses + .0 + .sort_by_key(|addr| addr.failure_rate() as u64); + bootstrap_addresses.0.truncate(cfg.max_addrs_per_peer); } }); + + self.try_remove_oldest_peers(cfg); + } + + /// Remove the oldest peers until we're under the max_peers limit + pub fn try_remove_oldest_peers(&mut self, cfg: &BootstrapConfig) { + if self.peers.len() > cfg.max_peers { + let mut peer_last_seen_map = HashMap::new(); + for (peer, addrs) in self.peers.iter() { + let mut latest_seen = Duration::from_secs(u64::MAX); + for addr in addrs.0.iter() { + if let Ok(elapsed) = addr.last_seen.elapsed() { + trace!("Time elapsed for {addr:?} is {elapsed:?}"); + if elapsed < latest_seen { + trace!("Updating latest_seen to {elapsed:?}"); + latest_seen = elapsed; + } + } + } + trace!("Last seen for {peer:?} is {latest_seen:?}"); + peer_last_seen_map.insert(*peer, latest_seen); + } + + while self.peers.len() > cfg.max_peers { + // find the peer with the largest last_seen + if let Some((&oldest_peer, last_seen)) = peer_last_seen_map + .iter() + .max_by_key(|(_, last_seen)| **last_seen) + { + debug!("Found the oldest peer to remove: {oldest_peer:?} with last_seen of {last_seen:?}"); + self.peers.remove(&oldest_peer); + peer_last_seen_map.remove(&oldest_peer); + } + } + } } } @@ -147,7 +221,7 @@ impl BootstrapCacheStore { "Cache file exists at {:?}, attempting to load", self.cache_path ); - match Self::load_cache_data(&self.cache_path).await { + match Self::load_cache_data(&self.config).await { Ok(data) => { info!( "Successfully loaded cache data with {} peers", @@ -224,12 +298,19 @@ impl BootstrapCacheStore { // Try to discover peers from configured endpoints let discovery = InitialPeerDiscovery::with_endpoints(config.endpoints.clone())?; - match discovery.fetch_peers().await { - Ok(peers) => { - info!("Successfully fetched {} peers from endpoints", peers.len()); + match discovery.fetch_bootstrap_addresses().await { + Ok(addrs) => { + info!("Successfully fetched {} peers from endpoints", addrs.len()); // Only add up to max_peers from the discovered peers - for peer in peers.into_iter().take(config.max_peers) { - data.peers.insert(peer.addr.to_string(), peer); + let mut count = 0; + for bootstrap_addr in addrs.into_iter() { + if count >= config.max_peers { + break; + } + if let Some(peer_id) = bootstrap_addr.peer_id() { + data.insert(peer_id, bootstrap_addr); + count += 1; + } } // Create parent directory if it doesn't exist @@ -269,9 +350,9 @@ impl BootstrapCacheStore { } } - async fn load_cache_data(cache_path: &PathBuf) -> Result { + async fn load_cache_data(cfg: &BootstrapConfig) -> Result { // Try to open the file with read permissions - let mut file = match OpenOptions::new().read(true).open(cache_path) { + let mut file = match OpenOptions::new().read(true).open(&cfg.cache_file_path) { Ok(f) => f, Err(e) => { warn!("Failed to open cache file: {}", e); @@ -298,62 +379,88 @@ impl BootstrapCacheStore { Error::FailedToParseCacheData })?; - data.cleanup_stale_and_unreliable_peers(); + data.perform_cleanup(cfg); Ok(data) } - pub fn get_peers(&self) -> impl Iterator { - self.data.peers.values() - } - pub fn peer_count(&self) -> usize { self.data.peers.len() } - pub fn get_reliable_peers(&self) -> impl Iterator { + pub fn get_addrs(&self) -> impl Iterator { + self.data + .peers + .values() + .flat_map(|bootstrap_addresses| bootstrap_addresses.0.iter()) + } + + pub fn get_reliable_addrs(&self) -> impl Iterator { self.data .peers .values() - .filter(|peer| peer.success_count > peer.failure_count) + .flat_map(|bootstrap_addresses| bootstrap_addresses.0.iter()) + .filter(|bootstrap_addr| bootstrap_addr.is_reliable()) } - /// Update the status of a peer in the cache. The peer must be added to the cache first. - pub fn update_peer_status(&mut self, addr: &Multiaddr, success: bool) { - if let Some(peer) = self.data.peers.get_mut(&addr.to_string()) { - peer.update_status(success); + /// Update the status of an addr in the cache. The peer must be added to the cache first. + pub fn update_addr_status(&mut self, addr: &Multiaddr, success: bool) { + if let Some(peer_id) = multiaddr_get_peer_id(addr) { + debug!("Updating addr status: {addr} (success: {success})"); + if let Some(bootstrap_addresses) = self.data.peers.get_mut(&peer_id) { + bootstrap_addresses.update_addr_status(addr, success); + } else { + debug!("Peer not found in cache to update: {addr}"); + } } } - pub fn add_peer(&mut self, addr: Multiaddr) { + /// Add a set of addresses to the cache. + pub fn add_addr(&mut self, addr: Multiaddr) { + debug!("Trying to add new addr: {addr}"); let Some(addr) = craft_valid_multiaddr(&addr) else { return; }; - - let addr_str = addr.to_string(); + let peer_id = match addr.iter().find(|p| matches!(p, Protocol::P2p(_))) { + Some(Protocol::P2p(id)) => id, + _ => return, + }; // Check if we already have this peer - if self.data.peers.contains_key(&addr_str) { - debug!("Updating existing peer's last_seen {addr_str}"); - if let Some(peer) = self.data.peers.get_mut(&addr_str) { - peer.last_seen = SystemTime::now(); + if let Some(bootstrap_addrs) = self.data.peers.get_mut(&peer_id) { + if let Some(bootstrap_addr) = bootstrap_addrs.get_addr_mut(&addr) { + debug!("Updating existing peer's last_seen {addr}"); + bootstrap_addr.last_seen = SystemTime::now(); + return; + } else { + bootstrap_addrs.insert_addr(&BootstrapAddr::new(addr.clone())); } - return; + } else { + self.data.peers.insert( + peer_id, + BootstrapAddresses(vec![BootstrapAddr::new(addr.clone())]), + ); } - self.try_remove_oldest_peers(); - - // Add the new peer - debug!("Adding new peer {} (under max_peers limit)", addr_str); - self.data.peers.insert(addr_str, BootstrapPeer::new(addr)); + debug!("Added new peer {addr:?}, performing cleanup of old addrs"); + self.perform_cleanup(); } - pub fn remove_peer(&mut self, addr: &str) { - self.data.peers.remove(addr); + /// Remove a single address for a peer. + pub fn remove_addr(&mut self, addr: &Multiaddr) { + if let Some(peer_id) = multiaddr_get_peer_id(addr) { + if let Some(bootstrap_addresses) = self.data.peers.get_mut(&peer_id) { + bootstrap_addresses.remove_addr(addr); + } else { + debug!("Peer {peer_id:?} not found in the cache. Not removing addr: {addr:?}") + } + } else { + debug!("Could not obtain PeerId for {addr:?}, not removing addr from cache."); + } } - pub fn cleanup_stale_and_unreliable_peers(&mut self) { - self.data.cleanup_stale_and_unreliable_peers(); + pub fn perform_cleanup(&mut self) { + self.data.perform_cleanup(&self.config); } /// Clear all peers from the cache and save to disk @@ -396,7 +503,7 @@ impl BootstrapCacheStore { return Ok(()); } - if let Ok(data_from_file) = Self::load_cache_data(&self.cache_path).await { + if let Ok(data_from_file) = Self::load_cache_data(&self.config).await { self.data.sync(&self.old_shared_state, &data_from_file); // Now the synced version is the old_shared_state } else { @@ -404,8 +511,8 @@ impl BootstrapCacheStore { } if with_cleanup { - self.data.cleanup_stale_and_unreliable_peers(); - self.try_remove_oldest_peers(); + self.data.perform_cleanup(&self.config); + self.data.try_remove_oldest_peers(&self.config); } self.old_shared_state = self.data.clone(); @@ -414,26 +521,6 @@ impl BootstrapCacheStore { }) } - /// Remove the oldest peers until we're under the max_peers limit - fn try_remove_oldest_peers(&mut self) { - // If we're at max peers, remove the oldest peer - while self.data.peers.len() >= self.config.max_peers { - if let Some((oldest_addr, _)) = self - .data - .peers - .iter() - .min_by_key(|(_, peer)| peer.last_seen) - { - let oldest_addr = oldest_addr.clone(); - debug!( - "At max peers limit ({}), removing oldest peer: {oldest_addr}", - self.config.max_peers - ); - self.data.peers.remove(&oldest_addr); - } - } - } - async fn acquire_shared_lock(file: &File) -> Result<()> { let file = file.try_clone().map_err(Error::from)?; @@ -521,20 +608,21 @@ mod tests { #[tokio::test] async fn test_peer_update_and_save() { let (mut store, _) = create_test_store().await; - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let addr: Multiaddr = + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse() + .unwrap(); // Manually add a peer without using fallback { - store - .data - .peers - .insert(addr.to_string(), BootstrapPeer::new(addr.clone())); + let peer_id = multiaddr_get_peer_id(&addr).unwrap(); + store.data.insert(peer_id, BootstrapAddr::new(addr.clone())); } store.sync_and_save_to_disk(true).await.unwrap(); - store.update_peer_status(&addr, true); + store.update_addr_status(&addr, true); - let peers = store.get_peers().collect::>(); + let peers = store.get_addrs().collect::>(); assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, addr); assert_eq!(peers[0].success_count, 1); @@ -544,26 +632,32 @@ mod tests { #[tokio::test] async fn test_peer_cleanup() { let (mut store, _) = create_test_store().await; - let good_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - let bad_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8081".parse().unwrap(); + let good_addr: Multiaddr = + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse() + .unwrap(); + let bad_addr: Multiaddr = + "/ip4/127.0.0.1/tcp/8081/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5" + .parse() + .unwrap(); // Add peers - store.add_peer(good_addr.clone()); - store.add_peer(bad_addr.clone()); + store.add_addr(good_addr.clone()); + store.add_addr(bad_addr.clone()); // Make one peer reliable and one unreliable - store.update_peer_status(&good_addr, true); + store.update_addr_status(&good_addr, true); // Fail the bad peer more times than max_retries for _ in 0..5 { - store.update_peer_status(&bad_addr, false); + store.update_addr_status(&bad_addr, false); } // Clean up unreliable peers - store.cleanup_stale_and_unreliable_peers(); + store.perform_cleanup(); // Get all peers (not just reliable ones) - let peers = store.get_peers().collect::>(); + let peers = store.get_addrs().collect::>(); assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, good_addr); } @@ -571,20 +665,23 @@ mod tests { #[tokio::test] async fn test_peer_not_removed_if_successful() { let (mut store, _) = create_test_store().await; - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let addr: Multiaddr = + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse() + .unwrap(); // Add a peer and make it successful - store.add_peer(addr.clone()); - store.update_peer_status(&addr, true); + store.add_addr(addr.clone()); + store.update_addr_status(&addr, true); // Wait a bit tokio::time::sleep(Duration::from_millis(100)).await; // Run cleanup - store.cleanup_stale_and_unreliable_peers(); + store.perform_cleanup(); // Verify peer is still there - let peers = store.get_peers().collect::>(); + let peers = store.get_addrs().collect::>(); assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, addr); } @@ -592,44 +689,47 @@ mod tests { #[tokio::test] async fn test_peer_removed_only_when_unresponsive() { let (mut store, _) = create_test_store().await; - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); + let addr: Multiaddr = + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse() + .unwrap(); // Add a peer - store.add_peer(addr.clone()); + store.add_addr(addr.clone()); // Make it fail more than successes for _ in 0..3 { - store.update_peer_status(&addr, true); + store.update_addr_status(&addr, true); } for _ in 0..4 { - store.update_peer_status(&addr, false); + store.update_addr_status(&addr, false); } // Run cleanup - store.cleanup_stale_and_unreliable_peers(); + store.perform_cleanup(); // Verify peer is removed assert_eq!( - store.get_peers().count(), + store.get_addrs().count(), 0, "Peer should be removed after max_retries failures" ); // Test with some successes but more failures - store.add_peer(addr.clone()); - store.update_peer_status(&addr, true); - store.update_peer_status(&addr, true); + store.add_addr(addr.clone()); + store.update_addr_status(&addr, true); + store.update_addr_status(&addr, true); for _ in 0..5 { - store.update_peer_status(&addr, false); + store.update_addr_status(&addr, false); } // Run cleanup - store.cleanup_stale_and_unreliable_peers(); + store.perform_cleanup(); // Verify peer is removed due to more failures than successes assert_eq!( - store.get_peers().count(), + store.get_addrs().count(), 0, "Peer should be removed when failures exceed successes" ); diff --git a/ant-bootstrap-cache/src/config.rs b/ant-bootstrap-cache/src/config.rs index 2191e39a4e..e02fa8a590 100644 --- a/ant-bootstrap-cache/src/config.rs +++ b/ant-bootstrap-cache/src/config.rs @@ -14,8 +14,15 @@ use std::{ }; use url::Url; +/// The duration since last)seen before removing the address of a Peer. +const ADDR_EXPIRY_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 24 hours + +/// Maximum peers to store const MAX_PEERS: usize = 1500; +/// Maximum number of addresses to store for a Peer +const MAX_ADDRS_PER_PEER: usize = 6; + // Min time until we save the bootstrap cache to disk. 5 mins const MIN_BOOTSTRAP_CACHE_SAVE_INTERVAL: Duration = Duration::from_secs(5 * 60); @@ -25,10 +32,14 @@ const MAX_BOOTSTRAP_CACHE_SAVE_INTERVAL: Duration = Duration::from_secs(24 * 60 /// Configuration for the bootstrap cache #[derive(Clone, Debug)] pub struct BootstrapConfig { + /// The duration since last)seen before removing the address of a Peer. + pub addr_expiry_duration: Duration, /// List of bootstrap endpoints to fetch peer information from pub endpoints: Vec, /// Maximum number of peers to keep in the cache pub max_peers: usize, + /// Maximum number of addresses stored per peer. + pub max_addrs_per_peer: usize, /// Path to the bootstrap cache file pub cache_file_path: PathBuf, /// Flag to disable writing to the cache file @@ -45,6 +56,7 @@ impl BootstrapConfig { /// Creates a new BootstrapConfig with default settings pub fn default_config() -> Result { Ok(Self { + addr_expiry_duration: ADDR_EXPIRY_DURATION, endpoints: vec![ "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json" .parse() @@ -54,6 +66,7 @@ impl BootstrapConfig { .expect("Failed to parse URL"), ], max_peers: MAX_PEERS, + max_addrs_per_peer: MAX_ADDRS_PER_PEER, cache_file_path: default_cache_path()?, disable_cache_writing: false, min_cache_save_duration: MIN_BOOTSTRAP_CACHE_SAVE_INTERVAL, @@ -65,8 +78,10 @@ impl BootstrapConfig { /// Creates a new BootstrapConfig with empty settings pub fn empty() -> Result { Ok(Self { + addr_expiry_duration: ADDR_EXPIRY_DURATION, endpoints: vec![], max_peers: MAX_PEERS, + max_addrs_per_peer: MAX_ADDRS_PER_PEER, cache_file_path: default_cache_path()?, disable_cache_writing: false, min_cache_save_duration: MIN_BOOTSTRAP_CACHE_SAVE_INTERVAL, @@ -75,6 +90,12 @@ impl BootstrapConfig { }) } + /// Set a new addr expiry duration + pub fn with_addr_expiry_duration(mut self, duration: Duration) -> Self { + self.addr_expiry_duration = duration; + self + } + /// Update the config with custom endpoints pub fn with_endpoints(mut self, endpoints: Vec) -> Self { self.endpoints = endpoints; @@ -106,6 +127,12 @@ impl BootstrapConfig { self } + /// Sets the maximum number of addresses for a single peer. + pub fn with_addrs_per_peer(mut self, max_addrs: usize) -> Self { + self.max_addrs_per_peer = max_addrs; + self + } + /// Sets the flag to disable writing to the cache file pub fn with_disable_cache_writing(mut self, disable: bool) -> Self { self.disable_cache_writing = disable; diff --git a/ant-bootstrap-cache/src/error.rs b/ant-bootstrap-cache/src/error.rs index bcccf9064c..92bb997d63 100644 --- a/ant-bootstrap-cache/src/error.rs +++ b/ant-bootstrap-cache/src/error.rs @@ -14,10 +14,10 @@ pub enum Error { FailedToParseCacheData, #[error("Could not obtain data directory")] CouldNotObtainDataDir, - #[error("Could not obtain bootstrap peers from {0} after {1} retries")] - FailedToObtainPeersFromUrl(String, usize), - #[error("No peers found: {0}")] - NoPeersFound(String), + #[error("Could not obtain bootstrap addresses from {0} after {1} retries")] + FailedToObtainAddrsFromUrl(String, usize), + #[error("No Bootstrap Addresses found: {0}")] + NoBootstrapAddressesFound(String), #[error("Invalid response: {0}")] InvalidResponse(String), #[error("IO error: {0}")] diff --git a/ant-bootstrap-cache/src/initial_peer_discovery.rs b/ant-bootstrap-cache/src/initial_peer_discovery.rs index ee9050f8a2..c8cf0ae6e5 100644 --- a/ant-bootstrap-cache/src/initial_peer_discovery.rs +++ b/ant-bootstrap-cache/src/initial_peer_discovery.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use crate::{craft_valid_multiaddr_from_str, BootstrapEndpoints, BootstrapPeer, Error, Result}; +use crate::{craft_valid_multiaddr_from_str, BootstrapAddr, BootstrapEndpoints, Error, Result}; use futures::stream::{self, StreamExt}; use reqwest::Client; use std::time::Duration; @@ -54,19 +54,22 @@ impl InitialPeerDiscovery { }) } - /// Fetch peers from all configured endpoints - pub async fn fetch_peers(&self) -> Result> { + /// Fetch BootstrapAddr from all configured endpoints + pub async fn fetch_bootstrap_addresses(&self) -> Result> { info!( "Starting peer discovery from {} endpoints: {:?}", self.endpoints.len(), self.endpoints ); - let mut peers = Vec::new(); + let mut bootstrap_addresses = Vec::new(); let mut last_error = None; let mut fetches = stream::iter(self.endpoints.clone()) .map(|endpoint| async move { - info!("Attempting to fetch peers from endpoint: {}", endpoint); + info!( + "Attempting to fetch bootstrap addresses from endpoint: {}", + endpoint + ); ( Self::fetch_from_endpoint(self.request_client.clone(), &endpoint).await, endpoint, @@ -76,56 +79,62 @@ impl InitialPeerDiscovery { while let Some((result, endpoint)) = fetches.next().await { match result { - Ok(mut endpoint_peers) => { + Ok(mut endpoing_bootstrap_addresses) => { info!( - "Successfully fetched {} peers from {}. First few peers: {:?}", - endpoint_peers.len(), + "Successfully fetched {} bootstrap addrs from {}. First few addrs: {:?}", + endpoing_bootstrap_addresses.len(), endpoint, - endpoint_peers.iter().take(3).collect::>() + endpoing_bootstrap_addresses + .iter() + .take(3) + .collect::>() ); - peers.append(&mut endpoint_peers); + bootstrap_addresses.append(&mut endpoing_bootstrap_addresses); } Err(e) => { - warn!("Failed to fetch peers from {}: {}", endpoint, e); + warn!("Failed to fetch bootstrap addrs from {}: {}", endpoint, e); last_error = Some(e); } } } - if peers.is_empty() { + if bootstrap_addresses.is_empty() { last_error.map_or_else( || { - warn!("No peers found from any endpoint and no errors reported"); - Err(Error::NoPeersFound( + warn!("No bootstrap addrs found from any endpoint and no errors reported"); + Err(Error::NoBootstrapAddressesFound( "No valid peers found from any endpoint".to_string(), )) }, |e| { - warn!("No peers found from any endpoint. Last error: {}", e); - Err(Error::NoPeersFound(format!( - "No valid peers found from any endpoint: {e}", + warn!( + "No bootstrap addrs found from any endpoint. Last error: {}", + e + ); + Err(Error::NoBootstrapAddressesFound(format!( + "No valid bootstrap addrs found from any endpoint: {e}", ))) }, ) } else { info!( - "Successfully discovered {} total peers. First few: {:?}", - peers.len(), - peers.iter().take(3).collect::>() + "Successfully discovered {} total addresses. First few: {:?}", + bootstrap_addresses.len(), + bootstrap_addresses.iter().take(3).collect::>() ); - Ok(peers) + Ok(bootstrap_addresses) } } - /// Fetch the list of bootstrap peer from a single endpoint + /// Fetch the list of bootstrap addresses from a single endpoint async fn fetch_from_endpoint( request_client: Client, endpoint: &Url, - ) -> Result> { + ) -> Result> { info!("Fetching peers from endpoint: {endpoint}"); let mut retries = 0; - let peers = loop { + let bootstrap_addresses = loop { let response = request_client.get(endpoint.clone()).send().await; match response { @@ -134,12 +143,12 @@ impl InitialPeerDiscovery { let text = response.text().await?; match Self::try_parse_response(&text) { - Ok(peers) => break peers, + Ok(addrs) => break addrs, Err(err) => { warn!("Failed to parse response with err: {err:?}"); retries += 1; if retries >= MAX_RETRIES_ON_FETCH_FAILURE { - return Err(Error::FailedToObtainPeersFromUrl( + return Err(Error::FailedToObtainAddrsFromUrl( endpoint.to_string(), MAX_RETRIES_ON_FETCH_FAILURE, )); @@ -149,7 +158,7 @@ impl InitialPeerDiscovery { } else { retries += 1; if retries >= MAX_RETRIES_ON_FETCH_FAILURE { - return Err(Error::FailedToObtainPeersFromUrl( + return Err(Error::FailedToObtainAddrsFromUrl( endpoint.to_string(), MAX_RETRIES_ON_FETCH_FAILURE, )); @@ -157,10 +166,10 @@ impl InitialPeerDiscovery { } } Err(err) => { - error!("Failed to get peers from URL {endpoint}: {err:?}"); + error!("Failed to get bootstrap addrs from URL {endpoint}: {err:?}"); retries += 1; if retries >= MAX_RETRIES_ON_FETCH_FAILURE { - return Err(Error::FailedToObtainPeersFromUrl( + return Err(Error::FailedToObtainAddrsFromUrl( endpoint.to_string(), MAX_RETRIES_ON_FETCH_FAILURE, )); @@ -168,62 +177,65 @@ impl InitialPeerDiscovery { } } trace!( - "Failed to get peers from URL, retrying {retries}/{MAX_RETRIES_ON_FETCH_FAILURE}" + "Failed to get bootstrap addrs from URL, retrying {retries}/{MAX_RETRIES_ON_FETCH_FAILURE}" ); tokio::time::sleep(Duration::from_secs(1)).await; }; - Ok(peers) + Ok(bootstrap_addresses) } /// Try to parse a response from a endpoint - fn try_parse_response(response: &str) -> Result> { + fn try_parse_response(response: &str) -> Result> { match serde_json::from_str::(response) { Ok(json_endpoints) => { info!( "Successfully parsed JSON response with {} peers", json_endpoints.peers.len() ); - let peers = json_endpoints + let bootstrap_addresses = json_endpoints .peers .into_iter() .filter_map(|addr_str| craft_valid_multiaddr_from_str(&addr_str)) - .map(BootstrapPeer::new) + .map(BootstrapAddr::new) .collect::>(); - if peers.is_empty() { + if bootstrap_addresses.is_empty() { warn!("No valid peers found in JSON response"); - Err(Error::NoPeersFound( + Err(Error::NoBootstrapAddressesFound( "No valid peers found in JSON response".to_string(), )) } else { - info!("Successfully parsed {} valid peers from JSON", peers.len()); - Ok(peers) + info!( + "Successfully parsed {} valid peers from JSON", + bootstrap_addresses.len() + ); + Ok(bootstrap_addresses) } } Err(e) => { info!("Attempting to parse response as plain text"); // Try parsing as plain text with one multiaddr per line // example of contacts file exists in resources/network-contacts-examples - let peers = response + let bootstrap_addresses = response .split('\n') .filter_map(craft_valid_multiaddr_from_str) - .map(BootstrapPeer::new) + .map(BootstrapAddr::new) .collect::>(); - if peers.is_empty() { + if bootstrap_addresses.is_empty() { warn!( - "No valid peers found in plain text response. Previous Json error: {e:?}" + "No valid bootstrap addrs found in plain text response. Previous Json error: {e:?}" ); - Err(Error::NoPeersFound( - "No valid peers found in plain text response".to_string(), + Err(Error::NoBootstrapAddressesFound( + "No valid bootstrap addrs found in plain text response".to_string(), )) } else { info!( - "Successfully parsed {} valid peers from plain text", - peers.len() + "Successfully parsed {} valid bootstrap addrs from plain text", + bootstrap_addresses.len() ); - Ok(peers) + Ok(bootstrap_addresses) } } } @@ -240,14 +252,14 @@ mod tests { }; #[tokio::test] - async fn test_fetch_peers() { + async fn test_fetch_addrs() { let mock_server = MockServer::start().await; Mock::given(method("GET")) .and(path("/")) .respond_with( ResponseTemplate::new(200) - .set_body_string("/ip4/127.0.0.1/tcp/8080\n/ip4/127.0.0.2/tcp/8080"), + .set_body_string("/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE\n/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5"), ) .mount(&mock_server) .await; @@ -255,13 +267,19 @@ mod tests { let mut discovery = InitialPeerDiscovery::new().unwrap(); discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 2); + let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + assert_eq!(addrs.len(), 2); - let addr1: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - let addr2: Multiaddr = "/ip4/127.0.0.2/tcp/8080".parse().unwrap(); - assert!(peers.iter().any(|p| p.addr == addr1)); - assert!(peers.iter().any(|p| p.addr == addr2)); + let addr1: Multiaddr = + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse() + .unwrap(); + let addr2: Multiaddr = + "/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5" + .parse() + .unwrap(); + assert!(addrs.iter().any(|p| p.addr == addr1)); + assert!(addrs.iter().any(|p| p.addr == addr2)); } #[tokio::test] @@ -279,7 +297,9 @@ mod tests { // Second endpoint succeeds Mock::given(method("GET")) .and(path("/")) - .respond_with(ResponseTemplate::new(200).set_body_string("/ip4/127.0.0.1/tcp/8080")) + .respond_with(ResponseTemplate::new(200).set_body_string( + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5", + )) .mount(&mock_server2) .await; @@ -289,11 +309,14 @@ mod tests { mock_server2.uri().parse().unwrap(), ]; - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 1); + let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + assert_eq!(addrs.len(), 1); - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - assert_eq!(peers[0].addr, addr); + let addr: Multiaddr = + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5" + .parse() + .unwrap(); + assert_eq!(addrs[0].addr, addr); } #[tokio::test] @@ -304,7 +327,7 @@ mod tests { .and(path("/")) .respond_with( ResponseTemplate::new(200).set_body_string( - "/ip4/127.0.0.1/tcp/8080\ninvalid-addr\n/ip4/127.0.0.2/tcp/8080", + "/ip4/127.0.0.1/tcp/8080\n/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5", ), ) .mount(&mock_server) @@ -313,9 +336,12 @@ mod tests { let mut discovery = InitialPeerDiscovery::new().unwrap(); discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; - let peers = discovery.fetch_peers().await.unwrap(); - let valid_addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - assert_eq!(peers[0].addr, valid_addr); + let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let valid_addr: Multiaddr = + "/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5" + .parse() + .unwrap(); + assert_eq!(addrs[0].addr, valid_addr); } #[tokio::test] @@ -331,9 +357,9 @@ mod tests { let mut discovery = InitialPeerDiscovery::new().unwrap(); discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; - let result = discovery.fetch_peers().await; + let result = discovery.fetch_bootstrap_addresses().await; - assert!(matches!(result, Err(Error::NoPeersFound(_)))); + assert!(matches!(result, Err(Error::NoBootstrapAddressesFound(_)))); } #[tokio::test] @@ -343,7 +369,7 @@ mod tests { Mock::given(method("GET")) .and(path("/")) .respond_with( - ResponseTemplate::new(200).set_body_string("\n \n/ip4/127.0.0.1/tcp/8080\n \n"), + ResponseTemplate::new(200).set_body_string("\n \n/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5\n \n"), ) .mount(&mock_server) .await; @@ -351,11 +377,14 @@ mod tests { let mut discovery = InitialPeerDiscovery::new().unwrap(); discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 1); + let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + assert_eq!(addrs.len(), 1); - let addr: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - assert_eq!(peers[0].addr, addr); + let addr: Multiaddr = + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5" + .parse() + .unwrap(); + assert_eq!(addrs[0].addr, addr); } #[tokio::test] @@ -384,7 +413,7 @@ mod tests { Mock::given(method("GET")) .and(path("/")) .respond_with(ResponseTemplate::new(200).set_body_string( - r#"{"peers": ["/ip4/127.0.0.1/tcp/8080", "/ip4/127.0.0.2/tcp/8080"]}"#, + r#"{"peers": ["/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5", "/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE"]}"#, )) .mount(&mock_server) .await; @@ -392,12 +421,18 @@ mod tests { let mut discovery = InitialPeerDiscovery::new().unwrap(); discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 2); + let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + assert_eq!(addrs.len(), 2); - let addr1: Multiaddr = "/ip4/127.0.0.1/tcp/8080".parse().unwrap(); - let addr2: Multiaddr = "/ip4/127.0.0.2/tcp/8080".parse().unwrap(); - assert!(peers.iter().any(|p| p.addr == addr1)); - assert!(peers.iter().any(|p| p.addr == addr2)); + let addr1: Multiaddr = + "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5" + .parse() + .unwrap(); + let addr2: Multiaddr = + "/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" + .parse() + .unwrap(); + assert!(addrs.iter().any(|p| p.addr == addr1)); + assert!(addrs.iter().any(|p| p.addr == addr2)); } } diff --git a/ant-bootstrap-cache/src/lib.rs b/ant-bootstrap-cache/src/lib.rs index a7b58eba0f..37caedd3bd 100644 --- a/ant-bootstrap-cache/src/lib.rs +++ b/ant-bootstrap-cache/src/lib.rs @@ -28,13 +28,13 @@ //! let config = BootstrapConfig::empty().unwrap(); //! let args = PeersArgs { //! first: false, -//! peers: vec![], +//! addrs: vec![], //! network_contacts_url: Some(Url::parse("https://example.com/peers")?), //! local: false, //! }; //! //! let store = BootstrapCacheStore::from_args(args, config).await?; -//! let peers = store.get_peers(); +//! let addrs = store.get_addrs(); //! # Ok(()) //! # } //! ``` @@ -47,9 +47,9 @@ pub mod config; mod error; mod initial_peer_discovery; -use libp2p::{multiaddr::Protocol, Multiaddr}; +use libp2p::{multiaddr::Protocol, Multiaddr, PeerId}; use serde::{Deserialize, Serialize}; -use std::{fmt, time::SystemTime}; +use std::time::SystemTime; use thiserror::Error; use url::Url; @@ -92,20 +92,78 @@ impl Default for EndpointMetadata { } } -/// A peer that can be used for bootstrapping into the network #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct BootstrapPeer { +/// Set of addresses for a particular PeerId +pub struct BootstrapAddresses(pub Vec); + +impl BootstrapAddresses { + pub fn insert_addr(&mut self, addr: &BootstrapAddr) { + if let Some(bootstrap_addr) = self.get_addr_mut(&addr.addr) { + bootstrap_addr.sync(None, addr); + } else { + self.0.push(addr.clone()); + } + } + + pub fn get_addr(&self, addr: &Multiaddr) -> Option<&BootstrapAddr> { + self.0 + .iter() + .find(|bootstrap_addr| &bootstrap_addr.addr == addr) + } + + pub fn get_addr_mut(&mut self, addr: &Multiaddr) -> Option<&mut BootstrapAddr> { + self.0 + .iter_mut() + .find(|bootstrap_addr| &bootstrap_addr.addr == addr) + } + + pub fn remove_addr(&mut self, addr: &Multiaddr) { + if let Some(idx) = self + .0 + .iter() + .position(|bootstrap_addr| &bootstrap_addr.addr == addr) + { + let bootstrap_addr = self.0.remove(idx); + debug!("Removed {bootstrap_addr:?}"); + } + } + + pub fn sync(&mut self, old_shared_state: Option<&Self>, current_shared_state: &Self) { + for current_bootstrap_addr in current_shared_state.0.iter() { + if let Some(bootstrap_addr) = self.get_addr_mut(¤t_bootstrap_addr.addr) { + let old_bootstrap_addr = old_shared_state.and_then(|old_shared_state| { + old_shared_state.get_addr(¤t_bootstrap_addr.addr) + }); + bootstrap_addr.sync(old_bootstrap_addr, current_bootstrap_addr); + } else { + self.insert_addr(current_bootstrap_addr); + } + } + } + + pub fn update_addr_status(&mut self, addr: &Multiaddr, success: bool) { + if let Some(bootstrap_addr) = self.get_addr_mut(addr) { + bootstrap_addr.update_status(success); + } else { + debug!("Addr not found in cache to update, skipping: {addr:?}") + } + } +} + +/// A addr that can be used for bootstrapping into the network +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BootstrapAddr { /// The multiaddress of the peer pub addr: Multiaddr, - /// The number of successful connections to this peer + /// The number of successful connections to this address pub success_count: u32, - /// The number of failed connection attempts to this peer + /// The number of failed connection attempts to this address pub failure_count: u32, - /// The last time this peer was successfully contacted + /// The last time this address was successfully contacted pub last_seen: SystemTime, } -impl BootstrapPeer { +impl BootstrapAddr { pub fn new(addr: Multiaddr) -> Self { Self { addr, @@ -115,6 +173,10 @@ impl BootstrapPeer { } } + pub fn peer_id(&self) -> Option { + multiaddr_get_peer_id(&self.addr) + } + pub fn update_status(&mut self, success: bool) { if success { if let Some(new_value) = self.success_count.checked_add(1) { @@ -135,14 +197,18 @@ impl BootstrapPeer { } } + // An addr is considered reliable if it has more successes than failures pub fn is_reliable(&self) -> bool { - // A peer is considered reliable if it has more successes than failures self.success_count >= self.failure_count } /// If the peer has a old state, just update the difference in values /// If the peer has no old state, add the values pub fn sync(&mut self, old_shared_state: Option<&Self>, current_shared_state: &Self) { + if self.last_seen == current_shared_state.last_seen { + return; + } + if let Some(old_shared_state) = old_shared_state { let success_difference = self .success_count @@ -178,15 +244,13 @@ impl BootstrapPeer { self.last_seen = std::cmp::max(self.last_seen, current_shared_state.last_seen); } -} -impl fmt::Display for BootstrapPeer { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "BootstrapPeer {{ addr: {}, last_seen: {:?}, success: {}, failure: {} }}", - self.addr, self.last_seen, self.success_count, self.failure_count - ) + fn failure_rate(&self) -> f64 { + if self.success_count + self.failure_count == 0 { + 0.0 + } else { + self.failure_count as f64 / (self.success_count + self.failure_count) as f64 + } } } @@ -195,8 +259,8 @@ impl fmt::Display for BootstrapPeer { pub struct PeersArgs { /// First node in the network pub first: bool, - /// List of peer addresses - pub peers: Vec, + /// List of addresses + pub addrs: Vec, /// URL to fetch network contacts from pub network_contacts_url: Option, /// Use only local discovery (mDNS) @@ -205,7 +269,7 @@ pub struct PeersArgs { impl BootstrapCacheStore { /// Create a new CacheStore from command line arguments - /// This also initializes the store with the provided peers + /// This also initializes the store with the provided bootstrap addresses pub async fn from_args(args: PeersArgs, mut config: BootstrapConfig) -> Result { if let Some(url) = &args.network_contacts_url { config.endpoints.push(url.clone()); @@ -230,37 +294,40 @@ impl BootstrapCacheStore { // Create a new store but don't load from cache or fetch from endpoints yet let mut store = Self::new_without_init(config).await?; - // Add peers from environment variable if present - if let Ok(env_peers) = std::env::var("SAFE_PEERS") { - for peer_str in env_peers.split(',') { - if let Ok(peer) = peer_str.parse() { - if let Some(peer) = craft_valid_multiaddr(&peer) { - info!("Adding peer from environment: {}", peer); - store.add_peer(peer); + // Add addrs from environment variable if present + if let Ok(env_string) = std::env::var("SAFE_PEERS") { + for multiaddr_str in env_string.split(',') { + if let Ok(addr) = multiaddr_str.parse() { + if let Some(addr) = craft_valid_multiaddr(&addr) { + info!("Adding addr from environment: {addr}",); + store.add_addr(addr); } else { - warn!("Invalid peer address format from environment: {}", peer); + warn!("Invalid peer address format from environment: {}", addr); } } } } - // Add peers from arguments if present - for peer in args.peers { - if let Some(peer) = craft_valid_multiaddr(&peer) { - info!("Adding peer from arguments: {}", peer); - store.add_peer(peer); + // Add addrs from arguments if present + for addr in args.addrs { + if let Some(addr) = craft_valid_multiaddr(&addr) { + info!("Adding addr from arguments: {addr}"); + store.add_addr(addr); } else { - warn!("Invalid peer address format from arguments: {}", peer); + warn!("Invalid multiaddress format from arguments: {addr}"); } } - // If we have a network contacts URL, fetch peers from there. + // If we have a network contacts URL, fetch addrs from there. if let Some(url) = args.network_contacts_url { - info!("Fetching peers from network contacts URL: {}", url); + info!( + "Fetching bootstrap address from network contacts URL: {}", + url + ); let peer_discovery = InitialPeerDiscovery::with_endpoints(vec![url])?; - let peers = peer_discovery.fetch_peers().await?; - for peer in peers { - store.add_peer(peer.addr); + let bootstrap_addresses = peer_discovery.fetch_bootstrap_addresses().await?; + for addr in bootstrap_addresses { + store.add_addr(addr.addr); } } @@ -278,6 +345,10 @@ impl BootstrapCacheStore { /// Craft a proper address to avoid any ill formed addresses pub fn craft_valid_multiaddr(addr: &Multiaddr) -> Option { + let peer_id = addr + .iter() + .find(|protocol| matches!(protocol, Protocol::P2p(_)))?; + let mut output_address = Multiaddr::empty(); let ip = addr @@ -314,12 +385,7 @@ pub fn craft_valid_multiaddr(addr: &Multiaddr) -> Option { return None; } - if let Some(peer_id) = addr - .iter() - .find(|protocol| matches!(protocol, Protocol::P2p(_))) - { - output_address.push(peer_id); - } + output_address.push(peer_id); Some(output_address) } @@ -331,3 +397,10 @@ pub fn craft_valid_multiaddr_from_str(addr_str: &str) -> Option { }; craft_valid_multiaddr(&addr) } + +pub fn multiaddr_get_peer_id(addr: &Multiaddr) -> Option { + match addr.iter().find(|p| matches!(p, Protocol::P2p(_))) { + Some(Protocol::P2p(id)) => Some(id), + _ => None, + } +} diff --git a/ant-bootstrap-cache/tests/address_format_tests.rs b/ant-bootstrap-cache/tests/address_format_tests.rs index b1888ef847..73f8856465 100644 --- a/ant-bootstrap-cache/tests/address_format_tests.rs +++ b/ant-bootstrap-cache/tests/address_format_tests.rs @@ -7,21 +7,14 @@ // permissions and limitations relating to use of the SAFE Network Software. use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig, PeersArgs}; -use libp2p::{multiaddr::Protocol, Multiaddr}; -use std::net::SocketAddrV4; +use ant_logging::LogBuilder; +use libp2p::Multiaddr; use tempfile::TempDir; use wiremock::{ matchers::{method, path}, Mock, MockServer, ResponseTemplate, }; -// Initialize logging for tests -fn init_logging() { - let _ = tracing_subscriber::fmt() - .with_env_filter("bootstrap_cache=debug") - .try_init(); -} - // Setup function to create a new temp directory and config for each test async fn setup() -> (TempDir, BootstrapConfig) { let temp_dir = TempDir::new().unwrap(); @@ -35,45 +28,16 @@ async fn setup() -> (TempDir, BootstrapConfig) { (temp_dir, config) } -#[tokio::test] -async fn test_ipv4_socket_address_parsing() -> Result<(), Box> { - init_logging(); - let (_temp_dir, config) = setup().await; - - // Test IPv4 socket address format (1.2.3.4:1234) - let socket_addr = "127.0.0.1:8080".parse::()?; - let expected_addr = Multiaddr::empty() - .with(Protocol::Ip4(*socket_addr.ip())) - .with(Protocol::Udp(socket_addr.port())) - .with(Protocol::QuicV1); - - let args = PeersArgs { - first: false, - peers: vec![expected_addr.clone()], - network_contacts_url: None, - local: false, - }; - - let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert_eq!(peers.len(), 1, "Should have one peer"); - assert_eq!(peers[0].addr, expected_addr, "Address format should match"); - - Ok(()) -} - #[tokio::test] async fn test_multiaddr_format_parsing() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); // Test various multiaddr formats let addrs = vec![ - // Standard format with peer ID + // quic "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE", - // Without peer ID - "/ip4/127.0.0.1/udp/8080/quic-v1", - // With ws - "/ip4/127.0.0.1/tcp/8080/ws", + // ws + "/ip4/127.0.0.1/tcp/8080/ws/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE", ]; for addr_str in addrs { @@ -81,15 +45,18 @@ async fn test_multiaddr_format_parsing() -> Result<(), Box()?; let args = PeersArgs { first: false, - peers: vec![addr.clone()], + addrs: vec![addr.clone()], network_contacts_url: None, local: false, }; let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert_eq!(peers.len(), 1, "Should have one peer"); - assert_eq!(peers[0].addr, addr, "Address format should match"); + let bootstrap_addresses = store.get_addrs().collect::>(); + assert_eq!(bootstrap_addresses.len(), 1, "Should have one peer"); + assert_eq!( + bootstrap_addresses[0].addr, addr, + "Address format should match" + ); } Ok(()) @@ -97,7 +64,8 @@ async fn test_multiaddr_format_parsing() -> Result<(), Box Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let (_temp_dir, config) = setup().await; // Create a mock server with network contacts format @@ -113,22 +81,22 @@ async fn test_network_contacts_format() -> Result<(), Box let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: Some(format!("{}/peers", mock_server.uri()).parse()?), local: false, }; let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); + let adddrs = store.get_addrs().collect::>(); assert_eq!( - peers.len(), + adddrs.len(), 2, "Should have two peers from network contacts" ); // Verify address formats - for peer in peers { - let addr_str = peer.addr.to_string(); + for addr in adddrs { + let addr_str = addr.addr.to_string(); assert!(addr_str.contains("/ip4/"), "Should have IPv4 address"); assert!(addr_str.contains("/udp/"), "Should have UDP port"); assert!(addr_str.contains("/quic-v1/"), "Should have QUIC protocol"); @@ -140,7 +108,7 @@ async fn test_network_contacts_format() -> Result<(), Box #[tokio::test] async fn test_invalid_address_handling() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); // Test various invalid address formats let invalid_addrs = vec![ @@ -154,15 +122,15 @@ async fn test_invalid_address_handling() -> Result<(), Box>(); + let addrs = store.get_addrs().collect::>(); assert_eq!( - peers.len(), + addrs.len(), 0, "Should have no peers from invalid address in env var: {}", addr_str @@ -172,14 +140,14 @@ async fn test_invalid_address_handling() -> Result<(), Box() { let args_with_peer = PeersArgs { first: false, - peers: vec![addr], + addrs: vec![addr], network_contacts_url: None, local: false, }; let store = BootstrapCacheStore::from_args(args_with_peer, config).await?; - let peers = store.get_peers().collect::>(); + let addrs = store.get_addrs().collect::>(); assert_eq!( - peers.len(), + addrs.len(), 0, "Should have no peers from invalid address in args: {}", addr_str @@ -192,13 +160,14 @@ async fn test_invalid_address_handling() -> Result<(), Box Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints }; @@ -208,21 +177,22 @@ async fn test_socket_addr_format() -> Result<(), Box> { .with_cache_path(&cache_path); let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "Should have no peers in local mode"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Should have no peers in local mode"); Ok(()) } #[tokio::test] async fn test_multiaddr_format() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints }; @@ -232,21 +202,22 @@ async fn test_multiaddr_format() -> Result<(), Box> { .with_cache_path(&cache_path); let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "Should have no peers in local mode"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Should have no peers in local mode"); Ok(()) } #[tokio::test] async fn test_invalid_addr_format() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints }; @@ -256,21 +227,22 @@ async fn test_invalid_addr_format() -> Result<(), Box> { .with_cache_path(&cache_path); let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "Should have no peers in local mode"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Should have no peers in local mode"); Ok(()) } #[tokio::test] async fn test_mixed_addr_formats() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints }; @@ -280,21 +252,22 @@ async fn test_mixed_addr_formats() -> Result<(), Box> { .with_cache_path(&cache_path); let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "Should have no peers in local mode"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Should have no peers in local mode"); Ok(()) } #[tokio::test] async fn test_socket_addr_conversion() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints }; @@ -304,21 +277,22 @@ async fn test_socket_addr_conversion() -> Result<(), Box> .with_cache_path(&cache_path); let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "Should have no peers in local mode"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Should have no peers in local mode"); Ok(()) } #[tokio::test] async fn test_invalid_socket_addr() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints }; @@ -328,21 +302,22 @@ async fn test_invalid_socket_addr() -> Result<(), Box> { .with_cache_path(&cache_path); let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "Should have no peers in local mode"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Should have no peers in local mode"); Ok(()) } #[tokio::test] async fn test_invalid_multiaddr() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints }; @@ -352,21 +327,22 @@ async fn test_invalid_multiaddr() -> Result<(), Box> { .with_cache_path(&cache_path); let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "Should have no peers in local mode"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Should have no peers in local mode"); Ok(()) } #[tokio::test] async fn test_mixed_valid_invalid_addrs() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints }; @@ -376,8 +352,8 @@ async fn test_mixed_valid_invalid_addrs() -> Result<(), Box>(); - assert!(peers.is_empty(), "Should have no peers in local mode"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Should have no peers in local mode"); Ok(()) } diff --git a/ant-bootstrap-cache/tests/cache_tests.rs b/ant-bootstrap-cache/tests/cache_tests.rs index d79793c71c..d3673c3206 100644 --- a/ant-bootstrap-cache/tests/cache_tests.rs +++ b/ant-bootstrap-cache/tests/cache_tests.rs @@ -7,6 +7,7 @@ // permissions and limitations relating to use of the SAFE Network Software. use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig}; +use ant_logging::LogBuilder; use libp2p::Multiaddr; use std::time::Duration; use tempfile::TempDir; @@ -14,6 +15,8 @@ use tokio::time::sleep; #[tokio::test] async fn test_cache_store_operations() -> Result<(), Box> { + let _guard = LogBuilder::init_single_threaded_tokio_test("cache_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); @@ -28,13 +31,13 @@ async fn test_cache_store_operations() -> Result<(), Box> let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - cache_store.add_peer(addr.clone()); - cache_store.update_peer_status(&addr, true); + cache_store.add_addr(addr.clone()); + cache_store.update_addr_status(&addr, true); - let peers = cache_store.get_reliable_peers().collect::>(); - assert!(!peers.is_empty(), "Cache should contain the added peer"); + let addrs = cache_store.get_reliable_addrs().collect::>(); + assert!(!addrs.is_empty(), "Cache should contain the added peer"); assert!( - peers.iter().any(|p| p.addr == addr), + addrs.iter().any(|p| p.addr == addr), "Cache should contain our specific peer" ); @@ -43,6 +46,7 @@ async fn test_cache_store_operations() -> Result<(), Box> #[tokio::test] async fn test_cache_persistence() -> Result<(), Box> { + let _guard = LogBuilder::init_single_threaded_tokio_test("cache_tests", false); let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); @@ -57,17 +61,17 @@ async fn test_cache_persistence() -> Result<(), Box> { let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - cache_store1.add_peer(addr.clone()); - cache_store1.update_peer_status(&addr, true); + cache_store1.add_addr(addr.clone()); + cache_store1.update_addr_status(&addr, true); cache_store1.sync_and_save_to_disk(true).await.unwrap(); // Create a new cache store with the same path let cache_store2 = BootstrapCacheStore::new(config).await?; - let peers = cache_store2.get_reliable_peers().collect::>(); + let addrs = cache_store2.get_reliable_addrs().collect::>(); - assert!(!peers.is_empty(), "Cache should persist across instances"); + assert!(!addrs.is_empty(), "Cache should persist across instances"); assert!( - peers.iter().any(|p| p.addr == addr), + addrs.iter().any(|p| p.addr == addr), "Specific peer should persist" ); @@ -76,6 +80,7 @@ async fn test_cache_persistence() -> Result<(), Box> { #[tokio::test] async fn test_cache_reliability_tracking() -> Result<(), Box> { + let _guard = LogBuilder::init_single_threaded_tokio_test("cache_tests", false); let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); @@ -87,28 +92,28 @@ async fn test_cache_reliability_tracking() -> Result<(), Box>(); + let addrs = cache_store.get_reliable_addrs().collect::>(); assert!( - peers.iter().any(|p| p.addr == addr), - "Peer should be reliable after successful connections" + addrs.iter().any(|p| p.addr == addr), + "Address should be reliable after successful connections" ); // Test failed connections for _ in 0..5 { - cache_store.update_peer_status(&addr, false); + cache_store.update_addr_status(&addr, false); } - let peers = cache_store.get_reliable_peers().collect::>(); + let addrs = cache_store.get_reliable_addrs().collect::>(); assert!( - !peers.iter().any(|p| p.addr == addr), - "Peer should not be reliable after failed connections" + !addrs.iter().any(|p| p.addr == addr), + "Address should not be reliable after failed connections" ); Ok(()) @@ -116,9 +121,7 @@ async fn test_cache_reliability_tracking() -> Result<(), Box Result<(), Box> { - let _ = tracing_subscriber::fmt() - .with_env_filter("bootstrap_cache=debug") - .try_init(); + let _guard = LogBuilder::init_single_threaded_tokio_test("cache_tests", false); let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); @@ -136,21 +139,21 @@ async fn test_cache_max_peers() -> Result<(), Box> { for i in 1..=3 { let addr: Multiaddr = format!("/ip4/127.0.0.1/udp/808{}/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER{}", i, i).parse()?; addresses.push(addr.clone()); - cache_store.add_peer(addr); + cache_store.add_addr(addr); // Add a delay to ensure distinct timestamps sleep(Duration::from_millis(100)).await; } - let peers = cache_store.get_peers().collect::>(); - assert_eq!(peers.len(), 2, "Cache should respect max_peers limit"); + let addrs = cache_store.get_addrs().collect::>(); + assert_eq!(addrs.len(), 2, "Cache should respect max_peers limit"); // Get the addresses of the peers we have - let peer_addrs: Vec<_> = peers.iter().map(|p| p.addr.to_string()).collect(); + let peer_addrs: Vec<_> = addrs.iter().map(|p| p.addr.to_string()).collect(); tracing::debug!("Final peers: {:?}", peer_addrs); // We should have the two most recently added peers (addresses[1] and addresses[2]) - for peer in peers { - let addr_str = peer.addr.to_string(); + for addr in addrs { + let addr_str = addr.addr.to_string(); assert!( addresses[1..].iter().any(|a| a.to_string() == addr_str), "Should have one of the two most recent peers, got {}", @@ -163,6 +166,7 @@ async fn test_cache_max_peers() -> Result<(), Box> { #[tokio::test] async fn test_cache_file_corruption() -> Result<(), Box> { + let _guard = LogBuilder::init_single_threaded_tokio_test("cache_tests", false); let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); @@ -177,7 +181,7 @@ async fn test_cache_file_corruption() -> Result<(), Box> let addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UER1" .parse()?; - cache_store.add_peer(addr.clone()); + cache_store.add_addr(addr.clone()); assert_eq!(cache_store.peer_count(), 1); @@ -186,14 +190,14 @@ async fn test_cache_file_corruption() -> Result<(), Box> // Create a new cache store - it should handle the corruption gracefully let mut new_cache_store = BootstrapCacheStore::new_without_init(config).await?; - let peers = new_cache_store.get_peers().collect::>(); - assert!(peers.is_empty(), "Cache should be empty after corruption"); + let addrs = new_cache_store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Cache should be empty after corruption"); // Should be able to add peers again - new_cache_store.add_peer(addr); - let peers = new_cache_store.get_peers().collect::>(); + new_cache_store.add_addr(addr); + let addrs = new_cache_store.get_addrs().collect::>(); assert_eq!( - peers.len(), + addrs.len(), 1, "Should be able to add peers after corruption" ); diff --git a/ant-bootstrap-cache/tests/cli_integration_tests.rs b/ant-bootstrap-cache/tests/cli_integration_tests.rs index f730e51e71..ebc0bb86ea 100644 --- a/ant-bootstrap-cache/tests/cli_integration_tests.rs +++ b/ant-bootstrap-cache/tests/cli_integration_tests.rs @@ -7,6 +7,7 @@ // permissions and limitations relating to use of the SAFE Network Software. use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig, PeersArgs}; +use ant_logging::LogBuilder; use libp2p::Multiaddr; use std::env; use std::fs; @@ -16,13 +17,6 @@ use wiremock::{ Mock, MockServer, ResponseTemplate, }; -// Initialize logging for tests -fn init_logging() { - let _ = tracing_subscriber::fmt() - .with_env_filter("bootstrap_cache=debug") - .try_init(); -} - async fn setup() -> (TempDir, BootstrapConfig) { let temp_dir = TempDir::new().unwrap(); let cache_path = temp_dir.path().join("cache.json"); @@ -35,26 +29,26 @@ async fn setup() -> (TempDir, BootstrapConfig) { #[tokio::test] async fn test_first_flag() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); let (_temp_dir, config) = setup().await; let args = PeersArgs { first: true, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: false, }; let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "First node should have no peers"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "First node should have no addrs"); Ok(()) } #[tokio::test] async fn test_peer_argument() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); let (_temp_dir, config) = setup().await; let peer_addr: Multiaddr = @@ -63,36 +57,34 @@ async fn test_peer_argument() -> Result<(), Box> { let args = PeersArgs { first: false, - peers: vec![peer_addr.clone()], + addrs: vec![peer_addr.clone()], network_contacts_url: None, local: false, }; let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert_eq!(peers.len(), 1, "Should have one peer"); - assert_eq!( - peers[0].addr, peer_addr, - "Should have the correct peer address" - ); + let addrs = store.get_addrs().collect::>(); + assert_eq!(addrs.len(), 1, "Should have one addr"); + assert_eq!(addrs[0].addr, peer_addr, "Should have the correct address"); Ok(()) } #[tokio::test] async fn test_safe_peers_env() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); // Set SAFE_PEERS environment variable - let peer_addr = + let addr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE"; - env::set_var("SAFE_PEERS", peer_addr); + env::set_var("SAFE_PEERS", addr); let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: false, }; @@ -102,13 +94,13 @@ async fn test_safe_peers_env() -> Result<(), Box> { .with_cache_path(&cache_path); let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); + let addrs = store.get_addrs().collect::>(); // We should have multiple peers (env var + cache/endpoints) - assert!(!peers.is_empty(), "Should have peers"); + assert!(!addrs.is_empty(), "Should have peers"); // Verify that our env var peer is included in the set - let has_env_peer = peers.iter().any(|p| p.addr.to_string() == peer_addr); + let has_env_peer = addrs.iter().any(|p| p.addr.to_string() == addr); assert!(has_env_peer, "Should include the peer from env var"); // Clean up @@ -119,7 +111,8 @@ async fn test_safe_peers_env() -> Result<(), Box> { #[tokio::test] async fn test_network_contacts_fallback() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); + let (_temp_dir, config) = setup().await; // Start mock server @@ -135,15 +128,15 @@ async fn test_network_contacts_fallback() -> Result<(), Box>(); + let addrs = store.get_addrs().collect::>(); assert_eq!( - peers.len(), + addrs.len(), 2, "Should have two peers from network contacts" ); @@ -153,7 +146,8 @@ async fn test_network_contacts_fallback() -> Result<(), Box Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); @@ -165,14 +159,14 @@ async fn test_local_mode() -> Result<(), Box> { // Create args with local mode enabled let args = PeersArgs { first: false, - peers: vec![], + addrs: vec![], network_contacts_url: None, local: true, }; let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert!(peers.is_empty(), "Local mode should have no peers"); + let addrs = store.get_addrs().collect::>(); + assert!(addrs.is_empty(), "Local mode should have no peers"); // Verify cache was not touched assert!( @@ -185,7 +179,8 @@ async fn test_local_mode() -> Result<(), Box> { #[tokio::test] async fn test_test_network_peers() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); @@ -199,16 +194,16 @@ async fn test_test_network_peers() -> Result<(), Box> { let args = PeersArgs { first: false, - peers: vec![peer_addr.clone()], + addrs: vec![peer_addr.clone()], network_contacts_url: None, local: false, }; let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert_eq!(peers.len(), 1, "Should have exactly one test network peer"); + let addrs = store.get_addrs().collect::>(); + assert_eq!(addrs.len(), 1, "Should have exactly one test network peer"); assert_eq!( - peers[0].addr, peer_addr, + addrs[0].addr, peer_addr, "Should have the correct test network peer" ); @@ -223,7 +218,8 @@ async fn test_test_network_peers() -> Result<(), Box> { #[tokio::test] async fn test_peers_update_cache() -> Result<(), Box> { - init_logging(); + let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); + let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); @@ -239,15 +235,15 @@ async fn test_peers_update_cache() -> Result<(), Box> { // Create args with peers but no test network mode let args = PeersArgs { first: false, - peers: vec![peer_addr.clone()], + addrs: vec![peer_addr.clone()], network_contacts_url: None, local: false, }; let store = BootstrapCacheStore::from_args(args, config).await?; - let peers = store.get_peers().collect::>(); - assert_eq!(peers.len(), 1, "Should have one peer"); - assert_eq!(peers[0].addr, peer_addr, "Should have the correct peer"); + let addrs = store.get_addrs().collect::>(); + assert_eq!(addrs.len(), 1, "Should have one peer"); + assert_eq!(addrs[0].addr, peer_addr, "Should have the correct peer"); // Verify cache was updated assert!(cache_path.exists(), "Cache file should exist"); diff --git a/ant-bootstrap-cache/tests/integration_tests.rs b/ant-bootstrap-cache/tests/integration_tests.rs index b68dfa3e15..53456c2af2 100644 --- a/ant-bootstrap-cache/tests/integration_tests.rs +++ b/ant-bootstrap-cache/tests/integration_tests.rs @@ -26,15 +26,15 @@ fn init_logging() { async fn test_fetch_from_amazon_s3() { init_logging(); let discovery = InitialPeerDiscovery::new().unwrap(); - let peers = discovery.fetch_peers().await.unwrap(); + let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); // We should get some peers - assert!(!peers.is_empty(), "Expected to find some peers from S3"); + assert!(!addrs.is_empty(), "Expected to find some peers from S3"); // Verify that all peers have valid multiaddresses - for peer in &peers { - println!("Found peer: {}", peer.addr); - let addr_str = peer.addr.to_string(); + for addr in &addrs { + println!("Found peer: {}", addr.addr); + let addr_str = addr.addr.to_string(); assert!(addr_str.contains("/ip4/"), "Expected IPv4 address"); assert!(addr_str.contains("/udp/"), "Expected UDP port"); assert!(addr_str.contains("/quic-v1/"), "Expected QUIC protocol"); @@ -65,7 +65,7 @@ async fn test_individual_s3_endpoints() { .unwrap(); let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]).unwrap(); - match discovery.fetch_peers().await { + match discovery.fetch_bootstrap_addresses().await { Ok(peers) => { println!( "Successfully fetched {} peers from {}", @@ -104,10 +104,10 @@ async fn test_individual_s3_endpoints() { async fn test_response_format() { init_logging(); let discovery = InitialPeerDiscovery::new().unwrap(); - let peers = discovery.fetch_peers().await.unwrap(); + let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); // Get the first peer to check format - let first_peer = peers.first().expect("Expected at least one peer"); + let first_peer = addrs.first().expect("Expected at least one peer"); let addr_str = first_peer.addr.to_string(); // Print the address for debugging @@ -157,11 +157,11 @@ async fn test_json_endpoint_format() { let endpoint = mock_server.uri().parse::().unwrap(); let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]).unwrap(); - let peers = discovery.fetch_peers().await.unwrap(); - assert_eq!(peers.len(), 2); + let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + assert_eq!(addrs.len(), 2); // Verify peer addresses - let addrs: Vec = peers.iter().map(|p| p.addr.to_string()).collect(); + let addrs: Vec = addrs.iter().map(|p| p.addr.to_string()).collect(); assert!(addrs.contains( &"/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .to_string() @@ -190,8 +190,8 @@ async fn test_s3_json_format() { assert_eq!(endpoints.peers.len(), 24); // Verify we can parse each peer address - for peer in endpoints.peers { - peer.parse::().unwrap(); + for addrs in endpoints.peers { + addrs.parse::().unwrap(); } // Verify metadata diff --git a/ant-logging/src/layers.rs b/ant-logging/src/layers.rs index 90bcd007c5..2d26be3521 100644 --- a/ant-logging/src/layers.rs +++ b/ant-logging/src/layers.rs @@ -274,6 +274,7 @@ fn get_logging_targets(logging_env_value: &str) -> Result> ("antctl".to_string(), Level::TRACE), ("antctld".to_string(), Level::TRACE), // libs + ("ant_bootstrap_cache".to_string(), Level::TRACE), ("ant_build_info".to_string(), Level::TRACE), ("ant_evm".to_string(), Level::TRACE), ("ant_logging".to_string(), Level::TRACE), diff --git a/ant-logging/src/lib.rs b/ant-logging/src/lib.rs index 394e7f1e5a..69f190317b 100644 --- a/ant-logging/src/lib.rs +++ b/ant-logging/src/lib.rs @@ -255,6 +255,8 @@ impl LogBuilder { None => LogOutputDest::Stdout, }; + println!("Logging test at {test_file_name:?} to {output_dest:?}"); + let mut layers = TracingLayers::default(); let _reload_handle = layers From 1ce7f632c2e94a5a66760e350b194ecce3d0f177 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Wed, 4 Dec 2024 05:17:00 +0530 Subject: [PATCH 10/21] feat(bootstrap): isolate code into their own modules based on their purpose --- Cargo.lock | 3 +- Cargo.toml | 2 +- .../Cargo.toml | 8 +- .../README.md | 0 .../src/cache_store.rs | 279 +++--------------- .../src/config.rs | 43 +-- .../src/contacts.rs | 128 ++++---- .../src/error.rs | 12 +- ant-bootstrap/src/initial_peers.rs | 215 ++++++++++++++ .../src/lib.rs | 137 +++------ .../tests/address_format_tests.rs | 141 ++++----- .../tests/cache_tests.rs | 37 +-- .../tests/cli_integration_tests.rs | 73 +++-- .../tests/integration_tests.rs | 18 +- ant-logging/src/layers.rs | 2 +- 15 files changed, 511 insertions(+), 587 deletions(-) rename {ant-bootstrap-cache => ant-bootstrap}/Cargo.toml (85%) rename {ant-bootstrap-cache => ant-bootstrap}/README.md (100%) rename {ant-bootstrap-cache => ant-bootstrap}/src/cache_store.rs (66%) rename {ant-bootstrap-cache => ant-bootstrap}/src/config.rs (77%) rename ant-bootstrap-cache/src/initial_peer_discovery.rs => ant-bootstrap/src/contacts.rs (80%) rename {ant-bootstrap-cache => ant-bootstrap}/src/error.rs (80%) create mode 100644 ant-bootstrap/src/initial_peers.rs rename {ant-bootstrap-cache => ant-bootstrap}/src/lib.rs (70%) rename {ant-bootstrap-cache => ant-bootstrap}/tests/address_format_tests.rs (71%) rename {ant-bootstrap-cache => ant-bootstrap}/tests/cache_tests.rs (85%) rename {ant-bootstrap-cache => ant-bootstrap}/tests/cli_integration_tests.rs (76%) rename {ant-bootstrap-cache => ant-bootstrap}/tests/integration_tests.rs (91%) diff --git a/Cargo.lock b/Cargo.lock index 6e6ec97b7f..bed4a26d61 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -723,12 +723,13 @@ dependencies = [ ] [[package]] -name = "ant-bootstrap-cache" +name = "ant-bootstrap" version = "0.1.0" dependencies = [ "ant-logging", "ant-protocol", "chrono", + "clap", "dirs-next", "fs2", "futures", diff --git a/Cargo.toml b/Cargo.toml index da1073ed31..eeafdece63 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,7 +1,7 @@ [workspace] resolver = "2" members = [ - "ant-bootstrap-cache", + "ant-bootstrap", "ant-build-info", "ant-cli", "ant-evm", diff --git a/ant-bootstrap-cache/Cargo.toml b/ant-bootstrap/Cargo.toml similarity index 85% rename from ant-bootstrap-cache/Cargo.toml rename to ant-bootstrap/Cargo.toml index 593126b942..e707df4fef 100644 --- a/ant-bootstrap-cache/Cargo.toml +++ b/ant-bootstrap/Cargo.toml @@ -1,18 +1,22 @@ [package] authors = ["MaidSafe Developers "] -description = "Bootstrap Cache functionality for Autonomi" +description = "Bootstrap functionality for Autonomi" edition = "2021" homepage = "https://maidsafe.net" license = "GPL-3.0" -name = "ant-bootstrap-cache" +name = "ant-bootstrap" readme = "README.md" repository = "https://github.com/maidsafe/autonomi" version = "0.1.0" +[features] +local = [] + [dependencies] ant-logging = { path = "../ant-logging", version = "0.2.40" } ant-protocol = { version = "0.17.15", path = "../ant-protocol" } chrono = { version = "0.4", features = ["serde"] } +clap = { version = "4.2.1", features = ["derive", "env"] } dirs-next = "~2.0.0" fs2 = "0.4.3" futures = "0.3.30" diff --git a/ant-bootstrap-cache/README.md b/ant-bootstrap/README.md similarity index 100% rename from ant-bootstrap-cache/README.md rename to ant-bootstrap/README.md diff --git a/ant-bootstrap-cache/src/cache_store.rs b/ant-bootstrap/src/cache_store.rs similarity index 66% rename from ant-bootstrap-cache/src/cache_store.rs rename to ant-bootstrap/src/cache_store.rs index 39e14e6928..615f8c7541 100644 --- a/ant-bootstrap-cache/src/cache_store.rs +++ b/ant-bootstrap/src/cache_store.rs @@ -7,8 +7,8 @@ // permissions and limitations relating to use of the SAFE Network Software. use crate::{ - craft_valid_multiaddr, multiaddr_get_peer_id, BootstrapAddr, BootstrapAddresses, - BootstrapConfig, Error, InitialPeerDiscovery, Result, + craft_valid_multiaddr, initial_peers::PeersArgs, multiaddr_get_peer_id, BootstrapAddr, + BootstrapAddresses, BootstrapCacheConfig, Error, Result, }; use fs2::FileExt; use libp2p::multiaddr::Protocol; @@ -24,7 +24,7 @@ use tempfile::NamedTempFile; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CacheData { - peers: std::collections::HashMap, + pub(crate) peers: std::collections::HashMap, #[serde(default = "SystemTime::now")] last_updated: SystemTime, #[serde(default = "default_version")] @@ -56,6 +56,8 @@ impl CacheData { .entry(*peer) .or_insert(current_shared_addrs_state.clone()); + trace!("Syncing {peer:?} from fs with addrs count: {:?}, old state count: {:?}. Our in memory state count: {:?}", current_shared_addrs_state.0.len(), old_shared_addrs_state.map(|x| x.0.len()), bootstrap_addresses.0.len()); + // Add/sync every BootstrapAddr into self bootstrap_addresses.sync(old_shared_addrs_state, current_shared_addrs_state); } @@ -69,7 +71,7 @@ impl CacheData { /// - Removes all peers with empty addrs set /// - Maintains `max_addr` per peer by removing the addr with the lowest success rate /// - Maintains `max_peers` in the list by removing the peer with the oldest last_seen - pub fn perform_cleanup(&mut self, cfg: &BootstrapConfig) { + pub fn perform_cleanup(&mut self, cfg: &BootstrapCacheConfig) { self.peers.values_mut().for_each(|bootstrap_addresses| { bootstrap_addresses.0.retain(|bootstrap_addr| { let now = SystemTime::now(); @@ -100,7 +102,7 @@ impl CacheData { } /// Remove the oldest peers until we're under the max_peers limit - pub fn try_remove_oldest_peers(&mut self, cfg: &BootstrapConfig) { + pub fn try_remove_oldest_peers(&mut self, cfg: &BootstrapCacheConfig) { if self.peers.len() > cfg.max_peers { let mut peer_last_seen_map = HashMap::new(); for (peer, addrs) in self.peers.iter() { @@ -149,48 +151,21 @@ impl Default for CacheData { #[derive(Clone, Debug)] pub struct BootstrapCacheStore { - cache_path: PathBuf, - config: BootstrapConfig, - data: CacheData, + pub(crate) cache_path: PathBuf, + pub(crate) config: BootstrapCacheConfig, + pub(crate) data: CacheData, /// This is our last known state of the cache on disk, which is shared across all instances. /// This is not updated until `sync_to_disk` is called. - old_shared_state: CacheData, + pub(crate) old_shared_state: CacheData, } impl BootstrapCacheStore { - pub fn config(&self) -> &BootstrapConfig { + pub fn config(&self) -> &BootstrapCacheConfig { &self.config } - pub async fn new(config: BootstrapConfig) -> Result { - info!("Creating new CacheStore with config: {:?}", config); - let cache_path = config.cache_file_path.clone(); - - // Create cache directory if it doesn't exist - if let Some(parent) = cache_path.parent() { - if !parent.exists() { - info!("Attempting to create cache directory at {parent:?}"); - fs::create_dir_all(parent).inspect_err(|err| { - warn!("Failed to create cache directory at {parent:?}: {err}"); - })?; - } - } - - let mut store = Self { - cache_path, - config, - data: CacheData::default(), - old_shared_state: CacheData::default(), - }; - - store.init().await?; - - info!("Successfully created CacheStore and initialized it."); - - Ok(store) - } - - pub async fn new_without_init(config: BootstrapConfig) -> Result { + /// Create a empty CacheStore with the given configuration + pub fn empty(config: BootstrapCacheConfig) -> Result { info!("Creating new CacheStore with config: {:?}", config); let cache_path = config.cache_file_path.clone(); @@ -211,146 +186,26 @@ impl BootstrapCacheStore { old_shared_state: CacheData::default(), }; - info!("Successfully created CacheStore without initializing the data."); Ok(store) } - pub async fn init(&mut self) -> Result<()> { - let data = if self.cache_path.exists() { - info!( - "Cache file exists at {:?}, attempting to load", - self.cache_path - ); - match Self::load_cache_data(&self.config).await { - Ok(data) => { - info!( - "Successfully loaded cache data with {} peers", - data.peers.len() - ); - // If cache data exists but has no peers and file is not read-only, - // fallback to default - let is_readonly = self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if data.peers.is_empty() && !is_readonly { - info!("Cache is empty and not read-only, falling back to default"); - Self::fallback_to_default(&self.config).await? - } else { - // Ensure we don't exceed max_peers - let mut filtered_data = data; - if filtered_data.peers.len() > self.config.max_peers { - info!( - "Trimming cache from {} to {} peers", - filtered_data.peers.len(), - self.config.max_peers - ); - - filtered_data.peers = filtered_data - .peers - .into_iter() - .take(self.config.max_peers) - .collect(); - } - filtered_data - } - } - Err(e) => { - warn!("Failed to load cache data: {}", e); - // If we can't read or parse the cache file, fallback to default - Self::fallback_to_default(&self.config).await? - } - } - } else { - info!( - "Cache file does not exist at {:?}, falling back to default", - self.cache_path - ); - // If cache file doesn't exist, fallback to default - Self::fallback_to_default(&self.config).await? - }; - - // Update the store's data - self.data = data.clone(); - self.old_shared_state = data; - - // Save the default data to disk - self.sync_and_save_to_disk(false).await?; - + pub async fn initialize_from_peers_arg(&mut self, peers_arg: &PeersArgs) -> Result<()> { + peers_arg + .get_bootstrap_addr_and_initialize_cache(Some(self)) + .await?; + self.sync_and_save_to_disk(true).await?; Ok(()) } - async fn fallback_to_default(config: &BootstrapConfig) -> Result { - info!("Falling back to default peers from endpoints"); - let mut data = CacheData { - peers: std::collections::HashMap::new(), - last_updated: SystemTime::now(), - version: default_version(), - }; - - // If no endpoints are configured, just return empty cache - if config.endpoints.is_empty() { - warn!("No endpoints configured, returning empty cache"); - return Ok(data); - } - - // Try to discover peers from configured endpoints - let discovery = InitialPeerDiscovery::with_endpoints(config.endpoints.clone())?; - match discovery.fetch_bootstrap_addresses().await { - Ok(addrs) => { - info!("Successfully fetched {} peers from endpoints", addrs.len()); - // Only add up to max_peers from the discovered peers - let mut count = 0; - for bootstrap_addr in addrs.into_iter() { - if count >= config.max_peers { - break; - } - if let Some(peer_id) = bootstrap_addr.peer_id() { - data.insert(peer_id, bootstrap_addr); - count += 1; - } - } - - // Create parent directory if it doesn't exist - if let Some(parent) = config.cache_file_path.parent() { - if !parent.exists() { - info!("Creating cache directory at {:?}", parent); - if let Err(e) = fs::create_dir_all(parent) { - warn!("Failed to create cache directory: {}", e); - } - } - } - - // Try to write the cache file immediately - match serde_json::to_string_pretty(&data) { - Ok(json) => { - info!("Writing {} peers to cache file", data.peers.len()); - if let Err(e) = fs::write(&config.cache_file_path, json) { - warn!("Failed to write cache file: {}", e); - } else { - info!( - "Successfully wrote cache file at {:?}", - config.cache_file_path - ); - } - } - Err(e) => { - warn!("Failed to serialize cache data: {}", e); - } - } - - Ok(data) - } - Err(e) => { - warn!("Failed to fetch peers from endpoints: {}", e); - Ok(data) // Return empty cache on error - } - } + pub async fn initialize_from_local_cache(&mut self) -> Result<()> { + self.data = Self::load_cache_data(&self.config).await?; + self.old_shared_state = self.data.clone(); + Ok(()) } - async fn load_cache_data(cfg: &BootstrapConfig) -> Result { + /// Load cache data from disk + /// Make sure to have clean addrs inside the cache as we don't call craft_valid_multiaddr + pub async fn load_cache_data(cfg: &BootstrapCacheConfig) -> Result { // Try to open the file with read permissions let mut file = match OpenOptions::new().read(true).open(&cfg.cache_file_path) { Ok(f) => f, @@ -395,6 +250,15 @@ impl BootstrapCacheStore { .flat_map(|bootstrap_addresses| bootstrap_addresses.0.iter()) } + /// Get a list containing single addr per peer. We use the least faulty addr for each peer. + pub fn get_unique_peer_addr(&self) -> impl Iterator { + self.data + .peers + .values() + .flat_map(|bootstrap_addresses| bootstrap_addresses.get_least_faulty()) + .map(|bootstrap_addr| &bootstrap_addr.addr) + } + pub fn get_reliable_addrs(&self) -> impl Iterator { self.data .peers @@ -418,7 +282,7 @@ impl BootstrapCacheStore { /// Add a set of addresses to the cache. pub fn add_addr(&mut self, addr: Multiaddr) { debug!("Trying to add new addr: {addr}"); - let Some(addr) = craft_valid_multiaddr(&addr) else { + let Some(addr) = craft_valid_multiaddr(&addr, false) else { return; }; let peer_id = match addr.iter().find(|p| matches!(p, Protocol::P2p(_))) { @@ -433,13 +297,16 @@ impl BootstrapCacheStore { bootstrap_addr.last_seen = SystemTime::now(); return; } else { - bootstrap_addrs.insert_addr(&BootstrapAddr::new(addr.clone())); + let mut bootstrap_addr = BootstrapAddr::new(addr.clone()); + bootstrap_addr.success_count = 1; + bootstrap_addrs.insert_addr(&bootstrap_addr); } } else { - self.data.peers.insert( - peer_id, - BootstrapAddresses(vec![BootstrapAddr::new(addr.clone())]), - ); + let mut bootstrap_addr = BootstrapAddr::new(addr.clone()); + bootstrap_addr.success_count = 1; + self.data + .peers + .insert(peer_id, BootstrapAddresses(vec![bootstrap_addr])); } debug!("Added new peer {addr:?}, performing cleanup of old addrs"); @@ -556,6 +423,7 @@ impl BootstrapCacheStore { } async fn atomic_write(&self) -> Result<()> { + info!("Writing cache to disk: {:?}", self.cache_path); // Create parent directory if it doesn't exist if let Some(parent) = self.cache_path.parent() { fs::create_dir_all(parent).map_err(Error::from)?; @@ -583,6 +451,8 @@ impl BootstrapCacheStore { error!("Failed to persist file with err: {err:?}"); })?; + info!("Cache written to disk: {:?}", self.cache_path); + // Lock will be automatically released when file is dropped Ok(()) } @@ -597,11 +467,9 @@ mod tests { let temp_dir = tempdir().unwrap(); let cache_file = temp_dir.path().join("cache.json"); - let config = crate::BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_file); + let config = crate::BootstrapCacheConfig::empty().with_cache_path(&cache_file); - let store = BootstrapCacheStore::new(config).await.unwrap(); + let store = BootstrapCacheStore::empty(config).unwrap(); (store.clone(), store.cache_path.clone()) } @@ -685,53 +553,4 @@ mod tests { assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, addr); } - - #[tokio::test] - async fn test_peer_removed_only_when_unresponsive() { - let (mut store, _) = create_test_store().await; - let addr: Multiaddr = - "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" - .parse() - .unwrap(); - - // Add a peer - store.add_addr(addr.clone()); - - // Make it fail more than successes - for _ in 0..3 { - store.update_addr_status(&addr, true); - } - for _ in 0..4 { - store.update_addr_status(&addr, false); - } - - // Run cleanup - store.perform_cleanup(); - - // Verify peer is removed - assert_eq!( - store.get_addrs().count(), - 0, - "Peer should be removed after max_retries failures" - ); - - // Test with some successes but more failures - store.add_addr(addr.clone()); - store.update_addr_status(&addr, true); - store.update_addr_status(&addr, true); - - for _ in 0..5 { - store.update_addr_status(&addr, false); - } - - // Run cleanup - store.perform_cleanup(); - - // Verify peer is removed due to more failures than successes - assert_eq!( - store.get_addrs().count(), - 0, - "Peer should be removed when failures exceed successes" - ); - } } diff --git a/ant-bootstrap-cache/src/config.rs b/ant-bootstrap/src/config.rs similarity index 77% rename from ant-bootstrap-cache/src/config.rs rename to ant-bootstrap/src/config.rs index e02fa8a590..52d85b7dee 100644 --- a/ant-bootstrap-cache/src/config.rs +++ b/ant-bootstrap/src/config.rs @@ -12,7 +12,6 @@ use std::{ path::{Path, PathBuf}, time::Duration, }; -use url::Url; /// The duration since last)seen before removing the address of a Peer. const ADDR_EXPIRY_DURATION: Duration = Duration::from_secs(24 * 60 * 60); // 24 hours @@ -31,11 +30,9 @@ const MAX_BOOTSTRAP_CACHE_SAVE_INTERVAL: Duration = Duration::from_secs(24 * 60 /// Configuration for the bootstrap cache #[derive(Clone, Debug)] -pub struct BootstrapConfig { +pub struct BootstrapCacheConfig { /// The duration since last)seen before removing the address of a Peer. pub addr_expiry_duration: Duration, - /// List of bootstrap endpoints to fetch peer information from - pub endpoints: Vec, /// Maximum number of peers to keep in the cache pub max_peers: usize, /// Maximum number of addresses stored per peer. @@ -52,19 +49,11 @@ pub struct BootstrapConfig { pub cache_save_scaling_factor: u64, } -impl BootstrapConfig { +impl BootstrapCacheConfig { /// Creates a new BootstrapConfig with default settings pub fn default_config() -> Result { Ok(Self { addr_expiry_duration: ADDR_EXPIRY_DURATION, - endpoints: vec![ - "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json" - .parse() - .expect("Failed to parse URL"), - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" - .parse() - .expect("Failed to parse URL"), - ], max_peers: MAX_PEERS, max_addrs_per_peer: MAX_ADDRS_PER_PEER, cache_file_path: default_cache_path()?, @@ -76,18 +65,17 @@ impl BootstrapConfig { } /// Creates a new BootstrapConfig with empty settings - pub fn empty() -> Result { - Ok(Self { + pub fn empty() -> Self { + Self { addr_expiry_duration: ADDR_EXPIRY_DURATION, - endpoints: vec![], max_peers: MAX_PEERS, max_addrs_per_peer: MAX_ADDRS_PER_PEER, - cache_file_path: default_cache_path()?, + cache_file_path: PathBuf::new(), disable_cache_writing: false, min_cache_save_duration: MIN_BOOTSTRAP_CACHE_SAVE_INTERVAL, max_cache_save_duration: MAX_BOOTSTRAP_CACHE_SAVE_INTERVAL, cache_save_scaling_factor: 2, - }) + } } /// Set a new addr expiry duration @@ -96,25 +84,6 @@ impl BootstrapConfig { self } - /// Update the config with custom endpoints - pub fn with_endpoints(mut self, endpoints: Vec) -> Self { - self.endpoints = endpoints; - self - } - - /// Update the config with default endpoints - pub fn with_default_endpoints(mut self) -> Self { - self.endpoints = vec![ - "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json" - .parse() - .expect("Failed to parse URL"), - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" - .parse() - .expect("Failed to parse URL"), - ]; - self - } - /// Update the config with a custom cache file path pub fn with_cache_path>(mut self, path: P) -> Self { self.cache_file_path = path.as_ref().to_path_buf(); diff --git a/ant-bootstrap-cache/src/initial_peer_discovery.rs b/ant-bootstrap/src/contacts.rs similarity index 80% rename from ant-bootstrap-cache/src/initial_peer_discovery.rs rename to ant-bootstrap/src/contacts.rs index c8cf0ae6e5..53c3c3c62f 100644 --- a/ant-bootstrap-cache/src/initial_peer_discovery.rs +++ b/ant-bootstrap/src/contacts.rs @@ -8,13 +8,11 @@ use crate::{craft_valid_multiaddr_from_str, BootstrapAddr, BootstrapEndpoints, Error, Result}; use futures::stream::{self, StreamExt}; +use libp2p::Multiaddr; use reqwest::Client; use std::time::Duration; use url::Url; -/// The default network contacts endpoint -const DEFAULT_BOOTSTRAP_ENDPOINT: &str = - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts"; /// The client fetch timeout const FETCH_TIMEOUT_SECS: u64 = 30; /// Maximum number of endpoints to fetch at a time @@ -23,19 +21,19 @@ const MAX_CONCURRENT_FETCHES: usize = 3; const MAX_RETRIES_ON_FETCH_FAILURE: usize = 3; /// Discovers initial peers from a list of endpoints -pub struct InitialPeerDiscovery { +pub struct ContactsFetcher { /// The list of endpoints endpoints: Vec, /// Reqwest Client request_client: Client, + /// Ignore PeerId in the multiaddr if not present. This is only useful for fetching nat detection contacts + ignore_peer_id: bool, } -impl InitialPeerDiscovery { +impl ContactsFetcher { /// Create a new struct with the default endpoint pub fn new() -> Result { - Self::with_endpoints(vec![DEFAULT_BOOTSTRAP_ENDPOINT - .parse() - .expect("Invalid URL")]) + Self::with_endpoints(vec![]) } /// Create a new struct with the provided endpoints @@ -51,13 +49,47 @@ impl InitialPeerDiscovery { Ok(Self { endpoints, request_client, + ignore_peer_id: false, }) } - /// Fetch BootstrapAddr from all configured endpoints + /// Create a new struct with the mainnet endpoints + pub fn with_mainnet_endpoints() -> Result { + let mut fetcher = Self::new()?; + let mainnet_contact = vec![ + "https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json" + .parse() + .expect("Failed to parse URL"), + "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" + .parse() + .expect("Failed to parse URL"), + ]; + fetcher.endpoints = mainnet_contact; + Ok(fetcher) + } + + pub fn insert_endpoint(&mut self, endpoint: Url) { + self.endpoints.push(endpoint); + } + + pub fn ignore_peer_id(&mut self, ignore_peer_id: bool) { + self.ignore_peer_id = ignore_peer_id; + } + + /// Fetch the list of bootstrap addresses from all configured endpoints pub async fn fetch_bootstrap_addresses(&self) -> Result> { + Ok(self + .fetch_addrs() + .await? + .into_iter() + .map(BootstrapAddr::new) + .collect()) + } + + /// Fetch the list of multiaddrs from all configured endpoints + pub async fn fetch_addrs(&self) -> Result> { info!( - "Starting peer discovery from {} endpoints: {:?}", + "Starting peer fetcher from {} endpoints: {:?}", self.endpoints.len(), self.endpoints ); @@ -71,7 +103,12 @@ impl InitialPeerDiscovery { endpoint ); ( - Self::fetch_from_endpoint(self.request_client.clone(), &endpoint).await, + Self::fetch_from_endpoint( + self.request_client.clone(), + &endpoint, + self.ignore_peer_id, + ) + .await, endpoint, ) }) @@ -126,11 +163,12 @@ impl InitialPeerDiscovery { } } - /// Fetch the list of bootstrap addresses from a single endpoint + /// Fetch the list of multiaddrs from a single endpoint async fn fetch_from_endpoint( request_client: Client, endpoint: &Url, - ) -> Result> { + ignore_peer_id: bool, + ) -> Result> { info!("Fetching peers from endpoint: {endpoint}"); let mut retries = 0; @@ -142,7 +180,7 @@ impl InitialPeerDiscovery { if response.status().is_success() { let text = response.text().await?; - match Self::try_parse_response(&text) { + match Self::try_parse_response(&text, ignore_peer_id) { Ok(addrs) => break addrs, Err(err) => { warn!("Failed to parse response with err: {err:?}"); @@ -186,7 +224,7 @@ impl InitialPeerDiscovery { } /// Try to parse a response from a endpoint - fn try_parse_response(response: &str) -> Result> { + fn try_parse_response(response: &str, ignore_peer_id: bool) -> Result> { match serde_json::from_str::(response) { Ok(json_endpoints) => { info!( @@ -196,8 +234,9 @@ impl InitialPeerDiscovery { let bootstrap_addresses = json_endpoints .peers .into_iter() - .filter_map(|addr_str| craft_valid_multiaddr_from_str(&addr_str)) - .map(BootstrapAddr::new) + .filter_map(|addr_str| { + craft_valid_multiaddr_from_str(&addr_str, ignore_peer_id) + }) .collect::>(); if bootstrap_addresses.is_empty() { @@ -219,8 +258,7 @@ impl InitialPeerDiscovery { // example of contacts file exists in resources/network-contacts-examples let bootstrap_addresses = response .split('\n') - .filter_map(craft_valid_multiaddr_from_str) - .map(BootstrapAddr::new) + .filter_map(|str| craft_valid_multiaddr_from_str(str, ignore_peer_id)) .collect::>(); if bootstrap_addresses.is_empty() { @@ -264,10 +302,10 @@ mod tests { .mount(&mock_server) .await; - let mut discovery = InitialPeerDiscovery::new().unwrap(); - discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + let mut fetcher = ContactsFetcher::new().unwrap(); + fetcher.endpoints = vec![mock_server.uri().parse().unwrap()]; - let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); assert_eq!(addrs.len(), 2); let addr1: Multiaddr = @@ -303,13 +341,13 @@ mod tests { .mount(&mock_server2) .await; - let mut discovery = InitialPeerDiscovery::new().unwrap(); - discovery.endpoints = vec![ + let mut fetcher = ContactsFetcher::new().unwrap(); + fetcher.endpoints = vec![ mock_server1.uri().parse().unwrap(), mock_server2.uri().parse().unwrap(), ]; - let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); assert_eq!(addrs.len(), 1); let addr: Multiaddr = @@ -333,10 +371,10 @@ mod tests { .mount(&mock_server) .await; - let mut discovery = InitialPeerDiscovery::new().unwrap(); - discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + let mut fetcher = ContactsFetcher::new().unwrap(); + fetcher.endpoints = vec![mock_server.uri().parse().unwrap()]; - let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); let valid_addr: Multiaddr = "/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5" .parse() @@ -354,10 +392,10 @@ mod tests { .mount(&mock_server) .await; - let mut discovery = InitialPeerDiscovery::new().unwrap(); - discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + let mut fetcher = ContactsFetcher::new().unwrap(); + fetcher.endpoints = vec![mock_server.uri().parse().unwrap()]; - let result = discovery.fetch_bootstrap_addresses().await; + let result = fetcher.fetch_bootstrap_addresses().await; assert!(matches!(result, Err(Error::NoBootstrapAddressesFound(_)))); } @@ -374,10 +412,10 @@ mod tests { .mount(&mock_server) .await; - let mut discovery = InitialPeerDiscovery::new().unwrap(); - discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + let mut fetcher = ContactsFetcher::new().unwrap(); + fetcher.endpoints = vec![mock_server.uri().parse().unwrap()]; - let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); assert_eq!(addrs.len(), 1); let addr: Multiaddr = @@ -387,23 +425,11 @@ mod tests { assert_eq!(addrs[0].addr, addr); } - #[tokio::test] - async fn test_default_endpoints() { - let discovery = InitialPeerDiscovery::new().unwrap(); - assert_eq!(discovery.endpoints.len(), 1); - assert_eq!( - discovery.endpoints[0], - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts" - .parse() - .unwrap() - ); - } - #[tokio::test] async fn test_custom_endpoints() { let endpoints = vec!["http://example.com".parse().unwrap()]; - let discovery = InitialPeerDiscovery::with_endpoints(endpoints.clone()).unwrap(); - assert_eq!(discovery.endpoints, endpoints); + let fetcher = ContactsFetcher::with_endpoints(endpoints.clone()).unwrap(); + assert_eq!(fetcher.endpoints, endpoints); } #[tokio::test] @@ -418,10 +444,10 @@ mod tests { .mount(&mock_server) .await; - let mut discovery = InitialPeerDiscovery::new().unwrap(); - discovery.endpoints = vec![mock_server.uri().parse().unwrap()]; + let mut fetcher = ContactsFetcher::new().unwrap(); + fetcher.endpoints = vec![mock_server.uri().parse().unwrap()]; - let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); assert_eq!(addrs.len(), 2); let addr1: Multiaddr = diff --git a/ant-bootstrap-cache/src/error.rs b/ant-bootstrap/src/error.rs similarity index 80% rename from ant-bootstrap-cache/src/error.rs rename to ant-bootstrap/src/error.rs index 92bb997d63..e7771a64b4 100644 --- a/ant-bootstrap-cache/src/error.rs +++ b/ant-bootstrap/src/error.rs @@ -10,6 +10,8 @@ use thiserror::Error; #[derive(Debug, Error)] pub enum Error { + #[error("Failed to obtain any bootstrap peers")] + NoBootstrapPeersFound, #[error("Failed to parse cache data")] FailedToParseCacheData, #[error("Could not obtain data directory")] @@ -18,8 +20,6 @@ pub enum Error { FailedToObtainAddrsFromUrl(String, usize), #[error("No Bootstrap Addresses found: {0}")] NoBootstrapAddressesFound(String), - #[error("Invalid response: {0}")] - InvalidResponse(String), #[error("IO error: {0}")] Io(#[from] std::io::Error), #[error("JSON error: {0}")] @@ -32,14 +32,6 @@ pub enum Error { Persist(#[from] tempfile::PersistError), #[error("Lock error")] LockError, - #[error("Circuit breaker open: {0}")] - CircuitBreakerOpen(String), - #[error("Request failed: {0}")] - RequestFailed(String), - #[error("Request timeout")] - RequestTimeout, - #[error("Invalid multiaddr: {0}")] - InvalidMultiAddr(#[from] libp2p::multiaddr::Error), } pub type Result = std::result::Result; diff --git a/ant-bootstrap/src/initial_peers.rs b/ant-bootstrap/src/initial_peers.rs new file mode 100644 index 0000000000..a15f60cc05 --- /dev/null +++ b/ant-bootstrap/src/initial_peers.rs @@ -0,0 +1,215 @@ +// Copyright 2024 MaidSafe.net limited. +// +// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. +// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed +// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. Please review the Licences for the specific language governing +// permissions and limitations relating to use of the SAFE Network Software. + +use crate::{ + craft_valid_multiaddr, craft_valid_multiaddr_from_str, + error::{Error, Result}, + BootstrapAddr, BootstrapCacheConfig, BootstrapCacheStore, ContactsFetcher, +}; +use clap::Args; +use libp2p::Multiaddr; +use url::Url; + +/// The name of the environment variable that can be used to pass peers to the node. +pub const ANT_PEERS_ENV: &str = "ANT_PEERS"; + +/// Command line arguments for peer configuration +#[derive(Args, Debug, Clone, Default)] +pub struct PeersArgs { + /// Set to indicate this is the first node in a new network + /// + /// If this argument is used, any others will be ignored because they do not apply to the first + /// node. + #[clap(long)] + pub first: bool, + /// Addr(s) to use for bootstrap, in a 'multiaddr' format containing the peer ID. + /// + /// A multiaddr looks like + /// '/ip4/1.2.3.4/tcp/1200/tcp/p2p/12D3KooWRi6wF7yxWLuPSNskXc6kQ5cJ6eaymeMbCRdTnMesPgFx' where + /// `1.2.3.4` is the IP, `1200` is the port and the (optional) last part is the peer ID. + /// + /// This argument can be provided multiple times to connect to multiple peers. + /// + /// Alternatively, the `ANT_PEERS` environment variable can provide a comma-separated peer + /// list. + #[clap( + long = "peer", + value_name = "multiaddr", + value_delimiter = ',', + conflicts_with = "first", + value_parser = parse_multiaddr_str + )] + pub addrs: Vec, + /// Specify the URL to fetch the network contacts from. + /// + /// The URL can point to a text file containing Multiaddresses separated by newline character, or + /// a bootstrap cache JSON file. + #[clap(long, conflicts_with = "first")] + pub network_contacts_url: Option, + /// Set to indicate this is a local network. You could also set the `local` feature flag to set this to true. + /// + /// This would use mDNS for peer discovery. + #[clap(long, conflicts_with = "network_contacts_url")] + pub local: bool, + /// Set to indicate this is a testnet. + /// + /// This disables fetching peers from the mainnet network contacts. + #[clap(name = "testnet", long, conflicts_with = "network_contacts_url")] + pub disable_mainnet_contacts: bool, + + /// Set to not load the bootstrap addresses from the local cache. + #[clap(long)] + pub ignore_cache: bool, +} +impl PeersArgs { + /// Get bootstrap peers + /// Order of precedence: + /// 1. Addresses from arguments + /// 2. Addresses from environment variable SAFE_PEERS + /// 3. Addresses from cache + /// 4. Addresses from network contacts URL + pub async fn get_bootstrap_addr(&self) -> Result> { + self.get_bootstrap_addr_and_initialize_cache(None).await + } + + pub async fn get_addrs(&self) -> Result> { + Ok(self + .get_bootstrap_addr() + .await? + .into_iter() + .map(|addr| addr.addr) + .collect()) + } + + /// Helper function to fetch bootstrap addresses and initialize cache based on the passed in args. + pub(crate) async fn get_bootstrap_addr_and_initialize_cache( + &self, + mut cache: Option<&mut BootstrapCacheStore>, + ) -> Result> { + // If this is the first node, return an empty list + if self.first { + info!("First node in network, no initial bootstrap peers"); + if let Some(cache) = cache { + info!("Clearing cache for 'first' node"); + cache.clear_peers_and_save().await?; + } + return Ok(vec![]); + } + + // If local mode is enabled, return empty store (will use mDNS) + if self.local || cfg!(feature = "local") { + info!("Local mode enabled, using only local discovery."); + if let Some(cache) = cache { + info!("Setting config to not write to cache, as 'local' mode is enabled"); + cache.config.disable_cache_writing = true; + } + return Ok(vec![]); + } + + let mut bootstrap_addresses = vec![]; + + // Add addrs from arguments if present + for addr in &self.addrs { + if let Some(addr) = craft_valid_multiaddr(addr, false) { + info!("Adding addr from arguments: {addr}"); + bootstrap_addresses.push(BootstrapAddr::new(addr)); + } else { + warn!("Invalid multiaddress format from arguments: {addr}"); + } + } + + // Read from ANT_PEERS environment variable if present + if let Ok(addrs) = std::env::var(ANT_PEERS_ENV) { + for addr_str in addrs.split(',') { + if let Some(addr) = craft_valid_multiaddr_from_str(addr_str, false) { + info!("Adding addr from environment variable: {addr}"); + bootstrap_addresses.push(BootstrapAddr::new(addr)); + } else { + warn!("Invalid multiaddress format from environment variable: {addr_str}"); + } + } + } + + // If we have a network contacts URL, fetch addrs from there. + if let Some(url) = self.network_contacts_url.clone() { + info!("Fetching bootstrap address from network contacts URL: {url}",); + let contacts_fetcher = ContactsFetcher::with_endpoints(vec![url])?; + let addrs = contacts_fetcher.fetch_bootstrap_addresses().await?; + bootstrap_addresses.extend(addrs); + } + + // Return here if we fetched peers from the args + if !bootstrap_addresses.is_empty() { + if let Some(cache) = cache.as_mut() { + info!("Initializing cache with bootstrap addresses from arguments"); + for addr in &bootstrap_addresses { + cache.add_addr(addr.addr.clone()); + } + } + return Ok(bootstrap_addresses); + } + + // load from cache if present + + if !self.ignore_cache { + let cfg = if let Some(cache) = cache.as_ref() { + Some(cache.config.clone()) + } else { + BootstrapCacheConfig::default_config().ok() + }; + if let Some(cfg) = cfg { + info!("Loading bootstrap addresses from cache"); + if let Ok(data) = BootstrapCacheStore::load_cache_data(&cfg).await { + if let Some(cache) = cache.as_mut() { + info!("Initializing cache with bootstrap addresses from cache"); + cache.data = data.clone(); + cache.old_shared_state = data.clone(); + } + + bootstrap_addresses = data + .peers + .into_iter() + .filter_map(|(_, addrs)| { + addrs + .0 + .into_iter() + .min_by_key(|addr| addr.failure_rate() as u64) + }) + .collect(); + } + } + } + + if !bootstrap_addresses.is_empty() { + return Ok(bootstrap_addresses); + } + + if !self.disable_mainnet_contacts { + let contacts_fetcher = ContactsFetcher::with_mainnet_endpoints()?; + let addrs = contacts_fetcher.fetch_bootstrap_addresses().await?; + if let Some(cache) = cache.as_mut() { + info!("Initializing cache with bootstrap addresses from mainnet contacts"); + for addr in addrs.iter() { + cache.add_addr(addr.addr.clone()); + } + } + bootstrap_addresses = addrs; + } + + if !bootstrap_addresses.is_empty() { + Ok(bootstrap_addresses) + } else { + error!("No initial bootstrap peers found through any means"); + Err(Error::NoBootstrapPeersFound) + } + } +} + +pub fn parse_multiaddr_str(addr: &str) -> std::result::Result { + addr.parse::() +} diff --git a/ant-bootstrap-cache/src/lib.rs b/ant-bootstrap/src/lib.rs similarity index 70% rename from ant-bootstrap-cache/src/lib.rs rename to ant-bootstrap/src/lib.rs index 37caedd3bd..849901edf1 100644 --- a/ant-bootstrap-cache/src/lib.rs +++ b/ant-bootstrap/src/lib.rs @@ -21,19 +21,22 @@ //! # Example //! //! ```no_run -//! use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig, PeersArgs}; +//! use ant_bootstrap::{BootstrapCacheStore, BootstrapCacheConfig, PeersArgs}; //! use url::Url; //! //! # async fn example() -> Result<(), Box> { -//! let config = BootstrapConfig::empty().unwrap(); +//! let config = BootstrapCacheConfig::empty(); //! let args = PeersArgs { //! first: false, //! addrs: vec![], //! network_contacts_url: Some(Url::parse("https://example.com/peers")?), //! local: false, +//! disable_mainnet_contacts: false, +//! ignore_cache: false, //! }; //! -//! let store = BootstrapCacheStore::from_args(args, config).await?; +//! let mut store = BootstrapCacheStore::empty(config)?; +//! store.initialize_from_peers_arg(&args).await?; //! let addrs = store.get_addrs(); //! # Ok(()) //! # } @@ -44,19 +47,20 @@ extern crate tracing; mod cache_store; pub mod config; -mod error; -mod initial_peer_discovery; +pub mod contacts; +pub mod error; +mod initial_peers; use libp2p::{multiaddr::Protocol, Multiaddr, PeerId}; use serde::{Deserialize, Serialize}; use std::time::SystemTime; use thiserror::Error; -use url::Url; pub use cache_store::BootstrapCacheStore; -pub use config::BootstrapConfig; +pub use config::BootstrapCacheConfig; +pub use contacts::ContactsFetcher; pub use error::{Error, Result}; -pub use initial_peer_discovery::InitialPeerDiscovery; +pub use initial_peers::{PeersArgs, ANT_PEERS_ENV}; /// Structure representing a list of bootstrap endpoints #[derive(Debug, Clone, Serialize, Deserialize)] @@ -117,6 +121,10 @@ impl BootstrapAddresses { .find(|bootstrap_addr| &bootstrap_addr.addr == addr) } + pub fn get_least_faulty(&self) -> Option<&BootstrapAddr> { + self.0.iter().min_by_key(|addr| addr.failure_rate() as u64) + } + pub fn remove_addr(&mut self, addr: &Multiaddr) { if let Some(idx) = self .0 @@ -136,6 +144,10 @@ impl BootstrapAddresses { }); bootstrap_addr.sync(old_bootstrap_addr, current_bootstrap_addr); } else { + trace!( + "Addr {:?} from fs not found in memory, inserting it.", + current_bootstrap_addr.addr + ); self.insert_addr(current_bootstrap_addr); } } @@ -205,6 +217,7 @@ impl BootstrapAddr { /// If the peer has a old state, just update the difference in values /// If the peer has no old state, add the values pub fn sync(&mut self, old_shared_state: Option<&Self>, current_shared_state: &Self) { + trace!("Syncing addr {:?} with old_shared_state: {old_shared_state:?} and current_shared_state: {current_shared_state:?}. Our in-memory state {self:?}", self.addr); if self.last_seen == current_shared_state.last_seen { return; } @@ -241,8 +254,8 @@ impl BootstrapAddr { self.failure_count = 1; self.success_count = 0; } - self.last_seen = std::cmp::max(self.last_seen, current_shared_state.last_seen); + trace!("Successfully synced BootstrapAddr: {self:?}"); } fn failure_rate(&self) -> f64 { @@ -254,100 +267,13 @@ impl BootstrapAddr { } } -/// Command line arguments for peer configuration -#[derive(Debug, Clone, Default)] -pub struct PeersArgs { - /// First node in the network - pub first: bool, - /// List of addresses - pub addrs: Vec, - /// URL to fetch network contacts from - pub network_contacts_url: Option, - /// Use only local discovery (mDNS) - pub local: bool, -} - -impl BootstrapCacheStore { - /// Create a new CacheStore from command line arguments - /// This also initializes the store with the provided bootstrap addresses - pub async fn from_args(args: PeersArgs, mut config: BootstrapConfig) -> Result { - if let Some(url) = &args.network_contacts_url { - config.endpoints.push(url.clone()); - } - - // If this is the first node, return empty store with no fallback - if args.first { - info!("First node in network, returning empty store"); - let mut store = Self::new_without_init(config).await?; - store.clear_peers_and_save().await?; - return Ok(store); - } - - // If local mode is enabled, return empty store (will use mDNS) - if args.local { - info!("Local mode enabled, using only local discovery. Cache writing is disabled"); - config.disable_cache_writing = true; - let store = Self::new_without_init(config).await?; - return Ok(store); - } - - // Create a new store but don't load from cache or fetch from endpoints yet - let mut store = Self::new_without_init(config).await?; - - // Add addrs from environment variable if present - if let Ok(env_string) = std::env::var("SAFE_PEERS") { - for multiaddr_str in env_string.split(',') { - if let Ok(addr) = multiaddr_str.parse() { - if let Some(addr) = craft_valid_multiaddr(&addr) { - info!("Adding addr from environment: {addr}",); - store.add_addr(addr); - } else { - warn!("Invalid peer address format from environment: {}", addr); - } - } - } - } - - // Add addrs from arguments if present - for addr in args.addrs { - if let Some(addr) = craft_valid_multiaddr(&addr) { - info!("Adding addr from arguments: {addr}"); - store.add_addr(addr); - } else { - warn!("Invalid multiaddress format from arguments: {addr}"); - } - } - - // If we have a network contacts URL, fetch addrs from there. - if let Some(url) = args.network_contacts_url { - info!( - "Fetching bootstrap address from network contacts URL: {}", - url - ); - let peer_discovery = InitialPeerDiscovery::with_endpoints(vec![url])?; - let bootstrap_addresses = peer_discovery.fetch_bootstrap_addresses().await?; - for addr in bootstrap_addresses { - store.add_addr(addr.addr); - } - } - - // If we have peers, update cache and return, else initialize from cache - if store.peer_count() > 0 { - info!("Using provided peers and updating cache"); - store.sync_and_save_to_disk(false).await?; - } else { - store.init().await?; - } - - Ok(store) - } -} - /// Craft a proper address to avoid any ill formed addresses -pub fn craft_valid_multiaddr(addr: &Multiaddr) -> Option { +/// +/// ignore_peer_id is only used for nat-detection contact list +pub fn craft_valid_multiaddr(addr: &Multiaddr, ignore_peer_id: bool) -> Option { let peer_id = addr .iter() - .find(|protocol| matches!(protocol, Protocol::P2p(_)))?; + .find(|protocol| matches!(protocol, Protocol::P2p(_))); let mut output_address = Multiaddr::empty(); @@ -385,17 +311,22 @@ pub fn craft_valid_multiaddr(addr: &Multiaddr) -> Option { return None; } - output_address.push(peer_id); + if let Some(peer_id) = peer_id { + output_address.push(peer_id); + } else if !ignore_peer_id { + return None; + } Some(output_address) } -pub fn craft_valid_multiaddr_from_str(addr_str: &str) -> Option { +/// ignore_peer_id is only used for nat-detection contact list +pub fn craft_valid_multiaddr_from_str(addr_str: &str, ignore_peer_id: bool) -> Option { let Ok(addr) = addr_str.parse::() else { warn!("Failed to parse multiaddr from str {addr_str}"); return None; }; - craft_valid_multiaddr(&addr) + craft_valid_multiaddr(&addr, ignore_peer_id) } pub fn multiaddr_get_peer_id(addr: &Multiaddr) -> Option { diff --git a/ant-bootstrap-cache/tests/address_format_tests.rs b/ant-bootstrap/tests/address_format_tests.rs similarity index 71% rename from ant-bootstrap-cache/tests/address_format_tests.rs rename to ant-bootstrap/tests/address_format_tests.rs index 73f8856465..9673991237 100644 --- a/ant-bootstrap-cache/tests/address_format_tests.rs +++ b/ant-bootstrap/tests/address_format_tests.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig, PeersArgs}; +use ant_bootstrap::{BootstrapCacheConfig, BootstrapCacheStore, PeersArgs}; use ant_logging::LogBuilder; use libp2p::Multiaddr; use tempfile::TempDir; @@ -16,12 +16,11 @@ use wiremock::{ }; // Setup function to create a new temp directory and config for each test -async fn setup() -> (TempDir, BootstrapConfig) { +async fn setup() -> (TempDir, BootstrapCacheConfig) { let temp_dir = TempDir::new().unwrap(); let cache_path = temp_dir.path().join("cache.json"); - let config = BootstrapConfig::empty() - .unwrap() + let config = BootstrapCacheConfig::empty() .with_cache_path(&cache_path) .with_max_peers(50); @@ -48,9 +47,12 @@ async fn test_multiaddr_format_parsing() -> Result<(), Box>(); assert_eq!(bootstrap_addresses.len(), 1, "Should have one peer"); assert_eq!( @@ -84,9 +86,12 @@ async fn test_network_contacts_format() -> Result<(), Box addrs: vec![], network_contacts_url: Some(format!("{}/peers", mock_server.uri()).parse()?), local: false, + disable_mainnet_contacts: false, + ignore_cache: false, }; - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config)?; + store.initialize_from_peers_arg(&args).await?; let adddrs = store.get_addrs().collect::>(); assert_eq!( adddrs.len(), @@ -106,58 +111,6 @@ async fn test_network_contacts_format() -> Result<(), Box Ok(()) } -#[tokio::test] -async fn test_invalid_address_handling() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - // Test various invalid address formats - let invalid_addrs = vec![ - "not-a-multiaddr", - "127.0.0.1", // IP only - "127.0.0.1:8080:extra", // Invalid socket addr - "/ip4/127.0.0.1", // Incomplete multiaddr - ]; - - for addr_str in invalid_addrs { - let (_temp_dir, config) = setup().await; // Fresh config for each test case - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid fetching from default endpoints - }; - - let store = BootstrapCacheStore::from_args(args.clone(), config.clone()).await?; - let addrs = store.get_addrs().collect::>(); - assert_eq!( - addrs.len(), - 0, - "Should have no peers from invalid address in env var: {}", - addr_str - ); - - // Also test direct args path - if let Ok(addr) = addr_str.parse::() { - let args_with_peer = PeersArgs { - first: false, - addrs: vec![addr], - network_contacts_url: None, - local: false, - }; - let store = BootstrapCacheStore::from_args(args_with_peer, config).await?; - let addrs = store.get_addrs().collect::>(); - assert_eq!( - addrs.len(), - 0, - "Should have no peers from invalid address in args: {}", - addr_str - ); - } - } - - Ok(()) -} - #[tokio::test] async fn test_socket_addr_format() -> Result<(), Box> { let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); @@ -170,13 +123,14 @@ async fn test_socket_addr_format() -> Result<(), Box> { addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints + disable_mainnet_contacts: false, + ignore_cache: false, }; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config)?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Should have no peers in local mode"); @@ -195,13 +149,14 @@ async fn test_multiaddr_format() -> Result<(), Box> { addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints + disable_mainnet_contacts: false, + ignore_cache: false, }; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config)?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Should have no peers in local mode"); @@ -220,13 +175,14 @@ async fn test_invalid_addr_format() -> Result<(), Box> { addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints + disable_mainnet_contacts: false, + ignore_cache: false, }; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config)?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Should have no peers in local mode"); @@ -245,13 +201,14 @@ async fn test_mixed_addr_formats() -> Result<(), Box> { addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints + disable_mainnet_contacts: false, + ignore_cache: false, }; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config)?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Should have no peers in local mode"); @@ -270,13 +227,14 @@ async fn test_socket_addr_conversion() -> Result<(), Box> addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints + disable_mainnet_contacts: false, + ignore_cache: false, }; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config)?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Should have no peers in local mode"); @@ -295,13 +253,14 @@ async fn test_invalid_socket_addr() -> Result<(), Box> { addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints + disable_mainnet_contacts: false, + ignore_cache: false, }; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config)?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Should have no peers in local mode"); @@ -320,13 +279,14 @@ async fn test_invalid_multiaddr() -> Result<(), Box> { addrs: vec![], network_contacts_url: None, local: true, // Use local mode to avoid getting peers from default endpoints + disable_mainnet_contacts: false, + ignore_cache: false, }; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config)?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Should have no peers in local mode"); @@ -345,13 +305,14 @@ async fn test_mixed_valid_invalid_addrs() -> Result<(), Box>(); assert!(addrs.is_empty(), "Should have no peers in local mode"); diff --git a/ant-bootstrap-cache/tests/cache_tests.rs b/ant-bootstrap/tests/cache_tests.rs similarity index 85% rename from ant-bootstrap-cache/tests/cache_tests.rs rename to ant-bootstrap/tests/cache_tests.rs index d3673c3206..aac95579a0 100644 --- a/ant-bootstrap-cache/tests/cache_tests.rs +++ b/ant-bootstrap/tests/cache_tests.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig}; +use ant_bootstrap::{BootstrapCacheConfig, BootstrapCacheStore}; use ant_logging::LogBuilder; use libp2p::Multiaddr; use std::time::Duration; @@ -21,11 +21,9 @@ async fn test_cache_store_operations() -> Result<(), Box> let cache_path = temp_dir.path().join("cache.json"); // Create cache store with config - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let mut cache_store = BootstrapCacheStore::new(config).await?; + let mut cache_store = BootstrapCacheStore::empty(config)?; // Test adding and retrieving peers let addr: Multiaddr = @@ -51,11 +49,9 @@ async fn test_cache_persistence() -> Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create first cache store - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let mut cache_store1 = BootstrapCacheStore::new(config.clone()).await?; + let mut cache_store1 = BootstrapCacheStore::empty(config.clone())?; // Add a peer and mark it as reliable let addr: Multiaddr = @@ -66,7 +62,8 @@ async fn test_cache_persistence() -> Result<(), Box> { cache_store1.sync_and_save_to_disk(true).await.unwrap(); // Create a new cache store with the same path - let cache_store2 = BootstrapCacheStore::new(config).await?; + let mut cache_store2 = BootstrapCacheStore::empty(config)?; + cache_store2.initialize_from_local_cache().await.unwrap(); let addrs = cache_store2.get_reliable_addrs().collect::>(); assert!(!addrs.is_empty(), "Cache should persist across instances"); @@ -84,10 +81,8 @@ async fn test_cache_reliability_tracking() -> Result<(), Box Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create cache with small max_peers limit - let mut config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let mut config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); config.max_peers = 2; - let mut cache_store = BootstrapCacheStore::new(config).await?; + let mut cache_store = BootstrapCacheStore::empty(config)?; // Add three peers with distinct timestamps let mut addresses = Vec::new(); @@ -171,11 +164,9 @@ async fn test_cache_file_corruption() -> Result<(), Box> let cache_path = temp_dir.path().join("cache.json"); // Create cache with some peers - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let mut cache_store = BootstrapCacheStore::new_without_init(config.clone()).await?; + let mut cache_store = BootstrapCacheStore::empty(config.clone())?; // Add a peer let addr: Multiaddr = @@ -189,7 +180,7 @@ async fn test_cache_file_corruption() -> Result<(), Box> tokio::fs::write(&cache_path, "invalid json content").await?; // Create a new cache store - it should handle the corruption gracefully - let mut new_cache_store = BootstrapCacheStore::new_without_init(config).await?; + let mut new_cache_store = BootstrapCacheStore::empty(config)?; let addrs = new_cache_store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Cache should be empty after corruption"); diff --git a/ant-bootstrap-cache/tests/cli_integration_tests.rs b/ant-bootstrap/tests/cli_integration_tests.rs similarity index 76% rename from ant-bootstrap-cache/tests/cli_integration_tests.rs rename to ant-bootstrap/tests/cli_integration_tests.rs index ebc0bb86ea..3afd531b67 100644 --- a/ant-bootstrap-cache/tests/cli_integration_tests.rs +++ b/ant-bootstrap/tests/cli_integration_tests.rs @@ -6,7 +6,8 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap_cache::{BootstrapCacheStore, BootstrapConfig, PeersArgs}; +use ant_bootstrap::ANT_PEERS_ENV; +use ant_bootstrap::{BootstrapCacheConfig, BootstrapCacheStore, PeersArgs}; use ant_logging::LogBuilder; use libp2p::Multiaddr; use std::env; @@ -17,12 +18,10 @@ use wiremock::{ Mock, MockServer, ResponseTemplate, }; -async fn setup() -> (TempDir, BootstrapConfig) { +async fn setup() -> (TempDir, BootstrapCacheConfig) { let temp_dir = TempDir::new().unwrap(); let cache_path = temp_dir.path().join("cache.json"); - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); (temp_dir, config) } @@ -37,9 +36,12 @@ async fn test_first_flag() -> Result<(), Box> { addrs: vec![], network_contacts_url: None, local: false, + disable_mainnet_contacts: false, + ignore_cache: false, }; - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config.clone())?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "First node should have no addrs"); @@ -60,9 +62,12 @@ async fn test_peer_argument() -> Result<(), Box> { addrs: vec![peer_addr.clone()], network_contacts_url: None, local: false, + disable_mainnet_contacts: false, + ignore_cache: false, }; - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config.clone())?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert_eq!(addrs.len(), 1, "Should have one addr"); assert_eq!(addrs[0].addr, peer_addr, "Should have the correct address"); @@ -71,29 +76,30 @@ async fn test_peer_argument() -> Result<(), Box> { } #[tokio::test] -async fn test_safe_peers_env() -> Result<(), Box> { +async fn test_ant_peers_env() -> Result<(), Box> { let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); let temp_dir = TempDir::new()?; let cache_path = temp_dir.path().join("cache.json"); - // Set SAFE_PEERS environment variable + // Set ANT_PEERS_ENV environment variable let addr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE"; - env::set_var("SAFE_PEERS", addr); + env::set_var(ANT_PEERS_ENV, addr); let args = PeersArgs { first: false, addrs: vec![], network_contacts_url: None, local: false, + disable_mainnet_contacts: false, + ignore_cache: false, }; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config.clone())?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); // We should have multiple peers (env var + cache/endpoints) @@ -101,10 +107,13 @@ async fn test_safe_peers_env() -> Result<(), Box> { // Verify that our env var peer is included in the set let has_env_peer = addrs.iter().any(|p| p.addr.to_string() == addr); - assert!(has_env_peer, "Should include the peer from env var"); + assert!( + has_env_peer, + "Should include the peer from ANT_PEERS_ENV var" + ); // Clean up - env::remove_var("SAFE_PEERS"); + env::remove_var(ANT_PEERS_ENV); Ok(()) } @@ -131,9 +140,12 @@ async fn test_network_contacts_fallback() -> Result<(), Box>(); assert_eq!( addrs.len(), @@ -152,9 +164,7 @@ async fn test_local_mode() -> Result<(), Box> { let cache_path = temp_dir.path().join("cache.json"); // Create a config with some peers in the cache - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); // Create args with local mode enabled let args = PeersArgs { @@ -162,9 +172,12 @@ async fn test_local_mode() -> Result<(), Box> { addrs: vec![], network_contacts_url: None, local: true, + disable_mainnet_contacts: false, + ignore_cache: false, }; - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config.clone())?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert!(addrs.is_empty(), "Local mode should have no peers"); @@ -188,18 +201,19 @@ async fn test_test_network_peers() -> Result<(), Box> { "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); let args = PeersArgs { first: false, addrs: vec![peer_addr.clone()], network_contacts_url: None, local: false, + disable_mainnet_contacts: false, + ignore_cache: false, }; - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config.clone())?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert_eq!(addrs.len(), 1, "Should have exactly one test network peer"); assert_eq!( @@ -228,9 +242,7 @@ async fn test_peers_update_cache() -> Result<(), Box> { "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" .parse()?; - let config = BootstrapConfig::empty() - .unwrap() - .with_cache_path(&cache_path); + let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); // Create args with peers but no test network mode let args = PeersArgs { @@ -238,9 +250,12 @@ async fn test_peers_update_cache() -> Result<(), Box> { addrs: vec![peer_addr.clone()], network_contacts_url: None, local: false, + disable_mainnet_contacts: false, + ignore_cache: false, }; - let store = BootstrapCacheStore::from_args(args, config).await?; + let mut store = BootstrapCacheStore::empty(config.clone())?; + store.initialize_from_peers_arg(&args).await?; let addrs = store.get_addrs().collect::>(); assert_eq!(addrs.len(), 1, "Should have one peer"); assert_eq!(addrs[0].addr, peer_addr, "Should have the correct peer"); diff --git a/ant-bootstrap-cache/tests/integration_tests.rs b/ant-bootstrap/tests/integration_tests.rs similarity index 91% rename from ant-bootstrap-cache/tests/integration_tests.rs rename to ant-bootstrap/tests/integration_tests.rs index 53456c2af2..781330e305 100644 --- a/ant-bootstrap-cache/tests/integration_tests.rs +++ b/ant-bootstrap/tests/integration_tests.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap_cache::{BootstrapEndpoints, InitialPeerDiscovery}; +use ant_bootstrap::{BootstrapEndpoints, ContactsFetcher}; use libp2p::Multiaddr; use tracing_subscriber::{fmt, EnvFilter}; use url::Url; @@ -25,8 +25,8 @@ fn init_logging() { #[tokio::test] async fn test_fetch_from_amazon_s3() { init_logging(); - let discovery = InitialPeerDiscovery::new().unwrap(); - let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let fetcher = ContactsFetcher::with_mainnet_endpoints().unwrap(); + let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); // We should get some peers assert!(!addrs.is_empty(), "Expected to find some peers from S3"); @@ -63,9 +63,9 @@ async fn test_individual_s3_endpoints() { let endpoint = format!("{}/peers", mock_server.uri()) .parse::() .unwrap(); - let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]).unwrap(); + let fetcher = ContactsFetcher::with_endpoints(vec![endpoint.clone()]).unwrap(); - match discovery.fetch_bootstrap_addresses().await { + match fetcher.fetch_bootstrap_addresses().await { Ok(peers) => { println!( "Successfully fetched {} peers from {}", @@ -103,8 +103,8 @@ async fn test_individual_s3_endpoints() { #[tokio::test] async fn test_response_format() { init_logging(); - let discovery = InitialPeerDiscovery::new().unwrap(); - let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let fetcher = ContactsFetcher::with_mainnet_endpoints().unwrap(); + let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); // Get the first peer to check format let first_peer = addrs.first().expect("Expected at least one peer"); @@ -155,9 +155,9 @@ async fn test_json_endpoint_format() { .await; let endpoint = mock_server.uri().parse::().unwrap(); - let discovery = InitialPeerDiscovery::with_endpoints(vec![endpoint.clone()]).unwrap(); + let fetcher = ContactsFetcher::with_endpoints(vec![endpoint.clone()]).unwrap(); - let addrs = discovery.fetch_bootstrap_addresses().await.unwrap(); + let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); assert_eq!(addrs.len(), 2); // Verify peer addresses diff --git a/ant-logging/src/layers.rs b/ant-logging/src/layers.rs index 2d26be3521..be0ac5668c 100644 --- a/ant-logging/src/layers.rs +++ b/ant-logging/src/layers.rs @@ -274,7 +274,7 @@ fn get_logging_targets(logging_env_value: &str) -> Result> ("antctl".to_string(), Level::TRACE), ("antctld".to_string(), Level::TRACE), // libs - ("ant_bootstrap_cache".to_string(), Level::TRACE), + ("ant_bootstrap".to_string(), Level::TRACE), ("ant_build_info".to_string(), Level::TRACE), ("ant_evm".to_string(), Level::TRACE), ("ant_logging".to_string(), Level::TRACE), From 460bc67297a7cf23515ec6b9b2736b01be4fccfb Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Wed, 4 Dec 2024 20:49:32 +0530 Subject: [PATCH 11/21] feat(bootstrap): impl bootstrap cache into the codebase --- Cargo.lock | 3 + ant-cli/Cargo.toml | 1 + ant-cli/src/access/network.rs | 5 +- ant-cli/src/commands.rs | 3 +- ant-cli/src/opt.rs | 8 +-- ant-networking/Cargo.toml | 1 + ant-networking/src/driver.rs | 111 +++++++++++++++++++++++++++--- ant-networking/src/event/kad.rs | 3 +- ant-networking/src/event/mod.rs | 10 ++- ant-networking/src/event/swarm.rs | 24 ++++++- ant-node/Cargo.toml | 1 + ant-node/src/bin/antnode/main.rs | 23 +++---- ant-node/src/error.rs | 2 + ant-node/src/node.rs | 49 ++++++++++--- ant-node/src/python.rs | 4 +- 15 files changed, 199 insertions(+), 49 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bed4a26d61..0fa6aa094e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -759,6 +759,7 @@ dependencies = [ name = "ant-cli" version = "0.1.5" dependencies = [ + "ant-bootstrap", "ant-build-info", "ant-logging", "ant-peers-acquisition", @@ -852,6 +853,7 @@ name = "ant-networking" version = "0.19.5" dependencies = [ "aes-gcm-siv", + "ant-bootstrap", "ant-build-info", "ant-evm", "ant-protocol", @@ -898,6 +900,7 @@ dependencies = [ name = "ant-node" version = "0.112.6" dependencies = [ + "ant-bootstrap", "ant-build-info", "ant-evm", "ant-logging", diff --git a/ant-cli/Cargo.toml b/ant-cli/Cargo.toml index 7f1983fcfa..05cbd82eac 100644 --- a/ant-cli/Cargo.toml +++ b/ant-cli/Cargo.toml @@ -25,6 +25,7 @@ name = "files" harness = false [dependencies] +ant-bootstrap = { path = "../ant-bootstrap", version = "0.1.0" } ant-build-info = { path = "../ant-build-info", version = "0.1.19" } ant-logging = { path = "../ant-logging", version = "0.2.40" } ant-peers-acquisition = { path = "../ant-peers-acquisition", version = "0.5.7" } diff --git a/ant-cli/src/access/network.rs b/ant-cli/src/access/network.rs index fb7d5fe597..45f049e31f 100644 --- a/ant-cli/src/access/network.rs +++ b/ant-cli/src/access/network.rs @@ -6,15 +6,14 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_peers_acquisition::PeersArgs; -use ant_peers_acquisition::ANT_PEERS_ENV; +use ant_bootstrap::{PeersArgs, ANT_PEERS_ENV}; use autonomi::Multiaddr; use color_eyre::eyre::Context; use color_eyre::Result; use color_eyre::Section; pub async fn get_peers(peers: PeersArgs) -> Result> { - peers.get_peers().await + peers.get_addrs().await .wrap_err("Please provide valid Network peers to connect to") .with_suggestion(|| format!("make sure you've provided network peers using the --peers option or the {ANT_PEERS_ENV} env var")) .with_suggestion(|| "a peer address looks like this: /ip4/42.42.42.42/udp/4242/quic-v1/p2p/B64nodePeerIDvdjb3FAJF4ks3moreBase64CharsHere") diff --git a/ant-cli/src/commands.rs b/ant-cli/src/commands.rs index 663898b6ea..a1d1fd487a 100644 --- a/ant-cli/src/commands.rs +++ b/ant-cli/src/commands.rs @@ -11,11 +11,10 @@ mod register; mod vault; mod wallet; +use crate::opt::Opt; use clap::Subcommand; use color_eyre::Result; -use crate::opt::Opt; - #[derive(Subcommand, Debug)] pub enum SubCmd { /// Operations related to file handling. diff --git a/ant-cli/src/opt.rs b/ant-cli/src/opt.rs index 804156e4bd..3e84379fc0 100644 --- a/ant-cli/src/opt.rs +++ b/ant-cli/src/opt.rs @@ -6,14 +6,12 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use std::time::Duration; - +use crate::commands::SubCmd; +use ant_bootstrap::PeersArgs; use ant_logging::{LogFormat, LogOutputDest}; -use ant_peers_acquisition::PeersArgs; use clap::Parser; use color_eyre::Result; - -use crate::commands::SubCmd; +use std::time::Duration; // Please do not remove the blank lines in these doc comments. // They are used for inserting line breaks when the help menu is rendered in the UI. diff --git a/ant-networking/Cargo.toml b/ant-networking/Cargo.toml index 98613fabf8..e1a9d7d20c 100644 --- a/ant-networking/Cargo.toml +++ b/ant-networking/Cargo.toml @@ -21,6 +21,7 @@ websockets = ["libp2p/tcp"] [dependencies] aes-gcm-siv = "0.11.1" +ant-bootstrap = { path = "../ant-bootstrap", version = "0.1.0" } ant-build-info = { path = "../ant-build-info", version = "0.1.19" } ant-evm = { path = "../ant-evm", version = "0.1.4" } ant-protocol = { path = "../ant-protocol", version = "0.17.15" } diff --git a/ant-networking/src/driver.rs b/ant-networking/src/driver.rs index a9792700da..87df73825b 100644 --- a/ant-networking/src/driver.rs +++ b/ant-networking/src/driver.rs @@ -30,6 +30,7 @@ use crate::{ }; use crate::{transport, NodeIssue}; +use ant_bootstrap::BootstrapCacheStore; use ant_evm::PaymentQuote; use ant_protocol::{ messages::{ChunkProof, Nonce, Request, Response}, @@ -71,8 +72,11 @@ use std::{ num::NonZeroUsize, path::PathBuf, }; -use tokio::sync::{mpsc, oneshot}; use tokio::time::Duration; +use tokio::{ + sync::{mpsc, oneshot}, + time::Interval, +}; use tracing::warn; use xor_name::XorName; @@ -260,13 +264,13 @@ pub(super) struct NodeBehaviour { #[derive(Debug)] pub struct NetworkBuilder { + bootstrap_cache: Option, is_behind_home_network: bool, keypair: Keypair, local: bool, listen_addr: Option, request_timeout: Option, concurrency_limit: Option, - initial_peers: Vec, #[cfg(feature = "open-metrics")] metrics_registries: Option, #[cfg(feature = "open-metrics")] @@ -278,13 +282,13 @@ pub struct NetworkBuilder { impl NetworkBuilder { pub fn new(keypair: Keypair, local: bool) -> Self { Self { + bootstrap_cache: None, is_behind_home_network: false, keypair, local, listen_addr: None, request_timeout: None, concurrency_limit: None, - initial_peers: Default::default(), #[cfg(feature = "open-metrics")] metrics_registries: None, #[cfg(feature = "open-metrics")] @@ -294,6 +298,10 @@ impl NetworkBuilder { } } + pub fn bootstrap_cache(&mut self, bootstrap_cache: BootstrapCacheStore) { + self.bootstrap_cache = Some(bootstrap_cache); + } + pub fn is_behind_home_network(&mut self, enable: bool) { self.is_behind_home_network = enable; } @@ -310,10 +318,6 @@ impl NetworkBuilder { self.concurrency_limit = Some(concurrency_limit); } - pub fn initial_peers(&mut self, initial_peers: Vec) { - self.initial_peers = initial_peers; - } - /// Set the registries used inside the metrics server. /// Configure the `metrics_server_port` to enable the metrics server. #[cfg(feature = "open-metrics")] @@ -720,6 +724,7 @@ impl NetworkBuilder { close_group: Vec::with_capacity(CLOSE_GROUP_SIZE), peers_in_rt: 0, bootstrap, + bootstrap_cache: self.bootstrap_cache, relay_manager, connected_relay_clients: Default::default(), external_address_manager, @@ -815,6 +820,7 @@ pub struct SwarmDriver { pub(crate) close_group: Vec, pub(crate) peers_in_rt: usize, pub(crate) bootstrap: ContinuousNetworkDiscover, + pub(crate) bootstrap_cache: Option, pub(crate) external_address_manager: Option, pub(crate) relay_manager: Option, /// The peers that are using our relay service. @@ -843,7 +849,7 @@ pub struct SwarmDriver { pub(crate) bootstrap_peers: BTreeMap, HashSet>, // Peers that having live connection to. Any peer got contacted during kad network query // will have live connection established. And they may not appear in the RT. - pub(crate) live_connected_peers: BTreeMap, + pub(crate) live_connected_peers: BTreeMap, /// The list of recently established connections ids. /// This is used to prevent log spamming. pub(crate) latest_established_connection_ids: HashMap, @@ -876,6 +882,24 @@ impl SwarmDriver { let mut set_farthest_record_interval = interval(CLOSET_RECORD_CHECK_INTERVAL); let mut relay_manager_reservation_interval = interval(RELAY_MANAGER_RESERVATION_INTERVAL); + let mut bootstrap_cache_save_interval = self.bootstrap_cache.as_ref().and_then(|cache| { + if cache.config().disable_cache_writing { + None + } else { + // add a variance of 10% to the interval, to avoid all nodes writing to disk at the same time. + let duration = + Self::duration_with_variance(cache.config().min_cache_save_duration, 10); + Some(interval(duration)) + } + }); + if let Some(interval) = bootstrap_cache_save_interval.as_mut() { + interval.tick().await; // first tick completes immediately + info!( + "Bootstrap cache save interval is set to {:?}", + interval.period() + ); + } + // temporarily skip processing IncomingConnectionError swarm event to avoid log spamming let mut previous_incoming_connection_error_event = None; loop { @@ -1005,6 +1029,37 @@ impl SwarmDriver { relay_manager.try_connecting_to_relay(&mut self.swarm, &self.bad_nodes) } }, + Some(()) = Self::conditional_interval(&mut bootstrap_cache_save_interval) => { + let Some(bootstrap_cache) = self.bootstrap_cache.as_mut() else { + continue; + }; + let Some(current_interval) = bootstrap_cache_save_interval.as_mut() else { + continue; + }; + + if let Err(err) = bootstrap_cache.sync_and_save_to_disk(true).await { + error!("Failed to save bootstrap cache: {err}"); + } + + if current_interval.period() >= bootstrap_cache.config().max_cache_save_duration { + continue; + } + + // add a variance of 1% to the max interval to avoid all nodes writing to disk at the same time. + let max_cache_save_duration = + Self::duration_with_variance(bootstrap_cache.config().max_cache_save_duration, 1); + + // scale up the interval until we reach the max + let new_duration = Duration::from_secs( + std::cmp::min( + current_interval.period().as_secs() * bootstrap_cache.config().cache_save_scaling_factor, + max_cache_save_duration.as_secs(), + )); + info!("Scaling up the bootstrap cache save interval to {new_duration:?}"); + *current_interval = interval(new_duration); + current_interval.tick().await; // first tick completes immediately + + }, } } } @@ -1156,13 +1211,35 @@ impl SwarmDriver { info!("Listening on {id:?} with addr: {addr:?}"); Ok(()) } + + /// Returns a new duration that is within +/- variance of the provided duration. + fn duration_with_variance(duration: Duration, variance: u32) -> Duration { + let actual_variance = duration / variance; + let random_adjustment = + Duration::from_secs(rand::thread_rng().gen_range(0..actual_variance.as_secs())); + if random_adjustment.as_secs() % 2 == 0 { + duration - random_adjustment + } else { + duration + random_adjustment + } + } + + /// To tick an optional interval inside tokio::select! without looping forever. + async fn conditional_interval(i: &mut Option) -> Option<()> { + match i { + Some(i) => { + i.tick().await; + Some(()) + } + None => None, + } + } } #[cfg(test)] mod tests { use super::check_and_wipe_storage_dir_if_necessary; - - use std::{fs, io::Read}; + use std::{fs, io::Read, time::Duration}; #[tokio::test] async fn version_file_update() { @@ -1219,4 +1296,18 @@ mod tests { // The storage_dir shall be removed as version_key changed assert!(fs::metadata(storage_dir.clone()).is_err()); } + + #[tokio::test] + async fn test_duration_variance_fn() { + let duration = Duration::from_secs(100); + let variance = 10; + for _ in 0..10000 { + let new_duration = crate::SwarmDriver::duration_with_variance(duration, variance); + if new_duration < duration - duration / variance + || new_duration > duration + duration / variance + { + panic!("new_duration: {new_duration:?} is not within the expected range",); + } + } + } } diff --git a/ant-networking/src/event/kad.rs b/ant-networking/src/event/kad.rs index 5934b11bfa..1af95f9d1d 100644 --- a/ant-networking/src/event/kad.rs +++ b/ant-networking/src/event/kad.rs @@ -242,11 +242,12 @@ impl SwarmDriver { peer, is_new_peer, old_peer, + addresses, .. } => { event_string = "kad_event::RoutingUpdated"; if is_new_peer { - self.update_on_peer_addition(peer); + self.update_on_peer_addition(peer, addresses); // This should only happen once if self.bootstrap.notify_new_peer() { diff --git a/ant-networking/src/event/mod.rs b/ant-networking/src/event/mod.rs index ad44f83da2..ae6e2aefca 100644 --- a/ant-networking/src/event/mod.rs +++ b/ant-networking/src/event/mod.rs @@ -16,7 +16,7 @@ use custom_debug::Debug as CustomDebug; #[cfg(feature = "local")] use libp2p::mdns; use libp2p::{ - kad::{Record, RecordKey, K_VALUE}, + kad::{Addresses, Record, RecordKey, K_VALUE}, request_response::ResponseChannel as PeerResponseChannel, Multiaddr, PeerId, }; @@ -232,7 +232,7 @@ impl SwarmDriver { } /// Update state on addition of a peer to the routing table. - pub(crate) fn update_on_peer_addition(&mut self, added_peer: PeerId) { + pub(crate) fn update_on_peer_addition(&mut self, added_peer: PeerId, addresses: Addresses) { self.peers_in_rt = self.peers_in_rt.saturating_add(1); let n_peers = self.peers_in_rt; info!("New peer added to routing table: {added_peer:?}, now we have #{n_peers} connected peers"); @@ -240,6 +240,12 @@ impl SwarmDriver { #[cfg(feature = "loud")] println!("New peer added to routing table: {added_peer:?}, now we have #{n_peers} connected peers"); + if let Some(bootstrap_cache) = &mut self.bootstrap_cache { + for addr in addresses.iter() { + bootstrap_cache.add_addr(addr.clone()); + } + } + self.log_kbuckets(&added_peer); self.send_event(NetworkEvent::PeerAdded(added_peer, self.peers_in_rt)); diff --git a/ant-networking/src/event/swarm.rs b/ant-networking/src/event/swarm.rs index c5fad1256b..6d0c283a0c 100644 --- a/ant-networking/src/event/swarm.rs +++ b/ant-networking/src/event/swarm.rs @@ -375,8 +375,17 @@ impl SwarmDriver { let _ = self.live_connected_peers.insert( connection_id, - (peer_id, Instant::now() + Duration::from_secs(60)), + ( + peer_id, + endpoint.get_remote_address().clone(), + Instant::now() + Duration::from_secs(60), + ), ); + + if let Some(bootstrap_cache) = self.bootstrap_cache.as_mut() { + bootstrap_cache.update_addr_status(endpoint.get_remote_address(), true); + } + self.insert_latest_established_connection_ids( connection_id, endpoint.get_remote_address(), @@ -406,7 +415,7 @@ impl SwarmDriver { } => { event_string = "OutgoingConnErr"; warn!("OutgoingConnectionError to {failed_peer_id:?} on {connection_id:?} - {error:?}"); - let _ = self.live_connected_peers.remove(&connection_id); + let connection_details = self.live_connected_peers.remove(&connection_id); self.record_connection_metrics(); // we need to decide if this was a critical error and the peer should be removed from the routing table @@ -506,6 +515,15 @@ impl SwarmDriver { } }; + // Just track failures during outgoing connection with `failed_peer_id` inside the bootstrap cache. + // OutgoingConnectionError without peer_id can happen when dialing multiple addresses of a peer. + // And similarly IncomingConnectionError can happen when a peer has multiple transports/listen addrs. + if let (Some((_, failed_addr, _)), Some(bootstrap_cache)) = + (connection_details, self.bootstrap_cache.as_mut()) + { + bootstrap_cache.update_addr_status(&failed_addr, false); + } + if should_clean_peer { warn!("Tracking issue of {failed_peer_id:?}. Clearing it out for now"); @@ -641,7 +659,7 @@ impl SwarmDriver { self.last_connection_pruning_time = Instant::now(); let mut removed_conns = 0; - self.live_connected_peers.retain(|connection_id, (peer_id, timeout_time)| { + self.live_connected_peers.retain(|connection_id, (peer_id, _addr, timeout_time)| { // skip if timeout isn't reached yet if Instant::now() < *timeout_time { diff --git a/ant-node/Cargo.toml b/ant-node/Cargo.toml index a1a5700b64..283dc940a3 100644 --- a/ant-node/Cargo.toml +++ b/ant-node/Cargo.toml @@ -28,6 +28,7 @@ upnp = ["ant-networking/upnp"] websockets = ["ant-networking/websockets"] [dependencies] +ant-bootstrap = { path = "../ant-bootstrap", version = "0.1.0" } ant-build-info = { path = "../ant-build-info", version = "0.1.19" } ant-evm = { path = "../ant-evm", version = "0.1.4" } ant-logging = { path = "../ant-logging", version = "0.2.40" } diff --git a/ant-node/src/bin/antnode/main.rs b/ant-node/src/bin/antnode/main.rs index cebbc0857c..caae71685f 100644 --- a/ant-node/src/bin/antnode/main.rs +++ b/ant-node/src/bin/antnode/main.rs @@ -13,12 +13,12 @@ mod rpc_service; mod subcommands; use crate::subcommands::EvmNetworkCommand; +use ant_bootstrap::{BootstrapCacheConfig, BootstrapCacheStore, PeersArgs}; use ant_evm::{get_evm_network_from_env, EvmNetwork, RewardsAddress}; #[cfg(feature = "metrics")] use ant_logging::metrics::init_metrics; use ant_logging::{Level, LogFormat, LogOutputDest, ReloadHandle}; use ant_node::{Marker, NodeBuilder, NodeEvent, NodeEventsReceiver}; -use ant_peers_acquisition::PeersArgs; use ant_protocol::{ node::get_antnode_root_dir, node_rpc::{NodeCtrl, StopResult}, @@ -172,12 +172,6 @@ struct Opt { #[clap(long)] rpc: Option, - /// Run the node in local mode. - /// - /// When this flag is set, we will not filter out local addresses that we observe. - #[clap(long)] - local: bool, - /// Specify the owner(readable discord user name). #[clap(long)] owner: Option, @@ -271,7 +265,9 @@ fn main() -> Result<()> { init_logging(&opt, keypair.public().to_peer_id())?; let rt = Runtime::new()?; - let bootstrap_peers = rt.block_on(opt.peers.get_peers())?; + let mut bootstrap_cache = BootstrapCacheStore::empty(BootstrapCacheConfig::default_config()?)?; + rt.block_on(bootstrap_cache.initialize_from_peers_arg(&opt.peers))?; + let msg = format!( "Running {} v{}", env!("CARGO_BIN_NAME"), @@ -285,7 +281,10 @@ fn main() -> Result<()> { ant_build_info::git_info() ); - info!("Node started with initial_peers {bootstrap_peers:?}"); + info!( + "Node started with bootstrap cache containing {} peers", + bootstrap_cache.peer_count() + ); // Create a tokio runtime per `run_node` attempt, this ensures // any spawned tasks are closed before we would attempt to run @@ -299,13 +298,13 @@ fn main() -> Result<()> { rewards_address, evm_network, node_socket_addr, - bootstrap_peers, - opt.local, + opt.peers.local, root_dir, #[cfg(feature = "upnp")] opt.upnp, ); - node_builder.is_behind_home_network = opt.home_network; + node_builder.bootstrap_cache(bootstrap_cache); + node_builder.is_behind_home_network(opt.home_network); #[cfg(feature = "open-metrics")] let mut node_builder = node_builder; // if enable flag is provided or only if the port is specified then enable the server by setting Some() diff --git a/ant-node/src/error.rs b/ant-node/src/error.rs index 86aba2df5c..4a80796eb2 100644 --- a/ant-node/src/error.rs +++ b/ant-node/src/error.rs @@ -81,6 +81,8 @@ pub enum Error { // ---------- Initialize Errors #[error("Failed to generate a reward key")] FailedToGenerateRewardKey, + #[error("Cannot set both initial_peers and bootstrap_cache")] + InitialPeersAndBootstrapCacheSet, // ---------- Miscellaneous Errors #[error("Failed to obtain node's current port")] diff --git a/ant-node/src/node.rs b/ant-node/src/node.rs index c1ea235239..c3b2ab710c 100644 --- a/ant-node/src/node.rs +++ b/ant-node/src/node.rs @@ -11,7 +11,8 @@ use super::{ }; #[cfg(feature = "open-metrics")] use crate::metrics::NodeMetricsRecorder; -use crate::RunningNode; +use crate::{error::Error, RunningNode}; +use ant_bootstrap::BootstrapCacheStore; use ant_evm::{AttoTokens, RewardsAddress}; #[cfg(feature = "open-metrics")] use ant_networking::MetricsRegistries; @@ -81,41 +82,42 @@ const NETWORK_DENSITY_SAMPLING_INTERVAL_MAX_S: u64 = 200; /// Helper to build and run a Node pub struct NodeBuilder { + bootstrap_cache: Option, + initial_peers: Vec, identity_keypair: Keypair, evm_address: RewardsAddress, evm_network: EvmNetwork, addr: SocketAddr, - initial_peers: Vec, local: bool, root_dir: PathBuf, #[cfg(feature = "open-metrics")] /// Set to Some to enable the metrics server metrics_server_port: Option, /// Enable hole punching for nodes connecting from home networks. - pub is_behind_home_network: bool, + is_behind_home_network: bool, #[cfg(feature = "upnp")] upnp: bool, } impl NodeBuilder { - /// Instantiate the builder - #[expect(clippy::too_many_arguments)] + /// Instantiate the builder. The initial peers can either be supplied via the `initial_peers` method + /// or fetched from the bootstrap cache set using `bootstrap_cache` method. pub fn new( identity_keypair: Keypair, evm_address: RewardsAddress, evm_network: EvmNetwork, addr: SocketAddr, - initial_peers: Vec, local: bool, root_dir: PathBuf, #[cfg(feature = "upnp")] upnp: bool, ) -> Self { Self { + bootstrap_cache: None, + initial_peers: vec![], identity_keypair, evm_address, evm_network, addr, - initial_peers, local, root_dir, #[cfg(feature = "open-metrics")] @@ -132,6 +134,21 @@ impl NodeBuilder { self.metrics_server_port = port; } + /// Set the initialized bootstrap cache. This is mutually exclusive with `initial_peers` + pub fn bootstrap_cache(&mut self, cache: BootstrapCacheStore) { + self.bootstrap_cache = Some(cache); + } + + /// Set the initial peers to dial at startup. This is mutually exclusive with `bootstrap_cache` + pub fn initial_peers(&mut self, peers: Vec) { + self.initial_peers = peers; + } + + /// Set the flag to indicate if the node is behind a home network + pub fn is_behind_home_network(&mut self, is_behind_home_network: bool) { + self.is_behind_home_network = is_behind_home_network; + } + /// Asynchronously runs a new node instance, setting up the swarm driver, /// creating a data storage, and handling network events. Returns the /// created `RunningNode` which contains a `NodeEventsChannel` for listening @@ -160,11 +177,25 @@ impl NodeBuilder { None }; + if !self.initial_peers.is_empty() && self.bootstrap_cache.is_some() { + return Err(Error::InitialPeersAndBootstrapCacheSet); + } + + let initial_peers = if !self.initial_peers.is_empty() { + self.initial_peers.clone() + } else if let Some(cache) = &self.bootstrap_cache { + cache.get_unique_peer_addr().cloned().collect() + } else { + vec![] + }; + network_builder.listen_addr(self.addr); #[cfg(feature = "open-metrics")] network_builder.metrics_server_port(self.metrics_server_port); - network_builder.initial_peers(self.initial_peers.clone()); network_builder.is_behind_home_network(self.is_behind_home_network); + if let Some(cache) = self.bootstrap_cache { + network_builder.bootstrap_cache(cache); + } #[cfg(feature = "upnp")] network_builder.upnp(self.upnp); @@ -176,7 +207,7 @@ impl NodeBuilder { let node = NodeInner { network: network.clone(), events_channel: node_events_channel.clone(), - initial_peers: self.initial_peers, + initial_peers, reward_address: self.evm_address, #[cfg(feature = "open-metrics")] metrics_recorder, diff --git a/ant-node/src/python.rs b/ant-node/src/python.rs index 954609b830..3d50520940 100644 --- a/ant-node/src/python.rs +++ b/ant-node/src/python.rs @@ -102,13 +102,13 @@ impl AntNode { rewards_address, evm_network, node_socket_addr, - initial_peers, local, root_dir.unwrap_or_else(|| PathBuf::from(".")), #[cfg(feature = "upnp")] false, ); - node_builder.is_behind_home_network = home_network; + node_builder.initial_peers(initial_peers); + node_builder.is_behind_home_network(home_network); node_builder .build_and_run() From 65e21706a3ab0ccd82d4d2917137921fec22988d Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Wed, 4 Dec 2024 23:33:10 +0530 Subject: [PATCH 12/21] feat: remove ant-peers-acquisition and use ant-bootstrap instead - This also removes the `network-contact` feature flag. - The flag was used to indicate if we should connect to the mainnet or the testnet, which can easily be done with PeersArgs::testnet flag --- Cargo.lock | 25 +-- Cargo.toml | 1 - Justfile | 8 +- README.md | 19 +- ant-cli/Cargo.toml | 4 +- ant-cli/src/main.rs | 2 +- ant-logging/src/layers.rs | 1 - ant-node-manager/Cargo.toml | 3 +- ant-node-manager/src/bin/cli/main.rs | 4 +- ant-node-manager/src/cmd/auditor.rs | 2 +- ant-node-manager/src/cmd/faucet.rs | 2 +- ant-node-manager/src/cmd/local.rs | 6 +- ant-node-manager/src/cmd/mod.rs | 3 - ant-node-manager/src/cmd/nat_detection.rs | 8 +- ant-node-manager/src/cmd/node.rs | 8 +- ant-node-rpc-client/Cargo.toml | 1 - ant-node/Cargo.toml | 4 +- ant-node/src/bin/antnode/main.rs | 2 +- ant-peers-acquisition/Cargo.toml | 31 --- ant-peers-acquisition/README.md | 5 - ant-peers-acquisition/src/error.rs | 19 -- ant-peers-acquisition/src/lib.rs | 242 ---------------------- autonomi/Cargo.toml | 2 - node-launchpad/Cargo.toml | 2 +- node-launchpad/src/app.rs | 4 +- node-launchpad/src/bin/tui/main.rs | 2 +- node-launchpad/src/components/status.rs | 2 +- node-launchpad/src/node_mgmt.rs | 2 +- node-launchpad/src/utils.rs | 8 +- test-utils/Cargo.toml | 4 - test-utils/src/lib.rs | 10 +- 31 files changed, 49 insertions(+), 387 deletions(-) delete mode 100644 ant-peers-acquisition/Cargo.toml delete mode 100644 ant-peers-acquisition/README.md delete mode 100644 ant-peers-acquisition/src/error.rs delete mode 100644 ant-peers-acquisition/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 0fa6aa094e..607e15070a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -762,7 +762,6 @@ dependencies = [ "ant-bootstrap", "ant-build-info", "ant-logging", - "ant-peers-acquisition", "autonomi", "clap", "color-eyre", @@ -905,7 +904,6 @@ dependencies = [ "ant-evm", "ant-logging", "ant-networking", - "ant-peers-acquisition", "ant-protocol", "ant-registers", "ant-service-management", @@ -959,10 +957,10 @@ dependencies = [ name = "ant-node-manager" version = "0.11.3" dependencies = [ + "ant-bootstrap", "ant-build-info", "ant-evm", "ant-logging", - "ant-peers-acquisition", "ant-protocol", "ant-releases", "ant-service-management", @@ -1005,7 +1003,6 @@ dependencies = [ "ant-build-info", "ant-logging", "ant-node", - "ant-peers-acquisition", "ant-protocol", "ant-service-management", "async-trait", @@ -1023,22 +1020,6 @@ dependencies = [ "tracing-core", ] -[[package]] -name = "ant-peers-acquisition" -version = "0.5.7" -dependencies = [ - "ant-protocol", - "clap", - "lazy_static", - "libp2p 0.54.1 (git+https://github.com/maqi/rust-libp2p.git?branch=kad_0.46.2)", - "rand 0.8.5", - "reqwest 0.12.9", - "thiserror 1.0.69", - "tokio", - "tracing", - "url", -] - [[package]] name = "ant-protocol" version = "0.17.15" @@ -1556,7 +1537,6 @@ dependencies = [ "ant-evm", "ant-logging", "ant-networking", - "ant-peers-acquisition", "ant-protocol", "ant-registers", "bip39", @@ -6586,10 +6566,10 @@ dependencies = [ name = "node-launchpad" version = "0.4.5" dependencies = [ + "ant-bootstrap", "ant-build-info", "ant-evm", "ant-node-manager", - "ant-peers-acquisition", "ant-protocol", "ant-releases", "ant-service-management", @@ -9594,7 +9574,6 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" name = "test-utils" version = "0.4.11" dependencies = [ - "ant-peers-acquisition", "bytes", "color-eyre", "dirs-next", diff --git a/Cargo.toml b/Cargo.toml index eeafdece63..6840a1e40d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,7 +11,6 @@ members = [ "ant-node", "ant-node-manager", "ant-node-rpc-client", - "ant-peers-acquisition", "ant-protocol", "ant-registers", "ant-service-management", diff --git a/Justfile b/Justfile index c80fcf1b1a..2eb3768d03 100644 --- a/Justfile +++ b/Justfile @@ -68,16 +68,16 @@ build-release-artifacts arch nightly="false": cargo binstall --no-confirm cross cross build --release --target $arch --bin nat-detection $nightly_feature cross build --release --target $arch --bin node-launchpad $nightly_feature - cross build --release --features network-contacts,websockets --target $arch --bin ant $nightly_feature - cross build --release --features network-contacts,websockets --target $arch --bin antnode $nightly_feature + cross build --release --features websockets --target $arch --bin ant $nightly_feature + cross build --release --features websockets --target $arch --bin antnode $nightly_feature cross build --release --target $arch --bin antctl $nightly_feature cross build --release --target $arch --bin antctld $nightly_feature cross build --release --target $arch --bin antnode_rpc_client $nightly_feature else cargo build --release --target $arch --bin nat-detection $nightly_feature cargo build --release --target $arch --bin node-launchpad $nightly_feature - cargo build --release --features network-contacts,websockets --target $arch --bin ant $nightly_feature - cargo build --release --features network-contacts,websockets --target $arch --bin antnode $nightly_feature + cargo build --release --features websockets --target $arch --bin ant $nightly_feature + cargo build --release --features websockets --target $arch --bin antnode $nightly_feature cargo build --release --target $arch --bin antctl $nightly_feature cargo build --release --target $arch --bin antctld $nightly_feature cargo build --release --target $arch --bin antnode_rpc_client $nightly_feature diff --git a/README.md b/README.md index 014ea96496..bac5d08181 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ You should build from the `stable` branch, as follows: ``` git checkout stable -cargo build --release --features network-contacts --bin antnode +cargo build --release --bin antnode ``` #### Running the Node @@ -40,23 +40,12 @@ cargo build --release --features network-contacts --bin antnode To run a node and receive rewards, you need to specify your Ethereum address as a parameter. Rewards are paid to the specified address. ``` -cargo run --release --bin antnode --features network-contacts -- --rewards-address +cargo run --release --bin antnode -- --rewards-address ``` More options about EVM Network below. ### For Developers - -#### Build - -You can build `autonomi` and `antnode` with the `network-contacts` feature: - -``` -cargo build --release --features network-contacts --bin autonomi -cargo build --release --features network-contacts --bin antnode -``` - - #### Main Crates - [Autonomi API](https://github.com/maidsafe/autonomi/blob/main/autonomi/README.md) The client APIs @@ -97,8 +86,8 @@ WASM support for the autonomi API is currently under active development. More do used by the autonomi network. - [Registers](https://github.com/maidsafe/autonomi/blob/main/ant-registers/README.md) The registers crate, used for the Register CRDT data type on the network. -- [Peers Acquisition](https://github.com/maidsafe/autonomi/blob/main/ant-peers-acquisition/README.md) - The peers acquisition crate, or: how the network layer discovers bootstrap peers. +- [Bootstrap](https://github.com/maidsafe/autonomi/blob/main/ant-bootstrap/README.md) + The network bootstrap cache or: how the network layer discovers bootstrap peers. - [Build Info](https://github.com/maidsafe/autonomi/blob/main/ant-build-info/README.md) Small helper used to get the build/commit versioning info for debug purposes. diff --git a/ant-cli/Cargo.toml b/ant-cli/Cargo.toml index 05cbd82eac..e7752bde9e 100644 --- a/ant-cli/Cargo.toml +++ b/ant-cli/Cargo.toml @@ -15,9 +15,8 @@ path = "src/main.rs" [features] default = ["metrics"] -local = ["ant-peers-acquisition/local", "autonomi/local"] +local = ["ant-bootstrap/local", "autonomi/local"] metrics = ["ant-logging/process-metrics"] -network-contacts = ["ant-peers-acquisition/network-contacts"] websockets = ["autonomi/websockets"] [[bench]] @@ -28,7 +27,6 @@ harness = false ant-bootstrap = { path = "../ant-bootstrap", version = "0.1.0" } ant-build-info = { path = "../ant-build-info", version = "0.1.19" } ant-logging = { path = "../ant-logging", version = "0.2.40" } -ant-peers-acquisition = { path = "../ant-peers-acquisition", version = "0.5.7" } autonomi = { path = "../autonomi", version = "0.2.4", features = [ "data", "fs", diff --git a/ant-cli/src/main.rs b/ant-cli/src/main.rs index cbab96d8fc..b50092e538 100644 --- a/ant-cli/src/main.rs +++ b/ant-cli/src/main.rs @@ -51,6 +51,7 @@ async fn main() -> Result<()> { fn init_logging_and_metrics(opt: &Opt) -> Result<(ReloadHandle, Option)> { let logging_targets = vec![ + ("ant_bootstrap".to_string(), Level::DEBUG), ("ant_build_info".to_string(), Level::TRACE), ("ant_evm".to_string(), Level::TRACE), ("ant_networking".to_string(), Level::INFO), @@ -59,7 +60,6 @@ fn init_logging_and_metrics(opt: &Opt) -> Result<(ReloadHandle, Option Result> ("ant_logging".to_string(), Level::TRACE), ("ant_node_manager".to_string(), Level::TRACE), ("ant_node_rpc_client".to_string(), Level::TRACE), - ("ant_peers_acquisition".to_string(), Level::TRACE), ("ant_protocol".to_string(), Level::TRACE), ("ant_registers".to_string(), Level::INFO), ("ant_service_management".to_string(), Level::TRACE), diff --git a/ant-node-manager/Cargo.toml b/ant-node-manager/Cargo.toml index 94857697b6..50029846c3 100644 --- a/ant-node-manager/Cargo.toml +++ b/ant-node-manager/Cargo.toml @@ -21,7 +21,6 @@ path = "src/bin/daemon/main.rs" chaos = [] default = ["quic"] local = [] -network-contacts = [] nightly = [] open-metrics = [] otlp = [] @@ -31,10 +30,10 @@ tcp = [] websockets = [] [dependencies] +ant-bootstrap = { path = "../ant-bootstrap", version = "0.1.0" } ant-build-info = { path = "../ant-build-info", version = "0.1.19" } ant-evm = { path = "../ant-evm", version = "0.1.4" } ant-logging = { path = "../ant-logging", version = "0.2.40" } -ant-peers-acquisition = { path = "../ant-peers-acquisition", version = "0.5.7" } ant-protocol = { path = "../ant-protocol", version = "0.17.15" } ant-releases = { git = "https://github.com/jacderida/ant-releases.git", branch = "chore-rename_binaries" } ant-service-management = { path = "../ant-service-management", version = "0.4.3" } diff --git a/ant-node-manager/src/bin/cli/main.rs b/ant-node-manager/src/bin/cli/main.rs index 1e40d20589..eee22641e3 100644 --- a/ant-node-manager/src/bin/cli/main.rs +++ b/ant-node-manager/src/bin/cli/main.rs @@ -9,6 +9,7 @@ mod subcommands; use crate::subcommands::evm_network::EvmNetworkCommand; +use ant_bootstrap::PeersArgs; use ant_evm::RewardsAddress; use ant_logging::{LogBuilder, LogFormat}; use ant_node_manager::{ @@ -16,7 +17,6 @@ use ant_node_manager::{ cmd::{self}, VerbosityLevel, DEFAULT_NODE_STARTUP_CONNECTION_TIMEOUT_S, }; -use ant_peers_acquisition::PeersArgs; use clap::{Parser, Subcommand}; use color_eyre::{eyre::eyre, Result}; use libp2p::Multiaddr; @@ -1381,9 +1381,9 @@ async fn main() -> Result<()> { fn get_log_builder(level: Level) -> Result { let logging_targets = vec![ + ("ant_bootstrap".to_string(), level), ("evmlib".to_string(), level), ("evm-testnet".to_string(), level), - ("ant_peers_acquisition".to_string(), level), ("ant_node_manager".to_string(), level), ("antctl".to_string(), level), ("antctld".to_string(), level), diff --git a/ant-node-manager/src/cmd/auditor.rs b/ant-node-manager/src/cmd/auditor.rs index 92061c1e20..764656d3cc 100644 --- a/ant-node-manager/src/cmd/auditor.rs +++ b/ant-node-manager/src/cmd/auditor.rs @@ -10,7 +10,7 @@ use crate::{ config::{self, is_running_as_root}, print_banner, ServiceManager, VerbosityLevel, }; -use ant_peers_acquisition::PeersArgs; +use ant_bootstrap::PeersArgs; use ant_service_management::{auditor::AuditorService, control::ServiceController, NodeRegistry}; use color_eyre::{eyre::eyre, Result}; use std::path::PathBuf; diff --git a/ant-node-manager/src/cmd/faucet.rs b/ant-node-manager/src/cmd/faucet.rs index d598aed62b..053c3727ac 100644 --- a/ant-node-manager/src/cmd/faucet.rs +++ b/ant-node-manager/src/cmd/faucet.rs @@ -10,7 +10,7 @@ use crate::{ config::{self, is_running_as_root}, print_banner, ServiceManager, VerbosityLevel, }; -use ant_peers_acquisition::PeersArgs; +use ant_bootstrap::PeersArgs; use ant_service_management::{control::ServiceController, FaucetService, NodeRegistry}; use color_eyre::{eyre::eyre, Result}; use std::path::PathBuf; diff --git a/ant-node-manager/src/cmd/local.rs b/ant-node-manager/src/cmd/local.rs index f83c6e3d4c..f28f37d206 100644 --- a/ant-node-manager/src/cmd/local.rs +++ b/ant-node-manager/src/cmd/local.rs @@ -14,9 +14,9 @@ use crate::{ local::{kill_network, run_network, LocalNetworkOptions}, print_banner, status_report, VerbosityLevel, }; +use ant_bootstrap::PeersArgs; use ant_evm::{EvmNetwork, RewardsAddress}; use ant_logging::LogFormat; -use ant_peers_acquisition::PeersArgs; use ant_releases::{AntReleaseRepoActions, ReleaseType}; use ant_service_management::{ control::ServiceController, get_local_node_registry_path, NodeRegistry, @@ -72,10 +72,10 @@ pub async fn join( // If no peers are obtained we will attempt to join the existing local network, if one // is running. - let peers = match peers_args.get_peers().await { + let peers = match peers_args.get_addrs().await { Ok(peers) => Some(peers), Err(err) => match err { - ant_peers_acquisition::error::Error::PeersNotObtained => { + ant_bootstrap::error::Error::NoBootstrapPeersFound => { warn!("PeersNotObtained, peers is set to None"); None } diff --git a/ant-node-manager/src/cmd/mod.rs b/ant-node-manager/src/cmd/mod.rs index 7a77e81678..45138e640d 100644 --- a/ant-node-manager/src/cmd/mod.rs +++ b/ant-node-manager/src/cmd/mod.rs @@ -184,9 +184,6 @@ fn build_binary(bin_type: &ReleaseType) -> Result { if cfg!(feature = "local") { args.extend(["--features", "local"]); } - if cfg!(feature = "network-contacts") { - args.extend(["--features", "network-contacts"]); - } if cfg!(feature = "websockets") { args.extend(["--features", "websockets"]); } diff --git a/ant-node-manager/src/cmd/nat_detection.rs b/ant-node-manager/src/cmd/nat_detection.rs index afe2d442dd..b43238513f 100644 --- a/ant-node-manager/src/cmd/nat_detection.rs +++ b/ant-node-manager/src/cmd/nat_detection.rs @@ -9,7 +9,7 @@ use crate::{ config::get_node_registry_path, helpers::download_and_extract_release, VerbosityLevel, }; -use ant_peers_acquisition::get_peers_from_url; +use ant_bootstrap::ContactsFetcher; use ant_releases::{AntReleaseRepoActions, ReleaseType}; use ant_service_management::{NatDetectionStatus, NodeRegistry}; use color_eyre::eyre::{bail, OptionExt, Result}; @@ -35,7 +35,11 @@ pub async fn run_nat_detection( let servers = match servers { Some(servers) => servers, None => { - let servers = get_peers_from_url(NAT_DETECTION_SERVERS_LIST_URL.parse()?).await?; + let mut contacts_fetcher = ContactsFetcher::new()?; + contacts_fetcher.ignore_peer_id(true); + contacts_fetcher.insert_endpoint(NAT_DETECTION_SERVERS_LIST_URL.parse()?); + + let servers = contacts_fetcher.fetch_addrs().await?; servers .choose_multiple(&mut rand::thread_rng(), 10) diff --git a/ant-node-manager/src/cmd/node.rs b/ant-node-manager/src/cmd/node.rs index 59a04ddc11..f4f6b67a48 100644 --- a/ant-node-manager/src/cmd/node.rs +++ b/ant-node-manager/src/cmd/node.rs @@ -18,9 +18,9 @@ use crate::{ helpers::{download_and_extract_release, get_bin_version}, print_banner, refresh_node_registry, status_report, ServiceManager, VerbosityLevel, }; +use ant_bootstrap::PeersArgs; use ant_evm::{EvmNetwork, RewardsAddress}; use ant_logging::LogFormat; -use ant_peers_acquisition::PeersArgs; use ant_releases::{AntReleaseRepoActions, ReleaseType}; use ant_service_management::{ control::{ServiceControl, ServiceController}, @@ -117,13 +117,13 @@ pub async fn add( // If the `antnode` binary we're using has `network-contacts` enabled (which is the case for released binaries), // it's fine if the service definition doesn't call `antnode` with a `--peer` argument. let is_first = peers_args.first; - let bootstrap_peers = match peers_args.get_peers_exclude_network_contacts().await { + let bootstrap_peers = match peers_args.get_addrs().await { Ok(peers) => { info!("Obtained peers of length {}", peers.len()); - peers + peers.into_iter().take(10).collect::>() } Err(err) => match err { - ant_peers_acquisition::error::Error::PeersNotObtained => { + ant_bootstrap::error::Error::NoBootstrapPeersFound => { info!("No bootstrap peers obtained, setting empty vec."); Vec::new() } diff --git a/ant-node-rpc-client/Cargo.toml b/ant-node-rpc-client/Cargo.toml index 057ed08492..c34db03215 100644 --- a/ant-node-rpc-client/Cargo.toml +++ b/ant-node-rpc-client/Cargo.toml @@ -19,7 +19,6 @@ nightly = [] [dependencies] ant-build-info = { path = "../ant-build-info", version = "0.1.19" } ant-logging = { path = "../ant-logging", version = "0.2.40" } -ant-peers-acquisition = { path = "../ant-peers-acquisition", version = "0.5.7" } ant-protocol = { path = "../ant-protocol", version = "0.17.15", features=["rpc"] } ant-node = { path = "../ant-node", version = "0.112.6" } ant-service-management = { path = "../ant-service-management", version = "0.4.3" } diff --git a/ant-node/Cargo.toml b/ant-node/Cargo.toml index 283dc940a3..8daa19b30e 100644 --- a/ant-node/Cargo.toml +++ b/ant-node/Cargo.toml @@ -17,10 +17,9 @@ path = "src/bin/antnode/main.rs" default = ["metrics", "upnp", "open-metrics", "encrypt-records"] encrypt-records = ["ant-networking/encrypt-records"] extension-module = ["pyo3/extension-module"] -local = ["ant-networking/local", "ant-evm/local"] +local = ["ant-networking/local", "ant-evm/local", "ant-bootstrap/local"] loud = ["ant-networking/loud"] # loud mode: print important messages to console metrics = ["ant-logging/process-metrics"] -network-contacts = ["ant-peers-acquisition/network-contacts"] nightly = [] open-metrics = ["ant-networking/open-metrics", "prometheus-client"] otlp = ["ant-logging/otlp"] @@ -33,7 +32,6 @@ ant-build-info = { path = "../ant-build-info", version = "0.1.19" } ant-evm = { path = "../ant-evm", version = "0.1.4" } ant-logging = { path = "../ant-logging", version = "0.2.40" } ant-networking = { path = "../ant-networking", version = "0.19.5" } -ant-peers-acquisition = { path = "../ant-peers-acquisition", version = "0.5.7" } ant-protocol = { path = "../ant-protocol", version = "0.17.15" } ant-registers = { path = "../ant-registers", version = "0.4.3" } ant-service-management = { path = "../ant-service-management", version = "0.4.3" } diff --git a/ant-node/src/bin/antnode/main.rs b/ant-node/src/bin/antnode/main.rs index caae71685f..bfaa2b8aae 100644 --- a/ant-node/src/bin/antnode/main.rs +++ b/ant-node/src/bin/antnode/main.rs @@ -548,12 +548,12 @@ fn monitor_node_events(mut node_events_rx: NodeEventsReceiver, ctrl_tx: mpsc::Se fn init_logging(opt: &Opt, peer_id: PeerId) -> Result<(String, ReloadHandle, Option)> { let logging_targets = vec![ + ("ant_bootstrap".to_string(), Level::INFO), ("ant_build_info".to_string(), Level::DEBUG), ("ant_evm".to_string(), Level::DEBUG), ("ant_logging".to_string(), Level::DEBUG), ("ant_networking".to_string(), Level::INFO), ("ant_node".to_string(), Level::DEBUG), - ("ant_peers_acquisition".to_string(), Level::DEBUG), ("ant_protocol".to_string(), Level::DEBUG), ("ant_registers".to_string(), Level::DEBUG), ("antnode".to_string(), Level::DEBUG), diff --git a/ant-peers-acquisition/Cargo.toml b/ant-peers-acquisition/Cargo.toml deleted file mode 100644 index 660b55b3e6..0000000000 --- a/ant-peers-acquisition/Cargo.toml +++ /dev/null @@ -1,31 +0,0 @@ -[package] -authors = ["MaidSafe Developers "] -description = "Peer acquisition utilities" -edition = "2021" -homepage = "https://maidsafe.net" -license = "GPL-3.0" -name = "ant-peers-acquisition" -readme = "README.md" -repository = "https://github.com/maidsafe/autonomi" -version = "0.5.7" - -[features] -default = ["network-contacts"] -local = [] -network-contacts = ["ant-protocol"] -websockets = [] - -[dependencies] -ant-protocol = { path = "../ant-protocol", version = "0.17.15", optional = true} -clap = { version = "4.2.1", features = ["derive", "env"] } -lazy_static = "~1.4.0" -libp2p = { git = "https://github.com/maqi/rust-libp2p.git", branch = "kad_0.46.2", features = [] } -rand = "0.8.5" -reqwest = { version="0.12.2", default-features=false, features = ["rustls-tls"] } -thiserror = "1.0.23" -tokio = { version = "1.32.0", default-features = false } -tracing = { version = "~0.1.26" } -url = { version = "2.4.0" } - -[lints] -workspace = true diff --git a/ant-peers-acquisition/README.md b/ant-peers-acquisition/README.md deleted file mode 100644 index 6c409a9103..0000000000 --- a/ant-peers-acquisition/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# ant_peers_acquisition - -Provides utilities for discovering bootstrap peers on a given system. - -It handles `--peer` arguments across all bins, as well as `ANT_PEERS` or indeed picking up an initial set of `network-conacts` from a provided, or hard-coded url. diff --git a/ant-peers-acquisition/src/error.rs b/ant-peers-acquisition/src/error.rs deleted file mode 100644 index d5df7c969b..0000000000 --- a/ant-peers-acquisition/src/error.rs +++ /dev/null @@ -1,19 +0,0 @@ -use thiserror::Error; - -pub type Result = std::result::Result; - -#[derive(Debug, Error)] -pub enum Error { - #[error("Could not parse the supplied multiaddr or socket address")] - InvalidPeerAddr(#[from] libp2p::multiaddr::Error), - #[error("Could not obtain network contacts from {0} after {1} retries")] - FailedToObtainPeersFromUrl(String, usize), - #[error("No valid multaddr was present in the contacts file at {0}")] - NoMultiAddrObtainedFromNetworkContacts(String), - #[error("Could not obtain peers through any available options")] - PeersNotObtained, - #[error(transparent)] - ReqwestError(#[from] reqwest::Error), - #[error(transparent)] - UrlParseError(#[from] url::ParseError), -} diff --git a/ant-peers-acquisition/src/lib.rs b/ant-peers-acquisition/src/lib.rs deleted file mode 100644 index da613e97ad..0000000000 --- a/ant-peers-acquisition/src/lib.rs +++ /dev/null @@ -1,242 +0,0 @@ -// Copyright 2024 MaidSafe.net limited. -// -// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. -// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed -// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. Please review the Licences for the specific language governing -// permissions and limitations relating to use of the SAFE Network Software. - -pub mod error; - -use crate::error::{Error, Result}; -use clap::Args; -#[cfg(feature = "network-contacts")] -use lazy_static::lazy_static; -use libp2p::{multiaddr::Protocol, Multiaddr}; -use rand::{seq::SliceRandom, thread_rng}; -use reqwest::Client; -use std::time::Duration; -use tracing::*; -use url::Url; - -#[cfg(feature = "network-contacts")] -lazy_static! { - // URL containing the multi-addresses of the bootstrap nodes. - pub static ref NETWORK_CONTACTS_URL: String = - "https://sn-testnet.s3.eu-west-2.amazonaws.com/network-contacts".to_string(); -} - -// The maximum number of retries to be performed while trying to get peers from a URL. -const MAX_RETRIES_ON_GET_PEERS_FROM_URL: usize = 7; - -/// The name of the environment variable that can be used to pass peers to the node. -pub const ANT_PEERS_ENV: &str = "ANT_PEERS"; - -#[derive(Args, Debug, Default, Clone)] -pub struct PeersArgs { - /// Set to indicate this is the first node in a new network - /// - /// If this argument is used, any others will be ignored because they do not apply to the first - /// node. - #[clap(long)] - pub first: bool, - /// Peer(s) to use for bootstrap, in a 'multiaddr' format containing the peer ID. - /// - /// A multiaddr looks like - /// '/ip4/1.2.3.4/tcp/1200/tcp/p2p/12D3KooWRi6wF7yxWLuPSNskXc6kQ5cJ6eaymeMbCRdTnMesPgFx' where - /// `1.2.3.4` is the IP, `1200` is the port and the (optional) last part is the peer ID. - /// - /// This argument can be provided multiple times to connect to multiple peers. - /// - /// Alternatively, the `ANT_PEERS` environment variable can provide a comma-separated peer - /// list. - #[clap(long = "peer", env = "ANT_PEERS", value_name = "multiaddr", value_delimiter = ',', value_parser = parse_peer_addr, conflicts_with = "first")] - pub peers: Vec, - - /// Specify the URL to fetch the network contacts from. - /// - /// This argument will be overridden if the "peers" argument is set or if the `local` - /// feature flag is enabled. - #[cfg(feature = "network-contacts")] - #[clap(long, conflicts_with = "first")] - pub network_contacts_url: Option, -} - -impl PeersArgs { - /// Gets the peers based on the arguments provided. - /// - /// If the `--first` flag is used, no peers will be provided. - /// - /// Otherwise, peers are obtained in the following order of precedence: - /// * The `--peer` argument. - /// * The `ANT_PEERS` environment variable. - /// * Using the `local` feature, which will return an empty peer list. - /// * Using the `network-contacts` feature, which will download the peer list from a file on S3. - /// - /// Note: the current behaviour is that `--peer` and `ANT_PEERS` will be combined. Some tests - /// currently rely on this. We will change it soon. - pub async fn get_peers(self) -> Result> { - self.get_peers_inner(false).await - } - - /// Gets the peers based on the arguments provided. - /// - /// If the `--first` flag is used, no peers will be provided. - /// - /// Otherwise, peers are obtained in the following order of precedence: - /// * The `--peer` argument. - /// * The `ANT_PEERS` environment variable. - /// * Using the `local` feature, which will return an empty peer list. - /// - /// This will not fetch the peers from network-contacts even if the `network-contacts` feature is enabled. Use - /// get_peers() instead. - /// - /// Note: the current behaviour is that `--peer` and `ANT_PEERS` will be combined. Some tests - /// currently rely on this. We will change it soon. - pub async fn get_peers_exclude_network_contacts(self) -> Result> { - self.get_peers_inner(true).await - } - - async fn get_peers_inner(self, skip_network_contacts: bool) -> Result> { - if self.first { - info!("First node in a new network"); - return Ok(vec![]); - } - - let mut peers = if !self.peers.is_empty() { - info!("Using peers supplied with the --peer argument(s) or ANT_PEERS"); - self.peers - } else if cfg!(feature = "local") { - info!("No peers given"); - info!("The `local` feature is enabled, so peers will be discovered through mDNS."); - return Ok(vec![]); - } else if skip_network_contacts { - info!("Skipping network contacts"); - return Ok(vec![]); - } else if cfg!(feature = "network-contacts") { - self.get_network_contacts().await? - } else { - vec![] - }; - - if peers.is_empty() { - error!("Peers not obtained through any available options"); - return Err(Error::PeersNotObtained); - }; - - // Randomly sort peers before we return them to avoid overly hitting any one peer - let mut rng = thread_rng(); - peers.shuffle(&mut rng); - - Ok(peers) - } - - // should not be reachable, but needed for the compiler to be happy. - #[expect(clippy::unused_async)] - #[cfg(not(feature = "network-contacts"))] - async fn get_network_contacts(&self) -> Result> { - Ok(vec![]) - } - - #[cfg(feature = "network-contacts")] - async fn get_network_contacts(&self) -> Result> { - let url = self - .network_contacts_url - .clone() - .unwrap_or(Url::parse(NETWORK_CONTACTS_URL.as_str())?); - - info!("Trying to fetch the bootstrap peers from {url}"); - - get_peers_from_url(url).await - } -} - -/// Parse strings like `1.2.3.4:1234` and `/ip4/1.2.3.4/tcp/1234` into a multiaddr. -pub fn parse_peer_addr(addr: &str) -> std::result::Result { - // Parse valid IPv4 socket address, e.g. `1.2.3.4:1234`. - if let Ok(addr) = addr.parse::() { - let start_addr = Multiaddr::from(*addr.ip()); - - // Turn the address into a `/ip4//udp//quic-v1` multiaddr. - #[cfg(not(feature = "websockets"))] - let multiaddr = start_addr - .with(Protocol::Udp(addr.port())) - .with(Protocol::QuicV1); - - // Turn the address into a `/ip4//udp//websocket-websys-v1` multiaddr. - #[cfg(feature = "websockets")] - let multiaddr = start_addr - .with(Protocol::Tcp(addr.port())) - .with(Protocol::Ws("/".into())); - - return Ok(multiaddr); - } - - // Parse any valid multiaddr string - addr.parse::() -} - -/// Get and parse a list of peers from a URL. The URL should contain one multiaddr per line. -pub async fn get_peers_from_url(url: Url) -> Result> { - let mut retries = 0; - - #[cfg(not(target_arch = "wasm32"))] - let request_client = Client::builder().timeout(Duration::from_secs(10)).build()?; - // Wasm does not have the timeout method yet. - #[cfg(target_arch = "wasm32")] - let request_client = Client::builder().build()?; - - loop { - let response = request_client.get(url.clone()).send().await; - - match response { - Ok(response) => { - let mut multi_addresses = Vec::new(); - if response.status().is_success() { - let text = response.text().await?; - trace!("Got peers from url: {url}: {text}"); - // example of contacts file exists in resources/network-contacts-examples - for addr in text.split('\n') { - // ignore empty/last lines - if addr.is_empty() { - continue; - } - - debug!("Attempting to parse {addr}"); - multi_addresses.push(parse_peer_addr(addr)?); - } - if !multi_addresses.is_empty() { - trace!("Successfully got peers from URL {multi_addresses:?}"); - return Ok(multi_addresses); - } else { - return Err(Error::NoMultiAddrObtainedFromNetworkContacts( - url.to_string(), - )); - } - } else { - retries += 1; - if retries >= MAX_RETRIES_ON_GET_PEERS_FROM_URL { - return Err(Error::FailedToObtainPeersFromUrl( - url.to_string(), - MAX_RETRIES_ON_GET_PEERS_FROM_URL, - )); - } - } - } - Err(err) => { - error!("Failed to get peers from URL {url}: {err:?}"); - retries += 1; - if retries >= MAX_RETRIES_ON_GET_PEERS_FROM_URL { - return Err(Error::FailedToObtainPeersFromUrl( - url.to_string(), - MAX_RETRIES_ON_GET_PEERS_FROM_URL, - )); - } - } - } - trace!( - "Failed to get peers from URL, retrying {retries}/{MAX_RETRIES_ON_GET_PEERS_FROM_URL}" - ); - tokio::time::sleep(Duration::from_secs(1)).await; - } -} diff --git a/autonomi/Cargo.toml b/autonomi/Cargo.toml index 88d61c711a..2c2b4a7c79 100644 --- a/autonomi/Cargo.toml +++ b/autonomi/Cargo.toml @@ -29,7 +29,6 @@ websockets = ["ant-networking/websockets"] [dependencies] ant-evm = { path = "../ant-evm", version = "0.1.4" } ant-networking = { path = "../ant-networking", version = "0.19.5" } -ant-peers-acquisition = { path = "../ant-peers-acquisition", version = "0.5.7" } ant-protocol = { version = "0.17.15", path = "../ant-protocol" } ant-registers = { path = "../ant-registers", version = "0.4.3" } bip39 = "2.0.0" @@ -63,7 +62,6 @@ xor_name = "5.0.0" [dev-dependencies] alloy = { version = "0.5.3", default-features = false, features = ["std", "reqwest-rustls-tls", "provider-anvil-node", "sol-types", "json", "signers", "contract", "signer-local", "network"] } ant-logging = { path = "../ant-logging", version = "0.2.40" } -ant-peers-acquisition = { path = "../ant-peers-acquisition", version = "0.5.7" } eyre = "0.6.5" sha2 = "0.10.6" # Do not specify the version field. Release process expects even the local dev deps to be published. diff --git a/node-launchpad/Cargo.toml b/node-launchpad/Cargo.toml index 4e488880a2..23926653e0 100644 --- a/node-launchpad/Cargo.toml +++ b/node-launchpad/Cargo.toml @@ -18,10 +18,10 @@ path = "src/bin/tui/main.rs" nightly = [] [dependencies] +ant-bootstrap = { path = "../ant-bootstrap", version = "0.1.0" } ant-build-info = { path = "../ant-build-info", version = "0.1.19" } ant-evm = { path = "../ant-evm", version = "0.1.4" } ant-node-manager = { version = "0.11.3", path = "../ant-node-manager" } -ant-peers-acquisition = { version = "0.5.7", path = "../ant-peers-acquisition" } ant-protocol = { path = "../ant-protocol", version = "0.17.15" } ant-releases = { git = "https://github.com/jacderida/ant-releases.git", branch = "chore-rename_binaries" } ant-service-management = { version = "0.4.3", path = "../ant-service-management" } diff --git a/node-launchpad/src/app.rs b/node-launchpad/src/app.rs index 40124f4d3f..605c51efd3 100644 --- a/node-launchpad/src/app.rs +++ b/node-launchpad/src/app.rs @@ -29,7 +29,7 @@ use crate::{ system::{get_default_mount_point, get_primary_mount_point, get_primary_mount_point_name}, tui, }; -use ant_peers_acquisition::PeersArgs; +use ant_bootstrap::PeersArgs; use color_eyre::eyre::Result; use crossterm::event::KeyEvent; use ratatui::{prelude::Rect, style::Style, widgets::Block}; @@ -317,7 +317,7 @@ impl App { #[cfg(test)] mod tests { use super::*; - use ant_peers_acquisition::PeersArgs; + use ant_bootstrap::PeersArgs; use color_eyre::eyre::Result; use std::io::Cursor; use std::io::Write; diff --git a/node-launchpad/src/bin/tui/main.rs b/node-launchpad/src/bin/tui/main.rs index f2f28af40b..969e2c811a 100644 --- a/node-launchpad/src/bin/tui/main.rs +++ b/node-launchpad/src/bin/tui/main.rs @@ -11,9 +11,9 @@ mod terminal; #[macro_use] extern crate tracing; +use ant_bootstrap::PeersArgs; #[cfg(target_os = "windows")] use ant_node_manager::config::is_running_as_root; -use ant_peers_acquisition::PeersArgs; use clap::Parser; use color_eyre::eyre::Result; use node_launchpad::{ diff --git a/node-launchpad/src/components/status.rs b/node-launchpad/src/components/status.rs index 02e39a54ad..1899bbd9bc 100644 --- a/node-launchpad/src/components/status.rs +++ b/node-launchpad/src/components/status.rs @@ -31,9 +31,9 @@ use crate::{ clear_area, EUCALYPTUS, GHOST_WHITE, LIGHT_PERIWINKLE, VERY_LIGHT_AZURE, VIVID_SKY_BLUE, }, }; +use ant_bootstrap::PeersArgs; use ant_node_manager::add_services::config::PortRange; use ant_node_manager::config::get_node_registry_path; -use ant_peers_acquisition::PeersArgs; use ant_service_management::{ control::ServiceController, NodeRegistry, NodeServiceData, ServiceStatus, }; diff --git a/node-launchpad/src/node_mgmt.rs b/node-launchpad/src/node_mgmt.rs index 788c2991fa..49fd1c1b32 100644 --- a/node-launchpad/src/node_mgmt.rs +++ b/node-launchpad/src/node_mgmt.rs @@ -1,10 +1,10 @@ use crate::action::{Action, StatusActions}; use crate::connection_mode::ConnectionMode; +use ant_bootstrap::PeersArgs; use ant_evm::{EvmNetwork, RewardsAddress}; use ant_node_manager::{ add_services::config::PortRange, config::get_node_registry_path, VerbosityLevel, }; -use ant_peers_acquisition::PeersArgs; use ant_releases::{self, AntReleaseRepoActions, ReleaseType}; use ant_service_management::NodeRegistry; use color_eyre::eyre::{eyre, Error}; diff --git a/node-launchpad/src/utils.rs b/node-launchpad/src/utils.rs index 15dc6b085e..9defb101e5 100644 --- a/node-launchpad/src/utils.rs +++ b/node-launchpad/src/utils.rs @@ -81,8 +81,12 @@ pub fn initialize_logging() -> Result<()> { .context(format!("Failed to create file {log_path:?}"))?; std::env::set_var( "RUST_LOG", - std::env::var("RUST_LOG") - .unwrap_or_else(|_| format!("{}=trace,ant_node_manager=trace,ant_service_management=trace,ant_peers_acquisition=trace", env!("CARGO_CRATE_NAME"))), + std::env::var("RUST_LOG").unwrap_or_else(|_| { + format!( + "{}=trace,ant_node_manager=trace,ant_service_management=trace,ant_bootstrap=debug", + env!("CARGO_CRATE_NAME") + ) + }), ); let file_subscriber = tracing_subscriber::fmt::layer() .with_file(true) diff --git a/test-utils/Cargo.toml b/test-utils/Cargo.toml index 4d05fbfbb3..4124d37c3e 100644 --- a/test-utils/Cargo.toml +++ b/test-utils/Cargo.toml @@ -9,11 +9,7 @@ readme = "README.md" repository = "https://github.com/maidsafe/safe_network" version = "0.4.11" -[features] -local = ["ant-peers-acquisition/local"] - [dependencies] -ant-peers-acquisition = { path = "../ant-peers-acquisition", version = "0.5.7" } bytes = { version = "1.0.1", features = ["serde"] } color-eyre = "~0.6.2" dirs-next = "~2.0.0" diff --git a/test-utils/src/lib.rs b/test-utils/src/lib.rs index 5d3c57960a..68798d7864 100644 --- a/test-utils/src/lib.rs +++ b/test-utils/src/lib.rs @@ -9,7 +9,6 @@ pub mod evm; pub mod testnet; -use ant_peers_acquisition::parse_peer_addr; use bytes::Bytes; use color_eyre::eyre::Result; use libp2p::Multiaddr; @@ -39,10 +38,11 @@ pub fn gen_random_data(len: usize) -> Bytes { /// /// An empty `Vec` will be returned if the env var is not set or if local discovery is enabled. pub fn peers_from_env() -> Result> { - let bootstrap_peers = if cfg!(feature = "local") { - Ok(vec![]) - } else if let Some(peers_str) = env_from_runtime_or_compiletime!("ANT_PEERS") { - peers_str.split(',').map(parse_peer_addr).collect() + let bootstrap_peers = if let Some(peers_str) = env_from_runtime_or_compiletime!("ANT_PEERS") { + peers_str + .split(',') + .map(|str| str.parse::()) + .collect() } else { Ok(vec![]) }?; From f8bb46fc9861f6339dc004a3dc5c34763a979e22 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Thu, 5 Dec 2024 01:52:12 +0530 Subject: [PATCH 13/21] fix(bootstrap): use env tempdir for atomic write --- ant-bootstrap/src/cache_store.rs | 41 ++++++++++++++------------------ 1 file changed, 18 insertions(+), 23 deletions(-) diff --git a/ant-bootstrap/src/cache_store.rs b/ant-bootstrap/src/cache_store.rs index 615f8c7541..c3d79f045b 100644 --- a/ant-bootstrap/src/cache_store.rs +++ b/ant-bootstrap/src/cache_store.rs @@ -207,30 +207,25 @@ impl BootstrapCacheStore { /// Make sure to have clean addrs inside the cache as we don't call craft_valid_multiaddr pub async fn load_cache_data(cfg: &BootstrapCacheConfig) -> Result { // Try to open the file with read permissions - let mut file = match OpenOptions::new().read(true).open(&cfg.cache_file_path) { - Ok(f) => f, - Err(e) => { - warn!("Failed to open cache file: {}", e); - return Err(Error::from(e)); - } - }; + let mut file = OpenOptions::new() + .read(true) + .open(&cfg.cache_file_path) + .inspect_err(|err| warn!("Failed to open cache file: {err}",))?; // Acquire shared lock for reading - if let Err(e) = Self::acquire_shared_lock(&file).await { - warn!("Failed to acquire shared lock: {}", e); - return Err(e); - } + Self::acquire_shared_lock(&file).await.inspect_err(|err| { + warn!("Failed to acquire shared lock: {err}"); + })?; // Read the file contents let mut contents = String::new(); - if let Err(e) = file.read_to_string(&mut contents) { - warn!("Failed to read cache file: {}", e); - return Err(Error::from(e)); - } + file.read_to_string(&mut contents).inspect_err(|err| { + warn!("Failed to read cache file: {err}"); + })?; // Parse the cache data - let mut data = serde_json::from_str::(&contents).map_err(|e| { - warn!("Failed to parse cache data: {}", e); + let mut data = serde_json::from_str::(&contents).map_err(|err| { + warn!("Failed to parse cache data: {err}"); Error::FailedToParseCacheData })?; @@ -389,7 +384,7 @@ impl BootstrapCacheStore { } async fn acquire_shared_lock(file: &File) -> Result<()> { - let file = file.try_clone().map_err(Error::from)?; + let file = file.try_clone()?; tokio::task::spawn_blocking(move || file.try_lock_shared().map_err(Error::from)) .await @@ -426,22 +421,22 @@ impl BootstrapCacheStore { info!("Writing cache to disk: {:?}", self.cache_path); // Create parent directory if it doesn't exist if let Some(parent) = self.cache_path.parent() { - fs::create_dir_all(parent).map_err(Error::from)?; + fs::create_dir_all(parent)?; } // Create a temporary file in the same directory as the cache file - let temp_file = NamedTempFile::new().map_err(Error::from)?; + let temp_dir = std::env::temp_dir(); + let temp_file = NamedTempFile::new_in(&temp_dir)?; // Write data to temporary file - serde_json::to_writer_pretty(&temp_file, &self.data).map_err(Error::from)?; + serde_json::to_writer_pretty(&temp_file, &self.data)?; // Open the target file with proper permissions let file = OpenOptions::new() .write(true) .create(true) .truncate(true) - .open(&self.cache_path) - .map_err(Error::from)?; + .open(&self.cache_path)?; // Acquire exclusive lock Self::acquire_exclusive_lock(&file).await?; From d8f3ac7c31175b0ae1656d89e325992ce0e6bf5b Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Thu, 5 Dec 2024 02:31:14 +0530 Subject: [PATCH 14/21] fix(bootstrap): make it wasm compatible --- Cargo.lock | 185 ++--------------------------- ant-bootstrap/Cargo.toml | 16 ++- ant-bootstrap/src/cache_store.rs | 48 +++++--- ant-bootstrap/src/contacts.rs | 5 + ant-bootstrap/src/error.rs | 2 - ant-bootstrap/src/initial_peers.rs | 2 +- ant-bootstrap/tests/cache_tests.rs | 2 +- ant-networking/src/driver.rs | 16 ++- 8 files changed, 65 insertions(+), 211 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 607e15070a..f5eb4ca627 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -743,6 +743,7 @@ dependencies = [ "tracing", "tracing-subscriber", "url", + "wasmtimer", "wiremock", ] @@ -1475,12 +1476,6 @@ dependencies = [ "pin-project-lite", ] -[[package]] -name = "atomic-waker" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" - [[package]] name = "attohttpc" version = "0.24.1" @@ -2468,7 +2463,7 @@ dependencies = [ "bitflags 1.3.2", "core-foundation", "core-graphics-types", - "foreign-types 0.5.0", + "foreign-types", "libc", ] @@ -3521,15 +3516,6 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f81ec6369c545a7d40e4589b5597581fa1c441fe1cce96dd1de43159910a36a2" -[[package]] -name = "foreign-types" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" -dependencies = [ - "foreign-types-shared 0.1.1", -] - [[package]] name = "foreign-types" version = "0.5.0" @@ -3537,7 +3523,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" dependencies = [ "foreign-types-macros", - "foreign-types-shared 0.3.1", + "foreign-types-shared", ] [[package]] @@ -3551,12 +3537,6 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "foreign-types-shared" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" - [[package]] name = "foreign-types-shared" version = "0.3.1" @@ -4433,25 +4413,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "h2" -version = "0.4.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" -dependencies = [ - "atomic-waker", - "bytes", - "fnv", - "futures-core", - "futures-sink", - "http 1.1.0", - "indexmap 2.7.0", - "slab", - "tokio", - "tokio-util 0.7.12", - "tracing", -] - [[package]] name = "half" version = "2.4.1" @@ -4813,7 +4774,7 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "h2 0.3.26", + "h2", "http 0.2.12", "http-body 0.4.6", "httparse", @@ -4836,7 +4797,6 @@ dependencies = [ "bytes", "futures-channel", "futures-util", - "h2 0.4.7", "http 1.1.0", "http-body 1.0.1", "httparse", @@ -4891,22 +4851,6 @@ dependencies = [ "tokio-io-timeout", ] -[[package]] -name = "hyper-tls" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" -dependencies = [ - "bytes", - "http-body-util", - "hyper 1.5.1", - "hyper-util", - "native-tls", - "tokio", - "tokio-native-tls", - "tower-service", -] - [[package]] name = "hyper-util" version = "0.1.10" @@ -6458,23 +6402,6 @@ dependencies = [ "tracing-subscriber", ] -[[package]] -name = "native-tls" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8614eb2c83d59d1c8cc974dd3f920198647674a0a035e1af1fa58707e317466" -dependencies = [ - "libc", - "log", - "openssl", - "openssl-probe", - "openssl-sys", - "schannel", - "security-framework", - "security-framework-sys", - "tempfile", -] - [[package]] name = "netlink-packet-core" version = "0.7.0" @@ -6889,50 +6816,6 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381" -[[package]] -name = "openssl" -version = "0.10.68" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" -dependencies = [ - "bitflags 2.6.0", - "cfg-if", - "foreign-types 0.3.2", - "libc", - "once_cell", - "openssl-macros", - "openssl-sys", -] - -[[package]] -name = "openssl-macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.90", -] - -[[package]] -name = "openssl-probe" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" - -[[package]] -name = "openssl-sys" -version = "0.9.104" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "opentelemetry" version = "0.20.0" @@ -8240,7 +8123,7 @@ dependencies = [ "encoding_rs", "futures-core", "futures-util", - "h2 0.3.26", + "h2", "http 0.2.12", "http-body 0.4.6", "hyper 0.14.31", @@ -8278,22 +8161,18 @@ checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", - "encoding_rs", "futures-core", "futures-util", - "h2 0.4.7", "http 1.1.0", "http-body 1.0.1", "http-body-util", "hyper 1.5.1", "hyper-rustls 0.27.3", - "hyper-tls", "hyper-util", "ipnet", "js-sys", "log", "mime", - "native-tls", "once_cell", "percent-encoding", "pin-project-lite", @@ -8305,9 +8184,7 @@ dependencies = [ "serde_json", "serde_urlencoded", "sync_wrapper 1.0.2", - "system-configuration 0.6.1", "tokio", - "tokio-native-tls", "tokio-rustls 0.26.0", "tower-service", "url", @@ -8707,15 +8584,6 @@ dependencies = [ "winapi-util", ] -[[package]] -name = "schannel" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" -dependencies = [ - "windows-sys 0.59.0", -] - [[package]] name = "schnellru" version = "0.2.3" @@ -8807,29 +8675,6 @@ dependencies = [ "cc", ] -[[package]] -name = "security-framework" -version = "2.11.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" -dependencies = [ - "bitflags 2.6.0", - "core-foundation", - "core-foundation-sys", - "libc", - "security-framework-sys", -] - -[[package]] -name = "security-framework-sys" -version = "2.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" -dependencies = [ - "core-foundation-sys", - "libc", -] - [[package]] name = "self_encryption" version = "0.30.0" @@ -9793,16 +9638,6 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "tokio-native-tls" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" -dependencies = [ - "native-tls", - "tokio", -] - [[package]] name = "tokio-rustls" version = "0.22.0" @@ -9932,7 +9767,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "h2 0.3.26", + "h2", "http 0.2.12", "http-body 0.4.6", "hyper 0.14.31", @@ -9964,7 +9799,7 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "h2 0.3.26", + "h2", "http 0.2.12", "http-body 0.4.6", "hyper 0.14.31", @@ -10452,12 +10287,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "vergen" version = "8.3.2" diff --git a/ant-bootstrap/Cargo.toml b/ant-bootstrap/Cargo.toml index e707df4fef..cfe61bd7f5 100644 --- a/ant-bootstrap/Cargo.toml +++ b/ant-bootstrap/Cargo.toml @@ -18,19 +18,27 @@ ant-protocol = { version = "0.17.15", path = "../ant-protocol" } chrono = { version = "0.4", features = ["serde"] } clap = { version = "4.2.1", features = ["derive", "env"] } dirs-next = "~2.0.0" -fs2 = "0.4.3" futures = "0.3.30" libp2p = { version = "0.54.1", features = ["serde"] } -reqwest = { version = "0.12.2", features = ["json"] } +reqwest = { version = "0.12.2", default-features = false, features = [ + "rustls-tls-manual-roots", +] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" tempfile = "3.8.1" thiserror = "1.0" -tokio = { version = "1.0", features = ["full", "sync"] } +tokio = { version = "1.0", features = ["time"] } tracing = "0.1" url = "2.4.0" +# fs2 fails to compile on wasm32 target +[target.'cfg(not(target_arch = "wasm32"))'.dependencies] +fs2 = "0.4.3" + [dev-dependencies] wiremock = "0.5" tokio = { version = "1.0", features = ["full", "test-util"] } -tracing-subscriber = { version = "0.3", features = ["env-filter"] } \ No newline at end of file +tracing-subscriber = { version = "0.3", features = ["env-filter"] } + +[target.'cfg(target_arch = "wasm32")'.dependencies] +wasmtimer = "0.2.0" \ No newline at end of file diff --git a/ant-bootstrap/src/cache_store.rs b/ant-bootstrap/src/cache_store.rs index c3d79f045b..facd71490a 100644 --- a/ant-bootstrap/src/cache_store.rs +++ b/ant-bootstrap/src/cache_store.rs @@ -10,6 +10,7 @@ use crate::{ craft_valid_multiaddr, initial_peers::PeersArgs, multiaddr_get_peer_id, BootstrapAddr, BootstrapAddresses, BootstrapCacheConfig, Error, Result, }; +#[cfg(not(target_arch = "wasm32"))] use fs2::FileExt; use libp2p::multiaddr::Protocol; use libp2p::{Multiaddr, PeerId}; @@ -17,7 +18,7 @@ use serde::{Deserialize, Serialize}; use std::collections::hash_map::Entry; use std::collections::HashMap; use std::fs::{self, File, OpenOptions}; -use std::io::{self, Read}; +use std::io::Read; use std::path::PathBuf; use std::time::{Duration, SystemTime}; use tempfile::NamedTempFile; @@ -197,15 +198,15 @@ impl BootstrapCacheStore { Ok(()) } - pub async fn initialize_from_local_cache(&mut self) -> Result<()> { - self.data = Self::load_cache_data(&self.config).await?; + pub fn initialize_from_local_cache(&mut self) -> Result<()> { + self.data = Self::load_cache_data(&self.config)?; self.old_shared_state = self.data.clone(); Ok(()) } /// Load cache data from disk /// Make sure to have clean addrs inside the cache as we don't call craft_valid_multiaddr - pub async fn load_cache_data(cfg: &BootstrapCacheConfig) -> Result { + pub fn load_cache_data(cfg: &BootstrapCacheConfig) -> Result { // Try to open the file with read permissions let mut file = OpenOptions::new() .read(true) @@ -213,7 +214,7 @@ impl BootstrapCacheStore { .inspect_err(|err| warn!("Failed to open cache file: {err}",))?; // Acquire shared lock for reading - Self::acquire_shared_lock(&file).await.inspect_err(|err| { + Self::acquire_shared_lock(&file).inspect_err(|err| { warn!("Failed to acquire shared lock: {err}"); })?; @@ -365,7 +366,7 @@ impl BootstrapCacheStore { return Ok(()); } - if let Ok(data_from_file) = Self::load_cache_data(&self.config).await { + if let Ok(data_from_file) = Self::load_cache_data(&self.config) { self.data.sync(&self.old_shared_state, &data_from_file); // Now the synced version is the old_shared_state } else { @@ -383,19 +384,31 @@ impl BootstrapCacheStore { }) } - async fn acquire_shared_lock(file: &File) -> Result<()> { + /// Acquire a shared lock on the cache file. + #[cfg(target_arch = "wasm32")] + fn acquire_shared_lock(_file: &File) -> Result<()> { + Ok(()) + } + + /// Acquire a shared lock on the cache file. + /// This is a no-op on WASM. + #[cfg(not(target_arch = "wasm32"))] + fn acquire_shared_lock(file: &File) -> Result<()> { let file = file.try_clone()?; + file.try_lock_shared()?; - tokio::task::spawn_blocking(move || file.try_lock_shared().map_err(Error::from)) - .await - .map_err(|e| { - Error::from(std::io::Error::new( - std::io::ErrorKind::Other, - format!("Failed to spawn blocking task: {}", e), - )) - })? + Ok(()) + } + + /// Acquire an exclusive lock on the cache file. + /// This is a no-op on WASM. + #[cfg(target_arch = "wasm32")] + async fn acquire_exclusive_lock(_file: &File) -> Result<()> { + Ok(()) } + /// Acquire an exclusive lock on the cache file. + #[cfg(not(target_arch = "wasm32"))] async fn acquire_exclusive_lock(file: &File) -> Result<()> { let mut backoff = Duration::from_millis(10); let max_attempts = 5; @@ -407,9 +420,12 @@ impl BootstrapCacheStore { Err(_) if attempts >= max_attempts => { return Err(Error::LockError); } - Err(e) if e.kind() == io::ErrorKind::WouldBlock => { + Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => { attempts += 1; + #[cfg(not(target_arch = "wasm32"))] tokio::time::sleep(backoff).await; + #[cfg(target_arch = "wasm32")] + wasmtimer::tokio::sleep(backoff).await; backoff *= 2; } Err(_) => return Err(Error::LockError), diff --git a/ant-bootstrap/src/contacts.rs b/ant-bootstrap/src/contacts.rs index 53c3c3c62f..c984c789b1 100644 --- a/ant-bootstrap/src/contacts.rs +++ b/ant-bootstrap/src/contacts.rs @@ -14,6 +14,7 @@ use std::time::Duration; use url::Url; /// The client fetch timeout +#[cfg(not(target_arch = "wasm32"))] const FETCH_TIMEOUT_SECS: u64 = 30; /// Maximum number of endpoints to fetch at a time const MAX_CONCURRENT_FETCHES: usize = 3; @@ -217,7 +218,11 @@ impl ContactsFetcher { trace!( "Failed to get bootstrap addrs from URL, retrying {retries}/{MAX_RETRIES_ON_FETCH_FAILURE}" ); + + #[cfg(not(target_arch = "wasm32"))] tokio::time::sleep(Duration::from_secs(1)).await; + #[cfg(target_arch = "wasm32")] + wasmtimer::tokio::sleep(Duration::from_secs(1)).await; }; Ok(bootstrap_addresses) diff --git a/ant-bootstrap/src/error.rs b/ant-bootstrap/src/error.rs index e7771a64b4..a8cb8e1cc8 100644 --- a/ant-bootstrap/src/error.rs +++ b/ant-bootstrap/src/error.rs @@ -26,8 +26,6 @@ pub enum Error { Json(#[from] serde_json::Error), #[error("HTTP error: {0}")] Http(#[from] reqwest::Error), - #[error("Timeout error: {0}")] - Timeout(#[from] tokio::time::error::Elapsed), #[error("Persist error: {0}")] Persist(#[from] tempfile::PersistError), #[error("Lock error")] diff --git a/ant-bootstrap/src/initial_peers.rs b/ant-bootstrap/src/initial_peers.rs index a15f60cc05..4bfa372276 100644 --- a/ant-bootstrap/src/initial_peers.rs +++ b/ant-bootstrap/src/initial_peers.rs @@ -164,7 +164,7 @@ impl PeersArgs { }; if let Some(cfg) = cfg { info!("Loading bootstrap addresses from cache"); - if let Ok(data) = BootstrapCacheStore::load_cache_data(&cfg).await { + if let Ok(data) = BootstrapCacheStore::load_cache_data(&cfg) { if let Some(cache) = cache.as_mut() { info!("Initializing cache with bootstrap addresses from cache"); cache.data = data.clone(); diff --git a/ant-bootstrap/tests/cache_tests.rs b/ant-bootstrap/tests/cache_tests.rs index aac95579a0..360280aab5 100644 --- a/ant-bootstrap/tests/cache_tests.rs +++ b/ant-bootstrap/tests/cache_tests.rs @@ -63,7 +63,7 @@ async fn test_cache_persistence() -> Result<(), Box> { // Create a new cache store with the same path let mut cache_store2 = BootstrapCacheStore::empty(config)?; - cache_store2.initialize_from_local_cache().await.unwrap(); + cache_store2.initialize_from_local_cache().unwrap(); let addrs = cache_store2.get_reliable_addrs().collect::>(); assert!(!addrs.is_empty(), "Cache should persist across instances"); diff --git a/ant-networking/src/driver.rs b/ant-networking/src/driver.rs index 87df73825b..8ce4c9c908 100644 --- a/ant-networking/src/driver.rs +++ b/ant-networking/src/driver.rs @@ -21,15 +21,14 @@ use crate::{ record_store_api::UnifiedRecordStore, relay_manager::RelayManager, replication_fetcher::ReplicationFetcher, + target_arch::Interval, target_arch::{interval, spawn, Instant}, - GetRecordError, Network, CLOSE_GROUP_SIZE, + transport, GetRecordError, Network, NodeIssue, CLOSE_GROUP_SIZE, }; #[cfg(feature = "open-metrics")] use crate::{ metrics::service::run_metrics_server, metrics::NetworkMetricsRecorder, MetricsRegistries, }; -use crate::{transport, NodeIssue}; - use ant_bootstrap::BootstrapCacheStore; use ant_evm::PaymentQuote; use ant_protocol::{ @@ -72,11 +71,8 @@ use std::{ num::NonZeroUsize, path::PathBuf, }; +use tokio::sync::{mpsc, oneshot}; use tokio::time::Duration; -use tokio::{ - sync::{mpsc, oneshot}, - time::Interval, -}; use tracing::warn; use xor_name::XorName; @@ -1056,9 +1052,11 @@ impl SwarmDriver { max_cache_save_duration.as_secs(), )); info!("Scaling up the bootstrap cache save interval to {new_duration:?}"); - *current_interval = interval(new_duration); - current_interval.tick().await; // first tick completes immediately + // `Interval` ticks immediately for Tokio, but not for `wasmtimer`, which is used for wasm32. + *current_interval = interval(new_duration); + #[cfg(not(target_arch = "wasm32"))] + current_interval.tick().await; }, } } From cd44b1b587f6786b181f3b48260199f7379b5892 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Thu, 5 Dec 2024 03:15:50 +0530 Subject: [PATCH 15/21] fix(bootstrap): use atomic write crate and remove locks --- Cargo.lock | 34 +++++--- ant-bootstrap/Cargo.toml | 7 +- ant-bootstrap/src/cache_store.rs | 121 +++++++---------------------- ant-bootstrap/src/error.rs | 2 - ant-bootstrap/src/initial_peers.rs | 2 +- ant-bootstrap/tests/cache_tests.rs | 2 +- ant-networking/src/driver.rs | 2 +- 7 files changed, 56 insertions(+), 114 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5eb4ca627..f9324659bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -728,10 +728,10 @@ version = "0.1.0" dependencies = [ "ant-logging", "ant-protocol", + "atomic-write-file", "chrono", "clap", "dirs-next", - "fs2", "futures", "libp2p 0.54.1 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.12.9", @@ -1476,6 +1476,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "atomic-write-file" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23e32862ecc63d580f4a5e1436a685f51e0629caeb7a7933e4f017d5e2099e13" +dependencies = [ + "nix 0.29.0", + "rand 0.8.5", +] + [[package]] name = "attohttpc" version = "0.24.1" @@ -3558,16 +3568,6 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi", -] - [[package]] name = "fs_extra" version = "1.3.0" @@ -6489,6 +6489,18 @@ dependencies = [ "libc", ] +[[package]] +name = "nix" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46" +dependencies = [ + "bitflags 2.6.0", + "cfg-if", + "cfg_aliases", + "libc", +] + [[package]] name = "node-launchpad" version = "0.4.5" diff --git a/ant-bootstrap/Cargo.toml b/ant-bootstrap/Cargo.toml index cfe61bd7f5..1e292cd64d 100644 --- a/ant-bootstrap/Cargo.toml +++ b/ant-bootstrap/Cargo.toml @@ -15,6 +15,7 @@ local = [] [dependencies] ant-logging = { path = "../ant-logging", version = "0.2.40" } ant-protocol = { version = "0.17.15", path = "../ant-protocol" } +atomic-write-file = "0.2.2" chrono = { version = "0.4", features = ["serde"] } clap = { version = "4.2.1", features = ["derive", "env"] } dirs-next = "~2.0.0" @@ -25,20 +26,16 @@ reqwest = { version = "0.12.2", default-features = false, features = [ ] } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -tempfile = "3.8.1" thiserror = "1.0" tokio = { version = "1.0", features = ["time"] } tracing = "0.1" url = "2.4.0" -# fs2 fails to compile on wasm32 target -[target.'cfg(not(target_arch = "wasm32"))'.dependencies] -fs2 = "0.4.3" - [dev-dependencies] wiremock = "0.5" tokio = { version = "1.0", features = ["full", "test-util"] } tracing-subscriber = { version = "0.3", features = ["env-filter"] } +tempfile = "3.8.1" [target.'cfg(target_arch = "wasm32")'.dependencies] wasmtimer = "0.2.0" \ No newline at end of file diff --git a/ant-bootstrap/src/cache_store.rs b/ant-bootstrap/src/cache_store.rs index facd71490a..3d1e2c1732 100644 --- a/ant-bootstrap/src/cache_store.rs +++ b/ant-bootstrap/src/cache_store.rs @@ -10,18 +10,16 @@ use crate::{ craft_valid_multiaddr, initial_peers::PeersArgs, multiaddr_get_peer_id, BootstrapAddr, BootstrapAddresses, BootstrapCacheConfig, Error, Result, }; -#[cfg(not(target_arch = "wasm32"))] -use fs2::FileExt; -use libp2p::multiaddr::Protocol; -use libp2p::{Multiaddr, PeerId}; +use atomic_write_file::AtomicWriteFile; +use libp2p::{multiaddr::Protocol, Multiaddr, PeerId}; use serde::{Deserialize, Serialize}; -use std::collections::hash_map::Entry; -use std::collections::HashMap; -use std::fs::{self, File, OpenOptions}; -use std::io::Read; -use std::path::PathBuf; -use std::time::{Duration, SystemTime}; -use tempfile::NamedTempFile; +use std::{ + collections::{hash_map::Entry, HashMap}, + fs::{self, OpenOptions}, + io::{Read, Write}, + path::PathBuf, + time::{Duration, SystemTime}, +}; #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CacheData { @@ -194,7 +192,7 @@ impl BootstrapCacheStore { peers_arg .get_bootstrap_addr_and_initialize_cache(Some(self)) .await?; - self.sync_and_save_to_disk(true).await?; + self.sync_and_save_to_disk(true)?; Ok(()) } @@ -213,11 +211,6 @@ impl BootstrapCacheStore { .open(&cfg.cache_file_path) .inspect_err(|err| warn!("Failed to open cache file: {err}",))?; - // Acquire shared lock for reading - Self::acquire_shared_lock(&file).inspect_err(|err| { - warn!("Failed to acquire shared lock: {err}"); - })?; - // Read the file contents let mut contents = String::new(); file.read_to_string(&mut contents).inspect_err(|err| { @@ -327,11 +320,11 @@ impl BootstrapCacheStore { } /// Clear all peers from the cache and save to disk - pub async fn clear_peers_and_save(&mut self) -> Result<()> { + pub fn clear_peers_and_save(&mut self) -> Result<()> { self.data.peers.clear(); self.old_shared_state.peers.clear(); - match self.atomic_write().await { + match self.atomic_write() { Ok(_) => Ok(()), Err(e) => { error!("Failed to save cache to disk: {e}"); @@ -342,7 +335,7 @@ impl BootstrapCacheStore { /// Do not perform cleanup when `data` is fetched from the network. /// The SystemTime might not be accurate. - pub async fn sync_and_save_to_disk(&mut self, with_cleanup: bool) -> Result<()> { + pub fn sync_and_save_to_disk(&mut self, with_cleanup: bool) -> Result<()> { if self.config.disable_cache_writing { info!("Cache writing is disabled, skipping sync to disk"); return Ok(()); @@ -379,92 +372,34 @@ impl BootstrapCacheStore { } self.old_shared_state = self.data.clone(); - self.atomic_write().await.inspect_err(|e| { + self.atomic_write().inspect_err(|e| { error!("Failed to save cache to disk: {e}"); }) } - /// Acquire a shared lock on the cache file. - #[cfg(target_arch = "wasm32")] - fn acquire_shared_lock(_file: &File) -> Result<()> { - Ok(()) - } - - /// Acquire a shared lock on the cache file. - /// This is a no-op on WASM. - #[cfg(not(target_arch = "wasm32"))] - fn acquire_shared_lock(file: &File) -> Result<()> { - let file = file.try_clone()?; - file.try_lock_shared()?; - - Ok(()) - } - - /// Acquire an exclusive lock on the cache file. - /// This is a no-op on WASM. - #[cfg(target_arch = "wasm32")] - async fn acquire_exclusive_lock(_file: &File) -> Result<()> { - Ok(()) - } - - /// Acquire an exclusive lock on the cache file. - #[cfg(not(target_arch = "wasm32"))] - async fn acquire_exclusive_lock(file: &File) -> Result<()> { - let mut backoff = Duration::from_millis(10); - let max_attempts = 5; - let mut attempts = 0; - - loop { - match file.try_lock_exclusive() { - Ok(_) => return Ok(()), - Err(_) if attempts >= max_attempts => { - return Err(Error::LockError); - } - Err(e) if e.kind() == std::io::ErrorKind::WouldBlock => { - attempts += 1; - #[cfg(not(target_arch = "wasm32"))] - tokio::time::sleep(backoff).await; - #[cfg(target_arch = "wasm32")] - wasmtimer::tokio::sleep(backoff).await; - backoff *= 2; - } - Err(_) => return Err(Error::LockError), - } - } - } - - async fn atomic_write(&self) -> Result<()> { - info!("Writing cache to disk: {:?}", self.cache_path); + fn atomic_write(&self) -> Result<()> { + debug!("Writing cache to disk: {:?}", self.cache_path); // Create parent directory if it doesn't exist if let Some(parent) = self.cache_path.parent() { fs::create_dir_all(parent)?; } - // Create a temporary file in the same directory as the cache file - let temp_dir = std::env::temp_dir(); - let temp_file = NamedTempFile::new_in(&temp_dir)?; - - // Write data to temporary file - serde_json::to_writer_pretty(&temp_file, &self.data)?; - - // Open the target file with proper permissions - let file = OpenOptions::new() - .write(true) - .create(true) - .truncate(true) - .open(&self.cache_path)?; + let mut file = AtomicWriteFile::options() + .open(&self.cache_path) + .inspect_err(|err| { + error!("Failed to open cache file using AtomicWriteFile: {err}"); + })?; - // Acquire exclusive lock - Self::acquire_exclusive_lock(&file).await?; - - // Perform atomic rename - temp_file.persist(&self.cache_path).inspect_err(|err| { - error!("Failed to persist file with err: {err:?}"); + let data = serde_json::to_string_pretty(&self.data).inspect_err(|err| { + error!("Failed to serialize cache data: {err}"); + })?; + writeln!(file, "{data}")?; + file.commit().inspect_err(|err| { + error!("Failed to commit atomic write: {err}"); })?; info!("Cache written to disk: {:?}", self.cache_path); - // Lock will be automatically released when file is dropped Ok(()) } } @@ -497,7 +432,7 @@ mod tests { let peer_id = multiaddr_get_peer_id(&addr).unwrap(); store.data.insert(peer_id, BootstrapAddr::new(addr.clone())); } - store.sync_and_save_to_disk(true).await.unwrap(); + store.sync_and_save_to_disk(true).unwrap(); store.update_addr_status(&addr, true); diff --git a/ant-bootstrap/src/error.rs b/ant-bootstrap/src/error.rs index a8cb8e1cc8..77002702e5 100644 --- a/ant-bootstrap/src/error.rs +++ b/ant-bootstrap/src/error.rs @@ -26,8 +26,6 @@ pub enum Error { Json(#[from] serde_json::Error), #[error("HTTP error: {0}")] Http(#[from] reqwest::Error), - #[error("Persist error: {0}")] - Persist(#[from] tempfile::PersistError), #[error("Lock error")] LockError, } diff --git a/ant-bootstrap/src/initial_peers.rs b/ant-bootstrap/src/initial_peers.rs index 4bfa372276..6b1abd3a87 100644 --- a/ant-bootstrap/src/initial_peers.rs +++ b/ant-bootstrap/src/initial_peers.rs @@ -96,7 +96,7 @@ impl PeersArgs { info!("First node in network, no initial bootstrap peers"); if let Some(cache) = cache { info!("Clearing cache for 'first' node"); - cache.clear_peers_and_save().await?; + cache.clear_peers_and_save()?; } return Ok(vec![]); } diff --git a/ant-bootstrap/tests/cache_tests.rs b/ant-bootstrap/tests/cache_tests.rs index 360280aab5..17ddfafde4 100644 --- a/ant-bootstrap/tests/cache_tests.rs +++ b/ant-bootstrap/tests/cache_tests.rs @@ -59,7 +59,7 @@ async fn test_cache_persistence() -> Result<(), Box> { .parse()?; cache_store1.add_addr(addr.clone()); cache_store1.update_addr_status(&addr, true); - cache_store1.sync_and_save_to_disk(true).await.unwrap(); + cache_store1.sync_and_save_to_disk(true).unwrap(); // Create a new cache store with the same path let mut cache_store2 = BootstrapCacheStore::empty(config)?; diff --git a/ant-networking/src/driver.rs b/ant-networking/src/driver.rs index 8ce4c9c908..1b12a99071 100644 --- a/ant-networking/src/driver.rs +++ b/ant-networking/src/driver.rs @@ -1033,7 +1033,7 @@ impl SwarmDriver { continue; }; - if let Err(err) = bootstrap_cache.sync_and_save_to_disk(true).await { + if let Err(err) = bootstrap_cache.sync_and_save_to_disk(true) { error!("Failed to save bootstrap cache: {err}"); } From d89d6d2e005aee68ca94f0c8ee52762681607dec Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Thu, 5 Dec 2024 15:51:01 +0530 Subject: [PATCH 16/21] feat(ci): enable bootstrap tests --- .github/workflows/merge.yml | 4 ++++ .github/workflows/nightly.yml | 8 ++++++++ 2 files changed, 12 insertions(+) diff --git a/.github/workflows/merge.yml b/.github/workflows/merge.yml index f306759803..564de2707e 100644 --- a/.github/workflows/merge.yml +++ b/.github/workflows/merge.yml @@ -127,6 +127,10 @@ jobs: timeout-minutes: 25 run: cargo test --release --package autonomi --lib --features="full,fs" + - name: Run bootstrap tests + timeout-minutes: 25 + run: cargo test --release --package ant-bootstrap + - name: Run node tests timeout-minutes: 25 run: cargo test --release --package ant-node --lib diff --git a/.github/workflows/nightly.yml b/.github/workflows/nightly.yml index 32870fff79..23a9b78f99 100644 --- a/.github/workflows/nightly.yml +++ b/.github/workflows/nightly.yml @@ -244,6 +244,14 @@ jobs: run: cargo test --release --lib --bins --no-run timeout-minutes: 30 + - name: Run autonomi tests + timeout-minutes: 25 + run: cargo test --release --package autonomi --lib --features="full,fs" + + - name: Run bootstrap tests + timeout-minutes: 25 + run: cargo test --release --package ant-bootstrap + - name: Run node tests timeout-minutes: 25 run: cargo test --release --package ant-node --lib From c70ee45c637d9071fb9c2d4ad124477c0503f7e3 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Thu, 5 Dec 2024 23:00:32 +0530 Subject: [PATCH 17/21] feat(bootstrap): rework the api to not hold persistant state --- ant-bootstrap/src/cache_store.rs | 165 +++++--------- ant-bootstrap/src/initial_peers.rs | 53 ++--- ant-bootstrap/src/lib.rs | 79 ++----- ant-bootstrap/tests/address_format_tests.rs | 226 +------------------ ant-bootstrap/tests/cache_tests.rs | 82 +------ ant-bootstrap/tests/cli_integration_tests.rs | 129 ++--------- ant-cli/src/access/network.rs | 2 +- ant-networking/src/driver.rs | 25 +- ant-node-manager/src/cmd/local.rs | 2 +- ant-node-manager/src/cmd/node.rs | 2 +- ant-node/src/bin/antnode/main.rs | 6 +- ant-node/src/node.rs | 2 +- 12 files changed, 148 insertions(+), 625 deletions(-) diff --git a/ant-bootstrap/src/cache_store.rs b/ant-bootstrap/src/cache_store.rs index 3d1e2c1732..6877baf9a4 100644 --- a/ant-bootstrap/src/cache_store.rs +++ b/ant-bootstrap/src/cache_store.rs @@ -7,8 +7,8 @@ // permissions and limitations relating to use of the SAFE Network Software. use crate::{ - craft_valid_multiaddr, initial_peers::PeersArgs, multiaddr_get_peer_id, BootstrapAddr, - BootstrapAddresses, BootstrapCacheConfig, Error, Result, + craft_valid_multiaddr, multiaddr_get_peer_id, BootstrapAddr, BootstrapAddresses, + BootstrapCacheConfig, Error, PeersArgs, Result, }; use atomic_write_file::AtomicWriteFile; use libp2p::{multiaddr::Protocol, Multiaddr, PeerId}; @@ -42,23 +42,17 @@ impl CacheData { } } - /// Sync the self cache with another cache by referencing our old_shared_state. - /// Since the cache is updated on periodic interval, we cannot just add our state with the shared state on the fs. - /// This would lead to race conditions, hence the need to store the old shared state in memory and sync it with the - /// new shared state obtained from fs. - pub fn sync(&mut self, old_shared_state: &CacheData, current_shared_state: &CacheData) { - // Add/sync every BootstrapAddresses from shared state into self - for (peer, current_shared_addrs_state) in current_shared_state.peers.iter() { - let old_shared_addrs_state = old_shared_state.peers.get(peer); + /// Sync the self cache with another cache. This would just add the 'other' state to self. + pub fn sync(&mut self, other: &CacheData) { + for (peer, other_addresses_state) in other.peers.iter() { let bootstrap_addresses = self .peers .entry(*peer) - .or_insert(current_shared_addrs_state.clone()); + .or_insert(other_addresses_state.clone()); - trace!("Syncing {peer:?} from fs with addrs count: {:?}, old state count: {:?}. Our in memory state count: {:?}", current_shared_addrs_state.0.len(), old_shared_addrs_state.map(|x| x.0.len()), bootstrap_addresses.0.len()); + trace!("Syncing {peer:?} from other with addrs count: {:?}. Our in memory state count: {:?}", other_addresses_state.0.len(), bootstrap_addresses.0.len()); - // Add/sync every BootstrapAddr into self - bootstrap_addresses.sync(old_shared_addrs_state, current_shared_addrs_state); + bootstrap_addresses.sync(other_addresses_state); } self.last_updated = SystemTime::now(); @@ -153,9 +147,6 @@ pub struct BootstrapCacheStore { pub(crate) cache_path: PathBuf, pub(crate) config: BootstrapCacheConfig, pub(crate) data: CacheData, - /// This is our last known state of the cache on disk, which is shared across all instances. - /// This is not updated until `sync_to_disk` is called. - pub(crate) old_shared_state: CacheData, } impl BootstrapCacheStore { @@ -182,24 +173,38 @@ impl BootstrapCacheStore { cache_path, config, data: CacheData::default(), - old_shared_state: CacheData::default(), }; Ok(store) } - pub async fn initialize_from_peers_arg(&mut self, peers_arg: &PeersArgs) -> Result<()> { - peers_arg - .get_bootstrap_addr_and_initialize_cache(Some(self)) - .await?; - self.sync_and_save_to_disk(true)?; - Ok(()) - } + /// Create a CacheStore from the given peers argument. + /// This also modifies the cfg if provided based on the PeersArgs. + /// And also performs some actions based on the PeersArgs. + pub fn empty_from_peers_args( + peers_arg: &PeersArgs, + cfg: Option, + ) -> Result { + let config = if let Some(cfg) = cfg { + cfg + } else { + BootstrapCacheConfig::default_config()? + }; + let mut store = Self::empty(config)?; - pub fn initialize_from_local_cache(&mut self) -> Result<()> { - self.data = Self::load_cache_data(&self.config)?; - self.old_shared_state = self.data.clone(); - Ok(()) + // If it is the first node, clear the cache. + if peers_arg.first { + info!("First node in network, writing empty cache to disk"); + store.write()?; + } + + // If local mode is enabled, return empty store (will use mDNS) + if peers_arg.local || cfg!(feature = "local") { + info!("Setting config to not write to cache, as 'local' mode is enabled"); + store.config.disable_cache_writing = true; + } + + Ok(store) } /// Load cache data from disk @@ -232,7 +237,7 @@ impl BootstrapCacheStore { self.data.peers.len() } - pub fn get_addrs(&self) -> impl Iterator { + pub fn get_all_addrs(&self) -> impl Iterator { self.data .peers .values() @@ -240,20 +245,18 @@ impl BootstrapCacheStore { } /// Get a list containing single addr per peer. We use the least faulty addr for each peer. - pub fn get_unique_peer_addr(&self) -> impl Iterator { - self.data + /// This list is sorted by the failure rate of the addr. + pub fn get_sorted_addrs(&self) -> impl Iterator { + let mut addrs = self + .data .peers .values() .flat_map(|bootstrap_addresses| bootstrap_addresses.get_least_faulty()) - .map(|bootstrap_addr| &bootstrap_addr.addr) - } + .collect::>(); - pub fn get_reliable_addrs(&self) -> impl Iterator { - self.data - .peers - .values() - .flat_map(|bootstrap_addresses| bootstrap_addresses.0.iter()) - .filter(|bootstrap_addr| bootstrap_addr.is_reliable()) + addrs.sort_by_key(|addr| addr.failure_rate() as u64); + + addrs.into_iter().map(|addr| &addr.addr) } /// Update the status of an addr in the cache. The peer must be added to the cache first. @@ -319,49 +322,21 @@ impl BootstrapCacheStore { self.data.perform_cleanup(&self.config); } - /// Clear all peers from the cache and save to disk - pub fn clear_peers_and_save(&mut self) -> Result<()> { - self.data.peers.clear(); - self.old_shared_state.peers.clear(); - - match self.atomic_write() { - Ok(_) => Ok(()), - Err(e) => { - error!("Failed to save cache to disk: {e}"); - Err(e) - } - } - } - - /// Do not perform cleanup when `data` is fetched from the network. - /// The SystemTime might not be accurate. - pub fn sync_and_save_to_disk(&mut self, with_cleanup: bool) -> Result<()> { + /// Flush the cache to disk after syncing with the CacheData from the file. + /// Do not perform cleanup when `data` is fetched from the network. The SystemTime might not be accurate. + pub fn sync_and_flush_to_disk(&mut self, with_cleanup: bool) -> Result<()> { if self.config.disable_cache_writing { info!("Cache writing is disabled, skipping sync to disk"); return Ok(()); } info!( - "Syncing cache to disk, with data containing: {} peers and old state containing: {} peers", self.data.peers.len(), - self.old_shared_state.peers.len() + "Flushing cache to disk, with data containing: {} peers", + self.data.peers.len(), ); - // Check if the file is read-only before attempting to write - let is_readonly = self - .cache_path - .metadata() - .map(|m| m.permissions().readonly()) - .unwrap_or(false); - - if is_readonly { - warn!("Cannot save to disk: cache file is read-only"); - // todo return err - return Ok(()); - } - if let Ok(data_from_file) = Self::load_cache_data(&self.config) { - self.data.sync(&self.old_shared_state, &data_from_file); - // Now the synced version is the old_shared_state + self.data.sync(&data_from_file); } else { warn!("Failed to load cache data from file, overwriting with new data"); } @@ -370,14 +345,20 @@ impl BootstrapCacheStore { self.data.perform_cleanup(&self.config); self.data.try_remove_oldest_peers(&self.config); } - self.old_shared_state = self.data.clone(); - self.atomic_write().inspect_err(|e| { + self.write().inspect_err(|e| { error!("Failed to save cache to disk: {e}"); - }) + })?; + + // Flush after writing + self.data.peers.clear(); + + Ok(()) } - fn atomic_write(&self) -> Result<()> { + /// Write the cache to disk atomically. This will overwrite the existing cache file, use sync_and_flush_to_disk to + /// sync with the file first. + pub fn write(&self) -> Result<()> { debug!("Writing cache to disk: {:?}", self.cache_path); // Create parent directory if it doesn't exist if let Some(parent) = self.cache_path.parent() { @@ -419,30 +400,6 @@ mod tests { (store.clone(), store.cache_path.clone()) } - #[tokio::test] - async fn test_peer_update_and_save() { - let (mut store, _) = create_test_store().await; - let addr: Multiaddr = - "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" - .parse() - .unwrap(); - - // Manually add a peer without using fallback - { - let peer_id = multiaddr_get_peer_id(&addr).unwrap(); - store.data.insert(peer_id, BootstrapAddr::new(addr.clone())); - } - store.sync_and_save_to_disk(true).unwrap(); - - store.update_addr_status(&addr, true); - - let peers = store.get_addrs().collect::>(); - assert_eq!(peers.len(), 1); - assert_eq!(peers[0].addr, addr); - assert_eq!(peers[0].success_count, 1); - assert_eq!(peers[0].failure_count, 0); - } - #[tokio::test] async fn test_peer_cleanup() { let (mut store, _) = create_test_store().await; @@ -471,7 +428,7 @@ mod tests { store.perform_cleanup(); // Get all peers (not just reliable ones) - let peers = store.get_addrs().collect::>(); + let peers = store.get_all_addrs().collect::>(); assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, good_addr); } @@ -495,7 +452,7 @@ mod tests { store.perform_cleanup(); // Verify peer is still there - let peers = store.get_addrs().collect::>(); + let peers = store.get_all_addrs().collect::>(); assert_eq!(peers.len(), 1); assert_eq!(peers[0].addr, addr); } diff --git a/ant-bootstrap/src/initial_peers.rs b/ant-bootstrap/src/initial_peers.rs index 6b1abd3a87..32a19e6398 100644 --- a/ant-bootstrap/src/initial_peers.rs +++ b/ant-bootstrap/src/initial_peers.rs @@ -73,41 +73,34 @@ impl PeersArgs { /// 2. Addresses from environment variable SAFE_PEERS /// 3. Addresses from cache /// 4. Addresses from network contacts URL - pub async fn get_bootstrap_addr(&self) -> Result> { - self.get_bootstrap_addr_and_initialize_cache(None).await - } - - pub async fn get_addrs(&self) -> Result> { + pub async fn get_addrs(&self, config: Option) -> Result> { Ok(self - .get_bootstrap_addr() + .get_bootstrap_addr(config) .await? .into_iter() .map(|addr| addr.addr) .collect()) } - /// Helper function to fetch bootstrap addresses and initialize cache based on the passed in args. - pub(crate) async fn get_bootstrap_addr_and_initialize_cache( + /// Get bootstrap peers + /// Order of precedence: + /// 1. Addresses from arguments + /// 2. Addresses from environment variable SAFE_PEERS + /// 3. Addresses from cache + /// 4. Addresses from network contacts URL + pub async fn get_bootstrap_addr( &self, - mut cache: Option<&mut BootstrapCacheStore>, + config: Option, ) -> Result> { // If this is the first node, return an empty list if self.first { info!("First node in network, no initial bootstrap peers"); - if let Some(cache) = cache { - info!("Clearing cache for 'first' node"); - cache.clear_peers_and_save()?; - } return Ok(vec![]); } // If local mode is enabled, return empty store (will use mDNS) if self.local || cfg!(feature = "local") { info!("Local mode enabled, using only local discovery."); - if let Some(cache) = cache { - info!("Setting config to not write to cache, as 'local' mode is enabled"); - cache.config.disable_cache_writing = true; - } return Ok(vec![]); } @@ -145,32 +138,20 @@ impl PeersArgs { // Return here if we fetched peers from the args if !bootstrap_addresses.is_empty() { - if let Some(cache) = cache.as_mut() { - info!("Initializing cache with bootstrap addresses from arguments"); - for addr in &bootstrap_addresses { - cache.add_addr(addr.addr.clone()); - } - } + bootstrap_addresses.sort_by_key(|addr| addr.failure_rate() as u64); return Ok(bootstrap_addresses); } // load from cache if present - if !self.ignore_cache { - let cfg = if let Some(cache) = cache.as_ref() { - Some(cache.config.clone()) + let cfg = if let Some(config) = config { + Some(config) } else { BootstrapCacheConfig::default_config().ok() }; if let Some(cfg) = cfg { info!("Loading bootstrap addresses from cache"); if let Ok(data) = BootstrapCacheStore::load_cache_data(&cfg) { - if let Some(cache) = cache.as_mut() { - info!("Initializing cache with bootstrap addresses from cache"); - cache.data = data.clone(); - cache.old_shared_state = data.clone(); - } - bootstrap_addresses = data .peers .into_iter() @@ -186,22 +167,18 @@ impl PeersArgs { } if !bootstrap_addresses.is_empty() { + bootstrap_addresses.sort_by_key(|addr| addr.failure_rate() as u64); return Ok(bootstrap_addresses); } if !self.disable_mainnet_contacts { let contacts_fetcher = ContactsFetcher::with_mainnet_endpoints()?; let addrs = contacts_fetcher.fetch_bootstrap_addresses().await?; - if let Some(cache) = cache.as_mut() { - info!("Initializing cache with bootstrap addresses from mainnet contacts"); - for addr in addrs.iter() { - cache.add_addr(addr.addr.clone()); - } - } bootstrap_addresses = addrs; } if !bootstrap_addresses.is_empty() { + bootstrap_addresses.sort_by_key(|addr| addr.failure_rate() as u64); Ok(bootstrap_addresses) } else { error!("No initial bootstrap peers found through any means"); diff --git a/ant-bootstrap/src/lib.rs b/ant-bootstrap/src/lib.rs index 849901edf1..45379d0f6b 100644 --- a/ant-bootstrap/src/lib.rs +++ b/ant-bootstrap/src/lib.rs @@ -17,30 +17,6 @@ //! - Concurrent Access: File locking for safe multi-process access //! - Atomic Operations: Safe cache updates using atomic file operations //! - Initial Peer Discovery: Fallback web endpoints for new/stale cache scenarios -//! -//! # Example -//! -//! ```no_run -//! use ant_bootstrap::{BootstrapCacheStore, BootstrapCacheConfig, PeersArgs}; -//! use url::Url; -//! -//! # async fn example() -> Result<(), Box> { -//! let config = BootstrapCacheConfig::empty(); -//! let args = PeersArgs { -//! first: false, -//! addrs: vec![], -//! network_contacts_url: Some(Url::parse("https://example.com/peers")?), -//! local: false, -//! disable_mainnet_contacts: false, -//! ignore_cache: false, -//! }; -//! -//! let mut store = BootstrapCacheStore::empty(config)?; -//! store.initialize_from_peers_arg(&args).await?; -//! let addrs = store.get_addrs(); -//! # Ok(()) -//! # } -//! ``` #[macro_use] extern crate tracing; @@ -103,7 +79,7 @@ pub struct BootstrapAddresses(pub Vec); impl BootstrapAddresses { pub fn insert_addr(&mut self, addr: &BootstrapAddr) { if let Some(bootstrap_addr) = self.get_addr_mut(&addr.addr) { - bootstrap_addr.sync(None, addr); + bootstrap_addr.sync(addr); } else { self.0.push(addr.clone()); } @@ -136,19 +112,16 @@ impl BootstrapAddresses { } } - pub fn sync(&mut self, old_shared_state: Option<&Self>, current_shared_state: &Self) { - for current_bootstrap_addr in current_shared_state.0.iter() { - if let Some(bootstrap_addr) = self.get_addr_mut(¤t_bootstrap_addr.addr) { - let old_bootstrap_addr = old_shared_state.and_then(|old_shared_state| { - old_shared_state.get_addr(¤t_bootstrap_addr.addr) - }); - bootstrap_addr.sync(old_bootstrap_addr, current_bootstrap_addr); + pub fn sync(&mut self, other: &Self) { + for other_addr in other.0.iter() { + if let Some(bootstrap_addr) = self.get_addr_mut(&other_addr.addr) { + bootstrap_addr.sync(other_addr); } else { trace!( - "Addr {:?} from fs not found in memory, inserting it.", - current_bootstrap_addr.addr + "Addr {:?} from other not found in self, inserting it.", + other_addr.addr ); - self.insert_addr(current_bootstrap_addr); + self.insert_addr(other_addr); } } } @@ -214,37 +187,15 @@ impl BootstrapAddr { self.success_count >= self.failure_count } - /// If the peer has a old state, just update the difference in values - /// If the peer has no old state, add the values - pub fn sync(&mut self, old_shared_state: Option<&Self>, current_shared_state: &Self) { - trace!("Syncing addr {:?} with old_shared_state: {old_shared_state:?} and current_shared_state: {current_shared_state:?}. Our in-memory state {self:?}", self.addr); - if self.last_seen == current_shared_state.last_seen { + /// Add the values from other into self. + pub fn sync(&mut self, other: &Self) { + trace!("Syncing our state {self:?} with and other: {other:?}."); + if self.last_seen == other.last_seen { return; } - if let Some(old_shared_state) = old_shared_state { - let success_difference = self - .success_count - .saturating_sub(old_shared_state.success_count); - - self.success_count = current_shared_state - .success_count - .saturating_add(success_difference); - - let failure_difference = self - .failure_count - .saturating_sub(old_shared_state.failure_count); - self.failure_count = current_shared_state - .failure_count - .saturating_add(failure_difference); - } else { - self.success_count = self - .success_count - .saturating_add(current_shared_state.success_count); - self.failure_count = self - .failure_count - .saturating_add(current_shared_state.failure_count); - } + self.success_count = self.success_count.saturating_add(other.success_count); + self.failure_count = self.failure_count.saturating_add(other.failure_count); // if at max value, reset to 0 if self.success_count == u32::MAX { @@ -254,7 +205,7 @@ impl BootstrapAddr { self.failure_count = 1; self.success_count = 0; } - self.last_seen = std::cmp::max(self.last_seen, current_shared_state.last_seen); + self.last_seen = std::cmp::max(self.last_seen, other.last_seen); trace!("Successfully synced BootstrapAddr: {self:?}"); } diff --git a/ant-bootstrap/tests/address_format_tests.rs b/ant-bootstrap/tests/address_format_tests.rs index 9673991237..55d9246b8b 100644 --- a/ant-bootstrap/tests/address_format_tests.rs +++ b/ant-bootstrap/tests/address_format_tests.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap::{BootstrapCacheConfig, BootstrapCacheStore, PeersArgs}; +use ant_bootstrap::{BootstrapCacheConfig, PeersArgs}; use ant_logging::LogBuilder; use libp2p::Multiaddr; use tempfile::TempDir; @@ -40,7 +40,7 @@ async fn test_multiaddr_format_parsing() -> Result<(), Box()?; let args = PeersArgs { first: false, @@ -51,9 +51,7 @@ async fn test_multiaddr_format_parsing() -> Result<(), Box>(); + let bootstrap_addresses = args.get_bootstrap_addr(None).await?; assert_eq!(bootstrap_addresses.len(), 1, "Should have one peer"); assert_eq!( bootstrap_addresses[0].addr, addr, @@ -68,7 +66,7 @@ async fn test_multiaddr_format_parsing() -> Result<(), Box Result<(), Box> { let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - let (_temp_dir, config) = setup().await; + let (_temp_dir, _config) = setup().await; // Create a mock server with network contacts format let mock_server = MockServer::start().await; @@ -90,17 +88,15 @@ async fn test_network_contacts_format() -> Result<(), Box ignore_cache: false, }; - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let adddrs = store.get_addrs().collect::>(); + let addrs = args.get_bootstrap_addr(None).await?; assert_eq!( - adddrs.len(), + addrs.len(), 2, "Should have two peers from network contacts" ); // Verify address formats - for addr in adddrs { + for addr in addrs { let addr_str = addr.addr.to_string(); assert!(addr_str.contains("/ip4/"), "Should have IPv4 address"); assert!(addr_str.contains("/udp/"), "Should have UDP port"); @@ -110,211 +106,3 @@ async fn test_network_contacts_format() -> Result<(), Box Ok(()) } - -#[tokio::test] -async fn test_socket_addr_format() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert!(addrs.is_empty(), "Should have no peers in local mode"); - - Ok(()) -} - -#[tokio::test] -async fn test_multiaddr_format() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert!(addrs.is_empty(), "Should have no peers in local mode"); - - Ok(()) -} - -#[tokio::test] -async fn test_invalid_addr_format() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert!(addrs.is_empty(), "Should have no peers in local mode"); - - Ok(()) -} - -#[tokio::test] -async fn test_mixed_addr_formats() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert!(addrs.is_empty(), "Should have no peers in local mode"); - - Ok(()) -} - -#[tokio::test] -async fn test_socket_addr_conversion() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert!(addrs.is_empty(), "Should have no peers in local mode"); - - Ok(()) -} - -#[tokio::test] -async fn test_invalid_socket_addr() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert!(addrs.is_empty(), "Should have no peers in local mode"); - - Ok(()) -} - -#[tokio::test] -async fn test_invalid_multiaddr() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert!(addrs.is_empty(), "Should have no peers in local mode"); - - Ok(()) -} - -#[tokio::test] -async fn test_mixed_valid_invalid_addrs() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("address_format_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: true, // Use local mode to avoid getting peers from default endpoints - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config)?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert!(addrs.is_empty(), "Should have no peers in local mode"); - - Ok(()) -} diff --git a/ant-bootstrap/tests/cache_tests.rs b/ant-bootstrap/tests/cache_tests.rs index 17ddfafde4..429e6be54a 100644 --- a/ant-bootstrap/tests/cache_tests.rs +++ b/ant-bootstrap/tests/cache_tests.rs @@ -32,88 +32,16 @@ async fn test_cache_store_operations() -> Result<(), Box> cache_store.add_addr(addr.clone()); cache_store.update_addr_status(&addr, true); - let addrs = cache_store.get_reliable_addrs().collect::>(); + let addrs = cache_store.get_sorted_addrs().collect::>(); assert!(!addrs.is_empty(), "Cache should contain the added peer"); assert!( - addrs.iter().any(|p| p.addr == addr), + addrs.iter().any(|&a| a == &addr), "Cache should contain our specific peer" ); Ok(()) } -#[tokio::test] -async fn test_cache_persistence() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("cache_tests", false); - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - // Create first cache store - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut cache_store1 = BootstrapCacheStore::empty(config.clone())?; - - // Add a peer and mark it as reliable - let addr: Multiaddr = - "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" - .parse()?; - cache_store1.add_addr(addr.clone()); - cache_store1.update_addr_status(&addr, true); - cache_store1.sync_and_save_to_disk(true).unwrap(); - - // Create a new cache store with the same path - let mut cache_store2 = BootstrapCacheStore::empty(config)?; - cache_store2.initialize_from_local_cache().unwrap(); - let addrs = cache_store2.get_reliable_addrs().collect::>(); - - assert!(!addrs.is_empty(), "Cache should persist across instances"); - assert!( - addrs.iter().any(|p| p.addr == addr), - "Specific peer should persist" - ); - - Ok(()) -} - -#[tokio::test] -async fn test_cache_reliability_tracking() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("cache_tests", false); - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let mut cache_store = BootstrapCacheStore::empty(config)?; - - let addr: Multiaddr = - "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" - .parse()?; - cache_store.add_addr(addr.clone()); - - // Test successful connections - for _ in 0..3 { - cache_store.update_addr_status(&addr, true); - } - - let addrs = cache_store.get_reliable_addrs().collect::>(); - assert!( - addrs.iter().any(|p| p.addr == addr), - "Address should be reliable after successful connections" - ); - - // Test failed connections - for _ in 0..5 { - cache_store.update_addr_status(&addr, false); - } - - let addrs = cache_store.get_reliable_addrs().collect::>(); - assert!( - !addrs.iter().any(|p| p.addr == addr), - "Address should not be reliable after failed connections" - ); - - Ok(()) -} - #[tokio::test] async fn test_cache_max_peers() -> Result<(), Box> { let _guard = LogBuilder::init_single_threaded_tokio_test("cache_tests", false); @@ -137,7 +65,7 @@ async fn test_cache_max_peers() -> Result<(), Box> { sleep(Duration::from_millis(100)).await; } - let addrs = cache_store.get_addrs().collect::>(); + let addrs = cache_store.get_all_addrs().collect::>(); assert_eq!(addrs.len(), 2, "Cache should respect max_peers limit"); // Get the addresses of the peers we have @@ -181,12 +109,12 @@ async fn test_cache_file_corruption() -> Result<(), Box> // Create a new cache store - it should handle the corruption gracefully let mut new_cache_store = BootstrapCacheStore::empty(config)?; - let addrs = new_cache_store.get_addrs().collect::>(); + let addrs = new_cache_store.get_all_addrs().collect::>(); assert!(addrs.is_empty(), "Cache should be empty after corruption"); // Should be able to add peers again new_cache_store.add_addr(addr); - let addrs = new_cache_store.get_addrs().collect::>(); + let addrs = new_cache_store.get_all_addrs().collect::>(); assert_eq!( addrs.len(), 1, diff --git a/ant-bootstrap/tests/cli_integration_tests.rs b/ant-bootstrap/tests/cli_integration_tests.rs index 3afd531b67..1afee9176e 100644 --- a/ant-bootstrap/tests/cli_integration_tests.rs +++ b/ant-bootstrap/tests/cli_integration_tests.rs @@ -6,12 +6,9 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use ant_bootstrap::ANT_PEERS_ENV; -use ant_bootstrap::{BootstrapCacheConfig, BootstrapCacheStore, PeersArgs}; +use ant_bootstrap::{BootstrapCacheConfig, PeersArgs}; use ant_logging::LogBuilder; use libp2p::Multiaddr; -use std::env; -use std::fs; use tempfile::TempDir; use wiremock::{ matchers::{method, path}, @@ -40,9 +37,8 @@ async fn test_first_flag() -> Result<(), Box> { ignore_cache: false, }; - let mut store = BootstrapCacheStore::empty(config.clone())?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); + let addrs = args.get_addrs(Some(config)).await?; + assert!(addrs.is_empty(), "First node should have no addrs"); Ok(()) @@ -51,7 +47,7 @@ async fn test_first_flag() -> Result<(), Box> { #[tokio::test] async fn test_peer_argument() -> Result<(), Box> { let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); - let (_temp_dir, config) = setup().await; + let (_temp_dir, _config) = setup().await; let peer_addr: Multiaddr = "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" @@ -62,58 +58,14 @@ async fn test_peer_argument() -> Result<(), Box> { addrs: vec![peer_addr.clone()], network_contacts_url: None, local: false, - disable_mainnet_contacts: false, + disable_mainnet_contacts: true, ignore_cache: false, }; - let mut store = BootstrapCacheStore::empty(config.clone())?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert_eq!(addrs.len(), 1, "Should have one addr"); - assert_eq!(addrs[0].addr, peer_addr, "Should have the correct address"); + let addrs = args.get_addrs(None).await?; - Ok(()) -} - -#[tokio::test] -async fn test_ant_peers_env() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - // Set ANT_PEERS_ENV environment variable - let addr = - "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE"; - env::set_var(ANT_PEERS_ENV, addr); - - let args = PeersArgs { - first: false, - addrs: vec![], - network_contacts_url: None, - local: false, - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - let mut store = BootstrapCacheStore::empty(config.clone())?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - - // We should have multiple peers (env var + cache/endpoints) - assert!(!addrs.is_empty(), "Should have peers"); - - // Verify that our env var peer is included in the set - let has_env_peer = addrs.iter().any(|p| p.addr.to_string() == addr); - assert!( - has_env_peer, - "Should include the peer from ANT_PEERS_ENV var" - ); - - // Clean up - env::remove_var(ANT_PEERS_ENV); + assert_eq!(addrs.len(), 1, "Should have one addr"); + assert_eq!(addrs[0], peer_addr, "Should have the correct address"); Ok(()) } @@ -144,9 +96,7 @@ async fn test_network_contacts_fallback() -> Result<(), Box>(); + let addrs = args.get_addrs(Some(config)).await?; assert_eq!( addrs.len(), 2, @@ -176,9 +126,8 @@ async fn test_local_mode() -> Result<(), Box> { ignore_cache: false, }; - let mut store = BootstrapCacheStore::empty(config.clone())?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); + let addrs = args.get_addrs(Some(config)).await?; + assert!(addrs.is_empty(), "Local mode should have no peers"); // Verify cache was not touched @@ -208,65 +157,17 @@ async fn test_test_network_peers() -> Result<(), Box> { addrs: vec![peer_addr.clone()], network_contacts_url: None, local: false, - disable_mainnet_contacts: false, + disable_mainnet_contacts: true, ignore_cache: false, }; - let mut store = BootstrapCacheStore::empty(config.clone())?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); + let addrs = args.get_addrs(Some(config)).await?; + assert_eq!(addrs.len(), 1, "Should have exactly one test network peer"); assert_eq!( - addrs[0].addr, peer_addr, + addrs[0], peer_addr, "Should have the correct test network peer" ); - // Verify cache was updated - assert!( - cache_path.exists(), - "Cache file should not exist for test network" - ); - - Ok(()) -} - -#[tokio::test] -async fn test_peers_update_cache() -> Result<(), Box> { - let _guard = LogBuilder::init_single_threaded_tokio_test("cli_integration_tests", false); - - let temp_dir = TempDir::new()?; - let cache_path = temp_dir.path().join("cache.json"); - - // Create a peer address for testing - let peer_addr: Multiaddr = - "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" - .parse()?; - - let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - - // Create args with peers but no test network mode - let args = PeersArgs { - first: false, - addrs: vec![peer_addr.clone()], - network_contacts_url: None, - local: false, - disable_mainnet_contacts: false, - ignore_cache: false, - }; - - let mut store = BootstrapCacheStore::empty(config.clone())?; - store.initialize_from_peers_arg(&args).await?; - let addrs = store.get_addrs().collect::>(); - assert_eq!(addrs.len(), 1, "Should have one peer"); - assert_eq!(addrs[0].addr, peer_addr, "Should have the correct peer"); - - // Verify cache was updated - assert!(cache_path.exists(), "Cache file should exist"); - let cache_contents = fs::read_to_string(&cache_path)?; - assert!( - cache_contents.contains(&peer_addr.to_string()), - "Cache should contain the peer address" - ); - Ok(()) } diff --git a/ant-cli/src/access/network.rs b/ant-cli/src/access/network.rs index 45f049e31f..acf7acfae6 100644 --- a/ant-cli/src/access/network.rs +++ b/ant-cli/src/access/network.rs @@ -13,7 +13,7 @@ use color_eyre::Result; use color_eyre::Section; pub async fn get_peers(peers: PeersArgs) -> Result> { - peers.get_addrs().await + peers.get_addrs(None).await .wrap_err("Please provide valid Network peers to connect to") .with_suggestion(|| format!("make sure you've provided network peers using the --peers option or the {ANT_PEERS_ENV} env var")) .with_suggestion(|| "a peer address looks like this: /ip4/42.42.42.42/udp/4242/quic-v1/p2p/B64nodePeerIDvdjb3FAJF4ks3moreBase64CharsHere") diff --git a/ant-networking/src/driver.rs b/ant-networking/src/driver.rs index 1b12a99071..125dc543f0 100644 --- a/ant-networking/src/driver.rs +++ b/ant-networking/src/driver.rs @@ -1032,10 +1032,26 @@ impl SwarmDriver { let Some(current_interval) = bootstrap_cache_save_interval.as_mut() else { continue; }; + let start = Instant::now(); - if let Err(err) = bootstrap_cache.sync_and_save_to_disk(true) { - error!("Failed to save bootstrap cache: {err}"); - } + let config = bootstrap_cache.config().clone(); + let mut old_cache = bootstrap_cache.clone(); + + let new = match BootstrapCacheStore::empty(config) { + Ok(new) => new, + Err(err) => { + error!("Failed to create a new empty cache: {err}"); + continue; + } + }; + *bootstrap_cache = new; + + // save the cache to disk + spawn(async move { + if let Err(err) = old_cache.sync_and_flush_to_disk(true) { + error!("Failed to save bootstrap cache: {err}"); + } + }); if current_interval.period() >= bootstrap_cache.config().max_cache_save_duration { continue; @@ -1057,6 +1073,9 @@ impl SwarmDriver { *current_interval = interval(new_duration); #[cfg(not(target_arch = "wasm32"))] current_interval.tick().await; + + trace!("Bootstrap cache synced in {:?}", start.elapsed()); + }, } } diff --git a/ant-node-manager/src/cmd/local.rs b/ant-node-manager/src/cmd/local.rs index f28f37d206..cdf0bd375c 100644 --- a/ant-node-manager/src/cmd/local.rs +++ b/ant-node-manager/src/cmd/local.rs @@ -72,7 +72,7 @@ pub async fn join( // If no peers are obtained we will attempt to join the existing local network, if one // is running. - let peers = match peers_args.get_addrs().await { + let peers = match peers_args.get_addrs(None).await { Ok(peers) => Some(peers), Err(err) => match err { ant_bootstrap::error::Error::NoBootstrapPeersFound => { diff --git a/ant-node-manager/src/cmd/node.rs b/ant-node-manager/src/cmd/node.rs index f4f6b67a48..d21de2b45e 100644 --- a/ant-node-manager/src/cmd/node.rs +++ b/ant-node-manager/src/cmd/node.rs @@ -117,7 +117,7 @@ pub async fn add( // If the `antnode` binary we're using has `network-contacts` enabled (which is the case for released binaries), // it's fine if the service definition doesn't call `antnode` with a `--peer` argument. let is_first = peers_args.first; - let bootstrap_peers = match peers_args.get_addrs().await { + let bootstrap_peers = match peers_args.get_addrs(None).await { Ok(peers) => { info!("Obtained peers of length {}", peers.len()); peers.into_iter().take(10).collect::>() diff --git a/ant-node/src/bin/antnode/main.rs b/ant-node/src/bin/antnode/main.rs index bfaa2b8aae..eff60ae043 100644 --- a/ant-node/src/bin/antnode/main.rs +++ b/ant-node/src/bin/antnode/main.rs @@ -265,8 +265,10 @@ fn main() -> Result<()> { init_logging(&opt, keypair.public().to_peer_id())?; let rt = Runtime::new()?; - let mut bootstrap_cache = BootstrapCacheStore::empty(BootstrapCacheConfig::default_config()?)?; - rt.block_on(bootstrap_cache.initialize_from_peers_arg(&opt.peers))?; + let bootstrap_cache = BootstrapCacheStore::empty_from_peers_args( + &opt.peers, + Some(BootstrapCacheConfig::default_config()?), + )?; let msg = format!( "Running {} v{}", diff --git a/ant-node/src/node.rs b/ant-node/src/node.rs index c3b2ab710c..9f5ac21bba 100644 --- a/ant-node/src/node.rs +++ b/ant-node/src/node.rs @@ -184,7 +184,7 @@ impl NodeBuilder { let initial_peers = if !self.initial_peers.is_empty() { self.initial_peers.clone() } else if let Some(cache) = &self.bootstrap_cache { - cache.get_unique_peer_addr().cloned().collect() + cache.get_sorted_addrs().cloned().collect() } else { vec![] }; From dfeac3bf5d6cd727efa22270e3cb86ec28cb6688 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Thu, 5 Dec 2024 23:27:33 +0530 Subject: [PATCH 18/21] chore(bootstrap): remove components related to serving the json --- ant-bootstrap/src/contacts.rs | 38 +---- ant-bootstrap/src/lib.rs | 34 ---- ant-bootstrap/tests/integration_tests.rs | 202 ----------------------- 3 files changed, 4 insertions(+), 270 deletions(-) delete mode 100644 ant-bootstrap/tests/integration_tests.rs diff --git a/ant-bootstrap/src/contacts.rs b/ant-bootstrap/src/contacts.rs index c984c789b1..83262fbc1a 100644 --- a/ant-bootstrap/src/contacts.rs +++ b/ant-bootstrap/src/contacts.rs @@ -6,7 +6,7 @@ // KIND, either express or implied. Please review the Licences for the specific language governing // permissions and limitations relating to use of the SAFE Network Software. -use crate::{craft_valid_multiaddr_from_str, BootstrapAddr, BootstrapEndpoints, Error, Result}; +use crate::{cache_store::CacheData, craft_valid_multiaddr_from_str, BootstrapAddr, Error, Result}; use futures::stream::{self, StreamExt}; use libp2p::Multiaddr; use reqwest::Client; @@ -230,7 +230,7 @@ impl ContactsFetcher { /// Try to parse a response from a endpoint fn try_parse_response(response: &str, ignore_peer_id: bool) -> Result> { - match serde_json::from_str::(response) { + match serde_json::from_str::(response) { Ok(json_endpoints) => { info!( "Successfully parsed JSON response with {} peers", @@ -239,8 +239,8 @@ impl ContactsFetcher { let bootstrap_addresses = json_endpoints .peers .into_iter() - .filter_map(|addr_str| { - craft_valid_multiaddr_from_str(&addr_str, ignore_peer_id) + .filter_map(|(_, addresses)| { + addresses.get_least_faulty().map(|addr| addr.addr.clone()) }) .collect::>(); @@ -436,34 +436,4 @@ mod tests { let fetcher = ContactsFetcher::with_endpoints(endpoints.clone()).unwrap(); assert_eq!(fetcher.endpoints, endpoints); } - - #[tokio::test] - async fn test_json_endpoints() { - let mock_server = MockServer::start().await; - - Mock::given(method("GET")) - .and(path("/")) - .respond_with(ResponseTemplate::new(200).set_body_string( - r#"{"peers": ["/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5", "/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE"]}"#, - )) - .mount(&mock_server) - .await; - - let mut fetcher = ContactsFetcher::new().unwrap(); - fetcher.endpoints = vec![mock_server.uri().parse().unwrap()]; - - let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); - assert_eq!(addrs.len(), 2); - - let addr1: Multiaddr = - "/ip4/127.0.0.1/tcp/8080/p2p/12D3KooWD2aV1f3qkhggzEFaJ24CEFYkSdZF5RKoMLpU6CwExYV5" - .parse() - .unwrap(); - let addr2: Multiaddr = - "/ip4/127.0.0.2/tcp/8080/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" - .parse() - .unwrap(); - assert!(addrs.iter().any(|p| p.addr == addr1)); - assert!(addrs.iter().any(|p| p.addr == addr2)); - } } diff --git a/ant-bootstrap/src/lib.rs b/ant-bootstrap/src/lib.rs index 45379d0f6b..e7cfa21d8b 100644 --- a/ant-bootstrap/src/lib.rs +++ b/ant-bootstrap/src/lib.rs @@ -38,40 +38,6 @@ pub use contacts::ContactsFetcher; pub use error::{Error, Result}; pub use initial_peers::{PeersArgs, ANT_PEERS_ENV}; -/// Structure representing a list of bootstrap endpoints -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct BootstrapEndpoints { - /// List of peer multiaddresses - pub peers: Vec, - /// Optional metadata about the endpoints - #[serde(default)] - pub metadata: EndpointMetadata, -} - -/// Metadata about bootstrap endpoints -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct EndpointMetadata { - /// When the endpoints were last updated - #[serde(default = "default_last_updated")] - pub last_updated: String, - /// Optional description of the endpoints - #[serde(default)] - pub description: String, -} - -fn default_last_updated() -> String { - chrono::Utc::now().to_rfc3339() -} - -impl Default for EndpointMetadata { - fn default() -> Self { - Self { - last_updated: default_last_updated(), - description: String::new(), - } - } -} - #[derive(Debug, Clone, Serialize, Deserialize)] /// Set of addresses for a particular PeerId pub struct BootstrapAddresses(pub Vec); diff --git a/ant-bootstrap/tests/integration_tests.rs b/ant-bootstrap/tests/integration_tests.rs deleted file mode 100644 index 781330e305..0000000000 --- a/ant-bootstrap/tests/integration_tests.rs +++ /dev/null @@ -1,202 +0,0 @@ -// Copyright 2024 MaidSafe.net limited. -// -// This SAFE Network Software is licensed to you under The General Public License (GPL), version 3. -// Unless required by applicable law or agreed to in writing, the SAFE Network Software distributed -// under the GPL Licence is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. Please review the Licences for the specific language governing -// permissions and limitations relating to use of the SAFE Network Software. - -use ant_bootstrap::{BootstrapEndpoints, ContactsFetcher}; -use libp2p::Multiaddr; -use tracing_subscriber::{fmt, EnvFilter}; -use url::Url; -use wiremock::{ - matchers::{method, path}, - Mock, MockServer, ResponseTemplate, -}; - -// Initialize logging for tests -fn init_logging() { - let _ = fmt() - .with_env_filter(EnvFilter::from_default_env()) - .try_init(); -} - -#[tokio::test] -async fn test_fetch_from_amazon_s3() { - init_logging(); - let fetcher = ContactsFetcher::with_mainnet_endpoints().unwrap(); - let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); - - // We should get some peers - assert!(!addrs.is_empty(), "Expected to find some peers from S3"); - - // Verify that all peers have valid multiaddresses - for addr in &addrs { - println!("Found peer: {}", addr.addr); - let addr_str = addr.addr.to_string(); - assert!(addr_str.contains("/ip4/"), "Expected IPv4 address"); - assert!(addr_str.contains("/udp/"), "Expected UDP port"); - assert!(addr_str.contains("/quic-v1/"), "Expected QUIC protocol"); - assert!(addr_str.contains("/p2p/"), "Expected peer ID"); - } -} - -#[tokio::test] -async fn test_individual_s3_endpoints() { - init_logging(); - - // Start a mock server - let mock_server = MockServer::start().await; - - // Create mock responses - let mock_response = r#"/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE -/ip4/127.0.0.2/udp/8081/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERF"#; - - // Mount the mock - Mock::given(method("GET")) - .and(path("/peers")) - .respond_with(ResponseTemplate::new(200).set_body_string(mock_response)) - .mount(&mock_server) - .await; - - let endpoint = format!("{}/peers", mock_server.uri()) - .parse::() - .unwrap(); - let fetcher = ContactsFetcher::with_endpoints(vec![endpoint.clone()]).unwrap(); - - match fetcher.fetch_bootstrap_addresses().await { - Ok(peers) => { - println!( - "Successfully fetched {} peers from {}", - peers.len(), - endpoint - ); - assert!( - !peers.is_empty(), - "Expected to find peers from {}", - endpoint - ); - - // Verify first peer's multiaddr format - if let Some(first_peer) = peers.first() { - let addr_str = first_peer.addr.to_string(); - println!("First peer from {}: {}", endpoint, addr_str); - assert!(addr_str.contains("/ip4/"), "Expected IPv4 address"); - assert!(addr_str.contains("/udp/"), "Expected UDP port"); - assert!(addr_str.contains("/quic-v1/"), "Expected QUIC protocol"); - assert!(addr_str.contains("/p2p/"), "Expected peer ID"); - - // Try to parse it back to ensure it's valid - assert!( - addr_str.parse::().is_ok(), - "Should be valid multiaddr" - ); - } - } - Err(e) => { - panic!("Failed to fetch peers from {}: {}", endpoint, e); - } - } -} - -#[tokio::test] -async fn test_response_format() { - init_logging(); - let fetcher = ContactsFetcher::with_mainnet_endpoints().unwrap(); - let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); - - // Get the first peer to check format - let first_peer = addrs.first().expect("Expected at least one peer"); - let addr_str = first_peer.addr.to_string(); - - // Print the address for debugging - println!("First peer address: {}", addr_str); - - // Verify address components - let components: Vec<&str> = addr_str.split('/').collect(); - assert!(components.contains(&"ip4"), "Missing IP4 component"); - assert!(components.contains(&"udp"), "Missing UDP component"); - assert!(components.contains(&"quic-v1"), "Missing QUIC component"); - assert!( - components.iter().any(|&c| c == "p2p"), - "Missing P2P component" - ); - - // Ensure we can parse it back into a multiaddr - let parsed: Multiaddr = addr_str.parse().expect("Should be valid multiaddr"); - assert_eq!(parsed.to_string(), addr_str, "Multiaddr should round-trip"); -} - -#[tokio::test] -async fn test_json_endpoint_format() { - init_logging(); - let mock_server = MockServer::start().await; - - // Create a mock JSON response - let json_response = r#" - { - "peers": [ - "/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE", - "/ip4/127.0.0.2/udp/8081/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERF" - ], - "metadata": { - "description": "Test endpoints", - "last_updated": "2024-01-01T00:00:00Z" - } - } - "#; - - // Mount the mock - Mock::given(method("GET")) - .and(path("/")) // Use root path instead of /peers - .respond_with(ResponseTemplate::new(200).set_body_string(json_response)) - .mount(&mock_server) - .await; - - let endpoint = mock_server.uri().parse::().unwrap(); - let fetcher = ContactsFetcher::with_endpoints(vec![endpoint.clone()]).unwrap(); - - let addrs = fetcher.fetch_bootstrap_addresses().await.unwrap(); - assert_eq!(addrs.len(), 2); - - // Verify peer addresses - let addrs: Vec = addrs.iter().map(|p| p.addr.to_string()).collect(); - assert!(addrs.contains( - &"/ip4/127.0.0.1/udp/8080/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERE" - .to_string() - )); - assert!(addrs.contains( - &"/ip4/127.0.0.2/udp/8081/quic-v1/p2p/12D3KooWRBhwfeP2Y4TCx1SM6s9rUoHhR5STiGwxBhgFRcw3UERF" - .to_string() - )); -} - -#[tokio::test] -async fn test_s3_json_format() { - init_logging(); - - // Fetch and parse the bootstrap cache JSON - let response = - reqwest::get("https://sn-testnet.s3.eu-west-2.amazonaws.com/bootstrap_cache.json") - .await - .unwrap(); - let json_str = response.text().await.unwrap(); - - // Parse using our BootstrapEndpoints struct - let endpoints: BootstrapEndpoints = serde_json::from_str(&json_str).unwrap(); - - // Verify we got all the peers - assert_eq!(endpoints.peers.len(), 24); - - // Verify we can parse each peer address - for addrs in endpoints.peers { - addrs.parse::().unwrap(); - } - - // Verify metadata - assert_eq!( - endpoints.metadata.description, - "Safe Network testnet bootstrap cache" - ); -} From 56865c660ae0d18de60122ca85212508fc12d9a0 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Fri, 6 Dec 2024 15:08:13 +0530 Subject: [PATCH 19/21] feat(bootstrap): write bootstrap cache from the clients --- Cargo.lock | 1 + ant-bootstrap/src/cache_store.rs | 8 ++++---- ant-bootstrap/tests/cache_tests.rs | 8 ++++---- ant-networking/src/driver.rs | 2 +- ant-node/src/bin/antnode/main.rs | 4 +++- autonomi/Cargo.toml | 1 + autonomi/src/client/mod.rs | 12 +++++++++++- 7 files changed, 25 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f9324659bb..34ae07c699 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1539,6 +1539,7 @@ name = "autonomi" version = "0.2.4" dependencies = [ "alloy", + "ant-bootstrap", "ant-evm", "ant-logging", "ant-networking", diff --git a/ant-bootstrap/src/cache_store.rs b/ant-bootstrap/src/cache_store.rs index 6877baf9a4..a6f63b45d8 100644 --- a/ant-bootstrap/src/cache_store.rs +++ b/ant-bootstrap/src/cache_store.rs @@ -155,7 +155,7 @@ impl BootstrapCacheStore { } /// Create a empty CacheStore with the given configuration - pub fn empty(config: BootstrapCacheConfig) -> Result { + pub fn new(config: BootstrapCacheConfig) -> Result { info!("Creating new CacheStore with config: {:?}", config); let cache_path = config.cache_file_path.clone(); @@ -181,7 +181,7 @@ impl BootstrapCacheStore { /// Create a CacheStore from the given peers argument. /// This also modifies the cfg if provided based on the PeersArgs. /// And also performs some actions based on the PeersArgs. - pub fn empty_from_peers_args( + pub fn new_from_peers_args( peers_arg: &PeersArgs, cfg: Option, ) -> Result { @@ -190,7 +190,7 @@ impl BootstrapCacheStore { } else { BootstrapCacheConfig::default_config()? }; - let mut store = Self::empty(config)?; + let mut store = Self::new(config)?; // If it is the first node, clear the cache. if peers_arg.first { @@ -396,7 +396,7 @@ mod tests { let config = crate::BootstrapCacheConfig::empty().with_cache_path(&cache_file); - let store = BootstrapCacheStore::empty(config).unwrap(); + let store = BootstrapCacheStore::new(config).unwrap(); (store.clone(), store.cache_path.clone()) } diff --git a/ant-bootstrap/tests/cache_tests.rs b/ant-bootstrap/tests/cache_tests.rs index 429e6be54a..4dd9b6edf8 100644 --- a/ant-bootstrap/tests/cache_tests.rs +++ b/ant-bootstrap/tests/cache_tests.rs @@ -23,7 +23,7 @@ async fn test_cache_store_operations() -> Result<(), Box> // Create cache store with config let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let mut cache_store = BootstrapCacheStore::empty(config)?; + let mut cache_store = BootstrapCacheStore::new(config)?; // Test adding and retrieving peers let addr: Multiaddr = @@ -53,7 +53,7 @@ async fn test_cache_max_peers() -> Result<(), Box> { let mut config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); config.max_peers = 2; - let mut cache_store = BootstrapCacheStore::empty(config)?; + let mut cache_store = BootstrapCacheStore::new(config)?; // Add three peers with distinct timestamps let mut addresses = Vec::new(); @@ -94,7 +94,7 @@ async fn test_cache_file_corruption() -> Result<(), Box> // Create cache with some peers let config = BootstrapCacheConfig::empty().with_cache_path(&cache_path); - let mut cache_store = BootstrapCacheStore::empty(config.clone())?; + let mut cache_store = BootstrapCacheStore::new(config.clone())?; // Add a peer let addr: Multiaddr = @@ -108,7 +108,7 @@ async fn test_cache_file_corruption() -> Result<(), Box> tokio::fs::write(&cache_path, "invalid json content").await?; // Create a new cache store - it should handle the corruption gracefully - let mut new_cache_store = BootstrapCacheStore::empty(config)?; + let mut new_cache_store = BootstrapCacheStore::new(config)?; let addrs = new_cache_store.get_all_addrs().collect::>(); assert!(addrs.is_empty(), "Cache should be empty after corruption"); diff --git a/ant-networking/src/driver.rs b/ant-networking/src/driver.rs index 125dc543f0..9276c39237 100644 --- a/ant-networking/src/driver.rs +++ b/ant-networking/src/driver.rs @@ -1037,7 +1037,7 @@ impl SwarmDriver { let config = bootstrap_cache.config().clone(); let mut old_cache = bootstrap_cache.clone(); - let new = match BootstrapCacheStore::empty(config) { + let new = match BootstrapCacheStore::new(config) { Ok(new) => new, Err(err) => { error!("Failed to create a new empty cache: {err}"); diff --git a/ant-node/src/bin/antnode/main.rs b/ant-node/src/bin/antnode/main.rs index eff60ae043..a6d25b9cf5 100644 --- a/ant-node/src/bin/antnode/main.rs +++ b/ant-node/src/bin/antnode/main.rs @@ -265,10 +265,12 @@ fn main() -> Result<()> { init_logging(&opt, keypair.public().to_peer_id())?; let rt = Runtime::new()?; - let bootstrap_cache = BootstrapCacheStore::empty_from_peers_args( + let mut bootstrap_cache = BootstrapCacheStore::new_from_peers_args( &opt.peers, Some(BootstrapCacheConfig::default_config()?), )?; + // To create the file before startup if it doesn't exist. + bootstrap_cache.sync_and_flush_to_disk(true)?; let msg = format!( "Running {} v{}", diff --git a/autonomi/Cargo.toml b/autonomi/Cargo.toml index 2c2b4a7c79..2fc17a6d8e 100644 --- a/autonomi/Cargo.toml +++ b/autonomi/Cargo.toml @@ -27,6 +27,7 @@ vault = ["data", "registers"] websockets = ["ant-networking/websockets"] [dependencies] +ant-bootstrap = { path = "../ant-bootstrap", version = "0.1.0" } ant-evm = { path = "../ant-evm", version = "0.1.4" } ant-networking = { path = "../ant-networking", version = "0.19.5" } ant-protocol = { version = "0.17.15", path = "../ant-protocol" } diff --git a/autonomi/src/client/mod.rs b/autonomi/src/client/mod.rs index f039d097a0..9ccf33d716 100644 --- a/autonomi/src/client/mod.rs +++ b/autonomi/src/client/mod.rs @@ -34,6 +34,7 @@ pub mod wasm; // private module with utility functions mod utils; +use ant_bootstrap::{BootstrapCacheConfig, BootstrapCacheStore}; pub use ant_evm::Amount; use ant_networking::{interval, multiaddr_is_global, Network, NetworkBuilder, NetworkEvent}; @@ -132,7 +133,16 @@ impl Client { } fn build_client_and_run_swarm(local: bool) -> (Network, mpsc::Receiver) { - let network_builder = NetworkBuilder::new(Keypair::generate_ed25519(), local); + let mut network_builder = NetworkBuilder::new(Keypair::generate_ed25519(), local); + + if let Ok(mut config) = BootstrapCacheConfig::default_config() { + if local { + config.disable_cache_writing = true; + } + if let Ok(cache) = BootstrapCacheStore::new(config) { + network_builder.bootstrap_cache(cache); + } + } // TODO: Re-export `Receiver` from `ant-networking`. Else users need to keep their `tokio` dependency in sync. // TODO: Think about handling the mDNS error here. From 469e4965d9dd66317419915b0591729e37babb2b Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Fri, 6 Dec 2024 16:52:32 +0530 Subject: [PATCH 20/21] chore(antctl): use PeersArg::local instead of a separate arg --- ant-bootstrap/src/initial_peers.rs | 6 +++--- ant-node-manager/src/bin/cli/main.rs | 8 +------- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/ant-bootstrap/src/initial_peers.rs b/ant-bootstrap/src/initial_peers.rs index 32a19e6398..07d0cd3b24 100644 --- a/ant-bootstrap/src/initial_peers.rs +++ b/ant-bootstrap/src/initial_peers.rs @@ -25,7 +25,7 @@ pub struct PeersArgs { /// /// If this argument is used, any others will be ignored because they do not apply to the first /// node. - #[clap(long)] + #[clap(long, default_value = "false")] pub first: bool, /// Addr(s) to use for bootstrap, in a 'multiaddr' format containing the peer ID. /// @@ -54,7 +54,7 @@ pub struct PeersArgs { /// Set to indicate this is a local network. You could also set the `local` feature flag to set this to true. /// /// This would use mDNS for peer discovery. - #[clap(long, conflicts_with = "network_contacts_url")] + #[clap(long, conflicts_with = "network_contacts_url", default_value = "false")] pub local: bool, /// Set to indicate this is a testnet. /// @@ -63,7 +63,7 @@ pub struct PeersArgs { pub disable_mainnet_contacts: bool, /// Set to not load the bootstrap addresses from the local cache. - #[clap(long)] + #[clap(long, default_value = "false")] pub ignore_cache: bool, } impl PeersArgs { diff --git a/ant-node-manager/src/bin/cli/main.rs b/ant-node-manager/src/bin/cli/main.rs index eee22641e3..14b84e55f7 100644 --- a/ant-node-manager/src/bin/cli/main.rs +++ b/ant-node-manager/src/bin/cli/main.rs @@ -131,11 +131,6 @@ pub enum SubCmd { /// This enables the use of antnode services from a home network with a router. #[clap(long)] home_network: bool, - /// Set this flag to launch antnode with the --local flag. - /// - /// This is useful for building a service-based local network. - #[clap(long)] - local: bool, /// Provide the path for the log directory for the installed node. /// /// This path is a prefix. Each installed node will have its own directory underneath it. @@ -1075,7 +1070,6 @@ async fn main() -> Result<()> { env_variables, evm_network, home_network, - local, log_dir_path, log_format, max_archived_log_files, @@ -1103,7 +1097,7 @@ async fn main() -> Result<()> { env_variables, Some(evm_network.try_into()?), home_network, - local, + peers.local, log_dir_path, log_format, max_archived_log_files, From 7bbccf5cc238271dce7a67fa1f1e9afd1e9584b3 Mon Sep 17 00:00:00 2001 From: Roland Sherwin Date: Fri, 6 Dec 2024 19:36:56 +0530 Subject: [PATCH 21/21] chore: update based on comments --- ant-bootstrap/src/cache_store.rs | 2 +- ant-networking/src/driver.rs | 7 ++----- ant-networking/src/event/swarm.rs | 18 +++++++++--------- ant-node/src/bin/antnode/main.rs | 2 ++ ant-node/src/error.rs | 2 -- ant-node/src/node.rs | 20 ++++---------------- 6 files changed, 18 insertions(+), 33 deletions(-) diff --git a/ant-bootstrap/src/cache_store.rs b/ant-bootstrap/src/cache_store.rs index a6f63b45d8..c435fbec23 100644 --- a/ant-bootstrap/src/cache_store.rs +++ b/ant-bootstrap/src/cache_store.rs @@ -178,7 +178,7 @@ impl BootstrapCacheStore { Ok(store) } - /// Create a CacheStore from the given peers argument. + /// Create a empty CacheStore from the given peers argument. /// This also modifies the cfg if provided based on the PeersArgs. /// And also performs some actions based on the PeersArgs. pub fn new_from_peers_args( diff --git a/ant-networking/src/driver.rs b/ant-networking/src/driver.rs index 9276c39237..3c14874823 100644 --- a/ant-networking/src/driver.rs +++ b/ant-networking/src/driver.rs @@ -1062,11 +1062,8 @@ impl SwarmDriver { Self::duration_with_variance(bootstrap_cache.config().max_cache_save_duration, 1); // scale up the interval until we reach the max - let new_duration = Duration::from_secs( - std::cmp::min( - current_interval.period().as_secs() * bootstrap_cache.config().cache_save_scaling_factor, - max_cache_save_duration.as_secs(), - )); + let scaled = current_interval.period().as_secs().saturating_mul(bootstrap_cache.config().cache_save_scaling_factor); + let new_duration = Duration::from_secs(std::cmp::min(scaled, max_cache_save_duration.as_secs())); info!("Scaling up the bootstrap cache save interval to {new_duration:?}"); // `Interval` ticks immediately for Tokio, but not for `wasmtimer`, which is used for wasm32. diff --git a/ant-networking/src/event/swarm.rs b/ant-networking/src/event/swarm.rs index 6d0c283a0c..84127c43d3 100644 --- a/ant-networking/src/event/swarm.rs +++ b/ant-networking/src/event/swarm.rs @@ -515,18 +515,18 @@ impl SwarmDriver { } }; - // Just track failures during outgoing connection with `failed_peer_id` inside the bootstrap cache. - // OutgoingConnectionError without peer_id can happen when dialing multiple addresses of a peer. - // And similarly IncomingConnectionError can happen when a peer has multiple transports/listen addrs. - if let (Some((_, failed_addr, _)), Some(bootstrap_cache)) = - (connection_details, self.bootstrap_cache.as_mut()) - { - bootstrap_cache.update_addr_status(&failed_addr, false); - } - if should_clean_peer { warn!("Tracking issue of {failed_peer_id:?}. Clearing it out for now"); + // Just track failures during outgoing connection with `failed_peer_id` inside the bootstrap cache. + // OutgoingConnectionError without peer_id can happen when dialing multiple addresses of a peer. + // And similarly IncomingConnectionError can happen when a peer has multiple transports/listen addrs. + if let (Some((_, failed_addr, _)), Some(bootstrap_cache)) = + (connection_details, self.bootstrap_cache.as_mut()) + { + bootstrap_cache.update_addr_status(&failed_addr, false); + } + if let Some(dead_peer) = self .swarm .behaviour_mut() diff --git a/ant-node/src/bin/antnode/main.rs b/ant-node/src/bin/antnode/main.rs index a6d25b9cf5..6246206211 100644 --- a/ant-node/src/bin/antnode/main.rs +++ b/ant-node/src/bin/antnode/main.rs @@ -295,6 +295,7 @@ fn main() -> Result<()> { // another process with these args. #[cfg(feature = "metrics")] rt.spawn(init_metrics(std::process::id())); + let initial_peres = rt.block_on(opt.peers.get_addrs(None))?; debug!("Node's owner set to: {:?}", opt.owner); let restart_options = rt.block_on(async move { let mut node_builder = NodeBuilder::new( @@ -307,6 +308,7 @@ fn main() -> Result<()> { #[cfg(feature = "upnp")] opt.upnp, ); + node_builder.initial_peers(initial_peres); node_builder.bootstrap_cache(bootstrap_cache); node_builder.is_behind_home_network(opt.home_network); #[cfg(feature = "open-metrics")] diff --git a/ant-node/src/error.rs b/ant-node/src/error.rs index 4a80796eb2..86aba2df5c 100644 --- a/ant-node/src/error.rs +++ b/ant-node/src/error.rs @@ -81,8 +81,6 @@ pub enum Error { // ---------- Initialize Errors #[error("Failed to generate a reward key")] FailedToGenerateRewardKey, - #[error("Cannot set both initial_peers and bootstrap_cache")] - InitialPeersAndBootstrapCacheSet, // ---------- Miscellaneous Errors #[error("Failed to obtain node's current port")] diff --git a/ant-node/src/node.rs b/ant-node/src/node.rs index 9f5ac21bba..018ef4596a 100644 --- a/ant-node/src/node.rs +++ b/ant-node/src/node.rs @@ -11,7 +11,7 @@ use super::{ }; #[cfg(feature = "open-metrics")] use crate::metrics::NodeMetricsRecorder; -use crate::{error::Error, RunningNode}; +use crate::RunningNode; use ant_bootstrap::BootstrapCacheStore; use ant_evm::{AttoTokens, RewardsAddress}; #[cfg(feature = "open-metrics")] @@ -134,12 +134,12 @@ impl NodeBuilder { self.metrics_server_port = port; } - /// Set the initialized bootstrap cache. This is mutually exclusive with `initial_peers` + /// Set the initialized bootstrap cache. pub fn bootstrap_cache(&mut self, cache: BootstrapCacheStore) { self.bootstrap_cache = Some(cache); } - /// Set the initial peers to dial at startup. This is mutually exclusive with `bootstrap_cache` + /// Set the initial peers to dial at startup. pub fn initial_peers(&mut self, peers: Vec) { self.initial_peers = peers; } @@ -177,18 +177,6 @@ impl NodeBuilder { None }; - if !self.initial_peers.is_empty() && self.bootstrap_cache.is_some() { - return Err(Error::InitialPeersAndBootstrapCacheSet); - } - - let initial_peers = if !self.initial_peers.is_empty() { - self.initial_peers.clone() - } else if let Some(cache) = &self.bootstrap_cache { - cache.get_sorted_addrs().cloned().collect() - } else { - vec![] - }; - network_builder.listen_addr(self.addr); #[cfg(feature = "open-metrics")] network_builder.metrics_server_port(self.metrics_server_port); @@ -207,7 +195,7 @@ impl NodeBuilder { let node = NodeInner { network: network.clone(), events_channel: node_events_channel.clone(), - initial_peers, + initial_peers: self.initial_peers, reward_address: self.evm_address, #[cfg(feature = "open-metrics")] metrics_recorder,