From 25b8286603ea8fd7ad59aef9cf7af472fabe5433 Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Tue, 6 Aug 2024 09:08:16 +0800 Subject: [PATCH 01/11] wip --- .github/workflows/ci.yml | 7 + Makefile | 5 +- Utils/Package.swift | 5 + Utils/Sources/erasure-coding/Cargo.lock | 665 +++++++++++++++++++++ Utils/Sources/erasure-coding/Cargo.toml | 13 + Utils/Sources/erasure-coding/bindings.h | 107 ++++ Utils/Sources/erasure-coding/build.rs | 14 + Utils/Sources/erasure-coding/cbindgen.toml | 19 + Utils/Sources/erasure-coding/src/ffi.rs | 158 +++++ Utils/Sources/erasure-coding/src/lib.rs | 2 + Utils/Sources/module.modulemap | 5 + scripts/erasure-coding.sh | 17 + 12 files changed, 1016 insertions(+), 1 deletion(-) create mode 100644 Utils/Sources/erasure-coding/Cargo.lock create mode 100644 Utils/Sources/erasure-coding/Cargo.toml create mode 100644 Utils/Sources/erasure-coding/bindings.h create mode 100644 Utils/Sources/erasure-coding/build.rs create mode 100644 Utils/Sources/erasure-coding/cbindgen.toml create mode 100644 Utils/Sources/erasure-coding/src/ffi.rs create mode 100644 Utils/Sources/erasure-coding/src/lib.rs create mode 100755 scripts/erasure-coding.sh diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a90c83a3..400221c2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,6 +77,13 @@ jobs: key: ${{ runner.os }}-libs-librocksdb-${{ steps.rocksdb-commit-hash.outputs.commit-hash }} restore-keys: | ${{ runner.os }}-libs-librocksdb + - name: Cache erasure-coding static lib + uses: actions/cache@v4 + with: + path: .lib/libec.a + key: ${{ runner.os }}-libs-libec-${{ hashFiles('Utils/Sources/erasure-coding/**') }} + restore-keys: | + ${{ runner.os }}-libs-libec - name: Setup Swift uses: SwiftyLab/setup-swift@latest with: diff --git a/Makefile b/Makefile index 0a951eef..9eb0ea72 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ default: build githooks: .git/hooks/pre-commit .PHONY: deps -deps: .lib/libblst.a .lib/libbandersnatch_vrfs.a .lib/librocksdb.a +deps: .lib/libblst.a .lib/libbandersnatch_vrfs.a .lib/librocksdb.a .lib/libec.a .lib/libblst.a: ./scripts/blst.sh @@ -16,6 +16,9 @@ deps: .lib/libblst.a .lib/libbandersnatch_vrfs.a .lib/librocksdb.a .lib/libbandersnatch_vrfs.a: $(wildcard Utils/Sources/bandersnatch/src/*) ./scripts/bandersnatch.sh +.lib/libec.a: $(wildcard Utils/Sources/erasure-coding/src/*) + ./scripts/erasure-coding.sh + .lib/librocksdb.a: ./scripts/rocksdb.sh diff --git a/Utils/Package.swift b/Utils/Package.swift index 6447b3e7..cee7018d 100644 --- a/Utils/Package.swift +++ b/Utils/Package.swift @@ -34,6 +34,7 @@ let package = Package( .product(name: "Atomics", package: "swift-atomics"), "blst", "bandersnatch_vrfs", + "erasure_coding", ], swiftSettings: [ .define("DEBUG_ASSERT", .when(configuration: .debug)), @@ -50,6 +51,10 @@ let package = Package( name: "bandersnatch_vrfs", path: "Sources" ), + .systemLibrary( + name: "erasure_coding", + path: "Sources" + ), .testTarget( name: "UtilsTests", dependencies: [ diff --git a/Utils/Sources/erasure-coding/Cargo.lock b/Utils/Sources/erasure-coding/Cargo.lock new file mode 100644 index 00000000..b86b150b --- /dev/null +++ b/Utils/Sources/erasure-coding/Cargo.lock @@ -0,0 +1,665 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "arrayref" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d151e35f61089500b617991b791fc8bfd237ae50cd5950803758a179b41e67a" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" + +[[package]] +name = "blake2b_simd" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23285ad32269793932e830392f2fe2f83e26488fd3ec778883a93c8323735780" +dependencies = [ + "arrayref", + "arrayvec", + "constant_time_eq", +] + +[[package]] +name = "bounded-collections" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca548b6163b872067dc5eb82fd130c56881435e30367d2073594a3d9744120dd" +dependencies = [ + "log", + "parity-scale-codec", + "scale-info", +] + +[[package]] +name = "byte-slice-cast" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c" + +[[package]] +name = "bytemuck" +version = "1.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "102087e286b4677862ea56cf8fc58bb2cdfa8725c40ffb80fe3a008eb7f2fc83" + +[[package]] +name = "cbindgen" +version = "0.24.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b922faaf31122819ec80c4047cc684c6979a087366c069611e33649bf98e18d" +dependencies = [ + "clap", + "heck", + "indexmap 1.9.3", + "log", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 1.0.109", + "tempfile", + "toml", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "3.2.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ea181bf566f71cb9a5d17a59e1871af638180a18fb0035c92ae62b705207123" +dependencies = [ + "atty", + "bitflags 1.3.2", + "clap_lex", + "indexmap 1.9.3", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + +[[package]] +name = "derive_more" +version = "0.99.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + +[[package]] +name = "ec" +version = "0.1.0" +dependencies = [ + "cbindgen", + "erasure-coding", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "erasure-coding" +version = "0.1.0" +source = "git+https://github.com/paritytech/erasure-coding?branch=main#512e77472beb877fe0881a857623d54d97b82bc4" +dependencies = [ + "blake2b_simd", + "bounded-collections", + "parity-scale-codec", + "reed-solomon-simd", + "thiserror", +] + +[[package]] +name = "errno" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "534c5cf6194dfab3db3242765c03bbe257cf92f22b38f6bc0c58d59108a820ba" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "hashbrown" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "impl-trait-for-tuples" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11d7a9f6330b71fea57921c9b61c47ee6e84f72d394754eff6163ae67e7395eb" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "indexmap" +version = "1.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd070e393353796e801d209ad339e89596eb4c8d430d18ede6a1cced8fafbd99" +dependencies = [ + "autocfg", + "hashbrown 0.12.3", +] + +[[package]] +name = "indexmap" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3fc2e30ba82dd1b3911c8de1ffc143c74a914a14e99514d7637e3099df5ea0" +dependencies = [ + "equivalent", + "hashbrown 0.14.5", +] + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "linux-raw-sys" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" + +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + +[[package]] +name = "memchr" +version = "2.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "os_str_bytes" +version = "6.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2355d85b9a3786f481747ced0e0ff2ba35213a1f9bd406ed906554d7af805a1" + +[[package]] +name = "parity-scale-codec" +version = "3.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "306800abfa29c7f16596b5970a588435e3d5b3149683d00c12b699cc19f895ee" +dependencies = [ + "arrayvec", + "byte-slice-cast", + "impl-trait-for-tuples", + "parity-scale-codec-derive", +] + +[[package]] +name = "parity-scale-codec-derive" +version = "3.6.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d830939c76d294956402033aee57a6da7b438f2294eb94864c37b0569053a42c" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "proc-macro-crate" +version = "3.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d37c51ca738a55da99dc0c4a34860fd675453b8b36209178c2249bb13651284" +dependencies = [ + "toml_edit", +] + +[[package]] +name = "proc-macro2" +version = "1.0.86" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "readme-rustdocifier" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "08ad765b21a08b1a8e5cdce052719188a23772bcbefb3c439f0baaf62c56ceac" + +[[package]] +name = "reed-solomon-simd" +version = "2.2.1" +source = "git+https://github.com/ordian/reed-solomon-simd?branch=simd-feature#2e7136fc19f599d7f11f86d51c356482d71ef980" +dependencies = [ + "bytemuck", + "fixedbitset", + "once_cell", + "readme-rustdocifier", +] + +[[package]] +name = "rustix" +version = "0.38.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +dependencies = [ + "bitflags 2.6.0", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "scale-info" +version = "2.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eca070c12893629e2cc820a9761bedf6ce1dcddc9852984d1dc734b8bd9bd024" +dependencies = [ + "cfg-if", + "derive_more", + "parity-scale-codec", + "scale-info-derive", +] + +[[package]] +name = "scale-info-derive" +version = "2.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d35494501194174bda522a32605929eefc9ecf7e0a326c26db1fdd85881eb62" +dependencies = [ + "proc-macro-crate", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "serde" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.204" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + +[[package]] +name = "serde_json" +version = "1.0.122" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "784b6203951c57ff748476b126ccb5e8e2959a5c19e5c617ab1956be3dbc68da" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc4b9b9bf2add8093d3f2c0204471e951b2285580335de42f9d2534f3ae7a8af" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fcd239983515c23a32fb82099f97d0b11b8c72f654ed659363a95c3dad7a53" +dependencies = [ + "cfg-if", + "fastrand", + "once_cell", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9" + +[[package]] +name = "thiserror" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.63" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.72", +] + +[[package]] +name = "toml" +version = "0.5.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" +dependencies = [ + "serde", +] + +[[package]] +name = "toml_datetime" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" + +[[package]] +name = "toml_edit" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a8534fd7f78b5405e860340ad6575217ce99f38d4d5c8f2442cb5ecb50090e1" +dependencies = [ + "indexmap 2.3.0", + "toml_datetime", + "winnow", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" +dependencies = [ + "windows-sys 0.59.0", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "winnow" +version = "0.5.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876" +dependencies = [ + "memchr", +] diff --git a/Utils/Sources/erasure-coding/Cargo.toml b/Utils/Sources/erasure-coding/Cargo.toml new file mode 100644 index 00000000..54257b04 --- /dev/null +++ b/Utils/Sources/erasure-coding/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "ec" +version = "0.1.0" +edition = "2021" + +[lib] +crate-type = ["staticlib"] + +[build-dependencies] +cbindgen = "0.24.0" + +[dependencies] +erasure-coding = { git = "https://github.com/paritytech/erasure-coding", branch = "main" } diff --git a/Utils/Sources/erasure-coding/bindings.h b/Utils/Sources/erasure-coding/bindings.h new file mode 100644 index 00000000..b7824226 --- /dev/null +++ b/Utils/Sources/erasure-coding/bindings.h @@ -0,0 +1,107 @@ +/* Warning, this file is auto generated by cbindgen. Don't modify this manually. */ + +#include +#include +#include +#include + +#define POINT_SIZE 2 +#define SUBSHARD_POINTS 6 + + + +#define MAX_CHUNKS 16384 + +/** + * Fix segment size. + */ +#define SEGMENT_SIZE 4096 + +/** + * Fix number of shards and subshards. + */ +#define N_SHARDS 342 + +/** + * The number of time the erasure coded shards we want. + */ +#define N_REDUNDANCY 2 + +/** + * The total number of shards, both original and ec one. + */ +#define TOTAL_SHARDS ((1 + N_REDUNDANCY) * N_SHARDS) + +/** + * Size of a subshard in bytes. + */ +#define SUBSHARD_SIZE (POINT_SIZE * SUBSHARD_POINTS) + +/** + * The index of an erasure chunk. + */ +typedef struct ChunkIndex ChunkIndex; + +/** + * Result of the reconstruct. + */ +typedef struct ReconstructResult ReconstructResult; + +/** + * Fix size segment of a larger data. + * Data is padded when unaligned with + * the segment size. + */ +typedef struct Segment Segment; + +/** + * Subshard uses some temp memory, so these should be used multiple time instead of allocating. + */ +typedef struct SubShardDecoder SubShardDecoder; + +/** + * Subshard uses some temp memory, so these should be used multiple time instead of allocating. + */ +typedef struct SubShardEncoder SubShardEncoder; + +/** + * Subshard (points in sequential orders). + */ +typedef uint8_t SubShard[SUBSHARD_SIZE]; + +/** + * Initializes a new SubShardEncoder. + */ +struct SubShardEncoder *subshard_encoder_new(void); + +/** + * Frees the SubShardEncoder. + */ +void subshard_encoder_free(struct SubShardEncoder *encoder); + +/** + * Constructs erasure-coded chunks from segments. + * + * out_chunks is N chunks: `Vec<[[u8; 12]; TOTAL_SHARDS]>` + * out_len is N * TOTAL_SHARDS + */ +void subshard_encoder_construct(struct SubShardEncoder *encoder, + const struct Segment *segments, + uintptr_t num_segments, + bool *success, + uint8_t (***out_chunks)[12], + uintptr_t *out_len); + +/** + * Initializes a new SubShardDecoder. + */ +struct SubShardDecoder *subshard_decoder_new(void); + +/** + * Frees the SubShardDecoder. + */ +void subshard_decoder_free(struct SubShardDecoder *decoder); + +uintptr_t reconstruct_result_get_num_decodes(const struct ReconstructResult *result); + +void reconstruct_result_free(struct ReconstructResult *result); diff --git a/Utils/Sources/erasure-coding/build.rs b/Utils/Sources/erasure-coding/build.rs new file mode 100644 index 00000000..7ec4f969 --- /dev/null +++ b/Utils/Sources/erasure-coding/build.rs @@ -0,0 +1,14 @@ +extern crate cbindgen; + +fn main() { + let crate_dir = "./"; + + let config = cbindgen::Config::from_file("cbindgen.toml").unwrap(); + + cbindgen::Builder::new() + .with_crate(crate_dir) + .with_config(config) + .generate() + .expect("Unable to generate bindings") + .write_to_file("bindings.h"); +} diff --git a/Utils/Sources/erasure-coding/cbindgen.toml b/Utils/Sources/erasure-coding/cbindgen.toml new file mode 100644 index 00000000..65c6ce2f --- /dev/null +++ b/Utils/Sources/erasure-coding/cbindgen.toml @@ -0,0 +1,19 @@ +language = "C" + +after_includes = """ + +#define POINT_SIZE 2 +#define SUBSHARD_POINTS 6 + +""" + +autogen_warning = "/* Warning, this file is auto generated by cbindgen. Don't modify this manually. */" + + +[parse] +parse_deps = true +include = ["erasure-coding"] +extra_bindings = ["erasure-coding"] + +[export] +include = ["Segment", "SubShard", "POINT_SIZE", "SUBSHARD_SIZE", "ChunkIndex"] diff --git a/Utils/Sources/erasure-coding/src/ffi.rs b/Utils/Sources/erasure-coding/src/ffi.rs new file mode 100644 index 00000000..2be00cc5 --- /dev/null +++ b/Utils/Sources/erasure-coding/src/ffi.rs @@ -0,0 +1,158 @@ +use erasure_coding::{ChunkIndex, Segment, SubShardDecoder, SubShardEncoder, TOTAL_SHARDS}; +use std::slice; + +/// Initializes a new SubShardEncoder. +#[no_mangle] +pub extern "C" fn subshard_encoder_new() -> *mut SubShardEncoder { + Box::into_raw(Box::new(SubShardEncoder::new().unwrap())) +} + +/// Frees the SubShardEncoder. +#[no_mangle] +pub extern "C" fn subshard_encoder_free(encoder: *mut SubShardEncoder) { + if !encoder.is_null() { + unsafe { drop(Box::from_raw(encoder)) }; + } +} + +/// Constructs erasure-coded chunks from segments. +/// +/// out_chunks is N chunks: `Vec<[[u8; 12]; TOTAL_SHARDS]>` +/// out_len is N * TOTAL_SHARDS +#[no_mangle] +pub extern "C" fn subshard_encoder_construct( + encoder: *mut SubShardEncoder, + segments: *const Segment, + num_segments: usize, + success: *mut bool, + out_chunks: *mut *mut *mut [u8; 12], + out_len: *mut usize, +) { + if encoder.is_null() || segments.is_null() || out_chunks.is_null() || out_len.is_null() { + unsafe { *success = false }; + return; + } + + let encoder = unsafe { &mut *encoder }; + let segments = unsafe { slice::from_raw_parts(segments, num_segments) }; + + match encoder.construct_chunks(segments) { + Ok(result) => { + let total_chunks = result.len() * TOTAL_SHARDS; + let mut chunk_ptrs: Vec<*mut [u8; 12]> = Vec::with_capacity(total_chunks); + + for boxed_array in result { + for chunk in boxed_array.iter() { + chunk_ptrs.push(Box::into_raw(Box::new(*chunk))); + } + } + + unsafe { + *out_chunks = chunk_ptrs.as_mut_ptr(); + *out_len = total_chunks; + } + + std::mem::forget(chunk_ptrs); + unsafe { *success = true }; + } + Err(_) => { + unsafe { *success = false }; + } + } +} + +/// Initializes a new SubShardDecoder. +#[no_mangle] +pub extern "C" fn subshard_decoder_new() -> *mut SubShardDecoder { + Box::into_raw(Box::new(SubShardDecoder::new().unwrap())) +} + +/// Frees the SubShardDecoder. +#[no_mangle] +pub extern "C" fn subshard_decoder_free(decoder: *mut SubShardDecoder) { + if !decoder.is_null() { + unsafe { + drop(Box::from_raw(decoder)); + }; + } +} + +/// Result of the reconstruct. +#[repr(C)] +pub struct ReconstructResult { + pub segments: *mut (u8, Segment), + pub num_segments: usize, + pub num_decodes: usize, +} + +#[no_mangle] +pub extern "C" fn reconstruct_result_get_segments( + result: *const ReconstructResult, +) -> *const (u8, Segment) { + if result.is_null() { + return std::ptr::null(); + } + unsafe { (*result).segments } +} + +#[no_mangle] +pub extern "C" fn reconstruct_result_get_num_decodes(result: *const ReconstructResult) -> usize { + if result.is_null() { + return 0; + } + unsafe { (*result).num_decodes } +} + +#[no_mangle] +pub extern "C" fn reconstruct_result_free(result: *mut ReconstructResult) { + if !result.is_null() { + unsafe { + let boxed_result = Box::from_raw(result); + drop(Box::from_raw(boxed_result.segments)); + } + } +} + +/// Reconstructs data from a list of subshards. +#[no_mangle] +pub extern "C" fn subshard_decoder_reconstruct( + decoder: *mut SubShardDecoder, + subshards: *const (u8, ChunkIndex, [u8; 12]), + num_subshards: usize, + success: *mut bool, +) -> *mut ReconstructResult { + if decoder.is_null() || subshards.is_null() { + unsafe { *success = false }; + return std::ptr::null_mut(); + } + + let decoder = unsafe { &mut *decoder }; + let subshards_slice = unsafe { slice::from_raw_parts(subshards, num_subshards) }; + + let cloned_subshards: Vec<(u8, ChunkIndex, &[u8; 12])> = subshards_slice + .iter() + .map(|&(a, b, ref c)| (a, b, c)) + .collect(); + + match decoder.reconstruct(&mut cloned_subshards.iter().cloned()) { + Ok((segments, num_decodes)) => { + let mut segments_vec: Vec<(u8, Segment)> = segments.into_iter().collect(); + let segments_len = segments_vec.len(); + let segments_ptr = segments_vec.as_mut_ptr(); + + std::mem::forget(segments_vec); // prevent the Vec from being deallocated + + let result = ReconstructResult { + segments: segments_ptr, + num_segments: segments_len, + num_decodes, + }; + unsafe { *success = true }; + Box::into_raw(Box::new(result)) + } + Err(_) => { + unsafe { *success = false }; + std::ptr::null_mut() + } + } +} diff --git a/Utils/Sources/erasure-coding/src/lib.rs b/Utils/Sources/erasure-coding/src/lib.rs new file mode 100644 index 00000000..835193ef --- /dev/null +++ b/Utils/Sources/erasure-coding/src/lib.rs @@ -0,0 +1,2 @@ +pub mod ffi; +pub use ffi::*; diff --git a/Utils/Sources/module.modulemap b/Utils/Sources/module.modulemap index 5d9cdaa4..272ab166 100644 --- a/Utils/Sources/module.modulemap +++ b/Utils/Sources/module.modulemap @@ -7,3 +7,8 @@ module bandersnatch_vrfs { header "bandersnatch/bindings.h" link "bandersnatch_vrfs" } + +module erasure_coding { + header "erasure-coding/bindings.h" + link "ec" +} diff --git a/scripts/erasure-coding.sh b/scripts/erasure-coding.sh new file mode 100755 index 00000000..7f81e958 --- /dev/null +++ b/scripts/erasure-coding.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +set -e + + +# Setup erasure-coding c binding +CWD=$(pwd) + +mkdir -p .lib + +cd Utils/Sources/erasure-coding || { echo "directory not found"; exit 1; } + +cargo build --release --lib + +cp target/release/libec.a ${CWD}/.lib + +echo "Setup erasure-coding successfully." From c7b7e3ee29e10b460abbf5a827e68838ab297a62 Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Thu, 8 Aug 2024 19:44:44 +0800 Subject: [PATCH 02/11] update --- Utils/Package.swift | 3 +- Utils/Sources/erasure-coding/bindings.h | 54 +- Utils/Sources/erasure-coding/cbindgen.toml | 5 +- Utils/Sources/erasure-coding/src/ffi.rs | 138 ++- .../Resources/erasure-coding-test-data.json | 1034 +++++++++++++++++ 5 files changed, 1178 insertions(+), 56 deletions(-) create mode 100644 Utils/Tests/UtilsTests/Resources/erasure-coding-test-data.json diff --git a/Utils/Package.swift b/Utils/Package.swift index cee7018d..d12a015a 100644 --- a/Utils/Package.swift +++ b/Utils/Package.swift @@ -60,7 +60,8 @@ let package = Package( dependencies: [ "Utils", .product(name: "Testing", package: "swift-testing"), - ] + ], + resources: [.copy("Resources")] ), ], swiftLanguageVersions: [.version("6")] diff --git a/Utils/Sources/erasure-coding/bindings.h b/Utils/Sources/erasure-coding/bindings.h index b7824226..27207372 100644 --- a/Utils/Sources/erasure-coding/bindings.h +++ b/Utils/Sources/erasure-coding/bindings.h @@ -8,6 +8,8 @@ #define POINT_SIZE 2 #define SUBSHARD_POINTS 6 +typedef uint16_t ChunkIndex; + #define MAX_CHUNKS 16384 @@ -38,31 +40,39 @@ #define SUBSHARD_SIZE (POINT_SIZE * SUBSHARD_POINTS) /** - * The index of an erasure chunk. + * Subshard uses some temp memory, so these should be used multiple time instead of allocating. */ -typedef struct ChunkIndex ChunkIndex; +typedef struct SubShardDecoder SubShardDecoder; /** - * Result of the reconstruct. + * Subshard uses some temp memory, so these should be used multiple time instead of allocating. */ -typedef struct ReconstructResult ReconstructResult; +typedef struct SubShardEncoder SubShardEncoder; -/** - * Fix size segment of a larger data. - * Data is padded when unaligned with - * the segment size. - */ -typedef struct Segment Segment; +typedef struct CSegment { + uint8_t *data; + uint32_t index; +} CSegment; -/** - * Subshard uses some temp memory, so these should be used multiple time instead of allocating. - */ -typedef struct SubShardDecoder SubShardDecoder; +typedef struct SegmentTuple { + uint8_t index; + struct CSegment segment; +} SegmentTuple; /** - * Subshard uses some temp memory, so these should be used multiple time instead of allocating. + * Result of the reconstruct. */ -typedef struct SubShardEncoder SubShardEncoder; +typedef struct ReconstructResult { + struct SegmentTuple *segments; + uintptr_t num_segments; + uintptr_t num_decodes; +} ReconstructResult; + +typedef struct SubShardTuple { + uint8_t seg_index; + ChunkIndex chunk_index; + uint8_t shard[12]; +} SubShardTuple; /** * Subshard (points in sequential orders). @@ -86,7 +96,7 @@ void subshard_encoder_free(struct SubShardEncoder *encoder); * out_len is N * TOTAL_SHARDS */ void subshard_encoder_construct(struct SubShardEncoder *encoder, - const struct Segment *segments, + const struct CSegment *segments, uintptr_t num_segments, bool *success, uint8_t (***out_chunks)[12], @@ -102,6 +112,10 @@ struct SubShardDecoder *subshard_decoder_new(void); */ void subshard_decoder_free(struct SubShardDecoder *decoder); -uintptr_t reconstruct_result_get_num_decodes(const struct ReconstructResult *result); - -void reconstruct_result_free(struct ReconstructResult *result); +/** + * Reconstructs data from a list of subshards. + */ +struct ReconstructResult *subshard_decoder_reconstruct(struct SubShardDecoder *decoder, + const struct SubShardTuple *subshards, + uintptr_t num_subshards, + bool *success); diff --git a/Utils/Sources/erasure-coding/cbindgen.toml b/Utils/Sources/erasure-coding/cbindgen.toml index 65c6ce2f..6f3e3578 100644 --- a/Utils/Sources/erasure-coding/cbindgen.toml +++ b/Utils/Sources/erasure-coding/cbindgen.toml @@ -5,6 +5,8 @@ after_includes = """ #define POINT_SIZE 2 #define SUBSHARD_POINTS 6 +typedef uint16_t ChunkIndex; + """ autogen_warning = "/* Warning, this file is auto generated by cbindgen. Don't modify this manually. */" @@ -16,4 +18,5 @@ include = ["erasure-coding"] extra_bindings = ["erasure-coding"] [export] -include = ["Segment", "SubShard", "POINT_SIZE", "SUBSHARD_SIZE", "ChunkIndex"] +include = ["SubShard", "POINT_SIZE", "SUBSHARD_SIZE"] +exclude = ["ChunkIndex"] diff --git a/Utils/Sources/erasure-coding/src/ffi.rs b/Utils/Sources/erasure-coding/src/ffi.rs index 2be00cc5..5faf915d 100644 --- a/Utils/Sources/erasure-coding/src/ffi.rs +++ b/Utils/Sources/erasure-coding/src/ffi.rs @@ -1,6 +1,42 @@ -use erasure_coding::{ChunkIndex, Segment, SubShardDecoder, SubShardEncoder, TOTAL_SHARDS}; +use erasure_coding::{ + ChunkIndex, Segment, SubShardDecoder, SubShardEncoder, SEGMENT_SIZE, TOTAL_SHARDS, +}; use std::slice; +#[repr(C)] +#[derive(Clone, Copy, Debug)] +pub struct CSegment { + data: *mut u8, // Pointer to the data, length is SEGMENT_SIZE + index: u32, +} + +impl From for CSegment { + fn from(segment: Segment) -> Self { + let mut vec_data = Vec::from(*segment.data); + + let c_segment = CSegment { + data: vec_data.as_mut_ptr(), + index: segment.index, + }; + + // prevent Rust from freeing the Vec while CSegment is in use + std::mem::forget(vec_data); + + c_segment + } +} + +impl From for Segment { + fn from(c_segment: CSegment) -> Self { + let vec_data = unsafe { Vec::from_raw_parts(c_segment.data, SEGMENT_SIZE, SEGMENT_SIZE) }; + + Segment { + data: Box::new(vec_data.try_into().unwrap()), + index: c_segment.index, + } + } +} + /// Initializes a new SubShardEncoder. #[no_mangle] pub extern "C" fn subshard_encoder_new() -> *mut SubShardEncoder { @@ -22,7 +58,7 @@ pub extern "C" fn subshard_encoder_free(encoder: *mut SubShardEncoder) { #[no_mangle] pub extern "C" fn subshard_encoder_construct( encoder: *mut SubShardEncoder, - segments: *const Segment, + segments: *const CSegment, num_segments: usize, success: *mut bool, out_chunks: *mut *mut *mut [u8; 12], @@ -34,9 +70,15 @@ pub extern "C" fn subshard_encoder_construct( } let encoder = unsafe { &mut *encoder }; - let segments = unsafe { slice::from_raw_parts(segments, num_segments) }; - - match encoder.construct_chunks(segments) { + let segments_vec: Vec = unsafe { + slice::from_raw_parts(segments, num_segments) + .iter() + .map(|segment| Segment::from(*segment)) + .collect() + }; + let r_segments: &[Segment] = &segments_vec; + + match encoder.construct_chunks(r_segments) { Ok(result) => { let total_chunks = result.len() * TOTAL_SHARDS; let mut chunk_ptrs: Vec<*mut [u8; 12]> = Vec::with_capacity(total_chunks); @@ -77,47 +119,69 @@ pub extern "C" fn subshard_decoder_free(decoder: *mut SubShardDecoder) { } } +#[repr(C)] +pub struct SegmentTuple { + pub index: u8, + pub segment: CSegment, +} + /// Result of the reconstruct. #[repr(C)] pub struct ReconstructResult { - pub segments: *mut (u8, Segment), + pub segments: *mut SegmentTuple, pub num_segments: usize, pub num_decodes: usize, } -#[no_mangle] -pub extern "C" fn reconstruct_result_get_segments( - result: *const ReconstructResult, -) -> *const (u8, Segment) { - if result.is_null() { - return std::ptr::null(); - } - unsafe { (*result).segments } -} +// #[no_mangle] +// pub extern "C" fn reconstruct_result_get_segments( +// result: *const ReconstructResult, +// ) -> *const SegmentTuple { +// if result.is_null() { +// return std::ptr::null(); +// } +// unsafe { (*result).segments } +// } + +// #[no_mangle] +// pub extern "C" fn reconstruct_result_get_num_segments(result: *const ReconstructResult) -> usize { +// if result.is_null() { +// return 0; +// } +// unsafe { (*result).num_segments } +// } + +// #[no_mangle] +// pub extern "C" fn reconstruct_result_get_num_decodes(result: *const ReconstructResult) -> usize { +// if result.is_null() { +// return 0; +// } +// unsafe { (*result).num_decodes } +// } + +// #[no_mangle] +// pub extern "C" fn reconstruct_result_free(result: *mut ReconstructResult) { +// if !result.is_null() { +// unsafe { +// let boxed_result = Box::from_raw(result); +// drop(Box::from_raw(boxed_result.segments)); +// drop(boxed_result); +// } +// } +// } -#[no_mangle] -pub extern "C" fn reconstruct_result_get_num_decodes(result: *const ReconstructResult) -> usize { - if result.is_null() { - return 0; - } - unsafe { (*result).num_decodes } -} - -#[no_mangle] -pub extern "C" fn reconstruct_result_free(result: *mut ReconstructResult) { - if !result.is_null() { - unsafe { - let boxed_result = Box::from_raw(result); - drop(Box::from_raw(boxed_result.segments)); - } - } +#[repr(C)] +pub struct SubShardTuple { + pub seg_index: u8, + pub chunk_index: ChunkIndex, + pub shard: [u8; 12], } /// Reconstructs data from a list of subshards. #[no_mangle] pub extern "C" fn subshard_decoder_reconstruct( decoder: *mut SubShardDecoder, - subshards: *const (u8, ChunkIndex, [u8; 12]), + subshards: *const SubShardTuple, num_subshards: usize, success: *mut bool, ) -> *mut ReconstructResult { @@ -131,12 +195,18 @@ pub extern "C" fn subshard_decoder_reconstruct( let cloned_subshards: Vec<(u8, ChunkIndex, &[u8; 12])> = subshards_slice .iter() - .map(|&(a, b, ref c)| (a, b, c)) + .map(|t| (t.seg_index, t.chunk_index, &t.shard)) .collect(); match decoder.reconstruct(&mut cloned_subshards.iter().cloned()) { Ok((segments, num_decodes)) => { - let mut segments_vec: Vec<(u8, Segment)> = segments.into_iter().collect(); + let mut segments_vec: Vec = segments + .into_iter() + .map(|(index, segment)| SegmentTuple { + index, + segment: segment.into(), + }) + .collect(); let segments_len = segments_vec.len(); let segments_ptr = segments_vec.as_mut_ptr(); diff --git a/Utils/Tests/UtilsTests/Resources/erasure-coding-test-data.json b/Utils/Tests/UtilsTests/Resources/erasure-coding-test-data.json new file mode 100644 index 00000000..0326e951 --- /dev/null +++ b/Utils/Tests/UtilsTests/Resources/erasure-coding-test-data.json @@ -0,0 +1,1034 @@ +{ + "data": "9c077029c120e6de118edc7bfdd79ed4834115dbfa9ed309aa2339053bb3a07a313e536e8538c962b571fa57c15572ae44fb0f766b16b8e6a7c44ab69caa955c9ab37c217d963dd9d8e4b708348c899a8766a899434dfeacf18fe8d947477e0bc954537d9ad80cd9241205930531432c5c623a9c77ae0865c62b057ab1553331811fe0561f49fc69a6453b799914a0974e114f4e40775af050e723c7d20d0ba6f1ad6524b7a11ac3680da41d1c580cde055a2a1d70fabeaebe14699038b2db6ac4a0d362eeccd36bce908b62c8decfa9ef57f7514e25119288f81581bc15da084f1d977c0e7c3db6c976f6b2a11eb370dcaf4c4fc9fbd884eab541b3970144013ecaa56ae56580cdba4c87aac4e9aec4e6b293cac13ea48ba10c3bdc7408834a16b9f4f888aef5820e8f96e41bfd7e872f33e2773d43ebef7d3c45dc4d869fc6437012b01d14093dd21c18694896dbf295199f76a3a2ca2c3aa4fef98b3e35559bfc43924645bf9c5fd532a8d8466ab0f47c1ecfade8d90cdb3632d935ca96e4c97e51afb2b2dd92745c4b17e9e5efb3bbd1b9819faa980e18b4d5d0e2660d95d493cefeb158f8611ebde40b17df55981ea1fb24d9da7ea6cd2ee31583f1190df11547e7fe5159e0d25b4c1442baa331002f376e90548691b90c25f9a805d3391586e599d1d18aa0e12a0a2436bfcf4d936129bf6d083ec11a9d91419071aa9685e1cd99dbb9626d4031da55b5c6ed4f45722a06f9568d7a516fee5524bc4f86869f0f35cf5a2f499480aa62efa02ee377c0d286addce565e3c3e51d149a5818a59f94a14cb762b5e1882049c4ceb9ef38923b5702e871c6bfa9c7fe6d1c1cc2439078b5d816c6914f15d41cd60df4ba9dfa0b8ce68978fe681731c2c6a8caeff417137a4b1c59142060e29f59665c1550f6ee292c395bc08eb144a5d2383893a4cb3abf34f38e8396e8a574", + "segment": { + "segments": [ + { + "segment_ec": [ + "9c07", + "7029", + "c120", + "e6de", + "118e", + "dc7b", + "fdd7", + "9ed4", + "8341", + "15db", + "fa9e", + "d309", + "aa23", + "3905", + "3bb3", + "a07a", + "313e", + "536e", + "8538", + "c962", + "b571", + "fa57", + "c155", + "72ae", + "44fb", + "0f76", + "6b16", + "b8e6", + "a7c4", + "4ab6", + "9caa", + "955c", + "9ab3", + "7c21", + "7d96", + "3dd9", + "d8e4", + "b708", + "348c", + "899a", + "8766", + "a899", + "434d", + "feac", + "f18f", + "e8d9", + "4747", + "7e0b", + "c954", + "537d", + "9ad8", + "0cd9", + "2412", + "0593", + "0531", + "432c", + "5c62", + "3a9c", + "77ae", + "0865", + "c62b", + "057a", + "b155", + "3331", + "811f", + "e056", + "1f49", + "fc69", + "a645", + "3b79", + "9914", + "a097", + "4e11", + "4f4e", + "4077", + "5af0", + "50e7", + "23c7", + "d20d", + "0ba6", + "f1ad", + "6524", + "b7a1", + "1ac3", + "680d", + "a41d", + "1c58", + "0cde", + "055a", + "2a1d", + "70fa", + "beae", + "be14", + "6990", + "38b2", + "db6a", + "c4a0", + "d362", + "eecc", + "d36b", + "ce90", + "8b62", + "c8de", + "cfa9", + "ef57", + "f751", + "4e25", + "1192", + "88f8", + "1581", + "bc15", + "da08", + "4f1d", + "977c", + "0e7c", + "3db6", + "c976", + "f6b2", + "a11e", + "b370", + "dcaf", + "4c4f", + "c9fb", + "d884", + "eab5", + "41b3", + "9701", + "4401", + "3eca", + "a56a", + "e565", + "80cd", + "ba4c", + "87aa", + "c4e9", + "aec4", + "e6b2", + "93ca", + "c13e", + "a48b", + "a10c", + "3bdc", + "7408", + "834a", + "16b9", + "f4f8", + "88ae", + "f582", + "0e8f", + "96e4", + "1bfd", + "7e87", + "2f33", + "e277", + "3d43", + "ebef", + "7d3c", + "45dc", + "4d86", + "9fc6", + "4370", + "12b0", + "1d14", + "093d", + "d21c", + "1869", + "4896", + "dbf2", + "9519", + "9f76", + "a3a2", + "ca2c", + "3aa4", + "fef9", + "8b3e", + "3555", + "9bfc", + "4392", + "4645", + "bf9c", + "5fd5", + "32a8", + "d846", + "6ab0", + "f47c", + "1ecf", + "ade8", + "d90c", + "db36", + "32d9", + "35ca", + "96e4", + "c97e", + "51af", + "b2b2", + "dd92", + "745c", + "4b17", + "e9e5", + "efb3", + "bbd1", + "b981", + "9faa", + "980e", + "18b4", + "d5d0", + "e266", + "0d95", + "d493", + "cefe", + "b158", + "f861", + "1ebd", + "e40b", + "17df", + "5598", + "1ea1", + "fb24", + "d9da", + "7ea6", + "cd2e", + "e315", + "83f1", + "190d", + "f115", + "47e7", + "fe51", + "59e0", + "d25b", + "4c14", + "42ba", + "a331", + "002f", + "376e", + "9054", + "8691", + "b90c", + "25f9", + "a805", + "d339", + "1586", + "e599", + "d1d1", + "8aa0", + "e12a", + "0a24", + "36bf", + "cf4d", + "9361", + "29bf", + "6d08", + "3ec1", + "1a9d", + "9141", + "9071", + "aa96", + "85e1", + "cd99", + "dbb9", + "626d", + "4031", + "da55", + "b5c6", + "ed4f", + "4572", + "2a06", + "f956", + "8d7a", + "516f", + "ee55", + "24bc", + "4f86", + "869f", + "0f35", + "cf5a", + "2f49", + "9480", + "aa62", + "efa0", + "2ee3", + "77c0", + "d286", + "addc", + "e565", + "e3c3", + "e51d", + "149a", + "5818", + "a59f", + "94a1", + "4cb7", + "62b5", + "e188", + "2049", + "c4ce", + "b9ef", + "3892", + "3b57", + "02e8", + "71c6", + "bfa9", + "c7fe", + "6d1c", + "1cc2", + "4390", + "78b5", + "d816", + "c691", + "4f15", + "d41c", + "d60d", + "f4ba", + "9dfa", + "0b8c", + "e689", + "78fe", + "6817", + "31c2", + "c6a8", + "caef", + "f417", + "137a", + "4b1c", + "5914", + "2060", + "e29f", + "5966", + "5c15", + "50f6", + "ee29", + "2c39", + "5bc0", + "8eb1", + "44a5", + "d238", + "3893", + "a4cb", + "3abf", + "34f3", + "8e83", + "96e8", + "a574", + "7503", + "4f97", + "d16a", + "6727", + "081e", + "dac0", + "a596", + "e343", + "8a51", + "3ced", + "5f5e", + "f369", + "1b86", + "3608", + "eb77", + "460d", + "7830", + "6a6e", + "5ccb", + "1cdc", + "8db3", + "6dec", + "e52c", + "d3ee", + "44a3", + "d341", + "151a", + "9c0d", + "bb11", + "8ea2", + "70f6", + "2986", + "f97e", + "ce77", + "f718", + "7e1a", + "7257", + "af77", + "5938", + "c785", + "63af", + "6901", + "cb38", + "3897", + "3900", + "7561", + "66c8", + "9aa6", + "9121", + "1e58", + "c0cf", + "f447", + "61d9", + "26d0", + "1fb6", + "b3a4", + "aa9f", + "78f1", + "445a", + "14cb", + "b093", + "e605", + "cfd6", + "b2a5", + "2aa1", + "ae23", + "5ba2", + "265f", + "2288", + "3b34", + "92d2", + "6a62", + "32e4", + "dfdd", + "468d", + "d175", + "f4e5", + "8af3", + "e3eb", + "d46f", + "1930", + "2b3c", + "d61e", + "ae3e", + "67da", + "788b", + "c2cf", + "522f", + "62cf", + "b06b", + "3ceb", + "dd61", + "dfe3", + "7b0c", + "64d5", + "bcde", + "5542", + "d2e9", + "3f1b", + "bb17", + "1811", + "5799", + "fa3e", + "314b", + "dc37", + "798a", + "bc44", + "0894", + "1416", + "b9f9", + "65ab", + "b925", + "770f", + "991c", + "f147", + "f325", + "d285", + "0f3a", + "a781", + "9fa1", + "c75b", + "3231", + "2c47", + "7336", + "9d90", + "8d19", + "a848", + "192f", + "58b5", + "1493", + "3131", + "b8be", + "3d6f", + "0ddf", + "5d3b", + "09f8", + "9dd0", + "68fe", + "c354", + "c9c3", + "cd66", + "084c", + "38cb", + "378c", + "38d4", + "c0e4", + "9219", + "fd66", + "c2dc", + "0ae4", + "6bcb", + "29f4", + "b4d6", + "5fe8", + "e13e", + "7197", + "1ab1", + "bdfb", + "598c", + "a918", + "24eb", + "b38e", + "ae22", + "1684", + "3c33", + "a88b", + "58f9", + "e4db", + "117e", + "9ced", + "4b22", + "4cdc", + "02d6", + "130e", + "99be", + "7023", + "e7bc", + "2a72", + "763d", + "d84f", + "08f1", + "ca89", + "141a", + "41f2", + "0c5a", + "58ff", + "3a1f", + "26ee", + "63a2", + "4f01", + "7305", + "9877", + "ca93", + "d982", + "50af", + "0a4b", + "9d77", + "5574", + "96b2", + "bdf4", + "aafd", + "2732", + "dad1", + "90c8", + "ac95", + "8106", + "2d19", + "0125", + "a7db", + "0038", + "f270", + "70ba", + "a8e5", + "fad4", + "f3e5", + "bb3f", + "c4d5", + "1524", + "0d21", + "918c", + "5c95", + "2cf9", + "8825", + "3975", + "9de5", + "8062", + "a72b", + "7e68", + "fd10", + "1cb7", + "2570", + "913f", + "939e", + "5596", + "8448", + "7e50", + "5a7a", + "0d0a", + "48fd", + "4214", + "c819", + "8339", + "c2ca", + "9c64", + "12fa", + "bf75", + "97f4", + "7725", + "1743", + "c829", + "76ea", + "78a4", + "bde9", + "3058", + "1a2a", + "3cdc", + "1cea", + "8f88", + "b6f8", + "6890", + "454e", + "4f1a", + "bca3", + "db77", + "c624", + "0cf5", + "d324", + "5117", + "a4de", + "ce4e", + "6b89", + "1a76", + "bd50", + "5f19", + "73a8", + "799e", + "0f85", + "21ef", + "906c", + "5b27", + "0d58", + "58e9", + "67f1", + "7bae", + "86d5", + "6ac1", + "23e9", + "349a", + "c621", + "bace", + "bc1d", + "0434", + "3b11", + "6e11", + "d084", + "9ce7", + "5f5d", + "2338", + "0cb1", + "6fe3", + "3881", + "97fd", + "17a6", + "1dc5", + "29ef", + "8343", + "061e", + "9970", + "e7a6", + "f53d", + "c9ac", + "5a2a", + "1c15", + "53f1", + "87ee", + "3e6a", + "0cfa", + "b218", + "3da7", + "b6af", + "d501", + "c6b3", + "15cb", + "335a", + "1fb0", + "808a", + "37a9", + "8b79", + "ddfa", + "b0cf", + "9241", + "30ca", + "f632", + "8cd2", + "70a5", + "bd71", + "4c18", + "ffd3", + "759c", + "97d8", + "8baa", + "e377", + "0351", + "048b", + "3429", + "6784", + "969d", + "807c", + "6c9a", + "491a", + "1863", + "806e", + "3953", + "45bb", + "ee31", + "e37e", + "5150", + "0063", + "34ad", + "278a", + "7413", + "7e62", + "5975", + "28df", + "e54a", + "ec0b", + "6222", + "0986", + "3bef", + "33dd", + "3359", + "2a48", + "f7e1", + "2721", + "046b", + "51a0", + "2609", + "ac0a", + "6d80", + "11c1", + "c0eb", + "cae5", + "d5ad", + "7daf", + "6c00", + "4253", + "6908", + "32f8", + "0395", + "e826", + "d0c8", + "ac15", + "d7e9", + "15b7", + "ba72", + "a554", + "3d0b", + "dcd5", + "3de4", + "d58c", + "b0ea", + "4b7b", + "85ad", + "af07", + "fb79", + "4674", + "30aa", + "17fd", + "06b1", + "ecdc", + "1ea3", + "d0a1", + "1656", + "4b64", + "44d5", + "f819", + "8797", + "2299", + "8878", + "422e", + "210d", + "17c9", + "87f4", + "ec96", + "1054", + "011a", + "02da", + "5f62", + "a8b6", + "72fd", + "772d", + "aaf1", + "df81", + "aa49", + "d501", + "d417", + "0b87", + "eedb", + "fd53", + "89be", + "4a53", + "e796", + "3a90", + "450f", + "7d49", + "8243", + "237f", + "1bb8", + "e401", + "6d23", + "54a8", + "8141", + "eb56", + "206a", + "9a81", + "e3e6", + "f93c", + "76dc", + "d5e5", + "110c", + "ccf8", + "9a07", + "90e7", + "9285", + "2277", + "6091", + "c8e5", + "eb35", + "11d5", + "fc80", + "79fc", + "474e", + "8001", + "45b7", + "a40b", + "a375", + "b106", + "1cab", + "96b9", + "560e", + "448f", + "9224", + "7a34", + "3dfc", + "3c4b", + "39ff", + "8f47", + "b294", + "043c", + "8349", + "a58d", + "e46e", + "16b0", + "ec1d", + "2f95", + "8563", + "20da", + "b2d5", + "b6ca", + "f7ae", + "f0a7", + "aa76", + "f232", + "8f52", + "cd50", + "fd99", + "d8cd", + "9356", + "9337", + "548a", + "62aa", + "87ca", + "a9d4", + "d202", + "1110", + "3ebc", + "1162", + "d74a", + "ffcf", + "61d2", + "4839", + "f944", + "c697", + "d926", + "c60c", + "a633", + "91c6", + "3cc4", + "6c93", + "0d75", + "a1cc", + "1888", + "b5e0", + "27cc", + "1d8c", + "72c7", + "1e7e", + "a2a3", + "a3fd", + "0cd5", + "2c9e", + "3030", + "ee0e", + "27aa", + "ce4c", + "9409", + "2f3d", + "a83e", + "e72d", + "5a77", + "6dc9", + "d2ee", + "7ac9", + "98fe", + "3218", + "a64f", + "b938", + "232e", + "2859", + "b764", + "3cfc", + "f8f4", + "6fa4", + "1be4", + "4690", + "aefe", + "1fcf", + "25d9", + "b8f4", + "72fb", + "8e84", + "2f64", + "251e", + "6b56", + "d3d8", + "de04", + "da78", + "5cc3", + "ab02", + "7b03", + "50c1", + "dad7", + "d82b", + "f573", + "6407", + "ee3b", + "04cd", + "4a21", + "3808", + "cd00", + "1912", + "81ed", + "2f77", + "0520", + "9edd", + "2861", + "ef1c", + "2615", + "c842", + "064a", + "6fc9", + "e34e", + "ec3f", + "9c10", + "5f5c", + "e4e7", + "9422", + "c217", + "bd4e", + "71ad", + "d01b", + "2f47", + "ee70", + "726e", + "1342", + "faf4", + "01a3", + "b7d1", + "3f31", + "b84d", + "2dc5", + "3c2b", + "09a2", + "1703", + "7ed8", + "912f", + "94b5", + "5635", + "c3d6", + "90f1", + "96b7", + "250e", + "a217", + "014a", + "0563", + "6268", + "dcd4", + "c47a", + "d657", + "c058", + "7f91", + "60d6", + "3ab3", + "7ca5", + "1d25", + "eb9e", + "dff9", + "0bf9", + "fcfc", + "b826", + "72c5", + "5692", + "7a9f", + "f105", + "4847", + "b8d7", + "43f8", + "62d9", + "7d25", + "de6f", + "15d2", + "7e3c", + "acb9", + "9d73", + "7df9", + "234f", + "467e", + "8976", + "2745", + "d661", + "5600", + "7aff", + "66e6", + "43d1", + "664a", + "d084", + "0ae5", + "7293", + "d8b7", + "f117", + "bb0f" + ] + } + ] + } +} From 01edfe5293b86fd5b49288c0be8bed3234615ddd Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Thu, 8 Aug 2024 21:17:52 +0800 Subject: [PATCH 03/11] fix --- Utils/Package.swift | 6 +++++- .../{Resources => TestData}/erasure-coding-test-data.json | 0 2 files changed, 5 insertions(+), 1 deletion(-) rename Utils/Tests/UtilsTests/{Resources => TestData}/erasure-coding-test-data.json (100%) diff --git a/Utils/Package.swift b/Utils/Package.swift index d12a015a..51dd98f9 100644 --- a/Utils/Package.swift +++ b/Utils/Package.swift @@ -1,8 +1,12 @@ // swift-tools-version: 6.0 // The swift-tools-version declares the minimum version of Swift required to build this package. +import Foundation import PackageDescription +// somehow without this the GH Actions CI fails +extension Foundation.Bundle: @unchecked @retroactive Sendable {} + let package = Package( name: "Utils", platforms: [ @@ -61,7 +65,7 @@ let package = Package( "Utils", .product(name: "Testing", package: "swift-testing"), ], - resources: [.copy("Resources")] + resources: [.copy("TestData")] ), ], swiftLanguageVersions: [.version("6")] diff --git a/Utils/Tests/UtilsTests/Resources/erasure-coding-test-data.json b/Utils/Tests/UtilsTests/TestData/erasure-coding-test-data.json similarity index 100% rename from Utils/Tests/UtilsTests/Resources/erasure-coding-test-data.json rename to Utils/Tests/UtilsTests/TestData/erasure-coding-test-data.json From b0bb5e9bf7e24e61c41fa694a686a25fd07c375d Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Thu, 8 Aug 2024 21:39:07 +0800 Subject: [PATCH 04/11] fix --- Utils/Package.swift | 4 ---- Utils/Tests/UtilsTests/ErasureCodeTest.swift | 4 ++++ 2 files changed, 4 insertions(+), 4 deletions(-) create mode 100644 Utils/Tests/UtilsTests/ErasureCodeTest.swift diff --git a/Utils/Package.swift b/Utils/Package.swift index 51dd98f9..83ece79f 100644 --- a/Utils/Package.swift +++ b/Utils/Package.swift @@ -1,12 +1,8 @@ // swift-tools-version: 6.0 // The swift-tools-version declares the minimum version of Swift required to build this package. -import Foundation import PackageDescription -// somehow without this the GH Actions CI fails -extension Foundation.Bundle: @unchecked @retroactive Sendable {} - let package = Package( name: "Utils", platforms: [ diff --git a/Utils/Tests/UtilsTests/ErasureCodeTest.swift b/Utils/Tests/UtilsTests/ErasureCodeTest.swift new file mode 100644 index 00000000..f503bbb3 --- /dev/null +++ b/Utils/Tests/UtilsTests/ErasureCodeTest.swift @@ -0,0 +1,4 @@ +import Foundation + +// somehow without this the GH Actions CI fails +extension Foundation.Bundle: @unchecked @retroactive Sendable {} From 6ae4e0f46b5900d2158ef27851f91c6a207aac58 Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Fri, 9 Aug 2024 14:29:55 +0800 Subject: [PATCH 05/11] update --- Utils/Sources/Utils/ErasureCoding.swift | 121 +++++++++++++++++++ Utils/Sources/erasure-coding/Cargo.lock | 2 +- Utils/Sources/erasure-coding/Cargo.toml | 2 +- Utils/Sources/erasure-coding/bindings.h | 31 +++-- Utils/Sources/erasure-coding/cbindgen.toml | 6 +- Utils/Sources/erasure-coding/src/ffi.rs | 66 +++------- Utils/Tests/UtilsTests/ErasureCodeTest.swift | 6 + 7 files changed, 169 insertions(+), 65 deletions(-) create mode 100644 Utils/Sources/Utils/ErasureCoding.swift diff --git a/Utils/Sources/Utils/ErasureCoding.swift b/Utils/Sources/Utils/ErasureCoding.swift new file mode 100644 index 00000000..66d6ad09 --- /dev/null +++ b/Utils/Sources/Utils/ErasureCoding.swift @@ -0,0 +1,121 @@ +import erasure_coding +import Foundation + +public enum ErasureCodeError: Error { + case constructFailed + case reconstructFailed +} + +/// Split original data into segments +public func split(data: Data) -> [CSegment] { + var segments: [CSegment] = [] + let segmentSize = Int(SEGMENT_SIZE) + + for i in stride(from: 0, to: data.count, by: segmentSize) { + let end = min(i + segmentSize, data.count) + let segmentData = data[i ..< end] + let index = UInt32(i / segmentSize) + + let segment = CSegment( + data: UnsafeMutablePointer(mutating: segmentData.withUnsafeBytes { $0.baseAddress!.assumingMemoryBound(to: UInt8.self) }), + index: index + ) + segments.append(segment) + } + + // Check and pad the last segment if needed + let remainder = data.count % segmentSize + if remainder > 0 { + // Create a padded segment + var paddedData = Data(count: segmentSize) + let start = data.count - remainder + let segmentData = data[start ..< data.count] + + // Copy data and pad + paddedData.replaceSubrange(0 ..< remainder, with: segmentData) + + let index = UInt32(segments.count) + + let segment = CSegment( + data: UnsafeMutablePointer(mutating: paddedData.withUnsafeBytes { $0.baseAddress!.assumingMemoryBound(to: UInt8.self) }), + index: index + ) + segments[segments.count - 1] = segment + } + + return segments +} + +/// Join segments into original data (with padding) +private func join(segments: [CSegment]) -> Data { + var data = Data() + let sortedSegments = segments.sorted { $0.index < $1.index } + + for segment in sortedSegments { + let segmentData = UnsafeBufferPointer(start: segment.data, count: Int(SEGMENT_SIZE)) + data.append(segmentData) + } + + return data +} + +public class SubShardEncoder { + private let encoder: OpaquePointer + + public init() { + encoder = subshard_encoder_new() + } + + deinit { + subshard_encoder_free(encoder) + } + + /// Construct erasure-coded chunks from segments + /// + /// TODO: note the underlying rust lib is not compatible to GP yet, so this will be changed + public func construct(segments: [CSegment]) -> Result<[UInt8], ErasureCodeError> { + var success = false + var out_len: UInt = 0 + + let expectedOutLen = Int(SUBSHARD_SIZE) * Int(TOTAL_SHARDS) * segments.count + var out_chunks = [UInt8](repeating: 0, count: expectedOutLen) + + segments.withUnsafeBufferPointer { segmentsPtr in + subshard_encoder_construct(encoder, segmentsPtr.baseAddress, UInt(segments.count), &success, &out_chunks, &out_len) + } + + guard success, expectedOutLen == Int(out_len) else { + return .failure(.constructFailed) + } + + return .success(out_chunks) + } +} + +public class SubShardDecoder { + private let decoder: OpaquePointer + + public init() { + decoder = subshard_decoder_new() + } + + deinit { + subshard_decoder_free(decoder) + } + + /// Reconstruct erasure-coded chunks to segments + public func reconstruct(subshards: [SubShardTuple]) -> Result { + var success = false + + let reconstructResult = subshards.withUnsafeBufferPointer { subshardsPtr in + subshard_decoder_reconstruct(decoder, subshardsPtr.baseAddress, UInt(subshards.count), &success) + } + + guard success, let result = reconstructResult + else { + return .failure(.reconstructFailed) + } + + return .success(result.pointee) + } +} diff --git a/Utils/Sources/erasure-coding/Cargo.lock b/Utils/Sources/erasure-coding/Cargo.lock index b86b150b..16368488 100644 --- a/Utils/Sources/erasure-coding/Cargo.lock +++ b/Utils/Sources/erasure-coding/Cargo.lock @@ -160,7 +160,7 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "erasure-coding" version = "0.1.0" -source = "git+https://github.com/paritytech/erasure-coding?branch=main#512e77472beb877fe0881a857623d54d97b82bc4" +source = "git+https://github.com/paritytech/erasure-coding?rev=512e774#512e77472beb877fe0881a857623d54d97b82bc4" dependencies = [ "blake2b_simd", "bounded-collections", diff --git a/Utils/Sources/erasure-coding/Cargo.toml b/Utils/Sources/erasure-coding/Cargo.toml index 54257b04..769b6626 100644 --- a/Utils/Sources/erasure-coding/Cargo.toml +++ b/Utils/Sources/erasure-coding/Cargo.toml @@ -10,4 +10,4 @@ crate-type = ["staticlib"] cbindgen = "0.24.0" [dependencies] -erasure-coding = { git = "https://github.com/paritytech/erasure-coding", branch = "main" } +erasure-coding = { git = "https://github.com/paritytech/erasure-coding", rev = "512e774" } diff --git a/Utils/Sources/erasure-coding/bindings.h b/Utils/Sources/erasure-coding/bindings.h index 27207372..4f38a8f7 100644 --- a/Utils/Sources/erasure-coding/bindings.h +++ b/Utils/Sources/erasure-coding/bindings.h @@ -7,6 +7,8 @@ #define POINT_SIZE 2 #define SUBSHARD_POINTS 6 +#define TOTAL_SHARDS 1026 +#define SUBSHARD_SIZE 12 typedef uint16_t ChunkIndex; @@ -29,16 +31,6 @@ typedef uint16_t ChunkIndex; */ #define N_REDUNDANCY 2 -/** - * The total number of shards, both original and ec one. - */ -#define TOTAL_SHARDS ((1 + N_REDUNDANCY) * N_SHARDS) - -/** - * Size of a subshard in bytes. - */ -#define SUBSHARD_SIZE (POINT_SIZE * SUBSHARD_POINTS) - /** * Subshard uses some temp memory, so these should be used multiple time instead of allocating. */ @@ -49,8 +41,19 @@ typedef struct SubShardDecoder SubShardDecoder; */ typedef struct SubShardEncoder SubShardEncoder; +/** + * Fixed size segment of a larger data. + * Data is padded when unaligned with + * the segment size. + */ typedef struct CSegment { + /** + * Fix size chunk of data. Length is `SEGMENT_SIZE`` + */ uint8_t *data; + /** + * The index of this segment against its full data. + */ uint32_t index; } CSegment; @@ -71,7 +74,7 @@ typedef struct ReconstructResult { typedef struct SubShardTuple { uint8_t seg_index; ChunkIndex chunk_index; - uint8_t shard[12]; + uint8_t subshard[12]; } SubShardTuple; /** @@ -92,14 +95,16 @@ void subshard_encoder_free(struct SubShardEncoder *encoder); /** * Constructs erasure-coded chunks from segments. * - * out_chunks is N chunks: `Vec<[[u8; 12]; TOTAL_SHARDS]>` + * A chunk is a group of subshards `[[u8; 12]; TOTAL_SHARDS]`. + * * out_len is N * TOTAL_SHARDS + * out_chunks is `Vec<[[u8; 12]; TOTAL_SHARDS]>` flattened to 1 dimensional u8 array. */ void subshard_encoder_construct(struct SubShardEncoder *encoder, const struct CSegment *segments, uintptr_t num_segments, bool *success, - uint8_t (***out_chunks)[12], + uint8_t *out_chunks, uintptr_t *out_len); /** diff --git a/Utils/Sources/erasure-coding/cbindgen.toml b/Utils/Sources/erasure-coding/cbindgen.toml index 6f3e3578..ee989c1c 100644 --- a/Utils/Sources/erasure-coding/cbindgen.toml +++ b/Utils/Sources/erasure-coding/cbindgen.toml @@ -4,6 +4,8 @@ after_includes = """ #define POINT_SIZE 2 #define SUBSHARD_POINTS 6 +#define TOTAL_SHARDS 1026 +#define SUBSHARD_SIZE 12 typedef uint16_t ChunkIndex; @@ -18,5 +20,5 @@ include = ["erasure-coding"] extra_bindings = ["erasure-coding"] [export] -include = ["SubShard", "POINT_SIZE", "SUBSHARD_SIZE"] -exclude = ["ChunkIndex"] +include = ["SubShard", "POINT_SIZE"] +exclude = ["ChunkIndex", "TOTAL_SHARDS", "SUBSHARD_SIZE"] diff --git a/Utils/Sources/erasure-coding/src/ffi.rs b/Utils/Sources/erasure-coding/src/ffi.rs index 5faf915d..7ba665d1 100644 --- a/Utils/Sources/erasure-coding/src/ffi.rs +++ b/Utils/Sources/erasure-coding/src/ffi.rs @@ -1,12 +1,17 @@ use erasure_coding::{ ChunkIndex, Segment, SubShardDecoder, SubShardEncoder, SEGMENT_SIZE, TOTAL_SHARDS, }; -use std::slice; +use std::{ptr, slice}; +/// Fixed size segment of a larger data. +/// Data is padded when unaligned with +/// the segment size. #[repr(C)] #[derive(Clone, Copy, Debug)] pub struct CSegment { - data: *mut u8, // Pointer to the data, length is SEGMENT_SIZE + /// Fix size chunk of data. Length is `SEGMENT_SIZE`` + data: *mut u8, + /// The index of this segment against its full data. index: u32, } @@ -53,15 +58,17 @@ pub extern "C" fn subshard_encoder_free(encoder: *mut SubShardEncoder) { /// Constructs erasure-coded chunks from segments. /// -/// out_chunks is N chunks: `Vec<[[u8; 12]; TOTAL_SHARDS]>` +/// A chunk is a group of subshards `[[u8; 12]; TOTAL_SHARDS]`. +/// /// out_len is N * TOTAL_SHARDS +/// out_chunks is `Vec<[[u8; 12]; TOTAL_SHARDS]>` flattened to 1 dimensional u8 array. #[no_mangle] pub extern "C" fn subshard_encoder_construct( encoder: *mut SubShardEncoder, segments: *const CSegment, num_segments: usize, success: *mut bool, - out_chunks: *mut *mut *mut [u8; 12], + out_chunks: *mut u8, out_len: *mut usize, ) { if encoder.is_null() || segments.is_null() || out_chunks.is_null() || out_len.is_null() { @@ -81,20 +88,20 @@ pub extern "C" fn subshard_encoder_construct( match encoder.construct_chunks(r_segments) { Ok(result) => { let total_chunks = result.len() * TOTAL_SHARDS; - let mut chunk_ptrs: Vec<*mut [u8; 12]> = Vec::with_capacity(total_chunks); + let mut data: Vec = Vec::with_capacity(total_chunks); for boxed_array in result { for chunk in boxed_array.iter() { - chunk_ptrs.push(Box::into_raw(Box::new(*chunk))); + data.extend_from_slice(chunk); } } unsafe { - *out_chunks = chunk_ptrs.as_mut_ptr(); + ptr::copy_nonoverlapping(data.as_ptr(), out_chunks, data.len()); *out_len = total_chunks; } - std::mem::forget(chunk_ptrs); + std::mem::forget(data); unsafe { *success = true }; } Err(_) => { @@ -133,48 +140,11 @@ pub struct ReconstructResult { pub num_decodes: usize, } -// #[no_mangle] -// pub extern "C" fn reconstruct_result_get_segments( -// result: *const ReconstructResult, -// ) -> *const SegmentTuple { -// if result.is_null() { -// return std::ptr::null(); -// } -// unsafe { (*result).segments } -// } - -// #[no_mangle] -// pub extern "C" fn reconstruct_result_get_num_segments(result: *const ReconstructResult) -> usize { -// if result.is_null() { -// return 0; -// } -// unsafe { (*result).num_segments } -// } - -// #[no_mangle] -// pub extern "C" fn reconstruct_result_get_num_decodes(result: *const ReconstructResult) -> usize { -// if result.is_null() { -// return 0; -// } -// unsafe { (*result).num_decodes } -// } - -// #[no_mangle] -// pub extern "C" fn reconstruct_result_free(result: *mut ReconstructResult) { -// if !result.is_null() { -// unsafe { -// let boxed_result = Box::from_raw(result); -// drop(Box::from_raw(boxed_result.segments)); -// drop(boxed_result); -// } -// } -// } - #[repr(C)] pub struct SubShardTuple { pub seg_index: u8, pub chunk_index: ChunkIndex, - pub shard: [u8; 12], + pub subshard: [u8; 12], } /// Reconstructs data from a list of subshards. @@ -187,7 +157,7 @@ pub extern "C" fn subshard_decoder_reconstruct( ) -> *mut ReconstructResult { if decoder.is_null() || subshards.is_null() { unsafe { *success = false }; - return std::ptr::null_mut(); + return ptr::null_mut(); } let decoder = unsafe { &mut *decoder }; @@ -195,7 +165,7 @@ pub extern "C" fn subshard_decoder_reconstruct( let cloned_subshards: Vec<(u8, ChunkIndex, &[u8; 12])> = subshards_slice .iter() - .map(|t| (t.seg_index, t.chunk_index, &t.shard)) + .map(|t| (t.seg_index, t.chunk_index, &t.subshard)) .collect(); match decoder.reconstruct(&mut cloned_subshards.iter().cloned()) { diff --git a/Utils/Tests/UtilsTests/ErasureCodeTest.swift b/Utils/Tests/UtilsTests/ErasureCodeTest.swift index f503bbb3..fe3b2efb 100644 --- a/Utils/Tests/UtilsTests/ErasureCodeTest.swift +++ b/Utils/Tests/UtilsTests/ErasureCodeTest.swift @@ -2,3 +2,9 @@ import Foundation // somehow without this the GH Actions CI fails extension Foundation.Bundle: @unchecked @retroactive Sendable {} + +@testable import Utils + +@Suite struct ErasureCodeTests { + @Test func testReconstruct() throws {} +} From 7ab72b21e56a6d8234120ae781776e383009518f Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Fri, 9 Aug 2024 14:30:55 +0800 Subject: [PATCH 06/11] fix --- Utils/Tests/UtilsTests/ErasureCodeTest.swift | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Utils/Tests/UtilsTests/ErasureCodeTest.swift b/Utils/Tests/UtilsTests/ErasureCodeTest.swift index fe3b2efb..7ecb9858 100644 --- a/Utils/Tests/UtilsTests/ErasureCodeTest.swift +++ b/Utils/Tests/UtilsTests/ErasureCodeTest.swift @@ -1,10 +1,11 @@ import Foundation +import Testing + +@testable import Utils // somehow without this the GH Actions CI fails extension Foundation.Bundle: @unchecked @retroactive Sendable {} -@testable import Utils - @Suite struct ErasureCodeTests { @Test func testReconstruct() throws {} } From 318200bb5ba2a39469e93ec2cba1ee989d328f30 Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Fri, 9 Aug 2024 14:39:05 +0800 Subject: [PATCH 07/11] fix --- Utils/Sources/erasure-coding/bindings.h | 2 +- Utils/Sources/erasure-coding/cbindgen.toml | 2 +- Utils/Sources/erasure-coding/src/ffi.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Utils/Sources/erasure-coding/bindings.h b/Utils/Sources/erasure-coding/bindings.h index 4f38a8f7..c0052ec5 100644 --- a/Utils/Sources/erasure-coding/bindings.h +++ b/Utils/Sources/erasure-coding/bindings.h @@ -97,8 +97,8 @@ void subshard_encoder_free(struct SubShardEncoder *encoder); * * A chunk is a group of subshards `[[u8; 12]; TOTAL_SHARDS]`. * + * out_chunks is N chunks `[[u8; 12]; TOTAL_SHARDS]` flattened to 1 dimensional u8 array. * out_len is N * TOTAL_SHARDS - * out_chunks is `Vec<[[u8; 12]; TOTAL_SHARDS]>` flattened to 1 dimensional u8 array. */ void subshard_encoder_construct(struct SubShardEncoder *encoder, const struct CSegment *segments, diff --git a/Utils/Sources/erasure-coding/cbindgen.toml b/Utils/Sources/erasure-coding/cbindgen.toml index ee989c1c..688d9778 100644 --- a/Utils/Sources/erasure-coding/cbindgen.toml +++ b/Utils/Sources/erasure-coding/cbindgen.toml @@ -20,5 +20,5 @@ include = ["erasure-coding"] extra_bindings = ["erasure-coding"] [export] -include = ["SubShard", "POINT_SIZE"] +include = ["SubShard"] exclude = ["ChunkIndex", "TOTAL_SHARDS", "SUBSHARD_SIZE"] diff --git a/Utils/Sources/erasure-coding/src/ffi.rs b/Utils/Sources/erasure-coding/src/ffi.rs index 7ba665d1..2524998e 100644 --- a/Utils/Sources/erasure-coding/src/ffi.rs +++ b/Utils/Sources/erasure-coding/src/ffi.rs @@ -60,8 +60,8 @@ pub extern "C" fn subshard_encoder_free(encoder: *mut SubShardEncoder) { /// /// A chunk is a group of subshards `[[u8; 12]; TOTAL_SHARDS]`. /// +/// out_chunks is N chunks `[[u8; 12]; TOTAL_SHARDS]` flattened to 1 dimensional u8 array. /// out_len is N * TOTAL_SHARDS -/// out_chunks is `Vec<[[u8; 12]; TOTAL_SHARDS]>` flattened to 1 dimensional u8 array. #[no_mangle] pub extern "C" fn subshard_encoder_construct( encoder: *mut SubShardEncoder, From 08bc3948410bdabe1b774149b5efedc3e7f27c1e Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Fri, 9 Aug 2024 20:53:29 +0800 Subject: [PATCH 08/11] tests --- Utils/Sources/Utils/ErasureCoding.swift | 64 ++++++----- Utils/Sources/erasure-coding/Cargo.lock | 17 ++- Utils/Sources/erasure-coding/Cargo.toml | 3 + Utils/Sources/erasure-coding/bindings.h | 2 + Utils/Sources/erasure-coding/src/ffi.rs | 103 +++++++++++++++++- Utils/Tests/UtilsTests/ErasureCodeTest.swift | 91 +++++++++++++++- .../{ => ec}/erasure-coding-test-data.json | 7 +- 7 files changed, 250 insertions(+), 37 deletions(-) rename Utils/Tests/UtilsTests/TestData/{ => ec}/erasure-coding-test-data.json (99%) diff --git a/Utils/Sources/Utils/ErasureCoding.swift b/Utils/Sources/Utils/ErasureCoding.swift index 66d6ad09..49944226 100644 --- a/Utils/Sources/Utils/ErasureCoding.swift +++ b/Utils/Sources/Utils/ErasureCoding.swift @@ -1,6 +1,8 @@ import erasure_coding import Foundation +// TODO: note the underlying rust lib is not compatible with GP yet, so these will be changed + public enum ErasureCodeError: Error { case constructFailed case reconstructFailed @@ -11,9 +13,16 @@ public func split(data: Data) -> [CSegment] { var segments: [CSegment] = [] let segmentSize = Int(SEGMENT_SIZE) - for i in stride(from: 0, to: data.count, by: segmentSize) { + // Create a new data with padding + var paddedData = data + let remainder = data.count % segmentSize + if remainder != 0 { + paddedData.append(Data(repeating: 0, count: segmentSize - remainder)) + } + + for i in stride(from: 0, to: paddedData.count, by: segmentSize) { let end = min(i + segmentSize, data.count) - let segmentData = data[i ..< end] + let segmentData = paddedData[i ..< end] let index = UInt32(i / segmentSize) let segment = CSegment( @@ -23,30 +32,10 @@ public func split(data: Data) -> [CSegment] { segments.append(segment) } - // Check and pad the last segment if needed - let remainder = data.count % segmentSize - if remainder > 0 { - // Create a padded segment - var paddedData = Data(count: segmentSize) - let start = data.count - remainder - let segmentData = data[start ..< data.count] - - // Copy data and pad - paddedData.replaceSubrange(0 ..< remainder, with: segmentData) - - let index = UInt32(segments.count) - - let segment = CSegment( - data: UnsafeMutablePointer(mutating: paddedData.withUnsafeBytes { $0.baseAddress!.assumingMemoryBound(to: UInt8.self) }), - index: index - ) - segments[segments.count - 1] = segment - } - return segments } -/// Join segments into original data (with padding) +/// Join segments into original data (padding not removed) private func join(segments: [CSegment]) -> Data { var data = Data() let sortedSegments = segments.sorted { $0.index < $1.index } @@ -71,8 +60,6 @@ public class SubShardEncoder { } /// Construct erasure-coded chunks from segments - /// - /// TODO: note the underlying rust lib is not compatible to GP yet, so this will be changed public func construct(segments: [CSegment]) -> Result<[UInt8], ErasureCodeError> { var success = false var out_len: UInt = 0 @@ -103,8 +90,31 @@ public class SubShardDecoder { subshard_decoder_free(decoder) } + public class Decoded { + private var result: UnsafeMutablePointer + + public let segments: [SegmentTuple] + public let numDecoded: UInt + + init(_ res: UnsafeMutablePointer) { + result = res + let numSegments = Int(result.pointee.num_segments) + let segmentTuplesPtr = result.pointee.segments + + // Safely access the segments array + let bufferPtr = UnsafeMutableBufferPointer(start: segmentTuplesPtr, count: numSegments) + segments = Array(bufferPtr) + + numDecoded = result.pointee.num_decodes + } + + deinit { + reconstruct_result_free(result) + } + } + /// Reconstruct erasure-coded chunks to segments - public func reconstruct(subshards: [SubShardTuple]) -> Result { + public func reconstruct(subshards: [SubShardTuple]) -> Result { var success = false let reconstructResult = subshards.withUnsafeBufferPointer { subshardsPtr in @@ -116,6 +126,6 @@ public class SubShardDecoder { return .failure(.reconstructFailed) } - return .success(result.pointee) + return .success(Decoded(result)) } } diff --git a/Utils/Sources/erasure-coding/Cargo.lock b/Utils/Sources/erasure-coding/Cargo.lock index 16368488..c474d7e3 100644 --- a/Utils/Sources/erasure-coding/Cargo.lock +++ b/Utils/Sources/erasure-coding/Cargo.lock @@ -149,6 +149,9 @@ version = "0.1.0" dependencies = [ "cbindgen", "erasure-coding", + "hex", + "serde", + "serde_json", ] [[package]] @@ -218,6 +221,12 @@ dependencies = [ "libc", ] +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + [[package]] name = "impl-trait-for-tuples" version = "0.2.2" @@ -404,18 +413,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" dependencies = [ "proc-macro2", "quote", diff --git a/Utils/Sources/erasure-coding/Cargo.toml b/Utils/Sources/erasure-coding/Cargo.toml index 769b6626..2b01f5ec 100644 --- a/Utils/Sources/erasure-coding/Cargo.toml +++ b/Utils/Sources/erasure-coding/Cargo.toml @@ -11,3 +11,6 @@ cbindgen = "0.24.0" [dependencies] erasure-coding = { git = "https://github.com/paritytech/erasure-coding", rev = "512e774" } +hex = "0.4.3" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" diff --git a/Utils/Sources/erasure-coding/bindings.h b/Utils/Sources/erasure-coding/bindings.h index c0052ec5..fbf875a6 100644 --- a/Utils/Sources/erasure-coding/bindings.h +++ b/Utils/Sources/erasure-coding/bindings.h @@ -117,6 +117,8 @@ struct SubShardDecoder *subshard_decoder_new(void); */ void subshard_decoder_free(struct SubShardDecoder *decoder); +void reconstruct_result_free(struct ReconstructResult *result); + /** * Reconstructs data from a list of subshards. */ diff --git a/Utils/Sources/erasure-coding/src/ffi.rs b/Utils/Sources/erasure-coding/src/ffi.rs index 2524998e..cefc02a4 100644 --- a/Utils/Sources/erasure-coding/src/ffi.rs +++ b/Utils/Sources/erasure-coding/src/ffi.rs @@ -127,6 +127,7 @@ pub extern "C" fn subshard_decoder_free(decoder: *mut SubShardDecoder) { } #[repr(C)] +#[derive(Debug)] pub struct SegmentTuple { pub index: u8, pub segment: CSegment, @@ -140,7 +141,28 @@ pub struct ReconstructResult { pub num_decodes: usize, } +#[no_mangle] +pub extern "C" fn reconstruct_result_free(result: *mut ReconstructResult) { + if !result.is_null() { + unsafe { + let boxed_result = Box::from_raw(result); + + // free each CSegment's data pointer + for i in 0..boxed_result.num_segments { + let segment = &*boxed_result.segments.add(i); + if !segment.segment.data.is_null() { + drop(Box::from_raw(segment.segment.data)); + } + } + + drop(Box::from_raw(boxed_result.segments)); + drop(boxed_result); + } + } +} + #[repr(C)] +#[derive(Debug)] pub struct SubShardTuple { pub seg_index: u8, pub chunk_index: ChunkIndex, @@ -163,12 +185,12 @@ pub extern "C" fn subshard_decoder_reconstruct( let decoder = unsafe { &mut *decoder }; let subshards_slice = unsafe { slice::from_raw_parts(subshards, num_subshards) }; - let cloned_subshards: Vec<(u8, ChunkIndex, &[u8; 12])> = subshards_slice + let subshards_vec: Vec<(u8, ChunkIndex, &[u8; 12])> = subshards_slice .iter() .map(|t| (t.seg_index, t.chunk_index, &t.subshard)) .collect(); - match decoder.reconstruct(&mut cloned_subshards.iter().cloned()) { + match decoder.reconstruct(&mut subshards_vec.iter().cloned()) { Ok((segments, num_decodes)) => { let mut segments_vec: Vec = segments .into_iter() @@ -196,3 +218,80 @@ pub extern "C" fn subshard_decoder_reconstruct( } } } + +#[cfg(test)] +mod tests { + use std::fs; + + use super::*; + use erasure_coding::{SubShard, SUBSHARD_SIZE}; + use serde::{Deserialize, Serialize}; + use serde_json; + + #[derive(Serialize, Deserialize, Debug)] + struct JsonData { + data: String, + segment: SegmentData, + } + + #[derive(Serialize, Deserialize, Debug)] + struct SegmentData { + segments: Vec, + } + + #[derive(Serialize, Deserialize, Debug)] + struct Segment { + segment_ec: Vec, + } + + #[test] + fn test_reconstruct_from_json() { + let file = + fs::File::open("../../Tests/UtilsTests/TestData/ec/erasure-coding-test-data.json") + .expect("file should open read only"); + let json_data: JsonData = + serde_json::from_reader(file).expect("file should be proper JSON"); + + // Convert segment_ec data back to bytes and prepare subshards + let mut subshards: Vec<(u8, ChunkIndex, SubShard)> = Vec::new(); + for (segment_idx, segment) in json_data.segment.segments.iter().enumerate() { + for (chunk_idx, chunk) in segment.segment_ec.iter().enumerate() { + let chunk_bytes: Vec = hex::decode(chunk).expect("Failed to decode hex string"); + if chunk_idx >= 684 { + let mut subshard = [0u8; SUBSHARD_SIZE]; + subshard[..chunk_bytes.len()].copy_from_slice(&chunk_bytes); + subshards.push((segment_idx as u8, ChunkIndex(chunk_idx as u16), subshard)); + } + } + } + + // Initialize decoder, call reconstruct! + let mut decoder = SubShardDecoder::new().unwrap(); + + let cloned_subshards: Vec<(u8, ChunkIndex, &[u8; 12])> = + subshards.iter().map(|t| (t.0, t.1, &t.2)).collect(); + + let (reconstructed_segments, _nb_decode) = decoder + .reconstruct(&mut cloned_subshards.iter().cloned()) + .unwrap(); + + // Check the result + // println!("Reconstructed Segments: {:x?}", reconstructed_segments); + // println!("Number of Decodes: {}", nb_decode); + + assert_eq!(reconstructed_segments.len(), 1); + let original_data_bytes = + hex::decode(&json_data.data).expect("Failed to decode hex string"); + // Verify that the data attribute matches the first 342 bytes of the reconstructed data in the first segment + if let Some((_, first_segment)) = reconstructed_segments.get(0) { + assert_eq!( + &first_segment.data[..342], + &original_data_bytes[..342], + "The first 342 bytes of the reconstructed data do not match the original data." + ); + println!("Reconstructed successfully! YAY"); + } else { + panic!("No reconstructed segments found."); + } + } +} diff --git a/Utils/Tests/UtilsTests/ErasureCodeTest.swift b/Utils/Tests/UtilsTests/ErasureCodeTest.swift index 7ecb9858..c53b5212 100644 --- a/Utils/Tests/UtilsTests/ErasureCodeTest.swift +++ b/Utils/Tests/UtilsTests/ErasureCodeTest.swift @@ -1,3 +1,4 @@ +import erasure_coding import Foundation import Testing @@ -6,6 +7,92 @@ import Testing // somehow without this the GH Actions CI fails extension Foundation.Bundle: @unchecked @retroactive Sendable {} -@Suite struct ErasureCodeTests { - @Test func testReconstruct() throws {} +enum TestLoader { + static func getTestFiles(path: String, extension ext: String) throws -> [(path: String, description: String)] { + let prefix = Bundle.module.resourcePath! + "/TestData/\(path)" + let files = try FileManager.default.contentsOfDirectory(atPath: prefix) + var filtered = files.filter { $0.hasSuffix(".\(ext)") } + filtered.sort() + return filtered.map { (path: prefix + "/" + $0, description: $0) } + } +} + +struct ECTestCase: Codable { + let data: String + let segment: ECSegment +} + +struct ECSegment: Codable { + let segments: [SegmentElement] +} + +struct SegmentElement: Codable { + let segmentEc: [String] + + enum CodingKeys: String, CodingKey { + case segmentEc = "segment_ec" + } +} + +struct ErasureCodeTests { + static func loadTests() throws -> [ECTestCase] { + let tests = try TestLoader.getTestFiles(path: "ec", extension: "json") + return try tests.map { + let data = try Data(contentsOf: URL(fileURLWithPath: $0.path)) + let decoder = JSONDecoder() + return try decoder.decode(ECTestCase.self, from: data) + } + } + + @Test(arguments: try loadTests()) + func testReconstruct(testCase: ECTestCase) throws { + // Convert segment_ec data back to bytes and prepare subshards + var subshards: [SubShardTuple] = [] + for (segmentIdx, segment) in testCase.segment.segments.enumerated() { + for (chunkIdx, chunk) in segment.segmentEc.enumerated() { + let chunkBytes = Data(fromHexString: chunk)! + + if chunkIdx >= 684 { + var subshard: [UInt8] = Array(repeating: 0, count: Int(SUBSHARD_SIZE)) + subshard[0 ..< chunkBytes.count].append(contentsOf: chunkBytes) + subshards.append(SubShardTuple( + seg_index: UInt8(segmentIdx), + chunk_index: ChunkIndex(chunkIdx), + subshard: ( + subshard[0], + subshard[1], + subshard[2], + subshard[3], + subshard[4], + subshard[5], + subshard[6], + subshard[7], + subshard[8], + subshard[9], + subshard[10], + subshard[11] + ) + )) + } + } + } + + // Initialize decoder, call reconstruct + let decoder = SubShardDecoder() + let result = decoder.reconstruct(subshards: subshards) + + switch result { + case let .success(decoded): + #expect(decoded.numDecoded == 1) + let segmentTuples = decoded.segments + let segment = segmentTuples[0].segment + + let originalDataBytes = Data(fromHexString: testCase.data)! + let segmentData = Data(UnsafeBufferPointer(start: segment.data, count: Int(SEGMENT_SIZE))) + // #expect(segmentData[0 ..< 342] == originalDataBytes[0 ..< 342]) + #expect(segmentData[0 ..< 2] == originalDataBytes[0 ..< 2]) + case let .failure(error): + Issue.record("Expected success, got \(error)") + } + } } diff --git a/Utils/Tests/UtilsTests/TestData/erasure-coding-test-data.json b/Utils/Tests/UtilsTests/TestData/ec/erasure-coding-test-data.json similarity index 99% rename from Utils/Tests/UtilsTests/TestData/erasure-coding-test-data.json rename to Utils/Tests/UtilsTests/TestData/ec/erasure-coding-test-data.json index 0326e951..dc51892f 100644 --- a/Utils/Tests/UtilsTests/TestData/erasure-coding-test-data.json +++ b/Utils/Tests/UtilsTests/TestData/ec/erasure-coding-test-data.json @@ -1026,9 +1026,12 @@ "7293", "d8b7", "f117", - "bb0f" + "bb0f", + "0e85", + "0e8f", + "8b97" ] } ] } -} +} \ No newline at end of file From b9a7b8ca59359ede345ce39da107a9671a20e293 Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Sat, 10 Aug 2024 19:35:43 +0800 Subject: [PATCH 09/11] fix tests --- Utils/Sources/Utils/ErasureCoding.swift | 2 +- Utils/Sources/erasure-coding/src/ffi.rs | 87 ++------------------ Utils/Tests/UtilsTests/ErasureCodeTest.swift | 7 +- 3 files changed, 9 insertions(+), 87 deletions(-) diff --git a/Utils/Sources/Utils/ErasureCoding.swift b/Utils/Sources/Utils/ErasureCoding.swift index 49944226..759ee4c9 100644 --- a/Utils/Sources/Utils/ErasureCoding.swift +++ b/Utils/Sources/Utils/ErasureCoding.swift @@ -90,6 +90,7 @@ public class SubShardDecoder { subshard_decoder_free(decoder) } + /// Decoded reconstruct result public class Decoded { private var result: UnsafeMutablePointer @@ -101,7 +102,6 @@ public class SubShardDecoder { let numSegments = Int(result.pointee.num_segments) let segmentTuplesPtr = result.pointee.segments - // Safely access the segments array let bufferPtr = UnsafeMutableBufferPointer(start: segmentTuplesPtr, count: numSegments) segments = Array(bufferPtr) diff --git a/Utils/Sources/erasure-coding/src/ffi.rs b/Utils/Sources/erasure-coding/src/ffi.rs index cefc02a4..4cc3e5f7 100644 --- a/Utils/Sources/erasure-coding/src/ffi.rs +++ b/Utils/Sources/erasure-coding/src/ffi.rs @@ -87,18 +87,18 @@ pub extern "C" fn subshard_encoder_construct( match encoder.construct_chunks(r_segments) { Ok(result) => { - let total_chunks = result.len() * TOTAL_SHARDS; - let mut data: Vec = Vec::with_capacity(total_chunks); + let total_subshards = result.len() * TOTAL_SHARDS; + let mut data: Vec = Vec::with_capacity(total_subshards); for boxed_array in result { - for chunk in boxed_array.iter() { - data.extend_from_slice(chunk); + for subshard in boxed_array.iter() { + data.extend_from_slice(subshard); } } unsafe { ptr::copy_nonoverlapping(data.as_ptr(), out_chunks, data.len()); - *out_len = total_chunks; + *out_len = total_subshards; } std::mem::forget(data); @@ -218,80 +218,3 @@ pub extern "C" fn subshard_decoder_reconstruct( } } } - -#[cfg(test)] -mod tests { - use std::fs; - - use super::*; - use erasure_coding::{SubShard, SUBSHARD_SIZE}; - use serde::{Deserialize, Serialize}; - use serde_json; - - #[derive(Serialize, Deserialize, Debug)] - struct JsonData { - data: String, - segment: SegmentData, - } - - #[derive(Serialize, Deserialize, Debug)] - struct SegmentData { - segments: Vec, - } - - #[derive(Serialize, Deserialize, Debug)] - struct Segment { - segment_ec: Vec, - } - - #[test] - fn test_reconstruct_from_json() { - let file = - fs::File::open("../../Tests/UtilsTests/TestData/ec/erasure-coding-test-data.json") - .expect("file should open read only"); - let json_data: JsonData = - serde_json::from_reader(file).expect("file should be proper JSON"); - - // Convert segment_ec data back to bytes and prepare subshards - let mut subshards: Vec<(u8, ChunkIndex, SubShard)> = Vec::new(); - for (segment_idx, segment) in json_data.segment.segments.iter().enumerate() { - for (chunk_idx, chunk) in segment.segment_ec.iter().enumerate() { - let chunk_bytes: Vec = hex::decode(chunk).expect("Failed to decode hex string"); - if chunk_idx >= 684 { - let mut subshard = [0u8; SUBSHARD_SIZE]; - subshard[..chunk_bytes.len()].copy_from_slice(&chunk_bytes); - subshards.push((segment_idx as u8, ChunkIndex(chunk_idx as u16), subshard)); - } - } - } - - // Initialize decoder, call reconstruct! - let mut decoder = SubShardDecoder::new().unwrap(); - - let cloned_subshards: Vec<(u8, ChunkIndex, &[u8; 12])> = - subshards.iter().map(|t| (t.0, t.1, &t.2)).collect(); - - let (reconstructed_segments, _nb_decode) = decoder - .reconstruct(&mut cloned_subshards.iter().cloned()) - .unwrap(); - - // Check the result - // println!("Reconstructed Segments: {:x?}", reconstructed_segments); - // println!("Number of Decodes: {}", nb_decode); - - assert_eq!(reconstructed_segments.len(), 1); - let original_data_bytes = - hex::decode(&json_data.data).expect("Failed to decode hex string"); - // Verify that the data attribute matches the first 342 bytes of the reconstructed data in the first segment - if let Some((_, first_segment)) = reconstructed_segments.get(0) { - assert_eq!( - &first_segment.data[..342], - &original_data_bytes[..342], - "The first 342 bytes of the reconstructed data do not match the original data." - ); - println!("Reconstructed successfully! YAY"); - } else { - panic!("No reconstructed segments found."); - } - } -} diff --git a/Utils/Tests/UtilsTests/ErasureCodeTest.swift b/Utils/Tests/UtilsTests/ErasureCodeTest.swift index c53b5212..be550e6c 100644 --- a/Utils/Tests/UtilsTests/ErasureCodeTest.swift +++ b/Utils/Tests/UtilsTests/ErasureCodeTest.swift @@ -54,7 +54,7 @@ struct ErasureCodeTests { if chunkIdx >= 684 { var subshard: [UInt8] = Array(repeating: 0, count: Int(SUBSHARD_SIZE)) - subshard[0 ..< chunkBytes.count].append(contentsOf: chunkBytes) + subshard[0 ..< chunkBytes.count] = [UInt8](chunkBytes)[...] subshards.append(SubShardTuple( seg_index: UInt8(segmentIdx), chunk_index: ChunkIndex(chunkIdx), @@ -85,12 +85,11 @@ struct ErasureCodeTests { case let .success(decoded): #expect(decoded.numDecoded == 1) let segmentTuples = decoded.segments + #expect(segmentTuples.count == 1) let segment = segmentTuples[0].segment - let originalDataBytes = Data(fromHexString: testCase.data)! let segmentData = Data(UnsafeBufferPointer(start: segment.data, count: Int(SEGMENT_SIZE))) - // #expect(segmentData[0 ..< 342] == originalDataBytes[0 ..< 342]) - #expect(segmentData[0 ..< 2] == originalDataBytes[0 ..< 2]) + #expect(segmentData[0 ..< 342] == originalDataBytes[0 ..< 342]) case let .failure(error): Issue.record("Expected success, got \(error)") } From 22dcbce1ee3204cbdd45fd2e6e878f08f02e09ba Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Mon, 12 Aug 2024 09:11:44 +0800 Subject: [PATCH 10/11] fix memory --- Utils/Sources/Utils/ErasureCoding.swift | 41 ++++++++++++++++++------- Utils/Sources/erasure-coding/bindings.h | 5 +++ Utils/Sources/erasure-coding/src/ffi.rs | 20 ++++++++++-- 3 files changed, 52 insertions(+), 14 deletions(-) diff --git a/Utils/Sources/Utils/ErasureCoding.swift b/Utils/Sources/Utils/ErasureCoding.swift index 759ee4c9..16bdb954 100644 --- a/Utils/Sources/Utils/ErasureCoding.swift +++ b/Utils/Sources/Utils/ErasureCoding.swift @@ -8,9 +8,32 @@ public enum ErasureCodeError: Error { case reconstructFailed } +public class Segment { + public var csegment: CSegment + + public let data: Data + public let index: Int + + public init?(data: Data, index: UInt32) { + guard data.count == SEGMENT_SIZE else { + return nil + } + csegment = CSegment( + data: UnsafeMutablePointer(mutating: data.withUnsafeBytes { $0.baseAddress!.assumingMemoryBound(to: UInt8.self) }), + index: index + ) + self.data = Data(bytes: csegment.data, count: Int(SEGMENT_SIZE)) + self.index = Int(csegment.index) + } + + deinit { + csegment_data_free(&csegment) + } +} + /// Split original data into segments -public func split(data: Data) -> [CSegment] { - var segments: [CSegment] = [] +public func split(data: Data) -> [Segment] { + var segments: [Segment] = [] let segmentSize = Int(SEGMENT_SIZE) // Create a new data with padding @@ -25,10 +48,7 @@ public func split(data: Data) -> [CSegment] { let segmentData = paddedData[i ..< end] let index = UInt32(i / segmentSize) - let segment = CSegment( - data: UnsafeMutablePointer(mutating: segmentData.withUnsafeBytes { $0.baseAddress!.assumingMemoryBound(to: UInt8.self) }), - index: index - ) + let segment = Segment(data: segmentData, index: index)! segments.append(segment) } @@ -36,13 +56,12 @@ public func split(data: Data) -> [CSegment] { } /// Join segments into original data (padding not removed) -private func join(segments: [CSegment]) -> Data { +public func join(segments: [Segment]) -> Data { var data = Data() let sortedSegments = segments.sorted { $0.index < $1.index } for segment in sortedSegments { - let segmentData = UnsafeBufferPointer(start: segment.data, count: Int(SEGMENT_SIZE)) - data.append(segmentData) + data.append(segment.data) } return data @@ -60,14 +79,14 @@ public class SubShardEncoder { } /// Construct erasure-coded chunks from segments - public func construct(segments: [CSegment]) -> Result<[UInt8], ErasureCodeError> { + public func construct(segments: [Segment]) -> Result<[UInt8], ErasureCodeError> { var success = false var out_len: UInt = 0 let expectedOutLen = Int(SUBSHARD_SIZE) * Int(TOTAL_SHARDS) * segments.count var out_chunks = [UInt8](repeating: 0, count: expectedOutLen) - segments.withUnsafeBufferPointer { segmentsPtr in + segments.map(\.csegment).withUnsafeBufferPointer { segmentsPtr in subshard_encoder_construct(encoder, segmentsPtr.baseAddress, UInt(segments.count), &success, &out_chunks, &out_len) } diff --git a/Utils/Sources/erasure-coding/bindings.h b/Utils/Sources/erasure-coding/bindings.h index fbf875a6..8aac9993 100644 --- a/Utils/Sources/erasure-coding/bindings.h +++ b/Utils/Sources/erasure-coding/bindings.h @@ -82,6 +82,11 @@ typedef struct SubShardTuple { */ typedef uint8_t SubShard[SUBSHARD_SIZE]; +/** + * Frees CSegment's data. + */ +void csegment_data_free(struct CSegment *c_segment); + /** * Initializes a new SubShardEncoder. */ diff --git a/Utils/Sources/erasure-coding/src/ffi.rs b/Utils/Sources/erasure-coding/src/ffi.rs index 4cc3e5f7..b8296bd0 100644 --- a/Utils/Sources/erasure-coding/src/ffi.rs +++ b/Utils/Sources/erasure-coding/src/ffi.rs @@ -24,7 +24,6 @@ impl From for CSegment { index: segment.index, }; - // prevent Rust from freeing the Vec while CSegment is in use std::mem::forget(vec_data); c_segment @@ -42,6 +41,21 @@ impl From for Segment { } } +/// Frees CSegment's data. +#[no_mangle] +pub extern "C" fn csegment_data_free(c_segment: *mut CSegment) { + if !c_segment.is_null() { + let csegment = unsafe { &*c_segment }; + + if !csegment.data.is_null() { + unsafe { + let vec_data = Vec::from_raw_parts(csegment.data, SEGMENT_SIZE, SEGMENT_SIZE); + drop(vec_data); + } + } + } +} + /// Initializes a new SubShardEncoder. #[no_mangle] pub extern "C" fn subshard_encoder_new() -> *mut SubShardEncoder { @@ -101,7 +115,6 @@ pub extern "C" fn subshard_encoder_construct( *out_len = total_subshards; } - std::mem::forget(data); unsafe { *success = true }; } Err(_) => { @@ -202,7 +215,8 @@ pub extern "C" fn subshard_decoder_reconstruct( let segments_len = segments_vec.len(); let segments_ptr = segments_vec.as_mut_ptr(); - std::mem::forget(segments_vec); // prevent the Vec from being deallocated + // prevent the Vec from being deallocated, will be freed in reconstruct_result_free + std::mem::forget(segments_vec); let result = ReconstructResult { segments: segments_ptr, From 5fb78f7b14629efb3dcb74fcf10bcfecebfde4fc Mon Sep 17 00:00:00 2001 From: Qiwei Yang Date: Mon, 12 Aug 2024 12:21:22 +0800 Subject: [PATCH 11/11] fix --- Utils/Sources/Utils/ErasureCoding.swift | 13 +++++-------- Utils/Sources/erasure-coding/bindings.h | 5 ----- Utils/Sources/erasure-coding/src/ffi.rs | 15 --------------- Utils/Tests/UtilsTests/ErasureCodeTest.swift | 10 ++++++++++ 4 files changed, 15 insertions(+), 28 deletions(-) diff --git a/Utils/Sources/Utils/ErasureCoding.swift b/Utils/Sources/Utils/ErasureCoding.swift index 16bdb954..844f4f22 100644 --- a/Utils/Sources/Utils/ErasureCoding.swift +++ b/Utils/Sources/Utils/ErasureCoding.swift @@ -8,7 +8,7 @@ public enum ErasureCodeError: Error { case reconstructFailed } -public class Segment { +public struct Segment { public var csegment: CSegment public let data: Data @@ -25,26 +25,23 @@ public class Segment { self.data = Data(bytes: csegment.data, count: Int(SEGMENT_SIZE)) self.index = Int(csegment.index) } - - deinit { - csegment_data_free(&csegment) - } } /// Split original data into segments public func split(data: Data) -> [Segment] { var segments: [Segment] = [] let segmentSize = Int(SEGMENT_SIZE) + let remainder = data.count % segmentSize + segments.reserveCapacity((data.count / segmentSize) + (remainder > 0 ? 1 : 0)) // Create a new data with padding var paddedData = data - let remainder = data.count % segmentSize if remainder != 0 { paddedData.append(Data(repeating: 0, count: segmentSize - remainder)) } for i in stride(from: 0, to: paddedData.count, by: segmentSize) { - let end = min(i + segmentSize, data.count) + let end = min(i + segmentSize, paddedData.count) let segmentData = paddedData[i ..< end] let index = UInt32(i / segmentSize) @@ -57,7 +54,7 @@ public func split(data: Data) -> [Segment] { /// Join segments into original data (padding not removed) public func join(segments: [Segment]) -> Data { - var data = Data() + var data = Data(capacity: segments.count * Int(SEGMENT_SIZE)) let sortedSegments = segments.sorted { $0.index < $1.index } for segment in sortedSegments { diff --git a/Utils/Sources/erasure-coding/bindings.h b/Utils/Sources/erasure-coding/bindings.h index 8aac9993..fbf875a6 100644 --- a/Utils/Sources/erasure-coding/bindings.h +++ b/Utils/Sources/erasure-coding/bindings.h @@ -82,11 +82,6 @@ typedef struct SubShardTuple { */ typedef uint8_t SubShard[SUBSHARD_SIZE]; -/** - * Frees CSegment's data. - */ -void csegment_data_free(struct CSegment *c_segment); - /** * Initializes a new SubShardEncoder. */ diff --git a/Utils/Sources/erasure-coding/src/ffi.rs b/Utils/Sources/erasure-coding/src/ffi.rs index b8296bd0..9993880f 100644 --- a/Utils/Sources/erasure-coding/src/ffi.rs +++ b/Utils/Sources/erasure-coding/src/ffi.rs @@ -41,21 +41,6 @@ impl From for Segment { } } -/// Frees CSegment's data. -#[no_mangle] -pub extern "C" fn csegment_data_free(c_segment: *mut CSegment) { - if !c_segment.is_null() { - let csegment = unsafe { &*c_segment }; - - if !csegment.data.is_null() { - unsafe { - let vec_data = Vec::from_raw_parts(csegment.data, SEGMENT_SIZE, SEGMENT_SIZE); - drop(vec_data); - } - } - } -} - /// Initializes a new SubShardEncoder. #[no_mangle] pub extern "C" fn subshard_encoder_new() -> *mut SubShardEncoder { diff --git a/Utils/Tests/UtilsTests/ErasureCodeTest.swift b/Utils/Tests/UtilsTests/ErasureCodeTest.swift index be550e6c..afe5f3d5 100644 --- a/Utils/Tests/UtilsTests/ErasureCodeTest.swift +++ b/Utils/Tests/UtilsTests/ErasureCodeTest.swift @@ -94,4 +94,14 @@ struct ErasureCodeTests { Issue.record("Expected success, got \(error)") } } + + @Test func testSplitJoin() { + let testData = Data("Hello, world!".utf8) + let paddedTestData = testData + Data(repeating: 0, count: Int(SEGMENT_SIZE) - (testData.count % Int(SEGMENT_SIZE))) + + let splited = split(data: testData) + let joined = join(segments: splited) + + #expect(joined == paddedTestData) + } }