diff --git a/CMakeLists.txt b/CMakeLists.txt index bc4de79..c1d7184 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,10 +10,6 @@ add_subdirectory(dep) add_library(rlib STATIC lib/rlib/common.hpp lib/rlib/common.cpp - lib/rlib/error.cpp - lib/rlib/error.hpp - lib/rlib/fbuffer.cpp - lib/rlib/fbuffer.hpp lib/rlib/iofile.cpp lib/rlib/iofile.hpp lib/rlib/rbundle.cpp @@ -22,6 +18,8 @@ add_library(rlib STATIC lib/rlib/rcache.cpp lib/rlib/rcdn.hpp lib/rlib/rcdn.cpp + lib/rlib/rchunk.hpp + lib/rlib/rchunk.cpp lib/rlib/rmanifest.cpp lib/rlib/rmanifest.hpp ) @@ -37,14 +35,14 @@ target_link_libraries(rman-ls PRIVATE rlib) add_executable(rman-bl src/rman_bl.cpp) target_link_libraries(rman-bl PRIVATE rlib) -add_executable(rbun-add src/rbun_add.cpp) -target_link_libraries(rbun-add PRIVATE rlib) - add_executable(rbun-chk src/rbun_chk.cpp) target_link_libraries(rbun-chk PRIVATE rlib) add_executable(rbun-ls src/rbun_ls.cpp) target_link_libraries(rbun-ls PRIVATE rlib) +add_executable(rbun-merge src/rbun_merge.cpp) +target_link_libraries(rbun-merge PRIVATE rlib) + add_executable(rbun-usage src/rbun_usage.cpp) -target_link_libraries(rbun-usage PRIVATE rlib) \ No newline at end of file +target_link_libraries(rbun-usage PRIVATE rlib) diff --git a/lib/rlib/common.cpp b/lib/rlib/common.cpp index c763cd4..a0335a7 100644 --- a/lib/rlib/common.cpp +++ b/lib/rlib/common.cpp @@ -2,16 +2,61 @@ #include -#include "error.hpp" +#include +#include +#include +#include using namespace rlib; -auto rlib::progress(char const* banner, std::uint32_t index, std::uint64_t done, std::uint64_t total) noexcept - -> std::string { - constexpr auto MB = 1024.0 * 1024.0; +void rlib::throw_error(char const* from, char const* msg) { + // break point goes here + throw std::runtime_error(std::string(from) + msg); +} + +error_stack_t& rlib::error_stack() noexcept { + thread_local error_stack_t instance = {}; + return instance; +} + +void rlib::push_error_msg(char const* fmt, ...) 
noexcept { + va_list args; + char buffer[4096]; + int result; + va_start(args, fmt); + result = vsnprintf(buffer, 4096, fmt, args); + va_end(args); + if (result >= 0) { + error_stack().push_back({buffer, buffer + result}); + } +} + +rlib::progress_bar::progress_bar(char const* banner, + bool disabled, + std::uint32_t index, + std::uint64_t done, + std::uint64_t total) noexcept + : banner_(banner), disabled_(disabled), index_(index), done_(done), total_(total), percent_(done_ * 100 / total_) { + this->render(); +} + +rlib::progress_bar::~progress_bar() noexcept { + this->render(); + std::cerr << std::endl; +} + +auto rlib::progress_bar::render() const noexcept -> void { char buffer[128]; - sprintf(buffer, "%s #%u: %4.3f / %4.3f", banner, index, done / MB, total / MB); - return buffer; + sprintf(buffer, "\r%s #%u: %.02fMB %u%%", banner_, index_, total_ / MB, (std::uint32_t)percent_); + std::cerr << buffer; +} + +auto rlib::progress_bar::update(std::uint64_t done) noexcept -> void { + done_ = done; + auto percent = std::exchange(percent_, done_ * 100 / total_); + if (!disabled_ && percent != percent_) { + this->render(); + } } auto rlib::to_hex(std::uint64_t id, std::size_t s) noexcept -> std::string { @@ -36,25 +81,19 @@ auto rlib::clean_path(std::string path) noexcept -> std::string { auto rlib::zstd_decompress(std::span src, std::size_t count) -> std::span { thread_local static std::vector buffer = {}; + std::size_t size_decompressed = rlib_assert_zstd(ZSTD_findDecompressedSize(src.data(), src.size())); + rlib_assert(size_decompressed == count); if (buffer.size() < count) { + buffer.clear(); buffer.resize(count); } - auto result = ZSTD_decompress(buffer.data(), count, src.data(), src.size()); - if (ZSTD_isError(result)) { - rlib_error(ZSTD_getErrorName(result)); - } - rlib_assert(result == count); + std::size_t result = rlib_assert_zstd(ZSTD_decompress(buffer.data(), count, src.data(), src.size())); + rlib_assert(result == size_decompressed); return {buffer.data(), 
count}; } -auto rlib::try_zstd_decompress(std::span src, std::size_t count) -> std::span { - thread_local static std::vector buffer = {}; - if (buffer.size() < count) { - buffer.resize(count); - } - auto result = ZSTD_decompress(buffer.data(), count, src.data(), src.size()); - if (ZSTD_isError(result)) { - return {}; - } - return {buffer.data(), result}; +auto rlib::zstd_frame_decompress_size(std::span src) -> std::size_t { + ZSTD_frameHeader header = {}; + rlib_assert_zstd(ZSTD_getFrameHeader(&header, src.data(), src.size())); + return header.frameContentSize; } diff --git a/lib/rlib/common.hpp b/lib/rlib/common.hpp index 0e6ddfc..0fe1893 100644 --- a/lib/rlib/common.hpp +++ b/lib/rlib/common.hpp @@ -5,28 +5,107 @@ #include #include #include +#include #include +#include #include #include +#include #include #include #include +#define rlib_paste_impl(x, y) x##y +#define rlib_paste(x, y) rlib_paste_impl(x, y) + +#define rlib_error(msg) ::rlib::throw_error(__func__, msg) + +#define rlib_assert(...) \ + do { \ + if (!(__VA_ARGS__)) [[unlikely]] { \ + ::rlib::throw_error(__func__, ": " #__VA_ARGS__); \ + } \ + } while (false) + +#define rlib_rethrow(...) \ + [&, func = __func__]() -> decltype(auto) { \ + try { \ + return __VA_ARGS__; \ + } catch (std::exception const&) { \ + ::rlib::throw_error(func, ": " #__VA_ARGS__); \ + } \ + }() + +#define rlib_trace(...) \ + ::rlib::ErrorTrace rlib_paste(_trace_, __LINE__) { \ + [&] { ::rlib::push_error_msg(__VA_ARGS__); } \ + } + +#define rlib_assert_zstd(...) 
\ + [&, func = __func__]() -> std::size_t { \ + if (std::size_t result = __VA_ARGS__; ZSTD_isError(result)) [[unlikely]] { \ + throw_error(func, ZSTD_getErrorName(result)); \ + } else { \ + return result; \ + } \ + }() + namespace rlib { + [[noreturn]] extern void throw_error(char const* from, char const* msg); + + [[noreturn]] inline void throw_error(char const* from, std::error_code const& ec) { + throw_error(from, ec.message().c_str()); + } + + using error_stack_t = std::vector; + + extern error_stack_t& error_stack() noexcept; + + extern void push_error_msg(char const* fmt, ...) noexcept; + + struct progress_bar { + static constexpr auto MB = 1024.0 * 1024.0; + + progress_bar(char const* banner, + bool disabled, + std::uint32_t index, + std::uint64_t done, + std::uint64_t total) noexcept; + ~progress_bar() noexcept; + + auto update(std::uint64_t done) noexcept -> void; + + private: + auto render() const noexcept -> void; + + char const* banner_; + bool disabled_ = {}; + std::uint32_t index_; + std::uint64_t done_; + std::uint64_t total_; + std::uint64_t percent_; + }; + + template + struct ErrorTrace : Func { + inline ErrorTrace(Func&& func) noexcept : Func(std::move(func)) {} + inline ~ErrorTrace() noexcept { + if (std::uncaught_exceptions()) { + Func::operator()(); + } + } + }; extern auto to_hex(std::uint64_t id, std::size_t s = 16) noexcept -> std::string; template requires(std::is_enum_v) inline auto to_hex(T id, std::size_t s = 16) noexcept -> std::string { return to_hex((std::uint64_t)id, s); } - extern auto progress(char const* banner, std::uint32_t index, std::uint64_t done, std::uint64_t total) noexcept - -> std::string; - extern auto clean_path(std::string path) noexcept -> std::string; extern auto zstd_decompress(std::span src, std::size_t count) -> std::span; - extern auto try_zstd_decompress(std::span src, std::size_t count) -> std::span; + extern auto zstd_frame_decompress_size(std::span src) -> std::size_t; template inline auto sort_by(auto beg, 
auto end) noexcept -> void { diff --git a/lib/rlib/error.cpp b/lib/rlib/error.cpp deleted file mode 100644 index 23ff1e4..0000000 --- a/lib/rlib/error.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "error.hpp" - -#include -#include - -using namespace rlib; - -void rlib::throw_error(char const* from, char const* msg) { - // break point goes here - throw std::runtime_error(std::string(from) + msg); -} - -error_stack_t& rlib::error_stack() noexcept { - thread_local error_stack_t instance = {}; - return instance; -} - -void rlib::push_error_msg(char const* fmt, ...) noexcept { - va_list args; - char buffer[4096]; - int result; - va_start(args, fmt); - result = vsnprintf(buffer, 4096, fmt, args); - va_end(args); - if (result >= 0) { - error_stack().push_back({buffer, buffer + result}); - } -} diff --git a/lib/rlib/error.hpp b/lib/rlib/error.hpp deleted file mode 100644 index 7504015..0000000 --- a/lib/rlib/error.hpp +++ /dev/null @@ -1,55 +0,0 @@ -#pragma once -#include -#include -#include -#include - -#define rlib_paste_impl(x, y) x##y -#define rlib_paste(x, y) rlib_paste_impl(x, y) - -#define rlib_error(msg) ::rlib::throw_error(__func__, msg) - -#define rlib_assert(...) \ - do { \ - if (!(__VA_ARGS__)) { \ - ::rlib::throw_error(__func__, ": " #__VA_ARGS__); \ - } \ - } while (false) - -#define rlib_rethrow(...) \ - [&, func = __func__]() -> decltype(auto) { \ - try { \ - return __VA_ARGS__; \ - } catch (std::exception const&) { \ - ::rlib::throw_error(func, ": " #__VA_ARGS__); \ - } \ - }() - -#define rlib_trace(...) 
\ - ::rlib::ErrorTrace rlib_paste(_trace_, __LINE__) { \ - [&] { ::rlib::push_error_msg(__VA_ARGS__); } \ - } - -namespace rlib { - [[noreturn]] extern void throw_error(char const* from, char const* msg); - - [[noreturn]] inline void throw_error(char const* from, std::error_code const& ec) { - throw_error(from, ec.message().c_str()); - } - - using error_stack_t = std::vector; - - extern error_stack_t& error_stack() noexcept; - - extern void push_error_msg(char const* fmt, ...) noexcept; - - template - struct ErrorTrace : Func { - inline ErrorTrace(Func&& func) noexcept : Func(std::move(func)) {} - inline ~ErrorTrace() noexcept { - if (std::uncaught_exceptions()) { - Func::operator()(); - } - } - }; -} diff --git a/lib/rlib/fbuffer.cpp b/lib/rlib/fbuffer.cpp deleted file mode 100644 index b01389a..0000000 --- a/lib/rlib/fbuffer.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "fbuffer.hpp" \ No newline at end of file diff --git a/lib/rlib/fbuffer.hpp b/lib/rlib/fbuffer.hpp deleted file mode 100644 index 873222a..0000000 --- a/lib/rlib/fbuffer.hpp +++ /dev/null @@ -1,143 +0,0 @@ -#pragma once -#include -#include -#include -#include - -#include "common.hpp" -#include "error.hpp" - -namespace rlib::fbuffer { - struct Offset { - char const* beg = {}; - std::int32_t cur = {}; - std::int32_t end = {}; - - template - inline T as() const { - auto result = T{}; - from_offset(*this, result); - return result; - } - - explicit inline operator bool() const noexcept { return beg != nullptr; } - inline bool operator!() const noexcept { return !operator bool(); } - }; - - struct Table { - Offset beg = {}; - std::int32_t vtable_size = {}; - std::int32_t struct_size = {}; - std::vector offsets = {}; - - inline Offset operator[](std::size_t index) const { - rlib_assert(beg); - auto voffset = index < offsets.size() ? 
offsets[index] : 0; - auto result = beg; - if (voffset) { - result.cur += voffset; - } else { - result.beg = nullptr; - } - return result; - } - }; - - template - requires(std::is_arithmetic_v || std::is_enum_v) - inline void from_offset(Offset offset, T& value) { - if (!offset) { - value = T{}; - return; - } - T result; - rlib_assert(offset.cur >= 0 && offset.cur + (std::int32_t)sizeof(T) <= offset.end); - memcpy(&result, offset.beg + offset.cur, sizeof(T)); - value = result; - } - - inline void from_offset(Offset offset, Offset& value) { - if (offset) { - auto relative_offset = offset.as(); - if (relative_offset) { - offset.cur += relative_offset; - rlib_assert(offset.cur >= 0 && offset.cur <= offset.end); - } else { - value.beg = nullptr; - } - } - value = offset; - } - - inline void from_offset(Offset offset, std::string& value) { - offset = offset.as(); - if (!offset) { - return; - } - auto size = offset.as(); - if (!size) { - return; - } - rlib_assert(size >= 0 && size <= 4096); - offset.cur += sizeof(std::int32_t); - rlib_assert(offset.cur + size <= offset.end); - value.resize((std::size_t)size); - memcpy(value.data(), offset.beg + offset.cur, (std::size_t)size); - } - - inline void from_offset(Offset offset, Table& value) { - offset = offset.as(); - rlib_assert(offset); - value.beg = offset; - auto relative_offset = offset.as(); - offset.cur -= relative_offset; - rlib_assert(offset.cur >= 0 && offset.cur <= offset.end); - value.vtable_size = offset.as(); - rlib_assert(value.vtable_size >= 4 && value.vtable_size % 2 == 0); - rlib_assert(offset.cur + value.vtable_size <= offset.end); - offset.cur += sizeof(std::uint16_t); - value.struct_size = offset.as(); - offset.cur += sizeof(std::uint16_t); - auto members_size = value.vtable_size - 4; - value.offsets.resize(members_size / 2); - memcpy(value.offsets.data(), offset.beg + offset.cur, members_size); - } - - template - requires(std::is_arithmetic_v || std::is_enum_v) - inline void from_offset(Offset offset, 
std::vector& value) { - offset = offset.as(); - if (!offset) { - return; - } - auto size = offset.as(); - if (!size) { - return; - } - rlib_assert(size >= 0); - offset.cur += sizeof(std::int32_t); - rlib_assert(offset.cur + size * (std::int32_t)sizeof(T) <= offset.end); - value.resize((std::size_t)size); - memcpy(value.data(), offset.beg + offset.cur, (std::size_t)size * sizeof(T)); - } - - template - inline void from_offset(Offset offset, std::vector& value) { - offset = offset.as(); - if (!offset) { - return; - } - auto size = offset.as(); - if (!size) { - return; - } - rlib_assert(size >= 0); - offset.cur += sizeof(std::int32_t); - rlib_assert(offset.cur + size * (std::int32_t)sizeof(std::int32_t) <= offset.end); - value.resize((std::size_t)size); - for (auto& item : value) { - from_offset(Offset{offset}, item); - offset.cur += 4; - } - } -} \ No newline at end of file diff --git a/lib/rlib/iofile.cpp b/lib/rlib/iofile.cpp index eb7447e..f804c81 100644 --- a/lib/rlib/iofile.cpp +++ b/lib/rlib/iofile.cpp @@ -5,7 +5,7 @@ #include #include -#include "error.hpp" +#include "common.hpp" using namespace rlib; diff --git a/lib/rlib/rbundle.cpp b/lib/rlib/rbundle.cpp index 19b1c6a..009e85d 100644 --- a/lib/rlib/rbundle.cpp +++ b/lib/rlib/rbundle.cpp @@ -5,107 +5,11 @@ #include #include -#include #include "common.hpp" -#include "error.hpp" using namespace rlib; -struct RBUN::Raw { - struct Chunk { - ChunkID chunkId; - std::uint32_t uncompressed_size; - std::uint32_t compressed_size; - }; - - struct Footer { - std::array id_raw; - std::uint32_t chunk_count; - std::uint32_t version; - std::array magic; - }; -}; - -static auto hkdf(std::array& out) { - using digestpp::sha256; - auto ipad = out; - for (auto& p : ipad) { - p ^= 0x36u; - } - auto opad = out; - for (auto& p : opad) { - p ^= 0x5Cu; - } - auto tmp = std::array{}; - sha256().absorb(ipad.data(), ipad.size()).absorb("\0\0\0\1", 4).digest(tmp.data(), 32); - sha256().absorb(opad.data(), opad.size()).absorb(tmp.data(), 
tmp.size()).digest(tmp.data(), 32); - std::memcpy(&out, tmp.data(), 8); - for (std::uint32_t rounds = 31; rounds; rounds--) { - sha256().absorb(ipad.data(), ipad.size()).absorb(tmp.data(), tmp.size()).digest(tmp.data(), 32); - sha256().absorb(opad.data(), opad.size()).absorb(tmp.data(), tmp.size()).digest(tmp.data(), 32); - for (std::size_t i = 0; i != 8; ++i) { - out[i] ^= tmp[i]; - } - } -} - -auto RBUN::Chunk::hash(std::span data, HashType type) noexcept -> ChunkID { - using digestpp::sha256; - using digestpp::sha512; - std::uint64_t result = {}; - auto out = std::array{}; - switch (type) { - case HashType::None: - return {}; - case HashType::SHA512: { - sha512().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 64); - break; - } - case HashType::SHA256: { - sha256().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 32); - break; - } - case HashType::RITO_HKDF: { - sha256().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 32); - hkdf(out); - break; - } - case HashType::XX64: { - return (ChunkID)XXH64(data.data(), data.size(), 0); - } - default: - break; - } - std::memcpy(&result, out.data(), sizeof(result)); - return (ChunkID)result; -} - -auto RBUN::Chunk::hash_type(std::span data, ChunkID chunkId) -> HashType { - using digestpp::sha256; - using digestpp::sha512; - std::uint64_t result = {}; - auto out = std::array{}; - - sha256().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 32); - if (std::memcpy(&result, out.data(), sizeof(result)); (ChunkID)result == chunkId) { - return HashType::SHA256; - } - - // reuse first sha256 round from last guess - hkdf(out); - if (std::memcpy(&result, out.data(), sizeof(result)); (ChunkID)result == chunkId) { - return HashType::RITO_HKDF; - } - - sha512().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 64); - if (std::memcpy(&result, out.data(), sizeof(result)); (ChunkID)result == chunkId) { - return 
HashType::SHA512; - } - - return HashType::None; -} - auto RBUN::read(IOFile const& file, bool no_lookup) -> RBUN { auto result = RBUN{}; auto footer = Footer{}; @@ -116,7 +20,7 @@ auto RBUN::read(IOFile const& file, bool no_lookup) -> RBUN { rlib_assert(footer.magic == Footer::MAGIC); rlib_assert(footer.version == Footer::VERSION || footer.version == 1); - auto toc_size = sizeof(Chunk) * footer.entry_count; + auto toc_size = sizeof(RChunk) * footer.entry_count; rlib_assert(file_size >= toc_size + sizeof(footer)); @@ -139,9 +43,9 @@ auto RBUN::read(IOFile const& file, bool no_lookup) -> RBUN { result.lookup.reserve(footer.entry_count); for (std::uint64_t compressed_offset = 0; auto const& chunk : result.chunks) { rlib_assert(in_range(compressed_offset, chunk.compressed_size, result.toc_offset)); - rlib_assert(chunk.uncompressed_size <= RBUN::CHUNK_LIMIT); + rlib_assert(chunk.uncompressed_size <= RChunk::LIMIT); rlib_assert(chunk.compressed_size <= ZSTD_compressBound(chunk.uncompressed_size)); - result.lookup[chunk.chunkId] = ChunkSrc{chunk, result.bundleId, compressed_offset}; + result.lookup[chunk.chunkId] = RChunk::Src{chunk, result.bundleId, compressed_offset}; compressed_offset += chunk.compressed_size; } } diff --git a/lib/rlib/rbundle.hpp b/lib/rlib/rbundle.hpp index 54e9c9d..466bf1e 100644 --- a/lib/rlib/rbundle.hpp +++ b/lib/rlib/rbundle.hpp @@ -7,42 +7,10 @@ #include #include "iofile.hpp" +#include "rchunk.hpp" namespace rlib { - enum class BundleID : std::uint64_t { None }; - - enum class ChunkID : std::uint64_t { None }; - - enum class HashType : std::uint8_t { - None, - SHA512, - SHA256, - RITO_HKDF, - XX64 = 0xFF, - }; - struct RBUN { - static constexpr std::size_t CHUNK_LIMIT = 16 * 1024 * 1024; - - struct Chunk { - ChunkID chunkId; - std::uint32_t uncompressed_size; - std::uint32_t compressed_size; - - static auto hash(std::span data, HashType type) noexcept -> ChunkID; - static auto hash_type(std::span data, ChunkID chunkId) -> HashType; - }; - - 
struct ChunkSrc : Chunk { - BundleID bundleId; - std::uint64_t compressed_offset; - }; - - struct ChunkDst : ChunkSrc { - HashType hash_type; - std::uint64_t uncompressed_offset; - }; - struct Footer { static constexpr std::array MAGIC = {'R', 'B', 'U', 'N'}; static constexpr std::uint32_t VERSION = 0xFFFFFFFF; @@ -55,12 +23,9 @@ namespace rlib { BundleID bundleId = {}; std::uint64_t toc_offset = {}; - std::vector chunks; - std::unordered_map lookup; + std::vector chunks; + std::unordered_map lookup; static auto read(IOFile const& file, bool no_lookup = false) -> RBUN; - - private: - struct Raw; }; } \ No newline at end of file diff --git a/lib/rlib/rcache.cpp b/lib/rlib/rcache.cpp index 77dae07..0daea20 100644 --- a/lib/rlib/rcache.cpp +++ b/lib/rlib/rcache.cpp @@ -5,7 +5,6 @@ #include #include "common.hpp" -#include "error.hpp" using namespace rlib; @@ -17,9 +16,10 @@ RCache::RCache(Options const& options) : file_(options.path, !options.readonly), } bundle_ = RBUN::read(file_); } + RCache::~RCache() { this->flush(); } -auto RCache::add(RBUN::Chunk const& chunk, std::span data) -> bool { +auto RCache::add(RChunk const& chunk, std::span data) -> bool { if (!can_write() || bundle_.lookup.contains(chunk.chunkId)) { return false; } @@ -37,17 +37,16 @@ auto RCache::add(RBUN::Chunk const& chunk, std::span data) -> bool { auto RCache::contains(ChunkID chunkId) const noexcept -> bool { return bundle_.lookup.contains(chunkId); } -auto RCache::run(std::vector chunks, done_cb done, yield_cb yield) const - -> std::vector { - sort_by<&RBUN::ChunkDst::chunkId, &RBUN::ChunkDst::uncompressed_offset>(chunks.begin(), chunks.end()); +auto RCache::uncache(std::vector chunks, RChunk::Dst::data_cb on_data) const -> std::vector { + sort_by<&RChunk::Dst::chunkId, &RChunk::Dst::uncompressed_offset>(chunks.begin(), chunks.end()); auto lastId = ChunkID::None; auto dst = std::span{}; - remove_if(chunks, [&](RBUN::ChunkDst const& chunk) mutable { + remove_if(chunks, [&](RChunk::Dst const& 
chunk) mutable { if (chunk.chunkId == ChunkID::None) { return false; } if (chunk.chunkId == lastId) { - done(chunk, dst); + on_data(chunk, dst); return true; } auto i = bundle_.lookup.find(chunk.chunkId); @@ -63,8 +62,7 @@ auto RCache::run(std::vector chunks, done_cb done, yield_cb yiel src = file_.copy(c.compressed_offset, c.compressed_size); } dst = zstd_decompress(src, c.uncompressed_size); - done(chunk, dst); - if (yield) yield(); + on_data(chunk, dst); lastId = chunk.chunkId; return true; }); @@ -76,7 +74,7 @@ auto RCache::flush() -> bool { if (!can_write() || (buffer_.empty() && bundle_.toc_offset != 0)) { return false; } - auto toc_size = sizeof(RBUN::Chunk) * bundle_.chunks.size(); + auto toc_size = sizeof(RChunk) * bundle_.chunks.size(); RBUN::Footer footer = { .checksum = std::bit_cast>(XXH64((char const*)bundle_.chunks.data(), toc_size, 0)), .entry_count = (std::uint32_t)bundle_.chunks.size(), diff --git a/lib/rlib/rcache.hpp b/lib/rlib/rcache.hpp index 43e7f43..9fcd891 100644 --- a/lib/rlib/rcache.hpp +++ b/lib/rlib/rcache.hpp @@ -18,18 +18,15 @@ namespace rlib { RCache(Options const& options); ~RCache(); - auto add(RBUN::Chunk const& chunk, std::span data) -> bool; + auto add(RChunk const& chunk, std::span data) -> bool; auto contains(ChunkID chunkId) const noexcept -> bool; - using done_cb = function_ref data)>; - using yield_cb = function_ref; - auto run(std::vector chunks, done_cb done, yield_cb yield) const -> std::vector; + auto uncache(std::vector chunks, RChunk::Dst::data_cb read) const -> std::vector; auto flush() -> bool; auto can_write() const noexcept -> bool { return file_ && !options_.readonly; } - private: IOFile file_; Options options_; diff --git a/lib/rlib/rcdn.cpp b/lib/rlib/rcdn.cpp index 54fa1e2..132f517 100644 --- a/lib/rlib/rcdn.cpp +++ b/lib/rlib/rcdn.cpp @@ -6,7 +6,7 @@ #include #include -#include "error.hpp" +#include "common.hpp" using namespace rlib; @@ -49,7 +49,7 @@ struct RCDN::Worker { } } - auto start(std::span& 
chunks_queue, done_cb done) -> void* { + auto start(std::span& chunks_queue, RChunk::Dst::data_cb on_data) -> void* { auto chunks = find_chunks(chunks_queue); auto start = std::to_string(chunks.front().compressed_offset); auto end = std::to_string(chunks.back().compressed_offset + chunks.back().compressed_size - 1); @@ -59,16 +59,16 @@ struct RCDN::Worker { rlib_assert(curl_easy_setopt(handle_, CURLOPT_RANGE, range.c_str()) == CURLE_OK); buffer_.clear(); chunks_ = chunks; - done_ = done; + on_data_ = on_data; chunks_queue = chunks_queue.subspan(chunks.size()); return handle_; } - auto finish(std::vector& chunks_failed) -> void* { + auto finish(std::vector& chunks_failed) -> void* { chunks_failed.insert(chunks_failed.end(), chunks_.begin(), chunks_.end()); buffer_.clear(); chunks_ = {}; - done_ = {}; + on_data_ = {}; return handle_; } @@ -76,8 +76,8 @@ struct RCDN::Worker { void* handle_; std::string url_; std::vector buffer_; - std::span chunks_; - done_cb done_; + std::span chunks_; + RChunk::Dst::data_cb on_data_; RCache* cache_out_; auto recieve(std::span recv) -> bool { @@ -107,20 +107,19 @@ struct RCDN::Worker { return true; } - auto decompress(RBUN::ChunkDst const& chunk, std::span src) -> bool { + auto decompress(RChunk::Dst const& chunk, std::span src) -> bool { src = src.subspan(0, chunk.compressed_size); - auto dst = try_zstd_decompress(src, chunk.uncompressed_size); - if (dst.size() != chunk.uncompressed_size) { - return false; - } - if (chunk.hash(dst, chunk.hash_type) != chunk.chunkId) { - return false; - } + // if (src.size() < 5 || std::memcmp(src.data(), "\x28\xB5\x2F\xFD", 4) != 0) { + // return false; + // } + rlib_trace("BundleID: %016llx, ChunkID: %016llx\n", chunk.bundleId, chunk.chunkId); + auto dst = zstd_decompress(src, chunk.uncompressed_size); + rlib_assert(chunk.hash(dst, chunk.hash_type) == chunk.chunkId); if (cache_out_) { cache_out_->add(chunk, src); } while (!chunks_.empty() && chunks_.front().chunkId == chunk.chunkId) { - 
done_(chunks_.front(), dst); + on_data_(chunks_.front(), dst); chunks_ = chunks_.subspan(1); } return true; @@ -133,7 +132,7 @@ struct RCDN::Worker { return 0; } - static auto find_chunks(std::span chunks) noexcept -> std::span { + static auto find_chunks(std::span chunks) noexcept -> std::span { std::size_t i = 1; for (; i != chunks.size(); ++i) { // 1. Consecutive chunks must be present in same bundle @@ -153,15 +152,13 @@ struct RCDN::Worker { } }; -RCDN::RCDN(Options const& options, RCache* cache_out) { - if (cache_out && !cache_out->can_write()) { - cache_out = nullptr; - } +RCDN::RCDN(Options const& options, RCache* cache_out) + : options_(options), cache_out_(cache_out && cache_out->can_write() ? cache_out : nullptr) { static auto init = CurlInit{}; handle_ = curl_multi_init(); rlib_assert(handle_); - for (std::uint32_t i = std::clamp(options.workers, 1u, 64u); i; --i) { - workers_.push_back(std::make_unique(options, cache_out)); + for (std::uint32_t i = std::clamp(options_.workers, 1u, 64u); i; --i) { + workers_.push_back(std::make_unique(options, cache_out_)); } } @@ -171,58 +168,59 @@ RCDN::~RCDN() noexcept { } } -auto RCDN::run(std::vector chunks, done_cb callback, yield_cb yield, int delay) - -> std::vector { - sort_by<&RBUN::ChunkDst::bundleId, &RBUN::ChunkDst::compressed_offset, &RBUN::ChunkDst::uncompressed_offset>( - chunks.begin(), - chunks.end()); - - auto chunks_failed = std::vector{}; - auto chunks_queue = std::span(chunks); - auto workers_free = std::vector{}; - for (auto const& worker : workers_) { - workers_free.push_back(worker.get()); - } - - for (std::size_t workers_running = 0;;) { - if (yield) yield(); - - // Start new downloads - while (!workers_free.empty() && !chunks_queue.empty()) { - auto worker = workers_free.back(); - auto handle = worker->start(chunks_queue, callback); - workers_free.pop_back(); - ++workers_running; - rlib_assert(curl_multi_add_handle(handle_, handle) == CURLM_OK); - } +auto RCDN::download(std::vector chunks, 
RChunk::Dst::data_cb on_data) -> std::vector { + for (std::uint32_t retry = options_.retry; !chunks.empty() && retry; --retry) { + sort_by<&RChunk::Dst::bundleId, &RChunk::Dst::compressed_offset, &RChunk::Dst::uncompressed_offset>( + chunks.begin(), + chunks.end()); + + auto chunks_failed = std::vector{}; + chunks_failed.reserve(chunks.size()); + auto chunks_queue = std::span(chunks); + auto workers_free = std::vector{}; + for (auto const& worker : workers_) { + workers_free.push_back(worker.get()); + } + + for (std::size_t workers_running = 0;;) { + // Start new downloads + while (!workers_free.empty() && !chunks_queue.empty()) { + auto worker = workers_free.back(); + auto handle = worker->start(chunks_queue, on_data); + workers_free.pop_back(); + ++workers_running; + rlib_assert(curl_multi_add_handle(handle_, handle) == CURLM_OK); + } - // Return if we ended. - if (!workers_running) { - break; - } + // Return if we ended. + if (!workers_running) { + break; + } - // Perform any actual work. - // NOTE: i do not trust still_running out variable, do our own bookkeeping instead. - int still_running = 0; - rlib_assert(curl_multi_perform(handle_, &still_running) == CURLM_OK); + // Perform any actual work. + // NOTE: i do not trust still_running out variable, do our own bookkeeping instead. + int still_running = 0; + rlib_assert(curl_multi_perform(handle_, &still_running) == CURLM_OK); - // Process messages. - for (int msg_left = 0; auto msg = curl_multi_info_read(handle_, &msg_left);) { - if (msg->msg != CURLMSG_DONE || msg->easy_handle == nullptr) { - continue; + // Process messages. 
+ for (int msg_left = 0; auto msg = curl_multi_info_read(handle_, &msg_left);) { + if (msg->msg != CURLMSG_DONE || msg->easy_handle == nullptr) { + continue; + } + auto worker = (Worker*)nullptr; + curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, &worker); + rlib_assert(worker); + auto handle = worker->finish(chunks_failed); + workers_free.push_back(worker); + --workers_running; + rlib_assert(curl_multi_remove_handle(handle_, handle) == CURLM_OK); } - auto worker = (Worker*)nullptr; - curl_easy_getinfo(msg->easy_handle, CURLINFO_PRIVATE, &worker); - rlib_assert(worker); - auto handle = worker->finish(chunks_failed); - workers_free.push_back(worker); - --workers_running; - rlib_assert(curl_multi_remove_handle(handle_, handle) == CURLM_OK); + + // Block until end. + rlib_assert(curl_multi_wait(handle_, nullptr, 0, options_.interval, nullptr) == CURLM_OK); } - // Block untill end. - rlib_assert(curl_multi_wait(handle_, nullptr, 0, delay, nullptr) == CURLM_OK); + chunks = std::move(chunks_failed); } - - return chunks_failed; + return chunks; } diff --git a/lib/rlib/rcdn.hpp b/lib/rlib/rcdn.hpp index 79ca0d0..fd398ee 100644 --- a/lib/rlib/rcdn.hpp +++ b/lib/rlib/rcdn.hpp @@ -8,8 +8,8 @@ #include #include "common.hpp" -#include "rbundle.hpp" #include "rcache.hpp" +#include "rchunk.hpp" namespace rlib { struct RCDN { @@ -17,6 +17,8 @@ namespace rlib { std::string url = {}; bool verbose = {}; long buffer = {}; + int interval = {}; + std::uint32_t retry = {}; std::uint32_t workers = {}; std::string proxy = {}; std::string useragent = {}; @@ -28,16 +30,13 @@ namespace rlib { RCDN(RCDN const&) = delete; ~RCDN() noexcept; - using done_cb = function_ref data)>; - using yield_cb = function_ref; - - auto run(std::vector chunks, done_cb done, yield_cb yield, int delay = 100) - -> std::vector; + auto download(std::vector chunks, RChunk::Dst::data_cb on_good) -> std::vector; private: struct Worker; void* handle_; Options options_; + RCache* cache_out_; std::vector> workers_; 
}; } \ No newline at end of file diff --git a/lib/rlib/rchunk.cpp b/lib/rlib/rchunk.cpp new file mode 100644 index 0000000..c0d134a --- /dev/null +++ b/lib/rlib/rchunk.cpp @@ -0,0 +1,85 @@ +#include "rchunk.hpp" + +#include +#include +#include + +#include "common.hpp" + +using namespace rlib; + +static auto hkdf(std::array& out) { + using digestpp::sha256; + auto ipad = out; + for (auto& p : ipad) { + p ^= 0x36u; + } + auto opad = out; + for (auto& p : opad) { + p ^= 0x5Cu; + } + auto tmp = std::array{}; + sha256().absorb(ipad.data(), ipad.size()).absorb("\0\0\0\1", 4).digest(tmp.data(), 32); + sha256().absorb(opad.data(), opad.size()).absorb(tmp.data(), tmp.size()).digest(tmp.data(), 32); + std::memcpy(&out, tmp.data(), 8); + for (std::uint32_t rounds = 31; rounds; rounds--) { + sha256().absorb(ipad.data(), ipad.size()).absorb(tmp.data(), tmp.size()).digest(tmp.data(), 32); + sha256().absorb(opad.data(), opad.size()).absorb(tmp.data(), tmp.size()).digest(tmp.data(), 32); + for (std::size_t i = 0; i != 8; ++i) { + out[i] ^= tmp[i]; + } + } +} + +auto RChunk::hash(std::span data, HashType type) noexcept -> ChunkID { + using digestpp::sha256; + using digestpp::sha512; + std::uint64_t result = {}; + auto out = std::array{}; + switch (type) { + case HashType::None: + return {}; + case HashType::SHA512: { + sha512().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 64); + break; + } + case HashType::SHA256: { + sha256().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 32); + break; + } + case HashType::RITO_HKDF: { + sha256().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 32); + hkdf(out); + break; + } + default: + break; + } + std::memcpy(&result, out.data(), sizeof(result)); + return (ChunkID)result; +} + +auto RChunk::hash_type(std::span data, ChunkID chunkId) -> HashType { + using digestpp::sha256; + using digestpp::sha512; + std::uint64_t result = {}; + auto out = std::array{}; + + 
sha256().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 32); + if (std::memcpy(&result, out.data(), sizeof(result)); (ChunkID)result == chunkId) { + return HashType::SHA256; + } + + // reuse first sha256 round from last guess + hkdf(out); + if (std::memcpy(&result, out.data(), sizeof(result)); (ChunkID)result == chunkId) { + return HashType::RITO_HKDF; + } + + sha512().absorb((std::uint8_t const*)data.data(), data.size()).digest(out.data(), 64); + if (std::memcpy(&result, out.data(), sizeof(result)); (ChunkID)result == chunkId) { + return HashType::SHA512; + } + + return HashType::None; +} diff --git a/lib/rlib/rchunk.hpp b/lib/rlib/rchunk.hpp new file mode 100644 index 0000000..34b1a63 --- /dev/null +++ b/lib/rlib/rchunk.hpp @@ -0,0 +1,44 @@ +#pragma once +#include +#include +#include + +#include "common.hpp" + +namespace rlib { + enum class BundleID : std::uint64_t { None }; + + enum class ChunkID : std::uint64_t { None }; + + enum class HashType : std::uint8_t { + None, + SHA512, + SHA256, + RITO_HKDF, + }; + + struct RChunk { + static constexpr std::size_t LIMIT = 16 * 1024 * 1024; + + ChunkID chunkId; + std::uint32_t uncompressed_size; + std::uint32_t compressed_size; + + static auto hash(std::span data, HashType type) noexcept -> ChunkID; + static auto hash_type(std::span data, ChunkID chunkId) -> HashType; + + struct Src; + struct Dst; + }; + + struct RChunk::Src : RChunk { + BundleID bundleId; + std::uint64_t compressed_offset; + }; + + struct RChunk::Dst : RChunk::Src { + HashType hash_type; + std::uint64_t uncompressed_offset; + using data_cb = function_ref data)>; + }; +} \ No newline at end of file diff --git a/lib/rlib/rmanifest.cpp b/lib/rlib/rmanifest.cpp index 857feda..8de7708 100644 --- a/lib/rlib/rmanifest.cpp +++ b/lib/rlib/rmanifest.cpp @@ -11,14 +11,144 @@ #include #include "common.hpp" -#include "error.hpp" -#include "fbuffer.hpp" #include "iofile.hpp" using namespace rlib; -using namespace rlib::fbuffer; struct 
RMAN::Raw { + struct Offset { + char const* beg = {}; + std::int32_t cur = {}; + std::int32_t end = {}; + + template + inline T as() const { + auto result = T{}; + from_offset(*this, result); + return result; + } + + explicit inline operator bool() const noexcept { return beg != nullptr; } + inline bool operator!() const noexcept { return !operator bool(); } + }; + + struct Table { + Offset beg = {}; + std::int32_t vtable_size = {}; + std::int32_t struct_size = {}; + std::vector offsets = {}; + + inline Offset operator[](std::size_t index) const { + rlib_assert(beg); + auto voffset = index < offsets.size() ? offsets[index] : 0; + auto result = beg; + if (voffset) { + result.cur += voffset; + } else { + result.beg = nullptr; + } + return result; + } + }; + + template + requires(std::is_arithmetic_v || std::is_enum_v) + static inline void from_offset(Offset offset, T& value) { + if (!offset) { + value = T{}; + return; + } + T result; + rlib_assert(offset.cur >= 0 && offset.cur + (std::int32_t)sizeof(T) <= offset.end); + memcpy(&result, offset.beg + offset.cur, sizeof(T)); + value = result; + } + + static inline void from_offset(Offset offset, Offset& value) { + if (offset) { + auto relative_offset = offset.as(); + if (relative_offset) { + offset.cur += relative_offset; + rlib_assert(offset.cur >= 0 && offset.cur <= offset.end); + } else { + value.beg = nullptr; + } + } + value = offset; + } + + static inline void from_offset(Offset offset, std::string& value) { + offset = offset.as(); + if (!offset) { + return; + } + auto size = offset.as(); + if (!size) { + return; + } + rlib_assert(size >= 0 && size <= 4096); + offset.cur += sizeof(std::int32_t); + rlib_assert(offset.cur + size <= offset.end); + value.resize((std::size_t)size); + memcpy(value.data(), offset.beg + offset.cur, (std::size_t)size); + } + + static inline void from_offset(Offset offset, Table& value) { + offset = offset.as(); + rlib_assert(offset); + value.beg = offset; + auto relative_offset = 
offset.as(); + offset.cur -= relative_offset; + rlib_assert(offset.cur >= 0 && offset.cur <= offset.end); + value.vtable_size = offset.as(); + rlib_assert(value.vtable_size >= 4 && value.vtable_size % 2 == 0); + rlib_assert(offset.cur + value.vtable_size <= offset.end); + offset.cur += sizeof(std::uint16_t); + value.struct_size = offset.as(); + offset.cur += sizeof(std::uint16_t); + auto members_size = value.vtable_size - 4; + value.offsets.resize(members_size / 2); + memcpy(value.offsets.data(), offset.beg + offset.cur, members_size); + } + + template + requires(std::is_arithmetic_v || std::is_enum_v) + static inline void from_offset(Offset offset, std::vector& value) { + offset = offset.as(); + if (!offset) { + return; + } + auto size = offset.as(); + if (!size) { + return; + } + rlib_assert(size >= 0); + offset.cur += sizeof(std::int32_t); + rlib_assert(offset.cur + size * (std::int32_t)sizeof(T) <= offset.end); + value.resize((std::size_t)size); + memcpy(value.data(), offset.beg + offset.cur, (std::size_t)size * sizeof(T)); + } + + template + static inline void from_offset(Offset offset, std::vector& value) { + offset = offset.as(); + if (!offset) { + return; + } + auto size = offset.as(); + if (!size) { + return; + } + rlib_assert(size >= 0); + offset.cur += sizeof(std::int32_t); + rlib_assert(offset.cur + size * (std::int32_t)sizeof(std::int32_t) <= offset.end); + value.resize((std::size_t)size); + for (auto& item : value) { + from_offset(Offset{offset}, item); + offset.cur += 4; + } + } + struct Header { static constexpr inline std::uint32_t MAGIC = 0x4e414d52u; std::uint32_t magic; @@ -35,7 +165,7 @@ struct RMAN::Raw { std::unordered_map lookup_dir_name; std::unordered_map lookup_dir_parent; std::unordered_map lookup_params; - std::unordered_map lookup_chunk; + std::unordered_map lookup_chunk; std::vector bundles; std::vector files; @@ -125,14 +255,14 @@ struct RMAN::Raw { auto uncompressed_size = chunk_table[2].as(); auto compressed_size = 
chunk_table[1].as(); rlib_assert(chunkId != ChunkID::None); - rlib_assert(uncompressed_size <= RBUN::CHUNK_LIMIT); + rlib_assert(uncompressed_size <= RChunk::LIMIT); rlib_assert(compressed_size <= ZSTD_compressBound(uncompressed_size)); - auto chunk = RBUN::Chunk{ + auto chunk = RChunk{ .chunkId = chunkId, .uncompressed_size = uncompressed_size, .compressed_size = compressed_size, }; - auto chunk_src = RBUN::ChunkSrc{chunk, bundle.bundleId, compressed_offset}; + auto chunk_src = RChunk::Src{chunk, bundle.bundleId, compressed_offset}; bundle.chunks.push_back(chunk); lookup_chunk[chunkId] = chunk_src; compressed_offset += compressed_size; @@ -169,25 +299,27 @@ struct RMAN::Raw { } dirId = rlib_rethrow(lookup_dir_parent.at(dirId)); } - auto langs = std::string{"none"}; + auto langs = std::string{}; for (std::size_t i = 0; i != 32; i++) { rlib_trace("LangID: %u", (unsigned int)i); if (!(locale_flags & (1ull << i))) { continue; } - if (auto const& name = rlib_rethrow(lookup_lang_name.at(i + 1)); name != "none") { + auto const& name = rlib_rethrow(lookup_lang_name.at(i + 1)); + if (!langs.empty()) { langs += ";"; - langs += name; - } else { - langs += name; } + langs += name; } - auto chunks = std::vector{}; + if (langs.empty()) { + langs = "none"; + } + auto chunks = std::vector{}; chunks.reserve(chunk_ids.size()); for (std::uint64_t uncompressed_offset = 0; auto chunk_id : chunk_ids) { rlib_trace("ChunkID: %016llX", (unsigned long long)chunk_id); auto& chunk_src = rlib_rethrow(lookup_chunk.at(chunk_id)); - auto chunk_dst = RBUN::ChunkDst{chunk_src, params.hash_type, uncompressed_offset}; + auto chunk_dst = RChunk::Dst{chunk_src, params.hash_type, uncompressed_offset}; chunks.push_back(chunk_dst); uncompressed_offset += chunk_dst.uncompressed_size; rlib_assert(uncompressed_offset <= size); @@ -230,34 +362,27 @@ auto RMAN::File::matches(Filter const& filter) const noexcept -> bool { return true; } -auto RMAN::File::verify(fs::path const& path, bool force) const -> 
std::optional> { - if (!fs::exists(path) || force) { +auto RMAN::File::verify(fs::path const& path, RChunk::Dst::data_cb on_data) const -> std::vector { + if (!fs::exists(path)) { return chunks; } auto infile = IOFile(path, false); - if (!infile) { - return chunks; - } - thread_local auto buffer = std::vector(); - auto bad = chunks; - remove_if(bad, [&, failfast = false](RBUN::ChunkDst const& chunk) mutable -> bool { + auto result = chunks; + remove_if(result, [&, failfast = false](RChunk::Dst const& chunk) mutable -> bool { if (failfast) { return false; } - buffer.clear(); - buffer.resize(chunk.uncompressed_size); - if (!infile.read(chunk.uncompressed_offset, buffer)) { + if (!in_range(chunk.uncompressed_offset, chunk.uncompressed_size, infile.size())) { failfast = true; return false; } - auto id = RBUN::Chunk::hash(buffer, params.hash_type); + auto data = infile.copy(chunk.uncompressed_offset, chunk.uncompressed_size); + auto id = RChunk::hash(data, params.hash_type); if (id == chunk.chunkId) { + on_data(chunk, data); return true; } return false; }); - if (bad.empty() && infile.size() == size) { - return std::nullopt; - } - return bad; + return result; } diff --git a/lib/rlib/rmanifest.hpp b/lib/rlib/rmanifest.hpp index 51f4120..9014929 100644 --- a/lib/rlib/rmanifest.hpp +++ b/lib/rlib/rmanifest.hpp @@ -9,6 +9,7 @@ #include #include "rbundle.hpp" +#include "rchunk.hpp" namespace rlib { namespace fs = std::filesystem; @@ -39,11 +40,11 @@ namespace rlib { std::string path; std::string link; std::string langs; - std::vector chunks; + std::vector chunks; auto matches(Filter const& filter) const noexcept -> bool; - auto verify(fs::path const& path, bool force = false) const -> std::optional>; + auto verify(fs::path const& path, RChunk::Dst::data_cb on_good) const -> std::vector; }; ManifestID manifestId; diff --git a/src/rbun_chk.cpp b/src/rbun_chk.cpp index a755521..ecdcfd1 100644 --- a/src/rbun_chk.cpp +++ b/src/rbun_chk.cpp @@ -1,7 +1,6 @@ #include -#include 
+#include #include -#include #include #include @@ -12,29 +11,36 @@ struct Main { std::vector inputs = {}; bool no_hash = {}; bool no_extract = {}; + bool no_progress = {}; } cli = {}; auto parse_args(int argc, char** argv) -> void { argparse::ArgumentParser program(fs::path(argv[0]).filename().generic_string()); program.add_description("Checks one or more bundles for errors."); - program.add_argument("input").help("Bundle file or folder to read from.").remaining(); + program.add_argument("input").help("Bundle file(s) or folder(s) to read from.").remaining().required(); program.add_argument("--no-extract") .help("Do not even attempt to extract chunk.") .default_value(false) .implicit_value(true); program.add_argument("--no-hash").help("Do not verify hash.").default_value(false).implicit_value(true); + program.add_argument("--no-progress") + .help("Do not print progress to cerr.") + .default_value(false) + .implicit_value(true); program.parse_args(argc, argv); cli.no_hash = program.get("--no-extract"); cli.no_extract = program.get("--no-hash"); + cli.no_progress = program.get("--no-progress"); cli.inputs = program.get>("input"); } auto run() -> void { auto paths = std::vector(); + std::cerr << "Collecting input bundles ... " << std::endl; for (auto const& input : cli.inputs) { rlib_assert(fs::exists(input)); if (fs::is_regular_file(input)) { @@ -51,34 +57,42 @@ struct Main { } } } - for (auto const& path : paths) { - verify_bundle(path); + std::cerr << "Processing input bundles ... 
" << std::endl; + for (std::uint32_t index = paths.size(); auto const& path : paths) { + verify_bundle(path, index--); } } - auto verify_bundle(fs::path const& path) -> void { + auto verify_bundle(fs::path const& path, std::uint32_t index) -> void { try { - rlib_trace("path: %s\n", path.generic_string().c_str()); - printf("Start %s\n", path.filename().generic_string().c_str()); + rlib_trace("path: %s", path.generic_string().c_str()); + std::cout << "START:" << path.filename().generic_string() << std::endl; auto infile = IOFile(path, false); auto bundle = RBUN::read(infile, true); - printf(" ... "); - for (std::uint64_t offset = 0; auto const& chunk : bundle.chunks) { - rlib_assert(in_range(offset, chunk.compressed_size, bundle.toc_offset)); - if (!cli.no_extract) { - auto src = infile.copy(offset, chunk.compressed_size); - auto dst = try_zstd_decompress(src, chunk.uncompressed_size); - rlib_assert(dst.size() == chunk.uncompressed_size); - if (!cli.no_hash) { - auto hash_type = RBUN::Chunk::hash_type(dst, chunk.chunkId); - rlib_assert(hash_type != HashType::None); + { + std::uint64_t offset = 0; + progress_bar p("VERIFIED", cli.no_progress, index, offset, bundle.toc_offset); + for (auto const& chunk : bundle.chunks) { + rlib_assert(in_range(offset, chunk.compressed_size, bundle.toc_offset)); + char zstd_header[32]; + std::size_t header_size = std::min(chunk.compressed_size, 32u); + rlib_assert(infile.read(offset, {zstd_header, header_size})); + rlib_assert(zstd_frame_decompress_size({zstd_header, header_size}) == chunk.uncompressed_size); + if (!cli.no_extract) { + auto src = infile.copy(offset, chunk.compressed_size); + auto dst = zstd_decompress(src, chunk.uncompressed_size); + if (!cli.no_hash) { + auto hash_type = RChunk::hash_type(dst, chunk.chunkId); + rlib_assert(hash_type != HashType::None); + } } + offset += chunk.compressed_size; + p.update(offset); } - offset += chunk.compressed_size; } - printf("Ok!\n"); + std::cout << "OK!" 
<< std::endl; } catch (std::exception const& e) { - printf("Failed!\n"); + std::cout << "FAIL!" << std::endl; std::cerr << e.what() << std::endl; for (auto const& error : error_stack()) { std::cerr << error << std::endl; diff --git a/src/rbun_ls.cpp b/src/rbun_ls.cpp index a582e81..06f8111 100644 --- a/src/rbun_ls.cpp +++ b/src/rbun_ls.cpp @@ -1,7 +1,6 @@ #include -#include +#include #include -#include #include #include @@ -19,7 +18,7 @@ struct Main { "\n" "Output is in CSV format as follows:\n" "BundlID,ChunkID,SizeCompressed,SizeUncompressed"); - program.add_argument("input").help("Bundle file or folder to read from.").remaining(); + program.add_argument("input").help("Bundle file(s) or folder(s) to read from.").remaining().required(); program.parse_args(argc, argv); @@ -28,6 +27,7 @@ struct Main { auto run() -> void { auto paths = std::vector(); + std::cerr << "Collecting input bundles ... " << std::endl; for (auto const& input : cli.inputs) { rlib_assert(fs::exists(input)); if (fs::is_regular_file(input)) { @@ -44,6 +44,7 @@ struct Main { } } } + std::cerr << "Processing input bundles ... 
" << std::endl; for (auto const& path : paths) { list_bundle(path); } @@ -51,16 +52,17 @@ struct Main { auto list_bundle(fs::path const& path) noexcept -> void { try { - rlib_trace("path: %s\n", path.generic_string().c_str()); + rlib_trace("path: %s", path.generic_string().c_str()); auto infile = IOFile(path, true); auto bundle = RBUN::read(infile); for (std::uint64_t offset = 0; auto const& chunk : bundle.chunks) { if (!in_range(offset, chunk.compressed_size, bundle.toc_offset)) break; - printf("%016llx,%016llX,%llu,%llu\n", - (unsigned long long)bundle.bundleId, - (unsigned long long)chunk.chunkId, - (unsigned long long)chunk.compressed_size, - (unsigned long long)chunk.uncompressed_size); + std::cout // + << to_hex(bundle.bundleId) << ',' // + << to_hex(chunk.chunkId) << ',' // + << chunk.compressed_size << ',' // + << chunk.uncompressed_size << std::endl // + ; offset += chunk.compressed_size; } } catch (std::exception const& e) { diff --git a/src/rbun_add.cpp b/src/rbun_merge.cpp similarity index 53% rename from src/rbun_add.cpp rename to src/rbun_merge.cpp index 2fc0885..fc905fc 100644 --- a/src/rbun_add.cpp +++ b/src/rbun_merge.cpp @@ -1,7 +1,6 @@ #include -#include +#include #include -#include #include #include #include @@ -12,6 +11,9 @@ struct Main { struct CLI { std::string output = {}; std::vector inputs = {}; + bool no_hash = {}; + bool no_extract = {}; + bool no_progress = {}; std::uint32_t buffer = {}; } cli = {}; @@ -19,7 +21,18 @@ struct Main { argparse::ArgumentParser program(fs::path(argv[0]).filename().generic_string()); program.add_description("Adds one or more bundles into first first bundle."); program.add_argument("output").help("Bundle file to write into.").required(); - program.add_argument("input").help("Bundle file or folder to write from.").remaining(); + program.add_argument("input").help("Bundle file(s) or folder to write from.").remaining().required(); + + program.add_argument("--no-extract") + .help("Do not even attempt to extract 
chunk.") + .default_value(false) + .implicit_value(true); + program.add_argument("--no-hash").help("Do not verify hash.").default_value(false).implicit_value(true); + program.add_argument("--no-progress") + .help("Do not print progress to cerr.") + .default_value(false) + .implicit_value(true); + program.add_argument("--buffer") .help("Size for buffer before flush to disk in killobytes [64, 1048576]") .default_value(std::uint32_t{32 * 1024 * 1024u}) @@ -31,11 +44,15 @@ struct Main { cli.output = program.get("output"); cli.inputs = program.get>("input"); + cli.no_hash = program.get("--no-extract"); + cli.no_extract = program.get("--no-hash"); + cli.no_progress = program.get("--no-progress"); cli.buffer = program.get("--buffer"); } auto run() { auto paths = std::vector(); + std::cerr << "Collecting input bundles ... " << std::endl; for (auto const& input : cli.inputs) { rlib_assert(fs::exists(input)); if (fs::is_regular_file(input)) { @@ -55,32 +72,42 @@ struct Main { if (paths.empty()) { return; } + std::cerr << "Processing output bundle ... " << std::endl; auto output = RCache(RCache::Options{.path = cli.output, .readonly = false, .flush_size = cli.buffer}); - for (auto const& path : paths) { - add_bundle(path, output); + std::cerr << "Processing input bundles ... " << std::endl; + for (std::uint32_t index = paths.size(); auto const& path : paths) { + add_bundle(path, output, index--); } } - auto add_bundle(fs::path const& path, RCache& output) -> void { + auto add_bundle(fs::path const& path, RCache& output, std::uint32_t index) -> void { try { - rlib_trace("path: %s\n", path.generic_string().c_str()); - printf("Start %s\n", path.filename().generic_string().c_str()); + rlib_trace("path: %s", path.generic_string().c_str()); + std::cout << "START:" << path.filename().generic_string() << std::endl; auto infile = IOFile(path, false); auto bundle = RBUN::read(infile, true); - printf(" ... 
"); - for (std::uint64_t offset = 0; auto const& chunk : bundle.chunks) { - rlib_assert(in_range(offset, chunk.compressed_size, bundle.toc_offset)); - auto src = infile.copy(offset, chunk.compressed_size); - auto dst = try_zstd_decompress(src, chunk.uncompressed_size); - rlib_assert(dst.size() == chunk.uncompressed_size); - auto hash_type = RBUN::Chunk::hash_type(dst, chunk.chunkId); - rlib_assert(hash_type != HashType::None); - offset += chunk.compressed_size; - output.add(chunk, src); + { + std::uint64_t offset = 0; + progress_bar p("VERIFIED", cli.no_progress, index, offset, bundle.toc_offset); + for (auto const& chunk : bundle.chunks) { + rlib_assert(in_range(offset, chunk.compressed_size, bundle.toc_offset)); + auto src = infile.copy(offset, chunk.compressed_size); + rlib_assert(zstd_frame_decompress_size(src) == chunk.uncompressed_size); + if (!cli.no_extract) { + auto dst = zstd_decompress(src, chunk.uncompressed_size); + if (!cli.no_hash) { + auto hash_type = RChunk::hash_type(dst, chunk.chunkId); + rlib_assert(hash_type != HashType::None); + } + } + output.add(chunk, src); + offset += chunk.compressed_size; + p.update(offset); + } } - printf("Ok!\n"); + std::cout << " OK!" << std::endl; } catch (std::exception const& e) { - printf("Failed!\n"); + std::cout << " FAIL!" 
<< std::endl; std::cerr << e.what() << std::endl; for (auto const& error : error_stack()) { std::cerr << error << std::endl; diff --git a/src/rbun_usage.cpp b/src/rbun_usage.cpp index a0d1166..3822a5b 100644 --- a/src/rbun_usage.cpp +++ b/src/rbun_usage.cpp @@ -1,9 +1,9 @@ #include -#include +#include #include -#include #include #include +#include #include using namespace rlib; @@ -22,7 +22,7 @@ struct Main { auto parse_args(int argc, char** argv) -> void { argparse::ArgumentParser program(fs::path(argv[0]).filename().generic_string()); program.add_description("Collects size usage statistics on one or more bundle."); - program.add_argument("input").help("Bundle file(s) or folder to read from.").remaining(); + program.add_argument("input").help("Bundle file(s) or folder(s) to read from.").remaining().required(); program.parse_args(argc, argv); @@ -31,6 +31,7 @@ struct Main { auto run() -> void { auto paths = std::vector(); + std::cerr << "Collecting input bundles ... " << std::endl; for (auto const& input : cli.inputs) { rlib_assert(fs::exists(input)); if (fs::is_regular_file(input)) { @@ -47,11 +48,11 @@ struct Main { } } } - printf("Processing bundles..."); + std::cerr << "Processing input bundles ... " << std::endl; for (auto const& path : paths) { process_bundle(path); } - printf("Callculating usage..."); + std::cerr << "Callculating usage ... 
" << std::endl; std::size_t total_count_all = 0; std::size_t total_count_uncompressed_uniq = 0; std::size_t total_count_compressed_uniq = 0; @@ -105,7 +106,7 @@ struct Main { auto process_bundle(fs::path const& path) noexcept -> void { try { - rlib_trace("path: %s\n", path.generic_string().c_str()); + rlib_trace("path: %s", path.generic_string().c_str()); auto infile = IOFile(path, false); auto bundle = RBUN::read(infile, true); for (std::uint64_t offset = 0; auto const& chunk : bundle.chunks) { diff --git a/src/rman_bl.cpp b/src/rman_bl.cpp index ad04de3..c4ae652 100644 --- a/src/rman_bl.cpp +++ b/src/rman_bl.cpp @@ -1,6 +1,6 @@ #include -#include -#include +#include +#include #include #include @@ -28,7 +28,7 @@ struct Main { auto manifest = RMAN::read(infile.copy(0, infile.size())); for (auto const& bundle : manifest.bundles) { - printf("/%016llX.bundle\n", (unsigned long long)bundle.bundleId); + std::cout << '/' << to_hex(bundle.bundleId) << ".bundle" << std::endl; } } }; diff --git a/src/rman_dl.cpp b/src/rman_dl.cpp index f3dcc3f..52eaf94 100644 --- a/src/rman_dl.cpp +++ b/src/rman_dl.cpp @@ -1,7 +1,6 @@ #include -#include #include -#include +#include #include #include #include @@ -15,7 +14,6 @@ struct Main { bool no_verify = {}; bool no_write = {}; bool no_progress = {}; - std::uint32_t retry = {}; RMAN::Filter filter = {}; RCache::Options cache = {}; RCDN::Options cdn = {}; @@ -88,6 +86,10 @@ struct Main { .action([](std::string const& value) -> std::uint32_t { return std::clamp((std::uint32_t)std::stoul(value), 1u, 64u); }); + program.add_argument("--cdn-interval") + .help("Curl poll interval in miliseconds.") + .default_value(int{100}) + .action([](std::string const& value) -> int { return std::clamp((int)std::stoul(value), 0, 30000); }); program.add_argument("--cdn-verbose").help("Curl: verbose logging.").default_value(false).implicit_value(true); program.add_argument("--cdn-buffer") .help("Curl buffer size in killobytes [1, 512].") @@ -118,11 +120,12 @@ 
struct Main { .flush_size = program.get("--cache-buffer"), }; - cli.retry = program.get("--cdn-retry"); cli.cdn = { .url = clean_path(program.get("--cdn")), .verbose = program.get("--cdn-verbose"), .buffer = program.get("--cdn-buffer"), + .interval = program.get("--cdn-interval"), + .retry = program.get("--cdn-retry"), .workers = program.get("--cdn-workers"), .proxy = program.get("--cdn-proxy"), .useragent = program.get("--cdn-useragent"), @@ -151,65 +154,65 @@ struct Main { cdn = std::make_unique(cli.cdn, cache.get()); } - for (auto const& rfile : manifest.files) { - if (!rfile.matches(cli.filter)) continue; - std::cout << "START: " << rfile.path << std::endl; - if (download_file(rfile)) { - std::cout << "OK: " << rfile.path << std::endl; - } else { - std::cout << "FAIL: " << rfile.path << std::endl; - } + remove_if(manifest.files, [&](RMAN::File const& rfile) { return !rfile.matches(cli.filter); }); + + for (std::uint32_t index = manifest.files.size(); auto const& rfile : manifest.files) { + download_file(rfile, index--); } } - auto download_file(RMAN::File const& rfile) -> bool { - rlib_trace("Path: %s", rfile.path.c_str()); - auto bad_chunks = rfile.verify(fs::path(cli.output) / rfile.path, cli.no_verify); - if (!bad_chunks) { - return true; + auto download_file(RMAN::File const& rfile, std::uint32_t index) -> void { + std::cout << "START: " << rfile.path << std::endl; + auto path = fs::path(cli.output) / rfile.path; + rlib_trace("Path: %s", path.generic_string().c_str()); + auto done = std::uint64_t{}; + auto bad_chunks = std::vector{}; + + if (!cli.no_verify) { + progress_bar p("VERIFIED", cli.no_progress, index, done, rfile.size); + bad_chunks = rfile.verify(path, [&](RChunk::Dst const& chunk, std::span data) { + done += chunk.uncompressed_size; + p.update(done); + }); + } else { + bad_chunks = rfile.chunks; } auto outfile = IOFile(); if (!cli.no_write) { - outfile = IOFile(fs::path(cli.output) / rfile.path, true); + outfile = IOFile(path, true); 
rlib_assert(outfile.resize(0, rfile.size)); } - auto done = std::uint64_t{}; - auto total = std::uint64_t{}; - for (auto const& chunk : *bad_chunks) total += chunk.uncompressed_size; - - if (!bad_chunks->empty() && cache) { - auto yield_func = [&] { std::cout << progress("\rUNCACHED", 0, done, total) << std::flush; }; - yield_func(); - bad_chunks = cache->run( - std::move(*bad_chunks), - [&](RBUN::ChunkDst const& chunk, std::span data) { + if (!bad_chunks.empty() && cache) { + progress_bar p("UNCACHED", cli.no_progress, index, done, rfile.size); + bad_chunks = + cache->uncache(std::move(bad_chunks), [&](RChunk::Dst const& chunk, std::span data) { if (outfile) { rlib_assert(outfile.write(chunk.uncompressed_offset, data)); } done += chunk.uncompressed_size; - }, - cli.no_progress ? RCache::yield_cb() : yield_func); + p.update(done); + }); } - for (std::uint32_t retry = 1; !bad_chunks->empty() && cdn && retry <= cli.retry; ++retry) { - auto yield_func = [&] { std::cout << progress("\rDOWNLOAD", retry, done, total) << std::flush; }; - yield_func(); - bad_chunks = cdn->run( - std::move(*bad_chunks), - [&](RBUN::ChunkDst const& chunk, std::span data) { + if (!bad_chunks.empty() && cdn) { + progress_bar p("DOWNLOAD", cli.no_progress, index, done, rfile.size); + bad_chunks = + cdn->download(std::move(bad_chunks), [&](RChunk::Dst const& chunk, std::span data) { if (outfile) { rlib_assert(outfile.write(chunk.uncompressed_offset, data)); } done += chunk.uncompressed_size; - }, - cli.no_progress ? RCDN::yield_cb() : yield_func); + p.update(done); + }); } - std::cout << "\n"; - - return bad_chunks->empty(); + if (!bad_chunks.empty()) { + std::cout << "FAIL!" << std::endl; + } else { + std::cout << "OK!" 
<< std::endl; + } } }; diff --git a/src/rman_ls.cpp b/src/rman_ls.cpp index 6c973aa..2a48117 100644 --- a/src/rman_ls.cpp +++ b/src/rman_ls.cpp @@ -1,7 +1,6 @@ #include -#include +#include #include -#include #include #include @@ -57,9 +56,13 @@ struct Main { auto manifest = RMAN::read(infile.copy(0, infile.size())); for (auto const& rfile : manifest.files) { - if (!rfile.matches(cli.filter)) continue; - auto line = rfile.path + ',' + std::to_string(rfile.size) + ',' + to_hex(rfile.fileId) + ',' + rfile.langs; - puts(line.c_str()); + if (!rfile.matches(cli.filter)) { + continue; + } + std::cout << rfile.path << ',' // + << std::to_string(rfile.size) << ',' // + << to_hex(rfile.fileId) + ',' // + << rfile.langs << std::endl; } } };