From fb0c11b23a35c9e8183db881ad8b51ce76565547 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Mon, 4 Sep 2023 09:51:23 -0400 Subject: [PATCH] Git object hashing Part of RFC 133 Extracted from our old IPFS branches. Co-Authored-By: Matthew Bauer Co-Authored-By: Carlo Nucera --- Makefile | 1 + src/libexpr/primops.cc | 5 +- src/libstore/binary-cache-store.cc | 9 +- src/libstore/build/local-derivation-goal.cc | 20 +- src/libstore/content-address.cc | 11 + src/libstore/content-address.hh | 18 +- src/libstore/daemon.cc | 7 +- src/libstore/local-store.cc | 57 ++++- src/libstore/nar-accessor.cc | 3 +- src/libstore/remote-store.cc | 1 + src/libstore/store-api.cc | 52 ++++- src/libutil/experimental-features.cc | 10 +- src/libutil/experimental-features.hh | 1 + src/libutil/fs-sink.cc | 30 ++- src/libutil/fs-sink.hh | 11 +- src/libutil/git.cc | 234 +++++++++++++++++++- src/libutil/git.hh | 28 +++ src/libutil/hash.cc | 8 + src/libutil/hash.hh | 6 + src/libutil/serialise.cc | 4 + src/libutil/serialise.hh | 1 + src/nix/add-to-store.cc | 37 +++- src/nix/hash.cc | 18 +- tests/functional/git-hashing/common.sh | 11 + tests/functional/git-hashing/local.mk | 7 + tests/functional/git-hashing/simple.sh | 36 +++ 26 files changed, 586 insertions(+), 40 deletions(-) create mode 100644 tests/functional/git-hashing/common.sh create mode 100644 tests/functional/git-hashing/local.mk create mode 100644 tests/functional/git-hashing/simple.sh diff --git a/Makefile b/Makefile index 6658e3490033..5b422ee79450 100644 --- a/Makefile +++ b/Makefile @@ -30,6 +30,7 @@ makefiles += \ tests/functional/local.mk \ tests/functional/ca/local.mk \ tests/functional/dyn-drv/local.mk \ + tests/functional/git-hashing/local.mk \ tests/functional/test-libstoreconsumer/local.mk \ tests/functional/plugins/local.mk else diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 5de1b2828ee9..4028a572deb8 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -1111,7 +1111,10 @@ drvName, Bindings * 
attrs, Value & v) auto handleHashMode = [&](const std::string_view s) { if (s == "recursive") ingestionMethod = FileIngestionMethod::Recursive; else if (s == "flat") ingestionMethod = FileIngestionMethod::Flat; - else if (s == "text") { + else if (s == "git") { + experimentalFeatureSettings.require(Xp::GitHashing); + ingestionMethod = FileIngestionMethod::Git; /* "git" must select Git ingestion; Flat would silently hash the output as a plain file */ + } else if (s == "text") { experimentalFeatureSettings.require(Xp::DynamicDerivations); ingestionMethod = TextIngestionMethod {}; } else diff --git a/src/libstore/binary-cache-store.cc b/src/libstore/binary-cache-store.cc index b4fea693f5b8..df7f63fbd9f8 100644 --- a/src/libstore/binary-cache-store.cc +++ b/src/libstore/binary-cache-store.cc @@ -411,10 +411,15 @@ StorePath BinaryCacheStore::addToStore( implementation of this method in terms of addToStoreFromDump. */ HashSink sink { hashAlgo }; - if (method == FileIngestionMethod::Recursive) { + switch (method) { + case FileIngestionMethod::Recursive: dumpPath(srcPath, sink, filter); - } else { + break; + case FileIngestionMethod::Flat: readFile(srcPath, sink); + break; + case FileIngestionMethod::Git: + throw Error("cannot add to binary cache store using the git file ingestion method"); } auto h = sink.finish().first; diff --git a/src/libstore/build/local-derivation-goal.cc b/src/libstore/build/local-derivation-goal.cc index 64b55ca6ac2d..9883321efeb6 100644 --- a/src/libstore/build/local-derivation-goal.cc +++ b/src/libstore/build/local-derivation-goal.cc @@ -8,6 +8,7 @@ #include "finally.hh" #include "util.hh" #include "archive.hh" +#include "git.hh" #include "compression.hh" #include "daemon.hh" #include "topo-sort.hh" @@ -2480,23 +2481,34 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs() rewriteOutput(outputRewrites); /* FIXME optimize and deduplicate with addToStore */ std::string oldHashPart { scratchPath->hashPart() }; - HashModuloSink caSink { outputHash.hashType, oldHashPart }; + Hash got { outputHash.hashType }; // Dummy value 
std::visit(overloaded { [&](const TextIngestionMethod &) { + HashModuloSink caSink { outputHash.hashType, oldHashPart }; readFile(actualPath, caSink); + got = caSink.finish().first; }, [&](const FileIngestionMethod & m2) { switch (m2) { - case FileIngestionMethod::Recursive: + case FileIngestionMethod::Recursive: { + HashModuloSink caSink { outputHash.hashType, oldHashPart }; dumpPath(actualPath, caSink); + got = caSink.finish().first; break; - case FileIngestionMethod::Flat: + } + case FileIngestionMethod::Flat: { + HashModuloSink caSink { outputHash.hashType, oldHashPart }; readFile(actualPath, caSink); + got = caSink.finish().first; + break; + } + case FileIngestionMethod::Git: { + got = dumpGitHash(outputHash.hashType, actualPath); /* hash the output itself, like the other ingestion methods */ break; } + } }, }, outputHash.method.raw); - auto got = caSink.finish().first; auto optCA = ContentAddressWithReferences::fromPartsOpt( outputHash.method, diff --git a/src/libstore/content-address.cc b/src/libstore/content-address.cc index e290a8d387eb..d7f4267d75fc 100644 --- a/src/libstore/content-address.cc +++ b/src/libstore/content-address.cc @@ -11,6 +11,9 @@ std::string makeFileIngestionPrefix(FileIngestionMethod m) return ""; case FileIngestionMethod::Recursive: return "r:"; + case FileIngestionMethod::Git: + experimentalFeatureSettings.require(Xp::GitHashing); + return "git:"; default: throw Error("impossible, caught both cases"); } @@ -32,6 +35,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m) ContentAddressMethod method = FileIngestionMethod::Flat; if (splitPrefix(m, "r:")) method = FileIngestionMethod::Recursive; + else if (splitPrefix(m, "git:")) { /* chained: at most one method prefix is consumed */ + experimentalFeatureSettings.require(Xp::GitHashing); + method = FileIngestionMethod::Git; + } else if (splitPrefix(m, "text:")) method = TextIngestionMethod {}; return method; @@ -99,6 +106,10 @@ static std::pair parseContentAddressMethodPrefix auto method = FileIngestionMethod::Flat; if (splitPrefix(rest, "r:")) method = 
FileIngestionMethod::Recursive; + else if (splitPrefix(rest, "git:")) { /* chained: "r:" and "git:" are mutually exclusive prefixes */ + experimentalFeatureSettings.require(Xp::GitHashing); + method = FileIngestionMethod::Git; + } HashType hashType = parseHashType_(); return { std::move(method), diff --git a/src/libstore/content-address.hh b/src/libstore/content-address.hh index c4d619bdc63f..5d92cd575f9d 100644 --- a/src/libstore/content-address.hh +++ b/src/libstore/content-address.hh @@ -39,12 +39,26 @@ enum struct FileIngestionMethod : uint8_t { /** * Flat-file hashing. Directly ingest the contents of a single file */ - Flat = false, + Flat, + /** * Recursive (or NAR) hashing. Serializes the file-system object in Nix * Archive format and ingest that */ - Recursive = true + Recursive, + + /** + * Git hashing. In particular files are hashed as git "blobs", and + * directories are hashed as git "trees". + * + * @note Git's data model is slightly different, in that a plain + * file doesn't have an executable bit, directory entries do + * instead. We decide to treat a bare file as non-executable by fiat, + * as we do with `FileIngestionMethod::Flat` which also lacks this + * information. Thus, Git can encode some but not all of Nix's "File + * System Objects", and this sort of hashing is likewise partial. + */ + Git, }; /** diff --git a/src/libstore/daemon.cc b/src/libstore/daemon.cc index 8cbf6f044faf..af6d6d69b68e 100644 --- a/src/libstore/daemon.cc +++ b/src/libstore/daemon.cc @@ -13,6 +13,7 @@ #include "archive.hh" #include "derivations.hh" #include "args.hh" +#include "git.hh" namespace nix::daemon { @@ -462,13 +463,17 @@ static void performOp(TunnelLogger * logger, ref store, TeeSource savedNARSource(from, saved); ParseSink sink; /* null sink; just parse the NAR */ parseDump(sink, savedNARSource); - } else { + } else if (method == FileIngestionMethod::Flat) { /* Incrementally parse the NAR file, stripping the metadata, and streaming the sole file we expect into `saved`. 
*/ RetrieveRegularNARSink savedRegular { saved }; parseDump(savedRegular, from); if (!savedRegular.regular) throw Error("regular file expected"); + } else { + /* Should have validated above that no other file ingestion + method was used. */ + assert(false); } }); logger->startWork(); diff --git a/src/libstore/local-store.cc b/src/libstore/local-store.cc index 17b4ecc73125..659bcf213b91 100644 --- a/src/libstore/local-store.cc +++ b/src/libstore/local-store.cc @@ -1,5 +1,6 @@ #include "local-store.hh" #include "globals.hh" +#include "git.hh" #include "archive.hh" #include "pathlocks.hh" #include "worker-protocol.hh" @@ -1322,10 +1323,22 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name delTempDir = std::make_unique(tempDir); tempPath = tempDir + "/x"; - if (method == FileIngestionMethod::Recursive) - restorePath(tempPath, bothSource); - else + switch (method) { + case FileIngestionMethod::Flat: writeFile(tempPath, bothSource); + break; + case FileIngestionMethod::Recursive: + restorePath(tempPath, bothSource); + break; + case FileIngestionMethod::Git: + restoreGit(tempPath, bothSource, [&](Hash childHash) { + return this->Store::toRealPath(this->makeFixedOutputPath("git", FixedOutputInfo { + .method = FileIngestionMethod::Git, + .hash = childHash, + })); + }); + break; + } dump.clear(); } @@ -1364,10 +1377,22 @@ StorePath LocalStore::addToStoreFromDump(Source & source0, std::string_view name if (inMemory) { StringSource dumpSource { dump }; /* Restore from the NAR in memory. 
*/ - if (method == FileIngestionMethod::Recursive) - restorePath(realPath, dumpSource); - else + switch (method) { + case FileIngestionMethod::Flat: writeFile(realPath, dumpSource); + break; + case FileIngestionMethod::Recursive: + restorePath(realPath, dumpSource); + break; + case FileIngestionMethod::Git: + restoreGit(realPath, dumpSource, [&](Hash childHash) { + return this->Store::toRealPath(this->makeFixedOutputPath("git", FixedOutputInfo { + .method = FileIngestionMethod::Git, + .hash = childHash, + })); + }); + break; + } } else { /* Move the temporary path we restored above. */ moveFile(tempPath, realPath); @@ -1866,25 +1891,37 @@ ContentAddress LocalStore::hashCAPath( const std::string_view pathHash ) { - HashModuloSink caSink ( hashType, std::string(pathHash) ); + Hash hash { htSHA256 }; // throwaway def to appease C++ std::visit(overloaded { [&](const TextIngestionMethod &) { + HashModuloSink caSink ( hashType, std::string(pathHash) ); readFile(path, caSink); + hash = caSink.finish().first; }, [&](const FileIngestionMethod & m2) { switch (m2) { - case FileIngestionMethod::Recursive: + case FileIngestionMethod::Recursive: { + HashModuloSink caSink ( hashType, std::string(pathHash) ); dumpPath(path, caSink); + hash = caSink.finish().first; break; - case FileIngestionMethod::Flat: + } + case FileIngestionMethod::Flat: { + HashModuloSink caSink ( hashType, std::string(pathHash) ); readFile(path, caSink); + hash = caSink.finish().first; break; } + case FileIngestionMethod::Git: { + hash = dumpGitHash(hashType, path); + break; + } + } }, }, method.raw); return ContentAddress { .method = method, - .hash = caSink.finish().first, + .hash = std::move(hash), }; } diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index f0dfcb19b77e..3cdd6b645136 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -69,9 +69,10 @@ struct NarAccessor : public FSAccessor createMember(path, {FSAccessor::Type::tDirectory, false, 0, 0}); } 
- void createRegularFile(const Path & path) override + void createRegularFile(const Path & path, bool executable = false) override { createMember(path, {FSAccessor::Type::tRegular, false, 0, 0}); + if (executable) isExecutable(); } void closeRegularFile() override diff --git a/src/libstore/remote-store.cc b/src/libstore/remote-store.cc index a639346d1ada..6f4cdb5e4540 100644 --- a/src/libstore/remote-store.cc +++ b/src/libstore/remote-store.cc @@ -13,6 +13,7 @@ #include "derivations.hh" #include "pool.hh" #include "finally.hh" +#include "git.hh" #include "logging.hh" #include "callback.hh" #include "filetransfer.hh" diff --git a/src/libstore/store-api.cc b/src/libstore/store-api.cc index 28689e100e24..5ea2219bd718 100644 --- a/src/libstore/store-api.cc +++ b/src/libstore/store-api.cc @@ -10,6 +10,7 @@ #include "references.hh" #include "archive.hh" #include "callback.hh" +#include "git.hh" #include "remote-store.hh" #include @@ -114,8 +115,8 @@ StorePath Store::followLinksToStorePath(std::string_view path) const for paths copied by addToStore() or produced by fixed-output derivations: the string "fixed:out:::", where - = "r:" for recursive (path) hashes, or "" for flat - (file) hashes + = "r:" for recursive (path) hashes, "git:" for git + paths, or "" for flat (file) hashes = "md5", "sha1" or "sha256" = base-16 representation of the path or flat hash of the contents of the path (or expected contents of the @@ -184,6 +185,9 @@ static std::string makeType( StorePath Store::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const { + if (info.method == FileIngestionMethod::Git && info.hash.type != htSHA1) + throw Error("Git file ingestion must use sha1 hash"); + if (info.hash.type == htSHA256 && info.method == FileIngestionMethod::Recursive) { return makeStorePath(makeType(*this, "source", info.references), info.hash, name); } else { @@ -228,9 +232,22 @@ StorePath Store::makeFixedOutputPathFromCA(std::string_view name, const ContentA std::pair 
Store::computeStorePathForPath(std::string_view name, const Path & srcPath, FileIngestionMethod method, HashType hashAlgo, PathFilter & filter) const { - Hash h = method == FileIngestionMethod::Recursive - ? hashPath(hashAlgo, srcPath, filter).first - : hashFile(hashAlgo, srcPath); + Hash h { htSHA256 }; // throwaway def to appease C++ + switch (method) { + case FileIngestionMethod::Recursive: { + h = hashPath(hashAlgo, srcPath, filter).first; + break; + } + case FileIngestionMethod::Git: { + h = hashGit(hashAlgo, srcPath, filter).first; + break; + } + case FileIngestionMethod::Flat: { + h = hashFile(hashAlgo, srcPath); + break; + } + } + FixedOutputInfo caInfo { .method = method, .hash = h, @@ -263,10 +280,29 @@ StorePath Store::addToStore( { Path srcPath(absPath(_srcPath)); auto source = sinkToSource([&](Sink & sink) { - if (method == FileIngestionMethod::Recursive) + switch (method) { + case FileIngestionMethod::Recursive: { dumpPath(srcPath, sink, filter); - else + break; + } + case FileIngestionMethod::Git: { + // recursively add to store if path is a directory + + struct stat st; + if (lstat(srcPath.c_str(), &st)) + throw SysError("getting attributes of path '%1%'", srcPath); + if (S_ISDIR(st.st_mode)) + for (auto & i : readDirectory(srcPath)) + addToStore("git", srcPath + "/" + i.name, method, hashAlgo, filter, repair); + + dumpGit(hashAlgo, srcPath, sink, filter); + break; + } + case FileIngestionMethod::Flat: { readFile(srcPath, sink); + break; + } + } }); return addToStoreFromDump(*source, name, method, hashAlgo, repair, references); } @@ -430,6 +466,8 @@ ValidPathInfo Store::addToStoreSlow(std::string_view name, const Path & srcPath, auto hash = method == FileIngestionMethod::Recursive && hashAlgo == htSHA256 ? narHash + : method == FileIngestionMethod::Git + ? 
hashGit(hashAlgo, srcPath).first : caHashSink.finish().first; if (expectedCAHash && expectedCAHash != hash) diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index 203455b63ec6..7229e61e17b3 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -12,7 +12,7 @@ struct ExperimentalFeatureDetails std::string_view description; }; -constexpr std::array xpFeatureDetails = {{ +constexpr std::array xpFeatureDetails = {{ { .tag = Xp::CaDerivations, .name = "ca-derivations", @@ -70,6 +70,14 @@ constexpr std::array xpFeatureDetails = {{ [`nix`](@docroot@/command-ref/new-cli/nix.md) for details. )", }, + { + .tag = Xp::GitHashing, + .name = "git-hashing", + .description = R"( + Allow creating (content-addressed) store objects which are hashed via Git's hashing algorithm. + These store objects will not be understandable by older versions of Nix. + )", + }, { .tag = Xp::RecursiveNix, .name = "recursive-nix", diff --git a/src/libutil/experimental-features.hh b/src/libutil/experimental-features.hh index add592ae6245..f0e09a41f121 100644 --- a/src/libutil/experimental-features.hh +++ b/src/libutil/experimental-features.hh @@ -21,6 +21,7 @@ enum struct ExperimentalFeature ImpureDerivations, Flakes, NixCommand, + GitHashing, RecursiveNix, NoUrlLiterals, FetchClosure, diff --git a/src/libutil/fs-sink.cc b/src/libutil/fs-sink.cc index a08a723a4fce..a8de496874de 100644 --- a/src/libutil/fs-sink.cc +++ b/src/libutil/fs-sink.cc @@ -23,10 +23,10 @@ void RestoreSink::createDirectory(const Path & path) throw SysError("creating directory '%1%'", p); }; -void RestoreSink::createRegularFile(const Path & path) +void RestoreSink::createRegularFile(const Path & path, bool executable) { Path p = dstPath + path; - fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666); + fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, executable ? 
0777 : 0666); if (!fd) throw SysError("creating file '%1%'", p); } @@ -74,4 +74,30 @@ void RestoreSink::createSymlink(const Path & path, const std::string & target) nix::createSymlink(target, p); } +void RestoreSink::copyFile(const Path & source) +{ + FdSink sink(fd.get()); + readFile(source, sink); +} + +void RestoreSink::copyDirectory(const Path & source, const Path & destination) +{ + Path p = dstPath + destination; + createDirectory(destination); + for (auto & i : readDirectory(source)) { + struct stat st; + Path entry = source + "/" + i.name; + if (lstat(entry.c_str(), &st)) + throw SysError("getting attributes of path '%1%'", entry); + if (S_ISREG(st.st_mode)) { + createRegularFile(destination + "/" + i.name, st.st_mode & S_IXUSR); + copyFile(entry); + } else if (S_ISDIR(st.st_mode)) + copyDirectory(entry, destination + "/" + i.name); + else + throw Error("Unknown file: %s", entry); + } +} + + } diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh index 6837e2fc4b2b..afd41c9cc3b2 100644 --- a/src/libutil/fs-sink.hh +++ b/src/libutil/fs-sink.hh @@ -13,13 +13,16 @@ struct ParseSink { virtual void createDirectory(const Path & path) { }; - virtual void createRegularFile(const Path & path) { }; + virtual void createRegularFile(const Path & path, bool executable = false) { }; virtual void closeRegularFile() { }; virtual void isExecutable() { }; virtual void preallocateContents(uint64_t size) { }; virtual void receiveContents(std::string_view data) { }; virtual void createSymlink(const Path & path, const std::string & target) { }; + + virtual void copyFile(const Path & source) { }; + virtual void copyDirectory(const Path & source, const Path & destination) { }; }; struct RestoreSink : ParseSink @@ -30,13 +33,17 @@ struct RestoreSink : ParseSink void createDirectory(const Path & path) override; - void createRegularFile(const Path & path) override; + void createRegularFile(const Path & path, bool executable = false) override; void closeRegularFile() override; 
void isExecutable() override; void preallocateContents(uint64_t size) override; void receiveContents(std::string_view data) override; void createSymlink(const Path & path, const std::string & target) override; + + void copyFile(const Path & source) override; + void copyDirectory(const Path & source, const Path & destination) override; }; + } diff --git a/src/libutil/git.cc b/src/libutil/git.cc index f35c2fdb75cf..7dbea3552f0a 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -1,8 +1,239 @@ -#include "git.hh" +#include +#include +#include +#include #include +#include // for strcasecmp + +#include +#include +#include +#include +#include + +#include "util.hh" +#include "config.hh" +#include "hash.hh" + +#include "git.hh" +#include "serialise.hh" + +using namespace std::string_literals; + namespace nix { + +static void parse(ParseSink & sink, Source & source, const Path & path, std::function getChildPath); + +// Converts a Path to a ParseSink +void restoreGit(const Path & path, Source & source, std::function getChildPath) +{ + RestoreSink sink; + sink.dstPath = path; + parseGit(sink, source, getChildPath); +} + +void parseGit(ParseSink & sink, Source & source, std::function getChildPath) +{ + parse(sink, source, "", getChildPath); +} + +static std::string getStringUntil(Source & source, char byte) +{ + std::string s; + char n[1]; + source(std::string_view { n, 1 }); + while (*n != byte) { + s += *n; + source(std::string_view { n, 1 }); + } + return s; +} + +static std::string getString(Source & source, int n) +{ + std::string v; + v.resize(n); + source(v); + return v; +} + +static void parse(ParseSink & sink, Source & source, const Path & path, std::function getChildPath) +{ + experimentalFeatureSettings.require(Xp::GitHashing); + + auto type = getString(source, 5); + + if (type == "blob ") { + sink.createRegularFile(path); + + unsigned long long size = std::stoull(getStringUntil(source, 0)); /* stoull: blob sizes may exceed INT_MAX */ + + sink.preallocateContents(size); + + unsigned long long left = 
size; + std::string buf; + buf.reserve(65536); + + while (left) { + checkInterrupt(); + buf.resize(std::min((unsigned long long)buf.capacity(), left)); + source(buf); + sink.receiveContents(buf); + left -= buf.size(); + } + } else if (type == "tree ") { + unsigned long long size = std::stoull(getStringUntil(source, 0)); /* stoull: keep the full unsigned range of the declared size */ + unsigned long long left = size; + + sink.createDirectory(path); + + while (left) { + std::string perms = getStringUntil(source, ' '); + left -= perms.size(); + left -= 1; + + int perm = std::stoi(perms); + if (perm != 100644 && perm != 100755 && perm != 644 && perm != 755 && perm != 40000) + throw Error("Unknown Git permission: %d", perm); + + std::string name = getStringUntil(source, 0); + left -= name.size(); + left -= 1; + + std::string hashs = getString(source, 20); + left -= 20; + + Hash hash(htSHA1); + std::copy(hashs.begin(), hashs.end(), hash.hash); + + Path entry0 = getChildPath(hash); + Path entry = absPath(entry0); + + struct stat st; + if (lstat(entry.c_str(), &st)) + throw SysError("getting attributes of path '%1%'", entry); + + if (S_ISREG(st.st_mode)) { + if (perm == 40000) + throw SysError("file is a file but expected to be a directory '%1%'", entry); + + sink.createRegularFile(path + "/" + name, perm == 100755 || perm == 755); + + sink.copyFile(entry); + } else if (S_ISDIR(st.st_mode)) { + if (perm != 40000) + throw SysError("file is a directory but expected to be a file '%1%'", entry); + + sink.copyDirectory(entry0, path + "/" + name); + } else throw Error("file '%1%' has an unsupported type", entry); + } + } else throw Error("input doesn't look like a Git object"); +} + +// TODO stream file into sink, rather than reading into vector +GitMode dumpGitBlob(const Path & path, const struct stat st, Sink & sink) +{ + experimentalFeatureSettings.require(Xp::GitHashing); + + auto s = fmt("blob %d\0%s"s, std::to_string(st.st_size), readFile(path)); + + std::string v; + std::copy(s.begin(), s.end(), std::back_inserter(v)); + sink(v); + 
return st.st_mode & S_IXUSR + ? GitMode::Executable + : GitMode::Regular; +} + +GitMode dumpGitTree(const GitTree & entries, Sink & sink) +{ + experimentalFeatureSettings.require(Xp::GitHashing); + + std::vector v1; + + for (auto & i : entries) { + unsigned int mode; + switch (i.second.first) { + case GitMode::Directory: mode = 40000; break; + case GitMode::Executable: mode = 100755; break; + case GitMode::Regular: mode = 100644; break; + } + auto name = i.first; + if (i.second.first == GitMode::Directory) + name.pop_back(); + auto s1 = fmt("%d %s", mode, name); + std::copy(s1.begin(), s1.end(), std::back_inserter(v1)); + v1.push_back(0); + std::copy(i.second.second.hash, i.second.second.hash + 20, std::back_inserter(v1)); + } + + std::string v2; + auto s2 = fmt("tree %d"s, v1.size()); + std::copy(s2.begin(), s2.end(), std::back_inserter(v2)); + v2.push_back(0); + std::copy(v1.begin(), v1.end(), std::back_inserter(v2)); + + sink(v2); + + return GitMode::Directory; +} + +static std::pair dumpGitHashInternal(HashType ht, const Path & path, PathFilter & filter); + +static GitMode dumpGitInternal(HashType ht, const Path & path, Sink & sink, PathFilter & filter) +{ + struct stat st; + GitMode perm; + if (lstat(path.c_str(), &st)) + throw SysError("getting attributes of path '%1%'", path); + + if (S_ISREG(st.st_mode)) + perm = dumpGitBlob(path, st, sink); + else if (S_ISDIR(st.st_mode)) { + GitTree entries; + for (auto & i : readDirectory(path)) + if (filter(path + "/" + i.name)) { + auto result = dumpGitHashInternal(ht, path + "/" + i.name, filter); + + // correctly observe git order, see + // https://github.com/mirage/irmin/issues/352 + auto name = i.name; + if (result.first == GitMode::Directory) + name += "/"; + + entries.insert_or_assign(name, result); + } + perm = dumpGitTree(entries, sink); + } else throw Error("file '%1%' has an unsupported type", path); + + return perm; +} + + +static std::pair dumpGitHashInternal(HashType ht, const Path & path, PathFilter & 
filter) +{ + HashSink hashSink(ht); /* automatic storage: the former heap allocation was never freed */ + auto perm = dumpGitInternal(ht, path, hashSink, filter); + auto hash = hashSink.finish().first; + return std::pair { perm, hash }; +} + +Hash dumpGitHash(HashType ht, const Path & path, PathFilter & filter) +{ + experimentalFeatureSettings.require(Xp::GitHashing); + + return dumpGitHashInternal(ht, path, filter).second; +} + +void dumpGit(HashType ht, const Path & path, Sink & sink, PathFilter & filter) +{ + experimentalFeatureSettings.require(Xp::GitHashing); + + dumpGitInternal(ht, path, sink, filter); +} + namespace git { std::optional parseLsRemoteLine(std::string_view line) @@ -22,4 +253,5 @@ std::optional parseLsRemoteLine(std::string_view line) } } + } diff --git a/src/libutil/git.hh b/src/libutil/git.hh index bf2b9a2869ab..7563cb95750a 100644 --- a/src/libutil/git.hh +++ b/src/libutil/git.hh @@ -5,8 +5,36 @@ #include #include +#include "types.hh" +#include "serialise.hh" +#include "hash.hh" +#include "fs-sink.hh" + namespace nix { +enum struct GitMode { + Directory, + Executable, + Regular, +}; + +void restoreGit(const Path & path, Source & source, std::function getChildPath); + +void parseGit(ParseSink & sink, Source & source, std::function getChildPath); + +// Dumps a single file to a sink +GitMode dumpGitBlob(const Path & path, const struct stat st, Sink & sink); + +typedef std::map> GitTree; + +// Dumps a representation of a git tree to a sink +GitMode dumpGitTree(const GitTree & entries, Sink & sink); + +// Recursively dumps path, hashing as we go +Hash dumpGitHash(HashType ht, const Path & path, PathFilter & filter = defaultPathFilter); + +void dumpGit(HashType ht, const Path & path, Sink & sink, PathFilter & filter = defaultPathFilter); + namespace git { /** diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc index 2c36d9d9498e..f04c6b9041df 100644 --- a/src/libutil/hash.cc +++ b/src/libutil/hash.cc @@ -8,6 +8,7 @@ #include "args.hh" #include "hash.hh" #include "archive.hh" +#include "git.hh" 
#include "split.hh" #include "util.hh" @@ -375,6 +376,13 @@ HashResult hashPath( return sink.finish(); } +HashResult hashGit( + HashType ht, const Path & path, PathFilter & filter) +{ + HashSink sink(ht); + dumpGit(ht, path, sink, filter); + return sink.finish(); +} Hash compressHash(const Hash & hash, unsigned int newSize) { diff --git a/src/libutil/hash.hh b/src/libutil/hash.hh index c3aa5cd81843..0d4abd72b8f1 100644 --- a/src/libutil/hash.hh +++ b/src/libutil/hash.hh @@ -161,6 +161,12 @@ typedef std::pair HashResult; HashResult hashPath(HashType ht, const Path & path, PathFilter & filter = defaultPathFilter); +/** + * Compute the git blob/tree hash of the given path. + */ +HashResult hashGit(HashType ht, const Path & path, + PathFilter & filter = defaultPathFilter); + /** * Compress a hash to the specified number of bytes by cyclically * XORing bytes together. diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 3d5121a19fa6..e5205ce79f00 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -74,6 +74,10 @@ void Source::operator () (char * data, size_t len) } } +void Source::operator () (std::string_view data) +{ + (*this)((char *)data.data(), data.size()); +} void Source::drainInto(Sink & sink) { diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 333c254ea8e3..71c40c83a7d3 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -72,6 +72,7 @@ struct Source * an error if it is not going to be available. 
*/ void operator () (char * data, size_t len); + void operator () (std::string_view data); /** * Store up to ‘len’ in the buffer pointed to by ‘data’, and diff --git a/src/nix/add-to-store.cc b/src/nix/add-to-store.cc index 39e5cc99dd2f..f3e9249d2bbd 100644 --- a/src/nix/add-to-store.cc +++ b/src/nix/add-to-store.cc @@ -2,6 +2,7 @@ #include "common-args.hh" #include "store-api.hh" #include "archive.hh" +#include "git.hh" using namespace nix; @@ -34,11 +35,22 @@ struct CmdAddToStore : MixDryRun, StoreCommand auto narHash = hashString(htSHA256, sink.s); - Hash hash = narHash; - if (ingestionMethod == FileIngestionMethod::Flat) { + Hash hash { htSHA256 }; // throwaway def to appease C++ + switch (ingestionMethod) { + case FileIngestionMethod::Recursive: { + hash = narHash; + break; + } + case FileIngestionMethod::Flat: { HashSink hsink(htSHA256); readFile(path, hsink); hash = hsink.finish().first; + break; + } + case FileIngestionMethod::Git: { + hash = dumpGitHash(htSHA1, path); + break; + } } ValidPathInfo info { @@ -102,5 +114,26 @@ struct CmdAddPath : CmdAddToStore } }; +struct CmdAddGit : CmdAddToStore +{ + CmdAddGit() + { + ingestionMethod = FileIngestionMethod::Git; + } + + std::string description() override + { + return "add a path to the Nix store"; + } + + std::string doc() override + { + return + #include "add-path.md" + ; + } +}; + static auto rCmdAddFile = registerCommand2({"store", "add-file"}); static auto rCmdAddPath = registerCommand2({"store", "add-path"}); +static auto rCmdAddGit = registerCommand2({"store", "add-git"}); diff --git a/src/nix/hash.cc b/src/nix/hash.cc index 9feca934557c..368227ab6288 100644 --- a/src/nix/hash.cc +++ b/src/nix/hash.cc @@ -5,6 +5,7 @@ #include "shared.hh" #include "references.hh" #include "archive.hh" +#include "git.hh" using namespace nix; @@ -65,9 +66,11 @@ struct CmdHashBase : Command { switch (mode) { case FileIngestionMethod::Flat: - return "print cryptographic hash of a regular file"; + return "print 
cryptographic hash of a regular file"; case FileIngestionMethod::Recursive: return "print cryptographic hash of the NAR serialisation of a path"; + case FileIngestionMethod::Git: + return "print cryptographic hash of the Git serialisation of a path"; default: assert(false); }; @@ -84,15 +87,21 @@ struct CmdHashBase : Command hashSink = std::make_unique(ht); switch (mode) { - case FileIngestionMethod::Flat: + case FileIngestionMethod::Flat: { readFile(path, *hashSink); break; - case FileIngestionMethod::Recursive: + } + case FileIngestionMethod::Recursive: { dumpPath(path, *hashSink); break; } + case FileIngestionMethod::Git: + dumpGit(ht, path, *hashSink); + break; + } + + auto h = hashSink->finish().first; - Hash h = hashSink->finish().first; if (truncate && h.hashSize > 20) h = compressHash(h, 20); logger->cout(h.to_string(base, base == SRI)); } @@ -133,6 +142,7 @@ struct CmdHash : NixMultiCommand : MultiCommand({ {"file", []() { return make_ref(FileIngestionMethod::Flat);; }}, {"path", []() { return make_ref(FileIngestionMethod::Recursive); }}, + {"git", []() { return make_ref(FileIngestionMethod::Git); }}, {"to-base16", []() { return make_ref(Base16); }}, {"to-base32", []() { return make_ref(Base32); }}, {"to-base64", []() { return make_ref(Base64); }}, diff --git a/tests/functional/git-hashing/common.sh b/tests/functional/git-hashing/common.sh new file mode 100644 index 000000000000..5de96e74f160 --- /dev/null +++ b/tests/functional/git-hashing/common.sh @@ -0,0 +1,11 @@ +source ../common.sh + +clearStore +clearCache + +# Need backend to support git-hashing too +requireDaemonNewerThan "2.18.0pre20230908" + +enableFeatures "git-hashing" + +restartDaemon diff --git a/tests/functional/git-hashing/local.mk b/tests/functional/git-hashing/local.mk new file mode 100644 index 000000000000..ebec019402b9 --- /dev/null +++ b/tests/functional/git-hashing/local.mk @@ -0,0 +1,7 @@ +git-hashing-tests := \ + $(d)/simple.sh + +install-tests-groups += git-hashing + +clean-files 
+= \ + $(d)/config.nix diff --git a/tests/functional/git-hashing/simple.sh b/tests/functional/git-hashing/simple.sh new file mode 100644 index 000000000000..144a9a5d6a8e --- /dev/null +++ b/tests/functional/git-hashing/simple.sh @@ -0,0 +1,36 @@ +source common.sh + +try () { + hash=$(nix hash git --base16 --type sha1 $TEST_ROOT/hash-path) + if test "$hash" != "$1"; then + echo "git hash, expected $1, got $hash" + exit 1 + fi +} + +rm -rf $TEST_ROOT/hash-path +mkdir $TEST_ROOT/hash-path +echo "Hello World" > $TEST_ROOT/hash-path/hello + +try "117c62a8c5e01758bd284126a6af69deab9dbbe2" + +rm -rf $TEST_ROOT/dummy1 +echo Hello World! > $TEST_ROOT/dummy1 +path1=$(nix store add-git $TEST_ROOT/dummy1) +hash1=$(nix-store -q --hash $path1) +test "$hash1" = "sha256:1brffhvj2c0z6x8qismd43m0iy8dsgfmy10bgg9w11szway2wp9v" + +rm -rf $TEST_ROOT/dummy2 +mkdir -p $TEST_ROOT/dummy2 +echo Hello World! > $TEST_ROOT/dummy2/hello +path2=$(nix store add-git $TEST_ROOT/dummy2) +hash2=$(nix-store -q --hash $path2) +test "$hash2" = "sha256:1vhv7zxam7x277q0y0jcypm7hwhccbzss81vkdgf0ww5sm2am4y0" + +rm -rf $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3 +mkdir -p $TEST_ROOT/dummy3/hello +echo Hello World! > $TEST_ROOT/dummy3/hello/hello +path3=$(nix store add-git $TEST_ROOT/dummy3) +hash3=$(nix-store -q --hash $path3) +test "$hash3" = "sha256:1i2x80840igikhbyy7nqf08ymx3a6n83x1fzyrxvddf0sdl5nqvp"