Skip to content

Commit

Permalink
Git object hashing in libutil
Browse files Browse the repository at this point in the history
This is the core functionality but just unit-tested and not yet made
part of the store layer. This is because there is some tech debt around
(a) repeated boilerplate hashing objects (b) better integration of the
new `SourceAccessor` type that needs to be cleaned up first.

Part of RFC 133

Co-Authored-By: Matthew Bauer <mjbauer95@gmail.com>
Co-Authored-By: Carlo Nucera <carlo.nucera@protonmail.com>
Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
Co-authored-by: Florian Klink <flokli@flokli.de>
  • Loading branch information
5 people committed Nov 10, 2023
1 parent 9afa697 commit 20b95d6
Show file tree
Hide file tree
Showing 13 changed files with 666 additions and 37 deletions.
8 changes: 8 additions & 0 deletions src/libutil/experimental-features.cc
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,14 @@ constexpr std::array<ExperimentalFeatureDetails, numXpFeatures> xpFeatureDetails
[`nix`](@docroot@/command-ref/new-cli/nix.md) for details.
)",
},
{
.tag = Xp::GitHashing,
.name = "git-hashing",
.description = R"(
Allow creating (content-addressed) store objects which are hashed via Git's hashing algorithm.
These store objects will not be understandable by older versions of Nix.
)",
},
{
.tag = Xp::RecursiveNix,
.name = "recursive-nix",
Expand Down
1 change: 1 addition & 0 deletions src/libutil/experimental-features.hh
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ enum struct ExperimentalFeature
Flakes,
FetchTree,
NixCommand,
GitHashing,
RecursiveNix,
NoUrlLiterals,
FetchClosure,
Expand Down
261 changes: 257 additions & 4 deletions src/libutil/git.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,263 @@
#include <cerrno>
#include <algorithm>
#include <vector>
#include <map>
#include <regex>
#include <strings.h> // for strcasecmp

#include "signals.hh"
#include "config.hh"
#include "hash.hh"
#include "posix-source-accessor.hh"

#include "git.hh"
#include "serialise.hh"

#include <regex>
namespace nix::git {

using namespace nix;
using namespace std::string_literals;

std::optional<Mode> decodeMode(RawMode m) {
switch (m) {
case (RawMode) Mode::Directory:
case (RawMode) Mode::Executable:
case (RawMode) Mode::Regular:
case (RawMode) Mode::Symlink:
return (Mode) m;
default:
return std::nullopt;
}
}


static std::string getStringUntil(Source & source, char byte)
{
std::string s;
char n[1];
source(std::string_view { n, 1 });
while (*n != byte) {
s += *n;
source(std::string_view { n, 1 });
}
return s;
}


static std::string getString(Source & source, int n)
{
std::string v;
v.resize(n);
source(v);
return v;
}


void parse(
ParseSink & sink,
const Path & sinkPath,
Source & source,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);

auto type = getString(source, 5);

if (type == "blob ") {
sink.createRegularFile(sinkPath);

unsigned long long size = std::stoi(getStringUntil(source, 0));

sink.preallocateContents(size);

unsigned long long left = size;
std::string buf;
buf.reserve(65536);

while (left) {
checkInterrupt();
buf.resize(std::min((unsigned long long)buf.capacity(), left));
source(buf);
sink.receiveContents(buf);
left -= buf.size();
}
} else if (type == "tree ") {
unsigned long long size = std::stoi(getStringUntil(source, 0));
unsigned long long left = size;

sink.createDirectory(sinkPath);

while (left) {
std::string perms = getStringUntil(source, ' ');
left -= perms.size();
left -= 1;

RawMode rawMode = std::stoi(perms, 0, 8);
auto modeOpt = decodeMode(rawMode);
if (!modeOpt)
throw Error("Unknown Git permission: %o", perms);
auto mode = std::move(*modeOpt);

std::string name = getStringUntil(source, '\0');
left -= name.size();
left -= 1;

std::string hashs = getString(source, 20);
left -= 20;

Hash hash(htSHA1);
std::copy(hashs.begin(), hashs.end(), hash.hash);

hook(name, TreeEntry {
.mode = mode,
.hash = hash,
});

if (mode == Mode::Executable)
sink.isExecutable();
}
} else throw Error("input doesn't look like a Git object");
}


std::optional<Mode> convertMode(SourceAccessor::Type type)
{
switch (type) {
case SourceAccessor::tSymlink: return Mode::Symlink;
case SourceAccessor::tRegular: return Mode::Regular;
case SourceAccessor::tDirectory: return Mode::Directory;
case SourceAccessor::tMisc: return std::nullopt;
default: abort();
}
}


void restore(ParseSink & sink, Source & source, std::function<RestoreHook> hook)
{
parse(sink, "", source, [&](Path name, TreeEntry entry) {
auto [accessor, from] = hook(entry.hash);
auto stat = accessor->lstat(from);
auto gotOpt = convertMode(stat.type);
if (!gotOpt)
throw Error("file '%s' (git hash %s) has an unsupported type",
from,
entry.hash.to_string(HashFormat::Base16, false));
auto & got = *gotOpt;
if (got != entry.mode)
throw Error("git mode of file '%s' (git hash %s) is %o but expected %o",
from,
entry.hash.to_string(HashFormat::Base16, false),
(RawMode) got,
(RawMode) entry.mode);
copyRecursive(
*accessor, from,
sink, name);
});
}


void dumpBlobPrefix(
uint64_t size, Sink & sink,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
auto s = fmt("blob %d\0"s, std::to_string(size));
sink(s);
}


void dumpTree(const Tree & entries, Sink & sink,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);

std::string v1;

for (auto & [name, entry] : entries) {
auto name2 = name;
if (entry.mode == Mode::Directory) {
assert(name2.back() == '/');
name2.pop_back();
}
v1 += fmt("%o %s\0"s, static_cast<RawMode>(entry.mode), name2);
std::copy(entry.hash.hash, entry.hash.hash + entry.hash.hashSize, std::back_inserter(v1));
}

{
auto s = fmt("tree %d\0"s, v1.size());
sink(s);
}

sink(v1);
}


Mode dump(
SourceAccessor & accessor, const CanonPath & path,
Sink & sink,
std::function<DumpHook> hook,
PathFilter & filter,
const ExperimentalFeatureSettings & xpSettings)
{
auto st = accessor.lstat(path);

switch (st.type) {
case SourceAccessor::tRegular:
{
accessor.readFile(path, sink, [&](uint64_t size) {
dumpBlobPrefix(size, sink, xpSettings);
});
return st.isExecutable
? Mode::Executable
: Mode::Regular;
}

case SourceAccessor::tDirectory:
{
Tree entries;
for (auto & [name, _] : accessor.readDirectory(path)) {
auto child = path + name;
if (!filter(child.abs())) continue;

auto entry = hook(child);

auto name2 = name;
if (entry.mode == Mode::Directory)
name2 += "/";

entries.insert_or_assign(std::move(name2), std::move(entry));
}
dumpTree(entries, sink, xpSettings);
return Mode::Directory;
}

case SourceAccessor::tSymlink:
case SourceAccessor::tMisc:
default:
throw Error("file '%1%' has an unsupported type", path);
}
}


TreeEntry dumpHash(
HashType ht,
SourceAccessor & accessor, const CanonPath & path, PathFilter & filter)
{
std::function<DumpHook> hook;
hook = [&](const CanonPath & path) -> TreeEntry {
auto hashSink = HashSink(ht);
auto mode = dump(accessor, path, hashSink, hook, filter);
auto hash = hashSink.finish().first;
return {
.mode = mode,
.hash = hash,
};
};

return hook(path);
}

namespace nix {
namespace git {

std::optional<LsRemoteRefLine> parseLsRemoteLine(std::string_view line)
{
Expand All @@ -22,4 +276,3 @@ std::optional<LsRemoteRefLine> parseLsRemoteLine(std::string_view line)
}

}
}
Loading

0 comments on commit 20b95d6

Please sign in to comment.