Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix adding large paths to the store #619

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dev-shell
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ exec $s release.nix -A tarball --command "
export NIX_PATH='$NIX_PATH'
export NIX_BUILD_SHELL=$(type -p bash)
export c=\$configureFlags
PATH=\"$(pwd)/inst/bin:\$PATH\"
exec $s release.nix -A build.$(if [ $(uname -s) = Darwin ]; then echo x86_64-darwin; else echo x86_64-linux; fi) --exclude tarball --command '
configureFlags+=\" \$c --prefix=$(pwd)/inst --sysconfdir=$(pwd)/inst/etc\"
return
Expand Down
105 changes: 54 additions & 51 deletions src/libstore/local-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1361,13 +1361,32 @@ void LocalStore::invalidatePath(const Path & path)
}


Path LocalStore::addToStoreFromDump(const string & dump, const string & name,
bool recursive, HashType hashAlgo, bool repair)
struct HashAndReadSource : Source
{
Hash h = hashString(hashAlgo, dump);
Source & readSource;
HashSink hashSink;
bool hashing;
HashAndReadSource(Source & readSource) : readSource(readSource), hashSink(htSHA256)
{
hashing = true;
}
size_t read(unsigned char * data, size_t len)
{
size_t n = readSource.read(data, len);
if (hashing) hashSink(data, n);
return n;
}
HashResult finish()
{
hashing = false;
return hashSink.finish();
}
};

Path dstPath = makeFixedOutputPath(recursive, hashAlgo, h, name);

Path LocalStore::addToStoreFromDump(Source & source, const Path & dstPath,
bool recursive, bool repair)
{
addTempRoot(dstPath);

if (repair || !isValidPath(dstPath)) {
Expand All @@ -1381,25 +1400,10 @@ Path LocalStore::addToStoreFromDump(const string & dump, const string & name,

if (pathExists(dstPath)) deletePath(dstPath);

if (recursive) {
StringSource source(dump);
restorePath(dstPath, source);
} else
writeFile(dstPath, dump);

HashAndReadSource hashSource(source);
restorePath(dstPath, hashSource, recursive);
HashResult hash = hashSource.finish();
canonicalisePathMetaData(dstPath, -1);

/* Register the SHA-256 hash of the NAR serialisation of
the path in the database. We may just have computed it
above (if called with recursive == true and hashAlgo ==
sha256); otherwise, compute it here. */
HashResult hash;
if (recursive) {
hash.first = hashAlgo == htSHA256 ? h : hashString(htSHA256, dump);
hash.second = dump.size();
} else
hash = hashPath(htSHA256, dstPath);

optimisePath(dstPath); // FIXME: combine with hashPath()

ValidPathInfo info;
Expand All @@ -1422,16 +1426,34 @@ Path LocalStore::addToStore(const string & name, const Path & _srcPath,
Path srcPath(absPath(_srcPath));
debug(format("adding ‘%1%’ to the store") % srcPath);

/* Read the whole path into memory. This is not a very scalable
method for very large paths, but `copyPath' is mainly used for
small files. */
StringSink sink;
if (recursive)
dumpPath(srcPath, sink, filter);
else
sink.s = readFile(srcPath);
Hash h = recursive ? hashPath(hashAlgo, srcPath, filter).first : hashFile(hashAlgo, srcPath);
Path dstPath = makeFixedOutputPath(recursive, hashAlgo, h, name);
addTempRoot(dstPath);
if (repair || !isValidPath(dstPath)) {

PathLocks outputLock(singleton<PathSet, Path>(dstPath));

if (repair || !isValidPath(dstPath)) {

if (pathExists(dstPath)) deletePath(dstPath);

copyPath(srcPath, dstPath, filter, recursive);

return addToStoreFromDump(sink.s, name, recursive, hashAlgo, repair);
canonicalisePathMetaData(dstPath, -1);
HashResult hash = hashPath(htSHA256, dstPath);
optimisePath(dstPath); // FIXME: combine with hashPath()

ValidPathInfo info;
info.path = dstPath;
info.hash = hash.first;
info.narSize = hash.second;
registerValidPath(info);
}

outputLock.setDeletion(true);
}

return dstPath;
Copy link
Contributor

@tfc tfc Jul 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Only a minor thing:

How about reducing scope lengths by inverting that outer if-condition and returning early, so the reader needs to track less state in their head:

(also giving repair type const bool helps seeing that no side-effects will happen to it, same with dstPath if applicable... not completely sure, i don't know all the code by heart.)

    if (!repair && isValidPath(dstPath)) {
        return dstPath;
    }

    PathLocks outputLock(singleton<PathSet, Path>(dstPath));

    if (!isValidPath(dstPath)) {

        if (pathExists(dstPath)) deletePath(dstPath);

        copyPath(srcPath, dstPath, filter, recursive);
        canonicalisePathMetaData(dstPath, -1);
        HashResult hash = hashPath(htSHA256, dstPath);
        optimisePath(dstPath); // FIXME: combine with hashPath()

        ValidPathInfo info;
        info.path = dstPath;
        info.hash = hash.first;
        info.narSize = hash.second;
        registerValidPath(info);
    }

    outputLock.setDeletion(true);

    return dstPath;

}


Expand Down Expand Up @@ -1560,24 +1582,6 @@ void LocalStore::exportPath(const Path & path, bool sign,
}


struct HashAndReadSource : Source
{
Source & readSource;
HashSink hashSink;
bool hashing;
HashAndReadSource(Source & readSource) : readSource(readSource), hashSink(htSHA256)
{
hashing = true;
}
size_t read(unsigned char * data, size_t len)
{
size_t n = readSource.read(data, len);
if (hashing) hashSink(data, n);
return n;
}
};


/* Create a temporary directory in the store that won't be
garbage-collected. */
Path LocalStore::createTempDirInStore()
Expand Down Expand Up @@ -1620,8 +1624,7 @@ Path LocalStore::importPath(bool requireSignature, Source & source)
Path deriver = readString(hashAndReadSource);
if (deriver != "") assertStorePath(deriver);

Hash hash = hashAndReadSource.hashSink.finish().first;
hashAndReadSource.hashing = false;
Hash hash = hashAndReadSource.finish().first;

bool haveSignature = readInt(hashAndReadSource) == 1;

Expand Down
4 changes: 2 additions & 2 deletions src/libstore/local-store.hh
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ public:
in `dump', which is either a NAR serialisation (if recursive ==
true) or simply the contents of a regular file (if recursive ==
false). */
Path addToStoreFromDump(const string & dump, const string & name,
bool recursive = true, HashType hashAlgo = htSHA256, bool repair = false);
Path addToStoreFromDump(Source& dump, const Path& dstPath,
bool recursive = true, bool repair = false);

Path addTextToStore(const string & name, const string & s,
const PathSet & references, bool repair = false);
Expand Down
13 changes: 9 additions & 4 deletions src/libstore/remote-store.cc
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,17 @@ Path RemoteStore::addToStore(const string & name, const Path & _srcPath,

openConnection();

if (GET_PROTOCOL_MINOR(daemonVersion) < 15) {
throw Error("adding to the store is not supported with an old Nix daemon");
}

Path srcPath(absPath(_srcPath));

to << wopAddToStore << name
<< ((hashAlgo == htSHA256 && recursive) ? 0 : 1) /* backwards compatibility hack */
<< (recursive ? 1 : 0)
<< printHashType(hashAlgo);
Hash h = recursive ? hashPath(hashAlgo, srcPath, filter).first : hashFile(hashAlgo, srcPath);
Path dstPath = makeFixedOutputPath(recursive, hashAlgo, h, name);

to << wopAddToStore << dstPath
<< (recursive ? 1 : 0);

try {
to.written = 0;
Expand Down
2 changes: 1 addition & 1 deletion src/libstore/worker-protocol.hh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace nix {
#define WORKER_MAGIC_1 0x6e697863
#define WORKER_MAGIC_2 0x6478696f

#define PROTOCOL_VERSION 0x10e
#define PROTOCOL_VERSION 0x10f
#define GET_PROTOCOL_MAJOR(x) ((x) & 0xff00)
#define GET_PROTOCOL_MINOR(x) ((x) & 0x00ff)

Expand Down
118 changes: 114 additions & 4 deletions src/libutil/archive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ static void parse(ParseSink & sink, Source & source, const Path & path)
names[name] = 0;
}
} else if (s == "node") {
if (s.empty()) throw badArchive("entry name missing");
if (name.empty()) throw badArchive("entry name missing");
parse(sink, source, path + "/" + name);
} else
throw badArchive("unknown field " + s);
Expand Down Expand Up @@ -284,10 +284,17 @@ void parseDump(ParseSink & sink, Source & source)
struct RestoreSink : ParseSink
{
Path dstPath;
bool recursive;
AutoCloseFD fd;

RestoreSink(const Path& dstPath, bool recursive)
: dstPath(dstPath), recursive(recursive)
{
}

void createDirectory(const Path & path)
{
if(!recursive) throw Error("regular file expected");
Path p = dstPath + path;
if (mkdir(p.c_str(), 0777) == -1)
throw SysError(format("creating directory ‘%1%’") % p);
Expand Down Expand Up @@ -332,18 +339,121 @@ struct RestoreSink : ParseSink

void createSymlink(const Path & path, const string & target)
{
if(!recursive) throw Error("regular file expected");
Path p = dstPath + path;
nix::createSymlink(target, p);
}
};


void restorePath(const Path & path, Source & source)
void restorePath(const Path & path, Source & source, bool recursive)
{
RestoreSink sink;
sink.dstPath = path;
RestoreSink sink(path, recursive);
parseDump(sink, source);
}

static void copyContents(const Path & srcPath, const AutoCloseFD& ofd, size_t size)
{
#if HAVE_POSIX_FALLOCATE
if (size) {
errno = posix_fallocate(ofd, 0, size);
/* Note that EINVAL may indicate that the underlying
filesystem doesn't support preallocation (e.g. on
OpenSolaris). Since preallocation is just an
optimisation, ignore it. */
if (errno && errno != EINVAL)
throw SysError(format("preallocating file of %1% bytes") % size);
}
#endif

AutoCloseFD fd = open(srcPath.c_str(), O_RDONLY);
if (fd == -1) throw SysError(format("opening file ‘%1%’") % srcPath);

// TODO: use sendfile(2) / sendfile64(2) on Linux

unsigned char buf[65536];
size_t left = size;

while (left > 0) {
checkInterrupt();
size_t n = left > sizeof(buf) ? sizeof(buf) : left;
readFull(fd, buf, n);
writeFull(ofd, buf, n);
left -= n;
}
}

void copyPath(const Path & srcPath, const Path & dstPath, PathFilter & filter, bool recursive)
{
struct stat st;
if (lstat(srcPath.c_str(), &st))
throw SysError(format("getting attributes of path ‘%1%’") % srcPath);

if (S_ISREG(st.st_mode)) {
AutoCloseFD ofd = open(dstPath.c_str(), O_CREAT | O_EXCL | O_WRONLY, 0666);
if (ofd == -1) throw SysError(format("creating file ‘%1%’") % dstPath);
if (st.st_mode & S_IXUSR) {
struct stat ost;
if (fstat(ofd, &ost) == -1)
throw SysError("fstat");
if (fchmod(ofd, ost.st_mode | (S_IXUSR | S_IXGRP | S_IXOTH)) == -1)
throw SysError("fchmod");
}
copyContents(srcPath, ofd, (size_t) st.st_size);
}
else if (S_ISDIR(st.st_mode)) {
if(!recursive) throw Error("regular file expected");
if (mkdir(dstPath.c_str(), 0777) == -1)
throw SysError(format("creating directory ‘%1%’") % dstPath);
/* If we're on a case-insensitive system like Mac OS X, undo
the case hack applied by restorePath(). */
if (useCaseHack) {
std::map<string, string> unhacked;
for (auto & i : readDirectory(srcPath)) {
string name(i.name);
size_t pos = i.name.find(caseHackSuffix);
if (pos != string::npos) {
printMsg(lvlDebug, format("removing case hack suffix from ‘%1%’") % (srcPath + "/" + i.name));
name.erase(pos);
}
if (unhacked.find(name) != unhacked.end())
throw Error(format("file name collision in between ‘%1%’ and ‘%2%’")
% (srcPath + "/" + unhacked[name]) % (srcPath + "/" + i.name));
unhacked[name] = i.name;
}

std::map<Path, int, CaseInsensitiveCompare> names;
for (auto & i : unhacked)
if (filter(srcPath + "/" + i.first)) {
string name = i.first;
if (name.empty() || name.find('/') != string::npos || name.find((char) 0) != string::npos)
throw Error(format("Directory contains invalid file name ‘%1%’") % name);
auto i = names.find(name);
if (i != names.end()) {
printMsg(lvlDebug, format("case collision between ‘%1%’ and ‘%2%’") % i->first % name);
name += caseHackSuffix;
name += int2String(++i->second);
} else
names[name] = 0;
copyPath(srcPath + "/" + name, dstPath + "/" + name, filter, recursive);
}
} else {
for (auto & i : readDirectory(srcPath)) {
if (filter(srcPath + "/" + i.name)) {
string name = i.name;
if (name.empty() || name.find('/') != string::npos || name.find((char) 0) != string::npos)
throw Error(format("Directory contains invalid file name ‘%1%’") % name);
copyPath(srcPath + "/" + name, dstPath + "/" + name, filter, recursive);
}
}
}
}
else if (S_ISLNK(st.st_mode)) {
if(!recursive) throw Error("regular file expected");
Path target = readLink(srcPath);
nix::createSymlink(target, dstPath);
}
else throw Error(format("file ‘%1%’ has an unsupported type") % srcPath);
}

}
4 changes: 3 additions & 1 deletion src/libutil/archive.hh
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,10 @@ struct ParseSink

void parseDump(ParseSink & sink, Source & source);

void restorePath(const Path & path, Source & source);
void restorePath(const Path & path, Source & source, bool recursive = true);

void copyPath(const Path & srcPath, const Path & dstPath,
PathFilter & filter = defaultPathFilter, bool recursive = true);

// FIXME: global variables are bad m'kay.
extern bool useCaseHack;
Expand Down
6 changes: 6 additions & 0 deletions src/libutil/serialise.hh
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ inline Sink & operator << (Sink & sink, uint64_t n)
return sink;
}


template< unsigned N >
Sink & operator << (Sink & sink, const char (&s)[N]) {
writeString((const unsigned char*)s, N-1, sink);
return sink;
}
Sink & operator << (Sink & sink, const string & s);
Sink & operator << (Sink & sink, const Strings & s);
Sink & operator << (Sink & sink, const StringSet & s);
Expand Down
Loading