Skip to content

Commit

Permalink
Fix sparse path matching
Browse files Browse the repository at this point in the history
  • Loading branch information
Christoph Bartschat committed Jan 28, 2024
1 parent 7afc22a commit a781b9b
Show file tree
Hide file tree
Showing 2 changed files with 302 additions and 33 deletions.
162 changes: 129 additions & 33 deletions src/libgit2/sparse.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,26 +13,124 @@
#include "index.h"
#include "ignore.h"

static bool sparse_lookup_in_rules(
// Evaluates to non-zero when at least one bit of `flag` is set in
// `entity->flags`. `entity` must be a pointer to a structure that has an
// integral `flags` member. Both arguments are parenthesized so that compound
// expressions (for example `&obj` or `A | B`) expand with the intended
// precedence.
#define HAS_FLAG(entity, flag) ((((entity)->flags) & (flag)) != 0)

// Decide whether a parsed sparse-checkout rule has one of the shapes that
// pattern_matches_path() knows how to evaluate.
//
// Accepted shapes:
//   - the bare "*" pattern, provided its DIRECTORY and NEGATIVE flags agree
//     (both clear or both set);
//   - a directory pattern without wildcards, e.g. "A/B";
//   - a directory pattern whose only wildcard is a trailing "/*" that follows
//     a non-empty directory prefix, e.g. "A/B/*".
//
// Returns true when the pattern is accepted and false otherwise (an empty
// pattern is rejected).
static bool pattern_is_cone(const git_attr_fnmatch *match)
{
	size_t i, prefix_length;

	if (match->length == 0)
		return false;

	if (match->length == 1 && match->pattern[0] == '*') {
		// NOTE(christoph): "/*" and "!/*/" both parse to "*", either to:
		// positive, wildcard, non-directory, or
		// negative, wildcard, directory.
		bool is_dir = HAS_FLAG(match, GIT_ATTR_FNMATCH_DIRECTORY);
		bool is_negative = HAS_FLAG(match, GIT_ATTR_FNMATCH_NEGATIVE);
		return is_dir == is_negative;
	}

	if (!HAS_FLAG(match, GIT_ATTR_FNMATCH_DIRECTORY))
		return false;

	if (HAS_FLAG(match, GIT_ATTR_FNMATCH_HASWILD)) {
		// pattern_matches_path() strips the last two bytes of every wildcard
		// pattern, so the wildcard has to be a literal trailing "/*" with at
		// least one byte of directory name in front of it.
		if (match->length < 3 ||
		    match->pattern[match->length - 2] != '/' ||
		    match->pattern[match->length - 1] != '*')
			return false;

		// The prefix is compared byte-for-byte by the matcher, so no glob
		// metacharacter may appear in it. The only acceptable wildcard is
		// the one at the end.
		prefix_length = match->length - 2;
		for (i = 0; i < prefix_length; i++) {
			char c = match->pattern[i];

			if (c == '*' || c == '?' || c == '[')
				return false;
		}
	}

	return true;
}

// Returns true when `path` is not flagged as a directory and its path string
// contains no '/' separator, i.e. it names an entry directly at the root.
static bool is_top_level_file(git_attr_path *path)
{
	if (path->is_dir)
		return false;

	return strchr(path->path, '/') == NULL;
}

// Test whether `path` is covered by the cone pattern `match`.
//
// match        parsed rule; its pattern is compared literally.
// path         path under test; `path->is_dir` tells whether it is a directory.
// path_length  number of bytes of `path->path` to consider (the caller in
//              this file passes strlen(path->path)).
//
// A pattern without a wildcard ("A/B") matches the path "A/B" itself and
// anything below it ("A/B/...").
// A pattern with a wildcard ("A/B/C/*") requires "A/B/C" exactly, then a
// slash, then at least one more level of nesting: either the path is a
// directory, or another slash follows the first component after "A/B/C/".
//
// Returns true on a match, false otherwise.
static bool pattern_matches_path(git_attr_fnmatch *match, git_attr_path *path, size_t path_length)
{
	// Number of leading bytes that must match exactly.
	// Ex: "A/B"     -> 3,
	//     "A/B/C/*" -> 5
	size_t exact_match_length = match->length;
	bool expected_extra_nesting = HAS_FLAG(match, GIT_ATTR_FNMATCH_HASWILD);

	if (expected_extra_nesting) {
		// A wildcard pattern shorter than "/*" (such as the bare "*") has
		// no directory prefix to compare against. Reject it explicitly
		// instead of letting the unsigned subtraction below wrap around.
		if (match->length < 2)
			return false;

		exact_match_length = match->length - 2; // Cut off the trailing "/*"
	}

	if (path_length < exact_match_length)
		return false;

	if (expected_extra_nesting) {
		// Need room for the separator plus at least one more byte.
		if (path_length < exact_match_length + 2)
			return false;

		// A non-directory path must contain a further '/' somewhere after
		// the separator that follows the exact prefix.
		if (!path->is_dir &&
		    memchr(path->path + exact_match_length + 1, '/',
		           path_length - exact_match_length - 1) == NULL)
			return false;
	}

	// The prefix must end on a component boundary: "A/B" must not match "A/BC".
	if (path_length > exact_match_length && path->path[exact_match_length] != '/')
		return false;

	return memcmp(path->path, match->pattern, exact_match_length) == 0;
}

static int sparse_lookup_in_rules(
int *checkout,
git_attr_file *file,
git_attr_path *path)
{
size_t j;
git_attr_fnmatch *match;

int path_length = strlen(path->path);
if (is_top_level_file(path) ) {
*checkout =GIT_SPARSE_CHECKOUT;
return 0;
}


git_vector_rforeach(&file->rules, j, match) {
if (match->flags & GIT_ATTR_FNMATCH_DIRECTORY &&
path->is_dir == GIT_DIR_FLAG_FALSE)
continue;
if (git_attr_fnmatch__match(match, path)) {
*checkout = ((match->flags & GIT_ATTR_FNMATCH_NEGATIVE) == 0) ?
GIT_SPARSE_CHECKOUT : GIT_SPARSE_NOCHECKOUT;
return true;
if (pattern_matches_path(match, path, path_length)) {
*checkout = HAS_FLAG(match, GIT_ATTR_FNMATCH_NEGATIVE)
? GIT_SPARSE_NOCHECKOUT
: GIT_SPARSE_CHECKOUT;
return 0;
}
}

return false;

*checkout = GIT_SPARSE_NOCHECKOUT;
return 0;
}

static int parse_sparse_file(
Expand All @@ -41,13 +139,27 @@ static int parse_sparse_file(
const char *data,
bool allow_macros)
{
/* Todo: Support for cone mode */
return parse_ignore_file(
int error = parse_ignore_file(
repo,
attrs,
data,
NULL,
allow_macros);

if (error != 0 ) {
return error;
}

size_t j;
git_attr_fnmatch *match;
git_vector_rforeach(&attrs->rules, j, match) {
if (!pattern_is_cone(match)) {
git_error_set(GIT_ERROR_INVALID, "sparse-checkout patterns must be in cone format");
return -1;
}
}

return 0;
}

int git_sparse_attr_file__init_(
Expand Down Expand Up @@ -155,29 +267,13 @@ int git_sparse__lookup(

workdir = git_repository_workdir(sparse->repo);
if ((error = git_attr_path__init(&path, pathname, workdir, dir_flag)))
return -1;

/* No match -> no checkout */
*status = GIT_SPARSE_NOCHECKOUT;

while (1) {
if (sparse_lookup_in_rules(status, sparse->sparse, &path))
goto cleanup;

/* move up one directory */
if (path.basename == path.path)
break;
path.basename[-1] = '\0';
while (path.basename > path.path && *path.basename != '/')
path.basename--;
if (path.basename > path.path)
path.basename++;
path.is_dir = 1;
}
return error;

error = sparse_lookup_in_rules(status, sparse->sparse, &path);

cleanup:
git_attr_path__free(&path);
return 0;

return error;
}

void git_sparse__free(git_sparse *sparse)
Expand Down
173 changes: 173 additions & 0 deletions tests/libgit2/sparse/paths.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,173 @@
#include "clar_libgit2.h"
#include "futils.h"
#include "git2/attr.h"
#include "sparse.h"
#include "status/status_helpers.h"

static git_repository *g_repo = NULL;

/*
 * Suite setup hook. Intentionally empty: every test in this file creates
 * its own sandbox with cl_git_sandbox_init().
 */
void test_sparse_paths__initialize(void)
{
}

/*
 * Suite cleanup hook: calls cl_git_sandbox_cleanup() to tear down the
 * sandbox that the tests set up with cl_git_sandbox_init().
 * Note that g_repo is not reset here.
 */
void test_sparse_paths__cleanup(void)
{
cl_git_sandbox_cleanup();
}

/*
 * Query git_sparse_check_path() for `filepath` in g_repo and assert that the
 * reported checkout state agrees with `expected`.
 *
 * `file`, `func` and `line` identify the call site and are forwarded to the
 * assertion helpers so that failures point at the calling test.
 */
static void assert_checkout_(
	bool expected, const char *filepath,
	const char *file, const char *func, int line)
{
	int checkout = 0;
	int outcome_agrees;
	const char *failure_text;

	cl_git_expect(
		git_sparse_check_path(&checkout, g_repo, filepath), 0, file, func, line);

	/* Compare truthiness only; any non-zero checkout value counts as included. */
	outcome_agrees = ((expected != 0) == (checkout != 0));

	if (expected)
		failure_text = "should be included";
	else
		failure_text = "should be excluded";

	clar__assert(outcome_agrees, file, func, line, failure_text, filepath, 0);
}

/*
 * Call-site wrappers around assert_checkout_(): they supply __FILE__,
 * __func__ and __LINE__ of the invoking test.
 *   assert_checkout(expected, filepath)  - expect the given state
 *   assert_is_checkout(filepath)         - expect the path to be included
 *   refute_is_checkout(filepath)         - expect the path to be excluded
 */
#define assert_checkout(expected, filepath) \
	assert_checkout_((expected), (filepath), __FILE__, __func__, __LINE__)
#define assert_is_checkout(filepath) \
	assert_checkout(true, filepath)
#define refute_is_checkout(filepath) \
	assert_checkout(false, filepath)

// Adds a nested set of cone patterns (A, A/B, A/B/C non-recursively and
// A/B/D recursively) to the "sparse" sandbox and checks which paths
// git_sparse_check_path() reports as included or excluded.
void test_sparse_paths__check_path(void)
{
	git_sparse_checkout_init_options scopts = GIT_SPARSE_CHECKOUT_INIT_OPTIONS_INIT;
	char *pattern_strings[] = {
		"/*",
		"!/*/",
		"/A/",
		"!/A/*/",
		"/A/B/",
		"!/A/B/*/",
		"/A/B/C/",
		"!/A/B/C/*/",
		"/A/B/D/",
	};
	git_strarray patterns = { pattern_strings, ARRAY_SIZE(pattern_strings) };

	char *matches[] = {
		// Folder prefixes match
		"A/",
		"A/B/",
		"A/B/C/",
		"A/B/D/",
		"A/B/D/E/",
		"A/B/D/E/F/",
		// Direct children
		"A/_",
		"A/B/_",
		"A/B/C/_",
		"A/B/D/_",
		"A/B/D/E/_",
		"A/B/D/E/F/_",
	};

	char *non_matches[] = {
		// Directories outside the cone, or nested below a non-recursive entry
		"M/",
		"A/N/",
		"A/B/O/",
		"A/B/CP/",
		"A/B/C/P/",
		"A/B/C/P/Q/",
		// Files inside those directories
		"M/_",
		"A/N/_",
		"A/B/O/_",
		"A/B/CP/_",
		"A/B/C/P/_",
		"A/B/C/P/Q/_",
	};

	size_t j;

	g_repo = cl_git_sandbox_init("sparse");

	cl_git_pass(git_sparse_checkout_init(g_repo, &scopts));
	cl_git_pass(git_sparse_checkout_add(g_repo, &patterns));

	for (j = 0; j < ARRAY_SIZE(matches); j++)
		assert_is_checkout(matches[j]);

	for (j = 0; j < ARRAY_SIZE(non_matches); j++)
		refute_is_checkout(non_matches[j]);
}

// With no patterns added beyond what git_sparse_checkout_init() sets up,
// a file at the repository root is expected to be included while a
// directory and its contents are expected to be excluded.
void test_sparse_paths__check_toplevel(void)
{
	git_sparse_checkout_init_options scopts = GIT_SPARSE_CHECKOUT_INIT_OPTIONS_INIT;

	// Explicit empty pattern list. An empty brace initializer on an array
	// of unknown size is not valid ISO C, so no backing array is declared.
	git_strarray patterns = { NULL, 0 };

	char *matches[] = {
		"_", // Even with no include patterns, toplevel files are included.
	};

	char *non_matches[] = {
		"A/",
		"A/_",
	};

	size_t j;

	g_repo = cl_git_sandbox_init("sparse");

	cl_git_pass(git_sparse_checkout_init(g_repo, &scopts));
	cl_git_pass(git_sparse_checkout_add(g_repo, &patterns));

	for (j = 0; j < ARRAY_SIZE(matches); j++)
		assert_is_checkout(matches[j]);

	for (j = 0; j < ARRAY_SIZE(non_matches); j++)
		refute_is_checkout(non_matches[j]);
}

// Feeds single patterns to git_sparse_checkout_set() and checks that the
// ones listed as good are accepted (return value 0) and the ones listed as
// bad are rejected (non-zero return value).
void test_sparse_paths__validate_cone(void)
{
	git_sparse_checkout_init_options scopts = GIT_SPARSE_CHECKOUT_INIT_OPTIONS_INIT;

	// Patterns that must be accepted.
	char *good_patterns[] = {
		"/*",
		"!/*/",
		"!/A/B/C/*/",
		"/A/B/C/",
	};

	// Patterns that must be rejected.
	char *bad_patterns[] = {
		"/*/",
		"!/*",
		"!/A/B/C/*",
		"/A/B/C/*",
		"/A/*/C/",
		"/A/B*/C/",
		"/A/B/C",
		"A/B/C",
		"!A/B/C",
	};

	size_t idx;

	g_repo = cl_git_sandbox_init("sparse");

	cl_git_pass(git_sparse_checkout_init(g_repo, &scopts));

	for (idx = 0; idx < ARRAY_SIZE(good_patterns); ++idx) {
		git_strarray single = { good_patterns + idx, 1 };
		int rc = git_sparse_checkout_set(g_repo, &single);

		clar__assert(rc == 0, __FILE__, __func__, __LINE__, "Expected success on:", good_patterns[idx], 0);
	}

	for (idx = 0; idx < ARRAY_SIZE(bad_patterns); ++idx) {
		git_strarray single = { bad_patterns + idx, 1 };
		int rc = git_sparse_checkout_set(g_repo, &single);

		clar__assert(rc != 0, __FILE__, __func__, __LINE__, "Expected rejection on:", bad_patterns[idx], 0);
	}
}

0 comments on commit a781b9b

Please sign in to comment.