Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor parseTreeEntries, speed up tree list #21368

Merged
merged 3 commits into from
Oct 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 40 additions & 38 deletions modules/git/parse_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,70 +22,72 @@ func ParseTreeEntries(data []byte) ([]*TreeEntry, error) {
return parseTreeEntries(data, nil)
}

var sepSpace = []byte{' '}

func parseTreeEntries(data []byte, ptree *Tree) ([]*TreeEntry, error) {
entries := make([]*TreeEntry, 0, 10)
var err error
entries := make([]*TreeEntry, 0, bytes.Count(data, []byte{'\n'})+1)
for pos := 0; pos < len(data); {
// expect line to be of the form "<mode> <type> <sha> <space-padded-size>\t<filename>"
// expect line to be of the form:
// <mode> <type> <sha> <space-padded-size>\t<filename>
// <mode> <type> <sha>\t<filename>
posEnd := bytes.IndexByte(data[pos:], '\n')
if posEnd == -1 {
posEnd = len(data)
} else {
posEnd += pos
}
line := data[pos:posEnd]
posTab := bytes.IndexByte(line, '\t')
if posTab == -1 {
return nil, fmt.Errorf("invalid ls-tree output (no tab): %q", line)
}

entry := new(TreeEntry)
entry.ptree = ptree
if pos+6 > len(data) {
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))

entryAttrs := line[:posTab]
entryName := line[posTab+1:]

entryMode, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
_ /* entryType */, entryAttrs, _ = bytes.Cut(entryAttrs, sepSpace) // the type is not used, the mode is enough to determine the type
entryObjectID, entryAttrs, _ := bytes.Cut(entryAttrs, sepSpace)
if len(entryAttrs) > 0 {
entrySize := entryAttrs // the last field is the space-padded-size
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(entrySize)), 10, 64)
entry.sized = true
}
switch string(data[pos : pos+6]) {

switch string(entryMode) {
case "100644":
entry.entryMode = EntryModeBlob
pos += 12 // skip over "100644 blob "
case "100755":
entry.entryMode = EntryModeExec
pos += 12 // skip over "100755 blob "
case "120000":
entry.entryMode = EntryModeSymlink
pos += 12 // skip over "120000 blob "
case "160000":
entry.entryMode = EntryModeCommit
pos += 14 // skip over "160000 object "
case "040000", "040755": // git uses 040000 for tree object, but some users may get 040755 for unknown reasons
entry.entryMode = EntryModeTree
pos += 12 // skip over "040000 tree "
default:
return nil, fmt.Errorf("unknown type: %v", string(data[pos:pos+6]))
return nil, fmt.Errorf("unknown type: %v", string(entryMode))
}

if pos+40 > len(data) {
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
}
id, err := NewIDFromString(string(data[pos : pos+40]))
entry.ID, err = NewIDFromString(string(entryObjectID))
if err != nil {
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
}
entry.ID = id
pos += 41 // skip over sha and trailing space

end := pos + bytes.IndexByte(data[pos:], '\t')
if end < pos {
return nil, fmt.Errorf("Invalid ls-tree -l output: %s", string(data))
}
entry.size, _ = strconv.ParseInt(strings.TrimSpace(string(data[pos:end])), 10, 64)
entry.sized = true

pos = end + 1

end = pos + bytes.IndexByte(data[pos:], '\n')
if end < pos {
return nil, fmt.Errorf("Invalid ls-tree output: %s", string(data))
return nil, fmt.Errorf("invalid ls-tree output (invalid object id): %q, err: %w", line, err)
}

// In case entry name is surrounded by double quotes(it happens only in git-shell).
if data[pos] == '"' {
entry.name, err = strconv.Unquote(string(data[pos:end]))
if len(entryName) > 0 && entryName[0] == '"' {
entry.name, err = strconv.Unquote(string(entryName))
if err != nil {
return nil, fmt.Errorf("Invalid ls-tree output: %v", err)
return nil, fmt.Errorf("invalid ls-tree output (invalid name): %q, err: %w", line, err)
}
} else {
entry.name = string(data[pos:end])
entry.name = string(entryName)
}

pos = end + 1
pos = posEnd + 1
entries = append(entries, entry)
}
return entries, nil
Expand Down
48 changes: 42 additions & 6 deletions modules/git/parse_nogogit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ import (
"github.com/stretchr/testify/assert"
)

func TestParseTreeEntries(t *testing.T) {
func TestParseTreeEntriesLong(t *testing.T) {
testCases := []struct {
Input string
Expected []*TreeEntry
Expand Down Expand Up @@ -59,11 +59,47 @@ func TestParseTreeEntries(t *testing.T) {
assert.NoError(t, err)
assert.Len(t, entries, len(testCase.Expected))
for i, entry := range entries {
assert.EqualValues(t, testCase.Expected[i].ID, entry.ID)
assert.EqualValues(t, testCase.Expected[i].name, entry.name)
assert.EqualValues(t, testCase.Expected[i].entryMode, entry.entryMode)
assert.EqualValues(t, testCase.Expected[i].sized, entry.sized)
assert.EqualValues(t, testCase.Expected[i].size, entry.size)
assert.EqualValues(t, testCase.Expected[i], entry)
}
}
}

func TestParseTreeEntriesShort(t *testing.T) {
testCases := []struct {
Input string
Expected []*TreeEntry
}{
{
Input: `100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af README.md
040000 tree 84b90550547016f73c5dd3f50dea662389e67b6d assets
`,
Expected: []*TreeEntry{
{
ID: MustIDFromString("ea0d83c9081af9500ac9f804101b3fd0a5c293af"),
name: "README.md",
entryMode: EntryModeBlob,
},
{
ID: MustIDFromString("84b90550547016f73c5dd3f50dea662389e67b6d"),
name: "assets",
entryMode: EntryModeTree,
},
},
},
}
for _, testCase := range testCases {
entries, err := ParseTreeEntries([]byte(testCase.Input))
assert.NoError(t, err)
assert.Len(t, entries, len(testCase.Expected))
for i, entry := range entries {
assert.EqualValues(t, testCase.Expected[i], entry)
}
}
}

func TestParseTreeEntriesInvalid(t *testing.T) {
// there was a panic: "runtime error: slice bounds out of range" when the input was invalid: #20315
entries, err := ParseTreeEntries([]byte("100644 blob ea0d83c9081af9500ac9f804101b3fd0a5c293af"))
assert.Error(t, err)
assert.Len(t, entries, 0)
}
2 changes: 1 addition & 1 deletion modules/git/repo_language_stats_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err

tree := commit.Tree

entries, err := tree.ListEntriesRecursive()
entries, err := tree.ListEntriesRecursiveWithSize()
if err != nil {
return nil, err
}
Expand Down
9 changes: 7 additions & 2 deletions modules/git/tree_gogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ func (t *Tree) ListEntries() (Entries, error) {
return entries, nil
}

// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
func (t *Tree) ListEntriesRecursive() (Entries, error) {
// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees
func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
if t.gogitTree == nil {
err := t.loadTreeObject()
if err != nil {
Expand Down Expand Up @@ -92,3 +92,8 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {

return entries, nil
}

// ListEntriesRecursiveFast is the alias of ListEntriesRecursiveWithSize for the gogit version
func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
return t.ListEntriesRecursiveWithSize()
}
19 changes: 16 additions & 3 deletions modules/git/tree_nogogit.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,16 @@ func (t *Tree) ListEntries() (Entries, error) {
return t.entries, err
}

// ListEntriesRecursive returns all entries of current tree recursively including all subtrees
func (t *Tree) ListEntriesRecursive() (Entries, error) {
// listEntriesRecursive returns all entries of current tree recursively including all subtrees
// extraArgs could be "-l" to get the size, which is slower
func (t *Tree) listEntriesRecursive(extraArgs ...string) (Entries, error) {
if t.entriesRecursiveParsed {
return t.entriesRecursive, nil
}

stdout, _, runErr := NewCommand(t.repo.Ctx, "ls-tree", "-t", "-l", "-r", t.ID.String()).RunStdBytes(&RunOpts{Dir: t.repo.Path})
args := append([]string{"ls-tree", "-t", "-r"}, extraArgs...)
args = append(args, t.ID.String())
stdout, _, runErr := NewCommand(t.repo.Ctx, args...).RunStdBytes(&RunOpts{Dir: t.repo.Path})
if runErr != nil {
return nil, runErr
}
Expand All @@ -118,3 +121,13 @@ func (t *Tree) ListEntriesRecursive() (Entries, error) {

return t.entriesRecursive, err
}

// ListEntriesRecursiveFast returns all entries of current tree recursively including all subtrees, no size
func (t *Tree) ListEntriesRecursiveFast() (Entries, error) {
return t.listEntriesRecursive()
}

// ListEntriesRecursiveWithSize returns all entries of current tree recursively including all subtrees, with size
func (t *Tree) ListEntriesRecursiveWithSize() (Entries, error) {
return t.listEntriesRecursive("--long")
}
4 changes: 2 additions & 2 deletions routers/web/repo/treelist.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@ func TreeList(ctx *context.Context) {
return
}

entries, err := tree.ListEntriesRecursive()
entries, err := tree.ListEntriesRecursiveFast()
if err != nil {
ctx.ServerError("ListEntriesRecursive", err)
ctx.ServerError("ListEntriesRecursiveFast", err)
return
}
entries.CustomSort(base.NaturalSortLess)
Expand Down
2 changes: 1 addition & 1 deletion services/repository/files/tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ func GetTreeBySHA(ctx context.Context, repo *repo_model.Repository, gitRepo *git
tree.URL = repo.APIURL() + "/git/trees/" + url.PathEscape(tree.SHA)
var entries git.Entries
if recursive {
entries, err = gitTree.ListEntriesRecursive()
entries, err = gitTree.ListEntriesRecursiveWithSize()
} else {
entries, err = gitTree.ListEntries()
}
Expand Down