go-gitea · zeripath · May 16, 2021 · May 16, 2021 · May 16, 2021 · May 16, 2021
diff --git a/go.mod b/go.mod
@@ -28,6 +28,8 @@ require (
 	github.com/couchbase/goutils v0.0.0-20210118111533-e33d3ffb5401 // indirect
 	github.com/denisenkom/go-mssqldb v0.10.0
 	github.com/dgrijalva/jwt-go v3.2.0+incompatible
+	github.com/djherbis/buffer v1.1.0
+	github.com/djherbis/nio/v3 v3.0.1
 	github.com/dlclark/regexp2 v1.4.0 // indirect
 	github.com/dustin/go-humanize v1.0.0
 	github.com/editorconfig/editorconfig-core-go/v2 v2.4.2

diff --git a/go.sum b/go.sum
@@ -258,6 +258,10 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZm
 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
 github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
 github.com/dgryski/go-sip13 v0.0.0-20181026042036-e10d5fee7954/go.mod h1:vAd38F8PWV+bWy6jNmig1y/TA+kYO4g3RSRF0IAv0no=
+github.com/djherbis/buffer v1.1.0 h1:uGQ+DZDAMlfC2z3khbBtLcAHC0wyoNrX9lpOml3g3fg=
+github.com/djherbis/buffer v1.1.0/go.mod h1:VwN8VdFkMY0DCALdY8o00d3IZ6Amz/UNVMWcSaJT44o=
+github.com/djherbis/nio/v3 v3.0.1 h1:6wxhnuppteMa6RHA4L81Dq7ThkZH8SwnDzXDYy95vB4=
+github.com/djherbis/nio/v3 v3.0.1/go.mod h1:Ng4h80pbZFMla1yKzm61cF0tqqilXZYrogmWgZxOcmg=
 github.com/dlclark/regexp2 v1.1.6/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 github.com/dlclark/regexp2 v1.2.0/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc=
 github.com/dlclark/regexp2 v1.4.0 h1:F1rxgk7p4uKjwIQxBs9oAXe5CqrXlCduYEJvrF4u93E=

diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go
@@ -11,6 +11,9 @@ import (
 	"math"
 	"strconv"
 	"strings"
+
+	"github.com/djherbis/buffer"
+	"github.com/djherbis/nio/v3"
 )
 
 // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
@@ -21,8 +24,9 @@ type WriteCloserError interface {
 
 // CatFileBatchCheck opens git cat-file --batch-check in the provided repo and returns a stdin pipe, a stdout reader and cancel function
 func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
+
 	batchStdinReader, batchStdinWriter := io.Pipe()
-	batchStdoutReader, batchStdoutWriter := io.Pipe()
+	batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(4 * 1024))
 	cancel := func() {
 		_ = batchStdinReader.Close()
 		_ = batchStdinWriter.Close()
@@ -42,7 +46,7 @@ func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()
 		}
 	}()
 
-	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
+	// For simplicity's sake we'll use a buffered reader to read from the cat-file --batch-check
 	batchReader := bufio.NewReader(batchStdoutReader)
 
 	return batchStdinWriter, batchReader, cancel
@@ -53,7 +57,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
 	// so let's create a batch stdin and stdout
 	batchStdinReader, batchStdinWriter := io.Pipe()
-	batchStdoutReader, batchStdoutWriter := io.Pipe()
+	batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
 	cancel := func() {
 		_ = batchStdinReader.Close()
 		_ = batchStdinWriter.Close()
@@ -74,7 +78,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 	}()
 
 	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
-	batchReader := bufio.NewReader(batchStdoutReader)
+	batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
 
 	return batchStdinWriter, batchReader, cancel
 }
@@ -84,22 +88,34 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 // <sha> SP <type> SP <size> LF
 // sha is a 40byte not 20byte here
 func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
-	sha, err = rd.ReadBytes(' ')
+	typ, err = rd.ReadString('\n')
 	if err != nil {
 		return
 	}
-	sha = sha[:len(sha)-1]
+	if len(typ) == 1 {
+		// A one-byte line is a bare LF: presumably an earlier read did not Discard the LF that follows an object's content
+		log("WARNING: ReadBatchLine read empty string - there is a missing Discard")
+		typ, err = rd.ReadString('\n')
+		if err != nil {
+			return
+		}
+	}
 
-	typ, err = rd.ReadString('\n')
-	if err != nil {
+	idx := strings.IndexByte(typ, ' ')
+	if idx < 0 {
+		log("missing space typ: %s", typ)
+		err = ErrNotExist{ID: string(sha)}
 		return
 	}
+	sha = []byte(typ[:idx])
+	typ = typ[idx+1:]
 
-	idx := strings.Index(typ, " ")
+	idx = strings.IndexByte(typ, ' ')
 	if idx < 0 {
 		err = ErrNotExist{ID: string(sha)}
 		return
 	}
+
 	sizeStr := typ[idx+1 : len(typ)-1]
 	typ = typ[:idx]
 
@@ -130,7 +146,7 @@ headerLoop:
 	}
 
 	// Discard the rest of the tag
-	discard := size - n
+	discard := size - n + 1
 	for discard > math.MaxInt32 {
 		_, err := rd.Discard(math.MaxInt32)
 		if err != nil {
@@ -211,14 +227,20 @@ func To40ByteSHA(sha, out []byte) []byte {
 func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sha []byte, n int, err error) {
 	var readBytes []byte
 	// Skip the Mode
-	readBytes, err = rd.ReadSlice(' ') // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
+	readBytes, err = rd.ReadSlice('\x00') // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
 	if err != nil {
 		return
 	}
-	n += len(readBytes)
+	idx := bytes.IndexByte(readBytes, ' ')
+	if idx < 0 {
+		log("missing space in readBytes: %s", readBytes)
+		err = &ErrNotExist{}
+		return
+	}
+	n += idx + 1
+	readBytes = readBytes[idx+1:]
 
 	// Deal with the fname
-	readBytes, err = rd.ReadSlice('\x00')
 	copy(fnameBuf, readBytes)
 	if len(fnameBuf) > len(readBytes) {
 		fnameBuf = fnameBuf[:len(readBytes)] // cut the buf the correct size
@@ -237,7 +259,7 @@ func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sh
 	fname = fnameBuf                      // set the returnable fname to the slice
 
 	// Now deal with the 20-byte SHA
-	idx := 0
+	idx = 0
 	for idx < 20 {
 		read := 0
 		read, err = rd.Read(shaBuf[idx:20])
@@ -262,23 +284,102 @@ func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sh
 func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
 	var readBytes []byte
 
-	// Read the Mode
-	readBytes, err = rd.ReadSlice(' ')
+	// Read the Mode & fname
+	readBytes, err = rd.ReadSlice('\x00')
 	if err != nil {
 		return
 	}
-	n += len(readBytes)
-	copy(modeBuf, readBytes)
-	if len(modeBuf) > len(readBytes) {
-		modeBuf = modeBuf[:len(readBytes)]
-	} else {
-		modeBuf = append(modeBuf, readBytes[len(modeBuf):]...)
+	idx := bytes.IndexByte(readBytes, ' ')
+	if idx < 0 {
+		log("missing space in readBytes: %s", readBytes)
+
+		err = &ErrNotExist{}
+		return
+	}
 
+	n += idx + 1
+	copy(modeBuf, readBytes[:idx])
+	if len(modeBuf) >= idx {
+		modeBuf = modeBuf[:idx]
+	} else {
+		modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
 	}
-	mode = modeBuf[:len(modeBuf)-1] // Drop the SP
+	mode = modeBuf
+
+	readBytes = readBytes[idx+1:]
 
 	// Deal with the fname
+	copy(fnameBuf, readBytes)
+	if len(fnameBuf) > len(readBytes) {
+		fnameBuf = fnameBuf[:len(readBytes)]
+	} else {
+		fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...)
+	}
+	for err == bufio.ErrBufferFull {
+		readBytes, err = rd.ReadSlice('\x00')
+		fnameBuf = append(fnameBuf, readBytes...)
+	}
+	n += len(fnameBuf)
+	if err != nil {
+		return
+	}
+	fnameBuf = fnameBuf[:len(fnameBuf)-1]
+	fname = fnameBuf
+
+	// Deal with the 20-byte SHA
+	idx = 0
+	for idx < 20 {
+		read := 0
+		read, err = rd.Read(shaBuf[idx:20])
+		n += read
+		if err != nil {
+			return
+		}
+		idx += read
+	}
+	sha = shaBuf
+	return
+}
+
+// ParseTreeLineTree reads an entry from a tree in a cat-file --batch stream and reports whether it is a sub-tree (mode 40000); other entries are skipped
+//
+// This carefully avoids allocations - except where fnameBuf is too small.
+// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
+//
+// Each line is composed of:
+// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
+//
+// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
+func ParseTreeLineTree(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (isTree bool, fname, sha []byte, n int, err error) {
+	var readBytes []byte
+
+	// Read the Mode & fname
 	readBytes, err = rd.ReadSlice('\x00')
+	if err != nil {
+		return
+	}
+	if len(readBytes) < 6 {
+		log("missing space in readBytes: %v", readBytes)
+		err = &ErrNotExist{}
+		return
+	}
+	if !bytes.Equal(readBytes[:6], []byte("40000 ")) {
+		n += len(readBytes)
+		for err == bufio.ErrBufferFull {
+			readBytes, err = rd.ReadSlice('\x00')
+			n += len(readBytes)
+		}
+		d := 0
+		d, err = rd.Discard(20)
+		n += d
+		return
+	}
+	isTree = true
+
+	n += 6
+	readBytes = readBytes[6:]
+
+	// Deal with the fname
 	copy(fnameBuf, readBytes)
 	if len(fnameBuf) > len(readBytes) {
 		fnameBuf = fnameBuf[:len(readBytes)]