From 4044b96e12fdbb3452f67b9139372fbe90197328 Mon Sep 17 00:00:00 2001 From: Antoine GIRARD Date: Fri, 21 Jun 2019 11:23:10 +0200 Subject: [PATCH 1/9] Force vendor of commitgraph Signed-off-by: Filip Navara --- .../format/commitgraph/commitgraph.go | 35 +++ .../plumbing/format/commitgraph/doc.go | 103 +++++++ .../plumbing/format/commitgraph/encoder.go | 190 +++++++++++++ .../plumbing/format/commitgraph/file.go | 259 ++++++++++++++++++ .../plumbing/format/commitgraph/memory.go | 72 +++++ .../plumbing/object/commitgraph/commitnode.go | 98 +++++++ .../object/commitgraph/commitnode_graph.go | 131 +++++++++ .../object/commitgraph/commitnode_object.go | 90 ++++++ .../commitgraph/commitnode_walker_ctime.go | 105 +++++++ .../plumbing/object/commitgraph/doc.go | 7 + vendor/modules.txt | 4 +- 11 files changed, 1093 insertions(+), 1 deletion(-) create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go create mode 100644 vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go new file mode 100644 index 000000000000..e43cd8978adb --- 
/dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/commitgraph.go @@ -0,0 +1,35 @@ +package commitgraph + +import ( + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// CommitData is a reduced representation of Commit as presented in the commit graph +// file. It is merely useful as an optimization for walking the commit graphs. +type CommitData struct { + // TreeHash is the hash of the root tree of the commit. + TreeHash plumbing.Hash + // ParentIndexes are the indexes of the parent commits of the commit. + ParentIndexes []int + // ParentHashes are the hashes of the parent commits of the commit. + ParentHashes []plumbing.Hash + // Generation is the pre-computed generation number in the commit graph, + // or zero if not available. + Generation int + // When is the timestamp of the commit. + When time.Time +} + +// Index is a representation of a commit graph that allows indexed +// access to the nodes using the commit object hash. +type Index interface { + // GetIndexByHash gets the index in the commit graph from commit hash, if available + GetIndexByHash(h plumbing.Hash) (int, error) + // GetCommitDataByIndex gets the commit data from the commit graph using an index + // obtained from GetIndexByHash or from a child's ParentIndexes, if available + GetCommitDataByIndex(i int) (*CommitData, error) + // Hashes returns all the hashes that are available in the index + Hashes() []plumbing.Hash +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go new file mode 100644 index 000000000000..41cd8b1e3153 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/doc.go @@ -0,0 +1,103 @@ +// Package commitgraph implements encoding and decoding of commit-graph files. +// +// Git commit graph format +// ======================= +// +// The Git commit graph stores a list of commit OIDs and some associated +// metadata, including: +// +// - The generation number of the commit. 
Commits with no parents have +// generation number 1; commits with parents have generation number +// one more than the maximum generation number of its parents. We +// reserve zero as special, and can be used to mark a generation +// number invalid or as "not computed". +// +// - The root tree OID. +// +// - The commit date. +// +// - The parents of the commit, stored using positional references within +// the graph file. +// +// These positional references are stored as unsigned 32-bit integers +// corresponding to the array position within the list of commit OIDs. Due +// to some special constants we use to track parents, we can store at most +// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. +// +// == Commit graph files have the following format: +// +// In order to allow extensions that add extra data to the graph, we organize +// the body into "chunks" and provide a binary lookup table at the beginning +// of the body. The header includes certain values, such as number of chunks +// and hash type. +// +// All 4-byte numbers are in network order. +// +// HEADER: +// +// 4-byte signature: +// The signature is: {'C', 'G', 'P', 'H'} +// +// 1-byte version number: +// Currently, the only valid version is 1. +// +// 1-byte Hash Version (1 = SHA-1) +// We infer the hash length (H) from this value. +// +// 1-byte number (C) of "chunks" +// +// 1-byte (reserved for later use) +// Current clients should ignore this value. +// +// CHUNK LOOKUP: +// +// (C + 1) * 12 bytes listing the table of contents for the chunks: +// First 4 bytes describe the chunk id. Value 0 is a terminating label. +// Other 8 bytes provide the byte-offset in current file for chunk to +// start. (Chunks are ordered contiguously in the file, so you can infer +// the length using the next chunk position if necessary.) Each chunk +// ID appears at most once. +// +// The remaining data in the body is described one chunk at a time, and +// these chunks may be given in any order. 
Chunks are required unless +// otherwise specified. +// +// CHUNK DATA: +// +// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) +// The ith entry, F[i], stores the number of OIDs with first +// byte at most i. Thus F[255] stores the total +// number of commits (N). +// +// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) +// The OIDs for all commits in the graph, sorted in ascending order. +// +// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) +// * The first H bytes are for the OID of the root tree. +// * The next 8 bytes are for the positions of the first two parents +// of the ith commit. Stores value 0x70000000 if no parent in that +// position. If there are more than two parents, the second value +// has its most-significant bit on and the other bits store an array +// position into the Extra Edge List chunk. +// * The next 8 bytes store the generation number of the commit and +// the commit time in seconds since EPOCH. The generation number +// uses the higher 30 bits of the first 4 bytes, while the commit +// time uses the 32 bits of the second 4 bytes, along with the lowest +// 2 bits of the lowest byte, storing the 33rd and 34th bit of the +// commit time. +// +// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] +// This list of 4-byte values store the second through nth parents for +// all octopus merges. The second parent value in the commit data stores +// an array position within this list along with the most-significant bit +// on. Starting at that array position, iterate through this list of commit +// positions for the parents until reaching a value with the most-significant +// bit on. The other bits correspond to the position of the last parent. +// +// TRAILER: +// +// H-byte HASH-checksum of all of the above. 
+// +// Source: +// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt +package commitgraph diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go new file mode 100644 index 000000000000..a06871cb7ce4 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/encoder.go @@ -0,0 +1,190 @@ +package commitgraph + +import ( + "crypto/sha1" + "hash" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +// Encoder writes commit graph indexes to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new stream encoder that writes to w. +func NewEncoder(w io.Writer) *Encoder { + h := sha1.New() + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +// Encode writes an index into the commit-graph file and stops at the first write error +func (e *Encoder) Encode(idx Index) error { + var err error + + // Get all the hashes in the input index + hashes := idx.Hashes() + + // Sort the input and prepare helper structures we'll need for encoding + hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes) + + chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature} + chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36} + if extraEdgesCount > 0 { + chunkSignatures = append(chunkSignatures, extraEdgeListSignature) + chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4) + } + + if err = e.encodeFileHeader(len(chunkSignatures)); err != nil { + return err + } + if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { + return err + } + if err = e.encodeFanout(fanout); err != nil { + return err + } + if err = e.encodeOidLookup(hashes); err != nil { + return err + } + // Keep err in function scope: declaring it in an if statement would shadow it and drop a failed write + extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx) + if err != nil { + return err + } + if err = e.encodeExtraEdges(extraEdges); err != nil { + return err + } + return e.encodeChecksum() +} + 
+func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) { + // Sort the hashes and build our index + plumbing.HashesSort(hashes) + hashToIndex = make(map[plumbing.Hash]uint32) + fanout = make([]uint32, 256) + for i, hash := range hashes { + hashToIndex[hash] = uint32(i) + fanout[hash[0]]++ + } + + // Convert the fanout to cumulative values + for i := 1; i <= 0xff; i++ { + fanout[i] += fanout[i-1] + } + + // Count the extra edge table entries needed by all octopus merges; every + // commit has to be visited because the count sizes the EDGE chunk + for i := 0; i < len(hashes); i++ { + v, _ := idx.GetCommitDataByIndex(i) + if len(v.ParentHashes) > 2 { + extraEdgesCount += uint32(len(v.ParentHashes) - 1) + } + } + + return +} + +func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { + if _, err = e.Write(commitFileSignature); err == nil { + _, err = e.Write([]byte{1, 1, byte(chunkCount), 0}) + } + return +} + +func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { + // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator + offset := uint64(8 + len(chunkSignatures)*12 + 12) + for i, signature := range chunkSignatures { + if _, err = e.Write(signature); err == nil { + err = binary.WriteUint64(e, offset) + } + if err != nil { + return + } + offset += chunkSizes[i] + } + if _, err = e.Write(lastSignature); err == nil { + err = binary.WriteUint64(e, offset) + } + return +} + +func (e *Encoder) encodeFanout(fanout []uint32) (err error) { + for i := 0; i <= 0xff; i++ { + if err = binary.WriteUint32(e, fanout[i]); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { + for _, hash := range hashes { + if _, err = e.Write(hash[:]); err != nil { + return err + } + } + return +} + +func (e *Encoder) 
encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) { + for _, hash := range hashes { + origIndex, _ := idx.GetIndexByHash(hash) + commitData, _ := idx.GetCommitDataByIndex(origIndex) + if _, err = e.Write(commitData.TreeHash[:]); err != nil { + return + } + + var parent1, parent2 uint32 + if len(commitData.ParentHashes) == 0 { + parent1 = parentNone + parent2 = parentNone + } else if len(commitData.ParentHashes) == 1 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = parentNone + } else if len(commitData.ParentHashes) == 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = hashToIndex[commitData.ParentHashes[1]] + } else if len(commitData.ParentHashes) > 2 { + parent1 = hashToIndex[commitData.ParentHashes[0]] + parent2 = uint32(len(extraEdges)) | parentOctopusUsed + for _, parentHash := range commitData.ParentHashes[1:] { + extraEdges = append(extraEdges, hashToIndex[parentHash]) + } + extraEdges[len(extraEdges)-1] |= parentLast + } + + if err = binary.WriteUint32(e, parent1); err == nil { + err = binary.WriteUint32(e, parent2) + } + if err != nil { + return + } + + unixTime := uint64(commitData.When.Unix()) + unixTime |= uint64(commitData.Generation) << 34 + if err = binary.WriteUint64(e, unixTime); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) { + for _, parent := range extraEdges { + if err = binary.WriteUint32(e, parent); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeChecksum() error { + _, err := e.Write(e.hash.Sum(nil)[:20]) + return err +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go new file mode 100644 index 000000000000..175d27933393 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/file.go @@ -0,0 +1,259 @@ +package commitgraph + 
+import ( + "bytes" + encbin "encoding/binary" + "errors" + "io" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +var ( + // ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph + // file version is not supported. + ErrUnsupportedVersion = errors.New("Unsupported version") + // ErrUnsupportedHash is returned by OpenFileIndex when the commit graph + // hash function is not supported. Currently only SHA-1 is defined and + // supported + ErrUnsupportedHash = errors.New("Unsupported hash algorithm") + // ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit + // graph file is corrupted. + ErrMalformedCommitGraphFile = errors.New("Malformed commit graph file") + + commitFileSignature = []byte{'C', 'G', 'P', 'H'} + oidFanoutSignature = []byte{'O', 'I', 'D', 'F'} + oidLookupSignature = []byte{'O', 'I', 'D', 'L'} + commitDataSignature = []byte{'C', 'D', 'A', 'T'} + extraEdgeListSignature = []byte{'E', 'D', 'G', 'E'} + lastSignature = []byte{0, 0, 0, 0} + + parentNone = uint32(0x70000000) + parentOctopusUsed = uint32(0x80000000) + parentOctopusMask = uint32(0x7fffffff) + parentLast = uint32(0x80000000) +) + +type fileIndex struct { + reader io.ReaderAt + fanout [256]int + oidFanoutOffset int64 + oidLookupOffset int64 + commitDataOffset int64 + extraEdgeListOffset int64 +} + +// OpenFileIndex opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndex(reader io.ReaderAt) (Index, error) { + fi := &fileIndex{reader: reader} + + if err := fi.verifyFileHeader(); err != nil { + return nil, err + } + if err := fi.readChunkHeaders(); err != nil { + return nil, err + } + if err := fi.readFanout(); err != nil { + return nil, err + } + + return fi, nil +} + +func (fi *fileIndex) verifyFileHeader() error { + // Verify file signature + var signature = make([]byte, 4) + if _, err := 
fi.reader.ReadAt(signature, 0); err != nil { + return err + } + if !bytes.Equal(signature, commitFileSignature) { + return ErrMalformedCommitGraphFile + } + + // Read and verify the file header + var header = make([]byte, 4) + if _, err := fi.reader.ReadAt(header, 4); err != nil { + return err + } + if header[0] != 1 { + return ErrUnsupportedVersion + } + if header[1] != 1 { + return ErrUnsupportedHash + } + + return nil +} + +func (fi *fileIndex) readChunkHeaders() error { + var chunkID = make([]byte, 4) + for i := 0; ; i++ { + chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12) + if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil { + return err + } + chunkOffset, err := binary.ReadUint64(chunkHeader) + if err != nil { + return err + } + + if bytes.Equal(chunkID, oidFanoutSignature) { + fi.oidFanoutOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, oidLookupSignature) { + fi.oidLookupOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, commitDataSignature) { + fi.commitDataOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, extraEdgeListSignature) { + fi.extraEdgeListOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, lastSignature) { + break + } + } + + if fi.oidFanoutOffset <= 0 || fi.oidLookupOffset <= 0 || fi.commitDataOffset <= 0 { + return ErrMalformedCommitGraphFile + } + + return nil +} + +func (fi *fileIndex) readFanout() error { + fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4) + for i := 0; i < 256; i++ { + fanoutValue, err := binary.ReadUint32(fanoutReader) + if err != nil { + return err + } + if fanoutValue > 0x7fffffff { + return ErrMalformedCommitGraphFile + } + fi.fanout[i] = int(fanoutValue) + } + return nil +} + +func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) { + var oid plumbing.Hash + + // Find the hash in the oid lookup table + var low int + if h[0] == 0 { + low = 0 + } else { + low = fi.fanout[h[0]-1] + } + high := fi.fanout[h[0]] + 
for low < high { + mid := (low + high) >> 1 + offset := fi.oidLookupOffset + int64(mid)*20 + if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { + return 0, err + } + cmp := bytes.Compare(h[:], oid[:]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return mid, nil + } else { + low = mid + 1 + } + } + + return 0, plumbing.ErrObjectNotFound +} + +func (fi *fileIndex) GetCommitDataByIndex(idx int) (*CommitData, error) { + if idx >= fi.fanout[0xff] { + return nil, plumbing.ErrObjectNotFound + } + + offset := fi.commitDataOffset + int64(idx)*36 + commitDataReader := io.NewSectionReader(fi.reader, offset, 36) + + treeHash, err := binary.ReadHash(commitDataReader) + if err != nil { + return nil, err + } + parent1, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + parent2, err := binary.ReadUint32(commitDataReader) + if err != nil { + return nil, err + } + genAndTime, err := binary.ReadUint64(commitDataReader) + if err != nil { + return nil, err + } + + var parentIndexes []int + if parent2&parentOctopusUsed == parentOctopusUsed { + // Octopus merge + parentIndexes = []int{int(parent1 & parentOctopusMask)} + offset := fi.extraEdgeListOffset + 4*int64(parent2&parentOctopusMask) + buf := make([]byte, 4) + for { + _, err := fi.reader.ReadAt(buf, offset) + if err != nil { + return nil, err + } + + parent := encbin.BigEndian.Uint32(buf) + offset += 4 + parentIndexes = append(parentIndexes, int(parent&parentOctopusMask)) + if parent&parentLast == parentLast { + break + } + } + } else if parent2 != parentNone { + parentIndexes = []int{int(parent1 & parentOctopusMask), int(parent2 & parentOctopusMask)} + } else if parent1 != parentNone { + parentIndexes = []int{int(parent1 & parentOctopusMask)} + } + + parentHashes, err := fi.getHashesFromIndexes(parentIndexes) + if err != nil { + return nil, err + } + + return &CommitData{ + TreeHash: treeHash, + ParentIndexes: parentIndexes, + ParentHashes: parentHashes, + Generation: 
int(genAndTime >> 34), + When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0), + }, nil +} + +func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error) { + hashes := make([]plumbing.Hash, len(indexes)) + + for i, idx := range indexes { + if idx >= fi.fanout[0xff] { + return nil, ErrMalformedCommitGraphFile + } + + offset := fi.oidLookupOffset + int64(idx)*20 + if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil { + return nil, err + } + } + + return hashes, nil +} + +// Hashes returns all the hashes that are available in the index +func (fi *fileIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, fi.fanout[0xff]) + for i := 0; i < int(fi.fanout[0xff]); i++ { + offset := fi.oidLookupOffset + int64(i)*20 + if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 { + return nil + } + } + return hashes +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go new file mode 100644 index 000000000000..a4a96e961289 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph/memory.go @@ -0,0 +1,72 @@ +package commitgraph + +import ( + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// MemoryIndex provides a way to build the commit-graph in memory +// for later encoding to file. 
+type MemoryIndex struct { + commitData []*CommitData + indexMap map[plumbing.Hash]int +} + +// NewMemoryIndex creates in-memory commit graph representation +func NewMemoryIndex() *MemoryIndex { + return &MemoryIndex{ + indexMap: make(map[plumbing.Hash]int), + } +} + +// GetIndexByHash gets the index in the commit graph from commit hash, if available +func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (int, error) { + i, ok := mi.indexMap[h] + if ok { + return i, nil + } + + return 0, plumbing.ErrObjectNotFound +} + +// GetCommitDataByIndex gets the commit data from the commit graph using index obtained from +// child node, if available; negative or out-of-range indexes yield plumbing.ErrObjectNotFound +func (mi *MemoryIndex) GetCommitDataByIndex(i int) (*CommitData, error) { + if i < 0 || i >= len(mi.commitData) { + return nil, plumbing.ErrObjectNotFound + } + + commitData := mi.commitData[i] + + // Map parent hashes to parent indexes + if commitData.ParentIndexes == nil { + parentIndexes := make([]int, len(commitData.ParentHashes)) + for i, parentHash := range commitData.ParentHashes { + var err error + if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil { + return nil, err + } + } + commitData.ParentIndexes = parentIndexes + } + + return commitData, nil +} + +// Hashes returns all the hashes that are available in the index +func (mi *MemoryIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, 0, len(mi.indexMap)) + for k := range mi.indexMap { + hashes = append(hashes, k) + } + return hashes +} + +// Add adds new node to the memory index +func (mi *MemoryIndex) Add(hash plumbing.Hash, commitData *CommitData) { + // The parent indexes are calculated lazily in GetCommitDataByIndex + // which allows adding nodes out of order as long as all parents + // are eventually resolved + commitData.ParentIndexes = nil + mi.indexMap[hash] = len(mi.commitData) + mi.commitData = append(mi.commitData, commitData) +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go
b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go new file mode 100644 index 000000000000..e218d3210bde --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode.go @@ -0,0 +1,98 @@ +package commitgraph + +import ( + "io" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// CommitNode is generic interface encapsulating a lightweight commit object retrieved +// from CommitNodeIndex +type CommitNode interface { + // ID returns the Commit object id referenced by the commit graph node. + ID() plumbing.Hash + // Tree returns the Tree referenced by the commit graph node. + Tree() (*object.Tree, error) + // CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. + CommitTime() time.Time + // NumParents returns the number of parents in a commit. + NumParents() int + // ParentNodes return a CommitNodeIter for parents of specified node. + ParentNodes() CommitNodeIter + // ParentNode returns the ith parent of a commit. + ParentNode(i int) (CommitNode, error) + // ParentHashes returns hashes of the parent commits for a specified node + ParentHashes() []plumbing.Hash + // Generation returns the generation of the commit for reachability analysis. + // Objects with newer generation are not reachable from objects of older generation. + Generation() uint64 + // Commit returns the full commit object from the node + Commit() (*object.Commit, error) +} + +// CommitNodeIndex is generic interface encapsulating an index of CommitNode objects +type CommitNodeIndex interface { + // Get returns a commit node from a commit hash + Get(hash plumbing.Hash) (CommitNode, error) +} + +// CommitNodeIter is a generic closable interface for iterating over commit nodes. 
+type CommitNodeIter interface { + Next() (CommitNode, error) + ForEach(func(CommitNode) error) error + Close() +} + +// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. +type parentCommitNodeIter struct { + node CommitNode + i int +} + +func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter { + return &parentCommitNodeIter{node, 0} +} + +// Next moves the iterator to the next commit and returns a pointer to it. If +// there are no more commits, it returns io.EOF. +func (iter *parentCommitNodeIter) Next() (CommitNode, error) { + obj, err := iter.node.ParentNode(iter.i) + if err == object.ErrParentNotFound { + return nil, io.EOF + } + if err == nil { + iter.i++ + } + + return obj, err +} + +// ForEach call the cb function for each commit contained on this iter until +// an error appends or the end of the iter is reached. If ErrStop is sent +// the iteration is stopped but no error is returned. The iterator is closed. +func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error { + for { + obj, err := iter.Next() + if err != nil { + if err == io.EOF { + return nil + } + + return err + } + + if err := cb(obj); err != nil { + if err == storer.ErrStop { + return nil + } + + return err + } + } +} + +func (iter *parentCommitNodeIter) Close() { +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go new file mode 100644 index 000000000000..bd54e18886f8 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_graph.go @@ -0,0 +1,131 @@ +package commitgraph + +import ( + "fmt" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// graphCommitNode is a reduced representation of Commit as presented in the 
commit +// graph file (commitgraph.Node). It is merely useful as an optimization for walking +// the commit graphs. +// +// graphCommitNode implements the CommitNode interface. +type graphCommitNode struct { + // Hash for the Commit object + hash plumbing.Hash + // Index of the node in the commit graph file + index int + + commitData *commitgraph.CommitData + gci *graphCommitNodeIndex +} + +// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit +// graph files and the object store. +// +// graphCommitNodeIndex implements the CommitNodeIndex interface +type graphCommitNodeIndex struct { + commitGraph commitgraph.Index + s storer.EncodedObjectStorer +} + +// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph +// files as backing storage and falls back to object storage when necessary +func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { + return &graphCommitNodeIndex{commitGraph, s} +} + +func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + // Check the commit graph first + parentIndex, err := gci.commitGraph.GetIndexByHash(hash) + if err == nil { + parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: hash, + index: parentIndex, + commitData: parent, + gci: gci, + }, nil + } + + // Fallback to loading full commit object + commit, err := object.GetCommit(gci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: gci, + commit: commit, + }, nil +} + +func (c *graphCommitNode) ID() plumbing.Hash { + return c.hash +} + +func (c *graphCommitNode) Tree() (*object.Tree, error) { + return object.GetTree(c.gci.s, c.commitData.TreeHash) +} + +func (c *graphCommitNode) CommitTime() time.Time { + return c.commitData.When +} + +func (c *graphCommitNode) NumParents() int { + return 
len(c.commitData.ParentIndexes) +} + +func (c *graphCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commitData.ParentIndexes) { + return nil, object.ErrParentNotFound + } + + parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) + if err != nil { + return nil, err + } + + return &graphCommitNode{ + hash: c.commitData.ParentHashes[i], + index: c.commitData.ParentIndexes[i], + commitData: parent, + gci: c.gci, + }, nil +} + +func (c *graphCommitNode) ParentHashes() []plumbing.Hash { + return c.commitData.ParentHashes +} + +func (c *graphCommitNode) Generation() uint64 { + // If the commit-graph file was generated with older Git version that + // set the generation to zero for every commit the generation assumption + // is still valid. It is just less useful. + return uint64(c.commitData.Generation) +} + +func (c *graphCommitNode) Commit() (*object.Commit, error) { + return object.GetCommit(c.gci.s, c.hash) +} + +func (c *graphCommitNode) String() string { + return fmt.Sprintf( + "%s %s\nDate: %s", + plumbing.CommitObject, c.ID(), + c.CommitTime().Format(object.DateFormat), + ) +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go new file mode 100644 index 000000000000..2779a54bc7b1 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_object.go @@ -0,0 +1,90 @@ +package commitgraph + +import ( + "math" + "time" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +// objectCommitNode is a representation of Commit as presented in the GIT object format. +// +// objectCommitNode implements the CommitNode interface. 
+type objectCommitNode struct { + nodeIndex CommitNodeIndex + commit *object.Commit +} + +// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses +// only object storage to load the nodes +func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { + return &objectCommitNodeIndex{s} +} + +func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { + commit, err := object.GetCommit(oci.s, hash) + if err != nil { + return nil, err + } + + return &objectCommitNode{ + nodeIndex: oci, + commit: commit, + }, nil +} + +// objectCommitNodeIndex is an index that can load CommitNode objects only from the +// object store. +// +// objectCommitNodeIndex implements the CommitNodeIndex interface +type objectCommitNodeIndex struct { + s storer.EncodedObjectStorer +} + +func (c *objectCommitNode) CommitTime() time.Time { + return c.commit.Committer.When +} + +func (c *objectCommitNode) ID() plumbing.Hash { + return c.commit.ID() +} + +func (c *objectCommitNode) Tree() (*object.Tree, error) { + return c.commit.Tree() +} + +func (c *objectCommitNode) NumParents() int { + return c.commit.NumParents() +} + +func (c *objectCommitNode) ParentNodes() CommitNodeIter { + return newParentgraphCommitNodeIter(c) +} + +func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { + if i < 0 || i >= len(c.commit.ParentHashes) { + return nil, object.ErrParentNotFound + } + + // Note: It's necessary to go through CommitNodeIndex here to ensure + // that if the commit-graph file covers only part of the history we + // start using it when that part is reached. + return c.nodeIndex.Get(c.commit.ParentHashes[i]) +} + +func (c *objectCommitNode) ParentHashes() []plumbing.Hash { + return c.commit.ParentHashes +} + +func (c *objectCommitNode) Generation() uint64 { + // Commit nodes representing objects outside of the commit graph can never + // be reached by objects from the commit-graph thus we return the highest + // possible value. 
+ return math.MaxUint64 +} + +func (c *objectCommitNode) Commit() (*object.Commit, error) { + return c.commit, nil +} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go new file mode 100644 index 000000000000..f6a1b6a4efa2 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/commitnode_walker_ctime.go @@ -0,0 +1,105 @@ +package commitgraph + +import ( + "io" + + "github.com/emirpasic/gods/trees/binaryheap" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/storer" +) + +type commitNodeIteratorByCTime struct { + heap *binaryheap.Heap + seenExternal map[plumbing.Hash]bool + seen map[plumbing.Hash]bool +} + +// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, +// starting at the given commit and visiting its parents while preserving Committer Time order. +// this appears to be the closest order to `git log` +// The given callback will be called for each visited commit. Each commit will +// be visited only once. If the callback returns an error, walking will stop +// and will return the error. Other errors might be returned if the history +// cannot be traversed (e.g. missing objects). Ignore allows to skip some +// commits from being iterated. 
+func NewCommitNodeIterCTime( + c CommitNode, + seenExternal map[plumbing.Hash]bool, + ignore []plumbing.Hash, +) CommitNodeIter { + seen := make(map[plumbing.Hash]bool) + for _, h := range ignore { + seen[h] = true + } + + heap := binaryheap.NewWith(func(a, b interface{}) int { + if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { + return 1 + } + return -1 + }) + + heap.Push(c) + + return &commitNodeIteratorByCTime{ + heap: heap, + seenExternal: seenExternal, + seen: seen, + } +} + +func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { + var c CommitNode + for { + cIn, ok := w.heap.Pop() + if !ok { + return nil, io.EOF + } + c = cIn.(CommitNode) + cID := c.ID() + + if w.seen[cID] || w.seenExternal[cID] { + continue + } + + w.seen[cID] = true + + for i, h := range c.ParentHashes() { + if w.seen[h] || w.seenExternal[h] { + continue + } + pc, err := c.ParentNode(i) + if err != nil { + return nil, err + } + w.heap.Push(pc) + } + + return c, nil + } +} + +func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { + for { + c, err := w.Next() + if err == io.EOF { + break + } + if err != nil { + return err + } + + err = cb(c) + if err == storer.ErrStop { + break + } + if err != nil { + return err + } + } + + return nil +} + +func (w *commitNodeIteratorByCTime) Close() {} diff --git a/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go new file mode 100644 index 000000000000..0a55ad5b0162 --- /dev/null +++ b/vendor/gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph/doc.go @@ -0,0 +1,7 @@ +// Package commitgraph provides an interface for efficient traversal over Git +// commit graph either through the regular object storage, or optionally with +// the index stored in commit-graph file (Git 2.18+). +// +// The API and functionality of this package are considered EXPERIMENTAL and is +// not considered stable nor production ready. 
+package commitgraph diff --git a/vendor/modules.txt b/vendor/modules.txt index ef253f519489..aa2a11becbfd 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -429,6 +429,8 @@ gopkg.in/src-d/go-billy.v4/helper/polyfill gopkg.in/src-d/go-git.v4 gopkg.in/src-d/go-git.v4/config gopkg.in/src-d/go-git.v4/plumbing +gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph +gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph gopkg.in/src-d/go-git.v4/plumbing/cache gopkg.in/src-d/go-git.v4/plumbing/filemode gopkg.in/src-d/go-git.v4/plumbing/object @@ -454,8 +456,8 @@ gopkg.in/src-d/go-git.v4/utils/merkletrie/index gopkg.in/src-d/go-git.v4/utils/merkletrie/noder gopkg.in/src-d/go-git.v4/internal/url gopkg.in/src-d/go-git.v4/plumbing/format/config -gopkg.in/src-d/go-git.v4/plumbing/format/diff gopkg.in/src-d/go-git.v4/utils/binary +gopkg.in/src-d/go-git.v4/plumbing/format/diff gopkg.in/src-d/go-git.v4/plumbing/format/idxfile gopkg.in/src-d/go-git.v4/plumbing/format/objfile gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit From ec1753d274bc42a1f8f30edce80e518d6fcd3fab Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Sun, 21 Apr 2019 13:52:34 +0200 Subject: [PATCH 2/9] Experimental support for git commit graph files and bloom filter index Signed-off-by: Filip Navara --- models/repo.go | 8 ++ modules/git/commit_info.go | 88 ++++++++++++-- modules/git/notes.go | 12 +- modules/git/repo_commitgraph.go | 180 +++++++++++++++++++++++++++++ modules/gitbloom/bloom.go | 57 ++++++++++ modules/gitbloom/build_test.go | 123 ++++++++++++++++++++ modules/gitbloom/encoder.go | 164 ++++++++++++++++++++++++++ modules/gitbloom/fileindex.go | 196 ++++++++++++++++++++++++++++++++ modules/gitbloom/index.go | 13 +++ modules/gitbloom/memoryindex.go | 37 ++++++ 10 files changed, 865 insertions(+), 13 deletions(-) create mode 100644 modules/git/repo_commitgraph.go create mode 100644 modules/gitbloom/bloom.go create mode 100644 modules/gitbloom/build_test.go create mode 100644 
modules/gitbloom/encoder.go create mode 100644 modules/gitbloom/fileindex.go create mode 100644 modules/gitbloom/index.go create mode 100644 modules/gitbloom/memoryindex.go diff --git a/models/repo.go b/models/repo.go index 1b4ff1f18678..2a03a596b86a 100644 --- a/models/repo.go +++ b/models/repo.go @@ -2206,6 +2206,14 @@ func GitFsck() { func(idx int, bean interface{}) error { repo := bean.(*Repository) repoPath := repo.RepoPath() + // TODO: Move this elsewhere + if gitRepo, err := git.OpenRepository(repoPath); err == nil { + log.Trace("Building commit graph index") + if err := gitRepo.BuildCommitGraph(false); err != nil { + desc := fmt.Sprintf("Failed to build commit graph (%s): %v", repoPath, err) + log.Warn(desc) + } + } log.Trace("Running health check on repository %s", repoPath) if err := git.Fsck(repoPath, setting.Cron.RepoHealthCheck.Timeout, setting.Cron.RepoHealthCheck.Args...); err != nil { desc := fmt.Sprintf("Failed to health check repository (%s): %v", repoPath, err) diff --git a/modules/git/commit_info.go b/modules/git/commit_info.go index 9270878c7fcb..16665acf592c 100644 --- a/modules/git/commit_info.go +++ b/modules/git/commit_info.go @@ -5,9 +5,13 @@ package git import ( + "fmt" + + "code.gitea.io/gitea/modules/gitbloom" "github.com/emirpasic/gods/trees/binaryheap" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/object" + cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" ) // GetCommitsInfo gets information of all commits that are corresponding to these entries @@ -19,12 +23,21 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom entryPaths[i+1] = entry.Name() } - c, err := commit.repo.gogitRepo.CommitObject(plumbing.Hash(commit.ID)) + commitNodeIndex, commitGraphFile := commit.repo.CommitNodeIndex() + if commitGraphFile != nil { + defer commitGraphFile.Close() + } + bloomIndex, bloomFile := commit.repo.BloomIndex() + if bloomFile != nil { + defer bloomFile.Close() + } + + c, err 
:= commitNodeIndex.Get(plumbing.Hash(commit.ID)) if err != nil { return nil, nil, err } - revs, err := getLastCommitForPaths(c, treePath, entryPaths) + revs, err := getLastCommitForPaths(bloomIndex, c, treePath, entryPaths) if err != nil { return nil, nil, err } @@ -69,14 +82,14 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom } type commitAndPaths struct { - commit *object.Commit + commit cgobject.CommitNode // Paths that are still on the branch represented by commit paths []string // Set of hashes for the paths hashes map[string]plumbing.Hash } -func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) { +func getCommitTree(c cgobject.CommitNode, treePath string) (*object.Tree, error) { tree, err := c.Tree() if err != nil { return nil, err @@ -93,7 +106,17 @@ func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) { return tree, nil } -func getFileHashes(c *object.Commit, treePath string, paths []string) (map[string]plumbing.Hash, error) { +func getFullPath(treePath, path string) string { + if treePath != "" { + if path != "" { + return treePath + "/" + path + } + return treePath + } + return path +} + +func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) { tree, err := getCommitTree(c, treePath) if err == object.ErrDirectoryNotFound { // The whole tree didn't exist, so return empty map @@ -118,16 +141,32 @@ func getFileHashes(c *object.Commit, treePath string, paths []string) (map[strin return hashes, nil } -func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (map[string]*object.Commit, error) { +func canSkipCommit(bloomIndex gitbloom.Index, commit cgobject.CommitNode, treePath string, paths []string) bool { + if bloom, err := bloomIndex.GetBloomByHash(commit.ID()); err == nil { + for _, path := range paths { + if bloom.Test(getFullPath(treePath, path)) { + return false + } + } + return true + } + return false 
+} + +func getLastCommitForPaths(bloomIndex gitbloom.Index, c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { + walkedCommits := 0 + testedCommits := 0 + skippedCommits := 0 + // We do a tree traversal with nodes sorted by commit time heap := binaryheap.NewWith(func(a, b interface{}) int { - if a.(*commitAndPaths).commit.Committer.When.Before(b.(*commitAndPaths).commit.Committer.When) { + if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) { return 1 } return -1 }) - result := make(map[string]*object.Commit) + resultNodes := make(map[string]cgobject.CommitNode) initialHashes, err := getFileHashes(c, treePath, paths) if err != nil { return nil, err @@ -143,17 +182,30 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m } current := cIn.(*commitAndPaths) + walkedCommits++ + // Load the parent commits for the one we are currently examining numParents := current.commit.NumParents() - var parents []*object.Commit + var parents []cgobject.CommitNode for i := 0; i < numParents; i++ { - parent, err := current.commit.Parent(i) + parent, err := current.commit.ParentNode(i) if err != nil { break } parents = append(parents, parent) } + // Optimization: If there is only one parent and a bloom filter can tell us + // that none of our paths has changed then skip all the change checking + if bloomIndex != nil && len(parents) >= 1 { + testedCommits++ + if canSkipCommit(bloomIndex, current.commit, treePath, current.paths) { + skippedCommits++ + heap.Push(&commitAndPaths{parents[0], current.paths, current.hashes}) + continue + } + } + // Examine the current commit and set of interesting paths pathUnchanged := make([]bool, len(current.paths)) parentHashes := make([]map[string]plumbing.Hash, len(parents)) @@ -174,7 +226,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m for i, path := range current.paths { // The results could already 
contain some newer change for the same path, // so don't override that and bail out on the file early. - if result[path] == nil { + if resultNodes[path] == nil { if pathUnchanged[i] { // The path existed with the same hash in at least one parent so it could // not have been changed in this commit directly. @@ -188,7 +240,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m // - We are looking at a merge commit and the hash of the file doesn't // match any of the hashes being merged. This is more common for directories, // but it can also happen if a file is changed through conflict resolution. - result[path] = current.commit + resultNodes[path] = current.commit } } } @@ -222,5 +274,17 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m } } + // Post-processing + result := make(map[string]*object.Commit) + for path, commitNode := range resultNodes { + var err error + result[path], err = commitNode.Commit() + if err != nil { + return nil, err + } + } + + fmt.Printf("walked %d tested %d skipped %d\n", walkedCommits, testedCommits, skippedCommits) + return result, nil } diff --git a/modules/git/notes.go b/modules/git/notes.go index 7aa5d89a79fd..25221e8cb3c8 100644 --- a/modules/git/notes.go +++ b/modules/git/notes.go @@ -50,7 +50,17 @@ func GetNote(repo *Repository, commitID string, note *Note) error { return err } - lastCommits, err := getLastCommitForPaths(commit, "", []string{commitID}) + commitNodeIndex, commitGraphFile := repo.CommitNodeIndex() + if commitGraphFile != nil { + defer commitGraphFile.Close() + } + + commitNode, err := commitNodeIndex.Get(commit.Hash) + if err != nil { + return nil + } + + lastCommits, err := getLastCommitForPaths(nil, commitNode, "", []string{commitID}) if err != nil { return err } diff --git a/modules/git/repo_commitgraph.go b/modules/git/repo_commitgraph.go new file mode 100644 index 000000000000..8ea6ab353e5c --- /dev/null +++ b/modules/git/repo_commitgraph.go @@ 
-0,0 +1,180 @@ +package git + +import ( + "fmt" + "io/ioutil" + "os" + "path" + + "code.gitea.io/gitea/modules/gitbloom" + "golang.org/x/exp/mmap" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + "gopkg.in/src-d/go-git.v4/plumbing/object" + cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" +) + +// CommitNodeIndex returns the index for walking commit graph +func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *mmap.ReaderAt) { + indexPath := path.Join(r.Path, "objects", "info", "commit-graph") + + file, err := mmap.Open(indexPath) + if err == nil { + index, err := commitgraph.OpenFileIndex(file) + if err == nil { + return cgobject.NewGraphCommitNodeIndex(index, r.gogitRepo.Storer), file + } + } + + return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil +} + +// CommitNodeIndex returns the index for walking commit graph +func (r *Repository) BloomIndex() (gitbloom.Index, *mmap.ReaderAt) { + indexPath := path.Join(r.Path, "bloom") + + file, err := mmap.Open(indexPath) + if err == nil { + fmt.Println("BLOOM1") + index, err := gitbloom.OpenFileIndex(file) + if err == nil { + fmt.Println("BLOOM2") + return index, file + } + } + + return nil, nil +} + +// BuildCommitGraph builds the commit-graph index file +func (r *Repository) BuildCommitGraph(withBloomFilters bool) error { + h, err := r.gogitRepo.Head() + if err != nil { + return err + } + + commit, err := r.gogitRepo.CommitObject(h.Hash()) + if err != nil { + return err + } + + // TODO: Incremental updates + idx, err := buildCommitGraph(commit, withBloomFilters) + if err != nil { + return err + } + + f, err := ioutil.TempFile(path.Join(r.Path, "objects", "info"), "commit-graph-tmp") + if err != nil { + return err + } + + tmpName := f.Name() + encoder := commitgraph.NewEncoder(f) + err = encoder.Encode(idx) + f.Close() + if err == nil { + indexPath := path.Join(r.Path, "objects", "info", "commit-graph") + os.Remove(indexPath) + err = 
os.Rename(tmpName, indexPath) + if err == nil { + return nil + } + } + os.Remove(tmpName) + return err +} + +func buildCommitGraph(c *object.Commit, withBloomFilters bool) (*commitgraph.MemoryIndex, error) { + idx := commitgraph.NewMemoryIndex() + seen := make(map[plumbing.Hash]bool) + // TODO: Unroll the recursion + return idx, addCommitToIndex(idx, c, seen, withBloomFilters) +} + +/* +func dumpTreeIntoBloomFilter(bloomFilter *commitgraph.BloomPathFilter, tree *object.Tree, treePath string) { + for _, entry := range tree.Entries { + fullPath := getFullPath(treePath, entry.Name) + bloomFilter.Add(fullPath) + if entry.Mode == filemode.Dir { + if subtree, err := tree.Tree(entry.Name); err == nil { + dumpTreeIntoBloomFilter(bloomFilter, subtree, fullPath) + } + } + } +} + +func updateBloomFilter(bloomFilter *commitgraph.BloomPathFilter, a, b *object.Tree, treePath string) { + aHashes := make(map[string]plumbing.Hash) + for _, entry := range a.Entries { + aHashes[entry.Name] = entry.Hash + } + + for _, entry := range b.Entries { + if aHashes[entry.Name] != entry.Hash { + // File from 'b' didn't exist in 'a', or it has different hash than in 'a' + fullPath := getFullPath(treePath, entry.Name) + bloomFilter.Add(fullPath) + if entry.Mode == filemode.Dir { + aTree, _ := a.Tree(entry.Name) + bTree, _ := b.Tree(entry.Name) + if aTree != nil && bTree != nil { + updateBloomFilter(bloomFilter, aTree, bTree, fullPath) + } else if aTree != nil { + dumpTreeIntoBloomFilter(bloomFilter, aTree, fullPath) + } else if bTree != nil { + dumpTreeIntoBloomFilter(bloomFilter, bTree, fullPath) + } + } + } + delete(aHashes, entry.Name) + } + + for name := range aHashes { + // File from 'a' is removed in 'b' + bloomFilter.Add(getFullPath(treePath, name)) + } +} + +func createBloomFilter(a, b *object.Tree) *commitgraph.BloomPathFilter { + bloomFilter := commitgraph.NewBloomPathFilter() + updateBloomFilter(bloomFilter, a, b, "") + return bloomFilter +}*/ + +func addCommitToIndex(idx 
*commitgraph.MemoryIndex, c *object.Commit, seen map[plumbing.Hash]bool, withBloomFilters bool) error { + if seen[c.Hash] { + return nil + } + seen[c.Hash] = true + + // Recursively add parents first + err := c.Parents().ForEach(func(parent *object.Commit) error { + return addCommitToIndex(idx, parent, seen, withBloomFilters) + }) + if err != nil { + return err + } + + // Calculate file difference to first parent commit + /*var bloomFilter *commitgraph.BloomPathFilter + if withBloomFilters && c.NumParents() == 1 { + if parent, err := c.Parent(0); err == nil { + if tree, err := c.Tree(); err == nil { + if parentTree, err := parent.Tree(); err == nil { + bloomFilter = createBloomFilter(parentTree, tree) + } + } + } + }*/ + + // Add this commit if it hasn't been done already + node := &commitgraph.CommitData{ + TreeHash: c.TreeHash, + ParentHashes: c.ParentHashes, + When: c.Committer.When, + } + idx.Add /*WithBloom*/ (c.Hash, node /*, bloomFilter*/) + return nil +} diff --git a/modules/gitbloom/bloom.go b/modules/gitbloom/bloom.go new file mode 100644 index 000000000000..1ff9ea7ce6eb --- /dev/null +++ b/modules/gitbloom/bloom.go @@ -0,0 +1,57 @@ +package gitbloom + +import ( + "github.com/spaolacci/murmur3" +) + +// BloomPathFilter is a probabilistic data structure that helps determining +// whether a path was was changed. +// +// The implementation uses a standard bloom filter with n=512, m=10, k=7 +// parameters using the 64-bit SipHash hash function with zero key. +type BloomPathFilter struct { + b []byte +} + +// Test checks whether a path was previously added to the filter. Returns +// false if the path is not present in the filter. Returns true if the path +// could be present in the filter. 
+func (f *BloomPathFilter) Test(path string) bool { + hash0 := murmur3.Sum32WithSeed([]byte(path), 0x293ae76f) + hash1 := murmur3.Sum32WithSeed([]byte(path), 0x7e646e2c) + for i := uint32(0); i < 7; i++ { + bit := (hash0 + hash1*i) % uint32(len(f.b)*8) + if f.b[bit>>3]&(1<<(bit&7)) == 0 { + return false + } + } + return true +} + +// Add path data to the filter. +func (f *BloomPathFilter) Add(path string) { + hash0 := murmur3.Sum32WithSeed([]byte(path), 0x293ae76f) + hash1 := murmur3.Sum32WithSeed([]byte(path), 0x7e646e2c) + for i := uint32(0); i < 7; i++ { + bit := (hash0 + hash1*i) % uint32(len(f.b)*8) + f.b[bit>>3] |= 1 << (bit & 7) + } +} + +// Data returns data bytes +func (f *BloomPathFilter) Data() []byte { + return f.b +} + +// NewBloomPathFilter creates a new empty bloom filter for n changed paths +func NewBloomPathFilter(n int) *BloomPathFilter { + f := &BloomPathFilter{make([]byte, ((n+63)/64)*8)} + return f +} + +// LoadBloomPathFilter creates a bloom filter from a byte array previously +// returned by Data +func LoadBloomPathFilter(data []byte) *BloomPathFilter { + f := &BloomPathFilter{data} + return f +} diff --git a/modules/gitbloom/build_test.go b/modules/gitbloom/build_test.go new file mode 100644 index 000000000000..45b47a41eaac --- /dev/null +++ b/modules/gitbloom/build_test.go @@ -0,0 +1,123 @@ +package gitbloom + +import ( + "testing" + + "gopkg.in/src-d/go-git.v4" + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/plumbing/cache" + "gopkg.in/src-d/go-git.v4/plumbing/filemode" + "gopkg.in/src-d/go-git.v4/plumbing/object" + "gopkg.in/src-d/go-git.v4/plumbing/storer" + "gopkg.in/src-d/go-git.v4/storage/filesystem" + + "gopkg.in/src-d/go-billy.v4/osfs" +) + +// Example how to resolve a revision into its commit counterpart +func TestWrite(t *testing.T) { + path := "C:\\Users\\Filip Navara\\gitea-repositories\\filip\\linux.git" + + // We instantiate a new repository targeting the given path (the .git folder) + fs := osfs.New(path) + 
s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{KeepDescriptors: true}) + r, _ := git.Open(s, fs) + + memoryIndex := NewMemoryIndex() + + iter, _ := r.CommitObjects() + + iter.ForEach(func(c *object.Commit) error { + changes := make([]string, 0, 512) + aTree, _ := c.Tree() + if c.NumParents() > 0 { + c.Parents().ForEach(func(parent *object.Commit) error { + bTree, _ := parent.Tree() + changes = updateBloomFilter(changes, aTree, bTree, "") + return storer.ErrStop + }) + if len(changes) < 512 { + bloomFilter := NewBloomPathFilter(len(changes)) + for _, change := range changes { + bloomFilter.Add(change) + } + memoryIndex.Add(c.ID(), bloomFilter) + } + } + return nil + }) + + f, _ := fs.Create("bloom") + e := NewEncoder(f) + e.Encode(memoryIndex) + f.Close() +} + +func getFullPath(treePath, path string) string { + if treePath != "" { + if path != "" { + return treePath + "/" + path + } + return treePath + } + return path +} + +func dumpTreeIntoBloomFilter(changes []string, tree *object.Tree, treePath string) []string { + for _, entry := range tree.Entries { + fullPath := getFullPath(treePath, entry.Name) + changes = append(changes, fullPath) + if entry.Mode == filemode.Dir { + if subtree, err := tree.Tree(entry.Name); err == nil { + dumpTreeIntoBloomFilter(changes, subtree, fullPath) + } + } + } + return changes +} + +func updateBloomFilter(changes []string, a, b *object.Tree, treePath string) []string { + aHashes := make(map[string]plumbing.Hash) + for _, entry := range a.Entries { + aHashes[entry.Name] = entry.Hash + } + + for _, entry := range b.Entries { + if aHashes[entry.Name] != entry.Hash { + // File from 'b' didn't exist in 'a', or it has different hash than in 'a' + fullPath := getFullPath(treePath, entry.Name) + if entry.Mode == filemode.Dir { + aTree, _ := a.Tree(entry.Name) + bTree, _ := b.Tree(entry.Name) + if aTree != nil && bTree != nil { + changes = updateBloomFilter(changes, aTree, bTree, fullPath) + changes 
= append(changes, fullPath) + } else if aTree != nil { + changes = dumpTreeIntoBloomFilter(changes, aTree, fullPath) + } else if bTree != nil { + changes = dumpTreeIntoBloomFilter(changes, bTree, fullPath) + } + } else { + changes = append(changes, fullPath) + } + } + delete(aHashes, entry.Name) + } + + for name := range aHashes { + // File from 'a' is removed in 'b' + changes = append(changes, getFullPath(treePath, name)) + } + + return changes +} + +func createBloomFilter(a, b *object.Tree) *BloomPathFilter { + changes := make([]string, 0, 512) + changes = updateBloomFilter(changes, a, b, "") + bloomFilter := NewBloomPathFilter(len(changes)) + for _, change := range changes { + bloomFilter.Add(change) + } + return bloomFilter +} diff --git a/modules/gitbloom/encoder.go b/modules/gitbloom/encoder.go new file mode 100644 index 000000000000..57eac245597c --- /dev/null +++ b/modules/gitbloom/encoder.go @@ -0,0 +1,164 @@ +package gitbloom + +import ( + "crypto/sha1" + "hash" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +// Encoder writes MemoryIndex structs to an output stream. +type Encoder struct { + io.Writer + hash hash.Hash +} + +// NewEncoder returns a new stream encoder that writes to w. 
+func NewEncoder(w io.Writer) *Encoder { + h := sha1.New() + mw := io.MultiWriter(w, h) + return &Encoder{mw, h} +} + +func (e *Encoder) Encode(idx Index) error { + var err error + + // Get all the hashes in the input index + hashes := idx.Hashes() + + // Sort the inout and prepare helper structures we'll need for encoding + fanout, totalBloomSize := e.prepare(idx, hashes) + + chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, bloomIndexesSignature, bloomDataSignature} + chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 4, totalBloomSize} + + if err = e.encodeFileHeader(len(chunkSignatures)); err != nil { + return err + } + if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { + return err + } + if err = e.encodeFanout(fanout); err != nil { + return err + } + if err = e.encodeOidLookup(hashes); err != nil { + return err + } + if err := e.encodeBloomIndexes(idx, hashes); err != nil { + return err + } + if err := e.encodeBloomData(idx, hashes); err != nil { + return err + } + if err != nil { + return err + } + return e.encodeChecksum() +} + +func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (fanout []uint32, totalBloomSize uint64) { + // Sort the hashes and build our index + plumbing.HashesSort(hashes) + fanout = make([]uint32, 256) + for _, hash := range hashes { + fanout[hash[0]]++ + } + + // Convert the fanout to cumulative values + for i := 1; i <= 0xff; i++ { + fanout[i] += fanout[i-1] + } + + // Find out the total size of bloom filters + for _, hash := range hashes { + if bloom, _ := idx.GetBloomByHash(hash); bloom != nil { + totalBloomSize += uint64(len(bloom.Data()) / 8) + break + } + } + + return +} + +func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { + if _, err = e.Write(commitFileSignature); err == nil { + _, err = e.Write([]byte{1, 1, byte(chunkCount), 0}) + } + return +} + +func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err 
error) { + // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator + offset := uint64(8 + len(chunkSignatures)*12 + 12) + for i, signature := range chunkSignatures { + if _, err = e.Write(signature); err == nil { + err = binary.WriteUint64(e, offset) + } + if err != nil { + return + } + offset += chunkSizes[i] + } + if _, err = e.Write(lastSignature); err == nil { + err = binary.WriteUint64(e, offset) + } + return +} + +func (e *Encoder) encodeFanout(fanout []uint32) (err error) { + for i := 0; i <= 0xff; i++ { + if err = binary.WriteUint32(e, fanout[i]); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { + for _, hash := range hashes { + if _, err = e.Write(hash[:]); err != nil { + return err + } + } + return +} + +func (e *Encoder) encodeBloomIndexes(idx Index, hashes []plumbing.Hash) (err error) { + currentBloomSize := uint32(0) + for _, hash := range hashes { + if bloom, _ := idx.GetBloomByHash(hash); bloom != nil { + currentBloomSize += uint32(len(bloom.Data()) / 8) + } + if err = binary.WriteUint32(e, currentBloomSize); err != nil { + return + } + } + return +} + +func (e *Encoder) encodeBloomData(idx Index, hashes []plumbing.Hash) (err error) { + if err = binary.WriteUint32(e, 1); err != nil { + return + } + if err = binary.WriteUint32(e, 7); err != nil { + return + } + if err = binary.WriteUint32(e, 10); err != nil { + return + } + for _, hash := range hashes { + if bloom, _ := idx.GetBloomByHash(hash); bloom != nil { + if _, err = e.Write(bloom.Data()); err != nil { + return err + } + } + } + return +} + +func (e *Encoder) encodeChecksum() error { + _, err := e.Write(e.hash.Sum(nil)[:20]) + return err +} diff --git a/modules/gitbloom/fileindex.go b/modules/gitbloom/fileindex.go new file mode 100644 index 000000000000..5f9fd6f1db58 --- /dev/null +++ b/modules/gitbloom/fileindex.go @@ -0,0 +1,196 @@ +package gitbloom + +import ( + "bytes" + encbin "encoding/binary" 
+ "errors" + "io" + + "gopkg.in/src-d/go-git.v4/plumbing" + "gopkg.in/src-d/go-git.v4/utils/binary" +) + +var ( + // ErrUnsupportedVersion is returned by OpenFileIndex when the bloom filter + // file version is not supported. + ErrUnsupportedVersion = errors.New("Unsupported version") + // ErrMalformedBloomFilterFile is returned by OpenFileIndex when the bloom + // filter file is corrupted. + ErrMalformedBloomFilterFile = errors.New("Malformed bloom filter file") + + commitFileSignature = []byte{'C', 'G', 'P', 'H'} + oidFanoutSignature = []byte{'O', 'I', 'D', 'F'} + oidLookupSignature = []byte{'O', 'I', 'D', 'L'} + bloomIndexesSignature = []byte{'B', 'I', 'D', 'X'} + bloomDataSignature = []byte{'B', 'D', 'A', 'T'} + lastSignature = []byte{0, 0, 0, 0} +) + +type fileIndex struct { + reader io.ReaderAt + fanout [256]int + oidFanoutOffset int64 + oidLookupOffset int64 + bloomIndexesOffset int64 + bloomDataOffset int64 +} + +// OpenFileIndex opens a serialized commit graph file in the format described at +// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt +func OpenFileIndex(reader io.ReaderAt) (Index, error) { + fi := &fileIndex{reader: reader} + + if err := fi.verifyFileHeader(); err != nil { + return nil, err + } + if err := fi.readChunkHeaders(); err != nil { + return nil, err + } + if err := fi.readFanout(); err != nil { + return nil, err + } + + return fi, nil +} + +func (fi *fileIndex) verifyFileHeader() error { + // Verify file signature + var signature = make([]byte, 4) + if _, err := fi.reader.ReadAt(signature, 0); err != nil { + return err + } + if !bytes.Equal(signature, commitFileSignature) { + return ErrMalformedBloomFilterFile + } + + // Read and verify the file header + var header = make([]byte, 4) + if _, err := fi.reader.ReadAt(header, 4); err != nil { + return err + } + if header[0] != 1 { + return ErrUnsupportedVersion + } + + return nil +} + +func (fi *fileIndex) readChunkHeaders() error { + var chunkID = 
make([]byte, 4) + for i := 0; ; i++ { + chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12) + if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil { + return err + } + chunkOffset, err := binary.ReadUint64(chunkHeader) + if err != nil { + return err + } + + if bytes.Equal(chunkID, oidFanoutSignature) { + fi.oidFanoutOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, oidLookupSignature) { + fi.oidLookupOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, bloomIndexesSignature) { + fi.bloomIndexesOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, bloomDataSignature) { + fi.bloomDataOffset = int64(chunkOffset) + } else if bytes.Equal(chunkID, lastSignature) { + break + } + } + + if fi.oidFanoutOffset <= 0 || fi.oidLookupOffset <= 0 || fi.bloomIndexesOffset <= 0 || fi.bloomDataOffset <= 0 { + return ErrMalformedBloomFilterFile + } + + return nil +} + +func (fi *fileIndex) readFanout() error { + fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4) + for i := 0; i < 256; i++ { + fanoutValue, err := binary.ReadUint32(fanoutReader) + if err != nil { + return err + } + if fanoutValue > 0x7fffffff { + return ErrMalformedBloomFilterFile + } + fi.fanout[i] = int(fanoutValue) + } + return nil +} + +func (fi *fileIndex) getIndexByHash(h plumbing.Hash) (int, error) { + var oid plumbing.Hash + + // Find the hash in the oid lookup table + var low int + if h[0] == 0 { + low = 0 + } else { + low = fi.fanout[h[0]-1] + } + high := fi.fanout[h[0]] + for low < high { + mid := (low + high) >> 1 + offset := fi.oidLookupOffset + int64(mid)*20 + if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { + return 0, err + } + cmp := bytes.Compare(h[:], oid[:]) + if cmp < 0 { + high = mid + } else if cmp == 0 { + return mid, nil + } else { + low = mid + 1 + } + } + + return 0, plumbing.ErrObjectNotFound +} + +func (fi *fileIndex) GetBloomByHash(h plumbing.Hash) (*BloomPathFilter, error) { + idx, err := 
fi.getIndexByHash(h) + if err != nil { + return nil, err + } + + buf := make([]byte, 4) + prevIndex := uint32(0) + if idx > 0 { + if _, err := fi.reader.ReadAt(buf, fi.bloomIndexesOffset+int64(idx-1)*4); err != nil { + return nil, err + } + prevIndex = encbin.BigEndian.Uint32(buf) + } + if _, err := fi.reader.ReadAt(buf, fi.bloomIndexesOffset+int64(idx)*4); err != nil { + return nil, err + } + nextIndex := encbin.BigEndian.Uint32(buf) + + length := nextIndex - prevIndex + if length == 0 { + return nil, plumbing.ErrObjectNotFound + } + data := make([]byte, length*8) + _, err = fi.reader.ReadAt(data, fi.bloomDataOffset+12+int64(prevIndex)*8) + if err != nil { + return nil, err + } + + return LoadBloomPathFilter(data), nil +} + +func (fi *fileIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, fi.fanout[0xff]) + for i := 0; i < int(fi.fanout[0xff]); i++ { + offset := fi.oidLookupOffset + int64(i)*20 + if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 { + return nil + } + } + return hashes +} diff --git a/modules/gitbloom/index.go b/modules/gitbloom/index.go new file mode 100644 index 000000000000..a99557ad2284 --- /dev/null +++ b/modules/gitbloom/index.go @@ -0,0 +1,13 @@ +package gitbloom + +import ( + "gopkg.in/src-d/go-git.v4/plumbing" +) + +// Index provides methods to access bloom filters individual commits +type Index interface { + // GetBloomByHash gets the bloom path filter for particular commit + GetBloomByHash(h plumbing.Hash) (*BloomPathFilter, error) + // Hashes returns all the hashes that are available in the index + Hashes() []plumbing.Hash +} diff --git a/modules/gitbloom/memoryindex.go b/modules/gitbloom/memoryindex.go new file mode 100644 index 000000000000..1ac5184fe5c6 --- /dev/null +++ b/modules/gitbloom/memoryindex.go @@ -0,0 +1,37 @@ +package gitbloom + +import ( + "gopkg.in/src-d/go-git.v4/plumbing" +) + +type MemoryIndex struct { + filterMap map[plumbing.Hash]*BloomPathFilter +} + +// NewMemoryIndex 
creates in-memory commit graph representation +func NewMemoryIndex() *MemoryIndex { + return &MemoryIndex{make(map[plumbing.Hash]*BloomPathFilter)} +} + +// GetBloomByHash gets the bloom path filter for particular commit +func (mi *MemoryIndex) GetBloomByHash(h plumbing.Hash) (*BloomPathFilter, error) { + if filter, ok := mi.filterMap[h]; ok { + return filter, nil + } + + return nil, plumbing.ErrObjectNotFound +} + +// Add adds new filter to the memory index +func (mi *MemoryIndex) Add(hash plumbing.Hash, filter *BloomPathFilter) { + mi.filterMap[hash] = filter +} + +// Hashes returns all the hashes that are available in the index +func (mi *MemoryIndex) Hashes() []plumbing.Hash { + hashes := make([]plumbing.Hash, 0, len(mi.filterMap)) + for k := range mi.filterMap { + hashes = append(hashes, k) + } + return hashes +} From 439af6559edb361203736c2b709a8591aae1a6aa Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 27 Jun 2019 17:12:52 +0200 Subject: [PATCH 3/9] Remove bloom filter experiment and debug prints --- modules/git/commit_info.go | 42 +------ modules/git/notes.go | 2 +- modules/git/repo_commitgraph.go | 19 ---- modules/gitbloom/bloom.go | 57 ---------- modules/gitbloom/build_test.go | 123 -------------------- modules/gitbloom/encoder.go | 164 -------------------------- modules/gitbloom/fileindex.go | 196 -------------------------------- modules/gitbloom/index.go | 13 --- modules/gitbloom/memoryindex.go | 37 ------ 9 files changed, 3 insertions(+), 650 deletions(-) delete mode 100644 modules/gitbloom/bloom.go delete mode 100644 modules/gitbloom/build_test.go delete mode 100644 modules/gitbloom/encoder.go delete mode 100644 modules/gitbloom/fileindex.go delete mode 100644 modules/gitbloom/index.go delete mode 100644 modules/gitbloom/memoryindex.go diff --git a/modules/git/commit_info.go b/modules/git/commit_info.go index 16665acf592c..58de2024b207 100644 --- a/modules/git/commit_info.go +++ b/modules/git/commit_info.go @@ -5,9 +5,6 @@ package git import ( 
- "fmt" - - "code.gitea.io/gitea/modules/gitbloom" "github.com/emirpasic/gods/trees/binaryheap" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/object" @@ -27,17 +24,13 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom if commitGraphFile != nil { defer commitGraphFile.Close() } - bloomIndex, bloomFile := commit.repo.BloomIndex() - if bloomFile != nil { - defer bloomFile.Close() - } c, err := commitNodeIndex.Get(plumbing.Hash(commit.ID)) if err != nil { return nil, nil, err } - revs, err := getLastCommitForPaths(bloomIndex, c, treePath, entryPaths) + revs, err := getLastCommitForPaths(c, treePath, entryPaths) if err != nil { return nil, nil, err } @@ -141,23 +134,7 @@ func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[ return hashes, nil } -func canSkipCommit(bloomIndex gitbloom.Index, commit cgobject.CommitNode, treePath string, paths []string) bool { - if bloom, err := bloomIndex.GetBloomByHash(commit.ID()); err == nil { - for _, path := range paths { - if bloom.Test(getFullPath(treePath, path)) { - return false - } - } - return true - } - return false -} - -func getLastCommitForPaths(bloomIndex gitbloom.Index, c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { - walkedCommits := 0 - testedCommits := 0 - skippedCommits := 0 - +func getLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { // We do a tree traversal with nodes sorted by commit time heap := binaryheap.NewWith(func(a, b interface{}) int { if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) { @@ -182,8 +159,6 @@ func getLastCommitForPaths(bloomIndex gitbloom.Index, c cgobject.CommitNode, tre } current := cIn.(*commitAndPaths) - walkedCommits++ - // Load the parent commits for the one we are currently examining numParents := current.commit.NumParents() var parents 
[]cgobject.CommitNode @@ -195,17 +170,6 @@ func getLastCommitForPaths(bloomIndex gitbloom.Index, c cgobject.CommitNode, tre parents = append(parents, parent) } - // Optimization: If there is only one parent and a bloom filter can tell us - // that none of our paths has changed then skip all the change checking - if bloomIndex != nil && len(parents) >= 1 { - testedCommits++ - if canSkipCommit(bloomIndex, current.commit, treePath, current.paths) { - skippedCommits++ - heap.Push(&commitAndPaths{parents[0], current.paths, current.hashes}) - continue - } - } - // Examine the current commit and set of interesting paths pathUnchanged := make([]bool, len(current.paths)) parentHashes := make([]map[string]plumbing.Hash, len(parents)) @@ -284,7 +248,5 @@ func getLastCommitForPaths(bloomIndex gitbloom.Index, c cgobject.CommitNode, tre } } - fmt.Printf("walked %d tested %d skipped %d\n", walkedCommits, testedCommits, skippedCommits) - return result, nil } diff --git a/modules/git/notes.go b/modules/git/notes.go index 25221e8cb3c8..a62c558787a7 100644 --- a/modules/git/notes.go +++ b/modules/git/notes.go @@ -60,7 +60,7 @@ func GetNote(repo *Repository, commitID string, note *Note) error { return nil } - lastCommits, err := getLastCommitForPaths(nil, commitNode, "", []string{commitID}) + lastCommits, err := getLastCommitForPaths(commitNode, "", []string{commitID}) if err != nil { return err } diff --git a/modules/git/repo_commitgraph.go b/modules/git/repo_commitgraph.go index 8ea6ab353e5c..a65c6418338d 100644 --- a/modules/git/repo_commitgraph.go +++ b/modules/git/repo_commitgraph.go @@ -1,12 +1,10 @@ package git import ( - "fmt" "io/ioutil" "os" "path" - "code.gitea.io/gitea/modules/gitbloom" "golang.org/x/exp/mmap" "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" @@ -29,23 +27,6 @@ func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *mmap.ReaderAt return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil } -// 
CommitNodeIndex returns the index for walking commit graph -func (r *Repository) BloomIndex() (gitbloom.Index, *mmap.ReaderAt) { - indexPath := path.Join(r.Path, "bloom") - - file, err := mmap.Open(indexPath) - if err == nil { - fmt.Println("BLOOM1") - index, err := gitbloom.OpenFileIndex(file) - if err == nil { - fmt.Println("BLOOM2") - return index, file - } - } - - return nil, nil -} - // BuildCommitGraph builds the commit-graph index file func (r *Repository) BuildCommitGraph(withBloomFilters bool) error { h, err := r.gogitRepo.Head() diff --git a/modules/gitbloom/bloom.go b/modules/gitbloom/bloom.go deleted file mode 100644 index 1ff9ea7ce6eb..000000000000 --- a/modules/gitbloom/bloom.go +++ /dev/null @@ -1,57 +0,0 @@ -package gitbloom - -import ( - "github.com/spaolacci/murmur3" -) - -// BloomPathFilter is a probabilistic data structure that helps determining -// whether a path was was changed. -// -// The implementation uses a standard bloom filter with n=512, m=10, k=7 -// parameters using the 64-bit SipHash hash function with zero key. -type BloomPathFilter struct { - b []byte -} - -// Test checks whether a path was previously added to the filter. Returns -// false if the path is not present in the filter. Returns true if the path -// could be present in the filter. -func (f *BloomPathFilter) Test(path string) bool { - hash0 := murmur3.Sum32WithSeed([]byte(path), 0x293ae76f) - hash1 := murmur3.Sum32WithSeed([]byte(path), 0x7e646e2c) - for i := uint32(0); i < 7; i++ { - bit := (hash0 + hash1*i) % uint32(len(f.b)*8) - if f.b[bit>>3]&(1<<(bit&7)) == 0 { - return false - } - } - return true -} - -// Add path data to the filter. 
-func (f *BloomPathFilter) Add(path string) { - hash0 := murmur3.Sum32WithSeed([]byte(path), 0x293ae76f) - hash1 := murmur3.Sum32WithSeed([]byte(path), 0x7e646e2c) - for i := uint32(0); i < 7; i++ { - bit := (hash0 + hash1*i) % uint32(len(f.b)*8) - f.b[bit>>3] |= 1 << (bit & 7) - } -} - -// Data returns data bytes -func (f *BloomPathFilter) Data() []byte { - return f.b -} - -// NewBloomPathFilter creates a new empty bloom filter for n changed paths -func NewBloomPathFilter(n int) *BloomPathFilter { - f := &BloomPathFilter{make([]byte, ((n+63)/64)*8)} - return f -} - -// LoadBloomPathFilter creates a bloom filter from a byte array previously -// returned by Data -func LoadBloomPathFilter(data []byte) *BloomPathFilter { - f := &BloomPathFilter{data} - return f -} diff --git a/modules/gitbloom/build_test.go b/modules/gitbloom/build_test.go deleted file mode 100644 index 45b47a41eaac..000000000000 --- a/modules/gitbloom/build_test.go +++ /dev/null @@ -1,123 +0,0 @@ -package gitbloom - -import ( - "testing" - - "gopkg.in/src-d/go-git.v4" - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/plumbing/cache" - "gopkg.in/src-d/go-git.v4/plumbing/filemode" - "gopkg.in/src-d/go-git.v4/plumbing/object" - "gopkg.in/src-d/go-git.v4/plumbing/storer" - "gopkg.in/src-d/go-git.v4/storage/filesystem" - - "gopkg.in/src-d/go-billy.v4/osfs" -) - -// Example how to resolve a revision into its commit counterpart -func TestWrite(t *testing.T) { - path := "C:\\Users\\Filip Navara\\gitea-repositories\\filip\\linux.git" - - // We instantiate a new repository targeting the given path (the .git folder) - fs := osfs.New(path) - s := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{KeepDescriptors: true}) - r, _ := git.Open(s, fs) - - memoryIndex := NewMemoryIndex() - - iter, _ := r.CommitObjects() - - iter.ForEach(func(c *object.Commit) error { - changes := make([]string, 0, 512) - aTree, _ := c.Tree() - if c.NumParents() > 0 { - 
c.Parents().ForEach(func(parent *object.Commit) error { - bTree, _ := parent.Tree() - changes = updateBloomFilter(changes, aTree, bTree, "") - return storer.ErrStop - }) - if len(changes) < 512 { - bloomFilter := NewBloomPathFilter(len(changes)) - for _, change := range changes { - bloomFilter.Add(change) - } - memoryIndex.Add(c.ID(), bloomFilter) - } - } - return nil - }) - - f, _ := fs.Create("bloom") - e := NewEncoder(f) - e.Encode(memoryIndex) - f.Close() -} - -func getFullPath(treePath, path string) string { - if treePath != "" { - if path != "" { - return treePath + "/" + path - } - return treePath - } - return path -} - -func dumpTreeIntoBloomFilter(changes []string, tree *object.Tree, treePath string) []string { - for _, entry := range tree.Entries { - fullPath := getFullPath(treePath, entry.Name) - changes = append(changes, fullPath) - if entry.Mode == filemode.Dir { - if subtree, err := tree.Tree(entry.Name); err == nil { - dumpTreeIntoBloomFilter(changes, subtree, fullPath) - } - } - } - return changes -} - -func updateBloomFilter(changes []string, a, b *object.Tree, treePath string) []string { - aHashes := make(map[string]plumbing.Hash) - for _, entry := range a.Entries { - aHashes[entry.Name] = entry.Hash - } - - for _, entry := range b.Entries { - if aHashes[entry.Name] != entry.Hash { - // File from 'b' didn't exist in 'a', or it has different hash than in 'a' - fullPath := getFullPath(treePath, entry.Name) - if entry.Mode == filemode.Dir { - aTree, _ := a.Tree(entry.Name) - bTree, _ := b.Tree(entry.Name) - if aTree != nil && bTree != nil { - changes = updateBloomFilter(changes, aTree, bTree, fullPath) - changes = append(changes, fullPath) - } else if aTree != nil { - changes = dumpTreeIntoBloomFilter(changes, aTree, fullPath) - } else if bTree != nil { - changes = dumpTreeIntoBloomFilter(changes, bTree, fullPath) - } - } else { - changes = append(changes, fullPath) - } - } - delete(aHashes, entry.Name) - } - - for name := range aHashes { - // File 
from 'a' is removed in 'b' - changes = append(changes, getFullPath(treePath, name)) - } - - return changes -} - -func createBloomFilter(a, b *object.Tree) *BloomPathFilter { - changes := make([]string, 0, 512) - changes = updateBloomFilter(changes, a, b, "") - bloomFilter := NewBloomPathFilter(len(changes)) - for _, change := range changes { - bloomFilter.Add(change) - } - return bloomFilter -} diff --git a/modules/gitbloom/encoder.go b/modules/gitbloom/encoder.go deleted file mode 100644 index 57eac245597c..000000000000 --- a/modules/gitbloom/encoder.go +++ /dev/null @@ -1,164 +0,0 @@ -package gitbloom - -import ( - "crypto/sha1" - "hash" - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/utils/binary" -) - -// Encoder writes MemoryIndex structs to an output stream. -type Encoder struct { - io.Writer - hash hash.Hash -} - -// NewEncoder returns a new stream encoder that writes to w. -func NewEncoder(w io.Writer) *Encoder { - h := sha1.New() - mw := io.MultiWriter(w, h) - return &Encoder{mw, h} -} - -func (e *Encoder) Encode(idx Index) error { - var err error - - // Get all the hashes in the input index - hashes := idx.Hashes() - - // Sort the inout and prepare helper structures we'll need for encoding - fanout, totalBloomSize := e.prepare(idx, hashes) - - chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, bloomIndexesSignature, bloomDataSignature} - chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 4, totalBloomSize} - - if err = e.encodeFileHeader(len(chunkSignatures)); err != nil { - return err - } - if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { - return err - } - if err = e.encodeFanout(fanout); err != nil { - return err - } - if err = e.encodeOidLookup(hashes); err != nil { - return err - } - if err := e.encodeBloomIndexes(idx, hashes); err != nil { - return err - } - if err := e.encodeBloomData(idx, hashes); err != nil { - return err - } - if err != nil { - 
return err - } - return e.encodeChecksum() -} - -func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (fanout []uint32, totalBloomSize uint64) { - // Sort the hashes and build our index - plumbing.HashesSort(hashes) - fanout = make([]uint32, 256) - for _, hash := range hashes { - fanout[hash[0]]++ - } - - // Convert the fanout to cumulative values - for i := 1; i <= 0xff; i++ { - fanout[i] += fanout[i-1] - } - - // Find out the total size of bloom filters - for _, hash := range hashes { - if bloom, _ := idx.GetBloomByHash(hash); bloom != nil { - totalBloomSize += uint64(len(bloom.Data()) / 8) - break - } - } - - return -} - -func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { - if _, err = e.Write(commitFileSignature); err == nil { - _, err = e.Write([]byte{1, 1, byte(chunkCount), 0}) - } - return -} - -func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { - // 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator - offset := uint64(8 + len(chunkSignatures)*12 + 12) - for i, signature := range chunkSignatures { - if _, err = e.Write(signature); err == nil { - err = binary.WriteUint64(e, offset) - } - if err != nil { - return - } - offset += chunkSizes[i] - } - if _, err = e.Write(lastSignature); err == nil { - err = binary.WriteUint64(e, offset) - } - return -} - -func (e *Encoder) encodeFanout(fanout []uint32) (err error) { - for i := 0; i <= 0xff; i++ { - if err = binary.WriteUint32(e, fanout[i]); err != nil { - return - } - } - return -} - -func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { - for _, hash := range hashes { - if _, err = e.Write(hash[:]); err != nil { - return err - } - } - return -} - -func (e *Encoder) encodeBloomIndexes(idx Index, hashes []plumbing.Hash) (err error) { - currentBloomSize := uint32(0) - for _, hash := range hashes { - if bloom, _ := idx.GetBloomByHash(hash); bloom != nil { - currentBloomSize += 
uint32(len(bloom.Data()) / 8) - } - if err = binary.WriteUint32(e, currentBloomSize); err != nil { - return - } - } - return -} - -func (e *Encoder) encodeBloomData(idx Index, hashes []plumbing.Hash) (err error) { - if err = binary.WriteUint32(e, 1); err != nil { - return - } - if err = binary.WriteUint32(e, 7); err != nil { - return - } - if err = binary.WriteUint32(e, 10); err != nil { - return - } - for _, hash := range hashes { - if bloom, _ := idx.GetBloomByHash(hash); bloom != nil { - if _, err = e.Write(bloom.Data()); err != nil { - return err - } - } - } - return -} - -func (e *Encoder) encodeChecksum() error { - _, err := e.Write(e.hash.Sum(nil)[:20]) - return err -} diff --git a/modules/gitbloom/fileindex.go b/modules/gitbloom/fileindex.go deleted file mode 100644 index 5f9fd6f1db58..000000000000 --- a/modules/gitbloom/fileindex.go +++ /dev/null @@ -1,196 +0,0 @@ -package gitbloom - -import ( - "bytes" - encbin "encoding/binary" - "errors" - "io" - - "gopkg.in/src-d/go-git.v4/plumbing" - "gopkg.in/src-d/go-git.v4/utils/binary" -) - -var ( - // ErrUnsupportedVersion is returned by OpenFileIndex when the bloom filter - // file version is not supported. - ErrUnsupportedVersion = errors.New("Unsupported version") - // ErrMalformedBloomFilterFile is returned by OpenFileIndex when the bloom - // filter file is corrupted. 
- ErrMalformedBloomFilterFile = errors.New("Malformed bloom filter file") - - commitFileSignature = []byte{'C', 'G', 'P', 'H'} - oidFanoutSignature = []byte{'O', 'I', 'D', 'F'} - oidLookupSignature = []byte{'O', 'I', 'D', 'L'} - bloomIndexesSignature = []byte{'B', 'I', 'D', 'X'} - bloomDataSignature = []byte{'B', 'D', 'A', 'T'} - lastSignature = []byte{0, 0, 0, 0} -) - -type fileIndex struct { - reader io.ReaderAt - fanout [256]int - oidFanoutOffset int64 - oidLookupOffset int64 - bloomIndexesOffset int64 - bloomDataOffset int64 -} - -// OpenFileIndex opens a serialized commit graph file in the format described at -// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt -func OpenFileIndex(reader io.ReaderAt) (Index, error) { - fi := &fileIndex{reader: reader} - - if err := fi.verifyFileHeader(); err != nil { - return nil, err - } - if err := fi.readChunkHeaders(); err != nil { - return nil, err - } - if err := fi.readFanout(); err != nil { - return nil, err - } - - return fi, nil -} - -func (fi *fileIndex) verifyFileHeader() error { - // Verify file signature - var signature = make([]byte, 4) - if _, err := fi.reader.ReadAt(signature, 0); err != nil { - return err - } - if !bytes.Equal(signature, commitFileSignature) { - return ErrMalformedBloomFilterFile - } - - // Read and verify the file header - var header = make([]byte, 4) - if _, err := fi.reader.ReadAt(header, 4); err != nil { - return err - } - if header[0] != 1 { - return ErrUnsupportedVersion - } - - return nil -} - -func (fi *fileIndex) readChunkHeaders() error { - var chunkID = make([]byte, 4) - for i := 0; ; i++ { - chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12) - if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil { - return err - } - chunkOffset, err := binary.ReadUint64(chunkHeader) - if err != nil { - return err - } - - if bytes.Equal(chunkID, oidFanoutSignature) { - fi.oidFanoutOffset = int64(chunkOffset) - } else if 
bytes.Equal(chunkID, oidLookupSignature) { - fi.oidLookupOffset = int64(chunkOffset) - } else if bytes.Equal(chunkID, bloomIndexesSignature) { - fi.bloomIndexesOffset = int64(chunkOffset) - } else if bytes.Equal(chunkID, bloomDataSignature) { - fi.bloomDataOffset = int64(chunkOffset) - } else if bytes.Equal(chunkID, lastSignature) { - break - } - } - - if fi.oidFanoutOffset <= 0 || fi.oidLookupOffset <= 0 || fi.bloomIndexesOffset <= 0 || fi.bloomDataOffset <= 0 { - return ErrMalformedBloomFilterFile - } - - return nil -} - -func (fi *fileIndex) readFanout() error { - fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4) - for i := 0; i < 256; i++ { - fanoutValue, err := binary.ReadUint32(fanoutReader) - if err != nil { - return err - } - if fanoutValue > 0x7fffffff { - return ErrMalformedBloomFilterFile - } - fi.fanout[i] = int(fanoutValue) - } - return nil -} - -func (fi *fileIndex) getIndexByHash(h plumbing.Hash) (int, error) { - var oid plumbing.Hash - - // Find the hash in the oid lookup table - var low int - if h[0] == 0 { - low = 0 - } else { - low = fi.fanout[h[0]-1] - } - high := fi.fanout[h[0]] - for low < high { - mid := (low + high) >> 1 - offset := fi.oidLookupOffset + int64(mid)*20 - if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { - return 0, err - } - cmp := bytes.Compare(h[:], oid[:]) - if cmp < 0 { - high = mid - } else if cmp == 0 { - return mid, nil - } else { - low = mid + 1 - } - } - - return 0, plumbing.ErrObjectNotFound -} - -func (fi *fileIndex) GetBloomByHash(h plumbing.Hash) (*BloomPathFilter, error) { - idx, err := fi.getIndexByHash(h) - if err != nil { - return nil, err - } - - buf := make([]byte, 4) - prevIndex := uint32(0) - if idx > 0 { - if _, err := fi.reader.ReadAt(buf, fi.bloomIndexesOffset+int64(idx-1)*4); err != nil { - return nil, err - } - prevIndex = encbin.BigEndian.Uint32(buf) - } - if _, err := fi.reader.ReadAt(buf, fi.bloomIndexesOffset+int64(idx)*4); err != nil { - return nil, err - } - 
nextIndex := encbin.BigEndian.Uint32(buf) - - length := nextIndex - prevIndex - if length == 0 { - return nil, plumbing.ErrObjectNotFound - } - data := make([]byte, length*8) - _, err = fi.reader.ReadAt(data, fi.bloomDataOffset+12+int64(prevIndex)*8) - if err != nil { - return nil, err - } - - return LoadBloomPathFilter(data), nil -} - -func (fi *fileIndex) Hashes() []plumbing.Hash { - hashes := make([]plumbing.Hash, fi.fanout[0xff]) - for i := 0; i < int(fi.fanout[0xff]); i++ { - offset := fi.oidLookupOffset + int64(i)*20 - if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 { - return nil - } - } - return hashes -} diff --git a/modules/gitbloom/index.go b/modules/gitbloom/index.go deleted file mode 100644 index a99557ad2284..000000000000 --- a/modules/gitbloom/index.go +++ /dev/null @@ -1,13 +0,0 @@ -package gitbloom - -import ( - "gopkg.in/src-d/go-git.v4/plumbing" -) - -// Index provides methods to access bloom filters individual commits -type Index interface { - // GetBloomByHash gets the bloom path filter for particular commit - GetBloomByHash(h plumbing.Hash) (*BloomPathFilter, error) - // Hashes returns all the hashes that are available in the index - Hashes() []plumbing.Hash -} diff --git a/modules/gitbloom/memoryindex.go b/modules/gitbloom/memoryindex.go deleted file mode 100644 index 1ac5184fe5c6..000000000000 --- a/modules/gitbloom/memoryindex.go +++ /dev/null @@ -1,37 +0,0 @@ -package gitbloom - -import ( - "gopkg.in/src-d/go-git.v4/plumbing" -) - -type MemoryIndex struct { - filterMap map[plumbing.Hash]*BloomPathFilter -} - -// NewMemoryIndex creates in-memory commit graph representation -func NewMemoryIndex() *MemoryIndex { - return &MemoryIndex{make(map[plumbing.Hash]*BloomPathFilter)} -} - -// GetBloomByHash gets the bloom path filter for particular commit -func (mi *MemoryIndex) GetBloomByHash(h plumbing.Hash) (*BloomPathFilter, error) { - if filter, ok := mi.filterMap[h]; ok { - return filter, nil - } - - return nil, 
plumbing.ErrObjectNotFound -} - -// Add adds new filter to the memory index -func (mi *MemoryIndex) Add(hash plumbing.Hash, filter *BloomPathFilter) { - mi.filterMap[hash] = filter -} - -// Hashes returns all the hashes that are available in the index -func (mi *MemoryIndex) Hashes() []plumbing.Hash { - hashes := make([]plumbing.Hash, 0, len(mi.filterMap)) - for k := range mi.filterMap { - hashes = append(hashes, k) - } - return hashes -} From 6635ebeabb3bbd69eb705526fa515ee3f0fa9fba Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 27 Jun 2019 17:15:38 +0200 Subject: [PATCH 4/9] Remove old code for building commit graphs --- models/repo.go | 8 -- modules/git/repo_commitgraph.go | 137 -------------------------------- 2 files changed, 145 deletions(-) diff --git a/models/repo.go b/models/repo.go index 2a03a596b86a..1b4ff1f18678 100644 --- a/models/repo.go +++ b/models/repo.go @@ -2206,14 +2206,6 @@ func GitFsck() { func(idx int, bean interface{}) error { repo := bean.(*Repository) repoPath := repo.RepoPath() - // TODO: Move this elsewhere - if gitRepo, err := git.OpenRepository(repoPath); err == nil { - log.Trace("Building commit graph index") - if err := gitRepo.BuildCommitGraph(false); err != nil { - desc := fmt.Sprintf("Failed to build commit graph (%s): %v", repoPath, err) - log.Warn(desc) - } - } log.Trace("Running health check on repository %s", repoPath) if err := git.Fsck(repoPath, setting.Cron.RepoHealthCheck.Timeout, setting.Cron.RepoHealthCheck.Args...); err != nil { desc := fmt.Sprintf("Failed to health check repository (%s): %v", repoPath, err) diff --git a/modules/git/repo_commitgraph.go b/modules/git/repo_commitgraph.go index a65c6418338d..007bfb33deff 100644 --- a/modules/git/repo_commitgraph.go +++ b/modules/git/repo_commitgraph.go @@ -1,14 +1,10 @@ package git import ( - "io/ioutil" - "os" "path" "golang.org/x/exp/mmap" - "gopkg.in/src-d/go-git.v4/plumbing" "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" - 
"gopkg.in/src-d/go-git.v4/plumbing/object" cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" ) @@ -26,136 +22,3 @@ func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *mmap.ReaderAt return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil } - -// BuildCommitGraph builds the commit-graph index file -func (r *Repository) BuildCommitGraph(withBloomFilters bool) error { - h, err := r.gogitRepo.Head() - if err != nil { - return err - } - - commit, err := r.gogitRepo.CommitObject(h.Hash()) - if err != nil { - return err - } - - // TODO: Incremental updates - idx, err := buildCommitGraph(commit, withBloomFilters) - if err != nil { - return err - } - - f, err := ioutil.TempFile(path.Join(r.Path, "objects", "info"), "commit-graph-tmp") - if err != nil { - return err - } - - tmpName := f.Name() - encoder := commitgraph.NewEncoder(f) - err = encoder.Encode(idx) - f.Close() - if err == nil { - indexPath := path.Join(r.Path, "objects", "info", "commit-graph") - os.Remove(indexPath) - err = os.Rename(tmpName, indexPath) - if err == nil { - return nil - } - } - os.Remove(tmpName) - return err -} - -func buildCommitGraph(c *object.Commit, withBloomFilters bool) (*commitgraph.MemoryIndex, error) { - idx := commitgraph.NewMemoryIndex() - seen := make(map[plumbing.Hash]bool) - // TODO: Unroll the recursion - return idx, addCommitToIndex(idx, c, seen, withBloomFilters) -} - -/* -func dumpTreeIntoBloomFilter(bloomFilter *commitgraph.BloomPathFilter, tree *object.Tree, treePath string) { - for _, entry := range tree.Entries { - fullPath := getFullPath(treePath, entry.Name) - bloomFilter.Add(fullPath) - if entry.Mode == filemode.Dir { - if subtree, err := tree.Tree(entry.Name); err == nil { - dumpTreeIntoBloomFilter(bloomFilter, subtree, fullPath) - } - } - } -} - -func updateBloomFilter(bloomFilter *commitgraph.BloomPathFilter, a, b *object.Tree, treePath string) { - aHashes := make(map[string]plumbing.Hash) - for _, entry := range a.Entries { - 
aHashes[entry.Name] = entry.Hash - } - - for _, entry := range b.Entries { - if aHashes[entry.Name] != entry.Hash { - // File from 'b' didn't exist in 'a', or it has different hash than in 'a' - fullPath := getFullPath(treePath, entry.Name) - bloomFilter.Add(fullPath) - if entry.Mode == filemode.Dir { - aTree, _ := a.Tree(entry.Name) - bTree, _ := b.Tree(entry.Name) - if aTree != nil && bTree != nil { - updateBloomFilter(bloomFilter, aTree, bTree, fullPath) - } else if aTree != nil { - dumpTreeIntoBloomFilter(bloomFilter, aTree, fullPath) - } else if bTree != nil { - dumpTreeIntoBloomFilter(bloomFilter, bTree, fullPath) - } - } - } - delete(aHashes, entry.Name) - } - - for name := range aHashes { - // File from 'a' is removed in 'b' - bloomFilter.Add(getFullPath(treePath, name)) - } -} - -func createBloomFilter(a, b *object.Tree) *commitgraph.BloomPathFilter { - bloomFilter := commitgraph.NewBloomPathFilter() - updateBloomFilter(bloomFilter, a, b, "") - return bloomFilter -}*/ - -func addCommitToIndex(idx *commitgraph.MemoryIndex, c *object.Commit, seen map[plumbing.Hash]bool, withBloomFilters bool) error { - if seen[c.Hash] { - return nil - } - seen[c.Hash] = true - - // Recursively add parents first - err := c.Parents().ForEach(func(parent *object.Commit) error { - return addCommitToIndex(idx, parent, seen, withBloomFilters) - }) - if err != nil { - return err - } - - // Calculate file difference to first parent commit - /*var bloomFilter *commitgraph.BloomPathFilter - if withBloomFilters && c.NumParents() == 1 { - if parent, err := c.Parent(0); err == nil { - if tree, err := c.Tree(); err == nil { - if parentTree, err := parent.Tree(); err == nil { - bloomFilter = createBloomFilter(parentTree, tree) - } - } - } - }*/ - - // Add this commit if it hasn't been done already - node := &commitgraph.CommitData{ - TreeHash: c.TreeHash, - ParentHashes: c.ParentHashes, - When: c.Committer.When, - } - idx.Add /*WithBloom*/ (c.Hash, node /*, bloomFilter*/) - return nil -} 
From 33c2254466bfc5c2d95144703440ecbea56c7067 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 27 Jun 2019 17:16:25 +0200 Subject: [PATCH 5/9] Remove unused function --- modules/git/commit_info.go | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/modules/git/commit_info.go b/modules/git/commit_info.go index 58de2024b207..8417226f8bf8 100644 --- a/modules/git/commit_info.go +++ b/modules/git/commit_info.go @@ -99,16 +99,6 @@ func getCommitTree(c cgobject.CommitNode, treePath string) (*object.Tree, error) return tree, nil } -func getFullPath(treePath, path string) string { - if treePath != "" { - if path != "" { - return treePath + "/" + path - } - return treePath - } - return path -} - func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) { tree, err := getCommitTree(c, treePath) if err == object.ErrDirectoryNotFound { From 2f153faba314371244d7e8e102fb00333f7e610d Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 27 Jun 2019 17:22:41 +0200 Subject: [PATCH 6/9] Remove mmap usage --- modules/git/repo_commitgraph.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/git/repo_commitgraph.go b/modules/git/repo_commitgraph.go index 007bfb33deff..cf05e6837f09 100644 --- a/modules/git/repo_commitgraph.go +++ b/modules/git/repo_commitgraph.go @@ -1,18 +1,18 @@ package git import ( + "os" "path" - "golang.org/x/exp/mmap" "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" ) // CommitNodeIndex returns the index for walking commit graph -func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *mmap.ReaderAt) { +func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *os.File) { indexPath := path.Join(r.Path, "objects", "info", "commit-graph") - file, err := mmap.Open(indexPath) + file, err := os.Open(indexPath) if err == nil { index, err := commitgraph.OpenFileIndex(file) if err == nil { 
From 9de6e249bccb4da3fea6dce48b7fcafa32c0d2ff Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 27 Jun 2019 17:45:09 +0200 Subject: [PATCH 7/9] gofmt --- modules/git/repo_commitgraph.go | 48 ++++++++++++++++----------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/modules/git/repo_commitgraph.go b/modules/git/repo_commitgraph.go index cf05e6837f09..9d26229ad6c7 100644 --- a/modules/git/repo_commitgraph.go +++ b/modules/git/repo_commitgraph.go @@ -1,24 +1,24 @@ -package git - -import ( - "os" - "path" - - "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" - cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" -) - -// CommitNodeIndex returns the index for walking commit graph -func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *os.File) { - indexPath := path.Join(r.Path, "objects", "info", "commit-graph") - - file, err := os.Open(indexPath) - if err == nil { - index, err := commitgraph.OpenFileIndex(file) - if err == nil { - return cgobject.NewGraphCommitNodeIndex(index, r.gogitRepo.Storer), file - } - } - - return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil -} +package git + +import ( + "os" + "path" + + "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" + cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" +) + +// CommitNodeIndex returns the index for walking commit graph +func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *os.File) { + indexPath := path.Join(r.Path, "objects", "info", "commit-graph") + + file, err := os.Open(indexPath) + if err == nil { + index, err := commitgraph.OpenFileIndex(file) + if err == nil { + return cgobject.NewGraphCommitNodeIndex(index, r.gogitRepo.Storer), file + } + } + + return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil +} From 4a7cc0dcf90a2e5dde7470d75cab458f96048cc2 Mon Sep 17 00:00:00 2001 From: Filip Navara Date: Thu, 27 Jun 2019 17:49:54 +0200 Subject: [PATCH 8/9] sort vendor/modules.txt --- 
vendor/modules.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vendor/modules.txt b/vendor/modules.txt index aa2a11becbfd..969511fe6597 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -429,11 +429,11 @@ gopkg.in/src-d/go-billy.v4/helper/polyfill gopkg.in/src-d/go-git.v4 gopkg.in/src-d/go-git.v4/config gopkg.in/src-d/go-git.v4/plumbing -gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph -gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph gopkg.in/src-d/go-git.v4/plumbing/cache gopkg.in/src-d/go-git.v4/plumbing/filemode +gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph gopkg.in/src-d/go-git.v4/plumbing/object +gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph gopkg.in/src-d/go-git.v4/storage/filesystem gopkg.in/src-d/go-git.v4/internal/revision gopkg.in/src-d/go-git.v4/plumbing/format/gitignore From a9b7776f433f544750979003dca13b32f87b1209 Mon Sep 17 00:00:00 2001 From: Andrew Thornton Date: Sun, 30 Jun 2019 19:27:00 +0100 Subject: [PATCH 9/9] Add copyright header and log commit-graph error --- modules/git/repo_commitgraph.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/git/repo_commitgraph.go b/modules/git/repo_commitgraph.go index 9d26229ad6c7..52263852dc62 100644 --- a/modules/git/repo_commitgraph.go +++ b/modules/git/repo_commitgraph.go @@ -1,9 +1,15 @@ +// Copyright 2019 The Gitea Authors. +// All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. 
+ package git import ( "os" "path" + gitealog "code.gitea.io/gitea/modules/log" "gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" ) @@ -14,11 +20,16 @@ func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *os.File) { file, err := os.Open(indexPath) if err == nil { - index, err := commitgraph.OpenFileIndex(file) + var index commitgraph.Index + index, err = commitgraph.OpenFileIndex(file) if err == nil { return cgobject.NewGraphCommitNodeIndex(index, r.gogitRepo.Storer), file } } + if !os.IsNotExist(err) { + gitealog.Warn("Unable to read commit-graph for %s: %v", r.Path, err) + } + return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil }