-
Notifications
You must be signed in to change notification settings - Fork 22
/
differ.go
300 lines (254 loc) · 7.69 KB
/
differ.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
/*
Copyright 2016 The gta AUTHORS. All rights reserved.
Use of this source code is governed by the Apache 2 license that can be found
in the LICENSE file.
*/
package gta
import (
"bufio"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"sync"
)
// A Differ provides methods that report which files have changed and the
// directories in which those changes occur.
type Differ interface {
// Diff returns a map keyed by the absolute paths of directories that
// contain changed files. Each value describes the changes in that
// directory.
Diff() (map[string]Directory, error)
// DiffFiles returns a map whose keys are absolute file paths. A map value
// is true when the file exists.
DiffFiles() (map[string]bool, error)
}
// GitDifferOption is an option function used to modify a git differ. Options
// are passed to NewGitDiffer, which calls each one with the git value it is
// building.
type GitDifferOption func(*git)
// SetUseMergeCommit returns an option that stores useMergeCommit in the
// useMergeCommit field of the git differ being configured.
func SetUseMergeCommit(useMergeCommit bool) GitDifferOption {
	apply := func(target *git) {
		target.useMergeCommit = useMergeCommit
	}
	return apply
}
// SetBaseBranch returns an option that stores baseBranch in the baseBranch
// field of the git differ being configured.
func SetBaseBranch(baseBranch string) GitDifferOption {
	apply := func(target *git) {
		target.baseBranch = baseBranch
	}
	return apply
}
// NewGitDiffer returns a Differ that determines differences using git.
//
// The differ starts out comparing against "origin/master" with merge-commit
// mode disabled; opts are then applied in the order given and may override
// either setting.
func NewGitDiffer(opts ...GitDifferOption) Differ {
	// useMergeCommit is left at its zero value (false).
	g := new(git)
	g.baseBranch = "origin/master"

	for i := range opts {
		opts[i](g)
	}

	d := new(differ)
	d.diff = g.diff
	return d
}
// NewFileDiffer returns a Differ that operates on a list of absolute paths of
// changed files. Duplicate entries in files collapse into a single entry.
func NewFileDiffer(files []string) Differ {
	changed := make(map[string]struct{}, len(files))
	for i := range files {
		changed[files[i]] = struct{}{}
	}

	// The set is fixed at construction time, so the diff function simply
	// hands it back and never fails.
	d := new(differ)
	d.diff = func() (map[string]struct{}, error) {
		return changed, nil
	}
	return d
}
// differ is the value returned by NewGitDiffer and NewFileDiffer. Its Diff
// and DiffFiles methods are built on top of the diff function it holds.
type differ struct {
// diff returns the set of changed files; Diff and DiffFiles treat the keys
// as file paths.
diff func() (map[string]struct{}, error)
}
// git computes the set of changed files by running the git command line. In
// this file it does not satisfy Differ itself: NewGitDiffer wraps its diff
// method in a differ.
type git struct {
// baseBranch is the revision changes are measured against. NewGitDiffer
// defaults it to "origin/master".
baseBranch string
// useMergeCommit, when true, makes diff take the revisions to compare from
// getMergeParents.
useMergeCommit bool
// onceDiff ensures diff computes its result only once.
onceDiff sync.Once
// changedFiles is the result stored by diff on success.
changedFiles map[string]struct{}
// diffErr is the error stored by diff on failure.
diffErr error
}
// A Directory describes changes to a directory and its contents.
type Directory struct {
// Exists is the result of the exists check on the directory's path, made
// when Diff builds the entry.
Exists bool
// Files holds the base names of the changed files in the directory.
Files []string
}
// Diff groups the changed files by their parent directory. The keys of the
// returned map are the directory paths (absolute when the underlying diff
// yields absolute file paths); each value records whether the directory
// exists and the base names of the changed files inside it.
func (d *differ) Diff() (map[string]Directory, error) {
	changed, err := d.diff()
	if err != nil {
		return nil, err
	}

	dirs := make(map[string]Directory, len(changed))
	for file := range changed {
		parent, name := filepath.Dir(file), filepath.Base(file)

		entry, seen := dirs[parent]
		if !seen {
			// Only check the directory once, the first time it is encountered.
			entry = Directory{Exists: exists(parent)}
		}
		entry.Files = append(entry.Files, name)
		dirs[parent] = entry
	}
	return dirs, nil
}
// DiffFiles reports every changed file together with whether it is still
// present. The keys of the returned map are the changed file paths; a false
// value means the file was deleted.
func (d *differ) DiffFiles() (map[string]bool, error) {
	changed, err := d.diff()
	if err != nil {
		return nil, err
	}

	present := make(map[string]bool, len(changed))
	for file := range changed {
		present[file] = exists(file)
	}
	return present, nil
}
// getMergeParents returns the revisions to compare when the differ runs in
// merge-commit mode.
//
// It asks git for the abbreviated parent hashes of HEAD. When HEAD has two or
// more parents, parent1 is the first of them and rightwardParents holds the
// rest. Otherwise parent1 is the abbreviated hash of the most recent merge
// commit reported by "git log --merges" and rightwardParents is just "HEAD".
// A failure of either git command is returned as err.
func (g *git) getMergeParents() (parent1 string, rightwardParents []string, err error) {
	parentsOut, err := exec.Command("git", "log", "-1", "--pretty=format:%p").Output()
	if err != nil {
		return "", nil, err
	}

	// A merge commit lists every parent, separated by single spaces.
	if hashes := strings.Split(strings.TrimSpace(string(parentsOut)), " "); len(hashes) >= 2 {
		return hashes[0], hashes[1:], nil
	}

	// Squash-merge and rebase commits have a single parent, so use the most
	// recent merge commit as the left side instead.
	mergeOut, err := exec.Command("git", "log", "-1", "--merges", "--pretty=format:%h").Output()
	if err != nil {
		return "", nil, err
	}
	return strings.TrimSpace(string(mergeOut)), []string{"HEAD"}, nil
}
// diff returns the set of changed files, keyed by absolute path.
//
// The git commands run at most once per git value: the first call computes
// the result under g.onceDiff, and every later call returns the same stored
// files and error.
func (g *git) diff() (map[string]struct{}, error) {
	g.onceDiff.Do(func() {
		g.changedFiles, g.diffErr = g.computeDiff()
	})
	return g.changedFiles, g.diffErr
}

// computeDiff runs git to collect the absolute paths of the files that differ
// between the comparison base and each rightward parent. It returns a nil map
// together with the error when any step fails.
func (g *git) computeDiff() (map[string]struct{}, error) {
	// We get the root of the repository to build our full path.
	out, err := exec.Command("git", "rev-parse", "--show-toplevel").Output()
	if err != nil {
		return nil, err
	}
	root := strings.TrimSpace(string(out))

	var parent1 string
	rightwardParents := []string{"HEAD"}
	if g.useMergeCommit {
		// In merge-commit mode both sides of the comparison come from the
		// commit graph around HEAD, so the branch point is not needed and is
		// not looked up.
		parent1, rightwardParents, err = g.getMergeParents()
		if err != nil {
			return nil, err
		}
	} else {
		// Get the revision from which HEAD was branched from g.baseBranch.
		parent1, err = g.branchPointOf("HEAD")
		if err != nil {
			return nil, err
		}
		// If the branch point is unknown, fall back to using the base branch.
		// In most cases this will be fine, but it results in a corner case when
		// the base branch has been merged into the branch since the branch was
		// created: the differences between the base branch and the most recent
		// merge will not be considered.
		if parent1 == "" {
			parent1 = g.baseBranch
		}
	}

	files := make(map[string]struct{})
	for _, parent2 := range rightwardParents {
		changed, err := gitChangedPaths(root, parent1, parent2)
		if err != nil {
			return nil, err
		}
		for path := range changed {
			files[path] = struct{}{}
		}
	}
	return files, nil
}

// gitChangedPaths runs "git diff parent1...parent2" and returns the absolute
// paths, built from root, of all affected files without rename detection.
func gitChangedPaths(root, parent1, parent2 string) (map[string]struct{}, error) {
	cmd := exec.Command("git", "diff", fmt.Sprintf("%s...%s", parent1, parent2), "--name-only", "--no-renames")
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}
	if err := cmd.Start(); err != nil {
		return nil, err
	}

	paths, err := diffPaths(root, stdout)
	if err != nil {
		// Reading stopped early, so git may still be running and blocked on a
		// full pipe. Kill it before waiting so Wait cannot hang, then reap the
		// process and release the pipe. The read error is the one worth
		// reporting, so the cleanup errors are deliberately ignored.
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
		return nil, err
	}

	// Wait must only be called once all reads from the pipe have completed.
	if err := cmd.Wait(); err != nil {
		return nil, err
	}
	return paths, nil
}
// diffPaths reads one path per line from r, joins each onto root, and returns
// the set of resulting absolute paths. Any error recorded by the line scanner
// is returned alongside the paths collected so far.
func diffPaths(root string, r io.Reader) (map[string]struct{}, error) {
	lines := bufio.NewScanner(r)
	collected := map[string]struct{}{}
	for lines.Scan() {
		// Build the full absolute file path for this entry.
		abs, err := filepath.Abs(filepath.Join(root, lines.Text()))
		if err != nil {
			return nil, err
		}
		collected[abs] = struct{}{}
	}
	return collected, lines.Err()
}
// exists reports whether path is present on disk. Only a "does not exist"
// result from os.Stat counts as absent; any other outcome, including other
// stat errors, is reported as existing.
func exists(path string) bool {
	if _, err := os.Stat(path); os.IsNotExist(err) {
		return false
	}
	return true
}
// branchPointOf returns the commit at which branch diverged from
// g.baseBranch: the first listed parent of the oldest commit that is
// reachable from branch but not from g.baseBranch.
//
// An empty string is returned when no such commit can be determined (e.g.
// branch is a shallow clone, or branch does not share history with
// g.baseBranch). A failure of the git command is treated the same way, so the
// returned error is always nil and callers can fall back to the base branch.
func (g *git) branchPointOf(branch string) (string, error) {
	args := []string{
		"rev-list",
		// Ensure graph order is respected.
		"--topo-order",
		// Make each line list the commit followed by its parents.
		"--parents",
		// Put the oldest commit of branch that is not on the base branch first.
		"--reverse",
		branch,
		"^" + g.baseBranch,
	}
	// --first-parent is deliberately NOT used: branch may have had merges from
	// other branches into it, and we want the oldest possible branch point
	// from the base branch in branch.
	//
	// git merge-base is deliberately NOT used either: it would not deliver the
	// right result when g.baseBranch had been merged into branch sometime
	// after branch was created from g.baseBranch. In that case the merge base
	// would be the merge commit where g.baseBranch was merged into branch.
	out, err := exec.Command("git", args...).Output()
	if err != nil {
		return "", nil
	}

	// The first output line is "<oldest commit> <parent> [<parent>...]".
	oldest := strings.Split(string(out), "\n")[0]
	if fields := strings.Fields(oldest); len(fields) >= 2 {
		return fields[1], nil
	}
	return "", nil
}
// fileDiffer holds a set of changed file paths.
//
// NOTE(review): nothing visible in this file references fileDiffer;
// NewFileDiffer returns a differ instead. Confirm whether this type is still
// needed.
type fileDiffer struct {
changedFiles map[string]struct{}
}