-
Notifications
You must be signed in to change notification settings - Fork 0
/
GlobSearch.cs
141 lines (115 loc) · 4.24 KB
/
GlobSearch.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
using DotNet.Globbing;
using System.Collections.Concurrent;
namespace Searcher;
/// <summary>
/// File search helpers that walk a directory tree and return the files whose
/// names match a set of globs (or, for the legacy routine, file system patterns).
/// </summary>
internal static class GlobSearch
{
    /// <summary>
    /// Options used when listing subdirectories. Access-denied entries are skipped
    /// instead of aborting the whole search.
    /// </summary>
    private static readonly EnumerationOptions diroptions = new() { IgnoreInaccessible = true };

    /// <summary>
    /// Options used when listing the files of a single folder. Access-denied folders
    /// yield no files instead of throwing; no attributes are skipped, so hidden and
    /// system files are still returned, exactly as a plain <c>Directory.GetFiles(path)</c> would.
    /// </summary>
    private static readonly EnumerationOptions fileoptions = new() { IgnoreInaccessible = true, AttributesToSkip = 0 };

    /// <summary>
    /// Use globs to find files recursively
    /// </summary>
    /// <param name="path">Root folder of the search.</param>
    /// <param name="globs">Globs tested against each file name (not the full path). A file is returned when any glob matches.</param>
    /// <param name="token">Checked for every folder and every file; cancels the search.</param>
    /// <returns>Full paths of the matching files, de-duplicated and sorted.</returns>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    public static string[] FindFiles(string path, IReadOnlyList<Glob> globs, CancellationToken token)
    {
        // nothing can match, so do not touch the file system at all
        if (globs.Count == 0) {
            return Array.Empty<string>();
        }

        var files = new List<string>(100);

        // explicit stack instead of recursion: the tree is walked once for all globs,
        // and a very deep tree cannot exhaust the call stack
        var pending = new Stack<string>();
        pending.Push(path);
        while (pending.Count > 0) {
            token.ThrowIfCancellationRequested();
            var folder = pending.Pop();

            var found = FindMatchesInFolder(folder, globs, token);
            if (found is not null) {
                files.AddRange(found);
            }

            foreach (var dir in Directory.GetDirectories(folder, "*", diroptions)) {
                pending.Push(dir);
            }
        }

        return ToSortedDistinctArray(files);
    }

    /// <summary>
    /// Use globs to find files recursively, in parallel
    /// </summary>
    /// <param name="path">Root folder of the search.</param>
    /// <param name="globs">Globs tested against each file name (not the full path). A file is returned when any glob matches.</param>
    /// <param name="parallelthreads">Maximum degree of parallelism. Values of 1 or less fall back to <see cref="FindFiles"/>.</param>
    /// <param name="progress">
    /// Optional callback, invoked once per directory level with the running total of folders
    /// queued for scanning so far. It is not invoked when the sequential fallback is used.
    /// </param>
    /// <param name="cancellationtoken">Cancels the search.</param>
    /// <returns>Full paths of the matching files, de-duplicated and sorted.</returns>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    public static string[] ParallelFindFiles(string path, IReadOnlyList<Glob> globs, int parallelthreads, Action<int>? progress, CancellationToken cancellationtoken)
    {
        if (parallelthreads <= 1) {
            return FindFiles(path, globs, cancellationtoken);
        }

        var count = 0;
        var results = new ConcurrentBag<List<string>>();
        var paralleloptions = new ParallelOptions { MaxDegreeOfParallelism = parallelthreads, CancellationToken = cancellationtoken };

        // we need 2 buffers, so we can swap them. One is iterated in parallel, the other is built up for the next iteration
        var currentbuffer = new ConcurrentBag<string> { path };
        var nextbuffer = new ConcurrentBag<string>();

        while (!currentbuffer.IsEmpty) {
            cancellationtoken.ThrowIfCancellationRequested();
            count += currentbuffer.Count;
            progress?.Invoke(count);

            _ = Parallel.ForEach(currentbuffer, paralleloptions, (folder) => {
                cancellationtoken.ThrowIfCancellationRequested();

                // add subdirectories to the queue, to be processed in parallel on the next batch
                foreach (var dir in Directory.GetDirectories(folder, "*", diroptions)) {
                    nextbuffer.Add(dir);
                }

                // now find the files that match the globs
                var found = FindMatchesInFolder(folder, globs, cancellationtoken);
                if (found is not null) {
                    results.Add(found);
                }
            });

            // if no new folders were added, we are done
            if (nextbuffer.IsEmpty) {
                break;
            }

            currentbuffer.Clear(); // clear the processed items

            // swap the bags, so currentbuffer is now ready for the next iteration
            (nextbuffer, currentbuffer) = (currentbuffer, nextbuffer);
        }

        // flatten and sort the results
        return ToSortedDistinctArray(results.SelectMany(s => s));
    }

    /// <summary>
    /// OLD AND SIMPLE ROUTINE. Search for files matching the given patterns (in parallel) in the given folder
    /// </summary>
    /// <param name="path">Root folder of the search.</param>
    /// <param name="outerpatterns">File system search patterns (e.g. <c>*.txt</c>); null or empty entries are ignored.</param>
    /// <param name="parallelthreads">Maximum number of patterns searched concurrently.</param>
    /// <param name="token">Cancels the scheduling of further pattern searches.</param>
    /// <returns>Full paths of the matching files, de-duplicated and sorted.</returns>
    public static string[] RecursiveFindFiles(string path, IReadOnlyList<string> outerpatterns, int parallelthreads, CancellationToken token)
    {
        var searchoptions = new EnumerationOptions { RecurseSubdirectories = true, IgnoreInaccessible = true };
        var results = new ConcurrentBag<string[]>();

        _ = Parallel.ForEach(outerpatterns, new ParallelOptions { MaxDegreeOfParallelism = parallelthreads, CancellationToken = token }, pattern => {
            if (string.IsNullOrEmpty(pattern)) {
                return;
            }

            var files = Directory.GetFiles(path, pattern, searchoptions);
            if (files.Length > 0) {
                results.Add(files);
            }
        });

        // merge the results from each task into single sorted array
        return ToSortedDistinctArray(results.SelectMany(x => x));
    }

    /// <summary>
    /// Lists the files directly inside <paramref name="folder"/> and keeps those whose
    /// file name matches at least one glob.
    /// </summary>
    /// <returns>The matching full paths, or null when nothing matched (avoids allocating empty lists).</returns>
    private static List<string>? FindMatchesInFolder(string folder, IReadOnlyList<Glob> globs, CancellationToken token)
    {
        var candidates = Directory.GetFiles(folder, "*", fileoptions);
        var size = Math.Min(candidates.Length, 10); // initial capacity only, the list grows as needed
        List<string>? found = null;

        foreach (var c in candidates) {
            token.ThrowIfCancellationRequested();
            var filename = Path.GetFileName(c);
            foreach (var g in globs) {
                if (g.IsMatch(filename)) {
                    found ??= new List<string>(size);
                    found.Add(c);
                    break; // one matching glob is enough, do not add the file twice
                }
            }
        }

        return found;
    }

    /// <summary>
    /// Removes duplicates and sorts the paths. Duplicates are removed first because
    /// Distinct does not promise to keep the order of an already sorted sequence.
    /// </summary>
    private static string[] ToSortedDistinctArray(IEnumerable<string> files)
    {
        return files
            .Distinct()
            .OrderBy(s => s)
            .ToArray();
    }
}