-
Notifications
You must be signed in to change notification settings - Fork 0
/
ProfileHandling.fs
393 lines (342 loc) · 16 KB
/
ProfileHandling.fs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
/// Module containing functions to execute a profile to
/// remove trash files from directories (when permitted by the profile),
/// clean up stale file and directory entries from the database and
/// to introduce new file and directory entries into the database.
[<CompilationRepresentation (CompilationRepresentationFlags.ModuleSuffix)>]
module NikonTheThird.Krystallizer.ProfileHandling
open FSharp.Control
open NikonTheThird.Krystallizer.Configuration
open NikonTheThird.Krystallizer.Database
open System.IO
open System.Security.Cryptography
open System.Text.RegularExpressions
/// Logger for this module.
let rec private logger = getModuleLogger <@ logger @>
/// Contains all information required for handling a specific DirectoryInfo.
type private DirectoryHandlingState private (configuration, database, profile, trashRegex, rootDirectoryInfo, directoryInfo, directoryEntry) =
/// DirectoryInfos of the handled DirectoryInfo that are only computed when required.
let subdirectoryInfos = lazy (
(directoryInfo : DirectoryInfo).EnumerateDirectories ()
|> Seq.sortBy (fun directoryInfo -> directoryInfo.Name.ToLowerInvariant ())
|> Seq.cache
)
/// FileInfos of the handled DirectoryInfo that are only computed when required.
let fileInfos = lazy (
directoryInfo.EnumerateFiles ()
|> Seq.sortBy (fun fileInfo -> fileInfo.Name.ToLowerInvariant ())
|> Seq.cache
)
/// Set of all the subdirectory names of the handled DirectoryInfo that is
/// only computed when required.
let subdirectoryInfoNameSet = lazy (
subdirectoryInfos.Value
|> Seq.map (fun directoryInfo -> directoryInfo.Name)
|> Set.ofSeq
)
/// Cache for subdirectory entries of the handled DirectoryEntry that
/// is filled when they are fetched from the database.
let mutable subdirectoryEntryCache = ValueNone
/// Cache for file entries of the handled DirectoryEntry that is
/// filled when they are fetched from the database.
let mutable fileEntryCache = ValueNone
/// Cache for a map of file names to file entries of the handled
/// DirectoryEntry that is filled when required.
let mutable fileEntryNameCache = ValueNone
/// Creates a handler for the given directory info and directory entry.
new (configuration, database, profile, rootDirectoryInfo, directoryInfo, directoryEntry) =
let trashRegex = Regex configuration.TrashRegex
DirectoryHandlingState (configuration, database, profile, trashRegex, rootDirectoryInfo, directoryInfo, directoryEntry)
/// Returns the global program configuration.
member _.Configuration : Configuration = configuration
/// Returns the database connection.
member _.Database : DatabaseConnection = database
/// Returns the currently executing profile.
member _.Profile : Profile = profile
/// Returns the regular expression for removing trash files.
member _.TrashRegex : Regex = trashRegex
/// The directory info of the directory currently being handled.
member _.DirectoryInfo = directoryInfo
/// The database directory entry of the directory currently being handled.
member _.DirectoryEntry : DirectoryEntry = directoryEntry
/// A lazily computed sequence of all subdirectory infos of the directory
/// currently being handled.
member _.SubdirectoryInfos = subdirectoryInfos.Value
/// A lazily computed sequence of all file infos of the directory
/// currently being handled.
member _.FileInfos = fileInfos.Value
/// A lazily computed set of all the subdirectory names of the directory
/// currently being handled.
member _.SubdirectoryInfoNameSet = subdirectoryInfoNameSet.Value
/// A set of all the file names of the directory currently being handled.
/// Only existing file info names are returned.
member _.FileInfoNameSet =
fileInfos.Value
|> Seq.filter (fun fileInfo -> fileInfo.Exists)
|> Seq.map (fun fileInfo -> fileInfo.Name)
|> Set.ofSeq
/// Creates a handler for the given directory info and directory entry
/// based on the current configuration.
member _.CreateSubdirectoryState (directoryInfo', directoryEntry') =
DirectoryHandlingState (configuration, database, profile, trashRegex, rootDirectoryInfo, directoryInfo', directoryEntry')
/// Trims the path of the root directory parent directory from the given path,
/// which means that the returned path starts at the root directory.
member _.RemoveRootParentPathFrom (path : string) =
(rootDirectoryInfo : DirectoryInfo).Parent.FullName.Length
|> path.Substring
|> fun path -> path.TrimStart ('\\', '/')
/// A list of all subdirectory entries of the directory currently
/// being handled. The list is only fetched once from the database
/// and then cached.
member _.SubdirectoryEntries = async {
match subdirectoryEntryCache with
| ValueSome subdirectoryEntries ->
return subdirectoryEntries
| ValueNone ->
let! subdirectoryEntries =
ValueSome directoryEntry.Id
|> database.GetDirectoriesByParentId
|> AsyncSeq.toListAsync
do subdirectoryEntryCache <- ValueSome subdirectoryEntries
return subdirectoryEntries
}
/// A list of all file entries of the directory currently being
/// handled. The list is only fetched once from the database and
/// then cached.
member _.FileEntries = async {
match fileEntryCache with
| ValueSome fileEntries ->
return fileEntries
| ValueNone ->
let! fileEntries =
directoryEntry.Id
|> database.GetFilesByDirectoryId
|> AsyncSeq.toListAsync
do fileEntryCache <- ValueSome fileEntries
return fileEntries
}
/// A map of all file names to their corresponding file entries of
/// the directory currently being handled. The map is only fetched
/// once from the database (if the FileEntries have not been accessed)
/// and then cached.
member this.FileEntryNameMap = async {
match fileEntryNameCache with
| ValueSome fileEntryNameMap ->
return fileEntryNameMap
| ValueNone ->
let! fileEntries = this.FileEntries
let fileEntryNameMap =
fileEntries
|> Seq.map (fun ({ Name = name } as fileEntry) -> name, fileEntry)
|> Map.ofSeq
do fileEntryNameCache <- ValueSome fileEntryNameMap
return fileEntryNameMap
}
/// Computes the SHA1 hash of the given file info and returns it.
let private computeHash (fileInfo : FileInfo) = async {
let! token = Async.CancellationToken
try use fileStream = fileInfo.OpenRead ()
use algorithm = SHA1.Create ()
return! algorithm.ComputeHashAsync (fileStream, token)
|> Async.AwaitTask
|> Async.Map ValueSome
with :? FileNotFoundException ->
// The file no longer exists, no hash could be computed.
return ValueNone
}
/// Returns the directory info of the given root directory name.
let private getRootDirectoryInfo configuration rootDirectoryName =
Path.Combine (
executingAssemblyDirectoryInfo.FullName,
configuration.RootDirectoryParentPath,
rootDirectoryName
)
|> DirectoryInfo
/// Fetches the directory entry with the given parent id and name from the
/// database or creates it if it doesn't exist.
let private getOrAddDirectoryEntry (database : DatabaseConnection) parentId name = async {
match! database.TryGetDirectoryByParentIdAndName (parentId, name) with
| ValueSome directoryEntry ->
return directoryEntry
| ValueNone ->
return! database.AddDirectory {
Id = 0
ParentId = parentId
Name = name
}
}
/// Checks if any files in the currently handled directory match the trash
/// regex, and if they do, deletes them.
let private removeTrashFromDirectory (state : DirectoryHandlingState) = async {
do logger.Debug (
"Removing trash from directory {path}",
state.RemoveRootParentPathFrom state.DirectoryInfo.FullName
)
for fileInfo in state.FileInfos do
try if state.TrashRegex.IsMatch fileInfo.Name then
do logger.Information (
"Removing trash file at {path}",
state.RemoveRootParentPathFrom fileInfo.FullName
)
do fileInfo.Delete ()
with ex ->
do logger.Debug (
ex,
"Could not check for trash file at {path}",
state.RemoveRootParentPathFrom fileInfo.FullName
)
}
/// Checks if any files in the currently handled directory are completely
/// empty, and if they are, deletes them.
let private removeEmptyFilesFromDirectory (state : DirectoryHandlingState) = async {
do logger.Debug (
"Removing empty files from directory {path}",
state.RemoveRootParentPathFrom state.DirectoryInfo.FullName
)
for fileInfo in state.FileInfos do
try if fileInfo.Length = 0L then
do logger.Information (
"Removing empty file at {path}",
state.RemoveRootParentPathFrom fileInfo.FullName
)
do fileInfo.Delete ()
with ex ->
do logger.Debug (
ex,
"Could not check for empty file at {path}",
state.RemoveRootParentPathFrom fileInfo.FullName
)
}
/// Checks if there are file entries in the database for the currently handled
/// directory that no longer exist on disk. Removes them.
let private cleanupDatabaseFileEntries (state : DirectoryHandlingState) = async {
do logger.Debug (
"Cleaning up database file entries of directory {path}",
state.RemoveRootParentPathFrom state.DirectoryInfo.FullName
)
for { Id = id; Name = name } in state.FileEntries do
if state.FileInfoNameSet |> Set.contains name |> not then
do logger.Information (
"Removing file entry {name} from directory {path} (id {directoryId})",
name,
state.RemoveRootParentPathFrom state.DirectoryInfo.FullName,
state.DirectoryEntry.Id
)
do! state.Database.RemoveFile id
}
/// Checks if there are directory entries in the database for the currently
/// handled directory that no longer exist on disk. Removes them, delete
/// cascade will take care of any subdirectories and files.
let private cleanupDatabaseDirectoryEntries (state : DirectoryHandlingState) = async {
do logger.Debug (
"Cleaning up database directory entries of directory {path}",
state.RemoveRootParentPathFrom state.DirectoryInfo.FullName
)
for { Id = id; Name = name } in state.SubdirectoryEntries do
if state.SubdirectoryInfoNameSet |> Set.contains name |> not then
do logger.Information (
"Removing subdirectory entry {name} from directory {path} (id {directoryId})",
name,
state.RemoveRootParentPathFrom state.DirectoryInfo.FullName,
state.DirectoryEntry.Id
)
do! state.Database.RemoveDirectory id
}
/// Checks if there are file infos in the currently handled directory
/// that have no entries in the database. Hashes them.
let private getFileInfosToHash (state : DirectoryHandlingState) = asyncSeq {
do logger.Debug (
"Getting file infos to hash in directory {path}",
state.RemoveRootParentPathFrom state.DirectoryInfo.FullName
)
for fileInfo in state.FileInfos do
try match! state.FileEntryNameMap |> Async.Map (Map.tryFind fileInfo.Name) with
| Some { Length = length } when not state.Profile.CheckFileSizes || length = fileInfo.Length ->
// The file in its current state is stored or
// we do not check for file size changes.
do ()
| Some { Id = id } ->
// The file is stored, but it is no longer current.
// Get rid of the old entry and rehash it.
do! state.Database.RemoveFile id
yield struct (state, fileInfo)
| None ->
// The file has not been stored yet.
yield struct (state, fileInfo)
with :? FileNotFoundException ->
// The length of a no longer existing file was accessed.
do ()
}
/// Handles the directory represented by the given state and returns all file
/// infos that require hashing.
/// If the executing profile has trash removal or empty file removal enabled,
/// first trash and/or empty files will be checked for and removed. Then stale
/// file and subdirectory entries will be removed from the database, then
/// returns all files of the current directory that need to be hashed and finally
/// returns all files that need to be hashed of the subdirectories as well.
let rec private handleDirectory (state : DirectoryHandlingState) = asyncSeq {
do logger.Information (
"Handling path {path}",
state.RemoveRootParentPathFrom state.DirectoryInfo.FullName
)
if state.Profile.RemoveTrash then
do! removeTrashFromDirectory state
if state.Profile.RemoveEmptyFiles then
do! removeEmptyFilesFromDirectory state
do! cleanupDatabaseFileEntries state
do! cleanupDatabaseDirectoryEntries state
yield! getFileInfosToHash state
for subdirectoryInfo in state.SubdirectoryInfos do
let! subdirectoryEntry = getOrAddDirectoryEntry state.Database (ValueSome state.DirectoryEntry.Id) subdirectoryInfo.Name
yield! state.CreateSubdirectoryState (subdirectoryInfo, subdirectoryEntry)
|> handleDirectory
}
/// Handles all root directories in the given profile. Performs trash and empty
/// file removal if enabled and stale database entry cleanup and returns all
/// files from all subdirectories that require hashing into the database.
let private handleRootDirectories configuration database profile = asyncSeq {
do logger.Information "Handling all root directories of the profile"
for rootDirectoryName in profile.RootDirectories do
let rootDirectoryInfo = getRootDirectoryInfo configuration rootDirectoryName
if rootDirectoryInfo.Exists then
let! rootDirectoryEntry = getOrAddDirectoryEntry database ValueNone rootDirectoryInfo.Name
yield! DirectoryHandlingState (
configuration,
database,
profile,
rootDirectoryInfo,
rootDirectoryInfo,
rootDirectoryEntry
)
|> handleDirectory
else
do logger.Information (
"Skipping handling of non-existent root directory {name}",
rootDirectoryName
)
}
/// Hashes the given file info and stores it into the database.
let private hashAndStoreFileInfo struct (state : DirectoryHandlingState, fileInfo : FileInfo) = async {
if fileInfo.Exists then
do logger.Information (
"Handling file {path}",
state.RemoveRootParentPathFrom fileInfo.FullName
)
match! computeHash fileInfo with
| ValueSome hash ->
let fileEntry = {
Id = 0
DirectoryId = state.DirectoryEntry.Id
Name = fileInfo.Name
Length = fileInfo.Length
Hash = hash
}
do! state.Database.AddFile fileEntry |> Async.Ignore
| ValueNone -> do ()
}
/// Handles all root directories as specified in the given profile,
/// this means removing trash and empty files if enabled, removing stale
/// database entries and hashing new files into the database with
/// the configured degree of parallelism.
let handleProfile configuration database profile =
handleRootDirectories configuration database profile
|> AsyncSeq.iterAsyncParallelThrottled configuration.DegreeOfParallelism hashAndStoreFileInfo