From 88622032b9e59fa4cdd5d698cc97044c26f59170 Mon Sep 17 00:00:00 2001 From: Thomas <71355143+thomas694@users.noreply.github.com> Date: Mon, 3 Oct 2022 15:56:56 +0200 Subject: [PATCH] Fix issue "Some embedded Tumblr videos have been wrongly saved" - Due to a bug present since version 2.6.0, embedded videos from normal and hidden Tumblr blogs weren't saved correctly. - At the next startup, video links from the blog database are checked against the filesystem. If a video file is defective, the file and its database entry are removed, so that it can be downloaded again on the next forced scan at the latest. --- .../Controllers/ManagerController.cs | 12 ++-- .../Crawler/TumblrBlogCrawler.cs | 2 +- .../Crawler/TumblrHiddenCrawler.cs | 2 +- .../TumblThree.Domain/Models/Files/Files.cs | 66 +++++++++++++++++-- 4 files changed, 69 insertions(+), 13 deletions(-) diff --git a/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs b/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs index 7e2ae149..639bd1d9 100644 --- a/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs +++ b/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs @@ -361,7 +361,7 @@ private async Task LoadAllDatabasesAsync() if (Directory.Exists(path)) { - IReadOnlyList databases = await GetIFilesAsync(path); + IReadOnlyList databases = await GetIFilesAsync(path, false); foreach (IFiles database in databases) { _managerService.AddDatabase(database); @@ -397,7 +397,7 @@ private async Task LoadArchiveAsync() { if (SkipFolder(collection.Id, folder, _shellService.Settings.LoadArchive)) continue; - IReadOnlyList archiveDatabases = await GetIFilesAsync(folder); + IReadOnlyList archiveDatabases = await GetIFilesAsync(folder, true); foreach (IFiles archiveDB in archiveDatabases) { _managerService.AddArchive(archiveDB); @@ -432,10 +432,10 @@ private bool SkipFolder(int currentCollectionId, string folder, bool loadArchive return !loadArchives; } - private Task> 
GetIFilesAsync(string directory) => Task.Factory.StartNew( - () => GetIFilesCore(directory), CancellationToken.None, TaskCreationOptions.None, TaskScheduler.FromCurrentSynchronizationContext()); + private Task> GetIFilesAsync(string directory, bool isArchive) => Task.Factory.StartNew( + () => GetIFilesCore(directory, isArchive), CancellationToken.None, TaskCreationOptions.None, TaskScheduler.FromCurrentSynchronizationContext()); - private IReadOnlyList GetIFilesCore(string directory) + private IReadOnlyList GetIFilesCore(string directory, bool isArchive) { Logger.Verbose("ManagerController:GetFilesCore Start"); @@ -451,7 +451,7 @@ private IReadOnlyList GetIFilesCore(string directory) //TODO: Refactor try { - IFiles database = Files.Load(filename); + IFiles database = Files.Load(filename, isArchive); if (_shellService.Settings.LoadAllDatabases) { databases.Add(database); diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs index f930a638..2a429c34 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrBlogCrawler.cs @@ -470,7 +470,7 @@ private void AddToJsonQueue(CrawlerData addToList) private void AddToJsonQueue(string[] urls, Post post) { if (urls == null || urls.Length == 0) return; - AddToJsonQueue(new CrawlerData(FileName(urls[0]), post)); + AddToJsonQueue(new CrawlerData(Path.ChangeExtension(FileName(urls[0]), ".json"), post)); } private async Task AddUrlsToDownloadListAsync(TumblrApiJson document) diff --git a/src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs b/src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs index 22ce10aa..67f482d5 100644 --- a/src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs +++ b/src/TumblThree/TumblThree.Applications/Crawler/TumblrHiddenCrawler.cs @@ -455,7 +455,7 @@ private void 
AddToJsonQueue(CrawlerData addToList) private void AddToJsonQueue(string[] urls, Post post) { if (urls == null || urls.Length == 0) return; - AddToJsonQueue(new CrawlerData(FileName(urls[0]), post)); + AddToJsonQueue(new CrawlerData(Path.ChangeExtension(FileName(urls[0]), ".json"), post)); } private bool CheckIfContainsTaggedPost(Post post) diff --git a/src/TumblThree/TumblThree.Domain/Models/Files/Files.cs b/src/TumblThree/TumblThree.Domain/Models/Files/Files.cs index 97a74efd..f79365b5 100644 --- a/src/TumblThree/TumblThree.Domain/Models/Files/Files.cs +++ b/src/TumblThree/TumblThree.Domain/Models/Files/Files.cs @@ -50,6 +50,9 @@ public Files(string name, string location) [DataMember] public BlogTypes BlogType { get; set; } + [DataMember] + public string Updates { get; set; } + [DataMember] public string Version { get; set; } @@ -102,12 +105,13 @@ public virtual bool CheckIfFileExistsInDB(string filenameUrl) } } - public static IFiles Load(string fileLocation) + public static IFiles Load(string fileLocation, bool isArchive = false) { try { - //isDirty = false; - return LoadCore(fileLocation); + IFiles file = LoadCore(fileLocation, isArchive); + if (!isArchive && file.IsDirty) file.Save(); + return file; } catch (Exception ex) when (ex is SerializationException || ex is FileNotFoundException || ex is IOException) { @@ -116,7 +120,7 @@ public static IFiles Load(string fileLocation) } } - private static IFiles LoadCore(string fileLocation) + private static IFiles LoadCore(string fileLocation, bool isArchive) { using (var stream = new FileStream(fileLocation, FileMode.Open, FileAccess.Read, FileShare.Read)) { @@ -124,6 +128,7 @@ private static IFiles LoadCore(string fileLocation) var file = (Files)serializer.ReadObject(stream); if (file.entries != null) file.entries = new HashSet(file.entries, new FileEntryComparer()); + if (!isArchive) DoUpdates(file); if (file.Version == "1") { for (int i = 0; i < file.links.Count; i++) @@ -202,6 +207,57 @@ private static IFiles 
LoadCore(string fileLocation) } } + private static void DoUpdates(Files file) + { + // T01 + if (!(file.Updates ?? "").Contains("T01") && + (file.BlogType == BlogTypes.tumblr || file.BlogType == BlogTypes.tmblrpriv) && + new string[] { "1", "2", "3", "4", "5" }.Contains(file.Version) && + Directory.Exists(file.Location)) + { + if (new string[] { "4", "5" }.Contains(file.Version)) + { + foreach (var entry in file.entries.ToArray()) + { + if (!entry.Filename.ToLower().EndsWith(".mp4")) { continue; } + var filepath = Path.Combine(file.Location.Replace("\\Index", ""), file.Name, entry.Filename); + if (!File.Exists(filepath)) { continue; } + + var fi = new FileInfo(filepath); + var fileLength = fi.Length; + if (fi.Length <= 50 * 1024 && + fi.CreationTime > new DateTime(2022, 4, 1)) + { + bool redo = false; + if (fi.Length < 8) + { + redo = true; + } + else + { + using (var fs = File.OpenRead(filepath)) + { + byte[] ba = new byte[8]; + fs.Read(ba, 0, 8); + + if (ba[4] == 0x66 && ba[5] == 0x74 && ba[6] == 0x79 && ba[7] == 0x70) { continue; } + if (ba[0] == 0x7B && ba[1] == 0x0D && ba[2] == 0x0A) { redo = true; } + } + } + + if (redo) + { + File.Delete(filepath); + file.entries.Remove(entry); + } + } + } + } + file.Updates = (string.IsNullOrEmpty(file.Updates) ? "" : "|") + "T01"; + file.isDirty = true; + } + } + public bool Save() { lock (_lockList) @@ -230,7 +286,7 @@ public bool Save() } catch (Exception ex) { - Logger.Error("Files:Save: {0}", ex); + Logger.Error("Files:Save: {0}: {1}", Name, ex); throw; } }