From d11375407f462e32ac3ccacb03e7b0861bbaaa39 Mon Sep 17 00:00:00 2001 From: Johannes Meyer zum Alten Borgloh Date: Mon, 1 May 2017 18:19:56 +0200 Subject: [PATCH] Adds support for 'liked by' downloads - Adds support for downloading liked photo and video posts. --- src/TumblThree/SharedAssemblyInfo.cs | 4 +- .../Controllers/ManagerController.cs | 9 +- .../Downloader/Downloader.cs | 4 +- .../Downloader/DownloaderFactory.cs | 2 + .../Downloader/TumblrDownloader.cs | 2 +- .../Downloader/TumblrLikedByDownloader.cs | 258 ++++++++++++++++++ .../TumblThree.Applications/FileDownloader.cs | 226 +++++++++++++++ .../Properties/Resources.Designer.cs | 13 +- .../Properties/Resources.resx | 5 +- .../Services/SharedCookieService.cs | 58 ++++ .../Services/Validator.cs | 11 - .../ThrottledStream.cs | 157 ----------- .../TumblThree.Applications.csproj | 4 +- .../ViewModels/SettingsViewModel.cs | 64 +++-- .../TumblThree.Domain/Models/Blog.cs | 25 +- .../TumblThree.Domain/Models/BlogTypes.cs | 3 +- .../TumblThree.Domain/Models/Validator.cs | 17 ++ .../TumblThree.Domain.csproj | 1 + .../Views/SettingsView.xaml | 2 +- 19 files changed, 657 insertions(+), 208 deletions(-) create mode 100644 src/TumblThree/TumblThree.Applications/Downloader/TumblrLikedByDownloader.cs create mode 100644 src/TumblThree/TumblThree.Applications/FileDownloader.cs create mode 100644 src/TumblThree/TumblThree.Applications/Services/SharedCookieService.cs delete mode 100644 src/TumblThree/TumblThree.Applications/Services/Validator.cs create mode 100644 src/TumblThree/TumblThree.Domain/Models/Validator.cs diff --git a/src/TumblThree/SharedAssemblyInfo.cs b/src/TumblThree/SharedAssemblyInfo.cs index 90a646e..b56aad9 100644 --- a/src/TumblThree/SharedAssemblyInfo.cs +++ b/src/TumblThree/SharedAssemblyInfo.cs @@ -12,5 +12,5 @@ [assembly: ComVisible(false)] [assembly: NeutralResourcesLanguage("en-US", UltimateResourceFallbackLocation.MainAssembly)] -[assembly: AssemblyVersion("1.0.4.42")] -[assembly: AssemblyFileVersion("1.0.4.42")] +[assembly: AssemblyVersion("1.0.4.43")] +[assembly: AssemblyFileVersion("1.0.4.43")] diff --git a/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs b/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs index 63c0b01..fc1a7ab 100644 --- a/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs +++ b/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs @@ -335,9 +335,12 @@ private async Task AddBlogAsync(string blogUrl) { blogUrl = crawlerService.NewBlogUrl; } - + IBlog blog; // FIXME: Dependency - var blog = new Blog(blogUrl, Path.Combine(shellService.Settings.DownloadLocation, "Index"), BlogTypes.tumblr); + if (Validator.IsValidTumblrUrl(blogUrl)) + blog = new Blog(blogUrl, Path.Combine(shellService.Settings.DownloadLocation, "Index"), BlogTypes.tumblr); + else + blog = new Blog(blogUrl, Path.Combine(shellService.Settings.DownloadLocation, "Index"), BlogTypes.tlb); TransferGlobalSettingsToBlog(blog); IDownloader downloader = DownloaderFactory.GetDownloader(blog.BlogType, shellService, crawlerService, blog); await downloader.IsBlogOnlineAsync(); @@ -391,7 +394,7 @@ private void OnClipboardContentChanged(object sender, EventArgs e) private async Task AddBlogBatchedAsync(string[] urls) { var semaphoreSlim = new SemaphoreSlim(15); - foreach (string url in urls.Where(Validator.IsValidTumblrUrl)) + foreach (string url in urls.Where(url => Validator.IsValidTumblrUrl(url) || Validator.IsValidTumblrLikedByUrl(url))) { await semaphoreSlim.WaitAsync(); await AddBlogAsync(url); diff --git a/src/TumblThree/TumblThree.Applications/Downloader/Downloader.cs b/src/TumblThree/TumblThree.Applications/Downloader/Downloader.cs index 6777760..0709e42 100644 --- a/src/TumblThree/TumblThree.Applications/Downloader/Downloader.cs +++ b/src/TumblThree/TumblThree.Applications/Downloader/Downloader.cs @@ -65,6 +65,7 @@ protected HttpWebRequest CreateWebReqeust(string url) request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate; request.ReadWriteTimeout = shellService.Settings.TimeOut * 1000; request.Timeout = -1; + request.CookieContainer = SharedCookieService.GetUriCookieContainer(new Uri("https://www.tumblr.com/")); ServicePointManager.DefaultConnectionLimit = 400; if (!string.IsNullOrEmpty(shellService.Settings.ProxyHost)) { @@ -211,7 +212,8 @@ protected virtual async Task DownloadBinaryFile(string fileLocation, strin { try { - return await ThrottledStream.DownloadFileWithResumeAsync(url, fileLocation, + var fileDownloader = new FileDownloader(); + return await fileDownloader.DownloadFileWithResumeAsync(url, fileLocation, shellService.Settings, ct); } catch (IOException ex) when ((ex.HResult & 0xFFFF) == 0x27 || (ex.HResult & 0xFFFF) == 0x70) diff --git a/src/TumblThree/TumblThree.Applications/Downloader/DownloaderFactory.cs b/src/TumblThree/TumblThree.Applications/Downloader/DownloaderFactory.cs index b9f2b41..29c77c2 100644 --- a/src/TumblThree/TumblThree.Applications/Downloader/DownloaderFactory.cs +++ b/src/TumblThree/TumblThree.Applications/Downloader/DownloaderFactory.cs @@ -37,6 +37,8 @@ public IDownloader GetDownloader(BlogTypes blogtype, IShellService shellService, { case BlogTypes.tumblr: return new TumblrDownloader(shellService, crawlerService, blog); + case BlogTypes.tlb: + return new TumblrLikedByDownloader(shellService, crawlerService, blog); default: throw new ArgumentException("Website is not supported!", "blogType"); } diff --git a/src/TumblThree/TumblThree.Applications/Downloader/TumblrDownloader.cs b/src/TumblThree/TumblThree.Applications/Downloader/TumblrDownloader.cs index 9fcce47..0a1f04d 100644 --- a/src/TumblThree/TumblThree.Applications/Downloader/TumblrDownloader.cs +++ b/src/TumblThree/TumblThree.Applications/Downloader/TumblrDownloader.cs @@ -306,7 +306,7 @@ private async Task> GetUrlsAsync(IProgress } numberOfPostsCrawled += 50; - UpdateProgressQueueInformation(progress, Resources.ProgressGetUrl, numberOfPostsCrawled, totalPosts); + UpdateProgressQueueInformation(progress, Resources.ProgressGetUrlLong, numberOfPostsCrawled, totalPosts); })()); } await Task.WhenAll(trackedTasks); diff --git a/src/TumblThree/TumblThree.Applications/Downloader/TumblrLikedByDownloader.cs b/src/TumblThree/TumblThree.Applications/Downloader/TumblrLikedByDownloader.cs new file mode 100644 index 0000000..1a75b3c --- /dev/null +++ b/src/TumblThree/TumblThree.Applications/Downloader/TumblrLikedByDownloader.cs @@ -0,0 +1,258 @@ +using System; +using System.Collections.Generic; +using System.ComponentModel.Composition; +using System.Globalization; +using System.IO; +using System.Linq; +using System.Net; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading; +using System.Threading.Tasks; +using System.Xml.Linq; + +using TumblThree.Applications.DataModels; +using TumblThree.Applications.Properties; +using TumblThree.Applications.Services; +using TumblThree.Domain; +using TumblThree.Domain.Models; + +namespace TumblThree.Applications.Downloader +{ + [Export(typeof(IDownloader))] + [ExportMetadata("BlogType", BlogTypes.tumblr)] + public class TumblrLikedByDownloader : Downloader, IDownloader + { + private readonly IBlog blog; + private readonly ICrawlerService crawlerService; + + private readonly IShellService shellService; + private int numberOfPagesCrawled = 0; + + public TumblrLikedByDownloader(IShellService shellService, ICrawlerService crawlerService, IBlog blog) + : base(shellService, crawlerService, blog) + { + this.shellService = shellService; + this.crawlerService = crawlerService; + this.blog = blog; + } + + public async Task Crawl(IProgress progress, CancellationToken ct, PauseToken pt) + { + Logger.Verbose("TumblrLikedByDownloader.Crawl:Start"); + + Task grabber = GetUrlsAsync(progress, ct, pt); + Task downloader = DownloadBlogAsync(progress, ct, pt); + + await grabber; + + UpdateProgressQueueInformation(progress, Resources.ProgressUniqueDownloads); + blog.DuplicatePhotos = DetermineDuplicates(PostTypes.Photo); + blog.DuplicateVideos = DetermineDuplicates(PostTypes.Video); + blog.DuplicateAudios = DetermineDuplicates(PostTypes.Audio); + blog.TotalCount = (blog.TotalCount - blog.DuplicatePhotos - blog.DuplicateAudios - blog.DuplicateVideos); + + await downloader; + + if (!ct.IsCancellationRequested) + { + blog.LastCompleteCrawl = DateTime.Now; + } + + blog.Save(); + + UpdateProgressQueueInformation(progress, ""); + } + + private string ResizeTumblrImageUrl(string imageUrl) + { + var sb = new StringBuilder(imageUrl); + return sb + .Replace("_1280", "_" + shellService.Settings.ImageSize.ToString()) + .Replace("_540", "_" + shellService.Settings.ImageSize.ToString()) + .Replace("_500", "_" + shellService.Settings.ImageSize.ToString()) + .Replace("_400", "_" + shellService.Settings.ImageSize.ToString()) + .Replace("_250", "_" + shellService.Settings.ImageSize.ToString()) + .Replace("_100", "_" + shellService.Settings.ImageSize.ToString()) + .Replace("_75sq", "_" + shellService.Settings.ImageSize.ToString()) + .ToString(); + } + + /// + /// Return the url without the size and type suffix (e.g. + /// https://68.media.tumblr.com/51a99943f4aa7068b6fd9a6b36e4961b/tumblr_mnj6m9Huml1qat3lvo1). + /// + protected override string GetCoreImageUrl(string url) + { + return url.Split('_')[0] + "_" + url.Split('_')[1]; + } + + protected override bool CheckIfFileExistsInDirectory(string url) + { + string fileName = url.Split('/').Last(); + Monitor.Enter(lockObjectDirectory); + string blogPath = blog.DownloadLocation(); + if (Directory.EnumerateFiles(blogPath).Any(file => file.Contains(fileName))) + { + Monitor.Exit(lockObjectDirectory); + return true; + } + Monitor.Exit(lockObjectDirectory); + return false; + } + + private int DetermineDuplicates(PostTypes type) + { + return statisticsBag.Where(url => url.Item1.Equals(type)) + .GroupBy(url => url.Item2) + .Where(g => g.Count() > 1) + .Sum(g => g.Count() - 1); + } + + protected override bool CheckIfFileExistsInDB(string url) + { + string fileName = url.Split('/').Last(); + Monitor.Enter(lockObjectDb); + if (files.Links.Contains(fileName)) + { + Monitor.Exit(lockObjectDb); + return true; + } + Monitor.Exit(lockObjectDb); + return false; + } + + private async Task GetUrlsAsync(IProgress progress, CancellationToken ct, PauseToken pt) + { + var semaphoreSlim = new SemaphoreSlim(shellService.Settings.ParallelScans); + var trackedTasks = new List(); + + foreach (int crawlerNumber in Enumerable.Range(0, shellService.Settings.ParallelScans)) + { + await semaphoreSlim.WaitAsync(); + + trackedTasks.Add(new Func(async () => + { + try + { + string document = await RequestDataAsync(blog.Url + "/page/" + crawlerNumber); + + await AddUrlsToDownloadList(document, progress, crawlerNumber, ct, pt); + } + catch (WebException) + { + } + finally + { + semaphoreSlim.Release(); + } + })()); + } + await Task.WhenAll(trackedTasks); + + producerConsumerCollection.CompleteAdding(); + + if (!ct.IsCancellationRequested) + { + UpdateBlogStats(); + } + } + + private async Task AddUrlsToDownloadList(string document, IProgress progress, int crawlerNumber, CancellationToken ct, PauseToken pt) + { + if (ct.IsCancellationRequested) + { + return; + } + if (pt.IsPaused) + { + pt.WaitWhilePausedWithResponseAsyc().Wait(); + } + + var tags = new List(); + if (!string.IsNullOrWhiteSpace(blog.Tags)) + { + tags = blog.Tags.Split(',').Select(x => x.Trim()).ToList(); + } + + AddPhotoUrlToDownloadList(document, tags); + AddVideoUrlToDownloadList(document, tags); + + Interlocked.Increment(ref numberOfPagesCrawled); + UpdateProgressQueueInformation(progress, Resources.ProgressGetUrlShort, numberOfPagesCrawled); + crawlerNumber += shellService.Settings.ParallelScans; + document = await RequestDataAsync(blog.Url + "/page/" + crawlerNumber); + if (document.Contains("No posts to display.")) + return; + await AddUrlsToDownloadList(document, progress, crawlerNumber, ct, pt); + } + + private void AddPhotoUrlToDownloadList(string document, IList tags) + { + if (blog.DownloadPhoto) + { + var regex = new Regex("data-big-photo=\"(.*)\" "); + foreach (Match match in regex.Matches(document)) + { + string imageUrl = match.Groups[1].Value; + if (blog.SkipGif && imageUrl.EndsWith(".gif")) + { + continue; + } + imageUrl = ResizeTumblrImageUrl(imageUrl); + // FIXME: postID + AddToDownloadList(Tuple.Create(PostTypes.Photo, imageUrl, Guid.NewGuid().ToString("N"))); + } + } + } + + private void AddVideoUrlToDownloadList(string document, IList tags) + { + if (blog.DownloadVideo) + { + var regex = new Regex(" url.Item1.Equals(PostTypes.Photo)); + blog.Videos = statisticsBag.Count(url => url.Item1.Equals(PostTypes.Video)); + blog.Audios = statisticsBag.Count(url => url.Item1.Equals(PostTypes.Audio)); + blog.Texts = statisticsBag.Count(url => url.Item1.Equals(PostTypes.Text)); + blog.Conversations = statisticsBag.Count(url => url.Item1.Equals(PostTypes.Conversation)); + blog.Quotes = statisticsBag.Count(url => url.Item1.Equals(PostTypes.Quote)); + blog.NumberOfLinks = statisticsBag.Count(url => url.Item1.Equals(PostTypes.Link)); + blog.PhotoMetas = statisticsBag.Count(url => url.Item1.Equals(PostTypes.PhotoMeta)); + blog.VideoMetas = statisticsBag.Count(url => url.Item1.Equals(PostTypes.VideoMeta)); + blog.AudioMetas = statisticsBag.Count(url => url.Item1.Equals(PostTypes.AudioMeta)); + } + + private void AddToDownloadList(Tuple addToList) + { + if (statisticsBag.All(download => download.Item3 != addToList.Item3)) + { + statisticsBag.Add(addToList); + producerConsumerCollection.Add(addToList); + } + } + } +} diff --git a/src/TumblThree/TumblThree.Applications/FileDownloader.cs b/src/TumblThree/TumblThree.Applications/FileDownloader.cs new file mode 100644 index 0000000..851822a --- /dev/null +++ b/src/TumblThree/TumblThree.Applications/FileDownloader.cs @@ -0,0 +1,226 @@ +using System; +using System.Diagnostics; +using System.IO; +using System.Net; +using System.Threading; +using System.Threading.Tasks; + +using TumblThree.Applications.Properties; +using TumblThree.Applications.Services; + +namespace TumblThree.Applications +{ + class FileDownloader + { + public event EventHandler Completed; + public event EventHandler ProgressChanged; + + private HttpWebRequest CreateWebReqeust(string url, AppSettings settings) + { + var request = (HttpWebRequest)WebRequest.Create(url); + request.Method = "GET"; + request.ProtocolVersion = HttpVersion.Version11; + request.UserAgent = + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"; + request.AllowAutoRedirect = true; + request.KeepAlive = true; + request.Pipelined = true; + request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate; + request.ReadWriteTimeout = settings.TimeOut * 1000; + request.Timeout = -1; + request.CookieContainer = SharedCookieService.GetUriCookieContainer(new Uri("https://www.tumblr.com/")); + ServicePointManager.DefaultConnectionLimit = 400; + if (!string.IsNullOrEmpty(settings.ProxyHost)) + { + request.Proxy = new WebProxy(settings.ProxyHost, int.Parse(settings.ProxyPort)); + } + else + { + request.Proxy = null; + } + return request; + } + + public async Task ReadFromURLIntoStream(string url, AppSettings settings) + { + HttpWebRequest request = CreateWebReqeust(url, settings); + + var response = await request.GetResponseAsync() as HttpWebResponse; + if (HttpStatusCode.OK == response.StatusCode) + { + Stream responseStream = response.GetResponseStream(); + return new ThrottledStream(responseStream, settings.Bandwidth * 1024); + } + else + { + return null; + } + } + + private async Task CheckDownloadSizeAsync(string url, AppSettings settings, CancellationToken ct) + { + HttpWebRequest request = CreateWebReqeust(url, settings); + ct.Register(() => request.Abort()); + + using (WebResponse response = await request.GetResponseAsync()) + { + return response.ContentLength; + } + } + + // FIXME: Needs a complete rewrite. Also a append/cache function for resuming incomplete files on the disk. + // Should be in separated class with support for events for downloadspeed, is resumable file?, etc. + // Should check if file is complete, else it will trigger an WebException -- 416 requested range not satisfiable at every request + public async Task DownloadFileWithResumeAsync(string url, string destinationPath, AppSettings settings, CancellationToken ct) + { + long totalBytesReceived = 0; + var attemptCount = 0; + + if (File.Exists(destinationPath)) + { + var fileInfo = new FileInfo(destinationPath); + totalBytesReceived = fileInfo.Length; + if (totalBytesReceived >= await CheckDownloadSizeAsync(url, settings, ct)) + return true; + } + FileMode fileMode = totalBytesReceived > 0 ? FileMode.Append : FileMode.Create; + + using (FileStream fileStream = File.Open(destinationPath, fileMode, FileAccess.Write, FileShare.Read)) + { + while (true) + { + attemptCount += 1; + + if (attemptCount > settings.MaxNumberOfRetries) + { + return false; + } + + try + { + HttpWebRequest request = CreateWebReqeust(url, settings); + ct.Register(() => request.Abort()); + request.AddRange(totalBytesReceived); + + long totalBytesToReceive = 0; + using (WebResponse response = await request.GetResponseAsync()) + { + totalBytesToReceive = totalBytesReceived + response.ContentLength; + + using (Stream responseStream = response.GetResponseStream()) + { + using (var throttledStream = new ThrottledStream(responseStream, settings.Bandwidth * 1024)) + { + var buffer = new byte[4096]; + int bytesRead = throttledStream.Read(buffer, 0, buffer.Length); + Stopwatch sw = Stopwatch.StartNew(); + + while (bytesRead > 0) + { + fileStream.Write(buffer, 0, bytesRead); + totalBytesReceived += bytesRead; + bytesRead = throttledStream.Read(buffer, 0, buffer.Length); + float currentSpeed = totalBytesReceived / (float)sw.Elapsed.TotalSeconds; + + OnProgressChanged(new DownloadProgressChangedEventArgs(totalBytesReceived, totalBytesToReceive, (long)currentSpeed)); + } + } + } + } + if (totalBytesReceived >= totalBytesToReceive) + { + break; + } + } + catch (IOException ioException) + { + // file in use + long win32ErrorCode = ioException.HResult & 0xFFFF; + if (win32ErrorCode == 0x21 || win32ErrorCode == 0x20) + { + return false; + } + // retry (IOException: Received an unexpected EOF or 0 bytes from the transport stream) + } + catch (WebException webException) + { + if (webException.Status == WebExceptionStatus.ConnectionClosed) + { + // retry + } + else + { + throw; + } + } + } + return true; + } + } + + public static bool SaveStreamToDisk(Stream input, string destinationFileName) + { + // Open the destination file + using (var stream = new FileStream(destinationFileName, FileMode.OpenOrCreate, FileAccess.Write)) + { + // Create a 4K buffer to chunk the file + var buf = new byte[4096]; + int BytesRead; + // Read the chunk of the web response into the buffer + while (0 < (BytesRead = input.Read(buf, 0, buf.Length))) + { + // Write the chunk from the buffer to the file + stream.Write(buf, 0, BytesRead); + } + } + return true; + } + + protected void OnProgressChanged(DownloadProgressChangedEventArgs e) + { + var handler = ProgressChanged; + if (handler != null) + { + handler(this, e); + } + } + + protected void OnCompleted(EventArgs e) + { + var handler = Completed; + if (handler != null) + { + handler(this, e); + } + } + } + + public class DownloadProgressChangedEventArgs : EventArgs + { + public DownloadProgressChangedEventArgs(long totalReceived, long fileSize, long currentSpeed) + { + BytesReceived = totalReceived; + TotalBytesToReceive = fileSize; + CurrentSpeed = currentSpeed; + } + + public long BytesReceived { get; private set; } + public long TotalBytesToReceive { get; private set; } + public float ProgressPercentage + { + get + { + return ((float)BytesReceived / (float)TotalBytesToReceive) * 100; + } + } + public float CurrentSpeed { get; private set; } // in bytes + public TimeSpan TimeLeft + { + get + { + var bytesRemainingtoBeReceived = TotalBytesToReceive - BytesReceived; + return TimeSpan.FromSeconds(bytesRemainingtoBeReceived / CurrentSpeed); + } + } + } +} diff --git a/src/TumblThree/TumblThree.Applications/Properties/Resources.Designer.cs b/src/TumblThree/TumblThree.Applications/Properties/Resources.Designer.cs index 08793e2..f15c736 100644 --- a/src/TumblThree/TumblThree.Applications/Properties/Resources.Designer.cs +++ b/src/TumblThree/TumblThree.Applications/Properties/Resources.Designer.cs @@ -495,9 +495,18 @@ public static string ProgressDownloadImage { /// /// Looks up a localized string similar to Evaluated {0} tumblr post urls out of {1} total posts.. /// - public static string ProgressGetUrl { + public static string ProgressGetUrlLong { get { - return ResourceManager.GetString("ProgressGetUrl", resourceCulture); + return ResourceManager.GetString("ProgressGetUrlLong", resourceCulture); + } + } + + /// + /// Looks up a localized string similar to Evaluated {0} tumblr blog sites.. + /// + public static string ProgressGetUrlShort { + get { + return ResourceManager.GetString("ProgressGetUrlShort", resourceCulture); } } diff --git a/src/TumblThree/TumblThree.Applications/Properties/Resources.resx b/src/TumblThree/TumblThree.Applications/Properties/Resources.resx index 0f55ec7..9619e95 100644 --- a/src/TumblThree/TumblThree.Applications/Properties/Resources.resx +++ b/src/TumblThree/TumblThree.Applications/Properties/Resources.resx @@ -261,9 +261,12 @@ Downloading {0}. - + Evaluated {0} tumblr post urls out of {1} total posts. + + Evaluated {0} tumblr blog sites. + Calculating unique downloads, removing duplicates ... diff --git a/src/TumblThree/TumblThree.Applications/Services/SharedCookieService.cs b/src/TumblThree/TumblThree.Applications/Services/SharedCookieService.cs new file mode 100644 index 0000000..bd27a02 --- /dev/null +++ b/src/TumblThree/TumblThree.Applications/Services/SharedCookieService.cs @@ -0,0 +1,58 @@ +using System; +using System.Net; +using System.Runtime.InteropServices; +using System.Text; + +namespace TumblThree.Applications.Services +{ + public class SharedCookieService + { + private const int InternetCookieHttponly = 0x2000; + + [DllImport("wininet.dll", SetLastError = true)] + public static extern bool InternetGetCookieEx( + string url, + string cookieName, + StringBuilder cookieData, + ref int size, + int dwFlags, + IntPtr lpReserved); + + /// + /// Gets the URI cookie container. + /// + /// The URI. + /// + public static CookieContainer GetUriCookieContainer(Uri uri) + { + CookieContainer cookies = null; + // Determine the size of the cookie + int datasize = 8192 * 16; + var cookieData = new StringBuilder(datasize); + if (!InternetGetCookieEx(uri.ToString(), null, cookieData, ref datasize, InternetCookieHttponly, IntPtr.Zero)) + { + if (datasize < 0) + { + return null; + } + // Allocate stringbuilder large enough to hold the cookie + cookieData = new StringBuilder(datasize); + if (!InternetGetCookieEx( + uri.ToString(), + null, cookieData, + ref datasize, + InternetCookieHttponly, + IntPtr.Zero)) + { + return null; + } + } + if (cookieData.Length > 0) + { + cookies = new CookieContainer(); + cookies.SetCookies(uri, cookieData.ToString().Replace(';', ',')); + } + return cookies; + } + } +} diff --git a/src/TumblThree/TumblThree.Applications/Services/Validator.cs b/src/TumblThree/TumblThree.Applications/Services/Validator.cs deleted file mode 100644 index a6304c9..0000000 --- a/src/TumblThree/TumblThree.Applications/Services/Validator.cs +++ /dev/null @@ -1,11 +0,0 @@ -namespace TumblThree.Applications.Services -{ - public static class Validator - { - public static bool IsValidTumblrUrl(string url) - { - return url != null && url.Length > 18 && url.Contains(".tumblr.com") && - (url.StartsWith("http://", true, null) || url.StartsWith("https://", true, null)); - } - } -} diff --git a/src/TumblThree/TumblThree.Applications/ThrottledStream.cs b/src/TumblThree/TumblThree.Applications/ThrottledStream.cs index a91f429..6d31ac7 100644 --- a/src/TumblThree/TumblThree.Applications/ThrottledStream.cs +++ b/src/TumblThree/TumblThree.Applications/ThrottledStream.cs @@ -15,9 +15,7 @@ public class ThrottledStream : Stream private readonly Stream parent; readonly System.Timers.Timer resettimer; readonly AutoResetEvent wh = new AutoResetEvent(true); - private int maxBytesPerSecond; - private int processed; /// @@ -139,160 +137,5 @@ public override void Write(byte[] buffer, int offset, int count) Throttle(count); parent.Write(buffer, offset, count); } - - private static HttpWebRequest CreateWebReqeust(string url, AppSettings settings) - { - var request = (HttpWebRequest)WebRequest.Create(url); - request.Method = "GET"; - request.ProtocolVersion = HttpVersion.Version11; - request.UserAgent = - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"; - request.AllowAutoRedirect = true; - request.KeepAlive = true; - request.Pipelined = true; - request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate; - request.ReadWriteTimeout = settings.TimeOut * 1000; - request.Timeout = -1; - ServicePointManager.DefaultConnectionLimit = 400; - if (!string.IsNullOrEmpty(settings.ProxyHost)) - { - request.Proxy = new WebProxy(settings.ProxyHost, int.Parse(settings.ProxyPort)); - } - else - { - request.Proxy = null; - } - return request; - } - - public static async Task ReadFromURLIntoStream(string url, AppSettings settings) - { - HttpWebRequest request = CreateWebReqeust(url, settings); - - var response = await request.GetResponseAsync() as HttpWebResponse; - if (HttpStatusCode.OK == response.StatusCode) - { - Stream responseStream = response.GetResponseStream(); - return new ThrottledStream(responseStream, settings.Bandwidth * 1024); - } - else - { - return null; - } - } - - private static async Task CheckDownloadSizeAsync(string url, AppSettings settings, CancellationToken ct) - { - HttpWebRequest request = CreateWebReqeust(url, settings); - ct.Register(() => request.Abort()); - - using (WebResponse response = await request.GetResponseAsync()) - { - return response.ContentLength; - } - } - - // FIXME: Needs a complete rewrite. Also a append/cache function for resuming incomplete files on the disk. - // Should be in separated class with support for events for downloadspeed, is resumable file?, etc. - // Should check if file is complete, else it will trigger an WebException -- 416 requested range not satisfiable at every request - public static async Task DownloadFileWithResumeAsync(string url, string destinationPath, AppSettings settings, CancellationToken ct) - { - long totalBytesReceived = 0; - var attemptCount = 0; - - if (File.Exists(destinationPath)) - { - var fileInfo = new FileInfo(destinationPath); - totalBytesReceived = fileInfo.Length; - if (totalBytesReceived >= await CheckDownloadSizeAsync(url, settings, ct)) - return true; - } - FileMode fileMode = totalBytesReceived > 0 ? FileMode.Append : FileMode.Create; - - using (FileStream fileStream = File.Open(destinationPath, fileMode, FileAccess.Write, FileShare.Read)) - { - while (true) - { - attemptCount += 1; - - if (attemptCount > settings.MaxNumberOfRetries) - { - return false; - } - - try - { - HttpWebRequest request = CreateWebReqeust(url, settings); - ct.Register(() => request.Abort()); - request.AddRange(totalBytesReceived); - - long totalBytesToReceive = 0; - using (WebResponse response = await request.GetResponseAsync()) - { - totalBytesToReceive = totalBytesReceived + response.ContentLength; - - using (Stream responseStream = response.GetResponseStream()) - { - using (var throttledStream = new ThrottledStream(responseStream, settings.Bandwidth * 1024)) - { - var buffer = new byte[4096]; - int bytesRead = throttledStream.Read(buffer, 0, buffer.Length); - while (bytesRead > 0) - { - fileStream.Write(buffer, 0, bytesRead); - totalBytesReceived += bytesRead; - bytesRead = throttledStream.Read(buffer, 0, buffer.Length); - } - } - } - } - if (totalBytesReceived >= totalBytesToReceive) - { - break; - } - } - catch (IOException ioException) - { - // file in use - long win32ErrorCode = ioException.HResult & 0xFFFF; - if (win32ErrorCode == 0x21 || win32ErrorCode == 0x20) - { - return false; - } - // retry (IOException: Received an unexpected EOF or 0 bytes from the transport stream) - } - catch (WebException webException) - { - if (webException.Status == WebExceptionStatus.ConnectionClosed) - { - // retry - } - else - { - throw; - } - } - } - return true; - } - } - - public static bool SaveStreamToDisk(Stream input, string destinationFileName) - { - // Open the destination file - using (var stream = new FileStream(destinationFileName, FileMode.OpenOrCreate, FileAccess.Write)) - { - // Create a 4K buffer to chunk the file - var buf = new byte[4096]; - int BytesRead; - // Read the chunk of the web response into the buffer - while (0 < (BytesRead = input.Read(buf, 0, buf.Length))) - { - // Write the chunk from the buffer to the file - stream.Write(buf, 0, BytesRead); - } - } - return true; - } } } diff --git a/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj b/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj index c1d906b..eccbc0b 100644 --- a/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj +++ b/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj @@ -76,6 +76,8 @@ + + @@ -105,9 +107,9 @@ + - diff --git a/src/TumblThree/TumblThree.Applications/ViewModels/SettingsViewModel.cs b/src/TumblThree/TumblThree.Applications/ViewModels/SettingsViewModel.cs index 673801d..01d8335 100644 --- a/src/TumblThree/TumblThree.Applications/ViewModels/SettingsViewModel.cs +++ b/src/TumblThree/TumblThree.Applications/ViewModels/SettingsViewModel.cs @@ -1,7 +1,6 @@ using System; using System.ComponentModel; using System.ComponentModel.Composition; -using System.Text.RegularExpressions; using System.Threading; using System.Waf.Applications; using System.Windows.Input; @@ -442,38 +441,57 @@ private void Authenticate() { try { - ShellService.OAuthManager["consumer_key"] = ApiKey; - ShellService.OAuthManager["consumer_secret"] = SecretKey; - OAuthResponse requestToken = - ShellService.OAuthManager.AcquireRequestToken(settings.RequestTokenUrl, "POST"); - string url = settings.AuthorizeUrl + @"?oauth_token=" + ShellService.OAuthManager["token"]; + var url = @"https://www.tumblr.com/login"; + ShellService.Settings.OAuthCallbackUrl = "https://www.tumblr.com/dashboard"; AuthenticateViewModel authenticateViewModel = authenticateViewModelFactory.CreateExport().Value; authenticateViewModel.AddUrl(url); authenticateViewModel.ShowDialog(ShellService.ShellView); - string oauthTokenUrl = authenticateViewModel.GetUrl(); - - var regex = new Regex("oauth_verifier=(.*)"); - string oauthVerifer = regex.Match(oauthTokenUrl).Groups[1].ToString(); - - //FIXME: 401 (Unauthorized): "oauth_signature does not match expected value" - OAuthResponse accessToken = - ShellService.OAuthManager.AcquireAccessToken(settings.AccessTokenUrl, "POST", oauthVerifer); - - regex = new Regex("oauth_token=(.*)&oauth_token_secret"); - OAuthToken = regex.Match(accessToken.AllText).Groups[1].ToString(); - - regex = new Regex("oauth_token_secret=(.*)"); - OAuthTokenSecret = regex.Match(accessToken.AllText).Groups[1].ToString(); - - ShellService.OAuthManager["token"] = OAuthToken; - ShellService.OAuthManager["token_secret"] = OAuthTokenSecret; } catch (System.Net.WebException ex) { Logger.Error("SettingsViewModel:Authenticate: {0}", ex); ShellService.ShowError(ex, Resources.AuthenticationFailure, ex.Message); + return; } + + // OAuth1.0a authentication implementation for the Tumblr Api v2 + // + //try + //{ + // ShellService.OAuthManager["consumer_key"] = ApiKey; + // ShellService.OAuthManager["consumer_secret"] = SecretKey; + // OAuthResponse requestToken = + // ShellService.OAuthManager.AcquireRequestToken(settings.RequestTokenUrl, "POST"); + // var url = settings.AuthorizeUrl + @"?oauth_token=" + ShellService.OAuthManager["token"]; + + // var authenticateViewModel = authenticateViewModelFactory.CreateExport().Value; + // authenticateViewModel.AddUrl(url); + // authenticateViewModel.ShowDialog(ShellService.ShellView); + // string oauthTokenUrl = authenticateViewModel.GetUrl(); + + // Regex regex = new Regex("oauth_verifier=(.*)"); + // string oauthVerifer = regex.Match(oauthTokenUrl).Groups[1].ToString(); + + // //FIXME: Sometimes works, sometimes not: 401 (Unauthorized): "oauth_signature does not match expected value" + // OAuthResponse accessToken = + // ShellService.OAuthManager.AcquireAccessToken(settings.AccessTokenUrl, "POST", oauthVerifer); + + // regex = new Regex("oauth_token=(.*)&oauth_token_secret"); + // OAuthToken = regex.Match(accessToken.AllText).Groups[1].ToString(); + + // regex = new Regex("oauth_token_secret=(.*)"); + // OAuthTokenSecret = regex.Match(accessToken.AllText).Groups[1].ToString(); + + // ShellService.OAuthManager["token"] = OAuthToken; + // ShellService.OAuthManager["token_secret"] = OAuthTokenSecret; + //} + //catch (System.Net.WebException ex) + //{ + // Logger.Error("SettingsViewModel:Authenticate: {0}", ex); + // ShellService.ShowError(ex, Resources.AuthenticationFailure, ex.Message); + // return; + //} } public void Load() diff --git a/src/TumblThree/TumblThree.Domain/Models/Blog.cs b/src/TumblThree/TumblThree.Domain/Models/Blog.cs index 4e2d9ed..98393d4 100644 --- a/src/TumblThree/TumblThree.Domain/Models/Blog.cs +++ b/src/TumblThree/TumblThree.Domain/Models/Blog.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Diagnostics.Eventing.Reader; using System.IO; using System.Linq; using System.Runtime.Serialization; @@ -77,7 +76,7 @@ public Blog(string url, string location, BlogTypes blogType) { Url = url; Url = ExtractUrl(); - Name = ExtractSubDomain(); + Name = ExtractName(); BlogType = blogType; ChildId = Path.Combine(location, Name + "_files." + blogType); Location = location; @@ -608,7 +607,7 @@ public IBlog Load(string fileLocation) var serializer = new DataContractJsonSerializer(GetType()); var blog = (Blog)serializer.ReadObject(stream); blog.Location = Path.Combine((Directory.GetParent(fileLocation).FullName)); - blog.ChildId = Path.Combine(blog.Location, blog.Name + "_files.tumblr"); + blog.ChildId = Path.Combine(blog.Location, blog.Name + "_files." + blog.BlogType); return blog; } } @@ -684,9 +683,27 @@ protected virtual string ExtractSubDomain() return null; } + protected virtual string ExtractName() + { + if (Validator.IsValidTumblrUrl(Url)) + return ExtractSubDomain(); + if (Validator.IsValidTumblrLikedByUrl(Url)) + return Url.Split('/')[5]; + return string.Empty; + } + protected virtual string ExtractUrl() { - return ("https://" + ExtractSubDomain() + ".tumblr.com/"); + if (!Url.Contains("www.tumblr.com")) + { + return ("https://" + ExtractSubDomain() + ".tumblr.com/"); + } + else + { + int index = Url.Split('/')[5].Length; + var lengthOfUrl = 32; + return Url.Substring(0,index + lengthOfUrl); + } } [OnDeserialized] diff --git a/src/TumblThree/TumblThree.Domain/Models/BlogTypes.cs b/src/TumblThree/TumblThree.Domain/Models/BlogTypes.cs index f404e46..c365a7a 100644 --- a/src/TumblThree/TumblThree.Domain/Models/BlogTypes.cs +++ b/src/TumblThree/TumblThree.Domain/Models/BlogTypes.cs @@ -4,6 +4,7 @@ public enum BlogTypes { tumblr, instagram, - twitter + twitter, + tlb } } diff --git a/src/TumblThree/TumblThree.Domain/Models/Validator.cs b/src/TumblThree/TumblThree.Domain/Models/Validator.cs new file mode 100644 index 0000000..d9fa37a --- /dev/null +++ b/src/TumblThree/TumblThree.Domain/Models/Validator.cs @@ -0,0 +1,17 @@ +namespace TumblThree.Domain.Models +{ + public static class Validator + { + public static bool IsValidTumblrUrl(string url) + { + return url != null && url.Length > 18 && url.Contains(".tumblr.com") && !url.Contains("www.tumblr.com") && + (url.StartsWith("http://", true, null) || url.StartsWith("https://", true, null)); + } + + public static bool IsValidTumblrLikedByUrl(string url) + { + return url != null && url.Length > 31 && url.Contains("www.tumblr.com/liked/by/") && + (url.StartsWith("http://", true, null) || url.StartsWith("https://", true, null)); + } + } +} diff --git a/src/TumblThree/TumblThree.Domain/TumblThree.Domain.csproj b/src/TumblThree/TumblThree.Domain/TumblThree.Domain.csproj index b3a055f..773d871 100644 --- a/src/TumblThree/TumblThree.Domain/TumblThree.Domain.csproj +++ b/src/TumblThree/TumblThree.Domain/TumblThree.Domain.csproj @@ -66,6 +66,7 @@ + Code diff --git a/src/TumblThree/TumblThree.Presentation/Views/SettingsView.xaml b/src/TumblThree/TumblThree.Presentation/Views/SettingsView.xaml index 72d4bec..bb94845 100644 --- a/src/TumblThree/TumblThree.Presentation/Views/SettingsView.xaml +++ b/src/TumblThree/TumblThree.Presentation/Views/SettingsView.xaml @@ -440,7 +440,7 @@