From ead38ed27746c9024d74a17f15da6fa893590f7d Mon Sep 17 00:00:00 2001 From: Yann Defretin Date: Sat, 14 Sep 2024 22:34:17 +0200 Subject: [PATCH] Improve Colly parallelism system --- websites/blubeaver.go | 10 ++++++---- websites/blusscreens.go | 9 +++++---- websites/dvdbeaver.go | 10 ++++++---- websites/evanerichards.go | 10 ++++++---- websites/film-grab.go | 10 ++++++---- websites/highdefdiscnews.go | 10 ++++++---- websites/movie_screencaps.go | 9 +++++---- websites/screenmusings.go | 9 +++++---- websites/stillsfrmfilms.go | 10 ++++++---- 9 files changed, 51 insertions(+), 36 deletions(-) diff --git a/websites/blubeaver.go b/websites/blubeaver.go index 2c4f5d6..d4b372b 100644 --- a/websites/blubeaver.go +++ b/websites/blubeaver.go @@ -98,8 +98,6 @@ func BluBeaverScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // Before making a request to URL @@ -194,6 +192,10 @@ func BluBeaverScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't visit index page", log.White(BluBeaverURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } + } diff --git a/websites/blusscreens.go b/websites/blusscreens.go index 1b2e86d..0dfd593 100644 --- a/websites/blusscreens.go +++ b/websites/blusscreens.go @@ -111,8 +111,6 @@ func BlusScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // Go through each link to imgur found on the movie page @@ -249,8 +247,11 @@ func BlusScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't visit index page", log.White(BlusURL), log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } } diff --git a/websites/dvdbeaver.go b/websites/dvdbeaver.go index 9c42b03..0c887ad 100644 --- a/websites/dvdbeaver.go +++ b/websites/dvdbeaver.go @@ -133,8 +133,6 @@ func DVDBeaverScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // Look for links on images that redirects to a "largest" version. @@ -212,6 +210,10 @@ func DVDBeaverScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't visit index page:", log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } + } diff --git a/websites/evanerichards.go b/websites/evanerichards.go index 6b9fd63..b93a827 100644 --- a/websites/evanerichards.go +++ b/websites/evanerichards.go @@ -91,8 +91,6 @@ func EvanERichardsScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // Look for links on thumbnails that redirect to a "largest" version @@ -134,6 +132,10 @@ func EvanERichardsScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't visit index page", log.White(BluBeaverURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } + } diff --git a/websites/film-grab.go b/websites/film-grab.go index 63db234..6d0f708 100644 --- a/websites/film-grab.go +++ b/websites/film-grab.go @@ -77,8 +77,6 @@ func FilmGrabScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // Look for links on thumbnails that redirect to a "largest" version @@ -125,6 +123,10 @@ func FilmGrabScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't visit index page", log.White(FilmGrabURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } + } diff --git a/websites/highdefdiscnews.go b/websites/highdefdiscnews.go index 76d5492..945a912 100644 --- a/websites/highdefdiscnews.go +++ b/websites/highdefdiscnews.go @@ -83,8 +83,6 @@ func HighDefDiscNewsScraper(scraper **colly.Collector, options *config.Options) log.Error.Println("Can't visit movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // Look for links on thumbnails that redirects to a "largest" version. @@ -126,8 +124,12 @@ func HighDefDiscNewsScraper(scraper **colly.Collector, options *config.Options) log.Error.Println("Can't visit index page", log.White(HighDefDiscNewsURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } + } // Isolate the movie's title by getting rid of various words on the right. diff --git a/websites/movie_screencaps.go b/websites/movie_screencaps.go index 9428786..ed5c8ed 100644 --- a/websites/movie_screencaps.go +++ b/websites/movie_screencaps.go @@ -88,8 +88,6 @@ func ScreenCapsScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // Handle pagination by getting the number of pages in total first. @@ -172,7 +170,10 @@ func ScreenCapsScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't visit index page", log.White(ScreenCapsURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } } diff --git a/websites/screenmusings.go b/websites/screenmusings.go index 0e93b45..de92004 100644 --- a/websites/screenmusings.go +++ b/websites/screenmusings.go @@ -79,8 +79,6 @@ func ScreenMusingsScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // On every movie page, we are looking for a link to the "most viewed stills". @@ -140,7 +138,10 @@ func ScreenMusingsScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't visit index page", log.White(ScreenMusingsURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } } diff --git a/websites/stillsfrmfilms.go b/websites/stillsfrmfilms.go index 856db70..487bfb4 100644 --- a/websites/stillsfrmfilms.go +++ b/websites/stillsfrmfilms.go @@ -87,8 +87,6 @@ func StillsFrmFilmsScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - movieScraper.Wait() }) // Look for links on thumbnails that redirect to a "largest" version. @@ -136,6 +134,10 @@ func StillsFrmFilmsScraper(scraper **colly.Collector, options *config.Options) { log.Error.Println("Can't visit index page", log.White(StillsFrmFilmsURL), ":", log.Red(err)) } - // In case we enabled asynchronous jobs - (*scraper).Wait() + // Ensure that all requests are completed before exiting + if (*scraper).Async { + (*scraper).Wait() + movieScraper.Wait() + } + }