Skip to content

Commit

Permalink
Improve Colly parallelism system
Browse files Browse the repository at this point in the history
  • Loading branch information
kinoute committed Sep 14, 2024
1 parent 4d573c5 commit ead38ed
Show file tree
Hide file tree
Showing 9 changed files with 51 additions and 36 deletions.
10 changes: 6 additions & 4 deletions websites/blubeaver.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ func BluBeaverScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// Before making a request to URL
Expand Down Expand Up @@ -194,6 +192,10 @@ func BluBeaverScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't visit index page", log.White(BluBeaverURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}
9 changes: 5 additions & 4 deletions websites/blusscreens.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,6 @@ func BlusScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// Go through each link to imgur found on the movie page
Expand Down Expand Up @@ -249,8 +247,11 @@ func BlusScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't visit index page", log.White(BlusURL), log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}

Expand Down
10 changes: 6 additions & 4 deletions websites/dvdbeaver.go
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,6 @@ func DVDBeaverScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// Look for links on images that redirect to a "largest" version.
Expand Down Expand Up @@ -212,6 +210,10 @@ func DVDBeaverScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't visit index page:", log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}
10 changes: 6 additions & 4 deletions websites/evanerichards.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,8 +91,6 @@ func EvanERichardsScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// Look for links on thumbnails that redirect to a "largest" version
Expand Down Expand Up @@ -134,6 +132,10 @@ func EvanERichardsScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't visit index page", log.White(BluBeaverURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}
10 changes: 6 additions & 4 deletions websites/film-grab.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,6 @@ func FilmGrabScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// Look for links on thumbnails that redirect to a "largest" version
Expand Down Expand Up @@ -125,6 +123,10 @@ func FilmGrabScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't visit index page", log.White(FilmGrabURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}
10 changes: 6 additions & 4 deletions websites/highdefdiscnews.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,6 @@ func HighDefDiscNewsScraper(scraper **colly.Collector, options *config.Options)
log.Error.Println("Can't visit movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// Look for links on thumbnails that redirect to a "largest" version.
Expand Down Expand Up @@ -126,8 +124,12 @@ func HighDefDiscNewsScraper(scraper **colly.Collector, options *config.Options)
log.Error.Println("Can't visit index page", log.White(HighDefDiscNewsURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}

// Isolate the movie's title by getting rid of various words on the right.
Expand Down
9 changes: 5 additions & 4 deletions websites/movie_screencaps.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,6 @@ func ScreenCapsScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// Handle pagination by getting the number of pages in total first.
Expand Down Expand Up @@ -172,7 +170,10 @@ func ScreenCapsScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't visit index page", log.White(ScreenCapsURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}
9 changes: 5 additions & 4 deletions websites/screenmusings.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,6 @@ func ScreenMusingsScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// On every movie page, we are looking for a link to the "most viewed stills".
Expand Down Expand Up @@ -140,7 +138,10 @@ func ScreenMusingsScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't visit index page", log.White(ScreenMusingsURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}
10 changes: 6 additions & 4 deletions websites/stillsfrmfilms.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,6 @@ func StillsFrmFilmsScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't get movie page", log.White(movieURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
movieScraper.Wait()
})

// Look for links on thumbnails that redirect to a "largest" version.
Expand Down Expand Up @@ -136,6 +134,10 @@ func StillsFrmFilmsScraper(scraper **colly.Collector, options *config.Options) {
log.Error.Println("Can't visit index page", log.White(StillsFrmFilmsURL), ":", log.Red(err))
}

// In case we enabled asynchronous jobs
(*scraper).Wait()
// Ensure that all requests are completed before exiting
if (*scraper).Async {
(*scraper).Wait()
movieScraper.Wait()
}

}

0 comments on commit ead38ed

Please sign in to comment.