Skip to content

Commit

Permalink
Merge pull request #18 from ratanparai/feature/wow-scrapper
Browse files Browse the repository at this point in the history
wow scrapper
  • Loading branch information
ratanparai authored Apr 29, 2019
2 parents 805a94d + 0996253 commit bd44610
Show file tree
Hide file tree
Showing 13 changed files with 1,231 additions and 10 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.ratanparai.moviedog.db.dao.scrapper

import androidx.test.espresso.matcher.ViewMatchers.assertThat
import com.ratanparai.moviedog.scrapper.WowMovieZoneScrapper
import org.hamcrest.CoreMatchers.equalTo
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.junit.Before
import org.junit.Test

/**
 * Checks that [WowMovieZoneScrapper] extracts movie details from a saved movie page.
 *
 * Both HTML fixtures are read from the classpath and parsed before every test.
 */
class WowMovieZoneScrapperTest {

    private lateinit var searchDoc: Document
    private lateinit var movieDoc: Document

    /** Reads the two fixture files and parses them against the site's base URI. */
    @Before
    fun loadHtml() {
        val searchHtml = readResource("wow/HarryPotterSearch_wow.data")
        val movieHtml = readResource("wow/HarryPotter_Deadly_Hallows_part2.data")

        searchDoc = Jsoup.parse(searchHtml, BASE_URI)
        movieDoc = Jsoup.parse(movieHtml, BASE_URI)
    }

    /** The parsed movie page must yield the expected title, description, video URL and year. */
    @Test
    fun shouldGetMovieFromMovieDocument() {
        val movie = WowMovieZoneScrapper().getMovie(movieDoc)

        assertThat(movie.title, equalTo("Harry Potter And The Deathly Hallows Part 2"))
        // NOTE(review): the doubled apostrophe presumably mirrors the fixture text - confirm.
        assertThat(movie.description, equalTo("Harry, Ron, and Hermione search for Voldemort''s remaining Horcruxes in their effort to destroy the Dark Lord as the final battle rages on at Hogwarts."))
        assertThat(movie.videoUrl, equalTo("http://172.27.27.251/2TB1/1080p/2011/Harry%20Potter%20And%20The%20Deathly%20Hallows%20Part%202%20%282011%29%20%5B1080p%5D/Harry%20Potter%20And%20The%20Deathly%20Hallows%20Part%202%20%282011%29%20%5B1080p%5D.mp4"))
        assertThat(movie.productionYear, equalTo(2011))
    }

    /** Returns the text content of the classpath resource at [path]. */
    private fun readResource(path: String): String =
        ClassLoader.getSystemResource(path).readText()

    private companion object {
        /** Base URI handed to jsoup when parsing the fixtures. */
        const val BASE_URI = "http://172.27.27.84"
    }
}
8 changes: 7 additions & 1 deletion app/src/main/java/com/ratanparai/moviedog/db/AppDatabase.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,22 @@ import androidx.room.Database
import androidx.room.Room
import androidx.room.RoomDatabase
import com.ratanparai.moviedog.db.dao.MovieDao
import com.ratanparai.moviedog.db.dao.MovieUrlDao
import com.ratanparai.moviedog.db.dao.ScrappedDao
import com.ratanparai.moviedog.db.dao.SearchHashDao
import com.ratanparai.moviedog.db.entity.Movie
import com.ratanparai.moviedog.db.entity.MovieUrl
import com.ratanparai.moviedog.db.entity.Scrapped
import com.ratanparai.moviedog.db.entity.SearchHash
import com.ratanparai.moviedog.utilities.DATABASE_NAME


@Database(entities = [Movie::class, SearchHash::class], version = 1, exportSchema = false)
@Database(entities = [Movie::class, SearchHash::class, Scrapped::class, MovieUrl::class], version = 1, exportSchema = false)
abstract class AppDatabase : RoomDatabase() {
abstract fun movieDao(): MovieDao
abstract fun searchHashDao(): SearchHashDao
abstract fun scrappedDao(): ScrappedDao
abstract fun movieUrlDao(): MovieUrlDao

companion object {

Expand Down
4 changes: 4 additions & 0 deletions app/src/main/java/com/ratanparai/moviedog/db/dao/MovieDao.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import androidx.lifecycle.LiveData
import androidx.room.Dao
import androidx.room.Insert
import androidx.room.Query
import androidx.room.Update
import com.ratanparai.moviedog.db.entity.Movie

@Dao
Expand All @@ -15,6 +16,9 @@ interface MovieDao {
@Query("SELECT * FROM movies WHERE id = :movieId")
fun getMovieById(movieId : Int) : Movie

/**
 * Looks up a movie whose `title` column equals [title] exactly.
 *
 * The return type is nullable; presumably null means no row matched - confirm against Room's behavior.
 * NOTE(review): nothing visible here makes `title` unique, so several rows could match - verify.
 */
@Query("SELECT * FROM movies WHERE title = :title")
fun getMovieByTitle(title: String): Movie?

@Query("SELECT * FROM movies WHERE imdbId = :imdbId")
fun getMovieByImdbId(imdbId : String) : Movie

Expand Down
16 changes: 16 additions & 0 deletions app/src/main/java/com/ratanparai/moviedog/db/dao/MovieUrlDao.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package com.ratanparai.moviedog.db.dao

import androidx.room.Dao
import androidx.room.Insert
import androidx.room.Query
import com.ratanparai.moviedog.db.entity.MovieUrl

/**
 * Room DAO for the `movieUrl` table, which stores [MovieUrl] rows.
 */
@Dao
interface MovieUrlDao {

/** Inserts a single [MovieUrl] row. */
@Insert
fun insertMovieUrl(movieUrl: MovieUrl)

/** Returns all [MovieUrl] rows whose `movieId` column equals [id]. */
@Query("SELECT * FROM movieUrl WHERE movieId = :id")
fun getMovieUrlsByMovieId(id: Int): List<MovieUrl>
}
15 changes: 15 additions & 0 deletions app/src/main/java/com/ratanparai/moviedog/db/dao/ScrappedDao.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.ratanparai.moviedog.db.dao

import androidx.room.Dao
import androidx.room.Insert
import androidx.room.Query
import com.ratanparai.moviedog.db.entity.Scrapped

/**
 * Room DAO for the `scrapped` table, which stores [Scrapped] rows keyed by URL.
 */
@Dao
interface ScrappedDao {
/**
 * Selects the [Scrapped] row whose `url` column equals [url].
 * The return type is nullable; presumably null means the URL has no row - confirm.
 */
@Query("SELECT * FROM scrapped WHERE url = :url")
fun getByUrl(url: String) : Scrapped?

/** Inserts a single [Scrapped] row. */
@Insert
fun insert(scrapped: Scrapped)
}
13 changes: 13 additions & 0 deletions app/src/main/java/com/ratanparai/moviedog/db/entity/MovieUrl.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.ratanparai.moviedog.db.entity

import androidx.room.ColumnInfo
import androidx.room.Entity
import androidx.room.PrimaryKey

/**
 * Room entity for the `movieUrl` table: one video URL for a movie, tagged with the service it came from.
 */
@Entity(tableName = "movieUrl")
data class MovieUrl(
// Auto-generated primary key; the default of 0 lets callers omit it when inserting.
@PrimaryKey(autoGenerate = true) @ColumnInfo(name = "id") val id: Int = 0,
// Id of the movie this URL belongs to.
val movieId: Int,
// The video URL itself.
val movieUrl: String,
// Name of the service the URL was scraped from.
val serviceName: String
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.ratanparai.moviedog.db.entity

import androidx.room.Embedded
import androidx.room.Relation

/**
 * Pairs a [Movie] with the [MovieUrl] rows related to it.
 */
data class MovieWithMovieUrls (
// The movie row, embedded into this object.
@Embedded val movie: Movie,
// MovieUrl rows whose `movieId` column matches the embedded movie's `id` column.
@Relation(parentColumn = "id", entityColumn = "movieId", entity = MovieUrl::class) val movieUrls : List<MovieUrl>
)
11 changes: 11 additions & 0 deletions app/src/main/java/com/ratanparai/moviedog/db/entity/Scrapped.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.ratanparai.moviedog.db.entity

import androidx.room.ColumnInfo
import androidx.room.Entity
import androidx.room.PrimaryKey

/**
 * Room entity for the `scrapped` table: a single stored URL.
 * NOTE(review): presumably each row marks a movie page that has already been scraped - confirm with callers.
 */
@Entity(tableName = "scrapped")
data class Scrapped (
// Auto-generated primary key; the default of 0 lets callers omit it when inserting.
@PrimaryKey(autoGenerate = true) @ColumnInfo(name = "id") val id: Int = 0,
// The stored URL.
val url: String
)
Original file line number Diff line number Diff line change
@@ -1,23 +1,60 @@
package com.ratanparai.moviedog.scrapper

import android.media.MediaMetadataRetriever
import android.net.Uri
import android.os.Build
import com.ratanparai.moviedog.db.entity.Movie
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import java.util.HashMap

class WowMovieZoneScrapper: Scrapper {

/**
 * Builds the search URL for [query] by substituting it into the `SEARCH_URL` template.
 *
 * The leftover `TODO("not implemented")` stub call is removed: it threw before the
 * real return statement could run.
 *
 * @param query search text, inserted into the template as-is.
 * @return the formatted search URL.
 */
override fun getSearchUrl(query: String): String = SEARCH_URL.format(query)

/**
 * Extracts a [Movie] from a parsed movie detail page.
 *
 * Title, year text, description, video source and cover image are read with CSS selectors.
 * The duration is obtained by probing the video URL through [getDuration].
 *
 * The leftover `TODO("not implemented")` stub call is removed: it threw before any
 * scraping could run.
 *
 * @param document parsed movie detail page.
 * @return the movie populated with the scraped values.
 */
override fun getMovie(document: Document): Movie {
    val title = document.select("body > section.section.details > div.container > div > div:nth-child(1) > h1").text()
    val yearText = document.select("body > section.section.details > div.container > div > div.col-10 > div > div > div.col-12.col-sm-8.col-md-8.col-lg-9.col-xl-9 > div > ul > li:nth-child(2)").text()
    val year = getYearFromYearText(yearText)

    val description = document.select("div.card__description").text()

    // Spaces in the source URL are percent-encoded; other characters are left untouched.
    val videoUrl = document.select("video > source").attr("src").replace(" ", "%20")
    val duration = getDuration(videoUrl)

    // "abs:" asks jsoup for the attribute resolved against the document's base URI.
    val cardImage = document.select("div.card__cover > img").attr("abs:src")

    return Movie(
        title = title,
        description = description,
        videoUrl = videoUrl,
        productionYear = year,
        duration = duration,
        cardImage = cardImage
    )
}

override fun getListOfMovieLinksFromSearchResult(document: Document): List<String> {
TODO("not implemented") //To change body of created functions use File | Settings | File Templates.
/**
 * Parses the year out of a label such as "Year:2011" or "Year: 2011".
 *
 * Everything after the first ':' is used (the whole text when there is no ':').
 * Surrounding whitespace is trimmed before the number is parsed, so a space after
 * the colon no longer causes a failure.
 *
 * @param yearText text of the year list item.
 * @return the parsed year.
 * @throws NumberFormatException if the remaining text is not an integer.
 */
private fun getYearFromYearText(yearText: String): Int =
    yearText.substringAfter(":").trim().toInt()

val SEARCH_URL = "http://172.27.27.84/ajax_search?search_value=%s"
/**
 * Collects the `abs:href` value of every `<a>` element in [document].
 *
 * @param document parsed search result page.
 * @return the link values in document order with duplicates removed.
 */
override fun getListOfMovieLinksFromSearchResult(document: Document): List<String> =
    document.select("a")
        .map { anchor -> anchor.attr("abs:href") }
        .distinct()

fun getSearchResult(query : String): Document? {
var query = query.toLowerCase()
Expand All @@ -29,4 +66,28 @@ class WowMovieZoneScrapper: Scrapper {
.header("X-Requested-With", " XMLHttpRequest")
.get()
}

/**
 * Fetches [url] through the inherited two-argument `getDocument`, adding the
 * `X-Requested-With: XMLHttpRequest` header to the request headers.
 */
override fun getDocument(url: String): Document {
    val ajaxHeaders = hashMapOf("X-Requested-With" to "XMLHttpRequest")
    return super.getDocument(url, ajaxHeaders)
}

/**
 * Reads the duration of the media at [videoUrl] using [MediaMetadataRetriever].
 *
 * The retriever is now released in a `finally` block, so it is no longer leaked when
 * `setDataSource` or `extractMetadata` throws.
 *
 * @param videoUrl location of the video, handed to `setDataSource`.
 * @return the `METADATA_KEY_DURATION` value parsed as an [Int] (the platform documents this key
 *   in milliseconds), or 0 when the retriever reports no duration or a non-numeric one.
 */
fun getDuration(videoUrl: String): Int {
    val mmr = MediaMetadataRetriever()
    try {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.ICE_CREAM_SANDWICH) {
            mmr.setDataSource(videoUrl, HashMap<String, String>())
        } else {
            mmr.setDataSource(videoUrl)
        }

        // extractMetadata may return null when the key is unavailable; treat that as "unknown" (0)
        // instead of failing with a NullPointerException.
        return mmr.extractMetadata(MediaMetadataRetriever.METADATA_KEY_DURATION)?.toIntOrNull() ?: 0
    } finally {
        mmr.release()
    }
}

companion object {
// Search endpoint template; the %s placeholder receives the search query.
private const val SEARCH_URL = "http://172.27.27.84/ajax_search?search_value=%s"
}
}
57 changes: 53 additions & 4 deletions app/src/main/java/com/ratanparai/moviedog/service/MovieService.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,17 @@ import android.content.Context
import android.util.Log
import com.ratanparai.moviedog.db.AppDatabase
import com.ratanparai.moviedog.db.dao.MovieDao
import com.ratanparai.moviedog.db.dao.MovieUrlDao
import com.ratanparai.moviedog.db.dao.ScrappedDao
import com.ratanparai.moviedog.db.dao.SearchHashDao
import com.ratanparai.moviedog.db.entity.Movie
import com.ratanparai.moviedog.db.entity.MovieUrl
import com.ratanparai.moviedog.db.entity.Scrapped
import com.ratanparai.moviedog.db.entity.SearchHash
import com.ratanparai.moviedog.scrapper.BdPlexScrapper
import com.ratanparai.moviedog.scrapper.DekhvhaiScrapper
import com.ratanparai.moviedog.scrapper.Scrapper
import com.ratanparai.moviedog.scrapper.WowMovieZoneScrapper
import com.ratanparai.moviedog.utilities.MD5

class MovieService(private val context: Context) {
Expand All @@ -19,17 +24,21 @@ class MovieService(private val context: Context) {
/**
 * Scrapes every supported service for [query], then returns the matching movies from the database.
 *
 * Services are scraped in the order WoW Movie, BDPlex, Dekhvhai. Each call receives the DAOs it
 * needs plus the service name that is stored with the scraped video URLs.
 *
 * The superseded four-argument `scrapMovies` calls are removed, and the database instance is
 * looked up once instead of once per DAO.
 *
 * @param query title text to search for.
 * @return the result of `movieDao.searchByTitle(query)` after scraping.
 */
fun search(query: String): List<Movie> {
    val dekhvhaiScrapper = DekhvhaiScrapper()
    val bdPlexScrapper = BdPlexScrapper()
    val wowMovieZoneScrapper = WowMovieZoneScrapper()

    val database = AppDatabase.getInstance(context)
    val searchHashDao = database.searchHashDao()
    val movieDao = database.movieDao()
    val scrappedDao = database.scrappedDao()
    val movieUrlDao = database.movieUrlDao()

    scrapMovies(wowMovieZoneScrapper, query, searchHashDao, movieDao, scrappedDao, movieUrlDao, "WoW Movie")
    scrapMovies(bdPlexScrapper, query, searchHashDao, movieDao, scrappedDao, movieUrlDao, "BDPlex")
    scrapMovies(dekhvhaiScrapper, query, searchHashDao, movieDao, scrappedDao, movieUrlDao, "Dekhvhai")

    return movieDao.searchByTitle(query)
}

private fun scrapMovies(scrapper: Scrapper, query: String, searchHashDao: SearchHashDao, movieDao: MovieDao) {
private fun scrapMovies(scrapper: Scrapper, query: String, searchHashDao: SearchHashDao, movieDao: MovieDao, scrappedDao: ScrappedDao, movieUrlDao: MovieUrlDao, serviceName: String) {
try {
val searchUrl = scrapper.getSearchUrl(query)
val document = scrapper.getDocument(searchUrl)
Expand All @@ -48,10 +57,41 @@ class MovieService(private val context: Context) {


for (link in movieLinks) {

if (alreadyScrappedMovie(link, scrappedDao)) {
Log.d(TAG, "Movie is already scrapped for URL: $link")
continue
}

Log.d(TAG, "First time scrapping movie for URL: $link")

val scrapped = Scrapped(
url = link
)

val movieDoc = scrapper.getDocument(link)
val movie = scrapper.getMovie(movieDoc)
Log.d(TAG, "Scrapped movie: $movie for search URL $searchUrl ")
try {
movieDao.insertMovie(movie)
scrappedDao.insert(scrapped)

val movieFromDao = movieDao.getMovieByTitle(movie.title)
if (movieFromDao == null) {
Log.d(TAG, "The movie is not in database. Inserting movie info and first video link")
val movieId = movieDao.insertMovie(movie).toInt()
val movieUrl = MovieUrl(movieId = movieId, movieUrl = movie.videoUrl, serviceName = serviceName)
movieUrlDao.insertMovieUrl(movieUrl)
} else {
Log.d(TAG, "Movie is already in database. Adding new video urls")
val movieUrl = MovieUrl(
movieId = movieFromDao.id,
movieUrl = movie.videoUrl,
serviceName = serviceName
)

movieUrlDao.insertMovieUrl(movieUrl)
}

} catch (ex: Exception) {
Log.d("MovieService", ex.message)
}
Expand All @@ -71,6 +111,15 @@ class MovieService(private val context: Context) {

}

/**
 * Tells whether [link] already has a row in the scrapped table.
 *
 * @param link movie page URL to look up.
 * @param scrappedDao DAO used for the lookup.
 * @return true when `getByUrl` returns a row for [link], false otherwise.
 */
private fun alreadyScrappedMovie(link: String, scrappedDao: ScrappedDao): Boolean =
    scrappedDao.getByUrl(link) != null

fun getMovieById(id : Int): Movie {
val movieDao = AppDatabase.getInstance(context).movieDao()
return movieDao.getMovieById(id)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,26 @@
package com.ratanparai.moviedog.scrapper

import com.google.common.truth.Truth.assertThat
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.junit.Before
import org.junit.Ignore
import org.junit.Test

class WowMovieZoneScrapperTest {

private lateinit var searchDoc: Document
private lateinit var movieDoc: Document

/** Reads both HTML fixtures from the classpath and parses them against the site's base URI. */
@Before
fun loadHtml() {
    val baseUri = "http://172.27.27.84"
    val readFixture = { path: String -> ClassLoader.getSystemResource(path).readText() }

    val searchHtml = readFixture("wow/HarryPotterSearch_wow.data")
    val movieHtml = readFixture("wow/HarryPotter_Deadly_Hallows_part2.data")

    searchDoc = Jsoup.parse(searchHtml, baseUri)
    movieDoc = Jsoup.parse(movieHtml, baseUri)
}

@Ignore
@Test
fun shouldGetHtmlWhenSearchPageRequested() {
Expand All @@ -21,4 +36,13 @@ class WowMovieZoneScrapperTest {
assertThat(actual).isGreaterThan(expected)

}

/** The search fixture links 13 distinct movie pages, each one twice, so 13 links are expected. */
@Test
fun shouldGetMovieLinksFromSearchResult() {
    val links = WowMovieZoneScrapper().getListOfMovieLinksFromSearchResult(searchDoc)

    assertThat(links.size).isEqualTo(13)
}
}
1 change: 1 addition & 0 deletions app/src/test/resources/wow/HarryPotterSearch_wow.data
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<tr><td><a href="http://172.27.27.84/movie/1354"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548309132_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/1354"<span style="font-size: 20px;color: white;" >Harry Potter and the Deathly Hallows Part 1</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 7.70</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/1530"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548310428_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/1530"<span style="font-size: 20px;color: white;" >Harry Potter And The Deathly Hallows Part 2</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 8.10</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/3963"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548432022_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/3963"<span style="font-size: 20px;color: white;" >Harry Potter and the Chamber of Secrets</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 7.40</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/3964"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548432025_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/3964"<span style="font-size: 20px;color: white;" >Harry Potter and the Deathly Hallows Part 1</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 7.70</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/3965"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548432029_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/3965"<span style="font-size: 20px;color: white;" >Harry Potter and the Deathly Hallows Part 2</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 8.10</i></td><td 
width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/3966"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548432034_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/3966"<span style="font-size: 20px;color: white;" >Harry Potter and the Goblet of Fire</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 7.70</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/3967"><img style="border-radius:10px;" width="100px" src="/default_images/default_poster.jpg"></a></td><td><a href="http://172.27.27.84/movie/3967"<span style="font-size: 20px;color: white;" >Harry Potter and the Half Blood Prince</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 0.00</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/3968"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548432040_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/3968"<span style="font-size: 20px;color: white;" >Harry Potter and the Order of the Phoenix</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 7.50</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/3969"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548432044_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/3969"<span style="font-size: 20px;color: white;" >Harry Potter and the Prisoner of Azkaban</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 7.90</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/3970"><img style="border-radius:10px;" width="100px" src="/default_images/default_poster.jpg"></a></td><td><a href="http://172.27.27.84/movie/3970"<span style="font-size: 20px;color: white;" >Harry Potter and the Sorcerers Stone</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 0.00</i></td><td 
width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/9159"><img style="border-radius:10px;" width="100px" src="/movie_poster/1548585953_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/9159"<span style="font-size: 20px;color: white;" >Harry Potter and the Deathly Hallows Part 2</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 8.10</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/14092"><img style="border-radius:10px;" width="100px" src="/movie_poster/1550727987_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/14092"<span style="font-size: 20px;color: white;" >Harry Potter and the Deathly Hallows Part 1</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 7.70</i></td><td width="700px"></td></tr><tr><td><a href="http://172.27.27.84/movie/14093"><img style="border-radius:10px;" width="100px" src="/movie_poster/1550727990_image.jpg"></a></td><td><a href="http://172.27.27.84/movie/14093"<span style="font-size: 20px;color: white;" >Harry Potter and the Deathly Hallows Part 2</span></a><br><span style="color:yellow">Movie</span><br><i class="fa fa-star"> 8.10</i></td><td width="700px"></td></tr>
Loading

0 comments on commit bd44610

Please sign in to comment.