Skip to content

Commit

Permalink
Add WowMovieZone scraper support
Browse files Browse the repository at this point in the history
Also, don't scrape the same URL multiple times
  • Loading branch information
ratanparai committed Apr 29, 2019
1 parent 6f9d9c4 commit 01d4da3
Show file tree
Hide file tree
Showing 8 changed files with 109 additions and 5 deletions.
5 changes: 4 additions & 1 deletion app/src/main/java/com/ratanparai/moviedog/db/AppDatabase.kt
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,19 @@ import androidx.room.Database
import androidx.room.Room
import androidx.room.RoomDatabase
import com.ratanparai.moviedog.db.dao.MovieDao
import com.ratanparai.moviedog.db.dao.ScrappedDao
import com.ratanparai.moviedog.db.dao.SearchHashDao
import com.ratanparai.moviedog.db.entity.Movie
import com.ratanparai.moviedog.db.entity.Scrapped
import com.ratanparai.moviedog.db.entity.SearchHash
import com.ratanparai.moviedog.utilities.DATABASE_NAME


@Database(entities = [Movie::class, SearchHash::class], version = 1, exportSchema = false)
@Database(entities = [Movie::class, SearchHash::class, Scrapped::class], version = 1, exportSchema = false)
abstract class AppDatabase : RoomDatabase() {
abstract fun movieDao(): MovieDao
abstract fun searchHashDao(): SearchHashDao
abstract fun scrappedDao(): ScrappedDao

companion object {

Expand Down
15 changes: 15 additions & 0 deletions app/src/main/java/com/ratanparai/moviedog/db/dao/ScrappedDao.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.ratanparai.moviedog.db.dao

import androidx.room.Dao
import androidx.room.Insert
import androidx.room.Query
import com.ratanparai.moviedog.db.entity.Scrapped

/**
 * Room DAO for the `scrapped` table.
 */
@Dao
interface ScrappedDao {
    /**
     * Looks up a row by its URL.
     *
     * `LIMIT 1` is explicit because a single row is returned and nothing
     * visible in this interface constrains `url` to be unique.
     *
     * @param url the URL to look for.
     * @return a matching [Scrapped] row, or `null` when none exists.
     */
    @Query("SELECT * FROM scrapped WHERE url = :url LIMIT 1")
    fun getByUrl(url: String): Scrapped?

    /**
     * Inserts a new row into the `scrapped` table.
     *
     * @param scrapped the row to store.
     */
    @Insert
    fun insert(scrapped: Scrapped)
}
13 changes: 13 additions & 0 deletions app/src/main/java/com/ratanparai/moviedog/db/entity/MovieUrl.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.ratanparai.moviedog.db.entity

import androidx.room.ColumnInfo
import androidx.room.Entity
import androidx.room.PrimaryKey

/**
 * Room entity stored in the `movieUrl` table.
 *
 * @property id auto-generated primary key; defaults to 0 when not supplied.
 * @property movieId id of the owning movie (presumably `Movie.id`; confirm against the schema).
 * @property movieUrl the URL value stored for the movie.
 * @property serviceName name of the service associated with the URL.
 */
@Entity(tableName = "movieUrl")
data class MovieUrl(
    @PrimaryKey(autoGenerate = true)
    @ColumnInfo(name = "id")
    val id: Int = 0,

    val movieId: Int,

    val movieUrl: String,

    val serviceName: String
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package com.ratanparai.moviedog.db.entity

import androidx.room.Embedded
import androidx.room.Relation

/**
 * Result holder pairing a [Movie] with its related [MovieUrl] rows.
 *
 * @property movie the embedded movie.
 * @property movieUrls the [MovieUrl] rows whose `movieId` column matches the movie's `id` column.
 */
data class MovieWithMovieUrls(
    @Embedded
    val movie: Movie,

    @Relation(
        parentColumn = "id",
        entityColumn = "movieId",
        entity = MovieUrl::class
    )
    val movieUrls: List<MovieUrl>
)
11 changes: 11 additions & 0 deletions app/src/main/java/com/ratanparai/moviedog/db/entity/Scrapped.kt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.ratanparai.moviedog.db.entity

import androidx.room.ColumnInfo
import androidx.room.Entity
import androidx.room.PrimaryKey

/**
 * Room entity stored in the `scrapped` table.
 *
 * @property id auto-generated primary key; defaults to 0 when not supplied.
 * @property url the URL recorded by this row.
 */
@Entity(tableName = "scrapped")
data class Scrapped(
    @PrimaryKey(autoGenerate = true)
    @ColumnInfo(name = "id")
    val id: Int = 0,

    val url: String
)
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,12 @@ class WowMovieZoneScrapper: Scrapper {
.get()
}

/**
 * Fetches [url] via the header-aware `getDocument` overload of the supertype,
 * sending an `X-Requested-With: XMLHttpRequest` header with the request.
 *
 * @param url address of the page to load.
 * @return the document returned by the supertype's implementation.
 */
override fun getDocument(url: String): Document {
    val ajaxHeaders = hashMapOf("X-Requested-With" to "XMLHttpRequest")
    return super.getDocument(url, ajaxHeaders)
}

fun getDuration(videoUrl: String): Int {
val mmr = MediaMetadataRetriever()

Expand All @@ -75,7 +81,10 @@ class WowMovieZoneScrapper: Scrapper {
} else {
mmr.setDataSource(videoUrl)
}
return mmr.extractMetadata(MediaMetadataRetriever.METADATA_KEY_DURATION).toInt()

val duration = mmr.extractMetadata(MediaMetadataRetriever.METADATA_KEY_DURATION).toInt()
mmr.release()
return duration
}

companion object {
Expand Down
36 changes: 33 additions & 3 deletions app/src/main/java/com/ratanparai/moviedog/service/MovieService.kt
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@ import android.content.Context
import android.util.Log
import com.ratanparai.moviedog.db.AppDatabase
import com.ratanparai.moviedog.db.dao.MovieDao
import com.ratanparai.moviedog.db.dao.ScrappedDao
import com.ratanparai.moviedog.db.dao.SearchHashDao
import com.ratanparai.moviedog.db.entity.Movie
import com.ratanparai.moviedog.db.entity.Scrapped
import com.ratanparai.moviedog.db.entity.SearchHash
import com.ratanparai.moviedog.scrapper.BdPlexScrapper
import com.ratanparai.moviedog.scrapper.DekhvhaiScrapper
import com.ratanparai.moviedog.scrapper.Scrapper
import com.ratanparai.moviedog.scrapper.WowMovieZoneScrapper
import com.ratanparai.moviedog.utilities.MD5

class MovieService(private val context: Context) {
Expand All @@ -19,17 +22,20 @@ class MovieService(private val context: Context) {
/**
 * Runs every scraper for [query] and then returns `movieDao.searchByTitle(query)`.
 *
 * Each scraper instance is passed to `scrapMovies` together with the DAOs
 * obtained from [AppDatabase].
 *
 * @param query the search text passed to each scraper and to the title lookup.
 * @return the movies returned by `movieDao.searchByTitle(query)`.
 */
fun search(query: String): List<Movie> {
// One instance per supported scraper.
val dekhvhaiScrapper = DekhvhaiScrapper()
val bdPlexScrapper = BdPlexScrapper()
val wowMovieZoneScrapper = WowMovieZoneScrapper()

// DAOs all come from the AppDatabase instance for this context.
val searchHashDao = AppDatabase.getInstance(context).searchHashDao()
val movieDao = AppDatabase.getInstance(context).movieDao()
val scrappedDao = AppDatabase.getInstance(context).scrappedDao()

// NOTE(review): the next two four-argument calls look like the pre-change lines of this diff
// (the five-argument calls below supersede them) — confirm before relying on them.
scrapMovies(bdPlexScrapper, query, searchHashDao, movieDao)
scrapMovies(dekhvhaiScrapper, query, searchHashDao, movieDao)
scrapMovies(wowMovieZoneScrapper, query, searchHashDao, movieDao, scrappedDao)
scrapMovies(bdPlexScrapper, query, searchHashDao, movieDao, scrappedDao)
scrapMovies(dekhvhaiScrapper, query, searchHashDao, movieDao, scrappedDao)

// The result is read back from the database after the scrapMovies calls.
return movieDao.searchByTitle(query)
}

private fun scrapMovies(scrapper: Scrapper, query: String, searchHashDao: SearchHashDao, movieDao: MovieDao) {
private fun scrapMovies(scrapper: Scrapper, query: String, searchHashDao: SearchHashDao, movieDao: MovieDao, scrappedDao: ScrappedDao) {
try {
val searchUrl = scrapper.getSearchUrl(query)
val document = scrapper.getDocument(searchUrl)
Expand All @@ -48,10 +54,25 @@ class MovieService(private val context: Context) {


for (link in movieLinks) {

if (alreadyScrappedMovie(link, scrappedDao)) {
Log.d(TAG, "Movie is already scrapped for URL: $link")
continue
}

Log.d(TAG, "First time scrapping movie for URL: $link")

val scrapped = Scrapped(
url = link
)

val movieDoc = scrapper.getDocument(link)
val movie = scrapper.getMovie(movieDoc)
Log.d(TAG, "Scrapped movie: $movie for search URL $searchUrl ")
try {
scrappedDao.insert(scrapped)
movieDao.insertMovie(movie)

} catch (ex: Exception) {
Log.d("MovieService", ex.message)
}
Expand All @@ -71,6 +92,15 @@ class MovieService(private val context: Context) {

}

/**
 * Tells whether [link] already has a row reachable through [scrappedDao].
 *
 * @param link URL to check.
 * @param scrappedDao DAO used for the lookup.
 * @return `true` when `getByUrl` finds a row for [link], `false` otherwise.
 */
private fun alreadyScrappedMovie(link: String, scrappedDao: ScrappedDao): Boolean =
    scrappedDao.getByUrl(link) != null

fun getMovieById(id : Int): Movie {
val movieDao = AppDatabase.getInstance(context).movieDao()
return movieDao.getMovieById(id)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,4 +45,18 @@ class WowMovieZoneScrapperTest {
assertThat(movieLinks.size).isEqualTo(13)

}

/**
 * Exercises the scraper end to end: builds the search URL for "Harry Potter",
 * loads the search page, extracts the movie links and parses every linked page.
 *
 * NOTE(review): this appears to hit the real site, so it needs network access and
 * assumes the query yields at least one result — confirm it belongs in the default suite.
 */
@Test
fun shouldWorkWithRealAddress() {
    val scrapper = WowMovieZoneScrapper()
    val searchUrl = scrapper.getSearchUrl("Harry Potter")
    val document = scrapper.getDocument(searchUrl)
    val searchResult = scrapper.getListOfMovieLinksFromSearchResult(document)

    // Parse each result page; any exception thrown here fails the test.
    for (result in searchResult) {
        val movieDocument = scrapper.getDocument(result)
        scrapper.getMovie(movieDocument)
    }

    // A collection size is never -1, so the previous expectation could not pass.
    assertThat(searchResult.size).isGreaterThan(0)
}
}

0 comments on commit 01d4da3

Please sign in to comment.