Skip to content

Commit

Permalink
add wow scrapping
Browse files Browse the repository at this point in the history
  • Loading branch information
ratanparai committed Apr 29, 2019
1 parent 0fc3ed1 commit 6f9d9c4
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package com.ratanparai.moviedog.db.dao.scrapper

import androidx.test.espresso.matcher.ViewMatchers.assertThat
import com.ratanparai.moviedog.scrapper.WowMovieZoneScrapper
import org.hamcrest.CoreMatchers.equalTo
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import org.junit.Before
import org.junit.Test

/**
 * Tests for [WowMovieZoneScrapper] that run against HTML fixtures read from the classpath.
 */
class WowMovieZoneScrapperTest {

    private lateinit var searchDoc: Document
    private lateinit var movieDoc: Document

    /** Reads both fixture files, then parses them against the shared base URI. */
    @Before
    fun loadHtml() {
        val searchHtml = readFixture("wow/HarryPotterSearch_wow.data")
        val movieHtml = readFixture("wow/HarryPotter_Deadly_Hallows_part2.data")

        searchDoc = Jsoup.parse(searchHtml, BASE_URI)
        movieDoc = Jsoup.parse(movieHtml, BASE_URI)
    }

    /** The movie detail fixture must yield the expected title, description, video URL and year. */
    @Test
    fun shouldGetMovieFromMovieDocument() {
        with(WowMovieZoneScrapper().getMovie(movieDoc)) {
            assertThat(title, equalTo("Harry Potter And The Deathly Hallows Part 2"))
            assertThat(description, equalTo("Harry, Ron, and Hermione search for Voldemort''s remaining Horcruxes in their effort to destroy the Dark Lord as the final battle rages on at Hogwarts."))
            assertThat(videoUrl, equalTo("http://172.27.27.251/2TB1/1080p/2011/Harry%20Potter%20And%20The%20Deathly%20Hallows%20Part%202%20%282011%29%20%5B1080p%5D/Harry%20Potter%20And%20The%20Deathly%20Hallows%20Part%202%20%282011%29%20%5B1080p%5D.mp4"))
            assertThat(productionYear, equalTo(2011))
        }
    }

    /** Returns the full text of the classpath resource at [path]. */
    private fun readFixture(path: String): String =
        ClassLoader.getSystemResource(path).readText()

    companion object {
        /** Base URI handed to Jsoup when parsing the fixtures. */
        private const val BASE_URI = "http://172.27.27.84"
    }
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
package com.ratanparai.moviedog.scrapper

import android.media.MediaMetadataRetriever
import android.net.Uri
import android.os.Build
import com.ratanparai.moviedog.db.entity.Movie
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import java.util.HashMap

class WowMovieZoneScrapper: Scrapper {

Expand All @@ -11,7 +15,35 @@ class WowMovieZoneScrapper: Scrapper {
}

/**
 * Builds a [Movie] from a movie detail page.
 *
 * The title, year text, description, video source and cover image are read from
 * [document] with CSS selectors. The year text is converted by [getYearFromYearText]
 * and the duration is obtained from the video URL by [getDuration].
 *
 * @param document parsed HTML of a movie detail page.
 * @return the movie described by the page.
 */
override fun getMovie(document: Document): Movie {
    // The leftover TODO("not implemented") stub was removed: it threw before any scraping ran.
    val title = document
        .select("body > section.section.details > div.container > div > div:nth-child(1) > h1")
        .text()
    val yearText = document
        .select("body > section.section.details > div.container > div > div.col-10 > div > div > div.col-12.col-sm-8.col-md-8.col-lg-9.col-xl-9 > div > ul > li:nth-child(2)")
        .text()
    val year = getYearFromYearText(yearText)

    val description = document.select("div.card__description").text()

    // Literal spaces in the source attribute are percent-encoded so the result is usable as a URL.
    val videoUrl = document.select("video > source").attr("src").replace(" ", "%20")

    // NOTE(review): getDuration hands the URL to MediaMetadataRetriever, so this presumably
    // performs network I/O and needs the Android runtime - confirm before calling on the main thread.
    val duration = getDuration(videoUrl)

    // The "abs:" prefix asks Jsoup for the attribute resolved against the document's base URI.
    val cardImage = document.select("div.card__cover > img").attr("abs:src")

    return Movie(
        title = title,
        description = description,
        videoUrl = videoUrl,
        productionYear = year,
        duration = duration,
        cardImage = cardImage
    )
}

/**
 * Extracts the numeric year from a label such as `"Year: 2011"`.
 *
 * Everything after the first `:` is taken (the whole text when there is no colon),
 * surrounding whitespace is removed, and the remainder is parsed as an integer.
 *
 * @param yearText the raw label text.
 * @return the parsed year.
 * @throws NumberFormatException if the remaining text is not a valid integer.
 */
private fun getYearFromYearText(yearText: String): Int {
    // toInt() rejects surrounding whitespace, so a label like "Year: 2011" must be trimmed first.
    return yearText.substringAfter(":").trim().toInt()
}

override fun getListOfMovieLinksFromSearchResult(document: Document): List<String> {
Expand All @@ -35,6 +67,17 @@ class WowMovieZoneScrapper: Scrapper {
.get()
}

/**
 * Reads the duration metadata of the video at [videoUrl].
 *
 * @param videoUrl location of the video handed to [MediaMetadataRetriever.setDataSource].
 * @return the value of [MediaMetadataRetriever.METADATA_KEY_DURATION] parsed as an integer
 *   (the Android documentation describes this key as milliseconds).
 * @throws IllegalStateException if the video exposes no duration metadata.
 * @throws NumberFormatException if the duration metadata is not a valid integer.
 */
fun getDuration(videoUrl: String): Int {
    val mmr = MediaMetadataRetriever()
    try {
        if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.ICE_CREAM_SANDWICH) {
            // The overload taking a headers map is only used from API 14 onwards.
            mmr.setDataSource(videoUrl, HashMap<String, String>())
        } else {
            mmr.setDataSource(videoUrl)
        }
        // extractMetadata may return null; fail with a descriptive message instead of a bare NPE.
        val durationText = checkNotNull(
            mmr.extractMetadata(MediaMetadataRetriever.METADATA_KEY_DURATION)
        ) { "No duration metadata available for $videoUrl" }
        return durationText.toInt()
    } finally {
        // Always free the retriever's native resources, including when an exception is thrown.
        mmr.release()
    }
}

companion object {
// Search endpoint URL template; `%s` is the placeholder for the search value.
// NOTE(review): presumably filled in by the search method outside this view - confirm how the value is encoded.
private const val SEARCH_URL = "http://172.27.27.84/ajax_search?search_value=%s"
}
Expand Down

0 comments on commit 6f9d9c4

Please sign in to comment.