Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up test descriptions, addresses #372. #416

Merged
merged 5 commits into from
Jan 21, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions src/test/scala/io/archivesunleashed/ArcTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ class ArcTest extends FunSuite with BeforeAndAfter {

val dayMonthTestA = "200805"

test("count records") {
// Loads the archive at arcPath through RecordLoader.loadArchives and checks
// that the resulting collection contains exactly 300 records.
test("Count records") {
assert(RecordLoader.loadArchives(arcPath, sc).count == 300L)
}

test("filter date") {
test("Filter date RDD") {
val startSS = 0
val monthSS = 6
val four = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -62,7 +62,7 @@ class ArcTest extends FunSuite with BeforeAndAfter {
five.foreach(date => assert(date.substring(startSS, monthSS) == dayMonthTestA))
}

test("filter url pattern") {
test("Filter URL pattern RDD") {
val keepMatches = RecordLoader.loadArchives(arcPath, sc)
.keepUrlPatterns(Set("http://www.archive.org/about/.*".r))
val discardMatches = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -71,14 +71,14 @@ class ArcTest extends FunSuite with BeforeAndAfter {
assert(discardMatches.count == 284L)
}

test("count links") {
// Applies ExtractLinksRDD to every record's URL and content string, merges the
// per-record results with ++ via reduce, and checks that the merged collection
// has 664 entries.
test("Count links RDD") {
val links = RecordLoader.loadArchives(arcPath, sc)
.map(r => ExtractLinksRDD(r.getUrl, r.getContentString))
.reduce((a, b) => a ++ b)
assert(links.size == 664)
}

test("detect language") {
test("Detect language RDD") {
val languageCounts = RecordLoader.loadArchives(arcPath, sc)
.keepMimeTypes(Set("text/html"))
.map(r => RemoveHTMLRDD(r.getContentString))
Expand All @@ -99,7 +99,7 @@ class ArcTest extends FunSuite with BeforeAndAfter {
}
}

test("detect mime type tika") {
test("Detect MIMEtype Tika RDD") {
val mimeTypeCounts = RecordLoader.loadArchives(arcPath, sc)
.map(r => RemoveHTTPHeaderRDD(r.getContentString))
.groupBy(content => DetectMimeTypeTika(content.getBytes))
Expand Down
10 changes: 5 additions & 5 deletions src/test/scala/io/archivesunleashed/ArchiveRecordTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,12 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("count records") {
// Checks the record counts produced by RecordLoader.loadArchives for both
// fixtures: 300 for the archive at arcPath and 299 for the one at warcPath.
// NOTE(review): presumably arcPath is an ARC file and warcPath a WARC file;
// the fixture definitions are outside this hunk, so confirm there.
test("Count records") {
assert(RecordLoader.loadArchives(arcPath, sc).count == 300L)
assert(RecordLoader.loadArchives(warcPath, sc).count == 299L)
}

test("Resource name produces expected result.") {
test("Resource name produces expected result") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => FilenameUtils.getName(x.getArchiveFilename))
.take(3)
Expand Down Expand Up @@ -81,7 +81,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
assert(textSampleWarc.deep == Array("", exampleUrl, exampleUrl).deep)
}

test("Urls") {
test("URLs") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getUrl).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
Expand All @@ -92,7 +92,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
"http://www.archive.org/robots.txt", "http://www.archive.org/").deep)
}

test("Mime-Type") {
test("MIMEtype") {
ruebot marked this conversation as resolved.
Show resolved Hide resolved
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getMimeType).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
Expand All @@ -103,7 +103,7 @@ class ArchiveRecordTest extends FunSuite with BeforeAndAfter {
"text/html").deep)
}

test("Get Http Status") {
test("Get HTTP status") {
val textSampleArc = RecordLoader.loadArchives(arcPath, sc)
.map(x => x.getHttpStatus).take(3)
val textSampleWarc = RecordLoader.loadArchives(warcPath, sc)
Expand Down
2 changes: 1 addition & 1 deletion src/test/scala/io/archivesunleashed/CountableRDDTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class CountableRDDTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("count records") {
test("Count records; Extract Domain RDD ") {
val base = RecordLoader.loadArchives(arcPath, sc)
.keepValidPages()
.map(r => ExtractDomainRDD(r.getUrl))
Expand Down
38 changes: 19 additions & 19 deletions src/test/scala/io/archivesunleashed/RecordDFTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("keep Valid Pages") {
test("Keep valid pages DF") {
val expected = "http://www.archive.org/"
val base = RecordLoader.loadArchives(arcPath, sc)
.all()
Expand All @@ -48,7 +48,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard MimeTypes") {
test("Discard MIMEtypes DF") {
val expected = "filedesc://IAH-20080430204825-00000-blackbook.arc"
val mimeTypes = Set("text/html")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -59,7 +59,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Date") {
test("Discard date DF") {
val expected = "20080430"
val date = "20080429"
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -70,7 +70,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Urls") {
test("Discard URLs DF") {
val expected = "http://www.archive.org/index.php"
val url = Set("http://www.archive.org/")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -81,7 +81,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Domains") {
test("Discard domains DF") {
val expected = "http://www.hideout.com.br/"
val domain = Set("www.archive.org")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -92,7 +92,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard HttpStatus") {
test("Discard HTTP status DF") {
val expected = "200"
val statusCode = Set("000")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -103,7 +103,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Content") {
test("Discard content DF") {
val expected = "dns:www.archive.org"
val contentRegex = Set("Content-Length: [0-9]{4}".r)
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -115,7 +115,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard UrlPatterns") {
test("Discard URL patterns DF") {
val expected = "dns:www.archive.org"
val urlRegex = Set(".*images.*".r)
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -127,7 +127,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Discard Languages") {
test("Discard languages DF") {
val expected = "dns:www.archive.org"
val languages = Set("th","de","ht")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -139,7 +139,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep HttpStatus") {
test("Keep HTTP status DF") {
val expected = "http://www.archive.org/robots.txt"
val statusCode = Set("200")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -150,7 +150,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Date") {
test("Keep date DF") {
val expected = "http://www.archive.org/"
val month = List("04")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -161,7 +161,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Urls") {
test("Keep URLs DF") {
val expected = "http://www.archive.org/"
val url = Set("http://www.archive.org/")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -172,7 +172,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Domains") {
test("Keep domains DF") {
val expected = "http://www.archive.org/robots.txt"
val domain = Set("www.archive.org")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -183,7 +183,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep MimeTypesTika") {
test("Keep MIMEtypes Tika DF") {
val expected = "image/jpeg"
val mimeType = Set("image/jpeg")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -194,7 +194,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep MimeTypes") {
test("Keep MIMEtypes DF") {
val expected = "text/html"
val mimeType = Set("text/html")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -205,7 +205,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep content") {
test("Keep content DF") {
val expected = "http://www.archive.org/images/logoc.jpg"
val contentRegex = Set("Content-Length: [0-9]{4}".r)
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -217,7 +217,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep UrlPatterns") {
test("Keep URL patterns DF") {
val expected = "http://www.archive.org/images/go-button-gateway.gif"
val urlRegex = Set(".*images.*".r)
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -229,7 +229,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep Languages") {
test("Keep languages DF") {
val expected = "http://www.archive.org/images/logoc.jpg"
val languages = Set("th","de","ht")
val base = RecordLoader.loadArchives(arcPath, sc)
Expand All @@ -241,7 +241,7 @@ class RecordDFTest extends FunSuite with BeforeAndAfter {
assert (base.toString == expected)
}

test("Keep keepMimeTypes") {
test("Keep images DF") {
val expected = "image/jpeg"
val base = RecordLoader.loadArchives(arcPath, sc)
.all()
Expand Down
2 changes: 1 addition & 1 deletion src/test/scala/io/archivesunleashed/RecordLoaderTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class RecordLoaderTest extends FunSuite with BeforeAndAfter {
sc = new SparkContext(conf)
}

test("loads Warc") {
test("Load WARC") {
val base = RecordLoader.loadArchives(warcPath, sc)
.keepValidPages()
.map(x => x.getUrl)
Expand Down
Loading