Skip to content

Commit

Permalink
Rename imageLinks to imageGraph; resolves #419
Browse files Browse the repository at this point in the history
  • Loading branch information
ruebot committed Feb 10, 2020
1 parent ae2f097 commit 5c88040
Show file tree
Hide file tree
Showing 5 changed files with 7 additions and 7 deletions.
2 changes: 1 addition & 1 deletion src/main/python/aut/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def images(self):
return DataFrame(self.loader.images(self.path), self.sqlContext)

def image_links(self):
return DataFrame(self.loader.imageLinks(self.path), self.sqlContext)
return DataFrame(self.loader.imageGraph(self.path), self.sqlContext)

def pdfs(self):
return DataFrame(self.loader.pdfs(self.path), self.sqlContext)
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/io/archivesunleashed/DataFrameLoader.scala
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ class DataFrameLoader(sc: SparkContext) {
}

/* Create a DataFrame with source page and image url. */
def imageLinks(path: String): DataFrame = {
def imageGraph(path: String): DataFrame = {
RecordLoader.loadArchives(path, sc)
.imageLinks()
.imageGraph()
}

/** Create a DataFrame with image url, filename, extension, mime_type_web_server, mime_type_tika, width, height, md5, and raw bytes. */
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/io/archivesunleashed/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ package object archivesunleashed {
}

/* Extracts all the image links from a source page. */
def imageLinks(): DataFrame = {
def imageGraph(): DataFrame = {
val records = rdd
.keepValidPages()
.flatMap(r => ({
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class DataFrameLoaderTest extends FunSuite with BeforeAndAfter {
val df = new DataFrameLoader(sc)
val validPages = df.webpages(arcPath)
val hyperlinks = df.webgraph(arcPath)
val imageLinks = df.imageLinks(arcPath)
val imageGraph = df.imageGraph(arcPath)
val images = df.images(arcPath)
val pdfs = df.pdfs(pdfPath)
val audio = df.audio(mediaPath)
Expand All @@ -67,7 +67,7 @@ class DataFrameLoaderTest extends FunSuite with BeforeAndAfter {
assert(r_2(0) == "http://web.archive.org/collections/web/advanced.html")
assert(r_2(1) == "Advanced Search")

val r_3 = imageLinks.take(100)(99)
val r_3 = imageGraph.take(100)(99)
assert(r_3.get(0) == "20080430")
assert(r_3.get(1) == "http://www.archive.org/details/secretarmiesb00spivrich")
assert(r_3.get(2) == "http://www.archive.org/images/star.png")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class ImageLinksTest extends FunSuite with BeforeAndAfter {

test("Image links extraction DF") {
val df = RecordLoader.loadArchives(arcPath, sc)
.imageLinks()
.imageGraph()

// We need this in order to use the $-notation
val spark = SparkSession.builder().master("local").getOrCreate()
Expand Down

0 comments on commit 5c88040

Please sign in to comment.