Skip to content

Commit

Permalink
mega workaround for being unable to use submodules due to allenai/sbt…
Browse files Browse the repository at this point in the history
  • Loading branch information
matanox committed Jan 5, 2016
1 parent fc12473 commit 74688e1
Show file tree
Hide file tree
Showing 423 changed files with 56,568 additions and 212 deletions.
2 changes: 1 addition & 1 deletion .openshift/action_hooks/build
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

cd $OPENSHIFT_REPO_DIR/payload
git init #to workaround https://github.com/allenai/sbt-plugins/issues/196, https://github.com/sbt/sbt-git/issues/97#issuecomment-168727760
git init # part of working around https://github.com/allenai/sbt-plugins/issues/196, https://github.com/sbt/sbt-git/issues/97#issuecomment-168727760
SBT_PATH=$OPENSHIFT_DATA_DIR/sbt
SBT_DIR=$OPENSHIFT_DATA_DIR/.sbt
IVY_DIR=$OPENSHIFT_DATA_DIR/.ivy
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
555 changes: 555 additions & 0 deletions github-cruncher/.cache-main

Large diffs are not rendered by default.

58 changes: 58 additions & 0 deletions github-cruncher/.classpath
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<classpath>
<classpathentry kind="src" path="src/main/scala"/>
<classpathentry kind="src" path="/pipeline" exported="true" combineaccessrules="false"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/io.spray/spray-json_2.11/srcs/spray-json_2.11-1.3.2-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/io.spray/spray-json_2.11/bundles/spray-json_2.11-1.3.2.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/commons-io/commons-io/srcs/commons-io-2.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/commons-io/commons-io/jars/commons-io-2.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.allenai.common/common-core_2.11/srcs/common-core_2.11-1.0.1-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.allenai.common/common-core_2.11/jars/common-core_2.11-1.0.1.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.slf4j/slf4j-api/srcs/slf4j-api-1.7.10-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.slf4j/slf4j-api/jars/slf4j-api-1.7.10.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/ch.qos.logback/logback-core/srcs/logback-core-1.1.2-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/ch.qos.logback/logback-core/jars/logback-core-1.1.2.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/ch.qos.logback/logback-classic/srcs/logback-classic-1.1.2-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/ch.qos.logback/logback-classic/jars/logback-classic-1.1.2.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.amazonaws/aws-java-sdk-s3/srcs/aws-java-sdk-s3-1.9.40-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.amazonaws/aws-java-sdk-s3/jars/aws-java-sdk-s3-1.9.40.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.amazonaws/aws-java-sdk-kms/srcs/aws-java-sdk-kms-1.9.40-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.amazonaws/aws-java-sdk-kms/jars/aws-java-sdk-kms-1.9.40.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.amazonaws/aws-java-sdk-core/srcs/aws-java-sdk-core-1.9.40-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.amazonaws/aws-java-sdk-core/jars/aws-java-sdk-core-1.9.40.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/commons-logging/commons-logging/srcs/commons-logging-1.1.3-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/commons-logging/commons-logging/jars/commons-logging-1.1.3.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.apache.httpcomponents/httpclient/srcs/httpclient-4.3.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.apache.httpcomponents/httpclient/jars/httpclient-4.3.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.apache.httpcomponents/httpcore/srcs/httpcore-4.3.2-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.apache.httpcomponents/httpcore/jars/httpcore-4.3.2.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/commons-codec/commons-codec/srcs/commons-codec-1.6-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/commons-codec/commons-codec/jars/commons-codec-1.6.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.scala-lang.modules/scala-parser-combinators_2.11/srcs/scala-parser-combinators_2.11-1.0.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.scala-lang.modules/scala-parser-combinators_2.11/bundles/scala-parser-combinators_2.11-1.0.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.apache.commons/commons-lang3/srcs/commons-lang3-3.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.apache.commons/commons-lang3/jars/commons-lang3-3.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.apache.commons/commons-compress/srcs/commons-compress-1.10-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.apache.commons/commons-compress/jars/commons-compress-1.10.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.github.nscala-time/nscala-time_2.11/srcs/nscala-time_2.11-2.6.0-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.github.nscala-time/nscala-time_2.11/jars/nscala-time_2.11-2.6.0.jar"/>
<classpathentry kind="con" path="org.scala-ide.sdt.launching.SCALA_CONTAINER"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/joda-time/joda-time/srcs/joda-time-2.9.1-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/joda-time/joda-time/jars/joda-time-2.9.1.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.typesafe.slick/slick_2.11/srcs/slick_2.11-3.1.1-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.typesafe.slick/slick_2.11/bundles/slick_2.11-3.1.1.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.reactivestreams/reactive-streams/srcs/reactive-streams-1.0.0-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.reactivestreams/reactive-streams/jars/reactive-streams-1.0.0.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.slf4j/slf4j-nop/srcs/slf4j-nop-1.6.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.slf4j/slf4j-nop/jars/slf4j-nop-1.6.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.typesafe.slick/slick-codegen_2.11/srcs/slick-codegen_2.11-3.1.1-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.typesafe.slick/slick-codegen_2.11/jars/slick-codegen_2.11-3.1.1.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/mysql/mysql-connector-java/srcs/mysql-connector-java-5.1.38-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/mysql/mysql-connector-java/jars/mysql-connector-java-5.1.38.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.zaxxer/HikariCP-java6/srcs/HikariCP-java6-2.3.9-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.zaxxer/HikariCP-java6/bundles/HikariCP-java6-2.3.9.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.javassist/javassist/srcs/javassist-3.18.2-GA-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.javassist/javassist/bundles/javassist-3.18.2-GA.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.typesafe.play/play-json_2.11/srcs/play-json_2.11-2.4.6-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.typesafe.play/play-json_2.11/jars/play-json_2.11-2.4.6.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.typesafe.play/play-iteratees_2.11/srcs/play-iteratees_2.11-2.4.6-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.typesafe.play/play-iteratees_2.11/jars/play-iteratees_2.11-2.4.6.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.scala-stm/scala-stm_2.11/srcs/scala-stm_2.11-0.7-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.scala-stm/scala-stm_2.11/jars/scala-stm_2.11-0.7.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.typesafe/config/srcs/config-1.3.0-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.typesafe/config/bundles/config-1.3.0.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.typesafe.play/play-functional_2.11/srcs/play-functional_2.11-2.4.6-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.typesafe.play/play-functional_2.11/jars/play-functional_2.11-2.4.6.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.typesafe.play/play-datacommons_2.11/srcs/play-datacommons_2.11-2.4.6-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.typesafe.play/play-datacommons_2.11/jars/play-datacommons_2.11-2.4.6.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.joda/joda-convert/srcs/joda-convert-1.7-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.joda/joda-convert/jars/joda-convert-1.7.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.fasterxml.jackson.core/jackson-core/srcs/jackson-core-2.5.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.fasterxml.jackson.core/jackson-core/bundles/jackson-core-2.5.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.fasterxml.jackson.core/jackson-annotations/srcs/jackson-annotations-2.5.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.fasterxml.jackson.core/jackson-annotations/bundles/jackson-annotations-2.5.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.fasterxml.jackson.core/jackson-databind/srcs/jackson-databind-2.5.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.fasterxml.jackson.core/jackson-databind/bundles/jackson-databind-2.5.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.fasterxml.jackson.datatype/jackson-datatype-jdk8/srcs/jackson-datatype-jdk8-2.5.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.fasterxml.jackson.datatype/jackson-datatype-jdk8/bundles/jackson-datatype-jdk8-2.5.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/com.fasterxml.jackson.datatype/jackson-datatype-jsr310/srcs/jackson-datatype-jsr310-2.5.4-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/com.fasterxml.jackson.datatype/jackson-datatype-jsr310/bundles/jackson-datatype-jsr310-2.5.4.jar"/>
<classpathentry sourcepath="/home/matan/.ivy2/cache/org.scalaj/scalaj-http_2.11/srcs/scalaj-http_2.11-2.2.0-sources.jar" kind="lib" path="/home/matan/.ivy2/cache/org.scalaj/scalaj-http_2.11/jars/scalaj-http_2.11-2.2.0.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/org.apache.storm/storm-core/jars/storm-core-0.10.0.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/com.esotericsoftware.kryo/kryo/bundles/kryo-2.21.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/com.esotericsoftware.reflectasm/reflectasm/jars/reflectasm-1.07-shaded.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/org.ow2.asm/asm/jars/asm-4.0.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/com.esotericsoftware.minlog/minlog/jars/minlog-1.2.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/org.clojure/clojure/jars/clojure-1.6.0.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/com.googlecode.disruptor/disruptor/jars/disruptor-2.10.4.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/org.apache.logging.log4j/log4j-api/jars/log4j-api-2.1.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/org.apache.logging.log4j/log4j-core/jars/log4j-core-2.1.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/org.apache.logging.log4j/log4j-slf4j-impl/jars/log4j-slf4j-impl-2.1.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/org.slf4j/log4j-over-slf4j/jars/log4j-over-slf4j-1.6.6.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/org.apache.hadoop/hadoop-auth/jars/hadoop-auth-2.4.0.jar"/>
<classpathentry kind="lib" path="/home/matan/.ivy2/cache/javax.servlet/servlet-api/jars/servlet-api-2.5.jar"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
<classpathentry kind="output" path="bin"/>
</classpath>
File renamed without changes.
13 changes: 13 additions & 0 deletions github-cruncher/.project
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<projectDescription>
<name>githubCruncher</name>
<buildSpec>
<buildCommand>
<name>org.scala-ide.sdt.core.scalabuilder</name>
</buildCommand>
</buildSpec>
<natures>
<nature>org.scala-ide.sdt.core.scalanature</nature>
<nature>org.eclipse.jdt.core.javanature</nature>
</natures>
<linkedResources> </linkedResources>
</projectDescription>
3 changes: 3 additions & 0 deletions github-cruncher/.settings/org.eclipse.core.resources.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#Generated by sbteclipse
#Fri Jan 01 18:57:44 IST 2016
encoding/<project>=UTF-8
6 changes: 6 additions & 0 deletions github-cruncher/.settings/org.scala-ide.sdt.core.prefs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#Generated by sbteclipse
#Mon Dec 28 13:15:58 IST 2015
scala.compiler.additionalParams=-Xlint -feature
deprecation=true
scala.compiler.useProjectSettings=true
target=jvm-1.7
Original file line number Diff line number Diff line change
Expand Up @@ -4,52 +4,52 @@ import scala.concurrent.Future
import scalaj.http._
import play.api.libs.json._
import play.api.libs.functional.syntax._
import scala.util.{Try, Success, Failure}
import scala.util.{ Try, Success, Failure }
import scala.annotation.tailrec
import RateLimitedApiCaller._
trait GithubCrawler {

trait GithubCrawler {

/**
 * Gets the list of scala projects from github's repository search api, sorted by forks,
 * following the pagination links until no further page is advertised.
 *
 * @return a future holding the `items` entries of every result page, in the order returned;
 *         the future fails if any page comes back with a non-success response or if the
 *         rate limited caller refuses the call
 */
def getProjectsList: Future[List[JsValue]] = {

  val initialApiCall: HttpRequest =
    Http("https://api.github.com/search/repositories")
      .param("q", "language:scala")
      .param("sort", "forks")

  // The pages gathered so far travel through `collected`, so nothing is lost between calls
  // and no mutable state is shared across the asynchronous callbacks.
  def impl(
    apiCall: HttpRequest = initialApiCall,
    collected: List[JsValue] = Nil): Future[List[JsValue]] =
    nonBlockingHttp(apiCall) flatMap { response =>

      // thrown inside flatMap, so it surfaces as a failed future rather than escaping
      if (!response.isSuccess)
        throw new Exception(s"github api bad response: \n$response")

      val projects = (Json.parse(response.body) \ "items")
        .as[JsArray]
        .as[List[JsValue]]

      val result = collected ++ projects

      // Github leaves out the Link header when all results fit in a single page, and leaves
      // out the "next" relation on the last page; either way there is nothing more to fetch.
      val nextPageUrl = response.headers.get("Link").flatMap { linkHeader =>
        val linkHeaders = parseGithubLinkHeader(linkHeader)
        println(linkHeaders)
        Try(linkHeaders("next")).toOption
      }

      nextPageUrl match {
        case Some(url) => impl(Http(url), result)
        case None      => Future.successful(result)
      }
    }

  impl()
}

/*
* parses the link header field (http://tools.ietf.org/html/rfc5988) returned by Github's api
* (c.f. https://developer.github.com/guides/traversing-with-pagination/)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,29 @@ import scala.concurrent.duration._
//import java.util.concurrent.TimeUnit._

object Pipeline extends ImplicitPersistenceSerializations {

/** The pipeline instance used by this object; its root output location is the `out` directory. */
object PipelineImpl extends Pipeline {
  // AIP outputs directory
  override def rootOutputUrl = new File("out").toURI
}

/**
 * Pipeline step producing the projects list: blocks on `getProjectsList` for at most
 * 60 minutes and yields its result.
 */
case class GenerateProjectsList()
  extends Producer[List[play.api.libs.json.JsValue]]
  with Ai2StepInfo with GithubCrawler {

  override def create = {
    val crawlTimeout = 60.minutes
    Await.result(getProjectsList, crawlTimeout)
  }
}
/** Placeholder pipeline step: currently yields the fixed string "aaa" and does no cloning. */
case class Clone()
  extends Producer[String]
  with Ai2StepInfo with GithubCrawler {

  override def create: String = "aaa"
}

// NOTE(review): presumably registers the GenerateProjectsList step with the pipeline so that
// its output is persisted as a json collection — confirm against the AI2 pipeline api.
PipelineImpl.Persist.Collection.asJson(GenerateProjectsList())

def run() = {
PipelineImpl.run("github processing pipeline")
//PipelineImpl.openDiagram()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package org.canve.githubCruncher

/**
 * The fields kept for a single project (repository) read from a github api item.
 *
 * @param searchScore     the item's `score`
 * @param isFork          the item's `fork` flag
 * @param forksCount      the item's `forks` number
 * @param description     the item's `description`
 * @param fullName        the item's `full_name`
 * @param sshCloneUrl     the item's `ssh_url`
 * @param httpCloneUrl    the item's `clone_url`
 * @param url             the item's `html_url`
 * @param languagesApiUrl the item's `languages_url`
 */
case class Project(
  searchScore: Float,
  isFork: Boolean,
  forksCount: Int,
  description: String,
  fullName: String,
  sshCloneUrl: String,
  httpCloneUrl: String,
  url: String,
  languagesApiUrl: String)

object Project {

  /**
   * Builds a [[Project]] out of a single github api repository item.
   *
   * @param item the json object describing one repository
   * @return the project made of the item's fields
   * @throws play.api.libs.json.JsResultException when a field other than `description` is
   *         missing or of an unexpected type
   */
  def apply(item: play.api.libs.json.JsValue): Project =
    new Project(
      searchScore = (item \ "score").as[Float],
      isFork = (item \ "fork").as[Boolean],
      forksCount = (item \ "forks").as[Int],
      // github sends a json null for repositories that have no description, which a plain
      // .as[String] rejects; such repositories get an empty description instead
      description = (item \ "description").asOpt[String].getOrElse(""),
      fullName = (item \ "full_name").as[String],
      sshCloneUrl = (item \ "ssh_url").as[String],
      httpCloneUrl = (item \ "clone_url").as[String],
      url = (item \ "html_url").as[String],
      languagesApiUrl = (item \ "languages_url").as[String]
    )
}
Original file line number Diff line number Diff line change
@@ -1,52 +1,52 @@
package org.canve.githubCruncher
import scala.concurrent.{Future, Promise}
import scala.util.{Success, Failure}
import scala.concurrent.{ Future, Promise }
import scala.util.{ Success, Failure }
import scala.concurrent.ExecutionContext.Implicits.global
import scalaj.http._
import com.github.nscala_time.time.Imports._
import scala.concurrent.Await

object RateLimitedApiCaller {

// The most recently observed rate limit state: seeded by an initial rate limit check, then
// reassigned by performApiCall when an api call completes successfully.
private var lastKnownState: Future[RateState] = safeRateLimitCheck

/**
 * Gets the rate limit status, through the rate_limit endpoint, without counting as part of
 * the quota.
 *
 * @return a future rate state built from the response; the future fails when the response
 *         is not a success
 */
private def safeRateLimitCheck: Future[RateState] =
  for (reply <- performApiCall(Http("https://api.github.com/rate_limit"))) yield {
    if (reply.isSuccess) RateState(reply)
    else throw new Exception(s"github api bad or unexpected response: \n$reply")
  }

/**
 * The rate limit figures carried by the headers of a github api response.
 *
 * @param response the api response whose X-RateLimit-* headers are read; construction throws
 *                 if any of them is missing
 */
private case class RateState(response: HttpResponse[String]) {

  // share of the window's quota that the application never consumes
  private val reserveFraction = 0.1

  val windowLimit: Int = response.headers("X-RateLimit-Limit").head.toInt
  val windowRemaining: Int = response.headers("X-RateLimit-Remaining").head.toInt
  // raw value of the X-RateLimit-Reset header
  val windowEnd = response.headers("X-RateLimit-Reset").head

  /*
   * Use this function to always reserve some api quota, so that the api can always be
   * manually examined outside the run of the application.
   *
   * True while the remaining quota is still above the reserve, i.e. while it is fine to go on
   * calling the api. (The comparison used to be the other way around, which allowed calls
   * only once the reserve had already been eaten into.)
   */
  def windowQuotaReserveLeft: Boolean = windowRemaining > (reserveFraction * windowLimit)
}

/**
 * Public entry point for making a github api call; simply delegates to maybeApiCall.
 *
 * @param apiCall the request to perform
 * @return the future response as handed back by maybeApiCall
 */
def nonBlockingHttp(apiCall: HttpRequest): Future[HttpResponse[String]] =
  maybeApiCall(apiCall)

/**
 * Performs the api call only if the last known rate state reports windowQuotaReserveLeft;
 * otherwise hands back a failed future without touching the api.
 *
 * @param apiCall the request to perform
 * @return the future response, or a failed future when the call is withheld
 */
private def maybeApiCall(apiCall: HttpRequest) =
  lastKnownState.flatMap { rateState =>
    if (rateState.windowQuotaReserveLeft)
      performApiCall(apiCall)
    else
      Future.failed[HttpResponse[String]](new Exception("stopping to avoid exhausting rate limit"))
  }

/**
 * Runs the api call asynchronously and, once it completes successfully, records the rate
 * limit state carried by the response as the last known state.
 *
 * @param apiCall the request to perform
 * @return the future response; a failure of the call itself is delivered through this future
 */
private def performApiCall(apiCall: HttpRequest): Future[HttpResponse[String]] = {
  val response = Future { apiCall.asString }

  // An exception thrown from within an onComplete callback never reaches the caller of this
  // method, so problems are reported explicitly here instead of being thrown.
  response.onComplete {
    case Success(reply) =>
      // building the state throws when the rate limit headers are missing; in that case the
      // previous state is kept
      scala.util.Try(RateState(reply)) match {
        case Success(state) => lastKnownState = Future.successful(state)
        case Failure(e) =>
          Console.err.println(s"failed reading rate limit state from github api response: \n$e")
      }
    case Failure(f) =>
      Console.err.println(s"failed completing github api call: \n$f")
  }

  response
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ package org.canve.githubCruncher
import mysql.DB

/** Application entry point: references the DB object, then runs the pipeline. */
object app extends App with GithubCrawler {

  // NOTE(review): presumably held here so that DB gets initialized before the pipeline
  // starts — confirm against mysql.DB
  val db = DB

  Pipeline.run()
}
Loading

0 comments on commit 74688e1

Please sign in to comment.