Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Pairwise classifier for POS #246

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ lazy val commonSettings = Seq(
Resolver.mavenLocal,
"CogcompSoftware" at "http://cogcomp.cs.illinois.edu/m2repo/"
),
javaOptions ++= List("-Xmx6g"),
javaOptions ++= List("-Xmx6g", "-XX:+UseG1GC"),
libraryDependencies ++= Seq(
"edu.illinois.cs.cogcomp" % "LBJava" % "1.2.8",
"edu.illinois.cs.cogcomp" % "illinois-core-utilities" % cogcompNLPVersion withSources,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,13 +97,12 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo
val name = String.valueOf(infer.subjectTo.hashCode())
var inference = InferenceManager.get(name, head)
if (inference == null) {
inference = infer(head)
if (log)
println("Inference is NULL " + name)
println("Inference is not cached previously; running the inference from scratch ")
inference = infer(head)
InferenceManager.put(name, inference)
}
inference.valueOf(cls, t)

case None =>
val name = String.valueOf(infer.subjectTo.hashCode())

Expand Down Expand Up @@ -155,7 +154,6 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo
// println(remainingIteration)
val v = crTokenTest.next
if (v == null) {

if (remainingIteration > 0) {
crTokenTest.reset()
learnAll(crTokenTest, remainingIteration - 1)
Expand All @@ -166,15 +164,12 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo
learnAll(crTokenTest, remainingIteration)
}
}

learnAll(crTokenTest, iteration)
}

def test(): List[(String, (Double, Double, Double))] = {

val allHeads = this.dm.getNodeWithType[HEAD].getTestingInstances
// allHeads foreach( t => println(s" [HEAD] Using thie head ${t} "))

val data: List[T] = if (tType.equals(headType)) {
allHeads.map(_.asInstanceOf[T]).toList
} else {
Expand Down Expand Up @@ -211,12 +206,13 @@ abstract class ConstrainedClassifier[T <: AnyRef, HEAD <: AnyRef](val dm: DataMo
}
}

/** The constraint object defined for each classifier */
object ConstrainedClassifier {
// Cache of constraint wrappers, keyed by the hash code of the function that defines each constraint.
val ConstraintManager = scala.collection.mutable.HashMap[Int, LfsConstraint[_]]()
/** Wraps `f` in an [[LfsConstraint]], reusing the wrapper already cached under `f.hashCode()` when there is one.
  *
  * @param f function building the first-order constraint for a head object
  * @param headTag class tag of the head type, passed implicitly to the newly created LfsConstraint
  * @return the cached or newly created wrapper, cast to `LfsConstraint[HEAD]`
  */
def constraint[HEAD <: AnyRef](f: HEAD => FirstOrderConstraint)(implicit headTag: ClassTag[HEAD]): LfsConstraint[HEAD] = {
// NOTE(review): the key is only a hash code, so two distinct functions with equal hash codes
// would share one cache entry and the cast below would not detect it -- confirm this is acceptable.
val hash = f.hashCode()
ConstraintManager.getOrElseUpdate(hash, new LfsConstraint[HEAD] {
// NOTE(review): both spellings are overridden here; this looks like residue of the
// makeConstrainDef -> makeConstraintDef rename -- confirm which one LfsConstraint declares.
override def makeConstrainDef(x: HEAD): FirstOrderConstraint = f(x)
override def makeConstraintDef(x: HEAD): FirstOrderConstraint = f(x)
}).asInstanceOf[LfsConstraint[HEAD]]
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,20 +102,21 @@ class FirstOrderConstraints(val r: FirstOrderConstraint) {

}

class LHSFirstOrderEqualityWithValueLBP(cls: Learner, t: AnyRef) {
class LHSFirstOrderEqualityWithValueLBP(learner: Learner, t: AnyRef) {

// probably we need to write here
// LHSFirstOrderEqualityWithValueLBP(cls : Learner, t : AnyRef) extends ConstraintTrait

val lbjRepr = new FirstOrderVariable(cls, t)
// This is the implicit variable in the ILP
val lbjVariable = new FirstOrderVariable(learner, t)

def is(v: String): FirstOrderConstraint = {
new FirstOrderEqualityWithValue(true, lbjRepr, v)
new FirstOrderEqualityWithValue(true, lbjVariable, v)
}

//TODO: not sure if this works correctly. Make sure it works.
def is(v: LHSFirstOrderEqualityWithValueLBP): FirstOrderConstraint = {
new FirstOrderEqualityWithVariable(true, lbjRepr, v.lbjRepr)
new FirstOrderEqualityWithVariable(true, lbjVariable, v.lbjVariable)
}

def isTrue: FirstOrderConstraint = is("true")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,19 @@ import scala.reflect.ClassTag

abstract class LfsConstraint[T <: AnyRef](implicit val tag: ClassTag[T]) {

def makeConstrainDef(x: T): FirstOrderConstraint
def makeConstraintDef(x: T): FirstOrderConstraint

def evalDiscreteValue(t: T): String = {
this.makeConstrainDef(t).evaluate().toString
this.makeConstraintDef(t).evaluate().toString
}

def apply(t: T) = makeConstrainDef(t)
def apply(t: T) = makeConstraintDef(t)

def transfer: ParameterizedConstraint = {
new ParameterizedConstraint() {
override def makeConstraint(__example: AnyRef): FirstOrderConstraint = {
val t: T = __example.asInstanceOf[T]
makeConstrainDef(t)
makeConstraintDef(t)
}

override def discreteValue(__example: AnyRef): String =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ trait DataModel {
r => r.to.tag.toString.equals(tag.toString) && r.from.tag.toString.equals(headTag.toString)
}
if (r.isEmpty) {
throw new Exception(s"Failed to found relations between $tag to $headTag")
throw new Exception(s"Failed to find relations between $tag to $headTag")
} else r flatMap (_.asInstanceOf[Edge[NEED, FROM]].backward.neighborsOf(t)) distinct
} else r flatMap (_.asInstanceOf[Edge[FROM, NEED]].forward.neighborsOf(t)) distinct
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ case class DiscreteProperty[T <: AnyRef](

private def _discreteValue(__example: AnyRef): String = {
val t: T = __example.asInstanceOf[T]
self.sensor(t).mkString("")
self.sensor(t)
}
}
case _ => new ClassifierContainsInLBP {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,52 +1,48 @@
package edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger

import edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent
import java.io.PrintStream

import edu.illinois.cs.cogcomp.core.datastructures.textannotation.{ Sentence, Constituent }
import edu.illinois.cs.cogcomp.lbj.pos.POSBaselineLearner
import edu.illinois.cs.cogcomp.lbjava.learn.{ SparseAveragedPerceptron, SparseNetworkLearner }
import edu.illinois.cs.cogcomp.lbjava.classify.{ FeatureVector, ScoreSet }
import edu.illinois.cs.cogcomp.lbjava.infer.{ FirstOrderConstant, FirstOrderConstraint, OJalgoHook }
import edu.illinois.cs.cogcomp.lbjava.learn.{ Learner, SparseAveragedPerceptron, SparseNetworkLearner }
import edu.illinois.cs.cogcomp.saul.classifier.{ ConstrainedClassifier, Learnable }
import edu.illinois.cs.cogcomp.saul.constraint.ConstraintTypeConversion._
import edu.illinois.cs.cogcomp.saul.classifier.Learnable
import edu.illinois.cs.cogcomp.saulexamples.nlp.CommonSensors
import edu.illinois.cs.cogcomp.saulexamples.nlp.POSTagger.POSDataModel._
import edu.illinois.cs.cogcomp.saulexamples.setcover.{ SetCoverSolverDataModel, Neighborhood, City }

object POSClassifiers {
/** After POSTaggerKnown and POSTaggerUnknown are trained,
* this classifier will return the prediction of POSTaggerKnown if
* the input word was observed during training or of POSTaggerUnknown
* if it wasn't.
*/
def POSClassifier(x: Constituent): String = {
if (BaselineClassifier.classifier.observed(wordForm(x)))
POSTaggerKnown.classifier.valueOf(x, BaselineClassifier.classifier.allowableTags(wordForm(x))).getStringValue
else
POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue
}

// Loads learned models from the "saul-pos-tagger-models" jar package
def loadModelsFromPackage(): Unit = {
val jarModelPath = "edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/models/"
object POSMixedClassifier extends Learner {
override def write(out: PrintStream): Unit = ???

def loadModel(x: Learnable[Constituent]): Unit = {
val prefix = jarModelPath + x.getClassNameForClassifier
x.load(prefix + ".lc", prefix + ".lex")
}
override def scores(exampleFeatures: Array[Int], exampleValues: Array[Double]): ScoreSet = ???

loadModel(BaselineClassifier)
loadModel(MikheevClassifier)
loadModel(POSTaggerKnown)
loadModel(POSTaggerUnknown)
}
override def classify(exampleFeatures: Array[Int], exampleValues: Array[Double]): FeatureVector = ???

def loadSavedModels(): Unit = {
BaselineClassifier.load()
MikheevClassifier.load()
POSTaggerKnown.load()
POSTaggerUnknown.load()
override def learn(exampleFeatures: Array[Int], exampleValues: Array[Double], exampleLabels: Array[Int], labelValues: Array[Double]): Unit = ???

override def discreteValue(obj: Object): String = {
val x = obj.asInstanceOf[Constituent]
if (BaselineClassifier.classifier.observed(wordForm(x)))
POSTaggerKnown.classifier.valueOf(x, BaselineClassifier.classifier.allowableTags(wordForm(x))).getStringValue
else
POSTaggerUnknown.classifier.valueOf(x, MikheevClassifier.classifier.allowableTags(x)).getStringValue
}
}

def saveModels(): Unit = {
BaselineClassifier.save()
MikheevClassifier.save()
POSTaggerKnown.save()
POSTaggerUnknown.save()
def POSClassifierScoreSet(x: Constituent): ScoreSet = {
if (BaselineClassifier.classifier.observed(wordForm(x)))
POSTaggerKnown.classifier.scores(x, BaselineClassifier.classifier.allowableTags(wordForm(x)))
else
POSTaggerUnknown.classifier.scores(x, MikheevClassifier.classifier.allowableTags(x))
}

object POSTaggerKnown extends Learnable[Constituent](POSDataModel) {
Expand All @@ -59,7 +55,6 @@ object POSClassifiers {
p.thickness = 2
baseLTU = new SparseAveragedPerceptron(p)
}
override val loggging = true
}

object POSTaggerUnknown extends Learnable[Constituent](POSDataModel) {
Expand All @@ -72,20 +67,107 @@ object POSClassifiers {
p.thickness = 4
baseLTU = new SparseAveragedPerceptron(p)
}
override val loggging = true
}

object BaselineClassifier extends Learnable[Constituent](POSDataModel) {
def label = POSLabel
override def feature = using(wordForm)
override lazy val classifier = new POSBaselineLearner()
override val loggging = true
}

object MikheevClassifier extends Learnable[Constituent](POSDataModel) {
def label = POSLabel
override def feature = using(wordForm)
override lazy val classifier = new MikheevLearner
override val loggging = true
}

// Pairwise classifier
/** Learnable over (Constituent, Constituent) pairs, declared on POSDataModel. */
object BaselineClassifierPair extends Learnable[(Constituent, Constituent)](POSDataModel) {
// Property this learner is trained to predict for a pair.
def label = POSLabelPair
// wordFormPair is the only feature used.
override def feature = using(wordFormPair)
// Underlying learner; being a lazy val, it is instantiated on first access.
override lazy val classifier = new POSBaselineLearner()
}

/** Learnable over (Constituent, Constituent) pairs, declared on POSDataModel and referenced by the
  * constraints and by POSConstrainedClassifier in this object.
  */
object POSTaggerPairwise extends Learnable[(Constituent, Constituent)](POSDataModel) {
// Property this learner is trained to predict for a pair.
def label = POSLabelPair
// POSBaselineScoresPair is the only feature used.
override def feature = using(POSBaselineScoresPair)
// Underlying learner; being a lazy val, it is instantiated on first access.
override lazy val classifier = new SparseNetworkLearner
}

// def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence =>
// val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation)
// constituents.sliding(3).toList._forall { cons: List[Constituent] =>
// consecutiveLabelPairsAreConsistent(cons.head, cons(1), cons(2))
// }
// }

// Candidate label strings enumerated by the quantified (`_exists`) constraints in this object.
// NOTE(review): looks like the Penn Treebank tag set plus an "UNKNOWN" entry -- confirm against the label sensor.
val posLabels = List("#", "$", "''", ",", "-LRB-", "-RRB-", ".", ":", "CC", "CD", "DT", "EX", "FW", "IN", "JJ", "JJR",
"JJS", "LS", "MD", "NN", "NNP", "NNPS", "NNS", "PDT", "POS", "PRP", "PRP$", "RB", "RBR", "RBS", "RP", "SYM", "TO",
"UH", "UNKNOWN", "VB", "VBD", "VBG", "VBN", "VBP", "VBZ", "WDT", "WP", "WP$", "WRB", "``")

/** Constraint tying together the two overlapping pairs (c1, c2) and (c2, c3): whatever label the
  * pair (c1, c2) assigns to its right element must also be the label the pair (c2, c3) assigns to
  * its left element.
  *
  * @param c1 first constituent of the window
  * @param c2 middle constituent, shared by both pairs
  * @param c3 last constituent of the window
  * @return the conjunction, over every entry of `posLabels`, of
  *         `posRightLabelIs(label, c1, c2) ==> posLeftLabelIs(label, c2, c3)`
  */
def consecutiveLabelPairsAreConsistent(c1: Constituent, c2: Constituent, c3: Constituent): FirstOrderConstraint = {
  // The implication has to hold for EVERY label. Quantifying with `_exists` made the constraint
  // vacuous: any label that is not the right-hand label of (c1, c2) has a false premise and
  // therefore satisfies the implication on its own.
  posLabels._forall { label: String =>
    posRightLabelIs(label, c1, c2) ==> posLeftLabelIs(label, c2, c3)
  }
}

/** Constraint stating that the pairwise prediction for (c1, c2) has `label` as its right-hand part.
  *
  * Built as a disjunction over every possible left-hand label taken from `posLabels`.
  */
def posRightLabelIs(label: String, c1: Constituent, c2: Constituent) =
  posLabels._exists { leftLabel: String =>
    // Pair labels are spelled "<left><separator><right>".
    val pairLabel = s"$leftLabel${POSTaggerSensors.labelSeparator}$label"
    (POSTaggerPairwise on (c1, c2)).is(pairLabel)
  }

/** Constraint stating that the pairwise prediction for (c1, c2) has `label` as its left-hand part.
  *
  * Built as a disjunction over every possible right-hand label taken from `posLabels`.
  */
def posLeftLabelIs(label: String, c1: Constituent, c2: Constituent) =
  posLabels._exists { rightLabel: String =>
    // Pair labels are spelled "<left><separator><right>".
    val pairLabel = s"$label${POSTaggerSensors.labelSeparator}$rightLabel"
    (POSTaggerPairwise on (c1, c2)).is(pairLabel)
  }

/** Constrained classifier wrapping POSTaggerPairwise: instances are (Constituent, Constituent)
  * pairs and the head type is Sentence.
  */
object POSConstrainedClassifier extends ConstrainedClassifier[(Constituent, Constituent), Sentence](POSDataModel, POSTaggerPairwise) {
// Constraint applied during inference.
override def subjectTo = sentenceLabelsMatch
// Solver instance used for inference.
override val solver = new OJalgoHook
// Data-model edge used to reach the Sentence head from a pair.
override val pathToHead = Some(POSDataModel.tokenPairToSentence)
}

/** Sentence-level constraint: for every pair of adjacent constituents there must be a label pair
  * (l1, l2) from `posLabels` on which the per-constituent classifier and the pairwise classifier
  * agree (see `posClassifierLabelCompatible`).
  *
  * The constraint object is created through `ConstrainedClassifier.constraint`.
  */
def sentenceLabelsMatch = ConstrainedClassifier.constraint[Sentence] { s: Sentence =>
  // NOTE(review): constituents are read from the sentence's TextAnnotation, which may span more
  // than this one sentence -- confirm that this is the intended scope.
  val constituents = CommonSensors.getPOSConstituents(s.getSentenceConstituent.getTextAnnotation)
  val posLabelPairs = for { x <- posLabels; y <- posLabels } yield (x, y)
  constituents.sliding(2).toList._forall {
    case c1 :: c2 :: _ =>
      posLabelPairs._exists { case (l1, l2) => posClassifierLabelCompatible(c1, c2, l1, l2) }
    // `sliding(2)` yields a single window of length 1 when there is only one constituent; there is
    // no adjacent pair to constrain, so contribute a trivially true constraint instead of letting
    // the partial function throw a MatchError.
    case _ => new FirstOrderConstant(true)
  }
  // constituents.toList._forall { c => posLabels._exists { l => (POSMixedClassifier on c).is(l) } }
  // new FirstOrderConstant(true)
}

/** Constraint that holds when POSMixedClassifier labels `c1` as `l1` and `c2` as `l2`, and
  * POSTaggerPairwise labels the pair (c1, c2) with the combined label "l1<separator>l2".
  */
def posClassifierLabelCompatible(c1: Constituent, c2: Constituent, l1: String, l2: String) = {
  val firstIsL1 = (POSMixedClassifier on c1).is(l1)
  val secondIsL2 = (POSMixedClassifier on c2).is(l2)
  val pairIsL1L2 = (POSTaggerPairwise on (c1, c2)).is(s"$l1${POSTaggerSensors.labelSeparator}$l2")
  firstIsL1 and secondIsL2 and pairIsL1L2
}

// Loads learned models from the "saul-pos-tagger-models" jar package
def loadModelsFromPackage(): Unit = {
  // Resource directory holding one ".lc" / ".lex" file pair per classifier.
  val jarModelPath = "edu/illinois/cs/cogcomp/saulexamples/nlp/POSTagger/models/"

  // Loaded in this order: baseline, Mikheev, known-word tagger, unknown-word tagger.
  val packagedModels =
    Seq[Learnable[Constituent]](BaselineClassifier, MikheevClassifier, POSTaggerKnown, POSTaggerUnknown)

  packagedModels.foreach { model =>
    val prefix = s"$jarModelPath${model.getClassNameForClassifier}"
    model.load(s"$prefix.lc", s"$prefix.lex")
  }
}

/** Calls the no-argument `load()` on each of the four single-constituent classifiers, in order:
  * baseline, Mikheev, known-word tagger, unknown-word tagger.
  */
def loadSavedModels(): Unit =
  Seq[Learnable[Constituent]](BaselineClassifier, MikheevClassifier, POSTaggerKnown, POSTaggerUnknown)
    .foreach(_.load())

/** Calls `save()` on each of the four single-constituent classifiers, in order:
  * baseline, Mikheev, known-word tagger, unknown-word tagger.
  */
def saveModels(): Unit =
  Seq[Learnable[Constituent]](BaselineClassifier, MikheevClassifier, POSTaggerKnown, POSTaggerUnknown)
    .foreach(_.save())
}
Loading