Skip to content

Commit

Permalink
Merge pull request #427 from gnieh/finite-state/regular
Browse files Browse the repository at this point in the history
Add regular language implementation
  • Loading branch information
satabin authored Dec 15, 2022
2 parents 72e42db + 6cbc664 commit 1f4b285
Show file tree
Hide file tree
Showing 7 changed files with 477 additions and 3 deletions.
4 changes: 1 addition & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ val commonSettings = List(
"io.circe" %%% "circe-generic" % circeVersion % "test",
"co.fs2" %%% "fs2-io" % fs2Version % "test",
"com.disneystreaming" %%% "weaver-cats" % weaverVersion % "test",
"com.disneystreaming" %%% "weaver-cats-core" % weaverVersion % "test",
"com.disneystreaming" %%% "weaver-core" % weaverVersion % "test",
"com.disneystreaming" %%% "weaver-framework" % weaverVersion % "test",
"com.disneystreaming" %%% "weaver-scalacheck" % weaverVersion % Test,
"com.eed3si9n.expecty" %%% "expecty" % "0.16.0" % "test",
"org.portable-scala" %%% "portable-scala-reflect" % "1.1.2" cross CrossVersion.for3Use2_13
) ++ PartialFunction
Expand Down
23 changes: 23 additions & 0 deletions finite-state/shared/src/main/scala/fs2/data/pfsa/Candidate.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright 2022 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.pfsa

/** Type class used to pick an element out of a value describing a set of elements.
  *
  * @tparam Set the type describing a set of elements
  * @tparam C the type of the elements
  */
trait Candidate[Set, C] {

/** Picks an element for the given `set`, if any.
  *
  * NOTE(review): presumably the returned element belongs to `set` and `None`
  * means that no element can be picked (e.g. the set is empty) — confirm
  * against the instances.
  */
def pick(set: Set): Option[C]

}
17 changes: 17 additions & 0 deletions finite-state/shared/src/main/scala/fs2/data/pfsa/PDFA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package fs2.data.pfsa

import cats.Foldable
import cats.syntax.foldable._

import Pred.syntax._

private[data] class PDFA[P, T](val init: Int, val finals: Set[Int], val transitions: Array[List[(P, Int)]])(implicit
Expand All @@ -27,4 +30,18 @@ private[data] class PDFA[P, T](val init: Int, val finals: Set[Int], val transiti
else
transitions(q).collectFirst { case (p, q) if p.satisfies(t) => q }

/** Checks whether this automaton accepts the whole `input`.
  *
  * Starting from `init`, each element follows the first transition of the
  * current state whose predicate it satisfies. The input is rejected as soon
  * as an element has no matching transition (or the current state has no
  * entry in `transitions`). Otherwise it is accepted if and only if the state
  * reached after the last element is in `finals`.
  *
  * @param input the sequence of elements to run through the automaton
  * @return `true` if the entire input is consumed and ends in a final state
  */
def recognizes[S[_]: Foldable](input: S[T]): Boolean =
  input
    .foldLeftM(init) { (state, elt) =>
      // `foldLeftM` in `Option` stops at the first `None`, so a `Some` result
      // already means that every element was consumed: there is no need to
      // count the elements nor to traverse `input` again to get its size.
      transitions
        .lift(state)
        .flatMap(_.collectFirst { case (p, next) if p.satisfies(elt) => next })
    }
    .exists(finals.contains)

}
7 changes: 7 additions & 0 deletions finite-state/shared/src/main/scala/fs2/data/pfsa/Pred.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ object Pred {
/** Summons the implicit [[Pred]] instance for predicates of type `P` over elements of type `T`. */
def apply[P, T](implicit ev: Pred[P, T]): Pred[P, T] = ev

object syntax {

/** Returns the `always` predicate of the implicit [[Pred]] instance for `P`.
  * NOTE(review): presumably the predicate satisfied by every element — confirm against `Pred`.
  */
def always[P](implicit P: Pred[P, _]): P =
P.always

/** Returns the `never` predicate of the implicit [[Pred]] instance for `P`.
  * NOTE(review): presumably the predicate satisfied by no element — confirm against `Pred`.
  */
def never[P](implicit P: Pred[P, _]): P =
P.never

implicit class PredOps[P](val p1: P) extends AnyVal {
def satisfies[Elt](e: Elt)(implicit P: Pred[P, Elt]): Boolean =
P.satsifies(p1)(e)
Expand Down
267 changes: 267 additions & 0 deletions finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
/*
* Copyright 2022 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.pfsa

import cats.data.Chain
import cats.syntax.all._
import cats.{Eq, Show}

import Pred.syntax._

/** Simple regular language over character sets.
* It allows modelling simple query languages (think XPath or JsonPath)
* and deriving a DFA from them.
*/
sealed abstract class Regular[CharSet] {

/** Sequences this expression with `that`.
  *
  * Epsilon operands are dropped, nested concatenations are re-associated to
  * the right, and the result is `Regular.empty` as soon as one of the two
  * operands is not satisfiable.
  */
def ~(that: Regular[CharSet])(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
  (this, that) match {
    case (Regular.Epsilon(), _) =>
      that
    case (_, Regular.Epsilon()) =>
      this
    case (Regular.Concatenation(first, second), _) =>
      // keep concatenations nested on the right-hand side
      first ~ (second ~ that)
    case _ if this.isSatisfiable && that.isSatisfiable =>
      Regular.Concatenation(this, that)
    case _ =>
      Regular.empty
  }

/** Conjunction of this expression with `that`.
  *
  * Nested conjunctions are re-associated to the right. Equal operands collapse
  * into one, `Regular.any` is treated as neutral, and the result is
  * `Regular.empty` as soon as one of the two operands is not satisfiable.
  */
def &&(that: Regular[CharSet])(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
  this match {
    case Regular.And(left, right) =>
      // keep conjunctions nested on the right-hand side
      left && (right && that)
    case _ if this === that =>
      this
    case _ if this === Regular.any =>
      that
    case _ if that === Regular.any =>
      this
    case _ if this.isSatisfiable && that.isSatisfiable =>
      Regular.And(this, that)
    case _ =>
      Regular.empty
  }

/** Disjunction of this expression with `that`.
  *
  * Nested disjunctions are re-associated to the right and two character sets
  * are merged into a single one. Equal operands collapse into one,
  * `Regular.any` is treated as absorbing, and unsatisfiable operands are
  * dropped.
  */
def ||(that: Regular[CharSet])(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
  (this, that) match {
    case (Regular.Or(left, right), _) =>
      // keep disjunctions nested on the right-hand side
      left || (right || that)
    case (Regular.Chars(set1), Regular.Chars(set2)) =>
      Regular.Chars(set1 || set2)
    case _ if this === that =>
      this
    case _ if (this === Regular.any) || (that === Regular.any) =>
      Regular.any
    case _ if !this.isSatisfiable =>
      that
    case _ if !that.isSatisfiable =>
      this
    case _ =>
      Regular.Or(this, that)
  }

/** Negation of this expression.
  *
  * Double negations are removed, a character set is negated in place, and
  * epsilon is negated into `Regular.any`. Any other expression is compared to
  * `Regular.any` and `Regular.empty` before being wrapped into a negation.
  */
def unary_!(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
  this match {
    case Regular.Not(inner) =>
      inner
    case Regular.Chars(set) =>
      Regular.Chars(!set)
    case Regular.Epsilon() =>
      Regular.any
    case _ if this === Regular.any =>
      Regular.empty
    case _ if this === Regular.empty =>
      Regular.any
    case _ =>
      Regular.Not(this)
  }

/** Repetition of this expression.
  *
  * An expression that is already a repetition is returned unchanged, and
  * repeating `Regular.epsilon` or `Regular.empty` results in
  * `Regular.epsilon`.
  */
def rep(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
  this match {
    case Regular.Star(_) =>
      this
    case _ if (this === Regular.epsilon) || (this === Regular.empty) =>
      Regular.epsilon
    case _ =>
      Regular.Star(this)
  }

/** Whether this expression accepts the empty input. */
def acceptEpsilon: Boolean =
  this match {
    case Regular.Epsilon() | Regular.Star(_) =>
      true
    case Regular.Chars(_) =>
      false
    case Regular.Not(inner) =>
      !inner.acceptEpsilon
    case Regular.Or(left, right) =>
      left.acceptEpsilon || right.acceptEpsilon
    case Regular.And(left, right) =>
      left.acceptEpsilon && right.acceptEpsilon
    case Regular.Concatenation(first, second) =>
      first.acceptEpsilon && second.acceptEpsilon
  }

/** Derives this expression with respect to the element `c`.
  *
  * The result is the expression that remains to be matched once `c` has been
  * consumed. It is the `never` character set when `c` cannot be consumed by
  * an epsilon or by a character set.
  *
  * @param c the element to consume
  */
def derive[C](c: C)(implicit CharSet: Pred[CharSet, C], eq: Eq[CharSet]): Regular[CharSet] =
  this match {
    case Regular.Epsilon() =>
      Regular.Chars(CharSet.never)
    case Regular.Chars(set) =>
      if (CharSet.satsifies(set)(c)) Regular.Epsilon()
      else Regular.Chars(CharSet.never)
    case Regular.Concatenation(first, second) =>
      val consumedByFirst = first.derive(c) ~ second
      // when the first part can be skipped, `c` may also be consumed by the second one
      if (first.acceptEpsilon) consumedByFirst || second.derive(c)
      else consumedByFirst
    case Regular.Or(left, right) =>
      left.derive(c) || right.derive(c)
    case Regular.And(left, right) =>
      left.derive(c) && right.derive(c)
    case Regular.Star(inner) =>
      inner.derive(c) ~ Regular.Star(inner)
    case Regular.Not(inner) =>
      !inner.derive(c)
  }

/** Computes the character sets used to derive this expression.
  *
  * An epsilon yields the single `always` set, a character set yields itself
  * and its negation, and binary operators yield the satisfiable pairwise
  * conjunctions of the sets of both operands. A concatenation only takes its
  * second operand into account when the first one accepts the empty input.
  */
def classes[C](implicit CharSet: Pred[CharSet, C]): Set[CharSet] =
  this match {
    case Regular.Epsilon() =>
      Set(CharSet.always)
    case Regular.Chars(set) =>
      Set(set, CharSet.not(set))
    case Regular.Star(inner) =>
      inner.classes
    case Regular.Not(inner) =>
      inner.classes
    case Regular.Concatenation(first, second) =>
      if (first.acceptEpsilon) combine(first.classes, second.classes)
      else first.classes
    case Regular.Or(left, right) =>
      combine(left.classes, right.classes)
    case Regular.And(left, right) =>
      combine(left.classes, right.classes)
  }

/** Returns the satisfiable conjunctions of each set of `c1` with each set of `c2`. */
private def combine[C](c1: Set[CharSet], c2: Set[CharSet])(implicit CharSet: Pred[CharSet, C]): Set[CharSet] =
  c1.flatMap { left =>
    c2
      .map(right => CharSet.and(left, right))
      .filter(both => CharSet.isSatisfiable(both))
  }

/** Builds a [[PDFA]] out of this expression by repeatedly deriving it.
  *
  * Each state of the automaton is an expression, the initial state (index 0)
  * being this expression. For each state, one element is picked in each of its
  * `classes` and the state is derived with respect to that element to find the
  * target of the transition labelled with that class. A state is final when
  * its expression accepts the empty input.
  *
  * @param CharSet the predicate instance for the character sets
  * @param candidate used to pick the element a state is derived by for a given class
  * @param eq used to detect that a derived expression is an already known state
  */
def deriveDFA[C](implicit
CharSet: Pred[CharSet, C],
candidate: Candidate[CharSet, C],
eq: Eq[CharSet]): PDFA[CharSet, C] = {

// Adds the transition leaving state `q` (whose expression is `re`) for the
// class `cs`, and explores the target state if it was not known yet.
// `qs` are the states known so far, a state index being its position in `qs`.
def goto(re: Regular[CharSet],
q: Int,
cs: CharSet,
qs: Chain[Regular[CharSet]],
transitions: Map[Int, List[(CharSet, Int)]]): (Chain[Regular[CharSet]], Map[Int, List[(CharSet, Int)]]) =
candidate.pick(cs) match {
case Some(c) =>
val tgt = re.derive(c)
// look for an already known state equal to the derived expression
val equivalent = qs.zipWithIndex.collectFirst {
case (q, idx) if tgt === q => idx
}
equivalent match {
// known state: only the transition is added (here `tgt` is the state index)
case Some(tgt) => (qs, transitions.combine(Map(q -> List(cs -> tgt))))
case None =>
// new state: it is appended, so its index is the size of `qs` before appending
val qs1 = qs.append(tgt)
val q1 = qs.size.toInt
val transitions1 = transitions.combine(Map(q -> List(cs -> q1)))
explore(qs1, transitions1, tgt)
}
case None =>
// no element could be picked for this class: nothing is added
(qs, transitions)
}

// Adds the transitions leaving the state whose expression is `re`, one class at a time.
def explore(qs: Chain[Regular[CharSet]],
transitions: Map[Int, List[(CharSet, Int)]],
re: Regular[CharSet]): (Chain[Regular[CharSet]], Map[Int, List[(CharSet, Int)]]) = {
// `explore` is only called right after `re` was put last in `qs`,
// so its index is the last one
val q = qs.size.toInt - 1
re.classes.foldLeft((qs, transitions)) { case ((qs, transitions), cs) =>
goto(re, q, cs, qs, transitions)
}
}

val (qs, transitions) = explore(Chain.one(this), Map.empty, this)
// final states are the ones whose expression accepts the empty input
val finals = qs.zipWithIndex.collect { case (re, idx) if re.acceptEpsilon => idx }.toList.toSet
// states without any recorded transition get an empty transition list
new PDFA[CharSet, C](0, finals, Array.tabulate(qs.size.toInt)(transitions.getOrElse(_, Nil)))
}

}
object Regular {
// Constructors of the `Regular` ADT. They are private: expressions are built
// through the smart constructors of this object and the combinators of `Regular`.
// They are `final` as they are not meant to be extended.

/** Accepts the empty input only. */
private final case class Epsilon[CharSet]() extends Regular[CharSet]

/** Accepts a single element satisfying `set`. */
private final case class Chars[CharSet](set: CharSet) extends Regular[CharSet]

/** Repetition of `re`, which also accepts the empty input. */
private final case class Star[CharSet](re: Regular[CharSet]) extends Regular[CharSet]

/** `re1` followed by `re2`. */
private final case class Concatenation[CharSet](re1: Regular[CharSet], re2: Regular[CharSet]) extends Regular[CharSet]

/** Either `re1` or `re2`. */
private final case class Or[CharSet](re1: Regular[CharSet], re2: Regular[CharSet]) extends Regular[CharSet]

/** Both `re1` and `re2`. */
private final case class And[CharSet](re1: Regular[CharSet], re2: Regular[CharSet]) extends Regular[CharSet]

/** Negation of `re`. */
private final case class Not[CharSet](re: Regular[CharSet]) extends Regular[CharSet]

/** Structural equality of expressions.
  *
  * Operands of `Or` and `And` are compared regardless of their order, whereas
  * the operands of a concatenation are compared pairwise in order.
  */
implicit def eq[CharSet: Eq]: Eq[Regular[CharSet]] = Eq.instance { (x, y) =>
  (x, y) match {
    case (Epsilon(), Epsilon()) =>
      true
    case (Chars(set1), Chars(set2)) =>
      set1 === set2
    case (Star(inner1), Star(inner2)) =>
      inner1 === inner2
    case (Not(inner1), Not(inner2)) =>
      inner1 === inner2
    case (Concatenation(first1, second1), Concatenation(first2, second2)) =>
      first1 === first2 && second1 === second2
    case (Or(left1, right1), Or(left2, right2)) =>
      (left1 === left2 && right1 === right2) || (left1 === right2 && right1 === left2)
    case (And(left1, right1), And(left2, right2)) =>
      (left1 === left2 && right1 === right2) || (left1 === right2 && right1 === left2)
    case _ =>
      false
  }
}

/** The expression accepting the empty input only. */
def epsilon[CharSet]: Regular[CharSet] = Epsilon()

/** The expression accepting a single element satisfying the character set `cs`. */
def chars[CharSet](cs: CharSet): Regular[CharSet] =
Regular.Chars(cs)

/** The expression made of the `always` character set of the implicit [[Pred]] instance. */
def any[CharSet](implicit CharSet: Pred[CharSet, _]): Regular[CharSet] = Chars(CharSet.always)

/** The expression made of the `never` character set of the implicit [[Pred]] instance.
  * It is the expression considered unsatisfiable by the `Pred` instance of `Regular`.
  */
def empty[CharSet](implicit CharSet: Pred[CharSet, _]): Regular[CharSet] = Chars(CharSet.never)

/** Predicate instance allowing expressions to be used as predicates over elements.
  *
  * Boolean operations are delegated to the combinators of `Regular`, and an
  * expression is satisfiable when it is different from `empty`.
  */
implicit def pred[CharSet: Eq, C](implicit CharSet: Pred[CharSet, C]): Pred[Regular[CharSet], C] =
  new Pred[Regular[CharSet], C] {

    override def always: Regular[CharSet] = any

    override def never: Regular[CharSet] = empty

    override def not(p: Regular[CharSet]): Regular[CharSet] = !p

    override def and(p1: Regular[CharSet], p2: Regular[CharSet]): Regular[CharSet] = p1 && p2

    override def or(p1: Regular[CharSet], p2: Regular[CharSet]): Regular[CharSet] = p1 || p2

    override def isSatisfiable(p: Regular[CharSet]): Boolean = p =!= empty

    /** Whether the element `e` can be consumed first by the expression `p`. */
    override def satsifies(p: Regular[CharSet])(e: C): Boolean =
      p match {
        case Epsilon() =>
          false
        case Chars(set) =>
          set.satisfies(e)
        case Star(inner) =>
          inner.satisfies(e)
        case Not(inner) =>
          !inner.satisfies(e)
        case Or(left, right) =>
          left.satisfies(e) || right.satisfies(e)
        case And(left, right) =>
          left.satisfies(e) && right.satisfies(e)
        case Concatenation(first, second) =>
          // the second part only gets the element if the first one can be skipped
          first.satisfies(e) || (first.acceptEpsilon && second.satisfies(e))
      }

  }

/** Renders an expression using a regular-expression-like notation. */
implicit def show[CS: Show]: Show[Regular[CS]] = Show.show { regular =>
  regular match {
    case Epsilon() =>
      "ε"
    case Chars(set) =>
      set.show
    case Star(inner) =>
      show"($inner)*"
    case Not(inner) =>
      show"~($inner)"
    case Concatenation(first, second) =>
      show"$first$second"
    case Or(left, right) =>
      show"($left) | ($right)"
    case And(left, right) =>
      show"($left) & ($right)"
  }
}
}
Loading

0 comments on commit 1f4b285

Please sign in to comment.