Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add regular language implementation #427

Merged
merged 1 commit into from
Dec 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,7 @@ val commonSettings = List(
"io.circe" %%% "circe-generic" % circeVersion % "test",
"co.fs2" %%% "fs2-io" % fs2Version % "test",
"com.disneystreaming" %%% "weaver-cats" % weaverVersion % "test",
"com.disneystreaming" %%% "weaver-cats-core" % weaverVersion % "test",
"com.disneystreaming" %%% "weaver-core" % weaverVersion % "test",
"com.disneystreaming" %%% "weaver-framework" % weaverVersion % "test",
"com.disneystreaming" %%% "weaver-scalacheck" % weaverVersion % Test,
"com.eed3si9n.expecty" %%% "expecty" % "0.16.0" % "test",
"org.portable-scala" %%% "portable-scala-reflect" % "1.1.2" cross CrossVersion.for3Use2_13
) ++ PartialFunction
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* Copyright 2022 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.pfsa

/** Type class describing how to obtain a concrete element of type `C` out of a
  * value of type `Set`.
  *
  * @tparam Set the type of the value elements are picked from
  * @tparam C the type of the picked elements
  */
trait Candidate[Set, C] {

/** Picks one element out of `set`.
  *
  * @param set the value to pick an element from
  * @return some element, or `None` if no element could be picked
  *         (presumably when `set` describes no element at all — to be
  *         confirmed against the instances)
  */
def pick(set: Set): Option[C]

}
17 changes: 17 additions & 0 deletions finite-state/shared/src/main/scala/fs2/data/pfsa/PDFA.scala
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@

package fs2.data.pfsa

import cats.Foldable
import cats.syntax.foldable._

import Pred.syntax._

private[data] class PDFA[P, T](val init: Int, val finals: Set[Int], val transitions: Array[List[(P, Int)]])(implicit
Expand All @@ -27,4 +30,18 @@ private[data] class PDFA[P, T](val init: Int, val finals: Set[Int], val transiti
else
transitions(q).collectFirst { case (p, q) if p.satisfies(t) => q }

/** Checks whether this automaton accepts the whole `input`.
  *
  * Starting from `init`, each element is consumed by following the first
  * outgoing transition of the current state whose predicate is satisfied by
  * the element. The input is accepted if every element could be consumed
  * this way and the state reached at the end belongs to `finals`.
  *
  * @param input the elements to run through the automaton
  * @return `true` if all of `input` is consumed and the run ends in a final state
  */
def recognizes[S[_]: Foldable](input: S[T]): Boolean =
  input
    // The fold runs in `Option`: an unknown state or the absence of a matching
    // transition yields `None`, which rejects the input. A `Some` result
    // therefore already means that every element was consumed, so there is no
    // need to count elements and compare against `input.size` (which would
    // traverse the input a second time).
    .foldLeftM(init) { (state, elt) =>
      transitions
        .lift(state)
        .flatMap(_.collectFirst { case (p, target) if p.satisfies(elt) => target })
    }
    .exists(finals.contains)

}
7 changes: 7 additions & 0 deletions finite-state/shared/src/main/scala/fs2/data/pfsa/Pred.scala
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,13 @@ object Pred {
def apply[P, T](implicit ev: Pred[P, T]): Pred[P, T] = ev

object syntax {

/** Returns the `always` predicate provided by the implicit [[Pred]] instance for `P`. */
def always[P](implicit P: Pred[P, _]): P = P.always

/** Returns the `never` predicate provided by the implicit [[Pred]] instance for `P`. */
def never[P](implicit P: Pred[P, _]): P = P.never

implicit class PredOps[P](val p1: P) extends AnyVal {
/** Tests whether `e` satisfies the wrapped predicate, delegating to the implicit [[Pred]] instance. */
def satisfies[Elt](e: Elt)(implicit P: Pred[P, Elt]): Boolean = P.satsifies(p1)(e)
Expand Down
267 changes: 267 additions & 0 deletions finite-state/shared/src/main/scala/fs2/data/pfsa/Regular.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,267 @@
/*
* Copyright 2022 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2.data.pfsa

import cats.data.Chain
import cats.syntax.all._
import cats.{Eq, Show}

import Pred.syntax._

/** A regular expression over an alphabet described by character sets of type `CharSet`.
  *
  * Expressions are assembled through the combinators defined here (`~`, `&&`, `||`,
  * `!` and `rep`), each of which applies a few syntactic simplifications while
  * building its result. This makes it possible to model simple query languages
  * (think XPath or JsonPath) and to derive a DFA from them with `deriveDFA`.
  */
sealed abstract class Regular[CharSet] {

  /** Sequences `this` and `that`.
    *
    * Epsilon is dropped on either side, a concatenation on the left is
    * re-associated to the right, and the result collapses to `Regular.empty`
    * as soon as one operand is not satisfiable.
    */
  def ~(that: Regular[CharSet])(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
    (this, that) match {
      case (Regular.Epsilon(), _)                 => that
      case (_, Regular.Epsilon())                 => this
      case (Regular.Concatenation(head, tail), _) => head ~ (tail ~ that)
      case _ if this.isSatisfiable && that.isSatisfiable =>
        Regular.Concatenation(this, that)
      case _ =>
        Regular.empty
    }

  /** Conjunction of `this` and `that`.
    *
    * A conjunction on the left is re-associated to the right. Equal operands
    * collapse to one of them, `Regular.any` is dropped, and the result is
    * `Regular.empty` as soon as one operand is not satisfiable.
    */
  def &&(that: Regular[CharSet])(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
    (this, that) match {
      case (Regular.And(left, right), _) => left && (right && that)
      case _ if this === that            => this
      case _ if this === Regular.any     => that
      case _ if that === Regular.any     => this
      case _ if this.isSatisfiable && that.isSatisfiable =>
        Regular.And(this, that)
      case _ =>
        Regular.empty
    }

  /** Disjunction of `this` and `that`.
    *
    * A disjunction on the left is re-associated to the right and two character
    * sets are merged into a single one. Equal operands collapse to one of them,
    * `Regular.any` absorbs the other operand and unsatisfiable operands are dropped.
    */
  def ||(that: Regular[CharSet])(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
    (this, that) match {
      case (Regular.Or(left, right), _)             => left || (right || that)
      case (Regular.Chars(cs1), Regular.Chars(cs2)) => Regular.Chars(cs1 || cs2)
      case _ if this === that                       => this
      case _ if (this === Regular.any) || (that === Regular.any) =>
        Regular.any
      case _ if !this.isSatisfiable => that
      case _ if !that.isSatisfiable => this
      case _                        => Regular.Or(this, that)
    }

  /** Negation of this expression.
    *
    * A double negation is removed, a character set is negated in place and
    * epsilon is turned into `Regular.any`; anything else is wrapped in a negation.
    */
  def unary_!(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
    this match {
      case Regular.Not(inner) => inner
      case Regular.Chars(cs)  => Regular.Chars(!cs)
      case Regular.Epsilon()  => Regular.any
      // NOTE(review): `Regular.any` and `Regular.empty` are both character sets,
      // which are matched above, so the two guarded cases below look like they
      // can never be selected — to be confirmed before touching them.
      case _ if this === Regular.any   => Regular.empty
      case _ if this === Regular.empty => Regular.any
      case _                           => Regular.Not(this)
    }

  /** Repetition (zero or more times) of this expression.
    *
    * A repetition is returned unchanged; repeating epsilon or `Regular.empty`
    * yields epsilon.
    */
  def rep(implicit CharSet: Pred[CharSet, _], eq: Eq[CharSet]): Regular[CharSet] =
    this match {
      case Regular.Star(_) => this
      case _ if (this === Regular.epsilon) || (this === Regular.empty) =>
        Regular.epsilon
      case _ =>
        Regular.Star(this)
    }

  /** Tells whether this expression accepts the empty sequence. */
  def acceptEpsilon: Boolean =
    this match {
      case Regular.Epsilon() | Regular.Star(_)  => true
      case Regular.Chars(_)                     => false
      case Regular.Not(inner)                   => !inner.acceptEpsilon
      case Regular.Or(left, right)              => left.acceptEpsilon || right.acceptEpsilon
      case Regular.And(left, right)             => left.acceptEpsilon && right.acceptEpsilon
      case Regular.Concatenation(first, second) => first.acceptEpsilon && second.acceptEpsilon
    }

  /** Derives this expression with respect to the element `c`, i.e. computes an
    * expression for what remains to be matched once `c` has been consumed
    * (in the style of Brzozowski derivatives).
    *
    * @param c the element that is consumed
    */
  def derive[C](c: C)(implicit CharSet: Pred[CharSet, C], eq: Eq[CharSet]): Regular[CharSet] =
    this match {
      case Regular.Epsilon() =>
        Regular.Chars(CharSet.never)
      case Regular.Chars(set) =>
        if (CharSet.satsifies(set)(c)) Regular.Epsilon() else Regular.Chars(CharSet.never)
      case Regular.Concatenation(first, second) =>
        val afterFirst = first.derive(c) ~ second
        // if the first part may be skipped, `c` can also be consumed by the second one
        if (first.acceptEpsilon) afterFirst || second.derive(c) else afterFirst
      case Regular.Or(left, right) =>
        left.derive(c) || right.derive(c)
      case Regular.And(left, right) =>
        left.derive(c) && right.derive(c)
      case Regular.Star(inner) =>
        inner.derive(c) ~ Regular.Star(inner)
      case Regular.Not(inner) =>
        !inner.derive(c)
    }

  /** Computes the character sets to try when deriving this expression;
    * `deriveDFA` picks one candidate element per returned set.
    */
  def classes[C](implicit CharSet: Pred[CharSet, C]): Set[CharSet] =
    this match {
      case Regular.Epsilon() =>
        Set(CharSet.always)
      case Regular.Chars(chars) =>
        Set(chars, CharSet.not(chars))
      case Regular.Concatenation(first, second) =>
        if (first.acceptEpsilon) combine(first.classes, second.classes) else first.classes
      case Regular.Or(left, right) =>
        combine(left.classes, right.classes)
      case Regular.And(left, right) =>
        combine(left.classes, right.classes)
      case Regular.Star(inner) =>
        inner.classes
      case Regular.Not(inner) =>
        inner.classes
    }

  /** Pairwise conjunction of the sets in `c1` and `c2`, keeping only the satisfiable ones. */
  private def combine[C](c1: Set[CharSet], c2: Set[CharSet])(implicit CharSet: Pred[CharSet, C]): Set[CharSet] =
    for {
      left <- c1
      right <- c2
      intersection = CharSet.and(left, right)
      if CharSet.isSatisfiable(intersection)
    } yield intersection

  /** Builds a [[PDFA]] out of this expression.
    *
    * States are expressions: state `0` is this expression and the others are
    * the derivatives discovered while exploring, compared with `===`. A state
    * is final when its expression accepts epsilon.
    */
  def deriveDFA[C](implicit
      CharSet: Pred[CharSet, C],
      candidate: Candidate[CharSet, C],
      eq: Eq[CharSet]): PDFA[CharSet, C] = {

    // the states discovered so far, together with the outgoing transitions of each state
    type Automaton = (Chain[Regular[CharSet]], Map[Int, List[(CharSet, Int)]])

    // Adds the transition leaving state `source` (whose expression is `re`) on `cs`,
    // exploring the target state if it was not known yet.
    def goto(re: Regular[CharSet],
             source: Int,
             cs: CharSet,
             states: Chain[Regular[CharSet]],
             transitions: Map[Int, List[(CharSet, Int)]]): Automaton = {
      def linkedTo(target: Int): Map[Int, List[(CharSet, Int)]] =
        transitions.combine(Map(source -> List(cs -> target)))

      candidate.pick(cs) match {
        case None =>
          // no element to derive with: no transition is added for this set
          (states, transitions)
        case Some(c) =>
          val derived = re.derive(c)
          val known = states.zipWithIndex.collectFirst {
            case (state, idx) if derived === state => idx
          }
          known match {
            case Some(existing) =>
              (states, linkedTo(existing))
            case None =>
              // the new state is appended, so its index is the current number of states
              explore(states.append(derived), linkedTo(states.size.toInt), derived)
          }
      }
    }

    // Follows every class of `re`, which must be the last element of `states`.
    def explore(states: Chain[Regular[CharSet]],
                transitions: Map[Int, List[(CharSet, Int)]],
                re: Regular[CharSet]): Automaton = {
      val current = states.size.toInt - 1
      re.classes.foldLeft((states, transitions)) { (acc, cs) =>
        val (accStates, accTransitions) = acc
        goto(re, current, cs, accStates, accTransitions)
      }
    }

    val (allStates, allTransitions) = explore(Chain.one(this), Map.empty, this)
    val finals = allStates.zipWithIndex.collect { case (state, idx) if state.acceptEpsilon => idx }.toList.toSet
    new PDFA[CharSet, C](0, finals, Array.tabulate(allStates.size.toInt)(allTransitions.getOrElse(_, Nil)))
  }

}
/** Constructors and type class instances for [[Regular]].
  *
  * The node classes are private: expressions are created with `epsilon`,
  * `chars`, `any` and `empty`, then composed with the combinators of [[Regular]].
  */
object Regular {
  // the empty sequence
  private case class Epsilon[CharSet]() extends Regular[CharSet]
  // a single element described by the character set `set`
  private case class Chars[CharSet](set: CharSet) extends Regular[CharSet]
  // repetition of `re`
  private case class Star[CharSet](re: Regular[CharSet]) extends Regular[CharSet]
  // `re1` followed by `re2`
  private case class Concatenation[CharSet](re1: Regular[CharSet], re2: Regular[CharSet]) extends Regular[CharSet]
  // `re1` or `re2`
  private case class Or[CharSet](re1: Regular[CharSet], re2: Regular[CharSet]) extends Regular[CharSet]
  // both `re1` and `re2`
  private case class And[CharSet](re1: Regular[CharSet], re2: Regular[CharSet]) extends Regular[CharSet]
  // negation of `re`
  private case class Not[CharSet](re: Regular[CharSet]) extends Regular[CharSet]

  /** Structural equality of expressions, in which the operands of a
    * disjunction or of a conjunction may appear in either order.
    */
  implicit def eq[CharSet: Eq]: Eq[Regular[CharSet]] = Eq.instance {
    case (Epsilon(), Epsilon())                                 => true
    case (Chars(cs1), Chars(cs2))                               => cs1 === cs2
    case (Star(re1), Star(re2))                                 => re1 === re2
    case (Concatenation(re11, re12), Concatenation(re21, re22)) => re11 === re21 && re12 === re22
    case (Or(re11, re12), Or(re21, re22)) =>
      (re11 === re21 && re12 === re22) || (re11 === re22 && re12 === re21)
    case (And(re11, re12), And(re21, re22)) =>
      (re11 === re21 && re12 === re22) || (re11 === re22 && re12 === re21)
    case (Not(re1), Not(re2)) => re1 === re2
    case _                    => false
  }

  /** The expression accepting only the empty sequence. */
  def epsilon[CharSet]: Regular[CharSet] = Epsilon()

  /** The expression accepting a single element described by `cs`. */
  def chars[CharSet](cs: CharSet): Regular[CharSet] =
    Chars(cs)

  /** The expression built from the `always` character set. */
  def any[CharSet](implicit CharSet: Pred[CharSet, _]): Regular[CharSet] = Chars(CharSet.always)

  /** The expression built from the `never` character set. */
  def empty[CharSet](implicit CharSet: Pred[CharSet, _]): Regular[CharSet] = Chars(CharSet.never)

  /** Allows an expression to be used as a predicate on single elements. */
  implicit def pred[CharSet: Eq, C](implicit CharSet: Pred[CharSet, C]): Pred[Regular[CharSet], C] =
    new Pred[Regular[CharSet], C] {

      // Sub-expressions are checked by calling this method directly rather than
      // through the `satisfies` syntax, which would summon `pred` again and
      // allocate a new instance for every visited node.
      override def satsifies(p: Regular[CharSet])(e: C): Boolean =
        p match {
          case Epsilon()  => false
          case Chars(set) => set.satisfies(e)
          case Star(re)   => satsifies(re)(e)
          case Concatenation(re1, re2) =>
            satsifies(re1)(e) || (re1.acceptEpsilon && satsifies(re2)(e))
          case Or(re1, re2)  => satsifies(re1)(e) || satsifies(re2)(e)
          case And(re1, re2) => satsifies(re1)(e) && satsifies(re2)(e)
          case Not(re)       => !satsifies(re)(e)
        }

      override def always: Regular[CharSet] = any

      override def never: Regular[CharSet] = empty

      override def and(p1: Regular[CharSet], p2: Regular[CharSet]): Regular[CharSet] = p1 && p2

      override def or(p1: Regular[CharSet], p2: Regular[CharSet]): Regular[CharSet] = p1 || p2

      override def not(p: Regular[CharSet]): Regular[CharSet] = !p

      // syntactic check: everything but the `empty` expression is considered satisfiable
      override def isSatisfiable(p: Regular[CharSet]): Boolean = p =!= empty

    }

  /** Renders an expression using the `Show` instance of its character sets. */
  implicit def show[CS: Show]: Show[Regular[CS]] = Show.show {
    case Epsilon()               => "ε"
    case Chars(cs)               => cs.show
    case Concatenation(re1, re2) => show"$re1$re2"
    case Or(re1, re2)            => show"($re1) | ($re2)"
    case And(re1, re2)           => show"($re1) & ($re2)"
    case Star(re)                => show"($re)*"
    case Not(re)                 => show"~($re)"
  }
}
Loading