Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bring JSON parser on par with circe-fs2 #491

Merged
merged 11 commits into from
Jul 17, 2023
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,11 @@ jobs:

- name: Make target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
run: mkdir -p json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target
run: mkdir -p json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target scalafix/rules/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target scalafix/output/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target scalafix/tests/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target scalafix/input/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target

- name: Compress target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
run: tar cf targets.tar json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target
run: tar cf targets.tar json/.native/target json/play/.jvm/target text/native/target cbor-json/native/target finite-state/native/target scalafix/rules/target target .js/target documentation/target cbor/js/target finite-state/js/target text/js/target scalafix/output/target benchmarks/.jvm/target json/play/.js/target json/.jvm/target xml/scala-xml/.native/target csv/jvm/target xml/.jvm/target xml/.js/target cbor/native/target json/circe/.native/target finite-state/jvm/target scalafix/tests/target cbor-json/js/target cbor/jvm/target csv/native/target json/circe/.jvm/target .jvm/target scalafix/input/target csv/js/target csv/generic/jvm/target .native/target text/jvm/target xml/.native/target json/diffson/.native/target json/diffson/.js/target cbor-json/jvm/target json/interpolators/.jvm/target json/.js/target json/interpolators/.js/target csv/generic/js/target json/circe/.js/target json/diffson/.jvm/target xml/scala-xml/.js/target csv/generic/native/target xml/scala-xml/.jvm/target json/interpolators/.native/target project/target

- name: Upload target directories
if: github.event_name != 'pull_request' && (startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,15 @@ class JsonParserBenchmarks {
.unsafeRunSync()

@Benchmark
def parseJsonFs2DataValues() =
def parseJsonFs2DataParse() =
jsonStream
.through(ast.parse)
.compile
.drain
.unsafeRunSync()

@Benchmark
def parseJsonFs2DataTokensValues() =
jsonStream
.through(tokens)
.through(ast.values)
Expand Down
36 changes: 35 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,8 @@ lazy val documentation = project
"com.beachape" %% "enumeratum" % "1.7.0",
"org.gnieh" %% "diffson-circe" % diffsonVersion,
"io.circe" %% "circe-generic-extras" % circeExtrasVersion,
"co.fs2" %% "fs2-io" % fs2Version
"co.fs2" %% "fs2-io" % fs2Version,
"io.circe" %% "circe-fs2" % "0.14.1"
),
scalacOptions += "-Ymacro-annotations"
)
Expand Down Expand Up @@ -562,6 +563,39 @@ lazy val benchmarks = crossProject(JVMPlatform)
)
.dependsOn(csv, scalaXml, jsonCirce)

// Project rooted at `scalafix/input`. Its compile sources, classpath, scalac options and
// Scala version are what `scalafixTests` feeds to the Scalafix testkit as rule input.
// The Scalafix plugin itself is disabled on this project.
lazy val scalafixInput = (project in file("scalafix/input"))
.disablePlugins(ScalafixPlugin)
.dependsOn(jsonCirce.jvm)

// Project rooted at `scalafix/output`. Its compile source directories are used by
// `scalafixTests` as the testkit output source directories.
// The Scalafix plugin itself is disabled on this project.
lazy val scalafixOutput = (project in file("scalafix/output"))
.disablePlugins(ScalafixPlugin)
.dependsOn(jsonCirce.jvm)

// Project rooted at `scalafix/rules` (presumably hosts the rule implementations -- confirm
// against the directory contents). It only adds `scalafix-core` as a dependency, using the
// Scalafix version exposed by the sbt plugin's BuildInfo so both stay in sync.
lazy val scalafixRules = (project in file("scalafix/rules"))
.disablePlugins(ScalafixPlugin)
.settings(
libraryDependencies +=
"ch.epfl.scala" %%
"scalafix-core" %
_root_.scalafix.sbt.BuildInfo.scalafixVersion
)

// Project rooted at `scalafix/tests`, with the Scalafix testkit plugin enabled.
// Every testkit key below is wired to a sibling project: output sources come from
// `scalafixOutput`, while input sources, classpath, scalac options and Scala version
// all come from `scalafixInput`.
lazy val scalafixTests = (project in file("scalafix/tests"))
.settings(
// sources the testkit uses as output
scalafixTestkitOutputSourceDirectories :=
(scalafixOutput / Compile / sourceDirectories).value,
// sources the testkit uses as input
scalafixTestkitInputSourceDirectories :=
(scalafixInput / Compile / sourceDirectories).value,
// the input project's full compile classpath
scalafixTestkitInputClasspath :=
(scalafixInput / Compile / fullClasspath).value,
// the input is handled with the same compiler options and Scala version as `scalafixInput`
scalafixTestkitInputScalacOptions :=
(scalafixInput / Compile / scalacOptions).value,
scalafixTestkitInputScalaVersion :=
(scalafixInput / Compile / scalaVersion).value
)
.dependsOn(scalafixInput, scalafixRules)
.enablePlugins(ScalafixTestkitPlugin)

// Utils

def onScala2[T](version: String)(values: => List[T]): List[T] = PartialFunction
Expand Down
25 changes: 23 additions & 2 deletions documentation/docs/json/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,18 +111,39 @@ root.index(1).!

To handle Json ASTs, you can use the types and pipes available in the `fs2.data.json.ast` package.

JSON ASTs can be built if you provide an implicit [`Builder[Json]`][builder-api] to the `values` pipe. The `Builder[Json]` typeclass describes how JSON ASTs of type `Json` are built from streams.
#### Parsing a stream to values

If you are not interested in tokens, but instead want to parse the input stream into a stream of AST values, you can use the `ast.parse` pipe, provided you have an implicit [`Builder[Json]`][builder-api] in scope. The `Builder[Json]` typeclass describes how JSON ASTs of type `Json` are built from events generated by the parser.

```scala mdoc:compile-only
import ast._

trait SomeJsonType

implicit val builder: Builder[SomeJsonType] = ???

Stream.emit(input).covary[Fallible].through(parse)
```

The resulting stream emits all top-level JSON values parsed; in our example, the two objects are emitted.

#### From `Token`s to values

JSON ASTs can be built from an existing token stream, provided you have an implicit [`Builder[Json]`][builder-api], using the `values` pipe.

```scala mdoc:compile-only
import ast._

trait SomeJsonType

implicit val builder: Builder[SomeJsonType] = ???

stream.through(values[Fallible, SomeJsonType])
```

The `asts` stream emits all top-level JSON values parsed, in our example, the two objects are emitted.
**Note:** even though this snippet is equivalent in result to the one using `ast.parse`, it is less efficient. If you are only interested in the values, you should always use `ast.parse`.

#### From values to `Token`s

If you provide an implicit [`Tokenizer[Json]`][tokenizer-api], which describes how a JSON AST is transformed into JSON tokens, you can apply transformations to the JSON stream. For instance, you can apply a function `fun` to all values in the `fields3` array by using this code:

Expand Down
49 changes: 39 additions & 10 deletions documentation/docs/json/libraries.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,20 @@ Examples on this page use the following input:
```scala mdoc
import fs2.{Fallible, Stream}
import fs2.data.json._
import fs2.data.json.literals._
import fs2.data.json.jsonpath._
import fs2.data.json.jsonpath.literals._

val stream = json"""{
def input[F[_]] = Stream.emit("""{
"field1": 0,
"field2": "test",
"field3": [1, 2, 3]
}
{
"field1": 2,
"field3": []
}"""
}""").covary[F]

val stream = input[Fallible].through(tokens)

val sel = jsonpath"$$.field3[*]"
```
Expand All @@ -41,19 +42,16 @@ For instance both examples from the [core module documentation][json-doc] with c

```scala mdoc:nest
import fs2.data.json.circe._
import io.circe._

val asts = stream.through(ast.values[Fallible, Json])
asts.compile.toList
val asts = input[Fallible].through(ast.parse)
asts.map(_.spaces2).compile.toList
```

You can use `filter.values` to select only the values matching the JSONPath and deserialize them using the builder.

```scala mdoc:nest
import fs2.data.json.circe._

import io.circe._

import cats.effect._
import cats.syntax.all._
import cats.effect.unsafe.implicits.global
Expand Down Expand Up @@ -86,7 +84,6 @@ case class Wrapped(test: Int)
```scala mdoc:nest
import fs2.data.json.selector._
import fs2.data.json.circe._
import io.circe._

val values = stream.through(codec.deserialize[Fallible, Data])
values.compile.toList
Expand All @@ -104,7 +101,6 @@ Dropping values can be done similarly.

```scala mdoc:nest
import fs2.data.json.circe._
import io.circe._
import cats.syntax.all._

val f1 = root.field("field1").compile
Expand All @@ -113,6 +109,38 @@ val transformed = stream.through(codec.transformOpt(f1, (i: Int) => (i > 0).guar
transformed.compile.to(collector.pretty())
```

#### Migrating from `circe-fs2`

If you were using [`circe-fs2`][circe-fs2] to emit streams of `Json` values, you can easily switch to `fs2-data-json-circe`. Just replace your usages of `stringStreamParser` or `byteStreamParser` with `fs2.data.json.ast.parse`.

For instance, if you had this code:

```scala mdoc:nest
import io.circe.fs2._

import cats.effect._

input[SyncIO]
.through(stringStreamParser)
.map(_.spaces2)
.compile
.toList
.unsafeRunSync()
```

You can replace it with:

```scala mdoc:nest
import fs2.data.json._
import fs2.data.json.circe._

input[Fallible]
.through(ast.parse)
.map(_.spaces2)
.compile
.toList
```

satabin marked this conversation as resolved.
Show resolved Hide resolved
### Play! JSON

Module: [![Maven Central](https://img.shields.io/maven-central/v/org.gnieh/fs2-data-json-play_2.13.svg)](https://mvnrepository.com/artifact/org.gnieh/fs2-data-json-play_2.13)
Expand All @@ -124,3 +152,4 @@ It also provides `Deserializer` for types with a `Reads` instance and `Serialize
[json-doc]: /documentation/json/
[circe]: https://circe.github.io/circe/
[play-json]: https://www.playframework.com/
[circe-fs2]: https://github.com/circe/circe-fs2
32 changes: 32 additions & 0 deletions json/src/main/scala-2.12/fs2/data/package.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/*
* Copyright 2023 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package fs2

import scala.collection.immutable.VectorBuilder
import scala.collection.mutable.ListBuffer

package object data {

  /** Adds an `addOne` method to `VectorBuilder`, delegating to `+=`.
    *
    * This file lives in the Scala 2.12 specific source directory; the method has the
    * same name as the one these collections provide natively in Scala 2.13, so shared
    * sources can call `addOne` on both versions.
    */
  implicit class VectorBuilderOps[T](val builder: VectorBuilder[T]) extends AnyVal {

    /** Appends `t` to the wrapped builder and returns that builder. */
    def addOne(t: T) = builder.+=(t)
  }

  /** Adds an `addOne` method to `ListBuffer`, delegating to `+=`. */
  implicit class ListBufferOps[T](val buffer: ListBuffer[T]) extends AnyVal {

    /** Appends `t` to the wrapped buffer and returns that buffer. */
    def addOne(t: T) = buffer.+=(t)
  }

}
24 changes: 21 additions & 3 deletions json/src/main/scala/fs2/data/json/ast/package.scala
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@
*/

package fs2
package data.json

import internals.{TokenSelector, ValueParser}
package data
package json

import cats.syntax.all._

import text.{AsCharBuffer, CharLikeChunks}
import internals.{TokenSelector, ValueParser, JsonTokenParser, LegacyTokenParser, BuilderChunkAccumulator, State}

package object ast {

/** Transforms a stream of token into another one. The transformation function `f` is
Expand Down Expand Up @@ -84,6 +86,22 @@ package object ast {
def values[F[_], Json](implicit F: RaiseThrowable[F], builder: Builder[Json]): Pipe[F, Token, Json] =
ValueParser.pipe[F, Json]

/** Parses a stream of characters into a stream of Json values.
  *
  * The input is read through the implicit `CharLikeChunks` instance `T`, and values are
  * assembled with the implicit `Builder[Json]`, wrapped in a `BuilderChunkAccumulator`.
  *
  * The parser implementation is chosen from the runtime class of `T`: when it is also an
  * `AsCharBuffer`, a `JsonTokenParser` is used, started in `State.BeforeValue`; any other
  * instance is handled by `LegacyTokenParser`. In both cases the parser is only
  * instantiated inside `Stream.suspend`.
  */
def parse[F[_], T, Json](implicit
    F: RaiseThrowable[F],
    T: CharLikeChunks[F, T],
    builder: Builder[Json]): Pipe[F, T, Json] = { chars =>
  Stream.suspend {
    T match {
      case buffered: AsCharBuffer[F, T] =>
        new JsonTokenParser[F, T, Json](chars, new BuilderChunkAccumulator(builder))(F, buffered)
          .go_(State.BeforeValue)
          .stream
      case _ =>
        new LegacyTokenParser[F, T, Json](chars)
          .parse(new BuilderChunkAccumulator(builder))
          .stream
    }
  }
}

/** Transforms a stream of Json values into a stream of Json tokens.
*
* This operation is the opposite of `values`.
Expand Down
Loading