From 740ac8fa5873a6e08af8a3cd749667b0a3a9c5ae Mon Sep 17 00:00:00 2001 From: "Ross A. Baker" Date: Wed, 15 Jun 2022 22:09:03 -0400 Subject: [PATCH 1/2] Rudimentary XML generator and round-trip test --- .../org/http4s/scalaxml/ScalaXmlSuite.scala | 17 +- .../org/http4s/scalaxml/generators.scala | 471 ++++++++++++++++++ 2 files changed, 482 insertions(+), 6 deletions(-) create mode 100644 scala-xml/src/test/scala/org/http4s/scalaxml/generators.scala diff --git a/scala-xml/src/test/scala/org/http4s/scalaxml/ScalaXmlSuite.scala b/scala-xml/src/test/scala/org/http4s/scalaxml/ScalaXmlSuite.scala index 8739d94..2fe7883 100644 --- a/scala-xml/src/test/scala/org/http4s/scalaxml/ScalaXmlSuite.scala +++ b/scala-xml/src/test/scala/org/http4s/scalaxml/ScalaXmlSuite.scala @@ -24,17 +24,19 @@ import fs2.Stream import fs2.text.decodeWithCharset import fs2.text.utf8 import munit.CatsEffectSuite -import munit.ScalaCheckSuite +import munit.ScalaCheckEffectSuite import org.http4s.Status.Ok import org.http4s.headers.`Content-Type` import org.http4s.laws.discipline.arbitrary._ +import org.http4s.scalaxml.generators._ import org.scalacheck.Prop._ +import org.scalacheck.effect.PropF._ import org.typelevel.ci._ import java.nio.charset.StandardCharsets import scala.xml.Elem -class ScalaXmlSuite extends CatsEffectSuite with ScalaCheckSuite { +class ScalaXmlSuite extends CatsEffectSuite with ScalaCheckEffectSuite { def getBody(body: EntityBody[IO]): IO[String] = body.through(utf8.decode).foldMonoid.compile.lastOrError @@ -56,10 +58,13 @@ class ScalaXmlSuite extends CatsEffectSuite with ScalaCheckSuite { } } - test("xml should parse the XML") { - server(Request[IO](body = strBody("

h1

"))) - .flatMap(r => getBody(r.body)) - .assertEquals("html") + test("round trips utf-8") { + forAllF(genXml) { (elem: Elem) => + Request[IO]() + .withEntity(elem) + .as[Elem] + .assertEquals(elem) + } } test("parse XML in parallel") { diff --git a/scala-xml/src/test/scala/org/http4s/scalaxml/generators.scala b/scala-xml/src/test/scala/org/http4s/scalaxml/generators.scala new file mode 100644 index 0000000..8016d41 --- /dev/null +++ b/scala-xml/src/test/scala/org/http4s/scalaxml/generators.scala @@ -0,0 +1,471 @@ +/* + * Copyright 2014 http4s.org + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.http4s.scalaxml + +import org.scalacheck.Gen + +import java.util.Locale +import scala.xml._ + +/** Roughly generates XML 1.0, Fourth Edition, which is digestible + * by Xerces 2.12. + * + * Not yet generated: + * - EntityRefs (and EntityDecl) + * - Surrogate pair characters + * - Namespace bindings + */ +object generators { + val char: Seq[Char] = + Seq( + 0x0009 to 0x0009, + 0x000a to 0x000a, + 0x000d to 0x000d, + 0x0020 to 0xd7ff, + 0xe000 to 0xfffd, + ).flatten.map(_.toChar) + + val baseChar: Seq[Char] = + Seq( + 0x0041 to 0x005a, + 0x0061 to 0x007a, + 0x00c0 to 0x00d6, + 0x00d8 to 0x00f6, + 0x00f8 to 0x00ff, + 0x0100 to 0x0131, + 0x0134 to 0x013e, + 0x0141 to 0x0148, + 0x014a to 0x017e, + 0x0180 to 0x01c3, + 0x01cd to 0x01f0, + 0x01f4 to 0x01f5, + 0x01fa to 0x0217, + 0x0250 to 0x02a8, + 0x02bb to 0x02c1, + 0x0386 to 0x0386, + 0x0388 to 0x038a, + 0x038c to 0x038c, + 0x038e to 0x03a1, + 0x03a3 to 0x03ce, + 0x03d0 to 0x03d6, + 0x03da to 0x03da, + 0x03dc to 0x03dc, + 0x03de to 0x03de, + 0x03e0 to 0x03e0, + 0x03e2 to 0x03f3, + 0x0401 to 0x040c, + 0x040e to 0x044f, + 0x0451 to 0x045c, + 0x045e to 0x0481, + 0x0490 to 0x04c4, + 0x04c7 to 0x04c8, + 0x04cb to 0x04cc, + 0x04d0 to 0x04eb, + 0x04ee to 0x04f5, + 0x04f8 to 0x04f9, + 0x0531 to 0x0556, + 0x0559 to 0x0559, + 0x0561 to 0x0586, + 0x05d0 to 0x05ea, + 0x05f0 to 0x05f2, + 0x0621 to 0x063a, + 0x0641 to 0x064a, + 0x0671 to 0x06b7, + 0x06ba to 0x06be, + 0x06c0 to 0x06ce, + 0x06d0 to 0x06d3, + 0x06d5 to 0x06d5, + 0x06e5 to 0x06e6, + 0x0905 to 0x0939, + 0x093d to 0x093d, + 0x0958 to 0x0961, + 0x0985 to 0x098c, + 0x098f to 0x0990, + 0x0993 to 0x09a8, + 0x09aa to 0x09b0, + 0x09b2 to 0x09b2, + 0x09b6 to 0x09b9, + 0x09dc to 0x09dd, + 0x09df to 0x09e1, + 0x09f0 to 0x09f1, + 0x0a05 to 0x0a0a, + 0x0a0f to 0x0a10, + 0x0a13 to 0x0a28, + 0x0a2a to 0x0a30, + 0x0a32 to 0x0a33, + 0x0a35 to 0x0a36, + 0x0a38 to 0x0a39, + 0x0a59 to 0x0a5c, + 0x0a5e to 0x0a5e, + 0x0a72 to 0x0a74, + 0x0a85 to 0x0a8b, + 0x0a8d to 0x0a8d, + 0x0a8f to 0x0a91, + 0x0a93 to 0x0aa8, + 0x0aaa to 0x0ab0, + 0x0ab2 to 0x0ab3, + 0x0ab5 to 0x0ab9, + 0x0abd to 0x0abd, + 0x0ae0 to 0x0ae0, + 0x0b05 to 0x0b0c, + 0x0b0f to 0x0b10, + 0x0b13 to 0x0b28, + 0x0b2a to 0x0b30, + 0x0b32 to 0x0b33, + 0x0b36 to 0x0b39, + 0x0b3d to 0x0b3d, + 0x0b5c to 0x0b5d, + 0x0b5f to 0x0b61, + 0x0b85 to 0x0b8a, + 0x0b8e to 0x0b90, + 0x0b92 to 0x0b95, + 0x0b99 to 0x0b9a, + 0x0b9c to 0x0b9c, + 0x0b9e to 0x0b9f, + 0x0ba3 to 0x0ba4, + 0x0ba8 to 0x0baa, + 0x0bae to 0x0bb5, + 0x0bb7 to 0x0bb9, + 0x0c05 to 0x0c0c, + 0x0c0e to 0x0c10, + 0x0c12 to 0x0c28, + 0x0c2a to 0x0c33, + 0x0c35 to 0x0c39, + 0x0c60 to 0x0c61, + 0x0c85 to 0x0c8c, + 0x0c8e to 0x0c90, + 0x0c92 to 0x0ca8, + 0x0caa to 0x0cb3, + 0x0cb5 to 0x0cb9, + 0x0cde to 0x0cde, + 0x0ce0 to 0x0ce1, + 0x0d05 to 0x0d0c, + 0x0d0e to 0x0d10, + 0x0d12 to 0x0d28, + 0x0d2a to 0x0d39, + 0x0d60 to 0x0d61, + 0x0e01 to 0x0e2e, + 0x0e30 to 0x0e30, + 0x0e32 to 0x0e33, + 0x0e40 to 0x0e45, + 0x0e81 to 0x0e82, + 0x0e84 to 0x0e84, + 0x0e87 to 0x0e88, + 0x0e8a to 0x0e8a, + 0x0e8d to 0x0e8d, + 0x0e94 to 0x0e97, + 0x0e99 to 0x0e9f, + 0x0ea1 to 0x0ea3, + 0x0ea5 to 0x0ea5, + 0x0ea7 to 0x0ea7, + 0x0eaa to 0x0eab, + 0x0ead to 0x0eae, + 0x0eb0 to 0x0eb0, + 0x0eb2 to 0x0eb3, + 0x0ebd to 0x0ebd, + 0x0ec0 to 0x0ec4, + 0x0f40 to 0x0f47, + 0x0f49 to 0x0f69, + 0x10a0 to 0x10c5, + 0x10d0 to 0x10f6, + 0x1100 to 0x1100, + 0x1102 to 0x1103, + 0x1105 to 0x1107, + 0x1109 to 0x1109, + 0x110b to 0x110c, + 0x110e to 0x1112, + 0x113c to 0x113c, + 0x113e to 0x113e, + 0x1140 to 0x1140, + 0x114c to 0x114c, + 0x114e to 0x114e, + 0x1150 to 0x1150, + 0x1154 to 0x1155, + 0x1159 to 0x1159, + 0x115f to 0x1161, + 0x1163 to 0x1163, + 0x1165 to 0x1165, + 0x1167 to 0x1167, + 0x1169 to 0x1169, + 0x116d to 0x116e, + 0x1172 to 0x1173, + 0x1175 to 0x1175, + 0x119e to 0x119e, + 0x11a8 to 0x11a8, + 0x11ab to 0x11ab, + 0x11ae to 0x11af, + 0x11b7 to 0x11b8, + 0x11ba to 0x11ba, + 0x11bc to 0x11c2, + 0x11eb to 0x11eb, + 0x11f0 to 0x11f0, + 0x11f9 to 0x11f9, + 0x1e00 to 0x1e9b, + 0x1ea0 to 0x1ef9, + 0x1f00 to 0x1f15, + 0x1f18 to 0x1f1d, + 0x1f20 to 0x1f45, + 0x1f48 to 0x1f4d, + 0x1f50 to 0x1f57, + 0x1f59 to 0x1f59, + 0x1f5b to 0x1f5b, + 0x1f5d to 0x1f5d, + 0x1f5f to 0x1f7d, + 0x1f80 to 0x1fb4, + 0x1fb6 to 0x1fbc, + 0x1fbe to 0x1fbe, + 0x1fc2 to 0x1fc4, + 0x1fc6 to 0x1fcc, + 0x1fd0 to 0x1fd3, + 0x1fd6 to 0x1fdb, + 0x1fe0 to 0x1fec, + 0x1ff2 to 0x1ff4, + 0x1ff6 to 0x1ffc, + 0x2126 to 0x2126, + 0x212a to 0x212b, + 0x212e to 0x212e, + 0x2180 to 0x2182, + 0x3041 to 0x3094, + 0x30a1 to 0x30fa, + 0x3105 to 0x312c, + 0xac00 to 0xd7a3, + ).flatten.map(_.toChar) + + val ideographic: Seq[Char] = + Seq( + 0x4e00 to 0x9fa5, + 0x3007 to 0x3007, + 0x3021 to 0x3029, + ).flatten.map(_.toChar) + + val letter: Seq[Char] = + baseChar ++ ideographic + + val combiningChar: Seq[Char] = Seq( + 0x0300 to 0x0345, + 0x0360 to 0x0361, + 0x0483 to 0x0486, + 0x0591 to 0x05a1, + 0x05a3 to 0x05b9, + 0x05bb to 0x05bd, + 0x05bf to 0x05bf, + 0x05c1 to 0x05c2, + 0x05c4 to 0x05c4, + 0x064b to 0x0652, + 0x0670 to 0x0670, + 0x06d6 to 0x06dc, + 0x06dd to 0x06df, + 0x06e0 to 0x06e4, + 0x06e7 to 0x06e8, + 0x06ea to 0x06ed, + 0x0901 to 0x0903, + 0x093c to 0x093c, + 0x093e to 0x094c, + 0x094d to 0x094d, + 0x0951 to 0x0954, + 0x0962 to 0x0963, + 0x0981 to 0x0983, + 0x09bc to 0x09bc, + 0x09be to 0x09be, + 0x09bf to 0x09bf, + 0x09c0 to 0x09c4, + 0x09c7 to 0x09c8, + 0x09cb to 0x09cd, + 0x09d7 to 0x09d7, + 0x09e2 to 0x09e3, + 0x0a02 to 0x0a02, + 0x0a3c to 0x0a3c, + 0x0a3e to 0x0a3e, + 0x0a3f to 0x0a3f, + 0x0a40 to 0x0a42, + 0x0a47 to 0x0a48, + 0x0a4b to 0x0a4d, + 0x0a70 to 0x0a71, + 0x0a81 to 0x0a83, + 0x0abc to 0x0abc, + 0x0abe to 0x0ac5, + 0x0ac7 to 0x0ac9, + 0x0acb to 0x0acd, + 0x0b01 to 0x0b03, + 0x0b3c to 0x0b3c, + 0x0b3e to 0x0b43, + 0x0b47 to 0x0b48, + 0x0b4b to 0x0b4d, + 0x0b56 to 0x0b57, + 0x0b82 to 0x0b83, + 0x0bbe to 0x0bc2, + 0x0bc6 to 0x0bc8, + 0x0bca to 0x0bcd, + 0x0bd7 to 0x0bd7, + 0x0c01 to 0x0c03, + 0x0c3e to 0x0c44, + 0x0c46 to 0x0c48, + 0x0c4a to 0x0c4d, + 0x0c55 to 0x0c56, + 0x0c82 to 0x0c83, + 0x0cbe to 0x0cc4, + 0x0cc6 to 0x0cc8, + 0x0cca to 0x0ccd, + 0x0cd5 to 0x0cd6, + 0x0d02 to 0x0d03, + 0x0d3e to 0x0d43, + 0x0d46 to 0x0d48, + 0x0d4a to 0x0d4d, + 0x0d57 to 0x0d57, + 0x0e31 to 0x0e31, + 0x0e34 to 0x0e3a, + 0x0e47 to 0x0e4e, + 0x0eb1 to 0x0eb1, + 0x0eb4 to 0x0eb9, + 0x0ebb to 0x0ebc, + 0x0ec8 to 0x0ecd, + 0x0f18 to 0x0f19, + 0x0f35 to 0x0f35, + 0x0f37 to 0x0f37, + 0x0f39 to 0x0f39, + 0x0f3e to 0x0f3e, + 0x0f3f to 0x0f3f, + 0x0f71 to 0x0f84, + 0x0f86 to 0x0f8b, + 0x0f90 to 0x0f95, + 0x0f97 to 0x0f97, + 0x0f99 to 0x0fad, + 0x0fb1 to 0x0fb7, + 0x0fb9 to 0x0fb9, + 0x20d0 to 0x20dc, + 0x20e1 to 0x20e1, + 0x302a to 0x302f, + 0x3099 to 0x3099, + 0x309a to 0x309a, + ).flatten.map(_.toChar) + + val digit: Seq[Char] = + Seq( + 0x0030 to 0x0039, + 0x0660 to 0x0669, + 0x06f0 to 0x06f9, + 0x0966 to 0x096f, + 0x09e6 to 0x09ef, + 0x0a66 to 0x0a6f, + 0x0ae6 to 0x0aef, + 0x0b66 to 0x0b6f, + 0x0be7 to 0x0bef, + 0x0c66 to 0x0c6f, + 0x0ce6 to 0x0cef, + 0x0d66 to 0x0d6f, + 0x0e50 to 0x0e59, + 0x0ed0 to 0x0ed9, + 0x0f20 to 0x0f29, + ).flatten.map(_.toChar) + + val extender: Seq[Char] = + Seq( + 0x00b7 to 0x00b7, + 0x02d0 to 0x02d0, + 0x02d1 to 0x02d1, + 0x0387 to 0x0387, + 0x0640 to 0x0640, + 0x0e46 to 0x0e46, + 0x0ec6 to 0x0ec6, + 0x3005 to 0x3005, + 0x3031 to 0x3035, + 0x309d to 0x309e, + 0x30fc to 0x30fe, + ).flatten.map(_.toChar) + + val genNcNameStartChar: Gen[Char] = + Gen.oneOf(letter ++ "_") + + val genNcNameChar: Gen[Char] = + Gen.oneOf(letter ++ digit ++ ".-_" ++ combiningChar ++ extender) + + val genNcName: Gen[String] = for { + head <- genNcNameStartChar.map(_.toString) + n <- Gen.poisson(4) + tail <- Gen.stringOfN(n, genNcNameChar) + } yield head + tail + + val genPrefix: Gen[String] = + Gen.oneOf( + Gen.const(null), + genNcName, + ) + + val genAttrValue: Gen[String] = + for { + n <- Gen.poisson(5) + s <- Gen.stringOfN(n, Gen.oneOf(char)) + } yield new String(s.toArray) + + val genAttribute: Gen[Attribute] = + for { + prefix <- genPrefix + name <- genNcName + value <- genAttrValue + } yield Attribute(prefix, name, value, Null) + + val genAttributes: Gen[MetaData] = + for { + n <- Gen.geometric(0.5) + attributes <- Gen.listOfN(n, genAttribute) + } yield attributes.foldLeft(Null: MetaData) { (md, attr) => + Attribute(attr.pre, attr.key, attr.value, md) + } + + val genText: Gen[Text] = + for { + n <- Gen.poisson(5) + s <- Gen.stringOfN(n, Gen.oneOf(char)) + } yield Text(s) + + val genComment: Gen[Comment] = + for { + n <- Gen.poisson(5) + s <- Gen.stringOfN(n, Gen.oneOf(char)) + if !s.contains("--") && !s.endsWith("-") + } yield new Comment(s) + + val genProcInstr: Gen[ProcInstr] = + for { + target <- genNcName + if target.toLowerCase(Locale.ROOT) != "xml" + n <- Gen.poisson(5) + s <- Gen.stringOfN(n, Gen.oneOf(char)) + if !s.contains("?>") + } yield ProcInstr(target, s) + + val genNode: Gen[Node] = + Gen.frequency( + 20 -> genText, + 5 -> genComment, + 1 -> genProcInstr, + ) + + val genContent: Gen[Seq[Node]] = + for { + n <- Gen.geometric(0.5) + node <- Gen.listOfN(n, genNode) + } yield node + + val genXml: Gen[Elem] = for { + prefix <- genPrefix + label <- genNcName + attributes <- genAttributes + children <- genContent + elem = Elem(prefix, label, attributes, TopScope, true, children: _*) + } yield Utility.trim(elem).asInstanceOf[Elem] +} From 3e88a631b19701f33be3c76771db94e46f1899b8 Mon Sep 17 00:00:00 2001 From: "Ross A. Baker" Date: Wed, 15 Jun 2022 23:28:22 -0400 Subject: [PATCH 2/2] Generate normalized attribute values --- .../src/test/scala/org/http4s/scalaxml/generators.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/scala-xml/src/test/scala/org/http4s/scalaxml/generators.scala b/scala-xml/src/test/scala/org/http4s/scalaxml/generators.scala index 8016d41..deb8f13 100644 --- a/scala-xml/src/test/scala/org/http4s/scalaxml/generators.scala +++ b/scala-xml/src/test/scala/org/http4s/scalaxml/generators.scala @@ -409,7 +409,10 @@ object generators { for { n <- Gen.poisson(5) s <- Gen.stringOfN(n, Gen.oneOf(char)) - } yield new String(s.toArray) + // Serialization trims whitespace, so we normalize ours + tb = new TextBuffer() + _ = tb.append(s) + } yield tb.sb.toString val genAttribute: Gen[Attribute] = for {