diff --git a/build.sbt b/build.sbt index 5f927181..ca27c75a 100644 --- a/build.sbt +++ b/build.sbt @@ -75,7 +75,11 @@ ThisBuild / mimaBinaryIssueFilters ++= Seq( ProblemFilters.exclude[IncompatibleMethTypeProblem]("scodec.bits.HexDumpFormat.print"), ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.HexDumpFormat.this"), ProblemFilters.exclude[IncompatibleResultTypeProblem]("scodec.bits.ByteVector.fromHexInternal"), - ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.ByteVector#AtEmpty.apply") + ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.ByteVector#AtEmpty.apply"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("scodec.bits.ByteVector.fromBinInternal"), + ProblemFilters.exclude[DirectMissingMethodProblem]( + "scodec.bits.ByteVector.fromBinInternal$default$2" + ) ) lazy val root = tlCrossRootProject.aggregate(core, benchmark) diff --git a/core/shared/src/main/scala/scodec/bits/Bases.scala b/core/shared/src/main/scala/scodec/bits/Bases.scala index c895a0c0..13e41009 100644 --- a/core/shared/src/main/scala/scodec/bits/Bases.scala +++ b/core/shared/src/main/scala/scodec/bits/Bases.scala @@ -33,6 +33,12 @@ package scodec.bits /** Provides types related to base conversion -- e.g., binary, hexadecimal, and base 64. */ object Bases { + /** Result of `Alphabet#toIndex` that indicates the character should be ignored. */ + final val IgnoreChar: Int = -1 + + /** Result of `Alphabet#toIndex` that indicates the character and the rest of the line should be ignored. */ + final val IgnoreRestOfLine: Int = -2 + /** Partial mapping between characters and indices used in base conversions. */ trait Alphabet { @@ -76,14 +82,26 @@ object Bases { /** Predefined alphabets for use in base conversions. 
*/ object Alphabets { + private[bits] object HexBinCommentChar { + def unapply(c: Char): Option[Char] = + c match { + case '#' => Some('#') + case ';' => Some(';') + case '|' => Some('|') + case _ => None + } + } + /** Binary alphabet that uses `{0, 1}` and allows whitespace and underscores for separation. */ object Binary extends BinaryAlphabet { def toChar(i: Int) = if (i == 0) '0' else '1' def toIndex(c: Char) = c match { - case '0' => 0 - case '1' => 1 - case _ => throw new IllegalArgumentException + case '0' => 0 + case '1' => 1 + case c if ignore(c) => IgnoreChar + case HexBinCommentChar(_) => IgnoreRestOfLine + case _ => throw new IllegalArgumentException } def ignore(c: Char) = c.isWhitespace || c == '_' } @@ -93,9 +111,11 @@ object Bases { def toChar(i: Int) = if (i == 0) 't' else 'f' def toIndex(c: Char) = c match { - case 't' | 'T' => 0 - case 'f' | 'F' => 1 - case _ => throw new IllegalArgumentException + case 't' | 'T' => 0 + case 'f' | 'F' => 1 + case c if ignore(c) => IgnoreChar + case HexBinCommentChar(_) => IgnoreRestOfLine + case _ => throw new IllegalArgumentException } def ignore(c: Char) = c.isWhitespace || c == '_' } @@ -105,7 +125,12 @@ object Bases { private[bits] abstract class LenientHex extends HexAlphabet { def toIndex(c: Char) = { val i = Character.digit(c, 16) - if (i < 0) if (ignore(c)) -1 else throw new IllegalArgumentException else i + if (i >= 0) i + else + c match { + case c if ignore(c) => IgnoreChar + case HexBinCommentChar(_) => IgnoreRestOfLine + } } def ignore(c: Char) = c.isWhitespace || c == '_' } @@ -144,6 +169,7 @@ object Bases { val lookupIndex = c - indicesMin if (lookupIndex >= 0 && lookupIndex < indices.length && indices(lookupIndex) >= 0) indices(lookupIndex) + else if (ignore(c)) IgnoreChar else throw new IllegalArgumentException } def ignore(c: Char) = c.isWhitespace @@ -172,6 +198,7 @@ object Bases { val lookupIndex = c - indicesMin if (lookupIndex >= 0 && lookupIndex < indices.length && indices(lookupIndex) >= 0) 
indices(lookupIndex) + else if (ignore(c)) IgnoreChar else throw new IllegalArgumentException } def ignore(c: Char) = c == '-' || c.isWhitespace @@ -194,6 +221,7 @@ object Bases { case c if c >= 'P' && c <= 'Z' => c - 'P' + 9 + 8 + 5 case c if c >= 'a' && c <= 'k' => c - 'a' + 9 + 8 + 5 + 11 case c if c >= 'm' && c <= 'z' => c - 'm' + 9 + 8 + 5 + 11 + 11 + case c if ignore(c) => IgnoreChar case _ => throw new IllegalArgumentException } @@ -214,6 +242,7 @@ object Bases { case c if c >= '0' && c <= '9' => c - '0' + 26 + 26 case '+' => 62 case '/' => 63 + case c if ignore(c) => IgnoreChar case _ => throw new IllegalArgumentException } override def ignore(c: Char) = c.isWhitespace @@ -248,6 +277,7 @@ object Bases { case c if c >= '0' && c <= '9' => c - '0' + 26 + 26 case '-' => 62 case '_' => 63 + case c if ignore(c) => IgnoreChar case _ => throw new IllegalArgumentException } override def ignore(c: Char) = c.isWhitespace diff --git a/core/shared/src/main/scala/scodec/bits/BitVector.scala b/core/shared/src/main/scala/scodec/bits/BitVector.scala index 7169beb9..6a714f10 100644 --- a/core/shared/src/main/scala/scodec/bits/BitVector.scala +++ b/core/shared/src/main/scala/scodec/bits/BitVector.scala @@ -1500,13 +1500,16 @@ object BitVector extends BitVectorCompanionCrossPlatform { str: String, alphabet: Bases.BinaryAlphabet = Bases.Alphabets.Binary ): Either[String, BitVector] = - ByteVector.fromBinInternal(str, alphabet).map { case (bytes, size) => - val toDrop = size match { + try { + val (bytes, count) = ByteVector.fromBinInternal(str, alphabet) + val toDrop = count match { case 0 => 0 case n if n % 8 == 0 => 0 case n => 8 - (n % 8) } - bytes.toBitVector.drop(toDrop.toLong) + Right(bytes.toBitVector.drop(toDrop.toLong)) + } catch { + case t: IllegalArgumentException => Left(t.getMessage) } /** Constructs a `BitVector` from a binary string or returns `None` if the string is not valid diff --git a/core/shared/src/main/scala/scodec/bits/ByteVector.scala 
b/core/shared/src/main/scala/scodec/bits/ByteVector.scala index fa7d47c4..df8d139e 100644 --- a/core/shared/src/main/scala/scodec/bits/ByteVector.scala +++ b/core/shared/src/main/scala/scodec/bits/ByteVector.scala @@ -1753,6 +1753,10 @@ object ByteVector extends ByteVectorCompanionCrossPlatform { * is not valid hexadecimal. * * The string may start with a `0x` and it may contain whitespace or underscore characters. + * + * Single-line comments are supported - by default, any text after a #, ;, or | character is ignored + * until the start of the next line, though the comment characters are determined by the specified alphabet. + * * @group base */ def fromHexDescriptive( @@ -1785,11 +1789,11 @@ object ByteVector extends ByteVectorCompanionCrossPlatform { val c = withoutPrefix.charAt(idx) val nibble = if (defaults) { - Character.digit(c, 16) match { - case i if i >= 0 => i - case i if Character.isWhitespace(c) || c == '_' => -1 - case _ => throw new IllegalArgumentException - } + val i = Character.digit(c, 16) + if (i >= 0) i + else if (Character.isWhitespace(c) || c == '_') Bases.IgnoreChar + else if (Bases.Alphabets.HexBinCommentChar.unapply(c).isDefined) Bases.IgnoreRestOfLine + else throw new IllegalArgumentException } else alphabet.toIndex(c) if (nibble >= 0) { if (midByte) { @@ -1801,6 +1805,9 @@ object ByteVector extends ByteVectorCompanionCrossPlatform { midByte = true } count += 1 + } else if (nibble == Bases.IgnoreRestOfLine) { + // Ignore rest of line + while (idx < length && withoutPrefix.charAt(idx) != '\n') idx += 1 } idx += 1 } @@ -1855,55 +1862,80 @@ object ByteVector extends ByteVectorCompanionCrossPlatform { * not valid binary. * * The string may start with a `0b` and it may contain whitespace or underscore characters. + * + * Single-line comments are supported - by default, any text after a #, ;, or | character is ignored + * until the start of the next line, though the comment characters are determined by the specified alphabet.
+ * * @group base */ def fromBinDescriptive( str: String, alphabet: Bases.BinaryAlphabet = Bases.Alphabets.Binary - ): Either[String, ByteVector] = fromBinInternal(str, alphabet).map { case (res, _) => res } + ): Either[String, ByteVector] = + try Right(fromBinInternal(str, alphabet)._1) + catch { + case t: IllegalArgumentException => Left(t.getMessage) + } private[bits] def fromBinInternal( str: String, - alphabet: Bases.BinaryAlphabet = Bases.Alphabets.Binary - ): Either[String, (ByteVector, Int)] = { - val prefixed = (str.startsWith("0b")) || (str.startsWith("0B")) + alphabet: Bases.BinaryAlphabet + ): (ByteVector, Int) = { + val prefixed = str.length >= 2 && str.charAt(0) == '0' && { + val second = str.charAt(1) + second == 'b' || second == 'B' + } val withoutPrefix = if (prefixed) str.substring(2) else str var idx, byte, bits, count = 0 - var err: String = null - val bldr = ByteBuffer.allocate((str.size + 7) / 8) - while (idx < withoutPrefix.length && (err eq null)) { - val c = withoutPrefix(idx) - if (!alphabet.ignore(c)) - try { - byte = (byte << 1) | (1 & alphabet.toIndex(c)) + val length = withoutPrefix.length + val out = new Array[Byte]((length + 7) / 8) + var j = 0 + val defaults = alphabet eq Bases.Alphabets.Binary + try + while (idx < length) { + val c = withoutPrefix.charAt(idx) + val bit = + if (defaults) { + c match { + case '0' => 0 + case '1' => 1 + case _ if Character.isWhitespace(c) || c == '_' => Bases.IgnoreChar + case Bases.Alphabets.HexBinCommentChar(_) => Bases.IgnoreRestOfLine + case _ => throw new IllegalArgumentException + } + } else alphabet.toIndex(c) + if (bit >= 0) { + byte = (byte << 1) | bit bits += 1 count += 1 - } catch { - case _: IllegalArgumentException => - err = s"Invalid binary character '$c' at index ${idx + (if (prefixed) 2 else 0)}" + if (bits == 8) { + out(j) = byte.toByte + j += 1 + bits = 0 + byte = 0 + } + } else if (bit == Bases.IgnoreRestOfLine) { + // Ignore rest of line + while (idx < length && 
withoutPrefix.charAt(idx) != '\n') idx += 1 } - if (bits == 8) { - bldr.put(byte.toByte) - byte = 0 - bits = 0 + idx += 1 } - idx += 1 - } - if (err eq null) - Right( - ( - if (bits > 0) { - bldr.put((byte << (8 - bits)).toByte) - bldr.flip() - ByteVector(bldr).shiftRight((8 - bits).toLong, false) - } else { - bldr.flip() - ByteVector(bldr) - }, - count + catch { + case _: IllegalArgumentException => + val c = withoutPrefix.charAt(idx) + throw new IllegalArgumentException( + s"Invalid binary character '$c' at index ${idx + (if (prefixed) 2 else 0)}" ) - ) - else Left(err) + } + + val result = if (bits > 0) { + out(j) = (byte << (8 - bits)).toByte + j += 1 + ByteVector.view(out).shiftRight((8 - bits).toLong, false) + } else { + ByteVector.view(out).take(j) + } + (result, count) } /** Constructs a `ByteVector` from a binary string or returns `None` if the string is not valid diff --git a/core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala b/core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala index 16fc911e..798458e9 100644 --- a/core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala +++ b/core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala @@ -188,6 +188,50 @@ class ByteVectorTest extends BitsSuite { ) } + test("fromHexDescriptive with comments") { + assertEquals( + ByteVector.fromHexDescriptive(""" + deadbeef ; first line + 01020304 # second line + 05060708 + """), + Right(hex"deadbeef0102030405060708") + ) + + object CustomAlphabet extends Bases.Alphabets.LenientHex { + private val Chars = + Array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f') + def toChar(i: Int) = Chars(i) + override def toIndex(c: Char): Int = c match { + case '$' => Bases.IgnoreRestOfLine + case _ => super.toIndex(c) + } + } + + assertEquals( + ByteVector.fromHexDescriptive( + """ + deadbeef $ first line + 01020304 $ second line + 05060708 + """, + CustomAlphabet + ), + Right(hex"deadbeef0102030405060708") + ) + } + + property("hex 
with comments example") { + val packet = hex""" + ; Start of first packet from https://wiki.wireshark.org/uploads/__moin_import__/attachments/SampleCaptures/mpeg2_mp2t_with_cc_drop01.pcap + 01 00 5e 7b ad 47 00 0c db 78 7d 00 08 00 ; Ethernet header + 45 00 05 40 b6 9f 40 00 0c 11 de 95 51 a3 96 3c e9 70 03 28 ; IPv4 header + c3 50 15 7c 05 2c 00 00 ; UDP header + 47 02 00 1e ; MP2T header + """ + assertEquals(packet.size, 46L) + } + property("toHex fromHex roundtrip") { forAll((b: ByteVector) => ByteVector.fromHex(b.toHex).get == b) } @@ -214,6 +258,43 @@ class ByteVectorTest extends BitsSuite { ) } + test("fromBinDescriptive with comments") { + assertEquals( + ByteVector.fromBinDescriptive(""" + 00110011 ; first line + 11001100 # second line + 11110000 + """), + Right(bin"001100111100110011110000".bytes) + ) + + object CustomAlphabet extends Bases.BinaryAlphabet { + def toChar(i: Int) = i match { + case 0 => '0' + case 1 => '1' + } + def toIndex(c: Char): Int = c match { + case '0' => 0 + case '1' => 1 + case '$' => Bases.IgnoreRestOfLine + case _ => Bases.IgnoreChar + } + def ignore(c: Char): Boolean = c.isWhitespace + } + + assertEquals( + ByteVector.fromBinDescriptive( + """ + 00110011 $ first line + 11001100 $ second line + 11110000 + """, + CustomAlphabet + ), + Right(bin"001100111100110011110000".bytes) + ) + } + test("fromValidBin") { assert(ByteVector.fromValidBin(deadbeef.toBin) == deadbeef) intercept[IllegalArgumentException](ByteVector.fromValidBin("1101a000"))