Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support comments in hex and bin strings #498

Merged
merged 7 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,11 @@ ThisBuild / mimaBinaryIssueFilters ++= Seq(
ProblemFilters.exclude[IncompatibleMethTypeProblem]("scodec.bits.HexDumpFormat.print"),
ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.HexDumpFormat.this"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("scodec.bits.ByteVector.fromHexInternal"),
ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.ByteVector#AtEmpty.apply")
ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.ByteVector#AtEmpty.apply"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("scodec.bits.ByteVector.fromBinInternal"),
ProblemFilters.exclude[DirectMissingMethodProblem](
"scodec.bits.ByteVector.fromBinInternal$default$2"
)
)

lazy val root = tlCrossRootProject.aggregate(core, benchmark)
Expand Down
44 changes: 37 additions & 7 deletions core/shared/src/main/scala/scodec/bits/Bases.scala
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ package scodec.bits
/** Provides types related to base conversion -- e.g., binary, hexadecimal, and base 64. */
object Bases {

/** Result of `Alphabet#toIndex` that indicates the character should be ignored.
  *
  * The value is negative, so parsers can tell it apart from a real index with a `>= 0` check.
  */
final val IgnoreChar: Int = -1

/** Result of `Alphabet#toIndex` that indicates the character and the rest of the line should be ignored.
  *
  * The value is negative, so parsers can tell it apart from a real index with a `>= 0` check.
  */
final val IgnoreRestOfLine: Int = -2

/** Partial mapping between characters and indices used in base conversions.
*/
trait Alphabet {
Expand Down Expand Up @@ -76,14 +82,26 @@ object Bases {
/** Predefined alphabets for use in base conversions. */
object Alphabets {

/** Extractor for the characters that begin a single-line comment in hex and binary
  * strings: `#`, `;` and `|`. A successful match yields the matched character itself.
  */
private[bits] object HexBinCommentChar {
  def unapply(c: Char): Option[Char] =
    if (c == '#' || c == ';' || c == '|') Some(c) else None
}

/** Binary alphabet that uses `{0, 1}` and allows whitespace and underscores for separation. */
object Binary extends BinaryAlphabet {
def toChar(i: Int) = if (i == 0) '0' else '1'
// Classifies a character: a bit value (0 or 1), a negative `Ignore*` sentinel, or an
// IllegalArgumentException for anything else.
def toIndex(c: Char) =
c match {
case '0' => 0
case '1' => 1
case _ => throw new IllegalArgumentException
case '0' => 0
case '1' => 1
// Separators accepted by `ignore` (whitespace and '_') are skipped one character at a time.
case c if ignore(c) => IgnoreChar
// '#', ';' and '|' start a comment, so the remainder of the line is skipped.
case HexBinCommentChar(_) => IgnoreRestOfLine
case _ => throw new IllegalArgumentException
}
def ignore(c: Char) = c.isWhitespace || c == '_'
}
Expand All @@ -93,9 +111,11 @@ object Bases {
def toChar(i: Int) = if (i == 0) 't' else 'f'
// Classifies a character: 't'/'T' map to 0 and 'f'/'F' map to 1; other characters yield a
// negative `Ignore*` sentinel or an IllegalArgumentException.
def toIndex(c: Char) =
c match {
case 't' | 'T' => 0
case 'f' | 'F' => 1
case _ => throw new IllegalArgumentException
case 't' | 'T' => 0
case 'f' | 'F' => 1
// Separators accepted by `ignore` are skipped one character at a time.
case c if ignore(c) => IgnoreChar
// '#', ';' and '|' start a comment, so the remainder of the line is skipped.
case HexBinCommentChar(_) => IgnoreRestOfLine
case _ => throw new IllegalArgumentException
}
def ignore(c: Char) = c.isWhitespace || c == '_'
}
Expand All @@ -105,7 +125,12 @@ object Bases {
private[bits] abstract class LenientHex extends HexAlphabet {
  // Classifies a character as a nibble value (0-15), `IgnoreChar` for a separator accepted by
  // `ignore`, or `IgnoreRestOfLine` for a comment character ('#', ';', '|').
  // Any other character is rejected with an IllegalArgumentException.
  def toIndex(c: Char) = {
    val i = Character.digit(c, 16)
    if (i >= 0) i
    else
      c match {
        case c if ignore(c)       => IgnoreChar
        case HexBinCommentChar(_) => IgnoreRestOfLine
        // Explicit default: without it an unrecognised character escapes as a
        // scala.MatchError rather than the IllegalArgumentException that the other
        // alphabets throw for invalid input.
        case _ => throw new IllegalArgumentException
      }
  }
  def ignore(c: Char) = c.isWhitespace || c == '_'
}
Expand Down Expand Up @@ -144,6 +169,7 @@ object Bases {
val lookupIndex = c - indicesMin
if (lookupIndex >= 0 && lookupIndex < indices.length && indices(lookupIndex) >= 0)
indices(lookupIndex)
else if (ignore(c)) IgnoreChar
else throw new IllegalArgumentException
}
def ignore(c: Char) = c.isWhitespace
Expand Down Expand Up @@ -172,6 +198,7 @@ object Bases {
val lookupIndex = c - indicesMin
if (lookupIndex >= 0 && lookupIndex < indices.length && indices(lookupIndex) >= 0)
indices(lookupIndex)
else if (ignore(c)) IgnoreChar
else throw new IllegalArgumentException
}
def ignore(c: Char) = c == '-' || c.isWhitespace
Expand All @@ -194,6 +221,7 @@ object Bases {
case c if c >= 'P' && c <= 'Z' => c - 'P' + 9 + 8 + 5
case c if c >= 'a' && c <= 'k' => c - 'a' + 9 + 8 + 5 + 11
case c if c >= 'm' && c <= 'z' => c - 'm' + 9 + 8 + 5 + 11 + 11
case c if ignore(c) => IgnoreChar
case _ => throw new IllegalArgumentException
}

Expand All @@ -214,6 +242,7 @@ object Bases {
case c if c >= '0' && c <= '9' => c - '0' + 26 + 26
case '+' => 62
case '/' => 63
case c if ignore(c) => IgnoreChar
case _ => throw new IllegalArgumentException
}
override def ignore(c: Char) = c.isWhitespace
Expand Down Expand Up @@ -248,6 +277,7 @@ object Bases {
case c if c >= '0' && c <= '9' => c - '0' + 26 + 26
case '-' => 62
case '_' => 63
case c if ignore(c) => IgnoreChar
case _ => throw new IllegalArgumentException
}
override def ignore(c: Char) = c.isWhitespace
Expand Down
9 changes: 6 additions & 3 deletions core/shared/src/main/scala/scodec/bits/BitVector.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1500,13 +1500,16 @@ object BitVector extends BitVectorCompanionCrossPlatform {
str: String,
alphabet: Bases.BinaryAlphabet = Bases.Alphabets.Binary
): Either[String, BitVector] =
ByteVector.fromBinInternal(str, alphabet).map { case (bytes, size) =>
val toDrop = size match {
try {
val (bytes, count) = ByteVector.fromBinInternal(str, alphabet)
val toDrop = count match {
case 0 => 0
case n if n % 8 == 0 => 0
case n => 8 - (n % 8)
}
bytes.toBitVector.drop(toDrop.toLong)
Right(bytes.toBitVector.drop(toDrop.toLong))
} catch {
case t: IllegalArgumentException => Left(t.getMessage)
}

/** Constructs a `BitVector` from a binary string or returns `None` if the string is not valid
Expand Down
110 changes: 71 additions & 39 deletions core/shared/src/main/scala/scodec/bits/ByteVector.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1753,6 +1753,10 @@ object ByteVector extends ByteVectorCompanionCrossPlatform {
* is not valid hexadecimal.
*
* The string may start with a `0x` and it may contain whitespace or underscore characters.
*
* Single-line comments are supported: by default, any text after a `#`, `;`, or `|` character is ignored
* until the start of the next line. The comment characters are determined by the supplied alphabet.
*
* @group base
*/
def fromHexDescriptive(
Expand Down Expand Up @@ -1785,11 +1789,11 @@ object ByteVector extends ByteVectorCompanionCrossPlatform {
val c = withoutPrefix.charAt(idx)
val nibble =
if (defaults) {
Character.digit(c, 16) match {
case i if i >= 0 => i
case i if Character.isWhitespace(c) || c == '_' => -1
case _ => throw new IllegalArgumentException
}
val i = Character.digit(c, 16)
if (i >= 0) i
else if (Character.isWhitespace(c) || c == '_') Bases.IgnoreChar
else if (Bases.Alphabets.HexBinCommentChar.unapply(c).isDefined) Bases.IgnoreRestOfLine
else throw new IllegalArgumentException
} else alphabet.toIndex(c)
if (nibble >= 0) {
if (midByte) {
Expand All @@ -1801,6 +1805,9 @@ object ByteVector extends ByteVectorCompanionCrossPlatform {
midByte = true
}
count += 1
} else if (nibble == Bases.IgnoreRestOfLine) {
// Ignore rest of line
while (idx < length && withoutPrefix.charAt(idx) != '\n') idx += 1
}
idx += 1
}
Expand Down Expand Up @@ -1855,55 +1862,80 @@ object ByteVector extends ByteVectorCompanionCrossPlatform {
* not valid binary.
*
* The string may start with a `0b` and it may contain whitespace or underscore characters.
*
* Single-line comments are supported: by default, any text after a `#`, `;`, or `|` character is ignored
* until the start of the next line. The comment characters are determined by the supplied alphabet.
*
* @group base
*/
def fromBinDescriptive(
str: String,
alphabet: Bases.BinaryAlphabet = Bases.Alphabets.Binary
): Either[String, ByteVector] = fromBinInternal(str, alphabet).map { case (res, _) => res }
): Either[String, ByteVector] =
// fromBinInternal signals invalid input by throwing IllegalArgumentException; the exception
// message becomes the Left value. The bit count in the second tuple element is discarded.
try Right(fromBinInternal(str, alphabet)._1)
catch {
case t: IllegalArgumentException => Left(t.getMessage)
}

/** Parses a binary string into bytes and reports how many bit characters were read.
  *
  * An optional `0b`/`0B` prefix is skipped. Each remaining character is classified by the
  * alphabet as a bit (a non-negative index), as `Bases.IgnoreRestOfLine` (skip everything up
  * to the next `\n`), or as any other negative value (skip just that character).
  *
  * @param str      text to parse
  * @param alphabet alphabet used to classify characters; `Bases.Alphabets.Binary` is handled
  *                 by an inlined match instead of a call to `toIndex`
  * @return the parsed bytes (right-aligned when the bit count is not a multiple of 8) paired
  *         with the number of bits read
  * @throws IllegalArgumentException if a character is rejected; the message names the
  *                                  character and its index in `str`
  */
private[bits] def fromBinInternal(
    str: String,
    alphabet: Bases.BinaryAlphabet
): (ByteVector, Int) = {
  val prefixed = str.length >= 2 && str.charAt(0) == '0' && {
    val second = str.charAt(1)
    second == 'b' || second == 'B'
  }
  val withoutPrefix = if (prefixed) str.substring(2) else str
  var idx, byte, bits, count = 0
  val length = withoutPrefix.length
  // Sized from the character count, so it is an upper bound: ignored characters and
  // comments mean fewer than `out.length` bytes may actually be written.
  val out = new Array[Byte]((length + 7) / 8)
  var j = 0
  val defaults = alphabet eq Bases.Alphabets.Binary
  try
    while (idx < length) {
      val c = withoutPrefix.charAt(idx)
      val bit =
        if (defaults) {
          c match {
            case '0'                                        => 0
            case '1'                                        => 1
            case _ if Character.isWhitespace(c) || c == '_' => Bases.IgnoreChar
            case Bases.Alphabets.HexBinCommentChar(_)       => Bases.IgnoreRestOfLine
            case _                                          => throw new IllegalArgumentException
          }
        } else alphabet.toIndex(c)
      if (bit >= 0) {
        byte = (byte << 1) | bit
        bits += 1
        count += 1
        if (bits == 8) {
          out(j) = byte.toByte
          j += 1
          bits = 0
          byte = 0
        }
      } else if (bit == Bases.IgnoreRestOfLine) {
        // Advance to the line terminator; the increment below then steps over it.
        while (idx < length && withoutPrefix.charAt(idx) != '\n') idx += 1
      }
      idx += 1
    }
  catch {
    case _: IllegalArgumentException =>
      // idx still points at the offending character because it is only advanced after a
      // character has been classified successfully.
      val c = withoutPrefix.charAt(idx)
      throw new IllegalArgumentException(
        s"Invalid binary character '$c' at index ${idx + (if (prefixed) 2 else 0)}"
      )
  }

  val result = if (bits > 0) {
    // Flush the trailing partial byte left-aligned, then shift the whole vector right so
    // the value ends up right-aligned.
    out(j) = (byte << (8 - bits)).toByte
    j += 1
    // Truncate to the bytes actually written before shifting; otherwise unused trailing
    // zero bytes of `out` would be included in the result.
    ByteVector.view(out).take(j.toLong).shiftRight((8 - bits).toLong, false)
  } else {
    ByteVector.view(out).take(j.toLong)
  }
  (result, count)
}

/** Constructs a `ByteVector` from a binary string or returns `None` if the string is not valid
Expand Down
81 changes: 81 additions & 0 deletions core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,50 @@ class ByteVectorTest extends BitsSuite {
)
}

// Checks that `;` and `#` start single-line comments with the default hex alphabet, and that
// a custom alphabet can designate its own comment character.
test("fromHexDescriptive with comments") {
assertEquals(
ByteVector.fromHexDescriptive("""
deadbeef ; first line
01020304 # second line
05060708
"""),
Right(hex"deadbeef0102030405060708")
)

// Treats '$' as a comment start and defers every other character to LenientHex.
object CustomAlphabet extends Bases.Alphabets.LenientHex {
private val Chars =
Array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f')
def toChar(i: Int) = Chars(i)
override def toIndex(c: Char): Int = c match {
case '$' => Bases.IgnoreRestOfLine
case _ => super.toIndex(c)
}
}

assertEquals(
ByteVector.fromHexDescriptive(
"""
deadbeef $ first line
01020304 $ second line
05060708
""",
CustomAlphabet
),
Right(hex"deadbeef0102030405060708")
)
}

// Parses an annotated packet dump with the `hex` interpolator and checks only the resulting
// byte count: 14 + 20 + 8 + 4 = 46 bytes across the four data lines.
property("hex with comments example") {
val packet = hex"""
; Start of first packet from https://wiki.wireshark.org/uploads/__moin_import__/attachments/SampleCaptures/mpeg2_mp2t_with_cc_drop01.pcap
01 00 5e 7b ad 47 00 0c db 78 7d 00 08 00 ; Ethernet header
45 00 05 40 b6 9f 40 00 0c 11 de 95 51 a3 96 3c e9 70 03 28 ; IPv4 header
c3 50 15 7c 05 2c 00 00 ; UDP header
47 02 00 1e ; MP2T header
"""
assertEquals(packet.size, 46L)
}

property("toHex fromHex roundtrip") {
forAll((b: ByteVector) => ByteVector.fromHex(b.toHex).get == b)
}
Expand All @@ -214,6 +258,43 @@ class ByteVectorTest extends BitsSuite {
)
}

// Checks that `;` and `#` start single-line comments with the default binary alphabet, and
// that a custom alphabet can designate its own comment character.
test("fromBinDescriptive with comments") {
assertEquals(
ByteVector.fromBinDescriptive("""
00110011 ; first line
11001100 # second line
11110000
"""),
Right(bin"001100111100110011110000".bytes)
)

// Treats '$' as a comment start; every character other than '0', '1' and '$' is reported
// as IgnoreChar rather than rejected.
object CustomAlphabet extends Bases.BinaryAlphabet {
def toChar(i: Int) = i match {
case 0 => '0'
case 1 => '1'
}
def toIndex(c: Char): Int = c match {
case '0' => 0
case '1' => 1
case '$' => Bases.IgnoreRestOfLine
case _ => Bases.IgnoreChar
}
def ignore(c: Char): Boolean = c.isWhitespace
}

assertEquals(
ByteVector.fromBinDescriptive(
"""
00110011 $ first line
11001100 $ second line
11110000
""",
CustomAlphabet
),
Right(bin"001100111100110011110000".bytes)
)
}

test("fromValidBin") {
assert(ByteVector.fromValidBin(deadbeef.toBin) == deadbeef)
intercept[IllegalArgumentException](ByteVector.fromValidBin("1101a000"))
Expand Down