diff --git a/benchmark/src/main/scala/ByteVectorBenchmarkFromHex.scala b/benchmark/src/main/scala/ByteVectorBenchmarkFromHex.scala new file mode 100644 index 00000000..8f525b34 --- /dev/null +++ b/benchmark/src/main/scala/ByteVectorBenchmarkFromHex.scala @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013, Scodec + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without modification, + * are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package scodec.bits + +import org.openjdk.jmh.annotations._ + +import java.util.concurrent.TimeUnit + +@BenchmarkMode(Array(Mode.Throughput)) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +class ByteVectorBenchmarkFromHex { + @Param(Array("1000")) + var size: Int = _ + + private var bvHex: String = _ + + @Setup(Level.Trial) + def setup(): Unit = { + val r = new scala.util.Random(size) + val bs = new Array[Byte](size) + r.nextBytes(bs) + bvHex = ByteVector.view(bs).toHex + } + + @Benchmark + def fromHex: Option[ByteVector] = + ByteVector.fromHex(bvHex) +} diff --git a/benchmark/src/main/scala/ByteVectorBenchmark.scala b/benchmark/src/main/scala/ByteVectorBenchmarkToHex.scala similarity index 91% rename from benchmark/src/main/scala/ByteVectorBenchmark.scala rename to benchmark/src/main/scala/ByteVectorBenchmarkToHex.scala index 0422049a..18c44c97 100644 --- a/benchmark/src/main/scala/ByteVectorBenchmark.scala +++ b/benchmark/src/main/scala/ByteVectorBenchmarkToHex.scala @@ -37,24 +37,21 @@ import java.util.concurrent.TimeUnit @BenchmarkMode(Array(Mode.Throughput)) @OutputTimeUnit(TimeUnit.MILLISECONDS) @State(Scope.Benchmark) -class ByteVectorBenchmark { +class ByteVectorBenchmarkToHex { @Param(Array("1000", "10000", "100000")) var size: Int = _ private var bv: ByteVector = _ - def bytes: Array[Byte] = { + @Setup(Level.Trial) + def setup(): Unit = { val r = new scala.util.Random(size) - val bs = new Array[Byte](size * 3) + val bs = new Array[Byte](size) r.nextBytes(bs) - bs + bv = ByteVector.view(bs) } - @Setup(Level.Trial) - def setup(): Unit = - bv = ByteVector.view(bytes) - @Benchmark - def encodeHex: String = + def toHex: String = bv.toHex } diff --git a/build.sbt b/build.sbt index c0e1a1fa..223aa60b 100644 --- a/build.sbt +++ b/build.sbt @@ -71,7 +71,8 @@ ThisBuild / mimaBinaryIssueFilters ++= Seq( ), ProblemFilters.exclude[IncompatibleMethTypeProblem]("scodec.bits.HexDumpFormat.render"), ProblemFilters.exclude[IncompatibleMethTypeProblem]("scodec.bits.HexDumpFormat.print"), - ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.HexDumpFormat.this") + ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.HexDumpFormat.this"), + ProblemFilters.exclude[IncompatibleResultTypeProblem]("scodec.bits.ByteVector.fromHexInternal") ) lazy val root = tlCrossRootProject.aggregate(core, benchmark) diff --git a/core/shared/src/main/scala/scodec/bits/Bases.scala b/core/shared/src/main/scala/scodec/bits/Bases.scala index 2c2b60b1..c895a0c0 100644 --- a/core/shared/src/main/scala/scodec/bits/Bases.scala +++ b/core/shared/src/main/scala/scodec/bits/Bases.scala @@ -103,13 +103,10 @@ object Bases { /** Abstract hex alphabet that supports `{0-9, A-F, a-f}` for looking up an index from a char. */ private[bits] abstract class LenientHex extends HexAlphabet { - def toIndex(c: Char) = - c match { - case c if c >= '0' && c <= '9' => c - '0' - case c if c >= 'a' && c <= 'f' => 10 + (c - 'a') - case c if c >= 'A' && c <= 'F' => 10 + (c - 'A') - case _ => throw new IllegalArgumentException - } + def toIndex(c: Char) = { + val i = Character.digit(c, 16) + if (i < 0) if (ignore(c)) -1 else throw new IllegalArgumentException else i + } def ignore(c: Char) = c.isWhitespace || c == '_' } diff --git a/core/shared/src/main/scala/scodec/bits/BitVector.scala b/core/shared/src/main/scala/scodec/bits/BitVector.scala index 3fa6e4ab..7169beb9 100644 --- a/core/shared/src/main/scala/scodec/bits/BitVector.scala +++ b/core/shared/src/main/scala/scodec/bits/BitVector.scala @@ -1545,9 +1545,12 @@ object BitVector extends BitVectorCompanionCrossPlatform { str: String, alphabet: Bases.HexAlphabet = Bases.Alphabets.HexLowercase ): Either[String, BitVector] = - ByteVector.fromHexInternal(str, alphabet).map { case (bytes, count) => + try { + val (bytes, count) = ByteVector.fromHexInternal(str, alphabet) val toDrop = if (count % 2 == 0) 0 else 4 - bytes.toBitVector.drop(toDrop.toLong) + Right(bytes.toBitVector.drop(toDrop.toLong)) + } catch { + case t: IllegalArgumentException => Left(t.getMessage) } /** Constructs a `BitVector` from a hexadecimal string or returns `None` if the string is not diff --git a/core/shared/src/main/scala/scodec/bits/ByteVector.scala b/core/shared/src/main/scala/scodec/bits/ByteVector.scala index 1059bc01..8c824efb 100644 --- a/core/shared/src/main/scala/scodec/bits/ByteVector.scala +++ b/core/shared/src/main/scala/scodec/bits/ByteVector.scala @@ -1732,52 +1732,66 @@ object ByteVector extends ByteVectorCompanionCrossPlatform { str: String, alphabet: Bases.HexAlphabet = Bases.Alphabets.HexLowercase ): Either[String, ByteVector] = - fromHexInternal(str, alphabet).map { case (res, _) => res } + try Right(fromHexInternal(str, alphabet)._1) + catch { + case t: IllegalArgumentException => Left(t.getMessage) + } private[bits] def fromHexInternal( str: String, alphabet: Bases.HexAlphabet - ): Either[String, (ByteVector, Int)] = { - val prefixed = (str.startsWith("0x")) || (str.startsWith("0X")) + ): (ByteVector, Int) = { + val prefixed = str.length >= 2 && str.charAt(0) == '0' && { + val second = str.charAt(1) + second == 'x' || second == 'X' + } val withoutPrefix = if (prefixed) str.substring(2) else str var idx, hi, count = 0 var midByte = false - var err: String = null - val bldr = ByteBuffer.allocate((str.size + 1) / 2) - while (idx < withoutPrefix.length && (err eq null)) { - val c = withoutPrefix(idx) - if (!alphabet.ignore(c)) - try { - val nibble = alphabet.toIndex(c) + val length = withoutPrefix.length + val out = new Array[Byte]((length + 1) / 2) + var j = 0 + val defaults = + (alphabet eq Bases.Alphabets.HexLowercase) || (alphabet eq Bases.Alphabets.HexUppercase) + try + while (idx < length) { + val c = withoutPrefix.charAt(idx) + val nibble = + if (defaults) { + Character.digit(c, 16) match { + case i if i >= 0 => i + case i if Character.isWhitespace(c) || c == '_' => -1 + case _ => throw new IllegalArgumentException + } + } else alphabet.toIndex(c) + if (nibble >= 0) { if (midByte) { - bldr.put((hi | nibble).toByte) + out(j) = (hi | nibble).toByte + j += 1 midByte = false } else { - hi = (nibble << 4).toByte.toInt + hi = nibble << 4 midByte = true } count += 1 - } catch { - case _: IllegalArgumentException => - err = s"Invalid hexadecimal character '$c' at index ${idx + (if (prefixed) 2 else 0)}" } - idx += 1 - } - if (err eq null) - Right( - ( - if (midByte) { - bldr.put(hi.toByte) - bldr.flip() - ByteVector(bldr).shiftRight(4, false) - } else { - bldr.flip() - ByteVector(bldr) - }, - count + idx += 1 + } + catch { + case _: IllegalArgumentException => + val c = withoutPrefix.charAt(idx) + throw new IllegalArgumentException( + s"Invalid hexadecimal character '$c' at index ${idx + (if (prefixed) 2 else 0)}" ) - ) - else Left(err) + } + val result = if (midByte) { + out(j) = hi.toByte + j += 1 + ByteVector.view(out).take(j).shiftRight(4, false) + } else { + ByteVector.view(out).take(j) + } + (result, count) } /** Constructs a `ByteVector` from a hexadecimal string or returns `None` if the string is not @@ -1789,7 +1803,11 @@ object ByteVector extends ByteVectorCompanionCrossPlatform { def fromHex( str: String, alphabet: Bases.HexAlphabet = Bases.Alphabets.HexLowercase - ): Option[ByteVector] = fromHexDescriptive(str, alphabet).toOption + ): Option[ByteVector] = + try Some(fromHexInternal(str, alphabet)._1) + catch { + case t: IllegalArgumentException => None + } /** Constructs a `ByteVector` from a hexadecimal string or throws an IllegalArgumentException if * the string is not valid hexadecimal. diff --git a/core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala b/core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala index c37c5281..9b1d8cb2 100644 --- a/core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala +++ b/core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala @@ -176,13 +176,15 @@ class ByteVectorTest extends BitsSuite { ByteVector.fromHexDescriptive("0xde_ad_be_e") == Right(ByteVector(0x0d, 0xea, 0xdb, 0xee)) ) - assert( + assertEquals( ByteVector - .fromHexDescriptive("garbage") == Left("Invalid hexadecimal character 'g' at index 0") + .fromHexDescriptive("garbage"), + Left("Invalid hexadecimal character 'g' at index 0") ) - assert( + assertEquals( ByteVector - .fromHexDescriptive("deadbefg") == Left("Invalid hexadecimal character 'g' at index 7") + .fromHexDescriptive("deadbefg"), + Left("Invalid hexadecimal character 'g' at index 7") ) }