Skip to content

Commit

Permalink
Improve performance of ByteVector.fromHex (#422)
Browse files Browse the repository at this point in the history
* Improve performance of ByteVector.fromHex

* wip

* wip

* Scalafmt

* Fix test

* Fix test

* Retore bincompat

* Inline default alphabet

* Fix headers

* Mima exclusion

* Mima exclusion

* Attempt to increase likelihood of inlining

* Scalafmt
  • Loading branch information
mpilquist authored Feb 26, 2023
1 parent b26ccce commit 24790a1
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 55 deletions.
57 changes: 57 additions & 0 deletions benchmark/src/main/scala/ByteVectorBenchmarkFromHex.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
/*
* Copyright (c) 2013, Scodec
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

package scodec.bits

import org.openjdk.jmh.annotations._

import java.util.concurrent.TimeUnit

@BenchmarkMode(Array(Mode.Throughput))
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
class ByteVectorBenchmarkFromHex {
@Param(Array("1000"))
var size: Int = _

private var bvHex: String = _

@Setup(Level.Trial)
def setup(): Unit = {
val r = new scala.util.Random(size)
val bs = new Array[Byte](size)
r.nextBytes(bs)
bvHex = ByteVector.view(bs).toHex
}

@Benchmark
def fromHex: Option[ByteVector] =
ByteVector.fromHex(bvHex)
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,24 +37,21 @@ import java.util.concurrent.TimeUnit
@BenchmarkMode(Array(Mode.Throughput))
@OutputTimeUnit(TimeUnit.MILLISECONDS)
@State(Scope.Benchmark)
class ByteVectorBenchmark {
class ByteVectorBenchmarkToHex {
@Param(Array("1000", "10000", "100000"))
var size: Int = _

private var bv: ByteVector = _

def bytes: Array[Byte] = {
@Setup(Level.Trial)
def setup(): Unit = {
val r = new scala.util.Random(size)
val bs = new Array[Byte](size * 3)
val bs = new Array[Byte](size)
r.nextBytes(bs)
bs
bv = ByteVector.view(bs)
}

@Setup(Level.Trial)
def setup(): Unit =
bv = ByteVector.view(bytes)

@Benchmark
def encodeHex: String =
def toHex: String =
bv.toHex
}
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ ThisBuild / mimaBinaryIssueFilters ++= Seq(
),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("scodec.bits.HexDumpFormat.render"),
ProblemFilters.exclude[IncompatibleMethTypeProblem]("scodec.bits.HexDumpFormat.print"),
ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.HexDumpFormat.this")
ProblemFilters.exclude[DirectMissingMethodProblem]("scodec.bits.HexDumpFormat.this"),
ProblemFilters.exclude[IncompatibleResultTypeProblem]("scodec.bits.ByteVector.fromHexInternal")
)

lazy val root = tlCrossRootProject.aggregate(core, benchmark)
Expand Down
11 changes: 4 additions & 7 deletions core/shared/src/main/scala/scodec/bits/Bases.scala
Original file line number Diff line number Diff line change
Expand Up @@ -103,13 +103,10 @@ object Bases {
/** Abstract hex alphabet that supports `{0-9, A-F, a-f}` for looking up an index from a char.
*/
private[bits] abstract class LenientHex extends HexAlphabet {
def toIndex(c: Char) =
c match {
case c if c >= '0' && c <= '9' => c - '0'
case c if c >= 'a' && c <= 'f' => 10 + (c - 'a')
case c if c >= 'A' && c <= 'F' => 10 + (c - 'A')
case _ => throw new IllegalArgumentException
}
def toIndex(c: Char) = {
val i = Character.digit(c, 16)
if (i < 0) if (ignore(c)) -1 else throw new IllegalArgumentException else i
}
def ignore(c: Char) = c.isWhitespace || c == '_'
}

Expand Down
7 changes: 5 additions & 2 deletions core/shared/src/main/scala/scodec/bits/BitVector.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1545,9 +1545,12 @@ object BitVector extends BitVectorCompanionCrossPlatform {
str: String,
alphabet: Bases.HexAlphabet = Bases.Alphabets.HexLowercase
): Either[String, BitVector] =
ByteVector.fromHexInternal(str, alphabet).map { case (bytes, count) =>
try {
val (bytes, count) = ByteVector.fromHexInternal(str, alphabet)
val toDrop = if (count % 2 == 0) 0 else 4
bytes.toBitVector.drop(toDrop.toLong)
Right(bytes.toBitVector.drop(toDrop.toLong))
} catch {
case t: IllegalArgumentException => Left(t.getMessage)
}

/** Constructs a `BitVector` from a hexadecimal string or returns `None` if the string is not
Expand Down
82 changes: 50 additions & 32 deletions core/shared/src/main/scala/scodec/bits/ByteVector.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1732,52 +1732,66 @@ object ByteVector extends ByteVectorCompanionCrossPlatform {
str: String,
alphabet: Bases.HexAlphabet = Bases.Alphabets.HexLowercase
): Either[String, ByteVector] =
fromHexInternal(str, alphabet).map { case (res, _) => res }
try Right(fromHexInternal(str, alphabet)._1)
catch {
case t: IllegalArgumentException => Left(t.getMessage)
}

private[bits] def fromHexInternal(
str: String,
alphabet: Bases.HexAlphabet
): Either[String, (ByteVector, Int)] = {
val prefixed = (str.startsWith("0x")) || (str.startsWith("0X"))
): (ByteVector, Int) = {
val prefixed = str.length >= 2 && str.charAt(0) == '0' && {
val second = str.charAt(1)
second == 'x' || second == 'X'
}
val withoutPrefix = if (prefixed) str.substring(2) else str
var idx, hi, count = 0
var midByte = false
var err: String = null
val bldr = ByteBuffer.allocate((str.size + 1) / 2)
while (idx < withoutPrefix.length && (err eq null)) {
val c = withoutPrefix(idx)
if (!alphabet.ignore(c))
try {
val nibble = alphabet.toIndex(c)
val length = withoutPrefix.length
val out = new Array[Byte]((length + 1) / 2)
var j = 0
val defaults =
(alphabet eq Bases.Alphabets.HexLowercase) || (alphabet eq Bases.Alphabets.HexUppercase)
try
while (idx < length) {
val c = withoutPrefix.charAt(idx)
val nibble =
if (defaults) {
Character.digit(c, 16) match {
case i if i >= 0 => i
case i if Character.isWhitespace(c) || c == '_' => -1
case _ => throw new IllegalArgumentException
}
} else alphabet.toIndex(c)
if (nibble >= 0) {
if (midByte) {
bldr.put((hi | nibble).toByte)
out(j) = (hi | nibble).toByte
j += 1
midByte = false
} else {
hi = (nibble << 4).toByte.toInt
hi = nibble << 4
midByte = true
}
count += 1
} catch {
case _: IllegalArgumentException =>
err = s"Invalid hexadecimal character '$c' at index ${idx + (if (prefixed) 2 else 0)}"
}
idx += 1
}
if (err eq null)
Right(
(
if (midByte) {
bldr.put(hi.toByte)
bldr.flip()
ByteVector(bldr).shiftRight(4, false)
} else {
bldr.flip()
ByteVector(bldr)
},
count
idx += 1
}
catch {
case _: IllegalArgumentException =>
val c = withoutPrefix.charAt(idx)
throw new IllegalArgumentException(
s"Invalid hexadecimal character '$c' at index ${idx + (if (prefixed) 2 else 0)}"
)
)
else Left(err)
}
val result = if (midByte) {
out(j) = hi.toByte
j += 1
ByteVector.view(out).take(j).shiftRight(4, false)
} else {
ByteVector.view(out).take(j)
}
(result, count)
}

/** Constructs a `ByteVector` from a hexadecimal string or returns `None` if the string is not
Expand All @@ -1789,7 +1803,11 @@ object ByteVector extends ByteVectorCompanionCrossPlatform {
def fromHex(
str: String,
alphabet: Bases.HexAlphabet = Bases.Alphabets.HexLowercase
): Option[ByteVector] = fromHexDescriptive(str, alphabet).toOption
): Option[ByteVector] =
try Some(fromHexInternal(str, alphabet)._1)
catch {
case t: IllegalArgumentException => None
}

/** Constructs a `ByteVector` from a hexadecimal string or throws an IllegalArgumentException if
* the string is not valid hexadecimal.
Expand Down
10 changes: 6 additions & 4 deletions core/shared/src/test/scala/scodec/bits/ByteVectorTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -176,13 +176,15 @@ class ByteVectorTest extends BitsSuite {
ByteVector.fromHexDescriptive("0xde_ad_be_e") == Right(ByteVector(0x0d, 0xea, 0xdb, 0xee))
)

assert(
assertEquals(
ByteVector
.fromHexDescriptive("garbage") == Left("Invalid hexadecimal character 'g' at index 0")
.fromHexDescriptive("garbage"),
Left("Invalid hexadecimal character 'g' at index 0")
)
assert(
assertEquals(
ByteVector
.fromHexDescriptive("deadbefg") == Left("Invalid hexadecimal character 'g' at index 7")
.fromHexDescriptive("deadbefg"),
Left("Invalid hexadecimal character 'g' at index 7")
)
}

Expand Down

0 comments on commit 24790a1

Please sign in to comment.