Skip to content

Commit

Permalink
huff0: Speed up symbol counting (#887)
Browse files Browse the repository at this point in the history
The compiler inserts a nil check instruction for s inside the first loop in
countSimple. Hoist that check out of the loop for some extra throughput:

	goos: linux
	goarch: amd64
	pkg: github.com/klauspost/compress/huff0
	cpu: Intel(R) Core(TM) i7-3770K CPU @ 3.50GHz
	                                         │  huff0/old   │           huff0/nilcheck            │
	                                         │     B/s      │     B/s       vs base               │
	Compress1XReuseNone/digits-8               438.9Mi ± 0%   467.7Mi ± 1%   +6.55% (p=0.002 n=6)
	Compress1XReuseNone/gettysburg-8           247.3Mi ± 2%   257.4Mi ± 1%   +4.08% (p=0.002 n=6)
	Compress1XReuseNone/twain-8                349.2Mi ± 1%   367.9Mi ± 1%   +5.35% (p=0.002 n=6)
	Compress1XReuseNone/low-ent.10k-8          474.3Mi ± 1%   488.4Mi ± 0%   +2.98% (p=0.002 n=6)
	Compress1XReuseNone/superlow-ent-10k-8     303.4Mi ± 1%   341.6Mi ± 0%  +12.57% (p=0.002 n=6)
	Compress1XReuseNone/crash2-8               10.89Mi ± 3%   10.90Mi ± 1%        ~ (p=0.794 n=6)
	Compress1XReuseNone/endzerobits-8          15.40Mi ± 5%   15.54Mi ± 0%   +0.87% (p=0.006 n=6)
	Compress1XReuseNone/endnonzero-8           7.548Mi ± 2%   7.539Mi ± 5%        ~ (p=0.855 n=6)
	Compress1XReuseNone/case1-8                14.29Mi ± 1%   14.39Mi ± 1%        ~ (p=0.082 n=6)
	Compress1XReuseNone/case2-8                11.89Mi ± 1%   11.92Mi ± 0%        ~ (p=0.121 n=6)
	Compress1XReuseNone/case3-8                12.96Mi ± 0%   12.92Mi ± 0%        ~ (p=0.201 n=6)
	Compress1XReuseNone/pngdata.001-8          289.2Mi ± 1%   318.2Mi ± 0%  +10.03% (p=0.002 n=6)
	Compress1XReuseNone/normcount2-8           34.46Mi ± 1%   34.30Mi ± 1%        ~ (p=0.485 n=6)
	Compress1XReuseAllow/digits-8              458.0Mi ± 1%   490.6Mi ± 1%   +7.11% (p=0.002 n=6)
	Compress1XReuseAllow/gettysburg-8          279.5Mi ± 3%   293.1Mi ± 1%   +4.85% (p=0.002 n=6)
	Compress1XReuseAllow/twain-8               348.2Mi ± 1%   367.2Mi ± 0%   +5.45% (p=0.002 n=6)
	Compress1XReuseAllow/low-ent.10k-8         478.0Mi ± 1%   490.4Mi ± 1%   +2.58% (p=0.002 n=6)
	Compress1XReuseAllow/superlow-ent-10k-8    307.2Mi ± 0%   345.6Mi ± 0%  +12.49% (p=0.002 n=6)
	Compress1XReuseAllow/crash2-8              16.92Mi ± 1%   17.09Mi ± 1%   +0.99% (p=0.006 n=6)
	Compress1XReuseAllow/endzerobits-8         16.75Mi ± 2%   16.84Mi ± 0%   +0.54% (p=0.002 n=6)
	Compress1XReuseAllow/endnonzero-8          12.58Mi ± 1%   12.65Mi ± 0%   +0.57% (p=0.002 n=6)
	Compress1XReuseAllow/case1-8               19.77Mi ± 1%   19.81Mi ± 1%        ~ (p=0.589 n=6)
	Compress1XReuseAllow/case2-8               16.96Mi ± 3%   16.58Mi ± 3%        ~ (p=0.288 n=6)
	Compress1XReuseAllow/case3-8               18.04Mi ± 2%   17.90Mi ± 2%        ~ (p=0.818 n=6)
	Compress1XReuseAllow/pngdata.001-8         291.6Mi ± 0%   322.0Mi ± 0%  +10.44% (p=0.002 n=6)
	Compress1XReuseAllow/normcount2-8          48.58Mi ± 1%   48.38Mi ± 1%        ~ (p=0.258 n=6)
	Compress1XReusePrefer/digits-8             460.6Mi ± 0%   493.0Mi ± 0%   +7.04% (p=0.002 n=6)
	Compress1XReusePrefer/gettysburg-8         412.8Mi ± 1%   436.7Mi ± 2%   +5.77% (p=0.002 n=6)
	Compress1XReusePrefer/twain-8              350.4Mi ± 0%   369.4Mi ± 0%   +5.41% (p=0.002 n=6)
	Compress1XReusePrefer/low-ent.10k-8        481.8Mi ± 0%   493.6Mi ± 0%   +2.44% (p=0.002 n=6)
	Compress1XReusePrefer/superlow-ent-10k-8   311.3Mi ± 1%   351.8Mi ± 0%  +12.99% (p=0.002 n=6)
	Compress1XReusePrefer/crash2-8             63.51Mi ± 1%   65.02Mi ± 1%   +2.38% (p=0.002 n=6)
	Compress1XReusePrefer/endzerobits-8        24.28Mi ± 0%   24.38Mi ± 0%   +0.43% (p=0.004 n=6)
	Compress1XReusePrefer/endnonzero-8         33.18Mi ± 0%   33.35Mi ± 0%   +0.49% (p=0.017 n=6)
	Compress1XReusePrefer/case1-8              148.9Mi ± 1%   165.1Mi ± 0%  +10.88% (p=0.002 n=6)
	Compress1XReusePrefer/case2-8              141.4Mi ± 0%   142.9Mi ± 0%   +1.07% (p=0.002 n=6)
	Compress1XReusePrefer/case3-8              152.1Mi ± 0%   154.3Mi ± 0%   +1.42% (p=0.002 n=6)
	Compress1XReusePrefer/pngdata.001-8        299.3Mi ± 1%   331.3Mi ± 0%  +10.70% (p=0.002 n=6)
	Compress1XReusePrefer/normcount2-8         210.7Mi ± 1%   215.1Mi ± 1%   +2.07% (p=0.002 n=6)
	Compress4XReuseNone/digits-8               457.9Mi ± 1%   490.0Mi ± 0%   +7.01% (p=0.002 n=6)
	Compress4XReuseNone/gettysburg-8           245.4Mi ± 0%   255.5Mi ± 0%   +4.11% (p=0.002 n=6)
	Compress4XReuseNone/twain-8                348.3Mi ± 0%   367.9Mi ± 0%   +5.63% (p=0.002 n=6)
	Compress4XReuseNone/low-ent.10k-8          475.1Mi ± 1%   487.0Mi ± 0%   +2.50% (p=0.002 n=6)
	Compress4XReuseNone/superlow-ent-10k-8     302.4Mi ± 0%   339.3Mi ± 3%  +12.19% (p=0.002 n=6)
	Compress4XReuseNone/case1-8                14.31Mi ± 0%   14.24Mi ± 1%        ~ (p=0.119 n=6)
	Compress4XReuseNone/case2-8                11.69Mi ± 1%   11.66Mi ± 1%        ~ (p=0.502 n=6)
	Compress4XReuseNone/case3-8                12.72Mi ± 0%   12.67Mi ± 1%        ~ (p=0.102 n=6)
	Compress4XReuseNone/pngdata.001-8          289.0Mi ± 1%   317.7Mi ± 0%   +9.92% (p=0.002 n=6)
	Compress4XReuseNone/normcount2-8           33.35Mi ± 1%   33.45Mi ± 3%        ~ (p=0.909 n=6)
	Compress4XReuseAllow/digits-8              458.1Mi ± 2%   491.1Mi ± 0%   +7.21% (p=0.002 n=6)
	Compress4XReuseAllow/gettysburg-8          281.0Mi ± 1%   292.5Mi ± 0%   +4.09% (p=0.002 n=6)
	Compress4XReuseAllow/twain-8               348.8Mi ± 0%   368.4Mi ± 1%   +5.63% (p=0.002 n=6)
	Compress4XReuseAllow/low-ent.10k-8         477.3Mi ± 0%   488.7Mi ± 2%        ~ (p=0.065 n=6)
	Compress4XReuseAllow/superlow-ent-10k-8    305.8Mi ± 0%   344.4Mi ± 0%  +12.63% (p=0.002 n=6)
	Compress4XReuseAllow/case1-8               19.34Mi ± 1%   19.54Mi ± 2%   +1.01% (p=0.039 n=6)
	Compress4XReuseAllow/case2-8               16.57Mi ± 0%   15.89Mi ± 5%        ~ (p=0.061 n=6)
	Compress4XReuseAllow/case3-8               17.68Mi ± 0%   17.17Mi ± 8%        ~ (p=0.061 n=6)
	Compress4XReuseAllow/pngdata.001-8         291.2Mi ± 0%   319.6Mi ± 1%   +9.75% (p=0.002 n=6)
	Compress4XReuseAllow/normcount2-8          47.46Mi ± 1%   47.57Mi ± 1%        ~ (p=1.000 n=6)
	Compress4XReusePrefer/digits-8             460.0Mi ± 0%   492.9Mi ± 0%   +7.14% (p=0.002 n=6)
	Compress4XReusePrefer/gettysburg-8         408.3Mi ± 1%   432.6Mi ± 0%   +5.95% (p=0.002 n=6)
	Compress4XReusePrefer/twain-8              350.0Mi ± 0%   370.0Mi ± 1%   +5.70% (p=0.002 n=6)
	Compress4XReusePrefer/low-ent.10k-8        481.1Mi ± 0%   492.7Mi ± 0%   +2.41% (p=0.002 n=6)
	Compress4XReusePrefer/superlow-ent-10k-8   309.3Mi ± 1%   351.0Mi ± 0%  +13.50% (p=0.002 n=6)
	Compress4XReusePrefer/case1-8              130.5Mi ± 0%   140.2Mi ± 1%   +7.44% (p=0.002 n=6)
	Compress4XReusePrefer/case2-8              120.0Mi ± 0%   120.8Mi ± 1%   +0.69% (p=0.004 n=6)
	Compress4XReusePrefer/case3-8              126.3Mi ± 2%   129.6Mi ± 0%   +2.64% (p=0.002 n=6)
	Compress4XReusePrefer/pngdata.001-8        300.2Mi ± 1%   330.6Mi ± 0%  +10.13% (p=0.002 n=6)
	Compress4XReusePrefer/normcount2-8         183.7Mi ± 1%   187.2Mi ± 1%   +1.88% (p=0.009 n=6)
	geomean                                    111.6Mi        116.1Mi        +3.99%
  • Loading branch information
greatroar authored Nov 24, 2023
1 parent 80ba129 commit 8f7526c
Showing 1 changed file with 1 addition and 0 deletions.
1 change: 1 addition & 0 deletions huff0/compress.go
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,7 @@ func (s *Scratch) compress4Xp(src []byte) ([]byte, error) {
// Does not update s.clearCount.
func (s *Scratch) countSimple(in []byte) (max int, reuse bool) {
reuse = true
_ = s.count // Assert that s != nil to speed up the following loop.
for _, v := range in {
s.count[v]++
}
Expand Down

0 comments on commit 8f7526c

Please sign in to comment.