Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
chore: add new compat entry for StableRNGs at version 1 for package d…
…ocs, (keep existing compat) (#881) Co-authored-by: CompatHelper Julia <compathelper_noreply@julialang.org>
- Loading branch information
cf99f8b
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Lux Benchmarks
Dense(512 => 512, identity)(512 x 128)/forward/CPU/2 thread(s)
409792
ns412833
ns0.99
Dense(512 => 512, identity)(512 x 128)/forward/CPU/4 thread(s)
322250
ns324917
ns0.99
Dense(512 => 512, identity)(512 x 128)/forward/CPU/8 thread(s)
243583
ns322791
ns0.75
Dense(512 => 512, identity)(512 x 128)/forward/CPU/1 thread(s)
739625
ns741270.5
ns1.00
Dense(512 => 512, identity)(512 x 128)/forward/GPU/CUDA
44053
ns44918
ns0.98
Dense(512 => 512, identity)(512 x 128)/zygote/CPU/2 thread(s)
1353834
ns1358250
ns1.00
Dense(512 => 512, identity)(512 x 128)/zygote/CPU/4 thread(s)
2426458
ns2444062.5
ns0.99
Dense(512 => 512, identity)(512 x 128)/zygote/CPU/8 thread(s)
16512459
ns14162791
ns1.17
Dense(512 => 512, identity)(512 x 128)/zygote/CPU/1 thread(s)
2191083.5
ns2277500
ns0.96
Dense(512 => 512, identity)(512 x 128)/zygote/GPU/CUDA
209370
ns212604
ns0.98
Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/2 thread(s)
1454375
ns1450562.5
ns1.00
Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/4 thread(s)
908458
ns960958.5
ns0.95
Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/8 thread(s)
1834875
ns1778125
ns1.03
Dense(512 => 512, identity)(512 x 128)/enzyme/CPU/1 thread(s)
2240458.5
ns2274000
ns0.99
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)
1748562.5
ns1767833.5
ns0.99
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)
1089395.5
ns1083978.5
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)
1512729
ns1529021
ns0.99
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)
3013750
ns2954750
ns1.02
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/forward/GPU/CUDA
208817.5
ns209644
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)
12152041.5
ns12148854.5
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)
8814875
ns8834958.5
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)
9198917
ns9230875
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)
18613479
ns18631937.5
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/zygote/GPU/CUDA
1488013.5
ns1509941
ns0.99
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)
17304750
ns17314333
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)
13952770.5
ns13961542
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)
14533958
ns14514291
ns1.00
Conv((3, 3), 2 => 2, identity)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)
21843833.5
ns21865437.5
ns1.00
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)
250399541.5
ns249016958.5
ns1.01
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)
148350083
ns148521291
ns1.00
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)
117130083
ns116073791
ns1.01
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)
450838083
ns447568292
ns1.01
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/forward/GPU/CUDA
5478039
ns5499808
ns1.00
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)
1223340875
ns1227795916
ns1.00
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)
931640292
ns931180042
ns1.00
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)
831594354.5
ns831332521
ns1.00
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)
1647325416
ns1629694167
ns1.01
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA
31506744.5
ns31376705.5
ns1.00
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)
1144335875
ns1167771625
ns0.98
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)
995382583.5
ns1003953563
ns0.99
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)
1322398292
ns1322017146
ns1.00
Conv((3, 3), 64 => 64, relu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)
1739450208
ns1730835103.5
ns1.00
lenet(28, 28, 1, 32)/forward/CPU/2 thread(s)
1068417
ns1100791
ns0.97
lenet(28, 28, 1, 32)/forward/CPU/4 thread(s)
1603458.5
ns1624625
ns0.99
lenet(28, 28, 1, 32)/forward/CPU/8 thread(s)
3760063
ns3431229
ns1.10
lenet(28, 28, 1, 32)/forward/CPU/1 thread(s)
782062
ns781521
ns1.00
lenet(28, 28, 1, 32)/forward/GPU/CUDA
261189.5
ns272287.5
ns0.96
lenet(28, 28, 1, 32)/zygote/CPU/2 thread(s)
3001979
ns3015146
ns1.00
lenet(28, 28, 1, 32)/zygote/CPU/4 thread(s)
4127958
ns4087333.5
ns1.01
lenet(28, 28, 1, 32)/zygote/CPU/8 thread(s)
10894833
ns10933000
ns1.00
lenet(28, 28, 1, 32)/zygote/CPU/1 thread(s)
3233270.5
ns3238167
ns1.00
lenet(28, 28, 1, 32)/zygote/GPU/CUDA
1128601
ns1132885
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)
2312312.5
ns2306750
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)
1427541.5
ns1433208.5
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)
1552396
ns1678625.5
ns0.92
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)
4205417
ns4201375
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/forward/GPU/CUDA
207575
ns209995
ns0.99
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)
19386792
ns19417729
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)
16057458
ns16114625
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)
17256291
ns17220375
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)
25860208
ns25992250
ns0.99
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA
1590086
ns1600144
ns0.99
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)
34375666
ns34149500
ns1.01
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)
30899458.5
ns30894937.5
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)
31158000
ns31140666
ns1.00
Conv((3, 3), 2 => 2, gelu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)
36246917
ns36754250
ns0.99
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)
4546167
ns4526959
ns1.00
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)
2772584
ns2746459
ns1.01
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)
2682438
ns2911584
ns0.92
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)
8378667
ns8399583
ns1.00
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/forward/GPU/CUDA
420456
ns373956
ns1.12
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)
38885979.5
ns38745459
ns1.00
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)
32074313
ns32111709
ns1.00
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)
32239667
ns32268625
ns1.00
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)
51823708
ns52066792
ns1.00
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA
2618884
ns2635152.5
ns0.99
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)
82643500
ns88780729
ns0.93
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)
112560458
ns84997250
ns1.32
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)
185039874.5
ns218329542
ns0.85
Conv((3, 3), 4 => 4, gelu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)
73747708
ns74358917
ns0.99
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)
268204791.5
ns267246875
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)
159374708
ns158965875
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)
123950416.5
ns126688521
ns0.98
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)
485039833
ns485596792
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/forward/GPU/CUDA
7043693
ns7022210
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)
1468109979
ns1468898146
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)
1174089583
ns1171204459
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)
1065212458.5
ns1068921333.5
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)
2013851104.5
ns2001229479
ns1.01
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/zygote/GPU/CUDA
34531403
ns34725068.5
ns0.99
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)
1695591000
ns1692415625
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)
1493306146
ns1500720958.5
ns1.00
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)
1801755584
ns1766379833
ns1.02
Conv((3, 3), 64 => 64, gelu)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)
2201440812.5
ns2224153125
ns0.99
lenet(28, 28, 1, 128)/forward/CPU/2 thread(s)
1806792
ns1760875
ns1.03
lenet(28, 28, 1, 128)/forward/CPU/4 thread(s)
2531562
ns2595167
ns0.98
lenet(28, 28, 1, 128)/forward/CPU/8 thread(s)
7672666
ns7433916.5
ns1.03
lenet(28, 28, 1, 128)/forward/CPU/1 thread(s)
2462833
ns2426041.5
ns1.02
lenet(28, 28, 1, 128)/forward/GPU/CUDA
266951
ns273792
ns0.98
lenet(28, 28, 1, 128)/zygote/CPU/2 thread(s)
9343333
ns9254417
ns1.01
lenet(28, 28, 1, 128)/zygote/CPU/4 thread(s)
11495750
ns11474333
ns1.00
lenet(28, 28, 1, 128)/zygote/CPU/8 thread(s)
26058854.5
ns25126166
ns1.04
lenet(28, 28, 1, 128)/zygote/CPU/1 thread(s)
11770625
ns11780750
ns1.00
lenet(28, 28, 1, 128)/zygote/GPU/CUDA
1165407
ns1194908
ns0.98
vgg16(32, 32, 3, 32)/forward/CPU/2 thread(s)
379821291
ns381207125
ns1.00
vgg16(32, 32, 3, 32)/forward/CPU/4 thread(s)
284431333.5
ns285815709
ns1.00
vgg16(32, 32, 3, 32)/forward/CPU/8 thread(s)
276993833.5
ns233745708
ns1.19
vgg16(32, 32, 3, 32)/forward/CPU/1 thread(s)
453499125
ns453344667
ns1.00
vgg16(32, 32, 3, 32)/forward/GPU/CUDA
4933427
ns4852271
ns1.02
vgg16(32, 32, 3, 32)/zygote/CPU/2 thread(s)
1154735042
ns1157427583
ns1.00
vgg16(32, 32, 3, 32)/zygote/CPU/4 thread(s)
934566458
ns931406250
ns1.00
vgg16(32, 32, 3, 32)/zygote/CPU/8 thread(s)
1022641417
ns929761209
ns1.10
vgg16(32, 32, 3, 32)/zygote/CPU/1 thread(s)
1392634541
ns1403593291
ns0.99
vgg16(32, 32, 3, 32)/zygote/GPU/CUDA
18839648
ns19807136
ns0.95
lenet(28, 28, 1, 64)/forward/CPU/2 thread(s)
1047667
ns1051042
ns1.00
lenet(28, 28, 1, 64)/forward/CPU/4 thread(s)
1906208
ns1930834
ns0.99
lenet(28, 28, 1, 64)/forward/CPU/8 thread(s)
6506020.5
ns4821271
ns1.35
lenet(28, 28, 1, 64)/forward/CPU/1 thread(s)
1385270.5
ns1297541
ns1.07
lenet(28, 28, 1, 64)/forward/GPU/CUDA
268224
ns269906
ns0.99
lenet(28, 28, 1, 64)/zygote/CPU/2 thread(s)
6461437
ns6495729
ns0.99
lenet(28, 28, 1, 64)/zygote/CPU/4 thread(s)
13802959
ns12306583.5
ns1.12
lenet(28, 28, 1, 64)/zygote/CPU/8 thread(s)
21722625
ns18165416.5
ns1.20
lenet(28, 28, 1, 64)/zygote/CPU/1 thread(s)
6091083
ns6025750
ns1.01
lenet(28, 28, 1, 64)/zygote/GPU/CUDA
1208321
ns1207681.5
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)
70468396
ns70586437.5
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)
43613625
ns43556333.5
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)
39889875
ns39526083
ns1.01
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)
132854895.5
ns132710667
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/forward/GPU/CUDA
1872456
ns1944845
ns0.96
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)
355307875
ns356816354
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)
270273125
ns270253083
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)
254197770.5
ns254146791.5
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)
534390229.5
ns534914958.5
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/zygote/GPU/CUDA
12309296.5
ns12308008
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)
395284167
ns396010084
ns1.00
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)
394804354.5
ns407805500
ns0.97
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)
701196333.5
ns706921292
ns0.99
Conv((3, 3), 32 => 32, identity)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)
711179875
ns711811750
ns1.00
vgg16(32, 32, 3, 128)/forward/CPU/2 thread(s)
1186639833
ns1187507791
ns1.00
vgg16(32, 32, 3, 128)/forward/CPU/4 thread(s)
689274542
ns764568937.5
ns0.90
vgg16(32, 32, 3, 128)/forward/CPU/8 thread(s)
640237249.5
ns631341166
ns1.01
vgg16(32, 32, 3, 128)/forward/CPU/1 thread(s)
1775678646
ns1772828250
ns1.00
vgg16(32, 32, 3, 128)/forward/GPU/CUDA
12314528
ns12544942.5
ns0.98
vgg16(32, 32, 3, 128)/zygote/CPU/2 thread(s)
3680556646
ns3767262229
ns0.98
vgg16(32, 32, 3, 128)/zygote/CPU/4 thread(s)
2857162417
ns2869944333
ns1.00
vgg16(32, 32, 3, 128)/zygote/CPU/8 thread(s)
2854405625
ns2705287250
ns1.06
vgg16(32, 32, 3, 128)/zygote/CPU/1 thread(s)
5145784083
ns5058993459
ns1.02
vgg16(32, 32, 3, 128)/zygote/GPU/CUDA
49808957
ns49891272
ns1.00
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)
3409479
ns3429042
ns0.99
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)
2065084
ns2081583
ns0.99
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)
2479917
ns2543583
ns0.97
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)
6015479
ns6024375
ns1.00
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/forward/GPU/CUDA
341120
ns338827
ns1.01
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)
25925021
ns26104562.5
ns0.99
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)
18915667
ns19078958.5
ns0.99
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)
19134125.5
ns19625020.5
ns0.97
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)
39216437.5
ns39317959
ns1.00
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/zygote/GPU/CUDA
2468869
ns2462668
ns1.00
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)
55378250
ns54777416
ns1.01
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)
81111916
ns80697167
ns1.01
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)
174313958.5
ns170440292
ns1.02
Conv((3, 3), 4 => 4, relu)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)
45500125
ns45420250
ns1.00
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/2 thread(s)
1779417
ns1787458
ns1.00
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/4 thread(s)
1092250
ns1101875
ns0.99
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/8 thread(s)
1547583
ns1569708
ns0.99
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/CPU/1 thread(s)
3037625
ns3035500
ns1.00
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/forward/GPU/CUDA
212275
ns215425
ns0.99
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/2 thread(s)
12533437.5
ns12537208
ns1.00
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/4 thread(s)
9199000
ns9283500
ns0.99
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/8 thread(s)
9578167
ns9641937.5
ns0.99
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/CPU/1 thread(s)
18975812.5
ns18984166.5
ns1.00
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/zygote/GPU/CUDA
1533549
ns1531405
ns1.00
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/2 thread(s)
17619125
ns17668583
ns1.00
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/4 thread(s)
14239459
ns14332291.5
ns0.99
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/8 thread(s)
14500521
ns14569250
ns1.00
Conv((3, 3), 2 => 2, relu)(64 x 64 x 2 x 128)/enzyme/CPU/1 thread(s)
22180250
ns22181083.5
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)
70496583.5
ns70579000.5
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)
43594834
ns43509167
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)
39807625
ns39545292
ns1.01
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)
132718979
ns132823604.5
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/forward/GPU/CUDA
1947710
ns1947535
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)
360073791
ns361581166
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)
345868042
ns345861541.5
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)
302741792
ns303584333
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)
725319167
ns724116959
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA
13371028
ns13351785.5
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)
419555417
ns419705187.5
ns1.00
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)
418148437.5
ns420514459
ns0.99
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)
710077458.5
ns697427687
ns1.02
Conv((3, 3), 32 => 32, relu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)
715636334
ns717027625
ns1.00
mlp7layer_bn(gelu)(32 x 256)/forward/CPU/2 thread(s)
1661042
ns1700896
ns0.98
mlp7layer_bn(gelu)(32 x 256)/forward/CPU/4 thread(s)
1277792
ns1344562.5
ns0.95
mlp7layer_bn(gelu)(32 x 256)/forward/CPU/8 thread(s)
1134813
ns1353750
ns0.84
mlp7layer_bn(gelu)(32 x 256)/forward/CPU/1 thread(s)
2433292
ns2400417
ns1.01
mlp7layer_bn(gelu)(32 x 256)/forward/GPU/CUDA
584506.5
ns590707
ns0.99
mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/2 thread(s)
9020542
ns8924250
ns1.01
mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/4 thread(s)
12869000
ns12992208
ns0.99
mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/8 thread(s)
32651417
ns30772062.5
ns1.06
mlp7layer_bn(gelu)(32 x 256)/zygote/CPU/1 thread(s)
9805792
ns9884229.5
ns0.99
mlp7layer_bn(gelu)(32 x 256)/zygote/GPU/CUDA
1428291
ns1479651
ns0.97
mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/2 thread(s)
18111583
ns17441145.5
ns1.04
mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/4 thread(s)
17253354
ns16807333
ns1.03
mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/8 thread(s)
26535354
ns30461791.5
ns0.87
mlp7layer_bn(gelu)(32 x 256)/enzyme/CPU/1 thread(s)
14356792
ns14317375
ns1.00
Dense(512 => 512, relu)(512 x 128)/forward/CPU/2 thread(s)
710208
ns789375
ns0.90
Dense(512 => 512, relu)(512 x 128)/forward/CPU/4 thread(s)
599312.5
ns595083.5
ns1.01
Dense(512 => 512, relu)(512 x 128)/forward/CPU/8 thread(s)
912395.5
ns1038125
ns0.88
Dense(512 => 512, relu)(512 x 128)/forward/CPU/1 thread(s)
725791
ns725167
ns1.00
Dense(512 => 512, relu)(512 x 128)/forward/GPU/CUDA
47816
ns48555.5
ns0.98
Dense(512 => 512, relu)(512 x 128)/zygote/CPU/2 thread(s)
1582187.5
ns1507084
ns1.05
Dense(512 => 512, relu)(512 x 128)/zygote/CPU/4 thread(s)
973833
ns1043292
ns0.93
Dense(512 => 512, relu)(512 x 128)/zygote/CPU/8 thread(s)
1835187.5
ns1413583
ns1.30
Dense(512 => 512, relu)(512 x 128)/zygote/CPU/1 thread(s)
2183125
ns2256583
ns0.97
Dense(512 => 512, relu)(512 x 128)/zygote/GPU/CUDA
236731.5
ns241345.5
ns0.98
Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/2 thread(s)
1600083
ns1541063
ns1.04
Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/4 thread(s)
1053041.5
ns1073583.5
ns0.98
Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/8 thread(s)
1388771
ns1495667
ns0.93
Dense(512 => 512, relu)(512 x 128)/enzyme/CPU/1 thread(s)
2256062
ns2216500
ns1.02
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/2 thread(s)
3409541.5
ns3407458.5
ns1.00
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/4 thread(s)
2060229
ns2060208
ns1.00
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/8 thread(s)
2482875
ns2504792
ns0.99
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/CPU/1 thread(s)
5998167
ns6019500
ns1.00
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/forward/GPU/CUDA
286197
ns283414
ns1.01
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/2 thread(s)
24038625
ns24068584
ns1.00
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/4 thread(s)
17258666.5
ns17256458.5
ns1.00
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/8 thread(s)
17123396
ns17166250
ns1.00
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/CPU/1 thread(s)
37487104
ns37584937.5
ns1.00
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/zygote/GPU/CUDA
2409477.5
ns2397302
ns1.01
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/2 thread(s)
54679729
ns52933521
ns1.03
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/4 thread(s)
84538542
ns83805875
ns1.01
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/8 thread(s)
157339000
ns168151312.5
ns0.94
Conv((3, 3), 4 => 4, identity)(64 x 64 x 4 x 128)/enzyme/CPU/1 thread(s)
44498708
ns44568645.5
ns1.00
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/2 thread(s)
250028813
ns250376958
ns1.00
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/4 thread(s)
147930708
ns148122999.5
ns1.00
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/8 thread(s)
116617291
ns115699917
ns1.01
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/CPU/1 thread(s)
454228375
ns448012646
ns1.01
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/forward/GPU/CUDA
5443404
ns5442645
ns1.00
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/2 thread(s)
1101896208
ns1105356584
ns1.00
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/4 thread(s)
855324125.5
ns854303812.5
ns1.00
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/8 thread(s)
839930250.5
ns826724000
ns1.02
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/CPU/1 thread(s)
1774005666
ns1752988167
ns1.01
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/zygote/GPU/CUDA
29278014
ns28762466
ns1.02
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/2 thread(s)
1013677520.5
ns1031896104
ns0.98
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/4 thread(s)
922761000
ns962579167
ns0.96
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/8 thread(s)
1320593542
ns1179808792
ns1.12
Conv((3, 3), 64 => 64, identity)(64 x 64 x 64 x 128)/enzyme/CPU/1 thread(s)
1744904771
ns1752419187.5
ns1.00
mlp7layer_bn(relu)(32 x 256)/forward/CPU/2 thread(s)
1230812.5
ns1246312
ns0.99
mlp7layer_bn(relu)(32 x 256)/forward/CPU/4 thread(s)
967417
ns981667
ns0.99
mlp7layer_bn(relu)(32 x 256)/forward/CPU/8 thread(s)
669125
ns924938
ns0.72
mlp7layer_bn(relu)(32 x 256)/forward/CPU/1 thread(s)
2028541
ns1952875
ns1.04
mlp7layer_bn(relu)(32 x 256)/forward/GPU/CUDA
558507.5
ns559173.5
ns1.00
mlp7layer_bn(relu)(32 x 256)/zygote/CPU/2 thread(s)
6006292
ns5968250
ns1.01
mlp7layer_bn(relu)(32 x 256)/zygote/CPU/4 thread(s)
6899417
ns6725083
ns1.03
mlp7layer_bn(relu)(32 x 256)/zygote/CPU/8 thread(s)
25958937
ns24147709
ns1.08
mlp7layer_bn(relu)(32 x 256)/zygote/CPU/1 thread(s)
7102312
ns7125208
ns1.00
mlp7layer_bn(relu)(32 x 256)/zygote/GPU/CUDA
1368625
ns1363102
ns1.00
mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/2 thread(s)
10886750
ns10592083.5
ns1.03
mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/4 thread(s)
9389042
ns9872770.5
ns0.95
mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/8 thread(s)
17293854.5
ns16891792
ns1.02
mlp7layer_bn(relu)(32 x 256)/enzyme/CPU/1 thread(s)
7443459
ns8542250.5
ns0.87
Dense(128 => 128, gelu)(128 x 128)/forward/CPU/2 thread(s)
352104
ns490083
ns0.72
Dense(128 => 128, gelu)(128 x 128)/forward/CPU/4 thread(s)
409416.5
ns414250
ns0.99
Dense(128 => 128, gelu)(128 x 128)/forward/CPU/8 thread(s)
3455917
ns1848916.5
ns1.87
Dense(128 => 128, gelu)(128 x 128)/forward/CPU/1 thread(s)
88750
ns89417
ns0.99
Dense(128 => 128, gelu)(128 x 128)/forward/GPU/CUDA
27682
ns27713
ns1.00
Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/2 thread(s)
392604
ns381875
ns1.03
Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/4 thread(s)
399000
ns447500
ns0.89
Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/8 thread(s)
4557125
ns4415146
ns1.03
Dense(128 => 128, gelu)(128 x 128)/zygote/CPU/1 thread(s)
258875
ns259083.5
ns1.00
Dense(128 => 128, gelu)(128 x 128)/zygote/GPU/CUDA
221053
ns221456.5
ns1.00
Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/2 thread(s)
422125
ns412875
ns1.02
Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/4 thread(s)
429208
ns474250
ns0.91
Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/8 thread(s)
4755354
ns4220333
ns1.13
Dense(128 => 128, gelu)(128 x 128)/enzyme/CPU/1 thread(s)
270916
ns271166
ns1.00
Dense(128 => 128, relu)(128 x 128)/forward/CPU/2 thread(s)
305104
ns434854
ns0.70
Dense(128 => 128, relu)(128 x 128)/forward/CPU/4 thread(s)
348458
ns353250
ns0.99
Dense(128 => 128, relu)(128 x 128)/forward/CPU/8 thread(s)
635625
ns650792
ns0.98
Dense(128 => 128, relu)(128 x 128)/forward/CPU/1 thread(s)
54250
ns54375
ns1.00
Dense(128 => 128, relu)(128 x 128)/forward/GPU/CUDA
27950
ns27922
ns1.00
Dense(128 => 128, relu)(128 x 128)/zygote/CPU/2 thread(s)
355958
ns339896.5
ns1.05
Dense(128 => 128, relu)(128 x 128)/zygote/CPU/4 thread(s)
274500
ns340500
ns0.81
Dense(128 => 128, relu)(128 x 128)/zygote/CPU/8 thread(s)
753208.5
ns611187.5
ns1.23
Dense(128 => 128, relu)(128 x 128)/zygote/CPU/1 thread(s)
151667
ns152292
ns1.00
Dense(128 => 128, relu)(128 x 128)/zygote/GPU/CUDA
205458.5
ns206825
ns0.99
Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/2 thread(s)
372292
ns356792
ns1.04
Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/4 thread(s)
288521
ns355875
ns0.81
Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/8 thread(s)
798979
ns420542
ns1.90
Dense(128 => 128, relu)(128 x 128)/enzyme/CPU/1 thread(s)
150792
ns151000
ns1.00
vgg16(32, 32, 3, 64)/forward/CPU/2 thread(s)
602253459
ns603607250
ns1.00
vgg16(32, 32, 3, 64)/forward/CPU/4 thread(s)
430857604
ns425272979
ns1.01
vgg16(32, 32, 3, 64)/forward/CPU/8 thread(s)
392009125
ns372455458
ns1.05
vgg16(32, 32, 3, 64)/forward/CPU/1 thread(s)
877215958
ns873099458
ns1.00
vgg16(32, 32, 3, 64)/forward/GPU/CUDA
7028016
ns7619709
ns0.92
vgg16(32, 32, 3, 64)/zygote/CPU/2 thread(s)
1996302145.5
ns2006739833.5
ns0.99
vgg16(32, 32, 3, 64)/zygote/CPU/4 thread(s)
1609994521
ns1613467771
ns1.00
vgg16(32, 32, 3, 64)/zygote/CPU/8 thread(s)
1565616166.5
ns1601604000
ns0.98
vgg16(32, 32, 3, 64)/zygote/CPU/1 thread(s)
2641861333
ns2628483083
ns1.01
vgg16(32, 32, 3, 64)/zygote/GPU/CUDA
25992958
ns26335134
ns0.99
Dense(512 => 512, gelu)(512 x 128)/forward/CPU/2 thread(s)
536791.5
ns520146
ns1.03
Dense(512 => 512, gelu)(512 x 128)/forward/CPU/4 thread(s)
435250
ns434479
ns1.00
Dense(512 => 512, gelu)(512 x 128)/forward/CPU/8 thread(s)
2792250
ns1898520.5
ns1.47
Dense(512 => 512, gelu)(512 x 128)/forward/CPU/1 thread(s)
865125
ns866625
ns1.00
Dense(512 => 512, gelu)(512 x 128)/forward/GPU/CUDA
47701
ns47286
ns1.01
Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/2 thread(s)
1900167
ns1848208.5
ns1.03
Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/4 thread(s)
2798208
ns2786229
ns1.00
Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/8 thread(s)
16325500
ns14679500
ns1.11
Dense(512 => 512, gelu)(512 x 128)/zygote/CPU/1 thread(s)
2771604
ns2771958
ns1.00
Dense(512 => 512, gelu)(512 x 128)/zygote/GPU/CUDA
248374
ns249296.5
ns1.00
Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/2 thread(s)
1976729
ns1937125
ns1.02
Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/4 thread(s)
5051583
ns5035312.5
ns1.00
Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/8 thread(s)
16501146
ns14724291.5
ns1.12
Dense(512 => 512, gelu)(512 x 128)/enzyme/CPU/1 thread(s)
2698083.5
ns2768167
ns0.97
mlp7layer_bn(tanh)(32 x 256)/forward/CPU/2 thread(s)
1614854
ns1574791.5
ns1.03
mlp7layer_bn(tanh)(32 x 256)/forward/CPU/4 thread(s)
1236833
ns1257666
ns0.98
mlp7layer_bn(tanh)(32 x 256)/forward/CPU/8 thread(s)
1069583
ns1200500
ns0.89
mlp7layer_bn(tanh)(32 x 256)/forward/CPU/1 thread(s)
2226209
ns2226083
ns1.00
mlp7layer_bn(tanh)(32 x 256)/forward/GPU/CUDA
577670
ns584985.5
ns0.99
mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/2 thread(s)
5930562.5
ns5976500
ns0.99
mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/4 thread(s)
6880833
ns4604667
ns1.49
mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/8 thread(s)
26135520.5
ns25216125
ns1.04
mlp7layer_bn(tanh)(32 x 256)/zygote/CPU/1 thread(s)
7284792
ns7317042
ns1.00
mlp7layer_bn(tanh)(32 x 256)/zygote/GPU/CUDA
1356112
ns1363255
ns0.99
mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/2 thread(s)
12782291
ns12710625
ns1.01
mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/4 thread(s)
11955834
ns11988958
ns1.00
mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/8 thread(s)
21105833.5
ns21409084
ns0.99
mlp7layer_bn(tanh)(32 x 256)/enzyme/CPU/1 thread(s)
10667312.5
ns10882083
ns0.98
Dense(16 => 16, relu)(16 x 128)/forward/CPU/2 thread(s)
2334
ns2291
ns1.02
Dense(16 => 16, relu)(16 x 128)/forward/CPU/4 thread(s)
4792
ns2708
ns1.77
Dense(16 => 16, relu)(16 x 128)/forward/CPU/8 thread(s)
3625
ns2959
ns1.23
Dense(16 => 16, relu)(16 x 128)/forward/CPU/1 thread(s)
2375
ns2375
ns1
Dense(16 => 16, relu)(16 x 128)/forward/GPU/CUDA
24681
ns24451.5
ns1.01
Dense(16 => 16, relu)(16 x 128)/zygote/CPU/2 thread(s)
7333
ns7042
ns1.04
Dense(16 => 16, relu)(16 x 128)/zygote/CPU/4 thread(s)
7250
ns7084
ns1.02
Dense(16 => 16, relu)(16 x 128)/zygote/CPU/8 thread(s)
7167
ns7209
ns0.99
Dense(16 => 16, relu)(16 x 128)/zygote/CPU/1 thread(s)
7291
ns7166
ns1.02
Dense(16 => 16, relu)(16 x 128)/zygote/GPU/CUDA
209372.5
ns210193.5
ns1.00
Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/2 thread(s)
8333
ns8125
ns1.03
Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/4 thread(s)
8292
ns8292
ns1
Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/8 thread(s)
8500
ns8208
ns1.04
Dense(16 => 16, relu)(16 x 128)/enzyme/CPU/1 thread(s)
6000
ns5917
ns1.01
Dense(16 => 16, gelu)(16 x 128)/forward/CPU/2 thread(s)
10312.5
ns11000.5
ns0.94
Dense(16 => 16, gelu)(16 x 128)/forward/CPU/4 thread(s)
14125
ns16166
ns0.87
Dense(16 => 16, gelu)(16 x 128)/forward/CPU/8 thread(s)
10687.5
ns11146
ns0.96
Dense(16 => 16, gelu)(16 x 128)/forward/CPU/1 thread(s)
7167
ns7125
ns1.01
Dense(16 => 16, gelu)(16 x 128)/forward/GPU/CUDA
24485
ns24717
ns0.99
Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/2 thread(s)
19958
ns20000
ns1.00
Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/4 thread(s)
20041.5
ns20000
ns1.00
Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/8 thread(s)
19833
ns20125
ns0.99
Dense(16 => 16, gelu)(16 x 128)/zygote/CPU/1 thread(s)
20000
ns20250
ns0.99
Dense(16 => 16, gelu)(16 x 128)/zygote/GPU/CUDA
229359
ns230632.5
ns0.99
Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/2 thread(s)
23395.5
ns23375
ns1.00
Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/4 thread(s)
23750
ns23417
ns1.01
Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/8 thread(s)
23542
ns23645.5
ns1.00
Dense(16 => 16, gelu)(16 x 128)/enzyme/CPU/1 thread(s)
21333
ns21375
ns1.00
Dense(128 => 128, identity)(128 x 128)/forward/CPU/2 thread(s)
28875
ns29458
ns0.98
Dense(128 => 128, identity)(128 x 128)/forward/CPU/4 thread(s)
28750
ns28834
ns1.00
Dense(128 => 128, identity)(128 x 128)/forward/CPU/8 thread(s)
29083
ns28625
ns1.02
Dense(128 => 128, identity)(128 x 128)/forward/CPU/1 thread(s)
46041
ns46333
ns0.99
Dense(128 => 128, identity)(128 x 128)/forward/GPU/CUDA
25546
ns25821.5
ns0.99
Dense(128 => 128, identity)(128 x 128)/zygote/CPU/2 thread(s)
221812.5
ns226542
ns0.98
Dense(128 => 128, identity)(128 x 128)/zygote/CPU/4 thread(s)
279708
ns274167
ns1.02
Dense(128 => 128, identity)(128 x 128)/zygote/CPU/8 thread(s)
4417417
ns4023229.5
ns1.10
Dense(128 => 128, identity)(128 x 128)/zygote/CPU/1 thread(s)
145625
ns145708
ns1.00
Dense(128 => 128, identity)(128 x 128)/zygote/GPU/CUDA
211875.5
ns205677
ns1.03
Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/2 thread(s)
332875
ns339625
ns0.98
Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/4 thread(s)
321125
ns311625
ns1.03
Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/8 thread(s)
562312.5
ns520417
ns1.08
Dense(128 => 128, identity)(128 x 128)/enzyme/CPU/1 thread(s)
161625
ns161292
ns1.00
Dense(16 => 16, identity)(16 x 128)/forward/CPU/2 thread(s)
2083
ns1875
ns1.11
Dense(16 => 16, identity)(16 x 128)/forward/CPU/4 thread(s)
2125
ns1833
ns1.16
Dense(16 => 16, identity)(16 x 128)/forward/CPU/8 thread(s)
3875
ns2104.5
ns1.84
Dense(16 => 16, identity)(16 x 128)/forward/CPU/1 thread(s)
1709
ns1625
ns1.05
Dense(16 => 16, identity)(16 x 128)/forward/GPU/CUDA
22559
ns22965
ns0.98
Dense(16 => 16, identity)(16 x 128)/zygote/CPU/2 thread(s)
5334
ns5250
ns1.02
Dense(16 => 16, identity)(16 x 128)/zygote/CPU/4 thread(s)
5437.5
ns5250
ns1.04
Dense(16 => 16, identity)(16 x 128)/zygote/CPU/8 thread(s)
5458
ns5292
ns1.03
Dense(16 => 16, identity)(16 x 128)/zygote/CPU/1 thread(s)
5417
ns5208
ns1.04
Dense(16 => 16, identity)(16 x 128)/zygote/GPU/CUDA
254509.5
ns261526
ns0.97
Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/2 thread(s)
11708
ns11208
ns1.04
Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/4 thread(s)
11416
ns11333
ns1.01
Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/8 thread(s)
11416
ns11459
ns1.00
Dense(16 => 16, identity)(16 x 128)/enzyme/CPU/1 thread(s)
6750
ns6708
ns1.01
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/2 thread(s)
79881458
ns79891416
ns1.00
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/4 thread(s)
49107667
ns49038584
ns1.00
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/8 thread(s)
43180145.5
ns44836791
ns0.96
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/CPU/1 thread(s)
151771375
ns151572917
ns1.00
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/forward/GPU/CUDA
2680326.5
ns2695899
ns0.99
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/2 thread(s)
662703292
ns665802334
ns1.00
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/4 thread(s)
414205958
ns410890125
ns1.01
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/8 thread(s)
397227958
ns399102167
ns1.00
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/CPU/1 thread(s)
688889667
ns681784916
ns1.01
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/zygote/GPU/CUDA
14602708
ns14619713
ns1.00
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/2 thread(s)
715248166.5
ns710708249.5
ns1.01
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/4 thread(s)
686640708
ns671159083
ns1.02
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/8 thread(s)
1044047896
ns978285458
ns1.07
Conv((3, 3), 32 => 32, gelu)(64 x 64 x 32 x 128)/enzyme/CPU/1 thread(s)
994524042
ns996959708
ns1.00
This comment was automatically generated by workflow using github-action-benchmark.