From 91530e66e72c023c475e6f02ae2cbc5f7122d0e2 Mon Sep 17 00:00:00 2001 From: Jonathan Marchand Date: Sun, 22 Jan 2023 21:41:38 +0100 Subject: [PATCH] docs: how to support a new model tutorial (#181) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: first commit for how to support a new model tutorial * feat: more explanations on torch FX * docs: add more on torch fx * docs: move to tutorial folder * docs: some mispell fixes * docs: new documentation structure * docs: update get started page with titles * fix: grammar check * doc: few corrections Co-authored-by: Michaël Benesty --- docs/get-started/start.md | 3 - docs/how-to-guides/attention.drawio.svg | 4 + docs/how-to-guides/attention_fused.drawio.svg | 4 + docs/how-to-guides/attention_original.png | Bin 0 -> 27016 bytes docs/how-to-guides/fx_graph.drawio.svg | 4 + docs/how-to-guides/get-started.md | 27 + docs/how-to-guides/support-new-model.md | 689 ++++++++++++++++++ docs/how-to-guides/torchfx.drawio.svg | 4 + docs/overrides/home.html | 6 +- docs/{how-it-works => tutorials}/page.md | 0 mkdocs.yml | 9 +- 11 files changed, 740 insertions(+), 10 deletions(-) delete mode 100644 docs/get-started/start.md create mode 100644 docs/how-to-guides/attention.drawio.svg create mode 100644 docs/how-to-guides/attention_fused.drawio.svg create mode 100644 docs/how-to-guides/attention_original.png create mode 100644 docs/how-to-guides/fx_graph.drawio.svg create mode 100644 docs/how-to-guides/get-started.md create mode 100644 docs/how-to-guides/support-new-model.md create mode 100644 docs/how-to-guides/torchfx.drawio.svg rename docs/{how-it-works => tutorials}/page.md (100%) diff --git a/docs/get-started/start.md b/docs/get-started/start.md deleted file mode 100644 index 4e2bd575..00000000 --- a/docs/get-started/start.md +++ /dev/null @@ -1,3 +0,0 @@ -# Welcome to kernl.ai - -The documentation is currently being drafted. \ No newline at end of file diff --git a/docs/how-to-guides/attention.drawio.svg b/docs/how-to-guides/attention.drawio.svg new file mode 100644 index 00000000..1a1d079a --- /dev/null +++ b/docs/how-to-guides/attention.drawio.svg @@ -0,0 +1,4 @@ + + + +
self_embeddings_token_type_ids
self_embeddings_token_type_ids
getitem
getitem
expand
expand
self_embeddings_token_type_embeddings
self_embeddings_token_type_embeddings
self_embeddings_word_embeddings
self_embeddings_word_embeddings
input_ids
input_ids
add
add
getitem_2
getitem_2
self_embeddings_position_ids
self_embeddings_position_ids
self_embeddings_position_embeddings
self_embeddings_position_embeddings
add_37
add_37
self_embeddings_layer_norm
self_embeddings_layer_norm
self_encoder_layer_0_attention_self_value
self_encoder_layer_0_attention_self_value
self_encoder_layer_0_attention_self_key
self_encoder_layer_0_attention_self_key
self_encoder_layer_0_attention_self_query
self_encoder_layer_0_attention_self_query
view_2
view_2
view
view
attention_mask
attention_mask
view_1
view_1
getitem_1
getitem_1
to
to
sub
sub
mul
mul
permute_2
permute_2
permute
permute
transpose
transpose
matmul
matmul
truediv
truediv
permute_1
permute_1
add_1
add_1
softmax
softmax
matmul_1
matmul_1
permute_3
permute_3
contiguous
contiguous
view_3
view_3
self_encoder_layer_0_attention_output_dense
self_encoder_layer_0_attention_output_dense
\(\operatorname{softmax}(\frac{QK^T}{\sqrt{d_k}})V\)
\(\operatorname{softmax}(\frac{QK^T}{\sqrt{d_k}})\)
\(\frac{QK^T}{\sqrt{d_k}}\)
\(QK^T\)
\(Q\)
\(K^T\)
\(K\)
\(V\)
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/how-to-guides/attention_fused.drawio.svg b/docs/how-to-guides/attention_fused.drawio.svg new file mode 100644 index 00000000..38f28ea5 --- /dev/null +++ b/docs/how-to-guides/attention_fused.drawio.svg @@ -0,0 +1,4 @@ + + + +
self_embeddings_token_type_ids
self_embeddings_token_type_ids
getitem
getitem
expand
expand
self_embeddings_token_type_embeddings
self_embeddings_token_type_embeddings
self_embeddings_word_embeddings
self_embeddings_word_embeddings
input_ids
input_ids
add
add
getitem_2
getitem_2
self_embeddings_position_ids
self_embeddings_position_ids
self_embeddings_position_embeddings
self_embeddings_position_embeddings
add_37
add_37
self_embeddings_layer_norm
self_embeddings_layer_norm
self_encoder_layer_0_attention_self_value
self_encoder_layer_0_attention_self_value
self_encoder_layer_0_attention_self_key
self_encoder_layer_0_attention_self_key
self_encoder_layer_0_attention_self_query
self_encoder_layer_0_attention_self_query
view_2
view_2
view
view
attention_mask
attention_mask
view_1
view_1
getitem_1
getitem_1
to
to
sub
sub
mul
mul
permute_2
permute_2
permute
permute
permute_1
permute_1
attention_forward
attention_forward
permute_3
permute_3
contiguous
contiguous
view_3
view_3
self_encoder_layer_0_attention_output_dense
self_encoder_layer_0_attention_output_dense
\(Q\)
\(K\)
\(V\)
empty_like
empty_like
\(output\)
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/how-to-guides/attention_original.png new file mode 100644 index 0000000000000000000000000000000000000000..315c091065f18dbcfb76eb28fce970349aa73e12 GIT binary patch literal 27016 [binary image data omitted]
param
param
linear
linear
  x  
  x  
clamp
clamp
add
add
output
output
Text is not SVG - cannot display
\ No newline at end of file
diff --git a/docs/how-to-guides/get-started.md b/docs/how-to-guides/get-started.md
new file mode 100644
index 00000000..48de3fe1
--- /dev/null
+++ b/docs/how-to-guides/get-started.md
@@ -0,0 +1,27 @@
+# Get started
+
+## Install
+
+To install the Kernl library, you just have to pip install it:
+
+``` { .bash }
+python3 -m pip install 'git+https://github.com/ELS-RD/kernl' --extra-index-url https://download.pytorch.org/whl/nightly/cu117
+```
+
+## Optimize a model
+
+Then, in your program, import the optimization function and apply it to your model:
+
+``` { .py }
+from transformers import AutoModel
+from kernl.model_optimization import optimize_model
+
+model = AutoModel.from_pretrained(model_name).eval().cuda()
+optimize_model(model)
+```
+
+That's it, you now have your model with Kernl's optimizations!
+
+Beware, Kernl works only on Ampere GPUs and with Python `3.9.*` for now.
+
+Look at the [repository README](https://github.com/ELS-RD/kernl#readme) for more information.
diff --git a/docs/how-to-guides/support-new-model.md b/docs/how-to-guides/support-new-model.md
new file mode 100644
index 00000000..c2985a83
--- /dev/null
+++ b/docs/how-to-guides/support-new-model.md
@@ -0,0 +1,689 @@
+# How to support a new model
+
+## How does Kernl optimize a model
+
+### Overview
+
+To optimize a model, Kernl uses the [TorchDynamo](https://github.com/pytorch/torchdynamo) JIT compiler and provides a custom backend where we replace part of the [Torch FX](https://pytorch.org/docs/1.13/fx.html) graph with optimized kernels.
+
+The custom backend is defined in [src/kernl/model_optimization.py](https://github.com/ELS-RD/kernl/blob/v0.1.0/src/kernl/model_optimization.py#L44):
+
+``` { .py }
+def compiler(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
+    dynamo_backend_ofi(gm)
+    return cuda_graphs_wrapper(gm, example_inputs, pool=pool)
+```
+This backend combines two steps:
+
+- The first is to apply the graph replacements
+- The second is to use [CUDA graphs](https://pytorch.org/blog/accelerating-pytorch-with-cuda-graphs/)
+
+The second step eliminates most of the CPU overhead, but we won't elaborate on it here and will focus on the first step, the graph replacements.
+
+
+### Inspecting the FX Graph
+
+First, a few words about [Torch FX](https://pytorch.org/docs/1.13/fx.html).
+Torch FX is a torch module-to-module transformation toolkit. It can trace the execution of a torch module (or a function). All the operations are then recorded into a graph of nodes. From this graph, Torch FX generates python code matching the graph's semantics. Both the graph and the python code are accessible from the Torch FX GraphModule, which is also a torch module instance. Torch FX allows us to play with FX graphs while staying at the torch module level.
+
+
+ ![Torch FX](torchfx.drawio.svg){ lazyload=true } +
Torch FX
+
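+Before going further, it may help to see how such a GraphModule reaches a backend in the first place. The sketch below is illustrative only (it is not Kernl's actual code): it assumes the `torch._dynamo.optimize` entry point available in recent PyTorch nightlies, and the `debug_backend` and `f` names are ours. A toy backend receives the captured FX GraphModule, prints it, and returns it unchanged:
+
+``` { .py }
+from typing import List
+
+import torch
+
+
+def debug_backend(gm: torch.fx.GraphModule, example_inputs: List[torch.Tensor]):
+    # TorchDynamo hands the captured FX GraphModule to the backend,
+    # which must return a callable. Here we only inspect the graph.
+    print(gm.graph)
+    return gm.forward
+
+
+@torch._dynamo.optimize(debug_backend)
+def f(x):
+    return torch.relu(x) + 1.0
+
+
+f(torch.randn(8))
+```
+
+Kernl's `compiler` backend shown above follows the same contract, except that it rewrites the graph and wraps it with CUDA graphs before returning it.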
+ +With a FX GraphModule, we can inspect both the FX Graph and the generated code, with `.graph` and `.code` respectively. For better readability, one may want to use `#!python graph_report(gm)` that print the FX Graph in a tabular way (similarly to [Torch FX `print_tabular` method](https://pytorch.org/docs/1.13/fx.html#torch.fx.Graph.print_tabular))[^1]. + +[^1]: Aditionnaly, we can enable [TorchDynamo](https://github.com/pytorch/torchdynamo)'s tracing with `#!python torch._dynamo.config.log_level = logging.DEBUG` to display the compiled graph. Enabling `#!python torch._dynamo.config.output_graph_code` displays the graph's code instead. See [TorchDynamo's configuration](https://github.com/pytorch/pytorch/blob/ebeecbf833dfbeba07bd5d88e2bb24f63240bfa4/torch/_dynamo/config.py) for details. + +`#!python graph_report(gm)` lists all the operations during the execution. Each line corresponds to a node from the FX Graph with the given information: + +- `opcode` is the kind of operation +- `name` is the node name, usually the operation name +- `target` is the operation applied in this node +- `args` and `kwargs` are the arguments given to the operation + +For more precise information on the kind of nodes and their semantics [see the Torch FX Node documentation](https://pytorch.org/docs/1.13/fx.html#torch.fx.Node). + +For example, if we trace this following torch module into a FX GraphModule: + +``` { .py } +class MyModule(torch.nn.Module): + def __init__(self): + super().__init__() + self.param = torch.nn.Parameter(torch.rand(3, 4)) + self.linear = torch.nn.Linear(4, 5) + + def forward(self, x): + return self.linear(x + self.param).clamp(min=0.0, max=1.0) + + +m = MyModule() +gm = torch.fx.symbolic_trace(m) +``` + +We can print the graph with the `graph_report(gm)` function: + +``` +opcode name target args kwargs +------------- ------ ----------------------- ---------- ------------------------ +placeholder x x () {} +get_attr param param () {} +call_function add (x, param) {} +call_module linear linear (add,) {} +call_method clamp clamp (linear,) {'min': 0.0, 'max': 1.0} +output output output (clamp,) {} +---------- +Used modules +---------- +name target_type +------ ------------------------------------------------ +linear Linear(in_features=4, out_features=5, bias=True) +``` + +We can see here every operation listed in computation order, from getting the forward function parameter to returning the output. One more useful thing that `graph_report(gm)` does is to print the list of torch modules used in the graph, as in the node list we only have the torch module names and not the actual torch module types. + +The generated code from this FX Graph is the following: + +``` { .py } +def forward(self, x): + param = self.param + add = x + param; x = param = None + linear = self.linear(add); add = None + clamp = linear.clamp(min = 0.0, max = 1.0); linear = None + return clamp +``` + +More visually, we can draw the FX Graph to better see the computation. In this representation, edges represent the link between a node and the nodes in its arguments (we're discarding `args` and `kwargs` that are note previously defined nodes): + +
+ ![FX Graph](fx_graph.drawio.svg){ lazyload=true } +
FX Graph
+
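+The same information is also available programmatically: every node of the FX Graph exposes the fields listed above. A short sketch, reusing the `gm` traced from `MyModule`:
+
+``` { .py }
+# Walk the graph node by node; each node carries the opcode (`op`), name,
+# target and arguments shown in the tabular output above.
+for node in gm.graph.nodes:
+    print(node.op, node.name, node.target, node.args, node.kwargs)
+
+# Resolve the torch submodules referenced by `call_module` nodes.
+for name, module in gm.named_modules():
+    print(name, type(module))
+```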
+ +### Replace part of the FX Graph + +An FX Graph [can be modified directly](https://pytorch.org/docs/1.13/fx.html#direct-graph-manipulation) but we'll rather use subgraph rewriting. + +To rewrite the Torch FX Graph, Kernl uses the `#!python repace_pattern()` function defined in [src/kernl/utils/extended_matcher.py](https://github.com/ELS-RD/kernl/blob/v0.1.0/src/kernl/utils/extended_matcher.py#L337). It's the same function `#!python repace_pattern()` of [Torch FX](https://pytorch.org/docs/1.13/fx.html#subgraph-rewriting-with-replace-pattern) but with some bugfixes (that should be integrated in PyTorch in the future). + +``` { .py } +def replace_pattern(gm: GraphModule, pattern: Callable, replacement: Callable) -> List[Match]: +``` + +The function takes a graph `gm` and two callables `pattern` and `replacement` that can be either a torch module or a function. It'll convert `pattern` and `replacement` to an FX Graph and try to replace subgraphs from `gm` matching `pattern` with `replacement`. + +For example, given this 2-layers perceptron model, we'd like to replace the first layer activation from `tanh` to `reLU`. + +``` { .py } +class FeedForward(torch.nn.Module): + + def __init__(self, input_size, hidden_size): + super(FeedForward, self).__init__() + self.fc1 = torch.nn.Linear(input_size, hidden_size) + self.tanh = torch.nn.Tanh() + self.fc2 = torch.nn.Linear(hidden_size, 1) + self.sigmoid = torch.nn.Sigmoid() + + def forward(self, x): + hidden = self.fc1(x) + tanh = self.tanh(hidden) + output = self.fc2(tanh) + output = self.sigmoid(output) + return output + +m = Feedforward(5, 10) +``` + +By tracing this module, we can print the FX Graph: + +``` { hl_lines="4 5" } +opcode name target args kwargs +----------- ------- -------- ---------- -------- +placeholder x x () {} +call_module fc1 fc1 (x,) {} +call_module tanh tanh (fc1,) {} +call_module fc2 fc2 (tanh,) {} +call_module sigmoid sigmoid (fc2,) {} +output output output (sigmoid,) {} +---------- +Used modules +---------- +name target_type +------- ------------------------------------------------- +fc1 Linear(in_features=5, out_features=10, bias=True) +tanh Tanh() +fc2 Linear(in_features=10, out_features=1, bias=True) +sigmoid Sigmoid() +``` +We see that the graph is a straightforward sequence of operations. To replace the first layer and its activation function, we just need to match the highlighted subgraph. To achieve that we create a simple torch module with a linear module and the tanh activation function. + +``` { .py } +class Pattern(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear = torch.nn.Linear(1, 1) + self.activation = torch.nn.Tanh() + + def forward(self, v): + return self.activation(self.linear(v)) +``` + +The corresponding FX Graph is the following: + +``` +opcode name target args kwargs +----------- ---------- ---------- ------------- -------- +placeholder v v () {} +call_module linear linear (v,) {} +call_module activation activation (linear,) {} +output output output (activation,) {} +---------- +Used modules +---------- +name target_type +---------- ------------------------------------------------ +linear Linear(in_features=1, out_features=1, bias=True) +activation Tanh() +``` + +We don't need the node names to be the same as the ones in the graph we want to match, what is important is that we match the same node pattern. 
In our example, the node names differ (`fc1` and `tanh` in the graph, `linear` and `activation` in the pattern subgraph), but the modules called are identical (`Linear` and `Tanh`). + +We have our pattern subgraph, we may now write our replacement subgraph with the ReLU activation function and display its FX Graph. + +``` { .py } +class Replacement(torch.nn.Module): + def __init__(self): + super().__init__() + self.linear = torch.nn.Linear(1, 1) + self.relu = torch.nn.ReLU() + + def forward(self, v): + return self.relu(self.linear(v)) +``` + +``` +opcode name target args kwargs +----------- ------ -------- --------- -------- +placeholder v v () {} +call_module linear linear (v,) {} +call_module relu relu (linear,) {} +output output output (relu,) {} +---------- +Used modules +---------- +name target_type +------ ------------------------------------------------ +linear Linear(in_features=1, out_features=1, bias=True) +relu ReLU() +``` + +Unlike the matching pattern, we must be a bit cautious of the node names in the replacement pattern. If we want to reuse the nodes matched in the graph, we must use the same node names as in the pattern. Otherwise, it'll create a new node in the graph. In our example, the `linear` and the `v` node are kept from the node matched in the original graph but the `relu` node is added to the graph. + +Finally, we can apply the replacement and look at the resulting FX Graph: + +``` { .py } +replace_pattern(gm, Pattern(), Replacement()) +``` + +``` +opcode name target args kwargs +----------- ------- -------- ---------- -------- +placeholder x x () {} +call_module linear fc1 (x,) {} +call_module relu relu (linear,) {} +call_module fc2 fc2 (relu,) {} +call_module sigmoid sigmoid (fc2,) {} +output output output (sigmoid,) {} +---------- +Used modules +---------- +name target_type +------- ------------------------------------------------- +fc1 Linear(in_features=5, out_features=10, bias=True) +relu ReLU() +fc2 Linear(in_features=10, out_features=1, bias=True) +sigmoid Sigmoid() +``` + +The resulting graph has switched from `tanh` activation to `reLU`, the `fc1` node has been kept untouched. + +When we don't need to match a call to a torch submodule, it's easier to write pattern and a replacement as functions, as we'll see in [our example with BERT attention]. + + [our example with BERT attention]: #example-replacing-bert-attention + +There are some limitations with subgraph rewriting. When we use a function not covered by Torch FX, we'll have to use [Torch wrap function](https://pytorch.org/docs/1.13/fx.html#non-torch-functions) in order to appear in the FX Graph but not to be traced. + +``` { .py } +torch.fx.wrap(fn) +``` + +## Example: replacing BERT Attention + +In this example, we'll see how to replace the attention part of a BERT model with Kernl's optimized attention kernel. + +### Understanding Attention + +First, we need to look how attention works, the [original paper](https://arxiv.org/abs/1706.03762) "Attention Is All You Need" is a good starting point. More specifically, we'll focus on the Attention part where the attention function is defined: + +!!! quote "Attention Is All You Need" + An attention function can be described as mapping a query and a set of key-value pairs to an output, + where the query, keys, values, and output are all vectors. The output is computed as a weighted sum + of the values, where the weight assigned to each value is computed by a compatibility function of the + query with the corresponding key. + + (...) 
+ + We call our particular attention "Scaled Dot-Product Attention". The input consists of + queries and keys of dimension $d_k$, and values of dimension $d_v$. We compute the dot products of the + query with all keys, divide each by $\sqrt{d_k}$, and apply a softmax function to obtain the weights on the + values. + In practice, we compute the attention function on a set of queries simultaneously, packed together + into a matrix $Q$. The keys and values are also packed together into matrices $K$ and $V$. We compute + the matrix of outputs as: + + $$ + \operatorname{Attention}(Q,K,V)=\operatorname{softmax}(\frac{QK^T}{\sqrt{d_k}})V + $$ + +This function can be represented as a computation graph where the attention mask is added in the process: + +
+ ![Scaled Dot-Product Attention](attention_original.png){ width="150"; lazyload=true } +
Scaled Dot-Product Attention
+
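+To make the formula concrete, here is a naive eager-mode sketch of the same computation, written from the definition above (a reference implementation for illustration, not the optimized kernel):
+
+``` { .py }
+import math
+from typing import Optional
+
+import torch
+
+
+def naive_attention(
+    q: torch.Tensor,
+    k: torch.Tensor,
+    v: torch.Tensor,
+    attention_mask: Optional[torch.Tensor] = None,
+) -> torch.Tensor:
+    # softmax(Q @ K^T / sqrt(d_k) + mask) @ V with plain PyTorch operations
+    d_k = q.size(-1)
+    scores = torch.matmul(q, k.transpose(-1, -2)) / math.sqrt(d_k)
+    if attention_mask is not None:
+        scores = scores + attention_mask
+    return torch.matmul(torch.softmax(scores, dim=-1), v)
+```
+
+The chain of operations here (matmul, division, add, softmax, matmul) is exactly the pattern we will look for in the traced BERT graph below.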
+ +This graph representation will be useful as it is this graph we'll try to replace to optimize a BERT model. + +### Find the Attention graph pattern + +For our example, we'll replace the attention part from the "bert-base-uncased" pre-trained model from [Hugging Face Transformers](https://huggingface.co/transformers). If we look at the [BERT implementation](https://github.com/huggingface/transformers/blob/v4.24.0/src/transformers/models/bert/modeling_bert.py#L243), we find the attention function as a torch module: + +=== "Code Excerpt" + + ```{ .py .annotate } + class BertSelfAttention(nn.Module): + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.FloatTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + encoder_hidden_states: Optional[torch.FloatTensor] = None, + encoder_attention_mask: Optional[torch.FloatTensor] = None, + past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.Tensor]: + ... + # Take the dot product between "query" and "key" to get the raw attention scores. # (1) + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + ... + attention_scores = attention_scores / math.sqrt(self.attention_head_size) # (2) + if attention_mask is not None: + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. # (3) + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + ... + context_layer = torch.matmul(attention_probs, value_layer) # (4) + ... + ``` + + 1. $QK^T$ + 2. $\frac{QK^T}{\sqrt{d_k}}$ + 3. $\operatorname{softmax}(\frac{QK^T}{\sqrt{d_k}})$ + 4. $\operatorname{softmax}(\frac{QK^T}{\sqrt{d_k}})V$ + +=== "Full Code" + + ```{ .py .annotate hl_lines="49 50 68 69 70 71 72 73 74 84" } + class BertSelfAttention(nn.Module): + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: Optional[torch.FloatTensor] = None, + head_mask: Optional[torch.FloatTensor] = None, + encoder_hidden_states: Optional[torch.FloatTensor] = None, + encoder_attention_mask: Optional[torch.FloatTensor] = None, + past_key_value: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, + output_attentions: Optional[bool] = False, + ) -> Tuple[torch.Tensor]: + mixed_query_layer = self.query(hidden_states) + + # If this is instantiated as a cross-attention module, the keys + # and values come from an encoder; the attention mask needs to be + # such that the encoder's padding tokens are not attended to. 
+ is_cross_attention = encoder_hidden_states is not None + + if is_cross_attention and past_key_value is not None: + # reuse k,v, cross_attentions + key_layer = past_key_value[0] + value_layer = past_key_value[1] + attention_mask = encoder_attention_mask + elif is_cross_attention: + key_layer = self.transpose_for_scores(self.key(encoder_hidden_states)) + value_layer = self.transpose_for_scores(self.value(encoder_hidden_states)) + attention_mask = encoder_attention_mask + elif past_key_value is not None: + key_layer = self.transpose_for_scores(self.key(hidden_states)) + value_layer = self.transpose_for_scores(self.value(hidden_states)) + key_layer = torch.cat([past_key_value[0], key_layer], dim=2) + value_layer = torch.cat([past_key_value[1], value_layer], dim=2) + else: + key_layer = self.transpose_for_scores(self.key(hidden_states)) + value_layer = self.transpose_for_scores(self.value(hidden_states)) + + query_layer = self.transpose_for_scores(mixed_query_layer) + + if self.is_decoder: + # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states. + # Further calls to cross_attention layer can then reuse all cross-attention + # key/value_states (first "if" case) + # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of + # all previous decoder key/value_states. Further calls to uni-directional self-attention + # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case) + # if encoder bi-directional self-attention `past_key_value` is always `None` + past_key_value = (key_layer, value_layer) + + # Take the dot product between "query" and "key" to get the raw attention scores. # (1) + attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) + + if self.position_embedding_type == "relative_key" or self.position_embedding_type == "relative_key_query": + seq_length = hidden_states.size()[1] + position_ids_l = torch.arange(seq_length, dtype=torch.long, device=hidden_states.device).view(-1, 1) + position_ids_r = torch.arange(seq_length, dtype=torch.long, device=hidden_states.device).view(1, -1) + distance = position_ids_l - position_ids_r + positional_embedding = self.distance_embedding(distance + self.max_position_embeddings - 1) + positional_embedding = positional_embedding.to(dtype=query_layer.dtype) # fp16 compatibility + + if self.position_embedding_type == "relative_key": + relative_position_scores = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding) + attention_scores = attention_scores + relative_position_scores + elif self.position_embedding_type == "relative_key_query": + relative_position_scores_query = torch.einsum("bhld,lrd->bhlr", query_layer, positional_embedding) + relative_position_scores_key = torch.einsum("bhrd,lrd->bhlr", key_layer, positional_embedding) + attention_scores = attention_scores + relative_position_scores_query + relative_position_scores_key + + attention_scores = attention_scores / math.sqrt(self.attention_head_size) # (2) + if attention_mask is not None: + # Apply the attention mask is (precomputed for all layers in BertModel forward() function) + attention_scores = attention_scores + attention_mask + + # Normalize the attention scores to probabilities. # (3) + attention_probs = nn.functional.softmax(attention_scores, dim=-1) + + # This is actually dropping out entire tokens to attend to, which might + # seem a bit unusual, but is taken from the original Transformer paper. 
+ attention_probs = self.dropout(attention_probs) + + # Mask heads if we want to + if head_mask is not None: + attention_probs = attention_probs * head_mask + + context_layer = torch.matmul(attention_probs, value_layer) # (4) + + context_layer = context_layer.permute(0, 2, 1, 3).contiguous() + new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) + context_layer = context_layer.view(new_context_layer_shape) + + outputs = (context_layer, attention_probs) if output_attentions else (context_layer,) + + if self.is_decoder: + outputs = outputs + (past_key_value,) + return outputs + ``` + + 1. $QK^T$ + 2. $\frac{QK^T}{\sqrt{d_k}}$ + 3. $\operatorname{softmax}(\frac{QK^T}{\sqrt{d_k}})$ + 4. $\operatorname{softmax}(\frac{QK^T}{\sqrt{d_k}})V$ + +We see that the Hugging Face implementation is close to the definition from the paper, we want to find the attention pattern in this model. + +To begin, we'll write a short script running the model with a dummy input: + +``` { .py } +import torch +from transformers import AutoModel + +from kernl.model_optimization import optimize_model + + +model = AutoModel.from_pretrained(pretrained_model_name_or_path="bert-base-uncased").eval().cuda() + +optimize_model(model) +shape = (1, 128) + +with torch.inference_mode(), torch.cuda.amp.autocast(enabled=True, dtype=torch.float16, cache_enabled=True): + inputs = { + "input_ids": torch.randint(2, 10000, shape, device="cuda", dtype=torch.long), + "attention_mask": torch.ones(shape, device="cuda", dtype=torch.long), + } + output = model(**inputs) +``` + +If we run `#!python graph_report(gm)` and `#!python gm.code` in the `#!python dynamo_backend_ofi(gm)` function in the Kernl library, we can print the FX Graph and the python code from the model computation. For our example, we'll keep the `normalize_operators` and the `remove_dropout` functions as it simplifies the model's graph a bit. + +``` { .py } +def dynamo_backend_ofi(gm: torch.fx.GraphModule, assume_causal=False): + normalize_operators(gm) + remove_dropout(gm) + print(graph_report(gm)) + print(gm.code) + return gm +``` + +Below is the resulting output, we only show the beginning of the graph until the first attention layer. 
+ +???+ example "Part of the FX Graph of BERT model" + + === "`#!python graph_report(gm)`" + + ``` + opcode name target args kwargs + ------------- ------------------------------------------------- --------------------------------------------------------- --------------------------------------------------------------------------------------- ------------------------ + placeholder input_ids input_ids () {} + placeholder attention_mask attention_mask () {} + get_attr self_embeddings_token_type_ids self_embeddings_token_type_ids () {} + call_function getitem (self_embeddings_token_type_ids, (slice(None, None, None), slice(None, 128, None))) {} + call_method expand expand (getitem, 1, 128) {} + call_function getitem_1 (attention_mask, (slice(None, None, None), None, None, slice(None, None, None))) {} + call_method to to (getitem_1,) {'dtype': torch.float32} + call_function sub (1.0, to) {} + call_function mul (sub, -3.4028234663852886e+38) {} + get_attr self_embeddings_position_ids self_embeddings_position_ids () {} + call_function getitem_2 (self_embeddings_position_ids, (slice(None, None, None), slice(0, 128, None))) {} + call_module self_embeddings_word_embeddings self_embeddings_word_embeddings (input_ids,) {} + call_module self_embeddings_token_type_embeddings self_embeddings_token_type_embeddings (expand,) {} + call_function add (self_embeddings_word_embeddings, self_embeddings_token_type_embeddings) {} + call_module self_embeddings_position_embeddings self_embeddings_position_embeddings (getitem_2,) {} + call_function add_37 (add, self_embeddings_position_embeddings) {} + call_module self_embeddings_layer_norm self_embeddings_LayerNorm (add_37,) {} + call_module self_encoder_layer_0_attention_self_query self_encoder_layer_0_attention_self_query (self_embeddings_layer_norm,) {} + call_module self_encoder_layer_0_attention_self_key self_encoder_layer_0_attention_self_key (self_embeddings_layer_norm,) {} + call_method view view (self_encoder_layer_0_attention_self_key, (1, 128, 12, 64)) {} + call_method permute permute (view, 0, 2, 1, 3) {} + call_module self_encoder_layer_0_attention_self_value self_encoder_layer_0_attention_self_value (self_embeddings_layer_norm,) {} + call_method view_1 view (self_encoder_layer_0_attention_self_value, (1, 128, 12, 64)) {} + call_method permute_1 permute (view_1, 0, 2, 1, 3) {} + call_method view_2 view (self_encoder_layer_0_attention_self_query, (1, 128, 12, 64)) {} + call_method permute_2 permute (view_2, 0, 2, 1, 3) {} + call_method transpose transpose (permute, -1, -2) {} + call_function matmul (permute_2, transpose) {} + call_function truediv (matmul, 8.0) {} + call_function add_1 (truediv, mul) {} + call_function softmax (add_1,) {'dim': -1} + call_function matmul_1 (softmax, permute_1) {} + call_method permute_3 permute (matmul_1, 0, 2, 1, 3) {} + call_method contiguous contiguous (permute_3,) {} + call_method view_3 view (contiguous, (1, 128, 768)) {} + call_module self_encoder_layer_0_attention_output_dense self_encoder_layer_0_attention_output_dense (view_3,) {} + ``` + + === "`#!python gm.code`" + + ```{ .py } + def forward(self, input_ids : torch.Tensor, attention_mask : torch.Tensor): + self_embeddings_token_type_ids = self.self_embeddings_token_type_ids + getitem = self_embeddings_token_type_ids[(slice(None, None, None), slice(None, 128, None))]; self_embeddings_token_type_ids = None + expand = getitem.expand(1, 128); getitem = None + getitem_1 = attention_mask[(slice(None, None, None), None, None, slice(None, None, None))]; attention_mask 
= None + to = getitem_1.to(dtype = torch.float32); getitem_1 = None + sub = 1.0 - to; to = None + mul = sub * -3.4028234663852886e+38; sub = None + self_embeddings_position_ids = self.self_embeddings_position_ids + getitem_2 = self_embeddings_position_ids[(slice(None, None, None), slice(0, 128, None))]; self_embeddings_position_ids = None + self_embeddings_word_embeddings = self.self_embeddings_word_embeddings(input_ids); input_ids = None + self_embeddings_token_type_embeddings = self.self_embeddings_token_type_embeddings(expand); expand = None + add = self_embeddings_word_embeddings + self_embeddings_token_type_embeddings; self_embeddings_word_embeddings = self_embeddings_token_type_embeddings = None + self_embeddings_position_embeddings = self.self_embeddings_position_embeddings(getitem_2); getitem_2 = None + add_37 = torch.add(add, self_embeddings_position_embeddings); add = self_embeddings_position_embeddings = None + self_embeddings_layer_norm = self.self_embeddings_LayerNorm(add_37); add_37 = None + self_encoder_layer_0_attention_self_query = self.self_encoder_layer_0_attention_self_query(self_embeddings_layer_norm) + self_encoder_layer_0_attention_self_key = self.self_encoder_layer_0_attention_self_key(self_embeddings_layer_norm) + view = self_encoder_layer_0_attention_self_key.view((1, 128, 12, 64)); self_encoder_layer_0_attention_self_key = None + permute = view.permute(0, 2, 1, 3); view = None + self_encoder_layer_0_attention_self_value = self.self_encoder_layer_0_attention_self_value(self_embeddings_layer_norm) + view_1 = self_encoder_layer_0_attention_self_value.view((1, 128, 12, 64)); self_encoder_layer_0_attention_self_value = None + permute_1 = view_1.permute(0, 2, 1, 3); view_1 = None + view_2 = self_encoder_layer_0_attention_self_query.view((1, 128, 12, 64)); self_encoder_layer_0_attention_self_query = None + permute_2 = view_2.permute(0, 2, 1, 3); view_2 = None + transpose = permute.transpose(-1, -2); permute = None + matmul = torch.matmul(permute_2, transpose); permute_2 = transpose = None + truediv = matmul / 8.0; matmul = None + add_1 = truediv + mul; truediv = None + softmax = torch.nn.functional.softmax(add_1, dim = -1); add_1 = None + matmul_1 = torch.matmul(softmax, permute_1); softmax = permute_1 = None + permute_3 = matmul_1.permute(0, 2, 1, 3); matmul_1 = None + contiguous = permute_3.contiguous(); permute_3 = None + view_3 = contiguous.view((1, 128, 768)); contiguous = None + self_encoder_layer_0_attention_output_dense = self.self_encoder_layer_0_attention_output_dense(view_3); view_3 = None + ``` + +If we draw the FX Graph, we can identify in yellow the attention part: + +
+ ![Attention in BERT FX Graph](attention.drawio.svg){ lazyload=true } +
Attention in BERT FX Graph
+
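+Instead of spotting the region by eye, you can also locate it programmatically. A small sketch (the helper is ours, not part of Kernl): the softmax over the masked, scaled scores is a convenient anchor for the attention subgraph, and its arguments lead back to the matmul, division and add nodes above it.
+
+``` { .py }
+import torch
+
+
+def find_attention_softmax(gm: torch.fx.GraphModule):
+    # Return the softmax call_function nodes; their inputs are the scaled,
+    # masked attention scores.
+    return [
+        node
+        for node in gm.graph.nodes
+        if node.op == "call_function" and node.target is torch.nn.functional.softmax
+    ]
+```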
+ +Now, we look into the code which lines correspond to these nodes in the FX Graph. + +``` { .py } +transpose = permute.transpose(-1, -2); permute = None +matmul = torch.matmul(permute_2, transpose); permute_2 = transpose = None +truediv = matmul / 8.0; matmul = None +add_1 = truediv + mul; truediv = None +softmax = torch.nn.functional.softmax(add_1, dim = -1); add_1 = None +matmul_1 = torch.matmul(softmax, permute_1); softmax = permute_1 = None + +``` + +We now have our pattern to catch in the model, to make the pattern easier to read, we rename the following nodes: + +- `permute` → `k` +- `permute_1` → `v` +- `permute_2` → `q` +- `mul` → `attention_mask` + +and can write our pattern function: + +``` { .py } +def pattern(q, k, attention_mask, v): + transpose = k.transpose(-1, -2) + matmul = torch.matmul(q, transpose) + truediv = matmul / 8.0 + add_1 = truediv + attention_mask + softmax = torch.nn.functional.softmax(add_1, dim=-1) + matmul_1 = torch.matmul(softmax, v) + return matmul_1 +``` + +### Replace the Attention part + +We now need to add our replace function to call the optimized kernel. We can see in [kernl/model_optimization.py](https://github.com/ELS-RD/kernl/blob/v0.1.0/src/kernl/implementations/attention.py#L483) the optimized attention kernel needs in addition to `q`, `k`, `v` and `attention_mask`, the `output` and the `sm_scale` parameter. + +``` { .py } +def attention_forward( + q: torch.Tensor, + k: torch.Tensor, + v: torch.Tensor, + output: torch.Tensor, + sm_scale: float, + is_causal: bool = False, + attention_mask: Optional[torch.Tensor] = None, +): +``` + +The `output` parameter is simply the resulting tensor. We need to provide the tensor beforehand. + +The `sm_scale` parameter corresponds to the scale factor applied to the query-key compatibility function in the attention function. Defined as $\frac{1}{\sqrt{d_k}}$, it corresponds to the `true_div` node in the FX Graph. In this case `sm_scale` is $\frac{1}{8.0}$. + +We can now write our replacement part by calling the optimized kernel: + +``` { .py } +torch.fx.wrap("attention_forward") + +def replace(q, k, attention_mask, v): + output = torch.empty_like(q) + output = attention_forward(q, k, v, output, 1 / 8.0, is_causal=False, attention_mask=attention_mask) + return output +``` + +To wrap it up, we can define our replacement function: + +``` { .py } +import torch + +from kernl.implementations.attention import attention_forward +from kernl.utils.extended_matcher import replace_pattern + + +torch.fx.wrap("attention_forward") + +def replace_attention(gm: torch.fx.GraphModule): + + def pattern(q, k, attention_mask, v): + transpose = k.transpose(-1, -2) + matmul = torch.matmul(q, transpose) + truediv = matmul / 8.0 + add_1 = truediv + attention_mask + softmax = torch.nn.functional.softmax(add_1, dim=-1) + matmul_1 = torch.matmul(softmax, v) + return matmul_1 + + def replace(q, k, attention_mask, v): + output = torch.empty_like(q) + output = attention_forward(q, k, v, output, 1 / 8.0, is_causal=False, attention_mask=attention_mask) + return output + + replace_pattern(gm, pattern, replace) + +``` + +And use it in the TorchDynamo backend. 
+
+``` { .py hl_lines="4"}
+def dynamo_backend_ofi(gm: torch.fx.GraphModule, assume_causal=False):
+    normalize_operators(gm)
+    remove_dropout(gm)
+    replace_attention(gm)
+    print(graph_report(gm))
+    print(gm.code)
+    return gm
+```
+
+If we print the FX Graph again after the graph replacement, we see that all the previous nodes from the attention part are now replaced by the call to the optimized kernel.
+
+=== "Attention in BERT FX Graph"
+
+ ![Attention in BERT FX Graph](attention.drawio.svg){ lazyload=true } +
Attention in BERT FX Graph
+
+ +=== "Attention replaced by an optimized kernel in BERT FX Graph" + +
+ ![Attention replaced by a kernel in BERT FX Graph](attention_fused.drawio.svg){ lazyload=true } +
Attention replaced by a kernel in BERT FX Graph
+
diff --git a/docs/how-to-guides/torchfx.drawio.svg b/docs/how-to-guides/torchfx.drawio.svg new file mode 100644 index 00000000..a39eb717 --- /dev/null +++ b/docs/how-to-guides/torchfx.drawio.svg @@ -0,0 +1,4 @@ + + + +
Torch Module
Torch Module
Torch FX GraphModule
Torch FX GraphModule
Torch FX Graph
Torch FX Graph
Python code
Python code
Torch Module
Torch Module
Share same
semantic
Share same...
Trace
Trace
Text is not SVG - cannot display
\ No newline at end of file diff --git a/docs/overrides/home.html b/docs/overrides/home.html index 5dd32991..9de8f61b 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -36,7 +36,7 @@

Kernl is an open source project that optimizes and accelerates your PyTorch model