From 0c9c014455d3acbc947159557fe446100649d98e Mon Sep 17 00:00:00 2001 From: Daniel Date: Tue, 19 Mar 2024 20:28:43 +0800 Subject: [PATCH] Update TensorRT-LLM Docs --- docs/docs/guides/providers/image.png | Bin 27275 -> 0 bytes docs/docs/guides/providers/tensorrt-llm.md | 210 ++++++++++++++++----- docs/docs/integrations/tensorrt.md | 8 - docs/docusaurus.config.js | 4 + 4 files changed, 170 insertions(+), 52 deletions(-) delete mode 100644 docs/docs/guides/providers/image.png delete mode 100644 docs/docs/integrations/tensorrt.md diff --git a/docs/docs/guides/providers/image.png b/docs/docs/guides/providers/image.png deleted file mode 100644 index 5f1f7104eb85703538d73fff97bdeee0019e4d33..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 27275 zcmc$`XH-*N*EUL1P^2g#(vc!aFG{Z>y-F_zP(*r_0HF%pqEe+xM`_ZF^eQSXp-2fu z2ti1IP$eNkLV&ZmpZER7c)#Z_(oE+eS=~4X2KYebs$rx-M%I`@OL}?<_iZT<+y z2*t?9#?V7wZ&N6itbF6)TP3dWt@4Njt6N_er%ec8=1}E}c?DfrqNR0ESC1ywM&C(U zhyRQZl)I0x?=wHBS7qk3&CJQPWVSs$c^IhuLR z$+(V-$}T)lf41CB7w?=bl&zxCumrS3-xE>p5K!YYE#i|jemxJCMgh8F8P`#v z7uUMS3m=F?7<(yr%ZgQeFPTPV)`wd*V#@Q%{bMFs&PW@pWznxJfQz=uY$?gNL}ne7 zE>%xo=7mf1axOn3{t|P{8_X63*>(TbSH#2<`zLGdZNx&BoFh3h1TXkG>o;Z5fT(S@<#yA{3`o@&j$DaiCepU<7_@?Nhr5 zGOy^OZ8Gjs#db}XaVT2qF)+l`BlcVQ<2fo7D;;JMeO`XTdTFKaTSUv>}2FkP$~+NTYp7&qiU`)J9JLDpjZ4<J-kujSc z?T;-Lcg%Z*cYY68>Y_`zB|ptj72;;ew5LxtC`hl~-n+#zbd1yQD>(*l-AA+x1*I2& zXKe=>hq&xTL7mOVgo&uaIG}$Yp>yITLGn_qG=BH{(=R4+6%LOK+7=^E4m;f@zhJ3fkvF^6i60ZQZ7Q7y&w<{c z9Ytz1QJ2K(;9x4;N0;1vXT@b|)#?*Yv7$-hJLR6z*n`c!JQJ8}xK5;uKm?5H-!OVDy=75MM}YY zKbwKTv2O$%dP|M;n_>EX+fsmm-4QUQ-PT3%p1m&k+#xbLIH)Shbj*1o2LxOYcTgFd-4YU$a2( zDF$ywdev^C`N!A7t+9 z85A1Z{c~lQclcQ@OuWm>ZJC=etmEN(3)W~jn-JM`_+22R+_H)76^>KRKuT>rf^;xc zasNiK_Hto3b_9d(*^ltem6$$&h6W}$$!FQVVPfocW|x%Yn!eF~BV>a-7=`XTUF^#& z(oHLUFxGy#g2#=I=Th{|Q=zEC67I62$%kxW5!P*+cyFt9fcmqTQ1fw>}5vs46D+s80NrjnEJ@NAYsX?6y z*tbFzUwLTzKn!@K$E5c9G4)ZW+c%G|zrU`F7Axt#k@)kOeuI_)+jLtjVdg1Uht)LP zw%P5mLTjhny(DmQZ}7XI?d8MpyFmCqS>wtdq?4s8 
zKO93%>UOl3sM(p+cWzZ3DzeHh`F#)Y0z(q`Y8$D{sA_7IPW1_e&5X@yw|CcPADf&D*eLs(BZFzW5 z+INY0(vIsB=!lqpE1K|*`x7O(Hc_*$!5dAgeM-1dndv~PPO5K$1%YO{aRiw^>{B<4 z0awmYK+~x|$W1q>9()#ge`~e~d7v`gs2oFTTscBr!MqsOas&|tWW)hKc`@Ap{A5aX z51!TNW_#&WGVI;gB!f-T1L3o4LgXAN#!&9Yn>0T@WDTiEiACQ*Ft#^-CbHD2i*QH# zmeNgfXHM7#B?nF^j)v!`MaA7Olr(*V|2=84gdPVSDGSH*B%`OIG6|RV+FezyWaX$) zlKU^~k9DR-X2JN{KE&Z|T?@d_dza5=L~(aaQDGFDxCymBo9+;Q1+8FXwbR4ZcY=kw zm=_@2>|(oAe4rK1Fpa+!GPpaDt-s4-S~ieT9MY`g32e`mz7Oq;%dFh)td|S<{sU~k zlJfA<6>Sj;^G?y99$QJKY5Z3lX6H&*{GSvT9oU@d1Cf(!cg+~c5@v2Y%mR)xbH1H_ zk^V^iSL6TK#N+xeatF5W|8DR}Pyfvb1|{WzPX7P6th1G{p2q@gjn5kweY!G>+^U@N zn!FbVryfbmXU;Hf6a+h5WE-WwiQDxOYM|E>t6}Bv<1Nd`l>6K~Sv!#=9I^MPM8XB8zL{R{-ZL|5br6J_Ni5ZexNUh|&y zJ@FC|7=wW%xn&zsVDr!SV9XYD%SjuICre8&bl609myQJd>8Gwx3;|NDdv)DdDApX` zW$)8bHVT!uP;gf$^09K|=Fgd~iAlmBle`df%LpV^9TrYn_eJ^G-EQts8(DgX$rD#M~GmFAuFBpzWvCf zuJ2rV-~6VHVZV=Pry&?w6zB|5lEy;6bMeaFKsuF|MutzmaDV8G# zs*$jt$&I6)qAtZC#JYcdVBTO5)Jh56jEE?!g751Z^!k*>p5}$urSHe0G(;XJYv+mU zgxUrb){}xn(RbIsOPZz4*7)*JPkri&C+idTCKowlfS3GL<%!J_b8rg-Pu~J$qriMcUF+u+;tmQyyU90w zRmrJ)Oxtr{%+1m!WMfm-$L=(h45XP8~mcx7%W))^rVpZKoaA|?26^4f|HEeImr8Mj) zJC3+wGr*knyU-@ci@PVcV6apAed z+=c&7;In=Giyqkfpgtn-Ywg4}`iW2G1 zxQ`4GpE=ljnPQQCa={M${;s0iHoNw)WVynAO_9K|VFuLBA4_TuKXt&mhZb!*XNC8* zRsH;6ZA_3uT+z8Q9fZqtB!G--jMX(iHEUw*C3rD6e3X)U%36+d-&qN@EtTf8{@}Vx z@f>8Sva$T&1bH5*VCC3q8thaRim!GAB{L65L=j*>&Qs&aamYspmh+sgb*=XX(js@; zx5~9yYO{D?!H_`W=>?TVyL`IVxBvXqWGG`I1~L9vcX>_lml1V&P{>8rtA?L3hXvPy z?YOC0K`o*)Gi_(nkv+(cm!*x8>_h}O~P^3(I(r`V^tC26AP2Xg>Qn2-VhJ=;o zP{|Oqh@)*XorBr12%T`cCWA^duzIPO6mwN`pjTwsYA?k>^Pum-Q@o8t48)^~N{~A_Kb%c=OTsBJ+d{u`ynng51&}E+#H*=@Kt#WdQC8|#&y{K$%1N) z=MMMDe&4w%=zHsEiU2idps9kajgD66%Zp_A=f*Y2+Bow36vUmx1@(_fz+O>?9KC$60w0 z%mMjEP3h?VClV7gi(sH!%5cVuS*Hfs={|g*hCR9{w1LpRZ}xkxV1YY|bO5XolZCAM zXdQG0Up-F0@fTFq_HNU)L+KQy$?E&-i}iM`*giidmj*jy))f4Tx>YvA?lToz$5|z7 zBg$vB>&!Cy!BoU8o(QVA$K#8L%Z}&7xi?}Jb)%?gaSb2Y7h&oO^&&b|&O`RgmL=w zLTle@ya=>$Z>D9ml@F=Cb5v_ATLf9p3GnjjZwS3`JiB-EV+C0uaUBbW%lYq;OYJ%W 
zo4zz?qFfK)27-iSkhj>VNcJRs7W`^!cE4s<`?NGFlPTLn-4xp2P=^-wZGnpP3YV|d z84#N$-xWl6iND;g`C_bKCDA9ESt4OJZK%z@%HXqbGEugC*>Z!#Zk6*x(9mUQl)RUz`Ck|2Q$QNl)SFqW?MdVlq3f z%0zmkYYohc)XsQA5 zcRn4Qva>O=mxz56gw_aO4qzq^p(-FG-SzKy;M-C~Yh3x~gIWQmV>|8Y>)kxxwIHxP zeXdW9OK~_rS`;#F$+Y1xyGl^a9Rzx>Rz|*+bdS7ym|LM^0&g2OG%`hgG;f8G_F0{eN_#&-FO@!IKVm-&*4SSVPzpPAxH4ZH6a)(s=lG79cwkvgoUj-ci7F-xB5q3iWz(xu2vOh z?6(||(Bv3AwP?JMEE32!Vxm5how}9jkZFV;%(!A)S+nr8*ZQRkf}MyJ8wAB1}C?kA|dLu;x3lH_nj(u5d``IM<0zA zisDRBh}`-5GP_kclkg+%`qj)i5o#*n=n5cPv4&X&!TYTQeTN zLL4J3tR?;!&Ie^#BI*Y?d?&y_#YM@+|3650~ zuZHME4Jz}jy|VW%bI)X%JCCh}a|?W}Ru#(2vwo&h-KQlnJ^W4>EjFYljJO%Fbmxi6z2ZK5 zJZswS=PXXBG-ObOda9FHDqduYxb^l`S1!YCvA&mlySOLMY72*xTM_Mx3WpIKCoaK;geI=-K|L!_gsvpi$SWFYx(q2h(FBRS1a(wpMJppQcNw>~V&(-Oi{&}1 z3|7mPdli{q6&Cwk!dXn{{bi={`eF1K1OLr+hnseh6WswjANYRD9n6_hFkj!cK{VkwjP#^>h{7esZ`n2WMy z(Si<_Gzz5l3wSa^EiY?JFF=-H_h(zq;GlvPG6zfV>)TcwuSS$+i*|EMcG?tk2&4@) zHBs*O8n^fZ~-s;I^I$0*5u_VNVrRJ9TR@Z+|HCi)V7NaF%U$ic?q{`(_F=K4fMQk^?=@_e&OeONbou zo%iulqcDd?D9JtYzx1W9jt7aWj+wax4;A>U7so-+CaWKjD~mA~<_ofmF{9txbtwFV z<@Z}_gRdk(n)oI^$;^JeZxK5gB;M2DH5(1CwCb#p**0dgbEG@; z@n!9)f0XxJ*w$XLZqX~kof5l!;pbn>O9i$v!~1wa9HQ7OMJz9r(@on!A6%zzB>wT7Z3HFS89A@8(OdRS= zY#sI4e&u6n(zGxLAAHLZ{StgyB5Jm`*l{PGVD1P3T0e*lgwcXTB%2Aw_LPWANs z7J$H6UNaVCENI3l#bVf<|GQ;~mHb|c<*L7MAX!QmtNR1=iz@&pR{P9Pas1fx_Q7VB zx)T%>*P06eZ*;CeqXWLm8c2mS+?9o$ONYF4f!|h!XJq_~y=DDNr~OYPPAUT6xWr~{ z!wT+?=BNnL0l?qX^9J+<4flUW>B?C|-W2vr=y^=9=lMEtKv1azK3%(3k-U0am+Xz< zSpd`33ou61Q+~^kjm#qb(>GXlmz)3u9mVwj2{+SC8-xOwpyDIRPJoum*pFZJ_6@UF zC^vqeV(l0K_McEWx>g{$1gyRT!F?wn`_i%pANt+1-4q&IN)_mp|9lO6XPNsp0yn9{ zEjDyn44~;a%$4RHuRKhVUw!+mKjzYaohJaHnBUCvYSR0c>tO)}rh;pl;2T+PrCQO^&&wFF4?P4k$O=zOQj1` z`LOf(NZP978QL)(sAKP=1?n$w4Wc;5%TX!xhtdbH0gTlB;q}p%Eo(sD`to3{S3DN2+pOm87A-vAjgF8=WM&u?0qVm z#j#V$YjyVHvzWePlk-CpO;>Dmm#z+OC6=Ll2*we%(})(-Vym}jAr1}*=!!c}+L>Br zuL~1JSF&KHll2mHu;V! 
zO(Lp7zOye&?nuj^OArJZ260!P7PyrrA$by;kQY5K>!mrxLNHu-L9l+}ujO6HgDvZf z@aZM2U!PO(QU3+RY}^$4$?63#atN}N>vJzdM7-SZIWk^@5Z+CU?5CTkLTn-!Ph>PK z3c@@1G9R>R_0lQbqXutn)dgYe#1UR^qDVtgh9{8G1nP;ePxrq;23efsKR%+SkA zzKH;XsVP=*`d8=-Cef#SNfT0}=>VBP^9uj53NgIWcaaRf5jK-lo$Y!Q^(U4)!#VaZ z5NZKl%)&@zw>!5#r*r>|1Q;^lVXg}9TF3BkAMZK>z*||Y$aHIL_-{F>`k;OFyDAsd z1;OW!Is~XD>6Ccj-V*N_Pzuz1K5ncgYpJf59PN6v?$9==XJ`MCoc-O*P%fAEWIPg- zZ%M0RSd}5gFMFjaNHoFF?%uvvtcU6s@%->dy%uc~x{z-GjJ+^1)wzyw;TlbVq-}Kg zM9LJ}aAUvNale0)|D?%CNvlxXw%X^r5H(5z%%1!YB!JQhgvi|BJh4-Hres=(g)|z? z%Z6LGja7#}DDoR*bp_^0UKd^2ypDmvh!!yx1lnujkx zeX2;C5V0tC9zJ<^N7a?*w)3#k;mfa|6<1zSLK`(i>+K*L`?fEZiBR&StIBE(>1Q@6 z$`bN6lDlry)I>0v5JQ@>6XB2qIKG{X%MD%>@NUbLSOYH!zib&o#9ERZJz8+F(UR7Z z8N%T{rnT*K%|z`qkT7!U&9rG+Q+Nw!8MS<11 z5aRc~LPD~hL>0fQ(~wVXO8EOO{Aq7RVW>y0>X2(=-Kf%3-aaJ&b=xY)e$GQC@=pzS z2OKD9R%dkqSecr)=d2)BL4wvPFX+aj81L&YNrcqw($wxG!a2YZc$|2Ue{gjudheJT z4EeP%r%k(w4V=ql`U679PT)OMSHfcUg%U<3eDc$pU$A!8OsG9RcyQXJDsO7;ul4g5 z^@8&8X9tgJhtN9DI|3m|>hb!r;+Nt`X}8+HH^&>SH7CBhDl48uE$oEN-T&O)&sn{6 zn-L4~Q(qfjF&#_T@=ZPG0=rj^Q*&$%GtsZ3+&O0KL9-J$S`PJUjTdK&o%^=SwQb9a z9@dG9gfe(|PgQ89YsnooSw!G;V1Nakausf49OdyUC))&i3{VA1HP^lv4cE_WAF$S% zKh6@e21m8LMFRp7@OQul6QpFO1k%Y?Wafr{Z*R>uSJ+PKASF5C=^IX2(V{MbQP-~p zxyJ>`fkjbggzr$7mM3$W*{R~&8%UroaQGhMj6M9pWU_PI6zP>P#2uyFvS}5?j=xG{ z1&LR$%k{CCwnQv{HlD5~psJ?lGPU$PlY4va2s0RJXn|BYfCs>b*<~GueRqE)QZN+b?Tl; z`g|%S0(rhuUmZTZH-+!1QsRb&n=-B@KAnc5uJcyomh=VY7U0Er>Q71SO5CwJk!mu+ zdQ(vp&=_hMSb=~w3jS3FsYBydy%>#t0!T#AU7s%W6R^fl^ZrAeu*Oi7#)B~g=|y*H zuSbW#u`~NWTW1PslhA7reKwuNzN!y}OA)cW^LS(j!;nH1!>g7Q9YMH z_*o`IVQy^Ko`C#nVfPa6;-{Zr1sZr@=aNM&v~=67g3ZE$@@~bObfX$sL!k4!gjtw!bB5|NdN(tgqo+p@FnX zv@T4td|(QrwirlSHGh;J( zaQ1rwwGg7TJZI%9l%Z51rjjH3R>z^097@|0geI+J%eH9| zM*IHT^=5E zX>vqd(?GY7eMyUtzL^?kQMJOEIha317A3}0z{8W^BxH1F)7;cGSR-V2A zG4NcDYGGe8Vh8MESwKZX>2W>zDSY2jtOkhFS8p$V zK&F{I&T4lrAHfEwP5A*VUUuJY<;vfzJxfai!Eh0ljEKFNaWF0~@)6R>C+?Gk9J;tO zr4bo86$8D(LGGS^duodve;ekssiS;1CE|5x*&@+uYNlzBs)9hy{6rhIBK7O1&v}m` 
zfzG5kE`TMV_!1T5e8vrw!2VYL*y7~tx>$Y*!tDoovOC48SJhbyIY~+r$|fwj@AAG{ zi=*$9V;n}~R-U5Qo_@k6I*Bk!zSoXv2!fimgj-2;r0_EouTfQS!e;|ojU4{k)4`>$ zsIDm$`Y@8)@~CYaJ9H9^`tGD)ru7w8P&J{KJ|g z8IIRzo|ZB-JB+?j?c5P*mQ5+^&Abn;k?MEnqKe7W(4MM7E&^=7V{(#s-*XdBx25)n z>RHs6A?Fqp|Ij2K8>m}Cg30YnH%sR9TLc1XY8w=0iz@g~wcmG@VUj}{?{ox`w3Nhp znNnS;7$!fx-U^pA6R~XxzmeaXmkojO3CizY21BY<+P9`G`BhotyETS zsQXd-S4aB7XWyk#M3Ywj1!Q8Wqv)lxsZAi;cWQktIhL?IajK!<{#!-p;L4Y_jdcT_ zqBP$k4qS_wyVy9skhYTKOT`#2+oNf>ztYKp7qcLK zM1EY%+o2$L1Uo+ChS~2e_QO*=xDE<+Qqjle9ek|Y;lm~EPv6gVDyKKUz`)-8VQRKy zID6WRksr45G^?eHu;qm}Hst_3-*<|4Om*#{SN6ExafD%dI41JpAl445wVA zS$a>R2#y4XkAA=#fzm>$xjZ=kPLbh??dkNn-fV>vK(;o2c%0R*I3+zOom1GvzMZ?T zgCLcIcK7li4wg8+Y~O@V3{CmNGsZJz6Fqj9!T>Vymm2`Vz#!FZX2gyw>prkUq&&nk zP@*u&$~qvzRj^C5HSS6YoEyk}J(uVnQ*rpO&~b>$)1aA!nY}>#Ei9 zld7X_M2<%k#aoL0>YlHW(~5)L6x1}cv3KZiIrSD?wk`NnZ_|55mB8`!s|I*HU%{P& zvx6pGqH92~^rk)xR!3jFC(m`y+98=4? z!?BX2?$_@qqwsMbbRD^_UH4kAeUh=^(OYcUi>hrV@V&B;qqsH&^Q;lUodV@i(ct8m z?=Lu}SHWSKINJv7Qj0uLa!oH<=oR$Gg0D9XbzZT2@I`T+T*qyr;)IOtc!W!SI9m{ZW(skci-lS!%xnfK#Zsp;&z?ZD$3HHDN~XZCi#K50n~6@2cn zI<1R5SwsS3Xoo2?_0NVOPnMhOsOTW3KSERKr*!RxWF+aREFqD8Kl$fT#rh6Sg>X_b zzV!7QWH4Wh@jBBpC99<1!b$0&PQQj^$WZ6|!Jru5ECa%`&~?oYXyfu@)#`2CA|tz1 zn#&)=hr)%vP`SecRP1^ixM~;(Y1HOa9JN_PVOc}rIZ>PIas`{uksJ~kl-L}n#@wPB zTEa*Hv7j(9HTbdb>v`4D`gHbB>JFA0+1YbJM}qf4|7<-3#4jhGk1g6o!x*v494Z-#x&v zNWuAq)XVjn)iK|Nl6jvuoC)4-$d4odZvk#%y27ZakeGMofADf3jm1GX_CnUoS}x&u zX4V*hj$b_sJMTG0_@0*>pYBK6M*>3{bnVqGH?3Er+9 zI6Xqxi$-C}ZkAr$TKdypak%x`=l?|A620y9AR_$tw?jaL&2LnC*ao;Mt@Q4HX|HA+ zX3y7(&N3V>8s9yK{U20Xy8Z53f&X4CZT}y+*Z)V#uJY3jKGjZaS+;{{VI;Bg*$8cv zxPwD)_c3t87Lj5X)5*squ&7zrJZWsQ2`&I9VToqxU*V}{S!mNmq2a9y!Em(oPb^ZNX;$k5PG0xZvvR$zPnY|9gl^nGW3TFS2mXS5hRX; z0LJMQTYJ#}Dfex{ln|J4(*Ke`jExE;kZsonHmqw>=@E_U|CehmJ@{ zSpQmS`*n{!JJ0{+RKS;JPMyyNpw9p`{MF?PSEoNEMunaz#RE#D`eOhqvk{Hl>v%>? 
zWc}~BHIABo!yQ_D>?nu)P#6s5xtM?4ACz_4ftvZ!T1l7s@kpl@LHy&vH(zKevl4IZ!7fpe3R@KBz@S`s2pr zDt_mB*4^vlWKVHr)BDfp%K*KK$%HnK11Mjs|~D$hk{T^CFHeBzV0CNxm7#LFdiKDW7LOE_zq-s~%V&Sv zD&{!IMVMp7!V7Ex<)@;bCAD3X0A9ivwa_OB{k!N}l+{uGFM+lFh$~i}9J0oa1jKSg zleO6CA+9h{Q0qz>P4=Ja)};X0D&RQ7HL#L1Rm~)Y2(TusZzZsQ+5_tN!_bQ}6?5v< zLawMIWK@)*NG+}L+84Lz0XMmR$#EB<<(Jdy)dqb27=9no6}cq^g&Vyostr<6c6_+6 zdB&sR*1^yR#HUxH%2}^|&a!9f-WRu~?D^%l5$E zcYy=STeB=+ObJI~tK*MW7Z9%LYv($Q+g&N>i<(vAPB^3++nehO#ym%CXAQ}&h0W&e zugL*`&5dm?9Pfl3S_)!`D6cIa8#tpe-da0?5->=$SILS-H+qfAg#R3-o;Nb}hsbYN0QC zLH!|1WZ#=0MNvf?VlE&2r;0oUBX&2RtA=x)rSDkl0EoD| zN!D`W{rAM`o4>ZMm2jWBZ3(My1Q2beaz1c&+qEyex{yfbo&(XGT`r)$K0b1x)XMje zze<7h1x^f9?%(Z$9RGfkz!|*M*bb;h;;!0LO&v0iG#bYEciyw0Wuzv{GEvN7;QV82 ze8>!H^|IBUd!(@6jhpfm<=&|Qc4L32$!zAth|(0WDP_~|j-nz!+^fSWTY5ARos6)j z_Nfy9RdDtbtEUDlo4U=gGo!TM{n8&0XqI`yWLN2bk(?e9=f?rqeA<)<^gBKyn7f;4 zEnDMSXd;E!>)LlPmg+p}-cZ5(;{%+qa_7qbks6$_7$hu9KkHfudC`cUwP(mYuwhW0 zn7+I|67-z2Be(zX8tM|x?R7*}_cJvW(x2~d{=(;nj(34crmhd5-LS7oY%&vBQY-UsvcOJedyXLx;x_ z0|Cxq7Dw4t?7rPnrjBTg;E`+ZtxsoBtO z`V%rp?%eK0u-Rg84+sw!yE<3`BX zp;;+$u7XdM*4?QH8pLYCH$7VF-1)tVqR7x+zpLazP05*VC$sqI)1;OMcdv13Zfh*R zT-9*O^nD|5(}Z{y)>vpUN{Iek09DW#z;#SS_}Fj-ynmsl@cSR8xUsJIriWzekx^Ub zMQm2dG%@qsm6EvThKPG_kigox*gFyj;RIA^?kDKzdq6ah0O=#=Uj=kpEZsKuV)yiE zBmMS6LN_YSAih2vErPCyg``%fWoaoSpksHxlfP@qtG9P5g0@k^_=cUs7$2cd3`$d# zlfmy4Q5W2gDlTWDbb=9l^TfFDZ`B8eTK>rj>ijM4PEk%4o&>x{N+a)W6B_9YN`3he<8K1Y#E^{=9Dvm7yJ|fU9p`re zI?Rq!t4BBdpb?7kVuY4;O*Aj|v4w*_qjN7jmOS&a(sO2J-V1{3DUSMecA#nWfHci#+ZTlnz&@~( z_%*ZnjhjO0C00Ns9uzueANEu44u5?mW0hvt7R4e>`xZ7a_L3Oy+js#JJ|!MP|KG}W zTEZYJgePh{ret_Ji2h;0v#xbsqx^U4HuYP7Q@{~po(|~_{h>{C0i-(OVydZz6)qQA zfdBN3T`zy-JrW-9UD^Wg!g_k51i&po1FylW@TVTg9|OYTuYjT#+ecLfRktCCx(c4+7mLb1$V?VK=S zZJ=2{HMbkS1r6TWhn&t|h2?H`o|mQ@+leJL?p9f=bxCCbPpF0}q(3~5?ZYSE7<2l# zw+4j#q&uqA%=f92KL^qTVQ;krJ!;6a2hJUZCOJvW-Vt7_(&3J1=CBu_z*xPGmU;3+ zdUfR{jMr#qlw)On68kkNOY^G7O{bt2Q)9NW?i)L`cGpwlz=#XJmi?C{B8Rsf<)ug* zLs3TGUKIBugWBsVH2CqP#6HXSH(&T;PlTa#B{1a=-gK8*D$pCli+fnMT1CXT;bcry 
z@>F~l^fl}l9aXXDTZ>9WgM&O{c&3ZrydK(Xl~57nAcr9p6C7Q!(CHqbJ-CQyYf%3JK2_w>=1u|bCW|DeLf*1M&vPlY-BS~aS(j= zhK86|bzyhZ+r*^1#A6PBoyaFwf3Fm_ea7oI8zu|V8{gd1^=&rwx4IpzaepIyyqoNc zv)@<2uWQ9xXhW@0dZh&$lbE-icNn~Ef`hu{=gle~9bH?$URyKiG5S4@vhU+QA7))$ zPfJ%)!*lL=$v2dlcti$`m5rRZmgRxyBP^9g4Jj_Zbviv!(Z309&}gCzr43xf3vv6vk>5jA@p9Cx!lBU-VYzI zncc#~f6J@_IFaslbe8=TdVa*JZCUCE(frL6{vY+Xv%ymuWYgYG_3LhC2uBvTBeOPq z2-PTjcDWprw~~>_FZ9kS$>}wKuEp}He~faY^4WEm^5nIP;p=Hm*0ysixZ_v$EGg@| zM*=f-c+4SienbNQ(K$3|-uR^Akd0cJsNTZ%%_SR@<=|c!e?LH-ikg)o;JrjfENLfG zHJWFaR7jYjkmu5{6?FUfG|!*0B1(8M2J8uMng}-3QPUT;p5lpbiPbrK-^TY2=2I&U(N>6$c-kEC ziQYYarT$kHH_mh83KOt`?ee_(O{TMua4J)0*KBvookH+w&!DXHVvPB(0Rb*G?(1~9Cl$Bpv_x8%hr=B_}&Okpi-N-yl}t@l||OWS}SL|YYoiQ4w6k)^Wg zKa>tFk~sFD=P@ba74fgqyJRZmaRzf#RiwCET*5J1# zDovezFm+pAsL6YTW!>7k-@E;H9D1H-oxApDx;G%}6dJnBd7+J%7w(ylj5?i*>anvu z0Zuz-LCwxKKPXOeV>G#mKRvb|I^4M5L?<+{l6gdxTXJc!qa;OD(Xyd=YlGZlG{xs0 z7zb0^ei7y6(i~oa7n@?GE>?HI2`Pqe+avYsdu}f7Tu-V`e9IKyT+ZLp(6rrsud>JT!Rl4P>JtiM#vS#^{BEmO$U=S!i%VXQB{c+?YD1B z4+-enk!@c7I!5VP+hXO>$UkxS3)6iJ-|itAsDTl2cD?QkyvWvH9@Q2(uK_bRb}pl6 z#ZpTvLv59@Md8KQOmE4%rE;gU7N=cqr6s7_VT$x#fqEXtU5#~}Pi^&3z?2#?ssEh& zkhl3k2PFfM`z7EHWkYBPIoHtErf2vIpSh7IEA3M5vu<)5d^K7~z)UD^*ZkU}&G7%r zJ)7=wXEsScW5^JVOK372Fg(|v{gMY{QT=!2@D257mvrL##SxXz6kS*4SwTmMaslMv zTIq-I-_J7w)VW7Gn;98_GW2avOgbJ9em%Nt4|bo`<9 z!rjeDF>9#x8GB%W+2jau`avATy5eaL*yaA5k%}toXQhQ; z`FEvUrQxmSvKPLCR*ZZ;fn?k~S=((xs@8Bhv7_+nZRiM5dV%H+J8pl>c01duBKAs>&=bqtuANt$7yiqnoor);n5K_2; zW$>n)zMX=pRhwK2>`G~h`g?R!)Fg!_t#P&B?e~HxW@_{Y*SG9HX?Ty5DOI*jYxjRm z?h(!BOv?%4MR8zv9Xb5*BWKR}^Mwiy>wqn4|dMLeO;iTz2MbAMNsC`QdNw^TUb@IJ5M(+bRF>4(fV> zha~6O{>Om%5&(cPaq&m~{Zb@$AwpY4rJIPWJ?_h`kp>3ula}``2D05& zo6bP@1E8F^IQ-6E%X-b2eY17ZM`W3_F2{0l;EOPF{84Kkz|FQ{St^~Xz_D&Rv=V?a z{$JIdXH=70*RCx{Q7KB3jv^{jl_p445$P&Y1R)3;F`|GVLNJ83#VrxA06|(%O11(< zdMB_ow1AXI5rjk_U_bJ{+vJOJLkXVDOp)-&U@bXh4`VY4?i9P zC%-pmbDdVtDFdggBy+l3l>(mPUGu*(w0-(5^y8-M9TN8Ugs#aI4>u#0}S5UUfzkE0=o2 zD_m*D@qFceHw)A?rY3B)eGY{FPB$jg*Rs}6dv*tow>7+PKc0s1L3F8B*=*{;lFf3K 
zjOkDBGYH~^Xx59t%iT(?%mrmDH&K>Fkah3UQgT#-Yt+^<8ps1Cf}}Kcn;54eMJl9W z020R|F~~EQkt6dgJqJPK*ZX3;`h6lX5b1LtysiRx?SZGXd*$7Fv}^N^dLE5CZu=Ae z0T#H1-vlm4Cot`bVEQ5m*EF43qsE~@wnxd13#7PIN^x+v?vNOy9zwza2D`r+=y>A5 zefa%ya~M(FMGUkzj@EokAVi!6a^LXVH3o)YR3Y?2^FCReyvS*l3A9}>_qxzqx6~?w zTR1cg6az+lbdAT965*-Q&DY>kKKI9gBhs<>oU{ zzEo)d5(OzBJwgUk&sowF#>~za3J(NpfFjW8z)S9Cv@e_xeenc2e)3E*>odFJp9c>L zYW~xi(zU0pY$k4tC@FO#HM36#-!> ze6xc75ujZw>Sc!kCxl>$pDN^2%&^qeI^v~NQwN2KAH|OD(`?WxFm@2eE^YAl*U%Fd zP}~g)ZWXLqhyD%zH2x9Suvq_CSd))e7Bv`refPdGu3MN)uagozyW%Qlz=4&Ro4V;O zU8oGZv~M(NRWaHZximqf8&B>>%B$yPiA;q%XxYe5k&r|6e>$ah%JC}Y05eWh!p2xJUE^kl(km<&ak51 z5sQykhK^W$Oszh5+FsSw#MpdqjA+TV3j1mPBm7;47D3mthx|ZCFBbl_5?S`lImY&P z8A-ziKR{*gR#-EOP+r?Vt^t6t*{cnL9Vwas}%Jjpinuw=ais& zInp88&FNk$e&SJVAR}&vPFsfXHs7>woMuc|YR83m)>WZnLtwTDkORk#N!d z2h=Q0FI7AWI~(@TB#5(1&+eAmjTt*Yes+vuEV*6*p@&>V%*s#$v{NEq4+k#arGXTU zUFTf7o?E5>^?E?lI8Osl)8QtPyKYEUyCQ7ivS4$_gVo%IgX#Ty-9tf=92Z(>!|U7N zG_=#|Wnmpyr2?ll8d?2fVh6-P2brcW#PTU!8S%&XOyV`n=EU@8M4B&9&ozm3(Y7v_ z2c4(1XNQf~x|aQ6)u~yh@<560Feu0q(|C~tA1nKJALPD4)I`1ToScdqycK)Z&8Y)@ zJ0_7+7)FTem$807b!N ztdUz)#DPV-l)AJk`#uI{HIw$zWP#~yvdbk_hJCN!?-bj%iX56NWUfxY%hYAhT&n3S z3Alc$BII?Zgl+8i+k2D`C7Kzs(_U)!QFIm#p7I2mMA2qp#mQaQ*jKkZ5s{e-f6_r1iy5cVjm%CT{L}WuIWp>VhZ1E}YW~&|ciuRcOg@M(9_@h>i<`kB}7&w$jrRF?x z182+55?iolj}`IK4Sj@Evv-dFD#@u!mH0^YJyHXH@P|qP?LYdB@g*9f3xL`Kf}TD} z28FWzJ=IOiB`j-L$DN393VKJ`NyfI78gG!Mf7Q%|m{)672e;^UzagN8_){_l$>CSI zpgQ}+g!N{i|B}?3fsFNmv0Qy=w+sYQjFvE;hI_K~u-|0}<^)c|e9c%#cG|(eg5!kw z{B~K&&yl;wSO#ugZ$lGEI38iOI2(eI{okV|CHEcoF^^E+5)MboyN~@r7Kl9(tK_yA zUNZitXp6YV;oDnEgiA`14s)LcO^hVdmJ??%2fOa8WH?*)iy}U$F2J|ubQw?XbygJ; ze5Vf2^(0=0?4Aa++}pINM};K!vr6GpA0pkZgs@I2oBdJW=F2gw6Hv6$? 
zJdD!`?r4(EW&9%FF>&ODfAf`{;_JyyTm>u6B9c6lqL|>UD)fa#QZoBF9l$;r`?%)7 zW5t&?f3qGPU7Ig-nA;Rmh9&C}=c|*L#391csL8Fhfwk(p+ z-z7JgY#P+eo(?NxUOLW*-=V^YAH3)$?_fyJ4}L#>MA0X5n3spDrp@6uTYinr*S5dm zJr)coD7l+@VGzQo2d;~+|CDpOr9lC;_G6&GyWGLDPL&{cqs*^S;4<8aZ|5nJkHI=a z$aHg&Wqjli7o_>0gZXhrLSc7~ZV0WDVaIcfl-8K;Ibh;e&wrBt&fRZd9yx0dRuibr z`oH@9cl3{j!_eG1@#cRwf&L%kg#6!wnb?AmJ17#ZUUmPsDUExB#lQk&Urm7^V|PKr zRjE?NMwg&H#y2sv`djt$X)p=GR`Lc!qT8-E-+6#tOrEX(M3PT|@rE(>d1LyqsWl#Z zh7m~*&0od9R<iSn`4=i;@7w=n`bysQ)=&$`Fkd&oW^S zFOVEE;CbDi*jO@J(}<9+>z@V@U9LW1pFA_RqUq^~>bDK!wkd40Us}BqlXILuWI5Yr z>P1p3PdFF58cZ}la2x{}W;f*8Cj||?(YxggQBci3EpO!m5V#Uqep{FQ4MKmkvfxTW zw)&|DQUE)zc6*F`<33TG2#^rJ88Xm+=)+)^@lDh5-e0m1aHLwA`lAoS+_6WgAUXmv zVIBs~fkh?bmjUyj&u=qdR_4LD)dGEk#CGurbt(lySQ_|u5Y;8gcx9N84VL5D9Er+; z;R!{6?-Ew%+~Ymc%q7=OnP`dZ6WUMT>eX7zp)-1J?QcSPKQTt%qp;q4bB-~|$g?lN zjr*0H_+rdyZW6X9tTrx)Lsj1ThZ7LrWI`LeZo3|EMuWQm%2uL281`;IaOoWWr;4`! zAS>qP#dSliuB4kIi#G?GE6UN{4^8$X^&Sn*_9zz69^}6filKMal1O1&^AO<+>LUv7 zdqRG8ot(2XSs@9~?7QaYTXX^lhY&VFV&|PcK(b_Oi~RK^N4qgyJIw3Qq2!wZdO7h1 z+WlH*c@1G(zb@(h_C<4NUeu5|V3Aqx-a6Nk_eO)T$(Vrr1z2Y1hI8^?-t2U~ z^X;ZR3)6l^Sd+AUt&<^xf?YG90h3`j1Q@5s%ak+d-tsfpC{*16@HloJkz>Jz!`6o3 zwW!u-Z`USYDif8^yu12lxUQS`zwINURXt@d{YZFI&n!Cc^qaU|eK>Ayy$=C4md1b`Wwvbj*KA)!2ZB8*_@|v?iuLSfeGoc?mR;U@CM=`iFotY znb-;8$e-tF^-eWARc|1dh$=x}h)68>5i(x2AmjjY4%#N{u0$fwe|UHbfr zI8mUvzbdIpx@@rj>r930`~R#4?OS?tSFZ8|@f3i|dqcfP{@Dd#Fi&J%rzUMYba%kl zPG{%{$0OyRwax4Y&S#g-!1z4z6xvu6cMIb8y~yGH2zI+viqONI?e5ml1eg`s6^3tI zISFyCZpJJD2gWa~YwA(>-q}$q>zgUJHk{glOGqV}JwGE7-FjK_xsXBxIXHu_knE?k zV1R=G>G0}hsc6mEH>{3mh>=CE$dXW?gfo>`)-Dt4%ZpB)-TIQr+at2 zRHnIVPnBf*q~g>A?}J0u@jSOGD{beDSFV9p#eFH$*#5NuwG8QWy(`OBJAF!j#LJ}l z5I@$hfd%$eOIKqK7abDhD*jA}Hy@EJeV*t2O6E7?Cbyk??PdmMD`Le1B$FayV zS%7A8v{0zd5)!s{5kx2&u9>*mDxI*yI=CC3iTwHI33+kxQ`99;Ni1HHmPbtWO@Kh3 z++Hs}+>CDgKs~>$6A%Ja+alul7G{zFJrDTgzZ0>m+B8j1ND?VX-pIO`Yk+Kbw^KSH zr`K(ES(@^g-yX)yaKZf;z>hZ!BSYf5`}2~(xkL5_9k$e7TbJTPCL*~XRQUUM$4t0l zbvu>v!au;N=9}MBVcD87^rbchSd&aceiZQBqyYm||FynVbzD(wUrY1@S4B4=L~cf* 
z6#wP*dw)?YMy>*aOmYF04z_wt`6%Q~gRw;kGQabX*!rs-HHYa5svj;zt>!p)1?(Px zI_ysY854N-Ef&tyj_C_TZ<&6qZlX-?Pv<3IKSEUcy|I3o&(YrlVlFX1sT)Qjb}c8c zxabE1Is*WEl|GPmm8~C8*0BlXp+q!bZq@X=)!jLi{)zf*a7Tsnbj;D6O5kck0SXBY z&mk8FekEOXn$!%#?5#*g{SLuqe)X#YS6Cvqwa~pjzOK%=2;P^4h#^aElDP_S>k~p0 zHamK0>u~a0-ISr2W^HcP5`+0>DfrBTd8LHy({`MfS*94WsJp9YfFhja0s}mmxCwUV z!pVTOp(6CZtC2yo9p9@<3E*qW|H({qjQw(3N^a{kN@f&NAY1N8Ualk4tj;(ZtD7x<~_fFurjoxPoxV>(~5r4v4S3vzs02(CDHLx+1FMc@zq{{H=`XUXd z?`ny3wVKV{E;5I1bI1j=yb%K#b6?gCNw&~0$~2Y@`w@V%Nwbc6_TA%$K&IMu3Chi) zsVPl4nuN{ZI`}*KmPzf81Zqli%6(umti*#BYfAr^u7{Dh8Tiy}Fdu~S!H(GMBjAI- z9{NA^8UDY=2Xq4^Kz%>Nk4?co`vvVny@7{>fg5|Z->>1}+kwrm(g+}tx}8#Kr5Ax?KY4vtL>D@L~FRu*@spCRszNn7_g-HU6 z7L5ih$p(f2CGexm_Pbq(B?JCd<1iS?_b<)|M%9M{WMlsf@Fs?pf}1K_>bO8UJU3l{ z0PE<>awv%oRg#eT{RBmv_`*{DdJ_1UcM8i}f>%QI9Ictr$vCRd z5Ir+@nJ1?|AzKVj$uX4^*BP@_d0?S4(0)}hG z%zT-DKqUHFSEjyZsKqcydz0QdBY;{6gXqI>>E7xzcu~v&a~AL+W(G950bM;qGdyaE zC^^Kdw_UD&5;_aewZI1F9&zvBV73tfs8k667Hv>~MG^C8rj%EDjJ3s2!kQppSPIyy z3R^!d(Vzv$s{wlQw$x0!?$Puzkmu>$-CF9O%6v)vtOKsGp`Rhdi0AnRiZ@!l-(T^z z@&bfEWV|v54UC=jBqS}D4X5301F|swUxFfdRs?!vGTGEKDKvQp1C(>Bz=BEm8pZ-! 
zqm}u7{(!ntE@uNbq>66BfBsG#?8TQ-ST<+inkkYYsc9pbhMt^MdmB1#hm%p zm78P#)Q>NJV7cA9?w13VZ+kypml%DyBXvHN>j_TV5BNC8UF4sB){2XOMXsEfyl3am zo#%M;yR_Zf_!T2UrLxvTzEug4&=n#L!BU5K3}Y(DyDaON5*E`kCBYS~PDEB{i@Rgi z!x1=`_A;I>kB3htk{sgm3SJEch2JtJ@^`{9U+da`2aEE&sr zu=h6f<6-3>F5h~up3j3*6Ap~p7$vJhJWJMw|A*Q_fWkHpTrP)Pc#V)eO9wKMt~q57 zu7dX>)<`4npli^_Wa=NqkQ$BgDGGucsJVX3mGxJM5vsAZ%|$>Hk9{YK7_=XdtZAS( zeC{tO@7f5M$EeTE1%4v7E)W@j<{C0kd5oMP?QtOZZdBdQ=;jJ(D(&s)`GCH+7IfVz z)zQa@s?OuFSKJDjp*J@cGl%v1GN59j&^LgvwL&+(EugIUAz`_l$Q}&cf+XD8IdE{9 z>@v_8ND@o2)pHx2sKxQt^-nW;ypPfHlcmxI#{c~WaNcWuktjwx&p0$60;o;o4h_i?|Eg}~3R!lwr z?$GV|BvFy7D3*SK1vst-<~C&B&Q4*7MUfXQ_hwvPjr^{4Ue&h<^>&^byu2-$>s++C zvwSh*A+a0Q_=|6JAJ3gB#|nwTr)R&-X#|GFH76^kO2FlnbtOt8=h$to?qTX6F96#fxpohxE*KudXJd9u&iV1&{z^QdrA zfB$+Q1Rh|GsIV2C;|d|HYUALt?p?l;Kpx{Wlh3r{`}t!BI__BTm>=Ki=Ar1YSOxm7 z!nBK=zs})hfkv*fIC+wwla^N6v~rqF&P$*&A%li` z__~L%etE6zIOIK$QW|dE)cBX8cpcwS2e)~dk2(bjS*qyNxWFpP&Bgd6Uy`RS9@G;3 z7c$DWE2~-4|BtGPX9IEMRND66GHChV2gijEf+B`OGsU@EnRqoYwLSWd&IY-O@H{CZ zhJ*54%G;-B-*Ws@4|ij8LIyVe@yu<4OA3T%0VlZ=SW6M(LwYC2xj8OknrhdtY%(1@ zFMT?ntfBdE?B>JLp@5K16{zw`F6_?G4e|Z{hOi|ximPA7G_Flh`2?BtWW0;){dPm5 z1;!YLoF9!+0}Ny8a)+O%21O7Ae!3eqC8C&5aU=(jY5Dp`Sz0i!rG!8({7d?uw`7*#5(2_Pa^PZ+B9wfb|OTT!*-EN{&GbUta>!MObG zX5lzRt~6k26YBz=qp8xd=dlcDsBEGG2a{jh`ZaN{zDFg1Gf;MusrY=Oy=6{J+s{^& z)24|LCva9V3BH;i)K*6B1>9eJr--5R-I(AY&u=s)Z;2ALhI0GuerZu$Sns| zR0zdpH(6bippqb()hAIJrtjJe@v^Q3yH~Ckt&N>0p+`|n@6sP^GJ0O3 zOQQ<8+*KieHbA?6jZRQkmzPbaIO8*~|0t^;kggLecQXl-za`P`e5iel>E6{3 z9!7ORh)&D2t!X*F87Dii4zE@NIac}d$aG@))gVOsPJdixt{&Dj^pZC8d!JQ|VVo}M$O{?O>W4oE2e)DvqHri7@ zLtnj-OY03y@c#fi&AP5ytW7ZGzMMfja_1&0goZY~;Y%N?OHy}zbt%>@38zFM-q+05 zDHD4*GyP&}3*|%KV+60~SH#e^u-rB_vAn0`1m|~(1z6Hsh!gXxToht&mjENm=(%2h zEWiD0?qP*ThTWzNT}FIU(~D%aOp2%)muMDk+XPhgL??>JAa`m;LL9r*HglCkcYm1U zh@5B-wQQ(pTR2aDgt<(g;A>Chl+8T7#qhTRSNTK-22&0^YO1VC34?M8|Y4#&%sss7pS{TKiC5Q{f($ZX`xuN zRPHVRjni*JeTwsQ7}_V#5C=-r#reyDlUCw&fFRBfYV4okbT1k00CkrDg^m9$(C`f_0; z^3Q9U2QD+()$1gKEF*JVECurm$PH|Y1M(Q{yvKOfw~3w(@J2)Lj1*tjb}2N)>UeF) 
zRcRiAtxSC8bA*oZl=|&4-WKV9fb%bSMT~1|-tKV>PySN{`~j&QRu=XbKAiK4{VySR Be -Users with Nvidia GPUs can get **20-40% faster\* token speeds** on their laptop or desktops by using [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). The greater implication is that you are running FP16, which is also more accurate than quantized models. +:::info -This guide walks you through how to install Jan's official [TensorRT-LLM Extension](https://github.com/janhq/nitro-tensorrt-llm). This extension uses [Nitro-TensorRT-LLM](https://github.com/janhq/nitro-tensorrt-llm) as the AI engine, instead of the default [Nitro-Llama-CPP](https://github.com/janhq/nitro). It includes an efficient C++ server to natively execute the [TRT-LLM C++ runtime](https://nvidia.github.io/TensorRT-LLM/gpt_runtime.html). It also comes with additional feature and performance improvements like OpenAI compatibility, tokenizer improvements, and queues. +TensorRT-LLM support was launched in 0.4.9, and should be regarded as an Experimental feature. -\*Compared to using LlamaCPP engine. - -:::warning -This feature is only available for Windows users. Linux is coming soon. - -Additionally, we only prebuilt a few demo models. You can always build your desired models directly on your machine. [Read here](#build-your-own-tensorrt-models). +- Only Windows is supported for now. +- Please report bugs in our Discord's [#tensorrt-llm](https://discord.com/channels/1107178041848909847/1201832734704795688) channel. ::: +Jan supports [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) as an alternate Inference Engine, for users who have Nvidia GPUs with large VRAM. TensorRT-LLM allows for blazing fast inference, but requires Nvidia GPUs with [larger VRAM](https://nvidia.github.io/TensorRT-LLM/memory.html). + +## What is TensorRT-LLM? + +[TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is a hardware-optimized LLM inference engine for Nvidia GPUs that compiles models to run extremely fast on Nvidia GPUs. 
+- Mainly used on Nvidia's Datacenter-grade GPUs like the H100s [to produce 10,000 tok/s](https://nvidia.github.io/TensorRT-LLM/blogs/H100vsA100.html). +- Can be used on Nvidia's workstation (e.g. [A6000](https://www.nvidia.com/en-us/design-visualization/rtx-6000/)) and consumer-grade GPUs (e.g. [RTX 4090](https://www.nvidia.com/en-us/geforce/graphics-cards/40-series/rtx-4090/)) + +:::tip[Benefits] + +- Our performance testing shows 20-40% faster token/s speeds on consumer-grade GPUs +- On datacenter-grade GPUs, TensorRT-LLM can go up to 10,000 tokens/s +- TensorRT-LLM is a relatively new library that was [released in Sept 2023](https://github.com/NVIDIA/TensorRT-LLM/graphs/contributors). We anticipate performance and resource utilization improvements in the future. + +::: + +:::warning[Caveats] + +- TensorRT-LLM requires models to be compiled into GPU and OS-specific "Model Engines" (vs. GGUF's "convert once, run anywhere" approach) +- TensorRT-LLM Model Engines tend to utilize a larger amount of VRAM and RAM in exchange for performance +- This usually means only people with top-of-the-line Nvidia GPUs can use TensorRT-LLM + +::: + + ## Requirements -- A Windows PC +### Hardware + +- Windows PC - Nvidia GPU(s): Ada or Ampere series (i.e. RTX 4000s & 3000s). More will be supported soon. - 3GB+ of disk space to download TRT-LLM artifacts and a Nitro binary -- Jan v0.4.9+ or Jan v0.4.8-321+ (nightly) -- Nvidia Driver v535+ ([installation guide](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements)) -- CUDA Toolkit v12.2+ ([installation guide](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements)) -## Install TensorRT-Extension +**Compatible GPUs** + +| Architecture | Supported? 
| Consumer-grade | Workstation-grade | +| ------------ | --- | -------------- | ----------------- | +| Ada | ✅ | 4050 and above | RTX A2000 Ada | +| Ampere | ✅ | 3050 and above | A100 | +| Turing | ❌ | Not Supported | Not Supported | + +:::info + +Please ping us in Discord's [#tensorrt-llm](https://discord.com/channels/1107178041848909847/1201832734704795688) channel if you would like Turing support. + +::: + +### Software + +- Jan v0.4.9+ or Jan v0.4.8-321+ (nightly) +- [Nvidia Driver v535+](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements) +- [CUDA Toolkit v12.2+](https://jan.ai/guides/common-error/not-using-gpu/#1-ensure-gpu-mode-requirements) + +## Getting Started + +### Install TensorRT-Extension 1. Go to Settings > Extensions -2. Click install next to the TensorRT-LLM Extension -3. Check that files are correctly downloaded +2. Install the TensorRT-LLM Extension + +:::info +You can check if files have been correctly downloaded: ```sh ls ~\jan\extensions\@janhq\tensorrt-llm-extension\dist\bin -# Your Extension Folder should now include `nitro.exe`, among other artifacts needed to run TRT-LLM +# Your Extension Folder should now include `nitro.exe`, among other `.dll` files needed to run TRT-LLM ``` - -## Download a Compatible Model - -TensorRT-LLM can only run models in `TensorRT` format. These models, aka "TensorRT Engines", are prebuilt specifically for each target OS+GPU architecture. - -We offer a handful of precompiled models for Ampere and Ada cards that you can immediately download and play with: - -1. Restart the application and go to the Hub -2. Look for models with the `TensorRT-LLM` label in the recommended models list. Click download. This step might take some time. 🙏 - -![image](https://hackmd.io/_uploads/rJewrEgRp.png) - -3. Click use and start chatting! -4. 
You may need to allow Nitro in your network - -![alt text](image.png) - -:::warning -If you are our nightly builds, you may have to reinstall the TensorRT-LLM extension each time you update the app. We're working on better extension lifecyles - stay tuned. ::: -## Configure Settings +### Download a TensorRT-LLM Model -You can customize the default parameters for how Jan runs TensorRT-LLM. +Jan's Hub has a few pre-compiled TensorRT-LLM models that you can download, which have a `TensorRT-LLM` label -:::info +- We automatically download the TensorRT-LLM Model Engine for your GPU architecture +- We have made a few 1.1b models available that can run even on Laptop GPUs with 8gb VRAM + + +| Model | OS | Ada (40XX) | Ampere (30XX) | Description | +| ------------------- | ------- | ---------- | ------------- | --------------------------------------------------- | +| Llamacorn 1.1b | Windows | ✅ | ✅ | TinyLlama-1.1b, fine-tuned for usability | +| TinyJensen 1.1b | Windows | ✅ | ✅ | TinyLlama-1.1b, fine-tuned on Jensen Huang speeches | +| Mistral Instruct 7b | Windows | ✅ | ✅ | Mistral | + +### Importing Pre-built Models + +You can import a pre-built model, by creating a new folder in Jan's `/models` directory that includes: + +- TensorRT-LLM Engine files (e.g. `tokenizer`, `.engine`, etc) +- `model.json` that registers these files, and specifies `engine` as `nitro-tensorrt-llm` + +:::note[Sample model.json] + +Note the `engine` is `nitro-tensorrt-llm`: this won't work without it! 
+ +```js +{ + "sources": [ + { + "filename": "config.json", + "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/config.json" + }, + { + "filename": "mistral_float16_tp1_rank0.engine", + "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/mistral_float16_tp1_rank0.engine" + }, + { + "filename": "tokenizer.model", + "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.model" + }, + { + "filename": "special_tokens_map.json", + "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/special_tokens_map.json" + }, + { + "filename": "tokenizer.json", + "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer.json" + }, + { + "filename": "tokenizer_config.json", + "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/tokenizer_config.json" + }, + { + "filename": "model.cache", + "url": "https://delta.jan.ai/dist/models/<gpuarch>/<os>/tensorrt-llm-v0.7.1/TinyJensen-1.1B-Chat-fp16/model.cache" + } + ], + "id": "tinyjensen-1.1b-chat-fp16", + "object": "model", + "name": "TinyJensen 1.1B Chat FP16", + "version": "1.0", + "description": "Do you want to chat with Jensen Huang? Here you are", + "format": "TensorRT-LLM", + "settings": { + "ctx_len": 2048, + "text_model": false + }, + "parameters": { + "max_tokens": 4096 + }, + "metadata": { + "author": "LLama", + "tags": [ + "TensorRT-LLM", + "1B", + "Finetuned" + ], + "size": 2151000000 + }, + "engine": "nitro-tensorrt-llm" +} +``` + +::: + +### Using a TensorRT-LLM Model + +You can just select and use a TensorRT-LLM model from Jan's Thread interface. +- Jan will automatically start the TensorRT-LLM model engine in the background +- You may encounter a pop-up from Windows Security, asking for Nitro to allow public and private network access + +:::info[Why does Nitro need network access?] 
+ +- This is because Jan runs TensorRT-LLM using the [Nitro Server](https://github.com/janhq/nitro-tensorrt-llm/) +- Jan makes network calls to the Nitro server running on your computer on a separate port + +::: + +### Configure Settings + +:::note coming soon ::: ## Troubleshooting -### Incompatible Extension vs Engine versions +## Extension Details -For now, the model versions are pinned to the extension versions. +Jan's TensorRT-LLM Extension is built on top of the open source [Nitro TensorRT-LLM Server](https://github.com/janhq/nitro-tensorrt-llm), a C++ inference server on top of TensorRT-LLM that provides an OpenAI-compatible API. + +### Manual Build + +To manually build the artifacts needed to run the server and TensorRT-LLM, you can reference the source code. [Read here](https://github.com/janhq/nitro-tensorrt-llm?tab=readme-ov-file#quickstart). ### Uninstall Extension @@ -89,11 +214,8 @@ For now, the model versions are pinned to the extension versions. 3. Delete the entire Extensions folder. 4. Reopen the app, only the default extensions should be restored. -### Install Nitro-TensorRT-LLM manually -To manually build the artifacts needed to run the server and TensorRT-LLM, you can reference the source code. [Read here](https://github.com/janhq/nitro-tensorrt-llm?tab=readme-ov-file#quickstart). 
- -### Build your own TensorRT models +## Build your own TensorRT models :::info coming soon diff --git a/docs/docs/integrations/tensorrt.md b/docs/docs/integrations/tensorrt.md deleted file mode 100644 index 8a77d1436..000000000 --- a/docs/docs/integrations/tensorrt.md +++ /dev/null @@ -1,8 +0,0 @@ ---- -title: TensorRT-LLM ---- - -## Quicklinks - -- Jan Framework [Extension Code](https://github.com/janhq/jan/tree/main/extensions/inference-triton-trtllm-extension) -- TensorRT [Source URL](https://github.com/NVIDIA/TensorRT-LLM) diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js index 761e741db..60d2cd7af 100644 --- a/docs/docusaurus.config.js +++ b/docs/docusaurus.config.js @@ -117,6 +117,10 @@ const config = { from: '/guides/using-extensions/', to: '/guides/extensions/', }, + { + from: '/integrations/tensorrt', + to: '/guides/providers/tensorrt-llm' + }, ], }, ],