From 0ddb01f56b4bf3ae97c52088f17ceea5e1db395b Mon Sep 17 00:00:00 2001 From: Duncan Blythe Date: Mon, 11 Nov 2024 12:00:24 +0100 Subject: [PATCH] Add remote template support and patch api-key --- .github/workflows/ci_templates.yaml | 1 - CHANGELOG.md | 2 +- superduper/cli/main.py | 22 ++++- templates/simple_rag/VERSION | 2 +- .../a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59 | Bin 0 -> 11388 bytes .../de5a10b374e634d964148beb865f73c5b82e53a6 | Bin 0 -> 766 bytes templates/simple_rag/build.ipynb | 81 +++++++++++++----- templates/simple_rag/component.json | 19 ++-- 8 files changed, 88 insertions(+), 39 deletions(-) create mode 100644 templates/simple_rag/blobs/a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59 create mode 100644 templates/simple_rag/blobs/de5a10b374e634d964148beb865f73c5b82e53a6 diff --git a/.github/workflows/ci_templates.yaml b/.github/workflows/ci_templates.yaml index 8163a32d3..72ebb786a 100644 --- a/.github/workflows/ci_templates.yaml +++ b/.github/workflows/ci_templates.yaml @@ -4,7 +4,6 @@ on: pull_request: branches: - main - - '[0-9]+.[0-9]+' paths: # Paths that may affect code quality concurrency: diff --git a/CHANGELOG.md b/CHANGELOG.md index e9e789c07..3aa69d126 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 **Before you create a Pull Request, remember to update the Changelog with your changes.** -## Changes Since Last Release +## Changes Since Last Release #### Changed defaults / behaviours diff --git a/superduper/cli/main.py b/superduper/cli/main.py index 46515445d..45397ddb5 100644 --- a/superduper/cli/main.py +++ b/superduper/cli/main.py @@ -1,6 +1,5 @@ import json import os -import subprocess from superduper import CFG, Component, logging, superduper from superduper.components.template import Template @@ -103,9 +102,20 @@ def bootstrap( db = superduper(data_backend) existing = db.show('template') + + if template.startswith('http'): + import subprocess + + logging.info('Downloading remote template...') + subprocess.run(['curl', '-O', '-k', template]) + template = template.split('/')[-1] + if destination is not None: - root = os.path.dirname(os.path.dirname(__file__)) - template_directory = os.path.join(root, f'templates/{template}') + if os.path.exists(template): + template_directory = template + else: + root = os.path.dirname(os.path.dirname(__file__)) + template_directory = os.path.join(root, f'templates/{template}') print(template_directory) import shutil @@ -114,8 +124,12 @@ def bootstrap( if template in existing: logging.warn(f'Template {template} already exists') + logging.info(f'Applying template: {template} from inbuilt') - tem = getattr(inbuilt, template) + if os.path.exists(template): + tem = Template.read(template) + else: + tem = getattr(inbuilt, template) if tem.requirements and pip_install: with open('/tmp/requirements.txt', 'w') as f: f.write('\n'.join(tem.requirements)) diff --git a/templates/simple_rag/VERSION b/templates/simple_rag/VERSION index 8ff5d387d..1d0ba9ea1 100644 --- a/templates/simple_rag/VERSION +++ b/templates/simple_rag/VERSION @@ -1 +1 @@ -0.5.0.dev +0.4.0 diff --git a/templates/simple_rag/blobs/a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59 b/templates/simple_rag/blobs/a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59 new file mode 100644 index 0000000000000000000000000000000000000000..bac1551dee4aaaa5ffaa6769b9fae4edf39f9884 GIT binary patch literal 11388 zcmcgy-ESP%b)VT?E|<$siIgm-sXcX5Xlc+(+J@znk(?4M%VAbp3nHa76@=k%XO?$I z`^EW?OkoAMP$jK`X&qN1pp6kkO4^s^rH^^(Ya8Sb=$ju}n}r>fG^_omdQWoV9}I(q}df9O}d-?80+ zVd?a!Hv8Q{zi0FuyV~vR2KrB2px+CIqoK1mUK4Kbx?%Yu?~QQ_uy_62AX8RVeByQt zRrS3RUbJ?Pz*$w*dcD4*IqiPW28KH~iH>TwZyCOj@NK8B>Cuq9e7om>>LqZu+dDnY zaV>N^;^l3}V!SJ6>%SDosJXY(F`Rx6LmOCz-flWXZ!Vf-%k4FZFHIEqc=An5ig2|V zDK{lA$a8W@F3Lw`zb@A${4dnUzWb^q$+eav4JBRHb9eG~Whj}lnfpxA^MLZ7Nk7it z&O3$2V|rmoZ@DM3jNXrnPb68=$A*$R&aFji)yIFFr}hzg?8rm;r}8DKEe&&jBmafG zA^DXC1l6|HcCYQUwGQN>`wzUlva0yr^m>HEY!!c$LKwP29TeMbHVxZuxt-3Q??4;l zJJ6T|ad{J}+U#gHNuJcw)4Hsoa$AR*+pRW~>5X^#J3Aomdt?c%pU1M$W05v_u4 zbXq`Z){gCa#p^c%V45vN?SikiV}Pso!SZF>u(XOs1eFizL-{4?Zti92iTr79 zmp=7m1q1X7X20F@#q2%b%T-r^ZTkQ(EZ@+q3dbXkh(96yyDK)67CSAv%fBYXMNeYKwNgQrfK_FXF^zBte*Bp)A z)3B@L=2tZ9Dk3v{vHIbFBA3>=daiH5<@8HNt8(%z`ggQF!#cTm?V55%X&_oJDX!br zR~ZKCbW3YGeQWQ%CFONA4ZzFLx~s}-PD0YDo`){4o~%Xj{^Yf5uf^mB9T(BSKKPT1 z+?eUFSqLOxiZ4#;&14>FerFMyuU$JC^Jr>KVW59%P}}`}hkI;VE+%CofVo66)n$Wm zDKhc;z%m8(lkt*p5)y{AXq^L*_j?rctu<>W8AIQ72Ck#p7)e*#$B1z$MnSN12Ow#v zK?poZ;-S+#rw4hT`&=3~5dXmj%FmAJN{mmB%W$lAQ^hx~0b!9nWNo15cGa7G3sHe& ziItV+zXI_Z)(sL2Nu5yV;Ny_GV`1GktSslv0GVQIe57>;9r`|_@U`BaGUP#|`s&7@ z)8BiV6~_n80ag`^3uAa|Wo5BkUL9zb)>YE}vK}v1O24Hz!caPf)~ZHb$&+Hw04&wWaj8O#@~h4I}Zj zHHBg&QHXF#verf2V>e5BhbYDAE8B+R+F0M)J)Up;c|5DogDO#+SX9{rT7WPb|MFrv#}>SYE0bl0M!VqmqqG-b2D9f>GrsI;qw((fr6WP@1J-iGxCA2&O$ z?HCp~we4hn0V}Yv49e;-1$=DWFhD-0$-EuSzOES69ff@hW8nM^!jGpIg@Eq~&4C6i zmMCk#-7=bc&5p5emKaAn^?8&$t|}KOi&f%i5p#f_g%Kjs>eJ@4FHC&MQ9}u$hEnMw zQ6UjR*G3Ry?8TV;fl*@<<{Ly1(leB-wE9*kgdo7g-|r76Yr2m7kX8kl=8oH?Og$n( zB8o+*IPGrY03`Qx%&Bt<7uXl^4%?M&t$E#miIW-F?sm8BR9RE+2i~q!E@^vFr^Qun z%94f;lLJ*O!|uBlQqnWkYBdo@IM}b#7J}`0}48w4vZubVoW8Bl{K!MC<-XTaR$)0K0w;$a9H3z zY>t6B-)uXACA77bl@TVm-s$9Omm(7 z8@d5s?IO7IAOrJVj+e-oL=>&#$NhA;dt>AkEMs7y)X5pUc#icgtlcPxCf%N`wG35g zw!mw#W%X|%=X^j(U6wBjn~olpo71o>bQ`o_k3HtBFRuvsz~oR&M)Cnx{GIr@SBj)Wh!giPWcm}27R6s>29lD7_-hh`_|ZswCZ33& z9^CC`8T3KZ5U;04|GIc6PKZaDuVS7rI+O?EAhvqn5 z!H`f(E{cCdQS6mY(=y%Lc@H`|jbMa58I*f+EmuI7MXI&DV+qYDYEn)7z&nyOa7qbe z!+R+~Vv8bLc+fN=pNoc$roiQi&d~%PjDQGn>2<^_QIZ;)^*y!{n`*j8jTg3*gVm$1 zH`X^}+b&Svmr>N`;$h>NhT$zdRox-S^u00_n7Ycupv1eQeMWomP(t^DBB(;NGsE~b z-0Ns2zECxG4X%Q}7d6m>&HEsu-RJV-6yVf-lKFQGX*M+^SP+fJ8{@_7w=k*+O6Y^A zmGVmd19U`)a8$svp)8zINI63j?rE*QyN zN*cT`1Ky(-_O$2epcoW&8K%bqrih90oHv42M`Cd-9aWA68G?BEP-ax3yp4it zW=_8w%&DAgJRR0tyy;9un&JA|0g?>1vJQ>=JTD1qW-BZ+hLb#oXD_tKa0^r=az6X? zdip%$naLK!8Mf6gVQVf?ho_eB=d#OIZW(rO9X`8qQV(ZXonls0lEm(DCD{u+G{sJ5 z%InMszIFJMolKSH8D8IJUgD^DRF@9LKgYN<&+w=P5-m_@gZv&G3ZL)pKjL%?~eqpEkVoR>x8IEf(H< zfX+r$@8u9n8`Aq0(TLE#MGn^^m_ApaPUh5`C(+21f(H$;d=Jt+!-wH7@Z*H%<#bVF zM#WK9xVIJVrm-P3*b*9?xHecrBesYJ60MinC>m@P4K|A}A0Qjf3v?sXdVy|ad=0j~ z7wAm(Sr07q0h?&UB%^7t)jXi9CN7;zlNX$Teg1>^yb(A<0lfl`m>`Iab%r2zka>d0 zOUx5^KrSZ&XHf{n9`gqTk=rzQ`VDrRM&LPrNNwak4fY>#H1Hsbq=6gN31Z{ENDz5d z5NBt_f>*Q*T$<@&%S8xpE;>MuqZ@6+nAhCm@v~Tr! zaEeahk+x-%@OV_p(ao8HT$XckIaihoUqp8x^7RP>!t-=}=8o*hk8@9?R_-?6fw&=A zGdeE4jHJi;p-h)zZcX8S1Yf849$^asmabMLZ#NN&ygZH*uwPX1a@fDXIlCjUB@bu< z8T5B)L_fF;*h^?;8qEwV$;s;ecxM4Usb`~2%~pnIo!_0MT_d2 zE7dpOUhcHFmj`=}==TV-oZ8W)+Z5uR`oGnV&gJ6yLjnbP>I>hSVkuc}ug4pu(>UU> z)c7h=c+ui^aBs!@0o2gQdgZ;)0ma#H8OllV%_JEM%tq}uxPXMaBs%1}Bk8$MAw673 z!mplZxWI6M;W38C7%nngWO$t6afV9_ml&R4c!J?F!)1mi8J=W#is31Srx~7Rc!uE_ zhG!X`Wq6L^Ifjoge1zen3?F59p5b|h7Z_e(_!z^-K82;cL<f{UW;E`D*fSOrSQu#3u7(1u205PX9)5HrV669TgK{q*5 z$Gg19%W4JJUg}@L5>B3V*EhE)@{ko{G0pu3*L!%2Ib+`03qA-*ui{c#a1F7AOM=N2 zqd34&gbHr$n7;z)Y!nNF5J$0}pcU8MqDWxgr_uZn4?ZlRFoAnL^usrEimGbtCh-fG zzoz8J@#U{veF47p_e0r4)G{e`yMkZxtsMCm{*b(xNZrgL{*b(xNZm{%ZYI(;6G@wi zB+W!pW+DkQk%E~>xlANnCK4-izBLcZh=8CbgPII#GN{R*CWD#`YBH$FpeBQw3~Dl{ z$)F~Knha_(sL7xv6SeDzkIq7rqJ1Sv+DxPGtkXpgI%&pkA_vp4K91lmgyBH-{|sS6 z^+l2--R{b$pBfuasedL}kyst9$$vptZ$Cr697;zNyz)SaPxT-@+bgs-(ZL#Y+n}VS zt2s?UM-`N`1Zm}cU;O$LEFEP!3=B7+yonaleOy$_>fE+8T!gw$|E@0fjN5Vh0YEm}}PZN&kE)Cxv!`@Na@*826^L(Q^Xo@N=L_3rEo8kK?>L8q0BOsb^2 zalHiDXAgL!xTvw9i*UEGRqR64m>0IgAUH5+fjPd#7#VM|#JyWQM-8^rV68>_5*>FI z%e-|9QFF}Oyz>KH;zRTuA0gKp+qFY=Dl^4+D&q~#3?qa%`ynx=(C>X;SA{5fvrRUO zYLcbVCKtIPdW*1}T`0*;HHlR28*ygHu?89T)qmdH=p8C4`p&j1Gu_;ENvjti7vg(0 zt?(#VkI^Oi(gBvhFvUN;nFky#K66*DZ#^;qzjW8mj{Or>`csv0QEGoH@r&)*g}&hW zU9=OMNp9$HHXE3ed;lw!{%1ce&cBLMij3}zc6UbpaJ0QQ9E=8A+oU*mw!5FD+JK7U zz=cl(ZymXQ-?&d8B^JhrlI&+HVwpYw=WnEOJ-eK#IEKZ0*KU&ZA2WaXugPS>v>>%6Q3BVG_u_6I j>w*zop=n;I($HM-DyxSyd3{xv>rQO4m%OQu)qwl~18E3) literal 0 HcmV?d00001 diff --git a/templates/simple_rag/build.ipynb b/templates/simple_rag/build.ipynb index d1331d229..05c3e5c38 100644 --- a/templates/simple_rag/build.ipynb +++ b/templates/simple_rag/build.ipynb @@ -42,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "3ef70f6d-a189-460a-8864-241a689624e2", "metadata": { "editable": true, @@ -66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "cb029a5e-fedf-4f07-8a31-d220cfbfbb3d", "metadata": { "editable": true, @@ -75,7 +75,22 @@ }, "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-Nov-11 11:49:18.44| INFO | superduper.misc.plugins:13 | Loading plugin: mongodb\n", + "2024-Nov-11 11:49:18.49| INFO | superduper.base.datalayer:76 | Building Data Layer\n", + "2024-Nov-11 11:49:18.49| INFO | superduper.base.build:184 | Configuration: \n", + " +---------------+--------------+\n", + "| Configuration | Value |\n", + "+---------------+--------------+\n", + "| Data Backend | mongomock:// |\n", + "+---------------+--------------+\n" + ] + } + ], "source": [ "from superduper import superduper, CFG\n", "\n", @@ -88,7 +103,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "4e7902bd", "metadata": { "editable": true, @@ -114,7 +129,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "1ef8dd07-1b47-4dce-84dd-a081d1f5ee9d", "metadata": {}, "outputs": [], @@ -136,7 +151,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "id": "c5965fdf", "metadata": {}, "outputs": [], @@ -170,7 +185,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "2d20eaa0-a416-4483-938e-23f79845739a", "metadata": {}, "outputs": [], @@ -198,7 +213,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "93d21872-d4dc-40dc-abab-fb07ba102ea3", "metadata": {}, "outputs": [], @@ -217,7 +232,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "31900eec-b516-4bef-939e-2e8f46252b12", "metadata": {}, "outputs": [], @@ -265,7 +280,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "id": "a9b1f538-65ca-499e-b6d0-2dd733f81723", "metadata": {}, "outputs": [], @@ -276,7 +291,8 @@ "from superduper_openai import OpenAIEmbedding\n", "\n", "openai_embedding = OpenAIEmbedding(\n", - " identifier='text-embedding-ada-002',\n", + " identifier='text-embedding',\n", + " model='text-embedding-ada-002',\n", " datatype=sqlvector(shape=(1536,)),\n", " client_kwargs={'base_url': BASE_URL, 'api_key': API_KEY},\n", ")" @@ -292,7 +308,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "id": "4663fa4b-c2ec-427d-bf8b-b8b109cc2ccf", "metadata": {}, "outputs": [], @@ -315,7 +331,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "id": "509c3505-54c5-4e68-84ec-3df8bea0fd74", "metadata": {}, "outputs": [], @@ -335,7 +351,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "id": "f98e5ff4", "metadata": {}, "outputs": [], @@ -360,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "id": "44baeb09-6f35-4cf2-b814-46283a59f7e9", "metadata": {}, "outputs": [], @@ -387,7 +403,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "2d3a0d3a-da1c-41ec-b16c-f281c46ad794", "metadata": {}, "outputs": [], @@ -398,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "2a82ea22-9694-4c65-b72f-c89ae49d1ab2", "metadata": {}, "outputs": [], @@ -417,7 +433,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "e6787c78-4b14-4a72-818b-450408a74331", "metadata": {}, "outputs": [], @@ -436,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "e7c16557-af76-4e70-83d9-2984e19a9554", "metadata": {}, "outputs": [], @@ -463,10 +479,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "2e850c03-33c6-4c88-95d3-d14146a6a0af", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-Nov-11 11:49:19.42| WARNING | superduper.base.document:479 | Leaf listener:chunker already exists\n", + "2024-Nov-11 11:49:19.42| WARNING | superduper.base.document:479 | Leaf model:chunker already exists\n", + "2024-Nov-11 11:49:19.43| WARNING | superduper.base.document:479 | Leaf datatype:dill already exists\n", + "2024-Nov-11 11:49:19.43| WARNING | superduper.base.document:479 | Leaf var-table-name-select-var-id-field-x already exists\n" + ] + } + ], "source": [ "from superduper import Template, Table, Schema\n", "from superduper.components.dataset import RemoteData\n", @@ -526,10 +553,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "id": "8924ba0d-7c01-4d6c-87fb-245531db7506", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2024-Nov-11 11:49:19.43| WARNING | superduper.base.document:479 | Leaf str already exists\n" + ] + } + ], "source": [ "template.export('.')" ] diff --git a/templates/simple_rag/component.json b/templates/simple_rag/component.json index 01c2650a6..c70a90ba7 100644 --- a/templates/simple_rag/component.json +++ b/templates/simple_rag/component.json @@ -20,11 +20,11 @@ "method": "dill", "encodable": "artifact" }, - "a395902bca4bf34255e97a6a057b0edffc3500b2": { + "de5a10b374e634d964148beb865f73c5b82e53a6": { "_path": "superduper.components.datatype.Artifact", "datatype": "?datatype:dill", "uri": null, - "blob": "&:blob:a395902bca4bf34255e97a6a057b0edffc3500b2" + "blob": "&:blob:de5a10b374e634d964148beb865f73c5b82e53a6" }, "dataset:superduper-docs": { "_path": "superduper.components.dataset.RemoteData", @@ -32,7 +32,7 @@ "plugins": null, "cache": true, "status": null, - "getter": "?a395902bca4bf34255e97a6a057b0edffc3500b2" + "getter": "?de5a10b374e634d964148beb865f73c5b82e53a6" }, "table:sample_simple_rag": { "_path": "superduper.components.table.Table", @@ -58,14 +58,14 @@ "method": "dill", "encodable": "artifact" }, - "727d3bb560939e1211f9cac189d56e07e9622eeb": { + "a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59": { "_path": "superduper.components.datatype.Artifact", "datatype": "?datatype:dill", "uri": null, - "blob": "&:blob:727d3bb560939e1211f9cac189d56e07e9622eeb" + "blob": "&:blob:a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59" }, "model:chunker": { - "_object": "?727d3bb560939e1211f9cac189d56e07e9622eeb", + "_object": "?a03a6ac18d448d0e3cd7b62ebcd7f19d777b5a59", "upstream": null, "plugins": null, "cache": true, @@ -108,7 +108,7 @@ 1536 ] }, - "model:": { + "model:text-embedding": { "_path": "superduper_openai.model.OpenAIEmbedding", "upstream": null, "plugins": null, @@ -155,7 +155,7 @@ "status": null, "cdc_table": "chunker__?(listener:chunker.uuid)", "key": "chunker__?(listener:chunker.uuid)", - "model": "?model:", + "model": "?model:text-embedding", "predict_kwargs": {}, "select": "?outputs-chunker-?(listener:chunker.uuid)-select-id-source-outputs-chunker-?(listener:chunker.uuid)", "flatten": false @@ -293,6 +293,7 @@ "default": null } }, + "schema": null, "blobs": null, "files": null, "requirements": null, @@ -304,4 +305,4 @@ } }, "_files": {} -} +} \ No newline at end of file