diff --git a/.gitignore b/.gitignore
index 80e557d1..a568934e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,11 @@ activate*
 activate/*
 kotaemon-env*
 .env
+workdir*
+workdir/*
+run_container.sh
+Makefile
+Dockerfile
 
 ### Emacs ###
 # -*- mode: gitignore; -*-
diff --git a/Dockerfile b/Dockerfile
index 94d83cb9..b8d6976e 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -37,12 +37,16 @@ RUN bash scripts/download_pdfjs.sh $PDFJS_PREBUILT_DIR
 COPY . /app
 COPY .env.example /app/.env
 
+# Update pip command
+RUN pip install --upgrade pip
+
 # Install pip packages
 RUN --mount=type=ssh \
     --mount=type=cache,target=/root/.cache/pip \
     pip install -e "libs/kotaemon" \
     && pip install -e "libs/ktem" \
-    && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements"
+    && pip install "pdfservices-sdk@git+https://github.com/niallcm/pdfservices-python-sdk.git@bump-and-unfreeze-requirements" \
+    && pip install "docling"
 
 RUN --mount=type=ssh \
     --mount=type=cache,target=/root/.cache/pip \
diff --git a/docs/pages/app/ext/user-management.md b/docs/pages/app/ext/user-management.md
index 988380f2..1b3979ee 100644
--- a/docs/pages/app/ext/user-management.md
+++ b/docs/pages/app/ext/user-management.md
@@ -11,4 +11,4 @@ Once enabled, you have access to the following features:
 
 - User login/logout (located in Settings Tab)
 - User changing password (located in Settings Tab)
-- Create / List / Edit / Delete user (located in Admin > User Management Tab)
+- Create / List / Edit / Delete user (located in Resources > Users Tab)
diff --git a/libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py b/libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py
index 4ac4b8ef..3cb81f63 100644
--- a/libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py
+++ b/libs/kotaemon/kotaemon/rerankings/tei_fast_rerank.py
@@ -29,13 +29,25 @@ class TeiFastReranking(BaseReranking):
         ),
     )
     is_truncated: Optional[bool] = Param(True, help="Whether to truncate the inputs")
+    max_tokens: Optional[int] = Param(
+        512,
+        help=(
+            "This option is used to specify the maximum number of tokens "
+            "supported by the reranker model."
+        ),
+    )
 
     def client(self, query, texts):
+        if self.is_truncated:
+            # NOTE(review): this slices by characters, not tokens, so it is
+            # only a coarse client-side cap; confirm the intended unit.
+            texts = [text[: self.max_tokens] for text in texts]
+
         response = session.post(
             url=self.endpoint_url,
             json={
                 "query": query,
                 "texts": texts,
                 "is_truncated": self.is_truncated,  # default is True
             },
         ).json()