From 83be2a13e0384d3de52190d86ccb1b5d7a197d84 Mon Sep 17 00:00:00 2001 From: pstern-sl <157847713+pstern-sl@users.noreply.github.com> Date: Fri, 7 Jun 2024 17:36:40 -0400 Subject: [PATCH] feat(api): adding chunking_strategy to polling helpers (#1478) --- .../resources/beta/vector_stores/file_batches.py | 8 ++++++++ src/openai/resources/beta/vector_stores/files.py | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/openai/resources/beta/vector_stores/file_batches.py b/src/openai/resources/beta/vector_stores/file_batches.py index 21ac68f6de..d6862c24ef 100644 --- a/src/openai/resources/beta/vector_stores/file_batches.py +++ b/src/openai/resources/beta/vector_stores/file_batches.py @@ -174,11 +174,13 @@ def create_and_poll( *, file_ids: List[str], poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFileBatch: """Create a vector store batch and poll until all files have been processed.""" batch = self.create( vector_store_id=vector_store_id, file_ids=file_ids, + chunking_strategy=chunking_strategy, ) # TODO: don't poll unless necessary?? return self.poll( @@ -306,6 +308,7 @@ def upload_and_poll( max_concurrency: int = 5, file_ids: List[str] = [], poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFileBatch: """Uploads the given files concurrently and then creates a vector store file batch. @@ -343,6 +346,7 @@ def upload_and_poll( vector_store_id=vector_store_id, file_ids=[*file_ids, *(f.id for f in results)], poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy, ) return batch @@ -488,11 +492,13 @@ async def create_and_poll( *, file_ids: List[str], poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFileBatch: """Create a vector store batch and poll until all files have been processed.""" batch = await self.create( vector_store_id=vector_store_id, file_ids=file_ids, + chunking_strategy=chunking_strategy, ) # TODO: don't poll unless necessary?? return await self.poll( @@ -620,6 +626,7 @@ async def upload_and_poll( max_concurrency: int = 5, file_ids: List[str] = [], poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: file_batch_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFileBatch: """Uploads the given files concurrently and then creates a vector store file batch. @@ -680,6 +687,7 @@ async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> No vector_store_id=vector_store_id, file_ids=[*file_ids, *(f.id for f in uploaded_files)], poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy, ) return batch diff --git a/src/openai/resources/beta/vector_stores/files.py b/src/openai/resources/beta/vector_stores/files.py index 30f19ef491..bc1655027c 100644 --- a/src/openai/resources/beta/vector_stores/files.py +++ b/src/openai/resources/beta/vector_stores/files.py @@ -245,9 +245,10 @@ def create_and_poll( *, vector_store_id: str, poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFile: """Attach a file to the given vector store and wait for it to be processed.""" - self.create(vector_store_id=vector_store_id, file_id=file_id) + self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy) return self.poll( file_id, @@ -301,6 +302,7 @@ def upload( *, vector_store_id: str, file: FileTypes, + chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFile: """Upload a file to the `files` API and then attach it to the given vector store. @@ -308,7 +310,7 @@ def upload( polling helper method to wait for processing to complete). """ file_obj = self._client.files.create(file=file, purpose="assistants") - return self.create(vector_store_id=vector_store_id, file_id=file_obj.id) + return self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy) def upload_and_poll( self, @@ -316,12 +318,14 @@ def upload_and_poll( vector_store_id: str, file: FileTypes, poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFile: """Add a file to a vector store and poll until processing is complete.""" file_obj = self._client.files.create(file=file, purpose="assistants") return self.create_and_poll( vector_store_id=vector_store_id, file_id=file_obj.id, + chunking_strategy=chunking_strategy, poll_interval_ms=poll_interval_ms, ) @@ -542,9 +546,10 @@ async def create_and_poll( *, vector_store_id: str, poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFile: """Attach a file to the given vector store and wait for it to be processed.""" - await self.create(vector_store_id=vector_store_id, file_id=file_id) + await self.create(vector_store_id=vector_store_id, file_id=file_id, chunking_strategy=chunking_strategy) return await self.poll( file_id, @@ -598,6 +603,7 @@ async def upload( *, vector_store_id: str, file: FileTypes, + chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFile: """Upload a file to the `files` API and then attach it to the given vector store. @@ -605,7 +611,7 @@ async def upload( polling helper method to wait for processing to complete). """ file_obj = await self._client.files.create(file=file, purpose="assistants") - return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id) + return await self.create(vector_store_id=vector_store_id, file_id=file_obj.id, chunking_strategy=chunking_strategy) async def upload_and_poll( self, @@ -613,6 +619,7 @@ async def upload_and_poll( vector_store_id: str, file: FileTypes, poll_interval_ms: int | NotGiven = NOT_GIVEN, + chunking_strategy: file_create_params.ChunkingStrategy | NotGiven = NOT_GIVEN, ) -> VectorStoreFile: """Add a file to a vector store and poll until processing is complete.""" file_obj = await self._client.files.create(file=file, purpose="assistants") @@ -620,6 +627,7 @@ async def upload_and_poll( vector_store_id=vector_store_id, file_id=file_obj.id, poll_interval_ms=poll_interval_ms, + chunking_strategy=chunking_strategy )