feat: raise GCS object size limit to 2.5TB (#2261)
This is an interim fix for GCS only, to solve #2247.

Because of the challenges of casting between `&dyn ObjectStore` and
`&dyn MultiPartStore`, it's not easy in the current version of
`object_store` to implement this generically over all stores. However,
in the next version of `object_store` (0.10.0), there is a new API for
`put_multipart()` that will make it easy to extend this implementation
to all stores.
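To make the casting problem concrete, here is a minimal, self-contained sketch. The `Store`, `MultiPart`, `Gcs`, and `Patched` types below are stand-ins invented for illustration, not the real `object_store` traits or the code added in this commit. Rust has no built-in cross-cast from one trait object to another, so holding a `&dyn ObjectStore` never gives access to the same store's `MultiPartStore` methods; wrapping the concrete GCS type does.

trait Store {
    fn put(&self, data: &[u8]);
}

trait MultiPart {
    fn put_multipart(&self, data: &[u8]);
}

struct Gcs;

impl Store for Gcs {
    fn put(&self, data: &[u8]) {
        println!("single PUT of {} bytes", data.len());
    }
}

impl MultiPart for Gcs {
    fn put_multipart(&self, data: &[u8]) {
        println!("multipart upload of {} bytes", data.len());
    }
}

// Wrapping the concrete type sidesteps the missing cast: the wrapper can still
// be handed around as `dyn Store`, but internally it reaches the multipart path
// directly because it owns a `Gcs`, not a `dyn Store`.
struct Patched(Gcs);

impl Store for Patched {
    fn put(&self, data: &[u8]) {
        // Hypothetical threshold for the demo; the real limit depends on the GCS API.
        const LIMIT: usize = 8;
        if data.len() > LIMIT {
            self.0.put_multipart(data);
        } else {
            self.0.put(data);
        }
    }
}

fn main() {
    let store: Box<dyn Store> = Box::new(Patched(Gcs));
    store.put(&[0u8; 4]);  // small write: single PUT
    store.put(&[0u8; 16]); // large write: routed through the multipart path
    // Given only a `&dyn Store`, there is no way to obtain a `&dyn MultiPart`,
    // even though the concrete `Gcs` implements both traits.
}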
wjones127 authored Apr 26, 2024
1 parent fc9757b commit f3b4ba4
Showing 5 changed files with 584 additions and 2 deletions.
rust/lance-io/Cargo.toml (5 changes: 4 additions & 1 deletion)
@@ -42,12 +42,12 @@ tokio.workspace = true
 tracing.workspace = true
 url.workspace = true
 path_abs.workspace = true
+rand.workspace = true
 
 [dev-dependencies]
 criterion.workspace = true
 parquet.workspace = true
 pprof.workspace = true
-rand.workspace = true
 tempfile.workspace = true
 
 [build-dependencies]
@@ -56,3 +56,6 @@ prost-build.workspace = true
 [[bench]]
 name = "scheduler"
 harness = false
+
+[features]
+gcs-test = []
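The new `gcs-test` cargo feature presumably gates integration tests that need real GCS credentials so they stay out of the default test run. A minimal sketch of that pattern follows; the module, test name, and `GCS_TEST_BUCKET` variable are assumptions for illustration, not taken from this commit (tokio is already a dependency of this crate per the diff above).

#[cfg(all(test, feature = "gcs-test"))]
mod gcs_integration_tests {
    // Only compiled when running, e.g., `cargo test --features gcs-test`.
    #[tokio::test]
    async fn put_object_larger_than_single_put_limit() {
        let bucket = std::env::var("GCS_TEST_BUCKET")
            .expect("set GCS_TEST_BUCKET to run gcs-test integration tests");
        // The real test would exercise the multipart upload path against `bucket`.
        let _ = bucket;
    }
}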
rust/lance-io/src/object_store.rs (12 changes: 11 additions & 1 deletion)
@@ -17,6 +17,7 @@ use futures::{future, stream::BoxStream, StreamExt, TryStreamExt};
 use object_store::aws::{
     AmazonS3ConfigKey, AwsCredential as ObjectStoreAwsCredential, AwsCredentialProvider,
 };
+use object_store::gcp::GoogleCloudStorageBuilder;
 use object_store::{
     aws::AmazonS3Builder, azure::AzureConfigKey, gcp::GoogleConfigKey, local::LocalFileSystem,
     memory::InMemory, CredentialProvider, Error as ObjectStoreError, Result as ObjectStoreResult,
@@ -29,7 +30,9 @@ use tokio::{io::AsyncWriteExt, sync::RwLock};
 use url::Url;
 
 use super::local::LocalObjectReader;
+mod gcs_wrapper;
 mod tracing;
+use self::gcs_wrapper::PatchedGoogleCloudStorage;
 use self::tracing::ObjectStoreTracingExt;
 use crate::{object_reader::CloudObjectReader, object_writer::ObjectWriter, traits::Reader};
 use lance_core::{Error, Result};
@@ -716,7 +719,14 @@ async fn configure_store(url: &str, options: ObjectStoreParams) -> Result<Object
 
         "gs" => {
             storage_options.with_env_gcs();
-            let (store, _) = parse_url_opts(&url, storage_options.as_gcs_options())?;
+            let mut builder = GoogleCloudStorageBuilder::new().with_url(url.as_ref());
+            for (key, value) in storage_options.as_gcs_options() {
+                builder = builder.with_config(key, value);
+            }
+            let store = builder.build()?;
+            // Temporary fix for having larger object sizes. Replace when
+            // object_store 0.10.0 is available.
+            let store = PatchedGoogleCloudStorage(Arc::new(store));
             let store = Arc::new(store);
             Ok(ObjectStore {
                 inner: store,
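As a standalone illustration of the builder pattern introduced above, the sketch below maps typed `GoogleConfigKey` options onto a `GoogleCloudStorageBuilder` via `with_config`. The bucket URL and service-account path are made up, and error handling is left to the caller; this is not the code from this commit, which feeds Lance's string-keyed storage options through the same loop.

use object_store::gcp::{GoogleCloudStorageBuilder, GoogleConfigKey};
use object_store::ObjectStore;

fn build_gcs_store() -> object_store::Result<impl ObjectStore> {
    // Options as (key, value) pairs, analogous to storage_options.as_gcs_options().
    let options = vec![
        (GoogleConfigKey::ServiceAccount, "/path/to/key.json".to_string()),
    ];
    let mut builder = GoogleCloudStorageBuilder::new().with_url("gs://my-bucket/path");
    for (key, value) in options {
        builder = builder.with_config(key, value);
    }
    builder.build()
}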