From 4dc0f155fa9cedcf8ef59922c136a12ac2a33174 Mon Sep 17 00:00:00 2001 From: Xuanwo Date: Thu, 14 Nov 2024 17:02:33 +0800 Subject: [PATCH] docs: Polish docs for write with if not exists (#5320) --- core/src/types/operator/blocking_operator.rs | 2 +- core/src/types/operator/operator.rs | 390 +++++++++++++------ 2 files changed, 268 insertions(+), 124 deletions(-) diff --git a/core/src/types/operator/blocking_operator.rs b/core/src/types/operator/blocking_operator.rs index 9ea08fb81f4e..0119e10485be 100644 --- a/core/src/types/operator/blocking_operator.rs +++ b/core/src/types/operator/blocking_operator.rs @@ -595,7 +595,7 @@ impl BlockingOperator { Ok(()) } - /// Write data with option described in OpenDAL [RFC-0661][`crate::docs::rfcs::rfc_0661_path_in_accessor`] + /// Write data with options. /// /// # Notes /// diff --git a/core/src/types/operator/operator.rs b/core/src/types/operator/operator.rs index fc8b31d25389..0e8af02989eb 100644 --- a/core/src/types/operator/operator.rs +++ b/core/src/types/operator/operator.rs @@ -709,20 +709,19 @@ impl Operator { /// /// ## Extra Options /// - /// [`Operator::write`] is a wrapper of [`Operator::write_with`] without any options. To use - /// extra options like `content_type` and `cache_control`, please use [`Operator::write_with`] - /// instead. + /// [`Operator::write`] is a simplified version of [`Operator::write_with`] without additional options. + /// For advanced features like `content_type` and `cache_control`, use [`Operator::write_with`] instead. /// /// ## Streaming Write /// - /// This function will write all bytes at once. For more precise memory control or - /// writing data continuously, please use [`Operator::writer`]. + /// This method performs a single bulk write operation. For finer-grained memory control + /// or streaming data writes, use [`Operator::writer`] instead. /// - /// ## Multipart Uploads Write + /// ## Multipart Uploads /// - /// OpenDAL abstracts the multipart uploads into [`Writer`]. It will automatically - /// handle the multipart uploads for you. You can control the behavior of multipart uploads - /// by setting `chunk`, `concurrent` via [`Operator::writer_with`] + /// OpenDAL provides multipart upload functionality through the [`Writer`] abstraction, + /// handling all upload details automatically. You can customize the upload behavior by + /// configuring `chunk` size and `concurrent` operations via [`Operator::writer_with`]. /// /// # Examples /// @@ -858,28 +857,34 @@ impl Operator { Ok(()) } - /// Write multiple bytes into path. + /// Create a writer for streaming data to the given path. /// /// # Notes /// + /// ## Writer Features + /// + /// The writer provides several powerful capabilities: + /// - Streaming writes for continuous data transfer + /// - Automatic multipart upload handling + /// - Memory-efficient chunk-based writing + /// /// ## Extra Options /// - /// [`Operator::writer`] is a wrapper of [`Operator::writer_with`] without any options. To use - /// extra options like `content_type` and `cache_control`, please use [`Operator::writer_with`] - /// instead. + /// [`Operator::writer`] is a simplified version of [`Operator::writer_with`] without additional options. + /// For advanced features like `content_type` and `cache_control`, use [`Operator::writer_with`] instead. /// - /// ## Chunk + /// ## Chunk Size Handling /// - /// Some storage services have a minimum chunk size requirement. For example, `s3` could return - /// hard errors like `EntityTooSmall` if the chunk size is too small. Some services like `gcs` - /// also return errors if the chunk size is not aligned. Besides, cloud storage services will cost - /// more money if we write data in small chunks. + /// Storage services often have specific requirements for chunk sizes: + /// - Services like `s3` may return `EntityTooSmall` errors for undersized chunks + /// - Using small chunks in cloud storage services can lead to increased costs /// - /// OpenDAL sets the chunk size automatically based on the [Capability](crate::types::Capability) - /// of the service if users don't set it. Users can set `chunk` to control the exact size to send - /// to the storage service. + /// OpenDAL automatically determines optimal chunk sizes based on the service's + /// [Capability](crate::types::Capability). However, you can override this by explicitly + /// setting the `chunk` parameter. /// - /// Users can use [`Operator::writer_with`] to set a good chunk size might improve the performance, + /// For improved performance, consider setting an appropriate chunk size using + /// [`Operator::writer_with`]. /// /// # Examples /// @@ -900,18 +905,29 @@ impl Operator { self.writer_with(path).await } - /// Write multiple bytes into path with extra options. + /// Create a writer for streaming data to the given path with more options. /// - /// # Options + /// # Usages /// /// ## `append` /// - /// Set `append` for this `write` request. + /// Sets append mode for this write request. + /// + /// ### Capability + /// + /// Check [`Capability::write_can_append`] before using this feature. + /// + /// ### Behavior /// - /// `write` by default to overwrite existing files. To append to the end of file instead, - /// please set `append` to true. + /// - By default, write operations overwrite existing files + /// - When append is set to true: + /// - New data will be appended to the end of existing file + /// - If file doesn't exist, it will be created + /// - If not supported, will return an error /// - /// The following example will append data to existing file instead. + /// This operation allows adding data to existing files instead of overwriting them. + /// + /// ### Example /// /// ```no_run /// # use opendal::Result; @@ -931,21 +947,33 @@ impl Operator { /// /// ## `chunk` /// - /// Set `chunk` for the writer. + /// Sets chunk size for buffered writes. + /// + /// ### Capability + /// + /// Check [`Capability::write_multi_min_size`] and [`Capability::write_multi_max_size`] for size limits. + /// + /// ### Behavior /// - /// Some storage services have a minimum chunk size requirement. For example, `s3` could return - /// hard errors like `EntityTooSmall` if the chunk size is too small. Some services like `gcs` - /// also return errors if the chunk size is not aligned. Besides, cloud storage services will cost - /// more money if we write data in small chunks. + /// - By default, OpenDAL sets optimal chunk size based on service capabilities + /// - When chunk size is set: + /// - Data will be buffered until reaching chunk size + /// - One API call will be made per chunk + /// - Last chunk may be smaller than chunk size + /// - Important considerations: + /// - Some services require minimum chunk sizes (e.g. S3's EntityTooSmall error) + /// - Smaller chunks increase API calls and costs + /// - Larger chunks increase memory usage, but improve performance and reduce costs /// - /// OpenDAL sets the chunk size automatically based on the [Capability](crate::types::Capability) - /// of the service if users don't set it. Users can set `chunk` to control the exact size to send - /// to the storage service. + /// ### Performance Impact /// - /// Set a good chunk size might improve the performance, reduce the API calls and save money. + /// Setting appropriate chunk size can: + /// - Reduce number of API calls + /// - Improve overall throughput + /// - Lower operation costs + /// - Better utilize network bandwidth /// - /// The following example will set the writer chunk to 8MiB. Only one API call will be sent at - /// `close` instead. + /// ### Example /// /// ```no_run /// # use opendal::Result; @@ -955,6 +983,7 @@ impl Operator { /// use bytes::Bytes; /// /// # async fn test(op: Operator) -> Result<()> { + /// // Set 8MiB chunk size - data will be sent in one API call at close /// let mut w = op /// .writer_with("path/to/file") /// .chunk(8 * 1024 * 1024) @@ -966,19 +995,34 @@ impl Operator { /// # } /// ``` /// - /// ## `concurrent` + /// # `concurrent` /// - /// Set `concurrent` for the writer. + /// Sets concurrent write operations for this writer. /// - /// OpenDAL by default to write file without concurrent. This is not efficient for cases when users - /// write large chunks of data. By setting `concurrent`, opendal will writing files concurrently - /// on support storage services. + /// ## Behavior /// - /// The following example will set the writer concurrent to 8. + /// - By default, OpenDAL writes files sequentially + /// - When concurrent is set: + /// - Multiple write operations can execute in parallel + /// - Write operations return immediately without waiting if tasks space are available + /// - Close operation ensures all writes complete in order + /// - Memory usage increases with concurrency level + /// - If not supported, falls back to sequential writes /// - /// - The first write will start and return immediately. - /// - The second write will start and return immediately. - /// - The close will make sure all writes are done in order and return result. + /// This feature significantly improves performance when: + /// - Writing large files + /// - Network latency is high + /// - Storage service supports concurrent uploads like multipart uploads + /// + /// ## Performance Impact + /// + /// Setting appropriate concurrency can: + /// - Increase write throughput + /// - Reduce total write time + /// - Better utilize available bandwidth + /// - Trade memory for performance + /// + /// ## Example /// /// ```no_run /// # use opendal::Result; @@ -988,19 +1032,49 @@ impl Operator { /// use bytes::Bytes; /// /// # async fn test(op: Operator) -> Result<()> { + /// // Enable concurrent writes with 8 parallel operations /// let mut w = op.writer_with("path/to/file").concurrent(8).await?; - /// w.write(vec![0; 4096]).await?; // Start the first write - /// w.write(vec![1; 4096]).await?; // Second write will be concurrent without wait - /// w.close().await?; // Close will make sure all writes are done and success + /// + /// // First write starts immediately + /// w.write(vec![0; 4096]).await?; + /// + /// // Second write runs concurrently with first + /// w.write(vec![1; 4096]).await?; + /// + /// // Ensures all writes complete successfully and in order + /// w.close().await?; /// # Ok(()) /// # } /// ``` /// /// ## `cache_control` /// - /// Set the `cache_control` for this `write` request. + /// Sets Cache-Control header for this write operation. + /// + /// ### Capability + /// + /// Sets `Cache-Control` header for this write request. + /// + /// ### Capability + /// + /// Check [`Capability::write_with_cache_control`] before using this feature. + /// + /// ### Behavior /// - /// Some storage services support setting `cache_control` as system metadata. + /// - If supported, sets Cache-Control as system metadata on the target file + /// - The value should follow HTTP Cache-Control header format + /// - If not supported, the value will be ignored + /// + /// This operation allows controlling caching behavior for the written content. + /// + /// ### Use Cases + /// + /// - Setting browser cache duration + /// - Configuring CDN behavior + /// - Optimizing content delivery + /// - Managing cache invalidation + /// + /// ### Example /// /// ```no_run /// # use opendal::Result; @@ -1010,6 +1084,7 @@ impl Operator { /// use bytes::Bytes; /// /// # async fn test(op: Operator) -> Result<()> { + /// // Cache content for 7 days (604800 seconds) /// let mut w = op /// .writer_with("path/to/file") /// .cache_control("max-age=604800") @@ -1021,11 +1096,28 @@ impl Operator { /// # } /// ``` /// + /// ### References + /// + /// - [MDN Cache-Control](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Cache-Control) + /// - [RFC 7234 Section 5.2](https://tools.ietf.org/html/rfc7234#section-5.2) + /// /// ## `content_type` /// - /// Set the `content_type` for this `write` request. + /// Sets `Content-Type` header for this write operation. + /// + /// ## Capability + /// + /// Check [`Capability::write_with_content_type`] before using this feature. /// - /// Some storage services support setting `content_type` as system metadata. + /// ### Behavior + /// + /// - If supported, sets Content-Type as system metadata on the target file + /// - The value should follow MIME type format (e.g. "text/plain", "image/jpeg") + /// - If not supported, the value will be ignored + /// + /// This operation allows specifying the media type of the content being written. + /// + /// ## Example /// /// ```no_run /// # use opendal::Result; @@ -1033,6 +1125,7 @@ impl Operator { /// use bytes::Bytes; /// /// # async fn test(op: Operator) -> Result<()> { + /// // Set content type for plain text file /// let mut w = op /// .writer_with("path/to/file") /// .content_type("text/plain") @@ -1046,30 +1139,25 @@ impl Operator { /// /// ## `content_disposition` /// - /// Set the `content_disposition` for this `write` request. + /// Sets Content-Disposition header for this write request. /// - /// Some storage services support setting `content_disposition` as system metadata. + /// ### Capability /// - /// ``` - /// # use opendal::Result; - /// # use opendal::Operator; - /// # use futures::StreamExt; - /// # use futures::SinkExt; - /// use bytes::Bytes; + /// Check [`Capability::write_with_content_disposition`] before using this feature. /// - /// # async fn test(op: Operator) -> Result<()> { - /// let mut w = op - /// .writer_with("path/to/file") - /// .content_disposition("attachment; filename=\"filename.jpg\"") - /// .await?; - /// w.write(vec![0; 4096]).await?; - /// w.write(vec![1; 4096]).await?; - /// w.close().await?; - /// # Ok(()) - /// # } - /// ``` + /// ### Behavior /// - /// # Examples + /// - If supported, sets Content-Disposition as system metadata on the target file + /// - The value should follow HTTP Content-Disposition header format + /// - Common values include: + /// - `inline` - Content displayed within browser + /// - `attachment` - Content downloaded as file + /// - `attachment; filename="example.jpg"` - Downloaded with specified filename + /// - If not supported, the value will be ignored + /// + /// This operation allows controlling how the content should be displayed or downloaded. + /// + /// ### Example /// /// ``` /// # use opendal::Result; @@ -1081,7 +1169,7 @@ impl Operator { /// # async fn test(op: Operator) -> Result<()> { /// let mut w = op /// .writer_with("path/to/file") - /// .content_type("application/octet-stream") + /// .content_disposition("attachment; filename=\"filename.jpg\"") /// .await?; /// w.write(vec![0; 4096]).await?; /// w.write(vec![1; 4096]).await?; @@ -1122,27 +1210,35 @@ impl Operator { /// /// ## Streaming Write /// - /// This function will write all bytes at once. For more precise memory control or - /// writing data lazily, please use [`Operator::writer_with`]. + /// This method performs a single bulk write operation for all bytes. For finer-grained + /// memory control or lazy writing, consider using [`Operator::writer_with`] instead. /// - /// ## Multipart Uploads Write + /// ## Multipart Uploads /// - /// OpenDAL abstracts the multipart uploads into [`Writer`]. It will automatically - /// handle the multipart uploads for you. You can control the behavior of multipart uploads - /// by setting `chunk`, `concurrent` via [`Operator::writer_with`] + /// OpenDAL handles multipart uploads through the [`Writer`] abstraction, managing all + /// the upload details automatically. You can customize the upload behavior by configuring + /// `chunk` size and `concurrent` operations via [`Operator::writer_with`]. /// - /// # Options + /// # Usages /// /// ## `append` /// - /// Set `append` for this `write` request. + /// Sets `append` mode for this write request. /// - /// `write` by default to overwrite existing files. To append to the end of file instead, - /// please set `append` to true. + /// ### Capability /// - /// The following example will append data to existing file instead. + /// Check [`Capability::write_with_append`] before using this feature. /// - /// ``` + /// ### Behavior + /// + /// - If append is true, data will be appended to the end of existing file + /// - If append is false (default), existing file will be overwritten + /// + /// This operation allows appending data to existing files instead of overwriting them. + /// + /// ### Example + /// + /// ```no_run /// # use opendal::Result; /// # use opendal::Operator; /// use bytes::Bytes; @@ -1156,11 +1252,30 @@ impl Operator { /// /// ## `cache_control` /// - /// Set the `cache_control` for this `write` request. + /// Sets `Cache-Control` header for this write request. /// - /// Some storage services support setting `cache_control` as system metadata. + /// ### Capability /// - /// ``` + /// Check [`Capability::write_with_cache_control`] before using this feature. + /// + /// ### Behavior + /// + /// - If supported, sets Cache-Control as system metadata on the target file + /// - The value should follow HTTP Cache-Control header format + /// - If not supported, the value will be ignored + /// + /// This operation allows controlling caching behavior for the written content. + /// + /// ## Use Cases + /// + /// - Setting browser cache duration + /// - Configuring CDN behavior + /// - Optimizing content delivery + /// - Managing cache invalidation + /// + /// ### Example + /// + /// ```no_run /// # use opendal::Result; /// # use opendal::Operator; /// use bytes::Bytes; @@ -1177,11 +1292,23 @@ impl Operator { /// /// ## `content_type` /// - /// Set the `content_type` for this `write` request. + /// Sets Content-Type header for this write request. /// - /// Some storage services support setting `content_type` as system metadata. + /// ### Capability /// - /// ``` + /// Check [`Capability::write_with_content_type`] before using this feature. + /// + /// ### Behavior + /// + /// - If supported, sets Content-Type as system metadata on the target file + /// - The value should follow MIME type format (e.g. "text/plain", "image/jpeg") + /// - If not supported, the value will be ignored + /// + /// This operation allows specifying the media type of the content being written. + /// + /// ### Example + /// + /// ```no_run /// # use opendal::Result; /// # use opendal::Operator; /// use bytes::Bytes; @@ -1198,11 +1325,27 @@ impl Operator { /// /// ## `content_disposition` /// - /// Set the `content_disposition` for this `write` request. + /// Sets Content-Disposition header for this write request. /// - /// Some storage services support setting `content_disposition` as system metadata. + /// ### Capability /// - /// ``` + /// Check [`Capability::write_with_content_disposition`] before using this feature. + /// + /// ### Behavior + /// + /// - If supported, sets Content-Disposition as system metadata on the target file + /// - The value should follow HTTP Content-Disposition header format + /// - Common values include: + /// - `inline` - Content displayed within browser + /// - `attachment` - Content downloaded as file + /// - `attachment; filename="example.jpg"` - Downloaded with specified filename + /// - If not supported, the value will be ignored + /// + /// This operation allows controlling how the content should be displayed or downloaded. + /// + /// ### Example + /// + /// ```no_run /// # use opendal::Result; /// # use opendal::Operator; /// use bytes::Bytes; @@ -1219,12 +1362,21 @@ impl Operator { /// /// ## `if_none_match` /// - /// Set `if_none_match` for this `write` request. + /// Sets an `if none match` condition with specified ETag for this write request. + /// + /// ### Capability + /// + /// Check [`Capability::write_with_if_none_match`] before using this feature. + /// + /// ### Behavior /// - /// This feature can be used to check if the file already exists. - /// This prevents overwriting of existing objects with identical key names. + /// - If the target file's ETag equals the specified one, returns [`ErrorKind::ConditionNotMatch`] + /// - If the target file's ETag differs from the specified one, proceeds with the write operation /// - /// If file exists, an error with kind [`ErrorKind::ConditionNotMatch`] will be returned. + /// This operation will succeed when the target's ETag is different from the specified one, + /// providing a way for concurrency control. + /// + /// ### Example /// /// ```no_run /// # use opendal::{ErrorKind, Result}; @@ -1240,13 +1392,21 @@ impl Operator { /// /// ## `if_not_exists` /// - /// This feature allows to safely write a file only if it does not exist. It is designed - /// to be concurrency-safe, and can be used to a file lock. For storage services that - /// support the `if_not_exists` feature, only one write operation will succeed, while all - /// other attempts will fail. + /// Sets an `if not exists` condition for this write request. + /// + /// ### Capability + /// + /// Check [`Capability::write_with_if_not_exists`] before using this feature. + /// + /// ### Behavior /// - /// If the file already exists, an error with kind [`ErrorKind::ConditionNotMatch`] will - /// be returned. + /// - If the target file exists, returns [`ErrorKind::ConditionNotMatch`] + /// - If the target file doesn't exist, proceeds with the write operation + /// + /// This operation provides atomic file creation that is concurrency-safe. + /// Only one write operation will succeed while others will fail. + /// + /// ### Example /// /// ```no_run /// # use opendal::{ErrorKind, Result}; @@ -1256,22 +1416,6 @@ impl Operator { /// let res = op.write_with("path/to/file", bs).if_not_exists(true).await; /// assert!(res.is_err()); /// assert_eq!(res.unwrap_err().kind(), ErrorKind::ConditionNotMatch); - /// # Ok(())} - /// ``` - /// - /// # Examples - /// - /// ``` - /// # use opendal::Result; - /// # use opendal::Operator; - /// use bytes::Bytes; - /// - /// # async fn test(op: Operator) -> Result<()> { - /// let bs = b"hello, world!".to_vec(); - /// let _ = op - /// .write_with("path/to/file", bs) - /// .content_type("text/plain") - /// .await?; /// # Ok(()) /// # } /// ```