Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add large-object support for CopyObject API #644

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions api-put-object-common.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ func hashCopyN(hashAlgorithms map[string]hash.Hash, hashSums map[string][]byte,
return size, err
}

// getUploadID - fetch upload id if already present for an object name
// or initiate a new request to fetch a new upload id.
// newUploadID - create a new uploadId for the given bucket and
// object. Also sets metadata.
func (c Client) newUploadID(bucketName, objectName string, metaData map[string][]string) (uploadID string, err error) {
// Input validation.
if err := isValidBucketName(bucketName); err != nil {
Expand Down
130 changes: 125 additions & 5 deletions api-put-object-copy.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@
package minio

import (
"fmt"
"net/http"
"net/url"
"strings"

"github.com/minio/minio-go/pkg/s3utils"
)

// CopyObject - copy a source object into a new object with the provided name in the provided bucket
// CopyObject - copy a source object into a new object with the
// provided name in the provided bucket
func (c Client) CopyObject(bucketName string, objectName string, objectSource string, cpCond CopyConditions) error {
// Input validation.
if err := isValidBucketName(bucketName); err != nil {
Expand All @@ -31,19 +35,47 @@ func (c Client) CopyObject(bucketName string, objectName string, objectSource st
if err := isValidObjectName(objectName); err != nil {
return err
}
if objectSource == "" {
return ErrInvalidArgument("Object source cannot be empty.")
srcBucket, srcObject, err := getObjectSource(objectSource)
if err != nil {
return err
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Stray newline?

// Get info about the source object
srcInfo, err := c.StatObject(srcBucket, srcObject)
if err != nil {
return err
}
srcByteRangeSize := cpCond.getRangeSize()
if srcByteRangeSize > srcInfo.Size ||
(srcByteRangeSize > 0 && cpCond.byteRangeEnd >= srcInfo.Size) {
return ErrInvalidArgument(fmt.Sprintf(
"Specified byte range (%d, %d) does not fit within source object (size = %d)",
cpCond.byteRangeStart, cpCond.byteRangeEnd, srcInfo.Size))
}

copySize := srcByteRangeSize
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A comment explaining how an empty range is interpreted as the entire object would provide context for future.

if copySize == 0 {
copySize = srcInfo.Size
}

// customHeaders apply headers.
customHeaders := make(http.Header)
for _, cond := range cpCond.conditions {
customHeaders.Set(cond.key, cond.value)
for key, value := range cpCond.conditions {
customHeaders.Set(key, value)
}

// Set copy source.
customHeaders.Set("x-amz-copy-source", s3utils.EncodePath(objectSource))

// Check if single part copy suffices. Multipart is required when:
// 1. source-range-offset does not refer to full source object, or
// 2. size of copied object > 5gb
if copySize > maxPartSize ||
(srcByteRangeSize > 0 && srcByteRangeSize != srcInfo.Size) {
return c.multipartCopyObject(bucketName, objectName,
objectSource, cpCond, customHeaders, copySize)
}

// Execute PUT on objectName.
resp, err := c.executeMethod("PUT", requestMetadata{
bucketName: bucketName,
Expand All @@ -70,3 +102,91 @@ func (c Client) CopyObject(bucketName string, objectName string, objectSource st
// Return nil on success.
return nil
}

func getObjectSource(src string) (bucket string, object string, err error) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment necessary.. and a test case..

parts := strings.Split(src, "/")
if len(parts) != 3 || parts[0] != "" || parts[1] == "" || parts[2] == "" {
return "", "", ErrInvalidArgument("Object source should be formatted as '/bucketName/objectName'")
}
return parts[1], parts[2], nil
}

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment necessary..

func (c Client) multipartCopyObject(bucketName string, objectName string,
objectSource string, cpCond CopyConditions, headers http.Header,
copySize int64) error {

// Compute split sizes for multipart copy.
partsCount, partSize, lastPartSize, err := optimalPartInfo(copySize)
if err != nil {
return err
}

// It is not possible to resume a multipart copy object
// operation, so we just create new uploadID and proceed with
// the copy operations.
uid, err := c.newUploadID(bucketName, objectSource, nil)
if err != nil {
return err
}

queryParams := url.Values{}
queryParams.Set("uploadId", uid)

var complMultipartUpload completeMultipartUpload

// Initiate copy object operations.
for partNumber := 1; partNumber <= partsCount; partNumber++ {
pCond := cpCond.duplicate()
pCond.byteRangeStart = partSize * (int64(partNumber) - 1)
if partNumber < partsCount {
pCond.byteRangeEnd = pCond.byteRangeStart + partSize - 1
} else {
pCond.byteRangeEnd = pCond.byteRangeStart + lastPartSize - 1
}

// Update the source range header value.
headers.Set("x-amz-copy-source-range",
fmt.Sprintf("bytes:%d-%d", pCond.byteRangeStart,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is wrong btw - it should be bytes=%d-%d

pCond.byteRangeEnd))

// Update part number in the query parameters.
queryParams.Set("partNumber", fmt.Sprintf("%d", partNumber))

// Perform part-copy.
resp, err := c.executeMethod("PUT", requestMetadata{
bucketName: bucketName,
objectName: objectName,
customHeader: headers,
queryValues: queryParams,
})
defer closeResponse(resp)
if err != nil {
return err
}
if resp != nil {
if resp.StatusCode != http.StatusOK {
return httpRespToErrorResponse(resp, bucketName,
objectName)
}
}

// Decode copy response on success.
cpObjRes := copyObjectResult{}
err = xmlDecoder(resp.Body, &cpObjRes)
if err != nil {
return err
}

// append part info for complete multipart request
complMultipartUpload.Parts = append(complMultipartUpload.Parts,
CompletePart{
PartNumber: partNumber,
ETag: cpObjRes.ETag,
})
}

// Complete the multipart upload.
_, err = c.completeMultipartUpload(bucketName, objectName, uid,
complMultipartUpload)
return err
}
3 changes: 2 additions & 1 deletion api-s3-datatypes.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ type initiator struct {
DisplayName string
}

// copyObjectResult container for copy object response.
// copyObjectResult container for copy object and copy object part
// response.
type copyObjectResult struct {
ETag string
LastModified string // time string format "2006-01-02T15:04:05.000Z"
Expand Down
4 changes: 2 additions & 2 deletions api_functional_v2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -949,14 +949,14 @@ func TestCopyObjectV2(t *testing.T) {
}

// Set copy conditions.
copyConds := CopyConditions{}
copyConds := NewCopyConditions()
err = copyConds.SetModified(time.Date(2014, time.April, 0, 0, 0, 0, 0, time.UTC))
if err != nil {
t.Fatal("Error:", err)
}

// Copy source.
copySource := bucketName + "/" + objectName
copySource := "/" + bucketName + "/" + objectName

// Perform the Copy
err = c.CopyObject(bucketName+"-copy", objectName+"-copy", copySource, copyConds)
Expand Down
4 changes: 2 additions & 2 deletions api_functional_v4_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1692,7 +1692,7 @@ func TestCopyObject(t *testing.T) {
}

// Set copy conditions.
copyConds := CopyConditions{}
copyConds := NewCopyConditions()

// Start by setting wrong conditions
err = copyConds.SetModified(time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC))
Expand Down Expand Up @@ -1722,7 +1722,7 @@ func TestCopyObject(t *testing.T) {
}

// Copy source.
copySource := bucketName + "/" + objectName
copySource := "/" + bucketName + "/" + objectName

// Perform the Copy
err = c.CopyObject(bucketName+"-copy", objectName+"-copy", copySource, copyConds)
Expand Down
86 changes: 49 additions & 37 deletions copy-conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,44 +21,34 @@ import (
"time"
)

// copyCondition explanation:
// http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html
//
// Example:
//
// copyCondition {
// key: "x-amz-copy-if-modified-since",
// value: "Tue, 15 Nov 1994 12:45:26 GMT",
// }
//
type copyCondition struct {
key string
value string
}

// CopyConditions - copy conditions.
type CopyConditions struct {
conditions []copyCondition
conditions map[string]string
// start and end offset (inclusive) of source object to be
// copied.
byteRangeStart int64
byteRangeEnd int64
}

// NewCopyConditions - Instantiate new list of conditions. This
// function is left behind for backward compatibility. The idiomatic
// way to set an empty set of copy conditions is,
// ``copyConditions := CopyConditions{}``.
// NewCopyConditions - Instantiate new list of conditions. Prefer to
// use this function as it initializes byte-range.
//
func NewCopyConditions() CopyConditions {
return CopyConditions{}
return CopyConditions{
conditions: make(map[string]string),
// default values for byte-range indicating that they
// are not provided by the user
byteRangeStart: -1,
byteRangeEnd: -1,
}
}

// SetMatchETag - set match etag.
func (c *CopyConditions) SetMatchETag(etag string) error {
if etag == "" {
return ErrInvalidArgument("ETag cannot be empty.")
}
c.conditions = append(c.conditions, copyCondition{
key: "x-amz-copy-source-if-match",
value: etag,
})
c.conditions["x-amz-copy-source-if-match"] = etag
return nil
}

Expand All @@ -67,10 +57,7 @@ func (c *CopyConditions) SetMatchETagExcept(etag string) error {
if etag == "" {
return ErrInvalidArgument("ETag cannot be empty.")
}
c.conditions = append(c.conditions, copyCondition{
key: "x-amz-copy-source-if-none-match",
value: etag,
})
c.conditions["x-amz-copy-source-if-none-match"] = etag
return nil
}

Expand All @@ -79,10 +66,7 @@ func (c *CopyConditions) SetUnmodified(modTime time.Time) error {
if modTime.IsZero() {
return ErrInvalidArgument("Modified since cannot be empty.")
}
c.conditions = append(c.conditions, copyCondition{
key: "x-amz-copy-source-if-unmodified-since",
value: modTime.Format(http.TimeFormat),
})
c.conditions["x-amz-copy-source-if-unmodified-since"] = modTime.Format(http.TimeFormat)
return nil
}

Expand All @@ -91,9 +75,37 @@ func (c *CopyConditions) SetModified(modTime time.Time) error {
if modTime.IsZero() {
return ErrInvalidArgument("Modified since cannot be empty.")
}
c.conditions = append(c.conditions, copyCondition{
key: "x-amz-copy-source-if-modified-since",
value: modTime.Format(http.TimeFormat),
})
c.conditions["x-amz-copy-source-if-modified-since"] = modTime.Format(http.TimeFormat)
return nil
}

// SetByteRange - set the start and end of the source object to be
// copied.
func (c *CopyConditions) SetByteRange(start, end int64) error {
if start < 0 || end < start {
return ErrInvalidArgument("Range start less than 0 or range end less than range start.")
}
if end-start+1 < 1 {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this condition is useless,

Here we are already sure that start <= end:

start <= end <=> 0 <= end - start <=> 1 <= end - start + 1,

so we will never have end-start+1 < 1

return ErrInvalidArgument("Offset must refer to a non-zero range length.")
}
c.byteRangeEnd = end
c.byteRangeStart = start
return nil
}

func (c *CopyConditions) getRangeSize() int64 {
if c.byteRangeStart < 0 {
// only happens if byte-range was not set by user
return 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably it is better to set this to -1 to say unknown. Probably zero is a valid value.

}
return c.byteRangeEnd - c.byteRangeStart + 1
}

func (c *CopyConditions) duplicate() *CopyConditions {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

call this clone() or cloneCopyConditions in Go style.

r := NewCopyConditions()
for k, v := range c.conditions {
r.conditions[k] = v
}
r.byteRangeEnd, r.byteRangeStart = c.byteRangeEnd, c.byteRangeStart
return &r
}