s3: add --s3-copy-cutoff for size to switch to multipart copy

Before this change we used the same (relatively low) limits for server
side copy as we did for multipart uploads.  It doesn't make sense to
use the same limits since no data is being downloaded or uploaded for
a server side copy.

This change introduces a new parameter --s3-copy-cutoff to control
when the switch from single to multipart server side copy happens and
defaults it to the maximum 5GB.

This makes server side copies much more efficient.

It also fixes the erroneous error when trying to set the modification
time of a file bigger than 5GB.

See #3778
This commit is contained in:
Nick Craig-Wood 2019-12-02 17:14:57 +00:00
parent f4746f5064
commit 0d10640aaa

View File

@ -715,6 +715,16 @@ file you can stream upload is 48GB. If you wish to stream upload
larger files then you will need to increase chunk_size.`, larger files then you will need to increase chunk_size.`,
Default: minChunkSize, Default: minChunkSize,
Advanced: true, Advanced: true,
}, {
Name: "copy_cutoff",
Help: `Cutoff for switching to multipart copy
Any files larger than this that need to be server side copied will be
copied in chunks of this size.
The minimum is 0 and the maximum is 5GB.`,
Default: fs.SizeSuffix(maxSizeForCopy),
Advanced: true,
}, { }, {
Name: "disable_checksum", Name: "disable_checksum",
Help: "Don't store MD5 checksum with object metadata", Help: "Don't store MD5 checksum with object metadata",
@ -809,6 +819,7 @@ type Options struct {
SSEKMSKeyID string `config:"sse_kms_key_id"` SSEKMSKeyID string `config:"sse_kms_key_id"`
StorageClass string `config:"storage_class"` StorageClass string `config:"storage_class"`
UploadCutoff fs.SizeSuffix `config:"upload_cutoff"` UploadCutoff fs.SizeSuffix `config:"upload_cutoff"`
CopyCutoff fs.SizeSuffix `config:"copy_cutoff"`
ChunkSize fs.SizeSuffix `config:"chunk_size"` ChunkSize fs.SizeSuffix `config:"chunk_size"`
DisableChecksum bool `config:"disable_checksum"` DisableChecksum bool `config:"disable_checksum"`
SessionToken string `config:"session_token"` SessionToken string `config:"session_token"`
@ -1653,7 +1664,7 @@ func (f *Fs) copy(ctx context.Context, req *s3.CopyObjectInput, dstBucket, dstPa
req.StorageClass = &f.opt.StorageClass req.StorageClass = &f.opt.StorageClass
} }
if srcSize >= int64(f.opt.UploadCutoff) { if srcSize >= int64(f.opt.CopyCutoff) {
return f.copyMultipart(ctx, req, dstBucket, dstPath, srcBucket, srcPath, srcSize) return f.copyMultipart(ctx, req, dstBucket, dstPath, srcBucket, srcPath, srcSize)
} }
return f.pacer.Call(func() (bool, error) { return f.pacer.Call(func() (bool, error) {
@ -1704,7 +1715,7 @@ func (f *Fs) copyMultipart(ctx context.Context, req *s3.CopyObjectInput, dstBuck
} }
}() }()
partSize := int64(f.opt.ChunkSize) partSize := int64(f.opt.CopyCutoff)
numParts := (srcSize-1)/partSize + 1 numParts := (srcSize-1)/partSize + 1
var parts []*s3.CompletedPart var parts []*s3.CompletedPart
@ -1932,11 +1943,6 @@ func (o *Object) SetModTime(ctx context.Context, modTime time.Time) error {
} }
o.meta[metaMtime] = aws.String(swift.TimeToFloatString(modTime)) o.meta[metaMtime] = aws.String(swift.TimeToFloatString(modTime))
if o.bytes >= maxSizeForCopy {
fs.Debugf(o, "SetModTime is unsupported for objects bigger than %v bytes", fs.SizeSuffix(maxSizeForCopy))
return nil
}
// Can't update metadata here, so return this error to force a recopy // Can't update metadata here, so return this error to force a recopy
if o.storageClass == "GLACIER" || o.storageClass == "DEEP_ARCHIVE" { if o.storageClass == "GLACIER" || o.storageClass == "DEEP_ARCHIVE" {
return fs.ErrorCantSetModTime return fs.ErrorCantSetModTime