From 76ee3060d1fb3985a45761c0e65e3824d7df3b04 Mon Sep 17 00:00:00 2001 From: Nick Craig-Wood Date: Mon, 23 Nov 2020 11:53:31 +0000 Subject: [PATCH] s3: Add MD5 metadata to objects uploaded with SSE-AWS/SSE-C Before this change, small objects uploaded with SSE-AWS/SSE-C would not have MD5 sums. This change adds metadata for these objects in the same way that the metadata is stored for multipart uploaded objects. See: #1824 #2827 --- backend/s3/s3.go | 7 ++++++- docs/content/s3.md | 22 ++++++++++++++++++++++ 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/backend/s3/s3.go b/backend/s3/s3.go index 86a029660..f70bcaa34 100644 --- a/backend/s3/s3.go +++ b/backend/s3/s3.go @@ -3118,6 +3118,7 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op // read the md5sum if available // - for non multipart // - so we can add a ContentMD5 + // - so we can add the md5sum in the metadata as metaMD5Hash if using SSE/SSE-C // - for multipart provided checksums aren't disabled // - so we can add the md5sum in the metadata as metaMD5Hash var md5sum string @@ -3127,7 +3128,11 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op hashBytes, err := hex.DecodeString(hash) if err == nil { md5sum = base64.StdEncoding.EncodeToString(hashBytes) - if multipart { + if (multipart || o.fs.etagIsNotMD5) && !o.fs.opt.DisableChecksum { + // Set the md5sum as metadata on the object if + // - a multipart upload + // - the Etag is not an MD5, eg when using SSE/SSE-C + // provided checksums aren't disabled metadata[metaMD5Hash] = &md5sum } } diff --git a/docs/content/s3.md b/docs/content/s3.md index b3421904f..2895b4e16 100644 --- a/docs/content/s3.md +++ b/docs/content/s3.md @@ -277,6 +277,28 @@ side copy to update the modification if the object can be copied in a single par In the case the object is larger than 5Gb or is in Glacier or Glacier Deep Archive storage the object will be uploaded rather than copied. 
+Note that reading the modification time from the object takes an additional `HEAD` +request as the metadata isn't returned in object listings. + +### Hashes ### + +For small objects which weren't uploaded as multipart uploads (objects +sized below `--s3-upload-cutoff` if uploaded with rclone) rclone uses +the `ETag` header as an MD5 checksum. + +However, for objects which were uploaded as multipart uploads or with +server side encryption (SSE-AWS or SSE-C) the `ETag` header is no +longer the MD5 sum of the data, so rclone adds an additional piece of +metadata `X-Amz-Meta-Md5chksum` which is a base64 encoded MD5 hash (in +the same format as is required for `Content-MD5`). + +For large objects, calculating this hash can take some time, so the +addition of this hash can be disabled with `--s3-disable-checksum`. +This will mean that these objects do not have an MD5 checksum. + +Note that reading this hash from the object takes an additional `HEAD` +request as the metadata isn't returned in object listings. + ### Cleanup ### If you run `rclone cleanup s3:bucket` then it will remove all pending