internetarchive: add --internetarchive-metadata="key=value" for setting item metadata

Added the ability to set item metadata on uploads via the
Internet Archive backend using the `--internetarchive-metadata="key=value"`
argument. This is hidden from the configurator as it should only
really be used on the command line.

Before this change, metadata had to be manually added after uploads.
With this new feature, users can specify metadata directly during the
upload process.
This commit is contained in:
Corentin Barreau 2025-01-17 17:00:34 +01:00 committed by GitHub
parent 375953cba3
commit dbb21165d4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -151,6 +151,19 @@ Owner is able to add custom keys. Metadata feature grabs all the keys including
Help: "Host of InternetArchive Frontend.\n\nLeave blank for default value.",
Default: "https://archive.org",
Advanced: true,
}, {
Name: "item_metadata",
Help: `Metadata to be set on the IA item, this is different from file-level metadata that can be set using --metadata-set.
Format is key=value and the 'x-archive-meta-' prefix is automatically added.`,
Default: []string{},
Hide: fs.OptionHideConfigurator,
Advanced: true,
}, {
Name: "item_derive",
Help: `Whether to trigger derive on the IA item or not. If set to false, the item will not be derived by IA upon upload.
The derive process produces a number of secondary files from an upload to make an upload more usable on the web.
Setting this to false is useful for uploading files that are already in a format that IA can display or reduce burden on IA's infrastructure.`,
Default: true,
}, {
Name: "disable_checksum",
Help: `Don't ask the server to test against MD5 checksum calculated by rclone.
@ -201,6 +214,8 @@ type Options struct {
Endpoint string `config:"endpoint"`
FrontEndpoint string `config:"front_endpoint"`
DisableChecksum bool `config:"disable_checksum"`
ItemMetadata []string `config:"item_metadata"`
ItemDerive bool `config:"item_derive"`
WaitArchive fs.Duration `config:"wait_archive"`
Enc encoder.MultiEncoder `config:"encoding"`
}
@ -790,17 +805,23 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
"x-amz-filemeta-rclone-update-track": updateTracker,
// we add some more headers for intuitive actions
"x-amz-auto-make-bucket": "1", // create an item if does not exist, do nothing if already
"x-archive-auto-make-bucket": "1", // same as above in IAS3 original way
"x-archive-keep-old-version": "0", // do not keep old versions (a.k.a. trashes in other clouds)
"x-archive-meta-mediatype": "data", // mark media type of the uploading file as "data"
"x-archive-queue-derive": "0", // skip derivation process (e.g. encoding to smaller files, OCR on PDFs)
"x-archive-cascade-delete": "1", // enable "cascate delete" (delete all derived files in addition to the file itself)
"x-amz-auto-make-bucket": "1", // create an item if does not exist, do nothing if already
"x-archive-auto-make-bucket": "1", // same as above in IAS3 original way
"x-archive-keep-old-version": "0", // do not keep old versions (a.k.a. trashes in other clouds)
"x-archive-cascade-delete": "1", // enable "cascate delete" (delete all derived files in addition to the file itself)
}
if size >= 0 {
headers["Content-Length"] = fmt.Sprintf("%d", size)
headers["x-archive-size-hint"] = fmt.Sprintf("%d", size)
}
// This is IA's ITEM metadata, not file metadata
headers, err = o.appendItemMetadataHeaders(headers, o.fs.opt)
if err != nil {
return err
}
var mdata fs.Metadata
mdata, err = fs.GetMetadataOptions(ctx, o.fs, src, options)
if err == nil && mdata != nil {
@ -863,6 +884,51 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
return err
}
// appendItemMetadataHeaders adds the Internet Archive item-level headers
// described by options to headers and returns the resulting map.
//
// Every entry of options.ItemMetadata must have the form key=value with a
// non-empty key; the value may itself contain "=" since only the first one
// is used as the separator. A key given once is sent as
// "x-archive-meta-<key>". A key given several times is sent as
// "x-archive-meta01-<key>", "x-archive-meta02-<key>", ... in the order the
// values were supplied.
//
// The "x-archive-queue-derive" header is set to "1" or "0" according to
// options.ItemDerive.
//
// headers is updated in place (a map is allocated only when headers is nil).
// All entries are validated before anything is written, so on error headers
// is left untouched and a nil map is returned.
func (o *Object) appendItemMetadataHeaders(headers map[string]string, options Options) (newHeaders map[string]string, err error) {
	// Group the values by key, keeping the order in which they were given so
	// that the numbered headers below are assigned deterministically.
	valuesByKey := make(map[string][]string, len(options.ItemMetadata))
	for _, entry := range options.ItemMetadata {
		parts := strings.SplitN(entry, "=", 2)
		// An empty key would produce the meaningless header "x-archive-meta-".
		if len(parts) != 2 || parts[0] == "" {
			return nil, fmt.Errorf("item metadata %q should be in the form key=value", entry)
		}
		valuesByKey[parts[0]] = append(valuesByKey[parts[0]], parts[1])
	}

	// Writing to a nil map panics, so make sure there is one to write to.
	if headers == nil {
		headers = make(map[string]string)
	}

	for key, values := range valuesByKey {
		if len(values) == 1 {
			// Only one occurrence, use x-archive-meta-
			headers["x-archive-meta-"+key] = values[0]
			continue
		}
		// Multiple occurrences, use x-archive-meta01-, x-archive-meta02-, etc.
		for i, value := range values {
			headers[fmt.Sprintf("x-archive-meta%02d-%s", i+1, key)] = value
		}
	}

	// Honour the options that were passed in rather than reaching back into
	// o.fs.opt, so the function behaves as its signature advertises.
	if options.ItemDerive {
		headers["x-archive-queue-derive"] = "1"
	} else {
		headers["x-archive-queue-derive"] = "0"
	}

	fs.Debugf(o, "Setting IA item derive: %t", options.ItemDerive)
	for k, v := range headers {
		if strings.HasPrefix(k, "x-archive-meta") {
			fs.Debugf(o, "Setting IA item metadata: %s=%s", k, v)
		}
	}
	return headers, nil
}
// Remove an object
func (o *Object) Remove(ctx context.Context) (err error) {
bucket, bucketPath := o.split()