From dbb21165d4a320815c8925f34634a14aeb94c23e Mon Sep 17 00:00:00 2001
From: Corentin Barreau
Date: Fri, 17 Jan 2025 17:00:34 +0100
Subject: [PATCH] internetarchive: add --internetarchive-metadata="key=value"
 for setting item metadata

Added the ability to set item metadata on uploads via the Internet
Archive backend using the `--internetarchive-metadata="key=value"`
argument. The item_metadata option is hidden from the configurator as
it should really only be used on the command line.

Before this change, item metadata had to be added manually after an
upload. With this change, users can set it directly during the upload
process.
---
 backend/internetarchive/internetarchive.go | 78 ++++++++++++++++++++--
 1 file changed, 72 insertions(+), 6 deletions(-)

diff --git a/backend/internetarchive/internetarchive.go b/backend/internetarchive/internetarchive.go
index 8718401ec..ec6db9684 100644
--- a/backend/internetarchive/internetarchive.go
+++ b/backend/internetarchive/internetarchive.go
@@ -151,6 +151,19 @@ Owner is able to add custom keys. Metadata feature grabs all the keys including
 			Help:     "Host of InternetArchive Frontend.\n\nLeave blank for default value.",
 			Default:  "https://archive.org",
 			Advanced: true,
+		}, {
+			Name: "item_metadata",
+			Help: `Metadata to be set on the IA item. This is different from the file-level metadata that can be set using --metadata-set.
+Format is key=value and the 'x-archive-meta-' prefix is automatically added.`,
+			Default:  []string{},
+			Hide:     fs.OptionHideConfigurator,
+			Advanced: true,
+		}, {
+			Name: "item_derive",
+			Help: `Whether to trigger derive on the IA item or not. If set to false, the item will not be derived by IA upon upload.
+The derive process produces a number of secondary files from an upload to make the upload more usable on the web.
+Setting this to false is useful for uploading files that are already in a format that IA can display, or to reduce the burden on IA's infrastructure.`,
+			Default: true,
 		}, {
 			Name: "disable_checksum",
 			Help: `Don't ask the server to test against MD5 checksum calculated by rclone.
@@ -201,6 +214,8 @@ type Options struct {
 	Endpoint        string               `config:"endpoint"`
 	FrontEndpoint   string               `config:"front_endpoint"`
 	DisableChecksum bool                 `config:"disable_checksum"`
+	ItemMetadata    []string             `config:"item_metadata"`
+	ItemDerive      bool                 `config:"item_derive"`
 	WaitArchive     fs.Duration          `config:"wait_archive"`
 	Enc             encoder.MultiEncoder `config:"encoding"`
 }
@@ -790,17 +805,23 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 		"x-amz-filemeta-rclone-update-track": updateTracker,
 
 		// we add some more headers for intuitive actions
-		"x-amz-auto-make-bucket":     "1",    // create an item if does not exist, do nothing if already
-		"x-archive-auto-make-bucket": "1",    // same as above in IAS3 original way
-		"x-archive-keep-old-version": "0",    // do not keep old versions (a.k.a. trashes in other clouds)
-		"x-archive-meta-mediatype":   "data", // mark media type of the uploading file as "data"
-		"x-archive-queue-derive":     "0",    // skip derivation process (e.g. encoding to smaller files, OCR on PDFs)
-		"x-archive-cascade-delete":   "1",    // enable "cascate delete" (delete all derived files in addition to the file itself)
+		"x-amz-auto-make-bucket":     "1", // create an item if does not exist, do nothing if already
+		"x-archive-auto-make-bucket": "1", // same as above in IAS3 original way
+		"x-archive-keep-old-version": "0", // do not keep old versions (a.k.a. trashes in other clouds)
+		"x-archive-cascade-delete":   "1", // enable "cascade delete" (delete all derived files in addition to the file itself)
 	}
+
 	if size >= 0 {
 		headers["Content-Length"] = fmt.Sprintf("%d", size)
 		headers["x-archive-size-hint"] = fmt.Sprintf("%d", size)
 	}
+
+	// This is IA's ITEM metadata, not file metadata
+	headers, err = o.appendItemMetadataHeaders(headers, o.fs.opt)
+	if err != nil {
+		return err
+	}
+
 	var mdata fs.Metadata
 	mdata, err = fs.GetMetadataOptions(ctx, o.fs, src, options)
 	if err == nil && mdata != nil {
@@ -863,6 +884,51 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
 	return err
 }
 
+func (o *Object) appendItemMetadataHeaders(headers map[string]string, options Options) (newHeaders map[string]string, err error) {
+	metadataCounter := make(map[string]int)
+	metadataValues := make(map[string][]string)
+
+	// First pass: count occurrences and collect values
+	for _, v := range options.ItemMetadata {
+		parts := strings.SplitN(v, "=", 2)
+		if len(parts) != 2 {
+			return newHeaders, errors.New("item metadata should be in the form key=value")
+		}
+		key, value := parts[0], parts[1]
+		metadataCounter[key]++
+		metadataValues[key] = append(metadataValues[key], value)
+	}
+
+	// Second pass: add headers with appropriate prefixes
+	for key, count := range metadataCounter {
+		if count == 1 {
+			// Only one occurrence, use x-archive-meta-
+			headers[fmt.Sprintf("x-archive-meta-%s", key)] = metadataValues[key][0]
+		} else {
+			// Multiple occurrences, use x-archive-meta01-, x-archive-meta02-, etc.
+			for i, value := range metadataValues[key] {
+				headers[fmt.Sprintf("x-archive-meta%02d-%s", i+1, key)] = value
+			}
+		}
+	}
+
+	if o.fs.opt.ItemDerive {
+		headers["x-archive-queue-derive"] = "1"
+	} else {
+		headers["x-archive-queue-derive"] = "0"
+	}
+
+	fs.Debugf(o, "Setting IA item derive: %t", o.fs.opt.ItemDerive)
+
+	for k, v := range headers {
+		if strings.HasPrefix(k, "x-archive-meta") {
+			fs.Debugf(o, "Setting IA item metadata: %s=%s", k, v)
+		}
+	}
+
+	return headers, nil
+}
+
 // Remove an object
 func (o *Object) Remove(ctx context.Context) (err error) {
 	bucket, bucketPath := o.split()
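
Note on the header naming scheme (illustrative, not part of the patch): the sketch
below mirrors only the key=value parsing and the x-archive-meta- / x-archive-metaNN-
numbering that appendItemMetadataHeaders in this diff implements. buildItemMetadataHeaders
is a hypothetical standalone helper, not an rclone function, and the program can be run
on its own to see which headers a given set of pairs would produce.

package main

import (
	"fmt"
	"strings"
)

// buildItemMetadataHeaders turns key=value pairs into IA S3 headers:
// a key given once becomes "x-archive-meta-<key>", a key given several
// times becomes "x-archive-meta01-<key>", "x-archive-meta02-<key>", ...
func buildItemMetadataHeaders(pairs []string) (map[string]string, error) {
	values := make(map[string][]string)
	for _, p := range pairs {
		parts := strings.SplitN(p, "=", 2)
		if len(parts) != 2 {
			return nil, fmt.Errorf("item metadata %q should be in the form key=value", p)
		}
		values[parts[0]] = append(values[parts[0]], parts[1])
	}
	headers := make(map[string]string)
	for key, vals := range values {
		if len(vals) == 1 {
			headers["x-archive-meta-"+key] = vals[0]
			continue
		}
		for i, v := range vals {
			headers[fmt.Sprintf("x-archive-meta%02d-%s", i+1, key)] = v
		}
	}
	return headers, nil
}

func main() {
	// "title" appears once, "collection" twice, so the output is expected
	// to contain (in map iteration order):
	//   x-archive-meta-title: My Item
	//   x-archive-meta01-collection: opensource
	//   x-archive-meta02-collection: test_collection
	headers, err := buildItemMetadataHeaders([]string{
		"title=My Item",
		"collection=opensource",
		"collection=test_collection",
	})
	if err != nil {
		panic(err)
	}
	for k, v := range headers {
		fmt.Printf("%s: %s\n", k, v)
	}
}

Repeating a key (as with "collection" here) is what selects the numbered
x-archive-metaNN- form; a key that appears only once keeps the plain
x-archive-meta- prefix.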