mirror of
https://github.com/rclone/rclone.git
synced 2025-01-19 11:22:47 +08:00
internetarchive: add support for Metadata
This commit is contained in:
parent
b4d847cadd
commit
42dfadfa1b
|
@ -38,6 +38,84 @@ func init() {
|
||||||
Name: "internetarchive",
|
Name: "internetarchive",
|
||||||
Description: "Internet Archive",
|
Description: "Internet Archive",
|
||||||
NewFs: NewFs,
|
NewFs: NewFs,
|
||||||
|
|
||||||
|
MetadataInfo: &fs.MetadataInfo{
|
||||||
|
System: map[string]fs.MetadataHelp{
|
||||||
|
"name": {
|
||||||
|
Help: "Full file path, without the bucket part",
|
||||||
|
Type: "filename",
|
||||||
|
Example: "backend/internetarchive/internetarchive.go",
|
||||||
|
},
|
||||||
|
"source": {
|
||||||
|
Help: "The source of the file",
|
||||||
|
Type: "string",
|
||||||
|
Example: "original",
|
||||||
|
},
|
||||||
|
"mtime": {
|
||||||
|
Help: "Time of last modification, managed by Rclone",
|
||||||
|
Type: "RFC 3339",
|
||||||
|
Example: "2006-01-02T15:04:05.999999999Z",
|
||||||
|
},
|
||||||
|
"size": {
|
||||||
|
Help: "File size in bytes",
|
||||||
|
Type: "decimal number",
|
||||||
|
Example: "123456",
|
||||||
|
},
|
||||||
|
"md5": {
|
||||||
|
Help: "MD5 hash calculated by Internet Archive",
|
||||||
|
Type: "string",
|
||||||
|
Example: "01234567012345670123456701234567",
|
||||||
|
},
|
||||||
|
"crc32": {
|
||||||
|
Help: "CRC32 calculated by Internet Archive",
|
||||||
|
Type: "string",
|
||||||
|
Example: "01234567",
|
||||||
|
},
|
||||||
|
"sha1": {
|
||||||
|
Help: "SHA1 hash calculated by Internet Archive",
|
||||||
|
Type: "string",
|
||||||
|
Example: "0123456701234567012345670123456701234567",
|
||||||
|
},
|
||||||
|
"format": {
|
||||||
|
Help: "Name of format identified by Internet Archive",
|
||||||
|
Type: "string",
|
||||||
|
Example: "Comma-Separated Values",
|
||||||
|
},
|
||||||
|
"old_version": {
|
||||||
|
Help: "Whether the file was replaced and moved by keep-old-version flag",
|
||||||
|
Type: "boolean",
|
||||||
|
Example: "true",
|
||||||
|
},
|
||||||
|
"viruscheck": {
|
||||||
|
Help: "The last time viruscheck process was run for the file (?)",
|
||||||
|
Type: "unixtime",
|
||||||
|
Example: "1654191352",
|
||||||
|
},
|
||||||
|
|
||||||
|
"rclone-ia-mtime": {
|
||||||
|
Help: "Time of last modification, managed by Internet Archive",
|
||||||
|
Type: "RFC 3339",
|
||||||
|
Example: "2006-01-02T15:04:05.999999999Z",
|
||||||
|
},
|
||||||
|
"rclone-mtime": {
|
||||||
|
Help: "Time of last modification, managed by Rclone",
|
||||||
|
Type: "RFC 3339",
|
||||||
|
Example: "2006-01-02T15:04:05.999999999Z",
|
||||||
|
},
|
||||||
|
"rclone-update-track": {
|
||||||
|
Help: "Random value used by Rclone for tracking changes inside Internet Archive",
|
||||||
|
Type: "string",
|
||||||
|
Example: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
Help: `Metadata fields provided by Internet Archive.
|
||||||
|
If there are multiple values for a key, only the first one is returned.
|
||||||
|
This is a limitation of Rclone, that supports one value per one key.
|
||||||
|
|
||||||
|
Owner is able to add custom keys. Metadata feature grabs all the keys including them.
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
|
||||||
Options: []fs.Option{{
|
Options: []fs.Option{{
|
||||||
Name: "access_key_id",
|
Name: "access_key_id",
|
||||||
Help: "IAS3 Access Key.\n\nLeave blank for anonymous access.\nYou can find one here: https://archive.org/account/s3.php",
|
Help: "IAS3 Access Key.\n\nLeave blank for anonymous access.\nYou can find one here: https://archive.org/account/s3.php",
|
||||||
|
@ -90,6 +168,14 @@ Only enable if you need to be guaranteed to be reflected after write operations.
|
||||||
// maximum size of an item. this is constant across all items
|
// maximum size of an item. this is constant across all items
|
||||||
const iaItemMaxSize int64 = 1099511627776
|
const iaItemMaxSize int64 = 1099511627776
|
||||||
|
|
||||||
|
// roMetadataKey is the set of metadata keys that are not writeable.
// Only key membership is meaningful; every value is nil.
var roMetadataKey = map[string]interface{}{
	// mtime is deliberately left out: it's a documented exception
	"name":        nil,
	"source":      nil,
	"size":        nil,
	"md5":         nil,
	"crc32":       nil,
	"sha1":        nil,
	"format":      nil,
	"old_version": nil,
	"viruscheck":  nil,
}
|
||||||
|
|
||||||
// Options defines the configuration for this backend
|
// Options defines the configuration for this backend
|
||||||
type Options struct {
|
type Options struct {
|
||||||
AccessKeyID string `config:"access_key_id"`
|
AccessKeyID string `config:"access_key_id"`
|
||||||
|
@ -122,6 +208,7 @@ type Object struct {
|
||||||
md5 string // md5 hash of the file presented by the server
|
md5 string // md5 hash of the file presented by the server
|
||||||
sha1 string // sha1 hash of the file presented by the server
|
sha1 string // sha1 hash of the file presented by the server
|
||||||
crc32 string // crc32 of the file presented by the server
|
crc32 string // crc32 of the file presented by the server
|
||||||
|
rawData json.RawMessage
|
||||||
}
|
}
|
||||||
|
|
||||||
// IAFile represents a subset of object in MetadataResponse.Files
|
// IAFile represents a subset of object in MetadataResponse.Files
|
||||||
|
@ -135,6 +222,8 @@ type IAFile struct {
|
||||||
Md5 string `json:"md5"`
|
Md5 string `json:"md5"`
|
||||||
Crc32 string `json:"crc32"`
|
Crc32 string `json:"crc32"`
|
||||||
Sha1 string `json:"sha1"`
|
Sha1 string `json:"sha1"`
|
||||||
|
|
||||||
|
rawData json.RawMessage
|
||||||
}
|
}
|
||||||
|
|
||||||
// MetadataResponse represents subset of the JSON object returned by (frontend)/metadata/
|
// MetadataResponse represents subset of the JSON object returned by (frontend)/metadata/
|
||||||
|
@ -143,6 +232,12 @@ type MetadataResponse struct {
|
||||||
ItemSize int64 `json:"item_size"`
|
ItemSize int64 `json:"item_size"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MetadataResponseRaw mirrors MetadataResponse, except that every entry of
// "files" is kept as undecoded JSON so that the complete per-file metadata
// stays available after the typed fields have been extracted.
type MetadataResponseRaw struct {
	// Files holds one raw JSON value per element of the "files" array.
	Files []json.RawMessage `json:"files"`
	// ItemSize is decoded from the "item_size" field.
	ItemSize int64 `json:"item_size"`
}
|
||||||
|
|
||||||
// ModMetadataResponse represents response for amending metadata
|
// ModMetadataResponse represents response for amending metadata
|
||||||
type ModMetadataResponse struct {
|
type ModMetadataResponse struct {
|
||||||
// https://archive.org/services/docs/api/md-write.html#example
|
// https://archive.org/services/docs/api/md-write.html#example
|
||||||
|
@ -227,6 +322,9 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e
|
||||||
f.setRoot(root)
|
f.setRoot(root)
|
||||||
f.features = (&fs.Features{
|
f.features = (&fs.Features{
|
||||||
BucketBased: true,
|
BucketBased: true,
|
||||||
|
ReadMetadata: true,
|
||||||
|
WriteMetadata: true,
|
||||||
|
UserMetadata: true,
|
||||||
}).Fill(ctx, f)
|
}).Fill(ctx, f)
|
||||||
|
|
||||||
f.srv = rest.NewClient(fshttp.NewClient(ctx))
|
f.srv = rest.NewClient(fshttp.NewClient(ctx))
|
||||||
|
@ -307,18 +405,17 @@ func (o *Object) SetModTime(ctx context.Context, t time.Time) (err error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://archive.org/services/docs/api/md-write.html
|
// https://archive.org/services/docs/api/md-write.html
|
||||||
var patch = []interface{}{
|
// the following code might be useful for modifying metadata of an uploaded file
|
||||||
|
patch := []map[string]string{
|
||||||
// we should drop it first to clear all rclone-provided mtimes
|
// we should drop it first to clear all rclone-provided mtimes
|
||||||
struct {
|
{
|
||||||
Op string `json:"op"`
|
"op": "remove",
|
||||||
Path string `json:"path"`
|
"path": "/rclone-mtime",
|
||||||
}{"remove", "/rclone-mtime"},
|
}, {
|
||||||
struct {
|
"op": "add",
|
||||||
Op string `json:"op"`
|
"path": "/rclone-mtime",
|
||||||
Path string `json:"path"`
|
"value": t.Format(time.RFC3339Nano),
|
||||||
Value string `json:"value"`
|
}}
|
||||||
}{"add", "/rclone-mtime", t.Format(time.RFC3339Nano)},
|
|
||||||
}
|
|
||||||
res, err := json.Marshal(patch)
|
res, err := json.Marshal(patch)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -685,6 +782,23 @@ func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, op
|
||||||
headers["Content-Length"] = fmt.Sprintf("%d", size)
|
headers["Content-Length"] = fmt.Sprintf("%d", size)
|
||||||
headers["x-archive-size-hint"] = fmt.Sprintf("%d", size)
|
headers["x-archive-size-hint"] = fmt.Sprintf("%d", size)
|
||||||
}
|
}
|
||||||
|
var mdata fs.Metadata
|
||||||
|
mdata, err = fs.GetMetadataOptions(ctx, src, options)
|
||||||
|
if err == nil && mdata != nil {
|
||||||
|
for mk, mv := range mdata {
|
||||||
|
mk = strings.ToLower(mk)
|
||||||
|
if strings.HasPrefix(mk, "rclone-") {
|
||||||
|
fs.LogPrintf(fs.LogLevelWarning, o, "reserved metadata key %s is about to set", mk)
|
||||||
|
} else if _, ok := roMetadataKey[mk]; ok {
|
||||||
|
fs.LogPrintf(fs.LogLevelWarning, o, "setting or modifying read-only key %s is requested, skipping", mk)
|
||||||
|
continue
|
||||||
|
} else if mk == "mtime" {
|
||||||
|
// redirect to make it work
|
||||||
|
mk = "rclone-mtime"
|
||||||
|
}
|
||||||
|
headers[fmt.Sprintf("x-amz-filemeta-%s", mk)] = mv
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// read the md5sum if available
|
// read the md5sum if available
|
||||||
var md5sumHex string
|
var md5sumHex string
|
||||||
|
@ -762,6 +876,34 @@ func (o *Object) String() string {
|
||||||
return o.remote
|
return o.remote
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Metadata returns all file metadata provided by Internet Archive
|
||||||
|
func (o *Object) Metadata(ctx context.Context) (m fs.Metadata, err error) {
|
||||||
|
if o.rawData == nil {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
raw := make(map[string]json.RawMessage)
|
||||||
|
err = json.Unmarshal(o.rawData, &raw)
|
||||||
|
if err != nil {
|
||||||
|
// fatal: json parsing failed
|
||||||
|
return
|
||||||
|
}
|
||||||
|
for k, v := range raw {
|
||||||
|
items, err := listOrString(v)
|
||||||
|
if len(items) == 0 || err != nil {
|
||||||
|
// skip: an entry failed to parse
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
m.Set(k, items[0])
|
||||||
|
}
|
||||||
|
// move the old mtime to an another key
|
||||||
|
if v, ok := m["mtime"]; ok {
|
||||||
|
m["rclone-ia-mtime"] = v
|
||||||
|
}
|
||||||
|
// overwrite with a correct mtime
|
||||||
|
m["mtime"] = o.modTime.Format(time.RFC3339Nano)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
func (f *Fs) shouldRetry(resp *http.Response, err error) (bool, error) {
|
func (f *Fs) shouldRetry(resp *http.Response, err error) (bool, error) {
|
||||||
if resp != nil {
|
if resp != nil {
|
||||||
for _, e := range retryErrorCodes {
|
for _, e := range retryErrorCodes {
|
||||||
|
@ -788,7 +930,7 @@ func (o *Object) split() (bucket, bucketPath string) {
|
||||||
return o.fs.split(o.remote)
|
return o.fs.split(o.remote)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Fs) requestMetadata(ctx context.Context, bucket string) (result MetadataResponse, err error) {
|
func (f *Fs) requestMetadata(ctx context.Context, bucket string) (result *MetadataResponse, err error) {
|
||||||
var resp *http.Response
|
var resp *http.Response
|
||||||
// make a GET request to (frontend)/metadata/:item/
|
// make a GET request to (frontend)/metadata/:item/
|
||||||
opts := rest.Opts{
|
opts := rest.Opts{
|
||||||
|
@ -796,12 +938,15 @@ func (f *Fs) requestMetadata(ctx context.Context, bucket string) (result Metadat
|
||||||
Path: path.Join("/metadata/", bucket),
|
Path: path.Join("/metadata/", bucket),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var temp MetadataResponseRaw
|
||||||
err = f.pacer.Call(func() (bool, error) {
|
err = f.pacer.Call(func() (bool, error) {
|
||||||
resp, err = f.front.CallJSON(ctx, &opts, nil, &result)
|
resp, err = f.front.CallJSON(ctx, &opts, nil, &temp)
|
||||||
return f.shouldRetry(resp, err)
|
return f.shouldRetry(resp, err)
|
||||||
})
|
})
|
||||||
|
if err != nil {
|
||||||
return result, err
|
return
|
||||||
|
}
|
||||||
|
return temp.unraw()
|
||||||
}
|
}
|
||||||
|
|
||||||
// list up all files/directories without any filters
|
// list up all files/directories without any filters
|
||||||
|
@ -998,6 +1143,7 @@ func makeValidObject(f *Fs, remote string, file IAFile, mtime time.Time, size in
|
||||||
md5: file.Md5,
|
md5: file.Md5,
|
||||||
crc32: file.Crc32,
|
crc32: file.Crc32,
|
||||||
sha1: file.Sha1,
|
sha1: file.Sha1,
|
||||||
|
rawData: file.rawData,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1045,6 +1191,23 @@ func (file IAFile) parseMtime() (mtime time.Time) {
|
||||||
return mtime
|
return mtime
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (mrr *MetadataResponseRaw) unraw() (_ *MetadataResponse, err error) {
|
||||||
|
var files []IAFile
|
||||||
|
for _, raw := range mrr.Files {
|
||||||
|
var parsed IAFile
|
||||||
|
err = json.Unmarshal(raw, &parsed)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
parsed.rawData = raw
|
||||||
|
files = append(files, parsed)
|
||||||
|
}
|
||||||
|
return &MetadataResponse{
|
||||||
|
Files: files,
|
||||||
|
ItemSize: mrr.ItemSize,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
func compareSize(a, b int64) bool {
|
func compareSize(a, b int64) bool {
|
||||||
if a < 0 || b < 0 {
|
if a < 0 || b < 0 {
|
||||||
// we won't compare if any of them is not known
|
// we won't compare if any of them is not known
|
||||||
|
@ -1106,4 +1269,5 @@ var (
|
||||||
_ fs.PublicLinker = &Fs{}
|
_ fs.PublicLinker = &Fs{}
|
||||||
_ fs.Abouter = &Fs{}
|
_ fs.Abouter = &Fs{}
|
||||||
_ fs.Object = &Object{}
|
_ fs.Object = &Object{}
|
||||||
|
_ fs.Metadataer = &Object{}
|
||||||
)
|
)
|
||||||
|
|
|
@ -38,6 +38,33 @@ You can optionally wait for the server's processing to finish, by setting non-ze
|
||||||
By making it wait, rclone can do normal file comparison.
|
By making it wait, rclone can do normal file comparison.
|
||||||
Make sure to set a large enough value (e.g. `30m0s` for smaller files) as it can take a long time depending on server's queue.
|
Make sure to set a large enough value (e.g. `30m0s` for smaller files) as it can take a long time depending on server's queue.
|
||||||
|
|
||||||
|
## About metadata
|
||||||
|
This backend supports setting, updating and reading metadata of each file.
|
||||||
|
The metadata will appear as file metadata on Internet Archive.
|
||||||
|
However, some fields are reserved by both Internet Archive and rclone.
|
||||||
|
|
||||||
|
The following are reserved by Internet Archive:
|
||||||
|
- `name`
|
||||||
|
- `source`
|
||||||
|
- `size`
|
||||||
|
- `md5`
|
||||||
|
- `crc32`
|
||||||
|
- `sha1`
|
||||||
|
- `format`
|
||||||
|
- `old_version`
|
||||||
|
- `viruscheck`
|
||||||
|
|
||||||
|
Attempts to set values for these keys are ignored with a warning.
|
||||||
|
Setting `mtime` is the only exception: doing so behaves identically to setting ModTime.
|
||||||
|
|
||||||
|
rclone reserves all keys starting with `rclone-`. Setting a value for one of these keys produces a warning, but the value is still set as requested.
|
||||||
|
|
||||||
|
If there are multiple values for a key, only the first one is returned.
|
||||||
|
This is a limitation of rclone, which supports only one value per key.
|
||||||
|
Multiple values for a key can appear, for example, after a server-side copy.
|
||||||
|
|
||||||
|
Reading metadata also returns custom keys (those that are neither standard nor reserved).
|
||||||
|
|
||||||
## Configuration
|
## Configuration
|
||||||
|
|
||||||
Here is an example of making an internetarchive configuration.
|
Here is an example of making an internetarchive configuration.
|
||||||
|
|
|
@ -33,7 +33,7 @@ Here is an overview of the major features of each cloud storage system.
|
||||||
| HiDrive | HiDrive ¹² | R/W | No | No | - | - |
|
| HiDrive | HiDrive ¹² | R/W | No | No | - | - |
|
||||||
| HTTP | - | R | No | No | R | - |
|
| HTTP | - | R | No | No | R | - |
|
||||||
| Hubic | MD5 | R/W | No | No | R/W | - |
|
| Hubic | MD5 | R/W | No | No | R/W | - |
|
||||||
| Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - | - |
|
| Internet Archive | MD5, SHA1, CRC32 | R/W ¹¹ | No | No | - | RWU |
|
||||||
| Jottacloud | MD5 | R/W | Yes | No | R | - |
|
| Jottacloud | MD5 | R/W | Yes | No | R | - |
|
||||||
| Koofr | MD5 | - | Yes | No | - | - |
|
| Koofr | MD5 | - | Yes | No | - | - |
|
||||||
| Mail.ru Cloud | Mailru ⁶ | R/W | Yes | No | - | - |
|
| Mail.ru Cloud | Mailru ⁶ | R/W | Yes | No | - | - |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user