From ddaf01ece94c1fa269b32da4cf939d8d4aaf6c3a Mon Sep 17 00:00:00 2001
From: Nick Craig-Wood
Date: Wed, 15 Nov 2023 09:37:57 +0000
Subject: [PATCH] azurefiles: finish docs and implementation and add optional interfaces

- use rclone's http Transport
- fix handling of 0 length files
- combine into one file and remove unneeded abstraction
- make `chunk_size` and `upload_concurrency` settable
- make auth the same as azureblob
- set the Features correctly
- implement `--azurefiles-max-stream-size`
- remove arbitrary sleep on Mkdir
- implement `--header-upload`
- implement read and write MimeType for objects
- implement optional methods
    - About
    - Copy
    - DirMove
    - Move
    - OpenWriterAt
    - PutStream
- finish documentation
- disable build on plan9 and js

Fixes #365
Fixes #7378
---
 README.md                                    |    1 +
 backend/azurefiles/azurefiles.go             | 1380 +++++++++++++++--
 .../azurefiles/azurefiles_internal_test.go   |    5 +-
 backend/azurefiles/azurefiles_test.go        |    3 +
 backend/azurefiles/azurefiles_unsupported.go |    7 +
 backend/azurefiles/directory.go              |   44 -
 backend/azurefiles/fs.go                     |  292 ----
 backend/azurefiles/object.go                 |  279 ----
 bin/make_manual.py                           |    1 +
 docs/content/_index.md                       |    1 +
 docs/content/azurefiles.md                   |  698 ++++++++-
 docs/content/docs.md                         |    1 +
 docs/content/overview.md                     |    2 +
 docs/layouts/chrome/navbar.html              |    1 +
 fstest/test_all/config.yaml                  |    2 +
 15 files changed, 1994 insertions(+), 723 deletions(-)
 create mode 100644 backend/azurefiles/azurefiles_unsupported.go
 delete mode 100644 backend/azurefiles/directory.go
 delete mode 100644 backend/azurefiles/fs.go
 delete mode 100644 backend/azurefiles/object.go

diff --git a/README.md b/README.md
index 97ed7820d..616f1f2bc 100644
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@ Rclone *("rsync for cloud storage")* is a command-line program to sync files and
   * Mega [:page_facing_up:](https://rclone.org/mega/)
   * Memory [:page_facing_up:](https://rclone.org/memory/)
   * Microsoft Azure Blob Storage [:page_facing_up:](https://rclone.org/azureblob/)
+  * Microsoft Azure Files Storage [:page_facing_up:](https://rclone.org/azurefiles/)
   * Microsoft OneDrive [:page_facing_up:](https://rclone.org/onedrive/)
   * Minio [:page_facing_up:](https://rclone.org/s3/#minio)
   * Nextcloud [:page_facing_up:](https://rclone.org/webdav/#nextcloud)
diff --git a/backend/azurefiles/azurefiles.go b/backend/azurefiles/azurefiles.go
index 5d86bc4c2..fd8b4a06e 100644
--- a/backend/azurefiles/azurefiles.go
+++ b/backend/azurefiles/azurefiles.go
@@ -1,36 +1,72 @@
+//go:build !plan9 && !js
+// +build !plan9,!js
+
 // Package azurefiles provides an interface to Microsoft Azure Files
 package azurefiles
 
+/*
+   TODO
+
+   This uses LastWriteTime which seems to work.
The API return also + has LastModified - needs investigation + + Needs pacer to have retries + + HTTP headers need to be passed + + Could support Metadata + + FIXME write mime type + + See FIXME markers + + Optional interfaces for Object + - ID + +*/ + import ( + "bytes" "context" + "crypto/md5" + "encoding/hex" + "encoding/json" "errors" "fmt" + "io" + "net/http" + "net/url" + "os" "path" + "strings" + "sync" + "time" + "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" + "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/directory" "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/file" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/fileerror" "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/service" + "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/share" "github.com/rclone/rclone/fs" "github.com/rclone/rclone/fs/config" "github.com/rclone/rclone/fs/config/configmap" "github.com/rclone/rclone/fs/config/configstruct" + "github.com/rclone/rclone/fs/config/obscure" + "github.com/rclone/rclone/fs/fshttp" + "github.com/rclone/rclone/fs/hash" "github.com/rclone/rclone/lib/encoder" + "github.com/rclone/rclone/lib/env" + "github.com/rclone/rclone/lib/readers" ) -// TODO: enable x-ms-allow-trailing-do -// TODO: length -// EncodeCtl | EncodeDel because del is defined as a CTL characater in section 2.2 of RFC 2616. -var defaultEncoder = (encoder.EncodeDoubleQuote | - encoder.EncodeBackSlash | - encoder.EncodeSlash | - encoder.EncodeColon | - encoder.EncodePipe | - encoder.EncodeLtGt | - encoder.EncodeAsterisk | - encoder.EncodeQuestion | - encoder.EncodeInvalidUtf8 | - encoder.EncodeCtl | encoder.EncodeDel | - encoder.EncodeDot | encoder.EncodeRightPeriod) +const ( + maxFileSize = 4 * fs.Tebi + defaultChunkSize = 4 * fs.Mebi + storageDefaultBaseURL = "core.windows.net" // FIXME not sure this is correct +) func init() { fs.Register(&fs.RegInfo{ @@ -38,116 +74,529 @@ func init() { Description: "Microsoft Azure Files", NewFs: NewFs, Options: []fs.Option{{ - Name: "share_name", - Help: `Azure Files Share Name.`, - }, { - Name: "connection_string", - Help: `Azure Files Connection String.`, - }, { Name: "account", - Help: `Storage Account Name.`, + Help: `Azure Storage Account Name. + +Set this to the Azure Storage Account Name in use. + +Leave blank to use SAS URL or connection string, otherwise it needs to be set. + +If this is blank and if env_auth is set it will be read from the +environment variable ` + "`AZURE_STORAGE_ACCOUNT_NAME`" + ` if possible. +`, + Sensitive: true, }, { - Name: "key", - Help: `Storage Account Shared Key.`, + Name: "share_name", + Help: `Azure Files Share Name. + +This is required and is the name of the share to access. +`, + }, { + Name: "env_auth", + Help: `Read credentials from runtime (environment variables, CLI or MSI). + +See the [authentication docs](/azurefiles#authentication) for full info.`, + Default: false, + }, { + Name: "key", + Help: `Storage Account Shared Key. + +Leave blank to use SAS URL or connection string.`, Sensitive: true, }, { Name: "sas_url", - Help: `Shared Access Signature. - -Works after allowing access to service, Container and Object resource types`, + Help: `SAS URL. + +Leave blank if using account/key or connection string.`, Sensitive: true, + }, { + Name: "connection_string", + Help: `Azure Files Connection String.`, + Sensitive: true, + }, { + Name: "tenant", + Help: `ID of the service principal's tenant. 
Also called its directory ID. + +Set this if using +- Service principal with client secret +- Service principal with certificate +- User with username and password +`, + Sensitive: true, + }, { + Name: "client_id", + Help: `The ID of the client in use. + +Set this if using +- Service principal with client secret +- Service principal with certificate +- User with username and password +`, + Sensitive: true, + }, { + Name: "client_secret", + Help: `One of the service principal's client secrets + +Set this if using +- Service principal with client secret +`, + Sensitive: true, + }, { + Name: "client_certificate_path", + Help: `Path to a PEM or PKCS12 certificate file including the private key. + +Set this if using +- Service principal with certificate +`, + }, { + Name: "client_certificate_password", + Help: `Password for the certificate file (optional). + +Optionally set this if using +- Service principal with certificate + +And the certificate has a password. +`, + IsPassword: true, + }, { + Name: "client_send_certificate_chain", + Help: `Send the certificate chain when using certificate auth. + +Specifies whether an authentication request will include an x5c header +to support subject name / issuer based authentication. When set to +true, authentication requests include the x5c header. + +Optionally set this if using +- Service principal with certificate +`, + Default: false, + Advanced: true, + }, { + Name: "username", + Help: `User name (usually an email address) + +Set this if using +- User with username and password +`, + Advanced: true, + Sensitive: true, + }, { + Name: "password", + Help: `The user's password + +Set this if using +- User with username and password +`, + IsPassword: true, + Advanced: true, + }, { + Name: "service_principal_file", + Help: `Path to file containing credentials for use with a service principal. + +Leave blank normally. Needed only if you want to use a service principal instead of interactive login. + + $ az ad sp create-for-rbac --name "" \ + --role "Storage Files Data Owner" \ + --scopes "/subscriptions//resourceGroups//providers/Microsoft.Storage/storageAccounts//blobServices/default/containers/" \ + > azure-principal.json + +See ["Create an Azure service principal"](https://docs.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli) and ["Assign an Azure role for access to files data"](https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad-rbac-cli) pages for more details. + +**NB** this section needs updating for Azure Files - pull requests appreciated! + +It may be more convenient to put the credentials directly into the +rclone config file under the ` + "`client_id`, `tenant` and `client_secret`" + ` +keys instead of setting ` + "`service_principal_file`" + `. +`, + Advanced: true, + }, { + Name: "use_msi", + Help: `Use a managed service identity to authenticate (only works in Azure). + +When true, use a [managed service identity](https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/) +to authenticate to Azure Storage instead of a SAS token or account key. + +If the VM(SS) on which this program is running has a system-assigned identity, it will +be used by default. If the resource has no system-assigned but exactly one user-assigned identity, +the user-assigned identity will be used by default. 
If the resource has multiple user-assigned +identities, the identity to use must be explicitly specified using exactly one of the msi_object_id, +msi_client_id, or msi_mi_res_id parameters.`, + Default: false, + Advanced: true, + }, { + Name: "msi_object_id", + Help: "Object ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_client_id or msi_mi_res_id specified.", + Advanced: true, + Sensitive: true, + }, { + Name: "msi_client_id", + Help: "Object ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_object_id or msi_mi_res_id specified.", + Advanced: true, + Sensitive: true, + }, { + Name: "msi_mi_res_id", + Help: "Azure resource ID of the user-assigned MSI to use, if any.\n\nLeave blank if msi_client_id or msi_object_id specified.", + Advanced: true, + Sensitive: true, + }, { + Name: "endpoint", + Help: "Endpoint for the service.\n\nLeave blank normally.", + Advanced: true, + }, { + Name: "chunk_size", + Help: `Upload chunk size. + +Note that this is stored in memory and there may be up to +"--transfers" * "--azurefile-upload-concurrency" chunks stored at once +in memory.`, + Default: defaultChunkSize, + Advanced: true, + }, { + Name: "upload_concurrency", + Help: `Concurrency for multipart uploads. + +This is the number of chunks of the same file that are uploaded +concurrently. + +If you are uploading small numbers of large files over high-speed +links and these uploads do not fully utilize your bandwidth, then +increasing this may help to speed up the transfers. + +Note that chunks are stored in memory and there may be up to +"--transfers" * "--azurefile-upload-concurrency" chunks stored at once +in memory.`, + Default: 16, + Advanced: true, + }, { + Name: "max_stream_size", + Help: strings.ReplaceAll(`Max size for streamed files. + +Azure files needs to know in advance how big the file will be. When +rclone doesn't know it uses this value instead. + +This will be used when rclone is streaming data, the most common uses are: + +- Uploading files with |--vfs-cache-mode off| with |rclone mount| +- Using |rclone rcat| +- Copying files with unknown length + +You will need this much free space in the share as the file will be this size temporarily. 
+`, "|", "`"), + Default: 10 * fs.Gibi, + Advanced: true, }, { Name: config.ConfigEncoding, Help: config.ConfigEncodingHelp, Advanced: true, - Default: defaultEncoder, + Default: (encoder.EncodeDoubleQuote | + encoder.EncodeBackSlash | + encoder.EncodeSlash | + encoder.EncodeColon | + encoder.EncodePipe | + encoder.EncodeLtGt | + encoder.EncodeAsterisk | + encoder.EncodeQuestion | + encoder.EncodeInvalidUtf8 | + encoder.EncodeCtl | encoder.EncodeDel | + encoder.EncodeDot | encoder.EncodeRightPeriod), }}, }) } // Options defines the configuration for this backend type Options struct { - ShareName string - ConnectionString string - Account string - Key string - SASUrl string `config:"sas_url"` - Enc encoder.MultiEncoder `config:"encoding"` + Account string `config:"account"` + ShareName string `config:"share_name"` + EnvAuth bool `config:"env_auth"` + Key string `config:"key"` + SASURL string `config:"sas_url"` + ConnectionString string `config:"connection_string"` + Tenant string `config:"tenant"` + ClientID string `config:"client_id"` + ClientSecret string `config:"client_secret"` + ClientCertificatePath string `config:"client_certificate_path"` + ClientCertificatePassword string `config:"client_certificate_password"` + ClientSendCertificateChain bool `config:"client_send_certificate_chain"` + Username string `config:"username"` + Password string `config:"password"` + ServicePrincipalFile string `config:"service_principal_file"` + UseMSI bool `config:"use_msi"` + MSIObjectID string `config:"msi_object_id"` + MSIClientID string `config:"msi_client_id"` + MSIResourceID string `config:"msi_mi_res_id"` + Endpoint string `config:"endpoint"` + ChunkSize fs.SizeSuffix `config:"chunk_size"` + MaxStreamSize fs.SizeSuffix `config:"max_stream_size"` + UploadConcurrency int `config:"upload_concurrency"` + Enc encoder.MultiEncoder `config:"encoding"` } -type authenticationScheme int +// Fs represents a root directory inside a share. The root directory can be "" +type Fs struct { + name string // name of this remote + root string // the path we are working on if any + opt Options // parsed config options + features *fs.Features // optional features + shareClient *share.Client // a client for the share itself + svc *directory.Client // the root service +} -const ( - accountAndKey authenticationScheme = iota - connectionString - sasURL -) +// Object describes a Azure File Share File +type Object struct { + fs *Fs // what this object is part of + remote string // The remote path + size int64 // Size of the object + md5 []byte // MD5 hash if known + modTime time.Time // The modified time of the object if known + contentType string // content type if known +} -func authenticationSchemeFromOptions(opt *Options) (authenticationScheme, error) { - if opt.ConnectionString != "" { - return connectionString, nil - } else if opt.Account != "" && opt.Key != "" { - return accountAndKey, nil - } else if opt.SASUrl != "" { - return sasURL, nil +// Wrap the http.Transport to satisfy the Transporter interface +type transporter struct { + http.RoundTripper +} + +// Make a new transporter +func newTransporter(ctx context.Context) transporter { + return transporter{ + RoundTripper: fshttp.NewTransport(ctx), } - return -1, errors.New("could not determine authentication scheme from options") +} + +// Do sends the HTTP request and returns the HTTP response or error. 
+func (tr transporter) Do(req *http.Request) (*http.Response, error) { + return tr.RoundTripper.RoundTrip(req) +} + +type servicePrincipalCredentials struct { + AppID string `json:"appId"` + Password string `json:"password"` + Tenant string `json:"tenant"` +} + +// parseServicePrincipalCredentials unmarshals a service principal credentials JSON file as generated by az cli. +func parseServicePrincipalCredentials(ctx context.Context, credentialsData []byte) (*servicePrincipalCredentials, error) { + var spCredentials servicePrincipalCredentials + if err := json.Unmarshal(credentialsData, &spCredentials); err != nil { + return nil, fmt.Errorf("error parsing credentials from JSON file: %w", err) + } + // TODO: support certificate credentials + // Validate all fields present + if spCredentials.AppID == "" || spCredentials.Password == "" || spCredentials.Tenant == "" { + return nil, fmt.Errorf("missing fields in credentials file") + } + return &spCredentials, nil } // Factored out from NewFs so that it can be tested with opt *Options and without m configmap.Mapper func newFsFromOptions(ctx context.Context, name, root string, opt *Options) (fs.Fs, error) { - as, err := authenticationSchemeFromOptions(opt) - if err != nil { - return nil, err + // Client options specifying our own transport + policyClientOptions := policy.ClientOptions{ + Transport: newTransporter(ctx), } - var serviceClient *service.Client - switch as { - case connectionString: - serviceClient, err = service.NewClientFromConnectionString(opt.ConnectionString, nil) - if err != nil { - return nil, err + clientOpt := service.ClientOptions{ + ClientOptions: policyClientOptions, + } + + // Here we auth by setting one of cred, sharedKeyCred or f.client + var ( + cred azcore.TokenCredential + sharedKeyCred *service.SharedKeyCredential + client *service.Client + err error + ) + switch { + case opt.EnvAuth: + // Read account from environment if needed + if opt.Account == "" { + opt.Account, _ = os.LookupEnv("AZURE_STORAGE_ACCOUNT_NAME") } - case accountAndKey: - skc, err := file.NewSharedKeyCredential(opt.Account, opt.Key) - if err != nil { - return nil, err + // Read credentials from the environment + options := azidentity.DefaultAzureCredentialOptions{ + ClientOptions: policyClientOptions, } - fileURL := fmt.Sprintf("https://%s.file.core.windows.net/%s", opt.Account, opt.ShareName) - serviceClient, err = service.NewClientWithSharedKeyCredential(fileURL, skc, nil) + cred, err = azidentity.NewDefaultAzureCredential(&options) if err != nil { - return nil, err + return nil, fmt.Errorf("create azure environment credential failed: %w", err) } - case sasURL: + case opt.Account != "" && opt.Key != "": + sharedKeyCred, err = service.NewSharedKeyCredential(opt.Account, opt.Key) if err != nil { - return nil, fmt.Errorf("failed to parse SAS URL: %w", err) + return nil, fmt.Errorf("create new shared key credential failed: %w", err) } - serviceClient, err = service.NewClientWithNoCredential(opt.SASUrl, nil) + case opt.SASURL != "": + client, err = service.NewClientWithNoCredential(opt.SASURL, &clientOpt) if err != nil { return nil, fmt.Errorf("unable to create SAS URL client: %w", err) } + case opt.ConnectionString != "": + client, err = service.NewClientFromConnectionString(opt.ConnectionString, &clientOpt) + if err != nil { + return nil, fmt.Errorf("unable to create connection string client: %w", err) + } + case opt.ClientID != "" && opt.Tenant != "" && opt.ClientSecret != "": + // Service principal with client secret + options := 
azidentity.ClientSecretCredentialOptions{
+			ClientOptions: policyClientOptions,
+		}
+		cred, err = azidentity.NewClientSecretCredential(opt.Tenant, opt.ClientID, opt.ClientSecret, &options)
+		if err != nil {
+			return nil, fmt.Errorf("error creating a client secret credential: %w", err)
+		}
+	case opt.ClientID != "" && opt.Tenant != "" && opt.ClientCertificatePath != "":
+		// Service principal with certificate
+		//
+		// Read the certificate
+		data, err := os.ReadFile(env.ShellExpand(opt.ClientCertificatePath))
+		if err != nil {
+			return nil, fmt.Errorf("error reading client certificate file: %w", err)
+		}
+		// NewClientCertificateCredential requires at least one *x509.Certificate, and a
+		// crypto.PrivateKey.
+		//
+		// ParseCertificates returns these given certificate data in PEM or PKCS12 format.
+		// It handles common scenarios but has limitations, for example it doesn't load PEM
+		// encrypted private keys.
+		var password []byte
+		if opt.ClientCertificatePassword != "" {
+			pw, err := obscure.Reveal(opt.ClientCertificatePassword)
+			if err != nil {
+				return nil, fmt.Errorf("certificate password decode failed - did you obscure it?: %w", err)
+			}
+			password = []byte(pw)
+		}
+		certs, key, err := azidentity.ParseCertificates(data, password)
+		if err != nil {
+			return nil, fmt.Errorf("failed to parse client certificate file: %w", err)
+		}
+		options := azidentity.ClientCertificateCredentialOptions{
+			ClientOptions:        policyClientOptions,
+			SendCertificateChain: opt.ClientSendCertificateChain,
+		}
+		cred, err = azidentity.NewClientCertificateCredential(
+			opt.Tenant, opt.ClientID, certs, key, &options,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("create azure service principal with client certificate credential failed: %w", err)
+		}
+	case opt.ClientID != "" && opt.Tenant != "" && opt.Username != "" && opt.Password != "":
+		// User with username and password
+		options := azidentity.UsernamePasswordCredentialOptions{
+			ClientOptions: policyClientOptions,
+		}
+		password, err := obscure.Reveal(opt.Password)
+		if err != nil {
+			return nil, fmt.Errorf("user password decode failed - did you obscure it?: %w", err)
+		}
+		cred, err = azidentity.NewUsernamePasswordCredential(
+			opt.Tenant, opt.ClientID, opt.Username, password, &options,
+		)
+		if err != nil {
+			return nil, fmt.Errorf("authenticate user with password failed: %w", err)
+		}
+	case opt.ServicePrincipalFile != "":
+		// Loading service principal credentials from file.
+		loadedCreds, err := os.ReadFile(env.ShellExpand(opt.ServicePrincipalFile))
+		if err != nil {
+			return nil, fmt.Errorf("error opening service principal credentials file: %w", err)
+		}
+		parsedCreds, err := parseServicePrincipalCredentials(ctx, loadedCreds)
+		if err != nil {
+			return nil, fmt.Errorf("error parsing service principal credentials file: %w", err)
+		}
+		options := azidentity.ClientSecretCredentialOptions{
+			ClientOptions: policyClientOptions,
+		}
+		cred, err = azidentity.NewClientSecretCredential(parsedCreds.Tenant, parsedCreds.AppID, parsedCreds.Password, &options)
+		if err != nil {
+			return nil, fmt.Errorf("error creating a client secret credential: %w", err)
+		}
+	case opt.UseMSI:
+		// Specifying a user-assigned identity. Exactly one of the above IDs must be specified.
+		// Validate and ensure exactly one is set. (To do: better validation.)
+ var b2i = map[bool]int{false: 0, true: 1} + set := b2i[opt.MSIClientID != ""] + b2i[opt.MSIObjectID != ""] + b2i[opt.MSIResourceID != ""] + if set > 1 { + return nil, errors.New("more than one user-assigned identity ID is set") + } + var options azidentity.ManagedIdentityCredentialOptions + switch { + case opt.MSIClientID != "": + options.ID = azidentity.ClientID(opt.MSIClientID) + case opt.MSIObjectID != "": + // FIXME this doesn't appear to be in the new SDK? + return nil, fmt.Errorf("MSI object ID is currently unsupported") + case opt.MSIResourceID != "": + options.ID = azidentity.ResourceID(opt.MSIResourceID) + } + cred, err = azidentity.NewManagedIdentityCredential(&options) + if err != nil { + return nil, fmt.Errorf("failed to acquire MSI token: %w", err) + } + default: + return nil, errors.New("no authentication method configured") } - shareClient := serviceClient.NewShareClient(opt.ShareName) - shareRootDirClient := shareClient.NewRootDirectoryClient() - f := Fs{ - shareRootDirClient: shareRootDirClient, - name: name, - root: root, - opt: opt, + // Make the client if not already created + if client == nil { + // Work out what the endpoint is if it is still unset + if opt.Endpoint == "" { + if opt.Account == "" { + return nil, fmt.Errorf("account must be set: can't make service URL") + } + u, err := url.Parse(fmt.Sprintf("https://%s.%s", opt.Account, storageDefaultBaseURL)) + if err != nil { + return nil, fmt.Errorf("failed to make azure storage URL from account: %w", err) + } + opt.Endpoint = u.String() + } + if sharedKeyCred != nil { + // Shared key cred + client, err = service.NewClientWithSharedKeyCredential(opt.Endpoint, sharedKeyCred, &clientOpt) + if err != nil { + return nil, fmt.Errorf("create client with shared key failed: %w", err) + } + } else if cred != nil { + // Azidentity cred + client, err = service.NewClient(opt.Endpoint, cred, &clientOpt) + if err != nil { + return nil, fmt.Errorf("create client failed: %w", err) + } + } } - // How to check whether a file exists at this location - _, propsErr := shareRootDirClient.NewFileClient(f.opt.Enc.FromStandardPath(root)).GetProperties(ctx, nil) + if client == nil { + return nil, fmt.Errorf("internal error: auth failed to make credentials or client") + } + + shareClient := client.NewShareClient(opt.ShareName) + svc := shareClient.NewRootDirectoryClient() + f := &Fs{ + shareClient: shareClient, + svc: svc, + name: name, + root: root, + opt: *opt, + } + f.features = (&fs.Features{ + CanHaveEmptyDirectories: true, + PartialUploads: true, // files are visible as they are being uploaded + CaseInsensitive: true, + SlowHash: true, // calling Hash() generally takes an extra transaction + ReadMimeType: true, + WriteMimeType: true, + }).Fill(ctx, f) + + // Check whether a file exists at this location + _, propsErr := f.fileClient("").GetProperties(ctx, nil) if propsErr == nil { f.root = path.Dir(root) - return &f, fs.ErrorIsFile + return f, fs.ErrorIsFile } - return &f, nil + return f, nil } -// NewFs constructs an Fs from the path, container:path -// -// TODO: what happens when root is a file +// NewFs constructs an Fs from the root func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, error) { opt := new(Options) err := configstruct.Set(m, opt) @@ -157,33 +606,762 @@ func NewFs(ctx context.Context, name, root string, m configmap.Mapper) (fs.Fs, e return newFsFromOptions(ctx, name, root, opt) } -var listFilesAndDirectoriesOptions = &directory.ListFilesAndDirectoriesOptions{ - Include: 
directory.ListFilesInclude{ - Timestamps: true, - }, +// ------------------------------------------------------------ + +// Name of the remote (as passed into NewFs) +func (f *Fs) Name() string { + return f.name } -// Fs represents a root directory inside a share. The root directory can be "" -type Fs struct { - shareRootDirClient *directory.Client - name string - root string - opt *Options +// Root of the remote (as passed into NewFs) +func (f *Fs) Root() string { + return f.root } -func (c *common) String() string { - return c.remote +// String converts this Fs to a string +func (f *Fs) String() string { + return fmt.Sprintf("azurefiles root '%s'", f.root) } -func (c *common) Remote() string { - return c.remote +// Features returns the optional features of this Fs +func (f *Fs) Features() *fs.Features { + return f.features } -// TODO: implement MimeTyper -// TODO: what heppens when update is called on Directory - -type common struct { - f *Fs - remote string - properties +// Precision return the precision of this Fs +// +// One second. FileREST API times are in RFC1123 which in the example shows a precision of seconds +// Source: https://learn.microsoft.com/en-us/rest/api/storageservices/representation-of-date-time-values-in-headers +func (f *Fs) Precision() time.Duration { + return time.Second } + +// Hashes returns the supported hash sets. +// +// MD5: since it is listed as header in the response for file properties +// Source: https://learn.microsoft.com/en-us/rest/api/storageservices/get-file-properties +func (f *Fs) Hashes() hash.Set { + return hash.NewHashSet(hash.MD5) +} + +// Encode remote and turn it into an absolute path in the share +func (f *Fs) absPath(remote string) string { + return f.opt.Enc.FromStandardPath(path.Join(f.root, remote)) +} + +// Make a directory client from the dir +func (f *Fs) dirClient(dir string) *directory.Client { + return f.svc.NewSubdirectoryClient(f.absPath(dir)) +} + +// Make a file client from the remote +func (f *Fs) fileClient(remote string) *file.Client { + return f.svc.NewFileClient(f.absPath(remote)) +} + +// NewObject finds the Object at remote. If it can't be found +// it returns the error fs.ErrorObjectNotFound. +// +// Does not return ErrorIsDir when a directory exists instead of file. since the documentation +// for [rclone.fs.Fs.NewObject] rqeuires no extra work to determine whether it is directory +// +// This initiates a network request and returns an error if object is not found. +func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { + resp, err := f.fileClient(remote).GetProperties(ctx, nil) + if fileerror.HasCode(err, fileerror.ParentNotFound, fileerror.ResourceNotFound) { + return nil, fs.ErrorObjectNotFound + } else if err != nil { + return nil, fmt.Errorf("unable to find object remote %q: %w", remote, err) + } + + o := &Object{ + fs: f, + remote: remote, + } + o.setMetadata(&resp) + return o, nil +} + +// Make a directory using the absolute path from the root of the share +// +// This recursiely creating parent directories all the way to the root +// of the share. 
+func (f *Fs) absMkdir(ctx context.Context, absPath string) error { + if absPath == "" { + return nil + } + dirClient := f.svc.NewSubdirectoryClient(absPath) + + // now := time.Now() + // smbProps := &file.SMBProperties{ + // LastWriteTime: &now, + // } + // dirCreateOptions := &directory.CreateOptions{ + // FileSMBProperties: smbProps, + // } + + _, createDirErr := dirClient.Create(ctx, nil) + if fileerror.HasCode(createDirErr, fileerror.ParentNotFound) { + parentDir := path.Dir(absPath) + if parentDir == absPath { + return fmt.Errorf("internal error: infinite recursion since parent and remote are equal") + } + makeParentErr := f.absMkdir(ctx, parentDir) + if makeParentErr != nil { + return fmt.Errorf("could not make parent of %q: %w", absPath, makeParentErr) + } + return f.absMkdir(ctx, absPath) + } else if fileerror.HasCode(createDirErr, fileerror.ResourceAlreadyExists) { + return nil + } else if createDirErr != nil { + return fmt.Errorf("unable to MkDir: %w", createDirErr) + } + return nil +} + +// Mkdir creates nested directories +func (f *Fs) Mkdir(ctx context.Context, remote string) error { + return f.absMkdir(ctx, f.absPath(remote)) +} + +// Make the parent directory of remote +func (f *Fs) mkParentDir(ctx context.Context, remote string) error { + // Can't make the parent of root + if remote == "" { + return nil + } + return f.Mkdir(ctx, path.Dir(remote)) +} + +// Rmdir deletes the root folder +// +// Returns an error if it isn't empty +func (f *Fs) Rmdir(ctx context.Context, dir string) error { + dirClient := f.dirClient(dir) + _, err := dirClient.Delete(ctx, nil) + if err != nil { + if fileerror.HasCode(err, fileerror.DirectoryNotEmpty) { + return fs.ErrorDirectoryNotEmpty + } else if fileerror.HasCode(err, fileerror.ResourceNotFound) { + return fs.ErrorDirNotFound + } + return fmt.Errorf("could not rmdir dir %q: %w", dir, err) + } + return nil +} + +// Put the object +// +// Copies the reader in to the new object. This new object is returned. +// +// The new object may have been created if an error is returned +func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + // Temporary Object under construction + fs := &Object{ + fs: f, + remote: src.Remote(), + } + return fs, fs.Update(ctx, in, src, options...) +} + +// PutStream uploads to the remote path with the modTime given of indeterminate size +func (f *Fs) PutStream(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { + return f.Put(ctx, in, src, options...) +} + +// List the objects and directories in dir into entries. The entries can be +// returned in any order but should be for a complete directory. +// +// dir should be "" to list the root, and should not have trailing slashes. +// +// This should return ErrDirNotFound if the directory isn't found. 
+func (f *Fs) List(ctx context.Context, dir string) (fs.DirEntries, error) { + var entries fs.DirEntries + subDirClient := f.dirClient(dir) + + // Checking whether directory exists + _, err := subDirClient.GetProperties(ctx, nil) + if fileerror.HasCode(err, fileerror.ParentNotFound, fileerror.ResourceNotFound) { + return entries, fs.ErrorDirNotFound + } else if err != nil { + return entries, err + } + + var opt = &directory.ListFilesAndDirectoriesOptions{ + Include: directory.ListFilesInclude{ + Timestamps: true, + }, + } + pager := subDirClient.NewListFilesAndDirectoriesPager(opt) + for pager.More() { + resp, err := pager.NextPage(ctx) + if err != nil { + return entries, err + } + for _, directory := range resp.Segment.Directories { + // Name *string `xml:"Name"` + // Attributes *string `xml:"Attributes"` + // ID *string `xml:"FileId"` + // PermissionKey *string `xml:"PermissionKey"` + // Properties.ContentLength *int64 `xml:"Content-Length"` + // Properties.ChangeTime *time.Time `xml:"ChangeTime"` + // Properties.CreationTime *time.Time `xml:"CreationTime"` + // Properties.ETag *azcore.ETag `xml:"Etag"` + // Properties.LastAccessTime *time.Time `xml:"LastAccessTime"` + // Properties.LastModified *time.Time `xml:"Last-Modified"` + // Properties.LastWriteTime *time.Time `xml:"LastWriteTime"` + var modTime time.Time + if directory.Properties.LastWriteTime != nil { + modTime = *directory.Properties.LastWriteTime + } + leaf := f.opt.Enc.ToStandardPath(*directory.Name) + entry := fs.NewDir(path.Join(dir, leaf), modTime) + if directory.ID != nil { + entry.SetID(*directory.ID) + } + if directory.Properties.ContentLength != nil { + entry.SetSize(*directory.Properties.ContentLength) + } + entries = append(entries, entry) + } + for _, file := range resp.Segment.Files { + leaf := f.opt.Enc.ToStandardPath(*file.Name) + entry := &Object{ + fs: f, + remote: path.Join(dir, leaf), + } + if file.Properties.ContentLength != nil { + entry.size = *file.Properties.ContentLength + } + if file.Properties.LastWriteTime != nil { + entry.modTime = *file.Properties.LastWriteTime + } + entries = append(entries, entry) + } + } + return entries, nil +} + +// ------------------------------------------------------------ + +// Fs returns the parent Fs +func (o *Object) Fs() fs.Info { + return o.fs +} + +// Size of object in bytes +func (o *Object) Size() int64 { + return o.size +} + +// Return a string version +func (o *Object) String() string { + if o == nil { + return "" + } + return o.remote +} + +// Remote returns the remote path +func (o *Object) Remote() string { + return o.remote +} + +// fileClient makes a specialized client for this object +func (o *Object) fileClient() *file.Client { + return o.fs.fileClient(o.remote) +} + +// set the metadata from file.GetPropertiesResponse +func (o *Object) setMetadata(resp *file.GetPropertiesResponse) { + if resp.ContentLength != nil { + o.size = *resp.ContentLength + } + o.md5 = resp.ContentMD5 + if resp.FileLastWriteTime != nil { + o.modTime = *resp.FileLastWriteTime + } + if resp.ContentType != nil { + o.contentType = *resp.ContentType + } +} + +// readMetaData gets the metadata if it hasn't already been fetched +func (o *Object) getMetadata(ctx context.Context) error { + resp, err := o.fileClient().GetProperties(ctx, nil) + if err != nil { + return fmt.Errorf("failed to fetch properties: %w", err) + } + o.setMetadata(&resp) + return nil +} + +// Hash returns the MD5 of an object returning a lowercase hex string +// +// May make a network request becaue the [fs.List] 
method does not +// return MD5 hashes for DirEntry +func (o *Object) Hash(ctx context.Context, ty hash.Type) (string, error) { + if ty != hash.MD5 { + return "", hash.ErrUnsupported + } + if len(o.md5) == 0 { + err := o.getMetadata(ctx) + if err != nil { + return "", err + } + } + return hex.EncodeToString(o.md5), nil +} + +// MimeType returns the content type of the Object if +// known, or "" if not +func (o *Object) MimeType(ctx context.Context) string { + if o.contentType == "" { + err := o.getMetadata(ctx) + if err != nil { + fs.Errorf(o, "Failed to fetch Content-Type") + } + } + return o.contentType +} + +// Storable returns a boolean showing whether this object storable +func (o *Object) Storable() bool { + return true +} + +// ModTime returns the modification time of the object +// +// Returns time.Now() if not present +func (o *Object) ModTime(ctx context.Context) time.Time { + if o.modTime.IsZero() { + return time.Now() + } + return o.modTime +} + +// SetModTime sets the modification time +func (o *Object) SetModTime(ctx context.Context, t time.Time) error { + opt := file.SetHTTPHeadersOptions{ + SMBProperties: &file.SMBProperties{ + LastWriteTime: &t, + }, + } + _, err := o.fileClient().SetHTTPHeaders(ctx, &opt) + if err != nil { + return fmt.Errorf("unable to set modTime: %w", err) + } + o.modTime = t + return nil +} + +// Remove an object +func (o *Object) Remove(ctx context.Context) error { + if _, err := o.fileClient().Delete(ctx, nil); err != nil { + return fmt.Errorf("unable to delete remote %q: %w", o.remote, err) + } + return nil +} + +// Open an object for read +func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (io.ReadCloser, error) { + // Offset and Count for range download + var offset int64 + var count int64 + fs.FixRangeOption(options, o.size) + for _, option := range options { + switch x := option.(type) { + case *fs.RangeOption: + offset, count = x.Decode(o.size) + if count < 0 { + count = o.size - offset + } + case *fs.SeekOption: + offset = x.Offset + default: + if option.Mandatory() { + fs.Logf(o, "Unsupported mandatory option: %v", option) + } + } + } + opt := file.DownloadStreamOptions{ + Range: file.HTTPRange{ + Offset: offset, + Count: count, + }, + } + resp, err := o.fileClient().DownloadStream(ctx, &opt) + if err != nil { + return nil, fmt.Errorf("could not open remote %q: %w", o.remote, err) + } + return resp.Body, nil +} + +// Returns a pointer to t - useful for returning pointers to constants +func ptr[T any](t T) *T { + return &t +} + +var warnStreamUpload sync.Once + +// Update the object with the contents of the io.Reader, modTime, size and MD5 hash +func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (err error) { + var ( + size = src.Size() + sizeUnknown = false + hashUnknown = true + fc = o.fileClient() + isNewlyCreated = o.modTime.IsZero() + counter *readers.CountingReader + md5Hash []byte + hasher = md5.New() + ) + + if size > int64(maxFileSize) { + return fmt.Errorf("update: max supported file size is %vB. 
provided size is %vB", maxFileSize, fs.SizeSuffix(size)) + } else if size < 0 { + size = int64(o.fs.opt.MaxStreamSize) + sizeUnknown = true + warnStreamUpload.Do(func() { + fs.Logf(o.fs, "Streaming uploads will have maximum file size of %v - adjust with --azurefiles-max-stream-size", o.fs.opt.MaxStreamSize) + }) + } + + if isNewlyCreated { + // Make parent directory + if mkDirErr := o.fs.mkParentDir(ctx, src.Remote()); mkDirErr != nil { + return fmt.Errorf("update: unable to make parent directories: %w", mkDirErr) + } + // Create the file at the size given + if _, createErr := fc.Create(ctx, size, nil); createErr != nil { + return fmt.Errorf("update: unable to create file: %w", createErr) + } + } else { + // Resize the file if needed + if size != o.Size() { + if _, resizeErr := fc.Resize(ctx, size, nil); resizeErr != nil { + return fmt.Errorf("update: unable to resize while trying to update: %w ", resizeErr) + } + } + } + + // Measure the size if it is unknown + if sizeUnknown { + counter = readers.NewCountingReader(in) + in = counter + } + + // Check we have a source MD5 hash... + if hashStr, err := src.Hash(ctx, hash.MD5); err == nil && hashStr != "" { + md5Hash, err = hex.DecodeString(hashStr) + if err == nil { + hashUnknown = false + } else { + fs.Errorf(o, "internal error: decoding hex encoded md5 %q: %v", hashStr, err) + } + } + + // ...if not calculate one + if hashUnknown { + in = io.TeeReader(in, hasher) + } + + // Upload the file + opt := file.UploadStreamOptions{ + ChunkSize: int64(o.fs.opt.ChunkSize), + Concurrency: o.fs.opt.UploadConcurrency, + } + if err := fc.UploadStream(ctx, in, &opt); err != nil { + // Remove partially uploaded file on error + if isNewlyCreated { + if _, delErr := fc.Delete(ctx, nil); delErr != nil { + fs.Errorf(o, "failed to delete partially uploaded file: %v", delErr) + } + } + return fmt.Errorf("update: failed to upload stream: %w", err) + } + + if sizeUnknown { + // Read the uploaded size - the file will be truncated to that size by updateSizeHashModTime + size = int64(counter.BytesRead()) + } + if hashUnknown { + md5Hash = hasher.Sum(nil) + } + + // Update the properties + modTime := src.ModTime(ctx) + contentType := fs.MimeType(ctx, src) + httpHeaders := file.HTTPHeaders{ + ContentMD5: md5Hash, + ContentType: &contentType, + } + // Apply upload options (also allows one to overwrite content-type) + for _, option := range options { + key, value := option.Header() + lowerKey := strings.ToLower(key) + switch lowerKey { + case "cache-control": + httpHeaders.CacheControl = &value + case "content-disposition": + httpHeaders.ContentDisposition = &value + case "content-encoding": + httpHeaders.ContentEncoding = &value + case "content-language": + httpHeaders.ContentLanguage = &value + case "content-type": + httpHeaders.ContentType = &value + } + } + _, err = fc.SetHTTPHeaders(ctx, &file.SetHTTPHeadersOptions{ + FileContentLength: &size, + SMBProperties: &file.SMBProperties{ + LastWriteTime: &modTime, + }, + HTTPHeaders: &httpHeaders, + }) + if err != nil { + return fmt.Errorf("update: failed to set properties: %w", err) + } + + // Make sure Object is in sync + o.size = size + o.md5 = md5Hash + o.modTime = modTime + o.contentType = contentType + return nil +} + +// Move src to this remote using server-side move operations. +// +// This is stored with the remote path given. +// +// It returns the destination Object and a possible error. 
+// +// Will only be called if src.Fs().Name() == f.Name() +// +// If it isn't possible then return fs.ErrorCantMove +func (f *Fs) Move(ctx context.Context, src fs.Object, remote string) (fs.Object, error) { + srcObj, ok := src.(*Object) + if !ok { + fs.Debugf(src, "Can't move - not same remote type") + return nil, fs.ErrorCantMove + } + err := f.mkParentDir(ctx, remote) + if err != nil { + return nil, fmt.Errorf("Move: mkParentDir failed: %w", err) + } + opt := file.RenameOptions{ + IgnoreReadOnly: ptr(true), + ReplaceIfExists: ptr(true), + } + dstAbsPath := f.absPath(remote) + fc := srcObj.fileClient() + _, err = fc.Rename(ctx, dstAbsPath, &opt) + if err != nil { + return nil, fmt.Errorf("Move: Rename failed: %w", err) + } + dstObj, err := f.NewObject(ctx, remote) + if err != nil { + return nil, fmt.Errorf("Move: NewObject failed: %w", err) + } + return dstObj, nil +} + +// DirMove moves src, srcRemote to this remote at dstRemote +// using server-side move operations. +// +// Will only be called if src.Fs().Name() == f.Name() +// +// If it isn't possible then return fs.ErrorCantDirMove +// +// If destination exists then return fs.ErrorDirExists +func (f *Fs) DirMove(ctx context.Context, src fs.Fs, srcRemote, dstRemote string) error { + dstFs := f + srcFs, ok := src.(*Fs) + if !ok { + fs.Debugf(srcFs, "Can't move directory - not same remote type") + return fs.ErrorCantDirMove + } + + _, err := dstFs.dirClient(dstRemote).GetProperties(ctx, nil) + if err == nil { + return fs.ErrorDirExists + } + if !fileerror.HasCode(err, fileerror.ParentNotFound, fileerror.ResourceNotFound) { + return fmt.Errorf("DirMove: failed to get status of destination directory: %w", err) + } + + err = dstFs.mkParentDir(ctx, dstRemote) + if err != nil { + return fmt.Errorf("DirMove: mkParentDir failed: %w", err) + } + + opt := directory.RenameOptions{ + IgnoreReadOnly: ptr(false), + ReplaceIfExists: ptr(false), + } + dstAbsPath := dstFs.absPath(dstRemote) + dirClient := srcFs.dirClient(srcRemote) + _, err = dirClient.Rename(ctx, dstAbsPath, &opt) + if err != nil { + if fileerror.HasCode(err, fileerror.ResourceAlreadyExists) { + return fs.ErrorDirExists + } + return fmt.Errorf("DirMove: Rename failed: %w", err) + } + return nil +} + +// Copy src to this remote using server-side copy operations. +// +// This is stored with the remote path given. +// +// It returns the destination Object and a possible error. 
+// +// Will only be called if src.Fs().Name() == f.Name() +// +// If it isn't possible then return fs.ErrorCantCopy +func (f *Fs) Copy(ctx context.Context, src fs.Object, remote string) (fs.Object, error) { + srcObj, ok := src.(*Object) + if !ok { + fs.Debugf(src, "Can't copy - not same remote type") + return nil, fs.ErrorCantCopy + } + err := f.mkParentDir(ctx, remote) + if err != nil { + return nil, fmt.Errorf("Copy: mkParentDir failed: %w", err) + } + opt := file.StartCopyFromURLOptions{ + CopyFileSMBInfo: &file.CopyFileSMBInfo{ + Attributes: file.SourceCopyFileAttributes{}, + ChangeTime: file.SourceCopyFileChangeTime{}, + CreationTime: file.SourceCopyFileCreationTime{}, + LastWriteTime: file.SourceCopyFileLastWriteTime{}, + PermissionCopyMode: ptr(file.PermissionCopyModeTypeSource), + IgnoreReadOnly: ptr(true), + }, + } + srcURL := srcObj.fileClient().URL() + fc := f.fileClient(remote) + _, err = fc.StartCopyFromURL(ctx, srcURL, &opt) + if err != nil { + return nil, fmt.Errorf("Copy failed: %w", err) + } + dstObj, err := f.NewObject(ctx, remote) + if err != nil { + return nil, fmt.Errorf("Copy: NewObject failed: %w", err) + } + return dstObj, nil +} + +// Implementation of WriterAt +type writerAt struct { + ctx context.Context + f *Fs + fc *file.Client + mu sync.Mutex // protects variables below + size int64 +} + +// Adaptor to add a Close method to bytes.Reader +type bytesReaderCloser struct { + *bytes.Reader +} + +// Close the bytesReaderCloser +func (bytesReaderCloser) Close() error { + return nil +} + +// WriteAt writes len(p) bytes from p to the underlying data stream +// at offset off. It returns the number of bytes written from p (0 <= n <= len(p)) +// and any error encountered that caused the write to stop early. +// WriteAt must return a non-nil error if it returns n < len(p). +// +// If WriteAt is writing to a destination with a seek offset, +// WriteAt should not affect nor be affected by the underlying +// seek offset. +// +// Clients of WriteAt can execute parallel WriteAt calls on the same +// destination if the ranges do not overlap. +// +// Implementations must not retain p. +func (w *writerAt) WriteAt(p []byte, off int64) (n int, err error) { + endOffset := off + int64(len(p)) + w.mu.Lock() + if w.size < endOffset { + _, err = w.fc.Resize(w.ctx, endOffset, nil) + if err != nil { + w.mu.Unlock() + return 0, fmt.Errorf("WriteAt: failed to resize file: %w ", err) + } + w.size = endOffset + } + w.mu.Unlock() + + in := bytesReaderCloser{bytes.NewReader(p)} + _, err = w.fc.UploadRange(w.ctx, off, in, nil) + if err != nil { + return 0, err + } + return len(p), nil +} + +// Close the writer +func (w *writerAt) Close() error { + // FIXME should we be doing something here? + return nil +} + +// OpenWriterAt opens with a handle for random access writes +// +// Pass in the remote desired and the size if known. 
+// +// It truncates any existing object +func (f *Fs) OpenWriterAt(ctx context.Context, remote string, size int64) (fs.WriterAtCloser, error) { + err := f.mkParentDir(ctx, remote) + if err != nil { + return nil, fmt.Errorf("OpenWriterAt: failed to create parent directory: %w", err) + } + fc := f.fileClient(remote) + if size < 0 { + size = 0 + } + _, err = fc.Create(ctx, size, nil) + if err != nil { + return nil, fmt.Errorf("OpenWriterAt: unable to create file: %w", err) + } + w := &writerAt{ + ctx: ctx, + f: f, + fc: fc, + size: size, + } + return w, nil +} + +// About gets quota information +func (f *Fs) About(ctx context.Context) (*fs.Usage, error) { + stats, err := f.shareClient.GetStatistics(ctx, nil) + if err != nil { + return nil, fmt.Errorf("failed to read share statistics: %w", err) + } + usage := &fs.Usage{ + Used: stats.ShareUsageBytes, // bytes in use + } + return usage, nil +} + +// Check the interfaces are satisfied +var ( + _ fs.Fs = &Fs{} + _ fs.PutStreamer = &Fs{} + _ fs.Abouter = &Fs{} + _ fs.Mover = &Fs{} + _ fs.DirMover = &Fs{} + _ fs.Copier = &Fs{} + _ fs.OpenWriterAter = &Fs{} + _ fs.Object = &Object{} + _ fs.MimeTyper = &Object{} +) diff --git a/backend/azurefiles/azurefiles_internal_test.go b/backend/azurefiles/azurefiles_internal_test.go index 5286b8a72..b123ad730 100644 --- a/backend/azurefiles/azurefiles_internal_test.go +++ b/backend/azurefiles/azurefiles_internal_test.go @@ -1,3 +1,6 @@ +//go:build !plan9 && !js +// +build !plan9,!js + package azurefiles import ( @@ -41,7 +44,7 @@ func (f *Fs) InternalTestAuth(t *testing.T) { name: "SASUrl", options: &Options{ ShareName: shareName, - SASUrl: "", + SASURL: "", }}, } diff --git a/backend/azurefiles/azurefiles_test.go b/backend/azurefiles/azurefiles_test.go index 84ce8c714..d8091fa49 100644 --- a/backend/azurefiles/azurefiles_test.go +++ b/backend/azurefiles/azurefiles_test.go @@ -1,3 +1,6 @@ +//go:build !plan9 && !js +// +build !plan9,!js + package azurefiles import ( diff --git a/backend/azurefiles/azurefiles_unsupported.go b/backend/azurefiles/azurefiles_unsupported.go new file mode 100644 index 000000000..1674e8f20 --- /dev/null +++ b/backend/azurefiles/azurefiles_unsupported.go @@ -0,0 +1,7 @@ +// Build for azurefiles for unsupported platforms to stop go complaining +// about "no buildable Go source files " + +//go:build plan9 || js +// +build plan9 js + +package azurefiles diff --git a/backend/azurefiles/directory.go b/backend/azurefiles/directory.go deleted file mode 100644 index 7717c04b2..000000000 --- a/backend/azurefiles/directory.go +++ /dev/null @@ -1,44 +0,0 @@ -package azurefiles - -import ( - "context" - "time" -) - -// Directory is a filesystem like directory provided by an Fs -type Directory struct { - common -} - -// Items returns the count of items in this directory or this -// directory and subdirectories if known, -1 for unknown -// -// It is unknown since getting the count of items results in a -// network request -func (d *Directory) Items() int64 { - return -1 -} - -// ID returns empty string. Can be implemented as part of IDer -func (d *Directory) ID() string { - return "" -} - -// Size is returns the size of the file. -// This method is implemented because it is part of the [fs.DirEntry] interface -func (d *Directory) Size() int64 { - return 0 -} - -// ModTime returns the modification time of the object -// -// TODO: check whether FileLastWriteTime is what the clients of this API want. 
Maybe -// FileLastWriteTime does not get changed when directory contents are updated but consumers -// of this API expect d.ModTime to do so -func (d *Directory) ModTime(ctx context.Context) time.Time { - props, err := d.f.dirClient(d.remote).GetProperties(ctx, nil) - if err != nil { - return time.Now() - } - return *props.FileLastWriteTime -} diff --git a/backend/azurefiles/fs.go b/backend/azurefiles/fs.go deleted file mode 100644 index 2a4b2535d..000000000 --- a/backend/azurefiles/fs.go +++ /dev/null @@ -1,292 +0,0 @@ -package azurefiles - -import ( - "context" - "errors" - "fmt" - "io" - "log" - "path" - "time" - - "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/directory" - "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/file" - "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/fileerror" - "github.com/rclone/rclone/fs" - "github.com/rclone/rclone/fs/hash" -) - -const sleepDurationBetweenRecursiveMkdirPutCalls = time.Millisecond * 500 -const fourTbInBytes = 4398046511104 - -// NewObject finds the Object at remote. If it can't be found -// it returns the error fs.ErrorObjectNotFound. -// -// Does not return ErrorIsDir when a directory exists instead of file. since the documentation -// for [rclone.fs.Fs.NewObject] rqeuires no extra work to determine whether it is directory -// -// Inspired by azureblob store, this initiates a network request and returns an error if object is not found. -func (f *Fs) NewObject(ctx context.Context, remote string) (fs.Object, error) { - fileClient := f.fileClient(remote) - resp, err := fileClient.GetProperties(ctx, nil) - if fileerror.HasCode(err, fileerror.ParentNotFound, fileerror.ResourceNotFound) { - return nil, fs.ErrorObjectNotFound - } else if err != nil { - return nil, fmt.Errorf("unable to find object remote=%s : %w", remote, err) - } - - ob := objectInstance(f, remote, *resp.ContentLength, resp.ContentMD5, *resp.FileLastWriteTime) - return &ob, nil -} - -// Mkdir creates nested directories as indicated by test FsMkdirRmdirSubdir -// TODO: write custom test case where parent directories are created -// Mkdir creates the container if it doesn't exist -func (f *Fs) Mkdir(ctx context.Context, remote string) error { - return f.mkdirRelativeToRootOfShare(ctx, f.decodedFullPath(remote)) -} - -// rclone completes commands such as rclone copy localdir remote:parentcontainer/childcontainer -// where localdir is a tree of files and directories. The above command is expected to complete even -// when parentcontainer and childcontainer directors do not exist on the remote. The following -// code with emphasis on fullPathRelativeToShareRoot is written to handle such cases by recursiely creating -// parent directories all the way to the root of the share -// -// When path argument is an empty string, windows and linux return and error. 
However, this -// implementation does not return an error -func (f *Fs) mkdirRelativeToRootOfShare(ctx context.Context, fullPathRelativeToShareRoot string) error { - fp := fullPathRelativeToShareRoot - if fp == "" { - return nil - } - dirClient := f.newSubdirectoryClientFromEncodedPathRelativeToShareRoot(f.encodePath(fp)) - // now := time.Now() - // smbProps := &file.SMBProperties{ - // LastWriteTime: &now, - // } - // dirCreateOptions := &directory.CreateOptions{ - // FileSMBProperties: smbProps, - // } - - _, createDirErr := dirClient.Create(ctx, nil) - if fileerror.HasCode(createDirErr, fileerror.ParentNotFound) { - parentDir := path.Dir(fp) - if parentDir == fp { - log.Fatal("This will lead to infinite recursion since parent and remote are equal") - } - makeParentErr := f.mkdirRelativeToRootOfShare(ctx, parentDir) - if makeParentErr != nil { - return fmt.Errorf("could not make parent of %s : %w", fp, makeParentErr) - } - log.Printf("Mkdir: waiting for %s after making parent=%s", sleepDurationBetweenRecursiveMkdirPutCalls.String(), parentDir) - time.Sleep(sleepDurationBetweenRecursiveMkdirPutCalls) - return f.mkdirRelativeToRootOfShare(ctx, fp) - } else if fileerror.HasCode(createDirErr, fileerror.ResourceAlreadyExists) { - return nil - } else if createDirErr != nil { - return fmt.Errorf("unable to MkDir: %w", createDirErr) - } - return nil -} - -// Rmdir deletes the root folder -// -// Returns an error if it isn't empty -func (f *Fs) Rmdir(ctx context.Context, remote string) error { - dirClient := f.dirClient(remote) - _, err := dirClient.Delete(ctx, nil) - if err != nil { - if fileerror.HasCode(err, fileerror.DirectoryNotEmpty) { - return fs.ErrorDirectoryNotEmpty - } else if fileerror.HasCode(err, fileerror.ResourceNotFound) { - return fs.ErrorDirNotFound - } - return fmt.Errorf("could not rmdir dir=\"%s\" : %w", remote, err) - } - return nil - -} - -// Put the object -// -// Copies the reader in to the new object. This new object is returned. -// -// The new object may have been created if an error is returned -// TODO: when file.CLient.Creat is being used, provide HTTP headesr such as content type and content MD5 -// TODO: maybe replace PUT with NewObject + Update -// TODO: in case file is created but there is a problem on upload, what happens -// TODO: what happens when file already exists at the location -func (f *Fs) Put(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) (fs.Object, error) { - if src.Size() > fourTbInBytes { - return nil, fmt.Errorf("max supported file size is 4TB. provided size is %d", src.Size()) - } else if src.Size() < 0 { - // TODO: what should happened when src.Size == 0 - return nil, fmt.Errorf("src.Size is a required to be a whole number : %d", src.Size()) - } - fc := f.fileClient(src.Remote()) - - _, createErr := fc.Create(ctx, src.Size(), nil) - if fileerror.HasCode(createErr, fileerror.ParentNotFound) { - parentDir := path.Dir(src.Remote()) - if mkDirErr := f.Mkdir(ctx, parentDir); mkDirErr != nil { - return nil, fmt.Errorf("unable to make parent directories : %w", mkDirErr) - } - log.Printf("Mkdir: waiting for %s after making parent=%s", sleepDurationBetweenRecursiveMkdirPutCalls.String(), parentDir) - time.Sleep(sleepDurationBetweenRecursiveMkdirPutCalls) - return f.Put(ctx, in, src, options...) 
- } else if createErr != nil { - return nil, fmt.Errorf("unable to create file : %w", createErr) - } - - obj := &Object{ - common: common{ - f: f, - remote: src.Remote(), - }, - } - if updateErr := obj.upload(ctx, in, src, true, options...); updateErr != nil { - err := fmt.Errorf("while executing update after creating file as part of fs.Put : %w", updateErr) - if _, delErr := fc.Delete(ctx, nil); delErr != nil { - return nil, errors.Join(delErr, updateErr) - } - return obj, err - } - - return obj, nil -} - -// Name of the remote (as passed into NewFs) -func (f *Fs) Name() string { - return f.name -} - -// Root of the remote (as passed into NewFs) -func (f *Fs) Root() string { - return f.root -} - -// String converts this Fs to a string -func (f *Fs) String() string { - return fmt.Sprintf("azurefiles root '%s'", f.root) -} - -// Precision return the precision of this Fs -// -// One second. FileREST API times are in RFC1123 which in the example shows a precision of seconds -// Source: https://learn.microsoft.com/en-us/rest/api/storageservices/representation-of-date-time-values-in-headers -func (f *Fs) Precision() time.Duration { - return time.Second -} - -// Hashes returns the supported hash sets. -// -// MD5: since it is listed as header in the response for file properties -// Source: https://learn.microsoft.com/en-us/rest/api/storageservices/get-file-properties -func (f *Fs) Hashes() hash.Set { - return hash.NewHashSet(hash.MD5) -} - -// Features returns the optional features of this Fs -// -// TODO: add features:- public link, SlowModTime, SlowHash, -// ReadMetadata, WriteMetadata,UserMetadata,PutUnchecked, PutStream -// PartialUploads: Maybe???? -// FileID and DirectoryID can be implemented. They are atleast returned as part of listing response -func (f *Fs) Features() *fs.Features { - return &fs.Features{ - CanHaveEmptyDirectories: true, - // Copy: func(ctx context.Context, src fs.Object, remote string) (fs.Object, error) { - // return f.CopyFile(ctx, src, remote) - // }, - } -} - -// List the objects and directories in dir into entries. The entries can be -// returned in any order but should be for a complete directory. -// -// dir should be "" to list the root, and should not have trailing slashes. -// -// This should return ErrDirNotFound if the directory isn't found. -// -// TODO: handle case regariding "" and "/". 
I remember reading about them somewhere -func (f *Fs) List(ctx context.Context, remote string) (fs.DirEntries, error) { - var entries fs.DirEntries - subDirClient := f.dirClient(remote) - - // Checking whether directory exists - _, err := subDirClient.GetProperties(ctx, nil) - if fileerror.HasCode(err, fileerror.ParentNotFound, fileerror.ResourceNotFound) { - return entries, fs.ErrorDirNotFound - } else if err != nil { - return entries, err - } - - pager := subDirClient.NewListFilesAndDirectoriesPager(listFilesAndDirectoriesOptions) - for pager.More() { - resp, err := pager.NextPage(ctx) - if err != nil { - return entries, err - } - for _, dir := range resp.Segment.Directories { - de := &Directory{ - common{f: f, - remote: path.Join(remote, f.decodePath(*dir.Name)), - properties: properties{ - lastWriteTime: *dir.Properties.LastWriteTime, - }}, - } - entries = append(entries, de) - } - - for _, file := range resp.Segment.Files { - de := &Object{ - common{f: f, - remote: path.Join(remote, f.decodePath(*file.Name)), - properties: properties{ - contentLength: *file.Properties.ContentLength, - lastWriteTime: *file.Properties.LastWriteTime, - }}, - } - entries = append(entries, de) - } - } - - return entries, nil - -} - -type encodedPath string - -func (f *Fs) decodedFullPath(decodedRemote string) string { - return path.Join(f.root, decodedRemote) -} - -func (f *Fs) dirClient(decodedRemote string) *directory.Client { - fullPathDecoded := f.decodedFullPath(decodedRemote) - fullPathEncoded := f.encodePath(fullPathDecoded) - return f.newSubdirectoryClientFromEncodedPathRelativeToShareRoot(fullPathEncoded) -} - -func (f *Fs) newSubdirectoryClientFromEncodedPathRelativeToShareRoot(p encodedPath) *directory.Client { - return f.shareRootDirClient.NewSubdirectoryClient(string(p)) -} - -func (f *Fs) fileClient(decodedRemote string) *file.Client { - fullPathDecoded := f.decodedFullPath(decodedRemote) - fullPathEncoded := f.encodePath(fullPathDecoded) - return f.fileClientFromEncodedPathRelativeToShareRoot(fullPathEncoded) -} - -func (f *Fs) fileClientFromEncodedPathRelativeToShareRoot(p encodedPath) *file.Client { - return f.shareRootDirClient.NewFileClient(string(p)) -} - -func (f *Fs) encodePath(p string) encodedPath { - return encodedPath(f.opt.Enc.FromStandardPath(p)) -} - -func (f *Fs) decodePath(p string) string { - return f.opt.Enc.ToStandardPath(p) -} - -// on 20231019 at 1324 work to be continued at trying to fix FAIL: TestIntegration/FsMkdir/FsPutFiles/FromRoot diff --git a/backend/azurefiles/object.go b/backend/azurefiles/object.go deleted file mode 100644 index 4421e96d1..000000000 --- a/backend/azurefiles/object.go +++ /dev/null @@ -1,279 +0,0 @@ -package azurefiles - -import ( - "context" - "crypto/md5" - "encoding/hex" - "fmt" - "io" - "log/slog" - "time" - - "github.com/Azure/azure-sdk-for-go/sdk/storage/azfile/file" - "github.com/rclone/rclone/fs" - "github.com/rclone/rclone/fs/hash" -) - -// TODO: maybe use this in the result of list. 
or replace all instances where object instances are created -func objectInstance(f *Fs, remote string, contentLength int64, md5Hash []byte, lwt time.Time) Object { - return Object{common: common{ - f: f, - remote: remote, - properties: properties{ - contentLength: contentLength, - md5Hash: md5Hash, - lastWriteTime: lwt, - }, - }} -} - -// Size of object in bytes -func (o *Object) Size() int64 { - return o.properties.contentLength -} - -// Fs returns the parent Fs -func (o *Object) Fs() fs.Info { - return o.f -} - -// Hash returns the MD5 of an object returning a lowercase hex string -// -// May make a network request becaue the [fs.List] method does not -// return MD5 hashes for DirEntry -func (o *Object) Hash(ctx context.Context, ty hash.Type) (string, error) { - if ty != hash.MD5 { - return "", hash.ErrUnsupported - } - if len(o.common.properties.md5Hash) == 0 { - props, err := o.fileClient().GetProperties(ctx, nil) - if err != nil { - return "", fmt.Errorf("unable to fetch properties to determine hash") - } - o.common.properties.md5Hash = props.ContentMD5 - } - return hex.EncodeToString(o.common.properties.md5Hash), nil -} - -// Storable returns a boolean showing whether this object storable -func (o *Object) Storable() bool { - return true -} - -// Object describes a Azure File Share File not a Directory -type Object struct { - common -} - -// These fields have pointer types because it seems to -// TODO: descide whether these could be pointer or not -type properties struct { - contentLength int64 - md5Hash []byte - lastWriteTime time.Time -} - -func (o *Object) fileClient() *file.Client { - decodedFullPath := o.f.decodedFullPath(o.remote) - fullEncodedPath := o.f.encodePath(decodedFullPath) - return o.f.fileClientFromEncodedPathRelativeToShareRoot(fullEncodedPath) -} - -// SetModTime sets the modification time -func (o *Object) SetModTime(ctx context.Context, t time.Time) error { - smbProps := file.SMBProperties{ - LastWriteTime: &t, - } - setHeadersOptions := file.SetHTTPHeadersOptions{ - SMBProperties: &smbProps, - } - _, err := o.fileClient().SetHTTPHeaders(ctx, &setHeadersOptions) - if err != nil { - return fmt.Errorf("unable to set modTime : %w", err) - } - o.lastWriteTime = t - return nil -} - -// ModTime returns the modification time of the object -// -// Returns time.Now() if not present -// TODO: convert o.lastWriteTime to *time.Time so that one can know when it has -// been explicitly set -func (o *Object) ModTime(ctx context.Context) time.Time { - if o.lastWriteTime.Unix() <= 1 { - return time.Now() - } - return o.lastWriteTime -} - -// Remove an object -func (o *Object) Remove(ctx context.Context) error { - // TODO: should the options for delete not be nil. 
Depends on behaviour expected by consumers - if _, err := o.fileClient().Delete(ctx, nil); err != nil { - return fmt.Errorf("unable to delete remote=\"%s\" : %w", o.remote, err) - } - return nil -} - -// Open an object for read -// -// TODO: check for mandatory options and the other options -func (o *Object) Open(ctx context.Context, options ...fs.OpenOption) (io.ReadCloser, error) { - downloadStreamOptions := file.DownloadStreamOptions{} - for _, opt := range options { - switch v := opt.(type) { - case *fs.SeekOption: - httpRange := file.HTTPRange{ - Offset: v.Offset, - } - downloadStreamOptions.Range = httpRange - case *fs.RangeOption: - var start *int64 - var end *int64 - if v.Start >= 0 { - start = &v.Start - } - if v.End >= 0 { - end = &v.End - } - - fhr := file.HTTPRange{} - if start != nil && end != nil { - fhr.Offset = *start - fhr.Count = *end - *start + 1 - } else if start != nil && end == nil { - fhr.Offset = *start - } else if start == nil && end != nil { - fhr.Offset = o.contentLength - *end - } - - downloadStreamOptions.Range = fhr - } - } - resp, err := o.fileClient().DownloadStream(ctx, &downloadStreamOptions) - if err != nil { - return nil, fmt.Errorf("could not open remote=\"%s\" : %w", o.remote, err) - } - return resp.Body, nil -} - -func (o *Object) upload(ctx context.Context, in io.Reader, src fs.ObjectInfo, isDestNewlyCreated bool, options ...fs.OpenOption) error { - if src.Size() > fourTbInBytes { - return fmt.Errorf("max supported file size is 4TB. provided size is %d", src.Size()) - } else if src.Size() < 0 { - return fmt.Errorf("files with unknown sizes are not supported") - } - - fc := o.fileClient() - - if !isDestNewlyCreated { - if src.Size() != o.Size() { - if _, resizeErr := fc.Resize(ctx, src.Size(), nil); resizeErr != nil { - return fmt.Errorf("unable to resize while trying to update. %w ", resizeErr) - } - } - } - - var md5Hash []byte - hashToBeComputed := false - if hashStr, err := src.Hash(ctx, hash.MD5); err != nil || hashStr == "" { - hashToBeComputed = true - } else { - var decodeErr error - md5Hash, decodeErr = hex.DecodeString(hashStr) - if decodeErr != nil { - hashToBeComputed = true - msg := fmt.Sprintf("should not happen. Error while decoding hex encoded md5 '%s'. Error is %s", - hashStr, decodeErr.Error()) - slog.Error(msg) - } - } - var uploadErr error - if hashToBeComputed { - md5Hash, uploadErr = uploadStreamAndComputeHash(ctx, fc, in, src, options...) - } else { - uploadErr = uploadStream(ctx, fc, in, src, options...) - } - if uploadErr != nil { - return fmt.Errorf("while uploading %s : %w", src.Remote(), uploadErr) - } - - modTime := src.ModTime(ctx) - if err := uploadSizeHashLWT(ctx, fc, src.Size(), md5Hash, modTime); err != nil { - - return fmt.Errorf("while setting size hash and last write time for %s : %w", src.Remote(), err) - } - o.properties.contentLength = src.Size() - o.properties.md5Hash = md5Hash - o.properties.lastWriteTime = modTime - return nil -} - -// Update the object with the contents of the io.Reader, modTime, size and MD5 hash -// Does not create a new object -// -// TODO: implement options. understand purpose of options -func (o *Object) Update(ctx context.Context, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { - return o.upload(ctx, in, src, false, options...) 
-} - -// cannot set modTime header here because setHTTPHeaders does not allow setting metadata -func uploadStream(ctx context.Context, fc *file.Client, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) error { - // TODO: set concurrency level - uploadStreamOptions := file.UploadStreamOptions{ - ChunkSize: chunkSize(options...), - } - - if err := fc.UploadStream(ctx, in, &uploadStreamOptions); err != nil { - return fmt.Errorf("unable to upload. cannot upload stream : %w", err) - } - return nil -} - -func uploadStreamAndComputeHash(ctx context.Context, fc *file.Client, in io.Reader, src fs.ObjectInfo, options ...fs.OpenOption) ([]byte, error) { - hasher := md5.New() - teeReader := io.TeeReader(in, hasher) - err := uploadStream(ctx, fc, teeReader, src, options...) - if err != nil { - return []byte{}, err - } - return hasher.Sum(nil), nil - -} - -// the function is named with prefix 'upload' since it indicates that things will be modified on the server -func uploadSizeHashLWT(ctx context.Context, fc *file.Client, size int64, hash []byte, lwt time.Time) error { - smbProps := file.SMBProperties{ - LastWriteTime: &lwt, - } - httpHeaders := &file.HTTPHeaders{ - ContentMD5: hash, - } - _, err := fc.SetHTTPHeaders(ctx, &file.SetHTTPHeadersOptions{ - FileContentLength: &size, - SMBProperties: &smbProps, - HTTPHeaders: httpHeaders, - }) - if err != nil { - return fmt.Errorf("while setting size, hash, lastWriteTime : %w", err) - } - return nil -} - -func chunkSize(options ...fs.OpenOption) int64 { - for _, option := range options { - if chunkOpt, ok := option.(*fs.ChunkOption); ok { - return chunkOpt.ChunkSize - } - } - return 1048576 -} - -// Return a string version -func (o *Object) String() string { - if o == nil { - return "" - } - return o.common.String() -} diff --git a/bin/make_manual.py b/bin/make_manual.py index 19bb88cc0..5fc825098 100755 --- a/bin/make_manual.py +++ b/bin/make_manual.py @@ -58,6 +58,7 @@ docs = [ "memory.md", "netstorage.md", "azureblob.md", + "azurefiles.md", "onedrive.md", "opendrive.md", "oracleobjectstorage.md", diff --git a/docs/content/_index.md b/docs/content/_index.md index 5c6c2b86f..8787afd79 100644 --- a/docs/content/_index.md +++ b/docs/content/_index.md @@ -144,6 +144,7 @@ WebDAV or S3, that work out of the box.) {{< provider name="Mega" home="https://mega.nz/" config="/mega/" >}} {{< provider name="Memory" home="/memory/" config="/memory/" >}} {{< provider name="Microsoft Azure Blob Storage" home="https://azure.microsoft.com/en-us/services/storage/blobs/" config="/azureblob/" >}} +{{< provider name="Microsoft Azure Files Storage" home="https://azure.microsoft.com/en-us/services/storage/files/" config="/azurefiles/" >}} {{< provider name="Microsoft OneDrive" home="https://onedrive.live.com/" config="/onedrive/" >}} {{< provider name="Minio" home="https://www.minio.io/" config="/s3/#minio" >}} {{< provider name="Nextcloud" home="https://nextcloud.com/" config="/webdav/#nextcloud" >}} diff --git a/docs/content/azurefiles.md b/docs/content/azurefiles.md index f00c71b47..a6e84f2e0 100644 --- a/docs/content/azurefiles.md +++ b/docs/content/azurefiles.md @@ -1,21 +1,707 @@ --- title: "Microsoft Azure Files Storage" description: "Rclone docs for Microsoft Azure Files Storage" +versionIntroduced: "v1.65" --- -# Microsoft Azure File Storage +# {{< icon "fab fa-windows" >}} Microsoft Azure Files Storage +Paths are specified as `remote:` You may put subdirectories in too, +e.g. `remote:path/to/dir`. 
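+
+For example, after configuring a remote called `remote` (see the
+configuration walkthrough below), you could list a subdirectory of the
+share with something like this (the subdirectory name is purely
+illustrative):
+
+    rclone ls remote:path/to/dir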
+
+## Configuration
+
+Here is an example of making a Microsoft Azure Files Storage
+configuration for a remote called `remote`. First run:
+
+    rclone config
+
+This will guide you through an interactive setup process:
+
+```
+No remotes found, make a new one?
+n) New remote
+s) Set configuration password
+q) Quit config
+n/s/q> n
+name> remote
+Type of storage to configure.
+Choose a number from below, or type in your own value
+[snip]
+XX / Microsoft Azure Files Storage
+   \ "azurefiles"
+[snip]
+
+Option account.
+Azure Storage Account Name.
+Set this to the Azure Storage Account Name in use.
+Leave blank to use SAS URL or connection string, otherwise it needs to be set.
+If this is blank and if env_auth is set it will be read from the
+environment variable `AZURE_STORAGE_ACCOUNT_NAME` if possible.
+Enter a value. Press Enter to leave empty.
+account> account_name
+
+Option share_name.
+Azure Files Share Name.
+This is required and is the name of the share to access.
+Enter a value. Press Enter to leave empty.
+share_name> share_name
+
+Option env_auth.
+Read credentials from runtime (environment variables, CLI or MSI).
+See the [authentication docs](/azurefiles#authentication) for full info.
+Enter a boolean value (true or false). Press Enter for the default (false).
+env_auth>
+
+Option key.
+Storage Account Shared Key.
+Leave blank to use SAS URL or connection string.
+Enter a value. Press Enter to leave empty.
+key> base64encodedkey==
+
+Option sas_url.
+SAS URL.
+Leave blank if using account/key or connection string.
+Enter a value. Press Enter to leave empty.
+sas_url>
+
+Option connection_string.
+Azure Files Connection String.
+Enter a value. Press Enter to leave empty.
+connection_string>
+[snip]
+
+Configuration complete.
+Options:
+- type: azurefiles
+- account: account_name
+- share_name: share_name
+- key: base64encodedkey==
+Keep this "remote" remote?
+y) Yes this is OK (default)
+e) Edit this remote
+d) Delete this remote
+y/e/d>
+```
+
+Once configured, you can use rclone.
+
+See all files in the top level:
+
+    rclone lsf remote:
+
+Make a new directory in the root:
+
+    rclone mkdir remote:dir
+
+Recursively list the contents:
+
+    rclone ls remote:
+
+Sync `/home/local/directory` to the remote directory, deleting any
+excess files in the directory.
+
+    rclone sync --interactive /home/local/directory remote:dir
 
 ### Modified time
 
-Stored as azure standard `LastModified` time stored on files
+The modified time is stored as Azure standard `LastModified` time on
+files.
+
+### Performance
+
+When uploading large files, increasing the value of
+`--azurefiles-upload-concurrency` will increase performance at the cost
+of using more memory. The default of 16 is set quite conservatively to
+use less memory. It may be necessary to raise it to 64 or higher to
+fully utilize a 1 GBit/s link with a single file transfer.
+
+### Restricted filename characters
+
+In addition to the [default restricted characters set](/overview/#restricted-characters)
+the following characters are also replaced:
+
+| Character | Value | Replacement |
+| --------- |:-----:|:-----------:|
+| " | 0x22 | ＂ |
+| * | 0x2A | ＊ |
+| : | 0x3A | ： |
+| < | 0x3C | ＜ |
+| > | 0x3E | ＞ |
+| ? | 0x3F | ？ |
+| \ | 0x5C | ＼ |
+| \| | 0x7C | ｜ |
+
+File names also cannot end with the following characters.
+These only get replaced if they are the last character in the name:
+
+| Character | Value | Replacement |
+| --------- |:-----:|:-----------:|
+| . | 0x2E | ． |
+
+Invalid UTF-8 bytes will also be [replaced](/overview/#invalid-utf8),
+as they can't be used in JSON strings.
 
 ### Hashes
 
-MD5 hashes are stored with files.
+MD5 hashes are stored with files. Not all files will have MD5 hashes
+as these have to be uploaded with the file.
 
 ### Authentication {#authentication}
 
-1. ConnectionString
-2. Accout and Key
-3. SAS URL
\ No newline at end of file
+There are a number of ways of supplying credentials for Azure Files
+Storage. Rclone tries them in the order of the sections below.
+
+#### Env Auth
+
+If the `env_auth` config parameter is `true` then rclone will pull
+credentials from the environment or runtime.
+
+It tries these authentication methods in this order:
+
+1. Environment Variables
+2. Managed Service Identity Credentials
+3. Azure CLI credentials (as used by the az tool)
+
+These are described in the following sections.
+
+##### Env Auth: 1. Environment Variables
+
+If `env_auth` is set and environment variables are present, rclone
+authenticates as a service principal with a secret or certificate, or
+as a user with a password, depending on which environment variables are
+set. It reads configuration from these variables, in the following order:
+
+1. Service principal with client secret
+    - `AZURE_TENANT_ID`: ID of the service principal's tenant. Also called its "directory" ID.
+    - `AZURE_CLIENT_ID`: the service principal's client ID
+    - `AZURE_CLIENT_SECRET`: one of the service principal's client secrets
+2. Service principal with certificate
+    - `AZURE_TENANT_ID`: ID of the service principal's tenant. Also called its "directory" ID.
+    - `AZURE_CLIENT_ID`: the service principal's client ID
+    - `AZURE_CLIENT_CERTIFICATE_PATH`: path to a PEM or PKCS12 certificate file including the private key.
+    - `AZURE_CLIENT_CERTIFICATE_PASSWORD`: (optional) password for the certificate file.
+    - `AZURE_CLIENT_SEND_CERTIFICATE_CHAIN`: (optional) Specifies whether an authentication request will include an x5c header to support subject name / issuer based authentication. When set to "true" or "1", authentication requests include the x5c header.
+3. User with username and password
+    - `AZURE_TENANT_ID`: (optional) tenant to authenticate in. Defaults to "organizations".
+    - `AZURE_CLIENT_ID`: client ID of the application the user will authenticate to
+    - `AZURE_USERNAME`: a username (usually an email address)
+    - `AZURE_PASSWORD`: the user's password
+4. Workload Identity
+    - `AZURE_TENANT_ID`: Tenant to authenticate in.
+    - `AZURE_CLIENT_ID`: Client ID of the application the user will authenticate to.
+    - `AZURE_FEDERATED_TOKEN_FILE`: Path to projected service account token file.
+    - `AZURE_AUTHORITY_HOST`: Authority of an Azure Active Directory endpoint (default: login.microsoftonline.com).
+
+##### Env Auth: 2. Managed Service Identity Credentials
+
+When using Managed Service Identity, if the VM(SS) on which this
+program is running has a system-assigned identity, it will be used by
+default. If the resource has no system-assigned but exactly one
+user-assigned identity, the user-assigned identity will be used by
+default.
+
+If the resource has multiple user-assigned identities, you will need to
+unset `env_auth` and set `use_msi` instead. See the [`use_msi`
+section](#use_msi).
+
+##### Env Auth: 3. Azure CLI credentials (as used by the az tool)
+
+Credentials created with the `az` tool can be picked up using `env_auth`.
+
+For example, if you were to log in with a service principal like this:
+
+    az login --service-principal -u XXX -p XXX --tenant XXX
+
+Then you could access rclone resources like this:
+
+    rclone lsf :azurefiles,env_auth,account=ACCOUNT:
+
+Or
+
+    rclone lsf --azurefiles-env-auth --azurefiles-account=ACCOUNT :azurefiles:
+
+#### Account and Shared Key
+
+This is the most straightforward and least flexible way. Just fill
+in the `account` and `key` lines and leave the rest blank.
+
+#### SAS URL
+
+To use it, leave `account`, `key` and `connection_string` blank and fill in `sas_url`.
+
+#### Connection String
+
+To use it, leave `account`, `key` and `sas_url` blank and fill in `connection_string`.
+
+#### Service principal with client secret
+
+If these variables are set, rclone will authenticate with a service principal with a client secret.
+
+- `tenant`: ID of the service principal's tenant. Also called its "directory" ID.
+- `client_id`: the service principal's client ID
+- `client_secret`: one of the service principal's client secrets
+
+The credentials can also be placed in a file using the
+`service_principal_file` configuration option.
+
+#### Service principal with certificate
+
+If these variables are set, rclone will authenticate with a service principal with a certificate.
+
+- `tenant`: ID of the service principal's tenant. Also called its "directory" ID.
+- `client_id`: the service principal's client ID
+- `client_certificate_path`: path to a PEM or PKCS12 certificate file including the private key.
+- `client_certificate_password`: (optional) password for the certificate file.
+- `client_send_certificate_chain`: (optional) Specifies whether an authentication request will include an x5c header to support subject name / issuer based authentication. When set to "true" or "1", authentication requests include the x5c header.
+
+**NB** `client_certificate_password` must be obscured - see [rclone obscure](/commands/rclone_obscure/).
+
+#### User with username and password
+
+If these variables are set, rclone will authenticate with username and password.
+
+- `tenant`: (optional) tenant to authenticate in. Defaults to "organizations".
+- `client_id`: client ID of the application the user will authenticate to
+- `username`: a username (usually an email address)
+- `password`: the user's password
+
+Microsoft doesn't recommend this kind of authentication, because it's
+less secure than other authentication flows. This method is not
+interactive, so it isn't compatible with any form of multi-factor
+authentication, and the application must already have user or admin
+consent. This credential can only authenticate work and school
+accounts; it can't authenticate Microsoft accounts.
+
+**NB** `password` must be obscured - see [rclone obscure](/commands/rclone_obscure/).
+
+#### Managed Service Identity Credentials {#use_msi}
+
+If `use_msi` is set then managed service identity credentials are
+used. This authentication only works when running in an Azure service.
+`env_auth` needs to be unset to use this.
+
+However, if you have multiple user identities to choose from, these must
+be explicitly specified using exactly one of the `msi_object_id`,
+`msi_client_id`, or `msi_mi_res_id` parameters.
+
+If none of `msi_object_id`, `msi_client_id`, or `msi_mi_res_id` is
+set, this is equivalent to using `env_auth`.
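+
+As a rough illustration, a remote using a user-assigned managed
+identity might end up looking something like this in `rclone.conf`
+(the remote name, account, share and client ID values below are
+placeholders):
+
+    [azfiles]
+    type = azurefiles
+    account = account_name
+    share_name = share_name
+    use_msi = true
+    msi_client_id = 00000000-0000-0000-0000-000000000000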
+ +{{< rem autogenerated options start" - DO NOT EDIT - instead edit fs.RegInfo in backend/azurefiles/azurefiles.go then run make backenddocs" >}} +### Standard options + +Here are the Standard options specific to azurefiles (Microsoft Azure Files). + +#### --azurefiles-account + +Azure Storage Account Name. + +Set this to the Azure Storage Account Name in use. + +Leave blank to use SAS URL or connection string, otherwise it needs to be set. + +If this is blank and if env_auth is set it will be read from the +environment variable `AZURE_STORAGE_ACCOUNT_NAME` if possible. + + +Properties: + +- Config: account +- Env Var: RCLONE_AZUREFILES_ACCOUNT +- Type: string +- Required: false + +#### --azurefiles-share-name + +Azure Files Share Name. + +This is required and is the name of the share to access. + + +Properties: + +- Config: share_name +- Env Var: RCLONE_AZUREFILES_SHARE_NAME +- Type: string +- Required: false + +#### --azurefiles-env-auth + +Read credentials from runtime (environment variables, CLI or MSI). + +See the [authentication docs](/azurefiles#authentication) for full info. + +Properties: + +- Config: env_auth +- Env Var: RCLONE_AZUREFILES_ENV_AUTH +- Type: bool +- Default: false + +#### --azurefiles-key + +Storage Account Shared Key. + +Leave blank to use SAS URL or connection string. + +Properties: + +- Config: key +- Env Var: RCLONE_AZUREFILES_KEY +- Type: string +- Required: false + +#### --azurefiles-sas-url + +SAS URL. + +Leave blank if using account/key or connection string. + +Properties: + +- Config: sas_url +- Env Var: RCLONE_AZUREFILES_SAS_URL +- Type: string +- Required: false + +#### --azurefiles-connection-string + +Azure Files Connection String. + +Properties: + +- Config: connection_string +- Env Var: RCLONE_AZUREFILES_CONNECTION_STRING +- Type: string +- Required: false + +#### --azurefiles-tenant + +ID of the service principal's tenant. Also called its directory ID. + +Set this if using +- Service principal with client secret +- Service principal with certificate +- User with username and password + + +Properties: + +- Config: tenant +- Env Var: RCLONE_AZUREFILES_TENANT +- Type: string +- Required: false + +#### --azurefiles-client-id + +The ID of the client in use. + +Set this if using +- Service principal with client secret +- Service principal with certificate +- User with username and password + + +Properties: + +- Config: client_id +- Env Var: RCLONE_AZUREFILES_CLIENT_ID +- Type: string +- Required: false + +#### --azurefiles-client-secret + +One of the service principal's client secrets + +Set this if using +- Service principal with client secret + + +Properties: + +- Config: client_secret +- Env Var: RCLONE_AZUREFILES_CLIENT_SECRET +- Type: string +- Required: false + +#### --azurefiles-client-certificate-path + +Path to a PEM or PKCS12 certificate file including the private key. + +Set this if using +- Service principal with certificate + + +Properties: + +- Config: client_certificate_path +- Env Var: RCLONE_AZUREFILES_CLIENT_CERTIFICATE_PATH +- Type: string +- Required: false + +#### --azurefiles-client-certificate-password + +Password for the certificate file (optional). + +Optionally set this if using +- Service principal with certificate + +And the certificate has a password. + + +**NB** Input to this must be obscured - see [rclone obscure](/commands/rclone_obscure/). 
+ +Properties: + +- Config: client_certificate_password +- Env Var: RCLONE_AZUREFILES_CLIENT_CERTIFICATE_PASSWORD +- Type: string +- Required: false + +### Advanced options + +Here are the Advanced options specific to azurefiles (Microsoft Azure Files). + +#### --azurefiles-client-send-certificate-chain + +Send the certificate chain when using certificate auth. + +Specifies whether an authentication request will include an x5c header +to support subject name / issuer based authentication. When set to +true, authentication requests include the x5c header. + +Optionally set this if using +- Service principal with certificate + + +Properties: + +- Config: client_send_certificate_chain +- Env Var: RCLONE_AZUREFILES_CLIENT_SEND_CERTIFICATE_CHAIN +- Type: bool +- Default: false + +#### --azurefiles-username + +User name (usually an email address) + +Set this if using +- User with username and password + + +Properties: + +- Config: username +- Env Var: RCLONE_AZUREFILES_USERNAME +- Type: string +- Required: false + +#### --azurefiles-password + +The user's password + +Set this if using +- User with username and password + + +**NB** Input to this must be obscured - see [rclone obscure](/commands/rclone_obscure/). + +Properties: + +- Config: password +- Env Var: RCLONE_AZUREFILES_PASSWORD +- Type: string +- Required: false + +#### --azurefiles-service-principal-file + +Path to file containing credentials for use with a service principal. + +Leave blank normally. Needed only if you want to use a service principal instead of interactive login. + + $ az ad sp create-for-rbac --name "" \ + --role "Storage Files Data Owner" \ + --scopes "/subscriptions//resourceGroups//providers/Microsoft.Storage/storageAccounts//blobServices/default/containers/" \ + > azure-principal.json + +See ["Create an Azure service principal"](https://docs.microsoft.com/en-us/cli/azure/create-an-azure-service-principal-azure-cli) and ["Assign an Azure role for access to files data"](https://docs.microsoft.com/en-us/azure/storage/common/storage-auth-aad-rbac-cli) pages for more details. + +**NB** this section needs updating for Azure Files - pull requests appreciated! + +It may be more convenient to put the credentials directly into the +rclone config file under the `client_id`, `tenant` and `client_secret` +keys instead of setting `service_principal_file`. + + +Properties: + +- Config: service_principal_file +- Env Var: RCLONE_AZUREFILES_SERVICE_PRINCIPAL_FILE +- Type: string +- Required: false + +#### --azurefiles-use-msi + +Use a managed service identity to authenticate (only works in Azure). + +When true, use a [managed service identity](https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/) +to authenticate to Azure Storage instead of a SAS token or account key. + +If the VM(SS) on which this program is running has a system-assigned identity, it will +be used by default. If the resource has no system-assigned but exactly one user-assigned identity, +the user-assigned identity will be used by default. If the resource has multiple user-assigned +identities, the identity to use must be explicitly specified using exactly one of the msi_object_id, +msi_client_id, or msi_mi_res_id parameters. + +Properties: + +- Config: use_msi +- Env Var: RCLONE_AZUREFILES_USE_MSI +- Type: bool +- Default: false + +#### --azurefiles-msi-object-id + +Object ID of the user-assigned MSI to use, if any. + +Leave blank if msi_client_id or msi_mi_res_id specified. 
+ +Properties: + +- Config: msi_object_id +- Env Var: RCLONE_AZUREFILES_MSI_OBJECT_ID +- Type: string +- Required: false + +#### --azurefiles-msi-client-id + +Object ID of the user-assigned MSI to use, if any. + +Leave blank if msi_object_id or msi_mi_res_id specified. + +Properties: + +- Config: msi_client_id +- Env Var: RCLONE_AZUREFILES_MSI_CLIENT_ID +- Type: string +- Required: false + +#### --azurefiles-msi-mi-res-id + +Azure resource ID of the user-assigned MSI to use, if any. + +Leave blank if msi_client_id or msi_object_id specified. + +Properties: + +- Config: msi_mi_res_id +- Env Var: RCLONE_AZUREFILES_MSI_MI_RES_ID +- Type: string +- Required: false + +#### --azurefiles-endpoint + +Endpoint for the service. + +Leave blank normally. + +Properties: + +- Config: endpoint +- Env Var: RCLONE_AZUREFILES_ENDPOINT +- Type: string +- Required: false + +#### --azurefiles-chunk-size + +Upload chunk size. + +Note that this is stored in memory and there may be up to +"--transfers" * "--azurefile-upload-concurrency" chunks stored at once +in memory. + +Properties: + +- Config: chunk_size +- Env Var: RCLONE_AZUREFILES_CHUNK_SIZE +- Type: SizeSuffix +- Default: 4Mi + +#### --azurefiles-upload-concurrency + +Concurrency for multipart uploads. + +This is the number of chunks of the same file that are uploaded +concurrently. + +If you are uploading small numbers of large files over high-speed +links and these uploads do not fully utilize your bandwidth, then +increasing this may help to speed up the transfers. + +Note that chunks are stored in memory and there may be up to +"--transfers" * "--azurefile-upload-concurrency" chunks stored at once +in memory. + +Properties: + +- Config: upload_concurrency +- Env Var: RCLONE_AZUREFILES_UPLOAD_CONCURRENCY +- Type: int +- Default: 16 + +#### --azurefiles-max-stream-size + +Max size for streamed files. + +Azure files needs to know in advance how big the file will be. When +rclone doesn't know it uses this value instead. + +This will be used when rclone is streaming data, the most common uses are: + +- Uploading files with `--vfs-cache-mode off` with `rclone mount` +- Using `rclone rcat` +- Copying files with unknown length + +You will need this much free space in the share as the file will be this size temporarily. + + +Properties: + +- Config: max_stream_size +- Env Var: RCLONE_AZUREFILES_MAX_STREAM_SIZE +- Type: SizeSuffix +- Default: 10Gi + +#### --azurefiles-encoding + +The encoding for the backend. + +See the [encoding section in the overview](/overview/#encoding) for more info. + +Properties: + +- Config: encoding +- Env Var: RCLONE_AZUREFILES_ENCODING +- Type: Encoding +- Default: Slash,LtGt,DoubleQuote,Colon,Question,Asterisk,Pipe,BackSlash,Del,Ctl,RightPeriod,InvalidUtf8,Dot + +{{< rem autogenerated options stop >}} + +### Custom upload headers + +You can set custom upload headers with the `--header-upload` flag. + +- Cache-Control +- Content-Disposition +- Content-Encoding +- Content-Language +- Content-Type + +Eg `--header-upload "Content-Type: text/potato"` + +## Limitations + +MD5 sums are only uploaded with chunked files if the source has an MD5 +sum. This will always be the case for a local to azure copy. 
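+
+If you want to confirm which files have MD5 sums stored on the remote,
+something like the following should list them (the paths below are
+illustrative):
+
+    rclone md5sum remote:dir
+
+or compare a copy against its local source:
+
+    rclone check /home/local/directory remote:dir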
diff --git a/docs/content/docs.md b/docs/content/docs.md index d297e817e..b6f2e7b1d 100644 --- a/docs/content/docs.md +++ b/docs/content/docs.md @@ -58,6 +58,7 @@ See the following for detailed instructions for * [Mega](/mega/) * [Memory](/memory/) * [Microsoft Azure Blob Storage](/azureblob/) + * [Microsoft Azure Files Storage](/azurefiles/) * [Microsoft OneDrive](/onedrive/) * [OpenStack Swift / Rackspace Cloudfiles / Blomp Cloud Storage / Memset Memstore](/swift/) * [OpenDrive](/opendrive/) diff --git a/docs/content/overview.md b/docs/content/overview.md index a49c6bd4b..a5f3164e2 100644 --- a/docs/content/overview.md +++ b/docs/content/overview.md @@ -39,6 +39,7 @@ Here is an overview of the major features of each cloud storage system. | Mega | - | - | No | Yes | - | - | | Memory | MD5 | R/W | No | No | - | - | | Microsoft Azure Blob Storage | MD5 | R/W | No | No | R/W | - | +| Microsoft Azure Files Storage | MD5 | R/W | Yes | No | R/W | - | | Microsoft OneDrive | QuickXorHash ⁵ | R/W | Yes | No | R | - | | OpenDrive | MD5 | R/W | Yes | Partial ⁸ | - | - | | OpenStack Swift | MD5 | R/W | No | No | R/W | - | @@ -490,6 +491,7 @@ upon backend-specific capabilities. | Mega | Yes | No | Yes | Yes | Yes | No | No | No | Yes | Yes | Yes | | Memory | No | Yes | No | No | No | Yes | Yes | No | No | No | No | | Microsoft Azure Blob Storage | Yes | Yes | No | No | No | Yes | Yes | Yes | No | No | No | +| Microsoft Azure Files Storage | No | Yes | Yes | Yes | No | No | Yes | Yes | No | Yes | Yes | | Microsoft OneDrive | Yes | Yes | Yes | Yes | Yes | Yes | No | No | Yes | Yes | Yes | | OpenDrive | Yes | Yes | Yes | Yes | No | No | No | No | No | No | Yes | | OpenStack Swift | Yes ¹ | Yes | No | No | No | Yes | Yes | No | No | Yes | No | diff --git a/docs/layouts/chrome/navbar.html b/docs/layouts/chrome/navbar.html index 91cc10c3c..9475bbefa 100644 --- a/docs/layouts/chrome/navbar.html +++ b/docs/layouts/chrome/navbar.html @@ -81,6 +81,7 @@ Mega Memory Microsoft Azure Blob Storage + Microsoft Azure Files Storage Microsoft OneDrive OpenDrive QingStor diff --git a/fstest/test_all/config.yaml b/fstest/test_all/config.yaml index 38de401d7..0acdf0c2e 100644 --- a/fstest/test_all/config.yaml +++ b/fstest/test_all/config.yaml @@ -320,6 +320,8 @@ backends: - backend: "azureblob" remote: "TestAzureBlob,directory_markers:" fastlist: true + - backend: "azurefiles" + remote: "TestAzureFiles:" - backend: "pcloud" remote: "TestPcloud:" fastlist: true