bisync: allow lock file expiration/renewal with --max-lock - #7470
Background: Bisync uses lock files as a safety feature to prevent
interference from other bisync runs while it is running. Bisync normally
removes these lock files at the end of a run, but if bisync is abruptly
interrupted, these files will be left behind. By default, they will lock out
all future runs, until the user has a chance to manually check things out and
remove the lock.
Before this change, lock files blocked future runs indefinitely, so a single
interrupted run would lock out all future runs forever (absent user
intervention), and there was no way to change this behavior.
After this change, a new --max-lock flag can be used to make lock files
automatically expire after a certain period of time, so that future runs are
not locked out forever, and auto-recovery is possible. --max-lock can be any
duration 2m or greater (or 0 to disable). If set, lock files older than this
will be considered "expired", and future runs will be allowed to disregard them
and proceed. (Note that the --max-lock duration must be set by the process that
left the lock file -- not the later one interpreting it.)
If set, bisync will also "renew" these lock files every
--max-lock_minus_one_minute throughout a run, for extra safety. (For example,
with --max-lock 5m, bisync would renew the lock file (for another 5 minutes)
every 4 minutes until the run has completed.) In other words, it should not be
possible for a lock file to pass its expiration time while the process that
created it is still running -- and you can therefore be reasonably sure that
any _expired_ lock file you may find was left there by an interrupted run, not
one that is still running and just taking awhile.
If --max-lock is 0 or not set, the default is that lock files will never
expire, and will block future runs (of these same two bisync paths)
indefinitely.
For maximum resilience from disruptions, consider setting a relatively short
duration like --max-lock 2m along with --resilient and --recover, and a
relatively frequent cron schedule. The result will be a very robust
"set-it-and-forget-it" bisync run that can automatically bounce back from
almost any interruption it might encounter, without requiring the user to get
involved and run a --resync.
2023-12-03 16:19:13 +08:00
package bisync
import (
"encoding/json"
"fmt"
"io"
"os"
"strconv"
"sync"
"time"
"github.com/rclone/rclone/cmd/bisync/bilib"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/lib/terminal"
)
const basicallyforever = 200 * 365 * 24 * time . Hour
var stopRenewal func ( )
var data = struct {
Session string
PID string
TimeRenewed time . Time
TimeExpires time . Time
} { }
func ( b * bisyncRun ) setLockFile ( ) error {
b . lockFile = ""
b . setLockFileExpiration ( )
if ! b . opt . DryRun {
b . lockFile = b . basePath + ".lck"
if bilib . FileExists ( b . lockFile ) {
if ! b . lockFileIsExpired ( ) {
errTip := Color ( terminal . MagentaFg , "Tip: this indicates that another bisync run (of these same paths) either is still running or was interrupted before completion. \n" )
errTip += Color ( terminal . MagentaFg , "If you're SURE you want to override this safety feature, you can delete the lock file with the following command, then run bisync again: \n" )
errTip += fmt . Sprintf ( Color ( terminal . HiRedFg , "rclone deletefile \"%s\"" ) , b . lockFile )
return fmt . Errorf ( Color ( terminal . RedFg , "prior lock file found: %s \n" ) + errTip , Color ( terminal . HiYellowFg , b . lockFile ) )
}
}
pidStr := [ ] byte ( strconv . Itoa ( os . Getpid ( ) ) )
if err = os . WriteFile ( b . lockFile , pidStr , bilib . PermSecure ) ; err != nil {
return fmt . Errorf ( Color ( terminal . RedFg , "cannot create lock file: %s: %w" ) , b . lockFile , err )
}
fs . Debugf ( nil , "Lock file created: %s" , b . lockFile )
b . renewLockFile ( )
stopRenewal = b . startLockRenewal ( )
}
return nil
}
func ( b * bisyncRun ) removeLockFile ( ) {
if b . lockFile != "" {
stopRenewal ( )
errUnlock := os . Remove ( b . lockFile )
if errUnlock == nil {
fs . Debugf ( nil , "Lock file removed: %s" , b . lockFile )
} else if err == nil {
err = errUnlock
} else {
fs . Errorf ( nil , "cannot remove lockfile %s: %v" , b . lockFile , errUnlock )
}
b . lockFile = "" // block removing it again
}
}
func ( b * bisyncRun ) setLockFileExpiration ( ) {
if b . opt . MaxLock > 0 && b . opt . MaxLock < 2 * time . Minute {
fs . Logf ( nil , Color ( terminal . YellowFg , "--max-lock cannot be shorter than 2 minutes (unless 0.) Changing --max-lock from %v to %v" ) , b . opt . MaxLock , 2 * time . Minute )
b . opt . MaxLock = 2 * time . Minute
} else if b . opt . MaxLock <= 0 {
b . opt . MaxLock = basicallyforever
}
}
func ( b * bisyncRun ) renewLockFile ( ) {
if b . lockFile != "" && bilib . FileExists ( b . lockFile ) {
data . Session = b . basePath
data . PID = strconv . Itoa ( os . Getpid ( ) )
data . TimeRenewed = time . Now ( )
data . TimeExpires = time . Now ( ) . Add ( b . opt . MaxLock )
// save data file
df , err := os . Create ( b . lockFile )
b . handleErr ( b . lockFile , "error renewing lock file" , err , true , true )
b . handleErr ( b . lockFile , "error encoding JSON to lock file" , json . NewEncoder ( df ) . Encode ( data ) , true , true )
b . handleErr ( b . lockFile , "error closing lock file" , df . Close ( ) , true , true )
if b . opt . MaxLock < basicallyforever {
fs . Infof ( nil , Color ( terminal . HiBlueFg , "lock file renewed for %v. New expiration: %v" ) , b . opt . MaxLock , data . TimeExpires )
}
}
}
func ( b * bisyncRun ) lockFileIsExpired ( ) bool {
if b . lockFile != "" && bilib . FileExists ( b . lockFile ) {
rdf , err := os . Open ( b . lockFile )
b . handleErr ( b . lockFile , "error reading lock file" , err , true , true )
dec := json . NewDecoder ( rdf )
for {
2024-04-09 19:17:00 +08:00
if err := dec . Decode ( & data ) ; err != nil {
if err != io . EOF {
fs . Errorf ( b . lockFile , "err: %v" , err )
}
bisync: allow lock file expiration/renewal with --max-lock - #7470
Background: Bisync uses lock files as a safety feature to prevent
interference from other bisync runs while it is running. Bisync normally
removes these lock files at the end of a run, but if bisync is abruptly
interrupted, these files will be left behind. By default, they will lock out
all future runs, until the user has a chance to manually check things out and
remove the lock.
Before this change, lock files blocked future runs indefinitely, so a single
interrupted run would lock out all future runs forever (absent user
intervention), and there was no way to change this behavior.
After this change, a new --max-lock flag can be used to make lock files
automatically expire after a certain period of time, so that future runs are
not locked out forever, and auto-recovery is possible. --max-lock can be any
duration 2m or greater (or 0 to disable). If set, lock files older than this
will be considered "expired", and future runs will be allowed to disregard them
and proceed. (Note that the --max-lock duration must be set by the process that
left the lock file -- not the later one interpreting it.)
If set, bisync will also "renew" these lock files every
--max-lock_minus_one_minute throughout a run, for extra safety. (For example,
with --max-lock 5m, bisync would renew the lock file (for another 5 minutes)
every 4 minutes until the run has completed.) In other words, it should not be
possible for a lock file to pass its expiration time while the process that
created it is still running -- and you can therefore be reasonably sure that
any _expired_ lock file you may find was left there by an interrupted run, not
one that is still running and just taking awhile.
If --max-lock is 0 or not set, the default is that lock files will never
expire, and will block future runs (of these same two bisync paths)
indefinitely.
For maximum resilience from disruptions, consider setting a relatively short
duration like --max-lock 2m along with --resilient and --recover, and a
relatively frequent cron schedule. The result will be a very robust
"set-it-and-forget-it" bisync run that can automatically bounce back from
almost any interruption it might encounter, without requiring the user to get
involved and run a --resync.
2023-12-03 16:19:13 +08:00
break
}
}
b . handleErr ( b . lockFile , "error closing file" , rdf . Close ( ) , true , true )
if ! data . TimeExpires . IsZero ( ) && data . TimeExpires . Before ( time . Now ( ) ) {
fs . Infof ( b . lockFile , Color ( terminal . GreenFg , "Lock file found, but it expired at %v. Will delete it and proceed." ) , data . TimeExpires )
markFailed ( b . listing1 ) // listing is untrusted so force revert to prior (if --recover) or create new ones (if --resync)
markFailed ( b . listing2 )
return true
}
fs . Infof ( b . lockFile , Color ( terminal . RedFg , "Valid lock file found. Expires at %v. (%v from now)" ) , data . TimeExpires , time . Since ( data . TimeExpires ) . Abs ( ) . Round ( time . Second ) )
prettyprint ( data , "Lockfile info" , fs . LogLevelInfo )
}
return false
}
// StartLockRenewal renews the lockfile every --max-lock minus one minute.
//
// It returns a func which should be called to stop the renewal.
func ( b * bisyncRun ) startLockRenewal ( ) func ( ) {
if b . opt . MaxLock <= 0 || b . opt . MaxLock >= basicallyforever || b . lockFile == "" {
return func ( ) { }
}
stopLockRenewal := make ( chan struct { } )
var wg sync . WaitGroup
wg . Add ( 1 )
go func ( ) {
defer wg . Done ( )
ticker := time . NewTicker ( b . opt . MaxLock - time . Minute )
for {
select {
case <- ticker . C :
b . renewLockFile ( )
case <- stopLockRenewal :
ticker . Stop ( )
return
}
}
} ( )
return func ( ) {
close ( stopLockRenewal )
wg . Wait ( )
}
}
func markFailed ( file string ) {
failFile := file + "-err"
if bilib . FileExists ( file ) {
_ = os . Remove ( failFile )
_ = os . Rename ( file , failFile )
}
}