Clean up provisioned modules on error; refactor Run(); add Validate()

Modules that return an error during provisioning should still be cleaned
up so that they don't leak any resources they may have allocated before
the error occurred. Cleanup should be able to run even if Provision does
not complete fully.
This commit is contained in:
Matthew Holt 2019-09-30 09:16:01 -06:00
parent 1e66226217
commit 8eb2c37251
No known key found for this signature in database
GPG Key ID: 2A349DD577D586A5
4 changed files with 138 additions and 101 deletions

View File

@ -54,7 +54,33 @@ func Run(newCfg *Config) error {
currentCfgMu.Lock()
defer currentCfgMu.Unlock()
if newCfg != nil {
// run the new config and start all its apps
err := run(newCfg, true)
if err != nil {
return err
}
// swap old config with the new one
oldCfg := currentCfg
currentCfg = newCfg
// Stop, Cleanup each old app
unsyncedStop(oldCfg)
return nil
}
// run runs newCfg and starts all its apps if
// start is true. If any errors happen, cleanup
// is performed if any modules were provisioned;
// apps that were started already will be stopped,
// so this function should not leak resources if
// an error is returned.
func run(newCfg *Config, start bool) error {
if newCfg == nil {
return nil
}
// because we will need to roll back any state
// modifications if this function errors, we
// keep a single error value and scope all
@ -121,12 +147,18 @@ func Run(newCfg *Config) error {
return err
}
if !start {
return nil
}
// Start
err = func() error {
return func() error {
var started []string
for name, a := range newCfg.apps {
err := a.Start()
if err != nil {
// an app failed to start, so we need to stop
// all other apps that were already started
for _, otherAppName := range started {
err2 := newCfg.apps[otherAppName].Stop()
if err2 != nil {
@ -140,19 +172,6 @@ func Run(newCfg *Config) error {
}
return nil
}()
if err != nil {
return err
}
}
// swap old config with the new one
oldCfg := currentCfg
currentCfg = newCfg
// Stop, Cleanup each old app
unsyncedStop(oldCfg)
return nil
}
// Stop stops running the current configuration.
@ -168,26 +187,34 @@ func Stop() error {
return nil
}
// unsyncedStop stops oldCfg from running, but if
// unsyncedStop stops cfg from running, but if
// applicable, you need to acquire locks yourself.
// It is a no-op if oldCfg is nil. If any app
// It is a no-op if cfg is nil. If any app
// returns an error when stopping, it is logged
// and the function continues with the next app.
func unsyncedStop(oldCfg *Config) {
if oldCfg == nil {
// This function assumes all apps in cfg were
// successfully started.
func unsyncedStop(cfg *Config) {
if cfg == nil {
return
}
// stop each app
for name, a := range oldCfg.apps {
for name, a := range cfg.apps {
err := a.Stop()
if err != nil {
log.Printf("[ERROR] stop %s: %v", name, err)
}
}
// clean up all old modules
oldCfg.cancelFunc()
// clean up all modules
cfg.cancelFunc()
}
// Validate loads, provisions, and validates cfg, but
// does not start running it. Validation still provisions
// the config's modules, so after a successful run the
// config's cancel function is invoked to clean those
// modules up instead of leaving their resources allocated.
// If run returns an error, that error is returned as-is;
// run is documented to perform its own cleanup in that
// case. It is a no-op that returns nil if cfg is nil.
func Validate(cfg *Config) error {
	if cfg == nil {
		return nil
	}
	if err := run(cfg, false); err != nil {
		return err
	}
	// NOTE(review): presumably run assigns cfg.cancelFunc for
	// every non-nil config; the nil check guards against a
	// config for which that did not happen -- confirm.
	if cfg.cancelFunc != nil {
		// nothing was started, so only module cleanup is needed
		cfg.cancelFunc()
	}
	return nil
}
// Duration is a JSON-string-unmarshable duration type.

View File

@ -131,6 +131,14 @@ func (ctx Context) LoadModule(name string, rawMsg json.RawMessage) (interface{},
if prov, ok := val.(Provisioner); ok {
err := prov.Provision(ctx)
if err != nil {
// incomplete provisioning could have left state
// dangling, so make sure it gets cleaned up
if cleanerUpper, ok := val.(CleanerUpper); ok {
err2 := cleanerUpper.Cleanup()
if err2 != nil {
err = fmt.Errorf("%v; additionally, cleanup: %v", err, err2)
}
}
return nil, fmt.Errorf("provision %s: %v", mod.Name, err)
}
}
@ -138,6 +146,7 @@ func (ctx Context) LoadModule(name string, rawMsg json.RawMessage) (interface{},
if validator, ok := val.(Validator); ok {
err := validator.Validate()
if err != nil {
// since the module was already provisioned, make sure we clean up
if cleanerUpper, ok := val.(CleanerUpper); ok {
err2 := cleanerUpper.Cleanup()
if err2 != nil {

View File

@ -253,9 +253,10 @@ type Validator interface {
// CleanerUpper is implemented by modules which may have side-effects
// such as opened files, spawned goroutines, or allocated some sort
// of non-local state when they were provisioned. This method should
// of non-stack state when they were provisioned. This method should
// deallocate/cleanup those resources to prevent memory leaks. Cleanup
// should be fast and efficient.
// should be fast and efficient. Cleanup should work even if Provision
// returns an error, to allow cleaning up from partial provisionings.
type CleanerUpper interface {
// Cleanup releases whatever the module allocated while it
// was being provisioned. It is also invoked when Provision
// or Validate returned an error, so implementations must
// tolerate partially-initialized state. A non-nil return
// value reports that the cleanup itself failed.
Cleanup() error
}

View File

@ -65,9 +65,9 @@ func gracefulStop(sigName string) {
os.Exit(exitCode)
}
// Exit codes. Generally, you will want to avoid
// automatically restarting the process if the
// exit code is 1.
// Exit codes. Generally, you should NOT
// automatically restart the process if the
// exit code is ExitCodeFailedStartup (1).
const (
ExitCodeSuccess = iota
ExitCodeFailedStartup