From 4c89e3f5fa1c65ccd0c843f98e4013c2085f243f Mon Sep 17 00:00:00 2001 From: William Hubbs Date: Wed, 10 May 2017 18:01:10 -0500 Subject: supervise-daemon:create multiple options from --respawn-limit This creates --respawn-delay, --respawn-max and --respawn-period. It was suggested that it would be easier to follow if the options were separated. This is for #126. --- man/openrc-run.8 | 16 +++++++++++++-- man/supervise-daemon.8 | 52 +++++++++++++++++++++++++++-------------------- sh/supervise-daemon.sh | 4 +++- src/rc/supervise-daemon.c | 50 ++++++++++++++++++++++++++++++--------------- 4 files changed, 81 insertions(+), 41 deletions(-) diff --git a/man/openrc-run.8 b/man/openrc-run.8 index c7ac2ac1..f627004d 100644 --- a/man/openrc-run.8 +++ b/man/openrc-run.8 @@ -167,8 +167,20 @@ Display name used for the above defined command. Process name to match when signaling the daemon. .It Ar stopsig Signal to send when stopping the daemon. -.It Ar respawn_limit -Respawn limit +.It Ar respawn_delay +Respawn delay +.Xr supervise-daemon 8 +will use for this daemon. See +.Xr supervise-daemon 8 +for more information about this setting. +.It Ar respawn_max +Respawn max +.Xr supervise-daemon 8 +will use for this daemon. See +.Xr supervise-daemon 8 +for more information about this setting. +.It Ar respawn_period +Respawn period .Xr supervise-daemon 8 will use for this daemon. 
See .Xr supervise-daemon 8 diff --git a/man/supervise-daemon.8 b/man/supervise-daemon.8 index 43e74ef7..19aa5d44 100644 --- a/man/supervise-daemon.8 +++ b/man/supervise-daemon.8 @@ -16,6 +16,8 @@ .Nd starts a daemon and restarts it if it crashes .Sh SYNOPSIS .Nm +.Fl D , -respawn-delay +.Ar seconds .Fl d , -chdir .Ar path .Fl e , -env @@ -26,16 +28,18 @@ .Ar arg .Fl k , -umask .Ar value +.Fl m , -respawn-max +.Ar count .Fl N , -nicelevel .Ar level .Fl p , -pidfile .Ar pidfile -.Fl u , -user -.Ar user +.Fl P , -respawn-period +.Ar seconds .Fl r , -chroot .Ar chrootpath -.Fl R , -respawn-limit -.Ar limit +.Fl u , -user +.Ar user .Fl 1 , -stdout .Ar logfile .Fl 2 , -stderr @@ -84,6 +88,9 @@ Print the action(s) that are taken just before doing them. .Pp The options are as follows: .Bl -tag -width indent +.It Fl D , -respawn-delay Ar seconds +wait this number of seconds before restarting a daemon after it crashes. +The default is 0. .It Fl d , -chdir Ar path chdir to this directory before starting the daemon. .It Fl e , -env Ar VAR=VALUE @@ -96,29 +103,21 @@ Class can be 0 for none, 1 for real time, 2 for best effort and 3 for idle. Data can be from 0 to 7 inclusive. .It Fl k , -umask Ar mode Set the umask of the daemon. +.It Fl m , -respawn-max Ar count +Sets the maximum number of times a daemon will be respawned during a +respawn period. If a daemon dies more than this number of times during a +respawn period, +.Nm +will give up trying to respawn it and exit. The default is 10, and 0 +means unlimited. .It Fl N , -nicelevel Ar level Modifies the scheduling priority of the daemon. +.It Fl P , -respawn-period Ar seconds +Sets the length of a respawn period. The default is 5 seconds. See the +description of --respawn-max for more information. .It Fl r , -chroot Ar path chroot to this directory before starting the daemon. All other paths, such as the path to the daemon, chdir and pidfile, should be relative to the chroot. 
-.It Fl R , -respawn-limit Ar limit -Control how agressively -.Nm -will try to respawn a daemon when it fails to start. The limit argument -can be a pair of integers separated bya colon or the string unlimited. -.Pp -If a pair of integers is given, the first is a maximum number of respawn -attempts and the second is a time period. It should be interpreted as: -If the daemon dies and has to be respawned more than -times in any time period of seconds, exit and give up. -.Pp -For example, the default is 10:5. -This means if the supervisor respawns a daemon more than ten times -in any 5 second period, it gives up and exits. -.Pp -if unlimited is given as the limit, it means that the supervisor will -not exit or give up, no matter how many times the daemon it is -supervising needs to be respawned. .It Fl u , -user Ar user Start the daemon as the specified user. .It Fl 1 , -stdout Ar logfile @@ -143,6 +142,15 @@ to parse its options, which allows it to accept the `--' option which will cause it to stop processing options at that point. Any subsequent arguments are passed as arguments to the daemon to start and used when finding a daemon to stop or signal. +.Sh NOTE +If respawn-delay, respawn-max and respawn-period are not set correctly, +it is possible to trigger a situation in which the supervisor will +infinitely try to respawn a daemon. To avoid this, if you change the +values of --respawn-delay, --respawn-max or --respawn-period, always +make sure the settings make sense. For example, a respawn period of 5 +seconds with a respawn max of 10 and a respawn delay of 1 second leads +to infinite respawning since there can never be 10 respawns within 5 +seconds. 
.Sh SEE ALSO .Xr chdir 2 , .Xr chroot 2 , diff --git a/sh/supervise-daemon.sh b/sh/supervise-daemon.sh index c6130edb..8add2147 100644 --- a/sh/supervise-daemon.sh +++ b/sh/supervise-daemon.sh @@ -25,7 +25,9 @@ supervise_start() eval supervise-daemon --start \ ${chroot:+--chroot} $chroot \ ${pidfile:+--pidfile} $pidfile \ - ${respawn_limit:+--respawn-limit} $respawn_limit \ + ${respawn_delay:+--respawn-delay} $respawn_delay \ + ${respawn_max:+--respawn-max} $respawn_max \ + ${respawn_period:+--respawn-period} $respawn_period \ ${command_user+--user} $command_user \ $supervise_daemon_args \ $command \ diff --git a/src/rc/supervise-daemon.c b/src/rc/supervise-daemon.c index bd24d782..76c9d426 100644 --- a/src/rc/supervise-daemon.c +++ b/src/rc/supervise-daemon.c @@ -66,38 +66,42 @@ static struct pam_conv conv = { NULL, NULL}; const char *applet = NULL; const char *extraopts = NULL; -const char *getoptstring = "d:e:g:I:Kk:N:p:r:R:Su:1:2:" \ +const char *getoptstring = "D:d:e:g:I:Kk:m:N:p:r:Su:1:2:" \ getoptstring_COMMON; const struct option longopts[] = { + { "respawn-delay", 1, NULL, 'D'}, { "chdir", 1, NULL, 'd'}, { "env", 1, NULL, 'e'}, { "group", 1, NULL, 'g'}, { "ionice", 1, NULL, 'I'}, { "stop", 0, NULL, 'K'}, { "umask", 1, NULL, 'k'}, + { "respawn-max", 1, NULL, 'm'}, { "nicelevel", 1, NULL, 'N'}, { "pidfile", 1, NULL, 'p'}, - { "user", 1, NULL, 'u'}, + { "respawn-period", 1, NULL, 'P'}, { "chroot", 1, NULL, 'r'}, - { "respawn-limit", 1, NULL, 'R'}, { "start", 0, NULL, 'S'}, + { "user", 1, NULL, 'u'}, { "stdout", 1, NULL, '1'}, { "stderr", 1, NULL, '2'}, longopts_COMMON }; const char * const longopts_help[] = { + "Set a respawn delay", "Change the PWD", "Set an environment string", "Change the process group", "Set an ionice class:data when starting", "Stop daemon", "Set the umask for the daemon", + "set maximum number of respawn attempts", "Set a nicelevel when starting", "Match pid found in this file", - "Change the process user", + "Set respawn time period", 
"Chroot to this directory", - "set a respawn limit", "Start daemon", + "Change the process user", "Redirect stdout to file", "Redirect stderr to file", longopts_help_COMMON @@ -429,6 +433,7 @@ int main(int argc, char **argv) int n; char exec_file[PATH_MAX]; int respawn_count = 0; + int respawn_delay = 0; int respawn_max = 10; int respawn_period = 5; time_t respawn_now= 0; @@ -469,6 +474,12 @@ int main(int argc, char **argv) while ((opt = getopt_long(argc, argv, getoptstring, longopts, (int *) 0)) != -1) switch (opt) { + case 'D': /* --respawn-delay time */ + n = sscanf(optarg, "%d", &respawn_delay); + if (n != 1 || respawn_delay < 1) + eerrorx("Invalid respawn-delay value '%s'", optarg); + break; + case 'I': /* --ionice */ if (sscanf(optarg, "%d:%d", &ionicec, &ioniced) == 0) eerrorx("%s: invalid ionice `%s'", @@ -490,6 +501,12 @@ int main(int argc, char **argv) applet, optarg); break; + case 'P': /* --respawn-period time */ + n = sscanf(optarg, "%d", &respawn_period); + if (n != 1 || respawn_delay < 1) + eerrorx("Invalid respawn-delay value '%s'", optarg); + break; + case 'S': /* --start */ start = true; break; @@ -519,6 +536,12 @@ int main(int argc, char **argv) applet, optarg); break; + case 'm': /* --respawn-max count */ + n = sscanf(optarg, "%d", &respawn_max); + if (n != 1 || respawn_max < 1) + eerrorx("Invalid respawn-max value '%s'", optarg); + break; + case 'p': /* --pidfile */ pidfile = optarg; break; @@ -527,17 +550,6 @@ int main(int argc, char **argv) ch_root = optarg; break; - case 'R': /* --respawn-limit unlimited|count:period */ - if (strcasecmp(optarg, "unlimited") == 0) { - respawn_max = 0; - respawn_period = 0; - } else { - n = sscanf(optarg, "%d:%d", &respawn_max, &respawn_period); - if (n != 2 || respawn_max < 1 || respawn_period < 1) - eerrorx("Invalid respawn-limit setting '%s'", optarg); - } - break; - case 'u': /* --user | */ { p = optarg; @@ -600,6 +612,11 @@ int main(int argc, char **argv) if (start) { if (!exec) eerrorx("%s: nothing to 
start", applet); + if (respawn_delay * respawn_max > respawn_period) { + ewarn("%s: Please increase the value of --respawn-period to more " + "than %d to avoid infinite respawning", applet, + respawn_delay * respawn_max); + } } /* Expand ~ */ @@ -732,6 +749,7 @@ int main(int argc, char **argv) syslog(LOG_INFO, "stopping %s, pid %d", exec, child_pid); kill(child_pid, SIGTERM); } else { + sleep(respawn_delay); if (respawn_max > 0 && respawn_period > 0) { respawn_now = time(NULL); if (first_spawn == 0) -- cgit v1.2.3