aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWilliam Hubbs <w.d.hubbs@gmail.com>2017-05-09 18:20:52 -0500
committerWilliam Hubbs <w.d.hubbs@gmail.com>2017-05-09 18:30:08 -0500
commit3673040722b75c0a4d06fbeb272f917c7d1ea7c4 (patch)
tree1329fd5b6e8354c41d56c239f1f7d1127fc0dc28
parent96c8ba2fb5f91a711ef5bfcfd8affe0b74ad18fe (diff)
supervise-daemon: add a --respawn-limit option
Allow limiting the number of times supervise-daemon will attempt to respawn a daemon once it has died to prevent infinite respawning. Also, set a reasonable default limit (10 times in a 5 second period). This is for issue #126.
-rw-r--r--man/openrc-run.86
-rw-r--r--man/supervise-daemon.820
-rw-r--r--sh/supervise-daemon.sh1
-rw-r--r--src/rc/supervise-daemon.c37
4 files changed, 63 insertions, 1 deletions
diff --git a/man/openrc-run.8 b/man/openrc-run.8
index 25ec5b91..c7ac2ac1 100644
--- a/man/openrc-run.8
+++ b/man/openrc-run.8
@@ -167,6 +167,12 @@ Display name used for the above defined command.
Process name to match when signaling the daemon.
.It Ar stopsig
Signal to send when stopping the daemon.
+.It Ar respawn_limit
+Respawn limit
+.Xr supervise-daemon 8
+will use for this daemon. See
+.Xr supervise-daemon 8
+for more information about this setting.
.It Ar retry
Retry schedule to use when stopping the daemon. It can either be a
timeout in seconds or multiple signal/timeout pairs (like SIGTERM/5).
diff --git a/man/supervise-daemon.8 b/man/supervise-daemon.8
index 06087675..43e74ef7 100644
--- a/man/supervise-daemon.8
+++ b/man/supervise-daemon.8
@@ -34,6 +34,8 @@
.Ar user
.Fl r , -chroot
.Ar chrootpath
+.Fl R , -respawn-limit
+.Ar limit
.Fl 1 , -stdout
.Ar logfile
.Fl 2 , -stderr
@@ -99,6 +101,24 @@ Modifies the scheduling priority of the daemon.
.It Fl r , -chroot Ar path
chroot to this directory before starting the daemon. All other paths, such
as the path to the daemon, chdir and pidfile, should be relative to the chroot.
+.It Fl R , -respawn-limit Ar limit
+Control how aggressively
+.Nm
+will try to respawn a daemon when it fails to start. The limit argument
+can be a pair of integers separated by a colon or the string unlimited.
+.Pp
+If a pair of integers is given, the first is a maximum number of respawn
+attempts and the second is a time period. It should be interpreted as:
+If the daemon dies and has to be respawned more than <first number>
+times in any time period of <second number> seconds, exit and give up.
+.Pp
+For example, the default is 10:5.
+This means if the supervisor respawns a daemon more than ten times
+in any 5 second period, it gives up and exits.
+.Pp
+If unlimited is given as the limit, it means that the supervisor will
+not exit or give up, no matter how many times the daemon it is
+supervising needs to be respawned.
.It Fl u , -user Ar user
Start the daemon as the specified user.
.It Fl 1 , -stdout Ar logfile
diff --git a/sh/supervise-daemon.sh b/sh/supervise-daemon.sh
index bff68a4c..c6130edb 100644
--- a/sh/supervise-daemon.sh
+++ b/sh/supervise-daemon.sh
@@ -25,6 +25,7 @@ supervise_start()
eval supervise-daemon --start \
${chroot:+--chroot} $chroot \
${pidfile:+--pidfile} $pidfile \
+ ${respawn_limit:+--respawn-limit} $respawn_limit \
${command_user+--user} $command_user \
$supervise_daemon_args \
$command \
diff --git a/src/rc/supervise-daemon.c b/src/rc/supervise-daemon.c
index 2080e4a6..bd24d782 100644
--- a/src/rc/supervise-daemon.c
+++ b/src/rc/supervise-daemon.c
@@ -66,7 +66,7 @@ static struct pam_conv conv = { NULL, NULL};
const char *applet = NULL;
const char *extraopts = NULL;
-const char *getoptstring = "d:e:g:I:Kk:N:p:r:Su:1:2:" \
+const char *getoptstring = "d:e:g:I:Kk:N:p:r:R:Su:1:2:" \
getoptstring_COMMON;
const struct option longopts[] = {
{ "chdir", 1, NULL, 'd'},
@@ -79,6 +79,7 @@ const struct option longopts[] = {
{ "pidfile", 1, NULL, 'p'},
{ "user", 1, NULL, 'u'},
{ "chroot", 1, NULL, 'r'},
+ { "respawn-limit", 1, NULL, 'R'},
{ "start", 0, NULL, 'S'},
{ "stdout", 1, NULL, '1'},
{ "stderr", 1, NULL, '2'},
@@ -95,6 +96,7 @@ const char * const longopts_help[] = {
"Match pid found in this file",
"Change the process user",
"Chroot to this directory",
+ "set a respawn limit",
"Start daemon",
"Redirect stdout to file",
"Redirect stderr to file",
@@ -424,7 +426,13 @@ int main(int argc, char **argv)
char *p;
char *token;
int i;
+ int n;
char exec_file[PATH_MAX];
+ int respawn_count = 0;
+ int respawn_max = 10;
+ int respawn_period = 5;
+ time_t respawn_now= 0;
+ time_t first_spawn= 0;
struct passwd *pw;
struct group *gr;
FILE *fp;
@@ -519,6 +527,17 @@ int main(int argc, char **argv)
ch_root = optarg;
break;
+ case 'R': /* --respawn-limit unlimited|count:period */
+ if (strcasecmp(optarg, "unlimited") == 0) {
+ respawn_max = 0;
+ respawn_period = 0;
+ } else {
+ n = sscanf(optarg, "%d:%d", &respawn_max, &respawn_period);
+ if (n != 2 || respawn_max < 1 || respawn_period < 1)
+ eerrorx("Invalid respawn-limit setting '%s'", optarg);
+ }
+ break;
+
case 'u': /* --user <username>|<uid> */
{
p = optarg;
@@ -713,6 +732,22 @@ int main(int argc, char **argv)
syslog(LOG_INFO, "stopping %s, pid %d", exec, child_pid);
kill(child_pid, SIGTERM);
} else {
+ if (respawn_max > 0 && respawn_period > 0) {
+ respawn_now = time(NULL);
+ if (first_spawn == 0)
+ first_spawn = respawn_now;
+ if (respawn_now - first_spawn > respawn_period) {
+ respawn_count = 0;
+ first_spawn = 0;
+ } else
+ respawn_count++;
+ if (respawn_count >= respawn_max) {
+ syslog(LOG_INFO, "respawned \"%s\" too many times, "
+ "exiting", exec);
+ exiting = true;
+ continue;
+ }
+ }
if (WIFEXITED(i))
syslog(LOG_INFO, "%s, pid %d, exited with return code %d",
exec, child_pid, WEXITSTATUS(i));