about summary refs log tree commit diff
path: root/watchdog.c
diff options
context:
space:
mode:
author Lizzy Fleckenstein <lizzy@vlhl.dev> 2026-04-01 20:36:46 +0200
committer Lizzy Fleckenstein <lizzy@vlhl.dev> 2026-04-01 20:36:46 +0200
commit abda2001b80bc71f48cfbdc02a4f8988e5f30a39 (patch)
tree 6b3430800a84c0ae7c86787763414ec399f2f0f9 /watchdog.c
download burstdog-abda2001b80bc71f48cfbdc02a4f8988e5f30a39.tar.xz
init
Diffstat (limited to 'watchdog.c')
-rw-r--r-- watchdog.c 208
1 files changed, 208 insertions, 0 deletions
diff --git a/watchdog.c b/watchdog.c
new file mode 100644
index 0000000..e547aae
--- /dev/null
+++ b/watchdog.c
@@ -0,0 +1,208 @@
+#define _GNU_SOURCE
+#include <sys/time.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <time.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <time.h>
+
+// Tunable settings.
+enum {
+    WATCHDOG_IV = 10,        // wake-ups per second
+    WATCHDOG_BURST = 98,     // CPU time percentage at which a burst starts
+    WATCHDOG_BURST_END = 90, // CPU time percentage below which a burst is over
+    WATCHDOG_SAMPLES = 5,    // number of samples a burst has to span before it is logged
+};
+
+// Arbitrary capacity limits; raise them if they turn out to be too small.
+enum {
+    MAX_PROCESS = 4096,      // capacity of the pid list and of each process table
+    DIRBUFFER_SIZE = 262144, // size in bytes of the directory read buffer
+};
+
+struct process // CPU time history of a single process
+{
+ unsigned int pid; // process ID, taken from the numeric /proc entry name
+ unsigned int time[WATCHDOG_SAMPLES]; // utime + stime readings from /proc/<pid>/stat; [0] is the newest, higher indices are older
+};
+
+struct process_tab // table of the processes recorded during one pass over /proc
+{
+ size_t num; // number of entries of arr currently in use
+ struct process arr[MAX_PROCESS]; // filled by main in ascending pid order
+};
+
+static int pids[MAX_PROCESS]; // PIDs found in /proc during the current pass; main sorts them ascending
+static struct process_tab process_tabs[2]; // current and previous pass; main swaps the two roles after every pass
+static char dirbuffer[DIRBUFFER_SIZE]; // receives the raw getdents64 output for /proc
+static char statbuffer[BUFSIZ]; // contents of the /proc/<pid>/stat file being parsed
+static char linebuffer[BUFSIZ]; // formatted log line before it is written to the log file
+
+struct linux_dirent64 { // record type that main casts positions inside the getdents64 buffer to
+ ino64_t d_ino; /* 64-bit inode number */
+ off64_t d_off; /* Not an offset; see getdents() */
+ unsigned short d_reclen; /* Size of this dirent; main advances by this many bytes to reach the next record */
+ unsigned char d_type; /* File type; main compares it against DT_DIR and DT_UNKNOWN */
+ char d_name[]; /* Filename (null-terminated) */
+};
+
+// qsort() comparator that orders int elements ascending.
+// Returns -1, 0 or 1 when *pa is less than, equal to or greater than *pb.
+static int compare_int(const void *pa, const void *pb)
+{
+    const int *lhs = pa;
+    const int *rhs = pb;
+
+    if (*lhs < *rhs)
+        return -1;
+    if (*lhs > *rhs)
+        return 1;
+    return 0;
+}
+
+// True for the characters that end a word: a space or a NUL byte.
+// NUL counts as a separator so that a buffer in which an earlier nth_word()
+// call already terminated a word can still be split into the same words.
+static bool is_sep(char c)
+{
+    return c == ' ' || c == '\0';
+}
+
+// Index of the last ')' in ptr[from..size), or size when there is none.
+static size_t last_close_paren(const char *ptr, size_t from, size_t size)
+{
+    for (size_t i = size; i > from; i--)
+        if (ptr[i-1] == ')')
+            return i-1;
+    return size;
+}
+
+// Return the n-th (0-based) word of the size-byte buffer ptr and terminate
+// it in place by overwriting the separator that follows it with NUL.
+//
+// Words are separated by a single space or NUL. A '(' starts a parenthesised
+// section that extends to the LAST ')' in the buffer; separators inside it do
+// not split the word. Matching the last ')' rather than the first keeps a
+// /proc/<pid>/stat command name such as "(a) b)" together as one field, so
+// the fields after it keep their positions.
+//
+// size must include the terminating NUL if the last word is to be found.
+// Returns NULL when the buffer holds fewer than n+1 terminated words or when
+// a '(' has no closing ')'.
+static char *nth_word(unsigned int n, char *ptr, size_t size)
+{
+    size_t start = 0, pos = 0;
+    for (unsigned int i = 0; i <= n; i++) {
+        start = pos;
+        for (;;) {
+            if (pos >= size)
+                return NULL;
+
+            char c = ptr[pos];
+            if (c == '(') {
+                size_t close = last_close_paren(ptr, pos, size);
+                if (close == size)
+                    return NULL;
+                // resume at the ')', which is consumed as an ordinary character
+                pos = close;
+                continue;
+            }
+
+            pos++;
+            if (is_sep(c))
+                break;
+        }
+    }
+    // pos is one past the separator that ended word n
+    ptr[pos-1] = '\0';
+    return &ptr[start];
+}
+
+// Read every entry of the directory open on dirfd and store the positive
+// numeric entry names (the PIDs, for /proc) in pids[], counting them in
+// *num_pids. Returns false after printing a diagnostic if reading fails or
+// more than MAX_PROCESS PIDs are found.
+static bool collect_pids(int dirfd, size_t *num_pids)
+{
+    *num_pids = 0;
+    // getdents64 returns at most one buffer full per call; keep calling it
+    // until it reports the end of the directory with 0
+    for (;;) {
+        ssize_t n_bytes = getdents64(dirfd, dirbuffer, DIRBUFFER_SIZE);
+        if (n_bytes == -1) {
+            perror("getdents64 /proc");
+            return false;
+        }
+        if (n_bytes == 0)
+            return true;
+
+        // the return value is a byte count, so walk the records by offset
+        for (ssize_t off = 0; off < n_bytes;) {
+            struct linux_dirent64 *d = (void*) &dirbuffer[off];
+            if (d->d_reclen == 0) // malformed record, would never advance
+                break;
+            off += d->d_reclen;
+
+            if (d->d_type != DT_DIR && d->d_type != DT_UNKNOWN)
+                continue;
+            int pid = atoi(d->d_name);
+            if (pid <= 0) // not a process directory
+                continue;
+
+            if (*num_pids == MAX_PROCESS) {
+                fprintf(stderr, "too many processes\n");
+                return false;
+            }
+            pids[(*num_pids)++] = pid;
+        }
+    }
+}
+
+// Append one timestamped "is bursting" line to logfd and sync it to disk.
+// A failed write is reported on stderr but is otherwise not fatal.
+static void log_burst(int logfd, unsigned int pid, const char *name,
+    unsigned int subset, unsigned int total, unsigned int share)
+{
+    char timebuf[128];
+    time_t time_v = time(NULL);
+    struct tm time_s;
+    if (!localtime_r(&time_v, &time_s) || !strftime(timebuf, sizeof timebuf, "%c", &time_s))
+        timebuf[0] = '\0';
+
+    int n_line = snprintf(linebuffer, sizeof linebuffer,
+        "%s: %5u %s is bursting: %u of %u ticks (%u%%)\n",
+        timebuf, pid, name, subset, total, share);
+    if (n_line <= 0)
+        return;
+
+    // snprintf reports the untruncated length; never write past the buffer
+    size_t len = (size_t) n_line;
+    if (len >= sizeof linebuffer)
+        len = sizeof linebuffer - 1;
+
+    if (write(logfd, linebuffer, len) == -1)
+        perror("write logfile");
+    fsync(logfd);
+}
+
+// Sample the CPU time of every process WATCHDOG_IV times per second and log
+// a line whenever a process used at least WATCHDOG_BURST percent of the
+// elapsed clock ticks over the last WATCHDOG_SAMPLES samples.
+//
+// argv[1], if given, names the log file (default "watchdog.log"); lines are
+// appended to it. Runs until an error occurs, then returns EXIT_FAILURE.
+int main(int argc, char **argv)
+{
+    long clock_tick = sysconf(_SC_CLK_TCK);
+    if (clock_tick <= 0) {
+        fprintf(stderr, "sysconf(_SC_CLK_TCK) failed\n");
+        return EXIT_FAILURE;
+    }
+
+    char *logfile = argc > 1 ? argv[1] : "watchdog.log";
+    int logfd = open(logfile, O_WRONLY | O_CREAT | O_APPEND, 0644);
+    if (logfd == -1) {
+        perror("open logfile");
+        return EXIT_FAILURE;
+    }
+
+    unsigned int times[WATCHDOG_SAMPLES]; // sample timestamps in ticks, newest first
+    unsigned int num_samples = 0;
+    // NOTE(review): only one bursting PID is remembered, so two processes
+    // bursting at the same time are each logged again on every pass.
+    unsigned int bursting_pid = 0;
+    struct process_tab *procs = &process_tabs[0], *oldprocs = &process_tabs[1];
+    for (;;) {
+        // shift the timestamp history by one, dropping the oldest once full
+        size_t move_samples = num_samples;
+        if (num_samples < WATCHDOG_SAMPLES)
+            num_samples++;
+        else
+            move_samples--;
+        memmove(&times[1], &times[0], sizeof times[0] * move_samples);
+
+        struct timespec ts;
+        if (clock_gettime(CLOCK_BOOTTIME, &ts) == -1) {
+            perror("clock_gettime");
+            return EXIT_FAILURE;
+        }
+        // widen before multiplying so that a 32-bit long cannot overflow
+        times[0] = (unsigned int) ((long long) ts.tv_sec * clock_tick
+            + (long long) ts.tv_nsec * clock_tick / 1000000000);
+
+        bool window_full = num_samples == WATCHDOG_SAMPLES;
+        unsigned int total = window_full ? times[0] - times[WATCHDOG_SAMPLES-1] : 0;
+
+        int dirfd = open("/proc", O_RDONLY | O_DIRECTORY);
+        if (dirfd == -1) {
+            perror("open /proc");
+            return EXIT_FAILURE;
+        }
+
+        size_t num_pids;
+        if (!collect_pids(dirfd, &num_pids)) {
+            close(dirfd);
+            return EXIT_FAILURE;
+        }
+
+        // both tables are kept in pid order so they can be merged in one sweep
+        qsort(pids, num_pids, sizeof pids[0], &compare_int);
+
+        procs->num = 0;
+        size_t oldproc_idx = 0;
+        for (size_t i = 0; i < num_pids; i++) {
+            char statname[20];
+            snprintf(statname, sizeof statname, "%d/stat", pids[i]);
+
+            int statfd = openat(dirfd, statname, O_RDONLY);
+            if (statfd == -1) // fail silently, the process may have exited
+                continue;
+
+            ssize_t n_read = read(statfd, statbuffer, sizeof statbuffer - 1);
+            close(statfd);
+            if (n_read <= 0)
+                continue;
+            statbuffer[n_read] = '\0';
+
+            // the size includes the NUL so that it can end the last word
+            size_t stat_size = (size_t) n_read + 1;
+            char *name = nth_word(1, statbuffer, stat_size);
+            char *utime = nth_word(13, statbuffer, stat_size);
+            char *stime = nth_word(14, statbuffer, stat_size);
+            if (!name || !utime || !stime)
+                continue;
+
+            struct process *proc = &procs->arr[procs->num++];
+            proc->pid = (unsigned int) pids[i];
+            // the counters are unsigned long in the kernel; atoi could overflow
+            proc->time[0] = (unsigned int) (strtoul(utime, NULL, 10) + strtoul(stime, NULL, 10));
+
+            struct process *oldproc = NULL;
+            if (num_samples > 1) {
+                while (oldproc_idx < oldprocs->num && oldprocs->arr[oldproc_idx].pid < proc->pid)
+                    oldproc_idx++;
+                if (oldproc_idx < oldprocs->num && oldprocs->arr[oldproc_idx].pid == proc->pid)
+                    oldproc = &oldprocs->arr[oldproc_idx];
+            }
+            // a counter that went backwards means the pid now belongs to a
+            // different process; start its history afresh
+            if (oldproc && oldproc->time[0] > proc->time[0])
+                oldproc = NULL;
+
+            if (oldproc) {
+                memcpy(&proc->time[1], &oldproc->time[0], (WATCHDOG_SAMPLES-1) * sizeof proc->time[0]);
+            } else {
+                for (size_t j = 1; j < WATCHDOG_SAMPLES; j++)
+                    proc->time[j] = proc->time[0];
+            }
+
+            // no verdict until the window is full and spans at least one tick
+            if (!window_full || total == 0)
+                continue;
+
+            unsigned int subset = proc->time[0] - proc->time[WATCHDOG_SAMPLES-1];
+            unsigned int share = (unsigned int) ((unsigned long long) subset * 100 / total);
+            if (bursting_pid == proc->pid && share < WATCHDOG_BURST_END) {
+                bursting_pid = 0;
+            } else if (bursting_pid != proc->pid && share >= WATCHDOG_BURST) {
+                log_burst(logfd, proc->pid, name, subset, total, share);
+                bursting_pid = proc->pid;
+            }
+        }
+
+        close(dirfd);
+
+        // this pass becomes the history for the next one
+        struct process_tab *tmp = oldprocs;
+        oldprocs = procs;
+        procs = tmp;
+
+        usleep(1000000/WATCHDOG_IV);
+    }
+}