From 102ded275951f107447434c933d1e3d98ac82bfa Mon Sep 17 00:00:00 2001 From: Lizzy Fleckenstein Date: Wed, 1 Apr 2026 22:18:41 +0200 Subject: rename to burstdog --- .gitignore | 4 +- Makefile | 8 +-- README.md | 16 ++--- burstdog.c | 227 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ watchdog.c | 227 ------------------------------------------------------------- 5 files changed, 241 insertions(+), 241 deletions(-) create mode 100644 burstdog.c delete mode 100644 watchdog.c diff --git a/.gitignore b/.gitignore index 8955088..711446a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ -watchdog -watchdog.log +burstdog +burstdog.log diff --git a/Makefile b/Makefile index 5f7c715..e578636 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ CFLAGS = -O3 -Wall -Wextra PREFIX = /usr/local -watchdog: watchdog.c - $(CC) $(CFLAGS) watchdog.c -o watchdog +burstdog: burstdog.c + $(CC) $(CFLAGS) burstdog.c -o burstdog -install: watchdog - install -Dm 755 watchdog "$(PREFIX)/bin/watchdog" +install: burstdog + install -Dm 755 burstdog "$(PREFIX)/bin/burstdog" .PHONY: install diff --git a/README.md b/README.md index 1b527a5..005e299 100644 --- a/README.md +++ b/README.md @@ -1,27 +1,27 @@ -# watchdog +# burstdog Monitor CPU usage by process and report bursts to a logfile. ## Invocation ```sh -watchdog [logfile] +burstdog [logfile] ``` -Default logfile is `watchdog.log` in the current directory. +Default logfile is `burstdog.log` in the current directory. ## Build -Compile using `make` or `cc watchdog.c -O3 -o watchdog`. +Compile using `make` or `cc burstdog.c -O3 -o burstdog`. -See [watchdog.c](watchdog.c) for configuration options. +See [burstdog.c](burstdog.c) for configuration options. Install to PREFIX using `make install`. 
-## Test watchdog +## Test burstdog ```sh -$ watchdog & -$ tail -f watchdog.log +$ burstdog & +$ tail -f burstdog.log $ cat < /dev/random > /dev/null # in a different shell ``` diff --git a/burstdog.c b/burstdog.c new file mode 100644 index 0000000..8544f9e --- /dev/null +++ b/burstdog.c @@ -0,0 +1,227 @@ +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// settings +#define BURSTDOG_IV 10 // how often to wake up, per second +#define BURSTDOG_BURST 98 // cpu time percentage considered a burst +#define BURSTDOG_BURST_END 90 // cpu time percentage considered the end of a burst +#define BURSTDOG_SAMPLES 5 // how many samples a burst needs to persist for to be logged + +#if BURSTDOG_SAMPLES < 2 +#error must consider at least 2 samples +#endif + +// arbitrary limits, bump if necessary +#define MAX_PROCESS 4096 +#define DIRBUFFER_SIZE 262144 + +struct process +{ + unsigned int pid; + int fd; + unsigned int time[BURSTDOG_SAMPLES]; +}; + +struct process_tab +{ + size_t num; + struct process arr[MAX_PROCESS]; +}; + +static unsigned int pids[MAX_PROCESS]; +static struct process_tab process_tabs[2]; +static char dirbuffer[DIRBUFFER_SIZE]; +static char statbuffer[BUFSIZ]; +static char linebuffer[BUFSIZ]; + +struct linux_dirent64 { + ino64_t d_ino; /* 64-bit inode number */ + off64_t d_off; /* Not an offset; see getdents() */ + unsigned short d_reclen; /* Size of this dirent */ + unsigned char d_type; /* File type */ + char d_name[]; /* Filename (null-terminated) */ +}; + +static int compare_int(const void *pa, const void *pb) +{ + int a = *(const int *) pa; + int b = *(const int *) pb; + return a > b ? 1 : a == b ? 
0 : -1; +} + +static bool is_sep(char c) +{ + return c == ' ' || c == '\0'; +} + +static char *nth_word(unsigned int n, char *ptr, size_t size) +{ + size_t start = 0, pos = 0; + bool paren = false; + for (unsigned int i = 0; i <= n; i++) { + do { + if (pos >= size) return NULL; + if (ptr[pos] == '(') paren = true; + if (ptr[pos] == ')') paren = false; + } while (!is_sep(ptr[pos++]) || paren); + + if (i == n-1) start = pos; + } + ptr[pos-1] = '\0'; + return &ptr[start]; +} + +int main(int argc, char **argv) +{ + long clock_tick = sysconf(_SC_CLK_TCK); + + char *logfile = argc > 1 ? argv[1] : "burstdog.log"; + int logfd = open(logfile, O_WRONLY | O_CREAT | O_APPEND | O_DIRECT, 0644); + if (logfd == -1) { + perror("open logfile"); + return EXIT_FAILURE; + } + + int dirfd = open("/proc", O_RDONLY | O_DIRECTORY); + if (dirfd == -1) { + perror("open /proc"); + return EXIT_FAILURE; + } + + unsigned int times[BURSTDOG_SAMPLES]; + unsigned int num_samples = 0; + unsigned int bursting_pid = 0; + struct process_tab *procs = &process_tabs[0], *oldprocs = &process_tabs[1]; + for (;;) { + size_t move_samples = num_samples; + if (num_samples < BURSTDOG_SAMPLES) + num_samples++; + else + move_samples--; + if (num_samples) + memmove(&times[1], &times[0], sizeof(unsigned int) * move_samples); + struct timespec ts; + clock_gettime(CLOCK_BOOTTIME, &ts); + times[0] = ts.tv_sec * clock_tick + ts.tv_nsec * clock_tick / 1000000000; + + if (lseek(dirfd, 0, SEEK_SET) != 0) { + perror("seek /proc"); + return EXIT_FAILURE; + } + + ssize_t n_dirent = getdents64(dirfd, dirbuffer, DIRBUFFER_SIZE); + if (n_dirent == -1) { + perror("getdents64 /proc"); + return EXIT_FAILURE; + } + + size_t num_pids = 0; + char *dirptr = dirbuffer; + while (dirptr < dirbuffer + n_dirent) { + struct linux_dirent64 *d = (void*) dirptr; + int pid; + if ((d->d_type == DT_DIR || d->d_type == DT_UNKNOWN) && (pid = atoi(d->d_name))) { + if (num_pids == MAX_PROCESS) { + fprintf(stderr, "too many processes\n"); + return EXIT_FAILURE; 
+ } + pids[num_pids++] = pid; + } + dirptr += d->d_reclen; + } + + qsort(pids, num_pids, sizeof(int), &compare_int); + + procs->num = 0; + size_t oldproc_idx = 0; + for (size_t i = 0; i < num_pids; i++) { + struct process *oldproc = NULL; + if (num_samples > 1) { + while (oldproc_idx < oldprocs->num && oldprocs->arr[oldproc_idx].pid < pids[i]) + close(oldprocs->arr[oldproc_idx++].fd); + if (oldproc_idx < oldprocs->num && oldprocs->arr[oldproc_idx].pid == pids[i]) + oldproc = &oldprocs->arr[oldproc_idx++]; + } + + int statfd; + if (oldproc) { + statfd = oldproc->fd; + } else { + char statname[20]; + snprintf(statname, 20, "%d/stat", pids[i]); + + statfd = openat(dirfd, statname, O_RDONLY); + if (statfd == -1) // fail silently + continue; + } + + ssize_t n_read = pread(statfd, statbuffer, BUFSIZ-1, 0); + if (n_read == -1) { + close(statfd); + continue; + } + statbuffer[n_read] = '\0'; + + char *name = nth_word(1, statbuffer, n_read+1); + char *utime = nth_word(13, statbuffer, n_read+1); + char *stime = nth_word(14, statbuffer, n_read+1); + if (!name || !utime || !stime) { + close(statfd); + continue; + } + + struct process *proc = &procs->arr[procs->num++]; + proc->pid = pids[i]; + proc->fd = statfd; + proc->time[0] = atoi(utime) + atoi(stime); + + if (oldproc) { + memcpy(&proc->time[1], &oldproc->time[0], (BURSTDOG_SAMPLES-1) * sizeof(int)); + } else { + for (size_t j = 1; j < BURSTDOG_SAMPLES; j++) + proc->time[j] = proc->time[0]; + } + + if (num_samples == BURSTDOG_SAMPLES) { + unsigned int total = times[0] - times[BURSTDOG_SAMPLES-1]; + unsigned int subset = proc->time[0] - proc->time[BURSTDOG_SAMPLES-1]; + unsigned int share = subset * 100 / total; + if (bursting_pid == proc->pid && share < BURSTDOG_BURST_END) { + bursting_pid = 0; + } else if (bursting_pid != proc->pid && share >= BURSTDOG_BURST) { + time_t time_v = time(NULL); + struct tm time_s; + localtime_r(&time_v, &time_s); + char timebuf[128]; + strftime(timebuf, 128, "%c", &time_s); + + int n_line = 
snprintf(linebuffer, BUFSIZ, + "%s: %5d %s is bursting: %d of %d ticks (%d%%)\n", + timebuf, proc->pid, name, subset, total, share); + + int n_written = write(logfd, linebuffer, n_line); + (void) n_written; + bursting_pid = proc->pid; + } + } + } + + while (oldproc_idx < oldprocs->num) + close(oldprocs->arr[oldproc_idx++].fd); + + struct process_tab *tmp = oldprocs; + oldprocs = procs; + procs = tmp; + + usleep(1000000/BURSTDOG_IV); + } +} diff --git a/watchdog.c b/watchdog.c deleted file mode 100644 index 0e2d91b..0000000 --- a/watchdog.c +++ /dev/null @@ -1,227 +0,0 @@ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -// settings -#define WATCHDOG_IV 10 // how often to wake up, per second -#define WATCHDOG_BURST 98 // cpu time percentage considered a burst -#define WATCHDOG_BURST_END 90 // cpu time percentage considered the end of a burst -#define WATCHDOG_SAMPLES 5 // how many samples a burst needs to persist for to be logged - -#if WATCHDOG_SAMPLES < 2 -#error must consider at least 2 samples -#endif - -// arbitrary limits, bump if necessary -#define MAX_PROCESS 4096 -#define DIRBUFFER_SIZE 262144 - -struct process -{ - unsigned int pid; - int fd; - unsigned int time[WATCHDOG_SAMPLES]; -}; - -struct process_tab -{ - size_t num; - struct process arr[MAX_PROCESS]; -}; - -static unsigned int pids[MAX_PROCESS]; -static struct process_tab process_tabs[2]; -static char dirbuffer[DIRBUFFER_SIZE]; -static char statbuffer[BUFSIZ]; -static char linebuffer[BUFSIZ]; - -struct linux_dirent64 { - ino64_t d_ino; /* 64-bit inode number */ - off64_t d_off; /* Not an offset; see getdents() */ - unsigned short d_reclen; /* Size of this dirent */ - unsigned char d_type; /* File type */ - char d_name[]; /* Filename (null-terminated) */ -}; - -static int compare_int(const void *pa, const void *pb) -{ - int a = *(const int *) pa; - int b = *(const int *) pb; - return a > b ? 1 : a == b ? 
0 : -1; -} - -static bool is_sep(char c) -{ - return c == ' ' || c == '\0'; -} - -static char *nth_word(unsigned int n, char *ptr, size_t size) -{ - size_t start = 0, pos = 0; - bool paren = false; - for (unsigned int i = 0; i <= n; i++) { - do { - if (pos >= size) return NULL; - if (ptr[pos] == '(') paren = true; - if (ptr[pos] == ')') paren = false; - } while (!is_sep(ptr[pos++]) || paren); - - if (i == n-1) start = pos; - } - ptr[pos-1] = '\0'; - return &ptr[start]; -} - -int main(int argc, char **argv) -{ - long clock_tick = sysconf(_SC_CLK_TCK); - - char *logfile = argc > 1 ? argv[1] : "watchdog.log"; - int logfd = open(logfile, O_WRONLY | O_CREAT | O_APPEND | O_DIRECT, 0644); - if (logfd == -1) { - perror("open logfile"); - return EXIT_FAILURE; - } - - int dirfd = open("/proc", O_RDONLY | O_DIRECTORY); - if (dirfd == -1) { - perror("open /proc"); - return EXIT_FAILURE; - } - - unsigned int times[WATCHDOG_SAMPLES]; - unsigned int num_samples = 0; - unsigned int bursting_pid = 0; - struct process_tab *procs = &process_tabs[0], *oldprocs = &process_tabs[1]; - for (;;) { - size_t move_samples = num_samples; - if (num_samples < WATCHDOG_SAMPLES) - num_samples++; - else - move_samples--; - if (num_samples) - memmove(&times[1], &times[0], sizeof(unsigned int) * move_samples); - struct timespec ts; - clock_gettime(CLOCK_BOOTTIME, &ts); - times[0] = ts.tv_sec * clock_tick + ts.tv_nsec * clock_tick / 1000000000; - - if (lseek(dirfd, 0, SEEK_SET) != 0) { - perror("seek /proc"); - return EXIT_FAILURE; - } - - ssize_t n_dirent = getdents64(dirfd, dirbuffer, DIRBUFFER_SIZE); - if (n_dirent == -1) { - perror("getdents64 /proc"); - return EXIT_FAILURE; - } - - size_t num_pids = 0; - char *dirptr = dirbuffer; - while (dirptr < dirbuffer + n_dirent) { - struct linux_dirent64 *d = (void*) dirptr; - int pid; - if ((d->d_type == DT_DIR || d->d_type == DT_UNKNOWN) && (pid = atoi(d->d_name))) { - if (num_pids == MAX_PROCESS) { - fprintf(stderr, "too many processes\n"); - return EXIT_FAILURE; 
- } - pids[num_pids++] = pid; - } - dirptr += d->d_reclen; - } - - qsort(pids, num_pids, sizeof(int), &compare_int); - - procs->num = 0; - size_t oldproc_idx = 0; - for (size_t i = 0; i < num_pids; i++) { - struct process *oldproc = NULL; - if (num_samples > 1) { - while (oldproc_idx < oldprocs->num && oldprocs->arr[oldproc_idx].pid < pids[i]) - close(oldprocs->arr[oldproc_idx++].fd); - if (oldproc_idx < oldprocs->num && oldprocs->arr[oldproc_idx].pid == pids[i]) - oldproc = &oldprocs->arr[oldproc_idx++]; - } - - int statfd; - if (oldproc) { - statfd = oldproc->fd; - } else { - char statname[20]; - snprintf(statname, 20, "%d/stat", pids[i]); - - statfd = openat(dirfd, statname, O_RDONLY); - if (statfd == -1) // fail silently - continue; - } - - ssize_t n_read = pread(statfd, statbuffer, BUFSIZ-1, 0); - if (n_read == -1) { - close(statfd); - continue; - } - statbuffer[n_read] = '\0'; - - char *name = nth_word(1, statbuffer, n_read+1); - char *utime = nth_word(13, statbuffer, n_read+1); - char *stime = nth_word(14, statbuffer, n_read+1); - if (!name || !utime || !stime) { - close(statfd); - continue; - } - - struct process *proc = &procs->arr[procs->num++]; - proc->pid = pids[i]; - proc->fd = statfd; - proc->time[0] = atoi(utime) + atoi(stime); - - if (oldproc) { - memcpy(&proc->time[1], &oldproc->time[0], (WATCHDOG_SAMPLES-1) * sizeof(int)); - } else { - for (size_t j = 1; j < WATCHDOG_SAMPLES; j++) - proc->time[j] = proc->time[0]; - } - - if (num_samples == WATCHDOG_SAMPLES) { - unsigned int total = times[0] - times[WATCHDOG_SAMPLES-1]; - unsigned int subset = proc->time[0] - proc->time[WATCHDOG_SAMPLES-1]; - unsigned int share = subset * 100 / total; - if (bursting_pid == proc->pid && share < WATCHDOG_BURST_END) { - bursting_pid = 0; - } else if (bursting_pid != proc->pid && share >= WATCHDOG_BURST) { - time_t time_v = time(NULL); - struct tm time_s; - localtime_r(&time_v, &time_s); - char timebuf[128]; - strftime(timebuf, 128, "%c", &time_s); - - int n_line = 
snprintf(linebuffer, BUFSIZ, - "%s: %5d %s is bursting: %d of %d ticks (%d%%)\n", - timebuf, proc->pid, name, subset, total, share); - - int n_written = write(logfd, linebuffer, n_line); - (void) n_written; - bursting_pid = proc->pid; - } - } - } - - while (oldproc_idx < oldprocs->num) - close(oldprocs->arr[oldproc_idx++].fd); - - struct process_tab *tmp = oldprocs; - oldprocs = procs; - procs = tmp; - - usleep(1000000/WATCHDOG_IV); - } -} -- cgit v1.2.3