1/*
2 * Copyright 2020, Data61, CSIRO (ABN 41 687 119 230)
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7/*
8 * Running 64-bit Isabelle is a dangerous task. In particular, it is liable to
9 * suck up all your RAM and send your system into swap-death on quite a regular
10 * basis.
11 *
12 * This Linux utility will regularly scan the system for signs of swap-death
13 * (i.e., low memory and high pagefault rate and high load average) and send
14 * SIGSTOP to processes suspected of being the cause.
15 *
16 * When it triggers, it will write to syslog stating the process stopped.
17 *
18 * 2012 David Greenaway
19 */
20
#define _GNU_SOURCE /* for asprintf */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>

#include <sys/types.h>
#include <sys/sysinfo.h>
#include <sys/time.h>
#include <sys/resource.h>

#include <dirent.h>
#include <unistd.h>
#include <signal.h>
#include <syslog.h>
#include <string.h>
35
36/* A system load-average considered "dangerous". */
37#define DANGEROUS_LOAD 3.0
38
39/* A system load-average considered "very dangerous". */
40#define VERY_DANGEROUS_LOAD 10.0
41
/* A fraction of total RAM (0.02 = 2%) considered to be "dangerously low". */
43#define DANGEROUS_FREE_RAM 0.02
44
45/* A number of page-faults per second considered to be heavy swapping. */
46#define DANGEROUS_FAULTS_PER_SECOND 100
47
48/* A minimum Linux OOM score for a process to be considered for stopping. */
49#define MIN_OOM_SCORE 3
50
51/* Number of seconds we sleep between each system probe. */
52#define SLEEP_TIME 5
53
/* Number of seconds we sleep for after stopping a process before considering
 * stopping another. */
56#define SLEEP_AFTER_STOP_SECONDS 15
57
58/* Scheduling priority we should run at. */
59#define SCHED_PRIO (-10)
60
61/* Misc OS constants. */
62#define MAX_PATH_SIZE 1024
63#define MAX_LINE_SIZE 1024
64#define LINUX_SYSINFO_LOADS_SCALE 65536
65
/*
 * Report an unrecoverable error and terminate the program.
 *
 * The message, followed by a newline, is written to stderr so that it is
 * kept apart from regular standard output. The process then exits with
 * status 1; this function does not return.
 */
void fatal(const char *str)
{
    fprintf(stderr, "%s\n", str);
    exit(1);
}
71
/*
 * Get the name of a process from its PID.
 *
 * Reads the start of /proc/<pid>/cmdline into `output`. Entries in that file
 * are NUL-delimited, so when treated as a C string the buffer holds just the
 * first argument.
 *
 *   pid    - process to look up.
 *   output - destination buffer; always NUL-terminated on success.
 *   len    - size of `output` in bytes. At most len - 1 bytes are read, so a
 *            longer first argument is truncated.
 *
 * Returns 0 on success (`output` is the empty string when the file has no
 * content), or -1 if `output` is NULL, `len` is 0, or the file cannot be
 * opened.
 */
int name_of(int pid, char *output, size_t len)
{
    /* "/proc/" + up to 11 characters of PID + "/cmdline" fits comfortably. */
    char path[64];

    if (output == NULL || len == 0) {
        return -1;
    }

    int written = snprintf(path, sizeof path, "/proc/%d/cmdline", pid);
    if (written < 0 || (size_t)written >= sizeof path) {
        return -1;
    }

    /* Open the process's command line details from /proc. */
    FILE *f = fopen(path, "r");
    if (f == NULL) {
        return -1;
    }

    /* Read byte-wise so the number of bytes actually stored is known, and
     * keep one byte in reserve for the terminator. Without it, an empty or
     * over-long command line would leave `output` unterminated.
     */
    size_t got = fread(output, 1, len - 1, f);
    output[got] = '\0';
    fclose(f);

    return 0;
}
97
98/* Iterate through processes in the system. */
99void iterate_processes(char **limit, void (*proc_fn)(int, void *), void *data)
100{
101    /* Open /proc */
102    DIR *proc_dir = opendir("/proc");
103    if (proc_dir == NULL) {
104        fprintf(stderr, "Could not open /proc.");
105        exit(1);
106    }
107
108    /* Read through processes. */
109    while (1) {
110        /* Read directory. */
111        struct dirent *e = readdir(proc_dir);
112        if (e == NULL) {
113            break;
114        }
115
116        /* Skip non-directories. */
117        if ((e->d_type & DT_DIR) == 0) {
118            continue;
119        }
120
121        /* Process? */
122        int p = atoi(e->d_name);
123        if (p == 0) {
124            continue;
125        }
126
127        if (limit != NULL) {
128            int skip = 1;
129
130            /* Find the name of the process we're looking at. */
131            char name[PATH_MAX];
132            if (name_of(p, name, PATH_MAX) != 0)
133                /* This process doesn't have a name. Poor thing. */
134            {
135                continue;
136            }
137
138            /* Determine if this process matches any of the processes we should
139             * be considering.
140             */
141            char **l;
142            for (l = limit; *l != NULL; l++) {
143                if (!strcmp(name, *l)) {
144                    skip = 0;
145                    break;
146                }
147                char *last_slash = strrchr(name, '/');
148                if (last_slash != NULL && !strcmp(last_slash + 1, *l)) {
149                    skip = 0;
150                    break;
151                }
152            }
153
154            if (skip == 1)
155                /* No match. */
156            {
157                continue;
158            }
159
160#if DEBUG
161            printf("Considering %s...\n", name);
162#endif
163        }
164
165        proc_fn(p, data);
166    }
167
168    /* Cleanup. */
169    closedir(proc_dir);
170}
171
/* Results accumulated by test_process() over one scan of the processes. */
struct test_data {
    /* PID of the active process with the highest OOM score seen so far.
     * Left at the caller's initial value if no process qualified. */
    int worst_pid;
    /* OOM score a process has to exceed to become the new `worst_pid`. */
    unsigned long worst_oom_score;
    /* Sum over all scanned processes of the 12th field of /proc/<pid>/stat
     * (`majflt`, the major page-fault count, in proc(5)). */
    long long total_faults;
};

/* Read the first line of the file at `path` into `line` (capacity `size`).
 * Returns 0 on success, -1 if the file cannot be opened or read. */
static int read_proc_line(const char *path, char *line, size_t size)
{
    FILE *f = fopen(path, "r");
    if (f == NULL) {
        return -1;
    }
    char *r = fgets(line, (int)size, f);
    fclose(f);
    return (r == NULL) ? -1 : 0;
}

/* Extract the state character (field 3) and the page-fault counter
 * (field 12) from one /proc/<pid>/stat line. Returns 0 on success, -1 if
 * the line does not have the expected layout. */
static int parse_stat_line(const char *line, char *state, unsigned long *faults)
{
    /* Field 2 is the command name in parentheses. It may itself contain
     * spaces or parentheses, so resume parsing after the LAST ')' instead
     * of tokenising on whitespace from the start of the line.
     */
    const char *after_comm = strrchr(line, ')');
    if (after_comm == NULL) {
        return -1;
    }
    int n = sscanf(after_comm + 1, " %c %*d %*d %*d %*d %*d %*u %*u %*u %lu",
                   state, faults);
    return (n == 2) ? 0 : -1;
}

/*
 * Per-process callback for iterate_processes().
 *
 *   p - PID of the process to inspect.
 *   d - pointer to the `struct test_data` being accumulated.
 *
 * Reads the process's OOM score and page-fault count from /proc, adds the
 * fault count to `total_faults`, and records the process as the worst
 * offender if its OOM score exceeds `worst_oom_score` and it is neither
 * stopped ('T') nor a zombie ('Z').
 *
 * Processes can disappear at any moment, so a file that cannot be opened,
 * read or parsed simply causes the process to be skipped; `d` is then left
 * untouched.
 */
void test_process(int p, void *d)
{
    struct test_data *data = d;
    char path[64];
    char line[512];
    unsigned long oom_score = 0;
    unsigned long pagefaults = 0;
    char state = '\0';

    /* Read OOM score of process. */
    snprintf(path, sizeof path, "/proc/%d/oom_score", p);
    if (read_proc_line(path, line, sizeof line) != 0 ||
        sscanf(line, "%lu", &oom_score) != 1) {
        return;
    }

    /* Read state and pagefault information about the process. */
    snprintf(path, sizeof path, "/proc/%d/stat", p);
    if (read_proc_line(path, line, sizeof line) != 0 ||
        parse_stat_line(line, &state, &pagefaults) != 0) {
        return;
    }

    /* Are we in an active state? */
    int process_active = (state != 'T' && state != 'Z');

    /* Collate data. */
    data->total_faults += pagefaults;
    if (oom_score > data->worst_oom_score && process_active) {
        data->worst_oom_score = oom_score;
        data->worst_pid = p;
    }
}
236
/*
 * Parse the decimal integer at the start of `buf`, ignoring any run of
 * leading space characters. Yields 0 when no number can be converted.
 */
static long int parse_meminfo_int(char *buf)
{
    char *digits = buf + strspn(buf, " ");

    return strtol(digits, NULL, 10);
}
244
245static void get_free_memory(unsigned long *total, unsigned long *free)
246{
247    char buf[MAX_LINE_SIZE];
248    unsigned long memtotal = 0;
249    unsigned long memfree = 0;
250    unsigned long memcached = 0;
251
252    /* Read meminfo file. */
253    FILE *f = fopen("/proc/meminfo", "r");
254    if (f == NULL) {
255        fprintf(stderr, "Could not open /proc/meminfo.");
256        exit(1);
257    }
258
259    while (1) {
260        char *r = fgets(buf, MAX_LINE_SIZE, f);
261        if (r == NULL) {
262            break;
263        }
264        if (strncmp("MemTotal: ", buf, 10) == 0) {
265            memtotal = parse_meminfo_int(buf + 10);
266        } else if (strncmp("MemFree:  ", buf, 10) == 0) {
267            memfree = parse_meminfo_int(buf + 10);
268        } else if (strncmp("Cached:   ", buf, 10) == 0) {
269            memcached = parse_meminfo_int(buf + 10);
270        }
271    }
272
273    fclose(f);
274    *total = memtotal;
275    *free = memfree + memcached;
276}
277
278int is_system_unstable(
279    long long last_fault_count,
280    long long this_fault_count)
281{
282    struct sysinfo info;
283    int error = sysinfo(&info);
284    if (error) {
285        return 0;
286    }
287
288    /* Get free RAM. */
289    unsigned long memtotal, memfree;
290    get_free_memory(&memtotal, &memfree);
291    double free_ram = (memfree / (double)memtotal);
292
293    /* Get number of faults. */
294    long long faults = 0;
295    if (last_fault_count > 0) {
296        faults = (this_fault_count - last_fault_count);
297    }
298
299    /* Get system load. */
300    double system_load = info.loads[0] / (double)LINUX_SYSINFO_LOADS_SCALE;
301
302#if DEBUG
303    /* Print information. */
304    printf("[RAM: %5.1lf] [LOAD: %5.1lf] [FAULTS: %5lld]\n",
305           free_ram * 100.0, system_load, faults);
306#endif
307
308    /* Determine if the system is unstable. */
309    if (free_ram > DANGEROUS_FREE_RAM) {
310        return 0;
311    }
312    if (system_load < DANGEROUS_LOAD) {
313        return 0;
314    }
315    if (faults < DANGEROUS_FAULTS_PER_SECOND * SLEEP_TIME && system_load < VERY_DANGEROUS_LOAD) {
316        return 0;
317    }
318    return 1;
319}
320
/*
 * Determine what signal the given string parses to.
 *
 *   input   - a signal name ("SIGABRT", "SIGSTOP", "SIGTERM", "SIGKILL") or
 *             the decimal number of one of those signals on this platform.
 *             "17" is additionally accepted as SIGSTOP because earlier
 *             versions hard-coded that number.
 *   signal  - receives the signal number on success.
 *   signame - receives a pointer to the signal's name (static storage) on
 *             success.
 *
 * Returns 0 on success. Returns 1, leaving the outputs untouched, if the
 * input is not recognised.
 */
int parse_signal(const char *input, int *signal, const char **signame)
{
    static const struct {
        int number;
        const char *name;
    } known[] = {
        { SIGABRT, "SIGABRT" },
        { SIGSTOP, "SIGSTOP" },
        { SIGTERM, "SIGTERM" },
        { SIGKILL, "SIGKILL" },
    };

    for (size_t i = 0; i < sizeof known / sizeof known[0]; i++) {
        /* Signal numbers differ between architectures, so derive the
         * numeric spelling from the constant instead of hard-coding it.
         */
        char digits[16];
        snprintf(digits, sizeof digits, "%d", known[i].number);

        if (strcmp(input, known[i].name) == 0 || strcmp(input, digits) == 0) {
            *signal = known[i].number;
            *signame = known[i].name;
            return 0;
        }
    }

    /* Backward compatibility: "17" has always selected SIGSTOP here. */
    if (strcmp(input, "17") == 0) {
        *signal = SIGSTOP;
        *signame = "SIGSTOP";
        return 0;
    }

    return 1;
}
350
/*
 * Print a short usage summary to stdout.
 *
 *   argc, argv - the program's arguments; argv[0] is shown as the program
 *                name when argc > 0, otherwise "autostop" is used.
 *
 * The list of signals matches what parse_signal() accepts by name.
 */
void usage(int argc, char **argv)
{
    printf("\n"
           "usage: %s [<SIGNAL>] [<processes>]\n\n"
           "Monitors the system for high load and sends a signal to (hopefully)\n"
           "the culprit process.\n\n"
           "<SIGNAL> must be one of SIGABRT, SIGKILL, SIGSTOP or SIGTERM\n"
           "(default: SIGSTOP).\n"
           "If you don't pass a list of candidate processes, all are considered.\n\n",
           argc > 0 ? argv[0] : "autostop");
}
361
362int main(int argc, char **argv)
363{
364    int skip_count = 0;
365    long long last_fault_count = 0;
366    int signal;
367    const char *signame;
368    char **suspects;
369
370    /* Determine which signal to send. */
371    if (argc < 2) {
372        signal = SIGSTOP;
373        signame = "SIGSTOP";
374    } else {
375        int error = parse_signal(argv[1], &signal, &signame);
376        if (error) {
377            usage(argc, argv);
378            return 1;
379        }
380    }
381
382    /* Determine the list of candidate processes. */
383    suspects = argc > 2 ? &argv[2] : NULL;
384
385    /* Set our scheduling priority higher. */
386    (void)setpriority(PRIO_PROCESS, 0, SCHED_PRIO);
387
388    while (1) {
389        /* Collect data. */
390        struct test_data d = {
391            .worst_pid = -1,
392            .worst_oom_score = MIN_OOM_SCORE,
393            .total_faults = 0,
394        };
395        iterate_processes(suspects, test_process, &d);
396
397        /* Determine if things are looking bad and we haven't recently stoped something. */
398        if (is_system_unstable(last_fault_count, d.total_faults)) {
399            if (d.worst_pid != -1 && skip_count == 0) {
400#if DEBUG
401                printf("Sending %s to pid %d.\n", signame, d.worst_pid);
402#endif
403                int error = kill(d.worst_pid, signal);
404                if (!error) {
405                    syslog(LOG_ALERT,
406                           "auto-stop: Sending %s to pid %d to prevent system melt-down.\n", signame, d.worst_pid);
407                    skip_count = SLEEP_AFTER_STOP_SECONDS / SLEEP_TIME;
408                }
409            }
410        }
411        if (skip_count > 0) {
412            skip_count--;
413        }
414
415        last_fault_count = d.total_faults;
416        sleep(SLEEP_TIME);
417    }
418
419    return 0;
420}
421