1/*
2 * Copyright 2014, NICTA
3 *
4 * This software may be distributed and modified according to the terms of
5 * the BSD 2-Clause license. Note that NO WARRANTY is provided.
6 * See "LICENSE_BSD2.txt" for details.
7 *
8 * @TAG(NICTA_BSD)
9 */
10
11/*
12 * Running 64-bit Isabelle is a dangerous task. In particular, it is liable to
13 * suck up all your RAM and send your system into swap-death on quite a regular
14 * basis.
15 *
16 * This Linux utility will regularly scan the system for signs of swap-death
17 * (i.e., low memory and high pagefault rate and high load average) and send
18 * SIGSTOP to processes suspected of being the cause.
19 *
20 * When it triggers, it will write to syslog stating the process stopped.
21 *
22 * 2012 David Greenaway
23 */
24
25#define _GNU_SOURCE /* for asprintf */
26#include <stdio.h>
27#include <stdlib.h>
28
29#include <sys/types.h>
30#include <sys/sysinfo.h>
31#include <sys/time.h>
32#include <sys/resource.h>
33
34#include <dirent.h>
35#include <unistd.h>
36#include <signal.h>
37#include <syslog.h>
38#include <string.h>
39
/* A system load-average considered "dangerous". */
#define DANGEROUS_LOAD 3.0

/* A system load-average considered "very dangerous". */
#define VERY_DANGEROUS_LOAD 10.0

/* A fraction of total RAM (0.02 is 2%) considered to be "dangerously low".
 * It is compared against free memory divided by total memory. */
#define DANGEROUS_FREE_RAM 0.02

/* A number of page-faults per second considered to be heavy swapping. */
#define DANGEROUS_FAULTS_PER_SECOND 100

/* A minimum Linux OOM score for a process to be considered for stopping.
 * A process's score must be strictly greater than this value. */
#define MIN_OOM_SCORE 3

/* Number of seconds we sleep between each system probe. */
#define SLEEP_TIME 5

/* Number of seconds we sleep for after stopping a process before considering
 * stopping another. */
#define SLEEP_AFTER_STOP_SECONDS 15

/* Scheduling priority we should run at. */
#define SCHED_PRIO (-10)

/* Misc OS constants. */
/* Size of the buffer used to build /proc path names. */
#define MAX_PATH_SIZE 1024
/* Size of the buffer used to read one line of /proc/meminfo. */
#define MAX_LINE_SIZE 1024
/* Divisor that converts a sysinfo() load value into a load average. */
#define LINUX_SYSINFO_LOADS_SCALE 65536
69
/*
 * Report an unrecoverable error and terminate the program.
 *
 * The message is written to stderr followed by a newline, so that it is not
 * mixed into (or lost with) regular output on stdout. The process then exits
 * with status 1; this function does not return.
 */
void
fatal(const char *str)
{
    fprintf(stderr, "%s\n", str);
    exit(1);
}
76
/*
 * Get the name of a process from its PID.
 *
 * Reads /proc/<pid>/cmdline into `output`, a buffer of `len` bytes. At most
 * len - 1 bytes are read and a terminating NUL is always appended. Because
 * cmdline entries are themselves NUL delimited, `output` reads as a C string
 * holding just the first argument (truncated if it does not fit). An empty
 * cmdline produces an empty string.
 *
 * Returns 0 on success, or -1 if `output` is NULL, `len` is 0, or the cmdline
 * file cannot be opened.
 */
int
name_of(int pid, char *output, size_t len) {
    /* A fixed stack buffer avoids a heap allocation for every process
     * scanned; "/proc/" + a decimal int + "/cmdline" fits with room to
     * spare. */
    char path[64];
    int written;
    size_t got;
    FILE *f;

    if (output == NULL || len == 0)
        return -1;

    written = snprintf(path, sizeof path, "/proc/%d/cmdline", pid);
    if (written < 0 || (size_t)written >= sizeof path)
        return -1;

    /* Open the process's command line details from /proc. */
    f = fopen(path, "r");
    if (f == NULL)
        return -1;

    /* Read byte-wise so a short file still reports how much was stored, and
     * leave room for the terminator we add ourselves. */
    got = fread(output, 1, len - 1, f);
    output[got] = '\0';
    fclose(f);

    return 0;
}
100
101/* Iterate through processes in the system. */
102void
103iterate_processes(char **limit, void (*proc_fn)(int, void *), void *data)
104{
105    /* Open /proc */
106    DIR *proc_dir = opendir("/proc");
107    if (proc_dir == NULL) {
108        fprintf(stderr, "Could not open /proc.");
109        exit(1);
110    }
111
112    /* Read through processes. */
113    while (1) {
114        /* Read directory. */
115        struct dirent *e = readdir(proc_dir);
116        if (e == NULL)
117            break;
118
119        /* Skip non-directories. */
120        if ((e->d_type & DT_DIR) == 0)
121            continue;
122
123        /* Process? */
124        int p = atoi(e->d_name);
125        if (p == 0)
126            continue;
127
128        if (limit != NULL) {
129            int skip = 1;
130
131            /* Find the name of the process we're looking at. */
132            char name[PATH_MAX];
133            if (name_of(p, name, PATH_MAX) != 0)
134                /* This process doesn't have a name. Poor thing. */
135                continue;
136
137            /* Determine if this process matches any of the processes we should
138             * be considering.
139             */
140            char **l;
141            for (l = limit; *l != NULL; l++) {
142                if (!strcmp(name, *l)) {
143                    skip = 0;
144                    break;
145                }
146                char *last_slash = strrchr(name, '/');
147                if (last_slash != NULL && !strcmp(last_slash + 1, *l)) {
148                    skip = 0;
149                    break;
150                }
151            }
152
153            if (skip == 1)
154                /* No match. */
155                continue;
156
157#if DEBUG
158            printf("Considering %s...\n", name);
159#endif
160        }
161
162        proc_fn(p, data);
163    }
164
165    /* Cleanup. */
166    closedir(proc_dir);
167}
168
/* Results accumulated by test_process() across one scan of the processes. */
struct test_data {
    /* PID of the active process with the highest OOM score seen so far. */
    int worst_pid;
    /* Highest OOM score seen so far; a process must exceed it to replace
     * worst_pid. */
    unsigned long worst_oom_score;
    /* Running sum of the fault counter read from each process's stat file. */
    long long total_faults;
};
174
175void test_process(int p, void *d)
176{
177    struct test_data *data = d;
178    char buf[MAX_PATH_SIZE];
179    unsigned long oom_score = 0;
180    unsigned long vmem_usage = 0;
181    unsigned long rmem_usage = 0;
182    unsigned long pagefaults = 0;
183    char state;
184    FILE *f;
185    int n;
186
187    /* Read OOM score of process. */
188    sprintf(buf, "/proc/%d/oom_score", p);
189    f = fopen(buf, "r");
190    if (f == NULL)
191        return;
192    n = fscanf(f, "%lu", &oom_score);
193    if (n != 1)
194        fatal("Could not read process oom_score.");
195    fclose(f);
196
197    /* Read memory usage of process. */
198    sprintf(buf, "/proc/%d/statm", p);
199    f = fopen(buf, "r");
200    if (f == NULL)
201        return;
202    n = fscanf(f, "%lu %lu", &vmem_usage, &rmem_usage);
203    if (n != 2)
204        fatal("Could not read process memory usage.");
205    fclose(f);
206
207    /* Read pagefault information about the process. */
208    sprintf(buf, "/proc/%d/stat", p);
209    f = fopen(buf, "r");
210    if (f == NULL)
211        return;
212    n = fscanf(f, "%*d %*s %c %*d %*d %*d %*d %*d %*u %*u %*u %lu", &state, &pagefaults);
213    if (n != 2)
214        fatal("Could not read process stat info.");
215    fclose(f);
216
217    /* Are we in an active_state? */
218    int process_active = (state != 'T' && state != 'Z');
219
220    /* Collate data. */
221    data->total_faults += pagefaults;
222    if (oom_score > data->worst_oom_score && process_active) {
223        data->worst_oom_score = oom_score;
224        data->worst_pid = p;
225    }
226}
227
/*
 * Convert the value part of a /proc/meminfo line to a number.
 *
 * `buf` points just past the field label. Any run of leading spaces is
 * stepped over and the decimal number that follows is returned; trailing
 * text is ignored. Returns 0 if no number is present.
 */
static long int
parse_meminfo_int(char *buf)
{
    const char *digits = buf + strspn(buf, " ");

    return strtol(digits, NULL, 10);
}
235
236static void
237get_free_memory(unsigned long *total, unsigned long *free)
238{
239    char buf[MAX_LINE_SIZE];
240    unsigned long memtotal = 0;
241    unsigned long memfree = 0;
242    unsigned long memcached = 0;
243
244    /* Read meminfo file. */
245    FILE *f = fopen("/proc/meminfo", "r");
246    if (f == NULL) {
247        fprintf(stderr, "Could not open /proc/meminfo.");
248        exit(1);
249    }
250
251    while (1) {
252        char *r = fgets(buf, MAX_LINE_SIZE, f);
253        if (r == NULL)
254            break;
255        if (strncmp("MemTotal: ", buf, 10) == 0) {
256            memtotal = parse_meminfo_int(buf + 10);
257        } else if (strncmp("MemFree:  ", buf, 10) == 0) {
258            memfree = parse_meminfo_int(buf + 10);
259        } else if (strncmp("Cached:   ", buf, 10) == 0) {
260            memcached = parse_meminfo_int(buf + 10);
261        }
262    }
263
264    fclose(f);
265    *total = memtotal;
266    *free = memfree + memcached;
267}
268
269int is_system_unstable(
270        long long last_fault_count,
271        long long this_fault_count)
272{
273    struct sysinfo info;
274    int error = sysinfo(&info);
275    if (error)
276        return 0;
277
278    /* Get free RAM. */
279    unsigned long memtotal, memfree;
280    get_free_memory(&memtotal, &memfree);
281    double free_ram = (memfree / (double)memtotal);
282
283    /* Get number of faults. */
284    long long faults = 0;
285    if (last_fault_count > 0)
286        faults = (this_fault_count - last_fault_count);
287
288    /* Get system load. */
289    double system_load = info.loads[0] / (double)LINUX_SYSINFO_LOADS_SCALE;
290
291#if DEBUG
292    /* Print information. */
293    printf("[RAM: %5.1lf] [LOAD: %5.1lf] [FAULTS: %5lld]\n",
294            free_ram * 100.0, system_load, faults);
295#endif
296
297    /* Determine if the system is unstable. */
298    if (free_ram > DANGEROUS_FREE_RAM)
299        return 0;
300    if (system_load < DANGEROUS_LOAD)
301        return 0;
302    if (faults < DANGEROUS_FAULTS_PER_SECOND * SLEEP_TIME && system_load < VERY_DANGEROUS_LOAD)
303        return 0;
304    return 1;
305}
306
/*
 * Determine what signal the given string parses to.
 *
 * `input` may be the name of a supported signal (SIGABRT, SIGSTOP, SIGTERM or
 * SIGKILL) or that signal's number written in decimal. Numbers are compared
 * against the values this platform's <signal.h> assigns, because signal
 * numbers such as SIGSTOP's differ between architectures.
 *
 * On a match, *signal receives the signal number, *signame a pointer to its
 * static name string, and 0 is returned. Otherwise 1 is returned and the
 * outputs are left untouched.
 */
int parse_signal(const char *input, int *signal, const char **signame)
{
    static const struct {
        int number;
        const char *name;
    } supported[] = {
        { SIGABRT, "SIGABRT" },
        { SIGSTOP, "SIGSTOP" },
        { SIGTERM, "SIGTERM" },
        { SIGKILL, "SIGKILL" },
    };
    size_t i;

    for (i = 0; i < sizeof supported / sizeof supported[0]; i++) {
        /* Render the number and compare text, so that only the plain
         * decimal spelling is accepted. */
        char digits[16];
        snprintf(digits, sizeof digits, "%d", supported[i].number);

        if (strcmp(input, supported[i].name) == 0
                || strcmp(input, digits) == 0) {
            *signal = supported[i].number;
            *signame = supported[i].name;
            return 0;
        }
    }

    return 1;
}
336
/*
 * Print a short usage message to stdout.
 *
 * The program name is taken from argv[0] when argc is positive and falls
 * back to "autostop" otherwise.
 */
void usage(int argc, char **argv)
{
    printf("\n"
        "usage: %s [<SIGNAL>] [<processes>]\n\n"
        "Monitors the system for high load and sends a signal to (hopefully)\n"
        "the culprit process.\n\n"
        "<SIGNAL> must be one of SIGABRT, SIGKILL, SIGSTOP or SIGTERM\n"
        "(default: SIGSTOP).\n"
        "If you don't pass a list of candidate processes, all are considered.\n\n",
        argc > 0 ? argv[0] : "autostop");
}
347
348int main(int argc, char **argv)
349{
350    int skip_count = 0;
351    long long last_fault_count = 0;
352    int signal;
353    const char *signame;
354    char **suspects;
355
356    /* Determine which signal to send. */
357    if (argc < 2) {
358        signal = SIGSTOP;
359        signame = "SIGSTOP";
360    } else {
361        int error = parse_signal(argv[1], &signal, &signame);
362        if (error) {
363            usage(argc, argv);
364            return 1;
365        }
366    }
367
368    /* Determine the list of candidate processes. */
369    suspects = argc > 2 ? &argv[2] : NULL;
370
371    /* Set our scheduling priority higher. */
372    (void)setpriority(PRIO_PROCESS, 0, SCHED_PRIO);
373
374    while (1) {
375        /* Collect data. */
376        struct test_data d = {
377            .worst_pid = -1,
378            .worst_oom_score = MIN_OOM_SCORE,
379            .total_faults = 0,
380        };
381        iterate_processes(suspects, test_process, &d);
382
383        /* Determine if things are looking bad and we haven't recently stoped something. */
384        if (is_system_unstable(last_fault_count, d.total_faults)) {
385            if (d.worst_pid != -1 && skip_count == 0) {
386#if DEBUG
387                printf("Sending %s to pid %d.\n", signame, d.worst_pid);
388#endif
389                int error = kill(d.worst_pid, signal);
390                if (!error) {
391                    syslog(LOG_ALERT,
392                            "auto-stop: Sending %s to pid %d to prevent system melt-down.\n", signame, d.worst_pid);
393                    skip_count = SLEEP_AFTER_STOP_SECONDS / SLEEP_TIME;
394                }
395            }
396        }
397        if (skip_count > 0)
398            skip_count--;
399
400        last_fault_count = d.total_faults;
401        sleep(SLEEP_TIME);
402    }
403
404    return 0;
405}
406