1/*
2 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23package org.graalvm.compiler.hotspot;
24
25import static org.graalvm.compiler.hotspot.HotSpotGraalCompiler.fmt;
26
27import java.util.Arrays;
28
29import org.graalvm.compiler.debug.TTY;
30import org.graalvm.compiler.options.Option;
31import org.graalvm.compiler.options.OptionKey;
32import org.graalvm.compiler.options.OptionType;
33import org.graalvm.compiler.options.OptionValues;
34
35import jdk.vm.ci.meta.ResolvedJavaMethod;
36
37/**
38 * A watch dog for reporting long running compilations. This is designed to be an always on
39 * mechanism for the purpose of getting better reports from customer sites. As such, it only exits
40 * the VM when it is very sure about a stuck compilation as opposed to only observing a long running
41 * compilation. In both cases, it logs messages to {@link TTY}.
42 *
43 * A watch dog thread is associated with each compiler thread. It wakes up every
44 * {@value #SPIN_TIMEOUT_MS} milliseconds to observe the state of the compiler thread. After the
45 * first {@link Options#CompilationWatchDogStartDelay} seconds of a specific compilation, the watch
46 * dog reports a long running compilation. Every
47 * {@link Options#CompilationWatchDogStackTraceInterval} seconds after that point in time where the
48 * same compilation is still executing, the watch dog takes a stack trace of the compiler thread. If
49 * more than {@value Options#NonFatalIdenticalCompilationSnapshots} contiguous identical stack
50 * traces are seen, the watch dog reports a stuck compilation and exits the VM.
51 */
52class CompilationWatchDog extends Thread implements AutoCloseable {
53
54    public static class Options {
55        // @formatter:off
56        @Option(help = "Delay in seconds before watch dog monitoring a compilation (0 disables monitoring).", type = OptionType.Debug)
57        public static final OptionKey<Double> CompilationWatchDogStartDelay = new OptionKey<>(0.0D);
58        @Option(help = "Interval in seconds between a watch dog reporting stack traces for long running compilations.", type = OptionType.Debug)
59        public static final OptionKey<Double> CompilationWatchDogStackTraceInterval = new OptionKey<>(60.0D);
60        @Option(help = "Number of contiguous identical compiler thread stack traces allowed before the VM exits " +
61                       "on the basis of a stuck compilation.", type = OptionType.Debug)
62        public static final OptionKey<Integer> NonFatalIdenticalCompilationSnapshots = new OptionKey<>(20);
63        // @formatter:on
64    }
65
66    private enum WatchDogState {
67        /**
68         * The watch dog thread sleeps currently, either no method is currently compiled, or no
69         * method is compiled long enough to be monitored.
70         */
71        SLEEPING,
72        /**
73         * The watch dog thread identified a compilation that already takes long enough to be
74         * interesting. It will sleep and wake up periodically and check if the current compilation
75         * takes too long. If it takes too long it will start collecting stack traces from the
76         * compiler thread.
77         */
78        WATCHING_WITHOUT_STACK_INSPECTION,
79        /**
80         * The watch dog thread is fully monitoring the compiler thread. It takes stack traces
81         * periodically and sleeps again until the next period. If the number of stack traces
82         * reaches a certain upper bound and those stack traces are equal it will shut down the
83         * entire VM with an error.
84         */
85        WATCHING_WITH_STACK_INSPECTION
86    }
87
88    /**
89     * The number of milliseconds a watch dog thread sleeps between observing the state of the
90     * compilation thread it is associated with. Most compilations are expected to complete within
91     * this time period and thus not be actively monitored by the watch dog.
92     */
93    private static final int SPIN_TIMEOUT_MS = 1000;
94
95    private WatchDogState state = WatchDogState.SLEEPING;
96    private final Thread compilerThread;
97    private final long startDelayMilliseconds;
98    private final long stackTraceIntervalMilliseconds;
99    private final int nonFatalIdenticalCompilationSnapshots;
100    private volatile ResolvedJavaMethod currentMethod;
101    private volatile int currentId;
102    private ResolvedJavaMethod lastWatched;
103
104    // The 4 fields below are for a single compilation being watched
105    private long elapsed;
106    private int traceIntervals;
107    private int numberOfIdenticalStackTraces;
108    private StackTraceElement[] lastStackTrace;
109
110    CompilationWatchDog(Thread compilerThread, long startDelayMilliseconds, long stackTraceIntervalMilliseconds, int nonFatalIdenticalCompilationSnapshots) {
111        this.compilerThread = compilerThread;
112        this.setName("WatchDog" + getId() + "[" + compilerThread.getName() + "]");
113        this.setPriority(Thread.MAX_PRIORITY);
114        this.setDaemon(true);
115        this.startDelayMilliseconds = startDelayMilliseconds;
116        this.stackTraceIntervalMilliseconds = stackTraceIntervalMilliseconds;
117        this.nonFatalIdenticalCompilationSnapshots = nonFatalIdenticalCompilationSnapshots;
118    }
119
120    public void startCompilation(ResolvedJavaMethod method, int id) {
121        trace("start %s", fmt(method));
122        this.currentMethod = method;
123        this.currentId = id;
124    }
125
126    public void stopCompilation() {
127        trace(" stop %s", fmt(currentMethod));
128        this.currentMethod = null;
129    }
130
131    private void reset() {
132        elapsed = 0;
133        traceIntervals = 0;
134        numberOfIdenticalStackTraces = 0;
135        lastWatched = null;
136        lastStackTrace = null;
137        state = WatchDogState.SLEEPING;
138    }
139
140    private void tick(WatchDogState newState) {
141        state = newState;
142    }
143
144    /**
145     * Saves the current stack trace {@link StackTraceElement} of the monitored compiler thread
146     * {@link CompilationWatchDog#compilerThread}.
147     *
148     * @param newStackTrace the current stack trace of the monitored compiler thread
149     * @return {@code true} if the stack trace is equal to the last stack trace (or if it is the
150     *         first one) and {@code false} if it is not equal to the last one.
151     */
152    private boolean recordStackTrace(StackTraceElement[] newStackTrace) {
153        if (lastStackTrace == null) {
154            lastStackTrace = newStackTrace;
155            return true;
156        }
157        if (!Arrays.equals(lastStackTrace, newStackTrace)) {
158            lastStackTrace = newStackTrace;
159            return false;
160        }
161        return true;
162    }
163
164    /**
165     * Set to true to debug the watch dog.
166     */
167    private static final boolean DEBUG = Boolean.getBoolean("debug.graal.CompilationWatchDog");
168
169    private void trace(String format, Object... args) {
170        if (DEBUG) {
171            TTY.println(this + ": " + String.format(format, args));
172        }
173    }
174
175    private static long ms(double seconds) {
176        return (long) seconds * 1000;
177    }
178
179    private static double secs(long ms) {
180        return (double) ms / 1000;
181    }
182
183    @Override
184    public String toString() {
185        return getName();
186    }
187
188    @Override
189    public void run() {
190        try {
191            trace("Started%n", this);
192            while (true) {
193                // get a copy of the last set method
194                final ResolvedJavaMethod currentlyCompiling = currentMethod;
195                if (currentlyCompiling == null) {
196                    // continue sleeping, compilation is either over before starting
197                    // to watch the compiler thread or no compilation at all started
198                    reset();
199                } else {
200                    switch (state) {
201                        case SLEEPING:
202                            lastWatched = currentlyCompiling;
203                            elapsed = 0;
204                            tick(WatchDogState.WATCHING_WITHOUT_STACK_INSPECTION);
205                            break;
206                        case WATCHING_WITHOUT_STACK_INSPECTION:
207                            if (currentlyCompiling.equals(lastWatched)) {
208                                if (elapsed >= startDelayMilliseconds) {
209                                    // we looked at the same compilation for a certain time
210                                    // so now we start to collect stack traces
211                                    tick(WatchDogState.WATCHING_WITH_STACK_INSPECTION);
212                                    trace("changes mode to watching with stack traces");
213                                } else {
214                                    // we still compile the same method but won't collect traces
215                                    // yet
216                                    trace("watching without stack traces [%.2f seconds]", secs(elapsed));
217                                }
218                                elapsed += SPIN_TIMEOUT_MS;
219                            } else {
220                                // compilation finished before we exceeded initial watching
221                                // period
222                                reset();
223                            }
224                            break;
225                        case WATCHING_WITH_STACK_INSPECTION:
226                            if (currentlyCompiling.equals(lastWatched)) {
227                                if (elapsed >= startDelayMilliseconds + (traceIntervals * stackTraceIntervalMilliseconds)) {
228                                    trace("took a stack trace");
229                                    boolean newStackTrace = recordStackTrace(compilerThread.getStackTrace());
230                                    if (!newStackTrace) {
231                                        trace("%d identical stack traces in a row", numberOfIdenticalStackTraces);
232                                        numberOfIdenticalStackTraces = 0;
233                                    }
234                                    numberOfIdenticalStackTraces++;
235                                    if (numberOfIdenticalStackTraces > nonFatalIdenticalCompilationSnapshots) {
236                                        synchronized (CompilationWatchDog.class) {
237                                            TTY.printf("======================= WATCH DOG THREAD =======================%n" +
238                                                            "%s took %d identical stack traces, which indicates a stuck compilation (id=%d) of %s%n%sExiting VM%n", this,
239                                                            numberOfIdenticalStackTraces, currentId, fmt(currentMethod), fmt(lastStackTrace));
240                                            System.exit(-1);
241                                        }
242                                    } else if (newStackTrace) {
243                                        synchronized (CompilationWatchDog.class) {
244                                            TTY.printf("======================= WATCH DOG THREAD =======================%n" +
245                                                            "%s detected long running compilation (id=%d) of %s [%.2f seconds]%n%s", this, currentId, fmt(currentMethod),
246                                                            secs(elapsed), fmt(lastStackTrace));
247                                        }
248                                    }
249                                    traceIntervals++;
250                                } else {
251                                    // we still watch the compilation in the same trace interval
252                                    trace("watching with stack traces [%.2f seconds]", secs(elapsed));
253                                }
254                                elapsed += SPIN_TIMEOUT_MS;
255                            } else {
256                                // compilation finished before we are able to collect stack
257                                // traces
258                                reset();
259                            }
260                            break;
261                        default:
262                            break;
263                    }
264                }
265                try {
266                    Thread.sleep(SPIN_TIMEOUT_MS);
267                } catch (InterruptedException e) {
268                    // Silently swallow
269                }
270            }
271        } catch (VirtualMachineError vmError) {
272            /*
273             * We encounter a VM error. This includes for example OutOfMemoryExceptions. In such a
274             * case we silently swallow the error. If it happens again the application thread will
275             * most likely encounter the same problem. If not the watchdog thread will no longer
276             * monitor the compilation and thus the error cannot happen again.
277             */
278        } catch (Throwable t) {
279            /*
280             * A real exception happened on the compilation watchdog. This is unintended behavior
281             * and must not happen in any case.
282             */
283            throw new InternalError(String.format("%s encountered an exception%n%s%n", this, fmt(t)), t);
284        }
285    }
286
287    private static final ThreadLocal<CompilationWatchDog> WATCH_DOGS = new ThreadLocal<>();
288
289    /**
290     * Opens a scope for watching the compilation of a given method.
291     *
292     * @param method a method about to be compiled
293     * @param id compilation request identifier
294     * @return {@code null} if the compilation watch dog is disabled otherwise this object. The
295     *         returned value should be used in a {@code try}-with-resources statement whose scope
296     *         is the whole compilation so that leaving the scope will cause {@link #close()} to be
297     *         called.
298     */
299    static CompilationWatchDog watch(ResolvedJavaMethod method, int id, OptionValues options) {
300        long startDelayMilliseconds = ms(Options.CompilationWatchDogStartDelay.getValue(options));
301        if (startDelayMilliseconds > 0.0D) {
302            // Lazily get a watch dog thread for the current compiler thread
303            CompilationWatchDog watchDog = WATCH_DOGS.get();
304            if (watchDog == null) {
305                Thread currentThread = currentThread();
306                long stackTraceIntervalMilliseconds = ms(Options.CompilationWatchDogStackTraceInterval.getValue(options));
307                int nonFatalIdenticalCompilationSnapshots = Options.NonFatalIdenticalCompilationSnapshots.getValue(options);
308                watchDog = new CompilationWatchDog(currentThread, startDelayMilliseconds, stackTraceIntervalMilliseconds, nonFatalIdenticalCompilationSnapshots);
309                WATCH_DOGS.set(watchDog);
310                watchDog.start();
311            }
312            watchDog.startCompilation(method, id);
313            return watchDog;
314        }
315        return null;
316    }
317
318    @Override
319    public void close() {
320        stopCompilation();
321    }
322}
323