1/* record_replay.h                  -*-C++-*-
2 *
3 *************************************************************************
4 *
5 *  @copyright
6 *  Copyright (C) 2012-2013, Intel Corporation
7 *  All rights reserved.
8 *
9 *  @copyright
10 *  Redistribution and use in source and binary forms, with or without
11 *  modification, are permitted provided that the following conditions
12 *  are met:
13 *
14 *    * Redistributions of source code must retain the above copyright
15 *      notice, this list of conditions and the following disclaimer.
16 *    * Redistributions in binary form must reproduce the above copyright
17 *      notice, this list of conditions and the following disclaimer in
18 *      the documentation and/or other materials provided with the
19 *      distribution.
20 *    * Neither the name of Intel Corporation nor the names of its
21 *      contributors may be used to endorse or promote products derived
22 *      from this software without specific prior written permission.
23 *
24 *  @copyright
25 *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 *  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 *  HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
30 *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
31 *  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
32 *  OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
33 *  AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
35 *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 *  POSSIBILITY OF SUCH DAMAGE.
37 *
38 **************************************************************************/
39
40/**
41 * @file record-replay.h
42 *
43 * @brief record-replay.h and .cpp encapsulate most of the functionality to
44 * record and play back a Cilk Plus application.
45 *
46 * Recording is directed by the setting of the CILK_RECORD_LOG environment
47 * variable.  If it's defined, the value specifies the root we'll use to
48 * generate files for each worker using the following format string:
49 * "%s%d.cilklog", where the integer is the value of w->self.
50 *
51 * Replay is directed by the setting of the CILK_REPLAY_LOG environment
52 * variable, interpreted the same way as CILK_RECORD_LOG.  If both
53 * CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given
54 * and the attempt to record a log will be ignored.
55 *
56 * Recording is relatively straightforward.  We write all information about a
57 * worker to a per-worker file.
58 *
59 * Each pedigree record consists of the following fields.  All fields must be
60 * present in every record to make parsing easy.
61 *    - Type - A string identifying the pedigree record.  See the PED_TYPE_STR_
62 *      macros for the currently defined values.
63 *    - Pedigree - A string of pedigree values, with underscores between
64 *      adjacent values.
65 *    - i1 - Record type-specific value.  -1 if not used.
66 *    - i2 - Record type-specific value.  -1 if not used.
67 *
68 * WORKERS record - only written to the file for worker 0.  Note that this is
69 * the first worker in the workers array. Worker 0 is the first system worker,
70 * *NOT* a user worker.
71 *  - Type: "Workers"
72 *  - Pedigree: Always "0" - ignored
73 *  - i1: Number of workers (g->P) when we recorded the log.  A mismatch when
74 *        we attempt to replay the log will result in aborting the execution.
75 *  - i2: Log version number - Specified by PED_VERSION in record-replay.cpp
76 *
77 * STEAL record - written after a successful steal.
78 *  - Type: "Steal"
79 *  - Pedigree: Pedigree of stolen frame
80 *  - i1: Worker the frame was stolen from
81 *  - i2: -1
82 *
83 * SYNC record - written after a worker continues from a sync.
84 *  - Type: "Sync"
85 *  - Pedigree: Pedigree of sync.  Note that this is the pedigree *before*
86 *        the pedigree in incremented in setup_for_execution_pedigree().
87 *  - i1: -1
88 *  - i2: -1
89 *
90 * ORPHANED record - saved on a return to a stolen parent.
91 *  - Type: "Orphaned"
92 *  - Pedigree: Pedigree of the parent frame *before* the pedigree is
93 *        incremented by the return
94 *  - i1: -1
95 *  - i2: -1
96 *
97 * On replay, the data is loaded into a per-worker array, and the data is
98 * consumed in order as needed.
99 */
100
101#ifndef INCLUDED_RECORD_REPLAY_DOT_H
102#define INCLUDED_RECORD_REPLAY_DOT_H
103
104#include "cilk/common.h"
105#include "global_state.h"
106
107/**
108 * Define CILK_RECORD_REPLAY to enable record/replay functionality.  If
109 * CILK_RECORD_REPLAY is not defined, all of the record/replay functions in
110 * record-replay.h will be stubbed out.  Since they're declared as inline,
111 * functions, the resulting build should have no performance impact due to
112 * the implementation or record/replay.
113 */
114 #define CILK_RECORD_REPLAY 1
115
116/**
117 * Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log.  This
118 * should only be needed when debugging the replay functionality.  This should
119 * always be defined as 0 when record-replay.h is checked in.
120 */
121#define RECORD_ON_REPLAY 0
122
123__CILKRTS_BEGIN_EXTERN_C
124
125#ifdef CILK_RECORD_REPLAY
126// Declarations of internal record/replay functions.  The inlined versions
127// further down do some preliminary testing (like if we're not recording or
128// replaying) and will stub out the functionality if we've compiled out the
129// record/replay feature
130int replay_match_sync_pedigree_internal(__cilkrts_worker *w);
131void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w);
132void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id);
133void replay_record_sync_internal(__cilkrts_worker *w);
134void replay_record_orphaned_internal(__cilkrts_worker *w);
135int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim);
136void replay_advance_from_sync_internal (__cilkrts_worker *w);
137int replay_get_next_recorded_victim_internal(__cilkrts_worker *w);
138#endif //  CILK_RECORD_REPLAY
139
140// Publically defined record/replay API
141
142/**
143 * If we're replaying a log, wait for our parent to be stolen if it was when
144 * the log was recorded.  If record/replay is compiled out, this is a noop.
145 *
146 * @param w The __cilkrts_worker we're executing on.  The worker's replay
147 * list will be checked for a ORPHANED record with a matching pedigree.  If
148 * there is a match, the ORPHANED record will be consumed.
149 */
150#ifdef CILK_RECORD_REPLAY
151__CILKRTS_INLINE
152void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
153{
154    // Only check if we're replaying a log
155    if (REPLAY_LOG == w->g->record_or_replay)
156        replay_wait_for_steal_if_parent_was_stolen_internal(w);
157}
158#else
159__CILKRTS_INLINE
160void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
161{
162    // If record/replay is disabled, we never wait
163}
164#endif //  CILK_RECORD_REPLAY
165
166/**
167 * Called from random_steal() to override the ID of the randomly chosen victim
168 * worker which this worker will attempt to steal from. Returns the worker id
169 * of the next victim this worker was recorded stealing from, or -1 if the
170 * next record in the log is not a STEAL.
171 *
172 * @note This call does NOT attempt to match the pedigree.  That will be done
173 * by replay_match_victim_pedigree() after random_steal() has locked the victim
174 * worker.
175 *
176 * @param w The __cilkrts_worker we're executing on.  The worker's replay log
177 * is checked for a STEAL record.  If we've got one, the stolen worker ID is
178 * returned.
179 * @param id The randomly chosen victim worker ID.  If we're not replaying a
180 * log, or if record/replay has been compiled out, this is the value that
181 * will be returned.
182 *
183 * @return id if we're not replaying a log
184 * @return -1 if the next record is not a STEAL
185 * @return recorded stolen worker ID if we've got a matching STEAL record
186 */
187#ifdef CILK_RECORD_REPLAY
188__CILKRTS_INLINE
189int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
190{
191    // Only check if we're replaying a log
192    if (REPLAY_LOG == w->g->record_or_replay)
193        return replay_get_next_recorded_victim_internal(w);
194    else
195        return id;
196}
197#else
198__CILKRTS_INLINE
199int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
200{
201    // Record/replay is disabled.  Always return the original worker id
202    return id;
203}
204#endif //  CILK_RECORD_REPLAY
205
206/**
207 * Initialize per-worker data for record/replay.  A noop if record/replay
208 * is disabled, or if we're not recording or replaying anything.
209 *
210 * If we're recording a log, this will ready us to create the per-worker
211 * logs.
212 *
213 * If we're replaying a log, this will read the logs into the per-worker
214 * structures.
215 *
216 * @param g Cilk runtime global state
217 */
218void replay_init_workers(global_state_t *g);
219
220/**
221 * Record a record on a successful steal.  A noop if record/replay is
222 * diabled, or if we're not recording anything
223 *
224 * @param w The __cilkrts_worker we're executing on.  The pedigree of
225 * the stolen frame will be walked to generate the STEAL record.
226 *
227 * @param victim_id The worker ID of the worker w stole from.
228 */
229#ifdef CILK_RECORD_REPLAY
230__CILKRTS_INLINE
231void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
232{
233#if RECORD_ON_REPLAY
234    // If we're recording on replay, write the record if we're recording or
235    // replaying
236    if (RECORD_REPLAY_NONE == w->g->record_or_replay)
237        return;
238#else
239    // Only write the record if we're recording
240    if (RECORD_LOG != w->g->record_or_replay)
241        return;
242#endif
243
244    replay_record_steal_internal(w, victim_id);
245}
246#else
247__CILKRTS_INLINE
248void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
249{
250}
251#endif //  CILK_RECORD_REPLAY
252
253/**
254 * Record a record when continuing after a sync.  A noop if record/replay is
255 * diabled, or if we're not recording anything, or if the sync was abandoned,
256 * meaning this isn't the worker that continues from the sync.
257 *
258 * @param w The __cilkrts_worker for we're executing on.  The pedigree of
259 * the sync-ing frame will be walked to generate the SYNC record.
260 *
261 * @param continuing True if this worker will be continuing from the
262 * cilk_sync.  A SYNC record will only be generated if continuing is true.
263 */
264#ifdef CILK_RECORD_REPLAY
265__CILKRTS_INLINE
266void replay_record_sync(__cilkrts_worker *w, int continuing)
267{
268    // If this was not the last worker to the syn, return
269    if (! continuing)
270        return;
271
272#if RECORD_ON_REPLAY
273    // If we're recording on replay, write the record if we're recording or
274    // replaying
275    if (RECORD_REPLAY_NONE == w->g->record_or_replay)
276        return;
277#else
278    // Only write the record if we're recording
279    if (RECORD_LOG != w->g->record_or_replay)
280        return;
281#endif
282
283    replay_record_sync_internal(w);
284}
285#else
286__CILKRTS_INLINE
287void replay_record_sync(__cilkrts_worker *w, int abandoned)
288{
289}
290#endif //  CILK_RECORD_REPLAY
291
292/**
293 * Record a record on a return to a stolen parent.  A noop if record/replay is
294 * diabled, or if we're not recording anything.
295 *
296 * @param w The __cilkrts_worker for we're executing on.  The pedigree of the
297 * frame that has discovered that its parent has been stolken will be walked
298 * to generate the ORPHANED record.
299 */
300#ifdef CILK_RECORD_REPLAY
301__CILKRTS_INLINE
302void replay_record_orphaned(__cilkrts_worker *w)
303{
304#if RECORD_ON_REPLAY
305    // If we're recording on replay, write the record if we're recording or
306    // replaying
307    if (RECORD_REPLAY_NONE == w->g->record_or_replay)
308        return;
309#else
310    // Only write the record if we're recording
311    if (RECORD_LOG != w->g->record_or_replay)
312        return;
313#endif
314
315    replay_record_orphaned_internal(w);
316}
317#else
318__CILKRTS_INLINE
319void replay_record_orphaned(__cilkrts_worker *w)
320{
321}
322#endif //  CILK_RECORD_REPLAY
323
324/**
325 * Test whether the frame at the head of the victim matches the pedigree of
326 * the frame that was recorded being stolen.  Called in random steal to verify
327 * that we're about to steal the correct frame.
328 *
329 * @param w The __cilkrts_worker for we're executing on.  The current worker
330 * is needed to find the replay entry to be checked.
331 *
332 * @param victim The __cilkrts_worker for we're proposing to steal a frame
333 * from.  The victim's head entry is
334 * is needed to find the replay entry to be checked.
335 *
336 * @return 0 if we're replaying a log and the victim's pedigree does NOT match
337 * the next frame the worker is expected to steal.
338 *
339 * @return 1 in all other cases to indicate that the steal attempt should
340 * continue
341 */
342#ifdef CILK_RECORD_REPLAY
343__CILKRTS_INLINE
344int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
345{
346    // We're not replaying a log. The victim is always acceptable
347    if (REPLAY_LOG != w->g->record_or_replay)
348        return 1;
349
350    // Return 1 if the victim's pedigree matches the frame the worker stole
351    // when we recorded the log
352    return replay_match_victim_pedigree_internal(w, victim);
353}
354#else
355__CILKRTS_INLINE
356int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
357{
358    // Record/replay is disabled.  The victim is always acceptable
359    return 1;
360}
361#endif //  CILK_RECORD_REPLAY
362
363/**
364 * Test whether the current replay entry is a sync record matching the
365 * worker's pedigree.
366 *
367 * @param w The __cilkrts_worker for we're executing on.
368 *
369 * @return 1 if the current replay entry matches the current pedigree.
370 * @return 0 if there's no match, or if we're not replaying a log.
371 */
372#ifdef CILK_RECORD_REPLAY
373__CILKRTS_INLINE
374int replay_match_sync_pedigree(__cilkrts_worker *w)
375{
376    // If we're not replaying, assume no match
377    if (REPLAY_LOG != w->g->record_or_replay)
378        return 0;
379
380    return replay_match_sync_pedigree_internal(w);
381}
382#else
383__CILKRTS_INLINE
384int replay_match_sync_pedigree(__cilkrts_worker *w)
385{
386    // Record/replay is disabled.  Assume no match
387    return 0;
388}
389#endif
390
391/**
392 * Marks a sync record seen, advancing to the next record in the replay list.
393 *
394 * This function will only advance to the next record if:
395 *   - Record/replay hasn't been compiled out AND
396 *   - We're replaying a log AND
397 *   - A match was found AND
398 *   - The sync is not being abandoned
399 *
400 * @param w The __cilkrts_worker for we're executing on.
401 * @param match_found The value returned by replay_match_sync_pedigree().  If
402 * match_found is false, nothing is done.
403 * @param continuing  Flag indicating whether this worker will continue from
404 * the sync (it's the last worker to the sync) or if it will abandon the work
405 * and go to the scheduling loop to look for more work it can steal.
406 */
407#ifdef CILK_RECORD_REPLAY
408__CILKRTS_INLINE
409void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
410{
411    // If we're replaying a log, and the current sync wasn't abandoned, and we
412    // found a match in the log, mark the sync record seen.
413    if ((REPLAY_LOG == w->g->record_or_replay) && match_found && continuing)
414        replay_advance_from_sync_internal(w);
415}
416#else
417__CILKRTS_INLINE
418void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
419{
420}
421#endif
422
423/**
424 * Release any resources used to read or write a replay log.
425 *
426 * @param g Cilk runtime global state
427 */
428void replay_term(global_state_t *g);
429
430__CILKRTS_END_EXTERN_C
431
432#endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H)
433