1/* record_replay.h -*-C++-*- 2 * 3 ************************************************************************* 4 * 5 * @copyright 6 * Copyright (C) 2012-2013, Intel Corporation 7 * All rights reserved. 8 * 9 * @copyright 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * * Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * * Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * * Neither the name of Intel Corporation nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * 24 * @copyright 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 29 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 30 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS 32 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 33 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 35 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 * 38 **************************************************************************/ 39 40/** 41 * @file record-replay.h 42 * 43 * @brief record-replay.h and .cpp encapsulate most of the functionality to 44 * record and play back a Cilk Plus application. 45 * 46 * Recording is directed by the setting of the CILK_RECORD_LOG environment 47 * variable. If it's defined, the value specifies the root we'll use to 48 * generate files for each worker using the following format string: 49 * "%s%d.cilklog", where the integer is the value of w->self. 50 * 51 * Replay is directed by the setting of the CILK_REPLAY_LOG environment 52 * variable, interpreted the same way as CILK_RECORD_LOG. If both 53 * CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given 54 * and the attempt to record a log will be ignored. 55 * 56 * Recording is relatively straightforward. We write all information about a 57 * worker to a per-worker file. 58 * 59 * Each pedigree record consists of the following fields. All fields must be 60 * present in every record to make parsing easy. 61 * - Type - A string identifying the pedigree record. See the PED_TYPE_STR_ 62 * macros for the currently defined values. 63 * - Pedigree - A string of pedigree values, with underscores between 64 * adjacent values. 65 * - i1 - Record type-specific value. -1 if not used. 66 * - i2 - Record type-specific value. -1 if not used. 67 * 68 * WORKERS record - only written to the file for worker 0. Note that this is 69 * the first worker in the workers array. Worker 0 is the first system worker, 70 * *NOT* a user worker. 71 * - Type: "Workers" 72 * - Pedigree: Always "0" - ignored 73 * - i1: Number of workers (g->P) when we recorded the log. A mismatch when 74 * we attempt to replay the log will result in aborting the execution. 75 * - i2: Log version number - Specified by PED_VERSION in record-replay.cpp 76 * 77 * STEAL record - written after a successful steal. 78 * - Type: "Steal" 79 * - Pedigree: Pedigree of stolen frame 80 * - i1: Worker the frame was stolen from 81 * - i2: -1 82 * 83 * SYNC record - written after a worker continues from a sync. 84 * - Type: "Sync" 85 * - Pedigree: Pedigree of sync. Note that this is the pedigree *before* 86 * the pedigree in incremented in setup_for_execution_pedigree(). 87 * - i1: -1 88 * - i2: -1 89 * 90 * ORPHANED record - saved on a return to a stolen parent. 91 * - Type: "Orphaned" 92 * - Pedigree: Pedigree of the parent frame *before* the pedigree is 93 * incremented by the return 94 * - i1: -1 95 * - i2: -1 96 * 97 * On replay, the data is loaded into a per-worker array, and the data is 98 * consumed in order as needed. 99 */ 100 101#ifndef INCLUDED_RECORD_REPLAY_DOT_H 102#define INCLUDED_RECORD_REPLAY_DOT_H 103 104#include "cilk/common.h" 105#include "global_state.h" 106 107/** 108 * Define CILK_RECORD_REPLAY to enable record/replay functionality. If 109 * CILK_RECORD_REPLAY is not defined, all of the record/replay functions in 110 * record-replay.h will be stubbed out. Since they're declared as inline, 111 * functions, the resulting build should have no performance impact due to 112 * the implementation or record/replay. 113 */ 114 #define CILK_RECORD_REPLAY 1 115 116/** 117 * Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log. This 118 * should only be needed when debugging the replay functionality. This should 119 * always be defined as 0 when record-replay.h is checked in. 120 */ 121#define RECORD_ON_REPLAY 0 122 123__CILKRTS_BEGIN_EXTERN_C 124 125#ifdef CILK_RECORD_REPLAY 126// Declarations of internal record/replay functions. The inlined versions 127// further down do some preliminary testing (like if we're not recording or 128// replaying) and will stub out the functionality if we've compiled out the 129// record/replay feature 130int replay_match_sync_pedigree_internal(__cilkrts_worker *w); 131void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w); 132void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id); 133void replay_record_sync_internal(__cilkrts_worker *w); 134void replay_record_orphaned_internal(__cilkrts_worker *w); 135int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim); 136void replay_advance_from_sync_internal (__cilkrts_worker *w); 137int replay_get_next_recorded_victim_internal(__cilkrts_worker *w); 138#endif // CILK_RECORD_REPLAY 139 140// Publically defined record/replay API 141 142/** 143 * If we're replaying a log, wait for our parent to be stolen if it was when 144 * the log was recorded. If record/replay is compiled out, this is a noop. 145 * 146 * @param w The __cilkrts_worker we're executing on. The worker's replay 147 * list will be checked for a ORPHANED record with a matching pedigree. If 148 * there is a match, the ORPHANED record will be consumed. 149 */ 150#ifdef CILK_RECORD_REPLAY 151__CILKRTS_INLINE 152void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w) 153{ 154 // Only check if we're replaying a log 155 if (REPLAY_LOG == w->g->record_or_replay) 156 replay_wait_for_steal_if_parent_was_stolen_internal(w); 157} 158#else 159__CILKRTS_INLINE 160void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w) 161{ 162 // If record/replay is disabled, we never wait 163} 164#endif // CILK_RECORD_REPLAY 165 166/** 167 * Called from random_steal() to override the ID of the randomly chosen victim 168 * worker which this worker will attempt to steal from. Returns the worker id 169 * of the next victim this worker was recorded stealing from, or -1 if the 170 * next record in the log is not a STEAL. 171 * 172 * @note This call does NOT attempt to match the pedigree. That will be done 173 * by replay_match_victim_pedigree() after random_steal() has locked the victim 174 * worker. 175 * 176 * @param w The __cilkrts_worker we're executing on. The worker's replay log 177 * is checked for a STEAL record. If we've got one, the stolen worker ID is 178 * returned. 179 * @param id The randomly chosen victim worker ID. If we're not replaying a 180 * log, or if record/replay has been compiled out, this is the value that 181 * will be returned. 182 * 183 * @return id if we're not replaying a log 184 * @return -1 if the next record is not a STEAL 185 * @return recorded stolen worker ID if we've got a matching STEAL record 186 */ 187#ifdef CILK_RECORD_REPLAY 188__CILKRTS_INLINE 189int replay_get_next_recorded_victim(__cilkrts_worker *w, int id) 190{ 191 // Only check if we're replaying a log 192 if (REPLAY_LOG == w->g->record_or_replay) 193 return replay_get_next_recorded_victim_internal(w); 194 else 195 return id; 196} 197#else 198__CILKRTS_INLINE 199int replay_get_next_recorded_victim(__cilkrts_worker *w, int id) 200{ 201 // Record/replay is disabled. Always return the original worker id 202 return id; 203} 204#endif // CILK_RECORD_REPLAY 205 206/** 207 * Initialize per-worker data for record/replay. A noop if record/replay 208 * is disabled, or if we're not recording or replaying anything. 209 * 210 * If we're recording a log, this will ready us to create the per-worker 211 * logs. 212 * 213 * If we're replaying a log, this will read the logs into the per-worker 214 * structures. 215 * 216 * @param g Cilk runtime global state 217 */ 218void replay_init_workers(global_state_t *g); 219 220/** 221 * Record a record on a successful steal. A noop if record/replay is 222 * diabled, or if we're not recording anything 223 * 224 * @param w The __cilkrts_worker we're executing on. The pedigree of 225 * the stolen frame will be walked to generate the STEAL record. 226 * 227 * @param victim_id The worker ID of the worker w stole from. 228 */ 229#ifdef CILK_RECORD_REPLAY 230__CILKRTS_INLINE 231void replay_record_steal(__cilkrts_worker *w, int32_t victim_id) 232{ 233#if RECORD_ON_REPLAY 234 // If we're recording on replay, write the record if we're recording or 235 // replaying 236 if (RECORD_REPLAY_NONE == w->g->record_or_replay) 237 return; 238#else 239 // Only write the record if we're recording 240 if (RECORD_LOG != w->g->record_or_replay) 241 return; 242#endif 243 244 replay_record_steal_internal(w, victim_id); 245} 246#else 247__CILKRTS_INLINE 248void replay_record_steal(__cilkrts_worker *w, int32_t victim_id) 249{ 250} 251#endif // CILK_RECORD_REPLAY 252 253/** 254 * Record a record when continuing after a sync. A noop if record/replay is 255 * diabled, or if we're not recording anything, or if the sync was abandoned, 256 * meaning this isn't the worker that continues from the sync. 257 * 258 * @param w The __cilkrts_worker for we're executing on. The pedigree of 259 * the sync-ing frame will be walked to generate the SYNC record. 260 * 261 * @param continuing True if this worker will be continuing from the 262 * cilk_sync. A SYNC record will only be generated if continuing is true. 263 */ 264#ifdef CILK_RECORD_REPLAY 265__CILKRTS_INLINE 266void replay_record_sync(__cilkrts_worker *w, int continuing) 267{ 268 // If this was not the last worker to the syn, return 269 if (! continuing) 270 return; 271 272#if RECORD_ON_REPLAY 273 // If we're recording on replay, write the record if we're recording or 274 // replaying 275 if (RECORD_REPLAY_NONE == w->g->record_or_replay) 276 return; 277#else 278 // Only write the record if we're recording 279 if (RECORD_LOG != w->g->record_or_replay) 280 return; 281#endif 282 283 replay_record_sync_internal(w); 284} 285#else 286__CILKRTS_INLINE 287void replay_record_sync(__cilkrts_worker *w, int abandoned) 288{ 289} 290#endif // CILK_RECORD_REPLAY 291 292/** 293 * Record a record on a return to a stolen parent. A noop if record/replay is 294 * diabled, or if we're not recording anything. 295 * 296 * @param w The __cilkrts_worker for we're executing on. The pedigree of the 297 * frame that has discovered that its parent has been stolken will be walked 298 * to generate the ORPHANED record. 299 */ 300#ifdef CILK_RECORD_REPLAY 301__CILKRTS_INLINE 302void replay_record_orphaned(__cilkrts_worker *w) 303{ 304#if RECORD_ON_REPLAY 305 // If we're recording on replay, write the record if we're recording or 306 // replaying 307 if (RECORD_REPLAY_NONE == w->g->record_or_replay) 308 return; 309#else 310 // Only write the record if we're recording 311 if (RECORD_LOG != w->g->record_or_replay) 312 return; 313#endif 314 315 replay_record_orphaned_internal(w); 316} 317#else 318__CILKRTS_INLINE 319void replay_record_orphaned(__cilkrts_worker *w) 320{ 321} 322#endif // CILK_RECORD_REPLAY 323 324/** 325 * Test whether the frame at the head of the victim matches the pedigree of 326 * the frame that was recorded being stolen. Called in random steal to verify 327 * that we're about to steal the correct frame. 328 * 329 * @param w The __cilkrts_worker for we're executing on. The current worker 330 * is needed to find the replay entry to be checked. 331 * 332 * @param victim The __cilkrts_worker for we're proposing to steal a frame 333 * from. The victim's head entry is 334 * is needed to find the replay entry to be checked. 335 * 336 * @return 0 if we're replaying a log and the victim's pedigree does NOT match 337 * the next frame the worker is expected to steal. 338 * 339 * @return 1 in all other cases to indicate that the steal attempt should 340 * continue 341 */ 342#ifdef CILK_RECORD_REPLAY 343__CILKRTS_INLINE 344int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim) 345{ 346 // We're not replaying a log. The victim is always acceptable 347 if (REPLAY_LOG != w->g->record_or_replay) 348 return 1; 349 350 // Return 1 if the victim's pedigree matches the frame the worker stole 351 // when we recorded the log 352 return replay_match_victim_pedigree_internal(w, victim); 353} 354#else 355__CILKRTS_INLINE 356int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim) 357{ 358 // Record/replay is disabled. The victim is always acceptable 359 return 1; 360} 361#endif // CILK_RECORD_REPLAY 362 363/** 364 * Test whether the current replay entry is a sync record matching the 365 * worker's pedigree. 366 * 367 * @param w The __cilkrts_worker for we're executing on. 368 * 369 * @return 1 if the current replay entry matches the current pedigree. 370 * @return 0 if there's no match, or if we're not replaying a log. 371 */ 372#ifdef CILK_RECORD_REPLAY 373__CILKRTS_INLINE 374int replay_match_sync_pedigree(__cilkrts_worker *w) 375{ 376 // If we're not replaying, assume no match 377 if (REPLAY_LOG != w->g->record_or_replay) 378 return 0; 379 380 return replay_match_sync_pedigree_internal(w); 381} 382#else 383__CILKRTS_INLINE 384int replay_match_sync_pedigree(__cilkrts_worker *w) 385{ 386 // Record/replay is disabled. Assume no match 387 return 0; 388} 389#endif 390 391/** 392 * Marks a sync record seen, advancing to the next record in the replay list. 393 * 394 * This function will only advance to the next record if: 395 * - Record/replay hasn't been compiled out AND 396 * - We're replaying a log AND 397 * - A match was found AND 398 * - The sync is not being abandoned 399 * 400 * @param w The __cilkrts_worker for we're executing on. 401 * @param match_found The value returned by replay_match_sync_pedigree(). If 402 * match_found is false, nothing is done. 403 * @param continuing Flag indicating whether this worker will continue from 404 * the sync (it's the last worker to the sync) or if it will abandon the work 405 * and go to the scheduling loop to look for more work it can steal. 406 */ 407#ifdef CILK_RECORD_REPLAY 408__CILKRTS_INLINE 409void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing) 410{ 411 // If we're replaying a log, and the current sync wasn't abandoned, and we 412 // found a match in the log, mark the sync record seen. 413 if ((REPLAY_LOG == w->g->record_or_replay) && match_found && continuing) 414 replay_advance_from_sync_internal(w); 415} 416#else 417__CILKRTS_INLINE 418void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing) 419{ 420} 421#endif 422 423/** 424 * Release any resources used to read or write a replay log. 425 * 426 * @param g Cilk runtime global state 427 */ 428void replay_term(global_state_t *g); 429 430__CILKRTS_END_EXTERN_C 431 432#endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H) 433