1/*
2 * Copyright (c) 2015, ETH Zurich.
3 * Copyright (c) 2015, Hewlett Packard Enterprise Development LP.
4 * All rights reserved.
5 *
6 * This file is distributed under the terms in the attached LICENSE file.
7 * If you do not find this file, copies can be found by writing to:
8 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
9 */
10
11// THC runtime system
12//
13// Naming conventions:
14//
15//  THCStudlyCaps    - Functions for use from user code.
16//
17//  _thc_...         - Intrinsic functions, called from compiler-generated
18//                     code.  Their prototypes must match Intrinsics.td
19//
20//  thc_lower_case   - Internal functions used in this library.
21//
22//  thc_lower_case_0 - Arch-OS specific functions (implemented at the
23//                     bottom of this file).
24
// Finish-block kinds, stored in finish_t::fb_kind by _thc_startfinishblock.
// A TOP_FINISH block does not inherit a pending cancel request from its
// enclosing block, and is skipped when a cancel request is propagated to
// nested blocks.
#define FB_KIND_FINISH        0
#define FB_KIND_TOP_FINISH    1
27
28#include <stdlib.h>
29#include <stdio.h>
30#include <string.h>
31#include <assert.h>
32
33#ifdef BARRELFISH
34#include <barrelfish/barrelfish.h>
35#include <barrelfish/dispatch.h>
36#include <barrelfish/waitset.h>
37#include <thc/thc.h>
38#else
39#include "thc.h"
40#endif
41
42#ifdef linux
43#include <pthread.h>
44#endif
45
// Marks a point that control flow must never reach.  Expands to a failing
// assertion, so it has no effect when NDEBUG is defined.
#define NOT_REACHED assert(0 && "Not reached")

/* The esp of a lazy awe must be set some way into its lazily allocated  */
/* stack, so that it can pass arguments below its current esp.           */
/* This value defines the size of that buffer in bytes (it should be     */
/* more than the size of the arguments passed to any function call).     */
#define LAZY_STACK_BUFFER 512
53
// Output routine used by all debug/statistics messages in this file:
// debug_printf when built for Barrelfish, plain printf otherwise.
#ifdef BARRELFISH
#define DEBUGPRINTF debug_printf
#else
#define DEBUGPRINTF printf
#endif

// DEBUG_STATS(XX) executes XX.  Swap in the empty (commented-out)
// definition below to compile the statistics output away.
//#define DEBUG_STATS(XX)
#define DEBUG_STATS(XX) do{ XX; } while (0)
// Prefix prepended to every statistics line.
#define DEBUG_STATS_PREFIX        "         stats:    "
63
// Uncomment to enable the per-category trace macros below.
//#define VERBOSE_DEBUG

// Per-category trace macros.  With VERBOSE_DEBUG defined each one executes
// its argument; otherwise each expands to nothing, so the argument is
// never evaluated.
#ifdef VERBOSE_DEBUG
#define DEBUG_YIELD(XX) do{ XX; } while (0)
#define DEBUG_STACK(XX) do{ XX; } while (0)
#define DEBUG_AWE(XX) do{ XX; } while (0)
#define DEBUG_FINISH(XX) do{ XX; } while (0)
#define DEBUG_CANCEL(XX) do{ XX; } while (0)
#define DEBUG_INIT(XX) do{ XX; } while (0)
#define DEBUG_DISPATCH(XX) do{ XX; } while (0)
#else
#define DEBUG_YIELD(XX)
#define DEBUG_STACK(XX)
#define DEBUG_AWE(XX)
#define DEBUG_FINISH(XX)
#define DEBUG_CANCEL(XX)
#define DEBUG_INIT(XX)
#define DEBUG_DISPATCH(XX)
#endif

// Message prefixes, one per trace category, padded to a common width.
#define DEBUG_YIELD_PREFIX        "         yield:    "
#define DEBUG_STACK_PREFIX        "         stack:    "
#define DEBUG_AWE_PREFIX          "         awe:      "
#define DEBUG_FINISH_PREFIX       "         finish:   "
#define DEBUG_CANCEL_PREFIX       "         cancel:   "
#define DEBUG_INIT_PREFIX         "         init:     "
#define DEBUG_DISPATCH_PREFIX     "         dispatch: "
91
92/***********************************************************************/
93
94// Prototypes
95//
96// NB: those marked as "extern" are actually defined in this same file,
97// but the entire function (including label, prolog, epilogue, etc) is
98// in inline-asm, and so the definition is not visible to the compiler.
99
// Fill in an AWE with the given instruction, frame and stack pointers.
static void thc_awe_init(awe_t *awe, void *eip, void *ebp, void *esp);
// Enter the dispatch loop through pts->dispatch_awe.
static void thc_dispatch(PTState_t *pts);

// Arch-OS specific: resume execution of an AWE.
extern void thc_awe_execute_0(awe_t *awe);
// Arch-OS specific: obtain a fresh stack.
static void *thc_alloc_new_stack_0(void);

// Arch-OS specific accessors for the calling thread's per-thread state.
static PTState_t *thc_get_pts_0(void);
static void thc_set_pts_0(PTState_t *pts);

// Defined outside this excerpt; presumably appends an AWE to the calling
// thread's local run queue -- confirm against its definition.
static inline void thc_schedule_local(awe_t *awe);
110
111/***********************************************************************/
112
113// Per-thread state
114
115static PTState_t *PTS(void) {
116  PTState_t *pts = thc_get_pts_0();
117#ifndef NDEBUG
118  if (pts!=NULL) {
119    pts->getTls++;
120  }
121#endif
122  return pts;
123}
124
125static void InitPTS(void) {
126  PTState_t *pts = malloc(sizeof(PTState_t));
127  memset(pts, 0, sizeof(PTState_t));
128  thc_latch_init(&pts->latch);
129  assert((PTS() == NULL) && "PTS already initialized");
130  thc_set_pts_0(pts);
131}
132
133static void thc_pts_lock(PTState_t *t) {
134#ifndef NDEBUG
135  t->lock++;
136#endif
137  thc_latch_acquire(&t->latch);
138}
139
140static void thc_pts_unlock(PTState_t *t) {
141  thc_latch_release(&t->latch);
142}
143
144
145#ifdef NDEBUG
146static void thc_print_pts_stats(PTState_t *t, int clear) { }
147#else
148static struct thc_latch debug_latch = {0};
149static void thc_print_pts_stats(PTState_t *t, int clear) {
150  thc_latch_acquire(&debug_latch);
151
152  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "----------------------------------------\n"));
153  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "  %c stacks         %8d %8d\n",
154                          (t->stacksAllocated == t->stacksDeallocated) ? ' ' : '*',
155                          t->stacksAllocated, t->stacksDeallocated));
156  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "  %c stack memories %8d %8d\n",
157                          (t->stackMemoriesAllocated == t->stackMemoriesDeallocated) ? ' ' : '*',
158                          t->stackMemoriesAllocated, t->stackMemoriesDeallocated));
159  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "  %c finish blocks  %8d %8d\n",
160                          (t->finishBlocksStarted == t->finishBlocksEnded) ? ' ' : '*',
161                          t->finishBlocksStarted, t->finishBlocksEnded));
162  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "  %c async calls    %8d %8d\n",
163                          (t->asyncCallsStarted == t->asyncCallsEnded) ? ' ' : '*',
164                          t->asyncCallsStarted, t->asyncCallsEnded));
165  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "  %c awe            %8d %8d\n",
166                          (t->aweCreated == t->aweResumed) ? ' ' : '*',
167                          t->aweCreated, t->aweResumed));
168  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "  %c idle           %8d %8d\n",
169                          (t->idleStarted == t->idleComplete) ? ' ' : '*',
170                          t->idleStarted, t->idleComplete));
171  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "    get-tls        %8d\n",
172                          t->getTls));
173  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "    lock           %8d\n",
174                          t->lock));
175  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "    cancels        %8d\n",
176                          t->cancelsRequested));
177  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "  %c actions        %8d %8d %8d\n",
178                          (t->cancelsAdded == (t->cancelsRun + t->cancelsRemoved)) ? ' ' : '*',
179                          t->cancelsAdded, t->cancelsRun, t->cancelsRemoved));
180  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "    message send   %8d\n",
181                          t->sendCount));
182  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "    message recv   %8d\n",
183                          t->recvCount));
184  DEBUG_STATS(DEBUGPRINTF(DEBUG_STATS_PREFIX "----------------------------------------\n"));
185
186  if (clear) {
187    t->stacksAllocated -= t->stacksDeallocated;
188    t->stacksDeallocated = 0;
189    t->stackMemoriesAllocated -= t->stackMemoriesDeallocated;
190    t->stackMemoriesDeallocated = 0;
191    t->finishBlocksStarted -= t->finishBlocksEnded;
192    t->finishBlocksEnded = 0;
193    t->asyncCallsStarted -= t->asyncCallsEnded;
194    t->asyncCallsEnded = 0;
195    t->aweCreated -= t->aweResumed;
196    t->aweResumed = 0;
197    t->idleStarted -= t->idleComplete;
198    t->idleComplete = 0;
199    t->getTls = 0;
200    t->lock = 0;
201    t->cancelsRequested = 0;
202    t->cancelsAdded = 0;
203    t->cancelsRun = 0;
204    t->cancelsRemoved = 0;
205  }
206
207  thc_latch_release(&debug_latch);
208}
209
210#endif
211
212/***********************************************************************/
213
214// Stack management
215
// A value of type stack_t represents a stack which has been allocated
// but which is not currently in use.  It is placed at the top of the
// memory reserved for the stack.
219
// Sizes, in bytes, of the committed part of a stack and of the guard
// region that goes with it.
#define STACK_COMMIT_BYTES (16*4096)
#define STACK_GUARD_BYTES  (1*4096)
222
223// Allocate a new stack, returning an address just above the top of
224// the committed region.  The stack comprises STACK_COMMIT_BYTES
225// followed by an inaccessible STACK_GUARD_BYTES.
226//
227// There is currently no support for extending a stack, or allowing it
228// to be discontiguous
229
230void *_thc_allocstack(void) {
231  PTState_t *pts = PTS();
232  void *result = NULL;
233  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "> AllocStack\n"));
234  if (pts->free_stacks != NULL) {
235    // Re-use previously freed stack
236    DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "  Re-using free stack\n"));
237    stack_t *r = pts->free_stacks;
238    pts->free_stacks = pts->free_stacks->next;
239    result = ((void*)r) + sizeof(stack_t);
240  } else {
241    result = (void*)thc_alloc_new_stack_0();
242#ifndef NDEBUG
243    pts->stackMemoriesAllocated ++;
244#endif
245  }
246  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "< AllocStack = %p\n", result));
247#ifndef NDEBUG
248  pts->stacksAllocated ++;
249#endif
250  return result;
251}
252
253// De-allocate a stack back to THC's pool of free stacks
254
255void _thc_freestack(void *s) {
256  PTState_t *pts = PTS();
257  stack_t *stack = (stack_t*)(s - sizeof(stack_t));
258  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "> FreeStack(%p)\n", stack));
259  stack->next = pts->free_stacks;
260  pts->free_stacks = stack;
261  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "< FreeStack\n"));
262#ifndef NDEBUG
263  pts->stacksDeallocated ++;
264#endif
265}
266
267static void thc_pendingfree(PTState_t * pts) {
268  if (pts->pendingFree) {
269    DEBUG_DISPATCH(DEBUGPRINTF(DEBUG_DISPATCH_PREFIX
270                               "  pending free of stack %p\n",
271                               PTS()->pendingFree));
272    _thc_freestack(pts->pendingFree);
273    pts->pendingFree = NULL;
274  }
275}
276
277void _thc_pendingfree(void) {
278  thc_pendingfree(PTS());
279}
280
281#ifdef CONFIG_LAZY_THC
282
283// This checks whether the awe's lazy stack is finished with (according to
284// the provided esp, and puts it on pending free list if so.
285
286static void check_lazy_stack_finished (PTState_t *pts, void *esp) {
287  assert(pts->curr_lazy_stack);
288  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX
289			  "> CheckLazyStackFinished(s=%p,esp+buf=%p)\n",
290			  pts->curr_lazy_stack, esp + LAZY_STACK_BUFFER));
291  if ((esp + LAZY_STACK_BUFFER) == pts->curr_lazy_stack) {
292    // nothing on lazy stack, we can safely free it
293    DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "  freeing lazy stack %p\n",
294			    pts->curr_lazy_stack));
295    assert(pts->pendingFree == NULL);
296    pts->pendingFree = pts->curr_lazy_stack;
297  }
298  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "< CheckLazyStackFinished()\n"));
299}
300
301// Allocate a lazy stack for this awe's continuation to execute on.
302
303static void alloc_lazy_stack (awe_t *awe) {
304  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "> AllocLazyStack(awe=%p)\n",
305			  awe));
306  assert(awe->status == LAZY_AWE && !awe->lazy_stack);
307  awe->lazy_stack = _thc_allocstack();
308  void * new_esp =  awe->lazy_stack - LAZY_STACK_BUFFER;
309  *((void **) new_esp) = awe->esp;
310  awe->esp = new_esp;
311  assert(((uintptr_t)awe->esp & 15) == 0); // check the stack's alignment
312  awe->status = ALLOCATED_LAZY_STACK;
313  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "< AllocLazyStack(awe=%p,s=%p)\n",
314			  awe, awe->lazy_stack));
315}
316
317#else // EAGER_THC
318static inline void alloc_lazy_stack (awe_t *awe) {
319    /* Shouldn't be called in eager version */
320    NOT_REACHED;
321}
322static inline void check_lazy_stack_finished (PTState_t *pts, awe_t *awe) {
323    /* Nothing to do here in eager version */
324}
325#endif // CONFIG_LAZY_THC
326
327// Execute "fn(args)" on the stack growing down from "stacktop".  This is
328// just a wrapper around the arch-os specific function.
329
330/***********************************************************************/
331
332static void thc_run_idle_fn(void) {
333  PTState_t *pts = PTS();
334  void *s = pts->idle_stack;
335#ifndef NDEBUG
336  pts->idleStarted++;
337#endif
338  DEBUG_DISPATCH(DEBUGPRINTF(DEBUG_DISPATCH_PREFIX "  calling idle fn\n"));
339  pts->idle_fn(pts->idle_args);
340  DEBUG_DISPATCH(DEBUGPRINTF(DEBUG_DISPATCH_PREFIX "  returned from idle fn\n"));
341#ifndef NDEBUG
342  pts->idleComplete++;
343#endif
344  pts->pendingFree = s;
345  thc_dispatch(pts);
346  NOT_REACHED;
347}
348
349__attribute__ ((unused))
350static void re_init_dispatch_awe(void *a, void *arg) {
351  PTState_t *pts = PTS();
352  awe_t *awe = (awe_t *)a;
353  pts->dispatch_awe = *awe;
354  assert(awe->status == EAGER_AWE && !pts->curr_lazy_stack);
355#ifndef NDEBUG
356  // Do not count dispatch AWE in the debugging stats (it is created
357  // once and then resumed once per dispatch-loop entry, so it obscures
358  // mis-match between normal 1-shot AWEs)
359  pts->aweCreated--;
360#endif
361  thc_dispatch(pts);
362}
363
// Dispatch loop
//
// Currently, this maintains a doubly-linked list of runnable AWEs.
// New AWEs are added to the tail of the list.  Execution proceeds from
// the head.
//
// dispatch_awe is initialized to refer to the entry point for the
// "dispatch loop" function.
//
// Each pass: free any pending stack, pull in AWEs queued by other
// threads, then either resume the AWE at the head of the local list or,
// if the list is empty, run the idle function on a fresh stack.

static void thc_dispatch_loop(void) {
  PTState_t *pts = PTS();

  // Re-initialize pts->dispatch_awe to this point, just after we have
  // read PTS.  This will save the per-thread-state access on future
  // executions of the function.
  CALL_CONT((unsigned char*)&re_init_dispatch_awe, NULL);

  DEBUG_DISPATCH(DEBUGPRINTF(DEBUG_DISPATCH_PREFIX "> dispatch_loop\n"));

  // We are now on the dispatch stack, so a stack parked by the previous
  // AWE can be released.
  thc_pendingfree(pts);

  // Pick up work passed to us from other threads.  The emptiness test is
  // made without the latch; the list is only modified with it held.
  if (pts->aweRemoteHead.next != &pts->aweRemoteTail) {
    awe_t *tmp = pts->aweHead.next;
    thc_pts_lock(pts);
    // Move remote list into our local list (spliced in at the front,
    // ahead of the previous first local entry `tmp`)
    pts->aweHead.next = pts->aweRemoteHead.next;
    pts->aweRemoteHead.next->prev = &pts->aweHead;
    pts->aweRemoteTail.prev->next = tmp;
    tmp->prev = pts->aweRemoteTail.prev;
    // Clear remote list
    pts->aweRemoteHead.next = &pts->aweRemoteTail;
    pts->aweRemoteTail.prev = &pts->aweRemoteHead;
    thc_pts_unlock(pts);
  }

  // Nothing runnable: run the idle function on a freshly allocated stack.
  if (pts->aweHead.next == &pts->aweTail) {
    DEBUG_DISPATCH(DEBUGPRINTF(DEBUG_DISPATCH_PREFIX "  queue empty\n"));
    assert(pts->idle_fn != NULL && "Dispatch loop idle, and no idle_fn work");
    void *idle_stack = _thc_allocstack();
    awe_t idle_awe;
    // Set start of stack-frame marker
    *((void**)(idle_stack - LAZY_STACK_BUFFER + __WORD_SIZE)) = NULL;
    thc_awe_init(&idle_awe, &thc_run_idle_fn, idle_stack - LAZY_STACK_BUFFER,
                 idle_stack - LAZY_STACK_BUFFER);
#ifndef NDEBUG
    pts->aweCreated++;
#endif
    // thc_run_idle_fn reads this back so it can free the stack afterwards.
    pts->idle_stack = idle_stack;
    pts->current_fb = NULL;
#ifndef NDEBUG
    pts->aweResumed++;
#endif
    pts->curr_lazy_stack = NULL;
    DEBUG_DISPATCH(DEBUGPRINTF(DEBUG_DISPATCH_PREFIX "  executing idle function\n"));
    thc_awe_execute_0(&idle_awe);
    NOT_REACHED;
  }

  // Unlink the head AWE, restore the finish block and lazy stack it was
  // running with, and resume it.
  awe_t *awe = pts->aweHead.next;

  DEBUG_DISPATCH(DEBUGPRINTF(DEBUG_DISPATCH_PREFIX "  got AWE %p "
			     "(ip=%p, sp=%p, fp=%p)\n",
			     awe, awe->eip, awe->esp, awe->ebp));
  pts->aweHead.next = awe->next;
  pts->current_fb = awe->current_fb;
  pts->curr_lazy_stack = awe->lazy_stack;
  awe->next->prev = &(pts->aweHead);
#ifndef NDEBUG
  pts->aweResumed ++;
#endif
  thc_awe_execute_0(awe);
}
437
438static void thc_init_dispatch_loop(void) {
439  PTState_t *pts = PTS();
440  pts->dispatchStack = _thc_allocstack();
441  // Set start of stack-frame marker
442  *((void**)(pts->dispatchStack - LAZY_STACK_BUFFER + __WORD_SIZE)) = NULL;
443  thc_awe_init(&pts->dispatch_awe, &thc_dispatch_loop,
444               pts->dispatchStack - LAZY_STACK_BUFFER,
445               pts->dispatchStack - LAZY_STACK_BUFFER);
446  pts->aweHead.next = &(pts->aweTail);
447  pts->aweTail.prev = &(pts->aweHead);
448  pts->aweRemoteHead.next = &(pts->aweRemoteTail);
449  pts->aweRemoteTail.prev = &(pts->aweRemoteHead);
450
451  DEBUG_INIT(DEBUGPRINTF(DEBUG_INIT_PREFIX
452                         "  initialized dispatch awe %p\n",
453                         &pts->dispatch_awe));
454  DEBUG_INIT(DEBUGPRINTF(DEBUG_INIT_PREFIX
455                         "  (%p, %p, %p)\n",
456                         pts->dispatch_awe.eip,
457                         pts->dispatch_awe.ebp,
458                         pts->dispatch_awe.esp));
459}
460
461static void thc_exit_dispatch_loop(void) {
462  PTState_t *pts = PTS();
463  assert(!pts->shouldExit);
464  pts->shouldExit = 1;
465  // Wait for idle loop to finish
466  while (pts->aweHead.next != &(pts->aweTail)) {
467    THCYield();
468  }
469  // Exit
470  thc_pts_lock(pts);
471  assert((pts->aweHead.next == &(pts->aweTail)) &&
472         "Dispatch queue not empty at exit");
473  DEBUG_INIT(DEBUGPRINTF(DEBUG_INIT_PREFIX
474                         "  NULLing out dispatch AWE\n"));
475  thc_awe_init(&pts->dispatch_awe, NULL, NULL, NULL);
476  _thc_freestack(pts->dispatchStack);
477}
478
479// Enter the dispatch function via dispatch_awe.
480//
481// (Hence the dispatch loop will run on its own stack, rather than
482// the caller's)
483
484static void thc_dispatch(PTState_t *pts) {
485  assert(pts && pts->doneInit && "Not initialized RTS");
486  thc_awe_execute_0(&pts->dispatch_awe);
487}
488
489static void thc_start_rts(void) {
490  InitPTS();
491  assert(PTS() && (!PTS()->doneInit) && "Already initialized RTS");
492  DEBUG_INIT(DEBUGPRINTF(DEBUG_INIT_PREFIX "> Starting\n"));
493  thc_init_dispatch_loop();
494  PTS()->doneInit = 1;
495  DEBUG_INIT(DEBUGPRINTF(DEBUG_INIT_PREFIX "< Starting\n"));
496}
497
498static void thc_end_rts(void) {
499  PTState_t *pts = PTS();
500  assert(pts->doneInit && "Not initialized RTS");
501  DEBUG_INIT(DEBUGPRINTF(DEBUG_INIT_PREFIX "> Ending\n"));
502  thc_exit_dispatch_loop();
503
504  // Count up the stacks that we have left.  This is merely for
505  // book-keeping: once the dispatch loop is done, then the
506  // number of stacks on our free list should equal the number
507  // allocated from the OS.
508  while (pts->free_stacks != NULL) {
509    pts->free_stacks = pts->free_stacks->next;
510#ifndef NDEBUG
511    pts->stackMemoriesDeallocated ++;
512#endif
513  }
514
515  // Done
516  thc_print_pts_stats(PTS(), 0);
517  PTS()->doneInit = 0;
518  DEBUG_INIT(DEBUGPRINTF(DEBUG_INIT_PREFIX "< Ending\n"));
519}
520
521/***********************************************************************/
522
523// AWE management
524
525static void thc_awe_init(awe_t *awe, void *eip, void *ebp, void *esp) {
526  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "> AWEInit(%p, %p, %p, %p)\n",
527                        awe, eip, ebp, esp));
528  PTState_t *pts = PTS();
529  awe->eip = eip;
530  awe->ebp = ebp;
531  awe->esp = esp;
532  assert(((uintptr_t)awe->esp & 15) == 0);
533  awe->pts = pts;
534  awe->status = EAGER_AWE;
535  awe->lazy_stack = NULL;
536  awe->current_fb = NULL;
537  awe->next = NULL;
538  awe->prev = NULL;
539  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "< AWEInit\n"));
540}
541
542// This function is not meant to be used externally, but its only use
543// here is from within the inline assembly language functions.  The
544// C "used" attribute is not currently maintained through Clang & LLVM
545// with the C backend, so we cannot rely on that.
546extern void _thc_schedulecont_c(awe_t *awe);
547void _thc_schedulecont_c(awe_t *awe) {
548  PTState_t *pts = PTS();
549  awe->pts = pts;
550#ifndef NDEBUG
551  pts->aweCreated ++;
552#endif
553  thc_schedule_local(awe);
554}
555
556// This function is not meant to be used externally, but its only use
557// here is from within the inline assembly language functions.  The
558// C "used" attribute is not currently maintained through Clang & LLVM
559// with the C backend, so we cannot rely on that.
560extern void _thc_callcont_c(awe_t *awe, THCContFn_t fn, void *args);
561void _thc_callcont_c(awe_t *awe,
562                     THCContFn_t fn,
563                     void *args) {
564  PTState_t *pts = PTS();
565  awe->pts = pts;
566  awe->current_fb = pts->current_fb;
567#ifndef NDEBUG
568  pts->aweCreated ++;
569#endif
570  fn(awe, args);
571}
572
573#ifdef CONFIG_LAZY_THC
574
575static void init_lazy_awe (void ** lazy_awe_fp) {
576
577  // Get the saved awe
578  awe_t *awe = THC_LAZY_FRAME_AWE(lazy_awe_fp);
579
580  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX " found lazy awe %p @ frameptr %p",
581			awe, lazy_awe_fp));
582
583  // Scrub nested return, lazy awe will now return through dispatch loop
584  THC_LAZY_FRAME_RET(lazy_awe_fp) = NULL;
585
586  assert(awe->status == LAZY_AWE);
587  // Allocate a new stack for this awe
588  alloc_lazy_stack(awe);
589  // lazily start async block
590  _thc_startasync(awe->current_fb, awe->lazy_stack);
591  // schedule lazy awe
592  _thc_schedulecont_c(awe);
593}
594
595// Check for all lazy awe on the stack - initalizing and scheduling any if
596// they are found.
597
598static void check_for_lazy_awe (void * ebp) {
599  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "> CheckForLazyAWE (ebp=%p)\n", ebp));
600  void **frame_ptr  = (void **) ebp;
601  void *ret_addr    = THC_LAZY_FRAME_RET(frame_ptr);
602  while (frame_ptr != NULL && ret_addr != NULL) {
603    if (ret_addr == &_thc_lazy_awe_marker) {
604      init_lazy_awe(frame_ptr);
605    }
606    frame_ptr = (void **) THC_LAZY_FRAME_PREV(frame_ptr);
607    ret_addr   = THC_LAZY_FRAME_RET(frame_ptr);
608  }
609
610  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "< CheckForLazyAWE\n"));
611}
612
613#else  // CONFIG_EAGER_THC
// Eager build: no lazy awes exist, so there is nothing to look for.
static inline void check_for_lazy_awe (void * ebp) {
  (void)ebp;
}
615#endif // CONFIG_LAZY_THC
616
617/***********************************************************************/
618
// Implementation of finish blocks
//
// The implementation of finish blocks is straightforward:
// _thc_endfinishblock yields back to the dispatch loop if it finds
// the count non-zero, and stashes away a continuation in
// fb->finish_awe which will be resumed when the final async
// call finishes.  _thc_endasync picks this up.
626
627void _thc_startfinishblock(finish_t *fb, int fb_kind) {
628  PTState_t *pts = PTS();
629  finish_t *current_fb = pts->current_fb;
630  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "> StartFinishBlock (%p,%s)\n",
631                           fb,
632                           (fb_kind == 0) ? "FINISH" : "TOP-FINISH"));
633  assert(PTS() && (PTS()->doneInit) && "Not initialized RTS");
634  fb -> count = 0;
635  fb -> finish_awe = NULL;
636  fb->cancel_item = NULL;
637  fb->cancel_requested = 0;
638  fb->start_node.fb = fb;
639  fb->end_node.fb = fb;
640  fb->enclosing_lazy_stack = PTS()->curr_lazy_stack;
641  fb->enclosing_fb = current_fb;
642  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  FB %p nested within %p\n",
643                           fb, current_fb));
644  pts->current_fb = fb;
645
646  // Initialize cancel status
647  fb->fb_kind = fb_kind;
648  if (fb_kind != FB_KIND_TOP_FINISH &&
649      current_fb != NULL &&
650      current_fb->cancel_requested) {
651    DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  Propagating cancel flag on init\n"));
652    fb->cancel_requested = 1;
653  }
654
655  // Link into finish list
656  //
657  // Before:
658  //  [current_fb.end->prev] <-> [current_fb.end]
659  //
660  // After:
661  //  [current_fb.end->prev] <-> [fb->start] <-> [fb->end] <-> [current_fb.end]
662
663  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  Connecting own [%p]<->[%p]\n",
664                           &(fb->start_node), &(fb->end_node)));
665
666  fb->start_node.next = &(fb->end_node);
667  fb->end_node.prev = &(fb->start_node);
668  if (current_fb != NULL) {
669    DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  Splicing between [%p]<->[%p]\n",
670                             (current_fb->end_node.prev), &(current_fb->end_node)));
671    assert(current_fb->end_node.prev->next = &(current_fb->end_node));
672    assert(current_fb->start_node.next->prev = &(current_fb->start_node));
673    current_fb->end_node.prev->next = &(fb->start_node);
674    fb->start_node.prev = current_fb->end_node.prev;
675    fb->end_node.next = &(current_fb->end_node);
676    current_fb->end_node.prev = &(fb->end_node);
677  } else {
678    fb->start_node.prev = NULL;
679    fb->end_node.next = NULL;
680  }
681
682  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "< StartFinishBlock\n"));
683#ifndef NDEBUG
684  PTS()->finishBlocksStarted ++;
685#endif
686}
687
688__attribute__ ((unused))
689static void _thc_endfinishblock0(void *a, void *f) {
690  finish_t *fb = (finish_t*)f;
691  awe_t *awe = (awe_t*)a;
692
693  awe->lazy_stack = awe->pts->curr_lazy_stack;
694
695  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  Waiting f=%p awe=%p\n",
696                           fb, a));
697  assert(fb->finish_awe == NULL);
698  fb->finish_awe = a;
699  thc_dispatch(awe->pts);
700  NOT_REACHED;
701}
702
703static void thc_run_cancel_actions(PTState_t *pts, finish_t *fb) {
704  cancel_item_t *ci = fb->cancel_item;
705  fb->cancel_item = NULL;
706  if (ci == NULL) {
707    DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  No cancel actions on %p\n", fb));
708  } else {
709    while (ci != NULL) {
710      cancel_item_t *nci = ci->next;
711      DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "  Running cancellation action %p\n",
712                               ci));
713#ifndef NDEBUG
714      pts->cancelsRun ++;
715#endif
716      assert(ci->was_run == 0);
717      ci->was_run = 1;
718      (*ci->fn)(ci->arg);
719      ci = nci;
720    }
721  }
722}
723
724void _thc_do_cancel_request(finish_t *fb) {
725  PTState_t *pts = PTS();
726
727  // Set own cancellation request flag
728  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  Setting cancel request flag\n"));
729  fb->cancel_requested = 1;
730#ifndef NDEBUG
731  pts->cancelsRequested++;
732#endif
733
734  // Handle nested cancel blocks
735  finish_list_t *fl = fb->start_node.next;
736  while (fl->fb != fb) {
737    DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  Looking at nested FB %p kind %d\n",
738                             fl->fb, (int)fl->fb->fb_kind));
739    assert(fl->prev == NULL || fl->prev->next == fl);
740    assert(fl->next == NULL || fl->next->prev == fl);
741    if (fl->fb->fb_kind == FB_KIND_TOP_FINISH) {
742      // We have found a non-nested cancel block.  This occurs when there is an
743      // intervening non-cancelable function between (i) the block we are currently
744      // cancelling, and (ii) the block that we have just encountered.  Skip
745      // past the non-nested block.
746      DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  Skipping FB_KIND_TOP_FINISH to %p\n",
747                               fl->fb->end_node.next));
748      fl = fl->fb->end_node.next;
749    } else {
750      fl->fb->cancel_requested = 1;
751      if (fl == &(fl->fb->end_node)) {
752        thc_run_cancel_actions(pts, fl->fb);
753      }
754      fl = fl->next;
755    }
756  }
757
758  // Run our own cancellation actions
759  thc_run_cancel_actions(pts, fb);
760}
761
// End a finish block.
//
//  fb     - the finish block being closed.
//  stack  - stack pointer supplied by the compiler-generated caller; it
//           is compared against the enclosing block's old_sp and passed
//           to check_lazy_stack_finished.
//
// If async calls started in the block are still outstanding, the caller
// is suspended (via _thc_endfinishblock0) until the count reaches zero.
// The block is then unlinked from the enclosing block's list and the
// enclosing block becomes the current one again.
void _thc_endfinishblock(finish_t *fb, void *stack) {
  PTState_t *pts = PTS();
  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "> EndFinishBlock(%p)\n",
                           fb));
  assert((pts->doneInit) && "Not initialized RTS");
  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  count=%d\n",
                           (int)fb->count));

  if (fb->count == 0) {
    // Zero first time.  Check there's not an AWE waiting.
    assert(fb->finish_awe == NULL);
  } else {
    // Non-zero first time, add ourselves as the waiting AWE.
    // Execution continues below once _thc_endasync has rescheduled us.
    CALL_CONT_LAZY((unsigned char*)&_thc_endfinishblock0, fb);
  }

  // From here on all async work in the block has completed, and the
  // block must have no nested blocks or cancel actions left.
  assert(fb->count == 0);
  assert(fb->cancel_item == NULL);
  assert(fb->start_node.next == &(fb->end_node));
  assert(fb->end_node.prev == &(fb->start_node));
  if (fb->start_node.prev == NULL) {
    // No enclosing finish block
    DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  No enclosing FB\n"));
    assert(fb->end_node.next == NULL);
  } else {
    // Remove from enclosing finish block's list
    DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  Removing from between [%p]<->[%p]\n",
                             fb->start_node.prev, fb->end_node.next));
    fb->start_node.prev->next = fb->end_node.next;
    fb->end_node.next->prev = fb->start_node.prev;
  }

  // If we are on a lazy stack that differs from the one the block was
  // entered on, see whether that stack can now be released.
  if (pts->curr_lazy_stack &&
      (fb->enclosing_fb == NULL || stack != fb->enclosing_fb->old_sp) &&
      pts->curr_lazy_stack != fb->enclosing_lazy_stack) {
      check_lazy_stack_finished(pts, stack);
  }

  // Restore the state that was current when the block was started.
  PTS()->curr_lazy_stack = fb->enclosing_lazy_stack;
  pts->current_fb = fb->enclosing_fb;

  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "< EndFinishBlock\n"));
#ifndef NDEBUG
  pts->finishBlocksEnded ++;
#endif
}
808
809void _thc_startasync(void *f, void *stack) {
810  finish_t *fb = (finish_t*)f;
811  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "> StartAsync(%p,%p)\n",
812                           fb, stack));
813  fb->count ++;
814  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "< StartAsync count now %d\n",
815                           (int)fb->count));
816#ifndef NDEBUG
817  PTS()->asyncCallsStarted ++;
818#endif
819}
820
821void _thc_endasync(void *f, void *s) {
822  finish_t *fb = (finish_t*)f;
823  PTState_t *pts = PTS();
824#ifndef NDEBUG
825  pts->asyncCallsEnded ++;
826#endif
827  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "> EndAsync(%p,%p)\n",
828                           fb, s));
829  assert(fb->count > 0);
830  fb->count --;
831  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  count now %d\n",
832                           (int)fb->count));
833  assert(pts->pendingFree == NULL);
834
835#ifdef CONFIG_LAZY_THC
836  assert(__builtin_return_address(1) == NULL); /* Should have been nulled */
837  /* Check whether we are running on a lazy stack, and can dispose of it */
838  if (pts->curr_lazy_stack && s != fb->old_sp) {
839      check_lazy_stack_finished(pts, s);
840  }
841#else // Eager AWE
842  pts->pendingFree = s;
843#endif // CONFIG_LAZY_THC
844
845  if (fb->count == 0) {
846    if (fb -> finish_awe) {
847      DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "  waiting AWE %p\n",
848                               fb->finish_awe));
849      thc_schedule_local(fb -> finish_awe);
850      fb -> finish_awe = NULL;
851    }
852  }
853
854  DEBUG_FINISH(DEBUGPRINTF(DEBUG_FINISH_PREFIX "< EndAsync\n"));
855  thc_dispatch(pts);
856  NOT_REACHED;
857}
858
859/***********************************************************************/
860
861// Operations for use by application code
862
863void THCDumpStats(int clear_stats) {
864  thc_print_pts_stats(PTS(), clear_stats);
865}
866
867void THCIncSendCount(void) {
868#ifndef NDEBUG
869  if (PTS() != NULL) {
870    PTS()->sendCount++;
871  }
872#endif
873}
874
875void THCIncRecvCount(void) {
876#ifndef NDEBUG
877  if (PTS() != NULL) {
878    PTS()->recvCount++;
879  }
880#endif
881}
882
883__attribute__ ((unused))
884static void thc_yield_with_cont(void *a, void *arg) {
885  DEBUG_YIELD(DEBUGPRINTF(DEBUG_YIELD_PREFIX "! %p (%p,%p,%p) yield\n",
886                          a,
887                          ((awe_t*)a)->eip,
888                          ((awe_t*)a)->ebp,
889                          ((awe_t*)a)->esp));
890  awe_t *awe = (awe_t*)a;
891  awe->lazy_stack = awe->pts->curr_lazy_stack;
892  // check if we have yielded within a lazy awe
893  check_for_lazy_awe(awe->ebp);
894  THCScheduleBack(awe);
895  thc_dispatch(awe->pts);
896}
897
898void THCYield(void) {
899  CALL_CONT_LAZY((void*)&thc_yield_with_cont, NULL);
900}
901
902__attribute__ ((unused))
903static void thc_yieldto_with_cont(void *a, void *arg) {
904  DEBUG_YIELD(DEBUGPRINTF(DEBUG_YIELD_PREFIX "! %p (%p,%p,%p) yield\n",
905                          a,
906                          ((awe_t*)a)->eip,
907                          ((awe_t*)a)->ebp,
908                          ((awe_t*)a)->esp));
909  awe_t *last_awe = (awe_t*)a;
910
911  last_awe->lazy_stack = last_awe->pts->curr_lazy_stack;
912  // check if we have yielded within a lazy awe
913  check_for_lazy_awe(last_awe->ebp);
914
915  THCScheduleBack(last_awe);
916  awe_t *awe = (awe_t *)arg;
917#ifndef NDEBUG
918  PTS()->aweResumed++;
919#endif
920
921  awe->pts->curr_lazy_stack = awe->lazy_stack;
922  awe->pts->current_fb = awe->current_fb;
923
924  thc_awe_execute_0(awe);
925}
926
927void THCYieldTo(awe_t *awe_ptr) {
928  if (PTS() == awe_ptr->pts) {
929    CALL_CONT_LAZY((void*)&thc_yieldto_with_cont, (void*)awe_ptr);
930  } else {
931    THCSchedule(awe_ptr);
932  }
933}
934
935void THCFinish(void) {
936  thc_dispatch(PTS());
937}
938
939__attribute__ ((unused))
940static void thc_suspend_with_cont(void *a, void *arg) {
941  DEBUG_YIELD(DEBUGPRINTF(DEBUG_YIELD_PREFIX "! %p (%p,%p,%p) wait\n",
942                          a,
943                          ((awe_t*)a)->eip,
944                          ((awe_t*)a)->ebp,
945                          ((awe_t*)a)->esp));
946  *(void**)arg = a;  awe_t *awe = (awe_t*)a;
947  awe->lazy_stack = awe->pts->curr_lazy_stack;
948  // check if we have yielded within a lazy awe
949  check_for_lazy_awe(awe->ebp);
950  thc_dispatch(awe->pts);
951}
952
953void THCSuspend(awe_t **awe_ptr_ptr) {
954  CALL_CONT_LAZY(&thc_suspend_with_cont, awe_ptr_ptr);
955}
956
// Argument bundle built by THCSuspendThen and consumed by
// thc_suspendthen_with_cont.
typedef struct {
  awe_t       **awe_addr;  // location that receives the suspended AWE pointer
  THCThenFn_t   then_fn;   // callback invoked once the AWE pointer is stored
  void         *then_arg;  // argument passed to then_fn
} then_args_t;
962
963__attribute__ ((unused))
964static void thc_suspendthen_with_cont(void *a, void *arg) {
965  then_args_t *ta = (then_args_t*)arg;
966
967  DEBUG_YIELD(DEBUGPRINTF(DEBUG_YIELD_PREFIX "! %p (%p,%p,%p) waitthen\n",
968                          a,
969                          ((awe_t*)a)->eip,
970                          ((awe_t*)a)->ebp,
971                          ((awe_t*)a)->esp));
972  *(void**)(ta->awe_addr) = a;
973  ta->then_fn(ta->then_arg);
974
975  awe_t *awe = (awe_t*)a;
976  awe->lazy_stack = awe->pts->curr_lazy_stack;
977  // check if we have yielded within a lazy awe
978  check_for_lazy_awe(awe->ebp);
979  thc_dispatch(awe->pts);
980}
981
982void THCSuspendThen(awe_t **awe_ptr_ptr, THCThenFn_t fn, void *arg) {
983  then_args_t t;
984  t.awe_addr = awe_ptr_ptr;
985  t.then_fn = fn;
986  t.then_arg = arg;
987  CALL_CONT_LAZY((void*)&thc_suspendthen_with_cont, &t);
988}
989
990// Add the supplied AWE to the dispatch queue
991//
992// By default we add to the head.  This means that in the implementation
993// of "X ; async { Y } ; Z" we will run X;Y;Z in sequence (assuming that
994// Y does not block).  This relies on Z being put at the head of the
995// queue.
996
997static inline void thc_schedule_local(awe_t *awe) {
998  PTState_t *awe_pts;
999  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "> THCSchedule(%p)\n",
1000                        awe));
1001  awe_pts = awe->pts;
1002  awe->prev = &(awe_pts->aweHead);
1003  awe->next = awe_pts->aweHead.next;
1004  awe_pts->aweHead.next->prev = awe;
1005  awe_pts->aweHead.next = awe;
1006  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "  added AWE between %p %p\n",
1007                        awe->prev, awe->next));
1008  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "< THCSchedule\n"));
1009}
1010
1011void THCSchedule(awe_t *awe) {
1012  PTState_t *awe_pts;
1013  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "> THCSchedule(%p)\n",
1014                        awe));
1015  awe_pts = awe->pts;
1016  if (awe_pts == PTS()) {
1017    // Work is for us
1018    awe->prev = &(awe_pts->aweHead);
1019    awe->next = awe_pts->aweHead.next;
1020    awe_pts->aweHead.next->prev = awe;
1021    awe_pts->aweHead.next = awe;
1022  } else {
1023    // Work is remote
1024    thc_pts_lock(awe_pts);
1025    awe->prev = &(awe_pts->aweRemoteHead);
1026    awe->next = awe_pts->aweRemoteHead.next;
1027    awe_pts->aweRemoteHead.next->prev = awe;
1028    awe_pts->aweRemoteHead.next = awe;
1029    thc_pts_unlock(awe_pts);
1030  }
1031  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "  added AWE between %p %p\n",
1032                        awe->prev, awe->next));
1033  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "< THCSchedule\n"));
1034}
1035
1036// Add the supplied AWE to the tail of the dispatch queue (for THCYield)
1037
1038void THCScheduleBack(awe_t *awe) {
1039  PTState_t *awe_pts = awe->pts;
1040  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "> THCSchedule(%p)\n",
1041                        awe));
1042  assert(awe_pts == PTS());
1043  awe_pts = awe->pts;
1044  awe->prev = awe_pts->aweTail.prev;
1045  awe->next = &(awe_pts->aweTail);
1046  awe_pts->aweTail.prev->next = awe;
1047  awe_pts->aweTail.prev = awe;
1048  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "  added AWE between %p %p\n",
1049                        awe->prev, awe->next));
1050  DEBUG_AWE(DEBUGPRINTF(DEBUG_AWE_PREFIX "< THCSchedule\n"));
1051}
1052
1053void THCAddCancelItem(cancel_item_t *ci, THCCancelFn_t fn, void *arg) {
1054  PTState_t *pts = PTS();
1055  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "> THCAddCancelItem(%p)\n", ci));
1056  ci->fn = fn;
1057  ci->arg = arg;
1058  ci->was_run = 0;
1059  finish_t *fb = pts->current_fb;
1060  assert(fb != NULL && "Current fb NULL");
1061  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "  FB %p\n", fb));
1062  ci->next = fb->cancel_item;
1063  fb->cancel_item = ci;
1064#ifndef NDEBUG
1065  PTS()->cancelsAdded ++;
1066#endif
1067  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "< THCAddCancelItem(%p)\n", ci));
1068}
1069
1070void THCRemoveCancelItem(cancel_item_t *ci) {
1071  PTState_t *pts = PTS();
1072  finish_t *fb = pts->current_fb;
1073  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "> THCRemoveCancelItem(%p) from FB %p\n",
1074                           ci, fb));
1075  assert(fb != NULL && "Current fb NULL");
1076  assert(!ci->was_run);
1077  cancel_item_t **cip = &(fb->cancel_item);
1078  while (*cip != NULL && *cip != ci) {
1079    cip = &((*cip)->next);
1080  }
1081  assert(*cip != NULL && "Cancel-item not found during remove");
1082  *cip = ci->next;
1083#ifndef NDEBUG
1084  PTS()->cancelsRemoved ++;
1085#endif
1086  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "< THCRemoveCancelItem(%p)\n", ci));
1087}
1088
1089int THCCancelItemRan(cancel_item_t *ci) {
1090  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "> THCCancelItemRan(%p) = %d\n",
1091                           ci, ci->was_run));
1092  return ci->was_run;
1093}
1094
1095int THCIsCancelRequested(void) {
1096  PTState_t *pts = PTS();
1097  finish_t *fb = pts->current_fb;
1098  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "> THCIsCancelRequested()\n"));
1099  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "  FB %p\n", fb));
1100  int result = fb->cancel_requested;
1101  DEBUG_CANCEL(DEBUGPRINTF(DEBUG_CANCEL_PREFIX "< THCIsCancelRequested()=%d\n", result));
1102  return result;
1103}
1104
// Disabled entry point (excluded from compilation by "#if 0").
// As written it would start the runtime, install the supplied idle
// function and arguments in the per-thread state, run fn(args), shut the
// runtime down and return fn's result.
#if 0
int THCRun(THCFn_t fn,
           void *args,
           THCIdleFn_t idle_fn,
           void *idle_args) {
  thc_start_rts();
  PTS()->idle_fn = idle_fn;
  PTS()->idle_args = idle_args;
  PTS()->idle_stack = NULL;
  int r = fn(args);
  thc_end_rts();
  return r;
}
#endif
1119
1120/**********************************************************************/
1121
1122// Start-of-day code for Barrelfish, where we initialize THC before
1123// entry to main.
1124
1125static int idle_ct = 0;
1126
1127static void IdleFn(void *arg) {
1128  int me = ++idle_ct;
1129  struct waitset *ws = get_default_waitset();
1130  PTState_t *pts = PTS();
1131
1132  while (!pts->shouldExit) {
1133    // Block for the next event to occur
1134    errval_t err = event_dispatch(ws);
1135    if (err_is_fail(err)) {
1136      assert(0 && "event_dispatch failed in THC idle function");
1137      abort();
1138    }
1139
1140    // Exit if a new idle loop has started (this will happen
1141    // if the handler called from event_dispatch blocks, e.g.,
1142    // in the bottom-half of a THC receive function)
1143    if (me != idle_ct) {
1144      break;
1145    }
1146
1147    // Yield while some real work is now available
1148    while (pts->aweHead.next != &pts->aweTail &&
1149           !pts->shouldExit) {
1150      THCYield();
1151    }
1152  }
1153}
1154
1155__attribute__((constructor))
1156static void thc_init(void) {
1157  thc_start_rts();
1158  PTS()->idle_fn = IdleFn;
1159  PTS()->idle_args = NULL;
1160  PTS()->idle_stack = NULL;
1161}
1162
// Destructor counterpart of thc_init: shuts the THC runtime down by
// calling thc_end_rts.
__attribute__((destructor))
static void thc_done(void) {
  thc_end_rts();
}
1167
1168//struct run_args {
1169//  int argc;
1170//  char **argv;
1171//};
1172//
1173//static int thcmain_wrapper(void *st) {
1174//  struct run_args *ra = (struct run_args *) st;
1175//  return thcmain(ra->argc, ra->argv);
1176//}
1177//
1178//int main(int argc, char *argv[])
1179//{
1180//  struct run_args ra;
1181//  ra.argc = argc;
1182//  ra.argv = argv;
1183//  return THCRun(thcmain_wrapper, &ra, IdleFn, NULL);
1184//}
1185
1186
1187/**********************************************************************/
1188
1189// Arch-OS specific code
1190
1191// 1. Stack allocation
1192
1193#if defined(WINDOWS) || defined(__CYGWIN__)
1194#include <Windows.h>
1195static void error_exit(LPTSTR lpszFunction)
1196{
1197    // Retrieve the system error message for the last-error code
1198    LPVOID lpMsgBuf;
1199    LPVOID lpDisplayBuf;
1200    DWORD dw = GetLastError();
1201
1202    FormatMessage(
1203        FORMAT_MESSAGE_ALLOCATE_BUFFER |
1204        FORMAT_MESSAGE_FROM_SYSTEM |
1205        FORMAT_MESSAGE_IGNORE_INSERTS,
1206        NULL,
1207        dw,
1208        MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
1209        (LPTSTR) &lpMsgBuf,
1210        0, NULL );
1211
1212    // Display the error message and exit the process
1213    printf("%s failed with error %ld:%s", lpszFunction, dw, lpMsgBuf);
1214    ExitProcess(dw);
1215}
1216
1217static void *thc_alloc_new_stack_0(void) {
1218  void *res = VirtualAlloc(NULL,
1219                           STACK_COMMIT_BYTES + STACK_GUARD_BYTES,
1220                           MEM_RESERVE,
1221                           PAGE_NOACCESS);
1222  if (!res) {
1223    error_exit(TEXT("VirtualAlloc(MEM_RESERVE)"));
1224  }
1225  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "  Reserved %p..%p\n",
1226                          res, res+STACK_COMMIT_BYTES+STACK_GUARD_BYTES));
1227  void *com = VirtualAlloc(res + STACK_GUARD_BYTES,
1228                           STACK_COMMIT_BYTES,
1229                           MEM_COMMIT,
1230                           PAGE_READWRITE);
1231  if (!com) {
1232    error_exit(TEXT("VirtualAlloc(MEM_COMMIT)"));
1233  }
1234  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "  Committed %p..%p\n",
1235                          com, com+STACK_COMMIT_BYTES));
1236  void *result = com + STACK_COMMIT_BYTES;
1237  return result;
1238}
1239#elif defined(linux)
1240#include <sys/mman.h>
1241#include <errno.h>
1242
1243static void *thc_alloc_new_stack_0(void) {
1244  void *res = mmap(NULL,
1245                   STACK_COMMIT_BYTES + STACK_GUARD_BYTES,
1246                   PROT_READ | PROT_WRITE,
1247                   MAP_PRIVATE | MAP_ANONYMOUS,
1248                   0, 0);
1249  if (!res) {
1250    fprintf(stderr, "URK: mmap returned errno=%d\n", errno);
1251    exit(-1);
1252  }
1253
1254  DEBUG_STACK(DEBUGPRINTF(DEBUG_STACK_PREFIX "  mmap %p..%p\n",
1255                          res, res+STACK_COMMIT_BYTES+STACK_GUARD_BYTES));
1256
1257  int r = mprotect(res, STACK_GUARD_BYTES, PROT_NONE);
1258  if (r) {
1259    fprintf(stderr, "URK: mprotect returned errno=%d\n", errno);
1260    exit(-1);
1261  }
1262
1263  res += STACK_GUARD_BYTES + STACK_COMMIT_BYTES;
1264  return res;
1265}
1266#elif defined(BARRELFISH)
1267static void *thc_alloc_new_stack_0(void) {
1268  char *res = malloc(STACK_COMMIT_BYTES+STACK_GUARD_BYTES);
1269  if (!res) {
1270    printf("URK: malloc failed\n");
1271    exit(-1);
1272  }
1273
1274  for (int i = 0; i < STACK_GUARD_BYTES; i ++) {
1275    res[i] = 0xff;
1276  }
1277
1278  //printf("Warning: stack %p..%p has no guard page\n",
1279  //       res + STACK_GUARD_BYTES, res + STACK_GUARD_BYTES + STACK_COMMIT_BYTES);
1280
1281  return res + STACK_GUARD_BYTES + STACK_COMMIT_BYTES;
1282}
1283#else
1284#error No definition for _thc_alloc_new_stack_0
1285#endif
1286
1287/***********************************************************************/
1288
1289// 3. AWE execution
1290//
1291// These functions are particularly delicate:
1292//
1293// (a) The _thc_schedulecont and _thc_callcont functions are called
1294//     with a pointer to an awe_t which has been alloca'd on the
1295//     caller's stack frame.  Aside from the stack/frame-pointers,
1296//     the caller is responsible for saving any registers that may
1297//     be live at the point of the call (including those which are
1298//     conventionally callee-save).  The _thc_schedulecont and
1299//     _thc_callcont functions initialize the AWE with the
1300//     stack/frame-pointer values for when the call returns, and
1301//     initializing the saved EIP with the instruction immediately
1302//     after that call.
1303//
1304// (b) A call to _thc_schedulecont returns normally with 0.
1305//
1306// (c) When an AWE is executed, the stack/frame-pointers are restored
1307//     and the register used for return values (e.g., EAX) is
1308//     initialized to non-0.
1309
1310#if (defined(__x86_64__) && (defined(linux) || defined(BARRELFISH)))
/*
            static void thc_awe_execute_0(awe_t *awe)    // rdi

   x86-64: switch to the context recorded in an AWE.  The AWE is read at
   byte offsets 0, 8 and 16, which the routines below label EIP, EBP and
   ESP respectively.  Does not return to its caller.

   NOTE(review): this routine subtracts 8 from the loaded stack pointer,
   while _thc_schedulecont stores rsp+8 and _thc_callcont stores rsp+16
   into the same slot -- confirm the differing adjustments are intended.
*/
__asm__ ("      .text \n\t"
         "      .align  16                 \n\t"
         "thc_awe_execute_0:               \n\t"
         " mov 8(%rdi), %rbp               \n\t" // rbp = value at offset 8
         " mov 16(%rdi), %rsp              \n\t" // rsp = value at offset 16
         " subq $8, %rsp                   \n\t" // rsp -= 8
         " jmp *0(%rdi)                    \n\t"); // jump to address at offset 0
1321
/*
           int _thc_schedulecont(awe_t *awe)   // rdi

   x86-64: fill in the AWE with the caller's return address, frame pointer
   and post-return stack pointer, pass it to _thc_schedulecont_c, and
   return 0 to the caller.
*/

__asm__ ("      .text \n\t"
         "      .align  16           \n\t"
         "      .globl  _thc_schedulecont \n\t"
         "      .type   _thc_schedulecont, @function \n\t"
         "_thc_schedulecont:         \n\t"
         " mov  0(%rsp), %rsi        \n\t" // rsi = return address (top of stack)
         " mov  %rsi,  0(%rdi)       \n\t" // EIP   (our return address)
         " mov  %rbp,  8(%rdi)       \n\t" // EBP
         " mov  %rsp, 16(%rdi)       \n\t" // ESP+8 (after return)
         " addq $8,   16(%rdi)       \n\t" // ... the +8 is applied in memory
         // AWE now initialized.  Call C function for scheduling.
         // It will return normally to us.  The AWE will resume
         // directly in our caller.
         " call _thc_schedulecont_c  \n\t"  // AWE still in rdi
         " movq $0, %rax             \n\t"  // return value 0
         " ret                       \n\t");
1342
/*
           void _thc_callcont(awe_t *awe,   // rdi
                   THCContFn_t fn,          // rsi
                   void *args) {            // rdx

   x86-64: fill in the AWE from the caller's context and call
   _thc_callcont_c with the three arguments still in rdi/rsi/rdx.  The
   trailing int3 traps if that call ever returns here.

   NOTE(review): the stored stack pointer is rsp+16, whereas
   _thc_schedulecont stores rsp+8 -- confirm this is intended (it pairs
   with the "subq $8" in thc_awe_execute_0).
*/

__asm__ ("      .text \n\t"
         "      .align  16           \n\t"
         "      .globl  _thc_callcont \n\t"
         "      .type   _thc_callcont, @function \n\t"
         "_thc_callcont:             \n\t"
         " mov  0(%rsp), %rax        \n\t" // rax = return address (top of stack)
         " mov  %rax,  0(%rdi)       \n\t" // EIP (our return address)
         " mov  %rbp,  8(%rdi)       \n\t" // EBP
         " mov  %rsp, 16(%rdi)       \n\t" // ESP slot = rsp ...
         " addq $16,   16(%rdi)       \n\t" // ... + 16
         // AWE now initialized.  Call into C for the rest.
         // rdi : AWE , rsi : fn , rdx : args
         " subq $8, %rsp             \n\t" // align the stack
         " call _thc_callcont_c      \n\t"
         " int3\n\t");
1364
/*
            static void _thc_lazy_awe_marker()

   x86-64: two exported labels, _thc_lazy_awe (a nop) directly followed by
   _thc_lazy_awe_marker (an int3).  Despite the "static" in the prototype
   above, both symbols are declared .globl.
*/

__asm__ ("      .text \n\t"
         "      .align  16            \n\t"
         "      .globl  _thc_lazy_awe \n\t"
         "      .globl  _thc_lazy_awe_marker \n\t"
	 " _thc_lazy_awe:            \n\t" /* This is for debugging so we get */
         " nop                       \n\t" /* a sensible call stack           */
	 " _thc_lazy_awe_marker:     \n\t"
	 " int3                      \n\t" /* should never be called */
	 );
1378
1379#elif (defined(__i386__) && (defined(linux) || defined(BARRELFISH)))
1380
/*
            static void thc_awe_execute_0(awe_t *awe)    // 4

   i386 (Linux/Barrelfish): switch to the context recorded in an AWE.  The
   AWE pointer is read from the stack at 4(%esp); its words at offsets
   0, 4 and 8 are the ones the routines below label EIP, EBP and ESP.
   Does not return to its caller.
*/

__asm__ ("      .text                     \n\t"
         "      .align  16                \n\t"
         "      .globl  thc_awe_execute_0 \n\t"
         "thc_awe_execute_0:              \n\t"
         " mov 4(%esp), %eax              \n\t" // eax = awe (first stack argument)
         " mov 4(%eax), %ebp              \n\t" // ebp = value at offset 4
         " mov 8(%eax), %esp              \n\t" // esp = value at offset 8
         " jmp *0(%eax)                   \n\t"); // jump to address at offset 0
1393
/*
           int _thc_schedulecont(awe_t *awe)   // 4

   i386 (Linux/Barrelfish): fill in the AWE with the caller's return
   address, frame pointer and post-return stack pointer, call
   _thc_schedulecont_c(awe), and return 0.  %esi is used as a scratch
   register and is not restored here.
*/

__asm__ ("      .text                     \n\t"
         "      .align  16           \n\t"
         "      .globl  _thc_schedulecont \n\t"
         "_thc_schedulecont:         \n\t"
         " movl 4(%esp), %eax        \n\t" // eax = awe
         " movl 0(%esp), %esi        \n\t" // esi = return address
         " movl %esi,  0(%eax)       \n\t" // EIP   (our return address)
         " movl %ebp,  4(%eax)       \n\t" // EBP
         " movl %esp,  8(%eax)       \n\t" // ESP+4 (after return)
         " addl $4,    8(%eax)       \n\t" // ... the +4 is applied in memory
         // AWE now initialized.  Call C function for scheduling.
         // It will return normally to us.  The AWE will resume
         // directly in our caller.
         " pushl %eax                \n\t" // argument: awe
         " call _thc_schedulecont_c  \n\t"
         " popl %eax                 \n\t" // drop the argument
         " movl $0, %eax             \n\t" // return value 0
         " ret                       \n\t");
1416
/*
           void _thc_callcont(awe_t *awe,   // 4
                   THCContFn_t fn,          // 8
                   void *args) {            // 12

   i386 (Linux/Barrelfish): fill in the AWE from the caller's context,
   then call _thc_callcont_c(awe, fn, args).  The trailing int3 traps if
   that call ever returns here.  %esi and %edi are used as scratch
   registers and are not restored.
*/

__asm__ ("      .text                     \n\t"
         "      .align  16           \n\t"
         "      .globl  _thc_callcont \n\t"
         "_thc_callcont:             \n\t"
         " movl 4(%esp), %eax        \n\t" // eax = awe
         " movl 0(%esp), %esi        \n\t" // esi = return address
         " movl %esi, 0(%eax)        \n\t" // EIP (our return address)
         " movl %ebp, 4(%eax)        \n\t" // EBP
         " movl %esp, 8(%eax)        \n\t" // ESP
         " addl $4, 8(%eax)          \n\t" // ... + 4 (applied in memory)
         // AWE now initialized.  Call into C for the rest.
         " movl 8(%esp), %edi        \n\t" // fn
         " movl 12(%esp), %esi       \n\t" // args
         // Set up stack frame for callee:
         " pushl %esi                \n\t" // third argument: args
         " pushl %edi                \n\t" // second argument: fn
         " pushl %eax                \n\t" // first argument: awe
         " call _thc_callcont_c      \n\t"
         " int3\n\t");
1442
/*
            static void _thc_lazy_awe_marker()

   i386 (Linux/Barrelfish): two exported labels, _thc_lazy_awe (a nop)
   directly followed by _thc_lazy_awe_marker (an int3).  Despite the
   "static" in the prototype above, both symbols are declared .globl.
*/

__asm__ ("      .text \n\t"
         "      .align  16            \n\t"
         "      .globl  _thc_lazy_awe \n\t"
         "      .globl  _thc_lazy_awe_marker \n\t"
	 " _thc_lazy_awe:            \n\t" /* This is for debugging so we get */
         " nop                       \n\t" /* a sensible call stack           */
	 " _thc_lazy_awe_marker:     \n\t"
	 " int3                      \n\t" /* should never be called */
	 );
1456
1457#elif (defined(__i386__) && (defined(WINDOWS) || defined(__CYGWIN__)))
1458
/*
            static void thc_awe_execute_0(awe_t *awe)    // 4

   i386 (Windows/Cygwin): same instruction sequence as the Linux i386
   variant; the assembly label carries an extra leading underscore.
   Does not return to its caller.
*/

__asm__ ("      .text                     \n\t"
         "      .align  16                 \n\t"
         "      .globl  _thc_awe_execute_0 \n\t"
         "_thc_awe_execute_0:              \n\t"
         " mov 4(%esp), %eax               \n\t" // eax = awe (first stack argument)
         " mov 4(%eax), %ebp               \n\t" // ebp = value at offset 4
         " mov 8(%eax), %esp               \n\t" // esp = value at offset 8
         " jmp *0(%eax)                    \n\t"); // jump to address at offset 0
1471
/*
           int _thc_schedulecont(awe_t *awe)   // 4

   i386 (Windows/Cygwin): same instruction sequence as the Linux i386
   variant; the assembly labels (this routine and the C callee) carry an
   extra leading underscore.  Returns 0.
*/

__asm__ ("      .text                     \n\t"
         "      .align  16           \n\t"
         "      .globl  __thc_schedulecont \n\t"
         "__thc_schedulecont:        \n\t"
         " movl 4(%esp), %eax        \n\t" // eax = awe
         " movl 0(%esp), %esi        \n\t" // esi = return address
         " movl %esi,  0(%eax)       \n\t" // EIP   (our return address)
         " movl %ebp,  4(%eax)       \n\t" // EBP
         " movl %esp,  8(%eax)       \n\t" // ESP+4 (after return)
         " addl $4,    8(%eax)       \n\t" // ... the +4 is applied in memory
         // AWE now initialized.  Call C function for scheduling.
         // It will return normally to us.  The AWE will resume
         // directly in our caller.
         " pushl %eax                \n\t" // argument: awe
         " call __thc_schedulecont_c \n\t"
         " popl %eax                 \n\t" // drop the argument
         " movl $0, %eax             \n\t" // return value 0
         " ret                       \n\t");
1494
/*
           void _thc_callcont(awe_t *awe,   // 4
                   THCContFn_t fn,          // 8
                   void *args) {            // 12

   i386 (Windows/Cygwin): same instruction sequence as the Linux i386
   variant; the assembly labels carry an extra leading underscore.  The
   trailing int3 traps if the C callee ever returns here.
*/

__asm__ ("      .text                     \n\t"
         "      .align  16           \n\t"
         "      .globl  __thc_callcont \n\t"
         "__thc_callcont:            \n\t"
         " movl 4(%esp), %eax        \n\t" // eax = awe
         " movl 0(%esp), %esi        \n\t" // esi = return address
         " movl %esi, 0(%eax)        \n\t" // EIP (our return address)
         " movl %ebp, 4(%eax)        \n\t" // EBP
         " movl %esp, 8(%eax)        \n\t" // ESP
         " addl $4, 8(%eax)          \n\t" // ... + 4 (applied in memory)
         // AWE now initialized.  Call into C for the rest.
         " movl 8(%esp), %edi        \n\t" // fn
         " movl 12(%esp), %esi       \n\t" // args
         // Set up stack frame for callee:
         " pushl %esi                \n\t" // third argument: args
         " pushl %edi                \n\t" // second argument: fn
         " pushl %eax                \n\t" // first argument: awe
         " call __thc_callcont_c     \n\t"
         " int3\n\t");
1520
/*
            static void _thc_lazy_awe_marker()

   i386 (Windows/Cygwin): marker labels, a nop followed by an int3.

   Every other routine in this Windows/Cygwin branch names its assembly
   label with one more leading underscore than the C identifier
   (thc_awe_execute_0 -> _thc_awe_execute_0, _thc_schedulecont ->
   __thc_schedulecont).  The markers are therefore also exported under the
   prefixed names __thc_lazy_awe / __thc_lazy_awe_marker so that C
   references to _thc_lazy_awe / _thc_lazy_awe_marker resolve.  The
   original single-underscore labels are kept at the same addresses for
   compatibility.
*/

__asm__ ("      .text \n\t"
         "      .align  16            \n\t"
         "      .globl  _thc_lazy_awe \n\t"
         "      .globl  __thc_lazy_awe \n\t"
         "      .globl  _thc_lazy_awe_marker \n\t"
         "      .globl  __thc_lazy_awe_marker \n\t"
         " _thc_lazy_awe:            \n\t" /* This is for debugging so we get */
         " __thc_lazy_awe:           \n\t"
         " nop                       \n\t" /* a sensible call stack           */
         " _thc_lazy_awe_marker:     \n\t"
         " __thc_lazy_awe_marker:    \n\t"
         " int3                      \n\t" /* should never be called */
         );
1534
1535#elif (defined(__arm__) && (defined(linux) || defined(BARRELFISH) \
1536                        && !defined(__ARM_ARCH_7M__)))
1537// NOTES:
1538//  - not sure about alignment (.align)
1539
/*
            static void thc_awe_execute_0(awe_t *awe)    // r0

   ARM (32-bit): switch to the context recorded in an AWE by loading sp,
   fp and finally pc from the AWE words at offsets 8, 4 and 0.  Does not
   return to its caller.
*/

__asm__ (" .text              \n\t"
         " .align  2          \n\t"
         "thc_awe_execute_0:  \n\t"
         " ldr sp, [r0, #8]   \n\t" // sp = awe->esp (stack pointer)
         " ldr fp, [r0, #4]   \n\t" // fp = awe->ebp (frame pointer)
         " ldr pc, [r0, #0]   \n\t" // pc = awe->eip (jump / pc)
);
1551
/*
           int _thc_schedulecont(awe_t *awe)   // r0

   ARM (32-bit): save fp, the entry sp (via ip) and lr on the stack, fill
   in the AWE, call _thc_schedulecont_c(awe), then return 0 by reloading
   fp, sp and pc from the saved words.

   Note that the sp value written to the AWE is the one in effect after
   the push below.
*/

__asm__ (" .text                    \n\t"
         " .align  2                \n\t"
         " .globl _thc_schedulecont \n\t"
         " .type _thc_schedulecont, %function \n\t"
         "_thc_schedulecont:  \n\t"
         // save fp, sp, lr in stack (similarly to what gcc does)
         // from ARM Architecture Reference Manual ARMv7-A and ARMv7-R
         // PUSH (A8-248):
         // "The SP and PC can be in the list in ARM code, but not in Thumb
         //  code. However, ARM instructions that include the SP or the PC in
         //  the list are deprecated, and if the SP is in the list, the value
         //  the instruction stores for the SP is UNKNOWN."
         // Hence sp is copied to ip first and ip is pushed instead.
         " mov ip, sp         \n\t"
         " push {fp, ip, lr}  \n\t"
         // set awe
         " str lr, [r0, #0]   \n\t" // awe->eip = lr (return address)
         " str fp, [r0, #4]   \n\t" // awe->ebp = fp (frame pointer)
         " str sp, [r0, #8]   \n\t" // awe->esp = sp (stack pointer)
         // Call C function void _thc_schedulecont_c(awe_t *awe)
         // awe still in r0
         " bl _thc_schedulecont_c \n\t"
         // return 0
         "mov r0, #0 \n\t"
         // restore saved state. We return by restoring lr in the pc
         " ldm sp, {fp, sp, pc} \n\t"
);
1582
/*
           __attribute__((returns_twice))
           void _thc_callcont(awe_t *awe,   // r0
                   THCContFn_t fn,          // r1
                   void *args) {            // r2

   ARM (32-bit): fill in the AWE with lr, fp and sp as they are on entry,
   then call _thc_callcont_c with the arguments still in r0-r2.  The
   instructions after the call load from address 0xffffffff, intended to
   fault if the call ever returns here.
*/

__asm__ (" .text                          \n\t"
         " .align  2                      \n\t"
         " .globl _thc_callcont           \n\t"
         " .type _thc_callcont, %function \n\t"
         "_thc_callcont:                  \n\t"
         // set  awe
         " str lr, [r0, #0]   \n\t" // awe->eip = lr (return address)
         " str fp, [r0, #4]   \n\t" // awe->ebp = fp (frame pointer)
         " str sp, [r0, #8]   \n\t" // awe->esp = sp (stack pointer)
         // AWE now initialized.  Call into C for the rest.
         // r0 : AWE , r1 : fn , r2 : args
         " bl _thc_callcont_c\n\t"
         // hopefully a fault (x86 does int3)
         " mov r0, #0xffffffff \n\t"
         " ldr r0, [r0] \n\t"
);
1606#elif (defined(__aarch64__) && (defined(linux) || defined(BARRELFISH)))
1607// NOTES:
1608//  - not sure about alignment (.align)
1609
/*
            static void thc_awe_execute_0(awe_t *awe)    // x0

   AArch64: switch to the context recorded in an AWE.  x29 is used as a
   temporary to move the saved stack pointer into sp, then reloaded with
   the saved frame pointer; the saved resume address is loaded into x30
   and branched to.  Does not return to its caller.
*/

__asm__ (" .text              \n\t"
         " .align  6          \n\t"
         "thc_awe_execute_0:  \n\t"
		 " ldr x29, [x0, #16]  \n\t" // x29 = awe->esp (temporary)
         " mov sp,  x29        \n\t" // sp = awe->esp (stack pointer)
         " ldr x29, [x0, #8]   \n\t" // fp = awe->ebp (frame pointer)
         " ldr x30, [x0, #0]  \n\t" // x30 = awe->eip (resume address)
		 " br x30			  \n\t"
);
1623
/*
           int _thc_schedulecont(awe_t *awe)   // x0

   AArch64: save x29, x30 and the entry sp in a 24-byte stack area, fill
   in the AWE, call _thc_schedulecont_c(awe), then return 0 after
   restoring x29, x30 and sp.

   The sp value written to the AWE is the one after the 24-byte
   adjustment.  x28 is used as scratch and is not preserved.

   NOTE(review): subtracting 24 leaves sp at a value that is not a
   multiple of 16 if it was 16-byte aligned on entry, and sp is then used
   as a base register -- confirm this is acceptable on the targeted
   platforms.
*/

__asm__ (" .text                    \n\t"
         " .align  6                \n\t"
         " .globl _thc_schedulecont \n\t"
         " .type _thc_schedulecont, %function \n\t"
         "_thc_schedulecont:  \n\t"
         // save fp, lr and the entry sp on the stack.
         // (The quotation below is carried over from the 32-bit ARM
         //  variant of this routine.)
         // from ARM Architecture Reference Manual ARMv7-A and ARMv7-R
         // PUSH (A8-248):
         // "The SP and PC can be in the list in ARM code, but not in Thumb
         //  code. However, ARM instructions that include the SP or the PC in
         //  the list are deprecated, and if the SP is in the list, the value
         //  the instruction stores for the SP is UNKNOWN."
         " mov  x28, sp       \n\t"
		 " sub  sp, sp, #24		  \n\t"
		 " str  x29, [sp, #0] \n\t"
		 " str  x30, [sp, #8]   \n\t" // saved lr (return address)
		 " str  x28, [sp, #16] \n\t"

         // set awe
         " str x30, [x0, #0]   \n\t" // awe->eip = lr (return address)
         " str x29, [x0, #8]   \n\t" // awe->ebp = fp (frame pointer)
		 " mov x28, sp		  \n\t"
         " str x28, [x0, #16]   \n\t" // awe->esp = sp (stack pointer)
         // Call C function void _thc_schedulecont_c(awe_t *awe)
         // awe still in x0
         " bl _thc_schedulecont_c \n\t"
         // return 0
         " mov x0, #0 \n\t"
         // restore saved state, then return through x30
		 " ldr x29, [sp], #8	\n\t"
		 " ldr x30, [sp], #8    \n\t"
		 " ldr x28, [sp], #8    \n\t"
		 " mov sp, x28			\n\t"
		 " ret          \n\t"
);
1663
/*
           __attribute__((returns_twice))
           void _thc_callcont(awe_t *awe,   // x0
                   THCContFn_t fn,          // x1
                   void *args) {            // x2

   AArch64: fill in the AWE with x30, x29 and sp as they are on entry,
   then call _thc_callcont_c with the arguments still in x0-x2.  The
   instructions after the call load from address 0xffffffff, intended to
   fault if the call ever returns here.  x28 is used as scratch and is not
   preserved.
*/

__asm__ (" .text                          \n\t"
         " .align  6                      \n\t"
         " .globl _thc_callcont           \n\t"
         " .type _thc_callcont, %function \n\t"
         "_thc_callcont:                  \n\t"

		 // set  awe
         " mov  x28, sp        \n\t"
         " str  x30, [x0, #0]  \n\t" // awe->eip = lr (return address)
         " str  x29, [x0, #8]  \n\t" // awe->ebp = fp (frame pointer)
         " str  x28, [x0, #16] \n\t" // awe->esp = sp (stack pointer)

		 // AWE now initialized.  Call into C for the rest.
         // x0 : AWE , x1 : fn , x2 : args
         " bl _thc_callcont_c\n\t"
         // hopefully a fault (x86 does int3)
         " mov x0, #0xffffffff \n\t"
         " ldr x0, [x0] \n\t"
);
1690
1691
1692#else
1693void thc_awe_execute_0(awe_t *awe) {
1694  assert(0 && "_thc_awe_execute_0 not implemented for this architecture");
1695}
1696
1697int _thc_schedulecont(awe_t *awe) {
1698  assert(0 && "_thc_schedulecont not implemented for this architecture");
1699  return 0;
1700}
1701
1702void _thc_callcont(awe_t *awe,
1703                   THCContFn_t fn,
1704                   void *args) {
1705  assert(0 && "_thc_callcont not implemented for this architecture");
1706}
1707
// Fallback for unsupported architectures: always fails if called.
// abort() follows the assert so the failure is not silently skipped when
// NDEBUG is set.
void _thc_lazy_awe_marker(void) {
  assert(0 && "_thc_lazy_awe_marker not implemented for this architecture");
  abort();
}
1711#endif
1712
1713/***********************************************************************/
1714
1715// 4. Per-thread state
1716
1717#if defined(WINDOWS) || defined(__CYGWIN__)
1718volatile int TlsInitLatch = 0;
1719volatile DWORD TlsIndex = 0;
1720
1721static PTState_t *thc_get_pts_0(void) {
1722  if (!TlsIndex) {
1723    do {
1724      if (__sync_bool_compare_and_swap(&TlsInitLatch, 0, 1)) {
1725        break;
1726      }
1727    } while (1);
1728    if (!TlsIndex) {
1729      TlsIndex = TlsAlloc();
1730      if (TlsIndex == TLS_OUT_OF_INDEXES) {
1731        error_exit("TlsAlloc failed");
1732      }
1733    }
1734    TlsInitLatch = 0;
1735  }
1736
1737  return (PTState_t *) (TlsGetValue(TlsIndex));
1738}
1739
1740static void thc_set_pts_0(PTState_t *st) {
1741  if (!TlsIndex) {
1742    DWORD index = TlsAlloc();
1743    if (index == TLS_OUT_OF_INDEXES) {
1744      error_exit("TlsAlloc failed");
1745    }
1746  }
1747
1748  if (!TlsSetValue(TlsIndex, st)) {
1749    error_exit("TlsSetValue failed");
1750  }
1751}
1752#elif defined(BARRELFISH)
1753static PTState_t *thc_get_pts_0(void) {
1754  return (PTState_t*)thread_get_tls();
1755}
1756
1757static void thc_set_pts_0(PTState_t *st) {
1758  thread_set_tls((void*)st);
1759}
1760#elif defined(linux)
1761volatile int TlsInitLatch = 0;
1762volatile int TlsDoneInit = 0;
1763pthread_key_t TlsKey = 0;
1764
1765static PTState_t *thc_get_pts_0(void) {
1766  if (!TlsDoneInit) {
1767    do {
1768      if (__sync_bool_compare_and_swap(&TlsInitLatch, 0, 1)) {
1769        break;
1770      }
1771    } while (1);
1772    if (!TlsDoneInit) {
1773      int r = pthread_key_create(&TlsKey, NULL);
1774      assert((!r) && "pthread_key_create failed");
1775      TlsDoneInit = 1;
1776    }
1777    TlsInitLatch = 0;
1778  }
1779
1780  return (PTState_t *) (pthread_getspecific(TlsKey));
1781}
1782
1783static void thc_set_pts_0(PTState_t *st) {
1784  assert(TlsDoneInit);
1785  pthread_setspecific(TlsKey, (void*)st);
1786}
1787#else
1788#error No definition for thc_get_pts_0
1789#endif
1790
1791
1792/**********************************************************************/
1793