/* Barrelfish THC language extensions */

/*
 * Copyright (c) 2015, ETH Zurich.
 * Copyright (c) 2015, Hewlett Packard Enterprise Development LP.
 * All rights reserved.
 *
 * This file is distributed under the terms in the attached LICENSE file.
 * If you do not find this file, copies can be found by writing to:
 * ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
 */

#ifndef _THC_INTERNAL_H_
#define _THC_INTERNAL_H_

/***********************************************************************/

// Forward declarations of the runtime's core data structures.
// NOTE(review): this stack_t shadows the POSIX stack_t from <signal.h>;
// confirm the two are never visible in the same translation unit.
typedef struct ptstate_t PTState_t;
typedef struct stack_t stack_t;
typedef struct finish_t finish_t;

// Definition of an AWE, asynchronous work element. This definition must
// match the assembly-language definitions at the bottom of thc.c which
// access fields in the AWE structure.

// How an AWE's continuation state was (or still needs to be) captured;
// set by CALL_CONT (EAGER_AWE) and CALL_CONT_LAZY (LAZY_AWE) below.
enum awe_status {
  EAGER_AWE = 0,
  LAZY_AWE,
  NEEDS_LAZY_STACK,
  ALLOCATED_LAZY_STACK
};

struct awe_t {
  // Fields representing the code to run when the AWE is executed:
  // saved instruction, frame and stack pointers (x86 naming is kept
  // even on other architectures).
  void *eip;
  void *ebp;
  void *esp;

  // Can be EAGER_AWE, LAZY_AWE, NEEDS_LAZY_STACK or ALLOCATED_LAZY_STACK
  // (see enum awe_status above).
  enum awe_status status;

  // Stack which is allocated if the caller yields to this AWE.
  void *lazy_stack;

  // Link from an AWE to the per-thread state for the thread it
  // runs in.
  PTState_t *pts;

  // Link from an AWE to the immediately-enclosing finish
  finish_t *current_fb;

  // Fields used by the runtime system to link together AWEs, e.g.,
  // on a thread's run-queue, or on a list of waiters on a
  // synchronization object.
  awe_t *prev;
  awe_t *next;
};

/***********************************************************************/

// Definition of a finish block's data structure.
//
// Finish blocks are held on a linked list threaded through the start_node
// and end_node fields.  The blocks dynamically nested within a given
// finish block are held between these two nodes.  (This enables easy
// iteration through all these dynamically nested blocks).

typedef struct finish_list_t finish_list_t;

// One node of the doubly-linked list of finish blocks; fb points back
// to the finish_t this node belongs to.
struct finish_list_t {
  finish_list_t *prev;
  finish_list_t *next;
  finish_t *fb;
};

struct finish_t {
  void *old_sp;             /* stack pointer when entering do {} finish */
  unsigned long count;
  awe_t *finish_awe;
  int fb_kind;
  int cancel_requested;
  finish_list_t start_node;
  finish_list_t end_node;
  finish_t *enclosing_fb;   /* immediately-enclosing finish block, if any */
  void *enclosing_lazy_stack;
  cancel_item_t *cancel_item;
};

/***********************************************************************/

// Per-thread runtime system state

// Singly-linked free list of stack memory regions (see free_stacks below).
struct stack_t {
  stack_t *next;
};

struct ptstate_t {

  // Thread-local fields: .............................................

  // Head/tail sentinels of the dispatch list
  awe_t aweHead;
  awe_t aweTail;

  // Immediately-enclosing finish block for the currently running code
  finish_t *current_fb;

  // Initialization / termination flags
  int doneInit;
  int shouldExit;

  // Stack that the thread's dispatch loop will run on
  void *dispatchStack;

  // If we are running on a lazily allocated stack, this will point to its start
  void *curr_lazy_stack;

  // Function to execute whenever the dispatch loop is idle (e.g.,
  // to block the thread until an incoming message which might change
  // the state of the dispatch loop).
  THCIdleFn_t idle_fn;
  void *idle_args;
  void *idle_stack;

  // Stack to be de-allocated on the next execution of the dispatch loop
  // (an async call terminates by re-entering the dispatch loop with
  // pendingFree set to the stack it was using.  It cannot deallocate
  // its own stack while it is in use).
  void *pendingFree;

  // AWE to enter for the dispatch loop on this thread
  awe_t dispatch_awe;

  // Free stacks for re-use
  stack_t *free_stacks;


#ifndef NDEBUG
  // Debugging statistics
  int stackMemoriesAllocated;
  int stackMemoriesDeallocated;
  int stacksAllocated;
  int stacksDeallocated;
  int finishBlocksStarted;
  int finishBlocksEnded;
  int asyncCallsStarted;
  int asyncCallsEnded;
  int aweCreated;
  int aweResumed;
  int idleStarted;
  int idleComplete;
  int cancelsRequested;
  int cancelsAdded;
  int cancelsRun;
  int cancelsRemoved;
  int getTls;
  int lock;
  int sendCount;
  int recvCount;
#endif

  // Shared fields: ...................................................

  // Latch protecting the dispatch list
  struct thc_latch latch;

  // Head/tail sentinels of the remote dispatch list on which other
  // threads place AWEs that they have unblocked but which belong to
  // this thread
  awe_t aweRemoteHead;
  awe_t aweRemoteTail;
};

// Continuation function invoked by _thc_callcont: receives the AWE
// (as cont) and the user argument.
typedef void (*THCContFn_t)(void *cont, void *args);

void *_thc_allocstack(void);
void _thc_freestack(void *s);
void _thc_onaltstack(void *s, void *fn, void *args);
void _thc_startasync(void *f, void *stack);
void _thc_endasync(void *f, void *s);
void _thc_startfinishblock(finish_t *fb, int fb_kind);
void _thc_endfinishblock(finish_t *fb, void *stack);
void _thc_do_cancel_request(finish_t *fb);
// returns_twice: as with setjmp, control can come back through these
// calls again when the saved continuation is resumed; the attribute
// stops the compiler from caching values across the call.
void _thc_callcont(awe_t *awe, THCContFn_t fn, void *args) __attribute__((returns_twice));
int _thc_schedulecont(awe_t *awe) __attribute__((returns_twice));
void _thc_lazy_awe_marker(void);
void _thc_pendingfree(void);

/***********************************************************************/

// Symbols declared in the .text.nx section

extern int _start_text_nx;
extern int _end_text_nx;

196/***********************************************************************/ 197 198/* Macro to force callee-saves to be spilled to the stack */ 199 200#if defined(__x86_64__) 201#define KILL_CALLEE_SAVES() \ 202 __asm__ volatile ("" : : : "rbx", "r12", "r13", "r14", "r15", \ 203 "memory", "cc") 204#elif defined(__i386__) 205#ifdef __pic__ 206#define KILL_CALLEE_SAVES() \ 207 __asm__ volatile ("" : : : "edi", "esi", "esp", "memory", "cc") 208#else 209#define KILL_CALLEE_SAVES() \ 210 __asm__ volatile ("" : : : "ebx", "edi", "esi", "esp", "memory", "cc") 211#endif 212#elif defined(__arm__) 213// see ARM Procedure Call Standard (APCS): 5.1 Machine Registers 214// NB: gcc complains about clobbering two registers: 215// . v8 (i.e., r11), is the frame pointer in ARM and cannot be clobbered 216// . v6 is the PIC register 217// 218#if defined(__pic__) 219 #define KILL_CALLEE_SAVES() \ 220 __asm__ volatile ("" : : : "sp", \ 221 "v1", "v2", "v3", "v4", "v5", "v7", \ 222 "s16", "s17", "s18", "s19", "s20", "s21", "s22", \ 223 "s23", "s24", "s25", "s26", "s27", "s28", "s29", \ 224 "s30", "31", \ 225 "memory") 226#else // same as before, but including v6 227 #define KILL_CALLEE_SAVES() \ 228 __asm__ volatile ("" : : : "sp", \ 229 "v1", "v2", "v3", "v4", "v5", "v6", "v7", \ 230 "s16", "s17", "s18", "s19", "s20", "s21", "s22", \ 231 "s23", "s24", "s25", "s26", "s27", "s28", "s29", \ 232 "s30", "31", \ 233 "memory") 234 235#endif 236#elif defined(__aarch64__) 237 #define KILL_CALLEE_SAVES() \ 238 __asm__ volatile ("" : : : \ 239 "x19", "x20", "x21", "x22", "x23", "x24", "x25", \ 240 "x26", "x27", "x28", \ 241 "31", \ 242 "memory") 243 244#else 245#error "Need definition of KILL_CALLEE_SAVES" 246#endif 247 248#define __WORD_SIZE (sizeof(void*)) 249 250 251/***********************************************************************/ 252 253#ifdef CONFIG_LAZY_THC 254 255/***********************************************************************/ 256 257#if defined(__x86_64__) 258/* Force 
args on stack - there must be a better way of doing this, but */ 259/* regparam(0) doesn't work on x86_64 */ 260#define FORCE_ARGS_STACK void*__a, void*__b, void*__c, void*__d, \ 261 void*__e, void*__f, 262#define FORCE_ARGS_STACK_CALL NULL, NULL, NULL, NULL, NULL, NULL, 263#elif defined(__i386__) 264#define FORCE_ARGS_STACK 265#define FORCE_ARGS_STACK_CALL 266#elif defined(__arm__) || defined(__aarch64__) 267#define FORCE_ARGS_STACK assert(0 && "THC not yet implemented on ARM") 268#define FORCE_ARGS_STACK_CALL assert(0 && "THC not yet implemented on ARM") 269#elif defined(__aarch64__) 270#define FORCE_ARGS_STACK assert(0 && "THC not yet implemented on ARM") 271#define FORCE_ARGS_STACK_CALL assert(0 && "THC not yet implemented on ARM") 272#else 273#error "Need definition of FORCE_ARGS_STACK" 274#endif 275 276#define FORCE_FRAME_POINTER_USE \ 277 /* Do a zero byte alloca to force local variable access via ebp */ \ 278 /* Note, this does not add any code (even with -O0. */ \ 279 __builtin_alloca(0) 280 281#if defined(__x86_64__) 282#define GET_STACK_POINTER(STACK_PTR) \ 283 __asm__ volatile ("movq %%rsp, %0 \n\t" \ 284 : "=m"(STACK_PTR) : ) 285#define RESTORE_OLD_STACK_POINTER(OLD_STACK_PTR) \ 286 __asm__ volatile ("movq %0, %%rsp \n\t" \ 287 : : "m"(OLD_STACK_PTR)) 288#elif defined(__i386__) 289#define GET_STACK_POINTER(STACK_PTR) \ 290 __asm__ volatile ("movl %%esp, %0 \n\t" \ 291 : "=m"(STACK_PTR) : ) 292#define RESTORE_OLD_STACK_POINTER(OLD_STACK_PTR) \ 293 __asm__ volatile ("movl %0, %%esp \n\t" \ 294 : : "m"(OLD_STACK_PTR)) 295#elif defined(__arm__) || defined(__aarch64__) 296#define GET_STACK_POINTER(_) assert(0 && "THC not yet implemented on ARM") 297#define RESTORE_OLD_STACK_POINTER(_) assert(0 && "THC not yet implemented on ARM") 298#else 299#error "Need definition of GET_STACK_POINTER and RESTORE_OLD_STACK_POINTER" 300#endif 301 302 303#if defined(__x86_64__) || defined(__i386__) 304// INIT_LAZY_AWE() is used in the beggining of the nested function in 
ASYNC_. 305// The signature of the nested function is: 306// void _thc_nested_async(FORCE_ARGS_STACK awe_t *awe) 307// 308// So in INIT_LAZY_AWE, the stack in x86 looks like: 309// sp -> 310// ....... 311// rbp-> [ saved rbp ] rbp[0] 312// [ RET ] rbp[1] 313// [ awe ] rbp[2] (passed as first arg) 314#define THC_LAZY_FRAME_PREV(FRAME_PTR) *((FRAME_PTR)+0) 315#define THC_LAZY_FRAME_RET(FRAME_PTR) *((FRAME_PTR)+1) 316#define THC_LAZY_FRAME_AWE(FRAME_PTR) *((FRAME_PTR)+2) 317#endif 318 319#if defined(__x86_64__) 320#define INIT_LAZY_AWE(AWE_PTR, LAZY_MARKER) \ 321 __asm__ volatile ( \ 322 " movq 8(%%rbp), %%rsi \n\t" \ 323 " movq %%rsi, 0(%0) \n\t" /* RIP (our return address) */ \ 324 " movq 0(%%rbp), %%rsi \n\t" \ 325 " movq %%rsi, 8(%0) \n\t" /* RBP */ \ 326 " movq %1, 8(%%rbp) \n\t" /* put marker as ret address */ \ 327 : : "r"((AWE_PTR)), "r"((LAZY_MARKER)) : "rsi" ); 328#define RETURN_CONT(JMP_ADDR) \ 329 __asm__ volatile ( \ 330 " movq %rbp, %rsp \n\t" /* free frame */ \ 331 " popq %rbp \n\t" /* restore rbp */ \ 332 " addq $8, %rsp \n\t" /* pop old ret address */ \ 333 " jmp " JMP_ADDR " \n\t" /* jump to continuation */ \ 334 ); 335#elif defined(__i386__) 336#define INIT_LAZY_AWE(AWE_PTR, LAZY_MARKER) \ 337 __asm__ volatile ( \ 338 " movl 4(%%ebp), %%esi \n\t" \ 339 " movl %%esi, 0(%0) \n\t" /* EIP (our return address) */ \ 340 " movl 0(%%ebp), %%esi \n\t" \ 341 " movl %%esi, 4(%0) \n\t" /* EBP */ \ 342 " movl %1, 4(%%ebp) \n\t" /* put marker as ret address */ \ 343 : : "r"((AWE_PTR)), "r"((LAZY_MARKER)) : "esi" ); 344#define RETURN_CONT(JMP_ADDR) \ 345 __asm__ volatile ( \ 346 " movl %ebp, %esp \n\t" /* free frame */ \ 347 " popl %ebp \n\t" /* restore ebp */ \ 348 " addl $4, %esp \n\t" /* clean up stack for callee */ \ 349 " jmp " JMP_ADDR " \n\t" /* jump to continuation */ \ 350 ); 351#elif defined(__arm__) || defined(__aarch64__) 352 353// *** NOTEs for the adventurous: porting lazy THC to ARM 354// 355// INIT_LAZY_AWE puts a marker in place of the returned 
address, which is saved 356// in the awe structure. check_for_lazy_awe() checks for this marker and lazily 357// initializes an awe if needed. 358// 359// In ARM, the caller passes the return address via lr and not the stack. 360// Gcc (4.7) usually compiles functions the following way: 361// mov ip, sp 362// push {rXX, rYY, fp, ip, lr, pc} 363// sub fp, ip, #4 364// .... 365// ldm sp, {rXX, rYY, fp, sp, pc} 366// 367// So the return address is pushed on the stack by the callee, but I'm not sure 368// how consistent is this even if we only consider gcc. 369// 370// check_for_lazy_awe() and init_lazy_awe() also need to change. 371 372#define INIT_LAZY_AWE(_) assert(0 && "THC not yet implemented on AARCH64") 373#define RETURN_CONT(_) assert(0 && "THC not yet implemented on AARCH64") 374#define GET_LAZY_AWE(_) assert(0 && "THC not yet implemented on AARCH64") 375#else 376#error "Need definition of INIT_LAZY_AWE & GET_LAZY_AWE" 377#endif 378 379/***********************************************************************/ 380 381#define SCHEDULE_CONT(_AWE_PTR, NESTED_FUNC) \ 382 ({ \ 383 KILL_CALLEE_SAVES(); \ 384 NESTED_FUNC(FORCE_ARGS_STACK_CALL _AWE_PTR); \ 385 }) 386 387#define CALL_CONT(_FN,_ARG) \ 388 do { \ 389 awe_t _awe; \ 390 _awe.status = EAGER_AWE; \ 391 _awe.lazy_stack = NULL; \ 392 KILL_CALLEE_SAVES(); \ 393 _thc_callcont(&_awe, (THCContFn_t)(_FN), (_ARG)); \ 394 } while (0) 395 396 397#define CALL_CONT_LAZY(_FN,_ARG) \ 398 do { \ 399 awe_t _awe; \ 400 _awe.status = LAZY_AWE; \ 401 _awe.lazy_stack = NULL; \ 402 KILL_CALLEE_SAVES(); \ 403 _thc_callcont(&_awe, (THCContFn_t)(_FN), (_ARG)); \ 404 } while (0) 405 406/***********************************************************************/ 407 408#else /* EAGER_THC */ 409 410/***********************************************************************/ 411 412// not required in the lazy CALL_CONT in the eager version 413#define FORCE_FRAME_POINTER_USE /* Not used */ do {} while(0) 414#define GET_STACK_POINTER(_) /* Not 
used */ 415#define RESTORE_OLD_STACK_POINTER(_) /* Not used */ 416 417 418// SWIZZLE_DEF: 419// - _NAME: name of the function 420// - _NS: new stack, address just above top of commited region 421// - _FN: (nested) function to call: void _FN(void) 422 423#if (defined(__x86_64__) && (defined(linux) || defined(BARRELFISH))) 424#define SWIZZLE_DEF_(_NAME,_NS,_FN) \ 425 __attribute__((noinline)) void _NAME(void) { \ 426 __asm__ volatile("movq %0, %%rdi \n\t" /* put NS to %rdi */ \ 427 "subq $8, %%rdi \n\t" /* fix NS address */ \ 428 "movq %%rsp, (%%rdi) \n\t" /* store sp to NS */ \ 429 "movq %%rdi, %%rsp \n\t" /* set sp to NS */ \ 430 "call " _FN " \n\t" /* call _FN */ \ 431 "popq %%rsp \n\t" /* restore old sp */ \ 432 : \ 433 : "m" (_NS) \ 434 : "memory", "cc", "rsi", "rdi"); \ 435 } 436#define SWIZZLE_DEF(_NAME,_NS,_FN) SWIZZLE_DEF_(_NAME,_NS,_FN) 437#elif (defined(__i386__) && (defined(linux) || defined(BARRELFISH))) 438#define SWIZZLE_DEF(_NAME,_NS,_FN) \ 439 __attribute__((noinline)) void _NAME(void) { \ 440 __asm__ volatile("movl %0, %%edx \n\t" \ 441 "subl $4, %%edx \n\t" \ 442 "movl %%esp, (%%edx) \n\t" \ 443 "movl %%edx, %%esp \n\t" \ 444 "call " _FN " \n\t" \ 445 "pop %%esp \n\t" \ 446 : \ 447 : "m" (_NS) \ 448 : "memory", "cc", "eax", "edx"); \ 449 } 450#elif defined(__arm__) && (defined(linux) || defined(BARRELFISH)) 451 452// Notes: 453// - ARM Architecutre Reference Manual ARMv7-A and ARMv7-R: 454// STMDB: 455// "The SP and PC can be in the list in ARM code, but not in Thumb code. 456// However, ARM instructions that include the SP or the PC in the list are 457// deprecated." 
458// - This can probably be optimized 459// 460#define SWIZZLE_DEF(_NAME, _NS, _FN) \ 461 __attribute__((noinline)) void _NAME(void) { \ 462 __asm__ volatile("ldr r0, %0 \n\t" /* set r0 to new stack */ \ 463 "mov r1, sp \n\t" /* set r1 to old stack */ \ 464 "stmdb r0!, {r1} \n\t" /* save old stack to new stack */ \ 465 "mov sp, r0 \n\t" /* set sp to new stack */ \ 466 "bl " _FN " \n\t" /* call _FN */ \ 467 "ldmia sp, {r1} \n\t" /* old stack pointer to r1 */ \ 468 "mov sp, r1 \n\t" /* restore stack pointer */ \ 469 : \ 470 : "m" (_NS) \ 471 : "memory", "r0", "r1"); \ 472 } 473#elif defined(__aarch64__) && (defined(linux) || defined(BARRELFISH)) 474 475// - NYI 476#define SWIZZLE_DEF(_NAME, _NS, _FN) assert(0 && "THC not yet implemented on AARCH64") 477 478#else 479#error "No definition of SWIZZLE_DEF for THC" 480#endif 481 482/***********************************************************************/ 483 484#define SCHEDULE_CONT(_AWE_PTR) \ 485 ({ \ 486 KILL_CALLEE_SAVES(); \ 487 _thc_schedulecont((awe_t*)_AWE_PTR); \ 488 }) 489 490#define CALL_CONT(_FN,_ARG) \ 491 do { \ 492 awe_t _awe; \ 493 KILL_CALLEE_SAVES(); \ 494 _thc_callcont(&_awe, (THCContFn_t)(_FN), (_ARG)); \ 495 } while (0) 496 497// no lazy CALL_CONT in the eager version 498#define CALL_CONT_LAZY CALL_CONT 499 500#endif // LAZY / EAGER THC 501 502#endif // _THC_INTERNAL_H_ 503