1#ifdef __x86_64__ 2 3/* ----------------------------------------------------------------------- 4 x86-ffi64.c - Copyright (c) 2002 Bo Thorsen <bo@suse.de> 5 6 x86-64 Foreign Function Interface 7 8 Permission is hereby granted, free of charge, to any person obtaining 9 a copy of this software and associated documentation files (the 10 ``Software''), to deal in the Software without restriction, including 11 without limitation the rights to use, copy, modify, merge, publish, 12 distribute, sublicense, and/or sell copies of the Software, and to 13 permit persons to whom the Software is furnished to do so, subject to 14 the following conditions: 15 16 The above copyright notice and this permission notice shall be included 17 in all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR 23 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 24 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 OTHER DEALINGS IN THE SOFTWARE. 26 ----------------------------------------------------------------------- */ 27 28#include <ffi.h> 29#include <ffi_common.h> 30 31#include <stdlib.h> 32#include <stdarg.h> 33 34#define MAX_GPR_REGS 6 35#define MAX_SSE_REGS 8 36 37typedef struct RegisterArgs { 38 /* Registers for argument passing. */ 39 UINT64 gpr[MAX_GPR_REGS]; 40 __int128_t sse[MAX_SSE_REGS]; 41} RegisterArgs; 42 43extern void 44ffi_call_unix64( 45 void* args, 46 unsigned long bytes, 47 unsigned flags, 48 void* raddr, 49 void (*fnaddr)(), 50 unsigned ssecount); 51 52/* All reference to register classes here is identical to the code in 53 gcc/config/i386/i386.c. Do *not* change one without the other. */ 54 55/* Register class used for passing given 64bit part of the argument. 56 These represent classes as documented by the PS ABI, with the exception 57 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 58 use SF or DFmode move instead of DImode to avoid reformating penalties. 59 60 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves 61 whenever possible (upper half does contain padding). */ 62enum x86_64_reg_class 63{ 64 X86_64_NO_CLASS, 65 X86_64_INTEGER_CLASS, 66 X86_64_INTEGERSI_CLASS, 67 X86_64_SSE_CLASS, 68 X86_64_SSESF_CLASS, 69 X86_64_SSEDF_CLASS, 70 X86_64_SSEUP_CLASS, 71 X86_64_X87_CLASS, 72 X86_64_X87UP_CLASS, 73 X86_64_COMPLEX_X87_CLASS, 74 X86_64_MEMORY_CLASS 75}; 76 77#define MAX_CLASSES 4 78#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS) 79 80/* x86-64 register passing implementation. See x86-64 ABI for details. Goal 81 of this code is to classify each 8bytes of incoming argument by the register 82 class and assign registers accordingly. */ 83 84/* Return the union class of CLASS1 and CLASS2. 85 See the x86-64 PS ABI for details. */ 86static enum x86_64_reg_class 87merge_classes( 88 enum x86_64_reg_class class1, 89 enum x86_64_reg_class class2) 90{ 91 /* Rule #1: If both classes are equal, this is the resulting class. */ 92 if (class1 == class2) 93 return class1; 94 95 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 96 the other class. */ 97 if (class1 == X86_64_NO_CLASS) 98 return class2; 99 100 if (class2 == X86_64_NO_CLASS) 101 return class1; 102 103 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 104 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 105 return X86_64_MEMORY_CLASS; 106 107 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 108 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 109 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 110 return X86_64_INTEGERSI_CLASS; 111 112 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 113 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 114 return X86_64_INTEGER_CLASS; 115 116 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, 117 MEMORY is used. */ 118 if (class1 == X86_64_X87_CLASS 119 || class1 == X86_64_X87UP_CLASS 120 || class1 == X86_64_COMPLEX_X87_CLASS 121 || class2 == X86_64_X87_CLASS 122 || class2 == X86_64_X87UP_CLASS 123 || class2 == X86_64_COMPLEX_X87_CLASS) 124 return X86_64_MEMORY_CLASS; 125 126 /* Rule #6: Otherwise class SSE is used. */ 127 return X86_64_SSE_CLASS; 128} 129 130/* Classify the argument of type TYPE and mode MODE. 131 CLASSES will be filled by the register class used to pass each word 132 of the operand. The number of words is returned. In case the parameter 133 should be passed in memory, 0 is returned. As a special case for zero 134 sized containers, classes[0] will be NO_CLASS and 1 is returned. 135 136 See the x86-64 PS ABI for details. */ 137 138static int 139classify_argument( 140 ffi_type* type, 141 enum x86_64_reg_class classes[], 142 size_t byte_offset) 143{ 144 switch (type->type) 145 { 146 case FFI_TYPE_UINT8: 147 case FFI_TYPE_SINT8: 148 case FFI_TYPE_UINT16: 149 case FFI_TYPE_SINT16: 150 case FFI_TYPE_UINT32: 151 case FFI_TYPE_SINT32: 152 case FFI_TYPE_UINT64: 153 case FFI_TYPE_SINT64: 154 case FFI_TYPE_POINTER: 155 if (byte_offset + type->size <= 4) 156 classes[0] = X86_64_INTEGERSI_CLASS; 157 else 158 classes[0] = X86_64_INTEGER_CLASS; 159 160 return 1; 161 162 case FFI_TYPE_FLOAT: 163 if (byte_offset == 0) 164 classes[0] = X86_64_SSESF_CLASS; 165 else 166 classes[0] = X86_64_SSE_CLASS; 167 168 return 1; 169 170 case FFI_TYPE_DOUBLE: 171 classes[0] = X86_64_SSEDF_CLASS; 172 return 1; 173 174 case FFI_TYPE_LONGDOUBLE: 175 classes[0] = X86_64_X87_CLASS; 176 classes[1] = X86_64_X87UP_CLASS; 177 return 2; 178 179 case FFI_TYPE_STRUCT: 180 { 181 ffi_type** ptr; 182 int i; 183 enum x86_64_reg_class subclasses[MAX_CLASSES]; 184 const int UNITS_PER_WORD = 8; 185 int words = 186 (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 187 188 /* If the struct is larger than 16 bytes, pass it on the stack. */ 189 if (type->size > 16) 190 return 0; 191 192 for (i = 0; i < words; i++) 193 classes[i] = X86_64_NO_CLASS; 194 195 /* Merge the fields of structure. */ 196 for (ptr = type->elements; *ptr != NULL; ptr++) 197 { 198 byte_offset = ALIGN(byte_offset, (*ptr)->alignment); 199 200 int num = classify_argument(*ptr, subclasses, byte_offset % 8); 201 202 if (num == 0) 203 return 0; 204 205 int pos = byte_offset / 8; 206 207 for (i = 0; i < num; i++) 208 { 209 classes[i + pos] = 210 merge_classes(subclasses[i], classes[i + pos]); 211 } 212 213 byte_offset += (*ptr)->size; 214 } 215 216 /* Final merger cleanup. */ 217 for (i = 0; i < words; i++) 218 { 219 /* If one class is MEMORY, everything should be passed in 220 memory. */ 221 if (classes[i] == X86_64_MEMORY_CLASS) 222 return 0; 223 224 /* The X86_64_SSEUP_CLASS should be always preceded by 225 X86_64_SSE_CLASS. */ 226 if (classes[i] == X86_64_SSEUP_CLASS 227 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 228 classes[i] = X86_64_SSE_CLASS; 229 230 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 231 if (classes[i] == X86_64_X87UP_CLASS 232 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 233 classes[i] = X86_64_SSE_CLASS; 234 } 235 236 return words; 237 } 238 239 default: 240 FFI_ASSERT(0); 241 } 242 243 return 0; /* Never reached. */ 244} 245 246/* Examine the argument and return set number of register required in each 247 class. Return zero if parameter should be passed in memory, otherwise 248 the number of registers. */ 249static int 250examine_argument( 251 ffi_type* type, 252 enum x86_64_reg_class classes[MAX_CLASSES], 253 _Bool in_return, 254 int* pngpr, 255 int* pnsse) 256{ 257 int n = classify_argument(type, classes, 0); 258 int ngpr = 0; 259 int nsse = 0; 260 int i; 261 262 if (n == 0) 263 return 0; 264 265 for (i = 0; i < n; ++i) 266 { 267 switch (classes[i]) 268 { 269 case X86_64_INTEGER_CLASS: 270 case X86_64_INTEGERSI_CLASS: 271 ngpr++; 272 break; 273 274 case X86_64_SSE_CLASS: 275 case X86_64_SSESF_CLASS: 276 case X86_64_SSEDF_CLASS: 277 nsse++; 278 break; 279 280 case X86_64_NO_CLASS: 281 case X86_64_SSEUP_CLASS: 282 break; 283 284 case X86_64_X87_CLASS: 285 case X86_64_X87UP_CLASS: 286 case X86_64_COMPLEX_X87_CLASS: 287 return in_return != 0; 288 289 default: 290 abort(); 291 } 292 } 293 294 *pngpr = ngpr; 295 *pnsse = nsse; 296 297 return n; 298} 299 300/* Perform machine dependent cif processing. */ 301ffi_status 302ffi_prep_cif_machdep( 303 ffi_cif* cif) 304{ 305 int gprcount = 0; 306 int ssecount = 0; 307 int flags = cif->rtype->type; 308 int i, avn, n, ngpr, nsse; 309 enum x86_64_reg_class classes[MAX_CLASSES]; 310 size_t bytes; 311 312 if (flags != FFI_TYPE_VOID) 313 { 314 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); 315 316 if (n == 0) 317 { 318 /* The return value is passed in memory. A pointer to that 319 memory is the first argument. Allocate a register for it. */ 320 gprcount++; 321 322 /* We don't have to do anything in asm for the return. */ 323 flags = FFI_TYPE_VOID; 324 } 325 else if (flags == FFI_TYPE_STRUCT) 326 { 327 /* Mark which registers the result appears in. */ 328 _Bool sse0 = SSE_CLASS_P(classes[0]); 329 _Bool sse1 = n == 2 && SSE_CLASS_P(classes[1]); 330 331 if (sse0 && !sse1) 332 flags |= 1 << 8; 333 else if (!sse0 && sse1) 334 flags |= 1 << 9; 335 else if (sse0 && sse1) 336 flags |= 1 << 10; 337 338 /* Mark the true size of the structure. */ 339 flags |= cif->rtype->size << 12; 340 } 341 } 342 343 /* Go over all arguments and determine the way they should be passed. 344 If it's in a register and there is space for it, let that be so. If 345 not, add it's size to the stack byte count. */ 346 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++) 347 { 348 if (examine_argument(cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0 349 || gprcount + ngpr > MAX_GPR_REGS 350 || ssecount + nsse > MAX_SSE_REGS) 351 { 352 long align = cif->arg_types[i]->alignment; 353 354 if (align < 8) 355 align = 8; 356 357 bytes = ALIGN(bytes, align); 358 bytes += cif->arg_types[i]->size; 359 } 360 else 361 { 362 gprcount += ngpr; 363 ssecount += nsse; 364 } 365 } 366 367 if (ssecount) 368 flags |= 1 << 11; 369 370 cif->flags = flags; 371 cif->bytes = bytes; 372 373 return FFI_OK; 374} 375 376void 377ffi_call( 378 ffi_cif* cif, 379 void (*fn)(), 380 void* rvalue, 381 void** avalue) 382{ 383 enum x86_64_reg_class classes[MAX_CLASSES]; 384 char* stack; 385 char* argp; 386 ffi_type** arg_types; 387 int gprcount, ssecount, ngpr, nsse, i, avn; 388 _Bool ret_in_memory; 389 RegisterArgs* reg_args; 390 391 /* Can't call 32-bit mode from 64-bit mode. */ 392 FFI_ASSERT(cif->abi == FFI_UNIX64); 393 394 /* If the return value is a struct and we don't have a return value 395 address then we need to make one. Note the setting of flags to 396 VOID above in ffi_prep_cif_machdep. */ 397 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT 398 && (cif->flags & 0xff) == FFI_TYPE_VOID); 399 400 if (rvalue == NULL && ret_in_memory) 401 rvalue = alloca (cif->rtype->size); 402 403 /* Allocate the space for the arguments, plus 4 words of temp space. */ 404 stack = alloca(sizeof(RegisterArgs) + cif->bytes + 4 * 8); 405 reg_args = (RegisterArgs*)stack; 406 argp = stack + sizeof(RegisterArgs); 407 408 gprcount = ssecount = 0; 409 410 /* If the return value is passed in memory, add the pointer as the 411 first integer argument. */ 412 if (ret_in_memory) 413 reg_args->gpr[gprcount++] = (long) rvalue; 414 415 avn = cif->nargs; 416 arg_types = cif->arg_types; 417 418 for (i = 0; i < avn; ++i) 419 { 420 size_t size = arg_types[i]->size; 421 int n; 422 423 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); 424 425 if (n == 0 426 || gprcount + ngpr > MAX_GPR_REGS 427 || ssecount + nsse > MAX_SSE_REGS) 428 { 429 long align = arg_types[i]->alignment; 430 431 /* Stack arguments are *always* at least 8 byte aligned. */ 432 if (align < 8) 433 align = 8; 434 435 /* Pass this argument in memory. */ 436 argp = (void *) ALIGN (argp, align); 437 memcpy (argp, avalue[i], size); 438 argp += size; 439 } 440 else 441 { /* The argument is passed entirely in registers. */ 442 char *a = (char *) avalue[i]; 443 int j; 444 445 for (j = 0; j < n; j++, a += 8, size -= 8) 446 { 447 switch (classes[j]) 448 { 449 case X86_64_INTEGER_CLASS: 450 case X86_64_INTEGERSI_CLASS: 451 reg_args->gpr[gprcount] = 0; 452 memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8); 453 gprcount++; 454 break; 455 456 case X86_64_SSE_CLASS: 457 case X86_64_SSEDF_CLASS: 458 reg_args->sse[ssecount++] = *(UINT64 *) a; 459 break; 460 461 case X86_64_SSESF_CLASS: 462 reg_args->sse[ssecount++] = *(UINT32 *) a; 463 break; 464 465 default: 466 abort(); 467 } 468 } 469 } 470 } 471 472 ffi_call_unix64 (stack, cif->bytes + sizeof(RegisterArgs), 473 cif->flags, rvalue, fn, ssecount); 474} 475 476extern void ffi_closure_unix64(void); 477 478ffi_status 479ffi_prep_closure( 480 ffi_closure* closure, 481 ffi_cif* cif, 482 void (*fun)(ffi_cif*, void*, void**, void*), 483 void* user_data) 484{ 485 if (cif->abi != FFI_UNIX64) 486 return FFI_BAD_ABI; 487 488 volatile unsigned short* tramp = 489 (volatile unsigned short*)&closure->tramp[0]; 490 491 tramp[0] = 0xbb49; /* mov <code>, %r11 */ 492 *(void* volatile*)&tramp[1] = ffi_closure_unix64; 493 tramp[5] = 0xba49; /* mov <data>, %r10 */ 494 *(void* volatile*)&tramp[6] = closure; 495 496 /* Set the carry bit if the function uses any sse registers. 497 This is clc or stc, together with the first byte of the jmp. */ 498 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8; 499 tramp[11] = 0xe3ff; /* jmp *%r11 */ 500 501 closure->cif = cif; 502 closure->fun = fun; 503 closure->user_data = user_data; 504 505 return FFI_OK; 506} 507 508int 509ffi_closure_unix64_inner( 510 ffi_closure* closure, 511 void* rvalue, 512 RegisterArgs* reg_args, 513 char* argp) 514{ 515 ffi_cif* cif = closure->cif; 516 void** avalue = alloca(cif->nargs * sizeof(void *)); 517 ffi_type** arg_types; 518 long i, avn; 519 int gprcount = 0; 520 int ssecount = 0; 521 int ngpr, nsse; 522 int ret; 523 524 ret = cif->rtype->type; 525 526 if (ret != FFI_TYPE_VOID) 527 { 528 enum x86_64_reg_class classes[MAX_CLASSES]; 529 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); 530 531 if (n == 0) 532 { 533 /* The return value goes in memory. Arrange for the closure 534 return value to go directly back to the original caller. */ 535 rvalue = (void *) reg_args->gpr[gprcount++]; 536 537 /* We don't have to do anything in asm for the return. */ 538 ret = FFI_TYPE_VOID; 539 } 540 else if (ret == FFI_TYPE_STRUCT && n == 2) 541 { 542 /* Mark which register the second word of the structure goes in. */ 543 _Bool sse0 = SSE_CLASS_P (classes[0]); 544 _Bool sse1 = SSE_CLASS_P (classes[1]); 545 546 if (!sse0 && sse1) 547 ret |= 1 << 8; 548 else if (sse0 && !sse1) 549 ret |= 1 << 9; 550 } 551 } 552 553 avn = cif->nargs; 554 arg_types = cif->arg_types; 555 556 for (i = 0; i < avn; ++i) 557 { 558 enum x86_64_reg_class classes[MAX_CLASSES]; 559 int n; 560 561 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); 562 563 if (n == 0 564 || gprcount + ngpr > MAX_GPR_REGS 565 || ssecount + nsse > MAX_SSE_REGS) 566 { 567 long align = arg_types[i]->alignment; 568 569 /* Stack arguments are *always* at least 8 byte aligned. */ 570 if (align < 8) 571 align = 8; 572 573 /* Pass this argument in memory. */ 574 argp = (void *) ALIGN (argp, align); 575 avalue[i] = argp; 576 argp += arg_types[i]->size; 577 } 578 579#if !defined(X86_DARWIN) 580 /* If the argument is in a single register, or two consecutive 581 registers, then we can use that address directly. */ 582 else if (n == 1 || (n == 2 && 583 SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1]))) 584 { 585 // The argument is in a single register. 586 if (SSE_CLASS_P (classes[0])) 587 { 588 avalue[i] = ®_args->sse[ssecount]; 589 ssecount += n; 590 } 591 else 592 { 593 avalue[i] = ®_args->gpr[gprcount]; 594 gprcount += n; 595 } 596 } 597#endif 598 599 /* Otherwise, allocate space to make them consecutive. */ 600 else 601 { 602 char *a = alloca (16); 603 int j; 604 605 avalue[i] = a; 606 607 for (j = 0; j < n; j++, a += 8) 608 { 609 if (SSE_CLASS_P (classes[j])) 610 memcpy (a, ®_args->sse[ssecount++], 8); 611 else 612 memcpy (a, ®_args->gpr[gprcount++], 8); 613 } 614 } 615 } 616 617 /* Invoke the closure. */ 618 closure->fun (cif, rvalue, avalue, closure->user_data); 619 620 /* Tell assembly how to perform return type promotions. */ 621 return ret; 622} 623 624#endif /* __x86_64__ */ 625