1#ifdef __x86_64__ 2 3/* ----------------------------------------------------------------------- 4 x86-ffi64.c - Copyright (c) 2002 Bo Thorsen <bo@suse.de> 5 6 x86-64 Foreign Function Interface 7 8 Permission is hereby granted, free of charge, to any person obtaining 9 a copy of this software and associated documentation files (the 10 ``Software''), to deal in the Software without restriction, including 11 without limitation the rights to use, copy, modify, merge, publish, 12 distribute, sublicense, and/or sell copies of the Software, and to 13 permit persons to whom the Software is furnished to do so, subject to 14 the following conditions: 15 16 The above copyright notice and this permission notice shall be included 17 in all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR 23 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 24 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 25 OTHER DEALINGS IN THE SOFTWARE. 26 ----------------------------------------------------------------------- */ 27 28#include <ffi.h> 29#include <ffi_common.h> 30 31#include <stdlib.h> 32#include <stdarg.h> 33 34#define MAX_GPR_REGS 6 35#define MAX_SSE_REGS 8 36 37typedef struct RegisterArgs { 38 /* Registers for argument passing. */ 39 UINT64 gpr[MAX_GPR_REGS]; 40 __int128_t sse[MAX_SSE_REGS]; 41} RegisterArgs; 42 43extern void 44ffi_call_unix64( 45 void* args, 46 unsigned long bytes, 47 unsigned flags, 48 void* raddr, 49 void (*fnaddr)(), 50 unsigned ssecount); 51 52/* All reference to register classes here is identical to the code in 53 gcc/config/i386/i386.c. Do *not* change one without the other. */ 54 55/* Register class used for passing given 64bit part of the argument. 56 These represent classes as documented by the PS ABI, with the exception 57 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 58 use SF or DFmode move instead of DImode to avoid reformating penalties. 59 60 Similary we play games with INTEGERSI_CLASS to use cheaper SImode moves 61 whenever possible (upper half does contain padding). */ 62enum x86_64_reg_class 63{ 64 X86_64_NO_CLASS, 65 X86_64_INTEGER_CLASS, 66 X86_64_INTEGERSI_CLASS, 67 X86_64_SSE_CLASS, 68 X86_64_SSESF_CLASS, 69 X86_64_SSEDF_CLASS, 70 X86_64_SSEUP_CLASS, 71 X86_64_X87_CLASS, 72 X86_64_X87UP_CLASS, 73 X86_64_COMPLEX_X87_CLASS, 74 X86_64_MEMORY_CLASS 75}; 76 77#define MAX_CLASSES 4 78#define SSE_CLASS_P(X) ((X) >= X86_64_SSE_CLASS && X <= X86_64_SSEUP_CLASS) 79 80/* x86-64 register passing implementation. See x86-64 ABI for details. Goal 81 of this code is to classify each 8bytes of incoming argument by the register 82 class and assign registers accordingly. */ 83 84/* Return the union class of CLASS1 and CLASS2. 85 See the x86-64 PS ABI for details. */ 86static enum x86_64_reg_class 87merge_classes( 88 enum x86_64_reg_class class1, 89 enum x86_64_reg_class class2) 90{ 91 /* Rule #1: If both classes are equal, this is the resulting class. */ 92 if (class1 == class2) 93 return class1; 94 95 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 96 the other class. */ 97 if (class1 == X86_64_NO_CLASS) 98 return class2; 99 100 if (class2 == X86_64_NO_CLASS) 101 return class1; 102 103 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 104 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 105 return X86_64_MEMORY_CLASS; 106 107 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 108 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 109 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 110 return X86_64_INTEGERSI_CLASS; 111 112 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 113 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 114 return X86_64_INTEGER_CLASS; 115 116 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, 117 MEMORY is used. */ 118 if (class1 == X86_64_X87_CLASS 119 || class1 == X86_64_X87UP_CLASS 120 || class1 == X86_64_COMPLEX_X87_CLASS 121 || class2 == X86_64_X87_CLASS 122 || class2 == X86_64_X87UP_CLASS 123 || class2 == X86_64_COMPLEX_X87_CLASS) 124 return X86_64_MEMORY_CLASS; 125 126 /* Rule #6: Otherwise class SSE is used. */ 127 return X86_64_SSE_CLASS; 128} 129 130/* Classify the argument of type TYPE and mode MODE. 131 CLASSES will be filled by the register class used to pass each word 132 of the operand. The number of words is returned. In case the parameter 133 should be passed in memory, 0 is returned. As a special case for zero 134 sized containers, classes[0] will be NO_CLASS and 1 is returned. 135 136 See the x86-64 PS ABI for details. */ 137 138static int 139classify_argument( 140 ffi_type* type, 141 enum x86_64_reg_class classes[], 142 size_t byte_offset) 143{ 144 switch (type->type) 145 { 146 case FFI_TYPE_UINT8: 147 case FFI_TYPE_SINT8: 148 case FFI_TYPE_UINT16: 149 case FFI_TYPE_SINT16: 150 case FFI_TYPE_UINT32: 151 case FFI_TYPE_SINT32: 152 case FFI_TYPE_UINT64: 153 case FFI_TYPE_SINT64: 154 case FFI_TYPE_POINTER: 155 if (byte_offset + type->size <= 4) 156 classes[0] = X86_64_INTEGERSI_CLASS; 157 else 158 classes[0] = X86_64_INTEGER_CLASS; 159 160 return 1; 161 162 case FFI_TYPE_FLOAT: 163 if (byte_offset == 0) 164 classes[0] = X86_64_SSESF_CLASS; 165 else 166 classes[0] = X86_64_SSE_CLASS; 167 168 return 1; 169 170 case FFI_TYPE_DOUBLE: 171 classes[0] = X86_64_SSEDF_CLASS; 172 return 1; 173 174 case FFI_TYPE_LONGDOUBLE: 175 classes[0] = X86_64_X87_CLASS; 176 classes[1] = X86_64_X87UP_CLASS; 177 return 2; 178 179 case FFI_TYPE_STRUCT: 180 { 181 ffi_type** ptr; 182 int i; 183 enum x86_64_reg_class subclasses[MAX_CLASSES]; 184 const int UNITS_PER_WORD = 8; 185 int words = 186 (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 187 188 /* If the struct is larger than 16 bytes, pass it on the stack. */ 189 if (type->size > 16) 190 return 0; 191 192 for (i = 0; i < words; i++) 193 classes[i] = X86_64_NO_CLASS; 194 195 /* Merge the fields of structure. */ 196 for (ptr = type->elements; *ptr != NULL; ptr++) 197 { 198 byte_offset = ALIGN(byte_offset, (*ptr)->alignment); 199 200 int num = classify_argument(*ptr, subclasses, byte_offset % 8); 201 202 if (num == 0) 203 return 0; 204 205 int pos = byte_offset / 8; 206 207 for (i = 0; i < num; i++) 208 { 209 classes[i + pos] = 210 merge_classes(subclasses[i], classes[i + pos]); 211 } 212 213 byte_offset += (*ptr)->size; 214 } 215 216 /* Final merger cleanup. */ 217 for (i = 0; i < words; i++) 218 { 219 /* If one class is MEMORY, everything should be passed in 220 memory. */ 221 if (classes[i] == X86_64_MEMORY_CLASS) 222 return 0; 223 224 /* The X86_64_SSEUP_CLASS should be always preceded by 225 X86_64_SSE_CLASS. */ 226 if (classes[i] == X86_64_SSEUP_CLASS 227 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS)) 228 classes[i] = X86_64_SSE_CLASS; 229 230 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */ 231 if (classes[i] == X86_64_X87UP_CLASS 232 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS)) 233 classes[i] = X86_64_SSE_CLASS; 234 } 235 236 return words; 237 } 238 239 default: 240 FFI_ASSERT(0); 241 } 242 243 return 0; /* Never reached. */ 244} 245 246/* Examine the argument and return set number of register required in each 247 class. Return zero if parameter should be passed in memory, otherwise 248 the number of registers. */ 249static int 250examine_argument( 251 ffi_type* type, 252 enum x86_64_reg_class classes[MAX_CLASSES], 253 _Bool in_return, 254 int* pngpr, 255 int* pnsse) 256{ 257 int n = classify_argument(type, classes, 0); 258 int ngpr = 0; 259 int nsse = 0; 260 int i; 261 262 if (n == 0) 263 return 0; 264 265 for (i = 0; i < n; ++i) 266 { 267 switch (classes[i]) 268 { 269 case X86_64_INTEGER_CLASS: 270 case X86_64_INTEGERSI_CLASS: 271 ngpr++; 272 break; 273 274 case X86_64_SSE_CLASS: 275 case X86_64_SSESF_CLASS: 276 case X86_64_SSEDF_CLASS: 277 nsse++; 278 break; 279 280 case X86_64_NO_CLASS: 281 case X86_64_SSEUP_CLASS: 282 break; 283 284 case X86_64_X87_CLASS: 285 case X86_64_X87UP_CLASS: 286 case X86_64_COMPLEX_X87_CLASS: 287 return in_return != 0; 288 289 default: 290 abort(); 291 } 292 } 293 294 *pngpr = ngpr; 295 *pnsse = nsse; 296 297 return n; 298} 299 300/* Perform machine dependent cif processing. */ 301ffi_status 302ffi_prep_cif_machdep( 303 ffi_cif* cif) 304{ 305 int gprcount = 0; 306 int ssecount = 0; 307 int flags = cif->rtype->type; 308 int i, avn, n, ngpr, nsse; 309 enum x86_64_reg_class classes[MAX_CLASSES]; 310 size_t bytes; 311 312 if (flags != FFI_TYPE_VOID) 313 { 314 n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); 315 316 if (n == 0) 317 { 318 /* The return value is passed in memory. A pointer to that 319 memory is the first argument. Allocate a register for it. */ 320 gprcount++; 321 322 /* We don't have to do anything in asm for the return. */ 323 flags = FFI_TYPE_VOID; 324 } 325 else if (flags == FFI_TYPE_STRUCT) 326 { 327 /* Mark which registers the result appears in. */ 328 _Bool sse0 = SSE_CLASS_P(classes[0]); 329 _Bool sse1 = n == 2 && SSE_CLASS_P(classes[1]); 330 331 if (sse0 && !sse1) 332 flags |= 1 << 8; 333 else if (!sse0 && sse1) 334 flags |= 1 << 9; 335 else if (sse0 && sse1) 336 flags |= 1 << 10; 337 338 /* Mark the true size of the structure. */ 339 flags |= cif->rtype->size << 12; 340 } 341 } 342 343 /* Go over all arguments and determine the way they should be passed. 344 If it's in a register and there is space for it, let that be so. If 345 not, add it's size to the stack byte count. */ 346 for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++) 347 { 348 if (examine_argument(cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0 349 || gprcount + ngpr > MAX_GPR_REGS 350 || ssecount + nsse > MAX_SSE_REGS) 351 { 352 long align = cif->arg_types[i]->alignment; 353 354 if (align < 8) 355 align = 8; 356 357 bytes = ALIGN(bytes, align); 358 bytes += cif->arg_types[i]->size; 359 } 360 else 361 { 362 gprcount += ngpr; 363 ssecount += nsse; 364 } 365 } 366 367 if (ssecount) 368 flags |= 1 << 11; 369 370 cif->flags = flags; 371 cif->bytes = bytes; 372 373 return FFI_OK; 374} 375 376void 377ffi_call( 378 ffi_cif* cif, 379 void (*fn)(), 380 void* rvalue, 381 void** avalue) 382{ 383 enum x86_64_reg_class classes[MAX_CLASSES]; 384 char* stack; 385 char* argp; 386 ffi_type** arg_types; 387 int gprcount, ssecount, ngpr, nsse, i, avn; 388 _Bool ret_in_memory; 389 RegisterArgs* reg_args; 390 391 /* Can't call 32-bit mode from 64-bit mode. */ 392 FFI_ASSERT(cif->abi == FFI_UNIX64); 393 394 /* If the return value is a struct and we don't have a return value 395 address then we need to make one. Note the setting of flags to 396 VOID above in ffi_prep_cif_machdep. */ 397 ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT 398 && (cif->flags & 0xff) == FFI_TYPE_VOID); 399 400 if (rvalue == NULL && ret_in_memory) 401 rvalue = alloca (cif->rtype->size); 402 403 /* Allocate the space for the arguments, plus 4 words of temp space. */ 404 stack = alloca(sizeof(RegisterArgs) + cif->bytes + 4 * 8); 405 reg_args = (RegisterArgs*)stack; 406 argp = stack + sizeof(RegisterArgs); 407 408 gprcount = ssecount = 0; 409 410 /* If the return value is passed in memory, add the pointer as the 411 first integer argument. */ 412 if (ret_in_memory) 413 reg_args->gpr[gprcount++] = (long) rvalue; 414 415 avn = cif->nargs; 416 arg_types = cif->arg_types; 417 418 for (i = 0; i < avn; ++i) 419 { 420 size_t size = arg_types[i]->size; 421 int n; 422 423 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); 424 425 if (n == 0 426 || gprcount + ngpr > MAX_GPR_REGS 427 || ssecount + nsse > MAX_SSE_REGS) 428 { 429 long align = arg_types[i]->alignment; 430 431 /* Stack arguments are *always* at least 8 byte aligned. */ 432 if (align < 8) 433 align = 8; 434 435 /* Pass this argument in memory. */ 436 argp = (void *) ALIGN (argp, align); 437 memcpy (argp, avalue[i], size); 438 argp += size; 439 } 440 else 441 { /* The argument is passed entirely in registers. */ 442 char *a = (char *) avalue[i]; 443 SINT64 signExtendedValue = 0; 444 int j; 445 446 /* Sign-extend small signed values to fill the entire register. */ 447 switch (arg_types[i]->type) { 448 case FFI_TYPE_SINT8: 449 signExtendedValue = (SINT64) *((SINT8 *) avalue[i]); 450 a = (char *) &signExtendedValue; 451 size = 8; 452 break; 453 case FFI_TYPE_SINT16: 454 signExtendedValue = (SINT64) *((SINT16*) avalue[i]); 455 a = (char *) &signExtendedValue; 456 size = 8; 457 break; 458 case FFI_TYPE_SINT32: 459 signExtendedValue = (SINT64) *((SINT32*) avalue[i]); 460 a = (char *) &signExtendedValue; 461 size = 8; 462 break; 463 default: 464 break; 465 } 466 467 for (j = 0; j < n; j++, a += 8, size -= 8) 468 { 469 switch (classes[j]) 470 { 471 case X86_64_INTEGER_CLASS: 472 case X86_64_INTEGERSI_CLASS: 473 reg_args->gpr[gprcount] = 0; 474 memcpy (®_args->gpr[gprcount], a, size < 8 ? size : 8); 475 gprcount++; 476 break; 477 478 case X86_64_SSE_CLASS: 479 case X86_64_SSEDF_CLASS: 480 reg_args->sse[ssecount++] = *(UINT64 *) a; 481 break; 482 483 case X86_64_SSESF_CLASS: 484 reg_args->sse[ssecount++] = *(UINT32 *) a; 485 break; 486 487 default: 488 abort(); 489 } 490 } 491 } 492 } 493 494 ffi_call_unix64 (stack, cif->bytes + sizeof(RegisterArgs), 495 cif->flags, rvalue, fn, ssecount); 496} 497 498extern void ffi_closure_unix64(void); 499 500ffi_status 501ffi_prep_closure( 502 ffi_closure* closure, 503 ffi_cif* cif, 504 void (*fun)(ffi_cif*, void*, void**, void*), 505 void* user_data) 506{ 507 if (cif->abi != FFI_UNIX64) 508 return FFI_BAD_ABI; 509 510 volatile unsigned short* tramp = 511 (volatile unsigned short*)&closure->tramp[0]; 512 513 tramp[0] = 0xbb49; /* mov <code>, %r11 */ 514 *(void* volatile*)&tramp[1] = ffi_closure_unix64; 515 tramp[5] = 0xba49; /* mov <data>, %r10 */ 516 *(void* volatile*)&tramp[6] = closure; 517 518 /* Set the carry bit if the function uses any sse registers. 519 This is clc or stc, together with the first byte of the jmp. */ 520 tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8; 521 tramp[11] = 0xe3ff; /* jmp *%r11 */ 522 523 closure->cif = cif; 524 closure->fun = fun; 525 closure->user_data = user_data; 526 527 return FFI_OK; 528} 529 530int 531ffi_closure_unix64_inner( 532 ffi_closure* closure, 533 void* rvalue, 534 RegisterArgs* reg_args, 535 char* argp) 536{ 537 ffi_cif* cif = closure->cif; 538 void** avalue = alloca(cif->nargs * sizeof(void *)); 539 ffi_type** arg_types; 540 long i, avn; 541 int gprcount = 0; 542 int ssecount = 0; 543 int ngpr, nsse; 544 int ret; 545 546 ret = cif->rtype->type; 547 548 if (ret != FFI_TYPE_VOID) 549 { 550 enum x86_64_reg_class classes[MAX_CLASSES]; 551 int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); 552 553 if (n == 0) 554 { 555 /* The return value goes in memory. Arrange for the closure 556 return value to go directly back to the original caller. */ 557 rvalue = (void *) reg_args->gpr[gprcount++]; 558 559 /* We don't have to do anything in asm for the return. */ 560 ret = FFI_TYPE_VOID; 561 } 562 else if (ret == FFI_TYPE_STRUCT && n == 2) 563 { 564 /* Mark which register the second word of the structure goes in. */ 565 _Bool sse0 = SSE_CLASS_P (classes[0]); 566 _Bool sse1 = SSE_CLASS_P (classes[1]); 567 568 if (!sse0 && sse1) 569 ret |= 1 << 8; 570 else if (sse0 && !sse1) 571 ret |= 1 << 9; 572 } 573 } 574 575 avn = cif->nargs; 576 arg_types = cif->arg_types; 577 578 for (i = 0; i < avn; ++i) 579 { 580 enum x86_64_reg_class classes[MAX_CLASSES]; 581 int n; 582 583 n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); 584 585 if (n == 0 586 || gprcount + ngpr > MAX_GPR_REGS 587 || ssecount + nsse > MAX_SSE_REGS) 588 { 589 long align = arg_types[i]->alignment; 590 591 /* Stack arguments are *always* at least 8 byte aligned. */ 592 if (align < 8) 593 align = 8; 594 595 /* Pass this argument in memory. */ 596 argp = (void *) ALIGN (argp, align); 597 avalue[i] = argp; 598 argp += arg_types[i]->size; 599 } 600 601#if !defined(X86_DARWIN) 602 /* If the argument is in a single register, or two consecutive 603 registers, then we can use that address directly. */ 604 else if (n == 1 || (n == 2 && 605 SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1]))) 606 { 607 // The argument is in a single register. 608 if (SSE_CLASS_P (classes[0])) 609 { 610 avalue[i] = ®_args->sse[ssecount]; 611 ssecount += n; 612 } 613 else 614 { 615 avalue[i] = ®_args->gpr[gprcount]; 616 gprcount += n; 617 } 618 } 619#endif 620 621 /* Otherwise, allocate space to make them consecutive. */ 622 else 623 { 624 char *a = alloca (16); 625 int j; 626 627 avalue[i] = a; 628 629 for (j = 0; j < n; j++, a += 8) 630 { 631 if (SSE_CLASS_P (classes[j])) 632 memcpy (a, ®_args->sse[ssecount++], 8); 633 else 634 memcpy (a, ®_args->gpr[gprcount++], 8); 635 } 636 } 637 } 638 639 /* Invoke the closure. */ 640 closure->fun (cif, rvalue, avalue, closure->user_data); 641 642 /* Tell assembly how to perform return type promotions. */ 643 return ret; 644} 645 646#endif /* __x86_64__ */ 647