1/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. 2 3Permission is hereby granted, free of charge, to any person obtaining 4a copy of this software and associated documentation files (the 5``Software''), to deal in the Software without restriction, including 6without limitation the rights to use, copy, modify, merge, publish, 7distribute, sublicense, and/or sell copies of the Software, and to 8permit persons to whom the Software is furnished to do so, subject to 9the following conditions: 10 11The above copyright notice and this permission notice shall be 12included in all copies or substantial portions of the Software. 13 14THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 15EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 17IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 18CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 19TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 20SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
*/

#include <stdio.h>

#include <ffi.h>
#include <ffi_common.h>

#include <stdlib.h>

/* Stack alignment requirement in bytes.

   NOTE(review): on Apple targets this is 1, i.e. no rounding is applied
   here; Apple's ABI packs stack arguments and final SP alignment is
   presumably handled by the assembler stubs — confirm.  */
#if defined (__APPLE__)
#define AARCH64_STACK_ALIGN 1
#else
#define AARCH64_STACK_ALIGN 16
#endif

/* Number of X (general) and V (vector) argument registers used for
   parameter passing.  */
#define N_X_ARG_REG 8
#define N_V_ARG_REG 8

#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)

/* One 64-bit half of a vector register, viewed either as a single
   UINT64 or as two UINT32 words.  */
union _d
{
  UINT64 d;
  UINT32 s[2];
};

/* Saved register state exchanged with the assembler stubs: the X
   registers and the (two double-words per register) V registers.  */
struct call_context
{
  UINT64 x [AARCH64_N_XREG];
  struct
  {
    union _d d[2];
  } v [AARCH64_N_VREG];
};

#if defined (__clang__) && defined (__APPLE__)
extern void
sys_icache_invalidate (void *start, size_t len);
#endif

/* Flush the instruction cache for [start, end); required after writing
   trampoline code so the CPU sees the new instructions.  */
static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *)end - (char *)start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}

/* Address of general register n within the saved context.  */
static void *
get_x_addr (struct call_context *context, unsigned n)
{
  return &context->x[n];
}

/* Address of the single-precision (S) view of vector register n.  On
   big-endian the 32-bit value lives in the high word of the high
   double-word of the register.  */
static void *
get_s_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1].s[1];
#else
  return &context->v[n].d[0].s[0];
#endif
}

/* Address of the double-precision (D) view of vector register n.  */
static void *
get_d_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1];
#else
  return &context->v[n].d[0];
#endif
}

/* Address of the full (Q) view of vector register n.  */
static void *
get_v_addr (struct call_context *context, unsigned n)
{
  return &context->v[n];
}

/* Return the memory location at which a basic type would reside
   were it to have been stored in register n.  */

static void *
get_basic_type_addr (unsigned short type, struct call_context *context,
		     unsigned n)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return get_s_addr (context, n);
    case FFI_TYPE_DOUBLE:
      return get_d_addr (context, n);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return get_v_addr (context, n);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return get_x_addr (context, n);
    case FFI_TYPE_VOID:
      return NULL;
    default:
      FFI_ASSERT (0);
      return NULL;
    }
}

/* Return the alignment width for each of the basic types.

   On Apple platforms types keep their natural alignment on the stack;
   elsewhere every argument occupies an 8-byte-aligned slot, hence the
   deliberate case fall-throughs below.  */

static size_t
get_basic_type_alignment (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
      /* Fall through: non-Apple floats use an 8-byte slot.  */
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
#if defined (__APPLE__)
      return sizeof (UINT8);
#endif
      /* Fall through.  */
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
#if defined (__APPLE__)
      return sizeof (UINT16);
#endif
      /* Fall through.  */
    case FFI_TYPE_UINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
      /* Fall through.  */
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return sizeof (UINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}

/* Return the size in bytes for each of the basic types.
*/

static size_t
get_basic_type_size (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return sizeof (UINT32);
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
      return sizeof (UINT8);
    case FFI_TYPE_SINT8:
      return sizeof (SINT8);
    case FFI_TYPE_UINT16:
      return sizeof (UINT16);
    case FFI_TYPE_SINT16:
      return sizeof (SINT16);
    case FFI_TYPE_UINT32:
      return sizeof (UINT32);
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      return sizeof (SINT32);
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      return sizeof (UINT64);
    case FFI_TYPE_SINT64:
      return sizeof (SINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}

/* Assembler entry point: sets up the call frame, runs the supplied
   argument-marshalling function, then transfers control to FN.  */
extern void
ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
			    extended_cif *),
	       struct call_context *context,
	       extended_cif *,
	       size_t,
	       void (*fn)(void));

/* Assembler entry point for closures.  */
extern void
ffi_closure_SYSV (ffi_closure *);

/* Test for an FFI floating point representation.  */

static unsigned
is_floating_type (unsigned short type)
{
  return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
	  || type == FFI_TYPE_LONGDOUBLE);
}

/* Test for a homogeneous structure.

   Returns the common element type if TY is a (possibly nested) struct
   whose leaf elements all share one basic type, FFI_TYPE_STRUCT if the
   elements differ, or TY's own type for a non-struct.  */

static unsigned short
get_homogeneous_type (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned i;
      unsigned short candidate_type
	= get_homogeneous_type (ty->elements[0]);
      for (i = 1; ty->elements[i]; i++)
	{
	  unsigned short iteration_type = 0;
	  /* If we have a nested struct, we must find its homogeneous type.
	     If that fits with our candidate type, we are still
	     homogeneous.  */
	  if (ty->elements[i]->type == FFI_TYPE_STRUCT
	      && ty->elements[i]->elements)
	    {
	      iteration_type = get_homogeneous_type (ty->elements[i]);
	    }
	  else
	    {
	      iteration_type = ty->elements[i]->type;
	    }

	  /* If we are not homogeneous, return FFI_TYPE_STRUCT.  */
	  if (candidate_type != iteration_type)
	    return FFI_TYPE_STRUCT;
	}
      return candidate_type;
    }

  /* Base case, we have no more levels of nesting, so we
     are a basic type, and so, trivially homogeneous in that type.  */
  return ty->type;
}

/* Determine the number of elements within a STRUCT.

   Note, we must handle nested structs.

   If ty is not a STRUCT this function will return 0.  */

static unsigned
element_count (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned n;
      unsigned elems = 0;
      for (n = 0; ty->elements[n]; n++)
	{
	  if (ty->elements[n]->type == FFI_TYPE_STRUCT
	      && ty->elements[n]->elements)
	    elems += element_count (ty->elements[n]);
	  else
	    elems++;
	}
      return elems;
    }
  return 0;
}

/* Test for a homogeneous floating point aggregate.

   A homogeneous floating point aggregate is a homogeneous aggregate of
   a half- single- or double- precision floating point type with one
   to four elements.  Note that this includes nested structs of the
   basic type.  */

static int
is_hfa (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT
      && ty->elements[0]
      && is_floating_type (get_homogeneous_type (ty)))
    {
      unsigned n = element_count (ty);
      return n >= 1 && n <= 4;
    }
  return 0;
}

/* Test if an ffi_type is a candidate for passing in a register.

   This test does not check that sufficient registers of the
   appropriate class are actually available, merely that IFF
   sufficient registers are available then the argument will be passed
   in register(s).
   Note that an ffi_type that is deemed to be a register candidate
   will always be returned in registers.

   Returns 1 if a register candidate else 0.  */

static int
is_register_candidate (ffi_type *ty)
{
  switch (ty->type)
    {
    case FFI_TYPE_VOID:
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT64:
      return 1;

    case FFI_TYPE_STRUCT:
      if (is_hfa (ty))
	{
	  return 1;
	}
      else if (ty->size > 16)
	{
	  /* Too large.  Will be replaced with a pointer to memory.  The
	     pointer MAY be passed in a register, but the value will
	     not.  This test specifically fails since the argument will
	     never be passed by value in registers.  */
	  return 0;
	}
      else
	{
	  /* Might be passed in registers depending on the number of
	     registers required.  */
	  return (ty->size + 7) / 8 < N_X_ARG_REG;
	}
      break;

    default:
      FFI_ASSERT (0);
      break;
    }

  return 0;
}

/* Test if an ffi_type argument or result is a candidate for a vector
   register.  */

static int
is_v_register_candidate (ffi_type *ty)
{
  return is_floating_type (ty->type)
	   || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
}

/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS.  */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number.  */
  unsigned nsrn;                /* Next vector register number.  */
  size_t nsaa;                  /* Next stack offset.  */

#if defined (__APPLE__)
  unsigned allocating_variadic; /* Nonzero once the variadic portion of
				   the argument list has begun.  */
#endif
};

/* Initialize a procedure call argument marshalling state.  */
/* NOTE(review): call_frame_size is currently unused by this
   implementation.  */
static void
arg_init (struct arg_state *state, size_t call_frame_size)
{
  state->ngrn = 0;
  state->nsrn = 0;
  state->nsaa = 0;

#if defined (__APPLE__)
  state->allocating_variadic = 0;
#endif
}

/* Return the number of available consecutive core argument
   registers.  */

static unsigned
available_x (struct arg_state *state)
{
  return N_X_ARG_REG - state->ngrn;
}

/* Return the number of available consecutive vector argument
   registers.  */

static unsigned
available_v (struct arg_state *state)
{
  return N_V_ARG_REG - state->nsrn;
}

/* Claim the next core argument register slot and advance NGRN.  */
static void *
allocate_to_x (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->ngrn < N_X_ARG_REG);
  return get_x_addr (context, (state->ngrn)++);
}

/* Claim the next vector register's S view and advance NSRN.  */
static void *
allocate_to_s (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_s_addr (context, (state->nsrn)++);
}

/* Claim the next vector register's D view and advance NSRN.  */
static void *
allocate_to_d (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_d_addr (context, (state->nsrn)++);
}

/* Claim the next full vector register and advance NSRN.  */
static void *
allocate_to_v (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_v_addr (context, (state->nsrn)++);
}

/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack, size_t alignment,
		   size_t size)
{
  void *allocation;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.
*/ 488 state->nsaa = ALIGN (state->nsaa, alignment); 489 state->nsaa = ALIGN (state->nsaa, alignment); 490#if defined (__APPLE__) 491 if (state->allocating_variadic) 492 state->nsaa = ALIGN (state->nsaa, 8); 493#else 494 state->nsaa = ALIGN (state->nsaa, 8); 495#endif 496 497 allocation = stack + state->nsaa; 498 499 state->nsaa += size; 500 return allocation; 501} 502 503static void 504copy_basic_type (void *dest, void *source, unsigned short type) 505{ 506 /* This is necessary to ensure that basic types are copied 507 sign extended to 64-bits as libffi expects. */ 508 switch (type) 509 { 510 case FFI_TYPE_FLOAT: 511 *(float *) dest = *(float *) source; 512 break; 513 case FFI_TYPE_DOUBLE: 514 *(double *) dest = *(double *) source; 515 break; 516#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE 517 case FFI_TYPE_LONGDOUBLE: 518 *(long double *) dest = *(long double *) source; 519 break; 520#endif 521 case FFI_TYPE_UINT8: 522 *(ffi_arg *) dest = *(UINT8 *) source; 523 break; 524 case FFI_TYPE_SINT8: 525 *(ffi_sarg *) dest = *(SINT8 *) source; 526 break; 527 case FFI_TYPE_UINT16: 528 *(ffi_arg *) dest = *(UINT16 *) source; 529 break; 530 case FFI_TYPE_SINT16: 531 *(ffi_sarg *) dest = *(SINT16 *) source; 532 break; 533 case FFI_TYPE_UINT32: 534 *(ffi_arg *) dest = *(UINT32 *) source; 535 break; 536 case FFI_TYPE_INT: 537 case FFI_TYPE_SINT32: 538 *(ffi_sarg *) dest = *(SINT32 *) source; 539 break; 540 case FFI_TYPE_POINTER: 541 case FFI_TYPE_UINT64: 542 *(ffi_arg *) dest = *(UINT64 *) source; 543 break; 544 case FFI_TYPE_SINT64: 545 *(ffi_sarg *) dest = *(SINT64 *) source; 546 break; 547 case FFI_TYPE_VOID: 548 break; 549 550 default: 551 FFI_ASSERT (0); 552 } 553} 554 555static void 556copy_hfa_to_reg_or_stack (void *memory, 557 ffi_type *ty, 558 struct call_context *context, 559 unsigned char *stack, 560 struct arg_state *state) 561{ 562 unsigned elems = element_count (ty); 563 if (available_v (state) < elems) 564 { 565 /* There are insufficient V registers. 
	 Further V register allocations
	 are prevented, the NSAA is adjusted (by allocate_to_stack ())
	 and the argument is copied to memory at the adjusted NSAA.  */
      state->nsrn = N_V_ARG_REG;
      memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
	      memory,
	      ty->size);
    }
  else
    {
      /* Scatter the aggregate's elements, one per V register.  */
      int i;
      unsigned short type = get_homogeneous_type (ty);
      for (i = 0; i < elems; i++)
	{
	  void *reg = allocate_to_v (context, state);
	  copy_basic_type (reg, memory, type);
	  memory += get_basic_type_size (type);
	}
    }
}

/* Either allocate an appropriate register for the argument type, or if
   none are available, allocate a stack slot and return a pointer
   to the allocated space.  */

static void *
allocate_to_register_or_stack (struct call_context *context,
			       unsigned char *stack,
			       struct arg_state *state,
			       unsigned short type)
{
  size_t alignment = get_basic_type_alignment (type);
  size_t size = alignment;
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      /* This is the only case for which the allocated stack size
	 should not match the alignment of the type.  */
      size = sizeof (UINT32);
      /* Fall through.  */
    case FFI_TYPE_DOUBLE:
      if (state->nsrn < N_V_ARG_REG)
	return allocate_to_d (context, state);
      /* Out of vector registers: block further V allocation per the
	 PCS, then fall out to the stack path below.  */
      state->nsrn = N_V_ARG_REG;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      if (state->nsrn < N_V_ARG_REG)
	return allocate_to_v (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      if (state->ngrn < N_X_ARG_REG)
	return allocate_to_x (context, state);
      state->ngrn = N_X_ARG_REG;
      break;
    default:
      FFI_ASSERT (0);
    }

  return allocate_to_stack (state, stack, alignment, size);
}

/* Copy a value to an appropriate register, or if none are
   available, to the stack.  */

static void
copy_to_register_or_stack (struct call_context *context,
			   unsigned char *stack,
			   struct arg_state *state,
			   void *value,
			   unsigned short type)
{
  copy_basic_type (
	allocate_to_register_or_stack (context, stack, state, type),
	value,
	type);
}

/* Marshall the arguments from FFI representation to procedure call
   context and stack.  */

static unsigned
aarch64_prep_args (struct call_context *context, unsigned char *stack,
		   extended_cif *ecif)
{
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN(ecif->cif->bytes, 16));

  for (i = 0; i < ecif->cif->nargs; i++)
    {
      ffi_type *ty = ecif->cif->arg_types[i];
      switch (ty->type)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack.
*/ 677 case FFI_TYPE_FLOAT: 678 case FFI_TYPE_DOUBLE: 679#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE 680 case FFI_TYPE_LONGDOUBLE: 681#endif 682 case FFI_TYPE_UINT8: 683 case FFI_TYPE_SINT8: 684 case FFI_TYPE_UINT16: 685 case FFI_TYPE_SINT16: 686 case FFI_TYPE_UINT32: 687 case FFI_TYPE_INT: 688 case FFI_TYPE_SINT32: 689 case FFI_TYPE_POINTER: 690 case FFI_TYPE_UINT64: 691 case FFI_TYPE_SINT64: 692 copy_to_register_or_stack (context, stack, &state, 693 ecif->avalue[i], ty->type); 694 break; 695 696 case FFI_TYPE_STRUCT: 697 if (is_hfa (ty)) 698 { 699 copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context, 700 stack, &state); 701 } 702 else if (ty->size > 16) 703 { 704 /* If the argument is a composite type that is larger than 16 705 bytes, then the argument has been copied to memory, and 706 the argument is replaced by a pointer to the copy. */ 707 708 copy_to_register_or_stack (context, stack, &state, 709 &(ecif->avalue[i]), FFI_TYPE_POINTER); 710 } 711 else if (available_x (&state) >= (ty->size + 7) / 8) 712 { 713 /* If the argument is a composite type and the size in 714 double-words is not more than the number of available 715 X registers, then the argument is copied into consecutive 716 X registers. */ 717 int j; 718 for (j = 0; j < (ty->size + 7) / 8; j++) 719 { 720 memcpy (allocate_to_x (context, &state), 721 &(((UINT64 *) ecif->avalue[i])[j]), 722 sizeof (UINT64)); 723 } 724 } 725 else 726 { 727 /* Otherwise, there are insufficient X registers. Further X 728 register allocations are prevented, the NSAA is adjusted 729 (by allocate_to_stack ()) and the argument is copied to 730 memory at the adjusted NSAA. 
*/ 731 state.ngrn = N_X_ARG_REG; 732 733 memcpy (allocate_to_stack (&state, stack, ty->alignment, 734 ty->size), ecif->avalue + i, ty->size); 735 } 736 break; 737 738 default: 739 FFI_ASSERT (0); 740 break; 741 } 742 743#if defined (__APPLE__) 744 if (i + 1 == ecif->cif->aarch64_nfixedargs) 745 { 746 state.ngrn = N_X_ARG_REG; 747 state.nsrn = N_V_ARG_REG; 748 749 state.allocating_variadic = 1; 750 } 751#endif 752 } 753 754 return ecif->cif->aarch64_flags; 755} 756 757ffi_status 758ffi_prep_cif_machdep (ffi_cif *cif) 759{ 760 /* Round the stack up to a multiple of the stack alignment requirement. */ 761 cif->bytes = 762 (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1); 763 764 /* Initialize our flags. We are interested if this CIF will touch a 765 vector register, if so we will enable context save and load to 766 those registers, otherwise not. This is intended to be friendly 767 to lazy float context switching in the kernel. */ 768 cif->aarch64_flags = 0; 769 770 if (is_v_register_candidate (cif->rtype)) 771 { 772 cif->aarch64_flags |= AARCH64_FFI_WITH_V; 773 } 774 else 775 { 776 int i; 777 for (i = 0; i < cif->nargs; i++) 778 if (is_v_register_candidate (cif->arg_types[i])) 779 { 780 cif->aarch64_flags |= AARCH64_FFI_WITH_V; 781 break; 782 } 783 } 784 785#if defined (__APPLE__) 786 cif->aarch64_nfixedargs = 0; 787#endif 788 789 return FFI_OK; 790} 791 792#if defined (__APPLE__) 793 794/* Perform Apple-specific cif processing for variadic calls */ 795ffi_status ffi_prep_cif_machdep_var(ffi_cif *cif, 796 unsigned int nfixedargs, 797 unsigned int ntotalargs) 798{ 799 ffi_status status; 800 801 status = ffi_prep_cif_machdep (cif); 802 803 cif->aarch64_nfixedargs = nfixedargs; 804 805 return status; 806} 807 808#endif 809 810/* Call a function with the provided arguments and capture the return 811 value. 
*/
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
  extended_cif ecif;

  ecif.cif = cif;
  ecif.avalue = avalue;
  ecif.rvalue = rvalue;

  switch (cif->abi)
    {
    case FFI_SYSV:
      {
	struct call_context context;
	size_t stack_bytes;

	/* Figure out the total amount of stack space we need, the
	   above call frame space needs to be 16 bytes aligned to
	   ensure correct alignment of the first object inserted in
	   that space hence the ALIGN applied to cif->bytes.*/
	stack_bytes = ALIGN(cif->bytes, 16);

	memset (&context, 0, sizeof (context));
	if (is_register_candidate (cif->rtype))
	  {
	    ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
	    /* The callee has returned; pull the result out of the saved
	       register context into the caller-supplied rvalue.  */
	    switch (cif->rtype->type)
	      {
	      case FFI_TYPE_VOID:
	      case FFI_TYPE_FLOAT:
	      case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
	      case FFI_TYPE_LONGDOUBLE:
#endif
	      case FFI_TYPE_UINT8:
	      case FFI_TYPE_SINT8:
	      case FFI_TYPE_UINT16:
	      case FFI_TYPE_SINT16:
	      case FFI_TYPE_UINT32:
	      case FFI_TYPE_SINT32:
	      case FFI_TYPE_POINTER:
	      case FFI_TYPE_UINT64:
	      case FFI_TYPE_INT:
	      case FFI_TYPE_SINT64:
		{
		  void *addr = get_basic_type_addr (cif->rtype->type,
						    &context, 0);
		  copy_basic_type (rvalue, addr, cif->rtype->type);
		  break;
		}

	      case FFI_TYPE_STRUCT:
		if (is_hfa (cif->rtype))
		  {
		    /* An HFA result comes back one element per vector
		       register; reassemble it contiguously.  */
		    int j;
		    unsigned short type = get_homogeneous_type (cif->rtype);
		    unsigned elems = element_count (cif->rtype);
		    for (j = 0; j < elems; j++)
		      {
			void *reg = get_basic_type_addr (type, &context, j);
			copy_basic_type (rvalue, reg, type);
			rvalue += get_basic_type_size (type);
		      }
		  }
		else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
		  {
		    /* Small struct returned in consecutive X registers.  */
		    size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
		    memcpy (rvalue, get_x_addr (&context, 0), size);
		  }
		else
		  {
		    FFI_ASSERT (0);
		  }
		break;

	      default:
		FFI_ASSERT (0);
		break;
	      }
	  }
	else
	  {
	    /* Large composite result: the PCS passes the result buffer's
	       address in x8 and the callee writes it directly.  */
	    memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
	    ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
			   stack_bytes, fn);
	  }
	break;
      }

    default:
      FFI_ASSERT (0);
      break;
    }
}

/* AArch64 closure trampoline template.  The 64-bit literals patched in
   after the code (at offsets 12 and 20) are loaded/addressed via the
   pc-relative ldr/adr below.  */
static unsigned char trampoline [] =
{ 0x70, 0x00, 0x00, 0x58,	/* ldr	x16, 1f	*/
  0x91, 0x00, 0x00, 0x10,	/* adr	x17, 2f	*/
  0x00, 0x02, 0x1f, 0xd6	/* br	x16	*/
};

/* Build a trampoline.  */

#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)			\
  ({unsigned char *__tramp = (unsigned char*)(TRAMP);			\
    UINT64  __fun = (UINT64)(FUN);					\
    UINT64  __ctx = (UINT64)(CTX);					\
    UINT64  __flags = (UINT64)(FLAGS);					\
    memcpy (__tramp, trampoline, sizeof (trampoline));			\
    memcpy (__tramp + 12, &__fun, sizeof (__fun));			\
    memcpy (__tramp + 20, &__ctx, sizeof (__ctx));			\
    memcpy (__tramp + 28, &__flags, sizeof (__flags));			\
    ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);		\
  })

ffi_status
ffi_prep_closure_loc (ffi_closure* closure,
		      ffi_cif* cif,
		      void (*fun)(ffi_cif*,void*,void**,void*),
		      void *user_data,
		      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
		       cif->aarch64_flags);

  closure->cif = cif;
  closure->user_data = user_data;
  closure->fun = fun;

  return FFI_OK;
}

/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV().  The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.
955 956 On the return path the assembler wrapper will reload call context 957 registers. 958 959 ffi_closure_SYSV_inner() marshalls the call context into ffi value 960 descriptors, invokes the wrapped function, then marshalls the return 961 value back into the call context. */ 962 963void FFI_HIDDEN 964ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context, 965 void *stack) 966{ 967 ffi_cif *cif = closure->cif; 968 void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); 969 void *rvalue = NULL; 970 int i; 971 struct arg_state state; 972 973 arg_init (&state, ALIGN(cif->bytes, 16)); 974 975 for (i = 0; i < cif->nargs; i++) 976 { 977 ffi_type *ty = cif->arg_types[i]; 978 979 switch (ty->type) 980 { 981 case FFI_TYPE_VOID: 982 FFI_ASSERT (0); 983 break; 984 985 case FFI_TYPE_UINT8: 986 case FFI_TYPE_SINT8: 987 case FFI_TYPE_UINT16: 988 case FFI_TYPE_SINT16: 989 case FFI_TYPE_UINT32: 990 case FFI_TYPE_SINT32: 991 case FFI_TYPE_INT: 992 case FFI_TYPE_POINTER: 993 case FFI_TYPE_UINT64: 994 case FFI_TYPE_SINT64: 995 case FFI_TYPE_FLOAT: 996 case FFI_TYPE_DOUBLE: 997#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE 998 case FFI_TYPE_LONGDOUBLE: 999 avalue[i] = allocate_to_register_or_stack (context, stack, 1000 &state, ty->type); 1001 break; 1002#endif 1003 1004 case FFI_TYPE_STRUCT: 1005 if (is_hfa (ty)) 1006 { 1007 unsigned n = element_count (ty); 1008 if (available_v (&state) < n) 1009 { 1010 state.nsrn = N_V_ARG_REG; 1011 avalue[i] = allocate_to_stack (&state, stack, ty->alignment, 1012 ty->size); 1013 } 1014 else 1015 { 1016 switch (get_homogeneous_type (ty)) 1017 { 1018 case FFI_TYPE_FLOAT: 1019 { 1020 /* Eeek! We need a pointer to the structure, 1021 however the homogeneous float elements are 1022 being passed in individual S registers, 1023 therefore the structure is not represented as 1024 a contiguous sequence of bytes in our saved 1025 register context. We need to fake up a copy 1026 of the structure laid out in memory 1027 correctly. 
The fake can be tossed once the 1028 closure function has returned hence alloca() 1029 is sufficient. */ 1030 int j; 1031 UINT32 *p = avalue[i] = alloca (ty->size); 1032 for (j = 0; j < element_count (ty); j++) 1033 memcpy (&p[j], 1034 allocate_to_s (context, &state), 1035 sizeof (*p)); 1036 break; 1037 } 1038 1039 case FFI_TYPE_DOUBLE: 1040 { 1041 /* Eeek! We need a pointer to the structure, 1042 however the homogeneous float elements are 1043 being passed in individual S registers, 1044 therefore the structure is not represented as 1045 a contiguous sequence of bytes in our saved 1046 register context. We need to fake up a copy 1047 of the structure laid out in memory 1048 correctly. The fake can be tossed once the 1049 closure function has returned hence alloca() 1050 is sufficient. */ 1051 int j; 1052 UINT64 *p = avalue[i] = alloca (ty->size); 1053 for (j = 0; j < element_count (ty); j++) 1054 memcpy (&p[j], 1055 allocate_to_d (context, &state), 1056 sizeof (*p)); 1057 break; 1058 } 1059 1060#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE 1061 case FFI_TYPE_LONGDOUBLE: 1062 memcpy (&avalue[i], 1063 allocate_to_v (context, &state), 1064 sizeof (*avalue)); 1065 break; 1066#endif 1067 1068 default: 1069 FFI_ASSERT (0); 1070 break; 1071 } 1072 } 1073 } 1074 else if (ty->size > 16) 1075 { 1076 /* Replace Composite type of size greater than 16 with a 1077 pointer. 
*/ 1078 memcpy (&avalue[i], 1079 allocate_to_register_or_stack (context, stack, 1080 &state, FFI_TYPE_POINTER), 1081 sizeof (avalue[i])); 1082 } 1083 else if (available_x (&state) >= (ty->size + 7) / 8) 1084 { 1085 avalue[i] = get_x_addr (context, state.ngrn); 1086 state.ngrn += (ty->size + 7) / 8; 1087 } 1088 else 1089 { 1090 state.ngrn = N_X_ARG_REG; 1091 1092 avalue[i] = allocate_to_stack (&state, stack, ty->alignment, 1093 ty->size); 1094 } 1095 break; 1096 1097 default: 1098 FFI_ASSERT (0); 1099 break; 1100 } 1101 } 1102 1103 /* Figure out where the return value will be passed, either in 1104 registers or in a memory block allocated by the caller and passed 1105 in x8. */ 1106 1107 if (is_register_candidate (cif->rtype)) 1108 { 1109 /* Register candidates are *always* returned in registers. */ 1110 1111 /* Allocate a scratchpad for the return value, we will let the 1112 callee scrible the result into the scratch pad then move the 1113 contents into the appropriate return value location for the 1114 call convention. */ 1115 rvalue = alloca (cif->rtype->size); 1116 (closure->fun) (cif, rvalue, avalue, closure->user_data); 1117 1118 /* Copy the return value into the call context so that it is returned 1119 as expected to our caller. 
*/ 1120 switch (cif->rtype->type) 1121 { 1122 case FFI_TYPE_VOID: 1123 break; 1124 1125 case FFI_TYPE_UINT8: 1126 case FFI_TYPE_UINT16: 1127 case FFI_TYPE_UINT32: 1128 case FFI_TYPE_POINTER: 1129 case FFI_TYPE_UINT64: 1130 case FFI_TYPE_SINT8: 1131 case FFI_TYPE_SINT16: 1132 case FFI_TYPE_INT: 1133 case FFI_TYPE_SINT32: 1134 case FFI_TYPE_SINT64: 1135 case FFI_TYPE_FLOAT: 1136 case FFI_TYPE_DOUBLE: 1137#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE 1138 case FFI_TYPE_LONGDOUBLE: 1139#endif 1140 { 1141 void *addr = get_basic_type_addr (cif->rtype->type, context, 0); 1142 copy_basic_type (addr, rvalue, cif->rtype->type); 1143 break; 1144 } 1145 case FFI_TYPE_STRUCT: 1146 if (is_hfa (cif->rtype)) 1147 { 1148 int j; 1149 unsigned short type = get_homogeneous_type (cif->rtype); 1150 unsigned elems = element_count (cif->rtype); 1151 for (j = 0; j < elems; j++) 1152 { 1153 void *reg = get_basic_type_addr (type, context, j); 1154 copy_basic_type (reg, rvalue, type); 1155 rvalue += get_basic_type_size (type); 1156 } 1157 } 1158 else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG) 1159 { 1160 size_t size = ALIGN (cif->rtype->size, sizeof (UINT64)) ; 1161 memcpy (get_x_addr (context, 0), rvalue, size); 1162 } 1163 else 1164 { 1165 FFI_ASSERT (0); 1166 } 1167 break; 1168 default: 1169 FFI_ASSERT (0); 1170 break; 1171 } 1172 } 1173 else 1174 { 1175 memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64)); 1176 (closure->fun) (cif, rvalue, avalue, closure->user_data); 1177 } 1178} 1179 1180