/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
``Software''), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */

#include <stdio.h>

#include <ffi.h>
#include <ffi_common.h>

#include <stdlib.h>

/* Stack alignment requirement in bytes.  */
#if defined (__APPLE__)
#define AARCH64_STACK_ALIGN 1
#else
#define AARCH64_STACK_ALIGN 16
#endif

#define N_X_ARG_REG 8
#define N_V_ARG_REG 8

#define AARCH64_FFI_WITH_V (1 << AARCH64_FFI_WITH_V_BIT)

union _d
{
  UINT64 d;
  UINT32 s[2];
};

struct call_context
{
  UINT64 x[AARCH64_N_XREG];
  struct
  {
    union _d d[2];
  } v[AARCH64_N_VREG];
};

#if defined (__clang__) && defined (__APPLE__)
extern void
sys_icache_invalidate (void *start, size_t len);
#endif

static inline void
ffi_clear_cache (void *start, void *end)
{
#if defined (__clang__) && defined (__APPLE__)
  sys_icache_invalidate (start, (char *) end - (char *) start);
#elif defined (__GNUC__)
  __builtin___clear_cache (start, end);
#else
#error "Missing builtin to flush instruction cache"
#endif
}

static void *
get_x_addr (struct call_context *context, unsigned n)
{
  return &context->x[n];
}

static void *
get_s_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1].s[1];
#else
  return &context->v[n].d[0].s[0];
#endif
}

static void *
get_d_addr (struct call_context *context, unsigned n)
{
#if defined __AARCH64EB__
  return &context->v[n].d[1];
#else
  return &context->v[n].d[0];
#endif
}

static void *
get_v_addr (struct call_context *context, unsigned n)
{
  return &context->v[n];
}
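
/* Illustrative note (not part of the generated code): each saved V
   register occupies 16 bytes in the context (two union _d slots).  On
   a little-endian target a float or double argument lives in the least
   significant bytes of that slot, so get_s_addr () and get_d_addr ()
   return v[n].d[0].s[0] and v[n].d[0] respectively, i.e. byte offset 0
   of the slot.  On __AARCH64EB__ the value sits at the most
   significant end, so a float is read from byte offset 12 of the slot
   (v[n].d[1].s[1]) and a double from byte offset 8 (v[n].d[1]).  */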

/* Return the memory location at which a basic type would reside
   were it to have been stored in register n.  */

static void *
get_basic_type_addr (unsigned short type, struct call_context *context,
		     unsigned n)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return get_s_addr (context, n);
    case FFI_TYPE_DOUBLE:
      return get_d_addr (context, n);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return get_v_addr (context, n);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return get_x_addr (context, n);
    case FFI_TYPE_VOID:
      return NULL;
    default:
      FFI_ASSERT (0);
      return NULL;
    }
}

/* Return the alignment width for each of the basic types.  */

static size_t
get_basic_type_alignment (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
#if defined (__APPLE__)
      return sizeof (UINT8);
#endif
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
#if defined (__APPLE__)
      return sizeof (UINT16);
#endif
    case FFI_TYPE_UINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
#if defined (__APPLE__)
      return sizeof (UINT32);
#endif
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      return sizeof (UINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}
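
/* Example (illustrative): the #if fall-through pattern above means
   that with __APPLE__ defined a UINT16 argument reports an alignment
   of 2, while on other targets the case falls through and everything
   from FLOAT to SINT64 reports 8.  This reflects the AAPCS64 rule
   that stack argument slots are at least 8-byte aligned, versus the
   Apple arm64 ABI, which packs stack arguments to their natural
   alignment.  */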

/* Return the size in bytes for each of the basic types.  */

static size_t
get_basic_type_size (unsigned short type)
{
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      return sizeof (UINT32);
    case FFI_TYPE_DOUBLE:
      return sizeof (UINT64);
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      return sizeof (long double);
#endif
    case FFI_TYPE_UINT8:
      return sizeof (UINT8);
    case FFI_TYPE_SINT8:
      return sizeof (SINT8);
    case FFI_TYPE_UINT16:
      return sizeof (UINT16);
    case FFI_TYPE_SINT16:
      return sizeof (SINT16);
    case FFI_TYPE_UINT32:
      return sizeof (UINT32);
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      return sizeof (SINT32);
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      return sizeof (UINT64);
    case FFI_TYPE_SINT64:
      return sizeof (SINT64);

    default:
      FFI_ASSERT (0);
      return 0;
    }
}

extern void
ffi_call_SYSV (unsigned (*)(struct call_context *context, unsigned char *,
			    extended_cif *),
	       struct call_context *context,
	       extended_cif *,
	       size_t,
	       void (*fn)(void));

extern void
ffi_closure_SYSV (ffi_closure *);

/* Test for an FFI floating point representation.  */

static unsigned
is_floating_type (unsigned short type)
{
  return (type == FFI_TYPE_FLOAT || type == FFI_TYPE_DOUBLE
	  || type == FFI_TYPE_LONGDOUBLE);
}

/* Test for a homogeneous structure.  */

static unsigned short
get_homogeneous_type (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned i;
      unsigned short candidate_type
	= get_homogeneous_type (ty->elements[0]);
      for (i = 1; ty->elements[i]; i++)
	{
	  unsigned short iteration_type = 0;
	  /* If we have a nested struct, we must find its homogeneous type.
	     If that fits with our candidate type, we are still
	     homogeneous.  */
	  if (ty->elements[i]->type == FFI_TYPE_STRUCT
	      && ty->elements[i]->elements)
	    {
	      iteration_type = get_homogeneous_type (ty->elements[i]);
	    }
	  else
	    {
	      iteration_type = ty->elements[i]->type;
	    }

	  /* If we are not homogeneous, return FFI_TYPE_STRUCT.  */
	  if (candidate_type != iteration_type)
	    return FFI_TYPE_STRUCT;
	}
      return candidate_type;
    }

  /* Base case, we have no more levels of nesting, so we
     are a basic type, and so, trivially homogeneous in that type.  */
  return ty->type;
}
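
/* Example (illustrative): given

     struct a { float x; float y; };
     struct b { struct a p; float z; };

   get_homogeneous_type () returns FFI_TYPE_FLOAT for both, since
   every leaf element is a float.  Adding a double member to either
   struct would make the leaf types disagree and the result would be
   FFI_TYPE_STRUCT.  */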

/* Determine the number of elements within a STRUCT.

   Note, we must handle nested structs.

   If ty is not a STRUCT this function will return 0.  */

static unsigned
element_count (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT && ty->elements)
    {
      unsigned n;
      unsigned elems = 0;
      for (n = 0; ty->elements[n]; n++)
	{
	  if (ty->elements[n]->type == FFI_TYPE_STRUCT
	      && ty->elements[n]->elements)
	    elems += element_count (ty->elements[n]);
	  else
	    elems++;
	}
      return elems;
    }
  return 0;
}

/* Test for a homogeneous floating point aggregate.

   A homogeneous floating point aggregate is a homogeneous aggregate of
   a half-, single- or double-precision floating point type with one
   to four elements.  Note that this includes nested structs of the
   basic type.  */

static int
is_hfa (ffi_type *ty)
{
  if (ty->type == FFI_TYPE_STRUCT
      && ty->elements[0]
      && is_floating_type (get_homogeneous_type (ty)))
    {
      unsigned n = element_count (ty);
      return n >= 1 && n <= 4;
    }
  return 0;
}
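
/* Example (illustrative): struct { double x, y, z; } is an HFA of
   three doubles (element_count () == 3, homogeneous type
   FFI_TYPE_DOUBLE), so it is passed and returned in d0-d2.  A struct
   mixing a float and a double is not homogeneous, and a struct of
   five floats has one element too many; both are treated as ordinary
   structs.  */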

/* Test if an ffi_type is a candidate for passing in a register.

   This test does not check that sufficient registers of the
   appropriate class are actually available, merely that IFF
   sufficient registers are available then the argument will be passed
   in register(s).

   Note that an ffi_type that is deemed to be a register candidate
   will always be returned in registers.

   Returns 1 if a register candidate else 0.  */

static int
is_register_candidate (ffi_type *ty)
{
  switch (ty->type)
    {
    case FFI_TYPE_VOID:
    case FFI_TYPE_FLOAT:
    case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT64:
      return 1;

    case FFI_TYPE_STRUCT:
      if (is_hfa (ty))
	{
	  return 1;
	}
      else if (ty->size > 16)
	{
	  /* Too large.  Will be replaced with a pointer to memory.  The
	     pointer MAY be passed in a register, but the value will
	     not.  This test specifically fails since the argument will
	     never be passed by value in registers.  */
	  return 0;
	}
      else
	{
	  /* Might be passed in registers depending on the number of
	     registers required.  */
	  return (ty->size + 7) / 8 < N_X_ARG_REG;
	}
      break;

    default:
      FFI_ASSERT (0);
      break;
    }

  return 0;
}

/* Test if an ffi_type argument or result is a candidate for a vector
   register.  */

static int
is_v_register_candidate (ffi_type *ty)
{
  return is_floating_type (ty->type)
	 || (ty->type == FFI_TYPE_STRUCT && is_hfa (ty));
}

/* Representation of the procedure call argument marshalling
   state.

   The terse state variable names match the names used in the AARCH64
   PCS.  */

struct arg_state
{
  unsigned ngrn;                /* Next general-purpose register number.  */
  unsigned nsrn;                /* Next vector register number.  */
  size_t nsaa;                  /* Next stack offset.  */

#if defined (__APPLE__)
  unsigned allocating_variadic;
#endif
};

/* Initialize a procedure call argument marshalling state.  */
static void
arg_init (struct arg_state *state, size_t call_frame_size)
{
  state->ngrn = 0;
  state->nsrn = 0;
  state->nsaa = 0;

#if defined (__APPLE__)
  state->allocating_variadic = 0;
#endif
}

/* Return the number of available consecutive core argument
   registers.  */

static unsigned
available_x (struct arg_state *state)
{
  return N_X_ARG_REG - state->ngrn;
}

/* Return the number of available consecutive vector argument
   registers.  */

static unsigned
available_v (struct arg_state *state)
{
  return N_V_ARG_REG - state->nsrn;
}

static void *
allocate_to_x (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->ngrn < N_X_ARG_REG);
  return get_x_addr (context, (state->ngrn)++);
}

static void *
allocate_to_s (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_s_addr (context, (state->nsrn)++);
}

static void *
allocate_to_d (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_d_addr (context, (state->nsrn)++);
}

static void *
allocate_to_v (struct call_context *context, struct arg_state *state)
{
  FFI_ASSERT (state->nsrn < N_V_ARG_REG);
  return get_v_addr (context, (state->nsrn)++);
}

/* Allocate an aligned slot on the stack and return a pointer to it.  */
static void *
allocate_to_stack (struct arg_state *state, void *stack, size_t alignment,
		   size_t size)
{
  void *allocation;

  /* Round up the NSAA to the larger of 8 or the natural
     alignment of the argument's type.  */
  state->nsaa = ALIGN (state->nsaa, alignment);
#if defined (__APPLE__)
  if (state->allocating_variadic)
    state->nsaa = ALIGN (state->nsaa, 8);
#else
  state->nsaa = ALIGN (state->nsaa, 8);
#endif

  allocation = stack + state->nsaa;

  state->nsaa += size;
  return allocation;
}
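
/* Worked example (illustrative): on a non-Apple target, a float being
   passed on the stack reaches this function with alignment == 8 (see
   get_basic_type_alignment) and size == 4.  With nsaa == 4 the first
   ALIGN rounds nsaa up to 8 and the second is then a no-op, so the
   float occupies bytes 8-11 and nsaa becomes 12; the next 8-byte
   argument would be rounded up to offset 16.  */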

static void
copy_basic_type (void *dest, void *source, unsigned short type)
{
  /* This is necessary to ensure that basic types are copied
     sign extended to 64-bits as libffi expects.  */
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      *(float *) dest = *(float *) source;
      break;
    case FFI_TYPE_DOUBLE:
      *(double *) dest = *(double *) source;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      *(long double *) dest = *(long double *) source;
      break;
#endif
    case FFI_TYPE_UINT8:
      *(ffi_arg *) dest = *(UINT8 *) source;
      break;
    case FFI_TYPE_SINT8:
      *(ffi_sarg *) dest = *(SINT8 *) source;
      break;
    case FFI_TYPE_UINT16:
      *(ffi_arg *) dest = *(UINT16 *) source;
      break;
    case FFI_TYPE_SINT16:
      *(ffi_sarg *) dest = *(SINT16 *) source;
      break;
    case FFI_TYPE_UINT32:
      *(ffi_arg *) dest = *(UINT32 *) source;
      break;
    case FFI_TYPE_INT:
    case FFI_TYPE_SINT32:
      *(ffi_sarg *) dest = *(SINT32 *) source;
      break;
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
      *(ffi_arg *) dest = *(UINT64 *) source;
      break;
    case FFI_TYPE_SINT64:
      *(ffi_sarg *) dest = *(SINT64 *) source;
      break;
    case FFI_TYPE_VOID:
      break;

    default:
      FFI_ASSERT (0);
    }
}
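
/* Example (illustrative): copy_basic_type exists because libffi
   promotes small integer values to a full 64-bit ffi_arg.  Copying a
   SINT8 holding -1 through the SINT8 case above stores
   0xffffffffffffffff (sign extension) into the destination, whereas a
   UINT8 holding 0xff stores 0x00000000000000ff (zero extension).  */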

static void
copy_hfa_to_reg_or_stack (void *memory,
			  ffi_type *ty,
			  struct call_context *context,
			  unsigned char *stack,
			  struct arg_state *state)
{
  unsigned elems = element_count (ty);
  if (available_v (state) < elems)
    {
      /* There are insufficient V registers.  Further V register
	 allocations are prevented, the NSAA is adjusted (by
	 allocate_to_stack ()) and the argument is copied to memory at
	 the adjusted NSAA.  */
      state->nsrn = N_V_ARG_REG;
      memcpy (allocate_to_stack (state, stack, ty->alignment, ty->size),
	      memory,
	      ty->size);
    }
  else
    {
      int i;
      unsigned short type = get_homogeneous_type (ty);
      for (i = 0; i < elems; i++)
	{
	  void *reg = allocate_to_v (context, state);
	  copy_basic_type (reg, memory, type);
	  memory += get_basic_type_size (type);
	}
    }
}

/* Either allocate an appropriate register for the argument type, or if
   none are available, allocate a stack slot and return a pointer
   to the allocated space.  */

static void *
allocate_to_register_or_stack (struct call_context *context,
			       unsigned char *stack,
			       struct arg_state *state,
			       unsigned short type)
{
  size_t alignment = get_basic_type_alignment (type);
  size_t size = alignment;
  switch (type)
    {
    case FFI_TYPE_FLOAT:
      /* This is the only case for which the allocated stack size
	 should not match the alignment of the type.  */
      size = sizeof (UINT32);
      /* Fall through.  */
    case FFI_TYPE_DOUBLE:
      if (state->nsrn < N_V_ARG_REG)
	return allocate_to_d (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
    case FFI_TYPE_LONGDOUBLE:
      if (state->nsrn < N_V_ARG_REG)
	return allocate_to_v (context, state);
      state->nsrn = N_V_ARG_REG;
      break;
#endif
    case FFI_TYPE_UINT8:
    case FFI_TYPE_SINT8:
    case FFI_TYPE_UINT16:
    case FFI_TYPE_SINT16:
    case FFI_TYPE_UINT32:
    case FFI_TYPE_SINT32:
    case FFI_TYPE_INT:
    case FFI_TYPE_POINTER:
    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      if (state->ngrn < N_X_ARG_REG)
	return allocate_to_x (context, state);
      state->ngrn = N_X_ARG_REG;
      break;
    default:
      FFI_ASSERT (0);
    }

  return allocate_to_stack (state, stack, alignment, size);
}

/* Copy a value to an appropriate register, or if none are
   available, to the stack.  */

static void
copy_to_register_or_stack (struct call_context *context,
			   unsigned char *stack,
			   struct arg_state *state,
			   void *value,
			   unsigned short type)
{
  copy_basic_type (
	  allocate_to_register_or_stack (context, stack, state, type),
	  value,
	  type);
}

/* Marshall the arguments from FFI representation to procedure call
   context and stack.  */

static unsigned
aarch64_prep_args (struct call_context *context, unsigned char *stack,
		   extended_cif *ecif)
{
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN (ecif->cif->bytes, 16));

  for (i = 0; i < ecif->cif->nargs; i++)
    {
      ffi_type *ty = ecif->cif->arg_types[i];
      switch (ty->type)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	/* If the argument is a basic type the argument is allocated to an
	   appropriate register, or if none are available, to the stack.  */
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
	case FFI_TYPE_LONGDOUBLE:
#endif
	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_INT:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_POINTER:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	  copy_to_register_or_stack (context, stack, &state,
				     ecif->avalue[i], ty->type);
	  break;

	case FFI_TYPE_STRUCT:
	  if (is_hfa (ty))
	    {
	      copy_hfa_to_reg_or_stack (ecif->avalue[i], ty, context,
					stack, &state);
	    }
	  else if (ty->size > 16)
	    {
	      /* If the argument is a composite type that is larger than 16
		 bytes, then the argument has been copied to memory, and
		 the argument is replaced by a pointer to the copy.  */

	      copy_to_register_or_stack (context, stack, &state,
					 &(ecif->avalue[i]), FFI_TYPE_POINTER);
	    }
	  else if (available_x (&state) >= (ty->size + 7) / 8)
	    {
	      /* If the argument is a composite type and the size in
		 double-words is not more than the number of available
		 X registers, then the argument is copied into consecutive
		 X registers.  */
	      int j;
	      for (j = 0; j < (ty->size + 7) / 8; j++)
		{
		  memcpy (allocate_to_x (context, &state),
			  &(((UINT64 *) ecif->avalue[i])[j]),
			  sizeof (UINT64));
		}
	    }
	  else
	    {
	      /* Otherwise, there are insufficient X registers.  Further X
		 register allocations are prevented, the NSAA is adjusted
		 (by allocate_to_stack ()) and the argument is copied to
		 memory at the adjusted NSAA.  */
	      state.ngrn = N_X_ARG_REG;

	      memcpy (allocate_to_stack (&state, stack, ty->alignment,
					 ty->size), ecif->avalue[i], ty->size);
	    }
	  break;

	default:
	  FFI_ASSERT (0);
	  break;
	}

#if defined (__APPLE__)
      if (i + 1 == ecif->cif->aarch64_nfixedargs)
	{
	  state.ngrn = N_X_ARG_REG;
	  state.nsrn = N_V_ARG_REG;

	  state.allocating_variadic = 1;
	}
#endif
    }

  return ecif->cif->aarch64_flags;
}

ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  /* Round the stack up to a multiple of the stack alignment requirement.  */
  cif->bytes =
    (cif->bytes + (AARCH64_STACK_ALIGN - 1)) & ~ (AARCH64_STACK_ALIGN - 1);

  /* Initialize our flags.  We are interested in whether this CIF will
     touch a vector register; if so we will enable context save and
     load to those registers, otherwise not.  This is intended to be
     friendly to lazy float context switching in the kernel.  */
  cif->aarch64_flags = 0;

  if (is_v_register_candidate (cif->rtype))
    {
      cif->aarch64_flags |= AARCH64_FFI_WITH_V;
    }
  else
    {
      int i;
      for (i = 0; i < cif->nargs; i++)
	if (is_v_register_candidate (cif->arg_types[i]))
	  {
	    cif->aarch64_flags |= AARCH64_FFI_WITH_V;
	    break;
	  }
    }

#if defined (__APPLE__)
  cif->aarch64_nfixedargs = 0;
#endif

  return FFI_OK;
}

#if defined (__APPLE__)

/* Perform Apple-specific cif processing for variadic calls.  */
ffi_status ffi_prep_cif_machdep_var (ffi_cif *cif,
				     unsigned int nfixedargs,
				     unsigned int ntotalargs)
{
  ffi_status status;

  status = ffi_prep_cif_machdep (cif);

  cif->aarch64_nfixedargs = nfixedargs;

  return status;
}

#endif

/* Call a function with the provided arguments and capture the return
   value.  */
void
ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
{
  extended_cif ecif;

  ecif.cif = cif;
  ecif.avalue = avalue;
  ecif.rvalue = rvalue;

  switch (cif->abi)
    {
    case FFI_SYSV:
      {
	struct call_context context;
	size_t stack_bytes;

	/* Figure out the total amount of stack space we need.  The
	   call frame space above needs to be 16-byte aligned to ensure
	   correct alignment of the first object stored in it, hence
	   the ALIGN applied to cif->bytes.  */
	stack_bytes = ALIGN (cif->bytes, 16);

	memset (&context, 0, sizeof (context));
	if (is_register_candidate (cif->rtype))
	  {
	    ffi_call_SYSV (aarch64_prep_args, &context, &ecif, stack_bytes, fn);
	    switch (cif->rtype->type)
	      {
	      case FFI_TYPE_VOID:
	      case FFI_TYPE_FLOAT:
	      case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
	      case FFI_TYPE_LONGDOUBLE:
#endif
	      case FFI_TYPE_UINT8:
	      case FFI_TYPE_SINT8:
	      case FFI_TYPE_UINT16:
	      case FFI_TYPE_SINT16:
	      case FFI_TYPE_UINT32:
	      case FFI_TYPE_SINT32:
	      case FFI_TYPE_POINTER:
	      case FFI_TYPE_UINT64:
	      case FFI_TYPE_INT:
	      case FFI_TYPE_SINT64:
		{
		  void *addr = get_basic_type_addr (cif->rtype->type,
						    &context, 0);
		  copy_basic_type (rvalue, addr, cif->rtype->type);
		  break;
		}

	      case FFI_TYPE_STRUCT:
		if (is_hfa (cif->rtype))
		  {
		    int j;
		    unsigned short type = get_homogeneous_type (cif->rtype);
		    unsigned elems = element_count (cif->rtype);
		    for (j = 0; j < elems; j++)
		      {
			void *reg = get_basic_type_addr (type, &context, j);
			copy_basic_type (rvalue, reg, type);
			rvalue += get_basic_type_size (type);
		      }
		  }
		else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
		  {
		    size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
		    memcpy (rvalue, get_x_addr (&context, 0), size);
		  }
		else
		  {
		    FFI_ASSERT (0);
		  }
		break;

	      default:
		FFI_ASSERT (0);
		break;
	      }
	  }
	else
	  {
	    memcpy (get_x_addr (&context, 8), &rvalue, sizeof (UINT64));
	    ffi_call_SYSV (aarch64_prep_args, &context, &ecif,
			   stack_bytes, fn);
	  }
	break;
      }

    default:
      FFI_ASSERT (0);
      break;
    }
}
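
/* Usage sketch (illustrative, not part of this file): a caller
   prepares a cif describing the target function's signature and then
   invokes it through ffi_call, e.g. to call puts ("hello"):

     ffi_cif cif;
     ffi_type *args[1] = { &ffi_type_pointer };
     void *values[1];
     char *s = "hello";
     ffi_arg rc;

     values[0] = &s;
     if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1,
		       &ffi_type_sint, args) == FFI_OK)
       ffi_call (&cif, FFI_FN (puts), &rc, values);

   ffi_prep_cif invokes ffi_prep_cif_machdep above, and ffi_call then
   marshalls the arguments via aarch64_prep_args.  */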

static unsigned char trampoline [] =
{ 0x70, 0x00, 0x00, 0x58,	/* ldr	x16, 1f	*/
  0x91, 0x00, 0x00, 0x10,	/* adr	x17, 2f	*/
  0x00, 0x02, 0x1f, 0xd6	/* br	x16	*/
};

/* Build a trampoline.  */

#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX,FLAGS)			\
  ({unsigned char *__tramp = (unsigned char*)(TRAMP);			\
    UINT64  __fun = (UINT64)(FUN);					\
    UINT64  __ctx = (UINT64)(CTX);					\
    UINT64  __flags = (UINT64)(FLAGS);					\
    memcpy (__tramp, trampoline, sizeof (trampoline));			\
    memcpy (__tramp + 12, &__fun, sizeof (__fun));			\
    memcpy (__tramp + 20, &__ctx, sizeof (__ctx));			\
    memcpy (__tramp + 28, &__flags, sizeof (__flags));			\
    ffi_clear_cache(__tramp, __tramp + FFI_TRAMPOLINE_SIZE);		\
  })
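
/* Layout note (illustrative): after FFI_INIT_TRAMPOLINE the
   trampoline memory looks like

     bytes  0-11   the three instructions above
     bytes 12-19   __fun   (the pc-relative ldr loads this into x16)
     bytes 20-27   __ctx   (the adr leaves x17 pointing here)
     bytes 28-35   __flags

   so FFI_TRAMPOLINE_SIZE must be at least 36 bytes.  The
   ffi_clear_cache call is needed because the data writes above and
   the subsequent instruction fetches from the same addresses are not
   otherwise guaranteed to be coherent.  */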

ffi_status
ffi_prep_closure_loc (ffi_closure *closure,
		      ffi_cif *cif,
		      void (*fun)(ffi_cif*,void*,void**,void*),
		      void *user_data,
		      void *codeloc)
{
  if (cif->abi != FFI_SYSV)
    return FFI_BAD_ABI;

  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_SYSV, codeloc,
		       cif->aarch64_flags);

  closure->cif  = cif;
  closure->user_data = user_data;
  closure->fun  = fun;

  return FFI_OK;
}
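
/* Usage sketch (illustrative): a typical consumer allocates writable
   plus executable closure memory with ffi_closure_alloc and passes
   the returned code address as codeloc:

     void *code;
     ffi_closure *closure =
       ffi_closure_alloc (sizeof (ffi_closure), &code);

     if (closure
	 && ffi_prep_cif (&cif, FFI_DEFAULT_ABI, nargs,
			  rtype, argtypes) == FFI_OK
	 && ffi_prep_closure_loc (closure, &cif, handler,
				  user_data, code) == FFI_OK)
       {
	 /* code is now callable as a function pointer with the cif's
	    signature; calls land in ffi_closure_SYSV_inner below.  */
       }

   Here cif, nargs, rtype, argtypes, handler and user_data stand for
   caller-supplied values.  */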

/* Primary handler to setup and invoke a function within a closure.

   A closure when invoked enters via the assembler wrapper
   ffi_closure_SYSV().  The wrapper allocates a call context on the
   stack, saves the interesting registers (from the perspective of
   the calling convention) into the context then passes control to
   ffi_closure_SYSV_inner() passing the saved context and a pointer to
   the stack at the point ffi_closure_SYSV() was invoked.

   On the return path the assembler wrapper will reload call context
   registers.

   ffi_closure_SYSV_inner() marshalls the call context into ffi value
   descriptors, invokes the wrapped function, then marshalls the return
   value back into the call context.  */

void FFI_HIDDEN
ffi_closure_SYSV_inner (ffi_closure *closure, struct call_context *context,
			void *stack)
{
  ffi_cif *cif = closure->cif;
  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
  void *rvalue = NULL;
  int i;
  struct arg_state state;

  arg_init (&state, ALIGN (cif->bytes, 16));

  for (i = 0; i < cif->nargs; i++)
    {
      ffi_type *ty = cif->arg_types[i];

      switch (ty->type)
	{
	case FFI_TYPE_VOID:
	  FFI_ASSERT (0);
	  break;

	case FFI_TYPE_UINT8:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_INT:
	case FFI_TYPE_POINTER:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
	case FFI_TYPE_LONGDOUBLE:
#endif
	  avalue[i] = allocate_to_register_or_stack (context, stack,
						     &state, ty->type);
	  break;

	case FFI_TYPE_STRUCT:
	  if (is_hfa (ty))
	    {
	      unsigned n = element_count (ty);
	      if (available_v (&state) < n)
		{
		  state.nsrn = N_V_ARG_REG;
		  avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
						 ty->size);
		}
	      else
		{
		  switch (get_homogeneous_type (ty))
		    {
		    case FFI_TYPE_FLOAT:
		      {
			/* Eeek! We need a pointer to the structure,
			   however the homogeneous float elements are
			   being passed in individual S registers,
			   therefore the structure is not represented as
			   a contiguous sequence of bytes in our saved
			   register context.  We need to fake up a copy
			   of the structure laid out in memory
			   correctly.  The fake can be tossed once the
			   closure function has returned hence alloca()
			   is sufficient.  */
			int j;
			UINT32 *p = avalue[i] = alloca (ty->size);
			for (j = 0; j < element_count (ty); j++)
			  memcpy (&p[j],
				  allocate_to_s (context, &state),
				  sizeof (*p));
			break;
		      }

		    case FFI_TYPE_DOUBLE:
		      {
			/* Eeek! We need a pointer to the structure,
			   however the homogeneous double elements are
			   being passed in individual D registers,
			   therefore the structure is not represented as
			   a contiguous sequence of bytes in our saved
			   register context.  We need to fake up a copy
			   of the structure laid out in memory
			   correctly.  The fake can be tossed once the
			   closure function has returned hence alloca()
			   is sufficient.  */
			int j;
			UINT64 *p = avalue[i] = alloca (ty->size);
			for (j = 0; j < element_count (ty); j++)
			  memcpy (&p[j],
				  allocate_to_d (context, &state),
				  sizeof (*p));
			break;
		      }

#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
		    case FFI_TYPE_LONGDOUBLE:
		      memcpy (&avalue[i],
			      allocate_to_v (context, &state),
			      sizeof (*avalue));
		      break;
#endif

		    default:
		      FFI_ASSERT (0);
		      break;
		    }
		}
	    }
	  else if (ty->size > 16)
	    {
	      /* Replace Composite type of size greater than 16 with a
		 pointer.  */
	      memcpy (&avalue[i],
		      allocate_to_register_or_stack (context, stack,
						     &state, FFI_TYPE_POINTER),
		      sizeof (avalue[i]));
	    }
	  else if (available_x (&state) >= (ty->size + 7) / 8)
	    {
	      avalue[i] = get_x_addr (context, state.ngrn);
	      state.ngrn += (ty->size + 7) / 8;
	    }
	  else
	    {
	      state.ngrn = N_X_ARG_REG;

	      avalue[i] = allocate_to_stack (&state, stack, ty->alignment,
					     ty->size);
	    }
	  break;

	default:
	  FFI_ASSERT (0);
	  break;
	}
    }

  /* Figure out where the return value will be passed, either in
     registers or in a memory block allocated by the caller and passed
     in x8.  */

  if (is_register_candidate (cif->rtype))
    {
      /* Register candidates are *always* returned in registers.  */

      /* Allocate a scratchpad for the return value; we will let the
	 callee scribble the result into the scratchpad and then move
	 the contents into the appropriate return value location for
	 the calling convention.  */
      rvalue = alloca (cif->rtype->size);
      (closure->fun) (cif, rvalue, avalue, closure->user_data);

      /* Copy the return value into the call context so that it is returned
	 as expected to our caller.  */
      switch (cif->rtype->type)
	{
	case FFI_TYPE_VOID:
	  break;

	case FFI_TYPE_UINT8:
	case FFI_TYPE_UINT16:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_POINTER:
	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT8:
	case FFI_TYPE_SINT16:
	case FFI_TYPE_INT:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_FLOAT:
	case FFI_TYPE_DOUBLE:
#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
	case FFI_TYPE_LONGDOUBLE:
#endif
	  {
	    void *addr = get_basic_type_addr (cif->rtype->type, context, 0);
	    copy_basic_type (addr, rvalue, cif->rtype->type);
	    break;
	  }
	case FFI_TYPE_STRUCT:
	  if (is_hfa (cif->rtype))
	    {
	      int j;
	      unsigned short type = get_homogeneous_type (cif->rtype);
	      unsigned elems = element_count (cif->rtype);
	      for (j = 0; j < elems; j++)
		{
		  void *reg = get_basic_type_addr (type, context, j);
		  copy_basic_type (reg, rvalue, type);
		  rvalue += get_basic_type_size (type);
		}
	    }
	  else if ((cif->rtype->size + 7) / 8 < N_X_ARG_REG)
	    {
	      size_t size = ALIGN (cif->rtype->size, sizeof (UINT64));
	      memcpy (get_x_addr (context, 0), rvalue, size);
	    }
	  else
	    {
	      FFI_ASSERT (0);
	    }
	  break;
	default:
	  FFI_ASSERT (0);
	  break;
	}
    }
  else
    {
      memcpy (&rvalue, get_x_addr (context, 8), sizeof (UINT64));
      (closure->fun) (cif, rvalue, avalue, closure->user_data);
    }
}