1/* -----------------------------------------------------------------------
2   ffi.c - Copyright (c) 1998 Geoffrey Keating
3
4   PowerPC Foreign Function Interface
5
6   Permission is hereby granted, free of charge, to any person obtaining
7   a copy of this software and associated documentation files (the
8   ``Software''), to deal in the Software without restriction, including
9   without limitation the rights to use, copy, modify, merge, publish,
10   distribute, sublicense, and/or sell copies of the Software, and to
11   permit persons to whom the Software is furnished to do so, subject to
12   the following conditions:
13
14   The above copyright notice and this permission notice shall be included
15   in all copies or substantial portions of the Software.
16
17   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
18   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
21   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23   OTHER DEALINGS IN THE SOFTWARE.
24   ----------------------------------------------------------------------- */
25
26#include <ffi.h>
27#include <ffi_common.h>
28
29#include <stdlib.h>
30#include <stdio.h>
31
32
/* Closure entry points implemented outside this file.  ffi_prep_closure
   stores the address of ffi_closure_SYSV into the 32-bit trampoline and
   copies the first 16 bytes found at ffi_closure_LINUX64 into the 64-bit
   one.  */
extern void ffi_closure_SYSV (void);
extern void FFI_HIDDEN ffi_closure_LINUX64 (void);
35
/* Bits stored in cif->flags.  The assembly depends on these exact values.
   Bit numbers in the comments use the PowerPC convention in which bit 0
   is the most significant bit of the 32-bit word, i.e. bit N has the
   value 1 << (31 - N).  */
enum {
  FLAG_RETURNS_SMST	= 0x00000001, /* bit 31; used for FFI_SYSV small structs.  */
  FLAG_RETURNS_NOTHING  = 0x00000002, /* bit 30; these go in cr7 */
  FLAG_RETURNS_FP       = 0x00000004, /* bit 29 */
  FLAG_RETURNS_64BITS   = 0x00000008, /* bit 28 */
  FLAG_RETURNS_128BITS  = 0x00000010, /* bit 27 */

  FLAG_ARG_NEEDS_COPY   = 0x01000000, /* bit 7 */
  FLAG_FP_ARGUMENTS     = 0x02000000, /* bit 6; cr1.eq; specified by ABI */
  FLAG_4_GPR_ARGUMENTS  = 0x04000000, /* bit 5 */
  FLAG_RETVAL_REFERENCE = 0x08000000  /* bit 4 */
};
49
/* Register budget for the SYSV ABI, as used by the frame layout
   described below.  */
enum {
  /* General-purpose argument registers (r3-r10).  */
  NUM_GPR_ARG_REGISTERS = 8,
  /* Floating-point argument registers (f1-f8).  */
  NUM_FPR_ARG_REGISTERS = 8,
  /* Words reserved at the top of the frame for the registers the
     assembly saves (r28-r31).  */
  ASM_NEEDS_REGISTERS = 4
};
56
57/* ffi_prep_args_SYSV is called by the assembly routine once stack space
58   has been allocated for the function's arguments.
59
60   The stack layout we want looks like this:
61
62   |   Return address from ffi_call_SYSV 4bytes	|	higher addresses
63   |--------------------------------------------|
64   |   Previous backchain pointer	4	|       stack pointer here
65   |--------------------------------------------|<+ <<<	on entry to
66   |   Saved r28-r31			4*4	| |	ffi_call_SYSV
67   |--------------------------------------------| |
68   |   GPR registers r3-r10		8*4	| |	ffi_call_SYSV
69   |--------------------------------------------| |
70   |   FPR registers f1-f8 (optional)	8*8	| |
71   |--------------------------------------------| |	stack	|
72   |   Space for copied structures		| |	grows	|
73   |--------------------------------------------| |	down    V
74   |   Parameters that didn't fit in registers  | |
75   |--------------------------------------------| |	lower addresses
76   |   Space for callee's LR		4	| |
77   |--------------------------------------------| |	stack pointer here
78   |   Current backchain pointer	4	|-/	during
79   |--------------------------------------------|   <<<	ffi_call_SYSV
80
81*/
82
83/*@-exportheader@*/
/* Marshal the call's arguments into the freshly allocated frame.

   ecif   supplies the cif (arg_types, nargs, bytes, flags), the array of
	  pointers to argument values (avalue) and the return-value
	  address (rvalue).
   stack  is the low end of the frame; the frame is cif->bytes long, so
	  the register save areas are located by counting down from
	  stack + bytes.

   Nothing is returned; the function only writes into the frame.  */
void
ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
/*@=exportheader@*/
{
  const unsigned bytes = ecif->cif->bytes;
  const unsigned flags = ecif->cif->flags;

  /* One address viewed through several pointer types, so the same
     cursor can be stepped in bytes, words or doubles.  */
  typedef union {
    char *c;
    unsigned *u;
    long long *ll;
    float *f;
    double *d;
  } valp;

  /* 'stacktop' points at the previous backchain pointer.  */
  valp stacktop;

  /* 'gpr_base' points at the space for gpr3, and grows upwards as
     we use GPR registers.  */
  valp gpr_base;
  int intarg_count;

  /* 'fpr_base' points at the space for fpr1, and grows upwards as
     we use FPR registers.  */
  valp fpr_base;
  int fparg_count;

  /* 'copy_space' grows down as we put structures in it.  It should
     stay 16-byte aligned.  */
  valp copy_space;

  /* 'next_arg' grows up as we put parameters in it.  */
  valp next_arg;

  int i;
  ffi_type **ptr;
  double double_tmp;
  /* Cursor over ecif->avalue, typed for each kind of argument value.  */
  union {
    void **v;
    char **c;
    signed char **sc;
    unsigned char **uc;
    signed short **ss;
    unsigned short **us;
    unsigned int **ui;
    long long **ll;
    float **f;
    double **d;
  } p_argv;
  size_t struct_copy_size;
  unsigned gprvalue;

  /* Locate each area of the frame relative to its top.  */
  stacktop.c = (char *) stack + bytes;
  gpr_base.u = stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS;
  intarg_count = 0;
  fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS;
  fparg_count = 0;
  /* The FPR save area is only present when FLAG_FP_ARGUMENTS is set;
     copied structures start just below whichever area is lowest.  */
  copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
  /* Skip the backchain word and the callee's LR slot.  */
  next_arg.u = stack + 2;

  /* Check that everything starts aligned properly.  */
  FFI_ASSERT (((unsigned) (char *) stack & 0xF) == 0);
  FFI_ASSERT (((unsigned) copy_space.c & 0xF) == 0);
  FFI_ASSERT (((unsigned) stacktop.c & 0xF) == 0);
  FFI_ASSERT ((bytes & 0xF) == 0);
  FFI_ASSERT (copy_space.c >= next_arg.c);

  /* Deal with return values that are actually pass-by-reference.  */
  if (flags & FLAG_RETVAL_REFERENCE)
    {
      /* The return-value address takes the first GPR slot.  */
      *gpr_base.u++ = (unsigned long) (char *) ecif->rvalue;
      intarg_count++;
    }

  /* Now for the arguments.  */
  p_argv.v = ecif->avalue;
  for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
       i > 0;
       i--, ptr++, p_argv.v++)
    {
      switch ((*ptr)->type)
	{
	case FFI_TYPE_FLOAT:
	  double_tmp = **p_argv.f;
	  if (fparg_count >= NUM_FPR_ARG_REGISTERS)
	    {
	      /* Out of FPR slots: store as a single word on the stack.  */
	      *next_arg.f = (float) double_tmp;
	      next_arg.u += 1;
	    }
	  else
	    /* FPR slots hold doubles, so the float is widened.  */
	    *fpr_base.d++ = double_tmp;
	  fparg_count++;
	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
	  break;

	case FFI_TYPE_DOUBLE:
	  double_tmp = **p_argv.d;

	  if (fparg_count >= NUM_FPR_ARG_REGISTERS)
	    {
	      /* Insert a padding word when the count of stacked words is
		 odd, so that the double is stored on an even word.  */
	      if (intarg_count >= NUM_GPR_ARG_REGISTERS
		  && intarg_count % 2 != 0)
		{
		  intarg_count++;
		  next_arg.u++;
		}
	      *next_arg.d = double_tmp;
	      next_arg.u += 2;
	    }
	  else
	    *fpr_base.d++ = double_tmp;
	  fparg_count++;
	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
	  break;

	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	  /* With only one GPR slot left the value cannot be split, so
	     count that slot as used and place the value on the stack.  */
	  if (intarg_count == NUM_GPR_ARG_REGISTERS-1)
	    intarg_count++;
	  if (intarg_count >= NUM_GPR_ARG_REGISTERS)
	    {
	      /* Pad to an even word count before storing two words.  */
	      if (intarg_count % 2 != 0)
		{
		  intarg_count++;
		  next_arg.u++;
		}
	      *next_arg.ll = **p_argv.ll;
	      next_arg.u += 2;
	    }
	  else
	    {
	      /* whoops: abi states only certain register pairs
	       * can be used for passing long long int
	       * specifically (r3,r4), (r5,r6), (r7,r8),
	       * (r9,r10) and if next arg is long long but
	       * not correct starting register of pair then skip
	       * until the proper starting register
	       */
	      if (intarg_count % 2 != 0)
		{
		  intarg_count ++;
		  gpr_base.u++;
		}
	      *gpr_base.ll++ = **p_argv.ll;
	    }
	  intarg_count += 2;
	  break;

	case FFI_TYPE_STRUCT:
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
	case FFI_TYPE_LONGDOUBLE:
#endif
	  /* Copy the object into copy_space (rounded up to a multiple of
	     16 bytes) and pass the address of the copy as a word.  */
	  struct_copy_size = ((*ptr)->size + 15) & ~0xF;
	  copy_space.c -= struct_copy_size;
	  memcpy (copy_space.c, *p_argv.c, (*ptr)->size);

	  gprvalue = (unsigned long) copy_space.c;

	  FFI_ASSERT (copy_space.c > next_arg.c);
	  FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY);
	  goto putgpr;

	/* Sub-word integers are converted to a full word; the signedness
	   of the source pointer type decides zero- or sign-extension.  */
	case FFI_TYPE_UINT8:
	  gprvalue = **p_argv.uc;
	  goto putgpr;
	case FFI_TYPE_SINT8:
	  gprvalue = **p_argv.sc;
	  goto putgpr;
	case FFI_TYPE_UINT16:
	  gprvalue = **p_argv.us;
	  goto putgpr;
	case FFI_TYPE_SINT16:
	  gprvalue = **p_argv.ss;
	  goto putgpr;

	case FFI_TYPE_INT:
	case FFI_TYPE_UINT32:
	case FFI_TYPE_SINT32:
	case FFI_TYPE_POINTER:
	  gprvalue = **p_argv.ui;

	/* Common tail: store one word in the next GPR slot, or on the
	   stack once all GPR slots are used.  */
	putgpr:
	  if (intarg_count >= NUM_GPR_ARG_REGISTERS)
	    *next_arg.u++ = gprvalue;
	  else
	    *gpr_base.u++ = gprvalue;
	  intarg_count++;
	  break;
	}
    }

  /* Check that we didn't overrun the stack...  */
  FFI_ASSERT (copy_space.c >= next_arg.c);
  FFI_ASSERT (gpr_base.u <= stacktop.u - ASM_NEEDS_REGISTERS);
  FFI_ASSERT (fpr_base.u
	      <= stacktop.u - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
  /* FLAG_4_GPR_ARGUMENTS must be set whenever more than four integer
     slots were counted.  */
  FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
}
283
284/* About the LINUX64 ABI.  */
enum {
  /* General-purpose argument registers (r3-r10).  */
  NUM_GPR_ARG_REGISTERS64 = 8,
  /* Floating-point argument registers (f1-f13).  */
  NUM_FPR_ARG_REGISTERS64 = 13,
  /* Doublewords reserved at the top of the frame for the registers the
     assembly saves (r28-r31).  */
  ASM_NEEDS_REGISTERS64 = 4
};
290
291/* ffi_prep_args64 is called by the assembly routine once stack space
292   has been allocated for the function's arguments.
293
294   The stack layout we want looks like this:
295
296   |   Ret addr from ffi_call_LINUX64	8bytes	|	higher addresses
297   |--------------------------------------------|
298   |   CR save area			8bytes	|
299   |--------------------------------------------|
300   |   Previous backchain pointer	8	|	stack pointer here
301   |--------------------------------------------|<+ <<<	on entry to
302   |   Saved r28-r31			4*8	| |	ffi_call_LINUX64
303   |--------------------------------------------| |
304   |   GPR registers r3-r10		8*8	| |
305   |--------------------------------------------| |
306   |   FPR registers f1-f13 (optional)	13*8	| |
307   |--------------------------------------------| |
308   |   Parameter save area		        | |
309   |--------------------------------------------| |
310   |   TOC save area			8	| |
311   |--------------------------------------------| |	stack	|
312   |   Linker doubleword		8	| |	grows	|
313   |--------------------------------------------| |	down	V
314   |   Compiler doubleword		8	| |
315   |--------------------------------------------| |	lower addresses
316   |   Space for callee's LR		8	| |
317   |--------------------------------------------| |
318   |   CR save area			8	| |
319   |--------------------------------------------| |	stack pointer here
320   |   Current backchain pointer	8	|-/	during
321   |--------------------------------------------|   <<<	ffi_call_LINUX64
322
323*/
324
325/*@-exportheader@*/
/* Marshal the call's arguments into the freshly allocated LINUX64 frame.

   ecif   supplies the cif (arg_types, nargs, bytes, flags), the array of
	  pointers to argument values (avalue) and the return-value
	  address (rvalue).
   stack  is the low end of the frame; the frame is cif->bytes long, so
	  the register save areas are located by counting down from
	  stack + bytes.

   Every argument is written to the next doubleword of the GPR area and,
   once that area is full, to 'rest' in the parameter save area.
   Floating-point values are additionally written to the FPR area while
   FPR slots remain.  Nothing is returned.  */
void FFI_HIDDEN
ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
/*@=exportheader@*/
{
  const unsigned long bytes = ecif->cif->bytes;
  const unsigned long flags = ecif->cif->flags;

  /* One address viewed through several pointer types.  */
  typedef union {
    char *c;
    unsigned long *ul;
    float *f;
    double *d;
  } valp;

  /* 'stacktop' points at the previous backchain pointer.  */
  valp stacktop;

  /* 'gpr_base' points at the space for gpr3 and 'gpr_end' just past the
     space for gpr10.  'next_arg' starts at gpr_base and grows upwards as
     we use GPR registers, then continues at 'rest'.  */
  valp gpr_base;
  valp gpr_end;
  valp rest;
  valp next_arg;

  /* 'fpr_base' points at the space for fpr1, and grows upwards as
     we use FPR registers.  */
  valp fpr_base;
  int fparg_count;

  int i, words;
  ffi_type **ptr;
  double double_tmp;
  /* Cursor over ecif->avalue, typed for each kind of argument value.  */
  union {
    void **v;
    char **c;
    signed char **sc;
    unsigned char **uc;
    signed short **ss;
    unsigned short **us;
    signed int **si;
    unsigned int **ui;
    unsigned long **ul;
    float **f;
    double **d;
  } p_argv;
  unsigned long gprvalue;

  /* Locate each area of the frame.  */
  stacktop.c = (char *) stack + bytes;
  gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
  gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
  /* Skip the six header doublewords and the first eight doublewords of
     the parameter save area, which correspond to the GPR arguments.  */
  rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
  fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
  fparg_count = 0;
  next_arg.ul = gpr_base.ul;

  /* Check that everything starts aligned properly.  */
  FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
  FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
  FFI_ASSERT ((bytes & 0xF) == 0);

  /* Deal with return values that are actually pass-by-reference.  */
  if (flags & FLAG_RETVAL_REFERENCE)
    *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;

  /* Now for the arguments.  */
  p_argv.v = ecif->avalue;
  for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
       i > 0;
       i--, ptr++, p_argv.v++)
    {
      switch ((*ptr)->type)
	{
	case FFI_TYPE_FLOAT:
	  double_tmp = **p_argv.f;
	  /* Store the float in the argument doubleword, then also as a
	     double in the FPR area if an FPR slot remains.  */
	  *next_arg.f = (float) double_tmp;
	  if (++next_arg.ul == gpr_end.ul)
	    next_arg.ul = rest.ul;
	  if (fparg_count < NUM_FPR_ARG_REGISTERS64)
	    *fpr_base.d++ = double_tmp;
	  fparg_count++;
	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
	  break;

	case FFI_TYPE_DOUBLE:
	  double_tmp = **p_argv.d;
	  *next_arg.d = double_tmp;
	  if (++next_arg.ul == gpr_end.ul)
	    next_arg.ul = rest.ul;
	  if (fparg_count < NUM_FPR_ARG_REGISTERS64)
	    *fpr_base.d++ = double_tmp;
	  fparg_count++;
	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
	  break;

#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
	case FFI_TYPE_LONGDOUBLE:
	  /* Handled as two consecutive doubles, each taking its own
	     argument doubleword and FPR slot.  */
	  double_tmp = (*p_argv.d)[0];
	  *next_arg.d = double_tmp;
	  if (++next_arg.ul == gpr_end.ul)
	    next_arg.ul = rest.ul;
	  if (fparg_count < NUM_FPR_ARG_REGISTERS64)
	    *fpr_base.d++ = double_tmp;
	  fparg_count++;
	  double_tmp = (*p_argv.d)[1];
	  *next_arg.d = double_tmp;
	  if (++next_arg.ul == gpr_end.ul)
	    next_arg.ul = rest.ul;
	  if (fparg_count < NUM_FPR_ARG_REGISTERS64)
	    *fpr_base.d++ = double_tmp;
	  fparg_count++;
	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
	  break;
#endif

	case FFI_TYPE_STRUCT:
	  /* Size of the struct rounded up to whole doublewords.  */
	  words = ((*ptr)->size + 7) / 8;
	  if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
	    {
	      /* The struct straddles the end of the GPR area: the first
		 part fills the remaining GPR slots and the remainder
		 continues at 'rest'.  */
	      size_t first = gpr_end.c - next_arg.c;
	      memcpy (next_arg.c, *p_argv.c, first);
	      memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
	      next_arg.c = rest.c + words * 8 - first;
	    }
	  else
	    {
	      char *where = next_arg.c;

	      /* Structures with size less than eight bytes are passed
		 left-padded.  */
	      if ((*ptr)->size < 8)
		where += 8 - (*ptr)->size;

	      memcpy (where, *p_argv.c, (*ptr)->size);
	      next_arg.ul += words;
	      if (next_arg.ul == gpr_end.ul)
		next_arg.ul = rest.ul;
	    }
	  break;

	/* Integers narrower than a doubleword are converted to unsigned
	   long; the signedness of the source pointer type decides zero-
	   or sign-extension.  */
	case FFI_TYPE_UINT8:
	  gprvalue = **p_argv.uc;
	  goto putgpr;
	case FFI_TYPE_SINT8:
	  gprvalue = **p_argv.sc;
	  goto putgpr;
	case FFI_TYPE_UINT16:
	  gprvalue = **p_argv.us;
	  goto putgpr;
	case FFI_TYPE_SINT16:
	  gprvalue = **p_argv.ss;
	  goto putgpr;
	case FFI_TYPE_UINT32:
	  gprvalue = **p_argv.ui;
	  goto putgpr;
	case FFI_TYPE_INT:
	case FFI_TYPE_SINT32:
	  gprvalue = **p_argv.si;
	  goto putgpr;

	case FFI_TYPE_UINT64:
	case FFI_TYPE_SINT64:
	case FFI_TYPE_POINTER:
	  gprvalue = **p_argv.ul;
	/* Common tail: store one doubleword and move on, switching to
	   'rest' when the GPR area has just been filled.  */
	putgpr:
	  *next_arg.ul++ = gprvalue;
	  if (next_arg.ul == gpr_end.ul)
	    next_arg.ul = rest.ul;
	  break;
	}
    }

  /* Without FLAG_4_GPR_ARGUMENTS at most four GPR slots may have been
     used.  */
  FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
	      || (next_arg.ul >= gpr_base.ul
		  && next_arg.ul <= gpr_base.ul + 4));
}
501
502
503
504/* Perform machine dependent cif processing */
/* Compute the machine-dependent parts of a cif.

   cif  call interface whose abi, rtype, arg_types and nargs are read;
	its flags and bytes fields are overwritten with, respectively,
	the FLAG_* bits describing the call and the frame size needed.

   Any abi other than FFI_LINUX64 is handled with the SYSV rules.
   Always returns FFI_OK.  */
ffi_status
ffi_prep_cif_machdep (ffi_cif *cif)
{
  /* All this is for the SYSV and LINUX64 ABI.  */
  int i;
  ffi_type **ptr;
  unsigned bytes;
  int fparg_count = 0, intarg_count = 0;
  unsigned flags = 0;
  unsigned struct_copy_size = 0;
  unsigned type = cif->rtype->type;
  unsigned size = cif->rtype->size;

  if (cif->abi != FFI_LINUX64)
    {
      /* All the machine-independent calculation of cif->bytes will be wrong.
	 Redo the calculation for SYSV.  */

      /* Space for the frame pointer, callee's LR, and the asm's temp regs.  */
      bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);

      /* Space for the GPR registers.  */
      bytes += NUM_GPR_ARG_REGISTERS * sizeof (int);
    }
  else
    {
      /* 64-bit ABI.  */

      /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
	 regs.  */
      bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);

      /* Space for the mandatory parm save area and general registers.  */
      bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);

#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
      /* From here on a long double return is classified as a double.  */
      if (type == FFI_TYPE_LONGDOUBLE)
	type = FFI_TYPE_DOUBLE;
#endif
    }

  /* Return value handling.  The rules for SYSV are as follows:
     - 32-bit (or less) integer values are returned in gpr3;
     - Structures of size <= 4 bytes also returned in gpr3;
     - 64-bit integer values and structures between 5 and 8 bytes are returned
     in gpr3 and gpr4;
     - Single/double FP values are returned in fpr1;
     - Larger structures and long double (if not equivalent to double) values
     are allocated space and a pointer is passed as the first argument.
     For LINUX64:
     - integer values in gpr3;
     - Structures/Unions by reference;
     - Single/double FP values in fpr1, long double in fpr1,fpr2.  */
  switch (type)
    {
    case FFI_TYPE_DOUBLE:
      flags |= FLAG_RETURNS_64BITS;
      /* Fall through.  */
    case FFI_TYPE_FLOAT:
      flags |= FLAG_RETURNS_FP;
      break;

    case FFI_TYPE_UINT64:
    case FFI_TYPE_SINT64:
      flags |= FLAG_RETURNS_64BITS;
      break;

    case FFI_TYPE_STRUCT:
      if (cif->abi == FFI_SYSV)
	{
	  /* The final SYSV ABI says that structures smaller or equal 8 bytes
	     are returned in r3/r4. The FFI_GCC_SYSV ABI instead returns them
	     in memory.  */

	  /* Treat structs with size <= 8 bytes.  */
	  if (size <= 8)
	    {
	      flags |= FLAG_RETURNS_SMST;
	      /* These structs are returned in r3. We pack the type and the
		 precalculated shift value (needed in the sysv.S) into flags.
		 The same applies for the structs returned in r3/r4.  */
	      if (size <= 4)
		{
		  flags |= 1 << (31 - FFI_SYSV_TYPE_SMALL_STRUCT - 1);
		  flags |= 8 * (4 - size) << 4;
		  break;
		}
	      /* These structs are returned in r3 and r4. See above.   */
	      /* NOTE(review): size <= 8 is already guaranteed by the
		 enclosing test, so this branch is always taken here.  */
	      if  (size <= 8)
		{
		  flags |= 1 << (31 - FFI_SYSV_TYPE_SMALL_STRUCT - 2);
		  flags |= 8 * (8 - size) << 4;
		  break;
		}
	    }
	}
      /* else fall through.  */
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
    case FFI_TYPE_LONGDOUBLE:
      /* NOTE(review): for FFI_LINUX64 'type' was rewritten from
	 FFI_TYPE_LONGDOUBLE to FFI_TYPE_DOUBLE above, so this condition
	 appears to be unreachable - confirm which behaviour is
	 intended.  */
      if (type == FFI_TYPE_LONGDOUBLE && cif->abi == FFI_LINUX64)
	{
	  flags |= FLAG_RETURNS_128BITS;
	  flags |= FLAG_RETURNS_FP;
	  break;
	}
#endif
      /* Returned in memory: the result address takes one integer
	 argument slot.  */
      intarg_count++;
      flags |= FLAG_RETVAL_REFERENCE;
      /* Fall through.  */
    case FFI_TYPE_VOID:
      flags |= FLAG_RETURNS_NOTHING;
      break;

    default:
      /* Returns 32-bit integer, or similar.  Nothing to do here.  */
      break;
    }

  if (cif->abi != FFI_LINUX64)
    /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
       first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
       goes on the stack.  Structures and long doubles (if not equivalent
       to double) are passed as a pointer to a copy of the structure.
       Stuff on the stack needs to keep proper alignment.  */
    for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
      {
	switch ((*ptr)->type)
	  {
	  case FFI_TYPE_FLOAT:
	    fparg_count++;
	    /* floating singles are not 8-aligned on stack */
	    break;

	  case FFI_TYPE_DOUBLE:
	    fparg_count++;
	    /* If this FP arg is going on the stack, it must be
	       8-byte-aligned.  */
	    if (fparg_count > NUM_FPR_ARG_REGISTERS
		&& intarg_count >= NUM_GPR_ARG_REGISTERS
		&& intarg_count % 2 != 0)
	      intarg_count++;
	    break;

	  case FFI_TYPE_UINT64:
	  case FFI_TYPE_SINT64:
	    /* 'long long' arguments are passed as two words, but
	       either both words must fit in registers or both go
	       on the stack.  If they go on the stack, they must
	       be 8-byte-aligned.

	       Also, only certain register pairs can be used for
	       passing long long int -- specifically (r3,r4), (r5,r6),
	       (r7,r8), (r9,r10).
	    */
	    if (intarg_count == NUM_GPR_ARG_REGISTERS-1
		|| intarg_count % 2 != 0)
	      intarg_count++;
	    intarg_count += 2;
	    break;

	  case FFI_TYPE_STRUCT:
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
	  case FFI_TYPE_LONGDOUBLE:
#endif
	    /* We must allocate space for a copy of these to enforce
	       pass-by-value.  Pad the space up to a multiple of 16
	       bytes (the maximum alignment required for anything under
	       the SYSV ABI).  */
	    struct_copy_size += ((*ptr)->size + 15) & ~0xF;
	    /* Fall through (allocate space for the pointer).  */

	  default:
	    /* Everything else is passed as a 4-byte word in a GPR, either
	       the object itself or a pointer to it.  */
	    intarg_count++;
	    break;
	  }
      }
  else
    /* LINUX64: every argument, floating point included, is counted in
       doublewords of argument space through intarg_count.  */
    for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
      {
	switch ((*ptr)->type)
	  {
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
	  case FFI_TYPE_LONGDOUBLE:
	    fparg_count += 2;
	    intarg_count += 2;
	    break;
#endif
	  case FFI_TYPE_FLOAT:
	  case FFI_TYPE_DOUBLE:
	    fparg_count++;
	    intarg_count++;
	    break;

	  case FFI_TYPE_STRUCT:
	    /* Structs are counted by size, rounded up to doublewords.  */
	    intarg_count += ((*ptr)->size + 7) / 8;
	    break;

	  default:
	    /* Everything else is passed as a 8-byte word in a GPR, either
	       the object itself or a pointer to it.  */
	    intarg_count++;
	    break;
	  }
      }

  /* Record what the argument scan found.  */
  if (fparg_count != 0)
    flags |= FLAG_FP_ARGUMENTS;
  if (intarg_count > 4)
    flags |= FLAG_4_GPR_ARGUMENTS;
  if (struct_copy_size != 0)
    flags |= FLAG_ARG_NEEDS_COPY;

  if (cif->abi != FFI_LINUX64)
    {
      /* Space for the FPR registers, if needed.  */
      if (fparg_count != 0)
	bytes += NUM_FPR_ARG_REGISTERS * sizeof (double);

      /* Stack space.  */
      if (intarg_count > NUM_GPR_ARG_REGISTERS)
	bytes += (intarg_count - NUM_GPR_ARG_REGISTERS) * sizeof (int);
      if (fparg_count > NUM_FPR_ARG_REGISTERS)
	bytes += (fparg_count - NUM_FPR_ARG_REGISTERS) * sizeof (double);
    }
  else
    {
      /* Space for the FPR registers, if needed.  */
      if (fparg_count != 0)
	bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);

      /* Stack space.  */
      if (intarg_count > NUM_GPR_ARG_REGISTERS64)
	bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
    }

  /* The stack space allocated needs to be a multiple of 16 bytes.  */
  bytes = (bytes + 15) & ~0xF;

  /* Add in the space for the copied structures.  */
  bytes += struct_copy_size;

  cif->flags = flags;
  cif->bytes = bytes;

  return FFI_OK;
}
753
/*@-declundef@*/
/*@-exportheader@*/
/* Low-level call routines used by ffi_call, which passes, in order: the
   extended_cif, the negated frame size (-cif->bytes), cif->flags, the
   return-value address and the function to call.  */
extern void ffi_call_SYSV(/*@out@*/ extended_cif *,
			  unsigned, unsigned,
			  /*@out@*/ unsigned *,
			  void (*fn)());
extern void FFI_HIDDEN ffi_call_LINUX64(/*@out@*/ extended_cif *,
					unsigned long, unsigned long,
					/*@out@*/ unsigned long *,
					void (*fn)());
/*@=declundef@*/
/*@=exportheader@*/
766
767void
768ffi_call(/*@dependent@*/ ffi_cif *cif,
769	 void (*fn)(),
770	 /*@out@*/ void *rvalue,
771	 /*@dependent@*/ void **avalue)
772{
773  extended_cif ecif;
774
775  ecif.cif = cif;
776  ecif.avalue = avalue;
777
778  /* If the return value is a struct and we don't have a return	*/
779  /* value address then we need to make one		        */
780
781  if ((rvalue == NULL) && (cif->rtype->type == FFI_TYPE_STRUCT))
782    {
783      /*@-sysunrecog@*/
784      ecif.rvalue = alloca(cif->rtype->size);
785      /*@=sysunrecog@*/
786    }
787  else
788    ecif.rvalue = rvalue;
789
790
791  switch (cif->abi)
792    {
793#ifndef POWERPC64
794    case FFI_SYSV:
795    case FFI_GCC_SYSV:
796      /*@-usedef@*/
797      ffi_call_SYSV (&ecif, -cif->bytes, cif->flags, ecif.rvalue, fn);
798      /*@=usedef@*/
799      break;
800#else
801    case FFI_LINUX64:
802      /*@-usedef@*/
803      ffi_call_LINUX64 (&ecif, -(long) cif->bytes, cif->flags, ecif.rvalue, fn);
804      /*@=usedef@*/
805      break;
806#endif
807    default:
808      FFI_ASSERT (0);
809      break;
810    }
811}
812
813
#ifndef POWERPC64
#define MIN_CACHE_LINE_SIZE 8

/* Issue icbi/dcbf for the 'size' bytes starting at 'addr1', stepping
   MIN_CACHE_LINE_SIZE bytes at a time, then once more for the final
   byte followed by sync and isync.  */
static void
flush_icache (char *addr1, int size)
{
  char *line;
  int offset = 0;

  while (offset < size)
    {
      line = addr1 + offset;
      __asm__ volatile ("icbi 0,%0;" "dcbf 0,%0;"
			: : "r" (line) : "memory");
      offset += MIN_CACHE_LINE_SIZE;
    }

  /* The last byte may fall in a line the stepping above did not touch.  */
  line = addr1 + size - 1;
  __asm__ volatile ("icbi 0,%0;" "dcbf 0,%0;" "sync;" "isync;"
		    : : "r"(line) : "memory");
}
#endif
833
834ffi_status
835ffi_prep_closure (ffi_closure *closure,
836		  ffi_cif *cif,
837		  void (*fun) (ffi_cif *, void *, void **, void *),
838		  void *user_data)
839{
840#ifdef POWERPC64
841  void **tramp = (void **) &closure->tramp[0];
842
843  FFI_ASSERT (cif->abi == FFI_LINUX64);
844  /* Copy function address and TOC from ffi_closure_LINUX64.  */
845  memcpy (tramp, (char *) ffi_closure_LINUX64, 16);
846  tramp[2] = (void *) closure;
847#else
848  unsigned int *tramp;
849
850  FFI_ASSERT (cif->abi == FFI_GCC_SYSV || cif->abi == FFI_SYSV);
851
852  tramp = (unsigned int *) &closure->tramp[0];
853  tramp[0] = 0x7c0802a6;  /*   mflr    r0 */
854  tramp[1] = 0x4800000d;  /*   bl      10 <trampoline_initial+0x10> */
855  tramp[4] = 0x7d6802a6;  /*   mflr    r11 */
856  tramp[5] = 0x7c0803a6;  /*   mtlr    r0 */
857  tramp[6] = 0x800b0000;  /*   lwz     r0,0(r11) */
858  tramp[7] = 0x816b0004;  /*   lwz     r11,4(r11) */
859  tramp[8] = 0x7c0903a6;  /*   mtctr   r0 */
860  tramp[9] = 0x4e800420;  /*   bctr */
861  *(void **) &tramp[2] = (void *) ffi_closure_SYSV; /* function */
862  *(void **) &tramp[3] = (void *) closure;          /* context */
863
864  /* Flush the icache.  */
865  flush_icache (&closure->tramp[0],FFI_TRAMPOLINE_SIZE);
866#endif
867
868  closure->cif = cif;
869  closure->fun = fun;
870  closure->user_data = user_data;
871
872  return FFI_OK;
873}
874
/* One saved floating-point register slot, readable either as a float or
   as a double.  The closure helper uses it to convert a slot holding a
   double into a float in place.  */
typedef union
{
  float f;
  double d;
} ffi_dblfl;

/* Prototype for the SYSV closure helper defined below.  */
int ffi_closure_helper_SYSV (ffi_closure *, void *, unsigned long *,
			     ffi_dblfl *, unsigned long *);
883
884/* Basically the trampoline invokes ffi_closure_SYSV, and on
885 * entry, r11 holds the address of the closure.
886 * After storing the registers that could possibly contain
887 * parameters to be passed into the stack frame and setting
888 * up space for a return value, ffi_closure_SYSV invokes the
889 * following helper function to do most of the work
890 */
891
892int
893ffi_closure_helper_SYSV (ffi_closure *closure, void *rvalue,
894			 unsigned long *pgr, ffi_dblfl *pfr,
895			 unsigned long *pst)
896{
897  /* rvalue is the pointer to space for return value in closure assembly */
898  /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */
899  /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV  */
900  /* pst is the pointer to outgoing parameter stack in original caller */
901
902  void **          avalue;
903  ffi_type **      arg_types;
904  long             i, avn;
905  long             nf;   /* number of floating registers already used */
906  long             ng;   /* number of general registers already used */
907  ffi_cif *        cif;
908  double           temp;
909  unsigned         size;
910
911  cif = closure->cif;
912  avalue = alloca (cif->nargs * sizeof (void *));
913  size = cif->rtype->size;
914
915  nf = 0;
916  ng = 0;
917
918  /* Copy the caller's structure return value address so that the closure
919     returns the data directly to the caller.
920     For FFI_SYSV the result is passed in r3/r4 if the struct size is less
921     or equal 8 bytes.  */
922
923  if (cif->rtype->type == FFI_TYPE_STRUCT)
924    {
925      if (!((cif->abi == FFI_SYSV) && (size <= 8)))
926	{
927	  rvalue = (void *) *pgr;
928	  ng++;
929	  pgr++;
930	}
931    }
932
933  i = 0;
934  avn = cif->nargs;
935  arg_types = cif->arg_types;
936
937  /* Grab the addresses of the arguments from the stack frame.  */
938  while (i < avn)
939    {
940      switch (arg_types[i]->type)
941	{
942	case FFI_TYPE_SINT8:
943	case FFI_TYPE_UINT8:
944	  /* there are 8 gpr registers used to pass values */
945	  if (ng < 8)
946	    {
947	      avalue[i] = (char *) pgr + 3;
948	      ng++;
949	      pgr++;
950	    }
951	  else
952	    {
953	      avalue[i] = (char *) pst + 3;
954	      pst++;
955	    }
956	  break;
957
958	case FFI_TYPE_SINT16:
959	case FFI_TYPE_UINT16:
960	  /* there are 8 gpr registers used to pass values */
961	  if (ng < 8)
962	    {
963	      avalue[i] = (char *) pgr + 2;
964	      ng++;
965	      pgr++;
966	    }
967	  else
968	    {
969	      avalue[i] = (char *) pst + 2;
970	      pst++;
971	    }
972	  break;
973
974	case FFI_TYPE_SINT32:
975	case FFI_TYPE_UINT32:
976	case FFI_TYPE_POINTER:
977	  /* there are 8 gpr registers used to pass values */
978	  if (ng < 8)
979	    {
980	      avalue[i] = pgr;
981	      ng++;
982	      pgr++;
983	    }
984	  else
985	    {
986	      avalue[i] = pst;
987	      pst++;
988	    }
989	  break;
990
991	case FFI_TYPE_STRUCT:
992	  /* Structs are passed by reference. The address will appear in a
993	     gpr if it is one of the first 8 arguments.  */
994	  if (ng < 8)
995	    {
996	      avalue[i] = (void *) *pgr;
997	      ng++;
998	      pgr++;
999	    }
1000	  else
1001	    {
1002	      avalue[i] = (void *) *pst;
1003	      pst++;
1004	    }
1005	  break;
1006
1007	case FFI_TYPE_SINT64:
1008	case FFI_TYPE_UINT64:
1009	  /* passing long long ints are complex, they must
1010	   * be passed in suitable register pairs such as
1011	   * (r3,r4) or (r5,r6) or (r6,r7), or (r7,r8) or (r9,r10)
1012	   * and if the entire pair aren't available then the outgoing
1013	   * parameter stack is used for both but an alignment of 8
1014	   * must will be kept.  So we must either look in pgr
1015	   * or pst to find the correct address for this type
1016	   * of parameter.
1017	   */
1018	  if (ng < 7)
1019	    {
1020	      if (ng & 0x01)
1021		{
1022		  /* skip r4, r6, r8 as starting points */
1023		  ng++;
1024		  pgr++;
1025		}
1026	      avalue[i] = pgr;
1027	      ng += 2;
1028	      pgr += 2;
1029	    }
1030	  else
1031	    {
1032	      if (((long) pst) & 4)
1033		pst++;
1034	      avalue[i] = pst;
1035	      pst += 2;
1036	    }
1037	  break;
1038
1039	case FFI_TYPE_FLOAT:
1040	  /* unfortunately float values are stored as doubles
1041	   * in the ffi_closure_SYSV code (since we don't check
1042	   * the type in that routine).
1043	   */
1044
1045	  /* there are 8 64bit floating point registers */
1046
1047	  if (nf < 8)
1048	    {
1049	      temp = pfr->d;
1050	      pfr->f = (float) temp;
1051	      avalue[i] = pfr;
1052	      nf++;
1053	      pfr++;
1054	    }
1055	  else
1056	    {
1057	      /* FIXME? here we are really changing the values
1058	       * stored in the original calling routines outgoing
1059	       * parameter stack.  This is probably a really
1060	       * naughty thing to do but...
1061	       */
1062	      avalue[i] = pst;
1063	      nf++;
1064	      pst += 1;
1065	    }
1066	  break;
1067
1068	case FFI_TYPE_DOUBLE:
1069	  /* On the outgoing stack all values are aligned to 8 */
1070	  /* there are 8 64bit floating point registers */
1071
1072	  if (nf < 8)
1073	    {
1074	      avalue[i] = pfr;
1075	      nf++;
1076	      pfr++;
1077	    }
1078	  else
1079	    {
1080	      if (((long) pst) & 4)
1081		pst++;
1082	      avalue[i] = pst;
1083	      nf++;
1084	      pst += 2;
1085	    }
1086	  break;
1087
1088	default:
1089	  FFI_ASSERT (0);
1090	}
1091
1092      i++;
1093    }
1094
1095
1096  (closure->fun) (cif, rvalue, avalue, closure->user_data);
1097
1098  /* Tell ffi_closure_SYSV how to perform return type promotions.
1099     Because the FFI_SYSV ABI returns the structures <= 8 bytes in r3/r4
1100     we have to tell ffi_closure_SYSV how to treat them.  */
1101  if (cif->abi == FFI_SYSV && cif->rtype->type == FFI_TYPE_STRUCT
1102      && size <= 8)
1103    return FFI_SYSV_TYPE_SMALL_STRUCT + size;
1104  return cif->rtype->type;
1105
1106}
1107
/* Prototype for the 64-bit closure helper defined immediately below.
   The arguments are, in order: the closure, the space for the return
   value, the parameter save area and the saved floating point
   registers.  */
int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_closure *, void *,
					   unsigned long *, ffi_dblfl *);
1110
1111int FFI_HIDDEN
1112ffi_closure_helper_LINUX64 (ffi_closure *closure, void *rvalue,
1113			    unsigned long *pst, ffi_dblfl *pfr)
1114{
1115  /* rvalue is the pointer to space for return value in closure assembly */
1116  /* pst is the pointer to parameter save area
1117     (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
1118  /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
1119
1120  void **avalue;
1121  ffi_type **arg_types;
1122  long i, avn;
1123  ffi_cif *cif;
1124  ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
1125
1126  cif = closure->cif;
1127  avalue = alloca (cif->nargs * sizeof (void *));
1128
1129  /* Copy the caller's structure return value address so that the closure
1130     returns the data directly to the caller.  */
1131  if (cif->rtype->type == FFI_TYPE_STRUCT)
1132    {
1133      rvalue = (void *) *pst;
1134      pst++;
1135    }
1136
1137  i = 0;
1138  avn = cif->nargs;
1139  arg_types = cif->arg_types;
1140
1141  /* Grab the addresses of the arguments from the stack frame.  */
1142  while (i < avn)
1143    {
1144      switch (arg_types[i]->type)
1145	{
1146	case FFI_TYPE_SINT8:
1147	case FFI_TYPE_UINT8:
1148	  avalue[i] = (char *) pst + 7;
1149	  pst++;
1150	  break;
1151
1152	case FFI_TYPE_SINT16:
1153	case FFI_TYPE_UINT16:
1154	  avalue[i] = (char *) pst + 6;
1155	  pst++;
1156	  break;
1157
1158	case FFI_TYPE_SINT32:
1159	case FFI_TYPE_UINT32:
1160	  avalue[i] = (char *) pst + 4;
1161	  pst++;
1162	  break;
1163
1164	case FFI_TYPE_SINT64:
1165	case FFI_TYPE_UINT64:
1166	case FFI_TYPE_POINTER:
1167	  avalue[i] = pst;
1168	  pst++;
1169	  break;
1170
1171	case FFI_TYPE_STRUCT:
1172	  /* Structures with size less than eight bytes are passed
1173	     left-padded.  */
1174	  if (arg_types[i]->size < 8)
1175	    avalue[i] = (char *) pst + 8 - arg_types[i]->size;
1176	  else
1177	    avalue[i] = pst;
1178	  pst += (arg_types[i]->size + 7) / 8;
1179	  break;
1180
1181	case FFI_TYPE_FLOAT:
1182	  /* unfortunately float values are stored as doubles
1183	   * in the ffi_closure_LINUX64 code (since we don't check
1184	   * the type in that routine).
1185	   */
1186
1187	  /* there are 13 64bit floating point registers */
1188
1189	  if (pfr < end_pfr)
1190	    {
1191	      double temp = pfr->d;
1192	      pfr->f = (float) temp;
1193	      avalue[i] = pfr;
1194	      pfr++;
1195	    }
1196	  else
1197	    avalue[i] = pst;
1198	  pst++;
1199	  break;
1200
1201	case FFI_TYPE_DOUBLE:
1202	  /* On the outgoing stack all values are aligned to 8 */
1203	  /* there are 13 64bit floating point registers */
1204
1205	  if (pfr < end_pfr)
1206	    {
1207	      avalue[i] = pfr;
1208	      pfr++;
1209	    }
1210	  else
1211	    avalue[i] = pst;
1212	  pst++;
1213	  break;
1214
1215#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
1216	case FFI_TYPE_LONGDOUBLE:
1217	  if (pfr + 1 < end_pfr)
1218	    {
1219	      avalue[i] = pfr;
1220	      pfr += 2;
1221	    }
1222	  else
1223	    {
1224	      if (pfr < end_pfr)
1225		{
1226		  /* Passed partly in f13 and partly on the stack.
1227		     Move it all to the stack.  */
1228		  *pst = *(unsigned long *) pfr;
1229		  pfr++;
1230		}
1231	      avalue[i] = pst;
1232	    }
1233	  pst += 2;
1234	  break;
1235#endif
1236
1237	default:
1238	  FFI_ASSERT (0);
1239	}
1240
1241      i++;
1242    }
1243
1244
1245  (closure->fun) (cif, rvalue, avalue, closure->user_data);
1246
1247  /* Tell ffi_closure_LINUX64 how to perform return type promotions.  */
1248  return cif->rtype->type;
1249}
1250