1/* -----------------------------------------------------------------------
2   ffi.c - Copyright (c) 2002  Bo Thorsen <bo@suse.de>
3
4   x86-64 Foreign Function Interface
5
6   Permission is hereby granted, free of charge, to any person obtaining
7   a copy of this software and associated documentation files (the
8   ``Software''), to deal in the Software without restriction, including
9   without limitation the rights to use, copy, modify, merge, publish,
10   distribute, sublicense, and/or sell copies of the Software, and to
11   permit persons to whom the Software is furnished to do so, subject to
12   the following conditions:
13
14   The above copyright notice and this permission notice shall be included
15   in all copies or substantial portions of the Software.
16
17   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
18   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
20   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23   OTHER DEALINGS IN THE SOFTWARE.
24   ----------------------------------------------------------------------- */
25
#include <ffi.h>
#include <ffi_common.h>

#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
31
32#ifdef __x86_64__
33
34#define MAX_GPR_REGS 6
35#define MAX_SSE_REGS 8
36
37struct register_args
38{
39  /* Registers for argument passing.  */
40  UINT64 gpr[MAX_GPR_REGS];
41  __int128_t sse[MAX_SSE_REGS];
42};
43
44extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
45			     void *raddr, void (*fnaddr)(), unsigned ssecount);
46
/* All references to register classes here are identical to the code in
   gcc/config/i386/i386.c.  Do *not* change one without the other.  */
49
/* Register class used for passing a given 64-bit part of the argument.
   These represent the classes documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class; gcc
   just uses SFmode or DFmode moves instead of DImode to avoid reformatting
   penalties.

   Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (the upper half contains only padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_COMPLEX_X87_CLASS,
    X86_64_MEMORY_CLASS
  };

/* Capacity of the class arrays handed to the classification routines.  */
#define MAX_CLASSES 4

/* Nonzero iff X is one of the SSE classes.  This relies on SSE, SSESF,
   SSEDF and SSEUP being consecutive enumerators, in that order.  Both uses
   of X are parenthesized so that an arbitrary expression (for instance a
   conditional expression) may be passed; X is still evaluated twice, so it
   must not have side effects.  */
#define SSE_CLASS_P(X) \
  (((X) >= X86_64_SSE_CLASS) && ((X) <= X86_64_SSEUP_CLASS))
75
76/* x86-64 register passing implementation.  See x86-64 ABI for details.  Goal
77   of this code is to classify each 8bytes of incoming argument by the register
78   class and assign registers accordingly.  */
79
80/* Return the union class of CLASS1 and CLASS2.
81   See the x86-64 PS ABI for details.  */
82
83static enum x86_64_reg_class
84merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
85{
86  /* Rule #1: If both classes are equal, this is the resulting class.  */
87  if (class1 == class2)
88    return class1;
89
90  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
91     the other class.  */
92  if (class1 == X86_64_NO_CLASS)
93    return class2;
94  if (class2 == X86_64_NO_CLASS)
95    return class1;
96
97  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
98  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
99    return X86_64_MEMORY_CLASS;
100
101  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
102  if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
103      || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
104    return X86_64_INTEGERSI_CLASS;
105  if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
106      || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
107    return X86_64_INTEGER_CLASS;
108
109  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
110     MEMORY is used.  */
111  if (class1 == X86_64_X87_CLASS
112      || class1 == X86_64_X87UP_CLASS
113      || class1 == X86_64_COMPLEX_X87_CLASS
114      || class2 == X86_64_X87_CLASS
115      || class2 == X86_64_X87UP_CLASS
116      || class2 == X86_64_COMPLEX_X87_CLASS)
117    return X86_64_MEMORY_CLASS;
118
119  /* Rule #6: Otherwise class SSE is used.  */
120  return X86_64_SSE_CLASS;
121}
122
123/* Classify the argument of type TYPE and mode MODE.
124   CLASSES will be filled by the register class used to pass each word
125   of the operand.  The number of words is returned.  In case the parameter
126   should be passed in memory, 0 is returned. As a special case for zero
127   sized containers, classes[0] will be NO_CLASS and 1 is returned.
128
129   See the x86-64 PS ABI for details.
130*/
131static int
132classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
133		   size_t byte_offset)
134{
135  switch (type->type)
136    {
137    case FFI_TYPE_UINT8:
138    case FFI_TYPE_SINT8:
139    case FFI_TYPE_UINT16:
140    case FFI_TYPE_SINT16:
141    case FFI_TYPE_UINT32:
142    case FFI_TYPE_SINT32:
143    case FFI_TYPE_UINT64:
144    case FFI_TYPE_SINT64:
145    case FFI_TYPE_POINTER:
146      if (byte_offset + type->size <= 4)
147	classes[0] = X86_64_INTEGERSI_CLASS;
148      else
149	classes[0] = X86_64_INTEGER_CLASS;
150      return 1;
151    case FFI_TYPE_FLOAT:
152      if (byte_offset == 0)
153	classes[0] = X86_64_SSESF_CLASS;
154      else
155	classes[0] = X86_64_SSE_CLASS;
156      return 1;
157    case FFI_TYPE_DOUBLE:
158      classes[0] = X86_64_SSEDF_CLASS;
159      return 1;
160    case FFI_TYPE_LONGDOUBLE:
161      classes[0] = X86_64_X87_CLASS;
162      classes[1] = X86_64_X87UP_CLASS;
163      return 2;
164    case FFI_TYPE_STRUCT:
165      {
166	const int UNITS_PER_WORD = 8;
167	int words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
168	ffi_type **ptr;
169	int i;
170	enum x86_64_reg_class subclasses[MAX_CLASSES];
171
172	/* If the struct is larger than 16 bytes, pass it on the stack.  */
173	if (type->size > 16)
174	  return 0;
175
176	for (i = 0; i < words; i++)
177	  classes[i] = X86_64_NO_CLASS;
178
179	/* Merge the fields of structure.  */
180	for (ptr = type->elements; *ptr != NULL; ptr++)
181	  {
182	    int num;
183
184	    byte_offset = ALIGN (byte_offset, (*ptr)->alignment);
185
186	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
187	    if (num == 0)
188	      return 0;
189	    for (i = 0; i < num; i++)
190	      {
191		int pos = byte_offset / 8;
192		classes[i + pos] =
193		  merge_classes (subclasses[i], classes[i + pos]);
194	      }
195
196	    byte_offset += (*ptr)->size;
197	  }
198
199	/* Final merger cleanup.  */
200	for (i = 0; i < words; i++)
201	  {
202	    /* If one class is MEMORY, everything should be passed in
203	       memory.  */
204	    if (classes[i] == X86_64_MEMORY_CLASS)
205	      return 0;
206
207	    /* The X86_64_SSEUP_CLASS should be always preceded by
208	       X86_64_SSE_CLASS.  */
209	    if (classes[i] == X86_64_SSEUP_CLASS
210		&& (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
211	      classes[i] = X86_64_SSE_CLASS;
212
213	    /*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
214	    if (classes[i] == X86_64_X87UP_CLASS
215		&& (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
216	      classes[i] = X86_64_SSE_CLASS;
217	  }
218	return words;
219      }
220
221    default:
222      FFI_ASSERT(0);
223    }
224  return 0; /* Never reached.  */
225}
226
227/* Examine the argument and return set number of register required in each
228   class.  Return zero iff parameter should be passed in memory, otherwise
229   the number of registers.  */
230
231static int
232examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
233		  _Bool in_return, int *pngpr, int *pnsse)
234{
235  int i, n, ngpr, nsse;
236
237  n = classify_argument (type, classes, 0);
238  if (n == 0)
239    return 0;
240
241  ngpr = nsse = 0;
242  for (i = 0; i < n; ++i)
243    switch (classes[i])
244      {
245      case X86_64_INTEGER_CLASS:
246      case X86_64_INTEGERSI_CLASS:
247	ngpr++;
248	break;
249      case X86_64_SSE_CLASS:
250      case X86_64_SSESF_CLASS:
251      case X86_64_SSEDF_CLASS:
252	nsse++;
253	break;
254      case X86_64_NO_CLASS:
255      case X86_64_SSEUP_CLASS:
256	break;
257      case X86_64_X87_CLASS:
258      case X86_64_X87UP_CLASS:
259      case X86_64_COMPLEX_X87_CLASS:
260	return in_return != 0;
261      default:
262	abort ();
263      }
264
265  *pngpr = ngpr;
266  *pnsse = nsse;
267
268  return n;
269}
270
271/* Perform machine dependent cif processing.  */
272
273ffi_status
274ffi_prep_cif_machdep (ffi_cif *cif)
275{
276  int gprcount, ssecount, i, avn, n, ngpr, nsse, flags;
277  enum x86_64_reg_class classes[MAX_CLASSES];
278  size_t bytes;
279
280  gprcount = ssecount = 0;
281
282  flags = cif->rtype->type;
283  if (flags != FFI_TYPE_VOID)
284    {
285      n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
286      if (n == 0)
287	{
288	  /* The return value is passed in memory.  A pointer to that
289	     memory is the first argument.  Allocate a register for it.  */
290	  gprcount++;
291	  /* We don't have to do anything in asm for the return.  */
292	  flags = FFI_TYPE_VOID;
293	}
294      else if (flags == FFI_TYPE_STRUCT)
295	{
296	  /* Mark which registers the result appears in.  */
297	  _Bool sse0 = SSE_CLASS_P (classes[0]);
298	  _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
299	  if (sse0 && !sse1)
300	    flags |= 1 << 8;
301	  else if (!sse0 && sse1)
302	    flags |= 1 << 9;
303	  else if (sse0 && sse1)
304	    flags |= 1 << 10;
305	  /* Mark the true size of the structure.  */
306	  flags |= cif->rtype->size << 12;
307	}
308    }
309
310  /* Go over all arguments and determine the way they should be passed.
311     If it's in a register and there is space for it, let that be so. If
312     not, add it's size to the stack byte count.  */
313  for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
314    {
315      if (examine_argument (cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
316	  || gprcount + ngpr > MAX_GPR_REGS
317	  || ssecount + nsse > MAX_SSE_REGS)
318	{
319	  long align = cif->arg_types[i]->alignment;
320
321	  if (align < 8)
322	    align = 8;
323
324	  bytes = ALIGN(bytes, align);
325	  bytes += cif->arg_types[i]->size;
326	}
327      else
328	{
329	  gprcount += ngpr;
330	  ssecount += nsse;
331	}
332    }
333  if (ssecount)
334    flags |= 1 << 11;
335  cif->flags = flags;
336  cif->bytes = bytes;
337
338  return FFI_OK;
339}
340
341void
342ffi_call (ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
343{
344  enum x86_64_reg_class classes[MAX_CLASSES];
345  char *stack, *argp;
346  ffi_type **arg_types;
347  int gprcount, ssecount, ngpr, nsse, i, avn;
348  _Bool ret_in_memory;
349  struct register_args *reg_args;
350
351  /* Can't call 32-bit mode from 64-bit mode.  */
352  FFI_ASSERT (cif->abi == FFI_UNIX64);
353
354  /* If the return value is a struct and we don't have a return value
355     address then we need to make one.  Note the setting of flags to
356     VOID above in ffi_prep_cif_machdep.  */
357  ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
358		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
359  if (rvalue == NULL && ret_in_memory)
360    rvalue = alloca (cif->rtype->size);
361
362  /* Allocate the space for the arguments, plus 4 words of temp space.  */
363  stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
364  reg_args = (struct register_args *) stack;
365  argp = stack + sizeof (struct register_args);
366
367  gprcount = ssecount = 0;
368
369  /* If the return value is passed in memory, add the pointer as the
370     first integer argument.  */
371  if (ret_in_memory)
372    reg_args->gpr[gprcount++] = (long) rvalue;
373
374  avn = cif->nargs;
375  arg_types = cif->arg_types;
376
377  for (i = 0; i < avn; ++i)
378    {
379      size_t size = arg_types[i]->size;
380      int n;
381
382      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
383      if (n == 0
384	  || gprcount + ngpr > MAX_GPR_REGS
385	  || ssecount + nsse > MAX_SSE_REGS)
386	{
387	  long align = arg_types[i]->alignment;
388
389	  /* Stack arguments are *always* at least 8 byte aligned.  */
390	  if (align < 8)
391	    align = 8;
392
393	  /* Pass this argument in memory.  */
394	  argp = (void *) ALIGN (argp, align);
395	  memcpy (argp, avalue[i], size);
396	  argp += size;
397	}
398      else
399	{
400	  /* The argument is passed entirely in registers.  */
401	  char *a = (char *) avalue[i];
402	  int j;
403
404	  for (j = 0; j < n; j++, a += 8, size -= 8)
405	    {
406	      switch (classes[j])
407		{
408		case X86_64_INTEGER_CLASS:
409		case X86_64_INTEGERSI_CLASS:
410		  reg_args->gpr[gprcount] = 0;
411		  memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
412		  gprcount++;
413		  break;
414		case X86_64_SSE_CLASS:
415		case X86_64_SSEDF_CLASS:
416		  reg_args->sse[ssecount++] = *(UINT64 *) a;
417		  break;
418		case X86_64_SSESF_CLASS:
419		  reg_args->sse[ssecount++] = *(UINT32 *) a;
420		  break;
421		default:
422		  abort();
423		}
424	    }
425	}
426    }
427
428  ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
429		   cif->flags, rvalue, fn, ssecount);
430}
431
432
433extern void ffi_closure_unix64(void);
434
435ffi_status
436ffi_prep_closure (ffi_closure* closure,
437		  ffi_cif* cif,
438		  void (*fun)(ffi_cif*, void*, void**, void*),
439		  void *user_data)
440{
441  volatile unsigned short *tramp;
442
443  tramp = (volatile unsigned short *) &closure->tramp[0];
444
445  tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
446  *(void * volatile *) &tramp[1] = ffi_closure_unix64;
447  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
448  *(void * volatile *) &tramp[6] = closure;
449
450  /* Set the carry bit iff the function uses any sse registers.
451     This is clc or stc, together with the first byte of the jmp.  */
452  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
453
454  tramp[11] = 0xe3ff;			/* jmp *%r11    */
455
456  closure->cif = cif;
457  closure->fun = fun;
458  closure->user_data = user_data;
459
460  return FFI_OK;
461}
462
463int
464ffi_closure_unix64_inner(ffi_closure *closure, void *rvalue,
465			 struct register_args *reg_args, char *argp)
466{
467  ffi_cif *cif;
468  void **avalue;
469  ffi_type **arg_types;
470  long i, avn;
471  int gprcount, ssecount, ngpr, nsse;
472  int ret;
473
474  cif = closure->cif;
475  avalue = alloca(cif->nargs * sizeof(void *));
476  gprcount = ssecount = 0;
477
478  ret = cif->rtype->type;
479  if (ret != FFI_TYPE_VOID)
480    {
481      enum x86_64_reg_class classes[MAX_CLASSES];
482      int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
483      if (n == 0)
484	{
485	  /* The return value goes in memory.  Arrange for the closure
486	     return value to go directly back to the original caller.  */
487	  rvalue = (void *) reg_args->gpr[gprcount++];
488	  /* We don't have to do anything in asm for the return.  */
489	  ret = FFI_TYPE_VOID;
490	}
491      else if (ret == FFI_TYPE_STRUCT && n == 2)
492	{
493	  /* Mark which register the second word of the structure goes in.  */
494	  _Bool sse0 = SSE_CLASS_P (classes[0]);
495	  _Bool sse1 = SSE_CLASS_P (classes[1]);
496	  if (!sse0 && sse1)
497	    ret |= 1 << 8;
498	  else if (sse0 && !sse1)
499	    ret |= 1 << 9;
500	}
501    }
502
503  avn = cif->nargs;
504  arg_types = cif->arg_types;
505
506  for (i = 0; i < avn; ++i)
507    {
508      enum x86_64_reg_class classes[MAX_CLASSES];
509      int n;
510
511      n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
512      if (n == 0
513	  || gprcount + ngpr > MAX_GPR_REGS
514	  || ssecount + nsse > MAX_SSE_REGS)
515	{
516	  long align = arg_types[i]->alignment;
517
518	  /* Stack arguments are *always* at least 8 byte aligned.  */
519	  if (align < 8)
520	    align = 8;
521
522	  /* Pass this argument in memory.  */
523	  argp = (void *) ALIGN (argp, align);
524	  avalue[i] = argp;
525	  argp += arg_types[i]->size;
526	}
527      /* If the argument is in a single register, or two consecutive
528	 registers, then we can use that address directly.  */
529      else if (n == 1
530	       || (n == 2
531		   && SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
532	{
533	  /* The argument is in a single register.  */
534	  if (SSE_CLASS_P (classes[0]))
535	    {
536	      avalue[i] = &reg_args->sse[ssecount];
537	      ssecount += n;
538	    }
539	  else
540	    {
541	      avalue[i] = &reg_args->gpr[gprcount];
542	      gprcount += n;
543	    }
544	}
545      /* Otherwise, allocate space to make them consecutive.  */
546      else
547	{
548	  char *a = alloca (16);
549	  int j;
550
551	  avalue[i] = a;
552	  for (j = 0; j < n; j++, a += 8)
553	    {
554	      if (SSE_CLASS_P (classes[j]))
555		memcpy (a, &reg_args->sse[ssecount++], 8);
556	      else
557		memcpy (a, &reg_args->gpr[gprcount++], 8);
558	    }
559	}
560    }
561
562  /* Invoke the closure.  */
563  closure->fun (cif, rvalue, avalue, closure->user_data);
564
565  /* Tell assembly how to perform return type promotions.  */
566  return ret;
567}
568
569#endif /* __x86_64__ */
570