1#ifdef __x86_64__
2
3/* -----------------------------------------------------------------------
4   x86-ffi64.c - Copyright (c) 2002  Bo Thorsen <bo@suse.de>
5
6   x86-64 Foreign Function Interface
7
8   Permission is hereby granted, free of charge, to any person obtaining
9   a copy of this software and associated documentation files (the
10   ``Software''), to deal in the Software without restriction, including
11   without limitation the rights to use, copy, modify, merge, publish,
12   distribute, sublicense, and/or sell copies of the Software, and to
13   permit persons to whom the Software is furnished to do so, subject to
14   the following conditions:
15
16   The above copyright notice and this permission notice shall be included
17   in all copies or substantial portions of the Software.
18
19   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
20   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
23   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
24   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
25   OTHER DEALINGS IN THE SOFTWARE.
26   ----------------------------------------------------------------------- */
27
28#include <ffi.h>
29#include <ffi_common.h>
30
31#include <stdlib.h>
32#include <stdarg.h>
33
34#define MAX_GPR_REGS 6
35#define MAX_SSE_REGS 8
36
37typedef struct RegisterArgs {
38	/* Registers for argument passing.  */
39	UINT64		gpr[MAX_GPR_REGS];
40	__int128_t	sse[MAX_SSE_REGS];
41} RegisterArgs;
42
/*	Low-level call routine, defined outside this file.  As used by ffi_call:
	  args     - buffer that begins with a RegisterArgs block, followed by
	             the arguments passed in memory;
	  bytes    - cif->bytes plus sizeof(RegisterArgs);
	  flags    - cif->flags as computed by ffi_prep_cif_machdep;
	  raddr    - the rvalue pointer of the call;
	  fnaddr   - the function to be called;
	  ssecount - number of SSE register slots that were filled in.  */
extern void
ffi_call_unix64(void *args, unsigned long bytes, unsigned flags,
	void *raddr, void (*fnaddr)(), unsigned ssecount);
51
/*	All references to register classes here are identical to the code in
	gcc/config/i386/i386.c. Do *not* change one without the other.  */
54
/*	Register class used for passing a given 64-bit part of the argument.
	These represent classes as documented by the PS ABI, with the exception
	of the SSESF and SSEDF classes, which are basically the SSE class; gcc
	just uses SFmode or DFmode moves instead of DImode to avoid reformatting
	penalties.

	Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
	whenever possible (the upper half contains padding).  */
enum x86_64_reg_class
{
	X86_64_NO_CLASS,
	X86_64_INTEGER_CLASS,
	X86_64_INTEGERSI_CLASS,
	X86_64_SSE_CLASS,
	X86_64_SSESF_CLASS,
	X86_64_SSEDF_CLASS,
	X86_64_SSEUP_CLASS,
	X86_64_X87_CLASS,
	X86_64_X87UP_CLASS,
	X86_64_COMPLEX_X87_CLASS,
	X86_64_MEMORY_CLASS
};

/*	Size of the class arrays used to classify one argument.  */
#define MAX_CLASSES 4

/*	True if X is one of the SSE classes (SSE, SSESF, SSEDF or SSEUP).
	This relies on those enumerators being contiguous in x86_64_reg_class.
	Every use of X is parenthesized so that an argument containing a
	low-precedence operator is tested as a whole.  X is evaluated twice,
	so it must not have side effects.  */
#define SSE_CLASS_P(X) \
	((X) >= X86_64_SSE_CLASS && (X) <= X86_64_SSEUP_CLASS)
79
/*	x86-64 register passing implementation.  See the x86-64 ABI for details.
	The goal of this code is to classify each 8 bytes of an incoming argument
	by register class and to assign registers accordingly.  */
83
84/*	Return the union class of CLASS1 and CLASS2.
85	See the x86-64 PS ABI for details.  */
86static enum x86_64_reg_class
87merge_classes(
88	enum x86_64_reg_class	class1,
89	enum x86_64_reg_class	class2)
90{
91	/*	Rule #1: If both classes are equal, this is the resulting class.  */
92	if (class1 == class2)
93		return class1;
94
95	/*	Rule #2: If one of the classes is NO_CLASS, the resulting class is
96		the other class.  */
97	if (class1 == X86_64_NO_CLASS)
98		return class2;
99
100	if (class2 == X86_64_NO_CLASS)
101		return class1;
102
103	/*	Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
104	if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
105		return X86_64_MEMORY_CLASS;
106
107	/*	Rule #4: If one of the classes is INTEGER, the result is INTEGER.  */
108	if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
109		|| (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
110		return X86_64_INTEGERSI_CLASS;
111
112	if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
113		|| class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
114		return X86_64_INTEGER_CLASS;
115
116	/*	Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
117		MEMORY is used.  */
118	if (class1 == X86_64_X87_CLASS
119		|| class1 == X86_64_X87UP_CLASS
120		|| class1 == X86_64_COMPLEX_X87_CLASS
121		|| class2 == X86_64_X87_CLASS
122		|| class2 == X86_64_X87UP_CLASS
123		|| class2 == X86_64_COMPLEX_X87_CLASS)
124		return X86_64_MEMORY_CLASS;
125
126	/*	Rule #6: Otherwise class SSE is used.  */
127	return X86_64_SSE_CLASS;
128}
129
130/*	Classify the argument of type TYPE and mode MODE.
131	CLASSES will be filled by the register class used to pass each word
132	of the operand.  The number of words is returned.  In case the parameter
133	should be passed in memory, 0 is returned. As a special case for zero
134	sized containers, classes[0] will be NO_CLASS and 1 is returned.
135
136	See the x86-64 PS ABI for details.	*/
137
138static int
139classify_argument(
140	ffi_type*				type,
141	enum x86_64_reg_class	classes[],
142	size_t					byte_offset)
143{
144	switch (type->type)
145	{
146		case FFI_TYPE_UINT8:
147		case FFI_TYPE_SINT8:
148		case FFI_TYPE_UINT16:
149		case FFI_TYPE_SINT16:
150		case FFI_TYPE_UINT32:
151		case FFI_TYPE_SINT32:
152		case FFI_TYPE_UINT64:
153		case FFI_TYPE_SINT64:
154		case FFI_TYPE_POINTER:
155			if (byte_offset + type->size <= 4)
156				classes[0] = X86_64_INTEGERSI_CLASS;
157			else
158				classes[0] = X86_64_INTEGER_CLASS;
159
160			return 1;
161
162		case FFI_TYPE_FLOAT:
163			if (byte_offset == 0)
164				classes[0] = X86_64_SSESF_CLASS;
165			else
166				classes[0] = X86_64_SSE_CLASS;
167
168			return 1;
169
170		case FFI_TYPE_DOUBLE:
171			classes[0] = X86_64_SSEDF_CLASS;
172			return 1;
173
174		case FFI_TYPE_LONGDOUBLE:
175			classes[0] = X86_64_X87_CLASS;
176			classes[1] = X86_64_X87UP_CLASS;
177			return 2;
178
179		case FFI_TYPE_STRUCT:
180		{
181			ffi_type**				ptr;
182			int						i;
183			enum x86_64_reg_class	subclasses[MAX_CLASSES];
184			const int				UNITS_PER_WORD = 8;
185			int						words =
186				(type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
187
188			/* If the struct is larger than 16 bytes, pass it on the stack.  */
189			if (type->size > 16)
190				return 0;
191
192			for (i = 0; i < words; i++)
193				classes[i] = X86_64_NO_CLASS;
194
195			/* Merge the fields of structure.  */
196			for (ptr = type->elements; *ptr != NULL; ptr++)
197			{
198				byte_offset = ALIGN(byte_offset, (*ptr)->alignment);
199
200				int	num = classify_argument(*ptr, subclasses, byte_offset % 8);
201
202				if (num == 0)
203					return 0;
204
205				int pos = byte_offset / 8;
206
207				for (i = 0; i < num; i++)
208				{
209					classes[i + pos] =
210						merge_classes(subclasses[i], classes[i + pos]);
211				}
212
213				byte_offset += (*ptr)->size;
214			}
215
216			/* Final merger cleanup.  */
217			for (i = 0; i < words; i++)
218			{
219				/*	If one class is MEMORY, everything should be passed in
220					memory.  */
221				if (classes[i] == X86_64_MEMORY_CLASS)
222					return 0;
223
224				/*	The X86_64_SSEUP_CLASS should be always preceded by
225					X86_64_SSE_CLASS.  */
226				if (classes[i] == X86_64_SSEUP_CLASS
227					&& (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
228					classes[i] = X86_64_SSE_CLASS;
229
230				/*  X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS.  */
231				if (classes[i] == X86_64_X87UP_CLASS
232					&& (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
233					classes[i] = X86_64_SSE_CLASS;
234			}
235
236			return words;
237		}
238
239		default:
240			FFI_ASSERT(0);
241	}
242
243	return 0; /* Never reached.  */
244}
245
246/*	Examine the argument and return set number of register required in each
247	class.  Return zero if parameter should be passed in memory, otherwise
248	the number of registers.  */
249static int
250examine_argument(
251	ffi_type*				type,
252	enum x86_64_reg_class	classes[MAX_CLASSES],
253	_Bool					in_return,
254	int*					pngpr,
255	int*					pnsse)
256{
257	int	n = classify_argument(type, classes, 0);
258	int ngpr = 0;
259	int	nsse = 0;
260	int	i;
261
262	if (n == 0)
263		return 0;
264
265	for (i = 0; i < n; ++i)
266	{
267		switch (classes[i])
268		{
269			case X86_64_INTEGER_CLASS:
270			case X86_64_INTEGERSI_CLASS:
271				ngpr++;
272				break;
273
274			case X86_64_SSE_CLASS:
275			case X86_64_SSESF_CLASS:
276			case X86_64_SSEDF_CLASS:
277				nsse++;
278				break;
279
280			case X86_64_NO_CLASS:
281			case X86_64_SSEUP_CLASS:
282				break;
283
284			case X86_64_X87_CLASS:
285			case X86_64_X87UP_CLASS:
286			case X86_64_COMPLEX_X87_CLASS:
287				return in_return != 0;
288
289			default:
290				abort();
291		}
292	}
293
294	*pngpr = ngpr;
295	*pnsse = nsse;
296
297	return n;
298}
299
300/* Perform machine dependent cif processing.  */
301ffi_status
302ffi_prep_cif_machdep(
303	ffi_cif*	cif)
304{
305	int						gprcount = 0;
306	int						ssecount = 0;
307	int						flags = cif->rtype->type;
308	int						i, avn, n, ngpr, nsse;
309	enum x86_64_reg_class	classes[MAX_CLASSES];
310	size_t					bytes;
311
312	if (flags != FFI_TYPE_VOID)
313	{
314		n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
315
316		if (n == 0)
317		{
318			/*	The return value is passed in memory.  A pointer to that
319				memory is the first argument.  Allocate a register for it.  */
320			gprcount++;
321
322			/* We don't have to do anything in asm for the return.  */
323			flags = FFI_TYPE_VOID;
324		}
325		else if (flags == FFI_TYPE_STRUCT)
326		{
327			/* Mark which registers the result appears in.  */
328			_Bool sse0 = SSE_CLASS_P(classes[0]);
329			_Bool sse1 = n == 2 && SSE_CLASS_P(classes[1]);
330
331			if (sse0 && !sse1)
332				flags |= 1 << 8;
333			else if (!sse0 && sse1)
334				flags |= 1 << 9;
335			else if (sse0 && sse1)
336				flags |= 1 << 10;
337
338			/* Mark the true size of the structure.  */
339			flags |= cif->rtype->size << 12;
340		}
341	}
342
343	/*	Go over all arguments and determine the way they should be passed.
344		If it's in a register and there is space for it, let that be so. If
345		not, add it's size to the stack byte count.  */
346	for (bytes = 0, i = 0, avn = cif->nargs; i < avn; i++)
347	{
348		if (examine_argument(cif->arg_types[i], classes, 0, &ngpr, &nsse) == 0
349			|| gprcount + ngpr > MAX_GPR_REGS
350			|| ssecount + nsse > MAX_SSE_REGS)
351		{
352			long align = cif->arg_types[i]->alignment;
353
354			if (align < 8)
355				align = 8;
356
357			bytes = ALIGN(bytes, align);
358			bytes += cif->arg_types[i]->size;
359		}
360		else
361		{
362			gprcount += ngpr;
363			ssecount += nsse;
364		}
365	}
366
367	if (ssecount)
368		flags |= 1 << 11;
369
370	cif->flags = flags;
371	cif->bytes = bytes;
372
373	return FFI_OK;
374}
375
376void
377ffi_call(
378	ffi_cif*	cif,
379	void		(*fn)(),
380	void*		rvalue,
381	void**		avalue)
382{
383	enum x86_64_reg_class	classes[MAX_CLASSES];
384	char*					stack;
385	char*					argp;
386	ffi_type**				arg_types;
387	int						gprcount, ssecount, ngpr, nsse, i, avn;
388	_Bool					ret_in_memory;
389	RegisterArgs*			reg_args;
390
391	/* Can't call 32-bit mode from 64-bit mode.  */
392	FFI_ASSERT(cif->abi == FFI_UNIX64);
393
394	/*	If the return value is a struct and we don't have a return value
395		address then we need to make one.  Note the setting of flags to
396		VOID above in ffi_prep_cif_machdep.  */
397	ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
398		&& (cif->flags & 0xff) == FFI_TYPE_VOID);
399
400	if (rvalue == NULL && ret_in_memory)
401		rvalue = alloca (cif->rtype->size);
402
403	/* Allocate the space for the arguments, plus 4 words of temp space.  */
404	stack = alloca(sizeof(RegisterArgs) + cif->bytes + 4 * 8);
405	reg_args = (RegisterArgs*)stack;
406	argp = stack + sizeof(RegisterArgs);
407
408	gprcount = ssecount = 0;
409
410	/*	If the return value is passed in memory, add the pointer as the
411		first integer argument.  */
412	if (ret_in_memory)
413		reg_args->gpr[gprcount++] = (long) rvalue;
414
415	avn = cif->nargs;
416	arg_types = cif->arg_types;
417
418	for (i = 0; i < avn; ++i)
419	{
420		size_t size = arg_types[i]->size;
421		int n;
422
423		n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
424
425		if (n == 0
426			|| gprcount + ngpr > MAX_GPR_REGS
427			|| ssecount + nsse > MAX_SSE_REGS)
428		{
429			long align = arg_types[i]->alignment;
430
431			/* Stack arguments are *always* at least 8 byte aligned.  */
432			if (align < 8)
433				align = 8;
434
435			/* Pass this argument in memory.  */
436			argp = (void *) ALIGN (argp, align);
437			memcpy (argp, avalue[i], size);
438			argp += size;
439		}
440		else
441		{	/* The argument is passed entirely in registers.  */
442			char *a = (char *) avalue[i];
443			int j;
444
445			for (j = 0; j < n; j++, a += 8, size -= 8)
446			{
447				switch (classes[j])
448				{
449					case X86_64_INTEGER_CLASS:
450					case X86_64_INTEGERSI_CLASS:
451						reg_args->gpr[gprcount] = 0;
452						memcpy (&reg_args->gpr[gprcount], a, size < 8 ? size : 8);
453						gprcount++;
454						break;
455
456					case X86_64_SSE_CLASS:
457					case X86_64_SSEDF_CLASS:
458						reg_args->sse[ssecount++] = *(UINT64 *) a;
459						break;
460
461					case X86_64_SSESF_CLASS:
462						reg_args->sse[ssecount++] = *(UINT32 *) a;
463						break;
464
465					default:
466						abort();
467				}
468			}
469		}
470	}
471
472	ffi_call_unix64 (stack, cif->bytes + sizeof(RegisterArgs),
473		cif->flags, rvalue, fn, ssecount);
474}
475
476extern void ffi_closure_unix64(void);
477
478ffi_status
479ffi_prep_closure(
480	ffi_closure*	closure,
481	ffi_cif*		cif,
482	void			(*fun)(ffi_cif*, void*, void**, void*),
483	void*			user_data)
484{
485	if (cif->abi != FFI_UNIX64)
486		return FFI_BAD_ABI;
487
488	volatile unsigned short*	tramp =
489		(volatile unsigned short*)&closure->tramp[0];
490
491	tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
492	*(void* volatile*)&tramp[1] = ffi_closure_unix64;
493	tramp[5] = 0xba49;		/* mov <data>, %r10	*/
494	*(void* volatile*)&tramp[6] = closure;
495
496	/*	Set the carry bit if the function uses any sse registers.
497		This is clc or stc, together with the first byte of the jmp.  */
498	tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
499	tramp[11] = 0xe3ff;			/* jmp *%r11    */
500
501	closure->cif = cif;
502	closure->fun = fun;
503	closure->user_data = user_data;
504
505	return FFI_OK;
506}
507
508int
509ffi_closure_unix64_inner(
510	ffi_closure*	closure,
511	void*			rvalue,
512	RegisterArgs*	reg_args,
513	char*			argp)
514{
515	ffi_cif*	cif = closure->cif;
516	void**		avalue = alloca(cif->nargs * sizeof(void *));
517	ffi_type**	arg_types;
518	long		i, avn;
519	int			gprcount = 0;
520	int			ssecount = 0;
521	int			ngpr, nsse;
522	int			ret;
523
524	ret = cif->rtype->type;
525
526	if (ret != FFI_TYPE_VOID)
527    {
528		enum x86_64_reg_class classes[MAX_CLASSES];
529		int n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
530
531		if (n == 0)
532		{
533			/* The return value goes in memory.  Arrange for the closure
534			return value to go directly back to the original caller.  */
535			rvalue = (void *) reg_args->gpr[gprcount++];
536
537			/* We don't have to do anything in asm for the return.  */
538			ret = FFI_TYPE_VOID;
539		}
540		else if (ret == FFI_TYPE_STRUCT && n == 2)
541		{
542			/* Mark which register the second word of the structure goes in.  */
543			_Bool sse0 = SSE_CLASS_P (classes[0]);
544			_Bool sse1 = SSE_CLASS_P (classes[1]);
545
546			if (!sse0 && sse1)
547				ret |= 1 << 8;
548			else if (sse0 && !sse1)
549				ret |= 1 << 9;
550		}
551	}
552
553	avn = cif->nargs;
554	arg_types = cif->arg_types;
555
556	for (i = 0; i < avn; ++i)
557	{
558		enum x86_64_reg_class classes[MAX_CLASSES];
559		int n;
560
561		n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
562
563		if (n == 0
564			|| gprcount + ngpr > MAX_GPR_REGS
565			|| ssecount + nsse > MAX_SSE_REGS)
566		{
567			long align = arg_types[i]->alignment;
568
569			/* Stack arguments are *always* at least 8 byte aligned.  */
570			if (align < 8)
571				align = 8;
572
573			/* Pass this argument in memory.  */
574			argp = (void *) ALIGN (argp, align);
575			avalue[i] = argp;
576			argp += arg_types[i]->size;
577		}
578
579#if !defined(X86_DARWIN)
580		/*	If the argument is in a single register, or two consecutive
581			registers, then we can use that address directly.  */
582		else if (n == 1 || (n == 2 &&
583		   SSE_CLASS_P (classes[0]) == SSE_CLASS_P (classes[1])))
584		{
585			// The argument is in a single register.
586			if (SSE_CLASS_P (classes[0]))
587			{
588				avalue[i] = &reg_args->sse[ssecount];
589				ssecount += n;
590			}
591			else
592			{
593				avalue[i] = &reg_args->gpr[gprcount];
594				gprcount += n;
595			}
596		}
597#endif
598
599		/* Otherwise, allocate space to make them consecutive.  */
600		else
601		{
602			char *a = alloca (16);
603			int j;
604
605			avalue[i] = a;
606
607			for (j = 0; j < n; j++, a += 8)
608			{
609				if (SSE_CLASS_P (classes[j]))
610					memcpy (a, &reg_args->sse[ssecount++], 8);
611				else
612					memcpy (a, &reg_args->gpr[gprcount++], 8);
613			}
614		}
615	}
616
617	/* Invoke the closure.  */
618	closure->fun (cif, rvalue, avalue, closure->user_data);
619
620	/* Tell assembly how to perform return type promotions.  */
621	return ret;
622}
623
624#endif /* __x86_64__ */
625