1#if defined(__ppc__) || defined(__ppc64__)
2
3/* -----------------------------------------------------------------------
4   ffi.c - Copyright (c) 1998 Geoffrey Keating
5
6   PowerPC Foreign Function Interface
7
8   Darwin ABI support (c) 2001 John Hornkvist
9   AIX ABI support (c) 2002 Free Software Foundation, Inc.
10
11   Permission is hereby granted, free of charge, to any person obtaining
12   a copy of this software and associated documentation files (the
13   ``Software''), to deal in the Software without restriction, including
14   without limitation the rights to use, copy, modify, merge, publish,
15   distribute, sublicense, and/or sell copies of the Software, and to
16   permit persons to whom the Software is furnished to do so, subject to
17   the following conditions:
18
19   The above copyright notice and this permission notice shall be included
20   in all copies or substantial portions of the Software.
21
22   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
23   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
26   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28   OTHER DEALINGS IN THE SOFTWARE.
29   ----------------------------------------------------------------------- */
30
31#include <ffi.h>
32#include <ffi_common.h>
33
34#include <stdbool.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <ppc-darwin.h>
38#include <architecture/ppc/mode_independent_asm.h>
39
40#if defined(POWERPC_DARWIN)
41#include <libkern/OSCacheControl.h>	// for sys_icache_invalidate()
42#endif
43
44extern void ffi_closure_ASM(void);
45
/*	An AIX function descriptor, i.e. the object a C function pointer
	actually refers to.  ffi_prep_closure() copies both fields out of the
	descriptor of ffi_closure_ASM when it fills in an AIX trampoline.  */
typedef struct aix_fd_struct
{
	void*	code_pointer;	/* copied to the trampoline's code_pointer */
	void*	toc;			/* copied to the trampoline's toc */
} aix_fd;
52
53/* ffi_prep_args is called by the assembly routine once stack space
54   has been allocated for the function's arguments.
55
56   The stack layout we want looks like this:
57
58   |   Return address from ffi_call_DARWIN      |	higher addresses
59   |--------------------------------------------|
60   |   Previous backchain pointer      4/8      |	    stack pointer here
61   |--------------------------------------------|-\ <<< on entry to
62   |   Saved r28-r31                 (4/8)*4    | |	    ffi_call_DARWIN
63   |--------------------------------------------| |
64   |   Parameters      (at least 8*(4/8)=32/64) | | (176) +112 - +288
65   |--------------------------------------------| |
66   |   Space for GPR2                  4/8      | |
67   |--------------------------------------------| |	stack	|
68   |   Reserved						 (4/8)*2    | |	grows	|
69   |--------------------------------------------| |	down	V
70   |   Space for callee's LR           4/8      | |
71   |--------------------------------------------| |	lower addresses
72   |   Saved CR                        4/8      | |
73   |--------------------------------------------| |     stack pointer here
74   |   Current backchain pointer       4/8      | |     during
75   |--------------------------------------------|-/ <<< ffi_call_DARWIN
76
77	Note: ppc64 CR is saved in the low word of a long on the stack.
78*/
79
80/*@-exportheader@*/
81void
82ffi_prep_args(
83	extended_cif*	inEcif,
84	unsigned *const	stack)
85/*@=exportheader@*/
86{
87	/*	Copy the ecif to a local var so we can trample the arg.
88		BC note: test this with GP later for possible problems...	*/
89	volatile extended_cif*	ecif	= inEcif;
90
91	const unsigned bytes	= ecif->cif->bytes;
92	const unsigned flags	= ecif->cif->flags;
93
94	/*	Cast the stack arg from int* to long*. sizeof(long) == 4 in 32-bit mode
95		and 8 in 64-bit mode.	*/
96	unsigned long *const longStack	= (unsigned long *const)stack;
97
98	/* 'stacktop' points at the previous backchain pointer.	*/
99#if defined(__ppc64__)
100	//	In ppc-darwin.s, an extra 96 bytes is reserved for the linkage area,
101	//	saved registers, and an extra FPR.
102	unsigned long *const stacktop	=
103		(unsigned long *)(unsigned long)((char*)longStack + bytes + 96);
104#elif defined(__ppc__)
105	unsigned long *const stacktop	= longStack + (bytes / sizeof(long));
106#else
107#error undefined architecture
108#endif
109
110	/* 'fpr_base' points at the space for fpr1, and grows upwards as
111		we use FPR registers.  */
112	double*		fpr_base = (double*)(stacktop - ASM_NEEDS_REGISTERS) -
113		NUM_FPR_ARG_REGISTERS;
114
115#if defined(__ppc64__)
116	//	64-bit saves an extra register, and uses an extra FPR. Knock fpr_base
117	//	down a couple pegs.
118	fpr_base -= 2;
119#endif
120
121	unsigned int	fparg_count = 0;
122
123	/* 'next_arg' grows up as we put parameters in it.  */
124	unsigned long*	next_arg = longStack + 6; /* 6 reserved positions.  */
125
126	int				i;
127	double			double_tmp;
128	void**			p_argv = ecif->avalue;
129	unsigned long	gprvalue;
130	ffi_type**		ptr = ecif->cif->arg_types;
131
132	/* Check that everything starts aligned properly.  */
133	FFI_ASSERT(stack == SF_ROUND(stack));
134	FFI_ASSERT(stacktop == SF_ROUND(stacktop));
135	FFI_ASSERT(bytes == SF_ROUND(bytes));
136
137	/*	Deal with return values that are actually pass-by-reference.
138		Rule:
139		Return values are referenced by r3, so r4 is the first parameter.  */
140
141	if (flags & FLAG_RETVAL_REFERENCE)
142		*next_arg++ = (unsigned long)(char*)ecif->rvalue;
143
144	/* Now for the arguments.  */
145	for (i = ecif->cif->nargs; i > 0; i--, ptr++, p_argv++)
146    {
147		switch ((*ptr)->type)
148		{
149			/*	If a floating-point parameter appears before all of the general-
150				purpose registers are filled, the corresponding GPRs that match
151				the size of the floating-point parameter are shadowed for the
152				benefit of vararg and pre-ANSI functions.	*/
153			case FFI_TYPE_FLOAT:
154				double_tmp = *(float*)*p_argv;
155
156				if (fparg_count < NUM_FPR_ARG_REGISTERS)
157					*fpr_base++ = double_tmp;
158
159				*(double*)next_arg = double_tmp;
160
161				next_arg++;
162				fparg_count++;
163				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
164
165				break;
166
167			case FFI_TYPE_DOUBLE:
168				double_tmp = *(double*)*p_argv;
169
170				if (fparg_count < NUM_FPR_ARG_REGISTERS)
171					*fpr_base++ = double_tmp;
172
173				*(double*)next_arg = double_tmp;
174
175				next_arg += MODE_CHOICE(2,1);
176				fparg_count++;
177				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
178
179				break;
180
181#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
182			case FFI_TYPE_LONGDOUBLE:
183#if defined(__ppc64__)
184				if (fparg_count < NUM_FPR_ARG_REGISTERS)
185					*(long double*)fpr_base	= *(long double*)*p_argv;
186#elif defined(__ppc__)
187				if (fparg_count < NUM_FPR_ARG_REGISTERS - 1)
188					*(long double*)fpr_base	= *(long double*)*p_argv;
189				else if (fparg_count == NUM_FPR_ARG_REGISTERS - 1)
190					*(double*)fpr_base	= *(double*)*p_argv;
191#else
192#error undefined architecture
193#endif
194
195				*(long double*)next_arg	= *(long double*)*p_argv;
196				fparg_count += 2;
197				fpr_base += 2;
198				next_arg += MODE_CHOICE(4,2);
199				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
200
201				break;
202#endif	//	FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
203
204			case FFI_TYPE_UINT64:
205			case FFI_TYPE_SINT64:
206#if defined(__ppc64__)
207				gprvalue = *(long long*)*p_argv;
208				goto putgpr;
209#elif defined(__ppc__)
210				*(long long*)next_arg = *(long long*)*p_argv;
211				next_arg += 2;
212				break;
213#else
214#error undefined architecture
215#endif
216
217			case FFI_TYPE_POINTER:
218				gprvalue = *(unsigned long*)*p_argv;
219				goto putgpr;
220
221			case FFI_TYPE_UINT8:
222				gprvalue = *(unsigned char*)*p_argv;
223				goto putgpr;
224
225			case FFI_TYPE_SINT8:
226				gprvalue = *(signed char*)*p_argv;
227				goto putgpr;
228
229			case FFI_TYPE_UINT16:
230				gprvalue = *(unsigned short*)*p_argv;
231				goto putgpr;
232
233			case FFI_TYPE_SINT16:
234				gprvalue = *(signed short*)*p_argv;
235				goto putgpr;
236
237			case FFI_TYPE_STRUCT:
238			{
239#if defined(__ppc64__)
240				unsigned int	gprSize = 0;
241				unsigned int	fprSize = 0;
242
243				ffi64_struct_to_reg_form(*ptr, (char*)*p_argv, NULL, &fparg_count,
244					(char*)next_arg, &gprSize, (char*)fpr_base, &fprSize);
245				next_arg += gprSize / sizeof(long);
246				fpr_base += fprSize / sizeof(double);
247
248#elif defined(__ppc__)
249				char*	dest_cpy = (char*)next_arg;
250
251			/*	Structures that match the basic modes (QI 1 byte, HI 2 bytes,
252				SI 4 bytes) are aligned as if they were those modes.
253				Structures with 3 byte in size are padded upwards.  */
254				unsigned size_al = (*ptr)->size;
255
256			/*	If the first member of the struct is a double, then align
257				the struct to double-word.  */
258				if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
259					size_al = ALIGN((*ptr)->size, 8);
260
261				if (ecif->cif->abi == FFI_DARWIN)
262				{
263					if (size_al < 3)
264						dest_cpy += 4 - size_al;
265				}
266
267				memcpy((char*)dest_cpy, (char*)*p_argv, size_al);
268				next_arg += (size_al + 3) / 4;
269#else
270#error undefined architecture
271#endif
272				break;
273			}
274
275			case FFI_TYPE_INT:
276			case FFI_TYPE_UINT32:
277			case FFI_TYPE_SINT32:
278				gprvalue = *(unsigned*)*p_argv;
279
280putgpr:
281				*next_arg++ = gprvalue;
282				break;
283
284			default:
285				break;
286		}
287	}
288
289  /* Check that we didn't overrun the stack...  */
290  //FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS);
291  //FFI_ASSERT((unsigned *)fpr_base
292  //	     <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
293  //FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
294}
295
296#if defined(__ppc64__)
297
298bool
299ffi64_struct_contains_fp(
300	const ffi_type*	inType)
301{
302	bool			containsFP	= false;
303	unsigned int	i;
304
305	for (i = 0; inType->elements[i] != NULL && !containsFP; i++)
306	{
307		if (inType->elements[i]->type == FFI_TYPE_FLOAT		||
308			inType->elements[i]->type == FFI_TYPE_DOUBLE	||
309			inType->elements[i]->type == FFI_TYPE_LONGDOUBLE)
310			containsFP = true;
311		else if (inType->elements[i]->type == FFI_TYPE_STRUCT)
312			containsFP = ffi64_struct_contains_fp(inType->elements[i]);
313	}
314
315	return containsFP;
316}
317
318#endif	// defined(__ppc64__)
319
320/* Perform machine dependent cif processing.  */
321ffi_status
322ffi_prep_cif_machdep(
323	ffi_cif*	cif)
324{
325	/* All this is for the DARWIN ABI.  */
326	int				i;
327	ffi_type**		ptr;
328	int				intarg_count = 0;
329	int				fparg_count = 0;
330	unsigned int	flags = 0;
331	unsigned int	size_al = 0;
332
333	/*	All the machine-independent calculation of cif->bytes will be wrong.
334		Redo the calculation for DARWIN.  */
335
336	/*	Space for the frame pointer, callee's LR, CR, etc, and for
337		the asm's temp regs.  */
338	unsigned int	bytes = (6 + ASM_NEEDS_REGISTERS) * sizeof(long);
339
340	/*	Return value handling.  The rules are as follows:
341		- 32-bit (or less) integer values are returned in gpr3;
342		- Structures of size <= 4 bytes also returned in gpr3;
343		- 64-bit integer values and structures between 5 and 8 bytes are
344			returned in gpr3 and gpr4;
345		- Single/double FP values are returned in fpr1;
346		- Long double FP (if not equivalent to double) values are returned in
347			fpr1 and fpr2;
348		- Larger structures values are allocated space and a pointer is passed
349			as the first argument.  */
350	switch (cif->rtype->type)
351	{
352#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
353		case FFI_TYPE_LONGDOUBLE:
354			flags |= FLAG_RETURNS_128BITS;
355			flags |= FLAG_RETURNS_FP;
356			break;
357#endif	// FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
358
359		case FFI_TYPE_DOUBLE:
360			flags |= FLAG_RETURNS_64BITS;
361			/* Fall through.  */
362		case FFI_TYPE_FLOAT:
363			flags |= FLAG_RETURNS_FP;
364			break;
365
366#if defined(__ppc64__)
367		case FFI_TYPE_POINTER:
368#endif
369		case FFI_TYPE_UINT64:
370		case FFI_TYPE_SINT64:
371			flags |= FLAG_RETURNS_64BITS;
372			break;
373
374		case FFI_TYPE_STRUCT:
375		{
376#if defined(__ppc64__)
377
378			if (ffi64_stret_needs_ptr(cif->rtype, NULL, NULL))
379			{
380				flags |= FLAG_RETVAL_REFERENCE;
381				flags |= FLAG_RETURNS_NOTHING;
382				intarg_count++;
383			}
384			else
385			{
386				flags |= FLAG_RETURNS_STRUCT;
387
388				if (ffi64_struct_contains_fp(cif->rtype))
389					flags |= FLAG_STRUCT_CONTAINS_FP;
390			}
391
392#elif defined(__ppc__)
393
394			flags |= FLAG_RETVAL_REFERENCE;
395			flags |= FLAG_RETURNS_NOTHING;
396			intarg_count++;
397
398#else
399#error undefined architecture
400#endif
401			break;
402		}
403
404		case FFI_TYPE_VOID:
405			flags |= FLAG_RETURNS_NOTHING;
406			break;
407
408		default:
409			/* Returns 32-bit integer, or similar.  Nothing to do here.  */
410			break;
411	}
412
413	/*	The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
414		first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
415		goes on the stack.  Structures are passed as a pointer to a copy of
416		the structure. Stuff on the stack needs to keep proper alignment.  */
417	for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
418	{
419		switch ((*ptr)->type)
420		{
421			case FFI_TYPE_FLOAT:
422			case FFI_TYPE_DOUBLE:
423				fparg_count++;
424				/*	If this FP arg is going on the stack, it must be
425					8-byte-aligned.  */
426				if (fparg_count > NUM_FPR_ARG_REGISTERS
427					&& intarg_count % 2 != 0)
428					intarg_count++;
429				break;
430
431#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
432			case FFI_TYPE_LONGDOUBLE:
433				fparg_count += 2;
434				/*	If this FP arg is going on the stack, it must be
435					8-byte-aligned.  */
436
437				if (
438#if defined(__ppc64__)
439					fparg_count > NUM_FPR_ARG_REGISTERS + 1
440#elif defined(__ppc__)
441					fparg_count > NUM_FPR_ARG_REGISTERS
442#else
443#error undefined architecture
444#endif
445					&& intarg_count % 2 != 0)
446					intarg_count++;
447
448				intarg_count += 2;
449				break;
450#endif	// FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
451
452			case FFI_TYPE_UINT64:
453			case FFI_TYPE_SINT64:
454				/*	'long long' arguments are passed as two words, but
455					either both words must fit in registers or both go
456					on the stack.  If they go on the stack, they must
457					be 8-byte-aligned.  */
458				if (intarg_count == NUM_GPR_ARG_REGISTERS - 1
459					|| (intarg_count >= NUM_GPR_ARG_REGISTERS
460					&& intarg_count % 2 != 0))
461					intarg_count++;
462
463				intarg_count += MODE_CHOICE(2,1);
464
465				break;
466
467			case FFI_TYPE_STRUCT:
468				size_al = (*ptr)->size;
469				/*	If the first member of the struct is a double, then align
470					the struct to double-word.  */
471				if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
472					size_al = ALIGN((*ptr)->size, 8);
473
474#if defined(__ppc64__)
475				// Look for FP struct members.
476				unsigned int	j;
477
478				for (j = 0; (*ptr)->elements[j] != NULL; j++)
479				{
480					if ((*ptr)->elements[j]->type == FFI_TYPE_FLOAT	||
481						(*ptr)->elements[j]->type == FFI_TYPE_DOUBLE)
482					{
483						fparg_count++;
484
485						if (fparg_count > NUM_FPR_ARG_REGISTERS)
486							intarg_count++;
487					}
488					else if ((*ptr)->elements[j]->type == FFI_TYPE_LONGDOUBLE)
489					{
490						fparg_count += 2;
491
492						if (fparg_count > NUM_FPR_ARG_REGISTERS + 1)
493							intarg_count += 2;
494					}
495					else
496						intarg_count++;
497				}
498#elif defined(__ppc__)
499				intarg_count += (size_al + 3) / 4;
500#else
501#error undefined architecture
502#endif
503
504				break;
505
506			default:
507				/*	Everything else is passed as a 4/8-byte word in a GPR, either
508					the object itself or a pointer to it.  */
509				intarg_count++;
510				break;
511		}
512	}
513
514	/* Space for the FPR registers, if needed.  */
515	if (fparg_count != 0)
516	{
517		flags |= FLAG_FP_ARGUMENTS;
518#if defined(__ppc64__)
519		bytes += (NUM_FPR_ARG_REGISTERS + 1) * sizeof(double);
520#elif defined(__ppc__)
521		bytes += NUM_FPR_ARG_REGISTERS * sizeof(double);
522#else
523#error undefined architecture
524#endif
525	}
526
527	/* Stack space.  */
528#if defined(__ppc64__)
529	if ((intarg_count + fparg_count) > NUM_GPR_ARG_REGISTERS)
530		bytes += (intarg_count + fparg_count) * sizeof(long);
531#elif defined(__ppc__)
532	if ((intarg_count + 2 * fparg_count) > NUM_GPR_ARG_REGISTERS)
533		bytes += (intarg_count + 2 * fparg_count) * sizeof(long);
534#else
535#error undefined architecture
536#endif
537	else
538		bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);
539
540	/* The stack space allocated needs to be a multiple of 16/32 bytes.  */
541	bytes = SF_ROUND(bytes);
542
543	cif->flags = flags;
544	cif->bytes = bytes;
545
546	return FFI_OK;
547}
548
549/*@-declundef@*/
550/*@-exportheader@*/
551extern void
552ffi_call_AIX(
553/*@out@*/	extended_cif*,
554			unsigned,
555			unsigned,
556/*@out@*/	unsigned*,
557			void (*fn)(void),
558			void (*fn2)(extended_cif*, unsigned *const));
559
560extern void
561ffi_call_DARWIN(
562/*@out@*/	extended_cif*,
563			unsigned long,
564			unsigned,
565/*@out@*/	unsigned*,
566			void (*fn)(void),
567			void (*fn2)(extended_cif*, unsigned *const));
568/*@=declundef@*/
569/*@=exportheader@*/
570
571void
572ffi_call(
573/*@dependent@*/	ffi_cif*	cif,
574				void		(*fn)(void),
575/*@out@*/		void*		rvalue,
576/*@dependent@*/	void**		avalue)
577{
578	extended_cif ecif;
579
580	ecif.cif = cif;
581	ecif.avalue = avalue;
582
583	/*	If the return value is a struct and we don't have a return
584		value address then we need to make one.  */
585	if ((rvalue == NULL) &&
586		(cif->rtype->type == FFI_TYPE_STRUCT))
587	{
588		/*@-sysunrecog@*/
589		ecif.rvalue = alloca(cif->rtype->size);
590		/*@=sysunrecog@*/
591	}
592	else
593		ecif.rvalue = rvalue;
594
595	switch (cif->abi)
596	{
597		case FFI_AIX:
598			/*@-usedef@*/
599			ffi_call_AIX(&ecif, -cif->bytes,
600				cif->flags, ecif.rvalue, fn, ffi_prep_args);
601			/*@=usedef@*/
602			break;
603
604		case FFI_DARWIN:
605			/*@-usedef@*/
606			ffi_call_DARWIN(&ecif, -(long)cif->bytes,
607				cif->flags, ecif.rvalue, fn, ffi_prep_args);
608			/*@=usedef@*/
609			break;
610
611		default:
612			FFI_ASSERT(0);
613			break;
614    }
615}
616
/* here I'd like to add the stack frame layout we use in darwin_closure.S
   and aix_closure.S
619
620   SP previous -> +---------------------------------------+ <--- child frame
621		  | back chain to caller 4                |
622		  +---------------------------------------+ 4
623		  | saved CR 4                            |
624		  +---------------------------------------+ 8
625		  | saved LR 4                            |
626		  +---------------------------------------+ 12
627		  | reserved for compilers 4              |
628		  +---------------------------------------+ 16
629		  | reserved for binders 4                |
630		  +---------------------------------------+ 20
631		  | saved TOC pointer 4                   |
632		  +---------------------------------------+ 24
633		  | always reserved 8*4=32 (previous GPRs)|
634		  | according to the linkage convention   |
635		  | from AIX                              |
636		  +---------------------------------------+ 56
637		  | our FPR area 13*8=104                 |
638		  | f1                                    |
639		  | .                                     |
640		  | f13                                   |
641		  +---------------------------------------+ 160
642		  | result area 8                         |
643		  +---------------------------------------+ 168
		  | alignment to the next multiple of 16  |
645SP current -->    +---------------------------------------+ 176 <- parent frame
646		  | back chain to caller 4                |
647		  +---------------------------------------+ 180
648		  | saved CR 4                            |
649		  +---------------------------------------+ 184
650		  | saved LR 4                            |
651		  +---------------------------------------+ 188
652		  | reserved for compilers 4              |
653		  +---------------------------------------+ 192
654		  | reserved for binders 4                |
655		  +---------------------------------------+ 196
656		  | saved TOC pointer 4                   |
657		  +---------------------------------------+ 200
658		  | always reserved 8*4=32  we store our  |
659		  | GPRs here                             |
660		  | r3                                    |
661		  | .                                     |
662		  | r10                                   |
663		  +---------------------------------------+ 232
664		  | overflow part                         |
665		  +---------------------------------------+ xxx
666		  | ????                                  |
667		  +---------------------------------------+ xxx
668*/
669
670#if !defined(POWERPC_DARWIN)
671
672#define MIN_LINE_SIZE 32
673
/*	Execute the dcbf / sync / icbi / sync / isync instruction sequence on
	the address 'addr'.  The body is empty when _AIX is defined.  */
static void
flush_icache(
	char*	addr)
{
#if !defined(_AIX)
	__asm__ volatile (
		"dcbf 0,%0\n" "sync\n" "icbi 0,%0\n" "sync\n" "isync"
		:					/* no outputs */
		: "r" (addr)		/* %0 */
		: "memory");
#endif
}
688
689static void
690flush_range(
691	char*	addr,
692	int		size)
693{
694	int i;
695
696	for (i = 0; i < size; i += MIN_LINE_SIZE)
697		flush_icache(addr + i);
698
699	flush_icache(addr + size - 1);
700}
701
702#endif	// !defined(POWERPC_DARWIN)
703
704ffi_status
705ffi_prep_closure(
706	ffi_closure*	closure,
707	ffi_cif*		cif,
708	void			(*fun)(ffi_cif*, void*, void**, void*),
709	void*			user_data)
710{
711	switch (cif->abi)
712	{
713		case FFI_DARWIN:
714		{
715			FFI_ASSERT (cif->abi == FFI_DARWIN);
716
717			unsigned int*	tramp = (unsigned int*)&closure->tramp[0];
718
719#if defined(__ppc64__)
720			tramp[0] = 0x7c0802a6;	//	mflr	r0
721			tramp[1] = 0x429f0005;	//	bcl		20,31,+0x8
722			tramp[2] = 0x7d6802a6;	//	mflr	r11
723			tramp[3] = 0x7c0803a6;	//	mtlr	r0
724			tramp[4] = 0xe98b0018;	//	ld		r12,24(r11)
725			tramp[5] = 0x7d8903a6;	//	mtctr	r12
726			tramp[6] = 0xe96b0020;	//	ld		r11,32(r11)
727			tramp[7] = 0x4e800420;	//	bctr
728			*(unsigned long*)&tramp[8] = (unsigned long)ffi_closure_ASM;
729			*(unsigned long*)&tramp[10] = (unsigned long)closure;
730#elif defined(__ppc__)
731			tramp[0] = 0x7c0802a6;	//	mflr	r0
732			tramp[1] = 0x429f0005;	//	bcl		20,31,+0x8
733			tramp[2] = 0x7d6802a6;	//	mflr	r11
734			tramp[3] = 0x7c0803a6;	//	mtlr	r0
735			tramp[4] = 0x818b0018;	//	lwz		r12,24(r11)
736			tramp[5] = 0x7d8903a6;	//	mtctr	r12
737			tramp[6] = 0x816b001c;	//	lwz		r11,28(r11)
738			tramp[7] = 0x4e800420;	//	bctr
739			tramp[8] = (unsigned long)ffi_closure_ASM;
740			tramp[9] = (unsigned long)closure;
741#else
742#error undefined architecture
743#endif
744
745			closure->cif = cif;
746			closure->fun = fun;
747			closure->user_data = user_data;
748
749			// Flush the icache. Only necessary on Darwin.
750#if defined(POWERPC_DARWIN)
751			sys_icache_invalidate(closure->tramp, FFI_TRAMPOLINE_SIZE);
752#else
753			flush_range(closure->tramp, FFI_TRAMPOLINE_SIZE);
754#endif
755
756			break;
757		}
758
759		case FFI_AIX:
760		{
761			FFI_ASSERT (cif->abi == FFI_AIX);
762
763			ffi_aix_trampoline_struct*	tramp_aix =
764				(ffi_aix_trampoline_struct*)(closure->tramp);
765			aix_fd*	fd = (aix_fd*)(void*)ffi_closure_ASM;
766
767			tramp_aix->code_pointer = fd->code_pointer;
768			tramp_aix->toc = fd->toc;
769			tramp_aix->static_chain = closure;
770			closure->cif = cif;
771			closure->fun = fun;
772			closure->user_data = user_data;
773			break;
774		}
775
776		default:
777			return FFI_BAD_ABI;
778	}
779
780	return FFI_OK;
781}
782
#if defined(__ppc__)
	/*	A long double viewed as its two constituent doubles.  */
	typedef double ldbits[2];

	/*	Lets ffi_closure_helper_DARWIN assemble a long double from two
		separately copied doubles (lb) and hand it on as one value (ld).  */
	typedef union
	{
		ldbits		lb;
		long double	ld;
	} ldu;
#endif

/*	One saved floating-point register slot, readable either as the double
	that was stored or as a float written back in its place.  */
typedef union
{
	float	f;
	double	d;
} ffi_dblfl;
798
799/*	The trampoline invokes ffi_closure_ASM, and on entry, r11 holds the
800	address of the closure. After storing the registers that could possibly
801	contain parameters to be passed into the stack frame and setting up space
802	for a return value, ffi_closure_ASM invokes the following helper function
803	to do most of the work.  */
804int
805ffi_closure_helper_DARWIN(
806	ffi_closure*	closure,
807	void*			rvalue,
808	unsigned long*	pgr,
809	ffi_dblfl*		pfr)
810{
811	/*	rvalue is the pointer to space for return value in closure assembly
812		pgr is the pointer to where r3-r10 are stored in ffi_closure_ASM
813		pfr is the pointer to where f1-f13 are stored in ffi_closure_ASM.  */
814
815#if defined(__ppc__)
816	ldu	temp_ld;
817#endif
818
819	double				temp;
820	unsigned int		i;
821	unsigned int		nf = 0;	/* number of FPRs already used.  */
822	unsigned int		ng = 0;	/* number of GPRs already used.  */
823	ffi_cif*			cif = closure->cif;
824	long				avn = cif->nargs;
825	void**				avalue = alloca(cif->nargs * sizeof(void*));
826	ffi_type**			arg_types = cif->arg_types;
827
828	/*	Copy the caller's structure return value address so that the closure
829		returns the data directly to the caller.  */
830#if defined(__ppc64__)
831	if (cif->rtype->type == FFI_TYPE_STRUCT &&
832		ffi64_stret_needs_ptr(cif->rtype, NULL, NULL))
833#elif defined(__ppc__)
834	if (cif->rtype->type == FFI_TYPE_STRUCT)
835#else
836#error undefined architecture
837#endif
838	{
839		rvalue = (void*)*pgr;
840		pgr++;
841		ng++;
842	}
843
844	/* Grab the addresses of the arguments from the stack frame.  */
845	for (i = 0; i < avn; i++)
846	{
847		switch (arg_types[i]->type)
848		{
849			case FFI_TYPE_SINT8:
850			case FFI_TYPE_UINT8:
851				avalue[i] = (char*)pgr + MODE_CHOICE(3,7);
852				ng++;
853				pgr++;
854				break;
855
856			case FFI_TYPE_SINT16:
857			case FFI_TYPE_UINT16:
858				avalue[i] = (char*)pgr + MODE_CHOICE(2,6);
859				ng++;
860				pgr++;
861				break;
862
863#if defined(__ppc__)
864			case FFI_TYPE_POINTER:
865#endif
866			case FFI_TYPE_SINT32:
867			case FFI_TYPE_UINT32:
868				avalue[i] = (char*)pgr + MODE_CHOICE(0,4);
869				ng++;
870				pgr++;
871
872				break;
873
874			case FFI_TYPE_STRUCT:
875				if (cif->abi == FFI_DARWIN)
876				{
877#if defined(__ppc64__)
878					unsigned int	gprSize = 0;
879					unsigned int	fprSize	= 0;
880					unsigned int	savedFPRSize = fprSize;
881
882					avalue[i] = alloca(arg_types[i]->size);
883					ffi64_struct_to_ram_form(arg_types[i], (const char*)pgr,
884						&gprSize, (const char*)pfr, &fprSize, &nf, avalue[i], NULL);
885
886					ng	+= gprSize / sizeof(long);
887					pgr	+= gprSize / sizeof(long);
888					pfr	+= (fprSize - savedFPRSize) / sizeof(double);
889
890#elif defined(__ppc__)
891					/*	Structures that match the basic modes (QI 1 byte, HI 2 bytes,
892						SI 4 bytes) are aligned as if they were those modes.  */
893					unsigned int	size_al	= size_al = arg_types[i]->size;
894
895					/*	If the first member of the struct is a double, then align
896						the struct to double-word.  */
897					if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
898						size_al = ALIGN(arg_types[i]->size, 8);
899
900					if (size_al < 3)
901						avalue[i] = (void*)pgr + MODE_CHOICE(4,8) - size_al;
902					else
903						avalue[i] = (void*)pgr;
904
905					ng	+= (size_al + 3) / sizeof(long);
906					pgr += (size_al + 3) / sizeof(long);
907#else
908#error undefined architecture
909#endif
910				}
911
912				break;
913
914#if defined(__ppc64__)
915			case FFI_TYPE_POINTER:
916#endif
917			case FFI_TYPE_SINT64:
918			case FFI_TYPE_UINT64:
919				/* Long long ints are passed in 1 or 2 GPRs.  */
920				avalue[i] = pgr;
921				ng += MODE_CHOICE(2,1);
922				pgr += MODE_CHOICE(2,1);
923
924				break;
925
926			case FFI_TYPE_FLOAT:
927				/*	A float value consumes a GPR.
928					There are 13 64-bit floating point registers.  */
929				if (nf < NUM_FPR_ARG_REGISTERS)
930				{
931					temp = pfr->d;
932					pfr->f = (float)temp;
933					avalue[i] = pfr;
934					pfr++;
935				}
936				else
937					avalue[i] = pgr;
938
939				nf++;
940				ng++;
941				pgr++;
942				break;
943
944			case FFI_TYPE_DOUBLE:
945				/*	A double value consumes one or two GPRs.
946					There are 13 64bit floating point registers.  */
947				if (nf < NUM_FPR_ARG_REGISTERS)
948				{
949					avalue[i] = pfr;
950					pfr++;
951				}
952				else
953					avalue[i] = pgr;
954
955				nf++;
956				ng += MODE_CHOICE(2,1);
957				pgr += MODE_CHOICE(2,1);
958
959				break;
960
961#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
962
963			case FFI_TYPE_LONGDOUBLE:
964#if defined(__ppc64__)
965				if (nf < NUM_FPR_ARG_REGISTERS)
966				{
967					avalue[i] = pfr;
968					pfr += 2;
969				}
970#elif defined(__ppc__)
971				/*	A long double value consumes 2/4 GPRs and 2 FPRs.
972					There are 13 64bit floating point registers.  */
973				if (nf < NUM_FPR_ARG_REGISTERS - 1)
974				{
975					avalue[i] = pfr;
976					pfr += 2;
977				}
978				/*	Here we have the situation where one part of the long double
979					is stored in fpr13 and the other part is already on the stack.
980					We use a union to pass the long double to avalue[i].  */
981				else if (nf == NUM_FPR_ARG_REGISTERS - 1)
982				{
983					memcpy (&temp_ld.lb[0], pfr, sizeof(temp_ld.lb[0]));
984					memcpy (&temp_ld.lb[1], pgr + 2, sizeof(temp_ld.lb[1]));
985					avalue[i] = &temp_ld.ld;
986				}
987#else
988#error undefined architecture
989#endif
990				else
991					avalue[i] = pgr;
992
993				nf += 2;
994				ng += MODE_CHOICE(4,2);
995				pgr += MODE_CHOICE(4,2);
996
997				break;
998
999#endif	/*	FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE	*/
1000
1001			default:
1002				FFI_ASSERT(0);
1003				break;
1004		}
1005	}
1006
1007	(closure->fun)(cif, rvalue, avalue, closure->user_data);
1008
1009	/* Tell ffi_closure_ASM to perform return type promotions.  */
1010	return cif->rtype->type;
1011}
1012
1013#if defined(__ppc64__)
1014
1015/*	ffi64_struct_to_ram_form
1016
1017	Rebuild a struct's natural layout from buffers of concatenated registers.
1018	Return the number of registers used.
1019	inGPRs[0-7] == r3, inFPRs[0-7] == f1 ...
1020*/
1021void
1022ffi64_struct_to_ram_form(
1023	const ffi_type*	inType,
1024	const char*		inGPRs,
1025	unsigned int*	ioGPRMarker,
1026	const char*		inFPRs,
1027	unsigned int*	ioFPRMarker,
1028	unsigned int*	ioFPRsUsed,
1029	char*			outStruct,	// caller-allocated
1030	unsigned int*	ioStructMarker)
1031{
1032	unsigned int	srcGMarker		= 0;
1033	unsigned int	srcFMarker		= 0;
1034	unsigned int	savedFMarker	= 0;
1035	unsigned int	fprsUsed		= 0;
1036	unsigned int	savedFPRsUsed	= 0;
1037	unsigned int	destMarker		= 0;
1038
1039	static unsigned int	recurseCount	= 0;
1040
1041	if (ioGPRMarker)
1042		srcGMarker	= *ioGPRMarker;
1043
1044	if (ioFPRMarker)
1045	{
1046		srcFMarker		= *ioFPRMarker;
1047		savedFMarker	= srcFMarker;
1048	}
1049
1050	if (ioFPRsUsed)
1051	{
1052		fprsUsed		= *ioFPRsUsed;
1053		savedFPRsUsed	= fprsUsed;
1054	}
1055
1056	if (ioStructMarker)
1057		destMarker	= *ioStructMarker;
1058
1059	size_t			i;
1060
1061	switch (inType->size)
1062	{
1063		case 1: case 2: case 4:
1064			srcGMarker += 8 - inType->size;
1065			break;
1066
1067		default:
1068			break;
1069	}
1070
1071	for (i = 0; inType->elements[i] != NULL; i++)
1072	{
1073		switch (inType->elements[i]->type)
1074		{
1075			case FFI_TYPE_FLOAT:
1076				srcFMarker = ALIGN(srcFMarker, 4);
1077				srcGMarker = ALIGN(srcGMarker, 4);
1078				destMarker = ALIGN(destMarker, 4);
1079
1080				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1081				{
1082					*(float*)&outStruct[destMarker]	=
1083						(float)*(double*)&inFPRs[srcFMarker];
1084					srcFMarker += 8;
1085					fprsUsed++;
1086				}
1087				else
1088					*(float*)&outStruct[destMarker]	=
1089						(float)*(double*)&inGPRs[srcGMarker];
1090
1091				srcGMarker += 4;
1092				destMarker += 4;
1093
1094				// Skip to next GPR if next element won't fit and we're
1095				// not already at a register boundary.
1096				if (inType->elements[i + 1] != NULL && (destMarker % 8))
1097				{
1098					if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1099						(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1100						(ALIGN(srcGMarker, 8) - srcGMarker) < 2) &&
1101						(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1102						(ALIGN(srcGMarker, 8) - srcGMarker) < 4))
1103						srcGMarker	= ALIGN(srcGMarker, 8);
1104				}
1105
1106				break;
1107
1108			case FFI_TYPE_DOUBLE:
1109				srcFMarker = ALIGN(srcFMarker, 8);
1110				destMarker = ALIGN(destMarker, 8);
1111
1112				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1113				{
1114					*(double*)&outStruct[destMarker]	=
1115						*(double*)&inFPRs[srcFMarker];
1116					srcFMarker += 8;
1117					fprsUsed++;
1118				}
1119				else
1120					*(double*)&outStruct[destMarker]	=
1121						*(double*)&inGPRs[srcGMarker];
1122
1123				destMarker += 8;
1124
1125				// Skip next GPR
1126				srcGMarker += 8;
1127				srcGMarker = ALIGN(srcGMarker, 8);
1128
1129				break;
1130
1131			case FFI_TYPE_LONGDOUBLE:
1132				destMarker = ALIGN(destMarker, 16);
1133
1134				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1135				{
1136					srcFMarker = ALIGN(srcFMarker, 8);
1137					srcGMarker = ALIGN(srcGMarker, 8);
1138					*(long double*)&outStruct[destMarker]	=
1139						*(long double*)&inFPRs[srcFMarker];
1140					srcFMarker += 16;
1141					fprsUsed += 2;
1142				}
1143				else
1144				{
1145					srcFMarker = ALIGN(srcFMarker, 16);
1146					srcGMarker = ALIGN(srcGMarker, 16);
1147					*(long double*)&outStruct[destMarker]	=
1148						*(long double*)&inGPRs[srcGMarker];
1149				}
1150
1151				destMarker += 16;
1152
1153				// Skip next 2 GPRs
1154				srcGMarker += 16;
1155				srcGMarker = ALIGN(srcGMarker, 8);
1156
1157				break;
1158
1159			case FFI_TYPE_UINT8:
1160			case FFI_TYPE_SINT8:
1161			{
1162				if (inType->alignment == 1)	// chars only
1163				{
1164					if (inType->size == 1)
1165						outStruct[destMarker++] = inGPRs[srcGMarker++];
1166					else if (inType->size == 2)
1167					{
1168						outStruct[destMarker++] = inGPRs[srcGMarker++];
1169						outStruct[destMarker++] = inGPRs[srcGMarker++];
1170						i++;
1171					}
1172					else
1173					{
1174						memcpy(&outStruct[destMarker],
1175							&inGPRs[srcGMarker], inType->size);
1176						srcGMarker += inType->size;
1177						destMarker += inType->size;
1178						i += inType->size - 1;
1179					}
1180				}
1181				else	// chars and other stuff
1182				{
1183					outStruct[destMarker++] = inGPRs[srcGMarker++];
1184
1185					// Skip to next GPR if next element won't fit and we're
1186					// not already at a register boundary.
1187					if (inType->elements[i + 1] != NULL && (srcGMarker % 8))
1188					{
1189						if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1190							(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1191							(ALIGN(srcGMarker, 8) - srcGMarker) < 2) &&
1192							(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1193							(ALIGN(srcGMarker, 8) - srcGMarker) < 4))
1194							srcGMarker	= ALIGN(srcGMarker, inType->alignment);	// was 8
1195					}
1196				}
1197
1198				break;
1199			}
1200
1201			case FFI_TYPE_UINT16:
1202			case FFI_TYPE_SINT16:
1203				srcGMarker = ALIGN(srcGMarker, 2);
1204				destMarker = ALIGN(destMarker, 2);
1205
1206				*(short*)&outStruct[destMarker] =
1207					*(short*)&inGPRs[srcGMarker];
1208				srcGMarker += 2;
1209				destMarker += 2;
1210
1211				break;
1212
1213			case FFI_TYPE_INT:
1214			case FFI_TYPE_UINT32:
1215			case FFI_TYPE_SINT32:
1216				srcGMarker = ALIGN(srcGMarker, 4);
1217				destMarker = ALIGN(destMarker, 4);
1218
1219				*(int*)&outStruct[destMarker] =
1220					*(int*)&inGPRs[srcGMarker];
1221				srcGMarker += 4;
1222				destMarker += 4;
1223
1224				break;
1225
1226			case FFI_TYPE_POINTER:
1227			case FFI_TYPE_UINT64:
1228			case FFI_TYPE_SINT64:
1229				srcGMarker = ALIGN(srcGMarker, 8);
1230				destMarker = ALIGN(destMarker, 8);
1231
1232				*(long long*)&outStruct[destMarker] =
1233					*(long long*)&inGPRs[srcGMarker];
1234				srcGMarker += 8;
1235				destMarker += 8;
1236
1237				break;
1238
1239			case FFI_TYPE_STRUCT:
1240				recurseCount++;
1241				ffi64_struct_to_ram_form(inType->elements[i], inGPRs,
1242					&srcGMarker, inFPRs, &srcFMarker, &fprsUsed,
1243					outStruct, &destMarker);
1244				recurseCount--;
1245				break;
1246
1247			default:
1248				FFI_ASSERT(0);	// unknown element type
1249				break;
1250		}
1251	}
1252
1253	srcGMarker = ALIGN(srcGMarker, inType->alignment);
1254
1255	// Take care of the special case for 16-byte structs, but not for
1256	// nested structs.
1257	if (recurseCount == 0 && srcGMarker == 16)
1258	{
1259		*(long double*)&outStruct[0] = *(long double*)&inGPRs[0];
1260		srcFMarker	= savedFMarker;
1261		fprsUsed	= savedFPRsUsed;
1262	}
1263
1264	if (ioGPRMarker)
1265		*ioGPRMarker = ALIGN(srcGMarker, 8);
1266
1267	if (ioFPRMarker)
1268		*ioFPRMarker = srcFMarker;
1269
1270	if (ioFPRsUsed)
1271		*ioFPRsUsed	= fprsUsed;
1272
1273	if (ioStructMarker)
1274		*ioStructMarker	= ALIGN(destMarker, 8);
1275}
1276
1277/*	ffi64_struct_to_reg_form
1278
1279	Copy a struct's elements into buffers that can be sliced into registers.
1280	Return the sizes of the output buffers in bytes. Pass NULL buffer pointers
1281	to calculate size only.
1282	outGPRs[0-7] == r3, outFPRs[0-7] == f1 ...
1283*/
1284void
1285ffi64_struct_to_reg_form(
1286	const ffi_type*	inType,
1287	const char*		inStruct,
1288	unsigned int*	ioStructMarker,
1289	unsigned int*	ioFPRsUsed,
1290	char*			outGPRs,	// caller-allocated
1291	unsigned int*	ioGPRSize,
1292	char*			outFPRs,	// caller-allocated
1293	unsigned int*	ioFPRSize)
1294{
1295	size_t			i;
1296	unsigned int	srcMarker		= 0;
1297	unsigned int	destGMarker		= 0;
1298	unsigned int	destFMarker		= 0;
1299	unsigned int	savedFMarker	= 0;
1300	unsigned int	fprsUsed		= 0;
1301	unsigned int	savedFPRsUsed	= 0;
1302
1303	static unsigned int	recurseCount	= 0;
1304
1305	if (ioStructMarker)
1306		srcMarker	= *ioStructMarker;
1307
1308	if (ioFPRsUsed)
1309	{
1310		fprsUsed		= *ioFPRsUsed;
1311		savedFPRsUsed	= fprsUsed;
1312	}
1313
1314	if (ioGPRSize)
1315		destGMarker	= *ioGPRSize;
1316
1317	if (ioFPRSize)
1318	{
1319		destFMarker		= *ioFPRSize;
1320		savedFMarker	= destFMarker;
1321	}
1322
1323	switch (inType->size)
1324	{
1325		case 1: case 2: case 4:
1326			destGMarker += 8 - inType->size;
1327			break;
1328
1329		default:
1330			break;
1331	}
1332
1333	for (i = 0; inType->elements[i] != NULL; i++)
1334	{
1335		switch (inType->elements[i]->type)
1336		{
1337			// Shadow floating-point types in GPRs for vararg and pre-ANSI
1338			// functions.
1339			case FFI_TYPE_FLOAT:
1340				// Nudge markers to next 4/8-byte boundary
1341				srcMarker = ALIGN(srcMarker, 4);
1342				destGMarker = ALIGN(destGMarker, 4);
1343				destFMarker = ALIGN(destFMarker, 8);
1344
1345				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1346				{
1347					if (outFPRs != NULL && inStruct != NULL)
1348						*(double*)&outFPRs[destFMarker] =
1349							(double)*(float*)&inStruct[srcMarker];
1350
1351					destFMarker += 8;
1352					fprsUsed++;
1353				}
1354
1355				if (outGPRs != NULL && inStruct != NULL)
1356					*(double*)&outGPRs[destGMarker] =
1357						(double)*(float*)&inStruct[srcMarker];
1358
1359				srcMarker += 4;
1360				destGMarker += 4;
1361
1362				// Skip to next GPR if next element won't fit and we're
1363				// not already at a register boundary.
1364				if (inType->elements[i + 1] != NULL && (srcMarker % 8))
1365				{
1366					if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1367						(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1368						(ALIGN(destGMarker, 8) - destGMarker) < 2) &&
1369						(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1370						(ALIGN(destGMarker, 8) - destGMarker) < 4))
1371						destGMarker	= ALIGN(destGMarker, 8);
1372				}
1373
1374				break;
1375
1376			case FFI_TYPE_DOUBLE:
1377				srcMarker = ALIGN(srcMarker, 8);
1378				destFMarker = ALIGN(destFMarker, 8);
1379
1380				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1381				{
1382					if (outFPRs != NULL && inStruct != NULL)
1383						*(double*)&outFPRs[destFMarker] =
1384							*(double*)&inStruct[srcMarker];
1385
1386					destFMarker += 8;
1387					fprsUsed++;
1388				}
1389
1390				if (outGPRs != NULL && inStruct != NULL)
1391					*(double*)&outGPRs[destGMarker] =
1392						*(double*)&inStruct[srcMarker];
1393
1394				srcMarker += 8;
1395
1396				// Skip next GPR
1397				destGMarker += 8;
1398				destGMarker = ALIGN(destGMarker, 8);
1399
1400				break;
1401
1402			case FFI_TYPE_LONGDOUBLE:
1403				srcMarker = ALIGN(srcMarker, 16);
1404
1405				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1406				{
1407					destFMarker = ALIGN(destFMarker, 8);
1408					destGMarker = ALIGN(destGMarker, 8);
1409
1410					if (outFPRs != NULL && inStruct != NULL)
1411						*(long double*)&outFPRs[destFMarker] =
1412							*(long double*)&inStruct[srcMarker];
1413
1414					if (outGPRs != NULL && inStruct != NULL)
1415						*(long double*)&outGPRs[destGMarker] =
1416							*(long double*)&inStruct[srcMarker];
1417
1418					destFMarker += 16;
1419					fprsUsed += 2;
1420				}
1421				else
1422				{
1423				 	destGMarker = ALIGN(destGMarker, 16);
1424
1425					 if (outGPRs != NULL && inStruct != NULL)
1426						*(long double*)&outGPRs[destGMarker] =
1427							*(long double*)&inStruct[srcMarker];
1428				}
1429
1430				srcMarker += 16;
1431				destGMarker += 16;	// Skip next 2 GPRs
1432				destGMarker = ALIGN(destGMarker, 8);	// was 16
1433
1434				break;
1435
1436			case FFI_TYPE_UINT8:
1437			case FFI_TYPE_SINT8:
1438				if (inType->alignment == 1)	// bytes only
1439				{
1440					if (inType->size == 1)
1441					{
1442						if (outGPRs != NULL && inStruct != NULL)
1443							outGPRs[destGMarker] = inStruct[srcMarker];
1444
1445						srcMarker++;
1446						destGMarker++;
1447					}
1448					else if (inType->size == 2)
1449					{
1450						if (outGPRs != NULL && inStruct != NULL)
1451						{
1452							outGPRs[destGMarker] = inStruct[srcMarker];
1453							outGPRs[destGMarker + 1] = inStruct[srcMarker + 1];
1454						}
1455
1456						srcMarker += 2;
1457						destGMarker += 2;
1458
1459						i++;
1460					}
1461					else
1462					{
1463						if (outGPRs != NULL && inStruct != NULL)
1464						{
1465							// Avoid memcpy for small chunks.
1466							if (inType->size <= sizeof(long))
1467								*(long*)&outGPRs[destGMarker] =
1468									*(long*)&inStruct[srcMarker];
1469							else
1470								memcpy(&outGPRs[destGMarker],
1471									&inStruct[srcMarker], inType->size);
1472						}
1473
1474						srcMarker += inType->size;
1475						destGMarker += inType->size;
1476						i += inType->size - 1;
1477					}
1478				}
1479				else	// bytes and other stuff
1480				{
1481					if (outGPRs != NULL && inStruct != NULL)
1482						outGPRs[destGMarker] = inStruct[srcMarker];
1483
1484					srcMarker++;
1485					destGMarker++;
1486
1487					// Skip to next GPR if next element won't fit and we're
1488					// not already at a register boundary.
1489					if (inType->elements[i + 1] != NULL && (destGMarker % 8))
1490					{
1491						if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1492							(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1493							(ALIGN(destGMarker, 8) - destGMarker) < 2) &&
1494							(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1495							(ALIGN(destGMarker, 8) - destGMarker) < 4))
1496							destGMarker	= ALIGN(destGMarker, inType->alignment);	// was 8
1497					}
1498				}
1499
1500				break;
1501
1502			case FFI_TYPE_UINT16:
1503			case FFI_TYPE_SINT16:
1504				srcMarker = ALIGN(srcMarker, 2);
1505				destGMarker = ALIGN(destGMarker, 2);
1506
1507				if (outGPRs != NULL && inStruct != NULL)
1508					*(short*)&outGPRs[destGMarker] =
1509						*(short*)&inStruct[srcMarker];
1510
1511				srcMarker += 2;
1512				destGMarker += 2;
1513
1514				if (inType->elements[i + 1] == NULL)
1515					destGMarker	= ALIGN(destGMarker, inType->alignment);
1516
1517				break;
1518
1519			case FFI_TYPE_INT:
1520			case FFI_TYPE_UINT32:
1521			case FFI_TYPE_SINT32:
1522				srcMarker = ALIGN(srcMarker, 4);
1523				destGMarker = ALIGN(destGMarker, 4);
1524
1525				if (outGPRs != NULL && inStruct != NULL)
1526					*(int*)&outGPRs[destGMarker] =
1527						*(int*)&inStruct[srcMarker];
1528
1529				srcMarker += 4;
1530				destGMarker += 4;
1531
1532				break;
1533
1534			case FFI_TYPE_POINTER:
1535			case FFI_TYPE_UINT64:
1536			case FFI_TYPE_SINT64:
1537				srcMarker = ALIGN(srcMarker, 8);
1538				destGMarker = ALIGN(destGMarker, 8);
1539
1540				if (outGPRs != NULL && inStruct != NULL)
1541					*(long long*)&outGPRs[destGMarker] =
1542						*(long long*)&inStruct[srcMarker];
1543
1544				srcMarker += 8;
1545				destGMarker += 8;
1546
1547				if (inType->elements[i + 1] == NULL)
1548					destGMarker	= ALIGN(destGMarker, inType->alignment);
1549
1550				break;
1551
1552			case FFI_TYPE_STRUCT:
1553				recurseCount++;
1554				ffi64_struct_to_reg_form(inType->elements[i],
1555					inStruct, &srcMarker, &fprsUsed, outGPRs,
1556					&destGMarker, outFPRs, &destFMarker);
1557				recurseCount--;
1558				break;
1559
1560			default:
1561				FFI_ASSERT(0);
1562				break;
1563		}
1564	}
1565
1566	destGMarker	= ALIGN(destGMarker, inType->alignment);
1567
1568	// Take care of the special case for 16-byte structs, but not for
1569	// nested structs.
1570	if (recurseCount == 0 && destGMarker == 16)
1571	{
1572		if (outGPRs != NULL && inStruct != NULL)
1573			*(long double*)&outGPRs[0] = *(long double*)&inStruct[0];
1574
1575		destFMarker	= savedFMarker;
1576		fprsUsed	= savedFPRsUsed;
1577	}
1578
1579	if (ioStructMarker)
1580		*ioStructMarker	= ALIGN(srcMarker, 8);
1581
1582	if (ioFPRsUsed)
1583		*ioFPRsUsed	= fprsUsed;
1584
1585	if (ioGPRSize)
1586		*ioGPRSize = ALIGN(destGMarker, 8);
1587
1588	if (ioFPRSize)
1589		*ioFPRSize = ALIGN(destFMarker, 8);
1590}
1591
1592/*	ffi64_stret_needs_ptr
1593
1594	Determine whether a returned struct needs a pointer in r3 or can fit
1595	in registers.
1596*/
1597
1598bool
1599ffi64_stret_needs_ptr(
1600	const ffi_type*	inType,
1601	unsigned short*	ioGPRCount,
1602	unsigned short*	ioFPRCount)
1603{
1604	// Obvious case first- struct is larger than combined FPR size.
1605	if (inType->size > 14 * 8)
1606		return true;
1607
1608	// Now the struct can physically fit in registers, determine if it
1609	// also fits logically.
1610	bool			needsPtr	= false;
1611	unsigned short	gprsUsed	= 0;
1612	unsigned short	fprsUsed	= 0;
1613	size_t			i;
1614
1615	if (ioGPRCount)
1616		gprsUsed = *ioGPRCount;
1617
1618	if (ioFPRCount)
1619		fprsUsed = *ioFPRCount;
1620
1621	for (i = 0; inType->elements[i] != NULL && !needsPtr; i++)
1622	{
1623		switch (inType->elements[i]->type)
1624		{
1625			case FFI_TYPE_FLOAT:
1626			case FFI_TYPE_DOUBLE:
1627				gprsUsed++;
1628				fprsUsed++;
1629
1630				if (fprsUsed > 13)
1631					needsPtr = true;
1632
1633				break;
1634
1635			case FFI_TYPE_LONGDOUBLE:
1636				gprsUsed += 2;
1637				fprsUsed += 2;
1638
1639				if (fprsUsed > 14)
1640					needsPtr = true;
1641
1642				break;
1643
1644			case FFI_TYPE_UINT8:
1645			case FFI_TYPE_SINT8:
1646			{
1647				gprsUsed++;
1648
1649				if (gprsUsed > 8)
1650				{
1651					needsPtr = true;
1652					break;
1653				}
1654
1655				if (inType->elements[i + 1] == NULL)	// last byte in the struct
1656					break;
1657
1658				// Count possible contiguous bytes ahead, up to 8.
1659				unsigned short j;
1660
1661				for (j = 1; j < 8; j++)
1662				{
1663					if (inType->elements[i + j] == NULL ||
1664						!FFI_TYPE_1_BYTE(inType->elements[i + j]->type))
1665						break;
1666				}
1667
1668				i += j - 1;	// allow for i++ before the test condition
1669
1670				break;
1671			}
1672
1673			case FFI_TYPE_UINT16:
1674			case FFI_TYPE_SINT16:
1675			case FFI_TYPE_INT:
1676			case FFI_TYPE_UINT32:
1677			case FFI_TYPE_SINT32:
1678			case FFI_TYPE_POINTER:
1679			case FFI_TYPE_UINT64:
1680			case FFI_TYPE_SINT64:
1681				gprsUsed++;
1682
1683				if (gprsUsed > 8)
1684					needsPtr = true;
1685
1686				break;
1687
1688			case FFI_TYPE_STRUCT:
1689				needsPtr = ffi64_stret_needs_ptr(
1690					inType->elements[i], &gprsUsed, &fprsUsed);
1691
1692				break;
1693
1694			default:
1695				FFI_ASSERT(0);
1696				break;
1697		}
1698	}
1699
1700	if (ioGPRCount)
1701		*ioGPRCount = gprsUsed;
1702
1703	if (ioFPRCount)
1704		*ioFPRCount = fprsUsed;
1705
1706	return needsPtr;
1707}
1708
1709/*	ffi64_data_size
1710
1711	Calculate the size in bytes of an ffi type.
1712*/
1713
1714unsigned int
1715ffi64_data_size(
1716	const ffi_type*	inType)
1717{
1718	unsigned int	size = 0;
1719
1720	switch (inType->type)
1721	{
1722		case FFI_TYPE_UINT8:
1723		case FFI_TYPE_SINT8:
1724			size = 1;
1725			break;
1726
1727		case FFI_TYPE_UINT16:
1728		case FFI_TYPE_SINT16:
1729			size = 2;
1730			break;
1731
1732		case FFI_TYPE_INT:
1733		case FFI_TYPE_UINT32:
1734		case FFI_TYPE_SINT32:
1735		case FFI_TYPE_FLOAT:
1736			size = 4;
1737			break;
1738
1739		case FFI_TYPE_POINTER:
1740		case FFI_TYPE_UINT64:
1741		case FFI_TYPE_SINT64:
1742		case FFI_TYPE_DOUBLE:
1743			size = 8;
1744			break;
1745
1746		case FFI_TYPE_LONGDOUBLE:
1747			size = 16;
1748			break;
1749
1750		case FFI_TYPE_STRUCT:
1751			ffi64_struct_to_reg_form(
1752				inType, NULL, NULL, NULL, NULL, &size, NULL, NULL);
1753			break;
1754
1755		case FFI_TYPE_VOID:
1756			break;
1757
1758		default:
1759			FFI_ASSERT(0);
1760			break;
1761	}
1762
1763	return size;
1764}
1765
1766#endif	/*	defined(__ppc64__)	*/
1767#endif	/* __ppc__ || __ppc64__ */