1#if defined(__ppc__) || defined(__ppc64__)
2
3/* -----------------------------------------------------------------------
4   ffi.c - Copyright (c) 1998 Geoffrey Keating
5
6   PowerPC Foreign Function Interface
7
8   Darwin ABI support (c) 2001 John Hornkvist
9   AIX ABI support (c) 2002 Free Software Foundation, Inc.
10
11   Permission is hereby granted, free of charge, to any person obtaining
12   a copy of this software and associated documentation files (the
13   ``Software''), to deal in the Software without restriction, including
14   without limitation the rights to use, copy, modify, merge, publish,
15   distribute, sublicense, and/or sell copies of the Software, and to
16   permit persons to whom the Software is furnished to do so, subject to
17   the following conditions:
18
19   The above copyright notice and this permission notice shall be included
20   in all copies or substantial portions of the Software.
21
22   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
23   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
26   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28   OTHER DEALINGS IN THE SOFTWARE.
29   ----------------------------------------------------------------------- */
30
31#include <ffi.h>
32#include <ffi_common.h>
33
34#include <stdbool.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <ppc-darwin.h>
38#include <architecture/ppc/mode_independent_asm.h>
39
40#if 0 /* branch below doesn't work with 10.4u SDK */
41
42#if defined(POWERPC_DARWIN)
43#include <libkern/OSCacheControl.h>	// for sys_icache_invalidate()
44#endif
45
46#else
47
48#pragma weak sys_icache_invalidate
49extern void sys_icache_invalidate(void *start, size_t len);
50
51#endif
52
53extern void ffi_closure_ASM(void);
54
/*	Layout of a function descriptor: a C function pointer on this ABI
	refers to one of these two-word records.  ffi_prep_closure() copies
	both words out of ffi_closure_ASM's descriptor when it builds an AIX
	trampoline.  */
typedef struct aix_fd_struct	aix_fd;

struct aix_fd_struct {
	void*	code_pointer;	/* first word of the descriptor */
	void*	toc;			/* second word of the descriptor */
};
61
62/* ffi_prep_args is called by the assembly routine once stack space
63   has been allocated for the function's arguments.
64
65   The stack layout we want looks like this:
66
67   |   Return address from ffi_call_DARWIN      |	higher addresses
68   |--------------------------------------------|
69   |   Previous backchain pointer      4/8      |	    stack pointer here
70   |--------------------------------------------|-\ <<< on entry to
71   |   Saved r28-r31                 (4/8)*4    | |	    ffi_call_DARWIN
72   |--------------------------------------------| |
73   |   Parameters      (at least 8*(4/8)=32/64) | | (176) +112 - +288
74   |--------------------------------------------| |
75   |   Space for GPR2                  4/8      | |
76   |--------------------------------------------| |	stack	|
77   |   Reserved						 (4/8)*2    | |	grows	|
78   |--------------------------------------------| |	down	V
79   |   Space for callee's LR           4/8      | |
80   |--------------------------------------------| |	lower addresses
81   |   Saved CR                        4/8      | |
82   |--------------------------------------------| |     stack pointer here
83   |   Current backchain pointer       4/8      | |     during
84   |--------------------------------------------|-/ <<< ffi_call_DARWIN
85
86	Note: ppc64 CR is saved in the low word of a long on the stack.
87*/
88
/*	ffi_prep_args

	Marshal the argument values of one call into the stack frame that the
	assembly caller has already allocated.

	inEcif	- the extended cif for this call; this function reads
			  cif->bytes, cif->flags, cif->nargs, cif->arg_types, avalue and
			  rvalue from it.
	stack	- lowest address of the freshly allocated frame.  Arguments are
			  written starting six words above it (the six words are the
			  reserved positions skipped by 'next_arg' below).

	Every argument is written to the parameter area through 'next_arg'.
	Floating-point arguments are additionally copied to the FPR area through
	'fpr_base' while FPR slots remain.  Nothing is returned; the frame is the
	output.

	NOTE(review): MODE_CHOICE(a,b) is assumed to select 'a' in 32-bit builds
	and 'b' in 64-bit builds -- confirm against mode_independent_asm.h.  */
/*@-exportheader@*/
void
ffi_prep_args(
	extended_cif*	inEcif,
	unsigned *const	stack)
/*@=exportheader@*/
{
	/*	Copy the ecif to a local var so we can trample the arg.
		BC note: test this with GP later for possible problems...	*/
	volatile extended_cif*	ecif	= inEcif;

	const unsigned bytes	= ecif->cif->bytes;
	const unsigned flags	= ecif->cif->flags;

	/*	Cast the stack arg from int* to long*. sizeof(long) == 4 in 32-bit mode
		and 8 in 64-bit mode.	*/
	unsigned long *const longStack	= (unsigned long *const)stack;

	/* 'stacktop' points at the previous backchain pointer.	*/
#if defined(__ppc64__)
	//	In ppc-darwin.s, an extra 96 bytes is reserved for the linkage area,
	//	saved registers, and an extra FPR.
	unsigned long *const stacktop	=
		(unsigned long *)(unsigned long)((char*)longStack + bytes + 96);
#elif defined(__ppc__)
	unsigned long *const stacktop	= longStack + (bytes / sizeof(long));
#else
#error undefined architecture
#endif

	/* 'fpr_base' points at the space for fpr1, and grows upwards as
		we use FPR registers.  */
	double*		fpr_base = (double*)(stacktop - ASM_NEEDS_REGISTERS) -
		NUM_FPR_ARG_REGISTERS;

#if defined(__ppc64__)
	//	64-bit saves an extra register, and uses an extra FPR. Knock fpr_base
	//	down a couple pegs.
	fpr_base -= 2;
#endif

	/*	Number of FPR slots consumed so far; a long double counts as two.  */
	unsigned int	fparg_count = 0;

	/* 'next_arg' grows up as we put parameters in it.  */
	unsigned long*	next_arg = longStack + 6; /* 6 reserved positions.  */

	int				i;
	double			double_tmp;
	void**			p_argv = ecif->avalue;
	unsigned long	gprvalue;
	ffi_type**		ptr = ecif->cif->arg_types;

	/* Check that everything starts aligned properly.  */
	FFI_ASSERT(stack == SF_ROUND(stack));
	FFI_ASSERT(stacktop == SF_ROUND(stacktop));
	FFI_ASSERT(bytes == SF_ROUND(bytes));

	/*	Deal with return values that are actually pass-by-reference.
		Rule:
		Return values are referenced by r3, so r4 is the first parameter.  */

	if (flags & FLAG_RETVAL_REFERENCE)
		*next_arg++ = (unsigned long)(char*)ecif->rvalue;

	/* Now for the arguments.  'ptr' walks the type descriptors and 'p_argv'
		walks the matching value pointers in lock step.  */
	for (i = ecif->cif->nargs; i > 0; i--, ptr++, p_argv++)
    {
		switch ((*ptr)->type)
		{
			/*	If a floating-point parameter appears before all of the general-
				purpose registers are filled, the corresponding GPRs that match
				the size of the floating-point parameter are shadowed for the
				benefit of vararg and pre-ANSI functions.	*/
			case FFI_TYPE_FLOAT:
				/*	The value is widened to double for both copies.  */
				double_tmp = *(float*)*p_argv;

				if (fparg_count < NUM_FPR_ARG_REGISTERS)
					*fpr_base++ = double_tmp;

				*(double*)next_arg = double_tmp;

				/*	Although a double was stored, the parameter area only
					advances by one word for a float.  */
				next_arg++;
				fparg_count++;
				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);

				break;

			case FFI_TYPE_DOUBLE:
				double_tmp = *(double*)*p_argv;

				if (fparg_count < NUM_FPR_ARG_REGISTERS)
					*fpr_base++ = double_tmp;

				*(double*)next_arg = double_tmp;

				/*	8 bytes of parameter space: two words or one, by mode.  */
				next_arg += MODE_CHOICE(2,1);
				fparg_count++;
				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);

				break;

#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
			case FFI_TYPE_LONGDOUBLE:
#if defined(__ppc64__)
				if (fparg_count < NUM_FPR_ARG_REGISTERS)
					*(long double*)fpr_base	= *(long double*)*p_argv;
#elif defined(__ppc__)
				/*	With at least two FPR slots left the whole value is copied;
					with exactly one left only its first double is copied.  */
				if (fparg_count < NUM_FPR_ARG_REGISTERS - 1)
					*(long double*)fpr_base	= *(long double*)*p_argv;
				else if (fparg_count == NUM_FPR_ARG_REGISTERS - 1)
					*(double*)fpr_base	= *(double*)*p_argv;
#else
#error undefined architecture
#endif

				/*	The parameter-area copy is made unconditionally, and both
					the FPR count and fpr_base advance by two even when
					nothing was written to the FPR area.  */
				*(long double*)next_arg	= *(long double*)*p_argv;
				fparg_count += 2;
				fpr_base += 2;
				next_arg += MODE_CHOICE(4,2);
				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);

				break;
#endif	//	FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE

			case FFI_TYPE_UINT64:
			case FFI_TYPE_SINT64:
#if defined(__ppc64__)
				/*	Fits in one word here, so share the single-word tail.  */
				gprvalue = *(long long*)*p_argv;
				goto putgpr;
#elif defined(__ppc__)
				/*	Stored as two consecutive words.  */
				*(long long*)next_arg = *(long long*)*p_argv;
				next_arg += 2;
				break;
#else
#error undefined architecture
#endif

			case FFI_TYPE_POINTER:
				gprvalue = *(unsigned long*)*p_argv;
				goto putgpr;

			/*	Sub-word integers are loaded through their own type so that the
				conversion to unsigned long zero- or sign-extends them.  */
			case FFI_TYPE_UINT8:
				gprvalue = *(unsigned char*)*p_argv;
				goto putgpr;

			case FFI_TYPE_SINT8:
				gprvalue = *(signed char*)*p_argv;
				goto putgpr;

			case FFI_TYPE_UINT16:
				gprvalue = *(unsigned short*)*p_argv;
				goto putgpr;

			case FFI_TYPE_SINT16:
				gprvalue = *(signed short*)*p_argv;
				goto putgpr;

			case FFI_TYPE_STRUCT:
			{
#if defined(__ppc64__)
				/*	The helper reports through gprSize/fprSize how many bytes
					of each area it used; both cursors advance by that much.  */
				unsigned int	gprSize = 0;
				unsigned int	fprSize = 0;

				ffi64_struct_to_reg_form(*ptr, (char*)*p_argv, NULL, &fparg_count,
					(char*)next_arg, &gprSize, (char*)fpr_base, &fprSize);
				next_arg += gprSize / sizeof(long);
				fpr_base += fprSize / sizeof(double);

#elif defined(__ppc__)
				char*	dest_cpy = (char*)next_arg;

			/*	Structures that match the basic modes (QI 1 byte, HI 2 bytes,
				SI 4 bytes) are aligned as if they were those modes.
				Structures with 3 byte in size are padded upwards.  */
				unsigned size_al = (*ptr)->size;

			/*	If the first member of the struct is a double, then align
				the struct to double-word.  */
				if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
					size_al = ALIGN((*ptr)->size, 8);

				/*	On the Darwin ABI, 1- and 2-byte structs are placed at the
					high-address end of their word.  */
				if (ecif->cif->abi == FFI_DARWIN)
				{
					if (size_al < 3)
						dest_cpy += 4 - size_al;
				}

				/*	NOTE(review): size_al may exceed (*ptr)->size after the
					ALIGN above, in which case this reads past the declared
					size of the source object -- confirm that is acceptable.  */
				memcpy((char*)dest_cpy, (char*)*p_argv, size_al);
				next_arg += (size_al + 3) / 4;
#else
#error undefined architecture
#endif
				break;
			}

			case FFI_TYPE_INT:
			case FFI_TYPE_UINT32:
			case FFI_TYPE_SINT32:
				gprvalue = *(unsigned*)*p_argv;

			/*	Shared tail for every argument that occupies exactly one
				word of the parameter area.  */
putgpr:
				*next_arg++ = gprvalue;
				break;

			default:
				/*	Unrecognised type codes are skipped without consuming
					any parameter space.  */
				break;
		}
	}

  /* Check that we didn't overrun the stack...  */
  /* NOTE: the disabled checks below refer to gpr_base and intarg_count,
     neither of which is declared in this function.  */
  //FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS);
  //FFI_ASSERT((unsigned *)fpr_base
  //	     <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
  //FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
}
304
305#if defined(__ppc64__)
306
307bool
308ffi64_struct_contains_fp(
309	const ffi_type*	inType)
310{
311	bool			containsFP	= false;
312	unsigned int	i;
313
314	for (i = 0; inType->elements[i] != NULL && !containsFP; i++)
315	{
316		if (inType->elements[i]->type == FFI_TYPE_FLOAT		||
317			inType->elements[i]->type == FFI_TYPE_DOUBLE	||
318			inType->elements[i]->type == FFI_TYPE_LONGDOUBLE)
319			containsFP = true;
320		else if (inType->elements[i]->type == FFI_TYPE_STRUCT)
321			containsFP = ffi64_struct_contains_fp(inType->elements[i]);
322	}
323
324	return containsFP;
325}
326
327#endif	// defined(__ppc64__)
328
/*	Perform machine dependent cif processing.

	Recomputes two fields of 'cif' from its return type and argument types:
	  cif->flags - FLAG_* bits describing how the result is returned and
				   whether any floating-point arguments are present;
	  cif->bytes - size of the outgoing stack frame, rounded by SF_ROUND.

	Always returns FFI_OK.

	NOTE(review): MODE_CHOICE(a,b) is assumed to select 'a' in 32-bit builds
	and 'b' in 64-bit builds -- confirm against mode_independent_asm.h.  */
ffi_status
ffi_prep_cif_machdep(
	ffi_cif*	cif)
{
	/* All this is for the DARWIN ABI.  */
	int				i;
	ffi_type**		ptr;
	int				intarg_count = 0;	/* word slots needed by non-FP data */
	int				fparg_count = 0;	/* FPR slots needed */
	unsigned int	flags = 0;
	unsigned int	size_al = 0;

	/*	All the machine-independent calculation of cif->bytes will be wrong.
		Redo the calculation for DARWIN.  */

	/*	Space for the frame pointer, callee's LR, CR, etc, and for
		the asm's temp regs.  */
	unsigned int	bytes = (6 + ASM_NEEDS_REGISTERS) * sizeof(long);

	/*	Return value handling.  The rules are as follows:
		- 32-bit (or less) integer values are returned in gpr3;
		- Structures of size <= 4 bytes also returned in gpr3;
		- 64-bit integer values and structures between 5 and 8 bytes are
			returned in gpr3 and gpr4;
		- Single/double FP values are returned in fpr1;
		- Long double FP (if not equivalent to double) values are returned in
			fpr1 and fpr2;
		- Larger structures values are allocated space and a pointer is passed
			as the first argument.

		NOTE(review): the 32-bit struct branch below always returns by
		reference, whatever the struct's size -- confirm against the rules
		listed above.  */
	switch (cif->rtype->type)
	{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
		case FFI_TYPE_LONGDOUBLE:
			flags |= FLAG_RETURNS_128BITS;
			flags |= FLAG_RETURNS_FP;
			break;
#endif	// FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE

		case FFI_TYPE_DOUBLE:
			flags |= FLAG_RETURNS_64BITS;
			/* Fall through.  */
		case FFI_TYPE_FLOAT:
			flags |= FLAG_RETURNS_FP;
			break;

		/*	Pointers are 64-bit results only in the 64-bit build; in the
			32-bit build they take the default branch.  */
#if defined(__ppc64__)
		case FFI_TYPE_POINTER:
#endif
		case FFI_TYPE_UINT64:
		case FFI_TYPE_SINT64:
			flags |= FLAG_RETURNS_64BITS;
			break;

		case FFI_TYPE_STRUCT:
		{
#if defined(__ppc64__)

			/*	Either the caller supplies a result buffer, whose address
				then takes up the first integer slot, or the struct comes
				back in registers.  */
			if (ffi64_stret_needs_ptr(cif->rtype, NULL, NULL))
			{
				flags |= FLAG_RETVAL_REFERENCE;
				flags |= FLAG_RETURNS_NOTHING;
				intarg_count++;
			}
			else
			{
				flags |= FLAG_RETURNS_STRUCT;

				if (ffi64_struct_contains_fp(cif->rtype))
					flags |= FLAG_STRUCT_CONTAINS_FP;
			}

#elif defined(__ppc__)

			/*	Always by reference: the buffer address takes up the first
				integer slot.  */
			flags |= FLAG_RETVAL_REFERENCE;
			flags |= FLAG_RETURNS_NOTHING;
			intarg_count++;

#else
#error undefined architecture
#endif
			break;
		}

		case FFI_TYPE_VOID:
			flags |= FLAG_RETURNS_NOTHING;
			break;

		default:
			/* Returns 32-bit integer, or similar.  Nothing to do here.  */
			break;
	}

	/*	The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
		first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
		goes on the stack.  Stuff on the stack needs to keep proper alignment.

		NOTE(review): an earlier version of this comment said structures are
		passed as a pointer to a copy.  The struct case below instead reserves
		room for the structure's own contents (one slot per member in 64-bit
		mode, (size + 3) / 4 words in 32-bit mode) -- confirm which is
		intended.  */
	for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
	{
		switch ((*ptr)->type)
		{
			case FFI_TYPE_FLOAT:
			case FFI_TYPE_DOUBLE:
				fparg_count++;
				/*	If this FP arg is going on the stack, it must be
					8-byte-aligned.  */
				if (fparg_count > NUM_FPR_ARG_REGISTERS
					&& intarg_count % 2 != 0)
					intarg_count++;
				break;

#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
			case FFI_TYPE_LONGDOUBLE:
				fparg_count += 2;
				/*	If this FP arg is going on the stack, it must be
					8-byte-aligned.  */

				if (
#if defined(__ppc64__)
					fparg_count > NUM_FPR_ARG_REGISTERS + 1
#elif defined(__ppc__)
					fparg_count > NUM_FPR_ARG_REGISTERS
#else
#error undefined architecture
#endif
					&& intarg_count % 2 != 0)
					intarg_count++;

				intarg_count += 2;
				break;
#endif	// FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE

			case FFI_TYPE_UINT64:
			case FFI_TYPE_SINT64:
				/*	'long long' arguments are passed as two words, but
					either both words must fit in registers or both go
					on the stack.  If they go on the stack, they must
					be 8-byte-aligned.  */
				if (intarg_count == NUM_GPR_ARG_REGISTERS - 1
					|| (intarg_count >= NUM_GPR_ARG_REGISTERS
					&& intarg_count % 2 != 0))
					intarg_count++;

				intarg_count += MODE_CHOICE(2,1);

				break;

			case FFI_TYPE_STRUCT:
				/*	size_al is consumed only by the 32-bit branch below; the
					64-bit branch counts members instead.  */
				size_al = (*ptr)->size;
				/*	If the first member of the struct is a double, then align
					the struct to double-word.  */
				if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
					size_al = ALIGN((*ptr)->size, 8);

#if defined(__ppc64__)
				// Look for FP struct members.  Only direct members are
				// examined: a nested struct takes the final 'else' and is
				// counted as a single integer slot.
				unsigned int	j;

				for (j = 0; (*ptr)->elements[j] != NULL; j++)
				{
					if ((*ptr)->elements[j]->type == FFI_TYPE_FLOAT	||
						(*ptr)->elements[j]->type == FFI_TYPE_DOUBLE)
					{
						fparg_count++;

						/*	Out of FPRs: the member needs a word slot too.  */
						if (fparg_count > NUM_FPR_ARG_REGISTERS)
							intarg_count++;
					}
					else if ((*ptr)->elements[j]->type == FFI_TYPE_LONGDOUBLE)
					{
						fparg_count += 2;

						if (fparg_count > NUM_FPR_ARG_REGISTERS + 1)
							intarg_count += 2;
					}
					else
						intarg_count++;
				}
#elif defined(__ppc__)
				/*	Whole 4-byte words needed to hold the struct.  */
				intarg_count += (size_al + 3) / 4;
#else
#error undefined architecture
#endif

				break;

			default:
				/*	Everything else is passed as a 4/8-byte word in a GPR, either
					the object itself or a pointer to it.  */
				intarg_count++;
				break;
		}
	}

	/* Space for the FPR registers, if needed.  */
	if (fparg_count != 0)
	{
		flags |= FLAG_FP_ARGUMENTS;
#if defined(__ppc64__)
		bytes += (NUM_FPR_ARG_REGISTERS + 1) * sizeof(double);
#elif defined(__ppc__)
		bytes += NUM_FPR_ARG_REGISTERS * sizeof(double);
#else
#error undefined architecture
#endif
	}

	/* Stack space.  The 'else' after the #endif pairs with whichever 'if'
		the preprocessor kept: when the counted slots do not exceed
		NUM_GPR_ARG_REGISTERS, that many words are reserved anyway.  */
#if defined(__ppc64__)
	if ((intarg_count + fparg_count) > NUM_GPR_ARG_REGISTERS)
		bytes += (intarg_count + fparg_count) * sizeof(long);
#elif defined(__ppc__)
	/*	Each FPR slot is counted as two words here.  */
	if ((intarg_count + 2 * fparg_count) > NUM_GPR_ARG_REGISTERS)
		bytes += (intarg_count + 2 * fparg_count) * sizeof(long);
#else
#error undefined architecture
#endif
	else
		bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);

	/* The stack space allocated needs to be a multiple of 16/32 bytes.  */
	bytes = SF_ROUND(bytes);

	cif->flags = flags;
	cif->bytes = bytes;

	return FFI_OK;
}
557
558/*@-declundef@*/
559/*@-exportheader@*/
/*	Low-level call entry point used by ffi_call() for the FFI_AIX ABI.
	Only declared here; the definition is outside this file.
	ffi_call() passes, in order: the extended cif, the negated cif->bytes,
	cif->flags, the return-value buffer, the function to call, and
	ffi_prep_args as the callback that fills in the new frame.  */
extern void
ffi_call_AIX(
/*@out@*/	extended_cif*,
			unsigned,
			unsigned,
/*@out@*/	unsigned*,
			void (*fn)(void),
			void (*fn2)(extended_cif*, unsigned *const));

/*	Low-level call entry point used by ffi_call() for the FFI_DARWIN ABI.
	Only declared here; the definition is outside this file (the frame
	layout comment earlier in this file refers to ppc-darwin.s).
	The arguments have the same roles as for ffi_call_AIX, except that the
	negated frame size is passed as an unsigned long.  */
extern void
ffi_call_DARWIN(
/*@out@*/	extended_cif*,
			unsigned long,
			unsigned,
/*@out@*/	unsigned*,
			void (*fn)(void),
			void (*fn2)(extended_cif*, unsigned *const));
577/*@=declundef@*/
578/*@=exportheader@*/
579
580void
581ffi_call(
582/*@dependent@*/	ffi_cif*	cif,
583				void		(*fn)(void),
584/*@out@*/		void*		rvalue,
585/*@dependent@*/	void**		avalue)
586{
587	extended_cif ecif;
588
589	ecif.cif = cif;
590	ecif.avalue = avalue;
591
592	/*	If the return value is a struct and we don't have a return
593		value address then we need to make one.  */
594	if ((rvalue == NULL) &&
595		(cif->rtype->type == FFI_TYPE_STRUCT))
596	{
597		/*@-sysunrecog@*/
598		ecif.rvalue = alloca(cif->rtype->size);
599		/*@=sysunrecog@*/
600	}
601	else
602		ecif.rvalue = rvalue;
603
604	switch (cif->abi)
605	{
606		case FFI_AIX:
607			/*@-usedef@*/
608			ffi_call_AIX(&ecif, -cif->bytes,
609				cif->flags, ecif.rvalue, fn, ffi_prep_args);
610			/*@=usedef@*/
611			break;
612
613		case FFI_DARWIN:
614			/*@-usedef@*/
615			ffi_call_DARWIN(&ecif, -(long)cif->bytes,
616				cif->flags, ecif.rvalue, fn, ffi_prep_args);
617			/*@=usedef@*/
618			break;
619
620		default:
621			FFI_ASSERT(0);
622			break;
623    }
624}
625
/* Here is the stack frame layout we use in darwin_closure.S
   and aix_closure.S:
628
629   SP previous -> +---------------------------------------+ <--- child frame
630		  | back chain to caller 4                |
631		  +---------------------------------------+ 4
632		  | saved CR 4                            |
633		  +---------------------------------------+ 8
634		  | saved LR 4                            |
635		  +---------------------------------------+ 12
636		  | reserved for compilers 4              |
637		  +---------------------------------------+ 16
638		  | reserved for binders 4                |
639		  +---------------------------------------+ 20
640		  | saved TOC pointer 4                   |
641		  +---------------------------------------+ 24
642		  | always reserved 8*4=32 (previous GPRs)|
643		  | according to the linkage convention   |
644		  | from AIX                              |
645		  +---------------------------------------+ 56
646		  | our FPR area 13*8=104                 |
647		  | f1                                    |
648		  | .                                     |
649		  | f13                                   |
650		  +---------------------------------------+ 160
651		  | result area 8                         |
652		  +---------------------------------------+ 168
		  | alignment to the next multiple of 16  |
654SP current -->    +---------------------------------------+ 176 <- parent frame
655		  | back chain to caller 4                |
656		  +---------------------------------------+ 180
657		  | saved CR 4                            |
658		  +---------------------------------------+ 184
659		  | saved LR 4                            |
660		  +---------------------------------------+ 188
661		  | reserved for compilers 4              |
662		  +---------------------------------------+ 192
663		  | reserved for binders 4                |
664		  +---------------------------------------+ 196
665		  | saved TOC pointer 4                   |
666		  +---------------------------------------+ 200
667		  | always reserved 8*4=32  we store our  |
668		  | GPRs here                             |
669		  | r3                                    |
670		  | .                                     |
671		  | r10                                   |
672		  +---------------------------------------+ 232
673		  | overflow part                         |
674		  +---------------------------------------+ xxx
675		  | ????                                  |
676		  +---------------------------------------+ xxx
677*/
678
679#if !defined(POWERPC_DARWIN)
680
681#define MIN_LINE_SIZE 32
682
/*	Issue the PowerPC cache-maintenance sequence
		dcbf / sync / icbi / sync / isync
	for the single address 'addr'.  Going by the instruction mnemonics this
	flushes the data-cache block holding 'addr' and then invalidates the
	matching instruction-cache block -- see the PowerPC ISA for the exact
	semantics.

	When _AIX is defined the body is compiled out and the function does
	nothing.  */
static void
flush_icache(
	char*	addr)
{
#ifndef _AIX
	/*	The "memory" clobber tells the compiler that memory may be changed
		by the asm statement.  */
	__asm__ volatile (
		"dcbf 0,%0\n"
		"sync\n"
		"icbi 0,%0\n"
		"sync\n"
		"isync"
		: : "r" (addr) : "memory");
#endif
}
697
698static void
699flush_range(
700	char*	addr,
701	int		size)
702{
703	int i;
704
705	for (i = 0; i < size; i += MIN_LINE_SIZE)
706		flush_icache(addr + i);
707
708	flush_icache(addr + size - 1);
709}
710
711#endif	// !defined(POWERPC_DARWIN)
712
713ffi_status
714ffi_prep_closure(
715	ffi_closure*	closure,
716	ffi_cif*		cif,
717	void			(*fun)(ffi_cif*, void*, void**, void*),
718	void*			user_data)
719{
720	switch (cif->abi)
721	{
722		case FFI_DARWIN:
723		{
724			FFI_ASSERT (cif->abi == FFI_DARWIN);
725
726			unsigned int*	tramp = (unsigned int*)&closure->tramp[0];
727
728#if defined(__ppc64__)
729			tramp[0] = 0x7c0802a6;	//	mflr	r0
730			tramp[1] = 0x429f0005;	//	bcl		20,31,+0x8
731			tramp[2] = 0x7d6802a6;	//	mflr	r11
732			tramp[3] = 0x7c0803a6;	//	mtlr	r0
733			tramp[4] = 0xe98b0018;	//	ld		r12,24(r11)
734			tramp[5] = 0x7d8903a6;	//	mtctr	r12
735			tramp[6] = 0xe96b0020;	//	ld		r11,32(r11)
736			tramp[7] = 0x4e800420;	//	bctr
737			*(unsigned long*)&tramp[8] = (unsigned long)ffi_closure_ASM;
738			*(unsigned long*)&tramp[10] = (unsigned long)closure;
739#elif defined(__ppc__)
740			tramp[0] = 0x7c0802a6;	//	mflr	r0
741			tramp[1] = 0x429f0005;	//	bcl		20,31,+0x8
742			tramp[2] = 0x7d6802a6;	//	mflr	r11
743			tramp[3] = 0x7c0803a6;	//	mtlr	r0
744			tramp[4] = 0x818b0018;	//	lwz		r12,24(r11)
745			tramp[5] = 0x7d8903a6;	//	mtctr	r12
746			tramp[6] = 0x816b001c;	//	lwz		r11,28(r11)
747			tramp[7] = 0x4e800420;	//	bctr
748			tramp[8] = (unsigned long)ffi_closure_ASM;
749			tramp[9] = (unsigned long)closure;
750#else
751#error undefined architecture
752#endif
753
754			closure->cif = cif;
755			closure->fun = fun;
756			closure->user_data = user_data;
757
758			// Flush the icache. Only necessary on Darwin.
759#if defined(POWERPC_DARWIN)
760			sys_icache_invalidate(closure->tramp, FFI_TRAMPOLINE_SIZE);
761#else
762			flush_range(closure->tramp, FFI_TRAMPOLINE_SIZE);
763#endif
764
765			break;
766		}
767
768		case FFI_AIX:
769		{
770			FFI_ASSERT (cif->abi == FFI_AIX);
771
772			ffi_aix_trampoline_struct*	tramp_aix =
773				(ffi_aix_trampoline_struct*)(closure->tramp);
774			aix_fd*	fd = (aix_fd*)(void*)ffi_closure_ASM;
775
776			tramp_aix->code_pointer = fd->code_pointer;
777			tramp_aix->toc = fd->toc;
778			tramp_aix->static_chain = closure;
779			closure->cif = cif;
780			closure->fun = fun;
781			closure->user_data = user_data;
782			break;
783		}
784
785		default:
786			return FFI_BAD_ABI;
787	}
788
789	return FFI_OK;
790}
791
792#if defined(__ppc__)
793	typedef double ldbits[2];
794
795	typedef union
796	{
797		ldbits lb;
798		long double ld;
799	} ldu;
800#endif
801
/*	One saved floating-point register slot, readable either as a float or
	as a double.  The closure helper narrows a slot in place by reading
	'd' and writing 'f'.  */
typedef union
{
	float	f;	/* single-precision view */
	double	d;	/* double-precision view */
} ffi_dblfl;
807
808/*	The trampoline invokes ffi_closure_ASM, and on entry, r11 holds the
809	address of the closure. After storing the registers that could possibly
810	contain parameters to be passed into the stack frame and setting up space
811	for a return value, ffi_closure_ASM invokes the following helper function
812	to do most of the work.  */
813int
814ffi_closure_helper_DARWIN(
815	ffi_closure*	closure,
816	void*			rvalue,
817	unsigned long*	pgr,
818	ffi_dblfl*		pfr)
819{
820	/*	rvalue is the pointer to space for return value in closure assembly
821		pgr is the pointer to where r3-r10 are stored in ffi_closure_ASM
822		pfr is the pointer to where f1-f13 are stored in ffi_closure_ASM.  */
823
824#if defined(__ppc__)
825	ldu	temp_ld;
826#endif
827
828	double				temp;
829	unsigned int		i;
830	unsigned int		nf = 0;	/* number of FPRs already used.  */
831	unsigned int		ng = 0;	/* number of GPRs already used.  */
832	ffi_cif*			cif = closure->cif;
833	long				avn = cif->nargs;
834	void**				avalue = alloca(cif->nargs * sizeof(void*));
835	ffi_type**			arg_types = cif->arg_types;
836
837	/*	Copy the caller's structure return value address so that the closure
838		returns the data directly to the caller.  */
839#if defined(__ppc64__)
840	if (cif->rtype->type == FFI_TYPE_STRUCT &&
841		ffi64_stret_needs_ptr(cif->rtype, NULL, NULL))
842#elif defined(__ppc__)
843	if (cif->rtype->type == FFI_TYPE_STRUCT)
844#else
845#error undefined architecture
846#endif
847	{
848		rvalue = (void*)*pgr;
849		pgr++;
850		ng++;
851	}
852
853	/* Grab the addresses of the arguments from the stack frame.  */
854	for (i = 0; i < avn; i++)
855	{
856		switch (arg_types[i]->type)
857		{
858			case FFI_TYPE_SINT8:
859			case FFI_TYPE_UINT8:
860				avalue[i] = (char*)pgr + MODE_CHOICE(3,7);
861				ng++;
862				pgr++;
863				break;
864
865			case FFI_TYPE_SINT16:
866			case FFI_TYPE_UINT16:
867				avalue[i] = (char*)pgr + MODE_CHOICE(2,6);
868				ng++;
869				pgr++;
870				break;
871
872#if defined(__ppc__)
873			case FFI_TYPE_POINTER:
874#endif
875			case FFI_TYPE_SINT32:
876			case FFI_TYPE_UINT32:
877				avalue[i] = (char*)pgr + MODE_CHOICE(0,4);
878				ng++;
879				pgr++;
880
881				break;
882
883			case FFI_TYPE_STRUCT:
884				if (cif->abi == FFI_DARWIN)
885				{
886#if defined(__ppc64__)
887					unsigned int	gprSize = 0;
888					unsigned int	fprSize	= 0;
889					unsigned int	savedFPRSize = fprSize;
890
891					avalue[i] = alloca(arg_types[i]->size);
892					ffi64_struct_to_ram_form(arg_types[i], (const char*)pgr,
893						&gprSize, (const char*)pfr, &fprSize, &nf, avalue[i], NULL);
894
895					ng	+= gprSize / sizeof(long);
896					pgr	+= gprSize / sizeof(long);
897					pfr	+= (fprSize - savedFPRSize) / sizeof(double);
898
899#elif defined(__ppc__)
900					/*	Structures that match the basic modes (QI 1 byte, HI 2 bytes,
901						SI 4 bytes) are aligned as if they were those modes.  */
902					unsigned int	size_al	= size_al = arg_types[i]->size;
903
904					/*	If the first member of the struct is a double, then align
905						the struct to double-word.  */
906					if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
907						size_al = ALIGN(arg_types[i]->size, 8);
908
909					if (size_al < 3)
910						avalue[i] = (void*)pgr + MODE_CHOICE(4,8) - size_al;
911					else
912						avalue[i] = (void*)pgr;
913
914					ng	+= (size_al + 3) / sizeof(long);
915					pgr += (size_al + 3) / sizeof(long);
916#else
917#error undefined architecture
918#endif
919				}
920
921				break;
922
923#if defined(__ppc64__)
924			case FFI_TYPE_POINTER:
925#endif
926			case FFI_TYPE_SINT64:
927			case FFI_TYPE_UINT64:
928				/* Long long ints are passed in 1 or 2 GPRs.  */
929				avalue[i] = pgr;
930				ng += MODE_CHOICE(2,1);
931				pgr += MODE_CHOICE(2,1);
932
933				break;
934
935			case FFI_TYPE_FLOAT:
936				/*	A float value consumes a GPR.
937					There are 13 64-bit floating point registers.  */
938				if (nf < NUM_FPR_ARG_REGISTERS)
939				{
940					temp = pfr->d;
941					pfr->f = (float)temp;
942					avalue[i] = pfr;
943					pfr++;
944				}
945				else
946					avalue[i] = pgr;
947
948				nf++;
949				ng++;
950				pgr++;
951				break;
952
953			case FFI_TYPE_DOUBLE:
954				/*	A double value consumes one or two GPRs.
955					There are 13 64bit floating point registers.  */
956				if (nf < NUM_FPR_ARG_REGISTERS)
957				{
958					avalue[i] = pfr;
959					pfr++;
960				}
961				else
962					avalue[i] = pgr;
963
964				nf++;
965				ng += MODE_CHOICE(2,1);
966				pgr += MODE_CHOICE(2,1);
967
968				break;
969
970#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
971
972			case FFI_TYPE_LONGDOUBLE:
973#if defined(__ppc64__)
974				if (nf < NUM_FPR_ARG_REGISTERS)
975				{
976					avalue[i] = pfr;
977					pfr += 2;
978				}
979#elif defined(__ppc__)
980				/*	A long double value consumes 2/4 GPRs and 2 FPRs.
981					There are 13 64bit floating point registers.  */
982				if (nf < NUM_FPR_ARG_REGISTERS - 1)
983				{
984					avalue[i] = pfr;
985					pfr += 2;
986				}
987				/*	Here we have the situation where one part of the long double
988					is stored in fpr13 and the other part is already on the stack.
989					We use a union to pass the long double to avalue[i].  */
990				else if (nf == NUM_FPR_ARG_REGISTERS - 1)
991				{
992					memcpy (&temp_ld.lb[0], pfr, sizeof(temp_ld.lb[0]));
993					memcpy (&temp_ld.lb[1], pgr + 2, sizeof(temp_ld.lb[1]));
994					avalue[i] = &temp_ld.ld;
995				}
996#else
997#error undefined architecture
998#endif
999				else
1000					avalue[i] = pgr;
1001
1002				nf += 2;
1003				ng += MODE_CHOICE(4,2);
1004				pgr += MODE_CHOICE(4,2);
1005
1006				break;
1007
1008#endif	/*	FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE	*/
1009
1010			default:
1011				FFI_ASSERT(0);
1012				break;
1013		}
1014	}
1015
1016	(closure->fun)(cif, rvalue, avalue, closure->user_data);
1017
1018	/* Tell ffi_closure_ASM to perform return type promotions.  */
1019	return cif->rtype->type;
1020}
1021
1022#if defined(__ppc64__)
1023
1024/*	ffi64_struct_to_ram_form
1025
1026	Rebuild a struct's natural layout from buffers of concatenated registers.
1027	Return the number of registers used.
1028	inGPRs[0-7] == r3, inFPRs[0-7] == f1 ...
1029*/
1030void
1031ffi64_struct_to_ram_form(
1032	const ffi_type*	inType,
1033	const char*		inGPRs,
1034	unsigned int*	ioGPRMarker,
1035	const char*		inFPRs,
1036	unsigned int*	ioFPRMarker,
1037	unsigned int*	ioFPRsUsed,
1038	char*			outStruct,	// caller-allocated
1039	unsigned int*	ioStructMarker)
1040{
1041	unsigned int	srcGMarker		= 0;
1042	unsigned int	srcFMarker		= 0;
1043	unsigned int	savedFMarker	= 0;
1044	unsigned int	fprsUsed		= 0;
1045	unsigned int	savedFPRsUsed	= 0;
1046	unsigned int	destMarker		= 0;
1047
1048	static unsigned int	recurseCount	= 0;
1049
1050	if (ioGPRMarker)
1051		srcGMarker	= *ioGPRMarker;
1052
1053	if (ioFPRMarker)
1054	{
1055		srcFMarker		= *ioFPRMarker;
1056		savedFMarker	= srcFMarker;
1057	}
1058
1059	if (ioFPRsUsed)
1060	{
1061		fprsUsed		= *ioFPRsUsed;
1062		savedFPRsUsed	= fprsUsed;
1063	}
1064
1065	if (ioStructMarker)
1066		destMarker	= *ioStructMarker;
1067
1068	size_t			i;
1069
1070	switch (inType->size)
1071	{
1072		case 1: case 2: case 4:
1073			srcGMarker += 8 - inType->size;
1074			break;
1075
1076		default:
1077			break;
1078	}
1079
1080	for (i = 0; inType->elements[i] != NULL; i++)
1081	{
1082		switch (inType->elements[i]->type)
1083		{
1084			case FFI_TYPE_FLOAT:
1085				srcFMarker = ALIGN(srcFMarker, 4);
1086				srcGMarker = ALIGN(srcGMarker, 4);
1087				destMarker = ALIGN(destMarker, 4);
1088
1089				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1090				{
1091					*(float*)&outStruct[destMarker]	=
1092						(float)*(double*)&inFPRs[srcFMarker];
1093					srcFMarker += 8;
1094					fprsUsed++;
1095				}
1096				else
1097					*(float*)&outStruct[destMarker]	=
1098						(float)*(double*)&inGPRs[srcGMarker];
1099
1100				srcGMarker += 4;
1101				destMarker += 4;
1102
1103				// Skip to next GPR if next element won't fit and we're
1104				// not already at a register boundary.
1105				if (inType->elements[i + 1] != NULL && (destMarker % 8))
1106				{
1107					if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1108						(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1109						(ALIGN(srcGMarker, 8) - srcGMarker) < 2) &&
1110						(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1111						(ALIGN(srcGMarker, 8) - srcGMarker) < 4))
1112						srcGMarker	= ALIGN(srcGMarker, 8);
1113				}
1114
1115				break;
1116
1117			case FFI_TYPE_DOUBLE:
1118				srcFMarker = ALIGN(srcFMarker, 8);
1119				destMarker = ALIGN(destMarker, 8);
1120
1121				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1122				{
1123					*(double*)&outStruct[destMarker]	=
1124						*(double*)&inFPRs[srcFMarker];
1125					srcFMarker += 8;
1126					fprsUsed++;
1127				}
1128				else
1129					*(double*)&outStruct[destMarker]	=
1130						*(double*)&inGPRs[srcGMarker];
1131
1132				destMarker += 8;
1133
1134				// Skip next GPR
1135				srcGMarker += 8;
1136				srcGMarker = ALIGN(srcGMarker, 8);
1137
1138				break;
1139
1140			case FFI_TYPE_LONGDOUBLE:
1141				destMarker = ALIGN(destMarker, 16);
1142
1143				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1144				{
1145					srcFMarker = ALIGN(srcFMarker, 8);
1146					srcGMarker = ALIGN(srcGMarker, 8);
1147					*(long double*)&outStruct[destMarker]	=
1148						*(long double*)&inFPRs[srcFMarker];
1149					srcFMarker += 16;
1150					fprsUsed += 2;
1151				}
1152				else
1153				{
1154					srcFMarker = ALIGN(srcFMarker, 16);
1155					srcGMarker = ALIGN(srcGMarker, 16);
1156					*(long double*)&outStruct[destMarker]	=
1157						*(long double*)&inGPRs[srcGMarker];
1158				}
1159
1160				destMarker += 16;
1161
1162				// Skip next 2 GPRs
1163				srcGMarker += 16;
1164				srcGMarker = ALIGN(srcGMarker, 8);
1165
1166				break;
1167
1168			case FFI_TYPE_UINT8:
1169			case FFI_TYPE_SINT8:
1170			{
1171				if (inType->alignment == 1)	// chars only
1172				{
1173					if (inType->size == 1)
1174						outStruct[destMarker++] = inGPRs[srcGMarker++];
1175					else if (inType->size == 2)
1176					{
1177						outStruct[destMarker++] = inGPRs[srcGMarker++];
1178						outStruct[destMarker++] = inGPRs[srcGMarker++];
1179						i++;
1180					}
1181					else
1182					{
1183						memcpy(&outStruct[destMarker],
1184							&inGPRs[srcGMarker], inType->size);
1185						srcGMarker += inType->size;
1186						destMarker += inType->size;
1187						i += inType->size - 1;
1188					}
1189				}
1190				else	// chars and other stuff
1191				{
1192					outStruct[destMarker++] = inGPRs[srcGMarker++];
1193
1194					// Skip to next GPR if next element won't fit and we're
1195					// not already at a register boundary.
1196					if (inType->elements[i + 1] != NULL && (srcGMarker % 8))
1197					{
1198						if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1199							(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1200							(ALIGN(srcGMarker, 8) - srcGMarker) < 2) &&
1201							(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1202							(ALIGN(srcGMarker, 8) - srcGMarker) < 4))
1203							srcGMarker	= ALIGN(srcGMarker, inType->alignment);	// was 8
1204					}
1205				}
1206
1207				break;
1208			}
1209
1210			case FFI_TYPE_UINT16:
1211			case FFI_TYPE_SINT16:
1212				srcGMarker = ALIGN(srcGMarker, 2);
1213				destMarker = ALIGN(destMarker, 2);
1214
1215				*(short*)&outStruct[destMarker] =
1216					*(short*)&inGPRs[srcGMarker];
1217				srcGMarker += 2;
1218				destMarker += 2;
1219
1220				break;
1221
1222			case FFI_TYPE_INT:
1223			case FFI_TYPE_UINT32:
1224			case FFI_TYPE_SINT32:
1225				srcGMarker = ALIGN(srcGMarker, 4);
1226				destMarker = ALIGN(destMarker, 4);
1227
1228				*(int*)&outStruct[destMarker] =
1229					*(int*)&inGPRs[srcGMarker];
1230				srcGMarker += 4;
1231				destMarker += 4;
1232
1233				break;
1234
1235			case FFI_TYPE_POINTER:
1236			case FFI_TYPE_UINT64:
1237			case FFI_TYPE_SINT64:
1238				srcGMarker = ALIGN(srcGMarker, 8);
1239				destMarker = ALIGN(destMarker, 8);
1240
1241				*(long long*)&outStruct[destMarker] =
1242					*(long long*)&inGPRs[srcGMarker];
1243				srcGMarker += 8;
1244				destMarker += 8;
1245
1246				break;
1247
1248			case FFI_TYPE_STRUCT:
1249				recurseCount++;
1250				ffi64_struct_to_ram_form(inType->elements[i], inGPRs,
1251					&srcGMarker, inFPRs, &srcFMarker, &fprsUsed,
1252					outStruct, &destMarker);
1253				recurseCount--;
1254				break;
1255
1256			default:
1257				FFI_ASSERT(0);	// unknown element type
1258				break;
1259		}
1260	}
1261
1262	srcGMarker = ALIGN(srcGMarker, inType->alignment);
1263
1264	// Take care of the special case for 16-byte structs, but not for
1265	// nested structs.
1266	if (recurseCount == 0 && srcGMarker == 16)
1267	{
1268		*(long double*)&outStruct[0] = *(long double*)&inGPRs[0];
1269		srcFMarker	= savedFMarker;
1270		fprsUsed	= savedFPRsUsed;
1271	}
1272
1273	if (ioGPRMarker)
1274		*ioGPRMarker = ALIGN(srcGMarker, 8);
1275
1276	if (ioFPRMarker)
1277		*ioFPRMarker = srcFMarker;
1278
1279	if (ioFPRsUsed)
1280		*ioFPRsUsed	= fprsUsed;
1281
1282	if (ioStructMarker)
1283		*ioStructMarker	= ALIGN(destMarker, 8);
1284}
1285
1286/*	ffi64_struct_to_reg_form
1287
1288	Copy a struct's elements into buffers that can be sliced into registers.
1289	Return the sizes of the output buffers in bytes. Pass NULL buffer pointers
1290	to calculate size only.
1291	outGPRs[0-7] == r3, outFPRs[0-7] == f1 ...
1292*/
1293void
1294ffi64_struct_to_reg_form(
1295	const ffi_type*	inType,
1296	const char*		inStruct,
1297	unsigned int*	ioStructMarker,
1298	unsigned int*	ioFPRsUsed,
1299	char*			outGPRs,	// caller-allocated
1300	unsigned int*	ioGPRSize,
1301	char*			outFPRs,	// caller-allocated
1302	unsigned int*	ioFPRSize)
1303{
1304	size_t			i;
1305	unsigned int	srcMarker		= 0;
1306	unsigned int	destGMarker		= 0;
1307	unsigned int	destFMarker		= 0;
1308	unsigned int	savedFMarker	= 0;
1309	unsigned int	fprsUsed		= 0;
1310	unsigned int	savedFPRsUsed	= 0;
1311
1312	static unsigned int	recurseCount	= 0;
1313
1314	if (ioStructMarker)
1315		srcMarker	= *ioStructMarker;
1316
1317	if (ioFPRsUsed)
1318	{
1319		fprsUsed		= *ioFPRsUsed;
1320		savedFPRsUsed	= fprsUsed;
1321	}
1322
1323	if (ioGPRSize)
1324		destGMarker	= *ioGPRSize;
1325
1326	if (ioFPRSize)
1327	{
1328		destFMarker		= *ioFPRSize;
1329		savedFMarker	= destFMarker;
1330	}
1331
1332	switch (inType->size)
1333	{
1334		case 1: case 2: case 4:
1335			destGMarker += 8 - inType->size;
1336			break;
1337
1338		default:
1339			break;
1340	}
1341
1342	for (i = 0; inType->elements[i] != NULL; i++)
1343	{
1344		switch (inType->elements[i]->type)
1345		{
1346			// Shadow floating-point types in GPRs for vararg and pre-ANSI
1347			// functions.
1348			case FFI_TYPE_FLOAT:
1349				// Nudge markers to next 4/8-byte boundary
1350				srcMarker = ALIGN(srcMarker, 4);
1351				destGMarker = ALIGN(destGMarker, 4);
1352				destFMarker = ALIGN(destFMarker, 8);
1353
1354				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1355				{
1356					if (outFPRs != NULL && inStruct != NULL)
1357						*(double*)&outFPRs[destFMarker] =
1358							(double)*(float*)&inStruct[srcMarker];
1359
1360					destFMarker += 8;
1361					fprsUsed++;
1362				}
1363
1364				if (outGPRs != NULL && inStruct != NULL)
1365					*(double*)&outGPRs[destGMarker] =
1366						(double)*(float*)&inStruct[srcMarker];
1367
1368				srcMarker += 4;
1369				destGMarker += 4;
1370
1371				// Skip to next GPR if next element won't fit and we're
1372				// not already at a register boundary.
1373				if (inType->elements[i + 1] != NULL && (srcMarker % 8))
1374				{
1375					if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1376						(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1377						(ALIGN(destGMarker, 8) - destGMarker) < 2) &&
1378						(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1379						(ALIGN(destGMarker, 8) - destGMarker) < 4))
1380						destGMarker	= ALIGN(destGMarker, 8);
1381				}
1382
1383				break;
1384
1385			case FFI_TYPE_DOUBLE:
1386				srcMarker = ALIGN(srcMarker, 8);
1387				destFMarker = ALIGN(destFMarker, 8);
1388
1389				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1390				{
1391					if (outFPRs != NULL && inStruct != NULL)
1392						*(double*)&outFPRs[destFMarker] =
1393							*(double*)&inStruct[srcMarker];
1394
1395					destFMarker += 8;
1396					fprsUsed++;
1397				}
1398
1399				if (outGPRs != NULL && inStruct != NULL)
1400					*(double*)&outGPRs[destGMarker] =
1401						*(double*)&inStruct[srcMarker];
1402
1403				srcMarker += 8;
1404
1405				// Skip next GPR
1406				destGMarker += 8;
1407				destGMarker = ALIGN(destGMarker, 8);
1408
1409				break;
1410
1411			case FFI_TYPE_LONGDOUBLE:
1412				srcMarker = ALIGN(srcMarker, 16);
1413
1414				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1415				{
1416					destFMarker = ALIGN(destFMarker, 8);
1417					destGMarker = ALIGN(destGMarker, 8);
1418
1419					if (outFPRs != NULL && inStruct != NULL)
1420						*(long double*)&outFPRs[destFMarker] =
1421							*(long double*)&inStruct[srcMarker];
1422
1423					if (outGPRs != NULL && inStruct != NULL)
1424						*(long double*)&outGPRs[destGMarker] =
1425							*(long double*)&inStruct[srcMarker];
1426
1427					destFMarker += 16;
1428					fprsUsed += 2;
1429				}
1430				else
1431				{
1432				 	destGMarker = ALIGN(destGMarker, 16);
1433
1434					 if (outGPRs != NULL && inStruct != NULL)
1435						*(long double*)&outGPRs[destGMarker] =
1436							*(long double*)&inStruct[srcMarker];
1437				}
1438
1439				srcMarker += 16;
1440				destGMarker += 16;	// Skip next 2 GPRs
1441				destGMarker = ALIGN(destGMarker, 8);	// was 16
1442
1443				break;
1444
1445			case FFI_TYPE_UINT8:
1446			case FFI_TYPE_SINT8:
1447				if (inType->alignment == 1)	// bytes only
1448				{
1449					if (inType->size == 1)
1450					{
1451						if (outGPRs != NULL && inStruct != NULL)
1452							outGPRs[destGMarker] = inStruct[srcMarker];
1453
1454						srcMarker++;
1455						destGMarker++;
1456					}
1457					else if (inType->size == 2)
1458					{
1459						if (outGPRs != NULL && inStruct != NULL)
1460						{
1461							outGPRs[destGMarker] = inStruct[srcMarker];
1462							outGPRs[destGMarker + 1] = inStruct[srcMarker + 1];
1463						}
1464
1465						srcMarker += 2;
1466						destGMarker += 2;
1467
1468						i++;
1469					}
1470					else
1471					{
1472						if (outGPRs != NULL && inStruct != NULL)
1473						{
1474							// Avoid memcpy for small chunks.
1475							if (inType->size <= sizeof(long))
1476								*(long*)&outGPRs[destGMarker] =
1477									*(long*)&inStruct[srcMarker];
1478							else
1479								memcpy(&outGPRs[destGMarker],
1480									&inStruct[srcMarker], inType->size);
1481						}
1482
1483						srcMarker += inType->size;
1484						destGMarker += inType->size;
1485						i += inType->size - 1;
1486					}
1487				}
1488				else	// bytes and other stuff
1489				{
1490					if (outGPRs != NULL && inStruct != NULL)
1491						outGPRs[destGMarker] = inStruct[srcMarker];
1492
1493					srcMarker++;
1494					destGMarker++;
1495
1496					// Skip to next GPR if next element won't fit and we're
1497					// not already at a register boundary.
1498					if (inType->elements[i + 1] != NULL && (destGMarker % 8))
1499					{
1500						if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1501							(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1502							(ALIGN(destGMarker, 8) - destGMarker) < 2) &&
1503							(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1504							(ALIGN(destGMarker, 8) - destGMarker) < 4))
1505							destGMarker	= ALIGN(destGMarker, inType->alignment);	// was 8
1506					}
1507				}
1508
1509				break;
1510
1511			case FFI_TYPE_UINT16:
1512			case FFI_TYPE_SINT16:
1513				srcMarker = ALIGN(srcMarker, 2);
1514				destGMarker = ALIGN(destGMarker, 2);
1515
1516				if (outGPRs != NULL && inStruct != NULL)
1517					*(short*)&outGPRs[destGMarker] =
1518						*(short*)&inStruct[srcMarker];
1519
1520				srcMarker += 2;
1521				destGMarker += 2;
1522
1523				if (inType->elements[i + 1] == NULL)
1524					destGMarker	= ALIGN(destGMarker, inType->alignment);
1525
1526				break;
1527
1528			case FFI_TYPE_INT:
1529			case FFI_TYPE_UINT32:
1530			case FFI_TYPE_SINT32:
1531				srcMarker = ALIGN(srcMarker, 4);
1532				destGMarker = ALIGN(destGMarker, 4);
1533
1534				if (outGPRs != NULL && inStruct != NULL)
1535					*(int*)&outGPRs[destGMarker] =
1536						*(int*)&inStruct[srcMarker];
1537
1538				srcMarker += 4;
1539				destGMarker += 4;
1540
1541				break;
1542
1543			case FFI_TYPE_POINTER:
1544			case FFI_TYPE_UINT64:
1545			case FFI_TYPE_SINT64:
1546				srcMarker = ALIGN(srcMarker, 8);
1547				destGMarker = ALIGN(destGMarker, 8);
1548
1549				if (outGPRs != NULL && inStruct != NULL)
1550					*(long long*)&outGPRs[destGMarker] =
1551						*(long long*)&inStruct[srcMarker];
1552
1553				srcMarker += 8;
1554				destGMarker += 8;
1555
1556				if (inType->elements[i + 1] == NULL)
1557					destGMarker	= ALIGN(destGMarker, inType->alignment);
1558
1559				break;
1560
1561			case FFI_TYPE_STRUCT:
1562				recurseCount++;
1563				ffi64_struct_to_reg_form(inType->elements[i],
1564					inStruct, &srcMarker, &fprsUsed, outGPRs,
1565					&destGMarker, outFPRs, &destFMarker);
1566				recurseCount--;
1567				break;
1568
1569			default:
1570				FFI_ASSERT(0);
1571				break;
1572		}
1573	}
1574
1575	destGMarker	= ALIGN(destGMarker, inType->alignment);
1576
1577	// Take care of the special case for 16-byte structs, but not for
1578	// nested structs.
1579	if (recurseCount == 0 && destGMarker == 16)
1580	{
1581		if (outGPRs != NULL && inStruct != NULL)
1582			*(long double*)&outGPRs[0] = *(long double*)&inStruct[0];
1583
1584		destFMarker	= savedFMarker;
1585		fprsUsed	= savedFPRsUsed;
1586	}
1587
1588	if (ioStructMarker)
1589		*ioStructMarker	= ALIGN(srcMarker, 8);
1590
1591	if (ioFPRsUsed)
1592		*ioFPRsUsed	= fprsUsed;
1593
1594	if (ioGPRSize)
1595		*ioGPRSize = ALIGN(destGMarker, 8);
1596
1597	if (ioFPRSize)
1598		*ioFPRSize = ALIGN(destFMarker, 8);
1599}
1600
1601/*	ffi64_stret_needs_ptr
1602
1603	Determine whether a returned struct needs a pointer in r3 or can fit
1604	in registers.
1605*/
1606
1607bool
1608ffi64_stret_needs_ptr(
1609	const ffi_type*	inType,
1610	unsigned short*	ioGPRCount,
1611	unsigned short*	ioFPRCount)
1612{
1613	// Obvious case first- struct is larger than combined FPR size.
1614	if (inType->size > 14 * 8)
1615		return true;
1616
1617	// Now the struct can physically fit in registers, determine if it
1618	// also fits logically.
1619	bool			needsPtr	= false;
1620	unsigned short	gprsUsed	= 0;
1621	unsigned short	fprsUsed	= 0;
1622	size_t			i;
1623
1624	if (ioGPRCount)
1625		gprsUsed = *ioGPRCount;
1626
1627	if (ioFPRCount)
1628		fprsUsed = *ioFPRCount;
1629
1630	for (i = 0; inType->elements[i] != NULL && !needsPtr; i++)
1631	{
1632		switch (inType->elements[i]->type)
1633		{
1634			case FFI_TYPE_FLOAT:
1635			case FFI_TYPE_DOUBLE:
1636				gprsUsed++;
1637				fprsUsed++;
1638
1639				if (fprsUsed > 13)
1640					needsPtr = true;
1641
1642				break;
1643
1644			case FFI_TYPE_LONGDOUBLE:
1645				gprsUsed += 2;
1646				fprsUsed += 2;
1647
1648				if (fprsUsed > 14)
1649					needsPtr = true;
1650
1651				break;
1652
1653			case FFI_TYPE_UINT8:
1654			case FFI_TYPE_SINT8:
1655			{
1656				gprsUsed++;
1657
1658				if (gprsUsed > 8)
1659				{
1660					needsPtr = true;
1661					break;
1662				}
1663
1664				if (inType->elements[i + 1] == NULL)	// last byte in the struct
1665					break;
1666
1667				// Count possible contiguous bytes ahead, up to 8.
1668				unsigned short j;
1669
1670				for (j = 1; j < 8; j++)
1671				{
1672					if (inType->elements[i + j] == NULL ||
1673						!FFI_TYPE_1_BYTE(inType->elements[i + j]->type))
1674						break;
1675				}
1676
1677				i += j - 1;	// allow for i++ before the test condition
1678
1679				break;
1680			}
1681
1682			case FFI_TYPE_UINT16:
1683			case FFI_TYPE_SINT16:
1684			case FFI_TYPE_INT:
1685			case FFI_TYPE_UINT32:
1686			case FFI_TYPE_SINT32:
1687			case FFI_TYPE_POINTER:
1688			case FFI_TYPE_UINT64:
1689			case FFI_TYPE_SINT64:
1690				gprsUsed++;
1691
1692				if (gprsUsed > 8)
1693					needsPtr = true;
1694
1695				break;
1696
1697			case FFI_TYPE_STRUCT:
1698				needsPtr = ffi64_stret_needs_ptr(
1699					inType->elements[i], &gprsUsed, &fprsUsed);
1700
1701				break;
1702
1703			default:
1704				FFI_ASSERT(0);
1705				break;
1706		}
1707	}
1708
1709	if (ioGPRCount)
1710		*ioGPRCount = gprsUsed;
1711
1712	if (ioFPRCount)
1713		*ioFPRCount = fprsUsed;
1714
1715	return needsPtr;
1716}
1717
1718/*	ffi64_data_size
1719
1720	Calculate the size in bytes of an ffi type.
1721*/
1722
1723unsigned int
1724ffi64_data_size(
1725	const ffi_type*	inType)
1726{
1727	unsigned int	size = 0;
1728
1729	switch (inType->type)
1730	{
1731		case FFI_TYPE_UINT8:
1732		case FFI_TYPE_SINT8:
1733			size = 1;
1734			break;
1735
1736		case FFI_TYPE_UINT16:
1737		case FFI_TYPE_SINT16:
1738			size = 2;
1739			break;
1740
1741		case FFI_TYPE_INT:
1742		case FFI_TYPE_UINT32:
1743		case FFI_TYPE_SINT32:
1744		case FFI_TYPE_FLOAT:
1745			size = 4;
1746			break;
1747
1748		case FFI_TYPE_POINTER:
1749		case FFI_TYPE_UINT64:
1750		case FFI_TYPE_SINT64:
1751		case FFI_TYPE_DOUBLE:
1752			size = 8;
1753			break;
1754
1755		case FFI_TYPE_LONGDOUBLE:
1756			size = 16;
1757			break;
1758
1759		case FFI_TYPE_STRUCT:
1760			ffi64_struct_to_reg_form(
1761				inType, NULL, NULL, NULL, NULL, &size, NULL, NULL);
1762			break;
1763
1764		case FFI_TYPE_VOID:
1765			break;
1766
1767		default:
1768			FFI_ASSERT(0);
1769			break;
1770	}
1771
1772	return size;
1773}
1774
1775#endif	/*	defined(__ppc64__)	*/
1776#endif	/* __ppc__ || __ppc64__ */
1777