1#if defined(__ppc__) || defined(__ppc64__)
2
3/* -----------------------------------------------------------------------
4   ffi.c - Copyright (c) 1998 Geoffrey Keating
5
6   PowerPC Foreign Function Interface
7
8   Darwin ABI support (c) 2001 John Hornkvist
9   AIX ABI support (c) 2002 Free Software Foundation, Inc.
10
11   Permission is hereby granted, free of charge, to any person obtaining
12   a copy of this software and associated documentation files (the
13   ``Software''), to deal in the Software without restriction, including
14   without limitation the rights to use, copy, modify, merge, publish,
15   distribute, sublicense, and/or sell copies of the Software, and to
16   permit persons to whom the Software is furnished to do so, subject to
17   the following conditions:
18
19   The above copyright notice and this permission notice shall be included
20   in all copies or substantial portions of the Software.
21
22   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
23   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
24   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
25   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
26   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
27   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28   OTHER DEALINGS IN THE SOFTWARE.
29   ----------------------------------------------------------------------- */
30
31#include "ffi.h"
32#include "ffi_common.h"
33
34#include <stdbool.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include "ppc-darwin.h"
38#include <architecture/ppc/mode_independent_asm.h>
39
40#if 0
41#if defined(POWERPC_DARWIN)
42#include <libkern/OSCacheControl.h>	// for sys_icache_invalidate()
43#endif
44
45#else
46
47/* Explicit prototype instead of including a header to allow compilation
48 * on Tiger systems.
49 */
50
51#pragma weak sys_icache_invalidate
52extern void sys_icache_invalidate(void *start, size_t len);
53
54#endif
55
56extern void ffi_closure_ASM(void);
57
// The layout of a function descriptor.  A C function pointer really
// points to one of these.  In this file it is used by the FFI_AIX branch
// of ffi_prep_closure(), which views ffi_closure_ASM through this struct
// and copies both fields into the closure's trampoline.
typedef struct aix_fd_struct {
  void*	code_pointer;	// copied to the trampoline's code_pointer
  void*	toc;			// copied to the trampoline's toc
} aix_fd;
64
/* ffi_prep_args is called by the assembly routine once stack space
   has been allocated for the function's arguments.

   The stack layout we want looks like this:

   |   Return address from ffi_call_DARWIN      |	higher addresses
   |--------------------------------------------|
   |   Previous backchain pointer      4/8      |	    stack pointer here
   |--------------------------------------------|-\ <<< on entry to
   |   Saved r28-r31                 (4/8)*4    | |	    ffi_call_DARWIN
   |--------------------------------------------| |
   |   Parameters      (at least 8*(4/8)=32/64) | | (176) +112 - +288
   |--------------------------------------------| |
   |   Space for GPR2                  4/8      | |
   |--------------------------------------------| |	stack	|
   |   Reserved						 (4/8)*2    | |	grows	|
   |--------------------------------------------| |	down	V
   |   Space for callee's LR           4/8      | |
   |--------------------------------------------| |	lower addresses
   |   Saved CR                        4/8      | |
   |--------------------------------------------| |     stack pointer here
   |   Current backchain pointer       4/8      | |     during
   |--------------------------------------------|-/ <<< ffi_call_DARWIN

	Note: ppc64 CR is saved in the low word of a long on the stack.

   Parameters:
	inEcif	- the call being marshalled.  Its cif (bytes, flags, nargs,
			  arg_types), avalue and rvalue members are read.
	stack	- start of the newly allocated frame; the first six longs are
			  the reserved linkage words and the parameter area follows.
			  Asserted to be SF_ROUND-aligned.

   Nothing is returned.  Each argument is written into the parameter area
   through 'next_arg'; floating-point arguments that still fit in the FP
   argument registers are additionally stored through 'fpr_base'.
*/

/*@-exportheader@*/
void
ffi_prep_args(
	extended_cif*	inEcif,
	unsigned *const	stack)
/*@=exportheader@*/
{
	/*	Copy the ecif to a local var so we can trample the arg.
		BC note: test this with GP later for possible problems...	*/
	volatile extended_cif*	ecif	= inEcif;

	/*	Frame size and flag word previously computed by
		ffi_prep_cif_machdep().  */
	const unsigned bytes	= ecif->cif->bytes;
	const unsigned flags	= ecif->cif->flags;

	/*	Cast the stack arg from int* to long*. sizeof(long) == 4 in 32-bit mode
		and 8 in 64-bit mode.	*/
	unsigned long *const longStack	= (unsigned long *const)stack;

	/* 'stacktop' points at the previous backchain pointer.	*/
#if defined(__ppc64__)
	//	In ppc-darwin.s, an extra 96 bytes is reserved for the linkage area,
	//	saved registers, and an extra FPR.
	unsigned long *const stacktop	=
		(unsigned long *)(unsigned long)((char*)longStack + bytes + 96);
#elif defined(__ppc__)
	unsigned long *const stacktop	= longStack + (bytes / sizeof(long));
#else
#error undefined architecture
#endif

	/* 'fpr_base' points at the space for fpr1, and grows upwards as
		we use FPR registers.  */
	double*		fpr_base = (double*)(stacktop - ASM_NEEDS_REGISTERS) -
		NUM_FPR_ARG_REGISTERS;

#if defined(__ppc64__)
	//	64-bit saves an extra register, and uses an extra FPR. Knock fpr_base
	//	down a couple pegs.
	fpr_base -= 2;
#endif

	/*	Number of FP register slots consumed so far (a long double counts
		as two).  */
	unsigned int	fparg_count = 0;

	/* 'next_arg' grows up as we put parameters in it.  */
	unsigned long*	next_arg = longStack + 6; /* 6 reserved positions.  */

	int				i;
	double			double_tmp;
	void**			p_argv = ecif->avalue;	/* walks the argument value pointers */
	unsigned long	gprvalue;				/* value handed to the shared 'putgpr' store */
	ffi_type**		ptr = ecif->cif->arg_types;	/* walks the argument types in step with p_argv */

	/* Check that everything starts aligned properly.  */
	FFI_ASSERT(stack == SF_ROUND(stack));
	FFI_ASSERT(stacktop == SF_ROUND(stacktop));
	FFI_ASSERT(bytes == SF_ROUND(bytes));

	/*	Deal with return values that are actually pass-by-reference.
		Rule:
		Return values are referenced by r3, so r4 is the first parameter.  */

	if (flags & FLAG_RETVAL_REFERENCE)
		*next_arg++ = (unsigned long)(char*)ecif->rvalue;

	/* Now for the arguments.  */
	for (i = ecif->cif->nargs; i > 0; i--, ptr++, p_argv++)
    {
		switch ((*ptr)->type)
		{
			/*	If a floating-point parameter appears before all of the general-
				purpose registers are filled, the corresponding GPRs that match
				the size of the floating-point parameter are shadowed for the
				benefit of vararg and pre-ANSI functions.	*/
			case FFI_TYPE_FLOAT:
				/*	Floats are widened to double before being stored.  */
				double_tmp = *(float*)*p_argv;

				if (fparg_count < NUM_FPR_ARG_REGISTERS)
					*fpr_base++ = double_tmp;

				/*	NOTE(review): this stores a full double (8 bytes) at
					next_arg, but next_arg then advances by only one long.
					Where sizeof(long) == 4 the following argument overwrites
					the second half, and the slot holds half of a double image
					rather than a single-precision value -- confirm against
					the ABI for floats that are read from the parameter
					area.  */
				*(double*)next_arg = double_tmp;

				next_arg++;
				fparg_count++;
				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);

				break;

			case FFI_TYPE_DOUBLE:
				double_tmp = *(double*)*p_argv;

				if (fparg_count < NUM_FPR_ARG_REGISTERS)
					*fpr_base++ = double_tmp;

				/*	The parameter area always receives a copy as well.  */
				*(double*)next_arg = double_tmp;

				/*	MODE_CHOICE(a,b) presumably yields 'a' for ppc and 'b' for
					ppc64 (defined outside this file -- confirm), i.e. the
					double spans two 4-byte longs or one 8-byte long.  */
				next_arg += MODE_CHOICE(2,1);
				fparg_count++;
				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);

				break;

#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
			case FFI_TYPE_LONGDOUBLE:
#if defined(__ppc64__)
				if (fparg_count < NUM_FPR_ARG_REGISTERS)
					*(long double*)fpr_base	= *(long double*)*p_argv;
#elif defined(__ppc__)
				if (fparg_count < NUM_FPR_ARG_REGISTERS - 1)
					*(long double*)fpr_base	= *(long double*)*p_argv;
				/*	Only one FP slot is left: store just the first double
					of the value there.  */
				else if (fparg_count == NUM_FPR_ARG_REGISTERS - 1)
					*(double*)fpr_base	= *(double*)*p_argv;
#else
#error undefined architecture
#endif

				/*	The full value always goes to the parameter area, and
					both FP cursors advance by two whether or not an FP
					slot was written above.  */
				*(long double*)next_arg	= *(long double*)*p_argv;
				fparg_count += 2;
				fpr_base += 2;
				next_arg += MODE_CHOICE(4,2);
				FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);

				break;
#endif	//	FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE

			case FFI_TYPE_UINT64:
			case FFI_TYPE_SINT64:
#if defined(__ppc64__)
				/*	Fits in one long: use the shared store below.  */
				gprvalue = *(long long*)*p_argv;
				goto putgpr;
#elif defined(__ppc__)
				/*	Occupies two consecutive longs of the parameter area.  */
				*(long long*)next_arg = *(long long*)*p_argv;
				next_arg += 2;
				break;
#else
#error undefined architecture
#endif

			case FFI_TYPE_POINTER:
				gprvalue = *(unsigned long*)*p_argv;
				goto putgpr;

			/*	Narrow integers are loaded through their own C type, so the
				conversion to unsigned long zero- or sign-extends them.  */
			case FFI_TYPE_UINT8:
				gprvalue = *(unsigned char*)*p_argv;
				goto putgpr;

			case FFI_TYPE_SINT8:
				gprvalue = *(signed char*)*p_argv;
				goto putgpr;

			case FFI_TYPE_UINT16:
				gprvalue = *(unsigned short*)*p_argv;
				goto putgpr;

			case FFI_TYPE_SINT16:
				gprvalue = *(signed short*)*p_argv;
				goto putgpr;

			case FFI_TYPE_STRUCT:
			{
#if defined(__ppc64__)
				/*	ffi64_struct_to_reg_form() reports through gprSize and
					fprSize how many bytes of each area it filled; both
					cursors are advanced by those amounts.  */
				unsigned int	gprSize = 0;
				unsigned int	fprSize = 0;

				ffi64_struct_to_reg_form(*ptr, (char*)*p_argv, NULL, &fparg_count,
					(char*)next_arg, &gprSize, (char*)fpr_base, &fprSize);
				next_arg += gprSize / sizeof(long);
				fpr_base += fprSize / sizeof(double);

#elif defined(__ppc__)
				char*	dest_cpy = (char*)next_arg;

			/*	Structures that match the basic modes (QI 1 byte, HI 2 bytes,
				SI 4 bytes) are aligned as if they were those modes.
				Structures with 3 byte in size are padded upwards.  */
				unsigned size_al = (*ptr)->size;

			/*	If the first member of the struct is a double, then align
				the struct to double-word.  */
				if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
					size_al = ALIGN((*ptr)->size, 8);

				if (ecif->cif->abi == FFI_DARWIN)
				{
					/*	1- and 2-byte structs are placed at the end of
						their 4-byte word.  */
					if (size_al < 3)
						dest_cpy += 4 - size_al;
				}

				/*	Copy the struct by value and advance by whole words.  */
				memcpy((char*)dest_cpy, (char*)*p_argv, size_al);
				next_arg += (size_al + 3) / 4;
#else
#error undefined architecture
#endif
				break;
			}

			case FFI_TYPE_INT:
			case FFI_TYPE_UINT32:
			case FFI_TYPE_SINT32:
				gprvalue = *(unsigned*)*p_argv;

				/*	Shared tail for every value that occupies exactly one
					long of the parameter area.  */
putgpr:
				*next_arg++ = gprvalue;
				break;

			default:
				/*	Any other type code is skipped without storing anything.  */
				break;
		}
	}

  /* Check that we didn't overrun the stack...  */
  //FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS);
  //FFI_ASSERT((unsigned *)fpr_base
  //	     <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
  //FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);
}
307
308#if defined(__ppc64__)
309
310bool
311ffi64_struct_contains_fp(
312	const ffi_type*	inType)
313{
314	bool			containsFP	= false;
315	unsigned int	i;
316
317	for (i = 0; inType->elements[i] != NULL && !containsFP; i++)
318	{
319		if (inType->elements[i]->type == FFI_TYPE_FLOAT		||
320			inType->elements[i]->type == FFI_TYPE_DOUBLE	||
321			inType->elements[i]->type == FFI_TYPE_LONGDOUBLE)
322			containsFP = true;
323		else if (inType->elements[i]->type == FFI_TYPE_STRUCT)
324			containsFP = ffi64_struct_contains_fp(inType->elements[i]);
325	}
326
327	return containsFP;
328}
329
330#endif	// defined(__ppc64__)
331
/* Perform machine dependent cif processing.

   Derives cif->flags from the return type and from whether any
   floating-point arguments are present, and recomputes cif->bytes (the
   frame size later consumed by ffi_call() and ffi_prep_args()) from the
   argument list.

   cif	- call interface whose rtype, arg_types and nargs are read and
		  whose flags and bytes are overwritten.

   Always returns FFI_OK.  */
ffi_status
ffi_prep_cif_machdep(
	ffi_cif*	cif)
{
	/* All this is for the DARWIN ABI.  */
	int				i;
	ffi_type**		ptr;
	int				intarg_count = 0;	/* words of integer-class arguments */
	int				fparg_count = 0;	/* FP register slots requested */
	unsigned int	flags = 0;
	unsigned int	size_al = 0;		/* size of the current struct argument */

	/*	All the machine-independent calculation of cif->bytes will be wrong.
		Redo the calculation for DARWIN.  */

	/*	Space for the frame pointer, callee's LR, CR, etc, and for
		the asm's temp regs.  */
	unsigned int	bytes = (6 + ASM_NEEDS_REGISTERS) * sizeof(long);

	/*	Return value handling.  The rules are as follows:
		- 32-bit (or less) integer values are returned in gpr3;
		- Structures of size <= 4 bytes also returned in gpr3;
		- 64-bit integer values and structures between 5 and 8 bytes are
			returned in gpr3 and gpr4;
		- Single/double FP values are returned in fpr1;
		- Long double FP (if not equivalent to double) values are returned in
			fpr1 and fpr2;
		- Larger structures values are allocated space and a pointer is passed
			as the first argument.  */
	switch (cif->rtype->type)
	{
#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
		case FFI_TYPE_LONGDOUBLE:
			flags |= FLAG_RETURNS_128BITS;
			flags |= FLAG_RETURNS_FP;
			break;
#endif	// FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE

		case FFI_TYPE_DOUBLE:
			flags |= FLAG_RETURNS_64BITS;
			/* Fall through.  */
		case FFI_TYPE_FLOAT:
			flags |= FLAG_RETURNS_FP;
			break;

#if defined(__ppc64__)
		/*	On ppc64 a pointer is flagged as a 64-bit return value.  */
		case FFI_TYPE_POINTER:
#endif
		case FFI_TYPE_UINT64:
		case FFI_TYPE_SINT64:
			flags |= FLAG_RETURNS_64BITS;
			break;

		case FFI_TYPE_STRUCT:
		{
#if defined(__ppc64__)

			/*	ffi64_stret_needs_ptr() decides whether the struct comes
				back through a hidden pointer, which then uses up one
				integer argument word.  */
			if (ffi64_stret_needs_ptr(cif->rtype, NULL, NULL))
			{
				flags |= FLAG_RETVAL_REFERENCE;
				flags |= FLAG_RETURNS_NOTHING;
				intarg_count++;
			}
			else
			{
				flags |= FLAG_RETURNS_STRUCT;

				if (ffi64_struct_contains_fp(cif->rtype))
					flags |= FLAG_STRUCT_CONTAINS_FP;
			}

#elif defined(__ppc__)

			/*	In this branch every struct return is flagged as
				by-reference, so the hidden pointer always takes one
				integer argument word.  */
			flags |= FLAG_RETVAL_REFERENCE;
			flags |= FLAG_RETURNS_NOTHING;
			intarg_count++;

#else
#error undefined architecture
#endif
			break;
		}

		case FFI_TYPE_VOID:
			flags |= FLAG_RETURNS_NOTHING;
			break;

		default:
			/* Returns 32-bit integer, or similar.  Nothing to do here.  */
			break;
	}

	/*	The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
		first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
		goes on the stack.  Structures are passed as a pointer to a copy of
		the structure. Stuff on the stack needs to keep proper alignment.
		NOTE(review): the FFI_TYPE_STRUCT case below sizes struct arguments
		by their contents, not as a single pointer word -- the sentence
		about pointers above looks stale; confirm.  */
	for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
	{
		switch ((*ptr)->type)
		{
			case FFI_TYPE_FLOAT:
			case FFI_TYPE_DOUBLE:
				/*	Only fparg_count grows here; the parameter-area words
					for FP arguments are added in the "Stack space" step
					at the end.  */
				fparg_count++;
				/*	If this FP arg is going on the stack, it must be
					8-byte-aligned.  */
				if (fparg_count > NUM_FPR_ARG_REGISTERS
					&& intarg_count % 2 != 0)
					intarg_count++;
				break;

#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
			case FFI_TYPE_LONGDOUBLE:
				fparg_count += 2;
				/*	If this FP arg is going on the stack, it must be
					8-byte-aligned.  */

				if (
#if defined(__ppc64__)
					fparg_count > NUM_FPR_ARG_REGISTERS + 1
#elif defined(__ppc__)
					fparg_count > NUM_FPR_ARG_REGISTERS
#else
#error undefined architecture
#endif
					&& intarg_count % 2 != 0)
					intarg_count++;

				/*	Unlike float/double, a long double also adds two
					integer words here.  */
				intarg_count += 2;
				break;
#endif	// FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE

			case FFI_TYPE_UINT64:
			case FFI_TYPE_SINT64:
				/*	'long long' arguments are passed as two words, but
					either both words must fit in registers or both go
					on the stack.  If they go on the stack, they must
					be 8-byte-aligned.  */
				if (intarg_count == NUM_GPR_ARG_REGISTERS - 1
					|| (intarg_count >= NUM_GPR_ARG_REGISTERS
					&& intarg_count % 2 != 0))
					intarg_count++;

				intarg_count += MODE_CHOICE(2,1);

				break;

			case FFI_TYPE_STRUCT:
				size_al = (*ptr)->size;
				/*	If the first member of the struct is a double, then align
					the struct to double-word.  */
				if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
					size_al = ALIGN((*ptr)->size, 8);

#if defined(__ppc64__)
				// Look for FP struct members.
				// size_al is not used in this branch; the count is built
				// member by member instead.
				unsigned int	j;

				for (j = 0; (*ptr)->elements[j] != NULL; j++)
				{
					if ((*ptr)->elements[j]->type == FFI_TYPE_FLOAT	||
						(*ptr)->elements[j]->type == FFI_TYPE_DOUBLE)
					{
						fparg_count++;

						if (fparg_count > NUM_FPR_ARG_REGISTERS)
							intarg_count++;
					}
					else if ((*ptr)->elements[j]->type == FFI_TYPE_LONGDOUBLE)
					{
						fparg_count += 2;

						if (fparg_count > NUM_FPR_ARG_REGISTERS + 1)
							intarg_count += 2;
					}
					else
						// Every other member, including a nested struct,
						// is counted as one word.
						// NOTE(review): confirm that is enough for nested
						// structs larger than one word.
						intarg_count++;
				}
#elif defined(__ppc__)
				/*	Struct passed by value: count the 4-byte words it
					occupies, rounding up.  */
				intarg_count += (size_al + 3) / 4;
#else
#error undefined architecture
#endif

				break;

			default:
				/*	Everything else is passed as a 4/8-byte word in a GPR, either
					the object itself or a pointer to it.  */
				intarg_count++;
				break;
		}
	}

	/* Space for the FPR registers, if needed.  */
	if (fparg_count != 0)
	{
		flags |= FLAG_FP_ARGUMENTS;
#if defined(__ppc64__)
		bytes += (NUM_FPR_ARG_REGISTERS + 1) * sizeof(double);
#elif defined(__ppc__)
		bytes += NUM_FPR_ARG_REGISTERS * sizeof(double);
#else
#error undefined architecture
#endif
	}

	/* Stack space.  */
	/*	Each FP slot is budgeted as one long on ppc64 and two longs on ppc.
		The 'else' after the #endif pairs with whichever 'if' the
		preprocessor keeps, so the parameter area is never smaller than
		NUM_GPR_ARG_REGISTERS longs.  */
#if defined(__ppc64__)
	if ((intarg_count + fparg_count) > NUM_GPR_ARG_REGISTERS)
		bytes += (intarg_count + fparg_count) * sizeof(long);
#elif defined(__ppc__)
	if ((intarg_count + 2 * fparg_count) > NUM_GPR_ARG_REGISTERS)
		bytes += (intarg_count + 2 * fparg_count) * sizeof(long);
#else
#error undefined architecture
#endif
	else
		bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);

	/* The stack space allocated needs to be a multiple of 16/32 bytes.  */
	bytes = SF_ROUND(bytes);

	cif->flags = flags;
	cif->bytes = bytes;

	return FFI_OK;
}
560
/*	Low-level call entry points; only their prototypes are visible here
	(the comments in ffi_prep_args refer to an assembly implementation).
	ffi_call() passes, in order: the extended_cif, the negated frame size
	(-cif->bytes), cif->flags, the return-value buffer, the function to
	call, and ffi_prep_args as the routine that fills in the arguments.
	The two prototypes differ only in the type of the frame-size
	parameter.  */
/*@-declundef@*/
/*@-exportheader@*/
extern void
ffi_call_AIX(
/*@out@*/	extended_cif*,
			unsigned,
			unsigned,
/*@out@*/	unsigned*,
			void (*fn)(void),
			void (*fn2)(extended_cif*, unsigned *const));

extern void
ffi_call_DARWIN(
/*@out@*/	extended_cif*,
			unsigned long,
			unsigned,
/*@out@*/	unsigned*,
			void (*fn)(void),
			void (*fn2)(extended_cif*, unsigned *const));
/*@=declundef@*/
/*@=exportheader@*/
582
583void
584ffi_call(
585/*@dependent@*/	ffi_cif*	cif,
586				void		(*fn)(void),
587/*@out@*/		void*		rvalue,
588/*@dependent@*/	void**		avalue)
589{
590	extended_cif ecif;
591
592	ecif.cif = cif;
593	ecif.avalue = avalue;
594
595	/*	If the return value is a struct and we don't have a return
596		value address then we need to make one.  */
597	if ((rvalue == NULL) &&
598		(cif->rtype->type == FFI_TYPE_STRUCT))
599	{
600		/*@-sysunrecog@*/
601		ecif.rvalue = alloca(cif->rtype->size);
602		/*@=sysunrecog@*/
603	}
604	else
605		ecif.rvalue = rvalue;
606
607	switch (cif->abi)
608	{
609		case FFI_AIX:
610			/*@-usedef@*/
611			ffi_call_AIX(&ecif, -cif->bytes,
612				cif->flags, ecif.rvalue, fn, ffi_prep_args);
613			/*@=usedef@*/
614			break;
615
616		case FFI_DARWIN:
617			/*@-usedef@*/
618			ffi_call_DARWIN(&ecif, -(long)cif->bytes,
619				cif->flags, ecif.rvalue, fn, ffi_prep_args);
620			/*@=usedef@*/
621			break;
622
623		default:
624			FFI_ASSERT(0);
625			break;
626    }
627}
628
/* Stack frame layout used by the closure code in darwin_closure.S
   and aix_closure.S:
631
632   SP previous -> +---------------------------------------+ <--- child frame
633		  | back chain to caller 4                |
634		  +---------------------------------------+ 4
635		  | saved CR 4                            |
636		  +---------------------------------------+ 8
637		  | saved LR 4                            |
638		  +---------------------------------------+ 12
639		  | reserved for compilers 4              |
640		  +---------------------------------------+ 16
641		  | reserved for binders 4                |
642		  +---------------------------------------+ 20
643		  | saved TOC pointer 4                   |
644		  +---------------------------------------+ 24
645		  | always reserved 8*4=32 (previous GPRs)|
646		  | according to the linkage convention   |
647		  | from AIX                              |
648		  +---------------------------------------+ 56
649		  | our FPR area 13*8=104                 |
650		  | f1                                    |
651		  | .                                     |
652		  | f13                                   |
653		  +---------------------------------------+ 160
654		  | result area 8                         |
655		  +---------------------------------------+ 168
		  | alignment to the next multiple of 16  |
657SP current -->    +---------------------------------------+ 176 <- parent frame
658		  | back chain to caller 4                |
659		  +---------------------------------------+ 180
660		  | saved CR 4                            |
661		  +---------------------------------------+ 184
662		  | saved LR 4                            |
663		  +---------------------------------------+ 188
664		  | reserved for compilers 4              |
665		  +---------------------------------------+ 192
666		  | reserved for binders 4                |
667		  +---------------------------------------+ 196
668		  | saved TOC pointer 4                   |
669		  +---------------------------------------+ 200
670		  | always reserved 8*4=32  we store our  |
671		  | GPRs here                             |
672		  | r3                                    |
673		  | .                                     |
674		  | r10                                   |
675		  +---------------------------------------+ 232
676		  | overflow part                         |
677		  +---------------------------------------+ xxx
678		  | ????                                  |
679		  +---------------------------------------+ xxx
680*/
681
682#if !defined(POWERPC_DARWIN)
683
684#define MIN_LINE_SIZE 32
685
/*	Make the cache line containing 'addr' coherent for instruction fetch:
	flush it from the data cache (dcbf), invalidate it in the instruction
	cache (icbi), with sync/isync barriers after each step.

	addr	- any address inside the cache line to flush.

	When _AIX is defined the body is compiled out and this is a no-op.  */
static void
flush_icache(
	char*	addr)
{
#ifndef _AIX
	__asm__ volatile (
		"dcbf 0,%0\n"
		"sync\n"
		"icbi 0,%0\n"
		"sync\n"
		"isync"
		: : "r" (addr) : "memory");
#endif
}
700
701static void
702flush_range(
703	char*	addr,
704	int		size)
705{
706	int i;
707
708	for (i = 0; i < size; i += MIN_LINE_SIZE)
709		flush_icache(addr + i);
710
711	flush_icache(addr + size - 1);
712}
713
714#endif	// !defined(POWERPC_DARWIN)
715
716ffi_status
717ffi_prep_closure(
718	ffi_closure*	closure,
719	ffi_cif*		cif,
720	void			(*fun)(ffi_cif*, void*, void**, void*),
721	void*			user_data)
722{
723	switch (cif->abi)
724	{
725		case FFI_DARWIN:
726		{
727			FFI_ASSERT (cif->abi == FFI_DARWIN);
728
729			unsigned int*	tramp = (unsigned int*)&closure->tramp[0];
730
731#if defined(__ppc64__)
732			tramp[0] = 0x7c0802a6;	//	mflr	r0
733			tramp[1] = 0x429f0005;	//	bcl		20,31,+0x8
734			tramp[2] = 0x7d6802a6;	//	mflr	r11
735			tramp[3] = 0x7c0803a6;	//	mtlr	r0
736			tramp[4] = 0xe98b0018;	//	ld		r12,24(r11)
737			tramp[5] = 0x7d8903a6;	//	mtctr	r12
738			tramp[6] = 0xe96b0020;	//	ld		r11,32(r11)
739			tramp[7] = 0x4e800420;	//	bctr
740			*(unsigned long*)&tramp[8] = (unsigned long)ffi_closure_ASM;
741			*(unsigned long*)&tramp[10] = (unsigned long)closure;
742#elif defined(__ppc__)
743			tramp[0] = 0x7c0802a6;	//	mflr	r0
744			tramp[1] = 0x429f0005;	//	bcl		20,31,+0x8
745			tramp[2] = 0x7d6802a6;	//	mflr	r11
746			tramp[3] = 0x7c0803a6;	//	mtlr	r0
747			tramp[4] = 0x818b0018;	//	lwz		r12,24(r11)
748			tramp[5] = 0x7d8903a6;	//	mtctr	r12
749			tramp[6] = 0x816b001c;	//	lwz		r11,28(r11)
750			tramp[7] = 0x4e800420;	//	bctr
751			tramp[8] = (unsigned long)ffi_closure_ASM;
752			tramp[9] = (unsigned long)closure;
753#else
754#error undefined architecture
755#endif
756
757			closure->cif = cif;
758			closure->fun = fun;
759			closure->user_data = user_data;
760
761			// Flush the icache. Only necessary on Darwin.
762#if defined(POWERPC_DARWIN)
763			if (sys_icache_invalidate) {
764				sys_icache_invalidate(closure->tramp, FFI_TRAMPOLINE_SIZE);
765			}
766#else
767			flush_range(closure->tramp, FFI_TRAMPOLINE_SIZE);
768#endif
769
770			break;
771		}
772
773		case FFI_AIX:
774		{
775			FFI_ASSERT (cif->abi == FFI_AIX);
776
777			ffi_aix_trampoline_struct*	tramp_aix =
778				(ffi_aix_trampoline_struct*)(closure->tramp);
779			aix_fd*	fd = (aix_fd*)(void*)ffi_closure_ASM;
780
781			tramp_aix->code_pointer = fd->code_pointer;
782			tramp_aix->toc = fd->toc;
783			tramp_aix->static_chain = closure;
784			closure->cif = cif;
785			closure->fun = fun;
786			closure->user_data = user_data;
787			break;
788		}
789
790		default:
791			return FFI_BAD_ABI;
792	}
793
794	return FFI_OK;
795}
796
#if defined(__ppc__)
	/*	A long double viewed as its two 8-byte halves.  */
	typedef double ldbits[2];

	/*	Lets ffi_closure_helper_DARWIN assemble a long double from two
		separately located doubles (lb) and then hand it out as a single
		value (ld).  */
	typedef union
	{
		ldbits lb;
		long double ld;
	} ldu;
#endif
806
807/*	The trampoline invokes ffi_closure_ASM, and on entry, r11 holds the
808	address of the closure. After storing the registers that could possibly
809	contain parameters to be passed into the stack frame and setting up space
810	for a return value, ffi_closure_ASM invokes the following helper function
811	to do most of the work.  */
812int
813ffi_closure_helper_DARWIN(
814	ffi_closure*	closure,
815	void*			rvalue,
816	unsigned long*	pgr,
817	ffi_dblfl*		pfr)
818{
819	/*	rvalue is the pointer to space for return value in closure assembly
820		pgr is the pointer to where r3-r10 are stored in ffi_closure_ASM
821		pfr is the pointer to where f1-f13 are stored in ffi_closure_ASM.  */
822
823#if defined(__ppc__)
824	ldu	temp_ld;
825#endif
826
827	double				temp;
828	unsigned int		i;
829	unsigned int		nf = 0;	/* number of FPRs already used.  */
830	unsigned int		ng = 0;	/* number of GPRs already used.  */
831	ffi_cif*			cif = closure->cif;
832	unsigned int				avn = cif->nargs;
833	void**				avalue = alloca(cif->nargs * sizeof(void*));
834	ffi_type**			arg_types = cif->arg_types;
835
836	/*	Copy the caller's structure return value address so that the closure
837		returns the data directly to the caller.  */
838#if defined(__ppc64__)
839	if (cif->rtype->type == FFI_TYPE_STRUCT &&
840		ffi64_stret_needs_ptr(cif->rtype, NULL, NULL))
841#elif defined(__ppc__)
842	if (cif->rtype->type == FFI_TYPE_STRUCT)
843#else
844#error undefined architecture
845#endif
846	{
847		rvalue = (void*)*pgr;
848		pgr++;
849		ng++;
850	}
851
852	/* Grab the addresses of the arguments from the stack frame.  */
853	for (i = 0; i < avn; i++)
854	{
855		switch (arg_types[i]->type)
856		{
857			case FFI_TYPE_SINT8:
858			case FFI_TYPE_UINT8:
859				avalue[i] = (char*)pgr + MODE_CHOICE(3,7);
860				ng++;
861				pgr++;
862				break;
863
864			case FFI_TYPE_SINT16:
865			case FFI_TYPE_UINT16:
866				avalue[i] = (char*)pgr + MODE_CHOICE(2,6);
867				ng++;
868				pgr++;
869				break;
870
871#if defined(__ppc__)
872			case FFI_TYPE_POINTER:
873#endif
874			case FFI_TYPE_SINT32:
875			case FFI_TYPE_UINT32:
876				avalue[i] = (char*)pgr + MODE_CHOICE(0,4);
877				ng++;
878				pgr++;
879
880				break;
881
882			case FFI_TYPE_STRUCT:
883				if (cif->abi == FFI_DARWIN)
884				{
885#if defined(__ppc64__)
886					unsigned int	gprSize = 0;
887					unsigned int	fprSize	= 0;
888					unsigned int	savedFPRSize = fprSize;
889
890					avalue[i] = alloca(arg_types[i]->size);
891					ffi64_struct_to_ram_form(arg_types[i], (const char*)pgr,
892						&gprSize, (const char*)pfr, &fprSize, &nf, avalue[i], NULL);
893
894					ng	+= gprSize / sizeof(long);
895					pgr	+= gprSize / sizeof(long);
896					pfr	+= (fprSize - savedFPRSize) / sizeof(double);
897
898#elif defined(__ppc__)
899					/*	Structures that match the basic modes (QI 1 byte, HI 2 bytes,
900						SI 4 bytes) are aligned as if they were those modes.  */
901					unsigned int	size_al	= size_al = arg_types[i]->size;
902
903					/*	If the first member of the struct is a double, then align
904						the struct to double-word.  */
905					if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
906						size_al = ALIGN(arg_types[i]->size, 8);
907
908					if (size_al < 3)
909						avalue[i] = (char*)pgr + MODE_CHOICE(4,8) - size_al;
910					else
911						avalue[i] = (char*)pgr;
912
913					ng	+= (size_al + 3) / sizeof(long);
914					pgr += (size_al + 3) / sizeof(long);
915#else
916#error undefined architecture
917#endif
918				}
919
920				break;
921
922#if defined(__ppc64__)
923			case FFI_TYPE_POINTER:
924#endif
925			case FFI_TYPE_SINT64:
926			case FFI_TYPE_UINT64:
927				/* Long long ints are passed in 1 or 2 GPRs.  */
928				avalue[i] = pgr;
929				ng += MODE_CHOICE(2,1);
930				pgr += MODE_CHOICE(2,1);
931
932				break;
933
934			case FFI_TYPE_FLOAT:
935				/*	A float value consumes a GPR.
936					There are 13 64-bit floating point registers.  */
937				if (nf < NUM_FPR_ARG_REGISTERS)
938				{
939					temp = pfr->d;
940					pfr->f = (float)temp;
941					avalue[i] = pfr;
942					pfr++;
943				}
944				else
945					avalue[i] = pgr;
946
947				nf++;
948				ng++;
949				pgr++;
950				break;
951
952			case FFI_TYPE_DOUBLE:
953				/*	A double value consumes one or two GPRs.
954					There are 13 64bit floating point registers.  */
955				if (nf < NUM_FPR_ARG_REGISTERS)
956				{
957					avalue[i] = pfr;
958					pfr++;
959				}
960				else
961					avalue[i] = pgr;
962
963				nf++;
964				ng += MODE_CHOICE(2,1);
965				pgr += MODE_CHOICE(2,1);
966
967				break;
968
969#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
970
971			case FFI_TYPE_LONGDOUBLE:
972#if defined(__ppc64__)
973				if (nf < NUM_FPR_ARG_REGISTERS)
974				{
975					avalue[i] = pfr;
976					pfr += 2;
977				}
978#elif defined(__ppc__)
979				/*	A long double value consumes 2/4 GPRs and 2 FPRs.
980					There are 13 64bit floating point registers.  */
981				if (nf < NUM_FPR_ARG_REGISTERS - 1)
982				{
983					avalue[i] = pfr;
984					pfr += 2;
985				}
986				/*	Here we have the situation where one part of the long double
987					is stored in fpr13 and the other part is already on the stack.
988					We use a union to pass the long double to avalue[i].  */
989				else if (nf == NUM_FPR_ARG_REGISTERS - 1)
990				{
991					memcpy (&temp_ld.lb[0], pfr, sizeof(ldbits));
992					memcpy (&temp_ld.lb[1], pgr + 2, sizeof(ldbits));
993					avalue[i] = &temp_ld.ld;
994				}
995#else
996#error undefined architecture
997#endif
998				else
999					avalue[i] = pgr;
1000
1001				nf += 2;
1002				ng += MODE_CHOICE(4,2);
1003				pgr += MODE_CHOICE(4,2);
1004
1005				break;
1006
1007#endif	/*	FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE	*/
1008
1009			default:
1010				FFI_ASSERT(0);
1011				break;
1012		}
1013	}
1014
1015	(closure->fun)(cif, rvalue, avalue, closure->user_data);
1016
1017	/* Tell ffi_closure_ASM to perform return type promotions.  */
1018	return cif->rtype->type;
1019}
1020
1021#if defined(__ppc64__)
1022
1023/*	ffi64_struct_to_ram_form
1024
1025	Rebuild a struct's natural layout from buffers of concatenated registers.
1026	Return the number of registers used.
1027	inGPRs[0-7] == r3, inFPRs[0-7] == f1 ...
1028*/
1029void
1030ffi64_struct_to_ram_form(
1031	const ffi_type*	inType,
1032	const char*		inGPRs,
1033	unsigned int*	ioGPRMarker,
1034	const char*		inFPRs,
1035	unsigned int*	ioFPRMarker,
1036	unsigned int*	ioFPRsUsed,
1037	char*			outStruct,	// caller-allocated
1038	unsigned int*	ioStructMarker)
1039{
1040	unsigned int	srcGMarker		= 0;
1041	unsigned int	srcFMarker		= 0;
1042	unsigned int	savedFMarker	= 0;
1043	unsigned int	fprsUsed		= 0;
1044	unsigned int	savedFPRsUsed	= 0;
1045	unsigned int	destMarker		= 0;
1046
1047	static unsigned int	recurseCount	= 0;
1048
1049	if (ioGPRMarker)
1050		srcGMarker	= *ioGPRMarker;
1051
1052	if (ioFPRMarker)
1053	{
1054		srcFMarker		= *ioFPRMarker;
1055		savedFMarker	= srcFMarker;
1056	}
1057
1058	if (ioFPRsUsed)
1059	{
1060		fprsUsed		= *ioFPRsUsed;
1061		savedFPRsUsed	= fprsUsed;
1062	}
1063
1064	if (ioStructMarker)
1065		destMarker	= *ioStructMarker;
1066
1067	size_t			i;
1068
1069	switch (inType->size)
1070	{
1071		case 1: case 2: case 4:
1072			srcGMarker += 8 - inType->size;
1073			break;
1074
1075		default:
1076			break;
1077	}
1078
1079	for (i = 0; inType->elements[i] != NULL; i++)
1080	{
1081		switch (inType->elements[i]->type)
1082		{
1083			case FFI_TYPE_FLOAT:
1084				srcFMarker = ALIGN(srcFMarker, 4);
1085				srcGMarker = ALIGN(srcGMarker, 4);
1086				destMarker = ALIGN(destMarker, 4);
1087
1088				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1089				{
1090					*(float*)&outStruct[destMarker]	=
1091						(float)*(double*)&inFPRs[srcFMarker];
1092					srcFMarker += 8;
1093					fprsUsed++;
1094				}
1095				else
1096					*(float*)&outStruct[destMarker]	=
1097						(float)*(double*)&inGPRs[srcGMarker];
1098
1099				srcGMarker += 4;
1100				destMarker += 4;
1101
1102				// Skip to next GPR if next element won't fit and we're
1103				// not already at a register boundary.
1104				if (inType->elements[i + 1] != NULL && (destMarker % 8))
1105				{
1106					if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1107						(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1108						(ALIGN(srcGMarker, 8) - srcGMarker) < 2) &&
1109						(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1110						(ALIGN(srcGMarker, 8) - srcGMarker) < 4))
1111						srcGMarker	= ALIGN(srcGMarker, 8);
1112				}
1113
1114				break;
1115
1116			case FFI_TYPE_DOUBLE:
1117				srcFMarker = ALIGN(srcFMarker, 8);
1118				destMarker = ALIGN(destMarker, 8);
1119
1120				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1121				{
1122					*(double*)&outStruct[destMarker]	=
1123						*(double*)&inFPRs[srcFMarker];
1124					srcFMarker += 8;
1125					fprsUsed++;
1126				}
1127				else
1128					*(double*)&outStruct[destMarker]	=
1129						*(double*)&inGPRs[srcGMarker];
1130
1131				destMarker += 8;
1132
1133				// Skip next GPR
1134				srcGMarker += 8;
1135				srcGMarker = ALIGN(srcGMarker, 8);
1136
1137				break;
1138
1139			case FFI_TYPE_LONGDOUBLE:
1140				destMarker = ALIGN(destMarker, 16);
1141
1142				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1143				{
1144					srcFMarker = ALIGN(srcFMarker, 8);
1145					srcGMarker = ALIGN(srcGMarker, 8);
1146					*(long double*)&outStruct[destMarker]	=
1147						*(long double*)&inFPRs[srcFMarker];
1148					srcFMarker += 16;
1149					fprsUsed += 2;
1150				}
1151				else
1152				{
1153					srcFMarker = ALIGN(srcFMarker, 16);
1154					srcGMarker = ALIGN(srcGMarker, 16);
1155					*(long double*)&outStruct[destMarker]	=
1156						*(long double*)&inGPRs[srcGMarker];
1157				}
1158
1159				destMarker += 16;
1160
1161				// Skip next 2 GPRs
1162				srcGMarker += 16;
1163				srcGMarker = ALIGN(srcGMarker, 8);
1164
1165				break;
1166
1167			case FFI_TYPE_UINT8:
1168			case FFI_TYPE_SINT8:
1169			{
1170				if (inType->alignment == 1)	// chars only
1171				{
1172					if (inType->size == 1)
1173						outStruct[destMarker++] = inGPRs[srcGMarker++];
1174					else if (inType->size == 2)
1175					{
1176						outStruct[destMarker++] = inGPRs[srcGMarker++];
1177						outStruct[destMarker++] = inGPRs[srcGMarker++];
1178						i++;
1179					}
1180					else
1181					{
1182						memcpy(&outStruct[destMarker],
1183							&inGPRs[srcGMarker], inType->size);
1184						srcGMarker += inType->size;
1185						destMarker += inType->size;
1186						i += inType->size - 1;
1187					}
1188				}
1189				else	// chars and other stuff
1190				{
1191					outStruct[destMarker++] = inGPRs[srcGMarker++];
1192
1193					// Skip to next GPR if next element won't fit and we're
1194					// not already at a register boundary.
1195					if (inType->elements[i + 1] != NULL && (srcGMarker % 8))
1196					{
1197						if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1198							(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1199							(ALIGN(srcGMarker, 8) - srcGMarker) < 2) &&
1200							(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1201							(ALIGN(srcGMarker, 8) - srcGMarker) < 4))
1202							srcGMarker	= ALIGN(srcGMarker, inType->alignment);	// was 8
1203					}
1204				}
1205
1206				break;
1207			}
1208
1209			case FFI_TYPE_UINT16:
1210			case FFI_TYPE_SINT16:
1211				srcGMarker = ALIGN(srcGMarker, 2);
1212				destMarker = ALIGN(destMarker, 2);
1213
1214				*(short*)&outStruct[destMarker] =
1215					*(short*)&inGPRs[srcGMarker];
1216				srcGMarker += 2;
1217				destMarker += 2;
1218
1219				break;
1220
1221			case FFI_TYPE_INT:
1222			case FFI_TYPE_UINT32:
1223			case FFI_TYPE_SINT32:
1224				srcGMarker = ALIGN(srcGMarker, 4);
1225				destMarker = ALIGN(destMarker, 4);
1226
1227				*(int*)&outStruct[destMarker] =
1228					*(int*)&inGPRs[srcGMarker];
1229				srcGMarker += 4;
1230				destMarker += 4;
1231
1232				break;
1233
1234			case FFI_TYPE_POINTER:
1235			case FFI_TYPE_UINT64:
1236			case FFI_TYPE_SINT64:
1237				srcGMarker = ALIGN(srcGMarker, 8);
1238				destMarker = ALIGN(destMarker, 8);
1239
1240				*(long long*)&outStruct[destMarker] =
1241					*(long long*)&inGPRs[srcGMarker];
1242				srcGMarker += 8;
1243				destMarker += 8;
1244
1245				break;
1246
1247			case FFI_TYPE_STRUCT:
1248				recurseCount++;
1249				ffi64_struct_to_ram_form(inType->elements[i], inGPRs,
1250					&srcGMarker, inFPRs, &srcFMarker, &fprsUsed,
1251					outStruct, &destMarker);
1252				recurseCount--;
1253				break;
1254
1255			default:
1256				FFI_ASSERT(0);	// unknown element type
1257				break;
1258		}
1259	}
1260
1261	srcGMarker = ALIGN(srcGMarker, inType->alignment);
1262
1263	// Take care of the special case for 16-byte structs, but not for
1264	// nested structs.
1265	if (recurseCount == 0 && srcGMarker == 16)
1266	{
1267		*(long double*)&outStruct[0] = *(long double*)&inGPRs[0];
1268		srcFMarker	= savedFMarker;
1269		fprsUsed	= savedFPRsUsed;
1270	}
1271
1272	if (ioGPRMarker)
1273		*ioGPRMarker = ALIGN(srcGMarker, 8);
1274
1275	if (ioFPRMarker)
1276		*ioFPRMarker = srcFMarker;
1277
1278	if (ioFPRsUsed)
1279		*ioFPRsUsed	= fprsUsed;
1280
1281	if (ioStructMarker)
1282		*ioStructMarker	= ALIGN(destMarker, 8);
1283}
1284
1285/*	ffi64_struct_to_reg_form
1286
1287	Copy a struct's elements into buffers that can be sliced into registers.
1288	Return the sizes of the output buffers in bytes. Pass NULL buffer pointers
1289	to calculate size only.
1290	outGPRs[0-7] == r3, outFPRs[0-7] == f1 ...
1291*/
1292void
1293ffi64_struct_to_reg_form(
1294	const ffi_type*	inType,
1295	const char*		inStruct,
1296	unsigned int*	ioStructMarker,
1297	unsigned int*	ioFPRsUsed,
1298	char*			outGPRs,	// caller-allocated
1299	unsigned int*	ioGPRSize,
1300	char*			outFPRs,	// caller-allocated
1301	unsigned int*	ioFPRSize)
1302{
1303	size_t			i;
1304	unsigned int	srcMarker		= 0;
1305	unsigned int	destGMarker		= 0;
1306	unsigned int	destFMarker		= 0;
1307	unsigned int	savedFMarker	= 0;
1308	unsigned int	fprsUsed		= 0;
1309	unsigned int	savedFPRsUsed	= 0;
1310
1311	static unsigned int	recurseCount	= 0;
1312
1313	if (ioStructMarker)
1314		srcMarker	= *ioStructMarker;
1315
1316	if (ioFPRsUsed)
1317	{
1318		fprsUsed		= *ioFPRsUsed;
1319		savedFPRsUsed	= fprsUsed;
1320	}
1321
1322	if (ioGPRSize)
1323		destGMarker	= *ioGPRSize;
1324
1325	if (ioFPRSize)
1326	{
1327		destFMarker		= *ioFPRSize;
1328		savedFMarker	= destFMarker;
1329	}
1330
1331	switch (inType->size)
1332	{
1333		case 1: case 2: case 4:
1334			destGMarker += 8 - inType->size;
1335			break;
1336
1337		default:
1338			break;
1339	}
1340
1341	for (i = 0; inType->elements[i] != NULL; i++)
1342	{
1343		switch (inType->elements[i]->type)
1344		{
1345			// Shadow floating-point types in GPRs for vararg and pre-ANSI
1346			// functions.
1347			case FFI_TYPE_FLOAT:
1348				// Nudge markers to next 4/8-byte boundary
1349				srcMarker = ALIGN(srcMarker, 4);
1350				destGMarker = ALIGN(destGMarker, 4);
1351				destFMarker = ALIGN(destFMarker, 8);
1352
1353				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1354				{
1355					if (outFPRs != NULL && inStruct != NULL)
1356						*(double*)&outFPRs[destFMarker] =
1357							(double)*(float*)&inStruct[srcMarker];
1358
1359					destFMarker += 8;
1360					fprsUsed++;
1361				}
1362
1363				if (outGPRs != NULL && inStruct != NULL)
1364					*(double*)&outGPRs[destGMarker] =
1365						(double)*(float*)&inStruct[srcMarker];
1366
1367				srcMarker += 4;
1368				destGMarker += 4;
1369
1370				// Skip to next GPR if next element won't fit and we're
1371				// not already at a register boundary.
1372				if (inType->elements[i + 1] != NULL && (srcMarker % 8))
1373				{
1374					if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1375						(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1376						(ALIGN(destGMarker, 8) - destGMarker) < 2) &&
1377						(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1378						(ALIGN(destGMarker, 8) - destGMarker) < 4))
1379						destGMarker	= ALIGN(destGMarker, 8);
1380				}
1381
1382				break;
1383
1384			case FFI_TYPE_DOUBLE:
1385				srcMarker = ALIGN(srcMarker, 8);
1386				destFMarker = ALIGN(destFMarker, 8);
1387
1388				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1389				{
1390					if (outFPRs != NULL && inStruct != NULL)
1391						*(double*)&outFPRs[destFMarker] =
1392							*(double*)&inStruct[srcMarker];
1393
1394					destFMarker += 8;
1395					fprsUsed++;
1396				}
1397
1398				if (outGPRs != NULL && inStruct != NULL)
1399					*(double*)&outGPRs[destGMarker] =
1400						*(double*)&inStruct[srcMarker];
1401
1402				srcMarker += 8;
1403
1404				// Skip next GPR
1405				destGMarker += 8;
1406				destGMarker = ALIGN(destGMarker, 8);
1407
1408				break;
1409
1410			case FFI_TYPE_LONGDOUBLE:
1411				srcMarker = ALIGN(srcMarker, 16);
1412
1413				if (fprsUsed < NUM_FPR_ARG_REGISTERS)
1414				{
1415					destFMarker = ALIGN(destFMarker, 8);
1416					destGMarker = ALIGN(destGMarker, 8);
1417
1418					if (outFPRs != NULL && inStruct != NULL)
1419						*(long double*)&outFPRs[destFMarker] =
1420							*(long double*)&inStruct[srcMarker];
1421
1422					if (outGPRs != NULL && inStruct != NULL)
1423						*(long double*)&outGPRs[destGMarker] =
1424							*(long double*)&inStruct[srcMarker];
1425
1426					destFMarker += 16;
1427					fprsUsed += 2;
1428				}
1429				else
1430				{
1431				 	destGMarker = ALIGN(destGMarker, 16);
1432
1433					 if (outGPRs != NULL && inStruct != NULL)
1434						*(long double*)&outGPRs[destGMarker] =
1435							*(long double*)&inStruct[srcMarker];
1436				}
1437
1438				srcMarker += 16;
1439				destGMarker += 16;	// Skip next 2 GPRs
1440				destGMarker = ALIGN(destGMarker, 8);	// was 16
1441
1442				break;
1443
1444			case FFI_TYPE_UINT8:
1445			case FFI_TYPE_SINT8:
1446				if (inType->alignment == 1)	// bytes only
1447				{
1448					if (inType->size == 1)
1449					{
1450						if (outGPRs != NULL && inStruct != NULL)
1451							outGPRs[destGMarker] = inStruct[srcMarker];
1452
1453						srcMarker++;
1454						destGMarker++;
1455					}
1456					else if (inType->size == 2)
1457					{
1458						if (outGPRs != NULL && inStruct != NULL)
1459						{
1460							outGPRs[destGMarker] = inStruct[srcMarker];
1461							outGPRs[destGMarker + 1] = inStruct[srcMarker + 1];
1462						}
1463
1464						srcMarker += 2;
1465						destGMarker += 2;
1466
1467						i++;
1468					}
1469					else
1470					{
1471						if (outGPRs != NULL && inStruct != NULL)
1472						{
1473							// Avoid memcpy for small chunks.
1474							if (inType->size <= sizeof(long))
1475								*(long*)&outGPRs[destGMarker] =
1476									*(long*)&inStruct[srcMarker];
1477							else
1478								memcpy(&outGPRs[destGMarker],
1479									&inStruct[srcMarker], inType->size);
1480						}
1481
1482						srcMarker += inType->size;
1483						destGMarker += inType->size;
1484						i += inType->size - 1;
1485					}
1486				}
1487				else	// bytes and other stuff
1488				{
1489					if (outGPRs != NULL && inStruct != NULL)
1490						outGPRs[destGMarker] = inStruct[srcMarker];
1491
1492					srcMarker++;
1493					destGMarker++;
1494
1495					// Skip to next GPR if next element won't fit and we're
1496					// not already at a register boundary.
1497					if (inType->elements[i + 1] != NULL && (destGMarker % 8))
1498					{
1499						if (!FFI_TYPE_1_BYTE(inType->elements[i + 1]->type) &&
1500							(!FFI_TYPE_2_BYTE(inType->elements[i + 1]->type) ||
1501							(ALIGN(destGMarker, 8) - destGMarker) < 2) &&
1502							(!FFI_TYPE_4_BYTE(inType->elements[i + 1]->type) ||
1503							(ALIGN(destGMarker, 8) - destGMarker) < 4))
1504							destGMarker	= ALIGN(destGMarker, inType->alignment);	// was 8
1505					}
1506				}
1507
1508				break;
1509
1510			case FFI_TYPE_UINT16:
1511			case FFI_TYPE_SINT16:
1512				srcMarker = ALIGN(srcMarker, 2);
1513				destGMarker = ALIGN(destGMarker, 2);
1514
1515				if (outGPRs != NULL && inStruct != NULL)
1516					*(short*)&outGPRs[destGMarker] =
1517						*(short*)&inStruct[srcMarker];
1518
1519				srcMarker += 2;
1520				destGMarker += 2;
1521
1522				if (inType->elements[i + 1] == NULL)
1523					destGMarker	= ALIGN(destGMarker, inType->alignment);
1524
1525				break;
1526
1527			case FFI_TYPE_INT:
1528			case FFI_TYPE_UINT32:
1529			case FFI_TYPE_SINT32:
1530				srcMarker = ALIGN(srcMarker, 4);
1531				destGMarker = ALIGN(destGMarker, 4);
1532
1533				if (outGPRs != NULL && inStruct != NULL)
1534					*(int*)&outGPRs[destGMarker] =
1535						*(int*)&inStruct[srcMarker];
1536
1537				srcMarker += 4;
1538				destGMarker += 4;
1539
1540				break;
1541
1542			case FFI_TYPE_POINTER:
1543			case FFI_TYPE_UINT64:
1544			case FFI_TYPE_SINT64:
1545				srcMarker = ALIGN(srcMarker, 8);
1546				destGMarker = ALIGN(destGMarker, 8);
1547
1548				if (outGPRs != NULL && inStruct != NULL)
1549					*(long long*)&outGPRs[destGMarker] =
1550						*(long long*)&inStruct[srcMarker];
1551
1552				srcMarker += 8;
1553				destGMarker += 8;
1554
1555				if (inType->elements[i + 1] == NULL)
1556					destGMarker	= ALIGN(destGMarker, inType->alignment);
1557
1558				break;
1559
1560			case FFI_TYPE_STRUCT:
1561				recurseCount++;
1562				ffi64_struct_to_reg_form(inType->elements[i],
1563					inStruct, &srcMarker, &fprsUsed, outGPRs,
1564					&destGMarker, outFPRs, &destFMarker);
1565				recurseCount--;
1566				break;
1567
1568			default:
1569				FFI_ASSERT(0);
1570				break;
1571		}
1572	}
1573
1574	destGMarker	= ALIGN(destGMarker, inType->alignment);
1575
1576	// Take care of the special case for 16-byte structs, but not for
1577	// nested structs.
1578	if (recurseCount == 0 && destGMarker == 16)
1579	{
1580		if (outGPRs != NULL && inStruct != NULL)
1581			*(long double*)&outGPRs[0] = *(long double*)&inStruct[0];
1582
1583		destFMarker	= savedFMarker;
1584		fprsUsed	= savedFPRsUsed;
1585	}
1586
1587	if (ioStructMarker)
1588		*ioStructMarker	= ALIGN(srcMarker, 8);
1589
1590	if (ioFPRsUsed)
1591		*ioFPRsUsed	= fprsUsed;
1592
1593	if (ioGPRSize)
1594		*ioGPRSize = ALIGN(destGMarker, 8);
1595
1596	if (ioFPRSize)
1597		*ioFPRSize = ALIGN(destFMarker, 8);
1598}
1599
1600/*	ffi64_stret_needs_ptr
1601
1602	Determine whether a returned struct needs a pointer in r3 or can fit
1603	in registers.
1604*/
1605
1606bool
1607ffi64_stret_needs_ptr(
1608	const ffi_type*	inType,
1609	unsigned short*	ioGPRCount,
1610	unsigned short*	ioFPRCount)
1611{
1612	// Obvious case first- struct is larger than combined FPR size.
1613	if (inType->size > 14 * 8)
1614		return true;
1615
1616	// Now the struct can physically fit in registers, determine if it
1617	// also fits logically.
1618	bool			needsPtr	= false;
1619	unsigned short	gprsUsed	= 0;
1620	unsigned short	fprsUsed	= 0;
1621	size_t			i;
1622
1623	if (ioGPRCount)
1624		gprsUsed = *ioGPRCount;
1625
1626	if (ioFPRCount)
1627		fprsUsed = *ioFPRCount;
1628
1629	for (i = 0; inType->elements[i] != NULL && !needsPtr; i++)
1630	{
1631		switch (inType->elements[i]->type)
1632		{
1633			case FFI_TYPE_FLOAT:
1634			case FFI_TYPE_DOUBLE:
1635				gprsUsed++;
1636				fprsUsed++;
1637
1638				if (fprsUsed > 13)
1639					needsPtr = true;
1640
1641				break;
1642
1643			case FFI_TYPE_LONGDOUBLE:
1644				gprsUsed += 2;
1645				fprsUsed += 2;
1646
1647				if (fprsUsed > 14)
1648					needsPtr = true;
1649
1650				break;
1651
1652			case FFI_TYPE_UINT8:
1653			case FFI_TYPE_SINT8:
1654			{
1655				gprsUsed++;
1656
1657				if (gprsUsed > 8)
1658				{
1659					needsPtr = true;
1660					break;
1661				}
1662
1663				if (inType->elements[i + 1] == NULL)	// last byte in the struct
1664					break;
1665
1666				// Count possible contiguous bytes ahead, up to 8.
1667				unsigned short j;
1668
1669				for (j = 1; j < 8; j++)
1670				{
1671					if (inType->elements[i + j] == NULL ||
1672						!FFI_TYPE_1_BYTE(inType->elements[i + j]->type))
1673						break;
1674				}
1675
1676				i += j - 1;	// allow for i++ before the test condition
1677
1678				break;
1679			}
1680
1681			case FFI_TYPE_UINT16:
1682			case FFI_TYPE_SINT16:
1683			case FFI_TYPE_INT:
1684			case FFI_TYPE_UINT32:
1685			case FFI_TYPE_SINT32:
1686			case FFI_TYPE_POINTER:
1687			case FFI_TYPE_UINT64:
1688			case FFI_TYPE_SINT64:
1689				gprsUsed++;
1690
1691				if (gprsUsed > 8)
1692					needsPtr = true;
1693
1694				break;
1695
1696			case FFI_TYPE_STRUCT:
1697				needsPtr = ffi64_stret_needs_ptr(
1698					inType->elements[i], &gprsUsed, &fprsUsed);
1699
1700				break;
1701
1702			default:
1703				FFI_ASSERT(0);
1704				break;
1705		}
1706	}
1707
1708	if (ioGPRCount)
1709		*ioGPRCount = gprsUsed;
1710
1711	if (ioFPRCount)
1712		*ioFPRCount = fprsUsed;
1713
1714	return needsPtr;
1715}
1716
1717/*	ffi64_data_size
1718
1719	Calculate the size in bytes of an ffi type.
1720*/
1721
1722unsigned int
1723ffi64_data_size(
1724	const ffi_type*	inType)
1725{
1726	unsigned int	size = 0;
1727
1728	switch (inType->type)
1729	{
1730		case FFI_TYPE_UINT8:
1731		case FFI_TYPE_SINT8:
1732			size = 1;
1733			break;
1734
1735		case FFI_TYPE_UINT16:
1736		case FFI_TYPE_SINT16:
1737			size = 2;
1738			break;
1739
1740		case FFI_TYPE_INT:
1741		case FFI_TYPE_UINT32:
1742		case FFI_TYPE_SINT32:
1743		case FFI_TYPE_FLOAT:
1744			size = 4;
1745			break;
1746
1747		case FFI_TYPE_POINTER:
1748		case FFI_TYPE_UINT64:
1749		case FFI_TYPE_SINT64:
1750		case FFI_TYPE_DOUBLE:
1751			size = 8;
1752			break;
1753
1754		case FFI_TYPE_LONGDOUBLE:
1755			size = 16;
1756			break;
1757
1758		case FFI_TYPE_STRUCT:
1759			ffi64_struct_to_reg_form(
1760				inType, NULL, NULL, NULL, NULL, &size, NULL, NULL);
1761			break;
1762
1763		case FFI_TYPE_VOID:
1764			break;
1765
1766		default:
1767			FFI_ASSERT(0);
1768			break;
1769	}
1770
1771	return size;
1772}
1773
1774#endif	/*	defined(__ppc64__)	*/
1775#endif	/* __ppc__ || __ppc64__ */
1776