1/*
2 * Copyright (c) 2002-2007 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28
29/* Emulate64.s
30 *
31 * Software emulation of instructions not handled in hw, on 64-bit machines.
32 */
33
34#include <sys/appleapiopts.h>
35#include <ppc/asm.h>
36#include <ppc/proc_reg.h>
37#include <ppc/exception.h>
38#include <mach/machine/vm_param.h>
39#include <ppc/cpu_capabilities.h>
40#include <assym.s>
41
42// CR bit set if the instruction is an "update" form (LFDU, STWU, etc):
43#define	kUpdate	25
44
45// CR bit set if interrupt occured in trace mode (ie, MSR_SE_BIT):
46#define kTrace	8
47
48// CR bit set if notification on alignment interrupts is requested (notifyUnalignbit in spcFlags):
49#define	kNotify	9
50
51// CR bit distinguishes between alignment and program exceptions:
52#define	kAlignment	10
53
54
55
56// *************************************
57// * P R O G R A M   I N T E R R U P T *
58// *************************************
59//
60// These are floating pt exceptions, illegal instructions, privileged mode violations,
61// and traps.  All we're interested in at this low level is illegal instructions.
62// The ones we "emulate" are:
63//		DCBA,  which is not implemented in the IBM 970.  The emulation is to ignore it,
64//			   as it is just a hint.
65//		MCRXR, which is not implemented on the IBM 970, but is in the PPC ISA.
66//
67// Additionally, to facilitate debugging the alignment handler, we recognize a special
68// diagnostic mode that is used to simulate alignment exceptions.  When in this mode,
69// if the instruction has opcode==0 and the extended opcode is one of the X-form
70// instructions that can take an alignment interrupt, then we change the opcode to
71// 31 and pretend it got an alignment interrupt.  This exercises paths that
72// are hard to drive or perhaps never driven on this particular CPU.
73
74        .text
75        .globl	EXT(Emulate64)
76        .align	5
77LEXT(Emulate64)
78        crclr	kAlignment						// not an alignment exception
79        b		a64AlignAssistJoin				// join alignment handler
80
81
82// Return from alignment handler with all the regs loaded for opcode emulation.
83
84a64HandleProgramInt:
85        rlwinm.	r0,r29,0,SRR1_PRG_ILL_INS_BIT,SRR1_PRG_ILL_INS_BIT	// illegal opcode?
86        beq		a64PassAlong					// No, must have been trap or priv violation etc
87        rlwinm	r3,r20,6,26,31					// right justify opcode field (bits 0-5)
88        rlwinm	r4,r20,31,22,31					// right justify extended opcode field (bits 21-30)
89        cmpwi	cr0,r3,31						// X-form?
90        cmpwi	cr1,r4,758						// DCBA?
91        cmpwi	cr4,r4,512						// MCRXR?
92        crand	cr1_eq,cr0_eq,cr1_eq			// merge the two tests for DCBA
93        crand	cr4_eq,cr0_eq,cr4_eq			// and for MCRXR
94        beq++	cr1_eq,a64ExitEm				// was DCBA, so ignore
95        bne--	cr4_eq,a64NotEmulated			// skip if not MCRXR
96
97// Was MCRXR, so emulate.
98
99        ld		r3,savexer(r13)					// get the XER
100        lwz		r4,savecr(r13)					// and the CR
101        rlwinm	r5,r20,11,27,29					// get (CR# * 4) from instruction
102        rlwinm	r6,r3,0,4,31					// zero XER[32-35] (also XER[0-31])
103        sld		r4,r4,r5						// move target CR field to bits 32-35
104        rlwimi	r4,r3,0,0,3						// move XER[32-35] into CR field
105        stw		r6,savexer+4(r13)				// update XER
106        srd		r4,r4,r5						// re-position CR
107        stw		r4,savecr(r13)					// update CR
108        b		a64ExitEm						// done
109
110// Not an opcode we normally emulate.  If in special diagnostic mode and opcode=0,
111// emulate as an alignment exception.  This special case is for test software.
112
113a64NotEmulated:
114        lwz		r30,dgFlags(0)					// Get the flags
115        rlwinm.	r0,r30,0,enaDiagEMb,enaDiagEMb	// Do we want to try to emulate something?
116        beq++	a64PassAlong					// No emulation allowed
117        cmpwi	r3,0							// opcode==0 ?
118        bne		a64PassAlong					// not the special case
119        oris	r20,r20,0x7C00					// change opcode to 31
120        crset	kAlignment						// say we took alignment exception
121        rlwinm	r5,r4,0,26+1,26-1				// mask Update bit (32) out of extended opcode
122        rlwinm	r5,r5,0,0,31					// Clean out leftover junk from rlwinm
123
124        cmpwi	r4,1014							// dcbz/dcbz128 ?
125        crmove	cr1_eq,cr0_eq
126        cmpwi	r5,21							// ldx/ldux ?
127        cror	cr1_eq,cr0_eq,cr1_eq
128        cmpwi	r5,599							// lfdx/lfdux ?
129        cror	cr1_eq,cr0_eq,cr1_eq
130        cmpwi	r5,535							// lfsx/lfsux ?
131        cror	cr1_eq,cr0_eq,cr1_eq
132        cmpwi	r5,343							// lhax/lhaux ?
133        cror	cr1_eq,cr0_eq,cr1_eq
134        cmpwi	r4,790							// lhbrx ?
135        cror	cr1_eq,cr0_eq,cr1_eq
136        cmpwi	r5,279							// lhzx/lhzux ?
137        cror	cr1_eq,cr0_eq,cr1_eq
138        cmpwi	r4,597							// lswi ?
139        cror	cr1_eq,cr0_eq,cr1_eq
140        cmpwi	r4,533							// lswx ?
141        cror	cr1_eq,cr0_eq,cr1_eq
142        cmpwi	r5,341							// lwax/lwaux ?
143        cror	cr1_eq,cr0_eq,cr1_eq
144        cmpwi	r4,534							// lwbrx ?
145        cror	cr1_eq,cr0_eq,cr1_eq
146        cmpwi	r5,23							// lwz/lwzx ?
147        cror	cr1_eq,cr0_eq,cr1_eq
148        cmpwi	r5,149							// stdx/stdux ?
149        cror	cr1_eq,cr0_eq,cr1_eq
150        cmpwi	r5,727							// stfdx/stfdux ?
151        cror	cr1_eq,cr0_eq,cr1_eq
152        cmpwi	r4,983							// stfiwx ?
153        cror	cr1_eq,cr0_eq,cr1_eq
154        cmpwi	r5,663							// stfsx/stfsux ?
155        cror	cr1_eq,cr0_eq,cr1_eq
156        cmpwi	r4,918							// sthbrx ?
157        cror	cr1_eq,cr0_eq,cr1_eq
158        cmpwi	r5,407							// sthx/sthux ?
159        cror	cr1_eq,cr0_eq,cr1_eq
160        cmpwi	r4,725							// stswi ?
161        cror	cr1_eq,cr0_eq,cr1_eq
162        cmpwi	r4,661							// stswx ?
163        cror	cr1_eq,cr0_eq,cr1_eq
164        cmpwi	r4,662							// stwbrx ?
165        cror	cr1_eq,cr0_eq,cr1_eq
166        cmpwi	r5,151							// stwx/stwux ?
167        cror	cr1_eq,cr0_eq,cr1_eq
168
169        beq++	cr1,a64GotInstruction			// it was one of the X-forms we handle
170        crclr	kAlignment						// revert to program interrupt
171        b		a64PassAlong					// not recognized extended opcode
172
173
174// *****************************************
175// * A L I G N M E N T   I N T E R R U P T *
176// *****************************************
177//
178// We get here in exception context, ie with interrupts disabled, translation off, and
179// in 64-bit mode, with:
180//		r13 = save-area pointer, with general context already saved in it
181//		cr6 = feature flags
182// We preserve r13 and cr6.  Other GPRs and CRs, the LR and CTR are used.
183//
184// Current 64-bit processors (GPUL) handle almost all misaligned operations in hardware,
185// so this routine usually isn't called very often.  Only floating pt ops that cross a page
186// boundary and are not word aligned, and LMW/STMW can take exceptions to cacheable memory.
187// However, in contrast to G3 and G4, any misaligned load/store will get an alignment
188// interrupt on uncached memory.
189//
190// We always emulate scalar ops with a series of byte load/stores.  Doing so is no slower
191// than LWZ/STW in cases where a scalar op gets an alignment exception.
192//
193// This routine supports all legal permutations of alignment interrupts occuring in user or
194// supervisor mode, 32 or 64-bit addressing, and translation on or off.  We do not emulate
195// instructions that go past the end of an address space, such as "LHZ -1(0)"; we just pass
196// along the alignment exception rather than wrap around to byte 0.
197//
198// First, check for a few special cases such as virtual machines, etc.
199
200        .globl	EXT(AlignAssist64)
201        .align	5
202LEXT(AlignAssist64)
203        crset	kAlignment								// mark as alignment interrupt
204
205a64AlignAssistJoin:										// join here from program interrupt handler
206      	li		r0,0									// Get a 0
207        mfsprg	r31,0									// get the per_proc data ptr
208        mcrf	cr3,cr6									// save feature flags here...
209        lwz		r21,spcFlags(r31)						// grab the special flags
210        ld		r29,savesrr1(r13)						// get the MSR etc at the fault
211        ld		r28,savesrr0(r13)						// get the EA of faulting instruction
212       	stw		r0,savemisc3(r13)						// Assume we will handle this ok
213        mfmsr	r26										// save MSR at entry
214        rlwinm.	r0,r21,0,runningVMbit,runningVMbit		// Are we running a VM?
215        lwz		r19,dgFlags(0)							// Get the diagnostics flags
216        bne--	a64PassAlong							// yes, let the virtual machine monitor handle
217
218
219// Set up the MSR shadow regs.  We turn on FP in this routine, and usually set DR and RI
220// when accessing user space (the SLB is still set up with all the user space translations.)
221// However, if the interrupt occured in the kernel with DR off, we keep it off while
222// accessing the "target" address space.  If we set DR to access the target space, we also
223// set RI.  The RI bit tells the exception handlers to clear cr0 beq and return if we get an
224// exception accessing the user address space.  We are careful to test cr0 beq after every such
225// access.  We keep the following "shadows" of the MSR in global regs across this code:
226//		r25 = MSR at entry, plus FP and probably DR and RI (used to access target space)
227//		r26 = MSR at entry
228//		r27 = free
229//		r29 = SRR1 (ie, MSR at interrupt)
230// Note that EE and IR are always off, and SF is always on in this code.
231
232		rlwinm	r3,r29,31,MSR_DR_BIT,MSR_DR_BIT			// Move instruction translate bit to DR
233        rlwimi	r3,r3,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT	// if DR is now set, set RI too
234        or		r25,r26,r3								// assemble MSR to use accessing target space
235
236
237// Because the DSISR and DAR are either not set or are not to be trusted on some 64-bit
238// processors on an alignment interrupt, we must fetch the faulting instruction ourselves,
239// then decode/hash the opcode and reconstruct the EA manually.
240
241        mtmsr	r25					// turn on FP and (if it was on at fault) DR and RI
242        isync						// wait for it to happen
243		cmpw	r0,r0				// turn on beq so we can check for DSIs
244        lwz		r20,0(r28)			// fetch faulting instruction, probably with DR on
245        bne--	a64RedriveAsISI		// got a DSI trying to fetch it, pretend it was an ISI
246        mtmsr	r26					// turn DR back off
247        isync						// wait for it to happen
248
249
250// Set a few flags while we wait for the faulting instruction to arrive from cache.
251
252        rlwinm.	r0,r29,0,MSR_SE_BIT,MSR_SE_BIT				// Were we single stepping?
253		stw		r20,savemisc2(r13)	// Save the instruction image in case we notify
254        crnot	kTrace,cr0_eq
255        rlwinm.	r0,r19,0,enaNotifyEMb,enaNotifyEMb			// Should we notify?
256        crnot	kNotify,cr0_eq
257
258		rlwinm	r3,r29,0,MSR_DR_BIT,MSR_DR_BIT			// was data translation on at fault?
259        rlwimi	r3,r3,32-MSR_RI_BIT+MSR_DR_BIT,MSR_RI_BIT,MSR_RI_BIT	// if DR is now set, set RI too
260        or		r25,r26,r3								// assemble MSR to use accessing target space
261
262
263// Hash the intruction into a 5-bit value "AAAAB" used to index the branch table, and a
264// 1-bit kUpdate flag, as follows:
265//  � for X-form instructions (with primary opcode 31):
266//       the "AAAA" bits are bits 21-24 of the instruction
267//       the "B" bit is the XOR of bits 29 and 30
268//       the update bit is instruction bit 25
269//	� for D and DS-form instructions (actually, any primary opcode except 31):
270//       the "AAAA" bits are bits 1-4 of the instruction
271//       the "B" bit is 0
272//       the update bit is instruction bit 5
273//
274// Just for fun (and perhaps a little speed on deep-pipe machines), we compute the hash,
275// update flag, and EA without branches and with ipc >= 2.
276//
277// When we "bctr" to the opcode-specific reoutine, the following are all set up:
278//		MSR = EE and IR off, SF and FP on
279//		r12 = full 64-bit EA (r17 is clamped EA)
280//		r13 = save-area pointer (physical)
281//		r14 = ptr to saver0 in save-area (ie, to base of GPRs)
282//		r15 = 0x00000000FFFFFFFF if 32-bit mode fault, 0xFFFFFFFFFFFFFFFF if 64
283//		r16 = RA * 8 (ie, reg# not reg value)
284//		r17 = EA, clamped to 32 bits if 32-bit mode fault (see also r12)
285//		r18 = (RA|0) (reg value)
286//		r19 = -1 if X-form, 0 if D-form
287//		r20 = faulting instruction
288//		r21 = RT * 8 (ie, reg# not reg value)
289//		r22 = addr(aaFPopTable)+(RT*32), ie ptr to floating pt table for target register
290//		r25 = MSR at entrance, probably with DR and RI set (for access to target space)
291//		r26 = MSR at entrance
292//		r27 = free
293//		r28 = SRR0 (ie, EA of faulting instruction)
294//		r29 = SRR1 (ie, MSR at fault)
295//		r30 = scratch, usually user data
296//		r31 = per-proc pointer
297//		cr2 = kTrace, kNotify, and kAlignment flags
298//      cr3 = saved copy of feature flags used in lowmem vector code
299//		cr6 = bits 24-27 of CR are bits 24-27 of opcode if X-form, or bits 4-5 and 00 if D-form
300//			  bit 25 is the kUpdate flag, set for update form instructions
301//		cr7 = bits 28-31 of CR are bits 28-31 of opcode if X-form, or 0 if D-form
302
303a64GotInstruction:					// here from program interrupt with instruction in r20
304        rlwinm	r21,r20,6+6,20,25	// move the primary opcode (bits 0-6) to bits 20-25
305        la		r14,saver0(r13)		// r14 <- base address of GPR registers
306        xori	r19,r21,0x07C0		// iff primary opcode is 31, set r19 to 0
307        rlwinm	r16,r20,16+3,24,28	// r16 <- RA*8
308        subi	r19,r19,1			// set bit 0 iff X-form (ie, if primary opcode is 31)
309        rlwinm	r17,r20,21+3,24,28	// r17 <- RB*8 (if X-form)
310        sradi	r19,r19,63			// r19 <- -1 if X-form, 0 if D-form
311        extsh	r22,r20				// r22 <- displacement (if D-form)
312
313        ldx		r23,r14,r17			// get (RB), if any
314        and		r15,r20,r19			// instruction if X, 0 if D
315        andc	r17,r21,r19			// primary opcode in bits 20-25 if D, 0 if X
316        ldx		r18,r14,r16			// get (RA)
317        subi	r24,r16,1			// set bit 0 iff RA==0
318        or		r21,r15,r17			// r21 <- instruction if X, or bits 0-5 in bits 20-25 if D
319        sradi	r24,r24,63			// r24 <- -1 if RA==0, 0 otherwise
320        rlwinm	r17,r21,32-4,25,28	// shift opcode bits 21-24 to 25-28 (hash "AAAA" bits)
321        lis		r10,ha16(a64BranchTable)	// start to build up branch table address
322        rlwimi	r17,r21,0,29,29		// move opcode bit 29 into hash as start of "B" bit
323        rlwinm	r30,r21,1,29,29		// position opcode bit 30 in position 29
324        and		r12,r23,r19			// RB if X-form, 0 if D-form
325        andc	r11,r22,r19			// 0 if X-form, sign extended displacement if D-form
326        xor		r17,r17,r30			// bit 29 ("B") of hash is xor(bit29,bit30)
327        addi	r10,r10,lo16(a64BranchTable)
328        or		r12,r12,r11			// r12 <- (RB) or displacement, as appropriate
329        lwzx	r30,r10,r17			// get address from branch table
330        mtcrf	0x01,r21			// move opcode bits 28-31 to CR7
331        sradi	r15,r29,32			// propogate SF bit from SRR1 (MSR_SF, which is bit 0)
332        andc	r18,r18,r24			// r18 <- (RA|0)
333        mtcrf	0x02,r21			// move opcode bits 24-27 to CR6 (kUpdate is bit 25)
334        add		r12,r18,r12			// r12 <- 64-bit EA
335        mtctr	r30					// set up branch address
336
337        oris	r15,r15,0xFFFF		// start to fill low word of r15 with 1s
338        rlwinm	r21,r20,11+3,24,28	// r21 <- RT * 8
339        lis		r22,ha16(EXT(aaFPopTable))	// start to compute address of floating pt table
340        ori		r15,r15,0xFFFF		// now bits 32-63 of r15 are 1s
341        addi	r22,r22,lo16(EXT(aaFPopTable))
342        and		r17,r12,r15			// clamp EA to 32 bits if fault occured in 32-bit mode
343        rlwimi	r22,r21,2,22,26		// move RT into aaFPopTable address (which is 1KB aligned)
344
345        bf--	kAlignment,a64HandleProgramInt	// return to Program Interrupt handler
346        bctr						// if alignment interrupt, jump to opcode-specific routine
347
348
349// Floating-pt load single (lfs[u], lfsx[u])
350
351a64LfsLfsx:
352        bl		a64Load4Bytes		// get data in r30
353        mtctr	r22					// set up address of "lfs fRT,emfp0(r31)"
354        stw		r30,emfp0(r31)		// put word here for aaFPopTable routine
355        bctrl						// do the lfs
356        b		a64UpdateCheck		// update RA if necessary and exit
357
358
359// Floating-pt store single (stfs[u], stfsx[u])
360
361a64StfsStfsx:
362        ori		r22,r22,8			// set dir==1 (ie, single store) in aaFPopTable
363        mtctr	r22					// set up address of "stfs fRT,emfp0(r31)"
364        bctrl						// execute the store into emfp0
365        lwz		r30,emfp0(r31)		// get the word
366        bl		a64Store4Bytes		// store r30 into user space
367        b		a64UpdateCheck		// update RA if necessary and exit
368
369
370// Floating-pt store as integer word (stfiwx)
371
372a64Stfiwx:
373        ori		r22,r22,16+8		// set size=1, dir==1 (ie, double store) in aaFPopTable
374        mtctr	r22					// set up FP register table address
375        bctrl						// double precision store into emfp0
376        lwz		r30,emfp0+4(r31)	// get the low-order word
377        bl		a64Store4Bytes		// store r30 into user space
378        b		a64Exit				// successfully emulated
379
380
381// Floating-pt load double (lfd[u], lfdx[u])
382
383a64LfdLfdx:
384        ori		r22,r22,16			// set Double bit in aaFPopTable address
385        bl		a64Load8Bytes		// get data in r30
386        mtctr	r22					// set up address of "lfd fRT,emfp0(r31)"
387        std		r30,emfp0(r31)		// put doubleword here for aaFPopTable routine
388        bctrl						// execute the load
389        b		a64UpdateCheck		// update RA if necessary and exit
390
391
392// Floating-pt store double (stfd[u], stfdx[u])
393
394a64StfdStfdx:
395        ori		r22,r22,16+8		// set size=1, dir==1 (ie, double store) in aaFPopTable address
396        mtctr	r22					// address of routine to stfd RT
397        bctrl						// store into emfp0
398        ld		r30,emfp0(r31)		// get the doubleword
399        bl		a64Store8Bytes		// store r30 into user space
400        b		a64UpdateCheck		// update RA if necessary and exit
401
402
403// Load halfword w 0-fill (lhz[u], lhzx[u])
404
405a64LhzLhzx:
406        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
407        stdx	r30,r14,r21			// store into RT slot in register file
408        b		a64UpdateCheck		// update RA if necessary and exit
409
410
411// Load halfword w sign fill (lha[u], lhax[u])
412
413a64LhaLhax:
414        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
415        extsh	r30,r30				// sign-extend
416        stdx	r30,r14,r21			// store into RT slot in register file
417        b		a64UpdateCheck		// update RA if necessary and exit
418
419
420// Load halfword byte reversed (lhbrx)
421
422a64Lhbrx:
423        bl		a64Load2Bytes		// load into r30 from user space (w 0-fill)
424        rlwinm	r3,r30,8,16,23		// reverse bytes into r3
425        rlwimi	r3,r30,24,24,31
426        stdx	r3,r14,r21			// store into RT slot in register file
427        b		a64Exit				// successfully emulated
428
429
430// Store halfword (sth[u], sthx[u])
431
432a64SthSthx:
433        ldx		r30,r14,r21			// get RT
434        bl		a64Store2Bytes		// store r30 into user space
435        b		a64UpdateCheck		// update RA if necessary and exit
436
437
438// Store halfword byte reversed (sthbrx)
439
440a64Sthbrx:
441        addi	r21,r21,6			// point to low two bytes of RT
442        lhbrx	r30,r14,r21			// load and reverse
443        bl		a64Store2Bytes		// store r30 into user space
444        b		a64Exit				// successfully emulated
445
446
447// Load word w 0-fill (lwz[u], lwzx[u]), also lwarx.
448
449a64LwzLwzxLwarx:
450        andc	r3,r19,r20			// light bit 30 of r3 iff lwarx
451        andi.	r0,r3,2				// is it lwarx?
452        bne--	a64PassAlong		// yes, never try to emulate a lwarx
453        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
454        stdx	r30,r14,r21			// update register file
455        b		a64UpdateCheck		// update RA if necessary and exit
456
457
458// Load word w sign fill (lwa, lwax[u])
459
460a64Lwa:
461        crclr	kUpdate				// no update form of lwa (its a reserved encoding)
462a64Lwax:
463        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
464        extsw	r30,r30				// sign extend
465        stdx	r30,r14,r21			// update register file
466        b		a64UpdateCheck		// update RA if necessary and exit
467
468
469// Load word byte reversed (lwbrx)
470
471a64Lwbrx:
472        bl		a64Load4Bytes		// load 4 bytes from user space into r30 (0-filled)
473        rlwinm	r3,r30,24,0,31		// flip bytes 1234 to 4123
474        rlwimi	r3,r30,8,8,15		// r3 is now 4323
475        rlwimi	r3,r30,8,24,31		// r3 is now 4321
476        stdx	r3,r14,r21			// update register file
477        b		a64Exit				// successfully emulated
478
479
480// Store word (stw[u], stwx[u])
481
482a64StwStwx:
483        ldx		r30,r14,r21			// get RT
484        bl		a64Store4Bytes		// store r30 into user space
485        b		a64UpdateCheck		// update RA if necessary and exit
486
487
488// Store word byte reversed (stwbrx)
489
490a64Stwbrx:
491        addi	r21,r21,4			// point to low word of RT
492        lwbrx	r30,r14,r21			// load and reverse
493        bl		a64Store4Bytes		// store r30 into user space
494        b		a64Exit				// successfully emulated
495
496
497// Load doubleword (ld[u], ldx[u]), also lwa.
498
499a64LdLwa:							// these are DS form: ld=0, ldu=1, and lwa=2
500        mtcrf	0x01,r20			// move DS field to cr7
501        rlwinm	r3,r20,0,30,31		// must adjust EA by subtracting DS field
502        sub		r12,r12,r3			// subtract from full 64-bit EA
503        and		r17,r12,r15			// then re-clamp to 32 bits if necessary
504        bt		30,a64Lwa			// handle lwa
505        crmove	kUpdate,31			// if opcode bit 31 is set, it is ldu so set update flag
506a64Ldx:
507        bl		a64Load8Bytes		// load 8 bytes from user space into r30
508        stdx	r30,r14,r21			// update register file
509        b		a64UpdateCheck		// update RA if necessary and exit
510
511
512// Store doubleword (stdx[u], std[u], stwcx)
513
514a64StdxStwcx:
515        bf--	30,a64PassAlong		// stwcx, so pass along alignment exception
516        b		a64Stdx				// was stdx
517a64StdStfiwx:						// if DS form: 0=std, 1=stdu, 2-3=undefined
518        bt		30,a64Stfiwx		// handle stfiwx
519        rlwinm	r3,r20,0,30,31		// must adjust EA by subtracting DS field
520        mtcrf	0x01,r20			// move DS field to cr7
521        sub		r12,r12,r3			// subtract from full 64-bit EA
522        and		r17,r12,r15			// then re-clamp to 32 bits if necessary
523        crmove	kUpdate,31			// if DS==1, then it is update form
524a64Stdx:
525        ldx		r30,r14,r21			// get RT
526        bl		a64Store8Bytes		// store RT into user space
527        b		a64UpdateCheck		// update RA if necessary and exit
528
529
530// Dcbz and Dcbz128 (bit 10 distinguishes the two forms)
531
532a64DcbzDcbz128:
533        andis.	r0,r20,0x0020		// bit 10 set?
534        li		r3,0				// get a 0 to store
535        li		r0,4				// assume 32-bit version, store 8 bytes 4x
536        rldicr	r17,r17,0,63-5		// 32-byte align EA
537		li		r4,_COMM_PAGE_BASE_ADDRESS
538        beq		a64DcbzSetup		// it was the 32-byte version
539        rldicr	r17,r17,0,63-7		// zero low 7 bits of EA
540        li		r0,16				// store 8 bytes 16x
541a64DcbzSetup:
542		sub		r4,r28,r4			// get instruction offset from start of commpage
543        and		r4,r4,r15			// mask off high-order bits if 32-bit mode
544		cmpldi  r4,_COMM_PAGE_AREA_USED // did fault occur in commpage area?
545        bge		a64NotCommpage		// not in commpage
546        rlwinm.	r4,r29,0,MSR_PR_BIT,MSR_PR_BIT	// did fault occur in user mode?
547        beq--	a64NotCommpage		// do not zero cr7 if kernel got alignment exception
548        lwz		r4,savecr(r13)		// if we take a dcbz{128} in the commpage...
549        rlwinm	r4,r4,0,0,27		// ...clear user's cr7...
550        stw		r4,savecr(r13)		// ...as a flag for commpage code
551a64NotCommpage:
552        mtctr	r0
553        cmpw	r0,r0				// turn cr0 beq on so we can check for DSIs
554        mtmsr	r25					// turn on DR and RI so we can address user space
555        isync						// wait for it to happen
556a64DcbzLoop:
557        std		r3,0(r17)			// store into user space
558        bne--	a64RedriveAsDSI
559        addi	r17,r17,8
560        bdnz	a64DcbzLoop
561
562        mtmsr	r26					// restore MSR
563        isync						// wait for it to happen
564        b		a64Exit
565
566
567// Load and store multiple (lmw, stmw), distinguished by bit 25
568
569a64LmwStmw:
570        subfic	r22,r21,32*8		// how many regs to load or store?
571        srwi	r22,r22,1			// get bytes to load/store
572        bf		25,a64LoadMultiple	// handle lmw
573        b		a64StoreMultiple	// it was stmw
574
575
576// Load string word immediate (lswi)
577
578a64Lswi:
579        rlwinm	r22,r20,21,27,31	// get #bytes in r22
580        and		r17,r18,r15			// recompute EA as (RA|0), and clamp
581        subi	r3,r22,1			// r22==0?
582        rlwimi	r22,r3,6,26,26		// map count of 0 to 32
583        b		a64LoadMultiple
584
585
586// Store string word immediate (stswi)
587
588a64Stswi:
589        rlwinm	r22,r20,21,27,31	// get #bytes in r22
590        and		r17,r18,r15			// recompute EA as (RA|0), and clamp
591        subi	r3,r22,1			// r22==0?
592        rlwimi	r22,r3,6,26,26		// map count of 0 to 32
593        b		a64StoreMultiple
594
595
596// Load string word indexed (lswx), also lwbrx
597
598a64LswxLwbrx:
599        bf		30,a64Lwbrx			// was lwbrx
600        ld		r22,savexer(r13)	// get the xer
601        rlwinm	r22,r22,0,25,31		// isolate the byte count
602        b		a64LoadMultiple		// join common code
603
604
605// Store string word indexed (stswx), also stwbrx
606
607a64StswxStwbrx:
608        bf		30,a64Stwbrx		// was stwbrx
609        ld		r22,savexer(r13)	// get the xer
610        rlwinm	r22,r22,0,25,31		// isolate the byte count
611        b		a64StoreMultiple	// join common code
612
613
614// Load multiple words.  This handles lmw, lswi, and lswx.
615
616a64LoadMultiple:					// r22 = byte count, may be 0
617        subic.	r3,r22,1			// get (#bytes-1)
618        blt		a64Exit				// done if 0
619        add		r4,r17,r3			// get EA of last operand byte
620        and		r4,r4,r15			// clamp
621        cmpld	r4,r17				// address space wrap?
622        blt--	a64PassAlong		// pass along exception if so
623        srwi.	r4,r22,2			// get # full words to load
624        rlwinm	r22,r22,0,30,31		// r22 <- leftover byte count
625        cmpwi	cr1,r22,0			// leftover bytes?
626        beq		a64Lm3				// no words
627        mtctr	r4					// set up word count
628        cmpw	r0,r0				// set beq for DSI test
629a64Lm2:
630        mtmsr	r25					// turn on DR and RI
631        isync						// wait for it to happen
632        lbz		r3,0(r17)
633        bne--	a64RedriveAsDSI		// got a DSI
634        lbz		r4,1(r17)
635        bne--	a64RedriveAsDSI		// got a DSI
636        lbz		r5,2(r17)
637        bne--	a64RedriveAsDSI		// got a DSI
638        lbz		r6,3(r17)
639        bne--	a64RedriveAsDSI		// got a DSI
640        rlwinm	r30,r3,24,0,7		// pack bytes into r30
641        rldimi	r30,r4,16,40
642        rldimi	r30,r5,8,48
643        rldimi	r30,r6,0,56
644        mtmsr	r26					// turn DR back off so we can store into register file
645        isync
646        addi	r17,r17,4			// bump EA
647        stdx	r30,r14,r21			// pack into register file
648        addi	r21,r21,8			// bump register file offset
649        rlwinm	r21,r21,0,24,28		// wrap around to 0
650        bdnz	a64Lm2
651a64Lm3:								// cr1/r22 = leftover bytes (0-3), cr0 beq set
652        beq		cr1,a64Exit			// no leftover bytes
653        mtctr	r22
654        mtmsr	r25					// turn on DR so we can access user space
655        isync
656        lbz		r3,0(r17)			// get 1st leftover byte
657        bne--	a64RedriveAsDSI		// got a DSI
658        rlwinm	r30,r3,24,0,7		// position in byte 4 of r30 (and clear rest of r30)
659        bdz		a64Lm4				// only 1 byte leftover
660        lbz		r3,1(r17)			// get 2nd byte
661        bne--	a64RedriveAsDSI		// got a DSI
662        rldimi	r30,r3,16,40		// insert into byte 5 of r30
663        bdz		a64Lm4				// only 2 bytes leftover
664        lbz		r3,2(r17)			// get 3rd byte
665        bne--	a64RedriveAsDSI		// got a DSI
666        rldimi	r30,r3,8,48			// insert into byte 6
667a64Lm4:
668        mtmsr	r26					// turn DR back off so we can store into register file
669        isync
670        stdx	r30,r14,r21			// pack partially-filled word into register file
671        b		a64Exit
672
673
674// Store multiple words.  This handles stmw, stswi, and stswx.
675
676a64StoreMultiple:					// r22 = byte count, may be 0
677        subic.	r3,r22,1			// get (#bytes-1)
678        blt		a64Exit				// done if 0
679        add		r4,r17,r3			// get EA of last operand byte
680        and		r4,r4,r15			// clamp
681        cmpld	r4,r17				// address space wrap?
682        blt--	a64PassAlong		// pass along exception if so
683        srwi.	r4,r22,2			// get # full words to load
684        rlwinm	r22,r22,0,30,31		// r22 <- leftover byte count
685        cmpwi	cr1,r22,0			// leftover bytes?
686        beq		a64Sm3				// no words
687        mtctr	r4					// set up word count
688        cmpw	r0,r0				// turn on beq so we can check for DSIs
689a64Sm2:
690        ldx		r30,r14,r21			// get next register
691        addi	r21,r21,8			// bump register file offset
692        rlwinm	r21,r21,0,24,28		// wrap around to 0
693        srwi	r3,r30,24			// shift the four bytes into position
694        srwi	r4,r30,16
695        srwi	r5,r30,8
696        mtmsr	r25					// turn on DR so we can access user space
697        isync						// wait for it to happen
698        stb		r3,0(r17)
699        bne--	a64RedriveAsDSI		// got a DSI
700        stb		r4,1(r17)
701        bne--	a64RedriveAsDSI		// got a DSI
702        stb		r5,2(r17)
703        bne--	a64RedriveAsDSI		// got a DSI
704        stb		r30,3(r17)
705        bne--	a64RedriveAsDSI		// got a DSI
706        mtmsr	r26					// turn DR back off
707        isync
708        addi	r17,r17,4			// bump EA
709        bdnz	a64Sm2
710a64Sm3:								// r22 = 0-3, cr1 set on r22, cr0 beq set
711        beq		cr1,a64Exit			// no leftover bytes
712        ldx		r30,r14,r21			// get last register
713        mtctr	r22
714        mtmsr	r25					// turn on DR so we can access user space
715        isync						// wait for it to happen
716a64Sm4:
717        rlwinm	r30,r30,8,0,31		// position next byte
718        stb		r30,0(r17)			// pack into user space
719        addi	r17,r17,1			// bump user space ptr
720        bne--	a64RedriveAsDSI		// got a DSI
721        bdnz	a64Sm4
722        mtmsr	r26					// turn DR back off
723        isync
724        b		a64Exit
725
726
727// Subroutines to load bytes from user space.
728
729a64Load2Bytes:						// load 2 bytes right-justified into r30
730        addi	r7,r17,1			// get EA of last byte
731        and		r7,r7,r15			// clamp
732        cmpld	r7,r17				// address wrap?
733        blt--	a64PassAlong		// yes
734        mtmsr	r25					// turn on DR so we can access user space
735        isync						// wait for it to happen
736        sub.	r30,r30,r30			// 0-fill dest and set beq
737        b		a64Load2			// jump into routine
738a64Load4Bytes:						// load 4 bytes right-justified into r30 (ie, low order word)
739        addi	r7,r17,3			// get EA of last byte
740        and		r7,r7,r15			// clamp
741        cmpld	r7,r17				// address wrap?
742        blt--	a64PassAlong		// yes
743        mtmsr	r25					// turn on DR so we can access user space
744        isync						// wait for it to happen
745        sub.	r30,r30,r30			// 0-fill dest and set beq
746        b		a64Load4			// jump into routine
747a64Load8Bytes:						// load 8 bytes into r30
748        addi	r7,r17,7			// get EA of last byte
749        and		r7,r7,r15			// clamp
750        cmpld	r7,r17				// address wrap?
751        blt--	a64PassAlong		// yes
752        mtmsr	r25					// turn on DR so we can access user space
753        isync						// wait for it to happen
754        sub.	r30,r30,r30			// 0-fill dest and set beq
755        lbz		r3,-7(r7)			// get byte 0
756        bne--	a64RedriveAsDSI		// got a DSI
757        lbz		r4,-6(r7)			// and byte 1, etc
758        bne--	a64RedriveAsDSI		// got a DSI
759        lbz		r5,-5(r7)
760        bne--	a64RedriveAsDSI		// got a DSI
761        lbz		r6,-4(r7)
762        bne--	a64RedriveAsDSI		// got a DSI
763        rldimi	r30,r3,56,0			// position bytes in upper word
764        rldimi	r30,r4,48,8
765        rldimi	r30,r5,40,16
766        rldimi	r30,r6,32,24
767a64Load4:
768        lbz		r3,-3(r7)
769        bne--	a64RedriveAsDSI		// got a DSI
770        lbz		r4,-2(r7)
771        bne--	a64RedriveAsDSI		// got a DSI
772        rldimi	r30,r3,24,32		// insert bytes 4 and 5 into r30
773        rldimi	r30,r4,16,40
774a64Load2:
775        lbz		r3,-1(r7)
776        bne--	a64RedriveAsDSI		// got a DSI
777        lbz		r4,0(r7)
778        bne--	a64RedriveAsDSI		// got a DSI
779        mtmsr	r26					// turn DR back off
780        isync
781        rldimi	r30,r3,8,48			// insert bytes 6 and 7 into r30
782        rldimi	r30,r4,0,56
783        blr
784
785
786// Subroutines to store bytes into user space.
787
788a64Store2Bytes:						// store bytes 6 and 7 of r30
789        addi	r7,r17,1			// get EA of last byte
790        and		r7,r7,r15			// clamp
791        cmpld	r7,r17				// address wrap?
792        blt--	a64PassAlong		// yes
793        mtmsr	r25					// turn on DR so we can access user space
794        isync						// wait for it to happen
795        cmpw	r0,r0				// set beq so we can check for DSI
796        b		a64Store2			// jump into routine
797a64Store4Bytes:						// store bytes 4-7 of r30 (ie, low order word)
798        addi	r7,r17,3			// get EA of last byte
799        and		r7,r7,r15			// clamp
800        cmpld	r7,r17				// address wrap?
801        blt--	a64PassAlong		// yes
802        mtmsr	r25					// turn on DR so we can access user space
803        isync						// wait for it to happen
804        cmpw	r0,r0				// set beq so we can check for DSI
805        b		a64Store4			// jump into routine
806a64Store8Bytes:						// r30 = bytes
807        addi	r7,r17,7			// get EA of last byte
808        and		r7,r7,r15			// clamp
809        cmpld	r7,r17				// address wrap?
810        blt--	a64PassAlong		// yes
811        mtmsr	r25					// turn on DR so we can access user space
812        isync						// wait for it to happen
813        cmpw	r0,r0				// set beq so we can check for DSI
814        rotldi	r3,r30,8			// shift byte 0 into position
815        rotldi	r4,r30,16			// and byte 1
816        rotldi	r5,r30,24			// and byte 2
817        rotldi	r6,r30,32			// and byte 3
818        stb		r3,-7(r7)			// store byte 0
819        bne--	a64RedriveAsDSI		// got a DSI
820        stb		r4,-6(r7)			// and byte 1 etc...
821        bne--	a64RedriveAsDSI		// got a DSI
822        stb		r5,-5(r7)
823        bne--	a64RedriveAsDSI		// got a DSI
824        stb		r6,-4(r7)
825        bne--	a64RedriveAsDSI		// got a DSI
826a64Store4:
827        rotldi	r3,r30,40			// shift byte 4 into position
828        rotldi	r4,r30,48			// and byte 5
829        stb		r3,-3(r7)
830        bne--	a64RedriveAsDSI		// got a DSI
831        stb		r4,-2(r7)
832        bne--	a64RedriveAsDSI		// got a DSI
833a64Store2:
834        rotldi	r3,r30,56			// shift byte 6 into position
835        stb		r3,-1(r7)			// store byte 6
836        bne--	a64RedriveAsDSI		// got a DSI
837        stb		r30,0(r7)			// store byte 7, which is already positioned
838        bne--	a64RedriveAsDSI		// got a DSI
839        mtmsr	r26					// turn off DR
840        isync
841        blr
842
843
844// Exit routines.
845
846a64ExitEm:
847		li		r30,T_EMULATE			// Change exception code to emulate
848		stw		r30,saveexception(r13)	// Save it
849		b		a64Exit					// Join standard exit routine...
850
851a64PassAlong:							// unhandled exception, just pass it along
852        li		r0,1					// Set that the alignment/program exception was not emulated
853        crset	kNotify					// return T_ALIGNMENT or T_PROGRAM
854		stw		r0,savemisc3(r13)		// Set that emulation was not done
855        crclr	kTrace					// not a trace interrupt
856        b		a64Exit1
857a64UpdateCheck:							// successfully emulated, may be update form
858        bf		kUpdate,a64Exit			// update?
859        stdx	r12,r14,r16				// yes, store 64-bit EA into RA
860a64Exit:								// instruction successfully emulated
861        addi	r28,r28,4				// bump SRR0 past the emulated instruction
862        li		r30,T_IN_VAIN			// eat the interrupt since we emulated it
863        and		r28,r28,r15				// clamp to address space size (32 vs 64)
864        std		r28,savesrr0(r13)		// save, so we return to next instruction
865a64Exit1:
866        bt--	kTrace,a64Trace			// were we in single-step at fault?
867        bt--	kNotify,a64Notify		// should we say T_ALIGNMENT anyway?
868a64Exit2:
869        mcrf	cr6,cr3					// restore feature flags
870        mr		r11,r30					// pass back exception code (T_IN_VAIN etc) in r11
871        b		EXT(EmulExit)			// return to exception processing
872
873
874// Notification requested: pass exception upstairs even though it might have been emulated.
875
876a64Notify:
877        li		r30,T_ALIGNMENT			// somebody wants to know about it (but don't redrive)
878        bt		kAlignment,a64Exit2		// was an alignment exception
879        li		r30,T_PROGRAM			// was an emulated instruction
880        b		a64Exit2
881
882
883// Emulate a trace interrupt after handling alignment interrupt.
884
885a64Trace:
886        lwz		r9,SAVflags(r13)		// get the save-area flags
887        li		r30,T_TRACE
888        oris	r9,r9,hi16(SAVredrive)	// Set the redrive bit
889        stw		r30,saveexception(r13)	// Set the exception code
890        stw		r9,SAVflags(r13)		// Set the flags
891        b		a64Exit2				// Exit and do trace interrupt...
892
893
894// Got a DSI accessing user space.  Redrive.  One way this can happen is if another
895// processor removes a mapping while we are emulating.
896
897a64RedriveAsISI:						// this DSI happened fetching the opcode (r1==DSISR  r4==DAR)
898        mtmsr	r26						// turn DR back off
899        isync							// wait for it to happen
900        li		r30,T_INSTRUCTION_ACCESS
901        rlwimi	r29,r1,0,1,4			// insert the fault type from DSI's DSISR
902        std		r29,savesrr1(r13)		// update SRR1 to look like an ISI
903        b		a64Redrive
904
905a64RedriveAsDSI:						// r0==DAR  r1==DSISR
906        mtmsr	r26						// turn DR back off
907        isync							// wait for it to happen
908        stw		r1,savedsisr(r13)		// Set the DSISR of failed access
909        std		r0,savedar(r13)			// Set the address of the failed access
910        li		r30,T_DATA_ACCESS		// Set failing data access code
911a64Redrive:
912        lwz		r9,SAVflags(r13)		// Pick up the flags
913        stw		r30,saveexception(r13)	// Set the replacement code
914        oris	r9,r9,hi16(SAVredrive)	// Set the redrive bit
915        stw		r9,SAVflags(r13)		// Set redrive request
916        crclr	kTrace					// don't take a trace interrupt
917        crclr	kNotify					// don't pass alignment exception
918        b		a64Exit2				// done
919
920
921// This is the branch table, indexed by the "AAAAB" opcode hash.
922
923a64BranchTable:
924        .long	a64LwzLwzxLwarx		// 00000  lwz[u], lwzx[u], lwarx
925        .long	a64Ldx				// 00001  ldx[u]
926        .long	a64PassAlong		// 00010  ldarx 	(never emulate these)
927        .long	a64PassAlong		// 00011
928        .long	a64StwStwx			// 00100  stw[u], stwx[u]
929        .long	a64StdxStwcx		// 00101  stdx[u], stwcx
930        .long	a64PassAlong		// 00110
931        .long	a64PassAlong		// 00111  stdcx		(never emulate these)
932        .long	a64LhzLhzx			// 01000  lhz[u], lhzx[u]
933        .long	a64PassAlong		// 01001
934        .long	a64LhaLhax			// 01010  lha[u], lhax[u]
935        .long	a64Lwax				// 01011  lwax[u]
936        .long	a64SthSthx			// 01100  sth[u], sthx[u]
937        .long	a64PassAlong		// 01101
938        .long	a64LmwStmw			// 01110  lmw, stmw
939        .long	a64PassAlong		// 01111
940        .long	a64LfsLfsx			// 10000  lfs[u], lfsx[u]
941        .long	a64LswxLwbrx		// 10001  lswx, lwbrx
942        .long	a64LfdLfdx			// 10010  lfd[u], lfdx[u]
943        .long	a64Lswi				// 10011  lswi
944        .long	a64StfsStfsx		// 10100  stfs[u], stfsx[u]
945        .long	a64StswxStwbrx		// 10101  stswx, stwbrx
946        .long	a64StfdStfdx		// 10110  stfd[u], stfdx[u]
947        .long	a64Stswi			// 10111  stswi
948        .long	a64PassAlong		// 11000
949        .long	a64Lhbrx			// 11001  lhbrx
950        .long	a64LdLwa			// 11010  ld[u], lwa
951        .long	a64PassAlong		// 11011
952        .long	a64PassAlong		// 11100
953        .long	a64Sthbrx			// 11101  sthbrx
954        .long	a64StdStfiwx		// 11110  std[u], stfiwx
955        .long	a64DcbzDcbz128		// 11111  dcbz, dcbz128
956
957
958