/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* common code with bug fixes from original version in trap.c */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/vmsystm.h>
#include <sys/fpu/fpusystm.h>
#include <sys/fpu/fpu_simulator.h>
#include <sys/inline.h>
#include <sys/debug.h>
#include <sys/privregs.h>
#include <sys/machpcb.h>
#include <sys/simulate.h>
#include <sys/proc.h>
#include <sys/cmn_err.h>
#include <sys/stack.h>
#include <sys/watchpoint.h>
#include <sys/trap.h>
#include <sys/machtrap.h>
#include <sys/mman.h>
#include <sys/asi.h>
#include <sys/copyops.h>
#include <vm/as.h>
#include <vm/page.h>
#include <sys/model.h>
#include <vm/seg_vn.h>
#include <sys/byteorder.h>
#include <sys/time.h>

#define	IS_IBIT_SET(x)	(x & 0x2000)
#define	IS_VIS1(op, op3)(op == 2 && op3 == 0x36)
#define	IS_FLOAT_QUAD_OP(op, op3)(op == 2 && (op3 == 0x34 ||	\
		op3 == 0x35))
#define	IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(op, op3, asi)		\
		(op == 3 && (op3 == IOP_V8_LDDFA ||		\
		op3 == IOP_V8_STDFA) &&	asi > ASI_SNFL)
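
/*
 * In the encodings tested above, op == 2 selects the arithmetic/FPop
 * instruction format and op == 3 the load/store format; op3 0x34 and
 * 0x35 are the FPop groups and 0x36 is IMPDEP1 (used by VIS), while
 * an ASI above ASI_SNFL indicates one of the extended ASIs such as
 * the partial and short floating-point accesses.
 */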

static int aligndebug = 0;

/*
 * For the sake of those who must be compatible with unaligned
 * architectures, users can link their programs to use a
 * corrective trap handler that will fix unaligned references;
 * a special trap, #6 (T_FIX_ALIGN), enables this 'feature'.
 * Returns SIMU_SUCCESS for success and 0, SIMU_FAULT, or SIMU_ILLEGAL
 * on failure.
 */

int
do_unaligned(struct regs *rp, caddr_t *badaddr)
{
	uint_t	inst, op3, asi = 0;
	uint_t	rd, rs1, rs2;
	int	sz, nf = 0, ltlend = 0;
	int	floatflg;
	int	fsrflg;
	int	immflg;
	int	lddstdflg;
	caddr_t	addr;
	uint64_t val;
	union {
		uint64_t	l[2];
		uint32_t	i[4];
		uint16_t	s[8];
		uint8_t		c[16];
	} data;
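	/*
	 * The data union overlays one 16-byte, big-endian buffer: the
	 * low-order byte of l[0] is c[7], its low half-word is s[3] and
	 * its low word is i[1], which is why the sub-64-bit cases below
	 * index from the high end of the buffer.
	 */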

	ASSERT(USERMODE(rp->r_tstate));
	inst = fetch_user_instr((caddr_t)rp->r_pc);

	op3 = (inst >> 19) & 0x3f;
	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	floatflg = (inst >> 24) & 1;
	immflg = (inst >> 13) & 1;
	lddstdflg = fsrflg = 0;

	/* if not load or store do nothing */
	if ((inst >> 30) != 3)
		return (0);

	/* if ldstub or swap, do nothing */
	if ((inst & 0xc1680000) == 0xc0680000)
		return (0);

	/* if cas/casx, do nothing */
	if ((inst & 0xc1e00000) == 0xc1e00000)
		return (0);

	if (floatflg) {
		switch ((inst >> 19) & 3) {	/* map size bits to a number */
		case 0: sz = 4;
			break;			/* ldf{a}/stf{a} */
		case 1: fsrflg = 1;
			if (rd == 0)
				sz = 4;		/* ldfsr/stfsr */
			else if (rd == 1)
				sz = 8;		/* ldxfsr/stxfsr */
			else
				return (SIMU_ILLEGAL);
			break;
		case 2: sz = 16;
			break;		/* ldqf{a}/stqf{a} */
		case 3: sz = 8;
			break;		/* lddf{a}/stdf{a} */
		}
		/*
		 * Fix to access extra double register encoding plus
		 * compensate to access the correct fpu_dreg.
		 */
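		/*
		 * (Bit 0 of the 5-bit rd field selects the upper double
		 * registers %f32-%f62 in the V9 encoding; it is folded
		 * into bit 5 and rd is then halved so that it indexes
		 * the 64-bit fpu_dregs array.)
		 */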
		if ((sz > 4) && (fsrflg == 0)) {
			if ((rd & 1) == 1)
				rd = (rd & 0x1e) | 0x20;
			rd = rd >> 1;
			if ((sz == 16) && ((rd & 0x1) != 0))
				return (SIMU_ILLEGAL);
		}
	} else {
		int sz_bits = (inst >> 19) & 0xf;
		switch (sz_bits) {		/* map size bits to a number */
		case 0:				/* lduw{a} */
		case 4:				/* stw{a} */
		case 8:				/* ldsw{a} */
		case 0xf:			/* swap */
			sz = 4; break;
		case 1:				/* ldub{a} */
		case 5:				/* stb{a} */
		case 9:				/* ldsb{a} */
		case 0xd:			/* ldstub */
			sz = 1; break;
		case 2:				/* lduh{a} */
		case 6:				/* sth{a} */
		case 0xa:			/* ldsh{a} */
			sz = 2; break;
		case 3:				/* ldd{a} */
		case 7:				/* std{a} */
			lddstdflg = 1;
			sz = 8; break;
		case 0xb:			/* ldx{a} */
		case 0xe:			/* stx{a} */
			sz = 8; break;
		}
	}


	/* only support primary and secondary asi's */
	if ((op3 >> 4) & 1) {
		if (immflg) {
			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
			    TSTATE_ASI_MASK;
		} else {
			asi = (inst >> 5) & 0xff;
		}
		switch (asi) {
		case ASI_P:
		case ASI_S:
			break;
		case ASI_PNF:
		case ASI_SNF:
			nf = 1;
			break;
		case ASI_PL:
		case ASI_SL:
			ltlend = 1;
			break;
		case ASI_PNFL:
		case ASI_SNFL:
			ltlend = 1;
			nf = 1;
			break;
		default:
			return (0);
		}
		/*
		 * Non-faulting stores generate a data_access_exception trap,
		 * according to the Spitfire manual, which should be signaled
		 * as an illegal instruction trap, because it can't be fixed.
		 */
		if ((nf) && ((op3 == IOP_V8_STQFA) || (op3 == IOP_V8_STDFA)))
			return (SIMU_ILLEGAL);
	}

	if (aligndebug) {
		printf("unaligned access at %p, instruction: 0x%x\n",
		    (void *)rp->r_pc, inst);
		printf("type %s", (((inst >> 21) & 1) ? "st" : "ld"));
		if (((inst >> 21) & 1) == 0)
			printf(" %s", (((inst >> 22) & 1) ?
			    "signed" : "unsigned"));
		printf(" asi 0x%x size %d immflg %d\n", asi, sz, immflg);
		printf("rd = %d, op3 = 0x%x, rs1 = %d, rs2 = %d, imm13=0x%x\n",
		    rd, op3, rs1, rs2, (inst & 0x1fff));
	}

	(void) flush_user_windows_to_stack(NULL);
	if (getreg(rp, rs1, &val, badaddr))
		return (SIMU_FAULT);
	addr = (caddr_t)val;		/* convert to 32/64 bit address */
	if (aligndebug)
		printf("addr 1 = %p\n", (void *)addr);

	/* check immediate bit and use immediate field or reg (rs2) */
	if (immflg) {
		int imm;
		imm  = inst & 0x1fff;		/* mask out immediate field */
		imm <<= 19;			/* sign extend it */
		imm >>= 19;
		addr += imm;			/* compute address */
	} else {
		if (getreg(rp, rs2, &val, badaddr))
			return (SIMU_FAULT);
		addr += val;
	}

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (curproc->p_model == DATAMODEL_ILP32) {
		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
		addr = (caddr_t)(uintptr_t)addr32;
	}

	if (aligndebug)
		printf("addr 2 = %p\n", (void *)addr);

	if (addr >= curproc->p_as->a_userlimit) {
		*badaddr = addr;
		goto badret;
	}

	/* a single bit differentiates ld and st */
	if ((inst >> 21) & 1) {			/* store */
		if (floatflg) {
			klwp_id_t lwp = ttolwp(curthread);
			kfpu_t *fp = lwptofpu(lwp);
			/* Ensure fp has been enabled */
			if (fpu_exists) {
				if (!(_fp_read_fprs() & FPRS_FEF))
					fp_enable();
			} else {
				if (!fp->fpu_en)
					fp_enable();
			}
			/* if fpu_exists read fpu reg */
			if (fpu_exists) {
				if (fsrflg) {
					_fp_read_pfsr(&data.l[0]);
				} else {
					if (sz == 4) {
						data.i[0] = 0;
						_fp_read_pfreg(
						    (unsigned *)&data.i[1], rd);
					}
					if (sz >= 8)
						_fp_read_pdreg(
						    &data.l[0], rd);
					if (sz == 16)
						_fp_read_pdreg(
						    &data.l[1], rd+1);
				}
			} else {
				if (fsrflg) {
					/* Clear reserved bits, set version=7 */
					fp->fpu_fsr &= ~0x30301000;
					fp->fpu_fsr |= 0xE0000;
					data.l[0] = fp->fpu_fsr;
				} else {
					if (sz == 4) {
						data.i[0] = 0;
						data.i[1] =
						    (unsigned)fp->
						    fpu_fr.fpu_regs[rd];
					}
					if (sz >= 8)
						data.l[0] =
						    fp->fpu_fr.fpu_dregs[rd];
					if (sz == 16)
						data.l[1] =
						    fp->fpu_fr.fpu_dregs[rd+1];
				}
			}
		} else {
			if (lddstdflg) {		/* combine the data */
				if (getreg(rp, rd, &data.l[0], badaddr))
					return (SIMU_FAULT);
				if (getreg(rp, rd+1, &data.l[1], badaddr))
					return (SIMU_FAULT);
				if (ltlend) {
					/*
					 * For STD, each 32-bit word is byte-
					 * swapped individually.  For
					 * simplicity we don't want to do that
					 * below, so we swap the words now to
					 * get the desired result in the end.
					 */
					data.i[0] = data.i[3];
				} else {
					data.i[0] = data.i[1];
					data.i[1] = data.i[3];
				}
			} else {
				if (getreg(rp, rd, &data.l[0], badaddr))
					return (SIMU_FAULT);
			}
		}

		if (aligndebug) {
			if (sz == 16) {
				printf("data %x %x %x %x\n",
				    data.i[0], data.i[1], data.i[2], data.i[3]);
			} else {
				printf("data %x %x %x %x %x %x %x %x\n",
				    data.c[0], data.c[1], data.c[2], data.c[3],
				    data.c[4], data.c[5], data.c[6], data.c[7]);
			}
		}

		if (ltlend) {
			if (sz == 1) {
				if (xcopyout_little(&data.c[7], addr,
				    (size_t)sz) != 0)
					goto badret;
			} else if (sz == 2) {
				if (xcopyout_little(&data.s[3], addr,
				    (size_t)sz) != 0)
					goto badret;
			} else if (sz == 4) {
				if (xcopyout_little(&data.i[1], addr,
				    (size_t)sz) != 0)
					goto badret;
			} else {
				if (xcopyout_little(&data.l[0], addr,
				    (size_t)sz) != 0)
					goto badret;
			}
		} else {
			if (sz == 1) {
				if (copyout(&data.c[7], addr, (size_t)sz) == -1)
					goto badret;
			} else if (sz == 2) {
				if (copyout(&data.s[3], addr, (size_t)sz) == -1)
					goto badret;
			} else if (sz == 4) {
				if (copyout(&data.i[1], addr, (size_t)sz) == -1)
					goto badret;
			} else {
				if (copyout(&data.l[0], addr, (size_t)sz) == -1)
					goto badret;
			}
		}
	} else {				/* load */
		if (sz == 1) {
			if (ltlend) {
				if (xcopyin_little(addr, &data.c[7],
				    (size_t)sz) != 0) {
					if (nf)
						data.c[7] = 0;
					else
						goto badret;
				}
			} else {
				if (copyin(addr, &data.c[7],
				    (size_t)sz) == -1) {
					if (nf)
						data.c[7] = 0;
					else
						goto badret;
				}
			}
			/* if signed and the sign bit is set extend it */
			if (((inst >> 22) & 1) && ((data.c[7] >> 7) & 1)) {
				data.i[0] = (uint_t)-1;	/* extend sign bit */
				data.s[2] = (ushort_t)-1;
				data.c[6] = (uchar_t)-1;
			} else {
				data.i[0] = 0;	/* clear upper 32+24 bits */
				data.s[2] = 0;
				data.c[6] = 0;
			}
		} else if (sz == 2) {
			if (ltlend) {
				if (xcopyin_little(addr, &data.s[3],
				    (size_t)sz) != 0) {
					if (nf)
						data.s[3] = 0;
					else
						goto badret;
				}
			} else {
				if (copyin(addr, &data.s[3],
				    (size_t)sz) == -1) {
					if (nf)
						data.s[3] = 0;
					else
						goto badret;
				}
			}
			/* if signed and the sign bit is set extend it */
			if (((inst >> 22) & 1) && ((data.s[3] >> 15) & 1)) {
				data.i[0] = (uint_t)-1;	/* extend sign bit */
				data.s[2] = (ushort_t)-1;
			} else {
				data.i[0] = 0;	/* clear upper 32+16 bits */
				data.s[2] = 0;
			}
		} else if (sz == 4) {
			if (ltlend) {
				if (xcopyin_little(addr, &data.i[1],
				    (size_t)sz) != 0) {
					if (!nf)
						goto badret;
					data.i[1] = 0;
				}
			} else {
				if (copyin(addr, &data.i[1],
				    (size_t)sz) == -1) {
					if (!nf)
						goto badret;
					data.i[1] = 0;
				}
			}
			/* if signed and the sign bit is set extend it */
			if (((inst >> 22) & 1) && ((data.i[1] >> 31) & 1)) {
				data.i[0] = (uint_t)-1;	/* extend sign bit */
			} else {
				data.i[0] = 0;	/* clear upper 32 bits */
			}
		} else {
			if (ltlend) {
				if (xcopyin_little(addr, &data.l[0],
				    (size_t)sz) != 0) {
					if (!nf)
						goto badret;
					data.l[0] = 0;
				}
			} else {
				if (copyin(addr, &data.l[0],
				    (size_t)sz) == -1) {
					if (!nf)
						goto badret;
					data.l[0] = 0;
				}
			}
		}

		if (aligndebug) {
			if (sz == 16) {
				printf("data %x %x %x %x\n",
				    data.i[0], data.i[1], data.i[2], data.i[3]);
			} else {
				printf("data %x %x %x %x %x %x %x %x\n",
				    data.c[0], data.c[1], data.c[2], data.c[3],
				    data.c[4], data.c[5], data.c[6], data.c[7]);
			}
		}

		if (floatflg) {		/* if fpu_exists write fpu reg */
			klwp_id_t lwp = ttolwp(curthread);
			kfpu_t *fp = lwptofpu(lwp);
			/* Ensure fp has been enabled */
			if (fpu_exists) {
				if (!(_fp_read_fprs() & FPRS_FEF))
					fp_enable();
			} else {
				if (!fp->fpu_en)
					fp_enable();
			}
			/* if fpu_exists write fpu reg */
			if (fpu_exists) {
				if (fsrflg) {
					_fp_write_pfsr(&data.l[0]);
				} else {
					if (sz == 4)
						_fp_write_pfreg(
						    (unsigned *)&data.i[1], rd);
					if (sz >= 8)
						_fp_write_pdreg(
						    &data.l[0], rd);
					if (sz == 16)
						_fp_write_pdreg(
						    &data.l[1], rd+1);
				}
			} else {
				if (fsrflg) {
					fp->fpu_fsr = data.l[0];
				} else {
					if (sz == 4)
						fp->fpu_fr.fpu_regs[rd] =
						    (unsigned)data.i[1];
					if (sz >= 8)
						fp->fpu_fr.fpu_dregs[rd] =
						    data.l[0];
					if (sz == 16)
						fp->fpu_fr.fpu_dregs[rd+1] =
						    data.l[1];
				}
			}
		} else {
			if (lddstdflg) {		/* split the data */
				if (ltlend) {
					/*
					 * For LDD, each 32-bit word is byte-
					 * swapped individually.  We didn't
					 * do that above, but this will give
					 * us the desired result.
					 */
					data.i[3] = data.i[0];
				} else {
					data.i[3] = data.i[1];
					data.i[1] = data.i[0];
				}
				data.i[0] = 0;
				data.i[2] = 0;
				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
					goto badret;
				if (putreg(&data.l[1], rp, rd+1, badaddr) == -1)
					goto badret;
			} else {
				if (putreg(&data.l[0], rp, rd, badaddr) == -1)
					goto badret;
			}
		}
	}
	return (SIMU_SUCCESS);
badret:
	return (SIMU_FAULT);
}


int
simulate_lddstd(struct regs *rp, caddr_t *badaddr)
{
	uint_t	inst, op3, asi = 0;
	uint_t	rd, rs1, rs2;
	int	nf = 0, ltlend = 0, usermode;
	int	immflg;
	uint64_t reven;
	uint64_t rodd;
	caddr_t	addr;
	uint64_t val;
	uint64_t data;

	usermode = USERMODE(rp->r_tstate);

	if (usermode)
		inst = fetch_user_instr((caddr_t)rp->r_pc);
	else
		inst = *(uint_t *)rp->r_pc;

	op3 = (inst >> 19) & 0x3f;
	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	immflg = (inst >> 13) & 1;

	if (USERMODE(rp->r_tstate))
		(void) flush_user_windows_to_stack(NULL);
	else
		flush_windows();

	if ((op3 >> 4) & 1) {		/* is this LDDA/STDA? */
		if (immflg) {
			asi = (uint_t)(rp->r_tstate >> TSTATE_ASI_SHIFT) &
			    TSTATE_ASI_MASK;
		} else {
			asi = (inst >> 5) & 0xff;
		}
		switch (asi) {
		case ASI_P:
		case ASI_S:
			break;
		case ASI_PNF:
		case ASI_SNF:
			nf = 1;
			break;
		case ASI_PL:
		case ASI_SL:
			ltlend = 1;
			break;
		case ASI_PNFL:
		case ASI_SNFL:
			ltlend = 1;
			nf = 1;
			break;
		case ASI_AIUP:
		case ASI_AIUS:
			usermode = 1;
			break;
		case ASI_AIUPL:
		case ASI_AIUSL:
			usermode = 1;
			ltlend = 1;
			break;
		default:
			return (SIMU_ILLEGAL);
		}
	}

	if (getreg(rp, rs1, &val, badaddr))
		return (SIMU_FAULT);
	addr = (caddr_t)val;		/* convert to 32/64 bit address */

	/* check immediate bit and use immediate field or reg (rs2) */
	if (immflg) {
		int imm;
		imm  = inst & 0x1fff;		/* mask out immediate field */
		imm <<= 19;			/* sign extend it */
		imm >>= 19;
		addr += imm;			/* compute address */
	} else {
		if (getreg(rp, rs2, &val, badaddr))
			return (SIMU_FAULT);
		addr += val;
	}

	/*
	 * T_UNIMP_LDD and T_UNIMP_STD are higher priority than
	 * T_ALIGNMENT.  So we have to make sure that the address is
	 * kosher before trying to use it, because the hardware hasn't
	 * checked it for us yet.
	 */
	if (((uintptr_t)addr & 0x7) != 0) {
		if (curproc->p_fixalignment)
			return (do_unaligned(rp, badaddr));
		else
			return (SIMU_UNALIGN);
	}

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (curproc->p_model == DATAMODEL_ILP32 && usermode) {
		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
		addr = (caddr_t)(uintptr_t)addr32;
	}
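	/*
	 * LDD/STD move a register pair: the even register (rd) holds
	 * the upper 32 bits of the 64-bit datum and the odd register
	 * (rd+1) the lower 32 bits; for the little-endian ASIs each
	 * 32-bit word is byte-swapped individually.
	 */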

	if ((inst >> 21) & 1) {			/* store */
		if (getreg(rp, rd, &reven, badaddr))
			return (SIMU_FAULT);
		if (getreg(rp, rd+1, &rodd, badaddr))
			return (SIMU_FAULT);
		if (ltlend) {
			reven = BSWAP_32(reven);
			rodd  = BSWAP_32(rodd);
		}
		data = (reven << 32) | rodd;
		if (usermode) {
			if (suword64_nowatch(addr, data) == -1)
				return (SIMU_FAULT);
		} else {
			*(uint64_t *)addr = data;
		}
	} else {				/* load */
		if (usermode) {
			if (fuword64_nowatch(addr, &data)) {
				if (nf)
					data = 0;
				else
					return (SIMU_FAULT);
			}
		} else
			data = *(uint64_t *)addr;

		reven = (data >> 32);
		rodd  = (uint64_t)(uint32_t)data;
		if (ltlend) {
			reven = BSWAP_32(reven);
			rodd  = BSWAP_32(rodd);
		}

		if (putreg(&reven, rp, rd, badaddr) == -1)
			return (SIMU_FAULT);
		if (putreg(&rodd, rp, rd+1, badaddr) == -1)
			return (SIMU_FAULT);
	}
	return (SIMU_SUCCESS);
}


/*
 * simulate popc
 */
static int
simulate_popc(struct regs *rp, caddr_t *badaddr, uint_t inst)
{
	uint_t	rd, rs2, rs1;
	uint_t	immflg;
	uint64_t val, cnt = 0;

	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	immflg = (inst >> 13) & 1;

	if (rs1 > 0)
		return (SIMU_ILLEGAL);

	(void) flush_user_windows_to_stack(NULL);
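	/*
	 * Both loops below count set bits with the x &= x - 1 trick:
	 * each iteration clears the lowest set bit, so the loop runs
	 * once per one-bit in the operand.
	 */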

	/* check immediate bit and use immediate field or reg (rs2) */
	if (immflg) {
		int64_t imm;
		imm  = inst & 0x1fff;		/* mask out immediate field */
		imm <<= 51;			/* sign extend it */
		imm >>= 51;
		if (imm != 0) {
			for (cnt = 0; imm != 0; imm &= imm-1)
				cnt++;
		}
	} else {
		if (getreg(rp, rs2, &val, badaddr))
			return (SIMU_FAULT);
		if (val != 0) {
			for (cnt = 0; val != 0; val &= val-1)
				cnt++;
		}
	}

	if (putreg(&cnt, rp, rd, badaddr) == -1)
		return (SIMU_FAULT);

	return (SIMU_SUCCESS);
}

/*
 * simulate mulscc
 */
static int
simulate_mulscc(struct regs *rp, caddr_t *badaddr, uint_t inst)
{
	uint32_t	s1, s2;
	uint32_t	c, d, v;
	uint_t		rd, rs1;
	int64_t		d64;
	uint64_t	ud64;
	uint64_t	drs1;

	(void) flush_user_windows_to_stack(NULL);

	if ((inst >> 13) & 1) {		/* immediate */
		d64 = inst & 0x1fff;
		d64 <<= 51;		/* sign extend it */
		d64 >>= 51;
	} else {
		uint_t		rs2;
		uint64_t	drs2;

		if (inst & 0x1fe0) {
			return (SIMU_ILLEGAL);
		}
		rs2 = inst & 0x1f;
		if (getreg(rp, rs2, &drs2, badaddr)) {
			return (SIMU_FAULT);
		}
		d64 = (int64_t)drs2;
	}

	rs1 = (inst >> 14) & 0x1f;
	if (getreg(rp, rs1, &drs1, badaddr)) {
		return (SIMU_FAULT);
	}
	/* icc.n xor icc.v */
	s1 = ((rp->r_tstate & TSTATE_IN) >> (TSTATE_CCR_SHIFT + 3)) ^
	    ((rp->r_tstate & TSTATE_IV) >> (TSTATE_CCR_SHIFT + 1));
	s1 = (s1 << 31) | (((uint32_t)drs1) >> 1);
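	/*
	 * MULScc shifts rs1 right by one, with (icc.n xor icc.v)
	 * entering bit 31 of the partial sum; the bit shifted out of
	 * rs1 enters %y in the update at the bottom of this routine.
	 */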

	if (rp->r_y & 1) {
		s2 = (uint32_t)d64;
	} else {
		s2 = 0;
	}
	d = s1 + s2;

	ud64 = (uint64_t)d;

	/* set the icc flags */
	v = (s1 & s2 & ~d) | (~s1 & ~s2 & d);
	c = (s1 & s2) | (~d & (s1 | s2));
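	/*
	 * v and c are the standard two's-complement overflow and
	 * carry-out expressions for the 32-bit add above; bit 31 of
	 * each is folded into the icc V and C bits below.
	 */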
	rp->r_tstate &= ~TSTATE_ICC;
	rp->r_tstate |= (uint64_t)((c >> 31) & 1) << (TSTATE_CCR_SHIFT + 0);
	rp->r_tstate |= (uint64_t)((v >> 31) & 1) << (TSTATE_CCR_SHIFT + 1);
	rp->r_tstate |= (uint64_t)(d ? 0 : 1) << (TSTATE_CCR_SHIFT + 2);
	rp->r_tstate |= (uint64_t)((d >> 31) & 1) << (TSTATE_CCR_SHIFT + 3);

	if (rp->r_tstate & TSTATE_IC) {
		ud64 |= (1ULL << 32);
	}

	/* set the xcc flags */
	rp->r_tstate &= ~TSTATE_XCC;
	if (ud64 == 0) {
		rp->r_tstate |= TSTATE_XZ;
	}

	rd = (inst >> 25) & 0x1f;
	if (putreg(&ud64, rp, rd, badaddr)) {
		return (SIMU_FAULT);
	}

	d64 = (drs1 << 32) | (uint32_t)rp->r_y;
	d64 >>= 1;
	rp->r_y = (uint32_t)d64;

	return (SIMU_SUCCESS);
}

/*
 * simulate unimplemented instructions (popc, mulscc, ldqf{a}, stqf{a})
 */
int
simulate_unimp(struct regs *rp, caddr_t *badaddr)
{
	uint_t	inst, optype, op3, asi;
	uint_t	rs1, rd;
	uint_t	ignor, i;
	machpcb_t *mpcb = lwptompcb(ttolwp(curthread));
	int	nomatch = 0;
	caddr_t	addr = (caddr_t)rp->r_pc;
	struct as *as;
	caddr_t	ka;
	pfn_t	pfnum;
	page_t *pp;
	proc_t *p = ttoproc(curthread);
	struct seg *mapseg;
	struct segvn_data *svd;

	ASSERT(USERMODE(rp->r_tstate));
	inst = fetch_user_instr(addr);
	if (inst == (uint_t)-1) {
		mpcb->mpcb_illexcaddr = addr;
		mpcb->mpcb_illexcinsn = (uint32_t)-1;
		return (SIMU_ILLEGAL);
	}

	/*
	 * When fixing dirty v8 instructions there's a race if two processors
	 * are executing the dirty executable at the same time.  If one
	 * cleans the instruction as the other is executing it the second
	 * processor will see a clean instruction when it comes through this
	 * code and will return SIMU_ILLEGAL.  To work around the race
	 * this code will keep track of the last illegal instruction seen
	 * by each lwp and will only take action if the illegal instruction
	 * is repeatable.
	 */
	if (addr != mpcb->mpcb_illexcaddr ||
	    inst != mpcb->mpcb_illexcinsn)
		nomatch = 1;
	mpcb->mpcb_illexcaddr = addr;
	mpcb->mpcb_illexcinsn = inst;

	/* instruction fields */
	i = (inst >> 13) & 0x1;
	rd = (inst >> 25) & 0x1f;
	optype = (inst >> 30) & 0x3;
	op3 = (inst >> 19) & 0x3f;
	ignor = (inst >> 5) & 0xff;
	if (IS_IBIT_SET(inst)) {
		asi = (uint32_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
		    TSTATE_ASI_MASK);
	} else {
		asi = ignor;
	}

	if (IS_VIS1(optype, op3) ||
	    IS_PARTIAL_OR_SHORT_FLOAT_LD_ST(optype, op3, asi) ||
	    IS_FLOAT_QUAD_OP(optype, op3)) {
		klwp_t *lwp = ttolwp(curthread);
		kfpu_t *fp = lwptofpu(lwp);
		if (fpu_exists) {
			if (!(_fp_read_fprs() & FPRS_FEF))
				fp_enable();
			_fp_read_pfsr(&fp->fpu_fsr);
		} else {
			if (!fp->fpu_en)
				fp_enable();
		}
		fp_precise(rp);
		return (SIMU_RETRY);
	}

	if (optype == 2 && op3 == IOP_V8_POPC) {
		return (simulate_popc(rp, badaddr, inst));
	} else if (optype == 3 && op3 == IOP_V8_POPC) {
		return (SIMU_ILLEGAL);
	} else if (optype == OP_V8_ARITH && op3 == IOP_V8_MULScc) {
		return (simulate_mulscc(rp, badaddr, inst));
	}

	if (optype == OP_V8_LDSTR) {
		if (op3 == IOP_V8_LDQF || op3 == IOP_V8_LDQFA ||
		    op3 == IOP_V8_STQF || op3 == IOP_V8_STQFA)
			return (do_unaligned(rp, badaddr));
	}

	/* This is a new instruction so illexccnt should also be set. */
	if (nomatch) {
		mpcb->mpcb_illexccnt = 0;
		return (SIMU_RETRY);
	}

	/*
	 * In order to keep us from entering into an infinite loop while
	 * attempting to clean up faulty instructions, we will return
	 * SIMU_ILLEGAL once we've cleaned up the instruction as much
	 * as we can, and still end up here.
	 */
	if (mpcb->mpcb_illexccnt >= 3)
		return (SIMU_ILLEGAL);

	mpcb->mpcb_illexccnt += 1;

	/*
	 * The rest of the code handles v8 binaries with instructions
	 * that have dirty (non-zero) bits in reserved or 'ignored'
	 * fields; these will cause core dumps on v9 machines.
	 *
	 * We only clean dirty instructions in 32-bit programs (ie, v8)
	 * running on SPARCv9 processors.  True v9 programs are forced
	 * to use the instruction set as intended.
	 */
	if (lwp_getdatamodel(curthread->t_lwp) != DATAMODEL_ILP32)
		return (SIMU_ILLEGAL);
	switch (optype) {
	case OP_V8_BRANCH:
	case OP_V8_CALL:
		return (SIMU_ILLEGAL);	/* these don't have ignored fields */
		/*NOTREACHED*/
	case OP_V8_ARITH:
		switch (op3) {
		case IOP_V8_RETT:
			if (rd == 0 && !(i == 0 && ignor))
				return (SIMU_ILLEGAL);
			if (rd)
				inst &= ~(0x1f << 25);
			if (i == 0 && ignor)
				inst &= ~(0xff << 5);
			break;
		case IOP_V8_TCC:
			if (i == 0 && ignor != 0) {
				inst &= ~(0xff << 5);
			} else if (i == 1 && (((inst >> 7) & 0x3f) != 0)) {
				inst &= ~(0x3f << 7);
			} else {
				return (SIMU_ILLEGAL);
			}
			break;
		case IOP_V8_JMPL:
		case IOP_V8_RESTORE:
		case IOP_V8_SAVE:
			if ((op3 == IOP_V8_RETT && rd) ||
			    (i == 0 && ignor)) {
				inst &= ~(0xff << 5);
			} else {
				return (SIMU_ILLEGAL);
			}
			break;
		case IOP_V8_FCMP:
			if (rd == 0)
				return (SIMU_ILLEGAL);
			inst &= ~(0x1f << 25);
			break;
		case IOP_V8_RDASR:
			rs1 = ((inst >> 14) & 0x1f);
			if (rs1 == 1 || (rs1 >= 7 && rs1 <= 14)) {
				/*
				 * The instruction specifies an invalid
				 * state register - better bail out than
				 * "fix" it when we're not sure what was
				 * intended.
				 */
				return (SIMU_ILLEGAL);
			}
			/*
			 * Note: this case includes the 'stbar'
			 * instruction (rs1 == 15 && i == 0).
			 */
			if ((ignor = (inst & 0x3fff)) != 0)
				inst &= ~(0x3fff);
			break;
		case IOP_V8_SRA:
		case IOP_V8_SRL:
		case IOP_V8_SLL:
			if (ignor == 0)
				return (SIMU_ILLEGAL);
			inst &= ~(0xff << 5);
			break;
		case IOP_V8_ADD:
		case IOP_V8_AND:
		case IOP_V8_OR:
		case IOP_V8_XOR:
		case IOP_V8_SUB:
		case IOP_V8_ANDN:
		case IOP_V8_ORN:
		case IOP_V8_XNOR:
		case IOP_V8_ADDC:
		case IOP_V8_UMUL:
		case IOP_V8_SMUL:
		case IOP_V8_SUBC:
		case IOP_V8_UDIV:
		case IOP_V8_SDIV:
		case IOP_V8_ADDcc:
		case IOP_V8_ANDcc:
		case IOP_V8_ORcc:
		case IOP_V8_XORcc:
		case IOP_V8_SUBcc:
		case IOP_V8_ANDNcc:
		case IOP_V8_ORNcc:
		case IOP_V8_XNORcc:
		case IOP_V8_ADDCcc:
		case IOP_V8_UMULcc:
		case IOP_V8_SMULcc:
		case IOP_V8_SUBCcc:
		case IOP_V8_UDIVcc:
		case IOP_V8_SDIVcc:
		case IOP_V8_TADDcc:
		case IOP_V8_TSUBcc:
		case IOP_V8_TADDccTV:
		case IOP_V8_TSUBccTV:
		case IOP_V8_MULScc:
		case IOP_V8_WRASR:
		case IOP_V8_FLUSH:
			if (i != 0 || ignor == 0)
				return (SIMU_ILLEGAL);
			inst &= ~(0xff << 5);
			break;
		default:
			return (SIMU_ILLEGAL);
		}
		break;
	case OP_V8_LDSTR:
		switch (op3) {
		case IOP_V8_STFSR:
		case IOP_V8_LDFSR:
			if (rd == 0 && !(i == 0 && ignor))
				return (SIMU_ILLEGAL);
			if (rd)
				inst &= ~(0x1f << 25);
			if (i == 0 && ignor)
				inst &= ~(0xff << 5);
			break;
		default:
			if (optype == OP_V8_LDSTR && !IS_LDST_ALT(op3) &&
			    i == 0 && ignor)
				inst &= ~(0xff << 5);
			else
				return (SIMU_ILLEGAL);
			break;
		}
		break;
	default:
		return (SIMU_ILLEGAL);
	}

	as = p->p_as;

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	mapseg = as_findseg(as, (caddr_t)rp->r_pc, 0);
	ASSERT(mapseg != NULL);
	svd = (struct segvn_data *)mapseg->s_data;

	/*
	 * We only create a COW page for MAP_PRIVATE mappings.
	 */
	SEGVN_LOCK_ENTER(as, &svd->lock, RW_READER);
	if ((svd->type & MAP_TYPE) & MAP_SHARED) {
		SEGVN_LOCK_EXIT(as, &svd->lock);
		AS_LOCK_EXIT(as, &as->a_lock);
		return (SIMU_ILLEGAL);
	}
	SEGVN_LOCK_EXIT(as, &svd->lock);
	AS_LOCK_EXIT(as, &as->a_lock);

	/*
	 * A "flush" instruction using the user PC's vaddr will not work
	 * here, at least on Spitfire. Instead we create a temporary kernel
	 * mapping to the user's text page, then modify and flush that.
	 * Break COW by locking user page.
	 */
	if (as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK), PAGESIZE,
	    F_SOFTLOCK, S_READ))
		return (SIMU_FAULT);

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	pfnum = hat_getpfnum(as->a_hat, (caddr_t)rp->r_pc);
	AS_LOCK_EXIT(as, &as->a_lock);
	if (pf_is_memory(pfnum)) {
		pp = page_numtopp_nolock(pfnum);
		ASSERT(pp == NULL || PAGE_LOCKED(pp));
	} else {
		(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
		    PAGESIZE, F_SOFTUNLOCK, S_READ);
		return (SIMU_FAULT);
	}

	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
	ka = ppmapin(pp, PROT_READ|PROT_WRITE, (caddr_t)rp->r_pc);
	*(uint_t *)(ka + (uintptr_t)(rp->r_pc % PAGESIZE)) = inst;
	doflush(ka + (uintptr_t)(rp->r_pc % PAGESIZE));
	ppmapout(ka);
	AS_LOCK_EXIT(as, &as->a_lock);

	(void) as_fault(as->a_hat, as, (caddr_t)(rp->r_pc & PAGEMASK),
	    PAGESIZE, F_SOFTUNLOCK, S_READ);
	return (SIMU_RETRY);
}

/*
 * Simulate a "rd %tick" or "rd %stick" (%asr24) instruction.
 */
int
simulate_rdtick(struct regs *rp)
{
	uint_t	inst, op, op3, rd, rs1, i;
	caddr_t badaddr;

	inst = fetch_user_instr((caddr_t)rp->r_pc);
	op   = (inst >> 30) & 0x3;
	rd   = (inst >> 25) & 0x1F;
	op3  = (inst >> 19) & 0x3F;
	i    = (inst >> 13) & 0x1;

	/*
	 * Make sure this is either a %tick read (rs1 == 0x4) or
	 * a %stick read (rs1 == 0x18) instruction.
	 */
	if (op == 2 && op3 == 0x28 && i == 0) {
		rs1 = (inst >> 14) & 0x1F;

		if (rs1 == 0x4) {
			uint64_t tick;
			(void) flush_user_windows_to_stack(NULL);
			tick = gettick_counter();
			if (putreg(&tick, rp, rd, &badaddr) == 0)
				return (SIMU_SUCCESS);
		} else if (rs1 == 0x18) {
			uint64_t stick;
			(void) flush_user_windows_to_stack(NULL);
			stick = gethrtime_unscaled();
			if (putreg(&stick, rp, rd, &badaddr) == 0)
				return (SIMU_SUCCESS);
		}
	}

	return (SIMU_FAULT);
}

/*
 * Get the value of a register for instruction simulation
 * by using the regs or window structure pointers.
 * Return 0 for success, and -1 for failure.  If there is a failure,
 * save the faulting address using badaddr pointer.
 * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
 * Don't truncate globals/outs for 32 bit programs, for v8+ support.
 */
int
getreg(struct regs *rp, uint_t reg, uint64_t *val, caddr_t *badaddr)
{
	uint64_t *rgs, *sp;
	int rv = 0;

	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
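	/*
	 * On a 64-bit (v9) user stack the saved window lies at
	 * sp + V9BIAS64 because v9 stack pointers carry the stack
	 * bias; a 32-bit stack holds unbiased 32-bit ins and locals.
	 */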
	if (reg == 0) {
		*val = 0;
	} else if (reg < 16) {
		*val = rgs[reg];
	} else if (IS_V9STACK(sp)) {
		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
		uint64_t *addr = (uint64_t *)&rw[reg - 16];
		uint64_t res;

		if (USERMODE(rp->r_tstate)) {
			if (fuword64_nowatch(addr, &res) == -1) {
				*badaddr = (caddr_t)addr;
				rv = -1;
			}
		} else {
			res = *addr;
		}
		*val = res;
	} else {
		caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
		uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
		uint32_t *addr = (uint32_t *)&rw[reg - 16];
		uint32_t res;

		if (USERMODE(rp->r_tstate)) {
			if (fuword32_nowatch(addr, &res) == -1) {
				*badaddr = (caddr_t)addr;
				rv = -1;
			}
		} else {
			res = *addr;
		}
		*val = (uint64_t)res;
	}
	return (rv);
}

/*
 * Set the value of a register after instruction simulation
 * by using the regs or window structure pointers.
 * Return 0 for success, and -1 for failure.  If there is a failure,
 * save the faulting address using badaddr pointer.
 * We have 64 bit globals and outs, and 32 or 64 bit ins and locals.
 * Don't truncate globals/outs for 32 bit programs, for v8+ support.
 */
int
putreg(uint64_t	*data, struct regs *rp, uint_t reg, caddr_t *badaddr)
{
	uint64_t *rgs, *sp;
	int rv = 0;

	rgs = (uint64_t *)&rp->r_ps;		/* globals and outs */
	sp = (uint64_t *)rp->r_sp;		/* ins and locals */
	if (reg == 0) {
		return (0);
	} else if (reg < 16) {
		rgs[reg] = *data;
	} else if (IS_V9STACK(sp)) {
		uint64_t *rw = (uint64_t *)((uintptr_t)sp + V9BIAS64);
		uint64_t *addr = (uint64_t *)&rw[reg - 16];
		uint64_t res;

		if (USERMODE(rp->r_tstate)) {
			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);

			res = *data;
			if (suword64_nowatch(addr, res) != 0) {
				*badaddr = (caddr_t)addr;
				rv = -1;
			}
			/*
			 * We have changed a local or in register;
			 * nuke the watchpoint return windows.
			 */
			mpcb->mpcb_rsp[0] = NULL;
			mpcb->mpcb_rsp[1] = NULL;
		} else {
			res = *data;
			*addr = res;
		}
	} else {
		caddr32_t sp32 = (caddr32_t)(uintptr_t)sp;
		uint32_t *rw = (uint32_t *)(uintptr_t)sp32;
		uint32_t *addr = (uint32_t *)&rw[reg - 16];
		uint32_t res;

		if (USERMODE(rp->r_tstate)) {
			struct machpcb *mpcb = lwptompcb(curthread->t_lwp);

			res = (uint_t)*data;
			if (suword32_nowatch(addr, res) != 0) {
				*badaddr = (caddr_t)addr;
				rv = -1;
			}
			/*
			 * We have changed a local or in register;
			 * nuke the watchpoint return windows.
			 */
			mpcb->mpcb_rsp[0] = NULL;
			mpcb->mpcb_rsp[1] = NULL;

		} else {
			res = (uint_t)*data;
			*addr = res;
		}
	}
	return (rv);
}

/*
 * Calculate a memory reference address from instruction
 * operands, used to return the address of a fault, instead
 * of the instruction when an error occurs.  This is code that is
 * common with most of the routines that simulate instructions.
 */
int
calc_memaddr(struct regs *rp, caddr_t *badaddr)
{
	uint_t	inst;
	uint_t	rd, rs1, rs2;
	int	sz;
	int	immflg;
	int	floatflg;
	caddr_t  addr;
	uint64_t val;

	if (USERMODE(rp->r_tstate))
		inst = fetch_user_instr((caddr_t)rp->r_pc);
	else
		inst = *(uint_t *)rp->r_pc;

	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	floatflg = (inst >> 24) & 1;
	immflg = (inst >> 13) & 1;

	if (floatflg) {
		switch ((inst >> 19) & 3) {	/* map size bits to a number */
		case 0: sz = 4; break;		/* ldf/stf */
		case 1: return (0);		/* ld[x]fsr/st[x]fsr */
		case 2: sz = 16; break;		/* ldqf/stqf */
		case 3: sz = 8; break;		/* lddf/stdf */
		}
		/*
		 * Fix to access extra double register encoding plus
		 * compensate to access the correct fpu_dreg.
		 */
		if (sz > 4) {
			if ((rd & 1) == 1)
				rd = (rd & 0x1e) | 0x20;
			rd = rd >> 1;
		}
	} else {
		switch ((inst >> 19) & 0xf) {	/* map size bits to a number */
		case 0:				/* lduw */
		case 4:				/* stw */
		case 8:				/* ldsw */
		case 0xf:			/* swap */
			sz = 4; break;
		case 1:				/* ldub */
		case 5:				/* stb */
		case 9:				/* ldsb */
		case 0xd:			/* ldstub */
			sz = 1; break;
		case 2:				/* lduh */
		case 6:				/* sth */
		case 0xa:			/* ldsh */
			sz = 2; break;
		case 3:				/* ldd */
		case 7:				/* std */
		case 0xb:			/* ldx */
		case 0xe:			/* stx */
			sz = 8; break;
		}
	}

	if (USERMODE(rp->r_tstate))
		(void) flush_user_windows_to_stack(NULL);
	else
		flush_windows();

	if (getreg(rp, rs1, &val, badaddr))
		return (SIMU_FAULT);
	addr = (caddr_t)val;

	/* check immediate bit and use immediate field or reg (rs2) */
	if (immflg) {
		int imm;
		imm = inst & 0x1fff;		/* mask out immediate field */
		imm <<= 19;			/* sign extend it */
		imm >>= 19;
		addr += imm;			/* compute address */
	} else {
		if (getreg(rp, rs2, &val, badaddr))
			return (SIMU_FAULT);
		addr += val;
	}

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (curproc->p_model == DATAMODEL_ILP32 && USERMODE(rp->r_tstate)) {
		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
		addr = (caddr_t)(uintptr_t)addr32;
	}

	*badaddr = addr;
	return ((uintptr_t)addr & (sz - 1) ? SIMU_UNALIGN : SIMU_SUCCESS);
}

/*
 * Return the size of a load or store instruction (1, 2, 4, 8, 16, 64).
 * Also compute the precise address by instruction disassembly.
 * (v9 page faults only provide the page address via the hardware.)
 * Return 0 on failure (not a load or store instruction).
 */
int
instr_size(struct regs *rp, caddr_t *addrp, enum seg_rw rdwr)
{
	uint_t	inst, op3, asi;
	uint_t	rd, rs1, rs2;
	int	sz = 0;
	int	immflg;
	int	floatflg;
	caddr_t	addr;
	caddr_t badaddr;
	uint64_t val;

	if (rdwr == S_EXEC) {
		*addrp = (caddr_t)rp->r_pc;
		return (4);
	}

	/*
	 * Fetch the instruction from user-level.
	 * We would like to assert this:
	 *   ASSERT(USERMODE(rp->r_tstate));
	 * but we can't because we can reach this point from a
	 * register window underflow/overflow and the v9 wbuf
	 * traps call trap() with T_USER even though r_tstate
	 * indicates a system trap, not a user trap.
	 */
	inst = fetch_user_instr((caddr_t)rp->r_pc);

	op3 = (inst >> 19) & 0x3f;
	rd = (inst >> 25) & 0x1f;
	rs1 = (inst >> 14) & 0x1f;
	rs2 = inst & 0x1f;
	floatflg = (inst >> 24) & 1;
	immflg = (inst >> 13) & 1;

	/* if not load or store do nothing.  can't happen? */
	if ((inst >> 30) != 3)
		return (0);

	if (immflg)
		asi = (uint_t)((rp->r_tstate >> TSTATE_ASI_SHIFT) &
		    TSTATE_ASI_MASK);
	else
		asi = (inst >> 5) & 0xff;

	if (floatflg) {
		/* check for ld/st alternate and highest defined V9 asi */
		if ((op3 & 0x30) == 0x30 && asi > ASI_SNFL) {
			sz = extended_asi_size(asi);
		} else {
			switch (op3 & 3) {
			case 0:
				sz = 4;			/* ldf/stf/cas */
				break;
			case 1:
				if (rd == 0)
					sz = 4;		/* ldfsr/stfsr */
				else
					sz = 8;		/* ldxfsr/stxfsr */
				break;
			case 2:
				if (op3 == 0x3e)
					sz = 8;		/* casx */
				else
					sz = 16;	/* ldqf/stqf */
				break;
			case 3:
				sz = 8;			/* lddf/stdf */
				break;
			}
		}
	} else {
		switch (op3 & 0xf) {		/* map size bits to a number */
		case 0:				/* lduw */
		case 4:				/* stw */
		case 8:				/* ldsw */
		case 0xf:			/* swap */
			sz = 4; break;
		case 1:				/* ldub */
		case 5:				/* stb */
		case 9:				/* ldsb */
		case 0xd:			/* ldstub */
			sz = 1; break;
		case 2:				/* lduh */
		case 6:				/* sth */
		case 0xa:			/* ldsh */
			sz = 2; break;
		case 3:				/* ldd */
		case 7:				/* std */
		case 0xb:			/* ldx */
		case 0xe:			/* stx */
			sz = 8; break;
		}
	}

	if (sz == 0)	/* can't happen? */
		return (0);
	(void) flush_user_windows_to_stack(NULL);

	if (getreg(rp, rs1, &val, &badaddr))
		return (0);
	addr = (caddr_t)val;

	/* cas/casx don't use rs2 / simm13 to compute the address */
	if ((op3 & 0x3d) != 0x3c) {
		/* check immediate bit and use immediate field or reg (rs2) */
		if (immflg) {
			int imm;
			imm  = inst & 0x1fff;	/* mask out immediate field */
			imm <<= 19;		/* sign extend it */
			imm >>= 19;
			addr += imm;		/* compute address */
		} else {
			/*
			 * asi's in the 0xCx range are partial store
			 * instructions.  For these, rs2 is a mask, not part of
			 * the address.
			 */
			if (!(floatflg && (asi & 0xf0) == 0xc0)) {
				if (getreg(rp, rs2, &val, &badaddr))
					return (0);
				addr += val;
			}
		}
	}

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (curproc->p_model == DATAMODEL_ILP32) {
		caddr32_t addr32 = (caddr32_t)(uintptr_t)addr;
		addr = (caddr_t)(uintptr_t)addr32;
	}

	*addrp = addr;
	ASSERT(sz != 0);
	return (sz);
}

/*
 * Fetch an instruction from user-level.
 * Deal with watchpoints, if they are in effect.
 */
int32_t
fetch_user_instr(caddr_t vaddr)
{
	proc_t *p = curproc;
	int32_t instr;

	/*
	 * If this is a 32-bit program, chop the address accordingly.  The
	 * intermediate uintptr_t casts prevent warnings under a certain
	 * compiler, and the temporary 32 bit storage is intended to force
	 * proper code generation and break up what would otherwise be a
	 * quadruple cast.
	 */
	if (p->p_model == DATAMODEL_ILP32) {
		caddr32_t vaddr32 = (caddr32_t)(uintptr_t)vaddr;
		vaddr = (caddr_t)(uintptr_t)vaddr32;
	}

	if (fuword32_nowatch(vaddr, (uint32_t *)&instr) == -1)
		instr = -1;

	return (instr);
}