exec.c revision 1.3
1/*
2 * This file is part of SIS.
3 *
4 * SIS, SPARC instruction simulator V1.8 Copyright (C) 1995 Jiri Gaisler,
5 * European Space Agency
6 *
7 * This program is free software; you can redistribute it and/or modify it under
8 * the terms of the GNU General Public License as published by the Free
9 * Software Foundation; either version 3 of the License, or (at your option)
10 * any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but WITHOUT
13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
15 * more details.
16 *
17 * You should have received a copy of the GNU General Public License along with
18 * this program; if not, see <http://www.gnu.org/licenses/>.
19 *
20 */
21
22#include "config.h"
23#include "sis.h"
24#include "end.h"
25#include <math.h>
26#include <stdio.h>
27
28extern int32    sis_verbose, sparclite;
29int ext_irl = 0;
30
/* Load/store interlock delay: extra cycles added when the completion
   time of a floating-point load is computed.  */
#define FLSTHOLD	1

/* Load delay (delete if unwanted - speeds up simulation).  When defined,
   the decoder checks for a dependency on the previously loaded register.  */
#define LOAD_DEL	1

/* Instruction cycle counts stored in sregs->icnt.  */
#define T_LD		2	/* single-word load */
#define T_LDD		3	/* double-word load */
#define T_ST		3	/* store */
#define T_STD		4	/* double-word store */
#define T_LDST		4	/* atomic load-store (LDSTUB) */
#define T_JMPL		2	/* JMPL takes two cycles */
#define T_RETT		2	/* RETT takes two cycles */
44
/* FSR "queue not empty" (qne) bit.  */
#define FSR_QNE		0x2000

/* FPU state values.  NOTE(review): judging by the names these are
   execute / exception-pending / exception modes; they are not used in
   the visible part of this file.  */
#define FP_EXE_MODE	0
#define FP_EXC_PE	1
#define FP_EXC_MODE	2

/* Condition field (instruction bits 28:25) of the floating-point branch
   instructions, in numeric order.  Each name is defined exactly once.  */
#define FBN		0	/* never */
#define FBNE		1	/* not equal: L, G or U */
#define FBLG		2	/* less or greater */
#define FBUL		3	/* unordered or less */
#define FBL		4	/* less */
#define FBUG		5	/* unordered or greater */
#define FBG		6	/* greater */
#define FBU		7	/* unordered */
#define FBA		8	/* always */
#define FBE		9	/* equal */
#define FBUE		10	/* unordered or equal */
#define FBGE		11	/* greater or equal */
#define FBUGE		12	/* unordered, greater or equal */
#define FBLE		13	/* less or equal */
#define FBULE		14	/* unordered, less or equal */
#define FBO		15	/* ordered */

/* Values of the two-bit fcc field, read as (fsr >> 10) & 3.  */
#define FCC_E		0	/* equal */
#define FCC_L		1	/* less */
#define FCC_G		2	/* greater */
#define FCC_U		3	/* unordered */
72
/* Processor state register (PSR) fields.  */
#define PSR_CWP		0x7		/* current window pointer */
#define PSR_ET		0x20		/* traps enabled */
#define PSR_PS		0x40		/* previous supervisor */
#define PSR_S		0x80		/* supervisor */
#define PSR_PIL		0x0f00		/* processor interrupt level */
#define PSR_EF		0x1000		/* FPU enabled */
#define PSR_C		0x0100000	/* icc: carry */
#define PSR_V		0x0200000	/* icc: overflow */
#define PSR_Z		0x0400000	/* icc: zero */
#define PSR_N		0x0800000	/* icc: negative */
#define PSR_CC		0x0F00000	/* all four icc bits */

/* Individual condition codes taken from a local variable `icc' that
   holds psr >> 20.  Only bit 0 of each expansion is meaningful; the
   users test the combined value with `& 1'.  */
#define ICC_C		(icc)
#define ICC_V		(icc >> 1)
#define ICC_Z		(icc >> 2)
#define ICC_N		(icc >> 3)

/* Non-zero when an FPU is present; needs `sregs' in scope.  */
#define FP_PRES		(sregs->fpu_pres)
91
/* Trap types stored in sregs->trap.  */
#define TRAP_IEXC	1	/* instruction access exception */
#define TRAP_UNIMP	2	/* unimplemented / illegal instruction */
#define TRAP_PRIVI	3	/* privileged instruction */
#define TRAP_FPDIS	4	/* FPU disabled */
#define TRAP_WOFL	5	/* window overflow */
#define TRAP_WUFL	6	/* window underflow */
#define TRAP_UNALI	7	/* unaligned memory address */
#define TRAP_FPEXC	8	/* floating-point exception */
#define TRAP_DEXC	9	/* data access exception */
#define TRAP_TAG	10	/* tag overflow */
#define TRAP_DIV0	0x2a	/* division by zero */

/* FSR floating-point trap type (ftt) field and the values used here.  */
#define FSR_TT		0x1C000	/* mask of the ftt field */
#define FP_IEEE		0x04000	/* IEEE 754 exception */
#define FP_UNIMP	0x0C000	/* unimplemented FPop */
#define FP_SEQ_ERR	0x10000	/* sequence error */
108
/* Condition field (instruction bits 28:25) shared by the integer
   branch (Bicc) and trap (Ticc) instructions.  Codes 9-15 are the
   logical negations of codes 1-7.  */
#define BICC_BN		0	/* never */
#define BICC_BE		1	/* equal: Z */
#define BICC_BLE	2	/* less or equal: Z | (N ^ V) */
#define BICC_BL		3	/* less: N ^ V */
#define BICC_BLEU	4	/* less or equal, unsigned: C | Z */
#define BICC_BCS	5	/* carry set: C */
#define BICC_NEG	6	/* negative: N */
#define BICC_BVS	7	/* overflow set: V */
#define BICC_BA		8	/* always */
#define BICC_BNE	9	/* not equal */
#define BICC_BG		10	/* greater */
#define BICC_BGE	11	/* greater or equal */
#define BICC_BGU	12	/* greater, unsigned */
#define BICC_BCC	13	/* carry clear */
#define BICC_POS	14	/* positive */
#define BICC_BVC	15	/* overflow clear */
125
/* Instruction word field masks.  */
#define INST_SIMM13	0x1fff	/* 13-bit signed immediate */
#define INST_RS2	0x1f	/* second source register number */
#define INST_I		0x2000	/* set: operand 2 is the immediate */

/* op2 values (instruction bits 24:22) for format op == 0.  */
#define BICC		0x02
#define SETHI		0x04
#define FPBCC		0x06

/* op3 values (instruction bits 24:19) for format op == 2:
   add / subtract, plain and tagged.  */
#define ADD		0x00
#define ADDCC		0x10
#define ADDX		0x08
#define ADDXCC		0x18
#define SUB		0x04
#define SUBCC		0x14
#define SUBX		0x0C
#define SUBXCC		0x1C
#define TADDCC		0x20
#define TSUBCC		0x21
#define TADDCCTV	0x22
#define TSUBCCTV	0x23

/* op == 2: logical operations.  */
#define IAND		0x01
#define IANDCC		0x11
#define IANDN		0x05
#define IANDNCC		0x15
#define IOR		0x02
#define IORCC		0x12
#define IORN		0x06
#define IORNCC		0x16
#define IXOR		0x03
#define IXORCC		0x13
#define IXNOR		0x07
#define IXNORCC		0x17

/* op == 2: shifts.  */
#define SLL		0x25
#define SRL		0x26
#define SRA		0x27

/* op == 2: multiply, divide and scan.  */
#define MULScc		0x24
#define DIVScc		0x1D
#define UMUL		0x0A
#define UMULCC		0x1A
#define SMUL		0x0B
#define SMULCC		0x1B
#define UDIV		0x0E
#define UDIVCC		0x1E
#define SDIV		0x0F
#define SDIVCC		0x1F
#define SCAN		0x2C

/* op == 2: state register reads and writes.  */
#define RDY		0x28
#define RDPSR		0x29
#define RDWIM		0x2A
#define RDTBR		0x2B
#define WRY		0x30
#define WRPSR		0x31
#define WRWIM		0x32
#define WRTBR		0x33

/* op == 2: control transfer, register windows, flush.  */
#define JMPL		0x38
#define RETT		0x39
#define TICC		0x3A
#define FLUSH		0x3B
#define SAVE		0x3C
#define RESTORE		0x3D

/* op3 values for format op == 3: loads.  The names ending in `A' are
   the alternate-address-space forms.  */
#define LD		0x00
#define LDA		0x10
#define LDUB		0x01
#define LDUBA		0x11
#define LDUH		0x02
#define LDUHA		0x12
#define LDD		0x03
#define LDDA		0x13
#define LDSB		0x09
#define LDSBA		0x19
#define LDSH		0x0A
#define LDSHA		0x1A
#define LDSTUB		0x0D
#define LDSTUBA		0x1D
#define LDF		0x20
#define LDFSR		0x21
#define LDDF		0x23

/* op == 3: stores and swap.  */
#define ST		0x04
#define STA		0x14
#define STB		0x05
#define STBA		0x15
#define STH		0x06
#define STHA		0x16
#define STD		0x07
#define STDA		0x17
#define STF		0x24
#define STFSR		0x25
#define STDFQ		0x26
#define STDF		0x27
#define SWAP		0x0F
#define SWAPA		0x1F
215
/* Most significant bit of a 32-bit word.  */
#define SIGN_BIT	0x80000000

/* # of cycles overhead when a trap is taken */
#define TRAP_C		3
220
221/* Forward declarations */
222
223static uint32	sub_cc (uint32 psr, int32 operand1, int32 operand2,
224			int32 result);
225static uint32	add_cc (uint32 psr, int32 operand1, int32 operand2,
226			int32 result);
227static void	log_cc (int32 result, struct pstate *sregs);
228static int	fpexec (uint32 op3, uint32 rd, uint32 rs1, uint32 rs2,
229			struct pstate *sregs);
230static int	chk_asi (struct pstate *sregs, uint32 *asi, uint32 op3);
231
232
233extern struct estate ebase;
234extern int32    nfp,ift;
235
236#ifdef ERRINJ
237extern uint32 errtt, errftt;
238#endif
239
240static uint32
241sub_cc(psr, operand1, operand2, result)
242    uint32          psr;
243    int32           operand1;
244    int32           operand2;
245    int32           result;
246{
247    psr = ((psr & ~PSR_N) | ((result >> 8) & PSR_N));
248    if (result)
249	psr &= ~PSR_Z;
250    else
251	psr |= PSR_Z;
252    psr = (psr & ~PSR_V) | ((((operand1 & ~operand2 & ~result) |
253			   (~operand1 & operand2 & result)) >> 10) & PSR_V);
254    psr = (psr & ~PSR_C) | ((((~operand1 & operand2) |
255			 ((~operand1 | operand2) & result)) >> 11) & PSR_C);
256    return (psr);
257}
258
259uint32
260add_cc(psr, operand1, operand2, result)
261    uint32          psr;
262    int32           operand1;
263    int32           operand2;
264    int32           result;
265{
266    psr = ((psr & ~PSR_N) | ((result >> 8) & PSR_N));
267    if (result)
268	psr &= ~PSR_Z;
269    else
270	psr |= PSR_Z;
271    psr = (psr & ~PSR_V) | ((((operand1 & operand2 & ~result) |
272			  (~operand1 & ~operand2 & result)) >> 10) & PSR_V);
273    psr = (psr & ~PSR_C) | ((((operand1 & operand2) |
274			 ((operand1 | operand2) & ~result)) >> 11) & PSR_C);
275    return(psr);
276}
277
278static void
279log_cc(result, sregs)
280    int32           result;
281    struct pstate  *sregs;
282{
283    sregs->psr &= ~(PSR_CC);	/* Zero CC bits */
284    sregs->psr = (sregs->psr | ((result >> 8) & PSR_N));
285    if (result == 0)
286	sregs->psr |= PSR_Z;
287}
288
289/* Add two unsigned 32-bit integers, and calculate the carry out. */
290
291static uint32
292add32 (uint32 n1, uint32 n2, int *carry)
293{
294  uint32 result = n1 + n2;
295
296  *carry = result < n1 || result < n1;
297  return(result);
298}
299
300/* Multiply two 32-bit integers.  */
301
302static void
303mul64 (uint32 n1, uint32 n2, uint32 *result_hi, uint32 *result_lo, int msigned)
304{
305  uint32 lo, mid1, mid2, hi, reg_lo, reg_hi;
306  int carry;
307  int sign = 0;
308
309  /* If this is a signed multiply, calculate the sign of the result
310     and make the operands positive.  */
311  if (msigned)
312    {
313      sign = (n1 ^ n2) & SIGN_BIT;
314      if (n1 & SIGN_BIT)
315	n1 = -n1;
316      if (n2 & SIGN_BIT)
317	n2 = -n2;
318
319    }
320
321  /* We can split the 32x32 into four 16x16 operations. This ensures
322     that we do not lose precision on 32bit only hosts: */
323  lo =   ((n1 & 0xFFFF) * (n2 & 0xFFFF));
324  mid1 = ((n1 & 0xFFFF) * ((n2 >> 16) & 0xFFFF));
325  mid2 = (((n1 >> 16) & 0xFFFF) * (n2 & 0xFFFF));
326  hi =   (((n1 >> 16) & 0xFFFF) * ((n2 >> 16) & 0xFFFF));
327
328  /* We now need to add all of these results together, taking care
329     to propogate the carries from the additions: */
330  reg_lo = add32 (lo, (mid1 << 16), &carry);
331  reg_hi = carry;
332  reg_lo = add32 (reg_lo, (mid2 << 16), &carry);
333  reg_hi += (carry + ((mid1 >> 16) & 0xFFFF) + ((mid2 >> 16) & 0xFFFF) + hi);
334
335  /* Negate result if necessary. */
336  if (sign)
337    {
338      reg_hi = ~ reg_hi;
339      reg_lo = - reg_lo;
340      if (reg_lo == 0)
341	reg_hi++;
342    }
343
344  *result_lo = reg_lo;
345  *result_hi = reg_hi;
346}
347
348
349/* Divide a 64-bit integer by a 32-bit integer.  We cheat and assume
350   that the host compiler supports long long operations.  */
351
352static void
353div64 (uint32 n1_hi, uint32 n1_low, uint32 n2, uint32 *result, int msigned)
354{
355  uint64 n1;
356
357  n1 = ((uint64) n1_hi) << 32;
358  n1 |= ((uint64) n1_low) & 0xffffffff;
359
360  if (msigned)
361    {
362      int64 n1_s = (int64) n1;
363      int32 n2_s = (int32) n2;
364      n1_s = n1_s / n2_s;
365      n1 = (uint64) n1_s;
366    }
367  else
368    n1 = n1 / n2;
369
370  *result = (uint32) (n1 & 0xffffffff);
371}
372
373
374int
375dispatch_instruction(sregs)
376    struct pstate  *sregs;
377{
378
379    uint32          cwp, op, op2, op3, asi, rd, cond, rs1,
380                    rs2;
381    uint32          ldep, icc;
382    int32           operand1, operand2, *rdd, result, eicc,
383                    new_cwp;
384    int32           pc, npc, data, address, ws, mexc, fcc;
385    int32	    ddata[2];
386
387    sregs->ninst++;
388    cwp = ((sregs->psr & PSR_CWP) << 4);
389    op = sregs->inst >> 30;
390    pc = sregs->npc;
391    npc = sregs->npc + 4;
392    op3 = rd = rs1 = operand2 = eicc = 0;
393    rdd = 0;
394    if (op & 2) {
395
396	op3 = (sregs->inst >> 19) & 0x3f;
397	rs1 = (sregs->inst >> 14) & 0x1f;
398	rd = (sregs->inst >> 25) & 0x1f;
399
400#ifdef LOAD_DEL
401
	/* Check if load dependency is possible */
403	if (ebase.simtime <= sregs->ildtime)
404	    ldep = (((op3 & 0x38) != 0x28) && ((op3 & 0x3e) != 0x34) && (sregs->ildreg != 0));
405        else
406	    ldep = 0;
407	if (sregs->inst & INST_I) {
408	    if (ldep && (sregs->ildreg == rs1))
409		sregs->hold++;
410	    operand2 = sregs->inst;
411	    operand2 = ((operand2 << 19) >> 19);	/* sign extend */
412	} else {
413	    rs2 = sregs->inst & INST_RS2;
414	    if (rs2 > 7)
415		operand2 = sregs->r[(cwp + rs2) & 0x7f];
416	    else
417		operand2 = sregs->g[rs2];
418	    if (ldep && ((sregs->ildreg == rs1) || (sregs->ildreg == rs2)))
419		sregs->hold++;
420	}
421#else
422	if (sregs->inst & INST_I) {
423	    operand2 = sregs->inst;
424	    operand2 = ((operand2 << 19) >> 19);	/* sign extend */
425	} else {
426	    rs2 = sregs->inst & INST_RS2;
427	    if (rs2 > 7)
428		operand2 = sregs->r[(cwp + rs2) & 0x7f];
429	    else
430		operand2 = sregs->g[rs2];
431	}
432#endif
433
434	if (rd > 7)
435	    rdd = &(sregs->r[(cwp + rd) & 0x7f]);
436	else
437	    rdd = &(sregs->g[rd]);
438	if (rs1 > 7)
439	    rs1 = sregs->r[(cwp + rs1) & 0x7f];
440	else
441	    rs1 = sregs->g[rs1];
442    }
443    switch (op) {
444    case 0:
445	op2 = (sregs->inst >> 22) & 0x7;
446	switch (op2) {
447	case SETHI:
448	    rd = (sregs->inst >> 25) & 0x1f;
449	    if (rd > 7)
450		rdd = &(sregs->r[(cwp + rd) & 0x7f]);
451	    else
452		rdd = &(sregs->g[rd]);
453	    *rdd = sregs->inst << 10;
454	    break;
455	case BICC:
456#ifdef STAT
457	    sregs->nbranch++;
458#endif
459	    icc = sregs->psr >> 20;
460	    cond = ((sregs->inst >> 25) & 0x0f);
461	    switch (cond) {
462	    case BICC_BN:
463		eicc = 0;
464		break;
465	    case BICC_BE:
466		eicc = ICC_Z;
467		break;
468	    case BICC_BLE:
469		eicc = ICC_Z | (ICC_N ^ ICC_V);
470		break;
471	    case BICC_BL:
472		eicc = (ICC_N ^ ICC_V);
473		break;
474	    case BICC_BLEU:
475		eicc = ICC_C | ICC_Z;
476		break;
477	    case BICC_BCS:
478		eicc = ICC_C;
479		break;
480	    case BICC_NEG:
481		eicc = ICC_N;
482		break;
483	    case BICC_BVS:
484		eicc = ICC_V;
485		break;
486	    case BICC_BA:
487		eicc = 1;
488		if (sregs->inst & 0x20000000)
489		    sregs->annul = 1;
490		break;
491	    case BICC_BNE:
492		eicc = ~(ICC_Z);
493		break;
494	    case BICC_BG:
495		eicc = ~(ICC_Z | (ICC_N ^ ICC_V));
496		break;
497	    case BICC_BGE:
498		eicc = ~(ICC_N ^ ICC_V);
499		break;
500	    case BICC_BGU:
501		eicc = ~(ICC_C | ICC_Z);
502		break;
503	    case BICC_BCC:
504		eicc = ~(ICC_C);
505		break;
506	    case BICC_POS:
507		eicc = ~(ICC_N);
508		break;
509	    case BICC_BVC:
510		eicc = ~(ICC_V);
511		break;
512	    }
513	    if (eicc & 1) {
514		operand1 = sregs->inst;
515		operand1 = ((operand1 << 10) >> 8);	/* sign extend */
516		npc = sregs->pc + operand1;
517	    } else {
518		if (sregs->inst & 0x20000000)
519		    sregs->annul = 1;
520	    }
521	    break;
522	case FPBCC:
523#ifdef STAT
524	    sregs->nbranch++;
525#endif
526	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
527		sregs->trap = TRAP_FPDIS;
528		break;
529	    }
530	    if (ebase.simtime < sregs->ftime) {
531		sregs->ftime = ebase.simtime + sregs->hold;
532	    }
533	    cond = ((sregs->inst >> 25) & 0x0f);
534	    fcc = (sregs->fsr >> 10) & 0x3;
535	    switch (cond) {
536	    case FBN:
537		eicc = 0;
538		break;
539	    case FBNE:
540		eicc = (fcc != FCC_E);
541		break;
542	    case FBLG:
543		eicc = (fcc == FCC_L) || (fcc == FCC_G);
544		break;
545	    case FBUL:
546		eicc = (fcc == FCC_L) || (fcc == FCC_U);
547		break;
548	    case FBL:
549		eicc = (fcc == FCC_L);
550		break;
551	    case FBUG:
552		eicc = (fcc == FCC_G) || (fcc == FCC_U);
553		break;
554	    case FBG:
555		eicc = (fcc == FCC_G);
556		break;
557	    case FBU:
558		eicc = (fcc == FCC_U);
559		break;
560	    case FBA:
561		eicc = 1;
562		if (sregs->inst & 0x20000000)
563		    sregs->annul = 1;
564		break;
565	    case FBE:
566		eicc = !(fcc != FCC_E);
567		break;
568	    case FBUE:
569		eicc = !((fcc == FCC_L) || (fcc == FCC_G));
570		break;
571	    case FBGE:
572		eicc = !((fcc == FCC_L) || (fcc == FCC_U));
573		break;
574	    case FBUGE:
575		eicc = !(fcc == FCC_L);
576		break;
577	    case FBLE:
578		eicc = !((fcc == FCC_G) || (fcc == FCC_U));
579		break;
580	    case FBULE:
581		eicc = !(fcc == FCC_G);
582		break;
583	    case FBO:
584		eicc = !(fcc == FCC_U);
585		break;
586	    }
587	    if (eicc) {
588		operand1 = sregs->inst;
589		operand1 = ((operand1 << 10) >> 8);	/* sign extend */
590		npc = sregs->pc + operand1;
591	    } else {
592		if (sregs->inst & 0x20000000)
593		    sregs->annul = 1;
594	    }
595	    break;
596
597	default:
598	    sregs->trap = TRAP_UNIMP;
599	    break;
600	}
601	break;
602    case 1:			/* CALL */
603#ifdef STAT
604	sregs->nbranch++;
605#endif
606	sregs->r[(cwp + 15) & 0x7f] = sregs->pc;
607	npc = sregs->pc + (sregs->inst << 2);
608	break;
609
610    case 2:
611	if ((op3 >> 1) == 0x1a) {
612	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
613		sregs->trap = TRAP_FPDIS;
614	    } else {
615		rs1 = (sregs->inst >> 14) & 0x1f;
616		rs2 = sregs->inst & 0x1f;
617		sregs->trap = fpexec(op3, rd, rs1, rs2, sregs);
618	    }
619	} else {
620
621	    switch (op3) {
622	    case TICC:
623	        icc = sregs->psr >> 20;
624	        cond = ((sregs->inst >> 25) & 0x0f);
625	        switch (cond) {
626		case BICC_BN:
627		    eicc = 0;
628		    break;
629		case BICC_BE:
630		    eicc = ICC_Z;
631		    break;
632		case BICC_BLE:
633		    eicc = ICC_Z | (ICC_N ^ ICC_V);
634		    break;
635		case BICC_BL:
636		    eicc = (ICC_N ^ ICC_V);
637		    break;
638		case BICC_BLEU:
639		    eicc = ICC_C | ICC_Z;
640		    break;
641		case BICC_BCS:
642		    eicc = ICC_C;
643		    break;
644		case BICC_NEG:
645		    eicc = ICC_N;
646		    break;
647		case BICC_BVS:
648		    eicc = ICC_V;
649		    break;
650	        case BICC_BA:
651		    eicc = 1;
652		    break;
653	        case BICC_BNE:
654		    eicc = ~(ICC_Z);
655		    break;
656	        case BICC_BG:
657		    eicc = ~(ICC_Z | (ICC_N ^ ICC_V));
658		    break;
659	        case BICC_BGE:
660		    eicc = ~(ICC_N ^ ICC_V);
661		    break;
662	        case BICC_BGU:
663		    eicc = ~(ICC_C | ICC_Z);
664		    break;
665	        case BICC_BCC:
666		    eicc = ~(ICC_C);
667		    break;
668	        case BICC_POS:
669		    eicc = ~(ICC_N);
670		    break;
671	        case BICC_BVC:
672		    eicc = ~(ICC_V);
673		    break;
674		}
675		if (eicc & 1) {
676		    sregs->trap = (0x80 | ((rs1 + operand2) & 0x7f));
677		}
678		break;
679
680	    case MULScc:
681		operand1 =
682		    (((sregs->psr & PSR_V) ^ ((sregs->psr & PSR_N) >> 2))
683		     << 10) | (rs1 >> 1);
684		if ((sregs->y & 1) == 0)
685		    operand2 = 0;
686		*rdd = operand1 + operand2;
687		sregs->y = (rs1 << 31) | (sregs->y >> 1);
688		sregs->psr = add_cc(sregs->psr, operand1, operand2, *rdd);
689		break;
690	    case DIVScc:
691		{
692		  int sign;
693		  uint32 result, remainder;
694		  int c0, y31;
695
696		  if (!sparclite) {
697		     sregs->trap = TRAP_UNIMP;
698                     break;
699		  }
700
701		  sign = ((sregs->psr & PSR_V) != 0) ^ ((sregs->psr & PSR_N) != 0);
702
703		  remainder = (sregs->y << 1) | (rs1 >> 31);
704
705		  /* If true sign is positive, calculate remainder - divisor.
706		     Otherwise, calculate remainder + divisor.  */
707		  if (sign == 0)
708		    operand2 = ~operand2 + 1;
709		  result = remainder + operand2;
710
711		  /* The SPARClite User's Manual is not clear on how
712		     the "carry out" of the above ALU operation is to
713		     be calculated.  From trial and error tests
		     on the chip itself, it appears that it is
715		     a normal addition carry, and not a subtraction borrow,
716		     even in cases where the divisor is subtracted
717		     from the remainder.  FIXME: get the true story
718		     from Fujitsu. */
719		  c0 = result < (uint32) remainder
720		       || result < (uint32) operand2;
721
722		  if (result & 0x80000000)
723		    sregs->psr |= PSR_N;
724		  else
725		    sregs->psr &= ~PSR_N;
726
727		  y31 = (sregs->y & 0x80000000) == 0x80000000;
728
729		  if (result == 0 && sign == y31)
730		    sregs->psr |= PSR_Z;
731		  else
732		    sregs->psr &= ~PSR_Z;
733
734		  sign = (sign && !y31) || (!c0 && (sign || !y31));
735
736		  if (sign ^ (result >> 31))
737		    sregs->psr |= PSR_V;
738		  else
739		    sregs->psr &= ~PSR_V;
740
741		  if (!sign)
742		    sregs->psr |= PSR_C;
743		  else
744		    sregs->psr &= ~PSR_C;
745
746		  sregs->y = result;
747
748		  if (rd != 0)
749		    *rdd = (rs1 << 1) | !sign;
750		}
751		break;
752	    case SMUL:
753		{
754		  mul64 (rs1, operand2, &sregs->y, rdd, 1);
755		}
756		break;
757	    case SMULCC:
758		{
759		  uint32 result;
760
761		  mul64 (rs1, operand2, &sregs->y, &result, 1);
762
763		  if (result & 0x80000000)
764		    sregs->psr |= PSR_N;
765		  else
766		    sregs->psr &= ~PSR_N;
767
768		  if (result == 0)
769		    sregs->psr |= PSR_Z;
770		  else
771		    sregs->psr &= ~PSR_Z;
772
773		  *rdd = result;
774		}
775		break;
776	    case UMUL:
777		{
778		  mul64 (rs1, operand2, &sregs->y, rdd, 0);
779		}
780		break;
781	    case UMULCC:
782		{
783		  uint32 result;
784
785		  mul64 (rs1, operand2, &sregs->y, &result, 0);
786
787		  if (result & 0x80000000)
788		    sregs->psr |= PSR_N;
789		  else
790		    sregs->psr &= ~PSR_N;
791
792		  if (result == 0)
793		    sregs->psr |= PSR_Z;
794		  else
795		    sregs->psr &= ~PSR_Z;
796
797		  *rdd = result;
798		}
799		break;
800	    case SDIV:
801		{
802		  if (sparclite) {
803		     sregs->trap = TRAP_UNIMP;
804                     break;
805		  }
806
807		  if (operand2 == 0) {
808		    sregs->trap = TRAP_DIV0;
809		    break;
810		  }
811
812		  div64 (sregs->y, rs1, operand2, rdd, 1);
813		}
814		break;
815	    case SDIVCC:
816		{
817		  uint32 result;
818
819		  if (sparclite) {
820		     sregs->trap = TRAP_UNIMP;
821                     break;
822		  }
823
824		  if (operand2 == 0) {
825		    sregs->trap = TRAP_DIV0;
826		    break;
827		  }
828
829		  div64 (sregs->y, rs1, operand2, &result, 1);
830
831		  if (result & 0x80000000)
832		    sregs->psr |= PSR_N;
833		  else
834		    sregs->psr &= ~PSR_N;
835
836		  if (result == 0)
837		    sregs->psr |= PSR_Z;
838		  else
839		    sregs->psr &= ~PSR_Z;
840
841		  /* FIXME: should set overflow flag correctly.  */
842		  sregs->psr &= ~(PSR_C | PSR_V);
843
844		  *rdd = result;
845		}
846		break;
847	    case UDIV:
848		{
849		  if (sparclite) {
850		     sregs->trap = TRAP_UNIMP;
851                     break;
852		  }
853
854		  if (operand2 == 0) {
855		    sregs->trap = TRAP_DIV0;
856		    break;
857		  }
858
859		  div64 (sregs->y, rs1, operand2, rdd, 0);
860		}
861		break;
862	    case UDIVCC:
863		{
864		  uint32 result;
865
866		  if (sparclite) {
867		     sregs->trap = TRAP_UNIMP;
868                     break;
869		  }
870
871		  if (operand2 == 0) {
872		    sregs->trap = TRAP_DIV0;
873		    break;
874		  }
875
876		  div64 (sregs->y, rs1, operand2, &result, 0);
877
878		  if (result & 0x80000000)
879		    sregs->psr |= PSR_N;
880		  else
881		    sregs->psr &= ~PSR_N;
882
883		  if (result == 0)
884		    sregs->psr |= PSR_Z;
885		  else
886		    sregs->psr &= ~PSR_Z;
887
888		  /* FIXME: should set overflow flag correctly.  */
889		  sregs->psr &= ~(PSR_C | PSR_V);
890
891		  *rdd = result;
892		}
893		break;
894	    case IXNOR:
895		*rdd = rs1 ^ ~operand2;
896		break;
897	    case IXNORCC:
898		*rdd = rs1 ^ ~operand2;
899		log_cc(*rdd, sregs);
900		break;
901	    case IXOR:
902		*rdd = rs1 ^ operand2;
903		break;
904	    case IXORCC:
905		*rdd = rs1 ^ operand2;
906		log_cc(*rdd, sregs);
907		break;
908	    case IOR:
909		*rdd = rs1 | operand2;
910		break;
911	    case IORCC:
912		*rdd = rs1 | operand2;
913		log_cc(*rdd, sregs);
914		break;
915	    case IORN:
916		*rdd = rs1 | ~operand2;
917		break;
918	    case IORNCC:
919		*rdd = rs1 | ~operand2;
920		log_cc(*rdd, sregs);
921		break;
922	    case IANDNCC:
923		*rdd = rs1 & ~operand2;
924		log_cc(*rdd, sregs);
925		break;
926	    case IANDN:
927		*rdd = rs1 & ~operand2;
928		break;
929	    case IAND:
930		*rdd = rs1 & operand2;
931		break;
932	    case IANDCC:
933		*rdd = rs1 & operand2;
934		log_cc(*rdd, sregs);
935		break;
936	    case SUB:
937		*rdd = rs1 - operand2;
938		break;
939	    case SUBCC:
940		*rdd = rs1 - operand2;
941		sregs->psr = sub_cc(sregs->psr, rs1, operand2, *rdd);
942		break;
943	    case SUBX:
944		*rdd = rs1 - operand2 - ((sregs->psr >> 20) & 1);
945		break;
946	    case SUBXCC:
947		*rdd = rs1 - operand2 - ((sregs->psr >> 20) & 1);
948		sregs->psr = sub_cc(sregs->psr, rs1, operand2, *rdd);
949		break;
950	    case ADD:
951		*rdd = rs1 + operand2;
952		break;
953	    case ADDCC:
954		*rdd = rs1 + operand2;
955		sregs->psr = add_cc(sregs->psr, rs1, operand2, *rdd);
956		break;
957	    case ADDX:
958		*rdd = rs1 + operand2 + ((sregs->psr >> 20) & 1);
959		break;
960	    case ADDXCC:
961		*rdd = rs1 + operand2 + ((sregs->psr >> 20) & 1);
962		sregs->psr = add_cc(sregs->psr, rs1, operand2, *rdd);
963		break;
964	    case TADDCC:
965		*rdd = rs1 + operand2;
966		sregs->psr = add_cc(sregs->psr, rs1, operand2, *rdd);
967		if ((rs1 | operand2) & 0x3)
968		    sregs->psr |= PSR_V;
969		break;
970	    case TSUBCC:
971		*rdd = rs1 - operand2;
972		sregs->psr = sub_cc (sregs->psr, rs1, operand2, *rdd);
973		if ((rs1 | operand2) & 0x3)
974		    sregs->psr |= PSR_V;
975		break;
976	    case TADDCCTV:
977		*rdd = rs1 + operand2;
978		result = add_cc(0, rs1, operand2, *rdd);
979		if ((rs1 | operand2) & 0x3)
980		    result |= PSR_V;
981		if (result & PSR_V) {
982		    sregs->trap = TRAP_TAG;
983		} else {
984		    sregs->psr = (sregs->psr & ~PSR_CC) | result;
985		}
986		break;
987	    case TSUBCCTV:
988		*rdd = rs1 - operand2;
989		result = add_cc (0, rs1, operand2, *rdd);
990		if ((rs1 | operand2) & 0x3)
991		    result |= PSR_V;
992		if (result & PSR_V)
993		  {
994		      sregs->trap = TRAP_TAG;
995		  }
996		else
997		  {
998		      sregs->psr = (sregs->psr & ~PSR_CC) | result;
999		  }
1000		break;
1001	    case SLL:
1002		*rdd = rs1 << (operand2 & 0x1f);
1003		break;
1004	    case SRL:
1005		*rdd = rs1 >> (operand2 & 0x1f);
1006		break;
1007	    case SRA:
1008		*rdd = ((int) rs1) >> (operand2 & 0x1f);
1009		break;
1010	    case FLUSH:
1011		if (ift) sregs->trap = TRAP_UNIMP;
1012		break;
1013	    case SAVE:
1014		new_cwp = ((sregs->psr & PSR_CWP) - 1) & PSR_CWP;
1015		if (sregs->wim & (1 << new_cwp)) {
1016		    sregs->trap = TRAP_WOFL;
1017		    break;
1018		}
1019		if (rd > 7)
1020		    rdd = &(sregs->r[((new_cwp << 4) + rd) & 0x7f]);
1021		*rdd = rs1 + operand2;
1022		sregs->psr = (sregs->psr & ~PSR_CWP) | new_cwp;
1023		break;
1024	    case RESTORE:
1025
1026		new_cwp = ((sregs->psr & PSR_CWP) + 1) & PSR_CWP;
1027		if (sregs->wim & (1 << new_cwp)) {
1028		    sregs->trap = TRAP_WUFL;
1029		    break;
1030		}
1031		if (rd > 7)
1032		    rdd = &(sregs->r[((new_cwp << 4) + rd) & 0x7f]);
1033		*rdd = rs1 + operand2;
1034		sregs->psr = (sregs->psr & ~PSR_CWP) | new_cwp;
1035		break;
1036	    case RDPSR:
1037		if (!(sregs->psr & PSR_S)) {
1038		    sregs->trap = TRAP_PRIVI;
1039		    break;
1040		}
1041		*rdd = sregs->psr;
1042		break;
1043	    case RDY:
1044                if (!sparclite)
1045                    *rdd = sregs->y;
1046                else {
1047                    int rs1_is_asr = (sregs->inst >> 14) & 0x1f;
1048                    if ( 0 == rs1_is_asr )
1049                        *rdd = sregs->y;
1050                    else if ( 17 == rs1_is_asr )
1051                        *rdd = sregs->asr17;
1052                    else {
1053                        sregs->trap = TRAP_UNIMP;
1054                        break;
1055                    }
1056                }
1057		break;
1058	    case RDWIM:
1059		if (!(sregs->psr & PSR_S)) {
1060		    sregs->trap = TRAP_PRIVI;
1061		    break;
1062		}
1063		*rdd = sregs->wim;
1064		break;
1065	    case RDTBR:
1066		if (!(sregs->psr & PSR_S)) {
1067		    sregs->trap = TRAP_PRIVI;
1068		    break;
1069		}
1070		*rdd = sregs->tbr;
1071		break;
1072	    case WRPSR:
1073		if ((sregs->psr & 0x1f) > 7) {
1074		    sregs->trap = TRAP_UNIMP;
1075		    break;
1076		}
1077		if (!(sregs->psr & PSR_S)) {
1078		    sregs->trap = TRAP_PRIVI;
1079		    break;
1080		}
1081		sregs->psr = (rs1 ^ operand2) & 0x00f03fff;
1082		break;
1083	    case WRWIM:
1084		if (!(sregs->psr & PSR_S)) {
1085		    sregs->trap = TRAP_PRIVI;
1086		    break;
1087		}
1088		sregs->wim = (rs1 ^ operand2) & 0x0ff;
1089		break;
1090	    case WRTBR:
1091		if (!(sregs->psr & PSR_S)) {
1092		    sregs->trap = TRAP_PRIVI;
1093		    break;
1094		}
1095		sregs->tbr = (sregs->tbr & 0x00000ff0) |
1096		    ((rs1 ^ operand2) & 0xfffff000);
1097		break;
1098	    case WRY:
1099                if (!sparclite)
1100                    sregs->y = (rs1 ^ operand2);
1101                else {
1102                    if ( 0 == rd )
1103                        sregs->y = (rs1 ^ operand2);
1104                    else if ( 17 == rd )
1105                        sregs->asr17 = (rs1 ^ operand2);
1106                    else {
1107                        sregs->trap = TRAP_UNIMP;
1108                        break;
1109                    }
1110                }
1111		break;
1112	    case JMPL:
1113
1114#ifdef STAT
1115		sregs->nbranch++;
1116#endif
1117		sregs->icnt = T_JMPL;	/* JMPL takes two cycles */
1118		if (rs1 & 0x3) {
1119		    sregs->trap = TRAP_UNALI;
1120		    break;
1121		}
1122		*rdd = sregs->pc;
1123		npc = rs1 + operand2;
1124		break;
1125	    case RETT:
1126		address = rs1 + operand2;
1127		new_cwp = ((sregs->psr & PSR_CWP) + 1) & PSR_CWP;
1128		sregs->icnt = T_RETT;	/* RETT takes two cycles */
1129		if (sregs->psr & PSR_ET) {
1130		    sregs->trap = TRAP_UNIMP;
1131		    break;
1132		}
1133		if (!(sregs->psr & PSR_S)) {
1134		    sregs->trap = TRAP_PRIVI;
1135		    break;
1136		}
1137		if (sregs->wim & (1 << new_cwp)) {
1138		    sregs->trap = TRAP_WUFL;
1139		    break;
1140		}
1141		if (address & 0x3) {
1142		    sregs->trap = TRAP_UNALI;
1143		    break;
1144		}
1145		sregs->psr = (sregs->psr & ~PSR_CWP) | new_cwp | PSR_ET;
1146		sregs->psr =
1147		    (sregs->psr & ~PSR_S) | ((sregs->psr & PSR_PS) << 1);
1148		npc = address;
1149		break;
1150
1151	    case SCAN:
1152		{
1153		  uint32 result, mask;
1154		  int i;
1155
1156		  if (!sparclite) {
1157		     sregs->trap = TRAP_UNIMP;
1158                     break;
1159		  }
1160		  mask = (operand2 & 0x80000000) | (operand2 >> 1);
1161		  result = rs1 ^ mask;
1162
1163		  for (i = 0; i < 32; i++) {
1164		    if (result & 0x80000000)
1165		      break;
1166		    result <<= 1;
1167		  }
1168
1169		  *rdd = i == 32 ? 63 : i;
1170		}
1171		break;
1172
1173	    default:
1174		sregs->trap = TRAP_UNIMP;
1175		break;
1176	    }
1177	}
1178	break;
1179    case 3:			/* Load/store instructions */
1180
1181	address = rs1 + operand2;
1182
1183	if (sregs->psr & PSR_S)
1184	    asi = 11;
1185	 else
1186	    asi = 10;
1187
1188	if (op3 & 4) {
1189	    sregs->icnt = T_ST;	/* Set store instruction count */
1190#ifdef STAT
1191	    sregs->nstore++;
1192#endif
1193	} else {
1194	    sregs->icnt = T_LD;	/* Set load instruction count */
1195#ifdef STAT
1196	    sregs->nload++;
1197#endif
1198	}
1199
1200	/* Decode load/store instructions */
1201
1202	switch (op3) {
1203	case LDDA:
1204	    if (!chk_asi(sregs, &asi, op3)) break;
1205	case LDD:
1206	    if (address & 0x7) {
1207		sregs->trap = TRAP_UNALI;
1208		break;
1209	    }
1210	    if (rd & 1) {
1211		rd &= 0x1e;
1212		if (rd > 7)
1213		    rdd = &(sregs->r[(cwp + rd) & 0x7f]);
1214		else
1215		    rdd = &(sregs->g[rd]);
1216	    }
1217	    mexc = memory_read(asi, address, ddata, 3, &ws);
1218	    sregs->hold += ws * 2;
1219	    sregs->icnt = T_LDD;
1220	    if (mexc) {
1221		sregs->trap = TRAP_DEXC;
1222	    } else {
1223		rdd[0] = ddata[0];
1224		rdd[1] = ddata[1];
1225#ifdef STAT
1226		sregs->nload++;	/* Double load counts twice */
1227#endif
1228	    }
1229	    break;
1230
1231	case LDA:
1232	    if (!chk_asi(sregs, &asi, op3)) break;
1233	case LD:
1234	    if (address & 0x3) {
1235		sregs->trap = TRAP_UNALI;
1236		break;
1237	    }
1238	    mexc = memory_read(asi, address, &data, 2, &ws);
1239	    sregs->hold += ws;
1240	    if (mexc) {
1241		sregs->trap = TRAP_DEXC;
1242	    } else {
1243		*rdd = data;
1244	    }
1245	    break;
1246	case LDSTUBA:
1247	    if (!chk_asi(sregs, &asi, op3)) break;
1248	case LDSTUB:
1249	    mexc = memory_read(asi, address, &data, 0, &ws);
1250	    sregs->hold += ws;
1251	    sregs->icnt = T_LDST;
1252	    if (mexc) {
1253		sregs->trap = TRAP_DEXC;
1254		break;
1255	    }
1256	    *rdd = data;
1257	    data = 0x0ff;
1258	    mexc = memory_write(asi, address, &data, 0, &ws);
1259	    sregs->hold += ws;
1260	    if (mexc) {
1261		sregs->trap = TRAP_DEXC;
1262	    }
1263#ifdef STAT
1264	    sregs->nload++;
1265#endif
1266	    break;
1267	case LDSBA:
1268	case LDUBA:
1269	    if (!chk_asi(sregs, &asi, op3)) break;
1270	case LDSB:
1271	case LDUB:
1272	    mexc = memory_read(asi, address, &data, 0, &ws);
1273	    sregs->hold += ws;
1274	    if (mexc) {
1275		sregs->trap = TRAP_DEXC;
1276		break;
1277	    }
1278	    if ((op3 == LDSB) && (data & 0x80))
1279		data |= 0xffffff00;
1280	    *rdd = data;
1281	    break;
1282	case LDSHA:
1283	case LDUHA:
1284	    if (!chk_asi(sregs, &asi, op3)) break;
1285	case LDSH:
1286	case LDUH:
1287	    if (address & 0x1) {
1288		sregs->trap = TRAP_UNALI;
1289		break;
1290	    }
1291	    mexc = memory_read(asi, address, &data, 1, &ws);
1292	    sregs->hold += ws;
1293	    if (mexc) {
1294		sregs->trap = TRAP_DEXC;
1295		break;
1296	    }
1297	    if ((op3 == LDSH) && (data & 0x8000))
1298		data |= 0xffff0000;
1299	    *rdd = data;
1300	    break;
1301	case LDF:
1302	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1303		sregs->trap = TRAP_FPDIS;
1304		break;
1305	    }
1306	    if (address & 0x3) {
1307		sregs->trap = TRAP_UNALI;
1308		break;
1309	    }
1310	    if (ebase.simtime < sregs->ftime) {
1311		if ((sregs->frd == rd) || (sregs->frs1 == rd) ||
1312		    (sregs->frs2 == rd))
1313		    sregs->fhold += (sregs->ftime - ebase.simtime);
1314	    }
1315	    mexc = memory_read(asi, address, &data, 2, &ws);
1316	    sregs->hold += ws;
1317	    sregs->flrd = rd;
1318	    sregs->ltime = ebase.simtime + sregs->icnt + FLSTHOLD +
1319		sregs->hold + sregs->fhold;
1320	    if (mexc) {
1321		sregs->trap = TRAP_DEXC;
1322	    } else {
1323		sregs->fs[rd] = *((float32 *) & data);
1324	    }
1325	    break;
1326	case LDDF:
1327	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1328		sregs->trap = TRAP_FPDIS;
1329		break;
1330	    }
1331	    if (address & 0x7) {
1332		sregs->trap = TRAP_UNALI;
1333		break;
1334	    }
1335	    if (ebase.simtime < sregs->ftime) {
1336		if (((sregs->frd >> 1) == (rd >> 1)) ||
1337		    ((sregs->frs1 >> 1) == (rd >> 1)) ||
1338		    ((sregs->frs2 >> 1) == (rd >> 1)))
1339		    sregs->fhold += (sregs->ftime - ebase.simtime);
1340	    }
1341	    mexc = memory_read(asi, address, ddata, 3, &ws);
1342	    sregs->hold += ws * 2;
1343	    sregs->icnt = T_LDD;
1344	    if (mexc) {
1345		sregs->trap = TRAP_DEXC;
1346	    } else {
1347		rd &= 0x1E;
1348		sregs->flrd = rd;
1349		sregs->fs[rd] = *((float32 *) & ddata[0]);
1350#ifdef STAT
1351		sregs->nload++;	/* Double load counts twice */
1352#endif
1353		sregs->fs[rd + 1] = *((float32 *) & ddata[1]);
1354		sregs->ltime = ebase.simtime + sregs->icnt + FLSTHOLD +
1355			       sregs->hold + sregs->fhold;
1356	    }
1357	    break;
1358	case LDFSR:
1359	    if (ebase.simtime < sregs->ftime) {
1360		sregs->fhold += (sregs->ftime - ebase.simtime);
1361	    }
1362	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1363		sregs->trap = TRAP_FPDIS;
1364		break;
1365	    }
1366	    if (address & 0x3) {
1367		sregs->trap = TRAP_UNALI;
1368		break;
1369	    }
1370	    mexc = memory_read(asi, address, &data, 2, &ws);
1371	    sregs->hold += ws;
1372	    if (mexc) {
1373		sregs->trap = TRAP_DEXC;
1374	    } else {
1375		sregs->fsr =
1376		    (sregs->fsr & 0x7FF000) | (data & ~0x7FF000);
1377		set_fsr(sregs->fsr);
1378	    }
1379	    break;
1380	case STFSR:
1381	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1382		sregs->trap = TRAP_FPDIS;
1383		break;
1384	    }
1385	    if (address & 0x3) {
1386		sregs->trap = TRAP_UNALI;
1387		break;
1388	    }
1389	    if (ebase.simtime < sregs->ftime) {
1390		sregs->fhold += (sregs->ftime - ebase.simtime);
1391	    }
1392	    mexc = memory_write(asi, address, &sregs->fsr, 2, &ws);
1393	    sregs->hold += ws;
1394	    if (mexc) {
1395		sregs->trap = TRAP_DEXC;
1396	    }
1397	    break;
1398
1399	case STA:
1400	    if (!chk_asi(sregs, &asi, op3)) break;
1401	case ST:
1402	    if (address & 0x3) {
1403		sregs->trap = TRAP_UNALI;
1404		break;
1405	    }
1406	    mexc = memory_write(asi, address, rdd, 2, &ws);
1407	    sregs->hold += ws;
1408	    if (mexc) {
1409		sregs->trap = TRAP_DEXC;
1410	    }
1411	    break;
1412	case STBA:
1413	    if (!chk_asi(sregs, &asi, op3)) break;
1414	case STB:
1415	    mexc = memory_write(asi, address, rdd, 0, &ws);
1416	    sregs->hold += ws;
1417	    if (mexc) {
1418		sregs->trap = TRAP_DEXC;
1419	    }
1420	    break;
1421	case STDA:
1422	    if (!chk_asi(sregs, &asi, op3)) break;
1423	case STD:
1424	    if (address & 0x7) {
1425		sregs->trap = TRAP_UNALI;
1426		break;
1427	    }
1428	    if (rd & 1) {
1429		rd &= 0x1e;
1430		if (rd > 7)
1431		    rdd = &(sregs->r[(cwp + rd) & 0x7f]);
1432		else
1433		    rdd = &(sregs->g[rd]);
1434	    }
1435	    mexc = memory_write(asi, address, rdd, 3, &ws);
1436	    sregs->hold += ws;
1437	    sregs->icnt = T_STD;
1438#ifdef STAT
1439	    sregs->nstore++;	/* Double store counts twice */
1440#endif
1441	    if (mexc) {
1442		sregs->trap = TRAP_DEXC;
1443		break;
1444	    }
1445	    break;
1446	case STDFQ:
1447	    if ((sregs->psr & 0x1f) > 7) {
1448		sregs->trap = TRAP_UNIMP;
1449		break;
1450	    }
1451	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1452		sregs->trap = TRAP_FPDIS;
1453		break;
1454	    }
1455	    if (address & 0x7) {
1456		sregs->trap = TRAP_UNALI;
1457		break;
1458	    }
1459	    if (!(sregs->fsr & FSR_QNE)) {
1460		sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_SEQ_ERR;
1461		break;
1462	    }
1463	    rdd = &(sregs->fpq[0]);
1464	    mexc = memory_write(asi, address, rdd, 3, &ws);
1465	    sregs->hold += ws;
1466	    sregs->icnt = T_STD;
1467#ifdef STAT
1468	    sregs->nstore++;	/* Double store counts twice */
1469#endif
1470	    if (mexc) {
1471		sregs->trap = TRAP_DEXC;
1472		break;
1473	    } else {
1474		sregs->fsr &= ~FSR_QNE;
1475		sregs->fpstate = FP_EXE_MODE;
1476	    }
1477	    break;
1478	case STHA:
1479	    if (!chk_asi(sregs, &asi, op3)) break;
1480	case STH:
1481	    if (address & 0x1) {
1482		sregs->trap = TRAP_UNALI;
1483		break;
1484	    }
1485	    mexc = memory_write(asi, address, rdd, 1, &ws);
1486	    sregs->hold += ws;
1487	    if (mexc) {
1488		sregs->trap = TRAP_DEXC;
1489	    }
1490	    break;
1491	case STF:
1492	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1493		sregs->trap = TRAP_FPDIS;
1494		break;
1495	    }
1496	    if (address & 0x3) {
1497		sregs->trap = TRAP_UNALI;
1498		break;
1499	    }
1500	    if (ebase.simtime < sregs->ftime) {
1501		if (sregs->frd == rd)
1502		    sregs->fhold += (sregs->ftime - ebase.simtime);
1503	    }
1504	    mexc = memory_write(asi, address, &sregs->fsi[rd], 2, &ws);
1505	    sregs->hold += ws;
1506	    if (mexc) {
1507		sregs->trap = TRAP_DEXC;
1508	    }
1509	    break;
1510	case STDF:
1511	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1512		sregs->trap = TRAP_FPDIS;
1513		break;
1514	    }
1515	    if (address & 0x7) {
1516		sregs->trap = TRAP_UNALI;
1517		break;
1518	    }
1519	    rd &= 0x1E;
1520	    if (ebase.simtime < sregs->ftime) {
1521		if ((sregs->frd == rd) || (sregs->frd + 1 == rd))
1522		    sregs->fhold += (sregs->ftime - ebase.simtime);
1523	    }
1524	    mexc = memory_write(asi, address, &sregs->fsi[rd], 3, &ws);
1525	    sregs->hold += ws;
1526	    sregs->icnt = T_STD;
1527#ifdef STAT
1528	    sregs->nstore++;	/* Double store counts twice */
1529#endif
1530	    if (mexc) {
1531		sregs->trap = TRAP_DEXC;
1532	    }
1533	    break;
1534	case SWAPA:
1535	    if (!chk_asi(sregs, &asi, op3)) break;
1536	case SWAP:
1537	    if (address & 0x3) {
1538		sregs->trap = TRAP_UNALI;
1539		break;
1540	    }
1541	    mexc = memory_read(asi, address, &data, 2, &ws);
1542	    sregs->hold += ws;
1543	    if (mexc) {
1544		sregs->trap = TRAP_DEXC;
1545		break;
1546	    }
1547	    mexc = memory_write(asi, address, rdd, 2, &ws);
1548	    sregs->hold += ws;
1549	    sregs->icnt = T_LDST;
1550	    if (mexc) {
1551		sregs->trap = TRAP_DEXC;
1552		break;
1553	    } else
1554		*rdd = data;
1555#ifdef STAT
1556	    sregs->nload++;
1557#endif
1558	    break;
1559
1560
1561	default:
1562	    sregs->trap = TRAP_UNIMP;
1563	    break;
1564	}
1565
1566#ifdef LOAD_DEL
1567
1568	if (!(op3 & 4)) {
1569	    sregs->ildtime = ebase.simtime + sregs->hold + sregs->icnt;
1570	    sregs->ildreg = rd;
1571	    if ((op3 | 0x10) == 0x13)
1572		sregs->ildreg |= 1;	/* Double load, odd register loaded
1573					 * last */
1574	}
1575#endif
1576	break;
1577
1578    default:
1579	sregs->trap = TRAP_UNIMP;
1580	break;
1581    }
1582    sregs->g[0] = 0;
1583    if (!sregs->trap) {
1584	sregs->pc = pc;
1585	sregs->npc = npc;
1586    }
1587    return (0);
1588}
1589
/*
 * FPop execution times.  fpexec() adds the matching value to sregs->ftime
 * when it executes the operation, so they are expressed in the same unit
 * as the simulator time.
 */

/* Moves and sign manipulation */
#define T_FMOVs		2
#define T_FNEGs		2
#define T_FABSs		2

/* Arithmetic */
#define T_FADDs		4
#define T_FADDd		4
#define T_FSUBs		4
#define T_FSUBd		4
#define T_FMULs		5
#define T_FMULd		9
#define T_FDIVs		20
#define T_FDIVd		35
#define T_FSQRTs	37
#define T_FSQRTd	65

/* Compares */
#define T_FCMPs		4
#define T_FCMPd		4

/* Conversions */
#define T_FiTOs		6
#define T_FiTOd		6
#define T_FsTOi		6
#define T_FdTOi		7
#define T_FsTOd		2
#define T_FdTOs		3

/*
 * FPop encodings.  fpexec() compares these against the opf field,
 * (inst >> 5) & 0x1ff.
 */

/* Moves and sign manipulation */
#define FMOVs	0x01
#define FNEGs	0x05
#define FABSs	0x09

/* Arithmetic */
#define FADDs	0x41
#define FADDd	0x42
#define FSUBs	0x45
#define FSUBd	0x46
#define FMULs	0x49
#define FMULd	0x4A
#define FDIVs	0x4D
#define FDIVd	0x4E
#define FSQRTs	0x29
#define FSQRTd	0x2A

/* Compares (the E variants are the exception-raising compares) */
#define FCMPs	0x51
#define FCMPd	0x52
#define FCMPEs	0x55
#define FCMPEd	0x56

/* Conversions */
#define FiTOs	0xC4
#define FiTOd	0xC8
#define FsTOi	0xD1
#define FdTOi	0xD2
#define FsTOd	0xC9
#define FdTOs	0xC6
1635
1636
1637static int
1638fpexec(op3, rd, rs1, rs2, sregs)
1639    uint32          op3, rd, rs1, rs2;
1640    struct pstate  *sregs;
1641{
1642    uint32          opf, tem, accex;
1643    int32           fcc;
1644    uint32          ldadj;
1645
1646    if (sregs->fpstate == FP_EXC_MODE) {
1647	sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_SEQ_ERR;
1648	sregs->fpstate = FP_EXC_PE;
1649	return (0);
1650    }
1651    if (sregs->fpstate == FP_EXC_PE) {
1652	sregs->fpstate = FP_EXC_MODE;
1653	return (TRAP_FPEXC);
1654    }
1655    opf = (sregs->inst >> 5) & 0x1ff;
1656
1657    /*
1658     * Check if we already have an FPop in the pipe. If so, halt until it is
1659     * finished by incrementing fhold with the remaining execution time
1660     */
1661
1662    if (ebase.simtime < sregs->ftime) {
1663	sregs->fhold = (sregs->ftime - ebase.simtime);
1664    } else {
1665	sregs->fhold = 0;
1666
1667	/* Check load dependencies. */
1668
1669	if (ebase.simtime < sregs->ltime) {
1670
1671	    /* Don't check rs1 if single operand instructions */
1672
1673	    if (((opf >> 6) == 0) || ((opf >> 6) == 3))
1674		rs1 = 32;
1675
1676	    /* Adjust for double floats */
1677
1678	    ldadj = opf & 1;
1679	    if (!(((sregs->flrd - rs1) >> ldadj) && ((sregs->flrd - rs2) >> ldadj)))
1680		sregs->fhold++;
1681	}
1682    }
1683
1684    sregs->finst++;
1685
1686    sregs->frs1 = rs1;		/* Store src and dst for dependecy check */
1687    sregs->frs2 = rs2;
1688    sregs->frd = rd;
1689
1690    sregs->ftime = ebase.simtime + sregs->hold + sregs->fhold;
1691
1692    /* SPARC is big-endian - swap double floats if host is little-endian */
1693    /* This is ugly - I know ... */
1694
1695    /* FIXME: should use (CURRENT_HOST_BYTE_ORDER == CURRENT_TARGET_BYTE_ORDER)
1696       but what about machines where float values are different endianness
1697       from integer values? */
1698
1699#ifdef HOST_LITTLE_ENDIAN_FLOAT
1700    rs1 &= 0x1f;
1701    switch (opf) {
1702	case FADDd:
1703	case FDIVd:
1704	case FMULd:
1705	case FSQRTd:
1706	case FSUBd:
1707        case FCMPd:
1708        case FCMPEd:
1709	case FdTOi:
1710	case FdTOs:
1711    	    sregs->fdp[rs1 | 1] = sregs->fs[rs1 & ~1];
1712    	    sregs->fdp[rs1 & ~1] = sregs->fs[rs1 | 1];
1713    	    sregs->fdp[rs2 | 1] = sregs->fs[rs2 & ~1];
1714    	    sregs->fdp[rs2 & ~1] = sregs->fs[rs2 | 1];
1715    default:
1716      break;
1717    }
1718#endif
1719
1720    clear_accex();
1721
1722    switch (opf) {
1723    case FABSs:
1724	sregs->fs[rd] = fabs(sregs->fs[rs2]);
1725	sregs->ftime += T_FABSs;
1726	sregs->frs1 = 32;	/* rs1 ignored */
1727	break;
1728    case FADDs:
1729	sregs->fs[rd] = sregs->fs[rs1] + sregs->fs[rs2];
1730	sregs->ftime += T_FADDs;
1731	break;
1732    case FADDd:
1733	sregs->fd[rd >> 1] = sregs->fd[rs1 >> 1] + sregs->fd[rs2 >> 1];
1734	sregs->ftime += T_FADDd;
1735	break;
1736    case FCMPs:
1737    case FCMPEs:
1738	if (sregs->fs[rs1] == sregs->fs[rs2])
1739	    fcc = 3;
1740	else if (sregs->fs[rs1] < sregs->fs[rs2])
1741	    fcc = 2;
1742	else if (sregs->fs[rs1] > sregs->fs[rs2])
1743	    fcc = 1;
1744	else
1745	    fcc = 0;
1746	sregs->fsr |= 0x0C00;
1747	sregs->fsr &= ~(fcc << 10);
1748	sregs->ftime += T_FCMPs;
1749	sregs->frd = 32;	/* rd ignored */
1750	if ((fcc == 0) && (opf == FCMPEs)) {
1751	    sregs->fpstate = FP_EXC_PE;
1752	    sregs->fsr = (sregs->fsr & ~0x1C000) | (1 << 14);
1753	}
1754	break;
1755    case FCMPd:
1756    case FCMPEd:
1757	if (sregs->fd[rs1 >> 1] == sregs->fd[rs2 >> 1])
1758	    fcc = 3;
1759	else if (sregs->fd[rs1 >> 1] < sregs->fd[rs2 >> 1])
1760	    fcc = 2;
1761	else if (sregs->fd[rs1 >> 1] > sregs->fd[rs2 >> 1])
1762	    fcc = 1;
1763	else
1764	    fcc = 0;
1765	sregs->fsr |= 0x0C00;
1766	sregs->fsr &= ~(fcc << 10);
1767	sregs->ftime += T_FCMPd;
1768	sregs->frd = 32;	/* rd ignored */
1769	if ((fcc == 0) && (opf == FCMPEd)) {
1770	    sregs->fpstate = FP_EXC_PE;
1771	    sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_IEEE;
1772	}
1773	break;
1774    case FDIVs:
1775	sregs->fs[rd] = sregs->fs[rs1] / sregs->fs[rs2];
1776	sregs->ftime += T_FDIVs;
1777	break;
1778    case FDIVd:
1779	sregs->fd[rd >> 1] = sregs->fd[rs1 >> 1] / sregs->fd[rs2 >> 1];
1780	sregs->ftime += T_FDIVd;
1781	break;
1782    case FMOVs:
1783	sregs->fs[rd] = sregs->fs[rs2];
1784	sregs->ftime += T_FMOVs;
1785	sregs->frs1 = 32;	/* rs1 ignored */
1786	break;
1787    case FMULs:
1788	sregs->fs[rd] = sregs->fs[rs1] * sregs->fs[rs2];
1789	sregs->ftime += T_FMULs;
1790	break;
1791    case FMULd:
1792	sregs->fd[rd >> 1] = sregs->fd[rs1 >> 1] * sregs->fd[rs2 >> 1];
1793	sregs->ftime += T_FMULd;
1794	break;
1795    case FNEGs:
1796	sregs->fs[rd] = -sregs->fs[rs2];
1797	sregs->ftime += T_FNEGs;
1798	sregs->frs1 = 32;	/* rs1 ignored */
1799	break;
1800    case FSQRTs:
1801	if (sregs->fs[rs2] < 0.0) {
1802	    sregs->fpstate = FP_EXC_PE;
1803	    sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_IEEE;
1804	    sregs->fsr = (sregs->fsr & 0x1f) | 0x10;
1805	    break;
1806	}
1807	sregs->fs[rd] = sqrt(sregs->fs[rs2]);
1808	sregs->ftime += T_FSQRTs;
1809	sregs->frs1 = 32;	/* rs1 ignored */
1810	break;
1811    case FSQRTd:
1812	if (sregs->fd[rs2 >> 1] < 0.0) {
1813	    sregs->fpstate = FP_EXC_PE;
1814	    sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_IEEE;
1815	    sregs->fsr = (sregs->fsr & 0x1f) | 0x10;
1816	    break;
1817	}
1818	sregs->fd[rd >> 1] = sqrt(sregs->fd[rs2 >> 1]);
1819	sregs->ftime += T_FSQRTd;
1820	sregs->frs1 = 32;	/* rs1 ignored */
1821	break;
1822    case FSUBs:
1823	sregs->fs[rd] = sregs->fs[rs1] - sregs->fs[rs2];
1824	sregs->ftime += T_FSUBs;
1825	break;
1826    case FSUBd:
1827	sregs->fd[rd >> 1] = sregs->fd[rs1 >> 1] - sregs->fd[rs2 >> 1];
1828	sregs->ftime += T_FSUBd;
1829	break;
1830    case FdTOi:
1831	sregs->fsi[rd] = (int) sregs->fd[rs2 >> 1];
1832	sregs->ftime += T_FdTOi;
1833	sregs->frs1 = 32;	/* rs1 ignored */
1834	break;
1835    case FdTOs:
1836	sregs->fs[rd] = (float32) sregs->fd[rs2 >> 1];
1837	sregs->ftime += T_FdTOs;
1838	sregs->frs1 = 32;	/* rs1 ignored */
1839	break;
1840    case FiTOs:
1841	sregs->fs[rd] = (float32) sregs->fsi[rs2];
1842	sregs->ftime += T_FiTOs;
1843	sregs->frs1 = 32;	/* rs1 ignored */
1844	break;
1845    case FiTOd:
1846	sregs->fd[rd >> 1] = (float64) sregs->fsi[rs2];
1847	sregs->ftime += T_FiTOd;
1848	sregs->frs1 = 32;	/* rs1 ignored */
1849	break;
1850    case FsTOi:
1851	sregs->fsi[rd] = (int) sregs->fs[rs2];
1852	sregs->ftime += T_FsTOi;
1853	sregs->frs1 = 32;	/* rs1 ignored */
1854	break;
1855    case FsTOd:
1856	sregs->fd[rd >> 1] = sregs->fs[rs2];
1857	sregs->ftime += T_FsTOd;
1858	sregs->frs1 = 32;	/* rs1 ignored */
1859	break;
1860
1861    default:
1862	sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_UNIMP;
1863	sregs->fpstate = FP_EXC_PE;
1864    }
1865
1866#ifdef ERRINJ
1867    if (errftt) {
1868	sregs->fsr = (sregs->fsr & ~FSR_TT) | (errftt << 14);
1869	sregs->fpstate = FP_EXC_PE;
1870	if (sis_verbose) printf("Inserted fpu error %X\n",errftt);
1871	errftt = 0;
1872    }
1873#endif
1874
1875    accex = get_accex();
1876
1877#ifdef HOST_LITTLE_ENDIAN_FLOAT
1878    switch (opf) {
1879    case FADDd:
1880    case FDIVd:
1881    case FMULd:
1882    case FSQRTd:
1883    case FSUBd:
1884    case FiTOd:
1885    case FsTOd:
1886	sregs->fs[rd & ~1] = sregs->fdp[rd | 1];
1887	sregs->fs[rd | 1] = sregs->fdp[rd & ~1];
1888    default:
1889      break;
1890    }
1891#endif
1892    if (sregs->fpstate == FP_EXC_PE) {
1893	sregs->fpq[0] = sregs->pc;
1894	sregs->fpq[1] = sregs->inst;
1895	sregs->fsr |= FSR_QNE;
1896    } else {
1897	tem = (sregs->fsr >> 23) & 0x1f;
1898	if (tem & accex) {
1899	    sregs->fpstate = FP_EXC_PE;
1900	    sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_IEEE;
1901	    sregs->fsr = ((sregs->fsr & ~0x1f) | accex);
1902	} else {
1903	    sregs->fsr = ((((sregs->fsr >> 5) | accex) << 5) | accex);
1904	}
1905	if (sregs->fpstate == FP_EXC_PE) {
1906	    sregs->fpq[0] = sregs->pc;
1907	    sregs->fpq[1] = sregs->inst;
1908	    sregs->fsr |= FSR_QNE;
1909	}
1910    }
1911    clear_accex();
1912
1913    return (0);
1914
1915
1916}
1917
1918static int
1919chk_asi(sregs, asi, op3)
1920    struct pstate  *sregs;
1921    uint32 *asi, op3;
1922
1923{
1924    if (!(sregs->psr & PSR_S)) {
1925	sregs->trap = TRAP_PRIVI;
1926	return (0);
1927    } else if (sregs->inst & INST_I) {
1928	sregs->trap = TRAP_UNIMP;
1929	return (0);
1930    } else
1931	*asi = (sregs->inst >> 5) & 0x0ff;
1932    return(1);
1933}
1934
1935int
1936execute_trap(sregs)
1937    struct pstate  *sregs;
1938{
1939    int32           cwp;
1940
1941    if (sregs->trap == 256) {
1942	sregs->pc = 0;
1943	sregs->npc = 4;
1944	sregs->trap = 0;
1945    } else if (sregs->trap == 257) {
1946	    return (ERROR);
1947    } else {
1948
1949	if ((sregs->psr & PSR_ET) == 0)
1950	    return (ERROR);
1951
1952	sregs->tbr = (sregs->tbr & 0xfffff000) | (sregs->trap << 4);
1953	sregs->trap = 0;
1954	sregs->psr &= ~PSR_ET;
1955	sregs->psr |= ((sregs->psr & PSR_S) >> 1);
1956	sregs->annul = 0;
1957	sregs->psr = (((sregs->psr & PSR_CWP) - 1) & 0x7) | (sregs->psr & ~PSR_CWP);
1958	cwp = ((sregs->psr & PSR_CWP) << 4);
1959	sregs->r[(cwp + 17) & 0x7f] = sregs->pc;
1960	sregs->r[(cwp + 18) & 0x7f] = sregs->npc;
1961	sregs->psr |= PSR_S;
1962	sregs->pc = sregs->tbr;
1963	sregs->npc = sregs->tbr + 4;
1964
1965        if ( 0 != (1 & sregs->asr17) ) {
1966            /* single vector trapping! */
1967            sregs->pc = sregs->tbr & 0xfffff000;
1968            sregs->npc = sregs->pc + 4;
1969        }
1970
1971	/* Increase simulator time */
1972	sregs->icnt = TRAP_C;
1973
1974    }
1975
1976
1977    return (0);
1978
1979}
1980
1981extern struct irqcell irqarr[16];
1982
1983int
1984check_interrupts(sregs)
1985    struct pstate  *sregs;
1986{
1987#ifdef ERRINJ
1988    if (errtt) {
1989	sregs->trap = errtt;
1990	if (sis_verbose) printf("Inserted error trap 0x%02X\n",errtt);
1991	errtt = 0;
1992    }
1993#endif
1994
1995    if ((ext_irl) && (sregs->psr & PSR_ET) &&
1996	((ext_irl == 15) || (ext_irl > (int) ((sregs->psr & PSR_PIL) >> 8)))) {
1997	if (sregs->trap == 0) {
1998	    sregs->trap = 16 + ext_irl;
1999	    irqarr[ext_irl & 0x0f].callback(irqarr[ext_irl & 0x0f].arg);
2000	    return(1);
2001	}
2002    }
2003    return(0);
2004}
2005
2006void
2007init_regs(sregs)
2008    struct pstate  *sregs;
2009{
2010    sregs->pc = 0;
2011    sregs->npc = 4;
2012    sregs->trap = 0;
2013    sregs->psr &= 0x00f03fdf;
2014    sregs->psr |= 0x080;	/* Set supervisor bit */
2015    sregs->breakpoint = 0;
2016    sregs->annul = 0;
2017    sregs->fpstate = FP_EXE_MODE;
2018    sregs->fpqn = 0;
2019    sregs->ftime = 0;
2020    sregs->ltime = 0;
2021    sregs->err_mode = 0;
2022    ext_irl = 0;
2023    sregs->g[0] = 0;
2024#ifdef HOST_LITTLE_ENDIAN_FLOAT
2025    sregs->fdp = (float32 *) sregs->fd;
2026    sregs->fsi = (int32 *) sregs->fs;
2027#else
2028    sregs->fs = (float32 *) sregs->fd;
2029    sregs->fsi = (int32 *) sregs->fd;
2030#endif
2031    sregs->fsr = 0;
2032    sregs->fpu_pres = !nfp;
2033    set_fsr(sregs->fsr);
2034    sregs->bphit = 0;
2035    sregs->ildreg = 0;
2036    sregs->ildtime = 0;
2037
2038    sregs->y = 0;
2039    sregs->asr17 = 0;
2040
2041    sregs->rett_err = 0;
2042    sregs->jmpltime = 0;
2043}
2044