exec.c revision 1.9
1/* This file is part of SIS (SPARC instruction simulator)
2
3   Copyright (C) 1995-2020 Free Software Foundation, Inc.
4   Contributed by Jiri Gaisler, European Space Agency
5
6   This program is free software; you can redistribute it and/or modify
7   it under the terms of the GNU General Public License as published by
8   the Free Software Foundation; either version 3 of the License, or
9   (at your option) any later version.
10
11   This program is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14   GNU General Public License for more details.
15
16   You should have received a copy of the GNU General Public License
17   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
18
19#include "config.h"
20#include "sis.h"
21#include <math.h>
22#include <stdio.h>
23
24extern int32    sis_verbose, sparclite;
25int ext_irl = 0;
26
/* Load/store interlock delay: extra cycles added to the completion time
   that the decoder records for a floating-point load.  */
#define FLSTHOLD	1

/* Load delay.  While this macro is defined the decoder checks every
   instruction for a dependency on the register written by the previous
   load and adds a hold cycle when one is found.  Delete the definition
   if the check is unwanted - that speeds up simulation.  */
#define LOAD_DEL	1

/* Instruction cycle counts, stored into sregs->icnt by the decoder.  */
#define T_LD		2	/* load */
#define T_LDD		3	/* double-word load */
#define T_ST		3	/* store */
#define T_STD		4	/* presumably double-word store; user not in view */
#define T_LDST		4	/* atomic load-store (LDSTUB) */
#define T_JMPL		2	/* JMPL */
#define T_RETT		2	/* RETT */

/* FP status register / FPU mode constants.  Their users lie outside the
   visible part of this file, so the names are the only hint about their
   meaning.  */
#define FSR_QNE		0x2000
#define FP_EXE_MODE	0
#define FP_EXC_PE	1
#define FP_EXC_MODE	2
45
/* Condition-field encodings of the floating-point branch instruction,
   compared against bits 28:25 of the instruction word.  The comments give
   the floating-point condition codes (E, L, G, U) for which the decoder
   takes the branch.  Encoding N + 8 is always the logical negation of
   encoding N.  Every name is defined exactly once, in numeric order.  */
#define FBN	0	/* never */
#define FBNE	1	/* L, G or U (not E) */
#define FBLG	2	/* L or G */
#define FBUL	3	/* L or U */
#define FBL	4	/* L */
#define FBUG	5	/* G or U */
#define FBG	6	/* G */
#define FBU	7	/* U */
#define FBA	8	/* always */
#define FBE	9	/* E */
#define FBUE	10	/* E or U (not L, not G) */
#define FBGE	11	/* E or G (not L, not U) */
#define FBUGE	12	/* anything but L */
#define FBLE	13	/* E or L (not G, not U) */
#define FBULE	14	/* anything but G */
#define FBO	15	/* anything but U */
63
/* Values of the two-bit floating-point condition code that the decoder
   extracts from the FSR with (fsr >> 10) & 3.  */
#define FCC_E		0
#define FCC_L		1
#define FCC_G		2
#define FCC_U		3

/* Processor status register (PSR) masks.  */
#define PSR_ET		0x20		/* RETT traps while set, then sets it */
#define PSR_EF		0x1000		/* must be set for FP instructions */
#define PSR_PS		0x40		/* copied into PSR_S by RETT */
#define PSR_S		0x80		/* must be set for privileged instructions */
#define PSR_N		0x0800000	/* condition code N (bit 23) */
#define PSR_Z		0x0400000	/* condition code Z (bit 22) */
#define PSR_V		0x0200000	/* condition code V (bit 21) */
#define PSR_C		0x0100000	/* condition code C (bit 20) */
#define PSR_CC		0x0F00000	/* all four condition codes */
#define PSR_CWP		0x7		/* current window pointer */
#define PSR_PIL		0x0f00		/* not used in the visible code */

/* Individual condition codes of a local variable `icc' that holds
   psr >> 20.  Only bit 0 of each expansion is meaningful; the decoder
   tests the combined value with `& 1'.  */
#define ICC_N		(icc >> 3)
#define ICC_Z		(icc >> 2)
#define ICC_V		(icc >> 1)
#define ICC_C		(icc)

/* FPU-present flag; expands against a `sregs' pointer in scope.  */
#define FP_PRES		(sregs->fpu_pres)

/* Trap numbers stored into sregs->trap.  The comments describe the
   situations in which the visible decoder code raises them.  */
#define TRAP_IEXC	1	/* not raised in the visible code */
#define TRAP_UNIMP	2	/* unimplemented or illegal instruction */
#define TRAP_PRIVI	3	/* privileged instruction without PSR_S */
#define TRAP_FPDIS	4	/* FP instruction with the FPU disabled/absent */
#define TRAP_WOFL	5	/* SAVE into a window marked in WIM */
#define TRAP_WUFL	6	/* RESTORE/RETT into a window marked in WIM */
#define TRAP_UNALI	7	/* misaligned address */
#define TRAP_FPEXC	8	/* not raised in the visible code */
#define TRAP_DEXC	9	/* memory access reported an error */
#define TRAP_TAG	10	/* tagged add/subtract overflow (TADDccTV/TSUBccTV) */
#define TRAP_DIV0	0x2a	/* divide with a zero divisor */
99
/* FSR_TT is a three-bit mask (bits 16:14) and the three FP_* values all
   lie inside it.  Their users are outside the visible part of this file;
   presumably they are FPU trap-type codes -- confirm against fpexec.  */
#define FSR_TT		0x1C000
#define FP_IEEE		0x04000
#define FP_UNIMP	0x0C000
#define FP_SEQ_ERR	0x10000

/* Condition-field encodings shared by the integer branch and the
   trap-on-condition decoders (bits 28:25 of the instruction).  The
   comments give the condition-code expression the decoder evaluates;
   encodings 9..15 are the negations of encodings 1..7.  */
#define BICC_BN		0	/* never */
#define BICC_BE		1	/* Z */
#define BICC_BLE	2	/* Z | (N ^ V) */
#define BICC_BL		3	/* N ^ V */
#define BICC_BLEU	4	/* C | Z */
#define BICC_BCS	5	/* C */
#define BICC_NEG	6	/* N */
#define BICC_BVS	7	/* V */
#define BICC_BA		8	/* always */
#define BICC_BNE	9	/* not Z */
#define BICC_BG		10	/* not (Z | (N ^ V)) */
#define BICC_BGE	11	/* not (N ^ V) */
#define BICC_BGU	12	/* not (C | Z) */
#define BICC_BCC	13	/* not C */
#define BICC_POS	14	/* not N */
#define BICC_BVC	15	/* not V */
121
/* Instruction-word field masks.  */
#define INST_SIMM13	0x1fff	/* 13-bit immediate field */
#define INST_RS2	0x1f	/* second source register number */
#define INST_I		0x2000	/* set: operand 2 is the immediate */

/* op2 values, decoded when the top two instruction bits (op) are 0.  */
#define BICC		0x02
#define SETHI		0x04
#define FPBCC		0x06

/* op3 values, decoded when op is 2: add and subtract.  */
#define ADD		0x00
#define ADDCC		0x10
#define ADDX		0x08
#define ADDXCC		0x18
#define SUB		0x04
#define SUBCC		0x14
#define SUBX		0x0C
#define SUBXCC		0x1C

/* op is 2: tagged add and subtract.  */
#define TADDCC		0x20
#define TSUBCC		0x21
#define TADDCCTV	0x22
#define TSUBCCTV	0x23

/* op is 2: multiply and divide.  */
#define MULScc		0x24
#define DIVScc		0x1D
#define UMUL		0x0A
#define UMULCC		0x1A
#define SMUL		0x0B
#define SMULCC		0x1B
#define UDIV		0x0E
#define UDIVCC		0x1E
#define SDIV		0x0F
#define SDIVCC		0x1F

/* op is 2: logical operations.  */
#define IAND		0x01
#define IANDCC		0x11
#define IANDN		0x05
#define IANDNCC		0x15
#define IOR		0x02
#define IORCC		0x12
#define IORN		0x06
#define IORNCC		0x16
#define IXOR		0x03
#define IXORCC		0x13
#define IXNOR		0x07
#define IXNORCC		0x17

/* op is 2: shifts.  */
#define SLL		0x25
#define SRL		0x26
#define SRA		0x27

/* op is 2: state-register reads and writes.  */
#define RDY		0x28
#define RDPSR		0x29
#define RDWIM		0x2A
#define RDTBR		0x2B
#define WRY		0x30
#define WRPSR		0x31
#define WRWIM		0x32
#define WRTBR		0x33

/* op is 2: control transfer, register windows and miscellaneous.  */
#define SCAN		0x2C
#define JMPL		0x38
#define RETT		0x39
#define TICC		0x3A
#define FLUSH		0x3B
#define SAVE		0x3C
#define RESTORE		0x3D

/* op3 values, decoded when op is 3: integer loads.  The names ending
   in A are the variants that go through chk_asi first.  */
#define LD		0x00
#define LDA		0x10
#define LDUB		0x01
#define LDUBA		0x11
#define LDUH		0x02
#define LDUHA		0x12
#define LDD		0x03
#define LDDA		0x13
#define LDSB		0x09
#define LDSBA		0x19
#define LDSH		0x0A
#define LDSHA		0x1A

/* op is 3: integer stores (their decoding lies past the visible part
   of this file).  */
#define ST		0x04
#define STA		0x14
#define STB		0x05
#define STBA		0x15
#define STH		0x06
#define STHA		0x16
#define STD		0x07
#define STDA		0x17

/* op is 3: atomic load-store and swap.  */
#define LDSTUB		0x0D
#define LDSTUBA		0x1D
#define SWAP		0x0F
#define SWAPA		0x1F

/* op is 3: floating-point loads and stores.  */
#define LDF		0x20
#define LDFSR		0x21
#define LDDF		0x23
#define STF		0x24
#define STFSR		0x25
#define STDFQ		0x26
#define STDF		0x27

/* Bit 31 of a 32-bit word.  */
#define SIGN_BIT 0x80000000

/* # of cycles overhead when a trap is taken */
#define TRAP_C		3
216
217/* Forward declarations */
218
219static uint32	sub_cc (uint32 psr, int32 operand1, int32 operand2,
220			int32 result);
221static uint32	add_cc (uint32 psr, int32 operand1, int32 operand2,
222			int32 result);
223static void	log_cc (int32 result, struct pstate *sregs);
224static int	fpexec (uint32 op3, uint32 rd, uint32 rs1, uint32 rs2,
225			struct pstate *sregs);
226static int	chk_asi (struct pstate *sregs, uint32 *asi, uint32 op3);
227
228
229extern struct estate ebase;
230extern int32    nfp,ift;
231
232#ifdef ERRINJ
233extern uint32 errtt, errftt;
234#endif
235
236static uint32
237sub_cc(psr, operand1, operand2, result)
238    uint32          psr;
239    int32           operand1;
240    int32           operand2;
241    int32           result;
242{
243    psr = ((psr & ~PSR_N) | ((result >> 8) & PSR_N));
244    if (result)
245	psr &= ~PSR_Z;
246    else
247	psr |= PSR_Z;
248    psr = (psr & ~PSR_V) | ((((operand1 & ~operand2 & ~result) |
249			   (~operand1 & operand2 & result)) >> 10) & PSR_V);
250    psr = (psr & ~PSR_C) | ((((~operand1 & operand2) |
251			 ((~operand1 | operand2) & result)) >> 11) & PSR_C);
252    return psr;
253}
254
255uint32
256add_cc(psr, operand1, operand2, result)
257    uint32          psr;
258    int32           operand1;
259    int32           operand2;
260    int32           result;
261{
262    psr = ((psr & ~PSR_N) | ((result >> 8) & PSR_N));
263    if (result)
264	psr &= ~PSR_Z;
265    else
266	psr |= PSR_Z;
267    psr = (psr & ~PSR_V) | ((((operand1 & operand2 & ~result) |
268			  (~operand1 & ~operand2 & result)) >> 10) & PSR_V);
269    psr = (psr & ~PSR_C) | ((((operand1 & operand2) |
270			 ((operand1 | operand2) & ~result)) >> 11) & PSR_C);
271    return psr;
272}
273
274static void
275log_cc(result, sregs)
276    int32           result;
277    struct pstate  *sregs;
278{
279    sregs->psr &= ~(PSR_CC);	/* Zero CC bits */
280    sregs->psr = (sregs->psr | ((result >> 8) & PSR_N));
281    if (result == 0)
282	sregs->psr |= PSR_Z;
283}
284
285/* Add two unsigned 32-bit integers, and calculate the carry out. */
286
287static uint32
288add32 (uint32 n1, uint32 n2, int *carry)
289{
290  uint32 result = n1 + n2;
291
292  *carry = result < n1 || result < n2;
293  return result;
294}
295
296/* Multiply two 32-bit integers.  */
297
298static void
299mul64 (uint32 n1, uint32 n2, uint32 *result_hi, uint32 *result_lo, int msigned)
300{
301  uint32 lo, mid1, mid2, hi, reg_lo, reg_hi;
302  int carry;
303  int sign = 0;
304
305  /* If this is a signed multiply, calculate the sign of the result
306     and make the operands positive.  */
307  if (msigned)
308    {
309      sign = (n1 ^ n2) & SIGN_BIT;
310      if (n1 & SIGN_BIT)
311	n1 = -n1;
312      if (n2 & SIGN_BIT)
313	n2 = -n2;
314
315    }
316
317  /* We can split the 32x32 into four 16x16 operations. This ensures
318     that we do not lose precision on 32bit only hosts: */
319  lo =   ((n1 & 0xFFFF) * (n2 & 0xFFFF));
320  mid1 = ((n1 & 0xFFFF) * ((n2 >> 16) & 0xFFFF));
321  mid2 = (((n1 >> 16) & 0xFFFF) * (n2 & 0xFFFF));
322  hi =   (((n1 >> 16) & 0xFFFF) * ((n2 >> 16) & 0xFFFF));
323
324  /* We now need to add all of these results together, taking care
325     to propogate the carries from the additions: */
326  reg_lo = add32 (lo, (mid1 << 16), &carry);
327  reg_hi = carry;
328  reg_lo = add32 (reg_lo, (mid2 << 16), &carry);
329  reg_hi += (carry + ((mid1 >> 16) & 0xFFFF) + ((mid2 >> 16) & 0xFFFF) + hi);
330
331  /* Negate result if necessary. */
332  if (sign)
333    {
334      reg_hi = ~ reg_hi;
335      reg_lo = - reg_lo;
336      if (reg_lo == 0)
337	reg_hi++;
338    }
339
340  *result_lo = reg_lo;
341  *result_hi = reg_hi;
342}
343
344
345/* Divide a 64-bit integer by a 32-bit integer.  We cheat and assume
346   that the host compiler supports long long operations.  */
347
348static void
349div64 (uint32 n1_hi, uint32 n1_low, uint32 n2, uint32 *result, int msigned)
350{
351  uint64 n1;
352
353  n1 = ((uint64) n1_hi) << 32;
354  n1 |= ((uint64) n1_low) & 0xffffffff;
355
356  if (msigned)
357    {
358      int64 n1_s = (int64) n1;
359      int32 n2_s = (int32) n2;
360      n1_s = n1_s / n2_s;
361      n1 = (uint64) n1_s;
362    }
363  else
364    n1 = n1 / n2;
365
366  *result = (uint32) (n1 & 0xffffffff);
367}
368
369
370static int
371extract_short (uint32 data, uint32 address)
372{
373    return ((data >> ((2 - (address & 2)) * 8)) & 0xffff);
374}
375
376static int
377extract_short_signed (uint32 data, uint32 address)
378{
379    uint32 tmp = ((data >> ((2 - (address & 2)) * 8)) & 0xffff);
380    if (tmp & 0x8000)
381        tmp |= 0xffff0000;
382    return tmp;
383}
384
385static int
386extract_byte (uint32 data, uint32 address)
387{
388    return ((data >> ((3 - (address & 3)) * 8)) & 0xff);
389}
390
391static int
392extract_byte_signed (uint32 data, uint32 address)
393{
394    uint32 tmp = ((data >> ((3 - (address & 3)) * 8)) & 0xff);
395    if (tmp & 0x80)
396        tmp |= 0xffffff00;
397    return tmp;
398}
399
400int
401dispatch_instruction(sregs)
402    struct pstate  *sregs;
403{
404
405    uint32          cwp, op, op2, op3, asi, rd, cond, rs1,
406                    rs2;
407    uint32          ldep, icc;
408    int32           operand1, operand2, *rdd, result, eicc,
409                    new_cwp;
410    int32           pc, npc, data, address, ws, mexc, fcc;
411    int32	    ddata[2];
412
413    sregs->ninst++;
414    cwp = ((sregs->psr & PSR_CWP) << 4);
415    op = sregs->inst >> 30;
416    pc = sregs->npc;
417    npc = sregs->npc + 4;
418    op3 = rd = rs1 = operand2 = eicc = 0;
419    rdd = 0;
420    if (op & 2) {
421
422	op3 = (sregs->inst >> 19) & 0x3f;
423	rs1 = (sregs->inst >> 14) & 0x1f;
424	rd = (sregs->inst >> 25) & 0x1f;
425
426#ifdef LOAD_DEL
427
428	/* Check if load dependecy is possible */
429	if (ebase.simtime <= sregs->ildtime)
430	    ldep = (((op3 & 0x38) != 0x28) && ((op3 & 0x3e) != 0x34) && (sregs->ildreg != 0));
431        else
432	    ldep = 0;
433	if (sregs->inst & INST_I) {
434	    if (ldep && (sregs->ildreg == rs1))
435		sregs->hold++;
436	    operand2 = sregs->inst;
437	    operand2 = ((operand2 << 19) >> 19);	/* sign extend */
438	} else {
439	    rs2 = sregs->inst & INST_RS2;
440	    if (rs2 > 7)
441		operand2 = sregs->r[(cwp + rs2) & 0x7f];
442	    else
443		operand2 = sregs->g[rs2];
444	    if (ldep && ((sregs->ildreg == rs1) || (sregs->ildreg == rs2)))
445		sregs->hold++;
446	}
447#else
448	if (sregs->inst & INST_I) {
449	    operand2 = sregs->inst;
450	    operand2 = ((operand2 << 19) >> 19);	/* sign extend */
451	} else {
452	    rs2 = sregs->inst & INST_RS2;
453	    if (rs2 > 7)
454		operand2 = sregs->r[(cwp + rs2) & 0x7f];
455	    else
456		operand2 = sregs->g[rs2];
457	}
458#endif
459
460	if (rd > 7)
461	    rdd = &(sregs->r[(cwp + rd) & 0x7f]);
462	else
463	    rdd = &(sregs->g[rd]);
464	if (rs1 > 7)
465	    rs1 = sregs->r[(cwp + rs1) & 0x7f];
466	else
467	    rs1 = sregs->g[rs1];
468    }
469    switch (op) {
470    case 0:
471	op2 = (sregs->inst >> 22) & 0x7;
472	switch (op2) {
473	case SETHI:
474	    rd = (sregs->inst >> 25) & 0x1f;
475	    if (rd > 7)
476		rdd = &(sregs->r[(cwp + rd) & 0x7f]);
477	    else
478		rdd = &(sregs->g[rd]);
479	    *rdd = sregs->inst << 10;
480	    break;
481	case BICC:
482#ifdef STAT
483	    sregs->nbranch++;
484#endif
485	    icc = sregs->psr >> 20;
486	    cond = ((sregs->inst >> 25) & 0x0f);
487	    switch (cond) {
488	    case BICC_BN:
489		eicc = 0;
490		break;
491	    case BICC_BE:
492		eicc = ICC_Z;
493		break;
494	    case BICC_BLE:
495		eicc = ICC_Z | (ICC_N ^ ICC_V);
496		break;
497	    case BICC_BL:
498		eicc = (ICC_N ^ ICC_V);
499		break;
500	    case BICC_BLEU:
501		eicc = ICC_C | ICC_Z;
502		break;
503	    case BICC_BCS:
504		eicc = ICC_C;
505		break;
506	    case BICC_NEG:
507		eicc = ICC_N;
508		break;
509	    case BICC_BVS:
510		eicc = ICC_V;
511		break;
512	    case BICC_BA:
513		eicc = 1;
514		if (sregs->inst & 0x20000000)
515		    sregs->annul = 1;
516		break;
517	    case BICC_BNE:
518		eicc = ~(ICC_Z);
519		break;
520	    case BICC_BG:
521		eicc = ~(ICC_Z | (ICC_N ^ ICC_V));
522		break;
523	    case BICC_BGE:
524		eicc = ~(ICC_N ^ ICC_V);
525		break;
526	    case BICC_BGU:
527		eicc = ~(ICC_C | ICC_Z);
528		break;
529	    case BICC_BCC:
530		eicc = ~(ICC_C);
531		break;
532	    case BICC_POS:
533		eicc = ~(ICC_N);
534		break;
535	    case BICC_BVC:
536		eicc = ~(ICC_V);
537		break;
538	    }
539	    if (eicc & 1) {
540		operand1 = sregs->inst;
541		operand1 = ((operand1 << 10) >> 8);	/* sign extend */
542		npc = sregs->pc + operand1;
543	    } else {
544		if (sregs->inst & 0x20000000)
545		    sregs->annul = 1;
546	    }
547	    break;
548	case FPBCC:
549#ifdef STAT
550	    sregs->nbranch++;
551#endif
552	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
553		sregs->trap = TRAP_FPDIS;
554		break;
555	    }
556	    if (ebase.simtime < sregs->ftime) {
557		sregs->ftime = ebase.simtime + sregs->hold;
558	    }
559	    cond = ((sregs->inst >> 25) & 0x0f);
560	    fcc = (sregs->fsr >> 10) & 0x3;
561	    switch (cond) {
562	    case FBN:
563		eicc = 0;
564		break;
565	    case FBNE:
566		eicc = (fcc != FCC_E);
567		break;
568	    case FBLG:
569		eicc = (fcc == FCC_L) || (fcc == FCC_G);
570		break;
571	    case FBUL:
572		eicc = (fcc == FCC_L) || (fcc == FCC_U);
573		break;
574	    case FBL:
575		eicc = (fcc == FCC_L);
576		break;
577	    case FBUG:
578		eicc = (fcc == FCC_G) || (fcc == FCC_U);
579		break;
580	    case FBG:
581		eicc = (fcc == FCC_G);
582		break;
583	    case FBU:
584		eicc = (fcc == FCC_U);
585		break;
586	    case FBA:
587		eicc = 1;
588		if (sregs->inst & 0x20000000)
589		    sregs->annul = 1;
590		break;
591	    case FBE:
592		eicc = !(fcc != FCC_E);
593		break;
594	    case FBUE:
595		eicc = !((fcc == FCC_L) || (fcc == FCC_G));
596		break;
597	    case FBGE:
598		eicc = !((fcc == FCC_L) || (fcc == FCC_U));
599		break;
600	    case FBUGE:
601		eicc = !(fcc == FCC_L);
602		break;
603	    case FBLE:
604		eicc = !((fcc == FCC_G) || (fcc == FCC_U));
605		break;
606	    case FBULE:
607		eicc = !(fcc == FCC_G);
608		break;
609	    case FBO:
610		eicc = !(fcc == FCC_U);
611		break;
612	    }
613	    if (eicc) {
614		operand1 = sregs->inst;
615		operand1 = ((operand1 << 10) >> 8);	/* sign extend */
616		npc = sregs->pc + operand1;
617	    } else {
618		if (sregs->inst & 0x20000000)
619		    sregs->annul = 1;
620	    }
621	    break;
622
623	default:
624	    sregs->trap = TRAP_UNIMP;
625	    break;
626	}
627	break;
628    case 1:			/* CALL */
629#ifdef STAT
630	sregs->nbranch++;
631#endif
632	sregs->r[(cwp + 15) & 0x7f] = sregs->pc;
633	npc = sregs->pc + (sregs->inst << 2);
634	break;
635
636    case 2:
637	if ((op3 >> 1) == 0x1a) {
638	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
639		sregs->trap = TRAP_FPDIS;
640	    } else {
641		rs1 = (sregs->inst >> 14) & 0x1f;
642		rs2 = sregs->inst & 0x1f;
643		sregs->trap = fpexec(op3, rd, rs1, rs2, sregs);
644	    }
645	} else {
646
647	    switch (op3) {
648	    case TICC:
649	        icc = sregs->psr >> 20;
650	        cond = ((sregs->inst >> 25) & 0x0f);
651	        switch (cond) {
652		case BICC_BN:
653		    eicc = 0;
654		    break;
655		case BICC_BE:
656		    eicc = ICC_Z;
657		    break;
658		case BICC_BLE:
659		    eicc = ICC_Z | (ICC_N ^ ICC_V);
660		    break;
661		case BICC_BL:
662		    eicc = (ICC_N ^ ICC_V);
663		    break;
664		case BICC_BLEU:
665		    eicc = ICC_C | ICC_Z;
666		    break;
667		case BICC_BCS:
668		    eicc = ICC_C;
669		    break;
670		case BICC_NEG:
671		    eicc = ICC_N;
672		    break;
673		case BICC_BVS:
674		    eicc = ICC_V;
675		    break;
676	        case BICC_BA:
677		    eicc = 1;
678		    break;
679	        case BICC_BNE:
680		    eicc = ~(ICC_Z);
681		    break;
682	        case BICC_BG:
683		    eicc = ~(ICC_Z | (ICC_N ^ ICC_V));
684		    break;
685	        case BICC_BGE:
686		    eicc = ~(ICC_N ^ ICC_V);
687		    break;
688	        case BICC_BGU:
689		    eicc = ~(ICC_C | ICC_Z);
690		    break;
691	        case BICC_BCC:
692		    eicc = ~(ICC_C);
693		    break;
694	        case BICC_POS:
695		    eicc = ~(ICC_N);
696		    break;
697	        case BICC_BVC:
698		    eicc = ~(ICC_V);
699		    break;
700		}
701		if (eicc & 1) {
702		    sregs->trap = (0x80 | ((rs1 + operand2) & 0x7f));
703		}
704		break;
705
706	    case MULScc:
707		operand1 =
708		    (((sregs->psr & PSR_V) ^ ((sregs->psr & PSR_N) >> 2))
709		     << 10) | (rs1 >> 1);
710		if ((sregs->y & 1) == 0)
711		    operand2 = 0;
712		*rdd = operand1 + operand2;
713		sregs->y = (rs1 << 31) | (sregs->y >> 1);
714		sregs->psr = add_cc(sregs->psr, operand1, operand2, *rdd);
715		break;
716	    case DIVScc:
717		{
718		  int sign;
719		  uint32 result, remainder;
720		  int c0, y31;
721
722		  if (!sparclite) {
723		     sregs->trap = TRAP_UNIMP;
724                     break;
725		  }
726
727		  sign = ((sregs->psr & PSR_V) != 0) ^ ((sregs->psr & PSR_N) != 0);
728
729		  remainder = (sregs->y << 1) | (rs1 >> 31);
730
731		  /* If true sign is positive, calculate remainder - divisor.
732		     Otherwise, calculate remainder + divisor.  */
733		  if (sign == 0)
734		    operand2 = ~operand2 + 1;
735		  result = remainder + operand2;
736
737		  /* The SPARClite User's Manual is not clear on how
738		     the "carry out" of the above ALU operation is to
739		     be calculated.  From trial and error tests
740		     on the the chip itself, it appears that it is
741		     a normal addition carry, and not a subtraction borrow,
742		     even in cases where the divisor is subtracted
743		     from the remainder.  FIXME: get the true story
744		     from Fujitsu. */
745		  c0 = result < (uint32) remainder
746		       || result < (uint32) operand2;
747
748		  if (result & 0x80000000)
749		    sregs->psr |= PSR_N;
750		  else
751		    sregs->psr &= ~PSR_N;
752
753		  y31 = (sregs->y & 0x80000000) == 0x80000000;
754
755		  if (result == 0 && sign == y31)
756		    sregs->psr |= PSR_Z;
757		  else
758		    sregs->psr &= ~PSR_Z;
759
760		  sign = (sign && !y31) || (!c0 && (sign || !y31));
761
762		  if (sign ^ (result >> 31))
763		    sregs->psr |= PSR_V;
764		  else
765		    sregs->psr &= ~PSR_V;
766
767		  if (!sign)
768		    sregs->psr |= PSR_C;
769		  else
770		    sregs->psr &= ~PSR_C;
771
772		  sregs->y = result;
773
774		  if (rd != 0)
775		    *rdd = (rs1 << 1) | !sign;
776		}
777		break;
778	    case SMUL:
779		{
780		  mul64 (rs1, operand2, &sregs->y, rdd, 1);
781		}
782		break;
783	    case SMULCC:
784		{
785		  uint32 result;
786
787		  mul64 (rs1, operand2, &sregs->y, &result, 1);
788
789		  if (result & 0x80000000)
790		    sregs->psr |= PSR_N;
791		  else
792		    sregs->psr &= ~PSR_N;
793
794		  if (result == 0)
795		    sregs->psr |= PSR_Z;
796		  else
797		    sregs->psr &= ~PSR_Z;
798
799		  *rdd = result;
800		}
801		break;
802	    case UMUL:
803		{
804		  mul64 (rs1, operand2, &sregs->y, rdd, 0);
805		}
806		break;
807	    case UMULCC:
808		{
809		  uint32 result;
810
811		  mul64 (rs1, operand2, &sregs->y, &result, 0);
812
813		  if (result & 0x80000000)
814		    sregs->psr |= PSR_N;
815		  else
816		    sregs->psr &= ~PSR_N;
817
818		  if (result == 0)
819		    sregs->psr |= PSR_Z;
820		  else
821		    sregs->psr &= ~PSR_Z;
822
823		  *rdd = result;
824		}
825		break;
826	    case SDIV:
827		{
828		  if (sparclite) {
829		     sregs->trap = TRAP_UNIMP;
830                     break;
831		  }
832
833		  if (operand2 == 0) {
834		    sregs->trap = TRAP_DIV0;
835		    break;
836		  }
837
838		  div64 (sregs->y, rs1, operand2, rdd, 1);
839		}
840		break;
841	    case SDIVCC:
842		{
843		  uint32 result;
844
845		  if (sparclite) {
846		     sregs->trap = TRAP_UNIMP;
847                     break;
848		  }
849
850		  if (operand2 == 0) {
851		    sregs->trap = TRAP_DIV0;
852		    break;
853		  }
854
855		  div64 (sregs->y, rs1, operand2, &result, 1);
856
857		  if (result & 0x80000000)
858		    sregs->psr |= PSR_N;
859		  else
860		    sregs->psr &= ~PSR_N;
861
862		  if (result == 0)
863		    sregs->psr |= PSR_Z;
864		  else
865		    sregs->psr &= ~PSR_Z;
866
867		  /* FIXME: should set overflow flag correctly.  */
868		  sregs->psr &= ~(PSR_C | PSR_V);
869
870		  *rdd = result;
871		}
872		break;
873	    case UDIV:
874		{
875		  if (sparclite) {
876		     sregs->trap = TRAP_UNIMP;
877                     break;
878		  }
879
880		  if (operand2 == 0) {
881		    sregs->trap = TRAP_DIV0;
882		    break;
883		  }
884
885		  div64 (sregs->y, rs1, operand2, rdd, 0);
886		}
887		break;
888	    case UDIVCC:
889		{
890		  uint32 result;
891
892		  if (sparclite) {
893		     sregs->trap = TRAP_UNIMP;
894                     break;
895		  }
896
897		  if (operand2 == 0) {
898		    sregs->trap = TRAP_DIV0;
899		    break;
900		  }
901
902		  div64 (sregs->y, rs1, operand2, &result, 0);
903
904		  if (result & 0x80000000)
905		    sregs->psr |= PSR_N;
906		  else
907		    sregs->psr &= ~PSR_N;
908
909		  if (result == 0)
910		    sregs->psr |= PSR_Z;
911		  else
912		    sregs->psr &= ~PSR_Z;
913
914		  /* FIXME: should set overflow flag correctly.  */
915		  sregs->psr &= ~(PSR_C | PSR_V);
916
917		  *rdd = result;
918		}
919		break;
920	    case IXNOR:
921		*rdd = rs1 ^ ~operand2;
922		break;
923	    case IXNORCC:
924		*rdd = rs1 ^ ~operand2;
925		log_cc(*rdd, sregs);
926		break;
927	    case IXOR:
928		*rdd = rs1 ^ operand2;
929		break;
930	    case IXORCC:
931		*rdd = rs1 ^ operand2;
932		log_cc(*rdd, sregs);
933		break;
934	    case IOR:
935		*rdd = rs1 | operand2;
936		break;
937	    case IORCC:
938		*rdd = rs1 | operand2;
939		log_cc(*rdd, sregs);
940		break;
941	    case IORN:
942		*rdd = rs1 | ~operand2;
943		break;
944	    case IORNCC:
945		*rdd = rs1 | ~operand2;
946		log_cc(*rdd, sregs);
947		break;
948	    case IANDNCC:
949		*rdd = rs1 & ~operand2;
950		log_cc(*rdd, sregs);
951		break;
952	    case IANDN:
953		*rdd = rs1 & ~operand2;
954		break;
955	    case IAND:
956		*rdd = rs1 & operand2;
957		break;
958	    case IANDCC:
959		*rdd = rs1 & operand2;
960		log_cc(*rdd, sregs);
961		break;
962	    case SUB:
963		*rdd = rs1 - operand2;
964		break;
965	    case SUBCC:
966		*rdd = rs1 - operand2;
967		sregs->psr = sub_cc(sregs->psr, rs1, operand2, *rdd);
968		break;
969	    case SUBX:
970		*rdd = rs1 - operand2 - ((sregs->psr >> 20) & 1);
971		break;
972	    case SUBXCC:
973		*rdd = rs1 - operand2 - ((sregs->psr >> 20) & 1);
974		sregs->psr = sub_cc(sregs->psr, rs1, operand2, *rdd);
975		break;
976	    case ADD:
977		*rdd = rs1 + operand2;
978		break;
979	    case ADDCC:
980		*rdd = rs1 + operand2;
981		sregs->psr = add_cc(sregs->psr, rs1, operand2, *rdd);
982		break;
983	    case ADDX:
984		*rdd = rs1 + operand2 + ((sregs->psr >> 20) & 1);
985		break;
986	    case ADDXCC:
987		*rdd = rs1 + operand2 + ((sregs->psr >> 20) & 1);
988		sregs->psr = add_cc(sregs->psr, rs1, operand2, *rdd);
989		break;
990	    case TADDCC:
991		*rdd = rs1 + operand2;
992		sregs->psr = add_cc(sregs->psr, rs1, operand2, *rdd);
993		if ((rs1 | operand2) & 0x3)
994		    sregs->psr |= PSR_V;
995		break;
996	    case TSUBCC:
997		*rdd = rs1 - operand2;
998		sregs->psr = sub_cc (sregs->psr, rs1, operand2, *rdd);
999		if ((rs1 | operand2) & 0x3)
1000		    sregs->psr |= PSR_V;
1001		break;
1002	    case TADDCCTV:
1003		*rdd = rs1 + operand2;
1004		result = add_cc(0, rs1, operand2, *rdd);
1005		if ((rs1 | operand2) & 0x3)
1006		    result |= PSR_V;
1007		if (result & PSR_V) {
1008		    sregs->trap = TRAP_TAG;
1009		} else {
1010		    sregs->psr = (sregs->psr & ~PSR_CC) | result;
1011		}
1012		break;
1013	    case TSUBCCTV:
1014		*rdd = rs1 - operand2;
1015		result = add_cc (0, rs1, operand2, *rdd);
1016		if ((rs1 | operand2) & 0x3)
1017		    result |= PSR_V;
1018		if (result & PSR_V)
1019		  {
1020		      sregs->trap = TRAP_TAG;
1021		  }
1022		else
1023		  {
1024		      sregs->psr = (sregs->psr & ~PSR_CC) | result;
1025		  }
1026		break;
1027	    case SLL:
1028		*rdd = rs1 << (operand2 & 0x1f);
1029		break;
1030	    case SRL:
1031		*rdd = rs1 >> (operand2 & 0x1f);
1032		break;
1033	    case SRA:
1034		*rdd = ((int) rs1) >> (operand2 & 0x1f);
1035		break;
1036	    case FLUSH:
1037		if (ift) sregs->trap = TRAP_UNIMP;
1038		break;
1039	    case SAVE:
1040		new_cwp = ((sregs->psr & PSR_CWP) - 1) & PSR_CWP;
1041		if (sregs->wim & (1 << new_cwp)) {
1042		    sregs->trap = TRAP_WOFL;
1043		    break;
1044		}
1045		if (rd > 7)
1046		    rdd = &(sregs->r[((new_cwp << 4) + rd) & 0x7f]);
1047		*rdd = rs1 + operand2;
1048		sregs->psr = (sregs->psr & ~PSR_CWP) | new_cwp;
1049		break;
1050	    case RESTORE:
1051
1052		new_cwp = ((sregs->psr & PSR_CWP) + 1) & PSR_CWP;
1053		if (sregs->wim & (1 << new_cwp)) {
1054		    sregs->trap = TRAP_WUFL;
1055		    break;
1056		}
1057		if (rd > 7)
1058		    rdd = &(sregs->r[((new_cwp << 4) + rd) & 0x7f]);
1059		*rdd = rs1 + operand2;
1060		sregs->psr = (sregs->psr & ~PSR_CWP) | new_cwp;
1061		break;
1062	    case RDPSR:
1063		if (!(sregs->psr & PSR_S)) {
1064		    sregs->trap = TRAP_PRIVI;
1065		    break;
1066		}
1067		*rdd = sregs->psr;
1068		break;
1069	    case RDY:
1070                if (!sparclite)
1071                    *rdd = sregs->y;
1072                else {
1073                    int rs1_is_asr = (sregs->inst >> 14) & 0x1f;
1074                    if ( 0 == rs1_is_asr )
1075                        *rdd = sregs->y;
1076                    else if ( 17 == rs1_is_asr )
1077                        *rdd = sregs->asr17;
1078                    else {
1079                        sregs->trap = TRAP_UNIMP;
1080                        break;
1081                    }
1082                }
1083		break;
1084	    case RDWIM:
1085		if (!(sregs->psr & PSR_S)) {
1086		    sregs->trap = TRAP_PRIVI;
1087		    break;
1088		}
1089		*rdd = sregs->wim;
1090		break;
1091	    case RDTBR:
1092		if (!(sregs->psr & PSR_S)) {
1093		    sregs->trap = TRAP_PRIVI;
1094		    break;
1095		}
1096		*rdd = sregs->tbr;
1097		break;
1098	    case WRPSR:
1099		if ((sregs->psr & 0x1f) > 7) {
1100		    sregs->trap = TRAP_UNIMP;
1101		    break;
1102		}
1103		if (!(sregs->psr & PSR_S)) {
1104		    sregs->trap = TRAP_PRIVI;
1105		    break;
1106		}
1107		sregs->psr = (sregs->psr & 0xff000000) |
1108			(rs1 ^ operand2) & 0x00f03fff;
1109		break;
1110	    case WRWIM:
1111		if (!(sregs->psr & PSR_S)) {
1112		    sregs->trap = TRAP_PRIVI;
1113		    break;
1114		}
1115		sregs->wim = (rs1 ^ operand2) & 0x0ff;
1116		break;
1117	    case WRTBR:
1118		if (!(sregs->psr & PSR_S)) {
1119		    sregs->trap = TRAP_PRIVI;
1120		    break;
1121		}
1122		sregs->tbr = (sregs->tbr & 0x00000ff0) |
1123		    ((rs1 ^ operand2) & 0xfffff000);
1124		break;
1125	    case WRY:
1126                if (!sparclite)
1127                    sregs->y = (rs1 ^ operand2);
1128                else {
1129                    if ( 0 == rd )
1130                        sregs->y = (rs1 ^ operand2);
1131                    else if ( 17 == rd )
1132                        sregs->asr17 = (rs1 ^ operand2);
1133                    else {
1134                        sregs->trap = TRAP_UNIMP;
1135                        break;
1136                    }
1137                }
1138		break;
1139	    case JMPL:
1140
1141#ifdef STAT
1142		sregs->nbranch++;
1143#endif
1144		sregs->icnt = T_JMPL;	/* JMPL takes two cycles */
1145		if (rs1 & 0x3) {
1146		    sregs->trap = TRAP_UNALI;
1147		    break;
1148		}
1149		*rdd = sregs->pc;
1150		npc = rs1 + operand2;
1151		break;
1152	    case RETT:
1153		address = rs1 + operand2;
1154		new_cwp = ((sregs->psr & PSR_CWP) + 1) & PSR_CWP;
1155		sregs->icnt = T_RETT;	/* RETT takes two cycles */
1156		if (sregs->psr & PSR_ET) {
1157		    sregs->trap = TRAP_UNIMP;
1158		    break;
1159		}
1160		if (!(sregs->psr & PSR_S)) {
1161		    sregs->trap = TRAP_PRIVI;
1162		    break;
1163		}
1164		if (sregs->wim & (1 << new_cwp)) {
1165		    sregs->trap = TRAP_WUFL;
1166		    break;
1167		}
1168		if (address & 0x3) {
1169		    sregs->trap = TRAP_UNALI;
1170		    break;
1171		}
1172		sregs->psr = (sregs->psr & ~PSR_CWP) | new_cwp | PSR_ET;
1173		sregs->psr =
1174		    (sregs->psr & ~PSR_S) | ((sregs->psr & PSR_PS) << 1);
1175		npc = address;
1176		break;
1177
1178	    case SCAN:
1179		{
1180		  uint32 result, mask;
1181		  int i;
1182
1183		  if (!sparclite) {
1184		     sregs->trap = TRAP_UNIMP;
1185                     break;
1186		  }
1187		  mask = (operand2 & 0x80000000) | (operand2 >> 1);
1188		  result = rs1 ^ mask;
1189
1190		  for (i = 0; i < 32; i++) {
1191		    if (result & 0x80000000)
1192		      break;
1193		    result <<= 1;
1194		  }
1195
1196		  *rdd = i == 32 ? 63 : i;
1197		}
1198		break;
1199
1200	    default:
1201		sregs->trap = TRAP_UNIMP;
1202		break;
1203	    }
1204	}
1205	break;
1206    case 3:			/* Load/store instructions */
1207
1208	address = rs1 + operand2;
1209
1210	if (sregs->psr & PSR_S)
1211	    asi = 11;
1212	 else
1213	    asi = 10;
1214
1215	if (op3 & 4) {
1216	    sregs->icnt = T_ST;	/* Set store instruction count */
1217#ifdef STAT
1218	    sregs->nstore++;
1219#endif
1220	} else {
1221	    sregs->icnt = T_LD;	/* Set load instruction count */
1222#ifdef STAT
1223	    sregs->nload++;
1224#endif
1225	}
1226
1227	/* Decode load/store instructions */
1228
1229	switch (op3) {
1230	case LDDA:
1231	    if (!chk_asi(sregs, &asi, op3)) break;
1232	case LDD:
1233	    if (address & 0x7) {
1234		sregs->trap = TRAP_UNALI;
1235		break;
1236	    }
1237	    if (rd & 1) {
1238		rd &= 0x1e;
1239		if (rd > 7)
1240		    rdd = &(sregs->r[(cwp + rd) & 0x7f]);
1241		else
1242		    rdd = &(sregs->g[rd]);
1243	    }
1244	    mexc = memory_read (asi, address, ddata, 2, &ws);
1245	    sregs->hold += ws;
1246	    mexc |= memory_read (asi, address+4, &ddata[1], 2, &ws);
1247	    sregs->hold += ws;
1248	    sregs->icnt = T_LDD;
1249	    if (mexc) {
1250		sregs->trap = TRAP_DEXC;
1251	    } else {
1252		rdd[0] = ddata[0];
1253		rdd[1] = ddata[1];
1254#ifdef STAT
1255		sregs->nload++;	/* Double load counts twice */
1256#endif
1257	    }
1258	    break;
1259
1260	case LDA:
1261	    if (!chk_asi(sregs, &asi, op3)) break;
1262	case LD:
1263	    if (address & 0x3) {
1264		sregs->trap = TRAP_UNALI;
1265		break;
1266	    }
1267	    mexc = memory_read(asi, address, &data, 2, &ws);
1268	    sregs->hold += ws;
1269	    if (mexc) {
1270		sregs->trap = TRAP_DEXC;
1271	    } else {
1272		*rdd = data;
1273	    }
1274	    break;
1275	case LDSTUBA:
1276	    if (!chk_asi(sregs, &asi, op3)) break;
1277	case LDSTUB:
1278	    mexc = memory_read(asi, address, &data, 0, &ws);
1279	    sregs->hold += ws;
1280	    sregs->icnt = T_LDST;
1281	    if (mexc) {
1282		sregs->trap = TRAP_DEXC;
1283		break;
1284	    }
1285	    data = extract_byte (data, address);
1286	    *rdd = data;
1287	    data = 0x0ff;
1288	    mexc = memory_write(asi, address, &data, 0, &ws);
1289	    sregs->hold += ws;
1290	    if (mexc) {
1291		sregs->trap = TRAP_DEXC;
1292	    }
1293#ifdef STAT
1294	    sregs->nload++;
1295#endif
1296	    break;
1297	case LDSBA:
1298	case LDUBA:
1299	    if (!chk_asi(sregs, &asi, op3)) break;
1300	case LDSB:
1301	case LDUB:
1302	    mexc = memory_read(asi, address, &data, 0, &ws);
1303	    sregs->hold += ws;
1304	    if (mexc) {
1305		sregs->trap = TRAP_DEXC;
1306		break;
1307	    }
1308	    if (op3 == LDSB)
1309	        data = extract_byte_signed (data, address);
1310	    else
1311	        data = extract_byte (data, address);
1312	    *rdd = data;
1313	    break;
1314	case LDSHA:
1315	case LDUHA:
1316	    if (!chk_asi(sregs, &asi, op3)) break;
1317	case LDSH:
1318	case LDUH:
1319	    if (address & 0x1) {
1320		sregs->trap = TRAP_UNALI;
1321		break;
1322	    }
1323	    mexc = memory_read(asi, address, &data, 1, &ws);
1324	    sregs->hold += ws;
1325	    if (mexc) {
1326		sregs->trap = TRAP_DEXC;
1327		break;
1328	    }
1329	    if (op3 == LDSH)
1330	        data = extract_short_signed (data, address);
1331	    else
1332	        data = extract_short (data, address);
1333	    *rdd = data;
1334	    break;
1335	case LDF:
1336	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1337		sregs->trap = TRAP_FPDIS;
1338		break;
1339	    }
1340	    if (address & 0x3) {
1341		sregs->trap = TRAP_UNALI;
1342		break;
1343	    }
1344	    if (ebase.simtime < sregs->ftime) {
1345		if ((sregs->frd == rd) || (sregs->frs1 == rd) ||
1346		    (sregs->frs2 == rd))
1347		    sregs->fhold += (sregs->ftime - ebase.simtime);
1348	    }
1349	    mexc = memory_read(asi, address, &data, 2, &ws);
1350	    sregs->hold += ws;
1351	    sregs->flrd = rd;
1352	    sregs->ltime = ebase.simtime + sregs->icnt + FLSTHOLD +
1353		sregs->hold + sregs->fhold;
1354	    if (mexc) {
1355		sregs->trap = TRAP_DEXC;
1356	    } else {
1357		sregs->fs[rd] = *((float32 *) & data);
1358	    }
1359	    break;
1360	case LDDF:
1361	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1362		sregs->trap = TRAP_FPDIS;
1363		break;
1364	    }
1365	    if (address & 0x7) {
1366		sregs->trap = TRAP_UNALI;
1367		break;
1368	    }
1369	    if (ebase.simtime < sregs->ftime) {
1370		if (((sregs->frd >> 1) == (rd >> 1)) ||
1371		    ((sregs->frs1 >> 1) == (rd >> 1)) ||
1372		    ((sregs->frs2 >> 1) == (rd >> 1)))
1373		    sregs->fhold += (sregs->ftime - ebase.simtime);
1374	    }
1375	    mexc = memory_read (asi, address, ddata, 2, &ws);
1376	    sregs->hold += ws;
1377	    mexc |= memory_read (asi, address+4, &ddata[1], 2, &ws);
1378	    sregs->hold += ws;
1379	    sregs->icnt = T_LDD;
1380	    if (mexc) {
1381		sregs->trap = TRAP_DEXC;
1382	    } else {
1383		rd &= 0x1E;
1384		sregs->flrd = rd;
1385		sregs->fs[rd] = *((float32 *) & ddata[0]);
1386#ifdef STAT
1387		sregs->nload++;	/* Double load counts twice */
1388#endif
1389		sregs->fs[rd + 1] = *((float32 *) & ddata[1]);
1390		sregs->ltime = ebase.simtime + sregs->icnt + FLSTHOLD +
1391			       sregs->hold + sregs->fhold;
1392	    }
1393	    break;
1394	case LDFSR:
1395	    if (ebase.simtime < sregs->ftime) {
1396		sregs->fhold += (sregs->ftime - ebase.simtime);
1397	    }
1398	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1399		sregs->trap = TRAP_FPDIS;
1400		break;
1401	    }
1402	    if (address & 0x3) {
1403		sregs->trap = TRAP_UNALI;
1404		break;
1405	    }
1406	    mexc = memory_read(asi, address, &data, 2, &ws);
1407	    sregs->hold += ws;
1408	    if (mexc) {
1409		sregs->trap = TRAP_DEXC;
1410	    } else {
1411		sregs->fsr =
1412		    (sregs->fsr & 0x7FF000) | (data & ~0x7FF000);
1413		set_fsr(sregs->fsr);
1414	    }
1415	    break;
1416	case STFSR:
1417	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1418		sregs->trap = TRAP_FPDIS;
1419		break;
1420	    }
1421	    if (address & 0x3) {
1422		sregs->trap = TRAP_UNALI;
1423		break;
1424	    }
1425	    if (ebase.simtime < sregs->ftime) {
1426		sregs->fhold += (sregs->ftime - ebase.simtime);
1427	    }
1428	    mexc = memory_write(asi, address, &sregs->fsr, 2, &ws);
1429	    sregs->hold += ws;
1430	    if (mexc) {
1431		sregs->trap = TRAP_DEXC;
1432	    }
1433	    break;
1434
1435	case STA:
1436	    if (!chk_asi(sregs, &asi, op3)) break;
1437	case ST:
1438	    if (address & 0x3) {
1439		sregs->trap = TRAP_UNALI;
1440		break;
1441	    }
1442	    mexc = memory_write(asi, address, rdd, 2, &ws);
1443	    sregs->hold += ws;
1444	    if (mexc) {
1445		sregs->trap = TRAP_DEXC;
1446	    }
1447	    break;
1448	case STBA:
1449	    if (!chk_asi(sregs, &asi, op3)) break;
1450	case STB:
1451	    mexc = memory_write(asi, address, rdd, 0, &ws);
1452	    sregs->hold += ws;
1453	    if (mexc) {
1454		sregs->trap = TRAP_DEXC;
1455	    }
1456	    break;
1457	case STDA:
1458	    if (!chk_asi(sregs, &asi, op3)) break;
1459	case STD:
1460	    if (address & 0x7) {
1461		sregs->trap = TRAP_UNALI;
1462		break;
1463	    }
1464	    if (rd & 1) {
1465		rd &= 0x1e;
1466		if (rd > 7)
1467		    rdd = &(sregs->r[(cwp + rd) & 0x7f]);
1468		else
1469		    rdd = &(sregs->g[rd]);
1470	    }
1471	    mexc = memory_write(asi, address, rdd, 3, &ws);
1472	    sregs->hold += ws;
1473	    sregs->icnt = T_STD;
1474#ifdef STAT
1475	    sregs->nstore++;	/* Double store counts twice */
1476#endif
1477	    if (mexc) {
1478		sregs->trap = TRAP_DEXC;
1479		break;
1480	    }
1481	    break;
1482	case STDFQ:
1483	    if ((sregs->psr & 0x1f) > 7) {
1484		sregs->trap = TRAP_UNIMP;
1485		break;
1486	    }
1487	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1488		sregs->trap = TRAP_FPDIS;
1489		break;
1490	    }
1491	    if (address & 0x7) {
1492		sregs->trap = TRAP_UNALI;
1493		break;
1494	    }
1495	    if (!(sregs->fsr & FSR_QNE)) {
1496		sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_SEQ_ERR;
1497		break;
1498	    }
1499	    rdd = &(sregs->fpq[0]);
1500	    mexc = memory_write(asi, address, rdd, 3, &ws);
1501	    sregs->hold += ws;
1502	    sregs->icnt = T_STD;
1503#ifdef STAT
1504	    sregs->nstore++;	/* Double store counts twice */
1505#endif
1506	    if (mexc) {
1507		sregs->trap = TRAP_DEXC;
1508		break;
1509	    } else {
1510		sregs->fsr &= ~FSR_QNE;
1511		sregs->fpstate = FP_EXE_MODE;
1512	    }
1513	    break;
1514	case STHA:
1515	    if (!chk_asi(sregs, &asi, op3)) break;
1516	case STH:
1517	    if (address & 0x1) {
1518		sregs->trap = TRAP_UNALI;
1519		break;
1520	    }
1521	    mexc = memory_write(asi, address, rdd, 1, &ws);
1522	    sregs->hold += ws;
1523	    if (mexc) {
1524		sregs->trap = TRAP_DEXC;
1525	    }
1526	    break;
1527	case STF:
1528	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1529		sregs->trap = TRAP_FPDIS;
1530		break;
1531	    }
1532	    if (address & 0x3) {
1533		sregs->trap = TRAP_UNALI;
1534		break;
1535	    }
1536	    if (ebase.simtime < sregs->ftime) {
1537		if (sregs->frd == rd)
1538		    sregs->fhold += (sregs->ftime - ebase.simtime);
1539	    }
1540	    mexc = memory_write(asi, address, &sregs->fsi[rd], 2, &ws);
1541	    sregs->hold += ws;
1542	    if (mexc) {
1543		sregs->trap = TRAP_DEXC;
1544	    }
1545	    break;
1546	case STDF:
1547	    if (!((sregs->psr & PSR_EF) && FP_PRES)) {
1548		sregs->trap = TRAP_FPDIS;
1549		break;
1550	    }
1551	    if (address & 0x7) {
1552		sregs->trap = TRAP_UNALI;
1553		break;
1554	    }
1555	    rd &= 0x1E;
1556	    if (ebase.simtime < sregs->ftime) {
1557		if ((sregs->frd == rd) || (sregs->frd + 1 == rd))
1558		    sregs->fhold += (sregs->ftime - ebase.simtime);
1559	    }
1560	    mexc = memory_write(asi, address, &sregs->fsi[rd], 3, &ws);
1561	    sregs->hold += ws;
1562	    sregs->icnt = T_STD;
1563#ifdef STAT
1564	    sregs->nstore++;	/* Double store counts twice */
1565#endif
1566	    if (mexc) {
1567		sregs->trap = TRAP_DEXC;
1568	    }
1569	    break;
1570	case SWAPA:
1571	    if (!chk_asi(sregs, &asi, op3)) break;
1572	case SWAP:
1573	    if (address & 0x3) {
1574		sregs->trap = TRAP_UNALI;
1575		break;
1576	    }
1577	    mexc = memory_read(asi, address, &data, 2, &ws);
1578	    sregs->hold += ws;
1579	    if (mexc) {
1580		sregs->trap = TRAP_DEXC;
1581		break;
1582	    }
1583	    mexc = memory_write(asi, address, rdd, 2, &ws);
1584	    sregs->hold += ws;
1585	    sregs->icnt = T_LDST;
1586	    if (mexc) {
1587		sregs->trap = TRAP_DEXC;
1588		break;
1589	    } else
1590		*rdd = data;
1591#ifdef STAT
1592	    sregs->nload++;
1593#endif
1594	    break;
1595
1596
1597	default:
1598	    sregs->trap = TRAP_UNIMP;
1599	    break;
1600	}
1601
1602#ifdef LOAD_DEL
1603
1604	if (!(op3 & 4)) {
1605	    sregs->ildtime = ebase.simtime + sregs->hold + sregs->icnt;
1606	    sregs->ildreg = rd;
1607	    if ((op3 | 0x10) == 0x13)
1608		sregs->ildreg |= 1;	/* Double load, odd register loaded
1609					 * last */
1610	}
1611#endif
1612	break;
1613
1614    default:
1615	sregs->trap = TRAP_UNIMP;
1616	break;
1617    }
1618    sregs->g[0] = 0;
1619    if (!sregs->trap) {
1620	sregs->pc = pc;
1621	sregs->npc = npc;
1622    }
1623    return 0;
1624}
1625
/*
 * FPop execution times.  fpexec() adds the matching value to sregs->ftime
 * (which is compared against ebase.simtime) when it executes the operation.
 */

/* Moves and sign manipulation */
#define T_FMOVs         2
#define T_FNEGs         2
#define T_FABSs         2

/* Add, subtract and compare */
#define T_FADDs         4
#define T_FADDd         4
#define T_FSUBs         4
#define T_FSUBd         4
#define T_FCMPs         4
#define T_FCMPd         4

/* Multiply, divide and square root */
#define T_FMULs         5
#define T_FMULd         9
#define T_FDIVs         20
#define T_FDIVd         35
#define T_FSQRTs        37
#define T_FSQRTd        65

/* Format conversions */
#define T_FiTOs         6
#define T_FiTOd         6
#define T_FsTOi         6
#define T_FsTOd         2
#define T_FdTOi         7
#define T_FdTOs         3

/*
 * FPop selector values: fpexec() switches on the opf field it extracts
 * from sregs->inst and matches it against these constants.
 */

/* Moves and sign manipulation */
#define FMOVs           0x01
#define FNEGs           0x05
#define FABSs           0x09

/* Square root */
#define FSQRTs          0x29
#define FSQRTd          0x2A

/* Add, subtract, multiply, divide */
#define FADDs           0x41
#define FADDd           0x42
#define FSUBs           0x45
#define FSUBd           0x46
#define FMULs           0x49
#define FMULd           0x4A
#define FDIVs           0x4D
#define FDIVd           0x4E

/* Compare (the FCMPE forms also raise an exception on unordered) */
#define FCMPs           0x51
#define FCMPd           0x52
#define FCMPEs          0x55
#define FCMPEd          0x56

/* Format conversions */
#define FiTOs           0xC4
#define FdTOs           0xC6
#define FiTOd           0xC8
#define FsTOd           0xC9
#define FsTOi           0xD1
#define FdTOi           0xD2
1671
1672
1673static int
1674fpexec(op3, rd, rs1, rs2, sregs)
1675    uint32          op3, rd, rs1, rs2;
1676    struct pstate  *sregs;
1677{
1678    uint32          opf, tem, accex;
1679    int32           fcc;
1680    uint32          ldadj;
1681
1682    if (sregs->fpstate == FP_EXC_MODE) {
1683	sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_SEQ_ERR;
1684	sregs->fpstate = FP_EXC_PE;
1685	return 0;
1686    }
1687    if (sregs->fpstate == FP_EXC_PE) {
1688	sregs->fpstate = FP_EXC_MODE;
1689	return TRAP_FPEXC;
1690    }
1691    opf = (sregs->inst >> 5) & 0x1ff;
1692
1693    /*
1694     * Check if we already have an FPop in the pipe. If so, halt until it is
1695     * finished by incrementing fhold with the remaining execution time
1696     */
1697
1698    if (ebase.simtime < sregs->ftime) {
1699	sregs->fhold = (sregs->ftime - ebase.simtime);
1700    } else {
1701	sregs->fhold = 0;
1702
1703	/* Check load dependencies. */
1704
1705	if (ebase.simtime < sregs->ltime) {
1706
1707	    /* Don't check rs1 if single operand instructions */
1708
1709	    if (((opf >> 6) == 0) || ((opf >> 6) == 3))
1710		rs1 = 32;
1711
1712	    /* Adjust for double floats */
1713
1714	    ldadj = opf & 1;
1715	    if (!(((sregs->flrd - rs1) >> ldadj) && ((sregs->flrd - rs2) >> ldadj)))
1716		sregs->fhold++;
1717	}
1718    }
1719
1720    sregs->finst++;
1721
1722    sregs->frs1 = rs1;		/* Store src and dst for dependecy check */
1723    sregs->frs2 = rs2;
1724    sregs->frd = rd;
1725
1726    sregs->ftime = ebase.simtime + sregs->hold + sregs->fhold;
1727
1728    /* SPARC is big-endian - swap double floats if host is little-endian */
1729    /* This is ugly - I know ... */
1730
1731    /* FIXME: should use (HOST_BYTE_ORDER == CURRENT_TARGET_BYTE_ORDER)
1732       but what about machines where float values are different endianness
1733       from integer values? */
1734
1735#ifdef HOST_LITTLE_ENDIAN
1736    rs1 &= 0x1f;
1737    switch (opf) {
1738	case FADDd:
1739	case FDIVd:
1740	case FMULd:
1741	case FSQRTd:
1742	case FSUBd:
1743        case FCMPd:
1744        case FCMPEd:
1745	case FdTOi:
1746	case FdTOs:
1747    	    sregs->fdp[rs1 | 1] = sregs->fs[rs1 & ~1];
1748    	    sregs->fdp[rs1 & ~1] = sregs->fs[rs1 | 1];
1749    	    sregs->fdp[rs2 | 1] = sregs->fs[rs2 & ~1];
1750    	    sregs->fdp[rs2 & ~1] = sregs->fs[rs2 | 1];
1751    default:
1752      break;
1753    }
1754#endif
1755
1756    clear_accex();
1757
1758    switch (opf) {
1759    case FABSs:
1760	sregs->fs[rd] = fabs(sregs->fs[rs2]);
1761	sregs->ftime += T_FABSs;
1762	sregs->frs1 = 32;	/* rs1 ignored */
1763	break;
1764    case FADDs:
1765	sregs->fs[rd] = sregs->fs[rs1] + sregs->fs[rs2];
1766	sregs->ftime += T_FADDs;
1767	break;
1768    case FADDd:
1769	sregs->fd[rd >> 1] = sregs->fd[rs1 >> 1] + sregs->fd[rs2 >> 1];
1770	sregs->ftime += T_FADDd;
1771	break;
1772    case FCMPs:
1773    case FCMPEs:
1774	if (sregs->fs[rs1] == sregs->fs[rs2])
1775	    fcc = 3;
1776	else if (sregs->fs[rs1] < sregs->fs[rs2])
1777	    fcc = 2;
1778	else if (sregs->fs[rs1] > sregs->fs[rs2])
1779	    fcc = 1;
1780	else
1781	    fcc = 0;
1782	sregs->fsr |= 0x0C00;
1783	sregs->fsr &= ~(fcc << 10);
1784	sregs->ftime += T_FCMPs;
1785	sregs->frd = 32;	/* rd ignored */
1786	if ((fcc == 0) && (opf == FCMPEs)) {
1787	    sregs->fpstate = FP_EXC_PE;
1788	    sregs->fsr = (sregs->fsr & ~0x1C000) | (1 << 14);
1789	}
1790	break;
1791    case FCMPd:
1792    case FCMPEd:
1793	if (sregs->fd[rs1 >> 1] == sregs->fd[rs2 >> 1])
1794	    fcc = 3;
1795	else if (sregs->fd[rs1 >> 1] < sregs->fd[rs2 >> 1])
1796	    fcc = 2;
1797	else if (sregs->fd[rs1 >> 1] > sregs->fd[rs2 >> 1])
1798	    fcc = 1;
1799	else
1800	    fcc = 0;
1801	sregs->fsr |= 0x0C00;
1802	sregs->fsr &= ~(fcc << 10);
1803	sregs->ftime += T_FCMPd;
1804	sregs->frd = 32;	/* rd ignored */
1805	if ((fcc == 0) && (opf == FCMPEd)) {
1806	    sregs->fpstate = FP_EXC_PE;
1807	    sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_IEEE;
1808	}
1809	break;
1810    case FDIVs:
1811	sregs->fs[rd] = sregs->fs[rs1] / sregs->fs[rs2];
1812	sregs->ftime += T_FDIVs;
1813	break;
1814    case FDIVd:
1815	sregs->fd[rd >> 1] = sregs->fd[rs1 >> 1] / sregs->fd[rs2 >> 1];
1816	sregs->ftime += T_FDIVd;
1817	break;
1818    case FMOVs:
1819	sregs->fs[rd] = sregs->fs[rs2];
1820	sregs->ftime += T_FMOVs;
1821	sregs->frs1 = 32;	/* rs1 ignored */
1822	break;
1823    case FMULs:
1824	sregs->fs[rd] = sregs->fs[rs1] * sregs->fs[rs2];
1825	sregs->ftime += T_FMULs;
1826	break;
1827    case FMULd:
1828	sregs->fd[rd >> 1] = sregs->fd[rs1 >> 1] * sregs->fd[rs2 >> 1];
1829	sregs->ftime += T_FMULd;
1830	break;
1831    case FNEGs:
1832	sregs->fs[rd] = -sregs->fs[rs2];
1833	sregs->ftime += T_FNEGs;
1834	sregs->frs1 = 32;	/* rs1 ignored */
1835	break;
1836    case FSQRTs:
1837	if (sregs->fs[rs2] < 0.0) {
1838	    sregs->fpstate = FP_EXC_PE;
1839	    sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_IEEE;
1840	    sregs->fsr = (sregs->fsr & 0x1f) | 0x10;
1841	    break;
1842	}
1843	sregs->fs[rd] = sqrt(sregs->fs[rs2]);
1844	sregs->ftime += T_FSQRTs;
1845	sregs->frs1 = 32;	/* rs1 ignored */
1846	break;
1847    case FSQRTd:
1848	if (sregs->fd[rs2 >> 1] < 0.0) {
1849	    sregs->fpstate = FP_EXC_PE;
1850	    sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_IEEE;
1851	    sregs->fsr = (sregs->fsr & 0x1f) | 0x10;
1852	    break;
1853	}
1854	sregs->fd[rd >> 1] = sqrt(sregs->fd[rs2 >> 1]);
1855	sregs->ftime += T_FSQRTd;
1856	sregs->frs1 = 32;	/* rs1 ignored */
1857	break;
1858    case FSUBs:
1859	sregs->fs[rd] = sregs->fs[rs1] - sregs->fs[rs2];
1860	sregs->ftime += T_FSUBs;
1861	break;
1862    case FSUBd:
1863	sregs->fd[rd >> 1] = sregs->fd[rs1 >> 1] - sregs->fd[rs2 >> 1];
1864	sregs->ftime += T_FSUBd;
1865	break;
1866    case FdTOi:
1867	sregs->fsi[rd] = (int) sregs->fd[rs2 >> 1];
1868	sregs->ftime += T_FdTOi;
1869	sregs->frs1 = 32;	/* rs1 ignored */
1870	break;
1871    case FdTOs:
1872	sregs->fs[rd] = (float32) sregs->fd[rs2 >> 1];
1873	sregs->ftime += T_FdTOs;
1874	sregs->frs1 = 32;	/* rs1 ignored */
1875	break;
1876    case FiTOs:
1877	sregs->fs[rd] = (float32) sregs->fsi[rs2];
1878	sregs->ftime += T_FiTOs;
1879	sregs->frs1 = 32;	/* rs1 ignored */
1880	break;
1881    case FiTOd:
1882	sregs->fd[rd >> 1] = (float64) sregs->fsi[rs2];
1883	sregs->ftime += T_FiTOd;
1884	sregs->frs1 = 32;	/* rs1 ignored */
1885	break;
1886    case FsTOi:
1887	sregs->fsi[rd] = (int) sregs->fs[rs2];
1888	sregs->ftime += T_FsTOi;
1889	sregs->frs1 = 32;	/* rs1 ignored */
1890	break;
1891    case FsTOd:
1892	sregs->fd[rd >> 1] = sregs->fs[rs2];
1893	sregs->ftime += T_FsTOd;
1894	sregs->frs1 = 32;	/* rs1 ignored */
1895	break;
1896
1897    default:
1898	sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_UNIMP;
1899	sregs->fpstate = FP_EXC_PE;
1900    }
1901
1902#ifdef ERRINJ
1903    if (errftt) {
1904	sregs->fsr = (sregs->fsr & ~FSR_TT) | (errftt << 14);
1905	sregs->fpstate = FP_EXC_PE;
1906	if (sis_verbose) printf("Inserted fpu error %X\n",errftt);
1907	errftt = 0;
1908    }
1909#endif
1910
1911    accex = get_accex();
1912
1913#ifdef HOST_LITTLE_ENDIAN
1914    switch (opf) {
1915    case FADDd:
1916    case FDIVd:
1917    case FMULd:
1918    case FSQRTd:
1919    case FSUBd:
1920    case FiTOd:
1921    case FsTOd:
1922	sregs->fs[rd & ~1] = sregs->fdp[rd | 1];
1923	sregs->fs[rd | 1] = sregs->fdp[rd & ~1];
1924    default:
1925      break;
1926    }
1927#endif
1928    if (sregs->fpstate == FP_EXC_PE) {
1929	sregs->fpq[0] = sregs->pc;
1930	sregs->fpq[1] = sregs->inst;
1931	sregs->fsr |= FSR_QNE;
1932    } else {
1933	tem = (sregs->fsr >> 23) & 0x1f;
1934	if (tem & accex) {
1935	    sregs->fpstate = FP_EXC_PE;
1936	    sregs->fsr = (sregs->fsr & ~FSR_TT) | FP_IEEE;
1937	    sregs->fsr = ((sregs->fsr & ~0x1f) | accex);
1938	} else {
1939	    sregs->fsr = ((((sregs->fsr >> 5) | accex) << 5) | accex);
1940	}
1941	if (sregs->fpstate == FP_EXC_PE) {
1942	    sregs->fpq[0] = sregs->pc;
1943	    sregs->fpq[1] = sregs->inst;
1944	    sregs->fsr |= FSR_QNE;
1945	}
1946    }
1947    clear_accex();
1948
1949    return 0;
1950
1951
1952}
1953
1954static int
1955chk_asi(sregs, asi, op3)
1956    struct pstate  *sregs;
1957    uint32 *asi, op3;
1958
1959{
1960    if (!(sregs->psr & PSR_S)) {
1961	sregs->trap = TRAP_PRIVI;
1962	return 0;
1963    } else if (sregs->inst & INST_I) {
1964	sregs->trap = TRAP_UNIMP;
1965	return 0;
1966    } else
1967	*asi = (sregs->inst >> 5) & 0x0ff;
1968    return 1;
1969}
1970
1971int
1972execute_trap(sregs)
1973    struct pstate  *sregs;
1974{
1975    int32           cwp;
1976
1977    if (sregs->trap == 256) {
1978	sregs->pc = 0;
1979	sregs->npc = 4;
1980	sregs->trap = 0;
1981    } else if (sregs->trap == 257) {
1982	    return ERROR;
1983    } else {
1984
1985	if ((sregs->psr & PSR_ET) == 0)
1986	    return ERROR;
1987
1988	sregs->tbr = (sregs->tbr & 0xfffff000) | (sregs->trap << 4);
1989	sregs->trap = 0;
1990	sregs->psr &= ~PSR_ET;
1991	sregs->psr |= ((sregs->psr & PSR_S) >> 1);
1992	sregs->annul = 0;
1993	sregs->psr = (((sregs->psr & PSR_CWP) - 1) & 0x7) | (sregs->psr & ~PSR_CWP);
1994	cwp = ((sregs->psr & PSR_CWP) << 4);
1995	sregs->r[(cwp + 17) & 0x7f] = sregs->pc;
1996	sregs->r[(cwp + 18) & 0x7f] = sregs->npc;
1997	sregs->psr |= PSR_S;
1998	sregs->pc = sregs->tbr;
1999	sregs->npc = sregs->tbr + 4;
2000
2001        if ( 0 != (1 & sregs->asr17) ) {
2002            /* single vector trapping! */
2003            sregs->pc = sregs->tbr & 0xfffff000;
2004            sregs->npc = sregs->pc + 4;
2005        }
2006
2007	/* Increase simulator time */
2008	sregs->icnt = TRAP_C;
2009
2010    }
2011
2012
2013    return 0;
2014
2015}
2016
2017extern struct irqcell irqarr[16];
2018
2019int
2020check_interrupts(sregs)
2021    struct pstate  *sregs;
2022{
2023#ifdef ERRINJ
2024    if (errtt) {
2025	sregs->trap = errtt;
2026	if (sis_verbose) printf("Inserted error trap 0x%02X\n",errtt);
2027	errtt = 0;
2028    }
2029#endif
2030
2031    if ((ext_irl) && (sregs->psr & PSR_ET) &&
2032	((ext_irl == 15) || (ext_irl > (int) ((sregs->psr & PSR_PIL) >> 8)))) {
2033	if (sregs->trap == 0) {
2034	    sregs->trap = 16 + ext_irl;
2035	    irqarr[ext_irl & 0x0f].callback(irqarr[ext_irl & 0x0f].arg);
2036	    return 1;
2037	}
2038    }
2039    return 0;
2040}
2041
2042void
2043init_regs(sregs)
2044    struct pstate  *sregs;
2045{
2046    sregs->pc = 0;
2047    sregs->npc = 4;
2048    sregs->trap = 0;
2049    sregs->psr &= 0x00f03fdf;
2050    sregs->psr |= 0x11000080;	/* Set supervisor bit */
2051    sregs->breakpoint = 0;
2052    sregs->annul = 0;
2053    sregs->fpstate = FP_EXE_MODE;
2054    sregs->fpqn = 0;
2055    sregs->ftime = 0;
2056    sregs->ltime = 0;
2057    sregs->err_mode = 0;
2058    ext_irl = 0;
2059    sregs->g[0] = 0;
2060#ifdef HOST_LITTLE_ENDIAN
2061    sregs->fdp = (float32 *) sregs->fd;
2062    sregs->fsi = (int32 *) sregs->fs;
2063#else
2064    sregs->fs = (float32 *) sregs->fd;
2065    sregs->fsi = (int32 *) sregs->fd;
2066#endif
2067    sregs->fsr = 0;
2068    sregs->fpu_pres = !nfp;
2069    set_fsr(sregs->fsr);
2070    sregs->bphit = 0;
2071    sregs->ildreg = 0;
2072    sregs->ildtime = 0;
2073
2074    sregs->y = 0;
2075    sregs->asr17 = 0;
2076
2077    sregs->rett_err = 0;
2078    sregs->jmpltime = 0;
2079}
2080