#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

! Global registers g2 and g3 are used as temporaries by the software
! path below; 64-bit ABI builds must declare that use to the assembler.
#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

! Position-independent builds need the helper thunk that the address
! load at the top of the function relies on.
#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

!----------------------------------------------------------------------
! bn_GF2m_mul_2x2 -- carry-less (polynomial over GF(2)) multiplication
! of two 64-bit operands, each passed as two 32-bit halves, giving a
! 128-bit product that is stored as four 32-bit words.
!
! In:   o0 = r, pointer to four 32-bit result words
!       o1 = high 32 bits of a,  o2 = low 32 bits of a
!       o3 = high 32 bits of b,  o4 = low 32 bits of b
!       The halves are merged with "or" after shifting the high half,
!       so the upper 32 bits of o2 and o4 are assumed to be zero
!       -- TODO confirm against the callers.
!       NOTE(review): presumably called from C with 32-bit BN_ULONG
!       arguments (r, a1, a0, b1, b0); the prototype is not visible in
!       this file.
! Out:  r[0] = product bits   0..31     r[1] = product bits  32..63
!       r[2] = product bits  64..95     r[3] = product bits  96..127
!
! Two code paths, selected at run time from OPENSSL_sparcv9cap_P[0]:
!   * VIS3 bit set:   leaf routine using the xmulx / xmulxhi
!                     instructions (emitted as raw words, see below).
!   * VIS3 bit clear: software routine (.Lsoftware) that opens a
!                     register window and keeps a 16-entry table of
!                     64-bit values (128 bytes) on its stack frame.
!
! NOTE(review): in the software path the table load addresses depend
! on the value of b.
!----------------------------------------------------------------------
.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
        SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
        ld	[%g1+0],%g1             	! OPENSSL_sparcv9cap_P[0]

        andcc	%g1, SPARCV9_VIS3, %g0		! test the VIS3 capability bit
        bz,pn	%icc,.Lsoftware			! not set: use the table code
        nop					! branch delay slot

	! ---- hardware path (leaf, no register window) ----
	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1			! o1 = 64-bit a
	or	%o4, %o3, %o3			! o3 = 64-bit b
	! Raw encodings, so the file does not require an assembler that
	! knows the VIS3 mnemonics.
	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1			! 13 cycles later
	st	%o2, [%o0+0]			! r[0] = low  word of low  half
	st	%o1, [%o0+4]			! r[1] = high word of low  half
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]			! r[2] = low  word of high half
	retl
	st	%o3, [%o0+12]			! r[3], stored in the delay slot

	! ---- software path ----
	! Register roles after the save:
	!   g1 = a while the table is built, then the low  64 bits of
	!        the product (lo)
	!   o3 = a8 while the table is built, then the high 64 bits of
	!        the product (hi)
	!   o7 = b
	!   l0 = tab, base of the 16-entry table on the stack
	!   o0,o1,o2,o4,o5 = table-building temporaries
	!   g2,g3 = shift temporaries, g4,g5 = table index / loaded entry
	!
	! Algorithm:
	!   a1 = a with its top three bits cleared, so a1 times any
	!   4-bit value still fits in 64 bits.  tab[i] = carry-less
	!   product of a1 and i, for i = 0..15.
	!   The three cleared bits (63, 62, 61 of a) are handled
	!   separately: each is broadcast to a full-width mask, and-ed
	!   with b, and the masked b is folded into the product at shift
	!   63, 62 and 61 respectively.
	!   Then for every 4-bit nibble k of b (k = 0..15) the entry
	!   t = tab[nibble] is folded in as  lo ^= t << 4k  and
	!   hi ^= t >> (64-4k).
.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME-128,%sp	! new window plus 128 bytes for tab

	sllx	%i1,32,%g1
	mov	-1,%o4
	sllx	%i3,32,%o7
	or	%i2,%g1,%g1			! g1 = 64-bit a
	srlx	%o4,1,%o5			! 0x7fff...
	or	%i4,%o7,%o7			! o7 = 64-bit b
	srlx	%o4,2,%o4			! 0x3fff...
	add	%sp,STACK_BIAS+STACK_FRAME,%l0	! l0 = tab

	sllx	%g1,2,%o2
	mov	%g1,%o0
	sllx	%g1,1,%o1

	srax	%o2,63,%g5			! broadcast 61st bit
	and	%o5,%o2,%o2			! (a<<2)&0x7fff...
	srlx	%o5,2,%o5
	srax	%o1,63,%g4			! broadcast 62nd bit
	and	%o4,%o1,%o1			! (a<<1)&0x3fff...
	srax	%o0,63,%g1			! broadcast 63rd bit
	and	%o5,%o0,%o0			! (a<<0)&0x1fff...

	sllx	%o0,3,%o3			! a8 = a1<<3
	and	%o7,%g1,%g1			! g1 = b if bit 63 of a is set, else 0
	and	%o7,%g4,%g4			! g4 = b if bit 62 of a is set, else 0
	and	%o7,%g5,%g5			! g5 = b if bit 61 of a is set, else 0

	! Build tab[0..15].  The xor instructions are interleaved with
	! the stores; each register holds the value named in the
	! comment at the moment it is stored.
	stx	%g0,[%l0+0*8]			! tab[0]=0
	xor	%o0,%o1,%o4
	stx	%o0,[%l0+1*8]			! tab[1]=a1
	stx	%o1,[%l0+2*8]			! tab[2]=a2
	 xor	%o2,%o3,%o5
	stx	%o4,[%l0+3*8]			! tab[3]=a1^a2
	 xor	%o2,%o0,%o0

	stx	%o2,[%l0+4*8]			! tab[4]=a4
	xor	%o2,%o1,%o1
	stx	%o0,[%l0+5*8]			! tab[5]=a1^a4
	xor	%o2,%o4,%o4
	stx	%o1,[%l0+6*8]			! tab[6]=a2^a4
	 xor	%o5,%o0,%o0
	stx	%o4,[%l0+7*8]			! tab[7]=a1^a2^a4
	 xor	%o5,%o1,%o1

	stx	%o3,[%l0+8*8]			! tab[8]=a8
	xor	%o5,%o4,%o4
	stx	%o0,[%l0+9*8]			! tab[9]=a1^a8
	 xor	%o2,%o0,%o0
	stx	%o1,[%l0+10*8]			! tab[10]=a2^a8
	 xor	%o2,%o1,%o1
	stx	%o4,[%l0+11*8]		! tab[11]=a1^a2^a8

	! From here on the top-bit contributions are started in the gaps
	! between the remaining stores.  o3 (a8) was stored above and is
	! reused as hi; g1 becomes lo.
	xor	%o2,%o4,%o4
	stx	%o5,[%l0+12*8]		! tab[12]=a4^a8
	 srlx	%g1,1,%o3			! hi  = (bit-63 mask & b) >> 1
	stx	%o0,[%l0+13*8]			! tab[13]=a1^a4^a8
	 sllx	%g1,63,%g1			! lo  = (bit-63 mask & b) << 63
	stx	%o1,[%l0+14*8]			! tab[14]=a2^a4^a8
	 srlx	%g4,2,%g2			! high part of the bit-62 term
	stx	%o4,[%l0+15*8]		! tab[15]=a1^a2^a4^a8

	sllx	%g4,62,%o0			! low  part of the bit-62 term
	 sllx	%o7,3,%g4			! nibble 0 of b, scaled by 8
	srlx	%g5,3,%g3			! high part of the bit-61 term
	 and	%g4,120,%g4			! 120 = 0xf<<3, byte offset into tab
	sllx	%g5,61,%o1			! low  part of the bit-61 term
	 ldx	[%l0+%g4],%g4			! g4 = tab[nibble 0]
	 srlx	%o7,4-3,%g5			! nibble 1 of b, scaled by 8
	xor	%g2,%o3,%o3			! hi ^= bit-62 term
	 and	%g5,120,%g5
	xor	%o0,%g1,%g1			! lo ^= bit-62 term
	 ldx	[%l0+%g5],%g5			! g5 = tab[nibble 1]
	xor	%g3,%o3,%o3			! hi ^= bit-61 term

	xor	%g4,%g1,%g1			! lo ^= tab[nibble 0], shift 0
	srlx	%o7,8-3,%g4			! nibble 2 of b, scaled by 8
	 xor	%o1,%g1,%g1			! lo ^= bit-61 term
	and	%g4,120,%g4

	! Unrolled, software-pipelined nibble steps.  Each step shifts
	! the previously loaded entry left by 4k into lo and right by
	! 64-4k into hi, while the load for the next nibble is in flight
	! and the index for the one after that is being computed.  g4/g5
	! and g2/g3 alternate roles from step to step.
	sllx	%g5,4,%g2			! k=1
	ldx	[%l0+%g4],%g4			! g4 = tab[nibble 2]
	srlx	%g5,60,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,12-3,%g5			! nibble 3 of b, scaled by 8
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,8,%g3			! k=2
	ldx	[%l0+%g5],%g5			! g5 = tab[nibble 3]
	srlx	%g4,56,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,16-3,%g4			! nibble 4
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,12,%g2			! k=3
	ldx	[%l0+%g4],%g4
	srlx	%g5,52,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,20-3,%g5			! nibble 5
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,16,%g3			! k=4
	ldx	[%l0+%g5],%g5
	srlx	%g4,48,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,24-3,%g4			! nibble 6
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,20,%g2			! k=5
	ldx	[%l0+%g4],%g4
	srlx	%g5,44,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,28-3,%g5			! nibble 7
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,24,%g3			! k=6
	ldx	[%l0+%g5],%g5
	srlx	%g4,40,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,32-3,%g4			! nibble 8
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,28,%g2			! k=7
	ldx	[%l0+%g4],%g4
	srlx	%g5,36,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,36-3,%g5			! nibble 9
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,32,%g3			! k=8
	ldx	[%l0+%g5],%g5
	srlx	%g4,32,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,40-3,%g4			! nibble 10
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,36,%g2			! k=9
	ldx	[%l0+%g4],%g4
	srlx	%g5,28,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,44-3,%g5			! nibble 11
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,40,%g3			! k=10
	ldx	[%l0+%g5],%g5
	srlx	%g4,24,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,48-3,%g4			! nibble 12
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,44,%g2			! k=11
	ldx	[%l0+%g4],%g4
	srlx	%g5,20,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,52-3,%g5			! nibble 13
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,48,%g3			! k=12
	ldx	[%l0+%g5],%g5
	srlx	%g4,16,%g2
	xor	%g3,%g1,%g1
	srlx	%o7,56-3,%g4			! nibble 14
	xor	%g2,%o3,%o3
	and	%g4,120,%g4
	sllx	%g5,52,%g2			! k=13
	ldx	[%l0+%g4],%g4
	srlx	%g5,12,%g3
	xor	%g2,%g1,%g1
	srlx	%o7,60-3,%g5			! nibble 15
	xor	%g3,%o3,%o3
	and	%g5,120,%g5
	sllx	%g4,56,%g3			! k=14
	ldx	[%l0+%g5],%g5			! g5 = tab[nibble 15]
	srlx	%g4,8,%g2
	xor	%g3,%g1,%g1

	! Pipeline drain: finish k=14 and fold in the last entry (k=15).
	sllx	%g5,60,%g3
	 xor	%g2,%o3,%o3
	srlx	%g5,4,%g2
	xor	%g3,%g1,%g1
	xor	%g2,%o3,%o3

	! Store lo (g1) and hi (o3) as four 32-bit words.
	srlx	%g1,32,%i1
	st	%g1,[%i0+0]			! r[0] = lo bits  0..31
	st	%i1,[%i0+4]			! r[1] = lo bits 32..63
	srlx	%o3,32,%i2
	st	%o3,[%i0+8]			! r[2] = hi bits  0..31
	st	%i2,[%i0+12]			! r[3] = hi bits 32..63

	ret
	restore					! delay slot: drop the register window
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro@openssl.org>"
.align	4