1/*	$NetBSD: n_atan2.S,v 1.11 2024/05/07 15:49:33 riastradh Exp $	*/
2/*
3 * Copyright (c) 1985, 1993
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 * 3. Neither the name of the University nor the names of its contributors
15 *    may be used to endorse or promote products derived from this software
16 *    without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 *	@(#)atan2.s	8.1 (Berkeley) 6/4/93
31 */
32
33#include <machine/asm.h>
34
35/*
36 * ATAN2(Y,X)
37 * RETURN ARG (X+iY)
38 * VAX D FORMAT (56 BITS PRECISION)
39 * CODED IN VAX ASSEMBLY LANGUAGE BY K.C. NG, 4/16/85;
40 *
41 *
42 * Method :
43 *	1. Reduce y to positive by atan2(y,x)=-atan2(-y,x).
44 *	2. Reduce x to positive by (if x and y are unexceptional):
45 *		ARG (x+iy) = arctan(y/x)   	   ... if x > 0,
46 *		ARG (x+iy) = pi - arctan[y/(-x)]   ... if x < 0,
47 *	3. According to the integer k=4t+0.25 truncated , t=y/x, the argument
48 *	   is further reduced to one of the following intervals and the
49 *	   arctangent of y/x is evaluated by the corresponding formula:
50 *
51 *          [0,7/16]	   atan(y/x) = t - t^3*(a1+t^2*(a2+...(a10+t^2*a11)...)
52 *	   [7/16,11/16]    atan(y/x) = atan(1/2) + atan( (y-x/2)/(x+y/2) )
53 *	   [11/16.19/16]   atan(y/x) = atan( 1 ) + atan( (y-x)/(x+y) )
54 *	   [19/16,39/16]   atan(y/x) = atan(3/2) + atan( (y-1.5x)/(x+1.5y) )
55 *	   [39/16,INF]     atan(y/x) = atan(INF) + atan( -x/y )
56 *
57 * Special cases:
58 * Notations: atan2(y,x) == ARG (x+iy) == ARG(x,y).
59 *
60 *	ARG( NAN , (anything) ) is NaN;
61 *	ARG( (anything), NaN ) is NaN;
62 *	ARG(+(anything but NaN), +-0) is +-0  ;
63 *	ARG(-(anything but NaN), +-0) is +-PI ;
64 *	ARG( 0, +-(anything but 0 and NaN) ) is +-PI/2;
65 *	ARG( +INF,+-(anything but INF and NaN) ) is +-0 ;
66 *	ARG( -INF,+-(anything but INF and NaN) ) is +-PI;
67 *	ARG( +INF,+-INF ) is +-PI/4 ;
68 *	ARG( -INF,+-INF ) is +-3PI/4;
69 *	ARG( (anything but,0,NaN, and INF),+-INF ) is +-PI/2;
70 *
71 * Accuracy:
72 *	atan2(y,x) returns the exact ARG(x+iy) nearly rounded.
73 */
74
75WEAK_ALIAS(atan2f, _atan2f)
76
77ENTRY(_atan2f, 0)
78	cvtfd	4(%ap),-(%sp)
79	calls	$2,_C_LABEL(_atan2)
80	cvtdf	%r0,%r0
81	ret
82END(_atan2f)
83
84WEAK_ALIAS(atan2, _atan2)
85WEAK_ALIAS(atan2l, _atan2l)
86
87STRONG_ALIAS(_atan2l, _atan2)
88ENTRY(_atan2, 0x0fc0)
89	movq	4(%ap),%r2		# %r2 = y
90	movq	12(%ap),%r4		# %r4 = x
91	bicw3	$0x7f,%r2,%r0
92	bicw3	$0x7f,%r4,%r1
93	cmpw	%r0,$0x8000		# y is the reserved operand
94	jeql	resop
95	cmpw	%r1,$0x8000		# x is the reserved operand
96	jeql	resop
97	subl2	$8,%sp
98	bicw3	$0x7fff,%r2,-4(%fp)	# copy y sign bit to -4(%fp)
99	bicw3	$0x7fff,%r4,-8(%fp)	# copy x sign bit to -8(%fp)
100	cmpd	%r4,$0x4080		# x = 1.0 ?
101	bneq	xnot1
102	movq	%r2,%r0
103	bicw2	$0x8000,%r0		# t = |y|
104	movq	%r0,%r2			# y = |y|
105	jbr	begin
106xnot1:
107	bicw3	$0x807f,%r2,%r11		# yexp
108	jeql	yeq0			# if y=0 goto yeq0
109	bicw3	$0x807f,%r4,%r10		# xexp
110	jeql	pio2			# if x=0 goto pio2
111	subw2	%r10,%r11			# k = yexp - xexp
112	cmpw	%r11,$0x2000		# k >= 64 (exp) ?
113	jgeq	pio2			# atan2 = +-pi/2
114	divd3	%r4,%r2,%r0		# t = y/x  never overflow
115	bicw2	$0x8000,%r0		# t > 0
116	bicw2	$0xff80,%r2		# clear the exponent of y
117	bicw2	$0xff80,%r4		# clear the exponent of x
118	bisw2	$0x4080,%r2		# normalize y to [1,2)
119	bisw2	$0x4080,%r4		# normalize x to [1,2)
120	subw2	%r11,%r4			# scale x so that yexp-xexp=k
121begin:
122	cmpw	%r0,$0x411c		# t : 39/16
123	jgeq	L50
124	addl3	$0x180,%r0,%r10		# 8*t
125	cvtrfl	%r10,%r10			# [8*t] rounded to int
126	ashl	$-1,%r10,%r10		# [8*t]/2
127	casel	%r10,$0,$4
128L1:
129	.word	L20-L1
130	.word	L20-L1
131	.word	L30-L1
132	.word	L40-L1
133	.word	L40-L1
134L10:
135	movq	$0xb4d9940f985e407b,%r6	# Hi=.98279372324732906796d0
136	movq	$0x21b1879a3bc2a2fc,%r8	# Lo=-.17092002525602665777d-17
137	subd3	%r4,%r2,%r0		# y-x
138	addw2	$0x80,%r0		# 2(y-x)
139	subd2	%r4,%r0			# 2(y-x)-x
140	addw2	$0x80,%r4		# 2x
141	movq	%r2,%r10
142	addw2	$0x80,%r10		# 2y
143	addd2	%r10,%r2			# 3y
144	addd2	%r4,%r2			# 3y+2x
145	divd2	%r2,%r0			# (2y-3x)/(2x+3y)
146	jbr	L60
147L20:
148	cmpw	%r0,$0x3280		# t : 2**(-28)
149	jlss	L80
150	clrq	%r6			# Hi=%r6=0, Lo=%r8=0
151	clrq	%r8
152	jbr	L60
153L30:
154	movq	$0xda7b2b0d63383fed,%r6	# Hi=.46364760900080611433d0
155	movq	$0xf0ea17b2bf912295,%r8	# Lo=.10147340032515978826d-17
156	movq	%r2,%r0
157	addw2	$0x80,%r0		# 2y
158	subd2	%r4,%r0			# 2y-x
159	addw2	$0x80,%r4		# 2x
160	addd2	%r2,%r4			# 2x+y
161	divd2	%r4,%r0 			# (2y-x)/(2x+y)
162	jbr	L60
163L50:
164	movq	$0x68c2a2210fda40c9,%r6	# Hi=1.5707963267948966135d1
165	movq	$0x06e0145c26332326,%r8	# Lo=.22517417741562176079d-17
166	cmpw	%r0,$0x5100		# y : 2**57
167	bgeq	L90
168	divd3	%r2,%r4,%r0
169	bisw2	$0x8000,%r0 		# -x/y
170	jbr	L60
171L40:
172	movq	$0x68c2a2210fda4049,%r6	# Hi=.78539816339744830676d0
173	movq	$0x06e0145c263322a6,%r8	# Lo=.11258708870781088040d-17
174	subd3	%r4,%r2,%r0		# y-x
175	addd2	%r4,%r2			# y+x
176	divd2	%r2,%r0			# (y-x)/(y+x)
177L60:
178	movq	%r0,%r10
179	muld2	%r0,%r0
180	polyd	%r0,$12,ptable
181	muld2	%r10,%r0
182	subd2	%r0,%r8
183	addd3	%r8,%r10,%r0
184	addd2	%r6,%r0
185L80:
186	movw	-8(%fp),%r2
187	bneq	pim
188	bisw2	-4(%fp),%r0		# return sign(y)*%r0
189	ret
190L90:					# x >= 2**25
191	movq	%r6,%r0
192	jbr	L80
193pim:
194	subd3	%r0,$0x68c2a2210fda4149,%r0	# pi-t
195	bisw2	-4(%fp),%r0
196	ret
197yeq0:
198	movw	-8(%fp),%r2
199	beql	zero			# if sign(x)=1 return pi
200	movq	$0x68c2a2210fda4149,%r0	# pi=3.1415926535897932270d1
201	ret
202zero:
203	clrq	%r0			# return 0
204	ret
205pio2:
206	movq	$0x68c2a2210fda40c9,%r0	# pi/2=1.5707963267948966135d1
207	bisw2	-4(%fp),%r0		# return sign(y)*pi/2
208	ret
209resop:
210	movq	$0x8000,%r0		# propagate the reserved operand
211	ret
212END(_atan2)
213
214	_ALIGN_TEXT
215ptable:
216	.quad	0xb50f5ce96e7abd60
217	.quad	0x51e44a42c1073e02
218	.quad	0x3487e3289643be35
219	.quad	0xdb62066dffba3e54
220	.quad	0xcf8e2d5199abbe70
221	.quad	0x26f39cb884883e88
222	.quad	0x135117d18998be9d
223	.quad	0x602ce9742e883eba
224	.quad	0xa35ad0be8e38bee3
225	.quad	0xffac922249243f12
226	.quad	0x7f14ccccccccbf4c
227	.quad	0xaa8faaaaaaaa3faa
228	.quad	0x0000000000000000
229