1//
2// Copyright (c) 2000, Intel Corporation
3// All rights reserved.
4//
5// Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
6// Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
7// Intel Corporation.
8//
9// WARRANTY DISCLAIMER
10//
11// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
12// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
13// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
14// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
15// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
16// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
17// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
18// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
19// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
20// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
21// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
22//
23// Intel Corporation is the author of this code, and requests that all
24// problem reports or change requests be submitted to it directly at
25// http://developer.intel.com/opensource.
26//
27
28#include <machine/asm.h>
29__FBSDID("$FreeBSD: releng/10.2/lib/libc/ia64/gen/__divsf3.S 111777 2003-03-03 01:09:46Z obrien $");
30
31ENTRY(__divsf3, 0)
32{ .mfi
33  // __divsf3: single-precision quotient a / b, returned in f8
34  // on entry a is in f8 and b is in f9
35
36  // general registers used: none
37  // predicate registers used: p6 (written by frcpa, guards steps 2-9)
38  // floating-point registers used: f6, f7, f8, f9 (f0 and f1 are read as 0 and 1)
39
40  nop.m 0
41  // copy a, the first argument, into f6 (f8 is overwritten by step 1)
42  mov f6=f8
43  nop.i 0;;
44} { .mfi
45  nop.m 0
46  // copy b, the second argument, into f7 (scratch for the error terms)
47  mov f7=f9
48  nop.i 0;;
49} { .mfi
50
51  // BEGIN SINGLE PRECISION LATENCY-OPTIMIZED DIVIDE ALGORITHM
52  // (if frcpa leaves p6 clear, steps 2-9 do not execute and f8 is returned as frcpa wrote it)
53  nop.m 0
54  // Step (1)
55  // y0 = approximation of 1 / b in f8; p6 enables the refinement steps below
56  frcpa.s0 f8,p6=f6,f7
57  nop.i 0;;
58} { .mfi
59  nop.m 0
60  // Step (2)
61  // q0 = a * y0 in f6 (f0 supplies the zero addend)
62  (p6) fma.s1 f6=f6,f8,f0
63  nop.i 0
64} { .mfi
65  nop.m 0
66  // Step (3) - no stop bit after step (2): both only read the outputs of step (1)
67  // e0 = 1 - b * y0 in f7 (f1 supplies the constant 1)
68  (p6) fnma.s1 f7=f7,f8,f1
69  nop.i 0;;
70} { .mfi
71  nop.m 0
72  // Step (4)
73  // q1 = q0 + e0 * q0 in f6
74  (p6) fma.s1 f6=f7,f6,f6
75  nop.i 0
76} { .mfi
77  nop.m 0
78  // Step (5) - same instruction group as step (4); reads e0 before replacing it
79  // e1 = e0 * e0 in f7
80  (p6) fma.s1 f7=f7,f7,f0
81  nop.i 0;;
82} { .mfi
83  nop.m 0
84  // Step (6)
85  // q2 = q1 + e1 * q1 in f6
86  (p6) fma.s1 f6=f7,f6,f6
87  nop.i 0
88} { .mfi
89  nop.m 0
90  // Step (7) - same instruction group as step (6); reads e1 before replacing it
91  // e2 = e1 * e1 in f7
92  (p6) fma.s1 f7=f7,f7,f0
93  nop.i 0;;
94} { .mfi
95  nop.m 0
96  // Step (8)
97  // q3 = q2 + e2 * q2 in f6, computed with the double-precision completer (.d)
98  (p6) fma.d.s1 f6=f7,f6,f6
99  nop.i 0;;
100} { .mfi
101  nop.m 0
102  // Step (9)
103  // q3' = q3 * 1 + 0 in f8: final result, single-precision completer (.s), status field s0
104  (p6) fma.s.s0 f8=f6,f1,f0
105  nop.i 0;;
106
107  // END SINGLE PRECISION LATENCY-OPTIMIZED DIVIDE ALGORITHM
108
109} { .mmb
110  nop.m 0
111  nop.m 0
112  // return through b0 with the quotient in f8
113  br.ret.sptk b0;;
114}
115
116END(__divsf3)
117