166633Sdfr.file "__umoddi3.s"
266633Sdfr
366633Sdfr// $FreeBSD$
4139815Simp
5139815Simp//-
666633Sdfr// Copyright (c) 2000, Intel Corporation
766633Sdfr// All rights reserved.
866633Sdfr//
966633Sdfr// Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
1066633Sdfr// Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
1166633Sdfr// Intel Corporation.
1266633Sdfr//
1366633Sdfr// WARRANTY DISCLAIMER
1466633Sdfr//
1566633Sdfr// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
1666633Sdfr// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
1766633Sdfr// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
1866633Sdfr// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
1966633Sdfr// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
2066633Sdfr// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
2166633Sdfr// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
2266633Sdfr// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
2366633Sdfr// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
2466633Sdfr// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
2566633Sdfr// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2666633Sdfr//
2766633Sdfr// Intel Corporation is the author of this code, and requests that all
2866633Sdfr// problem reports or change requests be submitted to it directly at
2966633Sdfr// http://developer.intel.com/opensource.
3066633Sdfr//
3166633Sdfr
3266633Sdfr.section .text
3366633Sdfr
3466633Sdfr  // 64-bit unsigned integer remainder
3566633Sdfr
3666633Sdfr.proc __umoddi3#
3766633Sdfr.align 32
3866633Sdfr.global __umoddi3#
3966633Sdfr.align 32
4066633Sdfr
  // __umoddi3: 64-bit unsigned remainder, r8 = r32 % r33 (a % b).
  //
  // Method, as implemented below:
  //   * a and b are copied into FP registers and converted to floating
  //     point (f6 = a, f7 = b); the integer image of a is kept in f12.
  //   * frcpa produces a first approximation y0 of 1/b and sets p6.
  //   * Steps (2)-(10), all predicated on p6, refine the quotient
  //     estimate q3 ~= a / b.
  //   * q3 is truncated to an unsigned integer q, and the remainder is
  //     formed with one integer multiply-add: r = a + (-b) * q.
  //
  // A `;;` closes an instruction group; each step that consumes a result
  // of an earlier step is placed after such a stop.
  //
  // NOTE(review): there is no explicit test for b == 0 in this routine;
  // the value returned in that case depends on what frcpa and the
  // conversions deliver for a zero divisor -- confirm against the ISA
  // manual before relying on it.
4166633Sdfr__umoddi3:
4266633Sdfr
4366633Sdfr{ .mii
  // Declare the register frame (3 inputs, 0 locals, 0 outputs, 0 rotating)
  // and copy the previous ar.pfs into r31.  Only r32 and r33 are read below.
4466633Sdfr  alloc r31=ar.pfs,3,0,0,0
4566633Sdfr  nop.i 0
4666633Sdfr  nop.i 0
4766633Sdfr} { .mmb
4866633Sdfr
4966633Sdfr  // 64-BIT UNSIGNED INTEGER REMAINDER BEGINS HERE
5066633Sdfr
5166633Sdfr  // general registers used:
52110725Sschweikh  //    r32 - 64-bit unsigned integer dividend, called a below
53110725Sschweikh  //    r33 - 64-bit unsigned integer divisor, called b below
  //          (overwritten with -b once b has been copied to f7)
  //    r31 - receives ar.pfs from the alloc above
5466633Sdfr  //    r8 - 64-bit unsigned integer result
5566633Sdfr  // floating-point registers used: f6, f7, f8, f9, f10, f11, f12
  //    (f0 and f1 are also read, as the constants 0.0 and 1.0)
5666633Sdfr  // predicate registers used: p6
5766633Sdfr
58110725Sschweikh  setf.sig f12=r32  // holds a in integer form
  // f7 = b in integer form; converted to floating point two bundles later
5966633Sdfr  setf.sig f7=r33
6066633Sdfr  nop.b 0;;
6166633Sdfr} { .mfi
6266633Sdfr  // get 2's complement of b
  // (r33 = 0 - b; it is moved into f7 just before step (10), for step (12))
6366633Sdfr  sub r33=r0,r33
  // f6 = a converted from unsigned integer to floating point
6466633Sdfr  fcvt.xuf.s1 f6=f12
6566633Sdfr  nop.i 0
6666633Sdfr} { .mfi
6766633Sdfr  nop.m 0
  // f7 = b converted from unsigned integer to floating point
6866633Sdfr  fcvt.xuf.s1 f7=f7
6966633Sdfr  nop.i 0;;
7066633Sdfr} { .mfi
7166633Sdfr  nop.m 0
7266633Sdfr  // Step (1)
7366633Sdfr  // y0 = 1 / b in f8
  // frcpa also writes p6, which guards steps (2)-(10).  If p6 comes back
  // clear, those steps are skipped and f8 is used as written by frcpa.
7466633Sdfr  frcpa.s1 f8,p6=f6,f7
7566633Sdfr  nop.i 0;;
7666633Sdfr} { .mfi
7766633Sdfr  nop.m 0
7866633Sdfr  // Step (2)
7966633Sdfr  // q0 = a * y0 in f10
8066633Sdfr  (p6) fma.s1 f10=f6,f8,f0
8166633Sdfr  nop.i 0
8266633Sdfr} { .mfi
8366633Sdfr  nop.m 0
8466633Sdfr  // Step (3)
8566633Sdfr  // e0 = 1 - b * y0 in f9
  // (error of the reciprocal approximation y0)
8666633Sdfr  (p6) fnma.s1 f9=f7,f8,f1
8766633Sdfr  nop.i 0;;
8866633Sdfr} { .mfi
8966633Sdfr  nop.m 0
9066633Sdfr  // Step (4)
9166633Sdfr  // q1 = q0 + e0 * q0 in f10
9266633Sdfr  (p6) fma.s1 f10=f9,f10,f10
9366633Sdfr  nop.i 0
9466633Sdfr} { .mfi
9566633Sdfr  nop.m 0
9666633Sdfr  // Step (5)
9766633Sdfr  // e1 = e0 * e0 in f11
9866633Sdfr  (p6) fma.s1 f11=f9,f9,f0
9966633Sdfr  nop.i 0;;
10066633Sdfr} { .mfi
10166633Sdfr  nop.m 0
10266633Sdfr  // Step (6)
10366633Sdfr  // y1 = y0 + e0 * y0 in f8
10466633Sdfr  (p6) fma.s1 f8=f9,f8,f8
10566633Sdfr  nop.i 0;;
10666633Sdfr} { .mfi
10766633Sdfr  nop.m 0
10866633Sdfr  // Step (7)
10966633Sdfr  // q2 = q1 + e1 * q1 in f9
  // (f9 is reused: e0 is no longer needed after step (6))
11066633Sdfr  (p6) fma.s1 f9=f11,f10,f10
11166633Sdfr  nop.i 0;;
11266633Sdfr} { .mfi
11366633Sdfr  nop.m 0
11466633Sdfr  // Step (8)
11566633Sdfr  // y2 = y1 + e1 * y1 in f8
11666633Sdfr  (p6) fma.s1 f8=f11,f8,f8
11766633Sdfr  nop.i 0;;
11866633Sdfr} { .mfi
11966633Sdfr  nop.m 0
12066633Sdfr  // Step (9)
12166633Sdfr  // r2 = a - b * q2 in f10
  // (last read of the floating-point b in f7)
12266633Sdfr  (p6) fnma.s1 f10=f7,f9,f6
12366633Sdfr  nop.i 0;;
12466633Sdfr} { .mfi
12566633Sdfr  // f7=-b
  // (integer form, taken from r33; replaces the floating-point b, which
  // step (9) has already consumed)
12666633Sdfr  setf.sig f7=r33
12766633Sdfr  // Step (10)
12866633Sdfr  // q3 = q2 + r2 * y2 in f8
12966633Sdfr  (p6) fma.s1 f8=f10,f8,f9
13066633Sdfr  nop.i 0;;
13166633Sdfr} { .mfi
13266633Sdfr  nop.m 0
13366633Sdfr  // (11) q = trunc(q3)
  // (not predicated: converts f8 to an unsigned integer whether or not
  // the refinement steps ran)
13466633Sdfr  fcvt.fxu.trunc.s1 f8=f8
13566633Sdfr  nop.i 0;;
13666633Sdfr}  { .mfi
13766633Sdfr  nop.m 0
13866633Sdfr  // (12) r = a + (-b) * q
  // (integer multiply-add on the significands: f8 = low 64 bits of
  // q * (-b) + a, with a taken from its integer copy in f12)
13966633Sdfr  xma.l f8=f8,f7,f12
14066633Sdfr  nop.i 0;;
14166633Sdfr}  { .mib
  // move the remainder from f8 into the result register r8
14266633Sdfr  getf.sig r8=f8
14366633Sdfr  nop.i 0
14466633Sdfr  nop.b 0
14566633Sdfr}
14666633Sdfr
14766633Sdfr  // 64-BIT UNSIGNED INTEGER REMAINDER ENDS HERE
14866633Sdfr
14966633Sdfr{ .mib
15066633Sdfr  nop.m 0
15166633Sdfr  nop.i 0
  // return to the caller through branch register b0
15266633Sdfr  br.ret.sptk b0;;
15366633Sdfr}
15466633Sdfr
15566633Sdfr.endp __umoddi3
156