.file "__moddi3.s"  // logical source file name for this IA-64 assembly unit

// $FreeBSD: releng/10.3/sys/libkern/ia64/__moddi3.S 139815 2005-01-07 00:24:33Z imp $

//-
// Copyright (c) 2000, Intel Corporation
// All rights reserved.
//
// Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
// Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
// Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//

.section .text

//-----------------------------------------------------------------------
// __moddi3 -- 64-bit signed integer remainder
//
// Computes  r = a + (-b) * trunc(a / b).  The quotient is truncated
// toward zero in step (11) and the remainder is rebuilt from it with an
// integer multiply-add in step (12).
//
// In:
//    r32 - 64-bit signed integer dividend, called a below
//    r33 - 64-bit signed integer divisor, called b below
// Out:
//    r8  - 64-bit signed integer result
//
// Registers written besides r8:
//    r2   - loaded with all ones; never read again in this routine
//    r31  - destination of the alloc instruction
//    r33  - overwritten with -b
//    f6, f7, f8, f9, f10, f11, f12
//    p6   - set by frcpa, guards steps (2)-(10)
//
// Method:
//    a and b are moved to FP registers and converted to floating point.
//    frcpa yields the starting reciprocal y0 and the predicate p6.
//    Steps (2)-(10) refine quotient and reciprocal with fma/fnma and run
//    only while p6 is set.  Steps (11) and (12) are unconditional.
//
// NOTE(review): b == 0 is not special-cased.  When frcpa clears p6 the
// refinement is skipped and whatever frcpa left in f8 is truncated and
// used as the quotient -- confirm the resulting value against the
// frcpa definition in the architecture manual.
//-----------------------------------------------------------------------

.proc __moddi3#
.align 32
.global __moddi3#

__moddi3:

{ .mii
  // register frame: 3 inputs, 0 locals, 0 outputs, 0 rotating
  // (only r32 and r33 are read below)
  alloc r31=ar.pfs,3,0,0,0
  nop.i 0
  nop.i 0
} { .mmb

  // 64-BIT SIGNED INTEGER REMAINDER BEGINS HERE

  setf.sig f12=r32  // f12 = a in integer form, kept for step (12)
  setf.sig f7=r33   // f7  = b in integer form, converted below
  nop.b 0
} { .mlx
  nop.m 0
  // r2 is not consumed anywhere below; the stop bit on this bundle
  // separates the setf.sig writes above from their readers.
  movl r2=0xffffffffffffffff;;
} { .mfi
  // get the 2's complement of b (r33 = -b, used again at step (10))
  sub r33=r0,r33
  fcvt.xf f6=f12    // f6 = a as floating point
  nop.i 0
} { .mfi
  nop.m 0
  fcvt.xf f7=f7     // f7 = b as floating point
  nop.i 0;;
} { .mfi
  nop.m 0
  // Step (1)
  // y0 = 1 / b in f8; p6 tells whether the refinement steps must run
  frcpa.s1 f8,p6=f6,f7
  nop.i 0;;
} { .mfi
  nop.m 0
  // Step (2)
  // q0 = a * y0 in f10
  (p6) fma.s1 f10=f6,f8,f0
  nop.i 0
} { .mfi
  nop.m 0
  // Step (3)
  // e0 = 1 - b * y0 in f9
  (p6) fnma.s1 f9=f7,f8,f1
  nop.i 0;;
} { .mfi
  nop.m 0
  // Step (4)
  // q1 = q0 + e0 * q0 in f10
  (p6) fma.s1 f10=f9,f10,f10
  nop.i 0
} { .mfi
  nop.m 0
  // Step (5)
  // e1 = e0 * e0 in f11
  (p6) fma.s1 f11=f9,f9,f0
  nop.i 0;;
} { .mfi
  nop.m 0
  // Step (6)
  // y1 = y0 + e0 * y0 in f8
  (p6) fma.s1 f8=f9,f8,f8
  nop.i 0;;
} { .mfi
  nop.m 0
  // Step (7)
  // q2 = q1 + e1 * q1 in f9 (e0 in f9 is no longer needed)
  (p6) fma.s1 f9=f11,f10,f10
  nop.i 0;;
} { .mfi
  nop.m 0
  // Step (8)
  // y2 = y1 + e1 * y1 in f8
  (p6) fma.s1 f8=f11,f8,f8
  nop.i 0;;
} { .mfi
  nop.m 0
  // Step (9)
  // rem2 = a - b * q2 in f10 (residual; unrelated to general register r2)
  (p6) fnma.s1 f10=f7,f9,f6
  nop.i 0;;
} { .mfi
  // Step (9) was the last reader of b in floating-point form, so f7 is
  // reloaded with -b in integer form (r33 was negated above).
  setf.sig f7=r33
  // Step (10)
  // q3 = q2 + rem2 * y2 in f8
  (p6) fma.s1 f8=f10,f8,f9
  nop.i 0;;
} { .mfi
  nop.m 0
  // (11) q = trunc(q3), as a signed integer in f8
  fcvt.fx.trunc.s1 f8=f8
  nop.i 0;;
} { .mfi
  nop.m 0
  // (12) r = a + (-b) * q   (integer multiply-add, low 64 bits)
  xma.l f8=f8,f7,f12
  nop.i 0;;
} { .mib
  getf.sig r8=f8    // move the remainder to the integer return register
  nop.i 0
  nop.b 0
}

  // 64-BIT SIGNED INTEGER REMAINDER ENDS HERE

{ .mib
  nop.m 0
  nop.i 0
  br.ret.sptk b0;;
}

.endp __moddi3