__umoddi3.S revision 110725
.file "__umoddi3.s"

// $FreeBSD: head/sys/libkern/ia64/__umoddi3.S 110725 2003-02-11 20:15:11Z schweikh $
//
// Copyright (c) 2000, Intel Corporation
// All rights reserved.
//
// Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
// Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
// Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//

.section .text

// ---------------------------------------------------------------------
// __umoddi3 -- 64-bit unsigned integer remainder
//
//   Inputs : r32 = dividend, 64-bit unsigned (called "a" below)
//            r33 = divisor,  64-bit unsigned (called "b" below)
//   Output : r8  = 64-bit unsigned result
//
//   General registers written : r8, r31 (receives ar.pfs from alloc),
//                               r33 (overwritten with the 2's
//                               complement of b)
//   Floating-point registers  : f6, f7, f8, f9, f10, f11, f12
//   Predicate registers       : p6
//
//   Outline: a and b are moved to floating-point registers, a quotient
//   estimate is built from the frcpa reciprocal and refined in steps
//   (2)-(10), truncated to an integer in step (11), and the result is
//   formed with one integer multiply-add, a + (-b) * q, in step (12).
//
//   NOTE(review): steps (2)-(10) are predicated on p6 (written by
//   frcpa) while steps (11) and (12) are not.  What r8 holds when p6
//   is clear (presumably the b == 0 case) is not spelled out in this
//   file -- confirm against the frcpa definition before relying on it.
// ---------------------------------------------------------------------

.proc   __umoddi3#
.align  32
.global __umoddi3#
.align  32

__umoddi3:

// Register frame set-up; the previous ar.pfs value lands in r31.
{ .mii
        alloc           r31 = ar.pfs, 3, 0, 0, 0
        nop.i           0
        nop.i           0
}

// ===== 64-BIT UNSIGNED INTEGER REMAINDER BEGINS HERE =====

// Move both operands into floating-point registers.  f12 keeps a in
// integer form for the final multiply-add in step (12).
{ .mmb
        setf.sig        f12 = r32
        setf.sig        f7 = r33
        nop.b           0 ;;
}

// r33 <- -b (2's complement), needed again in step (10);
// f6 <- a converted for the floating-point steps.
{ .mfi
        sub             r33 = r0, r33
        fcvt.xuf.s1     f6 = f12
        nop.i           0
}

// f7 <- b converted for the floating-point steps.
{ .mfi
        nop.m           0
        fcvt.xuf.s1     f7 = f7
        nop.i           0 ;;
}

// Step (1):  y0 = 1 / b                          -> f8 (also writes p6)
{ .mfi
        nop.m           0
        frcpa.s1        f8, p6 = f6, f7
        nop.i           0 ;;
}

// Step (2):  q0 = a * y0                         -> f10
{ .mfi
        nop.m           0
  (p6)  fma.s1          f10 = f6, f8, f0
        nop.i           0
}

// Step (3):  e0 = 1 - b * y0                     -> f9
{ .mfi
        nop.m           0
  (p6)  fnma.s1         f9 = f7, f8, f1
        nop.i           0 ;;
}

// Step (4):  q1 = q0 + e0 * q0                   -> f10
{ .mfi
        nop.m           0
  (p6)  fma.s1          f10 = f9, f10, f10
        nop.i           0
}

// Step (5):  e1 = e0 * e0                        -> f11
{ .mfi
        nop.m           0
  (p6)  fma.s1          f11 = f9, f9, f0
        nop.i           0 ;;
}

// Step (6):  y1 = y0 + e0 * y0                   -> f8
{ .mfi
        nop.m           0
  (p6)  fma.s1          f8 = f9, f8, f8
        nop.i           0 ;;
}

// Step (7):  q2 = q1 + e1 * q1                   -> f9
{ .mfi
        nop.m           0
  (p6)  fma.s1          f9 = f11, f10, f10
        nop.i           0 ;;
}

// Step (8):  y2 = y1 + e1 * y1                   -> f8
{ .mfi
        nop.m           0
  (p6)  fma.s1          f8 = f11, f8, f8
        nop.i           0 ;;
}

// Step (9):  r2 = a - b * q2                     -> f10
{ .mfi
        nop.m           0
  (p6)  fnma.s1         f10 = f7, f9, f6
        nop.i           0 ;;
}

// Step (10): q3 = q2 + r2 * y2                   -> f8
// The M slot reloads f7 with -b in integer form; step (9), in the
// previous instruction group, was the last reader of the converted b.
{ .mfi
        setf.sig        f7 = r33
  (p6)  fma.s1          f8 = f10, f8, f9
        nop.i           0 ;;
}

// Step (11): q = trunc(q3)                       -> f8
{ .mfi
        nop.m           0
        fcvt.fxu.trunc.s1 f8 = f8
        nop.i           0 ;;
}

// Step (12): r = a + (-b) * q                    -> f8
{ .mfi
        nop.m           0
        xma.l           f8 = f8, f7, f12
        nop.i           0 ;;
}

// Copy the result into the return register r8.
{ .mib
        getf.sig        r8 = f8
        nop.i           0
        nop.b           0
}

// ===== 64-BIT UNSIGNED INTEGER REMAINDER ENDS HERE =====

// Return to the caller through b0.
{ .mib
        nop.m           0
        nop.i           0
        br.ret.sptk     b0 ;;
}

.endp __umoddi3