.file "__umoddi3.s"

// $FreeBSD$

//-
// Copyright (c) 2000, Intel Corporation
// All rights reserved.
//
// Contributed 2/15/2000 by Marius Cornea, John Harrison, Cristina Iordache,
// Ted Kubaska, Bob Norin, and Shane Story of the Computational Software Lab,
// Intel Corporation.
//
// WARRANTY DISCLAIMER
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Intel Corporation is the author of this code, and requests that all
// problem reports or change requests be submitted to it directly at
// http://developer.intel.com/opensource.
//

.section .text

//-----------------------------------------------------------------------
// __umoddi3 - 64-bit unsigned integer remainder
//
// C-equivalent: unsigned long long __umoddi3(unsigned long long a,
//                                            unsigned long long b)
//               returns a % b
//
// general registers used:
//   r32 - 64-bit unsigned integer dividend, called a below
//   r33 - 64-bit unsigned integer divisor, called b below
//         (overwritten with -b inside the routine)
//   r8  - 64-bit unsigned integer result
//   r31 - receives the previous ar.pfs value from alloc; not read again
// floating-point registers used: f6, f7, f8, f9, f10, f11, f12
// predicate registers used: p6
//
// Method: a and b are converted to floating-point, a reciprocal
// approximation of b from frcpa is refined through steps (2)-(10) into a
// quotient q3, q3 is truncated to an integer q, and the remainder is
// formed in integer arithmetic as a + (-b) * q.
//
// Steps (2)-(10) are all predicated on p6, which frcpa writes.  When p6
// is clear they are skipped and the value frcpa left in f8 is truncated
// directly in step (11).
//
// NOTE(review): no explicit test for b == 0 is made anywhere in this
// routine; the result for a zero divisor is whatever the sequence below
// happens to produce.
//-----------------------------------------------------------------------

.proc __umoddi3#
.global __umoddi3#
.align 32

__umoddi3:

{ .mii
	// NOTE(review): three input registers are declared here, although
	// only r32 and r33 are read below.
	alloc r31=ar.pfs,3,0,0,0
	nop.i 0
	nop.i 0
}

	// 64-BIT UNSIGNED INTEGER REMAINDER BEGINS HERE

{ .mmb
	setf.sig f12=r32		// holds a in integer form
	setf.sig f7=r33			// b in integer form
	nop.b 0;;
}
{ .mfi
	// get 2's complement of b
	sub r33=r0,r33
	// f6 = a in floating-point form
	fcvt.xuf.s1 f6=f12
	nop.i 0
}
{ .mfi
	nop.m 0
	// f7 = b in floating-point form
	fcvt.xuf.s1 f7=f7
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// Step (1)
	// y0 = 1 / b in f8
	frcpa.s1 f8,p6=f6,f7
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// Step (2)
	// q0 = a * y0 in f10
	(p6) fma.s1 f10=f6,f8,f0
	nop.i 0
}
{ .mfi
	nop.m 0
	// Step (3)
	// e0 = 1 - b * y0 in f9
	(p6) fnma.s1 f9=f7,f8,f1
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// Step (4)
	// q1 = q0 + e0 * q0 in f10
	(p6) fma.s1 f10=f9,f10,f10
	nop.i 0
}
{ .mfi
	nop.m 0
	// Step (5)
	// e1 = e0 * e0 in f11
	(p6) fma.s1 f11=f9,f9,f0
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// Step (6)
	// y1 = y0 + e0 * y0 in f8
	(p6) fma.s1 f8=f9,f8,f8
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// Step (7)
	// q2 = q1 + e1 * q1 in f9 (e0 is no longer needed)
	(p6) fma.s1 f9=f11,f10,f10
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// Step (8)
	// y2 = y1 + e1 * y1 in f8
	(p6) fma.s1 f8=f11,f8,f8
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// Step (9)
	// r2 = a - b * q2 in f10
	(p6) fnma.s1 f10=f7,f9,f6
	nop.i 0;;
}
{ .mfi
	// f7=-b (integer form; the floating-point b was last read in
	// step (9), in the previous instruction group)
	setf.sig f7=r33
	// Step (10)
	// q3 = q2 + r2 * y2 in f8
	(p6) fma.s1 f8=f10,f8,f9
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// (11) q = trunc(q3)
	fcvt.fxu.trunc.s1 f8=f8
	nop.i 0;;
}
{ .mfi
	nop.m 0
	// (12) r = a + (-b) * q
	xma.l f8=f8,f7,f12
	nop.i 0;;
}
{ .mib
	// move the remainder into the result register
	getf.sig r8=f8
	nop.i 0
	nop.b 0
}

	// 64-BIT UNSIGNED INTEGER REMAINDER ENDS HERE

{ .mib
	nop.m 0
	nop.i 0
	br.ret.sptk b0;;
}

.endp __umoddi3