dnl  IA-64 mpn_copyd -- copy limb vector, decrementing.

dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:      1
C Itanium 2:    0.5

C INPUT PARAMETERS
C rp = r32
C sp = r33
C n = r34

C OPERATION
C Copies the n limbs at sp to rp, highest address first.  Limbs are 8 bytes
C wide (ld8/st8).  Two load pointers (r33, r21) and two store pointers
C (r32, r20) each step down by 16 bytes, so each pair handles alternate limbs.
C Up to four limbs are loaded before the loop, each loop pass stores four and
C loads four more, and the code after the loop stores what is still held in
C registers.
C
C REGISTER USE
C r2          value of ar.lc on entry, written back before returning
C r14         n mod 4, selects the entry path
C r15         (n-4) >> 2, written to ar.lc as the loop count
C r16-r19     limbs in flight between their load and their store
C r20, r21    second store pointer and second load pointer
C p14         set when n <= 3
C p8/p10/p12  set when n mod 4 is 1/2/3

ASM_START()
PROLOGUE(mpn_copyd)
	.prologue
	.save ar.lc, r2
	.body
C Under HAVE_ABI_32 the two pointers go through addp4 and n is sign-extended
C from 32 bits before anything else uses them.
ifdef(`HAVE_ABI_32',
`	addp4		r32 = 0, r32
	addp4		r33 = 0, r33
	sxt4		r34 = r34
	;;
')
{.mmi
	shladd		r32 = r34, 3, r32	C rp + 8*n, just past the top limb
	shladd		r33 = r34, 3, r33	C sp + 8*n
	mov.i		r2 = ar.lc		C keep ar.lc for the exit path
}
{.mmi
	and		r14 = 3, r34		C n mod 4
	cmp.ge		p14, p15 = 3, r34	C p14 = (n <= 3)
	add		r34 = -4, r34		C n - 4, basis of the loop count
	;;
}
C Dispatch on n mod 4.  When none of the three branches is taken, control
C falls through to .Lb00.
{.mmi
	cmp.eq		p8, p0 = 1, r14
	cmp.eq		p10, p0 = 2, r14
	cmp.eq		p12, p0 = 3, r14
}
{.bbb
  (p8)	br.dptk		.Lb01
  (p10)	br.dptk		.Lb10
  (p12)	br.dptk		.Lb11
}

.Lb00:	C  n = 0, 4, 8, 12, ...
	add		r32 = -8, r32		C top limb of rp
	add		r33 = -8, r33		C top limb of sp
  (p14)	br.dptk		.Ls00			C n <= 3 on this path: nothing is stored
	;;
	add		r21 = -8, r33
	ld8		r16 = [r33], -16
	shr		r15 = r34, 2
	;;
	ld8		r17 = [r21], -16
	mov.i		ar.lc = r15
	ld8		r18 = [r33], -16
	add		r20 = -8, r32
	;;
	ld8		r19 = [r21], -16
	br.cloop.dptk	.Loop			C ar.lc != 0: store and reload in the loop
	;;
	br.sptk		.Lend			C ar.lc == 0: only the four loaded limbs remain
	;;

.Lb01:	C  n = 1, 5, 9, 13, ...
	add		r21 = -8, r33
	add		r20 = -8, r32
	add		r33 = -16, r33
	add		r32 = -16, r32
	;;
	ld8		r19 = [r21], -16	C top limb
	shr		r15 = r34, 2
  (p14)	br.dptk		.Ls01			C n <= 3: store that one limb and return
	;;
	ld8		r16 = [r33], -16
	mov.i		ar.lc = r15
	;;
	ld8		r17 = [r21], -16
	ld8		r18 = [r33], -16
	br.sptk		.Li01			C join the loop at the r19 store
	;;

.Lb10:	C  n = 2,6, 10, 14, ...
	add		r21 = -16, r33
	shr		r15 = r34, 2
	add		r20 = -16, r32
	add		r32 = -8, r32
	add		r33 = -8, r33
	;;
	ld8		r18 = [r33], -16	C top limb
	ld8		r19 = [r21], -16	C next limb down
	mov.i		ar.lc = r15
  (p14)	br.dptk		.Ls10			C n <= 3: store those two limbs and return
	;;
	ld8		r16 = [r33], -16
	ld8		r17 = [r21], -16
	br.sptk		.Li10			C join the loop at the r18 store
	;;

.Lb11:	C  n = 3, 7, 11, 15, ...
	add		r21 = -8, r33
	add		r20 = -8, r32
	add		r33 = -16, r33
	add		r32 = -16, r32
	;;
	ld8		r17 = [r21], -16	C top limb
	shr		r15 = r34, 2
	;;
	ld8		r18 = [r33], -16
	mov.i		ar.lc = r15
	ld8		r19 = [r21], -16
  (p14)	br.dptk		.Ls11			C n <= 3: store those three limbs and return
	;;
	ld8		r16 = [r33], -16
	br.sptk		.Li11			C join the loop at the r17 store
	;;

C Main loop.  Each full pass stores four limbs and loads the next four.  The
C .Li11, .Li10 and .Li01 labels are the mid-loop entry points used by the
C paths above.
	ALIGN(32)
.Loop:
.Li00:
{.mmb
	st8		[r32] = r16, -16
	ld8		r16 = [r33], -16
	nop.b		0
}
.Li11:
{.mmb
	st8		[r20] = r17, -16
	ld8		r17 = [r21], -16
	nop.b		0
	;;
}
.Li10:
{.mmb
	st8		[r32] = r18, -16
	ld8		r18 = [r33], -16
	nop.b		0
}
.Li01:
{.mmb
	st8		[r20] = r19, -16
	ld8		r19 = [r21], -16
	br.cloop.dptk	.Loop
	;;
}
C Drain.  Store the limbs still held in r16-r19.  The .Ls11, .Ls10 and .Ls01
C labels are where the n <= 3 paths enter, which skips the stores of registers
C those paths did not load.  .Ls00 stores nothing.
.Lend:	st8		[r32] = r16, -16
.Ls11:	st8		[r20] = r17, -16
	;;
.Ls10:	st8		[r32] = r18, -16
.Ls01:	st8		[r20] = r19, -16
.Ls00:	mov.i		ar.lc = r2		C put back the ar.lc value seen on entry
	br.ret.sptk.many b0
EPILOGUE()
ASM_END()