dnl  S/390-32 mpn_copyd

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.


include(`../config.m4')

C	     cycles/limb
C z900		 1.65
C z990		 1.125
C z9		 ?
C z10		 ?
C z196		 ?

C FIXME:
C  * Avoid saving/restoring callee-saves registers for n < 3.  This could be
C    done by setting rp=r1, up=r2, i=r0 and r3,r4,r5 for clock regs.
C    We could then use r3...r10 in main loop.

C INPUT PARAMETERS
define(`rp_param',	`%r2')
define(`up_param',	`%r3')
define(`n',		`%r4')

C WORKING REGISTERS
C The two pointers are moved out of r2 and r3 so that r0...r7 can all be used
C as limb carriers by the lm/stm pairs below.
define(`rp',		`%r8')
define(`up',		`%r9')
define(`nmod8',		`%r7')	C n mod 8, only live during the dispatch
define(`cnt',		`%r10')	C brct counter for the 8-limb loop
define(`step',		`%r11')	C constant -32, bytes moved per loop pass

ASM_START()
PROLOGUE(mpn_copyd)
C Store r6...r11 at 24(r15); the matching lm at L(end) reloads them.
	stm	%r6, %r11, 24(%r15)

C Set up, with 4-byte limbs:
C   r1   = 4*n - 32	byte offset of the highest 8-limb chunk
C   cnt  = (n + 8) >> 3	one more than the number of full 8-limb chunks
C   step = -32
	lr	%r1, n
	sll	%r1, 2
	la	cnt, 8(n)
	ahi	%r1, -32
	srl	cnt, 3
	lhi	step, -32

C rp and up are set 32 bytes below the end of their operands, so displacements
C 0...28 from them reach the eight highest limbs.
	la	rp, 0(%r1,rp_param)	C FIXME use lay on z990 and later
	la	up, 0(%r1,up_param)	C FIXME use lay on z990 and later

C Dispatch on n mod 8.  Block L(bK) copies the K highest limbs, lowers both
C pointers by 4*K bytes, and then either enters the main loop or finishes.
C In every block the brct decrements cnt and branches to L(top) only when the
C result is nonzero, that is, when at least one full 8-limb chunk remains.
	lhi	nmod8, 7
	nr	nmod8, n		C n mod 8
	chi	nmod8, 2
	jh	L(b34567)
	chi	nmod8, 1
	je	L(b1)
	jh	L(b2)

C n mod 8 = 0: nothing to peel off.
L(b0):	brct	cnt, L(top)
	j	L(end)

C n mod 8 = 1: one limb at displacement 28.
L(b1):	l	%r0, 28(up)
	ahi	up, -4
	st	%r0, 28(rp)
	ahi	rp, -4
	brct	cnt, L(top)
	j	L(end)

C n mod 8 = 2: two limbs at displacements 24...28.
L(b2):	lm	%r0, %r1, 24(up)
	ahi	up, -8
	stm	%r0, %r1, 24(rp)
	ahi	rp, -8
	brct	cnt, L(top)
	j	L(end)

C Second dispatch level for n mod 8 in 3...7; the value 5 falls through.
L(b34567):
	chi	nmod8, 4
	jl	L(b3)
	je	L(b4)
	chi	nmod8, 6
	je	L(b6)
	jh	L(b7)

C n mod 8 = 5: five limbs at displacements 12...28.
L(b5):	lm	%r0, %r4, 12(up)
	ahi	up, -20
	stm	%r0, %r4, 12(rp)
	ahi	rp, -20
	brct	cnt, L(top)
	j	L(end)

C n mod 8 = 3: three limbs at displacements 20...28.
L(b3):	lm	%r0, %r2, 20(up)
	ahi	up, -12
	stm	%r0, %r2, 20(rp)
	ahi	rp, -12
	brct	cnt, L(top)
	j	L(end)

C n mod 8 = 4: four limbs at displacements 16...28.
L(b4):	lm	%r0, %r3, 16(up)
	ahi	up, -16
	stm	%r0, %r3, 16(rp)
	ahi	rp, -16
	brct	cnt, L(top)
	j	L(end)

C n mod 8 = 6: six limbs at displacements 8...28.
L(b6):	lm	%r0, %r5, 8(up)
	ahi	up, -24
	stm	%r0, %r5, 8(rp)
	ahi	rp, -24
	brct	cnt, L(top)
	j	L(end)

C n mod 8 = 7: seven limbs at displacements 4...28.
L(b7):	lm	%r0, %r6, 4(up)
	ahi	up, -28
	stm	%r0, %r6, 4(rp)
	ahi	rp, -28
	brct	cnt, L(top)
	j	L(end)

C Main loop: eight limbs per pass through r0...r7, moving towards lower
C addresses.  Each la adds step, which holds -32, to its pointer.
L(top):	lm	%r0, %r7, 0(up)
	la	up, 0(step,up)
	stm	%r0, %r7, 0(rp)
	la	rp, 0(step,rp)
	brct	cnt, L(top)

C Reload r6...r11 from the save area and return through r14.
L(end):	lm	%r6, %r11, 24(%r15)
	br	%r14
EPILOGUE()