popham.asm revision 1.1.1.1
dnl  ARM mpn_popcount and mpn_hamdist.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                popcount     hamdist
C               cycles/limb  cycles/limb
C StrongARM      -
C XScale         -
C Cortex-A7      ?
C Cortex-A8      ?
C Cortex-A9      8.94         9.47
C Cortex-A15     5.67         6.44

C Architecture requirements:
C v5     -
C v5t    -
C v5te   ldrd strd
C v6     usada8
C v6t2   -
C v7a    -

C One source, two functions.  The build defines either OPERATION_popcount or
C OPERATION_hamdist, and the matching block below binds the symbolic register
C names used by the shared body:
C
C   func        entry point name
C   ap, bp      limb pointers, bp only for hamdist
C   n           limb count
C   a0, a1      limbs loaded from ap, consecutive registers r2/r3 or r6/r7
C               so that a single ldrd can fill both
C   b0, b1      limbs loaded from bp, hamdist only
C   s           four 8-bit partial counts produced by the latest reduction
C   b_01010101  mask 0x55555555
C   b_00110011  mask 0x33333333
C   b_00001111  mask 0x0f0f0f0f
C   zero        constant 0, used as the second operand of usada8
C
C r4 and r5 are scratch in both variants and r12 holds the running total.
C Aliasing to keep in mind before reordering any instruction:
C   popcount: s is r5, so s must be consumed before r5 is reused as scratch.
C   hamdist:  b0/b1 are r4/r5, so they are dead as soon as the eor is done.
C
C POPC(x) and HAMD(x) emit x only in the matching variant; in the other
C variant the macro expands to dnl and the whole source line is dropped.

ifdef(`OPERATION_popcount',`
  define(`func',`mpn_popcount')
  define(`ap',          `r0')
  define(`n',           `r1')
  define(`a0',          `r2')
  define(`a1',          `r3')
  define(`s',           `r5')
  define(`b_01010101',  `r6')
  define(`b_00110011',  `r7')
  define(`b_00001111',  `r8')
  define(`zero',        `r9')
  define(`POPC',        `$1')
  define(`HAMD',        `dnl')
')
ifdef(`OPERATION_hamdist',`
  define(`func',`mpn_hamdist')
  define(`ap',          `r0')
  define(`bp',          `r1')
  define(`n',           `r2')
  define(`a0',          `r6')
  define(`a1',          `r7')
  define(`b0',          `r4')
  define(`b1',          `r5')
  define(`s',           `r11')
  define(`b_01010101',  `r8')
  define(`b_00110011',  `r9')
  define(`b_00001111',  `r10')
  define(`zero',        `r3')
  define(`POPC',        `dnl')
  define(`HAMD',        `$1')
')

MULFUNC_PROLOGUE(mpn_popcount mpn_hamdist)

C func leaves in r0 the number of set bits in the n limbs at ap (popcount),
C or in the bitwise xor of the n limbs at ap with the n limbs at bp (hamdist).
C
C Method, per 32-bit word x, every step being a parallel bit-field addition:
C   x - ((x >> 1) & 0x55555555)                   16 2-bit counts, 0-2
C   (x & 0x33333333) + ((x & ~0x33333333) >> 2)    8 4-bit counts, 0-4
C   sum of the nibble counts of two words          8 4-bit counts, 0-8
C   (x & 0x0f0f0f0f) + ((x & ~0x0f0f0f0f) >> 4)    4 8-bit counts, 0-16
C The four byte counts in s are added into r12 by usada8, with zero as the
C second operand, at the start of the following pass; the last s is added
C into r0 after the loop.
C
C An odd n is handled by reducing one limb first and entering the loop at
C L(mid); from then on each pass consumes two limbs.
C
C NOTE(review): n = 0 is not checked for.  The even path would load two
C limbs and decrement n past zero before the first test, so n >= 1 is
C presumably guaranteed by callers - confirm against the mpn interface.

ASM_START()
PROLOGUE(func)
C Preserve the registers above r3 that this variant overwrites.
POPC(`  push    { r4-r9 }       ')
HAMD(`  push    { r4-r11 }      ')

        ldr     b_01010101, =0x55555555
        mov     r12, #0                         C running total
        ldr     b_00110011, =0x33333333
        mov     zero, #0
        ldr     b_00001111, =0x0f0f0f0f

        tst     n, #1
        beq     L(evn)                          C even count, no leading limb

C Odd count: reduce a single limb to nibble counts in r5, then join the
C loop at the byte-reduction step.
L(odd): ldr     a1, [ap], #4                    C 1 x 32 1-bit accumulators, 0-1
HAMD(`  ldr     b1, [bp], #4    ')              C 1 x 32 1-bit accumulators, 0-1
HAMD(`  eor     a1, a1, b1      ')
        and     r4, b_01010101, a1, lsr #1
        sub     a1, a1, r4                      C 16 2-bit accumulators, 0-2
        and     r4, a1, b_00110011
        bic     r5, a1, b_00110011
        add     r5, r4, r5, lsr #2              C 8 4-bit accumulators, 0-4
        subs    n, n, #1                        C flags are tested by the bne after L(mid)
        b       L(mid)

C Even count: nothing has been reduced yet, so the first usada8 must add 0.
L(evn): mov     s, #0

L(top): ldrd    a0, a1, [ap], #8                C 2 x 32 1-bit accumulators, 0-1
HAMD(`  ldrd    b0, b1, [bp], #8')
HAMD(`  eor     a0, a0, b0      ')
HAMD(`  eor     a1, a1, b1      ')
C Only subs updates the flags between here and the bne below; none of the
C instructions in between carries the s suffix.
        subs    n, n, #2
C Fold the byte counts of the previous pass into the total.  This has to
C come before r5 is written below, because s is r5 in the popcount variant.
        usada8  r12, s, zero, r12
        and     r4, b_01010101, a0, lsr #1
        sub     a0, a0, r4                      C 16 2-bit accumulators, 0-2
        and     r4, b_01010101, a1, lsr #1
        sub     a1, a1, r4                      C 16 2-bit accumulators, 0-2
        and     r4, a0, b_00110011
        bic     r5, a0, b_00110011
        add     a0, r4, r5, lsr #2              C 8 4-bit accumulators, 0-4
        and     r4, a1, b_00110011
        bic     r5, a1, b_00110011
        add     a1, r4, r5, lsr #2              C 8 4-bit accumulators, 0-4
        add     r5, a0, a1                      C 8 4-bit accumulators, 0-8
L(mid): and     r4, r5, b_00001111
        bic     r5, r5, b_00001111
        add     s, r4, r5, lsr #4               C 4 8-bit accumulators
        bne     L(top)                          C flags from the latest subs

C Add the byte counts of the final pass and deliver the total in r0.
        usada8  r0, s, zero, r12
POPC(`  pop     { r4-r9 }       ')
HAMD(`  pop     { r4-r11 }      ')
        bx      r14
EPILOGUE()