dnl  Intel P5 mpn_popcount -- mpn bit population count.

dnl  Copyright 2001, 2002, 2014, 2015 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 8.0 cycles/limb


C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
C
C An arithmetic approach has been found to be slower than the table lookup,
C due to needing too many instructions.

C TABLE_NAME is the assembler symbol used for the byte lookup table: the
C value of GSYM_PREFIX followed by mpn_popcount_table.  m4_assert_defined
C is defined elsewhere; presumably it makes m4 stop with an error if
C GSYM_PREFIX has not been defined by the time TABLE_NAME is expanded.
C
C The slightly strange quoting here helps the renaming done by tune/many.pl.
deflit(TABLE_NAME,
m4_assert_defined(`GSYM_PREFIX')
GSYM_PREFIX`'mpn_popcount``'_table')

C FIXME: exporting the table to hamdist is incorrect as it hurts incremental
C linking.

C 256-entry byte table in read-only data: entry i is emitted as
C m4_popcount(i) for i = 0..255.  mpn_popcount below indexes it with one
C source byte at a time and sums the entries.  The symbol is declared
C global with GLOBL.

	RODATA
	ALIGN(8)
	GLOBL	TABLE_NAME
TABLE_NAME:
forloop(i,0,255,
`	.byte	m4_popcount(i)
')

C Stack parameter offsets.  defframe is defined elsewhere; presumably the
C offsets are relative to the stack pointer at function entry and are
C adjusted by the FRAME bookkeeping that FRAME_pushl() updates.

defframe(PARAM_SIZE,8)
defframe(PARAM_SRC, 4)

	TEXT
	ALIGN(8)

C unsigned long mpn_popcount (mp_srcptr src, mp_size_t size);
C
C Returns in eax the sum of the table entries for every byte of the size
C limbs at src.  Two bytes are handled per loop pass and the counter starts
C at 2*size, so 4*size bytes are read in total, from the highest address
C down to src.
C
C The lookup is software pipelined: each pass adds the two table values
C fetched by the previous pass, then loads and looks up the next two bytes.
C ebx and edx start at zero so the first pass adds nothing, and the two
C adds after the loop pick up the values from the final pass.
C
C Only bl and dl are written after ebx and edx have been cleared, so the
C upper 24 bits of each stay zero and the registers can be used directly as
C table indices in the range 0..255.
C
C NOTE: there is no check for size == 0.  The loop body runs once before
C the counter is tested, so size is assumed to be at least 1.
C
C esi and ebx (and ebp in the PIC build) are pushed on entry and popped in
C reverse order before returning.

PROLOGUE(mpn_popcount)
deflit(`FRAME',0)

	C size is fetched before the first push, while FRAME is still 0
	movl	PARAM_SIZE, %ecx
	pushl	%esi	FRAME_pushl()

ifdef(`PIC',`
	C PIC build: ebp is saved as well and then used to hold the address
	C of the table.
	pushl	%ebx	FRAME_pushl()
	pushl	%ebp	FRAME_pushl()
ifdef(`DARWIN',`
	shll	%ecx		C ecx = 2*size, the count of byte pairs
	LEA(	TABLE_NAME, %ebp)
	movl	PARAM_SRC, %esi
	xorl	%eax, %eax	C total = 0
	xorl	%ebx, %ebx	C first pipelined byte value = 0
	xorl	%edx, %edx	C second pipelined byte value = 0
',`
	C call/pop leaves the address of the here label in ebp
	call	L(here)
L(here):
	popl	%ebp
	shll	%ecx		C ecx = 2*size, the count of byte pairs

	C turn the address of the here label into the GOT base address
	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(here)], %ebp
	movl	PARAM_SRC, %esi

	xorl	%eax, %eax	C total = 0
	xorl	%ebx, %ebx	C first pipelined byte value = 0

	C fetch the table address from its GOT slot, replacing the GOT base
	movl	TABLE_NAME@GOT(%ebp), %ebp
	xorl	%edx, %edx	C second pipelined byte value = 0
')
define(TABLE,`(%ebp,$1)')
',`
dnl non-PIC: the table is addressed absolutely, so ebp is left untouched
	shll	%ecx		C ecx = 2*size, the count of byte pairs
	movl	PARAM_SRC, %esi

	pushl	%ebx	FRAME_pushl()
	xorl	%eax, %eax	C total = 0

	xorl	%ebx, %ebx	C first pipelined byte value = 0
	xorl	%edx, %edx	C second pipelined byte value = 0

define(TABLE,`TABLE_NAME`'($1)')
')


	ALIGN(8)	C necessary on P55 for claimed speed
L(top):
	C eax	total
	C ebx	byte, then its table value (upper 24 bits always zero)
	C ecx	counter, 2*size down to 1
	C edx	byte, then its table value (upper 24 bits always zero)
	C esi	src
	C edi	(not used)
	C ebp	[PIC] table

	addl	%ebx, %eax			C add value looked up last pass
	movb	-1(%esi,%ecx,2), %bl		C odd byte of the current pair

	addl	%edx, %eax			C add value looked up last pass
	movb	-2(%esi,%ecx,2), %dl		C even byte of the current pair

	movb	TABLE(%ebx), %bl		C replace byte by its table value
	decl	%ecx				C sets ZF for the jnz below

	movb	TABLE(%edx), %dl		C movb leaves the flags alone
	jnz	L(top)


ifdef(`PIC',`
	popl	%ebp
')
	C drain the pipeline: add the values looked up on the final pass
	addl	%ebx, %eax
	popl	%ebx

	addl	%edx, %eax
	popl	%esi

	ret

EPILOGUE()
ASM_END()