1/* An example of extending the speed program to measure routines not in GMP. 2 3Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc. 4 5This file is part of the GNU MP Library. 6 7The GNU MP Library is free software; you can redistribute it and/or modify 8it under the terms of the GNU Lesser General Public License as published by 9the Free Software Foundation; either version 3 of the License, or (at your 10option) any later version. 11 12The GNU MP Library is distributed in the hope that it will be useful, but 13WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 15License for more details. 16 17You should have received a copy of the GNU Lesser General Public License 18along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ 19 20 21/* The extension here is three versions of an mpn arithmetic mean. These 22 aren't meant to be particularly useful, just examples. 23 24 You can run something like the following to compare their speeds. 25 26 ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2 27 28 On RISC chips, mean_open() might be fastest if the compiler is doing a 29 good job. On the register starved x86s, mean_calls will be fastest. 30 31 32 Notes: 33 34 SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded 35 by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header 36 files, and SPEED_EXTRA_ROUTINES goes in the array of available routines. 37 38 The advantage of this #include "speed.c" scheme is that there's no 39 editing of a copy of that file, and new features in new versions of it 40 will be immediately available. 41 42 In a real program the routines mean_calls() etc would probably be in 43 separate C or assembler source files, and just the measuring 44 speed_mean_calls() etc would be here. Linking against other libraries 45 for things to measure is perfectly possible too. 46 47 When attempting to compare two versions of the same named routine, say 48 like the generic and assembler versions of mpn_add_n(), creative use of 49 cc -D or #define is suggested, so one or both can be renamed and linked 50 into the same program. It'll be much easier to compare them side by side 51 than with separate programs for each. 52 53 common.c has notes on writing speed measuring routines. 54 55 Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if 56 not using libtool) to get common.o and other objects needed by speed.c. */ 57 58 59#define SPEED_EXTRA_PROTOS \ 60 double speed_mean_calls __GMP_PROTO ((struct speed_params *s)); \ 61 double speed_mean_open __GMP_PROTO ((struct speed_params *s)); \ 62 double speed_mean_open2 __GMP_PROTO ((struct speed_params *s)); 63 64#define SPEED_EXTRA_ROUTINES \ 65 { "mean_calls", speed_mean_calls }, \ 66 { "mean_open", speed_mean_open }, \ 67 { "mean_open2", speed_mean_open2 }, 68 69#include "speed.c" 70 71 72/* A straightforward implementation calling mpn subroutines. 73 74 wp,size is set to (xp,size + yp,size) / 2. The return value is the 75 remainder from the division. The other versions are the same. */ 76 77mp_limb_t 78mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size) 79{ 80 mp_limb_t c, ret; 81 82 ASSERT (size >= 1); 83 84 c = mpn_add_n (wp, xp, yp, size); 85 ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1); 86 wp[size-1] |= (c << (GMP_LIMB_BITS-1)); 87 return ret; 88} 89 90 91/* An open-coded version, making one pass over the data. The right shift is 92 done as the added limbs are produced. The addition code follows 93 mpn/generic/add_n.c. */ 94 95mp_limb_t 96mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size) 97{ 98 mp_limb_t w, wprev, x, y, c, ret; 99 mp_size_t i; 100 101 ASSERT (size >= 1); 102 103 x = xp[0]; 104 y = yp[0]; 105 106 wprev = x + y; 107 c = (wprev < x); 108 ret = (wprev & 1); 109 110#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1))) 111 112 for (i = 1; i < size; i++) 113 { 114 x = xp[i]; 115 y = yp[i]; 116 117 w = x + c; 118 c = (w < x); 119 w += y; 120 c += (w < y); 121 122 wp[i-1] = RSHIFT (w, wprev); 123 wprev = w; 124 } 125 126 wp[i-1] = RSHIFT (c, wprev); 127 128 return ret; 129} 130 131 132/* Another one-pass version, but right shifting the source limbs rather than 133 the result limbs. There's not much chance of this being better than the 134 above, but it's an alternative at least. */ 135 136mp_limb_t 137mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size) 138{ 139 mp_limb_t w, x, y, xnext, ynext, c, ret; 140 mp_size_t i; 141 142 ASSERT (size >= 1); 143 144 x = xp[0]; 145 y = yp[0]; 146 147 /* ret is the low bit of x+y, c is the carry out of that low bit add */ 148 ret = (x ^ y) & 1; 149 c = (x & y) & 1; 150 151 for (i = 0; i < size-1; i++) 152 { 153 xnext = xp[i+1]; 154 ynext = yp[i+1]; 155 x = RSHIFT (xnext, x); 156 y = RSHIFT (ynext, y); 157 158 w = x + c; 159 c = (w < x); 160 w += y; 161 c += (w < y); 162 wp[i] = w; 163 164 x = xnext; 165 y = ynext; 166 } 167 168 wp[i] = (x >> 1) + (y >> 1) + c; 169 170 return ret; 171} 172 173 174/* The speed measuring routines are the same apart from which function they 175 run, so a macro is used. Actually this macro is the same as 176 SPEED_ROUTINE_MPN_BINARY_N. */ 177 178#define SPEED_ROUTINE_MEAN(mean_fun) \ 179 { \ 180 unsigned i; \ 181 mp_ptr wp; \ 182 double t; \ 183 TMP_DECL; \ 184 \ 185 SPEED_RESTRICT_COND (s->size >= 1); \ 186 \ 187 TMP_MARK; \ 188 SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \ 189 \ 190 speed_operand_src (s, s->xp, s->size); \ 191 speed_operand_src (s, s->yp, s->size); \ 192 speed_operand_dst (s, wp, s->size); \ 193 speed_cache_fill (s); \ 194 \ 195 speed_starttime (); \ 196 i = s->reps; \ 197 do \ 198 mean_fun (wp, s->xp, s->yp, s->size); \ 199 while (--i != 0); \ 200 t = speed_endtime (); \ 201 \ 202 TMP_FREE; \ 203 return t; \ 204 } 205 206double 207speed_mean_calls (struct speed_params *s) 208{ 209 SPEED_ROUTINE_MEAN (mean_calls); 210} 211 212double 213speed_mean_open (struct speed_params *s) 214{ 215 SPEED_ROUTINE_MEAN (mean_open); 216} 217 218double 219speed_mean_open2 (struct speed_params *s) 220{ 221 SPEED_ROUTINE_MEAN (mean_open2); 222} 223