1/* An example of extending the speed program to measure routines not in GMP.
2
3Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
4
5This file is part of the GNU MP Library.
6
7The GNU MP Library is free software; you can redistribute it and/or modify
8it under the terms of the GNU Lesser General Public License as published by
9the Free Software Foundation; either version 3 of the License, or (at your
10option) any later version.
11
12The GNU MP Library is distributed in the hope that it will be useful, but
13WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
15License for more details.
16
17You should have received a copy of the GNU Lesser General Public License
18along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
19
20
21/* The extension here is three versions of an mpn arithmetic mean.  These
22   aren't meant to be particularly useful, just examples.
23
24   You can run something like the following to compare their speeds.
25
26           ./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
27
28   On RISC chips, mean_open() might be fastest if the compiler is doing a
29   good job.  On the register starved x86s, mean_calls will be fastest.
30
31
32   Notes:
33
34   SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
35   by speed.c in useful places.  SPEED_EXTRA_PROTOS goes after the header
36   files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
37
38   The advantage of this #include "speed.c" scheme is that there's no
39   editing of a copy of that file, and new features in new versions of it
40   will be immediately available.
41
42   In a real program the routines mean_calls() etc would probably be in
43   separate C or assembler source files, and just the measuring
44   speed_mean_calls() etc would be here.  Linking against other libraries
45   for things to measure is perfectly possible too.
46
47   When attempting to compare two versions of the same named routine, say
48   like the generic and assembler versions of mpn_add_n(), creative use of
49   cc -D or #define is suggested, so one or both can be renamed and linked
50   into the same program.  It'll be much easier to compare them side by side
51   than with separate programs for each.
52
53   common.c has notes on writing speed measuring routines.
54
55   Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
56   not using libtool) to get common.o and other objects needed by speed.c.  */
57
58
59#define SPEED_EXTRA_PROTOS                                              \
60  double speed_mean_calls __GMP_PROTO ((struct speed_params *s));       \
61  double speed_mean_open  __GMP_PROTO ((struct speed_params *s));       \
62  double speed_mean_open2 __GMP_PROTO ((struct speed_params *s));
63
64#define SPEED_EXTRA_ROUTINES            \
65  { "mean_calls",  speed_mean_calls  }, \
66  { "mean_open",   speed_mean_open   }, \
67  { "mean_open2",  speed_mean_open2  },
68
69#include "speed.c"
70
71
72/* A straightforward implementation calling mpn subroutines.
73
74   wp,size is set to (xp,size + yp,size) / 2.  The return value is the
75   remainder from the division.  The other versions are the same.  */
76
77mp_limb_t
78mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
79{
80  mp_limb_t  c, ret;
81
82  ASSERT (size >= 1);
83
84  c = mpn_add_n (wp, xp, yp, size);
85  ret = mpn_rshift (wp, wp, size, 1) >> (GMP_LIMB_BITS-1);
86  wp[size-1] |= (c << (GMP_LIMB_BITS-1));
87  return ret;
88}
89
90
91/* An open-coded version, making one pass over the data.  The right shift is
92   done as the added limbs are produced.  The addition code follows
93   mpn/generic/add_n.c. */
94
95mp_limb_t
96mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
97{
98  mp_limb_t  w, wprev, x, y, c, ret;
99  mp_size_t  i;
100
101  ASSERT (size >= 1);
102
103  x = xp[0];
104  y = yp[0];
105
106  wprev = x + y;
107  c = (wprev < x);
108  ret = (wprev & 1);
109
110#define RSHIFT(hi,lo)   (((lo) >> 1) | ((hi) << (GMP_LIMB_BITS-1)))
111
112  for (i = 1; i < size; i++)
113    {
114      x = xp[i];
115      y = yp[i];
116
117      w = x + c;
118      c = (w < x);
119      w += y;
120      c += (w < y);
121
122      wp[i-1] = RSHIFT (w, wprev);
123      wprev = w;
124    }
125
126  wp[i-1] = RSHIFT (c, wprev);
127
128  return ret;
129}
130
131
132/* Another one-pass version, but right shifting the source limbs rather than
133   the result limbs.  There's not much chance of this being better than the
134   above, but it's an alternative at least. */
135
136mp_limb_t
137mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
138{
139  mp_limb_t  w, x, y, xnext, ynext, c, ret;
140  mp_size_t  i;
141
142  ASSERT (size >= 1);
143
144  x = xp[0];
145  y = yp[0];
146
147  /* ret is the low bit of x+y, c is the carry out of that low bit add */
148  ret = (x ^ y) & 1;
149  c   = (x & y) & 1;
150
151  for (i = 0; i < size-1; i++)
152    {
153      xnext = xp[i+1];
154      ynext = yp[i+1];
155      x = RSHIFT (xnext, x);
156      y = RSHIFT (ynext, y);
157
158      w = x + c;
159      c = (w < x);
160      w += y;
161      c += (w < y);
162      wp[i] = w;
163
164      x = xnext;
165      y = ynext;
166    }
167
168  wp[i] = (x >> 1) + (y >> 1) + c;
169
170  return ret;
171}
172
173
174/* The speed measuring routines are the same apart from which function they
175   run, so a macro is used.  Actually this macro is the same as
176   SPEED_ROUTINE_MPN_BINARY_N.  */
177
178#define SPEED_ROUTINE_MEAN(mean_fun)                    \
179  {                                                     \
180    unsigned  i;                                        \
181    mp_ptr    wp;                                       \
182    double    t;                                        \
183    TMP_DECL;                                  \
184                                                        \
185    SPEED_RESTRICT_COND (s->size >= 1);                 \
186                                                        \
187    TMP_MARK;                                  \
188    SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp);   \
189                                                        \
190    speed_operand_src (s, s->xp, s->size);              \
191    speed_operand_src (s, s->yp, s->size);              \
192    speed_operand_dst (s, wp, s->size);                 \
193    speed_cache_fill (s);                               \
194                                                        \
195    speed_starttime ();                                 \
196    i = s->reps;                                        \
197    do                                                  \
198      mean_fun (wp, s->xp, s->yp, s->size);             \
199    while (--i != 0);                                   \
200    t = speed_endtime ();                               \
201                                                        \
202    TMP_FREE;                                  \
203    return t;                                           \
204  }
205
206double
207speed_mean_calls (struct speed_params *s)
208{
209  SPEED_ROUTINE_MEAN (mean_calls);
210}
211
212double
213speed_mean_open (struct speed_params *s)
214{
215  SPEED_ROUTINE_MEAN (mean_open);
216}
217
218double
219speed_mean_open2 (struct speed_params *s)
220{
221  SPEED_ROUTINE_MEAN (mean_open2);
222}
223