1/* [Description]
2
3Copyright 2010-2023 Free Software Foundation, Inc.
4Contributed by the AriC and Caramba projects, INRIA.
5
6This file is part of the GNU MPFR Library.
7
8The GNU MPFR Library is free software; you can redistribute it and/or modify
9it under the terms of the GNU Lesser General Public License as published by
10the Free Software Foundation; either version 3 of the License, or (at your
11option) any later version.
12
13The GNU MPFR Library is distributed in the hope that it will be useful, but
14WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
16License for more details.
17
18You should have received a copy of the GNU Lesser General Public License
19along with the GNU MPFR Library; see the file COPYING.LESSER.  If not, see
20https://www.gnu.org/licenses/ or write to the Free Software Foundation, Inc.,
2151 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. */
22
23#include <stdlib.h>
24#include <time.h>
25
26#define MPFR_NEED_LONGLONG_H
27#include "mpfr-impl.h"
28
29#undef _PROTO
30#define _PROTO __GMP_PROTO
31#include "speed.h"
32
33/* Let f be a function for which we have several implementations f1, f2... */
34/* We wish to have a quick overview of which implementation is the best    */
35/* in function of the point x where f(x) is computed and of the precision */
36/* prec requested by the user.                                             */
37/* This is performed by drawing a 2D graphic with color indicating which   */
38/* method is the best.                                                     */
39/* For building this graphic, the following structur is used (see the core */
40/* of generate_2D_sample for an explanation of each field.                 */
41struct speed_params2D
42{
43  /* x-window: [min_x, max_x] or [2^min_x, 2^max_x]                        */
44  /*           or [-2^(max_x), -2^(min_x)] U [2^min_x, 2^max_x]            */
45  /* depending on the value of logarithmic_scale_x                         */
46  double min_x;
47  double max_x;
48
49  /* prec-window: [min_prec, max_prec] */
50  mpfr_prec_t min_prec;
51  mpfr_prec_t max_prec;
52
53  /* number of sample points for the x-axis and the prec-axis */
54  int nb_points_x;
55  int nb_points_prec;
56
57  /* should the sample points be logarithmically scaled or not */
58  int logarithmic_scale_x;
59  int logarithmic_scale_prec;
60
61  /* list of functions g1, g2... measuring the execution time of f1, f2...  */
62  /* when given a point x and a precision prec stored in s.                 */
63  /* We use s->xp to store the significant of x, s->r to store its exponent */
64  /* s->align_xp to store its sign, and s->size to store prec.              */
65  double (**speed_funcs) (struct speed_params *s);
66};
67
68/* Given an array t of nb_functions double indicating the timings of several */
69/* implementations, return i, such that t[i] is the best timing.             */
70int
71find_best (double *t, int nb_functions)
72{
73  int i, ibest;
74  double best;
75
76  if (nb_functions<=0)
77    {
78      fprintf (stderr, "There is no function\n");
79      abort ();
80    }
81
82  ibest = 0;
83  best = t[0];
84  for (i=1; i<nb_functions; i++)
85    {
86      if (t[i]<best)
87        {
88          best = t[i];
89          ibest = i;
90        }
91    }
92
93  return ibest;
94}
95
96void generate_2D_sample (FILE *output, struct speed_params2D param)
97{
98  mpfr_t temp;
99  double incr_prec;
100  mpfr_t incr_x;
101  mpfr_t x, x2;
102  double prec;
103  struct speed_params s;
104  int i;
105  int test;
106  int nb_functions;
107  double *t; /* store the timing of each implementation */
108
109  /* We first determine how many implementations we have */
110  nb_functions = 0;
111  while (param.speed_funcs[nb_functions] != NULL)
112    nb_functions++;
113
114  t = malloc (nb_functions * sizeof (double));
115  if (t == NULL)
116    {
117      fprintf (stderr, "Can't allocate memory.\n");
118      abort ();
119    }
120
121
122  mpfr_init2 (temp, MPFR_SMALL_PRECISION);
123
124  /* The precision is sampled from min_prec to max_prec with        */
125  /* approximately nb_points_prec points. If logarithmic_scale_prec */
126  /* is true, the precision is multiplied by incr_prec at each      */
127  /* step. Otherwise, incr_prec is added at each step.              */
128  if (param.logarithmic_scale_prec)
129    {
130      mpfr_set_ui (temp, (unsigned long int)param.max_prec, MPFR_RNDU);
131      mpfr_div_ui (temp, temp, (unsigned long int)param.min_prec, MPFR_RNDU);
132      mpfr_root (temp, temp,
133                 (unsigned long int)param.nb_points_prec, MPFR_RNDU);
134      incr_prec = mpfr_get_d (temp, MPFR_RNDU);
135    }
136  else
137    {
138      incr_prec = (double)param.max_prec - (double)param.min_prec;
139      incr_prec = incr_prec/((double)param.nb_points_prec);
140    }
141
142  /* The points x are sampled according to the following rule:             */
143  /* If logarithmic_scale_x = 0:                                           */
144  /*    nb_points_x points are equally distributed between min_x and max_x */
145  /* If logarithmic_scale_x = 1:                                           */
146  /*    nb_points_x points are sampled from 2^(min_x) to 2^(max_x). At     */
147  /*    each step, the current point is multiplied by incr_x.              */
148  /* If logarithmic_scale_x = -1:                                          */
149  /*    nb_points_x/2 points are sampled from -2^(max_x) to -2^(min_x)     */
150  /*    (at each step, the current point is divided by incr_x);  and       */
151  /*    nb_points_x/2 points are sampled from 2^(min_x) to 2^(max_x)       */
152  /*    (at each step, the current point is multiplied by incr_x).         */
153  mpfr_init2 (incr_x, param.max_prec);
154  if (param.logarithmic_scale_x == 0)
155    {
156      mpfr_set_d (incr_x,
157                  (param.max_x - param.min_x)/(double)param.nb_points_x,
158                  MPFR_RNDU);
159    }
160  else if (param.logarithmic_scale_x == -1)
161    {
162      mpfr_set_d (incr_x,
163                  2.*(param.max_x - param.min_x)/(double)param.nb_points_x,
164                  MPFR_RNDU);
165      mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
166    }
167  else
168    { /* other values of param.logarithmic_scale_x are considered as 1 */
169      mpfr_set_d (incr_x,
170                  (param.max_x - param.min_x)/(double)param.nb_points_x,
171                  MPFR_RNDU);
172      mpfr_exp2 (incr_x, incr_x, MPFR_RNDU);
173    }
174
175  /* Main loop */
176  mpfr_init2 (x, param.max_prec);
177  mpfr_init2 (x2, param.max_prec);
178  prec = (double)param.min_prec;
179  while (prec <= param.max_prec)
180    {
181      printf ("prec = %d\n", (int)prec);
182      if (param.logarithmic_scale_x == 0)
183        mpfr_set_d (temp, param.min_x, MPFR_RNDU);
184      else if (param.logarithmic_scale_x == -1)
185        {
186          mpfr_set_d (temp, param.max_x, MPFR_RNDD);
187          mpfr_exp2 (temp, temp, MPFR_RNDD);
188          mpfr_neg (temp, temp, MPFR_RNDU);
189        }
190      else
191        {
192          mpfr_set_d (temp, param.min_x, MPFR_RNDD);
193          mpfr_exp2 (temp, temp, MPFR_RNDD);
194        }
195
196      /* We perturb x a little bit, in order to avoid trailing zeros that */
197      /* might change the behavior of algorithms.                         */
198      mpfr_const_pi (x, MPFR_RNDN);
199      mpfr_div_2ui (x, x, 7, MPFR_RNDN);
200      mpfr_add_ui (x, x, 1, MPFR_RNDN);
201      mpfr_mul (x, x, temp, MPFR_RNDN);
202
203      test = 1;
204      while (test)
205        {
206          mpfr_fprintf (output, "%e\t", mpfr_get_d (x, MPFR_RNDN));
207          mpfr_fprintf (output, "%Pu\t", (mpfr_prec_t)prec);
208
209          s.r = (mp_limb_t)mpfr_get_exp (x);
210          s.size = (mpfr_prec_t)prec;
211          s.align_xp = (mpfr_sgn (x) > 0)?1:2;
212          mpfr_set_prec (x2, (mpfr_prec_t)prec);
213          mpfr_set (x2, x, MPFR_RNDU);
214          s.xp = x2->_mpfr_d;
215
216          for (i=0; i<nb_functions; i++)
217            {
218              t[i] = speed_measure (param.speed_funcs[i], &s);
219              mpfr_fprintf (output, "%e\t", t[i]);
220            }
221          fprintf (output, "%d\n", 1 + find_best (t, nb_functions));
222
223          if (param.logarithmic_scale_x == 0)
224            {
225              mpfr_add (x, x, incr_x, MPFR_RNDU);
226              if (mpfr_cmp_d (x, param.max_x) > 0)
227                test=0;
228            }
229          else
230            {
231              if (mpfr_sgn (x) < 0 )
232                { /* if x<0, it means that logarithmic_scale_x=-1 */
233                  mpfr_div (x, x, incr_x, MPFR_RNDU);
234                  mpfr_abs (temp, x, MPFR_RNDD);
235                  mpfr_log2 (temp, temp, MPFR_RNDD);
236                  if (mpfr_cmp_d (temp, param.min_x) < 0)
237                    mpfr_neg (x, x, MPFR_RNDN);
238                }
239              else
240                {
241                  mpfr_mul (x, x, incr_x, MPFR_RNDU);
242                  mpfr_set (temp, x, MPFR_RNDD);
243                  mpfr_log2 (temp, temp, MPFR_RNDD);
244                  if (mpfr_cmp_d (temp, param.max_x) > 0)
245                    test=0;
246                }
247            }
248        }
249
250      prec = ( (param.logarithmic_scale_prec) ? (prec * incr_prec)
251               : (prec + incr_prec) );
252      fprintf (output, "\n");
253    }
254
255  free (t);
256  mpfr_clear (incr_x);
257  mpfr_clear (x);
258  mpfr_clear (x2);
259  mpfr_clear (temp);
260
261  return;
262}
263
264#define SPEED_MPFR_FUNC_2D(mean_func)                   \
265  do                                                    \
266    {                                                   \
267      double t;                                         \
268      unsigned i;                                       \
269      mpfr_t w, x;                                      \
270      mp_size_t size;                                   \
271                                                        \
272      SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);   \
273      SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);   \
274                                                        \
275      size = (s->size-1)/GMP_NUMB_BITS+1;               \
276      s->xp[size-1] |= MPFR_LIMB_HIGHBIT;               \
277      MPFR_TMP_INIT1 (s->xp, x, s->size);               \
278      MPFR_SET_EXP (x, (mpfr_exp_t) s->r);              \
279      if (s->align_xp == 2) MPFR_SET_NEG (x);           \
280                                                        \
281      mpfr_init2 (w, s->size);                          \
282      speed_starttime ();                               \
283      i = s->reps;                                      \
284                                                        \
285      do                                                \
286        mean_func (w, x, MPFR_RNDN);                    \
287      while (--i != 0);                                 \
288      t = speed_endtime ();                             \
289                                                        \
290      mpfr_clear (w);                                   \
291      return t;                                         \
292    }                                                   \
293  while (0)
294
295mpfr_prec_t mpfr_exp_2_threshold;
296mpfr_prec_t old_threshold = MPFR_EXP_2_THRESHOLD;
297#undef  MPFR_EXP_2_THRESHOLD
298#define MPFR_EXP_2_THRESHOLD mpfr_exp_2_threshold
299#include "exp_2.c"
300
301double
302timing_exp1 (struct speed_params *s)
303{
304  mpfr_exp_2_threshold = s->size+1;
305  SPEED_MPFR_FUNC_2D (mpfr_exp_2);
306}
307
308double
309timing_exp2 (struct speed_params *s)
310{
311  mpfr_exp_2_threshold = s->size-1;
312  SPEED_MPFR_FUNC_2D (mpfr_exp_2);
313}
314
315double
316timing_exp3 (struct speed_params *s)
317{
318  SPEED_MPFR_FUNC_2D (mpfr_exp_3);
319}
320
321
322#include "ai.c"
323double
324timing_ai1 (struct speed_params *s)
325{
326  SPEED_MPFR_FUNC_2D (mpfr_ai1);
327}
328
329double
330timing_ai2 (struct speed_params *s)
331{
332  SPEED_MPFR_FUNC_2D (mpfr_ai2);
333}
334
335/* These functions are for testing purpose only */
336/* They are used to draw which method is actually used */
337double
338virtual_timing_ai1 (struct speed_params *s)
339{
340  double t;
341  unsigned i;
342  mpfr_t w, x;
343  mp_size_t size;
344  mpfr_t temp1, temp2;
345
346  SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
347  SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
348
349  size = (s->size-1)/GMP_NUMB_BITS+1;
350  s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
351  MPFR_TMP_INIT1 (s->xp, x, s->size);
352  MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
353  if (s->align_xp == 2) MPFR_SET_NEG (x);
354
355  mpfr_init2 (w, s->size);
356  speed_starttime ();
357  i = s->reps;
358
359  mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
360  mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
361
362  mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
363  mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
364  mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
365
366  if (MPFR_IS_NEG (x))
367      mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
368  else
369      mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
370
371  mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
372
373  if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
374    t = 1000.;
375  else
376    t = 1.;
377
378  mpfr_clear (temp1);
379  mpfr_clear (temp2);
380
381  return t;
382}
383
384double
385virtual_timing_ai2 (struct speed_params *s)
386{
387  double t;
388  unsigned i;
389  mpfr_t w, x;
390  mp_size_t size;
391  mpfr_t temp1, temp2;
392
393  SPEED_RESTRICT_COND (s->size >= MPFR_PREC_MIN);
394  SPEED_RESTRICT_COND (s->size <= MPFR_PREC_MAX);
395
396  size = (s->size-1)/GMP_NUMB_BITS+1;
397  s->xp[size-1] |= MPFR_LIMB_HIGHBIT;
398  MPFR_TMP_INIT1 (s->xp, x, s->size);
399  MPFR_SET_EXP (x, (mpfr_exp_t) s->r);
400  if (s->align_xp == 2) MPFR_SET_NEG (x);
401
402  mpfr_init2 (w, s->size);
403  speed_starttime ();
404  i = s->reps;
405
406  mpfr_init2 (temp1, MPFR_SMALL_PRECISION);
407  mpfr_init2 (temp2, MPFR_SMALL_PRECISION);
408
409  mpfr_set (temp1, x, MPFR_SMALL_PRECISION);
410  mpfr_set_si (temp2, MPFR_AI_THRESHOLD2, MPFR_RNDN);
411  mpfr_mul_ui (temp2, temp2, (unsigned int)MPFR_PREC (w), MPFR_RNDN);
412
413  if (MPFR_IS_NEG (x))
414      mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD1, MPFR_RNDN);
415  else
416      mpfr_mul_si (temp1, temp1, MPFR_AI_THRESHOLD3, MPFR_RNDN);
417
418  mpfr_add (temp1, temp1, temp2, MPFR_RNDN);
419
420  if (mpfr_cmp_si (temp1, MPFR_AI_SCALE) > 0)
421    t = 1.;
422  else
423    t = 1000.;
424
425  mpfr_clear (temp1);
426  mpfr_clear (temp2);
427
428  return t;
429}
430
431int
432main (void)
433{
434  FILE *output;
435  struct speed_params2D param;
436  double (*speed_funcs[3]) (struct speed_params *s);
437
438  /* char filename[256] = "virtual_timing_ai.dat"; */
439  /* speed_funcs[0] = virtual_timing_ai1; */
440  /* speed_funcs[1] = virtual_timing_ai2; */
441
442  char filename[256] = "airy.dat";
443  speed_funcs[0] = timing_ai1;
444  speed_funcs[1] = timing_ai2;
445
446  speed_funcs[2] = NULL;
447  output = fopen (filename, "w");
448  if (output == NULL)
449    {
450      fprintf (stderr, "Can't open file '%s' for writing.\n", filename);
451      abort ();
452    }
453  param.min_x = -80;
454  param.max_x = 60;
455  param.min_prec = 50;
456  param.max_prec = 1500;
457  param.nb_points_x = 200;
458  param.nb_points_prec = 200;
459  param.logarithmic_scale_x  = 0;
460  param.logarithmic_scale_prec = 0;
461  param.speed_funcs = speed_funcs;
462
463  generate_2D_sample (output, param);
464
465  fclose (output);
466  mpfr_free_cache ();
467  return 0;
468}
469