1/* Multiply table generator for tile.
2   Copyright (C) 2011-2020 Free Software Foundation, Inc.
3   Contributed by Walter Lee (walt@tilera.com)
4
5   This file is part of GCC.
6
7   GCC is free software; you can redistribute it and/or modify it
8   under the terms of the GNU General Public License as published
9   by the Free Software Foundation; either version 3, or (at your
10   option) any later version.
11
12   GCC is distributed in the hope that it will be useful, but WITHOUT
13   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
15   License for more details.
16
17   You should have received a copy of the GNU General Public License
18   along with GCC; see the file COPYING3.  If not see
19   <http://www.gnu.org/licenses/>.  */
20
21/* This program creates a table used to compile multiply by constant
22   efficiently.
23
24   This program should be compiled by a c++ compiler.  If it's
25   compiled with -DTILEPRO, it generates the multiply table for
26   TILEPro; otherwise it generates the multiply table for TILE-Gx.
27   Running the program produces the table in stdout.
28
29   The program works by generating every possible combination of up to
30   MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
31   shift) and computing the multiplier computed by those instructions.
32   For example,
33
34   s2a r2,r1,r1
35   s2a r3,r2,r2
36
37   multiplies r1 by 25.
38
39   There are usually multiple instruction sequences to multiply by a
40   given constant. This program keeps only the cheapest one.
41   "Cheapest" is defined first by the minimum theoretical schedule
42   length, and if those are equal then by the number of instructions,
43   and if those are equal then by which instructions we "prefer"
44   (e.g. because we think one might take infinitesimally less power
45   than another, or simply because we arbitrarily pick one to be more
46   canonical).
47
48   Once this program has determined the best instruction sequence for
49   each multiplier, it emits them in a table sorted by the multiplier
50   so the user can binary-search it to look for a match.  The table is
51   pruned based on various criteria to keep its sizes reasonable.  */
52
53#include <stdio.h>
54#include <stdlib.h>
55#include <assert.h>
56#include <string.h>
57#define __STDC_LIMIT_MACROS
58#include <stdint.h>
59
60#include <map>
61
62#ifdef TILEPRO
63
64/* The string representing the architecture.  */
65#define ARCH "tilepro"
66
67/* The type for the multiplication.  */
68typedef int MUL_TYPE;
69
70#else
71
72/* The string representing the architecture.  */
73#define ARCH "tilegx"
74
75/* The type for the multiplication.  */
76typedef long long MUL_TYPE;
77
78#endif
79
80/* Longest instruction sequence this will produce. With the current
81   stupid algorithm runtime grows very quickly with this number.  */
82#define MAX_INSTRUCTIONS 4
83
84/* Maximum number of subexpressions in the expression DAG being
85   generated.  This is the same as the number of instructions, except
86   that zero and the original register we'd like to multiply by a
87   constant are also thrown into the mix.  */
88#define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
89
90#define MIN(x, y)  ((x) <= (y) ? (x) : (y))
91#define MAX(x, y)  ((x) >= (y) ? (x) : (y))
92
93/* For this program a unary op is one which has only one nonconstant
94   operand.  So shift left by 5 is considered unary.  */
95typedef MUL_TYPE (*unary_op_func) (MUL_TYPE);
96typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE);
97
98/* This describes an operation like 'add two registers' or 'left-shift
99   by 7'.
100
101   We call something a 'unary' operator if it only takes in one
102   register as input, even though it might have an implicit second
103   constant operand.  Currently this is used for left-shift by
104   constant.  */
105class Operator
106{
107public:
108  /* Construct for a binary operator.  */
109  Operator (const char *pattern, const char *name, binary_op_func func,
110	    int cost)
111    : m_pattern (pattern), m_name (name), m_top_index (-1),
112      m_unary_func (0), m_binary_func (func), m_cost (cost),
113      m_rhs_if_unary (0)
114  {
115  }
116
117  /* Construct for a unary operator.  */
118  Operator (const char *pattern, const char *name, unary_op_func func,
119	    int rhs_if_unary, int cost)
120    : m_pattern (pattern), m_name (name), m_top_index (-1),
121      m_unary_func (func), m_binary_func (0), m_cost (cost),
122      m_rhs_if_unary (rhs_if_unary)
123  {
124  }
125
126  bool is_unary () const
127  {
128    return m_binary_func == NULL;
129  }
130
131  /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3.  */
132  const char *m_pattern;
133
134  /* Name of the opcode for this operation, e.g. add.  */
135  const char *m_name;
136
137  /* We don't have enough bits in our output representation to store
138     the original insn_code value, so we store a compressed form
139     instead.  These values are decoded back into insn_code via the
140     machine-generated multiply_insn_seq_decode_opcode lookup
141     table.  */
142  int m_top_index;
143
144  /* Unary operator to apply, or NULL if this is a binary operator.  */
145  unary_op_func m_unary_func;
146
147  /* Binary operator to apply, or NULL if this is a unary operator.  */
148  binary_op_func m_binary_func;
149
150  /* Function of how expensive we consider this operator. Higher is
151     worse.  */
152  int m_cost;
153
154  /* the RHS value to write into the C file if unary; used for shift
155     count.  */
156  int m_rhs_if_unary;
157};
158
159
160/* An entry in an expression DAG.  */
161class Expr
162{
163public:
164  Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0),
165    m_critical_path_length (0)
166  {
167  }
168
169  /* The operator being applied to the operands.  */
170  const Operator *m_op;
171
172  /* The index of the left-hand operand in the array of subexpressions
173     already computed.  */
174  int m_lhs;
175
176  /* For binary ops ,this is the index of the left-hand operand in the
177     array of subexpressions already computed. For unary ops, it is
178     specific to the op (e.g. it might hold a constant shift
179     count).  */
180  int m_rhs;
181
182  /* The multiplier produced by this expression tree. For example, for
183     the tree ((x << 5) + x), the value would be 33.  */
184  MUL_TYPE m_produced_val;
185
186  /* How far is this expression from the root, i.e. how many cycles
187     minimum will it take to compute this?  */
188  int m_critical_path_length;
189};
190
191
192/* Make function pointers for the various linear operators we can
193   apply to compute a multiplicative value.  */
194
195static MUL_TYPE
196add (MUL_TYPE n1, MUL_TYPE n2)
197{
198  return n1 + n2;
199}
200
201static MUL_TYPE
202sub (MUL_TYPE n1, MUL_TYPE n2)
203{
204  return n1 - n2;
205}
206
207static MUL_TYPE
208s1a (MUL_TYPE n1, MUL_TYPE n2)
209{
210  return n1 * 2 + n2;
211}
212
213static MUL_TYPE
214s2a (MUL_TYPE n1, MUL_TYPE n2)
215{
216  return n1 * 4 + n2;
217}
218
219static MUL_TYPE
220s3a (MUL_TYPE n1, MUL_TYPE n2)
221{
222  return n1 * 8 + n2;
223}
224
225#define SHIFT(count)                            \
226static MUL_TYPE                                 \
227shift##count(MUL_TYPE n)                        \
228{                                               \
229  return n << (count);                          \
230}
231
232SHIFT (1);
233SHIFT (2);
234SHIFT (3);
235SHIFT (4);
236SHIFT (5);
237SHIFT (6);
238SHIFT (7);
239SHIFT (8);
240SHIFT (9);
241SHIFT (10);
242SHIFT (11);
243SHIFT (12);
244SHIFT (13);
245SHIFT (14);
246SHIFT (15);
247SHIFT (16);
248SHIFT (17);
249SHIFT (18);
250SHIFT (19);
251SHIFT (20);
252SHIFT (21);
253SHIFT (22);
254SHIFT (23);
255SHIFT (24);
256SHIFT (25);
257SHIFT (26);
258SHIFT (27);
259SHIFT (28);
260SHIFT (29);
261SHIFT (30);
262SHIFT (31);
263#ifndef TILEPRO
264SHIFT (32);
265SHIFT (33);
266SHIFT (34);
267SHIFT (35);
268SHIFT (36);
269SHIFT (37);
270SHIFT (38);
271SHIFT (39);
272SHIFT (40);
273SHIFT (41);
274SHIFT (42);
275SHIFT (43);
276SHIFT (44);
277SHIFT (45);
278SHIFT (46);
279SHIFT (47);
280SHIFT (48);
281SHIFT (49);
282SHIFT (50);
283SHIFT (51);
284SHIFT (52);
285SHIFT (53);
286SHIFT (54);
287SHIFT (55);
288SHIFT (56);
289SHIFT (57);
290SHIFT (58);
291SHIFT (59);
292SHIFT (60);
293SHIFT (61);
294SHIFT (62);
295SHIFT (63);
296#endif
297
298#ifdef TILEPRO
299static Operator ops[] = {
300  Operator ("CODE_FOR_addsi3", "add", add, 1040),
301  Operator ("CODE_FOR_subsi3", "sub", sub, 1041),
302  Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042),
303  Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043),
304  Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044),
305  /* Note: shl by 1 is not necessary, since adding a value to itself
306     produces the same result. But the architecture team thinks
307     left-shift may use slightly less power, so we might as well
308     prefer it.  */
309  Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001),
310  Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002),
311  Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003),
312  Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004),
313  Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005),
314  Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006),
315  Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007),
316  Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008),
317  Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009),
318  Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010),
319  Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011),
320  Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012),
321  Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013),
322  Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014),
323  Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015),
324  Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016),
325  Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017),
326  Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018),
327  Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019),
328  Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020),
329  Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021),
330  Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022),
331  Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023),
332  Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024),
333  Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025),
334  Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026),
335  Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027),
336  Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028),
337  Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029),
338  Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030),
339  Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031)
340};
341#else
342static Operator ops[] = {
343  Operator ("CODE_FOR_adddi3", "add", add, 1070),
344  Operator ("CODE_FOR_subdi3", "sub", sub, 1071),
345  Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072),
346  Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073),
347  Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074),
348  // Note: shl by 1 is not necessary, since adding a value to itself
349  // produces the same result. But the architecture team thinks left-shift
350  // may use slightly less power, so we might as well prefer it.
351  Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001),
352  Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002),
353  Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003),
354  Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004),
355  Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005),
356  Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006),
357  Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007),
358  Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008),
359  Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009),
360  Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010),
361  Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011),
362  Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012),
363  Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013),
364  Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014),
365  Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015),
366  Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016),
367  Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017),
368  Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018),
369  Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019),
370  Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020),
371  Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021),
372  Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022),
373  Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023),
374  Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024),
375  Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025),
376  Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026),
377  Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027),
378  Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028),
379  Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029),
380  Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030),
381  Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031),
382  Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032),
383  Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033),
384  Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034),
385  Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035),
386  Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036),
387  Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037),
388  Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038),
389  Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039),
390  Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040),
391  Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041),
392  Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042),
393  Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043),
394  Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044),
395  Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045),
396  Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046),
397  Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047),
398  Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048),
399  Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049),
400  Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050),
401  Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051),
402  Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052),
403  Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053),
404  Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054),
405  Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055),
406  Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056),
407  Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057),
408  Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058),
409  Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059),
410  Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060),
411  Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061),
412  Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062),
413  Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063)
414};
415#endif
416
417/* An ExpressionTree is an expression DAG.  */
418class ExpressionTree
419{
420public:
421  ExpressionTree () : m_num_vals (2)
422  {
423    m_exprs[0].m_produced_val = 0;
424    m_exprs[1].m_produced_val = 1;
425  }
426
427  void copy_technique_from (ExpressionTree * other)
428  {
429    /* TODO: write this; other can compute the same products with less
430       cost.  We update this ExpressionTree object because some int is
431       already mapped to it.  */
432  }
433
434  int m_num_vals;
435  Expr m_exprs[MAX_SUBEXPRS];
436
437  int cost () const
438  {
439    int cost = 0;
440    for (int j = 2; j < m_num_vals; j++)
441        cost += m_exprs[j].m_op->m_cost;
442      return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000;
443  }
444};
445
446
447typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap;
448
449
450static void
451find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution)
452{
453  /* Don't look more if we can't add any new values to the expression
454     tree.  */
455  const int num_vals = s.m_num_vals;
456  if (num_vals == MAX_SUBEXPRS)
457    return;
458
459  /* Grow array to make room for new values added as we loop.  */
460  s.m_num_vals = num_vals + 1;
461
462  const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op;
463  const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1;
464
465  for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++)
466    {
467      const Operator *const op = &ops[f];
468
469      for (int i = 0; i < num_vals; i++)
470	{
471	  /* Only allow zero as the first operand to sub, otherwise
472	     it is useless.  */
473	  if (i == 0 && op->m_binary_func != sub)
474	    continue;
475
476	  /* Unary ops don't actually use the second operand, so as a
477	     big hack we trick it into only looping once in the inner
478	     loop.  */
479	  const int j_end = op->is_unary () ? 2 : num_vals;
480
481	  /* We never allow zero as the second operand, as it is
482	     always useless.  */
483	  for (int j = 1; j < j_end; j++)
484	    {
485	      /* Does this expression use the immediately previous
486		 expression?  */
487	      const bool uses_prev_value =
488		(i == num_vals - 1
489		 || (!op->is_unary () && j == num_vals - 1));
490
491	      if (!uses_prev_value)
492		{
493		  /* For search efficiency, prune redundant
494		     instruction orderings.
495
496		     This op does not take the immediately preceding
497		     value as input, which means we could have done it
498		     in the previous slot. If its opcode is less than
499		     the previous instruction's, we'll declare this
500		     instruction order non-canonical and not pursue
501		     this branch of the search.  */
502		  if (op->m_top_index < prev_top_index)
503		    continue;
504		}
505
506	      MUL_TYPE n;
507	      if (op->is_unary ())
508		{
509		  n = op->m_unary_func (s.m_exprs[i].m_produced_val);
510		}
511	      else
512		{
513		  n = op->m_binary_func (s.m_exprs[i].m_produced_val,
514					 s.m_exprs[j].m_produced_val);
515		}
516
517	      bool duplicate = false;
518	      for (int k = 0; k < num_vals; k++)
519		if (n == s.m_exprs[j].m_produced_val)
520		  {
521		    duplicate = true;
522		    break;
523		  }
524
525	      if (duplicate)
526		continue;
527
528	      /* See if we found the best solution for n.  */
529	      Expr *e = &s.m_exprs[num_vals];
530	      e->m_op = op;
531	      e->m_lhs = i;
532	      e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j;
533	      e->m_produced_val = n;
534	      e->m_critical_path_length =
535		1 + MAX (s.m_exprs[i].m_critical_path_length,
536			 s.m_exprs[j].m_critical_path_length);
537
538	      ExpressionTreeMap::iterator best (best_solution.find (n));
539	      if (best == best_solution.end ()
540		  || (*best).second->cost () > s.cost ())
541		best_solution[n] = new ExpressionTree (s);
542
543	      /* Recurse and find more.  */
544	      find_sequences (s, best_solution);
545	    }
546	}
547    }
548
549  /* Restore old size.  */
550  s.m_num_vals = num_vals;
551}
552
553
554/* For each insn_code used by an operator, choose a compact number so
555   it takes less space in the output data structure. This prints out a
556   lookup table used to map the compactified number back to an
557   insn_code.  */
558static void
559create_insn_code_compression_table ()
560{
561  int next_index = 1;
562
563  /* Entry 0 must hold CODE_FOR_nothing to mean "end of array".  */
564  printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
565	  "  CODE_FOR_nothing /* must be first */ ", ARCH);
566
567  for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++)
568    {
569      Operator *op = &ops[i];
570      int index = -1;
571
572      /* See if some previous Operator was using the same insn_code.
573	 If so, reuse its table entry.  */
574      for (size_t j = 0; j < i; j++)
575	{
576	  Operator *old = &ops[j];
577	  if (strcmp (old->m_pattern, op->m_pattern) == 0)
578	    {
579	      index = old->m_top_index;
580	      break;
581	    }
582	}
583
584      if (index == -1)
585	{
586	  /* We need to make a new entry in the table.  */
587	  printf (",\n  %s", op->m_pattern);
588	  index = next_index++;
589	}
590
591      op->m_top_index = index;
592    }
593
594  printf ("\n};\n\n");
595}
596
597
598/* These are constants we've seen in code, that we want to keep table
599   entries for.  */
600static int multiply_constants_used[] = {
601  -11796480,
602  -27439,
603  -25148,
604  -22820,
605  -21709,
606  -20995,
607  -20284,
608  -20239,
609  -19595,
610  -19447,
611  -19183,
612  -19165,
613  -18730,
614  -17828,
615  -17799,
616  -17237,
617  -17036,
618  -16549,
619  -16423,
620  -16294,
621  -16244,
622  -16069,
623  -15137,
624  -15083,
625  -15038,
626  -14924,
627  -14905,
628  -14752,
629  -14731,
630  -14529,
631  -14273,
632  -14090,
633  -14084,
634  -14043,
635  -13850,
636  -13802,
637  -13631,
638  -13455,
639  -13275,
640  -12879,
641  -12700,
642  -12534,
643  -12399,
644  -12131,
645  -12112,
646  -12054,
647  -12019,
648  -11759,
649  -11585,
650  -11467,
651  -11395,
652  -11295,
653  -11248,
654  -11148,
655  -11116,
656  -11086,
657  -11059,
658  -11018,
659  -10811,
660  -10538,
661  -10258,
662  -10217,
663  -10033,
664  -9766,
665  -9754,
666  -9534,
667  -9527,
668  -9467,
669  -9262,
670  -9232,
671  -9222,
672  -9198,
673  -9191,
674  -9113,
675  -8825,
676  -8756,
677  -8697,
678  -8693,
679  -8565,
680  -8342,
681  -8208,
682  -8200,
683  -8170,
684  -8102,
685  -7770,
686  -7678,
687  -7562,
688  -7376,
689  -7373,
690  -7221,
691  -7121,
692  -6835,
693  -6810,
694  -6626,
695  -6581,
696  -6461,
697  -6278,
698  -6263,
699  -6163,
700  -6029,
701  -5816,
702  -5540,
703  -5461,
704  -5384,
705  -5329,
706  -4985,
707  -4926,
708  -4815,
709  -4788,
710  -4758,
711  -4433,
712  -4229,
713  -4209,
714  -4176,
715  -4104,
716  -4095,
717  -4078,
718  -3941,
719  -3818,
720  -3600,
721  -3474,
722  -3314,
723  -3264,
724  -3196,
725  -3072,
726  -2912,
727  -2836,
728  -2773,
729  -2269,
730  -2184,
731  -2100,
732  -1730,
733  -1512,
734  -1500,
735  -1396,
736  -1344,
737  -1312,
738  -1297,
739  -1059,
740  -1058,
741  1027,
742  1049,
743  1059,
744  1100,
745  1104,
746  1108,
747  1136,
748  1200,
749  1204,
750  1242,
751  1292,
752  1304,
753  1312,
754  1320,
755  1336,
756  1344,
757  1348,
758  1360,
759  1364,
760  1395,
761  1448,
762  1460,
763  1461,
764  1472,
765  1488,
766  1500,
767  1512,
768  1568,
769  1576,
770  1649,
771  1664,
772  1684,
773  1696,
774  1744,
775  1812,
776  1822,
777  1884,
778  1963,
779  1978,
780  2000,
781  2012,
782  2014,
783  2037,
784  2039,
785  2100,
786  2139,
787  2144,
788  2184,
789  2237,
790  2260,
791  2320,
792  2408,
793  2446,
794  2447,
795  2499,
796  2531,
797  2578,
798  2592,
799  2611,
800  2633,
801  2704,
802  2730,
803  2773,
804  2880,
805  2896,
806  2998,
807  3000,
808  3001,
809  3021,
810  3079,
811  3112,
812  3150,
813  3179,
814  3192,
815  3240,
816  3264,
817  3271,
818  3283,
819  3328,
820  3363,
821  3367,
822  3453,
823  3529,
824  3570,
825  3580,
826  3600,
827  3624,
828  3707,
829  3783,
830  3826,
831  3897,
832  3941,
833  3962,
834  3989,
835  4000,
836  4025,
837  4073,
838  4093,
839  4099,
840  4108,
841  4184,
842  4209,
843  4369,
844  4376,
845  4416,
846  4433,
847  4434,
848  4482,
849  4582,
850  4712,
851  4717,
852  4813,
853  4815,
854  4864,
855  5000,
856  5027,
857  5040,
858  5091,
859  5195,
860  5243,
861  5260,
862  5285,
863  5329,
864  5331,
865  5350,
866  5361,
867  5387,
868  5461,
869  5492,
870  5529,
871  5573,
872  5793,
873  5819,
874  5915,
875  5946,
876  5992,
877  6000,
878  6164,
879  6205,
880  6262,
881  6263,
882  6269,
883  6270,
884  6387,
885  6400,
886  6406,
887  6476,
888  6541,
889  6565,
890  6568,
891  6626,
892  6656,
893  6732,
894  6810,
895  6817,
896  6859,
897  7040,
898  7053,
899  7141,
900  7169,
901  7221,
902  7223,
903  7274,
904  7282,
905  7350,
906  7369,
907  7373,
908  7442,
909  7447,
910  7471,
911  7518,
912  7542,
913  7566,
914  7587,
915  7663,
916  7678,
917  7682,
918  7748,
919  7752,
920  7791,
921  8000,
922  8026,
923  8048,
924  8170,
925  8203,
926  8204,
927  8290,
928  8368,
929  8520,
930  8640,
931  8666,
932  8672,
933  8697,
934  8716,
935  8728,
936  8756,
937  8820,
938  8875,
939  8918,
940  8956,
941  9058,
942  9154,
943  9175,
944  9191,
945  9217,
946  9262,
947  9321,
948  9373,
949  9434,
950  9465,
951  9514,
952  9534,
953  9633,
954  9746,
955  9810,
956  9850,
957  9947,
958  9973,
959  10000,
960  10009,
961  10033,
962  10055,
963  10217,
964  10248,
965  10298,
966  10310,
967  10323,
968  10368,
969  10438,
970  10456,
971  10486,
972  10538,
973  10664,
974  10695,
975  10700,
976  10703,
977  10832,
978  10887,
979  10935,
980  10958,
981  11018,
982  11059,
983  11061,
984  11086,
985  11116,
986  11148,
987  11190,
988  11249,
989  11314,
990  11332,
991  11363,
992  11409,
993  11415,
994  11443,
995  11467,
996  11512,
997  11522,
998  11529,
999  11585,
1000  11759,
1001  11768,
1002  11795,
1003  11893,
1004  11997,
1005  12131,
1006  12299,
1007  12536,
1008  12543,
1009  12893,
1010  12945,
1011  12998,
1012  13109,
1013  13213,
1014  13685,
1015  13930,
1016  14023,
1017  14024,
1018  14271,
1019  14564,
1020  14647,
1021  15326,
1022  15850,
1023  15855,
1024  15929,
1025  16000,
1026  16154,
1027  16496,
1028  16807,
1029  16819,
1030  16984,
1031  17203,
1032  17223,
1033  17333,
1034  17760,
1035  17799,
1036  17837,
1037  18029,
1038  18068,
1039  18336,
1040  18515,
1041  19595,
1042  20017,
1043  20131,
1044  20862,
1045  20995,
1046  21709,
1047  22554,
1048  25000,
1049  25172,
1050  25600,
1051  25733,
1052  27439,
1053  38470,
1054  46802,
1055  50000,
1056  11796480,
1057  16843009,
1058  23592960,
1059};
1060
1061
1062const int num_mult_constants_used =
1063  (int)(sizeof multiply_constants_used
1064	/ sizeof multiply_constants_used[0]);
1065
1066
1067#define XSIZE (sizeof multiply_constants_used / \
1068	       sizeof multiply_constants_used[0] + 32) / 32
1069unsigned multiply_constants_avail[XSIZE];
1070#undef XSIZE
1071
1072
1073/* bsearch helper function.  */
1074static int
1075compare_constants (const void *key, const void *t)
1076{
1077  return (*(int*)key) - *((int*)t);
1078}
1079
1080
1081static int *
1082find_mult_constants_used (int multiplier)
1083{
1084  return (int *) bsearch (&multiplier, multiply_constants_used,
1085			  num_mult_constants_used,
1086			  sizeof multiply_constants_used[0],
1087			  compare_constants);
1088}
1089
1090
1091int num_ops (ExpressionTree *s)
1092{
1093  int n = 0;
1094  for (int i = 0; i < s->m_num_vals; i++)
1095    {
1096      Expr *e = &s->m_exprs[i];
1097      if (e->m_op != NULL)
1098	n++;
1099    }
1100  return n;
1101}
1102
1103
1104#ifdef TILEPRO
1105bool
1106tilepro_emit (int multiplier, int num_ops)
1107{
1108  int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier;
1109
1110  /* Keep constants in range [-1024, 1024].  */
1111  if (abs_multiplier <= 1024)
1112    return true;
1113
1114  /* Keep constants near powers of two.  */
1115  int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier));
1116  int next_pow2 = prev_pow2 << 1;
1117
1118  if ((abs_multiplier - prev_pow2 <= 10)
1119      || (next_pow2 - abs_multiplier <= 10))
1120    return true;
1121
1122  /* Keep constants near powers of ten.  */
1123  {
1124    long long j = 1;
1125    long long prev_pow10;
1126    long long next_pow10;
1127
1128    while (((j * 10) < abs_multiplier)
1129	   && (j < (j * 10)))
1130      j = j * 10;
1131
1132    prev_pow10 = j;
1133    next_pow10 = j * 10;
1134
1135    if ((abs_multiplier - prev_pow10 <= 10)
1136	|| (next_pow10 - abs_multiplier <= 10))
1137      return true;
1138  }
1139
1140  /* Keep short sequences that have two or fewer ops.  */
1141  if (num_ops <= 2)
1142    return true;
1143
1144  /* Keep constants that are mostly 0's or mostly 1's.  */
1145  if (__builtin_popcount (multiplier) <= 2 ||
1146      __builtin_popcount (multiplier) >= 30)
1147    return true;
1148
1149  /* Keep constants seen in actual code.  */
1150  if ((find_mult_constants_used (multiplier)))
1151    return true;
1152
1153  return false;
1154}
1155#else
1156bool
1157tilegx_emit (long long multiplier, int num_ops)
1158{
1159  long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier;
1160
1161  /* Keep constants in range [-1024, 1024].  */
1162  if (abs_multiplier <= 1024)
1163    return true;
1164
1165  /* Otherwise exclude sequences with four ops.  */
1166  if (num_ops > 3)
1167    return false;
1168
1169  /* Keep constants near powers of two.  */
1170  {
1171    unsigned long long prev_pow2 =
1172      1LL << (63 - __builtin_clzll (abs_multiplier));
1173    unsigned long long next_pow2 = prev_pow2 << 1;
1174
1175    /* handle overflow case. */
1176    if (next_pow2 == 0)
1177      {
1178	if (prev_pow2 - abs_multiplier <= 10)
1179	  return true;
1180      }
1181    else if ((abs_multiplier - prev_pow2 <= 10)
1182	     || (next_pow2 - abs_multiplier <= 10))
1183      return true;
1184  }
1185
1186  /* Keep constants near powers of ten.  */
1187  {
1188    long long j = 1;
1189    long long prev_pow10;
1190    long long next_pow10;
1191
1192    while (((j * 10) < abs_multiplier)
1193	   && (j < (j * 10)))
1194      j = j * 10;
1195
1196    prev_pow10 = j;
1197    next_pow10 = j * 10;
1198
1199    if ((abs_multiplier - prev_pow10 <= 100)
1200	|| (next_pow10
1201	    && (next_pow10 - abs_multiplier <= 100)))
1202      return true;
1203  }
1204
1205  if (num_ops <= 2)
1206    return true;
1207
1208  /* Keep constants that are mostly 0's or mostly 1's.  */
1209  if (__builtin_popcountll (multiplier) <= 2 ||
1210      __builtin_popcountll (multiplier) >= 62)
1211    return true;
1212
1213  /* Keep constants seen in actual code.  */
1214  if (find_mult_constants_used (multiplier))
1215    return true;
1216
1217  return false;
1218}
1219#endif
1220
1221
1222int
1223main ()
1224{
1225  ExpressionTreeMap best_solution;
1226  ExpressionTree s;
1227
1228#ifdef TILEPRO
1229  printf ("/* Constant multiply table for TILEPro.\n");
1230#else
1231  printf ("/* Constant multiply table for TILE-Gx.\n");
1232#endif
1233  printf ("   Copyright (C) 2011-2020 Free Software Foundation, Inc.\n");
1234  printf ("   Contributed by Walter Lee (walt@tilera.com)\n");
1235  printf ("\n");
1236  printf ("   This file is part of GCC.\n");
1237  printf ("\n");
1238  printf ("   GCC is free software; you can redistribute it and/or modify it\n");
1239  printf ("   under the terms of the GNU General Public License as published\n");
1240  printf ("   by the Free Software Foundation; either version 3, or (at your\n");
1241  printf ("   option) any later version.\n");
1242  printf ("\n");
1243  printf ("   GCC is distributed in the hope that it will be useful, but WITHOUT\n");
1244  printf ("   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
1245  printf ("   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public\n");
1246  printf ("   License for more details.\n");
1247  printf ("\n");
1248  printf ("   You should have received a copy of the GNU General Public License\n");
1249  printf ("   along with GCC; see the file COPYING3.  If not see\n");
1250  printf ("   <http://www.gnu.org/licenses/>.  */\n");
1251  printf ("\n");
1252  printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n");
1253  printf ("   Make any required changes there.  */\n");
1254  printf ("\n");
1255  printf ("#include \"config.h\"\n");
1256  printf ("#include \"system.h\"\n");
1257  printf ("#include \"coretypes.h\"\n");
1258  printf ("#include \"backend.h\"\n");
1259  printf ("#include \"rtl.h\"\n");
1260  printf ("#include \"expmed.h\"\n");
1261  printf ("#include \"%s-multiply.h\"\n\n", ARCH);
1262  create_insn_code_compression_table ();
1263
1264  /* Try all combinations of operators and see what constants we
1265     produce.  For each possible constant, record the most efficient
1266     way to generate it.  */
1267  find_sequences (s, best_solution);
1268
1269  printf ("const struct %s_multiply_insn_seq "
1270	  "%s_multiply_insn_seq_table[] = {\n",
1271	  ARCH, ARCH);
1272
1273  const char *comma_separator = "";
1274
1275  ExpressionTreeMap::iterator i (best_solution.begin ());
1276  for (; i != best_solution.end (); ++i)
1277    {
1278      ExpressionTree *s = (*i).second;
1279      const MUL_TYPE n = (*i).first;
1280
1281      if (n == 0 || n == 1)
1282	{
1283	  /* Both of these require zero operations, so these entries
1284	     are bogus and should never be used.  */
1285	  continue;
1286	}
1287
1288      /* Prune the list of entries to keep the table to a reasonable
1289	 size.  */
1290#ifdef TILEPRO
1291      if (!tilepro_emit (n, num_ops (s)))
1292	continue;
1293#else
1294      if (!tilegx_emit (n, num_ops (s)))
1295	continue;
1296#endif
1297
1298      printf ("%s", comma_separator);
1299
1300#ifdef TILEPRO
1301      const MUL_TYPE int_min = INT32_MIN;
1302#else
1303      const MUL_TYPE int_min = INT64_MIN;
1304#endif
1305      if (n == int_min)
1306	{
1307	  /* Handle C's woeful lack of unary negation. Without this,
1308	     printing out INT_MIN in decimal will yield an unsigned
1309	     int which could generate a compiler warning.  */
1310#ifdef TILEPRO
1311	  printf ("  {%d - 1 /* 0x%x */ ,\n   {", n + 1,
1312		  (unsigned) n);
1313#else
1314	  printf ("  {%lldll - 1 /* 0x%llx */ ,\n   {", n + 1,
1315		  (unsigned MUL_TYPE) n);
1316#endif
1317	}
1318      else
1319	{
1320#ifdef TILEPRO
1321	  printf ("  {%d /* 0x%x */ ,\n   {", n, (unsigned) n);
1322#else
1323	  printf ("  {%lldll /* 0x%llx */ ,\n   {", n, (unsigned MUL_TYPE) n);
1324#endif
1325	}
1326
1327      bool first = true;
1328      for (int j = 0; j < s->m_num_vals; j++)
1329	{
1330	  Expr *e = &s->m_exprs[j];
1331
1332	  const Operator *op = e->m_op;
1333	  if (op == NULL)
1334	    continue;
1335
1336	  char buf[1024];
1337	  snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s",
1338		    first ? "" : "    ",
1339		    op->m_top_index,
1340		    e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ",");
1341
1342	  char opnd0[10];
1343	  if (e->m_lhs)
1344	    snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs);
1345	  else
1346	    snprintf (opnd0, sizeof opnd0, "zero");
1347
1348	  printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
1349		  buf, op->m_name, j, opnd0,
1350		  op->is_unary () ? "" : "r", e->m_rhs);
1351
1352	  first = false;
1353	}
1354      printf ("   }");
1355      comma_separator = ",\n";
1356    }
1357
1358  printf ("\n};\n\n");
1359  printf ("const int %s_multiply_insn_seq_table_size =\n"
1360	  "  (int) (sizeof %s_multiply_insn_seq_table\n"
1361	  "         / sizeof %s_multiply_insn_seq_table[0]);\n",
1362	  ARCH, ARCH, ARCH);
1363
1364  return EXIT_SUCCESS;
1365}
1366