1/* Multiply table generator for tile. 2 Copyright (C) 2011-2020 Free Software Foundation, Inc. 3 Contributed by Walter Lee (walt@tilera.com) 4 5 This file is part of GCC. 6 7 GCC is free software; you can redistribute it and/or modify it 8 under the terms of the GNU General Public License as published 9 by the Free Software Foundation; either version 3, or (at your 10 option) any later version. 11 12 GCC is distributed in the hope that it will be useful, but WITHOUT 13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 15 License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with GCC; see the file COPYING3. If not see 19 <http://www.gnu.org/licenses/>. */ 20 21/* This program creates a table used to compile multiply by constant 22 efficiently. 23 24 This program should be compiled by a c++ compiler. If it's 25 compiled with -DTILEPRO, it generates the multiply table for 26 TILEPro; otherwise it generates the multiply table for TILE-Gx. 27 Running the program produces the table in stdout. 28 29 The program works by generating every possible combination of up to 30 MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left 31 shift) and computing the multiplier computed by those instructions. 32 For example, 33 34 s2a r2,r1,r1 35 s2a r3,r2,r2 36 37 multiplies r1 by 25. 38 39 There are usually multiple instruction sequences to multiply by a 40 given constant. This program keeps only the cheapest one. 41 "Cheapest" is defined first by the minimum theoretical schedule 42 length, and if those are equal then by the number of instructions, 43 and if those are equal then by which instructions we "prefer" 44 (e.g. because we think one might take infinitesimally less power 45 than another, or simply because we arbitrarily pick one to be more 46 canonical). 47 48 Once this program has determined the best instruction sequence for 49 each multiplier, it emits them in a table sorted by the multiplier 50 so the user can binary-search it to look for a match. The table is 51 pruned based on various criteria to keep its sizes reasonable. */ 52 53#include <stdio.h> 54#include <stdlib.h> 55#include <assert.h> 56#include <string.h> 57#define __STDC_LIMIT_MACROS 58#include <stdint.h> 59 60#include <map> 61 62#ifdef TILEPRO 63 64/* The string representing the architecture. */ 65#define ARCH "tilepro" 66 67/* The type for the multiplication. */ 68typedef int MUL_TYPE; 69 70#else 71 72/* The string representing the architecture. */ 73#define ARCH "tilegx" 74 75/* The type for the multiplication. */ 76typedef long long MUL_TYPE; 77 78#endif 79 80/* Longest instruction sequence this will produce. With the current 81 stupid algorithm runtime grows very quickly with this number. */ 82#define MAX_INSTRUCTIONS 4 83 84/* Maximum number of subexpressions in the expression DAG being 85 generated. This is the same as the number of instructions, except 86 that zero and the original register we'd like to multiply by a 87 constant are also thrown into the mix. */ 88#define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS) 89 90#define MIN(x, y) ((x) <= (y) ? (x) : (y)) 91#define MAX(x, y) ((x) >= (y) ? (x) : (y)) 92 93/* For this program a unary op is one which has only one nonconstant 94 operand. So shift left by 5 is considered unary. */ 95typedef MUL_TYPE (*unary_op_func) (MUL_TYPE); 96typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE); 97 98/* This describes an operation like 'add two registers' or 'left-shift 99 by 7'. 100 101 We call something a 'unary' operator if it only takes in one 102 register as input, even though it might have an implicit second 103 constant operand. Currently this is used for left-shift by 104 constant. */ 105class Operator 106{ 107public: 108 /* Construct for a binary operator. */ 109 Operator (const char *pattern, const char *name, binary_op_func func, 110 int cost) 111 : m_pattern (pattern), m_name (name), m_top_index (-1), 112 m_unary_func (0), m_binary_func (func), m_cost (cost), 113 m_rhs_if_unary (0) 114 { 115 } 116 117 /* Construct for a unary operator. */ 118 Operator (const char *pattern, const char *name, unary_op_func func, 119 int rhs_if_unary, int cost) 120 : m_pattern (pattern), m_name (name), m_top_index (-1), 121 m_unary_func (func), m_binary_func (0), m_cost (cost), 122 m_rhs_if_unary (rhs_if_unary) 123 { 124 } 125 126 bool is_unary () const 127 { 128 return m_binary_func == NULL; 129 } 130 131 /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3. */ 132 const char *m_pattern; 133 134 /* Name of the opcode for this operation, e.g. add. */ 135 const char *m_name; 136 137 /* We don't have enough bits in our output representation to store 138 the original insn_code value, so we store a compressed form 139 instead. These values are decoded back into insn_code via the 140 machine-generated multiply_insn_seq_decode_opcode lookup 141 table. */ 142 int m_top_index; 143 144 /* Unary operator to apply, or NULL if this is a binary operator. */ 145 unary_op_func m_unary_func; 146 147 /* Binary operator to apply, or NULL if this is a unary operator. */ 148 binary_op_func m_binary_func; 149 150 /* Function of how expensive we consider this operator. Higher is 151 worse. */ 152 int m_cost; 153 154 /* the RHS value to write into the C file if unary; used for shift 155 count. */ 156 int m_rhs_if_unary; 157}; 158 159 160/* An entry in an expression DAG. */ 161class Expr 162{ 163public: 164 Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0), 165 m_critical_path_length (0) 166 { 167 } 168 169 /* The operator being applied to the operands. */ 170 const Operator *m_op; 171 172 /* The index of the left-hand operand in the array of subexpressions 173 already computed. */ 174 int m_lhs; 175 176 /* For binary ops ,this is the index of the left-hand operand in the 177 array of subexpressions already computed. For unary ops, it is 178 specific to the op (e.g. it might hold a constant shift 179 count). */ 180 int m_rhs; 181 182 /* The multiplier produced by this expression tree. For example, for 183 the tree ((x << 5) + x), the value would be 33. */ 184 MUL_TYPE m_produced_val; 185 186 /* How far is this expression from the root, i.e. how many cycles 187 minimum will it take to compute this? */ 188 int m_critical_path_length; 189}; 190 191 192/* Make function pointers for the various linear operators we can 193 apply to compute a multiplicative value. */ 194 195static MUL_TYPE 196add (MUL_TYPE n1, MUL_TYPE n2) 197{ 198 return n1 + n2; 199} 200 201static MUL_TYPE 202sub (MUL_TYPE n1, MUL_TYPE n2) 203{ 204 return n1 - n2; 205} 206 207static MUL_TYPE 208s1a (MUL_TYPE n1, MUL_TYPE n2) 209{ 210 return n1 * 2 + n2; 211} 212 213static MUL_TYPE 214s2a (MUL_TYPE n1, MUL_TYPE n2) 215{ 216 return n1 * 4 + n2; 217} 218 219static MUL_TYPE 220s3a (MUL_TYPE n1, MUL_TYPE n2) 221{ 222 return n1 * 8 + n2; 223} 224 225#define SHIFT(count) \ 226static MUL_TYPE \ 227shift##count(MUL_TYPE n) \ 228{ \ 229 return n << (count); \ 230} 231 232SHIFT (1); 233SHIFT (2); 234SHIFT (3); 235SHIFT (4); 236SHIFT (5); 237SHIFT (6); 238SHIFT (7); 239SHIFT (8); 240SHIFT (9); 241SHIFT (10); 242SHIFT (11); 243SHIFT (12); 244SHIFT (13); 245SHIFT (14); 246SHIFT (15); 247SHIFT (16); 248SHIFT (17); 249SHIFT (18); 250SHIFT (19); 251SHIFT (20); 252SHIFT (21); 253SHIFT (22); 254SHIFT (23); 255SHIFT (24); 256SHIFT (25); 257SHIFT (26); 258SHIFT (27); 259SHIFT (28); 260SHIFT (29); 261SHIFT (30); 262SHIFT (31); 263#ifndef TILEPRO 264SHIFT (32); 265SHIFT (33); 266SHIFT (34); 267SHIFT (35); 268SHIFT (36); 269SHIFT (37); 270SHIFT (38); 271SHIFT (39); 272SHIFT (40); 273SHIFT (41); 274SHIFT (42); 275SHIFT (43); 276SHIFT (44); 277SHIFT (45); 278SHIFT (46); 279SHIFT (47); 280SHIFT (48); 281SHIFT (49); 282SHIFT (50); 283SHIFT (51); 284SHIFT (52); 285SHIFT (53); 286SHIFT (54); 287SHIFT (55); 288SHIFT (56); 289SHIFT (57); 290SHIFT (58); 291SHIFT (59); 292SHIFT (60); 293SHIFT (61); 294SHIFT (62); 295SHIFT (63); 296#endif 297 298#ifdef TILEPRO 299static Operator ops[] = { 300 Operator ("CODE_FOR_addsi3", "add", add, 1040), 301 Operator ("CODE_FOR_subsi3", "sub", sub, 1041), 302 Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042), 303 Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043), 304 Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044), 305 /* Note: shl by 1 is not necessary, since adding a value to itself 306 produces the same result. But the architecture team thinks 307 left-shift may use slightly less power, so we might as well 308 prefer it. */ 309 Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001), 310 Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002), 311 Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003), 312 Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004), 313 Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005), 314 Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006), 315 Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007), 316 Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008), 317 Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009), 318 Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010), 319 Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011), 320 Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012), 321 Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013), 322 Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014), 323 Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015), 324 Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016), 325 Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017), 326 Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018), 327 Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019), 328 Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020), 329 Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021), 330 Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022), 331 Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023), 332 Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024), 333 Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025), 334 Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026), 335 Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027), 336 Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028), 337 Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029), 338 Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030), 339 Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031) 340}; 341#else 342static Operator ops[] = { 343 Operator ("CODE_FOR_adddi3", "add", add, 1070), 344 Operator ("CODE_FOR_subdi3", "sub", sub, 1071), 345 Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072), 346 Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073), 347 Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074), 348 // Note: shl by 1 is not necessary, since adding a value to itself 349 // produces the same result. But the architecture team thinks left-shift 350 // may use slightly less power, so we might as well prefer it. 351 Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001), 352 Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002), 353 Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003), 354 Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004), 355 Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005), 356 Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006), 357 Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007), 358 Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008), 359 Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009), 360 Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010), 361 Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011), 362 Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012), 363 Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013), 364 Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014), 365 Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015), 366 Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016), 367 Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017), 368 Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018), 369 Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019), 370 Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020), 371 Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021), 372 Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022), 373 Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023), 374 Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024), 375 Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025), 376 Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026), 377 Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027), 378 Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028), 379 Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029), 380 Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030), 381 Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031), 382 Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032), 383 Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033), 384 Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034), 385 Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035), 386 Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036), 387 Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037), 388 Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038), 389 Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039), 390 Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040), 391 Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041), 392 Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042), 393 Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043), 394 Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044), 395 Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045), 396 Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046), 397 Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047), 398 Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048), 399 Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049), 400 Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050), 401 Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051), 402 Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052), 403 Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053), 404 Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054), 405 Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055), 406 Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056), 407 Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057), 408 Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058), 409 Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059), 410 Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060), 411 Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061), 412 Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062), 413 Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063) 414}; 415#endif 416 417/* An ExpressionTree is an expression DAG. */ 418class ExpressionTree 419{ 420public: 421 ExpressionTree () : m_num_vals (2) 422 { 423 m_exprs[0].m_produced_val = 0; 424 m_exprs[1].m_produced_val = 1; 425 } 426 427 void copy_technique_from (ExpressionTree * other) 428 { 429 /* TODO: write this; other can compute the same products with less 430 cost. We update this ExpressionTree object because some int is 431 already mapped to it. */ 432 } 433 434 int m_num_vals; 435 Expr m_exprs[MAX_SUBEXPRS]; 436 437 int cost () const 438 { 439 int cost = 0; 440 for (int j = 2; j < m_num_vals; j++) 441 cost += m_exprs[j].m_op->m_cost; 442 return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000; 443 } 444}; 445 446 447typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap; 448 449 450static void 451find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution) 452{ 453 /* Don't look more if we can't add any new values to the expression 454 tree. */ 455 const int num_vals = s.m_num_vals; 456 if (num_vals == MAX_SUBEXPRS) 457 return; 458 459 /* Grow array to make room for new values added as we loop. */ 460 s.m_num_vals = num_vals + 1; 461 462 const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op; 463 const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1; 464 465 for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++) 466 { 467 const Operator *const op = &ops[f]; 468 469 for (int i = 0; i < num_vals; i++) 470 { 471 /* Only allow zero as the first operand to sub, otherwise 472 it is useless. */ 473 if (i == 0 && op->m_binary_func != sub) 474 continue; 475 476 /* Unary ops don't actually use the second operand, so as a 477 big hack we trick it into only looping once in the inner 478 loop. */ 479 const int j_end = op->is_unary () ? 2 : num_vals; 480 481 /* We never allow zero as the second operand, as it is 482 always useless. */ 483 for (int j = 1; j < j_end; j++) 484 { 485 /* Does this expression use the immediately previous 486 expression? */ 487 const bool uses_prev_value = 488 (i == num_vals - 1 489 || (!op->is_unary () && j == num_vals - 1)); 490 491 if (!uses_prev_value) 492 { 493 /* For search efficiency, prune redundant 494 instruction orderings. 495 496 This op does not take the immediately preceding 497 value as input, which means we could have done it 498 in the previous slot. If its opcode is less than 499 the previous instruction's, we'll declare this 500 instruction order non-canonical and not pursue 501 this branch of the search. */ 502 if (op->m_top_index < prev_top_index) 503 continue; 504 } 505 506 MUL_TYPE n; 507 if (op->is_unary ()) 508 { 509 n = op->m_unary_func (s.m_exprs[i].m_produced_val); 510 } 511 else 512 { 513 n = op->m_binary_func (s.m_exprs[i].m_produced_val, 514 s.m_exprs[j].m_produced_val); 515 } 516 517 bool duplicate = false; 518 for (int k = 0; k < num_vals; k++) 519 if (n == s.m_exprs[j].m_produced_val) 520 { 521 duplicate = true; 522 break; 523 } 524 525 if (duplicate) 526 continue; 527 528 /* See if we found the best solution for n. */ 529 Expr *e = &s.m_exprs[num_vals]; 530 e->m_op = op; 531 e->m_lhs = i; 532 e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j; 533 e->m_produced_val = n; 534 e->m_critical_path_length = 535 1 + MAX (s.m_exprs[i].m_critical_path_length, 536 s.m_exprs[j].m_critical_path_length); 537 538 ExpressionTreeMap::iterator best (best_solution.find (n)); 539 if (best == best_solution.end () 540 || (*best).second->cost () > s.cost ()) 541 best_solution[n] = new ExpressionTree (s); 542 543 /* Recurse and find more. */ 544 find_sequences (s, best_solution); 545 } 546 } 547 } 548 549 /* Restore old size. */ 550 s.m_num_vals = num_vals; 551} 552 553 554/* For each insn_code used by an operator, choose a compact number so 555 it takes less space in the output data structure. This prints out a 556 lookup table used to map the compactified number back to an 557 insn_code. */ 558static void 559create_insn_code_compression_table () 560{ 561 int next_index = 1; 562 563 /* Entry 0 must hold CODE_FOR_nothing to mean "end of array". */ 564 printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n" 565 " CODE_FOR_nothing /* must be first */ ", ARCH); 566 567 for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++) 568 { 569 Operator *op = &ops[i]; 570 int index = -1; 571 572 /* See if some previous Operator was using the same insn_code. 573 If so, reuse its table entry. */ 574 for (size_t j = 0; j < i; j++) 575 { 576 Operator *old = &ops[j]; 577 if (strcmp (old->m_pattern, op->m_pattern) == 0) 578 { 579 index = old->m_top_index; 580 break; 581 } 582 } 583 584 if (index == -1) 585 { 586 /* We need to make a new entry in the table. */ 587 printf (",\n %s", op->m_pattern); 588 index = next_index++; 589 } 590 591 op->m_top_index = index; 592 } 593 594 printf ("\n};\n\n"); 595} 596 597 598/* These are constants we've seen in code, that we want to keep table 599 entries for. */ 600static int multiply_constants_used[] = { 601 -11796480, 602 -27439, 603 -25148, 604 -22820, 605 -21709, 606 -20995, 607 -20284, 608 -20239, 609 -19595, 610 -19447, 611 -19183, 612 -19165, 613 -18730, 614 -17828, 615 -17799, 616 -17237, 617 -17036, 618 -16549, 619 -16423, 620 -16294, 621 -16244, 622 -16069, 623 -15137, 624 -15083, 625 -15038, 626 -14924, 627 -14905, 628 -14752, 629 -14731, 630 -14529, 631 -14273, 632 -14090, 633 -14084, 634 -14043, 635 -13850, 636 -13802, 637 -13631, 638 -13455, 639 -13275, 640 -12879, 641 -12700, 642 -12534, 643 -12399, 644 -12131, 645 -12112, 646 -12054, 647 -12019, 648 -11759, 649 -11585, 650 -11467, 651 -11395, 652 -11295, 653 -11248, 654 -11148, 655 -11116, 656 -11086, 657 -11059, 658 -11018, 659 -10811, 660 -10538, 661 -10258, 662 -10217, 663 -10033, 664 -9766, 665 -9754, 666 -9534, 667 -9527, 668 -9467, 669 -9262, 670 -9232, 671 -9222, 672 -9198, 673 -9191, 674 -9113, 675 -8825, 676 -8756, 677 -8697, 678 -8693, 679 -8565, 680 -8342, 681 -8208, 682 -8200, 683 -8170, 684 -8102, 685 -7770, 686 -7678, 687 -7562, 688 -7376, 689 -7373, 690 -7221, 691 -7121, 692 -6835, 693 -6810, 694 -6626, 695 -6581, 696 -6461, 697 -6278, 698 -6263, 699 -6163, 700 -6029, 701 -5816, 702 -5540, 703 -5461, 704 -5384, 705 -5329, 706 -4985, 707 -4926, 708 -4815, 709 -4788, 710 -4758, 711 -4433, 712 -4229, 713 -4209, 714 -4176, 715 -4104, 716 -4095, 717 -4078, 718 -3941, 719 -3818, 720 -3600, 721 -3474, 722 -3314, 723 -3264, 724 -3196, 725 -3072, 726 -2912, 727 -2836, 728 -2773, 729 -2269, 730 -2184, 731 -2100, 732 -1730, 733 -1512, 734 -1500, 735 -1396, 736 -1344, 737 -1312, 738 -1297, 739 -1059, 740 -1058, 741 1027, 742 1049, 743 1059, 744 1100, 745 1104, 746 1108, 747 1136, 748 1200, 749 1204, 750 1242, 751 1292, 752 1304, 753 1312, 754 1320, 755 1336, 756 1344, 757 1348, 758 1360, 759 1364, 760 1395, 761 1448, 762 1460, 763 1461, 764 1472, 765 1488, 766 1500, 767 1512, 768 1568, 769 1576, 770 1649, 771 1664, 772 1684, 773 1696, 774 1744, 775 1812, 776 1822, 777 1884, 778 1963, 779 1978, 780 2000, 781 2012, 782 2014, 783 2037, 784 2039, 785 2100, 786 2139, 787 2144, 788 2184, 789 2237, 790 2260, 791 2320, 792 2408, 793 2446, 794 2447, 795 2499, 796 2531, 797 2578, 798 2592, 799 2611, 800 2633, 801 2704, 802 2730, 803 2773, 804 2880, 805 2896, 806 2998, 807 3000, 808 3001, 809 3021, 810 3079, 811 3112, 812 3150, 813 3179, 814 3192, 815 3240, 816 3264, 817 3271, 818 3283, 819 3328, 820 3363, 821 3367, 822 3453, 823 3529, 824 3570, 825 3580, 826 3600, 827 3624, 828 3707, 829 3783, 830 3826, 831 3897, 832 3941, 833 3962, 834 3989, 835 4000, 836 4025, 837 4073, 838 4093, 839 4099, 840 4108, 841 4184, 842 4209, 843 4369, 844 4376, 845 4416, 846 4433, 847 4434, 848 4482, 849 4582, 850 4712, 851 4717, 852 4813, 853 4815, 854 4864, 855 5000, 856 5027, 857 5040, 858 5091, 859 5195, 860 5243, 861 5260, 862 5285, 863 5329, 864 5331, 865 5350, 866 5361, 867 5387, 868 5461, 869 5492, 870 5529, 871 5573, 872 5793, 873 5819, 874 5915, 875 5946, 876 5992, 877 6000, 878 6164, 879 6205, 880 6262, 881 6263, 882 6269, 883 6270, 884 6387, 885 6400, 886 6406, 887 6476, 888 6541, 889 6565, 890 6568, 891 6626, 892 6656, 893 6732, 894 6810, 895 6817, 896 6859, 897 7040, 898 7053, 899 7141, 900 7169, 901 7221, 902 7223, 903 7274, 904 7282, 905 7350, 906 7369, 907 7373, 908 7442, 909 7447, 910 7471, 911 7518, 912 7542, 913 7566, 914 7587, 915 7663, 916 7678, 917 7682, 918 7748, 919 7752, 920 7791, 921 8000, 922 8026, 923 8048, 924 8170, 925 8203, 926 8204, 927 8290, 928 8368, 929 8520, 930 8640, 931 8666, 932 8672, 933 8697, 934 8716, 935 8728, 936 8756, 937 8820, 938 8875, 939 8918, 940 8956, 941 9058, 942 9154, 943 9175, 944 9191, 945 9217, 946 9262, 947 9321, 948 9373, 949 9434, 950 9465, 951 9514, 952 9534, 953 9633, 954 9746, 955 9810, 956 9850, 957 9947, 958 9973, 959 10000, 960 10009, 961 10033, 962 10055, 963 10217, 964 10248, 965 10298, 966 10310, 967 10323, 968 10368, 969 10438, 970 10456, 971 10486, 972 10538, 973 10664, 974 10695, 975 10700, 976 10703, 977 10832, 978 10887, 979 10935, 980 10958, 981 11018, 982 11059, 983 11061, 984 11086, 985 11116, 986 11148, 987 11190, 988 11249, 989 11314, 990 11332, 991 11363, 992 11409, 993 11415, 994 11443, 995 11467, 996 11512, 997 11522, 998 11529, 999 11585, 1000 11759, 1001 11768, 1002 11795, 1003 11893, 1004 11997, 1005 12131, 1006 12299, 1007 12536, 1008 12543, 1009 12893, 1010 12945, 1011 12998, 1012 13109, 1013 13213, 1014 13685, 1015 13930, 1016 14023, 1017 14024, 1018 14271, 1019 14564, 1020 14647, 1021 15326, 1022 15850, 1023 15855, 1024 15929, 1025 16000, 1026 16154, 1027 16496, 1028 16807, 1029 16819, 1030 16984, 1031 17203, 1032 17223, 1033 17333, 1034 17760, 1035 17799, 1036 17837, 1037 18029, 1038 18068, 1039 18336, 1040 18515, 1041 19595, 1042 20017, 1043 20131, 1044 20862, 1045 20995, 1046 21709, 1047 22554, 1048 25000, 1049 25172, 1050 25600, 1051 25733, 1052 27439, 1053 38470, 1054 46802, 1055 50000, 1056 11796480, 1057 16843009, 1058 23592960, 1059}; 1060 1061 1062const int num_mult_constants_used = 1063 (int)(sizeof multiply_constants_used 1064 / sizeof multiply_constants_used[0]); 1065 1066 1067#define XSIZE (sizeof multiply_constants_used / \ 1068 sizeof multiply_constants_used[0] + 32) / 32 1069unsigned multiply_constants_avail[XSIZE]; 1070#undef XSIZE 1071 1072 1073/* bsearch helper function. */ 1074static int 1075compare_constants (const void *key, const void *t) 1076{ 1077 return (*(int*)key) - *((int*)t); 1078} 1079 1080 1081static int * 1082find_mult_constants_used (int multiplier) 1083{ 1084 return (int *) bsearch (&multiplier, multiply_constants_used, 1085 num_mult_constants_used, 1086 sizeof multiply_constants_used[0], 1087 compare_constants); 1088} 1089 1090 1091int num_ops (ExpressionTree *s) 1092{ 1093 int n = 0; 1094 for (int i = 0; i < s->m_num_vals; i++) 1095 { 1096 Expr *e = &s->m_exprs[i]; 1097 if (e->m_op != NULL) 1098 n++; 1099 } 1100 return n; 1101} 1102 1103 1104#ifdef TILEPRO 1105bool 1106tilepro_emit (int multiplier, int num_ops) 1107{ 1108 int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier; 1109 1110 /* Keep constants in range [-1024, 1024]. */ 1111 if (abs_multiplier <= 1024) 1112 return true; 1113 1114 /* Keep constants near powers of two. */ 1115 int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier)); 1116 int next_pow2 = prev_pow2 << 1; 1117 1118 if ((abs_multiplier - prev_pow2 <= 10) 1119 || (next_pow2 - abs_multiplier <= 10)) 1120 return true; 1121 1122 /* Keep constants near powers of ten. */ 1123 { 1124 long long j = 1; 1125 long long prev_pow10; 1126 long long next_pow10; 1127 1128 while (((j * 10) < abs_multiplier) 1129 && (j < (j * 10))) 1130 j = j * 10; 1131 1132 prev_pow10 = j; 1133 next_pow10 = j * 10; 1134 1135 if ((abs_multiplier - prev_pow10 <= 10) 1136 || (next_pow10 - abs_multiplier <= 10)) 1137 return true; 1138 } 1139 1140 /* Keep short sequences that have two or fewer ops. */ 1141 if (num_ops <= 2) 1142 return true; 1143 1144 /* Keep constants that are mostly 0's or mostly 1's. */ 1145 if (__builtin_popcount (multiplier) <= 2 || 1146 __builtin_popcount (multiplier) >= 30) 1147 return true; 1148 1149 /* Keep constants seen in actual code. */ 1150 if ((find_mult_constants_used (multiplier))) 1151 return true; 1152 1153 return false; 1154} 1155#else 1156bool 1157tilegx_emit (long long multiplier, int num_ops) 1158{ 1159 long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier; 1160 1161 /* Keep constants in range [-1024, 1024]. */ 1162 if (abs_multiplier <= 1024) 1163 return true; 1164 1165 /* Otherwise exclude sequences with four ops. */ 1166 if (num_ops > 3) 1167 return false; 1168 1169 /* Keep constants near powers of two. */ 1170 { 1171 unsigned long long prev_pow2 = 1172 1LL << (63 - __builtin_clzll (abs_multiplier)); 1173 unsigned long long next_pow2 = prev_pow2 << 1; 1174 1175 /* handle overflow case. */ 1176 if (next_pow2 == 0) 1177 { 1178 if (prev_pow2 - abs_multiplier <= 10) 1179 return true; 1180 } 1181 else if ((abs_multiplier - prev_pow2 <= 10) 1182 || (next_pow2 - abs_multiplier <= 10)) 1183 return true; 1184 } 1185 1186 /* Keep constants near powers of ten. */ 1187 { 1188 long long j = 1; 1189 long long prev_pow10; 1190 long long next_pow10; 1191 1192 while (((j * 10) < abs_multiplier) 1193 && (j < (j * 10))) 1194 j = j * 10; 1195 1196 prev_pow10 = j; 1197 next_pow10 = j * 10; 1198 1199 if ((abs_multiplier - prev_pow10 <= 100) 1200 || (next_pow10 1201 && (next_pow10 - abs_multiplier <= 100))) 1202 return true; 1203 } 1204 1205 if (num_ops <= 2) 1206 return true; 1207 1208 /* Keep constants that are mostly 0's or mostly 1's. */ 1209 if (__builtin_popcountll (multiplier) <= 2 || 1210 __builtin_popcountll (multiplier) >= 62) 1211 return true; 1212 1213 /* Keep constants seen in actual code. */ 1214 if (find_mult_constants_used (multiplier)) 1215 return true; 1216 1217 return false; 1218} 1219#endif 1220 1221 1222int 1223main () 1224{ 1225 ExpressionTreeMap best_solution; 1226 ExpressionTree s; 1227 1228#ifdef TILEPRO 1229 printf ("/* Constant multiply table for TILEPro.\n"); 1230#else 1231 printf ("/* Constant multiply table for TILE-Gx.\n"); 1232#endif 1233 printf (" Copyright (C) 2011-2020 Free Software Foundation, Inc.\n"); 1234 printf (" Contributed by Walter Lee (walt@tilera.com)\n"); 1235 printf ("\n"); 1236 printf (" This file is part of GCC.\n"); 1237 printf ("\n"); 1238 printf (" GCC is free software; you can redistribute it and/or modify it\n"); 1239 printf (" under the terms of the GNU General Public License as published\n"); 1240 printf (" by the Free Software Foundation; either version 3, or (at your\n"); 1241 printf (" option) any later version.\n"); 1242 printf ("\n"); 1243 printf (" GCC is distributed in the hope that it will be useful, but WITHOUT\n"); 1244 printf (" ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n"); 1245 printf (" or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public\n"); 1246 printf (" License for more details.\n"); 1247 printf ("\n"); 1248 printf (" You should have received a copy of the GNU General Public License\n"); 1249 printf (" along with GCC; see the file COPYING3. If not see\n"); 1250 printf (" <http://www.gnu.org/licenses/>. */\n"); 1251 printf ("\n"); 1252 printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n"); 1253 printf (" Make any required changes there. */\n"); 1254 printf ("\n"); 1255 printf ("#include \"config.h\"\n"); 1256 printf ("#include \"system.h\"\n"); 1257 printf ("#include \"coretypes.h\"\n"); 1258 printf ("#include \"backend.h\"\n"); 1259 printf ("#include \"rtl.h\"\n"); 1260 printf ("#include \"expmed.h\"\n"); 1261 printf ("#include \"%s-multiply.h\"\n\n", ARCH); 1262 create_insn_code_compression_table (); 1263 1264 /* Try all combinations of operators and see what constants we 1265 produce. For each possible constant, record the most efficient 1266 way to generate it. */ 1267 find_sequences (s, best_solution); 1268 1269 printf ("const struct %s_multiply_insn_seq " 1270 "%s_multiply_insn_seq_table[] = {\n", 1271 ARCH, ARCH); 1272 1273 const char *comma_separator = ""; 1274 1275 ExpressionTreeMap::iterator i (best_solution.begin ()); 1276 for (; i != best_solution.end (); ++i) 1277 { 1278 ExpressionTree *s = (*i).second; 1279 const MUL_TYPE n = (*i).first; 1280 1281 if (n == 0 || n == 1) 1282 { 1283 /* Both of these require zero operations, so these entries 1284 are bogus and should never be used. */ 1285 continue; 1286 } 1287 1288 /* Prune the list of entries to keep the table to a reasonable 1289 size. */ 1290#ifdef TILEPRO 1291 if (!tilepro_emit (n, num_ops (s))) 1292 continue; 1293#else 1294 if (!tilegx_emit (n, num_ops (s))) 1295 continue; 1296#endif 1297 1298 printf ("%s", comma_separator); 1299 1300#ifdef TILEPRO 1301 const MUL_TYPE int_min = INT32_MIN; 1302#else 1303 const MUL_TYPE int_min = INT64_MIN; 1304#endif 1305 if (n == int_min) 1306 { 1307 /* Handle C's woeful lack of unary negation. Without this, 1308 printing out INT_MIN in decimal will yield an unsigned 1309 int which could generate a compiler warning. */ 1310#ifdef TILEPRO 1311 printf (" {%d - 1 /* 0x%x */ ,\n {", n + 1, 1312 (unsigned) n); 1313#else 1314 printf (" {%lldll - 1 /* 0x%llx */ ,\n {", n + 1, 1315 (unsigned MUL_TYPE) n); 1316#endif 1317 } 1318 else 1319 { 1320#ifdef TILEPRO 1321 printf (" {%d /* 0x%x */ ,\n {", n, (unsigned) n); 1322#else 1323 printf (" {%lldll /* 0x%llx */ ,\n {", n, (unsigned MUL_TYPE) n); 1324#endif 1325 } 1326 1327 bool first = true; 1328 for (int j = 0; j < s->m_num_vals; j++) 1329 { 1330 Expr *e = &s->m_exprs[j]; 1331 1332 const Operator *op = e->m_op; 1333 if (op == NULL) 1334 continue; 1335 1336 char buf[1024]; 1337 snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s", 1338 first ? "" : " ", 1339 op->m_top_index, 1340 e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ","); 1341 1342 char opnd0[10]; 1343 if (e->m_lhs) 1344 snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs); 1345 else 1346 snprintf (opnd0, sizeof opnd0, "zero"); 1347 1348 printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n", 1349 buf, op->m_name, j, opnd0, 1350 op->is_unary () ? "" : "r", e->m_rhs); 1351 1352 first = false; 1353 } 1354 printf (" }"); 1355 comma_separator = ",\n"; 1356 } 1357 1358 printf ("\n};\n\n"); 1359 printf ("const int %s_multiply_insn_seq_table_size =\n" 1360 " (int) (sizeof %s_multiply_insn_seq_table\n" 1361 " / sizeof %s_multiply_insn_seq_table[0]);\n", 1362 ARCH, ARCH, ARCH); 1363 1364 return EXIT_SUCCESS; 1365} 1366