isl_ast_codegen.c revision 1.1.1.1
1/* 2 * Copyright 2012-2014 Ecole Normale Superieure 3 * Copyright 2014 INRIA Rocquencourt 4 * 5 * Use of this software is governed by the MIT license 6 * 7 * Written by Sven Verdoolaege, 8 * Ecole Normale Superieure, 45 rue d���Ulm, 75230 Paris, France 9 * and Inria Paris - Rocquencourt, Domaine de Voluceau - Rocquencourt, 10 * B.P. 105 - 78153 Le Chesnay, France 11 */ 12 13#include <limits.h> 14#include <isl/id.h> 15#include <isl/val.h> 16#include <isl/space.h> 17#include <isl/aff.h> 18#include <isl/constraint.h> 19#include <isl/set.h> 20#include <isl/ilp.h> 21#include <isl/union_set.h> 22#include <isl/union_map.h> 23#include <isl/schedule_node.h> 24#include <isl/options.h> 25#include <isl_sort.h> 26#include <isl_tarjan.h> 27#include <isl_ast_private.h> 28#include <isl_ast_build_expr.h> 29#include <isl_ast_build_private.h> 30#include <isl_ast_graft_private.h> 31 32/* Try and reduce the number of disjuncts in the representation of "set", 33 * without dropping explicit representations of local variables. 34 */ 35static __isl_give isl_set *isl_set_coalesce_preserve(__isl_take isl_set *set) 36{ 37 isl_ctx *ctx; 38 int save_preserve; 39 40 if (!set) 41 return NULL; 42 43 ctx = isl_set_get_ctx(set); 44 save_preserve = isl_options_get_coalesce_preserve_locals(ctx); 45 isl_options_set_coalesce_preserve_locals(ctx, 1); 46 set = isl_set_coalesce(set); 47 isl_options_set_coalesce_preserve_locals(ctx, save_preserve); 48 return set; 49} 50 51/* Data used in generate_domain. 52 * 53 * "build" is the input build. 54 * "list" collects the results. 55 */ 56struct isl_generate_domain_data { 57 isl_ast_build *build; 58 59 isl_ast_graft_list *list; 60}; 61 62static __isl_give isl_ast_graft_list *generate_next_level( 63 __isl_take isl_union_map *executed, 64 __isl_take isl_ast_build *build); 65static __isl_give isl_ast_graft_list *generate_code( 66 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build, 67 int internal); 68 69/* Generate an AST for a single domain based on 70 * the (non single valued) inverse schedule "executed". 71 * 72 * We extend the schedule with the iteration domain 73 * and continue generating through a call to generate_code. 74 * 75 * In particular, if executed has the form 76 * 77 * S -> D 78 * 79 * then we continue generating code on 80 * 81 * [S -> D] -> D 82 * 83 * The extended inverse schedule is clearly single valued 84 * ensuring that the nested generate_code will not reach this function, 85 * but will instead create calls to all elements of D that need 86 * to be executed from the current schedule domain. 87 */ 88static isl_stat generate_non_single_valued(__isl_take isl_map *executed, 89 struct isl_generate_domain_data *data) 90{ 91 isl_map *identity; 92 isl_ast_build *build; 93 isl_ast_graft_list *list; 94 95 build = isl_ast_build_copy(data->build); 96 97 identity = isl_set_identity(isl_map_range(isl_map_copy(executed))); 98 executed = isl_map_domain_product(executed, identity); 99 build = isl_ast_build_set_single_valued(build, 1); 100 101 list = generate_code(isl_union_map_from_map(executed), build, 1); 102 103 data->list = isl_ast_graft_list_concat(data->list, list); 104 105 return isl_stat_ok; 106} 107 108/* Call the at_each_domain callback, if requested by the user, 109 * after recording the current inverse schedule in the build. 110 */ 111static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft, 112 __isl_keep isl_map *executed, __isl_keep isl_ast_build *build) 113{ 114 if (!graft || !build) 115 return isl_ast_graft_free(graft); 116 if (!build->at_each_domain) 117 return graft; 118 119 build = isl_ast_build_copy(build); 120 build = isl_ast_build_set_executed(build, 121 isl_union_map_from_map(isl_map_copy(executed))); 122 if (!build) 123 return isl_ast_graft_free(graft); 124 125 graft->node = build->at_each_domain(graft->node, 126 build, build->at_each_domain_user); 127 isl_ast_build_free(build); 128 129 if (!graft->node) 130 graft = isl_ast_graft_free(graft); 131 132 return graft; 133} 134 135/* Generate a call expression for the single executed 136 * domain element "map" and put a guard around it based its (simplified) 137 * domain. "executed" is the original inverse schedule from which "map" 138 * has been derived. In particular, "map" is either identical to "executed" 139 * or it is the result of gisting "executed" with respect to the build domain. 140 * "executed" is only used if there is an at_each_domain callback. 141 * 142 * At this stage, any pending constraints in the build can no longer 143 * be simplified with respect to any enforced constraints since 144 * the call node does not have any enforced constraints. 145 * Since all pending constraints not covered by any enforced constraints 146 * will be added as a guard to the graft in create_node_scaled, 147 * even in the eliminated case, the pending constraints 148 * can be considered to have been generated by outer constructs. 149 * 150 * If the user has set an at_each_domain callback, it is called 151 * on the constructed call expression node. 152 */ 153static isl_stat add_domain(__isl_take isl_map *executed, 154 __isl_take isl_map *map, struct isl_generate_domain_data *data) 155{ 156 isl_ast_build *build; 157 isl_ast_graft *graft; 158 isl_ast_graft_list *list; 159 isl_set *guard, *pending; 160 161 build = isl_ast_build_copy(data->build); 162 pending = isl_ast_build_get_pending(build); 163 build = isl_ast_build_replace_pending_by_guard(build, pending); 164 165 guard = isl_map_domain(isl_map_copy(map)); 166 guard = isl_set_compute_divs(guard); 167 guard = isl_set_coalesce_preserve(guard); 168 guard = isl_set_gist(guard, isl_ast_build_get_generated(build)); 169 guard = isl_ast_build_specialize(build, guard); 170 171 graft = isl_ast_graft_alloc_domain(map, build); 172 graft = at_each_domain(graft, executed, build); 173 isl_ast_build_free(build); 174 isl_map_free(executed); 175 graft = isl_ast_graft_add_guard(graft, guard, data->build); 176 177 list = isl_ast_graft_list_from_ast_graft(graft); 178 data->list = isl_ast_graft_list_concat(data->list, list); 179 180 return isl_stat_ok; 181} 182 183/* Generate an AST for a single domain based on 184 * the inverse schedule "executed" and add it to data->list. 185 * 186 * If there is more than one domain element associated to the current 187 * schedule "time", then we need to continue the generation process 188 * in generate_non_single_valued. 189 * Note that the inverse schedule being single-valued may depend 190 * on constraints that are only available in the original context 191 * domain specified by the user. We therefore first introduce 192 * some of the constraints of data->build->domain. In particular, 193 * we intersect with a single-disjunct approximation of this set. 194 * We perform this approximation to avoid further splitting up 195 * the executed relation, possibly introducing a disjunctive guard 196 * on the statement. 197 * 198 * On the other hand, we only perform the test after having taken the gist 199 * of the domain as the resulting map is the one from which the call 200 * expression is constructed. Using this map to construct the call 201 * expression usually yields simpler results in cases where the original 202 * map is not obviously single-valued. 203 * If the original map is obviously single-valued, then the gist 204 * operation is skipped. 205 * 206 * Because we perform the single-valuedness test on the gisted map, 207 * we may in rare cases fail to recognize that the inverse schedule 208 * is single-valued. This becomes problematic if this happens 209 * from the recursive call through generate_non_single_valued 210 * as we would then end up in an infinite recursion. 211 * We therefore check if we are inside a call to generate_non_single_valued 212 * and revert to the ungisted map if the gisted map turns out not to be 213 * single-valued. 214 * 215 * Otherwise, call add_domain to generate a call expression (with guard) and 216 * to call the at_each_domain callback, if any. 217 */ 218static isl_stat generate_domain(__isl_take isl_map *executed, void *user) 219{ 220 struct isl_generate_domain_data *data = user; 221 isl_set *domain; 222 isl_map *map = NULL; 223 int empty, sv; 224 225 domain = isl_ast_build_get_domain(data->build); 226 domain = isl_set_from_basic_set(isl_set_simple_hull(domain)); 227 executed = isl_map_intersect_domain(executed, domain); 228 empty = isl_map_is_empty(executed); 229 if (empty < 0) 230 goto error; 231 if (empty) { 232 isl_map_free(executed); 233 return isl_stat_ok; 234 } 235 236 sv = isl_map_plain_is_single_valued(executed); 237 if (sv < 0) 238 goto error; 239 if (sv) 240 return add_domain(executed, isl_map_copy(executed), data); 241 242 executed = isl_map_coalesce(executed); 243 map = isl_map_copy(executed); 244 map = isl_ast_build_compute_gist_map_domain(data->build, map); 245 sv = isl_map_is_single_valued(map); 246 if (sv < 0) 247 goto error; 248 if (!sv) { 249 isl_map_free(map); 250 if (data->build->single_valued) 251 map = isl_map_copy(executed); 252 else 253 return generate_non_single_valued(executed, data); 254 } 255 256 return add_domain(executed, map, data); 257error: 258 isl_map_free(map); 259 isl_map_free(executed); 260 return isl_stat_error; 261} 262 263/* Call build->create_leaf to a create "leaf" node in the AST, 264 * encapsulate the result in an isl_ast_graft and return the result 265 * as a 1-element list. 266 * 267 * Note that the node returned by the user may be an entire tree. 268 * 269 * Since the node itself cannot enforce any constraints, we turn 270 * all pending constraints into guards and add them to the resulting 271 * graft to ensure that they will be generated. 272 * 273 * Before we pass control to the user, we first clear some information 274 * from the build that is (presumbably) only meaningful 275 * for the current code generation. 276 * This includes the create_leaf callback itself, so we make a copy 277 * of the build first. 278 */ 279static __isl_give isl_ast_graft_list *call_create_leaf( 280 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 281{ 282 isl_set *guard; 283 isl_ast_node *node; 284 isl_ast_graft *graft; 285 isl_ast_build *user_build; 286 287 guard = isl_ast_build_get_pending(build); 288 user_build = isl_ast_build_copy(build); 289 user_build = isl_ast_build_replace_pending_by_guard(user_build, 290 isl_set_copy(guard)); 291 user_build = isl_ast_build_set_executed(user_build, executed); 292 user_build = isl_ast_build_clear_local_info(user_build); 293 if (!user_build) 294 node = NULL; 295 else 296 node = build->create_leaf(user_build, build->create_leaf_user); 297 graft = isl_ast_graft_alloc(node, build); 298 graft = isl_ast_graft_add_guard(graft, guard, build); 299 isl_ast_build_free(build); 300 return isl_ast_graft_list_from_ast_graft(graft); 301} 302 303static __isl_give isl_ast_graft_list *build_ast_from_child( 304 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 305 __isl_take isl_union_map *executed); 306 307/* Generate an AST after having handled the complete schedule 308 * of this call to the code generator or the complete band 309 * if we are generating an AST from a schedule tree. 310 * 311 * If we are inside a band node, then move on to the child of the band. 312 * 313 * If the user has specified a create_leaf callback, control 314 * is passed to the user in call_create_leaf. 315 * 316 * Otherwise, we generate one or more calls for each individual 317 * domain in generate_domain. 318 */ 319static __isl_give isl_ast_graft_list *generate_inner_level( 320 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 321{ 322 isl_ctx *ctx; 323 struct isl_generate_domain_data data = { build }; 324 325 if (!build || !executed) 326 goto error; 327 328 if (isl_ast_build_has_schedule_node(build)) { 329 isl_schedule_node *node; 330 node = isl_ast_build_get_schedule_node(build); 331 build = isl_ast_build_reset_schedule_node(build); 332 return build_ast_from_child(build, node, executed); 333 } 334 335 if (build->create_leaf) 336 return call_create_leaf(executed, build); 337 338 ctx = isl_union_map_get_ctx(executed); 339 data.list = isl_ast_graft_list_alloc(ctx, 0); 340 if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0) 341 data.list = isl_ast_graft_list_free(data.list); 342 343 if (0) 344error: data.list = NULL; 345 isl_ast_build_free(build); 346 isl_union_map_free(executed); 347 return data.list; 348} 349 350/* Call the before_each_for callback, if requested by the user. 351 */ 352static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node, 353 __isl_keep isl_ast_build *build) 354{ 355 isl_id *id; 356 357 if (!node || !build) 358 return isl_ast_node_free(node); 359 if (!build->before_each_for) 360 return node; 361 id = build->before_each_for(build, build->before_each_for_user); 362 node = isl_ast_node_set_annotation(node, id); 363 return node; 364} 365 366/* Call the after_each_for callback, if requested by the user. 367 */ 368static __isl_give isl_ast_graft *after_each_for(__isl_take isl_ast_graft *graft, 369 __isl_keep isl_ast_build *build) 370{ 371 if (!graft || !build) 372 return isl_ast_graft_free(graft); 373 if (!build->after_each_for) 374 return graft; 375 graft->node = build->after_each_for(graft->node, build, 376 build->after_each_for_user); 377 if (!graft->node) 378 return isl_ast_graft_free(graft); 379 return graft; 380} 381 382/* Plug in all the know values of the current and outer dimensions 383 * in the domain of "executed". In principle, we only need to plug 384 * in the known value of the current dimension since the values of 385 * outer dimensions have been plugged in already. 386 * However, it turns out to be easier to just plug in all known values. 387 */ 388static __isl_give isl_union_map *plug_in_values( 389 __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build) 390{ 391 return isl_ast_build_substitute_values_union_map_domain(build, 392 executed); 393} 394 395/* Check if the constraint "c" is a lower bound on dimension "pos", 396 * an upper bound, or independent of dimension "pos". 397 */ 398static int constraint_type(isl_constraint *c, int pos) 399{ 400 if (isl_constraint_is_lower_bound(c, isl_dim_set, pos)) 401 return 1; 402 if (isl_constraint_is_upper_bound(c, isl_dim_set, pos)) 403 return 2; 404 return 0; 405} 406 407/* Compare the types of the constraints "a" and "b", 408 * resulting in constraints that are independent of "depth" 409 * to be sorted before the lower bounds on "depth", which in 410 * turn are sorted before the upper bounds on "depth". 411 */ 412static int cmp_constraint(__isl_keep isl_constraint *a, 413 __isl_keep isl_constraint *b, void *user) 414{ 415 int *depth = user; 416 int t1 = constraint_type(a, *depth); 417 int t2 = constraint_type(b, *depth); 418 419 return t1 - t2; 420} 421 422/* Extract a lower bound on dimension "pos" from constraint "c". 423 * 424 * If the constraint is of the form 425 * 426 * a x + f(...) >= 0 427 * 428 * then we essentially return 429 * 430 * l = ceil(-f(...)/a) 431 * 432 * However, if the current dimension is strided, then we need to make 433 * sure that the lower bound we construct is of the form 434 * 435 * f + s a 436 * 437 * with f the offset and s the stride. 438 * We therefore compute 439 * 440 * f + s * ceil((l - f)/s) 441 */ 442static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c, 443 int pos, __isl_keep isl_ast_build *build) 444{ 445 isl_aff *aff; 446 447 aff = isl_constraint_get_bound(c, isl_dim_set, pos); 448 aff = isl_aff_ceil(aff); 449 450 if (isl_ast_build_has_stride(build, pos)) { 451 isl_aff *offset; 452 isl_val *stride; 453 454 offset = isl_ast_build_get_offset(build, pos); 455 stride = isl_ast_build_get_stride(build, pos); 456 457 aff = isl_aff_sub(aff, isl_aff_copy(offset)); 458 aff = isl_aff_scale_down_val(aff, isl_val_copy(stride)); 459 aff = isl_aff_ceil(aff); 460 aff = isl_aff_scale_val(aff, stride); 461 aff = isl_aff_add(aff, offset); 462 } 463 464 aff = isl_ast_build_compute_gist_aff(build, aff); 465 466 return aff; 467} 468 469/* Return the exact lower bound (or upper bound if "upper" is set) 470 * of "domain" as a piecewise affine expression. 471 * 472 * If we are computing a lower bound (of a strided dimension), then 473 * we need to make sure it is of the form 474 * 475 * f + s a 476 * 477 * where f is the offset and s is the stride. 478 * We therefore need to include the stride constraint before computing 479 * the minimum. 480 */ 481static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain, 482 __isl_keep isl_ast_build *build, int upper) 483{ 484 isl_set *stride; 485 isl_map *it_map; 486 isl_pw_aff *pa; 487 isl_pw_multi_aff *pma; 488 489 domain = isl_set_copy(domain); 490 if (!upper) { 491 stride = isl_ast_build_get_stride_constraint(build); 492 domain = isl_set_intersect(domain, stride); 493 } 494 it_map = isl_ast_build_map_to_iterator(build, domain); 495 if (upper) 496 pma = isl_map_lexmax_pw_multi_aff(it_map); 497 else 498 pma = isl_map_lexmin_pw_multi_aff(it_map); 499 pa = isl_pw_multi_aff_get_pw_aff(pma, 0); 500 isl_pw_multi_aff_free(pma); 501 pa = isl_ast_build_compute_gist_pw_aff(build, pa); 502 pa = isl_pw_aff_coalesce(pa); 503 504 return pa; 505} 506 507/* Callback for sorting the isl_pw_aff_list passed to reduce_list and 508 * remove_redundant_lower_bounds. 509 */ 510static int reduce_list_cmp(__isl_keep isl_pw_aff *a, __isl_keep isl_pw_aff *b, 511 void *user) 512{ 513 return isl_pw_aff_plain_cmp(a, b); 514} 515 516/* Given a list of lower bounds "list", remove those that are redundant 517 * with respect to the other bounds in "list" and the domain of "build". 518 * 519 * We first sort the bounds in the same way as they would be sorted 520 * by set_for_node_expressions so that we can try and remove the last 521 * bounds first. 522 * 523 * For a lower bound to be effective, there needs to be at least 524 * one domain element for which it is larger than all other lower bounds. 525 * For each lower bound we therefore intersect the domain with 526 * the conditions that it is larger than all other bounds and 527 * check whether the result is empty. If so, the bound can be removed. 528 */ 529static __isl_give isl_pw_aff_list *remove_redundant_lower_bounds( 530 __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build) 531{ 532 int i, j; 533 isl_size n; 534 isl_set *domain; 535 536 list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL); 537 538 n = isl_pw_aff_list_n_pw_aff(list); 539 if (n < 0) 540 return isl_pw_aff_list_free(list); 541 if (n <= 1) 542 return list; 543 544 domain = isl_ast_build_get_domain(build); 545 546 for (i = n - 1; i >= 0; --i) { 547 isl_pw_aff *pa_i; 548 isl_set *domain_i; 549 int empty; 550 551 domain_i = isl_set_copy(domain); 552 pa_i = isl_pw_aff_list_get_pw_aff(list, i); 553 554 for (j = 0; j < n; ++j) { 555 isl_pw_aff *pa_j; 556 isl_set *better; 557 558 if (j == i) 559 continue; 560 561 pa_j = isl_pw_aff_list_get_pw_aff(list, j); 562 better = isl_pw_aff_gt_set(isl_pw_aff_copy(pa_i), pa_j); 563 domain_i = isl_set_intersect(domain_i, better); 564 } 565 566 empty = isl_set_is_empty(domain_i); 567 568 isl_set_free(domain_i); 569 isl_pw_aff_free(pa_i); 570 571 if (empty < 0) 572 goto error; 573 if (!empty) 574 continue; 575 list = isl_pw_aff_list_drop(list, i, 1); 576 n--; 577 } 578 579 isl_set_free(domain); 580 581 return list; 582error: 583 isl_set_free(domain); 584 return isl_pw_aff_list_free(list); 585} 586 587/* Extract a lower bound on dimension "pos" from each constraint 588 * in "constraints" and return the list of lower bounds. 589 * If "constraints" has zero elements, then we extract a lower bound 590 * from "domain" instead. 591 * 592 * If the current dimension is strided, then the lower bound 593 * is adjusted by lower_bound to match the stride information. 594 * This modification may make one or more lower bounds redundant 595 * with respect to the other lower bounds. We therefore check 596 * for this condition and remove the redundant lower bounds. 597 */ 598static __isl_give isl_pw_aff_list *lower_bounds( 599 __isl_keep isl_constraint_list *constraints, int pos, 600 __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) 601{ 602 isl_ctx *ctx; 603 isl_pw_aff_list *list; 604 int i; 605 isl_size n; 606 607 if (!build) 608 return NULL; 609 610 n = isl_constraint_list_n_constraint(constraints); 611 if (n < 0) 612 return NULL; 613 if (n == 0) { 614 isl_pw_aff *pa; 615 pa = exact_bound(domain, build, 0); 616 return isl_pw_aff_list_from_pw_aff(pa); 617 } 618 619 ctx = isl_ast_build_get_ctx(build); 620 list = isl_pw_aff_list_alloc(ctx,n); 621 622 for (i = 0; i < n; ++i) { 623 isl_aff *aff; 624 isl_constraint *c; 625 626 c = isl_constraint_list_get_constraint(constraints, i); 627 aff = lower_bound(c, pos, build); 628 isl_constraint_free(c); 629 list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff)); 630 } 631 632 if (isl_ast_build_has_stride(build, pos)) 633 list = remove_redundant_lower_bounds(list, build); 634 635 return list; 636} 637 638/* Extract an upper bound on dimension "pos" from each constraint 639 * in "constraints" and return the list of upper bounds. 640 * If "constraints" has zero elements, then we extract an upper bound 641 * from "domain" instead. 642 */ 643static __isl_give isl_pw_aff_list *upper_bounds( 644 __isl_keep isl_constraint_list *constraints, int pos, 645 __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) 646{ 647 isl_ctx *ctx; 648 isl_pw_aff_list *list; 649 int i; 650 isl_size n; 651 652 n = isl_constraint_list_n_constraint(constraints); 653 if (n < 0) 654 return NULL; 655 if (n == 0) { 656 isl_pw_aff *pa; 657 pa = exact_bound(domain, build, 1); 658 return isl_pw_aff_list_from_pw_aff(pa); 659 } 660 661 ctx = isl_ast_build_get_ctx(build); 662 list = isl_pw_aff_list_alloc(ctx,n); 663 664 for (i = 0; i < n; ++i) { 665 isl_aff *aff; 666 isl_constraint *c; 667 668 c = isl_constraint_list_get_constraint(constraints, i); 669 aff = isl_constraint_get_bound(c, isl_dim_set, pos); 670 isl_constraint_free(c); 671 aff = isl_aff_floor(aff); 672 list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff)); 673 } 674 675 return list; 676} 677 678/* Return an isl_ast_expr that performs the reduction of type "type" 679 * on AST expressions corresponding to the elements in "list". 680 * 681 * The list is assumed to contain at least one element. 682 * If the list contains exactly one element, then the returned isl_ast_expr 683 * simply computes that affine expression. 684 * If the list contains more than one element, then we sort it 685 * using a fairly arbitrary but hopefully reasonably stable order. 686 */ 687static __isl_give isl_ast_expr *reduce_list(enum isl_ast_expr_op_type type, 688 __isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build) 689{ 690 int i; 691 isl_size n; 692 isl_ctx *ctx; 693 isl_ast_expr *expr; 694 695 n = isl_pw_aff_list_n_pw_aff(list); 696 if (n < 0) 697 return NULL; 698 699 if (n == 1) 700 return isl_ast_build_expr_from_pw_aff_internal(build, 701 isl_pw_aff_list_get_pw_aff(list, 0)); 702 703 ctx = isl_pw_aff_list_get_ctx(list); 704 expr = isl_ast_expr_alloc_op(ctx, type, n); 705 706 list = isl_pw_aff_list_copy(list); 707 list = isl_pw_aff_list_sort(list, &reduce_list_cmp, NULL); 708 if (!list) 709 return isl_ast_expr_free(expr); 710 711 for (i = 0; i < n; ++i) { 712 isl_ast_expr *expr_i; 713 714 expr_i = isl_ast_build_expr_from_pw_aff_internal(build, 715 isl_pw_aff_list_get_pw_aff(list, i)); 716 expr = isl_ast_expr_op_add_arg(expr, expr_i); 717 } 718 719 isl_pw_aff_list_free(list); 720 return expr; 721} 722 723/* Add guards implied by the "generated constraints", 724 * but not (necessarily) enforced by the generated AST to "guard". 725 * In particular, if there is any stride constraints, 726 * then add the guard implied by those constraints. 727 * If we have generated a degenerate loop, then add the guard 728 * implied by "bounds" on the outer dimensions, i.e., the guard 729 * that ensures that the single value actually exists. 730 * Since there may also be guards implied by a combination 731 * of these constraints, we first combine them before 732 * deriving the implied constraints. 733 */ 734static __isl_give isl_set *add_implied_guards(__isl_take isl_set *guard, 735 int degenerate, __isl_keep isl_basic_set *bounds, 736 __isl_keep isl_ast_build *build) 737{ 738 isl_size depth; 739 isl_bool has_stride; 740 isl_space *space; 741 isl_set *dom, *set; 742 743 depth = isl_ast_build_get_depth(build); 744 has_stride = isl_ast_build_has_stride(build, depth); 745 if (depth < 0 || has_stride < 0) 746 return isl_set_free(guard); 747 if (!has_stride && !degenerate) 748 return guard; 749 750 space = isl_basic_set_get_space(bounds); 751 dom = isl_set_universe(space); 752 753 if (degenerate) { 754 bounds = isl_basic_set_copy(bounds); 755 bounds = isl_basic_set_drop_constraints_not_involving_dims( 756 bounds, isl_dim_set, depth, 1); 757 set = isl_set_from_basic_set(bounds); 758 dom = isl_set_intersect(dom, set); 759 } 760 761 if (has_stride) { 762 set = isl_ast_build_get_stride_constraint(build); 763 dom = isl_set_intersect(dom, set); 764 } 765 766 dom = isl_set_eliminate(dom, isl_dim_set, depth, 1); 767 dom = isl_ast_build_compute_gist(build, dom); 768 guard = isl_set_intersect(guard, dom); 769 770 return guard; 771} 772 773/* Update "graft" based on "sub_build" for the degenerate case. 774 * 775 * "build" is the build in which graft->node was created 776 * "sub_build" contains information about the current level itself, 777 * including the single value attained. 778 * 779 * We set the initialization part of the for loop to the single 780 * value attained by the current dimension. 781 * The increment and condition are not strictly needed as they are known 782 * to be "1" and "iterator <= value" respectively. 783 */ 784static __isl_give isl_ast_graft *refine_degenerate( 785 __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build, 786 __isl_keep isl_ast_build *sub_build) 787{ 788 isl_pw_aff *value; 789 isl_ast_expr *init; 790 791 if (!graft || !sub_build) 792 return isl_ast_graft_free(graft); 793 794 value = isl_pw_aff_copy(sub_build->value); 795 796 init = isl_ast_build_expr_from_pw_aff_internal(build, value); 797 graft->node = isl_ast_node_for_set_init(graft->node, init); 798 if (!graft->node) 799 return isl_ast_graft_free(graft); 800 801 return graft; 802} 803 804/* Return the intersection of constraints in "list" as a set. 805 */ 806static __isl_give isl_set *intersect_constraints( 807 __isl_keep isl_constraint_list *list) 808{ 809 int i; 810 isl_size n; 811 isl_basic_set *bset; 812 813 n = isl_constraint_list_n_constraint(list); 814 if (n < 0) 815 return NULL; 816 if (n < 1) 817 isl_die(isl_constraint_list_get_ctx(list), isl_error_internal, 818 "expecting at least one constraint", return NULL); 819 820 bset = isl_basic_set_from_constraint( 821 isl_constraint_list_get_constraint(list, 0)); 822 for (i = 1; i < n; ++i) { 823 isl_basic_set *bset_i; 824 825 bset_i = isl_basic_set_from_constraint( 826 isl_constraint_list_get_constraint(list, i)); 827 bset = isl_basic_set_intersect(bset, bset_i); 828 } 829 830 return isl_set_from_basic_set(bset); 831} 832 833/* Compute the constraints on the outer dimensions enforced by 834 * graft->node and add those constraints to graft->enforced, 835 * in case the upper bound is expressed as a set "upper". 836 * 837 * In particular, if l(...) is a lower bound in "lower", and 838 * 839 * -a i + f(...) >= 0 or a i <= f(...) 840 * 841 * is an upper bound ocnstraint on the current dimension i, 842 * then the for loop enforces the constraint 843 * 844 * -a l(...) + f(...) >= 0 or a l(...) <= f(...) 845 * 846 * We therefore simply take each lower bound in turn, plug it into 847 * the upper bounds and compute the intersection over all lower bounds. 848 * 849 * If a lower bound is a rational expression, then 850 * isl_basic_set_preimage_multi_aff will force this rational 851 * expression to have only integer values. However, the loop 852 * itself does not enforce this integrality constraint. We therefore 853 * use the ceil of the lower bounds instead of the lower bounds themselves. 854 * Other constraints will make sure that the for loop is only executed 855 * when each of the lower bounds attains an integral value. 856 * In particular, potentially rational values only occur in 857 * lower_bound if the offset is a (seemingly) rational expression, 858 * but then outer conditions will make sure that this rational expression 859 * only attains integer values. 860 */ 861static __isl_give isl_ast_graft *set_enforced_from_set( 862 __isl_take isl_ast_graft *graft, 863 __isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper) 864{ 865 isl_space *space; 866 isl_basic_set *enforced; 867 isl_pw_multi_aff *pma; 868 int i; 869 isl_size n; 870 871 n = isl_pw_aff_list_n_pw_aff(lower); 872 if (!graft || n < 0) 873 return isl_ast_graft_free(graft); 874 875 space = isl_set_get_space(upper); 876 enforced = isl_basic_set_universe(isl_space_copy(space)); 877 878 space = isl_space_map_from_set(space); 879 pma = isl_pw_multi_aff_identity(space); 880 881 for (i = 0; i < n; ++i) { 882 isl_pw_aff *pa; 883 isl_set *enforced_i; 884 isl_basic_set *hull; 885 isl_pw_multi_aff *pma_i; 886 887 pa = isl_pw_aff_list_get_pw_aff(lower, i); 888 pa = isl_pw_aff_ceil(pa); 889 pma_i = isl_pw_multi_aff_copy(pma); 890 pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa); 891 enforced_i = isl_set_copy(upper); 892 enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i); 893 hull = isl_set_simple_hull(enforced_i); 894 enforced = isl_basic_set_intersect(enforced, hull); 895 } 896 897 isl_pw_multi_aff_free(pma); 898 899 graft = isl_ast_graft_enforce(graft, enforced); 900 901 return graft; 902} 903 904/* Compute the constraints on the outer dimensions enforced by 905 * graft->node and add those constraints to graft->enforced, 906 * in case the upper bound is expressed as 907 * a list of affine expressions "upper". 908 * 909 * The enforced condition is that each lower bound expression is less 910 * than or equal to each upper bound expression. 911 */ 912static __isl_give isl_ast_graft *set_enforced_from_list( 913 __isl_take isl_ast_graft *graft, 914 __isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper) 915{ 916 isl_set *cond; 917 isl_basic_set *enforced; 918 919 lower = isl_pw_aff_list_copy(lower); 920 upper = isl_pw_aff_list_copy(upper); 921 cond = isl_pw_aff_list_le_set(lower, upper); 922 enforced = isl_set_simple_hull(cond); 923 graft = isl_ast_graft_enforce(graft, enforced); 924 925 return graft; 926} 927 928/* Does "aff" have a negative constant term? 929 */ 930static isl_bool aff_constant_is_negative(__isl_keep isl_set *set, 931 __isl_keep isl_aff *aff, void *user) 932{ 933 isl_bool is_neg; 934 isl_val *v; 935 936 v = isl_aff_get_constant_val(aff); 937 is_neg = isl_val_is_neg(v); 938 isl_val_free(v); 939 940 return is_neg; 941} 942 943/* Does "pa" have a negative constant term over its entire domain? 944 */ 945static isl_bool pw_aff_constant_is_negative(__isl_keep isl_pw_aff *pa, 946 void *user) 947{ 948 return isl_pw_aff_every_piece(pa, &aff_constant_is_negative, NULL); 949} 950 951/* Does each element in "list" have a negative constant term? 952 */ 953static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list) 954{ 955 return isl_pw_aff_list_every(list, &pw_aff_constant_is_negative, NULL); 956} 957 958/* Add 1 to each of the elements in "list", where each of these elements 959 * is defined over the internal schedule space of "build". 960 */ 961static __isl_give isl_pw_aff_list *list_add_one( 962 __isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build) 963{ 964 int i; 965 isl_size n; 966 isl_space *space; 967 isl_aff *aff; 968 isl_pw_aff *one; 969 970 n = isl_pw_aff_list_n_pw_aff(list); 971 if (n < 0) 972 return isl_pw_aff_list_free(list); 973 974 space = isl_ast_build_get_space(build, 1); 975 aff = isl_aff_zero_on_domain(isl_local_space_from_space(space)); 976 aff = isl_aff_add_constant_si(aff, 1); 977 one = isl_pw_aff_from_aff(aff); 978 979 for (i = 0; i < n; ++i) { 980 isl_pw_aff *pa; 981 pa = isl_pw_aff_list_get_pw_aff(list, i); 982 pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one)); 983 list = isl_pw_aff_list_set_pw_aff(list, i, pa); 984 } 985 986 isl_pw_aff_free(one); 987 988 return list; 989} 990 991/* Set the condition part of the for node graft->node in case 992 * the upper bound is represented as a list of piecewise affine expressions. 993 * 994 * In particular, set the condition to 995 * 996 * iterator <= min(list of upper bounds) 997 * 998 * If each of the upper bounds has a negative constant term, then 999 * set the condition to 1000 * 1001 * iterator < min(list of (upper bound + 1)s) 1002 * 1003 */ 1004static __isl_give isl_ast_graft *set_for_cond_from_list( 1005 __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list, 1006 __isl_keep isl_ast_build *build) 1007{ 1008 int neg; 1009 isl_ast_expr *bound, *iterator, *cond; 1010 enum isl_ast_expr_op_type type = isl_ast_expr_op_le; 1011 1012 if (!graft || !list) 1013 return isl_ast_graft_free(graft); 1014 1015 neg = list_constant_is_negative(list); 1016 if (neg < 0) 1017 return isl_ast_graft_free(graft); 1018 list = isl_pw_aff_list_copy(list); 1019 if (neg) { 1020 list = list_add_one(list, build); 1021 type = isl_ast_expr_op_lt; 1022 } 1023 1024 bound = reduce_list(isl_ast_expr_op_min, list, build); 1025 iterator = isl_ast_expr_copy(graft->node->u.f.iterator); 1026 cond = isl_ast_expr_alloc_binary(type, iterator, bound); 1027 graft->node = isl_ast_node_for_set_cond(graft->node, cond); 1028 1029 isl_pw_aff_list_free(list); 1030 if (!graft->node) 1031 return isl_ast_graft_free(graft); 1032 return graft; 1033} 1034 1035/* Set the condition part of the for node graft->node in case 1036 * the upper bound is represented as a set. 1037 */ 1038static __isl_give isl_ast_graft *set_for_cond_from_set( 1039 __isl_take isl_ast_graft *graft, __isl_keep isl_set *set, 1040 __isl_keep isl_ast_build *build) 1041{ 1042 isl_ast_expr *cond; 1043 1044 if (!graft) 1045 return NULL; 1046 1047 cond = isl_ast_build_expr_from_set_internal(build, isl_set_copy(set)); 1048 graft->node = isl_ast_node_for_set_cond(graft->node, cond); 1049 if (!graft->node) 1050 return isl_ast_graft_free(graft); 1051 return graft; 1052} 1053 1054/* Construct an isl_ast_expr for the increment (i.e., stride) of 1055 * the current dimension. 1056 */ 1057static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build) 1058{ 1059 isl_size depth; 1060 isl_val *v; 1061 isl_ctx *ctx; 1062 1063 depth = isl_ast_build_get_depth(build); 1064 if (depth < 0) 1065 return NULL; 1066 ctx = isl_ast_build_get_ctx(build); 1067 1068 if (!isl_ast_build_has_stride(build, depth)) 1069 return isl_ast_expr_alloc_int_si(ctx, 1); 1070 1071 v = isl_ast_build_get_stride(build, depth); 1072 return isl_ast_expr_from_val(v); 1073} 1074 1075/* Should we express the loop condition as 1076 * 1077 * iterator <= min(list of upper bounds) 1078 * 1079 * or as a conjunction of constraints? 1080 * 1081 * The first is constructed from a list of upper bounds. 1082 * The second is constructed from a set. 1083 * 1084 * If there are no upper bounds in "constraints", then this could mean 1085 * that "domain" simply doesn't have an upper bound or that we didn't 1086 * pick any upper bound. In the first case, we want to generate the 1087 * loop condition as a(n empty) conjunction of constraints 1088 * In the second case, we will compute 1089 * a single upper bound from "domain" and so we use the list form. 1090 * 1091 * If there are upper bounds in "constraints", 1092 * then we use the list form iff the atomic_upper_bound option is set. 1093 */ 1094static int use_upper_bound_list(isl_ctx *ctx, int n_upper, 1095 __isl_keep isl_set *domain, int depth) 1096{ 1097 if (n_upper > 0) 1098 return isl_options_get_ast_build_atomic_upper_bound(ctx); 1099 else 1100 return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth); 1101} 1102 1103/* Fill in the expressions of the for node in graft->node. 1104 * 1105 * In particular, 1106 * - set the initialization part of the loop to the maximum of the lower bounds 1107 * - extract the increment from the stride of the current dimension 1108 * - construct the for condition either based on a list of upper bounds 1109 * or on a set of upper bound constraints. 1110 */ 1111static __isl_give isl_ast_graft *set_for_node_expressions( 1112 __isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower, 1113 int use_list, __isl_keep isl_pw_aff_list *upper_list, 1114 __isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build) 1115{ 1116 isl_ast_expr *init; 1117 1118 if (!graft) 1119 return NULL; 1120 1121 init = reduce_list(isl_ast_expr_op_max, lower, build); 1122 graft->node = isl_ast_node_for_set_init(graft->node, init); 1123 graft->node = isl_ast_node_for_set_inc(graft->node, for_inc(build)); 1124 1125 if (!graft->node) 1126 graft = isl_ast_graft_free(graft); 1127 1128 if (use_list) 1129 graft = set_for_cond_from_list(graft, upper_list, build); 1130 else 1131 graft = set_for_cond_from_set(graft, upper_set, build); 1132 1133 return graft; 1134} 1135 1136/* Update "graft" based on "bounds" and "domain" for the generic, 1137 * non-degenerate, case. 1138 * 1139 * "c_lower" and "c_upper" contain the lower and upper bounds 1140 * that the loop node should express. 1141 * "domain" is the subset of the intersection of the constraints 1142 * for which some code is executed. 1143 * 1144 * There may be zero lower bounds or zero upper bounds in "constraints" 1145 * in case the list of constraints was created 1146 * based on the atomic option or based on separation with explicit bounds. 1147 * In that case, we use "domain" to derive lower and/or upper bounds. 1148 * 1149 * We first compute a list of one or more lower bounds. 1150 * 1151 * Then we decide if we want to express the condition as 1152 * 1153 * iterator <= min(list of upper bounds) 1154 * 1155 * or as a conjunction of constraints. 1156 * 1157 * The set of enforced constraints is then computed either based on 1158 * a list of upper bounds or on a set of upper bound constraints. 1159 * We do not compute any enforced constraints if we were forced 1160 * to compute a lower or upper bound using exact_bound. The domains 1161 * of the resulting expressions may imply some bounds on outer dimensions 1162 * that we do not want to appear in the enforced constraints since 1163 * they are not actually enforced by the corresponding code. 1164 * 1165 * Finally, we fill in the expressions of the for node. 1166 */ 1167static __isl_give isl_ast_graft *refine_generic_bounds( 1168 __isl_take isl_ast_graft *graft, 1169 __isl_take isl_constraint_list *c_lower, 1170 __isl_take isl_constraint_list *c_upper, 1171 __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) 1172{ 1173 isl_size depth; 1174 isl_ctx *ctx; 1175 isl_pw_aff_list *lower; 1176 int use_list; 1177 isl_set *upper_set = NULL; 1178 isl_pw_aff_list *upper_list = NULL; 1179 isl_size n_lower, n_upper; 1180 1181 depth = isl_ast_build_get_depth(build); 1182 if (!graft || !c_lower || !c_upper || depth < 0) 1183 goto error; 1184 1185 ctx = isl_ast_graft_get_ctx(graft); 1186 1187 n_lower = isl_constraint_list_n_constraint(c_lower); 1188 n_upper = isl_constraint_list_n_constraint(c_upper); 1189 if (n_lower < 0 || n_upper < 0) 1190 goto error; 1191 1192 use_list = use_upper_bound_list(ctx, n_upper, domain, depth); 1193 1194 lower = lower_bounds(c_lower, depth, domain, build); 1195 1196 if (use_list) 1197 upper_list = upper_bounds(c_upper, depth, domain, build); 1198 else if (n_upper > 0) 1199 upper_set = intersect_constraints(c_upper); 1200 else 1201 upper_set = isl_set_universe(isl_set_get_space(domain)); 1202 1203 if (n_lower == 0 || n_upper == 0) 1204 ; 1205 else if (use_list) 1206 graft = set_enforced_from_list(graft, lower, upper_list); 1207 else 1208 graft = set_enforced_from_set(graft, lower, depth, upper_set); 1209 1210 graft = set_for_node_expressions(graft, lower, use_list, upper_list, 1211 upper_set, build); 1212 1213 isl_pw_aff_list_free(lower); 1214 isl_pw_aff_list_free(upper_list); 1215 isl_set_free(upper_set); 1216 isl_constraint_list_free(c_lower); 1217 isl_constraint_list_free(c_upper); 1218 1219 return graft; 1220error: 1221 isl_constraint_list_free(c_lower); 1222 isl_constraint_list_free(c_upper); 1223 return isl_ast_graft_free(graft); 1224} 1225 1226/* Internal data structure used inside count_constraints to keep 1227 * track of the number of constraints that are independent of dimension "pos", 1228 * the lower bounds in "pos" and the upper bounds in "pos". 1229 */ 1230struct isl_ast_count_constraints_data { 1231 int pos; 1232 1233 int n_indep; 1234 int n_lower; 1235 int n_upper; 1236}; 1237 1238/* Increment data->n_indep, data->lower or data->upper depending 1239 * on whether "c" is independent of dimensions data->pos, 1240 * a lower bound or an upper bound. 1241 */ 1242static isl_stat count_constraints(__isl_take isl_constraint *c, void *user) 1243{ 1244 struct isl_ast_count_constraints_data *data = user; 1245 1246 if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos)) 1247 data->n_lower++; 1248 else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos)) 1249 data->n_upper++; 1250 else 1251 data->n_indep++; 1252 1253 isl_constraint_free(c); 1254 1255 return isl_stat_ok; 1256} 1257 1258/* Update "graft" based on "bounds" and "domain" for the generic, 1259 * non-degenerate, case. 1260 * 1261 * "list" respresent the list of bounds that need to be encoded by 1262 * the for loop. Only the constraints that involve the iterator 1263 * are relevant here. The other constraints are taken care of by 1264 * the caller and are included in the generated constraints of "build". 1265 * "domain" is the subset of the intersection of the constraints 1266 * for which some code is executed. 1267 * "build" is the build in which graft->node was created. 1268 * 1269 * We separate lower bounds, upper bounds and constraints that 1270 * are independent of the loop iterator. 1271 * 1272 * The actual for loop bounds are generated in refine_generic_bounds. 1273 */ 1274static __isl_give isl_ast_graft *refine_generic_split( 1275 __isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list, 1276 __isl_keep isl_set *domain, __isl_keep isl_ast_build *build) 1277{ 1278 struct isl_ast_count_constraints_data data; 1279 isl_size depth; 1280 isl_constraint_list *lower; 1281 isl_constraint_list *upper; 1282 1283 depth = isl_ast_build_get_depth(build); 1284 if (depth < 0) 1285 list = isl_constraint_list_free(list); 1286 if (!list) 1287 return isl_ast_graft_free(graft); 1288 1289 data.pos = depth; 1290 1291 list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos); 1292 if (!list) 1293 return isl_ast_graft_free(graft); 1294 1295 data.n_indep = data.n_lower = data.n_upper = 0; 1296 if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) { 1297 isl_constraint_list_free(list); 1298 return isl_ast_graft_free(graft); 1299 } 1300 1301 lower = isl_constraint_list_drop(list, 0, data.n_indep); 1302 upper = isl_constraint_list_copy(lower); 1303 lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper); 1304 upper = isl_constraint_list_drop(upper, 0, data.n_lower); 1305 1306 return refine_generic_bounds(graft, lower, upper, domain, build); 1307} 1308 1309/* Update "graft" based on "bounds" and "domain" for the generic, 1310 * non-degenerate, case. 1311 * 1312 * "bounds" respresent the bounds that need to be encoded by 1313 * the for loop (or a guard around the for loop). 1314 * "domain" is the subset of "bounds" for which some code is executed. 1315 * "build" is the build in which graft->node was created. 1316 * 1317 * We break up "bounds" into a list of constraints and continue with 1318 * refine_generic_split. 1319 */ 1320static __isl_give isl_ast_graft *refine_generic( 1321 __isl_take isl_ast_graft *graft, 1322 __isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain, 1323 __isl_keep isl_ast_build *build) 1324{ 1325 isl_constraint_list *list; 1326 1327 if (!build || !graft) 1328 return isl_ast_graft_free(graft); 1329 1330 list = isl_basic_set_get_constraint_list(bounds); 1331 1332 graft = refine_generic_split(graft, list, domain, build); 1333 1334 return graft; 1335} 1336 1337/* Create a for node for the current level. 1338 * 1339 * Mark the for node degenerate if "degenerate" is set. 1340 */ 1341static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build, 1342 int degenerate) 1343{ 1344 isl_size depth; 1345 isl_id *id; 1346 isl_ast_node *node; 1347 1348 depth = isl_ast_build_get_depth(build); 1349 if (depth < 0) 1350 return NULL; 1351 1352 id = isl_ast_build_get_iterator_id(build, depth); 1353 node = isl_ast_node_alloc_for(id); 1354 if (degenerate) 1355 node = isl_ast_node_for_mark_degenerate(node); 1356 1357 return node; 1358} 1359 1360/* If the ast_build_exploit_nested_bounds option is set, then return 1361 * the constraints enforced by all elements in "list". 1362 * Otherwise, return the universe. 1363 */ 1364static __isl_give isl_basic_set *extract_shared_enforced( 1365 __isl_keep isl_ast_graft_list *list, __isl_keep isl_ast_build *build) 1366{ 1367 isl_ctx *ctx; 1368 isl_space *space; 1369 1370 if (!list) 1371 return NULL; 1372 1373 ctx = isl_ast_graft_list_get_ctx(list); 1374 if (isl_options_get_ast_build_exploit_nested_bounds(ctx)) 1375 return isl_ast_graft_list_extract_shared_enforced(list, build); 1376 1377 space = isl_ast_build_get_space(build, 1); 1378 return isl_basic_set_universe(space); 1379} 1380 1381/* Return the pending constraints of "build" that are not already taken 1382 * care of (by a combination of "enforced" and the generated constraints 1383 * of "build"). 1384 */ 1385static __isl_give isl_set *extract_pending(__isl_keep isl_ast_build *build, 1386 __isl_keep isl_basic_set *enforced) 1387{ 1388 isl_set *guard, *context; 1389 1390 guard = isl_ast_build_get_pending(build); 1391 context = isl_set_from_basic_set(isl_basic_set_copy(enforced)); 1392 context = isl_set_intersect(context, 1393 isl_ast_build_get_generated(build)); 1394 return isl_set_gist(guard, context); 1395} 1396 1397/* Create an AST node for the current dimension based on 1398 * the schedule domain "bounds" and return the node encapsulated 1399 * in an isl_ast_graft. 1400 * 1401 * "executed" is the current inverse schedule, taking into account 1402 * the bounds in "bounds" 1403 * "domain" is the domain of "executed", with inner dimensions projected out. 1404 * It may be a strict subset of "bounds" in case "bounds" was created 1405 * based on the atomic option or based on separation with explicit bounds. 1406 * 1407 * "domain" may satisfy additional equalities that result 1408 * from intersecting "executed" with "bounds" in add_node. 1409 * It may also satisfy some global constraints that were dropped out because 1410 * we performed separation with explicit bounds. 1411 * The very first step is then to copy these constraints to "bounds". 1412 * 1413 * Since we may be calling before_each_for and after_each_for 1414 * callbacks, we record the current inverse schedule in the build. 1415 * 1416 * We consider three builds, 1417 * "build" is the one in which the current level is created, 1418 * "body_build" is the build in which the next level is created, 1419 * "sub_build" is essentially the same as "body_build", except that 1420 * the depth has not been increased yet. 1421 * 1422 * "build" already contains information (in strides and offsets) 1423 * about the strides at the current level, but this information is not 1424 * reflected in the build->domain. 1425 * We first add this information and the "bounds" to the sub_build->domain. 1426 * isl_ast_build_set_loop_bounds adds the stride information and 1427 * checks whether the current dimension attains 1428 * only a single value and whether this single value can be represented using 1429 * a single affine expression. 1430 * In the first case, the current level is considered "degenerate". 1431 * In the second, sub-case, the current level is considered "eliminated". 1432 * Eliminated levels don't need to be reflected in the AST since we can 1433 * simply plug in the affine expression. For degenerate, but non-eliminated, 1434 * levels, we do introduce a for node, but mark is as degenerate so that 1435 * it can be printed as an assignment of the single value to the loop 1436 * "iterator". 1437 * 1438 * If the current level is eliminated, we explicitly plug in the value 1439 * for the current level found by isl_ast_build_set_loop_bounds in the 1440 * inverse schedule. This ensures that if we are working on a slice 1441 * of the domain based on information available in the inverse schedule 1442 * and the build domain, that then this information is also reflected 1443 * in the inverse schedule. This operation also eliminates the current 1444 * dimension from the inverse schedule making sure no inner dimensions depend 1445 * on the current dimension. Otherwise, we create a for node, marking 1446 * it degenerate if appropriate. The initial for node is still incomplete 1447 * and will be completed in either refine_degenerate or refine_generic. 1448 * 1449 * We then generate a sequence of grafts for the next level, 1450 * create a surrounding graft for the current level and insert 1451 * the for node we created (if the current level is not eliminated). 1452 * Before creating a graft for the current level, we first extract 1453 * hoistable constraints from the child guards and combine them 1454 * with the pending constraints in the build. These constraints 1455 * are used to simplify the child guards and then added to the guard 1456 * of the current graft to ensure that they will be generated. 1457 * If the hoisted guard is a disjunction, then we use it directly 1458 * to gist the guards on the children before intersect it with the 1459 * pending constraints. We do so because this disjunction is typically 1460 * identical to the guards on the children such that these guards 1461 * can be effectively removed completely. After the intersection, 1462 * the gist operation would have a harder time figuring this out. 1463 * 1464 * Finally, we set the bounds of the for loop in either 1465 * refine_degenerate or refine_generic. 1466 * We do so in a context where the pending constraints of the build 1467 * have been replaced by the guard of the current graft. 1468 */ 1469static __isl_give isl_ast_graft *create_node_scaled( 1470 __isl_take isl_union_map *executed, 1471 __isl_take isl_basic_set *bounds, __isl_take isl_set *domain, 1472 __isl_take isl_ast_build *build) 1473{ 1474 isl_size depth; 1475 int degenerate; 1476 isl_bool eliminated; 1477 isl_size n; 1478 isl_basic_set *hull; 1479 isl_basic_set *enforced; 1480 isl_set *guard, *hoisted; 1481 isl_ast_node *node = NULL; 1482 isl_ast_graft *graft; 1483 isl_ast_graft_list *children; 1484 isl_ast_build *sub_build; 1485 isl_ast_build *body_build; 1486 1487 domain = isl_ast_build_eliminate_divs(build, domain); 1488 domain = isl_set_detect_equalities(domain); 1489 hull = isl_set_unshifted_simple_hull(isl_set_copy(domain)); 1490 bounds = isl_basic_set_intersect(bounds, hull); 1491 build = isl_ast_build_set_executed(build, isl_union_map_copy(executed)); 1492 1493 depth = isl_ast_build_get_depth(build); 1494 if (depth < 0) 1495 build = isl_ast_build_free(build); 1496 sub_build = isl_ast_build_copy(build); 1497 bounds = isl_basic_set_remove_redundancies(bounds); 1498 bounds = isl_ast_build_specialize_basic_set(sub_build, bounds); 1499 sub_build = isl_ast_build_set_loop_bounds(sub_build, 1500 isl_basic_set_copy(bounds)); 1501 degenerate = isl_ast_build_has_value(sub_build); 1502 eliminated = isl_ast_build_has_affine_value(sub_build, depth); 1503 if (degenerate < 0 || eliminated < 0) 1504 executed = isl_union_map_free(executed); 1505 if (!degenerate) 1506 bounds = isl_ast_build_compute_gist_basic_set(build, bounds); 1507 sub_build = isl_ast_build_set_pending_generated(sub_build, 1508 isl_basic_set_copy(bounds)); 1509 if (eliminated) 1510 executed = plug_in_values(executed, sub_build); 1511 else 1512 node = create_for(build, degenerate); 1513 1514 body_build = isl_ast_build_copy(sub_build); 1515 body_build = isl_ast_build_increase_depth(body_build); 1516 if (!eliminated) 1517 node = before_each_for(node, body_build); 1518 children = generate_next_level(executed, 1519 isl_ast_build_copy(body_build)); 1520 1521 enforced = extract_shared_enforced(children, build); 1522 guard = extract_pending(sub_build, enforced); 1523 hoisted = isl_ast_graft_list_extract_hoistable_guard(children, build); 1524 n = isl_set_n_basic_set(hoisted); 1525 if (n < 0) 1526 children = isl_ast_graft_list_free(children); 1527 if (n > 1) 1528 children = isl_ast_graft_list_gist_guards(children, 1529 isl_set_copy(hoisted)); 1530 guard = isl_set_intersect(guard, hoisted); 1531 if (!eliminated) 1532 guard = add_implied_guards(guard, degenerate, bounds, build); 1533 1534 graft = isl_ast_graft_alloc_from_children(children, 1535 isl_set_copy(guard), enforced, build, sub_build); 1536 1537 if (!eliminated) { 1538 isl_ast_build *for_build; 1539 1540 graft = isl_ast_graft_insert_for(graft, node); 1541 for_build = isl_ast_build_copy(build); 1542 for_build = isl_ast_build_replace_pending_by_guard(for_build, 1543 isl_set_copy(guard)); 1544 if (degenerate) 1545 graft = refine_degenerate(graft, for_build, sub_build); 1546 else 1547 graft = refine_generic(graft, bounds, 1548 domain, for_build); 1549 isl_ast_build_free(for_build); 1550 } 1551 isl_set_free(guard); 1552 if (!eliminated) 1553 graft = after_each_for(graft, body_build); 1554 1555 isl_ast_build_free(body_build); 1556 isl_ast_build_free(sub_build); 1557 isl_ast_build_free(build); 1558 isl_basic_set_free(bounds); 1559 isl_set_free(domain); 1560 1561 return graft; 1562} 1563 1564/* Internal data structure for checking if all constraints involving 1565 * the input dimension "depth" are such that the other coefficients 1566 * are multiples of "m", reducing "m" if they are not. 1567 * If "m" is reduced all the way down to "1", then the check has failed 1568 * and we break out of the iteration. 1569 */ 1570struct isl_check_scaled_data { 1571 int depth; 1572 isl_val *m; 1573}; 1574 1575/* If constraint "c" involves the input dimension data->depth, 1576 * then make sure that all the other coefficients are multiples of data->m, 1577 * reducing data->m if needed. 1578 * Break out of the iteration if data->m has become equal to "1". 1579 */ 1580static isl_stat constraint_check_scaled(__isl_take isl_constraint *c, 1581 void *user) 1582{ 1583 struct isl_check_scaled_data *data = user; 1584 int i, j; 1585 isl_size n; 1586 enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out, 1587 isl_dim_div }; 1588 1589 if (!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)) { 1590 isl_constraint_free(c); 1591 return isl_stat_ok; 1592 } 1593 1594 for (i = 0; i < 4; ++i) { 1595 n = isl_constraint_dim(c, t[i]); 1596 if (n < 0) 1597 break; 1598 for (j = 0; j < n; ++j) { 1599 isl_val *d; 1600 1601 if (t[i] == isl_dim_in && j == data->depth) 1602 continue; 1603 if (!isl_constraint_involves_dims(c, t[i], j, 1)) 1604 continue; 1605 d = isl_constraint_get_coefficient_val(c, t[i], j); 1606 data->m = isl_val_gcd(data->m, d); 1607 if (isl_val_is_one(data->m)) 1608 break; 1609 } 1610 if (j < n) 1611 break; 1612 } 1613 1614 isl_constraint_free(c); 1615 1616 return i < 4 ? isl_stat_error : isl_stat_ok; 1617} 1618 1619/* For each constraint of "bmap" that involves the input dimension data->depth, 1620 * make sure that all the other coefficients are multiples of data->m, 1621 * reducing data->m if needed. 1622 * Break out of the iteration if data->m has become equal to "1". 1623 */ 1624static isl_stat basic_map_check_scaled(__isl_take isl_basic_map *bmap, 1625 void *user) 1626{ 1627 isl_stat r; 1628 1629 r = isl_basic_map_foreach_constraint(bmap, 1630 &constraint_check_scaled, user); 1631 isl_basic_map_free(bmap); 1632 1633 return r; 1634} 1635 1636/* For each constraint of "map" that involves the input dimension data->depth, 1637 * make sure that all the other coefficients are multiples of data->m, 1638 * reducing data->m if needed. 1639 * Break out of the iteration if data->m has become equal to "1". 1640 */ 1641static isl_stat map_check_scaled(__isl_take isl_map *map, void *user) 1642{ 1643 isl_stat r; 1644 1645 r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user); 1646 isl_map_free(map); 1647 1648 return r; 1649} 1650 1651/* Create an AST node for the current dimension based on 1652 * the schedule domain "bounds" and return the node encapsulated 1653 * in an isl_ast_graft. 1654 * 1655 * "executed" is the current inverse schedule, taking into account 1656 * the bounds in "bounds" 1657 * "domain" is the domain of "executed", with inner dimensions projected out. 1658 * 1659 * 1660 * Before moving on to the actual AST node construction in create_node_scaled, 1661 * we first check if the current dimension is strided and if we can scale 1662 * down this stride. Note that we only do this if the ast_build_scale_strides 1663 * option is set. 1664 * 1665 * In particular, let the current dimension take on values 1666 * 1667 * f + s a 1668 * 1669 * with a an integer. We check if we can find an integer m that (obviously) 1670 * divides both f and s. 1671 * 1672 * If so, we check if the current dimension only appears in constraints 1673 * where the coefficients of the other variables are multiples of m. 1674 * We perform this extra check to avoid the risk of introducing 1675 * divisions by scaling down the current dimension. 1676 * 1677 * If so, we scale the current dimension down by a factor of m. 1678 * That is, we plug in 1679 * 1680 * i = m i' (1) 1681 * 1682 * Note that in principle we could always scale down strided loops 1683 * by plugging in 1684 * 1685 * i = f + s i' 1686 * 1687 * but this may result in i' taking on larger values than the original i, 1688 * due to the shift by "f". 1689 * By constrast, the scaling in (1) can only reduce the (absolute) value "i". 1690 */ 1691static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed, 1692 __isl_take isl_basic_set *bounds, __isl_take isl_set *domain, 1693 __isl_take isl_ast_build *build) 1694{ 1695 struct isl_check_scaled_data data; 1696 isl_size depth; 1697 isl_ctx *ctx; 1698 isl_aff *offset; 1699 isl_val *d; 1700 1701 ctx = isl_ast_build_get_ctx(build); 1702 if (!isl_options_get_ast_build_scale_strides(ctx)) 1703 return create_node_scaled(executed, bounds, domain, build); 1704 1705 depth = isl_ast_build_get_depth(build); 1706 if (depth < 0) 1707 build = isl_ast_build_free(build); 1708 data.depth = depth; 1709 if (!isl_ast_build_has_stride(build, data.depth)) 1710 return create_node_scaled(executed, bounds, domain, build); 1711 1712 offset = isl_ast_build_get_offset(build, data.depth); 1713 data.m = isl_ast_build_get_stride(build, data.depth); 1714 if (!data.m) 1715 offset = isl_aff_free(offset); 1716 offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m)); 1717 d = isl_aff_get_denominator_val(offset); 1718 if (!d) 1719 executed = isl_union_map_free(executed); 1720 1721 if (executed && isl_val_is_divisible_by(data.m, d)) 1722 data.m = isl_val_div(data.m, d); 1723 else { 1724 data.m = isl_val_set_si(data.m, 1); 1725 isl_val_free(d); 1726 } 1727 1728 if (!isl_val_is_one(data.m)) { 1729 if (isl_union_map_foreach_map(executed, &map_check_scaled, 1730 &data) < 0 && 1731 !isl_val_is_one(data.m)) 1732 executed = isl_union_map_free(executed); 1733 } 1734 1735 if (!isl_val_is_one(data.m)) { 1736 isl_space *space; 1737 isl_multi_aff *ma; 1738 isl_aff *aff; 1739 isl_map *map; 1740 isl_union_map *umap; 1741 1742 space = isl_ast_build_get_space(build, 1); 1743 space = isl_space_map_from_set(space); 1744 ma = isl_multi_aff_identity(space); 1745 aff = isl_multi_aff_get_aff(ma, data.depth); 1746 aff = isl_aff_scale_val(aff, isl_val_copy(data.m)); 1747 ma = isl_multi_aff_set_aff(ma, data.depth, aff); 1748 1749 bounds = isl_basic_set_preimage_multi_aff(bounds, 1750 isl_multi_aff_copy(ma)); 1751 domain = isl_set_preimage_multi_aff(domain, 1752 isl_multi_aff_copy(ma)); 1753 map = isl_map_reverse(isl_map_from_multi_aff(ma)); 1754 umap = isl_union_map_from_map(map); 1755 executed = isl_union_map_apply_domain(executed, 1756 isl_union_map_copy(umap)); 1757 build = isl_ast_build_scale_down(build, isl_val_copy(data.m), 1758 umap); 1759 } 1760 isl_aff_free(offset); 1761 isl_val_free(data.m); 1762 1763 return create_node_scaled(executed, bounds, domain, build); 1764} 1765 1766/* Add the basic set to the list that "user" points to. 1767 */ 1768static isl_stat collect_basic_set(__isl_take isl_basic_set *bset, void *user) 1769{ 1770 isl_basic_set_list **list = user; 1771 1772 *list = isl_basic_set_list_add(*list, bset); 1773 1774 return isl_stat_ok; 1775} 1776 1777/* Extract the basic sets of "set" and collect them in an isl_basic_set_list. 1778 */ 1779static __isl_give isl_basic_set_list *isl_basic_set_list_from_set( 1780 __isl_take isl_set *set) 1781{ 1782 isl_size n; 1783 isl_ctx *ctx; 1784 isl_basic_set_list *list; 1785 1786 n = isl_set_n_basic_set(set); 1787 if (n < 0) 1788 set = isl_set_free(set); 1789 if (!set) 1790 return NULL; 1791 1792 ctx = isl_set_get_ctx(set); 1793 1794 list = isl_basic_set_list_alloc(ctx, n); 1795 if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0) 1796 list = isl_basic_set_list_free(list); 1797 1798 isl_set_free(set); 1799 return list; 1800} 1801 1802/* Generate code for the schedule domain "bounds" 1803 * and add the result to "list". 1804 * 1805 * We mainly detect strides here and check if the bounds do not 1806 * conflict with the current build domain 1807 * and then pass over control to create_node. 1808 * 1809 * "bounds" reflects the bounds on the current dimension and possibly 1810 * some extra conditions on outer dimensions. 1811 * It does not, however, include any divs involving the current dimension, 1812 * so it does not capture any stride constraints. 1813 * We therefore need to compute that part of the schedule domain that 1814 * intersects with "bounds" and derive the strides from the result. 1815 */ 1816static __isl_give isl_ast_graft_list *add_node( 1817 __isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed, 1818 __isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build) 1819{ 1820 isl_ast_graft *graft; 1821 isl_set *domain = NULL; 1822 isl_union_set *uset; 1823 int empty, disjoint; 1824 1825 uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds)); 1826 executed = isl_union_map_intersect_domain(executed, uset); 1827 empty = isl_union_map_is_empty(executed); 1828 if (empty < 0) 1829 goto error; 1830 if (empty) 1831 goto done; 1832 1833 uset = isl_union_map_domain(isl_union_map_copy(executed)); 1834 domain = isl_set_from_union_set(uset); 1835 domain = isl_ast_build_specialize(build, domain); 1836 1837 domain = isl_set_compute_divs(domain); 1838 domain = isl_ast_build_eliminate_inner(build, domain); 1839 disjoint = isl_set_is_disjoint(domain, build->domain); 1840 if (disjoint < 0) 1841 goto error; 1842 if (disjoint) 1843 goto done; 1844 1845 build = isl_ast_build_detect_strides(build, isl_set_copy(domain)); 1846 1847 graft = create_node(executed, bounds, domain, 1848 isl_ast_build_copy(build)); 1849 list = isl_ast_graft_list_add(list, graft); 1850 isl_ast_build_free(build); 1851 return list; 1852error: 1853 list = isl_ast_graft_list_free(list); 1854done: 1855 isl_set_free(domain); 1856 isl_basic_set_free(bounds); 1857 isl_union_map_free(executed); 1858 isl_ast_build_free(build); 1859 return list; 1860} 1861 1862/* Does any element of i follow or coincide with any element of j 1863 * at the current depth for equal values of the outer dimensions? 1864 */ 1865static isl_bool domain_follows_at_depth(__isl_keep isl_basic_set *i, 1866 __isl_keep isl_basic_set *j, void *user) 1867{ 1868 int depth = *(int *) user; 1869 isl_basic_map *test; 1870 isl_bool empty; 1871 int l; 1872 1873 test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i), 1874 isl_basic_set_copy(j)); 1875 for (l = 0; l < depth; ++l) 1876 test = isl_basic_map_equate(test, isl_dim_in, l, 1877 isl_dim_out, l); 1878 test = isl_basic_map_order_ge(test, isl_dim_in, depth, 1879 isl_dim_out, depth); 1880 empty = isl_basic_map_is_empty(test); 1881 isl_basic_map_free(test); 1882 1883 return isl_bool_not(empty); 1884} 1885 1886/* Split up each element of "list" into a part that is related to "bset" 1887 * according to "gt" and a part that is not. 1888 * Return a list that consist of "bset" and all the pieces. 1889 */ 1890static __isl_give isl_basic_set_list *add_split_on( 1891 __isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset, 1892 __isl_keep isl_basic_map *gt) 1893{ 1894 int i; 1895 isl_size n; 1896 isl_basic_set_list *res; 1897 1898 n = isl_basic_set_list_n_basic_set(list); 1899 if (n < 0) 1900 bset = isl_basic_set_free(bset); 1901 1902 gt = isl_basic_map_copy(gt); 1903 gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset)); 1904 res = isl_basic_set_list_from_basic_set(bset); 1905 for (i = 0; res && i < n; ++i) { 1906 isl_basic_set *bset; 1907 isl_set *set1, *set2; 1908 isl_basic_map *bmap; 1909 int empty; 1910 1911 bset = isl_basic_set_list_get_basic_set(list, i); 1912 bmap = isl_basic_map_copy(gt); 1913 bmap = isl_basic_map_intersect_range(bmap, bset); 1914 bset = isl_basic_map_range(bmap); 1915 empty = isl_basic_set_is_empty(bset); 1916 if (empty < 0) 1917 res = isl_basic_set_list_free(res); 1918 if (empty) { 1919 isl_basic_set_free(bset); 1920 bset = isl_basic_set_list_get_basic_set(list, i); 1921 res = isl_basic_set_list_add(res, bset); 1922 continue; 1923 } 1924 1925 res = isl_basic_set_list_add(res, isl_basic_set_copy(bset)); 1926 set1 = isl_set_from_basic_set(bset); 1927 bset = isl_basic_set_list_get_basic_set(list, i); 1928 set2 = isl_set_from_basic_set(bset); 1929 set1 = isl_set_subtract(set2, set1); 1930 set1 = isl_set_make_disjoint(set1); 1931 1932 res = isl_basic_set_list_concat(res, 1933 isl_basic_set_list_from_set(set1)); 1934 } 1935 isl_basic_map_free(gt); 1936 isl_basic_set_list_free(list); 1937 return res; 1938} 1939 1940static __isl_give isl_ast_graft_list *generate_sorted_domains( 1941 __isl_keep isl_basic_set_list *domain_list, 1942 __isl_keep isl_union_map *executed, 1943 __isl_keep isl_ast_build *build); 1944 1945/* Internal data structure for add_nodes. 1946 * 1947 * "executed" and "build" are extra arguments to be passed to add_node. 1948 * "list" collects the results. 1949 */ 1950struct isl_add_nodes_data { 1951 isl_union_map *executed; 1952 isl_ast_build *build; 1953 1954 isl_ast_graft_list *list; 1955}; 1956 1957/* Generate code for the schedule domains in "scc" 1958 * and add the results to "list". 1959 * 1960 * The domains in "scc" form a strongly connected component in the ordering. 1961 * If the number of domains in "scc" is larger than 1, then this means 1962 * that we cannot determine a valid ordering for the domains in the component. 1963 * This should be fairly rare because the individual domains 1964 * have been made disjoint first. 1965 * The problem is that the domains may be integrally disjoint but not 1966 * rationally disjoint. For example, we may have domains 1967 * 1968 * { [i,i] : 0 <= i <= 1 } and { [i,1-i] : 0 <= i <= 1 } 1969 * 1970 * These two domains have an empty intersection, but their rational 1971 * relaxations do intersect. It is impossible to order these domains 1972 * in the second dimension because the first should be ordered before 1973 * the second for outer dimension equal to 0, while it should be ordered 1974 * after for outer dimension equal to 1. 1975 * 1976 * This may happen in particular in case of unrolling since the domain 1977 * of each slice is replaced by its simple hull. 1978 * 1979 * For each basic set i in "scc" and for each of the following basic sets j, 1980 * we split off that part of the basic set i that shares the outer dimensions 1981 * with j and lies before j in the current dimension. 1982 * We collect all the pieces in a new list that replaces "scc". 1983 * 1984 * While the elements in "scc" should be disjoint, we double-check 1985 * this property to avoid running into an infinite recursion in case 1986 * they intersect due to some internal error. 1987 */ 1988static isl_stat add_nodes(__isl_take isl_basic_set_list *scc, void *user) 1989{ 1990 struct isl_add_nodes_data *data = user; 1991 int i; 1992 isl_size depth; 1993 isl_size n; 1994 isl_basic_set *bset, *first; 1995 isl_basic_set_list *list; 1996 isl_space *space; 1997 isl_basic_map *gt; 1998 1999 n = isl_basic_set_list_n_basic_set(scc); 2000 if (n < 0) 2001 goto error; 2002 bset = isl_basic_set_list_get_basic_set(scc, 0); 2003 if (n == 1) { 2004 isl_basic_set_list_free(scc); 2005 data->list = add_node(data->list, 2006 isl_union_map_copy(data->executed), bset, 2007 isl_ast_build_copy(data->build)); 2008 return data->list ? isl_stat_ok : isl_stat_error; 2009 } 2010 2011 depth = isl_ast_build_get_depth(data->build); 2012 if (depth < 0) 2013 bset = isl_basic_set_free(bset); 2014 space = isl_basic_set_get_space(bset); 2015 space = isl_space_map_from_set(space); 2016 gt = isl_basic_map_universe(space); 2017 for (i = 0; i < depth; ++i) 2018 gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i); 2019 gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth); 2020 2021 first = isl_basic_set_copy(bset); 2022 list = isl_basic_set_list_from_basic_set(bset); 2023 for (i = 1; i < n; ++i) { 2024 int disjoint; 2025 2026 bset = isl_basic_set_list_get_basic_set(scc, i); 2027 2028 disjoint = isl_basic_set_is_disjoint(bset, first); 2029 if (disjoint < 0) 2030 list = isl_basic_set_list_free(list); 2031 else if (!disjoint) 2032 isl_die(isl_basic_set_list_get_ctx(scc), 2033 isl_error_internal, 2034 "basic sets in scc are assumed to be disjoint", 2035 list = isl_basic_set_list_free(list)); 2036 2037 list = add_split_on(list, bset, gt); 2038 } 2039 isl_basic_set_free(first); 2040 isl_basic_map_free(gt); 2041 isl_basic_set_list_free(scc); 2042 scc = list; 2043 data->list = isl_ast_graft_list_concat(data->list, 2044 generate_sorted_domains(scc, data->executed, data->build)); 2045 isl_basic_set_list_free(scc); 2046 2047 return data->list ? isl_stat_ok : isl_stat_error; 2048error: 2049 isl_basic_set_list_free(scc); 2050 return isl_stat_error; 2051} 2052 2053/* Sort the domains in "domain_list" according to the execution order 2054 * at the current depth (for equal values of the outer dimensions), 2055 * generate code for each of them, collecting the results in a list. 2056 * If no code is generated (because the intersection of the inverse schedule 2057 * with the domains turns out to be empty), then an empty list is returned. 2058 * 2059 * The caller is responsible for ensuring that the basic sets in "domain_list" 2060 * are pair-wise disjoint. It can, however, in principle happen that 2061 * two basic sets should be ordered one way for one value of the outer 2062 * dimensions and the other way for some other value of the outer dimensions. 2063 * We therefore play safe and look for strongly connected components. 2064 * The function add_nodes takes care of handling non-trivial components. 2065 */ 2066static __isl_give isl_ast_graft_list *generate_sorted_domains( 2067 __isl_keep isl_basic_set_list *domain_list, 2068 __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) 2069{ 2070 isl_ctx *ctx; 2071 struct isl_add_nodes_data data; 2072 isl_size depth; 2073 isl_size n; 2074 2075 n = isl_basic_set_list_n_basic_set(domain_list); 2076 if (n < 0) 2077 return NULL; 2078 2079 ctx = isl_basic_set_list_get_ctx(domain_list); 2080 data.list = isl_ast_graft_list_alloc(ctx, n); 2081 if (n == 0) 2082 return data.list; 2083 if (n == 1) 2084 return add_node(data.list, isl_union_map_copy(executed), 2085 isl_basic_set_list_get_basic_set(domain_list, 0), 2086 isl_ast_build_copy(build)); 2087 2088 depth = isl_ast_build_get_depth(build); 2089 data.executed = executed; 2090 data.build = build; 2091 if (depth < 0 || isl_basic_set_list_foreach_scc(domain_list, 2092 &domain_follows_at_depth, &depth, 2093 &add_nodes, &data) < 0) 2094 data.list = isl_ast_graft_list_free(data.list); 2095 2096 return data.list; 2097} 2098 2099/* Do i and j share any values for the outer dimensions? 2100 */ 2101static isl_bool shared_outer(__isl_keep isl_basic_set *i, 2102 __isl_keep isl_basic_set *j, void *user) 2103{ 2104 int depth = *(int *) user; 2105 isl_basic_map *test; 2106 isl_bool empty; 2107 int l; 2108 2109 test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i), 2110 isl_basic_set_copy(j)); 2111 for (l = 0; l < depth; ++l) 2112 test = isl_basic_map_equate(test, isl_dim_in, l, 2113 isl_dim_out, l); 2114 empty = isl_basic_map_is_empty(test); 2115 isl_basic_map_free(test); 2116 2117 return isl_bool_not(empty); 2118} 2119 2120/* Internal data structure for generate_sorted_domains_wrap. 2121 * 2122 * "n" is the total number of basic sets 2123 * "executed" and "build" are extra arguments to be passed 2124 * to generate_sorted_domains. 2125 * 2126 * "single" is set to 1 by generate_sorted_domains_wrap if there 2127 * is only a single component. 2128 * "list" collects the results. 2129 */ 2130struct isl_ast_generate_parallel_domains_data { 2131 isl_size n; 2132 isl_union_map *executed; 2133 isl_ast_build *build; 2134 2135 int single; 2136 isl_ast_graft_list *list; 2137}; 2138 2139/* Call generate_sorted_domains on "scc", fuse the result into a list 2140 * with either zero or one graft and collect the these single element 2141 * lists into data->list. 2142 * 2143 * If there is only one component, i.e., if the number of basic sets 2144 * in the current component is equal to the total number of basic sets, 2145 * then data->single is set to 1 and the result of generate_sorted_domains 2146 * is not fused. 2147 */ 2148static isl_stat generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc, 2149 void *user) 2150{ 2151 struct isl_ast_generate_parallel_domains_data *data = user; 2152 isl_ast_graft_list *list; 2153 isl_size n; 2154 2155 n = isl_basic_set_list_n_basic_set(scc); 2156 if (n < 0) 2157 scc = isl_basic_set_list_free(scc); 2158 list = generate_sorted_domains(scc, data->executed, data->build); 2159 data->single = n == data->n; 2160 if (!data->single) 2161 list = isl_ast_graft_list_fuse(list, data->build); 2162 if (!data->list) 2163 data->list = list; 2164 else 2165 data->list = isl_ast_graft_list_concat(data->list, list); 2166 2167 isl_basic_set_list_free(scc); 2168 if (!data->list) 2169 return isl_stat_error; 2170 2171 return isl_stat_ok; 2172} 2173 2174/* Look for any (weakly connected) components in the "domain_list" 2175 * of domains that share some values of the outer dimensions. 2176 * That is, domains in different components do not share any values 2177 * of the outer dimensions. This means that these components 2178 * can be freely reordered. 2179 * Within each of the components, we sort the domains according 2180 * to the execution order at the current depth. 2181 * 2182 * If there is more than one component, then generate_sorted_domains_wrap 2183 * fuses the result of each call to generate_sorted_domains 2184 * into a list with either zero or one graft and collects these (at most) 2185 * single element lists into a bigger list. This means that the elements of the 2186 * final list can be freely reordered. In particular, we sort them 2187 * according to an arbitrary but fixed ordering to ease merging of 2188 * graft lists from different components. 2189 */ 2190static __isl_give isl_ast_graft_list *generate_parallel_domains( 2191 __isl_keep isl_basic_set_list *domain_list, 2192 __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) 2193{ 2194 isl_size depth; 2195 struct isl_ast_generate_parallel_domains_data data; 2196 2197 data.n = isl_basic_set_list_n_basic_set(domain_list); 2198 if (data.n < 0) 2199 return NULL; 2200 2201 if (data.n <= 1) 2202 return generate_sorted_domains(domain_list, executed, build); 2203 2204 depth = isl_ast_build_get_depth(build); 2205 if (depth < 0) 2206 return NULL; 2207 data.list = NULL; 2208 data.executed = executed; 2209 data.build = build; 2210 data.single = 0; 2211 if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth, 2212 &generate_sorted_domains_wrap, 2213 &data) < 0) 2214 data.list = isl_ast_graft_list_free(data.list); 2215 2216 if (!data.single) 2217 data.list = isl_ast_graft_list_sort_guard(data.list); 2218 2219 return data.list; 2220} 2221 2222/* Internal data for separate_domain. 2223 * 2224 * "explicit" is set if we only want to use explicit bounds. 2225 * 2226 * "domain" collects the separated domains. 2227 */ 2228struct isl_separate_domain_data { 2229 isl_ast_build *build; 2230 int explicit; 2231 isl_set *domain; 2232}; 2233 2234/* Extract implicit bounds on the current dimension for the executed "map". 2235 * 2236 * The domain of "map" may involve inner dimensions, so we 2237 * need to eliminate them. 2238 */ 2239static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map, 2240 __isl_keep isl_ast_build *build) 2241{ 2242 isl_set *domain; 2243 2244 domain = isl_map_domain(map); 2245 domain = isl_ast_build_eliminate(build, domain); 2246 2247 return domain; 2248} 2249 2250/* Extract explicit bounds on the current dimension for the executed "map". 2251 * 2252 * Rather than eliminating the inner dimensions as in implicit_bounds, 2253 * we simply drop any constraints involving those inner dimensions. 2254 * The idea is that most bounds that are implied by constraints on the 2255 * inner dimensions will be enforced by for loops and not by explicit guards. 2256 * There is then no need to separate along those bounds. 2257 */ 2258static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map, 2259 __isl_keep isl_ast_build *build) 2260{ 2261 isl_set *domain; 2262 isl_size depth; 2263 isl_size dim; 2264 2265 depth = isl_ast_build_get_depth(build); 2266 dim = isl_map_dim(map, isl_dim_out); 2267 if (depth < 0 || dim < 0) 2268 return isl_map_domain(isl_map_free(map)); 2269 map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim); 2270 2271 domain = isl_map_domain(map); 2272 dim = isl_set_dim(domain, isl_dim_set); 2273 domain = isl_set_detect_equalities(domain); 2274 domain = isl_set_drop_constraints_involving_dims(domain, 2275 isl_dim_set, depth + 1, dim - (depth + 1)); 2276 domain = isl_set_remove_divs_involving_dims(domain, 2277 isl_dim_set, depth, 1); 2278 domain = isl_set_remove_unknown_divs(domain); 2279 2280 return domain; 2281} 2282 2283/* Split data->domain into pieces that intersect with the range of "map" 2284 * and pieces that do not intersect with the range of "map" 2285 * and then add that part of the range of "map" that does not intersect 2286 * with data->domain. 2287 */ 2288static isl_stat separate_domain(__isl_take isl_map *map, void *user) 2289{ 2290 struct isl_separate_domain_data *data = user; 2291 isl_set *domain; 2292 isl_set *d1, *d2; 2293 2294 if (data->explicit) 2295 domain = explicit_bounds(map, data->build); 2296 else 2297 domain = implicit_bounds(map, data->build); 2298 2299 domain = isl_set_coalesce(domain); 2300 domain = isl_set_make_disjoint(domain); 2301 d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain)); 2302 d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain)); 2303 data->domain = isl_set_intersect(data->domain, domain); 2304 data->domain = isl_set_union(data->domain, d1); 2305 data->domain = isl_set_union(data->domain, d2); 2306 2307 return isl_stat_ok; 2308} 2309 2310/* Separate the schedule domains of "executed". 2311 * 2312 * That is, break up the domain of "executed" into basic sets, 2313 * such that for each basic set S, every element in S is associated with 2314 * the same domain spaces. 2315 * 2316 * "space" is the (single) domain space of "executed". 2317 */ 2318static __isl_give isl_set *separate_schedule_domains( 2319 __isl_take isl_space *space, __isl_take isl_union_map *executed, 2320 __isl_keep isl_ast_build *build) 2321{ 2322 struct isl_separate_domain_data data = { build }; 2323 isl_ctx *ctx; 2324 2325 ctx = isl_ast_build_get_ctx(build); 2326 data.explicit = isl_options_get_ast_build_separation_bounds(ctx) == 2327 ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT; 2328 data.domain = isl_set_empty(space); 2329 if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0) 2330 data.domain = isl_set_free(data.domain); 2331 2332 isl_union_map_free(executed); 2333 return data.domain; 2334} 2335 2336/* Temporary data used during the search for a lower bound for unrolling. 2337 * 2338 * "build" is the build in which the unrolling will be performed 2339 * "domain" is the original set for which to find a lower bound 2340 * "depth" is the dimension for which to find a lower boudn 2341 * "expansion" is the expansion that needs to be applied to "domain" 2342 * in the unrolling that will be performed 2343 * 2344 * "lower" is the best lower bound found so far. It is NULL if we have not 2345 * found any yet. 2346 * "n" is the corresponding size. If lower is NULL, then the value of n 2347 * is undefined. 2348 * "n_div" is the maximal number of integer divisions in the first 2349 * unrolled iteration (after expansion). It is set to -1 if it hasn't 2350 * been computed yet. 2351 */ 2352struct isl_find_unroll_data { 2353 isl_ast_build *build; 2354 isl_set *domain; 2355 int depth; 2356 isl_basic_map *expansion; 2357 2358 isl_aff *lower; 2359 int *n; 2360 int n_div; 2361}; 2362 2363/* Return the constraint 2364 * 2365 * i_"depth" = aff + offset 2366 */ 2367static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff, 2368 int offset) 2369{ 2370 aff = isl_aff_copy(aff); 2371 aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1); 2372 aff = isl_aff_add_constant_si(aff, offset); 2373 return isl_equality_from_aff(aff); 2374} 2375 2376/* Update *user to the number of integer divisions in the first element 2377 * of "ma", if it is larger than the current value. 2378 */ 2379static isl_stat update_n_div(__isl_take isl_set *set, 2380 __isl_take isl_multi_aff *ma, void *user) 2381{ 2382 isl_aff *aff; 2383 int *n = user; 2384 isl_size n_div; 2385 2386 aff = isl_multi_aff_get_aff(ma, 0); 2387 n_div = isl_aff_dim(aff, isl_dim_div); 2388 isl_aff_free(aff); 2389 isl_multi_aff_free(ma); 2390 isl_set_free(set); 2391 2392 if (n_div > *n) 2393 *n = n_div; 2394 2395 return n_div >= 0 ? isl_stat_ok : isl_stat_error; 2396} 2397 2398/* Get the number of integer divisions in the expression for the iterator 2399 * value at the first slice in the unrolling based on lower bound "lower", 2400 * taking into account the expansion that needs to be performed on this slice. 2401 */ 2402static int get_expanded_n_div(struct isl_find_unroll_data *data, 2403 __isl_keep isl_aff *lower) 2404{ 2405 isl_constraint *c; 2406 isl_set *set; 2407 isl_map *it_map, *expansion; 2408 isl_pw_multi_aff *pma; 2409 int n; 2410 2411 c = at_offset(data->depth, lower, 0); 2412 set = isl_set_copy(data->domain); 2413 set = isl_set_add_constraint(set, c); 2414 expansion = isl_map_from_basic_map(isl_basic_map_copy(data->expansion)); 2415 set = isl_set_apply(set, expansion); 2416 it_map = isl_ast_build_map_to_iterator(data->build, set); 2417 pma = isl_pw_multi_aff_from_map(it_map); 2418 n = 0; 2419 if (isl_pw_multi_aff_foreach_piece(pma, &update_n_div, &n) < 0) 2420 n = -1; 2421 isl_pw_multi_aff_free(pma); 2422 2423 return n; 2424} 2425 2426/* Is the lower bound "lower" with corresponding iteration count "n" 2427 * better than the one stored in "data"? 2428 * If there is no upper bound on the iteration count ("n" is infinity) or 2429 * if the count is too large, then we cannot use this lower bound. 2430 * Otherwise, if there was no previous lower bound or 2431 * if the iteration count of the new lower bound is smaller than 2432 * the iteration count of the previous lower bound, then we consider 2433 * the new lower bound to be better. 2434 * If the iteration count is the same, then compare the number 2435 * of integer divisions that would be needed to express 2436 * the iterator value at the first slice in the unrolling 2437 * according to the lower bound. If we end up computing this 2438 * number, then store the lowest value in data->n_div. 2439 */ 2440static int is_better_lower_bound(struct isl_find_unroll_data *data, 2441 __isl_keep isl_aff *lower, __isl_keep isl_val *n) 2442{ 2443 int cmp; 2444 int n_div; 2445 2446 if (!n) 2447 return -1; 2448 if (isl_val_is_infty(n)) 2449 return 0; 2450 if (isl_val_cmp_si(n, INT_MAX) > 0) 2451 return 0; 2452 if (!data->lower) 2453 return 1; 2454 cmp = isl_val_cmp_si(n, *data->n); 2455 if (cmp < 0) 2456 return 1; 2457 if (cmp > 0) 2458 return 0; 2459 if (data->n_div < 0) 2460 data->n_div = get_expanded_n_div(data, data->lower); 2461 if (data->n_div < 0) 2462 return -1; 2463 if (data->n_div == 0) 2464 return 0; 2465 n_div = get_expanded_n_div(data, lower); 2466 if (n_div < 0) 2467 return -1; 2468 if (n_div >= data->n_div) 2469 return 0; 2470 data->n_div = n_div; 2471 2472 return 1; 2473} 2474 2475/* Check if we can use "c" as a lower bound and if it is better than 2476 * any previously found lower bound. 2477 * 2478 * If "c" does not involve the dimension at the current depth, 2479 * then we cannot use it. 2480 * Otherwise, let "c" be of the form 2481 * 2482 * i >= f(j)/a 2483 * 2484 * We compute the maximal value of 2485 * 2486 * -ceil(f(j)/a)) + i + 1 2487 * 2488 * over the domain. If there is such a value "n", then we know 2489 * 2490 * -ceil(f(j)/a)) + i + 1 <= n 2491 * 2492 * or 2493 * 2494 * i < ceil(f(j)/a)) + n 2495 * 2496 * meaning that we can use ceil(f(j)/a)) as a lower bound for unrolling. 2497 * We just need to check if we have found any lower bound before and 2498 * if the new lower bound is better (smaller n or fewer integer divisions) 2499 * than the previously found lower bounds. 2500 */ 2501static isl_stat update_unrolling_lower_bound(struct isl_find_unroll_data *data, 2502 __isl_keep isl_constraint *c) 2503{ 2504 isl_aff *aff, *lower; 2505 isl_val *max; 2506 int better; 2507 2508 if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth)) 2509 return isl_stat_ok; 2510 2511 lower = isl_constraint_get_bound(c, isl_dim_set, data->depth); 2512 lower = isl_aff_ceil(lower); 2513 aff = isl_aff_copy(lower); 2514 aff = isl_aff_neg(aff); 2515 aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1); 2516 aff = isl_aff_add_constant_si(aff, 1); 2517 max = isl_set_max_val(data->domain, aff); 2518 isl_aff_free(aff); 2519 2520 better = is_better_lower_bound(data, lower, max); 2521 if (better < 0 || !better) { 2522 isl_val_free(max); 2523 isl_aff_free(lower); 2524 return better < 0 ? isl_stat_error : isl_stat_ok; 2525 } 2526 2527 isl_aff_free(data->lower); 2528 data->lower = lower; 2529 *data->n = isl_val_get_num_si(max); 2530 isl_val_free(max); 2531 2532 return isl_stat_ok; 2533} 2534 2535/* Check if we can use "c" as a lower bound and if it is better than 2536 * any previously found lower bound. 2537 */ 2538static isl_stat constraint_find_unroll(__isl_take isl_constraint *c, void *user) 2539{ 2540 struct isl_find_unroll_data *data; 2541 isl_stat r; 2542 2543 data = (struct isl_find_unroll_data *) user; 2544 r = update_unrolling_lower_bound(data, c); 2545 isl_constraint_free(c); 2546 2547 return r; 2548} 2549 2550/* Look for a lower bound l(i) on the dimension at "depth" 2551 * and a size n such that "domain" is a subset of 2552 * 2553 * { [i] : l(i) <= i_d < l(i) + n } 2554 * 2555 * where d is "depth" and l(i) depends only on earlier dimensions. 2556 * Furthermore, try and find a lower bound such that n is as small as possible. 2557 * In particular, "n" needs to be finite. 2558 * "build" is the build in which the unrolling will be performed. 2559 * "expansion" is the expansion that needs to be applied to "domain" 2560 * in the unrolling that will be performed. 2561 * 2562 * Inner dimensions have been eliminated from "domain" by the caller. 2563 * 2564 * We first construct a collection of lower bounds on the input set 2565 * by computing its simple hull. We then iterate through them, 2566 * discarding those that we cannot use (either because they do not 2567 * involve the dimension at "depth" or because they have no corresponding 2568 * upper bound, meaning that "n" would be unbounded) and pick out the 2569 * best from the remaining ones. 2570 * 2571 * If we cannot find a suitable lower bound, then we consider that 2572 * to be an error. 2573 */ 2574static __isl_give isl_aff *find_unroll_lower_bound( 2575 __isl_keep isl_ast_build *build, __isl_keep isl_set *domain, 2576 int depth, __isl_keep isl_basic_map *expansion, int *n) 2577{ 2578 struct isl_find_unroll_data data = 2579 { build, domain, depth, expansion, NULL, n, -1 }; 2580 isl_basic_set *hull; 2581 2582 hull = isl_set_simple_hull(isl_set_copy(domain)); 2583 2584 if (isl_basic_set_foreach_constraint(hull, 2585 &constraint_find_unroll, &data) < 0) 2586 goto error; 2587 2588 isl_basic_set_free(hull); 2589 2590 if (!data.lower) 2591 isl_die(isl_set_get_ctx(domain), isl_error_invalid, 2592 "cannot find lower bound for unrolling", return NULL); 2593 2594 return data.lower; 2595error: 2596 isl_basic_set_free(hull); 2597 return isl_aff_free(data.lower); 2598} 2599 2600/* Call "fn" on each iteration of the current dimension of "domain". 2601 * If "init" is not NULL, then it is called with the number of 2602 * iterations before any call to "fn". 2603 * Return -1 on failure. 2604 * 2605 * Since we are going to be iterating over the individual values, 2606 * we first check if there are any strides on the current dimension. 2607 * If there is, we rewrite the current dimension i as 2608 * 2609 * i = stride i' + offset 2610 * 2611 * and then iterate over individual values of i' instead. 2612 * 2613 * We then look for a lower bound on i' and a size such that the domain 2614 * is a subset of 2615 * 2616 * { [j,i'] : l(j) <= i' < l(j) + n } 2617 * 2618 * and then take slices of the domain at values of i' 2619 * between l(j) and l(j) + n - 1. 2620 * 2621 * We compute the unshifted simple hull of each slice to ensure that 2622 * we have a single basic set per offset. The slicing constraint 2623 * may get simplified away before the unshifted simple hull is taken 2624 * and may therefore in some rare cases disappear from the result. 2625 * We therefore explicitly add the constraint back after computing 2626 * the unshifted simple hull to ensure that the basic sets 2627 * remain disjoint. The constraints that are dropped by taking the hull 2628 * will be taken into account at the next level, as in the case of the 2629 * atomic option. 2630 * 2631 * Finally, we map i' back to i and call "fn". 2632 */ 2633static int foreach_iteration(__isl_take isl_set *domain, 2634 __isl_keep isl_ast_build *build, int (*init)(int n, void *user), 2635 int (*fn)(__isl_take isl_basic_set *bset, void *user), void *user) 2636{ 2637 int i, n; 2638 isl_bool empty; 2639 isl_size depth; 2640 isl_multi_aff *expansion; 2641 isl_basic_map *bmap; 2642 isl_aff *lower = NULL; 2643 isl_ast_build *stride_build; 2644 2645 depth = isl_ast_build_get_depth(build); 2646 if (depth < 0) 2647 domain = isl_set_free(domain); 2648 2649 domain = isl_ast_build_eliminate_inner(build, domain); 2650 domain = isl_set_intersect(domain, isl_ast_build_get_domain(build)); 2651 stride_build = isl_ast_build_copy(build); 2652 stride_build = isl_ast_build_detect_strides(stride_build, 2653 isl_set_copy(domain)); 2654 expansion = isl_ast_build_get_stride_expansion(stride_build); 2655 2656 domain = isl_set_preimage_multi_aff(domain, 2657 isl_multi_aff_copy(expansion)); 2658 domain = isl_ast_build_eliminate_divs(stride_build, domain); 2659 isl_ast_build_free(stride_build); 2660 2661 bmap = isl_basic_map_from_multi_aff(expansion); 2662 2663 empty = isl_set_is_empty(domain); 2664 if (empty < 0) { 2665 n = -1; 2666 } else if (empty) { 2667 n = 0; 2668 } else { 2669 lower = find_unroll_lower_bound(build, domain, depth, bmap, &n); 2670 if (!lower) 2671 n = -1; 2672 } 2673 if (n >= 0 && init && init(n, user) < 0) 2674 n = -1; 2675 for (i = 0; i < n; ++i) { 2676 isl_set *set; 2677 isl_basic_set *bset; 2678 isl_constraint *slice; 2679 2680 slice = at_offset(depth, lower, i); 2681 set = isl_set_copy(domain); 2682 set = isl_set_add_constraint(set, isl_constraint_copy(slice)); 2683 bset = isl_set_unshifted_simple_hull(set); 2684 bset = isl_basic_set_add_constraint(bset, slice); 2685 bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap)); 2686 2687 if (fn(bset, user) < 0) 2688 break; 2689 } 2690 2691 isl_aff_free(lower); 2692 isl_set_free(domain); 2693 isl_basic_map_free(bmap); 2694 2695 return n < 0 || i < n ? -1 : 0; 2696} 2697 2698/* Data structure for storing the results and the intermediate objects 2699 * of compute_domains. 2700 * 2701 * "list" is the main result of the function and contains a list 2702 * of disjoint basic sets for which code should be generated. 2703 * 2704 * "executed" and "build" are inputs to compute_domains. 2705 * "schedule_domain" is the domain of "executed". 2706 * 2707 * "option" contains the domains at the current depth that should by 2708 * atomic, separated or unrolled. These domains are as specified by 2709 * the user, except that inner dimensions have been eliminated and 2710 * that they have been made pair-wise disjoint. 2711 * 2712 * "sep_class" contains the user-specified split into separation classes 2713 * specialized to the current depth. 2714 * "done" contains the union of the separation domains that have already 2715 * been handled. 2716 */ 2717struct isl_codegen_domains { 2718 isl_basic_set_list *list; 2719 2720 isl_union_map *executed; 2721 isl_ast_build *build; 2722 isl_set *schedule_domain; 2723 2724 isl_set *option[4]; 2725 2726 isl_map *sep_class; 2727 isl_set *done; 2728}; 2729 2730/* Internal data structure for do_unroll. 2731 * 2732 * "domains" stores the results of compute_domains. 2733 * "class_domain" is the original class domain passed to do_unroll. 2734 * "unroll_domain" collects the unrolled iterations. 2735 */ 2736struct isl_ast_unroll_data { 2737 struct isl_codegen_domains *domains; 2738 isl_set *class_domain; 2739 isl_set *unroll_domain; 2740}; 2741 2742/* Given an iteration of an unrolled domain represented by "bset", 2743 * add it to data->domains->list. 2744 * Since we may have dropped some constraints, we intersect with 2745 * the class domain again to ensure that each element in the list 2746 * is disjoint from the other class domains. 2747 */ 2748static int do_unroll_iteration(__isl_take isl_basic_set *bset, void *user) 2749{ 2750 struct isl_ast_unroll_data *data = user; 2751 isl_set *set; 2752 isl_basic_set_list *list; 2753 2754 set = isl_set_from_basic_set(bset); 2755 data->unroll_domain = isl_set_union(data->unroll_domain, 2756 isl_set_copy(set)); 2757 set = isl_set_intersect(set, isl_set_copy(data->class_domain)); 2758 set = isl_set_make_disjoint(set); 2759 list = isl_basic_set_list_from_set(set); 2760 data->domains->list = isl_basic_set_list_concat(data->domains->list, 2761 list); 2762 2763 return 0; 2764} 2765 2766/* Extend domains->list with a list of basic sets, one for each value 2767 * of the current dimension in "domain" and remove the corresponding 2768 * sets from the class domain. Return the updated class domain. 2769 * The divs that involve the current dimension have not been projected out 2770 * from this domain. 2771 * 2772 * We call foreach_iteration to iterate over the individual values and 2773 * in do_unroll_iteration we collect the individual basic sets in 2774 * domains->list and their union in data->unroll_domain, which is then 2775 * used to update the class domain. 2776 */ 2777static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains, 2778 __isl_take isl_set *domain, __isl_take isl_set *class_domain) 2779{ 2780 struct isl_ast_unroll_data data; 2781 2782 if (!domain) 2783 return isl_set_free(class_domain); 2784 if (!class_domain) 2785 return isl_set_free(domain); 2786 2787 data.domains = domains; 2788 data.class_domain = class_domain; 2789 data.unroll_domain = isl_set_empty(isl_set_get_space(domain)); 2790 2791 if (foreach_iteration(domain, domains->build, NULL, 2792 &do_unroll_iteration, &data) < 0) 2793 data.unroll_domain = isl_set_free(data.unroll_domain); 2794 2795 class_domain = isl_set_subtract(class_domain, data.unroll_domain); 2796 2797 return class_domain; 2798} 2799 2800/* Add domains to domains->list for each individual value of the current 2801 * dimension, for that part of the schedule domain that lies in the 2802 * intersection of the option domain and the class domain. 2803 * Remove the corresponding sets from the class domain and 2804 * return the updated class domain. 2805 * 2806 * We first break up the unroll option domain into individual pieces 2807 * and then handle each of them separately. The unroll option domain 2808 * has been made disjoint in compute_domains_init_options, 2809 * 2810 * Note that we actively want to combine different pieces of the 2811 * schedule domain that have the same value at the current dimension. 2812 * We therefore need to break up the unroll option domain before 2813 * intersecting with class and schedule domain, hoping that the 2814 * unroll option domain specified by the user is relatively simple. 2815 */ 2816static __isl_give isl_set *compute_unroll_domains( 2817 struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) 2818{ 2819 isl_set *unroll_domain; 2820 isl_basic_set_list *unroll_list; 2821 int i; 2822 isl_size n; 2823 isl_bool empty; 2824 2825 empty = isl_set_is_empty(domains->option[isl_ast_loop_unroll]); 2826 if (empty < 0) 2827 return isl_set_free(class_domain); 2828 if (empty) 2829 return class_domain; 2830 2831 unroll_domain = isl_set_copy(domains->option[isl_ast_loop_unroll]); 2832 unroll_list = isl_basic_set_list_from_set(unroll_domain); 2833 2834 n = isl_basic_set_list_n_basic_set(unroll_list); 2835 if (n < 0) 2836 class_domain = isl_set_free(class_domain); 2837 for (i = 0; i < n; ++i) { 2838 isl_basic_set *bset; 2839 2840 bset = isl_basic_set_list_get_basic_set(unroll_list, i); 2841 unroll_domain = isl_set_from_basic_set(bset); 2842 unroll_domain = isl_set_intersect(unroll_domain, 2843 isl_set_copy(class_domain)); 2844 unroll_domain = isl_set_intersect(unroll_domain, 2845 isl_set_copy(domains->schedule_domain)); 2846 2847 empty = isl_set_is_empty(unroll_domain); 2848 if (empty >= 0 && empty) { 2849 isl_set_free(unroll_domain); 2850 continue; 2851 } 2852 2853 class_domain = do_unroll(domains, unroll_domain, class_domain); 2854 } 2855 2856 isl_basic_set_list_free(unroll_list); 2857 2858 return class_domain; 2859} 2860 2861/* Try and construct a single basic set that includes the intersection of 2862 * the schedule domain, the atomic option domain and the class domain. 2863 * Add the resulting basic set(s) to domains->list and remove them 2864 * from class_domain. Return the updated class domain. 2865 * 2866 * We construct a single domain rather than trying to combine 2867 * the schedule domains of individual domains because we are working 2868 * within a single component so that non-overlapping schedule domains 2869 * should already have been separated. 2870 * We do however need to make sure that this single domains is a subset 2871 * of the class domain so that it would not intersect with any other 2872 * class domains. This means that we may end up splitting up the atomic 2873 * domain in case separation classes are being used. 2874 * 2875 * "domain" is the intersection of the schedule domain and the class domain, 2876 * with inner dimensions projected out. 2877 */ 2878static __isl_give isl_set *compute_atomic_domain( 2879 struct isl_codegen_domains *domains, __isl_take isl_set *class_domain) 2880{ 2881 isl_basic_set *bset; 2882 isl_basic_set_list *list; 2883 isl_set *domain, *atomic_domain; 2884 int empty; 2885 2886 domain = isl_set_copy(domains->option[isl_ast_loop_atomic]); 2887 domain = isl_set_intersect(domain, isl_set_copy(class_domain)); 2888 domain = isl_set_intersect(domain, 2889 isl_set_copy(domains->schedule_domain)); 2890 empty = isl_set_is_empty(domain); 2891 if (empty < 0) 2892 class_domain = isl_set_free(class_domain); 2893 if (empty) { 2894 isl_set_free(domain); 2895 return class_domain; 2896 } 2897 2898 domain = isl_ast_build_eliminate(domains->build, domain); 2899 domain = isl_set_coalesce_preserve(domain); 2900 bset = isl_set_unshifted_simple_hull(domain); 2901 domain = isl_set_from_basic_set(bset); 2902 atomic_domain = isl_set_copy(domain); 2903 domain = isl_set_intersect(domain, isl_set_copy(class_domain)); 2904 class_domain = isl_set_subtract(class_domain, atomic_domain); 2905 domain = isl_set_make_disjoint(domain); 2906 list = isl_basic_set_list_from_set(domain); 2907 domains->list = isl_basic_set_list_concat(domains->list, list); 2908 2909 return class_domain; 2910} 2911 2912/* Split up the schedule domain into uniform basic sets, 2913 * in the sense that each element in a basic set is associated to 2914 * elements of the same domains, and add the result to domains->list. 2915 * Do this for that part of the schedule domain that lies in the 2916 * intersection of "class_domain" and the separate option domain. 2917 * 2918 * "class_domain" may or may not include the constraints 2919 * of the schedule domain, but this does not make a difference 2920 * since we are going to intersect it with the domain of the inverse schedule. 2921 * If it includes schedule domain constraints, then they may involve 2922 * inner dimensions, but we will eliminate them in separation_domain. 2923 */ 2924static int compute_separate_domain(struct isl_codegen_domains *domains, 2925 __isl_keep isl_set *class_domain) 2926{ 2927 isl_space *space; 2928 isl_set *domain; 2929 isl_union_map *executed; 2930 isl_basic_set_list *list; 2931 int empty; 2932 2933 domain = isl_set_copy(domains->option[isl_ast_loop_separate]); 2934 domain = isl_set_intersect(domain, isl_set_copy(class_domain)); 2935 executed = isl_union_map_copy(domains->executed); 2936 executed = isl_union_map_intersect_domain(executed, 2937 isl_union_set_from_set(domain)); 2938 empty = isl_union_map_is_empty(executed); 2939 if (empty < 0 || empty) { 2940 isl_union_map_free(executed); 2941 return empty < 0 ? -1 : 0; 2942 } 2943 2944 space = isl_set_get_space(class_domain); 2945 domain = separate_schedule_domains(space, executed, domains->build); 2946 2947 list = isl_basic_set_list_from_set(domain); 2948 domains->list = isl_basic_set_list_concat(domains->list, list); 2949 2950 return 0; 2951} 2952 2953/* Split up the domain at the current depth into disjoint 2954 * basic sets for which code should be generated separately 2955 * for the given separation class domain. 2956 * 2957 * If any separation classes have been defined, then "class_domain" 2958 * is the domain of the current class and does not refer to inner dimensions. 2959 * Otherwise, "class_domain" is the universe domain. 2960 * 2961 * We first make sure that the class domain is disjoint from 2962 * previously considered class domains. 2963 * 2964 * The separate domains can be computed directly from the "class_domain". 2965 * 2966 * The unroll, atomic and remainder domains need the constraints 2967 * from the schedule domain. 2968 * 2969 * For unrolling, the actual schedule domain is needed (with divs that 2970 * may refer to the current dimension) so that stride detection can be 2971 * performed. 2972 * 2973 * For atomic and remainder domains, inner dimensions and divs involving 2974 * the current dimensions should be eliminated. 2975 * In case we are working within a separation class, we need to intersect 2976 * the result with the current "class_domain" to ensure that the domains 2977 * are disjoint from those generated from other class domains. 2978 * 2979 * The domain that has been made atomic may be larger than specified 2980 * by the user since it needs to be representable as a single basic set. 2981 * This possibly larger domain is removed from class_domain by 2982 * compute_atomic_domain. It is computed first so that the extended domain 2983 * would not overlap with any domains computed before. 2984 * Similary, the unrolled domains may have some constraints removed and 2985 * may therefore also be larger than specified by the user. 2986 * 2987 * If anything is left after handling separate, unroll and atomic, 2988 * we split it up into basic sets and append the basic sets to domains->list. 2989 */ 2990static isl_stat compute_partial_domains(struct isl_codegen_domains *domains, 2991 __isl_take isl_set *class_domain) 2992{ 2993 isl_basic_set_list *list; 2994 isl_set *domain; 2995 2996 class_domain = isl_set_subtract(class_domain, 2997 isl_set_copy(domains->done)); 2998 domains->done = isl_set_union(domains->done, 2999 isl_set_copy(class_domain)); 3000 3001 class_domain = compute_atomic_domain(domains, class_domain); 3002 class_domain = compute_unroll_domains(domains, class_domain); 3003 3004 domain = isl_set_copy(class_domain); 3005 3006 if (compute_separate_domain(domains, domain) < 0) 3007 goto error; 3008 domain = isl_set_subtract(domain, 3009 isl_set_copy(domains->option[isl_ast_loop_separate])); 3010 3011 domain = isl_set_intersect(domain, 3012 isl_set_copy(domains->schedule_domain)); 3013 3014 domain = isl_ast_build_eliminate(domains->build, domain); 3015 domain = isl_set_intersect(domain, isl_set_copy(class_domain)); 3016 3017 domain = isl_set_coalesce_preserve(domain); 3018 domain = isl_set_make_disjoint(domain); 3019 3020 list = isl_basic_set_list_from_set(domain); 3021 domains->list = isl_basic_set_list_concat(domains->list, list); 3022 3023 isl_set_free(class_domain); 3024 3025 return isl_stat_ok; 3026error: 3027 isl_set_free(domain); 3028 isl_set_free(class_domain); 3029 return isl_stat_error; 3030} 3031 3032/* Split up the domain at the current depth into disjoint 3033 * basic sets for which code should be generated separately 3034 * for the separation class identified by "pnt". 3035 * 3036 * We extract the corresponding class domain from domains->sep_class, 3037 * eliminate inner dimensions and pass control to compute_partial_domains. 3038 */ 3039static isl_stat compute_class_domains(__isl_take isl_point *pnt, void *user) 3040{ 3041 struct isl_codegen_domains *domains = user; 3042 isl_set *class_set; 3043 isl_set *domain; 3044 int disjoint; 3045 3046 class_set = isl_set_from_point(pnt); 3047 domain = isl_map_domain(isl_map_intersect_range( 3048 isl_map_copy(domains->sep_class), class_set)); 3049 domain = isl_ast_build_compute_gist(domains->build, domain); 3050 domain = isl_ast_build_eliminate(domains->build, domain); 3051 3052 disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain); 3053 if (disjoint < 0) 3054 return isl_stat_error; 3055 if (disjoint) { 3056 isl_set_free(domain); 3057 return isl_stat_ok; 3058 } 3059 3060 return compute_partial_domains(domains, domain); 3061} 3062 3063/* Extract the domains at the current depth that should be atomic, 3064 * separated or unrolled and store them in option. 3065 * 3066 * The domains specified by the user might overlap, so we make 3067 * them disjoint by subtracting earlier domains from later domains. 3068 */ 3069static void compute_domains_init_options(isl_set *option[4], 3070 __isl_keep isl_ast_build *build) 3071{ 3072 enum isl_ast_loop_type type, type2; 3073 isl_set *unroll; 3074 3075 for (type = isl_ast_loop_atomic; 3076 type <= isl_ast_loop_separate; ++type) { 3077 option[type] = isl_ast_build_get_option_domain(build, type); 3078 for (type2 = isl_ast_loop_atomic; type2 < type; ++type2) 3079 option[type] = isl_set_subtract(option[type], 3080 isl_set_copy(option[type2])); 3081 } 3082 3083 unroll = option[isl_ast_loop_unroll]; 3084 unroll = isl_set_coalesce(unroll); 3085 unroll = isl_set_make_disjoint(unroll); 3086 option[isl_ast_loop_unroll] = unroll; 3087} 3088 3089/* Split up the domain at the current depth into disjoint 3090 * basic sets for which code should be generated separately, 3091 * based on the user-specified options. 3092 * Return the list of disjoint basic sets. 3093 * 3094 * There are three kinds of domains that we need to keep track of. 3095 * - the "schedule domain" is the domain of "executed" 3096 * - the "class domain" is the domain corresponding to the currrent 3097 * separation class 3098 * - the "option domain" is the domain corresponding to one of the options 3099 * atomic, unroll or separate 3100 * 3101 * We first consider the individial values of the separation classes 3102 * and split up the domain for each of them separately. 3103 * Finally, we consider the remainder. If no separation classes were 3104 * specified, then we call compute_partial_domains with the universe 3105 * "class_domain". Otherwise, we take the "schedule_domain" as "class_domain", 3106 * with inner dimensions removed. We do this because we want to 3107 * avoid computing the complement of the class domains (i.e., the difference 3108 * between the universe and domains->done). 3109 */ 3110static __isl_give isl_basic_set_list *compute_domains( 3111 __isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build) 3112{ 3113 struct isl_codegen_domains domains; 3114 isl_ctx *ctx; 3115 isl_set *domain; 3116 isl_union_set *schedule_domain; 3117 isl_set *classes; 3118 isl_space *space; 3119 int n_param; 3120 enum isl_ast_loop_type type; 3121 isl_bool empty; 3122 3123 if (!executed) 3124 return NULL; 3125 3126 ctx = isl_union_map_get_ctx(executed); 3127 domains.list = isl_basic_set_list_alloc(ctx, 0); 3128 3129 schedule_domain = isl_union_map_domain(isl_union_map_copy(executed)); 3130 domain = isl_set_from_union_set(schedule_domain); 3131 3132 compute_domains_init_options(domains.option, build); 3133 3134 domains.sep_class = isl_ast_build_get_separation_class(build); 3135 classes = isl_map_range(isl_map_copy(domains.sep_class)); 3136 n_param = isl_set_dim(classes, isl_dim_param); 3137 if (n_param < 0) 3138 classes = isl_set_free(classes); 3139 classes = isl_set_project_out(classes, isl_dim_param, 0, n_param); 3140 3141 space = isl_set_get_space(domain); 3142 domains.build = build; 3143 domains.schedule_domain = isl_set_copy(domain); 3144 domains.executed = executed; 3145 domains.done = isl_set_empty(space); 3146 3147 if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0) 3148 domains.list = isl_basic_set_list_free(domains.list); 3149 isl_set_free(classes); 3150 3151 empty = isl_set_is_empty(domains.done); 3152 if (empty < 0) { 3153 domains.list = isl_basic_set_list_free(domains.list); 3154 domain = isl_set_free(domain); 3155 } else if (empty) { 3156 isl_set_free(domain); 3157 domain = isl_set_universe(isl_set_get_space(domains.done)); 3158 } else { 3159 domain = isl_ast_build_eliminate(build, domain); 3160 } 3161 if (compute_partial_domains(&domains, domain) < 0) 3162 domains.list = isl_basic_set_list_free(domains.list); 3163 3164 isl_set_free(domains.schedule_domain); 3165 isl_set_free(domains.done); 3166 isl_map_free(domains.sep_class); 3167 for (type = isl_ast_loop_atomic; type <= isl_ast_loop_separate; ++type) 3168 isl_set_free(domains.option[type]); 3169 3170 return domains.list; 3171} 3172 3173/* Generate code for a single component, after shifting (if any) 3174 * has been applied, in case the schedule was specified as a union map. 3175 * 3176 * We first split up the domain at the current depth into disjoint 3177 * basic sets based on the user-specified options. 3178 * Then we generated code for each of them and concatenate the results. 3179 */ 3180static __isl_give isl_ast_graft_list *generate_shifted_component_flat( 3181 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 3182{ 3183 isl_basic_set_list *domain_list; 3184 isl_ast_graft_list *list = NULL; 3185 3186 domain_list = compute_domains(executed, build); 3187 list = generate_parallel_domains(domain_list, executed, build); 3188 3189 isl_basic_set_list_free(domain_list); 3190 isl_union_map_free(executed); 3191 isl_ast_build_free(build); 3192 3193 return list; 3194} 3195 3196/* Generate code for a single component, after shifting (if any) 3197 * has been applied, in case the schedule was specified as a schedule tree 3198 * and the separate option was specified. 3199 * 3200 * We perform separation on the domain of "executed" and then generate 3201 * an AST for each of the resulting disjoint basic sets. 3202 */ 3203static __isl_give isl_ast_graft_list *generate_shifted_component_tree_separate( 3204 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 3205{ 3206 isl_space *space; 3207 isl_set *domain; 3208 isl_basic_set_list *domain_list; 3209 isl_ast_graft_list *list; 3210 3211 space = isl_ast_build_get_space(build, 1); 3212 domain = separate_schedule_domains(space, 3213 isl_union_map_copy(executed), build); 3214 domain_list = isl_basic_set_list_from_set(domain); 3215 3216 list = generate_parallel_domains(domain_list, executed, build); 3217 3218 isl_basic_set_list_free(domain_list); 3219 isl_union_map_free(executed); 3220 isl_ast_build_free(build); 3221 3222 return list; 3223} 3224 3225/* Internal data structure for generate_shifted_component_tree_unroll. 3226 * 3227 * "executed" and "build" are inputs to generate_shifted_component_tree_unroll. 3228 * "list" collects the constructs grafts. 3229 */ 3230struct isl_ast_unroll_tree_data { 3231 isl_union_map *executed; 3232 isl_ast_build *build; 3233 isl_ast_graft_list *list; 3234}; 3235 3236/* Initialize data->list to a list of "n" elements. 3237 */ 3238static int init_unroll_tree(int n, void *user) 3239{ 3240 struct isl_ast_unroll_tree_data *data = user; 3241 isl_ctx *ctx; 3242 3243 ctx = isl_ast_build_get_ctx(data->build); 3244 data->list = isl_ast_graft_list_alloc(ctx, n); 3245 3246 return 0; 3247} 3248 3249/* Given an iteration of an unrolled domain represented by "bset", 3250 * generate the corresponding AST and add the result to data->list. 3251 */ 3252static int do_unroll_tree_iteration(__isl_take isl_basic_set *bset, void *user) 3253{ 3254 struct isl_ast_unroll_tree_data *data = user; 3255 3256 data->list = add_node(data->list, isl_union_map_copy(data->executed), 3257 bset, isl_ast_build_copy(data->build)); 3258 3259 return 0; 3260} 3261 3262/* Generate code for a single component, after shifting (if any) 3263 * has been applied, in case the schedule was specified as a schedule tree 3264 * and the unroll option was specified. 3265 * 3266 * We call foreach_iteration to iterate over the individual values and 3267 * construct and collect the corresponding grafts in do_unroll_tree_iteration. 3268 */ 3269static __isl_give isl_ast_graft_list *generate_shifted_component_tree_unroll( 3270 __isl_take isl_union_map *executed, __isl_take isl_set *domain, 3271 __isl_take isl_ast_build *build) 3272{ 3273 struct isl_ast_unroll_tree_data data = { executed, build, NULL }; 3274 3275 if (foreach_iteration(domain, build, &init_unroll_tree, 3276 &do_unroll_tree_iteration, &data) < 0) 3277 data.list = isl_ast_graft_list_free(data.list); 3278 3279 isl_union_map_free(executed); 3280 isl_ast_build_free(build); 3281 3282 return data.list; 3283} 3284 3285/* Does "domain" involve a disjunction that is purely based on 3286 * constraints involving only outer dimension? 3287 * 3288 * In particular, is there a disjunction such that the constraints 3289 * involving the current and later dimensions are the same over 3290 * all the disjuncts? 3291 */ 3292static isl_bool has_pure_outer_disjunction(__isl_keep isl_set *domain, 3293 __isl_keep isl_ast_build *build) 3294{ 3295 isl_basic_set *hull; 3296 isl_set *shared, *inner; 3297 isl_bool equal; 3298 isl_size depth; 3299 isl_size n; 3300 isl_size dim; 3301 3302 n = isl_set_n_basic_set(domain); 3303 if (n < 0) 3304 return isl_bool_error; 3305 if (n <= 1) 3306 return isl_bool_false; 3307 dim = isl_set_dim(domain, isl_dim_set); 3308 depth = isl_ast_build_get_depth(build); 3309 if (dim < 0 || depth < 0) 3310 return isl_bool_error; 3311 3312 inner = isl_set_copy(domain); 3313 inner = isl_set_drop_constraints_not_involving_dims(inner, 3314 isl_dim_set, depth, dim - depth); 3315 hull = isl_set_plain_unshifted_simple_hull(isl_set_copy(inner)); 3316 shared = isl_set_from_basic_set(hull); 3317 equal = isl_set_plain_is_equal(inner, shared); 3318 isl_set_free(inner); 3319 isl_set_free(shared); 3320 3321 return equal; 3322} 3323 3324/* Generate code for a single component, after shifting (if any) 3325 * has been applied, in case the schedule was specified as a schedule tree. 3326 * In particular, handle the base case where there is either no isolated 3327 * set or we are within the isolated set (in which case "isolated" is set) 3328 * or the iterations that precede or follow the isolated set. 3329 * 3330 * The schedule domain is broken up or combined into basic sets 3331 * according to the AST generation option specified in the current 3332 * schedule node, which may be either atomic, separate, unroll or 3333 * unspecified. If the option is unspecified, then we currently simply 3334 * split the schedule domain into disjoint basic sets. 3335 * 3336 * In case the separate option is specified, the AST generation is 3337 * handled by generate_shifted_component_tree_separate. 3338 * In the other cases, we need the global schedule domain. 3339 * In the unroll case, the AST generation is then handled by 3340 * generate_shifted_component_tree_unroll which needs the actual 3341 * schedule domain (with divs that may refer to the current dimension) 3342 * so that stride detection can be performed. 3343 * In the atomic or unspecified case, inner dimensions and divs involving 3344 * the current dimensions should be eliminated. 3345 * The result is then either combined into a single basic set or 3346 * split up into disjoint basic sets. 3347 * Finally an AST is generated for each basic set and the results are 3348 * concatenated. 3349 * 3350 * If the schedule domain involves a disjunction that is purely based on 3351 * constraints involving only outer dimension, then it is treated as 3352 * if atomic was specified. This ensures that only a single loop 3353 * is generated instead of a sequence of identical loops with 3354 * different guards. 3355 */ 3356static __isl_give isl_ast_graft_list *generate_shifted_component_tree_base( 3357 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build, 3358 int isolated) 3359{ 3360 isl_bool outer_disjunction; 3361 isl_union_set *schedule_domain; 3362 isl_set *domain; 3363 isl_basic_set_list *domain_list; 3364 isl_ast_graft_list *list; 3365 enum isl_ast_loop_type type; 3366 3367 type = isl_ast_build_get_loop_type(build, isolated); 3368 if (type < 0) 3369 goto error; 3370 3371 if (type == isl_ast_loop_separate) 3372 return generate_shifted_component_tree_separate(executed, 3373 build); 3374 3375 schedule_domain = isl_union_map_domain(isl_union_map_copy(executed)); 3376 domain = isl_set_from_union_set(schedule_domain); 3377 3378 if (type == isl_ast_loop_unroll) 3379 return generate_shifted_component_tree_unroll(executed, domain, 3380 build); 3381 3382 domain = isl_ast_build_eliminate(build, domain); 3383 domain = isl_set_coalesce_preserve(domain); 3384 3385 outer_disjunction = has_pure_outer_disjunction(domain, build); 3386 if (outer_disjunction < 0) 3387 domain = isl_set_free(domain); 3388 3389 if (outer_disjunction || type == isl_ast_loop_atomic) { 3390 isl_basic_set *hull; 3391 hull = isl_set_unshifted_simple_hull(domain); 3392 domain_list = isl_basic_set_list_from_basic_set(hull); 3393 } else { 3394 domain = isl_set_make_disjoint(domain); 3395 domain_list = isl_basic_set_list_from_set(domain); 3396 } 3397 3398 list = generate_parallel_domains(domain_list, executed, build); 3399 3400 isl_basic_set_list_free(domain_list); 3401 isl_union_map_free(executed); 3402 isl_ast_build_free(build); 3403 3404 return list; 3405error: 3406 isl_union_map_free(executed); 3407 isl_ast_build_free(build); 3408 return NULL; 3409} 3410 3411/* Extract out the disjunction imposed by "domain" on the outer 3412 * schedule dimensions. 3413 * 3414 * In particular, remove all inner dimensions from "domain" (including 3415 * the current dimension) and then remove the constraints that are shared 3416 * by all disjuncts in the result. 3417 */ 3418static __isl_give isl_set *extract_disjunction(__isl_take isl_set *domain, 3419 __isl_keep isl_ast_build *build) 3420{ 3421 isl_set *hull; 3422 isl_size depth; 3423 isl_size dim; 3424 3425 domain = isl_ast_build_specialize(build, domain); 3426 depth = isl_ast_build_get_depth(build); 3427 dim = isl_set_dim(domain, isl_dim_set); 3428 if (depth < 0 || dim < 0) 3429 return isl_set_free(domain); 3430 domain = isl_set_eliminate(domain, isl_dim_set, depth, dim - depth); 3431 domain = isl_set_remove_unknown_divs(domain); 3432 hull = isl_set_copy(domain); 3433 hull = isl_set_from_basic_set(isl_set_unshifted_simple_hull(hull)); 3434 domain = isl_set_gist(domain, hull); 3435 3436 return domain; 3437} 3438 3439/* Add "guard" to the grafts in "list". 3440 * "build" is the outer AST build, while "sub_build" includes "guard" 3441 * in its generated domain. 3442 * 3443 * First combine the grafts into a single graft and then add the guard. 3444 * If the list is empty, or if some error occurred, then simply return 3445 * the list. 3446 */ 3447static __isl_give isl_ast_graft_list *list_add_guard( 3448 __isl_take isl_ast_graft_list *list, __isl_keep isl_set *guard, 3449 __isl_keep isl_ast_build *build, __isl_keep isl_ast_build *sub_build) 3450{ 3451 isl_ast_graft *graft; 3452 isl_size n; 3453 3454 list = isl_ast_graft_list_fuse(list, sub_build); 3455 3456 n = isl_ast_graft_list_n_ast_graft(list); 3457 if (n < 0) 3458 return isl_ast_graft_list_free(list); 3459 if (n != 1) 3460 return list; 3461 3462 graft = isl_ast_graft_list_get_ast_graft(list, 0); 3463 graft = isl_ast_graft_add_guard(graft, isl_set_copy(guard), build); 3464 list = isl_ast_graft_list_set_ast_graft(list, 0, graft); 3465 3466 return list; 3467} 3468 3469/* Generate code for a single component, after shifting (if any) 3470 * has been applied, in case the schedule was specified as a schedule tree. 3471 * In particular, do so for the specified subset of the schedule domain. 3472 * 3473 * If we are outside of the isolated part, then "domain" may include 3474 * a disjunction. Explicitly generate this disjunction at this point 3475 * instead of relying on the disjunction getting hoisted back up 3476 * to this level. 3477 */ 3478static __isl_give isl_ast_graft_list *generate_shifted_component_tree_part( 3479 __isl_keep isl_union_map *executed, __isl_take isl_set *domain, 3480 __isl_keep isl_ast_build *build, int isolated) 3481{ 3482 isl_union_set *uset; 3483 isl_ast_graft_list *list; 3484 isl_ast_build *sub_build; 3485 int empty; 3486 3487 uset = isl_union_set_from_set(isl_set_copy(domain)); 3488 executed = isl_union_map_copy(executed); 3489 executed = isl_union_map_intersect_domain(executed, uset); 3490 empty = isl_union_map_is_empty(executed); 3491 if (empty < 0) 3492 goto error; 3493 if (empty) { 3494 isl_ctx *ctx; 3495 isl_union_map_free(executed); 3496 isl_set_free(domain); 3497 ctx = isl_ast_build_get_ctx(build); 3498 return isl_ast_graft_list_alloc(ctx, 0); 3499 } 3500 3501 sub_build = isl_ast_build_copy(build); 3502 if (!isolated) { 3503 domain = extract_disjunction(domain, build); 3504 sub_build = isl_ast_build_restrict_generated(sub_build, 3505 isl_set_copy(domain)); 3506 } 3507 list = generate_shifted_component_tree_base(executed, 3508 isl_ast_build_copy(sub_build), isolated); 3509 if (!isolated) 3510 list = list_add_guard(list, domain, build, sub_build); 3511 isl_ast_build_free(sub_build); 3512 isl_set_free(domain); 3513 return list; 3514error: 3515 isl_union_map_free(executed); 3516 isl_set_free(domain); 3517 return NULL; 3518} 3519 3520/* Generate code for a single component, after shifting (if any) 3521 * has been applied, in case the schedule was specified as a schedule tree. 3522 * In particular, do so for the specified sequence of subsets 3523 * of the schedule domain, "before", "isolated", "after" and "other", 3524 * where only the "isolated" part is considered to be isolated. 3525 */ 3526static __isl_give isl_ast_graft_list *generate_shifted_component_parts( 3527 __isl_take isl_union_map *executed, __isl_take isl_set *before, 3528 __isl_take isl_set *isolated, __isl_take isl_set *after, 3529 __isl_take isl_set *other, __isl_take isl_ast_build *build) 3530{ 3531 isl_ast_graft_list *list, *res; 3532 3533 res = generate_shifted_component_tree_part(executed, before, build, 0); 3534 list = generate_shifted_component_tree_part(executed, isolated, 3535 build, 1); 3536 res = isl_ast_graft_list_concat(res, list); 3537 list = generate_shifted_component_tree_part(executed, after, build, 0); 3538 res = isl_ast_graft_list_concat(res, list); 3539 list = generate_shifted_component_tree_part(executed, other, build, 0); 3540 res = isl_ast_graft_list_concat(res, list); 3541 3542 isl_union_map_free(executed); 3543 isl_ast_build_free(build); 3544 3545 return res; 3546} 3547 3548/* Does "set" intersect "first", but not "second"? 3549 */ 3550static isl_bool only_intersects_first(__isl_keep isl_set *set, 3551 __isl_keep isl_set *first, __isl_keep isl_set *second) 3552{ 3553 isl_bool disjoint; 3554 3555 disjoint = isl_set_is_disjoint(set, first); 3556 if (disjoint < 0) 3557 return isl_bool_error; 3558 if (disjoint) 3559 return isl_bool_false; 3560 3561 return isl_set_is_disjoint(set, second); 3562} 3563 3564/* Generate code for a single component, after shifting (if any) 3565 * has been applied, in case the schedule was specified as a schedule tree. 3566 * In particular, do so in case of isolation where there is 3567 * only an "isolated" part and an "after" part. 3568 * "dead1" and "dead2" are freed by this function in order to simplify 3569 * the caller. 3570 * 3571 * The "before" and "other" parts are set to empty sets. 3572 */ 3573static __isl_give isl_ast_graft_list *generate_shifted_component_only_after( 3574 __isl_take isl_union_map *executed, __isl_take isl_set *isolated, 3575 __isl_take isl_set *after, __isl_take isl_ast_build *build, 3576 __isl_take isl_set *dead1, __isl_take isl_set *dead2) 3577{ 3578 isl_set *empty; 3579 3580 empty = isl_set_empty(isl_set_get_space(after)); 3581 isl_set_free(dead1); 3582 isl_set_free(dead2); 3583 return generate_shifted_component_parts(executed, isl_set_copy(empty), 3584 isolated, after, empty, build); 3585} 3586 3587/* Generate code for a single component, after shifting (if any) 3588 * has been applied, in case the schedule was specified as a schedule tree. 3589 * 3590 * We first check if the user has specified an isolated schedule domain 3591 * and that we are not already outside of this isolated schedule domain. 3592 * If so, we break up the schedule domain into iterations that 3593 * precede the isolated domain, the isolated domain itself, 3594 * the iterations that follow the isolated domain and 3595 * the remaining iterations (those that are incomparable 3596 * to the isolated domain). 3597 * We generate an AST for each piece and concatenate the results. 3598 * 3599 * If the isolated domain is not convex, then it is replaced 3600 * by a convex superset to ensure that the sets of preceding and 3601 * following iterations are properly defined and, in particular, 3602 * that there are no intermediate iterations that do not belong 3603 * to the isolated domain. 3604 * 3605 * In the special case where at least one element of the schedule 3606 * domain that does not belong to the isolated domain needs 3607 * to be scheduled after this isolated domain, but none of those 3608 * elements need to be scheduled before, break up the schedule domain 3609 * in only two parts, the isolated domain, and a part that will be 3610 * scheduled after the isolated domain. 3611 * 3612 * If no isolated set has been specified, then we generate an 3613 * AST for the entire inverse schedule. 3614 */ 3615static __isl_give isl_ast_graft_list *generate_shifted_component_tree( 3616 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 3617{ 3618 int i; 3619 isl_size depth; 3620 int empty, has_isolate; 3621 isl_space *space; 3622 isl_union_set *schedule_domain; 3623 isl_set *domain; 3624 isl_basic_set *hull; 3625 isl_set *isolated, *before, *after, *test; 3626 isl_map *gt, *lt; 3627 isl_bool pure; 3628 3629 build = isl_ast_build_extract_isolated(build); 3630 has_isolate = isl_ast_build_has_isolated(build); 3631 if (has_isolate < 0) 3632 executed = isl_union_map_free(executed); 3633 else if (!has_isolate) 3634 return generate_shifted_component_tree_base(executed, build, 0); 3635 3636 schedule_domain = isl_union_map_domain(isl_union_map_copy(executed)); 3637 domain = isl_set_from_union_set(schedule_domain); 3638 3639 isolated = isl_ast_build_get_isolated(build); 3640 isolated = isl_set_intersect(isolated, isl_set_copy(domain)); 3641 test = isl_ast_build_specialize(build, isl_set_copy(isolated)); 3642 empty = isl_set_is_empty(test); 3643 isl_set_free(test); 3644 if (empty < 0) 3645 goto error; 3646 if (empty) { 3647 isl_set_free(isolated); 3648 isl_set_free(domain); 3649 return generate_shifted_component_tree_base(executed, build, 0); 3650 } 3651 depth = isl_ast_build_get_depth(build); 3652 if (depth < 0) 3653 goto error; 3654 3655 isolated = isl_ast_build_eliminate(build, isolated); 3656 hull = isl_set_unshifted_simple_hull(isolated); 3657 isolated = isl_set_from_basic_set(hull); 3658 3659 space = isl_space_map_from_set(isl_set_get_space(isolated)); 3660 gt = isl_map_universe(space); 3661 for (i = 0; i < depth; ++i) 3662 gt = isl_map_equate(gt, isl_dim_in, i, isl_dim_out, i); 3663 gt = isl_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth); 3664 lt = isl_map_reverse(isl_map_copy(gt)); 3665 before = isl_set_apply(isl_set_copy(isolated), gt); 3666 after = isl_set_apply(isl_set_copy(isolated), lt); 3667 3668 domain = isl_set_subtract(domain, isl_set_copy(isolated)); 3669 pure = only_intersects_first(domain, after, before); 3670 if (pure < 0) 3671 executed = isl_union_map_free(executed); 3672 else if (pure) 3673 return generate_shifted_component_only_after(executed, isolated, 3674 domain, build, before, after); 3675 domain = isl_set_subtract(domain, isl_set_copy(before)); 3676 domain = isl_set_subtract(domain, isl_set_copy(after)); 3677 after = isl_set_subtract(after, isl_set_copy(isolated)); 3678 after = isl_set_subtract(after, isl_set_copy(before)); 3679 before = isl_set_subtract(before, isl_set_copy(isolated)); 3680 3681 return generate_shifted_component_parts(executed, before, isolated, 3682 after, domain, build); 3683error: 3684 isl_set_free(domain); 3685 isl_set_free(isolated); 3686 isl_union_map_free(executed); 3687 isl_ast_build_free(build); 3688 return NULL; 3689} 3690 3691/* Generate code for a single component, after shifting (if any) 3692 * has been applied. 3693 * 3694 * Call generate_shifted_component_tree or generate_shifted_component_flat 3695 * depending on whether the schedule was specified as a schedule tree. 3696 */ 3697static __isl_give isl_ast_graft_list *generate_shifted_component( 3698 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 3699{ 3700 if (isl_ast_build_has_schedule_node(build)) 3701 return generate_shifted_component_tree(executed, build); 3702 else 3703 return generate_shifted_component_flat(executed, build); 3704} 3705 3706struct isl_set_map_pair { 3707 isl_set *set; 3708 isl_map *map; 3709}; 3710 3711/* Given an array "domain" of isl_set_map_pairs and an array "order" 3712 * of indices into the "domain" array, 3713 * return the union of the "map" fields of the elements 3714 * indexed by the first "n" elements of "order". 3715 */ 3716static __isl_give isl_union_map *construct_component_executed( 3717 struct isl_set_map_pair *domain, int *order, int n) 3718{ 3719 int i; 3720 isl_map *map; 3721 isl_union_map *executed; 3722 3723 map = isl_map_copy(domain[order[0]].map); 3724 executed = isl_union_map_from_map(map); 3725 for (i = 1; i < n; ++i) { 3726 map = isl_map_copy(domain[order[i]].map); 3727 executed = isl_union_map_add_map(executed, map); 3728 } 3729 3730 return executed; 3731} 3732 3733/* Generate code for a single component, after shifting (if any) 3734 * has been applied. 3735 * 3736 * The component inverse schedule is specified as the "map" fields 3737 * of the elements of "domain" indexed by the first "n" elements of "order". 3738 */ 3739static __isl_give isl_ast_graft_list *generate_shifted_component_from_list( 3740 struct isl_set_map_pair *domain, int *order, int n, 3741 __isl_take isl_ast_build *build) 3742{ 3743 isl_union_map *executed; 3744 3745 executed = construct_component_executed(domain, order, n); 3746 return generate_shifted_component(executed, build); 3747} 3748 3749/* Does set dimension "pos" of "set" have an obviously fixed value? 3750 */ 3751static int dim_is_fixed(__isl_keep isl_set *set, int pos) 3752{ 3753 int fixed; 3754 isl_val *v; 3755 3756 v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos); 3757 if (!v) 3758 return -1; 3759 fixed = !isl_val_is_nan(v); 3760 isl_val_free(v); 3761 3762 return fixed; 3763} 3764 3765/* Given an array "domain" of isl_set_map_pairs and an array "order" 3766 * of indices into the "domain" array, 3767 * do all (except for at most one) of the "set" field of the elements 3768 * indexed by the first "n" elements of "order" have a fixed value 3769 * at position "depth"? 3770 */ 3771static int at_most_one_non_fixed(struct isl_set_map_pair *domain, 3772 int *order, int n, int depth) 3773{ 3774 int i; 3775 int non_fixed = -1; 3776 3777 for (i = 0; i < n; ++i) { 3778 int f; 3779 3780 f = dim_is_fixed(domain[order[i]].set, depth); 3781 if (f < 0) 3782 return -1; 3783 if (f) 3784 continue; 3785 if (non_fixed >= 0) 3786 return 0; 3787 non_fixed = i; 3788 } 3789 3790 return 1; 3791} 3792 3793/* Given an array "domain" of isl_set_map_pairs and an array "order" 3794 * of indices into the "domain" array, 3795 * eliminate the inner dimensions from the "set" field of the elements 3796 * indexed by the first "n" elements of "order", provided the current 3797 * dimension does not have a fixed value. 3798 * 3799 * Return the index of the first element in "order" with a corresponding 3800 * "set" field that does not have an (obviously) fixed value. 3801 */ 3802static int eliminate_non_fixed(struct isl_set_map_pair *domain, 3803 int *order, int n, int depth, __isl_keep isl_ast_build *build) 3804{ 3805 int i; 3806 int base = -1; 3807 3808 for (i = n - 1; i >= 0; --i) { 3809 int f; 3810 f = dim_is_fixed(domain[order[i]].set, depth); 3811 if (f < 0) 3812 return -1; 3813 if (f) 3814 continue; 3815 domain[order[i]].set = isl_ast_build_eliminate_inner(build, 3816 domain[order[i]].set); 3817 base = i; 3818 } 3819 3820 return base; 3821} 3822 3823/* Given an array "domain" of isl_set_map_pairs and an array "order" 3824 * of indices into the "domain" array, 3825 * find the element of "domain" (amongst those indexed by the first "n" 3826 * elements of "order") with the "set" field that has the smallest 3827 * value for the current iterator. 3828 * 3829 * Note that the domain with the smallest value may depend on the parameters 3830 * and/or outer loop dimension. Since the result of this function is only 3831 * used as heuristic, we only make a reasonable attempt at finding the best 3832 * domain, one that should work in case a single domain provides the smallest 3833 * value for the current dimension over all values of the parameters 3834 * and outer dimensions. 3835 * 3836 * In particular, we compute the smallest value of the first domain 3837 * and replace it by that of any later domain if that later domain 3838 * has a smallest value that is smaller for at least some value 3839 * of the parameters and outer dimensions. 3840 */ 3841static int first_offset(struct isl_set_map_pair *domain, int *order, int n, 3842 __isl_keep isl_ast_build *build) 3843{ 3844 int i; 3845 isl_map *min_first; 3846 int first = 0; 3847 3848 min_first = isl_ast_build_map_to_iterator(build, 3849 isl_set_copy(domain[order[0]].set)); 3850 min_first = isl_map_lexmin(min_first); 3851 3852 for (i = 1; i < n; ++i) { 3853 isl_map *min, *test; 3854 int empty; 3855 3856 min = isl_ast_build_map_to_iterator(build, 3857 isl_set_copy(domain[order[i]].set)); 3858 min = isl_map_lexmin(min); 3859 test = isl_map_copy(min); 3860 test = isl_map_apply_domain(isl_map_copy(min_first), test); 3861 test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0); 3862 empty = isl_map_is_empty(test); 3863 isl_map_free(test); 3864 if (empty >= 0 && !empty) { 3865 isl_map_free(min_first); 3866 first = i; 3867 min_first = min; 3868 } else 3869 isl_map_free(min); 3870 3871 if (empty < 0) 3872 break; 3873 } 3874 3875 isl_map_free(min_first); 3876 3877 return i < n ? -1 : first; 3878} 3879 3880/* Construct a shifted inverse schedule based on the original inverse schedule, 3881 * the stride and the offset. 3882 * 3883 * The original inverse schedule is specified as the "map" fields 3884 * of the elements of "domain" indexed by the first "n" elements of "order". 3885 * 3886 * "stride" and "offset" are such that the difference 3887 * between the values of the current dimension of domain "i" 3888 * and the values of the current dimension for some reference domain are 3889 * equal to 3890 * 3891 * stride * integer + offset[i] 3892 * 3893 * Moreover, 0 <= offset[i] < stride. 3894 * 3895 * For each domain, we create a map 3896 * 3897 * { [..., j, ...] -> [..., j - offset[i], offset[i], ....] } 3898 * 3899 * where j refers to the current dimension and the other dimensions are 3900 * unchanged, and apply this map to the original schedule domain. 3901 * 3902 * For example, for the original schedule 3903 * 3904 * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 } 3905 * 3906 * and assuming the offset is 0 for the A domain and 1 for the B domain, 3907 * we apply the mapping 3908 * 3909 * { [j] -> [j, 0] } 3910 * 3911 * to the schedule of the "A" domain and the mapping 3912 * 3913 * { [j - 1] -> [j, 1] } 3914 * 3915 * to the schedule of the "B" domain. 3916 * 3917 * 3918 * Note that after the transformation, the differences between pairs 3919 * of values of the current dimension over all domains are multiples 3920 * of stride and that we have therefore exposed the stride. 3921 * 3922 * 3923 * To see that the mapping preserves the lexicographic order, 3924 * first note that each of the individual maps above preserves the order. 3925 * If the value of the current iterator is j1 in one domain and j2 in another, 3926 * then if j1 = j2, we know that the same map is applied to both domains 3927 * and the order is preserved. 3928 * Otherwise, let us assume, without loss of generality, that j1 < j2. 3929 * If c1 >= c2 (with c1 and c2 the corresponding offsets), then 3930 * 3931 * j1 - c1 < j2 - c2 3932 * 3933 * and the order is preserved. 3934 * If c1 < c2, then we know 3935 * 3936 * 0 <= c2 - c1 < s 3937 * 3938 * We also have 3939 * 3940 * j2 - j1 = n * s + r 3941 * 3942 * with n >= 0 and 0 <= r < s. 3943 * In other words, r = c2 - c1. 3944 * If n > 0, then 3945 * 3946 * j1 - c1 < j2 - c2 3947 * 3948 * If n = 0, then 3949 * 3950 * j1 - c1 = j2 - c2 3951 * 3952 * and so 3953 * 3954 * (j1 - c1, c1) << (j2 - c2, c2) 3955 * 3956 * with "<<" the lexicographic order, proving that the order is preserved 3957 * in all cases. 3958 */ 3959static __isl_give isl_union_map *construct_shifted_executed( 3960 struct isl_set_map_pair *domain, int *order, int n, 3961 __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset, 3962 __isl_keep isl_ast_build *build) 3963{ 3964 int i; 3965 isl_union_map *executed; 3966 isl_space *space; 3967 isl_map *map; 3968 isl_size depth; 3969 isl_constraint *c; 3970 3971 depth = isl_ast_build_get_depth(build); 3972 if (depth < 0) 3973 return NULL; 3974 space = isl_ast_build_get_space(build, 1); 3975 executed = isl_union_map_empty(isl_space_copy(space)); 3976 space = isl_space_map_from_set(space); 3977 map = isl_map_identity(isl_space_copy(space)); 3978 map = isl_map_eliminate(map, isl_dim_out, depth, 1); 3979 map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1); 3980 space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1); 3981 3982 c = isl_constraint_alloc_equality(isl_local_space_from_space(space)); 3983 c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1); 3984 c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1); 3985 3986 for (i = 0; i < n; ++i) { 3987 isl_map *map_i; 3988 isl_val *v; 3989 3990 v = isl_multi_val_get_val(offset, i); 3991 if (!v) 3992 break; 3993 map_i = isl_map_copy(map); 3994 map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1, 3995 isl_val_copy(v)); 3996 v = isl_val_neg(v); 3997 c = isl_constraint_set_constant_val(c, v); 3998 map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c)); 3999 4000 map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map), 4001 map_i); 4002 executed = isl_union_map_add_map(executed, map_i); 4003 } 4004 4005 isl_constraint_free(c); 4006 isl_map_free(map); 4007 4008 if (i < n) 4009 executed = isl_union_map_free(executed); 4010 4011 return executed; 4012} 4013 4014/* Generate code for a single component, after exposing the stride, 4015 * given that the schedule domain is "shifted strided". 4016 * 4017 * The component inverse schedule is specified as the "map" fields 4018 * of the elements of "domain" indexed by the first "n" elements of "order". 4019 * 4020 * The schedule domain being "shifted strided" means that the differences 4021 * between the values of the current dimension of domain "i" 4022 * and the values of the current dimension for some reference domain are 4023 * equal to 4024 * 4025 * stride * integer + offset[i] 4026 * 4027 * We first look for the domain with the "smallest" value for the current 4028 * dimension and adjust the offsets such that the offset of the "smallest" 4029 * domain is equal to zero. The other offsets are reduced modulo stride. 4030 * 4031 * Based on this information, we construct a new inverse schedule in 4032 * construct_shifted_executed that exposes the stride. 4033 * Since this involves the introduction of a new schedule dimension, 4034 * the build needs to be changed accordingly. 4035 * After computing the AST, the newly introduced dimension needs 4036 * to be removed again from the list of grafts. We do this by plugging 4037 * in a mapping that represents the new schedule domain in terms of the 4038 * old schedule domain. 4039 */ 4040static __isl_give isl_ast_graft_list *generate_shift_component( 4041 struct isl_set_map_pair *domain, int *order, int n, 4042 __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset, 4043 __isl_take isl_ast_build *build) 4044{ 4045 isl_ast_graft_list *list; 4046 int first; 4047 isl_size depth; 4048 isl_val *val; 4049 isl_multi_val *mv; 4050 isl_space *space; 4051 isl_multi_aff *ma, *zero; 4052 isl_union_map *executed; 4053 4054 depth = isl_ast_build_get_depth(build); 4055 4056 first = first_offset(domain, order, n, build); 4057 if (depth < 0 || first < 0) 4058 goto error; 4059 4060 mv = isl_multi_val_copy(offset); 4061 val = isl_multi_val_get_val(offset, first); 4062 val = isl_val_neg(val); 4063 mv = isl_multi_val_add_val(mv, val); 4064 mv = isl_multi_val_mod_val(mv, isl_val_copy(stride)); 4065 4066 executed = construct_shifted_executed(domain, order, n, stride, mv, 4067 build); 4068 space = isl_ast_build_get_space(build, 1); 4069 space = isl_space_map_from_set(space); 4070 ma = isl_multi_aff_identity(isl_space_copy(space)); 4071 space = isl_space_from_domain(isl_space_domain(space)); 4072 space = isl_space_add_dims(space, isl_dim_out, 1); 4073 zero = isl_multi_aff_zero(space); 4074 ma = isl_multi_aff_range_splice(ma, depth + 1, zero); 4075 build = isl_ast_build_insert_dim(build, depth + 1); 4076 list = generate_shifted_component(executed, build); 4077 4078 list = isl_ast_graft_list_preimage_multi_aff(list, ma); 4079 4080 isl_multi_val_free(mv); 4081 4082 return list; 4083error: 4084 isl_ast_build_free(build); 4085 return NULL; 4086} 4087 4088/* Does any node in the schedule tree rooted at the current schedule node 4089 * of "build" depend on outer schedule nodes? 4090 */ 4091static int has_anchored_subtree(__isl_keep isl_ast_build *build) 4092{ 4093 isl_schedule_node *node; 4094 int dependent = 0; 4095 4096 node = isl_ast_build_get_schedule_node(build); 4097 dependent = isl_schedule_node_is_subtree_anchored(node); 4098 isl_schedule_node_free(node); 4099 4100 return dependent; 4101} 4102 4103/* Generate code for a single component. 4104 * 4105 * The component inverse schedule is specified as the "map" fields 4106 * of the elements of "domain" indexed by the first "n" elements of "order". 4107 * 4108 * This function may modify the "set" fields of "domain". 4109 * 4110 * Before proceeding with the actual code generation for the component, 4111 * we first check if there are any "shifted" strides, meaning that 4112 * the schedule domains of the individual domains are all strided, 4113 * but that they have different offsets, resulting in the union 4114 * of schedule domains not being strided anymore. 4115 * 4116 * The simplest example is the schedule 4117 * 4118 * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 } 4119 * 4120 * Both schedule domains are strided, but their union is not. 4121 * This function detects such cases and then rewrites the schedule to 4122 * 4123 * { A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 } 4124 * 4125 * In the new schedule, the schedule domains have the same offset (modulo 4126 * the stride), ensuring that the union of schedule domains is also strided. 4127 * 4128 * 4129 * If there is only a single domain in the component, then there is 4130 * nothing to do. Similarly, if the current schedule dimension has 4131 * a fixed value for almost all domains then there is nothing to be done. 4132 * In particular, we need at least two domains where the current schedule 4133 * dimension does not have a fixed value. 4134 * Finally, in case of a schedule map input, 4135 * if any of the options refer to the current schedule dimension, 4136 * then we bail out as well. It would be possible to reformulate the options 4137 * in terms of the new schedule domain, but that would introduce constraints 4138 * that separate the domains in the options and that is something we would 4139 * like to avoid. 4140 * In the case of a schedule tree input, we bail out if any of 4141 * the descendants of the current schedule node refer to outer 4142 * schedule nodes in any way. 4143 * 4144 * 4145 * To see if there is any shifted stride, we look at the differences 4146 * between the values of the current dimension in pairs of domains 4147 * for equal values of outer dimensions. These differences should be 4148 * of the form 4149 * 4150 * m x + r 4151 * 4152 * with "m" the stride and "r" a constant. Note that we cannot perform 4153 * this analysis on individual domains as the lower bound in each domain 4154 * may depend on parameters or outer dimensions and so the current dimension 4155 * itself may not have a fixed remainder on division by the stride. 4156 * 4157 * In particular, we compare the first domain that does not have an 4158 * obviously fixed value for the current dimension to itself and all 4159 * other domains and collect the offsets and the gcd of the strides. 4160 * If the gcd becomes one, then we failed to find shifted strides. 4161 * If the gcd is zero, then the differences were all fixed, meaning 4162 * that some domains had non-obviously fixed values for the current dimension. 4163 * If all the offsets are the same (for those domains that do not have 4164 * an obviously fixed value for the current dimension), then we do not 4165 * apply the transformation. 4166 * If none of the domains were skipped, then there is nothing to do. 4167 * If some of them were skipped, then if we apply separation, the schedule 4168 * domain should get split in pieces with a (non-shifted) stride. 4169 * 4170 * Otherwise, we apply a shift to expose the stride in 4171 * generate_shift_component. 4172 */ 4173static __isl_give isl_ast_graft_list *generate_component( 4174 struct isl_set_map_pair *domain, int *order, int n, 4175 __isl_take isl_ast_build *build) 4176{ 4177 int i, d; 4178 isl_size depth; 4179 isl_ctx *ctx; 4180 isl_map *map; 4181 isl_set *deltas; 4182 isl_val *gcd = NULL; 4183 isl_multi_val *mv; 4184 int fixed, skip; 4185 int base; 4186 isl_ast_graft_list *list; 4187 int res = 0; 4188 4189 depth = isl_ast_build_get_depth(build); 4190 if (depth < 0) 4191 goto error; 4192 4193 skip = n == 1; 4194 if (skip >= 0 && !skip) 4195 skip = at_most_one_non_fixed(domain, order, n, depth); 4196 if (skip >= 0 && !skip) { 4197 if (isl_ast_build_has_schedule_node(build)) 4198 skip = has_anchored_subtree(build); 4199 else 4200 skip = isl_ast_build_options_involve_depth(build); 4201 } 4202 if (skip < 0) 4203 goto error; 4204 if (skip) 4205 return generate_shifted_component_from_list(domain, 4206 order, n, build); 4207 4208 base = eliminate_non_fixed(domain, order, n, depth, build); 4209 if (base < 0) 4210 goto error; 4211 4212 ctx = isl_ast_build_get_ctx(build); 4213 4214 mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n)); 4215 4216 fixed = 1; 4217 for (i = 0; i < n; ++i) { 4218 isl_val *r, *m; 4219 4220 map = isl_map_from_domain_and_range( 4221 isl_set_copy(domain[order[base]].set), 4222 isl_set_copy(domain[order[i]].set)); 4223 for (d = 0; d < depth; ++d) 4224 map = isl_map_equate(map, isl_dim_in, d, 4225 isl_dim_out, d); 4226 deltas = isl_map_deltas(map); 4227 res = isl_set_dim_residue_class_val(deltas, depth, &m, &r); 4228 isl_set_free(deltas); 4229 if (res < 0) 4230 break; 4231 4232 if (i == 0) 4233 gcd = m; 4234 else 4235 gcd = isl_val_gcd(gcd, m); 4236 if (isl_val_is_one(gcd)) { 4237 isl_val_free(r); 4238 break; 4239 } 4240 mv = isl_multi_val_set_val(mv, i, r); 4241 4242 res = dim_is_fixed(domain[order[i]].set, depth); 4243 if (res < 0) 4244 break; 4245 if (res) 4246 continue; 4247 4248 if (fixed && i > base) { 4249 isl_val *a, *b; 4250 a = isl_multi_val_get_val(mv, i); 4251 b = isl_multi_val_get_val(mv, base); 4252 if (isl_val_ne(a, b)) 4253 fixed = 0; 4254 isl_val_free(a); 4255 isl_val_free(b); 4256 } 4257 } 4258 4259 if (res < 0 || !gcd) { 4260 isl_ast_build_free(build); 4261 list = NULL; 4262 } else if (i < n || fixed || isl_val_is_zero(gcd)) { 4263 list = generate_shifted_component_from_list(domain, 4264 order, n, build); 4265 } else { 4266 list = generate_shift_component(domain, order, n, gcd, mv, 4267 build); 4268 } 4269 4270 isl_val_free(gcd); 4271 isl_multi_val_free(mv); 4272 4273 return list; 4274error: 4275 isl_ast_build_free(build); 4276 return NULL; 4277} 4278 4279/* Store both "map" itself and its domain in the 4280 * structure pointed to by *next and advance to the next array element. 4281 */ 4282static isl_stat extract_domain(__isl_take isl_map *map, void *user) 4283{ 4284 struct isl_set_map_pair **next = user; 4285 4286 (*next)->map = isl_map_copy(map); 4287 (*next)->set = isl_map_domain(map); 4288 (*next)++; 4289 4290 return isl_stat_ok; 4291} 4292 4293static isl_bool after_in_tree(__isl_keep isl_union_map *umap, 4294 __isl_keep isl_schedule_node *node); 4295 4296/* Is any domain element of "umap" scheduled after any of 4297 * the corresponding image elements by the tree rooted at 4298 * the child of "node"? 4299 */ 4300static isl_bool after_in_child(__isl_keep isl_union_map *umap, 4301 __isl_keep isl_schedule_node *node) 4302{ 4303 isl_schedule_node *child; 4304 isl_bool after; 4305 4306 child = isl_schedule_node_get_child(node, 0); 4307 after = after_in_tree(umap, child); 4308 isl_schedule_node_free(child); 4309 4310 return after; 4311} 4312 4313/* Is any domain element of "umap" scheduled after any of 4314 * the corresponding image elements by the tree rooted at 4315 * the band node "node"? 4316 * 4317 * We first check if any domain element is scheduled after any 4318 * of the corresponding image elements by the band node itself. 4319 * If not, we restrict "map" to those pairs of element that 4320 * are scheduled together by the band node and continue with 4321 * the child of the band node. 4322 * If there are no such pairs then the map passed to after_in_child 4323 * will be empty causing it to return 0. 4324 */ 4325static isl_bool after_in_band(__isl_keep isl_union_map *umap, 4326 __isl_keep isl_schedule_node *node) 4327{ 4328 isl_multi_union_pw_aff *mupa; 4329 isl_union_map *partial, *test, *gt, *universe, *umap1, *umap2; 4330 isl_union_set *domain, *range; 4331 isl_space *space; 4332 isl_bool empty; 4333 isl_bool after; 4334 isl_size n; 4335 4336 n = isl_schedule_node_band_n_member(node); 4337 if (n < 0) 4338 return isl_bool_error; 4339 if (n == 0) 4340 return after_in_child(umap, node); 4341 4342 mupa = isl_schedule_node_band_get_partial_schedule(node); 4343 space = isl_multi_union_pw_aff_get_space(mupa); 4344 partial = isl_union_map_from_multi_union_pw_aff(mupa); 4345 test = isl_union_map_copy(umap); 4346 test = isl_union_map_apply_domain(test, isl_union_map_copy(partial)); 4347 test = isl_union_map_apply_range(test, isl_union_map_copy(partial)); 4348 gt = isl_union_map_from_map(isl_map_lex_gt(space)); 4349 test = isl_union_map_intersect(test, gt); 4350 empty = isl_union_map_is_empty(test); 4351 isl_union_map_free(test); 4352 4353 if (empty < 0 || !empty) { 4354 isl_union_map_free(partial); 4355 return isl_bool_not(empty); 4356 } 4357 4358 universe = isl_union_map_universe(isl_union_map_copy(umap)); 4359 domain = isl_union_map_domain(isl_union_map_copy(universe)); 4360 range = isl_union_map_range(universe); 4361 umap1 = isl_union_map_copy(partial); 4362 umap1 = isl_union_map_intersect_domain(umap1, domain); 4363 umap2 = isl_union_map_intersect_domain(partial, range); 4364 test = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2)); 4365 test = isl_union_map_intersect(test, isl_union_map_copy(umap)); 4366 after = after_in_child(test, node); 4367 isl_union_map_free(test); 4368 return after; 4369} 4370 4371/* Is any domain element of "umap" scheduled after any of 4372 * the corresponding image elements by the tree rooted at 4373 * the context node "node"? 4374 * 4375 * The context constraints apply to the schedule domain, 4376 * so we cannot apply them directly to "umap", which contains 4377 * pairs of statement instances. Instead, we add them 4378 * to the range of the prefix schedule for both domain and 4379 * range of "umap". 4380 */ 4381static isl_bool after_in_context(__isl_keep isl_union_map *umap, 4382 __isl_keep isl_schedule_node *node) 4383{ 4384 isl_union_map *prefix, *universe, *umap1, *umap2; 4385 isl_union_set *domain, *range; 4386 isl_set *context; 4387 isl_bool after; 4388 4389 umap = isl_union_map_copy(umap); 4390 context = isl_schedule_node_context_get_context(node); 4391 prefix = isl_schedule_node_get_prefix_schedule_union_map(node); 4392 universe = isl_union_map_universe(isl_union_map_copy(umap)); 4393 domain = isl_union_map_domain(isl_union_map_copy(universe)); 4394 range = isl_union_map_range(universe); 4395 umap1 = isl_union_map_copy(prefix); 4396 umap1 = isl_union_map_intersect_domain(umap1, domain); 4397 umap2 = isl_union_map_intersect_domain(prefix, range); 4398 umap1 = isl_union_map_intersect_range(umap1, 4399 isl_union_set_from_set(context)); 4400 umap1 = isl_union_map_apply_range(umap1, isl_union_map_reverse(umap2)); 4401 umap = isl_union_map_intersect(umap, umap1); 4402 4403 after = after_in_child(umap, node); 4404 4405 isl_union_map_free(umap); 4406 4407 return after; 4408} 4409 4410/* Is any domain element of "umap" scheduled after any of 4411 * the corresponding image elements by the tree rooted at 4412 * the expansion node "node"? 4413 * 4414 * We apply the expansion to domain and range of "umap" and 4415 * continue with its child. 4416 */ 4417static isl_bool after_in_expansion(__isl_keep isl_union_map *umap, 4418 __isl_keep isl_schedule_node *node) 4419{ 4420 isl_union_map *expansion; 4421 isl_bool after; 4422 4423 expansion = isl_schedule_node_expansion_get_expansion(node); 4424 umap = isl_union_map_copy(umap); 4425 umap = isl_union_map_apply_domain(umap, isl_union_map_copy(expansion)); 4426 umap = isl_union_map_apply_range(umap, expansion); 4427 4428 after = after_in_child(umap, node); 4429 4430 isl_union_map_free(umap); 4431 4432 return after; 4433} 4434 4435/* Is any domain element of "umap" scheduled after any of 4436 * the corresponding image elements by the tree rooted at 4437 * the extension node "node"? 4438 * 4439 * Since the extension node may add statement instances before or 4440 * after the pairs of statement instances in "umap", we return isl_bool_true 4441 * to ensure that these pairs are not broken up. 4442 */ 4443static isl_bool after_in_extension(__isl_keep isl_union_map *umap, 4444 __isl_keep isl_schedule_node *node) 4445{ 4446 return isl_bool_true; 4447} 4448 4449/* Is any domain element of "umap" scheduled after any of 4450 * the corresponding image elements by the tree rooted at 4451 * the filter node "node"? 4452 * 4453 * We intersect domain and range of "umap" with the filter and 4454 * continue with its child. 4455 */ 4456static isl_bool after_in_filter(__isl_keep isl_union_map *umap, 4457 __isl_keep isl_schedule_node *node) 4458{ 4459 isl_union_set *filter; 4460 isl_bool after; 4461 4462 umap = isl_union_map_copy(umap); 4463 filter = isl_schedule_node_filter_get_filter(node); 4464 umap = isl_union_map_intersect_domain(umap, isl_union_set_copy(filter)); 4465 umap = isl_union_map_intersect_range(umap, filter); 4466 4467 after = after_in_child(umap, node); 4468 4469 isl_union_map_free(umap); 4470 4471 return after; 4472} 4473 4474/* Is any domain element of "umap" scheduled after any of 4475 * the corresponding image elements by the tree rooted at 4476 * the set node "node"? 4477 * 4478 * This is only the case if this condition holds in any 4479 * of the (filter) children of the set node. 4480 * In particular, if the domain and the range of "umap" 4481 * are contained in different children, then the condition 4482 * does not hold. 4483 */ 4484static isl_bool after_in_set(__isl_keep isl_union_map *umap, 4485 __isl_keep isl_schedule_node *node) 4486{ 4487 int i; 4488 isl_size n; 4489 4490 n = isl_schedule_node_n_children(node); 4491 if (n < 0) 4492 return isl_bool_error; 4493 for (i = 0; i < n; ++i) { 4494 isl_schedule_node *child; 4495 isl_bool after; 4496 4497 child = isl_schedule_node_get_child(node, i); 4498 after = after_in_tree(umap, child); 4499 isl_schedule_node_free(child); 4500 4501 if (after < 0 || after) 4502 return after; 4503 } 4504 4505 return isl_bool_false; 4506} 4507 4508/* Return the filter of child "i" of "node". 4509 */ 4510static __isl_give isl_union_set *child_filter( 4511 __isl_keep isl_schedule_node *node, int i) 4512{ 4513 isl_schedule_node *child; 4514 isl_union_set *filter; 4515 4516 child = isl_schedule_node_get_child(node, i); 4517 filter = isl_schedule_node_filter_get_filter(child); 4518 isl_schedule_node_free(child); 4519 4520 return filter; 4521} 4522 4523/* Is any domain element of "umap" scheduled after any of 4524 * the corresponding image elements by the tree rooted at 4525 * the sequence node "node"? 4526 * 4527 * This happens in particular if any domain element is 4528 * contained in a later child than one containing a range element or 4529 * if the condition holds within a given child in the sequence. 4530 * The later part of the condition is checked by after_in_set. 4531 */ 4532static isl_bool after_in_sequence(__isl_keep isl_union_map *umap, 4533 __isl_keep isl_schedule_node *node) 4534{ 4535 int i, j; 4536 isl_size n; 4537 isl_union_map *umap_i; 4538 isl_bool empty; 4539 isl_bool after = isl_bool_false; 4540 4541 n = isl_schedule_node_n_children(node); 4542 if (n < 0) 4543 return isl_bool_error; 4544 for (i = 1; i < n; ++i) { 4545 isl_union_set *filter_i; 4546 4547 umap_i = isl_union_map_copy(umap); 4548 filter_i = child_filter(node, i); 4549 umap_i = isl_union_map_intersect_domain(umap_i, filter_i); 4550 empty = isl_union_map_is_empty(umap_i); 4551 if (empty < 0) 4552 goto error; 4553 if (empty) { 4554 isl_union_map_free(umap_i); 4555 continue; 4556 } 4557 4558 for (j = 0; j < i; ++j) { 4559 isl_union_set *filter_j; 4560 isl_union_map *umap_ij; 4561 4562 umap_ij = isl_union_map_copy(umap_i); 4563 filter_j = child_filter(node, j); 4564 umap_ij = isl_union_map_intersect_range(umap_ij, 4565 filter_j); 4566 empty = isl_union_map_is_empty(umap_ij); 4567 isl_union_map_free(umap_ij); 4568 4569 if (empty < 0) 4570 goto error; 4571 if (!empty) 4572 after = isl_bool_true; 4573 if (after) 4574 break; 4575 } 4576 4577 isl_union_map_free(umap_i); 4578 if (after) 4579 break; 4580 } 4581 4582 if (after < 0 || after) 4583 return after; 4584 4585 return after_in_set(umap, node); 4586error: 4587 isl_union_map_free(umap_i); 4588 return isl_bool_error; 4589} 4590 4591/* Is any domain element of "umap" scheduled after any of 4592 * the corresponding image elements by the tree rooted at "node"? 4593 * 4594 * If "umap" is empty, then clearly there is no such element. 4595 * Otherwise, consider the different types of nodes separately. 4596 */ 4597static isl_bool after_in_tree(__isl_keep isl_union_map *umap, 4598 __isl_keep isl_schedule_node *node) 4599{ 4600 isl_bool empty; 4601 enum isl_schedule_node_type type; 4602 4603 empty = isl_union_map_is_empty(umap); 4604 if (empty < 0) 4605 return isl_bool_error; 4606 if (empty) 4607 return isl_bool_false; 4608 if (!node) 4609 return isl_bool_error; 4610 4611 type = isl_schedule_node_get_type(node); 4612 switch (type) { 4613 case isl_schedule_node_error: 4614 return isl_bool_error; 4615 case isl_schedule_node_leaf: 4616 return isl_bool_false; 4617 case isl_schedule_node_band: 4618 return after_in_band(umap, node); 4619 case isl_schedule_node_domain: 4620 isl_die(isl_schedule_node_get_ctx(node), isl_error_internal, 4621 "unexpected internal domain node", 4622 return isl_bool_error); 4623 case isl_schedule_node_context: 4624 return after_in_context(umap, node); 4625 case isl_schedule_node_expansion: 4626 return after_in_expansion(umap, node); 4627 case isl_schedule_node_extension: 4628 return after_in_extension(umap, node); 4629 case isl_schedule_node_filter: 4630 return after_in_filter(umap, node); 4631 case isl_schedule_node_guard: 4632 case isl_schedule_node_mark: 4633 return after_in_child(umap, node); 4634 case isl_schedule_node_set: 4635 return after_in_set(umap, node); 4636 case isl_schedule_node_sequence: 4637 return after_in_sequence(umap, node); 4638 } 4639 4640 return isl_bool_true; 4641} 4642 4643/* Is any domain element of "map1" scheduled after any domain 4644 * element of "map2" by the subtree underneath the current band node, 4645 * while at the same time being scheduled together by the current 4646 * band node, i.e., by "map1" and "map2? 4647 * 4648 * If the child of the current band node is a leaf, then 4649 * no element can be scheduled after any other element. 4650 * 4651 * Otherwise, we construct a relation between domain elements 4652 * of "map1" and domain elements of "map2" that are scheduled 4653 * together and then check if the subtree underneath the current 4654 * band node determines their relative order. 4655 */ 4656static isl_bool after_in_subtree(__isl_keep isl_ast_build *build, 4657 __isl_keep isl_map *map1, __isl_keep isl_map *map2) 4658{ 4659 isl_schedule_node *node; 4660 isl_map *map; 4661 isl_union_map *umap; 4662 isl_bool after; 4663 4664 node = isl_ast_build_get_schedule_node(build); 4665 if (!node) 4666 return isl_bool_error; 4667 node = isl_schedule_node_child(node, 0); 4668 if (isl_schedule_node_get_type(node) == isl_schedule_node_leaf) { 4669 isl_schedule_node_free(node); 4670 return isl_bool_false; 4671 } 4672 map = isl_map_copy(map2); 4673 map = isl_map_apply_domain(map, isl_map_copy(map1)); 4674 umap = isl_union_map_from_map(map); 4675 after = after_in_tree(umap, node); 4676 isl_union_map_free(umap); 4677 isl_schedule_node_free(node); 4678 return after; 4679} 4680 4681/* Internal data for any_scheduled_after. 4682 * 4683 * "build" is the build in which the AST is constructed. 4684 * "depth" is the number of loops that have already been generated 4685 * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled 4686 * "domain" is an array of set-map pairs corresponding to the different 4687 * iteration domains. The set is the schedule domain, i.e., the domain 4688 * of the inverse schedule, while the map is the inverse schedule itself. 4689 */ 4690struct isl_any_scheduled_after_data { 4691 isl_ast_build *build; 4692 int depth; 4693 int group_coscheduled; 4694 struct isl_set_map_pair *domain; 4695}; 4696 4697/* Is any element of domain "i" scheduled after any element of domain "j" 4698 * (for a common iteration of the first data->depth loops)? 4699 * 4700 * data->domain[i].set contains the domain of the inverse schedule 4701 * for domain "i", i.e., elements in the schedule domain. 4702 * 4703 * If we are inside a band of a schedule tree and there is a pair 4704 * of elements in the two domains that is schedule together by 4705 * the current band, then we check if any element of "i" may be schedule 4706 * after element of "j" by the descendants of the band node. 4707 * 4708 * If data->group_coscheduled is set, then we also return 1 if there 4709 * is any pair of elements in the two domains that are scheduled together. 4710 */ 4711static isl_bool any_scheduled_after(int i, int j, void *user) 4712{ 4713 struct isl_any_scheduled_after_data *data = user; 4714 isl_size dim = isl_set_dim(data->domain[i].set, isl_dim_set); 4715 int pos; 4716 4717 if (dim < 0) 4718 return isl_bool_error; 4719 4720 for (pos = data->depth; pos < dim; ++pos) { 4721 int follows; 4722 4723 follows = isl_set_follows_at(data->domain[i].set, 4724 data->domain[j].set, pos); 4725 4726 if (follows < -1) 4727 return isl_bool_error; 4728 if (follows > 0) 4729 return isl_bool_true; 4730 if (follows < 0) 4731 return isl_bool_false; 4732 } 4733 4734 if (isl_ast_build_has_schedule_node(data->build)) { 4735 isl_bool after; 4736 4737 after = after_in_subtree(data->build, data->domain[i].map, 4738 data->domain[j].map); 4739 if (after < 0 || after) 4740 return after; 4741 } 4742 4743 return isl_bool_ok(data->group_coscheduled); 4744} 4745 4746/* Look for independent components at the current depth and generate code 4747 * for each component separately. The resulting lists of grafts are 4748 * merged in an attempt to combine grafts with identical guards. 4749 * 4750 * Code for two domains can be generated separately if all the elements 4751 * of one domain are scheduled before (or together with) all the elements 4752 * of the other domain. We therefore consider the graph with as nodes 4753 * the domains and an edge between two nodes if any element of the first 4754 * node is scheduled after any element of the second node. 4755 * If the ast_build_group_coscheduled is set, then we also add an edge if 4756 * there is any pair of elements in the two domains that are scheduled 4757 * together. 4758 * Code is then generated (by generate_component) 4759 * for each of the strongly connected components in this graph 4760 * in their topological order. 4761 * 4762 * Since the test is performed on the domain of the inverse schedules of 4763 * the different domains, we precompute these domains and store 4764 * them in data.domain. 4765 */ 4766static __isl_give isl_ast_graft_list *generate_components( 4767 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 4768{ 4769 int i; 4770 isl_ctx *ctx = isl_ast_build_get_ctx(build); 4771 isl_size n = isl_union_map_n_map(executed); 4772 isl_size depth; 4773 struct isl_any_scheduled_after_data data; 4774 struct isl_set_map_pair *next; 4775 struct isl_tarjan_graph *g = NULL; 4776 isl_ast_graft_list *list = NULL; 4777 int n_domain = 0; 4778 4779 data.domain = NULL; 4780 if (n < 0) 4781 goto error; 4782 data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n); 4783 if (!data.domain) 4784 goto error; 4785 n_domain = n; 4786 4787 next = data.domain; 4788 if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0) 4789 goto error; 4790 4791 depth = isl_ast_build_get_depth(build); 4792 if (depth < 0) 4793 goto error; 4794 data.build = build; 4795 data.depth = depth; 4796 data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx); 4797 g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data); 4798 if (!g) 4799 goto error; 4800 4801 list = isl_ast_graft_list_alloc(ctx, 0); 4802 4803 i = 0; 4804 while (list && n) { 4805 isl_ast_graft_list *list_c; 4806 int first = i; 4807 4808 if (g->order[i] == -1) 4809 isl_die(ctx, isl_error_internal, "cannot happen", 4810 goto error); 4811 ++i; --n; 4812 while (g->order[i] != -1) { 4813 ++i; --n; 4814 } 4815 4816 list_c = generate_component(data.domain, 4817 g->order + first, i - first, 4818 isl_ast_build_copy(build)); 4819 list = isl_ast_graft_list_merge(list, list_c, build); 4820 4821 ++i; 4822 } 4823 4824 if (0) 4825error: list = isl_ast_graft_list_free(list); 4826 isl_tarjan_graph_free(g); 4827 for (i = 0; i < n_domain; ++i) { 4828 isl_map_free(data.domain[i].map); 4829 isl_set_free(data.domain[i].set); 4830 } 4831 free(data.domain); 4832 isl_union_map_free(executed); 4833 isl_ast_build_free(build); 4834 4835 return list; 4836} 4837 4838/* Generate code for the next level (and all inner levels). 4839 * 4840 * If "executed" is empty, i.e., no code needs to be generated, 4841 * then we return an empty list. 4842 * 4843 * If we have already generated code for all loop levels, then we pass 4844 * control to generate_inner_level. 4845 * 4846 * If "executed" lives in a single space, i.e., if code needs to be 4847 * generated for a single domain, then there can only be a single 4848 * component and we go directly to generate_shifted_component. 4849 * Otherwise, we call generate_components to detect the components 4850 * and to call generate_component on each of them separately. 4851 */ 4852static __isl_give isl_ast_graft_list *generate_next_level( 4853 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 4854{ 4855 isl_size depth; 4856 isl_size dim; 4857 isl_size n; 4858 4859 if (!build || !executed) 4860 goto error; 4861 4862 if (isl_union_map_is_empty(executed)) { 4863 isl_ctx *ctx = isl_ast_build_get_ctx(build); 4864 isl_union_map_free(executed); 4865 isl_ast_build_free(build); 4866 return isl_ast_graft_list_alloc(ctx, 0); 4867 } 4868 4869 depth = isl_ast_build_get_depth(build); 4870 dim = isl_ast_build_dim(build, isl_dim_set); 4871 if (depth < 0 || dim < 0) 4872 goto error; 4873 if (depth >= dim) 4874 return generate_inner_level(executed, build); 4875 4876 n = isl_union_map_n_map(executed); 4877 if (n < 0) 4878 goto error; 4879 if (n == 1) 4880 return generate_shifted_component(executed, build); 4881 4882 return generate_components(executed, build); 4883error: 4884 isl_union_map_free(executed); 4885 isl_ast_build_free(build); 4886 return NULL; 4887} 4888 4889/* Internal data structure used by isl_ast_build_node_from_schedule_map. 4890 * internal, executed and build are the inputs to generate_code. 4891 * list collects the output. 4892 */ 4893struct isl_generate_code_data { 4894 int internal; 4895 isl_union_map *executed; 4896 isl_ast_build *build; 4897 4898 isl_ast_graft_list *list; 4899}; 4900 4901/* Given an inverse schedule in terms of the external build schedule, i.e., 4902 * 4903 * [E -> S] -> D 4904 * 4905 * with E the external build schedule and S the additional schedule "space", 4906 * reformulate the inverse schedule in terms of the internal schedule domain, 4907 * i.e., return 4908 * 4909 * [I -> S] -> D 4910 * 4911 * We first obtain a mapping 4912 * 4913 * I -> E 4914 * 4915 * take the inverse and the product with S -> S, resulting in 4916 * 4917 * [I -> S] -> [E -> S] 4918 * 4919 * Applying the map to the input produces the desired result. 4920 */ 4921static __isl_give isl_union_map *internal_executed( 4922 __isl_take isl_union_map *executed, __isl_keep isl_space *space, 4923 __isl_keep isl_ast_build *build) 4924{ 4925 isl_map *id, *proj; 4926 4927 proj = isl_ast_build_get_schedule_map(build); 4928 proj = isl_map_reverse(proj); 4929 space = isl_space_map_from_set(isl_space_copy(space)); 4930 id = isl_map_identity(space); 4931 proj = isl_map_product(proj, id); 4932 executed = isl_union_map_apply_domain(executed, 4933 isl_union_map_from_map(proj)); 4934 return executed; 4935} 4936 4937/* Generate an AST that visits the elements in the range of data->executed 4938 * in the relative order specified by the corresponding domain element(s) 4939 * for those domain elements that belong to "set". 4940 * Add the result to data->list. 4941 * 4942 * The caller ensures that "set" is a universe domain. 4943 * "space" is the space of the additional part of the schedule. 4944 * It is equal to the space of "set" if build->domain is parametric. 4945 * Otherwise, it is equal to the range of the wrapped space of "set". 4946 * 4947 * If the build space is not parametric and 4948 * if isl_ast_build_node_from_schedule_map 4949 * was called from an outside user (data->internal not set), then 4950 * the (inverse) schedule refers to the external build domain and needs to 4951 * be transformed to refer to the internal build domain. 4952 * 4953 * If the build space is parametric, then we add some of the parameter 4954 * constraints to the executed relation. Adding these constraints 4955 * allows for an earlier detection of conflicts in some cases. 4956 * However, we do not want to divide the executed relation into 4957 * more disjuncts than necessary. We therefore approximate 4958 * the constraints on the parameters by a single disjunct set. 4959 * 4960 * The build is extended to include the additional part of the schedule. 4961 * If the original build space was not parametric, then the options 4962 * in data->build refer only to the additional part of the schedule 4963 * and they need to be adjusted to refer to the complete AST build 4964 * domain. 4965 * 4966 * After having adjusted inverse schedule and build, we start generating 4967 * code with the outer loop of the current code generation 4968 * in generate_next_level. 4969 * 4970 * If the original build space was not parametric, we undo the embedding 4971 * on the resulting isl_ast_node_list so that it can be used within 4972 * the outer AST build. 4973 */ 4974static isl_stat generate_code_in_space(struct isl_generate_code_data *data, 4975 __isl_take isl_set *set, __isl_take isl_space *space) 4976{ 4977 isl_union_map *executed; 4978 isl_ast_build *build; 4979 isl_ast_graft_list *list; 4980 int embed; 4981 4982 executed = isl_union_map_copy(data->executed); 4983 executed = isl_union_map_intersect_domain(executed, 4984 isl_union_set_from_set(set)); 4985 4986 embed = !isl_set_is_params(data->build->domain); 4987 if (embed && !data->internal) 4988 executed = internal_executed(executed, space, data->build); 4989 if (!embed) { 4990 isl_set *domain; 4991 domain = isl_ast_build_get_domain(data->build); 4992 domain = isl_set_from_basic_set(isl_set_simple_hull(domain)); 4993 executed = isl_union_map_intersect_params(executed, domain); 4994 } 4995 4996 build = isl_ast_build_copy(data->build); 4997 build = isl_ast_build_product(build, space); 4998 4999 list = generate_next_level(executed, build); 5000 5001 list = isl_ast_graft_list_unembed(list, embed); 5002 5003 data->list = isl_ast_graft_list_concat(data->list, list); 5004 5005 return isl_stat_ok; 5006} 5007 5008/* Generate an AST that visits the elements in the range of data->executed 5009 * in the relative order specified by the corresponding domain element(s) 5010 * for those domain elements that belong to "set". 5011 * Add the result to data->list. 5012 * 5013 * The caller ensures that "set" is a universe domain. 5014 * 5015 * If the build space S is not parametric, then the space of "set" 5016 * need to be a wrapped relation with S as domain. That is, it needs 5017 * to be of the form 5018 * 5019 * [S -> T] 5020 * 5021 * Check this property and pass control to generate_code_in_space 5022 * passing along T. 5023 * If the build space is not parametric, then T is the space of "set". 5024 */ 5025static isl_stat generate_code_set(__isl_take isl_set *set, void *user) 5026{ 5027 struct isl_generate_code_data *data = user; 5028 isl_space *space, *build_space; 5029 int is_domain; 5030 5031 space = isl_set_get_space(set); 5032 5033 if (isl_set_is_params(data->build->domain)) 5034 return generate_code_in_space(data, set, space); 5035 5036 build_space = isl_ast_build_get_space(data->build, data->internal); 5037 space = isl_space_unwrap(space); 5038 is_domain = isl_space_is_domain(build_space, space); 5039 isl_space_free(build_space); 5040 space = isl_space_range(space); 5041 5042 if (is_domain < 0) 5043 goto error; 5044 if (!is_domain) 5045 isl_die(isl_set_get_ctx(set), isl_error_invalid, 5046 "invalid nested schedule space", goto error); 5047 5048 return generate_code_in_space(data, set, space); 5049error: 5050 isl_set_free(set); 5051 isl_space_free(space); 5052 return isl_stat_error; 5053} 5054 5055/* Generate an AST that visits the elements in the range of "executed" 5056 * in the relative order specified by the corresponding domain element(s). 5057 * 5058 * "build" is an isl_ast_build that has either been constructed by 5059 * isl_ast_build_from_context or passed to a callback set by 5060 * isl_ast_build_set_create_leaf. 5061 * In the first case, the space of the isl_ast_build is typically 5062 * a parametric space, although this is currently not enforced. 5063 * In the second case, the space is never a parametric space. 5064 * If the space S is not parametric, then the domain space(s) of "executed" 5065 * need to be wrapped relations with S as domain. 5066 * 5067 * If the domain of "executed" consists of several spaces, then an AST 5068 * is generated for each of them (in arbitrary order) and the results 5069 * are concatenated. 5070 * 5071 * If "internal" is set, then the domain "S" above refers to the internal 5072 * schedule domain representation. Otherwise, it refers to the external 5073 * representation, as returned by isl_ast_build_get_schedule_space. 5074 * 5075 * We essentially run over all the spaces in the domain of "executed" 5076 * and call generate_code_set on each of them. 5077 */ 5078static __isl_give isl_ast_graft_list *generate_code( 5079 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build, 5080 int internal) 5081{ 5082 isl_ctx *ctx; 5083 struct isl_generate_code_data data = { 0 }; 5084 isl_space *space; 5085 isl_union_set *schedule_domain; 5086 isl_union_map *universe; 5087 5088 if (!build) 5089 goto error; 5090 space = isl_ast_build_get_space(build, 1); 5091 space = isl_space_align_params(space, 5092 isl_union_map_get_space(executed)); 5093 space = isl_space_align_params(space, 5094 isl_union_map_get_space(build->options)); 5095 build = isl_ast_build_align_params(build, isl_space_copy(space)); 5096 executed = isl_union_map_align_params(executed, space); 5097 if (!executed || !build) 5098 goto error; 5099 5100 ctx = isl_ast_build_get_ctx(build); 5101 5102 data.internal = internal; 5103 data.executed = executed; 5104 data.build = build; 5105 data.list = isl_ast_graft_list_alloc(ctx, 0); 5106 5107 universe = isl_union_map_universe(isl_union_map_copy(executed)); 5108 schedule_domain = isl_union_map_domain(universe); 5109 if (isl_union_set_foreach_set(schedule_domain, &generate_code_set, 5110 &data) < 0) 5111 data.list = isl_ast_graft_list_free(data.list); 5112 5113 isl_union_set_free(schedule_domain); 5114 isl_union_map_free(executed); 5115 5116 isl_ast_build_free(build); 5117 return data.list; 5118error: 5119 isl_union_map_free(executed); 5120 isl_ast_build_free(build); 5121 return NULL; 5122} 5123 5124/* Generate an AST that visits the elements in the domain of "schedule" 5125 * in the relative order specified by the corresponding image element(s). 5126 * 5127 * "build" is an isl_ast_build that has either been constructed by 5128 * isl_ast_build_from_context or passed to a callback set by 5129 * isl_ast_build_set_create_leaf. 5130 * In the first case, the space of the isl_ast_build is typically 5131 * a parametric space, although this is currently not enforced. 5132 * In the second case, the space is never a parametric space. 5133 * If the space S is not parametric, then the range space(s) of "schedule" 5134 * need to be wrapped relations with S as domain. 5135 * 5136 * If the range of "schedule" consists of several spaces, then an AST 5137 * is generated for each of them (in arbitrary order) and the results 5138 * are concatenated. 5139 * 5140 * We first initialize the local copies of the relevant options. 5141 * We do this here rather than when the isl_ast_build is created 5142 * because the options may have changed between the construction 5143 * of the isl_ast_build and the call to isl_generate_code. 5144 * 5145 * The main computation is performed on an inverse schedule (with 5146 * the schedule domain in the domain and the elements to be executed 5147 * in the range) called "executed". 5148 */ 5149__isl_give isl_ast_node *isl_ast_build_node_from_schedule_map( 5150 __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule) 5151{ 5152 isl_ast_graft_list *list; 5153 isl_ast_node *node; 5154 isl_union_map *executed; 5155 5156 build = isl_ast_build_copy(build); 5157 build = isl_ast_build_set_single_valued(build, 0); 5158 schedule = isl_union_map_coalesce(schedule); 5159 schedule = isl_union_map_remove_redundancies(schedule); 5160 executed = isl_union_map_reverse(schedule); 5161 list = generate_code(executed, isl_ast_build_copy(build), 0); 5162 node = isl_ast_node_from_graft_list(list, build); 5163 isl_ast_build_free(build); 5164 5165 return node; 5166} 5167 5168/* The old name for isl_ast_build_node_from_schedule_map. 5169 * It is being kept for backward compatibility, but 5170 * it will be removed in the future. 5171 */ 5172__isl_give isl_ast_node *isl_ast_build_ast_from_schedule( 5173 __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule) 5174{ 5175 return isl_ast_build_node_from_schedule_map(build, schedule); 5176} 5177 5178/* Generate an AST that visits the elements in the domain of "executed" 5179 * in the relative order specified by the leaf node "node". 5180 * 5181 * The relation "executed" maps the outer generated loop iterators 5182 * to the domain elements executed by those iterations. 5183 * 5184 * Simply pass control to generate_inner_level. 5185 * Note that the current build does not refer to any band node, so 5186 * that generate_inner_level will not try to visit the child of 5187 * the leaf node. 5188 * 5189 * If multiple statement instances reach a leaf, 5190 * then they can be executed in any order. 5191 * Group the list of grafts based on shared guards 5192 * such that identical guards are only generated once 5193 * when the list is eventually passed on to isl_ast_graft_list_fuse. 5194 */ 5195static __isl_give isl_ast_graft_list *build_ast_from_leaf( 5196 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5197 __isl_take isl_union_map *executed) 5198{ 5199 isl_ast_graft_list *list; 5200 5201 isl_schedule_node_free(node); 5202 list = generate_inner_level(executed, isl_ast_build_copy(build)); 5203 list = isl_ast_graft_list_group_on_guard(list, build); 5204 isl_ast_build_free(build); 5205 5206 return list; 5207} 5208 5209/* Check that the band partial schedule "partial" does not filter out 5210 * any statement instances, as specified by the range of "executed". 5211 */ 5212static isl_stat check_band_schedule_total_on_instances( 5213 __isl_keep isl_multi_union_pw_aff *partial, 5214 __isl_keep isl_union_map *executed) 5215{ 5216 isl_bool subset; 5217 isl_union_set *domain, *instances; 5218 5219 instances = isl_union_map_range(isl_union_map_copy(executed)); 5220 partial = isl_multi_union_pw_aff_copy(partial); 5221 domain = isl_multi_union_pw_aff_domain(partial); 5222 subset = isl_union_set_is_subset(instances, domain); 5223 isl_union_set_free(domain); 5224 isl_union_set_free(instances); 5225 5226 if (subset < 0) 5227 return isl_stat_error; 5228 if (!subset) 5229 isl_die(isl_union_map_get_ctx(executed), isl_error_invalid, 5230 "band node is not allowed to drop statement instances", 5231 return isl_stat_error); 5232 return isl_stat_ok; 5233} 5234 5235/* Generate an AST that visits the elements in the domain of "executed" 5236 * in the relative order specified by the band node "node" and its descendants. 5237 * 5238 * The relation "executed" maps the outer generated loop iterators 5239 * to the domain elements executed by those iterations. 5240 * 5241 * If the band is empty, we continue with its descendants. 5242 * Otherwise, we extend the build and the inverse schedule with 5243 * the additional space/partial schedule and continue generating 5244 * an AST in generate_next_level. 5245 * As soon as we have extended the inverse schedule with the additional 5246 * partial schedule, we look for equalities that may exists between 5247 * the old and the new part. 5248 */ 5249static __isl_give isl_ast_graft_list *build_ast_from_band( 5250 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5251 __isl_take isl_union_map *executed) 5252{ 5253 isl_space *space; 5254 isl_multi_union_pw_aff *extra; 5255 isl_union_map *extra_umap; 5256 isl_ast_graft_list *list; 5257 isl_size n1, n2; 5258 isl_size n; 5259 5260 n = isl_schedule_node_band_n_member(node); 5261 if (!build || n < 0 || !executed) 5262 goto error; 5263 5264 if (n == 0) 5265 return build_ast_from_child(build, node, executed); 5266 5267 extra = isl_schedule_node_band_get_partial_schedule(node); 5268 extra = isl_multi_union_pw_aff_align_params(extra, 5269 isl_ast_build_get_space(build, 1)); 5270 space = isl_multi_union_pw_aff_get_space(extra); 5271 5272 if (check_band_schedule_total_on_instances(extra, executed) < 0) 5273 executed = isl_union_map_free(executed); 5274 5275 extra_umap = isl_union_map_from_multi_union_pw_aff(extra); 5276 extra_umap = isl_union_map_reverse(extra_umap); 5277 5278 executed = isl_union_map_domain_product(executed, extra_umap); 5279 executed = isl_union_map_detect_equalities(executed); 5280 5281 n1 = isl_ast_build_dim(build, isl_dim_param); 5282 build = isl_ast_build_product(build, space); 5283 n2 = isl_ast_build_dim(build, isl_dim_param); 5284 if (n1 < 0 || n2 < 0) 5285 build = isl_ast_build_free(build); 5286 else if (n2 > n1) 5287 isl_die(isl_ast_build_get_ctx(build), isl_error_invalid, 5288 "band node is not allowed to introduce new parameters", 5289 build = isl_ast_build_free(build)); 5290 build = isl_ast_build_set_schedule_node(build, node); 5291 5292 list = generate_next_level(executed, build); 5293 5294 list = isl_ast_graft_list_unembed(list, 1); 5295 5296 return list; 5297error: 5298 isl_schedule_node_free(node); 5299 isl_union_map_free(executed); 5300 isl_ast_build_free(build); 5301 return NULL; 5302} 5303 5304/* Hoist a list of grafts (in practice containing a single graft) 5305 * from "sub_build" (which includes extra context information) 5306 * to "build". 5307 * 5308 * In particular, project out all additional parameters introduced 5309 * by the context node from the enforced constraints and the guard 5310 * of the single graft. 5311 */ 5312static __isl_give isl_ast_graft_list *hoist_out_of_context( 5313 __isl_take isl_ast_graft_list *list, __isl_keep isl_ast_build *build, 5314 __isl_keep isl_ast_build *sub_build) 5315{ 5316 isl_ast_graft *graft; 5317 isl_basic_set *enforced; 5318 isl_set *guard; 5319 isl_size n_param, extra_param; 5320 5321 n_param = isl_ast_build_dim(build, isl_dim_param); 5322 extra_param = isl_ast_build_dim(sub_build, isl_dim_param); 5323 if (n_param < 0 || extra_param < 0) 5324 return isl_ast_graft_list_free(list); 5325 5326 if (extra_param == n_param) 5327 return list; 5328 5329 extra_param -= n_param; 5330 enforced = isl_ast_graft_list_extract_shared_enforced(list, sub_build); 5331 enforced = isl_basic_set_project_out(enforced, isl_dim_param, 5332 n_param, extra_param); 5333 enforced = isl_basic_set_remove_unknown_divs(enforced); 5334 guard = isl_ast_graft_list_extract_hoistable_guard(list, sub_build); 5335 guard = isl_set_remove_divs_involving_dims(guard, isl_dim_param, 5336 n_param, extra_param); 5337 guard = isl_set_project_out(guard, isl_dim_param, n_param, extra_param); 5338 guard = isl_set_compute_divs(guard); 5339 graft = isl_ast_graft_alloc_from_children(list, guard, enforced, 5340 build, sub_build); 5341 list = isl_ast_graft_list_from_ast_graft(graft); 5342 5343 return list; 5344} 5345 5346/* Generate an AST that visits the elements in the domain of "executed" 5347 * in the relative order specified by the context node "node" 5348 * and its descendants. 5349 * 5350 * The relation "executed" maps the outer generated loop iterators 5351 * to the domain elements executed by those iterations. 5352 * 5353 * The context node may introduce additional parameters as well as 5354 * constraints on the outer schedule dimensions or original parameters. 5355 * 5356 * We add the extra parameters to a new build and the context 5357 * constraints to both the build and (as a single disjunct) 5358 * to the domain of "executed". Since the context constraints 5359 * are specified in terms of the input schedule, we first need 5360 * to map them to the internal schedule domain. 5361 * 5362 * After constructing the AST from the descendants of "node", 5363 * we combine the list of grafts into a single graft within 5364 * the new build, in order to be able to exploit the additional 5365 * context constraints during this combination. 5366 * 5367 * Additionally, if the current node is the outermost node in 5368 * the schedule tree (apart from the root domain node), we generate 5369 * all pending guards, again to be able to exploit the additional 5370 * context constraints. We currently do not do this for internal 5371 * context nodes since we may still want to hoist conditions 5372 * to outer AST nodes. 5373 * 5374 * If the context node introduced any new parameters, then they 5375 * are removed from the set of enforced constraints and guard 5376 * in hoist_out_of_context. 5377 */ 5378static __isl_give isl_ast_graft_list *build_ast_from_context( 5379 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5380 __isl_take isl_union_map *executed) 5381{ 5382 isl_set *context; 5383 isl_space *space; 5384 isl_multi_aff *internal2input; 5385 isl_ast_build *sub_build; 5386 isl_ast_graft_list *list; 5387 isl_size n; 5388 isl_size depth; 5389 5390 depth = isl_schedule_node_get_tree_depth(node); 5391 if (depth < 0) 5392 build = isl_ast_build_free(build); 5393 space = isl_ast_build_get_space(build, 1); 5394 context = isl_schedule_node_context_get_context(node); 5395 context = isl_set_align_params(context, space); 5396 sub_build = isl_ast_build_copy(build); 5397 space = isl_set_get_space(context); 5398 sub_build = isl_ast_build_align_params(sub_build, space); 5399 internal2input = isl_ast_build_get_internal2input(sub_build); 5400 context = isl_set_preimage_multi_aff(context, internal2input); 5401 sub_build = isl_ast_build_restrict_generated(sub_build, 5402 isl_set_copy(context)); 5403 context = isl_set_from_basic_set(isl_set_simple_hull(context)); 5404 executed = isl_union_map_intersect_domain(executed, 5405 isl_union_set_from_set(context)); 5406 5407 list = build_ast_from_child(isl_ast_build_copy(sub_build), 5408 node, executed); 5409 n = isl_ast_graft_list_n_ast_graft(list); 5410 if (n < 0) 5411 list = isl_ast_graft_list_free(list); 5412 5413 list = isl_ast_graft_list_fuse(list, sub_build); 5414 if (depth == 1) 5415 list = isl_ast_graft_list_insert_pending_guard_nodes(list, 5416 sub_build); 5417 if (n >= 1) 5418 list = hoist_out_of_context(list, build, sub_build); 5419 5420 isl_ast_build_free(build); 5421 isl_ast_build_free(sub_build); 5422 5423 return list; 5424} 5425 5426/* Generate an AST that visits the elements in the domain of "executed" 5427 * in the relative order specified by the expansion node "node" and 5428 * its descendants. 5429 * 5430 * The relation "executed" maps the outer generated loop iterators 5431 * to the domain elements executed by those iterations. 5432 * 5433 * We expand the domain elements by the expansion and 5434 * continue with the descendants of the node. 5435 */ 5436static __isl_give isl_ast_graft_list *build_ast_from_expansion( 5437 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5438 __isl_take isl_union_map *executed) 5439{ 5440 isl_union_map *expansion; 5441 isl_size n1, n2; 5442 5443 expansion = isl_schedule_node_expansion_get_expansion(node); 5444 expansion = isl_union_map_align_params(expansion, 5445 isl_union_map_get_space(executed)); 5446 5447 n1 = isl_union_map_dim(executed, isl_dim_param); 5448 executed = isl_union_map_apply_range(executed, expansion); 5449 n2 = isl_union_map_dim(executed, isl_dim_param); 5450 if (n1 < 0 || n2 < 0) 5451 goto error; 5452 if (n2 > n1) 5453 isl_die(isl_ast_build_get_ctx(build), isl_error_invalid, 5454 "expansion node is not allowed to introduce " 5455 "new parameters", goto error); 5456 5457 return build_ast_from_child(build, node, executed); 5458error: 5459 isl_ast_build_free(build); 5460 isl_schedule_node_free(node); 5461 isl_union_map_free(executed); 5462 return NULL; 5463} 5464 5465/* Generate an AST that visits the elements in the domain of "executed" 5466 * in the relative order specified by the extension node "node" and 5467 * its descendants. 5468 * 5469 * The relation "executed" maps the outer generated loop iterators 5470 * to the domain elements executed by those iterations. 5471 * 5472 * Extend the inverse schedule with the extension applied to current 5473 * set of generated constraints. Since the extension if formulated 5474 * in terms of the input schedule, it first needs to be transformed 5475 * to refer to the internal schedule. 5476 */ 5477static __isl_give isl_ast_graft_list *build_ast_from_extension( 5478 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5479 __isl_take isl_union_map *executed) 5480{ 5481 isl_union_set *schedule_domain; 5482 isl_union_map *extension; 5483 isl_set *set; 5484 5485 set = isl_ast_build_get_generated(build); 5486 set = isl_set_from_basic_set(isl_set_simple_hull(set)); 5487 schedule_domain = isl_union_set_from_set(set); 5488 5489 extension = isl_schedule_node_extension_get_extension(node); 5490 5491 extension = isl_union_map_preimage_domain_multi_aff(extension, 5492 isl_multi_aff_copy(build->internal2input)); 5493 extension = isl_union_map_intersect_domain(extension, schedule_domain); 5494 extension = isl_ast_build_substitute_values_union_map_domain(build, 5495 extension); 5496 executed = isl_union_map_union(executed, extension); 5497 5498 return build_ast_from_child(build, node, executed); 5499} 5500 5501/* Generate an AST that visits the elements in the domain of "executed" 5502 * in the relative order specified by the filter node "node" and 5503 * its descendants. 5504 * 5505 * The relation "executed" maps the outer generated loop iterators 5506 * to the domain elements executed by those iterations. 5507 * 5508 * We simply intersect the iteration domain (i.e., the range of "executed") 5509 * with the filter and continue with the descendants of the node, 5510 * unless the resulting inverse schedule is empty, in which 5511 * case we return an empty list. 5512 * 5513 * If the result of the intersection is equal to the original "executed" 5514 * relation, then keep the original representation since the intersection 5515 * may have unnecessarily broken up the relation into a greater number 5516 * of disjuncts. 5517 */ 5518static __isl_give isl_ast_graft_list *build_ast_from_filter( 5519 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5520 __isl_take isl_union_map *executed) 5521{ 5522 isl_ctx *ctx; 5523 isl_union_set *filter; 5524 isl_union_map *orig; 5525 isl_ast_graft_list *list; 5526 int empty; 5527 isl_bool unchanged; 5528 isl_size n1, n2; 5529 5530 orig = isl_union_map_copy(executed); 5531 if (!build || !node || !executed) 5532 goto error; 5533 5534 filter = isl_schedule_node_filter_get_filter(node); 5535 filter = isl_union_set_align_params(filter, 5536 isl_union_map_get_space(executed)); 5537 n1 = isl_union_map_dim(executed, isl_dim_param); 5538 executed = isl_union_map_intersect_range(executed, filter); 5539 n2 = isl_union_map_dim(executed, isl_dim_param); 5540 if (n1 < 0 || n2 < 0) 5541 goto error; 5542 if (n2 > n1) 5543 isl_die(isl_ast_build_get_ctx(build), isl_error_invalid, 5544 "filter node is not allowed to introduce " 5545 "new parameters", goto error); 5546 5547 unchanged = isl_union_map_is_subset(orig, executed); 5548 empty = isl_union_map_is_empty(executed); 5549 if (unchanged < 0 || empty < 0) 5550 goto error; 5551 if (unchanged) { 5552 isl_union_map_free(executed); 5553 return build_ast_from_child(build, node, orig); 5554 } 5555 isl_union_map_free(orig); 5556 if (!empty) 5557 return build_ast_from_child(build, node, executed); 5558 5559 ctx = isl_ast_build_get_ctx(build); 5560 list = isl_ast_graft_list_alloc(ctx, 0); 5561 isl_ast_build_free(build); 5562 isl_schedule_node_free(node); 5563 isl_union_map_free(executed); 5564 return list; 5565error: 5566 isl_ast_build_free(build); 5567 isl_schedule_node_free(node); 5568 isl_union_map_free(executed); 5569 isl_union_map_free(orig); 5570 return NULL; 5571} 5572 5573/* Generate an AST that visits the elements in the domain of "executed" 5574 * in the relative order specified by the guard node "node" and 5575 * its descendants. 5576 * 5577 * The relation "executed" maps the outer generated loop iterators 5578 * to the domain elements executed by those iterations. 5579 * 5580 * Ensure that the associated guard is enforced by the outer AST 5581 * constructs by adding it to the guard of the graft. 5582 * Since we know that we will enforce the guard, we can also include it 5583 * in the generated constraints used to construct an AST for 5584 * the descendant nodes. 5585 */ 5586static __isl_give isl_ast_graft_list *build_ast_from_guard( 5587 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5588 __isl_take isl_union_map *executed) 5589{ 5590 isl_space *space; 5591 isl_set *guard, *hoisted; 5592 isl_basic_set *enforced; 5593 isl_ast_build *sub_build; 5594 isl_ast_graft *graft; 5595 isl_ast_graft_list *list; 5596 isl_size n1, n2, n; 5597 5598 space = isl_ast_build_get_space(build, 1); 5599 guard = isl_schedule_node_guard_get_guard(node); 5600 n1 = isl_space_dim(space, isl_dim_param); 5601 guard = isl_set_align_params(guard, space); 5602 n2 = isl_set_dim(guard, isl_dim_param); 5603 if (n1 < 0 || n2 < 0) 5604 guard = isl_set_free(guard); 5605 else if (n2 > n1) 5606 isl_die(isl_ast_build_get_ctx(build), isl_error_invalid, 5607 "guard node is not allowed to introduce " 5608 "new parameters", guard = isl_set_free(guard)); 5609 guard = isl_set_preimage_multi_aff(guard, 5610 isl_multi_aff_copy(build->internal2input)); 5611 guard = isl_ast_build_specialize(build, guard); 5612 guard = isl_set_gist(guard, isl_set_copy(build->generated)); 5613 5614 sub_build = isl_ast_build_copy(build); 5615 sub_build = isl_ast_build_restrict_generated(sub_build, 5616 isl_set_copy(guard)); 5617 5618 list = build_ast_from_child(isl_ast_build_copy(sub_build), 5619 node, executed); 5620 5621 hoisted = isl_ast_graft_list_extract_hoistable_guard(list, sub_build); 5622 n = isl_set_n_basic_set(hoisted); 5623 if (n < 0) 5624 list = isl_ast_graft_list_free(list); 5625 if (n > 1) 5626 list = isl_ast_graft_list_gist_guards(list, 5627 isl_set_copy(hoisted)); 5628 guard = isl_set_intersect(guard, hoisted); 5629 enforced = extract_shared_enforced(list, build); 5630 graft = isl_ast_graft_alloc_from_children(list, guard, enforced, 5631 build, sub_build); 5632 5633 isl_ast_build_free(sub_build); 5634 isl_ast_build_free(build); 5635 return isl_ast_graft_list_from_ast_graft(graft); 5636} 5637 5638/* Call the before_each_mark callback, if requested by the user. 5639 * 5640 * Return 0 on success and -1 on error. 5641 * 5642 * The caller is responsible for recording the current inverse schedule 5643 * in "build". 5644 */ 5645static isl_stat before_each_mark(__isl_keep isl_id *mark, 5646 __isl_keep isl_ast_build *build) 5647{ 5648 if (!build) 5649 return isl_stat_error; 5650 if (!build->before_each_mark) 5651 return isl_stat_ok; 5652 return build->before_each_mark(mark, build, 5653 build->before_each_mark_user); 5654} 5655 5656/* Call the after_each_mark callback, if requested by the user. 5657 * 5658 * The caller is responsible for recording the current inverse schedule 5659 * in "build". 5660 */ 5661static __isl_give isl_ast_graft *after_each_mark( 5662 __isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build) 5663{ 5664 if (!graft || !build) 5665 return isl_ast_graft_free(graft); 5666 if (!build->after_each_mark) 5667 return graft; 5668 graft->node = build->after_each_mark(graft->node, build, 5669 build->after_each_mark_user); 5670 if (!graft->node) 5671 return isl_ast_graft_free(graft); 5672 return graft; 5673} 5674 5675 5676/* Generate an AST that visits the elements in the domain of "executed" 5677 * in the relative order specified by the mark node "node" and 5678 * its descendants. 5679 * 5680 * The relation "executed" maps the outer generated loop iterators 5681 * to the domain elements executed by those iterations. 5682 5683 * Since we may be calling before_each_mark and after_each_mark 5684 * callbacks, we record the current inverse schedule in the build. 5685 * 5686 * We generate an AST for the child of the mark node, combine 5687 * the graft list into a single graft and then insert the mark 5688 * in the AST of that single graft. 5689 */ 5690static __isl_give isl_ast_graft_list *build_ast_from_mark( 5691 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5692 __isl_take isl_union_map *executed) 5693{ 5694 isl_id *mark; 5695 isl_ast_graft *graft; 5696 isl_ast_graft_list *list; 5697 isl_size n; 5698 5699 build = isl_ast_build_set_executed(build, isl_union_map_copy(executed)); 5700 5701 mark = isl_schedule_node_mark_get_id(node); 5702 if (before_each_mark(mark, build) < 0) 5703 node = isl_schedule_node_free(node); 5704 5705 list = build_ast_from_child(isl_ast_build_copy(build), node, executed); 5706 list = isl_ast_graft_list_fuse(list, build); 5707 n = isl_ast_graft_list_n_ast_graft(list); 5708 if (n < 0) 5709 list = isl_ast_graft_list_free(list); 5710 if (n == 0) { 5711 isl_id_free(mark); 5712 } else { 5713 graft = isl_ast_graft_list_get_ast_graft(list, 0); 5714 graft = isl_ast_graft_insert_mark(graft, mark); 5715 graft = after_each_mark(graft, build); 5716 list = isl_ast_graft_list_set_ast_graft(list, 0, graft); 5717 } 5718 isl_ast_build_free(build); 5719 5720 return list; 5721} 5722 5723static __isl_give isl_ast_graft_list *build_ast_from_schedule_node( 5724 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5725 __isl_take isl_union_map *executed); 5726 5727/* Generate an AST that visits the elements in the domain of "executed" 5728 * in the relative order specified by the sequence (or set) node "node" and 5729 * its descendants. 5730 * 5731 * The relation "executed" maps the outer generated loop iterators 5732 * to the domain elements executed by those iterations. 5733 * 5734 * We simply generate an AST for each of the children and concatenate 5735 * the results. 5736 */ 5737static __isl_give isl_ast_graft_list *build_ast_from_sequence( 5738 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5739 __isl_take isl_union_map *executed) 5740{ 5741 int i; 5742 isl_size n; 5743 isl_ctx *ctx; 5744 isl_ast_graft_list *list; 5745 5746 ctx = isl_ast_build_get_ctx(build); 5747 list = isl_ast_graft_list_alloc(ctx, 0); 5748 5749 n = isl_schedule_node_n_children(node); 5750 if (n < 0) 5751 list = isl_ast_graft_list_free(list); 5752 for (i = 0; i < n; ++i) { 5753 isl_schedule_node *child; 5754 isl_ast_graft_list *list_i; 5755 5756 child = isl_schedule_node_get_child(node, i); 5757 list_i = build_ast_from_schedule_node(isl_ast_build_copy(build), 5758 child, isl_union_map_copy(executed)); 5759 list = isl_ast_graft_list_concat(list, list_i); 5760 } 5761 isl_ast_build_free(build); 5762 isl_schedule_node_free(node); 5763 isl_union_map_free(executed); 5764 5765 return list; 5766} 5767 5768/* Generate an AST that visits the elements in the domain of "executed" 5769 * in the relative order specified by the node "node" and its descendants. 5770 * 5771 * The relation "executed" maps the outer generated loop iterators 5772 * to the domain elements executed by those iterations. 5773 * 5774 * The node types are handled in separate functions. 5775 * Set nodes are currently treated in the same way as sequence nodes. 5776 * The children of a set node may be executed in any order, 5777 * including the order of the children. 5778 */ 5779static __isl_give isl_ast_graft_list *build_ast_from_schedule_node( 5780 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5781 __isl_take isl_union_map *executed) 5782{ 5783 enum isl_schedule_node_type type; 5784 5785 type = isl_schedule_node_get_type(node); 5786 5787 switch (type) { 5788 case isl_schedule_node_error: 5789 goto error; 5790 case isl_schedule_node_leaf: 5791 return build_ast_from_leaf(build, node, executed); 5792 case isl_schedule_node_band: 5793 return build_ast_from_band(build, node, executed); 5794 case isl_schedule_node_context: 5795 return build_ast_from_context(build, node, executed); 5796 case isl_schedule_node_domain: 5797 isl_die(isl_schedule_node_get_ctx(node), isl_error_unsupported, 5798 "unexpected internal domain node", goto error); 5799 case isl_schedule_node_expansion: 5800 return build_ast_from_expansion(build, node, executed); 5801 case isl_schedule_node_extension: 5802 return build_ast_from_extension(build, node, executed); 5803 case isl_schedule_node_filter: 5804 return build_ast_from_filter(build, node, executed); 5805 case isl_schedule_node_guard: 5806 return build_ast_from_guard(build, node, executed); 5807 case isl_schedule_node_mark: 5808 return build_ast_from_mark(build, node, executed); 5809 case isl_schedule_node_sequence: 5810 case isl_schedule_node_set: 5811 return build_ast_from_sequence(build, node, executed); 5812 } 5813 5814 isl_die(isl_ast_build_get_ctx(build), isl_error_internal, 5815 "unhandled type", goto error); 5816error: 5817 isl_union_map_free(executed); 5818 isl_schedule_node_free(node); 5819 isl_ast_build_free(build); 5820 5821 return NULL; 5822} 5823 5824/* Generate an AST that visits the elements in the domain of "executed" 5825 * in the relative order specified by the (single) child of "node" and 5826 * its descendants. 5827 * 5828 * The relation "executed" maps the outer generated loop iterators 5829 * to the domain elements executed by those iterations. 5830 * 5831 * This function is never called on a leaf, set or sequence node, 5832 * so the node always has exactly one child. 5833 */ 5834static __isl_give isl_ast_graft_list *build_ast_from_child( 5835 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node, 5836 __isl_take isl_union_map *executed) 5837{ 5838 node = isl_schedule_node_child(node, 0); 5839 return build_ast_from_schedule_node(build, node, executed); 5840} 5841 5842/* Generate an AST that visits the elements in the domain of the domain 5843 * node "node" in the relative order specified by its descendants. 5844 * 5845 * An initial inverse schedule is created that maps a zero-dimensional 5846 * schedule space to the node domain. 5847 * The input "build" is assumed to have a parametric domain and 5848 * is replaced by the same zero-dimensional schedule space. 5849 * 5850 * We also add some of the parameter constraints in the build domain 5851 * to the executed relation. Adding these constraints 5852 * allows for an earlier detection of conflicts in some cases. 5853 * However, we do not want to divide the executed relation into 5854 * more disjuncts than necessary. We therefore approximate 5855 * the constraints on the parameters by a single disjunct set. 5856 */ 5857static __isl_give isl_ast_node *build_ast_from_domain( 5858 __isl_take isl_ast_build *build, __isl_take isl_schedule_node *node) 5859{ 5860 isl_ctx *ctx; 5861 isl_union_set *domain, *schedule_domain; 5862 isl_union_map *executed; 5863 isl_space *space; 5864 isl_set *set; 5865 isl_ast_graft_list *list; 5866 isl_ast_node *ast; 5867 int is_params; 5868 5869 if (!build) 5870 goto error; 5871 5872 ctx = isl_ast_build_get_ctx(build); 5873 space = isl_ast_build_get_space(build, 1); 5874 is_params = isl_space_is_params(space); 5875 isl_space_free(space); 5876 if (is_params < 0) 5877 goto error; 5878 if (!is_params) 5879 isl_die(ctx, isl_error_unsupported, 5880 "expecting parametric initial context", goto error); 5881 5882 domain = isl_schedule_node_domain_get_domain(node); 5883 domain = isl_union_set_coalesce(domain); 5884 5885 space = isl_union_set_get_space(domain); 5886 space = isl_space_set_from_params(space); 5887 build = isl_ast_build_product(build, space); 5888 5889 set = isl_ast_build_get_domain(build); 5890 set = isl_set_from_basic_set(isl_set_simple_hull(set)); 5891 schedule_domain = isl_union_set_from_set(set); 5892 5893 executed = isl_union_map_from_domain_and_range(schedule_domain, domain); 5894 list = build_ast_from_child(isl_ast_build_copy(build), node, executed); 5895 ast = isl_ast_node_from_graft_list(list, build); 5896 isl_ast_build_free(build); 5897 5898 return ast; 5899error: 5900 isl_schedule_node_free(node); 5901 isl_ast_build_free(build); 5902 return NULL; 5903} 5904 5905/* Generate an AST that visits the elements in the domain of "schedule" 5906 * in the relative order specified by the schedule tree. 5907 * 5908 * "build" is an isl_ast_build that has been created using 5909 * isl_ast_build_alloc or isl_ast_build_from_context based 5910 * on a parametric set. 5911 * 5912 * The construction starts at the root node of the schedule, 5913 * which is assumed to be a domain node. 5914 */ 5915__isl_give isl_ast_node *isl_ast_build_node_from_schedule( 5916 __isl_keep isl_ast_build *build, __isl_take isl_schedule *schedule) 5917{ 5918 isl_ctx *ctx; 5919 isl_schedule_node *node; 5920 5921 if (!build || !schedule) 5922 goto error; 5923 5924 ctx = isl_ast_build_get_ctx(build); 5925 5926 node = isl_schedule_get_root(schedule); 5927 if (!node) 5928 goto error; 5929 isl_schedule_free(schedule); 5930 5931 build = isl_ast_build_copy(build); 5932 build = isl_ast_build_set_single_valued(build, 0); 5933 if (isl_schedule_node_get_type(node) != isl_schedule_node_domain) 5934 isl_die(ctx, isl_error_unsupported, 5935 "expecting root domain node", 5936 build = isl_ast_build_free(build)); 5937 return build_ast_from_domain(build, node); 5938error: 5939 isl_schedule_free(schedule); 5940 return NULL; 5941} 5942