/*
 * Copyright 2012 Ecole Normale Superieure
 *
 * Use of this software is governed by the MIT license
 *
 * Written by Sven Verdoolaege,
 * Ecole Normale Superieure, 45 rue d’Ulm, 75230 Paris, France
 */

#include <limits.h>
#include <isl/aff.h>
#include <isl/set.h>
#include <isl/ilp.h>
#include <isl/union_map.h>
#include <isl_sort.h>
#include <isl_tarjan.h>
#include <isl_ast_private.h>
#include <isl_ast_build_expr.h>
#include <isl_ast_build_private.h>
#include <isl_ast_graft_private.h>

/* Add the constraint to the list that "user" points to, if it is not
 * a div constraint.
 *
 * This is a callback for isl_basic_set_foreach_constraint, so it
 * takes ownership of "constraint" and returns 0 to continue the
 * iteration.
 */
static int collect_constraint(__isl_take isl_constraint *constraint,
	void *user)
{
	isl_constraint_list **list = user;

	if (isl_constraint_is_div_constraint(constraint))
		isl_constraint_free(constraint);
	else
		*list = isl_constraint_list_add(*list, constraint);

	return 0;
}

/* Extract the constraints of "bset" (except the div constraints)
 * and collect them in an isl_constraint_list.
 *
 * The list is allocated with room for all constraints of "bset";
 * since div constraints are skipped, it may end up holding fewer.
 * Consumes "bset"; returns NULL on error.
 */
static __isl_give isl_constraint_list *isl_constraint_list_from_basic_set(
	__isl_take isl_basic_set *bset)
{
	int n;
	isl_ctx *ctx;
	isl_constraint_list *list;

	if (!bset)
		return NULL;

	ctx = isl_basic_set_get_ctx(bset);

	n = isl_basic_set_n_constraint(bset);
	list = isl_constraint_list_alloc(ctx, n);
	if (isl_basic_set_foreach_constraint(bset,
					&collect_constraint, &list) < 0)
		list = isl_constraint_list_free(list);

	isl_basic_set_free(bset);
	return list;
}

/* Data used in generate_domain.
 *
 * "build" is the input build.
 * "list" collects the results.
 */
struct isl_generate_domain_data {
	isl_ast_build *build;

	isl_ast_graft_list *list;
};

static __isl_give isl_ast_graft_list *generate_next_level(
	__isl_take isl_union_map *executed,
	__isl_take isl_ast_build *build);
static __isl_give isl_ast_graft_list *generate_code(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
	int internal);

/* Generate an AST for a single domain based on
 * the (non single valued) inverse schedule "executed".
 *
 * We extend the schedule with the iteration domain
 * and continue generating through a call to generate_code.
 *
 * In particular, if executed has the form
 *
 *	S -> D
 *
 * then we continue generating code on
 *
 *	[S -> D] -> D
 *
 * The extended inverse schedule is clearly single valued
 * ensuring that the nested generate_code will not reach this function,
 * but will instead create calls to all elements of D that need
 * to be executed from the current schedule domain.
 *
 * The single_valued flag is set on the copied build so that
 * the recursion can detect (and avoid) an infinite loop in case
 * single-valuedness is not recognized (see generate_domain).
 * Returns 0 so that the iteration in the caller continues.
 */
static int generate_non_single_valued(__isl_take isl_map *executed,
	struct isl_generate_domain_data *data)
{
	isl_map *identity;
	isl_ast_build *build;
	isl_ast_graft_list *list;

	build = isl_ast_build_copy(data->build);

	identity = isl_set_identity(isl_map_range(isl_map_copy(executed)));
	executed = isl_map_domain_product(executed, identity);
	build = isl_ast_build_set_single_valued(build, 1);

	list = generate_code(isl_union_map_from_map(executed), build, 1);

	data->list = isl_ast_graft_list_concat(data->list, list);

	return 0;
}

/* Call the at_each_domain callback, if requested by the user,
 * after recording the current inverse schedule in the build.
 */
static __isl_give isl_ast_graft *at_each_domain(__isl_take isl_ast_graft *graft,
	__isl_keep isl_map *executed, __isl_keep isl_ast_build *build)
{
	if (!graft || !build)
		return isl_ast_graft_free(graft);
	if (!build->at_each_domain)
		return graft;

	/* Hand the user a private copy of the build with the current
	 * inverse schedule filled in; the copy is freed below.
	 */
	build = isl_ast_build_copy(build);
	build = isl_ast_build_set_executed(build,
			isl_union_map_from_map(isl_map_copy(executed)));
	if (!build)
		return isl_ast_graft_free(graft);

	/* The callback takes ownership of graft->node and returns
	 * a (possibly different) node, or NULL on error.
	 */
	graft->node = build->at_each_domain(graft->node,
			build, build->at_each_domain_user);
	isl_ast_build_free(build);

	if (!graft->node)
		graft = isl_ast_graft_free(graft);

	return graft;
}

/* Generate an AST for a single domain based on
 * the inverse schedule "executed".
 *
 * If there is more than one domain element associated to the current
 * schedule "time", then we need to continue the generation process
 * in generate_non_single_valued.
 * Note that the inverse schedule being single-valued may depend
 * on constraints that are only available in the original context
 * domain specified by the user.  We therefore first introduce
 * the constraints from data->build->domain.
 * On the other hand, we only perform the test after having taken the gist
 * of the domain as the resulting map is the one from which the call
 * expression is constructed.  Using this map to construct the call
 * expression usually yields simpler results.
 * Because we perform the single-valuedness test on the gisted map,
 * we may in rare cases fail to recognize that the inverse schedule
 * is single-valued.  This becomes problematic if this happens
 * from the recursive call through generate_non_single_valued
 * as we would then end up in an infinite recursion.
 * We therefore check if we are inside a call to generate_non_single_valued
 * and revert to the ungisted map if the gisted map turns out not to be
 * single-valued.
 *
 * Otherwise, we generate a call expression for the single executed
 * domain element and put a guard around it based on the (simplified)
 * domain of "executed".
 *
 * If the user has set an at_each_domain callback, it is called
 * on the constructed call expression node.
 *
 * This is a callback for isl_union_map_foreach_map; it takes
 * ownership of "executed" and returns 0 on success, -1 on error.
 */
static int generate_domain(__isl_take isl_map *executed, void *user)
{
	struct isl_generate_domain_data *data = user;
	isl_ast_graft *graft;
	isl_ast_graft_list *list;
	isl_set *guard;
	isl_map *map;
	int sv;

	executed = isl_map_intersect_domain(executed,
					isl_set_copy(data->build->domain));

	executed = isl_map_coalesce(executed);
	map = isl_map_copy(executed);
	map = isl_ast_build_compute_gist_map_domain(data->build, map);
	sv = isl_map_is_single_valued(map);
	if (sv < 0)
		goto error;
	if (!sv) {
		isl_map_free(map);
		/* Inside generate_non_single_valued the extended schedule
		 * is known to be single-valued, so the gist must have lost
		 * that information; fall back to the ungisted map to avoid
		 * infinite recursion.
		 */
		if (data->build->single_valued)
			map = isl_map_copy(executed);
		else
			return generate_non_single_valued(executed, data);
	}
	guard = isl_map_domain(isl_map_copy(map));
	guard = isl_set_coalesce(guard);
	guard = isl_ast_build_compute_gist(data->build, guard);
	graft = isl_ast_graft_alloc_domain(map, data->build);
	graft = at_each_domain(graft, executed, data->build);

	isl_map_free(executed);
	graft = isl_ast_graft_add_guard(graft, guard, data->build);

	list = isl_ast_graft_list_from_ast_graft(graft);
	data->list = isl_ast_graft_list_concat(data->list, list);

	return 0;
error:
	isl_map_free(map);
	isl_map_free(executed);
	return -1;
}

/* Call build->create_leaf to create a "leaf" node in the AST,
 * encapsulate the result in an isl_ast_graft and return the result
 * as a 1-element list.
 *
 * Note that the node returned by the user may be an entire tree.
 *
 * Before we pass control to the user, we first clear some information
 * from the build that is (presumably) only meaningful
 * for the current code generation.
 * This includes the create_leaf callback itself, so we make a copy
 * of the build first.
 */
static __isl_give isl_ast_graft_list *call_create_leaf(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
	isl_ast_node *node;
	isl_ast_graft *graft;
	isl_ast_build *user_build;

	user_build = isl_ast_build_copy(build);
	user_build = isl_ast_build_set_executed(user_build, executed);
	user_build = isl_ast_build_clear_local_info(user_build);
	if (!user_build)
		node = NULL;
	else
		/* The callback takes ownership of user_build. */
		node = build->create_leaf(user_build, build->create_leaf_user);
	/* isl_ast_graft_alloc handles a NULL node by returning NULL. */
	graft = isl_ast_graft_alloc(node, build);
	isl_ast_build_free(build);
	return isl_ast_graft_list_from_ast_graft(graft);
}

/* Generate an AST after having handled the complete schedule
 * of this call to the code generator.
 *
 * If the user has specified a create_leaf callback, control
 * is passed to the user in call_create_leaf.
 *
 * Otherwise, we generate one or more calls for each individual
 * domain in generate_domain.
 */
static __isl_give isl_ast_graft_list *generate_inner_level(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
	isl_ctx *ctx;
	struct isl_generate_domain_data data = { build };

	if (!build || !executed)
		goto error;

	if (build->create_leaf)
		return call_create_leaf(executed, build);

	ctx = isl_union_map_get_ctx(executed);
	data.list = isl_ast_graft_list_alloc(ctx, 0);
	if (isl_union_map_foreach_map(executed, &generate_domain, &data) < 0)
		data.list = isl_ast_graft_list_free(data.list);

	/* "if (0)" skips the error label on the success path while
	 * sharing the cleanup code below with the error path.
	 */
	if (0)
error:		data.list = NULL;
	isl_ast_build_free(build);
	isl_union_map_free(executed);
	return data.list;
}

/* Call the before_each_for callback, if requested by the user.
 *
 * The id returned by the callback (possibly NULL) is attached
 * to "node" as an annotation.
 */
static __isl_give isl_ast_node *before_each_for(__isl_take isl_ast_node *node,
	__isl_keep isl_ast_build *build)
{
	isl_id *id;

	if (!node || !build)
		return isl_ast_node_free(node);
	if (!build->before_each_for)
		return node;
	id = build->before_each_for(build, build->before_each_for_user);
	node = isl_ast_node_set_annotation(node, id);
	return node;
}

/* Call the after_each_for callback, if requested by the user.
 *
 * The callback takes ownership of graft->node and returns
 * a (possibly different) node, or NULL on error.
 */
static __isl_give isl_ast_graft *after_each_for(__isl_keep isl_ast_graft *graft,
	__isl_keep isl_ast_build *build)
{
	if (!graft || !build)
		return isl_ast_graft_free(graft);
	if (!build->after_each_for)
		return graft;
	graft->node = build->after_each_for(graft->node, build,
						build->after_each_for_user);
	if (!graft->node)
		return isl_ast_graft_free(graft);
	return graft;
}

/* Plug in all the known values of the current and outer dimensions
 * in the domain of "executed".  In principle, we only need to plug
 * in the known value of the current dimension since the values of
 * outer dimensions have been plugged in already.
321 * However, it turns out to be easier to just plug in all known values. 322 */ 323static __isl_give isl_union_map *plug_in_values( 324 __isl_take isl_union_map *executed, __isl_keep isl_ast_build *build) 325{ 326 return isl_ast_build_substitute_values_union_map_domain(build, 327 executed); 328} 329 330/* Check if the constraint "c" is a lower bound on dimension "pos", 331 * an upper bound, or independent of dimension "pos". 332 */ 333static int constraint_type(isl_constraint *c, int pos) 334{ 335 if (isl_constraint_is_lower_bound(c, isl_dim_set, pos)) 336 return 1; 337 if (isl_constraint_is_upper_bound(c, isl_dim_set, pos)) 338 return 2; 339 return 0; 340} 341 342/* Compare the types of the constraints "a" and "b", 343 * resulting in constraints that are independent of "depth" 344 * to be sorted before the lower bounds on "depth", which in 345 * turn are sorted before the upper bounds on "depth". 346 */ 347static int cmp_constraint(__isl_keep isl_constraint *a, 348 __isl_keep isl_constraint *b, void *user) 349{ 350 int *depth = user; 351 int t1 = constraint_type(a, *depth); 352 int t2 = constraint_type(b, *depth); 353 354 return t1 - t2; 355} 356 357/* Extract a lower bound on dimension "pos" from constraint "c". 358 * 359 * If the constraint is of the form 360 * 361 * a x + f(...) >= 0 362 * 363 * then we essentially return 364 * 365 * l = ceil(-f(...)/a) 366 * 367 * However, if the current dimension is strided, then we need to make 368 * sure that the lower bound we construct is of the form 369 * 370 * f + s a 371 * 372 * with f the offset and s the stride. 
 * We therefore compute
 *
 *	f + s * ceil((l - f)/s)
 */
static __isl_give isl_aff *lower_bound(__isl_keep isl_constraint *c,
	int pos, __isl_keep isl_ast_build *build)
{
	isl_aff *aff;

	aff = isl_constraint_get_bound(c, isl_dim_set, pos);
	aff = isl_aff_ceil(aff);

	if (isl_ast_build_has_stride(build, pos)) {
		isl_aff *offset;
		isl_val *stride;

		offset = isl_ast_build_get_offset(build, pos);
		stride = isl_ast_build_get_stride(build, pos);

		/* Round l up to the next value of the form f + s a,
		 * i.e., compute f + s * ceil((l - f)/s).
		 */
		aff = isl_aff_sub(aff, isl_aff_copy(offset));
		aff = isl_aff_scale_down_val(aff, isl_val_copy(stride));
		aff = isl_aff_ceil(aff);
		aff = isl_aff_scale_val(aff, stride);
		aff = isl_aff_add(aff, offset);
	}

	aff = isl_ast_build_compute_gist_aff(build, aff);

	return aff;
}

/* Return the exact lower bound (or upper bound if "upper" is set)
 * of "domain" as a piecewise affine expression.
 *
 * If we are computing a lower bound (of a strided dimension), then
 * we need to make sure it is of the form
 *
 *	f + s a
 *
 * where f is the offset and s is the stride.
 * We therefore need to include the stride constraint before computing
 * the minimum.
 */
static __isl_give isl_pw_aff *exact_bound(__isl_keep isl_set *domain,
	__isl_keep isl_ast_build *build, int upper)
{
	isl_set *stride;
	isl_map *it_map;
	isl_pw_aff *pa;
	isl_pw_multi_aff *pma;

	domain = isl_set_copy(domain);
	if (!upper) {
		stride = isl_ast_build_get_stride_constraint(build);
		domain = isl_set_intersect(domain, stride);
	}
	it_map = isl_ast_build_map_to_iterator(build, domain);
	if (upper)
		pma = isl_map_lexmax_pw_multi_aff(it_map);
	else
		pma = isl_map_lexmin_pw_multi_aff(it_map);
	/* The iterator map has a single output dimension,
	 * so the bound is the first (and only) component.
	 */
	pa = isl_pw_multi_aff_get_pw_aff(pma, 0);
	isl_pw_multi_aff_free(pma);
	pa = isl_ast_build_compute_gist_pw_aff(build, pa);
	pa = isl_pw_aff_coalesce(pa);

	return pa;
}

/* Extract a lower bound on dimension "pos" from each constraint
 * in "constraints" and return the list of lower bounds.
 * If "constraints" has zero elements, then we extract a lower bound
 * from "domain" instead.
 */
static __isl_give isl_pw_aff_list *lower_bounds(
	__isl_keep isl_constraint_list *constraints, int pos,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	isl_ctx *ctx;
	isl_pw_aff_list *list;
	int i, n;

	if (!build)
		return NULL;

	n = isl_constraint_list_n_constraint(constraints);
	if (n == 0) {
		isl_pw_aff *pa;
		pa = exact_bound(domain, build, 0);
		return isl_pw_aff_list_from_pw_aff(pa);
	}

	ctx = isl_ast_build_get_ctx(build);
	list = isl_pw_aff_list_alloc(ctx, n);

	for (i = 0; i < n; ++i) {
		isl_aff *aff;
		isl_constraint *c;

		c = isl_constraint_list_get_constraint(constraints, i);
		aff = lower_bound(c, pos, build);
		isl_constraint_free(c);
		list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
	}

	return list;
}

/* Extract an upper bound on dimension "pos" from each constraint
 * in "constraints" and return the list of upper bounds.
 * If "constraints" has zero elements, then we extract an upper bound
 * from "domain" instead.
 */
static __isl_give isl_pw_aff_list *upper_bounds(
	__isl_keep isl_constraint_list *constraints, int pos,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	isl_ctx *ctx;
	isl_pw_aff_list *list;
	int i, n;

	n = isl_constraint_list_n_constraint(constraints);
	if (n == 0) {
		isl_pw_aff *pa;
		pa = exact_bound(domain, build, 1);
		return isl_pw_aff_list_from_pw_aff(pa);
	}

	ctx = isl_ast_build_get_ctx(build);
	list = isl_pw_aff_list_alloc(ctx, n);

	for (i = 0; i < n; ++i) {
		isl_aff *aff;
		isl_constraint *c;

		c = isl_constraint_list_get_constraint(constraints, i);
		aff = isl_constraint_get_bound(c, isl_dim_set, pos);
		isl_constraint_free(c);
		/* Take the floor since the bound f(...)/a may be rational. */
		aff = isl_aff_floor(aff);
		list = isl_pw_aff_list_add(list, isl_pw_aff_from_aff(aff));
	}

	return list;
}

/* Return an isl_ast_expr that performs the reduction of type "type"
 * on AST expressions corresponding to the elements in "list".
 *
 * The list is assumed to contain at least one element.
 * If the list contains exactly one element, then the returned isl_ast_expr
 * simply computes that affine expression.
 */
static __isl_give isl_ast_expr *reduce_list(enum isl_ast_op_type type,
	__isl_keep isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
{
	int i, n;
	isl_ctx *ctx;
	isl_ast_expr *expr;

	if (!list)
		return NULL;

	n = isl_pw_aff_list_n_pw_aff(list);

	if (n == 1)
		return isl_ast_build_expr_from_pw_aff_internal(build,
				isl_pw_aff_list_get_pw_aff(list, 0));

	ctx = isl_pw_aff_list_get_ctx(list);
	expr = isl_ast_expr_alloc_op(ctx, type, n);
	if (!expr)
		return NULL;

	for (i = 0; i < n; ++i) {
		isl_ast_expr *expr_i;

		expr_i = isl_ast_build_expr_from_pw_aff_internal(build,
				isl_pw_aff_list_get_pw_aff(list, i));
		if (!expr_i)
			return isl_ast_expr_free(expr);
		expr->u.op.args[i] = expr_i;
	}

	return expr;
}

/* Add a guard to "graft" based on "bounds" in the case of a degenerate
 * level (including the special case of an eliminated level).
 *
 * We eliminate the current dimension, simplify the result in the current
 * build and add the result as guards to the graft.
 *
 * Note that we cannot simply drop the constraints on the current dimension
 * even in the eliminated case, because the single affine expression may
 * not be explicitly available in "bounds".  Moreover, the single affine
 * expression may only be defined on a subset of the build domain,
 * so we do in some cases need to insert a guard even in the eliminated case.
 */
static __isl_give isl_ast_graft *add_degenerate_guard(
	__isl_take isl_ast_graft *graft, __isl_keep isl_basic_set *bounds,
	__isl_keep isl_ast_build *build)
{
	int depth;
	isl_set *dom;

	depth = isl_ast_build_get_depth(build);

	dom = isl_set_from_basic_set(isl_basic_set_copy(bounds));
	if (isl_ast_build_has_stride(build, depth)) {
		isl_set *stride;

		stride = isl_ast_build_get_stride_constraint(build);
		dom = isl_set_intersect(dom, stride);
	}
	dom = isl_set_eliminate(dom, isl_dim_set, depth, 1);
	dom = isl_ast_build_compute_gist(build, dom);

	graft = isl_ast_graft_add_guard(graft, dom, build);

	return graft;
}

/* Update "graft" based on "bounds" for the eliminated case.
 *
 * In the eliminated case, no for node is created, so we only need
 * to check if "bounds" imply any guards that need to be inserted.
 */
static __isl_give isl_ast_graft *refine_eliminated(
	__isl_take isl_ast_graft *graft, __isl_keep isl_basic_set *bounds,
	__isl_keep isl_ast_build *build)
{
	return add_degenerate_guard(graft, bounds, build);
}

/* Update "graft" based on "bounds" and "sub_build" for the degenerate case.
 *
 * "build" is the build in which graft->node was created
 * "sub_build" contains information about the current level itself,
 * including the single value attained.
 *
 * We first set the initialization part of the for loop to the single
 * value attained by the current dimension.
 * The increment and condition are not strictly needed as they are known
 * to be "1" and "iterator <= value" respectively.
 * We then
 * check if "bounds" imply any guards that need to be inserted.
 */
static __isl_give isl_ast_graft *refine_degenerate(
	__isl_take isl_ast_graft *graft, __isl_keep isl_basic_set *bounds,
	__isl_keep isl_ast_build *build,
	__isl_keep isl_ast_build *sub_build)
{
	isl_pw_aff *value;

	if (!graft || !sub_build)
		return isl_ast_graft_free(graft);

	value = isl_pw_aff_copy(sub_build->value);

	graft->node->u.f.init = isl_ast_build_expr_from_pw_aff_internal(build,
						value);
	if (!graft->node->u.f.init)
		return isl_ast_graft_free(graft);

	graft = add_degenerate_guard(graft, bounds, build);

	return graft;
}

/* Return the intersection of the constraints in "list" as a set.
 *
 * "list" is required to contain at least one constraint.
 */
static __isl_give isl_set *intersect_constraints(
	__isl_keep isl_constraint_list *list)
{
	int i, n;
	isl_basic_set *bset;

	n = isl_constraint_list_n_constraint(list);
	if (n < 1)
		isl_die(isl_constraint_list_get_ctx(list), isl_error_internal,
			"expecting at least one constraint", return NULL);

	bset = isl_basic_set_from_constraint(
			isl_constraint_list_get_constraint(list, 0));
	for (i = 1; i < n; ++i) {
		isl_basic_set *bset_i;

		bset_i = isl_basic_set_from_constraint(
				isl_constraint_list_get_constraint(list, i));
		bset = isl_basic_set_intersect(bset, bset_i);
	}

	return isl_set_from_basic_set(bset);
}

/* Compute the constraints on the outer dimensions enforced by
 * graft->node and add those constraints to graft->enforced,
 * in case the upper bound is expressed as a set "upper".
 *
 * In particular, if l(...) is a lower bound in "lower", and
 *
 *	-a i + f(...) >= 0		or	a i <= f(...)
 *
 * is an upper bound constraint on the current dimension i,
 * then the for loop enforces the constraint
 *
 *	-a l(...) + f(...) >= 0		or	a l(...) <= f(...)
680 * 681 * We therefore simply take each lower bound in turn, plug it into 682 * the upper bounds and compute the intersection over all lower bounds. 683 * 684 * If a lower bound is a rational expression, then 685 * isl_basic_set_preimage_multi_aff will force this rational 686 * expression to have only integer values. However, the loop 687 * itself does not enforce this integrality constraint. We therefore 688 * use the ceil of the lower bounds instead of the lower bounds themselves. 689 * Other constraints will make sure that the for loop is only executed 690 * when each of the lower bounds attains an integral value. 691 * In particular, potentially rational values only occur in 692 * lower_bound if the offset is a (seemingly) rational expression, 693 * but then outer conditions will make sure that this rational expression 694 * only attains integer values. 695 */ 696static __isl_give isl_ast_graft *set_enforced_from_set( 697 __isl_take isl_ast_graft *graft, 698 __isl_keep isl_pw_aff_list *lower, int pos, __isl_keep isl_set *upper) 699{ 700 isl_space *space; 701 isl_basic_set *enforced; 702 isl_pw_multi_aff *pma; 703 int i, n; 704 705 if (!graft || !lower) 706 return isl_ast_graft_free(graft); 707 708 space = isl_set_get_space(upper); 709 enforced = isl_basic_set_universe(isl_space_copy(space)); 710 711 space = isl_space_map_from_set(space); 712 pma = isl_pw_multi_aff_identity(space); 713 714 n = isl_pw_aff_list_n_pw_aff(lower); 715 for (i = 0; i < n; ++i) { 716 isl_pw_aff *pa; 717 isl_set *enforced_i; 718 isl_basic_set *hull; 719 isl_pw_multi_aff *pma_i; 720 721 pa = isl_pw_aff_list_get_pw_aff(lower, i); 722 pa = isl_pw_aff_ceil(pa); 723 pma_i = isl_pw_multi_aff_copy(pma); 724 pma_i = isl_pw_multi_aff_set_pw_aff(pma_i, pos, pa); 725 enforced_i = isl_set_copy(upper); 726 enforced_i = isl_set_preimage_pw_multi_aff(enforced_i, pma_i); 727 hull = isl_set_simple_hull(enforced_i); 728 enforced = isl_basic_set_intersect(enforced, hull); 729 } 730 731 
isl_pw_multi_aff_free(pma); 732 733 graft = isl_ast_graft_enforce(graft, enforced); 734 735 return graft; 736} 737 738/* Compute the constraints on the outer dimensions enforced by 739 * graft->node and add those constraints to graft->enforced, 740 * in case the upper bound is expressed as 741 * a list of affine expressions "upper". 742 * 743 * The enforced condition is that each lower bound expression is less 744 * than or equal to each upper bound expression. 745 */ 746static __isl_give isl_ast_graft *set_enforced_from_list( 747 __isl_take isl_ast_graft *graft, 748 __isl_keep isl_pw_aff_list *lower, __isl_keep isl_pw_aff_list *upper) 749{ 750 isl_set *cond; 751 isl_basic_set *enforced; 752 753 lower = isl_pw_aff_list_copy(lower); 754 upper = isl_pw_aff_list_copy(upper); 755 cond = isl_pw_aff_list_le_set(lower, upper); 756 enforced = isl_set_simple_hull(cond); 757 graft = isl_ast_graft_enforce(graft, enforced); 758 759 return graft; 760} 761 762/* Does "aff" have a negative constant term? 763 */ 764static int aff_constant_is_negative(__isl_take isl_set *set, 765 __isl_take isl_aff *aff, void *user) 766{ 767 int *neg = user; 768 isl_val *v; 769 770 v = isl_aff_get_constant_val(aff); 771 *neg = isl_val_is_neg(v); 772 isl_val_free(v); 773 isl_set_free(set); 774 isl_aff_free(aff); 775 776 return *neg ? 0 : -1; 777} 778 779/* Does "pa" have a negative constant term over its entire domain? 780 */ 781static int pw_aff_constant_is_negative(__isl_take isl_pw_aff *pa, void *user) 782{ 783 int r; 784 int *neg = user; 785 786 r = isl_pw_aff_foreach_piece(pa, &aff_constant_is_negative, user); 787 isl_pw_aff_free(pa); 788 789 return *neg ? 0 : -1; 790} 791 792/* Does each element in "list" have a negative constant term? 793 * 794 * The callback terminates the iteration as soon an element has been 795 * found that does not have a negative constant term. 
 */
static int list_constant_is_negative(__isl_keep isl_pw_aff_list *list)
{
	int neg = 1;

	/* A negative foreach result with "neg" still set means the
	 * iteration was aborted by the callback (an element without
	 * a negative constant term was not yet found), which indicates
	 * an actual error rather than a negative answer.
	 */
	if (isl_pw_aff_list_foreach(list,
				&pw_aff_constant_is_negative, &neg) < 0 && neg)
		return -1;

	return neg;
}

/* Add 1 to each of the elements in "list", where each of these elements
 * is defined over the internal schedule space of "build".
 */
static __isl_give isl_pw_aff_list *list_add_one(
	__isl_take isl_pw_aff_list *list, __isl_keep isl_ast_build *build)
{
	int i, n;
	isl_space *space;
	isl_aff *aff;
	isl_pw_aff *one;

	/* Construct the constant function 1 on the internal schedule space. */
	space = isl_ast_build_get_space(build, 1);
	aff = isl_aff_zero_on_domain(isl_local_space_from_space(space));
	aff = isl_aff_add_constant_si(aff, 1);
	one = isl_pw_aff_from_aff(aff);

	n = isl_pw_aff_list_n_pw_aff(list);
	for (i = 0; i < n; ++i) {
		isl_pw_aff *pa;
		pa = isl_pw_aff_list_get_pw_aff(list, i);
		pa = isl_pw_aff_add(pa, isl_pw_aff_copy(one));
		list = isl_pw_aff_list_set_pw_aff(list, i, pa);
	}

	isl_pw_aff_free(one);

	return list;
}

/* Set the condition part of the for node graft->node in case
 * the upper bound is represented as a list of piecewise affine expressions.
 *
 * In particular, set the condition to
 *
 *	iterator <= min(list of upper bounds)
 *
 * If each of the upper bounds has a negative constant term, then
 * set the condition to
 *
 *	iterator < min(list of (upper bound + 1)s)
 *
 */
static __isl_give isl_ast_graft *set_for_cond_from_list(
	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *list,
	__isl_keep isl_ast_build *build)
{
	int neg;
	isl_ast_expr *bound, *iterator, *cond;
	enum isl_ast_op_type type = isl_ast_op_le;

	if (!graft || !list)
		return isl_ast_graft_free(graft);

	neg = list_constant_is_negative(list);
	if (neg < 0)
		return isl_ast_graft_free(graft);
	list = isl_pw_aff_list_copy(list);
	if (neg) {
		/* Prefer a strict comparison against bound + 1. */
		list = list_add_one(list, build);
		type = isl_ast_op_lt;
	}

	bound = reduce_list(isl_ast_op_min, list, build);
	iterator = isl_ast_expr_copy(graft->node->u.f.iterator);
	cond = isl_ast_expr_alloc_binary(type, iterator, bound);
	graft->node->u.f.cond = cond;

	isl_pw_aff_list_free(list);
	if (!graft->node->u.f.cond)
		return isl_ast_graft_free(graft);
	return graft;
}

/* Set the condition part of the for node graft->node in case
 * the upper bound is represented as a set.
 */
static __isl_give isl_ast_graft *set_for_cond_from_set(
	__isl_take isl_ast_graft *graft, __isl_keep isl_set *set,
	__isl_keep isl_ast_build *build)
{
	isl_ast_expr *cond;

	if (!graft)
		return NULL;

	cond = isl_ast_build_expr_from_set(build, isl_set_copy(set));
	graft->node->u.f.cond = cond;
	if (!graft->node->u.f.cond)
		return isl_ast_graft_free(graft);
	return graft;
}

/* Construct an isl_ast_expr for the increment (i.e., stride) of
 * the current dimension.
902 */ 903static __isl_give isl_ast_expr *for_inc(__isl_keep isl_ast_build *build) 904{ 905 int depth; 906 isl_val *v; 907 isl_ctx *ctx; 908 909 if (!build) 910 return NULL; 911 ctx = isl_ast_build_get_ctx(build); 912 depth = isl_ast_build_get_depth(build); 913 914 if (!isl_ast_build_has_stride(build, depth)) 915 return isl_ast_expr_alloc_int_si(ctx, 1); 916 917 v = isl_ast_build_get_stride(build, depth); 918 return isl_ast_expr_from_val(v); 919} 920 921/* Should we express the loop condition as 922 * 923 * iterator <= min(list of upper bounds) 924 * 925 * or as a conjunction of constraints? 926 * 927 * The first is constructed from a list of upper bounds. 928 * The second is constructed from a set. 929 * 930 * If there are no upper bounds in "constraints", then this could mean 931 * that "domain" simply doesn't have an upper bound or that we didn't 932 * pick any upper bound. In the first case, we want to generate the 933 * loop condition as a(n empty) conjunction of constraints 934 * In the second case, we will compute 935 * a single upper bound from "domain" and so we use the list form. 936 * 937 * If there are upper bounds in "constraints", 938 * then we use the list form iff the atomic_upper_bound option is set. 939 */ 940static int use_upper_bound_list(isl_ctx *ctx, int n_upper, 941 __isl_keep isl_set *domain, int depth) 942{ 943 if (n_upper > 0) 944 return isl_options_get_ast_build_atomic_upper_bound(ctx); 945 else 946 return isl_set_dim_has_upper_bound(domain, isl_dim_set, depth); 947} 948 949/* Fill in the expressions of the for node in graft->node. 950 * 951 * In particular, 952 * - set the initialization part of the loop to the maximum of the lower bounds 953 * - set the size of the iterator based on the values attained by the iterator 954 * - extract the increment from the stride of the current dimension 955 * - construct the for condition either based on a list of upper bounds 956 * or on a set of upper bound constraints. 
 */
static __isl_give isl_ast_graft *set_for_node_expressions(
	__isl_take isl_ast_graft *graft, __isl_keep isl_pw_aff_list *lower,
	int use_list, __isl_keep isl_pw_aff_list *upper_list,
	__isl_keep isl_set *upper_set, __isl_keep isl_ast_build *build)
{
	isl_ast_node *node;

	if (!graft)
		return NULL;

	/* Work on a copy of the build that carries the constraints
	 * already enforced by the graft; the copy is freed below.
	 */
	build = isl_ast_build_copy(build);
	build = isl_ast_build_set_enforced(build,
					isl_ast_graft_get_enforced(graft));

	node = graft->node;
	node->u.f.init = reduce_list(isl_ast_op_max, lower, build);
	node->u.f.inc = for_inc(build);

	if (use_list)
		graft = set_for_cond_from_list(graft, upper_list, build);
	else
		graft = set_for_cond_from_set(graft, upper_set, build);

	isl_ast_build_free(build);

	if (!node->u.f.iterator || !node->u.f.init ||
	    !node->u.f.cond || !node->u.f.inc)
		return isl_ast_graft_free(graft);

	return graft;
}

/* Update "graft" based on "bounds" and "domain" for the generic,
 * non-degenerate, case.
 *
 * "c_lower" and "c_upper" contain the lower and upper bounds
 * that the loop node should express.
 * "domain" is the subset of the intersection of the constraints
 * for which some code is executed.
 *
 * There may be zero lower bounds or zero upper bounds in "constraints"
 * in case the list of constraints was created
 * based on the atomic option or based on separation with explicit bounds.
 * In that case, we use "domain" to derive lower and/or upper bounds.
 *
 * We first compute a list of one or more lower bounds.
 *
 * Then we decide if we want to express the condition as
 *
 *	iterator <= min(list of upper bounds)
 *
 * or as a conjunction of constraints.
 *
 * The set of enforced constraints is then computed either based on
 * a list of upper bounds or on a set of upper bound constraints.
 * We do not compute any enforced constraints if we were forced
 * to compute a lower or upper bound using exact_bound.  The domains
 * of the resulting expressions may imply some bounds on outer dimensions
 * that we do not want to appear in the enforced constraints since
 * they are not actually enforced by the corresponding code.
 *
 * Finally, we fill in the expressions of the for node.
 */
static __isl_give isl_ast_graft *refine_generic_bounds(
	__isl_take isl_ast_graft *graft,
	__isl_take isl_constraint_list *c_lower,
	__isl_take isl_constraint_list *c_upper,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	int depth;
	isl_ctx *ctx;
	isl_pw_aff_list *lower;
	int use_list;
	isl_set *upper_set = NULL;
	isl_pw_aff_list *upper_list = NULL;
	int n_lower, n_upper;

	if (!graft || !c_lower || !c_upper || !build)
		goto error;

	depth = isl_ast_build_get_depth(build);
	ctx = isl_ast_graft_get_ctx(graft);

	n_lower = isl_constraint_list_n_constraint(c_lower);
	n_upper = isl_constraint_list_n_constraint(c_upper);

	use_list = use_upper_bound_list(ctx, n_upper, domain, depth);

	lower = lower_bounds(c_lower, depth, domain, build);

	if (use_list)
		upper_list = upper_bounds(c_upper, depth, domain, build);
	else if (n_upper > 0)
		upper_set = intersect_constraints(c_upper);
	else
		upper_set = isl_set_universe(isl_set_get_space(domain));

	/* Skip the enforced constraints if a lower or upper bound had to be
	 * derived from "domain" (see the comment above).
	 */
	if (n_lower == 0 || n_upper == 0)
		;
	else if (use_list)
		graft = set_enforced_from_list(graft, lower, upper_list);
	else
		graft = set_enforced_from_set(graft, lower, depth, upper_set);

	graft = set_for_node_expressions(graft, lower, use_list, upper_list,
					upper_set, build);

	isl_pw_aff_list_free(lower);
	isl_pw_aff_list_free(upper_list);
	isl_set_free(upper_set);
	isl_constraint_list_free(c_lower);
	isl_constraint_list_free(c_upper);

	return graft;
error:
	isl_constraint_list_free(c_lower);
	isl_constraint_list_free(c_upper);
	return isl_ast_graft_free(graft);
}

/* Internal data structure used inside count_constraints to keep
 * track of the number of constraints that are independent of dimension "pos",
 * the lower bounds in "pos" and the upper bounds in "pos".
 */
struct isl_ast_count_constraints_data {
	int pos;

	int n_indep;
	int n_lower;
	int n_upper;
};

/* Increment data->n_indep, data->lower or data->upper depending
 * on whether "c" is independent of dimension data->pos,
 * a lower bound or an upper bound.
 */
static int count_constraints(__isl_take isl_constraint *c, void *user)
{
	struct isl_ast_count_constraints_data *data = user;

	if (isl_constraint_is_lower_bound(c, isl_dim_set, data->pos))
		data->n_lower++;
	else if (isl_constraint_is_upper_bound(c, isl_dim_set, data->pos))
		data->n_upper++;
	else
		data->n_indep++;

	isl_constraint_free(c);

	return 0;
}

/* Update "graft" based on "bounds" and "domain" for the generic,
 * non-degenerate, case.
 *
 * "list" represents the list of bounds that need to be encoded by
 * the for loop (or a guard around the for loop).
 * "domain" is the subset of the intersection of the constraints
 * for which some code is executed.
 * "build" is the build in which graft->node was created.
 *
 * We separate lower bounds, upper bounds and constraints that
 * are independent of the loop iterator.
 *
 * The actual for loop bounds are generated in refine_generic_bounds.
 * If there are any constraints that are independent of the loop iterator,
 * we need to put a guard around the for loop (which may get hoisted up
 * to higher levels) and we call refine_generic_bounds in a build
 * where this guard is enforced.
 */
static __isl_give isl_ast_graft *refine_generic_split(
	__isl_take isl_ast_graft *graft, __isl_take isl_constraint_list *list,
	__isl_keep isl_set *domain, __isl_keep isl_ast_build *build)
{
	isl_ast_build *for_build;
	isl_set *guard;
	struct isl_ast_count_constraints_data data;
	isl_constraint_list *lower;
	isl_constraint_list *upper;

	if (!list)
		return isl_ast_graft_free(graft);

	data.pos = isl_ast_build_get_depth(build);

	/* Sort the constraints such that the independent constraints come
	 * first, followed by the lower bounds and then the upper bounds,
	 * so that the three groups can be split off by position below.
	 */
	list = isl_constraint_list_sort(list, &cmp_constraint, &data.pos);
	if (!list)
		return isl_ast_graft_free(graft);

	data.n_indep = data.n_lower = data.n_upper = 0;
	if (isl_constraint_list_foreach(list, &count_constraints, &data) < 0) {
		isl_constraint_list_free(list);
		return isl_ast_graft_free(graft);
	}

	lower = isl_constraint_list_copy(list);
	lower = isl_constraint_list_drop(lower, 0, data.n_indep);
	upper = isl_constraint_list_copy(lower);
	lower = isl_constraint_list_drop(lower, data.n_lower, data.n_upper);
	upper = isl_constraint_list_drop(upper, 0, data.n_lower);

	if (data.n_indep == 0) {
		isl_constraint_list_free(list);
		return refine_generic_bounds(graft, lower, upper,
						domain, build);
	}

	/* Keep only the independent constraints and turn them into a guard. */
	list = isl_constraint_list_drop(list, data.n_indep,
					data.n_lower + data.n_upper);
	guard = intersect_constraints(list);
	isl_constraint_list_free(list);

	for_build = isl_ast_build_copy(build);
	for_build = isl_ast_build_restrict_pending(for_build,
						isl_set_copy(guard));
	graft = refine_generic_bounds(graft, lower, upper, domain, for_build);
	isl_ast_build_free(for_build);

	graft = isl_ast_graft_add_guard(graft, guard, build);

	return graft;
}

/* Add the guard implied by the current stride constraint (if any),
 * but not (necessarily) enforced by the generated AST, to "graft".
 */
static __isl_give isl_ast_graft *add_stride_guard(
	__isl_take isl_ast_graft *graft, __isl_keep isl_ast_build *build)
{
	int depth;
	isl_set *dom;

	depth = isl_ast_build_get_depth(build);
	if (!isl_ast_build_has_stride(build, depth))
		return graft;

	dom = isl_ast_build_get_stride_constraint(build);
	/* Project out the current dimension itself; only the constraints
	 * on the outer dimensions need to appear in the guard.
	 */
	dom = isl_set_eliminate(dom, isl_dim_set, depth, 1);
	dom = isl_ast_build_compute_gist(build, dom);

	graft = isl_ast_graft_add_guard(graft, dom, build);

	return graft;
}

/* Update "graft" based on "bounds" and "domain" for the generic,
 * non-degenerate, case.
 *
 * "bounds" represents the bounds that need to be encoded by
 * the for loop (or a guard around the for loop).
 * "domain" is the subset of "bounds" for which some code is executed.
 * "build" is the build in which graft->node was created.
 *
 * We break up "bounds" into a list of constraints and continue with
 * refine_generic_split.
 */
static __isl_give isl_ast_graft *refine_generic(
	__isl_take isl_ast_graft *graft,
	__isl_keep isl_basic_set *bounds, __isl_keep isl_set *domain,
	__isl_keep isl_ast_build *build)
{
	isl_constraint_list *list;

	if (!build || !graft)
		return isl_ast_graft_free(graft);

	bounds = isl_basic_set_copy(bounds);
	bounds = isl_ast_build_compute_gist_basic_set(build, bounds);
	list = isl_constraint_list_from_basic_set(bounds);

	graft = refine_generic_split(graft, list, domain, build);
	graft = add_stride_guard(graft, build);

	return graft;
}

/* Create a for node for the current level.
 *
 * Mark the for node degenerate if "degenerate" is set.
 */
static __isl_give isl_ast_node *create_for(__isl_keep isl_ast_build *build,
	int degenerate)
{
	int depth;
	isl_id *id;
	isl_ast_node *node;

	if (!build)
		return NULL;

	depth = isl_ast_build_get_depth(build);
	id = isl_ast_build_get_iterator_id(build, depth);
	node = isl_ast_node_alloc_for(id);
	if (degenerate)
		node = isl_ast_node_for_mark_degenerate(node);

	return node;
}

/* Create an AST node for the current dimension based on
 * the schedule domain "bounds" and return the node encapsulated
 * in an isl_ast_graft.
 *
 * "executed" is the current inverse schedule, taking into account
 * the bounds in "bounds"
 * "domain" is the domain of "executed", with inner dimensions projected out.
 * It may be a strict subset of "bounds" in case "bounds" was created
 * based on the atomic option or based on separation with explicit bounds.
 *
 * "domain" may satisfy additional equalities that result
 * from intersecting "executed" with "bounds" in add_node.
 * It may also satisfy some global constraints that were dropped out because
 * we performed separation with explicit bounds.
 * The very first step is then to copy these constraints to "bounds".
 *
 * Since we may be calling before_each_for and after_each_for
 * callbacks, we record the current inverse schedule in the build.
 *
 * We consider three builds,
 * "build" is the one in which the current level is created,
 * "body_build" is the build in which the next level is created,
 * "sub_build" is essentially the same as "body_build", except that
 * the depth has not been increased yet.
 *
 * "build" already contains information (in strides and offsets)
 * about the strides at the current level, but this information is not
 * reflected in the build->domain.
 * We first add this information and the "bounds" to the sub_build->domain.
 * isl_ast_build_set_loop_bounds checks whether the current dimension attains
 * only a single value and whether this single value can be represented using
 * a single affine expression.
 * In the first case, the current level is considered "degenerate".
 * In the second, sub-case, the current level is considered "eliminated".
 * Eliminated levels don't need to be reflected in the AST since we can
 * simply plug in the affine expression.  For degenerate, but non-eliminated,
 * levels, we do introduce a for node, but mark it as degenerate so that
 * it can be printed as an assignment of the single value to the loop
 * "iterator".
 *
 * If the current level is eliminated, we explicitly plug in the value
 * for the current level found by isl_ast_build_set_loop_bounds in the
 * inverse schedule.  This ensures that if we are working on a slice
 * of the domain based on information available in the inverse schedule
 * and the build domain, that then this information is also reflected
 * in the inverse schedule.  This operation also eliminates the current
 * dimension from the inverse schedule making sure no inner dimensions depend
 * on the current dimension.  Otherwise, we create a for node, marking
 * it degenerate if appropriate.  The initial for node is still incomplete
 * and will be completed in either refine_degenerate or refine_generic.
 *
 * We then generate a sequence of grafts for the next level,
 * create a surrounding graft for the current level and insert
 * the for node we created (if the current level is not eliminated).
 *
 * Finally, we set the bounds of the for loop and insert guards
 * (either in the AST or in the graft) in one of
 * refine_eliminated, refine_degenerate or refine_generic.
 */
static __isl_give isl_ast_graft *create_node_scaled(
	__isl_take isl_union_map *executed,
	__isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
	__isl_take isl_ast_build *build)
{
	int depth;
	int degenerate, eliminated;
	isl_basic_set *hull;
	isl_ast_node *node = NULL;
	isl_ast_graft *graft;
	isl_ast_graft_list *children;
	isl_ast_build *sub_build;
	isl_ast_build *body_build;

	/* Copy the extra equalities and global constraints satisfied by
	 * "domain" to "bounds" (see the comment above).
	 */
	domain = isl_ast_build_eliminate_divs(build, domain);
	domain = isl_set_detect_equalities(domain);
	hull = isl_set_unshifted_simple_hull(isl_set_copy(domain));
	bounds = isl_basic_set_intersect(bounds, hull);
	build = isl_ast_build_set_executed(build, isl_union_map_copy(executed));

	depth = isl_ast_build_get_depth(build);
	sub_build = isl_ast_build_copy(build);
	sub_build = isl_ast_build_include_stride(sub_build);
	sub_build = isl_ast_build_set_loop_bounds(sub_build,
						isl_basic_set_copy(bounds));
	degenerate = isl_ast_build_has_value(sub_build);
	eliminated = isl_ast_build_has_affine_value(sub_build, depth);
	if (degenerate < 0 || eliminated < 0)
		executed = isl_union_map_free(executed);
	if (eliminated)
		executed = plug_in_values(executed, sub_build);
	else
		node = create_for(build, degenerate);

	body_build = isl_ast_build_copy(sub_build);
	body_build = isl_ast_build_increase_depth(body_build);
	if (!eliminated)
		node = before_each_for(node, body_build);
	children = generate_next_level(executed,
					isl_ast_build_copy(body_build));

	graft = isl_ast_graft_alloc_level(children, build, sub_build);
	if (!eliminated)
		graft = isl_ast_graft_insert_for(graft, node);
	if (eliminated)
		graft = refine_eliminated(graft, bounds, build);
	else if (degenerate)
		graft = refine_degenerate(graft, bounds, build, sub_build);
	else
		graft = refine_generic(graft, bounds, domain, build);
	if (!eliminated)
		graft = after_each_for(graft, body_build);

	isl_ast_build_free(body_build);
	isl_ast_build_free(sub_build);
	isl_ast_build_free(build);
	isl_basic_set_free(bounds);
	isl_set_free(domain);

	return graft;
}

/* Internal data structure for checking if all constraints involving
 * the input dimension "depth" are such that the other coefficients
 * are multiples of "m", reducing "m" if they are not.
 * If "m" is reduced all the way down to "1", then the check has failed
 * and we break out of the iteration.
 */
struct isl_check_scaled_data {
	int depth;
	isl_val *m;
};

/* If constraint "c" involves the input dimension data->depth,
 * then make sure that all the other coefficients are multiples of data->m,
 * reducing data->m if needed.
 * Break out of the iteration if data->m has become equal to "1".
 */
static int constraint_check_scaled(__isl_take isl_constraint *c, void *user)
{
	struct isl_check_scaled_data *data = user;
	int i, j, n;
	enum isl_dim_type t[] = { isl_dim_param, isl_dim_in, isl_dim_out,
				    isl_dim_div };

	if (!isl_constraint_involves_dims(c, isl_dim_in, data->depth, 1)) {
		isl_constraint_free(c);
		return 0;
	}

	for (i = 0; i < 4; ++i) {
		n = isl_constraint_dim(c, t[i]);
		for (j = 0; j < n; ++j) {
			isl_val *d;

			/* Skip the dimension being scaled itself. */
			if (t[i] == isl_dim_in && j == data->depth)
				continue;
			if (!isl_constraint_involves_dims(c, t[i], j, 1))
				continue;
			d = isl_constraint_get_coefficient_val(c, t[i], j);
			data->m = isl_val_gcd(data->m, d);
			if (isl_val_is_one(data->m))
				break;
		}
		if (j < n)
			break;
	}

	isl_constraint_free(c);

	/* Return a negative value to abort the iteration when data->m
	 * was reduced all the way down to 1.
	 */
	return i < 4 ? -1 : 0;
}

/* For each constraint of "bmap" that involves the input dimension data->depth,
 * make sure that all the other coefficients are multiples of data->m,
 * reducing data->m if needed.
 * Break out of the iteration if data->m has become equal to "1".
 */
static int basic_map_check_scaled(__isl_take isl_basic_map *bmap, void *user)
{
	int r;

	r = isl_basic_map_foreach_constraint(bmap,
						&constraint_check_scaled, user);
	isl_basic_map_free(bmap);

	return r;
}

/* For each constraint of "map" that involves the input dimension data->depth,
 * make sure that all the other coefficients are multiples of data->m,
 * reducing data->m if needed.
 * Break out of the iteration if data->m has become equal to "1".
 */
static int map_check_scaled(__isl_take isl_map *map, void *user)
{
	int r;

	r = isl_map_foreach_basic_map(map, &basic_map_check_scaled, user);
	isl_map_free(map);

	return r;
}

/* Create an AST node for the current dimension based on
 * the schedule domain "bounds" and return the node encapsulated
 * in an isl_ast_graft.
 *
 * "executed" is the current inverse schedule, taking into account
 * the bounds in "bounds"
 * "domain" is the domain of "executed", with inner dimensions projected out.
 *
 *
 * Before moving on to the actual AST node construction in create_node_scaled,
 * we first check if the current dimension is strided and if we can scale
 * down this stride.  Note that we only do this if the ast_build_scale_strides
 * option is set.
 *
 * In particular, let the current dimension take on values
 *
 *	f + s a
 *
 * with a an integer.  We check if we can find an integer m that (obviously)
 * divides both f and s.
 *
 * If so, we check if the current dimension only appears in constraints
 * where the coefficients of the other variables are multiples of m.
 * We perform this extra check to avoid the risk of introducing
 * divisions by scaling down the current dimension.
 *
 * If so, we scale the current dimension down by a factor of m.
 * That is, we plug in
 *
 *	i = m i'				(1)
 *
 * Note that in principle we could always scale down strided loops
 * by plugging in
 *
 *	i = f + s i'
 *
 * but this may result in i' taking on larger values than the original i,
 * due to the shift by "f".
 * By contrast, the scaling in (1) can only reduce the (absolute) value "i".
 */
static __isl_give isl_ast_graft *create_node(__isl_take isl_union_map *executed,
	__isl_take isl_basic_set *bounds, __isl_take isl_set *domain,
	__isl_take isl_ast_build *build)
{
	struct isl_check_scaled_data data;
	isl_ctx *ctx;
	isl_aff *offset;
	isl_val *d;

	ctx = isl_ast_build_get_ctx(build);
	if (!isl_options_get_ast_build_scale_strides(ctx))
		return create_node_scaled(executed, bounds, domain, build);

	data.depth = isl_ast_build_get_depth(build);
	if (!isl_ast_build_has_stride(build, data.depth))
		return create_node_scaled(executed, bounds, domain, build);

	offset = isl_ast_build_get_offset(build, data.depth);
	data.m = isl_ast_build_get_stride(build, data.depth);
	if (!data.m)
		offset = isl_aff_free(offset);
	offset = isl_aff_scale_down_val(offset, isl_val_copy(data.m));
	d = isl_aff_get_denominator_val(offset);
	if (!d)
		executed = isl_union_map_free(executed);

	/* Only use the denominator of f/s as a candidate for m if the
	 * stride is divisible by it; otherwise give up on scaling (m = 1).
	 */
	if (executed && isl_val_is_divisible_by(data.m, d))
		data.m = isl_val_div(data.m, d);
	else {
		data.m = isl_val_set_si(data.m, 1);
		isl_val_free(d);
	}

	if (!isl_val_is_one(data.m)) {
		/* A failed iteration may simply mean that the check was
		 * aborted early because data.m was reduced to 1; only
		 * treat it as an error if data.m is still different from 1.
		 */
		if (isl_union_map_foreach_map(executed, &map_check_scaled,
						&data) < 0 &&
		    !isl_val_is_one(data.m))
			executed = isl_union_map_free(executed);
	}

	if (!isl_val_is_one(data.m)) {
		isl_space *space;
		isl_multi_aff *ma;
		isl_aff *aff;
		isl_map *map;
		isl_union_map *umap;

		/* Plug in i = m i' into "bounds", "domain", "executed"
		 * and the build.
		 */
		space = isl_ast_build_get_space(build, 1);
		space = isl_space_map_from_set(space);
		ma = isl_multi_aff_identity(space);
		aff = isl_multi_aff_get_aff(ma, data.depth);
		aff = isl_aff_scale_val(aff, isl_val_copy(data.m));
		ma = isl_multi_aff_set_aff(ma, data.depth, aff);

		bounds = isl_basic_set_preimage_multi_aff(bounds,
						isl_multi_aff_copy(ma));
		domain = isl_set_preimage_multi_aff(domain,
						isl_multi_aff_copy(ma));
		map = isl_map_reverse(isl_map_from_multi_aff(ma));
		umap = isl_union_map_from_map(map);
		executed = isl_union_map_apply_domain(executed,
						isl_union_map_copy(umap));
		build = isl_ast_build_scale_down(build, isl_val_copy(data.m),
						umap);
	}
	isl_aff_free(offset);
	isl_val_free(data.m);

	return create_node_scaled(executed, bounds, domain, build);
}

/* Add the basic set to the list that "user" points to.
 */
static int collect_basic_set(__isl_take isl_basic_set *bset, void *user)
{
	isl_basic_set_list **list = user;

	*list = isl_basic_set_list_add(*list, bset);

	return 0;
}

/* Extract the basic sets of "set" and collect them in an isl_basic_set_list.
 */
static __isl_give isl_basic_set_list *isl_basic_set_list_from_set(
	__isl_take isl_set *set)
{
	int n;
	isl_ctx *ctx;
	isl_basic_set_list *list;

	if (!set)
		return NULL;

	ctx = isl_set_get_ctx(set);

	n = isl_set_n_basic_set(set);
	list = isl_basic_set_list_alloc(ctx, n);
	if (isl_set_foreach_basic_set(set, &collect_basic_set, &list) < 0)
		list = isl_basic_set_list_free(list);

	isl_set_free(set);
	return list;
}

/* Generate code for the schedule domain "bounds"
 * and add the result to "list".
 *
 * We mainly detect strides and additional equalities here
 * and then pass over control to create_node.
 *
 * "bounds" reflects the bounds on the current dimension and possibly
 * some extra conditions on outer dimensions.
 * It does not, however, include any divs involving the current dimension,
 * so it does not capture any stride constraints.
 * We therefore need to compute that part of the schedule domain that
 * intersects with "bounds" and derive the strides from the result.
 */
static __isl_give isl_ast_graft_list *add_node(
	__isl_take isl_ast_graft_list *list, __isl_take isl_union_map *executed,
	__isl_take isl_basic_set *bounds, __isl_take isl_ast_build *build)
{
	isl_ast_graft *graft;
	isl_set *domain = NULL;
	isl_union_set *uset;
	int empty;

	uset = isl_union_set_from_basic_set(isl_basic_set_copy(bounds));
	executed = isl_union_map_intersect_domain(executed, uset);
	empty = isl_union_map_is_empty(executed);
	if (empty < 0)
		goto error;
	/* Nothing is executed inside "bounds"; there is nothing to add. */
	if (empty)
		goto done;

	uset = isl_union_map_domain(isl_union_map_copy(executed));
	domain = isl_set_from_union_set(uset);
	domain = isl_ast_build_compute_gist(build, domain);
	empty = isl_set_is_empty(domain);
	if (empty < 0)
		goto error;
	if (empty)
		goto done;

	domain = isl_ast_build_eliminate_inner(build, domain);
	build = isl_ast_build_detect_strides(build, isl_set_copy(domain));

	graft = create_node(executed, bounds, domain,
				isl_ast_build_copy(build));
	list = isl_ast_graft_list_add(list, graft);
	isl_ast_build_free(build);
	return list;
error:
	list = isl_ast_graft_list_free(list);
done:
	isl_set_free(domain);
	isl_basic_set_free(bounds);
	isl_union_map_free(executed);
	isl_ast_build_free(build);
	return list;
}

/* Does any element of i follow or coincide with any element of j
 * at the current depth for equal values of the outer dimensions?
 */
static int domain_follows_at_depth(__isl_keep isl_basic_set *i,
	__isl_keep isl_basic_set *j, void *user)
{
	int depth = *(int *) user;
	isl_basic_map *test;
	int empty;
	int l;

	/* Require equal values for the outer dimensions and check whether
	 * some element of "i" can be greater than or equal to some element
	 * of "j" at the current depth.
	 */
	test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
						    isl_basic_set_copy(j));
	for (l = 0; l < depth; ++l)
		test = isl_basic_map_equate(test, isl_dim_in, l,
						isl_dim_out, l);
	test = isl_basic_map_order_ge(test, isl_dim_in, depth,
					isl_dim_out, depth);
	empty = isl_basic_map_is_empty(test);
	isl_basic_map_free(test);

	return empty < 0 ? -1 : !empty;
}

/* Split up each element of "list" into a part that is related to "bset"
 * according to "gt" and a part that is not.
 * Return a list that consists of "bset" and all the pieces.
 */
static __isl_give isl_basic_set_list *add_split_on(
	__isl_take isl_basic_set_list *list, __isl_take isl_basic_set *bset,
	__isl_keep isl_basic_map *gt)
{
	int i, n;
	isl_basic_set_list *res;

	gt = isl_basic_map_copy(gt);
	gt = isl_basic_map_intersect_domain(gt, isl_basic_set_copy(bset));
	n = isl_basic_set_list_n_basic_set(list);
	res = isl_basic_set_list_from_basic_set(bset);
	for (i = 0; res && i < n; ++i) {
		isl_basic_set *bset;
		isl_set *set1, *set2;
		isl_basic_map *bmap;
		int empty;

		bset = isl_basic_set_list_get_basic_set(list, i);
		bmap = isl_basic_map_copy(gt);
		bmap = isl_basic_map_intersect_range(bmap, bset);
		bset = isl_basic_map_range(bmap);
		empty = isl_basic_set_is_empty(bset);
		if (empty < 0)
			res = isl_basic_set_list_free(res);
		if (empty) {
			/* The element is unrelated to "bset"; keep it as is. */
			isl_basic_set_free(bset);
			bset = isl_basic_set_list_get_basic_set(list, i);
			res = isl_basic_set_list_add(res, bset);
			continue;
		}

		/* Add the related part and then the remainder of the
		 * original element.
		 */
		res = isl_basic_set_list_add(res, isl_basic_set_copy(bset));
		set1 = isl_set_from_basic_set(bset);
		bset = isl_basic_set_list_get_basic_set(list, i);
		set2 = isl_set_from_basic_set(bset);
		set1 = isl_set_subtract(set2, set1);
		set1 = isl_set_make_disjoint(set1);

		res = isl_basic_set_list_concat(res,
					isl_basic_set_list_from_set(set1));
	}
	isl_basic_map_free(gt);
	isl_basic_set_list_free(list);
	return res;
}

static __isl_give isl_ast_graft_list *generate_sorted_domains(
	__isl_keep isl_basic_set_list *domain_list,
	__isl_keep isl_union_map *executed,
	__isl_keep isl_ast_build *build);

/* Internal data structure for add_nodes.
 *
 * "executed" and "build" are extra arguments to be passed to add_node.
 * "list" collects the results.
 */
struct isl_add_nodes_data {
	isl_union_map *executed;
	isl_ast_build *build;

	isl_ast_graft_list *list;
};

/* Generate code for the schedule domains in "scc"
 * and add the results to "list".
 *
 * The domains in "scc" form a strongly connected component in the ordering.
 * If the number of domains in "scc" is larger than 1, then this means
 * that we cannot determine a valid ordering for the domains in the component.
 * This should be fairly rare because the individual domains
 * have been made disjoint first.
 * The problem is that the domains may be integrally disjoint but not
 * rationally disjoint.  For example, we may have domains
 *
 *	{ [i,i] : 0 <= i <= 1 } and { [i,1-i] : 0 <= i <= 1 }
 *
 * These two domains have an empty intersection, but their rational
 * relaxations do intersect.  It is impossible to order these domains
 * in the second dimension because the first should be ordered before
 * the second for outer dimension equal to 0, while it should be ordered
 * after for outer dimension equal to 1.
 *
 * This may happen in particular in case of unrolling since the domain
 * of each slice is replaced by its simple hull.
 *
 * For each basic set i in "scc" and for each of the following basic sets j,
 * we split off that part of the basic set i that shares the outer dimensions
 * with j and lies before j in the current dimension.
 * We collect all the pieces in a new list that replaces "scc".
 */
static int add_nodes(__isl_take isl_basic_set_list *scc, void *user)
{
	struct isl_add_nodes_data *data = user;
	int i, n, depth;
	isl_basic_set *bset;
	isl_basic_set_list *list;
	isl_space *space;
	isl_basic_map *gt;

	n = isl_basic_set_list_n_basic_set(scc);
	bset = isl_basic_set_list_get_basic_set(scc, 0);
	/* A singleton component can be handled directly. */
	if (n == 1) {
		isl_basic_set_list_free(scc);
		data->list = add_node(data->list,
				isl_union_map_copy(data->executed), bset,
				isl_ast_build_copy(data->build));
		return data->list ? 0 : -1;
	}

	/* Construct the relation "gt" that orders elements at the current
	 * depth for equal values of the outer dimensions.
	 */
	depth = isl_ast_build_get_depth(data->build);
	space = isl_basic_set_get_space(bset);
	space = isl_space_map_from_set(space);
	gt = isl_basic_map_universe(space);
	for (i = 0; i < depth; ++i)
		gt = isl_basic_map_equate(gt, isl_dim_in, i, isl_dim_out, i);
	gt = isl_basic_map_order_gt(gt, isl_dim_in, depth, isl_dim_out, depth);

	list = isl_basic_set_list_from_basic_set(bset);
	for (i = 1; i < n; ++i) {
		bset = isl_basic_set_list_get_basic_set(scc, i);
		list = add_split_on(list, bset, gt);
	}
	isl_basic_map_free(gt);
	isl_basic_set_list_free(scc);
	scc = list;
	data->list = isl_ast_graft_list_concat(data->list,
		    generate_sorted_domains(scc, data->executed, data->build));
	isl_basic_set_list_free(scc);

	return data->list ? 0 : -1;
}

/* Sort the domains in "domain_list" according to the execution order
 * at the current depth (for equal values of the outer dimensions),
 * generate code for each of them, collecting the results in a list.
 * If no code is generated (because the intersection of the inverse schedule
 * with the domains turns out to be empty), then an empty list is returned.
 *
 * The caller is responsible for ensuring that the basic sets in "domain_list"
 * are pair-wise disjoint.  It can, however, in principle happen that
 * two basic sets should be ordered one way for one value of the outer
 * dimensions and the other way for some other value of the outer dimensions.
 * We therefore play safe and look for strongly connected components.
 * The function add_nodes takes care of handling non-trivial components.
 */
static __isl_give isl_ast_graft_list *generate_sorted_domains(
	__isl_keep isl_basic_set_list *domain_list,
	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
{
	isl_ctx *ctx;
	struct isl_add_nodes_data data;
	int depth;
	int n;

	if (!domain_list)
		return NULL;

	ctx = isl_basic_set_list_get_ctx(domain_list);
	n = isl_basic_set_list_n_basic_set(domain_list);
	data.list = isl_ast_graft_list_alloc(ctx, n);
	if (n == 0)
		return data.list;
	/* A single domain does not need any sorting. */
	if (n == 1)
		return add_node(data.list, isl_union_map_copy(executed),
			isl_basic_set_list_get_basic_set(domain_list, 0),
			isl_ast_build_copy(build));

	depth = isl_ast_build_get_depth(build);
	data.executed = executed;
	data.build = build;
	if (isl_basic_set_list_foreach_scc(domain_list,
					&domain_follows_at_depth, &depth,
					&add_nodes, &data) < 0)
		data.list = isl_ast_graft_list_free(data.list);

	return data.list;
}

/* Do i and j share any values for the outer dimensions?
 */
static int shared_outer(__isl_keep isl_basic_set *i,
	__isl_keep isl_basic_set *j, void *user)
{
	int depth = *(int *) user;
	isl_basic_map *test;
	int empty;
	int l;

	/* Equate the outer dimensions of "i" and "j" and check whether
	 * the result is non-empty.
	 */
	test = isl_basic_map_from_domain_and_range(isl_basic_set_copy(i),
						    isl_basic_set_copy(j));
	for (l = 0; l < depth; ++l)
		test = isl_basic_map_equate(test, isl_dim_in, l,
						isl_dim_out, l);
	empty = isl_basic_map_is_empty(test);
	isl_basic_map_free(test);

	return empty < 0 ? -1 : !empty;
}

/* Internal data structure for generate_sorted_domains_wrap.
 *
 * "n" is the total number of basic sets.
 * "executed" and "build" are extra arguments to be passed
 * to generate_sorted_domains.
 *
 * "single" is set to 1 by generate_sorted_domains_wrap if there
 * is only a single component.
 * "list" collects the results.
 */
struct isl_ast_generate_parallel_domains_data {
	int n;
	isl_union_map *executed;
	isl_ast_build *build;

	int single;
	isl_ast_graft_list *list;
};

/* Call generate_sorted_domains on "scc", fuse the result into a list
 * with either zero or one graft and collect these single element
 * lists into data->list.
 *
 * If there is only one component, i.e., if the number of basic sets
 * in the current component is equal to the total number of basic sets,
 * then data->single is set to 1 and the result of generate_sorted_domains
 * is not fused.
 */
static int generate_sorted_domains_wrap(__isl_take isl_basic_set_list *scc,
	void *user)
{
	struct isl_ast_generate_parallel_domains_data *data = user;
	isl_ast_graft_list *list;

	list = generate_sorted_domains(scc, data->executed, data->build);
	data->single = isl_basic_set_list_n_basic_set(scc) == data->n;
	if (!data->single)
		list = isl_ast_graft_list_fuse(list, data->build);
	/* Concatenation below consumes both lists; on the first call
	 * data->list is still NULL, so simply take over "list".
	 */
	if (!data->list)
		data->list = list;
	else
		data->list = isl_ast_graft_list_concat(data->list, list);

	isl_basic_set_list_free(scc);
	if (!data->list)
		return -1;

	return 0;
}

/* Look for any (weakly connected) components in the "domain_list"
 * of domains that share some values of the outer dimensions.
 * That is, domains in different components do not share any values
 * of the outer dimensions.  This means that these components
 * can be freely reordered.
 * Within each of the components, we sort the domains according
 * to the execution order at the current depth.
 *
 * If there is more than one component, then generate_sorted_domains_wrap
 * fuses the result of each call to generate_sorted_domains
 * into a list with either zero or one graft and collects these (at most)
 * single element lists into a bigger list. This means that the elements of the
 * final list can be freely reordered.  In particular, we sort them
 * according to an arbitrary but fixed ordering to ease merging of
 * graft lists from different components.
 */
static __isl_give isl_ast_graft_list *generate_parallel_domains(
	__isl_keep isl_basic_set_list *domain_list,
	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
{
	int depth;
	struct isl_ast_generate_parallel_domains_data data;

	if (!domain_list)
		return NULL;

	data.n = isl_basic_set_list_n_basic_set(domain_list);
	/* With at most one basic set there is nothing to decompose. */
	if (data.n <= 1)
		return generate_sorted_domains(domain_list, executed, build);

	depth = isl_ast_build_get_depth(build);
	data.list = NULL;
	data.executed = executed;
	data.build = build;
	data.single = 0;
	if (isl_basic_set_list_foreach_scc(domain_list, &shared_outer, &depth,
					    &generate_sorted_domains_wrap,
					    &data) < 0)
		data.list = isl_ast_graft_list_free(data.list);

	if (!data.single)
		data.list = isl_ast_graft_list_sort_guard(data.list);

	return data.list;
}

/* Internal data for separate_domain.
 *
 * "explicit" is set if we only want to use explicit bounds.
 *
 * "domain" collects the separated domains.
 */
struct isl_separate_domain_data {
	isl_ast_build *build;
	int explicit;
	isl_set *domain;
};

/* Extract implicit bounds on the current dimension for the executed "map".
 *
 * The domain of "map" may involve inner dimensions, so we
 * need to eliminate them.
 */
static __isl_give isl_set *implicit_bounds(__isl_take isl_map *map,
	__isl_keep isl_ast_build *build)
{
	isl_set *domain;

	domain = isl_map_domain(map);
	domain = isl_ast_build_eliminate(build, domain);

	return domain;
}

/* Extract explicit bounds on the current dimension for the executed "map".
 *
 * Rather than eliminating the inner dimensions as in implicit_bounds,
 * we simply drop any constraints involving those inner dimensions.
 * The idea is that most bounds that are implied by constraints on the
 * inner dimensions will be enforced by for loops and not by explicit guards.
 * There is then no need to separate along those bounds.
 */
static __isl_give isl_set *explicit_bounds(__isl_take isl_map *map,
	__isl_keep isl_ast_build *build)
{
	isl_set *domain;
	int depth, dim;

	/* Drop all constraints involving the inner (output) dimensions
	 * before projecting onto the domain.
	 */
	dim = isl_map_dim(map, isl_dim_out);
	map = isl_map_drop_constraints_involving_dims(map, isl_dim_out, 0, dim);

	domain = isl_map_domain(map);
	depth = isl_ast_build_get_depth(build);
	dim = isl_set_dim(domain, isl_dim_set);
	domain = isl_set_detect_equalities(domain);
	domain = isl_set_drop_constraints_involving_dims(domain,
				isl_dim_set, depth + 1, dim - (depth + 1));
	domain = isl_set_remove_divs_involving_dims(domain,
				isl_dim_set, depth, 1);
	domain = isl_set_remove_unknown_divs(domain);

	return domain;
}

/* Split data->domain into pieces that intersect with the range of "map"
 * and pieces that do not intersect with the range of "map"
 * and then add that part of the range of "map" that does not intersect
 * with data->domain.
 */
static int separate_domain(__isl_take isl_map *map, void *user)
{
	struct isl_separate_domain_data *data = user;
	isl_set *domain;
	isl_set *d1, *d2;

	if (data->explicit)
		domain = explicit_bounds(map, data->build);
	else
		domain = implicit_bounds(map, data->build);

	domain = isl_set_coalesce(domain);
	domain = isl_set_make_disjoint(domain);
	/* d1: part of "domain" not yet in data->domain;
	 * d2: part of data->domain not covered by "domain".
	 * The result is the intersection plus both differences,
	 * i.e., the union split along the boundary of "domain".
	 */
	d1 = isl_set_subtract(isl_set_copy(domain), isl_set_copy(data->domain));
	d2 = isl_set_subtract(isl_set_copy(data->domain), isl_set_copy(domain));
	data->domain = isl_set_intersect(data->domain, domain);
	data->domain = isl_set_union(data->domain, d1);
	data->domain = isl_set_union(data->domain, d2);

	return 0;
}

/* Separate the schedule domains of "executed".
 *
 * That is, break up the domain of "executed" into basic sets,
 * such that for each basic set S, every element in S is associated with
 * the same domain spaces.
 *
 * "space" is the (single) domain space of "executed".
 */
static __isl_give isl_set *separate_schedule_domains(
	__isl_take isl_space *space, __isl_take isl_union_map *executed,
	__isl_keep isl_ast_build *build)
{
	struct isl_separate_domain_data data = { build };
	isl_ctx *ctx;

	ctx = isl_ast_build_get_ctx(build);
	data.explicit = isl_options_get_ast_build_separation_bounds(ctx) ==
			    ISL_AST_BUILD_SEPARATION_BOUNDS_EXPLICIT;
	data.domain = isl_set_empty(space);
	if (isl_union_map_foreach_map(executed, &separate_domain, &data) < 0)
		data.domain = isl_set_free(data.domain);

	isl_union_map_free(executed);
	return data.domain;
}

/* Temporary data used during the search for a lower bound for unrolling.
 *
 * "domain" is the original set for which to find a lower bound
 * "depth" is the dimension for which to find a lower bound
 *
 * "lower" is the best lower bound found so far.
 * It is NULL if we have not
 * found any yet.
 * "n" is the corresponding size.  If lower is NULL, then the value of n
 * is undefined.
 */
struct isl_find_unroll_data {
	isl_set *domain;
	int depth;

	isl_aff *lower;
	int *n;
};

/* Check if we can use "c" as a lower bound and if it is better than
 * any previously found lower bound.
 *
 * If "c" does not involve the dimension at the current depth,
 * then we cannot use it.
 * Otherwise, let "c" be of the form
 *
 *	i >= f(j)/a
 *
 * We compute the maximal value of
 *
 *	-ceil(f(j)/a)) + i + 1
 *
 * over the domain.  If there is such a value "n", then we know
 *
 *	-ceil(f(j)/a)) + i + 1 <= n
 *
 * or
 *
 *	i < ceil(f(j)/a)) + n
 *
 * meaning that we can use ceil(f(j)/a)) as a lower bound for unrolling.
 * We just need to check if we have found any lower bound before and
 * if the new lower bound is better (smaller n) than the previously found
 * lower bounds.
 */
static int update_unrolling_lower_bound(struct isl_find_unroll_data *data,
	__isl_keep isl_constraint *c)
{
	isl_aff *aff, *lower;
	isl_val *max;

	if (!isl_constraint_is_lower_bound(c, isl_dim_set, data->depth))
		return 0;

	/* lower = ceil(f(j)/a); aff = -lower + i + 1, whose maximum over
	 * the domain (if finite) is the unroll factor candidate.
	 */
	lower = isl_constraint_get_bound(c, isl_dim_set, data->depth);
	lower = isl_aff_ceil(lower);
	aff = isl_aff_copy(lower);
	aff = isl_aff_neg(aff);
	aff = isl_aff_add_coefficient_si(aff, isl_dim_in, data->depth, 1);
	aff = isl_aff_add_constant_si(aff, 1);
	max = isl_set_max_val(data->domain, aff);
	isl_aff_free(aff);

	if (!max)
		goto error;
	/* An unbounded maximum means "n" would be infinite; skip this bound. */
	if (isl_val_is_infty(max)) {
		isl_val_free(max);
		isl_aff_free(lower);
		return 0;
	}

	/* Keep this bound only if it fits in an int and improves on
	 * any previously recorded bound (smaller n is better).
	 */
	if (isl_val_cmp_si(max, INT_MAX) <= 0 &&
	    (!data->lower || isl_val_cmp_si(max, *data->n) < 0)) {
		isl_aff_free(data->lower);
		data->lower = lower;
		*data->n = isl_val_get_num_si(max);
	} else
		isl_aff_free(lower);
	isl_val_free(max);

	return 1;
error:
	isl_aff_free(lower);
	return -1;
}

/* Check if we can use "c" as a lower bound and if it is better than
 * any previously found lower bound.
 */
static int constraint_find_unroll(__isl_take isl_constraint *c, void *user)
{
	struct isl_find_unroll_data *data;
	int r;

	data = (struct isl_find_unroll_data *) user;
	r = update_unrolling_lower_bound(data, c);
	isl_constraint_free(c);

	return r;
}

/* Look for a lower bound l(i) on the dimension at "depth"
 * and a size n such that "domain" is a subset of
 *
 *	{ [i] : l(i) <= i_d < l(i) + n }
 *
 * where d is "depth" and l(i) depends only on earlier dimensions.
 * Furthermore, try and find a lower bound such that n is as small as possible.
 * In particular, "n" needs to be finite.
 *
 * Inner dimensions have been eliminated from "domain" by the caller.
 *
 * We first construct a collection of lower bounds on the input set
 * by computing its simple hull.  We then iterate through them,
 * discarding those that we cannot use (either because they do not
 * involve the dimension at "depth" or because they have no corresponding
 * upper bound, meaning that "n" would be unbounded) and pick out the
 * best from the remaining ones.
 *
 * If we cannot find a suitable lower bound, then we consider that
 * to be an error.
 */
static __isl_give isl_aff *find_unroll_lower_bound(__isl_keep isl_set *domain,
	int depth, int *n)
{
	struct isl_find_unroll_data data = { domain, depth, NULL, n };
	isl_basic_set *hull;

	hull = isl_set_simple_hull(isl_set_copy(domain));

	if (isl_basic_set_foreach_constraint(hull,
					    &constraint_find_unroll, &data) < 0)
		goto error;

	isl_basic_set_free(hull);

	if (!data.lower)
		isl_die(isl_set_get_ctx(domain), isl_error_invalid,
			"cannot find lower bound for unrolling", return NULL);

	return data.lower;
error:
	isl_basic_set_free(hull);
	return isl_aff_free(data.lower);
}

/* Return the constraint
 *
 *	i_"depth" = aff + offset
 */
static __isl_give isl_constraint *at_offset(int depth, __isl_keep isl_aff *aff,
	int offset)
{
	aff = isl_aff_copy(aff);
	aff = isl_aff_add_coefficient_si(aff, isl_dim_in, depth, -1);
	aff = isl_aff_add_constant_si(aff, offset);
	return isl_equality_from_aff(aff);
}

/* Data structure for storing the results and the intermediate objects
 * of compute_domains.
 *
 * "list" is the main result of the function and contains a list
 * of disjoint basic sets for which code should be generated.
 *
 * "executed" and "build" are inputs to compute_domains.
 * "schedule_domain" is the domain of "executed".
2262 * 2263 * "option" constains the domains at the current depth that should by 2264 * atomic, separated or unrolled. These domains are as specified by 2265 * the user, except that inner dimensions have been eliminated and 2266 * that they have been made pair-wise disjoint. 2267 * 2268 * "sep_class" contains the user-specified split into separation classes 2269 * specialized to the current depth. 2270 * "done" contains the union of the separation domains that have already 2271 * been handled. 2272 */ 2273struct isl_codegen_domains { 2274 isl_basic_set_list *list; 2275 2276 isl_union_map *executed; 2277 isl_ast_build *build; 2278 isl_set *schedule_domain; 2279 2280 isl_set *option[3]; 2281 2282 isl_map *sep_class; 2283 isl_set *done; 2284}; 2285 2286/* Extend domains->list with a list of basic sets, one for each value 2287 * of the current dimension in "domain" and remove the corresponding 2288 * sets from the class domain. Return the updated class domain. 2289 * The divs that involve the current dimension have not been projected out 2290 * from this domain. 2291 * 2292 * Since we are going to be iterating over the individual values, 2293 * we first check if there are any strides on the current dimension. 2294 * If there is, we rewrite the current dimension i as 2295 * 2296 * i = stride i' + offset 2297 * 2298 * and then iterate over individual values of i' instead. 2299 * 2300 * We then look for a lower bound on i' and a size such that the domain 2301 * is a subset of 2302 * 2303 * { [j,i'] : l(j) <= i' < l(j) + n } 2304 * 2305 * and then take slices of the domain at values of i' 2306 * between l(j) and l(j) + n - 1. 2307 * 2308 * We compute the unshifted simple hull of each slice to ensure that 2309 * we have a single basic set per offset. The slicing constraint 2310 * may get simplified away before the unshifted simple hull is taken 2311 * and may therefore in some rare cases disappear from the result. 
2312 * We therefore explicitly add the constraint back after computing 2313 * the unshifted simple hull to ensure that the basic sets 2314 * remain disjoint. The constraints that are dropped by taking the hull 2315 * will be taken into account at the next level, as in the case of the 2316 * atomic option. 2317 * 2318 * Finally, we map i' back to i and add each basic set to the list. 2319 * Since we may have dropped some constraints, we intersect with 2320 * the class domain again to ensure that each element in the list 2321 * is disjoint from the other class domains. 2322 */ 2323static __isl_give isl_set *do_unroll(struct isl_codegen_domains *domains, 2324 __isl_take isl_set *domain, __isl_take isl_set *class_domain) 2325{ 2326 int i, n; 2327 int depth; 2328 isl_ctx *ctx; 2329 isl_aff *lower; 2330 isl_multi_aff *expansion; 2331 isl_basic_map *bmap; 2332 isl_set *unroll_domain; 2333 isl_ast_build *build; 2334 2335 if (!domain) 2336 return isl_set_free(class_domain); 2337 2338 ctx = isl_set_get_ctx(domain); 2339 depth = isl_ast_build_get_depth(domains->build); 2340 build = isl_ast_build_copy(domains->build); 2341 domain = isl_ast_build_eliminate_inner(build, domain); 2342 build = isl_ast_build_detect_strides(build, isl_set_copy(domain)); 2343 expansion = isl_ast_build_get_stride_expansion(build); 2344 2345 domain = isl_set_preimage_multi_aff(domain, 2346 isl_multi_aff_copy(expansion)); 2347 domain = isl_ast_build_eliminate_divs(build, domain); 2348 2349 isl_ast_build_free(build); 2350 2351 lower = find_unroll_lower_bound(domain, depth, &n); 2352 if (!lower) 2353 class_domain = isl_set_free(class_domain); 2354 2355 bmap = isl_basic_map_from_multi_aff(expansion); 2356 2357 unroll_domain = isl_set_empty(isl_set_get_space(domain)); 2358 2359 for (i = 0; class_domain && i < n; ++i) { 2360 isl_set *set; 2361 isl_basic_set *bset; 2362 isl_constraint *slice; 2363 isl_basic_set_list *list; 2364 2365 slice = at_offset(depth, lower, i); 2366 set = isl_set_copy(domain); 2367 set 
= isl_set_add_constraint(set, isl_constraint_copy(slice)); 2368 bset = isl_set_unshifted_simple_hull(set); 2369 bset = isl_basic_set_add_constraint(bset, slice); 2370 bset = isl_basic_set_apply(bset, isl_basic_map_copy(bmap)); 2371 set = isl_set_from_basic_set(bset); 2372 unroll_domain = isl_set_union(unroll_domain, isl_set_copy(set)); 2373 set = isl_set_intersect(set, isl_set_copy(class_domain)); 2374 set = isl_set_make_disjoint(set); 2375 list = isl_basic_set_list_from_set(set); 2376 domains->list = isl_basic_set_list_concat(domains->list, list); 2377 } 2378 2379 class_domain = isl_set_subtract(class_domain, unroll_domain); 2380 2381 isl_aff_free(lower); 2382 isl_set_free(domain); 2383 isl_basic_map_free(bmap); 2384 2385 return class_domain; 2386} 2387 2388/* Add domains to domains->list for each individual value of the current 2389 * dimension, for that part of the schedule domain that lies in the 2390 * intersection of the option domain and the class domain. 2391 * Remove the corresponding sets from the class domain and 2392 * return the updated class domain. 2393 * 2394 * We first break up the unroll option domain into individual pieces 2395 * and then handle each of them separately. The unroll option domain 2396 * has been made disjoint in compute_domains_init_options, 2397 * 2398 * Note that we actively want to combine different pieces of the 2399 * schedule domain that have the same value at the current dimension. 2400 * We therefore need to break up the unroll option domain before 2401 * intersecting with class and schedule domain, hoping that the 2402 * unroll option domain specified by the user is relatively simple. 
 */
static __isl_give isl_set *compute_unroll_domains(
	struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
{
	isl_set *unroll_domain;
	isl_basic_set_list *unroll_list;
	int i, n;
	int empty;

	empty = isl_set_is_empty(domains->option[unroll]);
	if (empty < 0)
		return isl_set_free(class_domain);
	if (empty)
		return class_domain;

	unroll_domain = isl_set_copy(domains->option[unroll]);
	unroll_list = isl_basic_set_list_from_set(unroll_domain);

	n = isl_basic_set_list_n_basic_set(unroll_list);
	for (i = 0; i < n; ++i) {
		isl_basic_set *bset;

		bset = isl_basic_set_list_get_basic_set(unroll_list, i);
		unroll_domain = isl_set_from_basic_set(bset);
		unroll_domain = isl_set_intersect(unroll_domain,
						    isl_set_copy(class_domain));
		unroll_domain = isl_set_intersect(unroll_domain,
					isl_set_copy(domains->schedule_domain));

		empty = isl_set_is_empty(unroll_domain);
		if (empty >= 0 && empty) {
			isl_set_free(unroll_domain);
			continue;
		}

		/* do_unroll consumes unroll_domain (also on error, i.e.,
		 * when empty < 0 left it in an error state).
		 */
		class_domain = do_unroll(domains, unroll_domain, class_domain);
	}

	isl_basic_set_list_free(unroll_list);

	return class_domain;
}

/* Try and construct a single basic set that includes the intersection of
 * the schedule domain, the atomic option domain and the class domain.
 * Add the resulting basic set(s) to domains->list and remove them
 * from class_domain.  Return the updated class domain.
 *
 * We construct a single domain rather than trying to combine
 * the schedule domains of individual domains because we are working
 * within a single component so that non-overlapping schedule domains
 * should already have been separated.
 * We do however need to make sure that this single domain is a subset
 * of the class domain so that it would not intersect with any other
 * class domains.  This means that we may end up splitting up the atomic
 * domain in case separation classes are being used.
 *
 * "domain" is the intersection of the schedule domain and the class domain,
 * with inner dimensions projected out.
 */
static __isl_give isl_set *compute_atomic_domain(
	struct isl_codegen_domains *domains, __isl_take isl_set *class_domain)
{
	isl_basic_set *bset;
	isl_basic_set_list *list;
	isl_set *domain, *atomic_domain;
	int empty;

	domain = isl_set_copy(domains->option[atomic]);
	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
	domain = isl_set_intersect(domain,
				isl_set_copy(domains->schedule_domain));
	empty = isl_set_is_empty(domain);
	if (empty < 0)
		class_domain = isl_set_free(class_domain);
	/* On error (empty < 0), this branch is also taken: "domain" is
	 * freed and the already-freed (NULL) class_domain is returned.
	 */
	if (empty) {
		isl_set_free(domain);
		return class_domain;
	}

	domain = isl_ast_build_eliminate(domains->build, domain);
	domain = isl_set_coalesce(domain);
	bset = isl_set_unshifted_simple_hull(domain);
	domain = isl_set_from_basic_set(bset);
	atomic_domain = isl_set_copy(domain);
	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
	class_domain = isl_set_subtract(class_domain, atomic_domain);
	domain = isl_set_make_disjoint(domain);
	list = isl_basic_set_list_from_set(domain);
	domains->list = isl_basic_set_list_concat(domains->list, list);

	return class_domain;
}

/* Split up the schedule domain into uniform basic sets,
 * in the sense that each element in a basic set is associated to
 * elements of the same domains, and add the result to domains->list.
 * Do this for that part of the schedule domain that lies in the
 * intersection of "class_domain" and the separate option domain.
 *
 * "class_domain" may or may not include the constraints
 * of the schedule domain, but this does not make a difference
 * since we are going to intersect it with the domain of the inverse schedule.
 * If it includes schedule domain constraints, then they may involve
 * inner dimensions, but we will eliminate them in separation_domain.
 */
static int compute_separate_domain(struct isl_codegen_domains *domains,
	__isl_keep isl_set *class_domain)
{
	isl_space *space;
	isl_set *domain;
	isl_union_map *executed;
	isl_basic_set_list *list;
	int empty;

	domain = isl_set_copy(domains->option[separate]);
	domain = isl_set_intersect(domain, isl_set_copy(class_domain));
	executed = isl_union_map_copy(domains->executed);
	executed = isl_union_map_intersect_domain(executed,
				    isl_union_set_from_set(domain));
	empty = isl_union_map_is_empty(executed);
	if (empty < 0 || empty) {
		isl_union_map_free(executed);
		return empty < 0 ? -1 : 0;
	}

	/* separate_schedule_domains consumes both space and executed. */
	space = isl_set_get_space(class_domain);
	domain = separate_schedule_domains(space, executed, domains->build);

	list = isl_basic_set_list_from_set(domain);
	domains->list = isl_basic_set_list_concat(domains->list, list);

	return 0;
}

/* Split up the domain at the current depth into disjoint
 * basic sets for which code should be generated separately
 * for the given separation class domain.
 *
 * If any separation classes have been defined, then "class_domain"
 * is the domain of the current class and does not refer to inner dimensions.
 * Otherwise, "class_domain" is the universe domain.
 *
 * We first make sure that the class domain is disjoint from
 * previously considered class domains.
 *
 * The separate domains can be computed directly from the "class_domain".
 *
 * The unroll, atomic and remainder domains need the constraints
 * from the schedule domain.
 *
 * For unrolling, the actual schedule domain is needed (with divs that
 * may refer to the current dimension) so that stride detection can be
 * performed.
 *
 * For atomic and remainder domains, inner dimensions and divs involving
 * the current dimensions should be eliminated.
 * In case we are working within a separation class, we need to intersect
 * the result with the current "class_domain" to ensure that the domains
 * are disjoint from those generated from other class domains.
 *
 * The domain that has been made atomic may be larger than specified
 * by the user since it needs to be representable as a single basic set.
 * This possibly larger domain is removed from class_domain by
 * compute_atomic_domain.  It is computed first so that the extended domain
 * would not overlap with any domains computed before.
 * Similarly, the unrolled domains may have some constraints removed and
 * may therefore also be larger than specified by the user.
 *
 * If anything is left after handling separate, unroll and atomic,
 * we split it up into basic sets and append the basic sets to domains->list.
 */
static int compute_partial_domains(struct isl_codegen_domains *domains,
	__isl_take isl_set *class_domain)
{
	isl_basic_set_list *list;
	isl_set *domain;

	/* Make the class domain disjoint from earlier classes and
	 * record it as handled.
	 */
	class_domain = isl_set_subtract(class_domain,
					isl_set_copy(domains->done));
	domains->done = isl_set_union(domains->done,
					isl_set_copy(class_domain));

	class_domain = compute_atomic_domain(domains, class_domain);
	class_domain = compute_unroll_domains(domains, class_domain);

	domain = isl_set_copy(class_domain);

	if (compute_separate_domain(domains, domain) < 0)
		goto error;
	domain = isl_set_subtract(domain,
				    isl_set_copy(domains->option[separate]));

	domain = isl_set_intersect(domain,
				isl_set_copy(domains->schedule_domain));

	domain = isl_ast_build_eliminate(domains->build, domain);
	domain = isl_set_intersect(domain, isl_set_copy(class_domain));

	domain = isl_set_coalesce(domain);
	domain = isl_set_make_disjoint(domain);

	list = isl_basic_set_list_from_set(domain);
	domains->list = isl_basic_set_list_concat(domains->list, list);

	isl_set_free(class_domain);

	return 0;
error:
	isl_set_free(domain);
	isl_set_free(class_domain);
	return -1;
}

/* Split up the domain at the current depth into disjoint
 * basic sets for which code should be generated separately
 * for the separation class identified by "pnt".
 *
 * We extract the corresponding class domain from domains->sep_class,
 * eliminate inner dimensions and pass control to compute_partial_domains.
2623 */ 2624static int compute_class_domains(__isl_take isl_point *pnt, void *user) 2625{ 2626 struct isl_codegen_domains *domains = user; 2627 isl_set *class_set; 2628 isl_set *domain; 2629 int disjoint; 2630 2631 class_set = isl_set_from_point(pnt); 2632 domain = isl_map_domain(isl_map_intersect_range( 2633 isl_map_copy(domains->sep_class), class_set)); 2634 domain = isl_ast_build_compute_gist(domains->build, domain); 2635 domain = isl_ast_build_eliminate(domains->build, domain); 2636 2637 disjoint = isl_set_plain_is_disjoint(domain, domains->schedule_domain); 2638 if (disjoint < 0) 2639 return -1; 2640 if (disjoint) { 2641 isl_set_free(domain); 2642 return 0; 2643 } 2644 2645 return compute_partial_domains(domains, domain); 2646} 2647 2648/* Extract the domains at the current depth that should be atomic, 2649 * separated or unrolled and store them in option. 2650 * 2651 * The domains specified by the user might overlap, so we make 2652 * them disjoint by subtracting earlier domains from later domains. 2653 */ 2654static void compute_domains_init_options(isl_set *option[3], 2655 __isl_keep isl_ast_build *build) 2656{ 2657 enum isl_ast_build_domain_type type, type2; 2658 2659 for (type = atomic; type <= separate; ++type) { 2660 option[type] = isl_ast_build_get_option_domain(build, type); 2661 for (type2 = atomic; type2 < type; ++type2) 2662 option[type] = isl_set_subtract(option[type], 2663 isl_set_copy(option[type2])); 2664 } 2665 2666 option[unroll] = isl_set_coalesce(option[unroll]); 2667 option[unroll] = isl_set_make_disjoint(option[unroll]); 2668} 2669 2670/* Split up the domain at the current depth into disjoint 2671 * basic sets for which code should be generated separately, 2672 * based on the user-specified options. 2673 * Return the list of disjoint basic sets. 2674 * 2675 * There are three kinds of domains that we need to keep track of. 
 * - the "schedule domain" is the domain of "executed"
 * - the "class domain" is the domain corresponding to the current
 *	separation class
 * - the "option domain" is the domain corresponding to one of the options
 *	atomic, unroll or separate
 *
 * We first consider the individual values of the separation classes
 * and split up the domain for each of them separately.
 * Finally, we consider the remainder.  If no separation classes were
 * specified, then we call compute_partial_domains with the universe
 * "class_domain".  Otherwise, we take the "schedule_domain" as "class_domain",
 * with inner dimensions removed.  We do this because we want to
 * avoid computing the complement of the class domains (i.e., the difference
 * between the universe and domains->done).
 */
static __isl_give isl_basic_set_list *compute_domains(
	__isl_keep isl_union_map *executed, __isl_keep isl_ast_build *build)
{
	struct isl_codegen_domains domains;
	isl_ctx *ctx;
	isl_set *domain;
	isl_union_set *schedule_domain;
	isl_set *classes;
	isl_space *space;
	int n_param;
	enum isl_ast_build_domain_type type;
	int empty;

	if (!executed)
		return NULL;

	ctx = isl_union_map_get_ctx(executed);
	domains.list = isl_basic_set_list_alloc(ctx, 0);

	schedule_domain = isl_union_map_domain(isl_union_map_copy(executed));
	domain = isl_set_from_union_set(schedule_domain);

	compute_domains_init_options(domains.option, build);

	/* The separation class values live in the range of sep_class;
	 * drop the parameters so that each point is a plain class value.
	 */
	domains.sep_class = isl_ast_build_get_separation_class(build);
	classes = isl_map_range(isl_map_copy(domains.sep_class));
	n_param = isl_set_dim(classes, isl_dim_param);
	classes = isl_set_project_out(classes, isl_dim_param, 0, n_param);

	space = isl_set_get_space(domain);
	domains.build = build;
	domains.schedule_domain = isl_set_copy(domain);
	domains.executed = executed;
	domains.done = isl_set_empty(space);

	if (isl_set_foreach_point(classes, &compute_class_domains, &domains) < 0)
		domains.list = isl_basic_set_list_free(domains.list);
	isl_set_free(classes);

	/* Handle the remainder: universe if no class was processed,
	 * otherwise the schedule domain with inner dimensions eliminated.
	 */
	empty = isl_set_is_empty(domains.done);
	if (empty < 0) {
		domains.list = isl_basic_set_list_free(domains.list);
		domain = isl_set_free(domain);
	} else if (empty) {
		isl_set_free(domain);
		domain = isl_set_universe(isl_set_get_space(domains.done));
	} else {
		domain = isl_ast_build_eliminate(build, domain);
	}
	if (compute_partial_domains(&domains, domain) < 0)
		domains.list = isl_basic_set_list_free(domains.list);

	isl_set_free(domains.schedule_domain);
	isl_set_free(domains.done);
	isl_map_free(domains.sep_class);
	for (type = atomic; type <= separate; ++type)
		isl_set_free(domains.option[type]);

	return domains.list;
}

/* Generate code for a single component, after shifting (if any)
 * has been applied.
 *
 * We first split up the domain at the current depth into disjoint
 * basic sets based on the user-specified options.
 * Then we generate code for each of them and concatenate the results.
2758 */ 2759static __isl_give isl_ast_graft_list *generate_shifted_component( 2760 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 2761{ 2762 isl_basic_set_list *domain_list; 2763 isl_ast_graft_list *list = NULL; 2764 2765 domain_list = compute_domains(executed, build); 2766 list = generate_parallel_domains(domain_list, executed, build); 2767 2768 isl_basic_set_list_free(domain_list); 2769 isl_union_map_free(executed); 2770 isl_ast_build_free(build); 2771 2772 return list; 2773} 2774 2775struct isl_set_map_pair { 2776 isl_set *set; 2777 isl_map *map; 2778}; 2779 2780/* Given an array "domain" of isl_set_map_pairs and an array "order" 2781 * of indices into the "domain" array, 2782 * return the union of the "map" fields of the elements 2783 * indexed by the first "n" elements of "order". 2784 */ 2785static __isl_give isl_union_map *construct_component_executed( 2786 struct isl_set_map_pair *domain, int *order, int n) 2787{ 2788 int i; 2789 isl_map *map; 2790 isl_union_map *executed; 2791 2792 map = isl_map_copy(domain[order[0]].map); 2793 executed = isl_union_map_from_map(map); 2794 for (i = 1; i < n; ++i) { 2795 map = isl_map_copy(domain[order[i]].map); 2796 executed = isl_union_map_add_map(executed, map); 2797 } 2798 2799 return executed; 2800} 2801 2802/* Generate code for a single component, after shifting (if any) 2803 * has been applied. 2804 * 2805 * The component inverse schedule is specified as the "map" fields 2806 * of the elements of "domain" indexed by the first "n" elements of "order". 2807 */ 2808static __isl_give isl_ast_graft_list *generate_shifted_component_from_list( 2809 struct isl_set_map_pair *domain, int *order, int n, 2810 __isl_take isl_ast_build *build) 2811{ 2812 isl_union_map *executed; 2813 2814 executed = construct_component_executed(domain, order, n); 2815 return generate_shifted_component(executed, build); 2816} 2817 2818/* Does set dimension "pos" of "set" have an obviously fixed value? 
2819 */ 2820static int dim_is_fixed(__isl_keep isl_set *set, int pos) 2821{ 2822 int fixed; 2823 isl_val *v; 2824 2825 v = isl_set_plain_get_val_if_fixed(set, isl_dim_set, pos); 2826 if (!v) 2827 return -1; 2828 fixed = !isl_val_is_nan(v); 2829 isl_val_free(v); 2830 2831 return fixed; 2832} 2833 2834/* Given an array "domain" of isl_set_map_pairs and an array "order" 2835 * of indices into the "domain" array, 2836 * do all (except for at most one) of the "set" field of the elements 2837 * indexed by the first "n" elements of "order" have a fixed value 2838 * at position "depth"? 2839 */ 2840static int at_most_one_non_fixed(struct isl_set_map_pair *domain, 2841 int *order, int n, int depth) 2842{ 2843 int i; 2844 int non_fixed = -1; 2845 2846 for (i = 0; i < n; ++i) { 2847 int f; 2848 2849 f = dim_is_fixed(domain[order[i]].set, depth); 2850 if (f < 0) 2851 return -1; 2852 if (f) 2853 continue; 2854 if (non_fixed >= 0) 2855 return 0; 2856 non_fixed = i; 2857 } 2858 2859 return 1; 2860} 2861 2862/* Given an array "domain" of isl_set_map_pairs and an array "order" 2863 * of indices into the "domain" array, 2864 * eliminate the inner dimensions from the "set" field of the elements 2865 * indexed by the first "n" elements of "order", provided the current 2866 * dimension does not have a fixed value. 2867 * 2868 * Return the index of the first element in "order" with a corresponding 2869 * "set" field that does not have an (obviously) fixed value. 
2870 */ 2871static int eliminate_non_fixed(struct isl_set_map_pair *domain, 2872 int *order, int n, int depth, __isl_keep isl_ast_build *build) 2873{ 2874 int i; 2875 int base = -1; 2876 2877 for (i = n - 1; i >= 0; --i) { 2878 int f; 2879 f = dim_is_fixed(domain[order[i]].set, depth); 2880 if (f < 0) 2881 return -1; 2882 if (f) 2883 continue; 2884 domain[order[i]].set = isl_ast_build_eliminate_inner(build, 2885 domain[order[i]].set); 2886 base = i; 2887 } 2888 2889 return base; 2890} 2891 2892/* Given an array "domain" of isl_set_map_pairs and an array "order" 2893 * of indices into the "domain" array, 2894 * find the element of "domain" (amongst those indexed by the first "n" 2895 * elements of "order") with the "set" field that has the smallest 2896 * value for the current iterator. 2897 * 2898 * Note that the domain with the smallest value may depend on the parameters 2899 * and/or outer loop dimension. Since the result of this function is only 2900 * used as heuristic, we only make a reasonable attempt at finding the best 2901 * domain, one that should work in case a single domain provides the smallest 2902 * value for the current dimension over all values of the parameters 2903 * and outer dimensions. 2904 * 2905 * In particular, we compute the smallest value of the first domain 2906 * and replace it by that of any later domain if that later domain 2907 * has a smallest value that is smaller for at least some value 2908 * of the parameters and outer dimensions. 
 */
static int first_offset(struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_ast_build *build)
{
	int i;
	isl_map *min_first;
	int first = 0;

	/* Lexicographically minimal value of the current iterator
	 * over the first domain; this is the current best candidate.
	 */
	min_first = isl_ast_build_map_to_iterator(build,
		isl_set_copy(domain[order[0]].set));
	min_first = isl_map_lexmin(min_first);

	for (i = 1; i < n; ++i) {
		isl_map *min, *test;
		int empty;

		min = isl_ast_build_map_to_iterator(build,
			isl_set_copy(domain[order[i]].set));
		min = isl_map_lexmin(min);
		/* Is the minimum of domain "i" smaller than the current
		 * best minimum for at least some value of the parameters
		 * and outer dimensions?  If the test map is non-empty,
		 * domain "i" becomes the new candidate.
		 */
		test = isl_map_copy(min);
		test = isl_map_apply_domain(isl_map_copy(min_first), test);
		test = isl_map_order_lt(test, isl_dim_in, 0, isl_dim_out, 0);
		empty = isl_map_is_empty(test);
		isl_map_free(test);
		if (empty >= 0 && !empty) {
			isl_map_free(min_first);
			first = i;
			min_first = min;
		} else
			isl_map_free(min);

		/* A negative "empty" signals an error; abort the search. */
		if (empty < 0)
			break;
	}

	isl_map_free(min_first);

	/* i < n means the loop was left early due to an error. */
	return i < n ? -1 : first;
}

/* Construct a shifted inverse schedule based on the original inverse schedule,
 * the stride and the offset.
 *
 * The original inverse schedule is specified as the "map" fields
 * of the elements of "domain" indexed by the first "n" elements of "order".
 *
 * "stride" and "offset" are such that the difference
 * between the values of the current dimension of domain "i"
 * and the values of the current dimension for some reference domain are
 * equal to
 *
 *	stride * integer + offset[i]
 *
 * Moreover, 0 <= offset[i] < stride.
 *
 * For each domain, we create a map
 *
 *	{ [..., j, ...] -> [..., j - offset[i], offset[i], ....] }
 *
 * where j refers to the current dimension and the other dimensions are
 * unchanged, and apply this map to the original schedule domain.
2970 * 2971 * For example, for the original schedule 2972 * 2973 * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 } 2974 * 2975 * and assuming the offset is 0 for the A domain and 1 for the B domain, 2976 * we apply the mapping 2977 * 2978 * { [j] -> [j, 0] } 2979 * 2980 * to the schedule of the "A" domain and the mapping 2981 * 2982 * { [j - 1] -> [j, 1] } 2983 * 2984 * to the schedule of the "B" domain. 2985 * 2986 * 2987 * Note that after the transformation, the differences between pairs 2988 * of values of the current dimension over all domains are multiples 2989 * of stride and that we have therefore exposed the stride. 2990 * 2991 * 2992 * To see that the mapping preserves the lexicographic order, 2993 * first note that each of the individual maps above preserves the order. 2994 * If the value of the current iterator is j1 in one domain and j2 in another, 2995 * then if j1 = j2, we know that the same map is applied to both domains 2996 * and the order is preserved. 2997 * Otherwise, let us assume, without loss of generality, that j1 < j2. 2998 * If c1 >= c2 (with c1 and c2 the corresponding offsets), then 2999 * 3000 * j1 - c1 < j2 - c2 3001 * 3002 * and the order is preserved. 3003 * If c1 < c2, then we know 3004 * 3005 * 0 <= c2 - c1 < s 3006 * 3007 * We also have 3008 * 3009 * j2 - j1 = n * s + r 3010 * 3011 * with n >= 0 and 0 <= r < s. 3012 * In other words, r = c2 - c1. 3013 * If n > 0, then 3014 * 3015 * j1 - c1 < j2 - c2 3016 * 3017 * If n = 0, then 3018 * 3019 * j1 - c1 = j2 - c2 3020 * 3021 * and so 3022 * 3023 * (j1 - c1, c1) << (j2 - c2, c2) 3024 * 3025 * with "<<" the lexicographic order, proving that the order is preserved 3026 * in all cases. 
3027 */ 3028static __isl_give isl_union_map *contruct_shifted_executed( 3029 struct isl_set_map_pair *domain, int *order, int n, 3030 __isl_keep isl_val *stride, __isl_keep isl_multi_val *offset, 3031 __isl_take isl_ast_build *build) 3032{ 3033 int i; 3034 isl_union_map *executed; 3035 isl_space *space; 3036 isl_map *map; 3037 int depth; 3038 isl_constraint *c; 3039 3040 depth = isl_ast_build_get_depth(build); 3041 space = isl_ast_build_get_space(build, 1); 3042 executed = isl_union_map_empty(isl_space_copy(space)); 3043 space = isl_space_map_from_set(space); 3044 map = isl_map_identity(isl_space_copy(space)); 3045 map = isl_map_eliminate(map, isl_dim_out, depth, 1); 3046 map = isl_map_insert_dims(map, isl_dim_out, depth + 1, 1); 3047 space = isl_space_insert_dims(space, isl_dim_out, depth + 1, 1); 3048 3049 c = isl_equality_alloc(isl_local_space_from_space(space)); 3050 c = isl_constraint_set_coefficient_si(c, isl_dim_in, depth, 1); 3051 c = isl_constraint_set_coefficient_si(c, isl_dim_out, depth, -1); 3052 3053 for (i = 0; i < n; ++i) { 3054 isl_map *map_i; 3055 isl_val *v; 3056 3057 v = isl_multi_val_get_val(offset, i); 3058 if (!v) 3059 break; 3060 map_i = isl_map_copy(map); 3061 map_i = isl_map_fix_val(map_i, isl_dim_out, depth + 1, 3062 isl_val_copy(v)); 3063 v = isl_val_neg(v); 3064 c = isl_constraint_set_constant_val(c, v); 3065 map_i = isl_map_add_constraint(map_i, isl_constraint_copy(c)); 3066 3067 map_i = isl_map_apply_domain(isl_map_copy(domain[order[i]].map), 3068 map_i); 3069 executed = isl_union_map_add_map(executed, map_i); 3070 } 3071 3072 isl_constraint_free(c); 3073 isl_map_free(map); 3074 3075 if (i < n) 3076 executed = isl_union_map_free(executed); 3077 3078 return executed; 3079} 3080 3081/* Generate code for a single component, after exposing the stride, 3082 * given that the schedule domain is "shifted strided". 
 *
 * The component inverse schedule is specified as the "map" fields
 * of the elements of "domain" indexed by the first "n" elements of "order".
 *
 * The schedule domain being "shifted strided" means that the differences
 * between the values of the current dimension of domain "i"
 * and the values of the current dimension for some reference domain are
 * equal to
 *
 *	stride * integer + offset[i]
 *
 * We first look for the domain with the "smallest" value for the current
 * dimension and adjust the offsets such that the offset of the "smallest"
 * domain is equal to zero.  The other offsets are reduced modulo stride.
 *
 * Based on this information, we construct a new inverse schedule in
 * contruct_shifted_executed that exposes the stride.
 * Since this involves the introduction of a new schedule dimension,
 * the build needs to be changed accordingly.
 * After computing the AST, the newly introduced dimension needs
 * to be removed again from the list of grafts.  We do this by plugging
 * in a mapping that represents the new schedule domain in terms of the
 * old schedule domain.
 */
static __isl_give isl_ast_graft_list *generate_shift_component(
	struct isl_set_map_pair *domain, int *order, int n,
	__isl_keep isl_val *stride, __isl_keep isl_multi_val *offset,
	__isl_take isl_ast_build *build)
{
	isl_ast_graft_list *list;
	int first;
	int depth;
	isl_ctx *ctx;
	isl_val *val;
	isl_multi_val *mv;
	isl_space *space;
	isl_multi_aff *ma, *zero;
	isl_union_map *executed;

	ctx = isl_ast_build_get_ctx(build);
	depth = isl_ast_build_get_depth(build);

	/* Use the domain with the smallest value for the current iterator
	 * as the reference domain.
	 */
	first = first_offset(domain, order, n, build);
	if (first < 0)
		return isl_ast_build_free(build);

	/* Normalize the offsets such that the offset of the reference
	 * domain becomes zero; reduce the other offsets modulo the stride.
	 */
	mv = isl_multi_val_copy(offset);
	val = isl_multi_val_get_val(offset, first);
	val = isl_val_neg(val);
	mv = isl_multi_val_add_val(mv, val);
	mv = isl_multi_val_mod_val(mv, isl_val_copy(stride));

	executed = contruct_shifted_executed(domain, order, n, stride, mv,
						build);
	/* Construct "ma", mapping the old schedule domain to the new
	 * (extended) one by inserting a zero at position depth + 1;
	 * plugging it into the grafts below removes the newly introduced
	 * dimension again.
	 */
	space = isl_ast_build_get_space(build, 1);
	space = isl_space_map_from_set(space);
	ma = isl_multi_aff_identity(isl_space_copy(space));
	space = isl_space_from_domain(isl_space_domain(space));
	space = isl_space_add_dims(space, isl_dim_out, 1);
	zero = isl_multi_aff_zero(space);
	ma = isl_multi_aff_range_splice(ma, depth + 1, zero);
	build = isl_ast_build_insert_dim(build, depth + 1);
	list = generate_shifted_component(executed, build);

	list = isl_ast_graft_list_preimage_multi_aff(list, ma);

	isl_multi_val_free(mv);

	return list;
}

/* Generate code for a single component.
 *
 * The component inverse schedule is specified as the "map" fields
 * of the elements of "domain" indexed by the first "n" elements of "order".
 *
 * This function may modify the "set" fields of "domain".
3160 * 3161 * Before proceeding with the actual code generation for the component, 3162 * we first check if there are any "shifted" strides, meaning that 3163 * the schedule domains of the individual domains are all strided, 3164 * but that they have different offsets, resulting in the union 3165 * of schedule domains not being strided anymore. 3166 * 3167 * The simplest example is the schedule 3168 * 3169 * { A[i] -> [2i]: 0 <= i < 10; B[i] -> [2i+1] : 0 <= i < 10 } 3170 * 3171 * Both schedule domains are strided, but their union is not. 3172 * This function detects such cases and then rewrites the schedule to 3173 * 3174 * { A[i] -> [2i, 0]: 0 <= i < 10; B[i] -> [2i, 1] : 0 <= i < 10 } 3175 * 3176 * In the new schedule, the schedule domains have the same offset (modulo 3177 * the stride), ensuring that the union of schedule domains is also strided. 3178 * 3179 * 3180 * If there is only a single domain in the component, then there is 3181 * nothing to do. Similarly, if the current schedule dimension has 3182 * a fixed value for almost all domains then there is nothing to be done. 3183 * In particular, we need at least two domains where the current schedule 3184 * dimension does not have a fixed value. 3185 * Finally, if any of the options refer to the current schedule dimension, 3186 * then we bail out as well. It would be possible to reformulate the options 3187 * in terms of the new schedule domain, but that would introduce constraints 3188 * that separate the domains in the options and that is something we would 3189 * like to avoid. 3190 * 3191 * 3192 * To see if there is any shifted stride, we look at the differences 3193 * between the values of the current dimension in pairs of domains 3194 * for equal values of outer dimensions. These differences should be 3195 * of the form 3196 * 3197 * m x + r 3198 * 3199 * with "m" the stride and "r" a constant. 
Note that we cannot perform 3200 * this analysis on individual domains as the lower bound in each domain 3201 * may depend on parameters or outer dimensions and so the current dimension 3202 * itself may not have a fixed remainder on division by the stride. 3203 * 3204 * In particular, we compare the first domain that does not have an 3205 * obviously fixed value for the current dimension to itself and all 3206 * other domains and collect the offsets and the gcd of the strides. 3207 * If the gcd becomes one, then we failed to find shifted strides. 3208 * If the gcd is zero, then the differences were all fixed, meaning 3209 * that some domains had non-obviously fixed values for the current dimension. 3210 * If all the offsets are the same (for those domains that do not have 3211 * an obviously fixed value for the current dimension), then we do not 3212 * apply the transformation. 3213 * If none of the domains were skipped, then there is nothing to do. 3214 * If some of them were skipped, then if we apply separation, the schedule 3215 * domain should get split in pieces with a (non-shifted) stride. 3216 * 3217 * Otherwise, we apply a shift to expose the stride in 3218 * generate_shift_component. 
 */
static __isl_give isl_ast_graft_list *generate_component(
	struct isl_set_map_pair *domain, int *order, int n,
	__isl_take isl_ast_build *build)
{
	int i, d;
	int depth;
	isl_ctx *ctx;
	isl_map *map;
	isl_set *deltas;
	isl_val *gcd = NULL;
	isl_multi_val *mv;
	int fixed, skip;
	int base;
	isl_ast_graft_list *list;
	int res = 0;

	depth = isl_ast_build_get_depth(build);

	/* Skip the shifted-stride analysis if there is only one domain,
	 * if at most one domain has a non-fixed value at the current depth,
	 * or if the options refer to the current schedule dimension.
	 */
	skip = n == 1;
	if (skip >= 0 && !skip)
		skip = at_most_one_non_fixed(domain, order, n, depth);
	if (skip >= 0 && !skip)
		skip = isl_ast_build_options_involve_depth(build);
	if (skip < 0)
		return isl_ast_build_free(build);
	if (skip)
		return generate_shifted_component_from_list(domain,
							    order, n, build);

	/* "base" is the first domain without an obviously fixed value;
	 * it serves as the reference domain for the difference sets below.
	 */
	base = eliminate_non_fixed(domain, order, n, depth, build);
	if (base < 0)
		return isl_ast_build_free(build);

	ctx = isl_ast_build_get_ctx(build);

	/* mv[i] collects the offset of domain "i" relative to "base". */
	mv = isl_multi_val_zero(isl_space_set_alloc(ctx, 0, n));

	fixed = 1;
	for (i = 0; i < n; ++i) {
		isl_val *r, *m;

		/* Compute the differences at the current depth between
		 * the reference domain and domain "i", for equal values
		 * of the outer dimensions.
		 */
		map = isl_map_from_domain_and_range(
				isl_set_copy(domain[order[base]].set),
				isl_set_copy(domain[order[i]].set));
		for (d = 0; d < depth; ++d)
			map = isl_map_equate(map, isl_dim_in, d,
						isl_dim_out, d);
		deltas = isl_map_deltas(map);
		res = isl_set_dim_residue_class_val(deltas, depth, &m, &r);
		isl_set_free(deltas);
		if (res < 0)
			break;

		if (i == 0)
			gcd = m;
		else
			gcd = isl_val_gcd(gcd, m);
		/* A gcd of one means no shifted stride was found. */
		if (isl_val_is_one(gcd)) {
			isl_val_free(r);
			break;
		}
		mv = isl_multi_val_set_val(mv, i, r);

		/* Domains with an obviously fixed value do not take part
		 * in the all-offsets-equal check below.
		 */
		res = dim_is_fixed(domain[order[i]].set, depth);
		if (res < 0)
			break;
		if (res)
			continue;

		/* Track whether all (non-fixed) offsets are the same. */
		if (fixed && i > base) {
			isl_val *a, *b;
			a = isl_multi_val_get_val(mv, i);
			b = isl_multi_val_get_val(mv, base);
			if (isl_val_ne(a, b))
				fixed = 0;
			isl_val_free(a);
			isl_val_free(b);
		}
	}

	if (res < 0 || !gcd) {
		/* An error occurred during the analysis. */
		isl_ast_build_free(build);
		list = NULL;
	} else if (i < n || fixed || isl_val_is_zero(gcd)) {
		/* No usable shifted stride: generate code directly. */
		list = generate_shifted_component_from_list(domain,
							    order, n, build);
	} else {
		/* Apply a shift to expose the stride. */
		list = generate_shift_component(domain, order, n, gcd, mv,
						build);
	}

	isl_val_free(gcd);
	isl_multi_val_free(mv);

	return list;
}

/* Store both "map" itself and its domain in the
 * structure pointed to by *next and advance to the next array element.
 */
static int extract_domain(__isl_take isl_map *map, void *user)
{
	struct isl_set_map_pair **next = user;

	(*next)->map = isl_map_copy(map);
	(*next)->set = isl_map_domain(map);
	(*next)++;

	return 0;
}

/* Internal data for any_scheduled_after.
 *
 * "depth" is the number of loops that have already been generated
 * "group_coscheduled" is a local copy of options->ast_build_group_coscheduled
 * "domain" is an array of set-map pairs corresponding to the different
 * iteration domains.  The set is the schedule domain, i.e., the domain
 * of the inverse schedule, while the map is the inverse schedule itself.
 */
struct isl_any_scheduled_after_data {
	int depth;
	int group_coscheduled;
	struct isl_set_map_pair *domain;
};

/* Is any element of domain "i" scheduled after any element of domain "j"
 * (for a common iteration of the first data->depth loops)?
 *
 * data->domain[i].set contains the domain of the inverse schedule
 * for domain "i", i.e., elements in the schedule domain.
 *
 * If data->group_coscheduled is set, then we also return 1 if there
 * is any pair of elements in the two domains that are scheduled together.
3353 */ 3354static int any_scheduled_after(int i, int j, void *user) 3355{ 3356 struct isl_any_scheduled_after_data *data = user; 3357 int dim = isl_set_dim(data->domain[i].set, isl_dim_set); 3358 int pos; 3359 3360 for (pos = data->depth; pos < dim; ++pos) { 3361 int follows; 3362 3363 follows = isl_set_follows_at(data->domain[i].set, 3364 data->domain[j].set, pos); 3365 3366 if (follows < -1) 3367 return -1; 3368 if (follows > 0) 3369 return 1; 3370 if (follows < 0) 3371 return 0; 3372 } 3373 3374 return data->group_coscheduled; 3375} 3376 3377/* Look for independent components at the current depth and generate code 3378 * for each component separately. The resulting lists of grafts are 3379 * merged in an attempt to combine grafts with identical guards. 3380 * 3381 * Code for two domains can be generated separately if all the elements 3382 * of one domain are scheduled before (or together with) all the elements 3383 * of the other domain. We therefore consider the graph with as nodes 3384 * the domains and an edge between two nodes if any element of the first 3385 * node is scheduled after any element of the second node. 3386 * If the ast_build_group_coscheduled is set, then we also add an edge if 3387 * there is any pair of elements in the two domains that are scheduled 3388 * together. 3389 * Code is then generated (by generate_component) 3390 * for each of the strongly connected components in this graph 3391 * in their topological order. 3392 * 3393 * Since the test is performed on the domain of the inverse schedules of 3394 * the different domains, we precompute these domains and store 3395 * them in data.domain. 
 */
static __isl_give isl_ast_graft_list *generate_components(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build)
{
	int i;
	isl_ctx *ctx = isl_ast_build_get_ctx(build);
	int n = isl_union_map_n_map(executed);
	struct isl_any_scheduled_after_data data;
	struct isl_set_map_pair *next;
	struct isl_tarjan_graph *g = NULL;
	isl_ast_graft_list *list = NULL;
	int n_domain = 0;

	data.domain = isl_calloc_array(ctx, struct isl_set_map_pair, n);
	if (!data.domain)
		goto error;
	n_domain = n;

	/* Precompute the (set, map) pairs for all maps in "executed". */
	next = data.domain;
	if (isl_union_map_foreach_map(executed, &extract_domain, &next) < 0)
		goto error;

	if (!build)
		goto error;
	data.depth = isl_ast_build_get_depth(build);
	data.group_coscheduled = isl_options_get_ast_build_group_coscheduled(ctx);
	g = isl_tarjan_graph_init(ctx, n, &any_scheduled_after, &data);

	list = isl_ast_graft_list_alloc(ctx, 0);

	/* g->order lists the node indices of the strongly connected
	 * components in topological order, with each component
	 * terminated by a -1 marker.
	 */
	i = 0;
	while (list && n) {
		isl_ast_graft_list *list_c;
		int first = i;

		if (g->order[i] == -1)
			isl_die(ctx, isl_error_internal, "cannot happen",
				goto error);
		++i; --n;
		while (g->order[i] != -1) {
			++i; --n;
		}

		list_c = generate_component(data.domain,
					    g->order + first, i - first,
					    isl_ast_build_copy(build));
		list = isl_ast_graft_list_merge(list, list_c, build);

		++i;
	}

	/* Shared cleanup for both the success and the error path;
	 * the error path additionally discards "list".
	 */
	if (0)
error:		list = isl_ast_graft_list_free(list);
	isl_tarjan_graph_free(g);
	for (i = 0; i < n_domain; ++i) {
		isl_map_free(data.domain[i].map);
		isl_set_free(data.domain[i].set);
	}
	free(data.domain);
	isl_union_map_free(executed);
	isl_ast_build_free(build);

	return list;
}

/* Generate code for the next level (and all inner levels).
 *
 * If "executed" is empty, i.e., no code needs to be generated,
 * then we return an empty list.
3465 * 3466 * If we have already generated code for all loop levels, then we pass 3467 * control to generate_inner_level. 3468 * 3469 * If "executed" lives in a single space, i.e., if code needs to be 3470 * generated for a single domain, then there can only be a single 3471 * component and we go directly to generate_shifted_component. 3472 * Otherwise, we call generate_components to detect the components 3473 * and to call generate_component on each of them separately. 3474 */ 3475static __isl_give isl_ast_graft_list *generate_next_level( 3476 __isl_take isl_union_map *executed, __isl_take isl_ast_build *build) 3477{ 3478 int depth; 3479 3480 if (!build || !executed) 3481 goto error; 3482 3483 if (isl_union_map_is_empty(executed)) { 3484 isl_ctx *ctx = isl_ast_build_get_ctx(build); 3485 isl_union_map_free(executed); 3486 isl_ast_build_free(build); 3487 return isl_ast_graft_list_alloc(ctx, 0); 3488 } 3489 3490 depth = isl_ast_build_get_depth(build); 3491 if (depth >= isl_set_dim(build->domain, isl_dim_set)) 3492 return generate_inner_level(executed, build); 3493 3494 if (isl_union_map_n_map(executed) == 1) 3495 return generate_shifted_component(executed, build); 3496 3497 return generate_components(executed, build); 3498error: 3499 isl_union_map_free(executed); 3500 isl_ast_build_free(build); 3501 return NULL; 3502} 3503 3504/* Internal data structure used by isl_ast_build_ast_from_schedule. 3505 * internal, executed and build are the inputs to generate_code. 3506 * list collects the output. 
3507 */ 3508struct isl_generate_code_data { 3509 int internal; 3510 isl_union_map *executed; 3511 isl_ast_build *build; 3512 3513 isl_ast_graft_list *list; 3514}; 3515 3516/* Given an inverse schedule in terms of the external build schedule, i.e., 3517 * 3518 * [E -> S] -> D 3519 * 3520 * with E the external build schedule and S the additional schedule "space", 3521 * reformulate the inverse schedule in terms of the internal schedule domain, 3522 * i.e., return 3523 * 3524 * [I -> S] -> D 3525 * 3526 * We first obtain a mapping 3527 * 3528 * I -> E 3529 * 3530 * take the inverse and the product with S -> S, resulting in 3531 * 3532 * [I -> S] -> [E -> S] 3533 * 3534 * Applying the map to the input produces the desired result. 3535 */ 3536static __isl_give isl_union_map *internal_executed( 3537 __isl_take isl_union_map *executed, __isl_keep isl_space *space, 3538 __isl_keep isl_ast_build *build) 3539{ 3540 isl_map *id, *proj; 3541 3542 proj = isl_ast_build_get_schedule_map(build); 3543 proj = isl_map_reverse(proj); 3544 space = isl_space_map_from_set(isl_space_copy(space)); 3545 id = isl_map_identity(space); 3546 proj = isl_map_product(proj, id); 3547 executed = isl_union_map_apply_domain(executed, 3548 isl_union_map_from_map(proj)); 3549 return executed; 3550} 3551 3552/* Generate an AST that visits the elements in the range of data->executed 3553 * in the relative order specified by the corresponding image element(s) 3554 * for those image elements that belong to "set". 3555 * Add the result to data->list. 3556 * 3557 * The caller ensures that "set" is a universe domain. 3558 * "space" is the space of the additional part of the schedule. 3559 * It is equal to the space of "set" if build->domain is parametric. 3560 * Otherwise, it is equal to the range of the wrapped space of "set". 
 *
 * If the build space is not parametric and if isl_ast_build_ast_from_schedule
 * was called from an outside user (data->internal not set), then
 * the (inverse) schedule refers to the external build domain and needs to
 * be transformed to refer to the internal build domain.
 *
 * The build is extended to include the additional part of the schedule.
 * If the original build space was not parametric, then the options
 * in data->build refer only to the additional part of the schedule
 * and they need to be adjusted to refer to the complete AST build
 * domain.
 *
 * After having adjusted inverse schedule and build, we start generating
 * code with the outer loop of the current code generation
 * in generate_next_level.
 *
 * If the original build space was not parametric, we undo the embedding
 * on the resulting isl_ast_node_list so that it can be used within
 * the outer AST build.
 */
static int generate_code_in_space(struct isl_generate_code_data *data,
	__isl_take isl_set *set, __isl_take isl_space *space)
{
	isl_union_map *executed;
	isl_ast_build *build;
	isl_ast_graft_list *list;
	int embed;

	/* Restrict the inverse schedule to the given universe domain. */
	executed = isl_union_map_copy(data->executed);
	executed = isl_union_map_intersect_domain(executed,
		    isl_union_set_from_set(set));

	embed = !isl_set_is_params(data->build->domain);
	/* External schedules need to be rewritten in terms of the
	 * internal build domain first.
	 */
	if (embed && !data->internal)
		executed = internal_executed(executed, space, data->build);

	build = isl_ast_build_copy(data->build);
	build = isl_ast_build_product(build, space);

	list = generate_next_level(executed, build);

	/* Undo the embedding if the original build space was not parametric. */
	list = isl_ast_graft_list_unembed(list, embed);

	data->list = isl_ast_graft_list_concat(data->list, list);

	return 0;
}

/* Generate an AST that visits the elements in the range of data->executed
 * in the relative order specified by the corresponding domain element(s)
 * for those domain elements that belong to "set".
 * Add the result to data->list.
 *
 * The caller ensures that "set" is a universe domain.
 *
 * If the build space S is not parametric, then the space of "set"
 * needs to be a wrapped relation with S as domain.  That is, it needs
 * to be of the form
 *
 *	[S -> T]
 *
 * Check this property and pass control to generate_code_in_space
 * passing along T.
 * If the build space is parametric, then T is simply the space of "set".
 */
static int generate_code_set(__isl_take isl_set *set, void *user)
{
	struct isl_generate_code_data *data = user;
	isl_space *space, *build_space;
	int is_domain;

	space = isl_set_get_space(set);

	/* Parametric build space: T is the space of "set" itself. */
	if (isl_set_is_params(data->build->domain))
		return generate_code_in_space(data, set, space);

	/* Otherwise, the space of "set" must be of the form [S -> T];
	 * check that its domain matches the build space and extract T.
	 */
	build_space = isl_ast_build_get_space(data->build, data->internal);
	space = isl_space_unwrap(space);
	is_domain = isl_space_is_domain(build_space, space);
	isl_space_free(build_space);
	space = isl_space_range(space);

	if (is_domain < 0)
		goto error;
	if (!is_domain)
		isl_die(isl_set_get_ctx(set), isl_error_invalid,
			"invalid nested schedule space", goto error);

	return generate_code_in_space(data, set, space);
error:
	isl_set_free(set);
	isl_space_free(space);
	return -1;
}

/* Generate an AST that visits the elements in the range of "executed"
 * in the relative order specified by the corresponding domain element(s).
 *
 * "build" is an isl_ast_build that has either been constructed by
 * isl_ast_build_from_context or passed to a callback set by
 * isl_ast_build_set_create_leaf.
 * In the first case, the space of the isl_ast_build is typically
 * a parametric space, although this is currently not enforced.
 * In the second case, the space is never a parametric space.
 * If the space S is not parametric, then the domain space(s) of "executed"
 * need to be wrapped relations with S as domain.
 *
 * If the domain of "executed" consists of several spaces, then an AST
 * is generated for each of them (in arbitrary order) and the results
 * are concatenated.
 *
 * If "internal" is set, then the domain "S" above refers to the internal
 * schedule domain representation.  Otherwise, it refers to the external
 * representation, as returned by isl_ast_build_get_schedule_space.
 *
 * We essentially run over all the spaces in the domain of "executed"
 * and call generate_code_set on each of them.
 */
static __isl_give isl_ast_graft_list *generate_code(
	__isl_take isl_union_map *executed, __isl_take isl_ast_build *build,
	int internal)
{
	isl_ctx *ctx;
	struct isl_generate_code_data data = { 0 };
	isl_space *space;
	isl_union_set *schedule_domain;
	isl_union_map *universe;

	if (!build)
		goto error;
	/* Align the parameters of the build, the inverse schedule and
	 * the options so that they can be combined below.
	 */
	space = isl_ast_build_get_space(build, 1);
	space = isl_space_align_params(space,
		isl_union_map_get_space(executed));
	space = isl_space_align_params(space,
		isl_union_map_get_space(build->options));
	build = isl_ast_build_align_params(build, isl_space_copy(space));
	executed = isl_union_map_align_params(executed, space);
	if (!executed || !build)
		goto error;

	ctx = isl_ast_build_get_ctx(build);

	data.internal = internal;
	data.executed = executed;
	data.build = build;
	data.list = isl_ast_graft_list_alloc(ctx, 0);

	/* Visit each space in the domain of "executed" separately. */
	universe = isl_union_map_universe(isl_union_map_copy(executed));
	schedule_domain = isl_union_map_domain(universe);
	if (isl_union_set_foreach_set(schedule_domain, &generate_code_set,
					&data) < 0)
		data.list = isl_ast_graft_list_free(data.list);

	isl_union_set_free(schedule_domain);
	isl_union_map_free(executed);

	isl_ast_build_free(build);
	return data.list;
error:
	isl_union_map_free(executed);
	isl_ast_build_free(build);
	return NULL;
}

/* Generate an AST that visits the elements in the domain of "schedule"
 * in the relative order specified by the corresponding image element(s).
 *
 * "build" is an isl_ast_build that has either been constructed by
 * isl_ast_build_from_context or passed to a callback set by
 * isl_ast_build_set_create_leaf.
 * In the first case, the space of the isl_ast_build is typically
 * a parametric space, although this is currently not enforced.
 * In the second case, the space is never a parametric space.
 * If the space S is not parametric, then the range space(s) of "schedule"
 * need to be wrapped relations with S as domain.
 *
 * If the range of "schedule" consists of several spaces, then an AST
 * is generated for each of them (in arbitrary order) and the results
 * are concatenated.
 *
 * We first initialize the local copies of the relevant options.
 * We do this here rather than when the isl_ast_build is created
 * because the options may have changed between the construction
 * of the isl_ast_build and the call to isl_generate_code.
 *
 * The main computation is performed on an inverse schedule (with
 * the schedule domain in the domain and the elements to be executed
 * in the range) called "executed".
3749 */ 3750__isl_give isl_ast_node *isl_ast_build_ast_from_schedule( 3751 __isl_keep isl_ast_build *build, __isl_take isl_union_map *schedule) 3752{ 3753 isl_ast_graft_list *list; 3754 isl_ast_node *node; 3755 isl_union_map *executed; 3756 3757 build = isl_ast_build_copy(build); 3758 build = isl_ast_build_set_single_valued(build, 0); 3759 executed = isl_union_map_reverse(schedule); 3760 list = generate_code(executed, isl_ast_build_copy(build), 0); 3761 node = isl_ast_node_from_graft_list(list, build); 3762 isl_ast_build_free(build); 3763 3764 return node; 3765} 3766