1/* Affinity tests. 2 Copyright (C) 2013-2015 Free Software Foundation, Inc. 3 4 GCC is free software; you can redistribute it and/or modify it under 5 the terms of the GNU General Public License as published by the Free 6 Software Foundation; either version 3, or (at your option) any later 7 version. 8 9 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 10 WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 for more details. 13 14 You should have received a copy of the GNU General Public License 15 along with GCC; see the file COPYING3. If not see 16 <http://www.gnu.org/licenses/>. */ 17 18/* { dg-do run } */ 19/* { dg-set-target-env-var OMP_PROC_BIND "false" } */ 20/* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */ 21 22#ifndef _GNU_SOURCE 23#define _GNU_SOURCE 24#endif 25#include "config.h" 26#include <omp.h> 27#include <stdio.h> 28#include <stdlib.h> 29#include <string.h> 30#include <unistd.h> 31 32#ifdef DO_FORK 33#include <signal.h> 34#include <sys/wait.h> 35#endif 36#ifdef HAVE_PTHREAD_AFFINITY_NP 37#include <sched.h> 38#include <pthread.h> 39#ifdef INTERPOSE_GETAFFINITY 40#include <dlfcn.h> 41#endif 42#endif 43 44struct place 45{ 46 int start, len; 47}; 48struct places 49{ 50 char name[40]; 51 int count; 52 struct place places[8]; 53} places_array[] = { 54 { "", 1, { { -1, -1 } } }, 55 { "{0}:8", 8, 56 { { 0, 1 }, { 1, 1 }, { 2, 1 }, { 3, 1 }, 57 { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } }, 58 { "{7,6}:2:-3", 2, { { 6, 2 }, { 3, 2 } } }, 59 { "{6,7}:4:-2,!{2,3}", 3, { { 6, 2 }, { 4, 2 }, { 0, 2 } } }, 60 { "{1}:7:1", 7, 61 { { 1, 1 }, { 2, 1 }, { 3, 1 }, 62 { 4, 1 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } }, 63 { "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}", 5, 64 { { 0, 2 }, { 2, 3 }, { 5, 1 }, { 6, 1 }, { 7, 1 } } } 65}; 66 67unsigned long contig_cpucount; 68unsigned long min_cpusetsize; 69 70#if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \ 71 && defined (CPU_ALLOC_SIZE) 72 73#if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY) 74int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *); 75 76int 77pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset) 78{ 79 int ret; 80 unsigned long i, max; 81 if (orig_getaffinity_np == NULL) 82 { 83 orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *)) 84 dlsym (RTLD_NEXT, "pthread_getaffinity_np"); 85 if (orig_getaffinity_np == NULL) 86 exit (0); 87 } 88 ret = orig_getaffinity_np (thread, cpusetsize, cpuset); 89 if (ret != 0) 90 return ret; 91 if (contig_cpucount == 0) 92 { 93 max = 8 * cpusetsize; 94 for (i = 0; i < max; i++) 95 if (!CPU_ISSET_S (i, cpusetsize, cpuset)) 96 break; 97 contig_cpucount = i; 98 min_cpusetsize = cpusetsize; 99 } 100 return ret; 101} 102#endif 103 104void 105print_affinity (struct place p) 106{ 107 static unsigned long size; 108 if (size == 0) 109 { 110 if (min_cpusetsize) 111 size = min_cpusetsize; 112 else 113 { 114 size = sysconf (_SC_NPROCESSORS_CONF); 115 size = CPU_ALLOC_SIZE (size); 116 if (size < sizeof (cpu_set_t)) 117 size = sizeof (cpu_set_t); 118 } 119 } 120 cpu_set_t *cpusetp = (cpu_set_t *) __builtin_alloca (size); 121 if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0) 122 { 123 unsigned long i, len, max = 8 * size; 124 int notfirst = 0, unexpected = 1; 125 126 printf (" bound to {"); 127 for (i = 0, len = 0; i < max; i++) 128 if (CPU_ISSET_S (i, size, cpusetp)) 129 { 130 if (len == 0) 131 { 132 if (notfirst) 133 { 134 unexpected = 1; 135 printf (","); 136 } 137 else if (i == (unsigned long) p.start) 138 unexpected = 0; 139 notfirst = 1; 140 printf ("%lu", i); 141 } 142 ++len; 143 } 144 else 145 { 146 if (len && len != (unsigned long) p.len) 147 unexpected = 1; 148 if (len > 1) 149 printf (":%lu", len); 150 len = 0; 151 } 152 if (len && len != (unsigned long) p.len) 153 unexpected = 1; 154 if (len > 1) 155 printf (":%lu", len); 156 printf ("}"); 157 if (p.start != -1 && unexpected) 158 { 159 printf (", expected {%d", p.start); 160 if (p.len != 1) 161 printf (":%d", p.len); 162 printf ("} instead"); 163 } 164 else if (p.start != -1) 165 printf (", verified"); 166 } 167} 168#else 169void 170print_affinity (struct place p) 171{ 172 (void) p.start; 173 (void) p.len; 174} 175#endif 176 177 178int 179main () 180{ 181 char *env_proc_bind = getenv ("OMP_PROC_BIND"); 182 int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0; 183 int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0; 184 int test_spread_master_close 185 = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0; 186 char *env_places = getenv ("OMP_PLACES"); 187 int test_places = 0; 188 189#ifdef DO_FORK 190 if (env_places == NULL && contig_cpucount >= 8 && test_false 191 && getenv ("GOMP_AFFINITY") == NULL) 192 { 193 int i, j, status; 194 pid_t pid; 195 for (j = 0; j < 2; j++) 196 { 197 if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1) 198 < 0) 199 break; 200 for (i = sizeof (places_array) / sizeof (places_array[0]) - 1; 201 i; --i) 202 { 203 if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0) 204 break; 205 pid = fork (); 206 if (pid == -1) 207 break; 208 if (pid == 0) 209 { 210 execl ("/proc/self/exe", "affinity-1.exe", NULL); 211 _exit (1); 212 } 213 if (waitpid (pid, &status, 0) < 0) 214 break; 215 if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT) 216 abort (); 217 else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0) 218 break; 219 } 220 if (i) 221 break; 222 } 223 } 224#endif 225 226 int first = 1; 227 if (env_proc_bind) 228 { 229 printf ("OMP_PROC_BIND='%s'", env_proc_bind); 230 first = 0; 231 } 232 if (env_places) 233 printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places); 234 printf ("\n"); 235 236 if (env_places && contig_cpucount >= 8 237 && (test_true || test_spread_master_close)) 238 { 239 for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1; 240 test_places; --test_places) 241 if (strcmp (env_places, places_array[test_places].name) == 0) 242 break; 243 } 244 245#define verify(if_true, if_s_m_c) \ 246 if (test_false && omp_get_proc_bind () != omp_proc_bind_false) \ 247 abort (); \ 248 if (test_true && omp_get_proc_bind () != if_true) \ 249 abort (); \ 250 if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c) \ 251 abort (); 252 253 verify (omp_proc_bind_true, omp_proc_bind_spread); 254 255 printf ("Initial thread"); 256 print_affinity (places_array[test_places].places[0]); 257 printf ("\n"); 258 omp_set_nested (1); 259 omp_set_dynamic (0); 260 261 #pragma omp parallel if (0) 262 { 263 verify (omp_proc_bind_true, omp_proc_bind_master); 264 #pragma omp parallel if (0) 265 { 266 verify (omp_proc_bind_true, omp_proc_bind_close); 267 #pragma omp parallel if (0) 268 { 269 verify (omp_proc_bind_true, omp_proc_bind_close); 270 } 271 #pragma omp parallel if (0) proc_bind (spread) 272 { 273 verify (omp_proc_bind_spread, omp_proc_bind_spread); 274 } 275 } 276 #pragma omp parallel if (0) proc_bind (master) 277 { 278 verify (omp_proc_bind_master, omp_proc_bind_close); 279 #pragma omp parallel if (0) 280 { 281 verify (omp_proc_bind_master, omp_proc_bind_close); 282 } 283 #pragma omp parallel if (0) proc_bind (spread) 284 { 285 verify (omp_proc_bind_spread, omp_proc_bind_spread); 286 } 287 } 288 } 289 290 /* True/spread */ 291 #pragma omp parallel num_threads (4) 292 { 293 verify (omp_proc_bind_true, omp_proc_bind_master); 294 #pragma omp critical 295 { 296 struct place p = places_array[0].places[0]; 297 int thr = omp_get_thread_num (); 298 printf ("#1 thread %d", thr); 299 if (omp_get_num_threads () == 4 && test_spread_master_close) 300 switch (places_array[test_places].count) 301 { 302 case 8: 303 /* T = 4, P = 8, each subpartition has 2 places. */ 304 case 7: 305 /* T = 4, P = 7, each subpartition has 2 places, but 306 last partition, which has just one place. */ 307 p = places_array[test_places].places[2 * thr]; 308 break; 309 case 5: 310 /* T = 4, P = 5, first subpartition has 2 places, the 311 rest just one. */ 312 p = places_array[test_places].places[thr ? 1 + thr : 0]; 313 break; 314 case 3: 315 /* T = 4, P = 3, unit sized subpartitions, first gets 316 thr0 and thr3, second thr1, third thr2. */ 317 p = places_array[test_places].places[thr == 3 ? 0 : thr]; 318 break; 319 case 2: 320 /* T = 4, P = 2, unit sized subpartitions, each with 321 2 threads. */ 322 p = places_array[test_places].places[thr / 2]; 323 break; 324 } 325 print_affinity (p); 326 printf ("\n"); 327 } 328 #pragma omp barrier 329 if (omp_get_thread_num () == 3) 330 { 331 /* True/spread, true/master. */ 332 #pragma omp parallel num_threads (3) 333 { 334 verify (omp_proc_bind_true, omp_proc_bind_close); 335 #pragma omp critical 336 { 337 struct place p = places_array[0].places[0]; 338 int thr = omp_get_thread_num (); 339 printf ("#1,#1 thread 3,%d", thr); 340 if (omp_get_num_threads () == 3 && test_spread_master_close) 341 /* Outer is spread, inner master, so just bind to the 342 place or the master thread, which is thr 3 above. */ 343 switch (places_array[test_places].count) 344 { 345 case 8: 346 case 7: 347 p = places_array[test_places].places[6]; 348 break; 349 case 5: 350 p = places_array[test_places].places[4]; 351 break; 352 case 3: 353 p = places_array[test_places].places[0]; 354 break; 355 case 2: 356 p = places_array[test_places].places[1]; 357 break; 358 } 359 print_affinity (p); 360 printf ("\n"); 361 } 362 } 363 /* True/spread, spread. */ 364 #pragma omp parallel num_threads (5) proc_bind (spread) 365 { 366 verify (omp_proc_bind_spread, omp_proc_bind_close); 367 #pragma omp critical 368 { 369 struct place p = places_array[0].places[0]; 370 int thr = omp_get_thread_num (); 371 printf ("#1,#2 thread 3,%d", thr); 372 if (omp_get_num_threads () == 5 && test_spread_master_close) 373 /* Outer is spread, inner spread. */ 374 switch (places_array[test_places].count) 375 { 376 case 8: 377 /* T = 5, P = 2, unit sized subpartitions. */ 378 p = places_array[test_places].places[thr == 4 ? 6 379 : 6 + thr / 2]; 380 break; 381 /* The rest are T = 5, P = 1. */ 382 case 7: 383 p = places_array[test_places].places[6]; 384 break; 385 case 5: 386 p = places_array[test_places].places[4]; 387 break; 388 case 3: 389 p = places_array[test_places].places[0]; 390 break; 391 case 2: 392 p = places_array[test_places].places[1]; 393 break; 394 } 395 print_affinity (p); 396 printf ("\n"); 397 } 398 #pragma omp barrier 399 if (omp_get_thread_num () == 3) 400 { 401 /* True/spread, spread, close. */ 402 #pragma omp parallel num_threads (5) proc_bind (close) 403 { 404 verify (omp_proc_bind_close, omp_proc_bind_close); 405 #pragma omp critical 406 { 407 struct place p = places_array[0].places[0]; 408 int thr = omp_get_thread_num (); 409 printf ("#1,#2,#1 thread 3,3,%d", thr); 410 if (omp_get_num_threads () == 5 && test_spread_master_close) 411 /* Outer is spread, inner spread, innermost close. */ 412 switch (places_array[test_places].count) 413 { 414 /* All are T = 5, P = 1. */ 415 case 8: 416 p = places_array[test_places].places[7]; 417 break; 418 case 7: 419 p = places_array[test_places].places[6]; 420 break; 421 case 5: 422 p = places_array[test_places].places[4]; 423 break; 424 case 3: 425 p = places_array[test_places].places[0]; 426 break; 427 case 2: 428 p = places_array[test_places].places[1]; 429 break; 430 } 431 print_affinity (p); 432 printf ("\n"); 433 } 434 } 435 } 436 } 437 /* True/spread, master. */ 438 #pragma omp parallel num_threads (4) proc_bind(master) 439 { 440 verify (omp_proc_bind_master, omp_proc_bind_close); 441 #pragma omp critical 442 { 443 struct place p = places_array[0].places[0]; 444 int thr = omp_get_thread_num (); 445 printf ("#1,#3 thread 3,%d", thr); 446 if (omp_get_num_threads () == 4 && test_spread_master_close) 447 /* Outer is spread, inner master, so just bind to the 448 place or the master thread, which is thr 3 above. */ 449 switch (places_array[test_places].count) 450 { 451 case 8: 452 case 7: 453 p = places_array[test_places].places[6]; 454 break; 455 case 5: 456 p = places_array[test_places].places[4]; 457 break; 458 case 3: 459 p = places_array[test_places].places[0]; 460 break; 461 case 2: 462 p = places_array[test_places].places[1]; 463 break; 464 } 465 print_affinity (p); 466 printf ("\n"); 467 } 468 } 469 /* True/spread, close. */ 470 #pragma omp parallel num_threads (6) proc_bind (close) 471 { 472 verify (omp_proc_bind_close, omp_proc_bind_close); 473 #pragma omp critical 474 { 475 struct place p = places_array[0].places[0]; 476 int thr = omp_get_thread_num (); 477 printf ("#1,#4 thread 3,%d", thr); 478 if (omp_get_num_threads () == 6 && test_spread_master_close) 479 /* Outer is spread, inner close. */ 480 switch (places_array[test_places].count) 481 { 482 case 8: 483 /* T = 6, P = 2, unit sized subpartitions. */ 484 p = places_array[test_places].places[6 + thr / 3]; 485 break; 486 /* The rest are T = 6, P = 1. */ 487 case 7: 488 p = places_array[test_places].places[6]; 489 break; 490 case 5: 491 p = places_array[test_places].places[4]; 492 break; 493 case 3: 494 p = places_array[test_places].places[0]; 495 break; 496 case 2: 497 p = places_array[test_places].places[1]; 498 break; 499 } 500 print_affinity (p); 501 printf ("\n"); 502 } 503 } 504 } 505 } 506 507 /* Spread. */ 508 #pragma omp parallel num_threads (5) proc_bind(spread) 509 { 510 verify (omp_proc_bind_spread, omp_proc_bind_master); 511 #pragma omp critical 512 { 513 struct place p = places_array[0].places[0]; 514 int thr = omp_get_thread_num (); 515 printf ("#2 thread %d", thr); 516 if (omp_get_num_threads () == 5 517 && (test_spread_master_close || test_true)) 518 switch (places_array[test_places].count) 519 { 520 case 8: 521 /* T = 5, P = 8, first 3 subpartitions have 2 places, last 522 2 one place. */ 523 p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr]; 524 break; 525 case 7: 526 /* T = 5, P = 7, first 2 subpartitions have 2 places, last 527 3 one place. */ 528 p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr]; 529 break; 530 case 5: 531 /* T = 5, P = 5, unit sized subpartitions, each one with one 532 thread. */ 533 p = places_array[test_places].places[thr]; 534 break; 535 case 3: 536 /* T = 5, P = 3, unit sized subpartitions, first gets 537 thr0 and thr3, second thr1 and thr4, third thr2. */ 538 p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; 539 break; 540 case 2: 541 /* T = 5, P = 2, unit sized subpartitions, first with 542 thr{0,1,4} and second with thr{2,3}. */ 543 p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; 544 break; 545 } 546 print_affinity (p); 547 printf ("\n"); 548 } 549 #pragma omp barrier 550 if (omp_get_thread_num () == 3) 551 { 552 int pp = 0; 553 switch (places_array[test_places].count) 554 { 555 case 8: pp = 6; break; 556 case 7: pp = 5; break; 557 case 5: pp = 3; break; 558 case 2: pp = 1; break; 559 } 560 /* Spread, spread/master. */ 561 #pragma omp parallel num_threads (3) firstprivate (pp) 562 { 563 verify (omp_proc_bind_spread, omp_proc_bind_close); 564 #pragma omp critical 565 { 566 struct place p = places_array[0].places[0]; 567 int thr = omp_get_thread_num (); 568 printf ("#2,#1 thread 3,%d", thr); 569 if (test_spread_master_close || test_true) 570 /* Outer is spread, inner spread resp. master, bit we have 571 just unit sized partitions. */ 572 p = places_array[test_places].places[pp]; 573 print_affinity (p); 574 printf ("\n"); 575 } 576 } 577 /* Spread, spread. */ 578 #pragma omp parallel num_threads (5) proc_bind (spread) \ 579 firstprivate (pp) 580 { 581 verify (omp_proc_bind_spread, omp_proc_bind_close); 582 #pragma omp critical 583 { 584 struct place p = places_array[0].places[0]; 585 int thr = omp_get_thread_num (); 586 printf ("#2,#2 thread 3,%d", thr); 587 if (test_spread_master_close || test_true) 588 /* Outer is spread, inner spread, bit we have 589 just unit sized partitions. */ 590 p = places_array[test_places].places[pp]; 591 print_affinity (p); 592 printf ("\n"); 593 } 594 } 595 /* Spread, master. */ 596 #pragma omp parallel num_threads (4) proc_bind(master) \ 597 firstprivate(pp) 598 { 599 verify (omp_proc_bind_master, omp_proc_bind_close); 600 #pragma omp critical 601 { 602 struct place p = places_array[0].places[0]; 603 int thr = omp_get_thread_num (); 604 printf ("#2,#3 thread 3,%d", thr); 605 if (test_spread_master_close || test_true) 606 /* Outer is spread, inner master, bit we have 607 just unit sized partitions. */ 608 p = places_array[test_places].places[pp]; 609 print_affinity (p); 610 printf ("\n"); 611 } 612 } 613 /* Spread, close. */ 614 #pragma omp parallel num_threads (6) proc_bind (close) \ 615 firstprivate (pp) 616 { 617 verify (omp_proc_bind_close, omp_proc_bind_close); 618 #pragma omp critical 619 { 620 struct place p = places_array[0].places[0]; 621 int thr = omp_get_thread_num (); 622 printf ("#2,#4 thread 3,%d", thr); 623 if (test_spread_master_close || test_true) 624 /* Outer is spread, inner close, bit we have 625 just unit sized partitions. */ 626 p = places_array[test_places].places[pp]; 627 print_affinity (p); 628 printf ("\n"); 629 } 630 } 631 } 632 } 633 634 /* Master. */ 635 #pragma omp parallel num_threads (3) proc_bind(master) 636 { 637 verify (omp_proc_bind_master, omp_proc_bind_master); 638 #pragma omp critical 639 { 640 struct place p = places_array[0].places[0]; 641 int thr = omp_get_thread_num (); 642 printf ("#3 thread %d", thr); 643 if (test_spread_master_close || test_true) 644 p = places_array[test_places].places[0]; 645 print_affinity (p); 646 printf ("\n"); 647 } 648 #pragma omp barrier 649 if (omp_get_thread_num () == 2) 650 { 651 /* Master, master. */ 652 #pragma omp parallel num_threads (4) 653 { 654 verify (omp_proc_bind_master, omp_proc_bind_close); 655 #pragma omp critical 656 { 657 struct place p = places_array[0].places[0]; 658 int thr = omp_get_thread_num (); 659 printf ("#3,#1 thread 2,%d", thr); 660 if (test_spread_master_close || test_true) 661 /* Outer is master, inner is master. */ 662 p = places_array[test_places].places[0]; 663 print_affinity (p); 664 printf ("\n"); 665 } 666 } 667 /* Master, spread. */ 668 #pragma omp parallel num_threads (4) proc_bind (spread) 669 { 670 verify (omp_proc_bind_spread, omp_proc_bind_close); 671 #pragma omp critical 672 { 673 struct place p = places_array[0].places[0]; 674 int thr = omp_get_thread_num (); 675 printf ("#3,#2 thread 2,%d", thr); 676 if (omp_get_num_threads () == 4 677 && (test_spread_master_close || test_true)) 678 /* Outer is master, inner is spread. */ 679 switch (places_array[test_places].count) 680 { 681 case 8: 682 /* T = 4, P = 8, each subpartition has 2 places. */ 683 case 7: 684 /* T = 4, P = 7, each subpartition has 2 places, but 685 last partition, which has just one place. */ 686 p = places_array[test_places].places[2 * thr]; 687 break; 688 case 5: 689 /* T = 4, P = 5, first subpartition has 2 places, the 690 rest just one. */ 691 p = places_array[test_places].places[thr ? 1 + thr : 0]; 692 break; 693 case 3: 694 /* T = 4, P = 3, unit sized subpartitions, first gets 695 thr0 and thr3, second thr1, third thr2. */ 696 p = places_array[test_places].places[thr == 3 ? 0 : thr]; 697 break; 698 case 2: 699 /* T = 4, P = 2, unit sized subpartitions, each with 700 2 threads. */ 701 p = places_array[test_places].places[thr / 2]; 702 break; 703 } 704 print_affinity (p); 705 printf ("\n"); 706 } 707 #pragma omp barrier 708 if (omp_get_thread_num () == 0) 709 { 710 /* Master, spread, close. */ 711 #pragma omp parallel num_threads (5) proc_bind (close) 712 { 713 verify (omp_proc_bind_close, omp_proc_bind_close); 714 #pragma omp critical 715 { 716 struct place p = places_array[0].places[0]; 717 int thr = omp_get_thread_num (); 718 printf ("#3,#2,#1 thread 2,0,%d", thr); 719 if (omp_get_num_threads () == 5 720 && (test_spread_master_close || test_true)) 721 /* Outer is master, inner spread, innermost close. */ 722 switch (places_array[test_places].count) 723 { 724 /* First 3 are T = 5, P = 2. */ 725 case 8: 726 case 7: 727 case 5: 728 p = places_array[test_places].places[(thr & 2) / 2]; 729 break; 730 /* All the rest are T = 5, P = 1. */ 731 case 3: 732 case 2: 733 p = places_array[test_places].places[0]; 734 break; 735 } 736 print_affinity (p); 737 printf ("\n"); 738 } 739 } 740 } 741 #pragma omp barrier 742 if (omp_get_thread_num () == 3) 743 { 744 /* Master, spread, close. */ 745 #pragma omp parallel num_threads (5) proc_bind (close) 746 { 747 verify (omp_proc_bind_close, omp_proc_bind_close); 748 #pragma omp critical 749 { 750 struct place p = places_array[0].places[0]; 751 int thr = omp_get_thread_num (); 752 printf ("#3,#2,#2 thread 2,3,%d", thr); 753 if (omp_get_num_threads () == 5 754 && (test_spread_master_close || test_true)) 755 /* Outer is master, inner spread, innermost close. */ 756 switch (places_array[test_places].count) 757 { 758 case 8: 759 /* T = 5, P = 2. */ 760 p = places_array[test_places].places[6 761 + (thr & 2) / 2]; 762 break; 763 /* All the rest are T = 5, P = 1. */ 764 case 7: 765 p = places_array[test_places].places[6]; 766 break; 767 case 5: 768 p = places_array[test_places].places[4]; 769 break; 770 case 3: 771 p = places_array[test_places].places[0]; 772 break; 773 case 2: 774 p = places_array[test_places].places[1]; 775 break; 776 } 777 print_affinity (p); 778 printf ("\n"); 779 } 780 } 781 } 782 } 783 /* Master, master. */ 784 #pragma omp parallel num_threads (4) proc_bind(master) 785 { 786 verify (omp_proc_bind_master, omp_proc_bind_close); 787 #pragma omp critical 788 { 789 struct place p = places_array[0].places[0]; 790 int thr = omp_get_thread_num (); 791 printf ("#3,#3 thread 2,%d", thr); 792 if (test_spread_master_close || test_true) 793 /* Outer is master, inner master. */ 794 p = places_array[test_places].places[0]; 795 print_affinity (p); 796 printf ("\n"); 797 } 798 } 799 /* Master, close. */ 800 #pragma omp parallel num_threads (6) proc_bind (close) 801 { 802 verify (omp_proc_bind_close, omp_proc_bind_close); 803 #pragma omp critical 804 { 805 struct place p = places_array[0].places[0]; 806 int thr = omp_get_thread_num (); 807 printf ("#3,#4 thread 2,%d", thr); 808 if (omp_get_num_threads () == 6 809 && (test_spread_master_close || test_true)) 810 switch (places_array[test_places].count) 811 { 812 case 8: 813 /* T = 6, P = 8. */ 814 case 7: 815 /* T = 6, P = 7. */ 816 p = places_array[test_places].places[thr]; 817 break; 818 case 5: 819 /* T = 6, P = 5. thr{0,5} go into the first place. */ 820 p = places_array[test_places].places[thr == 5 ? 0 : thr]; 821 break; 822 case 3: 823 /* T = 6, P = 3, two threads into each place. */ 824 p = places_array[test_places].places[thr / 2]; 825 break; 826 case 2: 827 /* T = 6, P = 2, 3 threads into each place. */ 828 p = places_array[test_places].places[thr / 3]; 829 break; 830 } 831 print_affinity (p); 832 printf ("\n"); 833 } 834 } 835 } 836 } 837 838 #pragma omp parallel num_threads (5) proc_bind(close) 839 { 840 verify (omp_proc_bind_close, omp_proc_bind_master); 841 #pragma omp critical 842 { 843 struct place p = places_array[0].places[0]; 844 int thr = omp_get_thread_num (); 845 printf ("#4 thread %d", thr); 846 if (omp_get_num_threads () == 5 847 && (test_spread_master_close || test_true)) 848 switch (places_array[test_places].count) 849 { 850 case 8: 851 /* T = 5, P = 8. */ 852 case 7: 853 /* T = 5, P = 7. */ 854 case 5: 855 /* T = 5, P = 5. */ 856 p = places_array[test_places].places[thr]; 857 break; 858 case 3: 859 /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second, 860 thr2 in third. */ 861 p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr]; 862 break; 863 case 2: 864 /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second. */ 865 p = places_array[test_places].places[thr == 4 ? 0 : thr / 2]; 866 break; 867 } 868 print_affinity (p); 869 printf ("\n"); 870 } 871 #pragma omp barrier 872 if (omp_get_thread_num () == 2) 873 { 874 int pp = 0; 875 switch (places_array[test_places].count) 876 { 877 case 8: 878 case 7: 879 case 5: 880 case 3: 881 pp = 2; 882 break; 883 case 2: 884 pp = 1; 885 break; 886 } 887 /* Close, close/master. */ 888 #pragma omp parallel num_threads (4) firstprivate (pp) 889 { 890 verify (omp_proc_bind_close, omp_proc_bind_close); 891 #pragma omp critical 892 { 893 struct place p = places_array[0].places[0]; 894 int thr = omp_get_thread_num (); 895 printf ("#4,#1 thread 2,%d", thr); 896 if (test_spread_master_close) 897 /* Outer is close, inner is master. */ 898 p = places_array[test_places].places[pp]; 899 else if (omp_get_num_threads () == 4 && test_true) 900 /* Outer is close, inner is close. */ 901 switch (places_array[test_places].count) 902 { 903 case 8: 904 /* T = 4, P = 8. */ 905 case 7: 906 /* T = 4, P = 7. */ 907 p = places_array[test_places].places[2 + thr]; 908 break; 909 case 5: 910 /* T = 4, P = 5. There is wrap-around for thr3. */ 911 p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr]; 912 break; 913 case 3: 914 /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2 915 into p1. */ 916 p = places_array[test_places].places[(2 + thr) % 3]; 917 break; 918 case 2: 919 /* T = 4, P = 2, 2 threads into each place. */ 920 p = places_array[test_places].places[1 - thr / 2]; 921 break; 922 } 923 924 print_affinity (p); 925 printf ("\n"); 926 } 927 } 928 /* Close, spread. */ 929 #pragma omp parallel num_threads (4) proc_bind (spread) 930 { 931 verify (omp_proc_bind_spread, omp_proc_bind_close); 932 #pragma omp critical 933 { 934 struct place p = places_array[0].places[0]; 935 int thr = omp_get_thread_num (); 936 printf ("#4,#2 thread 2,%d", thr); 937 if (omp_get_num_threads () == 4 938 && (test_spread_master_close || test_true)) 939 /* Outer is close, inner is spread. */ 940 switch (places_array[test_places].count) 941 { 942 case 8: 943 /* T = 4, P = 8, each subpartition has 2 places. */ 944 case 7: 945 /* T = 4, P = 7, each subpartition has 2 places, but 946 last partition, which has just one place. */ 947 p = places_array[test_places].places[thr == 3 ? 0 948 : 2 + 2 * thr]; 949 break; 950 case 5: 951 /* T = 4, P = 5, first subpartition has 2 places, the 952 rest just one. */ 953 p = places_array[test_places].places[thr == 3 ? 0 954 : 2 + thr]; 955 break; 956 case 3: 957 /* T = 4, P = 3, unit sized subpartitions, third gets 958 thr0 and thr3, first thr1, second thr2. */ 959 p = places_array[test_places].places[thr == 0 ? 2 : thr - 1]; 960 break; 961 case 2: 962 /* T = 4, P = 2, unit sized subpartitions, each with 963 2 threads. */ 964 p = places_array[test_places].places[1 - thr / 2]; 965 break; 966 } 967 print_affinity (p); 968 printf ("\n"); 969 } 970 #pragma omp barrier 971 if (omp_get_thread_num () == 0) 972 { 973 /* Close, spread, close. */ 974 #pragma omp parallel num_threads (5) proc_bind (close) 975 { 976 verify (omp_proc_bind_close, omp_proc_bind_close); 977 #pragma omp critical 978 { 979 struct place p = places_array[0].places[0]; 980 int thr = omp_get_thread_num (); 981 printf ("#4,#2,#1 thread 2,0,%d", thr); 982 if (omp_get_num_threads () == 5 983 && (test_spread_master_close || test_true)) 984 /* Outer is close, inner spread, innermost close. */ 985 switch (places_array[test_places].count) 986 { 987 case 8: 988 case 7: 989 /* T = 5, P = 2. */ 990 p = places_array[test_places].places[2 991 + (thr & 2) / 2]; 992 break; 993 /* All the rest are T = 5, P = 1. */ 994 case 5: 995 case 3: 996 p = places_array[test_places].places[2]; 997 break; 998 case 2: 999 p = places_array[test_places].places[1]; 1000 break; 1001 } 1002 print_affinity (p); 1003 printf ("\n"); 1004 } 1005 } 1006 } 1007 #pragma omp barrier 1008 if (omp_get_thread_num () == 2) 1009 { 1010 /* Close, spread, close. */ 1011 #pragma omp parallel num_threads (5) proc_bind (close) 1012 { 1013 verify (omp_proc_bind_close, omp_proc_bind_close); 1014 #pragma omp critical 1015 { 1016 struct place p = places_array[0].places[0]; 1017 int thr = omp_get_thread_num (); 1018 printf ("#4,#2,#2 thread 2,2,%d", thr); 1019 if (omp_get_num_threads () == 5 1020 && (test_spread_master_close || test_true)) 1021 /* Outer is close, inner spread, innermost close. */ 1022 switch (places_array[test_places].count) 1023 { 1024 case 8: 1025 /* T = 5, P = 2. */ 1026 p = places_array[test_places].places[6 1027 + (thr & 2) / 2]; 1028 break; 1029 /* All the rest are T = 5, P = 1. */ 1030 case 7: 1031 p = places_array[test_places].places[6]; 1032 break; 1033 case 5: 1034 p = places_array[test_places].places[4]; 1035 break; 1036 case 3: 1037 p = places_array[test_places].places[1]; 1038 break; 1039 case 2: 1040 p = places_array[test_places].places[0]; 1041 break; 1042 } 1043 print_affinity (p); 1044 printf ("\n"); 1045 } 1046 } 1047 } 1048 #pragma omp barrier 1049 if (omp_get_thread_num () == 3) 1050 { 1051 /* Close, spread, close. */ 1052 #pragma omp parallel num_threads (5) proc_bind (close) 1053 { 1054 verify (omp_proc_bind_close, omp_proc_bind_close); 1055 #pragma omp critical 1056 { 1057 struct place p = places_array[0].places[0]; 1058 int thr = omp_get_thread_num (); 1059 printf ("#4,#2,#3 thread 2,3,%d", thr); 1060 if (omp_get_num_threads () == 5 1061 && (test_spread_master_close || test_true)) 1062 /* Outer is close, inner spread, innermost close. */ 1063 switch (places_array[test_places].count) 1064 { 1065 case 8: 1066 case 7: 1067 case 5: 1068 /* T = 5, P = 2. */ 1069 p = places_array[test_places].places[(thr & 2) / 2]; 1070 break; 1071 /* All the rest are T = 5, P = 1. */ 1072 case 3: 1073 p = places_array[test_places].places[2]; 1074 break; 1075 case 2: 1076 p = places_array[test_places].places[0]; 1077 break; 1078 } 1079 print_affinity (p); 1080 printf ("\n"); 1081 } 1082 } 1083 } 1084 } 1085 /* Close, master. */ 1086 #pragma omp parallel num_threads (4) proc_bind(master) \ 1087 firstprivate (pp) 1088 { 1089 verify (omp_proc_bind_master, omp_proc_bind_close); 1090 #pragma omp critical 1091 { 1092 struct place p = places_array[0].places[0]; 1093 int thr = omp_get_thread_num (); 1094 printf ("#4,#3 thread 2,%d", thr); 1095 if (test_spread_master_close || test_true) 1096 /* Outer is close, inner master. */ 1097 p = places_array[test_places].places[pp]; 1098 print_affinity (p); 1099 printf ("\n"); 1100 } 1101 } 1102 /* Close, close. */ 1103 #pragma omp parallel num_threads (6) proc_bind (close) 1104 { 1105 verify (omp_proc_bind_close, omp_proc_bind_close); 1106 #pragma omp critical 1107 { 1108 struct place p = places_array[0].places[0]; 1109 int thr = omp_get_thread_num (); 1110 printf ("#4,#4 thread 2,%d", thr); 1111 if (omp_get_num_threads () == 6 1112 && (test_spread_master_close || test_true)) 1113 switch (places_array[test_places].count) 1114 { 1115 case 8: 1116 /* T = 6, P = 8. */ 1117 p = places_array[test_places].places[2 + thr]; 1118 break; 1119 case 7: 1120 /* T = 6, P = 7. */ 1121 p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr]; 1122 break; 1123 case 5: 1124 /* T = 6, P = 5. thr{0,5} go into the third place. */ 1125 p = places_array[test_places].places[thr >= 3 ? thr - 3 1126 : 2 + thr]; 1127 break; 1128 case 3: 1129 /* T = 6, P = 3, two threads into each place. */ 1130 p = places_array[test_places].places[thr < 2 ? 2 1131 : thr / 2 - 1]; 1132 break; 1133 case 2: 1134 /* T = 6, P = 2, 3 threads into each place. */ 1135 p = places_array[test_places].places[1 - thr / 3]; 1136 break; 1137 } 1138 print_affinity (p); 1139 printf ("\n"); 1140 } 1141 } 1142 } 1143 } 1144 1145 return 0; 1146} 1147