1/* Affinity tests.
2   Copyright (C) 2013-2015 Free Software Foundation, Inc.
3
4   GCC is free software; you can redistribute it and/or modify it under
5   the terms of the GNU General Public License as published by the Free
6   Software Foundation; either version 3, or (at your option) any later
7   version.
8
9   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
10   WARRANTY; without even the implied warranty of MERCHANTABILITY or
11   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12   for more details.
13
14   You should have received a copy of the GNU General Public License
15   along with GCC; see the file COPYING3.  If not see
16   <http://www.gnu.org/licenses/>.  */
17
18/* { dg-do run } */
19/* { dg-set-target-env-var OMP_PROC_BIND "false" } */
20/* { dg-additional-options "-DINTERPOSE_GETAFFINITY -DDO_FORK -ldl" { target *-*-linux* } } */
21
22#ifndef _GNU_SOURCE
23#define _GNU_SOURCE
24#endif
25#include "config.h"
26#include <omp.h>
27#include <stdio.h>
28#include <stdlib.h>
29#include <string.h>
30#include <unistd.h>
31
32#ifdef DO_FORK
33#include <signal.h>
34#include <sys/wait.h>
35#endif
36#ifdef HAVE_PTHREAD_AFFINITY_NP
37#include <sched.h>
38#include <pthread.h>
39#ifdef INTERPOSE_GETAFFINITY
40#include <dlfcn.h>
41#endif
42#endif
43
/* A contiguous range of CPUs: LEN CPUs beginning at CPU number START.
   A START of -1 means "no expectation"; print_affinity then prints the
   binding without checking it.  */
struct place
{
  int start;
  int len;
};

/* One OMP_PLACES setting (NAME) together with the COUNT places it is
   expected to produce, listed in PLACES.  Unused trailing entries of
   PLACES are zero-initialized.  */
struct places
{
  char name[40];
  int count;
  struct place places[8];
};

/* Table of tested OMP_PLACES values.  Entry 0 has an empty name and a
   { -1, -1 } place; main uses it whenever no other entry matches.  */
struct places places_array[] = {
  [0] = {
    .name = "",
    .count = 1,
    .places = { [0] = { -1, -1 } }
  },
  [1] = {
    .name = "{0}:8",
    .count = 8,
    .places = { [0] = { 0, 1 }, [1] = { 1, 1 }, [2] = { 2, 1 },
		[3] = { 3, 1 }, [4] = { 4, 1 }, [5] = { 5, 1 },
		[6] = { 6, 1 }, [7] = { 7, 1 } }
  },
  [2] = {
    .name = "{7,6}:2:-3",
    .count = 2,
    .places = { [0] = { 6, 2 }, [1] = { 3, 2 } }
  },
  [3] = {
    .name = "{6,7}:4:-2,!{2,3}",
    .count = 3,
    .places = { [0] = { 6, 2 }, [1] = { 4, 2 }, [2] = { 0, 2 } }
  },
  [4] = {
    .name = "{1}:7:1",
    .count = 7,
    .places = { [0] = { 1, 1 }, [1] = { 2, 1 }, [2] = { 3, 1 },
		[3] = { 4, 1 }, [4] = { 5, 1 }, [5] = { 6, 1 },
		[6] = { 7, 1 } }
  },
  [5] = {
    .name = "{0,1},{3,2,4},{6,5,!6},{6},{7:2:-1,!6}",
    .count = 5,
    .places = { [0] = { 0, 2 }, [1] = { 2, 3 }, [2] = { 5, 1 },
		[3] = { 6, 1 }, [4] = { 7, 1 } }
  }
};
66
/* Number of consecutive CPUs, counting up from CPU 0, found set in an
   affinity mask; stays 0 until something records a value.  */
unsigned long contig_cpucount = 0UL;

/* Byte size of the CPU set buffer that went with the mask above; 0 while
   unknown.  */
unsigned long min_cpusetsize = 0UL;
69
70#if defined (HAVE_PTHREAD_AFFINITY_NP) && defined (_SC_NPROCESSORS_CONF) \
71    && defined (CPU_ALLOC_SIZE)
72
73#if defined (RTLD_NEXT) && defined (INTERPOSE_GETAFFINITY)
74int (*orig_getaffinity_np) (pthread_t, size_t, cpu_set_t *);
75
76int
77pthread_getaffinity_np (pthread_t thread, size_t cpusetsize, cpu_set_t *cpuset)
78{
79  int ret;
80  unsigned long i, max;
81  if (orig_getaffinity_np == NULL)
82    {
83      orig_getaffinity_np = (int (*) (pthread_t, size_t, cpu_set_t *))
84			    dlsym (RTLD_NEXT, "pthread_getaffinity_np");
85      if (orig_getaffinity_np == NULL)
86	exit (0);
87    }
88  ret = orig_getaffinity_np (thread, cpusetsize, cpuset);
89  if (ret != 0)
90    return ret;
91  if (contig_cpucount == 0)
92    {
93      max = 8 * cpusetsize;
94      for (i = 0; i < max; i++)
95	if (!CPU_ISSET_S (i, cpusetsize, cpuset))
96	  break;
97      contig_cpucount = i;
98      min_cpusetsize = cpusetsize;
99    }
100  return ret;
101}
102#endif
103
104void
105print_affinity (struct place p)
106{
107  static unsigned long size;
108  if (size == 0)
109    {
110      if (min_cpusetsize)
111	size = min_cpusetsize;
112      else
113	{
114	  size = sysconf (_SC_NPROCESSORS_CONF);
115	  size = CPU_ALLOC_SIZE (size);
116	  if (size < sizeof (cpu_set_t))
117	    size = sizeof (cpu_set_t);
118	}
119    }
120  cpu_set_t *cpusetp = (cpu_set_t *) __builtin_alloca (size);
121  if (pthread_getaffinity_np (pthread_self (), size, cpusetp) == 0)
122    {
123      unsigned long i, len, max = 8 * size;
124      int notfirst = 0, unexpected = 1;
125
126      printf (" bound to {");
127      for (i = 0, len = 0; i < max; i++)
128	if (CPU_ISSET_S (i, size, cpusetp))
129	  {
130	    if (len == 0)
131	      {
132		if (notfirst)
133		  {
134		    unexpected = 1;
135		    printf (",");
136		  }
137		else if (i == (unsigned long) p.start)
138		  unexpected = 0;
139		notfirst = 1;
140		printf ("%lu", i);
141	      }
142	    ++len;
143	  }
144	else
145	  {
146	    if (len && len != (unsigned long) p.len)
147	      unexpected = 1;
148	    if (len > 1)
149	      printf (":%lu", len);
150	    len = 0;
151	  }
152      if (len && len != (unsigned long) p.len)
153	unexpected = 1;
154      if (len > 1)
155	printf (":%lu", len);
156      printf ("}");
157      if (p.start != -1 && unexpected)
158	{
159	  printf (", expected {%d", p.start);
160	  if (p.len != 1)
161	    printf (":%d", p.len);
162	  printf ("} instead");
163	}
164      else if (p.start != -1)
165	printf (", verified");
166    }
167}
168#else
169void
170print_affinity (struct place p)
171{
172  (void) p.start;
173  (void) p.len;
174}
175#endif
176
177
178int
179main ()
180{
181  char *env_proc_bind = getenv ("OMP_PROC_BIND");
182  int test_false = env_proc_bind && strcmp (env_proc_bind, "false") == 0;
183  int test_true = env_proc_bind && strcmp (env_proc_bind, "true") == 0;
184  int test_spread_master_close
185    = env_proc_bind && strcmp (env_proc_bind, "spread,master,close") == 0;
186  char *env_places = getenv ("OMP_PLACES");
187  int test_places = 0;
188
189#ifdef DO_FORK
190  if (env_places == NULL && contig_cpucount >= 8 && test_false
191      && getenv ("GOMP_AFFINITY") == NULL)
192    {
193      int i, j, status;
194      pid_t pid;
195      for (j = 0; j < 2; j++)
196	{
197	  if (setenv ("OMP_PROC_BIND", j ? "spread,master,close" : "true", 1)
198	      < 0)
199	    break;
200	  for (i = sizeof (places_array) / sizeof (places_array[0]) - 1;
201	       i; --i)
202	    {
203	      if (setenv ("OMP_PLACES", places_array[i].name, 1) < 0)
204		break;
205	      pid = fork ();
206	      if (pid == -1)
207		break;
208	      if (pid == 0)
209		{
210		  execl ("/proc/self/exe", "affinity-1.exe", NULL);
211		  _exit (1);
212		}
213	      if (waitpid (pid, &status, 0) < 0)
214		break;
215	      if (WIFSIGNALED (status) && WTERMSIG (status) == SIGABRT)
216		abort ();
217	      else if (!WIFEXITED (status) || WEXITSTATUS (status) != 0)
218		break;
219	    }
220	  if (i)
221	    break;
222	}
223    }
224#endif
225
226  int first = 1;
227  if (env_proc_bind)
228    {
229      printf ("OMP_PROC_BIND='%s'", env_proc_bind);
230      first = 0;
231    }
232  if (env_places)
233    printf ("%sOMP_PLACES='%s'", first ? "" : " ", env_places);
234  printf ("\n");
235
236  if (env_places && contig_cpucount >= 8
237      && (test_true || test_spread_master_close))
238    {
239      for (test_places = sizeof (places_array) / sizeof (places_array[0]) - 1;
240	   test_places; --test_places)
241	if (strcmp (env_places, places_array[test_places].name) == 0)
242	  break;
243    }
244
245#define verify(if_true, if_s_m_c) \
246  if (test_false && omp_get_proc_bind () != omp_proc_bind_false)	\
247    abort ();								\
248  if (test_true && omp_get_proc_bind () != if_true)			\
249    abort ();								\
250  if (test_spread_master_close && omp_get_proc_bind () != if_s_m_c)	\
251    abort ();
252
253  verify (omp_proc_bind_true, omp_proc_bind_spread);
254
255  printf ("Initial thread");
256  print_affinity (places_array[test_places].places[0]);
257  printf ("\n");
258  omp_set_nested (1);
259  omp_set_dynamic (0);
260
261  #pragma omp parallel if (0)
262  {
263    verify (omp_proc_bind_true, omp_proc_bind_master);
264    #pragma omp parallel if (0)
265    {
266      verify (omp_proc_bind_true, omp_proc_bind_close);
267      #pragma omp parallel if (0)
268      {
269	verify (omp_proc_bind_true, omp_proc_bind_close);
270      }
271      #pragma omp parallel if (0) proc_bind (spread)
272      {
273	verify (omp_proc_bind_spread, omp_proc_bind_spread);
274      }
275    }
276    #pragma omp parallel if (0) proc_bind (master)
277    {
278      verify (omp_proc_bind_master, omp_proc_bind_close);
279      #pragma omp parallel if (0)
280      {
281	verify (omp_proc_bind_master, omp_proc_bind_close);
282      }
283      #pragma omp parallel if (0) proc_bind (spread)
284      {
285	verify (omp_proc_bind_spread, omp_proc_bind_spread);
286      }
287    }
288  }
289
290  /* True/spread */
291  #pragma omp parallel num_threads (4)
292  {
293    verify (omp_proc_bind_true, omp_proc_bind_master);
294    #pragma omp critical
295    {
296      struct place p = places_array[0].places[0];
297      int thr = omp_get_thread_num ();
298      printf ("#1 thread %d", thr);
299      if (omp_get_num_threads () == 4 && test_spread_master_close)
300	switch (places_array[test_places].count)
301	  {
302	  case 8:
303	    /* T = 4, P = 8, each subpartition has 2 places.  */
304	  case 7:
305	    /* T = 4, P = 7, each subpartition has 2 places, but
306	       last partition, which has just one place.  */
307	    p = places_array[test_places].places[2 * thr];
308	    break;
309	  case 5:
310	    /* T = 4, P = 5, first subpartition has 2 places, the
311	       rest just one.  */
312	    p = places_array[test_places].places[thr ? 1 + thr : 0];
313	    break;
314	  case 3:
315	    /* T = 4, P = 3, unit sized subpartitions, first gets
316	       thr0 and thr3, second thr1, third thr2.  */
317	    p = places_array[test_places].places[thr == 3 ? 0 : thr];
318	    break;
319	  case 2:
320	    /* T = 4, P = 2, unit sized subpartitions, each with
321	       2 threads.  */
322	    p = places_array[test_places].places[thr / 2];
323	    break;
324	  }
325      print_affinity (p);
326      printf ("\n");
327    }
328    #pragma omp barrier
329    if (omp_get_thread_num () == 3)
330      {
331	/* True/spread, true/master.  */
332	#pragma omp parallel num_threads (3)
333	{
334	  verify (omp_proc_bind_true, omp_proc_bind_close);
335	  #pragma omp critical
336	  {
337	    struct place p = places_array[0].places[0];
338	    int thr = omp_get_thread_num ();
339	    printf ("#1,#1 thread 3,%d", thr);
340	    if (omp_get_num_threads () == 3 && test_spread_master_close)
341	      /* Outer is spread, inner master, so just bind to the
342		 place or the master thread, which is thr 3 above.  */
343	      switch (places_array[test_places].count)
344		{
345		case 8:
346		case 7:
347		  p = places_array[test_places].places[6];
348		  break;
349		case 5:
350		  p = places_array[test_places].places[4];
351		  break;
352		case 3:
353		  p = places_array[test_places].places[0];
354		  break;
355		case 2:
356		  p = places_array[test_places].places[1];
357		  break;
358		}
359	    print_affinity (p);
360	    printf ("\n");
361	  }
362	}
363	/* True/spread, spread.  */
364	#pragma omp parallel num_threads (5) proc_bind (spread)
365	{
366	  verify (omp_proc_bind_spread, omp_proc_bind_close);
367	  #pragma omp critical
368	  {
369	    struct place p = places_array[0].places[0];
370	    int thr = omp_get_thread_num ();
371	    printf ("#1,#2 thread 3,%d", thr);
372	    if (omp_get_num_threads () == 5 && test_spread_master_close)
373	      /* Outer is spread, inner spread.  */
374	      switch (places_array[test_places].count)
375		{
376		case 8:
377		  /* T = 5, P = 2, unit sized subpartitions.  */
378		  p = places_array[test_places].places[thr == 4 ? 6
379						       : 6 + thr / 2];
380		  break;
381		/* The rest are T = 5, P = 1.  */
382		case 7:
383		  p = places_array[test_places].places[6];
384		  break;
385		case 5:
386		  p = places_array[test_places].places[4];
387		  break;
388		case 3:
389		  p = places_array[test_places].places[0];
390		  break;
391		case 2:
392		  p = places_array[test_places].places[1];
393		  break;
394		}
395	    print_affinity (p);
396	    printf ("\n");
397	  }
398	  #pragma omp barrier
399	  if (omp_get_thread_num () == 3)
400	    {
401	      /* True/spread, spread, close.  */
402	      #pragma omp parallel num_threads (5) proc_bind (close)
403	      {
404		verify (omp_proc_bind_close, omp_proc_bind_close);
405		#pragma omp critical
406		{
407		  struct place p = places_array[0].places[0];
408		  int thr = omp_get_thread_num ();
409		  printf ("#1,#2,#1 thread 3,3,%d", thr);
410		  if (omp_get_num_threads () == 5 && test_spread_master_close)
411		    /* Outer is spread, inner spread, innermost close.  */
412		    switch (places_array[test_places].count)
413		      {
414		      /* All are T = 5, P = 1.  */
415		      case 8:
416			p = places_array[test_places].places[7];
417			break;
418		      case 7:
419			p = places_array[test_places].places[6];
420			break;
421		      case 5:
422			p = places_array[test_places].places[4];
423			break;
424		      case 3:
425			p = places_array[test_places].places[0];
426			break;
427		      case 2:
428			p = places_array[test_places].places[1];
429			break;
430		      }
431		  print_affinity (p);
432		  printf ("\n");
433		}
434	      }
435	    }
436	}
437	/* True/spread, master.  */
438	#pragma omp parallel num_threads (4) proc_bind(master)
439	{
440	  verify (omp_proc_bind_master, omp_proc_bind_close);
441	  #pragma omp critical
442	  {
443	    struct place p = places_array[0].places[0];
444	    int thr = omp_get_thread_num ();
445	    printf ("#1,#3 thread 3,%d", thr);
446	    if (omp_get_num_threads () == 4 && test_spread_master_close)
447	      /* Outer is spread, inner master, so just bind to the
448		 place or the master thread, which is thr 3 above.  */
449	      switch (places_array[test_places].count)
450		{
451		case 8:
452		case 7:
453		  p = places_array[test_places].places[6];
454		  break;
455		case 5:
456		  p = places_array[test_places].places[4];
457		  break;
458		case 3:
459		  p = places_array[test_places].places[0];
460		  break;
461		case 2:
462		  p = places_array[test_places].places[1];
463		  break;
464		}
465	    print_affinity (p);
466	    printf ("\n");
467	  }
468	}
469	/* True/spread, close.  */
470	#pragma omp parallel num_threads (6) proc_bind (close)
471	{
472	  verify (omp_proc_bind_close, omp_proc_bind_close);
473	  #pragma omp critical
474	  {
475	    struct place p = places_array[0].places[0];
476	    int thr = omp_get_thread_num ();
477	    printf ("#1,#4 thread 3,%d", thr);
478	    if (omp_get_num_threads () == 6 && test_spread_master_close)
479	      /* Outer is spread, inner close.  */
480	      switch (places_array[test_places].count)
481		{
482		case 8:
483		  /* T = 6, P = 2, unit sized subpartitions.  */
484		  p = places_array[test_places].places[6 + thr / 3];
485		  break;
486		/* The rest are T = 6, P = 1.  */
487		case 7:
488		  p = places_array[test_places].places[6];
489		  break;
490		case 5:
491		  p = places_array[test_places].places[4];
492		  break;
493		case 3:
494		  p = places_array[test_places].places[0];
495		  break;
496		case 2:
497		  p = places_array[test_places].places[1];
498		  break;
499		}
500	    print_affinity (p);
501	    printf ("\n");
502	  }
503	}
504      }
505  }
506
507  /* Spread.  */
508  #pragma omp parallel num_threads (5) proc_bind(spread)
509  {
510    verify (omp_proc_bind_spread, omp_proc_bind_master);
511    #pragma omp critical
512    {
513      struct place p = places_array[0].places[0];
514      int thr = omp_get_thread_num ();
515      printf ("#2 thread %d", thr);
516      if (omp_get_num_threads () == 5
517	  && (test_spread_master_close || test_true))
518	switch (places_array[test_places].count)
519	  {
520	  case 8:
521	    /* T = 5, P = 8, first 3 subpartitions have 2 places, last
522	       2 one place.  */
523	    p = places_array[test_places].places[thr < 3 ? 2 * thr : 3 + thr];
524	    break;
525	  case 7:
526	    /* T = 5, P = 7, first 2 subpartitions have 2 places, last
527	       3 one place.  */
528	    p = places_array[test_places].places[thr < 2 ? 2 * thr : 2 + thr];
529	    break;
530	  case 5:
531	    /* T = 5, P = 5, unit sized subpartitions, each one with one
532	       thread.  */
533	    p = places_array[test_places].places[thr];
534	    break;
535	  case 3:
536	    /* T = 5, P = 3, unit sized subpartitions, first gets
537	       thr0 and thr3, second thr1 and thr4, third thr2.  */
538	    p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
539	    break;
540	  case 2:
541	    /* T = 5, P = 2, unit sized subpartitions, first with
542	       thr{0,1,4} and second with thr{2,3}.  */
543	    p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
544	    break;
545	  }
546      print_affinity (p);
547      printf ("\n");
548    }
549    #pragma omp barrier
550    if (omp_get_thread_num () == 3)
551      {
552	int pp = 0;
553	switch (places_array[test_places].count)
554	  {
555	  case 8: pp = 6; break;
556	  case 7: pp = 5; break;
557	  case 5: pp = 3; break;
558	  case 2: pp = 1; break;
559	  }
560	/* Spread, spread/master.  */
561	#pragma omp parallel num_threads (3) firstprivate (pp)
562	{
563	  verify (omp_proc_bind_spread, omp_proc_bind_close);
564	  #pragma omp critical
565	  {
566	    struct place p = places_array[0].places[0];
567	    int thr = omp_get_thread_num ();
568	    printf ("#2,#1 thread 3,%d", thr);
569	    if (test_spread_master_close || test_true)
570	      /* Outer is spread, inner spread resp. master, bit we have
571		 just unit sized partitions.  */
572	      p = places_array[test_places].places[pp];
573	    print_affinity (p);
574	    printf ("\n");
575	  }
576	}
577	/* Spread, spread.  */
578	#pragma omp parallel num_threads (5) proc_bind (spread) \
579			     firstprivate (pp)
580	{
581	  verify (omp_proc_bind_spread, omp_proc_bind_close);
582	  #pragma omp critical
583	  {
584	    struct place p = places_array[0].places[0];
585	    int thr = omp_get_thread_num ();
586	    printf ("#2,#2 thread 3,%d", thr);
587	    if (test_spread_master_close || test_true)
588	      /* Outer is spread, inner spread, bit we have
589		 just unit sized partitions.  */
590	      p = places_array[test_places].places[pp];
591	    print_affinity (p);
592	    printf ("\n");
593	  }
594	}
595	/* Spread, master.  */
596	#pragma omp parallel num_threads (4) proc_bind(master) \
597			     firstprivate(pp)
598	{
599	  verify (omp_proc_bind_master, omp_proc_bind_close);
600	  #pragma omp critical
601	  {
602	    struct place p = places_array[0].places[0];
603	    int thr = omp_get_thread_num ();
604	    printf ("#2,#3 thread 3,%d", thr);
605	    if (test_spread_master_close || test_true)
606	      /* Outer is spread, inner master, bit we have
607		 just unit sized partitions.  */
608	      p = places_array[test_places].places[pp];
609	    print_affinity (p);
610	    printf ("\n");
611	  }
612	}
613	/* Spread, close.  */
614	#pragma omp parallel num_threads (6) proc_bind (close) \
615			     firstprivate (pp)
616	{
617	  verify (omp_proc_bind_close, omp_proc_bind_close);
618	  #pragma omp critical
619	  {
620	    struct place p = places_array[0].places[0];
621	    int thr = omp_get_thread_num ();
622	    printf ("#2,#4 thread 3,%d", thr);
623	    if (test_spread_master_close || test_true)
624	      /* Outer is spread, inner close, bit we have
625		 just unit sized partitions.  */
626	      p = places_array[test_places].places[pp];
627	    print_affinity (p);
628	    printf ("\n");
629	  }
630	}
631      }
632  }
633
634  /* Master.  */
635  #pragma omp parallel num_threads (3) proc_bind(master)
636  {
637    verify (omp_proc_bind_master, omp_proc_bind_master);
638    #pragma omp critical
639    {
640      struct place p = places_array[0].places[0];
641      int thr = omp_get_thread_num ();
642      printf ("#3 thread %d", thr);
643      if (test_spread_master_close || test_true)
644	p = places_array[test_places].places[0];
645      print_affinity (p);
646      printf ("\n");
647    }
648    #pragma omp barrier
649    if (omp_get_thread_num () == 2)
650      {
651	/* Master, master.  */
652	#pragma omp parallel num_threads (4)
653	{
654	  verify (omp_proc_bind_master, omp_proc_bind_close);
655	  #pragma omp critical
656	  {
657	    struct place p = places_array[0].places[0];
658	    int thr = omp_get_thread_num ();
659	    printf ("#3,#1 thread 2,%d", thr);
660	    if (test_spread_master_close || test_true)
661	      /* Outer is master, inner is master.  */
662	      p = places_array[test_places].places[0];
663	    print_affinity (p);
664	    printf ("\n");
665	  }
666	}
667	/* Master, spread.  */
668	#pragma omp parallel num_threads (4) proc_bind (spread)
669	{
670	  verify (omp_proc_bind_spread, omp_proc_bind_close);
671	  #pragma omp critical
672	  {
673	    struct place p = places_array[0].places[0];
674	    int thr = omp_get_thread_num ();
675	    printf ("#3,#2 thread 2,%d", thr);
676	    if (omp_get_num_threads () == 4
677		&& (test_spread_master_close || test_true))
678	      /* Outer is master, inner is spread.  */
679	      switch (places_array[test_places].count)
680		{
681		case 8:
682		  /* T = 4, P = 8, each subpartition has 2 places.  */
683		case 7:
684		  /* T = 4, P = 7, each subpartition has 2 places, but
685		     last partition, which has just one place.  */
686		  p = places_array[test_places].places[2 * thr];
687		  break;
688		case 5:
689		  /* T = 4, P = 5, first subpartition has 2 places, the
690		     rest just one.  */
691		  p = places_array[test_places].places[thr ? 1 + thr : 0];
692		  break;
693		case 3:
694		  /* T = 4, P = 3, unit sized subpartitions, first gets
695		     thr0 and thr3, second thr1, third thr2.  */
696		  p = places_array[test_places].places[thr == 3 ? 0 : thr];
697		  break;
698		case 2:
699		  /* T = 4, P = 2, unit sized subpartitions, each with
700		     2 threads.  */
701		  p = places_array[test_places].places[thr / 2];
702		  break;
703		}
704	    print_affinity (p);
705	    printf ("\n");
706	  }
707	  #pragma omp barrier
708	  if (omp_get_thread_num () == 0)
709	    {
710	      /* Master, spread, close.  */
711	      #pragma omp parallel num_threads (5) proc_bind (close)
712	      {
713		verify (omp_proc_bind_close, omp_proc_bind_close);
714		#pragma omp critical
715		{
716		  struct place p = places_array[0].places[0];
717		  int thr = omp_get_thread_num ();
718		  printf ("#3,#2,#1 thread 2,0,%d", thr);
719		  if (omp_get_num_threads () == 5
720		      && (test_spread_master_close || test_true))
721		    /* Outer is master, inner spread, innermost close.  */
722		    switch (places_array[test_places].count)
723		      {
724		      /* First 3 are T = 5, P = 2.  */
725		      case 8:
726		      case 7:
727		      case 5:
728			p = places_array[test_places].places[(thr & 2) / 2];
729			break;
730		      /* All the rest are T = 5, P = 1.  */
731		      case 3:
732		      case 2:
733			p = places_array[test_places].places[0];
734			break;
735		      }
736		  print_affinity (p);
737		  printf ("\n");
738		}
739	      }
740	    }
741	  #pragma omp barrier
742	  if (omp_get_thread_num () == 3)
743	    {
744	      /* Master, spread, close.  */
745	      #pragma omp parallel num_threads (5) proc_bind (close)
746	      {
747		verify (omp_proc_bind_close, omp_proc_bind_close);
748		#pragma omp critical
749		{
750		  struct place p = places_array[0].places[0];
751		  int thr = omp_get_thread_num ();
752		  printf ("#3,#2,#2 thread 2,3,%d", thr);
753		  if (omp_get_num_threads () == 5
754		      && (test_spread_master_close || test_true))
755		    /* Outer is master, inner spread, innermost close.  */
756		    switch (places_array[test_places].count)
757		      {
758		      case 8:
759			/* T = 5, P = 2.  */
760			p = places_array[test_places].places[6
761							     + (thr & 2) / 2];
762			break;
763		      /* All the rest are T = 5, P = 1.  */
764		      case 7:
765			p = places_array[test_places].places[6];
766			break;
767		      case 5:
768			p = places_array[test_places].places[4];
769			break;
770		      case 3:
771			p = places_array[test_places].places[0];
772			break;
773		      case 2:
774			p = places_array[test_places].places[1];
775			break;
776		      }
777		  print_affinity (p);
778		  printf ("\n");
779		}
780	      }
781	    }
782	}
783	/* Master, master.  */
784	#pragma omp parallel num_threads (4) proc_bind(master)
785	{
786	  verify (omp_proc_bind_master, omp_proc_bind_close);
787	  #pragma omp critical
788	  {
789	    struct place p = places_array[0].places[0];
790	    int thr = omp_get_thread_num ();
791	    printf ("#3,#3 thread 2,%d", thr);
792	    if (test_spread_master_close || test_true)
793	      /* Outer is master, inner master.  */
794	      p = places_array[test_places].places[0];
795	    print_affinity (p);
796	    printf ("\n");
797	  }
798	}
799	/* Master, close.  */
800	#pragma omp parallel num_threads (6) proc_bind (close)
801	{
802	  verify (omp_proc_bind_close, omp_proc_bind_close);
803	  #pragma omp critical
804	  {
805	    struct place p = places_array[0].places[0];
806	    int thr = omp_get_thread_num ();
807	    printf ("#3,#4 thread 2,%d", thr);
808	    if (omp_get_num_threads () == 6
809		&& (test_spread_master_close || test_true))
810	      switch (places_array[test_places].count)
811		{
812		case 8:
813		  /* T = 6, P = 8.  */
814		case 7:
815		  /* T = 6, P = 7.  */
816		  p = places_array[test_places].places[thr];
817		  break;
818		case 5:
819		  /* T = 6, P = 5.  thr{0,5} go into the first place.  */
820		  p = places_array[test_places].places[thr == 5 ? 0 : thr];
821		  break;
822		case 3:
823		  /* T = 6, P = 3, two threads into each place.  */
824		  p = places_array[test_places].places[thr / 2];
825		  break;
826		case 2:
827		  /* T = 6, P = 2, 3 threads into each place.  */
828		  p = places_array[test_places].places[thr / 3];
829		  break;
830		}
831	    print_affinity (p);
832	    printf ("\n");
833	  }
834	}
835      }
836  }
837
838  #pragma omp parallel num_threads (5) proc_bind(close)
839  {
840    verify (omp_proc_bind_close, omp_proc_bind_master);
841    #pragma omp critical
842    {
843      struct place p = places_array[0].places[0];
844      int thr = omp_get_thread_num ();
845      printf ("#4 thread %d", thr);
846      if (omp_get_num_threads () == 5
847	  && (test_spread_master_close || test_true))
848	switch (places_array[test_places].count)
849	  {
850	  case 8:
851	    /* T = 5, P = 8.  */
852	  case 7:
853	    /* T = 5, P = 7.  */
854	  case 5:
855	    /* T = 5, P = 5.  */
856	    p = places_array[test_places].places[thr];
857	    break;
858	  case 3:
859	    /* T = 5, P = 3, thr{0,3} in first place, thr{1,4} in second,
860	       thr2 in third.  */
861	    p = places_array[test_places].places[thr >= 3 ? thr - 3 : thr];
862	    break;
863	  case 2:
864	    /* T = 5, P = 2, thr{0,1,4} in first place, thr{2,3} in second.  */
865	    p = places_array[test_places].places[thr == 4 ? 0 : thr / 2];
866	    break;
867	  }
868      print_affinity (p);
869      printf ("\n");
870    }
871    #pragma omp barrier
872    if (omp_get_thread_num () == 2)
873      {
874	int pp = 0;
875	switch (places_array[test_places].count)
876	  {
877	  case 8:
878	  case 7:
879	  case 5:
880	  case 3:
881	    pp = 2;
882	    break;
883	  case 2:
884	    pp = 1;
885	    break;
886	  }
887	/* Close, close/master.  */
888	#pragma omp parallel num_threads (4) firstprivate (pp)
889	{
890	  verify (omp_proc_bind_close, omp_proc_bind_close);
891	  #pragma omp critical
892	  {
893	    struct place p = places_array[0].places[0];
894	    int thr = omp_get_thread_num ();
895	    printf ("#4,#1 thread 2,%d", thr);
896	    if (test_spread_master_close)
897	      /* Outer is close, inner is master.  */
898	      p = places_array[test_places].places[pp];
899	    else if (omp_get_num_threads () == 4 && test_true)
900	      /* Outer is close, inner is close.  */
901	      switch (places_array[test_places].count)
902		{
903		case 8:
904		  /* T = 4, P = 8.  */
905		case 7:
906		  /* T = 4, P = 7.  */
907		  p = places_array[test_places].places[2 + thr];
908		  break;
909		case 5:
910		  /* T = 4, P = 5.  There is wrap-around for thr3.  */
911		  p = places_array[test_places].places[thr == 3 ? 0 : 2 + thr];
912		  break;
913		case 3:
914		  /* T = 4, P = 3, thr{0,3} go into p2, thr1 into p0, thr2
915		     into p1.  */
916		  p = places_array[test_places].places[(2 + thr) % 3];
917		  break;
918		case 2:
919		  /* T = 4, P = 2, 2 threads into each place.  */
920		  p = places_array[test_places].places[1 - thr / 2];
921		  break;
922		}
923
924	    print_affinity (p);
925	    printf ("\n");
926	  }
927	}
928	/* Close, spread.  */
929	#pragma omp parallel num_threads (4) proc_bind (spread)
930	{
931	  verify (omp_proc_bind_spread, omp_proc_bind_close);
932	  #pragma omp critical
933	  {
934	    struct place p = places_array[0].places[0];
935	    int thr = omp_get_thread_num ();
936	    printf ("#4,#2 thread 2,%d", thr);
937	    if (omp_get_num_threads () == 4
938		&& (test_spread_master_close || test_true))
939	      /* Outer is close, inner is spread.  */
940	      switch (places_array[test_places].count)
941		{
942		case 8:
943		  /* T = 4, P = 8, each subpartition has 2 places.  */
944		case 7:
945		  /* T = 4, P = 7, each subpartition has 2 places, but
946		     last partition, which has just one place.  */
947		  p = places_array[test_places].places[thr == 3 ? 0
948						       : 2 + 2 * thr];
949		  break;
950		case 5:
951		  /* T = 4, P = 5, first subpartition has 2 places, the
952		     rest just one.  */
953		  p = places_array[test_places].places[thr == 3 ? 0
954						       : 2 + thr];
955		  break;
956		case 3:
957		  /* T = 4, P = 3, unit sized subpartitions, third gets
958		     thr0 and thr3, first thr1, second thr2.  */
959		  p = places_array[test_places].places[thr == 0 ? 2 : thr - 1];
960		  break;
961		case 2:
962		  /* T = 4, P = 2, unit sized subpartitions, each with
963		     2 threads.  */
964		  p = places_array[test_places].places[1 - thr / 2];
965		  break;
966		}
967	    print_affinity (p);
968	    printf ("\n");
969	  }
970	  #pragma omp barrier
971	  if (omp_get_thread_num () == 0)
972	    {
973	      /* Close, spread, close.  */
974	      #pragma omp parallel num_threads (5) proc_bind (close)
975	      {
976		verify (omp_proc_bind_close, omp_proc_bind_close);
977		#pragma omp critical
978		{
979		  struct place p = places_array[0].places[0];
980		  int thr = omp_get_thread_num ();
981		  printf ("#4,#2,#1 thread 2,0,%d", thr);
982		  if (omp_get_num_threads () == 5
983		      && (test_spread_master_close || test_true))
984		    /* Outer is close, inner spread, innermost close.  */
985		    switch (places_array[test_places].count)
986		      {
987		      case 8:
988		      case 7:
989			/* T = 5, P = 2.  */
990			p = places_array[test_places].places[2
991							     + (thr & 2) / 2];
992			break;
993		      /* All the rest are T = 5, P = 1.  */
994		      case 5:
995		      case 3:
996			p = places_array[test_places].places[2];
997			break;
998		      case 2:
999			p = places_array[test_places].places[1];
1000			break;
1001		      }
1002		  print_affinity (p);
1003		  printf ("\n");
1004		}
1005	      }
1006	    }
1007	  #pragma omp barrier
1008	  if (omp_get_thread_num () == 2)
1009	    {
1010	      /* Close, spread, close.  */
1011	      #pragma omp parallel num_threads (5) proc_bind (close)
1012	      {
1013		verify (omp_proc_bind_close, omp_proc_bind_close);
1014		#pragma omp critical
1015		{
1016		  struct place p = places_array[0].places[0];
1017		  int thr = omp_get_thread_num ();
1018		  printf ("#4,#2,#2 thread 2,2,%d", thr);
1019		  if (omp_get_num_threads () == 5
1020		      && (test_spread_master_close || test_true))
1021		    /* Outer is close, inner spread, innermost close.  */
1022		    switch (places_array[test_places].count)
1023		      {
1024		      case 8:
1025			/* T = 5, P = 2.  */
1026			p = places_array[test_places].places[6
1027							     + (thr & 2) / 2];
1028			break;
1029		      /* All the rest are T = 5, P = 1.  */
1030		      case 7:
1031			p = places_array[test_places].places[6];
1032			break;
1033		      case 5:
1034			p = places_array[test_places].places[4];
1035			break;
1036		      case 3:
1037			p = places_array[test_places].places[1];
1038			break;
1039		      case 2:
1040			p = places_array[test_places].places[0];
1041			break;
1042		      }
1043		  print_affinity (p);
1044		  printf ("\n");
1045		}
1046	      }
1047	    }
1048	  #pragma omp barrier
1049	  if (omp_get_thread_num () == 3)
1050	    {
1051	      /* Close, spread, close.  */
1052	      #pragma omp parallel num_threads (5) proc_bind (close)
1053	      {
1054		verify (omp_proc_bind_close, omp_proc_bind_close);
1055		#pragma omp critical
1056		{
1057		  struct place p = places_array[0].places[0];
1058		  int thr = omp_get_thread_num ();
1059		  printf ("#4,#2,#3 thread 2,3,%d", thr);
1060		  if (omp_get_num_threads () == 5
1061		      && (test_spread_master_close || test_true))
1062		    /* Outer is close, inner spread, innermost close.  */
1063		    switch (places_array[test_places].count)
1064		      {
1065		      case 8:
1066		      case 7:
1067		      case 5:
1068			/* T = 5, P = 2.  */
1069			p = places_array[test_places].places[(thr & 2) / 2];
1070			break;
1071		      /* All the rest are T = 5, P = 1.  */
1072		      case 3:
1073			p = places_array[test_places].places[2];
1074			break;
1075		      case 2:
1076			p = places_array[test_places].places[0];
1077			break;
1078		      }
1079		  print_affinity (p);
1080		  printf ("\n");
1081		}
1082	      }
1083	    }
1084	}
1085	/* Close, master.  */
1086	#pragma omp parallel num_threads (4) proc_bind(master) \
1087			     firstprivate (pp)
1088	{
1089	  verify (omp_proc_bind_master, omp_proc_bind_close);
1090	  #pragma omp critical
1091	  {
1092	    struct place p = places_array[0].places[0];
1093	    int thr = omp_get_thread_num ();
1094	    printf ("#4,#3 thread 2,%d", thr);
1095	    if (test_spread_master_close || test_true)
1096	      /* Outer is close, inner master.  */
1097	      p = places_array[test_places].places[pp];
1098	    print_affinity (p);
1099	    printf ("\n");
1100	  }
1101	}
1102	/* Close, close.  */
1103	#pragma omp parallel num_threads (6) proc_bind (close)
1104	{
1105	  verify (omp_proc_bind_close, omp_proc_bind_close);
1106	  #pragma omp critical
1107	  {
1108	    struct place p = places_array[0].places[0];
1109	    int thr = omp_get_thread_num ();
1110	    printf ("#4,#4 thread 2,%d", thr);
1111	    if (omp_get_num_threads () == 6
1112		&& (test_spread_master_close || test_true))
1113	      switch (places_array[test_places].count)
1114		{
1115		case 8:
1116		  /* T = 6, P = 8.  */
1117		  p = places_array[test_places].places[2 + thr];
1118		  break;
1119		case 7:
1120		  /* T = 6, P = 7.  */
1121		  p = places_array[test_places].places[thr == 5 ? 0 : 2 + thr];
1122		  break;
1123		case 5:
1124		  /* T = 6, P = 5.  thr{0,5} go into the third place.  */
1125		  p = places_array[test_places].places[thr >= 3 ? thr - 3
1126						       : 2 + thr];
1127		  break;
1128		case 3:
1129		  /* T = 6, P = 3, two threads into each place.  */
1130		  p = places_array[test_places].places[thr < 2 ? 2
1131						       : thr / 2 - 1];
1132		  break;
1133		case 2:
1134		  /* T = 6, P = 2, 3 threads into each place.  */
1135		  p = places_array[test_places].places[1 - thr / 3];
1136		  break;
1137		}
1138	    print_affinity (p);
1139	    printf ("\n");
1140	  }
1141	}
1142      }
1143  }
1144
1145  return 0;
1146}
1147