1169695Skan/* Copyright (C) 2005 Free Software Foundation, Inc. 2169695Skan Contributed by Richard Henderson <rth@redhat.com>. 3169695Skan 4169695Skan This file is part of the GNU OpenMP Library (libgomp). 5169695Skan 6169695Skan Libgomp is free software; you can redistribute it and/or modify it 7169695Skan under the terms of the GNU Lesser General Public License as published by 8169695Skan the Free Software Foundation; either version 2.1 of the License, or 9169695Skan (at your option) any later version. 10169695Skan 11169695Skan Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY 12169695Skan WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13169695Skan FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for 14169695Skan more details. 15169695Skan 16169695Skan You should have received a copy of the GNU Lesser General Public License 17169695Skan along with libgomp; see the file COPYING.LIB. If not, write to the 18169695Skan Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 19169695Skan MA 02110-1301, USA. */ 20169695Skan 21169695Skan/* As a special exception, if you link this library with other files, some 22169695Skan of which are compiled with GCC, to produce an executable, this library 23169695Skan does not by itself cause the resulting executable to be covered by the 24169695Skan GNU General Public License. This exception does not however invalidate 25169695Skan any other reasons why the executable file might be covered by the GNU 26169695Skan General Public License. */ 27169695Skan 28169695Skan/* This file contains routines for managing work-share iteration, both 29169695Skan for loops and sections. */ 30169695Skan 31169695Skan#include "libgomp.h" 32169695Skan#include <stdlib.h> 33169695Skan 34169695Skan 35169695Skan/* This function implements the STATIC scheduling method. The caller should 36169695Skan iterate *pstart <= x < *pend. Return zero if there are more iterations 37169695Skan to perform; nonzero if not. Return less than 0 if this thread had 38169695Skan received the absolutely last iteration. */ 39169695Skan 40169695Skanint 41169695Skangomp_iter_static_next (long *pstart, long *pend) 42169695Skan{ 43169695Skan struct gomp_thread *thr = gomp_thread (); 44169695Skan struct gomp_team *team = thr->ts.team; 45169695Skan struct gomp_work_share *ws = thr->ts.work_share; 46169695Skan unsigned long nthreads = team ? team->nthreads : 1; 47169695Skan 48169695Skan if (thr->ts.static_trip == -1) 49169695Skan return -1; 50169695Skan 51169695Skan /* Quick test for degenerate teams and orphaned constructs. */ 52169695Skan if (nthreads == 1) 53169695Skan { 54169695Skan *pstart = ws->next; 55169695Skan *pend = ws->end; 56169695Skan thr->ts.static_trip = -1; 57169695Skan return ws->next == ws->end; 58169695Skan } 59169695Skan 60169695Skan /* We interpret chunk_size zero as "unspecified", which means that we 61169695Skan should break up the iterations such that each thread makes only one 62169695Skan trip through the outer loop. */ 63169695Skan if (ws->chunk_size == 0) 64169695Skan { 65169695Skan unsigned long n, q, i; 66169695Skan unsigned long s0, e0; 67169695Skan long s, e; 68169695Skan 69169695Skan if (thr->ts.static_trip > 0) 70169695Skan return 1; 71169695Skan 72169695Skan /* Compute the total number of iterations. */ 73169695Skan s = ws->incr + (ws->incr > 0 ? -1 : 1); 74169695Skan n = (ws->end - ws->next + s) / ws->incr; 75169695Skan i = thr->ts.team_id; 76169695Skan 77169695Skan /* Compute the "zero-based" start and end points. That is, as 78169695Skan if the loop began at zero and incremented by one. */ 79169695Skan q = n / nthreads; 80169695Skan q += (q * nthreads != n); 81169695Skan s0 = q * i; 82169695Skan e0 = s0 + q; 83169695Skan if (e0 > n) 84169695Skan e0 = n; 85169695Skan 86169695Skan /* Notice when no iterations allocated for this thread. */ 87169695Skan if (s0 >= e0) 88169695Skan { 89169695Skan thr->ts.static_trip = 1; 90169695Skan return 1; 91169695Skan } 92169695Skan 93169695Skan /* Transform these to the actual start and end numbers. */ 94169695Skan s = (long)s0 * ws->incr + ws->next; 95169695Skan e = (long)e0 * ws->incr + ws->next; 96169695Skan 97169695Skan *pstart = s; 98169695Skan *pend = e; 99169695Skan thr->ts.static_trip = (e0 == n ? -1 : 1); 100169695Skan return 0; 101169695Skan } 102169695Skan else 103169695Skan { 104169695Skan unsigned long n, s0, e0, i, c; 105169695Skan long s, e; 106169695Skan 107169695Skan /* Otherwise, each thread gets exactly chunk_size iterations 108169695Skan (if available) each time through the loop. */ 109169695Skan 110169695Skan s = ws->incr + (ws->incr > 0 ? -1 : 1); 111169695Skan n = (ws->end - ws->next + s) / ws->incr; 112169695Skan i = thr->ts.team_id; 113169695Skan c = ws->chunk_size; 114169695Skan 115169695Skan /* Initial guess is a C sized chunk positioned nthreads iterations 116169695Skan in, offset by our thread number. */ 117169695Skan s0 = (thr->ts.static_trip * nthreads + i) * c; 118169695Skan e0 = s0 + c; 119169695Skan 120169695Skan /* Detect overflow. */ 121169695Skan if (s0 >= n) 122169695Skan return 1; 123169695Skan if (e0 > n) 124169695Skan e0 = n; 125169695Skan 126169695Skan /* Transform these to the actual start and end numbers. */ 127169695Skan s = (long)s0 * ws->incr + ws->next; 128169695Skan e = (long)e0 * ws->incr + ws->next; 129169695Skan 130169695Skan *pstart = s; 131169695Skan *pend = e; 132169695Skan 133169695Skan if (e0 == n) 134169695Skan thr->ts.static_trip = -1; 135169695Skan else 136169695Skan thr->ts.static_trip++; 137169695Skan return 0; 138169695Skan } 139169695Skan} 140169695Skan 141169695Skan 142169695Skan/* This function implements the DYNAMIC scheduling method. Arguments are 143169695Skan as for gomp_iter_static_next. This function must be called with ws->lock 144169695Skan held. */ 145169695Skan 146169695Skanbool 147169695Skangomp_iter_dynamic_next_locked (long *pstart, long *pend) 148169695Skan{ 149169695Skan struct gomp_thread *thr = gomp_thread (); 150169695Skan struct gomp_work_share *ws = thr->ts.work_share; 151169695Skan long start, end, chunk, left; 152169695Skan 153169695Skan start = ws->next; 154169695Skan if (start == ws->end) 155169695Skan return false; 156169695Skan 157169695Skan chunk = ws->chunk_size * ws->incr; 158169695Skan left = ws->end - start; 159169695Skan if (ws->incr < 0) 160169695Skan { 161169695Skan if (chunk < left) 162169695Skan chunk = left; 163169695Skan } 164169695Skan else 165169695Skan { 166169695Skan if (chunk > left) 167169695Skan chunk = left; 168169695Skan } 169169695Skan end = start + chunk; 170169695Skan 171169695Skan ws->next = end; 172169695Skan *pstart = start; 173169695Skan *pend = end; 174169695Skan return true; 175169695Skan} 176169695Skan 177169695Skan 178169695Skan#ifdef HAVE_SYNC_BUILTINS 179169695Skan/* Similar, but doesn't require the lock held, and uses compare-and-swap 180169695Skan instead. Note that the only memory value that changes is ws->next. */ 181169695Skan 182169695Skanbool 183169695Skangomp_iter_dynamic_next (long *pstart, long *pend) 184169695Skan{ 185169695Skan struct gomp_thread *thr = gomp_thread (); 186169695Skan struct gomp_work_share *ws = thr->ts.work_share; 187169695Skan long start, end, nend, chunk, incr; 188169695Skan 189169695Skan start = ws->next; 190169695Skan end = ws->end; 191169695Skan incr = ws->incr; 192169695Skan chunk = ws->chunk_size * incr; 193169695Skan 194169695Skan while (1) 195169695Skan { 196169695Skan long left = end - start; 197169695Skan long tmp; 198169695Skan 199169695Skan if (start == end) 200169695Skan return false; 201169695Skan 202169695Skan if (incr < 0) 203169695Skan { 204169695Skan if (chunk < left) 205169695Skan chunk = left; 206169695Skan } 207169695Skan else 208169695Skan { 209169695Skan if (chunk > left) 210169695Skan chunk = left; 211169695Skan } 212169695Skan nend = start + chunk; 213169695Skan 214169695Skan tmp = __sync_val_compare_and_swap (&ws->next, start, nend); 215169695Skan if (__builtin_expect (tmp == start, 1)) 216169695Skan break; 217169695Skan 218169695Skan start = tmp; 219169695Skan } 220169695Skan 221169695Skan *pstart = start; 222169695Skan *pend = nend; 223169695Skan return true; 224169695Skan} 225169695Skan#endif /* HAVE_SYNC_BUILTINS */ 226169695Skan 227169695Skan 228169695Skan/* This function implements the GUIDED scheduling method. Arguments are 229169695Skan as for gomp_iter_static_next. This function must be called with the 230169695Skan work share lock held. */ 231169695Skan 232169695Skanbool 233169695Skangomp_iter_guided_next_locked (long *pstart, long *pend) 234169695Skan{ 235169695Skan struct gomp_thread *thr = gomp_thread (); 236169695Skan struct gomp_work_share *ws = thr->ts.work_share; 237169695Skan struct gomp_team *team = thr->ts.team; 238169695Skan unsigned long nthreads = team ? team->nthreads : 1; 239169695Skan unsigned long n, q; 240169695Skan long start, end; 241169695Skan 242169695Skan if (ws->next == ws->end) 243169695Skan return false; 244169695Skan 245282152Spfg start = ws->next; 246282152Spfg n = (ws->end - start) / ws->incr; 247169695Skan q = (n + nthreads - 1) / nthreads; 248169695Skan 249169695Skan if (q < ws->chunk_size) 250169695Skan q = ws->chunk_size; 251282152Spfg if (q <= n) 252282152Spfg end = start + q * ws->incr; 253282152Spfg else 254282152Spfg end = ws->end; 255169695Skan 256169695Skan ws->next = end; 257169695Skan *pstart = start; 258169695Skan *pend = end; 259169695Skan return true; 260169695Skan} 261169695Skan 262169695Skan#ifdef HAVE_SYNC_BUILTINS 263169695Skan/* Similar, but doesn't require the lock held, and uses compare-and-swap 264169695Skan instead. Note that the only memory value that changes is ws->next. */ 265169695Skan 266169695Skanbool 267169695Skangomp_iter_guided_next (long *pstart, long *pend) 268169695Skan{ 269169695Skan struct gomp_thread *thr = gomp_thread (); 270169695Skan struct gomp_work_share *ws = thr->ts.work_share; 271169695Skan struct gomp_team *team = thr->ts.team; 272169695Skan unsigned long nthreads = team ? team->nthreads : 1; 273169695Skan long start, end, nend, incr; 274169695Skan unsigned long chunk_size; 275169695Skan 276169695Skan start = ws->next; 277169695Skan end = ws->end; 278169695Skan incr = ws->incr; 279169695Skan chunk_size = ws->chunk_size; 280169695Skan 281169695Skan while (1) 282169695Skan { 283169695Skan unsigned long n, q; 284169695Skan long tmp; 285169695Skan 286169695Skan if (start == end) 287169695Skan return false; 288169695Skan 289282152Spfg n = (end - start) / incr; 290169695Skan q = (n + nthreads - 1) / nthreads; 291169695Skan 292169695Skan if (q < chunk_size) 293169695Skan q = chunk_size; 294282152Spfg if (__builtin_expect (q <= n, 1)) 295282152Spfg nend = start + q * incr; 296282152Spfg else 297282152Spfg nend = end; 298169695Skan 299169695Skan tmp = __sync_val_compare_and_swap (&ws->next, start, nend); 300169695Skan if (__builtin_expect (tmp == start, 1)) 301169695Skan break; 302169695Skan 303169695Skan start = tmp; 304169695Skan } 305169695Skan 306169695Skan *pstart = start; 307169695Skan *pend = nend; 308169695Skan return true; 309169695Skan} 310169695Skan#endif /* HAVE_SYNC_BUILTINS */ 311