1/*
2    Title:      Thread functions
3    Author:     David C.J. Matthews
4
5    Copyright (c) 2007,2008,2013-15, 2017, 2019, 2020 David C.J. Matthews
6
7    This library is free software; you can redistribute it and/or
8    modify it under the terms of the GNU Lesser General Public
9    License version 2.1 as published by the Free Software Foundation.
10
11    This library is distributed in the hope that it will be useful,
12    but WITHOUT ANY WARRANTY; without even the implied warranty of
13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14    Lesser General Public License for more details.
15
16    You should have received a copy of the GNU Lesser General Public
17    License along with this library; if not, write to the Free Software
18    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19
20*/
21
22#ifdef HAVE_CONFIG_H
23#include "config.h"
24#elif defined(_WIN32)
25#include "winconfig.h"
26#else
27#error "No configuration file"
28#endif
29
30#ifdef HAVE_STDIO_H
31#include <stdio.h>
32#endif
33
34#ifdef HAVE_ERRNO_H
35#include <errno.h>
36#endif
37
38#ifdef HAVE_STDLIB_H
39#include <stdlib.h>
40#endif
41
42#ifdef HAVE_STRING_H
43#include <string.h>
44#endif
45
46#ifdef HAVE_LIMITS_H
47#include <limits.h>
48#endif
49
50#ifdef HAVE_ASSERT_H
51#include <assert.h>
52#define ASSERT(x) assert(x)
53#else
54#define ASSERT(x)
55#endif
56
57#ifdef HAVE_PROCESS_H
58#include <process.h>
59#endif
60
61#ifdef HAVE_SYS_TYPES_H
62#include <sys/types.h>
63#endif
64
65#ifdef HAVE_SYS_STAT_H
66#include <sys/stat.h>
67#endif
68
69#ifdef HAVE_SYS_TIME_H
70#include <sys/time.h>
71#endif
72
73#ifdef HAVE_UNISTD_H
74#include <unistd.h> // Want unistd for _SC_NPROCESSORS_ONLN at least
75#endif
76
77#ifdef HAVE_SYS_SELECT_H
78#include <sys/select.h>
79#endif
80
81#ifdef HAVE_WINDOWS_H
82#include <windows.h>
83#endif
84
85#if (!defined(_WIN32))
86#include <pthread.h>
87#endif
88
89#ifdef HAVE_SYS_SYSCTL_H
90// Used determine number of processors in Mac OS X.
91#include <sys/sysctl.h>
92#endif
93
94#if (defined(_WIN32))
95#include <tchar.h>
96#endif
97
98#include <new>
99#include <vector>
100
101/************************************************************************
102 *
103 * Include runtime headers
104 *
105 ************************************************************************/
106
107#include "globals.h"
108#include "gc.h"
109#include "mpoly.h"
110#include "arb.h"
111#include "machine_dep.h"
112#include "diagnostics.h"
113#include "processes.h"
114#include "run_time.h"
115#include "sys.h"
116#include "sighandler.h"
117#include "scanaddrs.h"
118#include "save_vec.h"
119#include "rts_module.h"
120#include "noreturn.h"
121#include "memmgr.h"
122#include "locking.h"
123#include "profiling.h"
124#include "sharedata.h"
125#include "exporter.h"
126#include "statistics.h"
127#include "rtsentry.h"
128#include "gc_progress.h"
129
130extern "C" {
131    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadKillSelf(FirstArgument threadId);
132    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadMutexBlock(FirstArgument threadId, PolyWord arg);
133    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadMutexUnlock(FirstArgument threadId, PolyWord arg);
134    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadCondVarWait(FirstArgument threadId, PolyWord arg);
135    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadCondVarWaitUntil(FirstArgument threadId, PolyWord lockArg, PolyWord timeArg);
136    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadCondVarWake(PolyWord targetThread);
137    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadForkThread(FirstArgument threadId, PolyWord function, PolyWord attrs, PolyWord stack);
138    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadIsActive(PolyWord targetThread);
139    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadInterruptThread(PolyWord targetThread);
140    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadKillThread(PolyWord targetThread);
141    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadBroadcastInterrupt(FirstArgument threadId);
142    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadTestInterrupt(FirstArgument threadId);
143    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadNumProcessors();
144    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadNumPhysicalProcessors();
145    POLYEXTERNALSYMBOL POLYUNSIGNED PolyThreadMaxStackSize(FirstArgument threadId, PolyWord newSize);
146}
147
148#define SAVE(x) taskData->saveVec.push(x)
149#define SIZEOF(x) (sizeof(x)/sizeof(PolyWord))
150
151// These values are stored in the second word of thread id object as
152// a tagged integer.  They may be set and read by the thread in the ML
153// code.
154#define PFLAG_BROADCAST     1   // If set, accepts a broadcast
155// How to handle interrrupts
156#define PFLAG_IGNORE        0   // Ignore interrupts completely
157#define PFLAG_SYNCH         2   // Handle synchronously
158#define PFLAG_ASYNCH        4   // Handle asynchronously
159#define PFLAG_ASYNCH_ONCE   6   // First handle asynchronously then switch to synch.
160#define PFLAG_INTMASK       6   // Mask of the above bits
161
162struct _entrypts processesEPT[] =
163{
164    { "PolyThreadKillSelf",             (polyRTSFunction)&PolyThreadKillSelf},
165    { "PolyThreadMutexBlock",           (polyRTSFunction)&PolyThreadMutexBlock},
166    { "PolyThreadMutexUnlock",          (polyRTSFunction)&PolyThreadMutexUnlock},
167    { "PolyThreadCondVarWait",          (polyRTSFunction)&PolyThreadCondVarWait},
168    { "PolyThreadCondVarWaitUntil",     (polyRTSFunction)&PolyThreadCondVarWaitUntil},
169    { "PolyThreadCondVarWake",          (polyRTSFunction)&PolyThreadCondVarWake},
170    { "PolyThreadForkThread",           (polyRTSFunction)&PolyThreadForkThread},
171    { "PolyThreadIsActive",             (polyRTSFunction)&PolyThreadIsActive},
172    { "PolyThreadInterruptThread",      (polyRTSFunction)&PolyThreadInterruptThread},
173    { "PolyThreadKillThread",           (polyRTSFunction)&PolyThreadKillThread},
174    { "PolyThreadBroadcastInterrupt",   (polyRTSFunction)&PolyThreadBroadcastInterrupt},
175    { "PolyThreadTestInterrupt",        (polyRTSFunction)&PolyThreadTestInterrupt},
176    { "PolyThreadNumProcessors",        (polyRTSFunction)&PolyThreadNumProcessors},
177    { "PolyThreadNumPhysicalProcessors",(polyRTSFunction)&PolyThreadNumPhysicalProcessors},
178    { "PolyThreadMaxStackSize",         (polyRTSFunction)&PolyThreadMaxStackSize},
179
180    { NULL, NULL} // End of list.
181};
182
183class Processes: public ProcessExternal, public RtsModule
184{
185public:
186    Processes();
187    // RtsModule overrides
188    virtual void Init(void);
189    virtual void Stop(void);
190    virtual void GarbageCollect(ScanAddress *process);
191    virtual void ForkChild(void) { singleThreaded = true;  } // After a Unix fork this is single threaded
192public:
193    void BroadcastInterrupt(void);
194    void BeginRootThread(PolyObject *rootFunction);
195    void RequestProcessExit(int n); // Request all ML threads to exit and set the process result code.
196    // Called when a thread has completed - doesn't return.
197    virtual NORETURNFN(void ThreadExit(TaskData *taskData));
198
199    // Called when a thread may block.  Returns some time later when perhaps
200    // the input is available.
201    virtual void ThreadPauseForIO(TaskData *taskData, Waiter *pWait);
202    // Return the task data for the current thread.
203    virtual TaskData *GetTaskDataForThread(void);
204    // Create a new task data object for the current thread.
205    virtual TaskData *CreateNewTaskData(Handle threadId, Handle threadFunction,
206                           Handle args, PolyWord flags);
207    // ForkFromRTS.  Creates a new thread from within the RTS.
208    virtual bool ForkFromRTS(TaskData *taskData, Handle proc, Handle arg);
209    // Create a new thread.  The "args" argument is only used for threads
210    // created in the RTS by the signal handler.
211    Handle ForkThread(TaskData *taskData, Handle threadFunction,
212                    Handle args, PolyWord flags, PolyWord stacksize);
213    // Process general RTS requests from ML.
214    Handle ThreadDispatch(TaskData *taskData, Handle args, Handle code);
215
216    virtual void ThreadUseMLMemory(TaskData *taskData);
217    virtual void ThreadReleaseMLMemory(TaskData *taskData);
218
219    virtual poly_exn* GetInterrupt(void) { return interrupt_exn; }
220
221    // If the schedule lock is already held we need to use these functions.
222    void ThreadUseMLMemoryWithSchedLock(TaskData *taskData);
223    void ThreadReleaseMLMemoryWithSchedLock(TaskData *taskData);
224
225    // Requests from the threads for actions that need to be performed by
226    // the root thread. Make the request and wait until it has completed.
227    virtual void MakeRootRequest(TaskData *taskData, MainThreadRequest *request);
228
229    // Deal with any interrupt or kill requests.
230    virtual bool ProcessAsynchRequests(TaskData *taskData);
231    // Process an interrupt request synchronously.
232    virtual void TestSynchronousRequests(TaskData *taskData);
233    // Process any events, synchronous or asynchronous.
234    virtual void TestAnyEvents(TaskData *taskData);
235
236    // Set a thread to be interrupted or killed.  Wakes up the
237    // thread if necessary.  MUST be called with schedLock held.
238    void MakeRequest(TaskData *p, ThreadRequests request);
239
240    // Profiling control.
241    virtual void StartProfiling(void);
242    virtual void StopProfiling(void);
243
244#ifdef HAVE_WINDOWS_H
245    // Windows: Called every millisecond while profiling is on.
246    void ProfileInterrupt(void);
247#else
248    // Unix: Start a profile timer for a thread.
249    void StartProfilingTimer(void);
250#endif
251    // Memory allocation.  Tries to allocate space.  If the allocation succeeds it
252    // may update the allocation values in the taskData object.  If the heap is exhausted
253    // it may set this thread (or other threads) to raise an exception.
254    PolyWord *FindAllocationSpace(TaskData *taskData, POLYUNSIGNED words, bool alwaysInSeg);
255
256    // Get the task data value from the task reference.
257    // The task data reference is a volatile ref containing the
258    // address of the C++ task data.
259    // N.B.  This is updated when the thread exits and the TaskData object
260    // is deleted.
261    TaskData *TaskForIdentifier(PolyObject *taskId) {
262        return *(TaskData**)(((ThreadObject*)taskId)->threadRef.AsObjPtr());
263    }
264
265    // Signal handling support.  The ML signal handler thread blocks until it is
266    // woken up by the signal detection thread.
267    virtual bool WaitForSignal(TaskData *taskData, PLock *sigLock);
268    virtual void SignalArrived(void);
269
270    // Operations on mutexes
271    void MutexBlock(TaskData *taskData, Handle hMutex);
272    void MutexUnlock(TaskData *taskData, Handle hMutex);
273
274    // Operations on condition variables.
275    void WaitInfinite(TaskData *taskData, Handle hMutex);
276    void WaitUntilTime(TaskData *taskData, Handle hMutex, Handle hTime);
277    bool WakeThread(PolyObject *targetThread);
278
279    // Generally, the system runs with multiple threads.  After a
280    // fork, though, there is only one thread.
281    bool singleThreaded;
282
283    // Each thread has an entry in this vector.
284    std::vector<TaskData*> taskArray;
285
286    /* schedLock: This lock must be held when making scheduling decisions.
287       It must also be held before adding items to taskArray, removing
288       them or scanning the vector.
289       It must also be held before deleting a TaskData object
290       or using it in a thread other than the "owner"  */
291    PLock schedLock;
292#if (!defined(_WIN32))
293    pthread_key_t tlsId;
294#else
295    DWORD tlsId;
296#endif
297
298    // We make an exception packet for Interrupt and store it here.
299    // This exception can be raised if we run out of store so we need to
300    // make sure we have the packet before we do.
301    poly_exn *interrupt_exn;
302
303    /* initialThreadWait: The initial thread waits on this for
304       wake-ups from the ML threads requesting actions such as GC or
305       close-down. */
306    PCondVar initialThreadWait;
307    // A requesting thread sets this to indicate the request.  This value
308    // is only reset once the request has been satisfied.
309    MainThreadRequest *threadRequest;
310
311    PCondVar mlThreadWait;  // All the threads block on here until the request has completed.
312
313    int exitResult;
314    bool exitRequest;
315
316#ifdef HAVE_WINDOWS_H /* Windows including Cygwin */
317    // Used in profiling
318    HANDLE hStopEvent; /* Signalled to stop all threads. */
319    HANDLE profilingHd;
320    HANDLE mainThreadHandle; // Handle for main thread
321    LONGLONG lastCPUTime; // CPU used by main thread.
322#endif
323
324    TaskData *sigTask;  // Pointer to current signal task.
325};
326
327// Global process data.
328static Processes processesModule;
329ProcessExternal *processes = &processesModule;
330
331Processes::Processes(): singleThreaded(false),
332    schedLock("Scheduler"), interrupt_exn(0),
333    threadRequest(0), exitResult(0), exitRequest(false), sigTask(0)
334{
335#ifdef HAVE_WINDOWS_H
336    hStopEvent = NULL;
337    profilingHd = NULL;
338    lastCPUTime = 0;
339    mainThreadHandle = NULL;
340#endif
341}
342
343enum _mainThreadPhase mainThreadPhase = MTP_USER_CODE;
344
345// Get the attribute flags.
346static POLYUNSIGNED ThreadAttrs(TaskData *taskData)
347{
348    return UNTAGGED_UNSIGNED(taskData->threadObject->flags);
349}
350
351POLYUNSIGNED PolyThreadMutexBlock(FirstArgument threadId, PolyWord arg)
352{
353    TaskData *taskData = TaskData::FindTaskForId(threadId);
354    ASSERT(taskData != 0);
355    taskData->PreRTSCall();
356    Handle reset = taskData->saveVec.mark();
357    Handle pushedArg = taskData->saveVec.push(arg);
358
359    if (profileMode == kProfileMutexContention)
360        taskData->addProfileCount(1);
361
362    try {
363        processesModule.MutexBlock(taskData, pushedArg);
364    }
365    catch (KillException &) {
366        processes->ThreadExit(taskData); // TestSynchronousRequests may test for kill
367    }
368    catch (...) { } // If an ML exception is raised
369
370    taskData->saveVec.reset(reset);
371    taskData->PostRTSCall();
372    return TAGGED(0).AsUnsigned();
373}
374
375POLYUNSIGNED PolyThreadMutexUnlock(FirstArgument threadId, PolyWord arg)
376{
377    TaskData *taskData = TaskData::FindTaskForId(threadId);
378    ASSERT(taskData != 0);
379    taskData->PreRTSCall();
380    Handle reset = taskData->saveVec.mark();
381    Handle pushedArg = taskData->saveVec.push(arg);
382
383    try {
384        processesModule.MutexUnlock(taskData, pushedArg);
385    }
386    catch (KillException &) {
387        processes->ThreadExit(taskData); // TestSynchronousRequests may test for kill
388    }
389    catch (...) { } // If an ML exception is raised
390
391    taskData->saveVec.reset(reset);
392    taskData->PostRTSCall();
393    return TAGGED(0).AsUnsigned();
394}
395
396/* A mutex was locked i.e. the count was ~1 or less.  We will have set it to
397  ~1. This code blocks if the count is still ~1.  It does actually return
398  if another thread tries to lock the mutex and hasn't yet set the value
399  to ~1 but that doesn't matter since whenever we return we simply try to
400  get the lock again. */
401void Processes::MutexBlock(TaskData *taskData, Handle hMutex)
402{
403    PLocker lock(&schedLock);
404    // We have to check the value again with schedLock held rather than
405    // simply waiting because otherwise the unlocking thread could have
406    // set the variable back to 0 (unlocked) and signalled any waiters
407    // before we actually got to wait.
408    if (UNTAGGED(DEREFHANDLE(hMutex)->Get(0)) > 1)
409    {
410        // Set this so we can see what we're blocked on.
411        taskData->blockMutex = DEREFHANDLE(hMutex);
412        // Now release the ML memory.  A GC can start.
413        ThreadReleaseMLMemoryWithSchedLock(taskData);
414        // Wait until we're woken up.  We mustn't block if we have been
415        // interrupted, and are processing interrupts asynchronously, or
416        // we've been killed.
417        switch (taskData->requests)
418        {
419        case kRequestKill:
420            // We've been killed.  Handle this later.
421            break;
422        case kRequestInterrupt:
423            {
424                // We've been interrupted.
425                POLYUNSIGNED attrs = ThreadAttrs(taskData) & PFLAG_INTMASK;
426                if (attrs == PFLAG_ASYNCH || attrs == PFLAG_ASYNCH_ONCE)
427                    break;
428                // If we're ignoring interrupts or handling them synchronously
429                // we don't do anything here.
430            }
431        case kRequestNone:
432            globalStats.incCount(PSC_THREADS_WAIT_MUTEX);
433            taskData->threadLock.Wait(&schedLock);
434            globalStats.decCount(PSC_THREADS_WAIT_MUTEX);
435        }
436        taskData->blockMutex = 0; // No longer blocked.
437        ThreadUseMLMemoryWithSchedLock(taskData);
438    }
439    // Test to see if we have been interrupted and if this thread
440    // processes interrupts asynchronously we should raise an exception
441    // immediately.  Perhaps we do that whenever we exit from the RTS.
442}
443
444/* Unlock a mutex.  Called after decrementing the count and discovering
445   that at least one other thread has tried to lock it.  We may need
446   to wake up threads that are blocked. */
447void Processes::MutexUnlock(TaskData *taskData, Handle hMutex)
448{
449    // The caller has already set the variable to 1 (unlocked).
450    // We need to acquire schedLock so that we can
451    // be sure that any thread that is trying to lock sees either
452    // the updated value (and so doesn't wait) or has successfully
453    // waited on its threadLock (and so will be woken up).
454    PLocker lock(&schedLock);
455    // Unlock any waiters.
456    for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
457    {
458        TaskData *p = *i;
459        // If the thread is blocked on this mutex we can signal the thread.
460        if (p && p->blockMutex == DEREFHANDLE(hMutex))
461            p->threadLock.Signal();
462    }
463}
464
465POLYUNSIGNED PolyThreadCondVarWait(FirstArgument threadId, PolyWord arg)
466{
467    TaskData *taskData = TaskData::FindTaskForId(threadId);
468    ASSERT(taskData != 0);
469    taskData->PreRTSCall();
470    Handle reset = taskData->saveVec.mark();
471    Handle pushedArg = taskData->saveVec.push(arg);
472
473    try {
474        processesModule.WaitInfinite(taskData, pushedArg);
475    }
476    catch (KillException &) {
477        processes->ThreadExit(taskData); // TestSynchronousRequests may test for kill
478    }
479    catch (...) { } // If an ML exception is raised
480
481    taskData->saveVec.reset(reset);
482    taskData->PostRTSCall();
483    return TAGGED(0).AsUnsigned();
484}
485
486POLYUNSIGNED PolyThreadCondVarWaitUntil(FirstArgument threadId, PolyWord lockArg, PolyWord timeArg)
487{
488    TaskData *taskData = TaskData::FindTaskForId(threadId);
489    ASSERT(taskData != 0);
490    taskData->PreRTSCall();
491    Handle reset = taskData->saveVec.mark();
492    Handle pushedLockArg = taskData->saveVec.push(lockArg);
493    Handle pushedTimeArg = taskData->saveVec.push(timeArg);
494
495    try {
496        processesModule.WaitUntilTime(taskData, pushedLockArg, pushedTimeArg);
497    }
498    catch (KillException &) {
499        processes->ThreadExit(taskData); // TestSynchronousRequests may test for kill
500    }
501    catch (...) { } // If an ML exception is raised
502
503    taskData->saveVec.reset(reset);
504    taskData->PostRTSCall();
505    return TAGGED(0).AsUnsigned();
506}
507
508// Atomically drop a mutex and wait for a wake up.
509// It WILL NOT RAISE AN EXCEPTION unless it is set to handle exceptions
510// asynchronously (which it shouldn't do if the ML caller code is correct).
511// It may return as a result of any of the following:
512//      an explicit wake up.
513//      an interrupt, either direct or broadcast
514//      a trap i.e. a request to handle an asynchronous event.
515void Processes::WaitInfinite(TaskData *taskData, Handle hMutex)
516{
517    PLocker lock(&schedLock);
518    // Atomically release the mutex.  This is atomic because we hold schedLock
519    // so no other thread can call signal or broadcast.
520    Handle decrResult = taskData->AtomicDecrement(hMutex);
521    if (UNTAGGED(decrResult->Word()) != 0)
522    {
523        taskData->AtomicReset(hMutex);
524        // The mutex was locked so we have to release any waiters.
525        // Unlock any waiters.
526        for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
527        {
528            TaskData *p = *i;
529            // If the thread is blocked on this mutex we can signal the thread.
530            if (p && p->blockMutex == DEREFHANDLE(hMutex))
531                p->threadLock.Signal();
532        }
533    }
534    // Wait until we're woken up.  Don't block if we have been interrupted
535    // or killed.
536    if (taskData->requests == kRequestNone)
537    {
538        // Now release the ML memory.  A GC can start.
539        ThreadReleaseMLMemoryWithSchedLock(taskData);
540        globalStats.incCount(PSC_THREADS_WAIT_CONDVAR);
541        taskData->threadLock.Wait(&schedLock);
542        globalStats.decCount(PSC_THREADS_WAIT_CONDVAR);
543        // We want to use the memory again.
544        ThreadUseMLMemoryWithSchedLock(taskData);
545    }
546}
547
548// Atomically drop a mutex and wait for a wake up or a time to wake up
549void Processes::WaitUntilTime(TaskData *taskData, Handle hMutex, Handle hWakeTime)
550{
551    // Convert the time into the correct format for WaitUntil before acquiring
552    // schedLock.  div_longc could do a GC which requires schedLock.
553#if (defined(_WIN32))
554    // On Windows it is the number of 100ns units since the epoch
555    FILETIME tWake;
556    getFileTimeFromArb(taskData, hWakeTime, &tWake);
557#else
558    // Unix style times.
559    struct timespec tWake;
560    // On Unix we represent times as a number of microseconds.
561    Handle hMillion = Make_arbitrary_precision(taskData, 1000000);
562    tWake.tv_sec =
563        get_C_ulong(taskData, DEREFWORD(div_longc(taskData, hMillion, hWakeTime)));
564    tWake.tv_nsec =
565        1000*get_C_ulong(taskData, DEREFWORD(rem_longc(taskData, hMillion, hWakeTime)));
566#endif
567    PLocker lock(&schedLock);
568    // Atomically release the mutex.  This is atomic because we hold schedLock
569    // so no other thread can call signal or broadcast.
570    Handle decrResult = taskData->AtomicDecrement(hMutex);
571    if (UNTAGGED(decrResult->Word()) != 0)
572    {
573        taskData->AtomicReset(hMutex);
574        // The mutex was locked so we have to release any waiters.
575        // Unlock any waiters.
576        for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
577        {
578            TaskData *p = *i;
579            // If the thread is blocked on this mutex we can signal the thread.
580            if (p && p->blockMutex == DEREFHANDLE(hMutex))
581                p->threadLock.Signal();
582        }
583    }
584    // Wait until we're woken up.  Don't block if we have been interrupted
585    // or killed.
586    if (taskData->requests == kRequestNone)
587    {
588        // Now release the ML memory.  A GC can start.
589        ThreadReleaseMLMemoryWithSchedLock(taskData);
590        globalStats.incCount(PSC_THREADS_WAIT_CONDVAR);
591        (void)taskData->threadLock.WaitUntil(&schedLock, &tWake);
592        globalStats.decCount(PSC_THREADS_WAIT_CONDVAR);
593        // We want to use the memory again.
594        ThreadUseMLMemoryWithSchedLock(taskData);
595    }
596}
597
598bool Processes::WakeThread(PolyObject *targetThread)
599{
600    bool result = false; // Default to failed.
601    // Acquire the schedLock first.  This ensures that this is
602    // atomic with respect to waiting.
603    PLocker lock(&schedLock);
604    TaskData *p = TaskForIdentifier(targetThread);
605    if (p && p->threadObject == targetThread)
606    {
607        POLYUNSIGNED attrs = ThreadAttrs(p) & PFLAG_INTMASK;
608        if (p->requests == kRequestNone ||
609            (p->requests == kRequestInterrupt && attrs == PFLAG_IGNORE))
610        {
611            p->threadLock.Signal();
612            result = true;
613        }
614    }
615    return result;
616}
617
618POLYUNSIGNED PolyThreadCondVarWake(PolyWord targetThread)
619{
620    if (processesModule.WakeThread(targetThread.AsObjPtr()))
621        return TAGGED(1).AsUnsigned();
622    else return TAGGED(0).AsUnsigned();
623}
624
625// Test if a thread is active.
626POLYUNSIGNED PolyThreadIsActive(PolyWord targetThread)
627{
628    // There's a race here: the thread may be exiting but since we're not doing
629    // anything with the TaskData object we don't need a lock.
630    TaskData *p = processesModule.TaskForIdentifier(targetThread.AsObjPtr());
631    if (p != 0) return TAGGED(1).AsUnsigned();
632    else return TAGGED(0).AsUnsigned();
633}
634
635// Send an interrupt to a specific thread
636POLYUNSIGNED PolyThreadInterruptThread(PolyWord targetThread)
637{
638    // Must lock here because the thread may be exiting.
639    processesModule.schedLock.Lock();
640    TaskData *p = processesModule.TaskForIdentifier(targetThread.AsObjPtr());
641    if (p) processesModule.MakeRequest(p, kRequestInterrupt);
642    processesModule.schedLock.Unlock();
643    // If the thread cannot be identified return false.
644    // The caller can then raise an exception
645    if (p == 0) return TAGGED(0).AsUnsigned();
646    else return TAGGED(1).AsUnsigned();
647}
648
649// Kill a specific thread
650POLYUNSIGNED PolyThreadKillThread(PolyWord targetThread)
651{
652    processesModule.schedLock.Lock();
653    TaskData *p = processesModule.TaskForIdentifier(targetThread.AsObjPtr());
654    if (p) processesModule.MakeRequest(p, kRequestKill);
655    processesModule.schedLock.Unlock();
656    // If the thread cannot be identified return false.
657    // The caller can then raise an exception
658    if (p == 0) return TAGGED(0).AsUnsigned();
659    else return TAGGED(1).AsUnsigned();
660}
661
662POLYUNSIGNED PolyThreadBroadcastInterrupt(FirstArgument /*threadId*/)
663{
664    processesModule.BroadcastInterrupt();
665    return TAGGED(0).AsUnsigned();
666}
667
668POLYUNSIGNED PolyThreadTestInterrupt(FirstArgument threadId)
669{
670    TaskData *taskData = TaskData::FindTaskForId(threadId);
671    ASSERT(taskData != 0);
672    taskData->PreRTSCall();
673    Handle reset = taskData->saveVec.mark();
674
675    try {
676        processesModule.TestSynchronousRequests(taskData);
677        // Also process any asynchronous requests that may be pending.
678        // These will be handled "soon" but if we have just switched from deferring
679        // interrupts this guarantees that any deferred interrupts will be handled now.
680        if (processesModule.ProcessAsynchRequests(taskData))
681            throw IOException();
682    }
683    catch (KillException &) {
684        processes->ThreadExit(taskData); // TestSynchronousRequests may test for kill
685    }
686    catch (...) { } // If an ML exception is raised
687
688    taskData->saveVec.reset(reset);
689    taskData->PostRTSCall();
690    return TAGGED(0).AsUnsigned();
691}
692
693// Return the number of processors.
694// Returns 1 if there is any problem.
695POLYUNSIGNED PolyThreadNumProcessors(void)
696{
697    return TAGGED(NumberOfProcessors()).AsUnsigned();
698}
699
700// Return the number of physical processors.
701// Returns 0 if there is any problem.
702POLYUNSIGNED PolyThreadNumPhysicalProcessors(void)
703{
704    return TAGGED(NumberOfPhysicalProcessors()).AsUnsigned();
705}
706
707// Set the maximum stack size.
708POLYUNSIGNED PolyThreadMaxStackSize(FirstArgument threadId, PolyWord newSize)
709{
710    TaskData *taskData = TaskData::FindTaskForId(threadId);
711    ASSERT(taskData != 0);
712    taskData->PreRTSCall();
713    Handle reset = taskData->saveVec.mark();
714
715    try {
716            taskData->threadObject->mlStackSize = newSize;
717            if (newSize != TAGGED(0))
718            {
719                uintptr_t current = taskData->currentStackSpace(); // Current size in words
720                uintptr_t newWords = getPolyUnsigned(taskData, newSize);
721                if (current > newWords)
722                    raise_exception0(taskData, EXC_interrupt);
723            }
724    }
725    catch (KillException &) {
726        processes->ThreadExit(taskData); // TestSynchronousRequests may test for kill
727    }
728    catch (...) { } // If an ML exception is raised
729
730    taskData->saveVec.reset(reset);
731    taskData->PostRTSCall();
732    return TAGGED(0).AsUnsigned();
733}
734
735// Old dispatch function.  This is only required because the pre-built compiler
736// may use some of these e.g. fork.
737Handle Processes::ThreadDispatch(TaskData *taskData, Handle args, Handle code)
738{
739    unsigned c = get_C_unsigned(taskData, code->Word());
740    TaskData *ptaskData = taskData;
741    switch (c)
742    {
743    case 1:
744        MutexBlock(taskData, args);
745        return SAVE(TAGGED(0));
746
747    case 2:
748        MutexUnlock(taskData, args);
749        return SAVE(TAGGED(0));
750
751    case 7: // Fork a new thread.  The arguments are the function to run and the attributes.
752            return ForkThread(ptaskData, SAVE(args->WordP()->Get(0)),
753                        (Handle)0, args->WordP()->Get(1),
754                        // For backwards compatibility we check the length here
755                        args->WordP()->Length() <= 2 ? TAGGED(0) : args->WordP()->Get(2));
756
757    case 10: // Broadcast an interrupt to all threads that are interested.
758        BroadcastInterrupt();
759        return SAVE(TAGGED(0));
760
761    default:
762        {
763            char msg[100];
764            sprintf(msg, "Unknown thread function: %u", c);
765            raise_fail(taskData, msg);
766            return 0;
767        }
768    }
769}
770
771// Fill unused allocation space with a dummy object to preserve the invariant
772// that memory is always valid.
773void TaskData::FillUnusedSpace(void)
774{
775    if (allocPointer > allocLimit)
776        gMem.FillUnusedSpace(allocLimit, allocPointer-allocLimit);
777}
778
779
780TaskData::TaskData(): allocPointer(0), allocLimit(0), allocSize(MIN_HEAP_SIZE), allocCount(0),
781        stack(0), threadObject(0), signalStack(0),
782        inML(false), requests(kRequestNone), blockMutex(0), inMLHeap(false),
783        runningProfileTimer(false)
784{
785#ifdef HAVE_WINDOWS_H
786    lastCPUTime = 0;
787#endif
788#ifdef HAVE_WINDOWS_H
789    threadHandle = 0;
790#endif
791    threadExited = false;
792}
793
794TaskData::~TaskData()
795{
796    if (signalStack) free(signalStack);
797    if (stack) gMem.DeleteStackSpace(stack);
798#ifdef HAVE_WINDOWS_H
799    if (threadHandle) CloseHandle(threadHandle);
800#endif
801}
802
803// Broadcast an interrupt to all relevant threads.
804void Processes::BroadcastInterrupt(void)
805{
806    // If a thread is set to accept broadcast interrupts set it to
807    // "interrupted".
808    PLocker lock(&schedLock);
809    for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
810    {
811        TaskData *p = *i;
812        if (p)
813        {
814            POLYUNSIGNED attrs = ThreadAttrs(p);
815            if (attrs & PFLAG_BROADCAST)
816                MakeRequest(p, kRequestInterrupt);
817        }
818    }
819}
820
821// Set the asynchronous request variable for the thread.  Must be called
822// with the schedLock held.  Tries to wake the thread up if possible.
823void Processes::MakeRequest(TaskData *p, ThreadRequests request)
824{
825    // We don't override a request to kill by an interrupt request.
826    if (p->requests < request)
827    {
828        p->requests = request;
829        p->InterruptCode();
830        p->threadLock.Signal();
831        // Set the value in the ML object as well so the ML code can see it
832        p->threadObject->requestCopy = TAGGED(request);
833    }
834}
835
836void Processes::ThreadExit(TaskData *taskData)
837{
838    if (debugOptions & DEBUG_THREADS)
839        Log("THREAD: Thread %p exiting\n", taskData);
840
841#if (!defined(_WIN32))
842    // Block any profile interrupt from now on.  We're deleting the ML stack for this thread.
843    sigset_t block_sigs;
844    sigemptyset(&block_sigs);
845    sigaddset(&block_sigs, SIGVTALRM);
846    pthread_sigmask(SIG_BLOCK, &block_sigs, NULL);
847    // Remove the thread-specific data since it's no
848    // longer valid.
849    pthread_setspecific(tlsId, 0);
850#endif
851
852    if (singleThreaded) finish(0);
853
854    schedLock.Lock();
855    ThreadReleaseMLMemoryWithSchedLock(taskData); // Allow a GC if it was waiting for us.
856    taskData->threadExited = true;
857    initialThreadWait.Signal(); // Tell it we've finished.
858    schedLock.Unlock();
859#if (!defined(_WIN32))
860    pthread_exit(0);
861#else
862    ExitThread(0);
863#endif
864}
865
866// These two functions are used for calls from outside where
867// the lock has not yet been acquired.
868void Processes::ThreadUseMLMemory(TaskData *taskData)
869{
870    // Trying to acquire the lock here may block if a GC is in progress
871    PLocker lock(&schedLock);
872    ThreadUseMLMemoryWithSchedLock(taskData);
873}
874
875void Processes::ThreadReleaseMLMemory(TaskData *taskData)
876{
877    PLocker lock(&schedLock);
878    ThreadReleaseMLMemoryWithSchedLock(taskData);
879}
880
881// Called when a thread wants to resume using the ML heap.  That could
882// be after a wait for some reason or after executing some foreign code.
883// Since there could be a GC in progress already at this point we may either
884// be blocked waiting to acquire schedLock or we may need to wait until
885// we are woken up at the end of the GC.
886void Processes::ThreadUseMLMemoryWithSchedLock(TaskData *taskData)
887{
888    TaskData *ptaskData = taskData;
889    // If there is a request outstanding we have to wait for it to
890    // complete.  We notify the root thread and wait for it.
891    while (threadRequest != 0)
892    {
893        initialThreadWait.Signal();
894        // Wait for the GC to happen
895        mlThreadWait.Wait(&schedLock);
896    }
897    ASSERT(! ptaskData->inMLHeap);
898    ptaskData->inMLHeap = true;
899}
900
901// Called to indicate that the thread has temporarily finished with the
902// ML memory either because it is going to wait for something or because
903// it is going to run foreign code.  If there is an outstanding GC request
904// that can proceed.
905void Processes::ThreadReleaseMLMemoryWithSchedLock(TaskData *taskData)
906{
907    TaskData *ptaskData = taskData;
908    ASSERT(ptaskData->inMLHeap);
909    ptaskData->inMLHeap = false;
910    // Put a dummy object in any unused space.  This maintains the
911    // invariant that the allocated area is filled with valid objects.
912    ptaskData->FillUnusedSpace();
913    //
914    if (threadRequest != 0)
915        initialThreadWait.Signal();
916}
917
918
919// Make a request to the root thread.
920void Processes::MakeRootRequest(TaskData *taskData, MainThreadRequest *request)
921{
922    if (singleThreaded)
923    {
924        mainThreadPhase = request->mtp;
925        ThreadReleaseMLMemoryWithSchedLock(taskData); // Primarily to call FillUnusedSpace
926        request->Perform();
927        ThreadUseMLMemoryWithSchedLock(taskData);
928        mainThreadPhase = MTP_USER_CODE;
929    }
930    else
931    {
932        PLocker locker(&schedLock);
933
934        // Wait for any other requests.
935        while (threadRequest != 0)
936        {
937            // Deal with any pending requests.
938            ThreadReleaseMLMemoryWithSchedLock(taskData);
939            ThreadUseMLMemoryWithSchedLock(taskData); // Drops schedLock while waiting.
940        }
941        // Now the other requests have been dealt with (and we have schedLock).
942        request->completed = false;
943        threadRequest = request;
944        // Wait for it to complete.
945        while (! request->completed)
946        {
947            ThreadReleaseMLMemoryWithSchedLock(taskData);
948            ThreadUseMLMemoryWithSchedLock(taskData); // Drops schedLock while waiting.
949        }
950    }
951}
952
953// Find space for an object.  Returns a pointer to the start.  "words" must include
954// the length word and the result points at where the length word will go.
955PolyWord *Processes::FindAllocationSpace(TaskData *taskData, POLYUNSIGNED words, bool alwaysInSeg)
956{
957    bool triedInterrupt = false;
958#ifdef POLYML32IN64
959    if (words & 1) words++; // Must always be an even number of words.
960#endif
961
962    while (1)
963    {
964        // After a GC allocPointer and allocLimit are zero and when allocating the
965        // heap segment we request a minimum of zero words.
966        if (taskData->allocPointer != 0 && taskData->allocPointer >= taskData->allocLimit + words)
967        {
968            // There's space in the current segment,
969            taskData->allocPointer -= words;
970#ifdef POLYML32IN64
971            // Zero the last word.  If we've rounded up an odd number the caller won't set it.
972            if (words != 0) taskData->allocPointer[words-1] = PolyWord::FromUnsigned(0);
973            ASSERT((uintptr_t)taskData->allocPointer & 4); // Must be odd-word aligned
974#endif
975            return taskData->allocPointer;
976        }
977        else // Insufficient space in this area.
978        {
979            if (words > taskData->allocSize && ! alwaysInSeg)
980            {
981                // If the object we want is larger than the heap segment size
982                // we allocate it separately rather than in the segment.
983                PolyWord *foundSpace = gMem.AllocHeapSpace(words);
984                if (foundSpace) return foundSpace;
985            }
986            else
987            {
988                // Fill in any unused space in the existing segment
989                taskData->FillUnusedSpace();
990                // Get another heap segment with enough space for this object.
991                uintptr_t requestSpace = taskData->allocSize+words;
992                uintptr_t spaceSize = requestSpace;
993                // Get the space and update spaceSize with the actual size.
994                PolyWord *space = gMem.AllocHeapSpace(words, spaceSize);
995                if (space)
996                {
997                    // Double the allocation size for the next time if
998                    // we succeeded in allocating the whole space.
999                    taskData->allocCount++;
1000                    if (spaceSize == requestSpace) taskData->allocSize = taskData->allocSize*2;
1001                    taskData->allocLimit = space;
1002                    taskData->allocPointer = space+spaceSize;
1003                    // Actually allocate the object
1004                    taskData->allocPointer -= words;
1005#ifdef POLYML32IN64
1006                    ASSERT((uintptr_t)taskData->allocPointer & 4); // Must be odd-word aligned
1007#endif
1008                    return taskData->allocPointer;
1009                }
1010            }
1011
1012            // It's possible that another thread has requested a GC in which case
1013            // we will have memory when that happens.  We don't want to start
1014            // another GC.
1015            if (! singleThreaded)
1016            {
1017                PLocker locker(&schedLock);
1018                if (threadRequest != 0)
1019                {
1020                    ThreadReleaseMLMemoryWithSchedLock(taskData);
1021                    ThreadUseMLMemoryWithSchedLock(taskData);
1022                    continue; // Try again
1023                }
1024            }
1025
1026            // Try garbage-collecting.  If this failed return 0.
1027            if (! QuickGC(taskData, words))
1028            {
1029                extern FILE *polyStderr;
1030                if (! triedInterrupt)
1031                {
1032                    triedInterrupt = true;
1033                    fprintf(polyStderr,"Run out of store - interrupting threads\n");
1034                    if (debugOptions & DEBUG_THREADS)
1035                        Log("THREAD: Run out of store, interrupting threads\n");
1036                    BroadcastInterrupt();
1037                    try {
1038                        if (ProcessAsynchRequests(taskData))
1039                            return 0; // Has been interrupted.
1040                    }
1041                    catch(KillException &)
1042                    {
1043                        // The thread may have been killed.
1044                        ThreadExit(taskData);
1045                    }
1046                    // Not interrupted: pause this thread to allow for other
1047                    // interrupted threads to free something.
1048#if defined(_WIN32)
1049                    Sleep(5000);
1050#else
1051                    sleep(5);
1052#endif
1053                    // Try again.
1054                }
1055                else {
1056                    // That didn't work.  Exit.
1057                    fprintf(polyStderr,"Failed to recover - exiting\n");
1058                    RequestProcessExit(1); // Begins the shutdown process
1059                    ThreadExit(taskData); // And terminate this thread.
1060                }
1061             }
1062            // Try again.  There should be space now.
1063        }
1064    }
1065}
1066
1067#ifdef _MSC_VER
1068// Don't tell me that exitThread has a non-void type.
1069#pragma warning(disable:4646)
1070#endif
1071
1072Handle exitThread(TaskData *taskData)
1073/* A call to this is put on the stack of a new thread so when the
1074   thread function returns the thread goes away. */
1075{
1076    processesModule.ThreadExit(taskData);
1077}
1078
1079// Terminate the current thread.  Never returns.
1080POLYUNSIGNED PolyThreadKillSelf(FirstArgument threadId)
1081{
1082    TaskData *taskData = TaskData::FindTaskForId(threadId);
1083    ASSERT(taskData != 0);
1084    taskData->PreRTSCall(); // Possibly not needed since we never return
1085    processesModule.ThreadExit(taskData);
1086    return 0;
1087}
1088
1089/* Called when a thread is about to block, usually because of IO.
1090   If this is interruptable (currently only used for Posix functions)
1091   the process will be set to raise an exception if any signal is handled.
1092   It may also raise an exception if another thread has called
1093   broadcastInterrupt. */
1094void Processes::ThreadPauseForIO(TaskData *taskData, Waiter *pWait)
1095{
1096    TestAnyEvents(taskData); // Consider this a blocking call that may raise Interrupt
1097    ThreadReleaseMLMemory(taskData);
1098    globalStats.incCount(PSC_THREADS_WAIT_IO);
1099    pWait->Wait(1000); // Wait up to a second
1100    globalStats.decCount(PSC_THREADS_WAIT_IO);
1101    ThreadUseMLMemory(taskData);
1102    TestAnyEvents(taskData); // Check if we've been interrupted.
1103}
1104
1105// Default waiter: simply wait for the time.  In Unix it may be woken
1106// up by a signal.
1107void Waiter::Wait(unsigned maxMillisecs)
1108{
1109    // Since this is used only when we can't monitor the source directly
1110    // we set this to 10ms so that we're not waiting too long.
1111    if (maxMillisecs > 10) maxMillisecs = 10;
1112#if (defined(_WIN32))
1113    Sleep(maxMillisecs);
1114#else
1115    // Unix
1116    fd_set read_fds, write_fds, except_fds;
1117    struct timeval toWait = { 0, 0 };
1118    toWait.tv_sec = maxMillisecs / 1000;
1119    toWait.tv_usec = (maxMillisecs % 1000) * 1000;
1120    FD_ZERO(&read_fds);
1121    FD_ZERO(&write_fds);
1122    FD_ZERO(&except_fds);
1123    select(FD_SETSIZE, &read_fds, &write_fds, &except_fds, &toWait);
1124#endif
1125}
1126
1127static Waiter defWait;
1128Waiter *Waiter::defaultWaiter = &defWait;
1129
1130#ifdef _WIN32
1131// Wait for the specified handle to be signalled.
1132void WaitHandle::Wait(unsigned maxMillisecs)
1133{
1134    // Wait until we get input or we're woken up.
1135    if (maxMillisecs > m_maxWait)
1136        maxMillisecs = m_maxWait;
1137    if (m_Handle == NULL)
1138        Sleep(maxMillisecs);
1139    else WaitForSingleObject(m_Handle, maxMillisecs);
1140}
1141
1142#else
1143
1144// Unix and Cygwin: Wait for a file descriptor on input.
1145void WaitInputFD::Wait(unsigned maxMillisecs)
1146{
1147    fd_set read_fds, write_fds, except_fds;
1148    struct timeval toWait = { 0, 0 };
1149    toWait.tv_sec = maxMillisecs / 1000;
1150    toWait.tv_usec = (maxMillisecs % 1000) * 1000;
1151    FD_ZERO(&read_fds);
1152    if (m_waitFD >= 0) FD_SET(m_waitFD, &read_fds);
1153    FD_ZERO(&write_fds);
1154    FD_ZERO(&except_fds);
1155    select(FD_SETSIZE, &read_fds, &write_fds, &except_fds, &toWait);
1156}
1157#endif
1158
1159// Get the task data for the current thread.  This is held in
1160// thread-local storage.  Normally this is passed in taskData but
1161// in a few cases this isn't available.
1162TaskData *Processes::GetTaskDataForThread(void)
1163{
1164#if (!defined(_WIN32))
1165    return (TaskData *)pthread_getspecific(tlsId);
1166#else
1167    return (TaskData *)TlsGetValue(tlsId);
1168#endif
1169}
1170
1171// Called to create a task data object in the current thread.
1172// This is currently only used if a thread created in foreign code calls
1173// a callback.
1174TaskData *Processes::CreateNewTaskData(Handle threadId, Handle threadFunction,
1175                           Handle args, PolyWord flags)
1176{
1177    TaskData *taskData = machineDependent->CreateTaskData();
1178#if defined(HAVE_WINDOWS_H)
1179    HANDLE thisProcess = GetCurrentProcess();
1180    DuplicateHandle(thisProcess, GetCurrentThread(), thisProcess,
1181        &(taskData->threadHandle), THREAD_ALL_ACCESS, FALSE, 0);
1182#endif
1183    unsigned thrdIndex;
1184
1185    {
1186        PLocker lock(&schedLock);
1187        // See if there's a spare entry in the array.
1188        for (thrdIndex = 0;
1189                thrdIndex < taskArray.size() && taskArray[thrdIndex] != 0;
1190                thrdIndex++);
1191
1192        if (thrdIndex == taskArray.size()) // Need to expand the array
1193        {
1194            try {
1195                taskArray.push_back(taskData);
1196            } catch (std::bad_alloc&) {
1197                delete(taskData);
1198                throw MemoryException();
1199            }
1200        }
1201        else
1202        {
1203            taskArray[thrdIndex] = taskData;
1204        }
1205    }
1206
1207    taskData->stack = gMem.NewStackSpace(machineDependent->InitialStackSize());
1208    if (taskData->stack == 0)
1209    {
1210        delete(taskData);
1211        throw MemoryException();
1212    }
1213
1214    // TODO:  Check that there isn't a problem if we try to allocate
1215    // memory here and result in a GC.
1216    taskData->InitStackFrame(taskData, threadFunction, args);
1217
1218    ThreadUseMLMemory(taskData);
1219
1220    // If the forking thread has created an ML thread object use that
1221    // otherwise create a new one in the current context.
1222    if (threadId != 0)
1223        taskData->threadObject = (ThreadObject*)threadId->WordP();
1224    else
1225    {
1226        // Make a thread reference to point to this taskData object.
1227        Handle threadRef = MakeVolatileWord(taskData, taskData);
1228        // Make a thread object.  Since it's in the thread table it can't be garbage collected.
1229        taskData->threadObject = (ThreadObject*)alloc(taskData, sizeof(ThreadObject)/sizeof(PolyWord), F_MUTABLE_BIT);
1230        taskData->threadObject->threadRef = threadRef->Word();
1231        taskData->threadObject->flags = flags != TAGGED(0) ? TAGGED(PFLAG_SYNCH): flags;
1232        taskData->threadObject->threadLocal = TAGGED(0); // Empty thread-local store
1233        taskData->threadObject->requestCopy = TAGGED(0); // Cleared interrupt state
1234        taskData->threadObject->mlStackSize = TAGGED(0); // Unlimited stack size
1235        for (unsigned i = 0; i < sizeof(taskData->threadObject->debuggerSlots)/sizeof(PolyWord); i++)
1236            taskData->threadObject->debuggerSlots[i] = TAGGED(0);
1237    }
1238
1239#if (!defined(_WIN32))
1240    initThreadSignals(taskData);
1241    pthread_setspecific(tlsId, taskData);
1242#else
1243    TlsSetValue(tlsId, taskData);
1244#endif
1245    globalStats.incCount(PSC_THREADS);
1246
1247    return taskData;
1248}
1249
1250// This function is run when a new thread has been forked.  The
1251// parameter is the taskData value for the new thread.  This function
1252// is also called directly for the main thread.
1253#if (!defined(_WIN32))
1254static void *NewThreadFunction(void *parameter)
1255{
1256    TaskData *taskData = (TaskData *)parameter;
1257#ifdef HAVE_WINDOWS_H
1258    // Cygwin: Get the Windows thread handle in case it's needed for profiling.
1259    HANDLE thisProcess = GetCurrentProcess();
1260    DuplicateHandle(thisProcess, GetCurrentThread(), thisProcess,
1261        &(taskData->threadHandle), THREAD_ALL_ACCESS, FALSE, 0);
1262#endif
1263    initThreadSignals(taskData);
1264    pthread_setspecific(processesModule.tlsId, taskData);
1265    taskData->saveVec.init(); // Remove initial data
1266    globalStats.incCount(PSC_THREADS);
1267    processes->ThreadUseMLMemory(taskData);
1268    try {
1269        taskData->EnterPolyCode(); // Will normally (always?) call ExitThread.
1270    }
1271    catch (KillException &) {
1272        processesModule.ThreadExit(taskData);
1273    }
1274
1275    return 0;
1276}
1277#else
1278static DWORD WINAPI NewThreadFunction(void *parameter)
1279{
1280    TaskData *taskData = (TaskData *)parameter;
1281    TlsSetValue(processesModule.tlsId, taskData);
1282    taskData->saveVec.init(); // Removal initial data
1283    globalStats.incCount(PSC_THREADS);
1284    processes->ThreadUseMLMemory(taskData);
1285    try {
1286        taskData->EnterPolyCode();
1287    }
1288    catch (KillException &) {
1289        processesModule.ThreadExit(taskData);
1290    }
1291    return 0;
1292}
1293#endif
1294
1295// Sets up the initial thread from the root function.  This is run on
1296// the initial thread of the process so it will work if we don't
1297// have pthreads.
1298// When multithreading this thread also deals with all garbage-collection
1299// and similar operations and the ML threads send it requests to deal with
1300// that.  These require all the threads to pause until the operation is complete
1301// since they affect all memory but they are also sometimes highly recursive.
1302// On Mac OS X and on Linux if the stack limit is set to unlimited only the
1303// initial thread has a large stack and newly created threads have smaller
1304// stacks.  We need to make sure that any significant stack usage occurs only
1305// on the inital thread.
1306void Processes::BeginRootThread(PolyObject *rootFunction)
1307{
1308    int exitLoopCount = 100; // Maximum 100 * 400 ms.
1309    if (taskArray.size() < 1) {
1310        try {
1311            taskArray.push_back(0);
1312        } catch (std::bad_alloc&) {
1313            ::Exit("Unable to create the initial thread - insufficient memory");
1314        }
1315    }
1316
1317    try {
1318        // We can't use ForkThread because we don't have a taskData object before we start
1319        TaskData *taskData = machineDependent->CreateTaskData();
1320        Handle threadRef = MakeVolatileWord(taskData, taskData);
1321        taskData->threadObject = (ThreadObject*)alloc(taskData, sizeof(ThreadObject) / sizeof(PolyWord), F_MUTABLE_BIT);
1322        taskData->threadObject->threadRef = threadRef->Word();
1323        // The initial thread is set to accept broadcast interrupt requests
1324        // and handle them synchronously.  This is for backwards compatibility.
1325        taskData->threadObject->flags = TAGGED(PFLAG_BROADCAST|PFLAG_ASYNCH); // Flags
1326        taskData->threadObject->threadLocal = TAGGED(0); // Empty thread-local store
1327        taskData->threadObject->requestCopy = TAGGED(0); // Cleared interrupt state
1328        taskData->threadObject->mlStackSize = TAGGED(0); // Unlimited stack size
1329        for (unsigned i = 0; i < sizeof(taskData->threadObject->debuggerSlots)/sizeof(PolyWord); i++)
1330            taskData->threadObject->debuggerSlots[i] = TAGGED(0);
1331#if defined(HAVE_WINDOWS_H)
1332        taskData->threadHandle = mainThreadHandle;
1333#endif
1334        taskArray[0] = taskData;
1335
1336        taskData->stack = gMem.NewStackSpace(machineDependent->InitialStackSize());
1337        if (taskData->stack == 0)
1338            ::Exit("Unable to create the initial thread - insufficient memory");
1339
1340        taskData->InitStackFrame(taskData, taskData->saveVec.push(rootFunction), (Handle)0);
1341
1342        // Create a packet for the Interrupt exception once so that we don't have to
1343        // allocate when we need to raise it.
1344        // We can only do this once the taskData object has been created.
1345        if (interrupt_exn == 0)
1346            interrupt_exn = makeExceptionPacket(taskData, EXC_interrupt);
1347
1348        if (singleThreaded)
1349        {
1350            // If we don't have threading enter the code as if this were a new thread.
1351            // This will call finish so will never return.
1352            NewThreadFunction(taskData);
1353        }
1354
1355        schedLock.Lock();
1356        int errorCode = 0;
1357#if (!defined(_WIN32))
1358        if (pthread_create(&taskData->threadId, NULL, NewThreadFunction, taskData) != 0)
1359            errorCode = errno;
1360#else
1361        taskData->threadHandle =
1362            CreateThread(NULL, 0, NewThreadFunction, taskData, 0, NULL);
1363        if (taskData->threadHandle == NULL) errorCode = GetLastError();
1364#endif
1365        if (errorCode != 0)
1366        {
1367            // Thread creation failed.
1368            taskArray[0] = 0;
1369            delete(taskData);
1370            ExitWithError("Unable to create initial thread:", errorCode);
1371        }
1372
1373        if (debugOptions & DEBUG_THREADS)
1374            Log("THREAD: Forked initial root thread %p\n", taskData);
1375    }
1376    catch (std::bad_alloc &) {
1377        ::Exit("Unable to create the initial thread - insufficient memory");
1378    }
1379
1380    // Wait until the threads terminate or make a request.
1381    // We only release schedLock while waiting.
1382    while (1)
1383    {
1384        // Look at the threads to see if they are running.
1385        bool allStopped = true;
1386        bool noUserThreads = true;
1387        bool signalThreadRunning = false;
1388        for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
1389        {
1390            TaskData *p = *i;
1391            if (p)
1392            {
1393                if (p == sigTask) signalThreadRunning = true;
1394                else if (! p->threadExited) noUserThreads = false;
1395
1396                if (p->inMLHeap)
1397                {
1398                    allStopped = false;
1399                    // It must be running - interrupt it if we are waiting.
1400                    if (threadRequest != 0) p->InterruptCode();
1401                }
1402                else if (p->threadExited) // Has the thread terminated?
1403                {
1404                    // Wait for it to actually stop then delete the task data.
1405#if (!defined(_WIN32))
1406                    pthread_join(p->threadId, NULL);
1407#else
1408                    WaitForSingleObject(p->threadHandle, INFINITE);
1409#endif
1410                    // The thread ref is no longer valid.
1411                    *(TaskData**)(p->threadObject->threadRef.AsObjPtr()) = 0;
1412                    delete(p); // Delete the task Data
1413                    *i = 0;
1414                    globalStats.decCount(PSC_THREADS);
1415                }
1416            }
1417        }
1418        if (noUserThreads)
1419        {
1420            // If all threads apart from the signal thread have exited then
1421            // we can finish but we must make sure that the signal thread has
1422            // exited before we finally finish and deallocate the memory.
1423            if (signalThreadRunning) exitRequest = true;
1424            else break; // Really no threads.
1425        }
1426
1427        if (allStopped && threadRequest != 0)
1428        {
1429            mainThreadPhase = threadRequest->mtp;
1430            gcProgressBeginOtherGC(); // The default unless we're doing a GC.
1431            gMem.ProtectImmutable(false); // GC, sharing and export may all write to the immutable area
1432            threadRequest->Perform();
1433            gMem.ProtectImmutable(true);
1434            mainThreadPhase = MTP_USER_CODE;
1435            gcProgressReturnToML();
1436            threadRequest->completed = true;
1437            threadRequest = 0; // Allow a new request.
1438            mlThreadWait.Signal();
1439        }
1440
1441        // Have we had a request to stop?  This may have happened while in the GC.
1442        if (exitRequest)
1443        {
1444            // Set this to kill the threads.
1445            for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
1446            {
1447                TaskData *taskData = *i;
1448                if (taskData && taskData->requests != kRequestKill)
1449                    MakeRequest(taskData, kRequestKill);
1450            }
1451            // Leave exitRequest set so that if we're in the process of
1452            // creating a new thread we will request it to stop when the
1453            // taskData object has been added to the table.
1454        }
1455
1456        // Now release schedLock and wait for a thread
1457        // to wake us up or for the timer to expire to update the statistics.
1458        if (! initialThreadWait.WaitFor(&schedLock, 400))
1459        {
1460            // We didn't receive a request in the last 400ms
1461            if (exitRequest)
1462            {
1463                if (--exitLoopCount < 0)
1464                {
1465                    // The loop count has expired and there is at least one thread that hasn't exited.
1466                    // Assume we've deadlocked.
1467#if defined(HAVE_WINDOWS_H)
1468                    ExitProcess(1);
1469#else
1470                    _exit(1); // Something is stuck.  Get out without calling destructors.
1471#endif
1472                }
1473            }
1474        }
1475        // Update the periodic stats.
1476        // Calculate the free memory.  We have to be careful here because although
1477        // we have the schedLock we don't have any lock that prevents a thread
1478        // from allocating a new segment.  Since these statistics are only
1479        // very rough it doesn't matter if there's a glitch.
1480        // One possibility would be see if the value of
1481        // gMem.GetFreeAllocSpace() has changed from what it was at the
1482        // start and recalculate if it has.
1483        // We also count the number of threads in ML code.  Taking the
1484        // lock in EnterPolyCode on every RTS call turned out to be
1485        // expensive.
1486        uintptr_t freeSpace = 0;
1487        unsigned threadsInML = 0;
1488        for (std::vector<TaskData*>::iterator j = taskArray.begin(); j != taskArray.end(); j++)
1489        {
1490            TaskData *taskData = *j;
1491            if (taskData)
1492            {
1493                // This gets the values last time it was in the RTS.
1494                PolyWord *limit = taskData->allocLimit, *ptr = taskData->allocPointer;
1495                if (limit < ptr && (uintptr_t)(ptr-limit) < taskData->allocSize)
1496                    freeSpace += ptr-limit;
1497                if (taskData->inML) threadsInML++;
1498            }
1499        }
1500        // Add the space in the allocation areas after calculating the sizes for the
1501        // threads in case a thread has allocated some more.
1502        freeSpace += gMem.GetFreeAllocSpace();
1503        globalStats.updatePeriodicStats(freeSpace, threadsInML);
1504
1505        // Process the profile queue if necessary.
1506        processProfileQueue();
1507    }
1508    schedLock.Unlock();
1509    finish(exitResult); // Close everything down and exit.
1510}
1511
1512// Create a new thread.  Returns the ML thread identifier object if it succeeds.
1513// May raise an exception.
1514Handle Processes::ForkThread(TaskData *taskData, Handle threadFunction,
1515                           Handle args, PolyWord flags, PolyWord stacksize)
1516{
1517    if (singleThreaded)
1518        raise_exception_string(taskData, EXC_thread, "Threads not available");
1519
1520    try {
1521        // Create a taskData object for the new thread
1522        TaskData *newTaskData = machineDependent->CreateTaskData();
1523        // We allocate the thread object in the PARENT's space
1524        Handle threadRef = MakeVolatileWord(taskData, newTaskData);
1525        Handle threadId = alloc_and_save(taskData, sizeof(ThreadObject) / sizeof(PolyWord), F_MUTABLE_BIT);
1526        newTaskData->threadObject = (ThreadObject*)DEREFHANDLE(threadId);
1527        newTaskData->threadObject->threadRef = threadRef->Word();
1528        newTaskData->threadObject->flags = flags; // Flags
1529        newTaskData->threadObject->threadLocal = TAGGED(0); // Empty thread-local store
1530        newTaskData->threadObject->requestCopy = TAGGED(0); // Cleared interrupt state
1531        newTaskData->threadObject->mlStackSize = stacksize;
1532        for (unsigned i = 0; i < sizeof(newTaskData->threadObject->debuggerSlots)/sizeof(PolyWord); i++)
1533            newTaskData->threadObject->debuggerSlots[i] = TAGGED(0);
1534
1535        unsigned thrdIndex;
1536        schedLock.Lock();
1537        // Before forking a new thread check to see whether we have been asked
1538        // to exit.  Processes::Exit sets the current set of threads to exit but won't
1539        // see a new thread.
1540        if (taskData->requests == kRequestKill)
1541        {
1542            schedLock.Unlock();
1543            // Raise an exception although the thread may exit before we get there.
1544            raise_exception_string(taskData, EXC_thread, "Thread is exiting");
1545        }
1546
1547        // See if there's a spare entry in the array.
1548        for (thrdIndex = 0;
1549             thrdIndex < taskArray.size() && taskArray[thrdIndex] != 0;
1550             thrdIndex++);
1551
1552        if (thrdIndex == taskArray.size()) // Need to expand the array
1553        {
1554            try {
1555                taskArray.push_back(newTaskData);
1556            } catch (std::bad_alloc&) {
1557                delete(newTaskData);
1558                schedLock.Unlock();
1559                raise_exception_string(taskData, EXC_thread, "Too many threads");
1560            }
1561        }
1562        else
1563        {
1564            taskArray[thrdIndex] = newTaskData;
1565        }
1566        schedLock.Unlock();
1567
1568        newTaskData->stack = gMem.NewStackSpace(machineDependent->InitialStackSize());
1569        if (newTaskData->stack == 0)
1570        {
1571            delete(newTaskData);
1572            raise_exception_string(taskData, EXC_thread, "Unable to allocate thread stack");
1573        }
1574
1575        // Allocate anything needed for the new stack in the parent's heap.
1576        // The child still has inMLHeap set so mustn't GC.
1577        newTaskData->InitStackFrame(taskData, threadFunction, args);
1578
1579        // Now actually fork the thread.
1580        bool success = false;
1581        schedLock.Lock();
1582#if (!defined(_WIN32))
1583        success = pthread_create(&newTaskData->threadId, NULL, NewThreadFunction, newTaskData) == 0;
1584#else
1585        newTaskData->threadHandle =
1586            CreateThread(NULL, 0, NewThreadFunction, newTaskData, 0, NULL);
1587        success = newTaskData->threadHandle != NULL;
1588#endif
1589        if (success)
1590        {
1591            schedLock.Unlock();
1592
1593            if (debugOptions & DEBUG_THREADS)
1594                Log("THREAD: Forking new thread %p from thread %p\n", newTaskData, taskData);
1595
1596            return threadId;
1597        }
1598        // Thread creation failed.
1599        taskArray[thrdIndex] = 0;
1600        delete(newTaskData);
1601        schedLock.Unlock();
1602
1603        if (debugOptions & DEBUG_THREADS)
1604            Log("THREAD: Fork from thread %p failed\n", taskData);
1605
1606        raise_exception_string(taskData, EXC_thread, "Thread creation failed");
1607    }
1608    catch (std::bad_alloc &) {
1609            raise_exception_string(taskData, EXC_thread, "Insufficient memory");
1610    }
1611}
1612
1613// ForkFromRTS.  Creates a new thread from within the RTS.  This is currently used
1614// only to run a signal function.
1615bool Processes::ForkFromRTS(TaskData *taskData, Handle proc, Handle arg)
1616{
1617    try {
1618        (void)ForkThread(taskData, proc, arg, TAGGED(PFLAG_SYNCH), TAGGED(0));
1619        return true;
1620    } catch (IOException &)
1621    {
1622        // If it failed
1623        return false;
1624    }
1625}
1626
1627POLYUNSIGNED PolyThreadForkThread(FirstArgument threadId, PolyWord function, PolyWord attrs, PolyWord stack)
1628{
1629    TaskData *taskData = TaskData::FindTaskForId(threadId);
1630    ASSERT(taskData != 0);
1631    taskData->PreRTSCall();
1632    Handle reset = taskData->saveVec.mark();
1633    Handle pushedFunction = taskData->saveVec.push(function);
1634    Handle result = 0;
1635
1636    try {
1637        result = processesModule.ForkThread(taskData, pushedFunction, (Handle)0, attrs, stack);
1638    }
1639    catch (KillException &) {
1640        processes->ThreadExit(taskData); // TestSynchronousRequests may test for kill
1641    }
1642    catch (...) { } // If an ML exception is raised
1643
1644    taskData->saveVec.reset(reset);
1645    taskData->PostRTSCall();
1646    if (result == 0) return TAGGED(0).AsUnsigned();
1647    else return result->Word().AsUnsigned();
1648}
1649
1650// Deal with any interrupt or kill requests.
1651bool Processes::ProcessAsynchRequests(TaskData *taskData)
1652{
1653    bool wasInterrupted = false;
1654    TaskData *ptaskData = taskData;
1655
1656    schedLock.Lock();
1657
1658    switch (ptaskData->requests)
1659    {
1660    case kRequestNone:
1661        schedLock.Unlock();
1662        break;
1663
1664    case kRequestInterrupt:
1665        {
1666            // Handle asynchronous interrupts only.
1667            // We've been interrupted.
1668            POLYUNSIGNED attrs = ThreadAttrs(ptaskData);
1669            POLYUNSIGNED intBits = attrs & PFLAG_INTMASK;
1670            if (intBits == PFLAG_ASYNCH || intBits == PFLAG_ASYNCH_ONCE)
1671            {
1672                if (intBits == PFLAG_ASYNCH_ONCE)
1673                {
1674                    // Set this so from now on it's synchronous.
1675                    // This word is only ever set by the thread itself so
1676                    // we don't need to synchronise.
1677                    attrs = (attrs & (~PFLAG_INTMASK)) | PFLAG_SYNCH;
1678                    ptaskData->threadObject->flags = TAGGED(attrs);
1679                }
1680                ptaskData->requests = kRequestNone; // Clear this
1681                ptaskData->threadObject->requestCopy = TAGGED(0); // And in the ML copy
1682                schedLock.Unlock();
1683                // Don't actually throw the exception here.
1684                taskData->SetException(interrupt_exn);
1685                wasInterrupted = true;
1686            }
1687            else schedLock.Unlock();
1688        }
1689        break;
1690
1691    case kRequestKill: // The thread has been asked to stop.
1692        schedLock.Unlock();
1693        throw KillException();
1694        // Doesn't return.
1695    }
1696
1697#ifndef HAVE_WINDOWS_H
1698    // Start the profile timer if needed.
1699    if (profileMode == kProfileTime)
1700    {
1701        if (! ptaskData->runningProfileTimer)
1702        {
1703            ptaskData->runningProfileTimer = true;
1704            StartProfilingTimer();
1705        }
1706    }
1707    else ptaskData->runningProfileTimer = false;
1708    // The timer will be stopped next time it goes off.
1709#endif
1710    return wasInterrupted;
1711}
1712
1713// If this thread is processing interrupts synchronously and has been
1714// interrupted clear the interrupt and raise the exception.  This is
1715// called from IO routines which may block.
1716void Processes::TestSynchronousRequests(TaskData *taskData)
1717{
1718    TaskData *ptaskData = taskData;
1719    schedLock.Lock();
1720    switch (ptaskData->requests)
1721    {
1722    case kRequestNone:
1723        schedLock.Unlock();
1724        break;
1725
1726    case kRequestInterrupt:
1727        {
1728            // Handle synchronous interrupts only.
1729            // We've been interrupted.
1730            POLYUNSIGNED attrs = ThreadAttrs(ptaskData);
1731            POLYUNSIGNED intBits = attrs & PFLAG_INTMASK;
1732            if (intBits == PFLAG_SYNCH)
1733            {
1734                ptaskData->requests = kRequestNone; // Clear this
1735                ptaskData->threadObject->requestCopy = TAGGED(0);
1736                schedLock.Unlock();
1737                taskData->SetException(interrupt_exn);
1738                throw IOException();
1739            }
1740            else schedLock.Unlock();
1741        }
1742        break;
1743
1744    case kRequestKill: // The thread has been asked to stop.
1745        schedLock.Unlock();
1746        throw KillException();
1747        // Doesn't return.
1748    }
1749}
1750
1751// Check for asynchronous or synchronous events
1752void Processes::TestAnyEvents(TaskData *taskData)
1753{
1754    TestSynchronousRequests(taskData);
1755    if (ProcessAsynchRequests(taskData))
1756        throw IOException();
1757}
1758
1759// Request that the process should exit.
1760// This will usually be called from an ML thread as a result of
1761// a call to OS.Process.exit but on Windows it can be called from the GUI thread.
1762void Processes::RequestProcessExit(int n)
1763{
1764    if (singleThreaded)
1765        finish(n);
1766
1767    exitResult = n;
1768    exitRequest = true;
1769    PLocker lock(&schedLock); // Lock so we know the main thread is waiting
1770    initialThreadWait.Signal(); // Wake it if it's sleeping.
1771}
1772
1773#if !defined(HAVE_WINDOWS_H)
1774// N.B. This may be called either by an ML thread or by the main thread.
1775// On the main thread taskData will be null.
1776static void catchVTALRM(SIG_HANDLER_ARGS(sig, context))
1777{
1778    ASSERT(sig == SIGVTALRM);
1779    if (profileMode != kProfileTime)
1780    {
1781        // We stop the timer for this thread on the next signal after we end profile
1782        static struct itimerval stoptime = {{0, 0}, {0, 0}};
1783        /* Stop the timer */
1784        setitimer(ITIMER_VIRTUAL, & stoptime, NULL);
1785    }
1786    else {
1787        TaskData *taskData = processes->GetTaskDataForThread();
1788        handleProfileTrap(taskData, (SIGNALCONTEXT*)context);
1789    }
1790}
1791
1792#else /* Windows including Cygwin */
1793// This runs as a separate thread.  Every millisecond it checks the CPU time used
1794// by each ML thread and increments the count for each thread that has used a
1795// millisecond of CPU time.
1796
1797static bool testCPUtime(HANDLE hThread, LONGLONG &lastCPUTime)
1798{
1799    FILETIME cTime, eTime, kTime, uTime;
1800    // Try to get the thread CPU time if possible.  This isn't supported
1801    // in Windows 95/98 so if it fails we just include this thread anyway.
1802    if (GetThreadTimes(hThread, &cTime, &eTime, &kTime, &uTime))
1803    {
1804        LONGLONG totalTime = 0;
1805        LARGE_INTEGER li;
1806        li.LowPart = kTime.dwLowDateTime;
1807        li.HighPart = kTime.dwHighDateTime;
1808        totalTime += li.QuadPart;
1809        li.LowPart = uTime.dwLowDateTime;
1810        li.HighPart = uTime.dwHighDateTime;
1811        totalTime += li.QuadPart;
1812        if (totalTime - lastCPUTime >= 10000)
1813        {
1814            lastCPUTime = totalTime;
1815            return true;
1816        }
1817        return false;
1818    }
1819    else return true; // Failed to get thread time, maybe Win95.
1820 }
1821
1822
1823void Processes::ProfileInterrupt(void)
1824{
1825    // Wait for millisecond or until the stop event is signalled.
1826    while (WaitForSingleObject(hStopEvent, 1) == WAIT_TIMEOUT)
1827    {
1828        // We need to hold schedLock to examine the taskArray but
1829        // that is held during garbage collection.
1830        if (schedLock.Trylock())
1831        {
1832            for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
1833            {
1834                TaskData *p = *i;
1835                if (p && p->threadHandle)
1836                {
1837                    if (testCPUtime(p->threadHandle, p->lastCPUTime))
1838                    {
1839                        CONTEXT context;
1840                        SuspendThread(p->threadHandle);
1841                        context.ContextFlags = CONTEXT_CONTROL; /* Get Eip and Esp */
1842                        if (GetThreadContext(p->threadHandle, &context))
1843                        {
1844                            handleProfileTrap(p, &context);
1845                        }
1846                        ResumeThread(p->threadHandle);
1847                    }
1848                }
1849            }
1850            schedLock.Unlock();
1851        }
1852        // Check the CPU time used by the main thread.  This is used for GC
1853        // so we need to check that as well.
1854        if (testCPUtime(mainThreadHandle, lastCPUTime))
1855            handleProfileTrap(NULL, NULL);
1856    }
1857}
1858
1859DWORD WINAPI ProfilingTimer(LPVOID parm)
1860{
1861    processesModule.ProfileInterrupt();
1862    return 0;
1863}
1864
1865#endif
1866
1867// Profiling control.  Called by the root thread.
1868void Processes::StartProfiling(void)
1869{
1870#ifdef HAVE_WINDOWS_H
1871    DWORD threadId;
1872    extern FILE *polyStdout;
1873    if (profilingHd)
1874        return;
1875    ResetEvent(hStopEvent);
1876    profilingHd = CreateThread(NULL, 0, ProfilingTimer, NULL, 0, &threadId);
1877    if (profilingHd == NULL)
1878    {
1879        fputs("Creating ProfilingTimer thread failed.\n", polyStdout);
1880        return;
1881    }
1882    /* Give this a higher than normal priority so it pre-empts the main
1883       thread.  Without this it will tend only to be run when the main
1884       thread blocks for some reason. */
1885    SetThreadPriority(profilingHd, THREAD_PRIORITY_ABOVE_NORMAL);
1886#else
1887    // In Linux, at least, we need to run a timer in each thread.
1888    // We request each to enter the RTS so that it will start the timer.
1889    // Since this is being run by the main thread while all the ML threads
1890    // are paused this may not actually be necessary.
1891    for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
1892    {
1893        TaskData *taskData = *i;
1894        if (taskData)
1895        {
1896            taskData->InterruptCode();
1897        }
1898    }
1899    StartProfilingTimer(); // Start the timer in the root thread.
1900#endif
1901}
1902
1903void Processes::StopProfiling(void)
1904{
1905#ifdef HAVE_WINDOWS_H
1906    if (hStopEvent) SetEvent(hStopEvent);
1907    // Wait for the thread to stop
1908    if (profilingHd)
1909    {
1910        WaitForSingleObject(profilingHd, 10000);
1911        CloseHandle(profilingHd);
1912    }
1913    profilingHd = NULL;
1914#endif
1915}
1916
1917// Called by the ML signal handling thread.  It blocks until a signal
1918// arrives.  There should only be a single thread waiting here.
1919bool Processes::WaitForSignal(TaskData *taskData, PLock *sigLock)
1920{
1921    TaskData *ptaskData = taskData;
1922    // We need to hold the signal lock until we have acquired schedLock.
1923    PLocker lock(&schedLock);
1924    sigLock->Unlock();
1925    if (sigTask != 0)
1926    {
1927        return false;
1928    }
1929    sigTask = ptaskData;
1930
1931    if (ptaskData->requests == kRequestNone)
1932    {
1933        // Now release the ML memory.  A GC can start.
1934        ThreadReleaseMLMemoryWithSchedLock(ptaskData);
1935        globalStats.incCount(PSC_THREADS_WAIT_SIGNAL);
1936        ptaskData->threadLock.Wait(&schedLock);
1937        globalStats.decCount(PSC_THREADS_WAIT_SIGNAL);
1938        // We want to use the memory again.
1939        ThreadUseMLMemoryWithSchedLock(ptaskData);
1940    }
1941
1942    sigTask = 0;
1943    return true;
1944}
1945
1946// Called by the signal detection thread to wake up the signal handler
1947// thread.  Must be called AFTER releasing sigLock.
1948void Processes::SignalArrived(void)
1949{
1950    PLocker locker(&schedLock);
1951    if (sigTask)
1952        sigTask->threadLock.Signal();
1953}
1954
1955#if (!defined(_WIN32))
1956// This is called when the thread exits in foreign code and
1957// ThreadExit has not been called.
1958static void threaddata_destructor(void *p)
1959{
1960    TaskData *pt = (TaskData *)p;
1961    pt->threadExited = true;
1962    // This doesn't actually wake the main thread and relies on the
1963    // regular check to release the task data.
1964}
1965#endif
1966
1967void Processes::Init(void)
1968{
1969#if (!defined(_WIN32))
1970    pthread_key_create(&tlsId, threaddata_destructor);
1971#else
1972    tlsId = TlsAlloc();
1973#endif
1974
1975#if defined(HAVE_WINDOWS_H) /* Windows including Cygwin. */
1976    // Create stop event for time profiling.
1977    hStopEvent = CreateEvent(NULL, TRUE, FALSE, NULL);
1978    // Get the thread handle for this thread.
1979    HANDLE thisProcess = GetCurrentProcess();
1980    DuplicateHandle(thisProcess, GetCurrentThread(), thisProcess,
1981        &mainThreadHandle, THREAD_ALL_ACCESS, FALSE, 0);
1982#else
1983    // Set up a signal handler.  This will be the same for all threads.
1984    markSignalInuse(SIGVTALRM);
1985    setSignalHandler(SIGVTALRM, catchVTALRM);
1986#endif
1987}
1988
1989#ifndef HAVE_WINDOWS_H
1990// On Linux, at least, each thread needs to run this.
1991void Processes::StartProfilingTimer(void)
1992{
1993    // set virtual timer to go off every millisecond
1994    struct itimerval starttime;
1995    starttime.it_interval.tv_sec = starttime.it_value.tv_sec = 0;
1996    starttime.it_interval.tv_usec = starttime.it_value.tv_usec = 1000;
1997    setitimer(ITIMER_VIRTUAL,&starttime,NULL);
1998}
1999#endif
2000
2001void Processes::Stop(void)
2002{
2003#if (!defined(_WIN32))
2004    pthread_key_delete(tlsId);
2005#else
2006    TlsFree(tlsId);
2007#endif
2008
2009#if defined(HAVE_WINDOWS_H)
2010    /* Stop the timer and profiling threads. */
2011    if (hStopEvent) SetEvent(hStopEvent);
2012    if (profilingHd)
2013    {
2014        WaitForSingleObject(profilingHd, 10000);
2015        CloseHandle(profilingHd);
2016        profilingHd = NULL;
2017    }
2018    if (hStopEvent) CloseHandle(hStopEvent);
2019    hStopEvent = NULL;
2020    if (mainThreadHandle) CloseHandle(mainThreadHandle);
2021    mainThreadHandle = NULL;
2022#else
2023    profileMode = kProfileOff;
2024    // Make sure the timer is not running
2025    struct itimerval stoptime;
2026    memset(&stoptime, 0, sizeof(stoptime));
2027    setitimer(ITIMER_VIRTUAL, &stoptime, NULL);
2028#endif
2029}
2030
2031void Processes::GarbageCollect(ScanAddress *process)
2032/* Ensures that all the objects are retained and their addresses updated. */
2033{
2034    /* The interrupt exn */
2035    if (interrupt_exn != 0) {
2036        PolyObject *p = interrupt_exn;
2037        process->ScanRuntimeAddress(&p, ScanAddress::STRENGTH_STRONG);
2038        interrupt_exn = (PolyException*)p;
2039    }
2040    for (std::vector<TaskData*>::iterator i = taskArray.begin(); i != taskArray.end(); i++)
2041    {
2042        if (*i)
2043            (*i)->GarbageCollect(process);
2044    }
2045}
2046
2047void TaskData::GarbageCollect(ScanAddress *process)
2048{
2049    saveVec.gcScan(process);
2050
2051    if (threadObject != 0)
2052    {
2053        PolyObject *p = threadObject;
2054        process->ScanRuntimeAddress(&p, ScanAddress::STRENGTH_STRONG);
2055        threadObject = (ThreadObject*)p;
2056    }
2057    if (blockMutex != 0)
2058        process->ScanRuntimeAddress(&blockMutex, ScanAddress::STRENGTH_STRONG);
2059    // The allocation spaces are no longer valid.
2060    allocPointer = 0;
2061    allocLimit = 0;
2062    // Divide the allocation size by four. If we have made a single allocation
2063    // since the last GC the size will have been doubled after the allocation.
2064    // On average for each thread, apart from the one that ran out of space
2065    // and requested the GC, half of the space will be unused so reducing by
2066    // four should give a good estimate for next time.
2067    if (allocCount != 0)
2068    { // Do this only once for each GC.
2069        allocCount = 0;
2070        allocSize = allocSize/4;
2071        if (allocSize < MIN_HEAP_SIZE)
2072            allocSize = MIN_HEAP_SIZE;
2073    }
2074}
2075
2076// Return the number of processors.
2077extern unsigned NumberOfProcessors(void)
2078{
2079#if (defined(_WIN32))
2080        SYSTEM_INFO info;
2081        memset(&info, 0, sizeof(info));
2082        GetSystemInfo(&info);
2083        if (info.dwNumberOfProcessors == 0) // Just in case
2084            info.dwNumberOfProcessors = 1;
2085        return info.dwNumberOfProcessors;
2086#elif(defined(_SC_NPROCESSORS_ONLN))
2087        long res = sysconf(_SC_NPROCESSORS_ONLN);
2088        if (res <= 0) res = 1;
2089        return res;
2090#elif(defined(HAVE_SYSCTL) && defined(CTL_HW) && defined(HW_NCPU))
2091        static int mib[2] = { CTL_HW, HW_NCPU };
2092        int nCPU = 1;
2093        size_t len = sizeof(nCPU);
2094        if (sysctl(mib, 2, &nCPU, &len, NULL, 0) == 0 && len == sizeof(nCPU))
2095             return nCPU;
2096        else return 1;
2097#else
2098        // Can't determine.
2099        return 1;
2100#endif
2101}
2102
2103// Return the number of physical processors.  If hyperthreading is
2104// enabled this returns less than NumberOfProcessors.  Returns zero if
2105// it cannot be determined.
2106// This can be used in Cygwin as well as native Windows.
2107#if (defined(HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION))
2108typedef BOOL (WINAPI *GETP)(SYSTEM_LOGICAL_PROCESSOR_INFORMATION*, PDWORD);
2109
2110// Windows - use GetLogicalProcessorInformation if it's available.
2111static unsigned WinNumPhysicalProcessors(void)
2112{
2113    GETP getProcInfo = (GETP) GetProcAddress(GetModuleHandle(_T("kernel32")), "GetLogicalProcessorInformation");
2114    if (getProcInfo == 0) return 0;
2115
2116    // It's there - use it.
2117    SYSTEM_LOGICAL_PROCESSOR_INFORMATION *buff = 0;
2118    DWORD space = 0;
2119    while (getProcInfo(buff, &space) == FALSE)
2120    {
2121        if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
2122        {
2123            free(buff);
2124            return 0;
2125        }
2126        free(buff);
2127        buff = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION)malloc(space);
2128        if (buff == 0) return 0;
2129    }
2130    // Calculate the number of full entries in case it's truncated.
2131    unsigned nItems = space / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
2132    unsigned numProcs = 0;
2133    for (unsigned i = 0; i < nItems; i++)
2134    {
2135        if (buff[i].Relationship == RelationProcessorCore)
2136            numProcs++;
2137    }
2138    free(buff);
2139    return numProcs;
2140}
2141#endif
2142
2143// Read and parse /proc/cpuinfo
2144static unsigned LinuxNumPhysicalProcessors(void)
2145{
2146    // Find out the total.  This should be the maximum.
2147    unsigned nProcs = NumberOfProcessors();
2148    // If there's only one we don't need to check further.
2149    if (nProcs <= 1) return nProcs;
2150    long *cpus = (long*)calloc(nProcs, sizeof(long));
2151    if (cpus == 0) return 0;
2152
2153    FILE *cpuInfo = fopen("/proc/cpuinfo", "r");
2154    if (cpuInfo == NULL) { free(cpus); return 0; }
2155
2156    char line[40];
2157    unsigned count = 0;
2158    while (fgets(line, sizeof(line), cpuInfo) != NULL)
2159    {
2160        if (strncmp(line, "core id\t\t:", 10) == 0)
2161        {
2162            long n = strtol(line+10, NULL, 10);
2163            unsigned i = 0;
2164            // Skip this id if we've seen it already
2165            while (i < count && cpus[i] != n) i++;
2166            if (i == count) cpus[count++] = n;
2167        }
2168        if (strchr(line, '\n') == 0)
2169        {
2170            int ch;
2171            do { ch = getc(cpuInfo); } while (ch != '\n' && ch != EOF);
2172        }
2173    }
2174
2175    fclose(cpuInfo);
2176    free(cpus);
2177    return count;
2178}
2179
2180extern unsigned NumberOfPhysicalProcessors(void)
2181{
2182    unsigned numProcs = 0;
2183#if (defined(HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION))
2184    numProcs = WinNumPhysicalProcessors();
2185    if (numProcs != 0) return numProcs;
2186#endif
2187#if (defined(HAVE_SYSCTLBYNAME) && defined(HAVE_SYS_SYSCTL_H))
2188    // Mac OS X
2189    int nCores;
2190    size_t len = sizeof(nCores);
2191    if (sysctlbyname("hw.physicalcpu", &nCores, &len, NULL, 0) == 0)
2192        return (unsigned)nCores;
2193#endif
2194    numProcs = LinuxNumPhysicalProcessors();
2195    if (numProcs != 0) return numProcs;
2196    // Any other cases?
2197    return numProcs;
2198}
2199