%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Copyright (c) 2009, 2011, ETH Zurich.
% All rights reserved.
%
% This file is distributed under the terms in the attached LICENSE file.
% If you do not find this file, copies can be found by writing to:
% ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

:-lib(ic).
:-lib(ic_global).
:-use_module(library(ic_edge_finder)).
:-lib(branch_and_bound).

% :-include("../data/globalthread_testdata.txt").

:- set_flag(print_depth, 200).

% :-dynamic(currentbar/5).

:-dynamic(task_currently_allocated/3).
:-dynamic(task_status/5).
:-dynamic(task_statistics/6).
:-dynamic(task_max_parallel/3).


%CLP: turn data and information into knowledge -> see the book by Qin


%Rules
% 1. #Threads <= Nr (e.g. minimum block size per thread)
% 2. #Threads >= Nr
% 3. If the block size < cache line size => group the threads on hyperthreads (HTs)
% 4. If threads operate on buffers ((de-)coding, maplist, ...),
%    then use HTs
% 5. If threads perform intensive computations on few variables,
%    then use real cores (possibly use the second HT as a memory prefetcher)
% 6. Global: group sporadic threads; threads that stay in the system for a
%    long time and work on the same buffer for a long time (network, server)
%    are grouped as a group of their own on #Cores
% 7. Build services: an OpenSSL service that offers AES encryption and
%    decryption, therefore stays in the system for a long time and knows
%    where and when which threads are used
% 8. Tell each service that it may now use this many threads
%    on these particular cores
% 9. Group user apps that come and go quickly on cores

%10. The application should define which of its threads communicate or
%    synchronize with each other. The algorithm here should then place
%    these threads as close to each other as possible.

%11. The application should be able to state that it can only run on cores
%    that support a given feature (for example cores with an FPU). The
%    algorithm should then select only such cores.

%12. The algorithm should place FPU threads on different cores where possible,
%    so that the operating system never has to perform an FPU context switch.
%    That switch is only done on demand via an FPU exception (interrupt) when
%    the new thread executes an FPU instruction for the first time after a
%    switch. If only one thread with FPU instructions runs per core, the FPU
%    exception never has to be handled.

% HW data
% cpu_affinity(APIC, LocalSApicEid, ProximityDomain).

%cpu_affinity(0, 0, 0).
%cpu_affinity(1, 0, 0).
%cpu_affinity(2, 0, 0).
%cpu_affinity(3, 0, 0).
%cpu_affinity(4, 0, 0).
%cpu_affinity(5, 0, 0).

%cpu_affinity(6, 0, 1).
%cpu_affinity(7, 0, 1).
%cpu_affinity(8, 0, 1).
%cpu_affinity(9, 0, 1).
%cpu_affinity(10, 0, 1).
%cpu_affinity(11, 0, 1).

%cpu_affinity(12, 0, 2).
%cpu_affinity(13, 0, 2).
%cpu_affinity(14, 0, 2).
%cpu_affinity(15, 0, 2).
%cpu_affinity(16, 0, 2).
%cpu_affinity(17, 0, 2).

%cpu_affinity(18, 0, 3).
%cpu_affinity(19, 0, 3).
%cpu_affinity(20, 0, 3).
%cpu_affinity(21, 0, 3).
%cpu_affinity(22, 0, 3).
%cpu_affinity(23, 0, 3).

affinity_domain_list(Domains) :-
    findall(D, cpu_affinity(_,_,D), DomainList),
    affinity_domain_list(DomainList, Domains).
affinity_domain_list([H|DomainList], [H|Domains]) :-
    subtract(DomainList, [H], DomainListNew),
    affinity_domain_list(DomainListNew, Domains).
affinity_domain_list([], []).

get_numa_domain_for_core(CoreNumber, NUMADomain, RL, RH, MaxSize) :-
    ( is_predicate(cpu_affinity/3), cpu_affinity(CoreNumber, _, NUMADomain) ->
        findall(L, memory_affinity(L, _, NUMADomain), AfL),
        findall(H, (memory_affinity(L1, S, NUMADomain), H is L1 + S), AfH),
        findall(Sz, memory_affinity(_, Sz, NUMADomain), Sizes),
        eclipse_language:min(AfL, RL),
        eclipse_language:max(AfH, RH),
        sum(Sizes, MaxSize)
        ;
        NUMADomain = 0,
        RL = 0,
        RH = 4294967295,
        MaxSize = 4294967295
    ).
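
% Illustrative sketch: with the commented-out cpu_affinity/3 facts above
% (core 6 sits in proximity domain 1) and a hypothetical fact such as
% memory_affinity(16'40000000, 16'40000000, 1), the query
%   ?- get_numa_domain_for_core(6, D, Low, High, Size).
% would yield D = 1, Low = 16'40000000, High = 16'80000000 and
% Size = 16'40000000. Without any cpu_affinity/3 facts the fallback case
% reports domain 0 and the full 32-bit range.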

% task(functionaddress, clientsocket, minblocksize(sz)).
%task(t1, 1, minblocksize(1)).
%task(t2, 2, minblocksize(1)).
%task(t3, 3, minblocksize(1)).
%task(t4, 4, minblocksize(1)).

% task_max_parallel(functionaddress, clientsocket, maxnrthreads).
%task_max_parallel(t1, 1, 4).
%task_max_parallel(t2, 2, 2).
%task_max_parallel(t3, 3, 3).
%task_max_parallel(t4, 4, 1).


% data entries to define whether the task is compute or memory bound
%task_numa(functionaddress, clientid, memory).
% -> spread threads on different NUMA domains

%task_numa(functionaddress, clientid, compute).
% -> fill one NUMA domain (use all the cores on the same domain), before using
%    cores on other domains
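
% Illustrative (hypothetical) facts matching the format above:
%task_numa(t1, 1, memory).   % spread the threads of t1 over the NUMA domains
%task_numa(t2, 2, compute).  % pack the threads of t2 onto one NUMA domain first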


task_get_max_parallel(Name, ClientID, MaxNr) :-
    core_list_id(CoreIDList),
    length(CoreIDList, NrRunningCores),
    ( task_max_parallel(Name, ClientID, MaxNr) ->
        true
        ;
        MaxNr is NrRunningCores
    ).


nr_running_tasks(Nr) :-
    findall(_, (task(Name,ClientID,_), task_status(Name,ClientID,_,_,running)), TaskList),
    length(TaskList, Nr).

task_list(TaskList) :-
    findall(taskname(Name,ClientID), ( task(Name,ClientID,_), task_running_status(Name, ClientID, running)), TaskNames),
    length(TaskNames, Len),
    ( foreach(N, TaskNames),
      foreach(Task, TaskList),
      for(I, 1, Len)
      do
        Task = task(I, N)
    ).
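
% Sketch: with the commented-out example tasks above registered and marked
% running, task_list/1 would give
%   [task(1, taskname(t1,1)), task(2, taskname(t2,2)), ...],
% i.e. the running tasks numbered consecutively from 1.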

core_list_hw_id(CoreList):-
    findall(HWID, corename(_,_,apic(HWID)), CoreList).

% use the high-level name, which makes it easier
core_list_id(CoreList):-
    findall(corename(ID), corename(ID,_,_), CoreList).


get_max_parallel(task(_, N), N).

sum_max_parallel_threads(Sum) :-
    findall(S, (task(N,ClientID,_), task_get_max_parallel(N,ClientID,S), task_running_status(N, ClientID, running)), L),
    sum(L, Sum).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% create data structure
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
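
% The allocation problem is modelled as a tasks x cores matrix of 0/1 domain
% variables. TaskList is a list of task(I, taskname(Fn,ClID), Cores) terms,
% where Cores is a list of core(J, HWID, Var) entries with Var :: 0..1;
% CoreList is the transposed view, a list of core(J, _, Tasks) terms whose
% task(I, taskname(Fn,ClID), Var) entries share the very same variables, so
% constraints can be stated per task (rows) as well as per core (columns).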

%access_nr(_, [], []).
%access_nr(1, [H|_], H).
%access_nr(N, [_|T], El) :-
%    M is N - 1,
%    access_nr(M, T, El).

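% access_nr(+N, +List, -El): 1-based access to the N-th element of List;
% an index below 1 yields [], an index past the end of the list fails.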
access_nr(N, [H|T], El) :-
    ( N = 1 ->
        El = H;
      N < 1 ->
        El = [];
      M is N - 1,
      access_nr(M, T, El)
    ).

transpose_matrix(M, CoreList) :-
%    nr_running_cores(NrRunningCores),
    nr_running_tasks(NrRunningTasks),
    core_list_hw_id(CoreIDList),
    length(CoreIDList, NrRunningCores),
    ( for(I, 1, NrRunningCores),
      foreach(core(I, _, TaskList), CoreList),
      param(NrRunningTasks),
      param(M)
      do
      ( for(J, 1, NrRunningTasks),
        foreach(task(J, TaskName, TaskVar), TaskList),
        param(M),
        param(I)
        do
          access_nr(J, M, TEl),
          task(_, TaskName, TCoreList) = TEl,
          access_nr(I, TCoreList, CEl),
          core(_, _, TaskVar) = CEl
      )
    ).

thread_alloc_create_datastructure(TaskList, CoreList) :-
%    nr_running_cores(NrRunningCores),
    nr_running_tasks(NrRunningTasks),
    task_list(TLNames),
    core_list_hw_id(CoreIDList),
    length(CoreIDList, NrRunningCores),
    ( for(I, 1, NrRunningTasks),
      foreach(task(_,TaskName), TLNames),
      foreach(task(I, TaskName, CoreList), TaskList),
      param(NrRunningCores),
      param(CoreIDList)
      do
      ( for(J, 1, NrRunningCores),
        foreach(HWID, CoreIDList),
        foreach(core(J, HWID, Core), CoreList)
        do
        Core::[0..1]
      )
    ),
    transpose_matrix(TaskList, CoreList).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% instantiate the variables
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

globalthreadalloc_labelall(TaskList) :-
    ( foreach(Task, TaskList),
      foreach(VarList, VarLists)
      do
        task(_,_, TaskVarList) = Task,
        maplist(corelistvariable, TaskVarList, VarList)
    ),
    flatten(VarLists, VarListsF),
    labeling(VarListsF).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% constraints
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

tasklistvariable(task(_, _, Var), Var).

nr_tasks_per_core(MaxNr, CoreList) :-
    ( foreach(Core, CoreList),
      param(MaxNr)
      do
        core(_, _, CoreVarList) = Core,
        maplist(tasklistvariable, CoreVarList, CoreVars),
        ic_global:sumlist(CoreVars, CoreSum),
        CoreSum $=< MaxNr
    ).

corelistvariable(core(_, _, Var), Var).


% explicit min and max nr threads per task
nr_threads_per_task(MinNr, MaxNr, TaskList) :-
    ( foreach(Task, TaskList),
      param(MinNr),
      param(MaxNr)
      do
        task(_, _, TaskVarList) = Task,
        maplist(corelistvariable, TaskVarList, TaskVars),
        ic_global:sumlist(TaskVars, TaskSum),
        TaskSum $>= MinNr,
        TaskSum $=< MaxNr
    ).


% max nr threads set according to stored data:
% task_max_parallel(taskname, clientid, maxnrthreads).

nr_threads_per_task(TaskList) :-
    ( foreach(Task, TaskList)
      do
        task(_, TaskName, TaskVarList) = Task,
        taskname(TN, TClID) = TaskName,
        task_get_max_parallel(TN, TClID, MaxNr),
        maplist(corelistvariable, TaskVarList, TaskVars),
        ic_global:sumlist(TaskVars, TaskSum),
        TaskSum $=< MaxNr
    ).


% This is the main function handling the per-task NUMA properties (no global
% NUMA properties are handled here).
% Note that we cannot specify that one task should spread its threads evenly
% over all NUMA regions and at the same time specify globally that one NUMA
% region may only host a single task: that would contradict the requirement
% that a massively parallel task distributes its threads over _all_ NUMA
% regions, so specifying both at once does not work.
% If at all, we would need to make sure that one task only gets a subset of
% all available NUMA regions, and therefore only a subset of all cores, right
% from the beginning when cores are assigned to each task.

numa_properties_per_task(TaskList) :-
    affinity_domain_list(L),
    ( foreach(Task, TaskList),
      param(L)
      do
        task(_, TaskName, TaskVarList) = Task,
        ( foreach(D, L),
          foreach(CList, CLists),
          param(TaskVarList)
          do
%            findall(core(_,corename(CoreNr),_),cpu_affinity(CoreNr,_,D),SubtractList),
            findall(core(_,CoreNr,_), cpu_affinity(CoreNr,_,D), SubtractList),
            subtract(TaskVarList, SubtractList, TmpList),
            subtract(TaskVarList, TmpList, CList)
        ),
        apply_numa_properties(TaskName, CLists)
    ).


% this function decides which NUMA policy applies to which task

apply_numa_properties(taskname(FunctionAddress, ClID), NUMACoreList) :-
    ( is_predicate(task_numa/3), task_numa(FunctionAddress, ClID, memory) ->
        apply_numa_properties_memory(NUMACoreList)
      ;
      is_predicate(task_numa/3), task_numa(FunctionAddress, ClID, compute) ->
        apply_numa_properties_compute(NUMACoreList)
      ;
      is_predicate(task_working_set_size/3), task_working_set_size(FunctionAddress, ClID, WorkingSetSize) ->
        apply_numa_properties_max_numa_size(NUMACoreList, WorkingSetSize)
      ;
      true
    ).


% the concrete NUMA policy implementations

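% compute policy: for each NUMA region, B is 1 iff the task uses only part of
% the region's cores (neither none nor all of them); allowing at most one such
% partially used region forces the task to fill up whole regions before
% spilling over into the next one.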
apply_numa_properties_compute(NUMACoreList) :-
    ( foreach(NUMARegion, NUMACoreList),
      foreach(B, NUMASums)
      do
        maplist(corelistvariable, NUMARegion, NUMARegionVars),
        ic_global:sumlist(NUMARegionVars, Sum),
        length(NUMARegionVars, Len),
        and(Sum $\= 0, Sum $\= Len, B)
    ),
    ic_global:sumlist(NUMASums, NrPartialRegions),
    NrPartialRegions $=< 1.

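% memory policy: the number of the task's threads placed in each NUMA region
% may differ by at most one, i.e. the threads are spread evenly over all
% regions.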
apply_numa_properties_memory(NUMACoreList) :-
    ( foreach(NUMARegion, NUMACoreList),
      foreach(Sum, NUMASums)
      do
        maplist(corelistvariable, NUMARegion, NUMARegionVars),
        ic_global:sumlist(NUMARegionVars, Sum)
    ),
    ic:minlist(NUMASums, Min),
    ic:maxlist(NUMASums, Max),
    Diff $= Max - Min,
    Diff $=< 1.

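% working set size policy: per NUMA node, the number of threads allocated
% there times the task's working set size must not exceed the total memory of
% that node (summed over the node's memory_affinity/3 ranges).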
apply_numa_properties_max_numa_size(NUMACoreList, WorkingSetSize) :-
    ( foreach(NUMARegion, NUMACoreList),
      foreach(Sum, NUMASums)
      do
        maplist(corelistvariable, NUMARegion, NUMARegionVars),
        ic_global:sumlist(NUMARegionVars, Sum)
    ),
    findall(A, memory_affinity(_, _, A), AL),
    sort(AL, ALS),
    ( foreach(Af, ALS),
      foreach(Sz, NodeSizes)
      do
        findall(Size, memory_affinity(_, Size, Af), Sizes),
        sum(Sizes, Sz)
    ),
    ( foreach(N, NodeSizes),
      foreach(NSum, NUMASums),
      param(WorkingSetSize)
      do
        AllocSize $= NSum * WorkingSetSize,
        AllocSize $=< N
    ).



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

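% Distribute the available cores over the running tasks proportionally to each
% task's maximum parallelism: each task is fixed to
% floor(NrRunningCores / Sum * MaxNr) threads (at least 1, at most MaxNr),
% and the recursion continues with the remaining cores and the remaining sum.
% Illustrative example (hypothetical numbers): with 8 cores and two tasks
% whose MaxNr values are 4 and 2 (Sum = 6), the first task gets
% min(floor(8/6*4), 4) = 4 threads; the recursion then continues with
% 8 - 5 = 3 cores (the uncapped share of 5 is subtracted) and Sum = 2, so the
% second task gets min(floor(3/2*2), 2) = 2 threads.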
adjust_max_parallel_threads([], _, _).
adjust_max_parallel_threads([H|TaskList], NrRunningCores, Sum):-
    task(_, TaskName, TaskVarList) = H,
    taskname(TN, TClID) = TaskName,
    task_get_max_parallel(TN, TClID, MaxNr),
    EffektivNumberFloat is (NrRunningCores / Sum * MaxNr),
    floor(EffektivNumberFloat, EffektivNumberFloor),
    integer(EffektivNumberFloor, EffektivNumberInt),
    ( EffektivNumberInt =:= 0 ->
        EffektivNumber is 1
        ;
        EffektivNumber is EffektivNumberInt
    ),
    min(EffektivNumber, MaxNr, DefNr),
    maplist(corelistvariable, TaskVarList, TaskVars),
    ic_global:sumlist(TaskVars, TaskSum),
    TaskSum $= DefNr,
    NewNumberRunningCores is NrRunningCores - EffektivNumber,
    NewSum is Sum - MaxNr,
    adjust_max_parallel_threads(TaskList, NewNumberRunningCores, NewSum).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% main internal goal: create data structure, apply constraints, instantiate
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

haupt(TaskList) :-
    thread_alloc_create_datastructure(TaskList, CoreList),
    nr_tasks_per_core(1, CoreList),
%    nr_running_cores(NrRunningCores),
    core_list_hw_id(CoreIDList),
    length(CoreIDList, NrRunningCores),
%    nr_threads_per_task(1,NrRunningCores, TaskList),
%    nr_threads_per_task(TaskList),
    sum_max_parallel_threads(Sum),
    adjust_max_parallel_threads(TaskList, NrRunningCores, Sum),
    numa_properties_per_task(TaskList),
    globalthreadalloc_labelall(TaskList).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% transforming the solution to get a nice output
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% get all the allocated cores (the ones which have a "1")
% this is a kind of filter function which creates a list of allocated cores
% per task

allocated_cores([], []).
allocated_cores([H|CoreList], AllocatedCoresList) :-
    core(_, _, 0) = H,
    allocated_cores(CoreList, AllocatedCoresList).
allocated_cores([H|CoreList], [HWID|AllocatedCoresList]) :-
    core(_, HWID, 1) = H,
    allocated_cores(CoreList, AllocatedCoresList).



% get all the allocated cores for each task

allocation_output(TaskList, AllocatedCoresList) :-
    ( foreach(Task, TaskList),
      foreach(task(TaskName, AllocatedCores), AllocatedCoresList)
      do
        task(_, TaskName, CoreList) = Task,
        allocated_cores(CoreList, AllocatedCores)
    ).


% Compute the allocated-core difference per task. This difference defines
% which cores have to be added to and which ones removed from each task,
% relative to the previous allocation. Every element contains the task as
% well as the core name and an operation (1 = add this core to the task,
% 0 = remove this core from this task).
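% Each element has the shape produced below (sketch):
%   task_thread(FunctionAddress, ClientID, CoreID, Op, numa(Domain, Low, High, Size))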

allocation_difference(AllocatedCores, OpsisF) :-
    ( foreach(Task, AllocatedCores),
      foreach(Ops, Opsis)
      do
        task(taskname(Name,TClID), NewCoreList) = Task,
        ( is_predicate(task_currently_allocated/3), task_currently_allocated(Name, TClID, CurrentCoreList) ->
            true
            ;
            CurrentCoreList = []
        ),
        ( is_predicate(task_currently_allocated/3), retract(task_currently_allocated(Name, TClID, CurrentCoreList)) ->
            true;
            true
        ),
        assert(task_currently_allocated(Name, TClID, NewCoreList)),
        subtract(CurrentCoreList, NewCoreList, RemoveCores),
        subtract(NewCoreList, CurrentCoreList, AddCores),
        ( foreach(AddCore, AddCores),
          foreach(El, OperationsList),
          param(Name),
          param(TClID)
          do
%% XXX: WARNING: there is a mess between hardware corenames and core numbers as used in the OS
%%               this only works on Linux where we only see core numbers allocated by the kernel
%%               and no apic IDs
%            corename(CN) = AddCore,
            CN = AddCore,
            get_numa_domain_for_core(CN, NUMADomain, Low, High, Sz),
%            El = task_thread(Name, TClID, corename(AddCore), 1, numa(NUMADomain))
            El = task_thread(Name, TClID, AddCore, 1, numa(NUMADomain, Low, High, Sz))
%            El = task_thread(Name, TClID, corename(AddCore), 1)
        ),
        ( foreach(RemoveCore, RemoveCores),
          foreach(El2, OperationsList2),
          param(Name),
          param(TClID)
          do
%% XXX: WARNING: there is a mess between hardware corenames and core numbers as used in the OS
%%               this only works on Linux where we only see core numbers allocated by the kernel
%%               and no apic IDs
%            corename(CN2) = RemoveCore,
            CN2 = RemoveCore,
            get_numa_domain_for_core(CN2, NUMADomain, Low, High, Sz),
%            El2 = task_thread(Name, TClID, corename(RemoveCore), 0, numa(NUMADomain))
            El2 = task_thread(Name, TClID, RemoveCore, 0, numa(NUMADomain, Low, High, Sz))
%            El2 = task_thread(Name, TClID, corename(RemoveCore), 0)
        ),
        append(OperationsList, OperationsList2, Ops)
    ),
    flatten(Opsis, OpsisF).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% this is the main goal called regularly by the resource manager
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

task_allocation_plan(P) :-
    haupt(Plan),
    !,
    allocation_output(Plan, AllocatedCores),
    allocation_difference(AllocatedCores, P).
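
% Illustrative usage sketch (hypothetical values), assuming corename/3 facts
% for the available cores (and optionally cpu_affinity/3 / memory_affinity/3
% facts) have already been asserted by the system:
%
%   ?- task_register_function(16'400a00, 1, 64),
%      task_config(16'400a00, 1, 0, 0, running),
%      task_allocation_plan(Plan).
%
% Plan is then a list of task_thread(16'400a00, 1, CoreID, Op, numa(...))
% terms telling the resource manager which cores to add (Op = 1) to or
% remove (Op = 0) from the task.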

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
task_add_statistics(FunctionAddress, ClientSocket, tsc(TSCTicks), nrcores(NrC), startaddr(Start), endaddr(End)) :-
    assert(task_statistics(FunctionAddress, ClientSocket, tsc(TSCTicks), nrcores(NrC), startaddr(Start), endaddr(End))).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
task_config(FunctionAddress, ClID, Start, End, running):-
    ( is_predicate(task_status/5), task_status(FunctionAddress, ClID, _, _, _) ->
        retract(task_status(FunctionAddress, ClID, _, _, _))
        ;
        true
    ),
    assert(task_status(FunctionAddress, ClID, Start, End, running)).

task_config(FunctionAddress, ClID, Start, End, stopped):-
    ( is_predicate(task_status/5), task_status(FunctionAddress, ClID, _, _, _) ->
        retract(task_status(FunctionAddress, ClID, _, _, _))
        ;
        true
    ),
    ( is_predicate(task_currently_allocated/3) ->
        retractall(task_currently_allocated(FunctionAddress, ClID, _))
        ;
        true
    ),
    assert(task_status(FunctionAddress, ClID, Start, End, stopped)).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Limit the parallelism that this task can get
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
task_set_max_parallel(FunctionAddress, ClID, MaxNr) :-
    ( is_predicate(task_max_parallel/3) ->
        retractall(task_max_parallel(FunctionAddress, ClID, _))
        ;
        true
    ),
    assert(task_max_parallel(FunctionAddress, ClID, MaxNr)).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% set the per-core working set size of a task
% This helps to check that the sum of the working sets of all cores
% belonging to the same NUMA node does not exceed the size of that NUMA
% node.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
task_set_working_set_size(FunctionAddress, ClID, WorkingSetSize) :-
    ( is_predicate(task_working_set_size/3) ->
        retractall(task_working_set_size(FunctionAddress, ClID, _))
        ;
        true
    ),
    assert(task_working_set_size(FunctionAddress, ClID, WorkingSetSize)).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%



%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Register a function. Called by the resource manager
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
task_register_function(FunctionAddress, ClID, MinBlockSize):-
    ( is_predicate(task/3) ->
        retractall(task(FunctionAddress, ClID, _))
        ;
        true
    ),
    assert(task(FunctionAddress, ClID, minblocksize(MinBlockSize))).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
task_running_status(FunctionAddress, ClID, Running) :-
    ( is_predicate(task_status/5), task_status(FunctionAddress, ClID, _, _, Running) ->
        true
        ;
        Running = nonexistent
    ).

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
task_remove_client(ClID):-
    ( is_predicate(task/3), task(_, ClID, _) ->
        retract(task(_, ClID, _))
        ;
        true
    ),
    ( is_predicate(task_max_parallel/3), task_max_parallel(_, ClID, _) ->
        retract(task_max_parallel(_, ClID, _))
        ;
        true
    ),
    ( is_predicate(task_status/5), task_status(_, ClID, _, _, _) ->
        retract(task_status(_, ClID, _, _, _))
        ;
        true
    ),
    ( is_predicate(task_currently_allocated/3), task_currently_allocated(_, ClID, _) ->
        retract(task_currently_allocated(_, ClID, _))
        ;
        true
    ),
    ( is_predicate(task_statistics/6), task_statistics(_, ClID, _, _, _, _) ->
        retract(task_statistics(_, ClID, _, _, _, _))
        ;
        true
    ),
    ( is_predicate(task_working_set_size/3), task_working_set_size(_, ClID, _) ->
        retractall(task_working_set_size(_, ClID, _))
        ;
        true
    ).
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

