1%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2% Copyright (c) 2009, 2011, ETH Zurich. 3% All rights reserved. 4% 5% This file is distributed under the terms in the attached LICENSE file. 6% If you do not find this file, copies can be found by writing to: 7% ETH Zurich D-INFK, Universitaetstrasse 6, CH-8092 Zurich. Attn: Systems Group. 8%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 9 10:-lib(ic). 11:-lib(ic_global). 12:-use_module(library(ic_edge_finder)). 13:-lib(branch_and_bound). 14 15% :-include("../data/globalthread_testdata.txt"). 16 17:- set_flag(print_depth, 200). 18 19% :-dynamic(currentbar/5). 20 21:-dynamic(task_currently_allocated/3). 22:-dynamic(task_status/5). 23:-dynamic(task_statistics/6). 24:-dynamic(task_max_parallel/3). 25 26 27 28%CLP: Aus Daten und Information Wissen machen -> Siehe Buch Qin 29 30 31%Regla 32% 1. #Threads <= Nr (z.Bsp. minimale Blockgroesse pro Thread) 33% 2. #Threads >= Nr 34% 3. Wenn Blockgroesse < Cachelinegroesse => Threads auf HT gruppieren 35% 4. Wenn Threads auf Puffer operieren ((de-)codieren, maplist,...) 36% dann HTs benutzen 37% 5. Wenn Threads intensive Berechnungen von wenigen Variablen durchfuehren, 38% dann echte Cores benutzen (ev. zweiter HT als Mem-Prefetcher benutzen) 39% 6. Global: Sporadische Threads gruppieren, Threads die lange im System 40% bleiben und lange auf gleichem Puffer arbeiten (Netzwerk, Server) 41% als eigene Gruppe auf #Cores gruppieren 42% 7. Services bauen: OpenSSL-Service, der AES-Ver- und -Entschluesselung 43% anbietet und dadurch lange im System bleibt und weiss, wo wann welche 44% Threads zum Einsatz kommen 45% 8. Allen Services jeweils sagen, dass sie jetzt so und so viele Threads 46% auf diesen und diesen Cores benutzen koennen 47% 9. User-Apps, die schnell kommen und gehen auf Cores gruppieren 48 49%10. Applikation soll definieren, welche Threads miteinander kommunizieren 50% oder synchronisieren. Algo hier soll dann diese Threads moeglichst 51% nahe voneinander plazieren. 52 53%11. Applikation soll definieren, dass sie nur auf Core, der bla unterstuetzt, 54% laufen kann (beispielsweise einer, der FPU hat). Algo soll entsprechend 55% nur diese Cores auswaehlen 56 57%12. Algo soll FPU threads moeglichst auf verschiedene Cores setzen, damit das 58% Betriebssystem keinen FPU-Kontext-Switch machen muss. Dieser wird 59% nur on-demand by FPU-Exception (interrupt) gemacht, falls neuer Thread 60% nach Switch das erstemal FPU-Instruktion ausfuehrt. Wenn man nur einen 61% Thread pro Core mit FPU-Instruktionen laufen laesst, muss nie FPU-Exception 62% behandelt werden. 63 64 65% HW data 66% cpu_affinity(APIC, LocalSApicEid, ProximityDomain). 67 68%cpu_affinity(0, 0, 0). 69%cpu_affinity(1, 0, 0). 70%cpu_affinity(2, 0, 0). 71%cpu_affinity(3, 0, 0). 72%cpu_affinity(4, 0, 0). 73%cpu_affinity(5, 0, 0). 74 75%cpu_affinity(6, 0, 1). 76%cpu_affinity(7, 0, 1). 77%cpu_affinity(8, 0, 1). 78%cpu_affinity(9, 0, 1). 79%cpu_affinity(10, 0, 1). 80%cpu_affinity(11, 0, 1). 81 82%cpu_affinity(12, 0, 2). 83%cpu_affinity(13, 0, 2). 84%cpu_affinity(14, 0, 2). 85%cpu_affinity(15, 0, 2). 86%cpu_affinity(16, 0, 2). 87%cpu_affinity(17, 0, 2). 88 89%cpu_affinity(18, 0, 3). 90%cpu_affinity(19, 0, 3). 91%cpu_affinity(20, 0, 3). 92%cpu_affinity(21, 0, 3). 93%cpu_affinity(22, 0, 3). 94%cpu_affinity(23, 0, 3). 95 96affinity_domain_list(Domains) :- 97 findall(D, cpu_affinity(_,_,D),DomainList), 98 affinity_domain_list(DomainList, Domains). 99affinity_domain_list([H|DomainList], [H|Domains]) :- 100 subtract(DomainList, [H], DomainListNew), 101 affinity_domain_list(DomainListNew, Domains). 102affinity_domain_list([], []). 103 104get_numa_domain_for_core(CoreNumber, NUMADomain, RL, RH, MaxSize) :- 105 ( is_predicate(cpu_affinity/3),cpu_affinity(CoreNumber, _, NUMADomain) -> 106 findall(L, memory_affinity(L, _, NUMADomain), AfL), 107 findall(H, (memory_affinity(L1, S, NUMADomain), H is L1 + S), AfH), 108 findall(Sz, memory_affinity(_, Sz, NUMADomain), Sizes), 109 eclipse_language:min(AfL, RL), 110 eclipse_language:max(AfH, RH), 111 sum(Sizes, MaxSize) 112 ; 113 NUMADomain = 0, 114 RL = 0, 115 RH = 4294967295, 116 MaxSize = 4294967295 117 ). 118 119% task(functionaddress, clientsocket, minblocksize(sz)). 120%task(t1, 1, minblocksize(1)). 121%task(t2, 2, minblocksize(1)). 122%task(t3, 3, minblocksize(1)). 123%task(t4, 4, minblocksize(1)). 124 125% task_max_parallel(t1, Socket, 4). 126%task_max_parallel(t1, 1, 4). 127%task_max_parallel(t2, 2, 2). 128%task_max_parallel(t3, 3, 3). 129%task_max_parallel(t4, 4, 1). 130 131 132% data entries to define whether the task is compute or memory bound 133%task_numa(functionaddress, clientid, memory). 134% -> spread threads on different NUMA domains 135 136%task_numa(functionaddress, clientid, compute). 137% -> fill one NUMA domain (use all the cores on the same domain), before using 138% cores on other domains 139 140 141 142 143task_get_max_parallel(Name, ClientID, MaxNr) :- 144 core_list_id(CoreIDList), 145 length(CoreIDList, NrRunningCores), 146 ( task_max_parallel(Name, ClientID, MaxNr) -> 147 true 148 ; 149 MaxNr is NrRunningCores 150 ). 151 152 153nr_running_tasks(Nr) :- 154 findall(_, (task(Name,ClientID,_),task_status(Name,ClientID,_,_,running)), TaskList), 155 length(TaskList, Nr). 156 157task_list(TaskList) :- 158 findall(taskname(Name,ClientID), ( task(Name,ClientID,_), task_running_status(Name, ClientID, running)), TaskNames), 159 length(TaskNames, Len), 160 ( foreach(N, TaskNames), 161 foreach(Task, TaskList), 162 for(I, 1, Len) 163 do 164 Task = task(I, N) 165 ). 166 167core_list_hw_id(CoreList):- 168 findall(HWID, corename(_,_,apic(HWID)), CoreList). 169 170% use higlevel name, that makes it easier 171core_list_id(CoreList):- 172 findall(corename(ID), corename(ID,_,_), CoreList). 173 174 175get_max_parallel(task(_, N), N). 176 177sum_max_parallel_threads(Sum) :- 178 findall(S,(task(N,ClientID,_),task_get_max_parallel(N,ClientID,S), task_running_status(N, ClientID,running)),L), 179 sum(L, Sum). 180 181%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 182% create data structure 183%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 184 185%access_nr(_, [], []). 186%access_nr(1, [H|_], H). 187%access_nr(N, [_|T], El) :- 188% M is N - 1, 189% access_nr(M, T, El). 190 191access_nr(N, [H|T], El) :- 192 ( N = 1 -> 193 El = H; 194 N < 1 -> 195 El = []; 196 M is N - 1, 197 access_nr(M, T, El) 198 ). 199 200transpose_matrix(M, CoreList) :- 201% nr_running_cores(NrRunningCores), 202 nr_running_tasks(NrRunningTasks), 203 core_list_hw_id(CoreIDList), 204 length(CoreIDList, NrRunningCores), 205 ( for(I, 1, NrRunningCores), 206 foreach(core(I, _, TaskList), CoreList), 207 param(NrRunningTasks), 208 param(M) 209 do 210 ( for(J,1,NrRunningTasks), 211 foreach(task(J, TaskNama, TaskVar), TaskList), 212 param(M), 213 param(I) 214 do 215 access_nr(J, M, TEl), 216 task(_, TaskNama, TCoreList) = TEl, 217 access_nr(I, TCoreList, CEl), 218 core(_, _, TaskVar) = CEl 219 ) 220 ). 221 222thread_alloc_create_datastructure(TaskList, CoreList) :- 223% nr_running_cores(NrRunningCores), 224 nr_running_tasks(NrRunningTasks), 225 task_list(TLNames), 226 core_list_hw_id(CoreIDList), 227 length(CoreIDList, NrRunningCores), 228 ( for(I, 1, NrRunningTasks), 229 foreach(task(_,TaskNama), TLNames), 230 foreach(task(I, TaskNama, CoreList), TaskList), 231 param(NrRunningCores), 232 param(CoreIDList) 233 do 234 ( for(J, 1, NrRunningCores), 235 foreach(HWID, CoreIDList), 236 foreach(core(J, HWID, Core), CoreList) 237 do 238 Core::[0..1] 239 ) 240 ), 241 transpose_matrix(TaskList, CoreList). 242%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 243 244 245%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 246% instantiate a variable 247%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 248 249globalthreadalloc_labelall(TaskList) :- 250 ( foreach(Task, TaskList), 251 foreach(VarList, VarLists) 252 do 253 task(_,_, TaskVarList) = Task, 254 maplist(corelistvariable, TaskVarList, VarList) 255 ), 256 flatten(VarLists, VarListsF), 257 labeling(VarListsF). 258%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 259 260 261%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 262% constraints 263%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 264 265tasklistvariable(task(_, _, Var), Var). 266 267nr_tasks_per_core(MaxNr, CoreList) :- 268 ( foreach(Core, CoreList), 269 param(MaxNr) 270 do 271 core(_, _,CoreVarList) = Core, 272 maplist(tasklistvariable, CoreVarList, CoreVars), 273 ic_global:sumlist(CoreVars, CoreSum), 274 CoreSum $=< MaxNr 275 ). 276 277corelistvariable(core(_, _, Var), Var). 278 279 280% explicit min and max nr threads per task 281nr_threads_per_task(MinNr, MaxNr, TaskList) :- 282 ( foreach(Task, TaskList), 283 param(MinNr), 284 param(MaxNr) 285 do 286 task(_, _, TaskVarList) = Task, 287 maplist(corelistvariable, TaskVarList, TaskVars), 288 ic_global:sumlist(TaskVars, TaskSum), 289 TaskSum $>= MinNr, 290 TaskSum $=< MaxNr 291 ). 292 293 294% max nr threads set according to stored data: 295% task_max_parallel(taskname, maxnr threads). 296 297nr_threads_per_task(TaskList) :- 298 ( foreach(Task, TaskList) 299 do 300 task(_, TaskName, TaskVarList) = Task, 301 taskname(TN,TClID) = TaskName, 302 task_get_max_parallel(TN, TClID, MaxNr), 303 maplist(corelistvariable, TaskVarList, TaskVars), 304 ic_global:sumlist(TaskVars, TaskSum), 305 TaskSum $=< MaxNr 306 ). 307 308 309% this is the main function handling the per task NUMA properties (no global 310% NUMA properties are handled here). 311% Actually we cannot specify that one task should evenly spread its threads 312% over all NUMA regions and at the same time specify globally that one NUMA 313% region can only have one task on it. This would violate that a massively 314% parallel task evenly distributes its threads on _all_ NUMA regions. Therefore 315% specifying both at the same time is contradicting and would not work. 316% If at ll, we would need to make sure that one task only gets a subset of 317% all available NUMA regions and therefore only a subset of all cores already 318% from the beginning when assigning cores to each task. 319 320numa_properties_per_task(TaskList) :- 321 affinity_domain_list(L), 322 ( foreach(Task, TaskList), 323 param(L) 324 do 325 task(_, TaskName, TaskVarList) = Task, 326 ( foreach(D,L), 327 foreach(CList, CLists), 328 param(TaskVarList) 329 do 330% findall(core(_,corename(CoreNr),_),cpu_affinity(CoreNr,_,D),SubtractList), 331 findall(core(_,CoreNr,_),cpu_affinity(CoreNr,_,D),SubtractList), 332 subtract(TaskVarList,SubtractList,TmpList), 333 subtract(TaskVarList,TmpList,CList) 334 ), 335 apply_numa_properties(TaskName, CLists) 336 ). 337 338 339% this function decides which NUMA policy applies to which task 340 341apply_numa_properties(taskname(FunctionAddress, ClID), NUMACoreList) :- 342 ( is_predicate(task_numa/3),task_numa(FunctionAddress, ClID, memory) -> 343 apply_numa_properties_memory(NUMACoreList) 344 ; 345 is_predicate(task_numa/3),task_numa(FunctionAddress, ClID, compute) -> 346 apply_numa_properties_compute(NUMACoreList) 347 ; 348 is_predicate(task_working_set_size/3), task_working_set_size(FunctionAddress, ClID, WorkingSetSize) -> 349 apply_numa_properties_max_numa_size(NUMACoreList, WorkingSetSize) 350 ; 351 true 352 ). 353 354 355% the concrete NUMA policy implementations 356 357apply_numa_properties_compute(NUMACoreList) :- 358 ( foreach(NUMARegion, NUMACoreList), 359 foreach(B, NUMASums) 360 do 361 maplist(corelistvariable, NUMARegion, NUMARegionVars), 362 ic_global:sumlist(NUMARegionVars, Sum), 363 length(NUMARegionVars, Len), 364 and(Sum $\= 0, Sum $\= Len, B) 365 ), 366 ic_global:sumlist(NUMASums, Sum), 367 Sum $=< 1. 368 369apply_numa_properties_memory(NUMACoreList) :- 370 ( foreach(NUMARegion, NUMACoreList), 371 foreach(Sum, NUMASums) 372 do 373 maplist(corelistvariable, NUMARegion, NUMARegionVars), 374 ic_global:sumlist(NUMARegionVars, Sum) 375 ), 376 ic:minlist(NUMASums, Min), 377 ic:maxlist(NUMASums, Max), 378 Diff $= Max - Min, 379 Diff $=< 1. 380 381apply_numa_properties_max_numa_size(NUMACoreList, WorkingSetSize) :- 382 ( foreach(NUMARegion, NUMACoreList), 383 foreach(Sum, NUMASums) 384 do 385 maplist(corelistvariable, NUMARegion, NUMARegionVars), 386 ic_global:sumlist(NUMARegionVars, Sum) 387 ), 388 findall(A, memory_affinity(_, _, A), AL), 389 sort(AL, ALS), 390 ( foreach(Af, ALS), 391 foreach(Sz, NodeSizes) 392 do 393 findall(Size, memory_affinity(_ , Size, Af), Sizes), 394 sum(Sizes, Sz) 395 ), 396 ( foreach(N, NodeSizes), 397 foreach(NSum, NUMASums), 398 param(WorkingSetSize) 399 do 400 AllocSize $= NSum * WorkingSetSize, 401 AllocSize $=< N 402 ). 403 404 405 406%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 407 408adjust_ax_parallel_threads([], _, _). 409adjust_ax_parallel_threads([H|TaskList], NrRunningCores, Sum):- 410 task(_, TaskName, TaskVarList) = H, 411 taskname(TN, TClID) = TaskName, 412 task_get_max_parallel(TN, TClID, MaxNr), 413 EffektivNumberFloat is (NrRunningCores / Sum * MaxNr), 414 floor(EffektivNumberFloat, EffektivNumberFloor), 415 integer(EffektivNumberFloor, EffektivNumberInt), 416 ( EffektivNumberInt =:= 0 -> 417 EffektivNumber is 1 418 ; 419 EffektivNumber is EffektivNumberInt 420 ), 421 min(EffektivNumber, MaxNr, DefNr), 422 maplist(corelistvariable, TaskVarList, TaskVars), 423 ic_global:sumlist(TaskVars, TaskSum), 424 TaskSum $= DefNr, 425 NewNumberRunningCores is NrRunningCores - EffektivNumber, 426 NewSum is Sum - MaxNr, 427 adjust_ax_parallel_threads(TaskList, NewNumberRunningCores, NewSum). 428 429 430%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 431% main internal goal: create data structure, apply constraints, instantiate 432%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 433 434haupt(TaskList) :- 435 thread_alloc_create_datastructure(TaskList, CoreList), 436 nr_tasks_per_core(1, CoreList), 437% nr_running_cores(NrRunningCores), 438 core_list_hw_id(CoreIDList), 439 length(CoreIDList, NrRunningCores), 440% nr_threads_per_task(1,NrRunningCores, TaskList), 441% nr_threads_per_task(TaskList), 442 sum_max_parallel_threads(Sum), 443 adjust_ax_parallel_threads(TaskList, NrRunningCores, Sum), 444 numa_properties_per_task(TaskList), 445 globalthreadalloc_labelall(TaskList). 446%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 447 448 449 450%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 451% transforming solution to get a nice output 452%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 453 454% get all the allocated cores (the ones which have a "1") 455% this is a kind of filter function which creates a list of allocated cores 456% per task 457 458allocated_cores([], []). 459allocated_cores([H|CoreList], AllocatedCoresList) :- 460 core(_, _, 0) = H, 461 allocated_cores(CoreList, AllocatedCoresList). 462allocated_cores([H|CoreList], [HWID|AllocatedCoresList]) :- 463 core(_, HWID, 1) = H, 464 allocated_cores(CoreList, AllocatedCoresList). 465 466 467 468% get all the allocated cores for each task 469 470allocation_output(TaskList, AllocatedCoresList) :- 471 ( foreach(Task, TaskList), 472 foreach(task(TaskName,AllocatedCores), AllocatedCoresList) 473 do 474 task(_, TaskName, CoreList) = Task, 475 allocated_cores(CoreList, AllocatedCores) 476 ). 477 478 479% Compute the allocated core difference per task. This difference defines 480% which cores have to be added and which ones have to be removed according 481% to the last allocation. Every element contains the task as well as the 482% core name and an operation (1 = add this core to the task, 0 = remove this 483% core from this task). 484 485allocation_difference(AllocatedCores, OpsisF) :- 486 ( foreach(Task, AllocatedCores), 487 foreach(Ops, Opsis) 488 do 489 task(taskname(Name,TClID), NewCoreList) = Task, 490 ( is_predicate(task_currently_allocated/3),task_currently_allocated(Name, TClID, CurrentCoreList) -> 491 true 492 ; 493 CurrentCoreList = [] 494 ), 495 ( is_predicate(task_currently_allocated/3),retract(task_currently_allocated(Name, TClID, CurrentCoreList)) -> 496 true; 497 true 498 ), 499 assert(task_currently_allocated(Name, TClID, NewCoreList)), 500 subtract(CurrentCoreList, NewCoreList, RemoveCores), 501 subtract(NewCoreList, CurrentCoreList, AddCores), 502 ( foreach(AddCore, AddCores), 503 foreach(El, OperationsList), 504 param(Name), 505 param(TClID) 506 do 507%% XXX: WARNING: there is a mess between hardware corenames and core numbers as used in the OS 508%% this only works on Linux where we only see core numbers allocated by the kernel 509%% and no apic IDs 510% corename(CN) = AddCore, 511 CN = AddCore, 512 get_numa_domain_for_core(CN, NUMADomain, Low, High, Sz), 513% El = task_thread(Name, TClID, corename(AddCore), 1, numa(NUMADomain)) 514 El = task_thread(Name, TClID, AddCore, 1, numa(NUMADomain, Low, High, Sz)) 515% El = task_thread(Name, TClID, corename(AddCore), 1) 516 ), 517 ( foreach(RemoveCore, RemoveCores), 518 foreach(El2, OperationsList2), 519 param(Name), 520 param(TClID) 521 do 522%% XXX: WARNING: there is a mess between hardware corenames and core numbers as used in the OS 523%% this only works on Linux where we only see core numbers allocated by the kernel 524%% and no apic IDs 525% corename(CN2) = RemoveCore, 526 CN2 = RemoveCore, 527 get_numa_domain_for_core(CN2, NUMADomain, Low, High, Sz), 528% El2 = task_thread(Name, TClID, corename(RemoveCore), 0, numa(NUMADomain)) 529 El2 = task_thread(Name, TClID, RemoveCore, 0, numa(NUMADomain, Low, High, Sz)) 530% El2 = task_thread(Name, TClID, corename(RemoveCore), 0) 531 ), 532 append(OperationsList, OperationsList2, Ops) 533 ), 534 flatten(Opsis, OpsisF). 535%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 536 537 538 539%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 540% this is the main goal called regularly by the resource manager 541%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 542 543task_allocation_plan(P) :- 544 haupt(Plan), 545 !, 546 allocation_output(Plan, AllocatedCores), 547 allocation_difference(AllocatedCores, P). 548 549%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 550 551 552%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 553%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 554task_add_statistics(FunctionAddress, ClientSocket, tsc(TSCTicks), nrcores(NrC), startaddr(Start), endaddr(End)) :- 555 assert(task_statistics(FunctionAddress, ClientSocket, tsc(TSCTicks), nrcores(NrC), startaddr(Start), endaddr(End))). 556%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 557 558%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 559%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 560task_config(FunctionAddress, ClID, Start, End, running):- 561 ( is_predicate(task_status/5),task_status(FunctionAddress, ClID, _, _, _) -> 562 retract(task_status(FunctionAddress, ClID, _, _, _)) 563 ; 564 true 565 ), 566 assert(task_status(FunctionAddress, ClID, Start, End, running)). 567 568task_config(FunctionAddress, ClID, Start, End, stopped):- 569 ( is_predicate(task_status/5),task_status(FunctionAddress, ClID, _, _, _) -> 570 retract(task_status(FunctionAddress, ClID, _, _, _)) 571 ; 572 true 573 ), 574 ( is_predicate(task_currently_allocated/3) -> 575 retractall(task_currently_allocated(FunctionAddress, ClID, _)) 576 ; 577 true 578 ), 579 assert(task_status(FunctionAddress, ClID, Start, End, stopped)). 580 581 582%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 583% Limit the Parellelism that this task can get 584%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 585task_set_max_parallel(FunctionAddress, ClID, MaxNr) :- 586 ( is_predicate(task_max_parallel/3) -> 587 retractall(task_max_parallel(FunctionAddress, ClID,_)) 588 ; 589 true 590 ), 591 assert(task_max_parallel(FunctionAddress, ClID, MaxNr)). 592%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 593 594 595%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 596% set the working set size per core 597% This helps deciding whether the sum of the working set of all cores 598% belonging to the same NUMA node does not exceed the size of the NUMA 599% node 600%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 601task_set_working_set_size(FunctionAddress, ClID, WorkingSetSize) :- 602 ( is_predicate(task_working_set_size/3) -> 603 retractall(task_working_set_size(FunctionAddress, ClID, _)) 604 ; 605 true 606 ), 607 assert(task_working_set_size(FunctionAddress, ClID, WorkingSetSize)). 608%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 609 610 611 612%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 613% Register a function. Called by the resource manager 614%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 615task_register_function(FunctionAddress, ClID, MinBlockSize):- 616 ( is_predicate(task/3) -> 617 retractall(task(FunctionAddress, ClID, _)) 618 ; 619 true 620 ), 621 assert(task(FunctionAddress, ClID, minblocksize(MinBlockSize))). 622 623%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 624 625 626%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 627task_running_status(FunctionAddress, ClID, Running) :- 628 ( is_predicate(task_status/5),task_status(FunctionAddress, ClID, _, _, Running) -> 629 true 630 ; 631 Running = nonexistent 632 ). 633 634%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 635%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 636task_remove_client(ClID):- 637 ( is_predicate(task/3),task(_, ClID, _) -> 638 retract(task(_, ClID, _)) 639 ; 640 true 641 ), 642 ( is_predicate(task_max_parallel/3),task_max_parallel(_, ClID, _) -> 643 retract(task_max_parallel(_, ClID, _)) 644 ; 645 true 646 ), 647 ( is_predicate(task_status/5),task_status(_, ClID, _, _, _) -> 648 retract(task_status(_, ClID, _, _, _)) 649 ; 650 true 651 ), 652 ( is_predicate(task_currently_allocated/3),task_currently_allocated(_, ClID, _) -> 653 retract(task_currently_allocated(_, ClID, _)) 654 ; 655 true 656 ), 657 ( is_predicate(task_statistics/6),task_statistics(_, ClID, _, _, _, _) -> 658 retract(task_statistics(_, ClID, _, _, _, _)) 659 ; 660 true 661 ), 662 ( is_predicate(task_working_set_size/3), task_working_set_size(_, ClID, _) -> 663 retractall(task_working_set_size(_ , ClID, _)) 664 ; 665 true 666 ). 667%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 668 669 670