1% load the decoding net
2:- ["decoding_net4_support"].
3:- lib(listut).
4
% state_equal(+StateA, +StateB)
% Succeeds when both state/3 terms agree component-wise up to ordering:
% each of the three argument lists of StateA must be a permutation (perm/2)
% of the corresponding list of StateB.
state_equal(StateA, StateB) :-
    StateA = state(FirstA, SecondA, ThirdA),
    StateB = state(FirstB, SecondB, ThirdB),
    perm(FirstA, FirstB),
    perm(SecondA, SecondB),
    perm(ThirdA, ThirdB).
9
% reset_static_state
% Brings the global model back to a pristine condition before a test runs:
% first every listed dynamic fact is removed with retractall/1, then the
% retract_* helpers are invoked with wildcard arguments, and finally
% init_state/0 is called.
reset_static_state :-
    % Fact templates are cleared in the same order as they are listed.
    StaleFacts = [
        translate(_,_),
        accept(_),
        overlay(_,_),
        node_id_next(_),
        configurable(_,_,_),
        node_id_node_enum(_,_),
        current_state(_)
    ],
    ( foreach(Fact, StaleFacts) do
        retractall(Fact)
    ),
    retract_translate(_,_),
    retract_accept(_),
    retract_overlay(_,_),
    retract_configurable(_,_,_),
    init_state.
23
% Forward translation: with a single translate fact for the ["IN"] window,
% translate_region/3 is asked for the output of that same window and the
% result is printed.
test_translate_region1 :-
    reset_static_state,
    Window = region(["IN"], block(0, 1000)),
    assert_translate(Window, name(["OUT"], 400)),
    state_empty(State),
    translate_region(State, Window, Translated),
    writeln(Translated).
30
% Reverse translation: the output region is given and translate_region/3
% has to produce the matching input region, which is printed.
test_translate_region2 :-
    reset_static_state,
    assert_translate(region(["IN"], block(0, 1000)), name(["OUT"], 400)),
    state_empty(State),
    Target = region(["OUT"], block(400, 1400)),
    translate_region(State, Source, Target),
    writeln(Source).
37
% Negative case: no input region may translate to the requested output
% block (400..1500), so translate_region/3 is expected to fail.
test_translate_region3 :-
    reset_static_state,
    assert_translate(region(["IN"], block(0, 1000)), name(["OUT"], 400)),
    state_empty(State),
    Unreachable = region(["OUT"], block(400, 1500)),
    \+ translate_region(State, _, Unreachable).
43
% Reverse translation through an overlay from ["IN"] to ["OUT"]: the input
% region found for the given output region is printed.
test_translate_region4 :-
    reset_static_state,
    assert_overlay(["IN"], ["OUT"]),
    state_empty(State),
    Target = region(["OUT"], block(400, 1500)),
    translate_region(State, Source, Target),
    writeln(Source).
50
% Forward decoding over two hops (overlay ["IN"] -> ["NEXT"], then a
% translate fact on ["NEXT"]); prints the region decodes_region/3 yields.
test_decodes_region1 :-
    reset_static_state,
    assert_overlay(["IN"], ["NEXT"]),
    assert_translate(region(["NEXT"], block(0, 1000)), name(["OUT"], 400)),
    state_empty(State),
    Source = region(["IN"], block(400, 1000)),
    decodes_region(State, Source, Decoded),
    writeln(Decoded).
58
% Reverse decoding over the same two hops as test_decodes_region1: the
% output region is fixed and the input region found is printed.
test_decodes_region2 :-
    reset_static_state,
    assert_overlay(["IN"], ["NEXT"]),
    assert_translate(region(["NEXT"], block(0, 1000)), name(["OUT"], 400)),
    state_empty(State),
    Target = region(["OUT"], block(400, 1500)),
    decodes_region(State, Source, Target),
    writeln(Source).
66
% Resolution succeeds: the accept fact on ["OUT"] (0..10000) covers the
% requested output block, so resolves_region/3 must find an input region.
test_resolves_region1 :-
    reset_static_state,
    assert_overlay(["IN"], ["NEXT"]),
    assert_translate(region(["NEXT"], block(0, 1000)), name(["OUT"], 400)),
    assert_accept(region(["OUT"], block(0, 10000))),
    state_empty(State),
    Target = region(["OUT"], block(400, 1500)),
    resolves_region(State, Source, Target),
    writeln(Source).
75
% Resolution must fail: the accept fact on ["OUT"] only reaches up to 700,
% while the requested output block extends to 1500.
test_resolves_region2 :-
    reset_static_state,
    assert_overlay(["IN"], ["NEXT"]),
    assert_translate(region(["NEXT"], block(0, 1000)), name(["OUT"], 400)),
    assert_accept(region(["OUT"], block(0, 700))),
    state_empty(State),
    Target = region(["OUT"], block(400, 1500)),
    \+ resolves_region(State, _, Target).
83
84:- export test_flat1/0.
% Builds a small net resembling a Xeon Phi socket (a GDDR window plus an
% SMPT window that is forwarded over the PCI bus to DRAM) and prints every
% flat/3 solution, one "flat(...)" line each.
test_flat1 :-
    reset_static_state,
    assert_translate(region(["SOCKET"], block(1000, 2000)), name(["GDDR"], 0)),
    assert_translate(region(["SOCKET"], block(10000, 11000)), name(["SMPT_IN"], 0)),
    assert_configurable(["SMPT_IN"],34,["SMPT_OUT"]),
    assert_overlay(["SMPT_OUT"],["PCIBUS"]),
    assert_translate(region(["PCIBUS"], block(5000, 6000)), name(["DRAM"], 0)),
    assert_accept(region(["DRAM"], block(0, 1000))),
    assert_accept(region(["GDDR"], block(0, 1000))),
    % dump_flat/0 performs exactly the collect-and-print loop needed here.
    dump_flat.
99
100:- export dump_flat/0.
% dump_flat
% Collects all solutions of flat/3 up front and prints each of them as a
% line of the form "flat(A,B,C)".
dump_flat :-
    findall(entry(Src, Mid, Dst), flat(Src, Mid, Dst), Entries),
    ( foreach(entry(X, Y, Z), Entries) do
        printf("flat(%p,%p,%p)\n", [X, Y, Z])
    ).
106
107:- export test_alloc1/0.
% Allocates two 2M regions from a single 512M DRAM node and prints each
% allocated region together with the resulting state.
%
% Fix: the original computed Size512M but then used an unbound variable
% `Size` as the limit of the DRAM block, so both the accept fact and the
% free list contained a free variable instead of the intended size.
test_alloc1 :-
    reset_static_state,
    Size512M is 512 * 1024 * 1024,
    Size2M is 2 * 1024 * 1024,
    assert_accept(region(["DRAM"], block(0, Size512M))),
    state_empty(S0),
    state_add_free(S0, ["DRAM"], [block(0, Size512M)], S1),
    % The block part is left open; alloc/4 is expected to choose it.
    Reg1 = region(["DRAM"],_),
    Reg2 = region(["DRAM"],_),
    alloc(S1, Size2M, Reg1, S2),
    printf("Allocated (1): Reg=%p\nNewState=%p\n", [Reg1,S2]),
    alloc(S2, Size2M, Reg2, S3),
    printf("Allocated (2): Reg=%p\nNewState=%p\n", [Reg2,S3]).
122
123:- export test_alloc2/0.
% Allocation with reachability constraints on a net resembling a Xeon Phi
% socket: GDDR is reached directly from the socket, DRAM through the
% SMPT / PCI bus path. Three 2M regions are allocated and printed:
% GDDR reachable from ["SOCKET"], DRAM reachable from ["SOCKET"], and DRAM
% reachable from both ["SOCKET"] and ["PCIBUS"].
test_alloc2 :-
    reset_static_state,
    Size is 512 * 1024 * 1024,
    Size2M is 2 * 1024 * 1024,
    MemBlock = block(0, Size),
    assert_translate(region(["SOCKET"], MemBlock), name(["GDDR"], 0)),
    assert_translate(region(["SOCKET"], block(10000, 11000)), name(["SMPT_IN"], 0)),
    assert_configurable(["SMPT_IN"],34,["SMPT_OUT"]),
    assert_overlay(["SMPT_OUT"],["PCIBUS"]),
    assert_translate(region(["PCIBUS"], MemBlock), name(["DRAM"], 0)),
    assert_accept(region(["GDDR"], MemBlock)),
    assert_accept(region(["DRAM"], MemBlock)),
    state_empty(Empty),
    state_add_free(Empty, ["DRAM"], [MemBlock], WithDram),
    state_add_free(WithDram, ["GDDR"], [MemBlock], WithGddr),
    state_add_avail(WithGddr, ["SMPT_IN"], 32, Ready),
    GddrReg = region(["GDDR"],_),
    alloc(Ready, Size2M, GddrReg, ["SOCKET"], After1),
    printf("Allocated (reachable from Socket): Reg=%p\nNewState=%p\n", [GddrReg,After1]),
    DramReg = region(["DRAM"],_),
    alloc(After1, Size2M, DramReg, ["SOCKET"], After2),
    printf("Allocated (reachable from Socket): Reg=%p\nNewState=%p\n", [DramReg,After2]),
    SharedReg = region(["DRAM"],_),
    alloc(After2, Size2M, SharedReg, ["SOCKET"], ["PCIBUS"], After3),
    printf("Allocated (reachable from Socket and Pcibus): Reg=%p\nNewState=%p\n", [SharedReg,After3]).
149
% Mapping without any node reconfiguration: the socket window translates
% statically to GDDR, so map/4 only has to pick the source region inside
% that window for the first 2M of GDDR. Prints source, destination and the
% state returned by map/4.
test_map1 :-
    reset_static_state,
    Size is 512 * 1024 * 1024,
    Offset is 16 * 1024 * 1024,
    OffsetLimit is Offset + Size,
    Limit2M is 2 * 1024 * 1024 - 1,
    SocketWindow = block(Offset, OffsetLimit),
    MemBlock = block(0, Size),
    assert_translate(region(["SOCKET"], SocketWindow), name(["GDDR"], 0)),
    state_empty(Empty),
    state_add_free(Empty, ["SOCKET"], [SocketWindow], WithSocket),

    assert_translate(region(["SOCKET"], block(10000, 11000)), name(["SMPT_IN"], 0)),
    assert_overlay(["SMPT_OUT"],["PCIBUS"]),
    assert_translate(region(["PCIBUS"], MemBlock), name(["DRAM"], 0)),
    assert_accept(region(["GDDR"], MemBlock)),
    assert_accept(region(["DRAM"], MemBlock)),
    assert_conf_node(WithSocket, ["SMPT_IN"],["SMPT_OUT"], 34, 32, WithSmpt),
    state_add_free(WithSmpt, ["DRAM"], [MemBlock], WithDram),
    state_add_free(WithDram, ["GDDR"], [MemBlock], Ready),

    Src = region(["SOCKET"], _),
    Dst = region(["GDDR"], block(0, Limit2M)),
    map(Ready, Src, Dst, Mapped),
    printf("Src=%p --> Dst=%p with S1=%p\n", [Src, Dst, Mapped]).
174
% test_map1 must be deterministic: collecting all of its solutions has to
% give exactly one.
test_map1_nochoice :-
    findall(dummy, test_map1, Solutions),
    Solutions = [_].
178
179
180
% Mapping that needs a node configuration: the socket window leads into
% the configurable SMPT node (no static socket -> GDDR translation is
% asserted here), and the 2M destination in DRAM fits inside the size of
% the remapped nodes. Prints source, destination and the new state.
test_map2 :-
    reset_static_state,
    Size is 512 * 1024 * 1024,
    Offset is 16 * 1024 * 1024,
    OffsetLimit is Offset + Size,
    Limit2M is 2 * 1024 * 1024 - 1,
    SocketWindow = block(Offset, OffsetLimit),
    MemBlock = block(0, Size),
    state_empty(Empty),
    state_add_free(Empty, ["SOCKET"], [SocketWindow], WithSocket),
    assert_translate(region(["SOCKET"], SocketWindow), name(["SMPT_IN"], 0)),
    assert_overlay(["SMPT_OUT"],["PCIBUS"]),
    assert_translate(region(["PCIBUS"], MemBlock), name(["DRAM"], 0)),
    assert_accept(region(["GDDR"], MemBlock)),
    assert_accept(region(["DRAM"], MemBlock)),
    assert_conf_node(WithSocket, ["SMPT_IN"],["SMPT_OUT"], 34, 32, WithSmpt),
    state_add_free(WithSmpt, ["DRAM"], [MemBlock], WithDram),
    state_add_free(WithDram, ["GDDR"], [MemBlock], Ready),

    Src = region(["SOCKET"], _),
    Dst = region(["DRAM"], block(0, Limit2M)),
    map(Ready, Src, Dst, Mapped),
    printf("Src=%p --> Dst=%p with NewS=%p\n", [Src, Dst, Mapped]).
206
% Mapping that needs a node configuration where the translated region
% spans multiple remapped nodes: an 8M destination in DRAM is mapped from
% ["IN"] through the configurable ["MMU"] node (configured with 21 and
% 1024 via assert_conf_node/6). All flat/3 solutions are printed before
% the mapping, then source, destination and the new state.
%
% Changes relative to the previous version:
%  - Limit8M is now 8M - 1, matching the inclusive block limits used by the
%    other map tests (Limit2M is Size2M - 1, and Limit8M in test_map4).
%  - Unused locals (Size2M, Offset, OffsetLimit) were removed.
%  - The duplicated print loop is replaced by a call to dump_flat/0.
test_map3 :-
    reset_static_state,
    Size is 512 * 1024 * 1024,
    state_empty(S0),
    assert_accept_node(S0, region(["DRAM"], block(0, Size)), S1),
    assert_vspace_node(S1, region(["IN"], block(0,Size)), name(["MMU"], 0), S2),
    assert_conf_node(S2, ["MMU"],["DRAM"], 21, 1024, S3),

    Limit8M is 8 * 1024 * 1024 - 1,
    SrcRegion = region(["IN"], _),
    DstRegion = region(["DRAM"], block(0, Limit8M)),
    dump_flat,
    map(S3, SrcRegion, DstRegion, S4),
    printf("Src=%p --> Dst=%p with NewS=%p\n", [SrcRegion, DstRegion, S4]).
229
% Mapping that needs node configuration and passes through two
% configurable nodes (["SMPT_IN"] and ["IOMMU_IN"]) before reaching DRAM.
% Prints source, destination and the state produced by map/4, then the
% configuration difference between the old and new state via
% write_conf_update/2.
%
% Changes relative to the previous version:
%  - The "NewS" slot of the printf now shows S5, the state returned by
%    map/4; it used to print the pre-map state S4.
%  - Unused locals (Size2M, Offset, OffsetLimit) were removed.
%  - The result of the flat/3 enumeration is explicitly discarded; the
%    call itself is kept so flat/3 is still exercised on this net.
test_map4 :-
    reset_static_state,
    Size is 512 * 1024 * 1024,
    state_empty(S0),
    assert_accept_node(S0, region(["DRAM"], block(0, Size)),S1),
    assert_conf_node(S1, ["SMPT_IN"],["IOMMU_IN"], 34, 32, S2),
    assert_conf_node(S2, ["IOMMU_IN"],["DRAM"], 21, 1024, S3),
    assert_vspace_node(S3, region(["IN"], block(0,Size)), name(["SMPT_IN"], 0), S4),

    Limit8M is 8 * 1024 * 1024 - 1,
    SrcRegion = region(["IN"], _),
    DstRegion = region(["DRAM"], block(0, Limit8M)),
    findall((A,B,C), flat(A,B,C), _),
    map(S4, SrcRegion, DstRegion, S5),
    printf("Src=%p --> Dst=%p with NewS=%p\n", [SrcRegion, DstRegion, S5]),
    write_conf_update(S4,S5).
251
% Exercises map_wrap/7 with node enums instead of node names: a 2M region
% at DRAM address 0 is mapped so that it becomes visible from ["IN"],
% passing through the two configurable nodes ["SMPT_IN"] and ["IOMMU_IN"].
%
% Fix: `Size` was used in the DRAM accept fact without ever being bound.
% It is now set to 512M, the DRAM size used by the other map tests in this
% file (NOTE(review): confirm this is the intended size). The unused
% result state of map_wrap/7 is explicitly discarded.
test_map_wrap :-
    reset_static_state,
    Size is 512 * 1024 * 1024,
    assert_accept(region(["DRAM"], block(0, Size))),
    state_empty(S0),
    Limit is 512 * 1024 *1024 *1024,
    assert_vspace_node(S0, region(["IN"],block(0,Limit)), name(["SMPT_IN"], 0), S1),
    assert_conf_node(S1, ["SMPT_IN"],["IOMMU_IN"], 34, 32, S2),
    assert_conf_node(S2, ["IOMMU_IN"],["DRAM"], 21, 1024, S3),

    Size2M is 2 * 1024 * 1024,
    node_enum(["DRAM"], DramEnum),
    node_enum(["IN"], SrcEnum),
    map_wrap(S3, Size2M, 21, DramEnum, 0, [SrcEnum], _).
265
% Exercises alloc_wrap/6 with a node enum as destination: allocates 2M
% from DRAM with an empty list of source enums.
%
% Fix: `Size` was used both in the DRAM accept fact and in the free list
% without ever being bound. It is now set to 512M, the DRAM size used by
% the other tests in this file (NOTE(review): confirm this is the intended
% size). The unused result state is explicitly discarded.
test_alloc_wrap :-
    reset_static_state,
    Size is 512 * 1024 * 1024,
    assert_accept(region(["DRAM"], block(0, Size))),
    state_empty(S0),
    state_add_free(S0, ["DRAM"], [block(0,Size)], S1),
    Size2M is 2 * 1024 * 1024,
    node_enum(["DRAM"], DramEnum),
    alloc_wrap(S1, Size2M, 21, DramEnum, [], _).
274
% Adds a process to a freshly initialised state (init/1), prints its enum
% and the resulting state, and then allocates 2M of DRAM with the new
% process enum as the only source enum.
test_add_process :-
    reset_static_state,
    init(Initial),
    add_process(Initial, ProcEnum, WithProc),
    printf("ProcEnum=%p, State=%p\n", [ProcEnum, WithProc]),
    node_enum(["DRAM"], DramEnum),
    Size2M is 2 * 1024 * 1024,
    alloc_wrap(WithProc, Size2M, 21, DramEnum, [ProcEnum], _).
283
% Adds a Xeon Phi at PCI address addr(10,0,0) and a process, looks up the
% Phi's KNC socket and GDDR enums via xeon_phi_meta/7, and allocates 2M of
% GDDR with the KNC socket as the only source enum.
% This test is not part of run_all_tests/0.
%
% Fixes relative to the previous version:
%  - Both printf calls had two %p directives but only one argument; the
%    state is now passed as well.
%  - add_process/3 reused E1, which was already bound to the Xeon Phi enum,
%    forcing the new process enum to equal it. A separate variable is used.
%  - `Size = 2 * 1024 * 1024` bound Size to an unevaluated term; it is now
%    evaluated with is/2.
%  - printf was called with a single argument; writeln/1 is used instead.
%  - alloc_wrap was called with 5 arguments while every other call in this
%    file uses 6 (State, Size, Bits, DestEnum, SrcEnums, NewState). The
%    call now matches them, using 21 bits as the sibling tests do
%    (NOTE(review): confirm against the alloc_wrap definition).
test_add_xeon_phi :-
    reset_static_state,
    state_empty(S0),
    add_xeon_phi(S0, addr(10,0,0), PhiEnum, S1),
    printf("XPhiEnum=%p, State=%p\n", [PhiEnum, S1]),
    add_process(S1, ProcEnum, S2),
    printf("ProcEnum=%p, State=%p\n", [ProcEnum, S2]),
    xeon_phi_meta(S2, PhiEnum, KNC_SOCKET_E, _, _, _, GDDR_E),
    Size is 2 * 1024 * 1024,
    writeln("before allocation"),
    alloc_wrap(S2, Size, 21, GDDR_E, [KNC_SOCKET_E], _).
295
% Adds a VM overlay on top of a 2G DRAM node: the VM range 0 .. 1G-1 is
% overlaid onto DRAM starting at DramBase (1G). Prints the enum assigned
% to the VM.
%
% Fixes relative to the previous version:
%  - The limit passed to add_vm_overlay/7 was the misspelled, and therefore
%    unbound, variable `Limt1G`; it is now Limit1G.
%  - `DramBase = 1024 * 1024 * 1024` bound DramBase to an unevaluated term;
%    it is now evaluated with is/2.
%  - The unused DramLimit was removed and the unused result state is
%    explicitly discarded.
test_add_vm :-
    reset_static_state,
    state_get(S),
    node_enum(["DRAM"], DramEnum),
    Limit2G is 2 * 1024 * 1024 * 1024 - 1,
    Blk = block(0, Limit2G),
    assert_accept(region(["DRAM"], Blk)),
    state_add_free(S, ["DRAM"], [Blk], S1),
    Limit1G is 1024 * 1024 * 1024 - 1,
    DramBase is 1024 * 1024 * 1024,
    add_vm_overlay(S1, VmEnum, 0, Limit1G, DramEnum, DramBase, _),
    printf("VmEnum %p\n", [VmEnum]).
309
310
311% Make sure this does not create backtracking behaviour.
% Inserting a block that exactly fills the gap between two free blocks
% must yield a single solution, i.e. free_list_insert/3 may not leave
% alternative answers behind.
test_freelist_nochoice :-
    FreeList = [block(0,19), block(50,100)],
    findall(Result, free_list_insert(FreeList, block(20,49), Result), Results),
    Results = [_].
315
% Functional correctness
% Checks free_list_insert/3 against fixed expected results (merging with
% the left neighbour, with the right neighbour, with both, and plain
% insertion before, between and after existing blocks), then prints all
% solutions of free_list_allocated/4 for a three-block list over 0..100.
test_freelist2 :-
    Tail = [block(50,100)],
    % adjacent to the end of the only block: merged
    free_list_insert([block(0,10)], block(11,20), [block(0,20)]),
    % isolated block between two others: kept separate
    free_list_insert([block(0,10),block(50,100)], block(20,30), [block(0,10),block(20,30), block(50,100)]),
    % fills the gap exactly: everything collapses into one block
    free_list_insert([block(0,19),block(50,100)], block(20,49), [block(0,100)]),
    % isolated block after / before the existing one
    free_list_insert(Tail, block(200,210), [block(50,100), block(200,210)]),
    free_list_insert(Tail, block(0,10), [block(0,10), block(50,100)]),
    % adjacent to the start of the existing block: merged
    free_list_insert(Tail, block(40,49), [block(40,100)]),
    findall(Used, free_list_allocated([block(0,10),block(20,30), block(50,100)], 0, 100, Used), UsedList),
    printf("Free list allocated %p\n", [UsedList]).
326
327
% Interleaves allocation and freeing on a 2G DRAM node: five 1M regions
% are allocated, the second and fourth are freed again, and then a 2M
% region is allocated. Every allocated region is printed, followed by the
% solutions of free_list_allocated/4 over the final DRAM free list.
test_alloc_free :-
    reset_static_state,
    state_get(Start),
    node_enum(["DRAM"], _),
    Limit2G is 2 * 1024 * 1024 * 1024 - 1,
    Whole = block(0, Limit2G),
    assert_accept(region(["DRAM"], Whole)),
    state_add_free(Start, ["DRAM"], [Whole], Fresh),
    Size1M is 1024 * 1024,
    Size2M is 2 * 1024 * 1024,

    % five consecutive 1M allocations
    First = region(["DRAM"],  _),
    alloc(Fresh, Size1M, First, A1),
    printf("Reg1 = %p\n", [First]),
    Second = region(["DRAM"],  _),
    alloc(A1, Size1M, Second, A2),
    printf("Reg2 = %p\n", [Second]),
    Third = region(["DRAM"],  _),
    alloc(A2, Size1M, Third, A3),
    printf("Reg3 = %p\n", [Third]),
    Fourth = region(["DRAM"],  _),
    alloc(A3, Size1M, Fourth, A4),
    printf("Reg4 = %p\n", [Fourth]),
    Fifth = region(["DRAM"],  _),
    alloc(A4, Size1M, Fifth, A5),
    printf("Reg5 = %p\n", [Fifth]),

    % release the second and fourth region, then request 2M
    free(A5, Second, F1),
    free(F1, Fourth, F2),
    Sixth = region(["DRAM"],  _),
    alloc(F2, Size2M, Sixth, Final),
    printf("Reg6 = %p\n", [Sixth]),

    state_has_free(Final, ["DRAM"], FreeBlks),
    findall(Used, free_list_allocated(FreeBlks, 0, Limit2G, Used), UsedList),
    printf("Free list allocated %p\n", [UsedList]).
361
% Allocation of a caller-chosen region: the block 10M .. 12M-1 of a 2G
% DRAM node is requested explicitly, with the size taken from
% region_size/2. Prints the region and the resulting state.
test_alloc_specific :-
    reset_static_state,
    state_get(Start),
    node_enum(["DRAM"], _),
    Limit2G is 2 * 1024 * 1024 * 1024 - 1,
    Whole = block(0, Limit2G),
    assert_accept(region(["DRAM"], Whole)),
    state_add_free(Start, ["DRAM"], [Whole], Fresh),

    Base is 10 * 1024 * 1024,
    Limit is 12 * 1024 * 1024 - 1,
    Wanted = region(["DRAM"],  block(Base, Limit)),
    region_size(Wanted, WantedSize),
    alloc(Fresh, WantedSize, Wanted, After),
    printf("alloc specific Reg1=%p, S2=%p\n", [Wanted, After]).
377
% test_unmap_setup(-NewS)
% Shared fixture for the unmap tests. Builds a net with one DRAM node and
% two configurable nodes (["MMU0"], ["MMU1"]) in front of it, adds free
% ranges for DRAM and for ["PROC0"] / ["PROC1"], and translates each
% process range onto its MMU. NewS is the resulting state.
%
% Change relative to the previous version: the locals Size2M, Offset and
% OffsetLimit were computed but never used and have been removed.
test_unmap_setup(NewS) :-
    reset_static_state,
    Size is 512 * 1024 * 1024,
    assert_accept(region(["DRAM"], block(0, Size))),
    state_empty(S0),
    assert_conf_node(S0, ["MMU0"],["DRAM"], 21, 1024, S1),
    assert_conf_node(S1, ["MMU1"],["DRAM"], 21, 1024, S2),
    state_add_free(S2, ["DRAM"], [block(0,Size)], S3),

    state_add_free(S3, ["PROC0"], [block(0,Size)], S4),
    state_add_free(S4, ["PROC1"], [block(0,Size)], S5),
    % This is the state-threading, 4-argument form of assert_translate.
    assert_translate(S5, region(["PROC0"], block(0, Size)), name(["MMU0"], 0), S6),
    assert_translate(S6, region(["PROC1"], block(0, Size)), name(["MMU1"], 0), NewS).
394
% One map followed by one unmap keeps the state intact.
% Allocates 8M of DRAM, maps it into ["PROC0"], checks the mapping is
% reported by installed_vspace_map/2, unmaps it again and requires the
% final state to equal the state right after the allocation.
test_unmap1 :-
    test_unmap_setup(Base),
    Size8M is 8 * 1024 * 1024,
    Dst = region(["DRAM"], _),
    alloc(Base, Size8M, Dst, Allocated),
    Src = region(["PROC0"], _),
    map(Allocated, Src, Dst, Mapped),
    installed_vspace_map(Mapped, Src),
    unmap(Mapped, [Src], Dst, Unmapped),
    % unmapping must restore exactly the post-allocation state
    state_equal(Allocated, Unmapped).
409
% Same scenario as test_unmap1, but verifies that unmap/4 is
% deterministic: collecting all of its solutions must give exactly one.
%
% Fix: the final check was `length(L1, 1)` on a fresh variable L1 (a typo
% for Li), which always succeeds, so the test could never fail. It now
% checks the list actually produced by findall/3.
test_unmap1_nochoice :-
    test_unmap_setup(S4),
    Size8M is 8 * 1024 * 1024,
    SrcRegion = region(["PROC0"], _),
    DstRegion = region(["DRAM"], _),
    alloc(S4, Size8M, DstRegion, S5),
    map(S5, SrcRegion, DstRegion, S6),
    installed_vspace_map(S6, SrcRegion),
    findall(S7, unmap(S6, [SrcRegion], DstRegion, S7), Li),
    length(Li, 1).
422
% Three maps into the same space followed by three unmaps keep the state intact.
% Allocates three DRAM regions (8M, 8M, 6M), maps each of them into
% ["PROC0"], unmaps all three in the same order, and requires the final
% state to equal the state right after the allocations.
%
% Fix: SrcRegion3 was never bound, so the third map/4 and unmap/4 calls
% received a completely free variable instead of a region in ["PROC0"]
% like the other two source regions.
test_unmap2 :-
    test_unmap_setup(S4),
    Size8M is 8 * 1024 * 1024,
    Size6M is 6 * 1024 * 1024,
    SrcRegion1 = region(["PROC0"], _),
    SrcRegion2 = region(["PROC0"], _),
    SrcRegion3 = region(["PROC0"], _),
    DstRegion1 = region(["DRAM"], _),
    DstRegion2 = region(["DRAM"], _),
    DstRegion3 = region(["DRAM"], _),
    alloc(S4, Size8M, DstRegion1, S5),
    alloc(S5, Size8M, DstRegion2, S6),
    alloc(S6, Size6M, DstRegion3, S7),
    printf("Dest1=%p,  Dest2=%p, Dest3=%p\n", [DstRegion1, DstRegion2, DstRegion3]),
    map(S7, SrcRegion1, DstRegion1, S8),
    map(S8, SrcRegion2, DstRegion2, S9),
    map(S9, SrcRegion3, DstRegion3, S10),
    unmap(S10, [SrcRegion1], DstRegion1, S11),
    unmap(S11, [SrcRegion2], DstRegion2, S12),
    unmap(S12, [SrcRegion3], DstRegion3, S13),
    % after undoing every mapping only the allocations may remain
    state_equal(S7, S13).
444
% Maps one 8M DRAM region into two different source nodes (["PROC0"] and
% ["PROC1"]), removes both mappings with a single unmap/4 call, prints the
% state before mapping and after unmapping, and requires them to be equal.
%
% Change relative to the previous version: the unused local Size6M was
% removed.
test_unmap3 :-
    test_unmap_setup(S4),
    Size8M is 8 * 1024 * 1024,
    SrcRegion1 = region(["PROC0"], _),
    SrcRegion2 = region(["PROC1"], _),
    DstRegion = region(["DRAM"], _),
    alloc(S4, Size8M, DstRegion, S5),
    map(S5, SrcRegion1, DstRegion, S6),
    map(S6, SrcRegion2, DstRegion, S7),
    unmap(S7, [SrcRegion1,SrcRegion2], DstRegion, S8),
    % Note: the second value printed under the label "S7" is S8.
    printf("S5=%p\nS7=%p\n", [S5,S8]),
    state_equal(S5, S8).
458
459
% run_test(+Test)
% Announces Test, calls it once, and prints either " Succeeds!" or a
% failure banner. run_test/1 itself always succeeds exactly once, so one
% failing test does not stop the remaining tests of a suite.
%
% Fix: the previous version used a plain disjunction
% `(announce, call, report) ; banner`. That left a choice point behind, so
% on backtracking (e.g. when asking for another solution at the toplevel,
% or when Test itself was non-deterministic) a test that had already
% reported success also printed the failure banner. An if-then-else
% commits to the first solution of Test and removes that alternative.
% Exceptions raised by Test are still not caught here.
run_test(Test) :-
    printf("Calling Test %p...\n", [Test]),
    (   call(Test) ->
        writeln(" Succeeds!")
    ;
        writeln("#################################################"),
        writeln(" !!! Fails !!!"),
        writeln("#################################################")
    ).
470
471:- export run_all_tests/0.
% run_all_tests
% Runs the regression suite: every test named below is handed to
% run_test/1 in the listed order.
run_all_tests :-
    Suite = [
        test_translate_region1,
        test_translate_region2,
        test_translate_region3,
        test_translate_region4,
        test_decodes_region1,
        test_decodes_region2,
        test_resolves_region1,
        test_resolves_region2,
        test_flat1,
        test_alloc1,
        test_alloc2,
        test_map1,
        test_map2,
        test_map3,
        test_map4,
        test_map_wrap,
        test_alloc_wrap,
        test_add_process,
        test_add_vm,
        test_freelist_nochoice,
        test_freelist2,
        test_alloc_free,
        test_unmap1,
        test_unmap1_nochoice,
        test_unmap2,
        test_unmap3
    ],
    ( foreach(Case, Suite) do
        run_test(Case)
    ).
499
500/*
501 *-----------------
502 * Benchmarks
503 * ----------------
504 */
505
% bench_init(-E1, -E2, -NewS)
% Common benchmark setup: resets and re-initialises the global state,
% creates an initial state with init/1 and adds two PCI devices at
% addr(0,0,0) and addr(0,0,1). E1 and E2 are the values add_pci/4 returns
% for the two devices, NewS the resulting state.
bench_init(E1, E2, NewS) :-
    reset_static_state,
    init_state,
    init(Initial),
    add_pci(Initial, addr(0,0,0), E1, WithFirst),
    add_pci(WithFirst, addr(0,0,1), E2, NewS).
512
513% Benchmark allocation time with increasing number of nodes.
% bench_nodes_one(+NumPci)
% Single data point of the node-scaling benchmark. On top of the two PCI
% devices from bench_init/3, further devices addr(0,0,2) .. addr(0,0,NumPci)
% are added. Then one 2M DRAM allocation reachable from the first two
% devices is timed with statistics(hr_time, _), and "NumPci Diff" is
% printed.
bench_nodes_one(NumPci) :-
    bench_init(Enum1, Enum2, Base),
    ( for(Dev, 2, NumPci), fromto(Base, Cur, Next, Populated) do
        add_pci(Cur, addr(0,0,Dev), _, Next)
    ),

    % Allocation parameters; each device is addressed through an alias of
    % its enum that is not an addr/3 term.
    Size2M = 2097152,
    Wanted = region(["DRAM"],_),
    node_enum(Src1, Enum1),
    \+ Src1 = addr(_,_,_),
    node_enum(Src2, Enum2),
    \+ Src2 = addr(_,_,_),

    % Only the alloc/6 call lies between the two timestamps.
    statistics(hr_time, T0),
    alloc(Populated, Size2M, Wanted, Src1, Src2, _),
    statistics(hr_time, T1),
    Elapsed is T1 - T0,
    printf("%p %p\n", [NumPci, Elapsed]).
533
% bench_nodes(+MaxNumPci)
% Prints a start marker and then runs bench_nodes_one/1 for every device
% count from 2 up to MaxNumPci, committing to the first solution of each
% run.
bench_nodes(MaxNumPci) :-
    writeln("===== NODES ALLOC BENCH START ====="),
    ( for(Count, 2, MaxNumPci) do
        once(bench_nodes_one(Count))
    ).
539
540% Increasing number of allocations on a real system
% bench_real_ram_alloc(+NumAllocs)
% Benchmark on the live system state (read with state_get/1 and written
% back with state_set/1). Two distinct source nodes are determined, then
% 2M DRAM regions reachable from both are allocated repeatedly. For each
% iteration the index and the time spent in alloc/6 are printed.
% Note: the loop runs for I = 0 .. NumAllocs, i.e. NumAllocs + 1 times.
%
% Fix: the second lookup was `node_enum(addr(_,_,_), E1)`, a copy of the
% first one, so E2 was still unbound when `node_enum(N2, E2)` ran and N2
% was not tied to any addr/3 node. The second lookup now binds E2, so N2
% is chosen the same way as N1 (a non-addr alias of an addr/3 node's enum)
% while still being required to differ from N1.
% The constant allocation size is computed once outside the loop.
bench_real_ram_alloc(NumAllocs) :-
    node_enum(addr(_,_,_), E1),
    node_enum(N1, E1),
    not(N1 = addr(_,_,_)),

    node_enum(addr(_,_,_), E2),
    node_enum(N2, E2),
    not(N2 = addr(_,_,_)),
    not(N2 = N1),

    Dest = ["DRAM"],
    Size2M is 2097152,

    printf("Determined Dest=%p Src=%p,%p for ram alloc bench\n",
        [Dest, N1, N2]),
    writeln("===== REAL RAM ALLOC BENCH START ====="),
    (for(I,0,NumAllocs), param(Dest, N1, N2, Size2M) do
        DestReg = region(Dest, _),

        state_get(S),
        % Only the alloc/6 call lies between the two timestamps.
        statistics(hr_time, Start),
        alloc(S, Size2M, DestReg, N1, N2, NewS),
        statistics(hr_time, Stop),
        % Persist the allocation so the next iteration sees it.
        state_set(NewS),
        Diff is Stop - Start,
        printf("%p %p\n", [I, Diff])
    ).
569
570
571% RUN ALL SYNTHETIC BENCHMARKS. Resets state, breaks BF if run on real system.
% Entry point for the synthetic benchmarks: runs the node-scaling
% benchmark for up to 100 PCI devices.
bench_synth :-
    MaxNumPci = 100,
    bench_nodes(MaxNumPci).
574
% RUN ALL REAL BENCHMARKS: Expects KNC system has been instantiated
576% does not reset state.
% Entry point for the benchmarks on a real system: runs the RAM
% allocation benchmark with 1000 as its allocation count argument.
bench_real :-
    NumAllocs = 1000,
    bench_real_ram_alloc(NumAllocs).
579
580