1% BEGIN LICENSE BLOCK
2% Version: CMPL 1.1
3%
4% The contents of this file are subject to the Cisco-style Mozilla Public
5% License Version 1.1 (the "License"); you may not use this file except
6% in compliance with the License.  You may obtain a copy of the License
7% at www.eclipse-clp.org/license.
8% 
9% Software distributed under the License is distributed on an "AS IS"
10% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
11% the License for the specific language governing rights and limitations
12% under the License. 
13% 
14% The Original Code is  The ECLiPSe Constraint Logic Programming System. 
15% The Initial Developer of the Original Code is  Cisco Systems, Inc. 
16% Portions created by the Initial Developer are
17% Copyright (C) 2006 Cisco Systems, Inc.  All Rights Reserved.
18% 
19% Contributor(s): Joachim Schimpf.
20% 
21% END LICENSE BLOCK
22% ----------------------------------------------------------------------
23% System:	ECLiPSe Constraint Logic Programming System
24% Component:	ECLiPSe III compiler
25% Version:	$Id: compiler_codegen.ecl,v 1.31 2013/02/09 20:27:57 jschimpf Exp $
26% ----------------------------------------------------------------------
27
28:- module(compiler_codegen).
29
30:- comment(summary, "ECLiPSe III compiler - code generation").
31:- comment(copyright, "Cisco Technology Inc").
32:- comment(author, "Joachim Schimpf").
33:- comment(date, "$Date: 2013/02/09 20:27:57 $").
34
35
36:- lib(hash).
37
38:- use_module(compiler_common).
39
40:- include(compiler_compound).
41
42
43%----------------------------------------------------------------------
44% Chunk data
45% This data structure holds information that evolves along the chunk.
46%----------------------------------------------------------------------
47
48:- local struct(chunk_data(
49	occurred,		% hash table varid->bool (vars already seen in chunk)
50	aux_count,		% number of auxiliary temporaries
51	need_global,		% space needed on global stack at this point
52	allocated,		% environment size at this point (-1 no env)
53	eam			% environment activity map at chunk entry
54    )).
55
56
57init_chunk_data(EAM, ESize, chunk_data{aux_count:0,occurred:Init,eam:EAM,allocated:ESize}) :-
58	hash_create(Init).
59
60start_new_chunk(EAM, ChunkData0, ChunkData) :-
61	update_struct(chunk_data, [aux_count:0,occurred:Init,eam:EAM], ChunkData0, ChunkData),
62	hash_create(Init).
63
64print_chunk_data(_,_).
65
66
67%----------------------------------------------------------------------
68% Register a variable occurrence within a chunk and
69% returns a "variable occurrence descriptor" of the form:
70%
71%	void				void variable
72%	tmp_first			first occurrence of a temporary in its chunk
73%	tmp				repeat occurrence of a temporary in its chunk
74%	perm_first(y(Y))		first occurrence of a perm in its 1st chunk
75%	perm_first_in_chunk(y(Y))	first occurrence of perm in a later chunk
76%	perm(y(Y))			repeat occurrence of perm
77%
78% Special case: head perms that are still waiting to be moved into the environment
79% at the end of the initial chunk (delayed_perm) are classified as tmp.
80
81variable_occurrence(variable{varid:VarId,class:Class}, ChunkData0, ChunkData, Code0, Code, Descriptor) :-
82	ChunkData0 = chunk_data{occurred:OccurredInChunk,eam:EAM},
83	variable_occurrence1(Class, EAM, VarId, OccurredInChunk, Descriptor),
84	( Descriptor = perm_first(y(Y)) ->
85	    env_allocate_if_needed(Y, ChunkData0, ChunkData, Code0, Code)
86	;
87	    ChunkData0 = ChunkData, Code0 = Code
88	).
89
90    variable_occurrence1(void, _EAM, _VarId, _OccurredInChunk, Descriptor) ?-
91    	Descriptor = void.
92    variable_occurrence1(nonvoid(y(Y)), EAM, VarId, OccurredInChunk, Descriptor) ?- !,
93	( hash_get(OccurredInChunk, VarId, Type) ->
94	    ( Type == delayed_perm ->
95		Descriptor = tmp
96	    ;
97		Descriptor = perm(y(Y))
98	    )
99	;
100	    hash_set(OccurredInChunk, VarId, true),
101	    ( 0 is getbit(EAM, Y-1) ->
102		Descriptor = perm_first(y(Y))
103	    ;
104		Descriptor = perm_first_in_chunk(y(Y))
105	    )
106	).
107    variable_occurrence1(nonvoid(_Tmp), _EAM, VarId, OccurredInChunk, Descriptor) ?-
108	( hash_contains(OccurredInChunk, VarId) ->
109	    Descriptor = tmp
110	;
111	    hash_set(OccurredInChunk, VarId, true),
112	    Descriptor = tmp_first
113	).
114
115
116potential_first_temp_occurrence(variable{varid:VarId,class:nonvoid(temp)}, ChunkData) :-
117	ChunkData = chunk_data{occurred:OccurredInChunk},
118	\+ hash_contains(OccurredInChunk, VarId).
119	
120
121new_aux_temp(ChunkData0, ChunkData, aux(AuxCount)) :-
122	AuxCount is ChunkData0[aux_count of chunk_data] + 1,
123	update_struct(chunk_data, [aux_count:AuxCount], ChunkData0, ChunkData).
124
125
126%----------------------------------------------------------------------
127% Code generation 
128%----------------------------------------------------------------------
129
130:- comment(generate_code/5, [
131    summary:"Generate WAM code from normalised source for one predicate",
132    amode:generate_code(+,-,?,+,+),
133    args:[
134	"Body":"Normalised and fully annotated source of the predicate",
135	"Code":"Resulting annotated code",
136	"CodeEnd":"Tail of resulting annotated code",
137	"Options":"Options structure",
138	"ModulePred":"Context module and Name/Arity"
139    ],
140    see_also:[assign_am_registers/3,struct(code)]
141]).
142
143:- export generate_code/5.
144
145generate_code(Clause, Code, AuxCode, Options, ModPred) :-
146	init_chunk_data(0, -1, ChunkData0),
147	Code = [code{instr:label(Start)}|Code1],
148	alloc_check_start(ChunkData0, ChunkData1, Code1, Code2),
149	generate_branch(Clause, [], ChunkData1, _ChunkData, 0, -1, AuxCode, [], Code2, [code{instr:ret}|next([])], Options, ModPred@Start).
150
151
152generate_branch(AllChunks, HeadPerms, ChunkData0, ChunkData, BranchExitInitMap, ExitEnvSize, AuxCode0, AuxCode, Code0, Code, Options, SelfInfo) :-
153	% first chunk in branch
154	generate_chunk(AllChunks, OtherChunks, HeadPerms, ChunkData0, ChunkData2, AuxCode0, AuxCode1, Code0, Code1, Options, SelfInfo),
155	(
156	    fromto(OtherChunks,ThisChunk,NextChunk,[]),
157	    fromto(ChunkData2,ChunkData3,ChunkData6,ChunkData7),
158	    fromto(Code1,next(Code2),Code4,Code5),
159	    fromto(AuxCode1,AuxCode2,AuxCode3,AuxCode),
160	    param(Options,SelfInfo)
161	do
162	    alloc_check_start(ChunkData3, ChunkData5, Code2, Code3),
163	    generate_chunk(ThisChunk, NextChunk, [], ChunkData5, ChunkData6, AuxCode2, AuxCode3, Code3, next(Code4), Options, SelfInfo)
164	),
165	% Make sure all branches have ExitEnvSize allocated (or all deallocated)
166	env_allocate_last_chance(ExitEnvSize, ChunkData7, ChunkData, Code5, Code6),
167	% Generate initialization code for any variables which did not occur
168	% in or before the branch, but have a non-first occurrence after it.
169	emit_initialize(BranchExitInitMap, Code6, next(Code)).
170
171
172:- mode generate_chunk(+,-,+,+,-,?,-,-,?,+,+).
173generate_chunk([], [], HeadPerms, ChunkData0, ChunkData, AuxCode, AuxCode, Code, Code1, _Options, _Module) :-
174	% end of chunk (non-regular end of branch or clause)
175	move_head_perms(HeadPerms, ChunkData0, ChunkData1, Code, Code1),
176	alloc_check_end(ChunkData1),
177	start_new_chunk(0, ChunkData1, ChunkData).
178
179generate_chunk([Goal|Goals], NextChunk, HeadPerms0, ChunkData0, ChunkData, AuxCode, AuxCode0, Code, Code0, Options, SelfInfo) :-
180	( Goal = goal{kind:simple} ->		 % special goals
181	    SelfInfo = Module:_@_,
182	    generate_simple_goal(Goal, ChunkData0, ChunkData1, Code, Code1, Options, Module),
183	    generate_chunk(Goals, NextChunk, HeadPerms0, ChunkData1, ChunkData, AuxCode, AuxCode0, Code1, Code0, Options, SelfInfo)
184
185	; Goal = goal{kind:head,args:Args} ->	% clause-head or pseudo-head
186
187	    verify HeadPerms0 == [],
188	    generate_head_info([](Args), 1, ChunkData0, ChunkData3, HeadPerms3, [], OrigRegDescs),
189	    Code = [code{instr:nop,regs:OrigRegDescs}|Code1],
190	    generate_chunk(Goals, NextChunk, HeadPerms3, ChunkData3, ChunkData, AuxCode, AuxCode0, Code1, Code0, Options, SelfInfo)
191
192	; Goal = goal{kind:regular,functor:P,args:Args,lookup_module:LM,envmap:EAM,envsize:ESize} ->
193	    move_head_perms(HeadPerms0, ChunkData0, ChunkData1, Code, Code1),
194	    SelfInfo = Module:Self@SelfLab,
195	    generate_regular_puts(Args, ChunkData1, ChunkData2, Code1, Code2, OutArgs, Module),
196	    ( LM\==Module ->
197	    	Pred = LM:P, Dest = Pred	% calling non-visible
198	    ; P==Self ->
199	    	Pred = P, Dest = ref(SelfLab)	% direct recursive call
200	    ;
201	    	Pred = P, Dest = Pred		% calling visible pred
202	    ),
203	    call_instr(ESize, Dest, EAM, ChunkData2, ChunkData3, Code2, Code3, CallInstr),
204	    emit_call_regular(CallInstr, OutArgs, Pred, Goal, Code3, Code0, Options),
205	    NextChunk = Goals,
206	    AuxCode = AuxCode0,
207	    % end of chunk
208	    alloc_check_end(ChunkData3),
209	    start_new_chunk(EAM, ChunkData3, ChunkData)
210
211	; Goal = disjunction{branches:Branches, branchlabels:BranchLabelArray, determinism:BranchDets,
212		entrymap:_EAM, entrysize:EntryESize, exitmap: DisjExitEAM, exitsize:ExitESize,
213		arity:TryArity, args:Args, branchheadargs:HeadArgsArray,
214		branchentrymaps:BranchEamArray, branchinitmaps:BranchExitInits,
215		indexes:IndexDescs} ->
216
217	    arity(BranchLabelArray, NBranches),
218	    make_retry_me_activity_maps(BranchEamArray, RetryEamArray),
219	    NextChunk = Goals,
220
221	    % Pre-disjunction: move pseudo-arguments into place and make switches
222	    move_head_perms(HeadPerms0, ChunkData0, ChunkData00, Code, Code101),
223	    generate_regular_puts(Args, ChunkData00, ChunkData1, Code101, Code102, ArgDests, []),
224	    Code102 = [code{instr:nop,regs:ArgDests}|Code103],
225	    generate_indexing(IndexDescs, BranchLabelArray, BranchEamArray, TryArity, ChunkData1, Code103, next(Code104), AuxCode, AuxCode1, Options),
226	    %alloc_check_split(ChunkData1, [GAlloc1|GAllocs2toN]),	% moved down to get less delays
227	    env_set_allocate_size(EntryESize, ChunkData1),
228	    ChunkData1 = chunk_data{allocated:ActualESize},
229
230	    % TRY (first alternative)
231	    Branches = [Branch1|Branches2toN],
232	    BranchExitInits = [BranchExitInit1|BranchExitInits2toN],
233	    arg(1, BranchLabelArray, BrLabel1),
234	    arg(1, BranchEamArray, EAM1),
235	    arg(1, BranchDets, Det1),
236	    Code104 = [
237		code{instr:try_me_else(#no_port,TryArity,ref(Label2)),regs:ArgOrigs},
238		code{instr:label(BrLabel1),regs:[]}|Code106],
239	    start_new_chunk(EAM1, ChunkData1, ChunkData2),
240	    alloc_check_start_branch(Det1, ChunkData2, ChunkData3, Code106, Code107, GAlloc1),
241	    generate_head_info(HeadArgsArray, 1, ChunkData3, ChunkData4, PseudoHeadPerms, [], ArgOrigs),
242	    generate_branch(Branch1, PseudoHeadPerms, ChunkData4, ChunkDataE1, BranchExitInit1, ExitESize, AuxCode1, AuxCode2, Code107, Code2, Options, SelfInfo),
243	    Code2 = [code{instr:branch(ref(LabelJoin)),regs:[]}|Code3],
244
245	    % RETRY (middle alternatives)
246	    (
247		for(I, 2, NBranches-1),
248		fromto(Branches2toN, [Branch|Branches], Branches, [BranchN]),
249		fromto(BranchExitInits2toN, [BranchExitInit|BEIs], BEIs, [BranchExitInitN]),
250		fromto(GAllocs2toN, [GAllocI|GAs], GAs, [GAllocN]),
251		fromto(ChunkDataE2toN, [ChunkDataE|CDEs], CDEs, [ChunkDataEN]),
252		fromto(Code3, Code4, Code7, Code8),
253		fromto(AuxCode2, AuxCode3, AuxCode4, AuxCode5),
254		fromto(Label2, LabelI, LabelI1, LabelN),
255		param(LabelJoin,BranchLabelArray,BranchEamArray,RetryEamArray,Options,SelfInfo,BranchDets,ChunkData1,HeadArgsArray,ActualESize,ExitESize)
256	    do
257		arg(I, BranchLabelArray, BrLabelI),
258		arg(I, BranchEamArray, EAM),
259		arg(I, RetryEamArray, RetryEAM),
260		arg(I, BranchDets, DetI),
261		retry_me_instr(Options, ActualESize, ref(LabelI1), eam(RetryEAM), RetryMeInstr),
262		Code4 = [
263		    code{instr:label(LabelI),regs:[]},
264		    code{instr:RetryMeInstr,regs:ArgOrigs},
265		    code{instr:label(BrLabelI),regs:[]}
266		    |Code5],
267		start_new_chunk(EAM, ChunkData1, ChunkData2),
268		alloc_check_start_branch(DetI, ChunkData2, ChunkData3, Code5, Code51, GAllocI),
269		generate_head_info(HeadArgsArray, I, ChunkData3, ChunkData4, PseudoHeadPerms, [], ArgOrigs),
270		generate_branch(Branch, PseudoHeadPerms, ChunkData4, ChunkDataE, BranchExitInit, ExitESize, AuxCode3, AuxCode4, Code51, Code6, Options, SelfInfo),
271		Code6 = [code{instr:branch(ref(LabelJoin)),regs:[]}|Code7]
272	    ),
273
274	    % TRUST (last alternative)
275	    arg(NBranches, BranchLabelArray, BrLabelN),
276	    arg(NBranches, BranchEamArray, EAMN),
277	    arg(NBranches, RetryEamArray, RetryEAMN),
278	    arg(NBranches, BranchDets, DetN),
279	    trust_me_instr(Options, ActualESize, eam(RetryEAMN), TrustMeInstr),
280	    Code8 = [
281		code{instr:label(LabelN),regs:[]},
282		code{instr:TrustMeInstr,regs:ArgOrigsN},
283		code{instr:label(BrLabelN),regs:[]}
284		|Code9],
285	    start_new_chunk(EAMN, ChunkData1, ChunkData2N),
286	    alloc_check_start_branch(DetN, ChunkData2N, ChunkData3N, Code9, Code91, GAllocN),
287	    alloc_check_split(ChunkData1, [GAlloc1|GAllocs2toN]),
288	    generate_head_info(HeadArgsArray, NBranches, ChunkData3N, ChunkData4N, PseudoHeadPermsN, [], ArgOrigsN),
289	    generate_branch(BranchN, PseudoHeadPermsN, ChunkData4N, ChunkDataEN, BranchExitInitN, ExitESize, AuxCode5, AuxCode0, Code91, Code10, Options, SelfInfo),
290
291	    % Post-disjunction
292	    Code10 = [code{instr:label(LabelJoin),regs:[]}|Code0],
293	    init_chunk_data(DisjExitEAM, ExitESize, ChunkData),
294	    alloc_check_join([ChunkDataE1|ChunkDataE2toN], ChunkData)
295
296	;
297	    printf(error, "ERROR: unexpected goal in generate_chunk", []),
298	    abort
299	).
300
301
302% Select retry/trust instructions according to debug mode,
303% and whether an environment exists or not (-1).
304
305retry_me_instr(options{debug:off}, -1, Else, EAM, Instr) ?- !, Instr = retry_me_else(#no_port,Else), verify EAM==eam(0).
306retry_me_instr(options{debug:on},  -1, Else, EAM, Instr) ?- !, Instr = retry_me_else(#next_port,Else), verify EAM==eam(0).
307retry_me_instr(options{debug:off},  _, Else, EAM, Instr) ?- !, Instr = retry_me_inline(#no_port,Else,EAM).
308retry_me_instr(options{debug:on},   _, Else, EAM, Instr) ?- !, Instr = retry_me_inline(#else_port,Else,EAM).
309
310trust_me_instr(options{debug:off}, -1, EAM, Instr) ?- !, Instr = trust_me(#no_port), verify EAM==eam(0).
311trust_me_instr(options{debug:on},  -1, EAM, Instr) ?- !, Instr = trust_me(#next_port), verify EAM==eam(0).
312trust_me_instr(options{debug:off},  _, EAM, Instr) ?- !, Instr = trust_me_inline(#no_port,EAM).
313trust_me_instr(options{debug:on},   _, EAM, Instr) ?- !, Instr = trust_me_inline(#else_port,EAM).
314
315retry_instr(options{debug:off}, -1, Alt, _EAM, Instr) ?- !, Instr = retry(#no_port,Alt).
316retry_instr(options{debug:on},  -1, Alt, _EAM, Instr) ?- !, Instr = retry(#next_port,Alt).
317retry_instr(options{debug:off},  _, Alt,  EAM, Instr) ?- !, Instr = retry_inline(#no_port,Alt,EAM).
318retry_instr(options{debug:on},   _, Alt,  EAM, Instr) ?- !, Instr = retry_inline(#else_port,Alt,EAM).
319
320trust_instr(options{debug:off}, -1, Alt, _EAM, Instr) ?- !, Instr = trust(#no_port,Alt).
321trust_instr(options{debug:on},  -1, Alt, _EAM, Instr) ?- !, Instr = trust(#next_port,Alt).
322trust_instr(options{debug:off},  _, Alt,  EAM, Instr) ?- !, Instr = trust_inline(#no_port,Alt,EAM).
323trust_instr(options{debug:on},   _, Alt,  EAM, Instr) ?- !, Instr = trust_inline(#else_port,Alt,EAM).
324
325
326% Environment activity at retry/trust instructions is the union of
327% the activities of this and all following branches still to be tried
328make_retry_me_activity_maps(BranchEamArray,RetryEamArray) :-
329	arity(BranchEamArray, NBranches),
330	dim(RetryEamArray, [NBranches]),
331	(
332	    for(I,NBranches,1,-1),
333	    fromto(0,RetryEAM0,RetryEAM,_),
334	    param(BranchEamArray,RetryEamArray)
335	do
336	    arg(I, BranchEamArray, EAM),
337	    arg(I, RetryEamArray, RetryEAM),
338	    RetryEAM is RetryEAM0 \/ EAM
339	).
340
341make_retry_activity_maps(RevGroup, BranchEamArray, RetryEams) :-
342	(
343	    foreach(I, RevGroup),
344	    foreach(RetryEAM, RetryEams),
345	    fromto(0,RetryEAM0,RetryEAM,_),
346	    param(BranchEamArray)
347	do
348	    arg(I, BranchEamArray, EAM),
349	    RetryEAM is RetryEAM0 \/ EAM
350	).
351
352
353generate_head_info([], _BranchI, ChunkData, ChunkData, HeadPerms, HeadPerms, []) :- !.
354generate_head_info(HeadArgsArray, BranchI, ChunkData0, ChunkData3, HeadPerms3, HeadPerms0, OrigRegDescs) :-
355	arg(BranchI, HeadArgsArray, Args),
356	(
357	    foreach(VarDesc, Args),
358	    foreach(r(VarId,a(I),orig,_), OrigRegDescs),
359	    fromto(ChunkData0, ChunkData1, ChunkData1, ChunkData3),
360	    fromto(HeadPerms3, HeadPerms2, HeadPerms1, HeadPerms0),
361	    count(I,1,_)
362	do
363	    VarDesc = variable{varid:VarId,class:C},
364	    ChunkData1 = chunk_data{occurred:OccurredInChunk},
365	    verify \+(hash_contains(OccurredInChunk, VarId)),
366	    ( C = nonvoid(y(Y)) ->
367		hash_set(OccurredInChunk, VarId, delayed_perm),
368		HeadPerms2 = [delayed_move(VarId,y(Y))|HeadPerms1]
369	    ;
370		hash_set(OccurredInChunk, VarId, true),
371		HeadPerms2 = HeadPerms1
372	    )
373	).
374
375
376emit_call_regular(CallInstr, RegDescs, QPred, Goal, Code, Code0, options{debug:Debug}) :-
377	    ( Debug == off ->
378		Code = [code{instr:CallInstr,regs:RegDescs}|Code0]
379	    ;
380		Goal = goal{path:Path,line:Line,from:From,to:To},
381		Code = [code{instr:debug_call(QPred,#call_port,Path,Line,From,To),regs:RegDescs},
382			code{instr:CallInstr,regs:[]}|Code0]
383	    ).
384
385
386%----------------------------------------------------------------------
387% Environment allocation/deallocation
388
389% Lazily insert an allocate instruction just before the first access of y(MinY).
390% The allocation size is filled in later when we reach a point where the
391% needed size is known, the next regular goal, the next cut, end of branch,
392% or start of disjunction. 
393env_allocate_if_needed(MinY, ChunkData0, ChunkData, Code0, Code) :-
394	% not really using MinY here, only for (incomplete) consistency check
395	ChunkData0 = chunk_data{allocated:ExistingESize},
396	( var(ExistingESize) ->
397	    % allocate instruction already emitted, waiting for size
398	    Code0 = Code, ChunkData0 = ChunkData
399	; ExistingESize >= 0 ->
400	    % already allocated and sized
401	    verify ExistingESize >= MinY,
402	    Code0 = Code, ChunkData0 = ChunkData
403	;
404	    % allocate here, size will be inserted later (at least MinY)
405	    Code0 = [code{instr:allocate(SizeFilledInLater)}|Code],
406	    update_struct(chunk_data, [allocated:SizeFilledInLater], ChunkData0, ChunkData)
407	).
408
409
410% Generate the allocate instruction that is required between the two ChunkData
411env_allocate_delta(chunk_data{allocated:Before}, chunk_data{allocated:After}, Code0, Code) ?-
412	( Before == After -> Code0 = Code
413	; Code0 = [code{instr:allocate(After)}|Code]
414	).
415
416
417% If there was an earlier allocate, make sure it allocates at least ESizeHere.
418% If no allocate was emitted so far, don't do anything now.
419env_set_allocate_size(-1, chunk_data{allocated:ExistingESize}) :- !,
420	verify ExistingESize < 0.	% should have no environment anyway
421env_set_allocate_size(ESizeHere, chunk_data{allocated:ExistingESize}) :-
422    	( var(ExistingESize) ->
423	    ExistingESize = ESizeHere
424	; ExistingESize >= 0 ->
425	    % already allocated and sized
426	    verify ExistingESize >= ESizeHere
427	; 
428	    true	 % don't allocate here
429	).
430
431
432% Allocate/deallocate, if not yet done
433env_allocate_last_chance(-1, ChunkData0, ChunkData, Code0, Code) :- !,
434	% deallocation request
435	ChunkData0 = chunk_data{allocated:ExistingESize},
436    	( var(ExistingESize) ->
437	    unreachable("unexpected allocate..deallocate sequence"), abort,
438	    ExistingESize = 0,
439	    Code0 = [code{instr:deallocate}|Code],
440	    update_struct(chunk_data, [allocated: -1], ChunkData0, ChunkData)
441	; ExistingESize >= 0 ->
442	    % deallocate existing environment
443	    Code0 = [code{instr:deallocate}|Code],
444	    update_struct(chunk_data, [allocated: -1], ChunkData0, ChunkData)
445	; 
446	    % no environment anyway
447	    Code0 = Code, ChunkData0 = ChunkData
448	).
449env_allocate_last_chance(ESizeHere, ChunkData0, ChunkData, Code0, Code) :-
450	ChunkData0 = chunk_data{allocated:ExistingESize},
451	( var(ExistingESize) ->
452	    % allocate instruction already emitted, fill in size
453	    ExistingESize = ESizeHere,
454	    Code0 = Code, ChunkData0 = ChunkData
455	; ExistingESize >= 0 ->
456	    % already allocated and sized
457	    verify ExistingESize >= ESizeHere,
458	    Code0 = Code, ChunkData0 = ChunkData
459	;
460	    % allocate here, for ESizeHere
461	    Code0 = [code{instr:allocate(ESizeHere)}|Code],
462	    update_struct(chunk_data, [allocated:ESizeHere], ChunkData0, ChunkData)
463	).
464
465
466% Select a call instruction and allocate/deallocate as required
467%	EnvAllocated	CallESize	CallInstr
468%	-1		-1		jmp
469%	 N		-1		chain (= deallocate,jmp)
470%	-1		 N		allocate,call
471%	 N		 N		call
472call_instr(-1, Dest, EAM, ChunkData0, ChunkData, Code, Code, CallInstr) :- !,
473	% deallocation request
474	ChunkData0 = chunk_data{allocated:ExistingESize},
475	verify (EAM==0, nonvar(ExistingESize)),
476	( ExistingESize >= 0 ->
477	    % deallocate existing environment
478	    CallInstr = chain(Dest),
479	    update_struct(chunk_data, [allocated: -1], ChunkData0, ChunkData)
480	; 
481	    % no environment anyway
482	    CallInstr = jmp(Dest),
483	    ChunkData0 = ChunkData
484	).
485call_instr(CallESize, Dest, EAM, ChunkData0, ChunkData, Code0, Code, CallInstr) :-
486	CallInstr = callf(Dest,eam(EAM)),
487	env_allocate_last_chance(CallESize, ChunkData0, ChunkData, Code0, Code).
488
489
490%----------------------------------------------------------------------
491% Indexing code generation
492% 
493% Compilation scheme: We generate code for all indexes that the indexing
494% analysis has discovered, in order of their quality. When an index cannot
495% exclude any branches of the disjunction, we fall through and try the next
496% best index. If any reduction is achieved, we don't try further indexes
497% (although we could) - this prevents index code explosion.
498% 
499% 1. Main indexes in order of quality
500% 
501%     These look at one argument register, and jump either
502% 	- directly to one alternative
503% 	- to a sub-index
504% 	- to a try-sequence
505% 	- to fail
506%     All index instructions fall through for variables. In the unusual
507%     case that the variable case filters out any alternatives, a jump
508%     follows which effectively extends the switch instruction with a
509%     variable case (to avoid falling through to the next index).
510% 
511% 2. Main indexes are followed by Try_me_else/retry_me_else/trust_me
512%	sequence with code for alternatives 1..N
513% 
514% 3. Followed by continuation after the disjunction.
515% 
516% 4. Sub-indexes and Try-sequences go into separate AuxCode sequence and
517%	are eventually appended to the end of the whole predicate code.
518%	These are all short, independent sequences of either a single sub-
519%	index instruction (integer_switch etc), or try-retry*-trust. The
520%	variable-fall-through cases of the secondary switches are never used.
521%	This code doesn't need the register allocator run over it (its
522%	register positions are shared with the main code sequence).
523%	The reason it goes at the end of the code is so we don't need
524%	to jump over it.
525% 
526% 
527% Each Index consists of one or more switch instructions that operate
528% on the same variable (argument register or permanent variable).
529% Possible combinations, with optional parts in brackets:
530% 
531%     switch_on_type TypeLabel1...TypeLabelN
532%     [branch VarLabel]
533%     ...
534%     [AtomLabel:	atom_switch ValueLabel1...ValueLabelN]
535%     [IntLabel:	integer_switch ValueLabel1...ValueLabelN]
536%     [FunctorLabel:	functor_switch ValueLabel1...ValueLabelN]
537% 
538%     atom_switch ValueLabel1...ValueLabelN DefaultLabel
539%     [branch VarLabel]
540% 
541%     integer_switch ValueLabel1...ValueLabel DefaultLabel
542%     [branch VarLabel]
543% 
544%     functor_switch ValueLabel1...ValueLabelN DefaultLabel
545%     [branch VarLabel]
546% 
547%     list_switch ListLabel NilLabel DefaultLabel
548%     [branch VarLabel]
549% 
550% The indexing code should not move any data around, so register and
551% environment slot contents remain untouched. This is because it
552% contains jumps to the beginnings of the other alternatives, which
553% all expect the same starting state as the first alternative
554% before any indexing code.
555%----------------------------------------------------------------------
556
557% generate_indexing
558% Input:	IndexDescs - ordered list of index descriptors
559%		BranchLabelArray - labels for alternative branches
560%		BranchEamArray - entry EAMs for alternative branches
561%		TryArity - number of args to save in choicepoints
562% Output:	Code - main indexing code
563%		AuxCode - sub-index and try-sequence code
564
565generate_indexing(IndexDescs, BranchLabelArray, BranchEamArray, TryArity, ChunkData, Code0, Code, AuxCode0, AuxCode, Options) :-
566	arity(BranchLabelArray, NBranches),
567	( for(I,1,NBranches), foreach(I,AllBranches) do true ),
568	hash_create(LabelTable),
569	(
570	    foreach(index{quality:Quality,variable:VarDesc,partition:DecisionTree},IndexDescs),
571	    fromto(Code0,Code1,Code3,Code),
572	    fromto(AuxCode0,AuxCode1,AuxCode2,AuxCode),
573	    param(LabelTable,BranchLabelArray,BranchEamArray,AllBranches,TryArity,NBranches,ChunkData,Options)
574	do
575	    ( Quality < NBranches ->
576		% Create label for "all branches of the disjunction". This is
577		% re-created for each index, and is the address of the next
578		% index, or the try_me-sequence respectively.
579		hash_set(LabelTable, AllBranches, NextIndexLabel),
580
581		generate_index(VarDesc, DecisionTree, LabelTable, BranchLabelArray, BranchEamArray, NextIndexLabel,
582		    TryArity, ChunkData, Code1, Code2, AuxCode1, AuxCode2, Options),
583		Code2 = [code{instr:label(NextIndexLabel),regs:[]}|Code3]
584	    ;
585		% Omit really bad indexes
586	    	Code1=Code3, AuxCode1=AuxCode2
587	    )
588	).
589
590
591% Precompute a sorted list of the non-variable tags
592:- local variable(tagnames).
593:- local initialization((
594    	sepia_kernel:decode_code(tags,TagArray),
595	TagArray=..[_|TagList0],
596	once delete(meta, TagList0, TagList1),
597	sort(TagList1, TagList),
598	setval(tagnames, TagList)
599    )).
600
601
602% Generate code for the index characterised by VarDesc and DecisionTree
603
604generate_index(VarDesc, DecisionTree, LabelTable, BranchLabelArray, BranchEamArray, NextIndexLabel,
605	    	TryArity, ChunkData, Code0, Code, AuxCode0, AuxCode, Options) :-
606	VarDesc = variable{varid:VarId},
607	ChunkData = chunk_data{allocated:Allocated},
608
609	% Create a label for this index's default case
610	dt_lookup2(DecisionTree, [], DefaultGroup, _),
611	create_group(DefaultGroup, LabelTable, BranchLabelArray, BranchEamArray, TryArity, DefaultLabel, Allocated, Options, AuxCode0, AuxCode1),
612
613	% First go through the non-variable tags: generate switch_on_values,
614	% try-sequences for branch-groups and a hash table of their labels,
615	% and a table for use by switch_on_type.
616	getval(tagnames, TagNames),
617	(
618	    foreach(TagName,TagNames),				% in: tag name
619	    foreach(TagName:ref(TagLabel),Table0),		% out: partial table for switch_on_type
620	    fromto(UsedTags,UsedTags1,UsedTags0,[]),		% out: tags that need to be distinguished
621	    fromto(SubDefaults,SubDefaults1,SubDefaults0,[]),	% out: default labels of subswitches
622	    fromto(AuxCode1,AuxCode2,AuxCode6,AuxCode7),	% out: code for try-sequences
623	    fromto(TmpCode0,TmpCode1,TmpCode3,TmpCode4),	% out: code for sub-switches
624	    param(DecisionTree,BranchLabelArray,BranchEamArray,TryArity,DefaultLabel,VarId,Allocated,Options),	% in
625	    param(LabelTable),					% inout: labels of try-groups
626	    param(VarLoc,SubRegDesc)				% out: parameters for sub-switches
627	do
628	    ( dt_lookup2(DecisionTree, [TagName], TagDefaultGroup, TagExceptions) ->
629		% we have entries for this tag
630		UsedTags1 = [TagName|UsedTags0],
631		( TagExceptions = [] ->
632		    % need only a try sequence for this tag
633		    verify TagDefaultGroup \== [],
634		    % group: all alternatives for this tag
635		    SubDefaults1 = SubDefaults0,
636		    TmpCode1 = TmpCode3,
637		    create_group(TagDefaultGroup, LabelTable, BranchLabelArray, BranchEamArray, TryArity, TagLabel, Allocated, Options, AuxCode2, AuxCode6)
638		;
639		    % we could use a switch_on_value
640		    ( TagDefaultGroup == [] ->
641			TagDefaultLabel = DefaultLabel,
642			AuxCode2 = AuxCode3
643		    ;
644			% group: default alternatives for this type
645			create_group(TagDefaultGroup, LabelTable, BranchLabelArray, BranchEamArray, TryArity, TagDefaultLabel, Allocated, Options, AuxCode2, AuxCode3)
646		    ),
647		    % make a value switch, unless it is trivial
648		    ( TagDefaultLabel == fail, DefaultLabel == fail, TagExceptions = [_Value-ValueGroup], ValueGroup = [_] ->
649			% omit singleton value switches
650			% (although they could lead to earlier failure)
651			SubDefaults1 = SubDefaults0,
652			TmpCode1 = TmpCode3,
653			create_group(ValueGroup, LabelTable, BranchLabelArray, BranchEamArray, TryArity, TagLabel, Allocated, Options, AuxCode3, AuxCode6)
654		    ;
655			% do use a value switch
656			SubDefaults1 = [TagDefaultLabel|SubDefaults0],
657			(
658			    foreach(Value-ValueGroup,TagExceptions),
659			    foreach(Value-ref(ValueLabel),ValueLabels),
660			    fromto(AuxCode3,AuxCode4,AuxCode5,AuxCode6),
661			    param(LabelTable,BranchLabelArray,BranchEamArray,TryArity,Allocated,Options)
662			do
663			    % group: alternatives for this value
664			    create_group(ValueGroup, LabelTable, BranchLabelArray, BranchEamArray, TryArity, ValueLabel, Allocated, Options, AuxCode4, AuxCode5)
665			),
666			TmpCode1 = [code{instr:label(TagLabel),regs:[]}|TmpCode2],
667			emit_switch_on_value(VarId, TagName, ValueLabels, TagDefaultLabel, VarLoc, SubRegDesc, TmpCode2, TmpCode3)
668		    )
669		)
670	    ;
671		% no entries for this tag, use global default label
672		TagLabel = DefaultLabel,
673		AuxCode2 = AuxCode6,
674		TmpCode1 = TmpCode3,
675		UsedTags1 = UsedTags0,
676		SubDefaults1 = SubDefaults0
677	    )
678	),
679
680	% Now consider the variable tags (var/meta/free)
681	( dt_lookup2(DecisionTree, [var], VarDefaultGroup, VarExceptions) ->
682	    ( VarExceptions == [] ->
683		% no distinction free/meta
684		create_group(VarDefaultGroup, LabelTable, BranchLabelArray, BranchEamArray, TryArity, VarLabel, Allocated, Options, AuxCode7, AuxCode9),
685		Table = [meta:ref(VarLabel)|Table0]
686	    ;
687		% need to distinguish free/meta
688		( member(meta-MetaGroup, VarExceptions) -> true ; MetaGroup = VarDefaultGroup ),
689		( member(free-FreeGroup, VarExceptions) -> true ; FreeGroup = VarDefaultGroup ),
690		create_group(FreeGroup, LabelTable, BranchLabelArray, BranchEamArray, TryArity, VarLabel, Allocated, Options, AuxCode7, AuxCode8),
691		create_group(MetaGroup, LabelTable, BranchLabelArray, BranchEamArray, TryArity, MetaLabel, Allocated, Options, AuxCode8, AuxCode9),
692		Table = [meta:ref(MetaLabel)|Table0]
693	    )
694	;
695	    % no var cases (rare)
696	    Table = [meta:ref(DefaultLabel)|Table0],
697	    VarLabel = DefaultLabel,
698	    AuxCode7 = AuxCode9
699	),
700
701	% Get the location of the switch-variable
702	reg_or_perm(VarDesc, ChunkData, FirstRegDesc, VarLoc),
703	% Create switch_on_type if useful
704	( var(MetaGroup), UsedTags=[_ValueSwitchTag], SubDefaults==[DefaultLabel] ->
705	    % We don't need a switch_on_type
706	    % hook (single) subswitch code into main sequence
707	    Code0 = TmpCode0, TmpCode4 = Code1, AuxCode9 = AuxCode,
708	    SubRegDesc = FirstRegDesc
709
710	; var(MetaGroup), list_tags_only(UsedTags) ->
711	    % A list_switch is sufficient
712	    verify TmpCode0 == TmpCode4,	% should have no subswitches
713	    emit_switch_on_list(Table, DefaultLabel, VarLoc, FirstRegDesc, Code0, Code1),
714	    AuxCode9 = AuxCode
715	;
716	    % Need the full switch_on_type, possibly with subswitches
717	    emit_switch_on_type(Table, VarLoc, FirstRegDesc, Code0, Code1),
718	    % hook subswitches (zero or more) into aux sequence
719	    AuxCode9 = TmpCode0, TmpCode4 = AuxCode,
720	    SubRegDesc = r(VarId,VarLoc,use,_)
721	),
722	emit_var_jmp(VarLabel, NextIndexLabel, Code1, Code).
723
724
725list_tags_only([[]]) :- !.
726list_tags_only([list]) :- !.
727list_tags_only([[],list]) :- !.
728
729
730% A "group" is a sequence of clauses linked by try/retry/trust-instructions.
731% Get the label for the given group. Create a try sequence if necessary.
732create_group(Group, LabelTable, BranchLabelArray, BranchEamArray, TryArity, GroupLabel, Allocated, Options, AuxCode1, AuxCode) :-
733	( Group = [] ->
734	    AuxCode1 = AuxCode,
735	    GroupLabel = fail
736	; hash_get(LabelTable, Group, GroupLabel) ->
737	    AuxCode1 = AuxCode
738	;
739	    hash_set(LabelTable, Group, GroupLabel),
740	    emit_try_sequence(Group, BranchLabelArray, BranchEamArray, TryArity, GroupLabel, Allocated, Options, AuxCode1, AuxCode)
741	).
742
743
744% Emit the switch_on_type instruction or its simpler version list_switch.
745%	Table		List of Tagname:ref(Label)
746%	DefaultLabel	Label for tags that do not occur in Types
747
748emit_switch_on_type(Table, VarLoc, RegDesc, Code0, Code) :-
749	Code0 = [code{instr:switch_on_type(VarLoc,Table),
750		    regs:[RegDesc]}|Code].
751
752
753emit_switch_on_list(Table, DefaultLabel, VarLoc, RegDesc, Code0, Code) :-
754	memberchk([]:NilRef, Table),
755	memberchk(list:ListRef, Table),
756	Code0 = [code{instr:list_switch(VarLoc,ListRef,NilRef,ref(DefaultLabel)),
757		    regs:[RegDesc]}|Code].
758
759
760
761% Emit a jump to VarLabel, unless it is the (subsequent) NextIndexLabel
762emit_var_jmp(VarLabel, NextIndexLabel, Code0, Code) :-
763	( VarLabel == NextIndexLabel ->
764	    Code0 = Code
765	;
766	    Code0 = [code{instr:branch(ref(VarLabel)),regs:[]}|Code]
767	).
768
769
770% Emit switches on constants (can be main index or sub-index).
771% Note: if this is used to generate a sub-index, then the code goes
772% into the AuxCode sequence, and the register allocator will not run
773% over it. In this case, VarLoc gets instantiated as a side effect of
774% the register allocator running over the corresponding main index.
775% RegDesc is ignored in this case.
776emit_switch_on_value(_VarId, integer, Table, DefaultLabel, VarLoc, RegDesc,
777	    [code{instr:integer_switch(VarLoc,Table,ref(DefaultLabel)),
778		    regs:[RegDesc]}|Code], Code).
779emit_switch_on_value(_VarId, atom, Table, DefaultLabel, VarLoc, RegDesc,
780	    [code{instr:atom_switch(VarLoc,Table,ref(DefaultLabel)),
781		    regs:[RegDesc]}|Code], Code).
782emit_switch_on_value(_VarId, structure, Table, DefaultLabel, VarLoc, RegDesc,
783	    [code{instr:functor_switch(VarLoc,Table,ref(DefaultLabel)),
784		    regs:[RegDesc]}|Code], Code).
785
786
787emit_try_sequence(Group, BranchLabelArray, BranchEamArray, TryArity, TryLabel, Allocated, Options, Code1, Code6) :-
788	( Group = [BranchNr1|BranchNrs2toN] ->
789	    arg(BranchNr1, BranchLabelArray, BranchLabel1),
790	    ( BranchNrs2toN == [] ->
791		% only one alternative, no try sequence needed
792		TryLabel = BranchLabel1,
793		Code1 = Code6
794	    ;
795		Code1 = [code{instr:label(TryLabel),regs:[]},
796			code{instr:try(#no_port,TryArity,ref(BranchLabel1)),regs:[]}
797			|Code2],
798		(
799		    fromto(BranchNrs2toN,[BranchNr|BranchNrs],BranchNrs,[BranchNrN]),
800		    fromto([],RevGroup1,[BranchNr|RevGroup1],RevGroup),
801		    fromto([],RetryEams1,[RetryEam|RetryEams1],RetryEams),
802		    fromto(Code2,Code3,Code4,Code5),
803		    param(BranchLabelArray,Allocated,Options)
804		do
805		    Code3 = [code{instr:RetryInstr,regs:[]}|Code4],
806		    arg(BranchNr, BranchLabelArray, BranchLabel),
807		    retry_instr(Options, Allocated, ref(BranchLabel), eam(RetryEam), RetryInstr)
808		),
809		Code5 = [code{instr:TrustInstr,regs:[]}|Code6],
810		arg(BranchNrN, BranchLabelArray, BranchLabelN),
811		trust_instr(Options, Allocated, ref(BranchLabelN), eam(TrustEam), TrustInstr),
812		make_retry_activity_maps([BranchNrN|RevGroup], BranchEamArray, [TrustEam|RetryEams])
813	    )
814	;
815	    TryLabel = fail, Code1 = Code6
816	).
817
818
819% Var is expected either in a temporary or a perm (not first).
820% return a corresponding register descriptor
821reg_or_perm(Var, ChunkData, RegDesc, VarLoc) :-
822	Var = variable{varid:VarId},
823	variable_occurrence(Var, ChunkData, ChunkData1, Code0, Code1, VarOccDesc),
824	verify (ChunkData==ChunkData1, Code0==Code1),
825	( VarOccDesc = tmp ->
826	    RegDesc = r(VarId,VarLoc,use,_)
827	; VarOccDesc = perm_first_in_chunk(VarLoc) ->
828	    RegDesc = r(VarId,VarLoc,perm,_)
829	; verify VarOccDesc = perm(_Y),
830	    RegDesc = r(VarId,VarLoc,use,_)
831	).
832
833
834% Initialize environment slots according to the bitmap given.  We can't
835% use the current initialize instruction because we want global variables.
836% This code doesn't need register allocation run over it!
837emit_initialize(EAM, Code, Code0) :-
838	decode_activity_map(EAM, Ys),
839	length(Ys, N),
840	% We always generate a gc_test, assuming we are in a separate
841	% pseudo-chunk at the end of a branch. In this case, we must
842	% establish a stack margin at the end of the branch, because
843	% the following chunk will assume the availability of it.
844	% This is ugly, but could be simply folded into an initialize
845	% instruction.
846	Code = [code{instr:gc_test(N)}|Code3],
847	(
848	    foreach(Y,Ys),
849	    fromto(Code3,Code1,Code2,Code0)
850	do
851	    Code1 = [code{instr:put_global_variable(y(Y)),regs:[],comment:initialize}|Code2]
852	).
853
854
855%----------------------------------------------------------------------
856% Regular goal arguments
857% We first "put" arguments that have the most first occurrences
858% of variables within compound terms. Reason:
859% If a variable occurs directly on an argument position and also
860% within a structure in another argument, the structure should be
861% put first so the variable is located inside the structure.
862% In addition, temps should be freed as soon as possible, so
863% arguments with lots of temporaries should be put first.
864%----------------------------------------------------------------------
865
866generate_regular_puts(Args, ChunkData0, ChunkData, Code0, Code, CallRegDescs, Module) :-
867
868	% determine an order (this should be an option)
869	heuristic_put_order(Args, ChunkData0, Ordered),
870
871	% construct the arguments in the determined order
872	(
873	    foreach(put(_,I,Arg), Ordered),
874	    foreach(r(ArgId,a(I),dest,_), CallRegDescs),
875	    fromto(ChunkData0, ChunkData1, ChunkData2, ChunkData),
876	    fromto(Code0, Code1, Code2, Code),
877	    param(Module)
878	do
879	    put_term(Arg, ChunkData1, ChunkData2, Code1, Code2, ArgId, Module)
880	).
881
882
883heuristic_put_order(Args, ChunkData, SortedWeightsIs) :-
884	(
885	    count(I,1,_),
886	    foreach(Arg,Args),
887	    foreach(put(Weight,I,Arg), WeightsIs),
888	    param(ChunkData)
889	do
890	    heuristic_argument_weight(Arg, 0, ChunkData, 0, Weight)
891	),
892	sort(1, >=, WeightsIs, SortedWeightsIs),
893%	( WeightsIs==SortedWeightsIs-> true ; writeln(SortedWeightsIs) ),
894	true.
895
896
897    :- mode heuristic_argument_weight(+,+,+,+,-).
898    heuristic_argument_weight(Var, InStruct, ChunkData, VN0, VN) :-
899	Var = variable{class:C},
900	( potential_first_temp_occurrence(Var, ChunkData) ->
901	    % first occurrences of temp variables inside compound terms
902	    % count towards the weight because they require a new register.
903	    VN is VN0 + InStruct
904	; C = nonvoid(y(_)) ->
905	    % perms are treated like constants
906	    VN is VN0 - 1 + InStruct
907	;
908	    VN = VN0
909	).
910    heuristic_argument_weight(structure{args:Args}, _, ChunkData, VN0, VN) :-
911	heuristic_argument_weight(Args, 1, ChunkData, VN0, VN).
912    heuristic_argument_weight([X|Xs], _, ChunkData, VN0, VN) :-
913	heuristic_argument_weight(X, 1, ChunkData, VN0, VN1),
914	heuristic_argument_weight(Xs, 1, ChunkData, VN1, VN).
915    heuristic_argument_weight(Term, InStruct, _ChunkData, VN0, VN) :-
916    	atomic(Term),
917	% constants should be put last because putting them definitely
918	% uses up one register
919	VN is VN0 - 1 + InStruct.
920
921
922/*
923
924A different method...
925
926% The interesting point here is computing the order in which the
927% arguments for the call will be constructed. There are two aspects:
928% Dataflow: every put overwrites an argument register, so this
929% register must not be the only source for something still needed.
930% We therefore compute a dependency graph and sort it topologically.
931% Heuristics: if a variable occurs both on its own and in a compound
932% term, the compound terms should be put first because that locates
933% the variable within the term and saves an instruction.
934
935generate_regular_puts(goal{args:Args,functor:F/N},
936		ChunkData0, ChunkData, Code0, Code) :-
937	Call =.. [F|Args],	%%% preliminary
938
939	% For each argument of the call, find out which current argument
940	% register's content is needed to construct it (if any).
941	% Also, compute a heuristic argument weight.
942	functor(NeededRegs, F, N),	% array of register lists
943	functor(OccupiedBy, F, N),	% array of varids
944	(
945	    for(I,1,N),
946	    foreach(NVars-I, VarWeights),
947	    param(Call,NeededRegs,ChunkData0,OccupiedBy,N)
948	do
949	    arg(I, Call, Arg),
950	    arg(I, NeededRegs, Regs),
951	    collect_arg_regs_needed_in_term(Arg, I, N, ChunkData0, OccupiedBy, [], Regs, 0, NVars)
952	),
953
954	% Preorder the arguments heuristically: sort them according to
955	% the number of variables that occur within structures.
956	% (the order is reversed because the subsequent topsort will
957	% reverse it again!)
958	sort(1, =<, VarWeights, SortedVarWeights),
959	( foreach(_-I,SortedVarWeights), foreach(I,RevPreOrder) do true ),
960
961	% By topological sorting of the "needs" graph, find a good order
962	% to construct the call arguments. CycleBreakers are graph edges
963	% that need to be removed to allow topological sorting.
964	top_sort(NeededRegs, RevPreOrder, Order, CycleBreakers),
965	printf("Order: %w, Breakers: %w%n", [Order, CycleBreakers]),
966
967	% We move the "needed" register for every problematic edge
968	% to an alternative location.
969	(
970	    foreach(_PutPos->NeededPos, CycleBreakers),
971	    fromto(ChunkData0, ChunkData1, ChunkData2, ChunkData3),
972	    fromto(Code0, Code1, Code2, Code3),
973	    param(OccupiedBy)
974	do
975	    arg(NeededPos, OccupiedBy, VarId),
976	    replace_current_location(VarId, a(NeededPos), Tmp, ChunkData1, ChunkData2),
977	    Code1 = [move(a(NeededPos),Tmp)|Code2]
978	),
979
980	% Finally construct the arguments in the topological order
981	(
982	    foreach(I,Order),
983	    fromto(ChunkData3, ChunkData4, ChunkData5, ChunkData),
984	    fromto(Code3, Code4, Code5, Code),
985	    param(Call)
986	do
987	    arg(I, Call, Arg),
988	    % TODO: could lookup  (J needs I)  here and move I away
989	    % instead of doing eager previous loop 
990	    body(I, Arg, ChunkData4, ChunkData5, Code4, Code5)
991	).
992
993
994    % Term is the I-th argument of Max arguments to a call.
995    % We compute a list of those registers whose contents is absolutely
996    % needed to construct this argument. These registers come from variables
997    % that occur in Term and have only a single location which is a
998    % register =< Max (with the trivial exception of the correct
999    % register occuring already in the correct call position).
1000    % As an unrelated extra, we count the number of variables that occur
1001    % within structures - this will be used as an ordering heuristics.
1002    :- mode collect_arg_regs_needed_in_term(+,+,+,+,+,+,-,+,-).
1003    collect_arg_regs_needed_in_term(variable{varid:VarId}, I, Max, ChunkData, OccupiedBy, Regs0, Regs, VN0, VN) :-
1004	VN is VN0+1-sgn(I),	% I::1..Max for topmost, 0 other occurrences
1005	(
1006	    get_current_locations(VarId, ChunkData, CurrentLocations),
1007	    CurrentLocations = [SingleLocation],
1008	    nonvar(SingleLocation),
1009	    SingleLocation = a(J),
1010	    J =\= I,		% not topmost (I=0), or wrong register
1011	    J =< Max
1012	->
1013	    arg(J, OccupiedBy, VarId),
1014	    Regs = [J|Regs0]
1015	;
1016	    Regs = Regs0
1017	).
1018    collect_arg_regs_needed_in_term(structure{args:Args}, _, Max, ChunkData, OccupiedBy, Regs0, Regs, VN0, VN) :-
1019	collect_arg_regs_needed_in_term(Args, 0, Max, ChunkData, OccupiedBy, Regs0, Regs, VN0, VN).
1020    collect_arg_regs_needed_in_term([X|Xs], _, Max, ChunkData, OccupiedBy, Regs0, Regs, VN0, VN) :-
1021	collect_arg_regs_needed_in_term(X, 0, Max, ChunkData, OccupiedBy, Regs0, Regs1, VN0, VN1),
1022	collect_arg_regs_needed_in_term(Xs, 0, Max, ChunkData, OccupiedBy, Regs1, Regs, VN1, VN).
1023    collect_arg_regs_needed_in_term(Term, _, _Max, _ChunkData, _OccupiedBy, Regs, Regs, VN, VN) :-
1024    	atomic(Term).
1025*/
1026
1027%----------------------------------------------------------------------
1028% Generate code for "simple" goals (built-ins)
1029%----------------------------------------------------------------------
1030
1031:- include(compiler_builtins).
1032
1033
1034%----------------------------------------------------------------------
1035% Generate code for constructing an arbitrary term
1036%----------------------------------------------------------------------
1037
1038put_term(Term, ChunkData0, ChunkData, Code, Code0, VarId, _Module) :-
1039	Term = variable{varid:VarId}, !,
1040	put_variable(Term, ChunkData0, ChunkData, Code, Code0).
1041put_term(Term, ChunkData0, ChunkData, Code, Code0, ValId, Module) :-
1042	new_aux_temp(ChunkData0, ChunkData1, ValId),
1043	body(ValId, Term, ChunkData1, ChunkData, Code, Code0, Module).
1044
1045
1046%
1047% Generate code that makes sure that a variable physically exists
1048% (it might need to be initialised if it is the first occurrence)
1049% and its location is available somewhere (register or env slot).
1050% Generate register annotations to tell the reg allocator about
1051% the location. A concrete register (plus possibly extra move
1052% instructions) will be assigned by the reg allocator later.
1053%
1054% put_variable(+VarDesc, +ChunkData0, -ChunkData, -Code, ?Code0).
1055%
1056
1057:- mode put_variable(+,+,-,-,?).
1058
1059put_variable(Var, ChunkData0, ChunkData, Code0, Code) :-
1060	Var = variable{varid:VarId},
1061	variable_occurrence(Var, ChunkData0, ChunkData1, Code0, Code1, VarOccDesc),
1062	put_va_code(VarOccDesc, VarId, Code1, Code, GAlloc),
1063	alloc_check_pwords(GAlloc, ChunkData1, ChunkData).
1064
1065    put_va_code(void, VarId, Code, Code0, 1) :-
1066	Code = [code{instr:put_variable(R),regs:[r(VarId,R,def,_)]}|Code0].
1067    put_va_code(tmp_first, VarId, Code, Code0, 1) :-
1068	Code = [code{instr:put_variable(R),regs:[r(VarId,R,def,_)]}|Code0].
1069    put_va_code(tmp, _VarId, Code, Code0, 0) :-
1070	% Variable already known in this chunk: The register allocator will
1071	% move it to the correct register as necessary (triggered by the dest
1072	% descriptor that comes with the call instruction).
1073	Code = Code0.
1074    put_va_code(perm_first(Y), VarId, Code, Code0, 1) :-
1075	% First ever occurrence of this permanent variable. Emit code to
1076	% initialise it and tell the reg allocator about the two locations.
1077	Code = [code{instr:put_global_variable(R,Y),regs:[r(VarId,Y,perm,_),r(VarId,R,def,_)]}|Code0].
1078    put_va_code(perm_first_in_chunk(Y), VarId, Code, Code0, 0) :-
1079	% First occurrence of this permanent variable in this chunk.
1080	% Tell the reg allocator about the permanent location. It will then
1081	% move it to the correct register as necessary (triggered by the dest
1082	% descriptor that comes with the call instruction).
1083	Code = [code{instr:nop,regs:[r(VarId,Y,perm,_)]}|Code0].
1084    put_va_code(perm(_Y), _VarId, Code, Code0, 0) :-
1085	% Variable already known in this chunk. The register allocator will
1086	% move it to the correct register as necessary.
1087	Code = Code0.
1088
1089
1090% Generate code to move head occurrences of permanent variables
1091% into their environment slots.
1092move_head_perms([], ChunkData, ChunkData, Code, Code) :- !.
1093move_head_perms(HeadPerms, ChunkData0, ChunkData, Code0, Code) :-
1094	env_allocate_if_needed(1/*dummy*/, ChunkData0, ChunkData, Code0, Code1),
1095	(
1096	    foreach(delayed_move(VarId,Y),HeadPerms),
1097	    fromto(Code1,[Move|Code2],Code2,Code)
1098	do
1099	    Move = code{instr:move(R,Y),regs:[r(VarId,R,use,_),r(VarId,Y,perm,_)]}
1100	).
1101
1102
1103%----------------------------------------------------------------------
1104% Global stack allocation checks
1105%
1106% We distinguish the following points in the WAM code:
1107%
1108% start: a point where we are guaranteed to have the standard margin
1109%	available on the global stack (this is the case at predicate
1110%	entry or after returning from a regular call).
1111%
1112% allocation(maximum):
1113%	a (potential) allocation point, we know the maximum used.
1114%	These are instructions like put_structure, write_list, etc
1115%	We insert no checks at these points.
1116%
1117% after_unbounded_alloc(certainly/maybe reached):
1118%	a (potential) unbounded allocation point, after which we
1119%	have no guarantee except the standard margin (either we
1120%	have the same as before, or standard margin). Examples are:
1121%	- get_value (because of attributed variable unification, which
1122%	  builds up the MU-list). It is certainly reached.
1123%	- read_value, same as get_value, but not certainly reached.
1124%	- arithmetic builtins, because of bignums+rationals
1125%	We may need to insert a check after this (potential) allocation.
1126%
1127% split: before a disjunction - we promote the max allocation
1128%	requirement of the branches left over split-point.
1129
1130% start_branch({det,try}):
1131%	promote check to the left. This is used for the first branch,
1132%	or all branches in case of deterministic switch.
1133%
1134% start_branch({retry,trust}):
1135%	If less than margin needed, promote check to the left (because
1136%	we may enter the branch directly via switch). If more than
1137%	margin needed, insert check here (because we may enter
1138%	via retry/trust and have only guarantee for standard margin),
1139%	and promote nothing left.
1140%
1141% end: a point where an implicit check follows (call, ret, ...)
1142%
1143%
1144%
1145% This code uses delayed goals to fill in the size-arguments in the
1146% gc_test instructions once they become known.  This results in lots
1147% of gc_test <small> which must be removed later, but it does not
1148% leave gaps in the code, which is a bit nicer for debugging.
1149% Note that this code is independent of the chunk structure.
1150%----------------------------------------------------------------------
1151
1152alloc_check_pwords(0, ChunkData0, ChunkData) :- !,
1153	ChunkData = ChunkData0.
1154alloc_check_pwords(N, ChunkData0, ChunkData) :-
1155	ChunkData0 = chunk_data{need_global:N0},
1156	suspend(+(N1,N,N0), 0, N1->inst),
1157	update_struct(chunk_data, [need_global:N1], ChunkData0, ChunkData).
1158
1159alloc_check_start(ChunkData0, ChunkData, [code{instr:gc_test(N)}|Code0], Code0) :-
1160	update_struct(chunk_data, [need_global:N], ChunkData0, ChunkData).
1161
1162alloc_check_split(chunk_data{need_global:Max}, List) :-
1163	max_list(List, 0, Max).
1164	
1165    delay max_list(Xs, _Max0, _Max) if var(Xs).
1166    delay max_list([X|_], _Max0, _Max) if var(X).
1167    max_list([], Max, Max).
1168    max_list([X|Xs], Max0, Max) :-
1169	Max1 is max(Max0,X),
1170	max_list(Xs, Max1, Max).
1171
1172alloc_check_start_branch(Det, ChunkData0, ChunkData, Code, Code0, NeedBefore) :-
1173	( first_alternative(Det) ->
1174	    % we have come here directly via switch from code before,
1175	    % so promote left.
1176	    Code = Code0,
1177	    update_struct(chunk_data, [need_global:NeedBefore], ChunkData0, ChunkData)
1178	;
1179	    % we have to promote left because we have no guarantee here:
1180	    % we may have come here directly via switch from code before
1181	    % (-> rely on left-promoted amount), or we may have had a
1182	    % retry/trust (-> we can rely on standard margin).
1183	    Code = [code{instr:gc_test(N)}|Code0],
1184	    update_struct(chunk_data, [need_global:NeedAfter], ChunkData0, ChunkData),
1185	    suspend(test_or_promote(reached, NeedAfter, NeedBefore, N), 0, NeedAfter->inst)
1186	).
1187
1188alloc_check_join(_ChunkDataEs, _).	% disabled
1189%alloc_check_join(ChunkDataEs, chunk_data{need_global:N}) ?-
1190%	( foreach(chunk_data{need_global:N},ChunkDataEs), param(N) do true ).
1191
1192
1193% N is integer, 'unbounded' or 'unbounded_maybe'
1194alloc_check_after(N, ChunkData0, ChunkData, Code, Code) :-
1195	integer(N), !,
1196	alloc_check_pwords(N, ChunkData0, ChunkData).
1197alloc_check_after(UnbReach, ChunkData0, ChunkData, [code{instr:gc_test(N)}|Code0], Code0) :- !,
1198	ChunkData0 = chunk_data{need_global:NeedBefore},
1199	update_struct(chunk_data, [need_global:NeedAfter], ChunkData0, ChunkData),
1200	suspend(test_or_promote(UnbReach, NeedAfter, NeedBefore, N), 0, NeedAfter->inst).
1201
1202    % we are just after a potentially unbounded allocation+check
1203    test_or_promote(UnbReach, NeedAfter, NeedBefore, TestHere) :-
1204	( NeedAfter > #wam_max_global_push ->
1205	    ( UnbReach == unbounded_maybe ->
1206		% since the check might not be reached,
1207		% check for enough space in the previous test
1208		TestHere=NeedAfter, NeedBefore=NeedAfter
1209	    ;
1210		% since the check is certainly reached,
1211		% it has the responsibility for NeedAfter
1212		TestHere=NeedAfter, NeedBefore=0
1213	    )
1214	;
1215	    % no check needed because either:
1216	    % - no allocate&check, previous check covers
1217	    % - after-bip, bip doesn't allocate, and previous check covers
1218	    % - after-bip, bip allocates&checks, and we have Guarantee
1219	    TestHere=0, NeedBefore=NeedAfter
1220	).
1221
1222alloc_check_end(chunk_data{need_global:0}).
1223
1224
1225%----------------------------------------------------------------------
1226% Debugging and testing
1227%----------------------------------------------------------------------
1228
1229:- comment(print_annotated_code/1, [
1230    summary:"Debugging: print annotated WAM code",
1231    amode:print_annotated_code(+),
1232    args:[
1233	"Code":"A list of struct(code)"
1234    ],
1235    see_also:[generate_code/5,struct(code)]
1236]).
1237
1238:- export print_annotated_code/1.
1239
1240print_annotated_code(Code) :-
1241	writeln("------ Code ------"),
1242	( fromto(Code,Code1,Code4,[]) do
1243	    ( fromto(Code1,[InstrDesc|Code2],Code3,next(Code4)) do
1244		( InstrDesc = code{instr:Instr,regs:Regs,comment:C} ->
1245		    ( Instr = label(_) ->
1246			printf("%Vw%t", [Instr])
1247		    ;
1248			printf("%t%Vw", [Instr])
1249		    ),
1250		    ( nonvar(Regs) -> printf("%t%t%_w", [Regs]) ; true ),
1251		    ( nonvar(C) -> printf("%t%% %Vw", [C]) ; true ),
1252		    nl
1253		;
1254		    ( InstrDesc = label(_) ->
1255			printf("%Vw%n", [InstrDesc])
1256		    ;
1257			printf("%t%Vw%n", [InstrDesc])
1258		    )
1259		),
1260		% allow termination by [] or next([])
1261		( Code2 == [] -> Code3 = next([]) ; Code3 = Code2 )
1262	    )
1263	).
1264
1265