1% BEGIN LICENSE BLOCK
2% Version: CMPL 1.1
3%
4% The contents of this file are subject to the Cisco-style Mozilla Public
5% License Version 1.1 (the "License"); you may not use this file except
6% in compliance with the License.  You may obtain a copy of the License
7% at www.eclipse-clp.org/license.
8% 
9% Software distributed under the License is distributed on an "AS IS"
10% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See
11% the License for the specific language governing rights and limitations
12% under the License. 
13% 
14% The Original Code is  The ECLiPSe Constraint Logic Programming System. 
15% The Initial Developer of the Original Code is  Cisco Systems, Inc. 
16% Portions created by the Initial Developer are
17% Copyright (C) 2006 Cisco Systems, Inc.  All Rights Reserved.
18% 
19% Contributor(s): Joachim Schimpf
20%                 Kish Shen
21% 
22% END LICENSE BLOCK
23% ----------------------------------------------------------------------
24% System:	ECLiPSe Constraint Logic Programming System
25% Component:	ECLiPSe III compiler
26% Version:	$Id: compiler_peephole.ecl,v 1.28 2015/05/27 16:48:51 jschimpf Exp $
27% ----------------------------------------------------------------------
28
29:- module(compiler_peephole).
30
31:- comment(summary, "ECLiPSe III compiler - peephole optimizer").
32:- comment(copyright, "Cisco Technology Inc").
33:- comment(author, "Joachim Schimpf, Kish Shen").
34:- comment(date, "$Date: 2015/05/27 16:48:51 $").
35
36:- comment(desc, ascii("
37    This pass does simple code improvements like:
38
39	 - eliminating some instructions (e.g. nop)
40	 - moving branch targets (e.g. to skip unneeded type tests)
41	 - merging instructions (e.g. call+ret -> jmp)
42    Takes a list of register-allocated, annotated code  Code is simplified,
43    and finally the annotations are stripped, and a plain list of 
44    instructions is returned, which can be fed into the assembler.
45")).
46
47:- use_module(compiler_common).
48
49:- import meta_index/2 from sepia_kernel.
50
51
52:- local struct(chunk(
53    	cont,	% index of continuation chunk
54	len,	% length of code list
55	code,	% code list
56	done)).	% 'done' if chunk already in final code list, else uninstantiated
57
58         
59% maximum size of code chunks that should be duplicated to save a branch
60max_joined_len(2).
61
62
63:- comment(simplify_code/3, [
64    summary:"Strip annotations and do peephole optimizations",
65    amode:simplify_code(+,-,+),
66    args:[
67	"AnnotatedCodeIn":"A list of annotated WAM code (struct(code))",
68	"WamCodeOut":"A list of WAM instructions in lib(asm) format",
69	"Options":"struct(options)"
70    ],
71    see_also:[struct(code)]
72]).
73
74/* 
75simplify_code(+CodeList, -WamList +Options)
76    Performs peephole optimisation on annotated code list CodeList, and
77    peoduces an (unannotated) WamList of abstract instruction.
78
79    The code can either be broken into `chunks', to allow for inter-chunk
80    optimisations (such as moving of jump targets, joining of short
81    continuations, and dead code removal), and then peephole optimisation
82    is performed on each chunk, or it can be peephole optimised as a unit.
83
84    If broken into chunks, the chunks are rejoined without any
85    `dead' chunks (i.e. chunks that cannot be reached). The chunks may be
86    rejoined in a different order from the original. Because some
87    originally adjacent chunks are best if adjucent in the final code,
88    these are rejoined early (before peephole optimisation), to ensure that
89    they stay adjacent.
90
91    The joining of short continuations will duplicate code, but reduces 
92    branching, and the joined code allows for last call optimisation,
93    if the continuation exits from the predicate. Short continuations are
94    only joined if there are more than one chunk that continues into it;
95    this is to prevent duplication of the code -- one for the continuation,
96    and one for any branch targets. This optimisation greatly reduces the
97    code size expansion. The alternative is to put a label into the first
98    joined short continuation to act as a branch target, but the label
99    prevents important optimisations (such as the last call opt) in the
100    `boundary' code that is joined.
101*/
102:- export simplify_code/3.
103simplify_code(CodeList, WamList, options{opt_level:OptLevel}) :-
104	( OptLevel > 0 ->
105	    flat_code_to_basic_blocks(CodeList, BasicBlockArray, Rejoins),
106            make_nonreplicate_array(BasicBlockArray, Rejoins, NonRepArray),
107            interchunk_simplify(BasicBlockArray, Rejoins, NonRepArray,
108                                ReachedArray, Targets),
109            compute_chunk_connections(BasicBlockArray, ReachedArray, Targets,
110                ContArray, RefedArray, JoinedArray, Branches, BranchesT),
111            ( for(_,1,max_joined_len), 
112              param(BasicBlockArray, NonRepArray, ReachedArray,
113                    ContArray,RefedArray,JoinedArray) 
114            do
115		join_short_continuations(BasicBlockArray, ReachedArray,
116                    NonRepArray, ContArray, RefedArray, JoinedArray)
117	    ),
118            % add marked chunks in JoinedArray to Branches
119            ( foreacharg(J, JoinedArray, Idx), param(RefedArray),
120              fromto(BranchesT, B1,B2, []) do
121                ( nonvar(J),
122                  arg(Idx, RefedArray, Refed),
123                  nonvar(Refed)
124                -> 
125                    % add to branches that needs to be processed when
126                    % joining branches back together for continuations
127                    % that have been joined early,  and is ref'ed
128                    B1 = [Idx|B2]
129                ;
130                    B1 = B2
131                )
132            ),
133	    ( foreacharg(chunk{code:Chunk,cont:Cont,len:Len,done:Done},
134                         BasicBlockArray,I), 
135              param(BasicBlockArray) do
136                ( var(Done) ->
137                    simplify_chunk(Chunk, SimplChunk),
138                    % Len is approximate after simplify! 
139                    setarg(I, BasicBlockArray, chunk{code:SimplChunk,len:Len,cont:Cont})
140                ;
141                    true
142                )
143            ),
144	    basic_blocks_to_flat_code(BasicBlockArray, Branches, JoinedArray, ReachedArray, CodeList1),
145            simplify_chunk(CodeList1, SimpCodeList)  % run simplify again on entire code list
146	;
147	    simplify_chunk(CodeList, SimpCodeList)
148        ),
149        ( foreach(code{instr:Instr},SimpCodeList), foreach(Instr,WamList) do
150            true
151        ).
152
153compute_chunk_connections(BasicBlockArray, ReachedArray, Targets, ContArray,
154    RefedArray, JoinedArray, Branches, BranchesT) :-
155        functor(BasicBlockArray, F, N),
156        functor(ContArray, F, N),
157        functor(RefedArray,F, N),
158        functor(JoinedArray, F, N),
159        arg(1, ContArray, r([])), % Chunk 1 marked as cont'ed into 
160        ( foreacharg(chunk{cont:Cont}, BasicBlockArray, I),
161         param(ContArray,ReachedArray) do
162            % ContArray: determine chunks that are continuations
163            % from 0  (ContArray[n] is var), 
164            %      1  (ContArray[n] = r(M), M is var)
165            %      1+ (ContArray[n] = r([])
166            % other chunks
167            ( arg(I, ReachedArray, Reached),
168              nonvar(Reached), % is a reached chunk
169              integer(Cont), Cont > 0
170            ->
171                arg(Cont, ContArray, CStatus),
172                (var(CStatus) -> CStatus = r(_) ; CStatus = r([]))
173            ;
174                true
175            )
176        ),
177        % RefedArray: 
178        % RefedArray[n] is var if it is not the target of any ref()
179        % RefedArray[n] = [] if it is the target of one or more ref()
180        ( foreach(T, Targets), param(RefedArray) do
181            arg(T, RefedArray, [])
182        ),
183        ( foreacharg(IsCont,ContArray, I), foreacharg(Refed,RefedArray), 
184          fromto(Branches, Branches0,Branches1, BranchesT) do
185            ( var(IsCont), nonvar(Refed) ->
186                % chunk is not continued into, but is referenced, 
187                % i.e. first chunk of a branch
188                Branches0 = [I|Branches1]
189            ;
190                Branches0 = Branches1
191            )
192        ).
193
194% Take a simple list of annotated code, and cut it up at the labels.
195% The result are code chunks that correspond to basic blocks.
196% Number each chunk and instantiate the corresponding Label to this number.
197% Enter the chunk into an array indexed by the chunk number.
198%
199% Also determine if two consecutive chunks are `contiguous' chunks, i.e.
200% the instructions at the splitting of the chunks should be contiguous in
201% the final code if possible. These chunks will be rejoined as soon as 
202% possible, unless the earlier chunk is unreachable. The first chunk numbers 
203% for each of these contiguous chunks are collected in Rejoins
204%
205% 
206% We already do some opportunistic simplification here:
207% - removing the code{} wrapper
208% - eliminating nops
209% - eliminating redundant consecutive labels (unifying the Labels)
210% - eliminating unreachable code between branch() and the next label()
211% - make indirect branch() (branch() to another branch()) direct
212%
213% During code traversal, we maintain a State variable with the values:
214%  labels:	the previous instruction was a label (not in rejoin state)
215%  normal:	we are in the middle of a chunk
216%  unreachable:	the current code is unreachable
217%  rejoin:      the previous instruction was a `contiguous' instruction, i.e.
218%               it should be contiguous with the following instruction
219%  rejoinlabels: the previous instruction was a label, encountered while
220%               state was rejoin
221
222flat_code_to_basic_blocks(AnnCode, BasicBlockArray, Rejoins) :-
223	(
224	    fromto(AnnCode, [AnnInstr|Code1], Code1, []),
225	    fromto(FirstChunk,Chunk0,Chunk1,LastChunk),
226	    fromto(FirstChunk,Tail0,Tail1,[]),
227            fromto(1,Label0,Label1,_),	% last label (while in
228                                        % labels/rejoinlabels state)
229	    fromto(Chunks,Chunks0,Chunks1,Chunks2),
230	    fromto(0,N0,N1,N),			% chunk number
231	    fromto(0,Len0,Len1,Len),		% chunk length
232            fromto([],Rejoins0,Rejoins1,Rejoins), % rejoin chunk numbers
233            fromto(labels,State,State1,EndState)
234	do
235            AnnInstr = code{instr:Instr},
236            ( Instr = label(L) ->
237		verify var(L),
238                ( State == rejoin ->
239                    State1 = rejoinlabels 
240                ; State == rejoinlabels ->
241                    State1 = rejoinlabels
242                ; 
243                    State1 = labels
244                ),
245                Label1 = L,
246                Rejoins0 = Rejoins1,
247                N1 = N0,
248                ( (State == labels ; State == rejoinlabels) ->
249                    Label1 = Label0,		% a redundant label
250                    Len1 = Len0,
251                    Chunk1 = Chunk0,
252                    Tail0 = Tail1,
253                    Chunks0 = Chunks1
254                ; State == unreachable ->
255                    Len1 = 0,
256                    Chunk1 = Tail1,	% start a new chunk
257                    Tail0 = [],		% terminate the previous chunk
258                    Chunks0 = Chunks1	% dont't collect finished chunk
259                ; % State == normal ; State == rejoin
260                    Len1 = 0,
261                    Chunk1 = Tail1,	% start a new chunk
262                    Tail0 = [],		% terminate the previous chunk
263                    % collect finished chunk (L is uninstantiated)
264                    Chunks0 = [chunk{code:Chunk0,len:Len0,cont:L}|Chunks1]
265                )
266
267	    ; Instr = branch(ref(L)) ->
268                N1 = N0,
269                Label1 = none,
270                Rejoins1 = Rejoins0,
271                State1 = unreachable,
272                ( (State == labels ; State == rejoinlabels) ->
273                    % branch follows immediately from a label
274                    Label0 = L,		% get rid of indirect label
275                    Len0 = Len1,
276		    Chunk0 = Chunk1,
277		    Chunks0 = Chunks1,
278		    Tail0 = Tail1
279                ; State == unreachable ->
280		    succ(Len0, Len1),
281		    Chunk0 = Chunk1,
282		    Chunks0 = Chunks1,
283		    Tail0 = Tail1
284		; atom(L)  ->
285		    Len1 = 0,
286		    succ(Len0, Len2),
287		    Chunk1 = Tail1,		% start a new chunk
288		    Tail0 = [AnnInstr],		% terminate the previous chunk
289		    Chunks0 = [chunk{code:Chunk0,len:Len2,cont:0}|Chunks1] % collect finished chunk
290		;
291                    Len1 = 0,
292		    Chunk1 = Tail1,		% start a new chunk
293		    Tail0 = [],			% terminate the previous chunk
294		    Chunks0 = [chunk{code:Chunk0,len:Len0,cont:L}|Chunks1]	% collect finished chunk
295		)
296
297            ; is_nop(Instr) ->
298                Rejoins0 = Rejoins1,
299                Label0 = Label1,
300                N1 = N0,
301                Len1 = Len0,
302                Chunk1 = Chunk0,
303		Chunks1 = Chunks0,
304	    	Tail1 = Tail0,
305                State = State1   % keep same state
306
307            ; 
308                Label1 = none,
309                ( (State == labels ; State == rejoinlabels) ->
310                    % init. current chunk -- we are in code following a label
311                    % that we want to keep
312                    Label0 = N1,	% instantiate the previous label
313                    succ(N0, N1),	% current chunk number
314                    (State == rejoinlabels ->
315                        Rejoins1 = [N0|Rejoins0] % current is a rejoin chunk
316                    ;
317                        Rejoins1 = Rejoins0
318                    )
319                ;
320                    N0 = N1,
321                    Rejoins1 = Rejoins0
322                ),
323                ( unconditional_transfer(Instr) ->
324                    State1 = unreachable,
325                    ( State == unreachable ->
326                        succ(Len0, Len1),
327                        Chunk1 = Chunk0,
328                        Chunks1 = Chunks0,
329                        Tail1 = Tail0
330                    ;
331                        Len1 = 0,
332                        succ(Len0, Len2),
333                        Chunk1 = Tail1,		% start a new chunk
334                        Tail0 = [AnnInstr],	% terminat current chunk
335			% collect finished chunk
336                        Chunks0 = [chunk{code:Chunk0,len:Len2,cont:0}|Chunks1]
337                    )
338                
339                ; 
340                    succ(Len0, Len1),
341                    Chunk1 = Chunk0,
342                    Chunks0 = Chunks1,
343                    Tail0 = [AnnInstr|Tail1],	% append this instruction
344                    next_state(Instr, State, State1)
345                )
346            )
347        ),
348	( EndState = unreachable ->
349	    Chunks2 = []
350	;
351	    Chunks2 = [chunk{code:LastChunk,len:Len,cont:0}]
352	),
353	verify length(Chunks, N),
354	BasicBlockArray =.. [[]|Chunks].
355
356% determine the next state while in the middle of traversing code
357next_state(Instr, State, NextState) :-
358        ( State == unreachable -> 
359            NextState = unreachable 
360        ; 
361            ( indexing_branch(Instr) -> 
362                NextState = rejoin % following code should be contiguous
363            ; 
364                NextState = normal
365            )
366        ).
367
368    % Unconditional control transfer instructions
369    % Only needs to list instructions generated by the code generator
370    unconditional_transfer(branch(_)).
371    unconditional_transfer(exit).
372    unconditional_transfer(exitd).
373    unconditional_transfer(failure).
374    unconditional_transfer(ret).
375    unconditional_transfer(retd).
376    unconditional_transfer(retn).
377    unconditional_transfer(jmp(_)).
378    unconditional_transfer(jmpd(_)).
379    unconditional_transfer(chain(_)).
380    unconditional_transfer(chaind(_)).
381    unconditional_transfer(trust(_,_)).
382    unconditional_transfer(trust_inline(_,_,_)).
383    % generated instructions from peephole optimiser
384    unconditional_transfer(move_chain(_,_,_)).
385    unconditional_transfer(branchs(_,_)).
386
387    % unconditional control transfer instruction to outside the predicate
388    % Subset of unconditional_transfer/1, plus extra instr from peephole
389    % optimisation.  Keep the two in sync!
390    % Separate definitions for the two to avoid cuts in merged definition
391    unconditional_transfer_out(exit).
392    unconditional_transfer_out(exitd).
393    unconditional_transfer_out(failure).
394    unconditional_transfer_out(ret).
395    unconditional_transfer_out(retd).
396    unconditional_transfer_out(retn).
397/*    unconditional_transfer_out(jmp(_)).
398    unconditional_transfer_out(jmpd(_)).
399    unconditional_transfer_out(chain(_)).
400    unconditional_transfer_out(chaind(_)).
401    % generated instr from peephole optimiser 
402    unconditional_transfer_out(branchs(_,_)).
403    unconditional_transfer_out(jmpd(_,_)).*/
404
405    % these are indexing branch instructions with a default fall-through
406    % case. It is desirable that the fall-through code is contiguous with
407    % the instruction rather than a branch to somewhere else. However, if 
408    % code following is split into a new chunk, the two chunks should be 
409    % rejoined as soon as possible to ensure the final code is contiguous.
410    indexing_branch(try_me_else(_,_,_)).
411    indexing_branch(try(_,_,_)).
412    indexing_branch(retry_me_else(_,_)).
413    indexing_branch(retry_me_inline(_,_,_)).
414    indexing_branch(retry(_,_)).
415    indexing_branch(retry_inline(_,_,_)).
416    indexing_branch(trust_me(_)).
417    indexing_branch(trust_me_inline(_,_)).
418
419
420%----------------------------------------------------------------------
421% inter-chunk reachability and simplifications
422%----------------------------------------------------------------------
423
424% interchunk_simplify is intended to do peephole optimisations across
425% different chunks, connected by refs.
426% mark all reachable chunks by following the continuations and refs.
427% Rejoin any contiguous chunks, unless its first chunk is unreachable
428
429interchunk_simplify(BasicBlockArray, Rejoins, NonRepArray, ReachedArray, Targets) :-
430        find_reached_chunks(BasicBlockArray, NonRepArray, ReachedArray, Targets),
431        rejoin_contiguous_chunks(BasicBlockArray, ReachedArray, Rejoins).
432
433find_reached_chunks(BasicBlockArray, NonRepArray, ReachedArray, Targets) :-
434        functor(BasicBlockArray, F, N),
435        functor(ReachedArray, F, N),
436        functor(TargetArray, F, N), 
437        N1 is N + 1,  % start of extra label id
438        arg(1, ReachedArray, []), % first chunk
439        arg(1, BasicBlockArray, Chunk),
440        find_reached_chunks_(Chunk, BasicBlockArray, NonRepArray, ReachedArray, 
441                             Targets, Targets, TargetArray, N1, _).
442
443find_reached_chunks_(Chunk, BasicBlockArray, NonRepArray, ReachedArray, Targets,
444                     TargetsT0, TargetArray, NL0, NL) :-
445        Chunk = chunk{cont:Cont,code:Code},
446        process_chunk_targets(Code, BasicBlockArray, Cont, NonRepArray, TargetArray, 
447                              NL0, NL1, TargetsT0, TargetsT1, NewCode),
448        setarg(code of chunk, Chunk, NewCode),
449        ( integer(Cont), Cont > 0,    % continue to another chunk 
450          arg(Cont, ReachedArray, ReachedCont), 
451          var(ReachedCont)  % that chunk have not yet been reached
452        -> 
453            ReachedCont =  [],  % Mark as reached
454            arg(Cont, BasicBlockArray, ContChunk)
455        ; 
456            true
457        ),
458        ( nonvar(ContChunk) ->
459            find_reached_chunks_(ContChunk, BasicBlockArray, NonRepArray, ReachedArray, 
460                                 Targets, TargetsT1, TargetArray, NL1, NL)
461        ;
462            find_chunks_in_branch(Targets, BasicBlockArray, NonRepArray, ReachedArray,
463                                  TargetsT1, TargetArray, NL1, NL)
464        ).
465
466find_chunks_in_branch(Targets, BasicBlockArray, NonRepArray, ReachedArray, 
467                      TargetsT, TargetArray, NL0, NL) :-
468        ( var(Targets) ->
469            true % queue empty, done
470        ;
471            Targets = [Target|Targets0],
472            arg(Target, ReachedArray, RefStatus),
473            ( var(RefStatus) ->  % not yet processed
474                RefStatus = [], % process it now, and mark it as reached
475                arg(Target, BasicBlockArray, Chunk),
476                find_reached_chunks_(Chunk, BasicBlockArray, NonRepArray, ReachedArray,
477                    Targets0, TargetsT, TargetArray, NL0, NL)
478            ;
479                find_chunks_in_branch(Targets0, BasicBlockArray, NonRepArray,
480                    ReachedArray, TargetsT, TargetArray, NL0, NL)
481            )
482        ).
483
484% Find all ref()s that refer to unprocessed chunks and queue the labels
485% also perform inter-chunk optimisations by looking at the instructions
486% in the original chunk and the chunks being ref'ed
487process_chunk_targets([Code|Rest0], BasicBlockArray, Cont, NonRepArray, TargetArray, 
488                      NL0, NL, TargetsT0, TargetsT, New) ?-
489        Code = code{instr:I},        
490        process_instr_targets(I, Code, BasicBlockArray, Cont, NonRepArray, TargetArray,
491                              Rest0, Rest1, NL0, NL1, TargetsT0, TargetsT1, New, New1),
492        process_chunk_targets(Rest1, BasicBlockArray, Cont, NonRepArray, TargetArray,
493                              NL1, NL, TargetsT1, TargetsT, New1).
494process_chunk_targets([], _, _, _, _, NL0, NL, TargetsT0, TargetsT, New) ?-
495            NL0 = NL, TargetsT0 = TargetsT, New = [].
496
497
498process_instr_targets(atom_switch(a(A),Table,ref(Def)), Code, BasicBlockArray, _, NonRepArray, TargetArray,
499    Rest0, Rest, NL0, NL, TargetsT0, TargetsT, New, NewT) ?-
500        !,
501        Rest0 = Rest,
502        mark_and_accumulate_targets(Def, TargetArray, TargetsT0, TargetsT1),
503        update_struct(code, [instr:atom_switch(a(A),NewTable,ref(Def))], Code, NewCode),
504        New = [NewCode|NewT],
505        ( foreach(Atom-Ref, Table), 
506          foreach(Atom-NewRef, NewTable),
507          fromto(TargetsT1, TT2,TT3, TargetsT),
508          fromto(NL0, NL1,NL2, NL),
509          param(BasicBlockArray,NonRepArray,TargetArray,A)
510        do
511            skip_subsumed_instr([(get_atom(a(A),Atom),next), 
512                                 (in_get_atom(a(A),Atom),next)], 
513                                Ref, BasicBlockArray, NonRepArray, TargetArray, NL1, NL2, TT2, TT3, NewRef)
514        ).
515process_instr_targets(functor_switch(a(A),Table,ref(Def)), Code, BasicBlockArray, _, NonRepArray, 
516    TargetArray, Rest0, Rest, NL0, NL, TargetsT0, TargetsT, New, NewT) ?- !,
517        Rest0 = Rest,
518        mark_and_accumulate_targets(Def, TargetArray, TargetsT0, TargetsT1),
519        update_struct(code, [instr:functor_switch(a(A),NewTable,ref(Def))], Code, NewCode),
520        New = [NewCode|NewT],
521        ( foreach(Func-FRef, Table), 
522          foreach(Func-NewFRef, NewTable),
523          fromto(TargetsT1, TT2,TT3, TargetsT),
524          fromto(NL0, NL1,NL2, NL),
525          param(BasicBlockArray,NonRepArray,TargetArray,A)
526        do
527            skip_subsumed_instr([(get_structure(a(A),Func,ReadRef),ReadRef),
528                                 (in_get_structure(a(A),Func,InRef),InRef)], 
529                 FRef, BasicBlockArray, NonRepArray, TargetArray, NL1,NL2, TT2,TT3, NewFRef)
530        ). 
531process_instr_targets(integer_switch(a(A),Table,ref(Def)), Code, BasicBlockArray, _, NonRepArray,  
532    TargetArray, Rest0, Rest, NL0, NL, TargetsT0, TargetsT, New, NewT) ?- !,
533        Rest0 = Rest,
534        mark_and_accumulate_targets(Def, TargetArray, TargetsT0, TargetsT1),
535        update_struct(code, [instr:integer_switch(a(A),NewTable,ref(Def))], Code, NewCode),
536        New = [NewCode|NewT],
537        ( foreach(Int-Ref, Table), 
538          foreach(Int-NewRef, NewTable),
539          fromto(TargetsT1, TT2,TT3, TargetsT),
540          fromto(NL0, NL1,NL2, NL),
541          param(BasicBlockArray,NonRepArray,TargetArray,A)
542        do
543            skip_subsumed_instr([(get_integer(a(A),Int),next),
544                                 (in_get_integer(a(A),Int),next)], 
545                                Ref, BasicBlockArray, NonRepArray, TargetArray, NL1, NL2, TT2, TT3, NewRef)
546                                
547        ). 
548process_instr_targets(list_switch(a(A),ListRef,NilRef,ref(VarLab)), Code, BasicBlockArray, _, NonRepArray, TargetArray, 
549    Rest0, Rest, NL0, NL, TargetsT0, TargetsT, New, NewT) ?- !,
550        Rest0 = Rest,
551        mark_and_accumulate_targets(VarLab, TargetArray, TargetsT0, TargetsT1),
552        update_struct(code, [instr:list_switch(a(A),NewListRef,NewNilRef,ref(VarLab))], Code, NewCode),
553        New = [NewCode|NewT],
554        skip_subsumed_instr([(get_list(a(A),ReadRef),ReadRef),
555                            (in_get_list(a(A),InRef),InRef)], ListRef, 
556                            BasicBlockArray, NonRepArray, TargetArray, NL0, NL1, TargetsT1, TargetsT2, NewListRef),
557        skip_subsumed_instr([(get_nil(a(A)),next),
558                             (in_get_nil(a(A)),next)], 
559                            NilRef, BasicBlockArray,  NonRepArray, TargetArray, NL1, NL, TargetsT2, TargetsT, NewNilRef).
560process_instr_targets(switch_on_type(a(A),SwitchList), Code, BasicBlockArray, Cont, NonRepArray, TargetArray, 
561    Rest0, Rest, NL0, NL, TargetsT0, TargetsT, New, NewT) ?- !,
562        update_struct(code, [instr:switch_on_type(a(A),NewSwitchList)], Code, NewCode),
563        New = [NewCode|NewT1],
564        ( Rest0 == [] ->
565            % end of chunk, the fall through case (type = var) continues to 
566            % Cont
567            Rest0 = Rest,
568            ContRef = ref(Cont),
569            subsumed_type_instr(free, A, VSkipCands),
570            skip_subsumed_instr(VSkipCands, ContRef, BasicBlockArray, NonRepArray,
571                                TargetArray, NL0, NL1, TargetsT0, TargetsT1, NewVRef),
572            ( ContRef == NewVRef ->
573                % no subsumed instruction found, no change to following code
574                NewT1 = NewT
575            ;
576                % add a branch to new label
577                NewT1 = [code{instr:branch(NewVRef)}|NewT]
578            )
579        ;
580            % code following switch_on_type in chunk, do nothing with it
581            % for now (could check for subsumed type test that is skipped
582            Rest0 = Rest,
583            NewT1 = NewT,
584            NL0  = NL1,
585            TargetsT0 = TargetsT1
586        ),
587        (
588            foreach(Type:Ref, SwitchList),
589            foreach(Type:NewRef,NewSwitchList),
590            fromto(TargetsT1, TT1,TT2, TargetsT),
591            fromto(NL1, NL2,NL3, NL),
592            param(BasicBlockArray,NonRepArray,TargetArray,A)
593        do
594            ( subsumed_type_instr(Type, A, SkipCandidates) -> 
595                skip_subsumed_instr(SkipCandidates, Ref, BasicBlockArray,
596                    NonRepArray, TargetArray, NL2,NL3, TT1,TT2, NewRef)
597            ;
598                Ref = ref(Label),
599                NL2 = NL3,
600                NewRef = Ref,
601                mark_and_accumulate_targets(Label, TargetArray, TT1, TT2) 
602            )
603        ).
604process_instr_targets(Xs, Code, _BasicBlockArray, _Cont, _NonRepArray, TargetArray, 
605                      Rest, Rest, NL, NL, TargetsT0, TargetsT, New, NewT) :-
606        New = [Code|NewT],
607        find_targets(Xs, TargetArray, TargetsT0, TargetsT).
608
609
610find_targets(ref(L), TargetArray, TargetsT0, TargetsT1) ?- !, 
611        mark_and_accumulate_targets(L, TargetArray, TargetsT0, TargetsT1).
612find_targets(Xs, TargetArray, TargetsT0, TargetsT) :- 
613    	compound(Xs), !,
614	(
615	    foreacharg(X,Xs),
616            fromto(TargetsT0, TargetsT1,TargetsT2, TargetsT),
617	    param(TargetArray)
618	do
619            find_targets(X, TargetArray, TargetsT1, TargetsT2)
620	). 
621find_targets(_, _, TargetsT, TargetsT).
622
623% mark_and_accumulate_targets checks if T is a new target, and mark it in
624% TargetArray if it is new, and add it to the Targets list
625mark_and_accumulate_targets(T, TargetArray, TargetsT0, TargetsT1) :-
626        (
627            integer(T),
628            arg(T, TargetArray, IsNew),
629	    var(IsNew)
630	->
631	    TargetsT0 = [T|TargetsT1],
632            IsNew = []  % mark target chunk as reached
633	;
634	    TargetsT0 = TargetsT1
635	).
636
637% skip_subsumed_instr checks to see if the chunk referenced by BaseRef
638% starts with SkipInstr, which is one of the Candidates of instructions 
639% that is subsumed. If it does, change BaseRef to skip the 
640% instruction, either to the following instruction, or to the target
641% given in NewRef
642skip_subsumed_instr(Candidates, BaseRef, BasicBlockArray, NonRepArray, 
643                    TargetArray,  NL0, NL1, TargetsT0, TargetsT1, NewRef) :-
644        BaseRef = ref(BaseTarget),
645        ( integer(BaseTarget),
646          arg(BaseTarget, BasicBlockArray, Chunk),
647          Chunk = chunk{code:Code},
648          match_skipped_instr(Candidates, SkipInstr, NewRefPos, Code, Rest)
649          %Code = [SkipInstr|Rest] % Base chunk has skipped instr
650        ->  
651            ( NewRefPos == next ->  % new target follows skipped instr  
652                ( Rest = [code{instr:label(NL)}|_] ->
653                    NewCode = [code{instr:SkipInstr}|Rest], % has a label already
654                    NL1 = NL0
655                ;
656                    NL = NL0,               % add a new label
657                    NewCode = [code{instr:SkipInstr},code{instr:label(NL)}|Rest],
658                    NL1 is NL0 + 1
659                ),
660                arg(BaseTarget, NonRepArray, []), % chunk now non-replicatable
661                NewRef = ref(NL),  % move target to after skipped instr 
662                setarg(code of chunk, Chunk, NewCode),
663                % jumping into chunk BaseTarget, so mark it if needed
664                mark_and_accumulate_targets(BaseTarget, TargetArray, TargetsT0, TargetsT1)
665            ; NewRefPos = ref(NewTarget) -> % new target is an existing label
666                NL1 = NL0,
667                NewRef = NewRefPos,
668                mark_and_accumulate_targets(NewTarget, TargetArray, TargetsT0, TargetsT1)
669            ;   % don't know where new target is
670                TargetsT0 = TargetsT1,
671                NL0 = NL1,
672                NewRef = BaseRef
673            )
674        ;   % SkipInstr not matched
675            NL0 = NL1,
676            NewRef = BaseRef,
677            mark_and_accumulate_targets(BaseTarget, TargetArray, TargetsT0, TargetsT1)
678        ).
679
680match_skipped_instr([(Candidate,NewRef0)|Candidates], SkipInstr, NewRef, Code, Rest) :-
681        ( Code = [code{instr:Candidate}|Rest] ->
682            SkipInstr = Candidate,
683            NewRef0 = NewRef
684        ;
685            match_skipped_instr(Candidates, SkipInstr, NewRef, Code, Rest)
686        ).
687
688
689% the (mainly) type test instructions that are subsumed by the type
690% switches of switch_on_type
691subsumed_type_instr(meta, A, [(bi_var(a(A)),next),(bi_meta(a(A)),next),(in_get_meta(a(A),_),next)]).
692subsumed_type_instr([], A, [(get_nil(a(A)),next),(bi_atom(a(A)),next),
693                            (bi_atomic(a(A)),next),(bi_callable(a(A)),next),
694                            (bi_nonvar(a(A)),next),(in_get_nil(a(A)),next),
695                            (bi_nonvar(a(A)),next)]).
696subsumed_type_instr(atom, A, [(bi_atom(a(A)),next),(bi_atomic(a(A)),next),
697                              (bi_callable(a(A)),next),(bi_nonvar(a(A)),next)]).
698subsumed_type_instr(bignum, A, [(bi_number(a(A)),next),(bi_integer(a(A)),next),
699                                (bi_bignum(a(A)),next),(bi_atomic(a(A)),next),
700                                (bi_nonvar(a(A)),next)]).
701subsumed_type_instr(integer, A, [(bi_number(a(A)),next),(bi_integer(a(A)),next),
702                                (bi_atomic(a(A)),next),(bi_nonvar(a(A)),next)]).
703subsumed_type_instr(breal, A, [(bi_number(a(A)),next),(bi_real(a(A)),next),
704                               (bi_breal(a(A)),next),(bi_nonvar(a(A)),next),(bi_atomic(a(A)),next)]).
705subsumed_type_instr(double, A, [(bi_number(a(A)),next),(bi_real(a(A)),next),
706                               (bi_float(a(A)),next),(bi_nonvar(a(A)),next),(bi_atomic(a(A)),next)]).
707subsumed_type_instr(goal, A, [(bi_atomic(a(A)),next),(bi_nonvar(a(A)),next)]).
708subsumed_type_instr(handle, A, [(bi_is_handle(a(A)),next),(bi_nonvar(a(A)),next),(bi_atomic(a(A)),next)]).
709subsumed_type_instr(list, A, [(bi_compound(a(A)),next),
710                                (bi_callable(a(A)),next),(bi_nonvar(a(A)),next)]).
711subsumed_type_instr(rational, A, [(bi_number(a(A)),next),(bi_rational(a(A)),next),(bi_nonvar(a(A)),next),
712				(bi_atomic(a(A)),next)]).
713subsumed_type_instr(string, A, [(bi_atomic(a(A)),next),(bi_string(a(A)),next),(bi_nonvar(a(A)),next)]).
714subsumed_type_instr(structure, A, [(bi_compound(a(A)),next),
715				(bi_callable(a(A)),next),(bi_nonvar(a(A)),next)]).
716subsumed_type_instr(free, A, [(bi_var(a(A)),next),(bi_free(a(A)),next)]).
717
718% rejoin adjacent chunks that should be contiguous if the first chunk
719% is reached. Rejoins must have later chunks first in the list because more 
720% after rejoining two chunks, the rejoined chunk can be rejoined with the
721% previous chunk
722rejoin_contiguous_chunks(BasicBlockArray, ReachedArray, Rejoins) :-
723        (foreach(R, Rejoins), param(BasicBlockArray, ReachedArray) do
724            arg(R, BasicBlockArray, chunk{len:Len,code:Code}),
725            arg(R, ReachedArray, Reached),
726            ( nonvar(Reached) -> % first chunk of rejoin chunks reached? 
727                succ(R, NextC),  % yes, rejoin with succeeding chunk
728                % succeeding chunk mark as processed
729                arg(NextC, BasicBlockArray, NextChunk),
730                NextChunk = chunk{len:NextLen,code:NextCode,cont:NextCont,done:done},
731                append(Code, [code{instr:label(NextC)}|NextCode],NewCode),
732                NewLen is Len + NextLen,
733                setarg(R, BasicBlockArray, chunk{len:NewLen,code:NewCode, cont:NextCont}),
734                setarg(cont of chunk, NextChunk, 0)  % get rid of the old continuation in the discarded chunk
735            ;
736                % first chunk not reached, so don't join
737                true
738            )
739        ).
740
741% NonRepArray indicates which chunks should not be replicated -- currently
742% chunks that contains labels (i.e. rejoined chunks)
743make_nonreplicate_array(BasicBlockArray, Rejoins, NonRepArray) :-
744        functor(BasicBlockArray, F, A),
745        functor(NonRepArray, F, A),
746        ( foreach(R, Rejoins), param(NonRepArray) do
747            R1 is R + 1,
748            arg(R, NonRepArray, []), 
749            arg(R1, NonRepArray, [])
750        ).
751
752% Joins a chunk to its continuation if the continuation is short, and can
753% be replicated -- i.e. there are no labels inside the continuation chunk.
754% An optimisation is that if the continuation immediately jumps elsewhere,
755% the continuation of the chunk is simply updated.
756join_short_continuations(BasicBlockArray, ReachedArray, NonRepArray, ContArray, RefedArray, JoinedArray) :-
757        (
758	    foreacharg(Chunk,BasicBlockArray,I),
759	    param(BasicBlockArray,NonRepArray,ReachedArray,ContArray,RefedArray,JoinedArray)
760	do
761            Chunk = chunk{cont:Cont,len:Len,code:Code,done:Done},
762            ( Cont == 0 ->
763                true % no continuatipn to join
764            ; nonvar(Done) ->
765                true % nonvar if chunk discarded, don't join
766            ;
767                arg(Cont, BasicBlockArray, NextChunk),
768                NextChunk = chunk{len:ContLen,code:ContCode,cont:ContCont},
769                arg(I, ReachedArray, ReachedI),
770                ( var(ReachedI) ->
771                    true % chunk not reached, don't join
772                ; arg(Cont, NonRepArray, NonRepC), nonvar(NonRepC)  ->
773                    true  % next chunk should not be replicated -- don't join
774                ; arg(Cont, ContArray, ContStatus), ContStatus \== r([]),
775                  arg(Cont, RefedArray, Refed), nonvar(Refed) ->
776                    true % cont chunk is continuation for one (i.e. this) chunk
777                         % only, and is referenced, don't join now
778                ;
779                    arg(Cont, BasicBlockArray, NextChunk),
780                    NextChunk = chunk{len:ContLen,code:ContCode,cont:ContCont},
781                    ( ContLen > max_joined_len ->
782                        true
783                    ; 
784                        % Join NextChunk to chunk I.
785                        % mark NextChunk as joined, and update ContArray
786                        % if this is not the first time NextChunk is joined
787                        % because NextChunk is now replicated and so is its
788                        % continuation (ContCont)
789                        arg(Cont, JoinedArray, Joined),
790                        ( var(Joined) ->
791                            Joined = []
792                        ;
793                            (ContCont > 0 ->
794                                % make sure ContCont is now marked as
795                                % having multiple continuations
796                                arg(ContCont, ContArray, r([]))
797                            ;
798                                true
799                            )
800                        ),
801
802                        append(Code, ContCode, NewCode),
803                        NewLen is Len+ContLen,
804                        setarg(I, BasicBlockArray, chunk{code:NewCode,len:NewLen,cont:ContCont})
805                    )
806                )
807            )
808        ).
809
810
811% Flatten the BasicBlockArray into a WAM code list.
812% We emit only the reachable chunks, by collecting all ref()s in the code,
813% and filter for those ref()s that are not continued into, i.e. start of
814% branches, plus chunks that have been joined but are ref'ed as well..
815% The done-flag in the array indicates whether the chunk has already been
816% processed.
817
818basic_blocks_to_flat_code(BasicBlockArray, Reached, JoinedArray, ReachedArray, Code) :-
819	(
820	    fromto(1,I,NextI,0),			% current chunk
821	    fromto(1,PrevCont,Cont,_),			% prev. chunk's continuation
822	    fromto(Reached,Reached1,Reached2,_),	% branches (queue)
823	    fromto(Code,Code0,Code3,[]),		% result list
824	    param(BasicBlockArray, JoinedArray, ReachedArray)
825	do
826	    arg(I, BasicBlockArray, Chunk),
827	    Chunk = chunk{code:ChunkCode,done:Done,cont:Cont0},
828	    ( var(Done) ->
829		% process chunk: append code to final code list
830		Done = done,
831		Cont = Cont0,
832                Code0 = [code{instr:label(I)}|Code1],
833		append(ChunkCode, Code2, Code1)
834	    ; PrevCont == I ->
835		% previous chunk continues into this one, but it has already
836		% been emitted, so we need a branch 
837                % can't copy because 
838                %  1) chunk may have labels
839                %  2) no length info for chunk because of simplification
840                Code0 = [code{instr:branch(ref(I))}|Code2],
841		Cont = 0
842	    ;
843		Cont = 0,
844		Code0 = Code2
845	    ),
846	    % Choose the next chunk to process: prefer the current chunk's
847	    % continuation, otherwise pick one from the queue
848	    ( Cont > 0 ->
849               ( should_continue_branch(Cont, I, BasicBlockArray, JoinedArray, ReachedArray) ->
850                   Code2 = [code{instr:branch(ref(Cont))}|Code3],   
851                   Reached1 = [NextI|Reached2]		% don't use continuation
852               ;
853                   Code2 = Code3,
854                   NextI = Cont, Reached1 = Reached2	% use continuation
855               )
856	    ; Reached1 == [] ->
857                Code2 = Code3,
858	    	NextI = 0				% queue empty, finished
859	    ;
860                Code2 = Code3,
861		Reached1 = [NextI|Reached2]		% pick from queue
862	    )
863	).
864
865/* should_continue_branch(Cont, Current, BasicBlockArray, JoinArraye, ReachedArray)
866   determines if the continuation chunk should be appended to the 
867   current one, or if a new branch started. The idea is to preserve
868   the original branching if possible, to preserve any optimisation
869   performed by the compiler. However, if a chunk is already joined
870   (e.g. by joing short continuations), then do not try to preserve
871   original branching as chunk may be replicated
872*/
873should_continue_branch(Cont, Current, BasicBlockArray, JoinedArray, ReachedArray) :-
874        Cont =\= Current + 1, % Continuation is not next chunk
875        BeforeCont is Cont - 1, 
876        BeforeCont \== 0,
877        arg(BeforeCont, BasicBlockArray, BeforeChunk),
878        BeforeChunk = chunk{done:Done,cont:Cont}, 
879        % BeforeChunk continues into Continue (i.e. original branching)
880        var(Done),
881        arg(BeforeCont, JoinedArray, BeforeJoined),
882        var(BeforeJoined),    % BeforeChunk was not joined early
883        arg(BeforeCont, ReachedArray, BeforeReached),
884        nonvar(BeforeReached). % check that BeforeChunk is not dead code 
885
886%----------------------------------------------------------------------
887% simplify a basic block
888%----------------------------------------------------------------------
889
890% simplify_chunk leaves the annotations around instructions so that it can be
891% run multiple times on a chunk.
892% Every time we make a simplification, we back up 2 instructions to the
893% left, and try to simplify again. These two instructions are in the two
894% (possibly empty) difference lists Rescan1 and Rescan2.
895
896simplify_chunk(Code, SimplifiedCode) :-
897	simplify_chunk(Empty1, Empty1, Empty2, Empty2, Code, SimplifiedCode).
898
899:- mode simplify_chunk(?,?,?,?,+,-).
900simplify_chunk(Rescan1, Rescan2, Rescan2, [], [], Rescan1).
901simplify_chunk(Rescan1, RescanT1, Rescan2, RescanT2, [AnnInstr|More], AllSimplified) :-
902        AnnInstr = code{instr:Instr},
903        ( simplify(Instr, AnnInstr, More, Simplified, MoreTail, SimplifiedTail) ->
904%	    log(Instr, More, Simplified),
905	    % We transformed Instr+More -> Simplified
906	    % Now simplify Rescan+Simplified+Moretail
907	    RescanT1 = Rescan2, RescanT2 = Simplified, SimplifiedTail = MoreTail,
908	    simplify_chunk(Empty1, Empty1, Empty2, Empty2, Rescan1, AllSimplified)
909	;
910	    % Instr which couldn't be simplified goes into rescan2,
911	    % and the old rescan1 goes into the final code.
912	    AllSimplified = Rescan1,
913	    simplify_chunk(Rescan2, RescanT2, [AnnInstr|Tail], Tail, More, RescanT1)
914
915	    % Only 1 instruction back:
916%	    AllSimplified = Rescan2,
917%	    simplify_chunk(Rescan1, RescanT1, [AnnInstr|Tail], Tail, More, RescanT2)
918	).
919
920
921log(Instr, More, Simplified) :-
922	code_instr(More, Next),
923	code_instr(Simplified, Simp),
924	writeln(Instr+Next->Simp).
925
926code_instr(X, []) :- var(X), !.
927code_instr([], []) :- !.
928code_instr([code{instr:Instr}|_], Instr).
929
930
931is_nop(nop) ?- true.
932is_nop(move(X,X)) ?- true.
933is_nop(gc_test(0)) ?- true.
934is_nop(initialize([])) ?- true.
935
936
937% simplify(+Instr, +Code, +Follow, -New, -FollowTail, -NewTail)
938% New is where the simplified annotated instruction goes, with an 
939% uninstantiated NewTail FollowTail is the tail of the existing following 
940% instruction, with the head being the next instruction to simplified.
941% Code is the annotated version of Instr. Instr is extracted to allow
942% for indexing. This must fail if no simplification is done!
943
944simplify(nop, _, More, New, MoreT, NewT) ?- !, 
945        NewT = New,
946        MoreT = More.
947
948simplify(gc_test(N), _, More, New, MoreT, NewT) ?-
949	( N==0 ->
950	    true
951	;
952	    N =< #wam_max_global_push,
953	    % The following test is necessary to retain small gc_tests in
954	    % the (rare) case of initialisation code at the end of branches!
955	    More = [code{instr:Instr}|_], Instr \= put_global_variable(y(_))
956	),
957	!, 
958        NewT = New,
959        MoreT = More.
960
961simplify(move(X,X), _, More, New, MoreT, NewT) ?- !, 
962        NewT = New,
963        MoreT = More.
964
965simplify(initialize(y([])), _, More, New, MoreT, NewT) ?- !, 
966        NewT = New,
967        MoreT = More.
968
969simplify(deallocate, _Code, [code{instr:ret}|More], New, MoreT, NewT) ?- !,
970        MoreT = More,
971        New = [code{instr:exit}|NewT].
972
973simplify(jmp(_), Code, [code{instr:ret}|More], New, MoreT, NewT) ?- !,
974        MoreT = More,
975        New = [Code|NewT].
976
977simplify(chain(_), Code, [code{instr:ret}|More], New, MoreT, NewT) ?- !,
978        MoreT = More,
979        New = [Code|NewT].
980
981simplify(move_chain(_,_,_), Code, [code{instr:ret}|More], New, MoreT, NewT) ?- !,
982        MoreT = More,
983        New = [Code|NewT].
984
985simplify(callf(P,eam(0)), Code, [code{instr:Instr}|More], New, MoreT, NewT) ?- !,
986        MoreT = More,
987        New = [NewCode|NewT],
988        % body goals order rearranged here to avoid old compiler bug
989        update_struct(code, instr:NewInstr, Code, NewCode),
990	simplify_call(P, Instr, NewInstr).
991
992simplify(move_callf(Y,A,P,eam(0)), Code, [code{instr:exit}|More], New, MoreT, NewT) ?- !,
993        MoreT = More,
994        New = [NewCode|NewT],
995	true,
996        update_struct(code, instr:move_chain(Y,A,P), Code, NewCode).
997
998simplify(call(P,eam(0)), Code, [code{instr:Instr}|More], New, MoreT, NewT) ?- !,
999        MoreT = More,
1000	New = [NewCode|NewT],
1001        % body goals order rearranged here to avoid old compiler bug
1002        update_struct(code, instr:NewInstr, Code, NewCode),
1003	simplify_call(P, Instr, NewInstr).
1004
1005/*simplify(cut(y(1),_N), [exit|More], New) ?- !,
1006        New = [exitc|More].
1007*/
1008simplify(savecut(AY), Code, [code{instr:cut(AY)}|More], New, MoreT, NewT) ?- !,
1009        % remove cut(..) and allow savecut(..) to be examined again for further simplifications
1010        MoreT = [Code|More],
1011        New = NewT.
1012
1013simplify(savecut(AY), Code, [code{instr:cut(AY,_)}|More], New, MoreT, NewT) ?- !,
1014        % remove cut(..) and allow savecut(..) to be examined again for further simplifications
1015        MoreT = [Code|More],
1016        New = NewT.
1017
1018simplify(savecut(_), _, More, New, MoreT, NewT) ?- !,
1019        More = [code{instr:Instr}|_],
1020        New = NewT,
1021        More = MoreT,
1022        unconditional_transfer_out(Instr).
1023
1024simplify(cut(A), Code, [code{instr:cut(A)}|More], New, MoreT, NewT) ?- !,
1025        New = [Code|NewT],
1026        More = MoreT.
1027
1028simplify(cut(AY,E), Code, [code{instr:cut(AY,E)}|More], New, MoreT, NewT) ?- !,
1029        New = [Code|NewT],
1030        More = MoreT.
1031
1032/*simplify(push_structure(B), [write_did(F/A)|More], New) ?- !,
1033        B is A + 1,
1034        New = [write_structure(F/A)|More].
1035*/
1036simplify(allocate(N), _, [code{instr:move(a(I),y(J)),regs:Regs}|More], New, MoreT, NewT) ?- !,
1037        More = MoreT,
1038        New = [code{instr:get_variable(N, a(I), y(J)),regs:Regs}|NewT].
1039
1040simplify(allocate(N), _, [code{instr:chain(P)}|Rest], New, RestT, NewT) ?- !,
1041	verify N==0,
1042        New = [code{instr:jmp(P)}|NewT],
1043        RestT = Rest.
1044
1045simplify(space(N), _, [code{instr:branch(L)}|More], New, MoreT, NewT) ?- !,
1046        More = MoreT,
1047        New = [code{instr:branchs(N,L)}|NewT].
1048
1049simplify(space(N), _, [code{instr:exit}|More], New, MoreT, NewT) ?- !,
1050        More = MoreT,
1051        New = [code{instr:exits(N)}|NewT].
1052/*        
1053simplify(space(N), _, [code{instr:jmpd(L)}|More], New, MoreT, NewT) ?- !,
1054        More = MoreT,
1055        New = [code{instr:jmpd(N,L)}|NewT].
1056*/
1057	% the code generator compiles attribute unification as if it were
1058	% unifying a meta/N structure. Since attribute_name->slot mapping
1059	% can change between sessions, we transform sequences like
1060	%	read_attribute suspend		(where suspend->1)
1061	%	read_void*			(N times)
1062	%	read_xxx			(match actual attribute)
1063	% into
1064	%	read_attribute name		(where name->N)
1065	%	read_xxx			(match actual attribute)
1066	% to make the code session-independent. Note that this cannot cope
1067	% with multiple attributes being matched at once. This restriction
1068	% also exists in the old compiler; lifting it requires a different
1069	% compilation scheme with probably new instructions.
1070simplify(read_attribute(FirstName), _, More0, New, MoreT, NewT) ?-
1071	meta_index(FirstName, I0),
1072	count_same_instr(More0, read_void, I0, I, MoreT),
1073	I > I0,
1074	!,
1075	( meta_index(Name, I) ->
1076	    New = [code{instr:read_attribute(Name)}|NewT]
1077	;
1078	    % as many or more read_voids than attributes
1079	    New = NewT
1080	).
1081
1082simplify(read_void, _, [code{instr:read_void}|Rest0], New, RestT, NewT) ?- !,
1083	count_same_instr(Rest0, read_void, 2, N, RestT),
1084	New = [code{instr:read_void(N)}|NewT].
1085
1086simplify(write_void, _, [code{instr:write_void}|Rest0], New, RestT, NewT) ?- !,
1087        count_same_instr(Rest0, write_void, 2, N, RestT),
1088        New = [code{instr:write_void(N)}|NewT]. 
1089
1090simplify(push_void, _, [code{instr:push_void}|Rest0], New, RestT, NewT) ?- !,
1091        count_same_instr(Rest0, push_void, 2, N, RestT),
1092        New = [code{instr:push_void(N)}|NewT]. 
1093
1094simplify(move(y(Y1),a(A1)), _, [AnnInstr0|Rest0], New, RestT, NewT) ?- 
1095        AnnInstr0 = code{instr:move(y(Y2),a(A2))}, !,
1096        ( A2 =:= A1 + 1, Y2 =:= Y1 + 1 ->
1097            % the arguments for the moves are consecutive
1098            extract_conargs_moves(Rest0, move(y(Y),a(A)), Y, A, Y1, A1, 2, N, RestT),
1099            New = [code{instr:move(N,y(Y1),a(A1))}|NewT]
1100        ;
1101            MoveInstrs = [move(y(Y1),a(A1))|MoveInstrs0],
1102            extract_nonconargs_moves(Rest0, move(y(_),a(_)), AnnInstr0, Y2, A2, MoveInstrs0, RestT),
1103            MoveInstrs0 \= [], % no compact possible with single move.
1104            compact_moves(MoveInstrs, New, NewT)
1105        ).
1106
1107simplify(move(a(A1),y(Y1)), _, [AnnInstr0|Rest0], New, RestT, NewT) ?- 
1108        AnnInstr0 = code{instr:move(a(A2),y(Y2))}, !,
1109        ( A2 =:= A1 + 1, Y2 =:= Y1 + 1 ->
1110            % the arguments for the moves are consecutive
1111            extract_conargs_moves(Rest0, move(a(A),y(Y)), A, Y, A1, Y1, 2, N, RestT),
1112            New = [code{instr:move(N,a(A1),y(Y1))}|NewT]
1113        ;
1114            MoveInstrs = [move(a(A1),y(Y1))|MoveInstrs0],
1115            extract_nonconargs_moves(Rest0, move(a(_),y(_)), AnnInstr0, A2, Y2, MoveInstrs0, RestT),
1116            MoveInstrs0 \= [], % no compact possible with single move
1117            compact_moves(MoveInstrs, New, NewT)
1118        ).
1119
1120simplify(move(y(Y),a(A)), _, [code{instr:callf(P,EAM)}|Rest0], New, RestT, NewT) ?- !,
1121        New = [code{instr:move_callf(y(Y),a(A),P,EAM)}|NewT],
1122        RestT = Rest0.
1123
1124simplify(move(y(Y),a(A)), _, [code{instr:chain(P)}|Rest], New, RestT, NewT) ?- !,
1125        New = [code{instr:move_chain(y(Y),a(A),P)}|NewT],
1126        RestT = Rest.
1127
1128simplify(put_global_variable(a(A),y(Y)), _, [code{instr:callf(P,EAM)}|Rest0], New, RestT, NewT) ?- !,
1129        New = [code{instr:put_global_variable_callf(a(A),y(Y),P,EAM)}|NewT],
1130        RestT = Rest0.
1131
1132simplify(move(a(A1),a(A2)), Code, Rest, New, RestT, NewT) ?- !,
1133        Code = code{regs:Regs},
1134        extract_moveaas(Rest, Moves, RegInfos, RestT), 
1135	Moves \= [],
1136        simplify_moveaas([A1>A2|Moves], [Regs|RegInfos], New, NewT).
1137
1138simplify(move(y(Y1),y(Y2)), _, [code{instr:move(y(Y3),y(Y4))}|Rest], New, RestT, NewT) ?- !,
1139        ( Rest = [code{instr:move(y(Y5),y(Y6))}|Rest0] ->
1140            New = [code{instr:move(y(Y1),y(Y2),y(Y3),y(Y4),y(Y5),y(Y6))}|NewT],
1141            Rest0 = RestT
1142        ;
1143            New = [code{instr:move(y(Y1),y(Y2),y(Y3),y(Y4))}|NewT],
1144            Rest = RestT
1145        ).
1146
1147simplify(read_variable(a(A1)), _, [code{instr:read_variable(a(A2))}|Rest], New, RestT, NewT) ?- !,
1148        New = [code{instr:read_variable2(a(A1),a(A2))}|NewT],
1149        RestT = Rest.
1150
1151simplify(read_variable(a(A1)), _, [code{instr:read_variable(y(Y2))}|Rest], New, RestT, NewT) ?- !,
1152        New = [code{instr:read_variable2(a(A1),y(Y2))}|NewT],
1153        RestT = Rest.
1154
1155simplify(write_variable(a(A1)), _, [code{instr:write_variable(a(A2))}|Rest], New, RestT, NewT) ?- !,
1156        New = [code{instr:write_variable2(a(A1),a(A2))}|NewT],
1157        RestT = Rest.
1158
1159simplify(push_variable(a(A1)), _, [code{instr:push_variable(a(A2))}|Rest], New, RestT, NewT) ?- !,
1160        New = [code{instr:write_variable2(a(A1),a(A2))}|NewT],
1161        RestT = Rest.
1162
1163simplify(write_variable(a(A1)), _, [code{instr:write_variable(y(Y2))}|Rest], New, RestT, NewT) ?- !,
1164        New = [code{instr:write_variable2(a(A1),y(Y2))}|NewT],
1165        RestT = Rest.
1166
1167simplify(read_variable(y(Y1)), _, [code{instr:read_variable(y(Y2))}|Rest], New, RestT, NewT) ?- !,
1168        New = [code{instr:read_variable2(y(Y1),y(Y2))}|NewT],
1169        RestT = Rest.
1170
1171simplify(write_variable(y(Y1)), _, [code{instr:write_variable(y(Y2))}|Rest], New, RestT, NewT) ?- !,
1172        New = [code{instr:write_variable2(y(Y1),y(Y2))}|NewT],
1173        RestT = Rest.
1174
1175simplify(push_variable(y(Y1)), _, [code{instr:push_variable(y(Y2))}|Rest], New, RestT, NewT) ?- !,
1176        New = [code{instr:write_variable2(y(Y1),y(Y2))}|NewT],
1177        RestT = Rest.
1178
1179simplify(write_local_value(a(A1)), _, [code{instr:write_local_value(a(A2))}|Rest], New, RestT, NewT) ?- !,
1180        New = [code{instr:write_local_value2(a(A1),a(A2))}|NewT],
1181        RestT = Rest.
1182
1183simplify(write_local_value(y(Y1)), _, [code{instr:write_local_value(y(Y2))}|Rest], New, RestT, NewT) ?- !,
1184        New = [code{instr:write_local_value2(y(Y1),y(Y2))}|NewT],
1185        RestT = Rest.
1186
1187simplify(push_local_value(a(A1)), _, [code{instr:push_local_value(a(A2))}|Rest], New, RestT, NewT) ?- !,
1188        New = [code{instr:push_local_value2(a(A1),a(A2))}|NewT],
1189        RestT = Rest.
1190
1191simplify(push_local_value(y(Y1)), _, [code{instr:push_local_value(y(Y2))}|Rest], New, RestT, NewT) ?- !,
1192        New = [code{instr:push_local_value2(y(Y1),y(Y2))}|NewT],
1193        RestT = Rest.
1194
1195simplify(put_global_variable(a(A1),y(Y1)), _, [code{instr:put_global_variable(a(A2),y(Y2))}|Rest], New, RestT, NewT) ?- !,
1196        New = [code{instr:put_global_variable2(a(A1),y(Y1),a(A2),y(Y2))}|NewT],
1197        RestT = Rest.
1198
1199simplify(put_variable(a(A1)), _, [code{instr:put_variable(a(A2))}|Rest], New, RestT, NewT) ?- !,
1200        New = [code{instr:put_variable2(a(A1),a(A2))}|NewT],
1201        RestT = Rest.
1202
1203simplify(write_integer(C1), _, [code{instr:write_integer(C2)}|Rest], New, RestT, NewT) ?- !,
1204        New = [code{instr:write_integer2(C1,C2)}|NewT],
1205        RestT = Rest.
1206
1207/* push_integer = write_integer in emu.c */
1208simplify(push_integer(C1), _, [code{instr:push_integer(C2)}|Rest], New, RestT, NewT) ?- !,
1209        New = [code{instr:write_integer2(C1,C2)}|NewT],
1210        RestT = Rest.
1211
1212simplify(write_atom(C1), _, [code{instr:write_atom(C2)}|Rest], New, RestT, NewT) ?- !,
1213        New = [code{instr:write_atom2(C1,C2)}|NewT],
1214        RestT = Rest.
1215
1216simplify(write_atom(C1), _, [code{instr:write_did(C2)}|Rest], New, RestT, NewT) ?- !,
1217        New = [code{instr:write_atomdid(C1,C2)}|NewT],
1218        RestT = Rest.
1219
1220simplify(write_did(C1), _, [code{instr:write_did(C2)}|Rest], New, RestT, NewT) ?- !,
1221        New = [code{instr:write_did2(C1,C2)}|NewT],
1222        RestT = Rest.
1223
1224simplify(write_did(C1), _, [code{instr:write_atom(C2)}|Rest], New, RestT, NewT) ?- !,
1225        New = [code{instr:write_didatom(C1,C2)}|NewT],
1226        RestT = Rest.
1227
1228simplify(write_atom(C1), _, [code{instr:write_integer(C2)}|Rest], New, RestT, NewT) ?- !,
1229        New = [code{instr:write_atominteger(C1,C2)}|NewT],
1230        RestT = Rest.
1231
1232simplify(write_did(C1), _, [code{instr:write_integer(C2)}|Rest], New, RestT, NewT) ?- !,
1233        New = [code{instr:write_didinteger(C1,C2)}|NewT],
1234        RestT = Rest.
1235
1236% broken instruction
1237%simplify(read_atom(C1), _, [code{instr:read_integer(C2)}|Rest], New, RestT, NewT) ?- !,
1238%        New = [code{instr:read_atominteger(C1,C2)}|NewT],
1239%        RestT = Rest.
1240
1241simplify(write_integer(C1), _, [code{instr:write_atom(C2)}|Rest], New, RestT, NewT) ?- !,
1242        New = [code{instr:write_integeratom(C1,C2)}|NewT],
1243        RestT = Rest.
1244
1245simplify(write_integer(C1), _, [code{instr:write_did(C2)}|Rest], New, RestT, NewT) ?- !,
1246        New = [code{instr:write_integerdid(C1,C2)}|NewT],
1247        RestT = Rest.
1248
1249% broken instruction
1250%simplify(read_integer(C1), _, [code{instr:read_atom(C2)}|Rest], New, RestT, NewT) ?- !,
1251%        New = [code{instr:read_integeratom(C1,C2)}|NewT],
1252%        RestT = Rest.
1253
1254
1255/* extract consecutive move a(_) a(_) insturctions for further optimisation 
1256   MoveRegs is a list of the arg register pairs for each move instruction,
1257   and RegInfos is a list of the corresponding regs field for the instruction 
1258*/ 
1259:- mode extract_moveaas(+, -, -, -).
1260extract_moveaas([code{instr:move(a(A1),a(A2)),regs:RegI}|Rest], MoveRegs, RegInfos, RestT) ?- !,
1261        MoveRegs = [A1>A2|MoveRegs1], % use '>' to suggest move direction
1262        RegInfos = [RegI|RegInfos1],
1263        extract_moveaas(Rest, MoveRegs1, RegInfos1, RestT).
1264extract_moveaas(Rest, [], [], Rest).
1265
1266/* simplify the move a(_) a(_) sequence by
1267   1. extracting sequences of move a(_) a(_) which are shifting the value between a
1268      chain of registers (e.g A1<-A2...<-An). The move instr may be non-consecutive, 
1269      effectively rearranging the order of the moves: move a(Source) a(Dest) can be
1270      done earlier as long as the intervening moves does not:
1271         a) overwrite Source (i.e. Source register is not a destination for intervening moves)
1272         b) use the contents of Dest (i.e. Dest register is not a source for intervening moves)
1273      A chain is represented as a list [a(A1),a(A2)...] which represents the chain A1<-A2...
1274   2. convert these chains to the following instructions, in order of preference:
1275         a) rotate type instruction  A1<-A2...<-A1
1276         b) shift type instruction A1<-A2 ...<-An
1277         c) multiple move instruction (non-chained moves) 
1278*/
1279simplify_moveaas(Regs, RegInfos, New, NewT) :-
1280        extract_reg_chains(Regs, RegInfos, Chains, [], ChainInfos, []),
1281        convert_chains_to_instrs(Chains, ChainInfos, 0, [], New, NewT).
1282
1283extract_reg_chains([], [], Chains, ChainsT, ChainInfos, ChainInfosT) ?- 
1284        Chains = ChainsT,
1285        ChainInfos = ChainInfosT.
1286extract_reg_chains([A1>A2|Regs0], [[A1Info,A2Info]|RegInfos0], Chains, ChainsT, ChainInfos, ChainInfosT) :-
1287        ( find_reg_chain(A1, A1Info, Regs0, RegInfos0, [], [], Regs1, RegInfos1, Chained1, CInfo1),
1288          Chained1 \= [_] % no chain found if there is only  one element 
1289        ->
1290        
1291            Chains =[[a(A2)|Chained1]|Chains1],
1292            ChainInfos = [[A2Info|CInfo1]|ChainInfos1]
1293            
1294        ;
1295            Chains = [[a(A1),a(A2)]|Chains1],
1296            ChainInfos = [[A1Info,A2Info]|ChainInfos1],
1297            RegInfos0 = RegInfos1,
1298            Regs0 = Regs1
1299        ),
1300        % try find more chains in remaining move instructions
1301        extract_reg_chains(Regs1, RegInfos1, Chains1, ChainsT, ChainInfos1, ChainInfosT).
1302
1303find_reg_chain(S, SInfo, [], [], _UnmovedSs, _UnmovedDs, RegsOut, RInfosOut, Chained, CInfo) ?- !,
1304        RegsOut = [],
1305        RInfosOut = [],
1306        Chained = [a(S)],
1307        CInfo = [SInfo].
1308find_reg_chain(S0, S0Info, [RegPair|Regs1], [RPairInfo|RInfos1], UnmovedSs0, UnmovedDs0, 
1309               RegsOut, RInfosOut, Chained, CInfo) :-
1310        RegPair = (S1>D1),
1311        RPairInfo = [S1Info,D1Info],
1312        (  D1 == S0  ->
1313            % S1>D1 match for chain, can it be added to chain?
1314            ( nonmember(D1, UnmovedSs0), % not a source in intervening moves
1315              nonmember(S1, UnmovedDs0),  % not a destination in intervening moves
1316              check_source_reg_info(S1, S1Info, UnmovedSs0)
1317            ->
1318                % add to chain
1319                Chained = [a(D1)|Chained1],
1320                CInfo = [D1Info|CInfo1],
1321                find_reg_chain(S1, S1Info, Regs1, RInfos1, UnmovedSs0,
1322                               UnmovedDs0, RegsOut, RInfosOut, Chained1, CInfo1)
1323            ;
1324                % intervening moves prevent S1>D1 to be part of the chain,
1325                % stop now
1326                RegsOut = [],
1327                RInfosOut = [],
1328                Chained = [a(S0)],
1329                CInfo = [S0Info]
1330                
1331            )
1332        ;
1333            % S1>D1 does not match for current chain. Try matching with
1334            % subsequent moves
1335            RegsOut = [RegPair|RegsOut1],
1336            RInfosOut = [RPairInfo|RInfosOut1],
1337            find_reg_chain(S0, S0Info, Regs1, RInfos1, [S1|UnmovedSs0], [D1|UnmovedDs0], 
1338                           RegsOut1, RInfosOut1, Chained, CInfo)
1339        ).
1340        
1341    % make sure that the source register information is still correct if
1342    % the move instruction is added to a chain: if it is the last use of
1343    % the register, it should not be moved before an earlier use of the
1344    % register as a source. [alternative: update the reg info instead] 
1345    check_source_reg_info(S, SInfo, UnmovedSs) :-
1346        ( SInfo = r(_,_,_,IsLast), IsLast == last ->
1347            nonmember(S, UnmovedSs) 
1348         ; 
1349            true
1350        ).
1351       
1352
1353
1354convert_chains_to_instrs([], [], NMoves, MoveRegs, New, NewT) ?- 
1355        (NMoves > 0 ->
1356            combine_moves(MoveRegs, Instr),
1357            New = [code{instr:Instr}|NewT]
1358        ;
1359            New = NewT
1360        ).
1361convert_chains_to_instrs([Chain|Rest], [CInfo|ChainsInfo], NMoves, MoveRegs, New, NewT) ?-
1362        length(Chain, L),
1363        ( L == 2 ->
1364            /* a move instr */
1365            ( NMoves =:= maxmoveaas -> /* maxmoveaas must be > 0! */
1366                combine_moves(MoveRegs, MoveInstr),
1367                New = [code{instr:MoveInstr}|New1],
1368                NMoves1 = 1,
1369                MoveRegs1 = Chain
1370            ;
1371                New1 = New,
1372                NMoves1 is NMoves + 1,
1373                append(MoveRegs, Chain, MoveRegs1)
1374            ),
1375            convert_chains_to_instrs(Rest, ChainsInfo, NMoves1, MoveRegs1, New1, NewT)
1376        
1377        ;   /* a shift instruction */
1378            ( NMoves > 0 ->
1379                /* generate previously accumulated move instr */
1380                combine_moves(MoveRegs, MoveInstr),
1381                New = [code{instr:MoveInstr}|New1]
1382            ;
1383                New1 = New
1384            ),
1385            ( L =< maxshift -> /* maxshift must be > 2 */
1386                ( L == 4, 
1387                  Chain = [T,A1,A2,T],
1388                  CInfo = [_,_,_,TInfo],
1389                  TInfo = r(_,_,_,IsLast),
1390                  IsLast == last
1391                ->
1392                   New1 = [code{instr:swap(A1,A2)}|New2]
1393                ; L == 5, 
1394                  Chain = [T,A1,A2,A3,T],
1395                  CInfo = [_,_,_,_,TInfo],
1396                  TInfo = r(_,_,_,IsLast),
1397                  IsLast == last
1398                ->
1399                   New1 = [code{instr:rot(A1,A2,A3)}|New2]
1400                ;
1401                   ShiftInstr =.. [shift|Chain],
1402                   New1 = [code{instr:ShiftInstr}|New2]
1403                )
1404
1405            ;
1406                split_shift_instrs(Chain, L, New1, New2)
1407            ),
1408            convert_chains_to_instrs(Rest, ChainsInfo, 0, [], New2, NewT)
1409        ).
1410
1411combine_moves(MoveRegs, Instr) :-
1412        Instr =.. [move|MoveRegs].
1413
1414split_shift_instrs(Chain, L, New, NewT) :-
1415        maxshift(Max),
1416        split_chain(L, Max, Chain, New, NewT).
1417
1418split_chain(Len, Max, Chain, New, NewT) :-
1419        ( Len == 2 ->
1420            /* 2 arguments - move instr, argument order reversed */
1421            Chain = [A1,A2],
1422            New = [code{instr:move(A2,A1)}|NewT]
1423        ; Len =< Max ->
1424            Instr =.. [shift|Chain],
1425            New = [code{instr:Instr}|NewT]
1426        ; 
1427            get_subchain(Max, Chain, SubChain, RestChain),
1428            Instr =.. [shift|SubChain],
1429            New = [code{instr:Instr}|New1],
1430            Len1 is Len - Max + 1,
1431            split_chain(Len1, Max, RestChain, New1, NewT)
1432        ).
1433
1434get_subchain(1, List0, SubT, RestList) :- !, 
1435        List0 = RestList,
1436        List0 = [E|_],
1437        SubT = [E].
1438get_subchain(N, [E|List0], SubT, RestList) :-
1439        SubT = [E|SubT0],
1440        N0 is N - 1,
1441        get_subchain(N0, List0, SubT0, RestList).
1442
1443maxmoveaas(3).  /* maximum number of non-related move a(_) a(_) than can be combined */
1444maxshift(5).    /* maximum number of arguments in a shift instruction */
1445
1446% extract a sequence of move instructions of the same type whose argument
1447% refers to consecutive registers. The number of such move instructions, N,
1448% is returned
1449extract_conargs_moves(Codes, Instr, X, Y, X0, Y0, N0, N, Rest) :-
1450        ( \+ \+ (Codes = [code{instr:Instr}|_], X0 + N0 =:= X, Y0 + N0 =:= Y) 
1451        ->
1452            Codes = [_|Codes1],
1453            N1 is N0+1,
1454            extract_conargs_moves(Codes1, Instr, X, Y, X0, Y0, N1, N, Rest)
1455        ;
1456            N = N0, 
1457            Rest = Codes
1458        ).
1459
1460% extract a sequence of move instructions of type Template whose arguments
1461% are not consectuve, starting with AnnInstr0. Can return an empty sequence
1462extract_nonconargs_moves(Codes0, Template, AnnInstr0, X0, Y0, MoveInstrs1, Codes) :-
1463        AnnInstr0 = code{instr:Instr0},
1464        ( Codes0 = [AnnInstr1|Codes1],
1465          AnnInstr1 = code{instr:Instr1},
1466          \+ \+ Instr1 = Template ->
1467            arg([1,1], Instr1, X1),
1468            arg([2,1], Instr1, Y1),
1469            ( X1 =:= X0 + 1,
1470              Y1 =:= Y0 + 1 ->
1471                MoveInstrs1 = [],
1472                Codes = [AnnInstr0|Codes0]
1473            ;
1474                MoveInstrs1 = [Instr0|MoveInstrs2], 
1475                extract_nonconargs_moves(Codes1, Template, AnnInstr1, X1, Y1,
1476                                         MoveInstrs2, Codes)
1477            )
1478        ;
1479            MoveInstrs1 = [Instr0],
1480            Codes = Codes0
1481        ).
1482
1483:- mode compact_moves(+,-,-).
1484compact_moves([], Tail, Tail).
1485compact_moves([Instr1,Instr2,Instr3|Rest], [code{instr:move(X1,Y1,X2,Y2,X3,Y3)}|CRest1],
1486              CRest) :-
1487        !,
1488        Instr1 =.. [_,X1,Y1],
1489        Instr2 =.. [_,X2,Y2],
1490        Instr3 =.. [_,X3,Y3],
1491        compact_moves(Rest, CRest1, CRest).
1492compact_moves([Instr1,Instr2], [code{instr:move(X1,Y1,X2,Y2)}|CRest], CRest) :-
1493        !,
1494        Instr1 =.. [_,X1,Y1],
1495        Instr2 =.. [_,X2,Y2].
1496compact_moves([Instr], [code{instr:Instr}|CRest], CRest).
1497
1498
1499
1500count_same_instr(Codes, Instr, N0, N, Rest) :-
1501    	( Codes = [code{instr:Instr}|Codes1] ->
1502	    N1 is N0+1,
1503	    count_same_instr(Codes1, Instr, N1, N, Rest) 
1504	;
1505	    Rest = Codes, N = N0
1506	).
1507
1508:- mode simplify_call(+,+,-).
1509simplify_call(P, ret, jmp(P)).
1510simplify_call(P, exit, chain(P)).
1511
1512
1513%----------------------------------------------------------------------
1514end_of_file.
1515%----------------------------------------------------------------------
1516
1517
1518
1519
1520Requirements
1521------------
1522
1523Process and simplify a WAM code list.  The main problems are:
1524
1525    - how to substitute patterns that are not consecutive,
1526	i.e. contain jumps
1527
1528    - how to make sure that all new substitutions opportunities arising
1529    	from performed substitutions are found
1530
1531    - how to detect unreachable labels
1532
1533It might be useful to transform the code sequence into a graph and work on
1534that. Read up on some implementation techniques.
1535
1536
1537
1538Sample substitution patterns:
1539-----------------------------
1540
1541Pattern 1:	(eliminate instr)
1542
1543	nop
1544
1545    -> replace with nothing
1546
1547
1548Pattern 1a:
1549
1550    	move X X
1551
1552    -> replace with nothing
1553
1554Pattern 1b:
1555
1556    	branch lab
1557    otherlab:
1558    lab:
1559	...
1560
1561    ->
1562    otherlab:
1563    lab:
1564	...
1565
1566
1567Pattern 2:	(merge instr sequence)
1568
1569	move(B,A)
1570	move(C,B)
1571    ->
1572    	shift(A,B,C)
1573
1574	move(Yi,Aj)
1575	move(Yk,Al)
1576    ->
1577    	move(Yi,Aj,Yk,Al)
1578
1579Pattern 2a:
1580
1581	call	P N
1582	ret
1583    ->
1584    	jmp 	P
1585
1586
1587Pattern 3:	(merge broken instr sequence)
1588
1589	call	P N
1590	branch	l1
1591	...
1592    l1:
1593	ret
1594    ->
1595	jmp	P
1596	...
1597    l1:			(might now be unreachable)
1598	ret
1599
1600
1601Pattern 4:	(eliminate unreachable code)
1602
1603	...a...
1604	branch/jmp
1605    l1:			(not a jump target)
1606	...b...
1607    l2:
1608	...c...
1609    ->
1610	...a...
1611	branch/jmp
1612    l2:
1613	...c...
1614
1615
1616Pattern 5:	(skip subsumed instruction)
1617
1618	Atom_switch A1 [a->alab, b->blab]
1619
1620	...
1621    alab:
1622	Get_atom A1 a
1623	...
1624    blab:
1625	Get_atom A1 b
1626	...
1627
1628    -> change the Atom_switch to jump beyond the Get_atom instruction directly.
1629
1630
1631Pattern 5a:	(skip subsumed instruction)
1632
1633	List_switch A1 llab ...
1634	...
1635    llab:
1636	Get_list A1 rlab
1637	...
1638    rlab:
1639	Read_xxx
1640
1641    -> Here the List_switch should be changed to jump directly to rlab.
1642
1643Pattern 5a:    (skip subsumed instruction)
1644
1645	get_variable n An Ym
1646	switch_on_type Ym meta:mlab 
1647
1648     mlab:
1649        move Ym An
1650        ...
1651
1652     -> change the meta:mlab to meta:lab where lab is after move Ym An
1653
1654        get_variable n An Ym
1655	list_switch Ym ref(llab) ref(nlab) ...
1656
1657     nlab:
1658        move Ym An
1659        in_get_nil An
1660        ...
1661
1662     -> change to:
1663
1664        get_variable n An Ym
1665	list_switch An ref(lab) ref(nlab) ...
1666
1667     nlab:
1668        move Ym An
1669        in_get_nil An
1670     lab:
1671        ...
1672
1673Pattern 5a:	(redirect to shared code)
1674
1675	List_switch A1 llab ...
1676	...
1677    llab:
1678	Failure
1679
1680    -> Here the List_switch should be changed to jump directly to the
1681    	global fail label.
1682
1683
1684
1685
1686
1687Remove Res instruction when an event-triggering instruction follows
1688before failure can occur (but probably better done earlier):
1689
1690    Res,...,Call
1691    Res,...,Metacall
1692    Res,...,Jmp
1693    Res,...,Chain
1694    Res,...,Ret
1695    Res,...,Exit
1696
1697
1698Various Patterns:
1699
1700
1701    savecut(a(A)),cut(a(A))	-->	savecut(a(A))
1702    savecut(..), <transfer out> -->     <transfer out> unsafe for calls
1703                                        
1704    read_void,read_void+	-->	read_void N
1705
1706    write_void,write_void+	-->	write_void N
1707                                        
1708    allocate n, move Ai,Yj      -->     get_variable(n,Ai,Yj)
1709                                        
1710    space n, branch L           -->     branchs n,L
1711    space n, jmpd L             -->     jmpd n, L
1712
1713Patterns that are not safe to optimise:
1714                                      
1715    push_structure(N+1),write_did(F/N)  --> write_structure(F/N)
1716    because the push_structure and write_did may refer to different structs                                        
1717    cut(y(1),N), exit		-->	exitc 
1718    because cut(...) may be local cut (not the whole cluase)                                  
1719