1------------------------------------------------------------------------------
2--                                                                          --
3--                         GNAT COMPILER COMPONENTS                         --
4--                                                                          --
5--                             E X P _ U N S T                              --
6--                                                                          --
7--                                 S p e c                                  --
8--                                                                          --
9--          Copyright (C) 2014-2015, Free Software Foundation, Inc.         --
10--                                                                          --
11-- GNAT is free software;  you can  redistribute it  and/or modify it under --
12-- terms of the  GNU General Public License as published  by the Free Soft- --
13-- ware  Foundation;  either version 3,  or (at your option) any later ver- --
14-- sion.  GNAT is distributed in the hope that it will be useful, but WITH- --
15-- OUT ANY WARRANTY;  without even the  implied warranty of MERCHANTABILITY --
16-- or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License --
17-- for  more details.  You should have  received  a copy of the GNU General --
18-- Public License  distributed with GNAT; see file COPYING3.  If not, go to --
19-- http://www.gnu.org/licenses for a complete copy of the license.          --
20--                                                                          --
21-- GNAT was originally developed  by the GNAT team at  New York University. --
22-- Extensive contributions were provided by Ada Core Technologies Inc.      --
23--                                                                          --
24------------------------------------------------------------------------------
25
26--  Expand routines for unnesting subprograms
27
28with Types; use Types;
29
30package Exp_Unst is
31
32   --  -----------------
33   --  -- The Problem --
34   --  -----------------
35
36   --  Normally, nested subprograms in the source result in corresponding
37   --  nested subprograms in the resulting tree. We then expect the back end
38   --  to handle such nested subprograms, including all cases of uplevel
39   --  references. For example, the GCC back end can do this relatively easily
40   --  since GNU C (as an extension) allows nested functions with uplevel
41   --  references, and implements an appropriate static chain approach to
42   --  dealing with such uplevel references.
43
44   --  However, we also want to be able to interface with back ends that do
45   --  not easily handle such uplevel references. One example is the back end
46   --  that translates the tree into standard C source code. In the future,
47   --  other back ends might need the same capability (e.g. a back end that
48   --  generated LLVM intermediate code).
49
50   --  We could imagine simply handling such references in the appropriate
51   --  back end. For example the back end that generates C could recognize
52   --  nested subprograms and rig up some way of translating them, e.g. by
53   --  making a static-link source level visible.
54
55   --  Rather than take that approach, we prefer to do a semantics-preserving
56   --  transformation on the GNAT tree, that eliminates the problem before we
57   --  hand the tree over to the back end. There are two reasons for preferring
58   --  this approach:
59
60   --     First: the work needs only to be done once for all affected back ends
61   --     and we can remain within the semantics of the tree. The front end is
62   --     full of tree transformations, so we have all the infrastructure for
63   --     doing transformations of this type.
64
65   --     Second: given that the transformation will be semantics-preserving,
66   --     we can still used the standard GCC back end to build code from it.
67   --     This means we can easily run our full test suite to verify that the
68   --     transformations are indeed semantics preserving. It is a lot more
69   --     work to thoroughly test the output of specialized back ends.
70
71   --  Looking at the problem, we have three situations to deal with. Note
72   --  that in these examples, we use all lower case, since that is the way
73   --  the internal tree is cased.
74
75   --     First, cases where there are no uplevel references, for example
76
77   --       procedure case1 is
78   --          function max (m, n : Integer) return integer is
79   --          begin
80   --             return integer'max (m, n);
81   --          end max;
82   --          ...
83   --       end case1;
84
85   --     Second, cases where there are explicit uplevel references.
86
87   --       procedure case2 (b : integer) is
88   --          procedure Inner (bb : integer);
89   --
90   --          procedure inner2 is
91   --          begin
92   --            inner(5);
93   --          end;
94   --
95   --          x  : integer := 77;
96   --          y  : constant integer := 15 * 16;
97   --          rv : integer := 10;
98   --
99   --          procedure inner (bb : integer) is
100   --          begin
101   --             x := rv + y + bb + b;
102   --          end;
103   --
104   --       begin
105   --          inner2;
106   --       end case2;
107
108   --     In this second example, B, X, RV are uplevel referenced. Y is not
109   --     considered as an uplevel reference since it is a static constant
110   --     where references are replaced by the value at compile time.
111
112   --   Third, cases where there are implicit uplevel references via types
113   --   whose bounds depend on locally declared constants or variables:
114
115   --       function case3 (x, y : integer) return boolean is
116   --          subtype dynam is integer range x .. y + 3;
117   --          subtype static is integer range 42 .. 73;
118   --          xx : dynam := y;
119   --
120   --          type darr is array (dynam) of Integer;
121   --          type darec is record
122   --             A : darr;
123   --             B : integer;
124   --          end record;
125   --          darecv : darec;
126   --
127   --          function inner (b : integer) return boolean is
128   --          begin
129   --            return b in dynam and then darecv.b in static;
130   --          end inner;
131   --
132   --       begin
133   --         return inner (42) and then inner (xx * 3 - y * 2);
134   --       end case3;
135   --
136   --     In this third example, the membership test implicitly references the
137   --     the bounds of Dynam, which both involve uplevel references.
138
139   --  ------------------
140   --  -- The Solution --
141   --  ------------------
142
143   --  Looking at the three cases above, the first case poses no problem at
144   --  all. Indeed the subprogram could have been declared at the outer level
145   --  (perhaps changing the name). But this style is quite common as a way
146   --  of limiting the scope of a local procedure called only within the outer
147   --  procedure. We could move it to the outer level (with a name change if
148   --  needed), but we don't bother. We leave it nested, and the back end just
149   --  translates it as though it were not nested.
150
151   --  In general we leave nested procedures nested, rather than trying to move
152   --  them to the outer level (the back end may do that, e.g. as part of the
153   --  translation to C, but we don't do it in the tree itself). This saves a
154   --  LOT of trouble in terms of visibility and semantics.
155
156   --  But of course we have to deal with the uplevel references. The idea is
157   --  to rewrite these nested subprograms so that they no longer have any such
158   --  uplevel references, so by the time they reach the back end, they all are
159   --  case 1 (no uplevel references) and thus easily handled.
160
161   --  To deal with explicit uplevel references (case 2 above), we proceed with
162   --  the following steps:
163
164   --    All entities marked as being uplevel referenced are marked as aliased
165   --    since they will be accessed indirectly via an activation record as
166   --    described below.
167
168   --    An activation record is created containing system address values
169   --    for each uplevel referenced entity in a given scope. In the example
170   --    given before, we would have:
171
172   --      type AREC1T is record
173   --         b  : Address;
174   --         x  : Address;
175   --         rv : Address;
176   --      end record;
177
178   --      AREC1 : aliased AREC1T;
179
180   --      type AREC1PT is access all AREC1T;
181   --      AREC1P : constant AREC1PT := AREC1'Access;
182
183   --   The fields of AREC1 are set at the point the corresponding entity
184   --   is declared (immediately for parameters).
185
186   --   Note: the 1 in all these names represents the fact that we are at the
187   --   outer level of nesting. As we will see later, deeper levels of nesting
188   --   will use AREC2, AREC3, ...
189
190   --   For all subprograms nested immediately within the corresponding scope,
191   --   a parameter AREC1F is passed, and all calls to these routines have
192   --   AREC1P added as an additional formal.
193
194   --   Now within the nested procedures, any reference to an uplevel entity
195   --   xxx is replaced by Tnn!(AREC1.xxx).all (where ! represents a call
196   --   to unchecked conversion to convert the address to the access type
197   --   and Tnn is a locally declared type that is "access all t", where t
198   --   is the type of the reference).
199
200   --   Note: the reason that we use Address as the component type in the
201   --   declaration of AREC1T is that we may create this type before we see
202   --   the declaration of this type.
203
204   --   The following shows example 2 above after this translation:
205
206   --       procedure case2x (b : aliased Integer) is
207   --          type AREC1T is record
208   --             b  : Address;
209   --             x  : Address;
210   --             rv : Address;
211   --          end record;
212   --
213   --          AREC1 : aliased AREC1T;
214   --          type AREC1PT is access all AREC1T;
215   --          AREC1P : constant AREC1PT := AREC1'Access;
216   --
217   --          AREC1.b := b'Address;
218   --
219   --          procedure inner (bb : integer; AREC1F : AREC1PT);
220   --
221   --          procedure inner2 (AREC1F : AREC1PT) is
222   --          begin
223   --            inner(5, AREC1F);
224   --          end;
225   --
226   --          x  : aliased integer := 77;
227   --          AREC1.x := X'Address;
228   --
229   --          y  : constant Integer := 15 * 16;
230   --
231   --          rv : aliased Integer;
232   --          AREC1.rv := rv'Address;
233   --
234   --          procedure inner (bb : integer; AREC1F : AREC1PT) is
235   --          begin
236   --             type Tnn1 is access all Integer;
237   --             type Tnn2 is access all Integer;
238   --             type Tnn3 is access all Integer;
239   --             Tnn1!(AREC1F.x).all :=
240   --               Tnn2!(AREC1F.rv).all + y + b + Tnn3!(AREC1F.b).all;
241   --          end;
242   --
243   --       begin
244   --          inner2 (AREC1P);
245   --       end case2x;
246
247   --  And now the inner procedures INNER2 and INNER have no uplevel references
248   --  so they have been reduced to case 1, which is the case easily handled by
249   --  the back end. Note that the generated code is not strictly legal Ada
250   --  because of the assignments to AREC1 in the declarative sequence, but the
251   --  GNAT tree always allows such mixing of declarations and statements, so
252   --  the back end must be prepared to handle this in any case.
253
254   --  Case 3 where we have uplevel references to types is a bit more complex.
255   --  That would especially be the case if we did a full transformation that
256   --  completely eliminated such uplevel references as we did for case 2. But
257   --  instead of trying to do that, we rewrite the subprogram so that the code
258   --  generator can easily detect and deal with these uplevel type references.
259
260   --  First we distinguish two cases
261
262   --    Static types are one of the two following cases:
263
264   --      Discrete types whose bounds are known at compile time. This is not
265   --      quite the same as what is tested by Is_OK_Static_Subtype, in that
266   --      it allows compile time known values that are not static expressions.
267
268   --      Composite types, whose components are (recursively) static types.
269
270   --    Dynamic types are one of the two following cases:
271
272   --      Discrete types with at least one bound not known at compile time.
273
274   --      Composite types with at least one component that is (recursively)
275   --      a dynamic type.
276
277   --    Uplevel references to static types are not a problem, the front end
278   --    or the code generator fetches the bounds as required, and since they
279   --    are compile time known values, this value can just be extracted and
280   --    no actual uplevel reference is required.
281
282   --    Uplevel references to dynamic types are a potential problem, since
283   --    such references may involve an implicit access to a dynamic bound,
284   --    and this reference is an implicit uplevel access.
285
286   --    To fully unnest such references would be messy, since we would have
287   --    to create local copies of the dynamic types involved, so that the
288   --    front end or code generator could generate an explicit uplevel
289   --    reference to the bound involved. Rather than do that, we set things
290   --    up so that this situation can be easily detected and dealt with when
291   --    there is an implicit reference to the bounds.
292
293   --    What we do is to always generate a local constant for any dynamic
294   --    bound in a dynamic subtype xx with name xx_FIRST or xx_LAST. The one
295   --    case where we can skip this is where the bound is For
296   --    example in the third example above, subtype dynam is expanded as
297
298   --      dynam_LAST  : constant Integer := y + 3;
299   --      subtype dynam is integer range x .. dynam_LAST;
300
301   --    Now if type dynam is uplevel referenced (as it is this case), then
302   --    the bounds x and dynam_LAST are marked as uplevel references
303   --    so that appropriate entries are made in the activation record. Any
304   --    explicit reference to such a bound in the front end generated code
305   --    will be handled by the normal uplevel reference mechanism which we
306   --    described above for case 2. For implicit references by a back end
307   --    that needs to unnest things, any such implicit reference to one of
308   --    these bounds can be replaced by an appropriate reference to the entry
309   --    in the activation record for xx_FIRST or xx_LAST. Thus the back end
310   --    can eliminate the problematical uplevel reference without the need to
311   --    do the heavy tree modification to do that at the code expansion level
312
313   --  Looking at case 3 again, here is the normal -gnatG expanded code
314
315     --  function case3 (x : integer; y : integer) return boolean is
316     --     dynam_LAST : constant integer := y {+} 3;
317     --     subtype dynam is integer range x .. dynam_LAST;
318     --     subtype static is integer range 42 .. 73;
319     --
320     --     [constraint_error when
321     --       not (y in x .. dynam_LAST)
322     --       "range check failed"]
323     --
324     --     xx : dynam := y;
325     --
326     --     type darr is array (x .. dynam_LAST) of integer;
327     --     type darec is record
328     --        a : darr;
329     --        b : integer;
330     --     end record;
331     --     [type TdarrB is array (x .. dynam_LAST range <>) of integer]
332     --     freeze TdarrB []
333     --     darecv : darec;
334     --
335     --     function inner (b : integer) return boolean is
336     --     begin
337     --        return b in x .. dynam_LAST and then darecv.b in 42 .. 73;
338     --     end inner;
339     --  begin
340     --     return inner (42) and then inner (xx {*} 3 {-} y {*} 2);
341     --  end case3;
342
343   --  Note: the actual expanded code has fully qualified names so for
344   --  example function inner is actually function case3__inner. For now
345   --  we ignore that detail to clarify the examples.
346
347   --  Here we see that some of the bounds references are expanded by the
348   --  front end, so that we get explicit references to y or dynamLast. These
349   --  cases are handled by the normal uplevel reference mechanism described
350   --  above for case 2. This is the case for the constraint check for the
351   --  initialization of xx, and the range check in function inner.
352
353   --  But the reference darecv.b in the return statement of function
354   --  inner has an implicit reference to the bounds of dynam, since to
355   --  compute the location of b in the record, we need the length of a.
356
357   --  Here is the full translation of the third example:
358
359   --       function case3x (x, y : integer) return boolean is
360   --          type AREC1T is record
361   --             x          : Address;
362   --             dynam_LAST : Address;
363   --          end record;
364   --
365   --          AREC1 : aliased AREC1T;
366   --          type AREC1PT is access all AREC1T;
367   --          AREC1P : constant AREC1PT := AREC1'Access;
368   --
369   --          AREC1.x := x'Address;
370   --
371   --          dynam_LAST : constant integer := y {+} 3;
372   --          AREC1.dynam_LAST := dynam_LAST'Address;
373   --          subtype dynam is integer range x .. dynam_LAST;
374   --          xx : dynam := y;
375   --
376   --          [constraint_error when
377   --            not (y in x .. dynam_LAST)
378   --            "range check failed"]
379   --
380   --          subtype static is integer range 42 .. 73;
381   --
382   --          type darr is array (x .. dynam_LAST) of Integer;
383   --          type darec is record
384   --             A : darr;
385   --             B : integer;
386   --          end record;
387   --          darecv : darec;
388   --
389   --          function inner (b : integer; AREC1F : AREC1PT) return boolean is
390   --          begin
391   --             type Tnn is access all Integer
392   --             return b in x .. Tnn!(AREC1F.dynam_LAST).all
393   --               and then darecv.b in 42 .. 73;
394   --          end inner;
395   --
396   --       begin
397   --         return inner (42, AREC1P) and then inner (xx * 3, AREC1P);
398   --       end case3x;
399
400   --  And now the back end when it processes darecv.b will access the bounds
401   --  of darecv.a by referencing the d and dynam_LAST fields of AREC1P.
402
403   -----------------------------
404   -- Multiple Nesting Levels --
405   -----------------------------
406
407   --  In our examples so far, we have only nested to a single level, but the
408   --  scheme generalizes to multiple levels of nesting and in this section we
409   --  discuss how this generalization works.
410
411   --  Consider this example with two nesting levels
412
413   --  To deal with elimination of uplevel references, we follow the same basic
414   --  approach described above for case 2, except that we need an activation
415   --  record at each nested level. Basically the rule is that any procedure
416   --  that has nested procedures needs an activation record. When we do this,
417   --  the inner activation records have a pointer (uplink) to the immediately
418   --  enclosing activation record, the normal arrangement of static links. The
419   --  following shows the full translation of this fourth case.
420
421   --     function case4x (x : integer) return integer is
422   --        type AREC1T is record
423   --           v1 : Address;
424   --        end record;
425   --
426   --        AREC1 : aliased AREC1T;
427   --        type AREC1PT is access all AREC1T;
428   --        AREC1P : constant AREC1PT := AREC1'Access;
429   --
430   --        v1 : integer := x;
431   --        AREC1.v1 := v1'Address;
432   --
433   --        function inner1 (y : integer; AREC1F : AREC1PT) return integer is
434   --           type AREC2T is record
435   --              AREC1U : AREC1PT := AREC1F;
436   --              v2     : Address;
437   --           end record;
438   --
439   --           AREC2 : aliased AREC2T;
440   --           type AREC2PT is access all AREC2T;
441   --           AREC2P : constant AREC2PT := AREC2'Access;
442   --
443   --           type Tnn1 is access all Integer;
444   --           v2 : integer := Tnn1!(AREC1F.v1).all {+} 1;
445   --           AREC2.v2 := v2'Address;
446   --
447   --           function inner2
448   --              (z : integer; AREC2F : AREC2PT) return integer
449   --           is
450   --           begin
451   --              type Tnn1 is access all Integer;
452   --              type Tnn2 is access all Integer;
453   --              return integer(z {+}
454   --                             Tnn1!(AREC2F.AREC1U.v1).all {+}
455   --                             Tnn2!(AREC2F.v2).all);
456   --           end inner2;
457   --        begin
458   --           type Tnn is access all Integer;
459   --           return integer(y {+} inner2 (Tnn!(AREC1F.v1).all, AREC2P));
460   --        end inner1;
461   --     begin
462   --        return inner1 (x, AREC1P);
463   --     end case4x;
464
465   --  As can be seen in this example, the level number following AREC in the
466   --  names avoids any confusion between AREC names at different levels.
467
468   -------------------------
469   -- Name Disambiguation --
470   -------------------------
471
472   --  As described above, the translation scheme would raise issues when the
473   --  code generator did the actual unnesting if identically named nested
474   --  subprograms exist. Similarly overloading would cause a naming issue.
475
476   --  In fact, the expanded code includes qualified names which eliminate this
477   --  problem. We omitted the qualification from the exapnded examples above
478   --  for simplicity. But to see this in action, consider this example:
479
480   --    function Mnames return Boolean is
481   --       procedure Inner is
482   --          procedure Inner is
483   --          begin
484   --             null;
485   --          end;
486   --       begin
487   --          Inner;
488   --       end;
489   --       function F (A : Boolean) return Boolean is
490   --       begin
491   --          return not A;
492   --       end;
493   --       function F (A : Integer) return Boolean is
494   --       begin
495   --          return A > 42;
496   --       end;
497   --    begin
498   --       Inner;
499   --       return F (42) or F (True);
500   --    end;
501
502   --  The expanded code actually looks like:
503
504   --    function mnames return boolean is
505   --       procedure mnames__inner is
506   --          procedure mnames__inner__inner is
507   --          begin
508   --             null;
509   --             return;
510   --          end mnames__inner__inner;
511   --       begin
512   --          mnames__inner__inner;
513   --          return;
514   --       end mnames__inner;
515   --       function mnames__f (a : boolean) return boolean is
516   --       begin
517   --          return not a;
518   --       end mnames__f;
519   --       function mnames__f__2 (a : integer) return boolean is
520   --       begin
521   --          return a > 42;
522   --       end mnames__f__2;
523   --    begin
524   --       mnames__inner;
525   --       return mnames__f__2 (42) or mnames__f (true);
526   --    end mnames;
527
528   --  As can be seen from studying this example, the qualification deals both
529   --  with the issue of clashing names (mnames__inner, mnames__inner__inner),
530   --  and with overloading (mnames__f, mnames__f__2).
531
532   -----------------
533   -- Subprograms --
534   -----------------
535
536   procedure Check_Uplevel_Reference_To_Type (Typ : Entity_Id);
537   --  This procedure is called if Sem_Util.Check_Nested_Access detects an
538   --  uplevel reference to a type or subtype entity Typ. On return there are
539   --  two cases, if Typ is a static type (defined as a discrete type with
540   --  static bounds, or a record all of whose components are of a static type,
541   --  or an array whose index and component types are all static types), then
542   --  the flag Is_Static_Type (Typ) will be set True, and in this case the
543   --  flag Has_Uplevel_Reference is not set since we don't need to worry about
544   --  uplevel references to static types. If on the other hand Typ is not a
545   --  static type, then the flag Has_Uplevel_Reference will be set, and any
546   --  non-static bounds referenced by the type will also be marked as having
547   --  uplevel references (by setting Has_Uplevel_Reference for these bounds).
548
549   procedure Note_Uplevel_Reference (N : Node_Id; Subp : Entity_Id);
550   --  Called in Unnest_Subprogram_Mode when we detect an explicit uplevel
551   --  reference (node N) to an enclosing subprogram Subp.
552
553   procedure Unnest_Subprogram (Subp : Entity_Id; Subp_Body : Node_Id);
554   --  Subp is a library level subprogram which has nested subprograms, and
555   --  Subp_Body is the corresponding N_Subprogram_Body node. This procedure
556   --  declares the AREC types and objects, adds assignments to the AREC record
557   --  as required, defines the xxxPTR types for uplevel referenced objects,
558   --  adds the ARECP parameter to all nested subprograms which need it, and
559   --  modifies all uplevel references appropriately.
560
561end Exp_Unst;
562