1#!/usr/bin/perl
2
3# Copyright (c) 2006 Joe Hurd, distributed under the BSD License
4
5use strict;
6use warnings;
7use Pod::Usage;
8use Getopt::Std;
9
10use vars qw($opt_h $opt_c $opt_r);
11
12getopts('hc:r:');
13
14if ($opt_h or scalar @ARGV == 0)
15{
16    pod2usage({-exitval => 2,
17	       -verbose => 2});
18}
19
20if (!$opt_c) { die "mlpp: you must specify the SML compiler\n"; }
21if ($opt_c ne "mosml" && $opt_c ne "mlton" && $opt_c ne "polyml") {
22    die "mlpp: the SML compiler must be one of {mosml,mlton,polyml}.\n";
23}
24
25# Autoflush STDIN
26$|++;
27
28sub unquotify {
29    if (scalar @_ == 0) { return; }
30
31    my $pre = "[";
32
33    for my $quote (@_) {
34        my $nl = chomp $quote;
35        my @qs = split (/\^(\w+)/, $quote);
36        my @ps = ();
37
38        for (my $s = 0; 0 < scalar @qs; $s = 1 - $s) {
39            my $q = shift @qs;
40            if ($s == 0) {
41                $q =~ s/\\/\\\\/g;
42                $q =~ s/\"/\\\"/g;
43                push @ps, "QUOTE \"$q\"" unless ($q eq "");
44            }
45            elsif ($s == 1) {
46                push @ps, "ANTIQUOTE $q";
47            }
48            else { die; }
49        }
50
51        if (0 < $nl) {
52            if (0 < scalar @ps) {
53                my $p = pop @ps;
54                if ($p =~ /QUOTE \"(.*)\"/) { push @ps, "QUOTE \"$1\\n\""; }
55                else { push @ps, $p; push @ps, "QUOTE \"\\n\""; }
56            }
57            else { push @ps, "QUOTE \"\\n\""; }
58        }
59        else {
60            (0 < scalar @ps) or die;
61        }
62
63        print STDOUT ($pre . join (", ", @ps));
64        $pre = ",\n";
65    }
66
67    print STDOUT "]";
68}
69
70sub print_normal {
71    (scalar @_ == 1) or die;
72    my $text = shift @_;
73
74    if ($opt_c eq "mosml") {
75        $text =~ s/Array\.copy/Array_copy/g;
76        $text =~ s/Array\.foldli/Array_foldli/g;
77        $text =~ s/Array\.foldri/Array_foldri/g;
78        $text =~ s/Array\.modifyi/Array_modifyi/g;
79        $text =~ s/OS\.Process\.isSuccess/OS_Process_isSuccess/g;
80        $text =~ s/PP\.ppstream/ppstream/g;
81        $text =~ s/String\.concatWith/String_concatWith/g;
82        $text =~ s/String\.isSubstring/String_isSubstring/g;
83        $text =~ s/String\.isSuffix/String_isSuffix/g;
84        $text =~ s/Substring\.full/Substring_full/g;
85        $text =~ s/TextIO\.inputLine/TextIO_inputLine/g;
86        $text =~ s/Vector\.foldli/Vector_foldli/g;
87        $text =~ s/Vector\.mapi/Vector_mapi/g;
88    }
89
90    print STDOUT $text;
91}
92
93sub process_file {
94    (scalar @_ == 1) or die;
95    my $filename = shift @_;
96    my $line_num = 0;
97
98    if ($opt_c eq "mlton") {
99        print STDOUT "(*#line 0.0 \"$filename\"*)\n";
100    }
101
102    open my $INPUT, "$filename" or
103        die "mlpp: couldn't open $filename: $!\n";
104
105    my $state = "normal";
106    my $comment = 0;
107    my $revealed_comment = 0;
108    my @quotes = ();
109
110    while (my $line = <$INPUT>) {
111        (chomp ($line) == 1)
112            or warn "no terminating newline in $filename\nline = '$line'\n";
113
114        while (1) {
115            if ($state eq "quote") {
116                if ($line =~ /(.*?)\`(.*)$/) {
117                    push @quotes, $1;
118                    $line = $2;
119                    unquotify @quotes;
120                    @quotes = ();
121                    $state = "normal";
122                }
123                else {
124                    push @quotes, "$line\n";
125                    last;
126                }
127            }
128            elsif ($state eq "comment") {
129                if ($line =~ /^(.*?)(\(\*|\*\))(.*)$/) {
130                    my $leadup = $1;
131                    my $pat = $2;
132                    $line = $3;
133                    print STDOUT $leadup;
134
135                    if ($pat eq "(*") {
136                        print STDOUT $pat;
137                        ++$comment;
138                    }
139                    elsif ($pat eq "*)") {
140                        print STDOUT $pat;
141                        --$comment;
142                        if ($comment == 0) { $state = "normal"; }
143                    }
144                    else {
145                        die;
146                    }
147                }
148                else {
149                    print STDOUT "$line\n";
150                    last;
151                }
152            }
153            elsif ($state eq "dquote") {
154                if ($line =~ /^(.*?)\"(.*)$/) {
155                    my $leadup = $1;
156                    $line = $2;
157                    print STDOUT ($leadup . "\"");
158
159                    if ($leadup =~ /(\\+)$/ && ((length $1) % 2 == 1)) {
160                        # This is an escaped double quote
161                    }
162                    else {
163                        $state = "normal";
164                    }
165                }
166                else {
167                    die "EOL inside \" quote\n";
168                }
169            }
170            elsif ($state eq "normal") {
171                if ($line =~ /^ *use *\"([^"]+)\" *; *$/) {
172                    my $use_filename = $1;
173                    if ($use_filename !~ /^\// && $filename =~ /^(.*)\//) {
174                        $use_filename = $1 . '/' . $use_filename;
175                    }
176                    process_file ($use_filename);
177                    if ($opt_c eq "mlton") {
178                        print STDOUT "(*#line $line_num.0 \"$filename\"*)\n";
179                    }
180                    print STDOUT "\n";
181                    last;
182                }
183                elsif ($line =~ /^(.*?)(\`|\(\*|\*\)|\")(.*)$/) {
184                    my $leadup = $1;
185                    my $pat = $2;
186                    $line = $3;
187                    print_normal $leadup;
188
189                    if ($pat eq "`") {
190                        $state = "quote";
191                    }
192                    elsif ($pat eq "(*") {
193                        my $is_revealed = 0;
194                        if ($line =~ /^([[:alnum:]_-]+)/) {
195                            my $rev = $1;
196                            if ($rev eq $opt_c ||
197                                ($opt_r && $rev =~ /^$opt_r$/)) {
198                                my $rev_len = length $rev;
199                                $line = substr $line, $rev_len;
200                                ++$revealed_comment;
201                                $is_revealed = 1;
202                            }
203                        }
204                        if (!$is_revealed) {
205                            print STDOUT $pat;
206                            $state = "comment";
207                            ++$comment;
208                        }
209                    }
210                    elsif ($pat eq "*)") {
211                        if ($revealed_comment == 0) {
212                            die "mlpp: too many comment closers.\n"
213                        }
214                        --$revealed_comment;
215                    }
216                    elsif ($pat eq "\"") {
217                        print STDOUT $pat;
218                        $state = "dquote";
219                    }
220                    else {
221                        die;
222                    }
223                }
224                else {
225                    print_normal "$line\n";
226                    last;
227                }
228            }
229            else {
230                die;
231            }
232        }
233
234        ++$line_num;
235    }
236
237    if ($state eq "quote") {
238        die "mlpp: EOF inside \` quote\n";
239    }
240    elsif ($state eq "dquote") {
241        die "mlpp: EOF inside \" quote\n";
242    }
243    elsif ($state eq "comment") {
244        die "mlpp: EOF inside comment\n";
245    }
246    else {
247        ($state eq "normal") or die;
248    }
249
250    close $INPUT;
251}
252
253while (0 < scalar @ARGV) {
254    my $filename = shift @ARGV;
255    process_file $filename;
256}
257
258__END__
259
260=pod
261
262=head1 NAME
263
264mlpp - preprocesses SML files for compilation
265
266=head1 SYNOPSIS
267
268mlpp [-h] [-c compiler] [-r TAG] sml-file ... > preprocessed-sml-file
269
270=head1 ARGUMENTS
271
272The recognized flags are described below:
273
274=over 2
275
276=item B<-h>
277
278Produce this documentation.
279
280=item B<-c compiler>
281
282Select the SML compiler that will be used.
283
284=item B<-r TAG-REGEX>
285
286Remove all comment brackets tagged like this: (*TAG revealed-code *)
287where the TAG-REGEX matches the TAG.
288
289=back
290
291=head1 DESCRIPTION
292
293Concatenates the input list of SML source files into a single file
294ready to be compiled, by expanding quotations and antiquotations, and
295concatenating into a single file.
296
297=head1 BUGS
298
299Waiting to rear their ugly heads.
300
301=head1 AUTHORS
302
303Joe Hurd <joe@gilith.com>
304
305=head1 SEE ALSO
306
307Perl(1).
308
309=cut
310