1#!/usr/bin/perl -w
2
3# Copyright (C) 2006, 2007, 2009, 2010, 2013 Apple Inc. All rights reserved.
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions
7# are met:
8#
9# 1.  Redistributions of source code must retain the above copyright
10#     notice, this list of conditions and the following disclaimer.
11# 2.  Redistributions in binary form must reproduce the above copyright
12#     notice, this list of conditions and the following disclaimer in the
13#     documentation and/or other materials provided with the distribution.
14# 3.  Neither the name of Apple Inc. ("Apple") nor the names of
15#     its contributors may be used to endorse or promote products derived
16#     from this software without specific prior written permission.
17#
18# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
19# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
22# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
29# This script is like the genstrings tool (minus most of the options) with these differences.
30#
#    1) It uses the names UI_STRING and UI_STRING_KEY for the macros (optionally with a WEB_ prefix
#       or an _INTERNAL suffix), rather than the macros from NSBundle.h, and doesn't support tables
#       (although they would be easy to add).
33#    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
34#       @"" strings only reliably support ASCII since they are decoded based on the system encoding
35#       at runtime, so give different results on US and Japanese systems for example).
36#    3) It looks for strings that are not marked for localization, using both macro names that are
37#       known to be used for debugging in Intrigue source code and an exceptions file.
#    4) It finds the files to work on by searching the directories given on the command line,
#       rather than taking the individual source files as parameters; the output file and the
#       optional exceptions file are also given on the command line.
#       It would have been nice to use the project to find the source files, but it's too hard to
#       locate source files after parsing a .pbxproj file.
42
43# The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.
44
45use strict;
46use Getopt::Long;
47no warnings 'deprecated';
48
# Forward declaration: the prototyped definition appears further down the file.
sub UnescapeHexSequence($);

# Names of macros and functions whose string arguments are debugging or
# deliberately unlocalized text. Strings nested inside a call to one of these
# are not reported as "not marked for localization".
my %isDebugMacro = map { $_ => 1 } qw(
    ASSERT_WITH_MESSAGE
    LOG_ERROR
    ERROR
    NSURL_ERROR
    FATAL
    LOG
    LOG_WARNING
    UI_STRING_LOCALIZE_LATER
    UI_STRING_LOCALIZE_LATER_KEY
    LPCTSTR_UI_STRING_LOCALIZE_LATER
    UNLOCALIZED_STRING
    UNLOCALIZED_LPCTSTR
    dprintf
    NSException
    NSLog
    printf
);

# Command-line state, filled in by GetOptions below.
my $verify;                   # --verify: check the strings file instead of rewriting it
my $exceptionsFile;           # --exceptions <file>
my @directoriesToSkip = ();   # --skip <directory>; may be given more than once

my %options = (
    'skip=s' => \@directoriesToSkip,
    'exceptions=s' => \$exceptionsFile,
    'verify' => \$verify,
);

GetOptions(%options);

# After option parsing, at least the file to update and one directory must remain.
if (@ARGV < 2) {
    die "Usage: extract-localizable-strings [--verify] [--exceptions <exceptions file>] <file to update> [--skip directory | directory]...\nDid you mean to run update-webkit-localizable-strings instead?\n";
}

# The exceptions file is optional, but if one was named it has to exist.
if (defined $exceptionsFile) {
    die "Couldn't find exceptions file $exceptionsFile\n" unless -f $exceptionsFile;
}

my $fileToUpdate = shift @ARGV;
die "Couldn't find file to update $fileToUpdate\n" unless -f $fileToUpdate;

# Unlocalized-string warnings are only printed when an exceptions file was supplied.
my $warnAboutUnlocalizedStrings = defined $exceptionsFile;

# Every remaining argument is a directory to search. The "." fallback cannot
# currently trigger, because the argument-count check above guarantees at
# least one directory is left after the shift.
my @directories = @ARGV ? @ARGV : (".");

# Set as soon as any hard error is reported; suppresses writing the output file.
my $sawError = 0;

# Totals reported in the summary at the end of the run.
my ($localizedCount, $keyCollisionCount, $notLocalizedCount, $NSLocalizeCount) = (0, 0, 0, 0);

# %exception maps each entry of the exceptions file to the line it came from;
# %usedException records which of those entries were matched during scanning.
my (%exception, %usedException);
92
# Load the exceptions file, if one was given. Each line must be one of:
#   "some string"              - a quoted string to ignore in every file
#   path/to/File.ext           - a source file to ignore entirely
#   path/to/File.ext:"string"  - a quoted string to ignore in one file
# where .ext is .h, .m, .mm, .c or .cpp. Every accepted line is stored in
# %exception, keyed by the full line text, with its line number as the value.
# Duplicate and malformed lines are reported as warnings and otherwise ignored.
if (defined $exceptionsFile && open(my $exceptionsHandle, '<', $exceptionsFile)) {
    my $quotedString = qr/"(?:[^\\"]|\\.)*"/;
    # The dot before the extension is matched literally, so a name has to
    # really end in one of the recognised extensions.
    my $sourceFileName = qr/[-_\/\w\s.]+\.(?:h|m|mm|c|cpp)/;

    while (<$exceptionsHandle>) {
        chomp;
        if (/^${quotedString}$/ or /^${sourceFileName}$/ or /^${sourceFileName}:${quotedString}$/) {
            if ($exception{$_}) {
                print "$exceptionsFile:$.: warning: exception for $_ appears twice\n";
                print "$exceptionsFile:$exception{$_}: warning: first appearance\n";
            } else {
                # Remember the line number for later diagnostics.
                $exception{$_} = $.;
            }
        } else {
            print "$exceptionsFile:$.: warning: syntax error\n";
        }
    }
    close $exceptionsHandle;
} elsif (defined $exceptionsFile) {
    # Keep going without exceptions, but say why none were loaded.
    print "$exceptionsFile: warning: couldn't open exceptions file: $!\n";
}
109
# Build the list of candidate source files by running find(1) over the
# requested directories, pruning every --skip directory, and keeping files
# named *.h, *.m, *.mm, *.c or *.cpp.
#
# The arguments are passed to find as a list, so no shell is involved and
# directory names containing quotes, spaces, "$" or backticks are passed
# through literally.
my @findArguments = @directories;
for my $dir (@directoriesToSkip) {
    push @findArguments, '-path', $dir, '-prune', '-o';
}
# The explicit -print binds only to the name tests; without it the implicit
# print would also list the pruned directories themselves.
push @findArguments,
    '(', '-name', '*.h', '-o', '-name', '*.m', '-o', '-name', '*.mm', '-o', '-name', '*.c', '-o', '-name', '*.cpp', ')',
    '-print';

# Failing to start find is fatal: carrying on with an empty file list would
# later replace the strings file with an empty one.
open(my $findOutput, '-|', 'find', @findArguments) or die "Couldn't run find: $!\n";
my @files = <$findOutput>;
chomp @files;
# find's exit status is not checked, so a partially failed search (for
# example an unreadable subdirectory) still yields the files it did find.
close $findOutput;
116
# Scan every source file, one line at a time and one token at a time, looking
# for two things:
#   * calls to the UI_STRING family of macros, whose arguments are handed to
#     HandleUIString, and
#   * quoted strings that are neither inside such a macro, nor inside a debug
#     macro, nor covered by an exception; these are counted (and reported when
#     an exceptions file was supplied) as not marked for localization.
#
# $expected drives a small state machine while inside a UI_STRING call. It is
# "" outside a call, and otherwise names what must come next: "(",
# "a quoted string", "," or ")".
# $nestingLevel is defined only while inside the argument list of a debug
# macro or an unsupported NSLocalized macro; it tracks bracket depth so the
# end of that argument list can be recognised.
for my $file (sort @files) {
    # Skip the *LocalizableStrings*.h and LocalizedStrings.h headers
    # (presumably where the localization macros themselves are declared).
    next if $file =~ /\/\w+LocalizableStrings\w*\.h$/ || $file =~ /\/LocalizedStrings\.h$/;

    # Drop a leading "./" so reported paths are relative. The dot is escaped
    # so that a real one-character directory name such as "a/" is left alone.
    $file =~ s-^\./--;

    open SOURCE, '<', $file or die "can't open $file\n";

    my $inComment = 0;

    my $expected = "";
    my $macroLine;
    my $macro;
    my $UIString;
    my $key;
    my $comment;

    my $string;
    my $stringLine;
    my $nestingLevel;

    my $previousToken = "";

    while (<SOURCE>) {
        chomp;

        # Handle continued multi-line comment.
        if ($inComment) {
            next unless s-.*\*/--;
            $inComment = 0;
        }

        # Fast path: outside a debug macro, a line with no quote and no
        # comment opener cannot contain anything of interest.
        next unless defined $nestingLevel or /(\"|\/\*)/;

        # Handle all the tokens in the line.
        # Each iteration strips leading whitespace plus one token: a word
        # (possibly with '#'), a comment opener, a run of other punctuation,
        # or failing all of those a single character.
        while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
            my $token = $1;

            if ($token eq "\"") {
                if ($expected and $expected ne "a quoted string") {
                    print "$file:$.: found a quoted string but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                # Consume the rest of the literal up to its closing quote.
                # Adjacent literals are concatenated into one string.
                if (s-^(([^\\$token]|\\.)*?)$token--) {
                    if (!defined $string) {
                        $stringLine = $.;
                        $string = $1;
                    } else {
                        $string .= $1;
                    }
                } else {
                    print "$file:$.: mismatched quotes\n";
                    $sawError = 1;
                    $_ = "";
                }
                next;
            }

            # A non-string token ends any string literal collected so far,
            # so decide now what that string was.
            if (defined $string) {
handleString:
                if ($expected) {
                    # Inside a UI_STRING call: the strings are, in order, the
                    # UI string, the key (only for the _KEY macros) and the
                    # comment.
                    if (!defined $UIString) {
                        # FIXME: Validate UTF-8 here?
                        $UIString = $string;
                        $expected = ",";
                    } elsif (($macro =~ /(WEB_)?UI_STRING_KEY(_INTERNAL)?$/) and !defined $key) {
                        # FIXME: Validate UTF-8 here?
                        $key = $string;
                        $expected = ",";
                    } elsif (!defined $comment) {
                        # FIXME: Validate UTF-8 here?
                        $comment = $string;
                        $expected = ")";
                    }
                } else {
                    if (defined $nestingLevel) {
                        # In a debug macro, no need to localize.
                    } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
                        # File name, no need to localize.
                    } elsif ($previousToken eq "extern" and $string eq "C") {
                        # extern "C", no need to localize.
                    } elsif ($string eq "") {
                        # Empty string can sometimes be localized, but we need not complain if not.
                    } elsif ($exception{$file}) {
                        $usedException{$file} = 1;
                    } elsif ($exception{"\"$string\""}) {
                        $usedException{"\"$string\""} = 1;
                    } elsif ($exception{"$file:\"$string\""}) {
                        $usedException{"$file:\"$string\""} = 1;
                    } else {
                        print "$file:$stringLine: warning: \"$string\" is not marked for localization\n" if $warnAboutUnlocalizedStrings;
                        $notLocalizedCount++;
                    }
                }
                $string = undef;
                # When this point is reached through the end-of-file goto
                # below there is no current token, so leave the token loop.
                last if !defined $token;
            }

            $previousToken = $token;

            if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/ && $token !~ /NSLocalizedFileSizeDescription/ && $token !~ /NSLocalizedRecoverySuggestionErrorKey/) {
                print "$file:$.: found a use of an NSLocalized macro ($token); not supported\n";
                # Treat its arguments like those of a debug macro so they are
                # not also reported as unlocalized strings.
                $nestingLevel = 0 if !defined $nestingLevel;
                $sawError = 1;
                $NSLocalizeCount++;
            } elsif ($token eq "/*") {
                if (!s-^.*?\*/--) {
                    $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
                    $inComment = 1;
                }
            } elsif ($token eq "//") {
                $_ = ""; # Discard the rest of the line
            } elsif ($token eq "'") {
                # Character literal: drop it up to its closing single quote.
                if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
                    print "$file:$.: mismatched single quote\n";
                    $sawError = 1;
                    $_ = "";
                }
            } else {
                if ($expected and $expected ne $token) {
                    print "$file:$.: found $token but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                if ($token =~ /(WEB_)?UI_STRING(_KEY)?(_INTERNAL)?$/) {
                    # Start of a localization macro call: reset per-call state.
                    $expected = "(";
                    $macro = $token;
                    $UIString = undef;
                    $key = undef;
                    $comment = undef;
                    $macroLine = $.;
                } elsif ($token eq "(" or $token eq "[") {
                    ++$nestingLevel if defined $nestingLevel;
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ",") {
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ")" or $token eq "]") {
                    # Leaving the outermost bracket of a debug macro ends it.
                    $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
                    if ($expected) {
                        # End of a complete UI_STRING call. Without an
                        # explicit key, the UI string is its own key.
                        $key = $UIString if !defined $key;
                        HandleUIString($UIString, $key, $comment, $file, $macroLine);
                        $macro = "";
                        $expected = "";
                        $localizedCount++;
                    }
                } elsif ($isDebugMacro{$token}) {
                    $nestingLevel = 0 if !defined $nestingLevel;
                }
            }
        }

    }

    # A string literal that was the very last token of the file has not been
    # classified yet; jump back into the handler above to do so.
    goto handleString if defined $string;

    if ($expected) {
        print "$file: reached end of file but expected $expected\n";
        $sawError = 1;
    }

    close SOURCE;
}
279
# Unescapes C language hexadecimal escape sequences.
#
# Takes the text of a C string literal (without the surrounding quotes) and
# returns a copy in which every \x<hex digits> escape is replaced by the single
# byte with that value. All other text, including other backslash escapes such
# as \n, \" and \\, is copied through unchanged.
#
# Returns undef, after printing a message, if an escape denotes a value of 256
# or more.
sub UnescapeHexSequence($)
{
    my ($originalStr) = @_;

    my $escapedStr = defined $originalStr ? $originalStr : "";
    my $unescapedStr = "";

    for (;;) {
        if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) {
            my $value = hex($1);
            if (256 <= $value) {
                print "Hexadecimal escape sequence out of range: \\x$1\n";
                return undef;
            }
            # chr() yields exactly one byte for any number of digits, so \xA
            # is 0x0A and \x00A is 0x0A rather than a right-padded byte pair.
            $unescapedStr .= chr($value);
        } elsif ($escapedStr =~ s-^(\\.|.)--s) {
            # Copy any other backslash escape as a unit, so the "x41" in
            # "\\x41" is not mistaken for a hex escape. /s lets "." match a
            # newline too, so the loop never stops early.
            $unescapedStr .= $1;
        } else {
            return $unescapedStr;
        }
    }
}
302
# Everything collected about each localizable string, keyed by its key:
# the string itself, its translator comment, and where it was first seen.
#
# These are deliberately declared without initializers: the scanning loop
# earlier in the file calls HandleUIString before execution reaches these
# statements, so assigning an initial value here would discard what has
# already been collected.
my %stringByKey;
my %commentByKey;
my %fileByKey;
my %lineByKey;

# Records one use of a localization macro.
#
#   $string   - the UI string as written in the source
#   $key      - the key (the caller passes the UI string when there is none)
#   $comment  - the comment for translators
#   $file     - source file of the macro call, for diagnostics
#   $line     - line number of the macro call, for diagnostics
#
# Hex escapes are decoded in all three strings. If any of them has an
# out-of-range hex escape or contains U+FFFD, an error is printed, $sawError
# is set and nothing is recorded. If the key was already recorded with a
# different string or a different comment, a warning is printed,
# $keyCollisionCount is incremented and the earlier entry is kept.
sub HandleUIString
{
    my ($string, $key, $comment, $file, $line) = @_;

    my $bad = 0;
    $string = UnescapeHexSequence($string);
    if (!defined($string)) {
        print "$file:$line: string has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $key = UnescapeHexSequence($key);
    if (!defined($key)) {
        print "$file:$line: key has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $comment = UnescapeHexSequence($comment);
    if (!defined($comment)) {
        print "$file:$line: comment has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    # The U+FFFD checks only run on values that survived unescaping, so a
    # failed unescape does not also trigger "uninitialized value" warnings.
    if (defined $string && grep { $_ == 0xFFFD } unpack "U*", $string) {
        print "$file:$line: string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    # Skip the key when it is identical to the string; that case was just reported.
    if (defined $key && (!defined $string || $string ne $key) && grep { $_ == 0xFFFD } unpack "U*", $key) {
        print "$file:$line: key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if (defined $comment && grep { $_ == 0xFFFD } unpack "U*", $comment) {
        print "$file:$line: comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($bad) {
        $sawError = 1;
        return;
    }

    # Test for the presence of an earlier entry rather than its truthiness,
    # so an earlier string or comment of "" or "0" still counts as a conflict.
    if (exists $stringByKey{$key} && $stringByKey{$key} ne $string) {
        print "$file:$line: warning: encountered the same key, \"$key\", twice, with different strings\n";
        print "$fileByKey{$key}:$lineByKey{$key}: warning: previous occurrence\n";
        $keyCollisionCount++;
        return;
    }
    if (exists $commentByKey{$key} && $commentByKey{$key} ne $comment) {
        print "$file:$line: warning: encountered the same key, \"$key\", twice, with different comments\n";
        print "$fileByKey{$key}:$lineByKey{$key}: warning: previous occurrence\n";
        $keyCollisionCount++;
        return;
    }

    $fileByKey{$key} = $file;
    $lineByKey{$key} = $line;
    $stringByKey{$key} = $string;
    $commentByKey{$key} = $comment;
}
363
# Put a blank line between the per-file diagnostics and the summary,
# but only if any diagnostics were produced.
if ($sawError || $notLocalizedCount || $NSLocalizeCount) {
    print "\n";
}

# Report every entry of the exceptions file that was never matched during the scan.
my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
if (@unusedExceptions) {
    print "$exceptionsFile:$exception{$_}: warning: exception $_ not used\n" for @unusedExceptions;
    print "\n";
}

# One summary line per non-zero total, in a fixed order.
for my $total (
    [ $localizedCount, "localizable strings" ],
    [ $keyCollisionCount, "key collisions" ],
    [ $notLocalizedCount, "strings not marked for localization" ],
    [ $NSLocalizeCount, "uses of NSLocalize" ],
    [ scalar(@unusedExceptions), "unused exceptions" ],
) {
    my ($count, $label) = @$total;
    print "$count $label\n" if $count;
}

# Any hard error means the output file is left untouched.
if ($sawError) {
    print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
    exit 1;
}
384
# Render the collected strings in .strings format, sorted by key:
#   /* comment */
#   "key" = "string";
# with a blank line after each entry.
my $localizedStrings = "";

for my $key (sort keys %commentByKey) {
    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
}

if (-e "$fileToUpdate") {
    if (!$verify) {
        # Write out the strings file. No encoding layer is applied: the text
        # is written exactly as it was collected from the source files.
        open STRINGS, ">", "$fileToUpdate" or die "Couldn't open $fileToUpdate for writing: $!\n";
        print STRINGS $localizedStrings;
        # Buffered write errors only surface at close, so check it.
        close STRINGS or die "Couldn't write $fileToUpdate: $!\n";
    } else {
        # --verify: compare the existing file with what would have been
        # written, reporting every difference instead of rewriting it.
        open STRINGS, "<", $fileToUpdate or die "Couldn't open $fileToUpdate for reading: $!\n";

        my $lastComment;

        while (<STRINGS>) {
            chomp;

            next if (/^\s*$/);

            if (/^\/\* (.*) \*\/$/) {
                $lastComment = $1;
            } elsif (/^"((?:[^\\"]|\\.)*)"\s*=\s*"((?:[^\\"]|\\.)*)";$/) {
                # The pattern accepts backslash escapes, including \", inside
                # the key and the value.
                my ($fileKey, $fileValue) = ($1, $2);

                # Removing the key as it is seen leaves only the missing keys
                # in %stringByKey once the whole file has been read.
                my $string = delete $stringByKey{$fileKey};
                if (!defined $string) {
                    print "$fileToUpdate:$.: unused key \"$fileKey\"\n";
                    $sawError = 1;
                } else {
                    if (!($string eq $fileValue)) {
                        print "$fileToUpdate:$.: unexpected value \"$fileValue\" for key \"$fileKey\"\n";
                        print "$fileByKey{$fileKey}:$lineByKey{$fileKey}: expected value \"$string\" defined here\n";
                        $sawError = 1;
                    }
                    # An entry with no preceding comment line is compared as
                    # if its comment were empty.
                    my $fileComment = defined $lastComment ? $lastComment : "";
                    if (!($fileComment eq $commentByKey{$fileKey})) {
                        print "$fileToUpdate:$.: unexpected comment /* $fileComment */ for key \"$fileKey\"\n";
                        print "$fileByKey{$fileKey}:$lineByKey{$fileKey}: expected comment /* $commentByKey{$fileKey} */ defined here\n";
                        $sawError = 1;
                    }
                }
            } else {
                print "$fileToUpdate:$.: line with unexpected format: $_\n";
                $sawError = 1;
            }
        }

        close STRINGS;

        # Sorted so the report is the same from run to run.
        for my $missing (sort keys %stringByKey) {
            print "$fileByKey{$missing}:$lineByKey{$missing}: missing key \"$missing\"\n";
            $sawError = 1;
        }

        if ($sawError) {
            print "\n$fileToUpdate:0: file is not up to date.\n";
            exit 1;
        }
    }
} else {
    print "error: $fileToUpdate does not exist\n";
    exit 1;
}
448