#!/usr/bin/perl -w

# Copyright (C) 2006, 2007, 2009, 2010, 2013 Apple Inc. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1.  Redistributions of source code must retain the above copyright
#     notice, this list of conditions and the following disclaimer.
# 2.  Redistributions in binary form must reproduce the above copyright
#     notice, this list of conditions and the following disclaimer in the
#     documentation and/or other materials provided with the distribution.
# 3.  Neither the name of Apple Inc. ("Apple") nor the names of
#     its contributors may be used to endorse or promote products derived
#     from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# This script is like the genstrings tool (minus most of the options) with these differences.
#
#    1) It uses the names UI_STRING and UI_STRING_WITH_KEY for the macros, rather than the macros
#       from NSBundle.h, and doesn't support tables (although they would be easy to add).
#    2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings;
#       @"" strings only reliably support ASCII since they are decoded based on the system encoding
#       at runtime, so give different results on US and Japanese systems for example).
#    3) It looks for strings that are not marked for localization, using both macro names that are
#       known to be used for debugging in Intrigue source code and an exceptions file.
#    4) It finds the files to work on rather than taking them as parameters, and also uses a
#       hardcoded location for both the output file and the exceptions file.
#       It would have been nice to use the project to find the source files, but it's too hard to
#       locate source files after parsing a .pbxproj file.

# The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :.

use strict;
use Getopt::Long;

sub UnescapeHexSequence($);

# Macros whose string arguments are diagnostics (or are explicitly marked as
# unlocalized) and therefore never need to be reported as "not localized".
my %isDebugMacro = map { $_ => 1 } qw(
    ASSERT_WITH_MESSAGE
    LOG_ERROR
    ERROR
    NSURL_ERROR
    FATAL
    LOG
    LOG_WARNING
    UI_STRING_LOCALIZE_LATER
    UI_STRING_LOCALIZE_LATER_KEY
    LPCTSTR_UI_STRING_LOCALIZE_LATER
    UNLOCALIZED_STRING
    UNLOCALIZED_LPCTSTR
    dprintf
    NSException
    NSLog
    printf
);

my $verify;
my $exceptionsFile;
my @directoriesToSkip = ();

my %options = (
    'verify' => \$verify,
    'exceptions=s' => \$exceptionsFile,
    'skip=s' => \@directoriesToSkip,
);

GetOptions(%options);

@ARGV >= 2 or die "Usage: extract-localizable-strings [--verify] [--exceptions <exceptions file>] <file to update> [--skip directory | directory]...\nDid you mean to run update-webkit-localizable-strings instead?\n";

if (defined $exceptionsFile && !-f $exceptionsFile) {
    die "Couldn't find exceptions file $exceptionsFile\n";
}

my $fileToUpdate = shift @ARGV;
-f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n";

# Unlocalized strings are only reported when an exceptions file was supplied.
my $warnAboutUnlocalizedStrings = defined $exceptionsFile;

my @directories = @ARGV ? @ARGV : (".");

my $sawError = 0;

my $localizedCount = 0;
my $keyCollisionCount = 0;
my $notLocalizedCount = 0;
my $NSLocalizeCount = 0;

# Exception text => line number in the exceptions file where it was declared.
my %exception;
# Exception text => 1 once the exception has suppressed at least one warning.
my %usedException;

# State filled in by HandleUIString(). Declared before the scanning loop so
# the variables are initialized before the first call that writes to them.
my %stringByKey;
my %commentByKey;
my %fileByKey;
my %lineByKey;

if (defined $exceptionsFile) {
    if (open my $exceptionsHandle, "<", $exceptionsFile) {
        # An exception is a quoted string, a source file name, or file:"string".
        my $quotedStringPattern = qr/"(?:[^\\"]|\\.)*"/;
        my $sourceFilePattern = qr/[-_\/\w\s.]+\.(?:h|m|mm|c|cpp)/;

        while (<$exceptionsHandle>) {
            chomp;
            if (/^${quotedStringPattern}$/ or /^${sourceFilePattern}$/ or /^${sourceFilePattern}:${quotedStringPattern}$/) {
                if ($exception{$_}) {
                    print "$exceptionsFile:$.: warning: exception for $_ appears twice\n";
                    print "$exceptionsFile:$exception{$_}: warning: first appearance\n";
                } else {
                    $exception{$_} = $.;
                }
            } else {
                print "$exceptionsFile:$.: warning: syntax error\n";
            }
        }
        close $exceptionsHandle;
    } else {
        warn "Couldn't open exceptions file $exceptionsFile: $!\n";
    }
}

# NOTE(review): directory names are interpolated into a shell command line;
# they come from the command line of this script and are only double-quoted.
my $quotedDirectoriesString = '"' . join('" "', @directories) . '"';
for my $dir (@directoriesToSkip) {
    $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o';
}

my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` );

for my $file (sort @files) {
    next if $file =~ /\/\w+LocalizableStrings\w*\.h$/ || $file =~ /\/LocalizedStrings\.h$/;

    # Strip the leading "./" that find emits when scanning the current directory.
    $file =~ s-^\./--;

    open my $source, "<", $file or die "can't open $file\n";

    my $inComment = 0;

    # What the UI_STRING parser expects next; "" when not inside a UI_STRING macro.
    my $expected = "";
    my $macroLine;
    my $macro;
    my $UIString;
    my $key;
    my $comment;

    # The string literal currently being accumulated (adjacent literals are joined).
    my $string;
    my $stringLine;
    # Defined while inside the argument list of a debug macro; counts open brackets.
    my $nestingLevel;

    my $previousToken = "";

    # Consumes the accumulated string literal: either as the next UI_STRING
    # argument, or as a candidate "not marked for localization" warning.
    my $handlePendingString = sub {
        if ($expected) {
            if (!defined $UIString) {
                # FIXME: Validate UTF-8 here?
                $UIString = $string;
                $expected = ",";
            } elsif (($macro =~ /(WEB_)?UI_STRING_KEY(_INTERNAL)?$/) and !defined $key) {
                # FIXME: Validate UTF-8 here?
                $key = $string;
                $expected = ",";
            } elsif (!defined $comment) {
                # FIXME: Validate UTF-8 here?
                $comment = $string;
                $expected = ")";
            }
        } else {
            if (defined $nestingLevel) {
                # In a debug macro, no need to localize.
            } elsif ($previousToken eq "#include" or $previousToken eq "#import") {
                # File name, no need to localize.
            } elsif ($previousToken eq "extern" and $string eq "C") {
                # extern "C", no need to localize.
            } elsif ($string eq "") {
                # Empty string can sometimes be localized, but we need not complain if not.
            } elsif ($exception{$file}) {
                $usedException{$file} = 1;
            } elsif ($exception{"\"$string\""}) {
                $usedException{"\"$string\""} = 1;
            } elsif ($exception{"$file:\"$string\""}) {
                $usedException{"$file:\"$string\""} = 1;
            } else {
                print "$file:$stringLine: warning: \"$string\" is not marked for localization\n" if $warnAboutUnlocalizedStrings;
                $notLocalizedCount++;
            }
        }
        $string = undef;
    };

    while (<$source>) {
        chomp;

        # Handle continued multi-line comment.
        if ($inComment) {
            next unless s-.*\*/--;
            $inComment = 0;
        }

        # Outside a debug macro, only lines containing a quote or a comment
        # opener are tokenized.
        next unless defined $nestingLevel or /(\"|\/\*)/;

        # Handle all the tokens in the line.
        while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) {
            my $token = $1;

            if ($token eq "\"") {
                if ($expected and $expected ne "a quoted string") {
                    print "$file:$.: found a quoted string but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                # Consume the body of the literal up to its closing quote.
                if (s-^((?:[^\\"]|\\.)*?)"--) {
                    if (!defined $string) {
                        $stringLine = $.;
                        $string = $1;
                    } else {
                        $string .= $1;
                    }
                } else {
                    print "$file:$.: mismatched quotes\n";
                    $sawError = 1;
                    $_ = "";
                }
                next;
            }

            # Any non-string token terminates the literal collected so far.
            $handlePendingString->() if defined $string;

            $previousToken = $token;

            if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/ && $token !~ /NSLocalizedFileSizeDescription/ && $token !~ /NSLocalizedRecoverySuggestionErrorKey/) {
                print "$file:$.: found a use of an NSLocalized macro ($token); not supported\n";
                $nestingLevel = 0 if !defined $nestingLevel;
                $sawError = 1;
                $NSLocalizeCount++;
            } elsif ($token eq "/*") {
                if (!s-^.*?\*/--) {
                    $_ = ""; # If the comment doesn't end, discard the result of the line and set flag
                    $inComment = 1;
                }
            } elsif ($token eq "//") {
                $_ = ""; # Discard the rest of the line
            } elsif ($token eq "'") {
                if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused
                    print "$file:$.: mismatched single quote\n";
                    $sawError = 1;
                    $_ = "";
                }
            } else {
                if ($expected and $expected ne $token) {
                    print "$file:$.: found $token but expected $expected\n";
                    $sawError = 1;
                    $expected = "";
                }
                if ($token =~ /(WEB_)?UI_STRING(_KEY)?(_INTERNAL)?$/) {
                    $expected = "(";
                    $macro = $token;
                    $UIString = undef;
                    $key = undef;
                    $comment = undef;
                    $macroLine = $.;
                } elsif ($token eq "(" or $token eq "[") {
                    ++$nestingLevel if defined $nestingLevel;
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ",") {
                    $expected = "a quoted string" if $expected;
                } elsif ($token eq ")" or $token eq "]") {
                    # Leaving the outermost bracket of a debug macro ends its scope.
                    $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel;
                    if ($expected) {
                        $key = $UIString if !defined $key;
                        HandleUIString($UIString, $key, $comment, $file, $macroLine);
                        $macro = "";
                        $expected = "";
                        $localizedCount++;
                    }
                } elsif ($isDebugMacro{$token}) {
                    $nestingLevel = 0 if !defined $nestingLevel;
                }
            }
        }

    }

    # A literal that was still being accumulated when the file ended.
    $handlePendingString->() if defined $string;

    if ($expected) {
        print "$file: reached end of file but expected $expected\n";
        $sawError = 1;
    }

    close $source;
}

# Unescapes C language hexadecimal escape sequences.
#
# Takes the raw contents of a C string literal and returns it with every
# \xNN sequence replaced by the single byte it denotes; all other text,
# including other backslash escapes, is passed through unchanged. Prints a
# message and returns undef if a hex escape denotes a value above 0xFF.
sub UnescapeHexSequence($)
{
    my ($originalStr) = @_;

    my $escapedStr = $originalStr;
    my $unescapedStr = "";

    for (;;) {
        if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) {
            if (256 <= hex($1)) {
                print "Hexadecimal escape sequence out of range: \\x$1\n";
                return undef;
            }
            # Emit the byte by value so that odd digit counts and leading
            # zeros (\xA, \x041) decode to the byte C would produce.
            $unescapedStr .= pack("C", hex($1));
        } elsif ($escapedStr =~ s-^(\\.|.)--) {
            # Consume other escapes as a pair so that an escaped backslash
            # followed by "x41" is not mistaken for a hex escape.
            $unescapedStr .= $1;
        } else {
            return $unescapedStr;
        }
    }
}

# Records one UI_STRING occurrence.
#
#   $string  - the string to localize (raw C literal contents)
#   $key     - the lookup key (the caller passes $string when no key was given)
#   $comment - the translator comment
#   $file    - source file containing the macro
#   $line    - line on which the macro started
#
# Sets $sawError and returns early on an invalid hex escape or on U+FFFD in
# any of the three texts. Counts a key collision, without recording the new
# occurrence, if the key was already seen with a different string or comment.
sub HandleUIString
{
    my ($string, $key, $comment, $file, $line) = @_;

    my $bad = 0;
    $string = UnescapeHexSequence($string);
    if (!defined($string)) {
        print "$file:$line: string has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $key = UnescapeHexSequence($key);
    if (!defined($key)) {
        print "$file:$line: key has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    $comment = UnescapeHexSequence($comment);
    if (!defined($comment)) {
        print "$file:$line: comment has an illegal hexadecimal escape sequence\n";
        $bad = 1;
    }
    if (grep { $_ == 0xFFFD } unpack "U*", $string) {
        print "$file:$line: string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) {
        print "$file:$line: key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if (grep { $_ == 0xFFFD } unpack "U*", $comment) {
        print "$file:$line: comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n";
        $bad = 1;
    }
    if ($bad) {
        $sawError = 1;
        return;
    }

    # Use defined() rather than truthiness so that a previously recorded
    # "0" or "" still takes part in the collision check.
    if (defined $stringByKey{$key} && $stringByKey{$key} ne $string) {
        print "$file:$line: warning: encountered the same key, \"$key\", twice, with different strings\n";
        print "$fileByKey{$key}:$lineByKey{$key}: warning: previous occurrence\n";
        $keyCollisionCount++;
        return;
    }
    if (defined $commentByKey{$key} && $commentByKey{$key} ne $comment) {
        print "$file:$line: warning: encountered the same key, \"$key\", twice, with different comments\n";
        print "$fileByKey{$key}:$lineByKey{$key}: warning: previous occurrence\n";
        $keyCollisionCount++;
        return;
    }

    $fileByKey{$key} = $file;
    $lineByKey{$key} = $line;
    $stringByKey{$key} = $string;
    $commentByKey{$key} = $comment;
}

print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount;

my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception;
if (@unusedExceptions) {
    for my $unused (@unusedExceptions) {
        print "$exceptionsFile:$exception{$unused}: warning: exception $unused not used\n";
    }
    print "\n";
}

print "$localizedCount localizable strings\n" if $localizedCount;
print "$keyCollisionCount key collisions\n" if $keyCollisionCount;
print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount;
print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount;
print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions;

if ($sawError) {
    print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n";
    exit 1;
}

my $localizedStrings = "";

for my $key (sort keys %commentByKey) {
    $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n";
}

if (-e "$fileToUpdate") {
    if (!$verify) {
        # Write out the strings file as UTF-8
        open my $stringsOut, ">", $fileToUpdate or die "Couldn't open $fileToUpdate for writing: $!\n";
        print {$stringsOut} $localizedStrings;
        close $stringsOut or die "Couldn't write $fileToUpdate: $!\n";
    } else {
        # Verify mode: compare the existing strings file against what was
        # just extracted, reporting unused, changed and missing entries.
        open my $stringsIn, "<", $fileToUpdate or die "Couldn't open $fileToUpdate for reading: $!\n";

        my $lastComment = "";

        while (<$stringsIn>) {
            chomp;

            next if (/^\s*$/);

            if (/^\/\* (.*) \*\/$/) {
                $lastComment = $1;
            } elsif (/^"((?:[^\\"]|\\.)*)"\s*=\s*"((?:[^\\"]|\\.)*)";$/) {
                my ($existingKey, $existingValue) = ($1, $2);

                # Deleting as we go leaves only the missing keys in %stringByKey.
                my $string = delete $stringByKey{$existingKey};
                if (!defined $string) {
                    print "$fileToUpdate:$.: unused key \"$existingKey\"\n";
                    $sawError = 1;
                } else {
                    if ($string ne $existingValue) {
                        print "$fileToUpdate:$.: unexpected value \"$existingValue\" for key \"$existingKey\"\n";
                        print "$fileByKey{$existingKey}:$lineByKey{$existingKey}: expected value \"$string\" defined here\n";
                        $sawError = 1;
                    }
                    if ($lastComment ne $commentByKey{$existingKey}) {
                        print "$fileToUpdate:$.: unexpected comment /* $lastComment */ for key \"$existingKey\"\n";
                        print "$fileByKey{$existingKey}:$lineByKey{$existingKey}: expected comment /* $commentByKey{$existingKey} */ defined here\n";
                        $sawError = 1;
                    }
                }
            } else {
                print "$fileToUpdate:$.: line with unexpected format: $_\n";
                $sawError = 1;
            }
        }

        close $stringsIn;

        for my $missing (keys %stringByKey) {
            print "$fileByKey{$missing}:$lineByKey{$missing}: missing key \"$missing\"\n";
            $sawError = 1;
        }

        if ($sawError) {
            print "\n$fileToUpdate:0: file is not up to date.\n";
            exit 1;
        }
    }
} else {
    print "error: $fileToUpdate does not exist\n";
    exit 1;
}