| #!/usr/bin/perl -w |
| |
| # Copyright (C) 2006, 2007, 2009, 2010 Apple Inc. All rights reserved. |
| # |
| # Redistribution and use in source and binary forms, with or without |
| # modification, are permitted provided that the following conditions |
| # are met: |
| # |
| # 1. Redistributions of source code must retain the above copyright |
| # notice, this list of conditions and the following disclaimer. |
| # 2. Redistributions in binary form must reproduce the above copyright |
| # notice, this list of conditions and the following disclaimer in the |
| # documentation and/or other materials provided with the distribution. |
| # 3. Neither the name of Apple Computer, Inc. ("Apple") nor the names of |
| # its contributors may be used to endorse or promote products derived |
| # from this software without specific prior written permission. |
| # |
| # THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY |
| # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
| # WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| # DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY |
| # DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES |
| # (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; |
| # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND |
| # ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| # THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| |
| # This script is like the genstrings tool (minus most of the options) with these differences. |
| # |
| # 1) It uses the names UI_STRING and UI_STRING_WITH_KEY for the macros, rather than the macros |
| # from NSBundle.h, and doesn't support tables (although they would be easy to add). |
| # 2) It supports UTF-8 in key strings (and hence uses "" strings rather than @"" strings; |
| # @"" strings only reliably support ASCII since they are decoded based on the system encoding |
| # at runtime, so give different results on US and Japanese systems for example). |
| # 3) It looks for strings that are not marked for localization, using both macro names that are |
| # known to be used for debugging in Intrigue source code and an exceptions file. |
| # 4) It finds the files to work on rather than taking them as parameters, and also uses a |
| # hardcoded location for both the output file and the exceptions file. |
| # It would have been nice to use the project to find the source files, but it's too hard to |
| # locate source files after parsing a .pbxproj file. |
| |
| # The exceptions file has a list of strings in quotes, filenames, and filename/string pairs separated by :. |
| |
| use strict; |
| |
| sub UnescapeHexSequence($); |
| |
| my %isDebugMacro = ( ASSERT_WITH_MESSAGE => 1, LOG_ERROR => 1, ERROR => 1, NSURL_ERROR => 1, FATAL => 1, LOG => 1, LOG_WARNING => 1, UI_STRING_LOCALIZE_LATER => 1, LPCTSTR_UI_STRING_LOCALIZE_LATER => 1, UNLOCALIZED_STRING => 1, UNLOCALIZED_LPCTSTR => 1, dprintf => 1, NSException => 1, NSLog => 1, printf => 1 ); |
| |
| @ARGV >= 2 or die "Usage: extract-localizable-strings <exceptions file> <file to update> [ directory... ]\nDid you mean to run update-webkit-localizable-strings instead?\n"; |
| |
| my $exceptionsFile = shift @ARGV; |
| -f $exceptionsFile or die "Couldn't find exceptions file $exceptionsFile\n" unless $exceptionsFile eq "-"; |
| |
| my $fileToUpdate = shift @ARGV; |
| -f $fileToUpdate or die "Couldn't find file to update $fileToUpdate\n"; |
| |
| my $warnAboutUnlocalizedStrings = $exceptionsFile ne "-"; |
| |
| my @directories = (); |
| my @directoriesToSkip = (); |
| if (@ARGV < 1) { |
| push(@directories, "."); |
| } else { |
| for my $dir (@ARGV) { |
| if ($dir =~ /^-(.*)$/) { |
| push @directoriesToSkip, $1; |
| } else { |
| push @directories, $dir; |
| } |
| } |
| } |
| |
| my $sawError = 0; |
| |
| my $localizedCount = 0; |
| my $keyCollisionCount = 0; |
| my $notLocalizedCount = 0; |
| my $NSLocalizeCount = 0; |
| |
| my %exception; |
| my %usedException; |
| |
| if ($exceptionsFile ne "-" && open EXCEPTIONS, $exceptionsFile) { |
| while (<EXCEPTIONS>) { |
| chomp; |
| if (/^"([^\\"]|\\.)*"$/ or /^[-_\/\w.]+.(h|m|mm|c|cpp)$/ or /^[-_\/\w.]+.(h|m|mm|c|cpp):"([^\\"]|\\.)*"$/) { |
| if ($exception{$_}) { |
| print "$exceptionsFile:$.:exception for $_ appears twice\n"; |
| print "$exceptionsFile:$exception{$_}:first appearance\n"; |
| } else { |
| $exception{$_} = $.; |
| } |
| } else { |
| print "$exceptionsFile:$.:syntax error\n"; |
| } |
| } |
| close EXCEPTIONS; |
| } |
| |
| my $quotedDirectoriesString = '"' . join('" "', @directories) . '"'; |
| for my $dir (@directoriesToSkip) { |
| $quotedDirectoriesString .= ' -path "' . $dir . '" -prune -o'; |
| } |
| |
| my @files = ( split "\n", `find $quotedDirectoriesString \\( -name "*.h" -o -name "*.m" -o -name "*.mm" -o -name "*.c" -o -name "*.cpp" \\)` ); |
| |
| for my $file (sort @files) { |
| next if $file =~ /\/\w+LocalizableStrings\w*\.h$/ || $file =~ /\/LocalizedStrings\.h$/; |
| |
| $file =~ s-^./--; |
| |
| open SOURCE, $file or die "can't open $file\n"; |
| |
| my $inComment = 0; |
| |
| my $expected = ""; |
| my $macroLine; |
| my $macro; |
| my $UIString; |
| my $key; |
| my $comment; |
| |
| my $string; |
| my $stringLine; |
| my $nestingLevel; |
| |
| my $previousToken = ""; |
| |
| while (<SOURCE>) { |
| chomp; |
| |
| # Handle continued multi-line comment. |
| if ($inComment) { |
| next unless s-.*\*/--; |
| $inComment = 0; |
| } |
| |
| # Handle all the tokens in the line. |
| while (s-^\s*([#\w]+|/\*|//|[^#\w/'"()\[\],]+|.)--) { |
| my $token = $1; |
| |
| if ($token eq "\"") { |
| if ($expected and $expected ne "a quoted string") { |
| print "$file:$.:ERROR:found a quoted string but expected $expected\n"; |
| $sawError = 1; |
| $expected = ""; |
| } |
| if (s-^(([^\\$token]|\\.)*?)$token--) { |
| if (!defined $string) { |
| $stringLine = $.; |
| $string = $1; |
| } else { |
| $string .= $1; |
| } |
| } else { |
| print "$file:$.:ERROR:mismatched quotes\n"; |
| $sawError = 1; |
| $_ = ""; |
| } |
| next; |
| } |
| |
| if (defined $string) { |
| handleString: |
| if ($expected) { |
| if (!defined $UIString) { |
| # FIXME: Validate UTF-8 here? |
| $UIString = $string; |
| $expected = ","; |
| } elsif (($macro =~ /(WEB_)?UI_STRING_KEY(_INTERNAL)?$/) and !defined $key) { |
| # FIXME: Validate UTF-8 here? |
| $key = $string; |
| $expected = ","; |
| } elsif (!defined $comment) { |
| # FIXME: Validate UTF-8 here? |
| $comment = $string; |
| $expected = ")"; |
| } |
| } else { |
| if (defined $nestingLevel) { |
| # In a debug macro, no need to localize. |
| } elsif ($previousToken eq "#include" or $previousToken eq "#import") { |
| # File name, no need to localize. |
| } elsif ($previousToken eq "extern" and $string eq "C") { |
| # extern "C", no need to localize. |
| } elsif ($string eq "") { |
| # Empty string can sometimes be localized, but we need not complain if not. |
| } elsif ($exception{$file}) { |
| $usedException{$file} = 1; |
| } elsif ($exception{"\"$string\""}) { |
| $usedException{"\"$string\""} = 1; |
| } elsif ($exception{"$file:\"$string\""}) { |
| $usedException{"$file:\"$string\""} = 1; |
| } else { |
| print "$file:$stringLine:\"$string\" is not marked for localization\n" if $warnAboutUnlocalizedStrings; |
| $notLocalizedCount++; |
| } |
| } |
| $string = undef; |
| last if !defined $token; |
| } |
| |
| $previousToken = $token; |
| |
| if ($token =~ /^NSLocalized/ && $token !~ /NSLocalizedDescriptionKey/ && $token !~ /NSLocalizedStringFromTableInBundle/) { |
| print "$file:$.:ERROR:found a use of an NSLocalized macro; not supported\n"; |
| $nestingLevel = 0 if !defined $nestingLevel; |
| $sawError = 1; |
| $NSLocalizeCount++; |
| } elsif ($token eq "/*") { |
| if (!s-^.*?\*/--) { |
| $_ = ""; # If the comment doesn't end, discard the result of the line and set flag |
| $inComment = 1; |
| } |
| } elsif ($token eq "//") { |
| $_ = ""; # Discard the rest of the line |
| } elsif ($token eq "'") { |
| if (!s-([^\\]|\\.)'--) { #' <-- that single quote makes the Project Builder editor less confused |
| print "$file:$.:ERROR:mismatched single quote\n"; |
| $sawError = 1; |
| $_ = ""; |
| } |
| } else { |
| if ($expected and $expected ne $token) { |
| print "$file:$.:ERROR:found $token but expected $expected\n"; |
| $sawError = 1; |
| $expected = ""; |
| } |
| if ($token =~ /(WEB_)?UI_STRING(_KEY)?(_INTERNAL)?$/) { |
| $expected = "("; |
| $macro = $token; |
| $UIString = undef; |
| $key = undef; |
| $comment = undef; |
| $macroLine = $.; |
| } elsif ($token eq "(" or $token eq "[") { |
| ++$nestingLevel if defined $nestingLevel; |
| $expected = "a quoted string" if $expected; |
| } elsif ($token eq ",") { |
| $expected = "a quoted string" if $expected; |
| } elsif ($token eq ")" or $token eq "]") { |
| $nestingLevel = undef if defined $nestingLevel && !--$nestingLevel; |
| if ($expected) { |
| $key = $UIString if !defined $key; |
| HandleUIString($UIString, $key, $comment, $file, $macroLine); |
| $macro = ""; |
| $expected = ""; |
| $localizedCount++; |
| } |
| } elsif ($isDebugMacro{$token}) { |
| $nestingLevel = 0 if !defined $nestingLevel; |
| } |
| } |
| } |
| |
| } |
| |
| goto handleString if defined $string; |
| |
| if ($expected) { |
| print "$file:ERROR:reached end of file but expected $expected\n"; |
| $sawError = 1; |
| } |
| |
| close SOURCE; |
| } |
| |
| # Unescapes C language hexadecimal escape sequences. |
| sub UnescapeHexSequence($) |
| { |
| my ($originalStr) = @_; |
| |
| my $escapedStr = $originalStr; |
| my $unescapedStr = ""; |
| |
| for (;;) { |
| if ($escapedStr =~ s-^\\x([[:xdigit:]]+)--) { |
| if (256 <= hex($1)) { |
| print "Hexadecimal escape sequence out of range: \\x$1\n"; |
| return undef; |
| } |
| $unescapedStr .= pack("H*", $1); |
| } elsif ($escapedStr =~ s-^(.)--) { |
| $unescapedStr .= $1; |
| } else { |
| return $unescapedStr; |
| } |
| } |
| } |
| |
| my %stringByKey; |
| my %commentByKey; |
| my %fileByKey; |
| my %lineByKey; |
| |
| sub HandleUIString |
| { |
| my ($string, $key, $comment, $file, $line) = @_; |
| |
| my $bad = 0; |
| $string = UnescapeHexSequence($string); |
| if (!defined($string)) { |
| print "$file:$line:ERROR:string has an illegal hexadecimal escape sequence\n"; |
| $bad = 1; |
| } |
| $key = UnescapeHexSequence($key); |
| if (!defined($key)) { |
| print "$file:$line:ERROR:key has an illegal hexadecimal escape sequence\n"; |
| $bad = 1; |
| } |
| $comment = UnescapeHexSequence($comment); |
| if (!defined($comment)) { |
| print "$file:$line:ERROR:comment has an illegal hexadecimal escape sequence\n"; |
| $bad = 1; |
| } |
| if (grep { $_ == 0xFFFD } unpack "U*", $string) { |
| print "$file:$line:ERROR:string for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n"; |
| $bad = 1; |
| } |
| if ($string ne $key && grep { $_ == 0xFFFD } unpack "U*", $key) { |
| print "$file:$line:ERROR:key has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n"; |
| $bad = 1; |
| } |
| if (grep { $_ == 0xFFFD } unpack "U*", $comment) { |
| print "$file:$line:ERROR:comment for translation has illegal UTF-8 -- most likely a problem with the Text Encoding of the source file\n"; |
| $bad = 1; |
| } |
| if ($bad) { |
| $sawError = 1; |
| return; |
| } |
| |
| if ($stringByKey{$key} && $stringByKey{$key} ne $string) { |
| print "$file:$line:encountered the same key, \"$key\", twice, with different strings\n"; |
| print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n"; |
| $keyCollisionCount++; |
| return; |
| } |
| if ($commentByKey{$key} && $commentByKey{$key} ne $comment) { |
| print "$file:$line:encountered the same key, \"$key\", twice, with different comments\n"; |
| print "$fileByKey{$key}:$lineByKey{$key}:previous occurrence\n"; |
| $keyCollisionCount++; |
| return; |
| } |
| |
| $fileByKey{$key} = $file; |
| $lineByKey{$key} = $line; |
| $stringByKey{$key} = $string; |
| $commentByKey{$key} = $comment; |
| } |
| |
| print "\n" if $sawError || $notLocalizedCount || $NSLocalizeCount; |
| |
| my @unusedExceptions = sort grep { !$usedException{$_} } keys %exception; |
| if (@unusedExceptions) { |
| for my $unused (@unusedExceptions) { |
| print "$exceptionsFile:$exception{$unused}:exception $unused not used\n"; |
| } |
| print "\n"; |
| } |
| |
| print "$localizedCount localizable strings\n" if $localizedCount; |
| print "$keyCollisionCount key collisions\n" if $keyCollisionCount; |
| print "$notLocalizedCount strings not marked for localization\n" if $notLocalizedCount; |
| print "$NSLocalizeCount uses of NSLocalize\n" if $NSLocalizeCount; |
| print scalar(@unusedExceptions), " unused exceptions\n" if @unusedExceptions; |
| |
| if ($sawError) { |
| print "\nErrors encountered. Exiting without writing to $fileToUpdate.\n"; |
| exit 1; |
| } |
| |
| my $localizedStrings = ""; |
| |
| for my $key (sort keys %commentByKey) { |
| $localizedStrings .= "/* $commentByKey{$key} */\n\"$key\" = \"$stringByKey{$key}\";\n\n"; |
| } |
| |
| # Write out the strings file in UTF-16 with a BOM. |
| utf8::decode($localizedStrings) if $^V ge v5.8; |
| my $output = pack "n*", (0xFEFF, unpack "U*", $localizedStrings); |
| |
| if (-e "$fileToUpdate") { |
| open STRINGS, ">", "$fileToUpdate" or die; |
| print STRINGS $output; |
| close STRINGS; |
| } else { |
| print "$fileToUpdate does not exist\n"; |
| exit 1; |
| } |