summaryrefslogtreecommitdiff
path: root/tools/split-messages.pl
diff options
context:
space:
mode:
Diffstat (limited to 'tools/split-messages.pl')
-rw-r--r--tools/split-messages.pl318
1 files changed, 318 insertions, 0 deletions
diff --git a/tools/split-messages.pl b/tools/split-messages.pl
new file mode 100644
index 000000000..1c78fd271
--- /dev/null
+++ b/tools/split-messages.pl
@@ -0,0 +1,318 @@
+#!/usr/bin/perl
+#
+# Copyright 2013 Vivek Dasmohapatra <vivek@collabora.co.uk>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# * The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+=head1
+
+Filter the NetSurf combined messages (l10n) file according to language
+and platform and generate output in a selection of formats for use
+both internally within netsurf and externally for translation
+services.
+
+=cut
+
+use strict;
+
+use Getopt::Long ();
+use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY );
+
+use IO::Compress::Gzip;
+
+# Getopt::Long behaviour flags, handed to Getopt::Long::Configure() by main().
+use constant GETOPT_OPTS => ( 'auto_abbrev', 'no_getopt_compat', 'bundling' );
+
+# Option specifications, handed to Getopt::Long::GetOptions() by main().
+# Each entry is "name|alias...[=s]"; "=s" means the option takes a string.
+use constant GETOPT_SPEC => (
+    'output|o=s',
+    'input|i=s',
+    'lang|l=s',
+    'dlang|d=s',
+    'plat|platform|p=s',
+    'format|fmt|f=s',
+    'warning|W=s',
+    'gzip|z',
+    'help|h|?',
+);
+
+# default option values (overwritten in place by the option parser):
+my %opt = (
+    dlang   => 'en',
+    plat    => 'any',
+    format  => 'messages',
+    warning => 'none',
+);
+
+# forward declarations, so the prototyped subs can be called from main()
+# before their bodies appear further down the file:
+sub input_stream ();
+sub output_stream ();
+sub formatter ();
+sub static_section($);
+sub usage ();
+
+# Entry point: parse the command line, then copy the combined messages file
+# from the input stream to the output stream, keeping only the entries for
+# the requested platform and target language.  When the target language has
+# no entry for a key, the default-language entry for that key is emitted
+# instead (if there is one).
+#
+# Calls usage() (which exits) when the options are invalid or help was
+# requested; dies if the output format is unknown or if a stream cannot be
+# opened or closed.
+sub main ()
+{
+    # option parsing:
+    Getopt::Long::Configure( GETOPT_OPTS );
+    my $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC );
+
+    # allow input and output to be specified as non-option arguments:
+    if( @ARGV ) { $opt{input } ||= shift( @ARGV ) }
+    if( @ARGV ) { $opt{output} ||= shift( @ARGV ) }
+
+    # check the options are sane (and we weren't asked for the help) BEFORE
+    # touching any stream: the output file is created exclusively, so opening
+    # it first would leave an empty file behind on a usage error.
+    my $lang_re = qr/^[a-z]{2}(?:_[A-Z]{2})?$/;
+
+    if( !$opt_ok                 ||
+        $opt{help}               ||
+        !defined( $opt{lang} )   ||
+        $opt{lang}  !~ $lang_re  ||
+        !defined( $opt{dlang} )  ||
+        $opt{dlang} !~ $lang_re  )
+    {
+        usage();
+    }
+
+    # resolve the formatter first (it dies on an unknown format), then open
+    # the streams:
+    my $format = formatter();
+    my $header = static_section('header');
+    my $footer = static_section('footer');
+    my $input  = input_stream();
+    my $output = output_stream();
+
+    # we are good to go:
+    print( $output $header );
+
+    my $cur_key;            # key of the entries currently being read
+    my $tran_out = 0;       # true once $cur_key was output in the target language
+
+    my( $dlang_key, $dlang_val );   # most recent default-language entry
+    my( $tran_key,  $tran_val  );   # most recent target-language entry
+
+    # Format and print one entry.  The formatter is handed private copies:
+    # some formatters escape their value argument in place, which would
+    # otherwise corrupt the values compared in $finish_key below.
+    my $emit = sub
+    {
+        my( $key, $val ) = @_;
+        print( $output $format->( $key, $val ) );
+    };
+
+    # Called whenever all entries for $cur_key have been seen: ensure that a
+    # translation was generated, falling back to the default language.
+    my $finish_key = sub
+    {
+        # nothing has been read yet:
+        return unless defined( $cur_key );
+
+        if( !$tran_out )
+        {
+            # No translation for this key
+            if( defined( $dlang_key ) && $cur_key eq $dlang_key )
+            {
+                $emit->( $dlang_key, $dlang_val );
+                if( $opt{warning} eq "fb" )
+                {
+                    warn( "warning: $dlang_key missing translation in $opt{lang} using $opt{dlang} instead" );
+                }
+            }
+            else
+            {
+                # No translation and nothing in default language
+                warn( "warning: $cur_key missing translation in $opt{lang} and no fallback in $opt{dlang}" );
+            }
+        }
+        elsif( $opt{dlang} ne $opt{lang}  &&
+               defined( $dlang_key )      &&
+               $tran_key eq $dlang_key    &&
+               $tran_val eq $dlang_val    )
+        {
+            if( $opt{warning} eq "dup" )
+            {
+                warn( "warning: $tran_key value in $opt{lang} is same as in default $opt{dlang}" );
+            }
+        }
+    };
+
+    while( my $line = <$input> )
+    {
+        # skip comment and empty lines
+        next if $line =~ /^#/;
+        next if $line =~ /^\s*$/;
+
+        # only parsing things that look like message lines, i.e.
+        # lang.platform.key:value (the separators are literal dots):
+        if( $line =~ /^([a-z]{2}(?:_[A-Z]{2})?)\.([^.]+)\.([^:]+):(.*)/ )
+        {
+            my( $lang, $plat, $key, $val ) = ( $1, $2, $3, $4 );
+
+            # skip the line if it is not for our target platform
+            if( $opt{plat} ne 'any' &&
+                $opt{plat} ne $plat &&
+                'all'      ne $plat )
+            {
+                next;
+            }
+
+            # On key change ensure a translation has been generated
+            if( !defined( $cur_key ) || $cur_key ne $key )
+            {
+                $finish_key->();
+                $cur_key  = $key;
+                $tran_out = 0;
+            }
+
+            # capture the key/value in the default language
+            if( $lang eq $opt{dlang} )
+            {
+                $dlang_key = $key;
+                $dlang_val = $val;
+            }
+
+            # output if its the target language
+            if( $lang eq $opt{lang} )
+            {
+                $emit->( $key, $val );
+                $tran_out = 1;
+                $tran_val = $val;
+                $tran_key = $key;
+            }
+        }
+        else
+        {
+            warn( "Malformed entry: $line" );
+        }
+    }
+
+    # the final key in the file is never followed by a key change, so it
+    # has to be checked explicitly:
+    $finish_key->();
+
+    print( $output $footer );
+
+    # buffered (and compressed) output is only completely written when the
+    # handle is closed, so this is where late write errors show up:
+    close( $output ) ||
+        die( "$0: Failed to write output " .
+             ( $opt{output} || '-stdout-' ) . ": $!\n" );
+}
+
+main();
+
+# Print a usage summary to STDERR and exit with status 1.  The list of
+# available output formats is discovered at run time from the packages
+# nested under msgfmt::.
+sub usage ()
+{
+    # a nested package appears in its parent's symbol table as a key with a
+    # trailing '::'; strip that (on a copy) to get the format name.
+    my @names = map  { ( my $name = $_ ) =~ s/::$//; $name }
+                grep { /::$/ }
+                keys( %{$::{'msgfmt::'}} );
+
+    # hash order is random: sort so the help text is stable between runs
+    my @fmt = sort { $a cmp $b } @names;
+
+    # NOTE: the heredoc interpolates, so the line-continuation backslash
+    # has to be doubled to actually be printed.
+    print(STDERR <<TXT );
+usage:
+ $0 -l lang-code [-d def-lang-code] [-W warning] \\
+ [-o output-file] [-i input-file] [-p platform] [-f format] [-z]
+
+ $0 -l lang-code ... [input-file [output-file]]
+
+ lang-code : en fr ko ... (no default)
+ def-lang-code : en fr ko ... (default 'en')
+ warning : none fb dup (default 'none')
+ platform : any gtk ami (default 'any')
+ format : @fmt (default 'messages')
+ input-file : defaults to standard input
+ output-file : defaults to standard output
+TXT
+    exit(1);
+}
+
+# Return a read handle for the messages source: the file named by the
+# 'input' option when one was given, standard input otherwise.
+# Dies if the named file cannot be opened.
+sub input_stream ()
+{
+    # no (or empty) input option: read from standard input
+    return \*STDIN unless $opt{input};
+
+    sysopen( my $source, $opt{input}, O_RDONLY ) ||
+        die( "$0: Failed to open input file $opt{input}: $!\n" );
+
+    return $source;
+}
+
+# Return the raw (uncompressed) write handle: the file named by the
+# 'output' option when one was given, standard output otherwise.
+# The file is opened with O_CREAT|O_EXCL, so an already existing file is
+# never overwritten - the open fails instead.  Dies if the open fails.
+sub underlying_output_stream ()
+{
+    # no (or empty) output option: write to standard output
+    return \*STDOUT unless $opt{output};
+
+    my $mode = O_CREAT | O_EXCL | O_APPEND | O_WRONLY;
+
+    sysopen( my $sink, $opt{output}, $mode ) ||
+        die( "$0: Failed to open output file $opt{output}: $!\n" );
+
+    return $sink;
+}
+
+# Return the handle that formatted messages are printed to: the underlying
+# output stream, wrapped in a gzip compressor when the 'gzip' option is set.
+# Dies if the compressor cannot be created.
+sub output_stream ()
+{
+    my $ofh = underlying_output_stream();
+
+    if( $opt{gzip} )
+    {
+        # AutoClose: closing the compressor also closes the wrapped handle.
+        my $gzfh =
+          IO::Compress::Gzip->new( $ofh, AutoClose => 1, -Level => 9 );
+
+        # the constructor returns undef on failure; without this check the
+        # problem would only surface later as a failed print on undef.
+        $gzfh ||
+            die( "$0: Failed to start gzip compression: " .
+                 "$IO::Compress::Gzip::GzipError\n" );
+
+        $ofh = $gzfh;
+    }
+
+    return $ofh;
+}
+
+# Look up the 'format' sub of the package implementing the selected output
+# format (msgfmt::<format>) and return a reference to it.
+# Dies if that package does not provide one.
+sub formatter ()
+{
+    my $name    = $opt{format};
+    my $handler = UNIVERSAL::can( "msgfmt::$name", "format" );
+
+    if( !$handler )
+    {
+        die( "No handler found for format '$name'\n" );
+    }
+
+    return $handler;
+}
+
+# Return the fixed text of the named section (main() asks for 'header' and
+# 'footer') for the selected output format, or the empty string when the
+# format's package does not define a sub of that name.
+sub static_section ($)
+{
+    my( $sect )   = @_;
+    my $class     = "msgfmt::$opt{format}";
+    my $generator = UNIVERSAL::can( $class, $sect );
+
+    # formats are free to omit a section entirely
+    return "" unless $generator;
+
+    return $generator->();
+}
+
+# format implementations:
+{
+    package msgfmt::java;
+
+    # Return a copy of the given text with every ':', "'" and '\' preceded
+    # by a backslash.
+    # The substitution is done on a copy: the elements of @_ are aliases of
+    # the caller's variables, so editing $_[0] directly would change the
+    # caller's value as well (and die outright on a read-only literal).
+    sub escape
+    {
+        my( $text ) = @_;
+        $text =~ s/([:'\\])/\\$1/g;
+        return $text;
+    }
+
+    # Format one entry as "key = escaped-value\n".
+    sub format { return join(' = ', $_[0], escape( $_[1] ) ) . "\n" }
+
+    # Comment line naming the file the output was generated from.
+    sub header { "# autogenerated from " . ($opt{input} || '-stdin-') . "\n" }
+}
+
+{
+    package msgfmt::messages; # native netsurf format
+
+    # Join all arguments (key, value) with ':' and terminate the line.
+    sub format
+    {
+        my $entry = join( ":", @_ );
+        return "$entry\n";
+    }
+
+    # Comment block warning that the output is generated, naming the file
+    # it was generated from.
+    sub header
+    {
+        my $source = $opt{input} || '-stdin-';
+        return <<TXT;
+# This messages file is automatically generated from $source
+# at build-time. Please go and edit that instead of this.\n
+TXT
+    }
+}
+
+{
+    package msgfmt::transifex;
+    use base 'msgfmt::java';
+
+    # transifex has the following quirks:
+    # \ processing is buggy - they re-process every \\ as a \
+    # so \\n, instead of producing literal '\n', is interpreted as \ ^J
+    # Additionally, although the java properties format specifies
+    # that ' should be \ escaped, transifex does not allow/support this:
+    #
+    # Return a copy of the given text with every ':' backslash-escaped, and
+    # every '\' that is not followed by one of a b f n r t v doubled.
+    # The substitution is done on a copy: the elements of @_ are aliases of
+    # the caller's variables, so editing $_[0] directly would change the
+    # caller's value as well (and die outright on a read-only literal).
+    sub escape
+    {
+        my( $text ) = @_;
+        $text =~ s/(:|\\(?![abfnrtv]))/\\$1/g;
+        return $text;
+    }
+
+    # Format one entry as "key = escaped-value\n".  Redefined here (rather
+    # than inherited) so that the plain function call resolves to this
+    # package's escape.
+    sub format { return join(' = ', $_[0], escape( $_[1] ) ) . "\n" }
+}
+
+########### YAML ###########
+#{
+# package msgfmt::yaml;
+# use YAML qw(Dump Bless);
+# print Dump %data;
+#}
+
+{
+    package msgfmt::android;
+
+    # 'use' is processed at compile time wherever it is written, so the
+    # import is listed once at the top of the package.
+    use HTML::Entities qw(encode_entities);
+
+    # XML declaration and opening tag of the resource file.
+    sub header { return qq{<?xml version="1.0" encoding="utf-8"?>\n<resources>\n} }
+
+    # Closing tag of the resource file (no trailing newline).
+    sub footer { return qq{</resources>} }
+
+    # Format one entry as a <string> element; only < > & and " in the
+    # value are turned into entities, the key is inserted as it is.
+    sub format
+    {
+        my $text = encode_entities( $_[1], '<>&"' );
+        return qq{ <string name="$_[0]">$text</string>\n};
+    }
+}