From 820767e417fb63355a904f0ce0761be17f3a03fa Mon Sep 17 00:00:00 2001 From: Vivek Dasmohapatra Date: Wed, 1 May 2013 11:41:25 +0100 Subject: Improve the message split script to allow alternate output formats --- Makefile | 4 +- utils/split-messages.pl | 231 +++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 219 insertions(+), 16 deletions(-) mode change 100755 => 100644 utils/split-messages.pl diff --git a/Makefile b/Makefile index 0ae5b4ab6..e57800b37 100644 --- a/Makefile +++ b/Makefile @@ -605,7 +605,7 @@ clean-builddir: CLEANS += clean-builddir all-program: $(EXETARGET) post-exe - $(call split_install_messages, '[^\.]+', !NetSurf/Resources) + $(call split_install_messages, any, !NetSurf/Resources) .PHONY: testament testament utils/testament.h: @@ -744,7 +744,7 @@ FAT_LANGUAGES=de en fr it nl define split_install_messages $(foreach LANG, $(FAT_LANGUAGES), @echo MSGSPLIT: $(1)/$(LANG) to $(2) $(Q)mkdir -p $(2)/$(LANG)$(3) - $(Q)$(PERL) utils/split-messages.pl $(LANG) $(1) < resources/FatMessages | gzip -9n > $(2)$(3)/$(LANG)/Messages + $(Q)$(PERL) utils/split-messages.pl -l $(LANG) -p $(1) -f messages resources/FatMessages | gzip -9n > $(2)$(3)/$(LANG)/Messages ) endef diff --git a/utils/split-messages.pl b/utils/split-messages.pl old mode 100755 new mode 100644 index 2bbe79a43..08d882210 --- a/utils/split-messages.pl +++ b/utils/split-messages.pl @@ -1,23 +1,226 @@ -#!/usr/bin/perl -w +#!/usr/bin/perl +# +# Copyright 2013 Vivek Dasmohapatra +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# * The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +=head1 + +Filter the NetSurf combined messages (i10n) file according to language +and platform and generate output in a selection of formats for use +both internally within netsurf and externally for translation +services. + +=cut use strict; -die "usage: split-messages < FatMessages > ThinMessages" if ($#ARGV != 1); +use Getopt::Long (); +use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY ); + +use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling ); +use constant GETOPT_SPEC => + qw( output|o=s + input|i=s + lang|l=s + plat|platform|p=s + format|fmt|f=s + help|h|? ); + +# default option values: +my %opt = qw( plat any format messages ); + +sub input_stream (); +sub output_stream (); +sub formatter (); +sub header (); +sub usage (); + +sub main () +{ + my $input; + my $output; + my $format; + my $header; + my $opt_ok; + + # option parsing: + Getopt::Long::Configure( GETOPT_OPTS ); + $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC ); + + # allow input and output to be specified as non-option arguments: + if( @ARGV ) { $opt{input } ||= shift( @ARGV ) } + if( @ARGV ) { $opt{output} ||= shift( @ARGV ) } + + # open the appropriate streams and get the formatter and headers: + if( $opt_ok ) + { + $input = input_stream(); + $output = output_stream(); + $format = formatter(); + $header = header(); + } + + # double check the options are sane (and we weren't asked for the help) + if( !$opt_ok || $opt{help} || $opt{lang} !~ /^[a-z]{2}$/ ) + { + usage(); + } + + # we are good to go: + print( $output $header ); + + while (<$input>) + { + /^#/ && next; + /^\s*$/ && next; + # only parsing thinsg that look like message lines: + if( /^([a-z]{2}).([^.]+).([^:]+):(.*)/ ) + { + my( $lang, $plat, $key, $val ) = ( $1, $2, $3, $4 ); + + if( $lang ne $opt{lang} ) { next }; + if( $opt{plat} eq 'any' || + $opt{plat} eq $plat || + 'all' eq $plat ) + { + print( $output $format->( $key, $val ), "\n" ); + } + } + else + { + warn( "Malformed entry: $_" ); + } + } +} + +main(); + +sub usage () +{ + my @fmt = map { s/::$//; $_ } keys(%{$::{'msgfmt::'}}); + print(STDERR <UNIVERSAL::can("format"); + + return $func || die( "No handler found for format '$name'\n" ); +} + +sub header () +{ + my $name = $opt{format}; + my $func = "msgfmt::$name"->UNIVERSAL::can("header"); -my $allprefix = $langname . ".all."; -my $platprefix = $langname . "." . $platname . "."; + return $func ? $func->() : ""; +} -print "# This messages file is automatically generated from FatMessages\n"; -print "# at build-time. Please go and edit that instead of this.\n\n"; +# format implementations: +{ + package msgfmt::java; -foreach () { - if (not /^#/ and not /^\s*$/) { - if (/^$allprefix/ or /^$platprefix/) { - s/^$langname\.(all|$platname)\.//; - print "$_"; - } + # escape characters spec says ' should be escaped here but + # transifex does not recognise it. hence [\\:] and not [\\:'] + sub escape { $_[0] =~ s/([\\:])/\\$1/g; $_[0] } + sub format { return join(' = ', $_[0], escape( $_[1] ) ) } + sub header { "# autogenerated from " . ($opt{input} || '-stdin-') . "\n" } +} + +{ + package msgfmt::messages; + + sub format { return join( ":", @_ ) } + sub header + { + my $in = $opt{input} || '-stdin-'; + return <\n\n"; +# foreach my $lang (sort keys %data) { +# foreach my $plat (sort keys %{$data{$lang}}) { +# foreach my $key (sort keys %{$data{$lang}{$plat}}) { +# my $val = $data{$lang}{$plat}{$key}; +# print "" . encode_entities($val, '<>&"') . "\n"; +# } +# } +# } +# print ""; +#} -- cgit v1.2.3