Lingua-Preferred-0.2.4/0040755000103200010320000000000007767050347012417 5ustar ededLingua-Preferred-0.2.4/MANIFEST0100644000103200010320000000007107612220233013521 0ustar ededChanges Makefile.PL MANIFEST Preferred.pm test.pl README Lingua-Preferred-0.2.4/Makefile.PL0100644000103200010320000000037007646024251014355 0ustar ededuse ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( 'NAME' => 'Lingua::Preferred', 'VERSION_FROM' => 'Preferred.pm', # finds $VERSION ); Lingua-Preferred-0.2.4/Preferred.pm0100644000103200010320000001575007767050151014671 0ustar ededpackage Lingua::Preferred; use strict; use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); require Exporter; require AutoLoader; # Use Log::TraceMessages if installed. BEGIN { eval { require Log::TraceMessages }; if ($@) { *t = sub {}; *d = sub { '' }; } else { *t = \&Log::TraceMessages::t; *d = \&Log::TraceMessages::d; Log::TraceMessages::check_argv(); } } @ISA = qw(Exporter AutoLoader); @EXPORT = qw(); @EXPORT_OK = qw(which_lang acceptable_lang); $VERSION = '0.2.4'; =pod =head1 NAME Lingua::Preferred - Perl extension to choose a language =head1 SYNOPSIS use Lingua::Preferred qw(which_lang acceptable_lang); my @wanted = qw(en de fr it de_CH); my @available = qw(fr it de); my $which = which_lang(\@wanted, \@available); print "language $which is the best of those available\n"; foreach (qw(en_US fr nl de_DE)) { print "language $_ is acceptable\n" if acceptable_lang(\@wanted, $_); } =head1 DESCRIPTION Often human-readable information is available in more than one language. Which should you use? This module provides a way for the user to specify possible languages in order of preference, and then to pick the best language of those available. Different 'dialects' given by the 'territory' part of the language specifier (such as en, en_GB, and en_US) are also supported. The routine C picks the best language from a list of alternatives. The arguments are: =over =item a reference to a list of preferred languages (first is best). Here, a language is a string like C<'en'> or C<'fr_CA'>. (C<'fr_*'> can also be given - see below.) C<'C'> (named for the Unix 'C' locale) matches any language. =item a reference to non-empty list of available languages. Here, a language can be like C<'en'>, C<'en_CA'>, or C meaning 'unknown'. =back The return code is which language to use. This will always be an element of the available languages list. The cleverness of this module (if you can call it that) comes from inferring implicit language preferences based on the explicit list passed in. For example, if you say that en is acceptable, then en_IE and en_DK will presumably be acceptable too (but not as good as just plain en). If you give your language as en_US, then en is almost as good, with the other dialects of en following soon afterwards. If there is a tie between two choices, as when two dialects of the same language are available and neither is explicitly preferred, or when none of the available languages appears in the userE<39>s list, then the choice appearing earlier in the available list is preferred. Sometimes, the automatic inferring of related dialects is not what you want, because a language dialect may be very different to the 'main' language, for example Swiss German or some forms of English. For this case, the special form 'XX_*' is available. If you dislike Mexican Spanish (as a completely arbitrary example), then C<[ 'es', 'es_*', 'es_MX' ]> would rank this dialect below any other dialect of es (but still acceptable). You donE<39>t have to explicitly list every other dialect of Spanish before es_MX. So for example, supposing C<@avail> contains the languages available: =over =item You know English and prefer US English: $which = which_lang([ 'en_US' ], \@avail); =item You know English and German, German/Germany is preferred: $which = which_lang([ 'en', 'de_DE' ], \@avail); =item You know English and German, but preferably not Swiss German: $which = which_lang([ 'en', 'de', 'de_*', 'de_CH' ], \@avail); Here any dialect of German (eg de_DE, de_AT) is preferable to de_CH. =cut sub which_lang( $$ ) { die 'usage: which_lang(listref of preferred langs, listref of available)' if @_ != 2; my ($pref, $avail) = @_; t '$pref=' . d $pref; t '$avail=' . d $avail; my (%explicit, %implicit); my $pos = 0; # This seems like the best way to make block-nested subroutines my $add_explicit = sub { my $l = shift; die "preferred language $l listed twice" if defined $explicit{$l}; if (delete $implicit{$l}) { t "moved implicit $l to explicit" } else { t "adding explicit $l" } $explicit{$l} = $pos++; }; my $add_implicit = sub { my $l = shift; if (defined $explicit{$l}) { t "$l already explict, not adding implicitly"; } else { if (defined $implicit{$l}) { t "replacing implicit $l" } else { t "adding implicit $l" } $implicit{$l} = $pos++ } }; foreach (@$pref) { $add_explicit->($_); if ($_ eq 'C') { # Doesn't imply anything - C already matches every # possible language. # } elsif (/^[a-z][a-z]$/) { # 'en' implies any dialect of 'en' also $add_implicit->($_ . '_*'); } elsif (/^([a-z][a-z])_([A-Z][A-Z])(?:\@.*)?$/) { # ignore @whatever # 'en_GB' implies 'en', and secondly any other dialect $add_implicit->($1); $add_implicit->($1 . '_*'); } elsif (/^([a-z][a-z])_\*$/) { # 'en_*' doesn't imply anything - it shouldn't be used # except in odd cases. # } else { die "bad language '$_'" } # FIXME support 'English' etc } my %ranking = reverse (%explicit, %implicit); if ($Log::TraceMessages::On) { t 'ranking:'; foreach (sort { $a <=> $b } keys %ranking) { t "$_\t$ranking{$_}"; } } my @langs = @ranking{sort { $a <=> $b } keys %ranking}; my %avail; foreach (@$avail) { next if not defined; $avail{$_}++ && die "available language $_ listed twice"; } while (defined (my $lang = shift @langs)) { if ($lang eq 'C') { # Match first available language. return $avail->[0]; } elsif ($lang =~ /^([a-z][a-z])_\*$/) { # Any dialect of $1 (but not standard). Work through all # of @$avail in order trying to find a match. (So there # is a slight bias towards languages appearing earlier in # @$avail.) # my $base_lang = $1; AVAIL: foreach (@$avail) { next if not defined; if (/^\Q$base_lang\E_/) { # Well, it matched... but maybe this dialect was # explicitly specified with a lower priority. # foreach my $lower_lang (@langs) { next AVAIL if (/^\Q$lower_lang\E$/); } return $_; } } } else { # Exact match return $lang if $avail{$lang}; } } # Couldn't find anything - pick first available language. return $avail->[0]; } =pod Whereas C picks the best language from a list of alternatives, C answers whether a single language is included (explicitly or implicitly) in the list of wanted languages. It adds the implicit dialects in the same way. =cut sub acceptable_lang( $$ ) { die 'usage: acceptable_lang(listref of wanted langs, lang)' if @_ != 2; my ($pref, $l) = @_; t '$pref=' . d $pref; t '$l=' . d $l; # We just need to ignore the dialects and compare the main part. my @pref = @$pref; # copy $l =~ s/_.+//; foreach (@pref) { s/_.+//; return 1 if $l eq $_; } return 0; } =pod =head1 AUTHOR Ed Avis, ed@membled.com =head1 SEE ALSO perl(1). =cut 1; __END__ Lingua-Preferred-0.2.4/README0100644000103200010320000000277207767050261013277 0ustar ededLingua::Preferred Many web browsers let you specify which languages you understand. Then they negotiate with the web server to get documents in the best language possible. This is something similar in Perl. which_lang() takes a list of languages the user understands, such as qw(en es) and a list of those available on the server, such as qw(en fr de), and it returns the language to use. There is some fooling around with picking second-best 'dialects' of a language, for example if the user's language is en_IE and a page is available in en_US. acceptable_lang() takes a list of languages the user understands and a single language, and returns true iff that language is acceptable. Again it assumes that different dialects of a language are mutually comprehensible. Note: I created this module by packaging up some of my own code, but with hindsight I'm not sure it was a good idea. The world does not need another NIH way of doing language selections. It would be better to pick languages by adapting HTTP language negotiation or gettext. So I do not plan further development on this module except for bugfixes. Version 0.2.4: accept language choice 'C' (named after the Unix locale) to mean pick the first available language. * Copying Copyright 2001-2003 Ed Avis. This is free software; you may distribute it under the same terms as perl itself (either under the GNU General Public License, version 2 or at your option any later version); or under the Artistic License. -- Ed Avis, , 2003-12-14 Lingua-Preferred-0.2.4/test.pl0100644000103200010320000001645707767047722013750 0ustar eded#!/usr/bin/perl -w use strict; my ($numtests, $loaded); BEGIN { $numtests = 76; $| = 1; print "1..$numtests\n"; } # FIXME END {print "not ok 1\n" unless $loaded;} use Lingua::Preferred qw(which_lang acceptable_lang); $loaded = 1; print "ok 1\n"; use Data::Dumper; my $tests_done = 1; sub check_which_lang( $$$ ) { my ($want, $avail, $ans) = @_; my $got = Dumper(which_lang($want, $avail)); if ($got ne Dumper($ans)) { warn "wanted: @$want\navailable: @$avail\nexpected: $ans\ngot: $got"; print 'not '; } print 'ok ', ++$tests_done, "\n"; } check_which_lang [ ], [ 'en' ], 'en'; check_which_lang [ ], [ undef ], undef; check_which_lang [ 'fr' ], [ 'en' ], 'en'; check_which_lang [ 'fr' ], [ 'en', 'fr' ], 'fr'; check_which_lang [ 'fr' ], [ 'en', 'fr_FR' ], 'fr_FR'; check_which_lang [ 'fr' ], [ 'en', 'fr_FR', 'fr' ], 'fr'; check_which_lang [ 'fr' ], [ undef ], undef; check_which_lang [ 'fr', 'en' ], [ 'fr' ], 'fr'; check_which_lang [ 'fr', 'en' ], [ 'en' ], 'en'; check_which_lang [ 'fr', 'en' ], [ 'de' ], 'de'; check_which_lang [ 'fr', 'en' ], [ 'de', 'it' ], 'de'; check_which_lang [ 'fr', 'en' ], [ undef ], undef; check_which_lang [ 'en_GB' ], [ 'en' ], 'en'; check_which_lang [ 'en_GB' ], [ 'fr' ], 'fr'; check_which_lang [ 'en_GB' ], [ undef ], undef; check_which_lang [ 'en_GB' ], [ 'en_US' ], 'en_US'; check_which_lang [ 'en_GB' ], [ 'en_US', 'en_IT' ], 'en_US'; check_which_lang [ 'en_GB' ], [ 'en_US', 'en' ], 'en'; check_which_lang [ 'en_GB' ], [ 'en_US', 'en', 'en_GB' ], 'en_GB'; check_which_lang [ 'en', 'en_GB' ], [ 'en_US' ], 'en_US'; check_which_lang [ 'en', 'en_GB' ], [ 'en_IT', 'en_GB' ], 'en_GB'; check_which_lang [ 'en', 'en_GB' ], [ 'en', 'en_GB' ], 'en'; check_which_lang [ 'en_GB', 'en' ], [ 'en', 'en_GB' ], 'en_GB'; check_which_lang [ 'de', 'de_*', 'de_CH' ], [ 'fr' ], 'fr'; check_which_lang [ 'de', 'de_*', 'de_CH' ], [ 'de_CH' ], 'de_CH'; check_which_lang [ 'de', 'de_*', 'de_CH' ], [ 'de_CH', 'de_DE' ], 'de_DE'; check_which_lang [ 'de', 'de_*', 'fr', 'de_CH' ], [ 'de_CH', 'fr' ], 'fr'; # C matches anything, but it need not be first in the list check_which_lang [ 'C', ], [ 'en' ], 'en'; check_which_lang [ 'C', ], [ undef ], undef; check_which_lang [ 'en', 'C', ], [ 'en' ], 'en'; check_which_lang [ 'C', 'en', ], [ 'en' ], 'en'; check_which_lang [ 'C' ], [ 'en', 'fr' ], 'en'; check_which_lang [ 'C', 'fr' ], [ 'en', 'fr' ], 'en'; check_which_lang [ 'fr', 'C' ], [ 'en', 'fr' ], 'fr'; # The following are probably not something you'd actually use check_which_lang [ 'en_*' ], [ 'en_GB', 'fr' ], 'en_GB'; # N.B. en_* implies en_IE, en_CA etc. but not en check_which_lang [ 'en_*' ], [ 'fr', 'en' ], 'fr'; check_which_lang [ 'en_*' ], [ undef ], undef; check_which_lang [ 'de_*', 'de_CH' ], [ 'de_CH', 'de', 'de_DE' ], 'de_DE'; check_which_lang [ 'de', 'fr', 'de_*', 'de_CH' ], [ 'de_CH', 'de_AT', 'fr' ], 'fr'; sub check_acceptable_lang( $$$ ) { my ($want, $l, $ans) = @_; my $got = acceptable_lang($want, $l); if ($got != $ans) { warn "wanted: @$want\nlang: $l\nexpected: $ans\ngot: $got"; print 'not '; } print 'ok ', ++$tests_done, "\n"; } check_acceptable_lang [ ], 'en', 0; check_acceptable_lang [ 'fr' ], 'en', 0; check_acceptable_lang [ 'fr' ], 'en_ZA', 0; check_acceptable_lang [ 'fr' ], 'fr', 1; check_acceptable_lang [ 'fr' ], 'fr_FR', 1; check_acceptable_lang [ 'fr', 'en' ], 'fr', 1; check_acceptable_lang [ 'fr', 'en' ], 'en', 1; check_acceptable_lang [ 'fr', 'en' ], 'de', 0; check_acceptable_lang [ 'fr', 'en' ], 'fr_FR', 1; check_acceptable_lang [ 'fr', 'en' ], 'en_FR', 1; # why not? check_acceptable_lang [ 'fr', 'en' ], 'it_CH', 0; check_acceptable_lang [ 'en_GB' ], 'en', 1; check_acceptable_lang [ 'en_GB' ], 'en_GB', 1; check_acceptable_lang [ 'en_GB' ], 'en_CA', 1; check_acceptable_lang [ 'en_GB' ], 'nl', 0; check_acceptable_lang [ 'en_GB' ], 'nl_NL', 0; check_acceptable_lang [ 'en', 'en_GB' ], 'en', 1; check_acceptable_lang [ 'en', 'en_GB' ], 'en_GB', 1; check_acceptable_lang [ 'en', 'en_GB' ], 'en_CA', 1; check_acceptable_lang [ 'en', 'en_GB' ], 'nl', 0; check_acceptable_lang [ 'en', 'en_GB' ], 'nl_NL', 0; check_acceptable_lang [ 'en_IE', 'en_US' ], 'en', 1; check_acceptable_lang [ 'en_IE', 'en_US' ], 'en_GB', 1; check_acceptable_lang [ 'en_IE', 'en_US' ], 'en_CA', 1; check_acceptable_lang [ 'en_IE', 'en_US' ], 'nl', 0; check_acceptable_lang [ 'en_IE', 'en_US' ], 'nl_NL', 0; check_acceptable_lang [ 'de', 'de_*', 'de_CH' ], 'fr', 0; check_acceptable_lang [ 'de', 'de_*', 'de_CH' ], 'de', 1; check_acceptable_lang [ 'de', 'de_*', 'de_CH' ], 'de_DE', 1; check_acceptable_lang [ 'de', 'de_*', 'de_CH' ], 'de_CH', 1; # The following are probably not something you'd actually use check_acceptable_lang [ 'en_*' ], 'en_GB', 1; check_acceptable_lang [ 'en_*' ], 'it', 0; check_acceptable_lang [ 'en_*' ], 'en', 1; check_acceptable_lang [ 'de', 'fr', 'de_*', 'de_CH' ], 'fr', 1; check_acceptable_lang [ 'de', 'fr', 'de_*', 'de_CH' ], 'nl', 0; check_acceptable_lang [ 'de', 'fr', 'de_*', 'de_CH' ], 'de_CH', 1; if ($tests_done != $numtests) { die "expected to run $numtests tests, but ran $tests_done\n"; } __END__ # Stuff for randomly generating test cases. I didn't really use this. my @l = qw(en en_GB en_US de de_DE de_AT de_CH fr fr_FR fr_CA it it_IT); my @l2 = qw(en_* fr_* de_* it_*); sub randomize(@) { my @r; push @r, splice(@_, (rand @_), 1) while @_; @r; } sub random_prefix(@) { @_[0 .. (rand @_)] } sub random_subset(@) { randomize (random_prefix @_) } for (;;) { my @avail = random_subset @l; my @want = random_subset (@l, @l2); my $which = which_lang(\@want, \@avail); print "which_lang([ qw(@want) ], [ qw(@avail) ]) is $which\n\n"; } Lingua-Preferred-0.2.4/Changes0100644000103200010320000000451207767050347013711 0ustar eded2003-12-14 12:00 ed * Preferred.pm, README: Version 0.2.4. 2003-12-14 11:56 ed * Preferred.pm, test.pl: Accept 'C' to mean pick the first available language. 2003-04-12 15:58 ed * Preferred.pm, README: Version 0.2.3. 2003-04-12 15:54 ed * Makefile.PL, Preferred.pm, test.pl: Use Log::TraceMessages if it's installed, but don't require it. 2003-01-18 09:41 ed * mkdist: Updated for CVS instead of RCS. 2003-01-18 09:38 ed * MANIFEST: Added README to file list. 2003-01-18 09:37 ed * Preferred.pm, README: Bug fix suggested by Juergen Appel to handle language strings with @ in them (stuff after the @ is ignored). Updated version to 0.2.2, and noted in the README that this module is a dead end. 2002-09-24 18:15 ed * Preferred.pm, README: Version 0.2.1. 2002-09-24 08:16 ed * README: Added copyright information. 2002-09-01 14:55 ed * Preferred.pm, README: Updated my email address. 2002-02-01 17:23 ed * Preferred.pm, README, test.pl: Version 0.2: added acceptable_lang() to return a yes or no answer about whether one language is okay. 2002-01-28 17:17 ed * README, Preferred.pm: Version 0.1.2. 2002-01-28 17:17 ed * Preferred.pm: Fixed print to stdout when trace enabled (d'oh!). 2001-11-28 13:27 ed * Preferred.pm: Version 0.1.1. 2001-11-28 13:27 ed * README: Updated for version 0.1.1. 2001-11-28 13:25 ed * Makefile.PL: Added Log::TraceMessages as a prerequisite (spotted by cpan-testers). 2001-02-21 16:36 ed * mkdist: Remove Makefile.old for tidiness. I suppose I should just get things out of the RCS directory as needed. 2001-02-21 16:35 ed * mkdist: Remove pm_to_blib also - it screws things up if it's lying around 2001-02-21 16:30 ed * README: Initial blurb 2001-02-21 16:25 ed * README: Initial revision 2001-02-19 14:27 ed * test.pl: Revised test case for en_* to reflect new semantics 2001-02-19 14:26 ed * Preferred.pm: Change semantics of en_* - it no longer implies 'en' 2001-02-16 13:39 ed * Preferred.pm: Fix synopsis 2001-02-16 13:38 ed * test.pl: Fix number-of-tests printing; add warnings and strict 2001-02-16 12:37 ed * test.pl: Wrote a few test cases 2001-02-16 12:37 ed * Preferred.pm: First working version 2001-02-14 11:41 ed * MANIFEST, Makefile.PL, Preferred.pm, test.pl, mkdist: Initial revision