Encode-Arabic-1.9/0000755001432400135600000000000011144136727012454 5ustar smrzufalEncode-Arabic-1.9/t/0000755001432400135600000000000011144136727012717 5ustar smrzufalEncode-Arabic-1.9/t/Encode-Arabic-ArabTeX.t0000444001432400135600000000347211144136727016750 0ustar smrzufal######################### use Test::More tests => 10; BEGIN { use_ok 'Encode::Arabic::ArabTeX', ':xml'; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. can_ok 'Encode::Arabic::ArabTeX', qw 'encode decode encoder decoder'; ok defined &encode, "import Encode's encode() function"; ok defined &decode, "import Encode's decode() function"; $Encode::Mapper::options{'Encode::Arabic::ArabTeX'}{'join'} = undef; my $encoder = Encode::Arabic::ArabTeX->encoder(); my $decoder = Encode::Arabic::ArabTeX->decoder(); ok defined $Encode::Arabic::ArabTeX::encoder, 'encoder defined'; ok defined $Encode::Arabic::ArabTeX::decoder, 'decoder defined'; my $utf = "\x{0627}\x{0650}\x{0642}\x{0652}\x{0631}\x{064E}\x{0623}\x{0652} " . "\x{0647}\x{0670}\x{0630}\x{064E}\x{0627} " . "\x{0671}\x{0644}\x{0646}\x{0651}\x{064E}\x{0635}\x{0651}\x{064E} " . "\x{0628}\x{0650}\x{0671}\x{0646}\x{0652}\x{062A}\x{0650}\x{0628}\x{064E}\x{0627}\x{0647}\x{064D}. " . "\x{0643}\x{064E}\x{064A}\x{0652}\x{0641}\x{064E} " . "\x{0671}\x{0644}\x{0652}\x{062D}\x{064E}\x{0627}\x{0644}\x{064F}\x{061F}"; my $tex = "iqra' h_a_dA an-na.s.sa bi-intibAhiN. kayfa al-.hAlu?"; my $encode = encode "arabtex", $utf; my $decode = decode "arabtex", $tex; TODO: { local $TODO = 'Non-simple mapping'; is $encode, $tex, '$encode is $tex'; # is $decode, $utf, '$decode is $utf'; is $encode, (encode "arabtex", $decode), 'encode(..., $decode) is fine'; # is $decode, (decode "arabtex", $encode), 'decode(..., $encode) is fine'; } ok ! Encode::is_utf8($encode), "from Perl's internal utf8: " . $encode; ok Encode::is_utf8($decode), "into Perl's internal utf8: " . encode 'utf8', $decode; Encode-Arabic-1.9/t/Encode-Arabic-ArabTeX-ZDMG.t0000444001432400135600000000337111144136727017505 0ustar smrzufal######################### use Test::More tests => 10; BEGIN { use_ok 'Encode::Arabic::ArabTeX::ZDMG', ':xml'; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. can_ok 'Encode::Arabic::ArabTeX::ZDMG', qw 'encode decode encoder decoder'; ok defined &encode, "import Encode's encode() function"; ok defined &decode, "import Encode's decode() function"; $Encode::Mapper::options{'Encode::Arabic::ArabTeX::ZDMG'}{'join'} = undef; my $encoder = Encode::Arabic::ArabTeX::ZDMG->encoder(); my $decoder = Encode::Arabic::ArabTeX::ZDMG->decoder(); ok defined $Encode::Arabic::ArabTeX::ZDMG::encoder, 'encoder defined'; ok defined $Encode::Arabic::ArabTeX::ZDMG::decoder, 'decoder defined'; my $utf = decode "utf8", "\x49\x71\x72\x61\xCA\xBE\x20" . "\x68\xC4\x81\xE1\xB8\x8F\xC4\x81\x20" . "\xCA\xBC\x6E\x2D\x6E\x61\xE1\xB9\xA3\xE1\xB9\xA3\x61\x20" . "\x62\x69\x2D\xCA\xBC\x6E\x74\x69\x62\xC4\x81\x68\x69\x6E\x2E\x20" . "\x4B\x61\x79\x66\x61\x20" . "\xCA\xBC\x6C\x2D\xE1\xB8\xA5\xC4\x81\x6C\x75\x3F"; my $tex = "\\cap iqra' h_a_dA an-na.s.sa bi-intibAhiN. \\cap kayfa al-.hAlu?"; my $encode = encode "arabtex-zdmg", $utf; my $decode = decode "arabtex-zdmg", $tex; TODO: { local $TODO = 'Non-simple mapping'; is $encode, $tex, '$encode is $tex'; # is $decode, $utf, '$decode is $utf'; is $encode, (encode "arabtex-zdmg", $decode), 'encode(..., $decode) is fine'; # is $decode, (decode "arabtex-zdmg", $encode), 'decode(..., $encode) is fine'; } ok ! Encode::is_utf8($encode), "from Perl's internal utf8: " . $encode; ok Encode::is_utf8($decode), "into Perl's internal utf8: " . encode 'utf8', $decode; Encode-Arabic-1.9/t/Encode-Arabic-ArabTeX-ZDMG-RE.t0000444001432400135600000000044411144136727020007 0ustar smrzufal######################### use Test::More tests => 1; BEGIN { use_ok 'Encode::Arabic::ArabTeX::ZDMG::RE'; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. Encode-Arabic-1.9/t/pod.t0000444001432400135600000000030511144136727013662 0ustar smrzufal#!perl -T use strict; use warnings; use Test::More; my $version = 1.22; eval "use Test::Pod $version"; plan skip_all => "Test::Pod $version required for testing POD" if $@; all_pod_files_ok(); Encode-Arabic-1.9/t/Encode-Arabic-Buckwalter.t0000444001432400135600000000267611144136727017572 0ustar smrzufal######################### use Test::More tests => 7; BEGIN { use_ok 'Encode::Arabic::Buckwalter', ':xml'; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. my $utf = "\x{0627}\x{0650}\x{0642}\x{0652}\x{0631}\x{064E}\x{0623}\x{0652} " . "\x{0647}\x{0670}\x{0630}\x{064E}\x{0627} " . "\x{0671}\x{0644}\x{0646}\x{0651}\x{064E}\x{0635}\x{0651}\x{064E} " . "\x{0628}\x{0650}\x{0671}\x{0646}\x{0652}\x{062A}\x{0650}\x{0628}\x{064E}\x{0627}\x{0647}\x{064D}. " . "\x{0643}\x{064E}\x{064A}\x{0652}\x{0641}\x{064E} " . "\x{0671}\x{0644}\x{0652}\x{062D}\x{064E}\x{0627}\x{0644}\x{064F}\x{061F}"; my $tim = "AiqoraOo h`*aA {ln~aS~a bi{notibaAhK. kayofa {loHaAlu?"; my $encode = encode "buckwalter", $utf; my $decode = decode "buckwalter", $tim; is $encode, $tim, '$encode is $tim'; is $decode, $utf, '$decode is $utf'; is $encode, (encode "buckwalter", $decode), 'encode(..., $decode) is fine'; is $decode, (decode "buckwalter", $encode), 'decode(..., $encode) is fine'; $using_xml = eval q { use Encode::Arabic::Buckwalter ':xml'; decode 'buckwalter', 'OWI' }; $classical = eval q { use Encode::Arabic::Buckwalter; decode 'buckwalter', '>&<' }; is $classical, $using_xml, '$classical eq $using_xml'; is $classical, "\x{0623}\x{0624}\x{0625}", '$classical eq "\x{0623}\x{0624}\x{0625}"'; Encode-Arabic-1.9/t/Encode-Arabic.t0000444001432400135600000000236711144136727015466 0ustar smrzufal######################### use Test::More tests => 16; BEGIN { use_ok 'Encode::Arabic'; } BEGIN { use_ok 'Encode::Arabic', 'from_to'; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. ok defined &encode, "import Encode's encode() function"; ok defined &decode, "import Encode's decode() function"; ok defined &encode_utf8, "import Encode's encode_utf8() function"; ok defined &decode_utf8, "import Encode's decode_utf8() function"; ok defined &encodings, "import Encode's encodings() function"; ok defined &find_encoding, "import Encode's find_encoding() function"; ok defined &from_to, "import Encode's from_to() function"; ok ref (find_encoding 'arabtex'), 'ArabTeX known with its alias'; ok ref (find_encoding 'arabtex-re'), 'ArabTeX-RE known, too'; ok ref (find_encoding 'arabtex-verbatim'), 'ArabTeX-Verbatim known with its alias'; ok ref (find_encoding 'arabtex-zdmg'), 'ArabTeX-ZDMG known with its alias'; ok ref (find_encoding 'arabtex-zdmg-re'), 'ArabTeX-ZDMG-RE known, too'; ok ref (find_encoding 'buckwalter'), 'Buckwalter known with its alias'; ok ref (find_encoding 'parkinson'), 'Parkinson known with its alias'; Encode-Arabic-1.9/t/Encode-Arabic-Parkinson.t0000444001432400135600000000217211144136727017422 0ustar smrzufal######################### use Test::More tests => 5; BEGIN { use_ok 'Encode::Arabic::Parkinson', ':xml'; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. my $utf = "\x{0627}\x{0650}\x{0642}\x{0652}\x{0631}\x{064E}\x{0623}\x{0652} " . "\x{0647}\x{0670}\x{0630}\x{064E}\x{0627} " . "\x{0671}\x{0644}\x{0646}\x{0651}\x{064E}\x{0635}\x{0651}\x{064E} " . "\x{0628}\x{0650}\x{0671}\x{0646}\x{0652}\x{062A}\x{0650}\x{0628}\x{064E}\x{0627}\x{0647}\x{064D}. " . "\x{0643}\x{064E}\x{064A}\x{0652}\x{0641}\x{064E} " . "\x{0671}\x{0644}\x{0652}\x{062D}\x{064E}\x{0627}\x{0644}\x{064F}\x{061F}"; my $dil = "AiqoraLo hRvaA Oln~aS~a biOnotibaAhI. kayofa OloHaAlu?"; my $encode = encode "parkinson", $utf; my $decode = decode "parkinson", $dil; is $encode, $dil, '$encode is $dil'; is $decode, $utf, '$decode is $utf'; is $encode, (encode "parkinson", $decode), 'encode(..., $decode) is fine'; is $decode, (decode "parkinson", $encode), 'decode(..., $encode) is fine'; Encode-Arabic-1.9/t/Encode-Arabic-ArabTeX-RE.t0000444001432400135600000000043611144136727017251 0ustar smrzufal######################### use Test::More tests => 1; BEGIN { use_ok 'Encode::Arabic::ArabTeX::RE'; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. Encode-Arabic-1.9/t/boilerplate.t0000444001432400135600000000324311144136727015406 0ustar smrzufal#!perl -T use strict; use warnings; use Test::More tests => 11; sub not_in_file_ok { my ($filename, %regex) = @_; open my $fh, "<", $filename or die "couldn't open $filename for reading: $!"; my %violated; while (my $line = <$fh>) { while (my ($desc, $regex) = each %regex) { if ($line =~ $regex) { push @{$violated{$desc}||=[]}, $.; } } } if (%violated) { fail("$filename contains boilerplate text"); diag "$_ appears on lines @{$violated{$_}}" for keys %violated; } else { pass("$filename contains no boilerplate text"); } } not_in_file_ok(README => "The README is used..." => qr/The README is used/, "'version information here'" => qr/to provide version information/, ); not_in_file_ok(Changes => "placeholder date/time" => qr(Date/time) ); sub module_boilerplate_ok { my ($module) = @_; not_in_file_ok($module => 'the great new $MODULENAME' => qr/ - The great new /, 'boilerplate description' => qr/Quick summary of what the module/, 'stub function definition' => qr/function[12]/, ); } module_boilerplate_ok('lib/Encode/Arabic.pm'); module_boilerplate_ok('lib/Encode/Arabic/ArabTeX.pm'); module_boilerplate_ok('lib/Encode/Arabic/ArabTeX/RE.pm'); module_boilerplate_ok('lib/Encode/Arabic/ArabTeX/Verbatim.pm'); module_boilerplate_ok('lib/Encode/Arabic/ArabTeX/ZDMG.pm'); module_boilerplate_ok('lib/Encode/Arabic/ArabTeX/ZDMG/RE.pm'); module_boilerplate_ok('lib/Encode/Arabic/Buckwalter.pm'); module_boilerplate_ok('lib/Encode/Arabic/Parkinson.pm'); module_boilerplate_ok('lib/Encode/Mapper.pm'); Encode-Arabic-1.9/t/Encode-Arabic-ArabTeX-Verbatim.t0000444001432400135600000000362711144136727020521 0ustar smrzufal######################### use Test::More tests => 10; BEGIN { use_ok 'Encode::Arabic::ArabTeX::Verbatim', ':xml'; } ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. can_ok 'Encode::Arabic::ArabTeX::Verbatim', qw 'encode decode encoder decoder'; ok defined &encode, "import Encode's encode() function"; ok defined &decode, "import Encode's decode() function"; $Encode::Mapper::options{'Encode::Arabic::ArabTeX::Verbatim'}{'join'} = undef; my $encoder = Encode::Arabic::ArabTeX::Verbatim->encoder(); my $decoder = Encode::Arabic::ArabTeX::Verbatim->decoder(); ok defined $Encode::Arabic::ArabTeX::Verbatim::encoder, 'encoder defined'; ok defined $Encode::Arabic::ArabTeX::Verbatim::decoder, 'decoder defined'; my $utf = "\x{0627}\x{0650}\x{0642}\x{0652}\x{0631}\x{064E}\x{0623}\x{0652} " . "\x{0647}\x{0670}\x{0630}\x{064E}\x{0627} " . "\x{0671}\x{0644}\x{0646}\x{0651}\x{064E}\x{0635}\x{0651}\x{064E} " . "\x{0628}\x{0650}\x{0671}\x{0646}\x{0652}\x{062A}\x{0650}\x{0628}\x{064E}\x{0627}\x{0647}\x{064D}. " . "\x{0643}\x{064E}\x{064A}\x{0652}\x{0641}\x{064E} " . "\x{0671}\x{0644}\x{0652}\x{062D}\x{064E}\x{0627}\x{0644}\x{064F}\x{061F}"; my $tex = "iqra'a h_a_dA al-n||a.s||a bi-intibAhiN. kayfa al-.hAlu?"; my $encode = encode "arabtex-verb", $utf; my $decode = decode "arabtex-verb", $tex; TODO: { local $TODO = 'Non-simple mapping'; is $encode, $tex, '$encode is $tex'; # is $decode, $utf, '$decode is $utf'; is $encode, (encode "arabtex-verb", $decode), 'encode(..., $decode) is fine'; # is $decode, (decode "arabtex-verb", $encode), 'decode(..., $encode) is fine'; } ok ! Encode::is_utf8($encode), "from Perl's internal utf8: " . $encode; ok Encode::is_utf8($decode), "into Perl's internal utf8: " . encode 'utf8', $decode; Encode-Arabic-1.9/t/00-load.t0000444001432400135600000000075711144136727014247 0ustar smrzufal#!perl -T use Test::More tests => 9; BEGIN { use_ok( 'Encode::Mapper' ); use_ok( 'Encode::Arabic' ); use_ok( 'Encode::Arabic::ArabTeX' ); use_ok( 'Encode::Arabic::ArabTeX::RE' ); use_ok( 'Encode::Arabic::ArabTeX::Verbatim' ); use_ok( 'Encode::Arabic::ArabTeX::ZDMG' ); use_ok( 'Encode::Arabic::ArabTeX::ZDMG::RE' ); use_ok( 'Encode::Arabic::Buckwalter' ); use_ok( 'Encode::Arabic::Parkinson' ); } diag( "Testing Encode::Arabic $Encode::Arabic::VERSION" ); Encode-Arabic-1.9/t/Encode-Mapper-1.t0000444001432400135600000000530511144136727015662 0ustar smrzufal######################### use Test::More tests => 9; BEGIN { use_ok 'Encode::Mapper'; } require_ok 'Encode'; require_ok 'Data::Dumper'; ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. can_ok 'Encode::Mapper', qw 'new compile process recover compute dumper', qw 'encode decode', qw 'options import'; my $mapper = Encode::Mapper->new ( map { (chr $_) x 2, } 0x00..0xFF ); my @tokens = ( "\x{060C}", ",", "\x{0637}", ".t", "\x{061B}", ";", "\x{0638}", ".z", "\x{061F}", "?", "\x{0639}", "`", "\x{0621}", "'", "\x{063A}", ".g", "\x{0622}", "'A", "\x{0640}", "--", "\x{0623}", "'", "\x{0641}", "f", "\x{0624}", "'", "\x{0642}", "q", "\x{0625}", "'", "\x{0643}", "k", "\x{0626}", "'", "\x{0644}", "l", "\x{0627}", "A", "\x{0645}", "m", "\x{0628}", "b", "\x{0646}", "n", "\x{0629}", "T", "\x{0647}", "h", "\x{062A}", "t", "\x{0648}", "w", "\x{062B}", "_t", "\x{0649}", "Y", "\x{062C}", "^g", "\x{064A}", "y", "\x{062D}", ".h", "\x{064B}", "aN", "\x{062E}", "_h", "\x{064C}", "uN", "\x{062F}", "d", "\x{064D}", "iN", "\x{0630}", "_d", "\x{064E}", "a", "\x{0631}", "r", "\x{064F}", "u", "\x{0632}", "z", "\x{0650}", "i", "\x{0633}", "s", "\x{0651}", "\\shadda{}", "\x{0634}", "^s", "\x{0652}", "\\sukuun{}", "\x{0635}", ".s", "\x{0670}", "_a", "\x{0636}", ".d", "\x{0671}", "A", ); push @tokens, qw 'ě š č ř ž ý á í é = ů ú'; ok defined $mapper, "use compile() as the constructor"; ok $mapper->isa('Encode::Mapper'), "constructs the right class"; is Encode::decode_utf8(join "", map { UNIVERSAL::isa($_, 'CODE') ? $_->() : $_ } $mapper->process(@tokens), $mapper->recover()), join("", map { Encode::is_utf8($_) ? $_ : Encode::decode_utf8($_) } @tokens), "identity mapping, bytes oriented"; is_deeply [ my @x = split //, "\x{c4}\x{80}"], [ split //, Encode::encode("utf8", "\x{0100}") ], 'unicodeness test'; is_deeply [ map { ord } @x ], [ 0xC4, 0x80 ], 'byte comparison'; Encode-Arabic-1.9/t/Encode-Mapper-2.t0000444001432400135600000000614211144136727015663 0ustar smrzufal######################### use Test::More tests => 4; ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. use Encode::Mapper; ############################################# Enjoy the ride ^^ use Encode::Mapper ':others', ':silent'; # syntactic sugar for compiler options .. Encode::Mapper->options ( # .. equivalent, see more in the text 'others' => sub { shift }, 'silent' => 1, ); Encode::Mapper->options ( # .. resetting, but not to use 'use' !!! 'others' => undef, 'silent' => 0 ); ## Types of rules for mapping the data and controlling the engine's configuration ##### @rules = ( 'x', 'y', # single 'x' be 'y', unless greediness prefers .. 'xx', 'Y', # .. double 'x' be 'Y' or other rules 'uc(x)x', sub { 'sorry ;)' }, # if 'x' follows 'uc(x)', be sorry, else .. 'uc(x)', [ '', 'X' ], # .. alias this *engine-initial* string 'xuc(x)', [ '', 'xX' ], # likewise, alias for the 'x' prefix 'Xxx', [ sub { $i++; '' }, 'X' ], # count the still married 'x' ); ## Constructors of the engine, i.e. one Encode::Mapper instance ####################### $mapper_A = Encode::Mapper->compile( @rules ); # engine constructor $mapper_B = Encode::Mapper->new( @rules ); # equivalent alias is_deeply $mapper_A, $mapper_B, 'constructor identity'; ## Elementary performance of the engine ############################################### @source = ( 'x', 'xx', 'xxuc(x)', 'xxx', '', 'xx' ); # distribution of the data .. $source = join '', @source; # .. is ignored in this sense @result_A = ($mapper_A->process(@source), $mapper_A->recover()); # the mapping procedure @result_B = ($mapper_B->process($source), $mapper_B->recover()); # completely equivalent is_deeply \@result_A, \@result_B, 'performance identity'; $result = join '', map { ref $_ eq 'CODE' ? $_->() : $_ } @result_A; # maps 'xxxxxuc(x)xxxxx' into ( 'Y', 'Y', '', 'y', CODE(...), CODE(...), 'y' ), .. # .. then converts it into 'YYyy', setting $i == 2 is $result, 'YYyy', 'expected output'; is $i, 2, 'expected side effect'; #@follow = $mapper->compute(@source); # follow the engine's computation over @source #$dumper = $mapper->dumper(); # returns the engine as a Data::Dumper object ## Module's higher API implemented for convenience #################################### #$encoder = [ $mapper, Encode::Mapper->compile( ... ), ... ]; # reference to mappers #$result = Encode::Mapper->encode($source, $encoder, 'utf8'); # encode down to 'utf8' #$decoder = [ $mapper, Encode::Mapper->compile( ... ), ... ]; # reference to mappers #$result = Encode::Mapper->decode($source, $decoder, 'utf8'); # decode up from 'utf8' Encode-Arabic-1.9/t/Encode-Mapper-3.t0000444001432400135600000000533111144136727015663 0ustar smrzufal######################### use Test::More tests => 4; ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. use Encode::Mapper; ############################################# Enjoy the ride ^^ use Encode::Mapper ':others', ':silent'; ## Types of rules for mapping the data and controlling the engine's configuration ##### @rules = ( 'x', 'y', # single 'x' be 'y', unless greediness prefers .. 'xx', 'Y', # .. double 'x' be 'Y' or other rules 'uc(x)x', sub { 'sorry ;)' }, # if 'x' follows 'uc(x)', be sorry, else .. 'uc(x)', [ '', 'X' ], # .. alias this *engine-initial* string 'xuc(x)', [ '', 'xX' ], # likewise, alias for the 'x' prefix 'Xxx', [ sub { $i++; '' }, 'X' ], # count the still married 'x' ); ## Constructors of the engine, i.e. one Encode::Mapper instance ####################### $mapper_A = Encode::Mapper->compile( @rules ); # engine constructor Encode::Mapper->options('others' => undef, 'silent' => undef, 'complement' => ['x','y','x','z']); $mapper_B = Encode::Mapper->new( ['others' => sub { shift }, 'silent' => 1], @rules ); is_deeply $mapper_A, $mapper_B, 'constructor identity'; ## Elementary performance of the engine ############################################### @source = ( 'x', 'xAx', 'xBxuc(x)', 'xxx', '', 'xxC' ); # distribution of the data .. $source = join '', @source; # .. is ignored in this sense @result_A = ($mapper_A->process(@source), $mapper_A->recover()); # the mapping procedure @result_B = ($mapper_B->process($source), $mapper_B->recover()); # completely equivalent is_deeply \@result_A, \@result_B, 'performance identity'; $result = join '', map { ref $_ eq 'CODE' ? $_->() : $_ } @result_A; is $result, 'YAYByXyC', 'expected output'; is $i, 2, 'expected side effect'; #@follow = $mapper->compute(@source); # follow the engine's computation over @source #$dumper = $mapper->dumper(); # returns the engine as a Data::Dumper object ## Module's higher API implemented for convenience #################################### #$encoder = [ $mapper, Encode::Mapper->compile( ... ), ... ]; # reference to mappers #$result = Encode::Mapper->encode($source, $encoder, 'utf8'); # encode down to 'utf8' #$decoder = [ $mapper, Encode::Mapper->compile( ... ), ... ]; # reference to mappers #$result = Encode::Mapper->decode($source, $decoder, 'utf8'); # decode up from 'utf8' Encode-Arabic-1.9/t/Encode-Mapper-4.t0000444001432400135600000000232511144136727015664 0ustar smrzufal######################### use Test::More tests => 1; require_ok 'Encode::Mapper'; __END__ ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. use Data::Dump 'dump'; # pretty data printing is below $Encode::Mapper::options{'ByForce'} = { qw ':others - silent errors' }; package ByMethod; # import called at compile time # no warnings, 'silent' is true Encode::Mapper->options('complement' => [ 'X', 'Y' ], 'others' => 'X'); use Encode::Mapper 'silent' => 299_792_458; package main; # import called at compile time # 'non-existent' may exist once print dump %Encode::Mapper::options; use Encode::Mapper ':others', ':silent', 'non-existent', 'one'; # ( # "ByMethod", # { complement => ["X", "Y"], others => "X", silent => 299_792_458 }, # "ByForce", # { ":others" => "-", silent => "errors" }, # "main", # { "non-existent" => "one", others => sub { "???" }, silent => 1 }, # ) Encode-Arabic-1.9/t/pod-coverage.t0000444001432400135600000000071611144136727015461 0ustar smrzufaluse strict; use warnings; use Test::More; foreach my $module ( [ 'Test::Pod::Coverage' => 1.08 ], [ 'Pod::Coverage' => 0.18 ] ) { eval "use @{$module}"; plan skip_all => "@{$module} required for testing POD coverage" if $@; } my @module = all_modules('lib/Encode'); plan tests => scalar @module; foreach my $module (@module) { pod_coverage_ok($module, { 'trustme' => [ qr/(?:[cm]ode|options|verify|whisper)/ ] }); } Encode-Arabic-1.9/bin/0000755001432400135600000000000011144136727013224 5ustar smrzufalEncode-Arabic-1.9/bin/encode.pl0000444001432400135600000000475011144136727015022 0ustar smrzufal#! perl -w our $VERSION = do { q $Revision: 550 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Arabic::Buckwalter ':xml'; use Encode::Arabic; use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION = 1; $options = { 'p' => '', 's' => '' }; getopts('p:s:v', $options); die $Encode::Arabic::VERSION . "\n" if exists $options->{'v'}; $e = shift @ARGV; while (<>) { print encode $e, decode "utf8", $options->{'p'} . $_ . $options->{'s'}; } __END__ =head1 NAME encode - Filter script mimicking the encode function =head1 REVISION $Revision: 550 $ $Date: 2008-05-06 16:22:13 +0200 (Tue, 06 May 2008) $ =head1 SYNOPSIS Examples of command-line invocation: $ decode ArabTeX < decode.d | encode Buckwalter > encode.d $ decode MacArabic < data.MacArabic > data.UTF8 $ encode WinArabic < data.UTF8 > data.WinArabic The core of the implementation: getopts('p:s:v', $options); $e = shift @ARGV; while (<>) { print encode $e, decode "utf8", $options->{'p'} . $_ . $options->{'s'}; } =head1 DESCRIPTION The L library provides a unified interface for converting strings from different encodings into a common representation, and vice versa. The L and L programs mimick the fuction calls to the C and C methods, respectively. For the list of supported encoding schemes, please refer to L and the source files of the programs. The naming of encodings is case-insensitive. =head1 OPTIONS encode [OPTIONS] encoding -v --version show program's version --help show usage information -p text --prefix=text prefix input with text -s text --suffix=text suffix input with text =head1 SEE ALSO Encode::Arabic Online Interface L Encode Arabic Project L ElixirFM Project L L, L, L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2008 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/bin/decode.pl0000444001432400135600000000475011144136727015010 0ustar smrzufal#! perl -w our $VERSION = do { q $Revision: 550 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Arabic::Buckwalter ':xml'; use Encode::Arabic; use Getopt::Std; $Getopt::Std::STANDARD_HELP_VERSION = 1; $options = { 'p' => '', 's' => '' }; getopts('p:s:v', $options); die $Encode::Arabic::VERSION . "\n" if exists $options->{'v'}; $e = shift @ARGV; while (<>) { print encode "utf8", decode $e, $options->{'p'} . $_ . $options->{'s'}; } __END__ =head1 NAME decode - Filter script mimicking the decode function =head1 REVISION $Revision: 550 $ $Date: 2008-05-06 16:22:13 +0200 (Tue, 06 May 2008) $ =head1 SYNOPSIS Examples of command-line invocation: $ decode ArabTeX < decode.d | encode Buckwalter > encode.d $ decode MacArabic < data.MacArabic > data.UTF8 $ encode WinArabic < data.UTF8 > data.WinArabic The core of the implementation: getopts('p:s:v', $options); $e = shift @ARGV; while (<>) { print encode "utf8", decode $e, $options->{'p'} . $_ . $options->{'s'}; } =head1 DESCRIPTION The L library provides a unified interface for converting strings from different encodings into a common representation, and vice versa. The L and L programs mimick the fuction calls to the C and C methods, respectively. For the list of supported encoding schemes, please refer to L and the source files of the programs. The naming of encodings is case-insensitive. =head1 OPTIONS decode [OPTIONS] encoding -v --version show program's version --help show usage information -p text --prefix=text prefix input with text -s text --suffix=text suffix input with text =head1 SEE ALSO Encode::Arabic Online Interface L Encode Arabic Project L ElixirFM Project L L, L, L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2008 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/0000755001432400135600000000000011144136727013222 5ustar smrzufalEncode-Arabic-1.9/lib/Encode/0000755001432400135600000000000011144136727014417 5ustar smrzufalEncode-Arabic-1.9/lib/Encode/Arabic/0000755001432400135600000000000011144136727015600 5ustar smrzufalEncode-Arabic-1.9/lib/Encode/Arabic/ArabTeX/0000755001432400135600000000000011144136727017066 5ustar smrzufalEncode-Arabic-1.9/lib/Encode/Arabic/ArabTeX/ZDMG/0000755001432400135600000000000011144136727017627 5ustar smrzufalEncode-Arabic-1.9/lib/Encode/Arabic/ArabTeX/ZDMG/RE.pm0000444001432400135600000000636211144136727020500 0ustar smrzufal# ################################################################### Otakar Smrz, 2003/01/23 # # Encoding of Arabic: ArabTeX Notation by Klaus Lagally ##################################### # $Id: RE.pm 143 2006-11-15 01:16:57Z smrz $ package Encode::Arabic::ArabTeX::ZDMG::RE; use 5.008; use strict; use warnings; our $VERSION = do { q $Revision: 143 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; sub import { # perform import as if Encode were used one level before this module require Encode; Encode->export_to_level(1, @_); } use Encode::Encoding; use base 'Encode::Encoding'; __PACKAGE__->Define('ZDMG-RE', 'ArabTeX-ZDMG-RE'); our (%encode_used, %decode_used, @shams, @qamar); sub encode ($$;$$) { my (undef, $text, $check, $mode) = @_; $_[1] = '' if $check; # this is what in-place edit needs require Encode; Encode::_utf8_off($text); return $text; } sub decode ($$;$) { my (undef, $text, $check) = @_; $_[1] = '' if $check; # this is what in-place edit needs for ($text) { s/NY/n/g; s/UA/u\x{0304}/g; s/WA/w/g; s/_a/a\x{0304}/g; s/N/n/g; s/Y/a\x{0304}/g; s/T/t/g; #s/y/j/g; s/\\cap\s+([\._\^]?)([a-zAIU])/$1\*$2/g; s/\\cap\s+(['`])([a-zAIUEO])/\*$1\*$2/g; s/\.(\*?[hsdtz])/$1\x{0323}/g; s/\.(\*?g)/$1\x{0307}/g; s/_(\*?[td])/$1\x{0331}/g; s/_(\*?)h/$1\x{032E}/g; #s/_(\*?)h/$1ch/g; s/\^(\*?[gs])/$1\x{030C}/g; #s/\^(\*?s)/\\v{$1}/g; #s/\^(\*?)g/$1d\\v{z}/g; s/(? module. Originally, the method helped data typesetting in TeX. It has been modified to produce correct Perl's representation engaging Combining Diacritical Marks from the Unicode Standard, Version 4.0. =head2 EXPORT Exports as if C also appeared in the package. =head1 SEE ALSO L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2006 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/Encode/Arabic/ArabTeX/RE.pm0000444001432400135600000004516611144136727017744 0ustar smrzufal# ################################################################### Otakar Smrz, 2003/01/23 # # Encoding of Arabic: ArabTeX Notation by Klaus Lagally ##################################### # $Id: RE.pm 162 2006-12-16 00:16:10Z smrz $ package Encode::Arabic::ArabTeX::RE; use 5.008; use strict; use warnings; use Scalar::Util 'blessed'; our $VERSION = do { q $Revision: 162 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Encoding; use base 'Encode::Encoding'; __PACKAGE__->Define('ArabTeX-RE'); our (%encode_used, %decode_used, @shams, @qamar); our $enmode; our $demode; our %modemap = ( 'default' => 3, 'undef' => 0, 'fullvocalize' => 4, 'full' => 4, 'vocalize' => 3, 'nosukuun' => 3, 'novocalize' => 2, 'novowels' => 2, 'none' => 2, 'noshadda' => 1, 'noneplus' => 1, ); sub import { # perform import as if Encode were used one level before this module require Encode; Encode->export_to_level(1, @_); } sub encode ($$;$) { my ($cls, $text, $check) = @_; $_[1] = '' if $check; # this is what in-place edit needs $cls->initialize_encode() unless %encode_used; $text = join '', map { exists $encode_used{$_} ? $encode_used{$_} : $_ } split '', $text; $text =~ s/([\_\.\^]?\w)\\shadda{}/$1$1/g; $text =~ s/(\\ham{.})\\shadda{}/$1$1/g; $text =~ s/([\=\s\-\%])\\alif{}\\vow{a}l/$1"al-/g; $text =~ s/([\=\s\-\%])\\alif{}l/$1al-/g; $text =~ s/(b\\vow{i})\\alif{}l/$1-al-/g; $text =~ s/([\=\s\-\%])\\alif{}\\vow{([ui])}/$1"$2/g; $text =~ s/([\=\s\-\%])\\alif{}/$1i/g; $text =~ s/(\\vow{aN})\\alif{}/$1/g; $text =~ s/\\alif{}(\\vow{aN})/$1/g; $text =~ s/\\vow{a}\\alif{}/\\vow{A}/g; $text =~ s/\\alif{}/\\aux{A}/g; $text =~ s/\\madda{}/'A/g; $text =~ s/\\vow{a}\\maq{}/\\vow{Y}/g; $text =~ s/\\maq{}(\\vow{aN})/\\vow{aNY}/g; $text =~ s/\\vow{aN}\\maq{}/\\vow{aNY}/g; $text =~ s/\\maq{}/\\aux{Y}/g; $text =~ s/i(}?)y([^aiuAIUY])/I$1$2/g; # produces \ham{I}, too $text =~ s/u(}?)w([^aiuAIUY])/U$1$2/g; $text =~ s/([iIuU]})(\\ham{a})/$1-$2/g; $text =~ s/([\-\s])\\ham{a}([^\\])/$1'\\aux{a}$2/g; $text =~ s/([^\}\s])\\ham{a}/$1\\aux{a}'/g; $text =~ s/([^\-\s])(\\ham{i})/$1-$2/g; $text =~ s/\\ham{i}([^\\])/'\\aux{i}$1/g; $text =~ s/([^\}])(\\ham{w})/$1\\aux{u}$2/g; $text =~ s/(\\ham{w})([^\\])/$1\\aux{u}$2/g; $text =~ s/([^\}])(\\ham{y})/$1\\aux{i}$2/g; $text =~ s/(\\ham{y})([^\\])/$1\\aux{i}$2/g; $text =~ s/\\ham{[aiwy]}/'/g; $text =~ s/\\ham{I}/'I/g; $text =~ s/(?initialize_decode() unless %decode_used; $text = "\n" . $text . "\n"; $text =~ s/_a/a/g; # is there any special character for the defective fatha? $text =~ s/_U/U/g; # _U only affects the pronunciation, not the spelling foreach $one (@shams, '\\\'', @qamar) { $text =~ s/(?? $text =~ s/a'(\\D{})/a\\H{a}$1/g; # how do you write ? $text =~ s/'A/\\M{}/g; $text =~ s/'a/\\H{a}a/g; $text =~ s/'Y/\\H{a}Y/g; $text =~ s/'/\\H{a}a/g; # final resort foreach $one (@shams) { $text =~ s/($one)\-$one/l$1\\D{}/g; } foreach $one ('\\\\', @qamar) { $text =~ s/l\-($one)/l\\O{}$1/g; } # alas! using $one in the replacing expression produces extra \\ $text =~ s/aNY/\\V{aN}\\Q{}/g; $text =~ s/(?<=A\\H{}|\\H{a})aN/\\V{aN}/g; $text =~ s/(?<=T)aN/\\V{aN}/g; $text =~ s/(? "\x{0629}", "T", # C9 "\311", 'T', # 201 "\xC9", # "\xD8\xA9" "\x{062A}", "t", # CA "\312", 't', # 202 "\xCA", # "\xD8\xAA" "\x{062B}", "_t", # CB "\313", '\\_t', # 203 "\xCB", # "\xD8\xAB" <_t> "\x{062C}", "^g", # CC "\314", '\\^g', # 204 "\xCC", # "\xD8\xAC" <^g> "\x{062D}", ".h", # CD "\315", '\\.h', # 205 "\xCD", # "\xD8\xAD" <.h> "\x{062E}", "_h", # CE "\316", '\\_h', # 206 "\xCE", # "\xD8\xAE" <_h> "\x{062F}", "d", # CF "\317", 'd', # 207 "\xCF", # "\xD8\xAF" "\x{0630}", "_d", # D0 "\320", '\\_d', # 208 "\xD0", # "\xD8\xB0" <_d> "\x{0631}", "r", # D1 "\321", 'r', # 209 "\xD1", # "\xD8\xB1" "\x{0632}", "z", # D2 "\322", 'z', # 210 "\xD2", # "\xD8\xB2" "\x{0633}", "s", # D3 "\323", 's', # 211 "\xD3", # "\xD8\xB3" "\x{0634}", "^s", # D4 "\324", '\\^s', # 212 "\xD4", # "\xD8\xB4" <^s> "\x{0635}", ".s", # D5 "\325", '\\.s', # 213 "\xD5", # "\xD8\xB5" <.s> "\x{0636}", ".d", # D6 "\326", '\\.d', # 214 "\xD6", # "\xD8\xB6" <.d> # "\327", # D7 #"\327", "\x{0637}", ".t", # D8 "\330", '\\.t', # 216 "\xD8", # "\xD8\xB7" <.t> "\x{0638}", ".z", # D9 "\331", '\\.z', # 217 "\xD9", # "\xD8\xB8" <.z> "\x{0639}", "`", # DA "\332", '\\`', # 218 "\xDA", # "\xD8\xB9" <`> "\x{063A}", ".g", # DB "\333", '\\.g', # 219 "\xDB", # "\xD8\xBA" <.g> "\x{0640}", "-", # DC "\334", '--', # 220 "\xDC", # "\xD9\x80" ta.twiil "\x{0641}", "f", # DD "\335", 'f', # 221 "\xDD", # "\xD9\x81" "\x{0642}", "q", # DE "\336", 'q', # 222 "\xDE", # "\xD9\x82" "\x{0643}", "k", # DF "\337", 'k', # 223 "\xDF", # "\xD9\x83" # "\340", # E0 #"\340", "\x{0644}", "l", # E1 "\341", 'l', # 225 "\xE1", # "\xD9\x84" # "\342", # E2 #"\342", "\x{0645}", "m", # E3 "\343", 'm', # 227 "\xE3", # "\xD9\x85" "\x{0646}", "n", # E4 "\344", 'n', # 228 "\xE4", # "\xD9\x86" "\x{0647}", "h", # E5 "\345", 'h', # 229 "\xE5", # "\xD9\x87" "\x{0648}", "w", # E6 "\346", 'w', # 230 "\xE6", # "\xD9\x88" "\x{0649}", "\\maq{}", # EC "\354", '\\\\Q{}', # 236 "\xEC", # "\xD9\x89" 'alif maq.suura "\x{064A}", "y", # ED "\355", 'y', # 237 "\xED", # "\xD9\x8A" "\x{064B}", "\\vow{aN}", # F0 "\360", '\\\\V{aN}', # 240 "\xF0", # "\xD9\x8B" "\x{064C}", "\\vow{uN}", # F1 "\361", '\\\\V{uN}', # 241 "\xF1", # "\xD9\x8C" "\x{064D}", "\\vow{iN}", # F2 "\362", '\\\\V{iN}', # 242 "\xF2", # "\xD9\x8D" "\x{064E}", "\\vow{a}", # F3 "\363", '\\\\V{a}', # 243 "\xF3", # "\xD9\x8E" "\x{064F}", "\\vow{u}", # F5 "\365", '\\\\V{u}', # 245 "\xF5", # "\xD9\x8F" "\x{0650}", "\\vow{i}", # F6 "\366", '\\\\V{i}', # 246 "\xF6", # "\xD9\x90" "\x{0651}", "\\shadda{}", # F8 "\370", '\\\\D{}', # 248 "\xF8", # "\xD9\x91" ^sadda "\x{0652}", "\\sukun{}", # FA "\372", '\\\\O{}', # 250 "\xFA", # "\xD9\x92" sukuun "\x{0670}", # '\\\\A{}', # 243 "\xF3", # "\xD9\xB0" <_a> -> "\x{0671}", # '\\\\W{}', # 199 "\xC7", # "\xD9\xB1" wa.sla-on-'alif -> bare 'alif ); no strict 'refs'; $cls->enmode(defined ${ $cls . '::enmode' } ? ${ $cls . '::enmode' } : 'default'); } sub initialize_decode ($) { my $cls = shift @_; @shams = ('t', '\\_t', 'd', '\\_d', 'r', 'z', 's', '\\^s', '\\.s', '\\.d', '\\.t', '\\.z', 'l', 'n'); @qamar = ('b', '\\^g', '\\.h', '\\_h', '\\`', '\\.g', 'f', 'q', 'k', 'm', 'h', 'w', 'y'); %decode_used = ( # 'p', # 129 "\x81", # '\\^c', # 141 "\x8D", # '\\^z', # 142 "\x8E", # 'g', # 144 "\x90", '\\,', "\x{060C}", # 161 "\xA1", # "\xD8\x8C" right-to-left-comma '\\;', "\x{061B}", # 186 "\xBA", # "\xD8\x9B" right-to-left-semicolon '\\?', "\x{061F}", # 191 "\xBF", # "\xD8\x9F" right-to-left-question-mark '\\\\H{}', "\x{0621}", # 193 "\xC1", # "\xD8\xA1" hamza-on-the-line '\\\\M{}', "\x{0622}", # 194 "\xC2", # "\xD8\xA2" madda-over-'alif '\\\\H{a}', "\x{0623}", # 195 "\xC3", # "\xD8\xA3" hamza-over-'alif '\\\\H{w}', "\x{0624}", # 196 "\xC4", # "\xD8\xA4" hamza-over-waaw '\\\\H{i}', "\x{0625}", # 197 "\xC5", # "\xD8\xA5" hamza-under-'alif '\\\\H{y}', "\x{0626}", # 198 "\xC6", # "\xD8\xA6" hamza-over-yaa' '\\\\L{}', "\x{0627}", # 199 "\xC7", # "\xD8\xA7" bare 'alif 'b', "\x{0628}", # 200 "\xC8", # "\xD8\xA8" 'T', "\x{0629}", # 201 "\xC9", # "\xD8\xA9" 't', "\x{062A}", # 202 "\xCA", # "\xD8\xAA" '\\_t', "\x{062B}", # 203 "\xCB", # "\xD8\xAB" <_t> '\\^g', "\x{062C}", # 204 "\xCC", # "\xD8\xAC" <^g> '\\.h', "\x{062D}", # 205 "\xCD", # "\xD8\xAD" <.h> '\\_h', "\x{062E}", # 206 "\xCE", # "\xD8\xAE" <_h> 'd', "\x{062F}", # 207 "\xCF", # "\xD8\xAF" '\\_d', "\x{0630}", # 208 "\xD0", # "\xD8\xB0" <_d> 'r', "\x{0631}", # 209 "\xD1", # "\xD8\xB1" 'z', "\x{0632}", # 210 "\xD2", # "\xD8\xB2" 's', "\x{0633}", # 211 "\xD3", # "\xD8\xB3" '\\^s', "\x{0634}", # 212 "\xD4", # "\xD8\xB4" <^s> '\\.s', "\x{0635}", # 213 "\xD5", # "\xD8\xB5" <.s> '\\.d', "\x{0636}", # 214 "\xD6", # "\xD8\xB6" <.d> '\\.t', "\x{0637}", # 216 "\xD8", # "\xD8\xB7" <.t> '\\.z', "\x{0638}", # 217 "\xD9", # "\xD8\xB8" <.z> '\\`', "\x{0639}", # 218 "\xDA", # "\xD8\xB9" <`> '\\.g', "\x{063A}", # 219 "\xDB", # "\xD8\xBA" <.g> '--', "\x{0640}", # 220 "\xDC", # "\xD9\x80" ta.twiil 'f', "\x{0641}", # 221 "\xDD", # "\xD9\x81" 'q', "\x{0642}", # 222 "\xDE", # "\xD9\x82" 'k', "\x{0643}", # 223 "\xDF", # "\xD9\x83" 'l', "\x{0644}", # 225 "\xE1", # "\xD9\x84" 'm', "\x{0645}", # 227 "\xE3", # "\xD9\x85" 'n', "\x{0646}", # 228 "\xE4", # "\xD9\x86" 'h', "\x{0647}", # 229 "\xE5", # "\xD9\x87" 'w', "\x{0648}", # 230 "\xE6", # "\xD9\x88" '\\\\Q{}', "\x{0649}", # 236 "\xEC", # "\xD9\x89" 'alif maq.suura 'y', "\x{064A}", # 237 "\xED", # "\xD9\x8A" '\\\\V{aN}', "\x{064B}", # 240 "\xF0", # "\xD9\x8B" '\\\\V{uN}', "\x{064C}", # 241 "\xF1", # "\xD9\x8C" '\\\\V{iN}', "\x{064D}", # 242 "\xF2", # "\xD9\x8D" '\\\\V{a}', "\x{064E}", # 243 "\xF3", # "\xD9\x8E" '\\\\V{u}', "\x{064F}", # 245 "\xF5", # "\xD9\x8F" '\\\\V{i}', "\x{0650}", # 246 "\xF6", # "\xD9\x90" '\\\\D{}', "\x{0651}", # 248 "\xF8", # "\xD9\x91" ^sadda '\\\\O{}', "\x{0652}", # 250 "\xFA", # "\xD9\x92" sukuun '\\\\A{}', "\x{0670}", # 243 "\xF3", # "\xD9\xB0" <_a> -> '\\\\W{}', "\x{0671}", # 199 "\xC7", # "\xD9\xB1" wa.sla-on-'alif -> bare 'alif ); no strict 'refs'; $cls->demode(defined ${ $cls . '::demode' } ? ${ $cls . '::demode' } : 'default'); } sub enmode ($$) { my ($cls, $mode) = @_; $cls = blessed $cls if ref $cls; $mode = 'undef' unless defined $mode; $mode = $modemap{$mode} if exists $modemap{$mode}; no strict 'refs'; my $return = ${ $cls . '::enmode' }; ${ $cls . '::enmode' } = $mode; return $return; } sub demode ($$) { my ($cls, $mode) = @_; $cls = blessed $cls if ref $cls; $mode = 'undef' unless defined $mode; $mode = $modemap{$mode} if exists $modemap{$mode}; no strict 'refs'; my $return = ${ $cls . '::demode' }; ${ $cls . '::demode' } = $mode; return $return; } 1; __END__ =head1 NAME Encode::Arabic::ArabTeX::RE - Deprecated Encode::Arabic::ArabTeX implemented with regular expressions =head1 REVISION $Revision: 162 $ $Date: 2006-12-16 01:16:10 +0100 (Sat, 16 Dec 2006) $ =head1 SYNOPSIS use Encode::Arabic::ArabTeX::RE; $string = decode 'arabtex-re', $octets; $octets = encode 'arabtex-re', $string; =head1 DESCRIPTION Deprecated method using sequential regular-expression substitutions. Limited in scope over the ArabTeX notation and non-efficient in data processing, still, not requiring the L module. =head2 EXPORTS & MODES Exports as if C also appeared in the package. Experimental and incomplete support for B is provided, see L. =head1 SEE ALSO L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2006 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/Encode/Arabic/ArabTeX/ZDMG.pm0000444001432400135600000006160511144136727020173 0ustar smrzufal# ##################################################################### Otakar Smrz, 2003/08/05 # # Encoding of Arabic: ArabTeX Notation by Klaus Lagally, ZDMG ################################# # $Id: ZDMG.pm 808 2009-02-09 23:19:07Z smrz $ package Encode::Arabic::ArabTeX::ZDMG; use 5.008; use strict; use warnings; use Carp; our $VERSION = do { q $Revision: 808 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Arabic::ArabTeX (); use base 'Encode::Arabic::ArabTeX'; use Encode::Encoding; use base 'Encode::Encoding'; __PACKAGE__->Define('ZDMG', 'ArabTeX-ZDMG'); use Encode::Mapper ':others', ':silent', ':join'; our %options; # records of options per package .. global register our %option; # options of the caller package .. used with local sub import { # perform import as if Encode were used one level before this module if (defined $_[1] and $_[1] eq ':xml') { # interfere little with possible Encode's options Encode::Mapper->options ( 'override' => [ # override rules of these LHS .. no other tricks ^^ ( # combinations of '<' and '>' with the other bytes map { my $x = chr $_; "<" . $x, [ "<" . $x, ">" ], # propagate the '>' sign implying .. ">" . $x, [ $x, ">" ], # .. preservation of the bytes } 0x00 .. 0x3B, 0x3D, 0x3F .. 0xFF ), ">>", ">", # stop the whole process .. "<>", "<>", # .. do not even start it "><", [ "<", ">" ], # rather than nested '<' and '>', .. "<<", [ "<<", ">" ], ">\\<", [ "<", ">" ], # .. prefer these escape sequences ">\\\\", [ "\\", ">" ], ">\\>", [ ">", ">" ], ">", ">", # singular symbols may migrate right .. "<", "<", # .. or preserve the rest of the data ] ); splice @_, 1, 1; } if (defined $_[1] and $_[1] eq ':describe') { __PACKAGE__->options($_[1]); splice @_, 1, 1; } require Encode; Encode->export_to_level(1, @_); # here comes the only trick ^^ } sub encoder ($;%) { my ($cls, %opt) = @_; my $encoder = []; $encoder->[0] = Encode::Mapper->compile ( [ 'silent' => 0, ], "\x{0054}", "\\cap t", "\x{0074}", "t", "\x{1E6E}", "\\cap _t", "\x{1E6F}", "_t", "\x{0054}\x{0331}", "\\cap _t", "\x{0074}\x{0331}", "_t", "\x{0044}", "\\cap d", "\x{0064}", "d", "\x{1E0E}", "\\cap _d", "\x{1E0F}", "_d", "\x{0044}\x{0331}", "\\cap _d", "\x{0064}\x{0331}", "_d", "\x{0052}", "\\cap r", "\x{0072}", "r", "\x{005A}", "\\cap z", "\x{007A}", "z", "\x{0053}", "\\cap s", "\x{0073}", "s", "\x{0160}", "\\cap ^s", "\x{0161}", "^s", "\x{0053}\x{030C}", "\\cap ^s", "\x{0073}\x{030C}", "^s", "\x{1E62}", "\\cap .s", "\x{1E63}", ".s", "\x{0053}\x{0323}", "\\cap .s", "\x{0073}\x{0323}", ".s", "\x{1E0C}", "\\cap .d", "\x{1E0D}", ".d", "\x{0044}\x{0323}", "\\cap .d", "\x{0064}\x{0323}", ".d", "\x{1E6C}", "\\cap .t", "\x{1E6D}", ".t", "\x{0054}\x{0323}", "\\cap .t", "\x{0074}\x{0323}", ".t", "\x{1E92}", "\\cap .z", "\x{1E93}", ".z", "\x{005A}\x{0323}", "\\cap .z", "\x{007A}\x{0323}", ".z", "\x{004C}", "\\cap l", "\x{006C}", "l", "\x{004E}", "\\cap n", "\x{006E}", "n", # "\x{0054}", "\\cap T", "\x{0074}", "T", # "\x{004E}", "\\cap N", "\x{006E}", "N", # "\x{0057}", "\\cap W", "\x{0077}", "W", "\x{0041}", "\\cap a", "\x{0061}", "a", "\x{0045}", "\\cap e", "\x{0065}", "e", "\x{0049}", "\\cap i", "\x{0069}", "i", "\x{004F}", "\\cap o", "\x{006F}", "o", "\x{0055}", "\\cap u", "\x{0075}", "u", "\x{0100}", "\\cap A", "\x{0101}", "A", "\x{0041}\x{0304}", "\\cap A", "\x{0061}\x{0304}", "A", "\x{0112}", "\\cap E", "\x{0113}", "E", "\x{0045}\x{0304}", "\\cap E", "\x{0065}\x{0304}", "E", "\x{012A}", "\\cap I", "\x{012B}", "I", "\x{0049}\x{0304}", "\\cap I", "\x{0069}\x{0304}", "I", "\x{014C}", "\\cap O", "\x{014D}", "O", "\x{004F}\x{0304}", "\\cap O", "\x{006F}\x{0304}", "O", "\x{016A}", "\\cap U", "\x{016B}", "U", "\x{0055}\x{0304}", "\\cap U", "\x{0075}\x{0304}", "U", "\x{02BC}", "\"", "\x{02BE}", "'", "\x{02BF}", "`", "\x{0042}", "\\cap b", "\x{0062}", "b", "\x{01E6}", "\\cap ^g", "\x{01E7}", "^g", "\x{0047}\x{030C}", "\\cap ^g", "\x{0067}\x{030C}", "^g", "\x{1E24}", "\\cap .h", "\x{1E25}", ".h", "\x{0048}\x{0323}", "\\cap .h", "\x{0068}\x{0323}", ".h", "\x{1E2A}", "\\cap _h", "\x{1E2B}", "_h", "\x{0048}\x{032E}", "\\cap _h", "\x{0068}\x{032E}", "_h", "\x{0120}", "\\cap .g", "\x{0121}", ".g", "\x{0047}\x{0307}", "\\cap .g", "\x{0067}\x{0307}", ".g", "\x{0046}", "\\cap f", "\x{0066}", "f", "\x{0051}", "\\cap q", "\x{0071}", "q", "\x{004B}", "\\cap k", "\x{006B}", "k", "\x{004D}", "\\cap m", "\x{006D}", "m", "\x{0048}", "\\cap h", "\x{0068}", "h", "\x{0057}", "\\cap w", "\x{0077}", "w", "\x{0059}", "\\cap y", "\x{0079}", "y", "\x{0050}", "\\cap p", "\x{0070}", "p", "\x{0056}", "\\cap v", "\x{0076}", "v", "\x{0047}", "\\cap g", "\x{0067}", "g", "\x{0043}", "\\cap c", "\x{0063}", "c", "\x{010C}", "\\cap ^c", "\x{010D}", "^c", "\x{0043}\x{030C}", "\\cap ^c", "\x{0063}\x{030C}", "^c", "\x{0106}", "\\cap ,c", "\x{0107}", ",c", "\x{0043}\x{0301}", "\\cap ,c", "\x{0063}\x{0301}", ",c", "\x{017D}", "\\cap ^z", "\x{017E}", "^z", "\x{005A}\x{030C}", "\\cap ^z", "\x{007A}\x{030C}", "^z", "\x{00D1}", "\\cap ^n", "\x{00F1}", "^n", "\x{004E}\x{0303}", "\\cap ^n", "\x{006E}\x{0303}", "^n", "\x{004C}\x{0303}", "\\cap ^l", "\x{006C}\x{0303}", "^l", "\x{0052}\x{0307}", "\\cap .r", "\x{0072}\x{0307}", ".r", ); no strict 'refs'; ${ $cls . '::encoder' } = $encoder; if ($option{'describe'}) { $_->describe('') foreach @{${ $cls . '::encoder' }}; } return ${ $cls . '::encoder' }; } sub decoder ($;$$) { my ($cls, undef, undef) = @_; my $decoder = []; my @sunny = ( [ "t", "\x{0074}" ], [ "_t", "\x{0074}\x{0331}" ], # "\x{1E6F}" [ "d", "\x{0064}" ], [ "_d", "\x{0064}\x{0331}" ], # "\x{1E0F}" [ "r", "\x{0072}" ], [ "z", "\x{007A}" ], [ "s", "\x{0073}" ], [ "^s", "\x{0073}\x{030C}" ], # "\x{0161}" [ ".s", "\x{0073}\x{0323}" ], # "\x{1E63}" [ ".d", "\x{0064}\x{0323}" ], # "\x{1E0D}" [ ".t", "\x{0074}\x{0323}" ], # "\x{1E6D}" [ ".z", "\x{007A}\x{0323}" ], # "\x{1E93}" [ "l", "\x{006C}" ], [ "n", "\x{006E}" ], ); my @extra = ( [ "T", "\x{0074}" ], [ "H", "\x{0068}" ], # "" [ "N", "\x{006E}" ], [ "W", "\x{0077}" ], # "" ); my @vowel = ( [ "a", "\x{0061}" ], [ "_a", "\x{0061}\x{0304}" ], # "\x{0101}" [ "_aA", "\x{0061}\x{0304}" ], # "\x{0101}" [ "_aY", "\x{0061}\x{0304}" ], # "\x{0101}" [ "_aU", "\x{0061}\x{0304}" ], # "\x{0101}" [ "_aI", "\x{0061}\x{0304}" ], # "\x{0101}" [ "A", "\x{0061}\x{0304}" ], # "\x{0101}" [ "^A", "\x{0061}\x{0304}" ], # "\x{0101}" [ "e", "\x{0065}" ], [ "E", "\x{0065}\x{0304}" ], # "\x{0113}" [ "i", "\x{0069}" ], [ "_i", "\x{0069}\x{0304}" ], # "\x{012B}" [ "I", "\x{0069}\x{0304}" ], # "\x{012B}" [ "^I", "\x{0069}\x{0304}" ], # "\x{012B}" [ "_I", "\x{0069}" ], [ "o", "\x{006F}" ], [ "O", "\x{006F}\x{0304}" ], # "\x{014D}" [ "u", "\x{0075}" ], [ "_u", "\x{0075}\x{0304}" ], # "\x{016B}" [ "U", "\x{0075}\x{0304}" ], # "\x{016B}" [ "^U", "\x{0075}\x{0304}" ], # "\x{016B}" [ "_U", "\x{0075}" ], [ "Y", "\x{0061}\x{0304}" ], # "\x{0101}" ); my @minor = ( [ "'", "\x{02BE}" ], # "\x{02BC}" [ "`", "\x{02BF}" ], # "\x{02BB}" ); my @empty = ( [ "\"", "", ], # "\x{02BC}" [ "|", "", ], [ "B", "", ], ); my @moony = ( [ "b", "\x{0062}" ], [ "^g", "\x{0067}\x{030C}" ], # "\x{01E7}" [ ".h", "\x{0068}\x{0323}" ], # "\x{1E25}" [ "_h", "\x{0068}\x{032E}" ], # "\x{1E2B}" [ ".g", "\x{0067}\x{0307}" ], # "\x{0121}" [ "f", "\x{0066}" ], [ "q", "\x{0071}" ], [ "k", "\x{006B}" ], [ "m", "\x{006D}" ], [ "h", "\x{0068}" ], [ "w", "\x{0077}" ], [ "y", "\x{0079}" ], [ "p", "\x{0070}" ], [ "v", "\x{0076}" ], [ "g", "\x{0067}" ], [ "c", "\x{0063}" ], [ "^c", "\x{0063}\x{030C}" ], # "\x{010D}" [ ",c", "\x{0063}\x{0301}" ], # "\x{0107}" [ "^z", "\x{007A}\x{030C}" ], # "\x{017E}" [ "^n", "\x{006E}\x{0303}" ], # "\x{00F1}" [ "^l", "\x{006C}\x{0303}" ], [ ".r", "\x{0072}\x{0307}" ], ); $decoder->[0] = Encode::Mapper->compile ( [ 'silent' => 0, ], # definite article assimilation .. non-linguistic ( map { "l-" . $_->[0] x 2, [ "", $_->[0] . "-" . $_->[0] ], } @sunny, @moony ), # initial vowel tying ( map { my $x = $_; map { my $y = $_; map { $x->[0] . $_ . $y, $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" "\\cap\x09" . $x->[0] . $_ . $y, ucfirst $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" "\\cap\x0A" . $x->[0] . $_ . $y, ucfirst $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" "\\cap\x0D" . $x->[0] . $_ . $y, ucfirst $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" "\\cap\x20" . $x->[0] . $_ . $y, ucfirst $x->[1] . $_ . "\x{02BC}", # "\x{02C8}" } "-", "\x09", "\x0A", "\x0D", "\x20", "\x0D\x0A", "\x20\x20", "\x20\x20\x20", "\x20\x20\x20\x20" } "a", "e", "i", "o", "u" } @vowel ), # silence the silent "WA", [ "", "W" ], "UW", [ "", "U" ], "UA", [ "", "U" ], "NA", [ "", "N" ], "NY", [ "", "N" ], "NU", [ "", "N" ], "N_A", [ "", "N" ], # regular capitalization ( map { $_->[0], $_->[1], "\\cap\x09" . $_->[0], ucfirst $_->[1], "\\cap\x0A" . $_->[0], ucfirst $_->[1], "\\cap\x0D" . $_->[0], ucfirst $_->[1], "\\cap\x20" . $_->[0], ucfirst $_->[1], } @sunny, @moony, @empty, @vowel, @extra ), ( map { $_->[0] . "i", $_->[1] . "i", $_->[0] . "u", $_->[1] . "u", "\\cap\x09" . $_->[0] . "i", ucfirst $_->[1] . "i", "\\cap\x0A" . $_->[0] . "i", ucfirst $_->[1] . "i", "\\cap\x0D" . $_->[0] . "i", ucfirst $_->[1] . "i", "\\cap\x20" . $_->[0] . "i", ucfirst $_->[1] . "i", "\\cap\x09" . $_->[0] . "u", ucfirst $_->[1] . "u", "\\cap\x0A" . $_->[0] . "u", ucfirst $_->[1] . "u", "\\cap\x0D" . $_->[0] . "u", ucfirst $_->[1] . "u", "\\cap\x20" . $_->[0] . "u", ucfirst $_->[1] . "u", } @sunny, @moony, @empty ), ( map { my $x = $_; map { $x->[0] . "i" . $_, [ $x->[1], "i" . $_ ], $x->[0] . "u" . $_, [ $x->[1], "u" . $_ ], "\\cap\x09" . $x->[0] . "i" . $_, [ ucfirst $x->[1], "i" . $_ ], "\\cap\x0A" . $x->[0] . "i" . $_, [ ucfirst $x->[1], "i" . $_ ], "\\cap\x0D" . $x->[0] . "i" . $_, [ ucfirst $x->[1], "i" . $_ ], "\\cap\x20" . $x->[0] . "i" . $_, [ ucfirst $x->[1], "i" . $_ ], "\\cap\x09" . $x->[0] . "u" . $_, [ ucfirst $x->[1], "u" . $_ ], "\\cap\x0A" . $x->[0] . "u" . $_, [ ucfirst $x->[1], "u" . $_ ], "\\cap\x0D" . $x->[0] . "u" . $_, [ ucfirst $x->[1], "u" . $_ ], "\\cap\x20" . $x->[0] . "u" . $_, [ ucfirst $x->[1], "u" . $_ ], } "-", "\x09", "\x0A", "\x0D", "\x20" } @sunny, @moony, @empty ), # initial vowel assimilation ( map { my $fix = $_; "i" . $_, [ "", "I" ], "u" . $_, [ "", "U" ], "\\cap\x09" . "i" . $_, [ "", "\\cap\x09" . "I" ], "\\cap\x0A" . "i" . $_, [ "", "\\cap\x0A" . "I" ], "\\cap\x0D" . "i" . $_, [ "", "\\cap\x0D" . "I" ], "\\cap\x20" . "i" . $_, [ "", "\\cap\x20" . "I" ], "\\cap\x09" . "u" . $_, [ "", "\\cap\x09" . "U" ], "\\cap\x0A" . "u" . $_, [ "", "\\cap\x0A" . "U" ], "\\cap\x0D" . "u" . $_, [ "", "\\cap\x0D" . "U" ], "\\cap\x20" . "u" . $_, [ "", "\\cap\x20" . "U" ], map { "i" . $fix . $_->[0], [ "i", $fix . $_->[0] ], "u" . $fix . $_->[0], [ "u", $fix . $_->[0] ], "\\cap\x09" . "i" . $fix . $_->[0], [ "I", $fix . $_->[0] ], "\\cap\x0A" . "i" . $fix . $_->[0], [ "I", $fix . $_->[0] ], "\\cap\x0D" . "i" . $fix . $_->[0], [ "I", $fix . $_->[0] ], "\\cap\x20" . "i" . $fix . $_->[0], [ "I", $fix . $_->[0] ], "\\cap\x09" . "u" . $fix . $_->[0], [ "U", $fix . $_->[0] ], "\\cap\x0A" . "u" . $fix . $_->[0], [ "U", $fix . $_->[0] ], "\\cap\x0D" . "u" . $fix . $_->[0], [ "U", $fix . $_->[0] ], "\\cap\x20" . "u" . $fix . $_->[0], [ "U", $fix . $_->[0] ], } @vowel, $empty[0] } "y", "w" # "'" ), # capitalization of minors ( map { $_->[0], $_->[1], $_->[0] . "i", $_->[1] . "i", $_->[0] . "u", $_->[1] . "u", "\\cap\x09" . $_->[0], [ $_->[1], "\\cap " ], "\\cap\x0A" . $_->[0], [ $_->[1], "\\cap " ], "\\cap\x0D" . $_->[0], [ $_->[1], "\\cap " ], "\\cap\x20" . $_->[0], [ $_->[1], "\\cap " ], "\\cap\x09" . $_->[0] . "i", $_->[1] . ucfirst "i", "\\cap\x0A" . $_->[0] . "i", $_->[1] . ucfirst "i", "\\cap\x0D" . $_->[0] . "i", $_->[1] . ucfirst "i", "\\cap\x20" . $_->[0] . "i", $_->[1] . ucfirst "i", "\\cap\x09" . $_->[0] . "u", $_->[1] . ucfirst "u", "\\cap\x0A" . $_->[0] . "u", $_->[1] . ucfirst "u", "\\cap\x0D" . $_->[0] . "u", $_->[1] . ucfirst "u", "\\cap\x20" . $_->[0] . "u", $_->[1] . ucfirst "u", } @minor ), ( map { my $x = $_; map { $x->[0] . "i" . $_, [ $x->[1], "i" . $_ ], $x->[0] . "u" . $_, [ $x->[1], "u" . $_ ], "\\cap\x09" . $x->[0] . "i" . $_, [ $x->[1], "\\cap i" . $_ ], "\\cap\x0A" . $x->[0] . "i" . $_, [ $x->[1], "\\cap i" . $_ ], "\\cap\x0D" . $x->[0] . "i" . $_, [ $x->[1], "\\cap i" . $_ ], "\\cap\x20" . $x->[0] . "i" . $_, [ $x->[1], "\\cap i" . $_ ], "\\cap\x09" . $x->[0] . "u" . $_, [ $x->[1], "\\cap u" . $_ ], "\\cap\x0A" . $x->[0] . "u" . $_, [ $x->[1], "\\cap u" . $_ ], "\\cap\x0D" . $x->[0] . "u" . $_, [ $x->[1], "\\cap u" . $_ ], "\\cap\x20" . $x->[0] . "u" . $_, [ $x->[1], "\\cap u" . $_ ], } "-", "\x09", "\x0A", "\x0D", "\x20" } @minor ), # white-space collapsing ( map { "\\cap\x09" . $_, [ "", "\\cap " ], "\\cap\x0A" . $_, [ "", "\\cap " ], "\\cap\x0D" . $_, [ "", "\\cap " ], "\\cap\x20" . $_, [ "", "\\cap " ], } "\x09", "\x0A", "\x0D", "\x20" ), ); no strict 'refs'; ${ $cls . '::decoder' } = $decoder; if ($option{'describe'}) { $_->describe('') foreach @{${ $cls . '::decoder' }}; } return ${ $cls . '::decoder' }; } 1; __END__ =head1 NAME Encode::Arabic::ArabTeX::ZDMG - ZDMG phonetic transcription of Arabic using the ArabTeX notation =head1 REVISION $Revision: 808 $ $Date: 2009-02-10 00:19:07 +0100 (Tue, 10 Feb 2009) $ =head1 SYNOPSIS use Encode::Arabic::ArabTeX::ZDMG; # imports just like 'use Encode' would, plus extended options while ($line = <>) { # maps the ArabTeX notation for Arabic into the Latin symbols print encode 'utf8', decode 'zdmg', $line; # 'ZDMG' alias 'ArabTeX-ZDMG' } # ArabTeX lower ASCII transliteration <--> Latin phonetic transcription, ZDMG style $string = decode 'ArabTeX-ZDMG', $octets; $octets = encode 'ArabTeX-ZDMG', $string; =head1 DESCRIPTION ArabTeX is an excellent extension to TeX/LaTeX designed for typesetting the right-to-left scripts of the Orient. It comes up with very intuitive and comprehensible lower ASCII transliterations, the expressive power of which is even better than that of the scripts. L implements the rules needed for proper interpretation of the ArabTeX notation of Arabic into the phonetic transcription in the ZDMG style. The conversion ifself is done by L, and the user interface is built on the L module. Relevant guidance is given in L, from which this module inherits. The transformation rules are, however, quite different ;) =head1 SEE ALSO L, L, L, L, L ArabTeX system L Klaus Lagally L ArabTeX extensions L ArabXeTeX L Encode Arabic: Exercise in Functional Parsing L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2009 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/Encode/Arabic/ArabTeX/Verbatim.pm0000444001432400135600000021341311144136727021177 0ustar smrzufal# ##################################################################### Otakar Smrz, 2005/07/16 # # Encoding of Arabic: ArabTeX Notation by Klaus Lagally, Verbatim ############################# # $Id: Verbatim.pm 717 2008-10-02 22:28:12Z smrz $ package Encode::Arabic::ArabTeX::Verbatim; use 5.008; use strict; use warnings; use Carp; our $VERSION = do { q $Revision: 717 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Arabic::ArabTeX (); use base 'Encode::Arabic::ArabTeX'; use Encode::Encoding; use base 'Encode::Encoding'; __PACKAGE__->Define('ArabTeX-Verbatim', 'ArabTeX-Verb'); use Encode::Mapper ':others', ':silent', ':join'; our %options; # records of options per package .. global register our %option; # options of the caller package .. used with local our $enmode; our $demode; our $enlevel = 2; our $delevel = 3; our %modemap = ( 'default' => 3, 'undef' => 0, 'fullvocalize' => 4, 'full' => 4, 'vocalize' => 3, 'nosukuun' => 3, 'novocalize' => 2, 'novowels' => 2, 'none' => 2, 'noshadda' => 1, 'noneplus' => 1, ); sub import { # perform import as if Encode were used one level before this module if (defined $_[1] and $_[1] eq ':xml') { # interfere little with possible Encode's options Encode::Mapper->options ( 'override' => [ # override rules of these LHS .. no other tricks ^^ ( # combinations of '<' and '>' with the other bytes map { my $x = chr $_; "<" . $x, [ "<" . $x, ">" ], # propagate the '>' sign implying .. ">" . $x, [ $x, ">" ], # .. preservation of the bytes } 0x00 .. 0x3B, 0x3D, 0x3F .. 0xFF ), ">>", ">", # stop the whole process .. "<>", "<>", # .. do not even start it "><", [ "<", ">" ], # rather than nested '<' and '>', .. "<<", [ "<<", ">" ], ">\\<", [ "<", ">" ], # .. prefer these escape sequences ">\\\\", [ "\\", ">" ], ">\\>", [ ">", ">" ], ">", ">", # singular symbols may migrate right .. "<", "<", # .. or preserve the rest of the data ] ); splice @_, 1, 1; } if (defined $_[1] and $_[1] eq ':complex') { __PACKAGE__->options($_[1]); splice @_, 1, 1; } if (defined $_[1] and $_[1] eq ':describe') { __PACKAGE__->options($_[1]); splice @_, 1, 1; } require Encode; Encode->export_to_level(1, @_); # here comes the only trick ^^ } sub options ($%) { my $cls = shift @_; my ($i, $opt, %opt); my @returns = %option; $opt{'non-quoting'} = 0 unless defined $option{'non-quoting'}; $opt{'non-refined'} = 1 unless defined $option{'non-refined'}; while (@_) { $opt = lc shift @_; if ($opt =~ /^\:/) { $opt eq ':complex' and $opt{'non-quoting'} = 0, 1 and $opt{'non-refined'} = 0, 1 and next; $opt eq ':describe' and $opt{'describe'} = 1 and next; } else { $opt =~ /^\-*(.*)$/; $opt{$1} = shift @_; } } return %opt unless defined $cls; $option{$_} = $opt{$_} foreach keys %opt; return @returns; } sub encoder ($;%) { my ($cls, %opt) = @_; my $encoder = []; $encoder->[0] = Encode::Mapper->compile ( [ 'silent' => 0, ], ( map { chr 0x0660 + $_, "" . $_, } 0 .. 9 ), "\x{064B}", "aN", # 240 "\xF0", # "\xD9\x8B" "\x{064C}", "uN", # 241 "\xF1", # "\xD9\x8C" "\x{064D}", "iN", # 242 "\xF2", # "\xD9\x8D" "\x{064E}", "a", # 243 "\xF3", # "\xD9\x8E" "\x{064F}", "u", # 245 "\xF5", # "\xD9\x8F" "\x{0650}", "i", # 246 "\xF6", # "\xD9\x90" "\x{0670}", "_a", "\x{0657}", "_u", "\x{0656}", "_i", "\x{060C}", ",", # 161 "\xA1", # "\xD8\x8C" right-to-left-comma "\x{061B}", ";", # 186 "\xBA", # "\xD8\x9B" right-to-left-semicolon "\x{061F}", "?", # 191 "\xBF", # "\xD8\x9F" right-to-left-question-mark "\x{0621}", "'|", # 193 "\xC1", # "\xD8\xA1" hamza-on-the-line "\x{0622}", "'A", # 194 "\xC2", # "\xD8\xA2" madda-over-'alif "\x{0623}", "'a", # 195 "\xC3", # "\xD8\xA3" hamza-over-'alif "\x{0624}", "'w", # 196 "\xC4", # "\xD8\xA4" hamza-over-waaw "\x{0625}", "'i", # 197 "\xC5", # "\xD8\xA5" hamza-under-'alif "\x{0626}", "'y", # 198 "\xC6", # "\xD8\xA6" hamza-over-yaa' "\x{0627}", "A", # 199 "\xC7", # "\xD8\xA7" bare 'alif "\x{0628}", "b", # 200 "\xC8", # "\xD8\xA8" "\x{0629}", "T", # 201 "\xC9", # "\xD8\xA9" "\x{062A}", "t", # 202 "\xCA", # "\xD8\xAA" "\x{062B}", "_t", # 203 "\xCB", # "\xD8\xAB" <_t> "\x{062C}", "^g", # 204 "\xCC", # "\xD8\xAC" <^g> "\x{062D}", ".h", # 205 "\xCD", # "\xD8\xAD" <.h> "\x{062E}", "_h", # 206 "\xCE", # "\xD8\xAE" <_h> "\x{062F}", "d", # 207 "\xCF", # "\xD8\xAF" "\x{0630}", "_d", # 208 "\xD0", # "\xD8\xB0" <_d> "\x{0631}", "r", # 209 "\xD1", # "\xD8\xB1" "\x{0632}", "z", # 210 "\xD2", # "\xD8\xB2" "\x{0633}", "s", # 211 "\xD3", # "\xD8\xB3" "\x{0634}", "^s", # 212 "\xD4", # "\xD8\xB4" <^s> "\x{0635}", ".s", # 213 "\xD5", # "\xD8\xB5" <.s> "\x{0636}", ".d", # 214 "\xD6", # "\xD8\xB6" <.d> "\x{0637}", ".t", # 216 "\xD8", # "\xD8\xB7" <.t> "\x{0638}", ".z", # 217 "\xD9", # "\xD8\xB8" <.z> "\x{0639}", "`", # 218 "\xDA", # "\xD8\xB9" <`> "\x{063A}", ".g", # 219 "\xDB", # "\xD8\xBA" <.g> "\x{0640}", "--", # 220 "\xDC", # "\xD9\x80" ta.twiil "\x{0641}", "f", # 221 "\xDD", # "\xD9\x81" "\x{0642}", "q", # 222 "\xDE", # "\xD9\x82" "\x{0643}", "k", # 223 "\xDF", # "\xD9\x83" "\x{0644}", "l", # 225 "\xE1", # "\xD9\x84" "\x{0645}", "m", # 227 "\xE3", # "\xD9\x85" "\x{0646}", "n", # 228 "\xE4", # "\xD9\x86" "\x{0647}", "h", # 229 "\xE5", # "\xD9\x87" "\x{0648}", "w", # 230 "\xE6", # "\xD9\x88" "\x{0649}", "Y", # 236 "\xEC", # "\xD9\x89" 'alif maq.suura "\x{064A}", "y", # 237 "\xED", # "\xD9\x8A" "\x{0651}", "||", # 248 "\xF8", # "\xD9\x91" ^sadda "\x{0652}", "\"", # 250 "\xFA", # "\xD9\x92" sukuun "\x{0671}", "A", # 199 "\xC7", # "\xD9\xB1" wa.sla-on-'alif "\x{067E}", "p", "\x{06A4}", "v", "\x{06AF}", "g", "\x{0681}", "c", "\x{0686}", "^c", "\x{0685}", ",c", "\x{0698}", "^z", "\x{06AD}", "^n", "\x{06B5}", "^l", "\x{0695}", ".r", "\x{0640}\x{0651}", "|BB", ); no strict 'refs'; ${ $cls . '::encoder' } = $encoder; if ($option{'describe'}) { $_->describe('') foreach @{${ $cls . '::encoder' }}; } $cls->enmode(defined ${ $cls . '::enmode' } ? ${ $cls . '::enmode' } : 'default'); return ${ $cls . '::encoder' }; } sub decoder ($;$$) { my ($cls, undef, undef) = @_; my $decoder = []; my @sunny = ( [ "t", "\x{062A}" ], # "\xD8\xAA" [ "_t", "\x{062B}" ], # "\xD8\xAB" <_t> [ "d", "\x{062F}" ], # "\xD8\xAF" [ "_d", "\x{0630}" ], # "\xD8\xB0" <_d> [ "r", "\x{0631}" ], # "\xD8\xB1" [ "z", "\x{0632}" ], # "\xD8\xB2" [ "s", "\x{0633}" ], # "\xD8\xB3" [ "^s", "\x{0634}" ], # "\xD8\xB4" <^s> [ ".s", "\x{0635}" ], # "\xD8\xB5" <.s> [ ".d", "\x{0636}" ], # "\xD8\xB6" <.d> [ ".t", "\x{0637}" ], # "\xD8\xB7" <.t> [ ".z", "\x{0638}" ], # "\xD8\xB8" <.z> [ "l", "\x{0644}" ], # "\xD9\x84" [ "n", "\x{0646}" ], # "\xD9\x86" ); my @empty = ( [ "|", "" ], # ArabTeX's "invisible consonant" [ "", "\x{0627}" ], # "\xD8\xA7" bare 'alif ); my @taaaa = ( [ "T", "\x{0629}" ], # "\xD8\xA9" [ "H", "\x{0629}" ], # "\xD8\xA9" ); my @moony = ( [ "'A", "\x{0622}" ], # "\xD8\xA2" madda-over-'alif [ "'a", "\x{0623}" ], # "\xD8\xA3" hamza-over-'alif [ "'i", "\x{0625}" ], # "\xD8\xA5" hamza-under-'alif [ "'w", "\x{0624}" ], # "\xD8\xA4" hamza-over-waaw [ "'y", "\x{0626}" ], # "\xD8\xA6" hamza-over-yaa' [ "'|", "\x{0621}" ], # "\xD8\xA1" hamza-on-the-line [ "b", "\x{0628}" ], # "\xD8\xA8" [ "^g", "\x{062C}" ], # "\xD8\xAC" <^g> [ ".h", "\x{062D}" ], # "\xD8\xAD" <.h> [ "_h", "\x{062E}" ], # "\xD8\xAE" <_h> [ "`", "\x{0639}" ], # "\xD8\xB9" <`> [ ".g", "\x{063A}" ], # "\xD8\xBA" <.g> [ "f", "\x{0641}" ], # "\xD9\x81" [ "q", "\x{0642}" ], # "\xD9\x82" [ "k", "\x{0643}" ], # "\xD9\x83" [ "m", "\x{0645}" ], # "\xD9\x85" [ "h", "\x{0647}" ], # "\xD9\x87" [ "w", "\x{0648}" ], # "\xD9\x88" [ "y", "\x{064A}" ], # "\xD9\x8A" [ "B", "\x{0640}" ], # ArabTeX's "consonantal ta.twiil" [ "p", "\x{067E}" ], [ "v", "\x{06A4}" ], [ "g", "\x{06AF}" ], [ "c", "\x{0681}" ], # .ha with hamza [ "^c", "\x{0686}" ], # gim with three [ ",c", "\x{0685}" ], # _ha with three [ "^z", "\x{0698}" ], # zay with three [ "^n", "\x{06AD}" ], # kaf with three [ "^l", "\x{06B5}" ], # lam with a bow above [ ".r", "\x{0695}" ], # ra' with a bow below ); my @scope = ( "b", "t", "_t", "^g", ".h", "_h", "d", "_d", "r", "z", "s", "^s", ".s", ".d", ".t", ".z", "`", ".g", "f", "q", "k", "l", "m", "n", "h", "w", "p", "v", "g", "c", "^c", ",c", "^z", "^n", "^l", ".r", "|", "B", # "'", "y" treated specifically in some cases -- "T", "H" must as well ); $decoder->[0] = Encode::Mapper->compile ( [ 'silent' => 0, ], "_A", [ "", "Y" ], "_U", [ "", "U" ], "WA", [ "", "W" ], # word-internal occurrence "TA", [ "t", "A" ], "TU", [ "t", "U" ], "TI", [ "t", "I" ], "TY", [ "t", "Y" ], "T_A", [ "t", "_A" ], "T_U", [ "t", "_U" ], ( map { "T" . $_, [ "t", $_ ], "Ta" . $_, [ "t", "a" . $_ ], "Tu" . $_, [ "t", "u" . $_ ], "Ti" . $_, [ "t", "i" . $_ ], ( $option{'non-quoting'} ? () : ( "T\"" . $_, [ "t", "\"" . $_ ], "T\"a" . $_, [ "t", "\"a" . $_ ], "T\"u" . $_, [ "t", "\"u" . $_ ], "T\"i" . $_, [ "t", "\"i" . $_ ], ) ), } "'", @scope, "y" # "T", "H" ), # vowel-quoted sequences ( $option{'non-quoting'} ? ( "\"", "", # use non-quoting quotes only on no purpose ^^ ) : ( "\"", "\"", ) ), # general non-protection of \TeX directives ( map { "\\cap" . $_, [ "\\", "cap" . $_ ], } 'A' .. 'Z', 'a' .. 'z', '_', '0' .. '9' ), "\\", "\\", # strict \cap removal and white-space collapsing ( map { "\\cap" . $_ . "\x09", [ "", "\\cap " ], "\\cap" . $_ . "\x0A", [ "", "\\cap " ], "\\cap" . $_ . "\x0D", [ "", "\\cap " ], "\\cap" . $_ . "\x20", [ "", "\\cap " ], "\\cap" . $_, "", } "\x09", "\x0A", "\x0D", "\x20" ), "\\cap", "", # interfering rarely with the notation, or erroneous "^A", [ "^A", "|" ], "^I", [ "^I", "|" ], "^U", [ "^U", "|" ], "_a", [ "_a", "|" ], "_i", [ "_i", "|" ], "_u", [ "_u", "|" ], "_aA", [ "_aA", "|" ], "_aY", [ "_aY", "|" ], "_aU", [ "_aU", "|" ], "_aI", [ "_aI", "|" ], ); $decoder->[1] = Encode::Mapper->compile ( [ 'others' => undef, 'silent' => 0, ], # non-exciting entities "\x09", "\x09", "\x0A", "\x0A", "\x0D", "\x0D", " ", " ", ".", ".", ":", ":", "!", "!", "/", "/", "\\", "\\", ",", "\x{060C}", # "\xD8\x8C" right-to-left-comma ";", "\x{061B}", # "\xD8\x9B" right-to-left-semicolon "?", "\x{061F}", # "\xD8\x9F" right-to-left-question-mark "--", "\x{0640}", # "\xD9\x80" ta.twiil ( map { "" . $_, chr 0x0660 + $_, } 0 .. 9 ), # improper auxiliary vowels "-a", "", "-u", "", "-i", "", # explicit notations for ^sadda "||", [ "\x{0651}", "|" ], "|BB", [ "\x{0640}\x{0651}", "|" ], # non-voweled/sukuuned sunnies and moonies ( map { my $x = 1 + $_; my $y = "\x{0651}" x $_; # "\xD9\x91" ^sadda map { my $fix = $_; $_->[0] x $x, $_->[1] . $y . "\x{0652}", # "\xD9\x92" sukuun ( $option{'non-refined'} ? () : ( $_->[0] x $x . "-a", $_->[1] . $y . "\x{064E}", $_->[0] x $x . "-u", $_->[1] . $y . "\x{064F}", $_->[0] x $x . "-i", $_->[1] . $y . "\x{0650}", $_->[0] x $x . "-A", $_->[1] . $y . "\x{064E}\x{0627}", $_->[0] x $x . "-Y", $_->[1] . $y . "\x{064E}\x{0649}", $_->[0] x $x . "-U", $_->[1] . $y . "\x{064F}\x{0648}", $_->[0] x $x . "-I", $_->[1] . $y . "\x{0650}\x{064A}", $_->[0] x $x . "-aN", $_->[1] . $y . "\x{064B}" . ( $_->[0] eq "'a" ? "" : "\x{0627}" ), $_->[0] x $x . "-uN", $_->[1] . $y . "\x{064C}", $_->[0] x $x . "-iN", $_->[1] . $y . "\x{064D}", $_->[0] x $x . "-aNA", $_->[1] . $y . "\x{064B}\x{0627}", $_->[0] x $x . "-uNA", $_->[1] . $y . "\x{064C}\x{0627}", $_->[0] x $x . "-iNA", $_->[1] . $y . "\x{064D}\x{0627}", $_->[0] x $x . "-aNY", $_->[1] . $y . "\x{064B}\x{0649}", $_->[0] x $x . "-uNY", $_->[1] . $y . "\x{064C}\x{0649}", $_->[0] x $x . "-iNY", $_->[1] . $y . "\x{064D}\x{0649}", $_->[0] x $x . "-aNU", $_->[1] . $y . "\x{064B}\x{0648}", $_->[0] x $x . "-uNU", $_->[1] . $y . "\x{064C}\x{0648}", $_->[0] x $x . "-iNU", $_->[1] . $y . "\x{064D}\x{0648}", ) ), ( $option{'non-quoting'} ? () : ( $_->[0] x $x . "\"", $_->[1] . $y . "\"\x{0652}", # "\xD9\x92" sukuun ( $option{'non-refined'} ? () : ( $_->[0] x $x . "-\"a", $_->[1] . $y . "\"\x{064E}", $_->[0] x $x . "-\"u", $_->[1] . $y . "\"\x{064F}", $_->[0] x $x . "-\"i", $_->[1] . $y . "\"\x{0650}", $_->[0] x $x . "-\"A", $_->[1] . $y . "\"\x{064E}\x{0627}", $_->[0] x $x . "-\"Y", $_->[1] . $y . "\"\x{064E}\x{0649}", $_->[0] x $x . "-\"U", $_->[1] . $y . "\"\x{064F}\x{0648}", $_->[0] x $x . "-\"I", $_->[1] . $y . "\"\x{0650}\x{064A}", $_->[0] x $x . "-\"aN", $_->[1] . $y . "\"\x{064B}" . ( $_->[0] eq "'a" ? "" : "\x{0627}" ), $_->[0] x $x . "-\"uN", $_->[1] . $y . "\"\x{064C}", $_->[0] x $x . "-\"iN", $_->[1] . $y . "\"\x{064D}", $_->[0] x $x . "-\"aNA", $_->[1] . $y . "\"\x{064B}\x{0627}", $_->[0] x $x . "-\"uNA", $_->[1] . $y . "\"\x{064C}\x{0627}", $_->[0] x $x . "-\"iNA", $_->[1] . $y . "\"\x{064D}\x{0627}", $_->[0] x $x . "-\"aNY", $_->[1] . $y . "\"\x{064B}\x{0649}", $_->[0] x $x . "-\"uNY", $_->[1] . $y . "\"\x{064C}\x{0649}", $_->[0] x $x . "-\"iNY", $_->[1] . $y . "\"\x{064D}\x{0649}", $_->[0] x $x . "-\"aNU", $_->[1] . $y . "\"\x{064B}\x{0648}", $_->[0] x $x . "-\"uNU", $_->[1] . $y . "\"\x{064C}\x{0648}", $_->[0] x $x . "-\"iNU", $_->[1] . $y . "\"\x{064D}\x{0648}", ) ), ) ), map { ( $option{'non-refined'} ? () : ( $fix->[0] x $x . "-a" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "a" . $_->[0] ], $fix->[0] x $x . "-u" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "u" . $_->[0] ], $fix->[0] x $x . "-i" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "i" . $_->[0] ], $fix->[0] x $x . "-A" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "A" . $_->[0] ], $fix->[0] x $x . "-Y" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "Y" . $_->[0] ], $fix->[0] x $x . "-U" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "U" . $_->[0] ], $fix->[0] x $x . "-I" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "I" . $_->[0] ], ( $option{'non-quoting'} ? () : ( $fix->[0] x $x . "-\"a" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "a" . $_->[0] ], $fix->[0] x $x . "-\"u" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "u" . $_->[0] ], $fix->[0] x $x . "-\"i" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "i" . $_->[0] ], $fix->[0] x $x . "-\"A" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "A" . $_->[0] ], $fix->[0] x $x . "-\"Y" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "Y" . $_->[0] ], $fix->[0] x $x . "-\"U" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "U" . $_->[0] ], $fix->[0] x $x . "-\"I" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "I" . $_->[0] ], ) ), ) ), } @sunny, @moony, @taaaa, $empty[0] } @sunny, @moony[1 .. $#moony], $empty[0] # $moony[0] excluded as long as is unclear ^^ } 0 # 1 ), $moony[0]->[0], $moony[0]->[1], # now necessary of course ^^ # voweled/non-sukuuned sunnies and moonies ( map { my $x = 1 + $_; my $y = "\x{0651}" x $_; # "\xD9\x91" ^sadda map { my $fix = $_; $_->[0] x $x . "a", $_->[1] . $y . "\x{064E}", $_->[0] x $x . "u", $_->[1] . $y . "\x{064F}", $_->[0] x $x . "i", $_->[1] . $y . "\x{0650}", $_->[0] x $x . "_a", $_->[1] . $y . "\x{0670}", ( $option{'non-refined'} ? () : ( $_->[0] x $x . "_u", $_->[1] . $y . "\x{0657}", $_->[0] x $x . "_i", $_->[1] . $y . "\x{0656}", $_->[0] x $x . "_aA", $_->[1] . $y . "\x{0670}\x{0627}", $_->[0] x $x . "_aY", $_->[1] . $y . "\x{0670}\x{0649}", $_->[0] x $x . "_aU", $_->[1] . $y . "\x{0670}\x{0648}", $_->[0] x $x . "_aI", $_->[1] . $y . "\x{0670}\x{064A}", ) ), $_->[0] x $x . "A", $_->[1] . $y . "\x{064E}\x{0627}", $_->[0] x $x . "Y", $_->[1] . $y . "\x{064E}\x{0649}", $_->[0] x $x . "U", $_->[1] . $y . "\x{064F}\x{0648}", $_->[0] x $x . "I", $_->[1] . $y . "\x{0650}\x{064A}", $_->[0] x $x . "Uw", [ $_->[1] . $y . "\x{064F}", "ww" ], $_->[0] x $x . "Iy", [ $_->[1] . $y . "\x{0650}", "yy" ], ( $option{'non-refined'} ? () : ( $_->[0] x $x . "^A", $_->[1] . $y . "\x{064F}\x{0627}\x{0653}", $_->[0] x $x . "^U", $_->[1] . $y . "\x{064F}\x{0648}\x{0653}", $_->[0] x $x . "^I", $_->[1] . $y . "\x{0650}\x{064A}\x{0653}", $_->[0] x $x . "^Uw", [ $_->[1] . $y . "\x{064F}\x{0648}\x{0655}", "|" ], # roughly $_->[0] x $x . "^Iy", [ $_->[1] . $y . "\x{0650}\x{0649}\x{0655}", "|" ], # roughly ) ), $_->[0] x $x . "aa", [ "", $_->[0] x $x . "A" ], $_->[0] x $x . "uw", [ "", $_->[0] x $x . "U" ], $_->[0] x $x . "iy", [ "", $_->[0] x $x . "I" ], ( $option{'non-quoting'} ? () : ( $_->[0] x $x . "\"a", $_->[1] . $y . "\"\x{064E}", $_->[0] x $x . "\"u", $_->[1] . $y . "\"\x{064F}", $_->[0] x $x . "\"i", $_->[1] . $y . "\"\x{0650}", $_->[0] x $x . "\"_a", $_->[1] . $y . "\"\x{0670}", ( $option{'non-refined'} ? () : ( $_->[0] x $x . "\"_u", $_->[1] . $y . "\"\x{0657}", $_->[0] x $x . "\"_i", $_->[1] . $y . "\"\x{0656}", $_->[0] x $x . "\"_aA", $_->[1] . $y . "\"\x{0670}\x{0627}", $_->[0] x $x . "\"_aY", $_->[1] . $y . "\"\x{0670}\x{0649}", $_->[0] x $x . "\"_aU", $_->[1] . $y . "\"\x{0670}\x{0648}", $_->[0] x $x . "\"_aI", $_->[1] . $y . "\"\x{0670}\x{064A}", ) ), $_->[0] x $x . "\"A", $_->[1] . $y . "\"\x{064E}\x{0627}", $_->[0] x $x . "\"Y", $_->[1] . $y . "\"\x{064E}\x{0649}", $_->[0] x $x . "\"A\"", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}", $_->[0] x $x . "\"Y\"", $_->[1] . $y . "\"\x{064E}\x{0649}\"\x{0652}", $_->[0] x $x . "A\"", $_->[1] . $y . "\x{064E}\x{0627}\"\x{0652}", $_->[0] x $x . "Y\"", $_->[1] . $y . "\x{064E}\x{0649}\"\x{0652}", $_->[0] x $x . "\"U", $_->[1] . $y . "\"\x{064F}\x{0648}", $_->[0] x $x . "\"I", $_->[1] . $y . "\"\x{0650}\x{064A}", $_->[0] x $x . "\"U\"", $_->[1] . $y . "\"\x{064F}\x{0648}\"\x{0652}", $_->[0] x $x . "\"I\"", $_->[1] . $y . "\"\x{0650}\x{064A}\"\x{0652}", $_->[0] x $x . "U\"", $_->[1] . $y . "\x{064F}\x{0648}\"\x{0652}", $_->[0] x $x . "I\"", $_->[1] . $y . "\x{0650}\x{064A}\"\x{0652}", $_->[0] x $x . "\"Uw", [ $_->[1] . $y . "\"\x{064F}", "ww" ], $_->[0] x $x . "\"Iy", [ $_->[1] . $y . "\"\x{0650}", "yy" ], ( $option{'non-refined'} ? () : ( $_->[0] x $x . "\"^A", $_->[1] . $y . "\"\x{064F}\x{0627}\x{0653}", $_->[0] x $x . "\"^U", $_->[1] . $y . "\"\x{064F}\x{0648}\x{0653}", $_->[0] x $x . "\"^I", $_->[1] . $y . "\"\x{0650}\x{064A}\x{0653}", $_->[0] x $x . "\"^Uw", [ $_->[1] . $y . "\"\x{064F}\x{0648}\x{0655}", "|" ], # roughly $_->[0] x $x . "\"^Iy", [ $_->[1] . $y . "\"\x{0650}\x{0649}\x{0655}", "|" ], # roughly ) ), $_->[0] x $x . "\"aa", [ "", $_->[0] x $x . "\"A" ], $_->[0] x $x . "\"uw", [ "", $_->[0] x $x . "\"U" ], $_->[0] x $x . "\"iy", [ "", $_->[0] x $x . "\"I" ], ) ), ( map { $fix->[0] x $x . "uw" . $_, [ $fix->[1] . $y . "\x{064F}", "w" . $_ ], $fix->[0] x $x . "iy" . $_, [ $fix->[1] . $y . "\x{0650}", "y" . $_ ], ( $option{'non-quoting'} ? () : ( $fix->[0] x $x . "\"uw" . $_, [ $fix->[1] . $y . "\"\x{064F}", "w" . $_ ], $fix->[0] x $x . "\"iy" . $_, [ $fix->[1] . $y . "\"\x{0650}", "y" . $_ ], ) ), } "\"", qw "a u i A Y U I", $option{'non-refined'} ? () : qw "_a _u _i ^A ^U ^I" ), $_->[0] x $x . "_aA'|aN", $_->[1] . $y . "\x{0670}\x{0627}\x{0621}\x{064B}", $_->[0] x $x . "A'|aN", $_->[1] . $y . "\x{064E}\x{0627}\x{0621}\x{064B}", $_->[0] x $x . "aN", $_->[1] . $y . "\x{064B}" . ( $_->[0] eq "'a" ? "" : "\x{0627}" ), $_->[0] x $x . "uN", $_->[1] . $y . "\x{064C}", $_->[0] x $x . "iN", $_->[1] . $y . "\x{064D}", ( $option{'non-quoting'} ? () : ( $_->[0] x $x . "\"_aA'|aN", $_->[1] . $y . "\"\x{0670}\x{0627}\x{0621}\x{064B}", $_->[0] x $x . "\"A'|aN", $_->[1] . $y . "\"\x{064E}\x{0627}\x{0621}\x{064B}", $_->[0] x $x . "\"_aA'|\"aN", $_->[1] . $y . "\"\x{0670}\x{0627}\x{0621}\"\x{064B}", $_->[0] x $x . "\"A'|\"aN", $_->[1] . $y . "\"\x{064E}\x{0627}\x{0621}\"\x{064B}", $_->[0] x $x . "_aA'|\"aN", $_->[1] . $y . "\x{0670}\x{0627}\x{0621}\"\x{064B}", $_->[0] x $x . "A'|\"aN", $_->[1] . $y . "\x{064E}\x{0627}\x{0621}\"\x{064B}", $_->[0] x $x . "\"A\"'|aN", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\x{064B}", $_->[0] x $x . "\"A\"'|\"aN", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}", $_->[0] x $x . "A\"'|\"aN", $_->[1] . $y . "\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}", $_->[0] x $x . "\"aN", $_->[1] . $y . "\"\x{064B}" . ( $_->[0] eq "'a" ? "" : "\x{0627}" ), $_->[0] x $x . "\"uN", $_->[1] . $y . "\"\x{064C}", $_->[0] x $x . "\"iN", $_->[1] . $y . "\"\x{064D}", ) ), } @sunny, @moony, $empty[0] } 0 # 1 ), # 'alif protected endings ( map { my $x = 1 + $_; my $y = "\x{0651}" x $_; # "\xD9\x91" ^sadda map { $_->[0] x $x . "_aA'|aNA", $_->[1] . $y . "\x{0670}\x{0627}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "A'|aNA", $_->[1] . $y . "\x{064E}\x{0627}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "aNA", $_->[1] . $y . "\x{064B}\x{0627}", $_->[0] x $x . "aNY", $_->[1] . $y . "\x{064B}\x{0649}", ( $option{'non-refined'} ? () : ( $_->[0] x $x . "uNA", $_->[1] . $y . "\x{064C}\x{0627}", $_->[0] x $x . "iNA", $_->[1] . $y . "\x{064D}\x{0627}", $_->[0] x $x . "uNY", $_->[1] . $y . "\x{064C}\x{0649}", $_->[0] x $x . "iNY", $_->[1] . $y . "\x{064D}\x{0649}", $_->[0] x $x . "aNU", $_->[1] . $y . "\x{064B}\x{0648}", $_->[0] x $x . "uNU", $_->[1] . $y . "\x{064C}\x{0648}", $_->[0] x $x . "iNU", $_->[1] . $y . "\x{064D}\x{0648}", ) ), $_->[0] x $x . "aW", $_->[1] . $y . "\x{064E}\x{0648}\x{0652}\x{0627}", $_->[0] x $x . "UA", $_->[1] . $y . "\x{064F}\x{0648}\x{0627}", ( $option{'non-quoting'} ? () : ( $_->[0] x $x . "\"_aA'|aNA", $_->[1] . $y . "\"\x{0670}\x{0627}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "\"A'|aNA", $_->[1] . $y . "\"\x{064E}\x{0627}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "\"_aA'|\"aNA", $_->[1] . $y . "\"\x{0670}\x{0627}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "\"A'|\"aNA", $_->[1] . $y . "\"\x{064E}\x{0627}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "_aA'|\"aNA", $_->[1] . $y . "\x{0670}\x{0627}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "A'|\"aNA", $_->[1] . $y . "\x{064E}\x{0627}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "\"A\"'|aNA", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "\"A\"'|\"aNA", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "A\"'|\"aNA", $_->[1] . $y . "\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "\"aNA", $_->[1] . $y . "\"\x{064B}\x{0627}", $_->[0] x $x . "\"aNY", $_->[1] . $y . "\"\x{064B}\x{0649}", ( $option{'non-refined'} ? () : ( $_->[0] x $x . "\"uNA", $_->[1] . $y . "\"\x{064C}\x{0627}", $_->[0] x $x . "\"iNA", $_->[1] . $y . "\"\x{064D}\x{0627}", $_->[0] x $x . "\"uNY", $_->[1] . $y . "\"\x{064C}\x{0649}", $_->[0] x $x . "\"iNY", $_->[1] . $y . "\"\x{064D}\x{0649}", $_->[0] x $x . "\"aNU", $_->[1] . $y . "\"\x{064B}\x{0648}", $_->[0] x $x . "\"uNU", $_->[1] . $y . "\"\x{064C}\x{0648}", $_->[0] x $x . "\"iNU", $_->[1] . $y . "\"\x{064D}\x{0648}", ) ), $_->[0] x $x . "\"aW", $_->[1] . $y . "\"\x{064E}\x{0648}\"\x{0652}\x{0627}", # coupled? $_->[0] x $x . "\"UA", $_->[1] . $y . "\"\x{064F}\x{0648}\x{0627}", ) ), } @sunny, @moony, $empty[0] } 0 # 1 ), # taa' marbuu.ta endings ( map { $_->[0], $_->[1] . "\x{0652}", # "\xD9\x92" sukuun ( $option{'non-quoting'} ? () : ( $_->[0] . "\"", $_->[1] . "\"\x{0652}", # "\xD9\x92" sukuun ) ), } @taaaa ), ( map { my $fix = $_; $_->[0] . "a", $_->[1] . "\x{064E}", $_->[0] . "u", $_->[1] . "\x{064F}", $_->[0] . "i", $_->[1] . "\x{0650}", $_->[0] . "aN", $_->[1] . "\x{064B}", $_->[0] . "uN", $_->[1] . "\x{064C}", $_->[0] . "iN", $_->[1] . "\x{064D}", ( $option{'non-quoting'} ? () : ( $_->[0] . "\"a", $_->[1] . "\"\x{064E}", $_->[0] . "\"u", $_->[1] . "\"\x{064F}", $_->[0] . "\"i", $_->[1] . "\"\x{0650}", $_->[0] . "\"aN", $_->[1] . "\"\x{064B}", $_->[0] . "\"uN", $_->[1] . "\"\x{064C}", $_->[0] . "\"iN", $_->[1] . "\"\x{064D}", ) ), # non-voweled/sukuuned ( $option{'non-refined'} ? () : ( $_->[0] . "-a", $_->[1] . "\x{064E}", $_->[0] . "-u", $_->[1] . "\x{064F}", $_->[0] . "-i", $_->[1] . "\x{0650}", $_->[0] . "-aN", $_->[1] . "\x{064B}", $_->[0] . "-uN", $_->[1] . "\x{064C}", $_->[0] . "-iN", $_->[1] . "\x{064D}", ( $option{'non-quoting'} ? () : ( $_->[0] . "-\"a", $_->[1] . "\"\x{064E}", $_->[0] . "-\"u", $_->[1] . "\"\x{064F}", $_->[0] . "-\"i", $_->[1] . "\"\x{0650}", $_->[0] . "-\"aN", $_->[1] . "\"\x{064B}", $_->[0] . "-\"uN", $_->[1] . "\"\x{064C}", $_->[0] . "-\"iN", $_->[1] . "\"\x{064D}", ) ), ) ), map { ( $option{'non-refined'} ? () : ( $fix->[0] . "-a" . $_->[0], [ $fix->[1] . "\x{0652}", "a" . $_->[0] ], $fix->[0] . "-u" . $_->[0], [ $fix->[1] . "\x{0652}", "u" . $_->[0] ], $fix->[0] . "-i" . $_->[0], [ $fix->[1] . "\x{0652}", "i" . $_->[0] ], ( $option{'non-quoting'} ? () : ( $fix->[0] . "-\"a" . $_->[0], [ $fix->[1] . "\x{0652}\"", "a" . $_->[0] ], $fix->[0] . "-\"u" . $_->[0], [ $fix->[1] . "\x{0652}\"", "u" . $_->[0] ], $fix->[0] . "-\"i" . $_->[0], [ $fix->[1] . "\x{0652}\"", "i" . $_->[0] ], ) ), ) ), } @sunny, @moony, $empty[0] # @taaaa } $taaaa[0] ), # initial vowels ( $option{'non-quoting'} ? () : ( "\"", "\x{0671}", # this grapheme is mode-dependent in the next level ) ), ( map { my $fix = $_; $_->[0] . "a", $_->[1] . "\x{064E}", $_->[0] . "u", $_->[1] . "\x{064F}", $_->[0] . "i", $_->[1] . "\x{0650}", ( $option{'non-refined'} ? () : ( $_->[0] . "_a", $_->[1] . "\x{0670}", $_->[0] . "_u", $_->[1] . "\x{0657}", $_->[0] . "_i", $_->[1] . "\x{0656}", $_->[0] . "_aA", $_->[1] . "\x{0670}\x{0627}", $_->[0] . "_aY", $_->[1] . "\x{0670}\x{0649}", $_->[0] . "_aU", $_->[1] . "\x{0670}\x{0648}", $_->[0] . "_aI", $_->[1] . "\x{0670}\x{064A}", ) ), $_->[0] . "A", "\x{0627}", $_->[0] . "Y", "\x{0649}", $_->[0] . "U", $_->[1] . "\x{064F}\x{0648}", $_->[0] . "I", $_->[1] . "\x{0650}\x{064A}", $_->[0] . "Uw", [ $_->[1] . "\x{064F}\x{0648}\x{0651}", "|" ], $_->[0] . "Iy", [ $_->[1] . "\x{0650}\x{064A}\x{0651}", "|" ], ( $option{'non-refined'} ? () : ( $_->[0] . "^A", "\x{0622}", # use no equivs $_->[0] . "^U", "\x{0623}\x{064F}\x{0648}", # use no equivs $_->[0] . "^I", "\x{0625}\x{0650}\x{064A}", # use no equivs ) ), $_->[0] . "aa", [ "", $_->[0] . "A" ], $_->[0] . "uw", [ "", $_->[0] . "U" ], $_->[0] . "iy", [ "", $_->[0] . "I" ], ( $option{'non-quoting'} ? () : ( $_->[0] . "\"a", $_->[1] . "\"\x{064E}", $_->[0] . "\"u", $_->[1] . "\"\x{064F}", $_->[0] . "\"i", $_->[1] . "\"\x{0650}", ( $option{'non-refined'} ? () : ( $_->[0] . "\"_a", $_->[1] . "\"\x{0670}", $_->[0] . "\"_u", $_->[1] . "\"\x{0657}", $_->[0] . "\"_i", $_->[1] . "\"\x{0656}", $_->[0] . "\"_aA", $_->[1] . "\"\x{0670}\x{0627}", $_->[0] . "\"_aY", $_->[1] . "\"\x{0670}\x{0649}", $_->[0] . "\"_aU", $_->[1] . "\"\x{0670}\x{0648}", $_->[0] . "\"_aI", $_->[1] . "\"\x{0670}\x{064A}", ) ), $_->[0] . "\"A", $_->[1] . "\"\x{064E}\x{0627}", $_->[0] . "\"Y", $_->[1] . "\"\x{064E}\x{0649}", $_->[0] . "\"A\"", $_->[1] . "\"\x{064E}\x{0627}\"\x{0652}", $_->[0] . "\"Y\"", $_->[1] . "\"\x{064E}\x{0649}\"\x{0652}", $_->[0] . "A\"", "\x{0627}\"\x{0652}", $_->[0] . "Y\"", "\x{0649}\"\x{0652}", $_->[0] . "\"U", $_->[1] . "\"\x{064F}\x{0648}", $_->[0] . "\"I", $_->[1] . "\"\x{0650}\x{064A}", $_->[0] . "\"U\"", $_->[1] . "\"\x{064F}\x{0648}\"\x{0652}", $_->[0] . "\"I\"", $_->[1] . "\"\x{0650}\x{064A}\"\x{0652}", $_->[0] . "U\"", $_->[1] . "\x{064F}\x{0648}\"\x{0652}", $_->[0] . "I\"", $_->[1] . "\x{0650}\x{064A}\"\x{0652}", $_->[0] . "\"Uw", [ $_->[1] . "\"\x{064F}\x{0648}\x{0651}", "|" ], $_->[0] . "\"Iy", [ $_->[1] . "\"\x{0650}\x{064A}\x{0651}", "|" ], ( $option{'non-refined'} ? () : ( $_->[0] . "\"^A", "\"\x{0622}", # use no equivs $_->[0] . "\"^U", "\"\x{0623}\x{064F}\x{0648}", # use no equivs $_->[0] . "\"^I", "\"\x{0625}\x{0650}\x{064A}", # use no equivs ) ), $_->[0] . "\"aa", [ "", $_->[0] . "\"A" ], $_->[0] . "\"uw", [ "", $_->[0] . "\"U" ], $_->[0] . "\"iy", [ "", $_->[0] . "\"I" ], ) ), ( map { $fix->[0] . "uw" . $_, [ $fix->[1] . "\x{064F}", "w" . $_ ], $fix->[0] . "iy" . $_, [ $fix->[1] . "\x{0650}", "y" . $_ ], ( $option{'non-quoting'} ? () : ( $fix->[0] . "\"uw" . $_, [ $fix->[1] . "\"\x{064F}", "w" . $_ ], $fix->[0] . "\"iy" . $_, [ $fix->[1] . "\"\x{0650}", "y" . $_ ], ) ), } "\"", qw "a u i A Y U I", $option{'non-refined'} ? () : qw "_a _u _i ^A ^U ^I" ), $_->[0] . "_aA'|aN", $_->[1] . "\x{0670}\x{0627}\x{0621}\x{064B}", $_->[0] . "A'|aN", $_->[1] . "\x{064E}\x{0627}\x{0621}\x{064B}", $_->[0] . "aN", $_->[1] . "\x{064B}", $_->[0] . "uN", $_->[1] . "\x{064C}", $_->[0] . "iN", $_->[1] . "\x{064D}", ( $option{'non-quoting'} ? () : ( $_->[0] . "\"_aA'|aN", $_->[1] . "\"\x{0670}\x{0627}\x{0621}\x{064B}", $_->[0] . "\"A'|aN", $_->[1] . "\"\x{064E}\x{0627}\x{0621}\x{064B}", $_->[0] . "\"_aA'|\"aN", $_->[1] . "\"\x{0670}\x{0627}\x{0621}\"\x{064B}", $_->[0] . "\"A'|\"aN", $_->[1] . "\"\x{064E}\x{0627}\x{0621}\"\x{064B}", $_->[0] . "_aA'|\"aN", $_->[1] . "\x{0670}\x{0627}\x{0621}\"\x{064B}", $_->[0] . "A'|\"aN", $_->[1] . "\x{064E}\x{0627}\x{0621}\"\x{064B}", $_->[0] . "\"A\"'|aN", $_->[1] . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\x{064B}", $_->[0] . "\"A\"'|\"aN", $_->[1] . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}", $_->[0] . "A\"'|\"aN", $_->[1] . "\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}", $_->[0] . "\"aN", $_->[1] . "\"\x{064B}", $_->[0] . "\"uN", $_->[1] . "\"\x{064C}", $_->[0] . "\"iN", $_->[1] . "\"\x{064D}", ) ), } $empty[1] ), # non-notation insertion escapes provided through ':xml' ); no strict 'refs'; ${ $cls . '::decoder' } = $decoder; if ($option{'describe'}) { $_->describe('') foreach @{${ $cls . '::decoder' }}; } $cls->demode(defined ${ $cls . '::demode' } ? ${ $cls . '::demode' } : 'default'); return ${ $cls . '::decoder' }; } sub enmoder ($$@) { my ($cls, $mode) = @_; no strict 'refs'; return ${ $cls . '::encoder' }->[$mode + $enlevel] = undef; } sub demoder ($$@) { my ($cls, $mode) = @_; my $demoder = []; # rules for the fullvocalize mode $demoder->[4] = [ [ 'silent' => 0, ], "\x{0671}", "\x{0627}", "\"\x{0652}", "", "\"\x{064E}", "", "\"\x{064F}", "", "\"\x{0650}", "", "\"\x{064B}", "", "\"\x{064C}", "", "\"\x{064D}", "", "\"\x{0670}", "", "\"\x{0657}", "", "\"\x{0656}", "", "\"", "", "\x{064E}\x{0627}\"\x{0652}", "\x{064E}\x{0627}\x{0652}", "\"\x{064E}\x{0627}\"\x{0652}", "\x{0627}\x{0652}", ( ( $option{'font-fixing'} ? ( map { "\x{0644}" . $_ . "\x{064E}\x{0627}\"\x{0652}", "\x{0644}\x{0627}" . $_ . "\x{064E}\x{0652}", "\x{0644}" . $_ . "\"\x{064E}\x{0627}\"\x{0652}", "\x{0644}\x{0627}" . $_ . "\x{0652}", } "", "\x{0651}" ) : () ), ), "\x{064E}\x{0649}\"\x{0652}", "\x{064E}\x{0649}\x{0652}", "\"\x{064E}\x{0649}\"\x{0652}", "\x{0649}\x{0652}", "\x{064F}\x{0648}\"\x{0652}", "\x{064F}\x{0648}\x{0652}", "\"\x{064F}\x{0648}\"\x{0652}", "\x{0648}\x{0652}", "\x{0650}\x{064A}\"\x{0652}", "\x{0650}\x{064A}\x{0652}", "\"\x{0650}\x{064A}\"\x{0652}", "\x{064A}\x{0652}", # modern external/internal substitution with wa.sla ( map { my $vowel = $_; map { "\x{064E}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{064F}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{0650}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0627}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0649}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}\x{0648}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}\x{064A}" . $_ . "\x{0671}", "\"" . $vowel ], # quoted "\"\x{064E}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064F}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0671}", "\"" . $vowel ], "\"\x{0650}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{0627}" . $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{0649}" . $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{0648}" . $_ . "\x{0671}", "\"" . $vowel ], "\"\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{064A}" . $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064E}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{064F}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{0650}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{064E}\x{0627}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0627}" . $_ . "\x{0627}", $vowel ], "\"\x{064E}\x{0649}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0649}" . $_ . "\x{0627}", $vowel ], "\"\x{064F}\x{0648}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0648}" . $_ . "\x{0627}", $vowel ], "\"\x{0650}\x{064A}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064A}" . $_ . "\x{0627}", $vowel ], "\x{064E}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}" . $_ . "\x{0627}", $vowel ], "\x{064F}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064F}" . $_ . "\x{0627}", $vowel ], "\x{0650}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0650}" . $_ . "\x{0627}", $vowel ], "\x{064E}\x{0627}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}\x{0627}" . $_ . "\x{0627}", $vowel ], "\x{064E}\x{0649}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}\x{0649}" . $_ . "\x{0627}", $vowel ], "\x{064F}\x{0648}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064F}\x{0648}" . $_ . "\x{0627}", $vowel ], "\x{0650}\x{064A}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0650}\x{064A}" . $_ . "\x{0627}", $vowel ], } "", "\x09", "\x0A", "\x0D", "\x20", "\x0D\x0A", "\x20\x20", "\x20\x20\x20", "\x20\x20\x20\x20" } "\x{064E}", "\x{064F}", "\x{0650}" ), # laam + 'alif .. either enforce ligatures, or shuffle the diacritics ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . $alif, "\x{0644}" . $alif . $_ . $vowel, "\x{0644}" . $_ . "\"" . $vowel . $alif, "\x{0644}" . $alif . $_, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", "\x{0652}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}" #, "\x{0671}" ) : () ), ), ( ( $option{'font-fixing'} ? ( map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . "\x{0671}", "\x{0644}\x{0627}" . $_ . $vowel, "\x{0644}" . $_ . "\"" . $vowel . "\x{0671}", "\x{0644}\x{0627}" . $_, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", "\x{0652}" ) : () ), ), # laam + vowel + 'alif + vowel .. internal substitution with wa.sla ( ( $option{'font-fixing'} ? ( map { my $double = $_; map { my $vowel = $_; map { "\x{0644}" . $double . $vowel . "\x{0627}" . $_, [ "\x{0644}\x{0671}" . $double . $vowel, "\"" . $_ ], # quoted "\x{0644}" . $double . "\"" . $vowel . "\x{0627}" . $_, [ "\x{0644}\x{0671}" . $double, "\"" . $_ ], "\x{0644}" . $double . "\"" . $vowel . "\x{0627}\"" . $_, [ "\x{0644}\x{0627}" . $double, $_ ], "\x{0644}" . $double . $vowel . "\x{0627}\"" . $_, [ "\x{0644}\x{0627}" . $double . $vowel, $_ ], } "\x{064E}", "\x{064F}", "\x{0650}" } "\x{064E}", "\x{064F}", "\x{0650}" } "", "\x{0651}" ) : () ), ), # optional ligatures to enforce here ]; # rules for the vocalize mode $demoder->[3] = [ [ 'silent' => 0, ], "\"\x{0652}", "\x{0652}", "\"\x{064E}", "", "\"\x{064F}", "", "\"\x{0650}", "", "\"\x{064B}", "", "\"\x{064C}", "", "\"\x{064D}", "", "\"\x{0670}", "", "\"\x{0657}", "", "\"\x{0656}", "", "\x{0652}", "", "\"", "", # modern external/internal substitution with wa.sla ( map { my $vowel = $_; map { "\x{064E}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{064F}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{0650}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0627}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0649}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}\x{0648}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}\x{064A}" . $_ . "\x{0627}", "\"" . $vowel ], # quoted "\"\x{064E}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064F}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0627}", "\"" . $vowel ], "\"\x{0650}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{0627}" . $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{0649}" . $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{0648}" . $_ . "\x{0627}", "\"" . $vowel ], "\"\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{064A}" . $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064E}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{064F}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{0650}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{064E}\x{0627}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0627}" . $_ . "\x{0627}", $vowel ], "\"\x{064E}\x{0649}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0649}" . $_ . "\x{0627}", $vowel ], "\"\x{064F}\x{0648}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0648}" . $_ . "\x{0627}", $vowel ], "\"\x{0650}\x{064A}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064A}" . $_ . "\x{0627}", $vowel ], "\x{064E}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}" . $_ . "\x{0627}", $vowel ], "\x{064F}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064F}" . $_ . "\x{0627}", $vowel ], "\x{0650}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0650}" . $_ . "\x{0627}", $vowel ], "\x{064E}\x{0627}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}\x{0627}" . $_ . "\x{0627}", $vowel ], "\x{064E}\x{0649}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}\x{0649}" . $_ . "\x{0627}", $vowel ], "\x{064F}\x{0648}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064F}\x{0648}" . $_ . "\x{0627}", $vowel ], "\x{0650}\x{064A}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0650}\x{064A}" . $_ . "\x{0627}", $vowel ], } "", "\x09", "\x0A", "\x0D", "\x20", "\x0D\x0A", "\x20\x20", "\x20\x20\x20", "\x20\x20\x20\x20" } "\x{064E}", "\x{064F}", "\x{0650}" ), # laam + 'alif .. either enforce ligatures, or shuffle the diacritics ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . $alif, "\x{0644}" . $alif . $_ . $vowel, "\x{0644}" . $_ . "\"" . $vowel . $alif, "\x{0644}" . $alif . $_, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", # "\x{0652}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}", "\x{0671}" ) : () ), ), ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { "\x{0644}" . $_ . "\x{0652}" . $alif, "\x{0644}" . $alif . $_, "\x{0644}" . $_ . "\"\x{0652}" . $alif, "\x{0644}" . $alif . $_ . "\x{0652}", } "", "\x{0651}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}", "\x{0671}" ) : () ), ), # laam + vowel + 'alif + vowel .. internal substitution with wa.sla ( ( $option{'font-fixing'} ? ( map { my $double = $_; map { my $vowel = $_; map { "\x{0644}" . $double . $vowel . "\x{0627}" . $_, [ "\x{0644}\x{0627}" . $double . $vowel, "\"" . $_ ], # quoted "\x{0644}" . $double . "\"" . $vowel . "\x{0627}" . $_, [ "\x{0644}\x{0627}" . $double, "\"" . $_ ], "\x{0644}" . $double . "\"" . $vowel . "\x{0627}\"" . $_, [ "\x{0644}\x{0627}" . $double, $_ ], "\x{0644}" . $double . $vowel . "\x{0627}\"" . $_, [ "\x{0644}\x{0627}" . $double . $vowel, $_ ], } "\x{064E}", "\x{064F}", "\x{0650}" } "\x{064E}", "\x{064F}", "\x{0650}" } "", "\x{0651}" ) : () ), ), # optional ligatures to enforce here ]; # rules for the novocalize mode $demoder->[2] = [ [ 'silent' => 0, ], "\"\x{0652}", "\x{0652}", "\"\x{064E}", "\x{064E}", "\"\x{064F}", "\x{064F}", "\"\x{0650}", "\x{0650}", "\"\x{064B}", "\x{064B}", "\"\x{064C}", "\x{064C}", "\"\x{064D}", "\x{064D}", "\"\x{0670}", "\x{0670}", "\"\x{0657}", "\x{0657}", "\"\x{0656}", "\x{0656}", "\x{0652}", "", "\x{064E}", "", "\x{064F}", "", "\x{0650}", "", "\x{064B}", "", "\x{064C}", "", "\x{064D}", "", "\x{0670}", "", "\x{0657}", "", "\x{0656}", "", "\"", "", # modern internal substitution with "fictitious" wa.sla .. lam + vowel + 'alif + vowel below # modern external substitution with "fictitious" wa.sla # laam + 'alif .. either enforce ligatures, or shuffle the diacritics ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . $alif, "\x{0644}" . $alif . $_, "\x{0644}" . $_ . "\"" . $vowel . $alif, "\x{0644}" . $alif . $_ . $vowel, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", "\x{0652}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}", "\x{0671}" ) : () ), ), # laam + vowel + 'alif + vowel .. internal substitution with wa.sla # optional ligatures to enforce here ]; # rules for the noshadda mode $demoder->[1] = [ [ 'silent' => 0, ], ]; # original no-quotes rules $demoder->[0] = [ [ 'silent' => 0, ], # modern internal substitution with wa.sla .. lam + vowel + 'alif + vowel below ( map { my $vowel = $_; map { $vowel . "\x{0627}" . $_, $vowel . "\x{0671}", } "\x{064E}", "\x{064F}", "\x{0650}" } "\x{064E}", "\x{064F}", "\x{0650}" ), # modern external substitution with wa.sla ( map { my $vowel = $_; map { "\x{064E}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}" . $_, "\x{0671}" ], "\x{064F}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}" . $_, "\x{0671}" ], "\x{0650}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}" . $_, "\x{0671}" ], "\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0627}" . $_, "\x{0671}" ], "\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0649}" . $_, "\x{0671}" ], "\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}\x{0648}" . $_, "\x{0671}" ], "\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}\x{064A}" . $_, "\x{0671}" ], } "\x09", "\x0A", "\x0D", "\x20", "\x0D\x0A", "\x20\x20", "\x20\x20\x20", "\x20\x20\x20\x20" } "\x{064E}", "\x{064F}", "\x{0650}" ), # laam + 'alif .. either enforce ligatures, or shuffle the diacritics ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . $alif, "\x{0644}" . $alif . $_ . $vowel, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", "\x{0652}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}", "\x{0671}" ) : () ), ), # laam + vowel + 'alif + vowel .. internal substitution with wa.sla ( ( $option{'font-fixing'} ? ( map { my $double = $_; map { my $vowel = $_; map { "\x{0644}" . $double . $vowel . "\x{0627}" . $_, "\x{0644}" . "\x{0671}" . $double . $vowel, } "\x{064E}", "\x{064F}", "\x{0650}" } "\x{064E}", "\x{064F}", "\x{0650}" } "", "\x{0651}" ) : () ), ), # optional ligatures to enforce here ]; no strict 'refs'; ${ $cls . '::decoder' }->[$mode + $delevel] = Encode::Mapper->compile(@{$demoder->[$mode]}); ${ $cls . '::decoder' }->[$mode + $delevel]->describe('') if $option{'describe'}; return ${ $cls . '::decoder' }->[$mode + $delevel]; } 1; __END__ =head1 NAME Encode::Arabic::ArabTeX::Verbatim - Interpreter of the Verbatim variant of the ArabTeX notation =head1 REVISION $Revision: 717 $ $Date: 2008-10-03 00:28:12 +0200 (Fri, 03 Oct 2008) $ =head1 SYNOPSIS use Encode::Arabic::ArabTeX::Verbatim; # imports just like 'use Encode' would, plus extended options while ($line = <>) { # maps the ArabTeX notation for Arabic into the Arabic script print encode 'utf8', decode 'arabtex-verb', $line; # 'ArabTeX-Verbatim' alias 'ArabTeX-Verb' } # ArabTeX language-dependent transliteration <--> ArabTeX verbatim transliteration $string = decode 'ArabTeX-Verbatim', $octets; $octets = encode 'ArabTeX-Verbatim', $string; =head1 DESCRIPTION ArabTeX is an excellent extension to TeX/LaTeX designed for typesetting the right-to-left scripts of the Orient. It comes up with very intuitive and comprehensible lower ASCII transliterations, the expressive power of which is even better than that of the scripts. L implements the rules needed for proper interpretation of the ArabTeX verbatim notation, which is discussed in the ArabTeX User Manual. The conversion ifself is done by L, and the user interface is built on the L module. Relevant guidance is given in L, from which this module inherits. The transformation rules are, however, quite different ;) This work is presented in its B! =head1 SEE ALSO L, L, L, L, L ArabTeX system L Klaus Lagally L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2005-2008 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/Encode/Arabic/Parkinson.pm0000444001432400135600000002107411144136727020104 0ustar smrzufal# ###################################################################### Otakar Smrz, 2003/01/23 # # Encoding of Arabic: Dil Parkinson's Notation ###################################### 2006/02/03 # $Id: Parkinson.pm 179 2007-01-14 00:23:25Z smrz $ package Encode::Arabic::Parkinson; use 5.008; use strict; use warnings; use Scalar::Util 'blessed'; our $VERSION = do { q $Revision: 179 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Encoding; use base 'Encode::Encoding'; __PACKAGE__->Define('Parkinson'); our $enmode; our $demode; our $optxml; our %modemap = ( 'default' => 0, 'undef' => 0, 'fullvocalize' => 0, 'full' => 0, 'nowasla' => 4, 'vocalize' => 3, 'nosukuun' => 3, 'novocalize' => 2, 'novowels' => 2, 'none' => 2, 'noshadda' => 1, 'noneplus' => 1, ); # use subs 'encoder', 'decoder'; # ignores later prototypes sub encoder ($); # respect prototypes sub decoder ($); # respect prototypes sub import { # perform import as if Encode were used one level before this module $optxml = defined $_[1] && $_[1] eq ':xml' ? 1 : 0; __PACKAGE__->enmode('full'); __PACKAGE__->demode('full'); splice @_, 1, 1; require Encode; Encode->export_to_level(1, @_); } sub encode ($$;$) { my (undef, $text, $check) = @_; $_[1] = '' if $check; # needed by in-place edit return encoder $text; } sub decode ($$;$) { my (undef, $text, $check) = @_; $_[1] = '' if $check; # needed by in-place edit return decoder $text; } sub enmode ($$;$$) { my ($cls, $mode, $xml, $kshd) = @_; $cls = blessed $cls if ref $cls; $xml = $optxml unless defined $xml; $mode = 'undef' unless defined $mode; $mode = $modemap{$mode} if exists $modemap{$mode}; no strict 'refs'; my $return = ${ $cls . '::enmode' }; if (defined $mode) { ${ $cls . '::enmode' } = $mode; my @set = ( ( $kshd ? '' : q [\x{0640}] ) . q [\x{0623}\x{0624}\x{0625}] . q [\x{060C}\x{061B}\x{061F}] . q [\x{0621}\x{0622}\x{0626}-\x{063A}\x{0641}-\x{064A}] . # q [\x{067E}\x{0686}\x{0698}\x{06A4}\x{06AF}] . q [\x{0660}-\x{0669}] . q [\x{0671}] . q [\x{0651}] . q [\x{064B}-\x{0650}\x{0670}] . q [\x{0652}] . ( $kshd ? q [\x{0640}] : '' ) , ( $kshd ? '' : q [_] ) . ( $xml ? q [LWE] : q [LWE] ) . q [,;?] . q [CMYAbQtVjHxdvrzspSDTZcgfqklmnhwey] . # q [PJRVG] . q [0-9] . ( $mode == 0 ? q [O] : q [A] ) . ( $mode == 1 ? '' : q [~] . ( $mode == 2 ? '' : q [NUIauiR] . ( $mode == 3 ? '' : q [o] ) ) ) ); undef &encoder; eval q / sub encoder ($) { $_[0] =~ tr[/ . $set[0] . q /] [/ . $set[1] . q /]d; return $_[0]; } /; } return $return; } sub demode ($$;$$) { my ($cls, $mode, $xml, $kshd) = @_; $cls = blessed $cls if ref $cls; $xml = $optxml unless defined $xml; $mode = 'undef' unless defined $mode; $mode = $modemap{$mode} if exists $modemap{$mode}; no strict 'refs'; my $return = ${ $cls . '::demode' }; if (defined $mode) { ${ $cls . '::demode' } = $mode; my @set = ( ( $kshd ? '' : q [_] ) . ( $xml ? q [LWE] : q [LWE] ) . q [,;?] . q [CMYAbQtVjHxdvrzspSDTZcgfqklmnhwey] . # q [PJRVG] . q [0-9] . q [O] . q [~] . q [NUIauiR] . q [o] . ( $kshd ? q [_] : '' ) , ( $kshd ? '' : q [\x{0640}] ) . q [\x{0623}\x{0624}\x{0625}] . q [\x{060C}\x{061B}\x{061F}] . q [\x{0621}\x{0622}\x{0626}-\x{063A}\x{0641}-\x{064A}] . # q [\x{067E}\x{0686}\x{0698}\x{06A4}\x{06AF}] . q [\x{0660}-\x{0669}] . ( $mode == 0 ? q [\x{0671}] : q [\x{0627}] ) . ( $mode == 1 ? '' : q [\x{0651}] . ( $mode == 2 ? '' : q [\x{064B}-\x{0650}\x{0670}] . ( $mode == 3 ? '' : q [\x{0652}] ) ) ) ); undef &decoder; eval q / sub decoder ($) { $_[0] =~ tr[/ . $set[0] . q /] [/ . $set[1] . q /]d; return $_[0]; } /; } return $return; } 1; __END__ =head1 NAME Encode::Arabic::Parkinson - Dil Parkinson's transliteration of Arabic =head1 REVISION $Revision: 179 $ $Date: 2007-01-14 01:23:25 +0100 (Sun, 14 Jan 2007) $ =head1 SYNOPSIS use Encode::Arabic::Parkinson; # imports just like 'use Encode' would, plus more while ($line = <>) { # Dil Parkinson's mapping into the Arabic script print encode 'utf8', decode 'parkinson', $line; } # shell filter of data, e.g. in *n*x systems instead of viewing the Arabic script proper % perl -MEncode::Arabic::Parkinson -pe '$_ = encode "parkinson", decode "utf8", $_' # employing the modes of conversion for filtering and trimming Encode::Arabic::enmode 'parkinson', 'nosukuun', 'LWE xml'; Encode::Arabic::Parkinson->demode(undef, undef, 'strip _'); $decode = "AiqoraLo hRvaA Ol_n~a_S~a bi___OnotibaAhI."; $encode = encode 'parkinson', decode 'parkinson', $decode; # $encode eq "AiqraL hRvaA Aln~aS~a biAntibaAhI." =head1 DESCRIPTION Dil Parkinson's notation is a one-to-one transliteration of the Arabic script for Modern Standard Arabic, using lower ASCII characters to encode the graphemes of the original script. =head2 IMPLEMENTATION Similar to that in L. =head2 EXPORTS & MODES The module exports as if C also appeared in the package. The other C options are just delegated to L and imports performed properly. The B of this module allow to override the setting of the C<:xml> option, in addition to filtering out diacritical marks and stripping off I. The modes and aliases relate like this: our %Encode::Arabic::Parkinson::modemap = ( 'default' => 0, 'undef' => 0, 'fullvocalize' => 0, 'full' => 0, 'nowasla' => 4, 'vocalize' => 3, 'nosukuun' => 3, 'novocalize' => 2, 'novowels' => 2, 'none' => 2, 'noshadda' => 1, 'noneplus' => 1, ); =over =item enmode (I<$obj,> $mode, $xml, $kshd) =item demode (I<$obj,> $mode, $xml, $kshd) These methods can be invoked directly or through the respective functions of L. The meaning of the extra parameters follows from the L. =back =head1 SEE ALSO L, L, L Xerox Arabic Home Page L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2006-2007 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/Encode/Arabic/Buckwalter.pm0000444001432400135600000002423011144136727020240 0ustar smrzufal# ###################################################################### Otakar Smrz, 2003/01/23 # # Encoding of Arabic: Tim Buckwalter's Notation ##################################### 2003/06/19 # $Id: Buckwalter.pm 179 2007-01-14 00:23:25Z smrz $ package Encode::Arabic::Buckwalter; use 5.008; use strict; use warnings; use Scalar::Util 'blessed'; our $VERSION = do { q $Revision: 179 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Encoding; use base 'Encode::Encoding'; __PACKAGE__->Define('Buckwalter', 'Tim'); our $enmode; our $demode; our $optxml; our %modemap = ( 'default' => 0, 'undef' => 0, 'fullvocalize' => 0, 'full' => 0, 'nowasla' => 4, 'vocalize' => 3, 'nosukuun' => 3, 'novocalize' => 2, 'novowels' => 2, 'none' => 2, 'noshadda' => 1, 'noneplus' => 1, ); # use subs 'encoder', 'decoder'; # ignores later prototypes sub encoder ($); # respect prototypes sub decoder ($); # respect prototypes sub import { # perform import as if Encode were used one level before this module $optxml = defined $_[1] && $_[1] eq ':xml' ? 1 : 0; __PACKAGE__->enmode('full'); __PACKAGE__->demode('full'); splice @_, 1, 1; require Encode; Encode->export_to_level(1, @_); } sub encode ($$;$) { my (undef, $text, $check) = @_; $_[1] = '' if $check; # needed by in-place edit return encoder $text; } sub decode ($$;$) { my (undef, $text, $check) = @_; $_[1] = '' if $check; # needed by in-place edit return decoder $text; } sub enmode ($$;$$) { my ($cls, $mode, $xml, $kshd) = @_; $cls = blessed $cls if ref $cls; $xml = $optxml unless defined $xml; $mode = 'undef' unless defined $mode; $mode = $modemap{$mode} if exists $modemap{$mode}; no strict 'refs'; my $return = ${ $cls . '::enmode' }; if (defined $mode) { ${ $cls . '::enmode' } = $mode; my @set = ( ( $kshd ? '' : q [\x{0640}] ) . q [\x{0623}\x{0624}\x{0625}] . q [\x{060C}\x{061B}\x{061F}] . q [\x{0621}\x{0622}\x{0626}-\x{063A}\x{0641}-\x{064A}] . q [\x{067E}\x{0686}\x{0698}\x{06A4}\x{06AF}] . q [\x{0660}-\x{0669}] . q [\x{0671}] . q [\x{0651}] . q [\x{064B}-\x{0650}\x{0670}] . q [\x{0652}] . ( $kshd ? q [\x{0640}] : '' ) , ( $kshd ? '' : q [_] ) . ( $xml ? q [OWI] : q [>&<] ) . q [,;?] . q ['|}AbptvjHxd*rzs$SDTZEgfqklmnhwYy] . q [PJRVG] . q [0-9] . ( $mode == 0 ? q [{] : q [A] ) . ( $mode == 1 ? '' : q [~] . ( $mode == 2 ? '' : q [FNKaui`] . ( $mode == 3 ? '' : q [o] ) ) ) ); undef &encoder; eval q / sub encoder ($) { $_[0] =~ tr[/ . $set[0] . q /] [/ . $set[1] . q /]d; return $_[0]; } /; } return $return; } sub demode ($$;$$) { my ($cls, $mode, $xml, $kshd) = @_; $cls = blessed $cls if ref $cls; $xml = $optxml unless defined $xml; $mode = 'undef' unless defined $mode; $mode = $modemap{$mode} if exists $modemap{$mode}; no strict 'refs'; my $return = ${ $cls . '::demode' }; if (defined $mode) { ${ $cls . '::demode' } = $mode; my @set = ( ( $kshd ? '' : q [_] ) . ( $xml ? q [OWI] : q [>&<] ) . q [,;?] . q ['|}AbptvjHxd*rzs$SDTZEgfqklmnhwYy] . q [PJRVG] . q [0-9] . q [{] . q [~] . q [FNKaui`] . q [o] . ( $kshd ? q [_] : '' ) , ( $kshd ? '' : q [\x{0640}] ) . q [\x{0623}\x{0624}\x{0625}] . q [\x{060C}\x{061B}\x{061F}] . q [\x{0621}\x{0622}\x{0626}-\x{063A}\x{0641}-\x{064A}] . q [\x{067E}\x{0686}\x{0698}\x{06A4}\x{06AF}] . q [\x{0660}-\x{0669}] . ( $mode == 0 ? q [\x{0671}] : q [\x{0627}] ) . ( $mode == 1 ? '' : q [\x{0651}] . ( $mode == 2 ? '' : q [\x{064B}-\x{0650}\x{0670}] . ( $mode == 3 ? '' : q [\x{0652}] ) ) ) ); undef &decoder; eval q / sub decoder ($) { $_[0] =~ tr[/ . $set[0] . q /] [/ . $set[1] . q /]d; return $_[0]; } /; } return $return; } 1; __END__ =head1 NAME Encode::Arabic::Buckwalter - Tim Buckwalter's transliteration of Arabic =head1 REVISION $Revision: 179 $ $Date: 2007-01-14 01:23:25 +0100 (Sun, 14 Jan 2007) $ =head1 SYNOPSIS use Encode::Arabic::Buckwalter; # imports just like 'use Encode' would, plus more while ($line = <>) { # Tim Buckwalter's mapping into the Arabic script print encode 'utf8', decode 'buckwalter', $line; # 'Buckwalter' alias 'Tim' } # shell filter of data, e.g. in *n*x systems instead of viewing the Arabic script proper % perl -MEncode::Arabic::Buckwalter -pe '$_ = encode "buckwalter", decode "utf8", $_' # employing the modes of conversion for filtering and trimming Encode::Arabic::enmode 'buckwalter', 'nosukuun', '>&< xml'; Encode::Arabic::Buckwalter->demode(undef, undef, 'strip _'); $decode = "Aiqora>o h`*aA {l_n~a_S~a bi___{notibaAhK."; $encode = encode 'buckwalter', decode 'buckwalter', $decode; # $encode eq "AiqraO h`*aA Aln~aS~a biAntibaAhK." =head1 DESCRIPTION Tim Buckwalter's notation is a one-to-one transliteration of the Arabic script for Modern Standard Arabic, using lower ASCII characters to encode the graphemes of the original script. This system has been very popular in Natural Language Processing, however, there are limits to its applicability due to numerous non-alphabetic codes involved. =head2 IMPLEMENTATION The module takes care of the L programming interface, while the effective code is Tim Buckwalter's Cick: $encode =~ tr[\x{060C}\x{061B}\x{061F}\x{0621}-\x{063A}\x{0640}-\x{0652} # !! no break in true perl !! \x{0670}\x{0671}\x{067E}\x{0686}\x{0698}\x{06A4}\x{06AF}\x{0660}-\x{0669}] [,;?'|>&<}AbptvjHxd*rzs$SDTZEg_fqklmnhwYyFNKaui~o`{PJRVG0-9]; $decode =~ tr[,;?'|>&<}AbptvjHxd*rzs$SDTZEg_fqklmnhwYyFNKaui~o`{PJRVG0-9] [\x{060C}\x{061B}\x{061F}\x{0621}-\x{063A}\x{0640}-\x{0652} # !! no break in true perl !! \x{0670}\x{0671}\x{067E}\x{0686}\x{0698}\x{06A4}\x{06AF}\x{0660}-\x{0669}]; =head2 EXPORTS & MODES If the first element in the list to C is C<:xml>, the alternative mapping is introduced that suits the B. This option is there only to replace the C<< >&< >> reserved characters by C while still having a one-to-one notation. There is no XML parsing involved, and the markup would get distorted if subject to C! $using_xml = eval q { use Encode::Arabic::Buckwalter ':xml'; decode 'buckwalter', 'OWI' }; $classical = eval q { use Encode::Arabic::Buckwalter; decode 'buckwalter', '>&<' }; # $classical eq $using_xml and $classical eq "\x{0623}\x{0624}\x{0625}" The module exports as if C also appeared in the package. The other C options are just delegated to L and imports performed properly. The B of this module allow to override the setting of the C<:xml> option, in addition to filtering out diacritical marks and stripping off I. The modes and aliases relate like this: our %Encode::Arabic::Buckwalter::modemap = ( 'default' => 0, 'undef' => 0, 'fullvocalize' => 0, 'full' => 0, 'nowasla' => 4, 'vocalize' => 3, 'nosukuun' => 3, 'novocalize' => 2, 'novowels' => 2, 'none' => 2, 'noshadda' => 1, 'noneplus' => 1, ); =over =item enmode (I<$obj,> $mode, $xml, $kshd) =item demode (I<$obj,> $mode, $xml, $kshd) These methods can be invoked directly or through the respective functions of L. The meaning of the extra parameters follows from the L. =back =head1 SEE ALSO L, L, L Tim Buckwalter's Qamus L Buckwalter Arabic Morphological Analyzer L Xerox Arabic Home Page L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2007 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/Encode/Arabic/ArabTeX.pm0000444001432400135600000033663011144136727017435 0ustar smrzufal# ##################################################################### Otakar Smrz, 2003/01/23 # # Encoding of Arabic: ArabTeX Notation by Klaus Lagally ############################ 2003/06/19 # $Id: ArabTeX.pm 717 2008-10-02 22:28:12Z smrz $ package Encode::Arabic::ArabTeX; use 5.008; use strict; use warnings; use Scalar::Util 'blessed'; use Carp; our $VERSION = do { q $Revision: 717 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use Encode::Encoding; use base 'Encode::Encoding'; __PACKAGE__->Define('ArabTeX', 'Lagally', 'TeX'); use Encode::Mapper ':others', ':silent', ':join'; our %options; # records of options per package .. global register our %option; # options of the caller package .. used with local our $enmode; our $demode; our $enlevel = 2; our $delevel = 3; our %modemap = ( 'default' => 3, 'undef' => 0, 'fullvocalize' => 4, 'full' => 4, 'vocalize' => 3, 'nosukuun' => 3, 'novocalize' => 2, 'novowels' => 2, 'none' => 2, 'noshadda' => 1, 'noneplus' => 1, ); sub import { # perform import as if Encode were used one level before this module if (defined $_[1] and $_[1] eq ':xml') { # interfere little with possible Encode's options Encode::Mapper->options ( 'override' => [ # override rules of these LHS .. no other tricks ^^ ( # combinations of '<' and '>' with the other bytes map { my $x = chr $_; "<" . $x, [ "<" . $x, ">" ], # propagate the '>' sign implying .. ">" . $x, [ $x, ">" ], # .. preservation of the bytes } 0x00 .. 0x3B, 0x3D, 0x3F .. 0xFF ), ">>", ">", # stop the whole process .. "<>", "<>", # .. do not even start it "><", [ "<", ">" ], # rather than nested '<' and '>', .. "<<", [ "<<", ">" ], ">\\<", [ "<", ">" ], # .. prefer these escape sequences ">\\\\", [ "\\", ">" ], ">\\>", [ ">", ">" ], ">", ">", # singular symbols may migrate right .. "<", "<", # .. or preserve the rest of the data ] ); splice @_, 1, 1; } if (defined $_[1] and $_[1] eq ':simple') { __PACKAGE__->options($_[1]); splice @_, 1, 1; } if (defined $_[1] and $_[1] eq ':describe') { __PACKAGE__->options($_[1]); splice @_, 1, 1; } require Encode; Encode->export_to_level(1, @_); # here comes the only trick ^^ } sub options ($%) { my $cls = shift @_; my ($i, $opt, %opt); my @returns = %option; while (@_) { $opt = lc shift @_; if ($opt =~ /^\:/) { $opt eq ':simple' and $opt{'non-quoting'} = 1 and $opt{'non-refined'} = 1 and next; $opt eq ':describe' and $opt{'describe'} = 1 and next; } else { $opt =~ /^\-*(.*)$/; $opt{$1} = shift @_; } } return %opt unless defined $cls; $option{$_} = $opt{$_} foreach keys %opt; return @returns; } sub encode ($$;$) { my ($cls, $text, $check) = @_; $cls = blessed $cls if ref $cls; no strict 'refs'; $cls->encoder() unless defined ${ $cls . '::encoder' }; return Encode::Mapper->encode($text, ${ $cls . '::encoder' }, undef); } sub decode ($$;$) { my ($cls, $text, $check) = @_; $cls = blessed $cls if ref $cls; no strict 'refs'; $cls->decoder() unless defined ${ $cls . '::decoder' }; return Encode::Mapper->decode($text, ${ $cls . '::decoder' }, 'utf8'); } sub encoder ($@) { my $cls = shift @_; my $encoder = $cls->eecoder('encoder', @_); return $encoder unless defined $encoder and $encoder == -1; $encoder = []; $encoder->[0] = Encode::Mapper->compile ( [ 'silent' => 0, ], ( map { chr 0x0660 + $_, "" . $_, } 0 .. 9 ), "\x{064B}", "aN", # 240 "\xF0", # "\xD9\x8B" "\x{064C}", "uN", # 241 "\xF1", # "\xD9\x8C" "\x{064D}", "iN", # 242 "\xF2", # "\xD9\x8D" "\x{064E}", "a", # 243 "\xF3", # "\xD9\x8E" "\x{064F}", "u", # 245 "\xF5", # "\xD9\x8F" "\x{0650}", "i", # 246 "\xF6", # "\xD9\x90" "\x{0670}", "_a", "\x{0657}", "_u", "\x{0656}", "_i", "\x{060C}", ",", # 161 "\xA1", # "\xD8\x8C" right-to-left-comma "\x{061B}", ";", # 186 "\xBA", # "\xD8\x9B" right-to-left-semicolon "\x{061F}", "?", # 191 "\xBF", # "\xD8\x9F" right-to-left-question-mark "\x{0621}", "'", # 193 "\xC1", # "\xD8\xA1" hamza-on-the-line "\x{0622}", "'A", # 194 "\xC2", # "\xD8\xA2" madda-over-'alif "\x{0623}", "'", # 195 "\xC3", # "\xD8\xA3" hamza-over-'alif "\x{0624}", "'", # 196 "\xC4", # "\xD8\xA4" hamza-over-waaw "\x{0625}", "'", # 197 "\xC5", # "\xD8\xA5" hamza-under-'alif "\x{0626}", "'", # 198 "\xC6", # "\xD8\xA6" hamza-over-yaa' "\x{0627}", "A", # 199 "\xC7", # "\xD8\xA7" bare 'alif "\x{0628}", "b", # 200 "\xC8", # "\xD8\xA8" "\x{0629}", "T", # 201 "\xC9", # "\xD8\xA9" "\x{062A}", "t", # 202 "\xCA", # "\xD8\xAA" "\x{062B}", "_t", # 203 "\xCB", # "\xD8\xAB" <_t> "\x{062C}", "^g", # 204 "\xCC", # "\xD8\xAC" <^g> "\x{062D}", ".h", # 205 "\xCD", # "\xD8\xAD" <.h> "\x{062E}", "_h", # 206 "\xCE", # "\xD8\xAE" <_h> "\x{062F}", "d", # 207 "\xCF", # "\xD8\xAF" "\x{0630}", "_d", # 208 "\xD0", # "\xD8\xB0" <_d> "\x{0631}", "r", # 209 "\xD1", # "\xD8\xB1" "\x{0632}", "z", # 210 "\xD2", # "\xD8\xB2" "\x{0633}", "s", # 211 "\xD3", # "\xD8\xB3" "\x{0634}", "^s", # 212 "\xD4", # "\xD8\xB4" <^s> "\x{0635}", ".s", # 213 "\xD5", # "\xD8\xB5" <.s> "\x{0636}", ".d", # 214 "\xD6", # "\xD8\xB6" <.d> "\x{0637}", ".t", # 216 "\xD8", # "\xD8\xB7" <.t> "\x{0638}", ".z", # 217 "\xD9", # "\xD8\xB8" <.z> "\x{0639}", "`", # 218 "\xDA", # "\xD8\xB9" <`> "\x{063A}", ".g", # 219 "\xDB", # "\xD8\xBA" <.g> "\x{0640}", "--", # 220 "\xDC", # "\xD9\x80" ta.twiil "\x{0641}", "f", # 221 "\xDD", # "\xD9\x81" "\x{0642}", "q", # 222 "\xDE", # "\xD9\x82" "\x{0643}", "k", # 223 "\xDF", # "\xD9\x83" "\x{0644}", "l", # 225 "\xE1", # "\xD9\x84" "\x{0645}", "m", # 227 "\xE3", # "\xD9\x85" "\x{0646}", "n", # 228 "\xE4", # "\xD9\x86" "\x{0647}", "h", # 229 "\xE5", # "\xD9\x87" "\x{0648}", "w", # 230 "\xE6", # "\xD9\x88" "\x{0649}", "Y", # 236 "\xEC", # "\xD9\x89" 'alif maq.suura "\x{064A}", "y", # 237 "\xED", # "\xD9\x8A" "\x{0651}", "\\shadda{}", # 248 "\xF8", # "\xD9\x91" ^sadda # "\x{0652}", '"', # 250 "\xFA", # "\xD9\x92" sukuun "\x{0652}", "", # 250 "\xFA", # "\xD9\x92" sukuun "\x{0671}", "A", # 199 "\xC7", # "\xD9\xB1" wa.sla-on-'alif "\x{067E}", "p", "\x{06A4}", "v", "\x{06AF}", "g", "\x{0681}", "c", "\x{0686}", "^c", "\x{0685}", ",c", "\x{0698}", "^z", "\x{06AD}", "^n", "\x{06B5}", "^l", "\x{0695}", ".r", "\x{0628}\x{0651}", "bb", # 200 "\xC8", # "\xD8\xA8" "\x{062A}\x{0651}", "tt", # 202 "\xCA", # "\xD8\xAA" "\x{062B}\x{0651}", "_t_t", # 203 "\xCB", # "\xD8\xAB" <_t> "\x{062C}\x{0651}", "^g^g", # 204 "\xCC", # "\xD8\xAC" <^g> "\x{062D}\x{0651}", ".h.h", # 205 "\xCD", # "\xD8\xAD" <.h> "\x{062E}\x{0651}", "_h_h", # 206 "\xCE", # "\xD8\xAE" <_h> "\x{062F}\x{0651}", "dd", # 207 "\xCF", # "\xD8\xAF" "\x{0630}\x{0651}", "_d_d", # 208 "\xD0", # "\xD8\xB0" <_d> "\x{0631}\x{0651}", "rr", # 209 "\xD1", # "\xD8\xB1" "\x{0632}\x{0651}", "zz", # 210 "\xD2", # "\xD8\xB2" "\x{0633}\x{0651}", "ss", # 211 "\xD3", # "\xD8\xB3" "\x{0634}\x{0651}", "^s^s", # 212 "\xD4", # "\xD8\xB4" <^s> "\x{0635}\x{0651}", ".s.s", # 213 "\xD5", # "\xD8\xB5" <.s> "\x{0636}\x{0651}", ".d.d", # 214 "\xD6", # "\xD8\xB6" <.d> "\x{0637}\x{0651}", ".t.t", # 216 "\xD8", # "\xD8\xB7" <.t> "\x{0638}\x{0651}", ".z.z", # 217 "\xD9", # "\xD8\xB8" <.z> "\x{0639}\x{0651}", "``", # 218 "\xDA", # "\xD8\xB9" <`> "\x{063A}\x{0651}", ".g.g", # 219 "\xDB", # "\xD8\xBA" <.g> "\x{0641}\x{0651}", "ff", # 221 "\xDD", # "\xD9\x81" "\x{0642}\x{0651}", "qq", # 222 "\xDE", # "\xD9\x82" "\x{0643}\x{0651}", "kk", # 223 "\xDF", # "\xD9\x83" "\x{0644}\x{0651}", "ll", # 225 "\xE1", # "\xD9\x84" "\x{0645}\x{0651}", "mm", # 227 "\xE3", # "\xD9\x85" "\x{0646}\x{0651}", "nn", # 228 "\xE4", # "\xD9\x86" "\x{0647}\x{0651}", "hh", # 229 "\xE5", # "\xD9\x87" "\x{0648}\x{0651}", "ww", # 230 "\xE6", # "\xD9\x88" "\x{064A}\x{0651}", "yy", # 237 "\xED", # "\xD9\x8A" ); no strict 'refs'; ${ $cls . '::encoder' } = $encoder; if ($option{'describe'}) { $_->describe('') foreach @{${ $cls . '::encoder' }}; } $cls->enmode(defined ${ $cls . '::enmode' } ? ${ $cls . '::enmode' } : 'default'); return ${ $cls . '::encoder' }; } sub decoder ($@) { my $cls = shift @_; my $decoder = $cls->eecoder('decoder', @_); return $decoder unless defined $decoder and $decoder == -1; $decoder = []; my @sunny = ( [ "t", "\x{062A}" ], # "\xD8\xAA" [ "_t", "\x{062B}" ], # "\xD8\xAB" <_t> [ "d", "\x{062F}" ], # "\xD8\xAF" [ "_d", "\x{0630}" ], # "\xD8\xB0" <_d> [ "r", "\x{0631}" ], # "\xD8\xB1" [ "z", "\x{0632}" ], # "\xD8\xB2" [ "s", "\x{0633}" ], # "\xD8\xB3" [ "^s", "\x{0634}" ], # "\xD8\xB4" <^s> [ ".s", "\x{0635}" ], # "\xD8\xB5" <.s> [ ".d", "\x{0636}" ], # "\xD8\xB6" <.d> [ ".t", "\x{0637}" ], # "\xD8\xB7" <.t> [ ".z", "\x{0638}" ], # "\xD8\xB8" <.z> [ "l", "\x{0644}" ], # "\xD9\x84" [ "n", "\x{0646}" ], # "\xD9\x86" ); my @empty = ( [ "|", "" ], # ArabTeX's "invisible consonant" [ "", "\x{0627}" ], # "\xD8\xA7" bare 'alif ); my @taaaa = ( [ "T", "\x{0629}" ], # "\xD8\xA9" [ "H", "\x{0629}" ], # "\xD8\xA9" ); my @moony = ( [ "'A", "\x{0622}" ], # "\xD8\xA2" madda-over-'alif [ "'a", "\x{0623}" ], # "\xD8\xA3" hamza-over-'alif [ "'i", "\x{0625}" ], # "\xD8\xA5" hamza-under-'alif [ "'w", "\x{0624}" ], # "\xD8\xA4" hamza-over-waaw [ "'y", "\x{0626}" ], # "\xD8\xA6" hamza-over-yaa' [ "'|", "\x{0621}" ], # "\xD8\xA1" hamza-on-the-line [ "b", "\x{0628}" ], # "\xD8\xA8" [ "^g", "\x{062C}" ], # "\xD8\xAC" <^g> [ ".h", "\x{062D}" ], # "\xD8\xAD" <.h> [ "_h", "\x{062E}" ], # "\xD8\xAE" <_h> [ "`", "\x{0639}" ], # "\xD8\xB9" <`> [ ".g", "\x{063A}" ], # "\xD8\xBA" <.g> [ "f", "\x{0641}" ], # "\xD9\x81" [ "q", "\x{0642}" ], # "\xD9\x82" [ "k", "\x{0643}" ], # "\xD9\x83" [ "m", "\x{0645}" ], # "\xD9\x85" [ "h", "\x{0647}" ], # "\xD9\x87" [ "w", "\x{0648}" ], # "\xD9\x88" [ "y", "\x{064A}" ], # "\xD9\x8A" [ "B", "\x{0640}" ], # ArabTeX's "consonantal ta.twiil" [ "p", "\x{067E}" ], [ "v", "\x{06A4}" ], [ "g", "\x{06AF}" ], [ "c", "\x{0681}" ], # .ha with hamza [ "^c", "\x{0686}" ], # gim with three [ ",c", "\x{0685}" ], # _ha with three [ "^z", "\x{0698}" ], # zay with three [ "^n", "\x{06AD}" ], # kaf with three [ "^l", "\x{06B5}" ], # lam with a bow above [ ".r", "\x{0695}" ], # ra' with a bow below ); my @scope = ( "b", "t", "_t", "^g", ".h", "_h", "d", "_d", "r", "z", "s", "^s", ".s", ".d", ".t", ".z", "`", ".g", "f", "q", "k", "l", "m", "n", "h", "w", "p", "v", "g", "c", "^c", ",c", "^z", "^n", "^l", ".r", "|", "B", # "'", "y" treated specifically in some cases -- "T", "H" must as well ); $decoder->[0] = Encode::Mapper->compile ( [ 'silent' => 0, ], "_A", [ "", "Y" ], "_U", [ "", "U" ], "WA", [ "", "W" ], "y_A", [ "", "yY" ], "yaN_A", [ "", "yaNY" ], "yaNY", [ "", "yaN" ], "yY", [ "y", "A" ], # word-internal occurrence "TA", [ "t", "A" ], "TU", [ "t", "U" ], "TI", [ "t", "I" ], "TY", [ "t", "Y" ], "T_I", [ "t", "_I" ], "T_A", [ "t", "_A" ], "T_U", [ "t", "_U" ], ( map { "T" . $_, [ "t", $_ ], "Ta" . $_, [ "t", "a" . $_ ], "Tu" . $_, [ "t", "u" . $_ ], "Ti" . $_, [ "t", "i" . $_ ], ( $option{'non-quoting'} ? () : ( "T\"" . $_, [ "t", "\"" . $_ ], "T\"a" . $_, [ "t", "\"a" . $_ ], "T\"u" . $_, [ "t", "\"u" . $_ ], "T\"i" . $_, [ "t", "\"i" . $_ ], ) ), } @scope, "y" # "T", "H", "W" ), "Ta'", [ "t", "a'" ], "Tu'", [ "t", "u'" ], "Ti'", [ "t", "i'" ], ( $option{'non-quoting'} ? () : ( "T\"'", [ "t", "\"'" ], "T\"a'", [ "t", "\"a'" ], "T\"u'", [ "t", "\"u'" ], "T\"i'", [ "t", "\"i'" ], ) ), ( map { "Y" . $_, [ "A", $_ ], } @scope, "y", "T", "H" # "W" ), # vowel-quoted sequences ( $option{'non-quoting'} ? ( "\"", "", # use non-quoting quotes only on no purpose ^^ ) : ( "\"", "\"", ) ), # general non-protection of \TeX directives ( map { "\\cap" . $_, [ "\\", "cap" . $_ ], } 'A' .. 'Z', 'a' .. 'z', '_', '0' .. '9' ), "\\", "\\", # strict \cap removal and white-space collapsing ( map { "\\cap" . $_ . "\x09", [ "", "\\cap " ], "\\cap" . $_ . "\x0A", [ "", "\\cap " ], "\\cap" . $_ . "\x0D", [ "", "\\cap " ], "\\cap" . $_ . "\x20", [ "", "\\cap " ], "\\cap" . $_, "", } "\x09", "\x0A", "\x0D", "\x20" ), "\\cap", "", # interfering rarely with the notation, or erroneous "^A'a", [ "^A'|", "a" ], "^A", [ "^A", "|" ], "^I", [ "^I", "|" ], "^U", [ "^U", "|" ], "_a", [ "_a", "|" ], "_i", [ "_i", "|" ], "_u", [ "_u", "|" ], "_aA", [ "_aA", "|" ], "_aY", [ "_aY", "|" ], "_aU", [ "_aU", "|" ], "_aI", [ "_aI", "|" ], "'_a", [ "", "_a" ], "'_i", [ "", "_i" ], "'_u", [ "", "_u" ], "'^A", [ "", "^A" ], "'^I", [ "", "^I" ], "'^U", [ "", "^U" ], # word-initial carriers "'", "'a", # covers much implicitly "'i", [ "'i", "i" ], "'A", [ "'", "A" ], "'I", [ "'i", "I" ], "'_I", [ "'i", "_I" ], "''", "'a'a", # .. still needed ^^ "''i", [ "'i'i", "i" ], "''I", [ "'i'i", "I" ], "''_I", [ "'i'i", "_I" ], ( $option{'non-quoting'} ? () : ( "'\"i", [ "'i\"", "i" ], "'\"A", [ "'", "A" ], "'\"I", [ "'i\"", "I" ], "'\"_I", [ "'i\"", "_I" ], "''\"i", [ "'i'i\"", "i" ], "''\"I", [ "'i'i\"", "I" ], "''\"_I", [ "'i'i\"", "_I" ], ) ), # word-final carriers "Y'", "A'|", "A'", "A'|", "I'", "I'|", "U'", "U'|", # "a'", "a'a", # "a'i", "a'ii", # "a'\"i", "a'i\"i", "i'", "i'y", "u'", "u'w", "Y''", "A'|'|", "A''", "A'|'|", "I''", "I'|'|", "U''", "U'|'|", # "a''", "a'a'a", # "a''i", "a'i'ii", # "a''\"i", "a'i'i\"i", "i''", "i'y'y", "u''", "u'w'w", ( map { # covers cases in the map below over @scope and # quoted included $_ . "'", $_ . "'|", $_ . "''", $_ . "'|'|", } @scope, "y", $option{'non-quoting'} ? () : "\"" # quoted included ), "T'", "t'|", "T''", "t'|'|", # word-internal carriers # doubled "a'A", [ "a'", "A" ], # unclear ^^ "a'I", [ "a'y", "I" ], "a''I", [ "a'y'y", "I" ], "a'U", [ "a'w", "U" ], "a''U", [ "a'w'w", "U" ], "a'_I", [ "a'y", "_I" ], "a''_I", [ "a'y'y", "_I" ], "u'I", [ "u'y", "I" ], "u''I", [ "u'y'y", "I" ], "u'_I", [ "u'y", "_I" ], "u''_I", [ "u'y'y", "_I" ], "I'aN", [ "I'y", "aN" ], "I''aN", [ "I'y'y", "aN" ], "y'aN", [ "y'y", "aN" ], "y''aN", [ "y'y'y", "aN" ], "A'A", [ "A'|", "A" ], "A''A", [ "A'|'|", "A" ], "A'I", [ "A'y", "I" ], "A''I", [ "A'y'y", "I" ], "A'U", [ "A'w", "U" ], "A''U", [ "A'w'w", "U" ], "A'Y", [ "A'|", "Y" ], "A''Y", [ "A'|'|", "Y" ], "A'_I", [ "A'y", "_I" ], "A''_I", [ "A'y'y", "_I" ], "A'_U", [ "", "A'U" ], "A''_U", [ "", "A''U" ], "A'_A", [ "", "A'Y" ], "A''_A", [ "", "A''Y" ], "I'A", [ "I'y", "A" ], "I''A", [ "I'y'y", "A" ], "I'I", [ "I'y", "I" ], "I''I", [ "I'y'y", "I" ], "I'U", [ "I'y", "U" ], "I''U", [ "I'y'y", "U" ], "I'Y", [ "I'y", "Y" ], "I''Y", [ "I'y'y", "Y" ], "I'_I", [ "I'y", "_I" ], "I''_I", [ "I'y'y", "_I" ], "I'_U", [ "", "I'U" ], "I''_U", [ "", "I''U" ], "I'_A", [ "", "I'Y" ], "I''_A", [ "", "I''Y" ], "y'A", [ "y'y", "A" ], "y''A", [ "y'y'y", "A" ], "y'I", [ "y'y", "I" ], "y''I", [ "y'y'y", "I" ], "y'U", [ "y'y", "U" ], "y''U", [ "y'y'y", "U" ], "y'Y", [ "y'y", "Y" ], "y''Y", [ "y'y'y", "Y" ], "y'_I", [ "y'y", "_I" ], "y''_I", [ "y'y'y", "_I" ], "y'_U", [ "", "y'U" ], "y''_U", [ "", "y''U" ], "y'_A", [ "", "y'Y" ], "y''_A", [ "", "y''Y" ], "U'A", [ "U'w", "A" ], "U''A", [ "U'w'w", "A" ], "U'I", [ "U'y", "I" ], "U''I", [ "U'y'y", "I" ], "U'U", [ "U'w", "U" ], "U''U", [ "U'w'w", "U" ], "U'Y", [ "U'w", "Y" ], "U''Y", [ "U'w'w", "Y" ], "U'_I", [ "U'y", "_I" ], "U''_I", [ "U'y'y", "_I" ], "U'_U", [ "", "U'U" ], "U''_U", [ "", "U''U" ], "U'_A", [ "", "U'Y" ], "U''_A", [ "", "U''Y" ], "uw'A", [ "uw'w", "A" ], "uw''A", [ "uw'w'w", "A" ], "uw'I", [ "uw'y", "I" ], "uw''I", [ "uw'y'y", "I" ], "uw'U", [ "uw'w", "U" ], "uw''U", [ "uw'w'w", "U" ], "uw'Y", [ "uw'w", "Y" ], "uw''Y", [ "uw'w'w", "Y" ], "uw'_I", [ "uw'y", "_I" ], "uw''_I", [ "uw'y'y", "_I" ], "uw'_U", [ "", "uw'U" ], "uw''_U", [ "", "uw''U" ], "uw'_A", [ "", "uw'Y" ], "uw''_A", [ "", "uw''Y" ], ( $option{'non-quoting'} ? () : ( "a'\"A", [ "a'", "A" ], # unclear ^^ "a'\"I", [ "a'y\"", "I" ], "a''\"I", [ "a'y'y\"", "I" ], "a'\"U", [ "a'w\"", "U" ], "a''\"U", [ "a'w'w\"", "U" ], "a'\"_I", [ "a'y\"", "_I" ], "a''\"_I", [ "a'y'y\"", "_I" ], "u'\"I", [ "u'y\"", "I" ], "u''\"I", [ "u'y'y\"", "I" ], "u'\"_I", [ "u'y\"", "_I" ], "u''\"_I", [ "u'y'y\"", "_I" ], "I'\"aN", [ "I'y\"", "aN" ], "I''\"aN", [ "I'y'y\"", "aN" ], "y'\"aN", [ "y'y\"", "aN" ], "y''\"aN", [ "y'y'y\"", "aN" ], "y\"'\"aN", [ "y\"'y\"", "aN" ], "y\"''\"aN", [ "y\"'y'y\"", "aN" ], "y\"'aN", [ "y\"'y", "aN" ], "y\"''aN", [ "y\"'y'y", "aN" ], "A'\"A", [ "A'|\"", "A" ], "A''\"A", [ "A'|'|\"", "A" ], "A'\"I", [ "A'y\"", "I" ], "A''\"I", [ "A'y'y\"", "I" ], "A'\"U", [ "A'w\"", "U" ], "A''\"U", [ "A'w'w\"", "U" ], "A'\"Y", [ "A'|\"", "Y" ], "A''\"Y", [ "A'|'|\"", "Y" ], "A'\"_I", [ "A'y\"", "_I" ], "A''\"_I", [ "A'y'y\"", "_I" ], "A'\"_U", [ "", "A'\"U" ], "A''\"_U", [ "", "A''\"U" ], "A'\"_A", [ "", "A'\"Y" ], "A''\"_A", [ "", "A''\"Y" ], "I'\"A", [ "I'y\"", "A" ], "I''\"A", [ "I'y'y\"", "A" ], "I'\"I", [ "I'y\"", "I" ], "I''\"I", [ "I'y'y\"", "I" ], "I'\"U", [ "I'y\"", "U" ], "I''\"U", [ "I'y'y\"", "U" ], "I'\"Y", [ "I'y\"", "Y" ], "I''\"Y", [ "I'y'y\"", "Y" ], "I'\"_I", [ "I'y\"", "_I" ], "I''\"_I", [ "I'y'y\"", "_I" ], "I'\"_U", [ "", "I'\"U" ], "I''\"_U", [ "", "I''\"U" ], "I'\"_A", [ "", "I'\"Y" ], "I''\"_A", [ "", "I''\"Y" ], "y'\"A", [ "y'y\"", "A" ], "y''\"A", [ "y'y'y\"", "A" ], "y'\"I", [ "y'y\"", "I" ], "y''\"I", [ "y'y'y\"", "I" ], "y'\"U", [ "y'y\"", "U" ], "y''\"U", [ "y'y'y\"", "U" ], "y'\"Y", [ "y'y\"", "Y" ], "y''\"Y", [ "y'y'y\"", "Y" ], "y'\"_I", [ "y'y\"", "_I" ], "y''\"_I", [ "y'y'y\"", "_I" ], "y'\"_U", [ "", "y'\"U" ], "y''\"_U", [ "", "y''\"U" ], "y'\"_A", [ "", "y'\"Y" ], "y''\"_A", [ "", "y''\"Y" ], "y\"'\"A", [ "y\"'y\"", "A" ], "y\"''\"A", [ "y\"'y'y\"", "A" ], "y\"'\"I", [ "y\"'y\"", "I" ], "y\"''\"I", [ "y\"'y'y\"", "I" ], "y\"'\"U", [ "y\"'y\"", "U" ], "y\"''\"U", [ "y\"'y'y\"", "U" ], "y\"'\"Y", [ "y\"'y\"", "Y" ], "y\"''\"Y", [ "y\"'y'y\"", "Y" ], "y\"'\"_I", [ "y\"'y\"", "_I" ], "y\"''\"_I", [ "y\"'y'y\"", "_I" ], "y\"'\"_U", [ "", "y\"'\"U" ], "y\"''\"_U", [ "", "y\"''\"U" ], "y\"'\"_A", [ "", "y\"'\"Y" ], "y\"''\"_A", [ "", "y\"''\"Y" ], "y\"'A", [ "y\"'y", "A" ], "y\"''A", [ "y\"'y'y", "A" ], "y\"'I", [ "y\"'y", "I" ], "y\"''I", [ "y\"'y'y", "I" ], "y\"'U", [ "y\"'y", "U" ], "y\"''U", [ "y\"'y'y", "U" ], "y\"'Y", [ "y\"'y", "Y" ], "y\"''Y", [ "y\"'y'y", "Y" ], "y\"'_I", [ "y\"'y", "_I" ], "y\"''_I", [ "y\"'y'y", "_I" ], "y\"'_U", [ "", "y\"'U" ], "y\"''_U", [ "", "y\"''U" ], "y\"'_A", [ "", "y\"'Y" ], "y\"''_A", [ "", "y\"''Y" ], "U'\"A", [ "U'w\"", "A" ], "U''\"A", [ "U'w'w\"", "A" ], "U'\"I", [ "U'y\"", "I" ], "U''\"I", [ "U'y'y\"", "I" ], "U'\"U", [ "U'w\"", "U" ], "U''\"U", [ "U'w'w\"", "U" ], "U'\"Y", [ "U'w\"", "Y" ], "U''\"Y", [ "U'w'w\"", "Y" ], "U'\"_I", [ "U'y\"", "_I" ], "U''\"_I", [ "U'y'y\"", "_I" ], "U'\"_U", [ "", "U'\"U" ], "U''\"_U", [ "", "U''\"U" ], "U'\"_A", [ "", "U'\"Y" ], "U''\"_A", [ "", "U''\"Y" ], "uw'\"A", [ "uw'w\"", "A" ], "uw''\"A", [ "uw'w'w\"", "A" ], "uw'\"I", [ "uw'y\"", "I" ], "uw''\"I", [ "uw'y'y\"", "I" ], "uw'\"U", [ "uw'w\"", "U" ], "uw''\"U", [ "uw'w'w\"", "U" ], "uw'\"Y", [ "uw'w\"", "Y" ], "uw''\"Y", [ "uw'w'w\"", "Y" ], "uw'\"_I", [ "uw'y\"", "_I" ], "uw''\"_I", [ "uw'y'y\"", "_I" ], "uw'\"_U", [ "", "uw'\"U" ], "uw''\"_U", [ "", "uw''\"U" ], "uw'\"_A", [ "", "uw'\"Y" ], "uw''\"_A", [ "", "uw''\"Y" ], ) ), ( map { # doubled "a'i" . $_, [ "a'y", "i" . $_ ], "a''i" . $_, [ "a'y'y", "i" . $_ ], "a'u" . $_, [ "a'w", "u" . $_ ], "a''u" . $_, [ "a'w'w", "u" . $_ ], "u'i" . $_, [ "u'y", "i" . $_ ], "u''i" . $_, [ "u'y'y", "i" . $_ ], "A'a" . $_, [ "A'|", "a" . $_ ], "A''a" . $_, [ "A'|'|", "a" . $_ ], "A'i" . $_, [ "A'y", "i" . $_ ], "A''i" . $_, [ "A'y'y", "i" . $_ ], "A'u" . $_, [ "A'w", "u" . $_ ], "A''u" . $_, [ "A'w'w", "u" . $_ ], "I'a" . $_, [ "I'y", "a" . $_ ], "I''a" . $_, [ "I'y'y", "a" . $_ ], "I'i" . $_, [ "I'y", "i" . $_ ], "I''i" . $_, [ "I'y'y", "i" . $_ ], "I'u" . $_, [ "I'y", "u" . $_ ], "I''u" . $_, [ "I'y'y", "u" . $_ ], "y'a" . $_, [ "y'y", "a" . $_ ], "y''a" . $_, [ "y'y'y", "a" . $_ ], "y'i" . $_, [ "y'y", "i" . $_ ], "y''i" . $_, [ "y'y'y", "i" . $_ ], "y'u" . $_, [ "y'y", "u" . $_ ], "y''u" . $_, [ "y'y'y", "u" . $_ ], "U'a" . $_, [ "U'w", "a" . $_ ], "U''a" . $_, [ "U'w'w", "a" . $_ ], "U'i" . $_, [ "U'y", "i" . $_ ], "U''i" . $_, [ "U'y'y", "i" . $_ ], "U'u" . $_, [ "U'w", "u" . $_ ], "U''u" . $_, [ "U'w'w", "u" . $_ ], "uw'a" . $_, [ "uw'w", "a" . $_ ], "uw''a" . $_, [ "uw'w'w", "a" . $_ ], "uw'i" . $_, [ "uw'y", "i" . $_ ], "uw''i" . $_, [ "uw'y'y", "i" . $_ ], "uw'u" . $_, [ "uw'w", "u" . $_ ], "uw''u" . $_, [ "uw'w'w", "u" . $_ ], ( $option{'non-quoting'} ? () : ( "a'\"i" . $_, [ "a'y\"", "i" . $_ ], "a''\"i" . $_, [ "a'y'y\"", "i" . $_ ], "a'\"u" . $_, [ "a'w\"", "u" . $_ ], "a''\"u" . $_, [ "a'w'w\"", "u" . $_ ], "u'\"i" . $_, [ "u'y\"", "i" . $_ ], "u''\"i" . $_, [ "u'y'y\"", "i" . $_ ], "A'\"a" . $_, [ "A'|\"", "a" . $_ ], "A''\"a" . $_, [ "A'|'|\"", "a" . $_ ], "A'\"i" . $_, [ "A'y\"", "i" . $_ ], "A''\"i" . $_, [ "A'y'y\"", "i" . $_ ], "A'\"u" . $_, [ "A'w\"", "u" . $_ ], "A''\"u" . $_, [ "A'w'w\"", "u" . $_ ], "I'\"a" . $_, [ "I'y\"", "a" . $_ ], "I''\"a" . $_, [ "I'y'y\"", "a" . $_ ], "I'\"i" . $_, [ "I'y\"", "i" . $_ ], "I''\"i" . $_, [ "I'y'y\"", "i" . $_ ], "I'\"u" . $_, [ "I'y\"", "u" . $_ ], "I''\"u" . $_, [ "I'y'y\"", "u" . $_ ], "y'\"a" . $_, [ "y'y\"", "a" . $_ ], "y''\"a" . $_, [ "y'y'y\"", "a" . $_ ], "y'\"i" . $_, [ "y'y\"", "i" . $_ ], "y''\"i" . $_, [ "y'y'y\"", "i" . $_ ], "y'\"u" . $_, [ "y'y\"", "u" . $_ ], "y''\"u" . $_, [ "y'y'y\"", "u" . $_ ], "y\"'\"a" . $_, [ "y\"'y\"", "a" . $_ ], "y\"''\"a" . $_, [ "y\"'y'y\"", "a" . $_ ], "y\"'\"i" . $_, [ "y\"'y\"", "i" . $_ ], "y\"''\"i" . $_, [ "y\"'y'y\"", "i" . $_ ], "y\"'\"u" . $_, [ "y\"'y\"", "u" . $_ ], "y\"''\"u" . $_, [ "y\"'y'y\"", "u" . $_ ], "y\"'a" . $_, [ "y\"'y", "a" . $_ ], "y\"''a" . $_, [ "y\"'y'y", "a" . $_ ], "y\"'i" . $_, [ "y\"'y", "i" . $_ ], "y\"''i" . $_, [ "y\"'y'y", "i" . $_ ], "y\"'u" . $_, [ "y\"'y", "u" . $_ ], "y\"''u" . $_, [ "y\"'y'y", "u" . $_ ], "U'\"a" . $_, [ "U'w\"", "a" . $_ ], "U''\"a" . $_, [ "U'w'w\"", "a" . $_ ], "U'\"i" . $_, [ "U'y\"", "i" . $_ ], "U''\"i" . $_, [ "U'y'y\"", "i" . $_ ], "U'\"u" . $_, [ "U'w\"", "u" . $_ ], "U''\"u" . $_, [ "U'w'w\"", "u" . $_ ], "uw'\"a" . $_, [ "uw'w\"", "a" . $_ ], "uw''\"a" . $_, [ "uw'w'w\"", "a" . $_ ], "uw'\"i" . $_, [ "uw'y\"", "i" . $_ ], "uw''\"i" . $_, [ "uw'y'y\"", "i" . $_ ], "uw'\"u" . $_, [ "uw'w\"", "u" . $_ ], "uw''\"u" . $_, [ "uw'w'w\"", "u" . $_ ], ) ), } "'", @scope, "y", "T", "H", "W" ), ( map { # doubled my $fix = $_; $_ . "'A", [ $_ . "'", "A" ], $_ . "''A", [ $_ . "'a'a", "A" ], $_ . "'I", [ $_ . "'y", "I" ], $_ . "''I", [ $_ . "'y'y", "I" ], $_ . "'U", [ $_ . "'w", "U" ], $_ . "''U", [ $_ . "'w'w", "U" ], $_ . "'Y", [ $_ . "'a", "Y" ], $_ . "''Y", [ $_ . "'a'a", "Y" ], $_ . "'aNY", [ $_ . "'a", "aNY" ], $_ . "''aNY", [ $_ . "'a'a", "aNY" ], $_ . "'_I", [ $_ . "'y", "_I" ], $_ . "''_I", [ $_ . "'y'y", "_I" ], $_ . "'_U", [ "", $_ . "'U" ], $_ . "''_U", [ "", $_ . "''U" ], ( $option{'non-quoting'} ? () : ( $_ . "'\"A", [ $_ . "'", "A" ], $_ . "''\"A", [ $_ . "'a'a\"", "A" ], $_ . "'\"I", [ $_ . "'y\"", "I" ], $_ . "''\"I", [ $_ . "'y'y\"", "I" ], $_ . "'\"U", [ $_ . "'w\"", "U" ], $_ . "''\"U", [ $_ . "'w'w\"", "U" ], $_ . "'\"Y", [ $_ . "'a\"", "Y" ], $_ . "''\"Y", [ $_ . "'a'a\"", "Y" ], $_ . "'\"aNY", [ $_ . "'a\"", "aNY" ], $_ . "''\"aNY", [ $_ . "'a'a\"", "aNY" ], $_ . "'\"_I", [ $_ . "'y\"", "_I" ], $_ . "''\"_I", [ $_ . "'y'y\"", "_I" ], $_ . "'\"_U", [ "", $_ . "'\"U" ], $_ . "''\"_U", [ "", $_ . "''\"U" ], ) ), map { # doubled $fix . "'a" . $_, [ $fix . "'a", "a" . $_ ], $fix . "''a" . $_, [ $fix . "'a'a", "a" . $_ ], $fix . "'i" . $_, [ $fix . "'y", "i" . $_ ], $fix . "''i" . $_, [ $fix . "'y'y", "i" . $_ ], $fix . "'u" . $_, [ $fix . "'w", "u" . $_ ], $fix . "''u" . $_, [ $fix . "'w'w", "u" . $_ ], ( $option{'non-quoting'} ? () : ( $fix . "'\"a" . $_, [ $fix . "'a\"", "a" . $_ ], $fix . "''\"a" . $_, [ $fix . "'a'a\"", "a" . $_ ], $fix . "'\"i" . $_, [ $fix . "'y\"", "i" . $_ ], $fix . "''\"i" . $_, [ $fix . "'y'y\"", "i" . $_ ], $fix . "'\"u" . $_, [ $fix . "'w\"", "u" . $_ ], $fix . "''\"u" . $_, [ $fix . "'w'w\"", "u" . $_ ], ) ), } "'", @scope, "y", "T", "H", "W" } @scope, $option{'non-quoting'} ? () : "\"" # quoted included ), "T'A", [ "t'", "A" ], "T''A", [ "t'a'a", "A" ], "T'I", [ "t'y", "I" ], "T''I", [ "t'y'y", "I" ], "T'U", [ "t'w", "U" ], "T''U", [ "t'w'w", "U" ], "T'_I", [ "t'y", "_I" ], "T''_I", [ "t'y'y", "_I" ], "T'_U", [ "", "T'U" ], "T''_U", [ "", "T''U" ], ( $option{'non-quoting'} ? () : ( "T'\"A", [ "t'", "A" ], "T''\"A", [ "t'a'a\"", "A" ], "T'\"I", [ "t'y\"", "I" ], "T''\"I", [ "t'y'y\"", "I" ], "T'\"U", [ "t'w\"", "U" ], "T''\"U", [ "t'w'w\"", "U" ], "T'\"_I", [ "t'y\"", "_I" ], "T''\"_I", [ "t'y'y\"", "_I" ], "T'\"_U", [ "", "T'\"U" ], "T''\"_U", [ "", "T''\"U" ], ) ), ( map { # doubled "T'a" . $_, [ "t'a", "a" . $_ ], "T''a" . $_, [ "t'a'a", "a" . $_ ], "T'i" . $_, [ "t'y", "i" . $_ ], "T''i" . $_, [ "t'y'y", "i" . $_ ], "T'u" . $_, [ "t'w", "u" . $_ ], "T''u" . $_, [ "t'w'w", "u" . $_ ], ( $option{'non-quoting'} ? () : ( "T'\"a" . $_, [ "t'a\"", "a" . $_ ], "T''\"a" . $_, [ "t'a'a\"", "a" . $_ ], "T'\"i" . $_, [ "t'y\"", "i" . $_ ], "T''\"i" . $_, [ "t'y'y\"", "i" . $_ ], "T'\"u" . $_, [ "t'w\"", "u" . $_ ], "T''\"u" . $_, [ "t'w'w\"", "u" . $_ ], ) ), } "'", @scope, "y" # "T", "H", "W" ), ); $decoder->[1] = Encode::Mapper->compile ( [ 'others' => undef, 'silent' => 0, ], # non-exciting entities "\x09", "\x09", "\x0A", "\x0A", "\x0D", "\x0D", " ", " ", ".", ".", ":", ":", "!", "!", "/", "/", "\\", "\\", ",", "\x{060C}", # "\xD8\x8C" right-to-left-comma ";", "\x{061B}", # "\xD8\x9B" right-to-left-semicolon "?", "\x{061F}", # "\xD8\x9F" right-to-left-question-mark "--", "\x{0640}", # "\xD9\x80" ta.twiil ( map { "" . $_, chr 0x0660 + $_, } 0 .. 9 ), # improper auxiliary vowels -- the case of conditioned deletion "-a", "", "-u", "", "-i", "", ( map { "-a" . $_->[0], [ "", "a" . $_->[0] ], "-i" . $_->[0], [ "", "i" . $_->[0] ], "-u" . $_->[0], [ "", "u" . $_->[0] ], } @sunny, @moony, @taaaa, $empty[0] ), # non-voweled/sukuuned sunnies and moonies ( map { my $x = 1 + $_; my $y = "\x{0651}" x $_; # "\xD9\x91" ^sadda map { my $fix = $_; $_->[0] x $x, $_->[1] . $y . "\x{0652}", # "\xD9\x92" sukuun ( $option{'non-refined'} ? () : ( $_->[0] x $x . "-a", $_->[1] . $y . "\x{064E}", $_->[0] x $x . "-u", $_->[1] . $y . "\x{064F}", $_->[0] x $x . "-i", $_->[1] . $y . "\x{0650}", $_->[0] x $x . "-A", $_->[1] . $y . "\x{064E}\x{0627}", $_->[0] x $x . "-Y", $_->[1] . $y . "\x{064E}\x{0649}", $_->[0] x $x . "-U", $_->[1] . $y . "\x{064F}\x{0648}", $_->[0] x $x . "-I", $_->[1] . $y . "\x{0650}\x{064A}", $_->[0] x $x . "-aN", $_->[1] . $y . "\x{064B}" . ( $_->[0] eq "'a" ? "" : "\x{0627}" ), $_->[0] x $x . "-uN", $_->[1] . $y . "\x{064C}", $_->[0] x $x . "-iN", $_->[1] . $y . "\x{064D}", $_->[0] x $x . "-aNA", $_->[1] . $y . "\x{064B}\x{0627}", $_->[0] x $x . "-uNA", $_->[1] . $y . "\x{064C}\x{0627}", $_->[0] x $x . "-iNA", $_->[1] . $y . "\x{064D}\x{0627}", $_->[0] x $x . "-aNY", $_->[1] . $y . "\x{064B}\x{0649}", $_->[0] x $x . "-uNY", $_->[1] . $y . "\x{064C}\x{0649}", $_->[0] x $x . "-iNY", $_->[1] . $y . "\x{064D}\x{0649}", $_->[0] x $x . "-aNU", $_->[1] . $y . "\x{064B}\x{0648}", $_->[0] x $x . "-uNU", $_->[1] . $y . "\x{064C}\x{0648}", $_->[0] x $x . "-iNU", $_->[1] . $y . "\x{064D}\x{0648}", ) ), ( $option{'non-quoting'} ? () : ( $_->[0] x $x . "\"", $_->[1] . $y . "\"\x{0652}", # "\xD9\x92" sukuun ( $option{'non-refined'} ? () : ( $_->[0] x $x . "-\"a", $_->[1] . $y . "\"\x{064E}", $_->[0] x $x . "-\"u", $_->[1] . $y . "\"\x{064F}", $_->[0] x $x . "-\"i", $_->[1] . $y . "\"\x{0650}", $_->[0] x $x . "-\"A", $_->[1] . $y . "\"\x{064E}\x{0627}", $_->[0] x $x . "-\"Y", $_->[1] . $y . "\"\x{064E}\x{0649}", $_->[0] x $x . "-\"U", $_->[1] . $y . "\"\x{064F}\x{0648}", $_->[0] x $x . "-\"I", $_->[1] . $y . "\"\x{0650}\x{064A}", $_->[0] x $x . "-\"aN", $_->[1] . $y . "\"\x{064B}" . ( $_->[0] eq "'a" ? "" : "\x{0627}" ), $_->[0] x $x . "-\"uN", $_->[1] . $y . "\"\x{064C}", $_->[0] x $x . "-\"iN", $_->[1] . $y . "\"\x{064D}", $_->[0] x $x . "-\"aNA", $_->[1] . $y . "\"\x{064B}\x{0627}", $_->[0] x $x . "-\"uNA", $_->[1] . $y . "\"\x{064C}\x{0627}", $_->[0] x $x . "-\"iNA", $_->[1] . $y . "\"\x{064D}\x{0627}", $_->[0] x $x . "-\"aNY", $_->[1] . $y . "\"\x{064B}\x{0649}", $_->[0] x $x . "-\"uNY", $_->[1] . $y . "\"\x{064C}\x{0649}", $_->[0] x $x . "-\"iNY", $_->[1] . $y . "\"\x{064D}\x{0649}", $_->[0] x $x . "-\"aNU", $_->[1] . $y . "\"\x{064B}\x{0648}", $_->[0] x $x . "-\"uNU", $_->[1] . $y . "\"\x{064C}\x{0648}", $_->[0] x $x . "-\"iNU", $_->[1] . $y . "\"\x{064D}\x{0648}", ) ), ) ), map { ( $option{'non-refined'} ? () : ( $fix->[0] x $x . "-a" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "a" . $_->[0] ], $fix->[0] x $x . "-u" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "u" . $_->[0] ], $fix->[0] x $x . "-i" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "i" . $_->[0] ], $fix->[0] x $x . "-A" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "A" . $_->[0] ], $fix->[0] x $x . "-Y" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "Y" . $_->[0] ], $fix->[0] x $x . "-U" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "U" . $_->[0] ], $fix->[0] x $x . "-I" . $_->[0], [ $fix->[1] . $y . "\x{0652}", "I" . $_->[0] ], ( $option{'non-quoting'} ? () : ( $fix->[0] x $x . "-\"a" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "a" . $_->[0] ], $fix->[0] x $x . "-\"u" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "u" . $_->[0] ], $fix->[0] x $x . "-\"i" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "i" . $_->[0] ], $fix->[0] x $x . "-\"A" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "A" . $_->[0] ], $fix->[0] x $x . "-\"Y" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "Y" . $_->[0] ], $fix->[0] x $x . "-\"U" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "U" . $_->[0] ], $fix->[0] x $x . "-\"I" . $_->[0], [ $fix->[1] . $y . "\x{0652}\"", "I" . $_->[0] ], ) ), ) ), } @sunny, @moony, @taaaa, $empty[0] } @sunny, @moony[1 .. $#moony], $empty[0] # $moony[0] excluded as long as is unclear ^^ } 0, 1 ), $moony[0]->[0], $moony[0]->[1], # now necessary of course ^^ # voweled/non-sukuuned sunnies and moonies ( map { my $x = 1 + $_; my $y = "\x{0651}" x $_; # "\xD9\x91" ^sadda map { my $fix = $_; $_->[0] x $x . "a", $_->[1] . $y . "\x{064E}", $_->[0] x $x . "u", $_->[1] . $y . "\x{064F}", $_->[0] x $x . "i", $_->[1] . $y . "\x{0650}", $_->[0] x $x . "_a", $_->[1] . $y . "\x{0670}", ( $option{'non-refined'} ? () : ( $_->[0] x $x . "_u", $_->[1] . $y . "\x{0657}", $_->[0] x $x . "_i", $_->[1] . $y . "\x{0656}", $_->[0] x $x . "_aA", $_->[1] . $y . "\x{0670}\x{0627}", $_->[0] x $x . "_aY", $_->[1] . $y . "\x{0670}\x{0649}", $_->[0] x $x . "_aU", $_->[1] . $y . "\x{0670}\x{0648}", $_->[0] x $x . "_aI", $_->[1] . $y . "\x{0670}\x{064A}", ) ), $_->[0] x $x . "A", $_->[1] . $y . "\x{064E}\x{0627}", $_->[0] x $x . "Y", $_->[1] . $y . "\x{064E}\x{0649}", $_->[0] x $x . "_I", $_->[1] . $y . "\x{0650}\x{0627}", $_->[0] x $x . "U", $_->[1] . $y . "\x{064F}\x{0648}", $_->[0] x $x . "I", $_->[1] . $y . "\x{0650}\x{064A}", $_->[0] x $x . "Uw", [ $_->[1] . $y . "\x{064F}", "ww" ], $_->[0] x $x . "Iy", [ $_->[1] . $y . "\x{0650}", "yy" ], ( $option{'non-refined'} ? () : ( $_->[0] x $x . "^A", $_->[1] . $y . "\x{064F}\x{0627}\x{0653}", $_->[0] x $x . "^U", $_->[1] . $y . "\x{064F}\x{0648}\x{0653}", $_->[0] x $x . "^I", $_->[1] . $y . "\x{0650}\x{064A}\x{0653}", $_->[0] x $x . "^Uw", [ $_->[1] . $y . "\x{064F}\x{0648}\x{0655}", "|" ], # roughly $_->[0] x $x . "^Iy", [ $_->[1] . $y . "\x{0650}\x{0649}\x{0655}", "|" ], # roughly ) ), $_->[0] x $x . "aa", [ "", $_->[0] x $x . "A" ], $_->[0] x $x . "uw", [ "", $_->[0] x $x . "U" ], $_->[0] x $x . "iy", [ "", $_->[0] x $x . "I" ], ( $option{'non-quoting'} ? () : ( $_->[0] x $x . "\"a", $_->[1] . $y . "\"\x{064E}", $_->[0] x $x . "\"u", $_->[1] . $y . "\"\x{064F}", $_->[0] x $x . "\"i", $_->[1] . $y . "\"\x{0650}", $_->[0] x $x . "\"_a", $_->[1] . $y . "\"\x{0670}", ( $option{'non-refined'} ? () : ( $_->[0] x $x . "\"_u", $_->[1] . $y . "\"\x{0657}", $_->[0] x $x . "\"_i", $_->[1] . $y . "\"\x{0656}", $_->[0] x $x . "\"_aA", $_->[1] . $y . "\"\x{0670}\x{0627}", $_->[0] x $x . "\"_aY", $_->[1] . $y . "\"\x{0670}\x{0649}", $_->[0] x $x . "\"_aU", $_->[1] . $y . "\"\x{0670}\x{0648}", $_->[0] x $x . "\"_aI", $_->[1] . $y . "\"\x{0670}\x{064A}", ) ), $_->[0] x $x . "\"A", $_->[1] . $y . "\"\x{064E}\x{0627}", $_->[0] x $x . "\"Y", $_->[1] . $y . "\"\x{064E}\x{0649}", $_->[0] x $x . "\"A\"", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}", $_->[0] x $x . "\"Y\"", $_->[1] . $y . "\"\x{064E}\x{0649}\"\x{0652}", $_->[0] x $x . "A\"", $_->[1] . $y . "\x{064E}\x{0627}\"\x{0652}", $_->[0] x $x . "Y\"", $_->[1] . $y . "\x{064E}\x{0649}\"\x{0652}", $_->[0] x $x . "\"_I", $_->[1] . $y . "\"\x{0650}\x{0627}", $_->[0] x $x . "\"_I\"", $_->[1] . $y . "\"\x{0650}\x{0627}\"\x{0652}", $_->[0] x $x . "_I\"", $_->[1] . $y . "\x{0650}\x{0627}\"\x{0652}", $_->[0] x $x . "\"U", $_->[1] . $y . "\"\x{064F}\x{0648}", $_->[0] x $x . "\"I", $_->[1] . $y . "\"\x{0650}\x{064A}", $_->[0] x $x . "\"U\"", $_->[1] . $y . "\"\x{064F}\x{0648}\"\x{0652}", $_->[0] x $x . "\"I\"", $_->[1] . $y . "\"\x{0650}\x{064A}\"\x{0652}", $_->[0] x $x . "U\"", $_->[1] . $y . "\x{064F}\x{0648}\"\x{0652}", $_->[0] x $x . "I\"", $_->[1] . $y . "\x{0650}\x{064A}\"\x{0652}", $_->[0] x $x . "\"Uw", [ $_->[1] . $y . "\"\x{064F}", "ww" ], $_->[0] x $x . "\"Iy", [ $_->[1] . $y . "\"\x{0650}", "yy" ], ( $option{'non-refined'} ? () : ( $_->[0] x $x . "\"^A", $_->[1] . $y . "\"\x{064F}\x{0627}\x{0653}", $_->[0] x $x . "\"^U", $_->[1] . $y . "\"\x{064F}\x{0648}\x{0653}", $_->[0] x $x . "\"^I", $_->[1] . $y . "\"\x{0650}\x{064A}\x{0653}", $_->[0] x $x . "\"^Uw", [ $_->[1] . $y . "\"\x{064F}\x{0648}\x{0655}", "|" ], # roughly $_->[0] x $x . "\"^Iy", [ $_->[1] . $y . "\"\x{0650}\x{0649}\x{0655}", "|" ], # roughly ) ), $_->[0] x $x . "\"aa", [ "", $_->[0] x $x . "\"A" ], $_->[0] x $x . "\"uw", [ "", $_->[0] x $x . "\"U" ], $_->[0] x $x . "\"iy", [ "", $_->[0] x $x . "\"I" ], ) ), ( map { $fix->[0] x $x . "uw" . $_, [ $fix->[1] . $y . "\x{064F}", "w" . $_ ], $fix->[0] x $x . "iy" . $_, [ $fix->[1] . $y . "\x{0650}", "y" . $_ ], ( $option{'non-quoting'} ? () : ( $fix->[0] x $x . "\"uw" . $_, [ $fix->[1] . $y . "\"\x{064F}", "w" . $_ ], $fix->[0] x $x . "\"iy" . $_, [ $fix->[1] . $y . "\"\x{0650}", "y" . $_ ], ) ), } "\"", qw "a u i A Y U I _I", $option{'non-refined'} ? () : qw "_a _u _i ^A ^U ^I" ), $_->[0] x $x . "_aA'|aN", $_->[1] . $y . "\x{0670}\x{0627}\x{0621}\x{064B}", $_->[0] x $x . "A'|aN", $_->[1] . $y . "\x{064E}\x{0627}\x{0621}\x{064B}", $_->[0] x $x . "aN", $_->[1] . $y . "\x{064B}" . ( $_->[0] eq "'a" ? "" : "\x{0627}" ), $_->[0] x $x . "uN", $_->[1] . $y . "\x{064C}", $_->[0] x $x . "iN", $_->[1] . $y . "\x{064D}", ( $option{'non-quoting'} ? () : ( $_->[0] x $x . "\"_aA'|aN", $_->[1] . $y . "\"\x{0670}\x{0627}\x{0621}\x{064B}", $_->[0] x $x . "\"A'|aN", $_->[1] . $y . "\"\x{064E}\x{0627}\x{0621}\x{064B}", $_->[0] x $x . "\"_aA'|\"aN", $_->[1] . $y . "\"\x{0670}\x{0627}\x{0621}\"\x{064B}", $_->[0] x $x . "\"A'|\"aN", $_->[1] . $y . "\"\x{064E}\x{0627}\x{0621}\"\x{064B}", $_->[0] x $x . "_aA'|\"aN", $_->[1] . $y . "\x{0670}\x{0627}\x{0621}\"\x{064B}", $_->[0] x $x . "A'|\"aN", $_->[1] . $y . "\x{064E}\x{0627}\x{0621}\"\x{064B}", $_->[0] x $x . "\"A\"'|aN", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\x{064B}", $_->[0] x $x . "\"A\"'|\"aN", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}", $_->[0] x $x . "A\"'|\"aN", $_->[1] . $y . "\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}", $_->[0] x $x . "\"aN", $_->[1] . $y . "\"\x{064B}" . ( $_->[0] eq "'a" ? "" : "\x{0627}" ), $_->[0] x $x . "\"uN", $_->[1] . $y . "\"\x{064C}", $_->[0] x $x . "\"iN", $_->[1] . $y . "\"\x{064D}", ) ), } @sunny, @moony, $empty[0] } 0, 1 ), # 'alif protected endings ( map { my $x = 1 + $_; my $y = "\x{0651}" x $_; # "\xD9\x91" ^sadda map { $_->[0] x $x . "_aA'|aNA", $_->[1] . $y . "\x{0670}\x{0627}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "A'|aNA", $_->[1] . $y . "\x{064E}\x{0627}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "aNA", $_->[1] . $y . "\x{064B}\x{0627}", $_->[0] x $x . "aNY", $_->[1] . $y . "\x{064B}\x{0649}", ( $option{'non-refined'} ? () : ( $_->[0] x $x . "uNA", $_->[1] . $y . "\x{064C}\x{0627}", $_->[0] x $x . "iNA", $_->[1] . $y . "\x{064D}\x{0627}", $_->[0] x $x . "uNY", $_->[1] . $y . "\x{064C}\x{0649}", $_->[0] x $x . "iNY", $_->[1] . $y . "\x{064D}\x{0649}", $_->[0] x $x . "aNU", $_->[1] . $y . "\x{064B}\x{0648}", $_->[0] x $x . "uNU", $_->[1] . $y . "\x{064C}\x{0648}", $_->[0] x $x . "iNU", $_->[1] . $y . "\x{064D}\x{0648}", $_->[0] x $x . "aW-a", $_->[1] . $y . "\x{064E}\x{0648}\x{064E}\x{0627}", $_->[0] x $x . "aW-u", $_->[1] . $y . "\x{064E}\x{0648}\x{064F}\x{0627}", $_->[0] x $x . "aW-i", $_->[1] . $y . "\x{064E}\x{0648}\x{0650}\x{0627}", ) ), $_->[0] x $x . "aW", $_->[1] . $y . "\x{064E}\x{0648}\x{0652}\x{0627}", $_->[0] x $x . "uW", $_->[1] . $y . "\x{064F}\x{0648}\x{0627}", $_->[0] x $x . "UW", $_->[1] . $y . "\x{064F}\x{0648}\x{0627}", $_->[0] x $x . "UA", $_->[1] . $y . "\x{064F}\x{0648}\x{0627}", ( $option{'non-quoting'} ? () : ( $_->[0] x $x . "\"_aA'|aNA", $_->[1] . $y . "\"\x{0670}\x{0627}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "\"A'|aNA", $_->[1] . $y . "\"\x{064E}\x{0627}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "\"_aA'|\"aNA", $_->[1] . $y . "\"\x{0670}\x{0627}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "\"A'|\"aNA", $_->[1] . $y . "\"\x{064E}\x{0627}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "_aA'|\"aNA", $_->[1] . $y . "\x{0670}\x{0627}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "A'|\"aNA", $_->[1] . $y . "\x{064E}\x{0627}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "\"A\"'|aNA", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\x{064B}\x{0627}", $_->[0] x $x . "\"A\"'|\"aNA", $_->[1] . $y . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "A\"'|\"aNA", $_->[1] . $y . "\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}\x{0627}", $_->[0] x $x . "\"aNA", $_->[1] . $y . "\"\x{064B}\x{0627}", $_->[0] x $x . "\"aNY", $_->[1] . $y . "\"\x{064B}\x{0649}", ( $option{'non-refined'} ? () : ( $_->[0] x $x . "\"uNA", $_->[1] . $y . "\"\x{064C}\x{0627}", $_->[0] x $x . "\"iNA", $_->[1] . $y . "\"\x{064D}\x{0627}", $_->[0] x $x . "\"uNY", $_->[1] . $y . "\"\x{064C}\x{0649}", $_->[0] x $x . "\"iNY", $_->[1] . $y . "\"\x{064D}\x{0649}", $_->[0] x $x . "\"aNU", $_->[1] . $y . "\"\x{064B}\x{0648}", $_->[0] x $x . "\"uNU", $_->[1] . $y . "\"\x{064C}\x{0648}", $_->[0] x $x . "\"iNU", $_->[1] . $y . "\"\x{064D}\x{0648}", $_->[0] x $x . "\"aW-a", $_->[1] . $y . "\"\x{064E}\x{0648}\x{064E}\x{0627}", $_->[0] x $x . "\"aW-u", $_->[1] . $y . "\"\x{064E}\x{0648}\x{064F}\x{0627}", $_->[0] x $x . "\"aW-i", $_->[1] . $y . "\"\x{064E}\x{0648}\x{0650}\x{0627}", $_->[0] x $x . "\"aW-\"a", $_->[1] . $y . "\"\x{064E}\x{0648}\"\x{064E}\x{0627}", $_->[0] x $x . "\"aW-\"u", $_->[1] . $y . "\"\x{064E}\x{0648}\"\x{064F}\x{0627}", $_->[0] x $x . "\"aW-\"i", $_->[1] . $y . "\"\x{064E}\x{0648}\"\x{0650}\x{0627}", $_->[0] x $x . "aW-\"a", $_->[1] . $y . "\x{064E}\x{0648}\"\x{064E}\x{0627}", $_->[0] x $x . "aW-\"u", $_->[1] . $y . "\x{064E}\x{0648}\"\x{064F}\x{0627}", $_->[0] x $x . "aW-\"i", $_->[1] . $y . "\x{064E}\x{0648}\"\x{0650}\x{0627}", $_->[0] x $x . "\"aW-\"", $_->[1] . $y . "\"\x{064E}\x{0648}\"\x{0652}\x{0627}", $_->[0] x $x . "aW-\"", $_->[1] . $y . "\x{064E}\x{0648}\"\x{0652}\x{0627}", ) ), $_->[0] x $x . "\"aW", $_->[1] . $y . "\"\x{064E}\x{0648}\x{0652}\x{0627}", $_->[0] x $x . "\"uW", $_->[1] . $y . "\"\x{064F}\x{0648}\x{0627}", $_->[0] x $x . "\"UW", $_->[1] . $y . "\"\x{064F}\x{0648}\x{0627}", $_->[0] x $x . "\"UA", $_->[1] . $y . "\"\x{064F}\x{0648}\x{0627}", ) ), } @sunny, @moony, $empty[0] } 0, 1 ), # taa' marbuu.ta endings ( map { $_->[0], $_->[1] . "\x{0652}", # "\xD9\x92" sukuun ( $option{'non-quoting'} ? () : ( $_->[0] . "\"", $_->[1] . "\"\x{0652}", # "\xD9\x92" sukuun ) ), } @taaaa ), ( map { my $fix = $_; $_->[0] . "a", $_->[1] . "\x{064E}", $_->[0] . "u", $_->[1] . "\x{064F}", $_->[0] . "i", $_->[1] . "\x{0650}", $_->[0] . "aN", $_->[1] . "\x{064B}", $_->[0] . "uN", $_->[1] . "\x{064C}", $_->[0] . "iN", $_->[1] . "\x{064D}", ( $option{'non-quoting'} ? () : ( $_->[0] . "\"a", $_->[1] . "\"\x{064E}", $_->[0] . "\"u", $_->[1] . "\"\x{064F}", $_->[0] . "\"i", $_->[1] . "\"\x{0650}", $_->[0] . "\"aN", $_->[1] . "\"\x{064B}", $_->[0] . "\"uN", $_->[1] . "\"\x{064C}", $_->[0] . "\"iN", $_->[1] . "\"\x{064D}", ) ), # non-voweled/sukuuned ( $option{'non-refined'} ? () : ( $_->[0] . "-a", $_->[1] . "\x{064E}", $_->[0] . "-u", $_->[1] . "\x{064F}", $_->[0] . "-i", $_->[1] . "\x{0650}", $_->[0] . "-aN", $_->[1] . "\x{064B}", $_->[0] . "-uN", $_->[1] . "\x{064C}", $_->[0] . "-iN", $_->[1] . "\x{064D}", ( $option{'non-quoting'} ? () : ( $_->[0] . "-\"a", $_->[1] . "\"\x{064E}", $_->[0] . "-\"u", $_->[1] . "\"\x{064F}", $_->[0] . "-\"i", $_->[1] . "\"\x{0650}", $_->[0] . "-\"aN", $_->[1] . "\"\x{064B}", $_->[0] . "-\"uN", $_->[1] . "\"\x{064C}", $_->[0] . "-\"iN", $_->[1] . "\"\x{064D}", ) ), ) ), map { ( $option{'non-refined'} ? () : ( $fix->[0] . "-a" . $_->[0], [ $fix->[1] . "\x{0652}", "a" . $_->[0] ], $fix->[0] . "-u" . $_->[0], [ $fix->[1] . "\x{0652}", "u" . $_->[0] ], $fix->[0] . "-i" . $_->[0], [ $fix->[1] . "\x{0652}", "i" . $_->[0] ], ( $option{'non-quoting'} ? () : ( $fix->[0] . "-\"a" . $_->[0], [ $fix->[1] . "\x{0652}\"", "a" . $_->[0] ], $fix->[0] . "-\"u" . $_->[0], [ $fix->[1] . "\x{0652}\"", "u" . $_->[0] ], $fix->[0] . "-\"i" . $_->[0], [ $fix->[1] . "\x{0652}\"", "i" . $_->[0] ], ) ), ) ), } @sunny, @moony, $empty[0] # @taaaa } $taaaa[0] ), # definite article assimilation .. non-linguistic ( map { $_->[0] . "-" . $_->[0], [ "\x{0644}", $_->[0] x 2 ], "l-" . $_->[0] x 2, [ "\x{0644}", $_->[0] x 2 ], } @sunny, @moony ), ( map { my $fix = $_; "l" . $_ . "-all", [ "", "l" . ( $_ eq "" ? "|" : $_ ) . "ll" ], "l" . $_ . "-al-", [ "", "l" . ( $_ eq "" ? "|" : $_ ) . "l-" ], "l" . $_ . "-al-l", [ "", "l" . $_ . "-ll" ], "l" . $_ . "-al-ll", [ "", "l" . $_ . "-ll" ], map { "l" . $fix . "-a" . $_->[0] . "-" . $_->[0], [ "", "l" . $fix . "l-" . $_->[0] x 2 ], "l" . $fix . "-al-" . $_->[0] x 2, [ "", "l" . $fix . "l-" . $_->[0] x 2 ], } @moony, grep { $_->[0] ne "l" } @sunny } "", "a", "u", "i", $option{'non-quoting'} ? () : ( "\"", "\"a", "\"u", "\"i" ) ), # initial vowels ( $option{'non-quoting'} ? () : ( "\"", "\x{0671}", # this grapheme is mode-dependent in the next level ) ), ( map { my $fix = $_; $_->[0] . "a", $_->[1] . "\x{064E}", $_->[0] . "u", $_->[1] . "\x{064F}", $_->[0] . "i", $_->[1] . "\x{0650}", ( $option{'non-refined'} ? () : ( $_->[0] . "_a", $_->[1] . "\x{0670}", $_->[0] . "_u", $_->[1] . "\x{0657}", $_->[0] . "_i", $_->[1] . "\x{0656}", $_->[0] . "_aA", $_->[1] . "\x{0670}\x{0627}", $_->[0] . "_aY", $_->[1] . "\x{0670}\x{0649}", $_->[0] . "_aU", $_->[1] . "\x{0670}\x{0648}", $_->[0] . "_aI", $_->[1] . "\x{0670}\x{064A}", ) ), $_->[0] . "A", $_->[1] . "\x{064E}\x{0627}", $_->[0] . "Y", $_->[1] . "\x{064E}\x{0649}", $_->[0] . "_I", $_->[1] . "\x{0650}\x{0627}", $_->[0] . "U", $_->[1] . "\x{064F}\x{0648}", $_->[0] . "I", $_->[1] . "\x{0650}\x{064A}", $_->[0] . "Uw", [ $_->[1] . "\x{064F}\x{0648}\x{0651}", "|" ], $_->[0] . "Iy", [ $_->[1] . "\x{0650}\x{064A}\x{0651}", "|" ], ( $option{'non-refined'} ? () : ( $_->[0] . "^A", "\x{0622}", # use no equivs $_->[0] . "^U", "\x{0623}\x{064F}\x{0648}", # use no equivs $_->[0] . "^I", "\x{0625}\x{0650}\x{064A}", # use no equivs ) ), $_->[0] . "aa", [ "", $_->[0] . "A" ], $_->[0] . "uw", [ "", $_->[0] . "U" ], $_->[0] . "iy", [ "", $_->[0] . "I" ], ( $option{'non-quoting'} ? () : ( $_->[0] . "\"a", $_->[1] . "\"\x{064E}", $_->[0] . "\"u", $_->[1] . "\"\x{064F}", $_->[0] . "\"i", $_->[1] . "\"\x{0650}", ( $option{'non-refined'} ? () : ( $_->[0] . "\"_a", $_->[1] . "\"\x{0670}", $_->[0] . "\"_u", $_->[1] . "\"\x{0657}", $_->[0] . "\"_i", $_->[1] . "\"\x{0656}", $_->[0] . "\"_aA", $_->[1] . "\"\x{0670}\x{0627}", $_->[0] . "\"_aY", $_->[1] . "\"\x{0670}\x{0649}", $_->[0] . "\"_aU", $_->[1] . "\"\x{0670}\x{0648}", $_->[0] . "\"_aI", $_->[1] . "\"\x{0670}\x{064A}", ) ), $_->[0] . "\"A", $_->[1] . "\"\x{064E}\x{0627}", $_->[0] . "\"Y", $_->[1] . "\"\x{064E}\x{0649}", $_->[0] . "\"A\"", $_->[1] . "\"\x{064E}\x{0627}\"\x{0652}", $_->[0] . "\"Y\"", $_->[1] . "\"\x{064E}\x{0649}\"\x{0652}", $_->[0] . "A\"", $_->[1] . "\x{064E}\x{0627}\"\x{0652}", $_->[0] . "Y\"", $_->[1] . "\x{064E}\x{0649}\"\x{0652}", $_->[0] . "\"_I", $_->[1] . "\"\x{0650}\x{0627}", $_->[0] . "\"_I\"", $_->[1] . "\"\x{0650}\x{0627}\"\x{0652}", $_->[0] . "_I\"", $_->[1] . "\x{0650}\x{0627}\"\x{0652}", $_->[0] . "\"U", $_->[1] . "\"\x{064F}\x{0648}", $_->[0] . "\"I", $_->[1] . "\"\x{0650}\x{064A}", $_->[0] . "\"U\"", $_->[1] . "\"\x{064F}\x{0648}\"\x{0652}", $_->[0] . "\"I\"", $_->[1] . "\"\x{0650}\x{064A}\"\x{0652}", $_->[0] . "U\"", $_->[1] . "\x{064F}\x{0648}\"\x{0652}", $_->[0] . "I\"", $_->[1] . "\x{0650}\x{064A}\"\x{0652}", $_->[0] . "\"Uw", [ $_->[1] . "\"\x{064F}\x{0648}\x{0651}", "|" ], $_->[0] . "\"Iy", [ $_->[1] . "\"\x{0650}\x{064A}\x{0651}", "|" ], ( $option{'non-refined'} ? () : ( $_->[0] . "\"^A", "\"\x{0622}", # use no equivs $_->[0] . "\"^U", "\"\x{0623}\x{064F}\x{0648}", # use no equivs $_->[0] . "\"^I", "\"\x{0625}\x{0650}\x{064A}", # use no equivs ) ), $_->[0] . "\"aa", [ "", $_->[0] . "\"A" ], $_->[0] . "\"uw", [ "", $_->[0] . "\"U" ], $_->[0] . "\"iy", [ "", $_->[0] . "\"I" ], ) ), ( map { $fix->[0] . "uw" . $_, [ $fix->[1] . "\x{064F}", "w" . $_ ], $fix->[0] . "iy" . $_, [ $fix->[1] . "\x{0650}", "y" . $_ ], ( $option{'non-quoting'} ? () : ( $fix->[0] . "\"uw" . $_, [ $fix->[1] . "\"\x{064F}", "w" . $_ ], $fix->[0] . "\"iy" . $_, [ $fix->[1] . "\"\x{0650}", "y" . $_ ], ) ), } "\"", qw "a u i A Y U I _I", $option{'non-refined'} ? () : qw "_a _u _i ^A ^U ^I" ), $_->[0] . "_aA'|aN", $_->[1] . "\x{0670}\x{0627}\x{0621}\x{064B}", $_->[0] . "A'|aN", $_->[1] . "\x{064E}\x{0627}\x{0621}\x{064B}", $_->[0] . "aN", $_->[1] . "\x{064B}", $_->[0] . "uN", $_->[1] . "\x{064C}", $_->[0] . "iN", $_->[1] . "\x{064D}", ( $option{'non-quoting'} ? () : ( $_->[0] . "\"_aA'|aN", $_->[1] . "\"\x{0670}\x{0627}\x{0621}\x{064B}", $_->[0] . "\"A'|aN", $_->[1] . "\"\x{064E}\x{0627}\x{0621}\x{064B}", $_->[0] . "\"_aA'|\"aN", $_->[1] . "\"\x{0670}\x{0627}\x{0621}\"\x{064B}", $_->[0] . "\"A'|\"aN", $_->[1] . "\"\x{064E}\x{0627}\x{0621}\"\x{064B}", $_->[0] . "_aA'|\"aN", $_->[1] . "\x{0670}\x{0627}\x{0621}\"\x{064B}", $_->[0] . "A'|\"aN", $_->[1] . "\x{064E}\x{0627}\x{0621}\"\x{064B}", $_->[0] . "\"A\"'|aN", $_->[1] . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\x{064B}", $_->[0] . "\"A\"'|\"aN", $_->[1] . "\"\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}", $_->[0] . "A\"'|\"aN", $_->[1] . "\x{064E}\x{0627}\"\x{0652}\x{0621}\"\x{064B}", $_->[0] . "\"aN", $_->[1] . "\"\x{064B}", $_->[0] . "\"uN", $_->[1] . "\"\x{064C}", $_->[0] . "\"iN", $_->[1] . "\"\x{064D}", ) ), } $empty[1] ), # non-notation insertion escapes provided through ':xml' ); no strict 'refs'; ${ $cls . '::decoder' } = $decoder; if ($option{'describe'}) { $_->describe('') foreach @{${ $cls . '::decoder' }}; } $cls->demode(defined ${ $cls . '::demode' } ? ${ $cls . '::demode' } : 'default'); return ${ $cls . '::decoder' }; } sub eecoder ($@) { my $cls = shift @_; my $ext = shift @_; my %opt = @_ ? do { my $i = 0; map { ++$i % 2 ? lc $_ : $_ } @_ } : (); no strict 'refs'; my $refcoder = \${ $cls . '::' . $ext }; use strict 'refs'; if (exists $opt{'load'}) { if (ref \$opt{'load'} eq 'SCALAR') { if (my $done = do $opt{'load'}) { # file-define return ${$refcoder} = $done; } else { carp "Cannot parse " . $opt{'load'} . ": $@" if $@; carp "Cannot do " . $opt{'load'} . ": $!" unless defined $done; carp "Cannot run " . $opt{'load'}; return undef; } } elsif (UNIVERSAL::isa($opt{'load'}, 'CODE')) { return ${$refcoder} = $opt{'load'}->(); } elsif (UNIVERSAL::isa($opt{'load'}, 'ARRAY')) { if (grep { not $_->isa('Encode::Mapper') } @{$opt{'load'}}) { carp "Expecting a reference to an array of 'Encode::Mapper' objects"; return undef; } return ${$refcoder} = $opt{'load'}; } carp "Invalid type of the 'load' parameter, action ignored"; return undef; } if (exists $opt{'dump'}) { require Data::Dumper; my ($data, $i, @refs, @data); $data = Data::Dumper->new([${$refcoder}], [$ext]); for ($i = 0; $i < @{${$refcoder}}; $i++) { $refs[$i] = ['L', 'H', $ext . "->[$i]" ]; $data[$i] = ${$refcoder}->[$i]->dumper($refs[$i]); } if (ref \$opt{'dump'} eq 'SCALAR') { if ($opt{'dump'} =~ /^[A-Z][A-Za-z]*(\:\:[A-Z][A-Za-z]*)+$/) { my $class = $cls; for ($class, $opt{'dump'}) { $_ =~ s/\:\:/\//g; $_ .= '.pm'; } my $where = $INC{$class} =~ /^(.*)$class$/; $opt{'dump'} = $where . $opt{'dump'}; } elsif ($opt{'dump'} !~ s/^!// and -f $opt{'dump'}) { # 'SCALAR' carp "The file " . $opt{'dump'} . " exists, ignoring action"; return undef; } open my $file, '>', $opt{'dump'} or die $opt{'dump'}; print $file 'my ($L, $H, $' . $ext . ');'; for ($i = 0; $i < @{${$refcoder}}; $i++) { print $file $data[$i]->Useqq(1)->Indent(0)->Dump(); } print $file 'return $' . $ext . ';'; close $file; return ${$refcoder}; } elsif (UNIVERSAL::isa($opt{'dump'}, 'SCALAR')) { my $dump = ${$opt{'dump'}}; ${$opt{'dump'}} = $data; return ${$refcoder}; } } return -1; } sub enmode ($$) { my ($cls, $mode) = @_; $cls = blessed $cls if ref $cls; $mode = 'undef' unless defined $mode; $mode = $modemap{$mode} if exists $modemap{$mode}; no strict 'refs'; my $return = ${ $cls . '::enmode' }; ${ $cls . '::enmode' } = $mode if defined $mode; return $return unless defined ${ $cls . '::encoder' }; if (defined $mode) { $cls->enmoder($mode) unless defined ${ $cls . '::encoder' }->[$mode + $enlevel] or $mode == 0; ${ $cls . '::encoder' }->[$enlevel - 1] = ${ $cls . '::encoder' }->[$mode + $enlevel]; } return $return; } sub demode ($$) { my ($cls, $mode) = @_; $cls = blessed $cls if ref $cls; $mode = 'undef' unless defined $mode; $mode = $modemap{$mode} if exists $modemap{$mode}; no strict 'refs'; my $return = ${ $cls . '::demode' }; ${ $cls . '::demode' } = $mode if defined $mode; return $return unless defined ${ $cls . '::decoder' }; if (defined $mode) { $cls->demoder($mode) unless defined ${ $cls . '::decoder' }->[$mode + $delevel] or $mode == 0; ${ $cls . '::decoder' }->[$delevel - 1] = ${ $cls . '::decoder' }->[$mode + $delevel]; } return $return; } sub enmoder ($$@) { my ($cls, $mode) = @_; no strict 'refs'; return ${ $cls . '::encoder' }->[$mode + $enlevel] = undef; } sub demoder ($$@) { my ($cls, $mode) = @_; my $demoder = []; # rules for the fullvocalize mode $demoder->[4] = [ [ 'silent' => 0, ], "\x{0671}", "\x{0627}", "\"\x{0652}", "", "\"\x{064E}", "", "\"\x{064F}", "", "\"\x{0650}", "", "\"\x{064B}", "", "\"\x{064C}", "", "\"\x{064D}", "", "\"\x{0670}", "", "\"\x{0657}", "", "\"\x{0656}", "", "\"", "", "\x{064E}\x{0627}\"\x{0652}", "\x{064E}\x{0627}\x{0652}", "\"\x{064E}\x{0627}\"\x{0652}", "\x{0627}\x{0652}", ( ( $option{'font-fixing'} ? ( map { "\x{0644}" . $_ . "\x{064E}\x{0627}\"\x{0652}", "\x{0644}\x{0627}" . $_ . "\x{064E}\x{0652}", "\x{0644}" . $_ . "\"\x{064E}\x{0627}\"\x{0652}", "\x{0644}\x{0627}" . $_ . "\x{0652}", } "", "\x{0651}" ) : () ), ), "\x{064E}\x{0649}\"\x{0652}", "\x{064E}\x{0649}\x{0652}", "\"\x{064E}\x{0649}\"\x{0652}", "\x{0649}\x{0652}", "\x{064F}\x{0648}\"\x{0652}", "\x{064F}\x{0648}\x{0652}", "\"\x{064F}\x{0648}\"\x{0652}", "\x{0648}\x{0652}", "\x{0650}\x{064A}\"\x{0652}", "\x{0650}\x{064A}\x{0652}", "\"\x{0650}\x{064A}\"\x{0652}", "\x{064A}\x{0652}", # modern external/internal substitution with wa.sla ( map { my $vowel = $_; map { "\x{064E}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{064F}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{0650}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0627}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0649}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}\x{0648}" . $_ . "\x{0671}", "\"" . $vowel ], "\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}\x{064A}" . $_ . "\x{0671}", "\"" . $vowel ], # quoted "\"\x{064E}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064F}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0671}", "\"" . $vowel ], "\"\x{0650}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{0627}" . $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{0649}" . $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{0648}" . $_ . "\x{0671}", "\"" . $vowel ], "\"\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{064A}" . $_ . "\x{0671}", "\"" . $vowel ], "\"\x{064E}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{064F}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{0650}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{064E}\x{0627}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0627}" . $_ . "\x{0627}", $vowel ], "\"\x{064E}\x{0649}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0649}" . $_ . "\x{0627}", $vowel ], "\"\x{064F}\x{0648}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0648}" . $_ . "\x{0627}", $vowel ], "\"\x{0650}\x{064A}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064A}" . $_ . "\x{0627}", $vowel ], "\x{064E}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}" . $_ . "\x{0627}", $vowel ], "\x{064F}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064F}" . $_ . "\x{0627}", $vowel ], "\x{0650}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0650}" . $_ . "\x{0627}", $vowel ], "\x{064E}\x{0627}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}\x{0627}" . $_ . "\x{0627}", $vowel ], "\x{064E}\x{0649}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}\x{0649}" . $_ . "\x{0627}", $vowel ], "\x{064F}\x{0648}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064F}\x{0648}" . $_ . "\x{0627}", $vowel ], "\x{0650}\x{064A}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0650}\x{064A}" . $_ . "\x{0627}", $vowel ], } "", "\x09", "\x0A", "\x0D", "\x20", "\x0D\x0A", "\x20\x20", "\x20\x20\x20", "\x20\x20\x20\x20" } "\x{064E}", "\x{064F}", "\x{0650}" ), # laam + 'alif .. either enforce ligatures, or shuffle the diacritics ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . $alif, "\x{0644}" . $alif . $_ . $vowel, "\x{0644}" . $_ . "\"" . $vowel . $alif, "\x{0644}" . $alif . $_, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", "\x{0652}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}" #, "\x{0671}" ) : () ), ), ( ( $option{'font-fixing'} ? ( map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . "\x{0671}", "\x{0644}\x{0627}" . $_ . $vowel, "\x{0644}" . $_ . "\"" . $vowel . "\x{0671}", "\x{0644}\x{0627}" . $_, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", "\x{0652}" ) : () ), ), # laam + vowel + 'alif + vowel .. internal substitution with wa.sla ( ( $option{'font-fixing'} ? ( map { my $double = $_; map { my $vowel = $_; map { "\x{0644}" . $double . $vowel . "\x{0627}" . $_, [ "\x{0644}\x{0671}" . $double . $vowel, "\"" . $_ ], # quoted "\x{0644}" . $double . "\"" . $vowel . "\x{0627}" . $_, [ "\x{0644}\x{0671}" . $double, "\"" . $_ ], "\x{0644}" . $double . "\"" . $vowel . "\x{0627}\"" . $_, [ "\x{0644}\x{0627}" . $double, $_ ], "\x{0644}" . $double . $vowel . "\x{0627}\"" . $_, [ "\x{0644}\x{0627}" . $double . $vowel, $_ ], } "\x{064E}", "\x{064F}", "\x{0650}" } "\x{064E}", "\x{064F}", "\x{0650}" } "", "\x{0651}" ) : () ), ), # optional ligatures to enforce here ]; # rules for the vocalize mode $demoder->[3] = [ [ 'silent' => 0, ], "\"\x{0652}", "\x{0652}", "\"\x{064E}", "", "\"\x{064F}", "", "\"\x{0650}", "", "\"\x{064B}", "", "\"\x{064C}", "", "\"\x{064D}", "", "\"\x{0670}", "", "\"\x{0657}", "", "\"\x{0656}", "", "\x{0652}", "", "\"", "", # modern external/internal substitution with wa.sla ( map { my $vowel = $_; map { "\x{064E}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{064F}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{0650}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0627}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0649}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}\x{0648}" . $_ . "\x{0627}", "\"" . $vowel ], "\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}\x{064A}" . $_ . "\x{0627}", "\"" . $vowel ], # quoted "\"\x{064E}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064F}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0627}", "\"" . $vowel ], "\"\x{0650}" . $_ . "\x{0627}" . $vowel, [ $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{0627}" . $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{0649}" . $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{0648}" . $_ . "\x{0627}", "\"" . $vowel ], "\"\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{064A}" . $_ . "\x{0627}", "\"" . $vowel ], "\"\x{064E}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{064F}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{0650}" . $_ . "\x{0627}\"" . $vowel, [ $_ . "\x{0627}", $vowel ], "\"\x{064E}\x{0627}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0627}" . $_ . "\x{0627}", $vowel ], "\"\x{064E}\x{0649}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0649}" . $_ . "\x{0627}", $vowel ], "\"\x{064F}\x{0648}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0648}" . $_ . "\x{0627}", $vowel ], "\"\x{0650}\x{064A}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064A}" . $_ . "\x{0627}", $vowel ], "\x{064E}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}" . $_ . "\x{0627}", $vowel ], "\x{064F}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064F}" . $_ . "\x{0627}", $vowel ], "\x{0650}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0650}" . $_ . "\x{0627}", $vowel ], "\x{064E}\x{0627}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}\x{0627}" . $_ . "\x{0627}", $vowel ], "\x{064E}\x{0649}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064E}\x{0649}" . $_ . "\x{0627}", $vowel ], "\x{064F}\x{0648}" . $_ . "\x{0627}\"" . $vowel, [ "\x{064F}\x{0648}" . $_ . "\x{0627}", $vowel ], "\x{0650}\x{064A}" . $_ . "\x{0627}\"" . $vowel, [ "\x{0650}\x{064A}" . $_ . "\x{0627}", $vowel ], } "", "\x09", "\x0A", "\x0D", "\x20", "\x0D\x0A", "\x20\x20", "\x20\x20\x20", "\x20\x20\x20\x20" } "\x{064E}", "\x{064F}", "\x{0650}" ), # laam + 'alif .. either enforce ligatures, or shuffle the diacritics ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . $alif, "\x{0644}" . $alif . $_ . $vowel, "\x{0644}" . $_ . "\"" . $vowel . $alif, "\x{0644}" . $alif . $_, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", # "\x{0652}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}", "\x{0671}" ) : () ), ), ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { "\x{0644}" . $_ . "\x{0652}" . $alif, "\x{0644}" . $alif . $_, "\x{0644}" . $_ . "\"\x{0652}" . $alif, "\x{0644}" . $alif . $_ . "\x{0652}", } "", "\x{0651}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}", "\x{0671}" ) : () ), ), # laam + vowel + 'alif + vowel .. internal substitution with wa.sla ( ( $option{'font-fixing'} ? ( map { my $double = $_; map { my $vowel = $_; map { "\x{0644}" . $double . $vowel . "\x{0627}" . $_, [ "\x{0644}\x{0627}" . $double . $vowel, "\"" . $_ ], # quoted "\x{0644}" . $double . "\"" . $vowel . "\x{0627}" . $_, [ "\x{0644}\x{0627}" . $double, "\"" . $_ ], "\x{0644}" . $double . "\"" . $vowel . "\x{0627}\"" . $_, [ "\x{0644}\x{0627}" . $double, $_ ], "\x{0644}" . $double . $vowel . "\x{0627}\"" . $_, [ "\x{0644}\x{0627}" . $double . $vowel, $_ ], } "\x{064E}", "\x{064F}", "\x{0650}" } "\x{064E}", "\x{064F}", "\x{0650}" } "", "\x{0651}" ) : () ), ), # optional ligatures to enforce here ]; # rules for the novocalize mode $demoder->[2] = [ [ 'silent' => 0, ], "\"\x{0652}", "\x{0652}", "\"\x{064E}", "\x{064E}", "\"\x{064F}", "\x{064F}", "\"\x{0650}", "\x{0650}", "\"\x{064B}", "\x{064B}", "\"\x{064C}", "\x{064C}", "\"\x{064D}", "\x{064D}", "\"\x{0670}", "\x{0670}", "\"\x{0657}", "\x{0657}", "\"\x{0656}", "\x{0656}", "\x{0652}", "", "\x{064E}", "", "\x{064F}", "", "\x{0650}", "", "\x{064B}", "", "\x{064C}", "", "\x{064D}", "", "\x{0670}", "", "\x{0657}", "", "\x{0656}", "", "\"", "", # modern internal substitution with "fictitious" wa.sla .. lam + vowel + 'alif + vowel below # modern external substitution with "fictitious" wa.sla # laam + 'alif .. either enforce ligatures, or shuffle the diacritics ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . $alif, "\x{0644}" . $alif . $_, "\x{0644}" . $_ . "\"" . $vowel . $alif, "\x{0644}" . $alif . $_ . $vowel, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", "\x{0652}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}", "\x{0671}" ) : () ), ), # laam + vowel + 'alif + vowel .. internal substitution with wa.sla # optional ligatures to enforce here ]; # rules for the noshadda mode $demoder->[1] = [ [ 'silent' => 0, ], ]; # original no-quotes rules $demoder->[0] = [ [ 'silent' => 0, ], # modern internal substitution with wa.sla .. lam + vowel + 'alif + vowel below ( map { my $vowel = $_; map { $vowel . "\x{0627}" . $_, $vowel . "\x{0671}", } "\x{064E}", "\x{064F}", "\x{0650}" } "\x{064E}", "\x{064F}", "\x{0650}" ), # modern external substitution with wa.sla ( map { my $vowel = $_; map { "\x{064E}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}" . $_, "\x{0671}" ], "\x{064F}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}" . $_, "\x{0671}" ], "\x{0650}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}" . $_, "\x{0671}" ], "\x{064E}\x{0627}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0627}" . $_, "\x{0671}" ], "\x{064E}\x{0649}" . $_ . "\x{0627}" . $vowel, [ "\x{064E}\x{0649}" . $_, "\x{0671}" ], "\x{064F}\x{0648}" . $_ . "\x{0627}" . $vowel, [ "\x{064F}\x{0648}" . $_, "\x{0671}" ], "\x{0650}\x{064A}" . $_ . "\x{0627}" . $vowel, [ "\x{0650}\x{064A}" . $_, "\x{0671}" ], } "\x09", "\x0A", "\x0D", "\x20", "\x0D\x0A", "\x20\x20", "\x20\x20\x20", "\x20\x20\x20\x20" } "\x{064E}", "\x{064F}", "\x{0650}" ), # laam + 'alif .. either enforce ligatures, or shuffle the diacritics ( ( $option{'font-fixing'} ? ( map { my $alif = $_; map { my $vowel = $_; map { "\x{0644}" . $_ . $vowel . $alif, "\x{0644}" . $alif . $_ . $vowel, } "", "\x{0651}" } "\x{064E}", "\x{064F}", "\x{0650}", "\x{064B}", "\x{064C}", "\x{064D}", "\x{0652}" } "\x{0622}", "\x{0623}", "\x{0625}", "\x{0627}", "\x{0671}" ) : () ), ), # laam + vowel + 'alif + vowel .. internal substitution with wa.sla ( ( $option{'font-fixing'} ? ( map { my $double = $_; map { my $vowel = $_; map { "\x{0644}" . $double . $vowel . "\x{0627}" . $_, "\x{0644}" . "\x{0671}" . $double . $vowel, } "\x{064E}", "\x{064F}", "\x{0650}" } "\x{064E}", "\x{064F}", "\x{0650}" } "", "\x{0651}" ) : () ), ), # optional ligatures to enforce here ]; no strict 'refs'; ${ $cls . '::decoder' }->[$mode + $delevel] = Encode::Mapper->compile(@{$demoder->[$mode]}); ${ $cls . '::decoder' }->[$mode + $delevel]->describe('') if $option{'describe'}; return ${ $cls . '::decoder' }->[$mode + $delevel]; } 1; __END__ =head1 NAME Encode::Arabic::ArabTeX - Interpreter of the ArabTeX notation of Arabic =head1 REVISION $Revision: 717 $ $Date: 2008-10-03 00:28:12 +0200 (Fri, 03 Oct 2008) $ =head1 SYNOPSIS use Encode::Arabic::ArabTeX; # imports just like 'use Encode' would, plus extended options while ($line = <>) { # maps the ArabTeX notation for Arabic into the Arabic script print encode 'utf8', decode 'arabtex', $line; # 'ArabTeX' alias 'Lagally' alias 'TeX' } # ArabTeX lower ASCII transliteration <--> Arabic script in Perl's internal format $string = decode 'ArabTeX', $octets; $octets = encode 'ArabTeX', $string; Encode::Arabic::ArabTeX->encoder('dump' => '!./encoder.code'); # dump the encoder engine to file Encode::Arabic::ArabTeX->decoder('load'); # load the decoder engine from module's extra sources =head1 DESCRIPTION ArabTeX is an excellent extension to TeX/LaTeX designed for typesetting the right-to-left scripts of the Orient. It comes up with very intuitive and comprehensible lower ASCII transliterations, the expressive power of which is even better than that of the scripts. L implements the rules needed for proper interpretation of the ArabTeX notation of Arabic. The conversion ifself is done by L, and the user interface is built on the L module. =head2 ENCODING BUSINESS Since the ArabTeX notation is not a simple mapping to the graphemes of the Arabic script, encoding the script into the notation is ambiguous. Two different strings in the notation may correspond to identical strings in the script. Heuristics must be engaged to decide which of the representations is more appropriate. Together with this bottle-neck, encoding may not be perfectly invertible by the decode operation, due to over-generation or approximations in the encoding algorithm. There are situations where conversion from the Arabic script to the ArabTeX notation is still convenient and useful. Imagine you need to edit the data, enhance it with vowels or other diacritical marks, produce phonetic transcripts and trim the typography of the script ... Do it in the ArabTeX notation, having an unrivalled control over your acts! Nonetheless, encoding is not the very purpose for this module's existence ;) =head2 DECODING BUSINESS The module decodes the ArabTeX notation as defined in the User Manual Version 4.00 of March 11, 2004, L. The implementation uses three levels of L engines to solve the problem: =over =item I writing I carriers are determined from the context in accordance with the Arabic orthographical conventions. The first level of mapping expands every C<< <'> >> into the verbatim encoding of the relevant carrier. This level of processing can become optional, if people ever need to encode the I carriers explicitly. Interpretation of geminated I C<< <''> >> is B here, as opposed to ArabTeX itself. In order to deduce the proper spelling rules, we resorted to L and experimented with words like C<< >>, C<< >>, C<< >>, etc. On this level, word-internal occurrences of C<< >> get translated into C<< >>, which is an extension to the notation that simplifies some requirements in modeling of the Arabic morphology. =item Grapheme generation The core level includes most of the rules needed, and converts the ArabTeX notation to Arabic graphemes in Unicode. The engine recognizes all the consonants of Modern Standard Arabic, plus the following letters: [ "|", "" ], # invisible consonant [ "B", "\x{0640}" ], # consonantal ta.twil [ "T", "\x{0629}" ], # ta' marbu.ta [ "H", "\x{0629}" ], # ta' marbu.ta silent [ "p", "\x{067E}" ], # pa' [ "v", "\x{06A4}" ], # va' [ "g", "\x{06AF}" ], # gaf [ "c", "\x{0681}" ], # .ha with hamza [ "^c", "\x{0686}" ], # gim with three [ ",c", "\x{0685}" ], # _ha with three [ "^z", "\x{0698}" ], # zay with three [ "^n", "\x{06AD}" ], # kaf with three [ "^l", "\x{06B5}" ], # lam with bow above [ ".r", "\x{0695}" ], # ra' with bow below There are many nice features in the notation, like assimilation, gemination, hyphenation, all implemented here. Defective and historical writings of vowels are supported, too! Try yourself if your fonts can handle these ;) Word-initial sequences like C<< >>, C<< >>, C<< >> and C<< >>, where C stands for a short, possibly quoted or missing, vowel, and C represents a fixed consonant, are processed according to the requirements of the Arabic orthography. Thus, C<< >> reduces to C<< >>, C<< >> becomes C<< >>, and C<< >> equals C<< >>, while C<< >> turns into C<< >>. =item I and ligatures I is introduced if there is a preceding long or short vowel, and the blank space is one newline, one tabulator, or up to four single spaces. Optionally, diacritical marks in between I and I<'alif> go after the latter letter, since most of the current systems rendering the Arabic script do not produce the desired ligatures if the two kinds of graphemes are not adjacent immediately. =back There are modes and options in ArabTeX that have not been dealt with yet in L. Still, mutual consistency of the systems is very high. This new release does support B and works in the ArabTeX's C<\vocalize> mode by default. The other B are implemented, too, as described below within the C and C methods. =head2 EXPORTS, ENGINES & MODES The module exports as if C also appeared in the package. The C options, except for the first-place subsequence of C<:xml>, C<:simple> or C<:describe>, are just delegated to L and imports performed properly. If the first element in the list to C is C<:xml>, all XML markup, or rather any B the well-paired and non-nested B C<< < >> and C<< > >>, will be preserved. Properties of the L engines can be generally controlled through the L API. In case the next, possibly the first, element in this list is C<:simple>, B in the engines B so that quotes be mapped to empty strings and infrequent or experimental notations of vowels not be interpreted in the extra manner of ArabTeX. Using C<:simple> is recommended for simple every-day tasks where these nuances would have no impact and where full initialization would be bothering. The C<:describe> option calls the L's C method on the module's engines right after their compilation. Initialization of the engines takes place the first time they are used, unless they have already been defined. There are two explicit methods for it: =over =item encoder Initialize or redefine the encoder engine. If no parameters are given, rules in the module are compiled into a list of L objects. Currently, the C<--dump> and C<--load> options have some experimental meaning. =item decoder See the description of C. =back There are five B currently recognized in this module, and their aliases are mapped according to the module's C<%modemap> hash. Selection of the appropriate mode is done best through the C and C functions of L, or with a direct call of the namesake methods in L: our %Encode::Arabic::ArabTeX::modemap = ( # the module provides these definitions 'default' => 3, 'undef' => 0, 'fullvocalize' => 4, 'full' => 4, 'vocalize' => 3, 'nosukuun' => 3, 'novocalize' => 2, 'novowels' => 2, 'none' => 2, 'noshadda' => 1, 'noneplus' => 1, ); # the function calls might be preferred as more comfortable Encode::Arabic::demode 'arabtex', 'full'; # like 'encode' and 'decode' of Encode Encode::Arabic::ArabTeX->demode('fullvocalize'); # like the Encode::Encoding interfaces # how modes can be set easily use Encode::Arabic ':modes'; enmode 'arabtex', 'undef'; demode 'arabtex', 'noneplus'; =over =item enmode Currently in development. The mode is fixed to C<'undef'> internally. =item demode Enforces the proper version of the final, third level of the L engines. =back =head1 SEE ALSO L, L, L, L ArabTeX system L Klaus Lagally L ArabTeX extensions L ArabXeTeX L Encode Arabic: Exercise in Functional Parsing L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2008 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/Encode/Mapper.pm0000444001432400135600000010546711144136727016214 0ustar smrzufal# ############################################################################ Otakar Smrz, 2003/01/23 # # Mapper Engine Class ##################################################################### 2003/06/19 # $Id: Mapper.pm 663 2008-08-11 14:21:54Z smrz $ package Encode::Mapper; use 5.008; use strict; use warnings; use Carp; our $VERSION = '1.6' || do { q $Revision: 663 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; use bytes; # ensures splitting into one-byte tokens .. lexically scoped our %options; # records of options per package .. global register our %option; # options of the caller package .. used with local sub import { # enforces setting of options my $cls = shift @_; $cls->options(@_) if @_; } sub whisper ($) { # module's internal function carp shift unless $option{'silent'}; } sub verify_rule ($$) { # module's internal function unless (defined $_[0] and $_[0] ne '') { whisper "Rule's LHS is empty, rule ignored"; return; } unless (defined $_[1]) { whisper "Rule's RHS is undefined, rule ignored"; return; } if (UNIVERSAL::isa($_[1], 'ARRAY')) { unless (defined $_[1]->[0]) { whisper "Rule's RHS is undefined, rule ignored"; return; } unless (ref \$_[1]->[0] eq 'SCALAR' or UNIVERSAL::isa($_[1]->[0], 'CODE')) { whisper "Rule's RHS is neither literal nor subroutine reference, rule ignored"; return; } unless (defined $_[1]->[1] and length $_[1]->[1] < length $_[0]) { whisper "Rule type '\$A => [\$X, \$Y], length \$A > length \$Y' misused, considering it '\$A => \$X'"; $_[1] = $_[1]->[0]; } } elsif (ref \$_[1] ne 'SCALAR' and not UNIVERSAL::isa($_[1], 'CODE')) { whisper "Rule's RHS is neither literal nor subroutine reference, rule ignored"; return; } return 1; } sub options ($%) { # options for general compilation of Mappers my $cls = shift @_; my ($i, $opt, %opt); my $caller = caller 0; $caller = caller 1 if $caller eq __PACKAGE__; my @returns = exists $options{$caller} ? %{$options{$caller}} : (); while (@_) { $opt = lc shift @_; if ($opt =~ /^\:/) { $opt eq ':others' and $opt{'others'} = sub { shift } and next; $opt eq ':silent' and $opt{'silent'} = 1 and next; $opt eq ':join' and $opt{'join'} = ''; } else { $opt =~ /^\-*(.*)$/; $opt{$1} = shift @_; } } { local $option{'silent'} = exists $opt{'silent'} ? $opt{'silent'} : $options{$caller}{'silent'}; if (defined $opt{'complement'} and UNIVERSAL::isa($opt{'complement'}, 'ARRAY')) { for ($i = 0; $i < @{$opt{'complement'}}; $i += 2) { verify_rule $opt{'complement'}->[$i], $opt{'complement'}->[$i + 1]; } } if (defined $opt{'override'} and UNIVERSAL::isa($opt{'override'}, 'ARRAY')) { for ($i = 0; $i < @{$opt{'override'}}; $i += 2) { verify_rule $opt{'override'}->[$i], $opt{'override'}->[$i + 1]; } } if (defined $opt{'others'} and not $option{'silent'}) { # see whisper if (UNIVERSAL::isa($opt{'others'}, 'CODE')) { carp "The subroutine will be called with the 'other' LHS parameter to get the rule's RHS"; } else { carp "The scalar value will become the RHS of each 'other' LHS"; } } } return %opt unless defined $cls; $options{$caller}{$_} = $opt{$_} foreach keys %opt; return @returns; } *new = *compile{'CODE'}; # provides the 'new' constructor .. the 'compile' method # *new = \&compile; # might be known at compile-time sub compile ($@) { # returns Mapper .. modified Aho-Corasick and Boyer-Moore search engine my $cls = shift @_; my (@tree, @bell, @skip, @queue, %redef); my ($q, $r, $s, $t, $i, $token, $trick); my ($null_list, $null_hash) = ([], {}); # references to empties need not consume unique memory my ($no_code, $no_list) = (1, 1); # optimization indicators local %option = exists $options{caller 0} ? %{$options{caller 0}} : (); # options be local due to verify_rule and whisper if (UNIVERSAL::isa($_[0], 'ARRAY')) { %option = (%option, options undef, @{shift @_}); } elsif (UNIVERSAL::isa($_[0], 'HASH')) { %option = (%option, options undef, %{shift @_}); } $skip[0] = undef; # never ever used .. fix the number of list elements equal $bell[0] = $null_list; # important .. depth-wise inheritation of the lists if (defined $option{'complement'}) { for ($i = 0; $i < @{$option{'complement'}}; $i += 2) { $q = 0; foreach $token (split //, $option{'complement'}->[$i]) { $tree[$q]->{$token} = ++$r unless defined $tree[$q]->{$token}; # increment $r ^^ $q = $tree[$q]->{$token}; } $tree[$q] = {} unless defined $tree[$q]; # define trees correctly, economize below whisper "Redefining the mapping for '" . $option{'complement'}->[$i] . "'" if defined $bell[$q]; $bell[$q] = [ $option{'complement'}->[$i + 1] ]; } } for ($i = 0; $i < @_; $i += 2) { # generate $tree[$q] transition function and initial $bell[$q] next unless verify_rule $_[$i], $_[$i + 1]; $q = 0; foreach $token (split //, $_[$i]) { $tree[$q]->{$token} = ++$r unless defined $tree[$q]->{$token}; # increment $r ^^ $q = $tree[$q]->{$token}; } $tree[$q] = {} unless defined $tree[$q]; # define trees correctly, economize below whisper "Redefining the mapping for '$_[$i]'" if $redef{$q}++; $bell[$q] = [ $_[$i + 1] ]; } if (defined $option{'override'}) { for ($i = 0; $i < @{$option{'override'}}; $i += 2) { $q = 0; foreach $token (split //, $option{'override'}->[$i]) { $tree[$q]->{$token} = ++$r unless defined $tree[$q]->{$token}; # increment $r ^^ $q = $tree[$q]->{$token}; } $tree[$q] = {} unless defined $tree[$q]; # define trees correctly, economize below whisper "Redefining the mapping for '" . $option{'override'}->[$i] . "'" if $redef{$q}++; $bell[$q] = [ $option{'override'}->[$i + 1] ]; } } foreach $token (map { chr } 0x00..0xFF) { unless (defined $tree[0]->{$token}) { unless (defined $option{'others'}) { $tree[0]->{$token} = 0; } else { $tree[0]->{$token} = ++$r; # increment $r ^^ $tree[$r] = {}; # define trees correctly } } $q = $tree[0]->{$token}; # including existing prefixes unless ($q == 0) { unless (defined $bell[$q]) { if (not defined $option{'others'}) { $bell[$q] = $bell[0]; } elsif (UNIVERSAL::isa($option{'others'}, 'CODE')) { $bell[$q] = [ $option{'others'}->($token) ]; } else { $bell[$q] = [ $option{'others'} ]; } } $skip[$q] = 0; push @queue, $q; } } while (@queue) { # generate $skip[$q] backward function and complete $bell[$q] $q = shift @queue; foreach $token (keys %{$tree[$q]}) { $t = $tree[$q]->{$token}; push @queue, $t; if (defined $bell[$t]) { $skip[$t] = 0; if (UNIVERSAL::isa($bell[$t]->[0], 'ARRAY')) { # shortening property of the rules $s = $skip[$t]; foreach $trick (split //, $bell[$t]->[0]->[1]) { until (defined $tree[$s]->{$trick}) { # loops only if not in the root ^^ push @{$bell[$t]}, @{$bell[$s]}; $s = $skip[$s]; } $s = $tree[$s]->{$trick}; } $skip[$t] = $s; $bell[$t]->[0] = $bell[$t]->[0]->[0]; } } else { $s = $skip[$q]; $bell[$t] = [ @{$bell[$q]} ]; # unique reference quite important ^^ until (defined $tree[$s]->{$token}) { # extremely tricky ... push @{$bell[$t]}, @{$bell[$s]}; $s = $skip[$s]; } $skip[$t] = $tree[$s]->{$token}; } } $tree[$q] = $null_hash unless keys %{$tree[$q]}; # economize with memory } for ($q = 1; $q < @bell; $q++) { # optimize the bell function for $q > 0 if (grep { UNIVERSAL::isa($_, 'CODE') } @{$bell[$q]}) { $no_code = 0; } elsif (defined $option{'join'}) { $bell[$q] = join $option{'join'}, @{$bell[$q]}; next; } if (@{$bell[$q]} == 1) { $bell[$q] = $bell[$q]->[0]; } else { $bell[$q] = $null_list if @{$bell[$q]} == 0; $no_list = 0; } } return bless { 'current' => 0, 'tree' => \@tree, 'bell' => \@bell, 'skip' => \@skip, 'null' => { 'list' => $null_list, 'hash' => $null_hash }, 'join' => $option{'join'}, 'no_code' => $no_code, 'no_list' => $no_list, }, $cls; } sub process ($@) { # returns the list of search results performed by Mapper my $obj = shift @_; my (@returns, $phrase, $token, $q); $q = $obj->{'current'}; if ($obj->{'no_list'}) { foreach $phrase (@_) { foreach $token (split //, $phrase) { until (defined $obj->{'tree'}[$q]->{$token}) { push @returns, $obj->{'bell'}[$q]; $q = $obj->{'skip'}[$q]; } $q = $obj->{'tree'}[$q]->{$token}; } } } else { foreach $phrase (@_) { foreach $token (split //, $phrase) { until (defined $obj->{'tree'}[$q]->{$token}) { push @returns, ref $obj->{'bell'}[$q] eq 'ARRAY' ? @{$obj->{'bell'}[$q]} : $obj->{'bell'}[$q]; $q = $obj->{'skip'}[$q]; } $q = $obj->{'tree'}[$q]->{$token}; } } } $obj->{'current'} = $q; return @returns; } sub recover ($;$$) { # returns the 'in-progress' search result and resets Mapper my ($obj, $r, $q) = @_; my (@returns); $q = $obj->{'current'} unless defined $q; if ($obj->{'no_list'}) { until ($q == 0) { push @returns, $obj->{'bell'}[$q]; $q = $obj->{'skip'}[$q]; } } else { until ($q == 0) { push @returns, ref $obj->{'bell'}[$q] eq 'ARRAY' ? @{$obj->{'bell'}[$q]} : $obj->{'bell'}[$q]; $q = $obj->{'skip'}[$q]; } } $obj->{'current'} = defined $r ? $r : 0; return @returns; } sub compute ($@) { my $obj = shift @_; my (@returns, $phrase, $token, $q); $obj->recover(); foreach $phrase (@_) { foreach $token (split //, $phrase) { push @returns, [$token, $obj->{'current'}]; push @{$returns[-1]}, [$obj->process($token)]; $q = $obj->{'current'}; push @{$returns[-1]}, $q, $obj->{'bell'}[$q], $obj->{'skip'}[$q]; } } push @returns, ['recover', $obj->{'current'}]; push @{$returns[-1]}, [$obj->recover()]; $q = $obj->{'current'}; push @{$returns[-1]}, $q, $obj->{'bell'}[$q], ($q == 0 ? 'undef' : $obj->{'skip'}[$q]); return @returns; } sub dumper ($;$) { my ($obj, $ref) = @_; $ref = ['L', 'H', 'mapper'] unless defined $ref; require Data::Dumper; return Data::Dumper->new([$obj->{'null'}{'list'}, $obj->{'null'}{'hash'}, $obj], $ref); } sub describe ($;$) { my ($obj, $ref) = @_; my ($q, $nodes, $edges, $skips, $bells, $paths, $lists); $nodes = @{$obj->{'tree'}}; $edges = []; $lists = []; if ($obj->{'no_list'}) { for ($q = 0; $q < @{$obj->{'tree'}}; $q++) { $lists->[$q * 3] = scalar %{$obj->{'tree'}[$q]}; $lists->[$q * 3] =~ m{^([0-9]+)(?:/([0-9]+))?$}; $edges->[0] += scalar keys %{$obj->{'tree'}[$q]}; $lists->[$q * 3] .= " " . keys %{$obj->{'tree'}[$q]}; if (defined $2) { $edges->[1] += $1; $edges->[2] += $2; } else { $paths++ unless $1; } $lists->[$q * 3 + 1] = $obj->{'bell'}[$q] eq "" ? 0 : 1; $bells += $lists->[$q * 3 + 1]; next if $q == 0; $lists->[$obj->{'skip'}[$q] * 3 + 2]++; $skips++ unless $obj->{'skip'}[$q] == 0; } } else { for ($q = 0; $q < @{$obj->{'tree'}}; $q++) { $lists->[$q * 3] = scalar %{$obj->{'tree'}[$q]}; $lists->[$q * 3] =~ m{^([0-9]+)(?:/([0-9]+))?$}; $edges->[0] += scalar keys %{$obj->{'tree'}[$q]}; $lists->[$q * 3] .= " " . keys %{$obj->{'tree'}[$q]}; if (defined $2) { $edges->[1] += $1; $edges->[2] += $2; } else { $paths++ unless $1; } $lists->[$q * 3 + 1] = ref $obj->{'bell'}[$q] eq 'ARRAY' ? scalar @{$obj->{'bell'}[$q]} : 1; $bells += $lists->[$q * 3 + 1]; next if $q == 0; $lists->[$obj->{'skip'}[$q] * 3 + 2]++; $skips++ unless $obj->{'skip'}[$q] == 0; } } my $return = {'nodes' => $nodes, 'edges' => $edges->[0], 'slots' => $edges->[1] . "/" . $edges->[2], 'skips' => $skips, 'bells' => $bells, 'paths' => $paths, 'lists' => $lists}; if (defined $ref) { $ref = *STDERR if ref $ref ne 'GLOB'; print $ref ( join ", ", map { ( defined $return->{$_} ? $return->{$_} : 'undef' ) . " " . $_ } grep { $_ ne 'lists' } keys %{$return} ) . "\n"; } return $return; } sub encode ($$$;$) { my ($cls, $text, $encoder, $enc) = @_; my ($mapper, $join); local %option = exists $options{caller 0} ? %{$options{caller 0}} : (); # options be local due to whisper require Encode; unless (Encode::is_utf8($text)) { whisper "The input text is not in Perl's internal utf8 .. note only, might be fine"; } if ($enc) { unless (Encode::resolve_alias($enc)) { carp "Cannot resolve the proposed '$enc' encoding"; return undef; } $text = Encode::encode($enc, $text); } if (not UNIVERSAL::isa($encoder, 'ARRAY') or grep { defined $_ and not $_->isa($cls) } @{$encoder}) { carp "Expecting a reference to an array of '$cls' objects"; return undef; } foreach $mapper (@{$encoder}) { last unless defined $mapper; $join = defined $mapper->{'join'} ? $mapper->{'join'} : defined $option{'join'} ? $option{'join'} : ""; if ($mapper->{'no_code'}) { $text = join $join, $mapper->process($text), $mapper->recover(); } else { $text = join $join, map { UNIVERSAL::isa($_, 'CODE') ? $_->() : $_ } $mapper->process($text), $mapper->recover(); } } return $text; } sub decode ($$$;$) { my ($cls, $text, $decoder, $enc) = @_; my ($mapper, $join); local %option = exists $options{caller 0} ? %{$options{caller 0}} : (); # options be local due to tradition ^^ require Encode; $enc = 'utf8' unless $enc; unless (Encode::resolve_alias($enc)) { carp "Cannot resolve the proposed '$enc' encoding"; return undef; } if (not UNIVERSAL::isa($decoder, 'ARRAY') or grep { defined $_ and not $_->isa($cls) } @{$decoder}) { carp "Expecting a reference to an array of $cls objects"; return undef; } foreach $mapper (@{$decoder}) { last unless defined $mapper; $join = defined $mapper->{'join'} ? $mapper->{'join'} : defined $option{'join'} ? $option{'join'} : ""; if ($mapper->{'no_code'}) { $text = join $join, $mapper->process($text), $mapper->recover(); } else { $text = join $join, map { UNIVERSAL::isa($_, 'CODE') ? $_->() : $_ } $mapper->process($text), $mapper->recover(); } } return Encode::is_utf8($text) ? $text : Encode::decode($enc, $text); } 1; __END__ =head1 NAME Encode::Mapper - Intuitive, yet efficient mappings for Encode =head1 REVISION $Revision: 663 $ $Date: 2008-08-11 16:21:54 +0200 (Mon, 11 Aug 2008) $ =head1 SYNOPSIS use Encode::Mapper; ############################################# Enjoy the ride ^^ use Encode::Mapper ':others', ':silent'; # syntactic sugar for compiler options .. Encode::Mapper->options ( # .. equivalent, see more in the text 'others' => sub { shift }, 'silent' => 1, ); Encode::Mapper->options ( # .. resetting, but not to use 'use' !!! 'others' => undef, 'silent' => 0 ); ## Types of rules for mapping the data and controlling the engine's configuration ##### @rules = ( 'x', 'y', # single 'x' be 'y', unless greediness prefers .. 'xx', 'Y', # .. double 'x' be 'Y' or other rules 'uc(x)x', sub { 'sorry ;)' }, # if 'x' follows 'uc(x)', be sorry, else .. 'uc(x)', [ '', 'X' ], # .. alias this *engine-initial* string 'xuc(x)', [ '', 'xX' ], # likewise, alias for the 'x' prefix 'Xxx', [ sub { $i++; '' }, 'X' ], # count the still married 'x' ); ## Constructors of the engine, i.e. one Encode::Mapper instance ####################### $mapper = Encode::Mapper->compile( @rules ); # engine constructor $mapper = Encode::Mapper->new( @rules ); # equivalent alias ## Elementary performance of the engine ############################################### @source = ( 'x', 'xx', 'xxuc(x)', 'xxx', '', 'xx' ); # distribution of the data .. $source = join '', @source; # .. is ignored in this sense @result = ($mapper->process(@source), $mapper->recover()); # the mapping procedure @result = ($mapper->process($source), $mapper->recover()); # completely equivalent $result = join '', map { ref $_ eq 'CODE' ? $_->() : $_ } @result; # maps 'xxxxxuc(x)xxxxx' into ( 'Y', 'Y', '', 'y', CODE(...), CODE(...), 'y' ), .. # .. then converts it into 'YYyy', setting $i == 2 @follow = $mapper->compute(@source); # follow the engine's computation over @source $dumper = $mapper->dumper(); # returns the engine as a Data::Dumper object ## Module's higher API implemented for convenience #################################### $encoder = [ $mapper, Encode::Mapper->compile( ... ), ... ]; # reference to mappers $result = Encode::Mapper->encode($source, $encoder, 'utf8'); # encode down to 'utf8' $decoder = [ $mapper, Encode::Mapper->compile( ... ), ... ]; # reference to mappers $result = Encode::Mapper->decode($source, $decoder, 'utf8'); # decode up from 'utf8' =head1 ABSTRACT Encode::Mapper serves for intuitive, yet efficient construction of mappings for Encode. The module finds direct application in Encode::Arabic. It provides an object-oriented programming interface to convert data consistently, follow the engine's computation, dump the engine using Data::Dumper, etc. =head1 DESCRIPTION It looks like the author of the extension ... ;) preferred giving formal and terse examples to writing English. Please, see L where L is used for solving complex real-world problems. =head2 INTRO AND RULE TYPES The module's core is an algoritm which, from the rules given by the user, builds a finite-state transducer, i.e. an engine performing greedy search in the input stream and producing output data and side effects relevant to the results of the search. Transducers may be linked one with another, thus forming multi-level devices suitable for nontrivial encoding/decoding tasks. The rules declare which input sequences of L to search for, and what to do upon their occurence. If the left-hand side (LHS) of a rule is the longest left-most string out of those applicable on the input, the righ-hand side (RHS) of the rule is evaluated. The RHS defines the corresponding output string, and possibly controls the engine as if the extra text were prepended before the rest of the input: $A => $X # $A .. literal string # $X .. literal string or subroutine reference $A => [$X, $Y] # $Y .. literal string for which 'length $Y < length $A' The order of the rules does not matter, except when several rules with the same LHS are stated. In such a case, redefinition warning is usually issued before overriding the RHS. =head2 LOW-LEVEL METHODS =over =item compile (I<$class,> @rules) =item compile (I<$class,> $opts, @rules) The constructor of an L instance. The first argument is the name of the class, the rest is the list of rules ... LHS odd elements, RHS even elements, unless the first element is a reference to an array or a hash, which then becomes C<$opts>. If C<$opts> is recognized, it is used to modify the compiler C locally for the engine being constructed. If an option is not overridden, its global setting holds. The compilation algorithm, and the search algorithm itself, were inspired by Aho-Corasick and Boyer-Moore algorithms, and by the studies of finite automata with the restart operation. The engine is implemented in the classical sense, using hashes for the transition function for instance. We expect to improve this to Perl code evaluation, if the speed-up is significant. It is to explore the way Perl's regular expressions would cope with the task, i.e. verify our initial doubts which prevented us from trying. Since L's functionality is much richer than pure search, simulating it completely might be resource-expensive and non-elegant. Therefore, experiment reports are welcome. =item new (I<$class,> @list) Name alias to the C constructor. =item process (I<$obj,> @list) Process the input list with the engine. There is no resetting within the call of the method. Internally, the text in the list is C into L, and there is just no need for the user to C his/hers strings or lines of data. Note the unveiled properties of the L class as well: sub process ($@) { # returns the list of search results performed by Mapper my $obj = shift @_; my (@returns, $phrase, $token, $q); use bytes; # ensures splitting into one-byte tokens $q = $obj->{'current'}; foreach $phrase (@_) { foreach $token (split //, $phrase) { until (defined $obj->{'tree'}[$q]->{$token}) { push @returns, @{$obj->{'bell'}[$q]}; $q = $obj->{'skip'}[$q]; } $q = $obj->{'tree'}[$q]->{$token}; } } $obj->{'current'} = $q; return @returns; } =item recover (I<$obj,> $r, $q) Since the search algorithm is greedy and the engine does not know when the end of the data comes, there must be a method to tell. Normally, C is called on the object without the other two optional parameters setting the initial and the final state, respectively. sub recover ($;$$) { # returns the 'in-progress' search result and resets Mapper my ($obj, $r, $q) = @_; my (@returns); $q = $obj->{'current'} unless defined $q; until ($q == 0) { push @returns, @{$obj->{'bell'}[$q]}; $q = $obj->{'skip'}[$q]; } $obj->{'current'} = defined $r ? $r : 0; return @returns; } =item compute (I<$obj,> @list) Tracks down the computation over the list of data, resetting the engine before and after to its initial state. Developers might like this ;) local $\ = "\n"; local $, = ":\t"; # just define the display foreach $result ($mapper->compute($source)) { # follow the computation print "Token" , $result->[0]; print "Source" , $result->[1]; print "Output" , join " + ", @{$result->[2]}; print "Target" , $result->[3]; print "Bell" , join ", ", @{$result->[4]}; print "Skip" , $result->[5]; } =item dumper (I<$obj,> $ref) The individual instances of L can be stored as revertible data structures. For minimalistic reasons, dumping needs to include explicit short-identifier references to the empty array and the empty hash of the engine. For details, see L. sub dumper ($;$) { my ($obj, $ref) = @_; $ref = ['L', 'H', 'mapper'] unless defined $ref; require Data::Dumper; return Data::Dumper->new([$obj->{'null'}{'list'}, $obj->{'null'}{'hash'}, $obj], $ref); } =item describe (I<$obj,> $ref) Describes the L object and returns a hash of the characteristics. If C<$ref> is defined, the information is also Ced into the C<$ref>erenced stream, or to C if C<$ref> is not a filehandle. =back =head2 HIGH-LEVEL METHODS In the L world, one can work with different encodings and is also provided a function for telling if the data are in Perl's internal utf8 format or not. In the L business, one is encouraged to compile different mappers and stack them on top of each other, getting an easy-to-work-with filtering device. In combination, this module offers the following C and C methods. In their prototypes, C<$encoder>/C<$decoder> represent merely a reference to an array of mappers, although mathematics might do more than that in future implementations ;) Currently, the mappers involved are not reset with C before the computation. See the C<--join> option for more comments on the code: foreach $mapper (@{$_[2]}) { # either $encoder or $decoder $join = defined $mapper->{'join'} ? $mapper->{'join'} : defined $option{'join'} ? $option{'join'} : ""; $text = join $join, map { UNIVERSAL::isa($_, 'CODE') ? $_->() : $_ } $mapper->process($text), $mapper->recover(); } =over =item encode (I<$class,> $text, $encoder, $enc) If C<$enc> is defined, the C<$text> is encoded into that encoding, using L. Then, the C<$encoder>'s engines are applied in series on the data. The returned text should have the utf8 flag off. =item decode (I<$class,> $text, $decoder, $enc) The C<$text> is run through the sequence of engines in C<$decoder>. If the result does not have the utf8 flag on, decoding from C<$enc> is further performed by L. If C<$enc> is not defined, utf8 is assumed. =back =head2 OPTIONS AND EXPORT The language the L engine works on is not given exclusively by the rules passed as parameters to the C or C constructor methods. The nature of the compilation is influenced by the current setting of the following options: =over =item --complement This option accepts a reference to an array declaring rules which are to complement the rules of the constructor. Redefinition warnings are issued only if you redefine within the option's list, not when a rule happens to be overridden during compilation. =item --override Overrides the rules of the constructor. Redefinition warnings are issued, though. You might, for example, want to preserve all XML markup in the data you are going to process through your encoders/decoders: 'override' => [ # override rules of these LHS .. there's no other tricks ^^ ( # combinations of '<' and '>' with the other bytes map { my $x = chr $_; "<" . $x, [ "<" . $x, ">" ], # propagate the '>' sign implying .. ">" . $x, [ $x, ">" ], # .. preservation of the bytes } 0x00..0x3B, 0x3D, 0x3F..0xFF ), ">>", ">", # stop the whole process .. "<>", "<>", # .. do not even start it "><", [ "<", ">" ], # rather than nested '<' and '>', .. "<<", [ "<<", ">" ], ">\\<", [ "<", ">" ], # .. prefer these escape sequences ">\\\\", [ "\\", ">" ], ">\\>", [ ">", ">" ], ">", ">", # singular symbols may migrate right .. "<", "<", # .. or preserve the rest of the data ] =item --others If defined, this option controls how to deal with 'others', i.e. bytes of input for which there is no rule, by defining rules for them. In case this option gets a code reference, the referenced subroutine will be called with the 'other' LHS parameter to get the rule's RHS. Otherwise, a defined scalar value will become the RHS of each 'other' LHS. To preserve the 'other' bytes, you can use 'others' => sub { shift } # preserve every non-treated byte the effect of which is similar to including the C to the C<--complement> rules: 'complement' => [ ( map { ( chr $_ ) x 2 } 0x00..0xFF ), ... ] # ... is your rules You may of course wish to return undefined values if there are any non-treated bytes in the input. In order for the C to be a correct RHS, you have to protect it once more by the C like this: 'others' => sub { sub { undef } } =item --silent Setting it to a true value will prevent any warnings issued during the engine's compilation, mostly reflecting an incorrect or dubious use of a rule. =item --join This option enables less memory-requiring representation of the engines. If this option is defined when the constructor is called, the setting is stored in the instance internally. Any lists of literal RHS which are to be emitted simultaneously from the engine are joined into a string with the option's value, empty lists turn into empty strings. If an engine was compiled with this option defined, the value will be used to join output of C and C, too. If not, either the current value of the option or the empty string will help instead. =back The keywords of options can be in mixed case and/or start with any number of dashes, and the next element in the list is taken as the option's value. There are special keywords, however, beginning with a colon and not gulping down the next element: =over =item :others Equivalent to the code C<< 'others' => sub { shift } >> explained above. =item :silent Equivalent to C<< 'silent' => 1 >>, or rather to the maximum silence if more degrees of it are introduced in the future. =item :join Equivalent to C<< 'join' => '' >>. Use this option if you are going to dump and load the new engine often, and if you do not miss the list-supporting uniformity of C and C. =back Compiler options are associated with package names in the C<%Encode::Mapper::options> variable, and confined to them. While C and C perform the setting with respect to the caller package, accessing the hash directly is neither recommended, nor restricted. There is a nice compile-time invocation of C with the CC< Encode::Mapper LIST> idiom, which you might prefer to explicit method calls. Local modification of the package's global setting that applies just to the engine being constructed is done by supplying the options as an extra parameter to C. use Data::Dump 'dump'; # pretty data printing is below $Encode::Mapper::options{'ByForce'} = { qw ':others - silent errors' }; package ByMethod; # import called at compile time # no warnings, 'silent' is true Encode::Mapper->options('complement' => [ 'X', 'Y' ], 'others' => 'X'); use Encode::Mapper 'silent' => 299_792_458; package main; # import called at compile time # 'non-existent' may exist once print dump %Encode::Mapper::options; use Encode::Mapper ':others', ':silent', 'non-existent', 'one'; # ( # "ByMethod", # { complement => ["X", "Y"], others => "X", silent => 299_792_458 }, # "ByForce", # { ":others" => "-", silent => "errors" }, # "main", # { "non-existent" => "one", others => sub { "???" }, silent => 1 }, # ) =over =item options (I<$class,> @list) If C<$class> is defined, enforces the options in the list globally for the calling package. The return value of this method is the state of the options before the proposed changes were set. If C<$class> is undefined, nothing is set, only the canonized forms of the declared keywords and their values are returned. =item import (I<$class,> @list) This module does not export any symbols. This method just calls C, provided there are some elements in the list. =back =head1 SEE ALSO There are related theoretical studies which the implementation may have touched. You might be interested in Aho-Corasick and Boyer-Moore algorithms as well as in finite automata with the restart operation. L, L, L Encode Arabic: Exercise in Functional Parsing L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2007 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/lib/Encode/Arabic.pm0000444001432400135600000001650311144136727016141 0ustar smrzufal# ###################################################################### Otakar Smrz, 2003/01/23 # # Encodings of Arabic ########################################################################## # $Id: Arabic.pm 808 2009-02-09 23:19:07Z smrz $ package Encode::Arabic; our $VERSION = '1.9' || do { q $Revision: 808 $ =~ /(\d+)/; sprintf "%4.2f", $1 / 100 }; sub import { # perform import as if Encode were used one level before this module if (defined $_[1] and $_[1] eq ':modes') { require Exporter; @ISA = qw 'Exporter'; @EXPORT_OK = qw 'enmode demode'; __PACKAGE__->export_to_level(1, $_[0], 'enmode', 'demode'); splice @_, 1, 1; } require Encode; Encode->export_to_level(1, @_); } use lib '..'; use Encode::Arabic::ArabTeX; use Encode::Arabic::ArabTeX::RE; use Encode::Arabic::ArabTeX::Verbatim; use Encode::Arabic::ArabTeX::ZDMG; use Encode::Arabic::ArabTeX::ZDMG::RE; use Encode::Arabic::Buckwalter; use Encode::Arabic::Parkinson; sub enmode ($@) { my $enc = shift; my $obj = Encode::find_encoding($enc); unless (defined $obj){ require Carp; Carp::croak("Unknown encoding '$enc'"); } $obj->enmode(@_); } sub demode ($@) { my $enc = shift; my $obj = Encode::find_encoding($enc); unless (defined $obj){ require Carp; Carp::croak("Unknown encoding '$enc'"); } $obj->demode(@_); } 1; __END__ =head1 NAME Encode::Arabic - Encodings of Arabic =head1 REVISION $Revision: 808 $ $Date: 2009-02-10 00:19:07 +0100 (Tue, 10 Feb 2009) $ =head1 SYNOPSIS use Encode::Arabic; # imports just like 'use Encode' even with options would while ($line = <>) { # renders the ArabTeX notation for Arabic both in the .. print encode 'utf8', decode 'arabtex', $line; # .. Arabic script proper and the print encode 'utf8', decode 'arabtex-zdmg', $line; # .. Latin phonetic transcription } # 'use Encode::Arabic ":modes"' would export the functions controlling the conversion modes Encode::Arabic::demode 'arabtex', 'default'; Encode::Arabic::enmode 'buckwalter', 'full', 'xml', 'strip off kashida'; # Arabic in lower ASCII transliterations <--> Arabic script in Perl's internal encoding $string = decode 'ArabTeX', $octets; $octets = encode 'Buckwalter', $string; $string = decode 'Buckwalter', $octets; $octets = encode 'ArabTeX', $string; # Arabic in lower ASCII transliterations <--> Latin phonetic transcription, Perl's utf8 $string = decode 'Buckwalter', $octets; $octets = encode 'ArabTeX', $string; $string = decode 'ArabTeX-ZDMG', $octets; $octets = encode 'utf8', $string; =head1 DESCRIPTION This module is a wrapper for various implementations of the encoding systems used for the Arabic language and covering even some non-Arabic extensions to the Arabic script. The included modules fit in the philosophy of L and can be used directly with the L module. =head2 LIST OF ENCODINGS =over =item ArabTeX ArabTeX multi-character notation for Arabic / Perl's internal format for the Arabic script L, uses L =item ArabTeX-RE Deprecated method using sequential regular-expression substitutions. Limited in scope over the ArabTeX notation and non-efficient in data processing, still, not requiring the L module. L =item ArabTeX-Verbatim ArabTeX multi-character I notation for Arabic / Perl's internal format for the Arabic script L, uses L =item ArabTeX-ZDMG ArabTeX multi-character notation for Arabic / Perl's internal format for the Latin phonetic trascription in the ZDMG style L, uses L =item ArabTeX-ZDMG-RE Deprecated method using sequential regular-expression substitutions. Limited in scope over the ArabTeX notation and non-efficient in data processing, still, not requiring the L module. L =item Buckwalter Buckwalter one-to-one notation for Arabic / Perl's internal format for the Arabic script L =item Parkinson Parkinson one-to-one notation for Arabic / Perl's internal format for the Arabic script L =back There are generic aliases to these provided by L. Case does not matter and all characters of the class C<[ _-]> are interchangable. Note that the standard L module already deals with several other single-byte encoding schemes for Arabic popular with whichever operating system, be it *n*x, Windows, DOS or Macintosh. See L and L for their identification names and aliases. =head2 EXPORTS & MODES The module exports as if C also appeared in the calling package. The C options are just delegated to L and imports performed properly, with the exception of the C<:modes> option coming first in the list. In such a case, the following functions will be introduced into the namespace of the importing package: =over =item enmode ($enc, @list) Calls the C method associated with the given C<$enc> encoding, and passes the C<@list> to it. The idea is similar to the C functions and methods of the L and L modules, respectively. Used for control over the modes of conversion. =item demode ($enc, @list) Analogous to C, but calling the appropriate C method. See the individual implementations of the listed encodings. =back =head1 SEE ALSO Encode::Arabic Online Interface L Encode Arabic Project L ElixirFM Online Interface L ElixirFM Project L Klaus Lagally's ArabTeX L Tim Buckwalter's Qamus L Arabeyes Arabic Unix Project L Lecture Notes on Arabic NLP L L, L, L, L L L L, L L =head1 AUTHOR Otakar Smrz, L eval { 'E' . ( join '.', qw 'otakar smrz' ) . "\x40" . ( join '.', qw 'mff cuni cz' ) . 'E' } Perl is also designed to make the easy jobs not that easy ;) =head1 COPYRIGHT AND LICENSE Copyright 2003-2009 by Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-Arabic-1.9/README0000444001432400135600000000163111144136727013333 0ustar smrzufalEncode Arabic - Encodings of Arabic Encode Arabic is a wrapper for modules implementing the various encoding systems used for the Arabic language and covering even some non-Arabic extensions to the Arabic script. The modules fit in the philosophy of Encode::Encoding and can be used directly with the Encode module. INSTALLATION To install this module, run the following from the command line: perl Build.PL Build Build test Build install or use the Perl Package Manager instead and run ppm to enter the interactive interface for module management. DOCUMENTATION Visit the site http://search.cpan.org/dist/Encode-Arabic/ or run perldoc [file|module] in order to get more information on this and related software. COPYRIGHT AND LICENCE Copyright (C) 2003-2009 Otakar Smrz This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Encode-Arabic-1.9/Changes0000444001432400135600000002010511144136727013743 0ustar smrzufalRevision: 720 Author: smrz Date: 00:24:41, October 4, 2008 Message: Test improvements ... ---- Revision: 719 Author: smrz Date: 23:42:24, October 3, 2008 Message: Test improvements ... ---- Revision: 717 Author: smrz Date: 00:28:12, October 3, 2008 Message: Font-fixing option ^^ ---- Revision: 705 Author: smrz Date: 17:26:23, September 12, 2008 Message: Styling ... ---- Revision: 676 Author: smrz Date: 13:25:26, August 14, 2008 Message: Minor fixes ... ---- Revision: 675 Author: smrz Date: 02:25:15, August 14, 2008 Message: Encode Arabic Online ^^ ---- Revision: 674 Author: smrz Date: 11:39:03, August 13, 2008 Message: End of line styles ... ---- Revision: 673 Author: smrz Date: 23:33:48, August 12, 2008 Message: CGI application ^^ ---- Revision: 671 Author: smrz Date: 22:39:36, August 12, 2008 Message: CGI application ^^ ---- Revision: 670 Author: smrz Date: 21:51:05, August 12, 2008 Message: Moving files around ... ---- Revision: 669 Author: smrz Date: 21:42:56, August 12, 2008 Message: CGI application ^^ ---- Revision: 667 Author: smrz Date: 15:36:28, August 12, 2008 Message: Initial , , , assimilation ---- Revision: 666 Author: smrz Date: 11:23:23, August 12, 2008 Message: Improvements ... ---- Revision: 665 Author: smrz Date: 01:03:51, August 12, 2008 Message: Initial , , , assimilation ---- Revision: 664 Author: smrz Date: 16:39:16, August 11, 2008 Message: Simplifying ... ---- Revision: 663 Author: smrz Date: 16:21:54, August 11, 2008 Message: Merging Encode-Mapper with Encode-Arabic ---- Revision: 591 Author: smrz Date: 14:43:17, June 30, 2008 Message: Improved orthography ---- Revision: 582 Author: smrz Date: 00:41:41, June 20, 2008 Message: Minor changes ... ---- Revision: 550 Author: smrz Date: 16:22:13, May 6, 2008 Message: Executables ... ---- Revision: 455 Author: smrz Date: 21:22:04, December 14, 2007 Message: SVN properties ---- Revision: 450 Author: smrz Date: 14:52:22, December 10, 2007 Message: Changes ... ---- Revision: 449 Author: smrz Date: 13:46:26, December 10, 2007 Message: Encode Arabic 1.7 ---- Revision: 448 Author: smrz Date: 23:40:47, December 8, 2007 Message: Fixing and ---- Revision: 420 Author: smrz Date: 10:37:55, September 26, 2007 Message: Formal fixes ---- Revision: 419 Author: smrz Date: 21:06:29, September 25, 2007 Message: New releases ---- Revision: 417 Author: smrz Date: 15:03:33, September 24, 2007 Message: Release 1.6 ---- Revision: 416 Author: smrz Date: 11:34:49, Saturday, 22 September, 2007 Message: Perl dependent ---- Revision: 412 Author: smrz Date: 21:30:22, Thursday, 20 September, 2007 Message: Build dependencies ---- Revision: 370 Author: smrz Date: 01:01:11, Saturday, 07 July, 2007 Message: Uninflectedness and tags ---- Revision: 368 Author: smrz Date: 14:31:04, Thursday, 05 July, 2007 Message: CPAN warning on non-portability ---- Revision: 343 Author: smrz Date: 11:25:02, Wednesday, 13 June, 2007 Message: Line-oriented interaction ---- Revision: 339 Author: smrz Date: 02:26:53, Friday, 08 June, 2007 Message: Fixes in roots ---- Revision: 338 Author: smrz Date: 03:30:40, Thursday, 07 June, 2007 Message: Morphophonemic "yY" in effect ---- Revision: 308 Author: smrz Date: 20:58:57, Wednesday, 16 May, 2007 Message: Improving "mikwYT" ---- Revision: 219 Author: smrz Date: 12:34:22, Friday, 23 February, 2007 Message: Re-organizing ---- Revision: 197 Author: smrz Date: 15:12:25, Monday, 05 February, 2007 Message: Extensions and improvements ---- Revision: 187 Author: smrz Date: 14:27:14, Friday, 26 January, 2007 Message: Portable builds ---- Revision: 179 Author: smrz Date: 01:23:25, Sunday, 14 January, 2007 Message: Fixing documentation ---- Revision: 166 Author: smrz Date: 19:17:26, Thursday, 28 December, 2006 Message: Minor changes ---- Revision: 165 Author: smrz Date: 12:06:40, Thursday, 21 December, 2006 Message: Defining install_base parameter ---- Revision: 162 Author: smrz Date: 01:16:10, Saturday, 16 December, 2006 Message: Various fixes ---- Revision: 159 Author: smrz Date: 23:45:41, Wednesday, 13 December, 2006 Message: svn:properties ---- Revision: 147 Author: smrz Date: 11:21:52, Thursday, 16 November, 2006 Message: Release 1.4 ---- Revision: 146 Author: smrz Date: 11:13:07, Thursday, 16 November, 2006 Message: Requires with 1.4 ---- Revision: 144 Author: smrz Date: 21:18:37, Wednesday, 15 November, 2006 Message: Finalizing documentation ---- Revision: 143 Author: smrz Date: 02:16:57, Wednesday, 15 November, 2006 Message: Documentation and testing ---- Revision: 142 Author: smrz Date: 12:02:20, Tuesday, 14 November, 2006 Message: Documentation changes ---- Revision: 128 Author: smrz Date: 19:09:01, Saturday, 04 November, 2006 Message: Improvements ---- Revision: 123 Author: smrz Date: 01:59:22, Saturday, 28 October, 2006 Message: Interfaces ---- Revision: 118 Author: smrz Date: 13:32:09, Thursday, 19 October, 2006 Message: Exporting and removing Encode-Korean ---- Revision: 117 Author: smrz Date: 02:14:19, Thursday, 19 October, 2006 Message: Separated ^^ ---- Revision: 115 Author: smrz Date: 22:27:27, Wednesday, 18 October, 2006 Message: Fooling around ---- Revision: 114 Author: smrz Date: 22:15:43, Wednesday, 18 October, 2006 Message: Distributions ---- RCS file: /home/CVSROOT/smrz/perl/Encode/Arabic/Arabic.pm,v Working file: Arabic.pm head: 1.16 branch: locks: strict access list: symbolic names: Encode-Arabic-1-16: 1.16 Encode-Arabic-1-14: 1.14 Encode-Arabic-1-13: 1.13 Encode-Arabic-1-12: 1.12 Encode-Arabic-1-11: 1.11 Encode-Arabic-1-10: 1.10 Encode-Arabic-1-09: 1.9 start: 1.1.1.1 smrz: 1.1.1 keyword substitution: kv total revisions: 17; selected revisions: 17 description: ---------------------------- revision 1.16 date: 2006/02/04 01:04:03; author: smrz; state: Exp; lines: +12 -4 Encode::Arabic::Parkinson ---------------------------- revision 1.15 date: 2005/11/28 00:46:40; author: smrz; state: Exp; lines: +7 -7 Incognito ^^ ---------------------------- revision 1.14 date: 2005/10/02 16:08:04; author: smrz; state: Exp; lines: +7 -5 Fixes in Encode::Arabic::ArabTeX ---------------------------- revision 1.13 date: 2005/07/22 20:00:02; author: smrz; state: Exp; lines: +14 -5 Including Encode::Arabic::ArabTeX::Verbatim ---------------------------- revision 1.12 date: 2005/02/15 17:01:23; author: smrz; state: Exp; lines: +5 -5 Extensions in Encode::Arabic::ArabTeX ---------------------------- revision 1.11 date: 2004/08/21 11:02:10; author: smrz; state: Exp; lines: +80 -12 Implementation of modes ---------------------------- revision 1.10 date: 2004/01/12 22:10:28; author: smrz; state: Exp; lines: +4 -4 Improvements to the installation and Encode::Arabic::ArabTeX ---------------------------- revision 1.9 date: 2003/09/08 19:43:00; author: smrz; state: Exp; lines: +4 -4 Encode::Arabic Online Interface ---------------------------- revision 1.8 date: 2003/09/02 17:21:02; author: smrz; state: Exp; lines: +10 -8 Buckwalter needs use ;), Encode::Arabic On-Line ---------------------------- revision 1.7 date: 2003/08/26 11:06:17; author: smrz; state: Exp; lines: +28 -9 Improved documentation ---------------------------- revision 1.6 date: 2003/08/21 19:53:08; author: smrz; state: Exp; lines: +103 -23 Inclusion of all modules, plus documentation ---------------------------- revision 1.5 date: 2003/08/04 09:36:02; author: smrz; state: Exp; lines: +79 -79 UNIX EOL in all files ---------------------------- revision 1.4 date: 2003/07/23 13:58:59; author: smrz; state: Exp; lines: +0 -0 Tests, fix of is_utf8 ---------------------------- revision 1.3 date: 2003/07/21 22:48:27; author: smrz; state: Exp; lines: +15 -7 Module's 'use Encode' mimics, newer POD ---------------------------- revision 1.2 date: 2003/07/20 22:25:00; author: smrz; state: Exp; lines: +5 -7 Improvement in the module and POD ---------------------------- revision 1.1 date: 2003/07/19 19:31:01; author: smrz; state: Exp; branches: 1.1.1; Initial revision ---------------------------- revision 1.1.1.1 date: 2003/07/19 19:31:01; author: smrz; state: Exp; lines: +0 -0 Initial h2xs adaptation ============================================================================= Encode-Arabic-1.9/Build.PL0000444001432400135600000000106511144136727013750 0ustar smrzufaluse Module::Build; use strict; use warnings; my $build = Module::Build->new( module_name => 'Encode::Arabic', license => 'perl', dist_author => 'Otakar Smrz ', dist_version_from => 'lib/Encode/Arabic.pm', requires => { 'perl' => 5.008, }, build_requires => { 'Test::More' => 0, }, add_to_cleanup => [ 'Encode-Arabic-*' ], ); $build->create_build_script(); Encode-Arabic-1.9/META.yml0000444001432400135600000000222011144136727013717 0ustar smrzufal--- name: Encode-Arabic version: 1.9 author: - 'Otakar Smrz ' abstract: Encodings of Arabic license: perl resources: license: http://dev.perl.org/licenses/ requires: perl: 5.008 build_requires: Test::More: 0 provides: Encode::Arabic: file: lib/Encode/Arabic.pm version: 1.9 Encode::Arabic::ArabTeX: file: lib/Encode/Arabic/ArabTeX.pm version: 7.17 Encode::Arabic::ArabTeX::RE: file: lib/Encode/Arabic/ArabTeX/RE.pm version: 1.62 Encode::Arabic::ArabTeX::Verbatim: file: lib/Encode/Arabic/ArabTeX/Verbatim.pm version: 7.17 Encode::Arabic::ArabTeX::ZDMG: file: lib/Encode/Arabic/ArabTeX/ZDMG.pm version: 8.08 Encode::Arabic::ArabTeX::ZDMG::RE: file: lib/Encode/Arabic/ArabTeX/ZDMG/RE.pm version: 1.43 Encode::Arabic::Buckwalter: file: lib/Encode/Arabic/Buckwalter.pm version: 1.79 Encode::Arabic::Parkinson: file: lib/Encode/Arabic/Parkinson.pm version: 1.79 Encode::Mapper: file: lib/Encode/Mapper.pm version: 1.6 generated_by: Module::Build version 0.2808 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.2.html version: 1.2 Encode-Arabic-1.9/MANIFEST0000444001432400135600000000130411144136727013601 0ustar smrzufalBuild.PL bin/decode.pl bin/encode.pl Changes lib/Encode/Arabic.pm lib/Encode/Arabic/ArabTeX.pm lib/Encode/Arabic/ArabTeX/RE.pm lib/Encode/Arabic/ArabTeX/Verbatim.pm lib/Encode/Arabic/ArabTeX/ZDMG.pm lib/Encode/Arabic/ArabTeX/ZDMG/RE.pm lib/Encode/Arabic/Buckwalter.pm lib/Encode/Arabic/Parkinson.pm lib/Encode/Mapper.pm MANIFEST META.yml README t/00-load.t t/boilerplate.t t/Encode-Arabic-ArabTeX-RE.t t/Encode-Arabic-ArabTeX-Verbatim.t t/Encode-Arabic-ArabTeX-ZDMG-RE.t t/Encode-Arabic-ArabTeX-ZDMG.t t/Encode-Arabic-ArabTeX.t t/Encode-Arabic-Buckwalter.t t/Encode-Arabic-Parkinson.t t/Encode-Arabic.t t/Encode-Mapper-1.t t/Encode-Mapper-2.t t/Encode-Mapper-3.t t/Encode-Mapper-4.t t/pod-coverage.t t/pod.t