Encode-ZapCP1252-0.33000755000767000024 011663101120 13465 5ustar00davidstaff000000000000Encode-ZapCP1252-0.33/Build.PL000444000767000024 144211663101120 15117 0ustar00davidstaff000000000000use Module::Build; my $build = Module::Build->new( module_name => 'Encode::ZapCP1252', license => 'perl', create_makefile_pl => 'traditional', requires => { 'perl' => 5.006_002 }, configure_requires => { 'Module::Build' => '0.36' }, recommends => { 'Test::Pod' => '1.41', 'Encode' => 0, }, build_requires => { 'Test::More' => '0.17', 'Module::Build' => '0.36', }, meta_merge => { resources => { homepage => 'http://search.cpan.org/dist/Encode-CP1252/', bugtracker => 'http://github.com/theory/encode-cp1252/issues/', repository => 'http://github.com/theory/encode-cp1252/tree', } }, ); $build->create_build_script; Encode-ZapCP1252-0.33/Changes000444000767000024 530111663101120 15114 0ustar00davidstaff000000000000Revision history for Perl extension Encode::ZapCP1252 0.33 2011-11-23T05:19:36Z - Require Test::Pod 1.41 to support `L` syntax in the Pod. - Support for decoded strings is supported only in 5.8.8 and higher due to bugs in the handling of UTF-8 characters in regular expressions prior to that release. 0.32 2010-06-16T22:45:06 - Fixed bug where `zap_cp1252` was failing to return a value when called in a list or scalar context. 0.31 2010-06-14T03:09:06 - Fixed test failures on Perls less than 5.10. 0.30 2010-06-12T18:05:38 - The conversion functions now ignore `undef` arguments and just return without doing anything. - Strings are no longer modifed in-place unless the conversion subroutines are called in a void context. - The conversion functions may optionally be called with no arugment when run in Perl 5.10 or higher, in which case they will instead act on `$_`. 0.20 2010-06-12T00:39:35 - Added `local` to examples of changing the maping tables. - When the Encode module is insatlled, zapping and fixing CP1252 gremlins now works in decoded strings, too. 
- For convenience, the functions now return the strings they've modified. - Shipping with a traditional `Makefile.PL` rather than one that passes through to Module::Build. - Moved repository to [GitHub](http://github.com/theory/encode-zapcp1252). 0.12 2008-06-23T17:48:04 - Fixed pasto in the "Support" section of the docs. - Fixed a typo in the "Synopsis" section of the docs, thanks to David Beaudet. - Fixed the 5.6.2 requirement to be properly detected in Perl 5.5. Thanks to Slaven Rezic for the report. 0.11 2008-05-03T21:33:40 - Removed bogus test.out file from the distribution. No idea how that got in there. - Added Module::Build to `build_requires` in Build.PL. - Fixed another Perl 5.6.2 test failure reported by David Cantrell. - Added Encode to the `recommends` parameter in Build.PL. 0.10 2008-05-02T18:32:27 - Added `fix_cp1252()`, which converts CP1252 gremlins to their UTF-8 equivalents. - Made the character mapping hashes into `our` variables, instead of lexicals, so that they can be messed with externally. Suggested by Max Maischen. - Added link to source code repository. - Now requiring Perl 5.6.2 or greater. 0.02 2008-05-02T04:50:04 - Fixed documentation typo. - Added the "configure_requires", and "recommends" parameters to Build.PL. - Fixed compatibility issue with Perl 5.6.2. 0.01 2005-10-04T23:20:26 - Initial public release. 
Encode-ZapCP1252-0.33/Makefile.PL000444000767000024 63611663101120 15561 0ustar00davidstaff000000000000# Note: this file was auto-generated by Module::Build::Compat version 0.3800 require 5.006002; use ExtUtils::MakeMaker; WriteMakefile ( 'NAME' => 'Encode::ZapCP1252', 'VERSION_FROM' => 'lib/Encode/ZapCP1252.pm', 'PREREQ_PM' => { 'Module::Build' => '0.36', 'Test::More' => '0.17' }, 'INSTALLDIRS' => 'site', 'EXE_FILES' => [], 'PL_FILES' => {} ) ; Encode-ZapCP1252-0.33/MANIFEST000444000767000024 22711663101120 14734 0ustar00davidstaff000000000000Build.PL Changes lib/Encode/ZapCP1252.pm MANIFEST This list of files README t/base.t t/decoded.t t/perl-510.t t/pod.t Makefile.PL META.yml META.json Encode-ZapCP1252-0.33/META.json000444000767000024 260511663101120 15246 0ustar00davidstaff000000000000{ "abstract" : "Zap Windows Western Gremlins", "author" : [ "David E. Wheeler " ], "dynamic_config" : 1, "generated_by" : "Module::Build version 0.38, CPAN::Meta::Converter version 2.112150", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Encode-ZapCP1252", "prereqs" : { "build" : { "requires" : { "Module::Build" : "0.36", "Test::More" : "0.17" } }, "configure" : { "requires" : { "Module::Build" : "0.36" } }, "runtime" : { "recommends" : { "Encode" : 0, "Test::Pod" : "1.41" }, "requires" : { "perl" : "5.006002" } } }, "provides" : { "Encode::ZapCP1252" : { "file" : "lib/Encode/ZapCP1252.pm", "version" : "0.33" } }, "release_status" : "stable", "resources" : { "bugtracker" : { "web" : "http://github.com/theory/encode-cp1252/issues/" }, "homepage" : "http://search.cpan.org/dist/Encode-CP1252/", "license" : [ "http://dev.perl.org/licenses/" ], "repository" : { "url" : "http://github.com/theory/encode-cp1252/tree" } }, "version" : "0.33" } Encode-ZapCP1252-0.33/META.yml000444000767000024 146211663101120 15076 0ustar00davidstaff000000000000--- abstract: 'Zap Windows Western Gremlins' author: - 'David E. 
Wheeler ' build_requires: Module::Build: 0.36 Test::More: 0.17 configure_requires: Module::Build: 0.36 dynamic_config: 1 generated_by: 'Module::Build version 0.38, CPAN::Meta::Converter version 2.112150' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: Encode-ZapCP1252 provides: Encode::ZapCP1252: file: lib/Encode/ZapCP1252.pm version: 0.33 recommends: Encode: 0 Test::Pod: 1.41 requires: perl: 5.006002 resources: bugtracker: http://github.com/theory/encode-cp1252/issues/ homepage: http://search.cpan.org/dist/Encode-CP1252/ license: http://dev.perl.org/licenses/ repository: http://github.com/theory/encode-cp1252/tree version: 0.33 Encode-ZapCP1252-0.33/README000444000767000024 264511663101120 14511 0ustar00davidstaff000000000000Encode/CP1252 version 0.33 ========================== Have you ever been processing a Web form submit, assuming that the incoming text was encoded in ISO-8859-1 (Latin-1), only to end up with a bunch of junk because someone pasted in content from Microsoft Word? Well, this is because Microsoft uses a superset of the Latin-1 encoding called "Windows Western" or "CP1252". So mostly things will come out right, but a few things--like curly quotes, m-dashes, ellipses, and the like--will not. The differences are well-known; you see a nice chart at documenting the differences on [Wikipedia](http://en.wikipedia.org/wiki/Windows-1252). Of course, that won't really help you. 
So this library's module, Encode::ZapCP1252, provides subroutines for removing Windows Western Gremlins from strings, turning them into their appropriate UTF-8 or ASCII approximations: my $clean_latin1 = zap_cp1252 $latin1_text; my $fixed_utf8 = fix_cp1252 $utf8_text; Installation ------------ To install this module, type the following: perl Build.PL ./Build ./Build test ./Build install Or, if you don't have Module::Build installed, type the following: perl Makefile.PL make make test make install Copyright and Licence --------------------- Copyright (c) 2005-2010 David E. Wheeler. Some Rights Reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. Encode-ZapCP1252-0.33/lib000755000767000024 011663101120 14233 5ustar00davidstaff000000000000Encode-ZapCP1252-0.33/lib/Encode000755000767000024 011663101120 15430 5ustar00davidstaff000000000000Encode-ZapCP1252-0.33/lib/Encode/ZapCP1252.pm000444000767000024 2650411663101120 17441 0ustar00davidstaff000000000000package Encode::ZapCP1252; use strict; require Exporter; use vars qw($VERSION @ISA @EXPORT); use 5.006_002; $VERSION = '0.33'; @ISA = qw(Exporter); @EXPORT = qw(zap_cp1252 fix_cp1252); use constant PERL588 => $] >= 5.008_008; require Encode if PERL588; our %ascii_for = ( # http://en.wikipedia.org/wiki/Windows-1252 "\x80" => 'e', # EURO SIGN "\x82" => ',', # SINGLE LOW-9 QUOTATION MARK "\x83" => 'f', # LATIN SMALL LETTER F WITH HOOK "\x84" => ',,', # DOUBLE LOW-9 QUOTATION MARK "\x85" => '...', # HORIZONTAL ELLIPSIS "\x86" => '+', # DAGGER "\x87" => '++', # DOUBLE DAGGER "\x88" => '^', # MODIFIER LETTER CIRCUMFLEX ACCENT "\x89" => '%', # PER MILLE SIGN "\x8a" => 'S', # LATIN CAPITAL LETTER S WITH CARON "\x8b" => '<', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK "\x8c" => 'OE', # LATIN CAPITAL LIGATURE OE "\x8e" => 'Z', # LATIN CAPITAL LETTER Z WITH CARON "\x91" => "'", # LEFT SINGLE QUOTATION MARK "\x92" => "'", # RIGHT SINGLE QUOTATION MARK "\x93" => '"', # 
LEFT DOUBLE QUOTATION MARK "\x94" => '"', # RIGHT DOUBLE QUOTATION MARK "\x95" => '*', # BULLET "\x96" => '-', # EN DASH "\x97" => '--', # EM DASH "\x98" => '~', # SMALL TILDE "\x99" => '(tm)', # TRADE MARK SIGN "\x9a" => 's', # LATIN SMALL LETTER S WITH CARON "\x9b" => '>', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK "\x9c" => 'oe', # LATIN SMALL LIGATURE OE "\x9e" => 'z', # LATIN SMALL LETTER Z WITH CARON "\x9f" => 'Y', # LATIN CAPITAL LETTER Y WITH DIAERESIS ); our %utf8_for = ( # http://en.wikipedia.org/wiki/Windows-1252 "\x80" => '€', # EURO SIGN "\x82" => ',', # SINGLE LOW-9 QUOTATION MARK "\x83" => 'ƒ', # LATIN SMALL LETTER F WITH HOOK "\x84" => '„', # DOUBLE LOW-9 QUOTATION MARK "\x85" => '…', # HORIZONTAL ELLIPSIS "\x86" => '†', # DAGGER "\x87" => '‡', # DOUBLE DAGGER "\x88" => 'ˆ', # MODIFIER LETTER CIRCUMFLEX ACCENT "\x89" => '‰', # PER MILLE SIGN "\x8a" => 'Š', # LATIN CAPITAL LETTER S WITH CARON "\x8b" => '‹', # SINGLE LEFT-POINTING ANGLE QUOTATION MARK "\x8c" => 'Œ', # LATIN CAPITAL LIGATURE OE "\x8e" => 'Ž', # LATIN CAPITAL LETTER Z WITH CARON "\x91" => '‘', # LEFT SINGLE QUOTATION MARK "\x92" => '’', # RIGHT SINGLE QUOTATION MARK "\x93" => '“', # LEFT DOUBLE QUOTATION MARK "\x94" => '”', # RIGHT DOUBLE QUOTATION MARK "\x95" => '•', # BULLET "\x96" => '–', # EN DASH "\x97" => '—', # EM DASH "\x98" => '˜', # SMALL TILDE "\x99" => '™', # TRADE MARK SIGN "\x9a" => 'š', # LATIN SMALL LETTER S WITH CARON "\x9b" => '›', # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK "\x9c" => 'œ', # LATIN SMALL LIGATURE OE "\x9e" => 'ž', # LATIN SMALL LETTER Z WITH CARON "\x9f" => 'Ÿ', # LATIN CAPITAL LETTER Y WITH DIAERESIS ); BEGIN { my $proto = $] >= 5.010000 ? 
'_' : '$';
    # The public functions are compiled from strings so that the prototype can
    # be chosen at load time: "_" (default to $_) needs Perl 5.10, older perls
    # get "$". Each wrapper prepends its replacement table to @_ and then calls
    # &_tweakit without parentheses, which hands over the current @_ as-is, so
    # the caller's string stays aliased and can be modified in place.
    eval "sub zap_cp1252($proto) { unshift \@_, \\%ascii_for; &_tweakit; }";
    eval "sub fix_cp1252($proto) { unshift \@_, \\%utf8_for; &_tweakit; }";
}

# Shared implementation behind zap_cp1252() and fix_cp1252().
#
# Arguments (via @_): a reference to the replacement table, followed by the
# string to convert, which is an alias to the caller's variable.
#
# Returns nothing when the string is undefined. In void context the caller's
# string is converted in place. In scalar or list context a localized copy is
# converted and returned, and the caller's string is left unchanged.
sub _tweakit {
    my $table = shift;
    return unless defined $_[0];

    # When a value is wanted, localize the aliased argument so that the
    # substitutions below operate on a temporary copy.
    local $_[0] = $_[0] if defined wantarray;

    if (PERL588 && Encode::is_utf8($_[0])) {
        _tweak_decoded($table, $_[0]);
    } else {
        # Test with defined() rather than for truth, so that a customized
        # table may map a gremlin to '' or '0'. Characters in the range that
        # have no table entry are left as they are.
        $_[0] =~ s{([\x80-\x9f])}{
            defined $table->{$1} ? $table->{$1} : $1
        }emxsg;
    }

    return $_[0] if defined wantarray;
}

# Converts gremlins in a string that has its utf8 flag set.
#
# Arguments (via @_): a reference to the replacement table, followed by the
# string to convert (modified in place through the @_ alias).
#
# Dies with the original error if the substitution fails for any reason other
# than a "Malformed" UTF-8 error.
sub _tweak_decoded {
    my $table = shift;
    local $@;

    # First, try to replace in the decoded string. Table values are stored as
    # bytes, so they are decoded before being spliced into the string.
    eval {
        $_[0] =~ s{([\x80-\x9f])}{
            defined $table->{$1}
                ? Encode::decode('UTF-8', $table->{$1})
                : $1
        }emxsg
    };

    if (my $err = $@) {
        # If we got a "Malformed UTF-8 character" error, then someone likely
        # turned on the utf8 flag without decoding. So turn it off, replace
        # at the byte level, and turn it back on. Anything else is rethrown
        # unchanged.
        die $err if $err !~ /Malformed/;
        Encode::_utf8_off($_[0]);
        $_[0] =~ s{([\x80-\x9f])}{
            defined $table->{$1} ? $table->{$1} : $1
        }emxsg;
        Encode::_utf8_on($_[0]);
    }
}

1;
__END__

##############################################################################

=head1 Name

Encode::ZapCP1252 - Zap Windows Western Gremlins

=head1 Synopsis

  use Encode::ZapCP1252;

  # Zap or fix in-place.
  zap_cp1252 $latin1_text;
  fix_cp1252 $utf8_text;

  # Zap or fix copy.
  my $clean_latin1 = zap_cp1252 $latin1_text;
  my $fixed_utf8 = fix_cp1252 $utf8_text;

=head1 Description

Have you ever been processing a Web form submit or feed, assuming that the
incoming text was encoded as specified in the Content-Type header, or in the
XML declaration, only to end up with a bunch of junk because someone pasted in
content from Microsoft Word? Well, this is because Microsoft uses a superset
of the Latin-1 encoding called "Windows Western" or "CP1252". If the specified
encoding is Latin-1, mostly things will come out right, but a few things--like
curly quotes, m-dashes, ellipses, and the like--may not. The differences are
well-known; you can see a nice chart documenting the differences on
L<Wikipedia|http://en.wikipedia.org/wiki/Windows-1252>.
Of course, that won't really help you.
What will help you is to quit using Latin-1 and switch to UTF-8. Then you can just convert from CP1252 to UTF-8 without losing a thing, just like this: use Encode; $text = decode 'cp1252', $text, 1; But I know that there are those of you out there stuck with Latin-1 and who don't want any junk characters from Word users. That's where this module comes in. Its C<zap_cp1252> function will zap those CP1252 gremlins for you, turning them into their appropriate ASCII approximations. Another case that can occasionally come up is when you're reading in text that I<claims> to be UTF-8, but it I<still> ends up with some CP1252 gremlins mixed in with properly encoded characters. I've seen examples of just this sort of thing when processing GMail messages and attempting to insert them into a UTF-8 database, as well as in some feeds processed by third-party services. Doesn't work so well. For such cases, there's C<fix_cp1252>, which converts those CP1252 gremlins into their UTF-8 equivalents. =head1 Usage This module exports two subroutines: C<zap_cp1252()> and C<fix_cp1252()>, each of which accepts a single argument: zap_cp1252 $text; fix_cp1252 $text; When called in a void context, as in these examples, the C<zap_cp1252()> and C<fix_cp1252()> subroutines perform I<in-place> conversions of any CP1252 gremlins into their appropriate ASCII approximations or UTF-8 equivalents, respectively. Note that because the conversion happens in place, the data to be converted I<cannot> be a string constant; it must be a scalar variable. When called in a scalar or list context, on the other hand, a copy will be modified and returned. The original string will be unchanged: my $clean_latin1 = zap_cp1252 $latin1_text; my $fixed_utf8 = fix_cp1252 $utf8_text; In this case, even constant values can be processed. Either way, C<undef>s will be ignored. In Perl 5.8.8 and higher, the conversion will work even when the string is decoded to Perl's internal form (usually via C<Encode::decode()>) or the string is encoded (and thus simply processed by Perl as a series of bytes). 
The conversion will even work on a string that has not been decoded but has had its C<utf8> flag flipped anyway (usually by an injudicious use of C<Encode::_utf8_on()>). This is to enable the highest possible likelihood of removing those CP1252 gremlins no matter what kind of processing has already been executed on the string. In Perl 5.10 and higher, the functions may optionally be called with no arguments, in which case C<$_> will be converted, instead: zap_cp1252; # Modify $_ in-place. fix_cp1252; # Modify $_ in-place. my $zapped = zap_cp1252; # Copy $_ and return zapped my $fixed = fix_cp1252; # Copy $_ and return fixed =head1 Conversion Table Here's how the characters are converted to ASCII and UTF-8. The ASCII conversions are not perfect, but they should be good enough for general cleanup. If you want perfect, switch to UTF-8 and be done with it! =encoding utf8 Hex | Char | ASCII | UTF-8 Name -----+-------+-------+------------------------------------------- 0x80 | € | e | EURO SIGN 0x82 | ‚ | , | SINGLE LOW-9 QUOTATION MARK 0x83 | ƒ | f | LATIN SMALL LETTER F WITH HOOK 0x84 | „ | ,, | DOUBLE LOW-9 QUOTATION MARK 0x85 | … | ... 
| HORIZONTAL ELLIPSIS 0x86 | † | + | DAGGER 0x87 | ‡ | ++ | DOUBLE DAGGER 0x88 | ˆ | ^ | MODIFIER LETTER CIRCUMFLEX ACCENT 0x89 | ‰ | % | PER MILLE SIGN 0x8a | Š | S | LATIN CAPITAL LETTER S WITH CARON 0x8b | ‹ | < | SINGLE LEFT-POINTING ANGLE QUOTATION MARK 0x8c | Œ | OE | LATIN CAPITAL LIGATURE OE 0x8e | Ž | Z | LATIN CAPITAL LETTER Z WITH CARON 0x91 | ‘ | ' | LEFT SINGLE QUOTATION MARK 0x92 | ’ | ' | RIGHT SINGLE QUOTATION MARK 0x93 | “ | " | LEFT DOUBLE QUOTATION MARK 0x94 | ” | " | RIGHT DOUBLE QUOTATION MARK 0x95 | • | * | BULLET 0x96 | – | - | EN DASH 0x97 | — | -- | EM DASH 0x98 | ˜ | ~ | SMALL TILDE 0x99 | ™ | (tm) | TRADE MARK SIGN 0x9a | š | s | LATIN SMALL LETTER S WITH CARON 0x9b | › | > | SINGLE RIGHT-POINTING ANGLE QUOTATION MARK 0x9c | œ | oe | LATIN SMALL LIGATURE OE 0x9e | ž | z | LATIN SMALL LETTER Z WITH CARON 0x9f | Ÿ | Y | LATIN CAPITAL LETTER Y WITH DIAERESIS =head2 Changing the Tables Don't like these conversions? You can modify them to your heart's content by accessing this module's internal conversion tables. For example, if you wanted C<zap_cp1252()> to use an uppercase "E" for the euro sign, just do this: local $Encode::ZapCP1252::ascii_for{"\x80"} = 'E'; Or if, for some bizarre reason, you wanted the UTF-8 equivalent for a bullet converted by C<fix_cp1252()> to really be an asterisk (why would you? Just use C<zap_cp1252()> for that!), you can do this: local $Encode::ZapCP1252::utf8_for{"\x95"} = '*'; Just remember, without C<local> this would be a global change. In that case, be careful if your code zaps CP1252 elsewhere. Of course, it shouldn't really be doing that. These functions are just for cleaning up messes in one spot in your code, not for making a fundamental part of your text handling. For that, use L<Encode>. =head1 See Also =over =item L =item L =back =head1 Support This module is stored in an open L<GitHub repository|http://github.com/theory/encode-cp1252/tree>. Feel free to fork and contribute! Please file bug reports via L<GitHub Issues|http://github.com/theory/encode-cp1252/issues/> or by sending mail to L. =head1 Author David E. 
Wheeler =head1 Acknowledgments My thanks to Sean Burke for sending me his original method for converting CP1252 gremlins to more-or-less appropriate ASCII characters. =head1 Copyright and License Copyright (c) 2005-2010 David E. Wheeler. Some Rights Reserved. This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Encode-ZapCP1252-0.33/t000755000767000024 011663101120 13730 5ustar00davidstaff000000000000Encode-ZapCP1252-0.33/t/base.t000555000767000024 305311663101120 15170 0ustar00davidstaff000000000000#!/usr/bin/perl -w use strict; use Test::More tests => 13; BEGIN { use_ok 'Encode::ZapCP1252' or die; } can_ok 'Encode::ZapCP1252', 'zap_cp1252'; can_ok __PACKAGE__, 'zap_cp1252'; my $cp1252 = join ' ', map { chr } 0x80, 0x82 .. 0x8c, 0x8e, 0x91 .. 0x9c, 0x9e, 0x9f; my $ascii = q{e , f ,, ... + ++ ^ % S < OE Z ' ' " " * - -- ~ (tm) s > oe z Y}; my $utf8 = q{€ , ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ}; # Test conversion to ASCII. my $fix_me = $cp1252; zap_cp1252 $fix_me; is $fix_me, $ascii, 'Convert to ascii'; # Test conversion to UTF-8. $fix_me = $cp1252; fix_cp1252 $fix_me; is $fix_me, $utf8, 'Convert to utf-8'; # Try checking the return value, too. $fix_me = $cp1252; is fix_cp1252 $fix_me, $utf8, 'Check fixed return value'; is $fix_me, $cp1252, 'Should not have been fixed in-place'; $fix_me = $cp1252; is zap_cp1252 $fix_me, $ascii, 'Check zapped return value'; is $fix_me, $cp1252, 'Should not have been zapped in-place'; # Test conversion to ASCII with modified table. $Encode::ZapCP1252::ascii_for{"\x80"} = 'E'; $ascii =~ s/^e/E/; $fix_me = $cp1252; zap_cp1252 $fix_me; is $fix_me, $ascii, 'Convert to ascii with modified table'; # Test conversion to UTF-8 with modified table. $Encode::ZapCP1252::utf8_for{"\x80"} = 'E'; $utf8 =~ s/€/E/; $fix_me = $cp1252; fix_cp1252 $fix_me; is $fix_me, $utf8, 'Convert to utf-8 with modified table'; # Test that undefs are ignored. 
is zap_cp1252 undef, undef, 'zap_cp1252 should ignore undef'; is fix_cp1252 undef, undef, 'fix_cp1252 should ignore undef'; Encode-ZapCP1252-0.33/t/decoded.t000444000767000024 320711663101120 15643 0ustar00davidstaff000000000000#!/usr/bin/perl -w use strict; use Test::More; BEGIN { plan skip_all => 'These tests require Perl 5.8.8 or higher' unless $] >= 5.008_008; plan tests => 6; } BEGIN { use_ok 'Encode::ZapCP1252' or die; } use utf8; my $ascii = q{e , f ,, ... + ++ ^ % S < OE Z ' ' " " * - -- ~ (tm) s > oe z Y}; my $utf8 = q{€ , ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ}; # Test conversion of decoded from ISO-8859-1. my $fix_me = Encode::decode( 'ISO-8859-1', join ' ', map { chr } 0x80, 0x82 .. 0x8c, 0x8e, 0x91 .. 0x9c, 0x9e, 0x9f ); fix_cp1252 $fix_me; is $fix_me, $utf8, 'Convert decoded from Latin-1 to utf-8'; # Try ascii. $fix_me = Encode::decode( 'ISO-8859-1', join ' ', map { chr } 0x80, 0x82 .. 0x8c, 0x8e, 0x91 .. 0x9c, 0x9e, 0x9f ); zap_cp1252 $fix_me; is $fix_me, $ascii, 'Convert decoded from Latin-1 to ascii'; # Test conversion with utf8 bit flipped. $fix_me = join ' ', map { chr } 0x80, 0x82 .. 0x8c, 0x8e, 0x91 .. 0x9c, 0x9e, 0x9f; Encode::_utf8_on($fix_me); fix_cp1252 $fix_me; is $fix_me, $utf8, 'Convert utf8-bit-flipped to utf-8'; # Try it with ascii. $fix_me = join ' ', map { chr } 0x80, 0x82 .. 0x8c, 0x8e, 0x91 .. 0x9c, 0x9e, 0x9f; Encode::_utf8_on($fix_me); zap_cp1252 $fix_me; is $fix_me, $ascii, 'Convert utf8-bit-flipped to ascii'; # Test conversion to decoded with modified table. $Encode::ZapCP1252::utf8_for{"\x80"} = 'E'; $utf8 =~ s/€/E/; $fix_me = Encode::decode( 'ISO-8859-1', join ' ', map { chr } 0x80, 0x82 .. 0x8c, 0x8e, 0x91 .. 
0x9c, 0x9e, 0x9f ); fix_cp1252 $fix_me; is $fix_me, $utf8, 'Convert decoded from Latin-1 with modified table'; Encode-ZapCP1252-0.33/t/perl-510.t000444000767000024 174211663101120 15523 0ustar00davidstaff000000000000#!/usr/bin/perl -w use strict; use Test::More; BEGIN { plan skip_all => 'Prototype _ not supported before Perl 5.10' if $] < 5.010000; plan tests => 7; } BEGIN { use_ok 'Encode::ZapCP1252' or die; } my $cp1252 = join ' ', map { chr } 0x80, 0x82 .. 0x8c, 0x8e, 0x91 .. 0x9c, 0x9e, 0x9f; my $ascii = q{e , f ,, ... + ++ ^ % S < OE Z ' ' " " * - -- ~ (tm) s > oe z Y}; my $utf8 = q{€ , ƒ „ … † ‡ ˆ ‰ Š ‹ Œ Ž ‘ ’ “ ” • – — ˜ ™ š › œ ž Ÿ}; # Test conversion of $_. local $_ = $cp1252; zap_cp1252; is $_, $ascii, 'Should have zapped $_ in-place'; local $_ = $cp1252; fix_cp1252; is $_, $utf8, 'Should have fixed $_ in-place'; # Test non-in-place conversion of $_. local $_ = $cp1252; is zap_cp1252, $ascii, 'Should have $_-zapped return value'; is $_, $cp1252, 'Should not have zapped $_ in-place'; local $_ = $cp1252; is fix_cp1252, $utf8, 'Should have $_->fixed return value'; is $_, $cp1252, 'Should not have fixed $_ in-place'; Encode-ZapCP1252-0.33/t/pod.t000555000767000024 41211663101120 15014 0ustar00davidstaff000000000000#!perl -w use strict; use Test::More; eval 'use Test::Pod 1.41'; plan skip_all => 'Test::Pod 1.41 required for testing POD' if $@; eval 'use Encode'; plan skip_all => 'Encode 1.20 required for testing POD because it has UTF-8 characters' if $@; all_pod_files_ok();