Text-CSV-Encoded-0.22/0000755000175000017500000000000012156255623013050 5ustar reonreonText-CSV-Encoded-0.22/META.yml0000664000175000017500000000117212156255623014324 0ustar reonreon--- abstract: 'Encoding aware Text::CSV.' author: - 'Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE' build_requires: ExtUtils::MakeMaker: 0 configure_requires: ExtUtils::MakeMaker: 0 dynamic_config: 1 generated_by: 'ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.130880' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: 1.4 name: Text-CSV-Encoded no_index: directory: - t - inc requires: IO::Handle: 0 Test::Harness: 0 Test::More: 0 Text::CSV: 1.31 resources: repository: http://github.com/makamaka/Text-CSV-Encoded version: 0.22 Text-CSV-Encoded-0.22/MANIFEST0000644000175000017500000000137712156255624014212 0ustar reonreonChanges lib/Text/CSV/Encoded.pm lib/Text/CSV/Encoded/Coder/Base.pm lib/Text/CSV/Encoded/Coder/Encode.pm lib/Text/CSV/Encoded/Coder/EncodeGuess.pm Makefile.PL MANIFEST META.yml Module meta-data (added by MakeMaker) README sample/sjis.csv sample/utf8.csv t/00_pod.t Check if pod is valid t/01_use.t t/02_undef.t t/03_attr_err.t t/_setup.pm t/pp_01_base.t t/pp_02_code.t t/pp_03_file.t t/pp_04_uni_but_utf8.t t/pp_05_guess.t t/pp_06_coder_class.t t/pp_07_coder_check_value.t t/tests/01_base.t t/tests/02_code.t t/tests/03_file.t t/tests/04_uni_but_utf8.t t/tests/05_guess.t t/tests/06_coder_class.t t/tests/07_coder_check_value_euc-jp.t t/xs_01_base.t t/xs_02_code.t t/xs_03_file.t META.json Module JSON meta-data (added by MakeMaker) Text-CSV-Encoded-0.22/t/0000755000175000017500000000000012156255623013313 5ustar reonreonText-CSV-Encoded-0.22/t/pp_05_guess.t0000644000175000017500000000026412155714455015635 0ustar reonreon#!/usr/bin/perl -w use strict; use Test::More; #BEGIN { $ARGV[0] = 1; } use lib qw(./t); use _setup; BEGIN { _setup->tests(6); } require './t/tests/05_guess.t'; __END__ 
Text-CSV-Encoded-0.22/t/pp_01_base.t0000644000175000017500000000023212155714455015410 0ustar reonreon#!/usr/bin/perl -w use strict; use Test::More; use lib qw(./t); use _setup; BEGIN { _setup->tests(81); } require './t/tests/01_base.t'; __END__ Text-CSV-Encoded-0.22/t/pp_02_code.t0000644000175000017500000000023112155714455015410 0ustar reonreon#!/usr/bin/perl -w use strict; use Test::More; use lib qw(./t); use _setup; BEGIN { _setup->tests(16); } require './t/tests/02_code.t'; __END__ Text-CSV-Encoded-0.22/t/02_undef.t0000644000175000017500000000072412155714455015107 0ustar reonreon use strict; use Test::More; plan tests => 2; BEGIN { local $ENV{PERL_TEXT_CSV} = $ARGV[0] || 0; require Text::CSV::Encoded; } my $csv = Text::CSV::Encoded->new( { encoding_in => 'utf8', encoding_out => 'shiftjis' } ); $csv->blank_is_undef( 1 ); $csv->parse('abc,"",,"def"'); is_deeply( [ $csv->fields ], [ 'abc', '', undef, 'def' ] ); $csv->blank_is_undef( 0 ); $csv->parse('abc,"",,"def"'); is_deeply( [ $csv->fields ], [ 'abc', '', '', 'def' ] ); Text-CSV-Encoded-0.22/t/xs_02_code.t0000644000175000017500000000026212155714455015427 0ustar reonreon#!/usr/bin/perl -w use strict; use Test::More; BEGIN { $ARGV[0] = 1; } use lib qw(./t); use _setup; BEGIN { _setup->tests(16); } require './t/tests/02_code.t'; __END__ Text-CSV-Encoded-0.22/t/pp_07_coder_check_value.t0000644000175000017500000000023212155714455020131 0ustar reonreon use strict; use Test::More; use lib qw(./t); use _setup; BEGIN { _setup->tests(2); } require './t/tests/07_coder_check_value_euc-jp.t'; __END__ Text-CSV-Encoded-0.22/t/pp_06_coder_class.t0000644000175000017500000000062312155714455016770 0ustar reonreon#!/usr/bin/perl -w use strict; use Test::More tests => 2; use Text::CSV::Encoded; my $csv = Text::CSV::Encoded->new( ); is( $csv->coder_class, $] >= 5.008 ? 
'Text::CSV::Encoded::Coder::Encode' : 'Text::CSV::Encoded::Coder::Base' ); $csv = Text::CSV::Encoded->new( { coder_class => 'Text::CSV::Encoded::Coder::Base' } ); is( $csv->coder_class, 'Text::CSV::Encoded::Coder::Base' ); __END__ Text-CSV-Encoded-0.22/t/01_use.t0000644000175000017500000000011612155714455014574 0ustar reonreonuse strict; use Test::More tests => 1; use_ok( 'Text::CSV::Encoded' ); Text-CSV-Encoded-0.22/t/pp_04_uni_but_utf8.t0000644000175000017500000000020412155714455017113 0ustar reonreon use strict; use Test::More; use lib qw(./t); use _setup; BEGIN { _setup->tests(2); } require './t/tests/04_uni_but_utf8.t'; Text-CSV-Encoded-0.22/t/00_pod.t0000644000175000017500000000025212155714455014562 0ustar reonreon#!/usr/bin/perl use strict; $^W = 1; use Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok (); Text-CSV-Encoded-0.22/t/03_attr_err.t0000644000175000017500000000042312155714455015625 0ustar reonreonuse strict; use Test::More; plan tests => 2; BEGIN { local $ENV{PERL_TEXT_CSV} = $ARGV[0] || 0; require Text::CSV::Encoded; } my $csv = Text::CSV::Encoded->new( { not_implemented_attr => 1 } ); ok( not $csv ); like( Text::CSV::Encoded->error_diag, qr/INI - / ); Text-CSV-Encoded-0.22/t/xs_03_file.t0000644000175000017500000000026312155714455015436 0ustar reonreon#!/usr/bin/perl -w use strict; use Test::More; BEGIN { $ARGV[0] = 1; } use lib qw(./t); use _setup; BEGIN { _setup->tests(20); } require './t/tests/03_file.t'; __END__ Text-CSV-Encoded-0.22/t/xs_01_base.t0000644000175000017500000000026312155714455015427 0ustar reonreon#!/usr/bin/perl -w use strict; use Test::More; BEGIN { $ARGV[0] = 1; } use lib qw(./t); use _setup; BEGIN { _setup->tests(81); } require './t/tests/01_base.t'; __END__ Text-CSV-Encoded-0.22/t/pp_03_file.t0000644000175000017500000000023212155714455015417 0ustar reonreon#!/usr/bin/perl -w use strict; use Test::More; use lib qw(./t); use _setup; BEGIN { 
_setup->tests(20); } require './t/tests/03_file.t'; __END__ Text-CSV-Encoded-0.22/t/tests/0000755000175000017500000000000012156255623014455 5ustar reonreonText-CSV-Encoded-0.22/t/tests/06_coder_class.t0000644000175000017500000000046112155714455017433 0ustar reonreon use strict; use Text::CSV::Encoded; my $csv = Text::CSV::Encoded->new( ); is( $csv->coder_class, 'Text::CSV::Encoded::Coder::Encode' ); $csv = Text::CSV::Encoded->new( { coder_class => 'Text::CSV::Encoded::Coder::Base' } ); is( $csv->coder_class, 'Text::CSV::Encoded::Coder::Base' ); 1; __END__ Text-CSV-Encoded-0.22/t/tests/03_file.t0000644000175000017500000000436712155773734016104 0ustar reonreonuse Encode; use utf8; my $csv = Text::CSV::Encoded->new({ encoding_in => 'utf8', encoding_out => 'shiftjis', }); my $file = sprintf('sample/test_%s.csv' , $csv->backend =~ /PP/ ? 'pp' : 'xs' ); open (my $fh, "sample/utf8.csv") or die $!; open (my $fh2, ">$file") or die $!; # # column_names & getline_hr # $csv->column_names( $csv->getline($fh) ); while( my $hr = $csv->getline_hr( $fh ) ) { $csv->print( $fh2, [ $hr->{text} ] ); $fh2->print("\n"); } close($fh); close($fh2); my $checker = Text::CSV->new({ binary => 1}); open ($fh, $file) or die $!; open ($fh2, "sample/sjis.csv") or die $!; <$fh2>; $csv->encoding_in( 'shiftjis' ); while( 1 ) { my $row = $csv->getline( $fh ); $csv->eof and last; is( Encode::encode( 'shiftjis', $row->[0] ), $checker->getline( $fh2 )->[1] ); } close($fh); close($fh2); # convert directly # shiftjis open ($fh, $file) or die $!; open ($fh2, "sample/sjis.csv") or die $!; <$fh2>; $csv->encoding_in( 'shiftjis' ); $csv->encoding( 'shiftjis' ); while( 1 ) { my $row = $csv->getline( $fh ); $csv->eof and last; is( $row->[0], $checker->getline( $fh2 )->[1] ); } close($fh); close($fh2); # utf8 open ($fh, $file) or die $!; open ($fh2, "sample/utf8.csv") or die $!; <$fh2>; $csv->encoding_in( 'shiftjis' ); $csv->encoding( 'utf8' ); while( 1 ) { my $row = $csv->getline( $fh ); $csv->eof and last; my 
$string = $checker->getline( $fh2 )->[1]; $string = encode_utf8( $string ) if ( $csv->automatic_UTF8 ); is( $row->[0], $string ); } close($fh); close($fh2); # unicode open ($fh, $file) or die $!; open ($fh2, "sample/utf8.csv") or die $!; <$fh2>; $csv->encoding_in( 'shiftjis' ); $csv->encoding( undef ); while( 1 ) { my $row = $csv->getline( $fh ); $csv->eof and last; is( $row->[0], Encode::decode_utf8( $checker->getline( $fh2 )->[1] ) ); } close($fh); close($fh2); # # bind_columns # my ( $id, $text ); $csv->bind_columns( \$id, \$text ); $csv->encoding_in( 'utf8' ); $csv->encoding( 'shiftjis' ); open ($fh, $file) or die $!; open ($fh2, "sample/utf8.csv") or die $!; <$fh2>; while( my $col = $csv->getline( $fh2 ) ) { is( $text, $checker->getline( $fh )->[0] ); } close($fh); close($fh2); unlink( $file ) or warn $!; 1; Text-CSV-Encoded-0.22/t/tests/02_code.t0000644000175000017500000000270512156246043016055 0ustar reonreon use strict; use utf8; my $csv = Text::CSV::Encoded->new(); my @tests = ( # encoding CSV Perl Str Re-CSV [ [ unicode => undef ] => "ü", "ü", "ü" ], [ [ latin1 => undef ] => "\xfc", "\xfc", "\xfc" ], [ [ latin1 => 'latin1' ] => "\xfc", "\xfc", "\xfc" ], [ [ utf8 => undef ] => "\xc3\xbc", "\xc3\xbc", qq|"\xc3\xbc"| ], [ [ unicode => 'utf8' ] => "\xc3\xbc", "ü", "\xc3\xbc" ], [ [ unicode => undef ] => 'あ,い', 'あ,い', '"あ","い"' ], [ [ unicode => 'utf8' ] => "\xE3\x81\x82,\xE3\x81\x84", 'あ,い', qq|"\xE3\x81\x82","\xE3\x81\x84"| ], [ [ unicode => 'shiftjis' ] => "\x82\xA0,\x82\xA2", 'あ,い', qq|"\x82\xA0","\x82\xA2"| ], ); for my $t ( @tests ) { my ( $name, $code ) = @{ $t->[0] }; $name .= " (<=$code)" if $code; my $columns = $code ? $csv->decode( $code, $t->[1] ) : $csv->decode( $t->[1] ); is( join( ',', @$columns ), $t->[2], $name . ' decode' ); my $string = $code ? $csv->encode( $code, $columns ) : $csv->encode( $columns ); is( $string, $t->[3], $name . 
' encode' ); } 1; Text-CSV-Encoded-0.22/t/tests/07_coder_check_value_euc-jp.t0000644000175000017500000000136512155714455022047 0ustar reonreon#!/usr/bin/perl -w # This file is encoded in EUC-JP, and output is Shift_JIS. # ΥեΥ󥳡ǥ󥰤EUC-JPϤShift_JIS use strict; BEGIN { local $ENV{PERL_TEXT_CSV} = $ARGV[0] || 0; require Text::CSV::Encoded; } use Encode; my $str = ',,,'; my $check = encode( 'shiftjis', decode( 'euc-jp', '"","","",' ) ); my @cols; my $csv = Text::CSV::Encoded->new( { encoding_in => 'euc-jp', encoding_out => 'shiftjis' } ); $csv->parse( $str ); @cols = $csv->fields; $csv->combine( @cols ); is( $csv->string, $check . '"?"' ); # change check value $csv->coder->encode_check_value( Encode::FB_PERLQQ ); $csv->parse( $str ); @cols = $csv->fields; $csv->combine( @cols ); is( $csv->string, $check . '"\x{2460}"' ); 1; Text-CSV-Encoded-0.22/t/tests/04_uni_but_utf8.t0000644000175000017500000000035512155773734017572 0ustar reonreon use strict; use utf8; use Text::CSV::Encoded; my $csv = Text::CSV::Encoded->new; eval q| $csv->decode( "あいうえお" ) |; ok( !$@ ); eval q| $csv->decode('utf8', "あいうえお") |; ok( $csv->automatic_UTF8 ? 1 : $@ ); 1; Text-CSV-Encoded-0.22/t/tests/05_guess.t0000644000175000017500000000151112155714455016274 0ustar reonreon use strict; use utf8; use Text::CSV::Encoded coder_class => 'Text::CSV::Encoded::Coder::EncodeGuess'; my $csv = Text::CSV::Encoded->new; $csv->encoding( ['shiftjis', 'euc-jp'] ); # guessing euc-jp or shiftjis? $csv->encoding_out('shiftjis'); my @fields = ( Encode::encode('euc-jp', 'これはEUC-JP'), Encode::encode('shiftjis', 'これはShift_JIS') ); ok( $csv->combine( @fields ) ); is( $csv->string, Encode::encode( 'shiftjis', '"これはEUC-JP","これはShift_JIS"' ) ); $csv->encoding_to_parse( ['shiftjis', 'euc-jp'] ); # guessing euc-jp or shiftjis? 
$csv->encoding( undef ); ok( $csv->parse( Encode::encode('euc-jp', 'これはEUC-JP') ) ); is( join('', $csv->fields), 'これはEUC-JP' ); ok( $csv->parse( Encode::encode('shiftjis', 'これはShift_JIS') ) ); is( join('', $csv->fields), 'これはShift_JIS' ); 1; Text-CSV-Encoded-0.22/t/tests/01_base.t0000644000175000017500000000232012155773546016061 0ustar reonreon#!/usr/bin/perl -w use strict; use utf8; use Encode qw(encode decode); #Test::More::diag ( "This backend is ", Text::CSV::Encoded->backend ); my $csv = Text::CSV::Encoded->new({}); for my $enc_in ( undef, qw(latin1 utf8) ) { for my $enc_out ( undef, qw(latin1 utf8) ) { $csv->encoding_in ( $enc_in ); $csv->encoding_out( $enc_out ); ok( $csv->parse( $enc_in ? encode( $enc_in, "ü" ) : "ü" ) ); is( ($csv->fields)[0], "ü" ); # always Unicode ok( $csv->combine( $csv->fields ) ); is( $csv->string, $enc_out ? encode( $enc_out, "ü" ) : "ü" ); } } for my $enc_in ( undef, qw(shiftjis utf8) ) { for my $enc_out ( undef, qw(shiftjis utf8) ) { $csv->encoding_in ( $enc_in ); $csv->encoding_out( $enc_out ); my $subject = $enc_in ? $enc_in : 'Unicode'; $subject .= " => "; $subject .= $enc_out ? $enc_out : 'Unicode'; ok(1, $subject); ok( $csv->parse( $enc_in ? encode( $enc_in, "あ" ) : "あ" ) ); is( ($csv->fields)[0], 'あ' ); # always Unicode ok( $csv->combine( $csv->fields ) ); is( $csv->string, $enc_out ? 
encode( $enc_out, '"あ"' ) : '"あ"' ); } } 1; Text-CSV-Encoded-0.22/t/_setup.pm0000644000175000017500000000115512155714455015154 0ustar reonreonpackage _setup; use strict; sub tests { package main; plan tests => $_[1]; } BEGIN { package main; my $backend = $ARGV[0] || 0; local $ENV{PERL_TEXT_CSV} = $backend; require Text::CSV::Encoded; if ( $] < 5.008 ) { plan skip_all => "This test requires Perl version 5.8 or lator."; } elsif ( $backend and Text::CSV::Encoded->is_pp ) { plan skip_all => "Text::CSV_XS can't be loaded."; } elsif ( !$backend and Text::CSV::Encoded->is_xs ) { plan skip_all => "Text::CSV_PP can't be loaded."; } } 1; __END__ Text-CSV-Encoded-0.22/sample/0000755000175000017500000000000012156255623014331 5ustar reonreonText-CSV-Encoded-0.22/sample/sjis.csv0000644000175000017500000000005712155714455016022 0ustar reonreonid,text 1,{͂VC 2, 3,eXg 4, Text-CSV-Encoded-0.22/sample/utf8.csv0000644000175000017500000000010712155714455015734 0ustar reonreonid,text 1,本日はいい天気 2,アイウエオ 3,テスト 4,★ Text-CSV-Encoded-0.22/Changes0000644000175000017500000000227712156255423014351 0ustar reonreonRevision history for Perl extension Text::CSV::Encoded. 0.22 - Text::CSV 1.31 requierd in Makefile.PL (even works in minor version) 0.21 Thu Jun 13 14:31:12 2013 - Text::CSV 1.30 requierd in Makefile.PL (even works in minor version) 0.20 Wed Jun 12 13:18:02 2013 - update modules and test for Text::CSV_XS 0.99 and Text::CSV_PP 1.30 0.10 Mon Apr 26 17:41:12 2010 [Text::CSV::Encoded::Coder::*] - coder class can accept check value. ex. $csv->coder->encode_check_value( Encode::FB_PERLQQ ); $csv->coder->decode_check_value( Encode::FB_PERLQQ ); 0.09 Fri Oct 9 16:56:43 2009 - requires Text::CSV 1.12 or later. 
[Text::CSV::Encoded] - A construction fail returns undef (rt#46077) - improved new() section in the document [Text::CSV::Encoded::Coder::Encod] [Text::CSV::Encoded::Coder::EncodeGuess] - attr 'blank_is_undef' was not effective (reported by Zsombor Welker) 0.08 Thu Apr 23 15:23:42 2009 [Text::CSV::Encoded::Coder::EncodeGuess] - undef check before encode/decode (rt#45078) 0.07 Wed Oct 22 02:10:21 2008 - first release version [Text::CSV::Encoded] [Text::CSV::Encoded::Coder::Base] [Text::CSV::Encoded::Coder::Encode] [Text::CSV::Encoded::Coder::EncodeGuess] Text-CSV-Encoded-0.22/lib/0000755000175000017500000000000012156255623013616 5ustar reonreonText-CSV-Encoded-0.22/lib/Text/0000755000175000017500000000000012156255623014542 5ustar reonreonText-CSV-Encoded-0.22/lib/Text/CSV/0000755000175000017500000000000012156255623015175 5ustar reonreonText-CSV-Encoded-0.22/lib/Text/CSV/Encoded.pm0000644000175000017500000004410312156255410017070 0ustar reonreonpackage Text::CSV::Encoded; use strict; use vars qw( $VERSION ); use Carp (); $VERSION = '0.22'; BEGIN { require Text::CSV; if ( Text::CSV->VERSION < 1.06 ) { Carp::croak "Base class Text::CSV version is less than 1.06."; } my $backend = Text::CSV->backend; my $version = Text::CSV->backend->VERSION; if ( ( $backend =~ /XS/ and $version >= 0.99 ) or ( $backend =~ /PP/ and $version >= 1.30 ) ) { eval q/ sub automatic_UTF8 { 1; } /; # parse/getline return strings (UNICODE) } else { eval q/ sub automatic_UTF8 { 0; } /; } } use base qw( Text::CSV ); my $DefaultCoderClass = $] >= 5.008 ? 
'Text::CSV::Encoded::Coder::Encode' : 'Text::CSV::Encoded::Coder::Base'; my @Attrs; BEGIN { @Attrs = qw( encoding encoding_in encoding_out encoding_io_in encoding_io_out encoding_to_parse encoding_to_combine ); } sub import { my ( $class, %args ) = @_; return unless %args; if ( exists $args{ coder_class } ) { $DefaultCoderClass = $args{ coder_class }; } } sub new { my $class = shift; my $opt = shift || {}; my %opt; $opt->{binary} = 1; for my $attr ( @Attrs, 'encoding', 'coder_class' ) { $opt{ $attr } = delete $opt->{ $attr } if ( exists $opt->{ $attr } ); } my $self = $class->SUPER::new( $opt ) || return; if ( my $coder_class = ( $opt{coder_class} || $DefaultCoderClass ) ) { $self->coder_class( $coder_class ); } else { Carp::croak "Coder class is not specified."; } for my $attr ( @Attrs, 'encoding' ) { $self->$attr( $opt{ $attr } ) if ( exists $opt{ $attr } ); } $self; } # # Methods # sub combine { my $self = shift; my @fields = @_; $self->coder->decode_fields_ref( $self->encoding, \@fields ) if ( $self->encoding ); unless ( $self->encoding_out ) { return $self->SUPER::combine( @fields ); } my $ret = $self->encode( $self->encoding_out, \@fields ); $self->{_STRING} = \$ret if ( $ret ); return $self->{_STATUS}; } sub parse { my $self = shift; my $ret; if ( $self->encoding_in ) { $ret = $self->decode( $self->encoding_in, $_[0] ); } else { $ret = [ $self->fields ] if $self->SUPER::parse( @_ ); } if ( $ret ) { $self->coder->encode_fields_ref( $self->encoding, $ret ) if ( $self->encoding ); $self->{_FIELDS} = $ret; } return $self->{_STATUS}; } # # IO style # sub print { # to CSV my ( $self, $io, $cols ) = @_; $self->coder->decode_fields_ref( $self->encoding, $cols ) if ( $self->encoding ); $self->coder->encode_fields_ref( $self->encoding_out, $cols ); $self->SUPER::print( $io, $cols ); } sub getline { # from CSV my ( $self, $io ) = @_; my $cols = $self->SUPER::getline( $io ); if ( my $binds = $self->{_BOUND_COLUMNS} ) { for my $val ( @$binds ) { $$val = 
$self->coder->decode( $self->encoding_in, $$val ); $$val = $self->coder->encode( $self->encoding, $$val ) if ( $self->encoding ); } return $cols; } return unless $cols; $self->coder->decode_fields_ref( $self->encoding_in, $cols ); $self->coder->encode_fields_ref( $self->encoding, $cols ) if ( $self->encoding ); $cols; } # # decode/encode style # sub decode { my ( $self, $enc, $text ) = @_; if ( @_ == 2 ) { $text = $enc, $enc = ''; } $self->coder->upgrade( $text ) unless ( $enc ); # as unicode return unless ( defined $text ); return unless ( $self->SUPER::parse( $text ) ); return $enc ? [ map { $self->coder->decode( $enc, $_ ) } $self->fields() ] : [ $self->fields() ]; } sub encode { my ( $self, $enc, $array ) = @_; if ( @_ == 2 ) { $array = $enc, $enc = ''; } return unless ( defined $array and ref $array eq 'ARRAY' ); return unless ( $self->SUPER::combine ( @$array ) ); return $enc ? $self->coder->encode( $enc, $self->string() ) : $self->string(); } # Internal sub _load_coder_class { my ( $class, $coder_class ) = @_; (my $file = "$coder_class.pm") =~ s{::}{/}g; eval { require $file }; if ( $@ ) { Carp::croak $@; } $coder_class; } # Accessors BEGIN { for my $method ( qw( encoding encoding_in encoding_out ) ) { eval qq| sub $method { my ( \$self, \$encoding ) = \@_; if ( \@_ > 1 ) { \$self->{ $method } = \$encoding; return \$self; } else { \$self->{ $method }; } } |; } } *encoding_io_in = *encoding_to_parse = *encoding_in; *encoding_io_out = *encoding_to_combine = *encoding_out; sub coder { my $self = shift; $self->{coder} ||= $self->coder_class->new( automatic_UTF8 => $self->automatic_UTF8, @_ ); } sub coder_class { my ( $self, $coder_class ) = @_; return $self->{coder_class} if ( @_ == 1 ); $self->_load_coder_class( $coder_class ); $self->{coder_class} = $coder_class; $self; } 1; __END__ =pod =head1 NAME Text::CSV::Encoded - Encoding aware Text::CSV. 
=head1 SYNOPSIS # Here in Perl 5.8 or later $csv = Text::CSV::Encoded->new ({ encoding_in => "iso-8859-1", # the encoding comes into Perl encoding_out => "cp1252", # the encoding comes out of Perl }); # parsing CSV is regarded as input $csv->parse( $line ); # $line is a iso-8859-1 encoded string @columns = $csv->fields(); # they are unicode data # combining list is regarded as output $csv->combine(@columns); # they are unicode data $line = $csv->string(); # $line is a cp1252 encoded string # if you want for returned @columns to be encoded in $encoding # or want for combining @columns to be assumed in $encoding $csv->encoding( $encoding ); # change input/output encodings $csv->encoding_in('shiftjis')->encoding_out('utf8'); $csv->eol("\n"); open (my $in, "sjis.csv"); open (my $out, "output.csv"); # change an encoding from shiftjis to utf8 while( my $columns = $csv->getline( $in ) ) { $csv->print( $out, $columns ); } close($in); close($out); # simple shortcuts # (regardless of encoding_in/out and encoding) $uni_columns = $csv->decode( 'euc-jp', $line ); # euc-jp => unicode $line = $csv->encode( 'euc-jp', $uni_columns ); # unicode => euc-jp # pass check value to coder class $csv->coder->encode_check_value( Encode::FB_PERLQQ ); =head1 DESCRIPTION This module inherits L and is aware of input/output encodings. =head1 ENCODINGS Acceptable names of encodings (C, C and C) are depend upon its coder class (see to L). But these names should be based on L supported names. See to L and L. =head1 METHODS =head2 new $csv = Text::CSV::Encoded->new(); Text::CSV::Encoded->error_diag unless $csv; # report error message Creates a new Text::CSV::Encoded object. It can take all options of L. Of course, C option is always on. If Text::CSV::Encoded fails in constructing, you can get an error message using C. See to L. The following options are supported by this method: =over =item encoding The encoding of list data in below cases. * list data returned by fields() after successful parse(). 
* list data consumed by combine(). * list reference returned by getline(). * list reference taken by print(). See to L. =item encoding_in =item encoding_io_in =item encoding_to_parse The encoding for pre-parsing CSV strings. See to L. C is an alias to C. If both C and C are set at the same time, the C takes precedence. C is an alias to C. If both C and C are set at the same time, the C takes precedence. =item encoding_out =item encoding_io_out =item encoding_to_combine The encoding for combined CSV strings. See to L. C is an alias to C. If both C and C are set at the same time, the C takes precedence. C is an alias to C. If both C and C are set at the same time, the C takes precedence. =item coder_class A name of coder class that really decodes and encodes data. =back =head2 encoding_in $csv = $csv->encoding_in( $encoding ); The accessor to an encoding for pre-parsing CSV strings. If no encoding is given, returns current C<$encoding>, otherwise the object itself. $encoding = $csv->encoding_in() In C or C, the C<$csv> will assume CSV data as the given encoding. If C is not specified or is set with false value (L), it will assume input CSV strings as Unicode (not UTF-8) when L is used. $csv->encoding_in( undef ); # assume as Unicode when Text::CSV::Encoded::Coder::Encode is used. If you pass a list reference that contains multiple encodings to the method, the working are depend upon the coder class. For example, if you use the coder class with L, it might guess the encoding from the given list. $csv->coder_class( 'Text::CSV::Encoded::Coder::EncodeGuess' ); $csv->encoding_in( ['shiftjis', 'euc-jp', 'iso-20022-jp'] ); See to L and L. =head2 encoding_out $csv = $csv->encoding_out( $encoding ); The accessor to an encoding for converting combined CSV strings. If no encoding is given, returns current C<$encoding>, otherwise the object itself. $encoding = $csv->encoding_out(); In C or C, the C<$csv> will return a result string encoded in the given encoding. 
If C is not specified or is set with false value, it will return a result string as Unicode (not UTF-8). $csv->encoding_out( undef ); # return as Unicode when Text::CSV::Encoded::Coder::Encode is used. You must not pass a list reference to C, unlike C or C. =head2 encoding $csv = $csv->encoding( $encoding ); $encoding = $csv->encoding(); The accessor to an encoding for list data in the below cases. * list data returned by fields() after successful parse(). * list data consumed by combine(). * list reference returned by getline(). * list reference taken by print(). In other word, in C and C, C is an encoding of the returned list. And in C and C, it is assumed as an encoding for the passing list data. If C is not specified or is set with false value (C), the field data will be regarded as Unicode (when L is used). # ex.) a souce code is encoded in euc-jp, and print to stdout in shiftjis. @fields = ( .... ); $csv->encoding('euc-jp') ->encoding_to_combine('shiftjis') # same as encoding_out ->combine( @fields ); # from euc-jp to shift_jis print $csv->string; $csv->encoding('shiftjis') ->encoding_to_parse('shiftjis') # same as encoding_in ->parse( $csv->string ); # from shift_jis to shift_jis print join(", ", $csv->fields ); If you pass a list reference contains multiple encodings to the method, The working are depend upon the coder class. For example, L might guess the encoding from the given list. $csv->coder_class( 'Text::CSV::Encoded::Coder::EncodeGuess' ); $csv->encoding( ['ascii', 'ucs2'] )->combine( @cols ); See to L and L. =head2 parse/combine/getline/print $csv->parse( $encoded_string ); @unicode_array = $csv->fields(); $csv->combine( @unicode_array ); $encoded_string = $csv->string; $unicode_arrayref = $csv->getline( $io ); # get arrayref contains unicode strings $csv->print( $io, $unicode_arrayref ); # print $io with string encoded in $csv->encoded_in. $encoded_arrayref = $csv->getline( $io => $encoding ) # directly encoded in $encoding. 
Here is the relation of C, C and C. # CSV string => (getline/parsed) => Perl array # assumed as encoded in # encoding_in encoding # Perl array => (print/combined) => CSV string # assumed as encoded in # encoding encoding_out If you want to treat Perl array data as Unicode in Perl5.8 and later, don't specify C (or set C into C). =head2 decode $arrayref = $csv->decode( $encoding, $encoded_string ); $arrayref = $csv->decode( $string ); A short cut method to convert CSV to Perl. Without C<$encoding>, C<$string> is assumed as a Unicode. The returned value status is depend upon its coder class. With L, C<$arrayref> contains Unicode strings. =head2 encode $encoded_string = $csv->encode( $encoding, $arrayref ); $string = $csv->encode( $arrayref ); A short cut method to convert Perl to CSV. With L, C<$arrayref> is assumed to contain Unicode strings. Without C<$encoding>, return as is. =head2 coder_class $csv = $csv->coder_class( $classname ); $classname = $csv->coder_class(); Returns the coder class name. See to L. =head2 coder $coder = $csv->coder(); Returns a coder object. =head2 automtic_UTF8 In L version 0.99 and L version 1.30 or later, They return UNICODE stinrgs in case of parsing utf8 encoded text. Backend module has that feature, automatic_UTF8 returns true. (This method is for internal code.) =head1 CODER CLASS Text::CSV::Encoded delegates the encoding converting process to another module. Since version 5.8, Perl standardly has L module. So the default coder module L also uses it. In this case, you don't have to take care of it. In older Perl, the default is L. It does nothing. So you have to make a coder module using your favorite converting module, for example, L or L and so on. Please check L and L to make such a module. In calling L, you can set another coder module with C; use Text::CSV::Encoded coder_class => 'YourCoder'; This will call C module in runtime. =head2 Use Encode module Perl 5.8 or later, L use L as its backend engine. 
You can set C, C and C with L supported encodings. See to L and L. Without C (or set C), C/C/C return list data whose entries are C strings. On the contrary, C/C take data as C string list. About the extra methods C and C. C returns C string list and C takes C string list. But If no C<$encoding> is passed to C, it returns a non-Unicode CSV string for non-Unicode list data. =head2 Use Encode::Guess module If you don't know definitely input CSV data encoding (for parse/getline), L may be useful to you. It inherits from L, so you can treate methods and attributes as same as L. And it provides a guessing fucntion with L. When it is backend coder class, C and C can take a encoding list reference, and then it might guess the encoding from the given list. $csv->encoding_in( ['shiftjis', 'euc-jp'] )->parse( $sjis_or_eucjp_encoded_csv_string ); It is important to remember the guessing feature is not always successful. Or, the method can be applied to C. For exmaple, you want to convert data from Microsoft Excel to CSV. use Text::CSV::Encoded coder_class => 'Text::CSV::Encoded::Coder::EncodeGuess'; use Spreadsheet::ParseExcel; my $csv = Text::CSV::Encoded->new( eol => "\n" ); $csv->encoding( ['ucs2', 'ascii'] ); # guessing ucs2 or ascii? $csv->encoding_out('shiftjis'); # print in shift_jis my $excel = Spreadsheet::ParseExcel::Workbook->Parse( $file ); my $sheet = $excel->{Worksheet}->[0]; for my $row ( $sheet->{MinRow} .. $sheet->{MaxRow} ) { my @fields; for my $col ( $sheet->{MinCol} .. $sheet->{MaxCol} ) { my $cell = $sheet->{Cells}[$row][$col]; push @fields, $cell->{Val}; } $csv->print( \@fields ); } In this case, guessing for list data. After combining, you may have a need to clear C. Again remember that the feature is not always successful. In addtion, Microsoft Excel data converting is a carefult thing. See to L. =head2 Use XXX module Someone might make a new coder module in older version Perl... There is an example with L in L document. 
=head1 TODO =over =item More sophisticated tests - Welcome! =item Speed =back =head1 SEE ALSO L, L, L, L, L, L, L, L =head1 AUTHOR Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE The basic idea for this module and suggestions were given by H.Merijn Brand. He and Juerd advised me many points about documents and sources. =head1 COPYRIGHT AND LICENSE Copyright 2008-2013 by Makamaka Hannyaharamitu This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Text-CSV-Encoded-0.22/lib/Text/CSV/Encoded/0000755000175000017500000000000012156255623016536 5ustar reonreonText-CSV-Encoded-0.22/lib/Text/CSV/Encoded/Coder/0000755000175000017500000000000012156255623017572 5ustar reonreonText-CSV-Encoded-0.22/lib/Text/CSV/Encoded/Coder/Base.pm0000644000175000017500000000763112155773734021020 0ustar reonreonpackage Text::CSV::Encoded::Coder::Base; use strict; BEGIN { $Text::CSV::Encoded::Coder::Base::VERSION = '0.03'; } sub new { my $class = shift; my %opt = @_; bless { %opt }, $class; } sub upgrade { 0; } sub encode { my ( $self, $encoding, $str ) = @_; $str; } sub decode { my ( $self, $encoding, $str ) = @_; $str; } sub decode_fields_ref { my ( $self, $encoding, $arrayref ) = @_; } sub encode_fields_ref { my ( $self, $encoding, $arrayref ) = @_; } sub encode_check_value { $_[0]->{ encode_check_value } = $_[1] if @_ > 1; $_[0]->{ encode_check_value } || 0; } sub decode_check_value { $_[0]->{ decode_check_value } = $_[1] if @_ > 1; $_[0]->{ decode_check_value } || 0; } 1; __END__ =pod =head1 NAME Text::CSV::Encoded::Coder::Base - Interface for Text::CSV::Encoded coder base class =head1 SYNOPSIS package Text::CSV::Encoded::Coder::YourCoder; use base qw( Text::CSV::Encoded::Coder::Base ); sub decode { ... } sub encode { ... } sub upgrade { ... } sub decode_fields_ref { ... } sub encode_fields_ref { ... } =head1 DESCRIPTION This module is used by L internally. =head1 INTERFACS =head2 decode ( $self, $encoding, $str ) = @_; .... 
return $decoded_str; Takes an encoding and a CSV string. It must return a Perl string decoded in C<$encoding>. In Perl 5.8 or later, if $enc is C or false, the encoding should be utf8. =head2 encode ( $self, $encoding, $str ) = @_; .... return $encoded_str; Takes an encoding and a Perl string. It must return a CSV string encoded in C<$encoding>. In Perl 5.8 or later, if $enc is C or false, the encoding should be utf8. =head2 decode_fields_ref ( $self, $encoding, $arrayref ) = @_; Takes an encoding and an array reference. It must decoded each array entries in $encoding. =head2 encode_fields_ref ( $self, $encoding, $arrayref ) = @_; Takes an encoding and an array reference. It must encoded each array entries in $encoding. =head2 upgrade ( $self, $str ) = @_; In Perl 5.8 or later, it is expected to do C against $str. In older versions, this method may be meaningless and there is no need to implement. See to L. =head2 encode_check_value Setter/Getter for an argument passing to encode. $coder->encode_check_value( Encode::FB_PERLQQ ); =head2 decode_check_value Setter/Getter for an argument passing to decode. $coder->encode_check_value( Encode::FB_PERLQQ ); =head1 EXAMPLE Use with L. 
package Text::CSV::Encoded::Coder::Jcode; use strict; use base qw( Text::CSV::Encoded::Coder::Base ); use Jcode (); my $Jcode = Jcode->new; my %alias = ( 'shiftjis' => 'sjis', 'euc-jp' => 'euc', 'sjis' => 'sjis', 'euc' => 'euc', ); sub decode { my ( $self, $encoding, $str ) = @_; my $enc = $alias{ $encoding }; $Jcode->set( $str, $enc )->euc; } sub encode { my ( $self, $encoding, $str ) = @_; my $enc = $alias{ $encoding }; $Jcode->set( $str, 'euc' )->$enc(); } sub decode_fields_ref { my ( $self, $encoding, $arrayref ) = @_; my $enc = $alias{ $encoding }; for ( @$arrayref ) { $_ = $Jcode->set( $_, $enc )->euc; } } sub encode_fields_ref { my ( $self, $encoding, $arrayref ) = @_; my $enc = $alias{ $encoding }; for ( @$arrayref ) { $_ = $Jcode->set( $_, 'euc' )->$enc(); } } =head1 AUTHOR Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2008-2013 by Makamaka Hannyaharamitu This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
=cut Text-CSV-Encoded-0.22/lib/Text/CSV/Encoded/Coder/EncodeGuess.pm0000644000175000017500000000656412155773734022356 0ustar reonreon
package Text::CSV::Encoded::Coder::EncodeGuess;

use 5.008;
use strict;
use warnings;

use base qw( Text::CSV::Encoded::Coder::Encode );

use Carp ();
use Encode ();
use Encode::Guess;

our $VERSION = '0.04';

# Choose the encoding object used to decode $octets.
#
#   $candidates - array reference of encoding names to guess among
#   $octets     - the data whose encoding is being guessed
#
# Encode::Guess::guess_encoding() yields a reference on success; anything
# else is treated as "could not guess", in which case the first candidate
# is looked up with find_encoding() and used instead.
sub _guess_encoder {
    my ( $self, $candidates, $octets ) = @_;

    my $guessed = Encode::Guess::guess_encoding( $octets, @$candidates );

    return ref $guessed ? $guessed : $self->find_encoding( $candidates->[0] );
}

# Decode a single string.
#
#   $encoding - an encoding name, or an array reference of candidate names
#               to guess among
#   $str      - the string to decode
#
# Returns the decoded string, or undef when $str is undef.
sub decode {
    my ( $self, $encoding, $str ) = @_;

    # `return undef` is kept as-is so that list-context callers keep
    # receiving a single undef element.
    return undef unless defined $str;

    my $enc = ref $encoding
            ? $self->_guess_encoder( $encoding, $str )
            : $self->find_encoding( $encoding );

    return $enc->decode( $str, $self->decode_check_value );
}

# Decode every defined entry of @$arrayref in place.
#
#   $encoding - an encoding name, or an array reference of candidate names;
#               with candidates the encoding is guessed per field
#   $arrayref - the fields to decode
#
# Undefined entries are left as undef, matching decode_fields_ref in the
# parent class Text::CSV::Encoded::Coder::Encode, instead of being passed
# to the guesser and the decoder.
sub decode_fields_ref {
    my ( $self, $encoding, $arrayref ) = @_;

    my $check = $self->decode_check_value;

    if ( ref $encoding ) {
        for my $field ( @$arrayref ) {
            next unless defined $field;
            $field = $self->_guess_encoder( $encoding, $field )->decode( $field, $check );
        }
        return;
    }

    my $enc = $self->find_encoding( $encoding ) || return;

    for my $field ( @$arrayref ) {
        next unless defined $field;
        $field = $enc->decode( $field, $check );
    }

    return;
}

1;
__END__
=pod =head1 NAME Text::CSV::Encoded::Coder::EncodeGuess - Text::CSV::Encoded coder class using Encode::Guess =head1 SYNOPSIS use Text::CSV::Encoded coder_class => 'Text::CSV::Encoded::Coder::EncodeGuess'; use Spreadsheet::ParseExcel; my $csv = Text::CSV::Encoded->new(); $csv->encoding( ['ucs2', 'ascii'] ); # guessing ucs2 or ascii? $csv->encoding_to_combine('shiftjis'); my $excel = Spreadsheet::ParseExcel::Workbook->Parse( $file ); my $sheet = $excel->{Worksheet}->[0]; for my $row ( $sheet->{MinRow} .. $sheet->{MaxRow} ) { my @fields; for my $col ( $sheet->{MinCol} .. $sheet->{MaxCol} ) { my $cell = $sheet->{Cells}[$row][$col]; push @fields, $cell->{Val}; } $csv->combine( @fields ) or die; print $csv->string, "\n"; } =head1 DESCRIPTION This module inherits from L<Text::CSV::Encoded::Coder::Encode>. =head1 USE Except for 2 attributes, same as L<Text::CSV::Encoded>. 
=head2 encoding_in $csv = $csv->encoding_in( $encoding_list_ref ); The accessor to an encoding for pre-parsing CSV strings. If no encoding is given, returns current C<$encoding>, otherwise the object itself. $encoding_list_ref = $csv->encoding_in() When you pass a list reference, it might guess the encoding from the given list. $csv->encoding_in( ['shiftjis', 'euc-jp', 'iso-2022-jp'] ); If it cannot guess the encoding, the first encoding of the list is used. =head2 encoding $csv = $csv->encoding( $encoding_list_ref ); $encoding_list_ref = $csv->encoding(); You can pass a list reference to this attribute only: * For list data consumed by combine(). * For list reference returned by getline(). In other words, in C<combine> and C<getline>, it might guess an encoding for the list data passed. If it cannot guess the encoding, the first encoding of the list is used. =head1 SEE ALSO L, L =head1 AUTHOR Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2008-2013 by Makamaka Hannyaharamitu This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
=cut Text-CSV-Encoded-0.22/lib/Text/CSV/Encoded/Coder/Encode.pm0000644000175000017500000000413412155773734021336 0ustar reonreon
package Text::CSV::Encoded::Coder::Encode;

use 5.008;
use strict;
use warnings;

use base qw( Text::CSV::Encoded::Coder::Base );

use Carp ();
use Encode ();

our $VERSION = '0.05';

# Encoding objects already looked up, keyed by encoding name.
my %EncoderCache;

# Run utf8::upgrade on the caller's string when that string is true.
# $_[1] is used directly (not a copy) so the caller's variable is the one
# that gets upgraded.
sub upgrade {
    utf8::upgrade( $_[1] ) if ( $_[1] );
}

# True when decoding is to be skipped: the object's automatic_UTF8
# attribute is set and $encoding matches /utf-?8/i.
# An undefined $encoding never matches; the explicit defined() test keeps
# the pattern match from warning about an uninitialized value.
sub _skip_decode {
    my ( $self, $encoding ) = @_;

    return 0 unless $self->{automatic_UTF8};
    return 0 unless defined $encoding;
    return $encoding =~ /utf-?8/i ? 1 : 0;
}

# Encode $str into $encoding.
# Returns the encoded string, or undef when $str is undef.
sub encode {
    my ( $self, $encoding, $str ) = @_;

    # `return undef` is kept as-is so that list-context callers keep
    # receiving a single undef element.
    return undef unless defined $str;

    return $self->find_encoding( $encoding )->encode( $str, $self->encode_check_value );
}

# Decode $str from $encoding.
# Returns the decoded string, $str itself when decoding is skipped (see
# _skip_decode), or undef when $str is undef.
sub decode {
    my ( $self, $encoding, $str ) = @_;

    return undef unless defined $str;
    return $str if $self->_skip_decode( $encoding );

    return $self->find_encoding( $encoding )->decode( $str, $self->decode_check_value );
}

# Decode every defined entry of @$arrayref in place from $encoding.
# Nothing is touched when decoding is skipped (see _skip_decode).
sub decode_fields_ref {
    my ( $self, $encoding, $arrayref ) = @_;

    return if $self->_skip_decode( $encoding );

    my $enc   = $self->find_encoding( $encoding ) || return;
    my $check = $self->decode_check_value;

    for my $field ( @$arrayref ) {
        $field = $enc->decode( $field, $check ) if defined $field;
    }

    return;
}

# Encode every defined entry of @$arrayref in place into $encoding.
sub encode_fields_ref {
    my ( $self, $encoding, $arrayref ) = @_;

    my $enc   = $self->find_encoding( $encoding ) || return;
    my $check = $self->encode_check_value;

    for my $field ( @$arrayref ) {
        $field = $enc->encode( $field, $check ) if defined $field;
    }

    return;
}

# Return the Encode object for the given encoding name, caching it in
# %EncoderCache. A false name (undef, '' or 0) means 'utf8'.
# Croaks when Encode::find_encoding() does not know the name.
sub find_encoding {
    my ( $self, $name ) = @_;

    my $key = $name || 'utf8';

    return $EncoderCache{ $key } ||= Encode::find_encoding( $key )
        || Carp::croak ( "Not found such an encoding name '$name'" );
}

1;
__END__
=pod =head1 NAME Text::CSV::Encoded::Coder::Encode - Text::CSV::Encoded coder class using Encode =head1 SYNOPSIS use Text::CSV::Encoded coder_class => 'Text::CSV::Encoded::Coder::Encode'; # In Perl 5.8 or later, it is a default module. =head1 DESCRIPTION This module is used by L<Text::CSV::Encoded> internally. 
=head1 SEE ALSO L, L, L, L =head1 AUTHOR Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2008-2013 by Makamaka Hannyaharamitu This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
=cut Text-CSV-Encoded-0.22/Makefile.PL0000644000175000017500000000173112156255436015026 0ustar reonreon
use 5.00503;
use ExtUtils::MakeMaker;
use lib qw( ./lib );

# See lib/ExtUtils/MakeMaker.pm for details of how to influence
# the contents of the Makefile that is written.

# Arguments that are always passed to WriteMakefile().
my %args = (
    'NAME'         => 'Text::CSV::Encoded',
    'VERSION_FROM' => 'lib/Text/CSV/Encoded.pm',    # finds $VERSION
    'PREREQ_PM'    => {
        'IO::Handle'    => 0,
        'Test::More'    => 0,
        'Test::Harness' => 0,
        'Text::CSV'     => 1.31,
    },
);

# Keywords supported since perl 5.005.
if ( $] >= 5.005 ) {
    $args{ABSTRACT_FROM} = 'lib/Text/CSV/Encoded.pm';    # retrieve abstract from module
    $args{AUTHOR}        = 'Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE';
}

# LICENSE is only handed to ExtUtils::MakeMaker 6.3002 or newer.
if ( $ExtUtils::MakeMaker::VERSION >= 6.3002 ) {
    $args{'LICENSE'} = 'perl';
}

# META_MERGE is only handed to ExtUtils::MakeMaker 6.46 or newer.
if ( $ExtUtils::MakeMaker::VERSION >= 6.46 ) {
    $args{'META_MERGE'} = {
        resources => {
            repository => 'http://github.com/makamaka/Text-CSV-Encoded',
        },
    };
}

WriteMakefile( %args );
Text-CSV-Encoded-0.22/META.json0000664000175000017500000000214012156255623014470 0ustar reonreon{ "abstract" : "Encoding aware Text::CSV.", "author" : [ "Makamaka Hannyaharamitu, Emakamaka[at]cpan.orgE" ], "dynamic_config" : 1, "generated_by" : "ExtUtils::MakeMaker version 6.62, CPAN::Meta::Converter version 2.130880", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : "2" }, "name" : "Text-CSV-Encoded", "no_index" : { "directory" : [ "t", "inc" ] }, "prereqs" : { "build" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "configure" : { "requires" : { "ExtUtils::MakeMaker" : "0" } }, "runtime" : { "requires" : { "IO::Handle" : "0", "Test::Harness" : "0", "Test::More" : "0", "Text::CSV" : "1.31" } } }, "release_status" : "stable", "resources" : { "repository" : { "url" : "http://github.com/makamaka/Text-CSV-Encoded" } }, "version" : "0.22" } Text-CSV-Encoded-0.22/README0000644000175000017500000000052312155714455013732 0ustar reonreonText::CSV::Encoded ======================== encoding-handling Text::CSV INSTALLATION To install this module type the following: perl Makefile.PL make make test make install COPYRIGHT AND LICENSE This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.