Excel-ValueReader-XLSX-1.14000755000000000000 014621334531 16021 5ustar00unknownunknown000000000000Excel-ValueReader-XLSX-1.14/Build.PL000444000000000000 231614522257156 17463 0ustar00unknownunknown000000000000use strict; use warnings; use Module::Build; my $builder = Module::Build->new( module_name => 'Excel::ValueReader::XLSX', license => 'perl', dist_author => 'Laurent Dami ', dist_version_from => 'lib/Excel/ValueReader/XLSX.pm', requires => { 'perl' => "5.012001", 'utf8' => 0, 'Carp' => 0, 'Archive::Zip' => "1.61", 'Module::Load' => 0, 'Moose' => 0, 'Date::Calc' => 0, 'POSIX' => 0, 'Scalar::Util' => 0, }, recommends => { 'XML::LibXML::Reader' => 0, }, test_requires => { 'Test::More' => "1.302195", 'List::Util' => 0, 'List::MoreUtils' => 0, 'Module::Load::Conditional' => "0.66", 'Clone' => 0, }, add_to_cleanup => [ 'Excel-ValueReader-XLSX-*' ], meta_merge => { resources => { repository => 'https://github.com/damil/Excel-ValueReader-XLSX', } }, ); $builder->create_build_script(); Excel-ValueReader-XLSX-1.14/Changes000444000000000000 240614621334124 17451 0ustar00unknownunknown000000000000Revision history for Excel-ValueReader-XLSX 1.14 16.05.2024 - better error message for invalid table name 1.13 06.11.2023 - minor fixes: typos, source improvements, avoid dependencies on unsafe modules (Merijn Brand++) 1.12 06.11.2023 - fix bug in test suite (useless attempt to open a file in RW mode) 1.11 05.11.2023 - new() can take a filehandle instead of a filename - additional method ->active_sheet - minimal perl version is now 5.12 because of a dependency chain Moose/Sub::Exporter/Data::OptList 1.10 12.02.2023 - support cells without an 'r' attribute (David Flink++) 1.09 22.01.2023 - fix bug on parsing empty string nodes with LibXML (ulibuck++) 1.08 11.02.2022 - added support to parse Excel tables 1.07 31.12.2021 - oops, forgot a file in MANIFEST 1.06 30.12.2021 - fix 2 bugs signaled by https://github.com/ulibuck 1.05 17.12.2021 - suppress warnings when a style is applied to 
a non-numeric cell 1.04 25.08.2021 - hack to avoid floating-point imprecisions in computing time values 1.03 19.08.2021 - fix failures from cpantesters 1.02 18.08.2021 - added support for decoding dates 1.01 01.08.2021 - bug fix : properly handle strings with embedded newlines characters (David Flink++) 1.0 31.05.2020 - initial release Excel-ValueReader-XLSX-1.14/MANIFEST000444000000000000 55114372011100 17253 0ustar00unknownunknown000000000000benchmark.pl Build.PL Changes lib/Excel/ValueReader/XLSX.pm lib/Excel/ValueReader/XLSX/Backend.pm lib/Excel/ValueReader/XLSX/Backend/LibXML.pm lib/Excel/ValueReader/XLSX/Backend/Regex.pm MANIFEST This list of files META.json META.yml README.md t/ulibuck.xlsx t/valuereader.t t/valuereader.xlsx t/valuereader1904.xlsx t/Mappe1.xlsx t/cells_without_r_attr.xlsx Excel-ValueReader-XLSX-1.14/META.json000444000000000000 417614621334531 17607 0ustar00unknownunknown000000000000{ "abstract" : "extracting values from Excel workbooks in XLSX format, fast", "author" : [ "Laurent Dami " ], "dynamic_config" : 1, "generated_by" : "Module::Build version 0.4234", "license" : [ "perl_5" ], "meta-spec" : { "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", "version" : 2 }, "name" : "Excel-ValueReader-XLSX", "prereqs" : { "configure" : { "requires" : { "Module::Build" : "0.42" } }, "runtime" : { "recommends" : { "XML::LibXML::Reader" : "0" }, "requires" : { "Archive::Zip" : "1.61", "Carp" : "0", "Date::Calc" : "0", "Module::Load" : "0", "Moose" : "0", "POSIX" : "0", "Scalar::Util" : "0", "perl" : "5.012001", "utf8" : "0" } }, "test" : { "requires" : { "Clone" : "0", "List::MoreUtils" : "0", "List::Util" : "0", "Module::Load::Conditional" : "0.66", "Test::More" : "1.302195" } } }, "provides" : { "Excel::ValueReader::XLSX" : { "file" : "lib/Excel/ValueReader/XLSX.pm", "version" : "1.14" }, "Excel::ValueReader::XLSX::Backend" : { "file" : "lib/Excel/ValueReader/XLSX/Backend.pm", "version" : "1.13" }, "Excel::ValueReader::XLSX::Backend::LibXML" 
: { "file" : "lib/Excel/ValueReader/XLSX/Backend/LibXML.pm", "version" : "1.13" }, "Excel::ValueReader::XLSX::Backend::Regex" : { "file" : "lib/Excel/ValueReader/XLSX/Backend/Regex.pm", "version" : "1.13" } }, "release_status" : "stable", "resources" : { "license" : [ "http://dev.perl.org/licenses/" ], "repository" : { "url" : "https://github.com/damil/Excel-ValueReader-XLSX" } }, "version" : "1.14", "x_serialization_backend" : "JSON::PP version 4.16" } Excel-ValueReader-XLSX-1.14/META.yml000444000000000000 260014621334531 17425 0ustar00unknownunknown000000000000--- abstract: 'extracting values from Excel workbooks in XLSX format, fast' author: - 'Laurent Dami ' build_requires: Clone: '0' List::MoreUtils: '0' List::Util: '0' Module::Load::Conditional: '0.66' Test::More: '1.302195' configure_requires: Module::Build: '0.42' dynamic_config: 1 generated_by: 'Module::Build version 0.4234, CPAN::Meta::Converter version 2.150010' license: perl meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.4.html version: '1.4' name: Excel-ValueReader-XLSX provides: Excel::ValueReader::XLSX: file: lib/Excel/ValueReader/XLSX.pm version: '1.14' Excel::ValueReader::XLSX::Backend: file: lib/Excel/ValueReader/XLSX/Backend.pm version: '1.13' Excel::ValueReader::XLSX::Backend::LibXML: file: lib/Excel/ValueReader/XLSX/Backend/LibXML.pm version: '1.13' Excel::ValueReader::XLSX::Backend::Regex: file: lib/Excel/ValueReader/XLSX/Backend/Regex.pm version: '1.13' recommends: XML::LibXML::Reader: '0' requires: Archive::Zip: '1.61' Carp: '0' Date::Calc: '0' Module::Load: '0' Moose: '0' POSIX: '0' Scalar::Util: '0' perl: '5.012001' utf8: '0' resources: license: http://dev.perl.org/licenses/ repository: https://github.com/damil/Excel-ValueReader-XLSX version: '1.14' x_serialization_backend: 'CPAN::Meta::YAML version 0.018' Excel-ValueReader-XLSX-1.14/README.md000444000000000000 11013664571607 17422 0ustar00unknownunknown000000000000# Excel-ValueReader-XLSX Extracting values from Excel 
workbooks -- fast Excel-ValueReader-XLSX-1.14/benchmark.pl000444000000000000 542414363165626 20464 0ustar00unknownunknown000000000000
# Benchmark driver : runs the XLSX readers selected on the command line
# against the same workbook, lets each of them report the number of rows
# per sheet on STDERR, then prints elapsed / cpu / system times.
use utf8;
use strict;
use warnings;
use Getopt::Long;
use Excel::ValueReader::XLSX;
use Excel::Reader::XLSX;
use Spreadsheet::ParseXLSX;
use Data::XLSX::Parser;

# command-line options
GetOptions \my %opt,
  'xl_file=s',       # Excel file used for the comparisons
  'valuereader!',
  'vrlibxml!',
  'reader!',
  'parsexlsx!',
  'xparser!',
  ;

# wallclock time, followed by the first two values returned by times()
my ($start, $cpu, $system) = (time, times);

# run every reader that was requested through a boolean option
valuereader($opt{xl_file}) if $opt{valuereader};
vrlibxml($opt{xl_file})    if $opt{vrlibxml};
reader($opt{xl_file})      if $opt{reader};
parsexlsx($opt{xl_file})   if $opt{parsexlsx};
xparser($opt{xl_file})     if $opt{xparser};

my ($end, $ecpu, $esystem) = (time, times);

# %d truncates the differences to whole seconds
printf "%d elapsed, %d cpu, %d system\n", $end-$start, $ecpu-$cpu, $esystem-$system;


# Reads every sheet through Excel::ValueReader::XLSX with its default backend
# and warns the number of rows of each sheet.
sub valuereader {
  my $xl_file = shift;
  warn "using ValueReader\n";
  my $rv = Excel::ValueReader::XLSX->new(xlsx => $xl_file);
  foreach my $sheet_name ($rv->sheet_names) {
    my $vals   = $rv->values($sheet_name);
    my $n_rows = @$vals;
    warn "sheet $sheet_name has $n_rows rows\n";
  }
}

# Same as valuereader(), but explicitly selecting the 'LibXML' backend.
sub vrlibxml {
  my $xl_file = shift;
  warn "using ValueReader with LibXML\n";
  my $rv = Excel::ValueReader::XLSX->new(xlsx => $xl_file, using => 'LibXML');
  foreach my $sheet_name ($rv->sheet_names) {
    my $vals   = $rv->values($sheet_name);
    my $n_rows = @$vals;
    warn "sheet $sheet_name has $n_rows rows\n";
  }
}

# Reads every sheet through Excel::Reader::XLSX, collecting all cell values
# row by row so that the work done is comparable to the other readers.
sub reader {
  my $xl_file = shift;
  warn "using Excel::Reader::XLSX\n";
  my $reader   = Excel::Reader::XLSX->new();
  my $workbook = $reader->read_file($xl_file);
  for my $worksheet ( $workbook->worksheets() ) {
    my $sheet_name = $worksheet->name();
    my @rows;
    while ( my $row = $worksheet->next_row() ) {
      my @row;
      while ( my $cell = $row->next_cell() ) {
        push @row, $cell->value();
      }
      push @rows, \@row;
    }
    my $n_rows = @rows;
    warn "sheet $sheet_name has $n_rows rows\n";
  }
}

# Parses the workbook through Spreadsheet::ParseXLSX
# (the statement below continues on the next physical line of the file).
sub parsexlsx {
  my $xl_file = shift;
  warn "using Spreadsheet::ParseXLSX\n";
  my $parser = 
Spreadsheet::ParseXLSX->new(); my $workbook = $parser->parse($xl_file) or die $parser->error; for my $worksheet ( $workbook->worksheets() ) { my $sheet_name = $worksheet->get_name(); my ( $row_min, $row_max ) = $worksheet->row_range(); warn "sheet $sheet_name has $row_max rows\n"; } } sub xparser { my $xl_file = shift; warn "using Data::XLSX::Parser\n"; my $parser = Data::XLSX::Parser->new; my @rows; $parser->add_row_event_handler(sub { my ($row) = @_; push @rows, $row; }); $parser->open($xl_file); foreach my $sheet_name ($parser->workbook->names) { @rows = (); $parser->sheet_by_rid( "rId" . $parser->workbook->sheet_id( $sheet_name ) ); my $n_rows = @rows; warn "sheet $sheet_name has $n_rows rows\n"; @rows = (); } } Excel-ValueReader-XLSX-1.14/lib000755000000000000 014621334531 16567 5ustar00unknownunknown000000000000Excel-ValueReader-XLSX-1.14/lib/Excel000755000000000000 014621334531 17627 5ustar00unknownunknown000000000000Excel-ValueReader-XLSX-1.14/lib/Excel/ValueReader000755000000000000 014621334531 22026 5ustar00unknownunknown000000000000Excel-ValueReader-XLSX-1.14/lib/Excel/ValueReader/XLSX.pm000444000000000000 5312414621334475 23353 0ustar00unknownunknown000000000000
package Excel::ValueReader::XLSX;
use 5.12.1;
use utf8;
use Moose;
use Module::Load qw/load/;
use Date::Calc   qw/Add_Delta_Days/;
use POSIX        qw/strftime modf/;
use Carp         qw/croak/;

our $VERSION = '1.14';

#======================================================================
# ATTRIBUTES
#======================================================================

# PUBLIC ATTRIBUTES
has 'xlsx'            => (is => 'ro', isa => 'Str|FileHandle', required => 1); # path of xlsx file, or open filehandle
has 'using'           => (is => 'ro', isa => 'Str', default => 'Regex');       # name of backend class
has 'date_format'     => (is => 'ro', isa => 'Str', default => '%d.%m.%Y');
has 'time_format'     => (is => 'ro', isa => 'Str', default => '%H:%M:%S');
has 'datetime_format' => (is => 'ro', isa => 'Str',
                          builder => '_datetime_format', lazy => 1);
has 'date_formatter'  => (is => 'ro', isa => 'Maybe[CodeRef]',
                          builder => '_date_formatter', lazy => 1);

# ATTRIBUTES USED INTERNALLY, NOT DOCUMENTED
has 'backend'         => (is => 'ro', isa => 'Object', init_arg => undef,
                          builder => '_backend', lazy => 1,
                          handles => [qw/values base_year sheets active_sheet/]);

#======================================================================
# BUILDING
#======================================================================

# syntactic sugar for supporting ->new($path_or_handle) instead of
# ->new(xlsx => $path_or_handle). A single argument is taken as the xlsx
# source unless it is a plain hashref of named arguments; testing for
# "not a HASH ref" rather than "not a ref" is what lets a filehandle
# (a GLOB ref or an IO::Handle object) go through the short form.
around BUILDARGS => sub {
  my $orig  = shift;
  my $class = shift;

  return ( @_ == 1 && ref $_[0] ne 'HASH' ) ? $class->$orig(xlsx => $_[0])
                                            : $class->$orig(@_);
};

#======================================================================
# ATTRIBUTE CONSTRUCTORS
#======================================================================

# builds the backend instance : class name is derived from the 'using' attribute
sub _backend {
  my $self = shift;

  my $backend_class = ref($self) . '::Backend::' . $self->using;
  load $backend_class;

  return $backend_class->new(frontend => $self);
}

# default datetime format : date format and time format separated by a space
sub _datetime_format {
  my ($self) = @_;
  return $self->date_format . ' ' . $self->time_format;
}

# default date formatter : a closure around POSIX::strftime that chooses
# between the date, time and datetime formats of this instance
sub _date_formatter {
  my ($self) = @_;

  # local copies of the various formats so that we can build a closure
  my @formats = (undef,                     # 0 -- error
                 $self->date_format,        # 1 -- just a date
                 $self->time_format,        # 2 -- just a time
                 $self->datetime_format);   # 3 -- date and time

  my $strftime_formatter = sub {
    my ($xl_date_format, $y, $m, $d, $h, $min, $s, $ms) = @_;

    # choose the proper format for strftime
    my $ix = 0; # index into the @formats array
    $ix += 1 if $xl_date_format =~ /[dy]/; # the Excel format contains a date portion
    $ix += 2 if $xl_date_format =~ /[hs]/; # the Excel format contains a time portion
    my $strftime_format = $formats[$ix]
      or die "cell with unexpected Excel date format : $xl_date_format";

    # formatting through strftime
    my $formatted_date = strftime($strftime_format, $s, $min, $h, $d, $m-1, $y-1900);

    return $formatted_date;
  };

  return $strftime_formatter;
}

#======================================================================
# GENERAL METHODS
#======================================================================

# returns the list of sheet names, sorted by sheet position
sub sheet_names {
  my ($self) = @_;

  my $sheets = $self->sheets; # hashref of shape {$name => $sheet_position}

  my @sorted_names = sort {$sheets->{$a} <=> $sheets->{$b}} keys %$sheets;
  return @sorted_names;
}

sub A1_to_num { # convert Excel A1 reference format to a number
  my ($self, $string) = @_;

  # ordinal number for character just before 'A'
  state $base = ord('A') - 1;

  # iterate on 'digits' (letters of the A1 cell reference)
  my $num = 0;
  foreach my $digit (map {ord($_) - $base} split //, $string) {
    $num = $num*26 + $digit;
  }

  return $num;
}

# Converts an Excel numeric date $val into a formatted string.
# $date_format is the Excel format of the cell (passed through to the formatter);
# $date_formatter is an optional coderef overriding the instance's date_formatter.
# Dies if no formatter is available.
sub formatted_date {
  my ($self, $val, $date_format, $date_formatter) = @_;

  # separate date (integer part) from time (fractional part)
  my ($time, $n_days) = modf($val);

  # Convert $n_days into a date in Date::Calc format (year, month, day).
  # The algorithm is quite odd because in the 1900 system, 01.01.1900 == 1 while
  # in the 1904 system, 01.01.1904 == 0; furthermore, in the 1900 system,
  # Excel treats 1900 as a leap year.
  my $base_year = $self->base_year;
  if ($base_year == 1900) {
    my $is_after_february_1900 = $n_days > 60;
    $n_days -= $is_after_february_1900 ? 2 : 1;
  }
  my @d = Add_Delta_Days($base_year, 1, 1, $n_days);

  # decode the fractional part (the time) into hours, minutes, seconds, milliseconds
  my @t;
  foreach my $subdivision (24, 60, 60, 1000) {
    $time *= $subdivision;
    ($time, my $time_portion) = modf($time);
    push @t, $time_portion;
  }

  # dirty hack to deal with float imprecisions : if 999 millisecs, round to the next second
  my ($h, $m, $s, $ms) = @t;
  if ($ms == 999) {
    $s += 1, $ms = 0;
    if ($s == 60) {
      $m += 1, $s = 0;
      if ($m == 60) {
        $h += 1, $m = 0;
      }
    }
  }
  # NOTE : because of this hack, theoretically we could end up with a value
  # like 01.01.2000 24:00:00, semantically equal to 02.01.2000 00:00:00 but different
  # in its rendering.

  # call the date_formatter subroutine
  $date_formatter //= $self->date_formatter
    or die ref($self) . " has no date_formatter subroutine";
  my $formatted_date = $date_formatter->($date_format, @d, $h, $m, $s, $ms);

  return $formatted_date;
}

#======================================================================
# METHODS FOR PARSING EXCEL TABLES
#======================================================================

# returns the list of table names, sorted by table id
sub table_names {
  my ($self) = @_;

  my $table_info = $self->backend->table_info;

  # sort on table id (field [1] in table_info arrayrefs)
  my @table_names = sort {$table_info->{$a}[1] <=> $table_info->{$b}->[1]} keys %$table_info;

  return @table_names;
}

# info fields returned from the backend parsing methods
my @table_info_fields = qw/sheet table_id ref columns no_headers/;

# the same fields are also the valid args for the method call
my $is_valid_arg = "^(" . join("|", @table_info_fields) . ")\$";

# Returns the rows of a table as hashrefs keyed by column name.
# Args : either a single table name, or named args 'name', or any of
# sheet / table_id / ref / columns / no_headers. Croaks on an unknown table
# name, on invalid args or on undefined column names.
# In scalar context returns \@rows; in list context returns (\@cols, \@rows).
sub table {
  my $self = shift;

  # syntactic sugar : ->table('foo') is treated as ->table(name => 'foo')
  my %args = @_ == 1 ? (name => $_[0]) : @_;

  # if called with a table name, derive all other args from internal workbook info
  if (my $table_name = delete $args{name}) {
    !$args{$_} or croak "table() : arg '$_' is incompatible with 'name'" for @table_info_fields;
    my $table_info = $self->backend->table_info->{$table_name}
      or croak sprintf "Excel file '%s' contains no table named '%s'", $self->xlsx, $table_name;
    @args{@table_info_fields} = @$table_info;
  }

  # check args
  my @invalid_args = grep {!/$is_valid_arg/} keys %args;
  croak "invalid args to table(): " . join ", ", @invalid_args if @invalid_args;

  # get raw values from the sheet
  my $values = $self->values($args{sheet});

  # restrict values to the table subrange (if applicable)
  $values = $self->_subrange($values, $args{ref}) if $args{ref};

  # take headers from first row if not already given in $args{columns};
  # an empty sheet yields an empty list of columns instead of a deref error
  $args{columns} //= $values->[0] // [];

  # if this table has headers (which is almost always the case), drop the header row
  shift @$values unless $args{no_headers};

  # build a table of hashes. This could be done with a simple map(), but using a loop
  # avoids storing 2 copies of cell values in memory : @$values is shifted when @rows is pushed.
  my @cols = @{$args{columns}};
  croak "table contains undefined columns" if grep {!defined $_} @cols;
  my @rows;
  while (my $vals = shift @$values) {
    my %row;
    @row{@cols} = @$vals;
    push @rows, \%row;
  }

  # in scalar context, just return the rows. In list context, also return the column names
  return wantarray ? (\@cols, \@rows) : \@rows;
}

# Restricts a grid of values to the rectangle described by $ref
# (Excel notation, like for example "A1:D34"). Returns a new arrayref of rows;
# croaks if $ref cannot be parsed.
sub _subrange {
  my ($self, $values, $ref) = @_;

  # parse rows and columns from the $ref string (of shape like for example "A1:D34")
  my ($col1, $row1, $col2, $row2) = $ref =~ /^([A-Z]+)(\d+):([A-Z]+)(\d+)$/
    or croak "_subrange : invalid ref: $ref";

  # restrict to the row range. The upper bound is clamped to the number of rows
  # actually present, so that the slice never produces undefined rows.
  if ($row1 > 1 || $row2 < @$values) {
    my $last_row = $row2 < @$values ? $row2 : scalar @$values;
    $values = [ @$values[$row1-1 .. $last_row-1] ];
  }

  # restrict to the column range. Column indices are zero-based here, so the
  # slice is needed as soon as the range does not start at column A (index 0),
  # or when some row extends to the right of the last column of the range.
  my ($first_col, $last_col) = map {$self->A1_to_num($_) - 1} ($col1, $col2);
  if ($first_col > 0 || grep {$_ && @$_ > $last_col + 1} @$values) {
    my @col_range = ($first_col .. $last_col);
    $values = [map { [ @{$_ || []}[@col_range] ]} @$values];
  }

  return $values;
}

1;

__END__

=head1 NAME Excel::ValueReader::XLSX - extracting values from Excel workbooks in XLSX format, fast =head1 SYNOPSIS my $reader = Excel::ValueReader::XLSX->new(xlsx => $filename_or_handle); # .. or with syntactic sugar : my $reader = Excel::ValueReader::XLSX->new($filename_or_handle); # .. or with LibXML backend : my $reader = Excel::ValueReader::XLSX->new(xlsx => $filename_or_handle, using => 'LibXML'); foreach my $sheet_name ($reader->sheet_names) { my $grid = $reader->values($sheet_name); my $n_rows = @$grid; print "sheet $sheet_name has $n_rows rows; ", "first cell contains : ", $grid->[0][0]; } foreach my $table_name ($reader->table_names) { my ($columns, $rows) = $reader->table($table_name); my $n_data_rows = @$rows; my $n_columns = @$columns; print "table $table_name has $n_data_rows rows and $n_columns columns; ", "column 'foo' in first row contains : ", $rows->[0]{foo}; } my $first_grid = $reader->values(1); # if using numerical indices, start at 1 =head1 DESCRIPTION =head2 Purpose This module reads the contents of an Excel file in XLSX format. Unlike other modules like L or L, there is no support for reading formulas, formats or other Excel internal information; all you get are plain values -- but you get them much faster ! Besides, this module also has support for parsing Excel tables. =head2 Backends Two different backends may be used for extracting values : =over =item Regex this backend uses regular expressions to parse the XML content. =item LibXML this backend uses L<XML::LibXML::Reader> to parse the XML content. It is probably safer but about three times slower than the Regex backend (but still much faster than L<Spreadsheet::ParseXLSX>). =back The default is the C<Regex> backend. 
=head2 Sheet numbering Although worksheets are usually accessed by name, they may also be accessed by numerical indices, I<starting at 1>. Some other Perl parsing modules use a different convention, where the first sheet has index 0. Here index 1 was chosen to be consistent with the common API for "collections" in Microsoft Office object model. =head1 METHODS =head2 new my $reader = Excel::ValueReader::XLSX->new(xlsx => $filename_or_handle, using => $backend, %date_formatting_options); The C<xlsx> argument is mandatory and points to the C<.xlsx> file to be parsed, or is an open filehandle. The C<using> argument is optional; it specifies the backend to be used for parsing; default is 'Regex'. As syntactic sugar, a shorter form is admitted : my $reader = Excel::ValueReader::XLSX->new($filename_or_handle); Optional parameters for formatting date and time values are described in the L</DATE AND TIME FORMATS> section below. =head2 sheet_names my @sheets = $reader->sheet_names; Returns the list of worksheet names, in the same order as in the Excel file. =head2 active_sheet my $active_sheet_number = $reader->active_sheet; Returns the numerical index (starting at 1) of the sheet that was active when the file was last saved. May return C<undef>. =head2 values my $grid = $reader->values($sheet); Returns a bidimensional array of scalars, corresponding to cell values in the specified worksheet. The C<$sheet> argument can be either a sheet name or a sheet position (starting at 1). Unlike the original Excel cells, positions in the grid are zero-based, and the grid is indexed by row first, then by column; so for example the content of cell B3 is in C<< $grid->[2][1] >>. The grid is sparse : the size of each row depends on the position of the last non-empty cell in that row. Thanks to Perl's auto-vivification mechanism, any attempt to access a non-existent cell will automatically create the corresponding cell within the grid. 
The number of rows and columns in the grid can be computed like this : my $nb_rows = @$grid; my $nb_cols = max map {scalar @$_} @$grid; # must import List::Util::max =head2 table_names my @table_names = $reader->table_names; Returns the list of names of tables registered in this workbook. =head2 table my $rows = $reader->table(name => $table_name); # or just : $reader->table($table_name) # or my ($columns, $rows) = $reader->table(name => $table_name); # or my ($columns, $rows) = $reader->table(sheet => $sheet [, ref => $range] [, columns => \@columns] [, no_headers => 1] ); In its simplest form, this method returns the content of an Excel table referenced by its table name (in Excel, the table name appears and can be modified through the ribbon tab entry "Table tools / Design"). The table name is passed either through the named argument C<name>, or positionally as the sole argument to the method. Rows are returned as hashrefs, where keys of the hashes correspond to column names in the table. In scalar context, the method just returns an arrayref to the list of rows. In list context, the method returns a pair, where the first element is an arrayref of column names, and the second element is an arrayref to the list of rows. Instead of specifying a table name, it is also possible to give a sheet name or sheet number. By default, this considers the whole sheet content as a single table, where column names are on the first row. However, additional arguments can be supplied to change the default behaviour : =over =item ref a specific range of cells within the sheet that contain the table rows and columns. The range must be expressed using traditional Excel notation, like for example C<"C9:E23"> (columns 3 to 5, rows 9 to 23). =item columns an arrayref containing the list of column names. If absent, column names will be taken from the first row in the table. 
=item no_headers if true, the first row in the table will be treated as a regular data row, instead of being treated as a list of column names. In that case, since column names cannot be inferred from cell values in the first row, the C<columns> argument to the method must be present. =back =head1 AUXILIARY METHODS =head2 A1_to_num my $col_num = $reader->A1_to_num('A'); # 1 $col_num = $reader->A1_to_num('AZ'); # 52 $col_num = $reader->A1_to_num('AA'); # 27 $col_num = $reader->A1_to_num('ABC'); # 731 Converts a column expressed as a sequence of capital letters (in Excel's "A1" notation) into the corresponding numeric value. =head2 formatted_date my $date = $reader->formatted_date($numeric_date, $excel_date_format); Given a numeric date, this method returns a string date formatted according to the I<date_formatter> routine explained in the next section. The C<$excel_date_format> argument should be the Excel format string for that specific cell; it is used only for deciding if the numeric value should be presented as a date, as a time, or both. Optionally, a custom date formatter callback could be passed as third argument. =head1 DATE AND TIME FORMATS =head2 Date and time handling In Excel, date and time values are stored as numeric values, where the integer part represents the date, and the fractional part represents the time. What distinguishes such numbers from ordinary numbers is the I<numeric format> applied to the cells where they appear. Numeric formats in Excel are complex to reproduce, in particular because they are locale-dependent; therefore the present module does not attempt to faithfully interpret Excel formats. It just infers from formats which cells should be presented as date and/or time values. All such values are then presented through the same I<date_formatter> routine. The default formatter is based on L<POSIX/strftime>; other behaviours may be specified through the C<date_formatter> parameter (explained below). 
=head2 Parameters for the default strftime formatter When using the default strftime formatter, the following parameters may be passed to the constructor : =over =item date_format The L<POSIX/strftime> format for representing dates. The default is C<%d.%m.%Y>. =item time_format The L<POSIX/strftime> format for representing times. The default is C<%H:%M:%S>. =item datetime_format The L<POSIX/strftime> format for representing date and time together. The default is the concatenation of C<date_format> and C<time_format>, with a space in between. =back =head2 Writing a custom formatter A custom algorithm for date formatting can be specified as a parameter to the constructor my $reader = Excel::ValueReader::XLSX->new(xlsx => $filename, date_formatter => sub {...}); If this parameter is C<undef>, date formatting is canceled and therefore date and time values will be presented as plain numbers. If not C<undef>, the date formatting routine will be called as : $date_formatter->($excel_date_format, $year, $month, $day, $hour, $minute, $second, $millisecond); where =over =item * C<$excel_date_format> is the Excel numbering format associated to that cell, like for example C<mm-dd-yy> or C<h:mm:ss>. See the Excel documentation for the syntax description. This is useful to decide if the value should be presented as a date, a time, or both. The present module uses a simple heuristic : if the format contains C<d> or C<y>, it should be presented as a date; if the format contains C<h> or C<s>, it should be presented as a time. The letter C<m> is not taken into consideration because it is ambiguous : depending on the position in the format string, it may represent either a "month" or a "minute". =item * C<year> is the full year, such as 1993 or 2021. The date system of the Excel file (either 1900 or 1904, see L) is properly taken into account. Excel has no support for dates prior to 1900 or 1904, so the C<year> component will always be above this value. 
=item * C<month> is the numeric value of the month, starting at 1 =item * C<day> is the numeric value of the day in month, starting at 1 =item * C<$hour>, C<$minute>, C<$second>, C<$millisecond> obviously contain the corresponding numeric values. =back =head1 CAVEATS =over =item * This module was optimized for speed, not for completeness of OOXML-SpreadsheetML support; so there may be some edge cases where the output is incorrect with respect to the original Excel data. =item * Embedded newline characters in strings are stored in Excel as C<< \r\n >>, following the old Windows convention. When retrieved through the C<Regex> backend, the result contains the original C<< \r >> and C<< \n >> characters; but when retrieved through the LibXML backend, C<< \r >> characters are silently removed by the C<XML::LibXML> package. =back =head1 SEE ALSO The official reference for OOXML-SpreadsheetML format is in L. Introductory material on XLSX file structure can be found at L. The CPAN module L<Data::XLSX::Parser> is claimed to be in alpha stage; it seems to be working but the documentation is insufficient -- I had to inspect the test suite to understand how to use it. Another unpublished but working module for parsing Excel files in Perl can be found at L. Some test cases were borrowed from that distribution. Conversions from and to Excel internal date format can also be performed through the L module. =head1 BENCHMARKS Below are some benchmarks computed with the program C<benchmark.pl> in this distribution. The task was to parse an Excel file of five worksheets with about 62600 rows in total, and report the number of rows per sheet. Reported figures are in seconds. 
Excel::ValueReader::XLSX::Regex 11 elapsed, 10 cpu, 0 system Excel::ValueReader::XLSX::LibXML 35 elapsed, 34 cpu, 0 system [unpublished] Excel::Reader::XLSX 39 elapsed, 37 cpu, 0 system Spreadsheet::ParseXLSX 244 elapsed, 240 cpu, 1 system Data::XLSX::Parser 37 elapsed, 35 cpu, 0 system These figures show that the regex version is about 3 times faster than the LibXML version, and about 22 times faster than L<Spreadsheet::ParseXLSX>. Tests with a bigger file of about 90000 rows showed similar ratios. Modules C<Excel::Reader::XLSX> (unpublished) and L<Data::XLSX::Parser> are based on L<XML::LibXML> like L<Excel::ValueReader::XLSX::Backend::LibXML>; execution times for those three modules are very close. =head1 ACKNOWLEDGMENTS =over =item * David Flink signaled (and fixed) a bug about strings with embedded newline characters, and signaled that the 'r' attribute in cells is optional. =item * Ulibuck signaled several minor bugs on the LibXML backend. =item * H.Merijn Brand suggested additions to the API and several improvements to the source code. =back =head1 AUTHOR Laurent Dami, E<lt>dami at cpan.orgE<gt> =head1 COPYRIGHT AND LICENSE Copyright 2020-2023 by Laurent Dami. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
=cut Excel-ValueReader-XLSX-1.14/lib/Excel/ValueReader/XLSX000755000000000000 014621334531 22624 5ustar00unknownunknown000000000000Excel-ValueReader-XLSX-1.14/lib/Excel/ValueReader/XLSX/Backend.pm000444000000000000 1360514522257270 24677 0ustar00unknownunknown000000000000
package Excel::ValueReader::XLSX::Backend;
use utf8;
use 5.12.1;
use Moose;
use Archive::Zip 1.61 qw(AZ_OK);
use Carp              qw/croak/;
use Scalar::Util      qw/openhandle/;

our $VERSION = '1.13';

#======================================================================
# ATTRIBUTES
#======================================================================

# weak back-reference to the frontend; two helper methods are delegated to it
has 'frontend' => (
  is       => 'ro',
  isa      => 'Excel::ValueReader::XLSX',
  required => 1,
  weak_ref => 1,
  handles  => [qw/A1_to_num formatted_date/],
);

# lazily-built attributes, given as attribute name => type constraint;
# each one is built by a method named after the attribute, prefixed by '_'
my %lazy_attrs = (
  zip             => 'Archive::Zip',
  date_styles     => 'ArrayRef',
  strings         => 'ArrayRef',
  workbook_data   => 'HashRef',
  table_info      => 'HashRef',
  sheet_for_table => 'ArrayRef',
);

foreach my $attr_name (keys %lazy_attrs) {
  has $attr_name => (
    is       => 'ro',
    isa      => $lazy_attrs{$attr_name},
    builder  => "_$attr_name",
    init_arg => undef,
    lazy     => 1,
  );
}

#======================================================================
# ATTRIBUTE CONSTRUCTORS
#======================================================================

# opens the xlsx source of the frontend (a file path or an open filehandle)
# as an Archive::Zip instance; dies if the archive cannot be read
sub _zip {
  my ($self) = @_;

  my $source  = $self->frontend->xlsx;
  my $archive = Archive::Zip->new;

  my ($status, $source_name);
  if (openhandle($source)) {
    $source_name = 'filehandle';
    $status      = $archive->readFromFileHandle($source);
  }
  else {
    $source_name = $source;
    $status      = $archive->read($source);
  }

  $status == AZ_OK
    or die "cannot unzip from $source_name";

  return $archive;
}

# builds a hashref {$table_name => [$sheet_id, $table_id, $ref, $columns, $no_headers]}
# from the xl/tables/table*.xml members of the archive
sub _table_info {
  my ($self) = @_;

  my @member_names = map {$_->fileName}
                         $self->zip->membersMatching(qr[^xl/tables/table\d+\.xml$]);

  my %info_for_table;
  foreach my $member_name (@member_names) {
    my ($table_id) = $member_name =~ /table(\d+)\.xml/;
    my $xml        = $self->_zip_member_contents($member_name);

    # _parse_table_xml() is defined in subclasses
    my ($name, $ref, $columns, $no_headers) = $self->_parse_table_xml($xml);

    my $sheet_id = $self->sheet_for_table->[$table_id]
      or croak "could not find sheet id for table $table_id";

    $info_for_table{$name} = [$sheet_id, $table_id, $ref, $columns, $no_headers];
  }

  return \%info_for_table;
}

# builds an arrayref indexed by table id, holding the id of the sheet that
# references each table, from the xl/worksheets/_rels/sheet*.xml.rels members
sub _sheet_for_table {
  my ($self) = @_;

  my @member_names = map {$_->fileName}
                         $self->zip->membersMatching(qr[^xl/worksheets/_rels/sheet\d+\.xml\.rels$]);

  my @sheet_ids;
  foreach my $member_name (@member_names) {
    my ($sheet_id) = $member_name =~ /sheet(\d+)\.xml/;
    my $xml        = $self->_zip_member_contents($member_name);

    # _table_targets() is defined in subclasses
    my @table_ids = $self->_table_targets($xml);
    @sheet_ids[@table_ids] = ($sheet_id) x @table_ids;
  }

  return \@sheet_ids;
}

# attribute constructors for _date_styles, _strings and _workbook_data are supplied in subclasses

#======================================================================
# METHODS
#======================================================================

# accessors to workbook data
sub base_year {
  my ($self) = @_;
  return $self->workbook_data->{base_year};
}

sub sheets {
  my ($self) = @_;
  return $self->workbook_data->{sheets};
}

sub active_sheet {
  my ($self) = @_;
  return $self->workbook_data->{active_sheet};
}

# returns a list where the element at index $numFmtId is the builtin Excel
# date/time format for that id (other positions are undef)
sub Excel_builtin_date_formats {

  # source : section 18.8.30 numFmt (Number Format) in ECMA-376-1:2016
  # Office Open XML File Formats - Fundamentals and Markup Language Reference
  my %format_for = (
    14 => 'mm-dd-yy',
    15 => 'd-mmm-yy',
    16 => 'd-mmm',
    17 => 'mmm-yy',
    18 => 'h:mm AM/PM',
    19 => 'h:mm:ss AM/PM',
    20 => 'h:mm',
    21 => 'h:mm:ss',
    22 => 'm/d/yy h:mm',
    45 => 'mm:ss',
    46 => '[h]:mm:ss',
    47 => 'mmss.0',
  );

  my @numFmt;
  $numFmt[$_] = $format_for{$_} foreach keys %format_for;

  return @numFmt;
}

# returns the decoded contents of a zip member; dies if there are none
sub _zip_member_contents {
  my ($self, $member) = @_;

  my $contents = $self->zip->contents($member);
  $contents
    or die "no contents for member $member";
  utf8::decode($contents);

  return $contents;
}

# translates a sheet name or a numerical sheet position into the name of the
# zip member for that sheet; dies on a missing or unknown sheet
sub _zip_member_name_for_sheet {
  my ($self, $sheet) = @_;

  # check that sheet name was given
  $sheet or die "->values(): missing sheet name";

  # get sheet id : either registered under that name, or given as a number
  my $id = $self->sheets->{$sheet};
  if (!defined $id && $sheet =~ /^\d+$/) {
    $id = $sheet;
  }
  $id or die "no such sheet: $sheet";

  # construct member name for that sheet
  return "xl/worksheets/sheet$id.xml";
}

1;

__END__

=head1 NAME Excel::ValueReader::XLSX::Backend -- abstract class, parent for the Regex and LibXML backends =head1 DESCRIPTION L<Excel::ValueReader::XLSX> has two possible implementation backends for parsing C<XLSX> files : L<Excel::ValueReader::XLSX::Backend::Regex>, based on regular expressions, or L<Excel::ValueReader::XLSX::Backend::LibXML>, based on the libxml2 library. Both backends share some common features, so the present class implements those common features. This is about internal implementation; it should be of no interest to external users of the module. =head1 ATTRIBUTES A backend instance possesses the following attributes : =over =item frontend a weak reference to the frontend instance =item zip an L<Archive::Zip> instance for accessing the contents of the C<xlsx> file =item date_styles an array of numeric styles for presenting dates and times. Styles are either Excel's builtin styles, or custom styles defined in the workbook. =item strings an array of all shared strings within the workbook =item workbook_data some metadata information about the workbook =item table_info a hash from table names to arrays of shape C<< [$sheet_id, $table_id, $ref, $columns, $no_headers] >> =item sheet_for_table an array indexed by table id, holding the id of the sheet that references each table =back =head1 ABSTRACT METHODS Not defined in this abstract class, but implemented in subclasses. =over =item values Inspects all cells within the XLSX files and returns a bi-dimensional array of values. 
=back

=head1 AUTHOR

Laurent Dami, E<lt>dami at cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2021 by Laurent Dami.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

Excel-ValueReader-XLSX-1.14/lib/Excel/ValueReader/XLSX/Backend000755000000000000 014621334531 24153 5ustar00unknownunknown000000000000Excel-ValueReader-XLSX-1.14/lib/Excel/ValueReader/XLSX/Backend/LibXML.pm000444000000000000 2335414522257305 25767 0ustar00unknownunknown000000000000
package Excel::ValueReader::XLSX::Backend::LibXML;
use utf8;
use 5.12.1;
use Moose;
use Scalar::Util qw/looks_like_number/;
use XML::LibXML::Reader qw/XML_READER_TYPE_END_ELEMENT/;

extends 'Excel::ValueReader::XLSX::Backend';

our $VERSION = '1.13';

#======================================================================
# LAZY ATTRIBUTE CONSTRUCTORS
#======================================================================

# Returns an arrayref of the shared strings read from xl/sharedStrings.xml.
# Each 'si' node starts a new string; the values of all '#text' nodes met
# until the next 'si' node are concatenated into that string.
sub _strings {
  my $self = shift;

  my $reader = $self->_xml_reader_for_zip_member('xl/sharedStrings.xml');

  my @strings;
  my $last_string;
 NODE:
  while ($reader->read) {
    next NODE if $reader->nodeType == XML_READER_TYPE_END_ELEMENT;
    my $node_name = $reader->name;

    if ($node_name eq 'si') {
      # flush the previous string (possibly empty) and start a new one
      push @strings, $last_string if defined $last_string;
      $last_string = '';
    }
    elsif ($node_name eq '#text') {
      $last_string .= $reader->value;
    }
  }

  # flush the last string
  push @strings, $last_string if defined $last_string;

  return \@strings;
}


# Returns a hashref of workbook metadata read from xl/workbook.xml :
#   sheets       => hashref (sheet name => sheet id, numbered from 1 in document order)
#   base_year    => 1900, or 1904 if the workbook declares the 1904 calendar
#   active_sheet => 1-based index of the active tab; this key is only present
#                   if the workbook has an explicit 'activeTab' attribute
# Dies if a 'sheet' node has no 'name' attribute.
sub _workbook_data {
  my $self = shift;

  my %workbook_data = (sheets => {}, base_year => 1900);
  my $sheet_id      = 1;

  my $reader = $self->_xml_reader_for_zip_member('xl/workbook.xml');

 NODE:
  while ($reader->read) {
    next NODE if $reader->nodeType == XML_READER_TYPE_END_ELEMENT;
    my $node_name = $reader->name;

    if ($node_name eq 'sheet') {
      # test on definedness, not on truth : "0" is an acceptable sheet name
      my $name = $reader->getAttribute('name')
        // die "sheet node without name";
      $workbook_data{sheets}{$name} = $sheet_id++;
    }
    elsif ($node_name eq 'workbookPr') {
      my $date_attr = $reader->getAttribute('date1904') // '';
      $workbook_data{base_year} = 1904
        if $date_attr eq '1' or $date_attr eq 'true'; # this workbook uses the 1904 calendar
    }
    elsif ($node_name eq 'workbookView') {
      # activeTab is a 0-based index, so the value "0" (first tab) is meaningful
      # and must not be mistaken for a missing attribute
      my $active_attr = $reader->getAttribute('activeTab');
      $workbook_data{active_sheet} = $active_attr + 1 if defined $active_attr;
    }
  }

  return \%workbook_data;
}


# Returns an arrayref indexed by cell format number (position of the 'xf' node
# within 'cellXfs'); each entry is the date/time format code associated with
# that cell format, or undef if the cell format is not a date/time format.
sub _date_styles {
  my $self = shift;

  state $date_style_regex = qr{[dy]|\bmm\b};
  my @date_styles;

  # read from the styles.xml zip member
  my $xml_reader = $self->_xml_reader_for_zip_member('xl/styles.xml');

  # start with Excel builtin number formats for dates and times
  my @numFmt = $self->Excel_builtin_date_formats;

  # when defined, this is a triple [$name, $depth, $handler] : nodes with that
  # name at that depth are passed to the handler instead of the regular treatment
  my $expected_subnode = undef;

  # add other date formats explicitly specified in this workbook
 NODE:
  while ($xml_reader->read) {
    next NODE if $xml_reader->nodeType == XML_READER_TYPE_END_ELEMENT;

    # special treatment for some specific subtrees -- see 'numFmt' and 'xf' below
    if ($expected_subnode) {
      my ($name, $depth, $handler) = @$expected_subnode;
      if ($xml_reader->name eq $name && $xml_reader->depth == $depth) {
        # process that subnode and go to the next node
        $handler->();
        next NODE;
      }
      elsif ($xml_reader->depth < $depth) {
        # finished handling subnodes; back to regular node treatment
        $expected_subnode = undef;
      }
    }

    # regular node treatment
    if ($xml_reader->name eq 'numFmts') {
      # start parsing nodes for numeric formats
      $expected_subnode = [numFmt => $xml_reader->depth+1 => sub {
        my $id   = $xml_reader->getAttribute('numFmtId');
        my $code = $xml_reader->getAttribute('formatCode');
        $numFmt[$id] = $code if $id && $code && $code =~ $date_style_regex;
      }];
    }
    elsif ($xml_reader->name eq 'cellXfs') {
      # start parsing nodes for cell formats
      $expected_subnode = [xf => $xml_reader->depth+1 => sub {
        # an 'xf' node without numFmtId has no date format
        my $numFmtId = $xml_reader->getAttribute('numFmtId');
        my $code     = defined $numFmtId ? $numFmt[$numFmtId] : undef;

        # one entry per 'xf' node, in document order; $code may be undef
        push @date_styles, $code;
      }];
    }
  }

  return \@date_styles;
}

#======================================================================
# METHODS
#======================================================================

# Returns a new XML::LibXML::Reader that parses the XML string $xml.
sub _xml_reader {
  my ($self, $xml) = @_;

  return XML::LibXML::Reader->new(
    string     => $xml,
    no_blanks  => 1,
    no_network => 1,
    huge       => 1,
   );
}


# Returns an XML reader positioned on the contents of zip member $member_name.
sub _xml_reader_for_zip_member {
  my ($self, $member_name) = @_;

  return $self->_xml_reader($self->_zip_member_contents($member_name));
}


# Returns a reference to an array of rows for worksheet $sheet; each row is
# an arrayref of cell values. Rows without any cell are empty arrayrefs.
sub values {
  my ($self, $sheet) = @_;

  # prepare for traversing the XML structure
  my $has_date_formatter = $self->frontend->date_formatter;
  my $sheet_member_name  = $self->_zip_member_name_for_sheet($sheet);
  my $xml_reader         = $self->_xml_reader_for_zip_member($sheet_member_name);

  my @data;
  my ($row, $col) = (0, 0);
  my ($cell_type, $cell_style, $seen_node);

  # iterate through XML nodes
 NODE:
  while ($xml_reader->read) {
    my $node_name = $xml_reader->name;

    # closing tags carry no information, except the one that ends the cell data
    if ($xml_reader->nodeType == XML_READER_TYPE_END_ELEMENT) {
      last NODE if $node_name eq 'sheetData';
      next NODE;
    }

    # new row : take the given row number, or else increment the current one
    if ($node_name eq 'row') {
      $row = $xml_reader->getAttribute('r') // $row + 1;
      $col = 0;
      next NODE;
    }

    # new cell : store its col/row reference, its type and its style
    if ($node_name eq 'c') {
      my $A1_cell_ref = $xml_reader->getAttribute('r');
      if ($A1_cell_ref) {
        my ($c, $r) = ($A1_cell_ref =~ /^([A-Z]+)(\d+)$/);
        ($col, $row) = ($self->A1_to_num($c), $r);
      }
      else {
        # no explicit reference : this is the next cell within the current row
        $col += 1;
      }
      $cell_type  = $xml_reader->getAttribute('t');
      $cell_style = $xml_reader->getAttribute('s');
      $seen_node  = '';
      next NODE;
    }

    # remember that we have seen a 'value' or 'text' or 'formula' node
    if ($node_name =~ /^[vtf]$/) {
      $seen_node = $node_name;
      next NODE;
    }

    # from here on, only text nodes are of interest
    next NODE if $node_name ne '#text';

    my $val = $xml_reader->value;
    $cell_type //= '';

    if ($seen_node eq 'v') {
      if ($cell_type eq 's') {
        # string -- the value is an index into the global array of shared strings
        if (looks_like_number($val)) {
          $val = $self->strings->[$val];
        }
        else {
          warn "unexpected non-numerical value: $val inside a node of shape \n";
        }
      }
      elsif ($cell_type eq 'e') {
        # error -- silently replace by undef
        $val = undef;
      }
      elsif ($cell_type =~ /^(n|d|b|str|)$/) {
        # number, date, boolean, formula string or no type : content is already in $val.
        # If the cell style is a date style, replace the number by the formatted date
        if ($has_date_formatter && $cell_style && looks_like_number($val) && $val >= 0) {
          my $date_style = $self->date_styles->[$cell_style];
          $val = $self->formatted_date($val, $date_style) if $date_style;
        }
      }
      else {
        # handle unexpected cases
        warn "unsupported type '$cell_type' in cell L${row}C${col}\n";
        $val = undef;
      }

      # insert this value into the global data array
      $data[$row-1][$col-1] = $val;
    }
    elsif ($seen_node eq 't' && $cell_type eq 'inlineStr') {
      # inline string -- accumulate all #text nodes until next cell
      no warnings 'uninitialized';
      $data[$row-1][$col-1] .= $val;
    }
    elsif ($seen_node ne 'f') {
      # text inside a formula node is just ignored; any other text is unexpected
      warn "unexpected text node in cell L${row}C${col}: $val\n";
    }
  }

  # insert arrayrefs for empty rows
  for my $data_row (@data) {
    $data_row //= [];
  }

  return \@data;
}


# Returns the list of table ids (positive integers) targeted by the
# 'Relationship' nodes of a worksheet "rels" document.
sub _table_targets {
  my ($self, $rel_xml) = @_;

  my $xml_reader = $self->_xml_reader($rel_xml);

  my @table_targets;

  # iterate through XML nodes
 NODE:
  while ($xml_reader->read) {
    next NODE if $xml_reader->nodeType == XML_READER_TYPE_END_ELEMENT;
    next NODE if $xml_reader->name ne 'Relationship';

    # just store the table id, if the target is a table
    my ($table_id) = $xml_reader->getAttribute('Target') =~ m[tables/table(\d+)\.xml];
    push @table_targets, $table_id if defined $table_id;
  }

  return @table_targets;
}


# Parses the XML description of an Excel table. Returns a list
# ($name, $ref, \@columns, $no_headers) where $no_headers is true if the
# 'headerRowCount' attribute of the table is "0".
sub _parse_table_xml {
  my ($self, $xml) = @_;

  my $xml_reader = $self->_xml_reader($xml);
  my ($name, $ref, $no_headers, @columns);

  # iterate through XML nodes
 NODE:
  while ($xml_reader->read) {
    next NODE if $xml_reader->nodeType == XML_READER_TYPE_END_ELEMENT;
    my $node_name = $xml_reader->name;

    if ($node_name eq 'tableColumn') {
      push @columns, $xml_reader->getAttribute('name');
    }
    elsif ($node_name eq 'table') {
      my $header_row_count = $xml_reader->getAttribute('headerRowCount') // "";
      $name       = $xml_reader->getAttribute('displayName');
      $ref        = $xml_reader->getAttribute('ref');
      $no_headers = $header_row_count eq "0";
    }
  }

  return ($name, $ref, \@columns, $no_headers);
}

1;

__END__

=head1 NAME

Excel::ValueReader::XLSX::Backend::LibXML - using LibXML for extracting values from Excel workbooks

=head1 DESCRIPTION

This is one of two backend modules for L<Excel::ValueReader::XLSX>; the other
possible backend is L<Excel::ValueReader::XLSX::Backend::Regex>.

This backend parses OOXML structures using L<XML::LibXML::Reader>.

=head1 AUTHOR

Laurent Dami, E<lt>dami at cpan.orgE<gt>

=head1 COPYRIGHT AND LICENSE

Copyright 2020-2022 by Laurent Dami.

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

Excel-ValueReader-XLSX-1.14/lib/Excel/ValueReader/XLSX/Backend/Regex.pm000444000000000000 2151414522257320 25743 0ustar00unknownunknown000000000000package Excel::ValueReader::XLSX::Backend::Regex; use utf8; use 5.12.1; use Moose; use Scalar::Util qw/looks_like_number/; use Carp qw/croak/; extends 'Excel::ValueReader::XLSX::Backend'; our $VERSION = '1.13'; #====================================================================== # LAZY ATTRIBUTE CONSTRUCTORS #====================================================================== sub _strings { my $self = shift; my @strings; # read from the sharedStrings zip member my $contents = $self->_zip_member_contents('xl/sharedStrings.xml'); # iterate on nodes while ($contents =~ m[(.*?)]sg) { my $innerXML = $1; # concatenate contents from all nodes (usually there is only 1) and decode XML entities my $string = join "", ($innerXML =~ m[]*>(.+?)]sg); _decode_xml_entities($string); push @strings, $string; } return \@strings; } sub _workbook_data { my $self = shift; my %workbook_data; # read from the workbook.xml zip member my $workbook = $self->_zip_member_contents('xl/workbook.xml'); # extract sheet names my @sheet_names = ($workbook =~ m[ $_+1} 0 .. $#sheet_names}; # does this workbook use the 1904 calendar ? my ($date1904) = $workbook =~ m[date1904="(.+?)"]; $workbook_data{base_year} = $date1904 && $date1904 =~ /^(1|true)$/ ? 
1904 : 1900; # active sheet my ($active_tab) = $workbook =~ m[]+activeTab="(\d+)"]; $workbook_data{active_sheet} = $active_tab + 1 if defined $active_tab; return \%workbook_data; } sub _date_styles { my $self = shift; state $date_style_regex = qr{[dy]|\bmm\b}; # read from the styles.xml zip member my $styles = $self->_zip_member_contents('xl/styles.xml'); # start with Excel builtin number formats for dates and times my @numFmt = $self->Excel_builtin_date_formats; # add other date formats explicitly specified in this workbook while ($styles =~ m[]g) { my ($id, $code) = ($1, $2); $numFmt[$id] = $code if $code =~ $date_style_regex; } # read all cell formats, just rembember those that involve a date number format my ($cellXfs) = ($styles =~ m[(.+?)]); my @cell_formats = $self->_extract_xf($cellXfs); my @date_styles = map {$numFmt[$_->{numFmtId}]} @cell_formats; return \@date_styles; # array of shape (xf_index => numFmt_code) } sub _extract_xf { my ($self, $xml) = @_; state $xf_node_regex = qr{ /]*+) # attributes (captured in $1) (?: # non-capturing group for an alternation : /> # .. either an xml closing without content | # or > # .. closing for the xf tag .*? # .. then some formatting content # .. then the ending tag for the xf node ) }x; my @xf_nodes; while ($xml =~ /$xf_node_regex/g) { my $all_attrs = $1; my %attr; while ($all_attrs =~ m[(\w+)="(.+?)"]g) { $attr{$1} = $2; } push @xf_nodes, \%attr; } return @xf_nodes; } #====================================================================== # METHODS #====================================================================== sub values { my ($self, $sheet) = @_; my @data; my ($cell_type, $seen_node); # regexes for extracting information from cell nodes state $row_regex = qr( <(row) # row tag ($1) (?:\s+r="(\d+)")? # optional row number ($2) [^>/]*? # unused attrs > # end of tag )x; state $cell_regex = qr( <(c) # cell tag ($3) (?: \s+ | (?=>) ) # either a space before attrs, or end of tag (?:r="([A-Z]+)(\d+)")? 
# capture col ($4) and row ($5) [^>/]*? # unused attrs (?:s="(\d+)"\s*)? # style attribute ($6) (?:t="(\w+)"\s*)? # type attribute ($7) (?: # non-capturing group for an alternation : /> # .. either an xml closing without content | # or > # .. closing xml tag, followed by (?: (.+?) # .. a value ($8) | # or (.+?) # .. some node content ($9) ) # followed by a closing cell tag ) )x; state $row_or_cell_regex = qr($row_regex|$cell_regex); # NOTE : these regexes uses positional capturing groups; it would be more readable with named # captures instead, but it doubles the execution time on big Excel files, so I # stick to plain old capturing groups. # does this instance want date formatting ? my $has_date_formatter = $self->frontend->date_formatter; # parse worksheet XML, gathering all cells my $contents = $self->_zip_member_contents($self->_zip_member_name_for_sheet($sheet)); # loop on matching nodes my ($row, $col) = (0, 0); while ($contents =~ /$row_or_cell_regex/g) { if ($1) { # this is a 'row' tag $row = $2 // $row+1; $col = 0; } elsif ($3) { # this is a 'c' tag my ($col_A1, $given_row, $style, $cell_type, $val, $inner) = ($4, $5, $6, $7, $8, $9); # row and column for this cell -- either given, or incremented from last cell ($col, $row) = $col_A1 && $given_row ? 
($self->A1_to_num($col_A1), $given_row) : ($col+1, $row); # handle cell value according to cell type $cell_type //= ''; if ($cell_type eq 'inlineStr') { # this is an inline string; gather all nodes within the cell node $val = join "", ($inner =~ m[(.+?)]g); _decode_xml_entities($val) if $val; } elsif ($cell_type eq 's') { # this is a string cell; $val is a pointer into the global array of shared strings $val = $self->strings->[$val]; } else { # this is a plain value ($val) = ($inner =~ m[(.*?)]) if !defined $val && $inner; _decode_xml_entities($val) if $val && $cell_type eq 'str'; # if necessary, transform the numeric value into a formatted date if ($has_date_formatter && $style && looks_like_number($val) && $val >= 0) { my $date_style = $self->date_styles->[$style]; $val = $self->formatted_date($val, $date_style) if $date_style; } } # insert this value into the global data array $data[$row-1][$col-1] = $val; } else {die "unexpected regex match"} } # insert empty arrayrefs for empty rows $_ //= [] foreach @data; return \@data; } sub _table_targets { my ($self, $rel_xml) = @_; my @table_targets = $rel_xml =~ m[ }x; # extract relevant attributes from the node my ($name, $ref, $no_headers) = $xml =~ /$table_regex/g or croak "invalid table XML"; # column names. Other attributes from nodes are ignored. my @columns = ($xml =~ m{]+? 
name="([^"]+)"}gx); # decode entites for all string values _decode_xml_entities($_) for $name, @columns; return ($name, $ref, \@columns, $no_headers); } #====================================================================== # AUXILIARY FUNCTIONS #====================================================================== sub _decode_xml_entities { state $xml_entities = { amp => '&', lt => '<', gt => '>', quot => '"', apos => "'", }; state $entity_names = join '|', keys %$xml_entities; state $regex_entities = qr/&($entity_names);/; # substitute in-place $_[0] =~ s/$regex_entities/$xml_entities->{$1}/eg; } 1; __END__ =head1 NAME Excel::ValueReader::XLSX::Backend::Regex - using regexes for extracting values from Excel workbooks =head1 DESCRIPTION This is one of two backend modules for L; the other possible backend is L. This backend parses OOXML structures using regular expressions. =head1 AUTHOR Laurent Dami, Edami at cpan.orgE =head1 COPYRIGHT AND LICENSE Copyright 2020-2023 by Laurent Dami. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut Excel-ValueReader-XLSX-1.14/t000755000000000000 014621334531 16264 5ustar00unknownunknown000000000000Excel-ValueReader-XLSX-1.14/t/Mappe1.xlsx000444000000000000 2460014363164702 20512 0ustar00unknownunknown000000000000PK!d[Content_Types].xml (Ĕn0 US0M7C#$ `@ڥQ~qcf+h+E \qRO;!)Jp g;,EMĪFa8ޙ(8AU 59neZ 1>L--eOk%`Qd֫*k*EL*WNrwgMӡ`ʥFC6Q^Trm姏aJ? 
WAiEZF>Qa|ID?q;y~)̑#m,=\"w~j;> wrӫo6;, |7kץv)pv١3tLsmPK!U0#L _rels/.rels (MO0 HݐBKwAH!T~I$ݿ'TG~KAsc+EY5iQw~ om4]~ ɉ -i^Yy\YD>qW$KS3b2k T>:3[/%s* }+4?rV PK!ӏjvixl/worksheets/sheet1.xmlK0{w<6UmԽU٘!X8MVU{I*t%`30LGymɔ蕭u.ohFeg{( xzxnn[@P}Ic^`-ȀnցIcb:͘GpivpDt2`[Ҍ3mvHYEҝ/#JQ㺷NV}TxƧeVzۄ u9˙Tg:0zJ-$-q7b>+v.,$E$$,΢2KU3żK^DLb> {= d0Tv5w5G*~:>.`Rv>XsZ:*Fۓ"__l/J"һw,lL0 _;L`(3, m߾R`IoWX|r:|3:VP/wcSr|_ocVJht_p rڙkKu\[IƏU=9½>|=}5M5M5M5MHqUA,!DsrMrMrMrM%aYJќ%&&&&K8ZKќ%&&&&K]?hrM"}S{ި~3&;ݿVf4giyM:ޟCZ4giiivM: LOڗТ9K5MKش|>\v3= %Χ.&ֱ)IL'9a@Rd!3 Qg!sYwAQ B<> .ɝ%~fcQeϨPUo>>>72.X`3ӄ2zIFvFCt]- C@O'uPUoCcJAbdf|?(+D_Uonq1i\LL$"lU t jnq1bBgIWq`n|TE{C-DGbUEtO:_Uonbgt$GWtSW>2zKgrXbgs.[S'kUonbgt$nqB|Q9}NnoprAb7K퓋(M:`xzVhF:'?oO0ߛ-!nφ $cr(|{7#^nӫgR0ER)xԢV9󨐾/L͹hc)PY ń̟K=-2ɵ ķ)F)Xp&\9d$t kP+.GȜhf,NNyR)a`J4PK!!ڰxl/worksheets/sheet2.xmlMk0 BwG?R)!ٲew{Vq,bIFRޱCӅ\BAif̌?uG^yeMEdJ ike1)AZt@EO`η !%c^ zxu[{i =Jw 6r3A'[Op}I{DlTiRe5։M}y*$9:\13hʤt&L5__ y!]W. |6r^}[YsjYG=Ob>. ]ME|?ߙ ZXOc:Qώl^TZ=jF~ڶ< OkՆI!)HM-qI,IԷ)HLOM,J+VIM+U23WR(LπK &&`_RC2#51%$i_egPK!X7N xl/theme/theme1.xmlY͋7? 
sw5%l$dQV32%9R(Bo=@ $'#$lJZv G~ztҽzG ’_P=ؘ$Ӗk8(4|OHe n ,K۟~rmDlI9*f8&H#ޘ+R#^bP{}2!#o/)NP#RB,Drі3ߓGC/?}2!*7reÊOVA+@:_> @4Yt kүݽzZP},5`/Zx J>4;=[^j_"J5t)Uծ Fwf LylXeb)bt"IO.fxF]D'.Fx30åJiP 7Q!Kڐ#Nf_yO?GͭUYr;(r~WI>&_שx퓗O?zIw {7Y t؏$"@Cu_FQm2.]%q|5-cøW\dꞜMM\swQb?n-3oPH4 bXB,gMwxD.+rC\.!Ԗon{F]# QCL-7^Fsb!]$# >2q}!!SLc!\29U`w"\C]ĘnfD&3q)L{!‒M7- WӤύ`Wb]Av f"wRvO{'gMAԭԅSI^#PA8r]#ވuvg6{ Kbk &fp-XEԹNi0@ad;1IX({q0gPORvN8p ':gW5Uj6Z漖9e\o_l.[>B\P+tGx{,YM92gs"͠5TΩTO7c:FzX7Q ݺ48t˪P 8tdJNuui=dU DuF蕝M ~eWVQWn^[~dhAy>VqJi79%2H7V[D2H7# #xβleyH-+!7V$rhb2M_p2B?1|g;Bu!: fq!{HD5lGIW@!ڶr5 A' ZJ!;s~Ar8( [Y'vͫ(C8e'I)\ƒ:`}Q7C:@t\ G!FLwETUTh2|`?v =c2|om"+݅nZI-LMC0Zun$9+< dIþ J-[j n #Y>6J@fD}471o}qѝ]׼d/.ɒCb;fİGyZke;T꽲KA鯪pK蕧7HX"LBdA:Ȝ(*ౡ wn5\s ^OrȽ3c`s:gVs!&<̨V8;/CQL6`]IvLy*Xm!FÛ{ku ;m-TV6ZQs1 qZv_# Cs8ß K؍hۊõUK 0u7+/WC;{W[_E@م|ND{&S TЎc nυO%0zj6iMp>r+v}FMr]6z) uN׺ɷS@_W{a3|fKG"ç'wDsihOg%pf, z$z-an~L3^HCFU'5c#ۓ z4:Sx^'Qat,IP$Q7IL'|ICEÛ'+%\:*4H0 Q rPK!sSxl/sharedStrings.xml\n0 C;0)MHְ.~zj@>?eW)zY8Oi^s;QƉ|'= M3kZ9fڼg%8[Tgō/({X[d,`:B<C^^<&WPK-!d[Content_Types].xmlPK-!U0#L _rels/.relsPK-!oxl/workbook.xmlPK-!JaGr xl/_rels/workbook.xml.relsPK-!ӏjvi xl/worksheets/sheet1.xmlPK-!!ڰKxl/worksheets/sheet2.xmlPK-!X7N xl/theme/theme1.xmlPK-!^.u xl/styles.xmlPK-!sS] xl/sharedStrings.xmlPK-!yFgN!docProps/core.xmlPK-!ϣM#docProps/app.xmlPK &Excel-ValueReader-XLSX-1.14/t/cells_without_r_attr.xlsx000444000000000000 1700714372002554 23626 0ustar00unknownunknown000000000000PK!bhS[Content_Types].xmlMn0z[?nN i'زL~PK!U0#L _rels/.relsN0 HCnH *`Gm( н=ဠ=Ʊ??Ө9^mQbgR?n@D(58¡?H)ŮQe=b4O O#aEOfqW~3Z0jG{>y^Ö ?yإ3-βC4r`fhM!8%T4Šm@wlz\b0$ql#HLjkC^H?S⁁MW=_ ʥQ1):bըU!W(>Ჸy>-OPK!L;3[docProps/core.xmlQk0%mҖ 4a0do!jX$V[0KrӽX']4!(!Dy"n4M o1`vm7cǷKCqX| 6 G3<06#Hc 8hp`Y+N%M mۤ{kȟ5TSnV3/j",z-A<_8t,(|P>rdyLLdBɿWg:51%_@/PK!>xl/_rels/workbook.xml.relsMj0A̾RJlB!=Ɩ- ǷƁJ7Ҭ7 1Q(A7S<< !xT0!Z?97#IcRq8j*BDoڐF͹LurU2=>; -fжm0o#z>!! 
:d> ǯ܋ V%>BړC#ǟs$ 9оp6 #O6PK!ylR xl/styles.xml]o0'?XOJRHiR[8`6,lq$6+>~kһF gr2݄1Uꊫu.`uTUTh2|`ߥ{0 ƹ[nFoZIL͚حa~$1+fe J-ZF=6t%jhhlbԘSV}SGhkw\뚗)Z$ _Ggo̕1l}pZ9JS lًUᗼeTa OEQɺ{*p/Trq cp^$]v qvNShcF0AxyBy_Zi6`C;@ݕ6|}<vƏNo_tn:O+NZQᑧ% vS#tUӟB0t ;L7!c75%n ]*gD@f^Ue`T; 'VY_^cV?NEc_5ɺvD;3aax(`'%4/dt?_,i?=ofV@9h2HãKgKѤp)7Nm0ψ}i mJ?fyjy)DelNn~rsѤqh)hێ],W3~_חR{|MyW?~_ë'"/uBۧ/=}WDptIq Y LI D R@ޚ!uj»2x=>VgzI'|1G"N7 %Yp4 ؇;sjzV6׵^)BZSOfP 7]n3O-ʴVCZ)Uk]2Aڻ.s6XNv[Ndlt:RqjSzC0 Š=}:O7^܇R2Kr*T_,^jnԪu%mZn[Fosfk4NRݩv,+[VB*5jc7:;y85|0okPK!xB,xl/workbook.xmlU]o:}x'|`5RZEinRrnT1&7yX33peӵ#~adxuiE!k.'*/g?x^.F!mY5#Q`{[Z6#78V9Ǝj "hKЗ UuD7)Q#CMzݖmWW$WHNjmys-ӝn$۳44gPKKV3xl/sharedStrings.xml One two three four five six seven eight nine PKKV)Exl/worksheets/sheet1.xml 01234567811223344 PK!bhS[Content_Types].xmlPK!U0#L _rels/.relsPK!aI zdocProps/app.xmlPK!L;3[AdocProps/core.xmlPK!>xl/_rels/workbook.xml.relsPK!ylR xl/styles.xmlPK!  xl/theme/theme1.xmlPK!xB,Vxl/workbook.xmlPKKV3xl/sharedStrings.xmlPKKV)Exl/worksheets/sheet1.xmlPK qExcel-ValueReader-XLSX-1.14/t/ulibuck.xlsx000444000000000000 2653514521525034 21031 0ustar00unknownunknown000000000000PK!S _rels/.relsN0 {*5@LH!4$năD Îq~bLn,0&K^UY&c}qy/6͢~8GRoC*rOJAʤ{tJ MKcd=@r]Uw2dYqgVMmk5nIz>3W"!vJL|8 e ye} 0HM!ӷ!阘rpbͼ0gt{M#}HL3_JZPK4PK!S xl/styles.xml[Q8~_xBB†S%^NV:t8xۤl vn7I7$<3fQ&EWH9xg!Ϗ@Ù~4g[2[\3fYy)ߑ bYoil (Eж]+ 6S)ˍ@ɒ&{,'(tg|$R?AN)i(Bih?c 倜$Q~{޾e[X,<)7l-3=[q=$4⥻ÁY(1}f hAJ8"b M83hc$F"-}z߫]{z^qqç{y)-7 6 HRh}@IS% | ƚ;7-B\NbcI(DyM 0 Q=raMb1k/$Hj5)0 rDhimV{L;TƹOTSdJ f sL0#K3hЩhFfإh.ZE~bj3bjw9$?q<\zv*r+ {.kg|!o⬄=i3j/em|yj4v ִGuf47ٝ瀫嬳#'Q? 
\mXmtst&ig̳h䌑 aOu_Yu#*ٛ36R)K//_?!5W3OK5h[>%%XY>ISP[G 0An *AOBnk] 3:ipw*'HE Ş 5TUsv[]_H K_5a_#F{tjʏ.F՚yl{ښ3b\}xP?9}umbݷ/Z 9>=!'gu\ 7qxu~NG;-!Sjڏz4"9v\Sn85qt1]"Ƒ;sUuk[qPKRɧ8PK!S#xl/worksheets/_rels/sheet2.xml.relsMk0 ~E=QI/]nيm/,gd~1XaGlWlc Aދt9RGM < SJz`i#w1Q9fYCByAMpwb2E>A4D<[I(OqLRQ̚7ն当уBoWI}CБPQnO/g 1 ?sTZ(tj4pPK{RPK!S#xl/worksheets/_rels/sheet1.xml.relsMk0 ~E=QI/]nيm/,gd~1XaGlWlc Aދt9RGM < SJz`i#w1Q9fYCByAMpwb2E>A4D<[I(OqLRQ̚7ն当уBoWI}CБPQnO/g 1 ?sTZ(tj4pPK{RPK!Sxl/worksheets/sheet1.xml\]o}To$NmIH7n[7Ec";$%[;[7IRlyA/^AҚ2䃔MjYыy[+~]A &wKrŸdt,%92* Tr_0yƶ3;͵qFAVw>`Ƒn:[ч$BJ&5)1k}Wwhk 5[Axɷ!yr9N9κݬy ^&뜋֭}ORpCHZ b ? ~3"Mr`yn^'Ot-iQFŎ^D\WFE' Ss|JRN6Z[9-ĸ}\,pMq)FR#VM6nsmz_.VHj(f`&_ Rp4 8.#X P- opȌ6P;/2aљ˟'M ߤJhE {a%f':`P[Z:u8iGߟYWQ8={}~Lִʂa0:=lA-=5A:-x?1N ~߯N ~ N ~y.t *\T07pIL*gR< T8Ϥy&3pȤ! L*2pȤ! L*2pȤ! T8ߤ&7pIM*oR| T8ߤ&.0pI L*\`R T&.0pI M*\hRB TФ…&.4pI M*\dR" TȤE&.2pIL*\dR" Tؤ&.6pIM*\ t:yVO{9Vѡ>'pyG"l؄5>ܑb[3ª:F0*t{uYQ͵Կ#_&^LHX.(;ܶx&S꛼(_%7MۤUpYk-M'#_ F6+^w) \GzQAO"2UoIt!Ke^k8.UL$-k(*g;-ʱG?d}#mYR36T4Wg"IP*תFrOxnnw, /\s:﫭cּ- ̛̞1rY\2~iCI֑j3%)Ntpt`tͫ‘E\l"Y7ի @[V5wӦV̐'\I%USRm)IJ_$5%iKIf/_$M%))r?%aݦm35="jsmH~t%7[E.AQ57yYz͹^Yfįꌰ8G؝zW)PzR5ЃzzC=h<' !NO A7?p{z#jD\os~DyeMIMdɬI=zzH_~`.ἲNm^{&k=FY1=8nX;8C=~N_~`<"¹"\NݮpF=UNTr'\?0wn͞!lpp6ʓP: g署p3Q-Y41ڿ}9w:2sFv9>dHB6*Ik(Q'`}DװMuMIw~/6SWG%ktfyj:ME'//zjWI+y/OI[ 4o3mQqҖWCrJmн=<׫;.;O|.Դ;O{.>w}fw񝆺H!ntfH!C:t8p76(}86ޜVu0ˋf_oaP̠ca2$7ו?.B#@if zVa4 2yFzm7'mr]$ TΤ]"IrQRZHf-f*pSAG?OoZ磏f%Y ?,VmU}{w0S?Sk#Jd3;3U&ɮPK] PK!Sxl/_rels/workbook.xml.relsj0 } $c8A[Q6o?-Rv(;@j^x@U0؍a0{ujq'/fq\Ą!f+A'v@]学%#vhUvaǾn{1ȉr3Ҁb.2o৕>?dn.:o 'e,2F^PKpPK!Sxl/sharedStrings.xmln0E ^˔&NIA]( %-|uMVъ@hْgӔ3kݖ[ri[SXj]}.6S׃o6od@0Z0RY7PYt=*PU$^6; =y-PPbޫD. 
<zX0<_y>l+UHj}5AmT\d22eye8/#Dmiy!|Fk ўIj4 Q~hQ=p"PPKkYPK!SdocProps/core.xmlRN0D$(8P "7coSCX{I mggt1՞bї{ ՇFJbJiLNF 6{eWtmv #N 7PKdPK!SdocProps/app.xmlOk0 8 !+q;C6v ۠y7==n8ALڻWz6<R$N  {>@D zrD [J<ʶe[PT `4;ė{W4iYWdӗ6֕sJ3c p('"gvN 91h)zUBReՎM:1r&%enW jlP( Fh&ȱ@1$GmDKf'\tzmKsxay Ͳ SC ES8|!듍)#GcM䀇5J:.q#!#E+]zxyA*%0Q܈3PR=w*\۫fćfHgnDSb>J0긞&U?+5GmoPSY)0EC%H.зn]Z"Gn:^{`p?b|c݉3 {qƿo.w oN'xn=UCxwc:%Ni{#\1_PK Oy`PKdW鮌xl/workbook.xml PK!S4 _rels/.relsPK!SRɧ8 'xl/styles.xmlPK!S{R# xl/worksheets/_rels/sheet2.xml.relsPK!S{R#0xl/worksheets/_rels/sheet1.xml.relsPK!S U@Wxl/worksheets/sheet1.xmlPK!S] `xl/worksheets/sheet2.xmlPK!Spxl/_rels/workbook.xml.relsPK!SkYxl/sharedStrings.xmlPK!SddocProps/core.xmlPK!SZ{h!docProps/app.xmlPK!S9A"docProps/custom.xmlPK!S Oy`$[Content_Types].xmlPKdW鮌%xl/workbook.xmlPK h)Excel-ValueReader-XLSX-1.14/t/valuereader.t000444000000000000 3437014522256345 21142 0ustar00unknownunknown000000000000use utf8; use strict; use warnings; use Test::More 1.302195; use List::Util qw/max/; use List::MoreUtils qw/all/; use Scalar::Util qw/looks_like_number/; use Clone qw/clone/; use Module::Load::Conditional 0.66 qw/check_install/; use Excel::ValueReader::XLSX; note "testing Excel::ValueReader::XLSX version $Excel::ValueReader::XLSX::VERSION"; (my $tst_dir = $0) =~ s/valuereader\.t$//; $tst_dir ||= "./"; my $xl_file = $tst_dir . "valuereader.xlsx"; my $xl_1904 = $tst_dir . "valuereader1904.xlsx"; my $xl_ulibuck = $tst_dir . "ulibuck.xlsx"; my $xl_mappe = $tst_dir . "Mappe1.xlsx"; my $xl_without_r = $tst_dir . 
"cells_without_r_attr.xlsx"; my @expected_sheet_names = qw/Test Empty Entities Tab_entities Dates Tables/; my @expected_values = ( ["Hello", undef, undef, 22, 33, 55], [123, undef, '<>'], ["This is bold text", undef, '&'], ["This is a Unicode string €", undef, '&<>'], [], [undef, "after an empty row and col", undef, undef, undef, "Hello after an empty row and col"], ["cell\r\nwith\r\nembedded newlines"], ); my $expected_active_sheet = 6; my @expected_tab_entities = ( [], [], ['Nombre de Name', "\x{c9}tiquettes de colonnes" ], ["\x{c9}tiquettes de lignes", 'capital', 'small', '(vide)', "Total g\x{e9}n\x{e9}ral"], ['A', '6', '6', undef, '12'], ['acute accent', '1', '1', undef, '2'], ['circumflex accent', '1', '1', undef, '2'], ['grave accent', '1', '1', undef, '2'], ['ring', '1', '1', undef, '2'], ['tilde', '1', '1', undef, '2'], ['dieresis or umlaut mark', '1', '1', undef, '2'], ['AE diphthong (ligature)', '1', '1', undef, '2'], ['(vide)', '1', '1', undef, '2'], ['C', '1', '1', undef, '2'], ['cedilla', '1', '1', undef, '2'], ['E', '4', '4', undef, '8'], ['acute accent', '1', '1', undef, '2'], ['circumflex accent', '1', '1', undef, '2'], ['grave accent', '1', '1', undef, '2'], ['dieresis or umlaut mark', '1', '1', undef, '2'], ['Eth', '1', '1', undef, '2'], ['Icelandic', '1', '1', undef, '2'], ['greater than', undef, undef, '1', '1'], ['(vide)', undef, undef, '1', '1'], ['I', '4', '4', undef, '8'], ['acute accent', '1', '1', undef, '2'], ['circumflex accent', '1', '1', undef, '2'], ['grave accent', '1', '1', undef, '2'], ['dieresis or umlaut mark', '1', '1', undef, '2'], ['less than', undef, undef, '1', '1'], ['(vide)', undef, undef, '1', '1'], ['N', '1', '1', undef, '2'], ['tilde', '1', '1', undef, '2'], ['O', '6', '6', undef, '12'], ['acute accent', '1', '1', undef, '2'], ['circumflex accent', '1', '1', undef, '2'], ['grave accent', '1', '1', undef, '2'], ['tilde', '1', '1', undef, '2'], ['dieresis or umlaut mark', '1', '1', undef, '2'], ['slash', '1', '1', undef, 
'2'], ['sharp s', undef, '1', undef, '1'], ['German (sz ligature)', undef, '1', undef, '1'], ['single quote', undef, undef, '1', '1'], ['(vide)', undef, undef, '1', '1'], ['THORN', '1', '1', undef, '2'], ['Icelandic', '1', '1', undef, '2'], ['U', '4', '4', undef, '8'], ['acute accent', '1', '1', undef, '2'], ['circumflex accent', '1', '1', undef, '2'], ['grave accent', '1', '1', undef, '2'], ['dieresis or umlaut mark', '1', '1', undef, '2'], ['Y', '1', '2', undef, '3'], ['acute accent', '1', '1', undef, '2'], ['dieresis or umlaut mark', undef, '1', undef, '1'], ['(vide)', undef, undef, '1', '1'], ['(vide)', undef, undef, '1', '1'], ['ampersand', undef, undef, '1', '1'], ['(vide)', undef, undef, '1', '1'], ["Total g\x{e9}n\x{e9}ral", '30', '32', '5', '67'], ); my @expected_dates_and_times = ( [ '10.07.2020', '10.07.2020', '01.02.1989', '10.07.2020 02:57:00', '02:57:59'], [ '10.07.2020', '10.07.2020', '31.12.1999', '10.07.2020 02:57:59', '01:23:00'], [ '10.07.2020', undef, '01.01.1900', undef, '01:26:18'], [ '10.07.2020', undef, '02.01.1900', ], [ '10.07.2020', undef, '28.02.1900' ], [ '10.07.2020', undef, '01.03.1900' ], [ '10.07.2020', undef, '01.03.1900' ], [ '10.07.2020', undef, '04.04.4444' ], [ '10.07.2020' ], [ '10.07.2020' ], [ '10.07.2020' ], ); # NOTE : cell C6 displays "29.02.1900" in Excel, but that date does not exist, so # this module gets 01.03.1900 instead. 
my @expected_dates_1904 = ( ['11.07.2024', '11.07.2024', '01.02.1989',], ['11.07.2024', '11.07.2024', '31.12.1999',], ['11.07.2024', undef, '02.01.1904',], ['11.07.2024', undef, '03.01.1904',], ['11.07.2024', undef, '29.02.1904',], ['11.07.2024', undef, '01.03.1904',], ['11.07.2024', undef, '02.03.1904',], ['11.07.2024', undef, '05.04.4448',], ['11.07.2024', ], ['11.07.2024', ], ['11.07.2024', ], ); my @expected_mappe = ( [qw/a b c d e a /], [qw/a b c d e b /], [qw/a b c d e c /], [qw/a b c d e d /], [qw/a b c d e e /], [qw/a b bla-bla-bla bla-bla-bla bla-bla-bla f /], [qw/a b bla-bla-bla bla-bla-bla bla-bla-bla 1 /], [qw/a b bla-bla-bla bla-bla-bla bla-bla-bla 2 /], [qw/a b bla-bla-bla d e 3 /], [qw/a b bla-bla-bla d e 5 /], [qw/a b c d e 6 /], [qw/1 11 bla-bla-bla bla-bla-bla bla-bla-bla z /], [qw/2 12 bla-bla-bla bla-bla-bla bla-bla-bla v /], [qw/3 13 bla-bla-bla bla-bla-bla bla-bla-bla bla-bla-bla /], [qw/4 14 c d e bla-bla-bla /], [qw/5 15 c d e bla-bla-bla /], [qw/6 16 c d e bla-bla-bla /], [qw/7 17 bla-bla-bla bla-bla-bla bla-bla-bla bla-bla-bla /], [qw/8 18 bla-bla-bla bla-bla-bla bla-bla-bla bla-bla-bla /], [qw/9 19 bla-bla-bla bla-bla-bla bla-bla-bla bla-bla-bla /], [qw/10 20 bla-bla-bla bla-bla-bla bla-bla-bla bla-bla-bla /], [qw/11 21 bla-bla-bla bla-bla-bla bla-bla-bla bla-bla-bla /], [qw/12 22 bla-bla-bla bla-bla-bla bla-bla-bla bla-bla-bla /], ); my @expected_tab_names = qw(Entities tab_foobar tab_in_middle_of_sheet tab_without_headers Cols_with_entities); my @expected_tab_foobar = ( {foo => 11, bar => 22}, {foo => 33, bar => 44}, ); my @expected_tab_badambum = ( {badam => 99, bum => 88}, {badam => 77, bum => 66}, ); my @expected_tab_no_headers = ( {col1 => 'aa', col2 => 'bb', col3 => 'cc'}, {col1 => 'dd', col2 => undef, col3 => undef}, {col1 => 'ee', col2 => 'ff', col3 => 'gg'}, ); my @expected_tab_cols_with_entities = ( {'col<' => 'foo', 'col&' => 'bar', 'col>' => 'bim'}, ); my @expected_without_r = ( [qw/One two three/], [qw/four five six /], 
[qw/seven eight nine /], [11, 22], [], [33, 44], ); my @backends = ('Regex'); push @backends, 'LibXML' if check_install(module => 'XML::LibXML::Reader'); foreach my $backend (@backends) { # directly supply pathname run_tests(file => $xl_file, $backend); # open file and pass a filehandle open my $fh, "<:raw", $xl_file or die "could not open $xl_file: $!"; run_tests(handle => $fh, $backend); } sub run_tests { my ($source_kind, $xl_source, $backend) = @_; my $context = "$backend (source: $source_kind)"; # dirty hack when testing with LibXML, because \r\n are silently transformed into \n local $expected_values[-1][0] = "cell\nwith\nembedded newlines" if $backend eq 'LibXML'; # instantiate the reader my $reader = Excel::ValueReader::XLSX->new(xlsx => $xl_source, using => $backend); # check sheet names my @sheet_names = $reader->sheet_names; is_deeply(\@sheet_names, \@expected_sheet_names, "sheet names using $context"); # check active_sheet is($reader->active_sheet, $expected_active_sheet, "active_sheet using $context"); # check a regular sheet my $values = $reader->values('Test'); is_deeply($values, \@expected_values, "values using $context"); my $nb_cols = max map {scalar @$_} @$values; is ($nb_cols, 6, "nb_cols using $context"); # check an empty sheet my $empty = $reader->values('Empty'); is_deeply($empty, [], "empty values using $context"); # tables my ($entity_columns, $entities) = $reader->table('Entities'); is_deeply($entity_columns, [qw(Num Name Char Cap/small Letter Variant)], "column names, using $context"); is $entities->[0]{Name}, 'amp' , "1st table row, name, using $context"; is $entities->[0]{Letter}, 'ampersand' , "1st table row, letter, using $context"; is $entities->[-1]{Name}, 'yuml' , "last table row, name, using $context"; is_deeply([$reader->table_names], \@expected_tab_names, "table names, using $context"); my $tab_foobar = $reader->table('tab_foobar'); is_deeply($tab_foobar, \@expected_tab_foobar, "tab_foobar, using $context"); my $tab_badambum = 
$reader->table('tab_in_middle_of_sheet'); is_deeply($tab_badambum, \@expected_tab_badambum, "tab_badambum, using $context"); my ($col_headers, $tab_no_headers) = $reader->table('tab_without_headers'); is_deeply($tab_no_headers, \@expected_tab_no_headers, "tab_no_headers, using $context"); my $tab_cols_with_entities = $reader->table('Cols_with_entities'); is_deeply($tab_cols_with_entities, \@expected_tab_cols_with_entities, "tab_cols_with_entities, using $context"); # check a pivot table my $tab_entities = $reader->values('Tab_entities'); is_deeply($tab_entities, \@expected_tab_entities, "tab_entities using $context"); # check date conversions my $dates = $reader->values('Dates'); is_deeply($dates, \@expected_dates_and_times, "dates using $context"); # check time conversions with rounding hack my $t1 = $reader->formatted_date("44022.123599537037", "[h]:mm:ss"); is($t1, '02:57:59', 'time conversion 1'); my $t2 = $reader->formatted_date("0.123599537037", "[h]:mm:ss"); is($t2, '02:57:59', 'time conversion 2'); # other date format my $expected_other_format = clone \@expected_dates_and_times; foreach my $row (@$expected_other_format) { $_ and s/^(\d\d)\.(\d\d)\.\d\d(\d\d)/$2-$1-$3/ foreach @$row; } my $other_reader = Excel::ValueReader::XLSX->new(xlsx => $xl_source, using => $backend, date_format => "%m-%d-%y"); my $other_dates = $other_reader->values('Dates'); is_deeply($other_dates, $expected_other_format, "dates with other format, using $context"); # no date format my $reader_no_date = Excel::ValueReader::XLSX->new(xlsx => $xl_source, using => $backend, date_formatter => undef); my $dates_raw_nums = $reader_no_date->values('Dates'); my @all_vals_flat = grep {$_} map {@$_} @$dates_raw_nums; my $are_all_numbers = all {looks_like_number($_)} @all_vals_flat; ok($are_all_numbers, "dates with no format, using $context"); # Excel file in 1904 date format my $reader_1904 = Excel::ValueReader::XLSX->new(xlsx => $xl_1904, using => $backend); my $dates_1904 = 
$reader_1904->values('Dates'); is_deeply($dates_1904, \@expected_dates_1904, "dates in 1904 format, using $context"); # some edge cases provided by https://github.com/ulibuck my $reader_ulibuck = Excel::ValueReader::XLSX->new(xlsx => $xl_ulibuck, using => $backend); my $example1 = $reader_ulibuck->values('Example'); is($example1->[3][2], '30.12.2021', "date1904=\"false\", using $context"); my $example2 = $reader_ulibuck->values('Example two'); is($example2->[12][2], '# Dummy', "# Dummy, using $context"); # in this workbook the active_sheet is deliberately empty ok(! defined $reader_ulibuck->active_sheet, "empty active_sheet, using $context"); # https://github.com/damil/Excel-ValueReader-XLSX/issues/2 : empty string (ulibuck++) my $reader_mappe = Excel::ValueReader::XLSX->new(xlsx => $xl_mappe, using => $backend); my $strings = $reader_mappe->values('Tabelle2'); is_deeply $strings, \@expected_mappe, "empty string nodes, using $context"; # cells do not always have a 'r' attribute my $reader_without_r = Excel::ValueReader::XLSX->new(xlsx => $xl_without_r, using => $backend); my $vals = $reader_without_r->values(1); is_deeply $vals, \@expected_without_r, "cells without 'r' attribute, using $context"; } done_testing(); Excel-ValueReader-XLSX-1.14/t/valuereader.xlsx000444000000000000 6607414201664770 21702 0ustar00unknownunknown000000000000PK!Y=y [Content_Types].xml (̗Qo0';D~iUDjepl˾2J)%Du5^H ;&Q;[r )mW56Ċ*a 7C,hgΣt,=YaŽk~=p,l:K`$ mY1ۏk*&7Z $P[.CCe56AcD ,2aJb}|G?>y>üA+(D.έ"CSRT6BGipr/ 举B8>^zp&HOti3,&u邊w0aC$QLaĝpM}εj*;}) {9qH[3ݿFLґƜ4kP5eZrFj2 GSػ|V9pǾ=$5;ӮH}#W:y0PK!U0#L _rels/.rels (MO0 HݐBKwAH!T~I$ݿ'TG~[ RYqEd~ ;3&+xD߀3F{/@{uo2 F_еk@IZ4 #O:A$.wYHPK!R1xl/worksheets/sheet4.xml[n6}/0u bcf}(P<+ k[$EC")%59gH)\γi2}}￾~D+\_mlϏQ7SYv3pmѩnE?׷ c\~6֔Ł5rZ\qx%oq'My.:=Uvۧ}}\u?hv{Mr?ُKo>vsp9-PyڬKv_+3ڮw`ü{I8 /|/RhGn5O6y!XYp di8c8zKӊgmǘPpl%z3թqE&g}b=}zt9kԡ'gQ,(bgQ, ԡ'g=?,df6RtCS@D`[A%)NV0KQ fLC-LqyX<'1 -F; Y֝jMT+QJ>ֵ? 
F| "4>"9lQ<ǒ2GOH镵>ãa# -s<!e=160!<A փrc;laC%x`-RKz1ﰑކ੼Gzw Qu@:9=ܧ,F-%!FIF3g%Y(pF#nHGA'P,z0(1-F+Y(p QRm֎H+ BAUn$ôt.e)h%e$hZ,>EqlRQ"2]4J L &MKFm$tQ1YVYpi3I:3 HЈe ufh}QwdRgl))x3 VgK< :cKIC ufhmtY%ntP2&AZϏ ƭ+-A1njq0i^U Y\FF1ChX$ = KһRRX'A2SbPp-!I 3X,\cl))x, fI ssUFIC fh]=Bꑖ;};J^!x4 Y,I/CO͞4О A1n @Alo*49㚬hk;?wg\$Qw{;jPS d1aoƵI=Z"7 \V{Wncy@]z-꡷Y2fY([J z-C땛_%@QRe! [} ([J z-COnd[l))xm =U HЈEo e詪G,\RR~΃Y߼ ~2{3N0JjğTwUYb~/?+ZPh^k;;G< b?_ἫoKze|D /s֎u?`Xa<PK! ? >xl/worksheets/sheet3.xml[r|ߪ l-0VmmݻLI #DBo!}V[rmUӶ><ݰ߫z=>ϓ~Ԭծ>Tթ~z^ު k~]^yU#?Zm΍A6j{;-v]-}uhɱڭm~`ۯoۯ_Bmg~ozoT4wB7p՚@6[Q`O{X<5r<=O_)gs]c(jWWc9WyP8Wj^n7}MkVm_o!͏EuZieŵUzz8{2FNs_Z>=mR,d'||ή/,ƈCl"L1DKD+:qWYmD UąXA13(tp;}vIhzZa:1C9~ϳs(i\Ā@?4s{'J M3i8Z~ ̟}8s͔/!v14f"M3Q?E@]nEMx<9AQjt}Ӌ>B'iAQ8rE+a'3e4^ZoU+Nj@m V؟@&m]5$dfiL^y.m $'&iqj74%g*sĖ({ -2a ֣ Oͤ%ڸ5LUD 4 N-2a22yz3%L@?,@Kl=,ȥlˠVYʂ NMA'i_/8Iqĸ< T ,Z&t pd[0MRjkAP-(aȕ,;LW(Gͤc\{vK CH,PeAG)OlL?lOߗ\Yʂ3P="N% vMKsY9[lAj%Z/]YR j'MKe h"SO"6K{RP6ni2,爲"S)(rmfI)Hhr4^&[lAdޔ$N& Ke!hLLAt%eZy ,p<4^ @u 6%-eKK 2d* Qڶ& -X<ȒRTx`iYr BTYPw -)(fAI)( 0ʂUԝA ң $+KJARFS,PeA)[FSPzuxlAMh"% >#~}a 1߈"zc$n0x@ڢR[֓Qn%NdBrL AH;J ARF2فeE& -cQRjwiiiL@*A@mLܗnBT;,(DMmM Ate/uL AKZ/39ʂ0 NK2B2A2,C!&iGI!HϓAf:i-h,C pOn'r2N9:cea5+Yiry /{ SN9Ȅ9ZM;^yȒj'FKs9嘃({ -21ŇcO#N&9Ke>e-21(7=lޡY/c1Ę;^:'vVT 9ʂ.y  ʓbT;i,»s{I0@X[-=yRΑjo(-MDʅqOt,!g&laCY|p9O:RF *#BX9QwAr=7m|IaHP^>-M `"5pBz B9!/~.B1)7m~IHP4^(o3 f3BX H޴U$E"vB9Z@(lFkЛBQڹ.?-4P6#ٌB1N")IʱB!̨;!y{}G()`s0 d[dޔd$nD9Y?gu'#lGdT$%#vB&Sf2ddϢzVX ddiUf2dZMaeR2j'QKe3"au""+TG 캖&fz9a-z'eR2P~wwfiPv4lFkKl{)T;bilF!bֱuLJFRr24P6#ٌBqK E8W&%#vfYB!̨;!X ŝ %eR2j7,M T5ilGpd LJFR*ׅtޓG!Wfԝ7 ~^'eF+3NFHBkWu{8vՋ᝜wS+s4כ|]g;Ɨn>~ɍձ9W ]﷛}OPK!ʽmxl/worksheets/sheet2.xmlMk0 liZX1qv%1`k理t zMz_yy'~ &,ʆWR$ҡrI.-ujH0!BDTdZ:eAJk46*ut5\NNym<ֵ5D4]:Ҽ9u\o+cDi› uXnt͑=$'xoMĄ5eSAO5OD1i>,+mB>s,lӿX^뾰 y-R'xBm+jy jqMK|zjzQjdMgۿ14^ullHA=tK<㘰3D"Ǭ}5Ϯ/PK!;m2KB#xl/worksheets/_rels/sheet1.xml.rels0ECx{օ CS7"Ubۗ{ep6<f,Ժch{-A8 -Iy0Ьjm_/N,}W:=RY}H9EbAwk}m PK!%#xl/worksheets/_rels/sheet3.xml.rels 0DnzЫ IFѿ7EA4fj$zrw Χj 'HÓz4aG<"SkS 
;،4#KeqƔ8uQlTd@m!] 9?56K?"TDqAR,R_PK!ȡ4#xl/worksheets/_rels/sheet4.xml.rels 0DnzU IFѿ7 NռI<)NC) 4\f'HÛz.4aG<"SkS ،4#KeqƔ8mQT\2bs!D9?5t1K?"TOZMJ\yKUWkPK!,B#xl/worksheets/_rels/sheet5.xml.relsj0D{$;P%_J!&E^ۢJh%؄Bc0]Yc" n@!4D,|>X< ~IлKZ9fVBlaopqSFdLeRcLOhvM7ܝS 0N\8ƀ)Hτ%`9H=U˄bAGw swPK!6#xl/worksheets/_rels/sheet6.xml.relsϊ1 CvQY\>@dL[[Dos IVqg ;a!3d{j9 8qp4\*go0#z"Q,kbt4"KɦMˆ1U [RyUπ) OٮizC_d8Pbh)j>{Y$ G>Gc1Gc9C=<]yPK! A G)xl/pivotTables/_rels/pivotTable1.xml.rels 0 Pr<:UBm--m}{mcȟW_ ub˝ڂ 1ixS}\T0[EpЧwJEӈQ:O7 #]:`~He| % RN?:JPK!KH)_1.xl/pivotTables/pivotTable1.xmlWn6}/ a{cl vgZlTw:ɢhI-6F3g)3ϕWCJxPi7Di,)_W,3,|# 8KՊ#OOI$iXOe9g:r 3/a"$e ~YA\ _$|\Q%w!eYP${J+  Qan>2yM,i;&B}hiv2#Ɗ~e1/rEIïgg$"̣~2c7%yB1~:1`hxV!EdcaR-uRZd1 ;CrHX ShE7tNI$r?AxY C /5p#cc= da`,:ʗM CDܬ׬; |(azi Mȩ"}^ by@@C_mcr4@ zoMg1o>n.0#u6 껆3q?vV3YX+bŬ56 bV7|X*ڕ^9:sQɈIS35Z/s6ȴ"sgB^|<өzy?In ~8CLmB5Z 2眄<_\k t8Qg2q&5+O5yvjj~]jGs@>fIi@0]B`E"333\ *K) /NO^i&E.Vltnf3Lxp} wӛt2oGiyZI(@&"Eh˫$Ο,:YGr\\qx-N ݫsS~ xe.?1zPK!nxl/worksheets/sheet1.xmlUKo0UBIJ>qj{mS;-Joyf|,jĴq%ʒ79?XҔ |S|4HhVYZ5cCWLIQ/c4#7u% 7ES0dUqfkl YM,7+L&)pǵ:R(XۭH,j9axFV6%6a<"piGU9MGKcߜm̫oҽ]ܗ9NQ82;iaw85т6^j_k"Fi?d1\Y0OŨΘP 2PD΂g޴>FtmϼgĒbAkQ5Z::[cHS'HݩLꌦo$[fپVN\tWEP!KX!فIqKJj oa#2$PK!wuLVxl/sharedStrings.xmln6琑m `[;s$v7ԐAd;ȞC"j4K% .1b/&NM=@rR!_8v]i0ٺz)5m=N>Zd5^},jS(P,5r& @|&1= {86{8.SjgSnov{RTd"Y,'w=P)q`&22Le\=#B$Y96R#) 순d&)TD9if# ŗmnǂXeT5/ xnz8F;_ zNB"`41ykkNuq}ctmyeb$k֐y.~BƑsiIpO9K< ]s%,+>/]f,/P츮{ig# K@,eGȺ ^"MQ<ם~&rO~p-{eԏ3PkO$[@S*Nq*:^]/E;Ԁ5=f)`@ ^0'f3*ږbN,jAwjBF]]?I8رƢchYT94o\={]/'(b=oJ]R áaz:zj;# j3+(8ӎ]阑TyA/E+!%8x-2̔E51aV{G>ޥfJ&UVCpe54? :!S<yL4s3l-o]g4 >BX__ii`* .=osF9r(|"ޮi5|Z t"U-Gγ]ڂ["E7=.:k;S`4OOUI̞s\wW88|}vCvu(okL,:ϧƯUֹj6W[z"\^F{*op60FŞ&@:i V40v3H}LD$\MqtfURRej$j(%Ճv MHL_NzIC W>*YLz^UNT8[ߗqh[Љf67ouI`lalBw6@ @F%{2R^XQ_P{ݷQ^aQ~ovDACw.y;$C$>asS_MPK!7` xl/worksheets/sheet5.xmlVYo0~_i{!! 
$avgL ߱MڰJP3o_z&\P֦ȳ]d6gm7)f,!q[$EDOO"DZЊURv3yE,l֑$% 78Vjjwi0maK8XYҜ,YmH+ '5࿨h'lM~ ]ӶYŚTjRd5a258s 969я1'Nb^P@Lѝ7[yrsߔɳfI @!HMrL2R)X90> } -dPok$| *uIDk~R r_}g8}lM YVH F9~PЍ@}7iE#Zh%^9YP谪lo65w yϝgHv˿dL~COk2 8`A$Q0q/<M Kd,^M8wA8ܣ}ILOhus\A̪Q.^b54.^95*fzds234>+0c\rL%*@P/; %G"cO|FaBkPK!1Gxxl/theme/theme1.xmlYn7;{O,ْcK6qbJ˘\݊X@Ѵ@o-P >m :$WiQq_lgqȽ|^!U/V"Dh4[If<'hLdtew.u u܌R%0yArx6" di raG(?n4&Dz\I=3erT5Be tY3E~#T 46./rS :槜WN,"OVƥ|`jtڝT8Z[\Z5yJR9V(mB ~*eo@_Z7 _w/5Vk>ހRF9h[JBmk_ a]ZŐjQe.]h ÊH 21qg}Aq s Jo|ix`gܐXB5Aj@^ApB£|@̓[~g}ڳM2ƌ\o= 0hT9=)|,.AJS\@_5D .74LN6[ 92j{V1'Ĝ'VWUr͔UU /jL3-md`pM{tKU3жi32~IXs L1랏Qi+4 H;OžW @$zoa{%]'ʑnq5F}hlu[Mij1tEN*ܜX-BHejS<*SRؿ\l+k G?d8$r팘[(HaNUXπJ1՞ysYtq̊tKtRnxjf5ڂŝ})i)nϖXp,0ڌP)*Rw\lax I 9+Ô53MT dar"Y)dc,}rHXOs#B6)iN~jcleb/RsH TS:x~ƭ2܊민p r""feP{|^! @ -=q6(۰QpC^vSPW˻ϳ9k dhr11aFLI2\*)þ5S7PK!꫸'xl/printerSettings/printerSettings2.binrdHagHbHeP``papc 00aa G #?& H:2h.>PI0> ϰG TjPA Kq0``0af`07X*@!.?Rw& 57ÂA  "cJT"C#7@iPK!a#3:'xl/pivotCache/pivotCacheDefinition1.xmlUMo8/)=8e%E8[vѦPc?$:H)GN{G3Ù7cv$y))M&:_nz#Jg:chH#8voB<?gE>n;0?*9*,lי RbNg NgH=rb T:eνV˴z;'  ڣA Wݥ;?NPzQ(8OcT48 1߉n.R\ڼu/:od0]>${&!Z8}QE(H2[@I0aC򶨧DY EgDQDY %Iq2ouEDQq*W j*̧(>GQ`B!X/hb r :80wϋ g|ڳɠ|P&)G%p^YLV8|{$eD?o;j~D+N_XxDPtJYFo7]Çi5erqA+2i|DWe+DM/Px&GY>`f|3Û2gfr}f$d;cVXos,nX:`hޯY8}> ػ*Td! U@Ģk̍tX$9SLj'Ṡ"=Ac'R}$\p*O4tѬ J>㿢J}!Ы%g";U1iJ)w_1{b+84~J4od9AlEr3ohIV$7DO=ǒLV?r,iSiO2 ׵2 F[S̯2˝bz^[,׃\Lb yC59M,:K7o94eJ˭94|ICF1n)XZn&\${Lr=)Me+5ǝYk eYbVuv;k/wM B`z I9bPd:6w%A\Ql'.ڧ7G^{{Fhjثtɭ39gkLP.j>vY&#[Lt~VU}MXNɶX`Y+0#SI) O S6ȾYR}]ꂠlU/ӻ[s͋SrD+Ke :|Y<ݙEZYOsjx~{inh>nX͖ΐIcb{?HịSrKLu9L"H)QM(ɨ QY'_0}D_!9i!|4+;ϖ_<|΃'i r vaOe#psʧ<_dʑ8@Թ } ㅰ1Zj1Y9:l-w jpUv}Zz!0 5",ͩ)|z|õP  PT힥1IR9=Ɓ^WˊWQFk;yv7TLju8clvPT]|cwUI:1>>)CVzӢ-1S5[Vfי@piQznO4zQB=6)S;Duބ$J'Pd=h߇ůk՚ORWLvxs- ;_,5¦D q!ҝ[-pFﻻCm;Y? 
)/#3w.tSoDr4|K!JE'{>xbb\(o#,I?e6;[ZiLT35 sir$oJN30B'NQĴxS4"f )/^>h|}U'`o7ZI./|.rۂ:(x>?֔X4D?LCH:PB_˞eSN--V m| ᠍֐QAu D ߙXd# F*Ȁgȗsaq*88ZyŢNksVZn%~~0wm]IھG_TO|jZi& &%YAQH4~:q\ ZoKPK!xH4xl/tables/table2.xmldN0HwH$ UBѶ4lodo(}{4T=zvg/g'h0do2~Wzyu * -{*@E.vu60N%&5acze7ڡLU GSGͼ2xx= A Ut@eXȶs>-w^OgPwsǸ,=}]B]5!/MrtgRR:ΣW@oPK!W)xl/tables/table3.xml|QN0#ީV 􀈄h9GzX#J)UGΌgv/'0Tv/cS D, 8RUy}Ēm݂k`l4a;{o-t N9~A68b^Dh7JAOPK!͐xl/calcChain.xmllM 0w"EI@ұ $ 73D M ɦӢzv'\ &$BOdv['QRֳlFMZjsK9Rm^$ mX@Vd ['qIPK! !`PbdocProps/core.xml (|AK0ߡ䤇5i+c;+m& Ifoon2ryyGQV[*CQHPעT  1%XU+ ,7)ה6`\ 6$e);)ƖA2mm$s4;d;1!S,1-p":#` 8 $(gqF'H烮3Jҝ;f 7іCiI: uRvWP Njx\U̺xL&<ߧ:䁝Ox#_:oӢX<&QEB]gYv}|*dm|Kj*ߔl&l@vW%"i%&Q p&-gtxِ$`p4C.ʿ>Y5\*BYuVxa旾~҇m =0/  Jx J Qw=@ !=Ngt+:(|1[{y_~ ;yhl gg:oPK-!Y=y [Content_Types].xmlPK-!U0#L -_rels/.relsPK-!:'ARxl/_rels/workbook.xml.relsPK-!J5b( xl/workbook.xmlPK-!R1b xl/worksheets/sheet4.xmlPK-! ? >xl/worksheets/sheet3.xmlPK-!ʽm_xl/worksheets/sheet2.xmlPK-!;m2KB#!xl/worksheets/_rels/sheet1.xml.relsPK-!%#"xl/worksheets/_rels/sheet3.xml.relsPK-!ȡ4##xl/worksheets/_rels/sheet4.xml.relsPK-!,B#$xl/worksheets/_rels/sheet5.xml.relsPK-!6#%xl/worksheets/_rels/sheet6.xml.relsPK-! A G)&xl/pivotTables/_rels/pivotTable1.xml.relsPK-!KH)_1.$'xl/pivotTables/pivotTable1.xmlPK-!n+xl/worksheets/sheet1.xmlPK-!wuLV.xl/sharedStrings.xmlPK-!23xl/worksheets/sheet6.xmlPK-!\8?R 5xl/styles.xmlPK-!7` :xl/worksheets/sheet5.xmlPK-!1Gxo>xl/theme/theme1.xmlPK-!꫸'2Exl/printerSettings/printerSettings2.binPK-!a#3:'DFxl/pivotCache/pivotCacheDefinition1.xmlPK-!+{32Ixl/pivotCache/_rels/pivotCacheDefinition1.xml.relsPK-!*AK$Jxl/pivotCache/pivotCacheRecords1.xmlPK-!p&'ZOxl/printerSettings/printerSettings1.binPK-!9UdocProps/app.xmlPK-!xH4Xxl/tables/table2.xmlPK-!W)Yxl/tables/table3.xmlPK-!V(O[xl/tables/table4.xmlPK-!@)ϔ5\xl/tables/table5.xmlPK-!͐^xl/calcChain.xmlPK-! 
!`Pb^docProps/core.xmlPK-!]Abdaxl/tables/table1.xmlPK!!O bExcel-ValueReader-XLSX-1.14/t/valuereader1904.xlsx000444000000000000 2215014107234660 22177 0ustar00unknownunknown000000000000PK!A7n[Content_Types].xml (Tn0W?DV[$xX$(}'fQU%Ql[&<&YB@l.YO$` r=HEV5 ӵLb.j""%5 3NB?C%*=YK)ub8xR-JWQ23V$sU.)PI]h:C@im2 3 1 g/#ݺʸ2 x|`G㮶u_;ѐUOղwj s4ȥ-ZeN xe|o, 1ysi޺s V788wa:  CrhݝAPK!U0#L _rels/.rels (MO0 HݐBKwAH!T~I$ݿ'TG~xl/_rels/workbook.xml.rels (RMK0 0wvt/"Uɴ)&!3~*]XK/oyv5+zl;obG s>,8(%"D҆4j0u2jsMY˴S쭂 )fCy I< y!+EfMyk K5=|t G)s墙UtB),fPK!2Bmxl/workbook.xmlSn0?zjrIk -4ur&Wa>TJҴERݙy5M1%` fLY=of[k/hBGhǶ:nZEigf!we)9,,o4Ѓ8P,`4 fn#nu[d8uh/w:U81y? ZKe#T7y6oGIҏIy0Ьjm_/N,}W:=RY}H9EbAwk}m PK!1Gxxl/theme/theme1.xmlYn7;{O,ْcK6qbJ˘\݊X@Ѵ@o-P >m :$WiQq_lgqȽ|^!U/V"Dh4[If<'hLdtew.u u܌R%0yArx6" di raG(?n4&Dz\I=3erT5Be tY3E~#T 46./rS :槜WN,"OVƥ|`jtڝT8Z[\Z5yJR9V(mB ~*eo@_Z7 _w/5Vk>ހRF9h[JBmk_ a]ZŐjQe.]h ÊH 21qg}Aq s Jo|ix`gܐXB5Aj@^ApB£|@̓[~g}ڳM2ƌ\o= 0hT9=)|,.AJS\@_5D .74LN6[ 92j{V1'Ĝ'VWUr͔UU /jL3-md`pM{tKU3жi32~IXs L1랏Qi+4 H;OžW @$zoa{%]'ʑnq5F}hlu[Mij1tEN*ܜX-BHejS<*SRؿ\l+k G?d8$r팘[(HaNUXπJ1՞ysYtq̊tKtRnxjf5ڂŝ})i)nϖXp,0ڌP)*Rw\lax I 9+Ô53MT dar"Y)dc,}rHXOs#B6)iN~jcleb/RsH TS:x~ƭ2܊민p r""feP{|^! @ -=q6(۰QpC^vSPW˻ϳ9k dhr11aFLI2\*)þ5S7PK!Xk xl/styles.xmlWmo6>`q %RlVWP+ $,@KC/DgֆIْY:o{s^pH)«CT`r~MdAtᵴޤ~3ot ,M@iTE%Dö⦪))#$8 09 3%ԟv+Q63Z]Vl8$G0KzbDV*(Ū,YN:SL^:MaNdB7(W;@C]d!ȕ* 4!ngS_IV{$ZwV%Cm  Yg"$5TЀ އ8+>I^c6FY-PrCpNei+wՓK%"2njIef W:o@%4ט+jg,EANJpa+`ulw|A"v/;\1Ώ-@4eԭ ”e]&ml ceGJ眖R֪5 L:/*IIA[@89/ITr 6`Htmj'f y1Dpr~mC4v6yø\d}y&T /%(BZgvduY,^\V_}p|q_-h_\KkuqvB\\j'ot_QzGAf-5lQaA]x1<5ožn ~ڼIa\@$; _D GU{3a, xF-?oM`9#xj^g V ilxpx@]=m 6};( ag dYFdx{1fr&wu!. _nҿPK!$N4uZxl/worksheets/sheet1.xmlVn0?z F MgZYD$Q%i;wInH}1(hfj~ZWdH0 bJ*͆џ?on)179Twϟ{_L ` 246"#JJDsR]EiOˆL_¡B X)DC-oJٚ#[-.~ٶ7B-Re%'ӦQ+RheTaHD?y"dZs'pe' Fl$4Z}~I؛wkb*r%k^ b4(rK*FCl/KN GG߱o~ܖ(C_@nJc+M 2HNG I ֔gG([cU}[k'8)6-wsdҝ;Wb4t8>=0'9%^3ګIROz8'\/3:-DJ&zkQ^];+:Ns`Fǽz!ƳNt`F'!'qAft+:Ͽxd^tN{tv3:= gZW؄C{h׹='C |z#C*(btxkZgSֵǧW@Ë?-4c vے_հJK4m2*m5* TVbM;Jq3~ PK! 
LRbdocProps/core.xml (|AK0ߡ䤇6M;m:v No!yۂMZn޴je"{dӃ*/0VV:G$QWBm^p4MۛהWV8 6$m)ss[l77Qlq'Nx8&cuODg=ޛYL">:XκCf>X//ݨ8"rU+uKx,fL`Oܭ3|Dx#ztҧz$&I3^1S&ïBM%&$'! z+oPK!꫸'xl/printerSettings/printerSettings1.binrdHagHbHeP``papc 00aa G #?& H:2h.>PI0> ϰG TjPA Kq0``0af`07X*@!.?Rw& 57ÂA  "cJT"C#7@iPK!/ȧdocProps/app.xml (n0 (@V1-zI{d:*Kɞ~NSo$'JкÔmJQ7~[OQd_ +q,nojBDs>WbGRfeJR i4m0=˲x 5d(FEG_5X_1:k5)Pqw0蔜hQJNS6pƺQɷz@藶V-:4R?^ۥ(B$ o!v1S^k,x3"7NLc{C獽9Ӭ ')0r8@? 7Aﬗ,G_S܄uEAšSA=BM;[_{> x?_%?臘ۗPK-!A7n[Content_Types].xmlPK-!U0#L _rels/.relsPK-!>xl/_rels/workbook.xml.relsPK-!2Bmxl/workbook.xmlPK-!Ќ |3 xl/sharedStrings.xmlPK-!;m2KB# xl/worksheets/_rels/sheet1.xml.relsPK-!1Gx xl/theme/theme1.xmlPK-!Xk xl/styles.xmlPK-!$N4uZxl/worksheets/sheet1.xmlPK-! LRbdocProps/core.xmlPK-!꫸'Xxl/printerSettings/printerSettings1.binPK-!/ȧjdocProps/app.xmlPK &,!