EBook-Tools-v0.4.9 000755 001750 000764 0 11672427456 13141 5 ustar 00zed proto 000000 000000 EBook-Tools-v0.4.9/Changes 000444 001750 000764 12143 11672427456 14612 0 ustar 00zed proto 000000 000000 Release Notes
=============
0.4.9
New Features:
* 'ebook unpack' now automatically handles ePub files (or to be more
specific, any zip file)
0.4.8
Bug fixes:
* Fix extra data size calculation when multiple flag bits are
present
* Properly handle extra data in uncompressed text records
0.4.7
Bug fixes:
* Mobipocket unpacks now correctly account for the extra data that
can be appended to PalmDoc-compressed text records that should not
be made part of the decompression process.
0.4.6
Bug fixes:
* EReader HTML conversion now creates (semi-valid) XHTML output and
better handles paragraphs
* EReader font marker handling improved
* Missing config file options are properly handled
* Documentation fixes
0.4.5
Bug Fixes:
* user script tests avoid smoke tests that tend to break on
non-libraries
0.4.4
Bug Fixes:
* split_metadata now writes split components into the directory where
the source file is located instead of the current working
directory. The old behaviour could cause failure when running as
CGI.
0.4.3
New Features:
* gen_opf() now accepts a 'mediatype' argument to override
autodetection of the mime type of the 'textfile' argument.
Bug Fixes:
* The opffile argument in gen_opf() was not being set correctly
* unpack_ereader now forces the appropriate mime type instead of
letting it be autodetected. Fixes incorrect setting of text/plain
on HTML output on Windows systems.
0.4.1 - 0.4.2: minor bugfixes only
0.4.0
New Features:
* IMP support!
* It is now possible to unpack unencrypted IMP files both into .RES
directories and into HTML files. Encrypted IMP files can still
be unpacked into .RES directories.
* .RES directories can be repacked into IMP files.
* IMP metadata can be edited in-place
* LZSS compression and decompression is now available as a general
library component, though this may be split out into a separate
module in the future.
* Thanks go to Nick Rapallo for assistance with this feature set,
and Jeffrey Kraus-yao for most of the original
reverse-engineering work.
Bug Fixes:
* Mobipocket files with EXTH headers but no EXTH records now unpack
correctly.
Library and Syntax Changes:
* Some of the input and output options in the 'ebook' command-line
tool have been standardized to '--input' or '-i' and '--output' or
'-o'. Check the documentation for exact syntax.
* EBook::Tools::Unpack::usedir() has been moved into EBook::Tools as
a procedure, not a method.
* The known uid check in EBook::Tools::search_knownuids() has been
factored out into the twigelt_is_knownuid() twig search procedure.
This causes a lot of 'undefined value' warning spew from XML::Twig
to be bypassed and has the added advantage of removing a loop
It does, however, slightly change the search behaviour --
previously, the highest priority known UID in the array was
selected if multiple known UID identifiers were found. Now, the
first dc:identifier matching any known good UID is used instead.
It's possible to reclaim the old behaviour by sorting the returned
array, but on afterthought, it is probably better to let the user
file order determine the package id by default.
----------
0.3.3
* Fixed bugs relating to find_in_path() on MSWin32 systems and Data::UUID
0.3.2
* EBook::Tools should now in theory no longer require Perl 5.10
(minimum requirement is now Perl 5.8.8)
0.3.0
New Features:
* Configuration file and directory support
* Mobipocket HUFF/CDIC support
* Unpacking DRM-protected Mobipocket files now just skips the
encrypted text, but still extracts the unencrypted images
* EBook::Tools now takes advantage of several external helper files,
if they are made available:
* Mobipocket generation possible if mobigen is available
* Unpacking interface supports MS Reader (.lit) if convertlit is available
* Downconverting interface supports MS Reader and Mobipocket if
convertlit and MobiDeDRM are available, respectively.
* See README.Helpers.txt for more information
* excerpt_line() procedure available to show just the beginning and
end of a paragraph or other long line of text
* ':all' export tag is available for all modules to export all
procedures at once.
Library Changes:
* EReader.pm: write_* methods now return the filename(s) written
instead of just returning 1
* Unpack.pm: gen_metadata() no longer calls split_metadata() if the
raw option is specified
* Tools.pm: fix_languages() now creates a if none
exists (this isn't mandated by the standard, but mobigen requires
it)
Bug Fixes:
* Unpacking an eReader book now correctly adds the text to the OPF
manifest
----------
0.2.0
New Features:
* eReader unpacking support
* New unpacking option --htmlconvert
* ebook stripscript command to remove blocks out of a HTML file.
=head3 Arguments
=over
=item C
Specifies the input file. If not specified, the sub croaks.
=item C
Specifies the output file. If not specified, it defaults to C
(i.e. the input file is overwritten).
=item C
\s*
#\n#gix;
}
return 1;
}
=head2 C
Takes the raw HTML text of the object and replaces the filepos
anchors. This has to be called before any other action that modifies
the text, or the filepos positions will not be valid.
Returns 1 if successful, undef if there was no text to fix.
This is called automatically by L.
=cut
sub fix_html_filepos :method
{
# There doesn't appear to be any clearer way of handling this
# than the if-elsif chain.
## no critic (Cascading if-elsif chain)
my $self = shift;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my @filepos = ($$self{text} =~ /filepos="?([0-9]+)/gix);
my $length = length($$self{text});
return unless($length);
my $atpos;
debug(1,"DEBUG: creating filepos anchors");
foreach my $pos (uniq reverse sort @filepos)
{
# First, see if we're pointing to a position outside the text
if($pos >= $length-4)
{
debug(1,"DEBUG: filepos ",$pos," outside text, skipping");
next;
}
# Second, figure out what we're dealing with at the filepos
# offset indicated
$atpos = substr($$self{text},$pos,5);
if($atpos =~ /^ here
debug(2,"DEBUG: filepos ",$pos," points to ']/ix)
{
# 1-character block-level elements
debug(2,"DEBUG: filepos ",$pos," points to '",$1,"', updating id");
substr($$self{text},$pos,2,"<$1 id=\"fp" . $pos . '"');
}
elsif($atpos =~ /^<(h\d)[ >]/ix)
{
# 2-character block-level elements
debug(2,"DEBUG: filepos ",$pos," points to '",$1,"', updating id");
substr($$self{text},$pos,3,"<$1 id=\"fp" . $pos . '"');
}
elsif($atpos =~ /^<(div)[ >]/ix)
{
# 3-character block-level elements
debug(2,"DEBUG: filepos ",$pos," points to '",$1,"', updating id");
substr($$self{text},$pos,4,"<$1 id=\"fp" . $pos . '"');
}
elsif($atpos =~ /^<');
}
else
{
# Not an element
carp("WARNING: filepos ",$pos," pointing to '",$atpos,
"' not handled!");
}
}
return 1;
}
=head2 C
Uncompresses all text records using
L. This destroys the existing
contents of $self->{text} if any.
This method is called automatically at the end of C if
DictionaryHuffman encoding is detected.
=cut
sub uncompress_dictionaryhuffman_records :method
{
my $self = shift;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my $huffdata = $self->{huff};
croak($subname,"(): no HUFF record found!\n") unless($huffdata);
my $lasttextrecord = min($self->{header}{mobi}{indxrecord},
$self->{header}{mobi}{firstimagerecord}) - 1;
my $compressed;
if($self->{text})
{
debug(1,"WARNING: re-parsing HUFF/CDIC-compressed text destroys",
" existing uncompressed text.");
}
$self->{text} = '';
foreach my $recoffset (1 .. $lasttextrecord)
{
local $OUTPUT_AUTOFLUSH = 1;
print "Uncompressing record ",$recoffset,"/",$lasttextrecord,"\r"
if($recoffset % 10 == 0);
$compressed = $self->{records}->[$recoffset]->{data};
croak($subname,"(): no data found in record ",$recoffset,"!\n")
unless($compressed);
$self->{text} .= uncompress_dictionaryhuffman(
data => $compressed,
huff => $self->{huff},
cdics => $self->{cdics});
}
print "Finished uncompressing text.\n";
return 1;
}
################################
########## PROCEDURES ##########
################################
=head1 PROCEDURES
All procedures are exportable, but none are exported by default. All
procedures can be exported by using the ":all" tag.
=head2 C
Attempts to locate a copy of the MobiDeDrm script by searching PATH
and looking in the EBook::Tools user configuration directory (see
L.
Returns the complete path to the script, or undef if nothing was found.
This will use package variable C<$mobidedrm_cmd> as its first guess,
and set that variable to the return value as well.
=cut
sub find_mobidedrm
{
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my @guesses;
my $retval;
my $confdir = userconfigdir();
my $pattern = qr/^ MobiDeDrm(-\d+\.\d+)(.py)? $/ix;
if($mobidedrm_cmd and -f $mobidedrm_cmd) { return $mobidedrm_cmd; }
$mobidedrm_cmd = find_in_path($pattern,$confdir);
debug(1,"DEBUG: found mobidedrm as '",$mobidedrm_cmd,"'");
return $mobidedrm_cmd;
}
=head2 C
Attempts to locate the mobigen executable by making a test execution
on predicted locations (including just checking PATH) and looking in
the EBook::Tools user configuration directory (see
L.
Returns the system command used for a successful invocation, or undef
if nothing worked.
This will use package variable C<$mobigen_cmd> as its first guess, and
set that variable to the return value as well.
=cut
sub find_mobigen
{
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my @mobigen_guesses;
my $retval;
my $confdir = userconfigdir();
if($OSNAME eq 'MSWin32')
{
@mobigen_guesses = (
'kindlegen',
'mobigen',
'C:\Program Files\Mobipocket.com\kindlegen',
'C:\Program Files\Mobipocket.com\mobigen',
);
if($confdir)
{
push(@mobigen_guesses,
$confdir . '\kindlegen',
$confdir . '\mobigen');
}
}
else
{
@mobigen_guesses = (
'kindlegen',
'mobigen',
'mobigen_linux',
);
if($confdir)
{
push(@mobigen_guesses,
$confdir . "/kindlegen",
$confdir . "/mobigen_linux",
$confdir . "/mobigen");
}
}
unshift(@mobigen_guesses,$mobigen_cmd)
if($mobigen_cmd);
undef($mobigen_cmd);
foreach my $guess (@mobigen_guesses)
{
no warnings 'exec';
`$guess`;
# MS Windows may use 256 for a not-found code instead of -1
if($? != -1 && $? != 256)
{
debug(2,'DEBUG: `',$guess,'` returned ',$?);
$mobigen_cmd = $guess;
last;
}
}
if($mobigen_cmd)
{
debug(1,"DEBUG: Found mobigen as '",$mobigen_cmd,"'");
return $mobigen_cmd;
}
else { return; }
}
=head2 C
Takes as an argument a scalar containing the variable-length
Mobipocket EXTH data from the first record. Returns an array of
hashes, each hash containing the data from one EXTH record with values
from that data keyed to recognizable names.
If C<$headerdata> doesn't appear to be an EXTH header, carps a warning
and returns an empty list.
See:
http://wiki.mobileread.com/wiki/MOBI
=head3 Hash keys
=over
=item * C
A numeric value indicating the type of EXTH data in the record. See
package variable C<%exthtypes>.
=item * C
The length of the C value in bytes
=item * C
The data of the record.
=back
=cut
sub parse_mobi_exth
{
my ($headerdata) = @_;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
croak($subname,"(): no header data provided")
unless($headerdata);
my $length = length($headerdata);
my @list;
my $chunk;
my @exthrecords = ();
my $offset;
my $recordcnt;
$chunk = substr($headerdata,0,12);
@list = unpack("a4NN",$chunk);
if($list[0] ne 'EXTH')
{
debug(1,"(): Unrecognized Mobipocket EXTH ID '",$list[0],
"' (expected 'EXTH')");
return @exthrecords;
}
# The EXTH data never seems to be as long as remaining data after
# the Mobipocket main header, so only check to see if it is
# shorter, not equal
if($length < $list[1])
{
debug(1,"EXTH header specified length ",$list[1]," but found ",
$length," bytes.\n");
}
$recordcnt = $list[2];
unless($recordcnt)
{
debug(1,"EXTH flag set, but no EXTH records present");
return @exthrecords;
}
$offset = 12;
debug(2,"DEBUG: Examining ",$recordcnt," EXTH records");
foreach my $recordpos (1 .. $recordcnt)
{
my %exthrecord;
$chunk = substr($headerdata,$offset,8);
$offset += 8;
@list = unpack("NN",$chunk);
$exthrecord{type} = $list[0];
$exthrecord{length} = $list[1] - 8;
unless($exthtypes{$exthrecord{type}})
{
carp($subname,"(): EXTH record ",$recordpos," has unknown type ",
$exthrecord{type},"\n");
$offset += $exthrecord{length};
next;
}
unless($exthrecord{length})
{
carp($subname,"(): EXTH record ",$recordpos," has zero length\n");
next;
}
if( ($exthrecord{length} + $offset) > $length )
{
carp($subname,"(): EXTH record ",$recordpos,
" longer than available data");
last;
}
$exthrecord{data} = substr($headerdata,$offset,$exthrecord{length});
debug(2,"DEBUG: EXTH record ",$recordpos," [",
$exthtypes{$exthrecord{type}},"] has ",
$exthrecord{length}, " bytes");
push(@exthrecords,\%exthrecord);
$offset += $exthrecord{length};
}
debug(1,"DEBUG: Found ",$#exthrecords+1," EXTH records");
debug(1,"DEBUG: Found ",$length - $offset,
" remaining bytes of data at EXTH offset ",$offset);
return \@exthrecords;
}
=head2 parse_mobi_header($headerdata)
Takes as an argument a scalar containing the variable-length
Mobipocket-specific header data from the first record. Returns a hash
containing values from that data keyed to recognizable names.
See:
http://wiki.mobileread.com/wiki/MOBI
=head3 keys
The returned hash will have the following keys (documented in the
order in which they are encountered in the header):
=over
=item C
This should always be the string 'MOBI'. If it isn't, the procedure
croaks.
=item C
This is the size of the complete header. If this value is different
from the length of the argument, the procedure croaks.
=item C
A numeric code indicating what category of Mobipocket file this is.
=item C
A numeric code representing the encoding. Expected values are '1252'
(for Windows-1252) and '65001 (for UTF-8).
The procedure carps a warning if an unexpected value is encountered.
=item C
This is thought to be a unique ID for the book, but its actual use is
unknown.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be the Mobipocket format version. A second version
code shows up again later as C which is usually the same on
unprotected books but different on DRMd books.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
40 bytes of reserved data.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be the record offset to the first 'INDX' record, so
named for its first four letters.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
Offset in record 0 (not from start of file) of the full title of the
book.
=item C
Length in bytes of the full title of the book
=item C
16 bits of unknown data thought to be related to the book language.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
A pseudo-IANA language code string representing the main book language
(i.e. the value of ). See C<%mobilangcodes> for an exact
map of raw values to this string and notes on non-compliant results.
=item C
16 bits of unknown data thought to be related to the dictionary input
language.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
A pseudo-IANA language code string for the DictionaryInLanguage
element. See C<%mobilangcodes> for an exact map of raw values to this
string and notes on non-compliant results.
=item C
16 bits of unknown data thought to be related to the dictionary output
language.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
A pseudo-IANA language code string for the DictionaryOutLanguage
element. See C<%mobilangcodes> for an exact map of raw values to this
string and notes on non-compliant results.
=item C
This is another Mobipocket format version related to DRM. If no DRM
is present, it should be the same as C.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be an index to the first record containing image
data. If there are no images in the book, this value will be
4294967295 (0xffffffff)
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be the record offset to the 'HUFF' record, used in
HUFF/CDIC decompression.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be the number of HUFF and CDIC records, starting at
C.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be the record offset to the first 'DATP' record, so
named for its first four letters.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be the number of 'DATP' records present.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
A 32-bit bitfield related to the Mobipocket EXTH data. If bit 6
(0x40) is set, then there is at least one EXTH record.
=item C
36 bytes of unknown data at offset 116. This value will be undefined
if the header data was not long enough to contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
A number thought to be the byte offset inside of the record 0 data in
which DRM data can be found. If present and no DRM is set, contains
either the value 0xFFFFFFFF (normal books) or 0x00000000 (samples).
This value will be undefined if the header data was not long enough to
contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
A number thought to be related to DRM.
This value will be undefined if the header data was not long enough to
contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
A number thought to be the size of the data in bytes after
C containing DRM keys.
This value will be undefined if the header data was not long enough to
contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
A number thought to be related to DRM.
This value will be undefined if the header data was not long enough to
contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
32 bits of unknown data at offset 168, usually zeroes. This value
will be undefined if the header data was not long enough to contain
it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
32 bits of unknown data at offset 172, usually zeroes. This value
will be undefined if the header data was not long enough to contain
it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
16 bits of unknown data at offset 176. This value will be undefined
if the header data was not long enough to contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be an index to the last record containing image
data. If there are no images in the book, this value will be 65535
(0xffff).
Use with caution. This key may be renamed in the future if more
information is found.
=item C
32 bits of unknown data at offset 180. This value will be undefined
if the header data was not long enough to contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be an index to a 'FCIS' record, so named because
those are always the first four characters when the record data is
decompressed using uncompress_palmdoc().
This value will be undefined if the header data was not long enough to
contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
32 bits of unknown data at offset 188. This value will be undefined
if the header data was not long enough to contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
This is thought to be an index to a 'FLIS' record, so named because
those are always the first four characters when the record data is
decompressed using uncompress_palmdoc().
This value will be undefined if the header data was not long enough to
contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
32 bits of unknown data at offset 180. This value will be undefined
if the header data was not long enough to contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
Unknown data of unknown length running to the end of the header. This
value will be undefined if the header data was not long enough to
contain it.
Use with caution. This key may be renamed in the future if more
information is found.
=item C
Two bytes sometimes found inside of C, used to determine
if extra data has been appended to each text record that should not be
used in decompression.
=back
=cut
sub parse_mobi_header ## no critic (ProhibitExcessComplexity)
{
# There's no way to refactor this without breaking up chunks into
# separate subroutines, which is a bad idea.
my ($headerdata) = @_;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
croak($subname,"(): no header data provided")
unless($headerdata);
my $length = length($headerdata);
my @enckeys = keys(%pdbencoding);
my $chunk; # current chunk of headerdata being unpacked
my @list; # temporary holding area for unpacked data
my %header; # header hash to return;
my $hexstring; # hexadecimal debugging output string
croak($subname,"(): header data is too short! (only ",$length," bytes)")
if($length < 116);
# The Mobipocket header data is large enough that it's easier to
# deal with when handled in smaller chunks
# First chunk is 24 bytes before reserved block
$chunk = substr($headerdata,0,24);
@list = unpack("a4NNNNN",$chunk);
if($list[0] ne 'MOBI')
{
croak($subname,
"(): Unrecognized Mobipocket header ID '",$list[0],
"' (expected 'MOBI')");
}
if($list[1] != $length)
{
croak($subname,
"(): header specified length ",$list[1]," but found ",
$length," bytes.");
}
$header{identifier} = $list[0]; # Bytes 00-04 (16-20)
$header{headerlength} = $list[1]; # Bytes 04-08 (20-24)
$header{type} = $list[2];
$header{encoding} = $list[3];
$header{uniqueid} = $list[4];
$header{version} = $list[5];
if(!defined $pdbencoding{$header{encoding}})
{
carp($subname,"(): unknown encoding '",$header{encoding},"'");
}
else
{
debug(1,"DEBUG: Found encoding ",$pdbencoding{$header{encoding}});
}
# Second chunk is 40 bytes of reserved data, usually all 0xff
$header{reserved} = substr($headerdata,24,40);
$hexstring = hexstring($header{reserved});
debug(2,"DEBUG: reserved data: 0x",$hexstring)
if($hexstring ne ('ff' x 40));
# Third chunk is 12 bytes up to the language block
$chunk = substr($headerdata,64,12);
@list = unpack("NNN",$chunk);
$header{indxrecord} = $list[0];
$header{titleoffset} = $list[1];
$header{titlelength} = $list[2];
# Fourth chunk is 12 bytes containing the language codes
$chunk = substr($headerdata,76,12);
@list = unpack("nCCnCCnCC",$chunk);
$header{languageunknown} = $list[0];
$header{language} = parse_mobi_language($list[2],$list[1]);
$header{dilanguageunknown} = $list[3];
$header{dilanguage} = parse_mobi_language($list[5],$list[4]);
$header{dolanguageunknown} = $list[6];
$header{dolanguage} = parse_mobi_language($list[8],$list[7]);
# Fifth chunk is 8 bytes until next unknown block
$chunk = substr($headerdata,88,8);
@list = unpack("NN",$chunk);
$header{version2} = $list[0];
$header{firstimagerecord} = $list[1];
debug(2,"DEBUG: INDX record: ",$header{indxrecord});
if($header{firstimagerecord} == 0xffffffff)
{
debug(2,"DEBUG: no image records present");
}
else
{
debug(2,"DEBUG: first image record: ",$header{firstimagerecord});
}
# Sixth chunk is HUFF/CDIC and DATP record offsets
$chunk = substr($headerdata,96,16);
@list = unpack("NNNN",$chunk);
$header{huffrecord} = $list[0];
$header{huffreccnt} = $list[1];
$header{datprecord} = $list[2];
$header{datpreccnt} = $list[3];
# Seventh and last chunk guaranteed to be present is the EXTH
# bitfield
$chunk = substr($headerdata,112,4);
$header{exthflags} = unpack("N",$chunk);
# Remaining chunks are only parsed if the header is long enough
# Eighth chunk is 36 bytes of unknown data
if($length >= 152)
{
$header{unknown116} = substr($headerdata,116,32);
$header{unknown148} = unpack('N',substr($headerdata,148,4));
}
# Ninth chunk is 16 bytes of DRM-related data
if($length >= 168)
{
$chunk = substr($headerdata,152,16);
@list = unpack("NNNN",$chunk);
$header{drmoffset} = $list[0]; # Offset 152 - 155
$header{drmcount} = $list[1]; # Offset 156 - 159
$header{drmsize} = $list[2]; # Offset 160 - 163
$header{drmflags} = $list[3]; # Offset 164 - 167
debug(1,"DEBUG: Found DRM offset ",
sprintf("0x%08x",$header{drmoffset}));
}
# Tenth chunk is 8 bytes of unknown data, usually zeroes
if($length >= 176)
{
$chunk = substr($headerdata,168,8);
@list = unpack('NN',$chunk);
$header{unknown168} = $list[0];
$header{unknown172} = $list[1];
}
# Eleventh chunk is 2 16-bit values and 5 32-bit values, usually nonzero
if($length >= 200)
{
$chunk = substr($headerdata,176,24);
@list = unpack("nnNNNNN",$chunk);
$header{unknown176} = $list[0];
$header{lastimagerecord} = $list[1];
$header{unknown180} = $list[2];
$header{fcisrecord} = $list[3];
$header{unknown188} = $list[4];
$header{flisrecord} = $list[5];
$header{unknown196} = $list[6];
}
# Last possible chunk is unknown data lasting to the end
# of the header.
if($length >= 201)
{
$header{unknown200} = substr($headerdata,200,$length-200);
debug(2,"DEBUG: Found ",$length-200,
" bytes of unknown final data in Mobipocket header");
debug(2," 0x",hexstring($header{unknown200}));
}
# Part of that unknown data is the Extra Data Flags
#
# This is 16 bits used to determine if extra data has been stuffed
# at the end of each record that should not be used in
# decompression.
if($length >= 228)
{
$chunk = substr($headerdata,226,2);
$header{extradataflags} = unpack("n",$chunk);
debug(2,"DEBUG: Found Extra Data Flags in Mobipocket header");
debug(2," 0x",hexstring($chunk));
}
foreach my $key (sort keys %header)
{
no warnings;
my $value;
if(length($header{$key}) <= 4)
{
$value = $header{$key};
}
elsif(length($header{$key}) > 10)
{
$value = '0x' . hexstring($header{$key});
}
elsif(int($header{$key}) > 0x04ff)
{
$value = sprintf("0x%08x",$header{$key});
}
else
{
$value = $header{$key};
}
debug(2,'DEBUG: mobi{',$key,'}=',$value);
}
return \%header;
}
=head2 C
Takes the integer values C<$languagecode> and C<$regioncode> unpacked from
the Mobipocket header and returns a language string mostly (but not
entirely) conformant to the IANA language subtag registry codes.
Croaks if C<$languagecode> is not provided. If C<$regioncode> is not
provided or not recognized, it is disregarded and the base language
string (with no region or script) is returned.
If C<$languagecode> is not provided, the sub croaks. If it isn't
recognized, a warning is carped and the sub returns undef. Note that
0,0 is a recognized code returning an empty string.
See C<%mobilanguagecodes> for an exact map of values. Note that the
bottom two bits of the region code appear to be unused (i.e. the
values are all multiples of 4).
=cut
sub parse_mobi_language
{
my ($languagecode,$regioncode) = @_;
my $subname = ( caller(0) )[3];
debug(3,"DEBUG[",$subname,"]");
croak($subname,"(): no language code provided\n")
unless(defined $languagecode);
my $language = $mobilangcode{$languagecode}{$regioncode};
if(defined $language)
{
debug(2,"DEBUG: found language '",$language,"'",
" (language code ",$languagecode,",",
" region code ",$regioncode,")");
}
else
{
debug(1,"DEBUG: language code ",$languagecode,
", region code ",$regioncode," not known",
" -- ignoring region code");
$language = $mobilangcode{$languagecode}{0};
if(!$language)
{
carp("WARNING: language code ",$languagecode,
" not recognized!\n");
}
else
{
debug(1,"DEBUG: found downgraded language '",$language,"'",
" (language code ",$languagecode,",",
" region code 0)");
}
} # if($language) / else
return $language;
}
=head2 C
Computes the Mobipocket PID checksum used as the final two bytes of
the PID and appends them to C<$pid>, returning the merged string.
Used by L.
=cut
sub pid_append_checksum
{
my $pid = shift;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my $retval = $pid;
my $crc;
my $byte;
my $pos;
my $letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789";
my $length = length($letters);
$crc = ~ String::CRC32::crc32($pid,-1);
$crc = $crc & 0xffffffff;
$crc = $crc ^ ($crc >> 16);
for(0 .. 1)
{
$byte = $crc & 0xff;
$pos = (int($byte / $length)) ^ ($byte % $length);
$retval .= substr($letters,$pos % $length,1);
$crc >>= 8;
}
return $retval;
}
=head2 C
Returns 1 if the PID is a valid Mobipocket/Kindle PID and 0 otherwise.
This is determined by first ensuring that C<$pid> is exactly ten bytes
long, and then stripping the final two bytes normally used as a
checksum and recomputing them, returning 1 only if they are recomputed
correctly.
=cut
sub pid_is_valid
{
my $pid = shift;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
return 0 unless($pid);
my $pid2 = pid_append_checksum(substr($pid,0,-2));
return 0 unless(length($pid) == 10);
if($pid eq $pid2) { return 1; }
else { return 0; }
}
=head2 C
This is a COMPLETELY UNTESTED implementation of the Pukall Cipher 1
algorithm used for encryption and decryption in Mobipocket files. It
is a 128-bit stream cipher. For more information and alternate
implementations, see L.
Use at your own risk. Bug reports appreciated.
=head3 Arguments
=over
=item * C
16-byte encryption key. This must be provided, and must be exactly 16
bytes, or the procedure will croak.
=item * C
Input data to be either encrypted or decrypted. If this is not
provided, the procedure croaks.
=item * C (optional)
If set to true, the cipher will be used to encrypt the input data. If
not set, or set to false, the cipher will be used to decrypt the input
data.
=back
=cut
sub pukall_cipher_1
{
my %args = @_;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my %valid_args = (
'key' => 1,
'input' => 1,
'encrypt' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
croak($subname,"(): no key provided!\n")
unless(defined $args{key});
croak($subname,"(): no input provided!\n")
unless(defined $args{input});
my $key = $args{key};
my $keylength = length($key);
my $encrypt = $args{encrypt} || 0;
my $input = $args{input};
my $output = '';
croak($subname,"(): Invalid key length (expected 16, got ",
$keylength,")!\n")
unless($keylength == 16);
my @keyarray;
my $key_xor = 0;
my $sum1 = 0;
my $sum2 = 0;
my $byte;
my $byte_xor;
my $temp_xor;
foreach my $pos (0 .. 7)
{
$byte = ord(substr($key,$pos*2,1) << 8) | ord(substr($key,$pos*2+1,1));
$keyarray[$pos] = $byte;
}
foreach my $offset ( 0 .. length($input) )
{
$temp_xor = 0;
$byte_xor = 0;
foreach my $keypos (0 .. 7)
{
$temp_xor ^= $keyarray[$keypos];
$sum2 = ($sum2 + $keypos) * 20021 + $sum1;
$sum1 = ($temp_xor * 346) & 0xFFFF;
$sum2 = ($sum1 + $sum2) & 0xFFFF;
$temp_xor = ($temp_xor * 20021 + 1) & 0xFFFF;
$byte_xor ^= $temp_xor ^ $sum2;
}
$byte = ord(substr($input,$offset,1));
if($encrypt) { $key_xor = $byte * 257; }
$byte = (($byte ^ ($byte_xor >> 8)) ^ $byte_xor) & 0xFF;
if(!$encrypt) { $key_xor = $byte * 257; };
foreach my $keypos (0 .. 7)
{
$keyarray[$keypos] ^= $key_xor;
}
$output .= chr($byte);
}
return $output;
}
=head2 C
This checks the end of a text record for extra data that should not be
made part of decompression and returns the total size of all data
fields.
=head3 Arguments
=over
=item * C
A reference to the record data
=item * C
16 bits worth of flags indicating which extra data fields are present.
=back
=cut
sub record_extradata_size
{
my %args = @_;
my $subname = ( caller(0) )[3];
debug(3,"DEBUG[",$subname,"]");
my %valid_args = (
'dataref' => 1,
'extradataflags' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
my $dataref = $args{dataref};
croak($subname,"(): no record data provided!\n")
unless(defined $dataref);
croak($subname,"(): record data is not a reference\n") unless(ref $dataref);
croak($subname,"(): no extra data flags provided!\n")
unless(defined $args{extradataflags});
my $datalength = length($$dataref);
my $totalsize = 0;
my $trailpos = 0;
foreach my $flagbit (reverse 0..15)
{
if($args{extradataflags} & (1 << $flagbit))
{
my $bitpos = 0;
my $startpos = $trailpos;
my $traildata;
my $trailsize = 0;
my $byte;
# Bit 0 is the multi-byte character overlap flag, and has
# a different format from all other flags, where the size
# is the first two bits of the last unparsed byte of
# record data (i.e. the extra data bytes closest to the
# actual record text), plus one for the byte containing
# the size itself.
if($flagbit == 0)
{
$trailpos += 1;
$trailsize = ord(substr($$dataref,$datalength-$trailpos,1)) & 0x03;
$trailsize += 1; # The above line doesn't include the size byte itself
debug(3,"DEBUG: ",$trailsize," bytes of trailing data at flagbit ",
$flagbit,", startpos ",$startpos);
$totalsize += $trailsize;
last;
}
# For all other bits, the size is a backward-encoded
# variable-width integer at the end of the record data.
do
{
$trailpos += 1;
$byte = ord(substr($$dataref,$datalength-$trailpos,1));
$trailsize |= (($byte & 0x7f) << $bitpos);
$bitpos += 7;
}
while( !($byte & 0x80) && ($bitpos < 28) && ($trailpos < $datalength) );
$traildata = substr($$dataref,$datalength-$trailsize,$trailsize);
debug(3,"DEBUG: ",$trailsize," bytes of trailing data at flagbit ",
$flagbit,", startpos ",$startpos);
$totalsize += $trailsize;
}
}
return $totalsize;
}
=head2 C
Runs python on a copy of C if it is available (not
included with this distribution) to downconvert a Mobipocket file.
Returns the output filename on success, or undef otherwise.
=head3 Arguments
=over
=item * C
The input filename. If not specified or invalid, the procedure
returns undef.
=item * C
The output filename. If not specified, the program will use a name
based on the input file, appending '-nodrm' to the basename and
keeping the extension. In the special case of Mobipocket files ending
in '-sm', the '-sm' portion of the basename is simply removed, and
nothing else is appended.
=item * C
The PID to use to decrypt the file. If not specified or invalid, the
procedure returns undef.
=back
=cut
sub system_mobidedrm
{
my %args = @_;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my %valid_args = (
'infile' => 1,
'outfile' => 1,
'pid' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
if(!$args{infile})
{
debug(1,$subname,"(): no input file specified!");
return;
}
if(! -f $args{infile})
{
debug(1,$subname,"(): input file '",$args{infile},"' not found!");
return;
}
if(! pid_is_valid($args{pid}))
{
debug(1,$subname,"(): pid '",$args{pid},"' is not valid!");
return;
}
if( !find_mobidedrm() )
{
debug(1,$subname,"(): MobiDeDRM is not available!");
return;
}
my $outfile = $args{outfile};
my $suffix;
if(!$outfile)
{
($outfile,undef,$suffix) = fileparse($args{infile},'\.\w+$');
if($outfile =~ /-sm$/ix)
{
$outfile =~ s/-sm$//ix;
$outfile .= $suffix;
}
else
{
$outfile .= '-nodrm' . $suffix;
}
}
my $retval = system('python',$mobidedrm_cmd,
$args{infile},$outfile,$args{pid});
if($retval == -1 or $retval == 256)
{
debug(1,$subname,"(): python not available!");
return;
}
if(-z $outfile)
{
debug(1,$subname,"(): MobiDeDRM produced 0-sized output!");
unlink($outfile);
return;
}
return $outfile;
}
=head2 C
Runs C to convert OPF, HTML, or ePub input into a Mobipocket
.prc/.mobi book. The procedure L is called to locate
the executable.
Returns the return value from mobigen, or undef if no filename was
specified or the file did not exist. Also returns undef if mobigen
could not be found.
=head3 Arguments
=over
=item * C
The input filename. If not specified or invalid, the procedure
returns undef.
=item * C
The output filename. The mobigen executable will choose its own
filename for direct output, but if this argument is specified, the
output file will be renamed to the specified filename instead.
If not specified, the default output will be left in place.
=item * C
The directory in which to place the output file. The mobigen
executable itself will always place its output into the current
working directory, but if this argument is specified, the output file
will be moved into the specified directory, creating that directory if
necessary.
=item * C
Compression level from 0-2, where 0 is no compression, 1 is PalmDoc
compression, and 2 is HUFF/CDIC compression. If not specified,
defaults to 1 (PalmDoc compression).
=back
=cut
sub system_mobigen
{
my %args = @_;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my %valid_args = (
'infile' => 1,
'outfile' => 1,
'dir' => 1,
'compression' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
if(!$args{infile})
{
debug(1,$subname,"(): no input file specified!");
return;
}
if(! -f $args{infile})
{
debug(1,$subname,"(): input file '",$args{infile},"' not found!");
return;
}
find_mobigen();
if(!$mobigen_cmd)
{
debug(1,$subname,"(): mobigen command not specified!");
return;
}
my @mobigen = ($mobigen_cmd);
my $mobigenoutput = fileparse($args{infile},'\.\w+$') . '.mobi';
my $outfile = $args{outfile} || $mobigenoutput;
my $compression = $args{compression};
my $retval;
if(defined $compression)
{
unless($compression >= 0 and $compression <= 2)
{
croak($subname,"(): invalid compression level ",
$compression,"!\n");
}
}
else { $compression = 1; }
push(@mobigen,"-c$compression",$args{infile});
debug(2,"DEBUG: Compiling '",$args{infile},"' into '",$mobigenoutput,
"' using compression level ",$compression);
$retval = system(@mobigen);
if($retval)
{
debug(1,"WARNING: mobigen exited ",$retval);
}
if(! -f $mobigenoutput)
{
carp($subname,"(): expected output file '",$mobigenoutput,
"' not found!\n");
}
if($args{outfile})
{
rename($mobigenoutput,$args{outfile})
or carp($subname,"(): unable to rename '",$mobigenoutput,"' to '",
$args{outfile},"'!\n");
}
if($args{dir})
{
if(! -d $args{dir})
{
mkpath($args{dir})
or carp($subname,"(): unable to create directory '",
$args{dir},"'!\n");
}
rename($outfile,"$args{dir}/$outfile")
or carp($subname,"(): unable to move '",$outfile,"' into '",
$args{dir},"'!\n");
}
return $retval;
}
=head2 C
Uncompresses text compressed with the DictionaryHuffman compression
scheme.
=head3 Arguments
=over
=item * C
A scalar containing the compressed data to uncompress.
=item * C
A hashref pointing to the HUFF record data
=item * C
An arrayref pointing to the CDIC record data
=item * C
The current depth of the huffman tree, currently only used in
debugging.
=back
=cut
sub uncompress_dictionaryhuffman
{
my (%args) = @_;
my $subname = (caller(0))[3];
debug(3,"DEBUG[",$subname,"]");
my %valid_args = (
'data' => 1,
'huff' => 1,
'cdics' => 1,
'depth' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
croak($subname,"(): no data provided!\n")
unless($args{data});
# Why does $data have to be zero-terminated this way? Sometimes it
# runs out of bits otherwise, though.
my $data = $args{data} . "\x00\x00";
my $octets = length($data);
my $depth = $args{depth} || 0;
debug(3,"DEBUG: uncompressing ",$octets," octets at depth ",$depth);
my $huffref = $args{huff};
my $cdicref = $args{cdics};
my $cdic;
my $dataoffset = 0;
my ($bits, $nextbits);
my ($cacheoffset, $cacheval);
my $codeword; # CDIC codeword
my $codebits; # Extra bits at the end of $codeword
my $codelength; # number of bits in the code
my $index; # CDIC index key
my $branch;
my $branchcode;
my $branchsize;
my $branchoffset;
my $text = '';
my $bitsize = length($data) * 8;
my $bitvector = Bit::Vector->new($bitsize);
my $bitoffset;
my $bitpos = 0;
# Unfortunately, the bitstream needs to be processed from left to
# right, and Bit::Vector really likes to process from right to
# left, so we have to process by chunk instead of using
# Block_Store
while($bitpos < $bitsize)
{
$bitoffset = $bitsize-$bitpos;
$bits = unpack('C',substr($data,$dataoffset,1));
$bitvector->Chunk_Store(8,$bitoffset-8,$bits);
$dataoffset++;
$bitpos += 8;
}
# Debugging code inside the loop is commented out because each
# subroutine call incurs substantial overhead (even when the
# debugging level is low enough that nothing is printed).
$bitpos = 0;
do
{
$bitoffset = $bitsize-$bitpos;
# debug(4,"\nDEBUG: DEPTH ",$depth," BITPOS ",$bitpos,
# " [bitoffset=",$bitoffset,"]");
$nextbits = min(8,$bitoffset);
$cacheoffset = $bitvector->Chunk_Read($nextbits,$bitoffset-$nextbits);
if($cacheoffset > $#{$huffref->{cache}})
{
croak($subname,"(): invalid HUFF cache offset ",$cacheoffset,
" found at bit position ",$bitpos,"!\n");
return $text;
}
$cacheval = $huffref->{cache}->[$cacheoffset];
# debug(4,"## cacheval[",$cacheoffset,"]=",$cacheval);
$codelength = $cacheval & 0x1F; # low 5 bits
# debug(4,"## codelength=",$codelength);
if(!$codelength)
{
croak($subname,"(): HUFF cache found zero codelength",
" at bit position ",$bitpos,"!\n");
return $text;
}
$nextbits = min(32,$bitoffset);
$bits = $bitvector->Chunk_Read($nextbits,$bitoffset-$nextbits);
# debug(4,"## bits=",sprintf("%0${nextbits}b",$bits));
if(!$bits)
{
# debug(2,"DEBUG: no more data, returning from depth ",
# $depth," with:");
# debug(2," '",excerpt_line($text),"'");
return $text;
}
if($codelength > $bitoffset)
{
carp($subname,"():\n",
"WARNING: ran out of bits at depth ",$depth,"!\n");
if($bits)
{
carp($subname,"():\n",
"supposedly out of bits, but bit data still exists!\n");
}
# debug(2,"DEBUG: returning from depth ",$depth," with '",$text,"'");
return $text;
}
$codebits = $bitvector->Chunk_Read($codelength,$bitoffset-$codelength);
# debug(4,"## codebits=",$codebits);
if($cacheval & 0x80)
{
# Codeword is unique and in the short codewords cache
$codeword = ($cacheval >> 8) - $codebits;
}
unless($cacheval & 0x80)
{
# Code is not in the cache, must be looked up from base
# table. The problem is, the codelength is not known, so
# we have to iterate through the basetable adding bits to
# the code until the code is larger than the table value.
#
# There has to be a better way to do this?
# debug(4,"## code not in cache");
while($codebits < $huffref->{basetable}->[($codelength-1)*2])
{
$codelength++;
# debug(4,"## codelength extended to ",$codelength);
$codebits = $bitvector->Chunk_Read($codelength,$bitoffset-$codelength);
}
$codeword = $huffref->{basetable}->[($codelength-1)*2+1];
$codeword -= $codebits;
}
# debug(4,"## codeword=",$codeword);
$cdic = $codeword >> $huffref->{codelength};
if($cdic > $#{$cdicref})
{
croak($subname,"(): \n HUFF entry referenced invalid CDIC ",$cdic,
" at bit position ",$bitpos,", depth ",$depth,"!\n");
return $text;
}
# debug(4,"## cdic=",$cdic);
$index = $codeword - ($cdic << $huffref->{codelength});
# debug(4,"## cdic[",$cdic,"][",$index,"]=",
# $cdicref->[$cdic]->{indexes}->[$index]);
$branchoffset = 16 + $cdicref->[$cdic]->{indexes}->[$index];
# debug(4,"## branchoffset=",$branchoffset);
# 15 lowest bits of $branchcode are the size of the leaf in
# bytes -- however, only sizes 1-127 are ever used?
# 16th bit is set if the leaf is the end of the tree and no
# recursion is needed
$branchcode = unpack('n',substr($cdicref->[$cdic]->{data},
$branchoffset,2) );
$branchsize = $branchcode & 0x7fff;
# debug(4,"## branchcode=",$branchcode," branchsize=",$branchsize,
# " branchcode&0x8000=",$branchcode & 0x8000);
if(!$branchsize || $branchsize > 127)
{
carp($subname,"():\n",
" HUFF branch at bit position ",$bitpos,", depth ",$depth,
" has size ",$branchsize," (expected 1-127)\n");
}
$branch = substr($cdicref->[$cdic]->{data},$branchoffset+2,$branchsize);
if($branchcode & 0x8000)
{
# End of tree. Append branch to text.
# debug(4,"## branch='",$branch,"'");
$text .= $branch;
}
else
{
# debug(2,"DEBUG: recursing at depth ",$depth,
# " with:");
# debug(2," '",excerpt_line($text),"'");
$text .= uncompress_dictionaryhuffman(
data => $branch,
huff => $args{huff},
cdics => $args{cdics},
depth => $depth + 1);
}
$bitpos += $codelength;
} while($bitpos < length($data) * 8);
# debug(2,"DEBUG: returning from depth ",$depth," with:");
# debug(2," '",excerpt_line($text),"'");
return $text;
}
=head2 C
Takes as an argument 4 bytes of data. If less data is provided, the
sub croaks. If more, a debug warning is provided, but the sub
continues.
In scalar context returns a language string mostly (but not entirely)
conformant to the IANA language subtag registry codes.
In list context, returns the language string, an unknown code integer,
a region code integer, and a language code integer, with the last
three being directly unpacked values.
See C<%mobilangcodes> for an exact map of values. Note that the
bottom two bits of the region code appear to be unused (i.e. the
values are all multiples of 4). The unknown code integer appears to
be unused, and is generally zero.
The original implementation by Mobipocket may have been via
Microsoft's .NET CultureInfo class. See:
L
=cut
sub unpack_mobi_language
{
my $data = shift;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
croak($subname,"(): no language data provided")
unless($data);
croak($subname,"(): language data is too short (only ",length($data),
" bytes, need 4\n")
if(length($data) < 4);
debug(1,$subname,"(): expected 4 bytes of data, but received ",
length($data))
if(length($data) > 4);
my ($unknowncode,$regioncode,$languagecode) = unpack('nCC',$data);
my $language = parse_mobi_language($languagecode,$regioncode);
my @returnlist = ($language,$unknowncode,$regioncode,$languagecode);
if(wantarray) { return @returnlist; }
else { return $returnlist[0]; }
}
########## END CODE ##########
=head1 BUGS AND LIMITATIONS
=over
=item * Unpacking DRM-protected text isn't supported. Although
infrastructure may be added later to make use of external helpers and
plugins, direct DRM support will never be added to the main code for
legal reasons.
=item * Repacking a .prc without fully extracting to OPF and
completely converting back isn't supported. This will have to be
implemented before an interface to perform minor metadata alterations
can be implemented.
=item * Mobipocket HUFF/CDIC decoding (used mostly on dictionaries)
isn't well documented.
=item * Not all Mobipocket data is understood, so a conversion from
OPF to Mobipocket .prc back to OPF will not result in all data being
retained. Patches welcome.
=item * Mobipocket INDX, DATP, FCIS, and FLIS records are not
understood and are completely ignored
=item * Mobipocket EXTH subjectcode records may not end up attached to
the correct subject element if the number of subject records differs
from the number of subjectcode records. This is because the
Mobipocket format leaves the EXTH subjectcode records completely
unlinked from the subject records, and there is no way to detect if a
subject with no associated subjectcode comes before a subject with an
associated subjectcode.
Fortunately, this should rarely be a problem with real data, as
Mobipocket Creator only allows a single subject to be set, and the
only other way to have a subjectcode attached to a subject is to
manually edit the OPF file and insert an additional dc:Subject element
with a BASICCode attribute.
Mobipocket has indicated that they may move data currently in their
custom elements and attributes to the standard elements in a
future release, so this problem may become moot then.
=back
=head1 AUTHOR
Zed Pobre
=head1 LICENSE AND COPYRIGHT
Copyright 2008 Zed Pobre
Licensed to the public under the terms of the GNU GPL, version 2
=cut
1;
EBook-Tools-v0.4.9/lib/EBook/Tools/MSReader.pm 000444 001750 000764 20574 11672427456 20173 0 ustar 00zed proto 000000 000000 package EBook::Tools::MSReader;
use warnings; use strict; use utf8;
use English qw( -no_match_vars );
use version 0.74; our $VERSION = qv("0.4.8");
# Perl Critic overrides:
## no critic (Package variable)
# RequireBriefOpen seems to be way too brief to be useful
## no critic (RequireBriefOpen)
# Double-sigils are needed for lexical filehandles in clear print statements
## no critic (Double-sigil dereference)
=head1 NAME
EBook::Tools::MSReader - Helper code for working with Microsoft Reader (.lit) e-books.
=head1 SYNOPSIS
use EBook::Tools::MSReader qw(find_convertlit find_convertlit_keys
system_convertlit);
$EBook::Tools::MSReader::convertlit_cmd = '/opt/convertlit/clit';
$EBook::Tools::MSReader::convertlit_keys = '/opt/convertlit/keys.txt';
my $convertlit = find_convertlit();
my $keyfile = find_convertlit_keys();
system_convertlit(infile => 'myfile.lit',
dir => 'myfile-unpacked');
=cut
require Exporter;
use base qw(Exporter);
our @EXPORT_OK;
@EXPORT_OK = qw (
&find_convertlit
&find_convertlit_keys
&system_convertlit
);
our %EXPORT_TAGS = ('all' => [@EXPORT_OK]);
use Carp;
use EBook::Tools qw(debug userconfigdir);
use Encode;
use File::Basename qw(dirname fileparse);
use File::Path; # Exports 'mkpath' and 'rmtree'
binmode(STDERR,":utf8");
my $drmsupport = 0;
eval
{
require EBook::Tools::DRM;
EBook::Tools::DRM->import();
}; # Trailing semicolon is required here
unless($@){ $drmsupport = 1; }
our $convertlit_cmd = '';
our $convertlit_keys = '';
################################
########## PROCEDURES ##########
################################
=head1 PROCEDURES
All procedures are exportable, but none are exported by default.
=head2 C
Attempts to locate the convertlit executable by making a test
execution on predicted locations (including just checking PATH) and
looking in the EBook::Tools user configuration directory (see
L.
Returns the system command used for a successful invocation, or undef
if nothing worked.
This will use package variable C<$convertlit_cmd> as its first guess,
and set that variable to the return value as well.
=cut
sub find_convertlit
{
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my @convertlit_guesses;
my $retval;
my $confdir = userconfigdir();
if($OSNAME eq 'MSWin32')
{
@convertlit_guesses = (
'clit',
'C:\Program Files\ConvertLIT\clit',
);
if($confdir)
{
push(@convertlit_guesses,
$confdir . '\clit',
$confdir . '\convertlit');
}
}
else
{
@convertlit_guesses = (
'clit',
'convertlit',
);
if($confdir)
{
push(@convertlit_guesses,
$confdir . "/clit",
$confdir . "/convertlit");
}
push(@convertlit_guesses,
'/opt/convertlit/clit',
'/opt/convertlit/convertlit',
'/opt/clit/clit',
'/opt/clit/convertlit'
);
}
unshift(@convertlit_guesses,$convertlit_cmd)
if($convertlit_cmd);
undef($convertlit_cmd);
foreach my $guess (@convertlit_guesses)
{
no warnings 'exec';
`$guess`;
# MS Windows may use 256 for a not-found code instead of -1
if($? != -1 && $? != 256)
{
debug(2,'DEBUG: `',$guess,'` returned ',$?);
$convertlit_cmd = $guess;
last;
}
}
if($convertlit_cmd)
{
debug(1,"DEBUG: Found convertlit as '",$convertlit_cmd,"'");
return $convertlit_cmd;
}
else { return; }
}
=head2 C
Attempts to locate the convertlit C file by checking
predicted filenames, both in the current working directory and in the
EBook::Tools user configuration directory (see
L.
If C<$filename> is provided, the file C will also
be checked in both locations.
Returns the name of the first file found, or undef if nothing was found.
This will use package variable C<$convertlit_keys> as its first guess,
and set that variable to the return value as well.
=cut
sub find_convertlit_keys
{
my $filename = shift;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my $basename;
$basename = fileparse($filename,'\.\w+$') if($filename);
my $confdir = userconfigdir();
my $keyfile;
my @guesses = ( 'keys.txt' );
push(@guesses, $basename . '-keys.txt') if($basename);
push(@guesses, $confdir . '/keys.txt') if($confdir);
push(@guesses, $confdir . '/' . $basename . '-keys.txt')
if($basename && $confdir);
unshift(@guesses,$convertlit_keys) if($convertlit_keys);
foreach my $guess (@guesses)
{
if(-f $guess)
{
$keyfile = $guess;
debug(1,"DEBUG: found convertlit keys in '",$keyfile,"'");
last;
}
}
if($keyfile)
{
$convertlit_keys = $keyfile;
return $keyfile;
}
else { return; }
}
=head2 C
Runs C to extract or downconvert a MS Reader .lit file.
The procedures L and L are
both called to locate necessary helper files.
Returns the return value from convertlit, or undef if convertlit or
the input file could not be found, or neither output file nor
directory is specified.
=head3 Arguments
=over
=item * C
The input filename. If not specified or invalid, the procedure croaks.
=item * C
The output filename. If this is specified convertlit will perform a
downconversion.
=item * C
The output directory. If this is specified, and C is not,
convertlit will perform an extraction. If both this and C
are specified, convertlit will downconvert and place the downconverted
file into the specified directory.
=item * C
The location of the C file containing the encryption keys,
if available. This is only required if the C<.lit> file is
DRM-protected and package variable C<$convertlit_keys> does not point
to the correct file.
=back
=cut
sub system_convertlit
{
my %args = @_;
my $subname = ( caller(0) )[3];
debug(2,"DEBUG[",$subname,"]");
my %valid_args = (
'infile' => 1,
'outfile' => 1,
'keyfile' => 1,
'dir' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
if(!$args{infile})
{
debug(1,$subname,"(): no input file specified!");
return;
}
if(! -f $args{infile})
{
debug(1,$subname,"(): input file '",$args{infile},"' not found!");
return;
}
find_convertlit();
find_convertlit_keys();
croak($subname,"(): convertlit command not specified!\n")
unless($convertlit_cmd);
my @convertlit = ($convertlit_cmd);
my $retval;
my $keyfile = $args{keyfile} || $convertlit_keys;
my $outfile = $args{outfile};
my $dir = $args{dir};
if($keyfile)
{
push(@convertlit,"-k$keyfile");
}
push(@convertlit,$args{infile});
if($outfile && $dir)
{
push(@convertlit,"$dir/$outfile");
}
elsif($outfile)
{
push(@convertlit,$outfile);
}
elsif($dir)
{
# Expansion into a directory requires a trailing slash
unless($dir =~ m{(/ | \\) $}x)
{
$dir .= '/';
}
push(@convertlit,$dir);
}
else
{
debug(1,$subname,"(): neither output file nor directory specified!");
return;
}
debug(2,"DEBUG: converting lit with '",join(' ',@convertlit),"'");
$retval = system(@convertlit);
return $retval;
}
########## END CODE ##########
=head1 BUGS AND LIMITATIONS
=over
=item * All handling happens through ConvertLIT as an external helper.
Native Perl code may eventually be written to handle non-DRMed
extraction.
=item * Unit tests are unwritten
=back
=head1 AUTHOR
Zed Pobre
=head1 LICENSE AND COPYRIGHT
Copyright 2008 Zed Pobre
Licensed to the public under the terms of the GNU GPL, version 2.
ConvertLIT (not included) is copyright 2002, 2003 Dan A. Jackson, and
licensed under the terms of the GNU GPL, version 2 or later.
=cut
1;
__END__
EBook-Tools-v0.4.9/lib/EBook/Tools/IMP.pm 000444 001750 000764 251766 11672427456 17207 0 ustar 00zed proto 000000 000000 package EBook::Tools::IMP;
use warnings; use strict; use utf8;
use English qw( -no_match_vars );
use version 0.74; our $VERSION = qv("0.4.8");
# Perl Critic overrides:
## no critic (Package variable)
# RequireBriefOpen seems to be way too brief to be useful
## no critic (RequireBriefOpen)
# Double-sigils are needed for lexical filehandles in clear print statements
## no critic (Double-sigil dereference)
=head1 NAME
EBook::Tools::IMP - Object class for manipulating the SoftBook/GEB/REB/eBookWise C<.IMP> and C<.RES> e-book formats
=head1 SYNOPSIS
use EBook::Tools::IMP qw(:all)
my $imp = EBook::Tools::IMP->new();
$imp->load('myfile.imp');
=cut
require Exporter;
use base qw(Exporter);
our @EXPORT_OK;
@EXPORT_OK = qw (
&detect_resource_type
&parse_imp_resource_v1
&parse_imp_resource_v2
);
our %EXPORT_TAGS = ('all' => [@EXPORT_OK]);
use Carp;
use Cwd qw(getcwd realpath);
use EBook::Tools qw(:all);
use EBook::Tools::LZSS qw(:all);
use Encode;
use File::Basename qw(basename dirname fileparse);
use File::Path; # Exports 'mkpath' and 'rmtree'
use Image::Size;
use List::MoreUtils qw(any none);
binmode(STDERR,":utf8");
my $drmsupport = 0;
eval
{
require EBook::Tools::DRM;
EBook::Tools::DRM->import();
}; # Trailing semicolon is required here
unless($@){ $drmsupport = 1; }
# Constants for $self->{device},
use constant DEVICE_SB200 => 0; # SoftBook 200/250
use constant DEVICE_REB1200 => 1; # REB 1200/GEB 2150
use constant DEVICE_EBW1150 => 2; # EBW 1150/GEB 1150
use constant IMAGETYPES => ('png','jpg','gif','pic');
use constant IMAGERESOURCES => ('GIF ','JPEG','PICT','PIC2','PNG ');
my %IMAGE_RESOURCE_MAP = (
'GIF ' => 'gif',
'JPEG' => 'jpg',
'PICT' => 'pic',
'PIC2' => 'png',
'PNG ' => 'png',
);
####################################################
########## CONSTRUCTOR AND INITIALIZATION ##########
####################################################
my %rwfields = (
'version' => 'integer',
'filename' => 'string',
'filecount' => 'integer',
'resdirlength' => 'integer',
'resdiroffset' => 'integer',
'compression' => 'integer',
'encryption' => 'integer',
'device' => 'integer',
'zoomstates' => 'integer',
'identifier' => 'string',
'category' => 'string',
'subcategory' => 'string',
'title' => 'string',
'lastname' => 'string',
'middlename' => 'string',
'firstname' => 'string',
'etiserverdata' => 'hash', # Extra data after book properties
'resdirname' => 'string',
'RSRC.INF' => 'string',
'resfiles' => 'array', # Array of hashrefs
'toc' => 'array', # Table of Contents, array of hashes
'resources' => 'hash', # Hash of hashrefs keyed on 'type'
'lzsslengthbits' => 'integer',
'lzssoffsetbits' => 'integer',
'text' => 'string', # Uncompressed text
'imrn' => 'hash', # Hash of hashes of ImRn resource data
'gif' => 'hash', # Hash of hashes of GIF image data
'jpg' => 'hash', # Hash of hashes of JPEG image data
'pic' => 'hash', # Hash of hashes of PICT image data
'png' => 'hash', # Hash of hashes of PNG image data
'offsetelements' => 'hash', # Hash of text offsets to HTML elements
);
my %rofields = (
'unknown0x0a' => 'string',
'unknown0x18' => 'integer',
'unknown0x1c' => 'integer',
'unknown0x28' => 'integer',
'unknown0x2a' => 'integer',
'unknown0x2c' => 'integer',
);
my %privatefields = (
);
# A simple 'use fields' will not work here: use takes place inside
# BEGIN {}, so the @...fields variables won't exist.
require fields;
fields->import(
keys(%rwfields),keys(%rofields),keys(%privatefields)
);
=head1 CONSTRUCTOR AND INITIALIZATION
=head2 C
Instantiates a new EBook::Tools::IMP object. If C<$filename> is
specified, it will also immediately initialize itself via the C
method.
=cut
sub new ## no critic (Always unpack @_ first)
{
my $self = shift;
my $class = ref($self) || $self;
my ($filename) = @_;
my $subname = (caller(0))[3];
debug(2,"DEBUG[",$subname,"]");
$self = fields::new($class);
if($filename)
{
$self->{filename} = $filename;
$self->load();
}
return $self;
}
=head2 C
Loads a .imp file, parsing it into the various object attributes.
Returns 1 on success, or undef on failure.
=cut
sub load :method
{
my $self = shift;
my ($filename) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
if(!$self->{filename} and !$filename)
{
carp($subname,"(): no filename specified!\n");
return;
}
$self->{filename} = $filename if($filename);
$filename = $self->{filename} if(!$filename);
my $fh_imp;
my $headerdata;
my $bookpropdata;
my $retval;
my $toc_size;
my $tocdata;
my $entrydata;
my $resource; # Hashref
if(! -f $filename)
{
carp($subname,"(): '",$filename,"' not found!\n");
return;
}
open($fh_imp,'<:raw',$filename)
or croak($subname,"(): unable to open '",$filename,
"' for reading!\n");
sysread($fh_imp,$headerdata,48);
$retval = $self->parse_imp_header($headerdata);
if(!$retval)
{
carp($subname,"(): '",$filename,"' is not an IMP file!\n");
return;
}
if(!$self->{resdiroffset})
{
carp($subname,"(): '",$filename,"' has no res dir offset!\n");
return;
}
my $bookproplength = $self->{resdiroffset} - 24;
sysread($fh_imp,$bookpropdata,$bookproplength);
$retval = $self->parse_imp_book_properties($bookpropdata);
if(!$self->{resdirlength})
{
carp($subname,"(): '",$filename,"' has no directory name!\n");
return;
}
sysread($fh_imp,$self->{resdirname},$self->{resdirlength});
debug(1,"DEBUG: resource directory = '",$self->{resdirname},"'");
if($self->{version} == 1)
{
$toc_size = 10 * $self->{filecount};
sysread($fh_imp,$tocdata,$toc_size)
or croak($subname,"(): unable to read TOC data!\n");
$self->parse_imp_toc_v1($tocdata);
$self->{resources} = ();
foreach my $entry (@{$self->{toc}})
{
sysread($fh_imp,$entrydata,$entry->{size}+10);
$resource = parse_imp_resource_v1($entrydata);
if($resource->{type} ne $entry->{type})
{
carp($subname,"():\n",
" '",$entry->{type},"' TOC entry pointed to '",
$resource->{type},"' resource!\n");
}
$self->{resources}->{$resource->{type}} = $resource;
}
}
elsif($self->{version} == 2)
{
$toc_size = 20 * $self->{filecount};
sysread($fh_imp,$tocdata,$toc_size)
or croak($subname,"(): unable to read TOC data!\n");
$self->parse_imp_toc_v2($tocdata);
$self->{resources} = ();
foreach my $entry (@{$self->{toc}})
{
sysread($fh_imp,$entrydata,$entry->{size}+20);
$resource = parse_imp_resource_v2($entrydata);
$self->{resources}->{$resource->{type}} = $resource;
}
}
else
{
carp($subname,"(): IMP version ",$self->{version}," not supported!\n");
return;
}
$self->parse_resource_images();
$self->parse_resource_imrn();
$self->parse_text();
close($fh_imp)
or croak($subname,"(): failed to close '",$filename,"'!\n");
debug(3,$self->{text});
return 1;
}
=head2 C
Loads a C<.RES> resource directory, parsing it into the object
attributes. Returns 1 on success, or undef on failure.
=cut
sub load_resdir
{
my $self = shift;
my ($dirname) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
if(!$dirname)
{
carp($subname,"(): no resource directory specified!\n");
return;
}
if(! -d $dirname)
{
carp($subname,"(): resource directory '",$dirname,"' not found!\n");
return;
}
if(! -f $dirname . '/DATA.FRK')
{
carp($subname,"():\n",
" resource directory '",$dirname,"' has no text resource!\n");
return;
}
if(! -f $dirname . '/RSRC.INF')
{
carp($subname,"()\n",
" resource directory '",$dirname,"' has no RSRC.INF!\n");
return;
}
my $fh_resource;
my $rsrcinf;
my @list;
open($fh_resource,'<:raw',$dirname . '/RSRC.INF')
or croak($subname,"():\n",
" unable to open '$dirname/RSRC.INF' for reading!\n");
sysread($fh_resource,$rsrcinf,-s "$dirname/RSRC.INF");
close($fh_resource)
or croak($subname,"():\n",
" unable to close '$dirname/RSRC.INF'!\n");
if(length($rsrcinf) < 48)
{
carp($subname,"():\n",
" RSRC.INF is too short (only ",length($rsrcinf)," bytes)!\n");
return;
}
if(substr($rsrcinf,2,8) ne 'BOOKDOUG')
{
carp($subname,"():\n",
" RSRC.INF does not contain a valid header!\n");
return;
}
$self->{resdirname} = basename($dirname);
$self->{resdirlength} = length($self->{resdirname});
debug(2,"DEBUG: IMP resdir name = ",$self->{resdirname});
# We have no idea what to put here, so fill it with nulls
$self->{unknown0x0a} = "\x00\x00\x00\x00\x00\x00\x00\x00";
# No matter what the RSRC.INF says, we're going to use a v2 format
$self->{version} = 2;
@list = unpack('nNNNNnCCN',substr($rsrcinf,10,26));
$self->{resdiroffset} = $list[0];
$self->{unknown0x18} = $list[1];
$self->{unknown0x1c} = $list[2];
$self->{compression} = $list[3];
$self->{encryption} = $list[4];
$self->{unknown0x28} = $list[5];
$self->{unknown0x2a} = $list[6];
$self->{device} = $list[7] >> 4;
$self->{zoomstates} = $list[7] & 0x0f;
$self->{unknown0x2c} = $list[8];
debug(2,"DEBUG: IMP resdir offset = ",$self->{resdiroffset});
debug(2,"DEBUG: Unknown 0x18 = ",$self->{unknown0x18});
debug(2,"DEBUG: Unknown 0x1c = ",$self->{unknown0x1c});
debug(2,"DEBUG: IMP compression = ",$self->{compression});
debug(2,"DEBUG: IMP encryption = ",$self->{encryption});
debug(2,"DEBUG: Unknown 0x28 = ",$self->{unknown0x28});
debug(2,"DEBUG: Unknown 0x2A = ",$self->{unknown0x2a});
debug(2,"DEBUG: IMP device = ",$self->{device});
debug(2,"DEBUG: IMP zoom state = ",$self->{zoomstates});
debug(2,"DEBUG: Unknown 0x2c = ",$self->{unknown0x2c});
@list = unpack('Z*Z*Z*Z*Z*Z*Z*',substr($rsrcinf,36));
$self->{identifier} = $list[0];
$self->{category} = $list[1];
$self->{subcategory} = $list[2];
$self->{title} = $list[3];
$self->{lastname} = $list[4];
$self->{middlename} = $list[5];
$self->{firstname} = $list[6];
my $proplength = $self->bookproplength;
if(length($rsrcinf) > $proplength + 36)
{
debug(1,"Book properties data has extra ETI server data appended");
$self->parse_eti_server_data(substr($rsrcinf,$proplength + 36));
}
my $cwd = getcwd();
if(! chdir($dirname))
{
carp($subname,"(): unable to enter directory '",$dirname,"'!\n");
return;
}
my @filelist = <*>;
$self->{resources} = {};
$self->{toc} = ();
foreach my $file (@filelist)
{
my $resdata;
my %resource;
my %tocentry;
next if($file eq 'RSRC.INF');
unless($file =~ /^ ([A-Z]{4} | DATA\.FRK) $/x)
{
debug(1,"DEBUG: invalid resource filename '",$file,
"' -- skipping");
next;
}
if(-z $file)
{
debug(1,"DEBUG: resource file '",$file,
"' has zero size -- skipping");
next;
}
open($fh_resource,'<:raw',$file)
or croak($subname,"():\n",
" unable to open '",$file,"' for reading!\n");
sysread($fh_resource,$resdata,-s $file);
close($fh_resource)
or croak($subname,"(): unable to close '",$file,"'!\n");
if($file eq 'DATA.FRK')
{
$resource{name} = ' ';
$resource{type} = ' ';
}
else
{
$resource{name} = $file;
$resource{type} = detect_resource_type(\$resdata);
}
if(! $resource{type})
{
debug(1,"DEBUG: unable to determine resource type for file '",
$file,"' -- skipping");
next;
}
$resource{unknown1} = 0;
$resource{unknown2} = 0;
$resource{size} = length($resdata);
%tocentry = %resource;
push(@{$self->{toc}},\%tocentry);
$resource{data} = $resdata;
$self->{resources}->{$resource{type}} = \%resource;
debug(2,"DEBUG: found resource '",$resource{name},
"', type '",$resource{type},"' [",$resource{size}," bytes]");
}
chdir($cwd);
$self->parse_resource_images();
$self->parse_resource_imrn();
$self->parse_text();
return 1;
}
######################################
########## ACCESSOR METHODS ##########
######################################
=head2 C
Returns the full name of the author of the book.
Author information can either be found entirely in the
C<< $self->{firstname} >> attribute or split up into
C<< $self->{firstname} >>, C<< $self->{middlename} >>, and
C<< $self->{lastname} >>. If the last name is found separately,
the full name is returned in the format "Last, First Middle".
Otherwise, the full name is returned in the format "First Middle".
=cut
sub author :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my $author;
if($self->{lastname})
{
$author = $self->{lastname};
if($self->{firstname})
{
$author .= ", " . $self->{firstname};
$author .= " " . $self->{middlename} if($self->{middlename});
}
}
else
{
$author = $self->{firstname};
$author .= " " . $self->{middlename} if($self->{middlename});
}
return $author;
}
=head2 C
Returns the total length in bytes of the book properties data,
including the trailing null used to pack the C-style strings, but
excluding any ETI server data appended to the end of the standard book
properties.
=cut
sub bookproplength :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my $length = 0;
$length += length($self->{identifier}) + 1;
$length += length($self->{category}) + 1;
$length += length($self->{subcategory}) + 1;
$length += length($self->{title}) + 1;
$length += length($self->{lastname}) + 1;
$length += length($self->{middlename}) + 1;
$length += length($self->{firstname}) + 1;
return $length;
}
=head2 C
Returns the number of resource files as stored in
C<< $self->{filecount} >>. Note that this does NOT recompute that value
from the actual number of resources in C<< $self->{resources} >>. To do
that, use L.
=cut
sub filecount :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return $self->{filecount};
}
=head2 C
Goes through all stored images searching for one with the specified id
value, returning the first image type found or undef if there were no
matches or if no image id was specified. If the optional argument
C<@excluded> is specified, any types in the list will be skipped
during the search.
Expected types are 'png', 'jpg', 'gif', and 'pic', searched for in
that order.
This can be used to attempt to locate an alternate image for an
undisplayable PICT image.
=cut
sub find_image_type :method
{
my $self = shift;
my ($id,@excluded) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return unless($id);
foreach my $type (IMAGETYPES)
{
next if(any {$type eq $_} @excluded);
return $type if($self->{$type}->{$id});
}
return;
}
=head2 C
Takes as a single argument a resource name and if a resource with that
name exists in C<< $self->{resources} >> returns the resource type
used as the hash key.
Returns undef if no match was found or a name was not specified.
=cut
sub find_resource_by_name :method
{
my $self = shift;
my ($name) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return unless($name);
return unless($self->{resources});
foreach my $type (keys %{$self->{resources}})
{
return $type if($self->{resources}->{$type}->{name} eq $name);
}
return;
}
=head2 C
Returns the image data stored in the resource of the specified type
(specifically, stored in C<< $self->{$type}->{$id}->{data} >> as
parsed from the JPEG resource) corresponding to the 16-bit identifier
provided as C<$id>.
Valid values for C<$type> are 'gif','jpg', and 'png'.
Carps a warning and returns undef if C<$type> is not provided or is
not valid, or if C<$id> is not provided.
=cut
sub image :method
{
my $self = shift;
my ($type,$id) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
if(!$type)
{
carp($subname,"(): no image type specified!\n");
return;
}
if(none { $type eq $_ } IMAGETYPES)
{
carp($subname,"(): invalid image type '",$type,"'!\n");
return;
}
if(!$id)
{
carp($subname,"(): ID not specified!\n");
return;
}
return $self->{$type}->{$id}->{data};
}
=head2 C
Returns the raw object hashref used to store parsed image data for the
specified type, as stored in C<< $self->{$type} >>. Valid types are
'gif', 'jpg', and 'png'.
Carps a warning and returns undef if C<$type> is not provided or is
not valid.
If C<$id> is not specified, the keys of the returned hash are the
image IDs for the specified image type, and the values are hashrefs
pointing to hashes containing the following keys:
=over
=item * C
A 16-bit integer only available on EBW 1150 resources. Use with
caution. This key may be renamed if more information is found.
=item * C
The length of the actual image data
=item * C
The byte offset inside of the raw resource data in which the JPEG
image data can be found.
=item * C
An unknown value, but it appears to always be zero. Use with caution.
This key may be renamed if more information is found.
=back
If the optional argument C<$id> is specified, only the hash for that
specific ID is returned, rather than the entire hash of hashrefs.
=cut
sub image_hashref :method
{
my $self = shift;
my ($type,$id) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
if(!$type)
{
carp($subname,"(): no image type specified!\n");
return;
}
if(none { $type eq $_ } IMAGETYPES)
{
carp($subname,"(): invalid image type '",$type,"'!\n");
return;
}
if($id)
{
return $self->{$type}->{$id};
}
return $self->{$type};
}
=head2 C
Returns a list of the 16-bit integer IDs of the the specified type of
image data stored in the associated resource (specifically, stored in
C<< $self->{$type} >> as parsed from the JPEG resource).
Valid types are 'gif', 'jpg', and 'png'. The method will carp a
warning and return undef if another type is specified, or no type is
specified.
=cut
sub image_ids :method
{
my $self = shift;
my ($type) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
if(!$type)
{
carp($subname,"(): no image type specified!\n");
return;
}
if(none { $type eq $_ } IMAGETYPES)
{
carp($subname,"(): invalid image type '",$type,"'!\n");
return;
}
return keys %{$self->{$type}};
}
=head2 C
Returns 1 if C<< $self->{device} == 2 >>, returns 0 if it is some
other value, and undef it is undefined. This has value because
resources packed for a EBW 1150 or GEB 1150 are in a different format
than resources packed for other IMP readers.
=cut
sub is_1150
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return if(!defined $self->{device});
return 1 if($self->{device} == 2);
return 0;
}
=head2 C
Returns the text of the element corresponding to the given text offset
as stored in C<< $self->{offsetelements} >>, or undef if no such
element exists.
=cut
sub offsetelement :method
{
my $self = shift;
my ($offset) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return unless($offset);
return unless($self->{offsetelements});
return $self->{offsetelements}->{$offset};
}
=head2 C
Packs object attributes into the 7 null-terminated strings that
constitute the book properties section of the header. Returns that
string.
Note that this does NOT pack the ETI server data appended to this
section in encrypted books downloaded directly from the ETI servers,
even if that data was found when the .imp file was loaded. This is
because the extra data can confuse the GEBLibrarian application, and
is not needed to read the book. The L and
L methods also assume that this data will not be
present.
=cut
sub pack_imp_book_properties :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my $bookpropdata = pack("Z*Z*Z*Z*Z*Z*Z*",
$self->{identifier},
$self->{category},
$self->{subcategory},
$self->{title},
$self->{lastname},
$self->{middlename},
$self->{firstname});
return $bookpropdata;
}
=head2 C
Packs object attributes into the 48-byte string representing the IMP
header. Returns that string on success, carps a warning and returns
undef if a required attribute did not contain valid data.
Note that in the case of an encrypted e-book with ETI server data in
it, this header will not be identical to the original -- the
resdiroffset value is recalculated for the position with the ETI
server data stripped. See L and
L.
=cut
sub pack_imp_header :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my $header;
my $filecount = scalar(keys %{$self->{resources}});
my $resdir = $self->{resdirname};
if(!$filecount)
{
carp($subname,"():\n",
" No resources found (has a file been loaded?)\n");
return;
}
if(!$resdir)
{
carp($subname,"():\n",
" No resource directory name specified!\n");
return;
}
if(!$self->{version})
{
carp($subname,"():\n",
" No version specified (has a file been loaded?)\n");
return;
}
if($self->{version} > 2)
{
carp($subname,"():\n",
" invalid version ",$self->{version},"\n");
return;
}
$header = pack('n',$self->{version});
$header .= 'BOOKDOUG';
if(! $self->{unknown0x0a}
or length($self->{unknown0x0a}) != 8)
{
carp($subname,"():\n",
" unknown data at 0x0a has incorrect length",
" -- substituting nulls\n");
$self->{unknown0x0a} = "\x00\x00\x00\x00\x00\x00\x00\x00";
}
$header .= $self->{unknown0x0a};
$header .= pack('nn',$filecount,length($resdir));
$header .= pack('n',$self->bookproplength + 24);
$header .= pack('NN',$self->{unknown0x18},$self->{unknown0x1c});
$header .= pack('NN',$self->{compression},$self->{encryption});
$header .= pack('nC',$self->{unknown0x28},$self->{unknown0x2a});
$header .= pack('C',$self->{device} * 16 + $self->{zoomstates});
$header .= pack('N',$self->{unknown0x2c});
if(length($header) != 48)
{
croak($subname,"():\n",
" total header length not 48 bytes (found ",
length($header),")\n");
}
return $header;
}
=head2 C
Packs the specified resource stored in C<< $self->{resources} >> into
a a data string suitable for writing into a .imp file, with a header
format determined by C<< $self->{version} >>.
Returns a reference to that string if the resource was found, or undef
it was not.
=head3 Arguments
=over
=item * C
Select the resource by resource name.
If both this and C are specified, the type is checked first and
the name is only used if the type lookup fails.
=item * C
Select the resource by resource type. This is faster than selecting
by name (since resources are stored in a hash keyed by type) and is
recommended for most use.
If both this and C are specified, the type is checked first and
the name is only used if the type lookup fails.
=back
=cut
sub pack_imp_resource :method
{
my $self = shift;
my %args = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my %valid_args = (
'name' => 1,
'type' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
if(!$args{name} and !$args{type})
{
carp($subname,"():\n",
" at least one of name or type must be specified!\n");
return;
}
my $type = $args{type};
my $resource;
my $resdata;
if(!($type and $self->{resources}->{$type}) and $args{name})
{
$type = $self->find_resource_by_name($args{name});
if(!$type or !$self->{resources}->{$type})
{
carp($subname,"():\n",
" no resource with name '",$args{name},"' found!\n");
return;
}
}
if(!$self->{resources}->{$type})
{
carp($subname,"()\n",
" no resource with type '",$args{type},"' found!\n");
return;
}
$resource = $self->{resources}->{$type};
if($self->{version} == 1)
{
$resdata = pack('a[4]nN',
$resource->{name},
$resource->{unknown1},
$resource->{size});
$resdata .= $resource->{data};
}
elsif($self->{version} == 2)
{
$resdata = pack('a[4]NNa[4]N',
$resource->{name},
$resource->{unknown1},
$resource->{size},
$resource->{type},
$resource->{unknown2});
$resdata .= $resource->{data};
}
else
{
carp($subname,"(): invalid version ",$self->{version},"!\n");
return;
}
if(!$resdata)
{
carp($subname,"(): no resource data packed!\n");
return;
}
return \$resdata;
}
=head2 C
Packs object attributes into the data string that would be the content
of the RSRC.INF file. Returns that string.
=cut
sub pack_imp_rsrc_inf :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my $rsrc;
# Data from header
$rsrc = pack('na[8]n',1,'BOOKDOUG',$self->{resdiroffset});
$rsrc .= pack('NNNNnCCN',
$self->{unknown0x18},$self->{unknown0x1c},
$self->{compression},$self->{encryption},
$self->{unknown0x28},$self->{unknown0x2a},
($self->{device} * 16) + $self->{zoomstates},
$self->{unknown0x2c});
# Data from book properties
$rsrc .= pack('Z*',$self->{identifier});
$rsrc .= pack('Z*Z*Z*',
$self->{category},$self->{subcategory},$self->{title});
$rsrc .= pack('Z*Z*Z*',
$self->{lastname},$self->{middlename},$self->{firstname});
if($self->{etiserverdata})
{
my $length = length($rsrc);
my $padsize = length($self->{etiserverdata}->{pad});
# Pad must result in the following record being 4-byte aligned
if( ($length + $padsize) % 4 )
{
carp($subname,"():\n",
" ETI server data has invalid pad, regenerating it...\n");
undef($self->{etiserverdata}->{pad});
$padsize = $length % 4;
if($padsize)
{
$padsize = 4 - $padsize;
$self->{etiserverdata}->{pad} = pack("a[$padsize]","\0");
}
}
$rsrc .= $self->{etiserverdata}->{pad};
$rsrc .= pack('NNZ*Z*',
$self->{etiserverdata}->{unknown1},
$self->{etiserverdata}->{issuenumber},
$self->{etiserverdata}->{contentfeed},
$self->{etiserverdata}->{source});
if($self->{etiserverdata}->{unknown2})
{
$rsrc .= pack('N',$self->{etiserverdata}->{unknown2});
}
} # if($self->{etiserverdata}
return $rsrc;
}
=head2 C
Packs the C<< $self->{toc} >> object attribute into a data string
suitable for writing into a .imp file. The format is determined by
C<< $self->{version} >>.
Returns that string, or undef if valid version or TOC data is not
found.
=cut
sub pack_imp_toc :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my $tocdata;
if(!$self->{version})
{
carp($subname,"():\n",
" no version information found (did you load a file first?)\n");
return;
}
if($self->{version} > 2)
{
carp($subname,"():\n",
" invalid version ",$self->{version},"!\n");
return;
}
if(!$self->{toc})
{
carp($subname,"(): no TOC data found!\n");
return;
}
foreach my $entry (@{$self->{toc}})
{
if($self->{version} == 1)
{
$tocdata .= pack('a[4]nN',
$entry->{name},
$entry->{unknown1},
$entry->{size});
}
elsif($self->{version} == 2)
{
$tocdata .= pack('a[4]NNa[4]N',
$entry->{name},
$entry->{unknown1},
$entry->{size},
$entry->{type},
$entry->{unknown2});
}
}
if(!length($tocdata))
{
carp($subname,"(): no valid TOC data produced!\n");
return;
}
return $tocdata;
}
=head2 C
In scalar context, this returns the basename of C<< $self->{resdirname} >>.
In list context, it actually returns the basename, directory, and
extension as per C from L.
=cut
sub resdirbase :method
{
my $self = shift;
return fileparse($self->{resdirname},'\.\w+$');
}
=head2 C
Returns the length of the .RES directory name as stored in
C<< $self->{resdirlength} >>. Note that this does NOT recompute the
length from the actual name stored in C<< $self->{resdirname} >> --
for that, use L.
=cut
sub resdirlength :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return $self->{resdirlength};
}
=head2 C
Returns the .RES directory name stored in C<< $self->{resdirname} >>.
=cut
sub resdirname :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return $self->{resdirname};
}
=head2 C
Returns a hashref containing the resource data for the specified
resource type, as stored in C<< $self->{resources}->{$type} >>.
Returns undef if C<$type> is not specified, or if the specified type
is not found.
=cut
sub resource :method
{
my $self = shift;
my ($type) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return unless($type);
return $self->{resources}->{$type};
}
=head2 C
Returns a hashref of hashrefs containing all of the resource data
keyed by type, as stored in C<< $self->{resources} >>.
=cut
sub resources :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return $self->{resources};
}
=head2 C
Returns the uncompressed text originally stored in the DATA.FRK
(C<' '>) resource. This will only work if the text was unencrypted.
=cut
sub text :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return $self->{text};
}
=head2 C
Returns the book title as stored in C<< $self->{title} >>.
=cut
sub title :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return $self->{title};
}
=head2 C
Takes as a single argument an integer index to the table of contents
data stored in C<< $self->{toc} >>. Returns the hashref corresponding
to that TOC entry, if it exists, or undef otherwise.
=cut
sub tocentry :method
{
my $self = shift;
my ($index) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return $self->{toc}->[$index];
}
=head2 C
Returns the version of the IMP format used to determine TOC and
resource metadata size as stored in C<< $self->{version} >>. Expected
values are 1 (10-byte metadata) and 2 (20-byte metadata).
=cut
sub version :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return $self->{version};
}
=head2 C
Writes the images, if any, to the specified output directory.
Filenames are in the format C or C where
C is the image ID for that image type formatted as four
hexadecimal characters.
=head3 Arguments
=over
=item * C
The output directory in which to write the file. This will be created
if it does not exist. Defaults to the basename of the stored resource
directory (see also L).
=back
=cut
sub write_images :method
{
my $self = shift;
my %args = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my %valid_args = (
'dir' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
my $dirname = $args{dir} || $self->resdirbase;
my $cwd = usedir($dirname);
foreach my $imagetype (IMAGETYPES)
{
foreach my $id (keys %{$self->{$imagetype}})
{
my $hexid = sprintf('%04X',$id);
my $prefix = uc($imagetype) . '_';
my $filename = "${prefix}${hexid}.${imagetype}";
my $fh_image;
if(! $self->{$imagetype}->{$id})
{
carp($subname,"(): data for image 0x",$hexid," not found!\n");
next;
}
if(!open($fh_image,'>:raw',$filename))
{
carp($subname,"():\n",
" unable to open '",$filename,"' for writing!\n");
return;
}
print {*$fh_image} $self->{$imagetype}->{$id}->{data};
if(!close($fh_image))
{
carp($subname,"():\n",
" unable to close '",$filename,"'!\n");
return;
}
} # foreach my $id (keys %{$self->{$imagetype}})
}
chdir($cwd);
return 1;
}
=head2 C
Takes as a sole argument the name of a file to write to, and writes a
.imp file to that filename using the object attribute data.
Returns 1 on success, or undef if required data (including the
filename) was invalid or missing, or the file could not be written.
=cut
sub write_imp :method
{
my $self = shift;
my ($filename) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return unless($filename);
my $fh_imp;
if(!open($fh_imp,'>:raw',$filename))
{
carp($subname,"():\n",
" unable to open '",$filename,"' for writing!\n");
return;
}
my $headerdata = $self->pack_imp_header();
my $bookpropdata = $self->pack_imp_book_properties();
my $tocdata = $self->pack_imp_toc;
if(!$headerdata or length($headerdata) != 48)
{
carp($subname,"(): invalid header data!\n");
return;
}
if(!$bookpropdata)
{
carp($subname,"(): invalid book properties data!\n");
return;
}
if(!$tocdata)
{
carp($subname,"(): invalid table of contents data!\n");
return;
}
if(!$self->{resdirname})
{
carp($subname,"(): invalid .RES directory name!\n");
return;
}
if(!scalar(keys %{$self->{resources}}))
{
carp($subname,"(): no resources found!\n");
return;
}
print {*$fh_imp} $headerdata;
print {*$fh_imp} $bookpropdata;
print {*$fh_imp} $self->{resdirname};
print {*$fh_imp} $tocdata;
foreach my $tocentry (@{$self->{toc}})
{
print {*$fh_imp} ${$self->pack_imp_resource(type => $tocentry->{type})};
}
return 1;
}
=head2 C
Writes a C<.RES> resource directory from the object attribute data,
using C<< $self->{resdirname} >> as the directory name.
=cut
sub write_resdir :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
if(!$self->{resdirname})
{
carp($subname,"(): .RES directory name not known!\n");
return;
}
my $cwd = getcwd();
my $fh_resource;
mkpath($self->{resdirname});
if(! -d $self->{resdirname})
{
croak($subname,"():\n",
" unable to create .RES directory '",$self->{resdirname},
"'!\n");
}
chdir($self->{resdirname});
$self->{'RSRC.INF'} = $self->pack_imp_rsrc_inf;
if($self->{'RSRC.INF'})
{
open($fh_resource,'>:raw','RSRC.INF')
or croak($subname,"():\n",
" unable to open 'RSRC.INF' for writing!\n");
print {*$fh_resource} $self->{'RSRC.INF'};
close($fh_resource)
or croak($subname,"():\n",
" unable to close 'RSRC.INF'!\n");
}
else
{
carp($subname,"():\n",
" WARNING: no RSRC.INF data found!\n");
}
foreach my $restype (keys %{$self->{resources}})
{
my $filename = $self->{resources}->{$restype}->{name};
$filename = 'DATA.FRK' if($filename eq ' ');
open($fh_resource,'>:raw',$filename)
or croak($subname,"():\n",
" unable to open '",$filename,"' for writing!\n");
print {*$fh_resource} $self->{resources}->{$restype}->{data};
close($fh_resource)
or croak($subname,"():\n",
" unable to close '",$filename,"'!\n");
}
chdir($cwd);
return 1;
}
=head2 C
Writes the uncompressed text, if any, to the specified output
directory and file.
=head3 Arguments
=over
=item * C
The output directory in which to write the file. This will be created
if it does not exist. Defaults to the basename of the stored resource
directory (see also L).
=item * C
The filename of the output file to write. If not specified, a warning
will be carped and the method will return undef.
=back
=cut
sub write_text :method
{
my $self = shift;
my %args = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my %valid_args = (
'dir' => 1,
'filename' => 1,
);
foreach my $arg (keys %args)
{
croak($subname,"(): invalid argument '",$arg,"'")
if(!$valid_args{$arg});
}
if(!$self->{text})
{
carp($subname,"(): no text to write!\n");
return;
}
my $dirname = $args{dir} || $self->resdirbase;
my $filename = $args{filename} || $self->resdirbase . '.html';
$filename = $dirname . '/' . $filename;
my $fh_text;
mkpath($dirname) if(! -d $dirname);
if(! -d $dirname)
{
carp($subname,"(): unable to create directory '",$dirname,"'!\n");
return;
}
if(!open($fh_text,'>:raw',$filename))
{
carp($subname,"(): unable to open '",$filename,"' for writing!\n");
return;
}
print {*$fh_text} $self->text;
if(!close($fh_text))
{
carp($subname,"(): unable to close '",$filename,"'!\n");
return;
}
return 1;
}
######################################
########## MODIFIER METHODS ##########
######################################
=head2 C
Creates appropriate table of contents data from the metadata in
C<< $self->{resources} >>, in the format specified by
C<< $self->{version} >>. This will also set C<< $self->{filecount} >>
to match the actual number of resources.
Returns the number of resources found.
=cut
sub create_toc_from_resources :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
$self->{toc} = ();
return 0 unless($self->{resources});
foreach my $type (sort keys %{$self->{resources}})
{
my %tocentry;
$tocentry{name} = $self->{resources}->{$type}->{name};
$tocentry{type} = $type;
$tocentry{size} = length($self->{resources}->{$type}->{data});
$tocentry{unknown1} = $self->{resources}->{$type}->{unknown1};
$tocentry{unknown2} = $self->{resources}->{$type}->{unknown2};
push(@{$self->{toc}},\%tocentry);
}
$self->{filecount} = scalar($self->{toc});
debug(2,"DEBUG: created TOC data from ",$self->{filecount}," records");
return $self->{filecount};
}
=head2 C
Parses ETI server data, as potentially found appended to the end of
.imp book properties or a RSRC.INF resource file on encrypted books
downloaded directly from ETI servers.
Takes as a single argument a string containing just the extra appended
data, and stores the parsed values in C<< $self->{etiserverdata} >> as
a hash. Note that parsing requires knowledge of the length of the
book properties at the time this data was inserted; if the book
properties have not been properly parsed or have been modified, the
resulting behaviour of this method is not defined.
Returns the number of bytes handled, zero if no data was provided.
The data has the following format and keys:
=over
=item * [0-3 bytes]: padding data to make sure the following data is
4-byte aligned, stored in key C.
=item * [4 bytes, big-endian unsigned long int]: unknown value,
usually = 2, stored in key C
=item * [4 bytes, big-endian unsigned long int]: issue number for
periodicals (always 0xffffffff for books), stored in key
C.
=item * [variable-length null-terminated string]: content feed for
periodicals, null string for books, stored in key C.
=item * [variable-length null-terminated string]: source string in the
format C<'SOURCE_ID:SOURCE_TYPE:None'>, where C is usually
'3' and C is usually 'B'.
=item * [4 bytes, big-endian unsigned long int]: unknown value, stored
in key C. This value may not be present at all.
=back
=cut
sub parse_eti_server_data :method
{
my $self = shift;
my ($data) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return 0 unless($data);
my $proplength = $self->bookproplength;
my $length = length($data);
if($length < 10)
{
carp($subname,"():\n",
" data is too short to contain ETI server data! [",
$length," bytes]\n");
return 0;
}
$self->{etiserverdata} = {};
# Up to 3 bytes of padding to make sure that the following data is
# 4-byte aligned.
my $padlength = $proplength % 4;
my @list;
if($padlength)
{
$padlength = 4 - $padlength;
$self->{etiserverdata}->{pad} = substr($data,0,$padlength);
$proplength += $padlength;
}
@list = unpack('NNZ*Z*N',substr($data,$padlength));
$self->{etiserverdata}->{unknown1} = $list[0];
$self->{etiserverdata}->{issuenumber} = $list[1];
$self->{etiserverdata}->{contentfeed} = $list[2];
$self->{etiserverdata}->{source} = $list[3];
$self->{etiserverdata}->{unknown2} = $list[4];
debug(2," pad=",hexstring($self->{etiserverdata}->{pad}))
if($self->{etiserverdata}->{pad});
debug(2,
" unknown1=",$list[0]," \t\tissuenumber=",$list[1],"\n",
" contentfeed='",$list[2],"' \tsource='",$list[3],"'");
debug(2," unknown2=",$list[4]) if(defined $list[4]);
return($length);
}
=head2 C
Takes as a single argument a string containing the book properties
data. Sets the object variables from its contents, which should be
seven null-terminated strings in the following order:
=over
=item * Identifier
=item * Category
=item * Subcategory
=item * Title
=item * Last Name
=item * Middle Name
=item * First Name
=back
Note that the entire name is frequently placed into the "First Name"
component, and the "Last Name" and "Middle Name" components are left
blank.
In addition, ETI server data may be appended to this data on encrypted
books downloaded from ETI servers. If present, that data will be
stored in the hash C<< $self->{etiserverdata} >>. See
L for details.
A warning will be carped if the length of the parsed properties
(including the C null string terminators) is not equal to the length
of the data passed.
=cut
sub parse_imp_book_properties :method
{
my $self = shift;
my ($propdata) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my @properties = unpack("Z*Z*Z*Z*Z*Z*Z*",$propdata);
if(scalar(@properties) != 7)
{
carp($subname,"(): WARNING: expected 7 book properties, but found ",
scalar(@properties),"!\n");
}
$self->{identifier} = $properties[0];
$self->{category} = $properties[1];
$self->{subcategory} = $properties[2];
$self->{title} = $properties[3];
$self->{lastname} = $properties[4];
$self->{middlename} = $properties[5];
$self->{firstname} = $properties[6];
debug(2,"DEBUG: found ",scalar(@properties)," properties: ");
debug(2," Identifier: ",$self->{identifier});
debug(2," Category: ",$self->{category});
debug(2," Subcategory: ",$self->{subcategory});
debug(2," Title: ",$self->{title});
debug(2," Last Name: ",$self->{lastname});
debug(2," Middle Name: ",$self->{middlename});
debug(2," First Name: ",$self->{firstname});
# On encrypted files, there may be addtional ETI server data
# appended
my $proplength = $self->bookproplength;
if($proplength < length($propdata))
{
debug(1,"Book properties data has extra ETI server data appended");
$self->parse_eti_server_data(substr($propdata,$proplength));
}
return 1;
}
=head2 C
Parses the first 48 bytes of a .IMP file, setting object variables.
The method croaks if it receives any more or less than 48 bytes.
=head3 Header Format
=over
=item * Offset 0x00 [2 bytes, big-endian unsigned short int]
Version. Expected values are 1 or 2; the version affects the format
of the table of contents header. If this isn't 1 or 2, the method
carps a warning and returns undef.
=item * Offset 0x02 [8 bytes]
Identifier. This is always 'BOOKDOUG', and the method carps a warning
and returns undef if it isn't.
=item * Offset 0x0A [8 bytes]
Unknown data, stored in C<< $self->{unknown0x0a} >>. Use with caution
-- this value may be renamed if more information is obtained.
=item * Offset 0x12 [2 bytes, big-endian unsigned short int]
Number of included files, stored in C<< $self->{filecount} >>.
=item * Offset 0x14 [2 bytes, big-endian unsigned short int]
Length in bytes of the .RES directory name, stored in
C<< $self->{resdirlength} >>.
=item * Offset 0x16 [2 bytes, big-endian unsigned short int]
Offset from the point after this value to the .RES directory name,
which also marks the end of the book properties, stored in
C<< $self->{resdiroffset} >>. Note that this is NOT the length of the
book properties. To get the length of the book properties, subtract
24 from this value (the number of bytes remaining in the header after
this point). It is also NOT the offset from the beginning of the file
to the .RES directory name -- to find that, add 24 to this value (the
number of bytes already parsed).
=item * Offset 0x18 [4 bytes, big-endian unsigned long int?]
Unknown value, stored in C<< $self->{unknown0x18} >>. Use with
caution -- this value may be renamed if more information is obtained.
=item * Offset 0x1C [4 bytes, big-endian unsigned long int?]
Unknown value, stored in C<< $self->{unknown0x1c} >>. Use with
caution -- this value may be renamed if more information is obtained.
=item * Offset 0x20 [4 bytes, big-endian unsigned long int]
Compression type, stored in C<< $self->{compression} >>. Expected
values are 0 (no compression) and 1 (LZSS compression).
=item * Offset 0x24 [4 bytes, big-endian unsigned long int]
Encryption type, stored in C<< $self->{encryption} >>. Expected
values are 0 (no encryption) and 2 (DES encryption).
=item * Offset 0x28 [2 bytes, big-ending unsigned short int]
Unknown value, stored in C<< $self->{unknown0x28} >>. Use with
caution -- this value may be renamed if more information is obtained.
=item * Offset 0x2A [1 byte]
Unknown value, stored in C<< $self->{unknown0x2A} >>. Use with
caution -- this value may be renamed if more information is obtained.
=item * Offset 0x2B [2 nybbles (1 byte)]
The upper nybble at this position is the IMP reader device for which the
e-book was designed, stored in C<< $self->{device} >>. Expected values
are 0 (Softbook 200/250e), 1 (REB 1200/GEB 2150), and 2 (EBW
1150/GEB1150).
The lower nybble marks the possible zoom states, stored in
C<< $self->{zoomstates} >>. Expected values are 0 (both zooms), 1
(small zoom), and 2 (large zoom)
=item * Offset 0x2C [4 bytes, big-endian unsigned long int]
Unknown value, stored in C<< $self->{unknown0x2c} >>. Use with
caution -- this value may be renamed if more information is obtained.
=back
=cut
sub parse_imp_header :method
{
my $self = shift;
my ($headerdata) = @_;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my $length = length($headerdata);
if($length != 48)
{
croak($subname,"(): expected 48 bytes, was passed ",$length,"!\n");
}
my $identstring = substr($headerdata,2,8);
if($identstring ne 'BOOKDOUG')
{
carp($subname,"(): invalid IMP header!\n");
return;
}
$self->{version} = unpack('n',$headerdata);
if($self->{version} < 1 or $self->{version} > 2)
{
carp($subname,"(): Version ",$self->{version}," is not supported!\n");
return;
}
$self->{unknown0x0a} = substr($headerdata,10,8);
# Unsigned short int values
my @list = unpack('nnn',substr($headerdata,0x12,6));
$self->{filecount} = $list[0];
$self->{resdirlength} = $list[1];
$self->{resdiroffset} = $list[2];
debug(2,"DEBUG: IMP file count = ",$self->{filecount});
debug(2,"DEBUG: IMP resdirlength = ",$self->{resdirlength});
debug(2,"DEBUG: IMP resdir offset = ",$self->{resdiroffset});
# Unknown long ints
@list = unpack('NN',substr($headerdata,0x18,8));
$self->{unknown0x18} = $list[0];
$self->{unknown0x1c} = $list[1];
debug(2,"DEBUG: Unknown long int at offset 0x18 = ",$self->{unknown0x18});
debug(2,"DEBUG: Unknown long int at offset 0x1c = ",$self->{unknown0x1c});
# Compression/Encryption/Unknown
@list = unpack('NNnC',substr($headerdata,0x20,11));
$self->{compression} = $list[0];
$self->{encryption} = $list[1];
$self->{unknown0x28} = $list[2];
$self->{unknown0x2a} = $list[3];
debug(2,"DEBUG: IMP compression = ",$self->{compression});
debug(2,"DEBUG: IMP encryption = ",$self->{encryption});
debug(2,"DEBUG: Unknown short int at offset 0x28 = ",$self->{unknown0x28});
debug(2,"DEBUG: Unknown byte at offset 0x2A = ",$self->{unknown0x2a});
# Zoom State, and Unknown
@list = unpack('CN',substr($headerdata,0x2B,5));
$self->{device} = $list[0] >> 4;
$self->{zoomstates} = $list[0] & 0x0f;
$self->{unknown0x2c} = $list[1];
debug(2,"DEBUG: IMP device = ",$self->{device});
debug(2,"DEBUG: IMP zoom state = ",$self->{zoomstates});
debug(2,"DEBUG: Unknown long int at offset 0x2c = ",$self->{unknown0x2c});
return 1;
}
=head2 C
Parses the C resource loaded into C<< $self->{resources} >>,
if present, extracting the LZSS uncompression parameters into
C<< $self->{lzssoffsetbits} >> and C<< $self->{lzsslengthbits} >>.
Returns 1 on success, or undef if no C resource has been loaded
yet or the resource data is invalid.
=cut
sub parse_resource_cm :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return unless($self->{resources}->{'!!cm'});
my @list;
my $version;
my $ident; # Must be constant string '!!cm'
my $unknown1;
my $indexoffset;
my $lzssdata;
@list = unpack('na[4]NN',$self->{resources}->{'!!cm'}->{data});
$version = $list[0];
$ident = $list[1];
$unknown1 = $list[2];
$indexoffset = $list[3];
if($ident ne '!!cm')
{
carp($subname,"():\n",
" Invalid '!!cm' record!\n");
return;
}
debug(2,"DEBUG: parsing !!cm v",$version,", index offset ",$indexoffset);
$lzssdata = substr($self->{resources}->{'!!cm'}->{data},$indexoffset-4,4);
@list = unpack('nn',$lzssdata);
if($list[0] + $list[1] > 32
or $list[0] < 2
or $list[1] < 1)
{
carp($subname,"():\n",
" invalid LZSS compression bit lengths!\n",
"[",$list[0]," offset bits, ",
$list[1]," length bits]\n");
return;
}
$self->{lzssoffsetbits} = $list[0];
$self->{lzsslengthbits} = $list[1];
debug(2,"DEBUG: !!cm specifies ",$list[0]," offset bits, ",
$list[1]," length bits");
return 1;
}
=head2 C
Parses the image data resources loaded into C<< $self->{resources} >>,
if present, placing the image data and metadata of each image found
into C<< $self->{jpg} >> and C<< $self->{png} >>, keyed by 16-bit
image resource ID.
Returns the total number of images found and parsed.
This method is called automatically by L and L.
See also accessor methods L and L.
=cut
sub parse_resource_images :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
my $headersize;
my $imgdata;
my $imgcount;
my $total = 0;
my @list;
if($self->{device} == DEVICE_EBW1150) { $headersize = 14; }
else { $headersize = 12; }
foreach my $resource (keys %IMAGE_RESOURCE_MAP)
{
next unless($self->{resources}->{$resource});
my $rsize = $self->{resources}->{$resource}->{size};
my $itype = $IMAGE_RESOURCE_MAP{$resource};
next if ($rsize <= 32);
@list = unpack('na[4]NNnNNNN',$self->{resources}->{$resource}->{data});
my $version = $list[0];
my $ident = $list[1];
my $unknown1 = $list[2];
my $tocoffset = $list[3];
my $unknown2 = $list[4];
my $unknown3 = $list[5];
my $unknown4 = $list[6];
my $unknown5 = $list[7];
my $unknown6 = $list[8];
if($ident ne $resource)
{
carp($subname,"():\n",
" Invalid '",$resource,"' record!\n");
next;
}
debug(2,"DEBUG: parsing ",$resource," resource v",$version,
", index offset ",$tocoffset);
$imgcount = ($rsize - $tocoffset) / $headersize;
debug(2,"DEBUG: ",$imgcount," ",$itype," images listed in header");
$self->{$itype} = {};
foreach my $pos (0 .. ($imgcount - 1))
{
my $id; # Image ID -- this is only unique for each imagetype
my $hexid; # 4-digit hexadecimal string version of $id
$imgdata = substr($self->{resources}->{$resource}->{data},
$tocoffset + ($headersize * $pos),$headersize);
if($self->{device} == DEVICE_EBW1150)
{
#Standard 1150 Header (14 bytes)
@list = unpack("vvVVv",$imgdata);
$id = $list[0];
$self->{$itype}->{$id}->{unknown} = $list[1];
$self->{$itype}->{$id}->{length} = $list[2];
$self->{$itype}->{$id}->{offset} = $list[3];
$self->{$itype}->{$id}->{const0} = $list[4];
}
else
{
#Standard 1200 Header (12 bytes)
@list = unpack("nNNn",$imgdata);
$id = $list[0];
$self->{$itype}->{$id}->{length} = $list[1];
$self->{$itype}->{$id}->{offset} = $list[2];
$self->{$itype}->{$id}->{const0} = $list[3];
}
if($EBook::Tools::debug > 2)
{
printf(" id=%04X unk1=0x%04X length=%d offset=%d, const0=0x%04X\n",
$id, $self->{$itype}->{$id}->{unknown},
$self->{$itype}->{$id}->{length},
$self->{$itype}->{$id}->{offset},
$self->{$itype}->{$id}->{const0});
}
$hexid = sprintf("%04X",$id);
$self->{$itype}->{$id}->{data} =
substr($self->{resources}->{$resource}->{data},
$self->{$itype}->{$id}->{offset},
$self->{$itype}->{$id}->{length});
my ($imagex,$imagey,$imagetype) =
imgsize(\$self->{$itype}->{$id}->{data});
if(defined($imagex) && $imagetype)
{
debug(2," ",$itype," image ",$pos," (ID '",$hexid,"') is valid ",
$imagetype," image data (",$imagex," x ",$imagey,")");
}
else
{
carp($subname,"():\n",
" ",$itype," image ",$pos," (ID '",$id,
"') is not valid image data!\n");
next;
}
} # foreach my $pos (0 .. ($imgcount - 1))
my $found = scalar keys %{$self->{$itype}};
if($found != $imgcount)
{
carp($subname,"()\n",
" resource specified ",$imgcount," images, but found ",
$found,"!\n");
}
$total += $found;
} # foreach my $resource (keys %IMAGE_RESOURCE_MAP)
return $total;
}
=head2 C
Parses the index of text offsets to all images as stored in
C<< $self->{resources}->{'ImRn'} >>, if present, storing them in
C<< $self->{imrn} >> as a hash of hashrefs indexed by its
32-bit integer offset to the 0x0F control code in the uncompressed
text stored in the DATA.FRK resource.
Returns the total number of offsets found and parsed.
The hash keys of each offset hash are:
=over
=item * C
Image display width in pixels.
=item * C
Image display height in pixels.
=item * C
A 16-bit integer value used to uniquely identify the image inside a
particular resource type.
=item * C
The four-letter resource type string.
=item * C
A 32-bit value of unknown purpose which should always be 0xFFFFFFFF.
=item * C
A second 32-bit value of unknown purpose which should always be 0xFFFFFFFF.
=item * C
A 32-bit integer value of unknown purpose which should always be 0x00000000.
=item * C
A 16-bit integer value of unknown purpose which could be 0xFFFA, 0xFFFB,
0xFFFC, or 0xFFFE.
=item * C
A 16-bit integer value of unknown purpose found only in 1150 resources.
=item * C
A 32-bit integer value of unknown purpose.
=back
This method is called automatically by L and L.
=cut
sub parse_resource_imrn :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return unless($self->{resources}->{'ImRn'});
my $headersize;
my $imrndata;
my $imrncount;
my $total = 0;
my @list;
my $idxdata;
my $idxsize;
my $idx1id;
my $idx1size;
my $idx1offset;
my $idx1const0;
if($self->{device} == DEVICE_EBW1150)
{
$headersize = 36;
$idxsize = 14;
}
else
{
$headersize = 32;
$idxsize = 12;
}
my $rsize = $self->{resources}->{'ImRn'}->{size};
next if ($rsize <= 32);
@list = unpack('na[4]NNnNNNN',$self->{resources}->{'ImRn'}->{data});
my $version = $list[0];
my $ident = $list[1];
my $unknown1 = $list[2];
my $tocoffset = $list[3];
my $unknown2 = $list[4];
my $unknown3 = $list[5];
my $unknown4 = $list[6];
my $unknown5 = $list[7];
my $unknown6 = $list[8];
if($ident ne 'ImRn')
{
carp($subname,"():\n",
" Invalid 'ImRn' record!\n");
next;
}
debug(2,"DEBUG: parsing 'ImRn' resource v",$version,
", index offset ",$tocoffset);
$imrncount = ($rsize - 32 - 12) / $headersize;
debug(2,"DEBUG: ",$imrncount," images listed in header");
$self->{imrn} = {};
foreach my $pos (0 .. ($imrncount - 1))
{
my $offset; # offset within DATA.FRK text (0x0F) of image insertion
# FIX: The last (number $imrncount) image record is 2 bytes shorter and
# will not be of size 36, but 34. Currently this code will read into
# the index record!
$imrndata = substr($self->{resources}->{'ImRn'}->{data},
32 + ($headersize * $pos),$headersize);
if($self->{device} == DEVICE_EBW1150)
{
#imrn 1150 record
@list = unpack("VVvvVvvVVa[4]v",$imrndata);
$offset = $list[7];
$self->{imrn}->{$offset}->{constF1} = $list[0];
$self->{imrn}->{$offset}->{constF2} = $list[1];
$self->{imrn}->{$offset}->{width} = $list[2];
$self->{imrn}->{$offset}->{height} = $list[3];
$self->{imrn}->{$offset}->{const0} = $list[4];
$self->{imrn}->{$offset}->{unknown16} = $list[5];
$self->{imrn}->{$offset}->{constB} = $list[6];
$self->{imrn}->{$offset}->{unknown32} = $list[8];
$self->{imrn}->{$offset}->{restype} = $list[9];
$self->{imrn}->{$offset}->{id} = $list[10];
# restypes only reversed in 1150 ebooks
# (restypes in 1200 ebooks are not reversed)
my %restypefix = (
' FIG' => 'GIF ',
'GEPJ' => 'JPEG',
' GNP' => 'PNG ',
'2CIP' => 'PIC2',
'TCIP' => 'PICT',
);
my $type = $self->{imrn}->{$offset}->{restype};
$self->{imrn}->{$offset}->{restype} = $restypefix{$type}
if($restypefix{$type});
}
else
{
#imrn 1200 record
@list = unpack("NNnnNnNNa[4]n",$imrndata);
$offset = $list[6];
$self->{imrn}->{$offset}->{constF1} = $list[0];
$self->{imrn}->{$offset}->{constF2} = $list[1];
$self->{imrn}->{$offset}->{width} = $list[2];
$self->{imrn}->{$offset}->{height} = $list[3];
$self->{imrn}->{$offset}->{const0} = $list[4];
$self->{imrn}->{$offset}->{constB} = $list[5];
$self->{imrn}->{$offset}->{unknown32} = $list[7];
$self->{imrn}->{$offset}->{restype} = $list[8];
$self->{imrn}->{$offset}->{id} = $list[9];
}
my $restype = $self->{imrn}->{$offset}->{restype};
my $imgtype = $IMAGE_RESOURCE_MAP{$restype};
my $hexid = sprintf('%04X',$self->{imrn}->{$offset}->{id});
my $width = $self->{imrn}->{$offset}->{width};
my $height = $self->{imrn}->{$offset}->{height};
if(none { $restype eq $_ } (IMAGERESOURCES) )
{
carp($subname,"():\n",
" invalid image type '",$restype,"' at offset ",$offset,"!\n");
next;
}
debug(2,"DEBUG: ImRn offset ",$offset,": '",$restype,"' 0x",$hexid,
" (",$width," x ",$height,")");
# PICT images are unviewable, so see if there is an alternate to use instead
if($imgtype and $imgtype eq 'pic')
{
my $id = $self->{imrn}->{$offset}->{id};
my $alttype = $self->find_image_type($id,'pic');
$imgtype = $alttype if($alttype);
}
#TODO: use height/width from Pcz0/PcZ0 records
my $filename = uc($imgtype) . "_${hexid}.${imgtype}";
$self->{offsetelements}->{$offset} =
'';
debug(2,"DEBUG: tag = '",$self->{offsetelements}->{$offset},"'");
if($EBook::Tools::debug > 2)
{
printf(" offset=%d restype=%s imgid=%04X constF1=0x%04X constF2=0x%04X width=%d height=%d const0=0x%04X, constB=0x%04X",
$offset, $self->{imrn}->{$offset}->{restype},
$self->{imrn}->{$offset}->{id},
$self->{imrn}->{$offset}->{constF1},
$self->{imrn}->{$offset}->{constF2},
$self->{imrn}->{$offset}->{width},
$self->{imrn}->{$offset}->{height},
$self->{imrn}->{$offset}->{const0},
$self->{imrn}->{$offset}->{constB},
$self->{imrn}->{$offset}->{unknown16},
$self->{imrn}->{$offset}->{unknown32});
if($self->{imrn}->{$offset}->{const2})
{
printf(" const2=0x%04X",
$self->{imrn}->{$offset}->{const2});
}
printf("\n");
}
}
$idxdata = substr($self->{resources}->{'ImRn'}->{data},$tocoffset,$idxsize);
if($self->{device} == DEVICE_EBW1150)
{
#Standard 1150 (14-byte) Index Header
@list = unpack("vVVV",$idxdata);
}
else
{
#Standard 1200 (12-byte) Index Header
@list = unpack("nNNn",$idxdata);
}
$idx1id = $list[0];
$idx1size = $list[1];
$idx1offset = $list[2];
$idx1const0 = $list[3];
$total = scalar keys %{$self->{imrn}};
if($total != $imrncount)
{
carp($subname,"()\n",
" resource specified ",$imrncount," ImRn entries, but found ",
$total,"!\n");
}
return $total;
}
=head2 C
Parses the C<' '> (DATA.FRK) resource loaded into
C<< $self->{resources} >>, if present, extracting the text into
C<< $self->{text} >>, uncompressing it if necessary. LZSS uncompression
will use the C<< $self->{lzsslengthbits} >> and
C<< $self->{lzssoffsetbits} >> attributes if present, and default to 3
length bits and 14 offset bits otherwise.
HTML headers and footers are then applied, and control codes replaced
with appropriate tags.
Returns the length of the raw uncompressed text before any HTML
modification was done, or undef if no text resource was found or the
text was encrypted.
=cut
sub parse_text :method
{
my $self = shift;
my $subname = (caller(0))[3];
croak($subname . "() called as a procedure!\n") unless(ref $self);
debug(2,"DEBUG[",$subname,"]");
return unless($self->{resources}->{' '});
$self->parse_resource_cm();
my $lengthbits = $self->{lzsslengthbits} || 3;
my $offsetbits = $self->{lzssoffsetbits} || 14;
my $lzss = EBook::Tools::LZSS->new(lengthbits => $lengthbits,
offsetbits => $offsetbits,
windowstart => 1);
my $textref;
my $textlength;
if($self->{encryption})
{
warn($subname,"(): encrypted text not supported!\n");
return;
}
if($self->{compression})
{
$textref = $lzss->uncompress(\$self->{resources}->{' '}->{data});
}
else
{
$textref = \$self->{resources}->{' '}->{data};
}
$textlength = length($$textref);
if(!$textlength)
{
carp($subname,"(): no text extracted from DATA.FRK resource!\n");
return;
}
$self->{text} = <<'END';
END
$self->{text} .= " $self->{title}\n";
$self->{text} .= "\n\n";
my $pos = 0;
my %ccharmap = (
0x0A => "\n" . ' ', # supported!
0x0B => "\n