SGML-Parser-OpenSP-0.994/0000700000175300017630000000000011031762311013647 5ustar bjoerncpanSGML-Parser-OpenSP-0.994/Changes0000700000175300017630000000347311031755612015162 0ustar bjoerncpanRevision history for Perl extension SGML::Parser::OpenSP. 0.992 Sun Jun 29 20:00:00 2008 - added simple example script eg/xhtml1val.pl - added LICENSE => 'perl' to Makefile.PL - hopefully cleared executable bit on Makefile.PL - Updated copyright/license documentation - removed version number from README 0.991 Thu Dec 6 07:25:00 2007 - added 20passfd.t, 21parsestring.t, 22mwarnings.t - multi-arg options no longer have the last arg ignored - http://rt.cpan.org/Ticket/Display.html?id=26791 Added note to the README file about -dev packages. 0.99 Wed Aug 30 17:00:00 2006 - fixed a bug in how parse_string handles encodings - added parse_string tests - added parse_string documentation - added parse_string and pass_file_descriptor option - fixed a bug in handling warnings(qw/multiple args/) - added more sanity checks to typemap - added tests using Pod::Test, fixed found issues. - added tests using Pod::Test::Coverage, fixed found issues. 0.98 Sat Oct 02 01:19:04 2004 - added more split_message tests - fixed remaining typemap flaws - added description for ::Tools POD - better headings in OpenSP POD - added documentation for p->split_message - updated README to mention PPMs - optimized UTF-32 transcoder - improved performance through PERL_NO_GET_CONTEXT - major memory management improvements - fixed 0.02 release date in CHANGES - updated map_catalog_document documentation - updated restrict_file_reading documentation - included -wxml warnings in the OpenSP.pm POD 0.02 Wed Sep 22 22:22:22 2004 - complete rewrite; created by h2xs 1.23 with options -A -O -c -n SGML::Parser::OpenSP --skip-ppport --skip-exporter -v 0.02 -b 5.8.0 0.01 Mon Mar 17 00:57:04 2003 - original version; created by h2xs 1.22 with options -A -n SGML::Parser::OpenSP SGML-Parser-OpenSP-0.994/eg/0000700000175300017630000000000011031762310014241 5ustar bjoerncpanSGML-Parser-OpenSP-0.994/eg/xhtml1val.pl0000700000175300017630000000316611031747772016546 0ustar bjoerncpan#!/usr/bin/perl -w ####################################################################### # A simple XHTML 1.0 Strict Validation script with encoding detection # # By Bjoern Hoehrmann bjoern@hoehrmann.de http://bjoern.hoehrmann.de ####################################################################### BEGIN { $ENV{SP_CHARSET_FIXED} = 1; $ENV{SP_ENCODING} = "UTF-8"; $ENV{SP_BCTF} = "UTF-8"; } sub ErrorHandler::new {bless {p=>$_[1]}, shift} sub ErrorHandler::error { push @{$_[0]->{errors}}, $_[0]->{p}->split_message($_[1]) } use strict; use warnings; use SGML::Parser::OpenSP qw(); use HTML::Encoding qw(); use HTML::Doctype qw(); use LWP::UserAgent qw(); use I18N::Charset qw(); use Encode qw(); use constant TEST_CATALOG => File::Spec->catfile(File::Spec->updir, 'samples', 'test.soc'); our @SP_OPTS = qw/ non-sgml-char-ref valid no-duplicate xml /; my $u = LWP::UserAgent->new; my $p = SGML::Parser::OpenSP->new; my $e = ErrorHandler->new($p); my $r = $u->get("http://www.w3.org/"); my $name1 = HTML::Encoding::encoding_from_http_message($r); my $name2 = I18N::Charset::enco_charset_name($name1); my $text = Encode::decode($name2 => $r->content); # Validation $p->handler($e); $p->catalogs(TEST_CATALOG); $p->warnings(@SP_OPTS); $p->parse_string($text); foreach my $error (@{$e->{errors}}) { my $prim = $error->{primary_message}; printf "[%4d %4d %s]: %s\n", $prim->{LineNumber}, $prim->{ColumnNumber}, $prim->{Severity}, $prim->{Text} } if (not @{$e->{errors}}) { printf "No errors found!\n"; } SGML-Parser-OpenSP-0.994/lib/0000700000175300017630000000000011031762310014414 5ustar bjoerncpanSGML-Parser-OpenSP-0.994/lib/SGML/0000700000175300017630000000000011031762310015156 5ustar bjoerncpanSGML-Parser-OpenSP-0.994/lib/SGML/Parser/0000700000175300017630000000000011031762310016412 5ustar bjoerncpanSGML-Parser-OpenSP-0.994/lib/SGML/Parser/OpenSP/0000700000175300017630000000000011031762310017556 5ustar bjoerncpanSGML-Parser-OpenSP-0.994/lib/SGML/Parser/OpenSP/Tools.pm0000700000175300017630000002367111031746502021235 0ustar bjoerncpan# Tools.pm -- SGML::Parser::OpenSP::Tools module # # $Id: Tools.pm,v 1.9 2006/08/30 14:50:35 hoehrmann Exp $ package SGML::Parser::OpenSP::Tools; use 5.008; use strict; use warnings; use Carp; # white-space as defined in XML 1.0 our $WHITESPACE = qr/[\x20\x09\x0d\x0a]/; sub value_attribute { my $attr = shift; # illegal input return 0 unless defined $attr; return 0 unless ref $attr eq "HASH"; return 0 unless exists $attr->{Type}; # these cannot have values return 0 if $attr->{Type} eq "implied"; return 0 if $attr->{Type} eq "invalid"; return 1; } sub specified_attribute { my $attr = shift; return 0 unless value_attribute($attr); return 0 if $attr->{Defaulted} ne "specified"; return 1; } sub defaulted_attribute { my $attr = shift; return 0 unless value_attribute($attr); return 0 if $attr->{Defaulted} eq "specified"; return 1; } sub attribute_value { my $attr = shift; # ... return unless value_attribute($attr); # tokenized attributes return $attr->{Tokens} if $attr->{Type} eq "tokenized"; my $value = ""; # type is cdata foreach my $chunk (@{$attr->{CdataChunks}}) { # todo: fix this for SDATA # todo: fix this for non-sgml chars $value .= $chunk->{Data}; } return $value; } sub split_pi { my $orig = shift; return unless defined $orig; return unless length $orig; my ($targ, $data) = split /$WHITESPACE/, $orig, 1; return $targ, $data; } sub split_message { my $mess = shift; # message text my $name = shift; # file name my $oent = shift; # show_open_entities my $errn = shift; # show_error_numbers my $oelm = shift; # show_open_elements my $mess_debug = $mess; my %resu; if ($oent) { while($mess =~ s/^In entity (\S+) included from (.*?):(\d+):(\d+)\s+//) { push @{$resu{open_entities}}, { EntityName => $1, FileName => $2, LineNumber => $3, ColumnNumber => $4 } } } # this splits the error message into its components. this is designed # to cope with most if not all delimiter problems inherent to the # message format which does not escape delimiters which can result in # ambiguous data. The following format is expected by this code # each error message component starts on a new line which is either # the first line or something that follows \n, then an optional formal # system identifier such as or , then the file name # as reported by $p->get_location->{FileName} then the line and finally # the column number -- for each message there should thus be three # individual components, line, column, "text" -- which can contain # additional components depending on the message, see below. my @comp = split(/(?:^|\n)(?:<[^>]+>)?\Q$name\E:(\d+):(\d+):\s*/, $mess); # check for proper format, the first component must be # empty and each entry must have line, column and text croak "Unexpected error message format ($mess_debug)" if length $comp[0] or (@comp - 1) % 3; # remove empty component shift @comp; # the first component is the primary message $resu{primary_message}->{LineNumber} = shift @comp; $resu{primary_message}->{ColumnNumber} = shift @comp; if ($errn) { # with show_error_numbers the first component is # ".", remove and store it $comp[0] =~ s/^(\d+)\.(\d+)://; # this can happen if it was incorrectly specified # that show_error_numbers was enabled or if OpenSP # has a bug that causes the number to be missing croak "message lacks error number information" unless defined $1 and defined $2; $resu{primary_message}->{Module} = $1; $resu{primary_message}->{Number} = $2; } # next component is a character indicating the severity $comp[0] =~ s/^(\w):\s+//; # this can happen if OpenSP has a bug in this regard croak "severity character missing from error message" unless defined $1; $resu{primary_message}->{Severity} = $1; # trim trailing white-space from the text $comp[0] =~ s/\s+$//; # the remainder of the message is the error message text $resu{primary_message}->{Text} = shift @comp; # optional auxiliary message if (@comp > 3 or (@comp == 3 and !$oelm)) { # trim trailing white-space from the text $comp[2] =~ s/\s+$//; $resu{aux_message}->{LineNumber} = shift @comp; $resu{aux_message}->{ColumnNumber} = shift @comp; $resu{aux_message}->{Text} = shift @comp; } # open elements are optional in SGML declarations, etc. if ($oelm and @comp) { # this should only happen in case of OpenSP bugs croak "unexpected number of components in message" unless @comp == 3; croak "expected listing of open elements" unless pop(@comp) =~ /^open elements: (.*)/s; $resu{open_elements} = $1; } \%resu } 1; __END__ =pod =head1 NAME SGML::Parser::OpenSP::Tools - Tools to process OpenSP output =head1 DESCRIPTION Routines to post-process OpenSP event data. =head1 UTILITY FUNCTIONS =over 4 =item specified_attribute($attribute) specified_attribute returns a true value if the attribute is of type C or C and has its C property set to C. For example sub start_element { my $self = shift; my $elem = shift; my @spec = grep specified_attribute($_), values %{$elem->{Attributes}}; # @spec contains all explicitly specified attributes } =item defaulted_attribute($attribute) defaulted_attribute returns a true value if the attribute is of type C or C and has its C property set to something but C. For all attributes, the following always holds true, !defined(attribute_value($_)) or defaulted_attribute($_) or specified_attribute($_) since only defaulted and specified attributes can have a value. =item value_attribute($attribute) Returns true if the value can have a value, i.e., it is either specified or defaulted. =item attribute_value($attribute) attribute_value returns a textual representation of the value of an attribute as reported to a C handler or C if no value is available. =item split_message($message, $filename, $open_ent, $error_num, $open_elem) split_message splits an OpenSP error message into its components, the error or warning message, an optional auxiliary message that provides additional information about the error, like the first occurence of an ID in case of duplicate IDs in a document, each accompanied by line and column numbers relevant to the message, and depending on the parser configuration the open entities for the message, the error number of the message and a list of the current open elements. It returns a hash reference like # this is always present primary_message => { Number => 141, # only if $p->show_error_numbers(1) Module => 554521624, # only if $p->show_error_numbers(1) ColumnNumber => 9, LineNumber => 12, Severity => 'E', Text => 'ID "a" already defined' }, # only some messages have an aux_message aux_message => { ColumnNumber => 9, LineNumber => 11, Text => 'ID "a" first defined here' }, # iff $p->show_open_elements(1) and there are open elements open_elements => 'html body[1] (p[1])', # iff $p->show_open_entities(1) and there are open entities # other than the document, but the document will be reported # if the error is in some other entity open_entities => [ { ColumnNumber => 55, FileName => 'example.xhtml', EntityName => 'html', LineNumber => 2 }, ... ], This would typically be used like sub error { my $self = shift; my $erro = shift; my $mess = $erro->{Message}; # parser is the SGML::Parser::OpenSP # object stored in the handler object my $loca = $self->{parser}->get_location; my $name = $loca->{FileName}; my $splt = split_message($mess, $name, $self->{parser}->show_open_entities, $self->{parser}->show_error_numbers, $self->{parser}->show_open_elements); # ... } A more convenient way to access this function is provided by the C module which you can use like sub error { my $self = shift; my $erro = shift; my $mess = $self->{parser}->split_message($erro); # relevant data is now $mess and $erro->{Severity} # of which the latter provides more detailed information # than $mess->{primary_message}->{Severity}, see the # SGML::Parser::OpenSP documentation for details } =item split_pi($data) split_pi splits the data of a processing instructions at the first white space character into two components where white space character is defined in the $WHITESPACE package variable, qr/[\x20\x09\x0d\x0a]/ by default. It returns C if there is no data to split. sub pi { my $self = shift; my $proc = shift; my ($target, $data) = split_pi($proc->{Data}); # ... } =back =head1 AUTHOR / COPYRIGHT / LICENSE Copyright (c) 2006-2008 Bjoern Hoehrmann . This module is licensed under the same terms as Perl itself. =cut SGML-Parser-OpenSP-0.994/lib/SGML/Parser/OpenSP.pm0000700000175300017630000006502211031761464020135 0ustar bjoerncpan# OpenSP.pm -- SGML::Parser::OpenSP module # # $Id: OpenSP.pm,v 1.35 2007/12/06 06:00:02 hoehrmann Exp $ package SGML::Parser::OpenSP; use 5.008; use strict; use warnings; use Carp; use SGML::Parser::OpenSP::Tools qw(); use File::Temp qw(); use base qw(Class::Accessor); our $VERSION = '0.994'; require XSLoader; XSLoader::load('SGML::Parser::OpenSP', $VERSION); __PACKAGE__->mk_accessors(qw/ handler show_open_entities show_open_elements show_error_numbers output_comment_decls output_marked_sections output_general_entities map_catalog_document restrict_file_reading warnings catalogs search_dirs include_params active_links pass_file_descriptor /); sub split_message { my $self = shift; my $mess = shift; my $loca = $self->get_location; my $name = $loca->{FileName}; return SGML::Parser::OpenSP::Tools::split_message ( $mess->{Message}, $loca->{FileName}, $self->show_open_entities, $self->show_error_numbers, $self->show_open_elements ); } sub parse_string { my $self = shift; my $text = shift; # high security on systems that support it File::Temp->safe_level(File::Temp::HIGH); # create temp file, this would croak if it fails, so # there is no need for us to check the return value my $fh = File::Temp->new(); # set proper mode binmode $fh, ':utf8'; # store content print $fh $text; # seek to start seek $fh, 0, 0; if (not $self->pass_file_descriptor) { $self->parse('' . $fh->filename); } else { my $no = fileno $fh; unless (defined $no) { carp "fileno() on temporary file handle failed.\n"; return; } $self->parse('' . $no); } } 1; __END__ =pod =head1 NAME SGML::Parser::OpenSP - Parse SGML documents using OpenSP =head1 SYNOPSIS use SGML::Parser::OpenSP; my $p = SGML::Parser::OpenSP->new; my $h = ExampleHandler->new; $p->catalogs(qw(xhtml.soc)); $p->warnings(qw(xml valid)); $p->handler($h); $p->parse("example.xhtml"); =head1 DESCRIPTION This module provides an interface to the OpenSP SGML parser. OpenSP and this module are event based. As the parser recognizes parts of the document (say the start or end of an element), then any handlers registered for that type of an event are called with suitable parameters. =head1 COMMON METHODS =over 4 =item new() Returns a new SGML::Parser::OpenSP object. Takes no arguments. =item parse($file) Parses the file passed as an argument. Note that this must be a filename and not a filehandle. See L below for details. =item parse_string($data) Parses the data passed as an argument. See L below for details. =item halt() Halts processing before parsing the entire document. Takes no arguments. =item split_message() Splits OpenSP's error messages into their component parts. See L below for details. =item get_location() See L below for details. =back =head1 CONFIGURATION =head2 BOOLEAN OPTIONS =over 4 =item $p->handler([$handler]) Report events to the blessed reference $handler. =back =head2 ERROR MESSAGE FORMAT =over 4 =item $p->show_open_entities([$bool]) Describe open entities in error messages. Error messages always include the position of the most recently opened external entity. The default is false. =item $p->show_open_elements([$bool]) Show the generic identifiers of open elements in error messages. The default is false. =item $p->show_error_numbers([$bool]) Show message numbers in error messages. =back =head2 GENERATED EVENTS =over 4 =item $p->output_comment_decls([$bool]) Generate C events. The default is false. =item $p->output_marked_sections([$bool]) Generate marked section events (C, C, C). The default is false. =item $p->output_general_entities([$bool]) Generate C events. The default is false. =back =head2 IO SETTINGS =over 4 =item $p->map_catalog_document([$bool]) C arguments specify catalog files rather than the document entity. The document entity is specified by the first DOCUMENT entry in the catalog files. The default is false. =item $p->restrict_file_reading([$bool]) Restrict file reading to the specified directories (see the C method and the C environment variable). You should turn this option on and configure the search paths accordingly if you intend to process untrusted resources. The default is false. =item $p->catalogs([@catalogs]) Map public identifiers and entity names to system identifiers using the specified catalog entry files. Multiple catalogs are allowed. If there is a catalog entry file called C in the same place as the document entity, it will be searched for immediately after those specified. =item $p->search_dirs([@search_dirs]) Search the specified directories for files specified in system identifiers. Multiple values options are allowed. See the description of the osfile storage manager in the OpenSP documentation for more information about file searching. =item $p->pass_file_descriptor([$bool]) Instruct C to pass the input data down to the guts of OpenSP using the C storage manager (if true) or the C storage manager (if false). This amounts to the difference between passing a file descriptor and a (temporary) file name. The default is true except on platforms, such as Win32, which are known to not support passing file descriptors around in this manner. On platforms which support it you can call this method with a false parameter to force use of temporary file names instead. In general, this will do the right thing on its own so it's best to consider this an internal method. If your platform is such that you have to force use of the OSFILE storage manager, please report it as a bug and include the values of C<$^O>, C<$Config{archname}>, and a description of the platform (e.g. "Windows Vista Service Pack 42"). =back =head2 PROCESSING OPTIONS =over 4 =item $p->include_params([@include_params]) For each name in @include_params pretend that occurs at the start of the document type declaration subset in the SGML document entity. Since repeated definitions of an entity are ignored, this definition will take precedence over any other definitions of this entity in the document type declaration. Multiple names are allowed. If the SGML declaration replaces the reserved name INCLUDE then the new reserved name will be the replacement text of the entity. Typically the document type declaration will contain and will use %name; in the status keyword specification of a marked section declaration. In this case the effect of the option will be to cause the marked section not to be ignored. =item $p->active_links([@active_links]) ??? =back =head2 ENABLING WARNINGS Additional warnings can be enabled using $p->warnings([@warnings]) The following values can be used to enable warnings: =over 4 =item xml Warn about constructs that are not allowed by XML. =item mixed Warn about mixed content models that do not allow #pcdata anywhere. =item sgmldecl Warn about various dubious constructions in the SGML declaration. =item should Warn about various recommendations made in ISO 8879 that the document does not comply with. (Recommendations are expressed with ``should'', as distinct from requirements which are usually expressed with ``shall''.) =item default Warn about defaulted references. =item duplicate Warn about duplicate entity declarations. =item undefined Warn about undefined elements: elements used in the DTD but not defined. =item unclosed Warn about unclosed start and end-tags. =item empty Warn about empty start and end-tags. =item net Warn about net-enabling start-tags and null end-tags. =item min-tag Warn about minimized start and end-tags. Equivalent to combination of unclosed, empty and net warnings. =item unused-map Warn about unused short reference maps: maps that are declared with a short reference mapping declaration but never used in a short reference use declaration in the DTD. =item unused-param Warn about parameter entities that are defined but not used in a DTD. Unused internal parameter entities whose text is C or C won't get the warning. =item notation-sysid Warn about notations for which no system identifier could be generated. =item all Warn about conditions that should usually be avoided (in the opinion of the author). Equivalent to: C, C, C, C, C, C, C, C and C. =back =head2 DISABLING WARNINGS A warning can be disabled by using its name prefixed with C. Thus calling warnings(qw(all no-duplicate)) will enable all warnings except those about duplicate entity declarations. The following values for C disable errors: =over 4 =item no-idref Do not give an error for an ID reference value which no element has as its ID. The effect will be as if each attribute declared as an ID reference value had been declared as a name. =item no-significant Do not give an error when a character that is not a significant character in the reference concrete syntax occurs in a literal in the SGML declaration. This may be useful in conjunction with certain buggy test suites. =item no-valid Do not require the document to be type-valid. This has the effect of changing the SGML declaration to specify C and C. An option of C has the effect of changing the SGML declaration to specify C and C. If neither C nor C are specified, then the C and C specified in the SGML declaration will be used. =back =head2 XML WARNINGS The following warnings are turned on for the C warning described above: =over 4 =item inclusion Warn about inclusions in element type declarations. =item exclusion Warn about exclusions in element type declarations. =item rcdata-content Warn about RCDATA declared content in element type declarations. =item cdata-content Warn about CDATA declared content in element type declarations. =item ps-comment Warn about comments in parameter separators. =item attlist-group-decl Warn about name groups in attribute declarations. =item element-group-decl Warn about name groups in element type declarations. =item pi-entity Warn about PI entities. =item internal-sdata-entity Warn about internal SDATA entities. =item internal-cdata-entity Warn about internal CDATA entities. =item external-sdata-entity Warn about external SDATA entities. =item external-cdata-entity Warn about external CDATA entities. =item bracket-entity Warn about bracketed text entities. =item data-atts Warn about attribute definition list declarations for notations. =item missing-system-id Warn about external identifiers without system identifiers. =item conref Warn about content reference attributes. =item current Warn about current attributes. =item nutoken-decl-value Warn about attributes with a declared value of NUTOKEN or NUTOKENS. =item number-decl-value Warn about attributes with a declared value of NUMBER or NUMBERS. =item name-decl-value Warn about attributes with a declared value of NAME or NAMES. =item named-char-ref Warn about named character references. =item refc Warn about ommitted refc delimiters. =item temp-ms Warn about TEMP marked sections. =item rcdata-ms Warn about RCDATA marked sections. =item instance-include-ms Warn about INCLUDE marked sections in the document instance. =item instance-ignore-ms Warn about IGNORE marked sections in the document instance. =item and-group Warn about AND connectors in model groups. =item rank Warn about ranked elements. =item empty-comment-decl Warn about empty comment declarations. =item att-value-not-literal Warn about attribute values which are not literals. =item missing-att-name Warn about ommitted attribute names in start tags. =item comment-decl-s Warn about spaces before the MDC in comment declarations. =item comment-decl-multiple Warn about comment declarations containing multiple comments. =item missing-status-keyword Warn about marked sections without a status keyword. =item multiple-status-keyword Warn about marked sections with multiple status keywords. =item instance-param-entity Warn about parameter entities in the document instance. =item min-param Warn about minimization parameters in element type declarations. =item mixed-content-xml Warn about cases of mixed content which are not allowed in XML. =item name-group-not-or Warn about name groups with a connector different from OR. =item pi-missing-name Warn about processing instructions which don't start with a name. =item instance-status-keyword-s Warn about spaces between DSO and status keyword in marked sections. =item external-data-entity-ref Warn about references to external data entities in the content. =item att-value-external-entity-ref Warn about references to external data entities in attribute values. =item data-delim Warn about occurances of `<' and `&' as data. =item explicit-sgml-decl Warn about an explicit SGML declaration. =item internal-subset-ms Warn about marked sections in the internal subset. =item default-entity Warn about a default entity declaration. =item non-sgml-char-ref Warn about numeric character references to non-SGML characters. =item internal-subset-ps-param-entity Warn about parameter entity references in parameter separators in the internal subset. =item internal-subset-ts-param-entity Warn about parameter entity references in token separators in the internal subset. =item internal-subset-literal-param-entity Warn about parameter entity references in parameter literals in the internal subset. =back =head1 PROCESSING FILES In order to start processing of a document and recieve events, the C method must be called. It takes one argument specifying the path to a file (not a file handle). You must set an event handler using the C method prior to using this method. The return value of C is currently undefined. =head1 EVENT HANDLERS In order to receive data from the parser you need to write an event handler. For example, package ExampleHandler; sub new { bless {}, shift } sub start_element { my ($self, $elem) = @_; printf " * %s\n", $elem->{Name}; } This handler would print all the element names as they are found in the document, for a typical XHTML document this might result in something like * html * head * title * body * p * ... The events closely match those in the generic interface to OpenSP, see L for more information. The event names have been changed to lowercase and underscores to separate words and properties are capitalized. Arrays are represented as Perl array references. C information is not passed to the handler but made available through the C method which can be called from event handlers. Some redundant information has also been stripped and the generic identifier of an element is stored in the C hash entry. For example, for an EndElementEvent the C handler gets called with a hash reference { Name => 'gi' } The following events are defined: * appinfo * processing_instruction * start_element * end_element * data * sdata * external_data_entity_ref * subdoc_entity_ref * start_dtd * end_dtd * end_prolog * general_entity # set $p->output_general_entities(1) * comment_decl # set $p->output_comment_decls(1) * marked_section_start # set $p->output_marked_sections(1) * marked_section_end # set $p->output_marked_sections(1) * ignored_chars # set $p->output_marked_sections(1) * error * open_entity_change If the documentation of the generic interface to OpenSP states that certain data is not valid, it will not be available through this interface (i.e., the respective key does not exist in the hash ref). =head1 POSITIONING INFORMATION Event handlers can call the C method on the parser object to retrieve positioning information, the get_location method will return a hash reference with the following properties: LineNumber => ..., # line number ColumnNumber => ..., # column number ByteOffset => ..., # number of preceding bytes EntityOffset => ..., # number of preceding bit combinations EntityName => ..., # name of the external entity FileName => ..., # name of the file These can be C or an empty string. =head1 POST-PROCESSING ERROR MESSAGES OpenSP returns error messages in form of a string rather than individual components of the message like line numbers or message text. The C method on the parser object can be used to post-process these error message strings as reliable as possible. It can be used e.g. from an error event handler if the parser object is accessible like sub error { my $self = shift; my $erro = shift; my $mess = $self->{parser}->split_message($erro); } See the documentation of C in the L documentation. =head1 UNICODE SUPPORT All strings returned from event handlers and helper routines are UTF-8 encoded with the UTF-8 flag turned on, helper functions like C expect (but don't check) that string arguments are UTF-8 encoded and have the UTF-8 flag turned on. Behavior of helper functions is undefined when you pass unexpected input and should be avoided. C has limited support for binary input, but the binary input must be compatible with OpenSP's generic interface requirements and you must specify the encoding through means available to OpenSP to enable it to properly decode the binary input. Any encoding meta data about such binary input specific to Perl (such as encoding disciplines for file handles when you pass a file descriptor) will be ignored. For more specific information refer to the OpenSP manual. =over 4 =item * L =item * L =back =begin comment =head1 NOTES ON EXTERNAL ENTITIES (Note that this list of issues in incomplete.) If you intend to use this module to process untrusted content and/or provide access to its output to untrusted users, you should be aware of a number of issues involving external entities that might be relevant to your application. OpenSP will attempt to resolve external parsed entities and supports resolution of system identifiers in a variety of ways. This can have a number of undesired effects: =over 4 =item undesired network traffic You can compile OpenSP to support HTTP and if you attempt to process a document like OpenSP will attempt to fetch C if the system identifier cannot be generated from a catalog entry. A malicious user might be able to abuse this ability to run denial of service attacks on specific hosts or just to drive your network traffic expenses. =item access to internal and restricted resources If the machine and/or service running this module has access privileges to specific resources, a malicious user might be able to access these resources in undesired ways or even be able to read such resources if output from this module is exposed to them. Examples for such attacks might include triggering read access to special resources like C which might never finish or C of which the content might be revealed depending on how much output from this module is made available. If error messages are made available, a document like ] > could trigger such behavior as OpenSP cites the content of the entity replacement text in one of the error messages for the document (and elsewhere). To restrict access to local file resources have a look at the C method and the documentation of the functionality in the OpenSP documentation. The same applies to HTTP resources, if a web server trusts your host it might reveal private data, for example, you have a web server on localhost with a document root of C, then ] > would have the same effect if the web server has access privileges to the file. Formal system identifiers might be an additional problem in this regard, OpenSP for example generally supports documents like 4"> In order to resolve the system identifier OpenSP would attempt to read from the file descriptor C<4> if the system supports that and C<4> happens to be a legal file descriptor. See the OpenSP documentation on system identifiers for additional information. =item memory problems Note in particular that OpenSP supports a literal storage manager which would attempt to read from a string, an example would be > While generally harmless, you should note that OpenSP's current implementation would create many copies of the system identifier most of which are encoded using 4 bytes per character and which gets duplicated in a number of places, e.g. in error messages. Such a document could be used in a denial of service attack where your application runs quickly out of memory even for relatively small input documents. =back One strategy to avoid such problems would be to limit the resolution of external entities, it is for example possible to C the parser from within a C handler after checking the specified and/or generated system identifier for proper values. Though consider a document like %x; ]> Here OpenSP would attempt to read from the external entity and the C would not know about it. This can be solved by using a C handler which would be called when the reference to the parameter entity in the example above is encountered, the same for a document like ]>&x; Note that halting from all undesired C and C events might not be sufficient to prevent reading of external entities. Using the C event you can keep track of attempts to open external parsed entities referenced from the document or one of its entities. Note that the event handler gets called B OpenSP opened the entity. =end comment =head1 ENVIRONMENT VARIABLES OpenSP supports a number of environment variables to control specific processing aspects such as C or C. Portable applications need to ensure that these are set prior to loading the OpenSP library into memory which happens when the XS code is loaded. This means you need to wrap the code into a C block: BEGIN { $ENV{SP_CHARSET_FIXED} = 1; } use SGML::Parser::OpenSP; # ... Otherwise changes to the environment might not propagate to OpenSP. This applies specifically to Win32 systems. =over 4 =item SGML_SEARCH_PATH See L. =item SP_HTTP_USER_AGENT The C header for HTTP requests. =item SP_HTTP_ACCEPT The C header for HTTP requests. =item SP_MESSAGE_FORMAT Enable run time selection of message format, Value is one of C, C, C. Whether this will have an effect depends on a compile time setting which might not be enabled in your OpenSP build. This module assumes that no such support was compiled in. =item SGML_CATALOG_FILES =item SP_USE_DOCUMENT_CATALOG See L. =item SP_SYSTEM_CHARSET =item SP_CHARSET_FIXED =item SP_BCTF =item SP_ENCODING See L. =back Note that you can use the C method instead of using C and the C method instead of using C and attributes on storage object specifications for C and C respectively. For example, if C is set to C<1> you can use $p->parse("example.xhtml"); to process C using the C character encoding. =head1 KNOWN ISSUES OpenSP must be compiled with C I and with C I, this module will otherwise break at runtime or not compile. =head1 BUG REPORTS Please report bugs in this module via L Please report bugs in OpenSP via L Please send comments and questions to the spo-devel mailing list, see L for details. =head1 SEE ALSO =over 4 =item * L =item * L =item * L =back =head1 AUTHORS Terje Bless wrote version 0.01. Bjoern Hoehrmann wrote version 0.02+. =head1 COPYRIGHT AND LICENSE Copyright (c) 2006-2008 Bjoern Hoehrmann . This module is licensed under the same terms as Perl itself. =cut SGML-Parser-OpenSP-0.994/Makefile.PL0000600000175300017630000000244311031762220015625 0ustar bjoerncpanuse 5.008; use ExtUtils::MakeMaker; my %options; if ($^O eq "MSWin32") { $options{LIBS} = "-l" . prompt("Where is the OpenSP link library?", "T:\\osp\\lib\\Release\\osp152.lib"); $options{CC} = prompt("Which compiler should be used?", "cl -TP -EHsc -IT:/osp/"); } else { # assume some compatible Linux $options{LD} = "g++"; $options{CC} = "g++"; $options{LIBS} = "-lstdc++ -losp"; } WriteMakefile( NAME => 'SGML::Parser::OpenSP', VERSION_FROM => 'lib/SGML/Parser/OpenSP.pm', PREREQ_PM => { Class::Accessor => 0, Test::Exception => 0, File::Temp => 0, }, ($] >= 5.005 ? (ABSTRACT_FROM => 'lib/SGML/Parser/OpenSP.pm', AUTHOR => 'Bjoern Hoehrmann ') : ()), # SP_MULTI_BYTE is needed iff OpenSP is built with SP_MULTI_BYTE DEFINE => '-DSP_MULTI_BYTE=1', INC => '', # e.g., '-I/usr/include/other' XSOPT => '-C++', LICENSE => 'perl', 'dist' => { PREOP => 'chmod 600 Makefile.PL', TARFLAGS => '--group=cpan --owner=bjoern -cvf', }, %options ); SGML-Parser-OpenSP-0.994/MANIFEST0000700000175300017630000000127611031762311015011 0ustar bjoerncpanChanges Makefile.PL MANIFEST OpenSP.xs README typemap t/01basic.t t/02integrity.t t/03exceptions.t t/04basicparse.t t/05basichandler.t t/06parseliteral.t t/07defaults.t t/08comments.t t/09locations.t t/10errors.t t/11parsers.t t/12utf8.t t/13restricted.t t/14illegalparse.t t/15parseinput.t t/16catalogs.t t/17splitmessage.t t/18halt.t t/19refcounting.t t/20passfd.t t/21parsestring.t t/22mwarnings.t t/98podsyn.t t/99podcov.t lib/SGML/Parser/OpenSP.pm lib/SGML/Parser/OpenSP/Tools.pm samples/no-doctype.xml samples/test.soc samples/xml.dcl samples/xhtml1-strict-s.dtd eg/xhtml1val.pl META.yml Module meta-data (added by MakeMaker) SGML-Parser-OpenSP-0.994/META.yml0000700000175300017630000000104611031762311015124 0ustar bjoerncpan--- #YAML:1.0 name: SGML-Parser-OpenSP version: 0.994 abstract: Parse SGML documents using OpenSP license: perl author: - Bjoern Hoehrmann generated_by: ExtUtils::MakeMaker version 6.44 distribution_type: module requires: Class::Accessor: 0 File::Temp: 0 Test::Exception: 0 meta-spec: url: http://module-build.sourceforge.net/META-spec-v1.3.html version: 1.3 SGML-Parser-OpenSP-0.994/OpenSP.xs0000700000175300017630000011047010725707562015415 0ustar bjoerncpan// OpenSP.xs -- OpenSP XS Wrapper // // $Id: OpenSP.xs,v 1.30 2007/12/06 05:58:10 hoehrmann Exp $ // workaround for broken math.h in VC++ 6.0 #if defined(_MSC_VER) && _MSC_VER < 1300 #include #endif #define PERL_NO_GET_CONTEXT #define SPO_SMALL_STRINGS_LENGTH 1024 extern "C" { #include "EXTERN.h" #include "perl.h" #include "XSUB.h" } // these are specific to the system and might need to // be changed before the Perl extension can compile #ifdef WIN32 #include #else #include #endif /////////////////////////////////////////////////////////////////////////// // Class SgmlParserOpenSP /////////////////////////////////////////////////////////////////////////// class SgmlParserOpenSP : private SGMLApplication { public: // ... SgmlParserOpenSP(); public: // ... void parse(SV* file_sv); SV* get_location(); void halt(); // ... SV* m_self; private: // OpenSP event handler void appinfo (const AppinfoEvent& e); void pi (const PiEvent& e); void startElement (const StartElementEvent& e); void endElement (const EndElementEvent& e); void data (const DataEvent& e); void sdata (const SdataEvent& e); void externalDataEntityRef (const ExternalDataEntityRefEvent& e); void subdocEntityRef (const SubdocEntityRefEvent& e); void startDtd (const StartDtdEvent& e); void endDtd (const EndDtdEvent& e); void endProlog (const EndPrologEvent& e); void generalEntity (const GeneralEntityEvent& e); void commentDecl (const CommentDeclEvent& e); void markedSectionStart (const MarkedSectionStartEvent& e); void markedSectionEnd (const MarkedSectionEndEvent& e); void ignoredChars (const IgnoredCharsEvent& e); void error (const ErrorEvent& e); // OpenSP entity change handler void openEntityChange (const OpenEntityPtr& p); // ... void dispatchEvent (const char* name, const HV* hv); bool handler_can (const char* method); // ... SV* cs2sv (const SGMLApplication::CharString s); HV* location2hv (const SGMLApplication::Location l); HV* notation2hv (const SGMLApplication::Notation n); HV* externalid2hv (const SGMLApplication::ExternalId id); HV* entity2hv (const SGMLApplication::Entity e); HV* attributes2hv (const SGMLApplication::Attribute* attrs, const size_t n); HV* attribute2hv (const SGMLApplication::Attribute a); // ... bool _hv_fetch_SvTRUE(HV* hv, const char* key, const I32 klen); void _hv_fetch_pk_setOption(HV* hv, const char* key, const I32 klen, ParserEventGeneratorKit& pk, const enum ParserEventGeneratorKit::OptionWithArg o); // ... SV* m_handler; bool m_parsing; SGMLApplication::Position m_pos; SGMLApplication::OpenEntityPtr m_openEntityPtr; EventGenerator* m_egp; // ... PerlInterpreter* my_perl; // ... U8 m_temp[SPO_SMALL_STRINGS_LENGTH * UTF8_MAXLEN + 1]; }; /////////////////////////////////////////////////////////////////////////// // computed hash values /////////////////////////////////////////////////////////////////////////// static U32 HvvAttributes; static U32 HvvByteOffset; static U32 HvvCdataChunks; static U32 HvvColumnNumber; static U32 HvvComment; static U32 HvvComments; static U32 HvvContentType; static U32 HvvData; static U32 HvvDataType; static U32 HvvDeclType; static U32 HvvDefaulted; static U32 HvvEntities; static U32 HvvEntity; static U32 HvvEntityName; static U32 HvvEntityOffset; static U32 HvvExternalId; static U32 HvvFileName; static U32 HvvGeneratedSystemId; static U32 HvvIncluded; static U32 HvvIndex; static U32 HvvIsGroup; static U32 HvvIsId; static U32 HvvIsInternal; static U32 HvvIsNonSgml; static U32 HvvIsSdata; static U32 HvvLineNumber; static U32 HvvMessage; static U32 HvvName; static U32 HvvNonSgmlChar; static U32 HvvNone; static U32 HvvNotation; static U32 HvvParams; static U32 HvvPublicId; static U32 HvvSeparator; static U32 HvvStatus; static U32 HvvString; static U32 HvvSystemId; static U32 HvvText; static U32 HvvTokens; static U32 HvvType; /////////////////////////////////////////////////////////////////////////// // Helper functions /////////////////////////////////////////////////////////////////////////// SV* SgmlParserOpenSP::cs2sv(const SGMLApplication::CharString s) { SV* result; unsigned int i = 0; U8* d; // optimized memory-intensive version for small strings if (s.len < SPO_SMALL_STRINGS_LENGTH) { d = m_temp; for (i = 0; i < s.len; ++i) d = uvuni_to_utf8_flags(d, s.ptr[i], 0); result = newSVpvn((const char*)m_temp, d - m_temp); } else { result = newSVpvn("", 0); for (i = 0; i < s.len; ++i) { d = (U8 *)SvGROW(result, SvCUR(result) + UTF8_MAXLEN + 1); d = uvuni_to_utf8_flags(d + SvCUR(result), s.ptr[i], 0); SvCUR_set(result, d - (U8 *)SvPVX(result)); } } SvUTF8_on(result); return result; } /////////////////////////////////////////////////////////////////////////// // OpenSP data structure conversion helper functions /////////////////////////////////////////////////////////////////////////// #define uv_or_undef(x) (x == (unsigned long)-1 ? &PL_sv_undef : newSVuv(x)) HV* SgmlParserOpenSP::location2hv(const SGMLApplication::Location l) { HV* hv = newHV(); hv_store(hv, "LineNumber", 10, uv_or_undef(l.lineNumber), HvvLineNumber); hv_store(hv, "ColumnNumber", 12, uv_or_undef(l.columnNumber), HvvColumnNumber); hv_store(hv, "ByteOffset", 10, uv_or_undef(l.byteOffset), HvvByteOffset); hv_store(hv, "EntityOffset", 12, uv_or_undef(l.entityOffset), HvvEntityOffset); hv_store(hv, "EntityName", 10, cs2sv(l.entityName), HvvEntityName); hv_store(hv, "FileName", 8, cs2sv(l.filename), HvvFileName); return hv; } HV* SgmlParserOpenSP::notation2hv(const SGMLApplication::Notation n) { HV* hv = newHV(); if (n.name.len > 0) { SV* sv = newRV_noinc((SV*)externalid2hv(n.externalId)); hv_store(hv, "Name", 4, cs2sv(n.name), HvvName); hv_store(hv, "ExternalId", 10, sv, HvvExternalId); } return hv; } HV* SgmlParserOpenSP::externalid2hv(const SGMLApplication::ExternalId id) { HV* hv = newHV(); if (id.haveSystemId) hv_store(hv, "SystemId", 8, cs2sv(id.systemId), HvvSystemId); if (id.havePublicId) hv_store(hv, "PublicId", 8, cs2sv(id.publicId), HvvPublicId); if (id.haveGeneratedSystemId) { SV* sv = cs2sv(id.generatedSystemId); hv_store(hv, "GeneratedSystemId", 17, sv, HvvGeneratedSystemId); } return hv; } HV* SgmlParserOpenSP::entity2hv(const SGMLApplication::Entity e) { HV* hv = newHV(); hv_store(hv, "Name", 4, cs2sv(e.name), HvvName); // dataType switch (e.dataType) { case SGMLApplication::Entity::sgml: hv_store(hv, "DataType", 8, newSVpvn("sgml", 4), HvvDataType); break; case SGMLApplication::Entity::cdata: hv_store(hv, "DataType", 8, newSVpvn("cdata", 5), HvvDataType); break; case SGMLApplication::Entity::sdata: hv_store(hv, "DataType", 8, newSVpvn("sdata", 5), HvvDataType); break; case SGMLApplication::Entity::ndata: hv_store(hv, "DataType", 8, newSVpvn("ndata", 5), HvvDataType); break; case SGMLApplication::Entity::subdoc: hv_store(hv, "DataType", 8, newSVpvn("subdoc", 6), HvvDataType); break; case SGMLApplication::Entity::pi: hv_store(hv, "DataType", 8, newSVpvn("pi", 2), HvvDataType); break; } // declType switch (e.declType) { case SGMLApplication::Entity::general: hv_store(hv, "DeclType", 8, newSVpvn("general", 7), HvvDeclType); break; case SGMLApplication::Entity::parameter: hv_store(hv, "DeclType", 8, newSVpvn("parameter", 9), HvvDeclType); break; case SGMLApplication::Entity::doctype: hv_store(hv, "DeclType", 8, newSVpvn("doctype", 7), HvvDeclType); break; case SGMLApplication::Entity::linktype: hv_store(hv, "DeclType", 8, newSVpvn("linktype", 8), HvvDeclType); break; } if (e.isInternal) { hv_store(hv, "IsInternal", 10, newSViv(1), HvvIsInternal); hv_store(hv, "Text", 4, cs2sv(e.text), HvvText); } else { SV* sv1 = newRV_noinc((SV*)externalid2hv(e.externalId)); SV* sv2 = newRV_noinc((SV*)attributes2hv(e.attributes, e.nAttributes)); SV* sv3 = newRV_noinc((SV*)notation2hv(e.notation)); hv_store(hv, "ExternalId", 10, sv1, HvvExternalId); hv_store(hv, "Attributes", 10, sv2, HvvAttributes); hv_store(hv, "Notation", 8, sv3, HvvNotation); } return hv; } HV* SgmlParserOpenSP::attributes2hv(const SGMLApplication::Attribute* attrs, size_t n) { HV* hv = newHV(); for (unsigned int i = 0; i < n; ++i) { HV* a = attribute2hv(attrs[i]); hv_store(a, "Index", 5, newSViv(i), HvvIndex); hv_store_ent(hv, sv_2mortal(cs2sv(attrs[i].name)), newRV_noinc((SV*)a), 0); } return hv; } HV* SgmlParserOpenSP::attribute2hv(const SGMLApplication::Attribute a) { HV* hv = newHV(); // Name => ... hv_store(hv, "Name", 4, cs2sv(a.name), HvvName); // Type => ... if (a.type == SGMLApplication::Attribute::cdata) { AV* av = newAV(); for (unsigned int i = 0; i < a.nCdataChunks; ++i) { HV* cc = newHV(); if (a.cdataChunks[i].isSdata) { SV* sv = cs2sv(a.cdataChunks[i].entityName); // redundant? hv_store(cc, "IsSdata", 7, newSViv(1), HvvIsSdata); hv_store(cc, "EntityName", 10, sv, HvvEntityName); } else if (a.cdataChunks[i].isNonSgml) { SV* sv = newSViv(a.cdataChunks[i].nonSgmlChar); // redundant? hv_store(cc, "IsNonSgml", 9, newSViv(1), HvvIsNonSgml); hv_store(cc, "NonSgmlChar", 11, sv, HvvNonSgmlChar); } hv_store(cc, "Data", 4, cs2sv(a.cdataChunks[i].data), HvvData); av_push(av, newRV_noinc((SV*)cc)); } hv_store(hv, "Type", 4, newSVpvn("cdata", 5), HvvType); hv_store(hv, "CdataChunks", 11, newRV_noinc((SV*)av), HvvCdataChunks); } else if (a.type == SGMLApplication::Attribute::tokenized) { AV* entities = newAV(); hv_store(hv, "Type", 4, newSVpvn("tokenized", 9), HvvType); hv_store(hv, "Tokens", 6, cs2sv(a.tokens), HvvTokens); hv_store(hv, "IsGroup", 7, newSViv((int)a.isGroup), HvvIsGroup); hv_store(hv, "IsId", 4, newSViv((int)a.isId), HvvIsId); for (unsigned int i = 0; i < a.nEntities; ++i) { av_push(entities, newRV_noinc((SV*)entity2hv(a.entities[i]))); } SV* sv1 = newRV_noinc((SV*)notation2hv(a.notation)); SV* sv2 = newRV_noinc((SV*)entities); hv_store(hv, "Notation", 8, sv1, HvvNotation); hv_store(hv, "Entities", 8, sv2, HvvEntities); } else if (a.type == SGMLApplication::Attribute::implied) { hv_store(hv, "Type", 4, newSVpvn("implied", 7), HvvType); } else if (a.type == SGMLApplication::Attribute::invalid) { hv_store(hv, "Type", 4, newSVpvn("invalid", 7), HvvType); } if (a.type == SGMLApplication::Attribute::cdata || a.type == SGMLApplication::Attribute::tokenized) { switch (a.defaulted) { case SGMLApplication::Attribute::specified: hv_store(hv, "Defaulted", 9, newSVpvn("specified", 9), HvvDefaulted); break; case SGMLApplication::Attribute::definition: hv_store(hv, "Defaulted", 9, newSVpvn("definition", 10), HvvDefaulted); break; case SGMLApplication::Attribute::current: hv_store(hv, "Defaulted", 9, newSVpvn("current", 7), HvvDefaulted); break; } } return hv; } /////////////////////////////////////////////////////////////////////////// // ... /////////////////////////////////////////////////////////////////////////// bool SgmlParserOpenSP::_hv_fetch_SvTRUE(HV* hv, const char* key, const I32 klen) { SV** svp = hv_fetch(hv, key, klen, 0); return (svp && SvTRUE(*svp)); } void SgmlParserOpenSP::_hv_fetch_pk_setOption(HV* hv, const char* key, const I32 klen, ParserEventGeneratorKit& pk, const enum ParserEventGeneratorKit::OptionWithArg o) { SV** svp = hv_fetch(hv, key, klen, 0); SV* rv; if (!svp || !*svp) return; // character string if (SvPOK(*svp)) { pk.setOption(o, SvPV_nolen(*svp)); return; } if (!SvROK(*svp)) return; rv = SvRV(*svp); if (!rv) return; if (!(SvTYPE(rv) == SVt_PVAV)) return; // array reference AV* av = (AV*)rv; I32 len = av_len(av); for (I32 i = 0; i <= len; ++i) { SV** svp = av_fetch(av, i, 0); if (!svp || !*svp || !SvPOK(*svp)) { warn("not a legal argument in %s\n", key); continue; } #ifndef SP_WIDE_SYSTEM pk.setOption(o, SvPV_nolen(*svp)); #else croak("SP_WIDE_SYSTEM is not supported\n"); #endif } } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP implementation /////////////////////////////////////////////////////////////////////////// SgmlParserOpenSP::SgmlParserOpenSP() { dTHX; this->my_perl = my_perl; // compute hashes to improve performance PERL_HASH(HvvAttributes, "Attributes", 10); PERL_HASH(HvvByteOffset, "ByteOffset", 10); PERL_HASH(HvvCdataChunks, "CdataChunks", 11); PERL_HASH(HvvColumnNumber, "ColumnNumber", 12); PERL_HASH(HvvComment, "Comment", 7); PERL_HASH(HvvComments, "Comments", 8); PERL_HASH(HvvContentType, "ContentType", 11); PERL_HASH(HvvData, "Data", 4); PERL_HASH(HvvDataType, "DataType", 8); PERL_HASH(HvvDeclType, "DeclType", 8); PERL_HASH(HvvDefaulted, "Defaulted", 9); PERL_HASH(HvvEntities, "Entities", 8); PERL_HASH(HvvEntity, "Entity", 6); PERL_HASH(HvvEntityName, "EntityName", 10); PERL_HASH(HvvEntityOffset, "EntityOffset", 12); PERL_HASH(HvvExternalId, "ExternalId", 10); PERL_HASH(HvvFileName, "FileName", 8); PERL_HASH(HvvGeneratedSystemId, "GeneratedSystemId", 17); PERL_HASH(HvvIncluded, "Included", 8); PERL_HASH(HvvIndex, "Index", 5); PERL_HASH(HvvIsGroup, "IsGroup", 7); PERL_HASH(HvvIsId, "IsId", 4); PERL_HASH(HvvIsInternal, "IsInternal", 10); PERL_HASH(HvvIsNonSgml, "IsNonSgml", 9); PERL_HASH(HvvIsSdata, "IsSdata", 7); PERL_HASH(HvvLineNumber, "LineNumber", 10); PERL_HASH(HvvMessage, "Message", 7); PERL_HASH(HvvName, "Name", 4); PERL_HASH(HvvNonSgmlChar, "NonSgmlChar", 11); PERL_HASH(HvvNone, "None", 4); PERL_HASH(HvvNotation, "Notation", 8); PERL_HASH(HvvParams, "Params", 6); PERL_HASH(HvvPublicId, "PublicId", 8); PERL_HASH(HvvSeparator, "Separator", 9); PERL_HASH(HvvStatus, "Status", 6); PERL_HASH(HvvString, "String", 6); PERL_HASH(HvvSystemId, "SystemId", 8); PERL_HASH(HvvText, "Text", 4); PERL_HASH(HvvTokens, "Tokens", 6); PERL_HASH(HvvType, "Type", 4); // initialize member variables m_openEntityPtr = NULL; m_parsing = false; m_handler = NULL; m_self = NULL; m_pos = 0; m_egp = NULL; } SV* SgmlParserOpenSP::get_location() { if (!m_parsing) croak("get_location() must be called from event handlers\n"); SGMLApplication::Location l(m_openEntityPtr, m_pos); return newRV_noinc((SV*)location2hv(l)); } void SgmlParserOpenSP::halt() { if (!m_parsing) croak("halt() must be called from event handlers\n"); if (!m_egp) croak("egp not available, object corrupted\n"); m_egp->halt(); } bool SgmlParserOpenSP::handler_can(const char* method) { if (!method || !m_handler) return false; if (!SvROK(m_handler) || !sv_isobject(m_handler)) return false; HV* stash = SvSTASH(SvRV(m_handler)); if (!stash) return false; // todo: this could benefit from caching the result // todo: this does not look for autoloaded methods, should it? if (!gv_fetchmethod_autoload(stash, method, FALSE)) return false; return true; } void SgmlParserOpenSP::dispatchEvent(const char* name, const HV* hv) { dSP; ENTER; SAVETMPS; PUSHMARK(SP); XPUSHs(m_handler); XPUSHs(hv ? sv_2mortal(newRV_noinc((SV*)hv)) : &PL_sv_undef); PUTBACK; // call the callback method; should this use G_KEEPER? call_method(name, G_DISCARD | G_SCALAR | G_EVAL); // Refetch the stack pointer. SPAGAIN; // graceful recovery if (SvTRUE(ERRSV)) { m_egp->halt(); POPs; } PUTBACK; FREETMPS; LEAVE; } void SgmlParserOpenSP::parse(SV* file_sv) { ParserEventGeneratorKit pk; HV* hv; SV** svp; if (!file_sv) croak("you must specify a file name\n"); if (!SvPOK(file_sv)) croak("not a proper file name\n"); if (m_parsing) croak("parse must not be called during parse\n"); if (!m_self || !sv_isobject(m_self)) croak("not a proper SGML::Parser::OpenSP object\n"); hv = (HV*)SvRV(m_self); svp = hv_fetch(hv, "handler", 7, 0); if (!svp || !*svp) croak("you must specify a handler first\n"); if (!sv_isobject(*svp)) croak("handler must be a blessed reference\n"); m_handler = *svp; // Boolean Options if (_hv_fetch_SvTRUE(hv, "show_open_entities", 18)) pk.setOption(ParserEventGeneratorKit::showOpenEntities); if (_hv_fetch_SvTRUE(hv, "show_open_elements", 18)) pk.setOption(ParserEventGeneratorKit::showOpenElements); if (_hv_fetch_SvTRUE(hv, "show_error_numbers", 18)) pk.setOption(ParserEventGeneratorKit::showErrorNumbers); if (_hv_fetch_SvTRUE(hv, "output_comment_decls", 20)) pk.setOption(ParserEventGeneratorKit::outputCommentDecls); if (_hv_fetch_SvTRUE(hv, "output_marked_sections", 22)) pk.setOption(ParserEventGeneratorKit::outputMarkedSections); if (_hv_fetch_SvTRUE(hv, "output_general_entities", 23)) pk.setOption(ParserEventGeneratorKit::outputGeneralEntities); if (_hv_fetch_SvTRUE(hv, "map_catalog_document", 20)) pk.setOption(ParserEventGeneratorKit::mapCatalogDocument); if (_hv_fetch_SvTRUE(hv, "restrict_file_reading", 21)) pk.setOption(ParserEventGeneratorKit::restrictFileReading); // Options with argument _hv_fetch_pk_setOption(hv, "warnings", 8, pk, ParserEventGeneratorKit::enableWarning); _hv_fetch_pk_setOption(hv, "catalogs", 8, pk, ParserEventGeneratorKit::addCatalog); _hv_fetch_pk_setOption(hv, "search_dirs", 11, pk, ParserEventGeneratorKit::addSearchDir); _hv_fetch_pk_setOption(hv, "include_params", 14, pk, ParserEventGeneratorKit::includeParam); _hv_fetch_pk_setOption(hv, "active_links", 12, pk, ParserEventGeneratorKit::activateLink); char* file = SvPV_nolen(file_sv); #ifndef SP_WIDE_SYSTEM m_egp = pk.makeEventGenerator(1, &file); #else croak("SP_WIDE_SYSTEM is not supported\n"); #endif m_egp->inhibitMessages(true); m_parsing = true; m_egp->run(*this); m_parsing = false; // all entities closed now m_openEntityPtr = NULL; delete m_egp; // no longer valid m_egp = NULL; // After graceful recovery croak here to propagate the exception to // the caller. I am not sure how useful this behavior actually is, // but it's better than silently ignoring the error or to croak // before this point as the object would be unusable and leak memory. if (SvTRUE(ERRSV)) croak(Nullch); } /////////////////////////////////////////////////////////////////////////// // OpenSP event handler /////////////////////////////////////////////////////////////////////////// #define updatePosition(pos) m_pos = pos /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::appinfo /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::appinfo(const AppinfoEvent& e) { if (!handler_can("appinfo")) return; updatePosition(e.pos); HV* hv = newHV(); if (!e.none) { hv_store(hv, "None", 4, newSViv(0), HvvNone); hv_store(hv, "String", 6, cs2sv(e.string), HvvString); } else { hv_store(hv, "None", 4, newSViv(1), HvvNone); } dispatchEvent("appinfo", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::pi /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::pi(const PiEvent& e) { if (!handler_can("processing_instruction")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "EntityName", 10, cs2sv(e.entityName), HvvEntityName); hv_store(hv, "Data", 4, cs2sv(e.data), HvvData); dispatchEvent("processing_instruction", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::startElement /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::startElement(const StartElementEvent& e) { if (!handler_can("start_element")) return; updatePosition(e.pos); HV* hv = newHV(); SV* sv = newRV_noinc((SV*)attributes2hv(e.attributes, e.nAttributes)); hv_store(hv, "Name", 4, cs2sv(e.gi), HvvName); hv_store(hv, "Attributes", 10, sv, HvvAttributes); switch (e.contentType) { case SGMLApplication::StartElementEvent::empty: hv_store(hv, "ContentType", 11, newSVpvn("empty", 5), HvvContentType); break; case SGMLApplication::StartElementEvent::cdata: hv_store(hv, "ContentType", 11, newSVpvn("cdata", 5), HvvContentType); break; case SGMLApplication::StartElementEvent::rcdata: hv_store(hv, "ContentType", 11, newSVpvn("rcdata", 6), HvvContentType); break; case SGMLApplication::StartElementEvent::mixed: hv_store(hv, "ContentType", 11, newSVpvn("mixed", 5), HvvContentType); break; case SGMLApplication::StartElementEvent::element: hv_store(hv, "ContentType", 11, newSVpvn("element", 7), HvvContentType); break; } hv_store(hv, "Included", 8, newSViv(e.included ? 1 : 0), HvvIncluded); dispatchEvent("start_element", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::endElement /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::endElement(const EndElementEvent& e) { if (!handler_can("end_element")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "Name", 4, cs2sv(e.gi), HvvName); dispatchEvent("end_element", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::data /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::data(const DataEvent& e) { if (!handler_can("data")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "Data", 4, cs2sv(e.data), HvvData); dispatchEvent("data", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::sdata /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::sdata(const SdataEvent& e) { if (!handler_can("sdata")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "EntityName", 10, cs2sv(e.entityName), HvvEntityName); hv_store(hv, "Text", 4, cs2sv(e.text), HvvText); dispatchEvent("sdata", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::externalDataEntityRef /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::externalDataEntityRef(const ExternalDataEntityRefEvent& e) { if (!handler_can("external_data_entity_ref")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "Entity", 6, newRV_noinc((SV*)entity2hv(e.entity)), HvvEntity); dispatchEvent("external_data_entity_ref", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::subdocEntityRef /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::subdocEntityRef(const SubdocEntityRefEvent& e) { if (!handler_can("subdoc_entity_ref")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "Entity", 6, newRV_noinc((SV*)entity2hv(e.entity)), HvvEntity); dispatchEvent("subdoc_entity_ref", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::startDtd /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::startDtd(const StartDtdEvent& e) { if (!handler_can("start_dtd")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "Name", 4, cs2sv(e.name), HvvName); if (e.haveExternalId) { SV* sv = newRV_noinc((SV*)externalid2hv(e.externalId)); hv_store(hv, "ExternalId", 10, sv, HvvExternalId); } dispatchEvent("start_dtd", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::endDtd /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::endDtd(const EndDtdEvent& e) { if (!handler_can("end_dtd")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "Name", 4, cs2sv(e.name), HvvName); dispatchEvent("end_dtd", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::endProlog /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::endProlog(const EndPrologEvent& e) { if (!handler_can("end_prolog")) return; updatePosition(e.pos); // ??? dispatchEvent("end_prolog", NULL); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::generalEntity /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::generalEntity(const GeneralEntityEvent& e) { if (!handler_can("general_entity")) return; HV* hv = newHV(); hv_store(hv, "Entity", 6, newRV_noinc((SV*)entity2hv(e.entity)), HvvEntity); dispatchEvent("general_entity", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::commentDecl /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::commentDecl(const CommentDeclEvent& e) { if (!handler_can("comment_decl")) return; updatePosition(e.pos); AV* av = newAV(); HV* hv = newHV(); for (unsigned int i = 0; i < e.nComments; ++i) { HV* comment = newHV(); hv_store(comment, "Comment", 7, cs2sv(e.comments[i]), HvvComment); hv_store(comment, "Separator", 9, cs2sv(e.seps[i]), HvvSeparator); av_push(av, newRV_noinc((SV*)comment)); } hv_store(hv, "Comments", 8, newRV_noinc((SV*)av), HvvComments); dispatchEvent("comment_decl", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::markedSectionStart /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::markedSectionStart(const MarkedSectionStartEvent& e) { if (!handler_can("marked_section_start")) return; updatePosition(e.pos); HV* hv = newHV(); AV* av = newAV(); switch (e.status) { case SGMLApplication::MarkedSectionStartEvent::include: hv_store(hv, "Status", 6, newSVpvn("include", 7), HvvStatus); break; case SGMLApplication::MarkedSectionStartEvent::rcdata: hv_store(hv, "Status", 6, newSVpvn("rcdata", 6), HvvStatus); break; case SGMLApplication::MarkedSectionStartEvent::cdata: hv_store(hv, "Status", 6, newSVpvn("cdata", 5), HvvStatus); break; case SGMLApplication::MarkedSectionStartEvent::ignore: hv_store(hv, "Status", 6, newSVpvn("ignore", 6), HvvStatus); break; } for (unsigned int i = 0; i < e.nParams; ++i) { HV* param = newHV(); switch (e.params[i].type) { case SGMLApplication::MarkedSectionStartEvent::Param::temp: hv_store(param, "Type", 6, newSVpvn("temp", 4), HvvType); break; case SGMLApplication::MarkedSectionStartEvent::Param::include: hv_store(param, "Type", 6, newSVpvn("include", 7), HvvType); break; case SGMLApplication::MarkedSectionStartEvent::Param::rcdata: hv_store(param, "Type", 6, newSVpvn("rcdata", 6), HvvType); break; case SGMLApplication::MarkedSectionStartEvent::Param::cdata: hv_store(param, "Type", 6, newSVpvn("cdata", 5), HvvType); break; case SGMLApplication::MarkedSectionStartEvent::Param::ignore: hv_store(param, "Type", 6, newSVpvn("ignore", 6), HvvType); break; case SGMLApplication::MarkedSectionStartEvent::Param::entityRef: hv_store(param, "Type", 6, newSVpvn("entityRef", 9), HvvType); hv_store(param, "EntityName", 10, cs2sv(e.params[i].entityName), HvvEntityName); break; } av_push(av, newRV_noinc((SV*)av)); } hv_store(hv, "Params", 6, newRV_noinc((SV*)av), HvvParams); dispatchEvent("marked_section_start", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::markedSectionEnd /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::markedSectionEnd(const MarkedSectionEndEvent& e) { if (!handler_can("marked_section_end")) return; updatePosition(e.pos); HV* hv = newHV(); switch (e.status) { case SGMLApplication::MarkedSectionEndEvent::include: hv_store(hv, "Status", 6, newSVpvn("include", 7), HvvStatus); break; case SGMLApplication::MarkedSectionEndEvent::rcdata: hv_store(hv, "Status", 6, newSVpvn("rcdata", 6), HvvStatus); break; case SGMLApplication::MarkedSectionEndEvent::cdata: hv_store(hv, "Status", 6, newSVpvn("cdata", 5), HvvStatus); break; case SGMLApplication::MarkedSectionEndEvent::ignore: hv_store(hv, "Status", 6, newSVpvn("ignore", 6), HvvStatus); break; } dispatchEvent("marked_section_end", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::ignoredChars /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::ignoredChars(const IgnoredCharsEvent& e) { if (!handler_can("ignored_chars")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "Data", 4, cs2sv(e.data), HvvData); dispatchEvent("ignored_chars", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::error /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::error(const ErrorEvent& e) { if (!handler_can("error")) return; updatePosition(e.pos); HV* hv = newHV(); hv_store(hv, "Message", 7, cs2sv(e.message), HvvMessage); switch (e.type) { case SGMLApplication::ErrorEvent::quantity: hv_store(hv, "Type", 4, newSVpvn("quantity", 8), HvvType); break; case SGMLApplication::ErrorEvent::idref: hv_store(hv, "Type", 4, newSVpvn("idref", 5), HvvType); break; case SGMLApplication::ErrorEvent::capacity: hv_store(hv, "Type", 4, newSVpvn("capacity", 8), HvvType); break; case SGMLApplication::ErrorEvent::otherError: hv_store(hv, "Type", 4, newSVpvn("otherError", 10), HvvType); break; case SGMLApplication::ErrorEvent::warning: hv_store(hv, "Type", 4, newSVpvn("warning", 7), HvvType); break; case SGMLApplication::ErrorEvent::info: hv_store(hv, "Type", 4, newSVpvn("info", 4), HvvType); break; } dispatchEvent("error", hv); } /////////////////////////////////////////////////////////////////////////// // SgmlParserOpenSP::openEntityChange /////////////////////////////////////////////////////////////////////////// void SgmlParserOpenSP::openEntityChange(const OpenEntityPtr& p) { // remember the current open entity m_openEntityPtr = p; if (handler_can("open_entity_change")) dispatchEvent("open_entity_change", newHV()); } /////////////////////////////////////////////////////////////////////////// // XS code /////////////////////////////////////////////////////////////////////////// MODULE = SGML::Parser::OpenSP PACKAGE = SGML::Parser::OpenSP PROTOTYPES: DISABLE SgmlParserOpenSP* SgmlParserOpenSP::new() INIT: SV* os; int pfd; CODE: RETVAL = new SgmlParserOpenSP(); ST(0) = sv_newmortal(); sv_upgrade(ST(0), SVt_RV); SvRV(ST(0)) = (SV*)newHV(); SvROK_on(ST(0)); sv_bless(ST(0), gv_stashpv(CLASS, 1)); hv_store((HV*)SvRV(ST(0)), "__o", 3, newSViv(PTR2IV(RETVAL)), 0); os = get_sv("\017", 0); pfd = (os && !strEQ("MSWin32", SvPV_nolen(os))) ? 1 : 0; hv_store((HV*)SvRV(ST(0)), "pass_file_descriptor", 20, newSViv(pfd), 0); void SgmlParserOpenSP::parse(SV* file_sv) SV* SgmlParserOpenSP::get_location() void SgmlParserOpenSP::halt() void SgmlParserOpenSP::DESTROY() SGML-Parser-OpenSP-0.994/README0000700000175300017630000000244511031750573014546 0ustar bjoerncpanSGML-Parser-OpenSP ================== This module provides an interface to the OpenSP SGML parser. INSTALLATION To install this module type the following: perl Makefile.PL make make test make install PPM PACKAGE A Win32 ppm package, suitable for use with ActivePerl compatible Perl packages, is kindly provided by Randy Kobes at http://theoryx5.uwinnipeg.ca/ppms/ To install it, within the ppm shell set the repository to http://theoryx5.uwinnipeg.ca/cgi-bin/ppmserver?urn:/PPMServer58 and then ppm> install SGML-Parser-OpenSP This should run a post-install script to fetch and install the needed OpenSP 1.5.2+ DLL; if this doesn't work, the DLL can be obtained from http://theoryx5.uwinnipeg.ca/ppms/scripts/ which should subsequently be placed somewhere in the PATH. DEPENDENCIES This module requires these other modules and libraries: Perl 5.8.0 or later Class::Accesor The OpenSP library v1.5.2 or later, see http://openjade.sf.net/ Note that you need the header files and link libraries included in "-dev" packages for OpenSP, not just the command line applications. COPYRIGHT AND LICENCE Copyright (c) 2006-2008 by Bjoern Hoehrmann This module is licensed under the same terms as Perl itself. SGML-Parser-OpenSP-0.994/samples/0000700000175300017630000000000011031762310015312 5ustar bjoerncpanSGML-Parser-OpenSP-0.994/samples/no-doctype.xml0000700000175300017630000000003110120233676020120 0ustar bjoerncpanSGML-Parser-OpenSP-0.994/samples/test.soc0000700000175300017630000000014310121067555017010 0ustar bjoerncpanSGMLDECL "xml.dcl" OVERRIDE YES PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "xhtml1-strict-s.dtd" SGML-Parser-OpenSP-0.994/samples/xhtml1-strict-s.dtd0000700000175300017630000004204210121066355021004 0ustar bjoerncpanSGML-Parser-OpenSP-0.994/samples/xml.dcl0000700000175300017630000001635710121066355016622 0ustar bjoerncpan" PIC "?>" SHORTREF NONE NAMES SGMLREF QUANTITY NONE -- Quantities are not restricted in XML -- ENTITIES "amp" 38 "lt" 60 "gt" 62 "quot" 34 "apos" 39 FEATURES MINIMIZE DATATAG NO OMITTAG NO RANK NO SHORTTAG STARTTAG EMPTY NO UNCLOSED NO NETENABL IMMEDNET ENDTAG EMPTY NO UNCLOSED NO ATTRIB DEFAULT YES OMITNAME NO VALUE NO EMPTYNRM YES IMPLYDEF ATTLIST NO -- VALID: was YES -- DOCTYPE NO ELEMENT NO -- VALID: was YES -- ENTITY NO NOTATION NO -- VALID: was YES -- LINK SIMPLE NO IMPLICIT NO EXPLICIT NO OTHER CONCUR NO SUBDOC NO FORMAL NO URN NO KEEPRSRE YES VALIDITY TYPE -- VALID: was NOASSERT -- ENTITIES REF ANY INTEGRAL YES APPINFO NONE SEEALSO "ISO 8879//NOTATION Extensible Markup Language (XML) 1.0//EN" > SGML-Parser-OpenSP-0.994/t/0000700000175300017630000000000011031762310014111 5ustar bjoerncpanSGML-Parser-OpenSP-0.994/t/01basic.t0000700000175300017630000000161110143274222015525 0ustar bjoerncpan# 01basic.t -- ... # # $Id: 01basic.t,v 1.2 2004/11/07 02:21:22 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 4; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); can_ok($p, qw/ handler show_open_entities show_open_elements show_error_numbers output_comment_decls output_marked_sections output_general_entities map_catalog_document restrict_file_reading warnings catalogs search_dirs include_params active_links parse split_message pass_file_descriptor parse_string /); SGML-Parser-OpenSP-0.994/t/02integrity.t0000700000175300017630000000137110121527177016474 0ustar bjoerncpan# 02integrity.t -- ... # # $Id: 02integrity.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 5; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## XS integrity ######################################################### ok(exists $p->{__o}, "pointer to C++ object"); isnt($p->{__o}, 0, "C++ object pointer not null-pointer"); SGML-Parser-OpenSP-0.994/t/03exceptions.t0000700000175300017630000000157210121527177016643 0ustar bjoerncpan# 03exceptions.t -- ... # # $Id: 03exceptions.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 6; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Exceptions ######################################################### dies_ok { $p->get_location } 'must die when calling get_location while not parsing'; dies_ok { $p->parse } 'must die when no file name specified for parse'; dies_ok { $p->parse(NO_DOCTYPE) } 'must die when no handler specified'; SGML-Parser-OpenSP-0.994/t/04basicparse.t0000700000175300017630000000203010121527177016565 0ustar bjoerncpan# 04basicparse.t -- ... # # $Id: 04basicparse.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 6; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Accessors ######################################################### $p->handler(7); is($p->handler, 7, 'accessor'); ######################################################### ## More Exceptions ######################################################### dies_ok { $p->parse(NO_DOCTYPE) } 'must die when handler not an object'; $p->handler(bless{}, 'NullHandler'); lives_ok { $p->parse(NO_DOCTYPE) } 'must not die when handler cannot handle a method'; SGML-Parser-OpenSP-0.994/t/05basichandler.t0000700000175300017630000000361210121527177017100 0ustar bjoerncpan# 05basichandler.t -- ... # # $Id: 05basichandler.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 14; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Simple Event Handler ######################################################### sub TestHandler1::new { bless{ok1=>0,ok2=>0,ok3=>0,ok4=>0,ok5=>0, ok6=>0,ok7=>0,ok8=>0,ok9=>0,oka=>0},shift } sub TestHandler1::start_element { my $s = shift; my $e = shift; return unless defined $s; return unless defined $e; $s->{ok1}++ if UNIVERSAL::isa($s, 'TestHandler1'); # Name $s->{ok2}++ if exists $e->{Name}; $s->{ok3}++ if $e->{Name} =~ /no-doctype/i; # Attributes $s->{ok4}++ if exists $e->{Attributes}; $s->{ok5}++ if UNIVERSAL::isa($e->{Attributes}, "HASH"); $s->{ok6}++ if scalar(keys(%{$_[1]->{Attributes}})) == 0; # Included $s->{ok7}++ if exists $e->{Included}; $s->{ok8}++ if $e->{Included} == 0; # ContentType $s->{ok9}++ if exists $e->{ContentType}; } my $h1 = TestHandler1->new; isa_ok($h1, 'TestHandler1'); $p->handler($h1); lives_ok { $p->parse(NO_DOCTYPE) } 'basic parser test'; ok($h1->{ok1}, 'self to handler'); ok($h1->{ok2}, 'has name'); ok($h1->{ok3}, 'proper name'); ok($h1->{ok4}, 'has attrs'); ok($h1->{ok5}, 'attrs hash ref'); ok($h1->{ok6}, 'proper attrs'); ok($h1->{ok7}, 'has included'); ok($h1->{ok8}, 'included == 0'); ok($h1->{ok9}, 'has content type'); SGML-Parser-OpenSP-0.994/t/06parseliteral.t0000700000175300017630000000361010121527177017147 0ustar bjoerncpan# 06parseliteral.t -- ... # # $Id: 06parseliteral.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 13; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Read from a ######################################################### sub TestHandler1::new { bless{ok1=>0,ok2=>0,ok3=>0,ok4=>0,ok5=>0, ok6=>0,ok7=>0,ok8=>0,ok9=>0,oka=>0},shift } sub TestHandler1::start_element { my $s = shift; my $e = shift; return unless defined $s; return unless defined $e; $s->{ok1}++ if UNIVERSAL::isa($s, 'TestHandler1'); # Name $s->{ok2}++ if exists $e->{Name}; $s->{ok3}++ if $e->{Name} =~ /no-doctype/i; # Attributes $s->{ok4}++ if exists $e->{Attributes}; $s->{ok5}++ if UNIVERSAL::isa($e->{Attributes}, "HASH"); $s->{ok6}++ if scalar(keys(%{$_[1]->{Attributes}})) == 0; # Included $s->{ok7}++ if exists $e->{Included}; $s->{ok8}++ if $e->{Included} == 0; # ContentType $s->{ok9}++ if exists $e->{ContentType}; } my $h1 = TestHandler1->new; $p->handler($h1); lives_ok { $p->parse("") } 'reading from a '; ok($h1->{ok1}, 'self to handler'); ok($h1->{ok2}, 'has name'); ok($h1->{ok3}, 'proper name'); ok($h1->{ok4}, 'has attrs'); ok($h1->{ok5}, 'attrs hash ref'); ok($h1->{ok6}, 'proper attrs'); ok($h1->{ok7}, 'has included'); ok($h1->{ok8}, 'included == 0'); ok($h1->{ok9}, 'has content type'); SGML-Parser-OpenSP-0.994/t/07defaults.t0000700000175300017630000000174210121527177016274 0ustar bjoerncpan# 07defaults.t -- ... # # $Id: 07defaults.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 6; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Comments not default ######################################################### sub TestHandler2::new { bless{ok1=>0},shift } sub TestHandler2::comment { $_->{ok1}-- } my $h2 = TestHandler2->new; isa_ok($h2, 'TestHandler2'); $p->handler($h2); lives_ok { $p->parse("") } 'comments not reported by default'; is($h2->{ok1}, 0, 'comments not default'); SGML-Parser-OpenSP-0.994/t/08comments.t0000700000175300017630000000223110121527177016305 0ustar bjoerncpan# 08comments.t -- ... # # $Id: 08comments.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 8; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Comments at user option ######################################################### sub TestHandler3::new { bless{ok=>0},shift } sub TestHandler3::comment_decl{ $_[0]->{ok}++ } my $h3 = TestHandler3->new(1); isa_ok($h3, 'TestHandler3'); $p->handler($h3); $p->output_comment_decls(1); is($p->output_comment_decls, 1, 'comments turned on'); lives_ok { $p->parse("") } 'comment reported at user option'; isnt($h3->{ok}, 0, 'comments ok'); $p->output_comment_decls(0); is($p->output_comment_decls, 0, 'comments turned off'); SGML-Parser-OpenSP-0.994/t/09locations.t0000700000175300017630000000325310121527177016461 0ustar bjoerncpan# 09locations.t -- ... # # $Id: 09locations.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 8; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Locations for implied document type declarations ######################################################### # OpenSP 1.5.1 generates random numbers for the locations sub TestHandler4::new { bless{p=>$_[1],ok1=>0,ok2=>0,ok3=>0,ok4=>0},shift } sub TestHandler4::start_dtd { my $s = shift; return unless defined $s; my $l = $s->{p}->get_location; return unless defined $l; $s->{ok1}++ if $l->{ColumnNumber} == 3; $s->{ok2}++ if $l->{LineNumber} == 2; $s->{ok3}++ if $l->{EntityOffset} == 7; } sub TestHandler4::end_dtd { my $s = shift; return unless defined $s; my $l = $s->{p}->get_location; return unless defined $l; $s->{ok4}++ if $l->{ColumnNumber} == 3; $s->{ok4}++ if $l->{LineNumber} == 2; $s->{ok4}++ if $l->{EntityOffset} == 7; } my $h4 = TestHandler4->new($p); $p->handler($h4); lives_ok { $p->parse("\n \n ") } 'implied dtd locations'; is($h4->{ok1}, 1, "implied col"); is($h4->{ok2}, 1, "implied line"); is($h4->{ok3}, 1, "implied offset"); is($h4->{ok4}, 3, "implied end_dtd"); SGML-Parser-OpenSP-0.994/t/10errors.t0000700000175300017630000000216010121527177015766 0ustar bjoerncpan# 10errors.t -- ... # # $Id: 10errors.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 5; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Error reporting ######################################################### sub TestHandler5::new { bless{ok=>0},shift } sub TestHandler5::error { return unless @_ == 2; $_[0]->{ok}++ if $_[1]->{Message} =~ /:4:13:E:/; } my $h5 = TestHandler5->new; $p->handler($h5); lives_ok { $p->parse("" . <<"__DOC__"); ]> __DOC__ } 'does properly report erros'; is($h5->{ok}, 1, 'found right error message'); SGML-Parser-OpenSP-0.994/t/11parsers.t0000700000175300017630000000223210121527177016132 0ustar bjoerncpan# 11parsers.t -- ... # # $Id: 11parsers.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 85; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Lots of parsers ######################################################### my @parser; for (1..20) { my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ok(exists $p->{__o}, 'pointer to C++ object'); isnt($p->{__o}, 0, 'C++ object pointer not null-pointer'); $p->handler(bless{},'TestHandler6'); lives_ok { $p->parse("") } 'reading from a '; push @parser, $p; } is(scalar(@parser), 20, 'all parsers loaded'); lives_ok { undef @parser } 'parser destructors'; SGML-Parser-OpenSP-0.994/t/12utf8.t0000700000175300017630000000541510121527177015350 0ustar bjoerncpan# 12utf8.t -- ... # # $Id: 12utf8.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 17; use Test::Exception; use File::Spec qw(); use Encode qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## UTF-8 flags ######################################################### sub TestHandler7::new { bless{ok0=>0,ok1=>0,ok2=>0,ok3=>0,ok4=>0, ok5=>0,ok6=>0,ok7=>0,ok8=>0,ok9=>0, oka=>0,okb=>0,okc=>0, data=>""},shift } sub TestHandler7::start_element { my $s = shift; my $e = shift; return unless defined $s and defined $e; my @k = keys %{$e->{Attributes}}; $s->{ok1}++ if Encode::is_utf8($e->{Name}); $s->{ok2}++ if Encode::is_utf8($e->{Name}, 1); return unless @k; $s->{ok8}++ if @k == 1; $s->{ok9}++ if Encode::is_utf8($k[0]); $s->{oka}++ if Encode::is_utf8($k[0], 1); $s->{okb}++ if Encode::is_utf8($e->{Attributes}{$k[0]}->{Name}); $s->{okc}++ if Encode::is_utf8($e->{Attributes}{$k[0]}->{Name}, 1); } sub TestHandler7::end_element { my $s = shift; my $e = shift; return unless defined $s and defined $e; $s->{ok3}++ if Encode::is_utf8($e->{Name}); $s->{ok4}++ if Encode::is_utf8($e->{Name}, 1); $s->{ok5}++ if Encode::is_utf8($s->{data}); $s->{ok6}++ if Encode::is_utf8($s->{data}, 1); $s->{ok7}++ if $s->{data} =~ /^Bj\x{F6}rn$/; } sub TestHandler7::data { my $s = shift; my $e = shift; return unless defined $s and defined $e; return unless exists $e->{Data}; $s->{ok0}-- unless Encode::is_utf8($e->{Data}); $s->{ok0}-- unless Encode::is_utf8($e->{Data}, 1); $s->{data}.=$e->{Data}; } my $h7 = TestHandler7->new; $p->handler($h7); lives_ok { $p->parse("Björn") } 'utf8 flags'; is($h7->{ok0}, 0, 'utf8 pcdata'); is($h7->{ok1}, 1, 'utf8 element name'); is($h7->{ok2}, 1, 'utf8 element name check'); is($h7->{ok8}, 1, 'attributes'); is($h7->{ok9}, 1, 'attribute hash key utf8'); is($h7->{oka}, 1, 'attribute hash key utf8 check'); is($h7->{okb}, 1, 'attribute name utf8'); is($h7->{okc}, 1, 'attribute name utf8 check'); is($h7->{ok3}, 1, 'end element name'); is($h7->{ok4}, 1, 'end element name'); is($h7->{ok5}, 1, 'complete data'); is($h7->{ok6}, 1, 'complete data'); is($h7->{ok7}, 1, 'correct data'); SGML-Parser-OpenSP-0.994/t/13restricted.t0000700000175300017630000000362010121527177016627 0ustar bjoerncpan# 13restricted.t -- ... # # $Id: 13restricted.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 12; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## restricted reading ######################################################### sub TestHandler8::new{bless{ok1=>0,ok2=>0},shift} sub TestHandler8::error { my $s = shift; my $e = shift; return unless defined $s and defined $e; $s->{ok2}++ if $e->{Message} =~ /^E:\s+/ and $e->{Type} eq 'otherError'; } sub TestHandler8::start_element{shift->{ok1}--} my $h8 = TestHandler8->new; $p->handler($h8); $p->restrict_file_reading(1); lives_ok { $p->parse("samples/../samples/no-doctype.xml") } 'must not read paths with ..'; is($h8->{ok1}, 0, 'must not read paths with ..'); isnt($h8->{ok2}, 0, 'must not read paths with ..'); $h8->{ok1} = 0; $h8->{ok2} = 0; lives_ok { $p->parse("./samples/no-doctype.xml") } 'must not read paths with ./'; is($h8->{ok1}, 0, 'must not read paths with ./'); isnt($h8->{ok2}, 0, 'must not read paths with ./'); $h8->{ok1} = 0; $h8->{ok2} = 0; my $sd = File::Spec->catfile(File::Spec->rel2abs('.'), 'samples'); $p->search_dirs($sd); lives_ok { $p->parse(File::Spec->catfile($sd, 'no-doctype.xml')) } 'allow to read sample dir in restricted mode'; isnt($h8->{ok1}, 0, 'allow to read sample dir in restricted mode'); is($h8->{ok2}, 0, 'allow to read sample dir in restricted mode'); $p->search_dirs([]); $p->restrict_file_reading(0); SGML-Parser-OpenSP-0.994/t/14illegalparse.t0000700000175300017630000000204710121527177017126 0ustar bjoerncpan# 14illegalparse.t -- ... # # $Id: 14illegalparse.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 5; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## parse from handler ######################################################### sub TestHandler9::new{bless{p=>$_[1],ok1=>0},shift} sub TestHandler9::start_element { my $s = shift; eval { $s->{p}->parse(NO_DOCTYPE) }; $s->{ok1}-- unless $@; } my $h9 = TestHandler9->new($p); $p->handler($h9); lives_ok { $p->parse(NO_DOCTYPE) } 'parse must not be called from handler'; is($h9->{ok1}, 0, 'parse from handler croaks'); SGML-Parser-OpenSP-0.994/t/15parseinput.t0000700000175300017630000000177210121527177016661 0ustar bjoerncpan# 15parseinput.t -- ... # # $Id: 15parseinput.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 8; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## non-scalar to parse ######################################################### sub TestHandler10::new{bless{},shift} my $h10 = TestHandler10->new; $p->handler($h10); dies_ok { $p->parse({}) } 'non-scalar to parse'; dies_ok { $p->parse([]) } 'non-scalar to parse'; ok(open(F, '<', NO_DOCTYPE), 'can open no-doctype.xml'); dies_ok { $p->parse(\*F) } 'file handle to parse'; ok(close(F), 'can close no-doctype.xml'); SGML-Parser-OpenSP-0.994/t/16catalogs.t0000700000175300017630000000405010347072063016254 0ustar bjoerncpan# 16catalogs.t -- ... # # $Id: 16catalogs.t,v 1.3 2005/12/11 19:47:15 tbe Exp $ use strict; use warnings; use Test::More tests => 9; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## SGML Catalogs ######################################################### sub TestHandler11::new{bless{ok1=>0,ok2=>0,ok3=>0,ok4=>0,ok5=>0},shift} sub TestHandler11::start_dtd { my $s = shift; my $d = shift; return unless defined $s; return unless defined $d; my $e = $d->{ExternalId}; return unless defined $e; $s->{ok1}++; $s->{ok2}++ if exists $e->{SystemId} and $e->{SystemId} eq "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"; $s->{ok3}++ if exists $e->{PublicId} and $e->{PublicId} eq "-//W3C//DTD XHTML 1.0 Strict//EN"; # this might fail in case of conflicting catalogs :-( $s->{ok4}++ if exists $e->{GeneratedSystemId} and $e->{GeneratedSystemId} =~ /^| /i; $s->{ok5}++ if exists $d->{Name} and $d->{Name} eq "html"; } my $h11 = TestHandler11->new; $p->catalogs(TEST_CATALOG); $p->handler($h11); lives_ok { $p->parse("" . <<"__DOC__");

...

__DOC__ } 'catalogs'; ok($h11->{ok1}, 'proper dtd event'); ok($h11->{ok2}, 'proper sys id'); ok($h11->{ok3}, 'proper pub id'); ok($h11->{ok4}, 'proper osfile gen id'); ok($h11->{ok5}, 'proper root element'); # reset catalogs $p->catalogs([]); SGML-Parser-OpenSP-0.994/t/17splitmessage.t0000700000175300017630000001047210127363157017170 0ustar bjoerncpan# 17splitmessage.t -- ... # # $Id: 17splitmessage.t,v 1.2 2004/10/01 23:21:19 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 18; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; BEGIN { use_ok('SGML::Parser::OpenSP::Tools') }; require_ok('SGML::Parser::OpenSP'); require_ok('SGML::Parser::OpenSP::Tools'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## newlines in enum attribute ######################################################### sub TestHandler12::new{bless{p=>$_[1],ok1=>0,ok2=>0,ok3=>0,ok4=>0, ok5=>0,ok6=>0,ok7=>0,ok8=>0},shift} sub TestHandler12::error { my $s = shift; my $e = shift; my $p = $s->{p}; return unless defined $s and defined $e and defined $p; my $l = $p->get_location; return unless defined $l; $s->{ok1}++; my $m; eval { $m = $p->split_message($e); }; return if $@; $s->{ok2}++; if ($m->{primary_message}{Number} == 122) { $s->{ok3}++ if $m->{primary_message}{ColumnNumber} == 8; $s->{ok4}++ if $m->{primary_message}{LineNumber} == 8; $s->{ok5}++ if $m->{primary_message}{Text} =~ /.+\n.+/; } elsif ($m->{primary_message}{Number} == 131) { $s->{ok6}++ if $m->{primary_message}{ColumnNumber} == 13; $s->{ok7}++ if $m->{primary_message}{LineNumber} == 8; $s->{ok8}++ if $m->{primary_message}{Text} =~ /.+\n.+/; } } my $h12 = TestHandler12->new($p); $p->handler($h12); $p->catalogs(TEST_CATALOG); $p->show_error_numbers(1); lives_ok { $p->parse("" . <<"__DOC__");

...

__DOC__ } 'newlines in enum attr values'; cmp_ok($h12->{ok1}, '>=', 2, 'two errors'); cmp_ok($h12->{ok2}, '>=', 2, 'two errors split'); ok($h12->{ok3}, 'correct col 122'); ok($h12->{ok4}, 'correct lin 122'); ok($h12->{ok5}, 'correct text 122'); ok($h12->{ok6}, 'correct col 131'); ok($h12->{ok7}, 'correct lin 131'); ok($h12->{ok8}, 'correct text 131'); $p->catalogs([]); $p->show_error_numbers(0); my @tests = ( ### 1 ### { input => [ q(0:116:49:1075801588.108:E: there is no attribute "XMLNS:UTILITY"), q(0), 0, 1, 0, ], output => { primary_message => { Number => '108', ColumnNumber => '49', Module => '1075801588', Severity => 'E', LineNumber => '116', Text => 'there is no attribute "XMLNS:UTILITY"' } } }, ### 2 ### { input => [ q(c:\\temp\\file:116:49:1075801588.108:E: there is no attribute "XMLNS:UTILITY"), q(c:\\temp\\file), 0, 1, 0, ], output => { primary_message => { Number => '108', ColumnNumber => '49', Module => '1075801588', Severity => 'E', LineNumber => '116', Text => 'there is no attribute "XMLNS:UTILITY"' } } }, ### 3 ### { input => [ q(c:\\temp\\file:116:49:E: there is no attribute "XMLNS:UTILITY"), q(c:\\temp\\file), 0, 0, 0, ], output => { primary_message => { ColumnNumber => '49', Severity => 'E', LineNumber => '116', Text => 'there is no attribute "XMLNS:UTILITY"' } } }, ### 4 ### { input => [ q(0:320:175:1075801588.338:W: cannot generate system identifier for general entity "AP"), q(0), 0, 1, 0, ], output => { primary_message => { Number => '338', ColumnNumber => '175', Module => '1075801588', Severity => 'W', LineNumber => '320', Text => 'cannot generate system identifier for general entity "AP"' } } }, ); foreach (@tests) { my $inpu = $_->{input}; my $outp = $_->{output}; my $resu = SGML::Parser::OpenSP::Tools::split_message(@$inpu); is_deeply($resu, $outp); } SGML-Parser-OpenSP-0.994/t/18halt.t0000700000175300017630000000377410121527177015426 0ustar bjoerncpan# 18halt.t -- ... # # $Id: 18halt.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 10; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## normal halt ######################################################### sub TestHandler13::new{bless{p=>$_[1],ok1=>0,ok2=>0},shift} sub TestHandler13::start_element { my $s = shift; my $e = shift; my $o = $s->{p}; return unless defined $s; return unless defined $e; return unless defined $o; $s->{ok1}++; $o->halt; } sub TestHandler13::end_element { my $s = shift; $s->{ok2}--; } my $h13 = TestHandler13->new($p); $p->handler($h13); lives_ok { $p->parse(NO_DOCTYPE); } 'normal halt'; ok($h13->{ok1}, 'halt handler called'); is($h13->{ok2}, 0, 'halt stops events'); ######################################################### ## halt via die in handler ######################################################### sub TestHandler14::new{bless{ok1=>0,ok2=>0},shift} sub TestHandler14::start_element { my $s = shift; my $e = shift; return unless defined $s and defined $e; $s->{ok1}++; die "SUCKS!" } sub TestHandler14::end_element { my $s = shift; my $e = shift; return unless defined $s and defined $e; $s->{ok2}--; } my $h14 = TestHandler14->new; $p->handler($h14); throws_ok { $p->parse(NO_DOCTYPE) } qr/SUCKS!/, 'die in handler propagates'; ok($h14->{ok1}); is($h14->{ok2}, 0, 'die in handler halts'); $p->handler(bless{},'NullHandler'); lives_ok { $p->parse(NO_DOCTYPE) } 'object still usable after die in handler'; SGML-Parser-OpenSP-0.994/t/19refcounting.t0000700000175300017630000000215410121527177017011 0ustar bjoerncpan# 19refcounting.t -- ... # # $Id: 19refcounting.t,v 1.1 2004/09/14 08:40:31 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 5; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Parser refcounting ######################################################### # this is not exactly what I want, the issue here is that # I would like to tell whether in this cleanup process is # an attempt to free an unreferenced scalar for which Perl # would not croak but write to STDERR lives_ok { my $x = SGML::Parser::OpenSP->new; my $y = \$x; undef $x; undef $y; } 'parser refcounting 1'; lives_ok { my $x = SGML::Parser::OpenSP->new; my $y = \$x; undef $y; undef $x; } 'parser refcounting 2'; SGML-Parser-OpenSP-0.994/t/20passfd.t0000700000175300017630000000266410277704127015750 0ustar bjoerncpan# 20passfd.t -- ... # # $Id: 20passfd.t,v 1.4 2005/08/14 18:07:19 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 7; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); sub TestHandler20::new { bless{ok1=>0},shift } sub TestHandler20::start_element { shift->{ok1}++ } sub TestHandler21::new { bless{ok1=>0},shift } sub TestHandler21::start_element { shift->{ok1}++ } # # Check pass as filename (should work on all platforms). $p = SGML::Parser::OpenSP->new; my $h1 = TestHandler20->new; $p->handler($h1); $p->pass_file_descriptor(0); lives_ok { $p->parse_string("") } 'parse_string with temp file name'; is($h1->{ok1}, 1, "temp file name handler called"); undef $p; # # Check pass as file descriptor (not on Win32). SKIP: { skip 'passing fds for temp files not supported on Win32', 2 if $^O eq 'MSWin32'; $p = SGML::Parser::OpenSP->new; my $h2 = TestHandler21->new; $p->handler($h2); $p->pass_file_descriptor(1); lives_ok { $p->parse_string("") } 'parse by fd'; is($h2->{ok1}, 1, "temp file descriptor handler called"); undef $p; } SGML-Parser-OpenSP-0.994/t/21parsestring.t0000700000175300017630000000361210402765037017021 0ustar bjoerncpan# 06parseliteral.t -- ... # # $Id: 21parsestring.t,v 1.1 2006/03/06 08:51:59 tbe Exp $ use strict; use warnings; use Test::More tests => 13; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Parse using ->parse_string(). ######################################################### sub TestHandler1::new { bless{ok1=>0,ok2=>0,ok3=>0,ok4=>0,ok5=>0, ok6=>0,ok7=>0,ok8=>0,ok9=>0,oka=>0},shift } sub TestHandler1::start_element { my $s = shift; my $e = shift; return unless defined $s; return unless defined $e; $s->{ok1}++ if UNIVERSAL::isa($s, 'TestHandler1'); # Name $s->{ok2}++ if exists $e->{Name}; $s->{ok3}++ if $e->{Name} =~ /no-doctype/i; # Attributes $s->{ok4}++ if exists $e->{Attributes}; $s->{ok5}++ if UNIVERSAL::isa($e->{Attributes}, "HASH"); $s->{ok6}++ if scalar(keys(%{$_[1]->{Attributes}})) == 0; # Included $s->{ok7}++ if exists $e->{Included}; $s->{ok8}++ if $e->{Included} == 0; # ContentType $s->{ok9}++ if exists $e->{ContentType}; } my $h1 = TestHandler1->new; $p->handler($h1); lives_ok { $p->parse_string("") } 'parsing with parse_string()'; ok($h1->{ok1}, 'self to handler'); ok($h1->{ok2}, 'has name'); ok($h1->{ok3}, 'proper name'); ok($h1->{ok4}, 'has attrs'); ok($h1->{ok5}, 'attrs hash ref'); ok($h1->{ok6}, 'proper attrs'); ok($h1->{ok7}, 'has included'); ok($h1->{ok8}, 'included == 0'); ok($h1->{ok9}, 'has content type'); SGML-Parser-OpenSP-0.994/t/22mwarnings.t0000700000175300017630000000333310524065205016461 0ustar bjoerncpan# 10errors.t -- ... # # $Id: 22mwarnings.t,v 1.1 2006/11/07 11:14:13 hoehrmann Exp $ use strict; use warnings; use Test::More tests => 7; use Test::Exception; use File::Spec qw(); use constant NO_DOCTYPE => File::Spec->catfile('samples', 'no-doctype.xml'); use constant TEST_CATALOG => File::Spec->catfile('samples', 'test.soc'); BEGIN { use_ok('SGML::Parser::OpenSP') }; require_ok('SGML::Parser::OpenSP'); my $p = SGML::Parser::OpenSP->new; isa_ok($p, 'SGML::Parser::OpenSP'); ######################################################### ## Error reporting ######################################################### sub TestHandler5::new { bless{ok=>0},shift } sub TestHandler5::error { return unless @_ == 2; $_[0]->{ok}++ if $_[1]->{Message} =~ /:8:7:W:/; } my $h5 = TestHandler5->new; $p->catalogs(TEST_CATALOG); $p->warnings('xml'); $p->handler($h5); lives_ok { $p->parse("" . <<"__DOC__");

foo & bar

__DOC__ } 'ampersand as data'; is($h5->{ok}, 1, 'ampersand as data generates warning'); # special case $p->warnings(qw/non-sgml-char-ref valid no-duplicate xml/); lives_ok { $p->parse("" . <<"__DOC__");

foo & bar

__DOC__ } 'ampersand as data'; is($h5->{ok}, 2, 'ampersand as data generates warning'); SGML-Parser-OpenSP-0.994/t/98podsyn.t0000700000175300017630000000041510347064630016007 0ustar bjoerncpan# 99pod.t -- Minimally check POD for problems. # # $Id: 98podsyn.t,v 1.1 2005/12/11 19:02:00 tbe Exp $ use strict; use warnings; use Test::More; eval "use Test::Pod 1.00"; plan skip_all => "Test::Pod 1.00 required for testing POD" if $@; all_pod_files_ok(); SGML-Parser-OpenSP-0.994/t/99podcov.t0000700000175300017630000000060110347064630015763 0ustar bjoerncpan# 99pod.t -- Minimally check POD for code coverage. # # $Id: 99podcov.t,v 1.1 2005/12/11 19:02:00 tbe Exp $ use strict; use warnings; use Test::More; eval "use Test::Pod::Coverage"; plan skip_all => "Test::Pod::Coverage required for testing pod coverage" if $@; plan tests => 2; pod_coverage_ok('SGML::Parser::OpenSP'); pod_coverage_ok('SGML::Parser::OpenSP::Tools'); SGML-Parser-OpenSP-0.994/typemap0000700000175300017630000000100410277677260015272 0ustar bjoerncpanTYPEMAP SgmlParserOpenSP* SgmlParserOpenSPObj INPUT SgmlParserOpenSPObj if ($arg == NULL || !sv_isobject($arg)) croak(\"not a proper SGML::Parser::OpenSP object\\n\"); SV** svp = hv_fetch((HV*)SvRV($arg), \"__o\", 3, 0); if (!svp || !*svp) croak(\"not a proper SGML::Parser::OpenSP object\\n\"); $var = ($type)SvIV(*svp); if (!$var) croak(\"not a proper SGML::Parser::OpenSP object\\n\"); $var -> m_self = $arg;