SGML-DTDParse-2.00/0040755004705000001440000000000010266305433012627 5ustar ehoodusersSGML-DTDParse-2.00/lib/0040755004705000001440000000000010266305433013375 5ustar ehoodusersSGML-DTDParse-2.00/lib/SGML/0040755004705000001440000000000010266305433014137 5ustar ehoodusersSGML-DTDParse-2.00/lib/SGML/DTDParse/0040755004705000001440000000000010266305433015545 5ustar ehoodusersSGML-DTDParse-2.00/lib/SGML/DTDParse/Tokenizer.pm0100644004705000001440000001076610261624166020066 0ustar ehoodusers# -*- Perl -*- package SGML::DTDParse::Tokenizer; use strict; use vars qw($VERSION $CVS); $VERSION = do { my @r=(q$Revision: 2.1 $=~/\d+/g); sprintf "%d."."%03d"x$#r,@r }; $CVS = '$Id: Tokenizer.pm,v 2.1 2005/07/02 23:51:18 ehood Exp $ '; use strict; use Text::DelimMatch; require 5.000; require Carp; { package SGML::DTDParse::Tokenizer::Group; sub new { my($type, $cm) = @_; my($class) = ref($type) || $type; my($self) = {}; bless $self, $class; die "Bad call to SGML::DTDParse::Tokenizer::Group: $cm\n" if $cm !~ /^\((.*)\)(.?)$/s; $self->{'OCCURRENCE'} = $2; $self->{'CONTENT_MODEL'} = new SGML::DTDParse::Tokenizer $1, 1; return $self; } sub print { my($self, $depth) = @_; print "\t" x $depth, "(\n"; $self->{'CONTENT_MODEL'}->print($depth+1); print "\t" x $depth, ")\n"; } } { package SGML::DTDParse::Tokenizer::Element; sub new { my($type, $elem) = @_; my($class) = ref($type) || $type; my($self) = {}; bless $self, $class; die "Bad call to SGML::DTDParse::Tokenizer::Element: $elem\n" if $elem !~ /^(\S+?)([\*\?\+]?)$/s; $self->{'ELEMENT'} = $1; $self->{'OCCURRENCE'} = $2; return $self; } sub print { my($self, $depth) = @_; print "\t" x $depth, $self->{'ELEMENT'}, $self->{'OCCURRENCE'}, "\n"; } } { package SGML::DTDParse::Tokenizer::ParameterEntity; sub new { my($type, $pe) = @_; my($class) = ref($type) || $type; my($self) = {}; bless $self, $class; die "Bad call to SGML::DTDParse::Tokenizer::ParameterEntity: $pe\n" if $pe !~ /^(\S+)$/s; $self->{'PARAMETER_ENTITY'} = $1; return $self; } sub print { my($self, $depth) = @_; print "\t" x $depth, "%", $self->{'PARAMETER_ENTITY'}, ";\n"; } } { package SGML::DTDParse::Tokenizer::Connector; sub new { my($type, $con) = @_; my($class) = ref($type) || $type; my($self) = {}; bless $self, $class; die "Bad call to SGML::DTDParse::Tokenizer::Connector: $con\n" if $con !~ /^[\,\|\&]$/s; $self->{'CONNECTOR'} = $con; return $self; } sub print { my($self, $depth) = @_; print "\t" x $depth, $self->{'CONNECTOR'}, "\n"; } } sub new { my($type, $cm, $internal) = @_; my($class) = ref($type) || $type; my($self) = {}; my(@model) = (); bless $self, $class; $self->{'CONTENT_MODEL_STRING'} = $cm; # print "-->$cm\n"; if ($cm =~ /(.*?)\s\-(\(.*)$/) { my($excl) = $2; my($exclcm) = new SGML::DTDParse::Tokenizer $excl; $self->{'EXCLUSION'} = $exclcm; $cm = $1; } if ($cm =~ /(.*?)\s\+(\(.*)$/) { my($incl) = $2; my($inclcm) = new SGML::DTDParse::Tokenizer $incl; $self->{'INCLUSION'} = $inclcm; $cm = $1; } # print "==>$cm\n"; $cm =~ s/^\s+//sg; # Simplification: always make the content model a group; unless it's # declared content. # if (!$internal) { # print "$cm\n\n"; my($mc) = new Text::DelimMatch '\(', '\)[\?\+\*]*'; my($pre, $match, $rest) = $mc->match($cm); if ($cm ne 'EMPTY' && $cm ne 'CDATA' && $cm ne 'RCDATA') { if ($cm !~ /^\(/s || ($rest !~ /^\s*$/s)) { $cm = "($cm)"; } } } while ($cm ne "") { if ($cm =~ /^\(/s) { # group; my($mc) = new Text::DelimMatch '\(', '\)[\?\+\*]*'; my($pre, $match, $rest) = $mc->match($cm); my($group); # print "\tgroup:\n"; # print "\t\tp:$pre\n"; # print "\t\tm:$match\n"; # print "\t\tr:$rest\n"; $group = new SGML::DTDParse::Tokenizer::Group $match; push (@model, $group); $cm = $rest; } elsif ($cm =~ /^\%/s) { # parameter entity my($pe); my($pent); if ($cm =~ /%(.*?);?([\|\,\&\s].*)$/s) { $pe = $1; $cm = $2; } else { $pe = $cm; $cm = ""; $pe = $1 if $pe =~ /^\%(.*?);?$/s; } $pent = new SGML::DTDParse::Tokenizer::ParameterEntity $pe; push (@model, $pent); } elsif ($cm =~ /^[\,\|\&]/s) { # connector my($con) = new SGML::DTDParse::Tokenizer::Connector $&; $cm = $'; # print "\tconnector: $&\n"; push (@model, $con); } else { # element my($elem); my($element); if ($cm =~ /(.*?)([\|\,\&\s].*)$/s) { $elem = $1; $cm = $2; } else { $elem = $cm; $cm = ""; } $element = new SGML::DTDParse::Tokenizer::Element $elem; push (@model, $element); } $cm =~ s/^\s+//sg; } # print "<==\n"; @{$self->{'MODEL'}} = @model; return $self; } sub print { my($self) = shift; my($depth) = shift || 1; my(@model) = @{$self->{'MODEL'}}; local($_); foreach $_ (@model) { $_->print($depth); } } 1; SGML-DTDParse-2.00/lib/SGML/DTDParse/DTD.pm0100644004705000001440000010652310266076477016537 0ustar ehoodusers# -*- Perl -*- package SGML::DTDParse::DTD; use strict; use vars qw($VERSION $CVS); $VERSION = do { my @r=(q$Revision: 2.2 $=~/\d+/g); sprintf "%d."."%03d"x$#r,@r }; $CVS = '$Id: DTD.pm,v 2.2 2005/07/16 03:21:35 ehood Exp $ '; use Text::DelimMatch; use SGML::DTDParse; use SGML::DTDParse::Catalog; use SGML::DTDParse::Tokenizer; use SGML::DTDParse::ContentModel; use SGML::DTDParse::Util qw(entify); my $DTDVERSION = "1.0"; my $DTDPUBID = "-//Norman Walsh//DTD DTDParse V2.0//EN"; my $DTDSYSID = "dtd.dtd"; my $debug = 0; { package SGML::DTDParse::DTD::ENTITY; sub new { my($type, $dtd, $entity, $etype, $pub, $sys, $text) = @_; my $class = ref($type) || $type; my $self = {}; $text = $dtd->fix_entityrefs($text); if ($dtd->{'XML'} && ($pub && !$sys)) { $dtd->status("External entity declaration without system " . "identifer found in XML DTD. " . "This isn't an XML DTD.", 1); $dtd->{'XML'} = 0; } $self->{'DTD'} = $dtd; $self->{'NAME'} = $entity; $self->{'TYPE'} = $etype; $self->{'NOTATION'} = ""; $self->{'PUBLIC'} = $pub; $self->{'SYSTEM'} = $sys; $self->{'TEXT'} = $text; if ($etype =~ /^ndata (\S+)$/i) { $self->{'TYPE'} = 'ndata'; $self->{'NOTATION'} = $1; } if ($etype =~ /^cdata (\S+)$/i) { $self->{'TYPE'} = 'cdata'; $self->{'NOTATION'} = $1; } bless $self, $class; } sub name { my $self = shift; my $value = shift; $self->{'NAME'} = $value if defined($value); return $self->{'NAME'}; } sub type { my $self = shift; my $value = shift; $self->{'TYPE'} = $value if defined($value); return $self->{'TYPE'}; } sub notation { my $self = shift; my $value = shift; $self->{'NOTATION'} = $value if defined($value); return $self->{'NOTATION'}; } sub public { my $self = shift; my $value = shift; $self->{'PUBLIC'} = $value if defined($value); return $self->{'PUBLIC'}; } sub system { my $self = shift; my $value = shift; $self->{'SYSTEM'} = $value if defined($value); return $self->{'SYSTEM'}; } sub text { my $self = shift; my $value = shift; $self->{'TEXT'} = $value if defined($value); return $self->{'TEXT'}; } sub xml { my $self = shift; my $xml = ""; $xml .= "name() . "\"\n"; $xml .= " type=\"" . $self->type() . "\"\n"; $xml .= " notation=\"" . $self->notation() . "\"\n" if $self->notation(); if ($self->public() || $self->system()) { $xml .= " public=\"" . $self->public() . "\"\n" if $self->public(); $xml .= " system=\"" . $self->system() . "\"\n" if $self->system(); $xml .= "/>\n"; } else { my $text = $self->{'DTD'}->expand_entities($self->text()); $text =~ s/\&/\&/sg; $xml .= ">\n"; $xml .= "$text\n"; if ($self->{'DTD'}->{'UNEXPANDED_CONTENT'}) { $text = $self->text(); $text =~ s/\&/\&/sg; $xml .= "$text\n"; } $xml .= "\n"; } return $xml; } } { package SGML::DTDParse::DTD::ELEMENT; sub new { my($type, $dtd, $element, $stagm, $etagm, $cm, $incl, $excl) = @_; my $class = ref($type) || $type; my $self = {}; $cm = $dtd->fix_entityrefs($cm); $incl = $dtd->fix_entityrefs($incl); $excl = $dtd->fix_entityrefs($excl); if ($dtd->{'XML'} && ($cm eq 'CDATA')) { $dtd->status("CDATA declared element content found in XML DTD. " . "This isn't an XML DTD.", 1); $dtd->{'XML'} = 0; } if ($dtd->{'XML'} && ($stagm || $etagm)) { $dtd->status("Tag minimization found in XML DTD. " . "This isn't an XML DTD.", 1); $dtd->{'XML'} = 0; } $self->{'DTD'} = $dtd; $self->{'NAME'} = $element; $self->{'STAGM'} = $stagm; $self->{'ETAGM'} = $etagm; $self->{'CONMDL'} = $cm; $self->{'INCL'} = $incl; $self->{'EXCL'} = $excl; bless $self, $class; } sub name { my $self = shift; my $value = shift; $self->{'NAME'} = $value if defined($value); return $self->{'NAME'}; } sub type { return "element"; } sub starttag_min { my $self = shift; my $value = shift; $self->{'STAGM'} = $value if defined($value); return $self->{'STAGM'}; } sub endtag_min { my $self = shift; my $value = shift; $self->{'ETAGM'} = $value if defined($value); return $self->{'ETAGM'}; } sub content_model { my $self = shift; my $value = shift; $self->{'CONMDL'} = $value if defined($value); return $self->{'CONMDL'}; } sub inclusions { my $self = shift; my $value = shift; $self->{'INCL'} = $value if defined($value); return $self->{'INCL'}; } sub exclusions { my $self = shift; my $value = shift; $self->{'EXCL'} = $value if defined($value); return $self->{'EXCL'}; } sub xml_content_model { my $self = shift; my $wrapper = shift; my $model = shift; my $expand = shift; my $xml = ""; my ($text, $cmtok, $cm); # $text = $model; # $text =~ s/\%/\&/sg; # $xml = "<$wrapper text=\"$text\">\n"; $xml = "<$wrapper>\n"; $text = $expand ? $self->{'DTD'}->expand_entities($model) : $model; $cmtok = new SGML::DTDParse::Tokenizer $text; $cm = new SGML::DTDParse::ContentModel $cmtok; $xml .= $cm->xml(); $xml .= "\n"; return $xml; } sub xml { my $self = shift; my $xml = ""; my($text, $cmtok, $cm, $type); $text = $self->content_model(); $text = $self->{'DTD'}->expand_entities($text); $cmtok = new SGML::DTDParse::Tokenizer $text; $cm = new SGML::DTDParse::ContentModel $cmtok; $type = $cm->type(); $xml .= "name() . "\""; $xml .= " stagm=\"" . $self->starttag_min() . "\"" if $self->starttag_min(); $xml .= " etagm=\"" . $self->endtag_min() . "\"" if $self->endtag_min(); $xml .= "\n"; $xml .= " content-type=\"$type\""; $xml .= ">\n"; $xml .= $self->xml_content_model('content-model-expanded', $self->content_model(), 1); if ($self->{'DTD'}->{'UNEXPANDED_CONTENT'}) { $xml .= $self->xml_content_model('content-model', $self->content_model(), 0); } if ($self->inclusions()) { $xml .= $self->xml_content_model('inclusions', $self->inclusions(), 1); } if ($self->exclusions()) { $xml .= $self->xml_content_model('exclusions', $self->exclusions(), 1); } $xml .= "\n"; return $xml; } } { package SGML::DTDParse::DTD::ATTLIST; sub new { my $type = shift; my $dtd = shift; my $attlist = shift; my $attdecl = shift; my(@attrs) = @_; my $class = ref($type) || $type; my $self = {}; $self->{'DTD'} = $dtd; $self->{'NAME'} = $attlist; $self->{'TYPE'} = {}; $self->{'VALS'} = {}; $self->{'DEFV'} = {}; $self->{'DECL'} = $attdecl; while (@attrs) { my $name = shift @attrs; my $values = shift @attrs; my $attrtype = shift @attrs; my $defval = shift @attrs; $self->{'TYPE'}->{$name} = $attrtype; $self->{'VALS'}->{$name} = $values; $self->{'DEFV'}->{$name} = $defval; } bless $self, $class; } sub append { my $self = shift; my $dtd = shift; my $attlist = shift; my $attdecl = shift; my(@attrs) = @_; while (@attrs) { my $name = shift @attrs; my $values = shift @attrs; my $attrtype = shift @attrs; my $defval = shift @attrs; $self->{'TYPE'}->{$name} = $attrtype; $self->{'VALS'}->{$name} = $values; $self->{'DEFV'}->{$name} = $defval; } } sub name { my $self = shift; my $value = shift; $self->{'NAME'} = $value if defined($value); return $self->{'NAME'}; } sub type { return "attlist"; } sub text { my $self = shift; return $self->{'DECL'}; } sub attribute_list { my $self = shift; my(@attr) = keys %{$self->{'TYPE'}}; return @attr; } sub attribute_type { my $self = shift; my $attr = shift; my $value = shift; $self->{'TYPE'}->{$attr} = $value if defined($value); return $self->{'TYPE'}->{$attr}; } sub attribute_values { my $self = shift; my $attr = shift; my $value = shift; $self->{'VALS'}->{$attr} = $value if defined($value); return $self->{'VALS'}->{$attr}; } sub attribute_default { my $self = shift; my $attr = shift; my $value = shift; $self->{'DEFV'}->{$attr} = $value if defined($value); return $self->{'DEFV'}->{$attr}; } sub xml { my $self = shift; my $xml = ""; my(@attr) = $self->attribute_list(); my($attr, $text); $xml .= "name() . "\">\n"; my $cdata = $self->{'DECL'}; $cdata =~ s/&/&/sg; $cdata =~ s/$cdata\n"; foreach $attr (@attr) { $xml .= "attribute_type($attr); # $text =~ s/\%/\&/sg; $xml .= " type=\"$text\"\n"; $text = $self->attribute_values($attr); # $text =~ s/\%/\&/sg; my $enumtype = undef; if ($text =~ /^NOTATION \(/) { $enumtype = "notation"; $text = "(" . $'; # ' } if ($text =~ /^\(/) { $enumtype = "yes" if !defined($enumtype); $xml .= " enumeration=\"$enumtype\"\n"; $text =~ s/[\(\)\|]/ /g; $text =~ s/\s+/ /g; $text =~ s/^\s*//; $text =~ s/\s*$//; } $xml .= " value=\"$text\"\n"; $text = $self->attribute_default($attr); # $text =~ s/\%/\&/sg; $xml .= " default=\"$text\"/>\n"; } $xml .= "\n"; return $xml; } } { package SGML::DTDParse::DTD::NOTATION; sub new { my($type, $dtd, $notation, $pub, $sys, $text) = @_; my $class = ref($type) || $type; my $self = {}; $self->{'DTD'} = $dtd; $self->{'NAME'} = $notation; $self->{'PUBLIC'} = $pub; $self->{'SYSTEM'} = $sys; bless $self, $class; } sub name { my $self = shift; my $value = shift; $self->{'NAME'} = $value if defined($value); return $self->{'NAME'}; } sub type { return "notation"; } sub public { my $self = shift; my $value = shift; $self->{'PUBLIC'} = $value if defined($value); return $self->{'PUBLIC'}; } sub system { my $self = shift; my $value = shift; $self->{'SYSTEM'} = $value if defined($value); return $self->{'SYSTEM'}; } sub xml { my $self = shift; my $xml = ""; $xml .= "name() . "\"\n"; $xml .= " public=\"" . $self->public() . "\"\n" if $self->public(); if (!$self->public() || $self->system()) { $xml .= " system=\"" . $self->system() . "\"\n"; } $xml .= "/>\n"; return $xml; } } sub new { my $type = shift; my %param = @_; my $class = ref($type) || $type; my $self = bless {}, $class; my $cat = new SGML::DTDParse::Catalog (%param); $self->{'LASTMSGLEN'} = 0; $self->{'NEWLINE'} = 0; $self->{'CAT'} = $cat; $self->{'PENT'} = {}; $self->{'DECLS'} = []; $self->{'DECLS'}->[0] = 0; $self->{'PENTDECL'} = []; $self->{'PENTDECL'}->[0] = 0; $self->{'GENT'} = {}; $self->{'GENTDECL'} = []; $self->{'GENTDECL'}->[0] = 0; $self->{'ELEM'} = {}; $self->{'ATTR'} = {}; $self->{'NOTN'} = {}; $self->{'VERBOSE'} = $param{'Verbose'} || $param{'Debug'}; $self->debug($param{'Debug'}); $self->{'TITLE'} = $param{'Title'}; $self->{'UNEXPANDED_CONTENT'} = $param{'UnexpandedContent'} ? 1 : 0; $self->{'SOURCE_DTD'} = $param{'SourceDtd'}; $self->{'PUBLIC_ID'} = $param{'PublicId'}; $self->{'SYSTEM_ID'} = $param{'SystemId'}; $self->{'DECLARATION'} = $param{'Declaration'}; $self->{'XML'} = $param{'Xml'}; $self->{'NAMECASE_GEN'} = $param{'NamecaseGeneral'}; $self->{'NAMECASE_ENT'} = $param{'NamecaseEntity'}; # There's a deficiency in the way this code is written. The entity # boundaries are lost as entities are loaded, so there's no way to # keep track of the correct "current directory" for resolving # relative system identifiers. To work around this problem, the list # of all directories accessed is kept in a path, and that path is # searched for relative system identifiers. This could produce the # wrong results, but it doesn't seem very likely. A proper solution # may be implemented in the future. $self->{'SEARCHPATH'} = (); delete($self->{'DTD'}); # This isn't supposed to exist yet. return $self; } sub parse { my $self = shift; my $dtd = shift; my $dtd_fh = \*STDIN; local $_; die "Error: Already parsed " . $self->{'DTD'} . "\n" if $self->{'DTD'}; if (!$dtd) { if ($self->{'SYSTEM_ID'}) { $dtd = $self->{'CAT'}->system_map($self->{'SYSTEM_ID'}); } elsif ($self->{'PUBLIC_ID'}) { $dtd = $self->{'CAT'}->public_map($self->{'PUBLIC_ID'}); } } if (!$dtd) { $self->status('Reading DTD from stdin...', 1); $self->{'DTD'} = '0'; } else { $self->{'DTD'} = $dtd; } if (!$self->{'SYSTEM_ID'}) { $self->{'SYSTEM_ID'} = $self->{'DTD'}; } my $decl = $self->{'DECLARATION'}; if (!$decl) { if ($self->{'PUBLIC_ID'}) { $decl = $self->{'CAT'}->declaration($self->{'PUBLIC_ID'}); } else { my $pubid = $self->{'CAT'}->reverse_public_map($dtd); $decl = $self->{'CAT'}->declaration($pubid); } } if ($self->{'PUBLIC_ID'}) { $self->status('Public ID: ' . $self->{'PUBLIC_ID'}, 1); } else { $self->status('Public ID: unknown', 1); } $self->status('System ID: ' . $self->{'SYSTEM_ID'}, 1); if ($decl) { $self->{'DECLARATION'} = $decl; $self->status("SGML declaration: $decl", 1); my($xml, $namecase, $entitycase) = $self->parse_decl($decl); $self->{'XML'} = $xml; $self->{'NAMECASE_GEN'} = $namecase; $self->{'NAMECASE_ENT'} = $entitycase; } else { $self->status("SGML declaration: unknown, using defaults for xml and namecase", 1); } if ($dtd) { use Symbol; $dtd_fh = gensym; open($dtd_fh, $dtd) || die qq{Error: Unable to open "$dtd": $!\n}; } { # slurp up entire file local $/; $_ = <$dtd_fh>; } close ($dtd_fh) if $dtd; $self->add_to_searchpath($dtd || '.'); my ($tok, $rest) = $self->next_token($_); while ($tok) { if ($tok =~ /parse_entity($rest); } elsif ($tok =~ /parse_element($rest); } elsif ($tok =~ /parse_attlist($rest); } elsif ($tok =~ /parse_notation($rest); } elsif ($tok =~ /parse_markedsection($rest); } else { die "Error: Unexpected declaration: $tok\n"; } ($tok, $rest) = $self->next_token($rest); } $self->status("Parse complete.\n"); return $self; } sub parseCatalog { my $self = shift; my $catalog = shift; $self->{'CAT'}->parse($catalog); } sub verbose { my $self = shift; my $val = shift; my $verb = $self->{'VERBOSE'}; $self->{'VERBOSE'} = $val if defined($val); return $verb; } sub debug { my $self = shift; my $val = shift; my $dbg = $debug; if (defined($val)) { $debug = $val; if (ref($self)) { $self->{'DEBUG'} = $debug; } } return $dbg; } # ====================================================================== sub add_entity { my($self, $name, $type, $public, $system, $text) = @_; my $entity = new SGML::DTDParse::DTD::ENTITY $self, $name, $type, $public, $system, $text; my $count; if ($type eq 'param') { return if exists($self->{'PENT'}->{$name}); $count = $self->{'PENTDECL'}->[0] + 1; $self->{'PENT'}->{$name} = $count; $self->{'PENTDECL'}->[0] = $count; $self->{'PENTDECL'}->[$count] = $entity; $count = $self->{'DECLS'}->[0] + 1; $self->{'DECLS'}->[0] = $count; $self->{'DECLS'}->[$count] = $entity; } else { return if exists($self->{'GENT'}->{$name}); $count = $self->{'GENTDECL'}->[0] + 1; $self->{'GENT'}->{$name} = $count; $self->{'GENTDECL'}->[0] = $count; $self->{'GENTDECL'}->[$count] = $entity; $count = $self->{'DECLS'}->[0] + 1; $self->{'DECLS'}->[0] = $count; $self->{'DECLS'}->[$count] = $entity; } } sub pent { my $self = shift; my $name = shift; my $count = $self->{'PENT'}->{$name}; return undef if !$count; return $self->{'PENTDECL'}->[$count]; } sub gent { my $self = shift; my $name = shift; my $count = $self->{'GENT'}->{$name}; return undef if !$count; return $self->{'GENTDECL'}->[$count]; } sub declaration_count { my $self = shift; return $self->{'DECLS'}->[0]; } sub declarations { my $self = shift; my @decls = @{$self->{'DECLS'}}; shift @decls; return @decls; } # ====================================================================== sub xml_elements { my $self = shift; my $fh = shift; my %output = (); foreach $_ (keys %{$self->{'NOTN'}}) { print $fh $self->{'NOTN'}->{$_}->xml(), "\n"; } foreach $_ (keys %{$self->{'PENT'}}) { print $fh $self->pent($_)->xml(), "\n"; } foreach $_ (keys %{$self->{'GENT'}}) { print $fh $self->gent($_)->xml(), "\n"; } foreach $_ (keys %{$self->{'ELEM'}}) { print $fh $self->{'ELEM'}->{$_}->xml(), "\n"; print $fh $self->{'ATTR'}->{$_}->xml(), "\n" if exists ($self->{'ATTR'}->{$_}); $output{$_} = 1; } foreach $_ (keys %{$self->{'ATTR'}}) { print $fh $self->{'ATTR'}->{$_}->xml(), "\n" if !$output{$_}; } } sub xml { my $self = shift; my $fh = shift; my $count; print $fh "{'PENTDECL'}->[0]; $count++) { # my($pent) = $self->{'PENTDECL'}->[$count]; # next if $pent->system() || $pent->public(); # print $fh "name(), " \"%", $pent->name(), ";\">\n"; # } for ($count = 1; $count <= $self->{'GENTDECL'}->[0]; $count++) { my $gent = $self->{'GENTDECL'}->[$count]; if ($gent->type() ne 'sdata') { my $name = $gent->name(); my $text = $gent->text(); $text = "&#38;" if $text eq '&'; $text = "&#60;" if $text eq '<'; print $fh "\n"; } elsif ($gent->type() ne 'pi') { my $name = $gent->name(); my $text = $gent->text(); $text = "&#38;" if $text eq '&'; $text = "&#60;" if $text eq '<'; print $fh "\n"; } } print $fh "]>\n"; print $fh "{'TITLE'}), "\"\n"; print $fh " namecase-general=\"", $self->{'NAMECASE_GEN'}, "\"\n"; print $fh " namecase-entity=\"", $self->{'NAMECASE_ENT'}, "\"\n"; print $fh " xml=\"", $self->{'XML'}, "\"\n"; print $fh " system-id=\"", entify($self->{'SYSTEM_ID'}), "\"\n"; print $fh " public-id=\"", entify($self->{'PUBLIC_ID'}), "\"\n"; print $fh " declaration=\"", $self->{'DECLARATION'}, "\"\n"; print $fh " created-by=\"DTDParse V$SGML::DTDParse::VERSION\"\n"; print $fh " created-on=\"", scalar(localtime()), "\"\n"; print $fh ">\n"; $self->xml_elements($fh); print $fh "\n"; } # ====================================================================== sub parse_entity { my $self = shift; my $dtd = shift; my($type, $name) = ('gen', undef); my($public, $system, $text) = ("", "", ""); my($tok); ($tok, $dtd) = $self->next_token($dtd); if ($tok eq '%') { $type = 'param'; ($tok, $dtd) = $self->next_token($dtd); } $name = $tok; $tok = $self->peek_token($dtd); if ($tok =~ /^[\"\']/) { # we're looking at text... ($text, $dtd) = $self->next_token($dtd); $text = $self->trim_quotes($text); } else { ($tok, $dtd) = $self->next_token($dtd); if ($tok =~ /public/i) { ($public, $dtd) = $self->next_token($dtd); $public = $self->trim_quotes($public); $tok = $self->peek_token($dtd); if ($tok ne '>') { ($system, $dtd) = $self->next_token($dtd); $system = $self->trim_quotes($system); } } elsif ($tok =~ /system/i) { ($system, $dtd) = $self->next_token($dtd); $system = $self->trim_quotes($system); } elsif ($tok =~ /^sdata$/i) { $type = 'sdata'; ($text, $dtd) = $self->next_token($dtd); $text = $self->trim_quotes($text); } elsif ($tok =~ /^pi$/i) { $type = 'pi'; ($text, $dtd) = $self->next_token($dtd); $text = $self->trim_quotes($text); } elsif ($tok =~ /^cdata$/i) { $type = 'cdata'; ($text, $dtd) = $self->next_token($dtd); $text = $self->trim_quotes($text); } else { die "Error: Unexpected declared entity type ($name): $tok\n"; } } ($tok, $dtd) = $self->next_token($dtd); if ($tok =~ /ndata/i) { ($tok, $dtd) = $self->next_token($dtd); # now $tok contains the notation name $type = "ndata $tok"; ($tok, $dtd) = $self->next_token($dtd); # now $tok should contain the token after the notation } elsif ($tok =~ /cdata/i) { ($tok, $dtd) = $self->next_token($dtd); # now $tok contains the notation name $type = "cdata $tok"; ($tok, $dtd) = $self->next_token($dtd); # now $tok should contain the token after the notation } if ($tok ne '>') { print "[[", substr($dtd, 0, 100), "]]\n"; die "Error: Unexpected token in ENTITY declaration: $tok\n"; } print STDERR "ENT: $type $name (P: $public) (S: $system) [$text]\n" if $debug>1; $self->status("Entity $name"); $self->add_entity($name, $type, $public, $system, $text); return $dtd; } sub parse_element { my $self = shift; my $dtd = shift; my(@names) = (); my($stagm, $etagm) = ('', ''); my $mc = new Text::DelimMatch '\(', '\)[\?\+\*\,]*'; my($tok, $cm, $expand, $rest); my($incl, $excl, $name); ($tok, $dtd) = $self->next_token($dtd); if ($tok =~ /^\(/) { my($pre, $namegrp, $ntok, $rest); ($pre, $namegrp, $dtd) = $mc->match($tok . $dtd); ($ntok, $rest) = $self->next_token($namegrp); while ($ntok) { if ($ntok =~ /[\|\(\)]/) { # nop } else { push (@names, $ntok); } ($ntok, $rest) = $self->next_token($rest); } } else { push (@names, $tok); } # we need to look ahead a little bit here so that we can handle # the case where the start/end tag minimization flags are in # a parameter entity without accidentally expanding parameter # entities in the content model... ($tok, $dtd) = $self->next_token($dtd, 1); if ($tok =~ /^\%/) { # check to see what this is... ($expand, $rest) = $self->next_token($tok); if ($expand =~ /^[\-o]/is) { $stagm = $expand; $dtd = $rest . $dtd; ($etagm, $dtd) = $self->next_token($dtd); } else { $dtd = $tok . $dtd if $expand =~ /\S/; } } elsif ($tok =~ /^[\-o]/is) { $stagm = $tok; ($etagm, $dtd) = $self->next_token($dtd); } else { $dtd = $tok . $dtd; } # ok, now $dtd begins with the content model... ($tok, $dtd) = $self->next_token($dtd, 1); if ($tok eq '(') { my($pre, $match); ($pre, $match, $dtd) = $mc->match($tok . $dtd); $cm = $match; } else { $cm = $tok; } ($tok, $dtd) = $self->next_token($dtd); if ($tok eq '-') { my($pre, $match); ($pre, $match, $dtd) = $mc->match($tok . $dtd); $excl = $match; ($tok, $dtd) = $self->next_token($dtd); } if ($tok eq '+') { my($pre, $match); ($pre, $match, $dtd) = $mc->match($tok . $dtd); $incl = $match; ($tok, $dtd) = $self->next_token($dtd); } if ($tok ne '>') { die "Error: Unexpected token in ELEMENT declaration: $tok\n"; } foreach $name (@names) { $self->status("Element $name"); if (exists($self->{'ELEM'}->{$name})) { warn "Warning: Duplicate element declaration for $name ignored.\n"; } else { my $elem = new SGML::DTDParse::DTD::ELEMENT $self, $name, $stagm,$etagm, $cm, $incl, $excl; $self->{'ELEM'}->{$name} = $elem; my $count = $self->{'DECLS'}->[0] + 1; $self->{'DECLS'}->[0] = $count; $self->{'DECLS'}->[$count] = $elem; } print STDERR "ELEM: $name = $cm -($excl) +($incl)\n" if $debug>1; } return $dtd; } sub parse_attlist { my $self = shift; my $dtd = shift; my(@names) = (); my $mc = new Text::DelimMatch '\(', '\)[\?\+\*\,]*'; my(@attr) = (); my($name, $values, $defval, $type, $tok, $notation_hack); # name is name # values is CDATA or an enumeration (for example) # defval is a default value # type is #IMPLIED, #FIXED, #REQUIRED, etc. ($tok, $dtd) = $self->next_token($dtd); if ($tok =~ /^\(/) { my($pre, $namegrp, $ntok, $rest); ($pre, $namegrp, $dtd) = $mc->match($tok . $dtd); ($ntok, $rest) = $self->next_token($namegrp); while ($ntok) { if ($ntok =~ /[\|\(\)]/) { # nop } else { push (@names, $ntok); } ($ntok, $rest) = $self->next_token($rest); } } else { push (@names, $tok); } print STDERR "\nATTLIST ", join(" ", @names), "\n" if $debug > 2; # now we're looking at the attribute declarations... # first grab the whole darn thing, unexpanded... # this is a tad iffy, perhaps, but I think it always works... $dtd =~ /^(.*?)>/is; my $attdecl = $1; # then we can look at the expanded thing... ($tok, $dtd) = $self->next_token($dtd); while ($tok ne '>') { $name = $tok; ($values, $dtd) = $self->next_token($dtd); $defval = ""; $type = ""; print STDERR "$name\n" if $debug > 2; $notation_hack = ""; if ($values =~ /^notation$/i) { if ($self->peek_token($dtd)) { $notation_hack = "NOTATION "; ($values, $dtd) = $self->next_token($dtd); } } if ($values eq '(') { my(@enum) = (); my($pre, $enum, $ntok, $rest); ($pre, $enum, $dtd) = $mc->match($values . $dtd); ($ntok, $rest) = $self->next_token($enum); print STDERR "\$rest = $rest\n" if $debug>4; while ($ntok ne '') { print STDERR "\$ntok = $ntok\n" if $debug>4; if ($ntok =~ /[,\|\(\)]/) { # nop } else { print STDERR "Adding to \@enum: $ntok\n" if $debug>4; push (@enum, $ntok); } ($ntok, $rest) = $self->next_token($rest); } $values = $notation_hack . '(' . join("|", @enum) . ')'; } print STDERR "\t$values\n" if $debug > 2; ($type, $dtd) = $self->next_token($dtd); print STDERR "\t$type\n" if $debug > 2; if ($type =~ /\#FIXED/i) { ($defval, $dtd) = $self->next_token($dtd); $defval = $self->trim_quotes($defval) if $defval =~ /^[\"\']/; } elsif ($type !~ /^\#/) { $defval = $type; $defval = $self->trim_quotes($defval) if $defval =~ /^[\"\']/; $type = ""; } print STDERR "\t$defval\n" if $debug > 2; push (@attr, $name, $values, $type, $defval); ($tok, $dtd) = $self->next_token($dtd); } foreach $name (@names) { $self->status("Attlist $name"); if (exists($self->{'ATTR'}->{$name})) { my $attlist = $self->{'ATTR'}->{$name}; $attlist->append($self, $name, $attdecl, @attr); warn ": duplicate attlist declaration for $name appended.\n"; } else { my $attlist = new SGML::DTDParse::DTD::ATTLIST $self, $name, $attdecl, @attr; $self->{'ATTR'}->{$name} = $attlist; my $count = $self->{'DECLS'}->[0] + 1; $self->{'DECLS'}->[0] = $count; $self->{'DECLS'}->[$count] = $attlist; } } return $dtd; } sub parse_notation { my $self = shift; my $dtd = shift; my $name = undef; my($public, $system, $text) = ("", "", ""); my($tok); ($name, $dtd) = $self->next_token($dtd); ($tok, $dtd) = $self->next_token($dtd); if ($tok =~ /public/i) { ($public, $dtd) = $self->next_token($dtd); $public = $self->trim_quotes($public); $tok = $self->peek_token($dtd); if ($tok ne '>') { ($system, $dtd) = $self->next_token($dtd); $system = $self->trim_quotes($system); } } elsif ($tok =~ /system/i) { $tok = $self->peek_token($dtd); if ($tok eq '>') { $system = ""; } else { ($system, $dtd) = $self->next_token($dtd); $system = $self->trim_quotes($system); } } else { $text = $self->trim_quotes($tok); } ($tok, $dtd) = $self->next_token($dtd); if ($tok ne '>') { die "Error: Unexpected token in NOTATION declaration: $tok\n"; } print STDERR "NOT: $name (P: $public) (S: $system) [$text]\n" if $debug > 1; $self->status("Notation $name"); if (exists($self->{'NOTN'}->{$name})) { warn "Warning: Duplicate notation declaration for $name ignored.\n"; } else { my $notation = new SGML::DTDParse::DTD::NOTATION $self, $name, $public, $system, $text; $self->{'NOTN'}->{$name} = $notation; my $count = $self->{'DECLS'}->[0] + 1; $self->{'DECLS'}->[0] = $count; $self->{'DECLS'}->[$count] = $notation; } return $dtd; } sub parse_markedsection { my $self = shift; my $dtd = shift; my $mc = new Text::DelimMatch ''; my($tok, $pre, $match, $ms); ($tok, $dtd) = $self->next_token($dtd); ($pre, $ms, $dtd) = $mc->match("$/s; $dtd = $1 . $dtd; } return $dtd; } sub peek_token { my $self = shift; my $dtd = shift; my $return_peref = shift; my $tok; ($tok, $dtd) = $self->next_token($dtd, $return_peref); return $tok; } sub next_token { my $self = shift; my $dtd = shift; my $return_peref = shift; $dtd =~ s/^\s*//sg; if ($dtd =~ /^/s) { # comment declaration return $self->next_token($'); # ' } if ($dtd =~ /^--.*?--/s) { # comment return $self->next_token($'); # ' } if ($dtd =~ /^<\?.*?>/s) { # processing instruction return $self->next_token($'); # ' } if ($dtd =~ /^ 3; return ($&, $'); # ' } if ($dtd =~ /^[\(\)\-\+\|\&\,\>]/) { # beginning of a model group, or incl., or excl., or end decl print STDERR "TOK: [$&]\n" if $debug > 3; return ($&, $'); # ' } if ($dtd =~ /^[\"\']/) { # quoted string $dtd =~ /^(([\"\'])(.*?)\2)/s; print STDERR "TOK: [$1]\n" if $debug > 3; return ($&, $'); # ' } if ($dtd =~ /^\%([a-zA-Z0-9\_\-\.]+);?/) { # peref print STDERR "TOK: [$1]\n" if $debug > 3; if ($return_peref) { return ("%$1;", $'); # ' } else { my $repltext = $self->entity_repl($1); $dtd = $repltext . $'; # ' return $self->next_token($dtd); } } if ($dtd =~ /^([^\s\|\&\,\(\)\[\]\>\%]+)/s) { # next non-space sequence print STDERR "TOK: [$1]\n" if $debug > 3; return ($1, $'); # ' } if ($dtd =~ /^(\%)/s) { # lone % (for param entity declarations) print STDERR "TOK: [$1]\n" if $debug > 3; return ($1, $'); } print STDERR "TOK: <>\n" if $debug > 3; return (undef, $dtd); } sub entity_repl { my $self = shift; my $name = shift; my $entity = $self->pent($name); local(*F, $_); die "Error: %$name; undeclared.\n" if !$entity; if ($entity->{'PUBLIC'} || $entity->{'SYSTEM'}) { my $id = ""; my $filename = ""; if ($entity->{'PUBLIC'}) { $id = $entity->{'PUBLIC'}; $filename = $self->{'CAT'}->public_map($id); } if (!$filename && $entity->{'SYSTEM'}) { $id = $entity->{'SYSTEM'}; $filename = $self->{'CAT'}->system_map($id); } if (!defined($filename)) { die "%Error: $name; ($id): not found in catalog.\n"; } if ($self->debug()) { $self->status("Loading $id\n\t($filename)", 1); } else { $self->status("Loading $id", 1); } $filename = $self->resolve_relativesystem($filename); $self->add_to_searchpath($filename); open (F, $filename) || die qq{\n%Error: $name;: Unable to open "$filename": $! \n}; { local $/; $_ = ; } close (F); return $_; } else { return $entity->{'TEXT'}; } } sub trim_quotes { my $self = shift; my $text = shift; if ($text =~ /^\"(.*)\"$/s) { $text = $1; } elsif ($text =~ /^\'(.*)\'$/s) { $text = $1; } else { die "Error: Unexpected text: $text\n"; } return $text; } sub fix_entityrefs { my $self = shift; my $text = shift; if ($text ne "") { my $value = ""; # make sure all entity references end in semi-colons while ($text =~ /^(.*?)([\&\%]\#?[-.:_a-z0-9]+;?)(.*)$/si) { my $entref = $2; $value .= $1; $text = $3; if ($entref =~ /\;$/s) { $value .= $entref; } else { $value .= $entref . ";"; } } $text = $value . $text; } return $text; } sub expand_entities { my $self = shift; my $text = shift; while ($text =~ /\%(.*?);/) { my $pre = $`; my $pename = $1; my $post = $'; # ' $text = $pre . $self->entity_repl($pename) . $post; } return $text; } sub parse_decl { my $self = shift; my $decl = shift; local (*F, $_); my $xml = 0; my $namecase_gen = 1; my $namecase_ent = 0; if (!open (F, $decl)) { $self->status(qq{Warning: Failed to load declaration "$decl": $!}, 1); return ($xml, $namecase_gen, $namecase_ent); } { local $/; $_ = ; } close (F); # {'SEARCHPATH'}}) { $found = 1 if $path eq $searchpath; } push (@{$self->{'SEARCHPATH'}}, $searchpath) if !$found && $searchpath; } sub resolve_relativesystem { my $self = shift; my $system = shift; my $found = 0; my $resolved = $system; return $system if ($system =~ /^\//) || ($system =~ /^[a-z]:[\\\/]/); foreach my $path (@{$self->{'SEARCHPATH'}}) { if (-f "$path/$system") { $found = 1; $resolved = "$path/$system"; last; } } if ($found) { $self->add_to_searchpath($resolved); } else { $self->status("Could not resolve relative path: $system", 1); } return $resolved; } sub status { my $self = shift; my $msg = shift; my $persist = shift; return if !$self->verbose(); if ($self->debug() || $self->{'NEWLINE'}) { print STDERR "\n"; } else { print STDERR "\r"; print STDERR " " x $self->{'LASTMSGLEN'}; print STDERR "\r"; } print STDERR $msg; $self->{'LASTMSGLEN'} = length($msg); $self->{'NEWLINE'} = $persist || (length($msg) > 79); } 1; __END__ =head1 NAME SGML::DTDParse::DTD - Parse an SGML or XML DTD. =head1 SYNOPSIS use SGML::DTDParse::DTD; $dtd = SGML::DTDParse::DTD->new( %options ); $dtd->parse($dtd_file); $dtd->xml($file_handle); =head1 DESCRIPTION B is the main module for parsing a DTD. Normally, this module is not used directly with the program L being the prefered usage model for parsing a DTD. =head1 CONSTRUCTOR METHODS TODO. =head1 METHODS TODO. =head1 SEE ALSO L See L for an overview of the DTDParse package. =head1 PREREQUISITES B =head1 AVAILABILITY EIE =head1 AUTHORS Originally developed by Norman Walsh, Endw@nwalsh.comE. Earl Hood Eearl@earlhood.comE picked up support and maintenance. =head1 COPYRIGHT AND LICENSE See L for copyright and license information. SGML-DTDParse-2.00/lib/SGML/DTDParse/Format/0040755004705000001440000000000010266305433016775 5ustar ehoodusersSGML-DTDParse-2.00/lib/SGML/DTDParse/Format/plain.pl0100644004705000001440000000044110261624166020433 0ustar ehoodusers# $Id: plain.pl,v 2.1 2005/07/02 23:51:18 ehood Exp $ # Set default display options (these settings override the settings # in dtd2*, and may subsequently be overridden by command line options. # # you can reassign options here to override default values... $option{'synopsis'} = 1; 1; SGML-DTDParse-2.00/lib/SGML/DTDParse/Format/refentry.pl0100644004705000001440000010624310261624166021175 0ustar ehoodusers# dtdformat module for RefEntrys # $Id: refentry.pl,v 2.1 2005/07/02 23:51:18 ehood Exp $ use SGML::DTDParse::Util qw(entify); $fileext = ".xml"; $config{'expanded-element-index'} = "elements"; $config{'unexpanded-element-index'} = "dtdelem"; $config{'expanded-entity-index'} = "entities"; $config{'unexpanded-entity-index'} = "dtdent"; $config{'notation-index'} = 'notations'; # ====================================================================== my $dtdparseHomepage = "http://sourceforge.net/projects/dtdparse/"; # ====================================================================== sub elementRefpurpose { my $count = shift; my $name = $elements[$count]; return "&$baseid.purp.elem.$name;"; } sub entityRefpurpose { my $count = shift; my $name = $entities[$count]; my $entity = $entities{$name}; return "&$baseid.purp." . $entity->getAttribute('type') . ".$name;"; } sub notationRefpurpose { my $count = shift; my $name = $notations[$count]; return "&$baseid.purp.notn.$name;"; } sub elementDescription { my $count = shift; return "desc\n"; } sub entityDescription { my $count = shift; return "desc\n"; } sub notationDescription { my $count = shift; return "desc\n"; } # ====================================================================== sub basenames { my @names = @_; my %basename = (); my %usedname = (); foreach my $name (@names) { my $count = 2; my $bname = lc($name); if ($usedname{$bname}) { $bname = lc($name) . $count; while ($usedname{$bname}) { $bname++; } } $basename{$name} = $bname; $usedname{$name} = 1; } return %basename; } # ====================================================================== sub formatElement { my $count = shift; my $html = ""; my $name = $elements[$count]; my $element = $elements{$name}; my $cmex = undef; my $cmunx = undef; my $incl = undef; my $excl = undef; my $node = $element->getFirstChild(); while ($node) { if ($node->getNodeType() == XML::DOM::ELEMENT_NODE) { $cmex = $node if $node->getTagName() eq 'content-model-expanded'; $cmunx = $node if $node->getTagName() eq 'content-model'; $incl = $node if $node->getTagName() eq 'inclusions'; $excl = $node if $node->getTagName() eq 'exclusions'; } $node = $node->getNextSibling(); } $html .= &formatElementHeader($count); $html .= &formatElementTitle($count); if ($option{'synopsis'}) { if ($expanded eq 'expanded') { $html .= &formatElementSynopsis($count, $cmex, $cmex); } else { $html .= &formatElementSynopsis($count, $cmunx, $cmex); } } $html .= &formatElementDescription($count, $count) if $option{'description'}; $html .= &formatElementExamples($count, $count) if $option{'examples'}; $html .= &formatElementFooter($count); } sub formatElementHeader { my $count = shift; my $html = ""; my $name = $elements[$count]; my $element = $elements{$name}; $html .= "\n"; $html .= "\n"; $html .= "\n\n"; return $html; } sub formatElementTitle { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; $html .= "\n"; $html .= ""; $html .= $element->getAttribute('name'); $html .= "\n"; $html .= "Element\n"; $html .= "\n\n"; $html .= "\n"; $html .= "" . $element->getAttribute('name') . "\n"; $html .= ""; $html .= &elementRefpurpose($count); $html .= "\n"; $html .= "\n\n"; } sub formatElementSynopsis { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $cm = shift; my $cmex = shift; my $html = ""; # What are the possibilities: mixed content, element content, or # declared content... my $mixed = $element->getAttribute('content-type') eq 'mixed'; my $declared = (!$mixed && $element->getAttribute('content-type') ne 'element'); $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "getAttribute('name') . " ::=\n"; $html .= &formatContentModel($count, $cm); $html .= "\n"; $html .= "\n"; $html .= &formatInclusions($count, $incl) if $incl && $option{'inclusions'}; $html .= &formatExclusions($count, $excl) if $excl && $option{'exclusions'}; $html .= &formatAttributeList($count) if $option{'attributes'}; $html .= &formatTagMinimization($count) if $option{'tag-minimization'}; $html .= &formatElementAppearsIn($count) if $option{'appears-in'}; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n\n"; return $html; } sub formatInclusions { my $count = shift; my $cm = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; $html .= "\n"; $html .= "getElementsByTagName("attribute"); for (my $count = 0; $count < $attrs->getLength(); $count++) { my $attr = $attrs->item($count); my $name = $attr->getAttribute('name'); my $type = $attr->getAttribute('value'); my $decltype = $attr->getAttribute('type'); my $default = ""; if ($decltype eq '#IMPLIED') { $default = "None"; } elsif ($decltype eq '#REQUIRED') { $default = "Required"; } elsif ($decltype eq '#CONREF') { $default = "Content reference"; } else { $default = $attr->getAttribute('default'); if ($default =~ /\"/) { $default = "'" . $default . "'"; } else { $default = "\"" . $default . "\""; } } if ($decltype eq '#FIXED') { $default = $default . " (fixed)"; } $html .= "\n"; $html .= &formatCell($name); $html .= &formatValues($type, $attr); $html .= &formatCell($default); $html .= "\n"; } return $html; } sub formatCell { my $value = shift; return "$value\n"; } sub formatValues { my $values = shift; my $attr = shift; my $enum = $attr->getAttribute('enumeration'); my $html = ""; if ($enum eq 'no' || $enum eq '') { return &formatCell($values); } $html .= ""; if ($enum eq 'notation') { $html .= "Enumerated notation:\n"; } else { $html .= "Enumeration:\n"; } $html .= "\n"; foreach my $val (sort { uc($a) cmp uc($b) } split(/\s+/, $attr->getAttribute('value'))) { $html .= "$val\n"; } $html .= "\n"; return $html; } sub formatTagMinimization { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; my $stagm = $element->getAttribute('stagm') || "-"; my $etagm = $element->getAttribute('etagm') || "-"; if ($element->getAttribute('stagm') || $element->getAttribute('etagm')) { my (%min) = ('--' => "Both the start- and end-tags are required for this element.", 'OO' => "Both the start- and end-tags are optional for this element, if your SGML declaration allows tag minimization.", 'O-' => "The start-tag is optional for this element, if your SGML declaration allows tag minimization. The end-tag is required.", '-O' => "The start-tag is required for this element. The end-tag is optional, if your SGML declaration allows minimization." ); $html .= "\n"; $html .= "getAttribute('name'); $html .= "\n"; } else { $html .= "\n"; } } $html .= "\n"; } } return $html; } sub formatElementDescription { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $desc = &elementDescription($count); my $html = ""; return "" if !defined($desc); $html .= "Description\n"; $html .= $desc; $html .= "\n\n"; $html .= &formatParents($count) if $option{'parents'}; $html .= &formatChildren($count) if $option{'children'}; $html .= "\n\n"; return $html; } sub formatParents { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; if (exists $PARENTS{$name}) { $html .= "Parents\n"; $html .= "These elements contain "; $html .= $element->getAttribute('name') . ":\n"; $html .= ""; my $pname; foreach $pname (sort { uc($a) cmp uc($b) } keys %{$PARENTS{$name}}) { my $child = $elements{$pname}; $html .= ""; $html .= ""; $html .= "" . $child->getAttribute('name') . ""; $html .= ""; $html .= "\n"; } $html .= ".\n"; $html .= "\n\n"; } return $html; } sub formatChildren { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; my $mixed = $element->getAttribute('content-type') eq 'mixed'; my $declared = (!$mixed && $element->getAttribute('content-type') ne 'element'); return "" if $declared; # can't be any children... if (exists $CHILDREN{$name} || exists $POSSINCL{$name} || exists $POSSEXCL{$name}) { $html .= "Children\n"; } if (exists $CHILDREN{$name}) { $html .= "The following elements occur in "; $html .= $element->getAttribute('name') . ":\n"; $html .= ""; my $cname; foreach $cname (sort { uc($a) cmp uc($b) } keys %{$CHILDREN{$name}}) { my $child = $elements{$cname}; die "Unexpected error (1): can't find element \"$cname\".\n" if !$child; $html .= ""; $html .= ""; $html .= ""; $html .= $child->getAttribute('name'); $html .= ""; $html .= ""; $html .= "\n"; } $html .= ".\n"; } if (exists $POSSINCL{$name}) { $html .= "In some contexts, the following elements are\n"; $html .= "allowed anywhere:\n"; $html .= "\n"; my $cname; foreach $cname (sort { uc($a) cmp uc($b) } keys %{$POSSINCL{$name}}) { my $child = $elements{$cname}; die "Unexpected error (2): can't find element \"$cname\".\n" if !$child; $html .= ""; $html .= ""; $html .= ""; $html .= $child->getAttribute('name'); $html .= ""; $html .= ""; $html .= "\n"; } $html .= ".\n\n"; } if (exists $POSSEXCL{$name}) { $html .= "In some contexts, the following elements are\n"; $html .= "excluded:\n"; $html .= "\n"; my $cname; foreach $cname (sort { uc($a) cmp uc($b) } keys %{$POSSEXCL{$name}}) { my $element = $elements{$cname}; $html .= ""; $html .= ""; $html .= ""; $html .= $element->getAttribute('name'); $html .= ""; $html .= ""; $html .= "\n"; } $html .= ".\n\n"; } if (exists $CHILDREN{$name} || exists $POSSINCL{$name} || exists $POSSEXCL{$name}) { $html .= "\n\n"; } return $html; } sub formatElementExamples { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; return ""; } sub formatElementFooter { my $count = shift; my $html = ""; $html .= "\n"; return $html; } # ---------------------------------------------------------------------- my $state = 'NONE'; my $depth = 0; my $col = 0; sub formatContentModel { my $count = shift; my $cm = shift; my $node = $cm->getFirstChild(); my $html = ""; $state = "NONE"; $depth = 0; $col = 0; while ($node) { if ($node->getNodeType == XML::DOM::ELEMENT_NODE) { $html .= formatContentModelElement($node); } $node = $node->getNextSibling(); } return $html; } sub formatContentModelElement { my $node = shift; my $html = ""; if ($node->getNodeType == XML::DOM::ELEMENT_NODE) { if ($node->getTagName() eq 'sequence-group') { $html .= &formatCMGroup($node, ","); } elsif ($node->getTagName() eq 'or-group') { $html .= &formatCMGroup($node, "|"); } elsif ($node->getTagName() eq 'and-group') { $html .= &formatCMGroup($node, "&"); } elsif ($node->getTagName() eq 'element-name') { $html .= &formatCMElement($node); } elsif ($node->getTagName() eq 'parament-name') { $html .= &formatCMParament($node); } elsif ($node->getTagName() eq 'pcdata') { $html .= &formatCMPCDATA($node); } elsif ($node->getTagName() eq 'cdata') { $html .= &formatCMCDATA($node); } elsif ($node->getTagName() eq 'rcdata') { $html .= &formatCMRCDATA($node); } elsif ($node->getTagName() eq 'empty') { $html .= &formatCMEMPTY($node); } elsif ($node->getTagName() eq 'any') { $html .= &formatCMANY($node); } else { die "Unexpected node: \"" . $node->getTagName() . "\"\n"; } $node = $node->getNextSibling(); } else { die "Unexpected node type.\n"; } return $html; } sub formatCMGroup { my $group = shift; my $occur = $group->getAttribute('occurrence'); my $sep = shift; my $first = 1; my $html = ""; if ($state ne 'NONE' && $state ne 'OPEN') { $html .= "\n"; $html .= " " x $depth if $depth > 0; $col = $depth; $state = 'NEWLINE'; } $html .= "("; $state = 'OPEN'; $depth++; $col++; my $node = $group->getFirstChild(); while ($node) { if ($node->getNodeType == XML::DOM::ELEMENT_NODE) { if (!$first) { $html .= $sep; $col++; if ($state ne 'NEWLINE' && ($col > 60)) { $html .= "\n"; $html .= " " x $depth if $depth > 0; $col = $depth; $state = 'NEWLINE'; } } $html .= &formatContentModelElement($node); $first = 0; } $node = $node->getNextSibling(); } $html .= ")"; $col++; if ($occur) { $html .= $occur; $col++; } $state = 'CLOSE'; $depth--; return $html; } sub formatCMElement { my $element = shift; my $name = $element->getAttribute('name'); my $occur = $element->getAttribute('occurrence'); my $href = ""; my $html = ""; $name = lc($name) if !$option{'case-sensitive'}; if ($state eq 'CLOSE') { $html .= "\n"; $html .= " " x $depth if $depth > 0; $col = $depth; $state = 'NEWLINE'; } $html .= ""; $html .= $element->getAttribute('name'); $html .= ""; $col += length($name); if ($occur) { $html .= $occur; $col++; } $state = 'ELEMENT'; return $html; } sub formatCMParament { my $element = shift; my $name = $element->getAttribute('name'); my $html = ""; if ($state eq 'CLOSE') { $html .= "\n"; $html .= " " x $depth if $depth > 0; $col = $depth; $state = 'NEWLINE'; } $html .= ""; $html .= "\%" . $name . ";"; $html .= ""; $col += length($name) + 2; $state = 'PARAMENT'; return $html; } sub formatCMPCDATA { my $html = ""; $html .= "#PCDATA"; $col += 7; $state = 'PCDATA'; return $html; } sub formatCMCDATA { my $html = ""; $html .= "CDATA"; $col += 5; $state = 'CDATA'; return $html; } sub formatCMRCDATA { my $html = ""; $html .= "RCDATA"; $col += 5; $state = 'RCDATA'; return $html; } sub formatCMEMPTY { my $html = ""; $html .= "EMPTY"; $col += 5; $state = 'EMPTY'; return $html; } sub formatCMANY { my $html = ""; $html .= "ANY"; $col += 3; $state = 'ANY'; return $html; } # ====================================================================== sub formatEntity { my $count = shift; my $name = $entities[$count]; my $entity = $entities{$name}; my $html = ""; my $textnl; if ($expanded eq 'expanded') { $textnl = $entity->getElementsByTagName("text-expanded"); } else { $textnl = $entity->getElementsByTagName("text"); } $html .= &formatEntityHeader($count); $html .= &formatEntityTitle($count); $html .= &formatEntitySynopsis($count, $textnl) if $option{'synopsis'}; $html .= &formatEntityAppearsIn($count) if $option{'appears-in'}; $html .= &formatEntityDescription($count) if $option{'description'}; $html .= &formatEntityExamples($count) if $option{'examples'}; $html .= &formatEntityFooter($count); return $html; } sub formatEntityHeader { my $count = shift; my $html = ""; my $name = $entities[$count]; $html .= "\n"; $html .= "\n"; $html .= "\n\n"; return $html; } sub formatEntityTitle { my $count = shift; my $name = $entities[$count]; my $entity = $entities{$name}; my $type = $entity->getAttribute("type"); my $html = ""; $html .= "\n"; $html .= ""; $html .= $entity->getAttribute('name'); $html .= "\n"; if ($type eq 'gen') { $html .= "General Entity\n"; } elsif ($type eq 'ndata' || $type eq 'cdata' || $type eq 'sdata' || $type eq 'pi') { $html .= "" . uc($type) . " Entity\n"; } else { $html .= "Parameter Entity\n"; } $html .= "\n\n"; $html .= "\n"; $html .= "" . $entity->getAttribute('name') . "\n"; $html .= ""; $html .= &entityRefpurpose($count); $html .= "\n"; $html .= "\n\n"; } sub formatEntitySynopsis { my $count = shift; my $textnl = shift; my $name = $entities[$count]; my $entity = $entities{$name}; my $html = ""; my $type = $entity->getAttribute("type"); my $public = entify($entity->getAttribute("public")); my $system = entify($entity->getAttribute("system")); my $text = ""; if ($textnl->getLength() > 0) { my $textnode = $textnl->item(0); my $content = $textnode->getFirstChild(); if ($content) { $text = $content->getData(); } else { $text = ""; } } $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "$match"; } else { $html .= $match; } } else { $html .= $match; } } $html .= $text; $html .= "\n"; $html .= "\n"; } } if ($type eq 'ndata' || $type eq 'cdata') { my $notation = $entity->getAttribute('notation'); $html .= uc($type) . " Entity"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "getAttribute('name') . ":\n"; $html .= "\n"; for (my $count = 0; $count <= $#ents; $count++) { my $entity = $entities{$ents[$count]}; $html .= ""; $html .= "getAttribute('type') . "."; $html .= $entity->getAttribute('name') . "\">"; $html .= $entity->getAttribute('name'); $html .= ""; $html .= "\n"; } $html .= "\n"; $html .= "\n"; } return $html; } sub formatEntityDescription { my $count = shift; my $name = $entities[$count]; my $entity = $entities{$name}; my $desc = &entityDescription($count); my $html = ""; return "" if !defined($desc); $html .= "Description\n"; $html .= $desc; $html .= "\n\n"; return $html; } sub formatEntityExamples { my $count = shift; my $name = $entities[$count]; my $entity = $entities{$name}; return ""; } sub formatEntityFooter { my $count = shift; my $html = ""; $html .= "\n"; return $html; } # ====================================================================== sub formatNotation { my $count = shift; my $html = ""; my $name = $notations[$count]; my $element = $notations{$name}; $html .= &formatNotationHeader($count); $html .= &formatNotationTitle($count); if ($option{'synopsis'}) { $html .= &formatNotationSynopsis($count); } $html .= &formatNotationDescription($count) if $option{'description'}; $html .= &formatNotationExamples($count) if $option{'examples'}; $html .= &formatNotationFooter($count); } sub formatNotationHeader { my $count = shift; my $html = ""; my $name = $notations[$count]; $html .= "\n"; $html .= "\n"; $html .= "\n\n"; return $html; } sub formatNotationTitle { my $count = shift; my $name = $notations[$count]; my $notation = $notations{$name}; my $html = ""; $html .= "\n"; $html .= ""; $html .= $notation->getAttribute('name'); $html .= "\n"; $html .= "Notation\n"; $html .= "\n\n"; $html .= "\n"; $html .= "" . $notation->getAttribute('name') . "\n"; $html .= ""; $html .= ¬ationRefpurpose($count); $html .= "\n"; $html .= "\n\n"; } sub formatNotationSynopsis { my $count = shift; my $name = $notations[$count]; my $notation = $notations{$name}; my $html = ""; my $public = entify($notation->getAttribute("public")); my $system = entify($notation->getAttribute("system")); $html .= "\n"; if ($public) { $html .= "\nPublic identifier:\n"; $html .= "$public."; $html .= "\n\n"; } if ($system) { $html .= "\nSystem identifier:\n"; $html .= "$system."; $html .= "\n\n"; } if (!$public && !$system) { $html .= "\nSystem specified\n"; $html .= "without a system identifier."; $html .= "\n\n"; } return $html; } sub formatNotationDescription { my $count = shift; my $name = $notations[$count]; my $notation = $notations{$name}; my $desc = ¬ationDescription($count); my $html = ""; return "" if !defined($desc); $html .= "Description\n"; $html .= $desc; $html .= "\n\n"; return $html; } sub formatNotationExamples { my $count = shift; my $name = $notations[$count]; my $element = $notations{$name}; return ""; } sub formatNotationFooter { my $count = shift; my $html = ""; $html .= "\n"; return $html; } # ====================================================================== sub writeElementIndexes { my $basedir = shift; my $title = entify($dtd->getDocumentElement->getAttribute('title')); my ($entfile, $sgmfile, $sysdir); local (*F, $_); $entfile = $basedir . "/" . $config{$expanded . "-element-index"} . ".ent"; $sgmfile = $basedir . "/" . $config{$expanded . "-element-index"} . $fileext; $sysdir = $config{$expanded . "-element-dir"}; open (F, ">$entfile"); foreach $name (@elements) { my $basename = $ELEMBASE{$name}; print F "\n"; print F "\n"; } close (F); open (F, ">$sgmfile"); print F "$title Element Reference\n"; foreach $name (@elements) { print F "&$baseid.elem.$name;\n"; } print F "\n"; close (F); } sub writeEntityIndexes { my $basedir = shift; my $title = entify($dtd->getDocumentElement->getAttribute('title')); my ($entfile, $sgmfile, $sysdir); local (*F, $_); $entfile = $basedir . "/" . $config{$expanded . "-entity-index"} . ".ent"; $sgmfile = $basedir . "/" . $config{$expanded . "-entity-index"} . $fileext; $sysdir = $config{$expanded . "-entity-dir"}; open (F, ">$entfile"); foreach $name (@entities) { my $entity = $entities{$name}; my $basename = $ENTBASE{$name}; print F "\n"; print F "getAttribute('type'), ".$name \"purpose\">\n"; } close (F); open (F, ">$sgmfile"); print F "$title Entity Reference\n"; foreach $name (@entities) { print F "&$baseid.param.$name;\n"; } print F "\n"; close (F); } sub writeNotationIndexes { my $basedir = shift; my $title = entify($dtd->getDocumentElement->getAttribute('title')); my ($notnfile, $sgmfile, $sysdir); local (*F, $_); $notnfile = $basedir . "/" . $config{"notation-index"} . ".ent"; $sgmfile = $basedir . "/" . $config{"notation-index"} . $fileext; $sysdir = $config{"notation-dir"}; open (F, ">$notnfile"); foreach $name (@notations) { my $notation = $notations{$name}; my $basename = $NOTBASE{$name}; print F "\n"; print F "\n"; } close (F); open (F, ">$sgmfile"); print F "$title Notation Reference\n"; foreach $name (@notations) { print F "&$baseid.notn.$name;\n"; } print F "\n"; close (F); } sub writeIndex { my $basedir = shift; my $title = entify($dtd->getDocumentElement->getAttribute('title')); my $entfile = $config{"expanded-entity-index"}; my $elemfile = $config{"expanded-element-index"}; my $notfile = $config{"notation-index"}; my $root = $dtd->getDocumentElement(); my $elements = $root->getElementsByTagName('element'); my $entities = $root->getElementsByTagName('entity'); my $notations = $root->getElementsByTagName('notation'); my $elemcount = $elements->getLength(); my $entcount = $entities->getLength(); my $notcount = $notations->getLength(); local (*F, $_); # nop; } 1; SGML-DTDParse-2.00/lib/SGML/DTDParse/Format/html.pl0100644004705000001440000012303410261624166020300 0ustar ehoodusers# dtdformat module for HTML # $Id: html.pl,v 2.1 2005/07/02 23:51:18 ehood Exp $ use SGML::DTDParse::Util qw(entify); $fileext = ".html"; $config{'home'} = 'index' . $fileext; $config{'expanded-element-index'} = "elements" . $fileext; $config{'unexpanded-element-index'} = "dtdelem" . $fileext; $config{'expanded-entity-index'} = "entities" . $fileext; $config{'unexpanded-entity-index'} = "dtdent" . $fileext; $config{'notation-index'} = 'notations' . $fileext; # ====================================================================== my $dtdparseHomepage = "http://sourceforge.net/projects/dtdparse/"; # ====================================================================== sub formatElement { my $count = shift; my $html = ""; my $name = $elements[$count]; my $element = $elements{$name}; my $cmex = undef; my $cmunx = undef; my $incl = undef; my $excl = undef; my $node = $element->getFirstChild(); while ($node) { if ($node->getNodeType() == XML::DOM::ELEMENT_NODE) { $cmex = $node if $node->getTagName() eq 'content-model-expanded'; $cmunx = $node if $node->getTagName() eq 'content-model'; $incl = $node if $node->getTagName() eq 'inclusions'; $excl = $node if $node->getTagName() eq 'exclusions'; } $node = $node->getNextSibling(); } $html .= &formatElementHeader($count); $html .= &formatElementTitle($count); if ($option{'synopsis'}) { if ($expanded eq 'expanded' || !$option{'unexpanded'}) { $html .= &formatElementSynopsis($count, $cmex, $cmex); } else { $html .= &formatElementSynopsis($count, $cmunx, $cmex); } } $html .= &formatInclusions($count, $incl) if $incl && $option{'inclusions'}; $html .= &formatExclusions($count, $excl) if $excl && $option{'exclusions'}; $html .= &formatAttributeList($count) if $option{'attributes'}; $html .= &formatTagMinimization($count) if $option{'tag-minimization'}; $html .= &formatElementAppearsIn($count) if $option{'appears-in'}; $html .= &formatElementDescription($count) if $option{'description'}; $html .= &formatParents($count) if $option{'parents'}; $html .= &formatChildren($count) if $option{'children'}; $html .= &formatElementExamples($count) if $option{'examples'}; $html .= &formatElementFooter($count); } sub formatElementHeader { my $count = shift; my $html = ""; my $name = $elements[$count]; my $element = $elements{$name}; my $basename = $ELEMBASE{$name}; my $title = $dtd->getDocumentElement()->getAttribute('title'); my %subtitle = ('expanded' => 'User Element View', 'unexpanded' => 'DTD Element View'); my %otherview = ('expanded' => 'DTD Element View', 'unexpanded' => 'User Element View'); my $otherpath = ""; if ($expanded eq 'expanded') { $otherpath = "../" . $config{'unexpanded-element-dir'} . "/"; } else { $otherpath = "../" . $config{'expanded-element-dir'} . "/"; } $html .= "\n\n$title: Element "; $html .= $element->getAttribute('name'); $html .= "\n"; $html .= "\n\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "
$title: " . $subtitle{$expanded} . ""; if ($option{'unexpanded'} || ($expanded eq 'unexpanded')) { $html .= "["; $html .= $otherview{$expanded}; $html .= "]"; } else { $html .= " "; } $html .= "
\n"; $html .= &headerLinks('none', 1); $html .= "\n"; if ($count > 0) { my $href = $ELEMBASE{$elements[$count-1]} . $fileext; $html .= "[Prev]\n"; } if ($count < $#elements) { my $href = $ELEMBASE{$elements[$count+1]} . $fileext; $html .= "[Next]\n"; } $html .= "
\n"; $html .= "
\n"; return $html; } sub formatElementTitle { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; $html .= "

Element " . $element->getAttribute('name') . "

\n"; } sub formatElementSynopsis { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $cm = shift; my $cmex = shift; my $html = ""; # What are the possibilities: mixed content, element content, or # declared content... my $mixed = $element->getAttribute('content-type') eq 'mixed'; my $declared = (!$mixed && $element->getAttribute('content-type') ne 'element'); $html .= "

Synopsis

\n"; if ($option{'content-model'}) { if ($mixed) { $html .= "

Mixed Content Model

\n"; } elsif ($declared) { $html .= "

Declared Content

\n"; } else { $html .= "

Content Model

\n"; } $html .= "
";
	$html .= &formatContentModel($count, $cm);
	$html .= "
\n"; return $html; } } sub formatInclusions { my $count = shift; my $cm = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; $html .= "

Inclusions

\n"; $html .= "
";

    $html .= &formatContentModel($count, $cm);

    $html .= "
\n"; return $html; } sub formatExclusions { my $count = shift; my $cm = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; $html .= "

Exclusions

\n"; $html .= "
";

    $html .= &formatContentModel($count, $cm);

    $html .= "
\n"; return $html; } sub formatAttributeList { my $count = shift; my $html = ""; my $name = $elements[$count]; my $element = $elements{$name}; my $attlist = $attlists{$name}; $html .= "

Attributes

\n"; if (defined($attlist)) { $html .= &formatAttributes($attlist); } else { $html .= "

None

\n"; } return $html; } sub formatAttributes { my $attlist = shift; my $html = ""; my $attrs = $attlist->getElementsByTagName("attribute"); $html .= "\n"; $html .= "\n"; $html .= ""; $html .= ""; $html .= ""; $html .= "\n"; for (my $count = 0; $count < $attrs->getLength(); $count++) { my $attr = $attrs->item($count); my $name = $attr->getAttribute('name'); my $type = $attr->getAttribute('value'); my $decltype = $attr->getAttribute('type'); my $default = ""; if ($decltype eq '#IMPLIED') { $default = "None"; } elsif ($decltype eq '#REQUIRED') { $default = "Required"; } elsif ($decltype eq '#CONREF') { $default = "Content reference"; } else { $default = $attr->getAttribute('default'); if ($default =~ /\"/) { $default = "'" . $default . "'"; } else { $default = "\"" . $default . "\""; } } if ($decltype eq '#FIXED') { $default = $default . " (fixed)"; } $html .= "\n"; $html .= &formatCell($name); $html .= &formatValues($type, $attr); $html .= &formatCell($default); $html .= "\n"; } $html .= "
NameTypeDefault Value
\n"; return $html; } sub formatCell { my $value = shift; $value = " " if $value =~ /^\s*$/; return "$value\n"; } sub formatValues { my $values = shift; my $attr = shift; my $enum = $attr->getAttribute('enumeration'); my $html = ""; if ($enum eq 'no' || $enum eq '') { return &formatCell($values); } $html .= ""; if ($enum eq 'notation') { $html .= "Enumerated notation:
\n"; } else { $html .= "Enumeration:
\n"; } my $first = 1; foreach my $val (sort { uc($a) cmp uc($b) } split(/\s+/, $attr->getAttribute('value'))) { $html .= "
\n" if !$first; $first = 0; $html .= "  $val"; } $html .= ""; return $html; } sub formatTagMinimization { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; my $stagm = $element->getAttribute('stagm') || "-"; my $etagm = $element->getAttribute('etagm') || "-"; if ($element->getAttribute('stagm') || $element->getAttribute('etagm')) { my (%min) = ('--' => "Both the start- and end-tags are required for this element.", 'OO' => "Both the start- and end-tags are optional for this element, if your SGML declaration allows tag minimization.", 'O-' => "The start-tag is optional for this element, if your SGML declaration allows tag minimization. The end-tag is required.", '-O' => "The start-tag is required for this element. The end-tag is optional, if your SGML declaration allows minimization." ); $html .= "

Tag Minimization

\n"; $html .= "

"; $html .= $min{$stagm . $etagm}; $html .= "

\n"; } return $html; } sub formatElementAppearsIn { my $count = shift; my $html = ""; my $elementname = $elements[$count]; my $element = $elements{$elementname}; my %appears = (); %appears = %{$APPEARSIN{$elementname}} if exists $APPEARSIN{$elementname}; if (%appears) { my @ents = sort { uc($a) cmp uc($b) } keys %appears; my $href = $config{$expanded . "-entity-dir"}; $html .= "

Parameter Entities

\n"; $html .= "

The following parameter entities contain "; $html .= $element->getAttribute('name') . ":\n"; my $first = 1; for (my $count = 0; $count <= $#ents; $count++) { my $entity = $entities{$ents[$count]}; my $basename = $ENTBASE{$ents[$count]} . $fileext; $html .= ",\n" if !$first; $first = 0; $html .= ""; $html .= $entity->getAttribute('name'); $html .= ""; } $html .= "

"; } return $html; } sub formatElementDescription { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; $html .= "

Description

\n"; return $html; } sub formatParents { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; if (exists $PARENTS{$name}) { $html .= "

Parents

\n"; $html .= "

"; my $first = 1; my $pname; foreach $pname (sort { uc($a) cmp uc($b) } keys %{$PARENTS{$name}}) { my $child = $elements{$pname}; my $href = $ELEMBASE{$pname} . $fileext; $html .= ",\n" if !$first; $first = 0; $html .= ""; $html .= $child->getAttribute('name'); $html .= ""; } $html .= "

\n"; } return $html; } sub formatChildren { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; my $html = ""; my $mixed = $element->getAttribute('content-type') eq 'mixed'; my $declared = (!$mixed && $element->getAttribute('content-type') ne 'element'); return "" if $declared; # can't be any children... if (exists $CHILDREN{$name}) { $html .= "

Children

\n"; $html .= "

"; my $first = 1; my $cname; foreach $cname (sort { uc($a) cmp uc($b) } keys %{$CHILDREN{$name}}) { my $child = $elements{$cname}; my $href = $ELEMBASE{$cname} . $fileext; die "Unexpected error (1): can't find element \"$cname\".\n" if !$child; $html .= ",\n" if !$first; $first = 0; $html .= ""; $html .= $child->getAttribute('name'); $html .= ""; } $html .= "

\n"; } if (exists $POSSINCL{$name}) { $html .= "

In some contexts, the following elements are\n"; $html .= "allowed anywhere: "; my $first = 1; my $cname; foreach $cname (sort { uc($a) cmp uc($b) } keys %{$POSSINCL{$name}}) { my $child = $elements{$cname}; my $href = $ELEMBASE{$cname} . $fileext; die "Unexpected error (2): can't find element \"$cname\".\n" if !$child; $html .= ",\n" if !$first; $first = 0; $html .= ""; $html .= $child->getAttribute('name'); $html .= ""; } $html .= "

\n"; } if (exists $POSSEXCL{$name}) { $html .= "

In some contexts, the following elements are\n"; $html .= "excluded: "; my $first = 1; my $cname; foreach $cname (sort { uc($a) cmp uc($b) } keys %{$POSSEXCL{$name}}) { my $element = $elements{$cname}; my $href = $ELEMBASE{$cname} . $fileext; $html .= ",\n" if !$first; $first = 0; $html .= ""; $html .= $element->getAttribute('name'); $html .= ""; } $html .= "

\n"; } return $html; } sub formatElementExamples { my $count = shift; my $name = $elements[$count]; my $element = $elements{$name}; return ""; } sub formatElementFooter { my $count = shift; my $html = ""; $html .= "

\n"; $html .= "
\n"; $html .= "HTML Presentation of "; $html .= $dtd->getDocumentElement()->getAttribute('title'); $html .= " by "; $html .= "DTDParse (version $main::VERSION).\n"; $html .= "\n"; $html .= "\n"; return $html; } # ---------------------------------------------------------------------- my $state = 'NONE'; my $depth = 0; my $col = 0; sub formatContentModel { my $count = shift; my $cm = shift; my $node = $cm->getFirstChild(); my $html = ""; while ($node) { if ($node->getNodeType == XML::DOM::ELEMENT_NODE) { $html .= formatContentModelElement($node); } $node = $node->getNextSibling(); } return $html; } sub formatContentModelElement { my $node = shift; my $html = ""; if ($node->getNodeType == XML::DOM::ELEMENT_NODE) { if ($node->getTagName() eq 'sequence-group') { $html .= &formatCMGroup($node, ","); } elsif ($node->getTagName() eq 'or-group') { $html .= &formatCMGroup($node, "|"); } elsif ($node->getTagName() eq 'and-group') { $html .= &formatCMGroup($node, "&"); } elsif ($node->getTagName() eq 'element-name') { $html .= &formatCMElement($node); } elsif ($node->getTagName() eq 'parament-name') { $html .= &formatCMParament($node); } elsif ($node->getTagName() eq 'pcdata') { $html .= &formatCMPCDATA($node); } elsif ($node->getTagName() eq 'cdata') { $html .= &formatCMCDATA($node); } elsif ($node->getTagName() eq 'rcdata') { $html .= &formatCMRCDATA($node); } elsif ($node->getTagName() eq 'empty') { $html .= &formatCMEMPTY($node); } elsif ($node->getTagName() eq 'any') { $html .= &formatCMANY($node); } else { die "Unexpected node: \"" . $node->getTagName() . "\"\n"; } $node = $node->getNextSibling(); } else { die "Unexpected node type.\n"; } return $html; } sub formatCMGroup { my $group = shift; my $occur = $group->getAttribute('occurrence'); my $sep = shift; my $first = 1; my $html = ""; if ($state ne 'NONE' && $state ne 'OPEN') { $html .= "\n"; $html .= " " x $depth if $depth > 0; $col = $depth; $state = 'NEWLINE'; } $html .= "("; $state = 'OPEN'; $depth++; $col++; my $node = $group->getFirstChild(); while ($node) { if ($node->getNodeType == XML::DOM::ELEMENT_NODE) { if (!$first) { $html .= $sep; $col++; if ($state ne 'NEWLINE' && ($col > 60)) { $html .= "\n"; $html .= " " x $depth if $depth > 0; $col = $depth; $state = 'NEWLINE'; } } $html .= &formatContentModelElement($node); $first = 0; } $node = $node->getNextSibling(); } $html .= ")"; $col++; if ($occur) { $html .= $occur; $col++; } $state = 'CLOSE'; $depth--; return $html; } sub formatCMElement { my $element = shift; my $name = $element->getAttribute('name'); my $occur = $element->getAttribute('occurrence'); my $href = ""; my $html = ""; $name = lc($name) if !$option{'namecase-general'}; $name = lc($name) if $option{'namecase-general'}; $href = $ELEMBASE{$name} . $fileext; if ($state eq 'CLOSE') { $html .= "\n"; $html .= " " x $depth if $depth > 0; $col = $depth; $state = 'NEWLINE'; } $html .= ""; $html .= $element->getAttribute('name'); $html .= ""; $col += length($name); if ($occur) { $html .= $occur; $col++; } $state = 'ELEMENT'; return $html; } sub formatCMParament { my $element = shift; my $name = $element->getAttribute('name'); my $href = ""; my $html = ""; $href = "../" . $config{$expanded . '-entity-dir'}; $href .= "/" . $ENTBASE{$name} . $fileext; if ($state eq 'CLOSE') { $html .= "\n"; $html .= " " x $depth if $depth > 0; $col = $depth; $state = 'NEWLINE'; } $html .= ""; $html .= "\%" . $name . ";"; $html .= ""; $col += length($name) + 2; $state = 'PARAMENT'; return $html; } sub formatCMPCDATA { my $html = ""; $html .= "#PCDATA"; $col += 7; $state = 'PCDATA'; return $html; } sub formatCMCDATA { my $html = ""; $html .= "CDATA"; $col += 5; $state = 'CDATA'; return $html; } sub formatCMRCDATA { my $html = ""; $html .= "RCDATA"; $col += 5; $state = 'RCDATA'; return $html; } sub formatCMEMPTY { my $html = ""; $html .= "EMPTY"; $col += 5; $state = 'EMPTY'; return $html; } sub formatCMANY { my $html = ""; $html .= "ANY"; $col += 3; $state = 'ANY'; return $html; } # ====================================================================== sub formatEntity { my $count = shift; my $name = $entities[$count]; my $entity = $entities{$name}; my $html = ""; my $textnl; if ($expanded eq 'expanded') { $textnl = $entity->getElementsByTagName("text-expanded"); } else { $textnl = $entity->getElementsByTagName("text"); } $html .= &formatEntityHeader($count); $html .= &formatEntityTitle($count); $html .= &formatEntitySynopsis($count, $textnl) if $option{'synopsis'}; $html .= &formatEntityAppearsIn($count) if $option{'appears-in'}; $html .= &formatEntityDescription($count) if $option{'description'}; $html .= &formatEntityExamples($count) if $option{'examples'}; $html .= &formatEntityFooter($count); return $html; } sub formatEntityHeader { my $count = shift; my $html = ""; my $name = $entities[$count]; my $entity = $entities{$name}; my $basename = $ENTBASE{$name}; my $title = $dtd->getDocumentElement()->getAttribute('title'); my %subtitle = ('expanded' => 'User Entity View', 'unexpanded' => 'DTD Entity View'); my %otherview = ('expanded' => 'DTD Entity View', 'unexpanded' => 'User Entity View'); my $otherpath = ""; if ($expanded eq 'expanded') { $otherpath = "../" . $config{'unexpanded-entity-dir'} . "/"; } else { $otherpath = "../" . $config{'expanded-entity-dir'} . "/"; } $html .= "\n\n$title: Entity "; $html .= $entity->getAttribute('name'); $html .= "\n"; $html .= "\n\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "
$title: " . $subtitle{$expanded} . ""; if ($option{'unexpanded'} || ($expanded eq 'unexpanded')) { $html .= "["; $html .= $otherview{$expanded}; $html .= "]"; } else { $html .= " "; } $html .= "
\n"; $html .= &headerLinks('none', 1); $html .= "\n"; if ($count > 0) { my $href = $ENTBASE{$entities[$count-1]} . $fileext; $html .= "[Prev]\n"; } if ($count < $#entities) { my $href = $ENTBASE{$entities[$count+1]} . $fileext; $html .= "[Next]\n"; } $html .= "
\n"; $html .= "
\n"; } sub formatEntityTitle { my $count = shift; my $name = $entities[$count]; my $element = $entities{$name}; my $html = ""; $html .= "

Entity " . $element->getAttribute('name') . "

\n"; } sub formatEntitySynopsis { my $count = shift; my $textnl = shift; my $name = $entities[$count]; my $entity = $entities{$name}; my $html = ""; my $type = $entity->getAttribute("type"); my $public = $entity->getAttribute("public"); my $system = $entity->getAttribute("system"); my $text = ""; if ($textnl->getLength() > 0) { my $textnode = $textnl->item(0); my $content = $textnode->getFirstChild(); if ($content) { $text = $content->getData(); } else { $text = ""; } } $html .= "

Synopsis

\n"; if ($type eq 'gen') { if ($public || $system) { $html .= "

External General Entity

\n"; $html .= "

Public identifier: $public

\n" if $public; $html .= "

System identifier: $system

\n" if $system; } else { $html .= "

General Entity

\n"; if ($text =~ /\"/) { $html .= "

'$text'

\n"; } else { $html .= "

\"$text\"

\n"; } } } if ($type eq 'param') { if ($public || $system) { $html .= "

External Parameter Entity

\n"; $html .= "

Public identifier: $public

\n" if $public; $html .= "

System identifier: $system

\n" if $system; } else { $html .= "

Parameter Entity

\n"; $html .= "
";

	    # OK, it's a parameter entity. Now, does it look like a 
	    # content model fragment

	    my $cmfragment = &cmFragment($text);

	    while ($text =~ /\%?[-a-z0-9.:_]+;?/is) {
		my $pre = $`;
		my $match = $&;
		$text = $';

		$html .= $pre;

		if ($pre =~ /\#$/) {
		    # if it comes after a '#', it's a keyword...
		    $html .= $match;
		    next;
		}

		if ($match =~ /\%([^;]+);?/) {
		    $name = $1;
		    if (exists $entities{$name}) {
			my $href = $ENTBASE{$name} . $fileext;
			$html .= "$match";
		    } else { 
			$html .= $match;
		    }
		} elsif ($cmfragment) {
		    $name = $match;
		    $name = lc($name) if !$option{'namecase-general'};
		    if (exists $elements{$name}) {
			my $href = $ELEMBASE{$name} . $fileext;
			my $dir = $config{$expanded . "-element-dir"};
			$html .= "$match";
		    } else {
			$html .= $match;
		    }
		} else {
		    $html .= $match;
		}
	    }
	    $html .= $text;
	    $html .= "
\n"; } } if ($type eq 'sdata' || $type eq 'pi') { $html .= "

" . uc($type) . " Entity

\n"; $text =~ s/\&/\&/sg; if ($text =~ /\"/) { $html .= "

'$text'

\n"; } else { $html .= "

\"$text\"

\n"; } } if ($type eq 'ndata' || $type eq 'cdata') { my $notation = $entity->getAttribute("notation"); $html .= "

" . uc($type) . " Entity

\n"; $html .= "

Notation: $notation

\n"; $html .= "

Public identifier: $public

\n" if $public; $html .= "

System identifier: $system

\n" if $system; } return $html; } sub formatEntityAppearsIn { my $count = shift; my $html = ""; my $entityname = $entities[$count]; my $entity = $entities{$entityname}; my %appears = (); my $key = "%$entityname"; %appears = %{$APPEARSIN{$key}} if exists $APPEARSIN{$key}; if (%appears) { my @ents = sort { uc($a) cmp uc($b) } keys %appears; $html .= "

Parameter Entities

\n"; $html .= "

The following parameter entities contain "; $html .= $entity->getAttribute('name') . ":\n"; my $first = 1; for (my $count = 0; $count <= $#ents; $count++) { my $entity = $entities{$ents[$count]}; my $basename = $ENTBASE{$ents[$count]} . $fileext; $html .= ",\n" if !$first; $first = 0; $html .= ""; $html .= $entity->getAttribute('name'); $html .= ""; } $html .= "

"; } return $html; } sub formatEntityDescription { my $count = shift; my $name = $entities[$count]; my $entity = $entities{$name}; return ""; } sub formatEntityExamples { my $count = shift; my $name = $entities[$count]; my $entity = $entities{$name}; return ""; } sub formatEntityFooter { my $count = shift; my $html = ""; $html .= "

\n"; $html .= "
\n"; $html .= "HTML Presentation of "; $html .= $dtd->getDocumentElement()->getAttribute('title'); $html .= " by "; $html .= "DTDParse (version $main::VERSION).\n"; $html .= "\n"; $html .= "\n"; return $html; } # ====================================================================== sub formatNotation { my $count = shift; my $html = ""; my $name = $notations[$count]; my $element = $notations{$name}; $html .= &formatNotationHeader($count); $html .= &formatNotationTitle($count); if ($option{'synopsis'}) { $html .= &formatNotationSynopsis($count); } $html .= &formatNotationDescription($count) if $option{'description'}; $html .= &formatNotationExamples($count) if $option{'examples'}; $html .= &formatNotationFooter($count); } sub formatNotationHeader { my $count = shift; my $html = ""; my $name = $notations[$count]; my $notation = $notations{$name}; my $basename = $NOTBASE{$name}; my $title = $dtd->getDocumentElement()->getAttribute('title'); my $subtitle = "Notation View"; $html .= "\n\n$title: Notation "; $html .= $notation->getAttribute('name'); $html .= "\n"; $html .= "\n\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "\n"; $html .= "
$title: $subtitle"; $html .= " "; $html .= "
\n"; $html .= &headerLinks('none', 1); $html .= "\n"; if ($count > 0) { my $href = $NOTBASE{$notations[$count-1]} . $fileext; $html .= "[Prev]\n"; } if ($count < $#notations) { my $href = $NOTBASE{$notations[$count+1]} . $fileext; $html .= "[Next]\n"; } $html .= "
\n"; $html .= "
\n"; return $html; } sub formatNotationTitle { my $count = shift; my $name = $notations[$count]; my $notation = $notations{$name}; my $html = ""; $html .= "

Notation " . $notation->getAttribute('name') . "

\n"; } sub formatNotationSynopsis { my $count = shift; my $name = $notations[$count]; my $notation = $notations{$name}; my $html = ""; my $public = $notation->getAttribute("public"); my $system = $notation->getAttribute("system"); $html .= "

Synopsis

\n"; $html .= "

Public identifier: $public

\n" if $public; $html .= "

System identifier: $system

\n" if $system; if (!$public && !$system) { $html .= "

SYSTEM specified without a system identifier.

\n"; } return $html; } sub formatNotationDescription { my $count = shift; my $name = $notations[$count]; my $notation = $notations{$name}; my $html = ""; $html .= "

Description

\n"; return $html; } sub formatNotationExamples { my $count = shift; my $name = $notations[$count]; my $element = $notations{$name}; return ""; } sub formatNotationFooter { my $count = shift; my $html = ""; $html .= "

\n"; $html .= "
\n"; $html .= "HTML Presentation of "; $html .= $dtd->getDocumentElement()->getAttribute('title'); $html .= " by "; $html .= "DTDParse (version $main::VERSION).\n"; $html .= "\n"; $html .= "\n"; return $html; } # ====================================================================== sub headerLinks { my $skip = shift; my $up = shift; my $html = ""; my $entfile = ($up ? "../" : "") . $config{$expanded . "-entity-index"}; my $elemfile = ($up ? "../" : "") . $config{$expanded . "-element-index"}; my $notfile = ($up ? "../" : "") . $config{"notation-index"}; my $home = ($up ? "../" : "") . $config{"home"}; my $elemcount = $#elements+1; my $entcount = $#entities+1; my $notcount = $#notations+1; if ($skip ne 'home') { $html .= "[Home]\n"; } if ($option{'elements'} && $skip ne 'elements' && $elemcount > 0) { $html .= "[Elements]\n"; } if ($option{'entities'} && $skip ne 'entities' && $entcount > 0) { $html .= "[Entities]\n"; } if ($option{'notations'} && $skip ne 'notations' && $notcount > 0) { $html .= "[Notations]\n"; } return $html; } sub writeHeaderLinks { local *F = shift; my $skip = shift; my $up = shift; print F &headerLinks($skip, $up); } sub writeElementIndexes { my $basedir = shift; my %letters = (); my $element = ""; my $title = $dtd->getDocumentElement()->getAttribute('title'); my %subtitle = ('expanded' => 'User Element View', 'unexpanded' => 'DTD Element View'); my %otherview = ('expanded' => 'DTD Element View', 'unexpanded' => 'User Element View'); my ($char, $lastchar, $first, $otherfile); local (*F, $_); if ($expanded eq 'expanded') { $otherfile = $config{'unexpanded-element-index'}; } else { $otherfile = $config{'expanded-element-index'}; } foreach $element (@elements) { $char = uc(substr($element, 0, 1)); $letters{$char} = 1; } open (F, ">" . $basedir . "/" . $config{$expanded . "-element-index"}); print F "\n\n$title: Elements\n"; print F "\n\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "
$title: ", $subtitle{$expanded}, ""; if ($option{'unexpanded'} || ($expanded eq 'unexpanded')) { print F "["; print F $otherview{$expanded}; print F "]"; } else { print F " "; } print F "
\n"; writeHeaderLinks(*F, 'elements', 0); print F "\n"; print F " "; print F "
\n"; print F "
\n"; $first = 1; foreach $char (sort { uc($a) cmp uc($b) } keys %letters) { print F " | " if !$first; $first = 0; print F "$char"; } print F "\n"; my @roots = keys %ROOTS; if ($#roots > 0) { print F "

Top level elements: "; } else { print F "

Top level element: "; } $first = 1; foreach my $name (sort { uc($a) cmp uc($b) } @roots) { my $element = $ROOTS{$name}; my $basedir = $config{$expanded . "-element-dir"}; my $basename = $ELEMBASE{$name}; my $href = "$basedir/$basename" . $fileext; print F ",\n" if !$first; $first = 0; print F "", $element->getAttribute('name'), ""; } print F ".\n"; $lastchar = $char = ""; foreach my $name (@elements) { my $element = $elements{$name}; $char = uc(substr($name, 0, 1)); if ($char ne $lastchar) { print F "

$char

\n"; $lastchar = $char; } my $basedir = $config{$expanded . "-element-dir"}; my $basename = $ELEMBASE{$name}; my $href = "$basedir/$basename" . $fileext; print F "", $element->getAttribute('name'), "
\n"; } print F "

\n"; print F "
\n"; print F "HTML Presentation of "; print F $dtd->getDocumentElement()->getAttribute('title'); print F " by "; print F "DTDParse (version $main::VERSION).\n"; print F "\n"; print F "\n"; close (F); } sub writeEntityIndexes { my $basedir = shift; my %letters = (); my $entity = ""; my $title = $dtd->getDocumentElement()->getAttribute('title'); my %subtitle = ('expanded' => 'User Entity View', 'unexpanded' => 'DTD Entity View'); my %otherview = ('expanded' => 'DTD Entity View', 'unexpanded' => 'User Entity View'); my ($char, $lastchar, $first, $otherfile); local (*F, $_); if ($expanded eq 'expanded') { $otherfile = $config{'unexpanded-entity-index'}; } else { $otherfile = $config{'expanded-entity-index'}; } foreach $entity (@entities) { my $etype = &entityType($entities{$entity}); if (($etype eq 'sdata' && $option{'include-sdata'}) || ($etype eq 'msparam' && $option{'include-ms'}) || ($etype eq 'charent' && $option{'include-charent'}) || ($etype ne 'sdata' && $etype ne 'msparam' && $etype ne 'charent')) { $char = uc(substr($entity, 0, 1)); $letters{$char} = 1; } } open (F, ">" . $basedir . "/" . $config{$expanded . "-entity-index"}); print F "\n\n$title: Entities\n"; print F "\n\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "
$title: ", $subtitle{$expanded}, ""; if ($option{'unexpanded'} || ($expanded eq 'unexpanded')) { print F "["; print F $otherview{$expanded}; print F "]"; } else { print F " "; } print F "
\n"; &writeHeaderLinks(*F, 'entities', 0); print F "\n"; print F " "; print F "
\n"; print F "
\n"; $first = 1; foreach $char (sort { uc($a) cmp uc($b) } keys %letters) { print F " | " if !$first; $first = 0; print F "$char"; } print F "\n"; $lastchar = $char = ""; foreach $entity (@entities) { my $etype = &entityType($entities{$entity}); next if (($etype eq 'sdata' && !$option{'include-sdata'}) || ($etype eq 'msparam' && !$option{'include-ms'}) || ($etype eq 'charent' && !$option{'include-charent'})); $char = uc(substr($entity, 0, 1)); if ($char ne $lastchar) { print F "

$char

\n"; $lastchar = $char; } my $basedir = $config{$expanded . "-entity-dir"}; my $basename = $ENTBASE{$entity}; my $href = "$basedir/$basename" . $fileext; print F "$entity"; if (0) { print F "--"; my $etype = &entityType($entities{$entity}); if ($etype eq 'param') { print F "parameter entity"; } elsif ($etype eq 'paramext') { print F "external entity"; } elsif ($etype eq 'sdata') { print F "SDATA entity"; } elsif ($etype eq 'msparam') { print F "marked section entity"; } elsif ($etype eq 'gen') { print F "general entity"; } else { print F "uknown entity"; } } print F "
\n"; } print F "

\n"; print F "
\n"; print F "HTML Presentation of "; print F $dtd->getDocumentElement()->getAttribute('title'); print F " by "; print F "DTDParse (version $main::VERSION).\n"; print F "\n"; print F "\n"; close (F); } sub writeNotationIndexes { my $basedir = shift; my %letters = (); my $notation = ""; my $title = $dtd->getDocumentElement()->getAttribute('title'); my $subtitle = "Notation View"; my $entfile = $config{$expanded . "-entity-index"}; my $elemfile = $config{$expanded . "-element-index"}; my ($char, $lastchar, $first); local (*F, $_); foreach $notation (@notations) { $char = uc(substr($notation, 0, 1)); $letters{$char} = 1; } open (F, ">" . $basedir . "/" . $config{"notation-index"}); print F "\n\n$title: Notations\n"; print F "\n\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "\n"; print F "
$title: $subtitle"; print F " "; print F "
\n"; &writeHeaderLinks(*F, 'notations', 0); print F "\n"; print F " "; print F "
\n"; print F "
\n"; $first = 1; foreach $char (sort { uc($a) cmp uc($b) } keys %letters) { print F " | " if !$first; $first = 0; print F "$char"; } print F "\n"; $lastchar = $char = ""; foreach my $name (@notations) { my $notation = $notations{$name}; $char = uc(substr($name, 0, 1)); if ($char ne $lastchar) { print F "

$char

\n"; $lastchar = $char; } my $basedir = $config{"notation-dir"}; my $basename = $NOTBASE{$name}; my $href = "$basedir/$basename" . $fileext; print F "", $notation->getAttribute('name'), "
\n"; } print F "

\n"; print F "
\n"; print F "HTML Presentation of "; print F $dtd->getDocumentElement()->getAttribute('title'); print F " by "; print F "DTDParse (version $main::VERSION).\n"; print F "\n"; print F "\n"; close (F); } sub writeIndex { my $basedir = shift; my $root = $dtd->getDocumentElement(); my $title = entify($root->getAttribute('title')); my $entfile = $config{"expanded-entity-index"}; my $elemfile = $config{"expanded-element-index"}; my $notfile = $config{"notation-index"}; my $elemcount = $#elements+1; my $entcount = $#entities+1; my $notcount = $#notations+1; local (*F, $_); open (F, ">" . $basedir . "/" . $config{'home'}); print F "\n\n$title\n"; print F "\n\n"; print F "

$title

\n"; &writeHeaderLinks(*F, 'home', 0); print F "
\n"; if ($root->getAttribute('public-id') || $root->getAttribute('system-id')) { my ($pub) = entify($root->getAttribute('public-id')); my ($sys) = entify($root->getAttribute('system-id')); print F "

"; print F "The $title "; print F "DTD " if $title !~ / DTD$/i; print F "is identified with:\n"; print F "

    \n"; if ($pub) { print F "
  • The public identifier: \"$pub\""; print F ", and" if $sys; print F "\n"; } print F "
  • The system identifier: \"$sys\"\n" if $sys; print F "
\n"; print F "

It is composed of\n"; } else { print F "

"; print F "The $title "; print F "DTD " if $title !~ / DTD$/i; print F "is composed of\n"; } print F "$elemcount elements, "; if ($entcount == 0) { print F "no entities, "; } elsif ($entcount == 1) { print F "1 entity, "; } else { print F "$entcount entities, "; } print F "and "; if ($notcount == 0) { print F "no notations.\n"; } elsif ($notcount == 1) { print F "1 notation.\n"; } else { print F "$notcount notations.\n"; } my %etypes = (); for (my $count = 0; $count < $entcount; $count++) { my $ent = $entities{$entities[$count]}; my $type = &entityType($ent); $etypes{$type} = 0 if !exists($etypes{$type}); $etypes{$type}++; } print F "

    \n"; print F "
  • $elemcount elements\n"; print F "
  • $entcount ", $entcount == 1 ? "entity" : "entities\n"; print F "
      \n"; print F ("
    • ", $etypes{'param'}, " parameter ", $etypes{'param'} == 1 ? "entity" : "entities", "\n") if $etypes{'param'} > 0; print F ("
    • ", $etypes{'paramext'}, " external ", $etypes{'paramext'} == 1 ? "entity" : "entities", "\n") if $etypes{'paramext'} > 0; print F ("
    • ", $etypes{'sdata'}, " SDATA ", $etypes{'sdata'} == 1 ? "entity" : "entities", "\n") if $etypes{'sdata'} > 0; print F ("
    • ", $etypes{'ndata'}, " NDATA ", $etypes{'ndata'} == 1 ? "entity" : "entities", "\n") if $etypes{'ndata'} > 0; print F ("
    • ", $etypes{'charent'}, " character ", $etypes{'charent'} == 1 ? "entity" : "entities", "\n") if $etypes{'charent'} > 0; print F ("
    • ", $etypes{'msparam'}, " Marked section ", $etypes{'msparam'} == 1 ? "entity" : "entities", "\n") if $etypes{'msparam'} > 0; print F ("
    • ", $etypes{'gen'}, " general ", $etypes{'gen'} == 1 ? "entity" : "entities", "\n") if $etypes{'gen'} > 0; print F "
    \n"; print F "
  • $notcount ", $notcount == 1 ? "notation" : "notations\n"; print F "
\n"; print F "

It claims to be an "; if ($root->getAttribute('xml')) { print F "XML"; } else { print F "SGML"; } print F " DTD. Element "; print F "and notation " if $notcount > 0; print F "names are "; print F "not " if $root->getAttribute('namecase-general'); print F "case sensitive. Entity names are "; print F "not " if $root->getAttribute('namecase-entity'); print F "case sensitive.\n"; print F "

\n"; print F "
\n"; print F "HTML Presentation of "; print F $dtd->getDocumentElement()->getAttribute('title'); print F " by "; print F "DTDParse (version $main::VERSION).\n"; print F "\n"; print F "\n"; close (F); } 1; SGML-DTDParse-2.00/lib/SGML/DTDParse/Catalog.pm0100644004705000001440000001712310261624166017460 0ustar ehoodusers# -*- Perl -*- package SGML::DTDParse::Catalog; use strict; use vars qw($VERSION $CVS); $VERSION = do { my @r=(q$Revision: 2.1 $=~/\d+/g); sprintf "%d."."%03d"x$#r,@r }; $CVS = '$Id: Catalog.pm,v 2.1 2005/07/02 23:51:18 ehood Exp $ '; sub new { my $type = shift; my %param = @_; my $class = ref($type) || $type; my $self = bless {}, $class; $self->{'DIRECTIVE'} = []; $self->{'FILES'} = {}; $self->{'VERBOSE'} = $param{'Verbose'} || $param{'Debug'}; $self->{'DEBUG'} = $param{'Debug'}; my $catfiles = $main::ENV{'SGML_CATALOG_FILES'}; my @files = (); if ($catfiles =~ /;/) { @files = split(/;/, $catfiles); } else { @files = split(/:/, $catfiles); } foreach my $file (@files) { $self->parse($file); } return $self; } sub verbose { my $self = shift; my $val = shift; my $verb = $self->{'VERBOSE'}; $self->{'VERBOSE'} = $val if defined($val); return $verb; } sub debug { my $self = shift; my $val = shift; my $dbg = $self->{'DEBUG'}; $self->{'DEBUG'} = $val if defined($val); return $dbg; } sub parse { my $self = shift; my $file = shift; return 2 if $self->{'FILES'}->{$file}; $self->{'FILES'}->{$file} = 1; return $self->load_catalog($file); } sub _find { my $self = shift; my $type = shift; my $key = shift; foreach my $dir (@{$self->{'DIRECTIVE'}}) { my %hash = %{$dir}; return $hash{'FILE'} if $hash{'TYPE'} = $type && $hash{$type} eq $key; } return undef; } sub system_map { my($self, $sysid) = @_; return $self->_find('SYSID', $sysid) || $sysid; } sub public_map { my($self, $pubid) = @_; $pubid =~ s/\s+/ /g; return $self->_find('PUBID', $pubid); } sub reverse_public_map { my($self, $filename) = @_; $filename =~ s/\\/\//g; # canonical path separator foreach my $dir (@{$self->{'DIRECTIVE'}}) { my %hash = %{$dir}; my $key = $hash{'TYPE'}; next if $key ne 'PUBID'; # print "$key\n"; # print $hash{$key}, "\n"; # print $hash{'FILE'}, "\n"; # print "\t$filename\n\n"; return $hash{$key} if $hash{'FILE'} eq $filename; } return undef; } sub declaration { my($self, $pubid) = @_; $pubid =~ s/\s+/ /g; foreach my $dir (@{$self->{'DIRECTIVE'}}) { my %hash = %{$dir}; return $hash{'FILE'} if $hash{'TYPE'} eq 'DTDDECL' && $hash{'DTDDECL'} eq $pubid; return $hash{'FILE'} if $hash{'TYPE'} eq 'SGMLDECL'; } return undef; } sub load_catalog { my $self = shift; my $catalog = shift; my $drive = ""; my $dir = ""; my @directives = (); my $count = 0; local (*F, $_); print "Reading $catalog...\n" if $self->verbose(); $catalog =~ s/\\/\//g; # canonical path separators $dir = $1 if $catalog =~ /^(.*)\/[^\/]+$/; $drive = substr($dir, 0, 2) if substr($dir, 1, 1) eq ':'; if (!open(F, $catalog)) { print "Failed to open $catalog...\n" if $self->verbose(); return; } read (F, $_, -s $catalog); close (F); while (/^\s*(\S+)/s) { my $keyword = uc($1); $_ = $'; if ($keyword eq 'OVERRIDE') { $_ =~ /^\s*\S+/s; $_ = $'; next; } if ($keyword eq 'PUBLIC') { my($pubid, $filename); if (/^\s*[\"\']/s) { ($pubid, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $pubid = $1; $_ = $'; } if (/^\s*[\"\']/s) { ($filename, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $filename = $1; $_ = $'; } if ($filename =~ /^[a-z]:/s) { # nop } elsif ($filename =~ /^[\\\/]/) { $filename = $drive . $filename; } else { $filename = $dir . "/" . $filename if $dir ne ""; } $directives[$count] = {}; $directives[$count]->{'TYPE'} = 'PUBID'; $directives[$count]->{'PUBID'} = $pubid; $directives[$count]->{'FILE'} = $filename; $count++; # print "\"$pubid\" = \"$filename\"\n"; next; } if ($keyword eq 'SYSTEM') { my($sysid, $filename); if (/^\s*[\"\']/s) { ($sysid, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $sysid = $1; $_ = $'; } if (/^\s*[\"\']/s) { ($filename, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $filename = $1; $_ = $'; } if ($filename =~ /^[a-z]:/s) { # nop } elsif ($filename =~ /^[\\\/]/) { $filename = $drive . $filename; } else { $filename = $dir . "/" . $filename if $dir ne ""; } $directives[$count] = {}; $directives[$count]->{'TYPE'} = 'SYSID'; $directives[$count]->{'SYSID'} = $sysid; $directives[$count]->{'FILE'} = $filename; $count++; next; } if ($keyword eq 'DTDDECL') { my($pubid, $filename); if (/^\s*[\"\']/s) { ($pubid, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $pubid = $1; $_ = $'; } if (/^\s*[\"\']/s) { ($filename, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $filename = $1; $_ = $'; } if ($filename =~ /^[a-z]:/s) { # nop } elsif ($filename =~ /^[\\\/]/) { $filename = $drive . $filename; } else { $filename = $dir . "/" . $filename if $dir ne ""; } $directives[$count] = {}; $directives[$count]->{'TYPE'} = 'DTDDECL'; $directives[$count]->{'DTDDECL'} = $pubid; $directives[$count]->{'FILE'} = $filename; $count++; next; } if ($keyword eq 'SGMLDECL') { my($filename); if (/^\s*[\"\']/s) { ($filename, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $filename = $1; $_ = $'; } if ($filename =~ /^[a-z]:/s) { # nop } elsif ($filename =~ /^[\\\/]/) { $filename = $drive . $filename; } else { $filename = $dir . "/" . $filename if $dir ne ""; } $directives[$count] = {}; $directives[$count]->{'TYPE'} = 'SGMLDECL'; $directives[$count]->{'SGMLDECL'} = 'SGMLDECL'; $directives[$count]->{'FILE'} = $filename; $count++; next; } if ($keyword eq 'DOCTYPE') { my($tag, $filename); if (/^\s*[\"\']/s) { ($tag, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $tag = $1; $_ = $'; } if (/^\s*[\"\']/s) { ($filename, $_) = &parse_quoted_string("CATALOG", $_); } else { /^\s*(\S+)/s; $filename = $1; $_ = $'; } if ($filename =~ /^[a-z]:/s) { # nop } elsif ($filename =~ /^[\\\/]/) { $filename = $drive . $filename; } else { $filename = $dir . "/" . $filename if $dir ne ""; } # nop... next; } if ($keyword =~ /^\-\-/) { $_ = $keyword . $_; /^--.*?--/s; $_ = $'; next; } die "Don't know how to parse CATALOG keyword: $keyword\n"; } # now populate the real array; making sure that SGMLDECL goes to # the end of the array foreach my $dir (@directives) { my %hash = %{$dir}; next if $hash{'TYPE'} eq 'SGMLDECL'; push(@{$self->{'DIRECTIVE'}}, $dir); } foreach my $dir (@directives) { my %hash = %{$dir}; next if $hash{'TYPE'} ne 'SGMLDECL'; push(@{$self->{'DIRECTIVE'}}, $dir); } return 1; } sub strip_comment { my($text) = shift; while ($text =~ /^\s*--.*?--/s) { $text = $'; } return $text; } sub parse_quoted_string { my($decl, $entity) = @_; my($text); if ($entity =~ /^\s*\"/s) { die "Unparseable text: $decl\n" if $entity !~ /^\s*\"(.*?)\"/s; $text = $1; $entity = &strip_comment($'); } elsif ($entity =~ /^\s*\'/s) { die "Unparseable text: $decl\n" if $entity !~ /^\s*\'(.*?)\'/s; $text = $1; $entity = &strip_comment($'); } else { die "Unexpected text: $decl\n"; } return ($text, $entity); } 1; SGML-DTDParse-2.00/lib/SGML/DTDParse/Util.pm0100644004705000001440000000331110266076477017030 0ustar ehoodusers# # $Id: Util.pm,v 2.2 2005/07/16 03:21:35 ehood Exp $ package SGML::DTDParse::Util; use strict; use vars qw($VERSION $CVS @ISA @EXPORT_OK %EXPORT_TAGS); use Exporter; $VERSION = do { my @r=(q$Revision: 2.2 $=~/\d+/g); sprintf "%d."."%03d"x$#r,@r }; $CVS = '$Id: Util.pm,v 2.2 2005/07/16 03:21:35 ehood Exp $ '; @ISA = qw(Exporter); @EXPORT_OK = qw( entify ); %EXPORT_TAGS = ( ALL => qw( entify ), ); ############################################################################# sub entify { my $str = shift; return undef unless defined($str); $str =~ s/([&<>"])/sprintf("&#x%X;",ord($1))/ge; $str; } ############################################################################# 1; __END__ =head1 NAME SGML::DTDParse::Util - DTDParse utility routines. =head1 SYNOPSIS use SGML::DTDParse::Util; use SGML::DTDParse::Util qw(:ALL); =head1 DESCRIPTION B provides utility routines for DTDParse modules and scripts. =head1 ROUTINES By default, no routines are exported into the user's namespace. If importing is desired, individual routines can be specified in the C statement or the special tag C<:ALL> can be specified to import all routines. =over 4 =item entify $xml_str = entify($str); Replace special characters with entity references. The characters converted are C>, C>, C<&>, and C<"> (double-quote). =back =head1 SEE ALSO See L for an overview of the DTDParse package. =head1 AVAILABILITY EIE =head1 AUTHORS Earl Hood, Eearl@earlhood.comE. =head1 COPYRIGHT AND LICENSE See L for copyright and license information. SGML-DTDParse-2.00/lib/SGML/DTDParse/ContentModel.pm0100644004705000001440000001327210261624166020502 0ustar ehoodusers# -*- Perl -*- package SGML::DTDParse::ContentModel; use strict; use vars qw($VERSION $CVS); $VERSION = do { my @r=(q$Revision: 2.1 $=~/\d+/g); sprintf "%d."."%03d"x$#r,@r }; $CVS = '$Id: ContentModel.pm,v 2.1 2005/07/02 23:51:18 ehood Exp $ '; use strict; use Text::DelimMatch; use SGML::DTDParse::Tokenizer; require 5.000; require Carp; { package SGML::DTDParse::ContentModel::Group; sub new { my($type, $tok) = @_; my($class) = ref($type) || $type; my($self) = {}; my(@toks); my(@model); local($_); bless $self, $class; # print "Group:\n"; # $tok->print(); # print "\n"; foreach $_ ('CONTENT_MODEL_STRING', 'OCCURRENCE') { $self->{$_} = $tok->{$_}; } $self->{'CONNECTOR'} = ''; @toks = @{$tok->{'CONTENT_MODEL'}->{'MODEL'}}; if ($toks[1]) { # if there is a connector... if (ref $toks[1] eq 'SGML::DTDParse::Tokenizer::Connector') { $self->{'CONNECTOR'} = $toks[1]->{'CONNECTOR'}; } } $self->{'CONTENT_MODEL'} = new SGML::DTDParse::ContentModel $tok->{'CONTENT_MODEL'}; return $self; } sub content_model { my $self = shift; return $self->{'CONTENT_MODEL'}; } sub print { my($self, $depth) = @_; print "\t" x $depth, "(connector: ", $self->{'CONNECTOR'}, "\n"; $self->{'CONTENT_MODEL'}->print($depth+1); print "\t" x $depth, ")\n"; } sub xml { my($self, $depth) = @_; my($con) = $self->{'CONNECTOR'}; my($occ) = $self->{'OCCURRENCE'}; my($type) = ""; my($xml) = ""; $xml .= " " x $depth; if ($con eq '|') { $type = "or-group"; } elsif ($con eq '&') { $type = 'and-group'; } else { $type = 'sequence-group'; } if ($occ) { $xml .= "<$type occurrence=\"$occ\">\n"; } else { $xml .= "<$type>\n"; } $xml .= $self->{'CONTENT_MODEL'}->xml($depth+1,1); $xml .= " " x $depth; $xml .= "\n"; return $xml; } } { package SGML::DTDParse::ContentModel::Element; sub new { my($type, $tok) = @_; my($class) = ref($type) || $type; my($self) = {}; my($model); bless $self, $class; foreach $_ ('ELEMENT', 'OCCURRENCE') { $self->{$_} = $tok->{$_}; } return $self; } sub element { my $self = shift; return $self->{'ELEMENT'}; } sub print { my($self, $depth) = @_; print "\t" x $depth, $self->{'ELEMENT'}, $self->{'OCCURRENCE'}, "\n"; } sub xml { my($self, $depth) = @_; my($occ) = $self->{'OCCURRENCE'}; my($xml) = ""; $xml .= " " x $depth; if ($self->{'ELEMENT'} eq '#PCDATA') { $xml .= "\n"; } elsif ($self->{'ELEMENT'} eq 'ANY') { $xml .= "\n"; } elsif ($self->{'ELEMENT'} eq 'EMPTY') { $xml .= "\n"; } elsif ($self->{'ELEMENT'} eq 'CDATA') { $xml .= "\n"; } elsif ($self->{'ELEMENT'} eq 'RCDATA') { $xml .= "\n"; } else { $xml .= "{'ELEMENT'} . "\""; $xml .= " occurrence=\"$occ\"" if $occ; $xml .= "/>\n"; } return $xml; } } { package SGML::DTDParse::ContentModel::ParameterEntity; sub new { my($type, $tok) = @_; my($class) = ref($type) || $type; my($self) = {}; my($model); bless $self, $class; $self->{'PARAMETER_ENTITY'} = $tok->{'PARAMETER_ENTITY'}; return $self; } sub print { my($self, $depth) = @_; print "\t" x $depth, "%", $self->{'PARAMETER_ENTITY'}, ";\n"; } sub xml { my($self, $depth) = @_; my($xml) = ""; $xml .= " " x $depth; $xml .= "{'PARAMETER_ENTITY'} . "\""; $xml .= "/>\n"; return $xml; } } sub new { my($type, $model) = @_; my $class = ref($type) || $type; my $self = {}; my(@toks) = (); my(@model) = (); bless $self, $class; $self->{'CONTENT_MODEL_STRING'} = $model->{'CONTENT_MODEL_STRING'}; @toks = @{$model->{'MODEL'}}; # Note: we know that the first token will always be a group, unless # the content model is declard content. See new() in Tokenizer. # while (@toks) { my($tok) = shift @toks; if (ref $tok eq 'SGML::DTDParse::Tokenizer::Group') { push (@model, new SGML::DTDParse::ContentModel::Group $tok); } elsif (ref $tok eq 'SGML::DTDParse::Tokenizer::Element') { push (@model, new SGML::DTDParse::ContentModel::Element $tok); } elsif (ref $tok eq 'SGML::DTDParse::Tokenizer::ParameterEntity') { push (@model, new SGML::DTDParse::ContentModel::ParameterEntity $tok); } elsif (ref $tok eq 'SGML::DTDParse::Tokenizer::Connector') { #nop; } else { die "Bad token in SGML::DTDParse::ContentModel"; } } @{$self->{'MODEL'}} = @model; return $self; } sub type { my $self = shift; my $depth = shift; my @model = @{$self->{'MODEL'}}; $depth = 0 if !defined($depth); while (@model) { my $tok = shift @model; if ((ref $tok) =~ /Element$/) { return 'mixed' if $tok->element() eq '#PCDATA'; if ($depth == 0) { return 'cdata' if $tok->element() eq 'CDATA'; return 'rcdata' if $tok->element() eq 'RCDATA'; return 'empty' if $tok->element() eq 'RCDATA'; } } elsif ((ref $tok) =~ /Group$/) { my $cm = $tok->content_model(); return $cm->type($depth+1); } } return 'element'; } sub print { my($self) = shift; my($depth) = shift || 1; my(@model) = @{$self->{'MODEL'}}; local($_); foreach $_ (@model) { $_->print($depth); } } sub xml { my($self) = shift; my($depth) = shift || 1; my($internal) = shift; my(@model) = @{$self->{'MODEL'}}; my($xml) = ""; my($tag); local($_); if (!$internal) { $tag = $depth; $depth = 1; # $xml .= "<$tag string=\""; # $xml .= $self->{'CONTENT_MODEL_STRING'}; # $xml .= "\">\n"; } foreach $_ (@model) { $xml .= $_->xml($depth); } # if (!$internal) { # $xml .= "\n"; # } return $xml; } 1; SGML-DTDParse-2.00/lib/SGML/DTDParse.pm0100644004705000001440000001226210266076477016120 0ustar ehoodusers# # $Id: DTDParse.pm,v 2.2 2005/07/16 03:21:35 ehood Exp $ package SGML::DTDParse; $VERSION = "2.00"; sub Version { $VERSION; } require 5.005; ## General utilities for programs @SGML::DTDParse::CommonOptions = ( 'help', 'man', 'version', ); sub process_common_options { my $opts = shift; usage(-verbose => 0, -exitval => 0) if ($opts->{'version'}); usage(-verbose => 1, -exitval => 0) if ($opts->{'help'}); usage(-verbose => 2, -exitval => 0) if ($opts->{'man'}); } sub usage { require Pod::Usage; require FindBin; Pod::Usage::pod2usage( { -message => join('', 'Version: ', $FindBin::Script, ' v', $VERSION, "\n"), @_ }); } 1; __END__ =head1 NAME SGML::DTDParse - Parse an SGML or XML DTD =head1 SYNOPSIS use SGML::DTDParse; print "This is DTDParse v$SGML::DTDParse::VERSION\n"; =head1 DESCRIPTION The DTDParse collection is a set of Perl modules and scripts for manipulating SGML an XML Document Type Definitions (DTDs). DTDParse is designed primarily to aid in the understanding and documentation of DTDs. Typical usage of this package is as follows: =over =item 1. Parse the DTD with L. This produces an XML representation of the DTD. This representation exposes both the logical structure of the DTD (the actual meta-structure of its grove) and the organizational structure of the DTD (the declarations and parameter entities) that comprise its textual form. =item 2. Manipulate the XML document produced by dtdparse to do whatever you want. DTDParse is shipped with several programs that demonstrate various capabilities, including B which can produce HTML or DocBook L RefEntry pages for each element and parameter entity in the DTD. =back =head1 DTDParse XML DTD The following is the XML DTD for XML documents created with L (the DTD is also provided in the file C of the DTDParse distribution): =head1 SEE ALSO L, L, L, L, L =head1 PREREQUISITES The prerequisites listed are for all modules and scripts: B, B, B, B For prerequisites that apply for a specific script or module, see the individual scripts' and modules' reference pages. =head1 AVAILABILITY EIE =head1 AUTHORS DTDParse package originally developed by Norman Walsh, Endw@nwalsh.comE. Earl Hood, Eearl@earlhood.comE, picked up support and maintenance. =head1 COPYRIGHT AND LICENSE Copyright (C) 1999-2001, 2003 Norman Walsh Copyright (C) 2005, Earl Hood DTDParse may be copied only under the terms of either the Artistic License or the GNU General Public License, which may be found in the DTDParse distribution. SGML-DTDParse-2.00/t/0040755004705000001440000000000010266305433013072 5ustar ehoodusersSGML-DTDParse-2.00/t/SGML-DTDParse.t0100644004705000001440000000073510261624166015431 0ustar ehoodusers# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl SGML-DTDParse.t' ######################### # change 'tests => 1' to 'tests => last_test_to_print'; use Test::More tests => 1; BEGIN { use_ok('SGML::DTDParse') }; ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. SGML-DTDParse-2.00/t/SGML-DTDParse-DTD.t0100644004705000001440000000074610261624166016044 0ustar ehoodusers# Before `make install' is performed this script should be runnable with # `make test'. After `make install' it should work as `perl SGML-DTDParse-DTD.t' ######################### # change 'tests => 1' to 'tests => last_test_to_print'; use Test::More tests => 1; BEGIN { use_ok('SGML::DTDParse::DTD') }; ######################### # Insert your test code below, the Test::More module is use()ed here so read # its man page ( perldoc Test::More ) for help writing this test script. SGML-DTDParse-2.00/COPYING0100644004705000001440000004313110261624165013662 0ustar ehoodusers GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. SGML-DTDParse-2.00/Changes0100644004705000001440000000376410266077520014134 0ustar ehoodusersRevision history for DTDParse ============================================================================ 2.00 Jul 16, 2005 (Earl Hood, earlearlhoodcom) This is the first non-beta release of the rewrite of dtdparse 0.97. The following highlights changes from beta releases: - The package root namespace has changed from XML:: to SGML::. This was mainly done to avoid conflicts and confusion with numerous XML:: modules on CPAN (including the closely named XML::DTDParser). Also, DTDParse does support SGML DTDs and XML is a formal subset of SGML, so using SGML:: seems appropriate. - The release contains several bug fixes from last beta release along with added documentation: POD has been added for all scripts and a general overview documentation is provided via the meta-module SGML::DTDParse. Documentation is still needed for the main modules. - A new script, dtddiff (along with dtddiff2html) has been added that performs a context-like diff between two parsed DTDs. - dtdparse script modified to behave more like common Unix programs with respect to input and output. If no DTD filename is provided on the command-line, the DTD will be read from standard-input. If --output is not specified, then XML output is sent to standard-out. This change allows dtdparse to be used in a command pipeline. - Files have been reorganized to facilitate installation via Perl's standard installation process. See README for details. - Version numbering modified to match style used by Perl modules: 2.0.0 => 2.00. - DTDParse is now distributed under the Artistic License or the GNU General Public License. See README, Artistic, and COPYING. - Miscellaneous bug fixes. - Primary maintenance of DTDParse has been transitioned from Norman Walsh to Earl Hood. ============================================================================ $Id: Changes,v 2.2 2005/07/16 03:30:24 ehood Exp $ SGML-DTDParse-2.00/etc/0040755004705000001440000000000010266305433013402 5ustar ehoodusersSGML-DTDParse-2.00/etc/dtd.dtd0100644004705000001440000000444010261624165014652 0ustar ehoodusers SGML-DTDParse-2.00/etc/gen-html-doc.pl0100755004705000001440000000737410266101106016217 0ustar ehoodusers#!/usr/bin/perl -w # $Id: gen-html-doc.pl,v 2.1 2005/07/16 03:43:02 ehood Exp $ # Description: Script to convert POD to HTML # use Cwd; use File::Find; use File::Path; use Getopt::Long; use Pod::Find qw(pod_find); use Pod::Html; # Script globals/defaults my $cwd = getcwd; my $infile_path_root = $cwd; my $outfile_path_root = join('/', $cwd, 'doc', 'html'); my $toc_file = join('/', $outfile_path_root, 'index.html'); my $pod_cache_dir = join('/', $cwd, 'doc'); my @pod_dirs = ('bin', 'lib'); MAIN: { # Get command-line options my %opt = ( ); GetOptions(\%opt, 'inroot=s', 'outroot=s', 'cachedir=s', 'poddir=s@', ); $infile_path_root = Cwd::abs_path($opt{'inroot'}) if defined($opt{'inroot'}); $outfile_path_root = Cwd::abs_path($opt{'outroot'}) if defined($opt{'outroot'}); @pod_dirs = @{$opt{'poddir'}} if defined($opt{'poddir'}); $pod_cache_dir = Cwd::abs_path($opt{'cachedir'}) if defined($opt{'cachedir'}); $toc_file = join('/', $outfile_path_root, 'index.html'); my %pods = pod_find({ -verbose => 0 }, 'bin', 'lib'); foreach my $key (sort keys %pods) { convert_pod($key, $pods{$key}); } build_toc(); } ############################################################################# sub convert_pod { my $podfile = shift; my $title = shift; if ($podfile =~ m{$infile_path_root/(.*)/(.*?)(\.p(?:[lm]|od))?$}i) { my $path = $1; my $name = $2; my $ext = $3 || ""; my $html_root = $path; $html_root =~ s/[^\/]+/../g; print STDOUT "Htmlizing $path/$name$ext\n"; mkpath(join('/', $outfile_path_root, $path)); pod2html( '--cachedir='.$pod_cache_dir, '--infile='.$podfile, '--outfile='.join('/', $outfile_path_root, $path, $name.'.html'), '--podroot='.$infile_path_root, '--podpath=bin:lib', '--htmlroot='.$html_root, #'--title='.$title, '--header', '--quiet', ); } } sub build_toc { my $title = shift || "Package Documentation"; rename $toc_file, $toc_file.'.bak'; chdir $outfile_path_root; my %lib_docs = (); my %script_docs = (); my %docs = (); find(sub { return unless /\.html?$/; local *H; if (!open(H, $_)) { warn qq{Warning: Unable to open "$File::Find::name": $!\n}; return; } my $title = $File::Find::name; my $l; while (defined($l = )) { last if $l =~ /(.*?)}i) { $title = $1; last; } } close(H); if ($File::Find::name =~ m{(?:/|\b)lib/}) { $lib_docs{$File::Find::name} = $title; } elsif ($File::Find::name =~ m{(?:/|\b)(?:bin|scripts?)/}) { $script_docs{$File::Find::name} = $title; } else { $docs{$File::Find::name} = $title; } }, '.'); local *TOC; open(TOC, '>'.$toc_file) || die qq{Error: Unable to create "$toc_file": $!\n}; print TOC <<"EOT"; $title

$title

EOT if (%script_docs) { print TOC '

Scripts

', "\n"; print_doc_group(\*TOC, \%script_docs); } if (%lib_docs) { print TOC '

Modules

', "\n"; print_doc_group(\*TOC, \%lib_docs); } if (%docs) { print TOC '

Other

', "\n"; print_doc_group(\*TOC, \%docs); } print TOC < EOT close(TOC); unlink $toc_file.'.bak'; } sub print_doc_group { my $fh = shift; my $docs = shift; print $fh '
    ', "\n"; foreach my $doc (sort { $docs->{$a} cmp $docs->{$b} } keys %$docs) { print $fh '
  • ', $docs->{$doc}, '
  • ', "\n"; } print $fh '
', "\n"; } SGML-DTDParse-2.00/Artistic0100644004705000001440000001373710261624165014345 0ustar ehoodusers The "Artistic License" Preamble The intent of this document is to state the conditions under which a Package may be copied, such that the Copyright Holder maintains some semblance of artistic control over the development of the package, while giving the users of the package the right to use and distribute the Package in a more-or-less customary fashion, plus the right to make reasonable modifications. Definitions: "Package" refers to the collection of files distributed by the Copyright Holder, and derivatives of that collection of files created through textual modification. "Standard Version" refers to such a Package if it has not been modified, or has been modified in accordance with the wishes of the Copyright Holder as specified below. "Copyright Holder" is whoever is named in the copyright or copyrights for the package. "You" is you, if you're thinking about copying or distributing this Package. "Reasonable copying fee" is whatever you can justify on the basis of media cost, duplication charges, time of people involved, and so on. (You will not be required to justify it to the Copyright Holder, but only to the computing community at large as a market that must bear the fee.) "Freely Available" means that no fee is charged for the item itself, though there may be fees involved in handling the item. It also means that recipients of the item may redistribute it under the same conditions they received it. 1. You may make and give away verbatim copies of the source form of the Standard Version of this Package without restriction, provided that you duplicate all of the original copyright notices and associated disclaimers. 2. You may apply bug fixes, portability fixes and other modifications derived from the Public Domain or from the Copyright Holder. A Package modified in such a way shall still be considered the Standard Version. 3. You may otherwise modify your copy of this Package in any way, provided that you insert a prominent notice in each changed file stating how and when you changed that file, and provided that you do at least ONE of the following: a) place your modifications in the Public Domain or otherwise make them Freely Available, such as by posting said modifications to Usenet or an equivalent medium, or placing the modifications on a major archive site such as uunet.uu.net, or by allowing the Copyright Holder to include your modifications in the Standard Version of the Package. b) use the modified Package only within your corporation or organization. c) rename any non-standard executables so the names do not conflict with standard executables, which must also be provided, and provide a separate manual page for each non-standard executable that clearly documents how it differs from the Standard Version. d) make other distribution arrangements with the Copyright Holder. 4. You may distribute the programs of this Package in object code or executable form, provided that you do at least ONE of the following: a) distribute a Standard Version of the executables and library files, together with instructions (in the manual page or equivalent) on where to get the Standard Version. b) accompany the distribution with the machine-readable source of the Package with your modifications. c) give non-standard executables non-standard names, and clearly document the differences in manual pages (or equivalent), together with instructions on where to get the Standard Version. d) make other distribution arrangements with the Copyright Holder. 5. You may charge a reasonable copying fee for any distribution of this Package. You may charge any fee you choose for support of this Package. You may not charge a fee for this Package itself. However, you may distribute this Package in aggregate with other (possibly commercial) programs as part of a larger (possibly commercial) software distribution provided that you do not advertise this Package as a product of your own. You may embed this Package's interpreter within an executable of yours (by linking); this shall be construed as a mere form of aggregation, provided that the complete Standard Version of the interpreter is so embedded. 6. The scripts and library files supplied as input to or produced as output from the programs of this Package do not automatically fall under the copyright of this Package, but belong to whoever generated them, and may be sold commercially, and may be aggregated with this Package. If such scripts or library files are aggregated with this Package via the so-called "undump" or "unexec" methods of producing a binary executable image, then distribution of such an image shall neither be construed as a distribution of this Package nor shall it fall under the restrictions of Paragraphs 3 and 4, provided that you do not represent such an executable image as a Standard Version of this Package. 7. C subroutines (or comparably compiled subroutines in other languages) supplied by you and linked into this Package in order to emulate subroutines and variables of the language defined by this Package shall not be considered part of this Package, but are the equivalent of input as in Paragraph 6, provided these subroutines do not change the language in any way that would cause it to fail the regression tests for the language. 8. Aggregation of this Package with a commercial distribution is always permitted provided that the use of this Package is embedded; that is, when no overt attempt is made to make this Package's interfaces visible to the end user of the commercial distribution. Such use shall not be construed as a distribution of this Package. 9. The name of the Copyright Holder may not be used to endorse or promote products derived from this software without specific prior written permission. 10. THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. The End SGML-DTDParse-2.00/bin/0040755004705000001440000000000010266305433013377 5ustar ehoodusersSGML-DTDParse-2.00/bin/dtddiff2html0100755004705000001440000000546510266076621015713 0ustar ehoodusers#!/usr/bin/perl # $Id: dtddiff2html,v 2.2 2005/07/16 03:22:57 ehood Exp $ # Author(s): Earl Hood, # POD at end of file. use Getopt::Long; use SGML::DTDParse; MAIN: { my %opts = (); GetOptions(\%opts, @SGML::DTDParse::CommonOptions ) || SGML::DTDParse::usage(-verbose => 0, -exitval => 1); SGML::DTDParse::process_common_options(\%opts); print < Diff
EOT

  my $in_chng = 0;
  while (<>) {
    chomp;
    if (/^[!]/) {
      if (/::=/) {
        print '', entify($_), "\n";
        $in_chng = 1;
        next;
      } else {
        print '', entify($_), "\n";
        next;
      }
    }

    if (/^(---|[*+\-])/) {
      if ($in_chng) {
        print "";
        $in_chng = 0;
      }
      elsif ($1 eq '---' || $1 eq '*') {
        print '', entify($_), "\n";
      }
      elsif ($1 eq '+') {
        print '', entify($_), "\n";
      }
      elsif ($1 eq '-') {
        print '', entify($_), "\n";
      }
      next;
    }

    if ($in_chng && !/\S/) {
      print "";
      $in_chng = 0;
    }

    print entify($_), "\n";
  }

  print qq{
\n}; } ############################################################################## sub entify { my $txt = shift; $txt =~ s/&/&/g; $txt =~ s//>/g; $txt; } ############################################################################## __END__ =head1 NAME dtddiff2html - Convert DTD difference to HTML. =head1 SYNOPSIS dtddiff parsed-dtd1.xml parsed-dtd2.xml > dtd-diff.txt dtddiff2html dtd-diff.txt > dtd-diff.html dtddiff parsed-dtd1.xml parsed-dtd2.xml | dtddiff2html > dtd-diff.html =head1 DESCRIPTION B converts the diff-style output of B into HTML. The HTML created basically provides color-highlighting of the diff output. =head1 OPTIONS =over 4 =item --version Print version and synopsis. =item --help Print synopsis and options available. =item --man Print manual page. =back =head1 SEE ALSO L See L for an overview of the DTDParse package. =head1 PREREQUISITES B =head1 AVAILABILITY EIE =head1 AUTHORS Earl Hood, =head1 COPYRIGHT AND LICENSE See L for copyright and license information. SGML-DTDParse-2.00/bin/dtddiff0100755004705000001440000010055410266076621014737 0ustar ehoodusers#!/usr/bin/perl -w # $Id: dtddiff,v 2.2 2005/07/16 03:22:57 ehood Exp $ # Author(s): Earl Hood, # POD at end of file. use XML::Parser; use File::Basename; use Getopt::Long; use SGML::DTDParse; MAIN: { my %opts = ( 'attributes' => 1, 'content-model-expanded' => 1, 'dump' => 0, 'elements' => 1, 'general-ents' => 0, 'param-ents' => 0, ); GetOptions(\%opts, 'attributes!', # Show attribute differences 'content-model-expanded!', # Show expanded content-models 'elements!', # Show element differences 'general-ents!', # Show general entity differences 'param-ents!', # Show parameter entity differences 'dump', @SGML::DTDParse::CommonOptions ) || SGML::DTDParse::usage(-verbose => 0, -exitval => 1); SGML::DTDParse::process_common_options(\%opts); my $outfh = \*STDOUT; select($outfh); my $file1 = shift @ARGV; if (!defined($file1)) { usage(-verbose => 0, -exitval => 1, -message => 'Error: No input file(s) specified'); } my $file2 = shift @ARGV; if (!$opts{'dump'} && !defined($file2)) { usage(-verbose => 0, -exitval => 1, -message => 'Error: Second input file not specified'); } my $dtd1 = read_xml($file1); my $dtd2 = read_xml($file2) unless $opts{'dump'}; if ($opts{'dump'}) { dump_dtd_info($dtd1); last MAIN; } my $title1 = $dtd1->{'title'}; my $title2 = $dtd2->{'title'}; $title1 = basename($file1, '.xml') unless defined $title1; $title2 = basename($file2, '.xml') unless defined $title2; my @param_subtracted = ( ); my @param_added = ( ); my @param_diff = ( ); my @elems_subtracted = ( ); my @elems_added = ( ); my @elems_diff = ( ); my $elem_diff_rec; my $gi; my $aname; if ($opts{'param-ents'}) { # Check for param ents substracted foreach $gi (sort keys %{$dtd1->{'entities'}{'param'}}) { if (!defined($dtd2->{'entities'}{'param'}{$gi})) { push(@param_subtracted, $gi); } } # Check for param ents added foreach $gi (sort keys %{$dtd2->{'entities'}{'param'}}) { if (!defined($dtd1->{'entities'}{'param'}{$gi})) { push(@param_added, $gi); next; } # XXX: Should comparison be case-sensitive? Configurable? my $text1 = lc $dtd1->{'entities'}{'param'}{$gi}{'text-expanded'}; my $text2 = lc $dtd2->{'entities'}{'param'}{$gi}{'text-expanded'}; $text1 =~ s/^\s+//; $text1 =~ s/\s+\z//; $text2 =~ s/^\s+//; $text2 =~ s/\s+\z//; if ($text1 ne $text2) { push(@param_diff, $gi); } } } if ($opts{'elements'} || $opts{'attributes'}) { # Check for elements substracted if ($opts{'elements'}) { foreach $gi (sort keys %{$dtd1->{'elements'}}) { if (!defined($dtd2->{'elements'}{$gi})) { push(@elems_subtracted, $gi); } } } # Check for elements added and changed foreach $gi (sort keys %{$dtd2->{'elements'}}) { if (!defined($dtd1->{'elements'}{$gi})) { push(@elems_added, $gi) if ($opts{'elements'}); next; } $elem_diff_rec = { }; my $elem_info1 = $dtd1->{'elements'}{$gi}; my $elem_info2 = $dtd2->{'elements'}{$gi}; if ($opts{'elements'}) { my $model_type = $opts{'content-model-expanded'} ? 'content-model-expanded-tree' : 'content-model-tree'; my $cmp_model1 = sort_content_model_tree($elem_info1->{$model_type}); $cmp_model1 .= sort_content_model_tree($elem_info1->{'inclusion-tree'}) if $elem_info1->{'inclusion-tree'}; $cmp_model1 .= sort_content_model_tree($elem_info1->{'exclusion-tree'}) if $elem_info1->{'exclusion-tree'}; my $cmp_model2 = sort_content_model_tree($elem_info2->{$model_type}); $cmp_model2 .= sort_content_model_tree($elem_info2->{'inclusion-tree'}) if $elem_info2->{'inclusion-tree'}; $cmp_model2 .= sort_content_model_tree($elem_info2->{'exclusion-tree'}) if $elem_info1->{'exclusion-tree'}; # content models differ if ($cmp_model1 ne $cmp_model2) { $elem_diff_rec->{'name'} = $gi; $elem_diff_rec->{'model'} = 1; } } # check attributes if ($opts{'attributes'}) { my $attrs1 = $elem_info1->{'attributes'} || +{ }; my $attrs2 = $elem_info2->{'attributes'} || +{ }; my $attrs_added = [ ]; my $attrs_subtracted = [ ]; my $attrs_diff = [ ]; # attributes subtracted foreach $aname (sort keys %$attrs1) { if (!defined($attrs2->{$aname})) { push(@$attrs_subtracted, $aname); } } # attributes added and changed foreach $aname (sort keys %$attrs2) { if (!defined($attrs1->{$aname})) { push(@$attrs_added, $aname); next; } my $attr_info1 = $attrs1->{$aname}; my $attr_info2 = $attrs2->{$aname}; my $attr_value1 = $attr_info1->{'value'}; my $attr_value2 = $attr_info2->{'value'}; my $attr_def1 = $attr_info1->{'default'}; my $attr_def2 = $attr_info2->{'default'}; my $attr_type1 = $attr_info1->{'type'}; my $attr_type2 = $attr_info2->{'type'}; $attr_def1 = lc $attr_def1 if $dtd1->{'namecase-general'} && $attr_type1 !~ /^cdata$/i; $attr_def2 = lc $attr_def2 if $dtd2->{'namecase-general'} && $attr_type2 !~ /^cdata$/i; $attr_value1 = lc $attr_value1 if $dtd1->{'namecase-general'}; $attr_value2 = lc $attr_value2 if $dtd2->{'namecase-general'}; if ($attr_type1 ne $attr_type2 || $attr_value1 ne $attr_value2 || $attr_def1 ne $attr_def2 || $attr_info1->{'enumeration'} ne $attr_info2->{'enumeration'}) { push(@$attrs_diff, $aname); } } if (scalar(@$attrs_added)) { $elem_diff_rec->{'name'} = $gi; $elem_diff_rec->{'attr_added'} = $attrs_added; } if (scalar(@$attrs_subtracted)) { $elem_diff_rec->{'name'} = $gi; $elem_diff_rec->{'attr_subtracted'} = $attrs_subtracted; } if (scalar(@$attrs_diff)) { $elem_diff_rec->{'name'} = $gi; $elem_diff_rec->{'attr_diff'} = $attrs_diff; } } push(@elems_diff, $elem_diff_rec) if scalar(%$elem_diff_rec); } } # Print diff print $outfh "*** $title1\n"; print $outfh "--- $title2\n"; if ($opts{'param-ents'}) { if (@param_subtracted) { print $outfh ('*' x 15), " Parameter Entities Subtracted\n"; print $outfh "*** $title1 ****\n"; foreach $gi (@param_subtracted) { print $outfh '- ', $gi, "\n"; } } if (@param_added) { print $outfh ('*' x 15), " Parameter Entities Added\n"; print $outfh "--- $title2 ----\n"; foreach $gi (@param_added) { print $outfh '+ ', $gi, "\n"; } } if (@param_diff) { print $outfh ('*' x 15), " Parameter Entities Changed\n"; foreach $gi (@param_diff) { print $outfh "*** $title1 ****\n\n"; print $outfh '! %', $gi, "; = \n"; local $param_value = $dtd1->{'entities'}{'param'}{$gi}{'text-expanded'}; select((select($outfh), $~ = "PARAM_DIFF_CHNG", $= = 10000000, $: = "|&, \t\n" )[0]); write $outfh; print $outfh "\n--- $title2 ----\n\n"; print $outfh '! %', $gi, "; = \n"; $param_value = $dtd2->{'entities'}{'param'}{$gi}{'text-expanded'}; write $outfh; print $outfh "\n"; } } } # End: $opts{param-ents} if ($opts{'elements'} || $opts{'attributes'}) { if (@elems_subtracted) { print $outfh ('*' x 15), " Elements Subtracted\n"; print $outfh "*** $title1 ****\n"; foreach $gi (@elems_subtracted) { print $outfh '- ', $gi, "\n"; } } if (@elems_added) { print $outfh ('*' x 15), " Elements Added\n"; print $outfh "--- $title2 ----\n"; foreach $gi (@elems_added) { print $outfh '+ ', $gi, "\n"; } } if (@elems_diff) { print $outfh ('*' x 15), " Elements Changed\n"; foreach $elem_diff_rec (@elems_diff) { $gi = $elem_diff_rec->{'name'}; print $outfh "*** $title1 ****\n\n" if ($elem_diff_rec->{'model'}) || ($opts{'attributes'} && (($elem_diff_rec->{'attr_subtracted'}) || ($elem_diff_rec->{'attr_diff'}))); if ($elem_diff_rec->{'model'}) { print $outfh '! '; print_elem($outfh, $gi, $dtd1->{'elements'}{$gi}, $opts{'content-model-expanded'}, 2); } if ($opts{'attributes'}) { print $outfh "\n $gi Attributes:\n" if ($elem_diff_rec->{'attr_subtracted'}) || ($elem_diff_rec->{'attr_diff'}); if ($elem_diff_rec->{'attr_subtracted'}) { foreach $aname (@{$elem_diff_rec->{'attr_subtracted'}}) { print_attr_subtracted($outfh, $aname, $dtd1->{'elements'}{$gi}{'attributes'}{$aname}); } } if ($elem_diff_rec->{'attr_diff'}) { foreach $aname (@{$elem_diff_rec->{'attr_diff'}}) { print_attr_chng($outfh, $aname, $dtd1->{'elements'}{$gi}{'attributes'}{$aname}); } } } print $outfh "\n--- $title2 ----\n\n" if ($elem_diff_rec->{'model'}) || ($opts{'attributes'} && (($elem_diff_rec->{'attr_added'}) || ($elem_diff_rec->{'attr_diff'}))); if ($elem_diff_rec->{'model'}) { print $outfh '! '; print_elem($outfh, $gi, $dtd2->{'elements'}{$gi}, $opts{'content-model-expanded'}, 2); } if ($opts{'attributes'}) { print $outfh "\n $gi Attributes:\n" if ($elem_diff_rec->{'attr_added'}) || ($elem_diff_rec->{'attr_diff'}); if ($elem_diff_rec->{'attr_added'}) { foreach $aname (@{$elem_diff_rec->{'attr_added'}}) { print_attr_added($outfh, $aname, $dtd2->{'elements'}{$gi}{'attributes'}{$aname}); } } if ($elem_diff_rec->{'attr_diff'}) { foreach $aname (@{$elem_diff_rec->{'attr_diff'}}) { print_attr_chng($outfh, $aname, $dtd2->{'elements'}{$gi}{'attributes'}{$aname}); } } } print $outfh "\n"; } } } # End: $opts{elements} } ############################################################################## #** Read XML representation of DTD # # =param $file XML filename. # =return Reference to hash contain DTD data extracted. #* sub read_xml { my $file = shift; my $dtd = { filename => $file, }; # closure globals used in parsing my @element_stack = (); my @model_group = (); my $model_group = ""; my @first_in_group = (); my $first_in_group = 0; my $cur_dtd_elem = undef; my $cur_model = undef; my $cur_entity = undef; my @group_occurrence = (); my $group_occurrence = ""; my $tree_node = undef; my @node_stack = (); # Create parser with handlers my $parser = XML::Parser->new(Handlers => { ## Start tag handler ---------------------------------------------------- Start => sub { my $expat = shift; my $gi = shift; my %attr = @_; push(@element_stack, $gi); SW: { if ($gi eq 'dtd') { $dtd->{'namecase-general'} = $attr{'namecase-general'}; $dtd->{'title'} = $attr{'title'}; last SW; } if ($gi eq 'notation') { my $name = $attr{'name'}; $dtd->{'notations'}{$name} = { 'system' => $attr{'system'}, 'public' => $attr{'public'}, }; last SW; } if ($gi eq 'entity') { my $name = $attr{'name'}; my $type = $attr{'type'}; $cur_entity = { 'system' => $attr{'system'}, 'public' => $attr{'public'}, 'notation' => $attr{'notation'}, 'text-expanded' => '', 'text' => '', }; $dtd->{'entities'}{$type}{$name} = $cur_entity; last SW; } if ($gi eq 'element') { $cur_dtd_elem = $attr{'name'}; $cur_dtd_elem = lc($cur_dtd_elem) if ($dtd->{'namecase-general'}); $dtd->{'elements'}{$cur_dtd_elem}{'stagm'} = $attr{'stagm'}; $dtd->{'elements'}{$cur_dtd_elem}{'etagm'} = $attr{'etagm'}; $dtd->{'elements'}{$cur_dtd_elem}{'content-type'} = $attr{'content-type'}; last SW; } if ($gi eq 'content-model-expanded') { last SW unless defined $cur_dtd_elem; $dtd->{'elements'}{$cur_dtd_elem}{'content-model-expanded'} = ''; $cur_model = \$dtd->{'elements'}{$cur_dtd_elem} {'content-model-expanded'}; $tree_node = $dtd->{'elements'}{$cur_dtd_elem} {'content-model-expanded-tree'} = [ ]; last SW; } if ($gi eq 'content-model') { last SW unless defined $cur_dtd_elem; $dtd->{'elements'}{$cur_dtd_elem}{'content-model'} = ''; $cur_model = \$dtd->{'elements'}{$cur_dtd_elem}{'content-model'}; $tree_node = $dtd->{'elements'}{$cur_dtd_elem} {'content-model-tree'} = [ ]; last SW; } if ($gi eq 'inclusions') { last SW unless defined $cur_dtd_elem; $dtd->{'elements'}{$cur_dtd_elem}{'inclusions'} = ''; $cur_model = \$dtd->{'elements'}{$cur_dtd_elem}{'inclusions'}; $tree_node = $dtd->{'elements'}{$cur_dtd_elem}{'inclusion-tree'} = [ ]; last SW; } if ($gi eq 'exclusions') { last SW unless defined $cur_dtd_elem; $dtd->{'elements'}{$cur_dtd_elem}{'exclusions'} = ''; $cur_model = \$dtd->{'elements'}{$cur_dtd_elem}{'exclusions'}; $tree_node = $dtd->{'elements'}{$cur_dtd_elem}{'exclusion-tree'} = [ ]; last SW; } if ($gi eq 'sequence-group' || $gi eq 'or-group' || $gi eq 'and-group') { last SW if !defined($cur_dtd_elem) || !defined($cur_model); if (scalar(@first_in_group)) { $$cur_model .= $model_group if (!$first_in_group[$#first_in_group]); $first_in_group[$#first_in_group] = 0; } $$cur_model .= '('; $group_occurrence = $attr{'occurrence'} || ""; push(@group_occurrence, $group_occurrence); $model_group = $gi eq 'sequence-group' ? ',' : $gi eq 'or-group' ? '|' : '&'; push(@model_group, $model_group); push(@first_in_group, 1); my $new_node = [ $model_group, $group_occurrence ]; push(@$tree_node, $new_node); push(@node_stack, $tree_node); $tree_node = $new_node; last SW; } if ($gi eq 'element-name') { last SW if !defined($cur_dtd_elem) || !defined($cur_model); $elem_name = $attr{'name'}; $occurrence = $attr{'occurrence'} || ''; $elem_name = lc($elem_name) if ($dtd->{'namecase-general'}); $$cur_model .= $model_group if (!$first_in_group[$#first_in_group]); $$cur_model .= $elem_name . $occurrence; $first_in_group[$#first_in_group] = 0; push(@$tree_node, $elem_name.$occurrence); last SW; } if ($gi eq 'parament-name') { last SW if !defined($cur_dtd_elem) || !defined($cur_model); $$cur_model .= $model_group if (!$first_in_group[$#first_in_group]); $$cur_model .= '%'.$attr{'name'}.';'; $first_in_group[$#first_in_group] = 0; push(@$tree_node, '%'.$attr{'name'}.';'); last SW; } if ($gi eq 'pcdata') { last SW if !defined($cur_dtd_elem) || !defined($cur_model); $$cur_model .= $model_group if (!$first_in_group[$#first_in_group]); $$cur_model .= '#PCDATA'; $first_in_group[$#first_in_group] = 0; push(@$tree_node, '#PCDATA'); last SW; } if ($gi eq 'rcdata') { last SW if !defined($cur_dtd_elem) || !defined($cur_model); $$cur_model .= 'RCDATA'; push(@$tree_node, 'RCDATA'); last SW; } if ($gi eq 'cdata') { last SW if !defined($cur_dtd_elem) || !defined($cur_model); $$cur_model .= 'CDATA'; push(@$tree_node, 'CDATA'); last SW; } if ($gi eq 'empty') { last SW if !defined($cur_dtd_elem) || !defined($cur_model); $$cur_model .= 'EMPTY'; push(@$tree_node, 'EMPTY'); last SW; } if ($gi eq 'attlist') { $cur_dtd_elem = $attr{'name'}; $cur_dtd_elem = lc($cur_dtd_elem) if ($dtd->{'namecase-general'}); $dtd->{'elements'}{$cur_dtd_elem}{'attributes'} = { }; last SW; } if ($gi eq 'attribute') { last SW unless defined($cur_dtd_elem); my $attr_name = $attr{'name'}; $attr_name = lc($attr_name) if ($dtd->{'namecase-general'}); $dtd->{'elements'}{$cur_dtd_elem}{'attributes'}{$attr_name} = { type => $attr{'type'}, value => $attr{'value'}, default => $attr{'default'}, enumeration => ($attr{'enumeration'} || 'no'), }; last SW; } } # End: SW }, # End: Start Handler ## End tag handler ------------------------------------------------------ End => sub { my $expact = shift; my $gi = shift; my $name = pop(@element_stack); SW: { if ($gi eq 'entity') { $cur_entity->{'text-expanded'} =~ s/\s+/ /g; $cur_entity->{'text'} =~ s/\s+/ /g; $cur_entity = undef; last SW; } if ($gi eq 'element' || $gi eq 'attlist') { $cur_dtd_elem = undef; $cur_model = undef; last SW; } if ($gi eq 'content-model-expanded' || $gi eq 'content-model' || $gi eq 'inclusions' || $gi eq 'exclusions') { $cur_model = undef; $tree_node = undef; @node_stack = ( ); last SW; } if ($gi eq 'sequence-group' || $gi eq 'or-group' || $gi eq 'and-group') { pop(@model_group); $model_group = scalar($model_group) ? $model_group[-1] : ''; pop(@first_in_group); $$cur_model .= ')' . $group_occurrence if defined($cur_model); $tree_node = pop(@node_stack); pop(@group_occurrence); $group_occurrence = scalar(@group_occurrence) ? $group_occurrence[-1] : ''; last SW; } } # End: SW }, # End: End tag handler ## Character data handler ----------------------------------------------- Char => sub { my $expat = shift; my $string = shift; my $open_elem = $element_stack[-1]; SW: { if ($open_elem eq 'text-expanded') { $cur_entity->{'text-expanded'} .= $string; last SW; } if ($open_elem eq 'text') { $cur_entity->{'text'} .= $string; last SW; } } # End: SW }, # End: Character data handler }); $parser->parsefile($file); return $dtd; } # End: sub read_xml ##--------------------------------------------------------------------------## sub sort_content_model_tree { my $tree = shift; return '' unless defined $tree; return '' if (!scalar(@$tree)); return $tree->[0] if !ref($tree->[0]) && ($tree->[0] !~ /[,|&]/); my @items = @$tree; my $con = ref($items[0]) ? '' : shift(@items); my $occurrence = $con ? shift(@items) : ''; my @sort_items = ( ); foreach my $item (@items) { if (ref($item)) { push(@sort_items, sort_content_model_tree($item)); next; } push(@sort_items, $item); } @sort_items = sort { my $A = $a; my $B = $b; $A =~ s/[\(\)*?+]//g; $B =~ s/[\(\)*?+]//g; $A cmp $B; } @sort_items unless $con eq ','; my $text = ''; $text .= '(' if $con; $text .= join($con, @sort_items); $text .= ')' if $con; $text .= $occurrence; $text; } ##--------------------------------------------------------------------------## sub format_content_model { my $model = shift; my $indent = shift || 0; my $maxlen = shift || 65; my $tokens; if (ref($model) =~ /ARRAY/) { $tokens = $model; } else { $model =~ s/\s+//g; $tokens = [ split(/([,|&\(\)?*+])/, $model) ]; } my $nl = "\n" . (' ' x $indent); my $first = 1; my $open = 0; my $prev = ''; my $fmt = ''; my $len = 0; my($tmp); foreach my $token (@$tokens) { next unless $token =~ /\S/; if ($token eq '(') { if ($prev eq $token) { # Print consecutive ('s together $fmt .= $token; } else { # Else, start newline if ($first) { $first = 0; } else { $fmt .= $nl; } $fmt .= (' ' x $open) . $token; } $open++; # Increase group open counter $len = $open+1; # Adjust length of line counter next; # Goto next token } $len += length($token); if ($token eq '&' || # Put spaces around '&'. $token eq '|') { # Put spaces around '|'. $fmt .= ' ' . $token . ' '; $len += 2; } elsif ($token eq ',') { # Put space after ','. $fmt .= $token . ' '; ++$len; } elsif ($token eq ')') { $fmt .= $token; $open--; } elsif ($token =~ /[*+?]/) { $fmt .= $token; } elsif (($len+length($token)) > $maxlen) { $fmt .= $nl . (' ' x $open) . $token; $len = $open + length($token); } else { $fmt .= $token; } } continue { $prev = $token if $token =~ /\S/; } $fmt .= "\n"; $fmt; } ##--------------------------------------------------------------------------## sub print_elem { my $fh = shift; my $gi = shift; my $info = shift; my $expand = shift; my $indent = shift || 0; my $model_type = $expand ? 'content-model-expanded-tree' : 'content-model-tree'; $expand = 1 unless defined $expand; $indent += 4; print $fh "$gi ::=\n", (' ' x $indent), format_content_model(sort_content_model_tree( $info->{$model_type}), $indent); print $fh ' ' x ($indent-1), '+', format_content_model(sort_content_model_tree( $info->{'inclusion-tree'}), $indent), if $info->{'inclusion-tree'}; print $fh ' ' x ($indent-1), '-', format_content_model(sort_content_model_tree( $info->{'exclusion-tree'}), $indent), if $info->{'exclusion-tree'}; } ##--------------------------------------------------------------------------## sub print_attr_added { my $fh = shift; my $name = shift; my $attr_rec = shift; local($attr_name, $attr_type, $attr_default); set_attr_form_variables($name, $attr_rec); select((select($fh), $~ = "ATTR_DIFF_ADD", $= = 10000000 )[0]); write $fh; } sub print_attr_subtracted { my $fh = shift; my $name = shift; my $attr_rec = shift; local($attr_name, $attr_type, $attr_default); set_attr_form_variables($name, $attr_rec); select((select($fh), $~ = "ATTR_DIFF_SUB", $= = 10000000 )[0]); write $fh; } sub print_attr_chng { my $fh = shift; my $name = shift; my $attr_rec = shift; local($attr_name, $attr_type, $attr_default); set_attr_form_variables($name, $attr_rec); select((select($fh), $~ = "ATTR_DIFF_CHNG", $= = 10000000 )[0]); write $fh; } sub set_attr_form_variables { my $name = shift; # attribute name my $rec = shift; # attribute record my $enum = $rec->{'enumeration'}; $attr_name = $name; $attr_type = $enum eq 'yes' ? join(', ', split(' ', $rec->{'value'})) : $rec->{'value'}; $attr_default = $rec->{'default'}; if ($enum eq 'yes') { $attr_type = "[Enumeration] \n".$attr_type; } elsif ($enum eq 'notation') { $attr_type = "[Notation] \n".$attr_type; } if ($attr_default eq "") { $attr_default = $rec->{'type'}; } else { $attr_default = '"'.$attr_default.'"'; } } ##--------------------------------------------------------------------------## sub dump_dtd_info { my $dtd = shift; $= = 10000000; $: = "|,& \t\n"; $~ = "PARAM_DUMP"; foreach my $gi (sort keys %{$dtd->{'entities'}{'param'}}) { my $param_info = $dtd->{'entities'}{'param'}{$gi}; print '-' x 72, "\n"; print '%', $gi, " = \n"; if ($param_info->{'text-expanded'}) { local $param_value = $param_info->{'text-expanded'}; write; } else { if ($param_info->{'public'}) { print ' PUBLIC "', $param_info->{'public'}, '"', "\n"; } if ($param_info->{'system'}) { print ' PUBLIC "', $param_info->{'system'}, '"', "\n"; } } } $~ = "ATTR_DUMP"; foreach my $gi (sort keys %{$dtd->{'elements'}}) { print '-' x 72, "\n"; my $elem_info = $dtd->{'elements'}{$gi}; print "$gi ::=\n ", format_content_model(sort_content_model_tree( $elem_info->{'content-model-tree'}), 4), "\n"; print "$gi(expanded) ::=\n ", format_content_model(sort_content_model_tree( $elem_info->{'content-model-expanded-tree'}), 4); print "\n" if $elem_info->{'inclusion-tree'} || $elem_info->{'exclusion-tree'}; print " +", format_content_model(sort_content_model_tree( $elem_info->{'inclusion-tree'}), 4), if $elem_info->{'inclusion-tree'}; print " -", format_content_model(sort_content_model_tree( $elem_info->{'exclusion-tree'}), 4), if $elem_info->{'exclusion-tree'}; my $attrs = $elem_info->{'attributes'}; print "\n Attributes:\n"; foreach my $attr (sort keys %$attrs) { my $enum = $attrs->{$attr}{'enumeration'}; local $attr_name = $attr; local $attr_type = $enum eq 'yes' ? join(', ', split(' ', $attrs->{$attr}{'value'})) : $attrs->{$attr}{'value'}; local $attr_default = $attrs->{$attr}{'default'}; if ($enum eq 'yes') { $attr_type = "[Enumeration] \n".$attr_type; } elsif ($enum eq 'notation') { $attr_type = "[Notation] \n".$attr_type; } if ($attr_default eq "") { $attr_default = $attrs->{$attr}{'type'}; } else { $attr_default = '"'.$attr_default.'"'; } write; } print "\n"; } } ############################################################################## format ATTR_DUMP = @<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<< $attr_name, $attr_type, $attr_default ~~ ^<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<< $attr_type, $attr_default . format ATTR_DIFF_SUB = - @<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<< $attr_name, $attr_type, $attr_default -~~ ^<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<< $attr_type, $attr_default . format ATTR_DIFF_ADD = + @<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<< $attr_name, $attr_type, $attr_default +~~ ^<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<< $attr_type, $attr_default . format ATTR_DIFF_CHNG = ! @<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<< $attr_name, $attr_type, $attr_default !~~ ^<<<<<<<<<<<<<<<<<<<<<<<< ^<<<<<<<<<<<<<<<<<<<<<< $attr_type, $attr_default . format PARAM_DUMP = ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $param_value ~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $param_value . format PARAM_DIFF_CHNG = ! ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $param_value !~~ ^<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< $param_value . __END__ =head1 NAME dtddiff - Compare two SGML/XML DTDs =head1 SYNOPSIS dtddiff [options] =head1 DESCRIPTION B compares two SGML/XML DTDs based upon the XML dumps generated by B. The following summarizes the typically usage of dtddiff: dtdparse --outfile parsed-dtd1.xml dtd1.dtd dtdparse --outfile parsed-dtd2.xml dtd2.dtd dtddiff parsed-dtd1.xml parsed-dtd2.xml > dtd.diff Since dtddiff processes the XML dumps from dtdparse, a full reparse of the DTDs is avoided. dtddiff does a structural-based comparision. Therefore, the order of declarations in the DTDs does not affect the comparison. The output generated by dtddiff is similiar in style to a context-based diff done by the program diff(1). The following is an example of the type of output generated: *** DocBook 4.1 DTD --- DocBook 4.2 DTD *************** Elements Added --- DocBook 4.2 DTD ---- + bibliocoverage + biblioid + bibliorelation + bibliosource + blockinfo + citebiblioid + coref + errortext + personblurb + personname + refsection + refsectioninfo + textdata *************** Elements Changed ... [snip] ... *** DocBook 4.1 DTD **** ! entrytbl ::= (colspec*, spanspec*, thead?, tbody) -(entrytbl) entrytbl Attributes: ! charoff NUTOKEN #IMPLIED ! colname NMTOKEN #IMPLIED ! cols NUMBER #REQUIRED ! colsep NUMBER #IMPLIED ! nameend NMTOKEN #IMPLIED ! namest NMTOKEN #IMPLIED ! rowsep NUMBER #IMPLIED ! spanname NMTOKEN #IMPLIED ! tgroupstyle NMTOKEN #IMPLIED --- DocBook 4.2 DTD ---- ! entrytbl ::= (colspec*, spanspec*, thead?, tbody) entrytbl Attributes: ! charoff CDATA #IMPLIED ! colname CDATA #IMPLIED ! cols CDATA #REQUIRED ! colsep CDATA #IMPLIED ! nameend CDATA #IMPLIED ! namest CDATA #IMPLIED ! rowsep CDATA #IMPLIED ! spanname CDATA #IMPLIED ! tgroupstyle CDATA #IMPLIED ... [snip] ... *** DocBook 4.1 DTD **** graphic Attributes: ! depth NUTOKEN #IMPLIED ! format [Enumeration] #IMPLIED ! BMP, CGM-CHAR, CGM- ! BINARY, CGM-CLEAR, ! DITROFF, DVI, EPS, EQN, ! FAX, GIF, GIF87a, GIF89a, ! JPG, JPEG, IGES, PCX, ! PIC, PNG, PS, SGML, TBL, ! TEX, TIFF, WMF, WPG, ! linespecific ! scale NUMBER #IMPLIED ! scalefit NUMBER #IMPLIED ! width NUTOKEN #IMPLIED --- DocBook 4.2 DTD ---- graphic Attributes: + contentdepth CDATA #IMPLIED + contentwidth CDATA #IMPLIED + valign [Enumeration] #IMPLIED + top, middle, bottom ! depth CDATA #IMPLIED ! format [Enumeration] #IMPLIED ! BMP, CGM-CHAR, CGM- ! BINARY, CGM-CLEAR, ! DITROFF, DVI, EPS, EQN, ! FAX, GIF, GIF87a, GIF89a, ! JPG, JPEG, IGES, PCX, ! PIC, PNG, PS, SGML, TBL, ! TEX, TIFF, WMF, WPG, SVG, ! linespecific ! scale CDATA #IMPLIED ! scalefit CDATA #IMPLIED ! width CDATA #IMPLIED ... [snip] ... Lines starting with a C<- > (minus followed by a space) denote items removed. Lines starting with a C<+ > (plus followed by a space) denote items added. Lines starting with a C (explanation point followed by a space) denote items changed. =head1 OPTIONS =over =item --attributes =item --noattributes Print, or not, element attribute differences. The default is to print differences. =item --content-model-expanded =item --nocontent-model-expanded Expand, or not expand, element content models during comparison. Expanded models have all parameter entities resolved. The default is to use expanded content model. =item --elements =item --noelements Print, or not, element content model differences. The default is to print differences. =item --general-ents =item --nogeneral-ents Print, or not, general entity differences. The default is to B print differences. =item --parameter-ents =item --noparameter-ents Print, or not, parameter entity differences. The default is to B print differences. =item --dump Do a textual dump of a DTD. When this option is specified, only a single DTD is dumped. This is mainly used for debugging purposes. =item --version Print version and synopsis. =item --help Print synopsis and options available. =item --man Print manual page. =back =head1 SEE ALSO L, L See L for an overview of the DTDParse package. =head1 PREREQUISITES B, B, B =head1 AVAILABILITY EIE =head1 AUTHORS Earl Hood, =head1 COPYRIGHT AND LICENSE See L for copyright and license information. SGML-DTDParse-2.00/bin/dtdflatten0100755004705000001440000002341510266076621015464 0ustar ehoodusers#!/usr/bin/perl -- # -*- Perl -*- # $Id: dtdflatten,v 2.2 2005/07/16 03:22:57 ehood Exp $ # Author(s): Norman Walsh, # Earl Hood, # POD at end of file. use strict; use Getopt::Long; use SGML::DTDParse; use SGML::DTDParse::DTD; my %option = ('debug' => 0, 'verbose' => 1, 'output' => '-', 'declaration' => ''); my %opt = (); &GetOptions( \%opt, 'debug+', 'verbose!', 'output=s', 'catalog=s@', 'preserve=s@', 'declaration=s', @SGML::DTDParse::CommonOptions ) || SGML::DTDParse::usage(-verbose => 0, -exitval => 1); SGML::DTDParse::process_common_options(\%opt); foreach my $key (keys %option) { $option{$key} = $opt{$key} if exists($opt{$key}); } my @catalogs = exists($opt{'catalog'}) ? @{$opt{'catalog'}} : (); my @preserve = exists($opt{'preserve'}) ? @{$opt{'preserve'}} : (); my $file = shift @ARGV; my $output = $option{'output'} || ''; my $dtd = new SGML::DTDParse::DTD ( 'Verbose' => $option{'verbose'}, 'Debug' => $option{'debug'}, 'SgmlCatalogFilesEnv' => $option{'use-sgml-catalog-files'}, 'SourceDtd' => $file, 'Declaration' => $option{'declaration'}); foreach my $catalog (@catalogs) { $dtd->parseCatalog($catalog); } $dtd->parse($file); my $out_fh = \*STDOUT; if ($output ne '') { use Symbol; $out_fh = gensym; open ($out_fh, ">$output") || die qq{Error: Unable to create "$output": $!\n}; $dtd->status("Writing $output...", 1); } my $declcount = $dtd->declaration_count(); my @decls = $dtd->declarations(); my %peindex = (); for (my $count = 0; $count < $declcount; $count++) { $peindex{$decls[$count]->name()} = $count if $decls[$count]->type() eq 'param'; } $dtd->status("$declcount declarations.", 1); $dtd->status("Calculating used entities...", 1); my %usedPE = (); foreach my $decl (@decls) { if ($decl->type() eq 'element') { my $cm = $decl->content_model(); while ($cm =~ /^(.*?)%(\S+?);/s) { my $pe = $2; $usedPE{$pe} = 0 if !exists($usedPE{$pe}); $usedPE{$pe}++; $cm = $'; } } elsif ($decl->type() eq 'attlist') { my $text = $decl->text(); while ($text =~ /^(.*?)%(\S+?);/s) { my $pe = $2; $usedPE{$pe} = 0 if !exists($usedPE{$pe}); $usedPE{$pe}++; $text = $'; } } } # Now we know which elements use them, let's recurse... my %checkedPE = (); my $changed = 1; while ($changed) { $changed = 0; foreach my $decl (@decls) { if ($decl->type() eq 'param') { my $name = $decl->name(); my $text = $decl->text(); if ($usedPE{$name} && !$checkedPE{$name}) { $checkedPE{$name} = 1; $changed = 1; while ($text =~ /^(.*?)%(\S+?);/s) { my $pe = $2; $usedPE{$pe} = 0 if !exists($usedPE{$pe}); $usedPE{$pe}++; $text = $'; } } } } } # now output the flattened DTD print $out_fh <<'EOT'; EOT print $out_fh "\n\n"; foreach my $decl (@decls) { if ($decl->type() eq 'element') { my $name = $decl->name(); my $cm = $decl->content_model(); $cm = &expandPE($cm); print $out_fh "\n"; } elsif ($decl->type() eq 'attlist') { my $name = $decl->name(); my $text = $decl->text(); $text = &expandPE($text); print $out_fh "\n"; } elsif ($decl->type() eq 'param') { my $name = $decl->name(); my $keep = 0; if ($usedPE{$name}) { foreach my $re (@preserve) { $keep = 1 if $name =~ /$re/; last if $keep; } if ($keep) { my $text = $decl->text(); $text = &expandPE($text); my $quote = '"'; if ($text =~ /\"/s) { $quote = "'"; $text =~ s/\'/\'/sg; } print $out_fh "\n"; } } } elsif ($decl->type() eq 'gen') { my $name = $decl->name(); my $public = $decl->public(); my $system = $decl->system(); if ($public || $system) { print $out_fh "\n"; } else { my $text = $decl->text(); $text = &expandPE($text); my $quote = '"'; if ($text =~ /\"/s) { $quote = "'"; $text =~ s/\'/\'/sg; } print $out_fh "\n"; } } elsif ($decl->type() eq 'sdata' || $decl->type() eq 'pi') { my $name = $decl->name(); print $out_fh "type()) . " "; my $text = $decl->text(); $text = &expandPE($text); my $quote = '"'; if ($text =~ /\"/s) { $quote = "'"; $text =~ s/\'/\'/sg; } print $out_fh "$quote$text$quote>\n"; } elsif ($decl->type() eq 'ndata' || $decl->type() eq 'cdata') { my $name = $decl->name(); my $public = $decl->public(); my $system = $decl->system(); print $out_fh "type()), " ", $decl->notation(); print $out_fh ">\n"; } elsif ($decl->type() eq 'notation') { my $name = $decl->name(); my $public = $decl->public(); my $system = $decl->system(); print $out_fh "\n"; } else { die "Error: Unexpected declaration type: " . $decl->type() . "\n"; } } close($out_fh) if $output; $dtd->status("Done.", 1); exit 0; # ================================================================= sub expandPE { my $text = shift; my $expanded = ""; while ($text =~ /%(\S+?);/s) { $expanded .= $`; my $post = $'; my $pe = $1; my $keep = 0; foreach my $re (@preserve) { $keep = 1 if $pe =~ /$re/; last if $keep; } if ($keep) { $expanded .= "%$pe;"; $text = $post; } else { my $index = $peindex{$pe}; die "Error: Unexpected PE: $pe\n" if !defined($index); $text = $decls[$index]->text() . $post; } } return $expanded . $text; } __END__ =head1 NAME dtdflatten - Flatten an SGML/XML DTD. =head1 SYNOPSIS dtdflatten [options] =head1 DESCRIPTION B parses a DTD and prints out a flatten/expanded version of it with all parameter entities expanded. The first non-option-related argument provided on the command-line specifies the file to parse. If no filename is given, then the DTD is read from standard input. The flatten DTD is printed to standard output unless the C<--output> option is specified. =head1 OPTIONS =over 4 =item --catalog Specify catalog files to parse for resolving external entity references. This option can be specified multiple times. B Currently, only SGML Open Catalog format is supported. XML Catalog support is not implemented (yet). =item --debug Extra debugging output. This option can be specified multiple times to increase the amount of output. Debugging output is sent to standard error. =item --declaration Specify the SGML declaration. The SGML declaration is parsed to determine the type of DTD being parsed, XML or SGML. The key parts of the SGML declaration examined are the NAMECASE and CHARSET directives to determine the DTD type. If no SGML declaration is available, the C<--xml>, C<--namecase-general>, and C<--namecase-entity> options can be used. =item --output Output file. If not specified, standard output is used. =item --preserve Preserve parameter entity declaration denoted by . This option can be specified multiple times. Note, if matches any portion of a parameter entity, the parameter entity declaration will be preserved. =item --verbose =item --noverbose Print parsing progress. By default, this option is enabled. Verbose output is sent to standard error. If C<--debug> is specified, then this option is automatically enabled. =item --version Print version and synopsis. =item --help Print synopsis and options available. =item --man Print manual page. =back =head1 SEE ALSO L See L for an overview of the DTDParse package. =head1 PREREQUISITES B, B, B =head1 AVAILABILITY EIE =head1 AUTHORS Originally developed by Norman Walsh, Endw@nwalsh.comE. Earl Hood Eearl@earlhood.comE picked up support and maintenance. =head1 COPYRIGHT AND LICENSE See L for copyright and license information. SGML-DTDParse-2.00/bin/dtdformat0100755004705000001440000006452510266076621015326 0ustar ehoodusers#!/usr/bin/perl -- # -*- Perl -*- # $Id: dtdformat,v 2.2 2005/07/16 03:22:57 ehood Exp $ # Author(s): Norman Walsh, # Earl Hood, # POD at end of file. # XXX: Format modules need to be converted into formal Perl modules # with API documented. (ehood) # NAMECASE YES means NO use strict; use vars qw($VERSION); use vars qw(@elements %elements %attlists); use vars qw(@entities %entities @notations %notations); use vars qw($usage %option %config $fileext $baseid); use vars qw($xmldtd $basedir $dtd); use vars qw(%ELEMBASE %ENTBASE %NOTBASE %ROOTS); use vars qw(%APPEARSIN %EAPPEARSIN %XAPPEARSIN); use vars qw(%PARENTS %CHILDREN); use vars qw(%ELEMINCL %ELEMEXCL %POSSINCL %POSSEXCL); use vars qw($expanded); use Getopt::Long; use SGML::DTDParse; use XML::DOM; # Copy version variable for use by formatting modules. $VERSION = $SGML::DTDParse::VERSION; $expanded = 'expanded'; $usage = "$0 version $VERSION\nUsage: $0 [ options ] dtd[.xml]\n"; %option = ('synopsis' => 1, 'content-model' => 1, 'attributes' => 1, 'inclusions' => 1, 'exclusions' => 1, 'tag-minimization' => 1, 'appears-in' => 1, 'description' => 1, 'attributes' => 1, 'parents' => 1, 'children' => 1, 'examples' => 1, 'base-dir' => "", 'base-id' => undef, 'debug' => 0, 'unexpanded' => 1, 'verbose' => 1, 'include-sdata' => 0, 'include-charent' => 0, 'include-ms' => 0, 'elements' => 1, 'entities' => 1, 'notations' => 1); %config = ('expanded-element-dir' => 'elements', 'unexpanded-element-dir' => 'dtdelem', 'expanded-entity-dir' => 'entities', 'unexpanded-entity-dir' => 'dtdent', 'notation-dir' => 'notations', 'home' => 'index' . $fileext, 'expanded-element-index' => "index" . $fileext, 'unexpanded-element-index' => "dtdelem" . $fileext, 'expanded-entity-index' => "entities" . $fileext, 'unexpanded-entity-index' => "dtdent" . $fileext, 'notation-index' => 'notations' . $fileext); my %opt = (); &GetOptions(\%opt, 'html', 'refentry', 'debug+', 'verbose!', 'synopsis!', 'content-model!', 'attributes!', 'inclusions!', 'exclusions!', 'tag-minimization!', 'include-sdata!', 'include-charent!', 'include-ms!', 'appears-in!', 'description!', 'attributes!', 'parents!', 'chilren!', 'examples!', 'library=s@', 'unexpanded!', 'base-dir=s', 'base-id=s', 'elements!', 'entities!', 'notations!', @SGML::DTDParse::CommonOptions ) || SGML::DTDParse::usage(-verbose => 0, -exitval => 1); SGML::DTDParse::process_common_options(\%opt); if ($opt{'html'} && $opt{'refentry'}) { die "Error: You can't specify both --html and --refentry.\n"; } if (!$opt{'html'} && !$opt{'refentry'}) { if ($0 =~ /html$/) { $opt{'html'} = 1; } elsif ($0 =~ /refentry$/ || $0 =~ /man$/) { $opt{'refentry'} = 1; } else { die "Error: You must specify either --html or --refentry.\n"; } } if ($opt{'html'}) { &status("Formatting HTML.",1); require 'SGML/DTDParse/Format/html.pl'; } elsif ($opt{'refentry'}) { &status("Formating DocBook RefEntrys.",1); require 'SGML/DTDParse/Format/refentry.pl'; } my @libraries = exists($opt{'library'}) ? @{$opt{'library'}} : (); if (@libraries) { foreach my $userlib (@libraries) { require $userlib; } } else { my $plain = "SGML/DTDParse/Format/plain.pl"; &status("Using plain library.",1); require $plain; } foreach my $key (keys %option) { $option{$key} = $opt{$key} if exists $opt{$key}; } if (!defined($option{'base-id'})) { $baseid = "dtdparse"; if ($opt{'refentry'}) { &status("No base-id specified, \"$baseid\" will be used.",1); } } else { $baseid = $option{'base-id'}; } select(STDOUT); $| = 1; $xmldtd = shift @ARGV || die $usage; $xmldtd .= ".xml" if ($xmldtd =~ /\.dtd$/) && -f $xmldtd . ".xml"; if (! -f $xmldtd) { $xmldtd .= ".xml" if -f $xmldtd . ".xml"; die "$0: cannot load $xmldtd\[.xml\].\n" if ! -f $xmldtd; } if ($option{'base-dir'} ne "") { $basedir = $option{'base-dir'}; } else { $basedir = $xmldtd; $basedir =~ s/\\/\//g; # foo\bar.dtd.xml => foo/bar.dtd.xml $basedir =~ s/^.*\/([^\/]+)$/$1/; # foo/bar.dtd.xml => bar.dtd.xml $basedir =~ s/^([^\.]+).*$/$1/; # bar.dtd.xml => bar $option{'base-dir'} = $basedir; } my $parser = new XML::DOM::Parser (NoExpand => 0); &status("Loading $xmldtd..."); $dtd = $parser->parsefile($xmldtd); foreach my $opt ('namecase-general', 'namecase-entity', 'unexpanded', 'xml') { $option{$opt} = $dtd->getDocumentElement()->getAttribute($opt); } &createDir ($basedir, 0755) if ! -d $basedir; &checkDir ($basedir); foreach my $key ('expanded-element-dir', 'expanded-entity-dir', 'notation-dir') { my $dir = $basedir . "/" . $config{$key}; &createDir ($dir, 0755) if ! -d $dir; &checkDir ($dir); } if ($option{'unexpanded'}) { foreach my $key ('unexpanded-element-dir', 'unexpanded-entity-dir') { my $dir = $basedir . "/" . $config{$key}; &createDir ($dir, 0755) if ! -d $dir; &checkDir ($dir); } } my $elemnodelist = $dtd->getElementsByTagName("element"); # Build a hash of element nodes, then a sorted list %elements = (); for (my $count = 0; $count < $elemnodelist->getLength(); $count++) { my $element = $elemnodelist->item($count); my $name = $element->getAttribute('name'); $name = lc($name) if $option{'namecase-general'}; $elements{$name} = $element; } @elements = sort { uc($a) cmp uc($b) } keys %elements; %ELEMBASE = &basenames(@elements); # Build a hash of entity nodes, then a sorted list my $entnodelist = $dtd->getElementsByTagName("entity"); %entities = (); for (my $count = 0; $count < $entnodelist->getLength(); $count++) { my $entity = $entnodelist->item($count); my $name = $entity->getAttribute('name'); $name = lc($name) if $option{'namecase-entity'}; $entities{$name} = $entity; } @entities = sort { uc($a) cmp uc($b) } keys %entities; %ENTBASE = &basenames(@entities); # Build a hash of notation nodes, then a sorted list my $notnodelist = $dtd->getElementsByTagName("notation"); %notations = (); for (my $count = 0; $count < $notnodelist->getLength(); $count++) { my $notation = $notnodelist->item($count); my $name = $notation->getAttribute('name'); $notations{$name} = $notation; } @notations = sort { uc($a) cmp uc($b) } keys %notations; %NOTBASE = &basenames(@notations); &status("Calculating parents and children..."); %PARENTS = (); %CHILDREN = (); %ELEMINCL = (); %ELEMEXCL = (); %POSSINCL = (); %POSSEXCL = (); foreach my $element (values %elements) { my $cm = $element->getElementsByTagName('content-model-expanded'); my $incl = $element->getElementsByTagName('inclusions'); my $excl = $element->getElementsByTagName('exclusions'); my $chlist = $cm->item(0)->getElementsByTagName('element-name'); my $pname = $element->getAttribute('name'); $pname = lc($pname) if $option{'namecase-general'}; for (my $chcount = 0; $chcount < $chlist->getLength(); $chcount++) { my $child = $chlist->item($chcount); my $cname = $child->getAttribute('name'); $cname = lc($cname) if $option{'namecase-general'}; $PARENTS{$cname} = {} if !exists($PARENTS{$cname}); $PARENTS{$cname}->{$pname} = 0 if !exists($PARENTS{$cname}->{$pname}); $PARENTS{$cname}->{$pname}++; $CHILDREN{$pname} = {} if !exists($CHILDREN{$pname}); $CHILDREN{$pname}->{$cname} = 0 if !exists($CHILDREN{$pname}->{$cname}); $CHILDREN{$pname}->{$cname}++; } if ($incl && $incl->getLength() > 0) { $chlist = $incl->item(0)->getElementsByTagName('element-name'); for (my $chcount = 0; $chcount < $chlist->getLength(); $chcount++) { my $child = $chlist->item($chcount); my $cname = $child->getAttribute('name'); $cname = lc($cname) if $option{'namecase-general'}; $ELEMINCL{$pname} = {} if !exists($ELEMINCL{$pname}); $ELEMINCL{$pname}->{$cname} = 1; } } if ($excl && $excl->getLength() > 0) { $chlist = $excl->item(0)->getElementsByTagName('element-name'); for (my $chcount = 0; $chcount < $chlist->getLength(); $chcount++) { my $child = $chlist->item($chcount); my $cname = $child->getAttribute('name'); $cname = lc($cname) if $option{'namecase-general'}; $ELEMEXCL{$pname} = {} if !exists($ELEMEXCL{$pname}); $ELEMEXCL{$pname}->{$cname} = 1; } } } # Now the fun part, recurse over all elements and propagate inclusions # and exclusions... &status("Propagating inclusions and exclusions..."); &propagateInclExcl(); # Calculate the root elements. %ROOTS = (); foreach my $element (values %elements) { my $pname = $element->getAttribute('name'); $pname = lc($pname) if $option{'namecase-general'}; $ROOTS{$pname} = $element if !exists($PARENTS{$pname}); } # Elements that are inclusions aren't roots my %allincl = (); foreach my $element (keys %POSSINCL) { my %incl = %{$POSSINCL{$element}}; foreach my $key (keys %incl) { $allincl{$key} = 1; } } foreach my $element (keys %allincl) { delete $ROOTS{$element} if exists $ROOTS{$element}; } &status("Finding Attribute Lists..."); %attlists = (); my $attlistnodelist = $dtd->getElementsByTagName("attlist"); for (my $count = 0; $count < $attlistnodelist->getLength(); $count++) { my $node = $attlistnodelist->item($count); my $name = $node->getAttribute('name'); $name = lc($name) if $option{'namecase-general'}; $attlists{$name} = $node; } #open (DEBUGFILE, ">dtdformat.debug"); %APPEARSIN = (); %EAPPEARSIN = (); %XAPPEARSIN = (); if ($option{'appears-in'}) { &status("Calculating appears-in..."); &calculateAppearsIn(); &calculateEntityAppearsIn(); } #print DEBUGFILE "APPEARSIN:\n"; #foreach my $key (keys %APPEARSIN) { # print DEBUGFILE " $key (APPEARSIN)\n"; # my %x = %{$APPEARSIN{$key}}; # foreach my $key2 (keys %x) { # print DEBUGFILE "\t$key2\n"; # } #} #print "\n"; # #print DEBUGFILE "EAPPEARSIN:\n"; #foreach my $key (keys %EAPPEARSIN) { # print DEBUGFILE " $key (EAPPEARSIN)\n"; # my %x = %{$EAPPEARSIN{$key}}; # foreach my $key2 (keys %x) { # print DEBUGFILE "\t$key2\n"; # } #} #print "\n"; # #print DEBUGFILE "XAPPEARSIN:\n"; #foreach my $key (keys %XAPPEARSIN) { # print DEBUGFILE " $key (XAPPEARSIN)\n"; # my %x = %{$XAPPEARSIN{$key}}; # foreach my $key2 (keys %x) { # print DEBUGFILE "\t$key2\n"; # } #} #print "\n"; # #close (DEBUGFILE); &status("Writing Index Pages..."); &writeElementIndexes($basedir); &writeEntityIndexes($basedir); &writeNotationIndexes($basedir); &writeIndex($basedir); if ($option{'unexpanded'}) { $expanded = 'unexpanded'; &writeElementIndexes($basedir); &writeEntityIndexes($basedir); $expanded = 'expanded'; } &status("Writing Elements...",1); for (my $count = 0; $option{'elements'} && ($count <= $#elements); $count++) { my $name = $elements[$count]; my $element = $elements{$name}; my $path = $basedir . "/" . $config{'expanded-element-dir'}; my $basename = $ELEMBASE{$name}; my $html = ""; &status($element->getAttribute('name')); $expanded = 'expanded'; $html = &formatElement($count); &writeElement($count, $path, $basename, $fileext, $html); if ($option{'unexpanded'}) { $expanded = 'unexpanded'; $path = $basedir . "/" . $config{'unexpanded-element-dir'}; $html = &formatElement($count); &writeElement($count, $path, $basename, $fileext, $html); } } &status("Writing Entities...",1); for (my $count = 0; $option{'entities'} && ($count <= $#entities); $count++) { my $name = $entities[$count]; my $entity = $entities{$name}; my $etype = &entityType($entity); my $path = $basedir . "/" . $config{'expanded-entity-dir'}; my $basename = $ENTBASE{$name}; my $html = ""; &status($entity->getAttribute('name')); $expanded = 'expanded'; $html = ""; if ($etype eq 'sdata') { $html = &formatEntity($count) if $option{'include-sdata'}; } elsif ($etype eq 'msparam') { $html = &formatEntity($count) if $option{'include-ms'}; } elsif ($etype eq 'charent') { $html = &formatEntity($count) if $option{'include-charent'}; } else { $html = &formatEntity($count); } &writeEntity($count, $path, $basename, $fileext, $html); if ($option{'unexpanded'}) { $expanded = 'unexpanded'; $path = $basedir . "/" . $config{'unexpanded-entity-dir'}; $html = ""; if ($etype eq 'sdata') { $html = &formatEntity($count) if $option{'include-sdata'}; } elsif ($etype eq 'msparam') { $html = &formatEntity($count) if $option{'include-ms'}; } else { $html = &formatEntity($count); } &writeEntity($count, $path, $basename, $fileext, $html); } } &status("Writing Notations...",1); $expanded = 'expanded'; for (my $count = 0; $option{'notations'} && ($count <= $#notations); $count++) { my $name = $notations[$count]; my $notation = $notations{$name}; my $path = $basedir . "/" . $config{'notation-dir'}; my $basename = $NOTBASE{$name}; my $html = ""; &status($notation->getAttribute('name')); $html = &formatNotation($count); &writeNotation($count, $path, $basename, $fileext, $html); } &status("Done.",1); print "\n"; exit; # ====================================================================== sub createDir { my $dir = shift; my $mode = shift; mkdir($dir,$mode); } sub checkDir { my $dir = shift; die "$0: Failed to create $dir.\n" if ! -d $dir; } sub writeElement { my $count = shift; my $path = shift; my $basename = shift; my $fileext = shift; my $html = shift; open (F, ">$path/" . $basename . $fileext); print F $html; close (F); } sub writeEntity { my $count = shift; my $path = shift; my $basename = shift; my $fileext = shift; my $html = shift; open (F, ">$path/" . $basename . $fileext); print F $html; close (F); } sub writeNotation { my $count = shift; my $path = shift; my $basename = shift; my $fileext = shift; my $html = shift; open (F, ">$path/" . $basename . $fileext); print F $html; close (F); } sub basenames { my @names = @_; my %basename = (); my %usedname = (); foreach my $name (@names) { my $count = 2; my $bname = lc($name); if ($usedname{$bname}) { $bname = lc($name) . $count; while ($usedname{$bname}) { $bname++; } } $basename{$name} = $bname; $usedname{$name} = 1; } return %basename; } sub entityType { my $ent = shift; my $textnl = $ent->getElementsByTagName("text"); my $text = $textnl->item(0); my $type = $ent->getAttribute('type'); if ($type eq 'param') { if ($ent->getAttribute('system') || $ent->getAttribute('public')) { $type = 'paramext'; } elsif ($text && $text->getFirstChild()) { my $data = $text->getFirstChild()->getData(); if ($data eq 'INCLUDE' || $data eq 'IGNORE') { $type = 'msparam'; } } } elsif (($type eq 'gen') || ($type eq 'cdata')) { if ($text && $text->getFirstChild()) { my $data = $text->getFirstChild()->getData(); if ($data =~ /^\&\#[xX][0-9A-F]+\;/i || $data =~ /^\&\#[0-9]+\;/i) { $type = 'charent'; } } } return $type; } # ====================================================================== sub propagateInclExcl { # For each element, look for inclusions on all its parents my $totelem = $#elements+1; my $count = 0; foreach my $name (@elements) { my %children = (); my %checked = (); my @tocheck = (); my %excl = (); my %incl = (); %children = %{$CHILDREN{$name}} if exists $CHILDREN{$name}; &status(sprintf("Propagating inclusions and exclusions: %5.1f%%", $count / $totelem * 100.0)); $count++; @tocheck = keys %{$PARENTS{$name}} if exists $PARENTS{$name}; while (@tocheck) { my $parent = shift @tocheck; if (exists $ELEMINCL{$parent}) { foreach my $element (keys %{$ELEMINCL{$parent}}) { $incl{$element} = 1; } } if (exists $ELEMEXCL{$parent}) { foreach my $element (keys %{$ELEMEXCL{$parent}}) { $excl{$element} = 1; } } if (exists $PARENTS{$parent}) { foreach my $element (keys %{$PARENTS{$parent}}) { push (@tocheck, $element) unless $checked{$element}; $checked{$element} = 1; } } } # Exclusions are only interesting if they're allowed as children. foreach my $element (keys %excl) { delete $excl{$element} if !exists $children{$element}; } if (%excl) { $POSSEXCL{$name} = {}; %{$POSSEXCL{$name}} = %excl; } # Inclusions are only interesting if they're not also excluded if (exists $ELEMEXCL{$name}) { foreach my $element (keys %incl) { delete $incl{$element} if exists $ELEMEXCL{$name}->{$element}; } } if (%incl) { $POSSINCL{$name} = {}; %{$POSSINCL{$name}} = %incl; } } # foreach my $name (@elements) { # my %incl = (); # my %iincl = (); # my %excl = (); # my %iexcl = (); # # %incl = %{$ELEMINCL{$name}} if exists $ELEMINCL{$name}; # %iincl = %{$POSSINCL{$name}} if exists $POSSINCL{$name}; # %excl = %{$ELEMEXCL{$name}} if exists $ELEMEXCL{$name}; # %iexcl = %{$POSSEXCL{$name}} if exists $POSSEXCL{$name}; # # print "\n$name:\n"; # print "\t I:", join(",", keys %incl), "\n"; # print "\tiI:", join(",", keys %iincl), "\n"; # print "\t E:", join(",", keys %excl), "\n"; # print "\tiE:", join(",", keys %iexcl), "\n"; # } } sub calculateAppearsIn { # Calculates where elements and parameter entities appear in # other parameter entities my $totent = $#entities + 1; my $count = 0; foreach my $entname (@entities) { my $entity = $entities{$entname}; my $expnl = $entity->getElementsByTagName("text-expanded"); my $uexpnl = $entity->getElementsByTagName("text"); my $node = undef; my $cnode = undef; my $text = undef; &status(sprintf("Calculating appears-in: %5.1f%%", $count / $totent * 100.0)); $count++; $node = $expnl->item(0) if $expnl; $cnode = $node->getFirstChild() if $node; $text = $cnode->getData() if $cnode; if (&cmFragment($text)) { while ($text =~ /[-a-z0-9.:_]+/is) { my $pre = $`; my $match = $&; $text = $'; my $name = $match; $name = lc($name) if $option{'namecase-general'}; $APPEARSIN{$name} = {} if !exists $APPEARSIN{$name}; $APPEARSIN{$name}->{$entname} = 1; # print DEBUGFILE "A: $name appears in $entname\n"; } } $text = undef; $node = $uexpnl->item(0) if $uexpnl; $cnode = $node->getFirstChild() if $node; $text = $cnode->getData() if $cnode; while ($text =~ /\%([^\s;]+);?/is) { my $pre = $`; my $match = $1; $text = $'; my $name = "%$match"; $APPEARSIN{$name} = {} if !exists $APPEARSIN{$name}; $APPEARSIN{$name}->{$entname} = 1; # print DEBUGFILE "A: $name appears in $entname\n"; } } } sub calculateEntityAppearsIn { # Calculates where parameter entities appear in element declarations # Note: for any given PE 'x', this function calculates the # elements that contain %x; directly (%EAPPEARSIN) and the elements # that contain %x; indirectly (%XAPPEARSIN). my $totelem = $#elements + 1; my $count = 0; foreach my $elemname (@elements) { my $element = $elements{$elemname}; my $cmlist = $element->getElementsByTagName('content-model'); &status(sprintf("Calculating entity appears-in: %5.1f%%", $count / $totelem * 100.0)); $count++; if ($cmlist->getLength() > 0) { my $cm = $cmlist->item(0); my $pelist = $cm->getElementsByTagName('parament-name'); for (my $cnt = 0; $cnt < $pelist->getLength(); $cnt++) { my $pename = $pelist->item($cnt); my $name = $pename->getAttribute('name'); if (!exists($EAPPEARSIN{"%$name"})) { $EAPPEARSIN{"%$name"} = {}; } $EAPPEARSIN{"%$name"}->{$elemname} = 1; # print DEBUGFILE "E: %$name appears in $elemname\n"; } } # Ok, if a PE appears in the ATTLIST decl we say it appears in # the element. This may not really work, but it seems so unlikely # that the same pe would be used in both, that I don't see the # harm. my $attlist = $attlists{$elemname}; if (defined($attlist)) { my $adlist = $attlist->getElementsByTagName('attdecl'); if ($adlist->getLength() > 0) { my $attdecl = $adlist->item(0); my $cnode = $attdecl->getFirstChild(); # will be only one! my $text = $cnode->getData() if $cnode; while ($text =~ /%([^\s;]+);?/is) { my $pe = $1; $text = $'; $EAPPEARSIN{"%$pe"} = {} if !exists($EAPPEARSIN{"%$pe"}); $EAPPEARSIN{"%$pe"}->{$elemname} = 1; # print DEBUGFILE "EA: %$pe appears in $elemname\n"; } } } } # Ok, now $APPEARSIN{'%x'} tells us what PEs %x appears in and # $EAPPEARSIN{'%x'} tells us what elements %x appears in. # Next we've got to calculate the complete set of all elements # that are influenced by %x. This is the elements that contain # PEs that contain %x or PEs that contain PEs that contain %x, etc. my $totent = $#entities + 1; $count = 0; foreach my $name (@entities) { &status(sprintf("Calculating extended entity appears-in: %5.1f%%", $count / $totent * 100.0)); $count++; # Any element that contains %x is influenced by %x foreach my $elemname (keys %{$EAPPEARSIN{"%$name"}}) { $XAPPEARSIN{"%$name"} = {} if !exists $XAPPEARSIN{"%$name"}; $XAPPEARSIN{"%$name"}->{$elemname} = 1; # print DEBUGFILE "X': %$name appears in $elemname\n"; } next if !$APPEARSIN{"%$name"}; # print DEBUGFILE "?: %$name appears in: "; my %toinspect = %{$APPEARSIN{"%$name"}}; # print DEBUGFILE join(", ", keys %toinspect), "\n"; my %inspected = (); while (%toinspect) { my $pe = (keys %toinspect)[0]; $inspected{$pe} = 1; delete($toinspect{$pe}); if (exists($EAPPEARSIN{"%$pe"})) { foreach my $elemname (keys %{$EAPPEARSIN{"%$pe"}}) { # # nwalsh: 11/04/1999 Why was this here? It short-circuits the whole process. # What was I trying to accomplish? # # my %eapp = %{$EAPPEARSIN{"%$pe"}}; # next if exists $eapp{$elemname}; $XAPPEARSIN{"%$name"} = {} if !exists $XAPPEARSIN{"%$name"}; $XAPPEARSIN{"%$name"}->{$elemname} = 1; # print DEBUGFILE "X: %$name appears in $elemname\n"; } } if ($APPEARSIN{"%$pe"}) { foreach my $entname (keys %{$APPEARSIN{"%$pe"}}) { $toinspect{$entname} = 1 if !$inspected{$entname}; } } } } } # ====================================================================== sub cmFragment { my $text = shift; my $cmfragment = 1; # if it contains a keyword, it's not a content model fragment. $cmfragment = 0 if $text =~ /\#implied|\#required|\#fixed/is; # if it contains characters that can't appear in a content # model fragment, then it isn't one. # The string #PCDATA is allowed, but would confuse us... $text =~ s/\#pcdata//isg; $cmfragment = 0 if $text =~ /[^\sa-z0-9_\|\,\&\(\)\*\?\+\-]/is; return $cmfragment; } # ====================================================================== my $lastmsglen = 0; my $persist = 0; sub status { my $msg = shift; my $shouldpersist = shift || $opt{'debug'}; return if !$option{'verbose'}; if ($persist) { print "\n"; $persist = 0; } else { print "\r"; print " " x $lastmsglen; print "\r"; } print $msg; $lastmsglen = length($msg); $persist = 1 if $shouldpersist || (length($msg) > 78); } # ====================================================================== __END__ =head1 NAME dtdformat - Read a DTDParse XML file and produce formatted documentation =head1 SYNOPSIS dtdformat [options] xmlfile =head1 DESCRIPTION B generated formatted documentation based upon a DTDParse XML file created by L. The following documentation formats are supported: =over 4 =item HTML Designated by the C<--html> option. =item DocBook Refentry Designated by the C<--refenty> option. =back One of the above formats must be specified, or dtdformat will abort with an error. =head1 OPTIONS =over 4 =item --appears-in =item --noappears-in Include what an entity and/or element appears in. The default is to include. =item --attributes =item --noattributes Include, or not include, element attributes in documentation. The default is to include. =item --base-dir Root directory to place documentation. =item --base-id I Entity name prefix for entities defined in DocBook RefEntry, C<--refentry>, output. If not specific, C is used. =item --chilren =item --nochilren Include, or not include, list of children for elements. The default is to include. =item --content-model Include, or not include, element content models in documentation. =item --debug Enable debugging output. =item --description Include, or not include, Description sections in documentation. The default is to include. =item --examples =item --noexamples Include, or not include, Example sections in documentation. The default is to include. =item --exclusions =item --noexclusions Include, or not include, element exclusions in documentation. The default is to include. =item --html Generate HTML documentation. =item --inclusions =item --noinclusions Include, or not include, element inclusions in documentation. The default is to include. =item --parents =item --noparents Include, or not include, possible element parents in documentation. The default is to include. =item --refentry Generate DocBook Refentry (manpage) documentation. =item --synopsis =item --nosynopsis Include, or not include, element synopses in documentation. The default is to include. =item --tag-minimization =item --notag-minimization Include, or not include, element tag minization settings in documentation. The default is to include. =item --unexpanded =item --nounexpanded Included, or not include, unexpanded content models in element documentation. The default is to include. =item --verbose =item --noverbose Output progress (the default). =item --version Print program version and synopsis. =item --help Print program synopsis and options available. =item --man Print program manual page. =back =head1 SEE ALSO L See L for an overview of the DTDParse package. =head1 PREREQUISITES B, B =head1 AVAILABILITY EIE =head1 AUTHORS Originally developed by Norman Walsh, Endw@nwalsh.comE. Earl Hood Eearl@earlhood.comE picked up support and maintenance. =head1 COPYRIGHT AND LICENSE See L for copyright and license information. SGML-DTDParse-2.00/bin/dtdparse0100755004705000001440000001355710266076621015147 0ustar ehoodusers#!/usr/bin/perl -- # -*- Perl -*- # $Id: dtdparse,v 2.2 2005/07/16 03:22:57 ehood Exp $ # Author(s): Norman Walsh, # Earl Hood, # POD at end of file. package Dtdparse; use strict; use vars qw($CVS); $CVS = '$Id: dtdparse,v 2.2 2005/07/16 03:22:57 ehood Exp $ '; use Getopt::Long; use SGML::DTDParse; use SGML::DTDParse::DTD; MAIN: { my %option = ('debug' => 0, 'verbose' => 1, 'title' => '?untitled?', 'unexpanded' => 1, 'public-id' => '', 'system-id' => '', 'namecase-general' => 1, 'namecase-entity' => 0, 'output' => '', 'xml' => 0, 'declaration' => ''); my %opt = (); &GetOptions( \%opt, 'debug+', 'verbose!', 'title=s', 'unexpanded!', 'catalog=s@', 'public-id=s', 'system-id=s', 'output=s', 'xml!', 'namecase-general!', 'namecase-entity!', 'declaration=s', @SGML::DTDParse::CommonOptions ) || SGML::DTDParse::usage(-verbose => 0, -exitval => 1); SGML::DTDParse::process_common_options(\%opt); foreach my $key (keys %option) { $option{$key} = $opt{$key} if exists($opt{$key}); } my @catalogs = exists($opt{'catalog'}) ? @{$opt{'catalog'}} : (); my $file = shift @ARGV; my $xmlfile = $option{'output'} || ''; warn "Warning: Title not specified\n" if !defined($option{'title'}); my $dtd = new SGML::DTDParse::DTD ( 'Verbose' => $option{'verbose'}, 'Debug' => $option{'debug'}, 'SgmlCatalogFilesEnv' => $option{'use-sgml-catalog-files'}, 'Title' => $option{'title'}, 'UnexpandedContent' => $option{'unexpanded'}, 'SourceDtd' => $file, 'Xml' => $option{'xml'}, 'NamecaseGeneral' => $option{'namecase-general'}, 'NamecaseEntity' => $option{'namecase-entity'}, 'PublicId' => $option{'public-id'}, 'SystemId' => $option{'system-id'}, 'Declaration' => $option{'declaration'} ); foreach my $catalog (@catalogs) { $dtd->parseCatalog($catalog); } $dtd->parse($file); my $out_fh = \*STDOUT; if ($xmlfile ne '') { use Symbol; $out_fh = gensym; open ($out_fh, ">$xmlfile") || die qq{Error: Unable to create "$xmlfile": $!\n}; $dtd->status("Writing $xmlfile...\n"); } $dtd->xml($out_fh); close($out_fh) if $xmlfile; $dtd->status("Done.\n"); } # End: MAIN ############################################################################## sub usage { require Pod::Usage; Pod::Usage::pod2usage(@_); } __END__ =head1 NAME dtdparse - Generate an XML representation of an SGML or XML DTD. =head1 SYNOPSIS dtdparse [options] [dtdfile] =head1 DESCRIPTION B parses an XML or SGML DTD and prints an XML representation of it. The XML version can be further processed by other tools to aid in the analysis and documentation of the DTD. The first non-option-related argument provided on the command-line specifies the file to parse. If no filename is given, then the DTD is read from standard input. The generated XML document is printed to standard output unless the C<--output> option is specified. =head1 OPTIONS =over 4 =item --catalog Specify catalog files to parse for resolving external entity references. This option can be specified multiple times. B Currently, only SGML Open Catalog format is supported. XML Catalog support is not implemented (yet). =item --debug Extra debugging output. This option can be specified multiple times to increase the amount of output. Debugging output is sent to standard error. =item --declaration Specify the SGML declaration. The SGML declaration is parsed to determine the type of DTD being parsed, XML or SGML. The key parts of the SGML declaration examined are the NAMECASE and CHARSET directives to determine the DTD type. If no SGML declaration is available, the C<--xml>, C<--namecase-general>, and C<--namecase-entity> options can be used. =item --namecase-general =item --nonamecase-general In the absence of an SGML declaration, these options specifiy if C is YES or NO. The default is YES unless C<--xml> is specified. =item --namecase-entity =item --nonamecase-entity In the absence of an SGML declaration, these options specifiy if C is YES or NO. The default is NO. =item --output Specifies the filename to send XML output. =item --public-id The DTD's public ID. =item --system-id The DTD's system ID. =item --title Set the title of the DTD. =item --unexpanded =item --nounexpanded Include, or do not include, unexpanded content models in generated XML. By default, unexpanded content models are included. It is common for DTD authors and maintainers to use parameter entities within content models. When this option is enabled, dtdparse will include a version of content models with parameter entities not expanded. =item --verbose =item --noverbose Print parsing progress. By default, this option is enabled. Verbose output is sent to standard error. If C<--debug> is specified, then this option is automatically enabled. =item --xml =item --noxml In the absence of an SGML declaration, this option specifies if the DTD is an XML DTD or an SGML DTD (the default is --noxml). =item --version Print version and synopsis. =item --help Print synopsis and options available. =item --man Print manual page. =back =head1 SEE ALSO L<dtdformat|dtdformat>, L<dtddiff|dtddiff>, L<dtdflatten|dtdflatten> See L<SGML::DTDParse|SGML::DTDParse> for an overview of the DTDParse package. =head1 PREREQUISITES B<Getopt::Long>, B<Text::DelimMatch> =head1 AVAILABILITY E<lt>I<http://dtdparse.sourceforge.net/>E<gt> =head1 AUTHORS Originally developed by Norman Walsh, E<lt>ndw@nwalsh.comE<gt>. Earl Hood E<lt>earl@earlhood.comE<gt> picked up support and maintenance. =head1 COPYRIGHT AND LICENSE See L<SGML::DTDParse|SGML::DTDParse> for copyright and license information. �������������������������������������������������������������������������������������������������������������������������������������������������SGML-DTDParse-2.00/MANIFEST�������������������������������������������������������������������������0100644�0047050�0000144�00000001030�10266076666�013764� 0����������������������������������������������������������������������������������������������������ustar �ehood���������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������Artistic bin/dtddiff bin/dtddiff2html bin/dtdflatten bin/dtdformat bin/dtdparse Changes COPYING etc/dtd.dtd etc/gen-html-doc.pl lib/SGML/DTDParse.pm lib/SGML/DTDParse/Catalog.pm lib/SGML/DTDParse/ContentModel.pm lib/SGML/DTDParse/DTD.pm lib/SGML/DTDParse/Format/html.pl lib/SGML/DTDParse/Format/plain.pl lib/SGML/DTDParse/Format/refentry.pl lib/SGML/DTDParse/Tokenizer.pm lib/SGML/DTDParse/Util.pm Makefile.PL MANIFEST This list of files META.yml Module meta-data (added by MakeMaker) README t/SGML-DTDParse-DTD.t t/SGML-DTDParse.t ��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������SGML-DTDParse-2.00/META.yml�������������������������������������������������������������������������0100644�0047050�0000144�00000000723�10266305434�014100� 0����������������������������������������������������������������������������������������������������ustar �ehood���������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������# http://module-build.sourceforge.net/META-spec.html #XXXXXXX This is a prototype!!! It will change in the future!!! XXXXX# name: SGML-DTDParse version: 2.00 version_from: lib/SGML/DTDParse.pm installdirs: site requires: Getopt::Long: 0 Text::DelimMatch: 1.05 XML::DOM: 1.43 XML::Parser: 2.25 distribution_type: module generated_by: ExtUtils::MakeMaker version 6.30 ���������������������������������������������SGML-DTDParse-2.00/README���������������������������������������������������������������������������0100644�0047050�0000144�00000003317�10266077033�013512� 0����������������������������������������������������������������������������������������������������ustar �ehood���������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������DTDParse v2.00 ============== DTDParse is a tool for processing SGML and XML DTDs. The primary motivation for writing DTDParse was to provide a framework for building documentation for DTDs, but other applications are easy to imagine. Using DTDParse is a two-step process. First the DTD is parsed with 'dtdparse'. This produces an XML version of the DTD. Subsequent processing is performed against this XML version. More information about DTDParse is available in the SGML::DTDParse manual page. INSTALLATION To install this package type the following: perl Makefile.PL make make test make install You may need administrative priviledges to do the above. If you want to install in a separate location from perl's default, you can do something like the following: perl Makefile.PL PREFIX=/path/to/install DOCUMENTATION Manpages are included in the DTDParse installation. They can be accessed like any other Perl module or command. For example, perldoc SGML::DTDParse If you want to read the documentation before installation, you can generate HTML documentation by typing the following after running 'perl Makefile.PL' described above: make htmldoc HTML documentation will be available in doc/html. DEPENDENCIES DTDParse modules and scripts require these other modules and libraries: Getopt::Long Text::DelimMatch XML::Parser XML::DOM For prerequisites that apply for a specific script or module, see the individual scripts' and modules' reference pages. COPYRIGHT AND LICENSE Copyright (C) 1999-2001, 2003 Norman Walsh 2005, Earl Hood DTDParse may be copied only under the terms of either the Artistic License or the GNU General Public License, which may be found in the DTDParse distribution. �����������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������SGML-DTDParse-2.00/Makefile.PL����������������������������������������������������������������������0100644�0047050�0000144�00000003512�10266077435�014607� 0����������������������������������������������������������������������������������������������������ustar �ehood���������������������������users������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������use 5.000; use ExtUtils::MakeMaker; # See lib/ExtUtils/MakeMaker.pm for details of how to influence # the contents of the Makefile that is written. WriteMakefile( NAME => 'SGML::DTDParse', ABSTRACT => 'Parse SGML and XML DTDs', VERSION_FROM => 'lib/SGML/DTDParse.pm', PREREQ_PM => { Text::DelimMatch => 1.05, XML::Parser => 2.25, XML::DOM => 1.43, Getopt::Long => 0, }, EXE_FILES => [qw( bin/dtddiff bin/dtddiff2html bin/dtdflatten bin/dtdformat bin/dtdparse )], PMLIBDIRS => [ 'lib' ], 'dist' => { COMPRESS => 'gzip -9f', SUFFIX => 'gz', }, ); package MY; # Add removal of HTML docs to clean target sub clean { my $inherited = shift->SUPER::clean(@_); $inherited .= "\t".'$(RM_RF) doc/pod2htm* doc/html example'."\n"; $inherited; } # Create htmldoc target sub postamble { return <<EOT; MKPATH=\$(PERL) -MExtUtils::Command -e mkpath htmldoc: _FORCE \@echo "Generating HTML docs in docs/html..." -\$(MKPATH) doc/html \$(PERL) etc/gen-html-doc.pl \\ --inroot . \\ --outroot doc/html \\ --cachedir doc \\ --poddir bin \\ --poddir lib example: _FORCE -\$(MKPATH) example/dtdparse-dtd \\ example/dtdparse-dtd/html \\ example/dtdparse-dtd/refentry PERL5LIB=lib \$(PERL) bin/dtdparse \\ --public-id "-//Norman Walsh//DTD DTDParse V2.0//EN" \\ --system-id dtd.dtd \\ --title "DTDParse XML DTD" \\ --xml \\ --output example/dtdparse-dtd/dtd.xml \\ etc/dtd.dtd PERL5LIB=lib \$(PERL) bin/dtdformat \\ --base-dir example/dtdparse-dtd/html \\ --html \\ example/dtdparse-dtd/dtd.xml PERL5LIB=lib \$(PERL) bin/dtdformat \\ --base-dir example/dtdparse-dtd/refentry \\ --refentry \\ example/dtdparse-dtd/dtd.xml _FORCE: EOT } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������