flexml-1.9.6/0000755000175000017500000000000012101762535013324 5ustar mquinsonmquinsonflexml-1.9.6/flexml-act-bootstrap.c0000644000175000017500000000447412101762535017550 0ustar mquinsonmquinson/* Flex(1) XML processor action language application. * Copyright (c) 1999 Kristoffer Rose. All rights reserved. * * This file is part of the FleXML XML processor generator system. * Copyright (c) 1999 Kristoffer Rose. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., 59 * Temple Place, Suite 330, Boston, MA 02111-1307 USA. 
*/ #include #include #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__TOS_WIN__) # ifndef __STRICT_ANSI__ # include # include # endif #else # include #endif #include "flexml-act.h" /* NOTE(review): the include directives above carry no header names; presumably they were lost when this text was extracted -- restore them from the upstream file. */ /* Symbols declared extern here; their definitions are not in this file. */ extern char *bufferstack; extern FILE* yyin; extern int yylineno; extern int yylex(void); /* Set from argv[1] in main and printed in every emitted line directive. */ char* filename; /* STag_top: prints a blank line followed by a line directive built from yylineno and filename. */ void STag_top(void) { printf("\n#line %d \"%s\"\n", yylineno, filename); } /* ETag_top: prints pcdata followed by a newline. */ void ETag_top(void) { printf("%s\n", pcdata); } /* Name stored by STag_start / STag_end and read back by ETag_start / ETag_end. */ const char* tag; /* STag_start: stores A_start_tag in tag, prints the opening of a generated "void STag_NAME(void)" function, then a line directive. */ void STag_start(void) { printf("void STag_%s(void)\n{", tag = A_start_tag); printf("\n#line %d \"%s\"\n", yylineno, filename); } /* ETag_start: prints pcdata, then the closing brace followed by a comment naming the generated function. */ void ETag_start(void) { printf("%s\n} /* STag_%s */\n\n", pcdata, tag); } /* STag_end: same as STag_start but uses A_end_tag and the ETag_ prefix for the generated function. */ void STag_end(void) { printf("void ETag_%s(void)\n{", tag = A_end_tag); printf("\n#line %d \"%s\"\n", yylineno, filename); } /* ETag_end: prints pcdata, then the closing brace followed by a comment naming the generated function. */ void ETag_end(void) { printf("%s\n} /* ETag_%s */\n\n", pcdata, tag); } /* Starts at 1; ETag_main sets it to 0. */ char mainmissing = 1; /* STag_main: does nothing. */ void STag_main(void) {} /* ETag_main: prints a line directive and pcdata, then clears mainmissing. */ void ETag_main(void) { printf("\n#line %d \"%s\"\n", yylineno, filename); printf("%s\n", pcdata); mainmissing = 0; } /* STag_actions: does nothing. */ void STag_actions(void) {} /* ETag_actions: when mainmissing is still nonzero, prints a default main that calls exit(yylex()). */ void ETag_actions(void) { if (mainmissing) { printf("/* Dummy main: filter XML from stdin. */\n"); printf("int main() { exit(yylex()); }\n"); } } /* main: uses argv[1] as filename, opens it for reading as yyin, and returns the result of yylex(). NOTE(review): argc is never checked and the fopen result is not tested, so a missing or unreadable argument is handed straight to the scanner -- confirm whether a guard is wanted here. */ int main(int argc, char** argv) { filename = argv[1]; yyin = fopen(filename,"r"); return yylex(); } flexml-1.9.6/Makefile0000644000175000017500000001025712101762535014771 0ustar mquinsonmquinson# Make(1) rules for FleXML XML processor generator system. # Copyright (c) 1999 Kristoffer Rose. All rights reserved. # # This file is part of the FleXML XML processor generator system. # Copyright (c) 1999 Kristoffer Rose. All rights reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation; either version 2 of the License, or (at your option) # any later version. 
# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., 59 # Temple Place, Suite 330, Boston, MA 02111-1307 USA. # $Id: Makefile,v 1.47 2013/01/29 14:35:24 mquinson Exp $ # SUFF (defined in Makefile.defs) is the versioning suffix added to binaries and # resource files. To get rid of it (and do a non-versioned install, # for example), do: # make whatever SUFF= # FILES. include Makefile.defs STUFF = GPL Makefile Makefile.defs flexml.pl BINS = $(FLEXML) LIBS = $(FLEXML_ACT) DATA = skel MANS = flexml.1 DOCS = README ChangeLog NEWS NOTES TODO flexml-act.dtd HTMLS = FleXML.html paper.html SRC = $(STUFF) $(DATA) $(DOCS) $(HTMLS) ALL = $(PROGS) $(LIBS) $(DATA) $(MANS) $(DOCS) $(HTMLS) .PHONY: all install dist test clean # PRIMARY TARGETS. all: $(ALL) install: $(ALL) mkdir -p $(DESTDIR)$(BINDIR) $(DESTDIR)$(LIBDIR) \ $(DESTDIR)$(MAN1DIR) $(DESTDIR)$(DOCDIR) \ $(DESTDIR)$(DATADIR) \ $(DESTDIR)$(DOCDIR)/html \ $(DESTDIR)$(DOCDIR)/examples $(INSTALL) -m555 $(BINS) $(DESTDIR)$(BINDIR)/ $(INSTALL) -m555 $(LIBS) $(DESTDIR)$(LIBDIR)/ $(INSTALL) -m444 $(DATA) $(DESTDIR)$(DATADIR)/ $(INSTALL) -m444 $(MANS) $(DESTDIR)$(MAN1DIR)/ $(INSTALL) -m444 $(DOCS) $(DESTDIR)$(DOCDIR)/ $(INSTALL) -m444 $(HTMLS) $(DESTDIR)$(DOCDIR)/html/ if test "x$(SUFF)" != "x" ; then \ rm -f $(DESTDIR)$(BINDIR)/flexml; \ ln -s $(DESTDIR)$(BINDIR)/$(FLEXML) $(DESTDIR)$(BINDIR)/flexml;\ fi $(MAKE) -C examples install FLEXML_DISTDIR = $(FLEXML)-$(VER) dist: test clean @echo "Building distribution..." 
mkdir $(FLEXML_DISTDIR) cp $(STUFF) $(DATA) $(DOCS) $(HTMLS) flexml-act-bootstrap.c \ $(FLEXML_DISTDIR) cp -r examples testbed $(FLEXML_DISTDIR) find $(FLEXML_DISTDIR) -name CVS | xargs rm -rf find $(FLEXML_DISTDIR) -name .cvsignore | xargs rm -rf tar cvfz $(FLEXML_DISTDIR).tar.gz $(FLEXML_DISTDIR) rm -rf $(FLEXML_DISTDIR) #rsync -v FleXML.html $(WEBHOME)/FleXML.html #rsync -va --cvs-exclude --delete-excluded ./ $(FTPHOME)/ clean::; @echo "Cleaning..." $(RM) -rf $(FLEXML_DIR).tar.gz $(FLEXML_DIR) $(RM) *.[olh1] *-dummy.? lex.* *~ ./#* find -name '*~' | xargs $(RM) test:: all @echo "Testing..." clean::; $(RM) $(FLEXML) $(FLEXML_ACT) flexml-act flexml-act.c $(FLEXML): flexml.pl sed \ -e "s;FLEXMLVERSION;$(VER);g" \ -e "s;[.][/]flexml-act;$(ACT);g" \ -e "s;[.][/]skel;$(SKEL);g" \ -e "s;/var/tmp;$(TMPDIR);g" \ -e "s;/usr/share/doc/;$(DOCDIR)/;g" flexml.pl > $@ chmod +x $@ ifneq ($(SUFF),) $(FLEXML_ACT): flexml-act cp flexml-act $@ endif # Action language... flexml-act.l: flexml-act.dtd skel $(PERL) ./flexml.pl $(FLEXDEBUG) -Lv -ractions -s skel $< flexml-act.c: flexml-act.l $(FLEX) -B -s -v -oflexml-act.c flexml-act.l flexml-act.o: flexml-act.c flexml-act.h flexml-act-bootstrap.o: flexml-act-bootstrap.c flexml-act.h flexml-act: flexml-act.o flexml-act-bootstrap.o clean::; $(RM) TAGS TAGS: $(SRC); etags $(SRC) ci: $(SRC); ci -u $(SRC) # DOCUMENTS. clean::; $(RM) flexml.html index.html pod2html-* flexml.1: $(FLEXML) $(POD2MAN) $(FLEXML) > $@ flexml.html: $(FLEXML) $(POD2HTML) < $(FLEXML) > $@ index.html: FleXML.html sed 's.ftp/FleXML/..g' FleXML.html > $@ # TESTS. clean::; $(MAKE) -C examples $@; $(MAKE) -C testbed $@ test::; $(MAKE) -C examples $@; \ $(MAKE) -C testbed $@ > test.out 2>&1; ! grep '^fail' test.out && \ echo "Tests succeeded." # END. clean::; @echo "Done cleaning." test::; @echo "Done testing." flexml-1.9.6/paper.html0000644000175000017500000004330612101762535015327 0ustar mquinsonmquinson Generating Fast Validating XML Processors

Generating Fast Validating XML Processors

(Extended Abstract)
Kristoffer Rose
LIP, ENS-Lyon
krisrose@debian.org
Abstract

We present FleXML, a program that generates fast validating XML processors from `self-contained' XML DTDs. It uses the flex (lexical analyser generator) program to translate the DTD into a finite automaton enriched with a stack with the `element context'. This means that the XML processor will act directly on each character received. The program is freely redistributable and modifiable (under GNU `copyleft').

Keywords

Validating XML, DTD, lexical analysis, finite automata.

Overview

The `X' of XML stands for Extensible [XML]. This signifies that each and every XML document specifies in its header the details of the format that it will use and may change its format a bit relative to the used base format.

This has influenced the tools available to write validating XML processors: they use a call-back model where the XML processor passes strings with the tag and attribute names and values to the application. These call-backs must be generic because one cannot know whether a document will start by extending its own notation with more tags and attributes. For well-formed but non-validated XML documents this makes a lot of sense, of course, but we would in general like to exploit the information in the DTD for optimizations.

In particular, for many applications a fixed format suffices in the sense that a single DTD is used without individual extensions for a large number of documents. In that case we can do much better because the possible tags and attributes are static.

We have implemented an XML processor generator using the Flex scanner generator that produces deterministic finite automata [ASU], which means that there is almost no overhead for XML processing: the generated XML processors read the XML document character by character and can immediately dispatch the actions associated with each element (or reject the document as invalid).

Furthermore we have devised a simple extension of the C programming language that facilitates the writing of `element actions' in C, making it easy to write really fast XML applications. In particular we represent XML attribute values efficiently in C when this is possible, thus avoiding the otherwise ubiquitous conversions between strings and data values.

FleXML is available for free (from SourceForge). In this paper we present FleXML through an elaborated example and discuss some of the technical issues.

What can it do?

Assume that we have an XML document my-joke.xml containing the classical joke

<!DOCTYPE joke SYSTEM "my.dtd">
<joke><line>My appartment is so small</line> <suspense/>
<line type='punch-line'>the mice are round-shouldered</line></joke>
(and many others like it, of course). We wish to create an XML processor to validate it with respect to the DTD in the file my.dtd containing
<!-- my.dtd: Small DTD for jokes (just for fun). -->
<!ELEMENT joke (line,(line|suspense)*)>
<!ELEMENT line (#PCDATA)>
<!ATTLIST line type (normal|question|punch-line) 'normal'>
<!ELEMENT suspense EMPTY>
and, furthermore, we wish to write an XML application for displaying such messages in an amusing way.

With FleXML this can be done by creating an `actions file' my-show.act which implements the desired actions for each element. The remainder of this section explains the contents of such an actions file.

An actions file is itself an XML document which must begin with

<!DOCTYPE actions SYSTEM "flexml-act.dtd">
<actions>
(the flexml-act.dtd DTD is part of the FleXML system and is reproduced in the manual page).

We decide that our application should react to a line element by printing the text inside it, and that it should differentiate between the three possible `type' attribute values by printing corresponding trailing punctuation.

This introduces a slight complication, because the attribute values are available when parsing the start tag whereas the element text is not available until we parse the end tag (where it has been read).

This means that we must declare a top-level variable.

<top><![CDATA[
char* terminator = ".";
]]></top>
Notice how we use CDATA sections to make sure that all characters (including white-space) are passed unmodified to the C compiler.

With this we can write the action to set it when reading the line start tag as

<start tag='line'><![CDATA[
  switch ( {type} ) {
    case {!type}: terminator = "..."; break;
    case {type=normal}: terminator = "."; break;
    case {type=question}: terminator = "??"; break;
    case {type=punch-line}: terminator = "!!"; break;
  }
]]></start>

The idea is that the enumeration attribute type is implemented in C as if it had been declared by

enum { {!type}, {type=normal}, {type=question}, {type=punch-line} } {type};
(understanding the {...} units as C identifiers), hence the possibility of using the fast C switch statement to pick the right choice directly. Note that the first choice, {!type}, corresponds to not setting the attribute; in this example the attribute has a default value, so this can never happen; however, we include the choice anyway to prevent the C compiler from issuing warnings about missing choices in switch statements.

With this in place we can write the action for </line>. Since it prints something, however, we first need to add the appropriate header inclusion.

<top><![CDATA[
#include <stdio.h>
]]></top>

<end tag='line'><![CDATA[
  printf("%s%s\n", pcdata, terminator);
]]></end>

Finally, we will make the application amusing by `displaying' the <suspense/> empty tag as a short delay; this also involves a header inclusion.

<top><![CDATA[
#include <unistd.h>
]]></top>

<start tag='suspense'><![CDATA[
  sleep(2);
]]></start>

That is all; the only remaining thing is to terminate the action file properly.

</actions>

We can now run FleXML with the DTD and the actions file as input and will get an XML application as output that, when run (after processing by flex and a C compiler), will indeed print

My appartment is so small.
the mice are round-shouldered!!
as expected, pausing duly for two seconds between the lines. On the author's system the above output was achieved with the command sequence
flexml -A -a my-show.act my.dtd 
flex -omy-show.c my-show.l
cc -omy-show my-show.c
./my-show <./my-joke.xml
(see the manual page for the exact meaning of the FleXML options).

An important aspect of the design of FleXML is that the only thing that should matter to the programmer should be the complexity of the application, not of the used DTD. As an example the following action file prints the href attribute of all hyperlinks in an XHTML document:

<!DOCTYPE actions SYSTEM "flexml-act.dtd">
<actions>

<top><![CDATA[
#include <stdio.h>
]]></top>

<start tag='a'><![CDATA[
if ({href}) printf("%s\n", {href});
]]></start>

</actions>
which was compiled into a running application on the author's system with the commands
flexml $(FLEXDEBUG) -rhtml -p'-//IETF//DTD XHTML 1.0 Transitional//EN' \
  'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
gcc -Wall -ansi -pedantic -O2 -g -c xhtml-href.c -o xhtml-href.o
flex -Bsv -Ca -oxhtml1-transitional.c xhtml1-transitional.l
gcc -Wall -ansi -pedantic -O2 -g   -c xhtml1-transitional.c -o xhtml1-transitional.o
gcc -Wall -ansi -pedantic   xhtml-href.o xhtml1-transitional.o   -o xhtml-href
generating the XML processor directly from the official DTD on the web (which in fact required a patch to flex to enlarge the possible table size).

How does it work?

FleXML is a perl script [Perl] which reads and interprets a DTD and subsequently produces an XML processor source file for the flex scanner and optionally an XML application with the C source of the element actions from an actions file. The DTD is used to construct the rules used by flex to match the individual XML components in such a way that only valid documents match.

For example, the flex code for scanning an attribute declaration of the line tag is the following:

<AL_line>{
 "type"{Eq}"'normal'"  |
 "type"{Eq}"\"normal\""  A_line_type = A_line_type_normal;
 "type"{Eq}"'question'"  |
 "type"{Eq}"\"question\""  A_line_type = A_line_type_question;
 "type"{Eq}"'punch-line'"  |
 "type"{Eq}"\"punch-line\""  A_line_type = A_line_type_punch_d_line;

 ">" {
  LEAVE; STag_line(); pcdata = BUFFERSET(pcdata); ENTER(IN_line);
 }
 "/>" {
  LEAVE; STag_line(); pcdata = ""; ETag_line();
  switch (YY_START) {
   case ROOT_line: SET(EPILOG); break;
   case S_joke: SET(S_joke_1); break;
   case S_joke_1: case S_joke_2: case S_joke_3: SET(S_joke_3); break;
}}}
(with {Eq} an alias for the regular expression matching an equal sign (corresponding to production `[25] Eq' of the XML specification)).

This reads as follows: when the XML processor is reading the attribute list of the line tag, i.e., when it is in the <AL_line> state, a `t' will enter an internal state where a `y' proceeds to another internal state but other characters make the document invalid (because no rule permits them). Once the equal sign has been scanned, the next characters determine the attribute value, and at the end one of the flex actions is performed, setting the attribute value (A_line_type is the real C for what we wrote as {type}, etc.). The important thing is that one can ensure by careful tuning of the flex rules that a valid document will proceed only by looking each character up in a table and determining the subsequent `state' and `action'. One must avoid pairs of rules such as

"-->"		LEAVE(COMMENT);
.		SKIP;
(a single `.' matches any character) because they mean that the scanner will not be sure after having read a `-' character whether it is part of a comment terminator or `just' a dash. In such cases an extra rule must be inserted because for the set
"-->"		LEAVE(COMMENT);
"--"            |
.		SKIP;
the problem goes away.

After the actual attribute rules, two rules handle termination of the attribute list. There are two cases corresponding to whether we just read a start tag or an empty element. In case it was a start tag then we must enter the `inner' mode of the element called IN_line for the line element. The switch handles the state changes needed for the line element resulting from the fact that the element can appear in different contexts. This is always possible to construct because of the requirement that an XML DTD must be deterministic: we just need an element content stack (this is what the LEAVE and ENTER macros are for).

Why is it useful?

In comparison with the forthcoming XML Style-sheet Language [XSL] our approach is much more primitive for better and worse: only a limited range of applications can be produced with FleXML but those will be very fast.

This is useful for XML applications that are meant to process a large number of documents in a fixed format. One such application is the NTSys u-payment transaction server which is implemented as an Apache module where performance is of premium importance. Using FleXML permits a highly modular development of modules for the various transaction types based on a common DTD and a collection of applications that are generated separately and all linked together with the common processor.

FleXML is still under development: please try it out (either from SourceForge or from the Debian GNU/Linux distribution where FleXML is included from release 2.2). The author would welcome comments as to how the system can best evolve. Two things that are definitely on the agenda are a limited `context condition' language for expressing constraints on the position of an element in a document (corresponding to the XPath subset of XSL), and an efficient way to combine several DTDs into one large DTD to facilitate general XML servers (that can even dispatch to a generic XML interface in cases where the FleXML restrictions are not respected by a document).

Acknowledgements

I am grateful to NTSys for supporting the development of FleXML. Finally, I extend my sincere thanks to Jef Poskanzer, Vern Paxson, and the rest of the flex maintainers for a great tool.

References

ASU
Alfred Aho, Ravi Sethi and Jeffrey Ullman: Compilers: Principles, Techniques and Tools, Addison-Wesley (1986).

Flex
Jef Poskanzer, Vern Paxson, et al.: Flex - fast lexical analyzer generator.

Perl
Larry Wall, Perl - Practical Extraction and Report Language.

XML
Extensible Markup Language (XML) 1.0 (W3C Recommendation REC-xml-1998-0210).

XSL
Extensible Stylesheet Language (XSL) (W3C Working Draft).


Copyright (c) Kristoffer Rose. Last modified: Tue Feb 11 13:56:40 EST 2003
flexml-1.9.6/GPL0000644000175000017500000004312712101762535013700 0ustar mquinsonmquinson GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. 
We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. 
The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. 
(Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. 
You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. 
If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. 
The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. 
If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19yy name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. flexml-1.9.6/NOTES0000644000175000017500000000461412101762535014144 0ustar mquinsonmquinson# FleXML fast XML scanner framework # Copyright (c) 1999 Kristoffer Rose. All rights reserved. # # Description: Notes for FleXML scanner generator # Author: Kristoffer Rose # Created: August 1999 # License: NTSys proprietary # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # $Id: NOTES,v 1.3 2006/07/18 18:21:13 mquinson Exp $ GUIDELINES FOR WRITING A flex(1) SCANNER FOR SIMPLE XML FORMATS. We know how to handle the following DTD element declarations. 1. "Leaf" elements with the declarations Start conditions: %x AL_tag Rules: "" STag(tag), ETag(tag); "" STag(tag), ENTER(PCDATA); ""/>" LEAVE, Etag(tag); ">" LEAVE, ENTER(PCDATA); "" Etag(tag), LEAVE; Handlers: STag_tag(void) {...} ETag_tag(char* pcdata) {...} 2. "Logical" elements with unordered element contents using the declaration Start conditions: %x AL_tag %x IN_tag Rules: "" STag(tag), ETag(tag); "" STag(tag), ENTER(IN_tag); ""/>" LEAVE, Etag(tag); ">" BEGIN(IN_tag); "" Etag(tag); Handlers: STag_tag(void) {...} ETag_tag(char* pcdata) {...} 3. "Attribute" declarations of the form Rule: "attribute"{Eq}{Q} Attribute(tag,attribute); Handler: Attribute_tag_attribute(char* value) {...} That's all, for the moment. Note: the scanner can be made (more) validating by using the condition in front of all the rules in tag1...tagn and give the top-level rules the start condition INITIAL. 
flexml-1.9.6/testbed/0000755000175000017500000000000012101762535014756 5ustar mquinsonmquinsonflexml-1.9.6/testbed/mixed-stratt.dtd0000644000175000017500000000022712101762535020101 0ustar mquinsonmquinson flexml-1.9.6/testbed/biparser-two.in0000644000175000017500000000013412101762535017722 0ustar mquinsonmquinson 012456789abcdefghijklmn flexml-1.9.6/testbed/CompareOut.pl0000755000175000017500000000441412101762535017377 0ustar mquinsonmquinson#!/usr/bin/env perl # Assume ARGV is ($basename @REST). Print "passed" or "failed" based on the # following comparisons: # Compare $basename.stderr with $basename.stderr.expected # pass if no difference or # $basename.stderr is empty and $basename.stderr.expected doesn't exist; # Compare $basename.stdout with $basename.stdout.expected # pass if no difference or # $basename.stdout is empty and $basename.stdout.expected doesn't exist; # For each file $foo in @REST # Compare $foo with $foo.expected # pass if no difference; # Print "passed" if all tests pass, otherwise print # "failed " # NOTES # In order to compare directories, GNU diff must be used. There # should probably be a test to ensure this. # $Id: CompareOut.pl,v 1.2 2006/08/21 17:33:13 wdowling Exp $ # $Source: /cvsroot/flexml/flexml/testbed/CompareOut.pl,v $ use strict; use Getopt::Std; my %args; getopts('p:', \%args); my $diff_prog = $args{'p'} || 'diff'; my $diff_opt = ""; if ($ARGV[0] =~ /^-/) { $diff_opt = shift; } my $basename = shift; my $diff_cmd = "$diff_prog $diff_opt"; my $retcode; my $fail_file; my $made_stdout = 0; my $made_stderr = 0; # Compare basename.stderr to basename.stderr.expected if (! -f "$basename.stderr.expected") { system("touch $basename.stderr.expected"); $made_stderr = 1; } $retcode = system("$diff_cmd $basename.stderr $basename.stderr.expected " . 
"> $basename.stderr.diff 2>/dev/null"); system("rm -f $basename.stderr.expected") if $made_stderr; $fail_file = "$basename.stderr" if $retcode; # Compare basename.stdout to basename.stdout.expected if ($retcode == 0) { if (! -f "$basename.stdout.expected") { system("touch $basename.stdout.expected"); $made_stdout = 1; } $retcode = system("$diff_cmd $basename.stdout $basename.stdout.expected " . "> $basename.stdout.diff 2>/dev/null"); system("rm -f $basename.stdout.expected") if $made_stdout; $fail_file = "$basename.stdout" if $retcode; } while (($retcode == 0) && @ARGV) { my $file = shift; # compare file with file.expected $retcode = system("$diff_cmd $file $file.expected >$file.diff 2>/dev/null"); $fail_file = $file if $retcode; } print ($retcode ? "failed $fail_file\n" : "passed $basename\n"); flexml-1.9.6/testbed/Makefile0000644000175000017500000001476512101762535016433 0ustar mquinsonmquinson# $Source: /cvsroot/flexml/flexml/testbed/Makefile,v $ # regression testbed Makefile ##################################################### # Fixed definitions -- do not modify for new tests ##################################################### include ../Makefile.defs MAKEUTILS_DIR = . 
# If UNIT_TESTS is test_foo # and test_foo_out is bar baz # Then the created files (that need to be deleted by 'make clean' # will be # test_foo foo.std{err,out} foo.std{err,out}.diff bar baz bar.diff baz.diff TEST_INTERMEDIATES = \ $(UNIT_TESTS) \ $(UNIT_TESTS:test_%=%.stdout) $(UNIT_TESTS:test_%=%.stdout.diff) \ $(UNIT_TESTS:test_%=.test_%.stdout.expected) \ $(UNIT_TESTS:test_%=%.stderr) $(UNIT_TESTS:test_%=%.stderr.diff) \ $(UNIT_TESTS:test_%=.test_%.stderr.expected) \ $(foreach var,$(UNIT_TESTS:%=%_out),$(foreach o,$($(var)),$o $(o).diff)) # Compares test.out.std[out,err] with expected values COMPARE_OUT = $(MAKEUTILS_DIR)/CompareOut.pl #CC = /usr/bin/gcc-3.3 -Wall #CFLAGS = -O2 -g #FLEXDEBUG = -d FLEXML = ../flexml -s ../skel -T../flexml-act .PHONY : test .PHONY : clean ##################################################### # test definitions -- add new tests here ##################################################### EXES = \ biparser \ init_header \ missing-att \ multiple-att \ mixed-enumatt \ mixed-enumatt2 \ mixed-stratt \ mixed-stratt-def \ mixed1 \ multi-parser-run INTERMEDIATES = $(EXES) $(EXES:%=%.c) $(EXES:%=%.l) # leave this alphabetically sorted for easy scanning UNIT_TESTS = \ test_biparser \ test_init_header \ test_missing-att \ test_multiple-att \ test_mixed-enumatt \ test_mixed-enumatt2 \ test_mixed-stratt \ test_mixed-stratt-def \ test_mixed1 \ test_multi-parser-run test : $(UNIT_TESTS) # Test init_header test_init_header_cmd = ./init_header < init_header.in test_init_header_deps = init_header init_header.in # Test missing REQUIRED attribute test_missing-att_cmd = ./missing-att < missing-att.in test_missing-att_deps = missing-att missing-att.in # Test multiply defined attribute test_multiple-att_cmd = ./multiple-att < multiple-att.in test_multiple-att_deps = multiple-att multiple-att.in # Test mixed1 test_mixed1_cmd = ./mixed1 < mixed1.in test_mixed1_deps = mixed1 mixed1.in # Test mixed-enumatt (mixed content + enumerated attribute) 
test_mixed-enumatt_cmd = ./mixed-enumatt < mixed-enumatt.in test_mixed-enumatt_deps = mixed-enumatt mixed-enumatt.in # Test mixed-enumatt2 (mixed content + enumerated attribute, eval'ed in end tag) test_mixed-enumatt2_cmd = ./mixed-enumatt2 < mixed-enumatt2.in test_mixed-enumatt2_deps = mixed-enumatt2 mixed-enumatt2.in # Test mixed-stratt (mixed content + string attribute) test_mixed-stratt_cmd = ./mixed-stratt < mixed-stratt.in test_mixed-stratt_deps = mixed-stratt mixed-stratt.in # Test mixed-stratt (mixed content + default string attribute) test_mixed-stratt-def_cmd = ./mixed-stratt-def < mixed-stratt-def.in test_mixed-stratt-def_deps = mixed-stratt-def mixed-stratt-def.in # Test multi-parser-run (Run the parser several times on "different" files) test_multi-parser-run_cmd = ./multi-parser-run multi-parser-run.in test_multi-parser-run_deps = multi-parser-run multi-parser-run.in # Test biparser (Have two parsers in the same code) test_biparser_cmd = ./biparser biparser-one.in biparser-two.in test_biparser_deps = biparser biparser-one.in biparser-two.in biparser: biparser-one.dtd biparser-two.dtd biparser.precious.c for n in one two ; do \ $(FLEXML) -P$$n -H biparser-$$n.h -L biparser-$$n.dtd; \ $(FLEXML) -P$$n -S biparser-$$n.l -L biparser-$$n.dtd; \ $(FLEX) -s -L -P$${n}_ -obiparser-$$n.c biparser-$$n.l; \ done $(CC) $(CFLAGS) -o biparser \ biparser-one.c biparser-two.c \ biparser.precious.c CLEANFILES+= biparser-one.h biparser-one.l biparser-one.c \ biparser-two.h biparser-two.l biparser-two.c init_header : init_header.act init_header.dtd init_header.h $(FLEXML) -b 1000 -A -i$@.h -a$@.act $@.dtd $(FLEX) -s -L -o$@.c $@.l $(CC) $(CFLAGS) -o $@ $@.c % : %.dtd %.act $(FLEXML) -b 1000 -A -a$@.act $@.dtd $(FLEX) -s -L -o$@.c $@.l $(CC) $(CFLAGS) -o $@ $@.c ############################################################## # test build rules -- add override build rules here if needed ############################################################## #mixed1: mixed1.dtd 
mixed1.act # $(FLEXML) -b 500 -A -amixed1.act mixed1.dtd # $(FLEX) -s -L -o mixed1.c mixed1.l > mixed1.c # $(CC) $(CFLAGS) -o $@ mixed1.c ##################################################### # test infrastructure -- do not modify for new tests ##################################################### clean : rm -f $(TEST_INTERMEDIATES) $(INTERMEDIATES) $(CLEANFILES) # When we are building test_foo, the variable $(TEST_BASE) is "foo" test_% : TEST_BASE = $(@:test_%=%) ifndef distribute_deps # This will take a target and dependency list, and evaluate each dependency # individually. This only needs to be used on dependency lists that are to # be in an $(eval ...) function. This is a work-around for a bug in GNU Make # 3.80, which fails when $(eval)'ing dependency lists longer than about 160 # characters. # 1. target # 2. dependency list define distribute_deps $(foreach dep,$(2),$(eval $(1) : $(dep))) endef endif define unit_test_deps_t $(1) : .$(1).stdout.expected $(1) : .$(1).stderr.expected $$(call distribute_deps,$(1),$($(1)_deps)) endef $(foreach test,$(UNIT_TESTS),$(eval $(call unit_test_deps_t,$(test)))) # To build .test_foo.std{err,out}.expected -- # If foo.std{err,out}.expected exists, symlink to it # Else, .test_foo.std{err,out}.expected is a new empty file # These rules are invoked from rules in the Extradeps section. # They must be two separate rules (else 'make test' not idempotent # -- see bug #105) .test_%.stderr.expected: @if [ -f $(@:.test_%=%) ]; \ then $(SYMLINK) $(@:.test_%=%) $@; \ else touch $@; \ fi .test_%.stdout.expected: @if [ -f $(@:.test_%=%) ]; \ then $(SYMLINK) $(@:.test_%=%) $@; \ else touch $@; \ fi # Unit test rule. Expects: # UNIT_TESTS = test_foo test_bar (test_ prefix is necessary) # test_foo_cmd = command to run # test_foo_deps = dependencies (i.e. 
inputs to the test, and the executable) # test_foo_out = files and directories created by running the command # with corresponding .expected files for comparison $(UNIT_TESTS) : @$(test_$(@:test_%=%)_cmd) > $(@:test_%=%).stdout \ 2>$(@:test_%=%).stderr || true @$(PERL) $(COMPARE_OUT) -p '$($(@)_diff_prog)' -- \ $($(@)_diff_opts) $(@:test_%=%) $($(@)_out) | tee $@ flexml-1.9.6/testbed/mixed-enumatt2.in0000644000175000017500000000043112101762535020147 0ustar mquinsonmquinson foo start data reqatt data defatt data defatt data optatt data optatt data foo end data. flexml-1.9.6/testbed/mixed-stratt.act0000644000175000017500000000076112101762535020100 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/init_header.in0000644000175000017500000000012612101762535017560 0ustar mquinsonmquinson 012456789abcdefghijklmn flexml-1.9.6/testbed/biparser.precious.c0000644000175000017500000000414612101762535020566 0ustar mquinsonmquinson#include #include "biparser-one.h" #include "biparser-two.h" /* XML application entry points. Should be generated from two action files, but I wasn't succesfull in that area yet [Mt] */ void STag_one_foo(void) {} void ETag_one_foo(void) {printf("foo pcdata: %s\n", one_pcdata);} void STag_one_bar(void) {} void ETag_one_bar(void) {printf("bar pcdata: %s\n", one_pcdata);} void STag_two_toto(void) {} void ETag_two_toto(void) {printf("toto pcdata: %s\n", two_pcdata);} void STag_two_tutu(void) {} void ETag_two_tutu(void) {printf("tutu pcdata: %s\n", two_pcdata);} /* Parsers control. Should probably be added to the .h */ #ifndef YY_TYPEDEF_YY_BUFFER_STATE #define YY_TYPEDEF_YY_BUFFER_STATE typedef struct yy_buffer_state *YY_BUFFER_STATE; #endif extern int one_lex (void); extern void one_restart (FILE *input_file ); extern void one__switch_to_buffer (YY_BUFFER_STATE new_buffer ); YY_BUFFER_STATE one__create_buffer (FILE *file,int size ); extern void one__delete_buffer (YY_BUFFER_STATE b ); extern void one__flush_buffer (YY_BUFFER_STATE b ); extern void one_push_buffer_state (YY_BUFFER_STATE new_buffer ); extern void one_pop_buffer_state (void ); extern int two_lex (void); extern void two_restart (FILE *input_file ); extern void two__switch_to_buffer (YY_BUFFER_STATE new_buffer ); extern YY_BUFFER_STATE two__create_buffer (FILE *file,int size ); extern void two__delete_buffer (YY_BUFFER_STATE b ); extern void two__flush_buffer (YY_BUFFER_STATE b ); extern void two_push_buffer_state (YY_BUFFER_STATE new_buffer ); extern void two_pop_buffer_state (void ); int main(int argc, char **argv) { FILE *infile; YY_BUFFER_STATE buff; int retval; printf("Parse biparser-one.in\n"); infile=fopen("biparser-one.in","r"); 
buff=one__create_buffer(infile,10); one__switch_to_buffer(buff); retval = one_lex(); one__delete_buffer(buff); fclose(infile); printf("Parse biparser-two.in\n"); infile=fopen("biparser-two.in","r"); buff=two__create_buffer(infile,10); two__switch_to_buffer(buff); retval = two_lex() || retval; two__delete_buffer(buff); fclose(infile); return retval; } flexml-1.9.6/testbed/mixed1.act0000644000175000017500000000064612101762535016644 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/multiple-att.in0000644000175000017500000000015112101762535017724 0ustar mquinsonmquinson 012456789abcdefghijklmn flexml-1.9.6/testbed/multiple-att.stdout.expected0000644000175000017500000000001312101762535022435 0ustar mquinsonmquinsonretval = 1 flexml-1.9.6/testbed/init_header.dtd0000644000175000017500000000014512101762535017726 0ustar mquinsonmquinson flexml-1.9.6/testbed/missing-att.dtd0000644000175000017500000000022712101762535017713 0ustar mquinsonmquinson flexml-1.9.6/testbed/mixed-enumatt.dtd0000644000175000017500000000041512101762535020234 0ustar mquinsonmquinson flexml-1.9.6/testbed/multi-parser-run.stdout.expected0000644000175000017500000000036012101762535023247 0ustar mquinsonmquinsonRun 0 foo attributes name='toto' toz='0.0' gaz='0.0' bar attributes name='tutu' Run 1 foo attributes name='toto' toz='0.0' gaz='0.0' bar attributes name='tutu' Run 2 foo attributes name='toto' toz='0.0' gaz='0.0' bar attributes name='tutu' flexml-1.9.6/testbed/mixed-stratt-def.act0000644000175000017500000000104612101762535020631 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/biparser-two.dtd0000644000175000017500000000007312101762535020071 0ustar mquinsonmquinson flexml-1.9.6/testbed/missing-att.stdout.expected0000644000175000017500000000001312101762535022253 0ustar mquinsonmquinsonretval = 1 flexml-1.9.6/testbed/mixed-stratt.in0000644000175000017500000000014412101762535017732 0ustar mquinsonmquinson 012456789abcdefghijklmn flexml-1.9.6/testbed/multiple-att.act0000644000175000017500000000076112101762535020074 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/init_header.h0000644000175000017500000000031012101762535017374 0ustar mquinsonmquinson/* header file to be included at end of generated %{ ... %} section */ #define YY_DECL static int static_lexer_param; int lexer(int lexer_param) #define YY_USER_INIT static_lexer_param = lexer_param; flexml-1.9.6/testbed/mixed-enumatt2.stdout.expected0000644000175000017500000000047312101762535022671 0ustar mquinsonmquinsonreqatt pcdata: reqatt data reqatt att: (val1) defatt pcdata: defatt data defatt att: (val1) defatt pcdata: defatt data defatt att: (val2) optatt pcdata: optatt data optatt att: (val1) optatt pcdata: optatt data optatt att: (no att given) foo pcdata: foo start data foo end data. retval = 0 flexml-1.9.6/testbed/mixed-enumatt2.dtd0000644000175000017500000000041512101762535020316 0ustar mquinsonmquinson flexml-1.9.6/testbed/multiple-att.dtd0000644000175000017500000000022712101762535020075 0ustar mquinsonmquinson flexml-1.9.6/testbed/biparser.stdout.expected0000644000175000017500000000022012101762535021623 0ustar mquinsonmquinsonParse biparser-one.in bar pcdata: 456789ab foo pcdata: 012cdefghijklmn Parse biparser-two.in tutu pcdata: 456789ab toto pcdata: 012cdefghijklmn flexml-1.9.6/testbed/biparser-one.dtd0000644000175000017500000000006712101762535020044 0ustar mquinsonmquinson flexml-1.9.6/testbed/mixed1.in0000644000175000017500000000012112101762535016467 0ustar mquinsonmquinson 012456789abcdefghijklmn flexml-1.9.6/testbed/biparser-one.in0000644000175000017500000000012712101762535017674 0ustar mquinsonmquinson 012456789abcdefghijklmn flexml-1.9.6/testbed/mixed-stratt-def.in0000644000175000017500000000033312101762535020466 0ustar mquinsonmquinson 012 bar data 1 bar data 2 bar data 3 cdefghijklmn flexml-1.9.6/testbed/multi-parser-run.act0000644000175000017500000000126212101762535020676 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/mixed-enumatt.in0000644000175000017500000000043012101762535020064 0ustar mquinsonmquinson foo start data reqatt data defatt data defatt data optatt data optatt data foo end data. flexml-1.9.6/testbed/mixed1.dtd0000644000175000017500000000014512101762535016642 0ustar mquinsonmquinson flexml-1.9.6/testbed/multiple-att.stderr.expected0000644000175000017500000000010712101762535022422 0ustar mquinsonmquinsonInvalid XML (state 13): Multiple definition of attribute batt in flexml-1.9.6/testbed/mixed-stratt.stdout.expected0000644000175000017500000000011312101762535022442 0ustar mquinsonmquinsonbar att: batt1 bar pcdata: 456789ab foo pcdata: 012cdefghijklmn retval = 0 flexml-1.9.6/testbed/mixed-enumatt2.act0000644000175000017500000000210412101762535020307 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/mixed-stratt-def.dtd0000644000175000017500000000040712101762535020635 0ustar mquinsonmquinson flexml-1.9.6/testbed/init_header.stdout.expected0000644000175000017500000000011012101762535022265 0ustar mquinsonmquinsonbar pcdata: 456789ab foo pcdata: 012cdefghijklmn param was 7 retval = 0 flexml-1.9.6/testbed/multi-parser-run.in0000644000175000017500000000013012101762535020526 0ustar mquinsonmquinson flexml-1.9.6/testbed/mixed-stratt-def.stdout.expected0000644000175000017500000000060612101762535023205 0ustar mquinsonmquinsonbar batt: (default value) batt2: (second default value) batt3: (third default value) bar pcdata: bar data 1 bar batt: (default value) batt2: ( non-default 2) batt3: (third default value) bar pcdata: bar data 2 bar batt: (default value) batt2: (second default value) batt3: (non-default with single-quote(')) bar pcdata: bar data 3 foo pcdata: 012 cdefghijklmn retval = 0 flexml-1.9.6/testbed/multi-parser-run.dtd0000644000175000017500000000026212101762535020701 0ustar mquinsonmquinson flexml-1.9.6/testbed/init_header.act0000644000175000017500000000071112101762535017721 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/mixed-enumatt.act0000644000175000017500000000230212101762535020225 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/mixed1.stdout.expected0000644000175000017500000000007412101762535021212 0ustar mquinsonmquinsonbar pcdata: 456789ab foo pcdata: 012cdefghijklmn retval = 0 flexml-1.9.6/testbed/missing-att.in0000644000175000017500000000012612101762535017544 0ustar mquinsonmquinson 012456789abcdefghijklmn flexml-1.9.6/testbed/missing-att.stderr.expected0000644000175000017500000000011512101762535022237 0ustar mquinsonmquinsonInvalid XML (state 13): Required attribute `batt' not set for `bar' element. flexml-1.9.6/testbed/missing-att.act0000644000175000017500000000076112101762535017712 0ustar mquinsonmquinson ]]>
flexml-1.9.6/testbed/mixed-enumatt.stdout.expected0000644000175000017500000000047312101762535022607 0ustar mquinsonmquinsonreqatt att: (val1) reqatt pcdata: reqatt data defatt att: (val1) defatt pcdata: defatt data defatt att: (val2) defatt pcdata: defatt data optatt att: (val1) optatt pcdata: optatt data optatt att: (no att given) optatt pcdata: optatt data foo pcdata: foo start data foo end data. retval = 0 flexml-1.9.6/flexml-act.dtd0000644000175000017500000000243412101762535016060 0ustar mquinsonmquinson flexml-1.9.6/FleXML.html0000644000175000017500000002111712101762535015303 0ustar mquinsonmquinson FleXML - An XML Processor Generator

SourceForge Logo
Visit our Sponsor


Project Summary
Forums
Tracker
Bugs
Support
Patches
Lists
Tasks
Docs
News
CVS
Files



FleXML - XML Processor Generator

See also the manual page and a short white paper. Or peek into the master source archive.

FleXML reads a DTD (Document Type Definition) describing the format of XML (Extensible Markup Language) documents; it may be specified as a URI to the DTD on the web. From this FleXML produces a validating XML processor with an interface to support XML applications. Proper applications can be generated optionally from special action files, either for linking or textual combination with the processor.

FleXML is specifically developed for XML applications where a fixed data format suffices in the sense that a single DTD is used without individual extensions for a large number of documents. (Specifically it restricts XML rule [28] to

  [28r] doctypedecl ::= '<!DOCTYPE' S Name S ExternalID S? '>'

where the ExternalId denotes the used DTD - one might say, in fact, that FleXML implements ``non-extensible'' markup. :)

With this restriction we can do much better because the possible tags and attributes are static: FleXML-generated XML processors read the XML document character by character and can immediately dispatch the actions associated with each element (or reject the document as invalid). Technically this is done by using the Flex scanner generator to produce a deterministic finite automaton with an element context stack for the DTD, which means that there is almost no overhead for XML processing.

Furthermore we have devised a simple extension of the C programming language that facilitates the writing of `element actions' in C, making it easy to write really fast XML applications. In particular we represent XML attribute values efficiently in C when this is possible, thus avoiding the otherwise ubiquitous conversions between strings and data values.

Compared to SAX and its XSL-based friends, FleXML immediately produces efficient code: because extensions are forbidden, data can be encoded efficiently. For example, FleXML uses native C `enum' types to implement enumeration attribute types. However, the above limitation does prevent uses in more complex settings.

As an example: the following is all that is needed to produce a fast program that prints all href-attributes in <a...> tags in XHTML documents (and rejects invalid XHTML documents).

  <!DOCTYPE actions SYSTEM "flexml-act.dtd">
  <actions>
  <top><![CDATA[           #include <stdio.h>                  ]]></top>
  <start tag='a'><![CDATA[ if ({href}) printf("%s\n", {href}); ]]></start>
  </actions>

In general, action files are themselves XML documents conforming to the DTD

   <!ELEMENT actions ((top|start|end)*,main?)>
   <!ENTITY % C-code "(#PCDATA)">
   <!ELEMENT top   %C-code;>
   <!ELEMENT start %C-code;>  <!ATTLIST start tag NMTOKEN #REQUIRED>
   <!ELEMENT end   %C-code;>  <!ATTLIST end   tag NMTOKEN #REQUIRED>
   <!ELEMENT main  %C-code;>

with %C-code; segments being in C enriched as described below. The elements are used as follows:

top

Use for top-level C code such as global declarations, utility functions, etc.

start

Attaches the code as an action to the element with the name of the required ``tag'' attribute. The ``%C-code;'' component should be C code suitable for inclusion in a C block (i.e., within {...} so it may contain local variables); furthermore the following extensions are available: {attribute} Can be used to access the value of the attribute as set with attribute=value in the start tag. In C, {attribute} will be interpreted depending on the declaration of the attribute. If the attribute is declared as an enumerated type like

  <!ATTLIST attrib (alt1 | alt2 |...) ...>

then the C attribute value is of an enumerated type with the elements written {attrib=alt1}, {attrib=alt2}, etc.; furthermore an unset attribute has the ``value'' {!attrib}. If the attribute is not an enumeration then {attrib} is a null-terminated C string (of type char*) and {!attrib} is NULL.

end

Similarly attaches the code as an action to the end tag with the name of the required ``tag'' attribute; also here the ``%C-code;'' component should be C code suitable for inclusion in a C block. In case the element has ``Mixed'' contents, i.e., was declared to permit #PCDATA, then the special variable {#PCDATA} contains the text (#PCDATA) of the element as a null-terminated C string (of type char*). In case the Mixed contents element actually mixed text and child elements then {#PCDATA} contains the plain concatenation of the text fragments as one string.

main

Finally, an optional ``main'' element can contain the C main function of the XML application. Normally the main function should include (at least) one call of the XML processor yylex.

The program is freely redistributable and modifiable (under GNU `copyleft').


Copyright (C) Kristoffer Rose. Last modified: Tue Feb 11 18:06:44 EST 2003

$Id: FleXML.html,v 1.5 2005/04/06 10:05:15 mquinson Exp $

flexml-1.9.6/ChangeLog0000644000175000017500000000311012101762535015071 0ustar mquinsonmquinson2013-01-29 Martin Quinson * Don't install any file under examples/flexml/flexml/* * Release v1.9.6 with these fixes. 2013-01-29 Martin Quinson * "flexml --version" now reports the package version, not the file version in CVS. Users often don't care about the internal rcs numbering schema. * Update the Copyright notices: we are in 2013 already. * Release v1.9.5 with these cosmetics. 2012-12-20 Martin Quinson * New patch from Arnaud Giersch, this time to ensure that underscores in XML names don't mess the C identifiers. * Release v1.9.4 with this fix. 2012-07-05 Martin Quinson * Integrate another patch from Arnaud Giersch that avoids undefined behaviors breaking clang's optimizations * Release v1.9.3 with this fix. 2011-11-04 Martin Quinson * Integrate a patch from Arnaud Giersch to ensure that the variable defined to check multiply defined attributes are correctly added to the header. Thanks Arnaud. * Release v1.9.2 with this fix. 2011-10-28 Martin Quinson * Ensure that the generated parsers are robust to multiply defined attributes 2006-09-29 William F. Dowling * Makefiles use INSTALL and MAKE variables, that play better with automatic build systems. 2006-09-22 William F. Dowling * Modified skel by adding '#include ' to fix sourceforge bug #1563488. * This change log starts with flexml rev 1.7. flexml-1.9.6/skel0000644000175000017500000002440012101762535014205 0ustar mquinsonmquinson/* Flex(1) XML processor skeleton scanner (in -*-C-*-). * Copyright (C) 1999 Kristoffer Rose. All rights reserved. * * This file is part of the FleXML XML processor generator system. * Copyright (C) 1999 Kristoffer Rose. All rights reserved. * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free * Software Foundation; either version 2 of the License, or (at your option) * any later version. 
* * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., 59 * Temple Place, Suite 330, Boston, MA 02111-1307 USA. * * Note: Files generated by the FleXML system have fewer restrictions on them * as explained in the header of each generated file. */ %{ /* Version strings. */ FLEXML_VERSION /* ANSI headers. */ #include /* for realloc() -- needed here when using flex 2.5.4 */ #include #include #include #include #include #if defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(__TOS_WIN__) # ifndef __STRICT_ANSI__ # include # include # endif #else # include #endif #ifndef FLEXML_INDEXSTACKSIZE #define FLEXML_INDEXSTACKSIZE 1000 #endif /* Generated definitions. */ FLEXML_DEFINITIONS /* XML state. */ #ifdef FLEX_DEBUG # define ENTER(state) debug_enter(state,#state) # define LEAVE debug_leave() # define SET(state) debug_set(state,#state) static void debug_enter(int, const char*); static void debug_leave(void); static void debug_set(int, const char*); #else # define ENTER(state) (yy_push_state(state)) # define LEAVE (yy_pop_state()) # define SET(state) BEGIN(state) #endif /* Generic actions. */ #define SKIP /*skip*/ #define SUCCEED CLEANUP; return 0 #define FAIL return fail static int fail(const char*, ...); enum {flexml_max_err_msg_size = 512}; static char flexml_err_msg[flexml_max_err_msg_size]; const char * parse_err_msg() { return flexml_err_msg; } static void reset_parse_err_msg() { flexml_err_msg[0] = '\0'; } /* Cleanup */ static void cleanup(void); #define CLEANUP cleanup() /* Text buffer stack handling. 
*/ char *bufferstack = NULL; static int blimit = FLEXML_BUFFERSTACKSIZE; static int bnext = 1; static int *indexstack = NULL; static int ilimit = FLEXML_INDEXSTACKSIZE; static int inext = 1; #define BUFFERSET(P) (P = bnext) #define BUFFERPUTC(C) (ck_blimit(), bufferstack[bnext++] = (C)) #define BUFFERDONE (BUFFERPUTC('\0')) #define BUFFERLITERAL(C, P) bufferliteral(C, &(P), yytext) /* after this is called, there are at least 2 slots left in the stack */ static int ck_blimit() { if (bnext >= blimit) { blimit += FLEXML_BUFFERSTACKSIZE + 2; { char *temp = (char *) realloc(bufferstack, blimit); assert(temp); bufferstack = temp; } } return 0; } /* after this is called, there are at least 2 slots left in the stack */ static int ck_ilimit() { if (inext >= ilimit) { ilimit += FLEXML_INDEXSTACKSIZE + 2; { int *temp = (int *) realloc(indexstack, ilimit); assert(temp); indexstack = temp; } } return 0; } #ifdef FLEXML_NEED_BUFFERLIT static void bufferliteral(char c, int* pp, const char* text) { BUFFERSET(*pp); if (c) { const char *s = strchr(text, c), *e = strrchr(text, c); assert(s && e && s <= e); ++s; while (s < e) { if (isspace(*s)) { BUFFERPUTC(' '); do ++s; while (s < e && isspace(*s)); } else BUFFERPUTC(*s++); } } else { const char *s = text; while (*s) BUFFERPUTC(*s++); } BUFFERDONE; } #endif static void pushbuffer(int p) { ck_ilimit(); indexstack[inext++] = p; indexstack[inext++] = bnext; } static int popbuffer(void) { assert(inext >= 2); bnext = indexstack[--inext]; return indexstack[--inext]; } /* General internal entities are `unput' back onto the input stream... */ #define ENTITYTEXT(T) \ { char *s = (T), *e = s+strlen(s);\ while (--e >= s) { unput(*e); }} FLEXML_INCLUDE_INIT_HEADER %} /* Flex standard options. */ %option stack %option noyy_top_state %option noinput %option noreject %option noyymore %option noyywrap /* Flex user-requested options. */ FLEXML_FLEX_OPTIONS /* XML character classes (currently restricted to ASCII). */ /* "Common syntactic structures." 
*/ S [ \t\n\r\f]+ s [ \t\n\r\f]* /* "Names and Tokens." */ NameChar [A-Za-z0-9.:_-] Name [A-Za-z_:]{NameChar}* Names {Name}({S}{Name})* Nmtoken ({NameChar})+ Nmtokens {Nmtoken}({S}{Nmtoken})* /* Miscellaneous. */ VersionNum [a-zA-Z0-9_.:-]+ Eq {s}"="{s} Literal \'[^'']*\'|\"[^""]*\" /* Parser states (flex `exclusive start conditions'): * * PROLOG the XML prolog of the document before * DOCTYPE the XML prolog of the document after * EPILOG after the root element * INCOMMENT inside an XML comment * INPI inside an XML PI * VALUE1 inside a '...'-delimited literal * VALUE2 inside a "..."-delimited literal * CDATA inside a section. * ROOT_ expect root element * AL_ inside the attribute list for * IN_ inside a with element contents (ready for end tag) * IMPOSSIBLE dummy to permit disabling rules; must be last */ %x PROLOG DOCTYPE EPILOG INCOMMENT INPI VALUE1 VALUE2 CDATA FLEXML_START_CONDITIONS %x IMPOSSIBLE FLEXML_EXTRA_DEFINITIONS %% /* Bypass Flex's default INITIAL state and begin by parsing the XML prolog. */ SET(PROLOG); reset_parse_err_msg(); bufferstack = (char *) malloc(FLEXML_BUFFERSTACKSIZE); assert(bufferstack); #ifdef FLEX_DEBUG { int i; for (i = 0; i < blimit; i++) { bufferstack[i] = '\377'; } } #endif bufferstack[0] = '\0'; indexstack = (int *) malloc(FLEXML_INDEXSTACKSIZE * sizeof(int)); assert(indexstack); indexstack[0] = 0; FLEXML_EXTRA_DEFINITIONS_INIT /* COMMENTS and PIs: handled uniformly for efficiency. */ { "" LEAVE; "--" | . | \n SKIP; <> FAIL("EOF in comment."); } { "?>" LEAVE; . | \n SKIP; <> FAIL("EOF in PI (processing instruction)."); } /* SPACES: skipped uniformly */ {S} SKIP; /* PROLOG: determine root element and process it. */ { "" SET(DOCTYPE); "]*">" FAIL("Bad declaration %s.",yytext); } { FLEXML_DOCTYPES "-][^>]*">" FAIL("Bad declaration %s.",yytext); . FAIL("Unexpected character `%c' in prolog.", yytext[0]); <> FAIL("EOF in prolog."); } /* RULES DERIVED FROM DTD. */ FLEXML_RULES /* EPILOG: after the root element. */ { . 
{SET(PROLOG); yyless(0); CLEANUP; return -1;} <> SUCCEED; } /* CHARACTER DATA. */ { FLEXML_ENTITIES /* Character entities. */ "&#"[[:digit:]]+";" BUFFERPUTC((unsigned char)atoi(yytext+2)); "&#x"[[:xdigit:]]+";" BUFFERPUTC((unsigned char)strtol(yytext+3,NULL,16)); } { "\n" | "\r" | "\r\n" | "\n\r" BUFFERPUTC('\n'); } { "" FAIL("Unexpected `]""]>' in character data."); } { \' BUFFERDONE; LEAVE; <> FAIL("EOF in literal (\"'\" expected)."); } { \" BUFFERDONE; LEAVE; <> FAIL("EOF in literal (`\"' expected)."); } { [^<&] BUFFERPUTC(yytext[0]); [<&] FAIL("Spurious `%c' in character data.",yytext[0]); } { "]""]>" LEAVE; /* "]""]" BUFFERPUTC(yytext[0]); BUFFERPUTC(yytext[1]); */ . BUFFERPUTC(yytext[0]); <> FAIL("EOF in CDATA section."); } /* Impossible rules to avoid warnings from flex(1). */ /* Ideally, this should be replaced by code in flexml.pl that generates just the states not covered by other rules. */ <*>{ .|[\n] FAIL("Syntax error on character `%c'.", yytext[0]); } %% /* Element context stack lookup. */ int element_context(int i) { return (0.\n" if $verbose; my $ua = new LWP::UserAgent; # Create a user agent object $ua->agent("FleXML/1 " . $ua->agent); my $req = new HTTP::Request GET => $url; # Create a request my $res = $ua->request($req); # Pass request to the user agent and get a response if ($res->is_error) { die place()."URL <$url> could not be fetched.\n"; } # Make the data available... push @inputs, [ split /\r?\n/, $res->content ]; if (@{$inputs[$#inputs]}) { push @inputnames, "$url"; push @inputlinenos, 0; } else { pop @inputs; # oops, empty. } # print STDOUT "Testing:\n"; # my $i = 0; # for my $ref (@inputs) { # print STDOUT " inputs[" . $i++ . "] =\n"; # for my $line (@{$ref}) { # print " | $line\n"; # } # } # $i = 0; # for my $name (@inputnames) { # print STDOUT " inputnames[" . $i++ . "] = `$name'\n"; # } # $i = 0; # for my $no (@inputlinenos) { # print STDOUT " inputlinenos[" . $i++ . 
"] = `$no'\n"; # } } # add attribute to list of all default attributes my @default_attributes; my $next_att_loc = 1; sub add_def_att { my ($att) = @_; my $retval = $next_att_loc; push @default_attributes, ($att); $next_att_loc += length($att) + 1; return $retval; } sub nextline { # return one input line return undef unless @inputs; my $line = shift @{$inputs[$#inputs]}; $inputlinenos[$#inputs]++; while (@inputs and not @{$inputs[$#inputs]}) { # discard exhausted inputs pop @inputs; pop @inputnames; pop @inputlinenos; } return $line; } sub place { if (@inputs) { local $_ = "\"$inputnames[$#inputs]\", line $inputlinenos[$#inputs]: "; s/"file:/"/; return $_; } else { return ""; } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # extractcp($str) - Split argument in one cp[48] (content particles) and the rest. # # returns ($cp,$rest) # sub extractcp { # $_ stores the remainder of the string we're looking at local ($_) = @_; # shack - I noticed that the 'match one Name' pattern below # does not accept leading spaces, but the ( pattern does # This is a little warning that will enlighten me if this occurs m/^\s+/ and do { cluck("extractcp matching leading spaces! '$_'"); }; if ( m/^($Name[+?*]?)\s*/o ) { # match one Name return ($1,$'); #' } if ( m/^\s*\(/o ) { # match the start of a choice or seq # build up one CP my matching parens my $cp = '('; $_ = $'; #' my $level = 1; # match nested parenthesis while ($level > 0 and $_) { if ( m/^\s*\(\s*/o ) { # open paren $level++; $cp .= '('; $_ = $'; #' } elsif ( m/^\s*(\)[+?*]?)\s*/o ) { # close paren $level--; $cp .= $1; $_ = $'; #' } elsif ( m/^\s+/o ) { # skip white space $_ = $'; #' } else { # everything else m/[^()\s]+/o; $cp .= $&; $_ = $'; #' } } return ($cp,$_); } confess ("should not get here!"); } my $statecounter; sub analysechildren { # Analyse DTD children specification; return # true if it may be empty. Uses global $statecounter. 
my ($tag,$re,$in,$out) = @_; $allstates{$in} = 1; $allstates{$out} = 1; print "analysechildren [ $tag, $re, $in, $out ] \n" if $debug; local $_ = $re; if ( m/^\s*($Name)\s*$/o ) { # tag my $thetag = $1; my %ins = (); if (exists $instates{$thetag}) { for (split /,/,$instates{$thetag}) { $ins{$_} = 'true'; } } $ins{$in} = 'true'; $instates{$thetag} = join(',',keys %ins); $exittrans{$thetag} .= ($exittrans{$thetag}?',':'') . "$in=>$out" if $in ne $out; return undef; } elsif ( m/^((.|\n)+)\?\s*$/o ) { # re ? $emptytrans{$in} .= ($emptytrans{$in}?',':'') . $out unless $in eq $out; analysechildren($tag,$1,$in,$out); return 'true'; } elsif ( m/^((.|\n)+)\+\s*$/o ) { # re + my $re = $1; my $s1 = "S_$ctag{$tag}_" . (++$statecounter); my $s2 = "S_$ctag{$tag}_" . (++$statecounter); $states{$tag} .= ",$s1,$s2"; $emptytrans{$in} .= ($emptytrans{$in}?',':'') . $s1; $emptytrans{$s2} = "$s1,$out"; return analysechildren($tag,$re,$s1,$s2); } elsif ( m/^((.|\n)+)\*\s*$/o ) { # re * return analysechildren($tag,"$1+?",$in,$out); } elsif ( m/^\s*\(\s*((.|\n)+)\s*\)\s*$/xo ) { # choice or seq local $_; my $cp; ($cp,$_) = extractcp($1); if ( m/^\s*$/ ) { # () with single member. return analysechildren($tag,$cp,$in,$out); } elsif ( m/^\s*([|,])\s*/m ) { my $type = "[$1]"; my $maybeempty = ($type eq '[,]'); my $state = $in; while ( m/^\s*$type\s*/ ) { $_ = $'; #' if ($type eq '[|]') { # $cp is choice $maybeempty = 'true' if analysechildren($tag,$cp,$in,$out); } else { # $cp is seq component my $oldstate = $state; $state = "S_$ctag{$tag}_" . (++$statecounter); $states{$tag} .= ",$state"; $maybeempty = undef unless analysechildren($tag,$cp,$oldstate,$state); } ($cp,$_) = extractcp($_); } # Last cp needs special treatment in sequence. if ($type eq '[|]') { # $cp is choice $maybeempty = 'true' if analysechildren($tag,$cp,$in,$out); } else { # $cp is seq component $maybeempty = undef unless analysechildren($tag,$cp,$state,$out); } $emptytrans{$in} .= ($emptytrans{$in}?',':'') . 
$out if $maybeempty and $in ne $out; return $maybeempty unless $_; } } die place()."DTD element `$tag' has nonsense fragment `$_'.\n"; } # OPTIONS PROCESSING (explained in manual). # Parse options. $Use = "Usage: flexml [-ASHDvdqnLXV] [-s skel] [-T actbin] [--sysid sysid] " . "[-p pubid] [-u uri] [-i init_header]\n" . " [-b stack_increment] [-r roottags] [-a actions] [-P prefix] name[.dtd]"; sub show_version { exit 0; } Getopt::Long::Configure ("bundling"); GetOptions( # Debugging and verbosity "debug|d" => \$debug, "verbose|v" => \$verbose, "quiet|q" => \$quiet_parser, # Version! "version|V" => sub { print "FleXML version FLEXMLVERSION.\n"; exit 0; }, # dry-run ? "dry-run|n" => \$dryrun, # Line numbers? "lineno|L" => \$lineno, # Exit without fail message? "nofail|X" => \$nofail, # Specific root tags? "root-tags|r=s" => sub { for (split ',',$_[1]) { $roottags{$_} = 'true'; } }, # Specific stack size? "stack-increment|b=s" => \$stacksize, # Specific tagprefix? "tag-prefix|P=s" => sub { $tagprefix = $_[1]."_" }, # Specific actbin? (internal use) "act-bin|T=s" => \$actbin, # Set skeleton scanner file name and check it is readable (if needed). "skel|s=s" => sub { $SKELETON = $_[1]; die "$0: No skeleton file $SKELETON.\n" if not -r $SKELETON and $_[1]; }, # Set document type URI and PUBID. 
"uri|u=s" => \$uri, "pubid|p=s" => \$pubid, "sysid=s" => \$sysid, # name of header file to be included in initial section of generated .l file "init_header|i=s" => \$init_header, # What to generate "header|H:s" => sub { $header = $_[1] || 'true' }, "dummy|D:s" => sub { $dummy = $_[1] || 'true' }, "stand-alone|A" => \$standalone, "scanner|S:s" => sub { $scanner = $_[1] || 'true' }, "actions|a=s" => \$actions ); print "FleXML version FLEXMLVERSION.\n" if $verbose; # Set DTD file name...and extract prefix for later my $prefix = $ARGV[0]; if (defined $prefix and $#ARGV == 0) { $prefix =~ s/\.dtd$//; $dtd = "$prefix.dtd"; # Require .dtd extension on DTD geturl($dtd); # Read the DTD $prefix =~ s|^([^:/]*[:/])*||; $cdtd = variablify($prefix); } else { die "$Use\n"; } # Selection options: If none of -SHDA specified then default to -SH. # Furthermore -a implies -D. $scanner = $header = 'true' unless ($scanner or $header or $dummy or $standalone); $dummy ||= $actions unless $standalone; # Set default (DTD-based) output file names. $SCANNER = (!defined($scanner)) || $scanner eq 'true' ? "$prefix.l" : $scanner; $HEADER = (!defined($header)) || $header eq 'true' ? "$prefix.h" : $header; $APPLICATION = (!defined($dummy)) || $dummy eq 'true' ? "$prefix-dummy.c" : $dummy; # Set actions=based output file names, if any. if ($ACTIONS = $actions) { $actions =~ s/\.[a-z]+$//; $APPLICATION = "$actions.c"; } # Stand-alone applications... if ($standalone) { die "$0: -A conflicts with -SHD.\n" if ($scanner or $header or $dummy); $SCANNER = $APPLICATION; $SCANNER =~ s/\.c$/.l/; } # Dry-run? if ($dryrun) { $standalone = $scanner = $header = $dummy = undef; } # PARSE DTD. print STDOUT "Processing DTD in $dtd.\n" if $verbose; $_ = ''; # Current entry while (@inputs) { # While there are lines... my $orig = "$_"; # current source line(s) # Skip spaces and complete comments (but save as source). do { # Skip blank lines. 
$_ = nextline() while @inputs and m/^\s*$/; # If we're looking at a parameter or external entity then expand it. if ( m/^\s*%($Name);/ ) { my $ent = $1; if ($parameter{$ent}) { expandparametersat('^'); } elsif ($external{$ent}) { $_ = $'; #' geturl($external{$ent}); } else { die "Unknown entity `%$ent;'\n"; } } # Skip (but save) comments. while ( m/^\s*/ or not @inputs) { $_ .= "\n" . nextline(); } # Extract first DTD version number... if ( not $dtdrevision and /\$(Id|Header|Revision): [^\$]*\$/ ) { $dtdrevision = "$&"; $dtdrevision =~ s/\s*\$\s*//go; } # Remove the comment to read on to next nonblank (but save as # source). $orig .= ($orig?"\n":"").$1 if s/^\s*()\s*//; } s/^\s*//; } until $_ or not @inputs; # If we're looking at a parameter or external entity then expand and retry. if ( m/^\s*%($Name);/ ) { my $ent = $1; if ($parameter{$ent}) { expandparametersat('^'); } elsif ($external{$ent}) { $_ = $'; #' geturl($external{$ent}); next; } else { die "Unknown entity `\%$ent;'\n"; } } die place()."Nonsense `$_'.\n" if /^[^<]/ or /^<[^!]/; # Read on until a full DTD or entry is available. until (m/^\s*]*>/o or m/^\s*<[?]([^?]|[?][^>])*[?]>/o or not @inputs) { my $line = nextline(); $orig .= ($orig?"\n":"") . $line; $_ .= "\n" . $line; } unless ( m/^\s*]*>/o or m/^\s*<[?]([^?]|[?][^>])*[?]>/o ) { last if not @inputs; die place()."Could not find end of declaration.\n"; } # Clean out in $orig. $orig =~ s/\n+/\n/g; $orig =~ s/^\n*//g; $orig =~ s/\n*$//g; print STDOUT " [$_]\n" if $debug; # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Processing instruction. if ( m/^\s*<[?]([^?]|[?][^>])*[?]>\s*/o ) { print STDERR place()."Warning: ignoring processing instruction $&.\n"; $_ = $'; #' } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Parse element declarations. 
elsif ( m/^]*)>\s*/xo ) { my ( $tag, $token_source ) = ( $1, $2 ); # strip off the matched code from the beginning of $_ $_ = $'; #' # place the newly found tag into the list of tags @tags die place()."Repeated element $tag.\n" if exists $source{$tag}; # ????? - most of the time $orig is undef $source{$tag} = "$orig"; push @tags, $tag; # Create C-friendly tag names. $ctag{$tag} = variablify($tag) unless $ctag{$tag}; my $c = $ctag{$tag}; # start looking at the token_source ($2) local $_ = $token_source; expandparametersat(''); s/^\s+//; # All elements should be followed by nothing when at the root. # IF there is a list of roottags ($0 -r ..,..), then only add # the exittrans if it is in the list $exittrans{$tag} .= ($exittrans{$tag}?',':'') . "ROOT_${tagprefix}$c=>EPILOG" if not %roottags or $roottags{$tag}; # Handle element declaration. if ( m/^EMPTY\s*$/o ) { $empty{$tag} = 'true'; $properempty{$tag} = 'true'; $states{$tag} = "E_$c"; $startstate{$tag} = "E_$c"; $endstates{$tag} = "E_$c"; } elsif ( m/^ANY\s*$/o ) { $any{$tag} = 'true'; $mixed{$tag} = 'true'; $empty{$tag} = 'true'; $states{$tag} = "IN_$c"; $startstate{$tag} = "IN_$c"; $endstates{$tag} = "IN_$c"; } elsif ( m/^\(\s*\#PCDATA\s*\)\s*$/o or m/^\(\s*\#PCDATA\s*((\|\s*$Name\s*)*)\)\*\s*$/xo ) { $mixed{$tag} = 'true'; $empty{$tag} = 'true'; if ($1) { my $desc = $1; $desc =~ s/^\s*\|\s*//o; for (split /\s*\|\s*/,$desc) { $instates{$_} .= ($instates{$_}?',':'') . "IN_$c"; $inmixed{$_} = 'true'; } } $states{$tag} = "IN_$c"; $startstate{$tag} = "IN_$c"; $endstates{$tag} = "IN_$c"; } else { $children{$tag} = 'true'; $statecounter = 0; $states{$tag} = "S_$c"; $startstate{$tag} = "S_$c"; $empty{$tag} = 'true' if analysechildren($tag,$_,"S_$c","E_$c"); $states{$tag} .= ",E_$c"; $endstates{$tag} = "E_$c"; } } # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - # Parse attribute declarations. 
elsif ( m/^]*)>\s*/o ) { $_ = $'; #' { my $tag = $1; local $_ = $2; expandparametersat(''); s/^\s+//; # Repeat while there are attribute declarations. while ( s/^($Name)\s+([A-Z]+|\(\s*$Nmtoken\s*(?:\|\s*$Nmtoken\s*)*\)) (?:\s+(\#IMPLIED|\#REQUIRED|(?:\#FIXED\s+)?$Literal))?\s*//xo ) { my ($attribute,$type,$default) = ($1,$2,$3); if ($atttype{"$tag/$attribute"}) { print place()."Warning: Redeclaration of element $tag attribute $attribute ignored.\n"; } else { if ($orig) { # to only print the source once once $source{"$tag/$attribute"} = "$orig"; $orig = ''; } $ctag{$tag} = variablify($tag) unless $ctag{$tag}; $catt{$attribute} = variablify($attribute) unless $catt{$attribute}; # Add atribute to the appropriate lists. $attlist{$tag} .= ($attlist{$tag}?',':'') . "$attribute"; if ($withattr{$attribute}) { $withattr{$attribute} .= ",$tag"; } else { push @attributes, $attribute; $withattr{$attribute} = "$tag"; } # Analyse default value. if (defined $default) { if ($default eq '#REQUIRED') { $required{"$tag/$attribute"} = 'true'; $default = undef; } elsif ($default eq '#IMPLIED') { $default = undef; } else { $fixed{"$tag/$attribute"} = 'true' if $default =~ s/\#FIXED\s+//o; $default =~ s/^'([^'']*)'$/$1/o unless $default =~ s/^"([^""]*)"$/$1/o; } } # Store attribute default string and type. $attdef{"$tag/$attribute"} = $default if $default; $atttype{"$tag/$attribute"} = $type; # Handle enumeration types... if ( $type =~ m/^\(((.|\n)*)\)$/x ) { local $_ = $1; s/\s+//go; s/\|/,/go; $enumtype{"$tag/$attribute"} = "$_"; s/$Nmtoken/ "A_${tagprefix}$ctag{$tag}_$catt{$attribute}_" . variablify($&) /xge; my $undefined = "AU_${tagprefix}$ctag{$tag}_$catt{$attribute}"; s/^/enum \{ $undefined, /o; s/$/ \}/o; $typeof{"$tag/$attribute"} = "$_"; if ($default) { $initof{"$tag/$attribute"} = "A_${tagprefix}$ctag{$tag}_$catt{$attribute}_" . variablify($default); } else { $initof{"$tag/$attribute"} = "$undefined"; } } # ...and string/token types. 
else { $typeof{"$tag/$attribute"} = 'int'; if ($default) { $initof{"$tag/$attribute"} = add_def_att($default); } else { $initof{"$tag/$attribute"} = '0'; } # Special treatment of token types. if ( $type eq 'ID' or $type eq 'IDREF' ) { $literaltype{"$tag/$attribute"} = '{Name}'; print STDERR place()."Warning: cannot validate attribute type `$type' (yet).\n"; } elsif ( $type eq 'IDREFS' ) { $literaltype{"$tag/$attribute"} = '{Names}'; print STDERR place()."Warning: cannot validate attribute type `$type' (yet).\n"; } elsif ( $type eq 'NMTOKEN' ) { $literaltype{"$tag/$attribute"} = '{Nmtoken}'; } elsif ( $type eq 'NMTOKENS' ) { $literaltype{"$tag/$attribute"} = '{Nmtokens}'; } elsif ( $type eq 'ENTITY' ) { #die place()."ENTITY attribute type unimplemented.\n"; $literaltype{"$tag/$attribute"} = '{Name}'; print STDERR place()."Warning: cannote validate attribute type `$type' (yet).\n"; } elsif ( $type eq 'ENTITIES' ) { #die place()."ENTITIES attribute type unimplemented.\n"; $literaltype{"$tag/$attribute"} = '{Names}'; print STDERR place()."Warning: cannot validate attribute type `$type' (yet).\n"; } elsif ( $type ne 'CDATA' ) { die place()."Unknown AttType `$type'.\n"; } } } expandparametersat('^'); s/^\s+//; # to expand next set of declarations... } die place()."Nonsense ($_) in attribute list.\n" if $_; } $orig = ''; # in case there were no attributes... } # end of a block # Parse internal parameter entity declaration. elsif ( m/^\s*\s*/xo or m/^\s*\s*/xo ) { $_ = $'; #' $source{"%$1;"} = "$orig"; $orig = ''; # cycle my $name = $1; local $_ = $2; die "Entity `%$name;' doubly defined.\n" if $parameter{$name} or $external{$name}; expandparametersat(''); s/^\s+//; s/\s+$//; s/\&\#([0-9]+|x[0-9a-fA-F]+);/ (substr($1,0,1) eq 'x' ? chr(hex(substr($1,1))) : chr($1)) /ge; $parameter{$name} = $_; } # Parse external parameter entity declaration. 
elsif ( m/^\s*/xo or m/^\s*/xo or m/^\s*/xo or m/^\s*/xo ) { $_ = $'; #' $source{"%$1;"} = "$orig"; $orig = ''; # cycle my $name = $1; die "Entity `%$name;' doubly defined.\n" if $parameter{$name} or $external{$name}; $external{$name} = $2; } # Parse internal general entity declarations. elsif ( /^\s*/xo ) { $_ = $'; #' $source{"&$1;"} = "$orig"; $orig = ''; # cycle my $name = $1; local $_ = $2; die "Entity `&$name;' doubly defined.\n" if $entity{$name}; s/\&\#([0-9]+|x[0-9a-fA-F]+);/ (substr($1,0,1) eq 'x' ? chr(hex(substr($1,1))) : chr($1)) /ge; $entity{$name} = cquote($_); } # Unrecognised declaration. else { die place()."Unrecognized declaration.\n"; } } # Post-process DTD. print STDOUT "Post-processing DTD.\n" if $verbose; # Add transitive empty transitions until none can be found. { my $changed = 'true'; while ($changed) { $changed = undef; for my $from (keys %emptytrans) { my %tos = (); for (split /,/,$emptytrans{$from}) { $tos{$_} = 'true'; } for my $to (keys %tos) { if (exists $emptytrans{$to}) { for my $next (split /,/,$emptytrans{$to}) { if (not $tos{$next}) { $changed = $tos{$next} = 'true'; } } } } $emptytrans{$from} = join ',',keys %tos; } } } # Complete all states with their `empty' equivalents. for my $tag (@tags) { # Complete and prune instates and endstates... my %ins = (); if (exists $instates{$tag}) { for (split ',',$instates{$tag}) { $ins{$_} = 'true'; } } my %ens = (); if (exists $endstates{$tag}) { for (split ',',$endstates{$tag}) { $ens{$_} = 'true'; } } # ...and exit transitions... my %exits = (); if (exists $exittrans{$tag}) { for (split ',',$exittrans{$tag}) { m/^($Name)=>($Name)$/o; $exits{$1} = $2; } } # Encode ANY as Mixed contents with all tags permitted. for (keys %any) { $ins{"IN_$_"} = 'true'; } # For each empty transition A->B add A where B occurs. 
for my $from (keys %emptytrans) { for my $to (split /,/,$emptytrans{$from}) { $ins{$from} = 'true' if $ins{$to}; $ens{$from} = 'true' if $ens{$to}; $exits{$from} = $exits{$to} if $exits{$to}; } } # Completion done...now store'em right back. $instates{$tag} = join ',', keys %ins if %ins; $endstates{$tag} = join ',', keys %ens if %ens; $exittrans{$tag} = join ',', map "$_=>$exits{$_}", keys %exits if %exits; } if (not %roottags) { for (@tags) { $roottags{$_} = 'true'; } } # Handling prefix if(length($tagprefix)) { my($h,$k); my(@hashlist) = (\%states,\%emptytrans,\%instates,\%startstate, \%endstates,\%exittrans); foreach $h (@hashlist) { foreach $k (keys (%$h)) { $$h{$k} =~ s/^E_/E_${tagprefix}/g; $$h{$k} =~ s/,E_/,E_${tagprefix}/g; $$h{$k} =~ s/>E_/>E_${tagprefix}/g; $$h{$k} =~ s/^S_/S_${tagprefix}/g; $$h{$k} =~ s/,S_/,S_${tagprefix}/g; $$h{$k} =~ s/>S_/>S_${tagprefix}/g; } } my %tmp = (); foreach $k (keys %allstates) { $k =~ s/^E_/E_${tagprefix}/; $k =~ s/^S_/S_${tagprefix}/; $tmp{$k} = 1; } %allstates = %tmp; } # Debugging: show DTD representation. if ($debug) { print STDOUT "DTD debug output:\n" if $verbose; sub printhash { my ($name) = @_; my ($k,$v); my $out = ''; while ( ($k, $v) = each(%$name) ) { $out .= "\n $k => '" . 
($v || "undef")."'"; } return $out; } # display the options we got print "debug=".($debug||'undef')."\n"; print "verbose=".($verbose||'undef')."\n"; print "quiet=".($quiet_parser||'undef')."\n"; print "dry-run=".($dryrun||'undef')."\n"; print "lineno=".($lineno||'undef')."\n"; print "nofail=".($nofail||'undef')."\n"; print "stack-increment=".($stacksize||'undef')."\n"; print "tag-prefix=".($tagprefix||'undef')."\n"; print "act-bin=".($actbin||'undef')."\n"; print "skel=".($SKELETON||'undef')."\n"; print "uri=".($uri||'undef')."\n"; print "pubid=".($pubid||'undef')."\n\n"; print "sysid=".($sysid||'undef')."\n\n"; print "header=".($header||'undef')."\n"; print "dummy=".($dummy||'undef')."\n"; print "stand-alone=".($standalone||'undef')."\n"; print "scanner=".($scanner||'undef')."\n"; print "actions=".($actions||'undef')."\n\n"; print '%source = (' . printhash(\%source) . ")\n"; print "\n"; print '@tags = (' . join(',',@tags) . ")\n"; print '%ctag = (' . printhash(\%ctag) . ")\n"; print '%states = (' . printhash(\%states) . ")\n"; print '%instates = (' . printhash(\%instates) . ")\n"; print '%endstates = (' . printhash(\%endstates) . ")\n"; print '%emptytrans = (' . printhash(\%emptytrans) . ")\n"; print '%exittrans = (' . printhash(\%exittrans) . ")\n"; print '%roottags = (' . printhash(\%roottags) . ")\n"; print '%empty = (' . printhash(\%empty) . ")\n"; print '%properempty = (' . printhash(\%properempty) . ")\n"; print '%any = (' . printhash(\%any) . ")\n"; print '%mixed = (' . printhash(\%mixed) . ")\n"; print '%children = (' . printhash(\%children) . ")\n"; print '%inmixed = (' . printhash(\%inmixed) . ")\n"; print "\n"; print '@attributes = (' . join(',',@attributes) . ")\n"; print '%catt = (' . printhash(\%catt) . ")\n"; print '%atttype = (' . printhash(\%atttype) . ")\n"; print '%enumtype = (' . printhash(\%enumtype) . ")\n"; print '%literaltype = (' . printhash(\%literaltype) . ")\n"; print '%typeof = (' . printhash(\%typeof) . ")\n"; print '%attdef = (' . 
printhash(\%attdef) . ")\n"; print '%required = (' . printhash(\%required) . ")\n"; print '%fixed = (' . printhash(\%fixed) . ")\n"; print '%initof = (' . printhash(\%initof) . ")\n"; print '%attlist = (' . printhash(\%attlist) . ")\n"; print '%withattr = (' . printhash(\%withattr) . ")\n"; print "\n"; print '%entity = (' . printhash(\%entity) . ")\n"; print '%parameter = (' . printhash(\%parameter) . ")\n"; print '%external = (' . printhash(\%external) . ")\n"; } # WRITE API HEADER (if requested). if ($header) { print STDOUT "Generating XML processor header in `$HEADER'.\n" if $verbose; open HEADER, "+>$HEADER" || die "$0: cannot write $HEADER: $!\n"; select HEADER; # Identification and license. print "/* XML processor/application API for $dtd" . ($dtdrevision ? " ($dtdrevision)" : "") . ".\n"; print " * Generated " . `date +'%Y/%m/%d %T.'`; print " *\n"; redistribute(" *"); print " */\n"; print "\n"; # Output the declarations safeguarded againts repeated loading. print "#ifndef _FLEXML_${cdtd}_H\n"; print "#define _FLEXML_${cdtd}_H\n"; print "\n"; api_functions('extern ',';'); print "\n"; api_types(); print "\n"; api_data('extern '); print "\n"; print "/* XML application utilities. */\n"; print "extern int ${tagprefix}element_context(int);\n"; print "\n"; print "/* XML processor entry point. */\n"; print "extern int yylex(void);\n"; print "\n"; print "/* Flexml error handling function (useful only when -q flag passed to flexml) */\n"; print "const char * ${tagprefix}parse_err_msg(void);\n"; print "#endif\n"; close HEADER || die "$0: cannot read $HEADER: $!\n"; } # WRITE XML PROCESSOR (if requested). if ($scanner or $standalone) { print STDOUT "Writing XML processor" . ($actions || $standalone ? " and application" : "") . " onto `$SCANNER'.\n" if $verbose; open SCANNER, "+>$SCANNER"|| die "$0: cannot write $SCANNER: $!\n"; select SCANNER; open (SKELETON) || die "$0: cannot read $SKELETON: $!\n"; # Identification and license. 
print "/* Validating XML processor for $dtd" . ($dtdrevision ? " ($dtdrevision)" : "") . ".\n"; print " * Generated " . `date +'%Y/%m/%d %T.'`; print " *\n"; redistribute(" *"); print " */\n"; print "\n"; # Skip initial comment. while () { last if m/^\%\{/; } print; # Copy body of skeleton scanner with substitutions... while () { if ( /^FLEXML_VERSION$/ ) { print "const char ${tagprefix}flexml_version[] = \"FLEXMLVERSION\";\n"; print "const char rcs_${cdtd}_dtd[] =\n" . " \"\$\" \"$dtdrevision \$\";\n" if $dtdrevision; } elsif ( /^FLEXML_DEFINITIONS$/ ) { print "#define DEBUG\n" if $debug; print "#define FLEXML_yylineno\n" if $lineno; print "#define FLEXML_NOFAIL\n" if $nofail; print "#define FLEXML_quiet_parser\n" if $quiet_parser; print "#ifndef FLEXML_BUFFERSTACKSIZE\n"; print "#define FLEXML_BUFFERSTACKSIZE $stacksize\n"; print "#endif\n"; print "#define FLEXML_NEED_BUFFERLIT\n" if (scalar(%literaltype) or ($#default_attributes >= 0)); print "\n"; if ($standalone) { api_functions('static ',';'); print "\n"; api_types(); print "\n"; api_data('static '); } else { print "/* XML processor api. */\n"; print "#include \"$HEADER\"\n\n" if ($header); api_data(''); } } elsif ( /^FLEXML_INCLUDE_INIT_HEADER$/ ) { if ($init_header) { print "/* User-supplied header */\n"; print "#include \"$init_header\"\n"; } } elsif ( /^FLEXML_FLEX_OPTIONS$/ ) { print "%option yylineno\n" if $lineno; print "%option debug\n" if $debug; print "%option nounput\n" if not %entity; } elsif ( /^FLEXML_START_CONDITIONS$/ ) { for (@tags) { my $c = $ctag{$_}; print "%x" . ($roottags{$_} ? " ROOT_${tagprefix}$c" : "") . " AL_${tagprefix}$c " . join(' ',split(',',$states{$_})) . "\n"; } } elsif ( /^FLEXML_EXTRA_DEFINITIONS$/ ) { print "%{\n"; print "/* State names. 
*/\n"; print "const char* *${tagprefix}statenames=NULL;\n"; print "%}\n"; } elsif ( /^FLEXML_EXTRA_DEFINITIONS_INIT$/ ) { my ($state, $tag); print " /* FleXML_init */\n"; print " bnext = inext = 1;\n"; for my $att (@default_attributes) { print " ${tagprefix}bufferliteral('\\0', &bnext, \"$att\");\n"; } print " if(!${tagprefix}statenames) {${tagprefix}statenames= (const char **)calloc(IMPOSSIBLE,sizeof(char*));\n"; for ('PROLOG','DOCTYPE','EPILOG','INCOMMENT','INPI','VALUE1','VALUE2','CDATA') { print " ${tagprefix}statenames[$_] = NULL;\n"; } for my $tag (@tags) { my $c = $ctag{$tag}; print " ${tagprefix}statenames[ROOT_${tagprefix}$c] = NULL;\n" if $roottags{$tag}; print " ${tagprefix}statenames[AL_${tagprefix}$c] = NULL;\n"; for (split ',',$states{$tag}) { print " ${tagprefix}statenames[$_] = \"$tag\";\n"; } } print " }\n"; } elsif ( /^FLEXML_DOCTYPES$/ ) { $sysid = $sysid ? "(\"'$sysid'\"|\"\\\"$sysid\\\"\")" : ( $uri ? "(\"'$uri'\"|\"\\\"$uri\\\"\")" : "(\"'$dtd'\"|\"\\\"$dtd\\\"\")" ); for (keys %roottags) { my $c = $ctag{$_}; print " \"\" SET(ROOT_${tagprefix}$c);\n"; if ($pubid) { print " \"\" SET(ROOT_${tagprefix}$c);\n"; } } } elsif ( /^FLEXML_RULES$/ ) { # Dump all parameter entity declarations. for (keys %parameter) { printsource($_); } # Dump all start and empty tag recognition rules. for my $tag (@tags) { my $myctag = $ctag{$tag}; my @myattributes = (exists $attlist{$tag} ? split /,/,"$attlist{$tag}" : ()); my ($intag, $attribute); # Tag's source element and attribute declarations. printsource($tag); for my $attribute (@myattributes) { printsource("$tag/$attribute"); } # Build element exit transition command. 
my $exitswitch = ""; if (exists $exittrans{$tag}) { $exitswitch .= " switch (YY_START) {\n"; my %casesto = (); for (split /,/,$exittrans{$tag}) { if (m/^($Name)=>($Name)$/o) { $casesto{$2} .= "case $1: "; } } for (keys %casesto) { $exitswitch .= " $casesto{$_}SET($_); break;\n" } $exitswitch .= " }\n"; } # Misplaced start or empty tag unless ($nofail) { print "\n"; my %ins = (); foreach (keys %allstates) { $ins{$_} = 'false'; } if (exists $instates{$tag}) { for (split /,/,$instates{$tag}) { $ins{$_} = 'true'; } } my $str = '<' . join(',', grep {$ins{$_} eq 'false'} keys %ins); if ($str ne '<') { print "$str>\"<$tag\"{s} FAIL(\"Starting tag <$tag> is not allowed here.\");\n"; } } # Start or empty tag: initialise attribute list. print "\n"; if ($roottags{$tag}) { print "{ \n" . # "\"<$tag\"{s} {\n"; print ">\"<$tag\"{s} {\n"; for my $attribute (@myattributes) { print " AX_${tagprefix}${myctag}_$catt{$attribute} = " . $initof{"$tag/$attribute"} . ";\n"; print " ${tagprefix}${myctag}_$catt{$attribute}_isset = 0;\n"; } print " ENTER(AL_${tagprefix}$myctag); pushbuffer(0);\n"; print " }\n"; # print " . FAIL(\"Unexpected character `%c': `<$tag' expected.\",yytext[0]);\n"; # print " <> FAIL(\"Premature EOF: `<$tag' expected.\");\n"; # #unless $mixed{$tag} or $nofail; # print "}\n"; # Attribute list (of start or empty tag): print "\n"; print "{\n"; for my $attribute (@myattributes) { my $type; # set by conditions if ($type = $enumtype{"$tag/$attribute"}) { # - fixed enumeration attribute: generate one rule, if ($fixed{"$tag/$attribute"}) { print " \"$attribute\"{Eq}\"'" . $attdef{"$tag/$attribute"} . "'\"" . " |\n" . " \"$attribute\"{Eq}\"\\\"" . $attdef{"$tag/$attribute"} . "\\\"\"" . " A_${tagprefix}${myctag}_$catt{$attribute}" . " = " . $initof{"$tag/$attribute"} . ";\n"; } else { # - (non-fixed) enumeration attribute: generate a rule per value, for my $alternative (split /,/,$type) { print " \"$attribute\"{Eq}\"'$alternative'\"" . " |\n" . 
" \"$attribute\"{Eq}\"\\\"$alternative\\\"\"" . " A_${tagprefix}${myctag}_$catt{$attribute}" . " = A_${tagprefix}${myctag}_$catt{$attribute}_" . variablify($alternative) . ";\n"; } } } elsif ($fixed{"$tag/$attribute"}) { # - fixed (non-enumeration) attribute: generate one rule per literal form, print " \"$attribute\"{Eq}\"'" . $attdef{"$tag/$attribute"} . "'\"" . " |\n" . " \"$attribute\"{Eq}\"\\\"" . $attdef{"$tag/$attribute"} . "\\\"\"" . " AX_${tagprefix}${myctag}_$catt{$attribute}" . " = " . $initof{"$tag/$attribute"} . ";\n"; } elsif ($type = $literaltype{"$tag/$attribute"}) { # - (non-fixed) literal-type attribute: scan literal string directly, or print " \"$attribute\"{Eq}\'$type\' BUFFERLITERAL('\\\'',AX_${tagprefix}${myctag}_$catt{$attribute});\n"; print " \"$attribute\"{Eq}\\\"$type\\\" BUFFERLITERAL('\"',AX_${tagprefix}${myctag}_$catt{$attribute});\n"; } else { # - (non-fixed non-literal) attribute: scan string with entity expansion. print " \"$attribute\"{Eq}\\' if (${tagprefix}${myctag}_$catt{$attribute}_isset != 0) {FAIL(\"Multiple definition of attribute $attribute in <${tagprefix}${myctag}>\");} ${tagprefix}${myctag}_$catt{$attribute}_isset = 1; ENTER(VALUE1); BUFFERSET(AX_${tagprefix}${myctag}_$catt{$attribute});\n"; print " \"$attribute\"{Eq}\\\" if (${tagprefix}${myctag}_$catt{$attribute}_isset != 0) {FAIL(\"Multiple definition of attribute $attribute in <${tagprefix}${myctag}>\");} ${tagprefix}${myctag}_$catt{$attribute}_isset = 1; ENTER(VALUE2); BUFFERSET(AX_${tagprefix}${myctag}_$catt{$attribute});\n"; } print "\n"; } # # - the end of a start tag means to enter the contents after # checking that all required attributes were set. print " \">\" {\n"; for my $attribute (@myattributes) { if ($required{"$tag/$attribute"}) { print " if (!AX_${tagprefix}$ctag{$tag}_" . variablify($attribute) . ")" . " FAIL(\"Required attribute `$attribute' not set for `$tag' element.\");\n"; } } print " LEAVE; STag_${tagprefix}$myctag();" . (%inmixed ? 
' pushbuffer('."${tagprefix}".'pcdata_ix);' : '') . ($mixed{$tag} ? 'pushbuffer('."${tagprefix}".'pcdata_ix); BUFFERSET('."${tagprefix}".'pcdata_ix);' : "${tagprefix}".'pcdata_ix = 0'). ";" . " ENTER($startstate{$tag});\n"; print " }\n"; # # - accept and handle empty tags straight away, if ($empty{$tag}) { print " \"/>\" {\n"; for my $attribute (@myattributes) { if ($required{"$tag/$attribute"}) { print " if (!AX_${tagprefix}$ctag{$tag}_" . variablify($attribute) . ")" . " FAIL(\"Required attribute `$attribute' not set for `$tag' element.\");\n"; } } print " LEAVE; STag_${tagprefix}$myctag();" . (%inmixed ? ' pushbuffer('."${tagprefix}".'pcdata_ix);' : '') . " ${tagprefix}".'pcdata_ix = 0;' . " ETag_${tagprefix}$myctag();" . (%inmixed ? " ${tagprefix}".'pcdata_ix = popbuffer();' : '') . " popbuffer(); /* attribute */\n"; # print $exitswitch; print " }\n"; } elsif (not $nofail) { print " \"/>\" FAIL(\"`$tag' element cannot be empty.\");\n"; } # # - spaces are skipped, and print " . FAIL(\"Unexpected character \`%c\' in attribute list of $tag element.\", yytext[0]);\n" unless $nofail; # # - other stuff is an error. print " {Name} FAIL(\"Bad attribute `%s' in `$tag' element start tag.\",yytext);\n" unless $nofail; print " <> FAIL(\"EOF in attribute list of `$tag' element.\");\n" unless $nofail; print "}\n"; # End tag. print "\n"; print "<$endstates{$tag}>{\n"; print " \"\" {\n"; print " LEAVE;\n"; print " BUFFERDONE;\n" if $mixed{$tag}; print " ETag_${tagprefix}$myctag();\n"; print " ${tagprefix}pcdata_ix = popbuffer();\n" if $mixed{$tag}; print " ${tagprefix}pcdata_ix = popbuffer();\n" if %inmixed; print " popbuffer(); /* attribute */\n"; print $exitswitch; print " }\n"; # Errors when expecting end tag. print " \"\" FAIL(\"Unexpected end-tag `%s': `' expected.\",yytext);\n" unless $nofail; print " . 
FAIL(\"Unexpected character `%c': `' expected.\",yytext[0]);\n" unless $mixed{$tag} or $nofail; print " <> FAIL(\"Premature EOF: `' expected.\");\n" unless $nofail; print "}\n"; # Errors when expecting root tag. if ($roottags{$tag} and $nofail) { print "\n"; print "{\n"; print " . FAIL(\"Unexpected character `%c': `$tag' element expected.\",yytext[0]);\n"; print " <> FAIL(\"EOF in prolog.\");\n"; print "}\n"; } } } elsif ( /FLEXML_MIXED([,>])/ ) { if (%mixed) { print "$`" . join(',', map("IN_$ctag{$_}", keys %mixed)) . "$1$'"; } else { print "$`IMPOSSIBLE$1$'"; } } elsif ( /FLEXML_NON_MIXED([,>])/ ) { my $sep = $`; for (@tags) { print $sep . ($roottags{$_} ? "ROOT_${tagprefix}$ctag{$_}," : "") . "AL_${tagprefix}$ctag{$_}"; print ",$states{$_}" if $properempty{$_} or $children{$_}; $sep = ','; } print "$1$'"; } elsif ( /FLEXML_COMMENTS([,>])/ ) { print "$`" . join(',', map(($roottags{$_} ? "ROOT_${tagprefix}$ctag{$_}," : "") . "AL_${tagprefix}$ctag{$_},$states{$_}", @tags)) . "$1$'"; } elsif ( /^FLEXML_ENTITIES$/ ) { # Process general entities. for my $ent (keys %entity) { printsource("%$ent;"); print " \"&$ent;\" ENTITYTEXT(\"" . $entity{$ent} . "\");\n"; } print " /* Non-defined standard entities... */\n"; print "\"&\" BUFFERPUTC('&');\n" unless $entity{"amp"}; print "\"<\" BUFFERPUTC('<');\n" unless $entity{"lt"}; print "\">\" BUFFERPUTC('>');\n" unless $entity{"gt"}; print "\"'\" BUFFERPUTC('\\\'');\n" unless $entity{"apos"}; print "\""\" BUFFERPUTC('\"');\n" unless $entity{"quot"}; } elsif ( /^FLEXML_FINAL$/ and not $nofail ) { # Catch-all error cases. for my $tag (@tags) { for (split ',',$states{$tag}) { print "<$_>{\n"; print " . FAIL(\"Unrecognized `%c' in $_.\",yytext[0]);\n"; print " [\\n] FAIL(\"Unrecognized newline in $_.\");\n"; print "}\n"; } } for ('PROLOG','DOCTYPE','EPILOG','INCOMMENT','INPI','VALUE1','VALUE2','CDATA','INITIAL','IMPOSSIBLE') { print "<$_>{\n"; print " . 
FAIL(\"Unrecognized `%c' in $_.\",yytext[0]);\n"; print " [\\n] FAIL(\"Unrecognized space in $_.\");\n"; print "}\n"; } } elsif ( $nofail and /FAIL\(/ ) { #ignore } else { s/"\$Id/"\$" "Id/; s/statenames/${tagprefix}statenames/g; s/element_context/${tagprefix}element_context/g; s/parse_err_msg/${tagprefix}parse_err_msg/g; s/rcs_flexml_skeleton/rcs_${tagprefix}flexml_skeleton/g; s/bufferliteral/${tagprefix}bufferliteral/g; s/bufferstack/${tagprefix}bufferstack/g; print; } } close SKELETON || die "$0: Cannot close $SKELETON: $!\n"; unless ($standalone) { close SCANNER || die "$0: Cannot close $SCANNER: $!\n"; } } # WRITE APPLICATION. if ($dummy) { print STDOUT "Writing XML" . ($actions ? "" : " dummy") . " application onto `$APPLICATION'.\n" if $verbose; open APPLICATION, "+>$APPLICATION" || die "$0: Cannot write $APPLICATION: $!\n"; select APPLICATION; # Identification and license. print "/* XML application for $dtd" . ($dtdrevision ? " ($dtdrevision)" : "") . ".\n"; print " * Includes actions from $ACTIONS.\n" if $ACTIONS; print " * Generated " . `date +'%Y/%m/%d %T.'`; print " *\n"; redistribute(" *"); print " */\n"; print "\n"; # Declarations. print "#include \"$HEADER\"\n"; print "\n"; } if ($dummy or $standalone) { # Get requested actions. if ($ACTIONS) { open ACTIONS, "$actbin $ACTIONS|" || die "$0: Cannot exec $actbin $ACTIONS: $!\n"; my ($tag,$attribute); my @myattributes; my $lineno = 0; my $isstart = undef; while () { ++$lineno; if ( m/^\#line ([0-9]+)/ ) { $lineno = $1; } elsif ( m/^void\s+STag_${tagprefix}($Name)\(void\)$/xo ) { $tag = $1; die "\"$ACTIONS\", line $lineno: Unknown element `$tag'.\n" unless $ctag{$tag}; $startok{$tag} = 'true'; @myattributes = (exists $withattr{$tag} ? 
split /,/,"$withattr{$tag}" : ()); $isstart = 'true'; } elsif ( m|^\}\s+\/\*\s+STag_${tagprefix}($Name)\s+\*\/$|xo ) { $tag = undef; @myattributes = (); $isstart = 'true'; } elsif ( m/^void\s+ETag_${tagprefix}($Name)\(void\)$/xo ) { $tag = $1; $endok{$1} = 'true'; $isstart = undef; } # Make function names C-friendly (idempotently!) s/(\s+[SE])Tag_($Name)\(/$1Tag_$ctag{$2}\(/xg; # Replace special annotations with C equivalents. if ($tag) { while ( s/\{($Name)\}/A_${tagprefix}$ctag{$tag}_$catt{$1}/x ) { die "\"$ACTIONS\", line $lineno: Unknown attribute `$1' for <$tag>.\n" if not $atttype{"$tag/$1"}; } while ( s/\{[!]($Name)\}/AU_${tagprefix}$ctag{$tag}_$catt{$1}/x ) { die "\"$ACTIONS\", line $lineno: Unknown attribute `$1' for <$tag>.\n" if not $atttype{"$tag/$1"}; } while ( s|\{($Name)=($Name)\}| "A_${tagprefix}$ctag{$tag}_$catt{$1}_" . variablify($2); |xe ) { my ($att,$elt) = ($1,$2); die "\"$ACTIONS\", line $lineno: Unknown attribute $1 for <$tag>.\n" if not $atttype{"$tag/$1"}; die "\"$ACTIONS\", line $lineno: Attribute $att does not have value $elt for <$tag>.\n" if not $enumtype{"$tag/$att"} =~ m/\b$elt\b/ ; } while ( s|\{\#(PCDATA)\}|${tagprefix}pcdata| ) { die "\"$ACTIONS\", line $lineno: {#PCDATA} only allowed in end tag.\n" if $isstart; die "\"$ACTIONS\", line $lineno: {#PCDATA} only permitted in end tag with Mixed contents.\n" if not $mixed{$tag}; } die "\"$ACTIONS\", line $lineno: Malformed annotation `$&' in <$tag> action.\n" if m|\{[^;\s]+\}|o; } print STDERR "Action: $_" if $debug; print $_; } close ACTIONS || die "$0: Cannot close pipe to $actbin: $!\n"; print "\n"; } # Fill up with dummy declarations for the remaining functions. 
api_functions('',' {}'); } if ($dummy) { close APPLICATION || die "$0: Cannot close $APPLICATION: $!\n"; } elsif ($standalone) { close SCANNER || die "$0: Cannot close $SCANNER: $!\n"; } =pod =head1 NAME flexml - generate validating XML processor and applications from DTD =head1 SYNOPSIS B [B<-ASHDvdnLXV>] [B<-s>I] [B<-p>I] [B<-i>I] [B<-u>I] [B<-r>I] [B<-a>I] I[F<.dtd>] =head1 DESCRIPTION I reads IF<.dtd> which must be a DTD (Document Type Definition) describing the format of XML (Extensible Markup Language) documents, and produces a "validating" XML I with an interface to support XML Is. Proper applications can be generated optionally from special "action files", either for linking or textual combination with the processor. The generated processor will only validate documents that conform strictly to the DTD, I, more precisely we in practice restrict XML rule [28] to [28r] doctypedecl ::= '' where the C denotes the used DTD. (One might say, in fact, that I implements "non-extensible" markup. :) The generated processor is a I(1) scanner, by default named IF<.l> with a corresponding C header file IF<.h> for separate compilation of generated applications. Optionally I takes an I file with per-element actions and produces a C file with element functions for an XML application with entry points called from the XML processor (it can also fold the XML application into the XML processor to make stand-alone XML applications but this prevents sharing of the processor between applications). In LZ<> we list the possible options, in LZ<> we explain how to write applications, in L we explain how to compile produced processors and applications into executables, and in L we list the current limitations of the system before giving standard references. =head1 OPTIONS I takes the following options. =over 4 =item B<--stand-alone>, B<-A> Generate a I scanner application. 
If combined with B<-a>I then the application will be named as I with the extension replaced by F<.l>, otherwise it will be in IF<.l>. Conflicts with B<-S>, B<-H>, and B<-D>. =item B<--actions> I, B<-a> I Uses the I file to produce an XML application in the file with the same name as I after replacing the extension with F<.c>. If combined with B<-A> then instead the stand-alone application will include the action functions. =item B<--dummy> B<[>IB<]>, B<-D> B<[>IB<]> Generate a dummy application with just empty functions to be called by the XML processor. If I is not specified on the command line, it defaults to IF<-dummy.c>. If combined with B<-a> I then the application will insert the specified actions and be named as I with the extension replaced by F<.c>. Conflicts with B<-A>; implied by B<-a> unless either of B<-SHD> is specified. =item B<--debug>, B<-d> Turns on debug mode in the flex scanner and also prints out the details of the DTD analysis performed by I<flexml>. =item B<--header> B<[>IB<]>, B<-H> B<[>IB<]> Generate the header file. If the I is not specified on the command line, defaults to IF<.h>. Conflicts with B<-A>; on by default if none of B<-SHD> specified. =item B<--lineno>, B<-L> Makes the XML processor (as produced by I<flex>(1)) count the lines in the input and keep it available to XML application actions in the integer C<yylineno>. (This is off by default as the performance overhead is significant.) =item B<--quiet>, B<-q> Prevents the XML processor (as produced by I<flex>(1)) from reporting the errors it runs into on stderr. Instead, users will have to poll for error messages with the parse_err_msg() function. By default, error messages are written on stderr. =item B<--dry-run>, B<-n> "Dry-run": do not produce any of the output files. =item B<--pubid> I, B<-p> I Sets the document type to be C<PUBLIC> with the identifier I<pubid> instead of C<SYSTEM>, the default.
=item B<--init_header> I, B<-i> I Puts a line containing C<#include "init_header"> in the C<%{...%}> section at the top of the generated .l file. This may be useful for making various flex C<#define>s, for example C or C. =item B<--sysid>=I Overrides the C id of the accepted DTD. Sometimes useful when your dtd is placed in a subdirectory. =item B<--root-tags> I, B<-r> I Restricts the XML processor to validate only documents with one of the root elements listed in the comma-separated I. =item B<--scanner> B<[>IB<]>, B<-S> B<[>IB<]> Generate the scanner. If I is not given on command line, it defaults to IF<.l>. Conflicts with B<-A>; on by default if none of B<-SHD> specified. =item B<--skel> I, B<-s> I Use the skeleton scanner I instead of the default. =item B<--act-bin> I, B<-T> I This is an internal option mainly used to test versions of flexml not installed yet. =item B<--stack-increment> I, B<-b> I Sets the FLEXML_BUFFERSTACKSIZE to stack_increment (100000 by default). This controls how much the data stack grows in each realloc(). =item B<--tag-prefix> I, B<-O> I Use STRING to differentiate multiple versions of flexml in the same C code, just like the -P flex argument. =item B<--uri> I, B<-u> I Sets the URI of the DTD, used in the C header, to the specified I (the default is the DTD name). =item B<--verbose>, B<-v> Be verbose: echo each DTD declaration (after parameter expansion). =item B<--version>, B<-V> Print the version of I and exit. =back =head1 ACTION FILE FORMAT Action files, passed to the B<-a> option, are XML documents conforming to the DTD F which is the following: The elements should be used as follows: =over 4 =item C Use for top-level C code such as global declarations, utility functions, etc. =item C Attaches the code as an action to the element with the name of the required "C" attribute. 
The "C<%C-code;>" component should be C code suitable for inclusion in a C block (i.e., within C<{>...C<}> so it may contain local variables); furthermore the following extensions are available: C<{>IC<}>: Can be used to access the value of the I as set with IC<=>I in the start tag. In C, C<{>IC<}> will be interpreted depending on the declaration of the attribute. If the attribute is declared as an enumerated type like then the C attribute value is of an enumerated type with the elements written C<{>IC<=>IC<}>, C<{>IC<=>IC<}>, etc.; furthermore an I attribute has the "value" C<{!>IC<}>. If the attribute is not an enumeration then C<{>IC<}> is a null-terminated C string (of type C) and C<{!>IC<}> is C. =item C Similarly attaches the code as an action to the end tag with the name of the required "C" attribute; also here the "C<%C-code;>" component should be C code suitable for inclusion in a C block. In case the element has "Mixed" contents, i.e, was declared to permit C<#PCDATA>, then the following variable is available: C<{#PCDATA}>: Contains the text (C<#PCDATA>) of the element as a null-terminated C string (of type C). In case the Mixed contents element actually mixed text and child elements then C contains the plain concatenation of the text fragments as one string. =item C
Finally, an optional "C<E<lt>mainE<gt>>" element can contain the C C<main>
function of the XML application. Normally the C<main>
function should include (at least) one call of the XML processor: C<yylex()>: Invokes the XML processor produced by I<flex>(1) on the XML document found on the standard input (actually the C<yyin> file handle: see the manual for I<flex>(1) for information on how to change this as well as the name C<yylex>). If no C<main>
action is provided then the following is used: int main() { exit(yylex()); } =back It is advisable to use XML EC ... C<]]>E sections for the C code to make sure that all characters are properly passed to the output file. Finally note that I handles empty elements EICE as equivalent to EIEECIE. =head1 COMPILATION The following I(1) file fragment shows how one can compile I-generated programs: # Programs. FLEXML = flexml -v # Generate linkable XML processor with header for application. %.l %.h: %.dtd $(FLEXML) $< # Generate C source from flex scanner. %.c: %.l $(FLEX) -Bs -o"$@" "$<" # Generate XML application C source to link with processor. # Note: The dependency must be of the form "appl.c: appl.act proc.dtd". %.c: %.act $(FLEXML) -D -a $^ # Direct generation of stand-alone XML processor+application. # Note: The dependency must be of the form "appl.l: appl.act proc.dtd". %.l: %.act $(FLEXML) -A -a $^ =head1 BUGS The present version of I is to be considered in "early beta" state thus bugs should be expected (and the author would like to hear about them). Here are some known restrictions that we hope to overcome in the future: =over 4 =item * The character set is merely ASCII (actually I(1) handles 8 bit characters but only the ASCII character set is common with the XML default UTF-8 encoding). =item * C type attributes are not validated for uniqueness; C and C attributes are not validated for existence. =item * The C and C attribute types are not supported. =item * C declarations are not supported. =item * The various C-attributes are treated like any other attributes; in particular C should be supported. =item * The DTD parser is presently a perl hack so it may parse some DTDs badly; in particular the expansion of parameter entities may not conform fully to the XML specification. =item * A child should be able to "return" a value for the parent (also called a I). Similarly an element in Mixed contents should be able to inject text into the C of the parent. 
=back =head1 FILES =over 4 =item F<./skel> The skeleton scanner with the generic parts of XML scanning. =item F License, further documentation, and examples. =back =head1 SEE ALSO I<flex>(1), Extensible Markup Language (XML) 1.0 (W3C Recommendation REC-xml-1998-0210). =head1 AUTHOR I<flexml> was written by Kristoffer Rose, ECE. =head1 COPYRIGHT The program is Copyright (c) 1999 Kristoffer Rose (all rights reserved) and distributed under the GNU General Public License (GPL, also known as "copyleft", which clarifies that the author provides absolutely no warranty for I<flexml> and ensures that I<flexml> is and will remain available for all uses, even commercial). =head1 ACKNOWLEDGEMENT I am grateful to NTSys (France) for supporting the development of I<flexml>. Finally, I extend my sincere thanks to Jef Poskanzer, Vern Paxson, and the rest of the I<flex> maintainers and GNU developers for a great tool. =cut flexml-1.9.6/README0000644000175000017500000000134312101762535014205 0ustar mquinsonmquinsonThis directory contains the sources of FleXML, a toolkit to build fast validating XML processors and applications. It is presently in an `early beta' state but definitely useful for production of simple but fast XML processors. Documentation is in FleXML.html, paper.html, and at the end of flexml.pl (from which flexml.1 and flexml.html may be generated). The Makefile is configured for Debian GNU/Linux so you may have to change the setup; similarly you will probably have to change /usr/bin/perl at the head of `flexml.pl' to point to your perl5 executable. The toolkit produces scanners for flex(1) which you should thus install. Enjoy, Kristoffer Rose pgp f-p: A4D3 5BD7 3EC5 7CA2 924E D21D 126B B8E0 flexml-1.9.6/TODO0000644000175000017500000000052012101762535014011 0ustar mquinsonmquinson1. -- this space intentionally left blank -- 2. Rework scanner to get rid of *all* flex dummy rule fallbacks. 3. ID type attributes should be validated for uniqueness. 4. IDREF and IDREFS attributes should be validated. 5.
ENTITY and ENTITIES attribute types should be validated. 6. The xml:space pragma attribute should be supported. flexml-1.9.6/examples/0000755000175000017500000000000012101762535015142 5ustar mquinsonmquinsonflexml-1.9.6/examples/Makefile0000644000175000017500000001076012101762535016606 0ustar mquinsonmquinson# Make(1) rules for FleXML XML processor generator system. # Copyright (c) 1999 Kristoffer Rose. All rights reserved. # # This file is part of the FleXML XML processor generator system. # Copyright (c) 1999 Kristoffer Rose. All rights reserved. # # This program is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by the Free # Software Foundation; either version 2 of the License, or (at your option) # any later version. # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for # more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., 59 # Temple Place, Suite 330, Boston, MA 02111-1307 USA. # $Id: Makefile,v 1.9 2006/09/29 19:40:46 wdowling Exp $ # FILES. include ../Makefile.defs # Test the version in this dir, not the installed one SKEL=../skel ACT=../flexml-act SAMPS = my.dtd my-show.act my-joke.xml my-joke2.xml my-joke3.xml \ tricky.dtd tricky.act tricky.xml \ test.html xhtml1-transitional.dtd SRC = $(SAMPS) ALL = $(SAMPS) .PHONY: all install dist test clean # PRIMARY TARGETS. start: test all: $(ALL) install: $(ALL) mkdir -p $(DESTDIR)$(DOCDIR)/examples $(INSTALL) -m444 $(SAMPS) $(DESTDIR)$(DOCDIR)/examples/ dist: clean #rsync -v FleXML.html $(WEBHOME)/FleXML.html #rsync -va --cvs-exclude --delete-excluded ./ $(FTPHOME)/ clean::; @echo "Cleaning examples..." $(RM) *.[olh1] *-dummy.?
lex.* *~ ./#* test:: all @echo "Testing..." # DEFAULT RULES. FLEXML_PROG = ../$(FLEXML) -s$(SKEL) -T$(ACT) # Generate C source from flex scanner. %.c: %.l $(FLEX) -B -s -v $(FLEXDEBUG) -o$@ $< # Direct generation of stand-alone XML processor+application. # Note: The dependency must be of the form "appl.l: appl.act proc.dtd". %.l: %.act $(FLEXML_PROG) $(FLEXDEBUG) -vA -a $^ # Generate XML processor to link with application. %.l %.h: %.dtd $(FLEXML_PROG) $(FLEXDEBUG) -v $< # Generate XML application C source to compile and link with processor. # Note: The dependency must be of the form "appl.c: appl.act proc.dtd". %.c: %.act $(FLEXML_PROG) $(FLEXDEBUG) -vD -a $^ clean::; $(RM) flexml-act flexml-act.c # Example: LINK processor with application: my.l my.h: my.dtd my.c: my.l my.o: my.c my.h my-show.c: my-show.act my.dtd my-show.o: my-show.c my.h my-show: my-show.o my.o test:: my-show @echo "XXXXX Test: LINK processor with application" if ./my-show values. xhtml1-transitional.l xhtml1-transitional.h: xhtml1-transitional.dtd $(FLEXML_PROG) -rhtml \ -p "-//IETF//DTD XHTML 1.0 Transitional//EN" \ -u "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd" \ xhtml1-transitional.dtd xhtml1-transitional.c: xhtml1-transitional.l $(FLEX) -B -s -v -Ca -oxhtml1-transitional.c xhtml1-transitional.l # turn off the -O2 when building this -- it takes too long to run xhtml1-transitional.o: CFLAGS = -g xhtml1-transitional.o: xhtml1-transitional.c xhtml1-transitional.h xhtml-href.c: xhtml-href.act xhtml1-transitional.dtd xhtml-href.o: xhtml-href.c xhtml1-transitional.h xhtml-href: xhtml-href.o xhtml1-transitional.o test:: xhtml-href FleXML.xml @echo "XXXXX Test: application to print XHTML values" if ./xhtml-href /* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ #ifdef c_plusplus #ifndef __cplusplus #define __cplusplus #endif #endif #ifdef __cplusplus #include #include /* Use prototypes in function declarations. 
*/ #define YY_USE_PROTOS /* The "const" storage-class-modifier is valid. */ #define YY_USE_CONST #else /* ! __cplusplus */ #if __STDC__ #define YY_USE_PROTOS #define YY_USE_CONST #endif /* __STDC__ */ #endif /* ! __cplusplus */ #ifdef __TURBOC__ #pragma warn -rch #pragma warn -use #include #include #define YY_USE_CONST #define YY_USE_PROTOS #endif #ifdef YY_USE_CONST #define yyconst const #else #define yyconst #endif #ifdef YY_USE_PROTOS #define YY_PROTO(proto) proto #else #define YY_PROTO(proto) () #endif /* Returned upon end-of-file. */ #define YY_NULL 0 /* Promotes a possibly negative, possibly signed char to an unsigned * integer for use as an array index. If the signed char is negative, * we want to instead treat it as an 8-bit unsigned char, hence the * double cast. */ #define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) /* Enter a start condition. This macro really ought to take a parameter, * but we do it the disgusting crufty way forced on us by the ()-less * definition of BEGIN. */ #define BEGIN yy_start = 1 + 2 * /* Translate the current start state into a value that can be later handed * to BEGIN to return to the state. The YYSTATE alias is for lex * compatibility. */ #define YY_START ((yy_start - 1) / 2) #define YYSTATE YY_START /* Action number for EOF rule of a given start state. */ #define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) /* Special action meaning "start processing a new file". */ #define YY_NEW_FILE yyrestart( yyin ) #define YY_END_OF_BUFFER_CHAR 0 /* Size of default input buffer. */ #define YY_BUF_SIZE 16384 typedef struct yy_buffer_state *YY_BUFFER_STATE; extern int yyleng; extern FILE *yyin, *yyout; #define EOB_ACT_CONTINUE_SCAN 0 #define EOB_ACT_END_OF_FILE 1 #define EOB_ACT_LAST_MATCH 2 /* The funky do-while in the following #define is used to turn the definition * int a single C statement (which needs a semi-colon terminator). 
This * avoids problems with code like: * * if ( condition_holds ) * yyless( 5 ); * else * do_something_else(); * * Prior to using the do-while the compiler would get upset at the * "else" because it interpreted the "if" statement as being all * done when it reached the ';' after the yyless() call. */ /* Return all but the first 'n' matched characters back to the input stream. */ #define yyless(n) \ do \ { \ /* Undo effects of setting up yytext. */ \ *yy_cp = yy_hold_char; \ YY_RESTORE_YY_MORE_OFFSET \ yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ YY_DO_BEFORE_ACTION; /* set up yytext again */ \ } \ while ( 0 ) #define unput(c) yyunput( c, yytext_ptr ) /* The following is because we cannot portably get our hands on size_t * (without autoconf's help, which isn't available because we want * flex-generated scanners to compile on their own). */ typedef unsigned int yy_size_t; struct yy_buffer_state { FILE *yy_input_file; char *yy_ch_buf; /* input buffer */ char *yy_buf_pos; /* current position in input buffer */ /* Size of input buffer in bytes, not including room for EOB * characters. */ yy_size_t yy_buf_size; /* Number of characters read into yy_ch_buf, not including EOB * characters. */ int yy_n_chars; /* Whether we "own" the buffer - i.e., we know we created it, * and can realloc() it to grow it, and should free() it to * delete it. */ int yy_is_our_buffer; /* Whether this is an "interactive" input source; if so, and * if we're using stdio for input, then we want to use getc() * instead of fread(), to make sure we stop fetching input after * each newline. */ int yy_is_interactive; /* Whether we're considered to be at the beginning of a line. * If so, '^' rules will be active on the next match, otherwise * not. */ int yy_at_bol; /* Whether to try to fill the input buffer when we reach the * end of it. 
*/ int yy_fill_buffer; int yy_buffer_status; #define YY_BUFFER_NEW 0 #define YY_BUFFER_NORMAL 1 /* When an EOF's been seen but there's still some text to process * then we mark the buffer as YY_EOF_PENDING, to indicate that we * shouldn't try reading from the input source any more. We might * still have a bunch of tokens to match, though, because of * possible backing-up. * * When we actually see the EOF, we change the status to "new" * (via yyrestart()), so that the user can continue scanning by * just pointing yyin at a new input file. */ #define YY_BUFFER_EOF_PENDING 2 }; static YY_BUFFER_STATE yy_current_buffer = 0; /* We provide macros for accessing buffer states in case in the * future we want to put the buffer states in a more general * "scanner state". */ #define YY_CURRENT_BUFFER yy_current_buffer /* yy_hold_char holds the character lost when yytext is formed. */ static char yy_hold_char; static int yy_n_chars; /* number of characters read into yy_ch_buf */ int yyleng; /* Points to current character in buffer. */ static char *yy_c_buf_p = (char *) 0; static int yy_init = 1; /* whether we need to initialize */ static int yy_start = 0; /* start state number */ /* Flag which is used to allow yywrap()'s to do buffer switches * instead of setting up a fresh yyin. A bit of a hack ... 
*/ static int yy_did_buffer_switch_on_eof; void yyrestart YY_PROTO(( FILE *input_file )); void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); void yy_load_buffer_state YY_PROTO(( void )); YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); #define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); static void *yy_flex_alloc YY_PROTO(( yy_size_t )); static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); static void yy_flex_free YY_PROTO(( void * )); #define yy_new_buffer yy_create_buffer #define yy_set_interactive(is_interactive) \ { \ if ( ! yy_current_buffer ) \ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ yy_current_buffer->yy_is_interactive = is_interactive; \ } #define yy_set_bol(at_bol) \ { \ if ( ! yy_current_buffer ) \ yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ yy_current_buffer->yy_at_bol = at_bol; \ } #define YY_AT_BOL() (yy_current_buffer->yy_at_bol) typedef unsigned char YY_CHAR; FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; typedef int yy_state_type; extern char *yytext; #define yytext_ptr yytext static yy_state_type yy_get_previous_state YY_PROTO(( void )); static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); static int yy_get_next_buffer YY_PROTO(( void )); static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); /* Done after the current pattern has been matched and before the * corresponding action - sets up yytext. 
*/ #define YY_DO_BEFORE_ACTION \ yytext_ptr = yy_bp; \ yyleng = (int) (yy_cp - yy_bp); \ yy_hold_char = *yy_cp; \ *yy_cp = '\0'; \ yy_c_buf_p = yy_cp; #define YY_NUM_RULES 2 #define YY_END_OF_BUFFER 3 static yyconst short int yy_accept[9] = { 0, 0, 0, 3, 2, 2, 0, 1, 0 } ; static yyconst int yy_ec[256] = { 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } ; static yyconst int yy_meta[4] = { 0, 1, 2, 3 } ; static yyconst short int yy_base[12] = { 0, 6, 5, 6, 9, 0, 0, 9, 9, 0, 2, 2 } ; static yyconst short int yy_def[12] = { 0, 9, 9, 8, 8, 10, 11, 8, 0, 8, 8, 8 } ; static yyconst short int yy_nxt[13] = { 0, 4, 4, 4, 7, 6, 8, 5, 5, 3, 8, 8, 8 } ; static yyconst short int yy_chk[13] = { 0, 9, 9, 9, 11, 10, 3, 2, 1, 8, 8, 8, 8 } ; static yy_state_type yy_last_accepting_state; static char *yy_last_accepting_cpos; /* The intent behind this definition is that it'll catch * any uses of REJECT which flex missed. */ #define REJECT reject_used_but_not_detected #define yymore() yymore_used_but_not_detected #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET char *yytext; #line 1 "test.l" #define INITIAL 0 #line 368 "test.c" /* Macros after this point can all be overridden by user definitions in * section 1. 
*/ #ifndef YY_SKIP_YYWRAP #ifdef __cplusplus extern "C" int yywrap YY_PROTO(( void )); #else extern int yywrap YY_PROTO(( void )); #endif #endif #ifndef YY_NO_UNPUT static void yyunput YY_PROTO(( int c, char *buf_ptr )); #endif #ifndef yytext_ptr static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); #endif #ifdef YY_NEED_STRLEN static int yy_flex_strlen YY_PROTO(( yyconst char * )); #endif #ifndef YY_NO_INPUT #ifdef __cplusplus static int yyinput YY_PROTO(( void )); #else static int input YY_PROTO(( void )); #endif #endif #if YY_STACK_USED static int yy_start_stack_ptr = 0; static int yy_start_stack_depth = 0; static int *yy_start_stack = 0; #ifndef YY_NO_PUSH_STATE static void yy_push_state YY_PROTO(( int new_state )); #endif #ifndef YY_NO_POP_STATE static void yy_pop_state YY_PROTO(( void )); #endif #ifndef YY_NO_TOP_STATE static int yy_top_state YY_PROTO(( void )); #endif #else #define YY_NO_PUSH_STATE 1 #define YY_NO_POP_STATE 1 #define YY_NO_TOP_STATE 1 #endif #ifdef YY_MALLOC_DECL YY_MALLOC_DECL #else #if __STDC__ #ifndef __cplusplus #include #endif #else /* Just try to get by without declaring the routines. This will fail * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) * or sizeof(void*) != sizeof(int). */ #endif #endif /* Amount of stuff to slurp up with each read. */ #ifndef YY_READ_BUF_SIZE #define YY_READ_BUF_SIZE 8192 #endif /* Copy whatever the last rule matched to the standard output. */ #ifndef ECHO /* This used to be an fputs(), but since the string might contain NUL's, * we now use fwrite(). */ #define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) #endif /* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, * is returned in "result". 
*/ #ifndef YY_INPUT #define YY_INPUT(buf,result,max_size) \ if ( yy_current_buffer->yy_is_interactive ) \ { \ int c = '*', n; \ for ( n = 0; n < max_size && \ (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ buf[n] = (char) c; \ if ( c == '\n' ) \ buf[n++] = (char) c; \ if ( c == EOF && ferror( yyin ) ) \ YY_FATAL_ERROR( "input in flex scanner failed" ); \ result = n; \ } \ else if ( ((result = fread( buf, 1, max_size, yyin )) == 0) \ && ferror( yyin ) ) \ YY_FATAL_ERROR( "input in flex scanner failed" ); #endif /* No semi-colon after return; correct usage is to write "yyterminate();" - * we don't want an extra ';' after the "return" because that will cause * some compilers to complain about unreachable statements. */ #ifndef yyterminate #define yyterminate() return YY_NULL #endif /* Number of entries by which start-condition stack grows. */ #ifndef YY_START_STACK_INCR #define YY_START_STACK_INCR 25 #endif /* Report a fatal error. */ #ifndef YY_FATAL_ERROR #define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) #endif /* Default declaration of generated scanner - a define so the user can * easily add parameters. */ #ifndef YY_DECL #define YY_DECL int yylex YY_PROTO(( void )) #endif /* Code executed at the beginning of each rule, after yytext and yyleng * have been set up. */ #ifndef YY_USER_ACTION #define YY_USER_ACTION #endif /* Code executed at the end of each rule. */ #ifndef YY_BREAK #define YY_BREAK break; #endif #define YY_RULE_SETUP \ YY_USER_ACTION YY_DECL { register yy_state_type yy_current_state; register char *yy_cp, *yy_bp; register int yy_act; #line 1 "test.l" #line 522 "test.c" if ( yy_init ) { yy_init = 0; #ifdef YY_USER_INIT YY_USER_INIT; #endif if ( ! yy_start ) yy_start = 1; /* first start state */ if ( ! yyin ) yyin = stdin; if ( ! yyout ) yyout = stdout; if ( ! 
yy_current_buffer ) yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); yy_load_buffer_state(); } while ( 1 ) /* loops until end-of-file is reached */ { yy_cp = yy_c_buf_p; /* Support of yytext. */ *yy_cp = yy_hold_char; /* yy_bp points to the position in yy_ch_buf of the start of * the current run. */ yy_bp = yy_cp; yy_current_state = yy_start; yy_match: do { register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; if ( yy_accept[yy_current_state] ) { yy_last_accepting_state = yy_current_state; yy_last_accepting_cpos = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 9 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; ++yy_cp; } while ( yy_base[yy_current_state] != 9 ); yy_find_action: yy_act = yy_accept[yy_current_state]; if ( yy_act == 0 ) { /* have to back up */ yy_cp = yy_last_accepting_cpos; yy_current_state = yy_last_accepting_state; yy_act = yy_accept[yy_current_state]; } YY_DO_BEFORE_ACTION; do_action: /* This label is used only to access EOF actions. */ switch ( yy_act ) { /* beginning of action switch */ case 0: /* must back up */ /* undo the effects of YY_DO_BEFORE_ACTION */ *yy_cp = yy_hold_char; yy_cp = yy_last_accepting_cpos; yy_current_state = yy_last_accepting_state; goto yy_find_action; case 1: YY_RULE_SETUP #line 3 "test.l" printf("<<'a'>>"); YY_BREAK case 2: YY_RULE_SETUP #line 5 "test.l" ECHO; YY_BREAK #line 615 "test.c" case YY_STATE_EOF(INITIAL): yyterminate(); case YY_END_OF_BUFFER: { /* Amount of text matched not including the EOB char. */ int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; /* Undo the effects of YY_DO_BEFORE_ACTION. */ *yy_cp = yy_hold_char; YY_RESTORE_YY_MORE_OFFSET if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) { /* We're scanning a new file or input source. 
It's * possible that this happened because the user * just pointed yyin at a new source and called * yylex(). If so, then we have to assure * consistency between yy_current_buffer and our * globals. Here is the right place to do so, because * this is the first action (other than possibly a * back-up) that will match for the new input source. */ yy_n_chars = yy_current_buffer->yy_n_chars; yy_current_buffer->yy_input_file = yyin; yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; } /* Note that here we test for yy_c_buf_p "<=" to the position * of the first EOB in the buffer, since yy_c_buf_p will * already have been incremented past the NUL character * (since all states make transitions on EOB to the * end-of-buffer state). Contrast this with the test * in input(). */ if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) { /* This was really a NUL. */ yy_state_type yy_next_state; yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state(); /* Okay, we're now positioned to make the NUL * transition. We couldn't have * yy_get_previous_state() go ahead and do it * for us because it doesn't know how to deal * with the possibility of jamming (and we don't * want to build jamming into it because then it * will run more slowly). */ yy_next_state = yy_try_NUL_trans( yy_current_state ); yy_bp = yytext_ptr + YY_MORE_ADJ; if ( yy_next_state ) { /* Consume the NUL. */ yy_cp = ++yy_c_buf_p; yy_current_state = yy_next_state; goto yy_match; } else { yy_cp = yy_c_buf_p; goto yy_find_action; } } else switch ( yy_get_next_buffer() ) { case EOB_ACT_END_OF_FILE: { yy_did_buffer_switch_on_eof = 0; if ( yywrap() ) { /* Note: because we've taken care in * yy_get_next_buffer() to have set up * yytext, we can now set up * yy_c_buf_p so that if some total * hoser (like flex itself) wants to * call the scanner after we return the * YY_NULL, it'll still work - another * YY_NULL will get returned. 
*/ yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; yy_act = YY_STATE_EOF(YY_START); goto do_action; } else { if ( ! yy_did_buffer_switch_on_eof ) YY_NEW_FILE; } break; } case EOB_ACT_CONTINUE_SCAN: yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; yy_current_state = yy_get_previous_state(); yy_cp = yy_c_buf_p; yy_bp = yytext_ptr + YY_MORE_ADJ; goto yy_match; case EOB_ACT_LAST_MATCH: yy_c_buf_p = &yy_current_buffer->yy_ch_buf[yy_n_chars]; yy_current_state = yy_get_previous_state(); yy_cp = yy_c_buf_p; yy_bp = yytext_ptr + YY_MORE_ADJ; goto yy_find_action; } break; } default: YY_FATAL_ERROR( "fatal flex scanner internal error--no action found" ); } /* end of action switch */ } /* end of scanning one token */ } /* end of yylex */ /* yy_get_next_buffer - try to read in a new buffer * * Returns a code representing an action: * EOB_ACT_LAST_MATCH - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position * EOB_ACT_END_OF_FILE - end of file */ static int yy_get_next_buffer() { register char *dest = yy_current_buffer->yy_ch_buf; register char *source = yytext_ptr; register int number_to_move, i; int ret_val; if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) YY_FATAL_ERROR( "fatal flex scanner internal error--end of buffer missed" ); if ( yy_current_buffer->yy_fill_buffer == 0 ) { /* Don't try to fill the buffer, so this is an EOF. */ if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) { /* We matched a single character, the EOB, so * treat this as a final EOF. */ return EOB_ACT_END_OF_FILE; } else { /* We matched some text prior to the EOB, first * process it. */ return EOB_ACT_LAST_MATCH; } } /* Try to read more data. */ /* First move last chars to start of buffer. 
*/ number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; for ( i = 0; i < number_to_move; ++i ) *(dest++) = *(source++); if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) /* don't do the read, it's not guaranteed to return an EOF, * just force an EOF */ yy_current_buffer->yy_n_chars = yy_n_chars = 0; else { int num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; while ( num_to_read <= 0 ) { /* Not enough room in the buffer - grow it. */ #ifdef YY_USES_REJECT YY_FATAL_ERROR( "input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); #else /* just a shorter name for the current buffer */ YY_BUFFER_STATE b = yy_current_buffer; int yy_c_buf_p_offset = (int) (yy_c_buf_p - b->yy_ch_buf); if ( b->yy_is_our_buffer ) { int new_size = b->yy_buf_size * 2; if ( new_size <= 0 ) b->yy_buf_size += b->yy_buf_size / 8; else b->yy_buf_size *= 2; b->yy_ch_buf = (char *) /* Include room in for 2 EOB chars. */ yy_flex_realloc( (void *) b->yy_ch_buf, b->yy_buf_size + 2 ); } else /* Can't grow it, we don't own it. */ b->yy_ch_buf = 0; if ( ! b->yy_ch_buf ) YY_FATAL_ERROR( "fatal error - scanner input buffer overflow" ); yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; num_to_read = yy_current_buffer->yy_buf_size - number_to_move - 1; #endif } if ( num_to_read > YY_READ_BUF_SIZE ) num_to_read = YY_READ_BUF_SIZE; /* Read in more data. 
*/ YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), yy_n_chars, num_to_read ); yy_current_buffer->yy_n_chars = yy_n_chars; } if ( yy_n_chars == 0 ) { if ( number_to_move == YY_MORE_ADJ ) { ret_val = EOB_ACT_END_OF_FILE; yyrestart( yyin ); } else { ret_val = EOB_ACT_LAST_MATCH; yy_current_buffer->yy_buffer_status = YY_BUFFER_EOF_PENDING; } } else ret_val = EOB_ACT_CONTINUE_SCAN; yy_n_chars += number_to_move; yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; return ret_val; } /* yy_get_previous_state - get the state just before the EOB char was reached */ static yy_state_type yy_get_previous_state() { register yy_state_type yy_current_state; register char *yy_cp; yy_current_state = yy_start; for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) { register YY_CHAR yy_c = (*yy_cp ? yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); if ( yy_accept[yy_current_state] ) { yy_last_accepting_state = yy_current_state; yy_last_accepting_cpos = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 9 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; } return yy_current_state; } /* yy_try_NUL_trans - try to make a transition on the NUL character * * synopsis * next_state = yy_try_NUL_trans( current_state ); */ #ifdef YY_USE_PROTOS static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) #else static yy_state_type yy_try_NUL_trans( yy_current_state ) yy_state_type yy_current_state; #endif { register int yy_is_jam; register char *yy_cp = yy_c_buf_p; register YY_CHAR yy_c = 1; if ( yy_accept[yy_current_state] ) { yy_last_accepting_state = yy_current_state; yy_last_accepting_cpos = yy_cp; } while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) { 
yy_current_state = (int) yy_def[yy_current_state]; if ( yy_current_state >= 9 ) yy_c = yy_meta[(unsigned int) yy_c]; } yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; yy_is_jam = (yy_current_state == 8); return yy_is_jam ? 0 : yy_current_state; }

/* yyunput - push the character c back onto the input.
 *
 * c      character stored just before the current scan position
 * yy_bp  start of the current token text; becomes yytext_ptr on return
 *
 * The hold character is first restored at yy_c_buf_p.  If fewer than two
 * bytes lie between the start of yy_ch_buf and the scan position, the
 * first yy_n_chars + 2 bytes of the buffer are copied backwards so that
 * they end at index yy_buf_size + 2, and yy_cp / yy_bp are advanced by the
 * same distance.  If there is still no room, YY_FATAL_ERROR is invoked.
 */
#ifndef YY_NO_UNPUT
#ifdef YY_USE_PROTOS
static void yyunput( int c, register char *yy_bp )
#else
static void yyunput( c, yy_bp )
int c;
register char *yy_bp;
#endif
{
    register char *yy_cp = yy_c_buf_p;

    /* undo effects of setting up yytext */
    *yy_cp = yy_hold_char;

    if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
    { /* need to shift things up to make room */
        /* +2 for EOB chars. */
        register int number_to_move = yy_n_chars + 2;
        register char *dest = &yy_current_buffer->yy_ch_buf[
                    yy_current_buffer->yy_buf_size + 2];
        register char *source =
                &yy_current_buffer->yy_ch_buf[number_to_move];

        /* Copy from the top down; the loop ends with source at the start
         * of yy_ch_buf, so dest - source is the distance moved.
         */
        while ( source > yy_current_buffer->yy_ch_buf )
            *--dest = *--source;

        yy_cp += (int) (dest - source);
        yy_bp += (int) (dest - source);
        yy_current_buffer->yy_n_chars =
            yy_n_chars = yy_current_buffer->yy_buf_size;

        if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 )
            YY_FATAL_ERROR( "flex scanner push-back overflow" );
    }

    *--yy_cp = (char) c;

    yytext_ptr = yy_bp;
    yy_hold_char = *yy_cp;
    yy_c_buf_p = yy_cp;
}
#endif /* ifndef YY_NO_UNPUT */

/* input / yyinput - return the next character of the current buffer.
 *
 * The character is returned as an unsigned char value widened to int.
 * When the end-of-buffer marker is reached at or past yy_n_chars, more
 * input is requested with yy_get_next_buffer(); EOF is returned when
 * yywrap() yields a non-zero value.  On return the consumed position is
 * overwritten with '\0' and the following character is saved in
 * yy_hold_char.
 *
 * NOTE(review): yy_get_next_buffer(), yywrap(), YY_NEW_FILE and the
 * EOB_ACT_* constants are defined outside this excerpt.
 */
#ifdef __cplusplus
static int yyinput()
#else
static int input()
#endif
{
    int c;

    *yy_c_buf_p = yy_hold_char;

    if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR )
    {
        /* yy_c_buf_p now points to the character we want to return.
         * If this occurs *before* the EOB characters, then it's a
         * valid NUL; if not, then we've hit the end of the buffer.
         */
        if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] )
            /* This was really a NUL. */
            *yy_c_buf_p = '\0';

        else
        { /* need more input */
            /* Remember the position relative to yytext_ptr so it can be
             * re-established after the refill.
             */
            int offset = yy_c_buf_p - yytext_ptr;
            ++yy_c_buf_p;

            switch ( yy_get_next_buffer() )
            {
                case EOB_ACT_LAST_MATCH:
                    /* This happens because yy_g_n_b()
                     * sees that we've accumulated a
                     * token and flags that we need to
                     * try matching the token before
                     * proceeding.  But for input(),
                     * there's no matching to consider.
                     * So convert the EOB_ACT_LAST_MATCH
                     * to EOB_ACT_END_OF_FILE.
                     */

                    /* Reset buffer status. */
                    yyrestart( yyin );

                    /* fall through */

                case EOB_ACT_END_OF_FILE:
                {
                    if ( yywrap() )
                        return EOF;

                    if ( ! yy_did_buffer_switch_on_eof )
                        YY_NEW_FILE;
                    /* Retry by calling this same function again. */
#ifdef __cplusplus
                    return yyinput();
#else
                    return input();
#endif
                }

                case EOB_ACT_CONTINUE_SCAN:
                    yy_c_buf_p = yytext_ptr + offset;
                    break;
            }
        }
    }

    c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */
    *yy_c_buf_p = '\0'; /* preserve yytext */
    yy_hold_char = *++yy_c_buf_p;

    return c;
}

/* yyrestart - (re)initialise the current buffer to read from input_file.
 *
 * If there is no current buffer, one of YY_BUF_SIZE bytes is created first
 * (note that it is created with yyin, then initialised with input_file).
 * The scanner globals are then reloaded from the buffer.
 */
#ifdef YY_USE_PROTOS
void yyrestart( FILE *input_file )
#else
void yyrestart( input_file )
FILE *input_file;
#endif
{
    if ( ! yy_current_buffer )
        yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE );

    yy_init_buffer( yy_current_buffer, input_file );
    yy_load_buffer_state();
}

/* yy_switch_to_buffer - make new_buffer the current buffer.
 *
 * Does nothing when new_buffer is already current.  Otherwise the scan
 * position and character count of the old buffer (if any) are saved into
 * it, the globals are loaded from new_buffer, and
 * yy_did_buffer_switch_on_eof is set.
 */
#ifdef YY_USE_PROTOS
void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer )
#else
void yy_switch_to_buffer( new_buffer )
YY_BUFFER_STATE new_buffer;
#endif
{
    if ( yy_current_buffer == new_buffer )
        return;

    if ( yy_current_buffer )
    {
        /* Flush out information for old buffer. */
        *yy_c_buf_p = yy_hold_char;
        yy_current_buffer->yy_buf_pos = yy_c_buf_p;
        yy_current_buffer->yy_n_chars = yy_n_chars;
    }

    yy_current_buffer = new_buffer;
    yy_load_buffer_state();

    /* We don't actually know whether we did this switch during
     * EOF (yywrap()) processing, but the only time this flag
     * is looked at is after yywrap() is called, so it's safe
     * to go ahead and always set it.
     */
    yy_did_buffer_switch_on_eof = 1;
}

/* yy_load_buffer_state - copy the current buffer's state into the globals
 * yy_n_chars, yytext_ptr, yy_c_buf_p, yyin and yy_hold_char.
 *
 * yy_current_buffer is dereferenced without a null check.
 */
#ifdef YY_USE_PROTOS
void yy_load_buffer_state( void )
#else
void yy_load_buffer_state()
#endif
{
    yy_n_chars = yy_current_buffer->yy_n_chars;
    yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos;
    yyin = yy_current_buffer->yy_input_file;
    yy_hold_char = *yy_c_buf_p;
}

/* yy_create_buffer - allocate and initialise a buffer for file.
 *
 * file  input stream handed on to yy_init_buffer()
 * size  usable capacity; size + 2 bytes are allocated for the characters
 *
 * Returns the new buffer, marked as owned by the scanner
 * (yy_is_our_buffer = 1).  Either allocation failing invokes
 * YY_FATAL_ERROR.
 */
#ifdef YY_USE_PROTOS
YY_BUFFER_STATE yy_create_buffer( FILE *file, int size )
#else
YY_BUFFER_STATE yy_create_buffer( file, size )
FILE *file;
int size;
#endif
{
    YY_BUFFER_STATE b;

    b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
    if ( ! b )
        YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );

    b->yy_buf_size = size;

    /* yy_ch_buf has to be 2 characters longer than the size given because
     * we need to put in 2 end-of-buffer characters.
     */
    b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 );
    if ( ! b->yy_ch_buf )
        YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );

    b->yy_is_our_buffer = 1;

    yy_init_buffer( b, file );

    return b;
}

/* yy_delete_buffer - release buffer b; a null b is ignored.
 *
 * If b is the current buffer, yy_current_buffer is cleared.  The character
 * array is freed only when yy_is_our_buffer is set; the buffer structure
 * itself is always freed.
 */
#ifdef YY_USE_PROTOS
void yy_delete_buffer( YY_BUFFER_STATE b )
#else
void yy_delete_buffer( b )
YY_BUFFER_STATE b;
#endif
{
    if ( ! b )
        return;

    if ( b == yy_current_buffer )
        yy_current_buffer = (YY_BUFFER_STATE) 0;

    if ( b->yy_is_our_buffer )
        yy_flex_free( (void *) b->yy_ch_buf );

    yy_flex_free( (void *) b );
}

/* isatty() is declared here unless one of the YY_*_INTERACTIVE macros is
 * defined; it is used by yy_init_buffer() below.
 */
#ifndef YY_ALWAYS_INTERACTIVE
#ifndef YY_NEVER_INTERACTIVE
extern int isatty YY_PROTO(( int ));
#endif
#endif

/* yy_init_buffer - flush buffer b and attach it to file.
 *
 * Sets yy_fill_buffer to 1.  yy_is_interactive is forced to 1 or 0 by
 * YY_ALWAYS_INTERACTIVE / YY_NEVER_INTERACTIVE; otherwise it is 1 only
 * when file is non-null and isatty() reports a positive value for its
 * descriptor.
 */
#ifdef YY_USE_PROTOS
void yy_init_buffer( YY_BUFFER_STATE b, FILE *file )
#else
void yy_init_buffer( b, file )
YY_BUFFER_STATE b;
FILE *file;
#endif
{
    yy_flush_buffer( b );

    b->yy_input_file = file;
    b->yy_fill_buffer = 1;

#if YY_ALWAYS_INTERACTIVE
    b->yy_is_interactive = 1;
#else
#if YY_NEVER_INTERACTIVE
    b->yy_is_interactive = 0;
#else
    b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0;
#endif
#endif
}

/* yy_flush_buffer - discard the contents of buffer b; a null b is ignored.
 *
 * The buffer is reset to hold zero characters followed by two
 * end-of-buffer markers, positioned at its start, flagged as being at the
 * beginning of a line and as YY_BUFFER_NEW.  If b is the current buffer
 * the scanner globals are reloaded from it.
 */
#ifdef YY_USE_PROTOS
void yy_flush_buffer( YY_BUFFER_STATE b )
#else
void yy_flush_buffer( b )
YY_BUFFER_STATE b;
#endif
{
    if ( ! b )
        return;

    b->yy_n_chars = 0;

    /* We always need two end-of-buffer characters.  The first causes
     * a transition to the end-of-buffer state.  The second causes
     * a jam in that state.
     */
    b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
    b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;

    b->yy_buf_pos = &b->yy_ch_buf[0];

    b->yy_at_bol = 1;
    b->yy_buffer_status = YY_BUFFER_NEW;

    if ( b == yy_current_buffer )
        yy_load_buffer_state();
}

/* yy_scan_buffer - scan the caller's memory region base[0..size-1] in place.
 *
 * The last two bytes of the region must both be YY_END_OF_BUFFER_CHAR;
 * otherwise (or when size < 2) 0 is returned and nothing is changed.
 * On success a buffer describing the region is created with
 * yy_is_our_buffer = 0 and yy_fill_buffer = 0, made current with
 * yy_switch_to_buffer(), and returned.
 */
#ifndef YY_NO_SCAN_BUFFER
#ifdef YY_USE_PROTOS
YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size )
#else
YY_BUFFER_STATE yy_scan_buffer( base, size )
char *base;
yy_size_t size;
#endif
{
    YY_BUFFER_STATE b;

    if ( size < 2 ||
         base[size-2] != YY_END_OF_BUFFER_CHAR ||
         base[size-1] != YY_END_OF_BUFFER_CHAR )
        /* They forgot to leave room for the EOB's. */
        return 0;

    b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) );
    if ( ! b )
        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );

    b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */
    b->yy_buf_pos = b->yy_ch_buf = base;
    b->yy_is_our_buffer = 0;
    b->yy_input_file = 0;
    b->yy_n_chars = b->yy_buf_size;
    b->yy_is_interactive = 0;
    b->yy_at_bol = 1;
    b->yy_fill_buffer = 0;
    b->yy_buffer_status = YY_BUFFER_NEW;

    yy_switch_to_buffer( b );

    return b;
}
#endif

/* yy_scan_string - scan a copy of the NUL-terminated string yy_str.
 *
 * The length is counted by hand and the work is delegated to
 * yy_scan_bytes().
 */
#ifndef YY_NO_SCAN_STRING
#ifdef YY_USE_PROTOS
YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str )
#else
YY_BUFFER_STATE yy_scan_string( yy_str )
yyconst char *yy_str;
#endif
{
    int len;
    for ( len = 0; yy_str[len]; ++len )
        ;

    return yy_scan_bytes( yy_str, len );
}
#endif

/* yy_scan_bytes - scan a private copy of the len bytes starting at bytes.
 *
 * A buffer of len + 2 bytes is allocated, filled with the input followed
 * by two end-of-buffer markers, and passed to yy_scan_buffer().  The
 * resulting buffer is marked yy_is_our_buffer = 1.  Allocation failure or
 * a rejected buffer invokes YY_FATAL_ERROR.
 */
#ifndef YY_NO_SCAN_BYTES
#ifdef YY_USE_PROTOS
YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len )
#else
YY_BUFFER_STATE yy_scan_bytes( bytes, len )
yyconst char *bytes;
int len;
#endif
{
    YY_BUFFER_STATE b;
    char *buf;
    yy_size_t n;
    int i;

    /* Get memory for full buffer, including space for trailing EOB's. */
    n = len + 2;
    buf = (char *) yy_flex_alloc( n );
    if ( ! buf )
        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );

    for ( i = 0; i < len; ++i )
        buf[i] = bytes[i];

    buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR;

    b = yy_scan_buffer( buf, n );
    if ( ! b )
        YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );

    /* It's okay to grow etc. this buffer, and we should throw it
     * away when we're done.
     */
    b->yy_is_our_buffer = 1;

    return b;
}
#endif

/* yy_push_state - save the value of YY_START on the start-condition stack
 * and enter new_state via BEGIN.
 *
 * When the stack is full its depth grows by YY_START_STACK_INCR entries;
 * the first growth allocates, later ones reallocate.  A failed
 * (re)allocation invokes YY_FATAL_ERROR.
 */
#ifndef YY_NO_PUSH_STATE
#ifdef YY_USE_PROTOS
static void yy_push_state( int new_state )
#else
static void yy_push_state( new_state )
int new_state;
#endif
{
    if ( yy_start_stack_ptr >= yy_start_stack_depth )
    {
        yy_size_t new_size;

        yy_start_stack_depth += YY_START_STACK_INCR;
        new_size = yy_start_stack_depth * sizeof( int );

        if ( ! yy_start_stack )
            yy_start_stack = (int *) yy_flex_alloc( new_size );

        else
            yy_start_stack = (int *) yy_flex_realloc(
                    (void *) yy_start_stack, new_size );

        if ( ! yy_start_stack )
            YY_FATAL_ERROR(
            "out of memory expanding start-condition stack" );
    }

    yy_start_stack[yy_start_stack_ptr++] = YY_START;

    BEGIN(new_state);
}
#endif

/* yy_pop_state - pop the top entry of the start-condition stack and enter
 * it via BEGIN; popping an empty stack invokes YY_FATAL_ERROR.
 */
#ifndef YY_NO_POP_STATE
static void yy_pop_state()
{
    if ( --yy_start_stack_ptr < 0 )
        YY_FATAL_ERROR( "start-condition stack underflow" );

    BEGIN(yy_start_stack[yy_start_stack_ptr]);
}
#endif

/* yy_top_state - return the top entry of the start-condition stack
 * without popping it.
 *
 * NOTE(review): there is no check for an empty stack here; with
 * yy_start_stack_ptr == 0 this reads index -1.
 */
#ifndef YY_NO_TOP_STATE
static int yy_top_state()
{
    return yy_start_stack[yy_start_stack_ptr - 1];
}
#endif

/* Exit status used by yy_fatal_error() unless already defined. */
#ifndef YY_EXIT_FAILURE
#define YY_EXIT_FAILURE 2
#endif

/* yy_fatal_error - print msg followed by a newline on stderr and terminate
 * the process with status YY_EXIT_FAILURE.
 */
#ifdef YY_USE_PROTOS
static void yy_fatal_error( yyconst char msg[] )
#else
static void yy_fatal_error( msg )
char msg[];
#endif
{
    (void) fprintf( stderr, "%s\n", msg );
    exit( YY_EXIT_FAILURE );
}

/* Redefine yyless() so it works in section 3 code. */

/* yyless(n) truncates the current token to its first n characters: the
 * hold character is restored at the old end of yytext, the scan position
 * is moved to yytext + n, and yytext is re-terminated there.
 */
#undef yyless
#define yyless(n) \
    do \
        { \
        /* Undo effects of setting up yytext. */ \
        yytext[yyleng] = yy_hold_char; \
        yy_c_buf_p = yytext + n; \
        yy_hold_char = *yy_c_buf_p; \
        *yy_c_buf_p = '\0'; \
        yyleng = n; \
        } \
    while ( 0 )

/* Internal utility routines. */

/* yy_flex_strncpy - copy exactly n bytes from s2 to s1.
 *
 * No terminator is appended and NUL bytes in s2 do not stop the copy.
 * Compiled only when yytext_ptr is not defined as a macro.
 */
#ifndef yytext_ptr
#ifdef YY_USE_PROTOS
static void yy_flex_strncpy( char *s1, yyconst char *s2, int n )
#else
static void yy_flex_strncpy( s1, s2, n )
char *s1;
yyconst char *s2;
int n;
#endif
{
    register int i;
    for ( i = 0; i < n; ++i )
        s1[i] = s2[i];
}
#endif

/* yy_flex_strlen - return the number of bytes in s before the first NUL.
 * Compiled only when YY_NEED_STRLEN is defined.
 */
#ifdef YY_NEED_STRLEN
#ifdef YY_USE_PROTOS
static int yy_flex_strlen( yyconst char *s )
#else
static int yy_flex_strlen( s )
yyconst char *s;
#endif
{
    register int n;
    for ( n = 0; s[n]; ++n )
        ;

    return n;
}
#endif

/* yy_flex_alloc - allocate size bytes with malloc(); returns the malloc()
 * result unchanged, so callers must check for a null pointer.
 */
#ifdef YY_USE_PROTOS
static void *yy_flex_alloc( yy_size_t size )
#else
static void *yy_flex_alloc( size )
yy_size_t size;
#endif
{
    return (void *) malloc( size );
}

/* yy_flex_realloc - resize the allocation at ptr to size bytes with
 * realloc(); returns the realloc() result unchanged.
 */
#ifdef YY_USE_PROTOS
static void *yy_flex_realloc( void *ptr, yy_size_t size )
#else
static void *yy_flex_realloc( ptr, size )
void *ptr;
yy_size_t size;
#endif
{
    /* The cast to (char *) in the following accommodates both
     * implementations that use char* generic pointers, and those
     * that use void* generic pointers.  It works with the latter
     * because both ANSI C and C++ allow castless assignment from
     * any pointer type to void*, and deal with argument conversions
     * as though doing an assignment.
     */
    return (void *) realloc( (char *) ptr, size );
}

/* yy_flex_free - release memory obtained from yy_flex_alloc() or
 * yy_flex_realloc() by passing it to free().
 */
#ifdef YY_USE_PROTOS
static void yy_flex_free( void *ptr )
#else
static void yy_flex_free( ptr )
void *ptr;
#endif
{
    free( ptr );
}

/* Optional stand-alone driver, compiled only when YY_MAIN is non-zero:
 * runs the scanner once with yylex() and returns 0 regardless of its
 * result.
 */
#if YY_MAIN
int main()
{
    yylex();
    return 0;
}
#endif
#line 5 "test.l"
 flexml-1.9.6/examples/my-joke.xml0000644000175000017500000000023612101762535017240 0ustar mquinsonmquinson My appartment is so small the mice are round-shouldered flexml-1.9.6/examples/joke.dtd0000644000175000017500000000125512101762535016572 0ustar mquinsonmquinson flexml-1.9.6/examples/joke1.xml0000644000175000017500000000030612101762535016674 0ustar mquinsonmquinson My appartment is so small The mice are round-shouldered flexml-1.9.6/examples/xhtml-href.act0000644000175000017500000000027012101762535017710 0ustar mquinsonmquinson ]]> flexml-1.9.6/examples/my-joke3.xml0000644000175000017500000000023312101762535017320 0ustar mquinsonmquinson My appartment is so small the mice are round-shouldered flexml-1.9.6/examples/my-joke2.xml0000644000175000017500000000023412101762535017320 0ustar mquinsonmquinson My appartment is so small the mice are round-shouldered flexml-1.9.6/examples/tricky.act0000644000175000017500000000026012101762535017136 0ustar mquinsonmquinson ]]> flexml-1.9.6/examples/joke2.xml0000644000175000017500000000032312101762535016674 0ustar mquinsonmquinson My appartment is so small The mice are round-shouldered flexml-1.9.6/examples/test.ent0000644000175000017500000000010212101762535016622 0ustar mquinsonmquinson flexml-1.9.6/examples/xhtml1-transitional.dtd0000644000175000017500000007655712101762535021605 0ustar mquinsonmquinson %HTMLlat1; %HTMLsymbol; %HTMLspecial; flexml-1.9.6/examples/xhtml-lat1.ent0000644000175000017500000002701512101762535017652 0ustar mquinsonmquinson flexml-1.9.6/examples/tricky.dtd0000644000175000017500000000027512101762535017148 0ustar mquinsonmquinson ' > %xx; flexml-1.9.6/examples/test.dtd0000644000175000017500000000015212101762535016614 0ustar mquinsonmquinson %Test; 
flexml-1.9.6/examples/test.pl0000755000175000017500000000106612101762535016464 0ustar mquinsonmquinson
#!/usr/bin/perl -w

# Fetch this script through LWP using a file: URL and print what came back.

use LWP::UserAgent;

# Build the user agent and put our own product token in front of the
# identification string it already carries.
$ua = LWP::UserAgent->new;
my $default_agent = $ua->agent;
$ua->agent("Test/0.1 " . $default_agent);

# Describe what to fetch, then let the user agent carry out the request.
my $request  = HTTP::Request->new(GET => 'file:test.pl');
my $response = $ua->request($request);

# Report the outcome: the failure notice, or the fetched contents.
if (!$response->is_success) {
    print "Bad luck this time\n";
}
else {
    my $body = $response->content;
    print "Contents:\n" . $body;
}
 flexml-1.9.6/examples/my.dtd0000644000175000017500000000031412101762535016262 0ustar mquinsonmquinson flexml-1.9.6/examples/test.html0000644000175000017500000000057112101762535017012 0ustar mquinsonmquinson Title

Test document

Testing, 1 2 3.


Kristoffer H. Rose
flexml-1.9.6/examples/tricky.xml0000644000175000017500000000016212101762535017170 0ustar mquinsonmquinson This sample shows a &tricky; method. flexml-1.9.6/examples/FleXML.xml0000644000175000017500000001517312101762535016762 0ustar mquinsonmquinson FleXML - XML Processor Generator

FleXML - XML Processor Generator

See also the manual page and a short white paper. Or peek into the master source archive.

FleXML reads a DTD (Document Type Definition) describing the format of XML (Extensible Markup Language) documents; it may be specified as a URI to the DTD on the web. From this FleXML produces a validating XML processor with an interface to support XML applications. Proper applications can be generated optionally from special action files, either for linking or textual combination with the processor.

FleXML is specifically developed for XML applications where a fixed data format suffices in the sense that a single DTD is used without individual extensions for a large number of documents. (Specifically it restricts XML rule [28] to

  [28r] doctypedecl ::= '<!DOCTYPE' S Name S ExternalID S? '>'

where the ExternalID denotes the DTD used - one might say, in fact, that FleXML implements ``non-extensible'' markup. :)

With this restriction we can do much better because the possible tags and attributes are static: FleXML-generated XML processors read the XML document character by character and can immediately dispatch the actions associated with each element (or reject the document as invalid). Technically this is done by using the Flex scanner generator to produce a deterministic finite automaton with an element context stack for the DTD, which means that there is almost no overhead for XML processing.

Furthermore we have devised a simple extension of the C programming language that facilitates the writing of `element actions' in C, making it easy to write really fast XML applications. In particular we represent XML attribute values efficiently in C when this is possible, thus avoiding the otherwise ubiquitous conversions between strings and data values.

Compared to SAX and its XSL-based friends, FleXML immediately produces efficient code, because the prohibition of extensions makes an efficient encoding possible; for example, FleXML uses native C `enum' types to implement enumeration attribute types. However, the above limitation does prevent uses in more complex settings.

As an example: the following is all that is needed to produce a fast program that prints all href-attributes in <a...> tags in XHTML documents (and rejects invalid XHTML documents).

  <!DOCTYPE actions SYSTEM "flexml-act.dtd">
  <actions>
  <top><![CDATA[           #include <stdio.h>                  ]]></top>
  <start tag='a'><![CDATA[ if ({href}) printf("%s\n", {href}); ]]></start>
  </actions>

In general, action files are themselves XML documents conforming to the DTD

   <!ELEMENT actions ((top|start|end)*,main?)>
   <!ENTITY % C-code "(#PCDATA)">
   <!ELEMENT top   %C-code;>
   <!ELEMENT start %C-code;>  <!ATTLIST start tag NMTOKEN #REQUIRED>
   <!ELEMENT end   %C-code;>  <!ATTLIST end   tag NMTOKEN #REQUIRED>
   <!ELEMENT main  %C-code;>

with %C-code; segments being in C enriched as described below. The elements are used as follows:

top

Use for top-level C code such as global declarations, utility functions, etc.

start

Attaches the code as an action to the element with the name of the required ``tag'' attribute. The ``%C-code;'' component should be C code suitable for inclusion in a C block (i.e., within {...} so it may contain local variables); furthermore the following extensions are available: {attribute} Can be used to access the value of the attribute as set with attribute=value in the start tag. In C, {attribute} will be interpreted depending on the declaration of the attribute. If the attribute is declared as an enumerated type like

  <!ATTLIST attrib (alt1 | alt2 |...) ...>

then the C attribute value is of an enumerated type with the elements written {attrib=alt1}, {attrib=alt2}, etc.; furthermore an unset attribute has the ``value'' {!attrib}. If the attribute is not an enumeration then {attrib} is a null-terminated C string (of type char*) and {!attrib} is NULL.

end

Similarly attaches the code as an action to the end tag with the name of the required ``tag'' attribute; also here the ``%C-code;'' component should be C code suitable for inclusion in a C block. In case the element has ``Mixed'' contents, i.e., was declared to permit #PCDATA, then the special variable {#PCDATA} contains the text (#PCDATA) of the element as a null-terminated C string (of type char*). In case the Mixed contents element actually mixed text and child elements then {#PCDATA} contains the plain concatenation of the text fragments as one string.

main

Finally, an optional ``main'' element can contain the C main function of the XML application. Normally the main function should include (at least) one call of the XML processor yylex.

The program is freely redistributable and modifiable (under GNU `copyleft').


Copyright (c) Kristoffer Rose. Last modified: Mon Dec 13 17:17:44 CET 1999
flexml-1.9.6/examples/xhtml-symbol.ent0000644000175000017500000003345712101762535020325 0ustar mquinsonmquinson flexml-1.9.6/examples/joke.act0000644000175000017500000000124312101762535016563 0ustar mquinsonmquinson #include #include char* terminator; ]]> flexml-1.9.6/examples/test.xml0000644000175000017500000000016012101762535016640 0ustar mquinsonmquinson ' > %xx; flexml-1.9.6/examples/test.act0000644000175000017500000000102712101762535016612 0ustar mquinsonmquinson #include char* terminator = "."; ]]> flexml-1.9.6/examples/xhtml-special.ent0000644000175000017500000001006012101762535020421 0ustar mquinsonmquinson flexml-1.9.6/examples/my-show.act0000644000175000017500000000103012101762535017230 0ustar mquinsonmquinson #include char* terminator = "."; ]]>