#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
# ifndef HAVE_STRSTR
# include "strstr.e"
# endif
#endif
#include "export.h"
#include "types.e"
#include "tree.e"
#include "html.e"
#include "scan.e"
#include "dict.e"
#include "openurl.e"
#include "class.e"
#include "errexit.e"
#define EXCLUDE_CLASS "exclude" /* Default value for class */
static Tree tree;
static bool xml = false; /* Use <empty /> convention */
/* handle_error -- parser callback: report a parse error on stderr
 *
 * Writes "<lineno>: <message>" followed by a newline.
 */
static void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;                    /* Unused by this handler */
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- parser callback invoked before the first event
 *
 * Initializes the global tree with create() and returns NULL.
 */
static void *start(void)
{
  tree = create();

  return NULL;
}
/* end -- parser callback invoked after the last event; nothing to do */
static void end(void *clientdata)
{
  (void) clientdata;                    /* Unused by this handler */
}
/* handle_comment -- parser callback: add a comment node to the tree */
static void handle_comment(void *clientdata, string commenttext)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = append_comment(tree, commenttext);
}
/* handle_text -- parser callback: add a text chunk to the tree */
static void handle_text(void *clientdata, string text)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = append_text(tree, text);
}
/* handle_decl -- parser callback: add a declaration node to the tree
 *
 * gi, fpi and url are handed unchanged to append_declaration().
 */
static void handle_decl(void *clientdata, string gi, string fpi, string url)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = append_declaration(tree, gi, fpi, url);
}
/* handle_pi -- parser callback: add a processing instruction to the tree */
static void handle_pi(void *clientdata, string pi_text)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = append_procins(tree, pi_text);
}
/* handle_starttag -- parser callback: push a new element onto the tree */
static void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = html_push(tree, name, attribs);
}
/* handle_emptytag -- parser callback for an empty tag
 *
 * Handled exactly like a start tag: the element is pushed with html_push().
 */
static void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = html_push(tree, name, attribs);
}
/* handle_endtag -- parser callback for an end tag (name may be "")
 *
 * Closes the element via html_pop().
 */
static void handle_endtag(void *clientdata, string name)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = html_pop(tree, name);
}
/* prune -- write the tree, suppressing elements with a certain class
 *
 * Writes the children of t to standard output in document order and
 * recurses into elements. An element for which has_class() reports
 * the given class is skipped together with all of its content.
 * Attributes without a value are written as name="name" when the
 * global xml flag is set, and as a bare name otherwise.
 */
static void prune(Tree t, const string class)
{
  Tree h;
  pairlist a;

  for (h = t->children; h != NULL; h = h->sister) {
    switch (h->tp) {
    case Text:
      printf("%s", h->text);
      break;
    case Comment:
      printf("<!--%s-->", h->text);
      break;
    case Declaration:
      printf("<!DOCTYPE %s", h->name);
      if (h->text) printf(" PUBLIC \"%s\"", h->text);
      /* A system identifier without a public one needs the SYSTEM keyword */
      if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url);
      printf(">");
      break;
    case Procins:
      printf("<?%s>", h->text);
      break;
    case Element:
      if (! has_class(h->attribs, class)) {
        printf("<%s", h->name);
        for (a = h->attribs; a != NULL; a = a->next) {
          printf(" %s", a->name);
          if (a->value != NULL) printf("=\"%s\"", a->value);
          else if (xml) printf("=\"%s\"", a->name);
        }
        if (is_empty(h->name)) {
          assert(h->children == NULL);
          printf("%s", xml ? " />" : ">");
        } else {
          printf(">");
          prune(h, class);
          printf("</%s>", h->name);
        }
      }
      break;
    case Root:
      assert(! "Cannot happen");
      break;
    default:
      assert(! "Cannot happen");
    }
  }
}
/* usage -- print a usage message on stderr and exit with status 1 */
static void usage(string name)
{
  static const char synopsis[] = "Usage: %s [-c class] [-x] [html-file]\n";

  fprintf(stderr, synopsis, name);
  exit(1);
}
/* main -- parse the arguments, read the document and write it back pruned
 *
 * "-c class" selects the class to suppress (default EXCLUDE_CLASS) and
 * "-x" switches the output to XML conventions. Any other argument is
 * opened with fopenurl() and becomes the input; without one, standard
 * input is read. Exits with 2 if the input cannot be opened and with 3
 * if yyparse() fails.
 */
int main(int argc, char *argv[])
{
  int i, status;
  string class = EXCLUDE_CLASS;

  /* mtrace(); */

  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  yyin = stdin;
  for (i = 1; i < argc; i++) {
    if (eq(argv[i], "-c")) {
      if (i >= argc - 1) usage(argv[0]);        /* -c needs a value */
      class = argv[++i];
    } else if (eq(argv[i], "-x")) {
      xml = true;
    } else {
      yyin = fopenurl(argv[i], "r", &status);
      /* Report the argument that failed, which need not be argv[1] */
      if (yyin == NULL) {perror(argv[i]); exit(2);}
      if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));
    }
  }

  if (yyparse() != 0) {
    exit(3);
  }

  tree = get_root(tree);
  prune(tree, class);
  tree_delete(tree);                            /* Just to test memory mgmt */
  return 0;
}
html-xml-utils-7.6/hxaddid.1 0000644 0001750 0001750 00000005366 12704011745 012711 0000000 0000000 .de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXADDID" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxaddid \- add IDs to selected elements
.SH SYNOPSIS
.B hxaddid
.RB "[\| " \-x " \|]"
.RB "[\-\-]"
.IR elem|.class|elem.class
.RI "[\| " file-or-URL " \|]"
.SH DESCRIPTION
.LP
The
.B hxaddid
command copies an HTML or XML file to standard output, while adding
element IDs to the specified elements or classes.
.LP
For example, given the input
.d
<p>A paragraph without an ID</p>
.e
the command
.d
hxaddid p
.e
will output
.d
<p id="a-paragraph">A paragraph without an ID</p>
.e
.LP
If you specify a class using \fB.class\fP then IDs will only be added
to elements that contain that class. And if you specify an element and
a class using \fBelem.class\fP then IDs will only be added to the
specified elements that contain the specified class.
.LP
If two elements would naturally generate the same ID, a number is
added to the ID name (starting with 0) to make sure the IDs are
unique. IDs are not added to matching elements that already contain
an ID.
.SH OPTIONS
The following options are supported:
.TP 10
.B \-x
Use XML conventions: empty elements are written with a slash at the
end: <IMG\ />. Also causes the element to be matched case-sensitively.
.SH OPERANDS
The following operands are supported:
.TP 10
.I elem
The name of the element to select.
.TP 10
.I .class
The name of the class to select.
.TP 10
.I elem.class
The name of the element to select, restricted to elements that have the given class.
.TP 10
.I file-or-URL
The name or URL of an HTML or XHTML file.
.SH "EXIT STATUS"
The following exit values are returned:
.TP 10
.B 0
Successful completion.
.TP
.B > 0
An error occurred in the parsing of one of the HTML or XML files.
.SH ENVIRONMENT
To use a proxy to retrieve remote files, set the environment variables
.B http_proxy
or
.BR ftp_proxy "."
E.g.,
.B http_proxy="http://localhost:8080/"
.SH BUGS
.LP
Assumes UTF-8 as input. Doesn't expand character entities. Instead
pipe the input through
.BR hxunent (1)
and
.BR asc2xml (1)
to convert it to UTF-8.
.LP
.B hxaddid
tries first to generate "readable" IDs, by forming the ID out of the
letters and digits found in the content of the element and falls back
to generating arbitrary IDs if it doesn't find enough. However, the
algorithm in this version is primitive and only gives reasonable
results for ASCII letters and digits.
.LP
Remote files (specified with a URL) are currently only supported for
HTTP. Password-protected files or files that depend on HTTP "cookies"
are not handled. (You can use tools such as
.BR curl (1)
or
.BR wget (1)
to retrieve such files.)
.SH "SEE ALSO"
.BR asc2xml (1),
.BR hxprune (1),
.BR hxnormalize (1),
.BR hxnum (1),
.BR hxtoc (1),
.BR hxunent (1),
.BR xml2asc (1),
.BR UTF-8 " (RFC 2279)"
html-xml-utils-7.6/unent.e 0000755 0001750 0001750 00000000252 13205772341 012514 0000000 0000000 struct _Entity {char *name; unsigned int code;};
extern const struct _Entity *lookup_entity (register const char *str,
register size_t len);
html-xml-utils-7.6/hxref.1 0000644 0001750 0001750 00000007057 13206020555 012414 0000000 0000000 .de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXREF" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxref \- generate cross-references inside and between HTML files
.SH SYNOPSIS
.B hxref
.RB "[\| " \-x " \|]"
.RB "[\| " \-l " \|]"
.RB "[\| " \-b
.IR base " \|]"
.RB "[\| " \-i
.IR index " \|]"
.RI "[\| -- \|] [\| " input " [\| " output " \|] \|]"
.SH DESCRIPTION
.LP
The
.B hxref
command links inline elements to DFN elements with the same content.
It adds ID attributes where necessary. If the content of a DFN or
other element isn't suitable, the TITLE attribute may be used to
provide the term to use for comparisons.
.LP
Here is an example:
.d
<p>A <dfn>b-box</dfn> is defined to be...
<p>For every b there is a <em>b-box</em>...
.e
The output of
.B hxref
will be similar to this:
.d
<p>A <dfn id="b-box">b-box</dfn> is defined to be...
<p>For every b there is a <a href="#b-box"><em>b-box</em></a>...
.e
.SH OPTIONS
The following options are supported:
.TP 10
.B \-x
Use XML conventions: empty elements are written with a slash at the
end: <IMG\ />.
.TP
.BI \-b " base"
Sets the prefix for the generated URLs. By default
.I base
is empty, which generates URLs like "#b-box" above. If
.I base
is set to, e.g., "http://xyz/", the URLs will look like
"http://xyz/#b-box".
.TP
.BI \-i " index"
Directs
.B hxref
to read terms from a database file before looking for them in the
document and afterwards store the terms that were found in the same
file. DFN elements in the document override terms found in
.IR index "."
This allows
.B hxref
to be run multiple times on different files, to make the files refer
to each other. It may be necessary to run the commands twice, to
resolve all cross-references.
.TP
.B \-l
Try to use language-specific information to match instances to their
definitions. Currently, only English is supported and the only rules
applied are to search without a final "s" ("trees" matches "tree"),
without a final "es" ("bosses" matches "boss") and with a "y"
replacing a final "ies" ("bounties" matches "bounty").
.B hxref
determines the language from "lang" and "xml:lang" attributes in the
document.
.SH OPERANDS
The following operands are supported:
.TP 10
.I input
The name of an HTML file. If absent, standard input is read instead.
The special name "-" also indicates standard input. The
.I input
may be a URL.
.TP
.I output
The file to write to. If absent, standard output is used. This may
.I not
be a URL.
.SH "DIAGNOSTICS"
The following exit values are returned:
.TP 10
.B 0
Successful completion.
.TP
.B > 0
An error occurred in the parsing of the HTML file.
.B hxref
will try to correct the error and produce output anyway.
.SH "SEE ALSO"
.BR asc2xml (1),
.BR hxindex (1),
.BR hxnormalize (1),
.BR hxnum (1),
.BR hxtoc (1),
.BR xml2asc (1)
.SH BUGS
.LP
The error recovery for incorrect HTML is primitive.
.LP
The program generates ID attributes, but doesn't generate <a\ name=...>
tags, so the links only work in browsers that recognize ID attributes.
.LP
The rules for matching English plurals are primitive. No dictionary is
used. E.g., "bees" will be considered a plural of "be".
.LP
There is currently no way to set the default language for a document
for when the root element has no "lang" or "xml:lang" attribute.
.LP
.B hxref
tries first to generate "readable" ID attributes, by forming the ID
out of the letters and digits found in the content of the element and
falls back to generating arbitrary IDs if it doesn't find
enough. However, the algorithm in this version is primitive and only
gives reasonable results for ASCII letters and digits.
html-xml-utils-7.6/scan.e 0000755 0001750 0001750 00000000341 13205772341 012306 0000000 0000000 extern FILE *yyin;
extern void set_yyin(FILE *f, const conststring name);
extern conststring get_yyin_name(void);
extern void include_file(FILE *f, const conststring name);
extern void set_cdata_element(const conststring e);
html-xml-utils-7.6/hxclean.1 0000644 0001750 0001750 00000000412 12704011745 012711 0000000 0000000 .de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXCLEAN" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxclean \- apply heuristics to correct an HTML file
.SH SYNOPSIS
.B hxclean
.RI "[\| " file " \|]"
.SH DESCRIPTION
[ToDo]
html-xml-utils-7.6/hxwls.c 0000645 0001750 0001750 00000023767 13212535556 012547 0000000 0000000 /*
* List all links from the given document.
*
* Copyright © 1994-2000 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Bert Bos
* Created 31 July 1999
* $Id: hxwls.c,v 1.12 2017/12/07 02:05:23 bbos Exp $
*/
#include "config.h"
#include
#ifdef HAVE_UNISTD_H
# include
#endif
#include
#include
#include
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
# ifndef HAVE_STRDUP
# include "strdup.e"
# endif
#endif
#include "export.h"
#include "heap.e"
#include "types.e"
#include "html.e"
#include "scan.e"
#include "dict.e"
#include "openurl.e"
#include "url.e"
#include "errexit.e"
#include "unent.e"
static bool has_error = false;
static string base = NULL;
static string self;
static enum {Short, Long, HTML, Tuple} format = Short; /* Option -l -h -t */
static bool relative = false; /* Option -r */
static bool ascii = false; /* Option -a */
/* append_utf8 -- add UTF-8 bytes for code n at s, return end of string
 *
 * Writes between one and six bytes at s, using the original
 * (RFC 2279) UTF-8 scheme, and returns a pointer just past the last
 * byte written. No terminating '\0' is added and the size of the
 * buffer is not checked: the caller must provide enough room.
 */
static string append_utf8(string s, const unsigned long n)
{
  /* We assume s is long enough */
  if (n <= 0x7F) {
    *(s++) = n;
  } else if (n <= 0x7FF) {
    *(s++) = 0xC0 | (n >> 6);
    *(s++) = 0x80 | (n & 0x3F);
  } else if (n <= 0xFFFF) {
    *(s++) = 0xE0 | (n >> 12);
    *(s++) = 0x80 | ((n >> 6) & 0x3F);
    *(s++) = 0x80 | (n & 0x3F);
  } else if (n <= 0x1FFFFF) {
    *(s++) = 0xF0 | (n >> 18);
    *(s++) = 0x80 | ((n >> 12) & 0x3F);
    *(s++) = 0x80 | ((n >> 6) & 0x3F);
    *(s++) = 0x80 | (n & 0x3F);
  } else if (n <= 0x3FFFFFF) {
    *(s++) = 0xF8 | (n >> 24);          /* 5-byte lead byte is 111110xx */
    *(s++) = 0x80 | ((n >> 18) & 0x3F);
    *(s++) = 0x80 | ((n >> 12) & 0x3F);
    *(s++) = 0x80 | ((n >> 6) & 0x3F);
    *(s++) = 0x80 | (n & 0x3F);
  } else {
    /* 6-byte lead byte is 1111110x; mask so a larger n can't spill over */
    *(s++) = 0xFC | ((n >> 30) & 0x01);
    *(s++) = 0x80 | ((n >> 24) & 0x3F);
    *(s++) = 0x80 | ((n >> 18) & 0x3F);
    *(s++) = 0x80 | ((n >> 12) & 0x3F);
    *(s++) = 0x80 | ((n >> 6) & 0x3F);
    *(s++) = 0x80 | (n & 0x3F);
  }
  return s;
}
/* output -- print the link (lowercases rel argument)
 *
 * Does nothing if url is NULL. Otherwise numeric and named character
 * entities in url are replaced by UTF-8 bytes, the URL is made
 * absolute against the global base (unless option -r was given or no
 * base is known), optionally converted with URL_s_to_ascii() (option
 * -a), and finally printed in the layout selected by the global
 * format. A NULL rel is printed as the empty string.
 */
static void output(const conststring type, const conststring rel,
                   conststring url)
{
  string h = NULL, q, r, rel1;
  conststring p, s;
  const struct _Entity *e;
  unsigned long c;

  if (url) {                                    /* If we found a URL */

    /* Replace entities. */
    h = newnstring(url, 2 * strlen(url));       /* Reserve sufficient space */
    for (p = url, q = h; *p; p++) {
      if (*p != '&') {
        *(q++) = *p;
      } else if (*(p+1) == '#') {               /* Numeric entity */
        if (*(p+2) == 'x') c = strtoul(p + 3, &r, 16);
        else c = strtoul(p + 2, &r, 10);
        if (c > 0 && c <= 2147483647) q = append_utf8(q, c);
        p = *r == ';' ? r : r - 1;              /* Swallow the ';' if present */
      } else {                                  /* Entity */
        /* <ctype.h> functions need an unsigned char value */
        for (s = p + 1; isalnum((unsigned char)*s); s++);
        if (!(e = lookup_entity(p+1, s - (p+1)))) *(q++) = '&'; /* Unknown */
        else {q = append_utf8(q, e->code); p = *s == ';' ? s : s -1;}
      }
    }
    *q = '\0';
    url = h;

    /* Make URL absolute */
    if (! relative && base) {
      h = URL_s_absolutize(base, url);
      dispose(url);
      url = h;
    }

    /* Convert IRI to URL, if requested */
    if (ascii) {
      h = URL_s_to_ascii(url);
      dispose(url);
      url = h;
    }

    rel1 = newstring(rel ? rel : "");
    down(rel1);

    switch (format) {
    case HTML:
      /* NOTE(review): the markup in this string was lost in extraction and
         is reconstructed from the four arguments -- confirm against upstream */
      printf("<li><a class=\"%s\" rel=\"%s\" href=\"%s\">%s</a></li>\n",
             type, rel1, url, url);
      break;
    case Long:
      printf("%s\t%s\t%s\n", type, rel1, url);
      break;
    case Short:
      printf("%s\n", url);
      break;
    case Tuple:
      printf("%s\t%s\t%s\t%s\n", self, type, rel1, url);
      break;
    default:
      assert(!"Cannot happen!");
    }
    free(rel1);
    free(h);                                    /* h and url are the same here */
  }
}
/* --------------- implements parser interface api------------------------- */
/* handle_error -- parser callback: report a parse error
 *
 * Prints "<lineno>: <message>" on stderr and records that an error
 * occurred, so that main() can return a non-zero status.
 */
void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;                    /* Unused by this handler */
  has_error = true;
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- called before the first event is reported
 *
 * In HTML output mode this writes the start of the result document;
 * end() writes the matching closing tags. Always returns NULL.
 */
void* start(void)
{
  if (format == HTML) {
    /* NOTE(review): the markup in these strings was lost in extraction and
       is reconstructed -- confirm the exact DOCTYPE against upstream */
    printf("<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\"\n");
    printf("  \"http://www.w3.org/TR/html4/strict.dtd\">\n");
    printf("<html>\n");
    printf("<head><title>Output of listlinks</title></head>\n");
    printf("<body>\n");
    printf("<ol>\n");
  }
  return NULL;
}
/* end -- called after the last event is reported
 *
 * In HTML output mode this closes the list, body and document.
 */
void end(void *clientdata)
{
  (void) clientdata;                    /* Unused by this handler */
  if (format == HTML) {
    /* NOTE(review): closing tags reconstructed after extraction damage */
    printf("</ol>\n");
    printf("</body>\n");
    printf("</html>\n");
  }
}
/* handle_comment -- parser callback: comments are ignored, just free them */
void handle_comment(void *clientdata, string commenttext)
{
  (void) clientdata;                    /* Unused by this handler */
  free(commenttext);
}
/* handle_text -- parser callback: text is ignored, just free it
 *
 * There may be several consecutive calls to this routine.
 */
void handle_text(void *clientdata, string text)
{
  (void) clientdata;                    /* Unused by this handler */
  free(text);
}
/* handle_decl -- parser callback: declarations are ignored
 *
 * Only releases the three strings; free() accepts NULL, so no guards
 * are needed.
 */
void handle_decl(void *clientdata, string gi, string fpi, string url)
{
  (void) clientdata;                    /* Unused by this handler */
  free(gi);
  free(fpi);
  free(url);
}
/* handle_pi -- parser callback: processing instructions are ignored */
void handle_pi(void *clientdata, string pi_text)
{
  (void) clientdata;                    /* Unused by this handler */
  free(pi_text);                        /* free(NULL) is a no-op */
}
/* handle_starttag -- called after a start tag is parsed
 *
 * Reports every URL-valued attribute of the element through output().
 * A BASE element additionally replaces the global base URL that is
 * used for all links that follow. Element names are compared
 * case-insensitively. The attribute list and the name are freed
 * before returning.
 */
void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  /* ToDo: print text of anchor, if available */

  /* One row per reported attribute; rows of one element stay in order */
  static const struct {
    const char *element;                /* Element name to match */
    const char *type;                   /* First argument for output() */
    const char *rel_attrib;             /* Attribute passed as rel, or NULL */
    const char *url_attrib;             /* Attribute that holds the URL */
  } links[] = {
    {"link",       "link",     "rel",    "href"},
    {"a",          "a",        "rel",    "href"},
    {"img",        "img",      NULL,     "src"},
    {"img",        "longdesc", NULL,     "longdesc"},
    {"input",      "input",    NULL,     "src"},
    {"object",     "object",   NULL,     "data"},
    {"object",     "object",   NULL,     "classid"},
    {"object",     "object",   NULL,     "codebase"},
    {"area",       "area",     "rel",    "href"},
    {"ins",        "ins",      NULL,     "cite"},
    {"del",        "del",      NULL,     "cite"},
    {"q",          "q",        NULL,     "cite"},
    {"blockquote", "bq",       NULL,     "cite"},
    {"form",       "form",     "method", "action"},
    {"frame",      "frame",    NULL,     "src"},
    {"iframe",     "iframe",   NULL,     "src"},
    {"head",       "head",     NULL,     "profile"},
    {"script",     "script",   NULL,     "src"},
    {"body",       "body",     NULL,     "background"},
    {"video",      "video",    NULL,     "src"},
    {"audio",      "audio",    NULL,     "src"},
    {"source",     "source",   NULL,     "srcset"},
    {"source",     "source",   NULL,     "src"},
  };
  conststring h;
  size_t i;

  (void) clientdata;                    /* Unused by this handler */

  if (strcasecmp(name, "base") == 0) {
    h = pairlist_get(attribs, "href");
    if (h) {
      free(base);                       /* base is only ever set via strdup() */
      base = strdup(h);                 /* Use as base from now on */
    }
    output("base", NULL, h);
  } else {
    for (i = 0; i < sizeof(links) / sizeof(links[0]); i++) {
      if (strcasecmp(name, links[i].element) != 0) continue;
      output(links[i].type,
             links[i].rel_attrib ? pairlist_get(attribs, links[i].rel_attrib)
                                 : NULL,
             pairlist_get(attribs, links[i].url_attrib));
    }
  }

  /* Free memory */
  pairlist_delete(attribs);
  free(name);
}
/* handle_emptytag -- parser callback for an empty tag
 *
 * An empty tag carries the same attributes as a start tag, so it is
 * simply forwarded to handle_starttag().
 */
void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  handle_starttag(clientdata, name, attribs);
  return;
}
/* handle_endtag -- parser callback for an end tag (name may be "")
 *
 * End tags carry no links; only the name is released.
 */
void handle_endtag(void *clientdata, string name)
{
  (void) clientdata;                    /* Unused by this handler */
  free(name);
}
/* --------------------------------------------------------------------- */
/* usage -- print version and usage message on stderr, exit with status 1 */
static void usage(string progname)
{
  fprintf(stderr, "Version %s\n", VERSION);
  fprintf(stderr,
          "Usage: %s [-l] [-r] [-h] [-b base] [-t] [-a] [HTML-file]\n",
          progname);
  exit(1);
}
/* main -- parse the command line, then list the links of one document
 *
 * At most one operand is accepted: a file name or URL, or "-" for
 * standard input. Without an operand standard input is read. Unless
 * option -b was given, the operand itself serves as base URL.
 * Returns 1 if the parser reported errors, otherwise 0.
 */
int main(int argc, char *argv[])
{
  int opt, status = 200;

  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  /* Parse command line arguments */
  while ((opt = getopt(argc, argv, "lb:rhta")) != -1) {
    if (opt == 'l') format = Long;              /* Long listing */
    else if (opt == 'b') base = strdup(optarg); /* Set base of URL */
    else if (opt == 'r') relative = true;       /* Do not make URLs absolute */
    else if (opt == 'h') format = HTML;         /* Output in HTML format */
    else if (opt == 't') format = Tuple;        /* Output as 4-tuples */
    else if (opt == 'a') ascii = true;          /* Convert IRIs to URLs */
    else usage(argv[0]);
  }

  /* Select the input according to the number of operands */
  switch (argc - optind) {
  case 0:
    yyin = stdin;
    self = "-";
    break;
  case 1:
    if (!base) base = strdup(argv[optind]);
    yyin = eq(argv[optind], "-") ? stdin
                                 : fopenurl(argv[optind], "r", &status);
    self = argv[optind];
    break;
  default:
    usage(argv[0]);
  }

  if (yyin == NULL) {
    perror(argv[optind]);
    exit(1);
  }
  if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status));

  if (yyparse() != 0) exit(3);

  free(base);                                   /* free(NULL) is a no-op */
  return has_error ? 1 : 0;
}
html-xml-utils-7.6/hxnum.1 0000644 0001750 0001750 00000000337 12704011745 012434 0000000 0000000 .de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXNUM" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxnum \- number section headings in an HTML file
.SH SYNOPSIS
[ToDo. Try num -?]
html-xml-utils-7.6/hxcopy.c 0000644 0001750 0001750 00000020037 13205764472 012701 0000000 0000000 /* hxcopy -- copy an HTML file and update relative URLs at the same time
*
* Copy an HTML file with all URLs that were relative to OLDURL
* updated to be relative to NEWURL instead. (If the document has a
* BASE element, only that is updated.) OLDURL and NEWURL may
* themselves be relative (to the same base URL, which need not be
* mentioned).
*
* Part of HTML-XML-utils, see:
* http://www.w3.org/Tools/HTML-XML-utils/
*
* TO DO: Should it be an option whether URL references of the form
* "", "#foo" and "?bar" are replaced by "oldurl", "oldurl#foo" and
* "oldurl?bar"? (See adjust_url().)
*
* Created: 5 Dec 2008
* Author: Bert Bos
*
* Copyright © 2008-2012 W3C
* See http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
*/
#include "config.h"
#include
#include
#include
#include
#include
#include
#if HAVE_STRING_H
# include
#endif
#if HAVE_STRINGS_H
# include
#endif
#include "export.h"
#include "heap.e"
#include "types.e"
#include "html.e"
#include "scan.e"
#include "url.e"
#include "dict.e"
#include "openurl.e"
#include "errexit.e"
#define same(a, b) ((a) ? ((b) && eq((a), (b))) : !(b))
static bool has_errors = false; /* Encountered errors during parsing */
static FILE *out = NULL; /* Where to write output */
static bool has_base = false; /* Document has a <base> element */
static string newbase; /* Path from OLDURL to NEWURL */
static bool replace_self = false; /* Change link to self in link to old */
/* path_from_url_to_url -- compute URL that is path from one URL to another
 *
 * Both a and b are first made absolute relative to the current
 * working directory. If they differ in protocol, user, password,
 * machine or port, the result is simply a copy of b. Otherwise the
 * result is a relative path: one "../" for every '/' in a's path
 * beyond the common prefix, followed by the rest of b's path.
 *
 * Returns NULL if the working directory cannot be determined. When a
 * has a protocol and b has none, errno is set to EACCES and s is
 * returned as dispose() left it.
 */
static string path_from_url_to_url(const conststring a, const conststring b)
{
  char cwd[4096];
  string s = NULL;
  URL from, to;
  int pos, slash;

  if (!getcwd(cwd, sizeof(cwd) - 1)) return NULL; /* To do: handle long path */
  strcat(cwd, "/");

  s = URL_s_absolutize(cwd, a);
  from = URL_new(s);
  dispose(s);

  s = URL_s_absolutize(cwd, b);
  to = URL_new(s);
  dispose(s);

  if (from->proto && !to->proto) {
    errno = EACCES;             /* Path from remote to local not possible */
  } else if (same(from->proto, to->proto) &&
             same(from->user, to->user) &&
             same(from->password, to->password) &&
             same(from->machine, to->machine) &&
             same(from->port, to->port)) {
    /* Find the last '/' before which both paths are the same */
    slash = 0;
    pos = 0;
    while (from->path[pos] && to->path[pos] &&
           from->path[pos] == to->path[pos]) {
      if (from->path[pos] == '/') slash = pos;
      pos++;
    }
    /* Construct path from a to b by descending a and climbing b */
    for (pos = slash + 1; from->path[pos]; pos++) {
      if (from->path[pos] == '/') strapp(&s, "../", NULL);
    }
    strapp(&s, to->path + slash + 1, NULL);
  } else {
    s = newstring(b);           /* Just use the URL b */
  }

  URL_dispose(from);
  URL_dispose(to);
  return s;
}
/* adjust_url -- return a new URL relative to newurl instead of oldurl
 *
 * Unless replace_self is set, references to the document itself (an
 * empty URL, or one that starts with '#' or '?') are left unchanged.
 * All other URLs are resolved against newbase.
 *
 * The result is newly allocated in both cases, because the callers
 * dispose() it; a NULL url yields NULL when self references are kept.
 */
static conststring adjust_url(const conststring url)
{
  if (!replace_self && (!url || !url[0] || url[0] == '#' || url[0] == '?'))
    /* Don't replace references to self, but hand out a copy the caller owns */
    return url ? newstring(url) : NULL;
  else
    return URL_s_absolutize(newbase, url);
}
/* attribute_is_url -- check if the attribute is URL-valued
 *
 * Compares the attribute name case-insensitively against the fixed
 * list of attribute names below.
 */
static bool attribute_is_url(const conststring attrib)
{
  static const char *const url_attribs[] = {
    "href", "src", "action", "background", "cite", "classid",
    "codebase", "data", "longdesc", "profile", "usemap",
  };
  size_t k;

  for (k = 0; k < sizeof(url_attribs) / sizeof(url_attribs[0]); k++) {
    if (strcasecmp(attrib, url_attribs[k]) == 0) return true;
  }
  return false;
}
/* handle_error -- parser callback: report a parse error
 *
 * Prints "<lineno>: <message>" on stderr and sets has_errors, which
 * determines the exit status of main().
 */
void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;                    /* Unused by this handler */
  has_errors = true;
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- parser callback invoked before the first event; returns NULL */
void *start(void)
{
  /* Nothing to set up */
  return NULL;
}
/* end -- parser callback invoked after the last event; nothing to do */
void end(void *clientdata)
{
  (void) clientdata;                    /* Unused by this handler */
}
/* handle_comment -- called after a comment is parsed
 *
 * Copies the comment to the output, wrapped in comment delimiters
 * again.
 */
void handle_comment(void *clientdata, string commenttext)
{
  (void) clientdata;                    /* Unused by this handler */
  fprintf(out, "<!--%s-->", commenttext);
}
/* handle_text -- parser callback: copy a text chunk to the output as is */
void handle_text(void *clientdata, string text)
{
  (void) clientdata;                    /* Unused by this handler */
  fprintf(out, "%s", text);
}
/* handle_decl -- called after a declaration is parsed
 *
 * Writes the document type declaration back to the output: the name
 * gi, followed by the public identifier fpi and/or the system
 * identifier url when present.
 * NOTE(review): the markup in this function was lost in extraction
 * and is reconstructed -- confirm against upstream.
 */
void handle_decl(void *clientdata, string gi,
                 string fpi, string url)
{
  (void) clientdata;                    /* Unused by this handler */
  fprintf(out, "<!DOCTYPE %s", gi);
  if (fpi) fprintf(out, " PUBLIC \"%s\"", fpi);
  /* A system identifier without a public one needs the SYSTEM keyword */
  if (url) fprintf(out, " %s\"%s\"", fpi ? "" : "SYSTEM ", url);
  fprintf(out, ">");
}
/* handle_pi -- called after a PI is parsed
 *
 * Copies the processing instruction to the output between "<?" and ">".
 */
void handle_pi(void *clientdata, string pi_text)
{
  (void) clientdata;                    /* Unused by this handler */
  fprintf(out, "<?%s>", pi_text);
}
/* handle_starttag -- parser callback: copy a start tag to the output
 *
 * URL-valued attributes are rewritten with adjust_url() as long as no
 * BASE element has been seen; all other values are copied unchanged.
 * Attributes without a value are written as a bare name.
 */
void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  pairlist attr;
  conststring value;

  (void) clientdata;                    /* Unused by this handler */

  fprintf(out, "<%s", name);
  for (attr = attribs; attr; attr = attr->next) {
    fprintf(out, " %s", attr->name);
    if (!attr->value)
      value = NULL;
    else if (!has_base && attribute_is_url(attr->name))
      value = adjust_url(attr->value);
    else
      value = newstring(attr->value);   /* No need to adjust */
    if (value) fprintf(out, "=\"%s\"", value);
    dispose(value);
  }
  fprintf(out, ">");

  /* Once a BASE element has been seen, no further adjustments are needed */
  if (strcasecmp(name, "base") == 0) has_base = true;
}
/* handle_emptytag -- parser callback: copy an empty tag to the output
 *
 * Works like handle_starttag(), except that the tag is closed with
 * " />" instead of ">".
 */
void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  pairlist attr;
  conststring value;

  (void) clientdata;                    /* Unused by this handler */

  fprintf(out, "<%s", name);
  for (attr = attribs; attr; attr = attr->next) {
    fprintf(out, " %s", attr->name);
    if (!attr->value)
      value = NULL;
    else if (!has_base && attribute_is_url(attr->name))
      value = adjust_url(attr->value);
    else
      value = newstring(attr->value);   /* No need to adjust */
    if (value) fprintf(out, "=\"%s\"", value);
    dispose(value);
  }
  fprintf(out, " />");

  /* Once a BASE element has been seen, no further adjustments are needed */
  if (strcasecmp(name, "base") == 0) has_base = true;
}
/* handle_endtag -- called after an endtag is parsed (name may be "")
 *
 * Copies the end tag to the output as "</name>".
 */
void handle_endtag(void *clientdata, string name)
{
  (void) clientdata;                    /* Unused by this handler */
  fprintf(out, "</%s>", name);
}
/* usage -- print a usage message on stderr and exit with status 1 */
static void usage(const conststring progname)
{
  static const char synopsis[] =
    "Usage: %s [-v] [-s] [-i old-URL] [-o new-URL] [URL [URL]]\n";

  fprintf(stderr, synopsis, progname);
  exit(1);
}
/* main -- copy a document while rewriting its relative URLs
 *
 * Options: -i old-URL and -o new-URL give the old and new location
 * of the document, -s also rewrites references to the document
 * itself, -v prints the version. Up to two operands name the input
 * and the output; a missing operand means standard input or output,
 * in which case the corresponding -i or -o option is required.
 *
 * Exit status: 0 on success, 1 if the parser reported errors (or on
 * a usage error), 2 if the input and 3 if the output cannot be
 * opened, 4 if yyparse() fails.
 */
int main(int argc, char *argv[])
{
  int c, status = 200;
  string oldurl = NULL, newurl = NULL;

  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  /* Parse command line */
  while ((c = getopt(argc, argv, "i:o:sv")) != -1)
    switch (c) {
    case 'o': newurl = optarg; break;
    case 'i': oldurl = optarg; break;
    case 's': replace_self = true; break;
    case 'v': printf("Version: %s %s\n", PACKAGE, VERSION); return 0;
    default: usage(argv[0]);
    }
  if (argc > optind + 2) usage(argv[0]);

  if (argc > optind + 1) out = fopenurl(argv[optind+1], "w", NULL);
  else if (newurl) out = stdout;
  else errexit("%s: option -o is required if output is to stdout\n", argv[0]);
  if (!out) {perror(argv[optind+1]); exit(3);}

  if (argc > optind) yyin = fopenurl(argv[optind], "r", &status);
  else if (oldurl) yyin = stdin;
  else errexit("%s: option -i is required if input is from stdin\n", argv[0]);
  if (!yyin) {perror(argv[optind]); exit(2);}
  /* status can only differ from 200 after fopenurl(argv[optind], ...);
     argv[1] may be an option rather than the input */
  if (status != 200) errexit("%s : %s\n", argv[optind], http_strerror(status));

  /* Without -i or -o, the operands themselves are the old and new URL */
  if (!oldurl) oldurl = argv[optind];
  if (!newurl) newurl = argv[optind+1];
  newbase = path_from_url_to_url(newurl, oldurl);
  if (!newbase) errexit("%s: could not parse argument as a URL\n", argv[0]);

  if (yyparse() != 0) exit(4);

  return has_errors ? 1 : 0;
}
html-xml-utils-7.6/hxtoc.c 0000645 0001750 0001750 00000033563 13205765541 012523 0000000 0000000 /*
* Insert an active ToC between "<!--begin-toc-->" and "<!--end-toc-->",
* or replacing the comment "<!--toc-->"
*
* Headers with class "no-toc" will not be listed in the ToC.
*
* The ToC links to elements with ID attributes as well as with
* empty <A NAME> elements.
*
* Tags for a <SPAN> with class "index" are assumed to be used by
* a cross-reference generator and will not be copied to the ToC.
*
* Similarly, DFN tags are not copied to the ToC (but the element's
* content is).
*
* Any <A> tags with a class of "bctarget" are not copied, but
* regenerated. They are assumed to be backwards-compatible versions
* of ID attributes on their parent elements. With the option -t or -x
* they are removed.
*
* Copyright © 1994-2013 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created Sep 1997
* Version: $Id: hxtoc.c,v 1.12 2017/11/24 09:50:25 bbos Exp $
*
**/
#include "config.h"
#include
#include
#include
#include
#include
#include
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
# ifndef HAVE_STRSTR
# include "strstr.e"
# endif
#endif
#ifdef HAVE_ERRNO_H
# include
#endif
#ifdef HAVE_SEARCH_H
# include
#else
# include "search-freebsd.h"
#endif
#include "export.h"
#include "types.e"
#include "heap.e"
#include "tree.e"
#include "html.e"
#include "scan.e"
#include "dict.e"
#include "openurl.e"
#include "errexit.e"
#include "genid.e"
#include "class.e"
#define BEGIN_TOC "begin-toc" /* <!--begin-toc--> */
#define END_TOC "end-toc" /* <!--end-toc--> */
#define TOC "toc" /* <!--toc--> */
#define NO_TOC "no-toc" /* CLASS="... no-toc..." */
#define INDEX "index" /* CLASS="... index..." */
#define TARGET "bctarget" /* CLASS="...bctarget..." */
#define EXPAND true
#define NO_EXPAND false
#define KEEP_ANCHORS true
#define REMOVE_ANCHORS false
#define DONT_FLATTEN false
#define INDENT " " /* Amount to indent ToC per level */
static Tree tree;
static int toc_low = 1, toc_high = 6; /* Which headers to include */
static bool xml = false; /* Use <empty /> convention */
static bool bctarget = true; /* Generate <a name> after IDs */
static string toc_class = "toc"; /* */
static bool use_div = false; /* Option -d */
static bool do_flatten = false; /* Option -f */
/* handle_error -- parser callback: report a parse error on stderr
 *
 * Writes "<lineno>: <message>" followed by a newline.
 */
static void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;                    /* Unused by this handler */
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- parser callback invoked before the first event
 *
 * Initializes the global tree with create() and returns NULL.
 */
static void *start(void)
{
  tree = create();

  return NULL;
}
/* end -- parser callback invoked after the last event; nothing to do */
static void end(void *clientdata)
{
  (void) clientdata;                    /* Unused by this handler */
}
/* handle_comment -- parser callback: add a comment node to the tree */
static void handle_comment(void *clientdata, string commenttext)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = append_comment(tree, commenttext);
}
/* handle_text -- parser callback: add a text chunk to the tree */
static void handle_text(void *clientdata, string text)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = append_text(tree, text);
}
/* handle_decl -- parser callback: add a declaration node to the tree
 *
 * gi, fpi and url are handed unchanged to append_declaration().
 */
static void handle_decl(void *clientdata, string gi, string fpi, string url)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = append_declaration(tree, gi, fpi, url);
}
/* handle_pi -- parser callback: add a processing instruction to the tree */
static void handle_pi(void *clientdata, string pi_text)
{
  (void) clientdata;                    /* Unused by this handler */
  tree = append_procins(tree, pi_text);
}
/* handle_starttag -- called after a start tag is parsed */
static void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  conststring existing_id;

  (void) clientdata;
  tree = html_push(tree, name, attribs);

  /* Remember an ID that is already present, so it is not generated again */
  existing_id = pairlist_get(attribs, "id");
  if (existing_id) storeID(existing_id);
}
/* handle_emptytag -- called after an empty tag is parsed */
static void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  /* An empty tag is handled exactly like a start tag */
  handle_starttag(clientdata, name, attribs);
}
/* handle_endtag -- called after an endtag is parsed (name may be "") */
static void handle_endtag(void *clientdata, string name)
{
  (void) clientdata;
  tree = html_pop(tree, name);
}
/* indent -- print the INDENT string level times (nothing if level <= 0) */
static void indent(int level)
{
  int i;

  for (i = 0; i < level; i++) printf(INDENT);
}
/* is_div -- true if t is a division (DIV, SECTION, ARTICLE, ASIDE or NAV) */
static bool is_div(Tree t)
{
  /* All but "div" are HTML5 elements */
  static const char * const divisions[] =
    {"div", "section", "article", "aside", "nav"};
  size_t i;

  assert(t->tp == Element);
  for (i = 0; i < sizeof(divisions) / sizeof(divisions[0]); i++)
    if (eq(t->name, divisions[i])) return true;
  return false;
}
/* heading_level -- return 1..6 if t is H1..H6, else 0
 *
 * An element with class NO_TOC never counts as a heading.
 */
static int heading_level(Tree t)
{
  static const char * const headings[] = {"h1", "h2", "h3", "h4", "h5", "h6"};
  int n;

  assert(t->tp == Element);
  if (has_class(t->attribs, NO_TOC)) return 0;
  for (n = 0; n < 6; n++)
    if (eq(t->name, headings[n])) return n + 1;
  return 0;
}
/* div_parent -- if t is the first child of a section elt, return that elt
 *
 * A HEADER element between t and the section is looked through, as long
 * as it is itself the first child of the section. Only comments, PIs and
 * white space may precede t. Returns NULL otherwise, and also when the
 * parent has class NO_TOC.
 */
static Tree div_parent(Tree t)
{
  Tree parent, section, sib;

  assert(t->tp == Element);
  assert(t->parent);
  parent = t->parent;

  if (parent->tp != Element) return NULL;
  if (has_class(parent->attribs, NO_TOC)) return NULL;

  if (is_div(parent)) {
    section = parent;
  } else if (eq(parent->name, "header")) {
    section = div_parent(parent);
    if (!section) return NULL;
  } else {
    return NULL;
  }

  /* Anything substantial before t means t is not the first child */
  for (sib = parent->children; sib != t; sib = sib->sister) {
    if (sib->tp == Element) return NULL;
    if (sib->tp == Text && !only_space(sib->text)) return NULL;
  }
  return section;
}
/* first_child_is_heading -- true if first child is a Hn or HEADER
 *
 * White-space-only text and non-element, non-text nodes are skipped when
 * looking for the first child.
 */
static bool first_child_is_heading(Tree t)
{
  Tree child;

  assert(t->tp == Element);
  for (child = t->children; child; child = child->sister) {
    if (child->tp == Element)
      return eq(child->name, "header") || heading_level(child) > 0;
    if (child->tp == Text && !only_space(child->text))
      return false;
  }
  return false;			/* No element children at all */
}
static void expand(Tree t, bool *write, bool exp, bool keep_anchors,
int div_depth, bool flatten);
/* toc -- create a table of contents
 *
 * Writes, on stdout, list items for all headings found in t and below.
 *
 *   t             node to scan
 *   curlevel      in/out: level of the list that is currently open
 *   item_is_open  in/out: true once an <li> was written that is not closed yet
 *   div_depth     nesting depth used as heading level with option -d; the
 *                 caller starts at 1 and it grows by one for every section
 *                 element that starts with a heading
 *
 * NOTE(review): the markup inside the format strings was reconstructed from
 * the printf argument lists; confirm it against the upstream source.
 */
static void toc(Tree t, int *curlevel, bool *item_is_open, int div_depth)
{
  conststring val, id;
  int level;
  Tree h, div = NULL;
  bool write = true;

  switch (t->tp) {
    case Text: break;
    case Comment: break;
    case Declaration: break;
    case Procins: break;
    case Element:
      if (use_div && is_div(t) && first_child_is_heading(t)) {
	/* It's a section element with a heading as first child */
	div_depth++;
	level = 0;
      } else {
	/* Check if the element is a heading and what its level is */
	level = heading_level(t);
	if (level && use_div && (div = div_parent(t))) level = div_depth;
      }
      /* If it's a header for the ToC, create a list item for it */
      if (level >= toc_low && level <= toc_high) {
	/* Ensure there is an ID to point to */
	h = use_div && div ? div : t;
	if (! (id = get_attrib(h, "id"))) {
	  id = gen_id(h);
	  set_attrib(h, "id", id);
	}
	assert(*curlevel <= level || *item_is_open);
	/* Close the lists that are deeper than this heading */
	while (*curlevel > level) {
	  printf(xml ? "</li>\n" : "\n");
	  indent(*curlevel - toc_low);
	  printf("</ul>");
	  (*curlevel)--;
	}
	if (*curlevel == level && *item_is_open) {
	  /* Same level as the previous item: just end that item */
	  printf(xml ? "</li>\n" : "\n");
	} else if (*item_is_open) {
	  /* Deeper than the previous item: nest a list inside it */
	  printf("\n");
	  (*curlevel)++;
	  indent(*curlevel - toc_low);
	  printf("<ul class=\"%s\">\n", toc_class);
	}
	/* Levels that were skipped get an item without text */
	while (*curlevel < level) {
	  indent(*curlevel - toc_low);
	  printf("<li>\n");
	  (*curlevel)++;
	  indent(*curlevel - toc_low);
	  printf("<ul class=\"%s\">\n", toc_class);
	}
	indent(*curlevel - toc_low);
	if ((val = get_attrib(t, "class"))) {
	  printf("<li class=\"%s\"><a href=\"#%s\">", val, id);
	} else {
	  printf("<li><a href=\"#%s\">", id);
	}
	/* Copy the heading's content, without anchors or IDs */
	expand(t, &write, NO_EXPAND, REMOVE_ANCHORS, div_depth, do_flatten);
	printf("</a>");
	*item_is_open = true;
      } else {
	for (h = t->children; h != NULL; h = h->sister)
	  toc(h, curlevel, item_is_open, div_depth);
      }
      break;
    case Root:
      for (h = t->children; h != NULL; h = h->sister)
	toc(h, curlevel, item_is_open, div_depth);
      break;
    default: assert(! "Cannot happen");
  }
}
/* expand -- write the tree, inserting ID's at H* and inserting a toc
 *
 *   t             node whose children are written to stdout
 *   write         in/out: false while an old ToC is being skipped
 *   exp           if true, "toc"/"begin-toc"/"end-toc" comments are acted on
 *   keep_anchors  if false, A, DFN and index SPAN tags and ID attributes
 *                 are omitted (used when copying a heading into the ToC)
 *   div_depth     section nesting depth, used with option -d
 *   flatten       if true, replace elements by their ALT text or content
 *
 * NOTE(review): the markup inside the format strings was reconstructed from
 * the printf argument lists and the comments; confirm it against the
 * upstream source.
 */
static void expand(Tree t, bool *write, bool exp, bool keep_anchors,
		   int div_depth, bool flatten)
{
  conststring val;
  Tree h;
  pairlist a;
  conststring s;
  int level;
  bool item_is_open = false;

  for (h = t->children; h != NULL; h = h->sister) {
    switch (h->tp) {
      case Text:
	if (*write) printf("%s", h->text);
	break;
      case Comment:
	/* The cast keeps isspace() defined for chars with the high bit set */
	for (s = h->text; isspace((unsigned char)*s); s++) ;
	if (exp && (!strncmp(s, TOC, sizeof(TOC) - 1)
		    || !strncmp(s, BEGIN_TOC, sizeof(BEGIN_TOC) - 1))) {
	  printf("<!--%s-->\n", BEGIN_TOC);
	  printf("<ul class=\"%s\">\n", toc_class);
	  level = toc_low;
	  toc(get_root(t), &level, &item_is_open, 1);
	  /* Close whatever lists toc() left open */
	  while (level > toc_low) {
	    printf(xml ? "</li>\n" : "\n");
	    indent(level - toc_low);
	    printf("</ul>");
	    level--;
	  }
	  if (item_is_open && xml) printf("</li>\n");
	  printf("</ul>\n");
	  printf("<!--%s-->", END_TOC);
	  if (!strncmp(s, BEGIN_TOC, sizeof(BEGIN_TOC) - 1))
	    *write = false;			/* Suppress old ToC */
	} else if (exp && !strncmp(s, END_TOC, sizeof(END_TOC) - 1)) {
	  *write = true;
	} else {
	  printf("<!--%s-->", h->text);
	}
	break;
      case Declaration:
	printf("<!DOCTYPE %s", h->name);
	if (h->text) printf(" PUBLIC \"%s\"", h->text);
	if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url);
	printf(">");
	break;
      case Procins:
	if (*write) printf("<?%s>", h->text);
	break;
      case Element:
	if (use_div && is_div(h) && first_child_is_heading(h)) {
	  /* It's a section element with a heading as first child */
	  div_depth++;
	  level = div_depth;
	} else {
	  /* Check if the element is a heading and what its level is */
	  level = heading_level(h);
	  if (level && use_div && div_parent(h)) level = 0;
	}
	/* Give DIVs and headers an ID, if they need one */
	if (level >= toc_low && level <= toc_high) {
	  if (!get_attrib(h, "id")) set_attrib(h, "id", gen_id(h));
	}
	if (*write) {
	  if (flatten && ((s = get_attrib(h, "alt")))) {
	    /* Flatten: use ALT attribute instead of element */
	    printf("%s", s);
	  } else if (flatten && !eq(h->name, "bdo")
		     && ((s = get_attrib(h, "dir")))) {
	    /* Flatten: keep DIR attributes */
	    printf("<span dir=\"%s\">", s);
	    expand(h, write, exp, false, div_depth, flatten);
	    printf("</span>");
	  } else if (flatten && !eq(h->name, "bdo")) {
	    /* Flatten: remove all elements except BDO */
	    expand(h, write, exp, false, div_depth, flatten);
	  } else if (! keep_anchors && eq(h->name, "a")) {
	    /* Don't write the <a> and </a> tags */
	    expand(h, write, exp, false, div_depth, flatten);
	  } else if (! keep_anchors && eq(h->name, "span")
		     && has_class(h->attribs, INDEX)) {
	    /* Don't write <span class="index">...</span> tags */
	    expand(h, write, exp, false, div_depth, flatten);
	  } else if (! keep_anchors && eq(h->name, "dfn")) {
	    /* Don't copy dfn tags to the ToC */
	    expand(h, write, exp, false, div_depth, flatten);
	  } else if (eq(h->name, "a") && (has_class(h->attribs, TARGET)
					  || has_class(h->attribs, TOC))) {
	    /* This <a> was inserted by toc itself; remove it */
	    expand(h, write, exp, false, div_depth, flatten);
	  } else {
	    printf("<%s", h->name);
	    for (a = h->attribs; a != NULL; a = a->next) {
	      if (keep_anchors || !eq(a->name, "id")) {
		/* If we don't keep anchors, we don't keep IDs either */
		printf(" %s", a->name);
		if (a->value != NULL) printf("=\"%s\"", a->value);
	      }
	    }
	    if (is_empty(h->name)) {
	      assert(h->children == NULL);
	      printf(xml ? " />" : ">");
	    } else {
	      printf(">");
	      /* Insert an <a name> if element has an ID and is not <a> */
	      if (bctarget && is_mixed(h->name) && (val = get_attrib(h, "id"))
		  && !eq(h->name, "a") && ! xml)
		printf("<a class=\"%s\" name=\"%s\"></a>", TARGET, val);
	      expand(h, write, exp, keep_anchors, div_depth, flatten);
	      printf("</%s>", h->name);
	    }
	  }
	}
	break;
      case Root:
	assert(! "Cannot happen");
	break;
      default:
	assert(! "Cannot happen");
    }
  }
}
/* usage -- print usage message and exit
 *
 *   name  the program name to show in the message
 */
static void usage(string name)
{
  /* Lists every option that main() hands to getopt(), including -f */
  errexit("Version %s\nUsage: %s [-l low] [-h high] [-x] [-t] [-d] [-c class] [-f] [html-file]\n",
	  VERSION, name);
}
/* main -- parse the options, read the document, write it with a ToC */
int main(int argc, char *argv[])
{
  int opt, status;
  bool write = true;

  /* Command line options */
  while ((opt = getopt(argc, argv, "l:h:xtdc:f")) != -1) {
    if (opt == 'l') toc_low = atoi(optarg);
    else if (opt == 'h') toc_high = atoi(optarg);
    else if (opt == 'x') xml = true;
    else if (opt == 't') bctarget = false;
    else if (opt == 'd') use_div = true;
    else if (opt == 'c') toc_class = newstring(optarg);
    else if (opt == 'f') do_flatten = true;
    else usage(argv[0]);
  }

  /* Only H1 to H6 exist */
  if (toc_low < 1) toc_low = 1;
  if (toc_high > 6) toc_high = 6;

  /* At most one input; no argument or "-" means standard input */
  if (argc > optind + 1) {
    usage(argv[0]);
  } else if (optind >= argc || eq(argv[optind], "-")) {
    yyin = stdin;
  } else {
    yyin = fopenurl(argv[optind], "r", &status);
    if (!yyin) {
      perror(argv[optind]);
      exit(2);
    }
    if (status != 200)
      errexit("%s : %s\n", argv[optind], http_strerror(status));
  }

  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  /* Build the tree, then write it out from the root */
  if (yyparse() != 0) exit(3);
  tree = get_root(tree);
  expand(tree, &write, EXPAND, KEEP_ANCHORS, 1, DONT_FLATTEN);

#if 0
  tree_delete(tree);				/* Just to test memory mgmt */
#endif
  return 0;
}
html-xml-utils-7.6/hxextract.c 0000645 0001750 0001750 00000016054 13205765541 013404 0000000 0000000 /*
* Output all elements with a certain name and/or class.
* Input must be well-formed, since no HTML heuristics are applied.
*
* Copyright © 2000-2012 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: 20 Aug 2000
* Version: $Id: hxextract.c,v 1.7 2017/11/24 09:50:25 bbos Exp $
*/
#include "config.h"
#include
#include
#include
#include
#include
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
# ifndef HAVE_STRSTR
# include "strstr.e"
# endif
#endif
#include "export.h"
#include "types.e"
#include "html.e"
#include "heap.e"
#include "scan.e"
#include "dict.e"
#include "openurl.e"
#include "class.e"
#define INDEX "index" /* CLASS="... index..." */
#define MAXLINELEN 1024 /* In configfile */
static bool xml = false; /* Use convention */
static int copying = 0; /* Start by not copying */
static string base = NULL; /* URL of each file */
static string endtext = ""; /* Text to insert at end */
static string targetelement = NULL; /* Element to extract */
static string targetclass = NULL; /* Class to extract */
/* add_href -- set the "href" attribute in attribs to base + "#" + id */
static void add_href(pairlist *attribs, const string base, const conststring id)
{
  string url = NULL;		/* strapp() stores the assembled string here */

  pairlist_set(attribs, "href", strapp(&url, base, "#", id, NULL));
  free(url);
}
/* handle_error -- report a parse error on stderr as "lineno: message" */
static void handle_error(void *unused, const string s, int lineno)
{
  (void) unused;
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- called before the first event is reported; no client data */
static void* start(void)
{
  return NULL;
}
/* end -- called after the last event is reported; nothing to do */
static void end(void *unused)
{
  (void) unused;
}
/* handle_comment -- called after a comment is parsed; comments are dropped */
static void handle_comment(void *unused, const string commenttext)
{
  (void) unused;
  (void) commenttext;
}
/* handle_text -- called after a text chunk is parsed
 *
 * Text is only copied to stdout while inside a matching element.
 */
static void handle_text(void *unused, const string text)
{
  (void) unused;
  if (copying <= 0) return;
  fputs(text, stdout);
}
/* handle_decl -- called after a declaration is parsed; it is ignored */
static void handle_decl(void *unused, const string gi,
			const string fpi, const string url)
{
  (void) unused;
  (void) gi;
  (void) fpi;
  (void) url;
}
/* handle_pi -- called after a processing instruction is parsed; ignored */
static void handle_pi(void *unused, const string pi_text)
{
  (void) unused;
  (void) pi_text;
}
/* print_tag -- print a start- or empty tag
 *
 * The first occurrence of INDEX is left out of the value of a "class"
 * attribute. An empty tag ends in " />" if XML output was requested.
 */
static void print_tag(const string name, pairlist attribs, bool empty)
{
  pairlist a;
  conststring hit;

  printf("<%s", name);
  for (a = attribs; a != NULL; a = a->next) {
    printf(" %s", a->name);
    if (strcasecmp(a->name, "class") == 0)
      hit = contains(a->value, INDEX);
    else
      hit = NULL;
    if (hit) {
      /* Print value excluding INDEX: what precedes it, then what follows */
      printf("=\"%.*s%s\"", (int)(hit - a->value), a->value,
	     hit + sizeof(INDEX) - 1);
    } else if (a->value) {
      printf("=\"%s\"", a->value);
    }
  }
  if (empty && xml)
    printf(" />");
  else
    printf(">");
}
/* is_match -- check whether the element matches the target element and class
 *
 * A target that is not set (NULL) matches everything.
 */
static bool is_match(const string name, pairlist attribs)
{
  if (targetelement && strcasecmp(name, targetelement) != 0) return false;
  if (targetclass && !has_class(attribs, targetclass)) return false;
  return true;
}
/* handle_starttag -- called after a start tag is parsed
 *
 * Starts copying at a matching element, or goes one level deeper if
 * already copying. A tags are counted, but not printed.
 */
static void handle_starttag(void *unused, const string name, pairlist attribs)
{
  conststring id;

  (void) unused;
  if (!copying && !is_match(name, attribs)) return;

  /* The outermost copied element gets a link back to its source */
  if (!copying) {
    id = pairlist_get(attribs, "id");
    if (id) add_href(&attribs, base, id);
  }
  if (!eq(name, "a") && !eq(name, "A")) print_tag(name, attribs, false);
  copying++;
}
/* handle_emptytag -- called after an empty tag is parsed
 *
 * Like handle_starttag(), but the nesting level does not change.
 */
static void handle_emptytag(void *unused, const string name, pairlist attribs)
{
  conststring id;

  (void) unused;
  if (!copying && !is_match(name, attribs)) return;

  if (!copying) {
    id = pairlist_get(attribs, "id");
    if (id) add_href(&attribs, base, id);
  }
  if (!eq(name, "a") && !eq(name, "A")) print_tag(name, attribs, true);
}
/* handle_endtag -- called after an endtag is parsed (name may be "")
 *
 * While copying, writes the end tag (except for A elements, whose tags
 * are never printed) and goes up one nesting level.
 */
static void handle_endtag(void *unused, const string name)
{
  if (copying) {
    /* Must mirror the "<name" that print_tag() wrote for the start tag */
    if (!eq(name, "a") && !eq(name, "A")) printf("</%s>", name);
    copying--;
  }
}
/* process_configfile -- read @chapter lines from config file
 *
 * Every file named on an "@chapter" line is parsed in turn. Exits with
 * status 2 if a file cannot be opened and 3 if parsing fails.
 */
static void process_configfile(const string configfile)
{
  char line[MAXLINELEN], chapter[MAXLINELEN];
  FILE *cfg;

  cfg = fopenurl(configfile, "r", NULL);
  if (cfg == NULL) {perror(configfile); exit(2);}

  /* ToDo: accept quoted file names with spaces in their name */
  while (fgets(line, sizeof(line), cfg) != NULL) {
    if (sscanf(line, " @chapter %s", chapter) != 1) continue;

    if (!base) base = chapter;	/* Without a base, links point to the file */
    yyin = fopenurl(chapter, "r", NULL);
    if (yyin == NULL) {perror(chapter); exit(2);}
    if (yyparse() != 0) exit(3);
    fclose(yyin);
    base = NULL;		/* A base applies to a single file only */
  }
  fclose(cfg);
}
/* usage -- print usage message on stderr and exit with status 1 */
static void usage(const string name)
{
  fprintf(stderr,
	  "Usage: %s [-v] [-x] [-s text] [-e text] [-b base] element-or-class [-c configfile | file-or-URL]...\n",
	  name);
  exit(1);
}
/* main -- extract the elements that match the target from all inputs
 *
 * Arguments are handled in order, so options may appear between file
 * names. The first argument that is not an option sets the target
 * ("element", ".class" or "element.class"); later ones are files or URLs.
 * Exit status: 1 usage error, 2 unreadable input, 3 parse failure.
 */
int main(int argc, char *argv[])
{
  char *p;
  int i;

  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  /* Loop over arguments; options may be in between file names */
  for (i = 1; i < argc; i++) {
    if (eq(argv[i], "-h") || eq(argv[i], "-?")) { /* Usage */
      usage(argv[0]);
    } else if (eq(argv[i], "-x")) {		/* XML format */
      xml = true;
    } else if ((eq(argv[i], "-s") || eq(argv[i], "-e") || eq(argv[i], "-b")
		|| eq(argv[i], "-c")) && i + 1 >= argc) {
      /* Option without its argument: don't run into the NULL at argv[argc] */
      usage(argv[0]);
    } else if (eq(argv[i], "-s")) {		/* Insert text at start */
      printf("%s", argv[++i]);
    } else if (eq(argv[i], "-e")) {		/* Insert text at end */
      endtext = argv[++i];
    } else if (eq(argv[i], "-b")) {		/* URL base */
      base = argv[++i];
    } else if (eq(argv[i], "-c")) {		/* Config file */
      process_configfile(argv[++i]);
    } else if (eq(argv[i], "-v")) {
      printf("Version: %s %s\n", PACKAGE, VERSION);
      return 0;
    } else if (eq(argv[i], "-")) {		/* "-" = stdin */
      if (!base) base = "";
      yyin = stdin;
      if (yyparse() != 0) exit(3);
      base = NULL;				/* Reset base */
    } else if (targetelement || targetclass) {	/* It's a file name or URL */
      if (!base) base = argv[i];
      yyin = fopenurl(argv[i], "r", NULL);
      if (yyin == NULL) {perror(argv[i]); exit(2);}
      if (yyparse() != 0) exit(3);
      fclose(yyin);
      base = NULL;
    } else if (argv[i][0] == '.') {		/* Class name */
      targetclass = argv[i] + 1;
    } else {					/* Element name */
      targetelement = argv[i];
      if ((p = strchr(targetelement, '.'))) {
	/* "element.class": split in place at the dot */
	*p = '\0';
	targetclass = p + 1;
      }
    }
  }
  if (!targetelement && !targetclass) usage(argv[0]);
  printf("%s", endtext);			/* Insert text at end */
  return 0;
}
html-xml-utils-7.6/install-sh 0000755 0001750 0001750 00000033255 12517753263 013236 0000000 0000000 #!/bin/sh
# install - install a program, script, or datafile
scriptversion=2011-11-20.07; # UTC
# This originates from X11R5 (mit/util/scripts/install.sh), which was
# later released in X11R6 (xc/config/util/install.sh) with the
# following copyright and license.
#
# Copyright (C) 1994 X Consortium
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#
# Except as contained in this notice, the name of the X Consortium shall not
# be used in advertising or otherwise to promote the sale, use or other deal-
# ings in this Software without prior written authorization from the X Consor-
# tium.
#
#
# FSF changes to this file are in the public domain.
#
# Calling this script install-sh is preferred over install.sh, to prevent
# 'make' implicit rules from creating a file called install from it
# when there is no Makefile.
#
# This script is compatible with the BSD install script, but was written
# from scratch.
# Defaults: field separators, the helper programs, the state variables
# that the options set, and the text for --help.

# nl holds a single newline character.
nl='
'
IFS=" "" $nl"
# set DOITPROG to echo to test this script
# Don't use :- since 4.3BSD and earlier shells don't like it.
doit=${DOITPROG-}
if test -z "$doit"; then
doit_exec=exec
else
doit_exec=$doit
fi
# Put in absolute file names if you don't have them in your path;
# or use environment vars.
chgrpprog=${CHGRPPROG-chgrp}
chmodprog=${CHMODPROG-chmod}
chownprog=${CHOWNPROG-chown}
cmpprog=${CMPPROG-cmp}
cpprog=${CPPROG-cp}
mkdirprog=${MKDIRPROG-mkdir}
mvprog=${MVPROG-mv}
rmprog=${RMPROG-rm}
stripprog=${STRIPPROG-strip}
# posix_glob is '?' until initialize_posix_glob has been eval'ed. After
# that it is empty if the shell accepts 'set -f', and ':' if not, so that
# "$posix_glob set -f" either runs 'set -f' or is a no-op.
posix_glob='?'
initialize_posix_glob='
test "$posix_glob" != "?" || {
if (set -f) 2>/dev/null; then
posix_glob=
else
posix_glob=:
fi
}
'
# Empty until 'mkdir -p' has been tested; then 'false' or ':'.
posix_mkdir=
# Desired mode of installed file.
mode=0755
chgrpcmd=
chmodcmd=$chmodprog
chowncmd=
mvcmd=$mvprog
rmcmd="$rmprog -f"
stripcmd=
src=
dst=
dir_arg=
dst_arg=
copy_on_change=false
no_target_directory=
usage="\
Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
or: $0 [OPTION]... SRCFILES... DIRECTORY
or: $0 [OPTION]... -t DIRECTORY SRCFILES...
or: $0 [OPTION]... -d DIRECTORIES...
In the 1st form, copy SRCFILE to DSTFILE.
In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
In the 4th, create DIRECTORIES.
Options:
--help display this help and exit.
--version display version info and exit.
-c (ignored)
-C install only if different (preserve the last data modification time)
-d create directories instead of installing files.
-g GROUP $chgrpprog installed files to GROUP.
-m MODE $chmodprog installed files to MODE.
-o USER $chownprog installed files to USER.
-s $stripprog installed files.
-t DIRECTORY install into DIRECTORY.
-T report an error if DSTFILE is a directory.
Environment variables override the default commands:
CHGRPPROG CHMODPROG CHOWNPROG CMPPROG CPPROG MKDIRPROG MVPROG
RMPROG STRIPPROG
"
# Parse the options. The loop stops at '--' or at the first argument that
# is not an option; what is left in $@ are the files or directories.
while test $# -ne 0; do
case $1 in
-c) ;;
-C) copy_on_change=true;;
-d) dir_arg=true;;
-g) chgrpcmd="$chgrpprog $2"
shift;;
--help) echo "$usage"; exit $?;;
-m) mode=$2
# Reject modes with white space or glob characters: $mode is used
# unquoted further on.
case $mode in
*' '* | *' '* | *'
'* | *'*'* | *'?'* | *'['*)
echo "$0: invalid mode: $mode" >&2
exit 1;;
esac
shift;;
-o) chowncmd="$chownprog $2"
shift;;
-s) stripcmd=$stripprog;;
-t) dst_arg=$2
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
shift;;
-T) no_target_directory=true;;
--version) echo "$0 $scriptversion"; exit $?;;
--) shift
break;;
-*) echo "$0: invalid option: $1" >&2
exit 1;;
*) break;;
esac
shift
done
# Without -d and -t, take the destination from the end of the arguments.
if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then
# When -d is used, all remaining arguments are directories to create.
# When -t is used, the destination is already specified.
# Otherwise, the last argument is the destination. Remove it from $@.
# The loop rotates $@: each pass appends the previous candidate and drops
# the current argument, so in the end dst_arg is the last argument and $@
# holds all the others in their original order.
for arg
do
if test -n "$dst_arg"; then
# $@ is not empty: it contains at least $arg.
set fnord "$@" "$dst_arg"
shift # fnord
fi
shift # arg
dst_arg=$arg
# Protect names problematic for 'test' and other utilities.
case $dst_arg in
-* | [=\(\)!]) dst_arg=./$dst_arg;;
esac
done
fi
if test $# -eq 0; then
if test -z "$dir_arg"; then
echo "$0: no input file specified." >&2
exit 1
fi
# It's OK to call 'install-sh -d' without argument.
# This can happen when creating conditional directories.
exit 0
fi
# When installing files (no -d): set up the signal traps and compute the
# umask under which the temporary copy is made.
if test -z "$dir_arg"; then
do_exit='(exit $ret); exit $ret'
# On signals 1, 2, 13 and 15, exit with status 128 + the signal number.
trap "ret=129; $do_exit" 1
trap "ret=130; $do_exit" 2
trap "ret=141; $do_exit" 13
trap "ret=143; $do_exit" 15
# Set umask so as not to create temps with too-generous modes.
# However, 'strip' requires both read and write access to temps.
case $mode in
# Optimize common cases.
*644) cp_umask=133;;
*755) cp_umask=22;;
*[0-7])
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw='% 200'
fi
cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;;
*)
# The mode does not end in an octal digit, so treat it as symbolic.
if test -z "$stripcmd"; then
u_plus_rw=
else
u_plus_rw=,u+rw
fi
cp_umask=$mode$u_plus_rw;;
esac
fi
for src
do
# For the current $src, determine dst (the final name), dstdir (the
# directory that must exist) and dstdir_status (0 if dstdir exists).
# Protect names problematic for 'test' and other utilities.
case $src in
-* | [=\(\)!]) src=./$src;;
esac
if test -n "$dir_arg"; then
# With -d, the argument itself is the directory to create.
dst=$src
dstdir=$dst
test -d "$dstdir"
dstdir_status=$?
else
# Waiting for this to be detected by the "$cpprog $src $dsttmp" command
# might cause directories to be created, which would be especially bad
# if $src (and thus $dsttmp) contains '*'.
if test ! -f "$src" && test ! -d "$src"; then
echo "$0: $src does not exist." >&2
exit 1
fi
if test -z "$dst_arg"; then
echo "$0: no destination specified." >&2
exit 1
fi
dst=$dst_arg
# If destination is a directory, append the input filename; won't work
# if double slashes aren't ignored.
if test -d "$dst"; then
if test -n "$no_target_directory"; then
echo "$0: $dst_arg: Is a directory" >&2
exit 1
fi
dstdir=$dst
dst=$dstdir/`basename "$src"`
dstdir_status=0
else
# Prefer dirname, but fall back on a substitute if dirname fails.
dstdir=`
(dirname "$dst") 2>/dev/null ||
expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \
X"$dst" : 'X\(//\)[^/]' \| \
X"$dst" : 'X\(//\)$' \| \
X"$dst" : 'X\(/\)' \| . 2>/dev/null ||
echo X"$dst" |
sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{
s//\1/
q
}
/^X\(\/\/\)[^/].*/{
s//\1/
q
}
/^X\(\/\/\)$/{
s//\1/
q
}
/^X\(\/\).*/{
s//\1/
q
}
s/.*/./; q'
`
test -d "$dstdir"
dstdir_status=$?
fi
fi
# Create $dstdir if it does not exist yet. 'mkdir -p' is used if a
# one-time test shows that it behaves as POSIX requires; otherwise the
# directory is created component by component.
obsolete_mkdir_used=false
if test $dstdir_status != 0; then
case $posix_mkdir in
'')
# Create intermediate dirs using mode 755 as modified by the umask.
# This is like FreeBSD 'install' as of 1997-10-28.
umask=`umask`
case $stripcmd.$umask in
# Optimize common cases.
*[2367][2367]) mkdir_umask=$umask;;
.*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;;
*[0-7])
mkdir_umask=`expr $umask + 22 \
- $umask % 100 % 40 + $umask % 20 \
- $umask % 10 % 4 + $umask % 2
`;;
*) mkdir_umask=$umask,go-w;;
esac
# With -d, create the new directory with the user-specified mode.
# Otherwise, rely on $mkdir_umask.
if test -n "$dir_arg"; then
mkdir_mode=-m$mode
else
mkdir_mode=
fi
posix_mkdir=false
case $umask in
*[123567][0-7][0-7])
# POSIX mkdir -p sets u+wx bits regardless of umask, which
# is incompatible with FreeBSD 'install' when (umask & 300) != 0.
;;
*)
# Try out 'mkdir -p' in a scratch directory.
tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$
trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0
if (umask $mkdir_umask &&
exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1
then
if test -z "$dir_arg" || {
# Check for POSIX incompatibilities with -m.
# HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or
# other-writable bit of parent directory when it shouldn't.
# FreeBSD 6.1 mkdir -m -p sets mode of existing directory.
ls_ld_tmpdir=`ls -ld "$tmpdir"`
case $ls_ld_tmpdir in
d????-?r-*) different_mode=700;;
d????-?--*) different_mode=755;;
*) false;;
esac &&
$mkdirprog -m$different_mode -p -- "$tmpdir" && {
ls_ld_tmpdir_1=`ls -ld "$tmpdir"`
test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1"
}
}
then posix_mkdir=:
fi
rmdir "$tmpdir/d" "$tmpdir"
else
# Remove any dirs left behind by ancient mkdir implementations.
rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null
fi
trap '' 0;;
esac;;
esac
if
$posix_mkdir && (
umask $mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir"
)
then :
else
# The umask is ridiculous, or mkdir does not conform to POSIX,
# or it failed possibly due to a race condition. Create the
# directory the slow way, step by step, checking for races as we go.
case $dstdir in
/*) prefix='/';;
[-=\(\)!]*) prefix='./';;
*) prefix='';;
esac
eval "$initialize_posix_glob"
# Split $dstdir at the slashes into the positional parameters.
oIFS=$IFS
IFS=/
$posix_glob set -f
set fnord $dstdir
shift
$posix_glob set +f
IFS=$oIFS
prefixes=
for d
do
test X"$d" = X && continue
prefix=$prefix$d
if test -d "$prefix"; then
prefixes=
else
if $posix_mkdir; then
# Set the umask with the 'umask' command. An assignment
# 'umask=...' would only set a shell variable and the directory
# would be created under the caller's umask.
(umask $mkdir_umask &&
$doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break
# Don't fail if two instances are running concurrently.
test -d "$prefix" || exit 1
else
# Quote the name for the eval below.
case $prefix in
*\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;;
*) qprefix=$prefix;;
esac
prefixes="$prefixes '$qprefix'"
fi
fi
prefix=$prefix/
done
if test -n "$prefixes"; then
# Don't fail if two instances are running concurrently.
(umask $mkdir_umask &&
eval "\$doit_exec \$mkdirprog $prefixes") ||
test -d "$dstdir" || exit 1
obsolete_mkdir_used=true
fi
fi
fi
# With -d, only set owner, group and mode of the directory. Otherwise
# copy $src to a temporary file inside $dstdir, set its attributes and
# then move it to $dst.
if test -n "$dir_arg"; then
{ test -z "$chowncmd" || $doit $chowncmd "$dst"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } &&
{ test "$obsolete_mkdir_used$chowncmd$chgrpcmd" = false ||
test -z "$chmodcmd" || $doit $chmodcmd $mode "$dst"; } || exit 1
else
# Make a couple of temp file names in the proper directory.
dsttmp=$dstdir/_inst.$$_
rmtmp=$dstdir/_rm.$$_
# Trap to clean up those temp files at exit.
trap 'ret=$?; rm -f "$dsttmp" "$rmtmp" && exit $ret' 0
# Copy the file name to the temp name.
(umask $cp_umask && $doit_exec $cpprog "$src" "$dsttmp") &&
# and set any options; do chmod last to preserve setuid bits.
#
# If any of these fail, we abort the whole thing. If we want to
# ignore errors from any of these, just make sure not to ignore
# errors from the above "$doit $cpprog $src $dsttmp" command.
#
{ test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } &&
{ test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } &&
{ test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } &&
{ test -z "$chmodcmd" || $doit $chmodcmd $mode "$dsttmp"; } &&
# If -C, don't bother to copy if it wouldn't change the file.
# "Unchanged" means: fields 1, 3, 4 and 5 of the 'ls -l' output are equal
# and cmp finds no difference in the contents.
if $copy_on_change &&
old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` &&
new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` &&
eval "$initialize_posix_glob" &&
$posix_glob set -f &&
set X $old && old=:$2:$4:$5:$6 &&
set X $new && new=:$2:$4:$5:$6 &&
$posix_glob set +f &&
test "$old" = "$new" &&
$cmpprog "$dst" "$dsttmp" >/dev/null 2>&1
then
rm -f "$dsttmp"
else
# Rename the file to the real destination.
$doit $mvcmd -f "$dsttmp" "$dst" 2>/dev/null ||
# The rename failed, perhaps because mv can't rename something else
# to itself, or perhaps because mv is so ancient that it does not
# support -f.
{
# Now remove or move aside any old file at destination location.
# We try this two ways since rm can't unlink itself on some
# systems and the destination file might be busy for other
# reasons. In this case, the final cleanup might fail but the new
# file should still install successfully.
{
test ! -f "$dst" ||
$doit $rmcmd -f "$dst" 2>/dev/null ||
{ $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null &&
{ $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; }
} ||
{ echo "$0: cannot unlink or rename $dst" >&2
(exit 1); exit 1
}
} &&
# Now rename the file to the real destination.
$doit $mvcmd "$dsttmp" "$dst"
}
fi || exit 1
# This file is done; remove the clean-up trap again.
trap '' 0
fi
done
# Local variables:
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:
html-xml-utils-7.6/selmatch.e 0000644 0001750 0001750 00000000340 13237331770 013160 0000000 0000000 extern void init_language(const conststring lang);
extern void set_case_insensitive(void);
extern _Bool
same(const string a, const string b);
extern _Bool
matches_sel(const Tree t, const Selector s);
html-xml-utils-7.6/hxcount.c 0000645 0001750 0001750 00000010511 13205765541 013052 0000000 0000000 /*
* Count elements and attributes.
*
* This counts occurrences of elements and element/attribute pairs.
* This is just an example of how to use the parser.
* No attempt is made to count efficiently.
*
* Copyright © 1994-2000 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Bert Bos
* Created Nov 1998
* $Id: hxcount.c,v 1.5 2017/11/24 09:50:25 bbos Exp $
*/
#include "config.h"
#include
#ifdef HAVE_UNISTD_H
# include
#endif
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
# ifndef HAVE_STRdup
# include "strdup.e"
# endif
#endif
#include
#include
#include
#include "export.h"
#include "types.e"
#include "html.e"
#include "scan.e"
#include "dict.e"
#include "openurl.e"
#include "errexit.e"
typedef struct _pair {
char *name;
int count;
} pair;
static pair *freq = NULL;
static int nrelems = 0;
static bool has_errors = false;
/* countstring -- add 1 to number of occurences for s (case-insensitively) */
static void countstring(char *s)
{
int i;
i = 0;
while (i < nrelems && strcasecmp(freq[i].name, s) != 0) i++;
if (i == nrelems) {
nrelems++;
freq = realloc(freq, nrelems * sizeof(freq[0]));
if (freq == NULL) {fprintf(stderr, "Out of memory\n"); exit(4);}
freq[i].name = strdup(s);
freq[i].count = 0;
}
freq[i].count++;
}
/* count -- count element types and their attributes
 *
 * Counts the element name itself and, for each attribute, the string
 * "name/attribute". Exits with status 4 if memory runs out.
 */
static void count(char *name, pairlist attribs)
{
  pairlist a;
  size_t size;
  char *key;

  /* Count element name */
  countstring(name);

  /* Count attribute names (or rather, the strings "elem/attrib") */
  for (a = attribs; a != NULL; a = a->next) {
    size = strlen(name) + strlen(a->name) + 2;	/* Room for '/' and '\0' */
    key = malloc(size);
    if (key == NULL) {fprintf(stderr, "Out of memory\n"); exit(4);}
    snprintf(key, size, "%s/%s", name, a->name);
    countstring(key);
    free(key);
  }
}
/* handle_error -- report a parse error on stderr and remember it happened */
void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;
  has_errors = true;		/* Makes main() return a non-zero status */
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- called before the first event is reported; no client data */
void* start(void)
{
  return NULL;
}
/* end -- called after the last event is reported; nothing to do */
void end(void *clientdata)
{
  (void) clientdata;
}
/* handle_comment -- called after a comment is parsed; not counted */
void handle_comment(void *clientdata, string commenttext)
{
  (void) clientdata;
  (void) commenttext;
}
/* handle_text -- called after a text chunk is parsed; not counted */
void handle_text(void *clientdata, string text)
{
  (void) clientdata;
  (void) text;
}
/* handle_decl -- called after a declaration is parsed; not counted */
void handle_decl(void *clientdata, string gi, string fpi, string url)
{
  (void) clientdata;
  (void) gi;
  (void) fpi;
  (void) url;
}
/* handle_pi -- called after a processing instruction is parsed; not counted */
void handle_pi(void *clientdata, string pi_text)
{
  (void) clientdata;
  (void) pi_text;
}
/* handle_starttag -- called after a start tag is parsed; count the element */
void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  (void) clientdata;
  count(name, attribs);
}
/* handle_emptytag -- called after an empty tag is parsed
 *
 * Counts the element and its attributes, exactly like a start tag.
 */
extern void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  (void) clientdata;		/* not used */

  count(name, attribs);
}
/* handle_endtag -- called after an endtag is parsed (name may be "")
 *
 * End tags are not counted, so there is nothing to do.
 */
extern void handle_endtag(void *clientdata, string name)
{
  (void) clientdata;
  (void) name;
}
/* usage -- print version and usage message on stderr, exit with status 2 */
static void usage(string prog)
{
  FILE *out = stderr;

  fprintf(out, "Version %s\n", VERSION);
  fprintf(out, "Usage: %s [html-file]\n", prog);
  exit(2);
}
/* main -- parse input, count elements and attributes of each type
 *
 * Reads stdin when called without arguments, otherwise the single
 * file or URL given. Prints one "count<TAB>name" line per counted
 * string.
 *
 * Exit status: 0 on success, 1 if the input could not be opened or
 * the parser reported errors, 2 on wrong usage, 3 if yyparse()
 * failed. A non-200 status from fopenurl() ends the program through
 * errexit().
 */
int main(int argc, char *argv[])
{
  int status = 200;		/* stays 200 when reading stdin */
  int n;

  /* Register our callbacks with the parser */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  /* Select the input */
  switch (argc) {
  case 1:
    yyin = stdin;
    break;
  case 2:
    yyin = fopenurl(argv[1], "r", &status);
    break;
  default:
    usage(argv[0]);
  }
  if (yyin == NULL) {
    perror(argv[1]);
    exit(1);
  }
  if (status != 200) {
    errexit("%s : %s\n", argv[1], http_strerror(status));
  }

  /* Parse input */
  if (yyparse() != 0) {
    exit(3);
  }

  /* Print results */
  n = 0;
  while (n < nrelems) {
    printf("%6d\t%s\n", freq[n].count, freq[n].name);
    n++;
  }

  if (has_errors) return 1;
  return 0;
}
html-xml-utils-7.6/strdup.c 0000645 0001750 0001750 00000001051 13205765541 012702 0000000 0000000 /*
* Copyright © 1994-2000 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: 31 Mar 2000
* Version: $Id: strdup.c,v 1.4 2017/11/24 09:50:25 bbos Exp $
**/
#include "config.h"
#include
#include "export.h"
#ifndef HAVE_STRDUP
/* strdup -- return a malloc'ed copy of the string s; NULL if no memory
 *
 * The caller owns the returned copy.
 */
EXPORT char *strdup(const char *s)
{
  char *copy = malloc((strlen(s) + 1) * sizeof(*s));

  if (copy == NULL) return NULL;
  strcpy(copy, s);
  return copy;
}
#endif /* HAVE_STRDUP */
html-xml-utils-7.6/headers.e 0000644 0001750 0001750 00000000111 13205770445 012770 0000000 0000000 extern _Bool
read_mail_headers(FILE *f, Dictionary headers);
html-xml-utils-7.6/README 0000645 0001750 0001750 00000007043 12703565452 012105 0000000 0000000 In this directory:
html-xml-utils-*.tar.gz
A number of simple utilities for manipulating HTML and XML files.
See INSTALL for generic installation instructions.
Get the source at: http://www.w3.org/Tools/HTML-XML-utils/
htmlutils-*.tar.gz
Old versions (before version 0.1)
Note 1: Your package manager may have a precompiled copy
already. There are versions in Debian, Ubuntu, Macports and others. In
that case you need to download from here only if you want a different
version or want to hack on the source.
Note 2: the names changed in version 5.0: most programs got an "hx"
prefix. Please, uninstall any version < 5.0 before installing a
version >= 5.0
cexport (1) - create headerfile of exported declarations from a C file
hxaddid (1) - add ID's to selected elements
hxcite (1) - replace bibliographic references by hyperlinks
hxcite-mkbib (1) - expand references and create bibliography
hxcopy (1) - copy an HTML file while preserving relative links
hxcount (1) - count elements and attributes in HTML or XML files
hxextract (1) - extract selected elements
hxclean (1) - apply heuristics to correct an HTML file
hxprune (1) - remove marked elements from an HTML file
hxincl (1) - expand included HTML or XML files
hxindex (1) - create an alphabetically sorted index
hxmkbib (1) - create bibliography from a template
hxmultitoc (1) - create a table of contents for a set of HTML files
hxname2id - move some ID= or NAME= from A elements to their parents
hxnormalize (1) - pretty-print an HTML file
hxnum (1) - number section headings in an HTML file
hxpipe (1) - convert XML to a format easier to parse with Perl or AWK
hxprintlinks (1) - number links & add table of URLs at end of an HTML file
hxremove (1) - remove selected elements from an XML file
hxtabletrans (1) - transpose an HTML or XHTML table
hxtoc (1) - insert a table of contents in an HTML file
hxuncdata (1) - replace CDATA sections by character entities
hxunent (1) - replace HTML predefined character entities by UTF-8
hxunpipe (1) - convert output of pipe back to XML format
hxunxmlns (1) - replace "global names" by XML Namespace prefixes
hxwls (1) - list links in an HTML file
hxxmlns (1) - replace XML Namespace prefixes by "global names"
asc2xml, xml2asc (1) - convert between UTF8 and &#nnn; entities
hxref (1) - generate cross-references
hxselect (1) - extract elements that match a (CSS) selector
This package is configured with automake/autoconf. Generic
instructions are in the file INSTALL. Here are some specific problems
that may arise:
1) Error when running lex:
lex scan.l && mv lex.yy.c scan.c
"scan.l":line 2: Error: missing translation value
The scan.l file uses features of flex that do not exist in lex.
However, it is not necessary to run lex, since the file scan.c is
provided in the package. Just do a "touch scan.c" to make sure
"make" will not try to generate it anew.
2) Warning about "libidn not found":
Without libidn2 or libidn, hxwls will not be able to translate
Internationalized Domain Names to ASCII (option -a). You can install
either libidn2 or libidn.
If you install them in a non-standard location, use --with-libidn2
or --with-libidn when invoking ./configure. E.g., if you install
libidn from MacPorts on Mac OS X, run:
./configure --with-libidn=/opt/local
$Date: 2016/04/14 00:42:15 $
html-xml-utils-7.6/scan.c 0000755 0001750 0001750 00000236645 13205772316 012330 0000000 0000000
#line 3 "scan.c"
#define YY_INT_ALIGNED short int
/* A lexical scanner generated by flex */
#define FLEX_SCANNER
#define YY_FLEX_MAJOR_VERSION 2
#define YY_FLEX_MINOR_VERSION 6
#define YY_FLEX_SUBMINOR_VERSION 1
#if YY_FLEX_SUBMINOR_VERSION > 0
#define FLEX_BETA
#endif
/* First, we deal with platform-specific or compiler-specific issues. */
/* begin standard C headers. */
#include
#include
#include
#include
/* end standard C headers. */
/* flex integer type definitions
 *
 * Provides the flex_int8_t ... flex_uint32_t typedefs and makes sure
 * the INT8_MIN ... UINT32_MAX limit macros exist. On C99 and later
 * the types come from <inttypes.h>; otherwise they are mapped onto
 * the basic integer types and any missing limit macro is defined
 * here.
 */
#ifndef FLEXINT_H
#define FLEXINT_H
/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h,
 * if you want the limit (max/min) macros for int types.
 */
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS 1
#endif
#include <inttypes.h>
typedef int8_t flex_int8_t;
typedef uint8_t flex_uint8_t;
typedef int16_t flex_int16_t;
typedef uint16_t flex_uint16_t;
typedef int32_t flex_int32_t;
typedef uint32_t flex_uint32_t;
#else
typedef signed char flex_int8_t;
typedef short int flex_int16_t;
typedef int flex_int32_t;
typedef unsigned char flex_uint8_t;
typedef unsigned short int flex_uint16_t;
typedef unsigned int flex_uint32_t;
/* Limits of integral types. */
#ifndef INT8_MIN
#define INT8_MIN (-128)
#endif
#ifndef INT16_MIN
#define INT16_MIN (-32767-1)
#endif
#ifndef INT32_MIN
#define INT32_MIN (-2147483647-1)
#endif
#ifndef INT8_MAX
#define INT8_MAX (127)
#endif
#ifndef INT16_MAX
#define INT16_MAX (32767)
#endif
#ifndef INT32_MAX
#define INT32_MAX (2147483647)
#endif
#ifndef UINT8_MAX
#define UINT8_MAX (255U)
#endif
#ifndef UINT16_MAX
#define UINT16_MAX (65535U)
#endif
#ifndef UINT32_MAX
#define UINT32_MAX (4294967295U)
#endif
#endif /* ! C99 */
#endif /* ! FLEXINT_H */
/* TODO: this is always defined, so inline it */
#define yyconst const
#if defined(__GNUC__) && __GNUC__ >= 3
#define yynoreturn __attribute__((__noreturn__))
#else
#define yynoreturn
#endif
/* Returned upon end-of-file. */
#define YY_NULL 0
/* Promotes a possibly negative, possibly signed char to an unsigned
 * integer for use as an array index. If the signed char is negative,
 * we want to instead treat it as an 8-bit unsigned char, hence the
 * double cast.
 *
 * The argument is parenthesized so that the casts apply to the whole
 * argument expression, not just to its first operand.
 */
#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) (c))
/* Enter a start condition. This macro really ought to take a parameter,
* but we do it the disgusting crufty way forced on us by the ()-less
* definition of BEGIN.
*/
#define BEGIN (yy_start) = 1 + 2 *
/* Translate the current start state into a value that can be later handed
* to BEGIN to return to the state. The YYSTATE alias is for lex
* compatibility.
*/
#define YY_START (((yy_start) - 1) / 2)
#define YYSTATE YY_START
/* Action number for EOF rule of a given start state. */
#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1)
/* Special action meaning "start processing a new file". */
#define YY_NEW_FILE yyrestart(yyin )
#define YY_END_OF_BUFFER_CHAR 0
/* Size of default input buffer. */
/* Only defined when the build has not already supplied YY_BUF_SIZE. */
#ifndef YY_BUF_SIZE
#ifdef __ia64__
/* On IA-64, the buffer size is 16k, not 8k.
 * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case.
 * Ditto for the __ia64__ case accordingly.
 */
#define YY_BUF_SIZE 32768
#else
#define YY_BUF_SIZE 16384
#endif /* __ia64__ */
#endif
/* The state buf must be large enough to hold one state per character in the main buffer.
 */
/* Size in bytes: (YY_BUF_SIZE + 2) entries of yy_state_type. The extra
 * two entries presumably match the end-of-buffer characters -- confirm.
 */
#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type))
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
#ifndef YY_TYPEDEF_YY_SIZE_T
#define YY_TYPEDEF_YY_SIZE_T
typedef size_t yy_size_t;
#endif
extern int yyleng;
extern FILE *yyin, *yyout;
#define EOB_ACT_CONTINUE_SCAN 0
#define EOB_ACT_END_OF_FILE 1
#define EOB_ACT_LAST_MATCH 2
#define YY_LESS_LINENO(n)
#define YY_LINENO_REWIND_TO(ptr)
/* Return all but the first "n" matched characters back to the input stream. */
#define yyless(n) \
do \
{ \
/* Undo effects of setting up yytext. */ \
int yyless_macro_arg = (n); \
YY_LESS_LINENO(yyless_macro_arg);\
*yy_cp = (yy_hold_char); \
YY_RESTORE_YY_MORE_OFFSET \
(yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \
YY_DO_BEFORE_ACTION; /* set up yytext again */ \
} \
while ( 0 )
#define unput(c) yyunput( c, (yytext_ptr) )
/* yy_buffer_state -- bookkeeping for one scanner input buffer.
 * The guard macro prevents a second definition of the struct.
 */
#ifndef YY_STRUCT_YY_BUFFER_STATE
#define YY_STRUCT_YY_BUFFER_STATE
struct yy_buffer_state
{
/* Presumably the stdio stream this buffer is filled from -- confirm
 * against yy_init_buffer().
 */
FILE *yy_input_file;
char *yy_ch_buf; /* input buffer */
char *yy_buf_pos; /* current position in input buffer */
/* Size of input buffer in bytes, not including room for EOB
 * characters.
 */
int yy_buf_size;
/* Number of characters read into yy_ch_buf, not including EOB
 * characters.
 */
int yy_n_chars;
/* Whether we "own" the buffer - i.e., we know we created it,
 * and can realloc() it to grow it, and should free() it to
 * delete it.
 */
int yy_is_our_buffer;
/* Whether this is an "interactive" input source; if so, and
 * if we're using stdio for input, then we want to use getc()
 * instead of fread(), to make sure we stop fetching input after
 * each newline.
 */
int yy_is_interactive;
/* Whether we're considered to be at the beginning of a line.
 * If so, '^' rules will be active on the next match, otherwise
 * not.
 */
int yy_at_bol;
int yy_bs_lineno; /**< The line count. */
int yy_bs_column; /**< The column count. */
/* Whether to try to fill the input buffer when we reach the
 * end of it.
 */
int yy_fill_buffer;
/* Buffer status; the YY_BUFFER_* constants defined below appear to be
 * its possible values.
 */
int yy_buffer_status;
#define YY_BUFFER_NEW 0
#define YY_BUFFER_NORMAL 1
/* When an EOF's been seen but there's still some text to process
 * then we mark the buffer as YY_EOF_PENDING, to indicate that we
 * shouldn't try reading from the input source any more. We might
 * still have a bunch of tokens to match, though, because of
 * possible backing-up.
 *
 * When we actually see the EOF, we change the status to "new"
 * (via yyrestart()), so that the user can continue scanning by
 * just pointing yyin at a new input file.
 */
#define YY_BUFFER_EOF_PENDING 2
};
#endif /* !YY_STRUCT_YY_BUFFER_STATE */
/* Stack of input buffers. */
static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */
static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */
static YY_BUFFER_STATE * yy_buffer_stack = NULL; /**< Stack as an array. */
/* We provide macros for accessing buffer states in case in the
 * future we want to put the buffer states in a more general
 * "scanner state".
 *
 * Returns the top of the stack, or NULL.
 *
 * The NULL test is on the stack array itself: while yy_buffer_stack
 * has not been allocated the macro yields NULL instead of indexing it.
 */
#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \
? (yy_buffer_stack)[(yy_buffer_stack_top)] \
: NULL)
/* Same as previous macro, but useful when we know that the buffer stack is not
 * NULL or when we need an lvalue. For internal use only.
 *
 * It indexes yy_buffer_stack without any check, so the stack must exist.
 */
#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)]
/* yy_hold_char holds the character lost when yytext is formed. */
static char yy_hold_char;
static int yy_n_chars; /* number of characters read into yy_ch_buf */
/* Definition of the yyleng that is declared extern earlier in this file;
 * presumably the length of the current match -- confirm.
 */
int yyleng;
/* Points to current character in buffer. */
static char *yy_c_buf_p = NULL;
static int yy_init = 0; /* whether we need to initialize */
/* BEGIN stores 1 + 2 * condition here; YY_START decodes it again. */
static int yy_start = 0; /* start state number */
/* Flag which is used to allow yywrap()'s to do buffer switches
 * instead of setting up a fresh yyin. A bit of a hack ...
 */
static int yy_did_buffer_switch_on_eof;
void yyrestart (FILE *input_file );
void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer );
YY_BUFFER_STATE yy_create_buffer (FILE *file,int size );
void yy_delete_buffer (YY_BUFFER_STATE b );
void yy_flush_buffer (YY_BUFFER_STATE b );
void yypush_buffer_state (YY_BUFFER_STATE new_buffer );
void yypop_buffer_state (void );
static void yyensure_buffer_stack (void );
static void yy_load_buffer_state (void );
static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file );
#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER )
YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size );
YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str );
YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len );
void *yyalloc (yy_size_t );
void *yyrealloc (void *,yy_size_t );
void yyfree (void * );
#define yy_new_buffer yy_create_buffer
/* yy_set_interactive -- set the yy_is_interactive flag of the current buffer.
 * If there is no current buffer yet, the buffer stack is set up with
 * yyensure_buffer_stack() and a buffer of YY_BUF_SIZE reading from yyin
 * is created first. The expansion is a bare { } block, not an expression.
 */
#define yy_set_interactive(is_interactive) \
{ \
if ( ! YY_CURRENT_BUFFER ){ \
yyensure_buffer_stack (); \
YY_CURRENT_BUFFER_LVALUE = \
yy_create_buffer(yyin,YY_BUF_SIZE ); \
} \
YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \
}
/* yy_set_bol -- set the yy_at_bol (beginning-of-line) flag of the current
 * buffer. If there is no current buffer yet, the buffer stack is set up
 * with yyensure_buffer_stack() and a buffer of YY_BUF_SIZE reading from
 * yyin is created first. The expansion is a bare { } block, not an
 * expression.
 */
#define yy_set_bol(at_bol) \
{ \
if ( ! YY_CURRENT_BUFFER ){\
yyensure_buffer_stack (); \
YY_CURRENT_BUFFER_LVALUE = \
yy_create_buffer(yyin,YY_BUF_SIZE ); \
} \
YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \
}
/* YY_AT_BOL -- read the yy_at_bol flag of the current buffer. Uses the
 * unchecked lvalue macro, so a current buffer must already exist.
 */
#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol)
/* Begin user sect3 */
#define yywrap() (/*CONSTCOND*/1)
#define YY_SKIP_YYWRAP
typedef unsigned char YY_CHAR;
FILE *yyin = NULL, *yyout = NULL;
typedef int yy_state_type;
extern int yylineno;
int yylineno = 1;
extern char *yytext;
#ifdef yytext_ptr
#undef yytext_ptr
#endif
#define yytext_ptr yytext
static yyconst flex_int16_t yy_nxt[][39] =
{
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0
},
{
13, 14, 14, 15, 16, 14, 14, 14, 14, 14,
14, 14, 17, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 18
},
{
13, 14, 14, 15, 16, 14, 14, 14, 14, 14,
14, 14, 17, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 18
},
{
13, 19, 20, 21, 22, 19, 19, 19, 23, 23,
24, 23, 25, 26, 27, 19, 23, 23, 23, 23,
23, 23, 23, 23, 19, 19, 23, 23, 23, 23,
23, 23, 23, 23, 28, 19, 23, 23, 23
},
{
13, 19, 20, 21, 22, 19, 19, 19, 23, 23,
24, 23, 25, 26, 27, 19, 23, 23, 23, 23,
23, 23, 23, 23, 19, 19, 23, 23, 23, 23,
23, 23, 23, 23, 28, 19, 23, 23, 23
},
{
13, 29, 30, 31, 32, 29, 33, 34, 29, 29,
29, 29, 19, 29, 19, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29
},
{
13, 29, 30, 31, 32, 29, 33, 34, 29, 29,
29, 29, 19, 29, 19, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29, 29,
29, 29, 29, 29, 29, 29, 29, 29, 29
},
{
13, 19, 35, 36, 37, 19, 38, 39, 40, 40,
19, 40, 19, 19, 41, 19, 40, 40, 40, 40,
40, 40, 40, 40, 19, 19, 40, 40, 40, 40,
40, 40, 40, 40, 42, 19, 40, 40, 40
},
{
13, 19, 35, 36, 37, 19, 38, 39, 40, 40,
19, 40, 19, 19, 41, 19, 40, 40, 40, 40,
40, 40, 40, 40, 19, 19, 40, 40, 40, 40,
40, 40, 40, 40, 42, 19, 40, 40, 40
},
{
13, 14, 14, 15, 16, 14, 14, 14, 14, 14,
14, 14, 17, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14
},
{
13, 14, 14, 15, 16, 14, 14, 14, 14, 14,
14, 14, 17, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
14, 14, 14, 14, 14, 14, 14, 14, 14
},
{
13, 43, 43, 44, 43, 43, 43, 43, 43, 43,
43, 43, 45, 43, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43, 43
},
{
13, 43, 43, 44, 43, 43, 43, 43, 43, 43,
43, 43, 45, 43, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43, 43, 43,
43, 43, 43, 43, 43, 43, 43, 43, 43
},
{
-13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
-13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
-13, -13, -13, -13, -13, -13, -13, -13, -13, -13,
-13, -13, -13, -13, -13, -13, -13, -13, -13
},
{
13, 46, 46, -14, -14, 46, 46, 46, 46, 46,
46, 46, -14, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46
},
{
13, -15, -15, -15, -15, -15, -15, -15, -15, -15,
-15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
-15, -15, -15, -15, -15, -15, -15, -15, -15, -15,
-15, -15, -15, -15, -15, -15, -15, -15, -15
},
{
13, -16, -16, 47, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16, -16,
-16, -16, -16, -16, -16, -16, -16, -16, -16
},
{
13, -17, -17, -17, -17, 48, -17, -17, 49, 49,
50, 49, -17, -17, -17, 51, 49, 49, 49, 49,
49, 49, 49, 49, -17, -17, 49, 49, 49, 49,
49, 49, 49, 49, 52, -17, 49, 49, 49
},
{
13, 46, 46, -18, -18, 46, 46, 46, 46, 46,
46, 46, -18, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 53, 46, 46
},
{
13, -19, -19, -19, -19, -19, -19, -19, -19, -19,
-19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
-19, -19, -19, -19, -19, -19, -19, -19, -19, -19,
-19, -19, -19, -19, -19, -19, -19, -19, -19
},
{
13, -20, 54, -20, -20, -20, -20, -20, -20, -20,
-20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
-20, -20, -20, -20, -20, -20, -20, -20, -20, -20,
-20, -20, -20, -20, -20, -20, -20, -20, -20
},
{
13, -21, -21, -21, -21, -21, -21, -21, -21, -21,
-21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
-21, -21, -21, -21, -21, -21, -21, -21, -21, -21,
-21, -21, -21, -21, -21, -21, -21, -21, -21
},
{
13, -22, -22, 55, -22, -22, -22, -22, -22, -22,
-22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
-22, -22, -22, -22, -22, -22, -22, -22, -22, -22,
-22, -22, -22, -22, -22, -22, -22, -22, -22
},
{
13, -23, -23, -23, -23, -23, -23, -23, 56, 56,
-23, 56, -23, -23, -23, -23, 56, 56, 56, 56,
56, 56, 56, 56, -23, -23, 56, 56, 56, 56,
56, 56, 56, 56, -23, -23, 56, 56, 56
},
{
13, -24, -24, -24, -24, -24, -24, -24, -24, -24,
-24, -24, -24, -24, 57, -24, -24, -24, -24, -24,
-24, -24, -24, -24, -24, -24, -24, -24, -24, -24,
-24, -24, -24, -24, -24, -24, -24, -24, -24
},
{
13, -25, -25, -25, -25, -25, -25, -25, -25, -25,
-25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
-25, -25, -25, -25, -25, -25, -25, -25, -25, -25,
-25, -25, -25, -25, -25, -25, -25, -25, -25
},
{
13, -26, -26, -26, -26, -26, -26, -26, -26, -26,
-26, -26, -26, -26, -26, -26, -26, -26, -26, -26,
-26, -26, -26, -26, -26, -26, -26, -26, -26, -26,
-26, -26, -26, -26, -26, -26, -26, -26, -26
},
{
13, -27, -27, -27, -27, -27, -27, -27, -27, -27,
-27, -27, -27, -27, -27, -27, -27, -27, -27, -27,
-27, -27, -27, -27, -27, -27, -27, -27, -27, -27,
-27, -27, -27, -27, -27, -27, -27, -27, -27
},
{
13, 58, -28, -28, -28, 58, 58, 58, 58, 58,
58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
58, 58, 58, 58, 58, 59, 58, 58, 58
},
{
13, 60, -29, -29, -29, 60, -29, -29, 60, 60,
60, 60, -29, 60, -29, 60, 60, 60, 60, 60,
60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
60, 60, 60, 60, 60, 60, 60, 60, 60
},
{
13, -30, 61, -30, -30, -30, -30, -30, -30, -30,
-30, -30, -30, -30, -30, -30, -30, -30, -30, -30,
-30, -30, -30, -30, -30, -30, -30, -30, -30, -30,
-30, -30, -30, -30, -30, -30, -30, -30, -30
},
{
13, -31, -31, -31, -31, -31, -31, -31, -31, -31,
-31, -31, -31, -31, -31, -31, -31, -31, -31, -31,
-31, -31, -31, -31, -31, -31, -31, -31, -31, -31,
-31, -31, -31, -31, -31, -31, -31, -31, -31
},
{
13, -32, -32, 62, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, -32, -32, -32,
-32, -32, -32, -32, -32, -32, -32, -32, -32
},
{
13, 63, 63, 63, 63, 63, 64, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63, 63
},
{
13, 65, 65, 65, 65, 65, 65, 66, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65
},
{
13, -35, 67, -35, -35, -35, -35, -35, -35, -35,
-35, -35, -35, -35, -35, -35, -35, -35, -35, -35,
-35, -35, -35, -35, -35, -35, -35, -35, -35, -35,
-35, -35, -35, -35, -35, -35, -35, -35, -35
},
{
13, -36, -36, -36, -36, -36, -36, -36, -36, -36,
-36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
-36, -36, -36, -36, -36, -36, -36, -36, -36, -36,
-36, -36, -36, -36, -36, -36, -36, -36, -36
},
{
13, -37, -37, 68, -37, -37, -37, -37, -37, -37,
-37, -37, -37, -37, -37, -37, -37, -37, -37, -37,
-37, -37, -37, -37, -37, -37, -37, -37, -37, -37,
-37, -37, -37, -37, -37, -37, -37, -37, -37
},
{
13, 69, 69, 69, 69, 69, 70, 69, 69, 69,
69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
69, 69, 69, 69, 69, 69, 69, 69, 69
},
{
13, 71, 71, 71, 71, 71, 71, 72, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71, 71
},
{
13, -40, -40, -40, -40, -40, -40, -40, 73, 73,
-40, 73, -40, -40, -40, -40, 73, 73, 73, 73,
73, 73, 73, 73, -40, -40, 73, 73, 73, 73,
73, 73, 73, 73, -40, -40, 73, 73, 73
},
{
13, -41, -41, -41, -41, -41, -41, -41, -41, -41,
-41, -41, -41, -41, -41, -41, -41, -41, -41, -41,
-41, -41, -41, -41, -41, -41, -41, -41, -41, -41,
-41, -41, -41, -41, -41, -41, -41, -41, -41
},
{
13, 74, -42, -42, -42, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 75, 74, 74, 74
},
{
13, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 77, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76
},
{
13, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 77, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76
},
{
13, 76, 76, 76, 76, 76, 76, 76, 76, 76,
78, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76
},
{
13, 46, 46, -46, -46, 46, 46, 46, 46, 46,
46, 46, -46, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46
},
{
13, -47, -47, -47, -47, -47, -47, -47, -47, -47,
-47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
-47, -47, -47, -47, -47, -47, -47, -47, -47, -47,
-47, -47, -47, -47, -47, -47, -47, -47, -47
},
{
13, -48, -48, -48, -48, -48, -48, -48, 79, -48,
-48, -48, -48, -48, -48, -48, -48, -48, 80, -48,
-48, -48, -48, -48, 81, -48, -48, -48, 80, -48,
-48, -48, -48, -48, -48, -48, -48, -48, -48
},
{
13, -49, -49, -49, -49, -49, -49, -49, 49, 49,
-49, 49, -49, -49, -49, -49, 49, 49, 49, 49,
49, 49, 49, 49, -49, -49, 49, 49, 49, 49,
49, 49, 49, 49, -49, -49, 49, 49, 49
},
{
13, -50, -50, -50, -50, -50, -50, -50, 82, 82,
-50, 82, -50, -50, -50, -50, 82, 82, 82, 82,
82, 82, 82, 82, -50, -50, 82, 82, 82, 82,
82, 82, 82, 82, 83, -50, 82, 82, 82
},
{
13, 84, 84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 85, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84, 84
},
{
13, 86, -52, -52, -52, 86, 86, 86, 86, 86,
86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
86, 86, 86, 86, 86, 87, 86, 86, 86
},
{
13, 46, 46, -53, -53, 46, 46, 46, 46, 46,
46, 46, -53, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 88, 46
},
{
13, -54, 54, -54, -54, -54, -54, -54, -54, -54,
-54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
-54, -54, -54, -54, -54, -54, -54, -54, -54, -54,
-54, -54, -54, -54, -54, -54, -54, -54, -54
},
{
13, -55, -55, -55, -55, -55, -55, -55, -55, -55,
-55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
-55, -55, -55, -55, -55, -55, -55, -55, -55, -55,
-55, -55, -55, -55, -55, -55, -55, -55, -55
},
{
13, -56, -56, -56, -56, -56, -56, -56, 56, 56,
-56, 56, -56, -56, -56, -56, 56, 56, 56, 56,
56, 56, 56, 56, -56, -56, 56, 56, 56, 56,
56, 56, 56, 56, -56, -56, 56, 56, 56
},
{
13, -57, -57, -57, -57, -57, -57, -57, -57, -57,
-57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
-57, -57, -57, -57, -57, -57, -57, -57, -57, -57,
-57, -57, -57, -57, -57, -57, -57, -57, -57
},
{
13, 58, -58, -58, -58, 58, 58, 58, 58, 58,
58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
58, 58, 58, 58, 58, 58, 58, 58, 58, 58,
58, 58, 58, 58, 58, 59, 58, 58, 58
},
{
13, -59, -59, -59, -59, -59, -59, -59, 56, 56,
-59, 56, -59, -59, -59, -59, 56, 56, 56, 56,
56, 56, 56, 56, -59, -59, 56, 56, 56, 56,
56, 56, 56, 56, -59, -59, 56, 56, 56
},
{
13, 60, -60, -60, -60, 60, -60, -60, 60, 60,
60, 60, -60, 60, -60, 60, 60, 60, 60, 60,
60, 60, 60, 60, 60, 60, 60, 60, 60, 60,
60, 60, 60, 60, 60, 60, 60, 60, 60
},
{
13, -61, 61, -61, -61, -61, -61, -61, -61, -61,
-61, -61, -61, -61, -61, -61, -61, -61, -61, -61,
-61, -61, -61, -61, -61, -61, -61, -61, -61, -61,
-61, -61, -61, -61, -61, -61, -61, -61, -61
},
{
13, -62, -62, -62, -62, -62, -62, -62, -62, -62,
-62, -62, -62, -62, -62, -62, -62, -62, -62, -62,
-62, -62, -62, -62, -62, -62, -62, -62, -62, -62,
-62, -62, -62, -62, -62, -62, -62, -62, -62
},
{
13, 63, 63, 63, 63, 63, 64, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63, 63, 63,
63, 63, 63, 63, 63, 63, 63, 63, 63
},
{
13, -64, -64, -64, -64, -64, -64, -64, -64, -64,
-64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
-64, -64, -64, -64, -64, -64, -64, -64, -64, -64,
-64, -64, -64, -64, -64, -64, -64, -64, -64
},
{
13, 65, 65, 65, 65, 65, 65, 66, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65, 65,
65, 65, 65, 65, 65, 65, 65, 65, 65
},
{
13, -66, -66, -66, -66, -66, -66, -66, -66, -66,
-66, -66, -66, -66, -66, -66, -66, -66, -66, -66,
-66, -66, -66, -66, -66, -66, -66, -66, -66, -66,
-66, -66, -66, -66, -66, -66, -66, -66, -66
},
{
13, -67, 67, -67, -67, -67, -67, -67, -67, -67,
-67, -67, -67, -67, -67, -67, -67, -67, -67, -67,
-67, -67, -67, -67, -67, -67, -67, -67, -67, -67,
-67, -67, -67, -67, -67, -67, -67, -67, -67
},
{
13, -68, -68, -68, -68, -68, -68, -68, -68, -68,
-68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
-68, -68, -68, -68, -68, -68, -68, -68, -68, -68,
-68, -68, -68, -68, -68, -68, -68, -68, -68
},
{
13, 69, 69, 69, 69, 69, 70, 69, 69, 69,
69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
69, 69, 69, 69, 69, 69, 69, 69, 69, 69,
69, 69, 69, 69, 69, 69, 69, 69, 69
},
{
13, -70, -70, -70, -70, -70, -70, -70, -70, -70,
-70, -70, -70, -70, -70, -70, -70, -70, -70, -70,
-70, -70, -70, -70, -70, -70, -70, -70, -70, -70,
-70, -70, -70, -70, -70, -70, -70, -70, -70
},
{
13, 71, 71, 71, 71, 71, 71, 72, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71, 71, 71,
71, 71, 71, 71, 71, 71, 71, 71, 71
},
{
13, -72, -72, -72, -72, -72, -72, -72, -72, -72,
-72, -72, -72, -72, -72, -72, -72, -72, -72, -72,
-72, -72, -72, -72, -72, -72, -72, -72, -72, -72,
-72, -72, -72, -72, -72, -72, -72, -72, -72
},
{
13, -73, -73, -73, -73, -73, -73, -73, 73, 73,
-73, 73, -73, -73, -73, -73, 73, 73, 73, 73,
73, 73, 73, 73, -73, -73, 73, 73, 73, 73,
73, 73, 73, 73, -73, -73, 73, 73, 73
},
{
13, 74, -74, -74, -74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 74, 74, 74, 74, 74,
74, 74, 74, 74, 74, 75, 74, 74, 74
},
{
13, -75, -75, -75, -75, -75, -75, -75, 73, 73,
-75, 73, -75, -75, -75, -75, 73, 73, 73, 73,
73, 73, 73, 73, -75, -75, 73, 73, 73, 73,
73, 73, 73, 73, -75, -75, 73, 73, 73
},
{
13, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 77, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76
},
{
13, 76, 76, 76, 76, 76, 76, 76, 76, 76,
89, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, 76, 76, 76
},
{
13, 76, 76, 76, 76, 76, 76, 76, 90, 90,
76, 91, 76, 76, 76, 76, 91, 91, 91, 91,
91, 91, 91, 91, 76, 76, 90, 90, 90, 90,
90, 90, 90, 90, 92, 76, 91, 91, 91
},
{
13, -79, -79, -79, -79, -79, -79, -79, 93, -79,
-79, -79, -79, -79, -79, -79, -79, -79, -79, -79,
-79, -79, -79, -79, -79, -79, -79, -79, -79, -79,
-79, -79, -79, -79, -79, -79, -79, -79, -79
},
{
13, -80, -80, -80, -80, -80, -80, -80, -80, -80,
-80, -80, -80, -80, -80, -80, -80, -80, -80, -80,
94, -80, -80, -80, -80, -80, -80, -80, -80, -80,
94, -80, -80, -80, -80, -80, -80, -80, -80
},
{
13, -81, -81, -81, -81, -81, -81, -81, -81, -81,
-81, -81, -81, -81, -81, -81, -81, 95, -81, -81,
-81, -81, -81, -81, -81, -81, -81, 95, -81, -81,
-81, -81, -81, -81, -81, -81, -81, -81, -81
},
{
13, -82, -82, -82, -82, -82, -82, -82, 82, 82,
-82, 82, -82, -82, -82, -82, 82, 82, 82, 82,
82, 82, 82, 82, -82, -82, 82, 82, 82, 82,
82, 82, 82, 82, -82, -82, 82, 82, 82
},
{
13, 96, -83, -83, -83, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 97, 96, 96, 96
},
{
13, 84, 84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 85, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84, 84, 84,
84, 84, 84, 84, 84, 84, 84, 84, 84
},
{
13, -85, -85, -85, -85, -85, -85, -85, -85, -85,
-85, -85, -85, -85, -85, -85, -85, -85, -85, -85,
-85, -85, -85, -85, -85, -85, -85, -85, -85, -85,
-85, -85, -85, -85, -85, -85, -85, -85, -85
},
{
13, 86, -86, -86, -86, 86, 86, 86, 86, 86,
86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
86, 86, 86, 86, 86, 86, 86, 86, 86, 86,
86, 86, 86, 86, 86, 87, 86, 86, 86
},
{
13, -87, -87, -87, -87, -87, -87, -87, 49, 49,
-87, 49, -87, -87, -87, -87, 49, 49, 49, 49,
49, 49, 49, 49, -87, -87, 49, 49, 49, 49,
49, 49, 49, 49, -87, -87, 49, 49, 49
},
{
13, 46, 46, -88, -88, 46, 46, 46, 46, 46,
46, 46, -88, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46, 46,
46, 46, 46, 46, 46, 46, 46, 46, 46
},
{
13, 76, 76, 76, 76, 76, 76, 76, -89, -89,
76, 76, 76, 76, 76, 76, 76, 76, 76, 76,
76, 76, 76, 76, 76, 76, -89, -89, -89, -89,
-89, -89, -89, -89, -89, 76, 76, 76, 76
},
{
13, -90, -90, -90, -90, -90, -90, -90, 90, 90,
-90, 90, -90, -90, -90, -90, 90, 90, 90, 90,
90, 90, 90, 90, -90, -90, 90, 90, 90, 90,
90, 90, 90, 90, -90, -90, 90, 90, 90
},
{
13, 76, 76, 76, 76, 76, 76, 76, 91, 91,
76, 91, 77, 76, 76, 76, 91, 91, 91, 91,
91, 91, 91, 91, 76, 76, 91, 91, 91, 91,
91, 91, 91, 91, 76, 76, 91, 91, 91
},
{
13, 98, -92, -92, -92, 98, 98, 98, 98, 98,
98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
98, 98, 98, 98, 98, 99, 98, 98, 98
},
{
13, 100, 100, 100, 100, 100, 100, 100, 101, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100
},
{
13, -94, -94, -94, -94, -94, -94, -94, -94, -94,
-94, -94, -94, -94, -94, -94, -94, 102, -94, -94,
-94, -94, -94, -94, -94, -94, -94, 102, -94, -94,
-94, -94, -94, -94, -94, -94, -94, -94, -94
},
{
13, -95, -95, -95, -95, -95, -95, -95, -95, -95,
-95, -95, -95, -95, -95, -95, -95, -95, 103, -95,
-95, -95, -95, -95, -95, -95, -95, -95, 103, -95,
-95, -95, -95, -95, -95, -95, -95, -95, -95
},
{
13, 96, -96, -96, -96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 96, 96, 96, 96, 96,
96, 96, 96, 96, 96, 97, 96, 96, 96
},
{
13, -97, -97, -97, -97, -97, -97, -97, 82, 82,
-97, 82, -97, -97, -97, -97, 82, 82, 82, 82,
82, 82, 82, 82, -97, -97, 82, 82, 82, 82,
82, 82, 82, 82, -97, -97, 82, 82, 82
},
{
13, 98, -98, -98, -98, 98, 98, 98, 98, 98,
98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
98, 98, 98, 98, 98, 98, 98, 98, 98, 98,
98, 98, 98, 98, 98, 99, 98, 98, 98
},
{
13, -99, -99, -99, -99, -99, -99, -99, 90, 90,
-99, 90, -99, -99, -99, -99, 90, 90, 90, 90,
90, 90, 90, 90, -99, -99, 90, 90, 90, 90,
90, 90, 90, 90, -99, -99, 90, 90, 90
},
{
13, 100, 100, 100, 100, 100, 100, 100, 101, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100
},
{
13, 100, 100, 100, 100, 100, 100, 100, 104, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100
},
{
13, -102, -102, -102, -102, -102, -102, -102, -102, -102,
-102, -102, -102, -102, -102, -102, -102, -102, -102, -102,
-102, -102, 105, -102, -102, -102, -102, -102, -102, -102,
-102, -102, 105, -102, -102, -102, -102, -102, -102
},
{
13, -103, -103, -103, -103, -103, -103, -103, -103, -103,
-103, -103, -103, -103, -103, -103, 106, -103, -103, -103,
-103, -103, -103, -103, -103, -103, 106, -103, -103, -103,
-103, -103, -103, -103, -103, -103, -103, -103, -103
},
{
13, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 107, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100, 100,
100, 100, 100, 100, 100, 100, 100, 100, 100
},
{
13, -105, -105, -105, -105, -105, -105, -105, -105, -105,
-105, -105, -105, -105, -105, -105, -105, -105, -105, -105,
-105, -105, -105, 108, -105, -105, -105, -105, -105, -105,
-105, -105, -105, 108, -105, -105, -105, -105, -105
},
{
13, -106, -106, -106, -106, -106, -106, -106, -106, -106,
-106, -106, -106, -106, -106, -106, -106, -106, -106, -106,
-106, -106, 109, -106, -106, -106, -106, -106, -106, -106,
-106, -106, 109, -106, -106, -106, -106, -106, -106
},
{
13, -107, -107, -107, -107, -107, -107, -107, -107, -107,
-107, -107, -107, -107, -107, -107, -107, -107, -107, -107,
-107, -107, -107, -107, -107, -107, -107, -107, -107, -107,
-107, -107, -107, -107, -107, -107, -107, -107, -107
},
{
13, -108, -108, -108, -108, -108, -108, -108, -108, -108,
-108, -108, -108, -108, -108, -108, -108, -108, -108, -108,
-108, 110, -108, -108, -108, -108, -108, -108, -108, -108,
-108, 110, -108, -108, -108, -108, -108, -108, -108
},
{
13, -109, -109, -109, -109, -109, -109, -109, -109, -109,
-109, -109, -109, -109, -109, -109, 111, -109, -109, -109,
-109, -109, -109, -109, -109, -109, 111, -109, -109, -109,
-109, -109, -109, -109, -109, -109, -109, -109, -109
},
{
13, -110, -110, -110, -110, -110, -110, -110, -110, -110,
-110, -110, -110, -110, -110, -110, -110, -110, -110, 112,
-110, -110, -110, -110, -110, -110, -110, -110, -110, 112,
-110, -110, -110, -110, -110, -110, -110, -110, -110
},
{
13, -111, -111, -111, -111, -111, -111, -111, -111, -111,
-111, -111, -111, -111, -111, -111, -111, -111, -111, -111,
-111, -111, -111, -111, 113, -111, -111, -111, -111, -111,
-111, -111, -111, -111, -111, -111, -111, -111, -111
},
{
13, -112, 114, 114, 114, -112, -112, -112, -112, -112,
-112, -112, -112, -112, -112, -112, -112, -112, -112, -112,
-112, -112, -112, -112, -112, -112, -112, -112, -112, -112,
-112, -112, -112, -112, -112, -112, -112, -112, -112
},
{
13, 115, 115, 115, 115, 115, 115, 115, 115, 115,
115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
115, 115, 115, 115, 115, 116, 115, 115, 115, 115,
115, 115, 115, 115, 115, 115, 115, 115, 115
},
{
13, -114, -114, -114, -114, -114, -114, -114, -114, -114,
-114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
-114, -114, -114, -114, -114, -114, -114, -114, -114, -114,
-114, -114, -114, -114, -114, -114, -114, -114, -114
},
{
13, 115, 115, 115, 115, 115, 115, 115, 115, 115,
115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
115, 115, 115, 115, 115, 116, 115, 115, 115, 115,
115, 115, 115, 115, 115, 115, 115, 115, 115
},
{
13, 115, 115, 115, 115, 115, 115, 115, 115, 115,
115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
115, 115, 115, 115, 115, 117, 115, 115, 115, 115,
115, 115, 115, 115, 115, 115, 115, 115, 115
},
{
13, 115, 115, 115, 115, 115, 115, 115, 115, 115,
115, 115, 115, 115, 118, 115, 115, 115, 115, 115,
115, 115, 115, 115, 115, 115, 115, 115, 115, 115,
115, 115, 115, 115, 115, 115, 115, 115, 115
},
{
13, -118, -118, -118, -118, -118, -118, -118, -118, -118,
-118, -118, -118, -118, -118, -118, -118, -118, -118, -118,
-118, -118, -118, -118, -118, -118, -118, -118, -118, -118,
-118, -118, -118, -118, -118, -118, -118, -118, -118
},
} ;
static yy_state_type yy_get_previous_state (void );
static yy_state_type yy_try_NUL_trans (yy_state_type current_state );
static int yy_get_next_buffer (void );
static void yynoreturn yy_fatal_error (yyconst char* msg );
/* Done after the current pattern has been matched and before the
* corresponding action - sets up yytext.
*
* Expects yy_bp (start of the match) and yy_cp (one past its end) to be
* in scope. It points yytext at the match, stores its length in yyleng,
* saves the character at yy_cp in yy_hold_char and overwrites it with
* '\0' so that yytext is NUL-terminated, and records yy_cp as the place
* where scanning continues. The saved character is put back by the
* scanner (*yy_cp = yy_hold_char) before the next match starts.
*/
#define YY_DO_BEFORE_ACTION \
(yytext_ptr) = yy_bp; \
yyleng = (int) (yy_cp - yy_bp); \
(yy_hold_char) = *yy_cp; \
*yy_cp = '\0'; \
(yy_c_buf_p) = yy_cp;
#define YY_NUM_RULES 32
#define YY_END_OF_BUFFER 33
/* This struct is not used in this scanner,
but its presence is necessary. */
struct yy_trans_info
{
flex_int32_t yy_verify;
flex_int32_t yy_nxt;
};
/* yy_accept -- action number for each scanner state.
*
* Indexed by state number. yylex() reads yy_act = yy_accept[state] and
* dispatches on it in its action switch; an entry of 0 selects the
* "must back up" case, which returns to the last accepting state.
* A non-zero entry also marks the state as accepting while matching
* (it is then remembered in yy_last_accepting_state).
*/
static yyconst flex_int16_t yy_accept[119] =
{ 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29, 29, 33, 4, 6, 6, 10, 4, 31, 13,
14, 14, 11, 31, 17, 12, 15, 31, 20, 18,
19, 19, 31, 31, 24, 25, 25, 31, 31, 23,
28, 31, 29, 29, 31, 4, 6, 0, 2, 3,
0, 0, 4, 13, 14, 11, 16, 0, 0, 20,
18, 19, 0, 21, 0, 22, 24, 25, 0, 26,
0, 27, 23, 0, 0, 29, 0, 0, 0, 0,
0, 3, 0, 0, 9, 0, 0, 1, 0, 30,
29, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 7, 0, 0, 0,
0, 0, 0, 8, 0, 0, 0, 5
} ;
/* yy_ec -- maps an input byte to a column of the transition table.
*
* Indexed by the byte value (0..255). The scanner looks up the next
* state as yy_nxt[state][yy_ec[byte]], so bytes that share a value here
* are treated identically by the automaton. NUL bytes found while
* re-scanning (yy_get_previous_state, yy_try_NUL_trans) bypass this
* table and use column 1 directly.
*/
static yyconst YY_CHAR yy_ec[256] =
{ 0,
1, 1, 1, 1, 1, 1, 1, 1, 2, 3,
1, 2, 4, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 2, 5, 6, 1, 1, 1, 1, 7, 1,
1, 1, 1, 1, 8, 9, 10, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 9, 1, 12,
13, 14, 15, 1, 16, 11, 17, 18, 19, 11,
11, 11, 11, 11, 11, 11, 11, 11, 20, 21,
11, 11, 11, 22, 11, 11, 11, 11, 23, 11,
24, 1, 25, 1, 9, 1, 26, 9, 27, 28,
29, 9, 9, 9, 9, 9, 9, 9, 9, 9,
30, 31, 9, 9, 9, 32, 9, 9, 9, 9,
33, 9, 34, 1, 35, 1, 1, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 36, 11, 11, 11,
37, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11, 11, 11, 11, 38, 11,
11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
11, 11, 11, 11, 11
} ;
static yy_state_type yy_last_accepting_state;
static char *yy_last_accepting_cpos;
extern int yy_flex_debug;
int yy_flex_debug = 0;
/* The intent behind this definition is that it'll catch
* any uses of REJECT which flex missed.
*/
#define REJECT reject_used_but_not_detected
#define yymore() yymore_used_but_not_detected
#define YY_MORE_ADJ 0
#define YY_RESTORE_YY_MORE_OFFSET
char *yytext;
#line 1 "scan.l"
/* %option yylineno */
#line 5 "scan.l"
/*
* Copyright © 1994-2008 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: 1997
**/
#include "config.h"
#include
#if HAVE_STRING_H
# include
#elif HAVE_STRINGS_H
# include
#endif
#if !HAVE_STRDUP
# include "strdup.e"
#endif
#include
#include
#include
#include "export.h"
#include "types.e"
#include "heap.e"
#include "html.h"
#include "html.e"
#include "errexit.e"
EXPORT extern FILE *yyin;
string yyin_name = NULL;
string cur_cdata_element = NULL;
/* Stack -- one entry per file that is currently being included.
*
* include_file() pushes an entry and pop_file() removes it again.
*/
typedef struct _Stack {
YY_BUFFER_STATE buf; /* Scanner buffer that was current before the include */
FILE *f; /* The included file; closed by pop_file() */
string name; /* Value of yyin_name before the include */
struct _Stack *next; /* Next older entry, or NULL */
} *Stack;
/* Top of the include stack; NULL when no include is in progress */
static Stack stack = NULL;
/* set_yyin -- routine to set yyin and store its file name */
EXPORT void set_yyin(FILE *f, const conststring name)
{
yyin = f;
free(yyin_name);
yyin_name = newstring(name);
}
/* get_yyin_name -- report the stored name of the input being scanned
 *
 * Returns the string last stored in yyin_name (NULL if none was stored).
 * The string remains owned by the scanner.
 */
EXPORT conststring get_yyin_name(void)
{
  conststring current_name = yyin_name;

  return current_name;
}
/* include_file -- stack current file and switch to another one */
EXPORT void include_file(FILE *f, const conststring name)
{
Stack h;
new(h);
h->buf = YY_CURRENT_BUFFER;
h->f = f;
h->name = yyin_name;
h->next = stack;
stack = h;
yyin_name = newstring(name);
yy_switch_to_buffer(yy_create_buffer(f,YY_BUF_SIZE));
}
/* pop_file -- resume the input that was active before the last include
 *
 * Returns false if the include stack is empty. Otherwise deletes the
 * current buffer, closes the included file, restores the saved input
 * name and buffer, releases the stack entry and returns true.
 */
static bool pop_file(void)
{
  Stack top = stack;

  if (top == NULL) return false;	/* Nothing to go back to */

  yy_delete_buffer(YY_CURRENT_BUFFER);
  fclose(top->f);
  free(yyin_name);
  yyin_name = top->name;
  yy_switch_to_buffer(top->buf);
  stack = top->next;
  dispose(top);
  return true;
}
/* esc -- remove outer quotes, escape ", < and >, remove \n, return malloc'ed string
 *
 * s starts with its delimiter (s[0]); the value runs from s[1] up to the
 * next occurrence of that delimiter. In the copy that is returned:
 *  - ", < and > are replaced by the numeric character references
 *    &#34;, &#60; and &#62; (5 characters each);
 *  - \n, \r\n and \r are each replaced by a single space.
 * The caller owns the result. Exits via errexit() if out of memory.
 */
static string esc(string s)
{
  int i, j;
  string u;

  /* Find new length. Each escaped character needs 5 bytes: 1 from the
     loop increment plus 4 extra. (\r\n is counted as 2 here but yields
     only 1, so the buffer may be slightly larger than needed.)
     Also stop at the end of the string, in case the closing
     delimiter is missing. */
  for (i = 0, j = 1; s[j] != s[0] && s[j] != '\0'; i++, j++) {
    if (s[j] == '"' || s[j] == '<' || s[j] == '>') i += 4;
  }
  /* Copy and expand */
  u = malloc(i + 1);
  if (!u) errexit("Out of memory\n");
  for (i = 0, j = 1; s[j] != s[0] && s[j] != '\0'; i++, j++) {
    /* The references must be exactly 5 characters long to agree with
       the "i += 4" here and in the length computation above. */
    if (s[j] == '"') {strcpy(u + i, "&#34;"); i += 4;}
    else if (s[j] == '<') {strcpy(u + i, "&#60;"); i += 4;}
    else if (s[j] == '>') {strcpy(u + i, "&#62;"); i += 4;}
    else if (s[j] == '\n') u[i] = ' ';				/* \n */
    else if (s[j] == '\r' && s[j+1] == '\n') {u[i] = ' '; j++;}	/* \r\n */
    else if (s[j] == '\r') {u[i] = ' ';}			/* \r */
    else u[i] = s[j];
  }
  u[i] = '\0';
  return u;
}
#ifndef HAVE_STRNDUP
/* strndup -- return a freshly allocated copy of at most n characters of s
 *
 * The result always has room for n characters plus the terminating \0.
 * Exits via errexit() if memory cannot be allocated.
 */
static string strndup(const string s, size_t n)
{
  string copy;

  copy = malloc(n + 1);
  if (copy == NULL) errexit("Out of memory\n");
  copy[n] = '\0';		/* strncpy() below only touches copy[0..n-1] */
  strncpy(copy, s, n);
  return copy;
}
#else
# ifndef strndup
/* strndup() is available (HAVE_STRNDUP) and is not a macro (!strndup).
   Some older string.h headers nevertheless lack the prototype, so it
   is declared here to be on the safe side. */
extern char *strndup(const char *s, size_t n);
# endif
#endif
/* lns -- count newlines */
static void lns(const string t)
{
string s = t;
while (*s) {
if (*s == '\n') lineno++;
else if (*s != '\r') ;
else if (*(s+1) == '\n') {lineno++; s++;}
else lineno++;
s++;
}
}
/* thing is rather too permissive, but it will accept
... */
#line 1485 "scan.c"
#define INITIAL 0
#define MARKUP 1
#define VALUE 2
#define DECL 3
#define INIT 4
#define CDATA 5
#ifndef YY_NO_UNISTD_H
/* Special case for "unistd.h", since it is non-ANSI. We include it way
* down here because we want the user's section 1 to have been scanned first.
* The user has a chance to override it with an option.
*/
#include
#endif
#ifndef YY_EXTRA_TYPE
#define YY_EXTRA_TYPE void *
#endif
static int yy_init_globals (void );
/* Accessor methods to globals.
These are made visible to non-reentrant scanners for convenience. */
int yylex_destroy (void );
int yyget_debug (void );
void yyset_debug (int debug_flag );
YY_EXTRA_TYPE yyget_extra (void );
void yyset_extra (YY_EXTRA_TYPE user_defined );
FILE *yyget_in (void );
void yyset_in (FILE * _in_str );
FILE *yyget_out (void );
void yyset_out (FILE * _out_str );
int yyget_leng (void );
char *yyget_text (void );
int yyget_lineno (void );
void yyset_lineno (int _line_number );
/* Macros after this point can all be overridden by user definitions in
* section 1.
*/
#ifndef YY_SKIP_YYWRAP
#ifdef __cplusplus
extern "C" int yywrap (void );
#else
extern int yywrap (void );
#endif
#endif
#ifndef YY_NO_UNPUT
#endif
#ifndef yytext_ptr
static void yy_flex_strncpy (char *,yyconst char *,int );
#endif
#ifdef YY_NEED_STRLEN
static int yy_flex_strlen (yyconst char * );
#endif
#ifndef YY_NO_INPUT
#ifdef __cplusplus
static int yyinput (void );
#else
static int input (void );
#endif
#endif
/* Amount of stuff to slurp up with each read. */
#ifndef YY_READ_BUF_SIZE
#ifdef __ia64__
/* On IA-64, the buffer size is 16k, not 8k */
#define YY_READ_BUF_SIZE 16384
#else
#define YY_READ_BUF_SIZE 8192
#endif /* __ia64__ */
#endif
/* Copy whatever the last rule matched to the standard output. */
#ifndef ECHO
/* This used to be an fputs(), but since the string might contain NUL's,
* we now use fwrite().
*/
#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0)
#endif
/* Gets input and stuffs it into "buf". The number of characters read, or
 * YY_NULL, is returned in "result".
 *
 * For an interactive buffer, characters are read one at a time with
 * getc() up to and including the first newline (or until max_size
 * characters or EOF). Otherwise a single fread() of up to max_size
 * bytes is done, and retried for as long as it fails with EINTR.
 * A read error is reported with YY_FATAL_ERROR.
 *
 * The last line of the macro must not end in a backslash: a trailing
 * continuation would splice the #endif below into the macro body.
 */
#ifndef YY_INPUT
#define YY_INPUT(buf,result,max_size) \
	if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \
		{ \
		int c = '*'; \
		size_t n; \
		for ( n = 0; n < max_size && \
			     (c = getc( yyin )) != EOF && c != '\n'; ++n ) \
			buf[n] = (char) c; \
		if ( c == '\n' ) \
			buf[n++] = (char) c; \
		if ( c == EOF && ferror( yyin ) ) \
			YY_FATAL_ERROR( "input in flex scanner failed" ); \
		result = n; \
		} \
	else \
		{ \
		errno=0; \
		while ( (result = (int) fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \
			{ \
			if( errno != EINTR) \
				{ \
				YY_FATAL_ERROR( "input in flex scanner failed" ); \
				break; \
				} \
			errno=0; \
			clearerr(yyin); \
			} \
		}
#endif
/* No semi-colon after return; correct usage is to write "yyterminate();" -
* we don't want an extra ';' after the "return" because that will cause
* some compilers to complain about unreachable statements.
*/
#ifndef yyterminate
#define yyterminate() return YY_NULL
#endif
/* Number of entries by which start-condition stack grows. */
#ifndef YY_START_STACK_INCR
#define YY_START_STACK_INCR 25
#endif
/* Report a fatal error. */
#ifndef YY_FATAL_ERROR
#define YY_FATAL_ERROR(msg) yy_fatal_error( msg )
#endif
/* end tables serialization structures and prototypes */
/* Default declaration of generated scanner - a define so the user can
* easily add parameters.
*/
#ifndef YY_DECL
#define YY_DECL_IS_OURS 1
extern int yylex (void);
#define YY_DECL int yylex (void)
#endif /* !YY_DECL */
/* Code executed at the beginning of each rule, after yytext and yyleng
* have been set up.
*/
#ifndef YY_USER_ACTION
#define YY_USER_ACTION
#endif
/* Code executed at the end of each rule. */
#ifndef YY_BREAK
#define YY_BREAK /*LINTED*/break;
#endif
#define YY_RULE_SETUP \
YY_USER_ACTION
/** The main scanner function which does all the work.
*
* Each call scans the current input buffer from yy_c_buf_p, runs the
* action of the rule that matched and returns the token code that the
* action returns. Actions that do not return (the "skip" rules and the
* rule that only switches start condition) make the loop continue with
* the next match. Token text, where an action provides it, is stored in
* yylval.s as a newly allocated string (strdup, strndup or esc).
*
* At end of input, the EOF action first tries pop_file(): if an include
* was in progress, ENDINCL is returned and scanning resumes in the
* including file on the next call; otherwise yyterminate() ends the scan.
*/
YY_DECL
{
yy_state_type yy_current_state;
char *yy_cp, *yy_bp;
int yy_act;
/* One-time initialisation on the first call: default start state,
* default streams and an input buffer if none exists yet.
*/
if ( !(yy_init) )
{
(yy_init) = 1;
#ifdef YY_USER_INIT
YY_USER_INIT;
#endif
if ( ! (yy_start) )
(yy_start) = 1; /* first start state */
if ( ! yyin )
yyin = stdin;
if ( ! yyout )
yyout = stdout;
if ( ! YY_CURRENT_BUFFER ) {
yyensure_buffer_stack ();
YY_CURRENT_BUFFER_LVALUE =
yy_create_buffer(yyin,YY_BUF_SIZE );
}
yy_load_buffer_state( );
}
{
#line 177 "scan.l"
#line 1710 "scan.c"
while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */
{
yy_cp = (yy_c_buf_p);
/* Support of yytext. */
/* (puts back the character that YY_DO_BEFORE_ACTION replaced by '\0') */
*yy_cp = (yy_hold_char);
/* yy_bp points to the position in yy_ch_buf of the start of
* the current run.
*/
yy_bp = yy_cp;
yy_current_state = (yy_start);
yy_match:
/* Follow transitions for as long as the table yields a positive state,
* remembering the most recent accepting state and position on the way.
*/
while ( (yy_current_state = yy_nxt[yy_current_state][ yy_ec[YY_SC_TO_UI(*yy_cp)] ]) > 0 )
{
if ( yy_accept[yy_current_state] )
{
(yy_last_accepting_state) = yy_current_state;
(yy_last_accepting_cpos) = yy_cp;
}
++yy_cp;
}
/* The entry that stopped the loop is not positive; its negation is the
* state whose action is looked up below.
*/
yy_current_state = -yy_current_state;
yy_find_action:
yy_act = yy_accept[yy_current_state];
YY_DO_BEFORE_ACTION;
do_action: /* This label is used only to access EOF actions. */
switch ( yy_act )
{ /* beginning of action switch */
case 0: /* must back up */
/* undo the effects of YY_DO_BEFORE_ACTION */
*yy_cp = (yy_hold_char);
yy_cp = (yy_last_accepting_cpos) + 1;
yy_current_state = (yy_last_accepting_state);
goto yy_find_action;
/* Rules 1-10: switch start condition and/or return START, END, TEXT,
* COMMENT, DOCTYPE or PROCINS; lns()/lineno++ keep the line count.
*/
case 1:
YY_RULE_SETUP
#line 180 "scan.l"
{BEGIN(INIT); /* Byte Order Mark is ignored */}
YY_BREAK
case 2:
YY_RULE_SETUP
#line 182 "scan.l"
{BEGIN(MARKUP); yylval.s=strdup(yytext+1); return START;}
YY_BREAK
case 3:
YY_RULE_SETUP
#line 183 "scan.l"
{BEGIN(MARKUP); yylval.s=strdup(yytext+2); return END;}
YY_BREAK
case 4:
YY_RULE_SETUP
#line 184 "scan.l"
{yylval.s=strdup(yytext); return TEXT;}
YY_BREAK
case 5:
/* rule 5 can match eol */
YY_RULE_SETUP
#line 185 "scan.l"
{yylval.s=strdup(yytext); lns(yytext); return TEXT;}
YY_BREAK
case 6:
/* rule 6 can match eol */
YY_RULE_SETUP
#line 186 "scan.l"
{yylval.s=strdup(yytext); lineno++; return TEXT;}
YY_BREAK
case 7:
/* rule 7 can match eol */
/* The token text omits the first 4 and the last 3 characters of the match */
YY_RULE_SETUP
#line 187 "scan.l"
{yylval.s=strndup(yytext+4,yyleng-7); lns(yytext); return COMMENT;}
YY_BREAK
case 8:
/* rule 8 can match eol */
YY_RULE_SETUP
#line 188 "scan.l"
{BEGIN(DECL); lns(yytext+9); return DOCTYPE;}
YY_BREAK
case 9:
/* rule 9 can match eol */
/* The token text omits the first 2 characters and the last character of the match */
YY_RULE_SETUP
#line 189 "scan.l"
{yylval.s=strndup(yytext+2,yyleng-3); lns(yytext); return PROCINS;}
YY_BREAK
case 10:
YY_RULE_SETUP
#line 190 "scan.l"
{yylval.s=strdup("<"); return TEXT;}
YY_BREAK
/* Rules 11-17: return NAME, '=', '>' or EMPTYEND, or skip the match;
* '=' switches to VALUE, the rules that return '>' or EMPTYEND to INIT.
*/
case 11:
YY_RULE_SETUP
#line 192 "scan.l"
{yylval.s = strdup(yytext); return NAME;}
YY_BREAK
case 12:
YY_RULE_SETUP
#line 193 "scan.l"
{BEGIN(VALUE); return '=';}
YY_BREAK
case 13:
YY_RULE_SETUP
#line 194 "scan.l"
{; /* skip */}
YY_BREAK
case 14:
/* rule 14 can match eol */
YY_RULE_SETUP
#line 195 "scan.l"
{lineno++; /* skip */}
YY_BREAK
case 15:
YY_RULE_SETUP
#line 196 "scan.l"
{BEGIN(INIT); return '>';}
YY_BREAK
case 16:
YY_RULE_SETUP
#line 197 "scan.l"
{BEGIN(INIT); return EMPTYEND;}
YY_BREAK
case 17:
/* yyless(0) is called on the match before '>' is returned */
YY_RULE_SETUP
#line 198 "scan.l"
{BEGIN(INIT); yyless(0); return '>'; /* Implicit ">" */}
YY_BREAK
/* Rules 18-22: skip the match, or return NAME or STRING and switch
* back to MARKUP.
*/
case 18:
YY_RULE_SETUP
#line 200 "scan.l"
{; /* skip */}
YY_BREAK
case 19:
/* rule 19 can match eol */
YY_RULE_SETUP
#line 201 "scan.l"
{lineno++; /* skip */}
YY_BREAK
case 20:
YY_RULE_SETUP
#line 202 "scan.l"
{BEGIN(MARKUP); yylval.s=strdup(yytext); return NAME;}
YY_BREAK
case 21:
/* rule 21 can match eol */
#line 204 "scan.l"
case 22:
/* rule 22 can match eol */
/* Rules 21 and 22 share this action; esc() strips the outer quotes */
YY_RULE_SETUP
#line 204 "scan.l"
{BEGIN(MARKUP); yylval.s=esc(yytext); lns(yytext); return STRING;}
YY_BREAK
/* Rules 23-28: return NAME or STRING, skip the match, or return '>'
* and switch to INIT.
*/
case 23:
YY_RULE_SETUP
#line 206 "scan.l"
{yylval.s = strdup(yytext); return NAME;}
YY_BREAK
case 24:
YY_RULE_SETUP
#line 207 "scan.l"
{; /* skip */}
YY_BREAK
case 25:
/* rule 25 can match eol */
YY_RULE_SETUP
#line 208 "scan.l"
{lineno++; /* skip */}
YY_BREAK
case 26:
/* rule 26 can match eol */
#line 210 "scan.l"
case 27:
/* rule 27 can match eol */
YY_RULE_SETUP
#line 210 "scan.l"
{lns(yytext); yylval.s = esc(yytext); return STRING;}
YY_BREAK
case 28:
YY_RULE_SETUP
#line 211 "scan.l"
{BEGIN(INIT); return '>';}
YY_BREAK
/* Rules 29 and 30: return TEXT, or END (switching to MARKUP). */
case 29:
/* rule 29 can match eol */
YY_RULE_SETUP
#line 213 "scan.l"
{lns(yytext); yylval.s = strdup(yytext); return TEXT;}
YY_BREAK
case 30:
/* The match is an end tag only if the text after its first two characters
* equals cur_cdata_element (ignoring case); otherwise it is returned as
* ordinary text.
*/
YY_RULE_SETUP
#line 214 "scan.l"
{lns(yytext);
if (strcasecmp(yytext+2, cur_cdata_element) == 0) {
BEGIN(MARKUP);
yylval.s = strdup(yytext+2);
return END;
} else {
yylval.s = strdup(yytext);
return TEXT;
}
}
YY_BREAK
case 31:
YY_RULE_SETUP
#line 225 "scan.l"
{return *yytext; /* illegal char, in fact */}
YY_BREAK
/* End of input in any start condition: resume an including file if
* there is one, else terminate.
*/
case YY_STATE_EOF(INITIAL):
case YY_STATE_EOF(MARKUP):
case YY_STATE_EOF(VALUE):
case YY_STATE_EOF(DECL):
case YY_STATE_EOF(INIT):
case YY_STATE_EOF(CDATA):
#line 227 "scan.l"
{if (pop_file()) return ENDINCL; else yyterminate();}
YY_BREAK
case 32:
YY_RULE_SETUP
#line 229 "scan.l"
YY_FATAL_ERROR( "flex scanner jammed" );
YY_BREAK
#line 1940 "scan.c"
/* The scanner ran into the end-of-buffer marker: either it was a real
* NUL in the input, or the buffer must be refilled.
*/
case YY_END_OF_BUFFER:
{
/* Amount of text matched not including the EOB char. */
int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1;
/* Undo the effects of YY_DO_BEFORE_ACTION. */
*yy_cp = (yy_hold_char);
YY_RESTORE_YY_MORE_OFFSET
if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW )
{
/* We're scanning a new file or input source. It's
* possible that this happened because the user
* just pointed yyin at a new source and called
* yylex(). If so, then we have to assure
* consistency between YY_CURRENT_BUFFER and our
* globals. Here is the right place to do so, because
* this is the first action (other than possibly a
* back-up) that will match for the new input source.
*/
(yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin;
YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL;
}
/* Note that here we test for yy_c_buf_p "<=" to the position
* of the first EOB in the buffer, since yy_c_buf_p will
* already have been incremented past the NUL character
* (since all states make transitions on EOB to the
* end-of-buffer state). Contrast this with the test
* in input().
*/
if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
{ /* This was really a NUL. */
yy_state_type yy_next_state;
(yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text;
yy_current_state = yy_get_previous_state( );
/* Okay, we're now positioned to make the NUL
* transition. We couldn't have
* yy_get_previous_state() go ahead and do it
* for us because it doesn't know how to deal
* with the possibility of jamming (and we don't
* want to build jamming into it because then it
* will run more slowly).
*/
yy_next_state = yy_try_NUL_trans( yy_current_state );
yy_bp = (yytext_ptr) + YY_MORE_ADJ;
if ( yy_next_state )
{
/* Consume the NUL. */
yy_cp = ++(yy_c_buf_p);
yy_current_state = yy_next_state;
goto yy_match;
}
else
{
yy_cp = (yy_c_buf_p);
goto yy_find_action;
}
}
else switch ( yy_get_next_buffer( ) )
{
case EOB_ACT_END_OF_FILE:
{
(yy_did_buffer_switch_on_eof) = 0;
if ( yywrap( ) )
{
/* Note: because we've taken care in
* yy_get_next_buffer() to have set up
* yytext, we can now set up
* yy_c_buf_p so that if some total
* hoser (like flex itself) wants to
* call the scanner after we return the
* YY_NULL, it'll still work - another
* YY_NULL will get returned.
*/
(yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ;
yy_act = YY_STATE_EOF(YY_START);
goto do_action;
}
else
{
if ( ! (yy_did_buffer_switch_on_eof) )
YY_NEW_FILE;
}
break;
}
/* More input was read: recompute the state for the text matched so far
* and continue matching.
*/
case EOB_ACT_CONTINUE_SCAN:
(yy_c_buf_p) =
(yytext_ptr) + yy_amount_of_matched_text;
yy_current_state = yy_get_previous_state( );
yy_cp = (yy_c_buf_p);
yy_bp = (yytext_ptr) + YY_MORE_ADJ;
goto yy_match;
/* No more input, but text is pending: run the action for what has
* been matched up to the end of the buffer.
*/
case EOB_ACT_LAST_MATCH:
(yy_c_buf_p) =
&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)];
yy_current_state = yy_get_previous_state( );
yy_cp = (yy_c_buf_p);
yy_bp = (yytext_ptr) + YY_MORE_ADJ;
goto yy_find_action;
}
break;
}
default:
YY_FATAL_ERROR(
"fatal flex scanner internal error--no action found" );
} /* end of action switch */
} /* end of scanning one token */
} /* end of user's declarations */
} /* end of yylex */
/* yy_get_next_buffer - try to read in a new buffer
*
* Moves the text of the token in progress to the start of the current
* buffer, reads more input after it with YY_INPUT (growing the buffer
* first if it is full and owned by the scanner), and re-appends the two
* end-of-buffer characters.
*
* Returns a code representing an action:
* EOB_ACT_LAST_MATCH - no more input, but some text was matched before
* the end of the buffer and must be processed first
* EOB_ACT_CONTINUE_SCAN - continue scanning from current position
* EOB_ACT_END_OF_FILE - end of file
*/
static int yy_get_next_buffer (void)
{
char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf;
char *source = (yytext_ptr);
int number_to_move, i;
int ret_val;
if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] )
YY_FATAL_ERROR(
"fatal flex scanner internal error--end of buffer missed" );
if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 )
{ /* Don't try to fill the buffer, so this is an EOF. */
if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 )
{
/* We matched a single character, the EOB, so
* treat this as a final EOF.
*/
return EOB_ACT_END_OF_FILE;
}
else
{
/* We matched some text prior to the EOB, first
* process it.
*/
return EOB_ACT_LAST_MATCH;
}
}
/* Try to read more data. */
/* First move last chars to start of buffer. */
number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr) - 1);
for ( i = 0; i < number_to_move; ++i )
*(dest++) = *(source++);
if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING )
/* don't do the read, it's not guaranteed to return an EOF,
* just force an EOF
*/
YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0;
else
{
int num_to_read =
YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1;
while ( num_to_read <= 0 )
{ /* Not enough room in the buffer - grow it. */
/* just a shorter name for the current buffer */
YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE;
/* Remember the scan position as an offset: yyrealloc may move the buffer */
int yy_c_buf_p_offset =
(int) ((yy_c_buf_p) - b->yy_ch_buf);
if ( b->yy_is_our_buffer )
{
/* Double the size, or grow by 1/8 if doubling does not give a positive int */
int new_size = b->yy_buf_size * 2;
if ( new_size <= 0 )
b->yy_buf_size += b->yy_buf_size / 8;
else
b->yy_buf_size *= 2;
b->yy_ch_buf = (char *)
/* Include room for 2 EOB chars. */
yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 );
}
else
/* Can't grow it, we don't own it. */
b->yy_ch_buf = NULL;
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR(
"fatal error - scanner input buffer overflow" );
(yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset];
num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size -
number_to_move - 1;
}
if ( num_to_read > YY_READ_BUF_SIZE )
num_to_read = YY_READ_BUF_SIZE;
/* Read in more data. */
YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]),
(yy_n_chars), num_to_read );
YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
}
/* Nothing was read: real end of file if no token text is pending,
* otherwise the pending text must be matched first.
*/
if ( (yy_n_chars) == 0 )
{
if ( number_to_move == YY_MORE_ADJ )
{
ret_val = EOB_ACT_END_OF_FILE;
yyrestart(yyin );
}
else
{
ret_val = EOB_ACT_LAST_MATCH;
YY_CURRENT_BUFFER_LVALUE->yy_buffer_status =
YY_BUFFER_EOF_PENDING;
}
}
else
ret_val = EOB_ACT_CONTINUE_SCAN;
if (((yy_n_chars) + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) {
/* Extend the array by 50%, plus the number we really need. */
int new_size = (yy_n_chars) + number_to_move + ((yy_n_chars) >> 1);
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size );
if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" );
}
/* yy_n_chars now counts the moved text as well; terminate the data with
* the two end-of-buffer characters and restart yytext at the buffer start.
*/
(yy_n_chars) += number_to_move;
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR;
YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR;
(yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0];
return ret_val;
}
/* yy_get_previous_state - recompute the state reached just before the EOB char
 *
 * Replays the automaton from the start state over the text between
 * yytext_ptr and yy_c_buf_p. A NUL byte in that text uses column 1 of
 * the transition table; any other byte uses its yy_ec entry. The last
 * accepting state and position are updated along the way.
 */
static yy_state_type yy_get_previous_state (void)
{
	yy_state_type state = (yy_start);
	char *pos = (yytext_ptr) + YY_MORE_ADJ;

	while ( pos < (yy_c_buf_p) )
		{
		int column = 1;

		if ( *pos )
			column = yy_ec[YY_SC_TO_UI(*pos)];
		state = yy_nxt[state][column];
		if ( yy_accept[state] )
			{
			(yy_last_accepting_state) = state;
			(yy_last_accepting_cpos) = pos;
			}
		++pos;
		}
	return state;
}
/* yy_try_NUL_trans - attempt the transition for a NUL character
 *
 * synopsis
 *	next_state = yy_try_NUL_trans( current_state );
 *
 * Returns the state reached via column 1 of the transition table, or 0
 * if that entry is not positive (the automaton jams). A reachable
 * accepting state is recorded as the last accepting state, at the
 * current scan position.
 */
static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state )
{
	yy_state_type target = yy_nxt[yy_current_state][1];

	if ( target <= 0 )
		return 0;	/* jammed */

	if ( yy_accept[target] )
		{
		(yy_last_accepting_state) = target;
		(yy_last_accepting_cpos) = (yy_c_buf_p);
		}
	return target;
}
#ifndef YY_NO_UNPUT
#endif
#ifndef YY_NO_INPUT
/* input (yyinput when compiled as C++) - read the next character of the input
*
* Returns the character at the current scan position as an unsigned
* char value and advances past it, refilling the buffer with
* yy_get_next_buffer() when the end of the buffered data is reached.
* Returns 0 when the input is exhausted and yywrap() reports that there
* is no further input.
*/
#ifdef __cplusplus
static int yyinput (void)
#else
static int input (void)
#endif
{
int c;
/* Put back the character that was replaced by '\0' to terminate yytext */
*(yy_c_buf_p) = (yy_hold_char);
if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR )
{
/* yy_c_buf_p now points to the character we want to return.
* If this occurs *before* the EOB characters, then it's a
* valid NUL; if not, then we've hit the end of the buffer.
*/
if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] )
/* This was really a NUL. */
*(yy_c_buf_p) = '\0';
else
{ /* need more input */
/* Keep the position relative to yytext_ptr; it is restored after the refill */
int offset = (yy_c_buf_p) - (yytext_ptr);
++(yy_c_buf_p);
switch ( yy_get_next_buffer( ) )
{
case EOB_ACT_LAST_MATCH:
/* This happens because yy_g_n_b()
* sees that we've accumulated a
* token and flags that we need to
* try matching the token before
* proceeding. But for input(),
* there's no matching to consider.
* So convert the EOB_ACT_LAST_MATCH
* to EOB_ACT_END_OF_FILE.
*/
/* Reset buffer status. */
yyrestart(yyin );
/*FALLTHROUGH*/
case EOB_ACT_END_OF_FILE:
{
if ( yywrap( ) )
return 0;
if ( ! (yy_did_buffer_switch_on_eof) )
YY_NEW_FILE;
/* Try again on whatever input is current now */
#ifdef __cplusplus
return yyinput();
#else
return input();
#endif
}
case EOB_ACT_CONTINUE_SCAN:
(yy_c_buf_p) = (yytext_ptr) + offset;
break;
}
}
}
c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */
*(yy_c_buf_p) = '\0'; /* preserve yytext */
(yy_hold_char) = *++(yy_c_buf_p);
return c;
}
#endif /* ifndef YY_NO_INPUT */
/** Immediately switch to a different input stream.
* @param input_file A readable stream.
*
* @note This function does not reset the start condition to @c INITIAL .
*/
void yyrestart (FILE * input_file )
{
if ( ! YY_CURRENT_BUFFER ){
yyensure_buffer_stack ();
YY_CURRENT_BUFFER_LVALUE =
yy_create_buffer(yyin,YY_BUF_SIZE );
}
yy_init_buffer(YY_CURRENT_BUFFER,input_file );
yy_load_buffer_state( );
}
/** Switch to a different input buffer.
 * Does nothing (apart from ensuring that the buffer stack exists) if
 * new_buffer already is the current buffer.
 * @param new_buffer The new input buffer.
 *
 */
void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer )
{
	/* TODO. We should be able to replace this entire function body
	 * with
	 *		yypop_buffer_state();
	 *		yypush_buffer_state(new_buffer);
	 */
	yyensure_buffer_stack ();

	if ( YY_CURRENT_BUFFER != new_buffer )
		{
		if ( YY_CURRENT_BUFFER )
			{
			/* Save the scan position and size of the old buffer. */
			*(yy_c_buf_p) = (yy_hold_char);
			YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
			YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);
			}

		YY_CURRENT_BUFFER_LVALUE = new_buffer;
		yy_load_buffer_state( );

		/* It is unknown here whether the switch happens during EOF
		 * (yywrap()) processing, but the flag is only inspected after
		 * yywrap() has been called, so setting it always is safe.
		 */
		(yy_did_buffer_switch_on_eof) = 1;
		}
}
static void yy_load_buffer_state (void)
{
(yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars;
(yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos;
yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file;
(yy_hold_char) = *(yy_c_buf_p);
}
/** Allocate and initialize an input buffer state.
* @param file A readable stream.
* @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE.
*
* @return the allocated buffer state.
*/
YY_BUFFER_STATE yy_create_buffer (FILE * file, int size )
{
YY_BUFFER_STATE b;
b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) );
if ( ! b )
YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
b->yy_buf_size = (yy_size_t)size;
/* yy_ch_buf has to be 2 characters longer than the size given because
* we need to put in 2 end-of-buffer characters.
*/
b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 );
if ( ! b->yy_ch_buf )
YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" );
b->yy_is_our_buffer = 1;
yy_init_buffer(b,file );
return b;
}
/** Destroy the buffer.
 * A null argument is ignored. The character array is released only if
 * the buffer state owns it.
 * @param b a buffer created with yy_create_buffer()
 *
 */
void yy_delete_buffer (YY_BUFFER_STATE b )
{
	if ( b )
		{
		/* Not sure if we should pop here. */
		if ( b == YY_CURRENT_BUFFER )
			YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0;

		if ( b->yy_is_our_buffer )
			yyfree((void *) b->yy_ch_buf );

		yyfree((void *) b );
		}
}
/* Initializes or reinitializes a buffer.
 * The buffer is flushed and attached to the given stream. errno is
 * saved on entry and restored on exit.
 * This function is sometimes called more than once on the same buffer,
 * such as during a yyrestart() or at EOF.
 */
static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file )
{
	const int saved_errno = errno;

	yy_flush_buffer(b );

	b->yy_input_file = file;
	b->yy_fill_buffer = 1;

	/* When b is the current buffer, the call _probably_ came from
	 * yyrestart() or through yy_get_next_buffer; the line and column
	 * numbers must then be left alone.
	 */
	if (b != YY_CURRENT_BUFFER)
		{
		b->yy_bs_lineno = 1;
		b->yy_bs_column = 0;
		}

	if ( file )
		b->yy_is_interactive = (isatty( fileno(file) ) > 0);
	else
		b->yy_is_interactive = 0;

	errno = saved_errno;
}
/** Discard all buffered characters. On the next scan, YY_INPUT will be called.
 * A null argument is ignored.
 * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER.
 *
 */
void yy_flush_buffer (YY_BUFFER_STATE b )
{
	if ( b )
		{
		b->yy_n_chars = 0;

		/* Two end-of-buffer characters are always required: the
		 * first takes the scanner to the end-of-buffer state, the
		 * second makes it jam there.
		 */
		b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR;
		b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR;

		b->yy_buf_pos = &b->yy_ch_buf[0];

		b->yy_at_bol = 1;
		b->yy_buffer_status = YY_BUFFER_NEW;

		/* Keep the scanner globals in step if b is in use right now. */
		if ( b == YY_CURRENT_BUFFER )
			yy_load_buffer_state( );
		}
}
/** Pushes the new state onto the stack. The new state becomes
 *  the current state. This function will allocate the stack
 *  if necessary. A null argument is ignored.
 *  @param new_buffer The new state.
 *
 */
void yypush_buffer_state (YY_BUFFER_STATE new_buffer )
{
	if (new_buffer == NULL)
		return;

	yyensure_buffer_stack();

	if ( YY_CURRENT_BUFFER )
		{
		/* Same steps as in yy_switch_to_buffer: save the scan
		 * position and size of the old buffer.
		 */
		*(yy_c_buf_p) = (yy_hold_char);
		YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p);
		YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars);

		/* There is a top entry, so push; without one the slot at
		 * the top is simply replaced below.
		 */
		(yy_buffer_stack_top)++;
		}

	YY_CURRENT_BUFFER_LVALUE = new_buffer;

	/* Same steps as at the end of yy_switch_to_buffer. */
	yy_load_buffer_state( );
	(yy_did_buffer_switch_on_eof) = 1;
}
/** Delete the buffer on top of the stack, if there is one.
 * The element below it, if any, becomes the new top.
 *
 */
void yypop_buffer_state (void)
{
    if (YY_CURRENT_BUFFER) {
        yy_delete_buffer(YY_CURRENT_BUFFER );
        YY_CURRENT_BUFFER_LVALUE = NULL;

        /* The top index never goes below the first slot. */
        if ((yy_buffer_stack_top) > 0)
            (yy_buffer_stack_top) -= 1;

        /* Activate the uncovered buffer, when one exists. */
        if (YY_CURRENT_BUFFER) {
            yy_load_buffer_state( );
            (yy_did_buffer_switch_on_eof) = 1;
        }
    }
}
/* Allocates the stack if it does not exist.
* Guarantees space for at least one push.
* Calls YY_FATAL_ERROR when memory cannot be obtained.
*/
static void yyensure_buffer_stack (void)
{
int num_to_alloc;
if (!(yy_buffer_stack)) {
/* First allocation. The stack starts with a single slot
* (num_to_alloc is 1 below, although this comment used to speak of 2),
* since we don't know if this scanner will even need a stack.
*/
num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */
(yy_buffer_stack) = (struct yy_buffer_state**)yyalloc
(num_to_alloc * sizeof(struct yy_buffer_state*)
);
if ( ! (yy_buffer_stack) )
YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
/* All slots start out empty. */
memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*));
(yy_buffer_stack_max) = num_to_alloc;
(yy_buffer_stack_top) = 0;
return;
}
/* The top index has reached the last allocated slot: grow now, so that
* a push that increments the top index stays inside the array.
*/
if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){
/* Increase the buffer to prepare for a possible push. */
yy_size_t grow_size = 8 /* arbitrary grow size */;
num_to_alloc = (yy_buffer_stack_max) + grow_size;
(yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc
((yy_buffer_stack),
num_to_alloc * sizeof(struct yy_buffer_state*)
);
if ( ! (yy_buffer_stack) )
YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" );
/* zero only the new slots.*/
memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*));
(yy_buffer_stack_max) = num_to_alloc;
}
}
/** Create a buffer state that scans a caller-supplied character buffer in place.
 * @param base the character buffer; its last two bytes must both be
 *             YY_END_OF_BUFFER_CHAR
 * @param size the size in bytes of the character buffer, including those two bytes
 *
 * @return the newly allocated buffer state object, or NULL when the buffer
 *         does not end in the two required end-of-buffer characters.
 */
YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size )
{
    YY_BUFFER_STATE state;

    /* The caller has to leave room for (and fill in) the two EOB's. */
    if ( ! ( size >= 2 &&
             base[size-2] == YY_END_OF_BUFFER_CHAR &&
             base[size-1] == YY_END_OF_BUFFER_CHAR ) )
        return NULL;

    state = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) );
    if ( state == NULL )
        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" );

    /* Scan directly from the caller's memory; it is not ours to free. */
    state->yy_ch_buf = base;
    state->yy_buf_pos = state->yy_ch_buf;
    state->yy_is_our_buffer = 0;

    /* "- 2" to take care of EOB's */
    state->yy_buf_size = size - 2;
    state->yy_n_chars = state->yy_buf_size;

    /* No stream behind this buffer, so never interactive and never refilled. */
    state->yy_input_file = NULL;
    state->yy_is_interactive = 0;
    state->yy_fill_buffer = 0;

    state->yy_at_bol = 1;
    state->yy_buffer_status = YY_BUFFER_NEW;

    yy_switch_to_buffer(state );

    return state;
}
/** Arrange for the next call to yylex() to scan a @e copy of a string.
 * @param yystr a NUL-terminated string to scan
 *
 * @return the newly allocated buffer state object.
 * @note The length is taken with strlen(), so bytes after an embedded NUL
 *       are not scanned; use yy_scan_bytes() for such input.
 */
YY_BUFFER_STATE yy_scan_string (yyconst char * yystr )
{
    int length = (int) strlen(yystr);

    return yy_scan_bytes(yystr, length );
}
/** Arrange for the next call to yylex() to scan a @e copy of a byte sequence.
 * @param yybytes the byte buffer to scan
 * @param _yybytes_len the number of bytes in the buffer pointed to by @a yybytes.
 *
 * @return the newly allocated buffer state object.
 */
YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, int _yybytes_len )
{
    YY_BUFFER_STATE state;
    char *copy;
    yy_size_t total;
    int pos;

    /* Room for the bytes plus the two trailing EOB's. */
    total = (yy_size_t) (_yybytes_len + 2);
    copy = (char *) yyalloc(total );
    if ( copy == NULL )
        YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" );

    pos = 0;
    while ( pos < _yybytes_len ) {
        copy[pos] = yybytes[pos];
        ++pos;
    }

    copy[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR;
    copy[_yybytes_len] = YY_END_OF_BUFFER_CHAR;

    state = yy_scan_buffer(copy,total );
    if ( state == NULL )
        YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" );

    /* The copy belongs to the scanner: it may grow it and must free it
     * when the buffer is deleted.
     */
    state->yy_is_our_buffer = 1;

    return state;
}
#ifndef YY_EXIT_FAILURE
#define YY_EXIT_FAILURE 2
#endif
/* Report an unrecoverable scanner error on stderr and terminate the
 * program with status YY_EXIT_FAILURE.  Does not return.
 */
static void yynoreturn yy_fatal_error (yyconst char* msg )
{
    fprintf( stderr, "%s\n", msg );
    exit( YY_EXIT_FAILURE );
}
/* Redefine yyless() so it works in section 3 code.
* yyless(n) keeps the first n characters of the current token: it puts back
* the held character at the old token end, moves the scan position to
* yytext + n, saves the character found there, terminates yytext at that
* point and sets yyleng to n.
*/
#undef yyless
#define yyless(n) \
do \
{ \
/* Undo effects of setting up yytext. */ \
int yyless_macro_arg = (n); \
YY_LESS_LINENO(yyless_macro_arg);\
yytext[yyleng] = (yy_hold_char); \
(yy_c_buf_p) = yytext + yyless_macro_arg; \
(yy_hold_char) = *(yy_c_buf_p); \
*(yy_c_buf_p) = '\0'; \
yyleng = yyless_macro_arg; \
} \
while ( 0 )
/* Accessor methods (get/set functions) to struct members. */
/** Report the scanner's current line number.
 * @return the value of yylineno.
 */
int yyget_lineno (void)
{
    int current_line = yylineno;

    return current_line;
}
/** Report the stream the scanner reads from.
 * @return the value of yyin.
 */
FILE *yyget_in (void)
{
    FILE *input = yyin;

    return input;
}
/** Report the stream the scanner writes to.
 * @return the value of yyout.
 */
FILE *yyget_out (void)
{
    FILE *output = yyout;

    return output;
}
/** Report the length of the current token.
 * @return the value of yyleng.
 */
int yyget_leng (void)
{
    int token_length = yyleng;

    return token_length;
}
/** Report the text of the current token.
 * @return the value of yytext.
 */
char *yyget_text (void)
{
    char *token_text = yytext;

    return token_text;
}
/** Overwrite the scanner's current line number.
 * @param _line_number the value to store in yylineno
 *
 */
void yyset_lineno (int _line_number )
{
    const int new_line = _line_number;

    yylineno = new_line;
}
/** Replace the scanner's input stream. Only yyin is assigned; the current
 * input buffer is not discarded.
 * @param _in_str A readable stream.
 *
 * @see yy_switch_to_buffer
 */
void yyset_in (FILE * _in_str )
{
    FILE *stream = _in_str;

    yyin = stream;
}
/** Replace the scanner's output stream.
 * @param _out_str the stream to store in yyout
 */
void yyset_out (FILE * _out_str )
{
    FILE *stream = _out_str;

    yyout = stream;
}
/** Report the scanner's debug flag.
 * @return the value of yy_flex_debug.
 */
int yyget_debug (void)
{
    int flag = yy_flex_debug;

    return flag;
}
/** Overwrite the scanner's debug flag.
 * @param _bdebug the value to store in yy_flex_debug
 */
void yyset_debug (int _bdebug )
{
    const int flag = _bdebug;

    yy_flex_debug = flag;
}
/* Reset the scanner's global state to its initial values: no buffer stack,
* no scan position, not yet initialized, start state 0. Always returns 0.
*/
static int yy_init_globals (void)
{
/* Initialization is the same as for the non-reentrant scanner.
* This function is called from yylex_destroy(), so don't allocate here.
*/
(yy_buffer_stack) = NULL;
(yy_buffer_stack_top) = 0;
(yy_buffer_stack_max) = 0;
(yy_c_buf_p) = NULL;
(yy_init) = 0;
(yy_start) = 0;
/* Defined in main.c */
/* With YY_STDINIT the streams default to stdin/stdout; otherwise they
* are left NULL here.
*/
#ifdef YY_STDINIT
yyin = stdin;
yyout = stdout;
#else
yyin = NULL;
yyout = NULL;
#endif
/* For future reference: Set errno on error, since we are called by
* yylex_init()
*/
return 0;
}
/* yylex_destroy is for both reentrant and non-reentrant scanners.
 * Frees the buffers and the buffer stack and resets the globals.
 * Always returns 0.
 */
int yylex_destroy (void)
{
    /* Pop the buffer stack, destroying each element.
     * NOTE(review): the slot is cleared before yypop_buffer_state() runs,
     * and that function returns early when there is no current buffer --
     * confirm that buffers below the top are released as intended.
     */
    for ( ; YY_CURRENT_BUFFER; yypop_buffer_state() ) {
        yy_delete_buffer(YY_CURRENT_BUFFER );
        YY_CURRENT_BUFFER_LVALUE = NULL;
    }

    /* Destroy the stack itself. */
    yyfree((yy_buffer_stack) );
    (yy_buffer_stack) = NULL;

    /* Reset the globals, so that in a non-reentrant scanner the next call
     * to yylex() initializes again.
     */
    yy_init_globals( );

    return 0;
}
/*
* Internal utility routines.
*/
#ifndef yytext_ptr
/* Copy exactly n bytes from s2 to s1, front to back; nothing is copied
 * when n is zero or negative and no terminator is added.
 */
static void yy_flex_strncpy (char* s1, yyconst char * s2, int n )
{
    int left;

    for ( left = n; left > 0; --left )
        *s1++ = *s2++;
}
#endif
#ifdef YY_NEED_STRLEN
/* Count the characters of s that precede its terminating NUL. */
static int yy_flex_strlen (yyconst char * s )
{
    int count = 0;

    while ( s[count] )
        ++count;

    return count;
}
#endif
/* Scanner allocation hook: forwards to malloc(). */
void *yyalloc (yy_size_t size )
{
    void *memory = malloc(size);

    return memory;
}
/* Scanner reallocation hook: forwards to realloc().
 * The pointer is passed through without a cast; both ANSI C and C++ allow
 * castless conversion from any object pointer type to void*.
 */
void *yyrealloc (void * ptr, yy_size_t size )
{
    void *resized = realloc(ptr, size);

    return resized;
}
/* Scanner deallocation hook: forwards to free(). */
void yyfree (void * ptr )
{
    free( ptr );
}
#define YYTABLES_NAME "yytables"
#line 229 "scan.l"
/* set_cdata_element -- set parsing rule for an element with CDATA content */
/* Replaces the remembered element name by a fresh copy of e and switches
 * the scanner to the CDATA start condition. */
EXPORT void set_cdata_element(const conststring e)
{
dispose(cur_cdata_element);
cur_cdata_element = newstring(e);
BEGIN(CDATA);
}
/*
* Local variables:
* mode: indented-text
* End:
*/
html-xml-utils-7.6/connectsock.e 0000644 0001750 0001750 00000000560 13205770445 013676 0000000 0000000 extern u_short portbase ;
extern int connectsock(const char *host, const char *service, char *protocol);
extern int connectTCP(const char *host, const char *service);
extern int connectUDP(char *host, char *service);
extern int passivesock(char *service, char *protocol, int qlen);
extern int passiveTCP(char *service, int qlen);
extern int passiveUDP(char *service);
html-xml-utils-7.6/hxremove.c 0000645 0001750 0001750 00000013557 13206022221 013212 0000000 0000000 /*
* Copy input (well-formed XML) without any elements that match a given selector
*
* To do: Put common routines with hxselect in separate module.
*
* To do: Only look for a LANG attribute if the input is XHTML.
*
* Part of HTML-XML-utils, see:
* http://www.w3.org/Tools/HTML-XML-utils/
*
* Copyright © 2012-2017 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: 21 Oct 2012
*/
#include "config.h"
#include
#include
#include
#include
#include
#include
#ifdef HAVE_STRING_H
# include
#elif HAVE_STRINGS_H
# include
#endif
#include "types.e"
#include "tree.e"
#include "selector.e"
#include "heap.e"
#include "errexit.e"
#include "html.e"
#include "scan.e"
#include "selmatch.e"
static Selector selector; /* The selector to match */
/* print_tree -- print t and its following siblings, omitting elements that
 * match the selector. Does nothing when t is NULL. */
static void print_tree(Tree t)
{
  pairlist p;

  /* Siblings are walked in a loop; only children are handled by recursion,
   * so a long run of siblings does not deepen the call stack */
  for (; t; t = t->sister) {
    switch (t->tp) {
    case Element:
      /* Print the element, unless it matches the selector */
      if (!matches_sel(t, selector)) {
        printf("<%s", t->name);
        /* Print each attribute, unless it matches the selector */
        for (p = t->attribs; p; p = p->next)
          if (!selector->pseudoelts || selector->pseudoelts->type != AttrNode
              || !same(p->name, selector->pseudoelts->s)
              || !matches_sel(t, selector->context))
            printf(" %s=\"%s\"", p->name, p->value);
        if (!t->children) {
          printf("/>");
        } else {
          printf(">");
          print_tree(t->children);
          printf("</%s>", t->name);
        }
      }
      break;
    case Text:
      printf("%s", t->text);
      break;
    case Comment:
      printf("<!--%s-->", t->text);
      break;
    case Declaration:
      printf("<!DOCTYPE %s", t->name);
      if (t->text && t->url) printf(" PUBLIC \"%s\" \"%s\"", t->text, t->url);
      else if (t->text) printf(" PUBLIC \"%s\"", t->text);
      else if (t->url) printf(" SYSTEM \"%s\"", t->url);
      printf(">");
      break;
    case Procins:
      printf("<?%s>", t->text);
      break;
    case Root:
      print_tree(t->children);
      break;
    default: assert(!"Cannot happen!");
    }
  }
}
/*********************** Parser callback API ***********************/
/* handle_error -- report a parse error as "line: message" on stderr */
static void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;		/* Not needed here */
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* handle_start -- called before the first event; returns the client data,
 * a newly allocated holder containing an empty tree */
static void* handle_start(void)
{
  Tree *holder;

  new(holder);
  *holder = create();
  return holder;
}
/* handle_end -- called after the last event is reported */
static void handle_end(void *clientdata)
{
Tree *t = (Tree*)clientdata;
print_tree(get_root(*t)); /* Print tree, filtering out unwanted elements */
tree_delete(*t);
}
/* handle_comment -- called after a comment is parsed */
static void handle_comment(void *clientdata, const string commenttext)
{
Tree *t = (Tree*)clientdata;
*t = append_comment(*t, commenttext);
}
/* handle_text -- called after a text chunk is parsed */
static void handle_text(void *clientdata, const string text)
{
Tree *t = (Tree*)clientdata;
*t = tree_append_text(*t, text);
}
/* handle_decl -- called after a declaration is parsed */
static void handle_decl(void *clientdata, const string gi, const string fpi,
const string url)
{
Tree *t = (Tree*)clientdata;
*t = append_declaration(*t, gi, fpi, url);
}
/* handle_pi -- called after a Processing Instruction is parsed */
static void handle_pi(void *clientdata, const string pi_text)
{
Tree *t = (Tree*)clientdata;
*t = append_procins(*t, pi_text);
}
/* handle_starttag -- open a new element in the tree held in clientdata */
static void handle_starttag(void *clientdata, const string name,
			    pairlist attribs)
{
  Tree *holder = (Tree*)clientdata;
  Tree opened = tree_push(*holder, name, attribs);

  *holder = opened;
}
/* handle_emptytag -- an empty tag is handled as a start tag immediately
 * followed by its end tag */
static void handle_emptytag(void *clientdata, const string name,
			    pairlist attribs)
{
  Tree *holder = (Tree*)clientdata;
  Tree opened = tree_push(*holder, name, attribs);

  *holder = tree_pop(opened, name);
}
/* handle_endtag -- called after an endtag is parsed (name may be "") */
static void handle_endtag(void *clientdata, const string name)
{
Tree *t = (Tree*)clientdata;
*t = tree_pop(*t, name);
}
/* handle_endincl -- called at the end of an included file */
/* void handle_endincl(void *clientdata) */
/********************* End of parser callbacks *********************/
/* usage -- hand the command synopsis to errexit() */
static void usage(const conststring progname)
{
  errexit("Usage: %s [-v] [-l language] [-i] selector\n",
	  progname);
}
/* main -- read XML from stdin and write it to stdout without the parts that
 * match the selector given on the command line */
int main(int argc, char *argv[])
{
  string s;
  int c;

  /* Command line options */
  for (;;) {
    c = getopt(argc, argv, "il:v");
    if (c == -1) break;
    if (c == 'l') {
      init_language(optarg);
    } else if (c == 'i') {
      set_case_insensitive();
    } else if (c == 'v') {
      printf("Version: %s %s\n", PACKAGE, VERSION);
      return 0;
    } else if (c == '?') {
      usage(argv[0]);
    } else {
      assert(!"Cannot happen");
    }
  }

  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(handle_start);
  set_end_handler(handle_end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  /* The selector is all remaining arguments, joined by single spaces */
  if (optind >= argc) usage(argv[0]);	/* Need at least 1 arg */
  s = newstring(argv[optind++]);
  while (optind < argc) {
    strapp(&s, " ", argv[optind], NULL);
    optind++;
  }

  /* After parsing, s points at whatever could not be parsed */
  selector = parse_selector(s, &s);
  if (*s) errexit("Syntax error at \"%c\"\n", *s);

  /* Parse the input, build a tree, filter the tree */
  yyin = stdin;
  if (yyparse() != 0) exit(3);

  return 0;
}
html-xml-utils-7.6/selector.e 0000644 0001750 0001750 00000002063 13206054362 013177 0000000 0000000 typedef enum {
AttrNode,
RootSel, NthChild, NthOfType, FirstChild, FirstOfType, Lang,
NthLastChild, NthLastOfType, LastChild, LastOfType, OnlyChild,
OnlyOfType, Empty,
Not,
} PseudoType;
/* PseudoCond -- one pseudo-class or pseudo-element condition; conditions
 * form a singly linked list through next */
typedef struct _PseudoCond {
PseudoType type; /* Which kind of condition this is */
int a, b; /* presumably the an+b coefficients of the Nth* types -- TODO confirm */
string s; /* String argument; for AttrNode, hxremove compares it to attribute names */
struct _SimpleSelector *sel; /* presumably the argument of Not -- TODO confirm */
struct _PseudoCond *next; /* Next condition in the list, or NULL */
} PseudoCond;
/* Operator -- the kind of test stored in an AttribCond */
typedef enum {
Exists, Equals, Includes, StartsWith, EndsWith, Contains, LangMatch,
HasClass, HasID
} Operator;
/* AttribCond -- one attribute condition; conditions form a singly linked
 * list through next */
typedef struct _AttribCond {
Operator op; /* The test to apply */
string name; /* presumably the attribute name -- TODO confirm */
string value; /* presumably the value the test compares against -- TODO confirm */
struct _AttribCond *next; /* Next condition in the list, or NULL */
} AttribCond;
/* Combinator -- stored in SimpleSelector.combinator; presumably how a simple
 * selector relates to its context -- TODO confirm */
typedef enum {
Descendant, Child, Adjacent, Sibling
} Combinator;
/* SimpleSelector -- one simple selector with its conditions; Selector is a
 * pointer to this structure */
typedef struct _SimpleSelector {
string name; /* presumably the element name to match -- TODO confirm */
AttribCond *attribs; /* List of attribute conditions */
PseudoCond *pseudos; /* List of pseudo-class conditions */
PseudoCond *pseudoelts; /* List of pseudo-elements; hxremove checks the first for AttrNode */
Combinator combinator; /* presumably the relation to context -- TODO confirm */
struct _SimpleSelector *context; /* The selector this one is combined with; hxremove matches it separately */
struct _SimpleSelector *next; /* NOTE(review): use not visible here -- confirm against selector.c */
} SimpleSelector, *Selector;
extern Selector parse_selector(const string selector, string *rest);
extern void dump_simple_selector(FILE *f, const SimpleSelector *s);
extern void dump_selector(FILE *f, const Selector s);
html-xml-utils-7.6/xml2asc.1 0000644 0001750 0001750 00000002232 12704011745 012642 0000000 0000000 .TH "XML2ASC" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.de d \" begin display
.sp
.in +4
.nf
.ft CR
.CDS
..
.de e \" end display
.CDE
.in -4
.fi
.ft R
.sp
..
.SH NAME
xml2asc \- convert UTF-8 to &#nnn; entities
.SH SYNOPSIS
.B xml2asc
.SH DESCRIPTION
.LP
Reads a UTF-8 encoded text from standard input and writes to standard
output, converting all non-ASCII characters to &#nnn; entities, so
that the result is ASCII-encoded.
.LP
One example use is to convert ISO-8859-1 to ASCII with &#nnn;
entities, by first running
.B asc2xml
to convert ISO-8859-1 to UTF-8 and then pipe the result into
.B xml2asc
to convert to ASCII with &#nnn; entities for all accented characters.
.LP
To test if a file is correct UTF-8, ignore the output and test the
exit code, e.g. in Bash:
.d
xml2asc <file >/dev/null && echo "OK" || echo "Fail"
.e
.SH "DIAGNOSTICS"
.B xml2asc
returns with a non-zero exit code if the input was not
UTF-8.
.SH "SEE ALSO"
.BR asc2xml (1),
.BR UTF-8 " (RFC 2279)"
.SH BUGS
.LP
Doesn't distinguish mark-up from content, so if the input uses
non-ASCII characters in XML element names, they will be output with
numerical entities in them, which is not legal in XML.
html-xml-utils-7.6/heap.c 0000644 0001750 0001750 00000004117 13205764472 012305 0000000 0000000 /*
* Some memory allocation routines, they call abort() in case of failure
*
* Part of HTML-XML-utils, see:
* http://www.w3.org/Tools/HTML-XML-utils/
*
* Copyright © 1994-2003 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: before 1995
**/
#include "config.h"
#include
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
#endif
#include "export.h"
#ifdef __export
#undef __FILE__ /* Don't expand while making the .e file */
#undef __LINE__ /* Don't expand while making the .e file */
#endif
/* fatal -- report msg with the current file and line via fatal3() */
#define fatal(msg) fatal3(msg, __FILE__, __LINE__)
/* new -- allocate room for one *p; calls fatal() if malloc fails. The
 * "if (...); else" shape lets the macro be followed by ";" like a statement */
#define new(p) if (((p)=malloc(sizeof(*(p))))); else fatal("out of memory")
/* dispose -- free p and set it to a null pointer; does nothing if p is
 * already null */
#define dispose(p) if (!(p)) ; else (free((void*)p), (p) = (void*)0)
/* heapmax -- a fixed number; p is not used */
#define heapmax(p) 9999999 /* ? */
/* newstring -- heap-allocated copy of s; see heap_newstring() */
#define newstring(s) heap_newstring(s, __FILE__, __LINE__)
/* newnstring -- heap-allocated copy of the first n characters of s; see
 * heap_newnstring() */
#define newnstring(s,n) heap_newnstring(s, n, __FILE__, __LINE__)
/* newarray -- allocate room for n elements of type *p; calls fatal() if
 * malloc fails */
#define newarray(p,n) \
if (((p)=malloc((n)*sizeof(*(p))))); else fatal("out of memory")
/* renewarray -- resize p to n elements of type *p; calls fatal() if realloc
 * fails */
#define renewarray(p,n) \
if (((p)=realloc(p,(n)*sizeof(*(p))))); else fatal("out of memory")
EXPORTDEF(fatal(msg))
EXPORTDEF(new(p))
EXPORTDEF(dispose(p))
EXPORTDEF(heapmax(p))
EXPORTDEF(newstring(s))
EXPORTDEF(newnstring(s,n))
EXPORTDEF(newarray(p,n))
EXPORTDEF(renewarray(p,n))
/* fatal3 -- print message s with the file name and line number on stderr,
 * then abort(). Does not return. */
EXPORT void fatal3(const char *s, const char *file, const unsigned int line)
{
  /* line is unsigned, so it is printed with %u */
  fprintf(stderr, "%s (file %s, line %u)\n", s, file, line);
  abort();
}
/* heap_newstring -- return a newly allocated copy of s, or NULL if s is
 * NULL. Calls fatal3() with the given file and line if memory runs out. */
EXPORT char * heap_newstring(const char *s, const char *file, const int line)
{
  char *copy;
  size_t size;

  if (s == NULL) return NULL;

  size = (strlen(s) + 1) * sizeof(*copy);	/* Includes the final '\0' */
  copy = malloc(size);
  if (copy == NULL) fatal3("out of memory", file, line);
  strcpy(copy, s);
  return copy;
}
/* heap_newnstring -- return a newly allocated, '\0'-terminated string of
 * n+1 bytes holding at most the first n characters of s, or NULL if s is
 * NULL. Calls fatal3() with the given file and line if memory runs out. */
EXPORT char * heap_newnstring(const char *s, const size_t n,
			      const char *file, const int line)
{
  char *copy;

  if (s == NULL) return NULL;

  copy = malloc((n + 1) * sizeof(*copy));
  if (copy == NULL) fatal3("out of memory", file, line);

  strncpy(copy, s, n);		/* Pads with '\0' if s is shorter than n */
  copy[n] = '\0';
  return copy;
}
html-xml-utils-7.6/hxxmlns.c 0000645 0001750 0001750 00000016311 13205765541 013067 0000000 0000000 /*
* hxxmlns - expand XML Namespace prefixes
*
* Expand all element and attribute names to "global names" by
* expanding the prefix. All names will be printed as "{URL}name".
* Attribute names without a prefix will have an empty namespace part:
* "{}name".
*
* Copyright © 1994-2000 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: 22 Mar 2000
* Version: $Id: hxxmlns.c,v 1.8 2017/11/24 09:50:25 bbos Exp $
*
**/
#include "config.h"
#include
#ifdef HAVE_UNISTD_H
# include
#endif
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
#endif
#include
#include
#include
#include "export.h"
#include "types.e"
#include "heap.e"
#include "html.e"
#include "scan.e"
#include "dict.e"
#include "openurl.e"
#include "errexit.e"
extern int yylineno; /* From scan.l */
/* The symbol table is a chain of prefix/uri pairs. Every time an
 * element starts, the prefixes defined by it are added to the chain
 * (do_tag() links each new pair in at the head, symtable). To
 * expand a prefix, the most recently added prefix/uri pair is used.
 * When an element ends, the chain is reduced to what it was when the
 * element started. The stack keeps track of where the chain started
 * (the value of symtable) at the start of the element.
 *
 * ToDo: should we hash the prefixes? or is linear search good enough?
 **/
/* Symbol -- one prefix/uri pair in the chain */
typedef struct _Symbol {
string prefix;
string uri;
struct _Symbol *next;
} Symbol, *SymbolTable;
/* Stack -- one mark per open element: the value symtable had when the
 * element started */
typedef struct _StackElt {
Symbol *frame;
struct _StackElt *next;
} *Stack;
/* The predefined "xml" prefix; it is the initial chain and is not heap
 * allocated */
static Symbol xml = {"xml", "http://www.w3.org/XML/1998/namespace", NULL};
static bool has_error = false; /* Set when any error was reported */
static SymbolTable symtable = &xml; /* Head of the chain */
static Stack stack = NULL; /* Marks of the open elements */
static bool do_decls = true; /* Print decl, comment, PI? */
/* print_globalname -- print a name with expanded prefix
 *
 * Prints name as "{uri}local". A name without a prefix is printed unchanged
 * when use_default is false; otherwise the empty prefix (the default
 * namespace) is looked up. An undefined non-empty prefix is reported on
 * stderr, sets has_error and is printed with an empty uri. The name is
 * split in place while working, but is restored before returning. */
static void print_globalname(string name, bool use_default)
{
  string colon, prefix, local;
  Symbol *s;

  /* Split the name */
  colon = strchr(name, ':');
  if (!colon && !use_default) {		/* No prefix & no default ns */
    printf("%s", name);
    return;
  }
  if (colon) {
    *colon = '\0';			/* Temporarily end name after the prefix */
    prefix = name;
    local = colon + 1;
  } else {
    prefix = "";
    local = name;
  }

  /* Find the prefix in the symbol table */
  for (s = symtable; s && !eq(prefix, s->prefix); s = s->next) ;

  if (!s && !eq(prefix, "")) {
    fprintf(stderr, "%d: prefix \"%s\" not defined\n", yylineno, prefix);
    has_error = true;
    /* To do: do we report anything if the default prefix is undefined? */
  }

  /* ToDo: check that any '}' in uri is escaped */
  printf("{%s}%s", s ? s->uri : (string)"", local);

  /* Give the caller its string back intact */
  if (colon) *colon = ':';
}
/* is_nsdecl -- true if name is exactly "xmlns" or starts with "xmlns:" */
static bool is_nsdecl(const char *name)
{
  return strncmp(name, "xmlns", 5) == 0
    && (name[5] == '\0' || name[5] == ':');
}

/* do_tag -- print a start or empty tag expanded
 *
 * Marks the current head of the symbol table on the stack, adds the
 * namespace declarations found among the attributes, then prints the tag.
 * The element name uses the default namespace, attribute names do not.
 * Namespace declarations themselves are not printed. An attribute whose
 * name merely begins with the letters "xmlns" (e.g., "xmlnsfoo") is an
 * ordinary attribute. */
static void do_tag(string name, pairlist attribs, bool empty)
{
  Stack h;
  pairlist p;
  Symbol *sym;

  /* Mark the current end of the symbol table */
  new(h);
  h->next = stack;
  h->frame = symtable;
  stack = h;

  /* Scan the attributes for namespace definitions and store them */
  for (p = attribs; p; p = p->next) {
    if (is_nsdecl(p->name)) {
      new(sym);
      /* "xmlns" declares the empty prefix, "xmlns:p" declares "p" */
      sym->prefix = newstring(p->name + (p->name[5] ? 6 : 5));
      sym->uri = newstring(p->value);
      sym->next = symtable;
      symtable = sym;
    }
  }

  /* Print the tag with prefixes expanded */
  putchar('<');
  print_globalname(name, true);
  for (p = attribs; p; p = p->next) {
    if (!is_nsdecl(p->name)) {
      putchar(' ');
      print_globalname(p->name, false);
      printf("=\"%s\"", p->value);
    }
  }
  fputs(empty ? "/>" : ">", stdout);
}
/* pop_symboltable -- drop the namespace declarations of the element that
 * ends and remove its mark from the stack; name is not used */
static void pop_symboltable(string name)
{
  Symbol *doomed;
  Stack top = stack;

  if (top == NULL) {
    /* More end tags than start tags; only mentioned if no earlier error */
    if (! has_error) fprintf(stderr, "%d: too many end tags\n", yylineno);
    has_error = true;
    return;
  }

  /* Remove entries from symbol table chain until last mark */
  for (; symtable != top->frame; ) {
    doomed = symtable;
    symtable = doomed->next;
    dispose(doomed->prefix);
    dispose(doomed->uri);
    dispose(doomed);
  }

  /* Pop stack itself */
  stack = top->next;
  dispose(top);
}
/* handle_error -- report a parse error as "line: message" on stderr and
 * remember that an error occurred */
void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;		/* Not needed here */
  has_error = true;
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- called before the first event is reported; this program keeps
 * no client data, so it returns NULL */
void* start(void)
{
  void *no_clientdata = NULL;

  return no_clientdata;
}
/* end -- called after the last event is reported; nothing to do */
void end(void *clientdata)
{
  (void) clientdata;		/* Not needed here */
}
/* handle_comment -- called after a comment is parsed; copies the comment
 * to the output unless declarations are suppressed, then frees the text */
void handle_comment(void *clientdata, string commenttext)
{
  /* The format must carry the comment delimiters and a conversion for the text */
  if (do_decls) printf("<!--%s-->", commenttext);
  free(commenttext);
}
/* handle_text -- copy a parsed text chunk to the output unchanged, then
 * free it */
void handle_text(void *clientdata, string text)
{
  (void) clientdata;		/* Not needed here */
  fputs(text, stdout);
  free(text);
}
/* handle_decl -- called after a declaration is parsed
 *
 * Unless declarations are suppressed, prints the DOCTYPE with its root
 * element name gi, followed by PUBLIC and the fpi (and the url, if there is
 * one too) or by SYSTEM and the url, closed by exactly one '>'. Frees all
 * three strings; fpi and url may be NULL. */
void handle_decl(void *clientdata, string gi, string fpi, string url)
{
  if (do_decls) {
    printf("<!DOCTYPE %s", gi);
    if (fpi && url) printf(" PUBLIC \"%s\" \"%s\"", fpi, url);
    else if (fpi) printf(" PUBLIC \"%s\"", fpi);
    else if (url) printf(" SYSTEM \"%s\"", url);
    printf(">");
  }
  free(gi);
  free(fpi);			/* free(NULL) is a no-op */
  free(url);
}
/* handle_pi -- called after a PI is parsed; copies the processing
 * instruction to the output unless declarations are suppressed, then frees
 * the text */
void handle_pi(void *clientdata, string pi_text)
{
  /* The opening delimiter is part of the format; the closing '>' follows the text */
  if (do_decls) printf("<?%s>", pi_text);
  free(pi_text);
}
/* handle_starttag -- print a start tag with expanded names, then free the
 * name and the attributes */
void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  const bool is_empty = false;

  do_tag(name, attribs, is_empty);
  pairlist_delete(attribs);
  free(name);
}
/* handle_emptytag -- print an empty tag with expanded names and drop its
 * namespace declarations again at once, then free the name and the
 * attributes */
void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  const bool is_empty = true;

  do_tag(name, attribs, is_empty);
  pop_symboltable(name);	/* The element ends immediately */
  pairlist_delete(attribs);
  free(name);
}
/* handle_endtag -- called after an endtag is parsed (name may be "");
 * prints the end tag with its name expanded, drops the namespace
 * declarations of the element and frees the name */
void handle_endtag(void *clientdata, string name)
{
  /* Print the end tag; the opening "</" must be written explicitly */
  printf("</");
  print_globalname(name, true);
  putchar('>');

  /* Unwind the symbol table */
  pop_symboltable(name);
  free(name);
}
/* usage -- print the version and the command synopsis on stderr and exit
 * with status 2 */
static void usage(string prog)
{
  fprintf(stderr, "Version %s\nUsage: %s [-d] [xml-file-or-url]\n",
	  VERSION, prog);
  exit(2);
}
/* main -- expand namespace prefixes in an XML file, URL or standard input.
 * Exit status: 0 = OK, 1 = error reported or input could not be opened,
 * 2 = usage error, 3 = parser failed */
int main(int argc, char *argv[])
{
  int i = 1, status = 200;

  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  /* Parse command line arguments: options run up to "--" or to the first
   * argument that does not start with '-' */
  while (i < argc && argv[i][0] == '-' && !eq(argv[i], "--")) {
    if (argv[i][1] == 'd') do_decls = false;
    else usage(argv[0]);
    i++;
  }
  if (i < argc && eq(argv[i], "--")) i++;

  /* At most one operand: the file or URL to read */
  if (i == argc) yyin = stdin;
  else if (i == argc - 1) yyin = fopenurl(argv[i], "r", &status);
  else usage(argv[0]);

  if (yyin == NULL) {
    perror(argv[i]);
    exit(1);
  }
  if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));

  if (yyparse() != 0) exit(3);

  return has_error ? 1 : 0;
}
html-xml-utils-7.6/hxtoc.1 0000644 0001750 0001750 00000010260 12704011745 012416 0000000 0000000 .de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXTOC" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxtoc \- insert a table of contents in an HTML file
.SH SYNOPSIS
.B hxtoc
.RB "[\| " \-x " \|]"
.RB "[\| " \-l
.IR low " \|]"
.RB "[\| " \-h
.IR high " \|]"
.RB "[\| " \-t " \|]"
.RB "[\| " \-d " \|]"
.RB "[\| " \-c
.IR class " \|]"
.RB "[\| " \-f " \|]"
.RI "[\| " file-or-URL " \|]"
.SH DESCRIPTION
.LP
The
.B hxtoc
command reads an HTML file, inserts missing ID attributes in all H1 to
H6 elements between the levels
.B \-l
and
.B \-h
(unless the option
.B \-d
is in effect, see below) and also inserts A elements with NAME
attributes, so old browsers will recognize the H1 to H6 headers as
target anchors as well (unless the option
.B \-t
is in effect). The output is written to stdout.
.LP
If there is a comment of the form
.d
<!\-\-toc\-\->
.e
or a pair of comments
.d
<!\-\-begin\-toc\-\->
\&...
<!\-\-end\-toc\-\->
then the comment, or the pair with everything in between, will be
replaced by a table of contents, consisting of a list (UL) of links to
all headers in the document.
.LP
The text of headers is copied to this table of contents, including any
inline markup, except that ID attributes, DFN tags and SPAN tags with
a CLASS of "index" are omitted (but the elements' content is copied).
.LP
The copied text can optionally be "flattened" first, see option
.BR \-f .
.LP
If a header has a CLASS attribute with as value (or one of its values)
the keyword "no-toc", then that header will not appear in the table
of contents.
.SH OPTIONS
The following options are supported:
.TP 10
.B \-x
Use XML conventions: empty elements are written with a slash at the
end: <IMG\ />
.TP
.BI \-l " low"
Sets the lowest numbered header to appear in the table of
contents. Default is 1 (i.e., H1).
.TP
.BI \-h " high"
Sets the highest numbered header to appear in the table of
contents. Default is 6 (i.e., H6).
.TP
.B \-t
Normally,
.B hxtoc
adds both ID attributes and empty A elements with a NAME attribute and
CLASS="bctarget", so that older browsers that do not understand ID will
still find the target. With this option, the A elements will not be
generated.
.TP
.BI \-c " class"
The generated UL elements in the table of contents will have a CLASS attribute with the value
.I class.
The default is "toc".
.TP
.B \-d
Tries to use sectioning elements as targets in the table of contents
instead of H1 to H6. A sectioning element is a DIV, SECTION, ARTICLE,
ASIDE or NAV element whose first child is a heading element (H1 to H6)
or an HGROUP. The sectioning element will be given an ID if it doesn't
have one yet. With this option, the level of any H1 to H6 that is the
first child of a sectioning element (or of an HGROUP that is itself
the first child of a sectioning element) is not determined by its
name, but by the nesting depth of the sectioning elements. (Any H1 to
H6 that are not the first child of a sectioning element still have
their level implied by their name.)
.TP
.B \-f
Flatten the text of the table of contents. Without
.BR \-f ,
the contents of header elements are copied to the table of contents
almost unchanged, i.e., including any child elements and their
attributes (except for ID attributes, DFN elements and certain SPAN
elements, as explained above). With
.BR \-f ,
the contents are flattened instead: All child elements are removed and
only their contents are copied to the table of contents. Additionally
elements with an ALT attribute, such as IMG, are replaced by the
contents of the ALT attribute. Exception: BDO tags are copied
unchanged and elements with a DIR attribute are replaced by a SPAN
with that DIR attribute. (BDO and DIR may occur in languages written
right-to-left.)
.SH OPERANDS
The following operand is supported:
.TP 10
.I file-or-URL
The name or URL of an HTML file. If absent, standard input is read instead.
.SH "DIAGNOSTICS"
The following exit values are returned:
.TP 10
.B 0
Successful completion.
.TP
.B > 0
An error occurred in the parsing of the HTML file.
.B hxtoc
will try to correct the error and produce output anyway.
.SH "SEE ALSO"
.BR asc2xml (1),
.BR hxnormalize (1),
.BR hxnum (1),
.BR xml2asc (1)
.SH BUGS
.LP
The error recovery for incorrect HTML is primitive.
html-xml-utils-7.6/hxclean.c 0000645 0001750 0001750 00000005355 13205765541 013016 0000000 0000000 /*
* Clean up an HTML file:
* Insert missing tags.
*
* Copyright © 1994-2000 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* 16 September 1997
* Bert Bos
* $Id: hxclean.c,v 1.4 2017/11/24 09:50:25 bbos Exp $
*/
#include "config.h"
#include
#include
#include "export.h"
#include "types.e"
#include "tree.e"
#include "html.e"
#include "scan.e"
static Tree tree;
/* handle_error -- report a parse error as "line: message" on stderr */
void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;		/* Not needed here */
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- called before the first event is reported; creates the tree.
 * The tree is kept in a file-level variable, so no client data is returned */
void* start(void)
{
  void *no_clientdata = NULL;

  tree = create();
  return no_clientdata;
}
/* end -- called after the last event is reported; nothing to do */
void end(void *clientdata)
{
  (void) clientdata;		/* Not needed here */
}
/* handle_comment -- add a parsed comment to the tree */
void handle_comment(void *clientdata, string commenttext)
{
  Tree grown = append_comment(tree, commenttext);

  tree = grown;
}
/* handle_text -- add a parsed text chunk to the tree */
void handle_text(void *clientdata, string text)
{
  Tree grown = append_text(tree, text);

  tree = grown;
}
/* handle_decl -- add a parsed declaration to the tree */
void handle_decl(void *clientdata, string gi,
		 string fpi, string url)
{
  Tree grown = append_declaration(tree, gi, fpi, url);

  tree = grown;
}
/* handle_pi -- add a parsed processing instruction to the tree */
void handle_pi(void *clientdata, string pi_text)
{
  Tree grown = append_procins(tree, pi_text);

  tree = grown;
}
/* handle_starttag -- open a new element in the tree */
void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  Tree opened = html_push(tree, name, attribs);

  tree = opened;
}
/* handle_emptytag -- add an element for an empty tag to the tree.
 * NOTE(review): the element is pushed but not popped here -- confirm that
 * html_push() itself takes care of closing it. */
void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  Tree opened = html_push(tree, name, attribs);

  tree = opened;
}
/* handle_endtag -- parser callback for an end tag (name may be ""): pop it */
void handle_endtag(void *clientdata, string name)
{
  Tree shrunk = html_pop(tree, name);

  (void) clientdata;
  tree = shrunk;
}
/* main -- parse an HTML file (or stdin) and dump the resulting tree on stdout */
int main(int argc, char *argv[])
{
  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  /* Select the input: no argument means stdin, one argument names a file */
  switch (argc) {
  case 1:
    yyin = stdin;
    break;
  case 2:
    yyin = fopen(argv[1], "r");
    if (yyin == NULL) {
      perror(argv[1]);
      exit(2);
    }
    break;
  default:				/* Anything else: explain the usage */
    fprintf(stderr, "Version %s\n", VERSION);
    fprintf(stderr, "Usage: %s [html-file]\n", argv[0]);
    exit(1);
  }

  if (yyparse() != 0) {
    exit(3);
  }

  /* Write the tree that the handlers built */
  tree = get_root(tree);
  dumptree(tree, stdout);
  return 0;
}
html-xml-utils-7.6/test-driver 0000755 0001750 0001750 00000010277 12517753264 013430 0000000 0000000 #! /bin/sh
# test-driver - basic testsuite driver script.
scriptversion=2013-07-13.22; # UTC
# Copyright (C) 2011-2013 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# As a special exception to the GNU General Public License, if you
# distribute this file as part of a program that contains a
# configuration script generated by Autoconf, you may include it under
# the same distribution terms that you use for the rest of that program.
# This file is maintained in Automake, please report
# bugs to <bug-automake@gnu.org> or send patches to
# <automake-patches@gnu.org>.
# Make unconditional expansion of undefined variables an error. This
# helps a lot in preventing typo-related bugs.
set -u
# usage_error MESSAGE...
# Print "$0: MESSAGE" followed by the usage text on stderr, then exit
# with status 2.
usage_error ()
{
  {
    echo "$0: $*"
    print_usage
  } >&2
  exit 2
}
print_usage ()
{
cat <$log_file 2>&1
# Capture the exit status of the command that just ran.
estatus=$?
# Unless hard errors are enabled, an exit status of 99 is downgraded to
# an ordinary failure (status 1).
if test $enable_hard_errors = no && test $estatus -eq 99; then
estatus=1
fi
# Map "exit status:expected failure" to a result name, a console color,
# and the recheck / copy-in-global-log flags. The first matching
# pattern wins, so the specific statuses 0, 77 and 99 are tested before
# the catch-all patterns.
case $estatus:$expect_failure in
0:yes) col=$red res=XPASS recheck=yes gcopy=yes;;
0:*) col=$grn res=PASS recheck=no gcopy=no;;
77:*) col=$blu res=SKIP recheck=no gcopy=yes;;
99:*) col=$mgn res=ERROR recheck=yes gcopy=yes;;
*:yes) col=$lgn res=XFAIL recheck=no gcopy=yes;;
*:*) col=$red res=FAIL recheck=yes gcopy=yes;;
esac
# Report outcome to console.
echo "${col}${res}${std}: $test_name"
# Register the test result, and other relevant metadata.
# The first echo truncates $trs_file, the others append to it.
echo ":test-result: $res" > $trs_file
echo ":global-test-result: $res" >> $trs_file
echo ":recheck: $recheck" >> $trs_file
echo ":copy-in-global-log: $gcopy" >> $trs_file
# Local Variables:
# mode: shell-script
# sh-indentation: 2
# eval: (add-hook 'write-file-hooks 'time-stamp)
# time-stamp-start: "scriptversion="
# time-stamp-format: "%:y-%02m-%02d.%02H"
# time-stamp-time-zone: "UTC"
# time-stamp-end: "; # UTC"
# End:
html-xml-utils-7.6/hxname2id.c 0000645 0001750 0001750 00000014103 13205765541 013242 0000000 0000000 /*
* Move target anchors to the element they belong to, i.e., look for
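* <h2><a name="foo">...</a>...</h2> and <h2><a id="foo">...</a>...</h2> and
* replace it with <h2 id="foo"><a>...</a>...</h2>
*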
* There is no attempt to check if the name is a valid SGML/XML token
* or whether it is unique. The replacement is syntactical only.
*
* Copyright © 2004 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: Dec 2004
* Version: $Id: hxname2id.c,v 1.7 2017/11/24 09:50:25 bbos Exp $
*
**/
#include "config.h"
#include
#include
#include
#include
#include
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
# ifndef HAVE_STRSTR
# include "strstr.e"
# endif
#endif
#ifdef HAVE_ERRNO_H
# include
#endif
#ifdef HAVE_SEARCH_H
# include
#else
# include "search-freebsd.h"
#endif
#include "export.h"
#include "types.e"
#include "heap.e"
#include "tree.e"
#include "html.e"
#include "scan.e"
#include "dict.e"
#include "openurl.e"
#include "errexit.e"
static Tree tree;
static bool xml = false; /* Use convention */
/* handle_error -- parser callback: print "<lineno>: <message>" on stderr */
static void handle_error(void *clientdata, const string s, int lineno)
{
  (void) clientdata;			/* Not needed by this handler */
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- called before the first event is reported */
static void* start(void)
{
tree = create();
return NULL;
}
/* end -- parser callback, called after the last event is reported */
static void end(void *clientdata)
{
  (void) clientdata;			/* Nothing to do at end of input */
}
/* handle_comment -- parser callback: add a parsed comment to the tree */
static void handle_comment(void *clientdata, string commenttext)
{
  Tree grown = append_comment(tree, commenttext);

  (void) clientdata;
  tree = grown;
}
/* handle_text -- parser callback: add a parsed chunk of text to the tree */
static void handle_text(void *clientdata, string text)
{
  Tree grown = append_text(tree, text);

  (void) clientdata;
  tree = grown;
}
/* handle_decl -- parser callback: add a parsed declaration to the tree */
static void handle_decl(void *clientdata, string gi, string fpi, string url)
{
  Tree grown = append_declaration(tree, gi, fpi, url);

  (void) clientdata;
  tree = grown;
}
/* handle_pi -- parser callback: add a parsed processing instruction to the tree */
static void handle_pi(void *clientdata, string pi_text)
{
  Tree grown = append_procins(tree, pi_text);

  (void) clientdata;
  tree = grown;
}
/* handle_starttag -- parser callback: push the element of a parsed start tag */
static void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  Tree grown = html_push(tree, name, attribs);

  (void) clientdata;
  tree = grown;
}
/* handle_emptytag -- parser callback: an empty tag is pushed like a start tag */
static void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  (void) clientdata;
  tree = html_push(tree, name, attribs);
}
/* handle_endtag -- parser callback for an end tag (name may be ""): pop it */
static void handle_endtag(void *clientdata, string name)
{
  Tree shrunk = html_pop(tree, name);

  (void) clientdata;
  tree = shrunk;
}
/*
 * has_anchor_child -- check if the first thing in the element is an <a>
 *
 * Comments, processing instructions and whitespace-only text are
 * skipped. Returns true if the first remaining child is an <a> element
 * with an "id" or, failing that, a "name" attribute; the attribute's
 * value is then stored in *nameval. Returns false if non-space text or
 * any other element comes first, or if the element has no such child.
 */
static bool has_anchor_child(Tree t, conststring *nameval)
{
  Tree child = t->children;

  while (child != NULL) {
    if (child->tp == Element) {
      /* The first element decides: it must be <a id=...> or <a name=...> */
      if (! eq(child->name, "a")) return false;
      *nameval = get_attrib(child, "id");
      if (*nameval) return true;
      *nameval = get_attrib(child, "name");
      return *nameval != NULL;
    } else if (child->tp == Text) {
      /* Real text before any element means there is no leading anchor */
      if (! only_space(child->text)) return false;
    } else if (child->tp != Comment && child->tp != Procins) {
      assert(! "Cannot happen");
    }
    child = child->sister;
  }
  return false;
}
/*
 * process -- write the tree, add IDs at elements with an <a> child
 *
 * Writes all children of t to stdout. An element without an "id" whose
 * first child (see has_anchor_child) is an <a> with an "id" or "name"
 * gets that value as its own "id"; the anchor child is then written
 * without its "id" and "name" attributes.
 *
 * remove_anchor is true when t itself has just taken over the ID of
 * its leading <a> child. Only that first element child is stripped;
 * all later children keep their attributes.
 */
static void process(Tree t, bool remove_anchor)
{
  Tree h;
  conststring nameval;
  bool remove_next_anchor;
  pairlist a;

  for (h = t->children; h != NULL; h = h->sister) {
    switch (h->tp) {
    case Text:
      printf("%s", h->text);
      break;
    case Comment:
      printf("<!--%s-->", h->text);
      break;
    case Declaration:
      printf("<!DOCTYPE %s", h->name);
      if (h->text) printf(" PUBLIC \"%s\"", h->text);
      if (h->url) printf(" %s\"%s\"", h->text ? "" : "SYSTEM ", h->url);
      printf(">");
      break;
    case Procins:
      printf("<?%s>", h->text);
      break;
    case Element:
      /* Decide for this element only; a sibling's decision must not leak */
      remove_next_anchor = false;
      if (!get_attrib(h, "id") && has_anchor_child(h, &nameval)) {
	/* Put the anchor on this element and remove it from the child */
	set_attrib(h, "id", nameval);
	remove_next_anchor = true;
      }
      printf("<%s", h->name);
      for (a = h->attribs; a != NULL; a = a->next) {
	/* Print attribs, except id/name that the parent wants us to remove */
	if (!remove_anchor || (!eq(a->name, "id") && !eq(a->name, "name"))) {
	  printf(" %s", a->name);
	  if (a->value != NULL) printf("=\"%s\"", a->value);
	}
      }
      /* The anchor is the first element child; later siblings keep theirs */
      remove_anchor = false;
      if (is_empty(h->name)) {
	assert(h->children == NULL);
	printf(xml ? " />" : ">");
      } else {
	printf(">");
	process(h, remove_next_anchor);
	printf("</%s>", h->name);
      }
      break;
    case Root:
      assert(! "Cannot happen");
      break;
    default:
      assert(! "Cannot happen");
    }
  }
}
/* usage -- print the version and a usage message via errexit() */
static void usage(string name)
{
  errexit("Version %s\nUsage: %s [-x] [html-file]\n",
	  VERSION,
	  name);
}
/*
 * main -- read HTML/XML from a file, URL or stdin and write it with
 * target anchors moved to their parent elements
 *
 * Options: -x writes empty elements in XML style (" />"), -? prints
 * the usage message, "-" stands for standard input. Any other argument
 * is opened with fopenurl(). Exits with 2 if the input cannot be
 * opened and with 3 if yyparse() reports failure.
 */
int main(int argc, char *argv[])
{
  int i, status;

  /* Bind the parser callback routines to our handlers */
  set_error_handler(handle_error);
  set_start_handler(start);
  set_end_handler(end);
  set_comment_handler(handle_comment);
  set_text_handler(handle_text);
  set_decl_handler(handle_decl);
  set_pi_handler(handle_pi);
  set_starttag_handler(handle_starttag);
  set_emptytag_handler(handle_emptytag);
  set_endtag_handler(handle_endtag);

  yyin = stdin;				/* Default, unless a file is named */
  for (i = 1; i < argc; i++) {
    if (eq(argv[i], "-x")) {
      xml = true;
    } else if (eq(argv[i], "-?")) {
      usage(argv[0]);
    } else if (eq(argv[i], "-")) {
      /* yyin = stdin; */
    } else {
      yyin = fopenurl(argv[i], "r", &status);
      /* Name the argument that failed; it need not be argv[1] */
      if (yyin == NULL) {perror(argv[i]); exit(2);}
      if (status != 200) errexit("%s : %s\n", argv[i], http_strerror(status));
    }
  }

  if (yyparse() != 0) exit(3);

  tree = get_root(tree);
  process(tree, false);
  tree_delete(tree);			/* Just to test memory mgmt */
  return 0;
}
html-xml-utils-7.6/hxcount.1 0000644 0001750 0001750 00000002525 12704011745 012766 0000000 0000000 .TH "HXCOUNT" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxcount \- count elements and attributes in HTML or XML files
.SH SYNOPSIS
.B hxcount
.RI "[\| " file-or-URL " \|]"
.SH DESCRIPTION
.LP
The
.B hxcount
command counts the number of elements and attributes of each type that
appear in the input and prints a report on stdout.
.SH OPERANDS
The following operand is supported:
.TP 10
.I file-or-URL
The name or URL of an HTML or XML file. If absent, standard input is read
instead.
.SH "EXIT STATUS"
The following exit values are returned:
.TP 10
.B 0
Successful completion.
.TP
.B > 0
An error occurred in the parsing of the HTML or XML file.
.B hxcount
will try to recover from the error and produce output anyway.
.SH ENVIRONMENT
To use a proxy to retrieve remote files, set the environment variables
.B http_proxy
and
.BR ftp_proxy "."
E.g.,
.B http_proxy="http://localhost:8080/"
.SH BUGS
.LP
Don't trust the output if there were errors in the input.
.LP
Remote files (specified with a URL) are currently only supported for
HTTP. Password-protected files or files that depend on HTTP "cookies"
are not handled. (You can use tools such as
.BR curl (1)
or
.BR wget (1)
to retrieve such files.)
.SH "SEE ALSO"
.BR asc2xml (1),
.BR hxprune (1),
.BR hxnormalize (1),
.BR hxnum (1),
.BR hxtoc (1),
.BR hxunent (1),
.BR xml2asc (1),
.BR UTF-8 " (RFC 2279)"
html-xml-utils-7.6/hxwls.1 0000645 0001750 0001750 00000003725 12704011745 012447 0000000 0000000 .TH "HXWLS" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.de d \" begin display
.sp
.in +4
.nf
.ft CR
.CDS
..
.de e \" end display
.CDE
.in -4
.fi
.ft R
.sp
..
.SH NAME
hxwls \- list links in an HTML file
.SH SYNOPSIS
.B hxwls
.RB "[\| " \-l " \|]"
.RB "[\| " \-t " \|]"
.RB "[\| " \-r " \|]"
.RB "[\| " \-h " \|]"
.RB "[\| " \-a " \|]"
.RB "[\| " \-b
.IR " base" " \|]"
.RI "[\| " file " \|]"
.SH DESCRIPTION
.LP
The
.B hxwls
command reads an HTML file (standard input by default) and prints out
all links it finds. The output is written to stdout.
.SH OPTIONS
The following options are supported:
.TP 10
.B \-l
Produce a long listing. Instead of just the URI,
.B hxwls
prints three columns: the element name, the value of the REL
attribute, and the target URI.
.TP
.B \-t
Produce a tuple listing.
.B hxwls
prints four columns: the URI of the document itself, the element name,
the value of the REL attribute, and the target URI.
.TP
.BI \-r
Print relative URLs as they are, without converting them to absolute
URLs.
.TP
.BI \-b " base"
Use
.I base
as the initial base URL. If there is a <base> element in the document,
it will override the \-b option.
.TP
.B \-h
Output as HTML. The output will be listed in the form of <a> elements.
.TP
.B \-a
Convert any IRIs (Internationalized Resource Identifiers) to
ASCII-only URIs. This causes any non-ASCII characters in the path of a
URI to be encoded as %-escaped octets and non-ASCII characters in the
domain name as punycode. (Punycode encoding is only available if
.B hxwls
is compiled with libidn support.)
.SH OPERANDS
The following operand is supported:
.TP 10
.I file
The name or the URL of an HTML file. If absent, standard input is read instead.
.SH "DIAGNOSTICS"
The following exit values are returned:
.TP 10
.B 0
Successful completion.
.TP
.B > 0
An error occurred in the parsing of the HTML file.
.B hxwls
will try to correct the error and produce output anyway.
.SH "SEE ALSO"
.BR asc2xml (1),
.BR hxnormalize (1),
.BR hxnum (1),
.BR xml2asc (1)
html-xml-utils-7.6/strstr.c 0000645 0001750 0001750 00000001173 13205765541 012727 0000000 0000000 /*
* Copyright © 1994-2000 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: 31 Mar 2000
* Version: $Id: strstr.c,v 1.4 2017/11/24 09:50:25 bbos Exp $
**/
#include "config.h"
#include "export.h"
#ifndef HAVE_STRSTR
/*
 * strstr -- find the first occurrence of needle in haystack
 *
 * Returns a pointer to the start of the first match, haystack itself
 * if needle is NULL or the empty string (also when haystack is empty),
 * and NULL if needle does not occur in haystack.
 */
EXPORT char *strstr(const char *haystack, const char *needle)
{
  const char *s, *t, *u;

  /* No needle, or an empty one, matches at the very start */
  if (! needle || ! *needle) return (char *) haystack;

  for (s = haystack; *s; s++) {
    /* Compare needle against the text starting at s */
    for (t = needle, u = s; *t && *t == *u; t++, u++) ;
    if (! *t) return (char *) s;	/* Found it */
  }
  return NULL;				/* Not found */
}
#endif /* HAVE_STRSTR */
html-xml-utils-7.6/hxunpipe.1 0000644 0001750 0001750 00000003214 12714373762 013145 0000000 0000000 .de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXUNPIPE" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxunpipe \- convert output of hxpipe back to XML format
.SH SYNOPSIS
.B hxunpipe
.RB "[\| " \-b " \|]"
.RI "[\| " file-or-URL " \|]"
.SH DESCRIPTION
.B hxunpipe
takes the output of
.BR hxpipe (1)
(or of
.BR onsgmls (1))
and turns it back into XML/SGML mark-up.
.SH OPTIONS
The following options are supported:
.TP 10
.B \-b
Normally,
.B hxunpipe
assumes the input was made by hxpipe, i.e., the
input may contain character entities but will never contain SGML/XML
delimiters (<>&"') that need to be escaped. When the input was made by
(o)nsgmls, however, the entities will have been expanded and the input
may contain SGML/XML delimiters. The option
.B \-b
causes
.B hxunpipe
to look for those delimiters and escape them: "<" as "&lt;", ">" as
"&gt;", "&" as "&amp;", """ as "&quot;" and "'" as "&apos;".
.SH OPERANDS
The following operand is supported:
.TP 10
.I file-or-URL
The name or URL of an HTML file. If absent, standard input is read
instead.
.SH "EXIT STATUS"
The following exit values are returned:
.TP 10
.B 0
Successful completion.
.TP
.B > 0
An error occurred in the input.
.SH ENVIRONMENT
To use a proxy to retrieve remote files, set the environment variables
.B http_proxy
and
.BR ftp_proxy "."
E.g.,
.B http_proxy="http://localhost:8080/"
.SH BUGS
.LP
Not all syntax errors in the input are recognized.
.LP
.B hxunpipe
can currently only retrieve remote files over HTTP. It doesn't handle
password-protected files, nor files whose content depends on HTTP
"cookies."
.SH "SEE ALSO"
.BR hxpipe (1),
.BR onsgmls (1).
html-xml-utils-7.6/hxcite.c 0000645 0001750 0001750 00000025105 13205765541 012653 0000000 0000000 /*
* cite - adds hyperlinks to bibliographic references in HTML
*
* The program looks for strings of the form [[name]] (i.e., a
* bibliographic label inside a double pair of square brackets), e.g.,
* [[Knuth84]] or [[LieBos97]]. The label will be looked up in a
* bibliography database and if it is found, the string will be
* replaced by a pattern which is typically of the form
* <a href="...">[name]</a>, but the pattern can be changed
* with a command line option.
*
* If the string is of the form {{name}}, the name will be looked up,
* but the string will be copied unchanged.
*
* If the label is not found, a warning is printed and the string is
* left unchanged.
*
* All labels that are found are also stored, one label per line, in a
* separate file with extension .aux. This file can be used by mkbib
* to create the bibliography by extracting the corresponding
* bibliographic entries from the database.
*
* The bibliography database must be a refer-style database. Though
* for the purposes of this program all lines that don't start with
* "%L" or %K are ignored. Lines with "%L" are assumed to contain a
* label. Lines with %K are assumed to contain whitespace separated
* keywords, which are effectively aliases for the label. Entries must
* have one %L line and one or zero %K lines.
*
* Options:
*
* -b base
* Give the value for %b in the pattern.
*
* -p pattern
* The replacement for the string [[label]]. The default is
*
* [%L]
*
* %L will be replaced by the label, %b by the value of the -b
* option and %m by the marker (-m option).
*
* -a auxfile
* The name of the file in which the list of labels will be stored.
* Default is the name of the file given as argument, minus its
* extension, plus ".aux". If no file is given (input comes from
* stdin), the default name is "aux.aux".
*
* -m marker
* By default, the program looks for "[[name]]", but it can be
* made to look for "[[Xname]]" where X is some string, usually a
* symbol such as '!' or '='. This allows references to be
* classified, e.g., "[[!name]]" for normative references and
* "[[name]]" for non-normative references.
*
* -c
* Assume that every pair "<!--" and "-->" delimit a comment and
* do not process any [[label]] that occurs between them. Any
* "{{label}}" is processed as normal. This does not actually
* parse the input as HTML or XML and thus the program will
* mistake occurrences of these two strings inside CDATA sections
* or attribute values for comment delimiters.
*
* Copyright © 1994-2012 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: 18 March 2000
* Version: $Id: hxcite.c,v 1.10 2017/11/24 09:50:25 bbos Exp $
**/
#include "config.h"
#ifdef HAVE_UNISTD_H
# include
#endif
#include
#include
#include
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
# ifndef HAVE_STRSTR
# include "strstr.e"
# endif
#endif
#ifdef HAVE_SEARCH_H
# include
#else
# include "hash.e"
#endif
#include
#include
#include "export.h"
#include "heap.e"
#include "types.e"
#include "errexit.e"
/* Warning: arbitrary limits! */
#define LINESIZE 32768
#define HASHSIZE 4096 /* Size of hash table */
#define WS " \t\r\n\f" /* Separates %K keywords */
static string base = ""; /* URL of bibliography (%b in the pattern) */
static string mark = ""; /* Marker expected right after "[[" (%m in the pattern) */
static size_t marklen = 0; /* Length of mark */
static string prog; /* = argv[0] */
/* Replacement pattern for [[label]]; expand_ref() substitutes %L, %b, %m and %% */
/* NOTE(review): the file header says the pattern is typically a hyperlink; */
/* this literal looks as if its markup was lost -- confirm against the original */
static string pattern =
"[%L]";
static FILE *aux;
static bool skip_comments = false; /* Whether to skip [[ inside comments (-c option) */
/* get_label -- look up keyword in the hash table; return its label or NULL */
static string get_label(const string keyword)
{
  ENTRY query, *found;

  query.key = keyword;
  query.data = NULL;
  found = hsearch(query, FIND);
  if (found == NULL) return NULL;	/* Keyword is not in the table */
  return (string) found->data;
}
/*
 * valid_label -- check if the label is well-formed
 *
 * A label is well-formed if it consists only of characters for which
 * isalnum() is true and of '-', '_' and '.'. The empty string is
 * accepted.
 */
static bool valid_label(const string label)
{
  int i;

  for (i = 0; label[i]; i++) {
    /* isalnum() is only defined for unsigned char values (and EOF) */
    if (! isalnum((unsigned char) label[i])
	&& label[i] != '-'
	&& label[i] != '_'
	&& label[i] != '.') return false;
  }
  return true;
}
/*
 * expand_ref -- print the reformatted reference
 *
 * Writes the global pattern to stdout, replacing %L by label, %b by
 * the base URL, %m by the mark and %% by a literal '%'. Any other
 * character after a '%' is dropped together with the '%'.
 */
static void expand_ref(const string label)
{
  int i;

  /* ToDo: somehow allow sequence numbers for references [1], [2], etc. */
  for (i = 0; pattern[i]; i++) {
    if (pattern[i] != '%') {
      putchar(pattern[i]);
      continue;
    }
    switch (pattern[++i]) {
    case '%': putchar('%'); break;		/* Literal '%' */
    case 'b': printf("%s", base); break;	/* Base URL */
    case 'L': printf("%s", label); break;	/* Label */
    case 'm': printf("%s", mark); break;	/* Mark (-m option) */
    case '\0': return;				/* Lone '%' at the end: stop here, */
						/* do not step past the terminator */
    default: break;				/* Error in pattern */
    }
  }
}
/* process_line -- look for citations in a line */
EXPORT void process_line(const string text, const string fname, int lineno,
bool *in_comment)
{
string h = text, p, q, label = NULL, key;
char c;
/* Loop over occurrences of "[[" + mark + label + "]]"
and "{{" + mark + label + "}}" */
while (*in_comment ? (p = strpbrk(h, "-{")) : (p = strpbrk(h, "[{<"))) {
while (h != p) putchar(*(h++)); /* Print text up to here */
if (strncmp(p, "-->", 3) == 0) { /* End of comment */
putchar(*(h++));
*in_comment = false;
continue;
}
if (strncmp(p, ""
data [^<\r\n]+
doctype ])*\]\]>
%s MARKUP VALUE DECL INIT CDATA
%%
\357\273\277 {BEGIN(INIT); /* Byte Order Mark is ignored */}
"<"{name} {BEGIN(MARKUP); yylval.s=strdup(yytext+1); return START;}
""({name})? {BEGIN(MARKUP); yylval.s=strdup(yytext+2); return END;}
{data} {yylval.s=strdup(yytext); return TEXT;}
{cdata} {yylval.s=strdup(yytext); lns(yytext); return TEXT;}
{nl} {yylval.s=strdup(yytext); lineno++; return TEXT;}
{comment} {yylval.s=strndup(yytext+4,yyleng-7); lns(yytext); return COMMENT;}
{doctype} {BEGIN(DECL); lns(yytext+9); return DOCTYPE;}
""[^>]*">" {yylval.s=strndup(yytext+2,yyleng-3); lns(yytext); return PROCINS;}
"<" {yylval.s=strdup("<"); return TEXT;}
{name} {yylval.s = strdup(yytext); return NAME;}
"=" {BEGIN(VALUE); return '=';}
[ \t\f]+ {; /* skip */}
{nl} {lineno++; /* skip */}
">" {BEGIN(INIT); return '>';}
"/>" {BEGIN(INIT); return EMPTYEND;}
"<" {BEGIN(INIT); yyless(0); return '>'; /* Implicit ">" */}
[ \t\f]+ {; /* skip */}
{nl} {lineno++; /* skip */}
{thing} {BEGIN(MARKUP); yylval.s=strdup(yytext); return NAME;}
\"[^"]*\" |
\'[^']*\' {BEGIN(MARKUP); yylval.s=esc(yytext); lns(yytext); return STRING;}
{name} {yylval.s = strdup(yytext); return NAME;}
[ \t\f]+ {; /* skip */}
{nl} {lineno++; /* skip */}
\"[^"]*\" |
\'[^']*\' {lns(yytext); yylval.s = esc(yytext); return STRING;}
">" {BEGIN(INIT); return '>';}
([^<]|\<[^/]|\<\/[^{a-z:._-])* {lns(yytext); yylval.s = strdup(yytext); return TEXT;}
""{name} {lns(yytext);
if (strcasecmp(yytext+2, cur_cdata_element) == 0) {
BEGIN(MARKUP);
yylval.s = strdup(yytext+2);
return END;
} else {
yylval.s = strdup(yytext);
return TEXT;
}
}
. {return *yytext; /* illegal char, in fact */}
<> {if (pop_file()) return ENDINCL; else yyterminate();}
%%
/* set_cdata_element -- set parsing rule for an element with CDATA content */
/*
 * Remembers a copy of the element name e in cur_cdata_element, after
 * disposing of the previously stored name, and switches the scanner to
 * the CDATA start condition. The CDATA rule above compares each end
 * tag against cur_cdata_element with strcasecmp().
 */
EXPORT void set_cdata_element(const conststring e)
{
dispose(cur_cdata_element);
cur_cdata_element = newstring(e);
BEGIN(CDATA);
}
/*
* Local variables:
* mode: indented-text
* End:
*/
html-xml-utils-7.6/configure.ac 0000645 0001750 0001750 00000003564 13237335152 013512 0000000 0000000 # -*- Autoconf -*-
# Process this file with autoconf to produce a configure script.
AC_PREREQ([2.69])
AC_INIT([html-xml-utils],[7.6])
dnl print all automake warnings with -Wall
dnl http://sources.redhat.com/automake/automake.html#Options
AM_INIT_AUTOMAKE([-Wall])
AC_CONFIG_SRCDIR([cexport.c])
AC_CONFIG_HEADERS([config.h])
AC_CONFIG_MACRO_DIRS([m4])
# Checks for programs.
AC_PROG_YACC
AC_PROG_CC
AC_PROG_MAKE_SET
AM_PROG_LEX
# AC_PROG_INSTALL
# AC_PROG_CPP
# AC_PROG_LN_S
# AC_PROG_AWK
# AC_PROG_MAN2HTML
# Checks for libraries.
AC_SEARCH_LIBS(socket, socket nsl)
AC_SEARCH_LIBS(gethostbyname, socket nsl)
AC_SEARCH_LIBS(iconv, iconv)
LIBIDN2_CHECK
LIBIDN_CHECK
LIBCURL_CHECK_CONFIG(yes, 7.9.7)
# Checks for header files.
AC_HEADER_STDC
AC_FUNC_ALLOCA
AC_CHECK_HEADERS([arpa/inet.h errno.h fcntl.h inttypes.h libintl.h locale.h malloc.h netdb.h netinet/in.h stddef.h stdlib.h string.h strings.h sys/param.h sys/socket.h sys/time.h unistd.h search.h wchar.h])
# Checks for typedefs, structures, and compiler characteristics.
AC_CHECK_HEADER_STDBOOL
AC_C_CONST
AC_C_INLINE
AC_TYPE_INT16_T
AC_TYPE_INT32_T
AC_TYPE_INT8_T
AC_TYPE_SIZE_T
AC_TYPE_SSIZE_T
AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT8_T
# Checks for library functions.
AC_FUNC_MALLOC
AC_FUNC_REALLOC
AC_FUNC_STRERROR_R
AC_FUNC_VPRINTF
AC_CHECK_FUNCS([atexit getcwd memchr gethostbyname memmove memset regcomp select setlocale socket strcasecmp strchr strcspn strdup strerror strncasecmp strndup strpbrk strrchr strspn strstr strtol strtoul fopencookie])
AC_REPLACE_FUNCS(strdup strerror strstr tsearch tfind twalk)
# Check for library variables
CHECK_GETOPT_OPTRESET
# Optimization flags for flex
AC_FLEX_OPTIMIZE
AC_SUBST(lex_opt_flags)
AC_CONFIG_FILES([Makefile])
AC_OUTPUT
if test "$libidn2" = "no" && test "$libidn" = "no"; then
AC_MSG_WARN([Neither libidn2 nor libidn found])
fi
html-xml-utils-7.6/unent.c 0000755 0001750 0001750 00000050766 13205772316 012533 0000000 0000000 /* ANSI-C code produced by gperf version 3.1 */
/* Command-line: gperf -a -c -C -o -t -p -k '1,2,$' -D -N lookup_entity unent.hash */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126))
/* The character set is not based on ISO-646. */
#error "gperf generated tables don't work with this execution character set. Please report a bug to ."
#endif
#line 1 "unent.hash"
/* -*-indented-text-*- */
/*
* Copyright © 1998-2010 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/copyright-software
*
* Author: Bert Bos
* Created: 2 Dec 1998
*
* Input file for gperf, to generate a perfect hash function
* of all HTML named character entities. This list translates
* names to Unicode numbers.
*/
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
#endif
#ifdef HAVE_UNISTD_H
# include
#endif
#include
#include
#include
#include "export.h"
EXPORT struct _Entity {char *name; unsigned int code;};
EXPORT const struct _Entity *lookup_entity (register const char *str,
register size_t len);
#line 37 "unent.hash"
struct _Entity;
#define TOTAL_KEYWORDS 253
#define MIN_WORD_LENGTH 2
#define MAX_WORD_LENGTH 8
#define MIN_HASH_VALUE 10
#define MAX_HASH_VALUE 533
/* maximum key range = 524, duplicates = 2 */
/*
 * hash -- gperf-generated hash function for the entity names
 *
 * The result is len plus three lookups in asso_values: for the first
 * byte, for the second byte (offset by 2) and for the last byte of
 * str. Table entries of 534 lie above MAX_HASH_VALUE (533);
 * presumably they mark bytes that occur in no keyword at that
 * position.
 *
 * The function reads str[1] and str[len - 1].
 * NOTE(review): it therefore seems to rely on the caller to pass only
 * strings with len >= 2 (cf. MIN_WORD_LENGTH) -- confirm in
 * lookup_entity.
 *
 * This code is generated; do not edit the table by hand.
 */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (register const char *str, register size_t len)
{
static const unsigned short asso_values[] =
{
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 40,
70, 20, 50, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 193, 5, 215, 0, 219,
534, 185, 5, 215, 190, 45, 5, 0, 30, 199,
35, 534, 15, 15, 10, 110, 0, 534, 10, 40,
0, 534, 534, 534, 534, 534, 534, 10, 210, 5,
155, 0, 200, 10, 15, 100, 175, 155, 20, 150,
145, 45, 65, 200, 35, 105, 55, 75, 140, 115,
0, 250, 80, 534, 100, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534, 534, 534,
534, 534, 534, 534, 534, 534, 534, 534
};
/* The "+2" on the second byte is why the table is longer than 256 entries */
return len + asso_values[(unsigned char)str[1]+2] + asso_values[(unsigned char)str[0]] + asso_values[(unsigned char)str[len - 1]];
}
const struct _Entity *
lookup_entity (register const char *str, register size_t len)
{
static const struct _Entity wordlist[] =
{
#line 114 "unent.hash"
{"ecirc", 234},
#line 113 "unent.hash"
{"eacute", 233},
#line 60 "unent.hash"
{"acute", 180},
#line 106 "unent.hash"
{"acirc", 226},
#line 105 "unent.hash"
{"aacute", 225},
#line 239 "unent.hash"
{"ge", 8805},
#line 142 "unent.hash"
{"Zeta", 918},
#line 140 "unent.hash"
{"Delta", 916},
#line 147 "unent.hash"
{"Lambda", 923},
#line 138 "unent.hash"
{"Beta", 914},
#line 163 "unent.hash"
{"gamma", 947},
#line 111 "unent.hash"
{"ccedil", 231},
#line 238 "unent.hash"
{"le", 8804},
#line 110 "unent.hash"
{"aelig", 230},
#line 253 "unent.hash"
{"lang", 9001},
#line 64 "unent.hash"
{"cedil", 184},
#line 171 "unent.hash"
{"lambda", 955},
#line 249 "unent.hash"
{"lceil", 8968},
#line 288 "unent.hash"
{"Dagger", 8225},
#line 223 "unent.hash"
{"radic", 8730},
#line 101 "unent.hash"
{"Yacute", 221},
#line 254 "unent.hash"
{"rang", 9002},
#line 124 "unent.hash"
{"ocirc", 244},
#line 123 "unent.hash"
{"oacute", 243},
#line 54 "unent.hash"
{"reg", 174},
#line 204 "unent.hash"
{"harr", 8596},
#line 250 "unent.hash"
{"rceil", 8969},
#line 200 "unent.hash"
{"larr", 8592},
#line 146 "unent.hash"
{"Kappa", 922},
#line 197 "unent.hash"
{"real", 8476},
#line 266 "unent.hash"
{"oelig", 339},
#line 42 "unent.hash"
{"cent", 162},
#line 51 "unent.hash"
{"laquo", 171},
#line 251 "unent.hash"
{"lfloor", 8970},
#line 229 "unent.hash"
{"cap", 8745},
#line 202 "unent.hash"
{"rarr", 8594},
#line 109 "unent.hash"
{"aring", 229},
#line 62 "unent.hash"
{"para", 182},
#line 131 "unent.hash"
{"ucirc", 251},
#line 130 "unent.hash"
{"uacute", 250},
#line 226 "unent.hash"
{"ang", 8736},
#line 67 "unent.hash"
{"raquo", 187},
#line 252 "unent.hash"
{"rfloor", 8971},
#line 155 "unent.hash"
{"Tau", 932},
#line 192 "unent.hash"
{"Prime", 8243},
#line 190 "unent.hash"
{"hellip", 8230},
#line 205 "unent.hash"
{"crarr", 8629},
#line 289 "unent.hash"
{"permil", 8240},
#line 166 "unent.hash"
{"zeta", 950},
#line 185 "unent.hash"
{"omega", 969},
#line 112 "unent.hash"
{"egrave", 232},
#line 118 "unent.hash"
{"icirc", 238},
#line 117 "unent.hash"
{"iacute", 237},
#line 273 "unent.hash"
{"emsp", 8195},
#line 198 "unent.hash"
{"trade", 8482},
#line 104 "unent.hash"
{"agrave", 224},
#line 201 "unent.hash"
{"uarr", 8593},
#line 99 "unent.hash"
{"Ucirc", 219},
#line 98 "unent.hash"
{"Uacute", 218},
#line 261 "unent.hash"
{"amp", 38},
#line 191 "unent.hash"
{"prime", 8242},
#line 212 "unent.hash"
{"part", 8706},
#line 187 "unent.hash"
{"upsih", 978},
#line 272 "unent.hash"
{"ensp", 8194},
#line 41 "unent.hash"
{"iexcl", 161},
#line 258 "unent.hash"
{"hearts", 9829},
#line 228 "unent.hash"
{"or", 8744},
#line 180 "unent.hash"
{"tau", 964},
#line 115 "unent.hash"
{"euml", 235},
#line 213 "unent.hash"
{"exist", 8707},
#line 128 "unent.hash"
{"oslash", 248},
#line 48 "unent.hash"
{"uml", 168},
#line 247 "unent.hash"
{"perp", 8869},
#line 281 "unent.hash"
{"lsquo", 8216},
#line 290 "unent.hash"
{"lsaquo", 8249},
#line 108 "unent.hash"
{"auml", 228},
#line 196 "unent.hash"
{"image", 8465},
#line 122 "unent.hash"
{"ograve", 242},
#line 167 "unent.hash"
{"eta", 951},
#line 262 "unent.hash"
{"apos", 39},
#line 235 "unent.hash"
{"asymp", 8776},
#line 107 "unent.hash"
{"atilde", 227},
#line 236 "unent.hash"
{"ne", 8800},
#line 120 "unent.hash"
{"eth", 240},
#line 282 "unent.hash"
{"rsquo", 8217},
#line 291 "unent.hash"
{"rsaquo", 8250},
#line 292 "unent.hash"
{"euro", 8364},
#line 215 "unent.hash"
{"nabla", 8711},
#line 267 "unent.hash"
{"Scaron", 352},
#line 270 "unent.hash"
{"circ", 710},
#line 161 "unent.hash"
{"alpha", 945},
#line 47 "unent.hash"
{"sect", 167},
#line 170 "unent.hash"
{"kappa", 954},
#line 89 "unent.hash"
{"Ntilde", 209},
#line 56 "unent.hash"
{"deg", 176},
#line 269 "unent.hash"
{"Yuml", 376},
#line 164 "unent.hash"
{"delta", 948},
#line 129 "unent.hash"
{"ugrave", 249},
#line 126 "unent.hash"
{"ouml", 246},
#line 154 "unent.hash"
{"Sigma", 931},
#line 165 "unent.hash"
{"epsilon", 949},
#line 230 "unent.hash"
{"cup", 8746},
#line 224 "unent.hash"
{"prop", 8733},
#line 245 "unent.hash"
{"oplus", 8853},
#line 125 "unent.hash"
{"otilde", 245},
#line 148 "unent.hash"
{"Mu", 924},
#line 55 "unent.hash"
{"macr", 175},
#line 193 "unent.hash"
{"oline", 8254},
#line 195 "unent.hash"
{"weierp", 8472},
#line 203 "unent.hash"
{"darr", 8595},
#line 144 "unent.hash"
{"Theta", 920},
#line 287 "unent.hash"
{"dagger", 8224},
#line 74 "unent.hash"
{"Acirc", 194},
#line 73 "unent.hash"
{"Aacute", 193},
#line 139 "unent.hash"
{"Gamma", 915},
#line 116 "unent.hash"
{"igrave", 236},
#line 264 "unent.hash"
{"gt", 62},
#line 92 "unent.hash"
{"Ocirc", 212},
#line 91 "unent.hash"
{"Oacute", 211},
#line 159 "unent.hash"
{"Psi", 936},
#line 132 "unent.hash"
{"uuml", 252},
#line 271 "unent.hash"
{"tilde", 732},
#line 97 "unent.hash"
{"Ugrave", 217},
#line 263 "unent.hash"
{"lt", 60},
#line 234 "unent.hash"
{"cong", 8773},
#line 103 "unent.hash"
{"szlig", 223},
#line 149 "unent.hash"
{"Nu", 925},
#line 231 "unent.hash"
{"int", 8747},
#line 243 "unent.hash"
{"sube", 8838},
#line 244 "unent.hash"
{"supe", 8839},
#line 86 "unent.hash"
{"Icirc", 206},
#line 85 "unent.hash"
{"Iacute", 205},
#line 88 "unent.hash"
{"ETH", 208},
#line 277 "unent.hash"
{"lrm", 8206},
#line 82 "unent.hash"
{"Ecirc", 202},
#line 81 "unent.hash"
{"Eacute", 201},
#line 227 "unent.hash"
{"and", 8743},
#line 162 "unent.hash"
{"beta", 946},
#line 102 "unent.hash"
{"THORN", 222},
#line 153 "unent.hash"
{"Rho", 929},
#line 119 "unent.hash"
{"iuml", 239},
#line 79 "unent.hash"
{"Ccedil", 199},
#line 175 "unent.hash"
{"omicron", 959},
#line 184 "unent.hash"
{"psi", 968},
#line 59 "unent.hash"
{"sup3", 179},
#line 168 "unent.hash"
{"theta", 952},
#line 100 "unent.hash"
{"Uuml", 220},
#line 237 "unent.hash"
{"equiv", 8801},
#line 256 "unent.hash"
{"spades", 9824},
#line 66 "unent.hash"
{"ordm", 186},
#line 268 "unent.hash"
{"scaron", 353},
#line 174 "unent.hash"
{"xi", 958},
#line 177 "unent.hash"
{"rho", 961},
#line 160 "unent.hash"
{"Omega", 937},
#line 257 "unent.hash"
{"clubs", 9827},
#line 133 "unent.hash"
{"yacute", 253},
#line 181 "unent.hash"
{"upsilon", 965},
#line 77 "unent.hash"
{"Aring", 197},
#line 65 "unent.hash"
{"sup1", 185},
#line 71 "unent.hash"
{"iquest", 191},
#line 150 "unent.hash"
{"Xi", 926},
#line 210 "unent.hash"
{"hArr", 8660},
#line 284 "unent.hash"
{"ldquo", 8220},
#line 44 "unent.hash"
{"curren", 164},
#line 206 "unent.hash"
{"lArr", 8656},
#line 179 "unent.hash"
{"sigma", 963},
#line 219 "unent.hash"
{"prod", 8719},
#line 194 "unent.hash"
{"frasl", 8260},
#line 222 "unent.hash"
{"lowast", 8727},
#line 183 "unent.hash"
{"chi", 967},
#line 285 "unent.hash"
{"rdquo", 8221},
#line 232 "unent.hash"
{"there4", 8756},
#line 241 "unent.hash"
{"sup", 8835},
#line 208 "unent.hash"
{"rArr", 8658},
#line 121 "unent.hash"
{"ntilde", 241},
#line 152 "unent.hash"
{"Pi", 928},
#line 58 "unent.hash"
{"sup2", 178},
#line 96 "unent.hash"
{"Oslash", 216},
#line 246 "unent.hash"
{"otimes", 8855},
#line 156 "unent.hash"
{"Upsilon", 933},
#line 72 "unent.hash"
{"Agrave", 192},
#line 214 "unent.hash"
{"empty", 8709},
#line 274 "unent.hash"
{"thinsp", 8201},
#line 255 "unent.hash"
{"loz", 9674},
#line 50 "unent.hash"
{"ordf", 170},
#line 90 "unent.hash"
{"Ograve", 210},
#line 46 "unent.hash"
{"brvbar", 166},
#line 283 "unent.hash"
{"sbquo", 8218},
#line 68 "unent.hash"
{"frac14", 188},
#line 70 "unent.hash"
{"frac34", 190},
#line 199 "unent.hash"
{"alefsym", 8501},
#line 157 "unent.hash"
{"Phi", 934},
#line 169 "unent.hash"
{"iota", 953},
#line 225 "unent.hash"
{"infin", 8734},
#line 127 "unent.hash"
{"divide", 247},
#line 95 "unent.hash"
{"times", 215},
#line 84 "unent.hash"
{"Igrave", 204},
#line 176 "unent.hash"
{"pi", 960},
#line 216 "unent.hash"
{"isin", 8712},
#line 80 "unent.hash"
{"Egrave", 200},
#line 207 "unent.hash"
{"uArr", 8657},
#line 69 "unent.hash"
{"frac12", 189},
#line 76 "unent.hash"
{"Auml", 196},
#line 278 "unent.hash"
{"rlm", 8207},
#line 173 "unent.hash"
{"nu", 957},
#line 94 "unent.hash"
{"Ouml", 214},
#line 75 "unent.hash"
{"Atilde", 195},
#line 172 "unent.hash"
{"mu", 956},
#line 182 "unent.hash"
{"phi", 966},
#line 93 "unent.hash"
{"Otilde", 213},
#line 189 "unent.hash"
{"bull", 8226},
#line 137 "unent.hash"
{"Alpha", 913},
#line 87 "unent.hash"
{"Iuml", 207},
#line 61 "unent.hash"
{"micro", 181},
#line 83 "unent.hash"
{"Euml", 203},
#line 57 "unent.hash"
{"plusmn", 177},
#line 188 "unent.hash"
{"piv", 982},
#line 248 "unent.hash"
{"sdot", 8901},
#line 279 "unent.hash"
{"ndash", 8211},
#line 63 "unent.hash"
{"middot", 183},
#line 40 "unent.hash"
{"nbsp", 160},
#line 280 "unent.hash"
{"mdash", 8212},
#line 143 "unent.hash"
{"Eta", 919},
#line 220 "unent.hash"
{"sum", 8721},
#line 260 "unent.hash"
{"quot", 34},
#line 134 "unent.hash"
{"thorn", 254},
#line 186 "unent.hash"
{"thetasym", 977},
#line 135 "unent.hash"
{"yuml", 255},
#line 78 "unent.hash"
{"AElig", 198},
#line 151 "unent.hash"
{"Omicron", 927},
#line 265 "unent.hash"
{"OElig", 338},
#line 218 "unent.hash"
{"ni", 8715},
#line 52 "unent.hash"
{"not", 172},
#line 141 "unent.hash"
{"Epsilon", 917},
#line 45 "unent.hash"
{"yen", 165},
#line 209 "unent.hash"
{"dArr", 8659},
#line 233 "unent.hash"
{"sim", 8764},
#line 221 "unent.hash"
{"minus", 8722},
#line 259 "unent.hash"
{"diams", 9830},
#line 43 "unent.hash"
{"pound", 163},
#line 211 "unent.hash"
{"forall", 8704},
#line 145 "unent.hash"
{"Iota", 921},
#line 240 "unent.hash"
{"sub", 8834},
#line 242 "unent.hash"
{"nsub", 8836},
#line 49 "unent.hash"
{"copy", 169},
#line 286 "unent.hash"
{"bdquo", 8222},
#line 178 "unent.hash"
{"sigmaf", 962},
#line 136 "unent.hash"
{"fnof", 402},
#line 158 "unent.hash"
{"Chi", 935},
#line 217 "unent.hash"
{"notin", 8713},
#line 276 "unent.hash"
{"zwj", 8205},
#line 275 "unent.hash"
{"zwnj", 8204},
#line 53 "unent.hash"
{"shy", 173}
};
static const short lookup[] =
{
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, 0, 1, -1, -1, -1, 2,
-1, -1, -1, -1, 3, 4, 5, -1,
6, 7, 8, -1, -1, 9, 10, 11,
12, -1, -1, 13, -1, -1, -1, 14,
15, 16, -1, -1, -1, 17, 18, -1,
-1, -1, 19, 20, -1, -1, 21, 22,
23, -1, 24, 25, 26, -1, -1, -1,
27, 28, -1, -1, -1, 29, 30, -1,
-1, -1, 31, 32, 33, -1, 34, 35,
36, -1, -1, -1, 37, 38, 39, -1,
40, -1, 41, 42, -1, 43, -1, 44,
45, -1, -1, -1, 46, 47, -1, -1,
48, 49, 50, -1, -1, -1, 51, 52,
-1, -1, 53, 54, 55, -1, -1, 56,
57, 58, -1, 59, -1, 60, -1, -1,
-1, 61, 62, -1, -1, -1, 63, 64,
65, 66, 67, 68, 69, 70, -1, 71,
72, 73, 74, -1, -1, 75, 76, 77,
-1, 78, 79, 80, 81, 82, 83, -1,
84, 85, -1, -1, 86, 87, 88, -1,
-1, 89, 90, -1, -1, -1, 91, 92,
93, -1, 94, 95, 96, 97, -1, -1,
98, 99, -1, 100, 101, 102, 103, 104,
105, -1, 106, 107, 108, -1, -1, 109,
110, 111, -1, 112, 113, 114, 115, 116,
-1, 117, 118, -1, -1, 119, 120, 121,
122, 123, -1, 124, 125, -1, 126, 127,
-485, 130, 131, 132, 133, 134, 135, -125,
-2, 136, 137, 138, -1, -1, 139, 140,
-1, 141, 142, 143, 144, 145, -1, -1,
-1, 146, 147, 148, -1, -1, 149, -1,
150, 151, 152, 153, 154, 155, 156, 157,
158, -1, 159, 160, -1, 161, 162, 163,
-1, -1, 164, 165, -1, -1, -1, 166,
167, 168, -1, 169, -1, 170, 171, -1,
172, 173, -1, 174, 175, -1, 176, 177,
178, 179, -1, 180, 181, 182, -1, 183,
184, 185, 186, -1, -1, -1, 187, -571,
190, 191, 192, 193, 194, -65, -2, -1,
195, 196, 197, -1, 198, 199, -1, -1,
-1, 200, -1, 201, 202, 203, -1, -1,
-1, 204, 205, 206, -1, -1, 207, 208,
-1, 209, -1, -1, -1, 210, -1, -1,
-1, 211, 212, 213, -1, -1, 214, -1,
-1, 215, -1, 216, 217, 218, 219, -1,
-1, 220, 221, -1, 222, 223, 224, -1,
-1, -1, -1, -1, 225, -1, -1, -1,
-1, -1, -1, -1, 226, 227, -1, -1,
-1, 228, -1, -1, 229, -1, -1, 230,
-1, -1, 231, 232, -1, -1, 233, -1,
234, 235, -1, -1, -1, 236, -1, 237,
-1, -1, -1, -1, 238, -1, -1, -1,
-1, 239, 240, -1, -1, 241, -1, -1,
-1, 242, 243, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, 244, 245, -1, -1, -1,
-1, -1, 246, -1, -1, 247, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, 248, -1, 249,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, 250, 251, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, 252
};
if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
{
register unsigned int key = hash (str, len);
if (key <= MAX_HASH_VALUE)
{
register int index = lookup[key];
if (index >= 0)
{
register const char *s = wordlist[index].name;
if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
return &wordlist[index];
}
else if (index < -TOTAL_KEYWORDS)
{
register int offset = - 1 - TOTAL_KEYWORDS - index;
register const struct _Entity *wordptr = &wordlist[TOTAL_KEYWORDS + lookup[offset]];
register const struct _Entity *wordendptr = wordptr + -lookup[offset + 1];
while (wordptr < wordendptr)
{
register const char *s = wordptr->name;
if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
return wordptr;
wordptr++;
}
}
}
}
return 0;
}
#line 293 "unent.hash"
html-xml-utils-7.6/tsearch.c 0000644 0001750 0001750 00000003140 10707353274 013012 0000000 0000000 /* $NetBSD: tsearch.c,v 1.3 1999/09/16 11:45:37 lukem Exp $ */
/* $FreeBSD: src/lib/libc/stdlib/tsearch.c,v 1.1.2.1 2000/08/17 07:38:39 jhb Exp $ */
/*
* Tree search generalized from Knuth (6.2.2) Algorithm T just like
* the AT&T man page says.
*
* The node_t structure is for internal use only, lint doesn't grok it.
*
* Written by reading the System V Interface Definition, not the code.
*
* Totally public domain.
*/
#include
#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: tsearch.c,v 1.3 1999/09/16 11:45:37 lukem Exp $");
#endif /* LIBC_SCCS and not lint */
#include
#define _SEARCH_PRIVATE
#include "config.h"
#ifdef HAVE_SEARCH_H
# include
#else
# include "search-freebsd.h"
#endif
#include
/* find or insert datum into search tree */
void *
tsearch(vkey, vrootp, compar)
const void *vkey; /* key to be located */
void **vrootp; /* address of tree root */
int (*compar) __P((const void *, const void *));
{
node_t *q;
node_t **rootp = (node_t **)vrootp;
if (rootp == NULL)
return NULL;
while (*rootp != NULL) { /* Knuth's T1: */
int r;
if ((r = (*compar)(vkey, (*rootp)->key)) == 0) /* T2: */
return *rootp; /* we found it! */
rootp = (r < 0) ?
&(*rootp)->llink : /* T3: follow left branch */
&(*rootp)->rlink; /* T4: follow right branch */
}
q = malloc(sizeof(node_t)); /* T5: key not found */
if (q != 0) { /* make new node */
*rootp = q; /* link new node to old */
/* LINTED const castaway ok */
q->key = (void *)vkey; /* initialize new node */
q->llink = q->rlink = NULL;
}
return q;
}
html-xml-utils-7.6/cexport.c 0000645 0001750 0001750 00000031707 12374701340 013051 0000000 0000000 /* cexport.c -- create header file of EXPORT'ed declarations from c files */
/*
* Author: Bert Bos
* Created: before 1995
*
* C files are scanned for the keyword EXPORT. Any declaration that
* follows it is copied to a file with the extension .e. It works for
* typedefs, #defines, variables and functions, but only if ANSI
* prototypes are used. Macros are exported with EXPORTDEF(.)
*
* Examples:
*
* EXPORT typedef int * IntPtr -- export IntPtr
*
* EXPORT void walkTree(Tree t) -- export walkTree()
*
* #define max(a,b) ((a)>(b)?(a):(b))
* EXPORTDEF(max(a,b)) -- export max(a,b)
*
* Files are first piped through the C preprocessor cpp.
*
* Command line options:
* -c <cppcmd>: use <cppcmd> instead of cpp
* -e <extension>: use <extension> instead of '.e'
* other options are passed to cpp
*
* The program is not very smart about C syntax, but it doesn't have
* to be, as long as the input is correct ANSI C. If it is not, no
* warnings will be given (except possibly for unmatched braces,
* quotes and parentheses), but the output will not be correct C,
* either.
*
* TO DO: an option to check if the new .e file is different from any
* existing one and to keep the old one in that case. (Useful to save
* unnecessary recompilations.)
*/
#include "config.h"
#include
#if STDC_HEADERS
# include
#else
# ifndef HAVE_STRCHR
# define strchr index
# define strrchr rindex
# endif
#endif
#include
#include
#ifndef CPP
#define CPP "cc -E"
#endif
#define LINELEN BUFSIZ
static int err = 0; /* Global error counter */
static char *cppcmd = CPP;
static char *extension = ".e";
static FILE *in, *out;
static int eof;
static long lineno;
static char line[LINELEN];
static char *curname;
/***************************************************************************
 * marker_name -- extract the file name from a line marker
 *
 * `s' points just past the line number of a "# <n> ..." or "#line <n> ..."
 * marker. The name may be quoted ("file.c") or bare (rest of the line).
 * On success the name is stored in buf (truncated to bufsize-1 characters)
 * and 1 is returned. If the marker carries no name, buf is left untouched
 * and 0 is returned, so a name stored earlier stays valid.
 ***************************************************************************/
static int marker_name(const char *s, char *buf, size_t bufsize)
{
  size_t n;

  if (*s == '\0') return 0;             /* Nothing after the number */
  s++;                                  /* Skip separator after number */
  if (*s == '"') {
    s++;
    n = strcspn(s, "\"");               /* Up to closing quote or end */
  } else {
    n = strlen(s);
    if (n > 0 && s[n-1] == '\n') n--;   /* Drop the newline */
  }
  if (n == 0) return 0;
  if (n >= bufsize) n = bufsize - 1;
  memcpy(buf, s, n);
  buf[n] = '\0';
  return 1;
}

/***************************************************************************
 * get_line -- read next line, return 0 if eof
 *
 * Reads from `in' into `line'. Line markers ("# <n> ..." and
 * "#line <n> ...") are consumed here: they set `lineno' and, when they
 * name a file, `curname'. Every other line, including other lines that
 * start with '#', increments `lineno' and is returned to the caller.
 ***************************************************************************/
static int get_line()
{
  static char buf[BUFSIZ];              /* Holds the name curname points to */
  char *s;
  int i;

  do {
    if (eof)
      return 0;
    else if (! fgets(line, LINELEN, in)) {
      eof = 1;
      return 0;
    } else if (line[0] != '#') {
      lineno++;
      return 1;
    } else if (line[1] == ' ') {
      /* Casts: <ctype.h> functions are undefined for negative char values */
      i = 2; while (isspace((unsigned char) line[i])) i++;
      if (! isdigit((unsigned char) line[i])) {
        lineno++;                       /* Not a marker after all */
        return 1;
      }
      /* -1 because the marker gives the number of the *next* line read */
      lineno = strtol(line + i, &s, 0) - 1;
      if (marker_name(s, buf, sizeof(buf))) curname = buf;
    } else if (line[1] == 'l' && strncmp(line, "#line", 5) == 0) {
      lineno = strtol(line + 5, &s, 0) - 1;
      if (marker_name(s, buf, sizeof(buf))) curname = buf;
    } else {
      lineno++;
      return 1;
    }
  } while (1);
}
/***************************************************************************
* exportdef -- copy a #define to output
***************************************************************************/
static void exportdef(i)
     long i;
{
  unsigned long len;

  /*
   * `i' is the index in `line' of the word "EXPORTDEF ". The text that
   * follows is expected to be a double-quoted string (the macro head)
   * followed by the rest of the definition. Output is "#define ", the
   * unquoted string, a space and the rest of the line, plus any
   * continuation lines (lines ending in a backslash).
   *
   * TO DO: encountering an end of file should produce a suitable error
   * message: end of file in middle of macro definition.
   */
  fputs("#define ", out);               /* EXPORTDEF -> #define */

  /* Unquote the following string */
  for (i += 10; line[i] && line[i] != '"'; i++) ;
  if (line[i]) i++;                     /* Skip opening quote, if any */
  for (; line[i] && line[i] != '"'; i++) putc(line[i], out);
  putc(' ', out);

  /* Only step over the closing quote if there is one; otherwise we are
     at the end of the string and there is no rest of line to write */
  if (line[i]) fputs(line + i + 1, out);

  len = strlen(line);                   /* Continuation lines? */
  while (len >= 2 && line[len-2] == '\\') {
    if (! get_line()) break;
    fputs(line, out);
    len = strlen(line);
  }
}
/***************************************************************************
* export -- copy next declaration to output
***************************************************************************/
static void export(i)
     long *i;
{
  int brace, paren, squote, dquote, comment, stop, is_typedef, start, is_enum,
    is_extern, is_struct;

  /*
   * On entry *i is the index in `line' of the word "EXPORT". The text
   * after it is copied to `out', reading more lines with get_line() as
   * needed. "extern " is written first unless the declaration starts
   * with typedef, enum, extern or struct. Copying stops, and ";\n" is
   * written, at:
   *   - the ')' that closes the outermost parentheses (not for typedefs),
   *   - a ';' outside parentheses and braces,
   *   - an '=' outside parentheses and braces (the initializer is dropped).
   * It also stops at end of input. On return *i is the index of the
   * character that ended the copy.
   *
   * TO DO: End of file while any of the variables is still
   * non-null is also an error.
   */
  *i += 6;                              /* Skip "EXPORT" */
  comment = 0;
  squote = 0;
  dquote = 0;
  paren = 0;
  brace = 0;
  stop = 0;
  is_typedef = 0;
  is_enum = 0;
  is_extern = 0;
  is_struct = 0;
  start = 1;                            /* Nothing written yet */
  do {
    switch (line[*i]) {
    case '\\':
      if (line[*i+1]) (*i)++;           /* Skip next char */
      break;
    case '{':
      if (!comment && !squote && !dquote && !paren) brace++;
      break;
    case '}':
      if (!comment && !squote && !dquote && !paren) brace--;
      if (brace < 0) {
        fprintf(stderr, "%s:%ld: syntax error (too many '}'s)\n",
                curname, lineno);
        err++;
        brace = 0;
      }
      break;
    case '"':
      if (!comment && !squote) dquote = !dquote;
      break;
    case '\'':
      if (!comment && !dquote) squote = !squote;
      break;
    case '*':
      if (!comment && !dquote && !squote && *i > 0 && line[*i-1] == '/')
        comment = 1;                    /* Start of comment */
      break;
    case '/':                           /* Possible end of comment */
      if (comment && *i > 0 && line[*i-1] == '*') comment = 0;
      break;
    case '(':
      if (!comment && !dquote && !squote && !brace) paren++;
      break;
    case ')':
      if (!comment && !dquote && !squote && !brace) {
        paren--;
        if (paren == 0 && !is_typedef) {
          putc(')', out);               /* End of function prototype */
          putc(';', out);
          putc('\n', out);
          stop = 1;
        }
      }
      break;
    case ';':
      if (!comment && !dquote && !squote && !paren && !brace) {
        putc(';', out);
        putc('\n', out);
        stop = 1;
      }
      break;
    case '=':
      if (!comment && !dquote && !squote && !brace && !paren) {
        putc(';', out);                 /* End of variable decl. */
        putc('\n', out);
        stop = 1;
      }
      break;
    case '\n':
      if (dquote) {
        fprintf(stderr,
                "%s:%ld: syntax error (string didn't end)\n",
                curname, lineno);
        err++;
        dquote = 0;
      }
      if (squote) {
        fprintf(stderr,
                "%s:%ld: syntax error (char const didn't end)\n",
                curname, lineno);
        err++;
        squote = 0;
      }
      break;
    case '\0':
      if (! get_line()) stop = 1;
      else *i = -1;                     /* Incremented to 0 below */
      break;
    case 't':
      if (!comment && !squote && !dquote && paren == 0 && brace == 0
          && strncmp("typedef", &line[*i], 7) == 0)
        is_typedef = 1;
      break;
    case 's':
      if (!comment && !squote && !dquote && paren == 0 && brace == 0
          && strncmp("struct", &line[*i], 6) == 0)
        is_struct = 1;
      break;
    case 'e':
      if (!comment && !squote && !dquote && paren == 0 && brace == 0) {
        if (strncmp("enum", &line[*i], 4) == 0) is_enum = 1;
        else if (strncmp("extern", &line[*i], 6) == 0) is_extern = 1;
      }
      break;
    }
    if (! stop) {
      if (*i >= 0) {
        if (! start) {
          putc(line[*i], out);
        /* Cast: isspace() is undefined for negative char values */
        } else if (! isspace((unsigned char) line[*i])) {
          /* First visible character: decide whether "extern" is needed */
          if (! is_typedef && ! is_enum && ! is_extern && ! is_struct)
            fputs("extern ", out);
          putc(line[*i], out);
          start = 0;
        }
      }
      (*i)++;
    }
  } while (! stop);
}
/***************************************************************************
* process -- scan file and write exported declarations
***************************************************************************/
static void process(file, cpp)
     char *file, *cpp;
{
  char cmd[1024], *s, outname[1024];
  int brace, paren, dquote, squote, comment, n;
  long i;

  /*
   * Runs `file' (or standard input if file is NULL) through the
   * preprocessor command cppcmd with options `cpp' and writes the
   * exported declarations to the file with the extension replaced by
   * `extension' (or to standard output if file is NULL). Errors are
   * reported on stderr and counted in `err'.
   *
   * NOTE(review): the file name is passed to the shell unquoted; names
   * with spaces or shell metacharacters will not work as intended.
   */

  /* Build cpp command line; refuse rather than overflow cmd */
  n = snprintf(cmd, sizeof(cmd), "%s%s%s", cppcmd, cpp, file ? file : "-");
  if (n < 0 || (size_t) n >= sizeof(cmd)) {
    fprintf(stderr, "%s: cpp command line too long\n", file ? file : "-");
    err++;
    return;
  }

  if (file) {                           /* Construct output file name */
    if (strlen(file) + strlen(extension) >= sizeof(outname)) {
      fprintf(stderr, "%s: file name too long\n", file);
      err++;
      return;
    }
    strcpy(outname, file);
    s = strrchr(outname, '.');          /* Extension becomes .e */
    if (s && strchr(s, '/')) s = NULL;  /* That '.' is in a directory name */
    if (! s) s = outname + strlen(outname);
    strcpy(s, extension);
  }

  eof = 0;
  lineno = 0;

  in = popen(cmd, "r");                 /* Pipe file through cpp */
  if (! in) { perror(cmd); err++; return; }

  if (file) {
    out = fopen(outname, "w");
    if (! out) { perror(outname); err++; pclose(in); return; }
  } else {
    out = stdout;                       /* No file name, use stdout */
  }

  if (file) curname = file; else curname = "";

  /*
   * If the word EXPORT is found and it is not inside a comment, between
   * quotes, parentheses or braces, the export() function is called to copy
   * the declaration to the out file. When the export() function ends, `line'
   * may have changed, but `i' points to the last copied character.
   *
   * If the word EXPORTDEF is found at the start of a line and it
   * is not inside a comment or between quotes, exportdef is called.
   */
  comment = 0;
  dquote = 0;
  squote = 0;
  paren = 0;
  brace = 0;
  while (get_line()) {
    for (i = 0; line[i]; i++) {
      switch (line[i]) {
      case '\\':
        if (line[i+1]) i++;             /* Skip next char */
        break;
      case '{':
        if (!comment && !dquote && !squote) brace++;
        break;
      case '}':
        if (!comment && !dquote && !squote) brace--;
        if (brace < 0) {
          fprintf(stderr, "%s:%ld: syntax error (too many '}'s)\n",
                  curname, lineno);
          err++;
          brace = 0;
        }
        break;
      case '(':
        if (!comment && !dquote && !squote) paren++;
        break;
      case ')':
        if (!comment && !dquote && !squote) paren--;
        if (paren < 0) {
          fprintf(stderr, "%s:%ld: syntax error (too many ')'s)\n",
                  curname, lineno);
          err++;
          paren = 0;
        }
        break;
      case '\'':
        if (!comment && !dquote) squote = !squote;
        break;
      case '"':
        if (!comment && !squote) dquote = !dquote;
        break;
      case '\n':
        if (dquote) {
          fprintf(stderr,
                  "%s:%ld: syntax error (string didn't end)\n",
                  curname, lineno);
          err++;
          dquote = 0;
        }
        if (squote) {
          fprintf(stderr,
                  "%s:%ld: syntax error (char const didn't end)\n",
                  curname, lineno);
          err++;
          squote = 0;
        }
        break;
      case '*':
        if (!comment && !dquote && !squote && i > 0 && line[i-1] == '/')
          comment = 1;                  /* Start of comment */
        break;
      case '/':                         /* Possible end of comment */
        if (comment && i > 0 && line[i-1] == '*') comment = 0;
        break;
      case 'E':
        /* Casts: isalnum() is undefined for negative char values */
        if (comment || dquote || squote || paren != 0 || brace != 0)
          ;
        else if (strncmp(&line[i], "EXPORT", 6) == 0
                 && (i == 0 || !isalnum((unsigned char) line[i-1]))
                 && !isalnum((unsigned char) line[i+6])) {
          export(&i);
          /* If input ended inside the declaration, i sits on the NUL;
             back up so the loop's i++ does not step past it */
          if (eof) i = (long) strlen(line) - 1;
        } else if (strncmp(&line[i], "EXPORTDEF ", 10) == 0
                   && (i == 0 || !isalnum((unsigned char) line[i-1]))) {
          exportdef(i);
          i = (long) strlen(line) - 1;
        }
        break;
      }
    }
  }
  if (comment) {
    fprintf(stderr, "%s:%ld: syntax error (comment didn't end)\n",
            curname, lineno);
    err++;
  }
  if (dquote) {
    fprintf(stderr, "%s:%ld: syntax error (string didn't end)\n",
            curname, lineno);
    err++;
  }
  if (squote) {
    fprintf(stderr, "%s:%ld: syntax error (char const didn't end)\n",
            curname, lineno);
    err++;
  }
  if (file && fclose(out) != 0) { perror(outname); err++; }
  /* A popen() stream must be closed with pclose(). The exit status of
     cpp is deliberately ignored: its output is used even if it
     complained, as before. */
  pclose(in);
}
static void usage(s)
     char *s;
{
  FILE *dest = stderr;

  /* Asking for (or needing) the usage message counts as an error */
  err++;
  /* `s' is the name under which the program was invoked */
  fprintf(dest,
          "Usage: %s {-Idir|-Dsym} [-h] [-c cppcmd] [-e ext] {file}\n",
          s);
}
int main(argc, argv)
     int argc;
     char *argv[];
{
  char cpp[BUFSIZ];                     /* Max. cmd. line length */
  size_t used, len;
  int nfiles, i;

  /*
   * Options are handled in the order given, so -c, -e and cpp options
   * only affect the files that follow them. Returns the number of
   * errors encountered.
   */
  strcpy(cpp, " -D__export ");
  used = strlen(cpp);
  nfiles = 0;
  for (i = 1; i < argc; i++) {
    if (!strncmp(argv[i], "-c", 2)) {   /* Replace cpp command */
      if (argv[i][2])
        cppcmd = argv[i] + 2;
      else if (i + 1 < argc)
        cppcmd = argv[++i];
      else {                            /* -c without argument */
        usage(argv[0]);
        return err;
      }
    } else if (!strncmp(argv[i], "-e", 2)) { /* Extension instead of .e */
      if (argv[i][2])
        extension = argv[i] + 2;
      else if (i + 1 < argc)
        extension = argv[++i];
      else {                            /* -e without argument */
        usage(argv[0]);
        return err;
      }
    } else if (!strncmp(argv[i], "-h", 2)) { /* -h: help */
      usage(argv[0]);
      return err;                       /* Don't go on to read stdin */
    } else if (argv[i][0] == '-' || argv[i][0] == '+') {
      /* Pass options to cpp: option + space + NUL must fit */
      len = strlen(argv[i]);
      if (used + len + 2 > sizeof(cpp)) {
        fprintf(stderr, "%s: too many options for cpp\n", argv[0]);
        err++;
        return err;
      }
      memcpy(cpp + used, argv[i], len);
      used += len;
      cpp[used++] = ' ';
      cpp[used] = '\0';
    } else {                            /* Not option, must be file */
      nfiles++;
      process(argv[i], cpp);
    }
  }
  if (nfiles == 0)                      /* no arguments, use stdin */
    process(NULL, cpp);
  return err;
}
html-xml-utils-7.6/hxcite.1 0000645 0001750 0001750 00000012115 13231344676 012567 0000000 0000000 .de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXCITE" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxcite \- replace bibliographic references by hyperlinks
.SH SYNOPSIS
.B hxcite
.RB "[\| " \-b
.IR base " \|]"
.RB "[\| " \-p
.IR pattern " \|]"
.RB "[\| " \-a
.IR auxfile " \|]"
.RB "[\| " \-m
.IR marker " \|]"
.RB "[\| " -c " \|]"
.IR bibfile " [\| " file " \|]"
.SH DESCRIPTION
.LP
The
.B hxcite
command copies the
.I file
to standard output, looking for strings of the form [[\fIlabel\fP]].
The label may not include white space and the double pair of square
brackets must enclose the label without any spaces in between. If
.B hxcite
finds the label in the
.IR bibfile ","
the string is replaced by the
.IR pattern "."
The pattern can include certain variables. If the label is not found
in
.IR bibfile ","
it is left unchanged.
.PP
The default pattern replaces the string with a hyperlink, but if the
.B \-p
option is used, the replacement can be any pattern. The input doesn't
even have to be HTML.
.LP
If the label is enclosed in {{...}} instead of [[...]], it is copied
to the output unchanged and not
replaced by the pattern, but the label is still searched in the
.IR bibfile "."
.SH OPTIONS
The following options are supported:
.TP 10
.BI \-p " pattern"
Specifies the pattern by which the string [[\fIlabel\fP]] is replaced.
The pattern may include the variables
.B %b
(which is replaced by the value of the
.B \-b
option),
.B %m
(which is replaced by the value of the
.B \-m
option) and
.B %L
(which is replaced by the
.IR label ")."
The default pattern is
.d
<a href="%b#%L" rel="biblioentry">[%L]<!--{{%m%L}}--></a>
.e
.TP
.BI \-b " base"
Sets the value for the
.B %b
variable in the pattern. Typically this is set to a relative or
absolute URL. By default this value is an empty string.
.TP
.BI \-a " auxfile"
All labels that have been found and replaced are also written to a
file. This is so that
.BR hxmkbib (1)
can find them and create a bibliography. The default
.I auxfile
is constructed from the name of the
.I file
by removing the last extension (if any) and replacing it by ".aux".
If no
.I file
is given, the default name is "aux.aux".
.TP
.BI \-m " marker"
By default, the program looks for "[[name]]", but it can be
made to look for "[[#name]]" where # is some string, usually a
symbol such as '!' or '='. This allows references to be
classified, e.g., "[[!name]]" for normative references and
"[[name]]" for non-normative references.
.TP
.B \-c
Causes "[[name]]" to be ignored when it occurs inside XML comments
("<!--...-->"). This is useful for files where such labels occur in
comments, to avoid that they be expanded and possibly lead to invalid
output; useful also if
.B hxcite
is used for non-HTML files which may contain "" for the end of a comment.
.PP
There is currently no way to use numbers for references (e.g., "[1]",
"[2]") instead of the labels ("[Lie1996]", "[UTN22]").
.PP
.B hxcite
requires the
.B %L
(label) field to be present in every entry in
.IR bibfile ","
which is not the case for
.BR refer "(1)."
.B hxcite
does not implement
.BR refer "'s"
keyword search.
.SH "EXAMPLE"
.PP
The following looks for references of the form "[[!label]]" in
"myfile.html", skipping references that occur inside HTML comments,
and looks up the labels in "biblio.ref". The output is written to
"new.html" and the list of recognized labels to "myfile.aux".
.d
hxcite -c -m '!' biblio.ref myfile.html > new.html
.e
.SH "SEE ALSO"
.BR asc2xml (1),
.BR refer (1),
.BR hxmkbib (1),
.BR hxnormalize (1),
.BR hxnum (1),
.BR hxprune (1),
.BR hxtoc (1),
.BR hxunent (1),
.BR xml2asc (1),
.BR UTF-8 " (RFC 2279)"
html-xml-utils-7.6/hxextract.1 0000644 0001750 0001750 00000003462 12704011745 013311 0000000 0000000 .de d \" begin display
.sp
.in +4
.nf
..
.de e \" end display
.in -4
.fi
.sp
..
.TH "HXEXTRACT" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.SH NAME
hxextract \- extract selected elements from a HTML or XML file
.SH SYNOPSIS
.B hxextract
.RB "[\| " \-h
.RB "| " \-? " \|]"
.RB "[\| " \-x " \|]"
.RB "[\| " \-s
.IR text " \|]"
.RB "[\| " \-e
.IR text " \|]"
.RB "[\| " \-b
.IR base " \|]"
.I element-or-class
.RB "[\| " \-c
.IR "configfile" " | "
.IR file\-or\-URL " \|]"
.SH DESCRIPTION
.B hxextract
outputs all elements with a certain name and/or class.
.PP
Input must be well-formed, since no HTML heuristics are applied.
.SH OPTIONS
The following options are supported:
.TP 10
.B \-x
Use XML format conventions.
.TP 10
.BI \-s " text"
Insert
.I text
at the start of the output.
.TP 10
.BI \-e " text"
Insert
.I text
at the end of the output.
.TP 10
.BI \-b " base"
URL base
.TP 10
.BI \-c " configfile"
Read @chapter lines from
.I configfile
(lines must be of the form "@chapter filename") and extract elements from each of those files.
.TP 10
.BR \-h ", " \-?
Print command usage.
.SH OPERANDS
The following operands are supported:
.TP 10
.I element-or-class
The name of an element to extract (e.g., "H2"), or the name of a class
preceded by "." (e.g., ".example") or a combination of both (e.g.,
"H2.example").
.TP
.I file-or-URL
A file name or a URL. To read from standard input, use "-".
.SH ENVIRONMENT
To use a proxy to retrieve remote files, set the environment variables
.B http_proxy
and
.BR ftp_proxy "."
E.g.,
.B http_proxy="http://localhost:8080/"
.SH BUGS
.LP
Remote files (specified with a URL) are currently only supported for
HTTP. Password-protected files or files that depend on HTTP "cookies"
are not handled. (You can use tools such as
.BR curl (1)
or
.BR wget (1)
to retrieve such files.)
.SH "SEE ALSO"
.BR hxselect (1)
html-xml-utils-7.6/hxunent.1 0000644 0001750 0001750 00000003132 12704011745 012762 0000000 0000000 .TH "HXUNENT" "1" "10 Jul 2011" "7.x" "HTML-XML-utils"
.de d \" begin display
.sp
.in +4
.nf
.ft CR
.CDS
..
.de e \" end display
.CDE
.in -4
.fi
.ft R
.sp
..
.SH NAME
hxunent \- replace HTML predefined character entities by UTF-8
.SH SYNOPSIS
.B hxunent
.RB "[\| " \-b " \|]"
.RB "[\| " \-f " \|]"
.RI "[\| " file " \|]"
.SH DESCRIPTION
.LP
The
.B hxunent
command reads the
.I file
(or standard input) and copies it to standard output with &-entities
replaced by their equivalent character (encoded as UTF-8). E.g., &quot; is
replaced by " and &lt; is replaced by <.
.SH OPTIONS
The following options are supported:
.TP 10
.B -b
The five builtin entities of XML (&lt; &gt; &quot; &apos; &amp;) are not
replaced but copied unchanged. This is necessary if the output has to
be valid XML or SGML.
.TP
.B -f
This option changes how unknown entities or lone ampersands are handled. Normally they are copied unchanged, but this option tries to "fix" them by replacing ampersands by &amp;. Often such stray ampersands are the result of copy and paste of URLs into a document and then this option indeed fixes them and makes the document valid.
.SH "DIAGNOSTICS"
The program's exit value is 0 if all went well, otherwise:
.TP 10
.B 1
The input couldn't be read (file not found, file not readable...)
.TP
.B 2
Wrong command line arguments.
.SH "SEE ALSO"
.BR asc2xml (1),
.BR xml2asc (1),
.BR UTF-8 " (RFC 2279)"
.SH BUGS
.LP
The program assumes entities are as defined by HTML. It doesn't read a
document's DTD to find the actual definitions in use in a document.
With
.BR \-f ,
it will even remove all entities that are not HTML entities.
html-xml-utils-7.6/hxprintlinks.c 0000644 0001750 0001750 00000013550 13205764570 014125 0000000 0000000 /*
* Add a numbered list of links at the end of an HTML file
*
* Copyright © 2001-2015 World Wide Web Consortium
* See http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231
*
* Created 23 Jan 2015 (based on a Perl version from 1 Feb 2001)
* Bert Bos
*/
#include "config.h"
#include
#include
#include
#ifdef HAVE_STRING_H
# include
#elif HAVE_STRINGS_H
# include
#endif
#ifdef HAVE_UNISTD_H
# include
#endif
#include "types.e"
#include "dict.e"
#include "openurl.e"
#include "errexit.e"
#include "heap.e"
#include "html.e"
#include "scan.e"
#include "url.e"
static conststring attname[] = { /* Attributes that contain URLs: */
"src", "href", "data", "longdesc", "cite", "action", "profile",
"background", "usemap", "classid", "codebase"};
static pairlist list = NULL; /* Stored list of URLs */
static bool has_error = false; /* Parsing errors occurred */
static conststring base = NULL; /* Make URLs relative to this base */
/* pairlist_push -- insert a name/value pair at the start of a list */
static void pairlist_push(pairlist *p, const conststring name, const conststring val)
{
  pairlist node;

  new(node);
  node->next = *p;			/* Old head follows the new node */
  node->name = newstring(name);
  node->value = newstring(val);
  *p = node;				/* New node becomes the head */
}
/* print_list_recursive -- print LI items for all entries in list */
static void print_list_recursive(const pairlist list)
{
  conststring url;

  /* The list is built by pushing at the front, so recurse first to
     print the entries in the order in which they were stored. */
  /* ToDo: Escape double quotes */
  /* NOTE(review): if URL_s_absolutize() returns newly allocated memory,
     it is never freed here -- confirm against url.e */
  if (list) {
    print_list_recursive(list->next);
    /* name holds the URL, value holds the attribute it came from */
    url = base ? URL_s_absolutize(base, list->name) : list->name;
    /* One conversion per argument: attribute name, link target, link text */
    printf("<li>%s: <a href=\"%s\">%s</a></li>\n",
	   list->value, url, url);
  }
}
/* print_list -- print an OL with the entries of list */
static void print_list(const pairlist list)
{
  /* An empty list prints nothing at all, not even an empty OL */
  if (list) {
    printf("<ol>\n");
    print_list_recursive(list);
    printf("</ol>\n");
  }
}
/* handle_error -- called when a parse error occurred */
void handle_error(void *clientdata, const string s, int lineno)
{
  /* Record that the input had at least one parse error ... */
  has_error = true;
  /* ... and report it as "<line number>: <message>" */
  fprintf(stderr, "%d: %s\n", lineno, s);
}
/* start -- called before the first event is reported */
void* start(void)
{
  void *nothing = NULL;		/* Always hands back a null pointer */

  return nothing;
}
/* end -- called after the last event is reported */
void end(void *clientdata)
{
  /* Nothing stored (any more): nothing to print */
  if (!list) return;

  /* Print the stored URLs, then release them */
  print_list(list);
  pairlist_delete(list);
  list = NULL;
}
/* handle_comment -- called after a comment is parsed */
void handle_comment(void *clientdata, string commenttext)
{
  /* Copy the comment to the output, putting the delimiters back */
  printf("<!--%s-->", commenttext);
}
/* handle_text -- called after a text chunk is parsed */
void handle_text(void *clientdata, string text)
{
  /* Text is copied to standard output unchanged */
  fprintf(stdout, "%s", text);
}
/* handle_decl -- called after a declaration is parsed */
void handle_decl(void *clientdata, string gi, string fpi,
		 string url)
{
  /* Write the declaration back out. gi is the root element name, fpi
     the public identifier and url the system identifier; either or
     both of the identifiers may be absent (NULL). */
  if (fpi && url)
    printf("<!DOCTYPE %s PUBLIC \"%s\" \"%s\">\n", gi, fpi, url);
  else if (fpi)
    printf("<!DOCTYPE %s PUBLIC \"%s\">\n", gi, fpi);
  else if (url)
    printf("<!DOCTYPE %s SYSTEM \"%s\">\n", gi, url);
  else
    printf("<!DOCTYPE %s>\n", gi);
}
/* handle_pi -- called after a PI is parsed */
void handle_pi(void *clientdata, string pi_text)
{
  /* Copy the processing instruction to the output, restoring the
     opening delimiter.
     NOTE(review): only ">" is appended, so a "?" before it is presumably
     part of pi_text -- confirm against the scanner. */
  printf("<?%s>", pi_text);
}
/* print_attrs -- print attributes */
static void print_attrs(const pairlist attribs)
{
  pairlist cur = attribs;

  /* ToDo: Distinguish SGML (a NULL value means that the name is the
     value and the actual attribute name is implicit) and XML? */
  while (cur) {
    /* An attribute without a value is written as name="name" */
    if (cur->value)
      printf(" %s=\"%s\"", cur->name, cur->value);
    else
      printf(" %s=\"%s\"", cur->name, cur->name);
    cur = cur->next;
  }
}
/* handle_starttag -- called after a start tag is parsed */
void handle_starttag(void *clientdata, string name, pairlist attribs)
{
  conststring url;
  int k = 0;

  /* Remember each URL-valued attribute that is present: the URL is
     stored as the name, the attribute it came from as the value */
  while (k < sizeof(attname)/sizeof(*attname)) {
    url = pairlist_get(attribs, attname[k]);
    if (url) pairlist_push(&list, url, attname[k]);
    k++;
  }

  /* Then copy the tag itself to the output */
  printf("<%s", name);
  print_attrs(attribs);
  printf(">");
}
/* handle_emptytag -- called after an empty element is parsed */
void handle_emptytag(void *clientdata, string name, pairlist attribs)
{
  conststring url;
  int k = 0;

  /* Remember each URL-valued attribute that is present: the URL is
     stored as the name, the attribute it came from as the value */
  while (k < sizeof(attname)/sizeof(*attname)) {
    url = pairlist_get(attribs, attname[k]);
    if (url) pairlist_push(&list, url, attname[k]);
    k++;
  }

  /* Then copy the tag to the output, closed in the empty-element form */
  printf("<%s", name);
  print_attrs(attribs);
  printf(" />");
}
/* handle_endtag -- called after an endtag is parsed (name may be "") */
void handle_endtag(void *clientdata, string name)
{
/* Just before